From 4bb00e607021be606cf450e9fee72cbec8fa54ce Mon Sep 17 00:00:00 2001 From: Peter Bieringer Date: Sun, 15 Dec 2024 11:40:02 +0100 Subject: [PATCH 1/8] item-cache-mtime-size: add new option --- config | 4 ++++ radicale/config.py | 4 ++++ radicale/storage/multifilesystem/__init__.py | 1 + radicale/storage/multifilesystem/base.py | 3 +++ 4 files changed, 12 insertions(+) diff --git a/config b/config index 11c6bc91..731c60f6 100644 --- a/config +++ b/config @@ -155,6 +155,10 @@ # Note: use only on single-instance setup, will break consistency with client in multi-instance setup #use_cache_subfolder_for_synctoken = False +# Use last modifiction time (nanoseconds) and size (bytes) for 'item' cache instead of SHA256 (improves speed) +# Note: check used filesystem mtime precision before enabling +#use_mtime_and_size_for_item_cache=False + # Use configured umask for folder creation (not applicable for OS Windows) # Useful value: 0077 | 0027 | 0007 | 0022 #folder_umask = (system default, usual 0022) diff --git a/radicale/config.py b/radicale/config.py index e824009b..73b04dc4 100644 --- a/radicale/config.py +++ b/radicale/config.py @@ -299,6 +299,10 @@ def json_str(value: Any) -> dict: "value": "False", "help": "use subfolder 'collection-cache' for 'sync-token' cache file structure instead of inside collection folder", "type": bool}), + ("use_mtime_and_size_for_item_cache", { + "value": "False", + "help": "use mtime and file size instead of SHA256 for 'item' cache (improves speed)", + "type": bool}), ("folder_umask", { "value": "", "help": "umask for folder creation (empty: system default)", diff --git a/radicale/storage/multifilesystem/__init__.py b/radicale/storage/multifilesystem/__init__.py index 6592b515..4e5271f5 100644 --- a/radicale/storage/multifilesystem/__init__.py +++ b/radicale/storage/multifilesystem/__init__.py @@ -97,6 +97,7 @@ def __init__(self, configuration: config.Configuration) -> None: logger.info("storage cache subfolder usage for 'item': %s", self._use_cache_subfolder_for_item) logger.info("storage cache subfolder usage for 'history': %s", self._use_cache_subfolder_for_history) logger.info("storage cache subfolder usage for 'sync-token': %s", self._use_cache_subfolder_for_synctoken) + logger.info("storage cache use mtime and size for 'item': %s", self._use_mtime_and_size_for_item_cache) logger.debug("storage cache action logging: %s", self._debug_cache_actions) if self._use_cache_subfolder_for_item is True or self._use_cache_subfolder_for_history is True or self._use_cache_subfolder_for_synctoken is True: logger.info("storage cache subfolder: %r", self._get_collection_cache_folder()) diff --git a/radicale/storage/multifilesystem/base.py b/radicale/storage/multifilesystem/base.py index a580cf19..dc013b34 100644 --- a/radicale/storage/multifilesystem/base.py +++ b/radicale/storage/multifilesystem/base.py @@ -74,6 +74,7 @@ class StorageBase(storage.BaseStorage): _use_cache_subfolder_for_item: bool _use_cache_subfolder_for_history: bool _use_cache_subfolder_for_synctoken: bool + _use_mtime_and_size_for_item_cache: bool _debug_cache_actions: bool _folder_umask: str _config_umask: int @@ -92,6 +93,8 @@ def __init__(self, configuration: config.Configuration) -> None: "storage", "use_cache_subfolder_for_history") self._use_cache_subfolder_for_synctoken = configuration.get( "storage", "use_cache_subfolder_for_synctoken") + self._use_mtime_and_size_for_item_cache = configuration.get( + "storage", "use_mtime_and_size_for_item_cache") self._folder_umask = configuration.get( "storage", "folder_umask") self._debug_cache_actions = configuration.get( From ff3f2fc3dea49311af58ce65b172b80231b2d223 Mon Sep 17 00:00:00 2001 From: Peter Bieringer Date: Sun, 15 Dec 2024 11:40:20 +0100 Subject: [PATCH 2/8] item-cache-mtime-size new option doc --- DOCUMENTATION.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md index 76a0b65c..c7a1d9ee 100644 --- a/DOCUMENTATION.md +++ b/DOCUMENTATION.md @@ -1050,6 +1050,14 @@ Default: `False` Note: use only on single-instance setup, will break consistency with client in multi-instance setup +##### use_mtime_and_size_for_item_cache + +Use last modifiction time (nanoseconds) and size (bytes) for 'item' cache instead of SHA256 (improves speed) + +Default: `False` + +Note: check used filesystem mtime precision before enabling + ##### folder_umask Use configured umask for folder creation (not applicable for OS Windows) From 62bdfeab406e10c64ab63349111dd4489b8195d9 Mon Sep 17 00:00:00 2001 From: Peter Bieringer Date: Sun, 15 Dec 2024 11:40:58 +0100 Subject: [PATCH 3/8] item-cache-mtime-size: feature --- radicale/storage/multifilesystem/cache.py | 21 ++++++- radicale/storage/multifilesystem/get.py | 18 +++++- radicale/storage/multifilesystem/upload.py | 66 ++++++++++++++-------- 3 files changed, 76 insertions(+), 29 deletions(-) diff --git a/radicale/storage/multifilesystem/cache.py b/radicale/storage/multifilesystem/cache.py index 1d90f975..0f37381c 100644 --- a/radicale/storage/multifilesystem/cache.py +++ b/radicale/storage/multifilesystem/cache.py @@ -73,6 +73,10 @@ def _item_cache_hash(raw_text: bytes) -> str: _hash.update(raw_text) return _hash.hexdigest() + @staticmethod + def _item_cache_mtime_and_size(size: bytes, raw_text: bytes) -> str: + return str(storage.CACHE_VERSION.decode()) + "size=" + str(size) + ";mtime=" + str(raw_text) + def _item_cache_content(self, item: radicale_item.Item) -> CacheContent: return CacheContent(item.uid, item.etag, item.serialize(), item.name, item.component_name, *item.time_range) @@ -80,8 +84,11 @@ def _item_cache_content(self, item: radicale_item.Item) -> CacheContent: def _store_item_cache(self, href: str, item: radicale_item.Item, cache_hash: str = "") -> CacheContent: if not cache_hash: - cache_hash = self._item_cache_hash( - item.serialize().encode(self._encoding)) + if self._storage._use_mtime_and_size_for_item_cache is True: + raise RuntimeError("_store_item_cache called without cache_hash is not supported if [storage] use_mtime_and_size_for_item_cache is True") + else: + cache_hash = self._item_cache_hash( + item.serialize().encode(self._encoding)) cache_folder = self._storage._get_collection_cache_subfolder(self._filesystem_path, ".Radicale.cache", "item") content = self._item_cache_content(item) self._storage._makedirs_synced(cache_folder) @@ -96,12 +103,20 @@ def _store_item_cache(self, href: str, item: radicale_item.Item, def _load_item_cache(self, href: str, cache_hash: str ) -> Optional[CacheContent]: cache_folder = self._storage._get_collection_cache_subfolder(self._filesystem_path, ".Radicale.cache", "item") + path = os.path.join(cache_folder, href) try: - with open(os.path.join(cache_folder, href), "rb") as f: + with open(path, "rb") as f: hash_, *remainder = pickle.load(f) if hash_ and hash_ == cache_hash: + if self._storage._debug_cache_actions is True: + logger.debug("Item cache match : %r with hash %r", path, cache_hash) return CacheContent(*remainder) + else: + if self._storage._debug_cache_actions is True: + logger.debug("Item cache no match : %r with hash %r", path, cache_hash) except FileNotFoundError: + if self._storage._debug_cache_actions is True: + logger.debug("Item cache not found : %r with hash %r", path, cache_hash) pass except (pickle.UnpicklingError, ValueError) as e: logger.warning("Failed to load item cache entry %r in %r: %s", diff --git a/radicale/storage/multifilesystem/get.py b/radicale/storage/multifilesystem/get.py index 543f004a..f74c8fb6 100644 --- a/radicale/storage/multifilesystem/get.py +++ b/radicale/storage/multifilesystem/get.py @@ -80,11 +80,18 @@ def _get(self, href: str, verify_href: bool = True raise # The hash of the component in the file system. This is used to check, # if the entry in the cache is still valid. - cache_hash = self._item_cache_hash(raw_text) - if self._storage._debug_cache_actions is True: - logger.debug("Check cache for: %r with hash %r", path, cache_hash) + if self._storage._use_mtime_and_size_for_item_cache is True: + cache_hash = self._item_cache_mtime_and_size(os.stat(path).st_size, os.stat(path).st_mtime_ns) + if self._storage._debug_cache_actions is True: + logger.debug("Item cache check for: %r with mtime and size %r", path, cache_hash) + else: + cache_hash = self._item_cache_hash(raw_text) + if self._storage._debug_cache_actions is True: + logger.debug("Item cache check for: %r with hash %r", path, cache_hash) cache_content = self._load_item_cache(href, cache_hash) if cache_content is None: + if self._storage._debug_cache_actions is True: + logger.debug("Item cache miss for: %r", path) with self._acquire_cache_lock("item"): # Lock the item cache to prevent multiple processes from # generating the same data in parallel. @@ -101,6 +108,8 @@ def _get(self, href: str, verify_href: bool = True vobject_item, = vobject_items temp_item = radicale_item.Item( collection=self, vobject_item=vobject_item) + if self._storage._debug_cache_actions is True: + logger.debug("Item cache store for: %r", path) cache_content = self._store_item_cache( href, temp_item, cache_hash) except Exception as e: @@ -115,6 +124,9 @@ def _get(self, href: str, verify_href: bool = True if not self._item_cache_cleaned: self._item_cache_cleaned = True self._clean_item_cache() + else: + if self._storage._debug_cache_actions is True: + logger.debug("Item cache hit for: %r", path) last_modified = time.strftime( "%a, %d %b %Y %H:%M:%S GMT", time.gmtime(os.path.getmtime(path))) diff --git a/radicale/storage/multifilesystem/upload.py b/radicale/storage/multifilesystem/upload.py index e9783e85..3814f428 100644 --- a/radicale/storage/multifilesystem/upload.py +++ b/radicale/storage/multifilesystem/upload.py @@ -41,19 +41,26 @@ def upload(self, href: str, item: radicale_item.Item raise pathutils.UnsafePathError(href) path = pathutils.path_to_filesystem(self._filesystem_path, href) try: + with self._atomic_write(path, newline="") as fo: # type: ignore + f = cast(TextIO, fo) + f.write(item.serialize()) + except Exception as e: + raise ValueError("Failed to store item %r in collection %r: %s" % + (href, self.path, e)) from e + # store cache file + if self._storage._use_mtime_and_size_for_item_cache is True: + cache_hash = self._item_cache_mtime_and_size(os.stat(path).st_size, os.stat(path).st_mtime_ns) + if self._storage._debug_cache_actions is True: + logger.debug("Item cache store for: %r with mtime and size %r", path, cache_hash) + else: cache_hash = self._item_cache_hash(item.serialize().encode(self._encoding)) - logger.debug("Store cache for: %r with hash %r", path, cache_hash) + if self._storage._debug_cache_actions is True: + logger.debug("Item cache store for: %r with hash %r", path, cache_hash) + try: self._store_item_cache(href, item, cache_hash) except Exception as e: - raise ValueError("Failed to store item %r in collection %r: %s" % + raise ValueError("Failed to store item cache of %r in collection %r: %s" % (href, self.path, e)) from e - # TODO: better fix for "mypy" - with self._atomic_write(path, newline="") as fo: # type: ignore - f = cast(TextIO, fo) - f.write(item.serialize()) - # Clean the cache after the actual item is stored, or the cache entry - # will be removed again. - self._clean_item_cache() # Track the change self._update_history_etag(href, item) self._clean_history() @@ -84,15 +91,11 @@ def get_safe_free_hrefs(uid: str) -> Iterator[str]: for item in items: uid = item.uid logger.debug("Store item from list with uid: '%s'" % uid) - try: - cache_content = self._item_cache_content(item) - except Exception as e: - raise ValueError( - "Failed to store item %r in temporary collection %r: %s" % - (uid, self.path, e)) from e + cache_content = self._item_cache_content(item) for href in get_safe_free_hrefs(uid): + path = os.path.join(self._filesystem_path, href) try: - f = open(os.path.join(self._filesystem_path, href), + f = open(path, "w", newline="", encoding=self._encoding) except OSError as e: if (sys.platform != "win32" and e.errno == errno.EINVAL or @@ -104,14 +107,31 @@ def get_safe_free_hrefs(uid: str) -> Iterator[str]: else: raise RuntimeError("No href found for item %r in temporary " "collection %r" % (uid, self.path)) - with f: - f.write(item.serialize()) - f.flush() - self._storage._fsync(f) - with open(os.path.join(cache_folder, href), "wb") as fb: + + try: + with f: + f.write(item.serialize()) + f.flush() + self._storage._fsync(f) + except Exception as e: + raise ValueError( + "Failed to store item %r in temporary collection %r: %s" % + (uid, self.path, e)) from e + + # store cache file + if self._storage._use_mtime_and_size_for_item_cache is True: + cache_hash = self._item_cache_mtime_and_size(os.stat(path).st_size, os.stat(path).st_mtime_ns) + if self._storage._debug_cache_actions is True: + logger.debug("Item cache store for: %r with mtime and size %r", path, cache_hash) + else: cache_hash = self._item_cache_hash(item.serialize().encode(self._encoding)) - logger.debug("Store cache for: %r with hash %r", fb.name, cache_hash) - pickle.dump(cache_content, fb) + if self._storage._debug_cache_actions is True: + logger.debug("Item cache store for: %r with hash %r", path, cache_hash) + path_cache = os.path.join(cache_folder, href) + if self._storage._debug_cache_actions is True: + logger.debug("Item cache store into: %r", path_cache) + with open(os.path.join(cache_folder, href), "wb") as fb: + pickle.dump((cache_hash, *cache_content), fb) fb.flush() self._storage._fsync(fb) self._storage._sync_directory(cache_folder) From dc20f518dd3e948242e77e6b49fc4b5f2f82c7c4 Mon Sep 17 00:00:00 2001 From: Peter Bieringer Date: Sun, 15 Dec 2024 11:41:08 +0100 Subject: [PATCH 4/8] item-cache-mtime-size: changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ed800a2..9b38513a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ * Add: option [auth] uc_username for uppercase conversion (similar to existing lc_username) * Add: option [debug] storage_cache_action for conditional logging * Fix: set PRODID on collection upload (instead of vobject is inserting default one) +* Add: option [storage] use_mtime_and_size_for_item_cache for changing cache lookup from SHA256 to mtime_ns + size +* Fix: buggy cache file content creation on collection upload ## 3.3.1 From 11dad85404b48be80a702c9d97bfc30fe0d76456 Mon Sep 17 00:00:00 2001 From: Peter Bieringer Date: Sun, 15 Dec 2024 11:45:38 +0100 Subject: [PATCH 5/8] fix types (mpy) --- radicale/storage/multifilesystem/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/radicale/storage/multifilesystem/cache.py b/radicale/storage/multifilesystem/cache.py index 0f37381c..158641e4 100644 --- a/radicale/storage/multifilesystem/cache.py +++ b/radicale/storage/multifilesystem/cache.py @@ -74,7 +74,7 @@ def _item_cache_hash(raw_text: bytes) -> str: return _hash.hexdigest() @staticmethod - def _item_cache_mtime_and_size(size: bytes, raw_text: bytes) -> str: + def _item_cache_mtime_and_size(size: int, raw_text: int) -> str: return str(storage.CACHE_VERSION.decode()) + "size=" + str(size) + ";mtime=" + str(raw_text) def _item_cache_content(self, item: radicale_item.Item) -> CacheContent: From fc7c50b4cbb949159f7389b56eca3aaf58d2208f Mon Sep 17 00:00:00 2001 From: Peter Bieringer Date: Sun, 15 Dec 2024 12:20:24 +0100 Subject: [PATCH 6/8] add note about storage verification --- DOCUMENTATION.md | 2 ++ config | 1 + 2 files changed, 3 insertions(+) diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md index c7a1d9ee..d2dce4e7 100644 --- a/DOCUMENTATION.md +++ b/DOCUMENTATION.md @@ -1058,6 +1058,8 @@ Default: `False` Note: check used filesystem mtime precision before enabling +Note: conversion is done on access, bulk conversion can be done offline using storage verification option `radicale --verify-storage` + ##### folder_umask Use configured umask for folder creation (not applicable for OS Windows) diff --git a/config b/config index 731c60f6..c8e9802b 100644 --- a/config +++ b/config @@ -157,6 +157,7 @@ # Use last modifiction time (nanoseconds) and size (bytes) for 'item' cache instead of SHA256 (improves speed) # Note: check used filesystem mtime precision before enabling +# Note: conversion is done on access, bulk conversion can be done offline using storage verification option: radicale --verify-storage #use_mtime_and_size_for_item_cache=False # Use configured umask for folder creation (not applicable for OS Windows) From 5f79b089c8076035c9541accd53c259df18da4e7 Mon Sep 17 00:00:00 2001 From: Peter Bieringer Date: Sun, 15 Dec 2024 12:21:39 +0100 Subject: [PATCH 7/8] fix option name --- CHANGELOG.md | 2 +- DOCUMENTATION.md | 4 ++-- config | 4 ++-- radicale/config.py | 2 +- radicale/storage/multifilesystem/base.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b38513a..63ee7acb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ * Improve: log important module versions on startup * Improve: auth.ldap config shown on startup, terminate in case no password is supplied for bind user * Add: option [auth] uc_username for uppercase conversion (similar to existing lc_username) -* Add: option [debug] storage_cache_action for conditional logging +* Add: option [logging] storage_cache_action_on_debug for conditional logging * Fix: set PRODID on collection upload (instead of vobject is inserting default one) * Add: option [storage] use_mtime_and_size_for_item_cache for changing cache lookup from SHA256 to mtime_ns + size * Fix: buggy cache file content creation on collection upload diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md index d2dce4e7..cf8b8058 100644 --- a/DOCUMENTATION.md +++ b/DOCUMENTATION.md @@ -1180,9 +1180,9 @@ Log rights rule which doesn't match on level=debug Default: `False` -##### #storage_cache_actions +##### storage_cache_actions_on_debug -Log storage cache actions +Log storage cache actions on level=debug Default: `False` diff --git a/config b/config index c8e9802b..9d30509e 100644 --- a/config +++ b/config @@ -231,8 +231,8 @@ # Log rights rule which doesn't match on level=debug #rights_rule_doesnt_match_on_debug = False -# Log storage cache actions -#storage_cache_actions = False +# Log storage cache actions on level=debug +#storage_cache_actions_on_debug = False [headers] diff --git a/radicale/config.py b/radicale/config.py index 73b04dc4..0ac5970c 100644 --- a/radicale/config.py +++ b/radicale/config.py @@ -380,7 +380,7 @@ def json_str(value: Any) -> dict: "value": "False", "help": "log rights rules which doesn't match on level=debug", "type": bool}), - ("storage_cache_actions", { + ("storage_cache_actions_on_debug", { "value": "False", "help": "log storage cache action on level=debug", "type": bool}), diff --git a/radicale/storage/multifilesystem/base.py b/radicale/storage/multifilesystem/base.py index dc013b34..394e89bf 100644 --- a/radicale/storage/multifilesystem/base.py +++ b/radicale/storage/multifilesystem/base.py @@ -98,7 +98,7 @@ def __init__(self, configuration: config.Configuration) -> None: self._folder_umask = configuration.get( "storage", "folder_umask") self._debug_cache_actions = configuration.get( - "logging", "storage_cache_actions") + "logging", "storage_cache_actions_on_debug") def _get_collection_root_folder(self) -> str: return os.path.join(self._filesystem_folder, "collection-root") From dc51a74e5a600744c31c80372292f33667cd1d8b Mon Sep 17 00:00:00 2001 From: Peter Bieringer Date: Sun, 15 Dec 2024 12:51:02 +0100 Subject: [PATCH 8/8] add test case --- radicale/tests/test_storage.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/radicale/tests/test_storage.py b/radicale/tests/test_storage.py index 5eca0753..1957a137 100644 --- a/radicale/tests/test_storage.py +++ b/radicale/tests/test_storage.py @@ -116,6 +116,22 @@ def test_item_cache_rebuild_subfolder(self) -> None: assert answer1 == answer2 assert os.path.exists(os.path.join(cache_folder, "event1.ics")) + def test_item_cache_rebuild_mtime_and_size(self) -> None: + """Delete the item cache and verify that it is rebuild.""" + self.configure({"storage": {"use_mtime_and_size_for_item_cache": "True"}}) + self.mkcalendar("/calendar.ics/") + event = get_file_content("event1.ics") + path = "/calendar.ics/event1.ics" + self.put(path, event) + _, answer1 = self.get(path) + cache_folder = os.path.join(self.colpath, "collection-root", + "calendar.ics", ".Radicale.cache", "item") + assert os.path.exists(os.path.join(cache_folder, "event1.ics")) + shutil.rmtree(cache_folder) + _, answer2 = self.get(path) + assert answer1 == answer2 + assert os.path.exists(os.path.join(cache_folder, "event1.ics")) + def test_put_whole_calendar_uids_used_as_file_names(self) -> None: """Test if UIDs are used as file names.""" _TestBaseRequests.test_put_whole_calendar(