diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4ed800a2..63ee7acb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,8 +13,10 @@
 * Improve: log important module versions on startup
 * Improve: auth.ldap config shown on startup, terminate in case no password is supplied for bind user
 * Add: option [auth] uc_username for uppercase conversion (similar to existing lc_username)
-* Add: option [debug] storage_cache_action for conditional logging
+* Add: option [logging] storage_cache_actions_on_debug for conditional logging
 * Fix: set PRODID on collection upload (instead of vobject is inserting default one)
+* Add: option [storage] use_mtime_and_size_for_item_cache for changing cache lookup from SHA256 to mtime_ns + size
+* Fix: buggy cache file content creation on collection upload

 ## 3.3.1

diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md
index 76a0b65c..cf8b8058 100644
--- a/DOCUMENTATION.md
+++ b/DOCUMENTATION.md
@@ -1050,6 +1050,16 @@ Default: `False`

 Note: use only on single-instance setup, will break consistency with client in multi-instance setup

+##### use_mtime_and_size_for_item_cache
+
+Use last modification time (nanoseconds) and size (bytes) for 'item' cache instead of SHA256 (improves speed)
+
+Default: `False`
+
+Note: check the mtime precision of the filesystem in use before enabling
+
+Note: conversion is done on access; bulk conversion can be done offline using the storage verification option `radicale --verify-storage`
+
 ##### folder_umask

 Use configured umask for folder creation (not applicable for OS Windows)
@@ -1170,9 +1180,9 @@ Log rights rule which doesn't match on level=debug

 Default: `False`

-##### #storage_cache_actions
+##### storage_cache_actions_on_debug

-Log storage cache actions
+Log storage cache actions on level=debug

 Default: `False`

diff --git a/config b/config
index 11c6bc91..9d30509e 100644
--- a/config
+++ b/config
@@ -155,6 +155,11 @@
 # Note: use only on single-instance setup, will break consistency with client in multi-instance setup
 #use_cache_subfolder_for_synctoken = False

+# Use last modification time (nanoseconds) and size (bytes) for 'item' cache instead of SHA256 (improves speed)
+# Note: check the mtime precision of the filesystem in use before enabling
+# Note: conversion is done on access; bulk conversion can be done offline using the storage verification option: radicale --verify-storage
+#use_mtime_and_size_for_item_cache = False
+
 # Use configured umask for folder creation (not applicable for OS Windows)
 # Useful value: 0077 | 0027 | 0007 | 0022
 #folder_umask = (system default, usual 0022)
@@ -226,8 +231,8 @@
 # Log rights rule which doesn't match on level=debug
 #rights_rule_doesnt_match_on_debug = False

-# Log storage cache actions
-#storage_cache_actions = False
+# Log storage cache actions on level=debug
+#storage_cache_actions_on_debug = False

 [headers]

diff --git a/radicale/config.py b/radicale/config.py
index e824009b..0ac5970c 100644
--- a/radicale/config.py
+++ b/radicale/config.py
@@ -299,6 +299,10 @@ def json_str(value: Any) -> dict:
          "value": "False",
          "help": "use subfolder 'collection-cache' for 'sync-token' cache file structure instead of inside collection folder",
          "type": bool}),
+        ("use_mtime_and_size_for_item_cache", {
+         "value": "False",
+         "help": "use mtime and file size instead of SHA256 for 'item' cache (improves speed)",
+         "type": bool}),
         ("folder_umask", {
          "value": "",
          "help": "umask for folder creation (empty: system default)",
@@ -376,7 +380,7 @@ def json_str(value: Any) -> dict:
          "value": "False",
          "help": "log rights rules which doesn't match on level=debug",
"type": bool}), - ("storage_cache_actions", { + ("storage_cache_actions_on_debug", { "value": "False", "help": "log storage cache action on level=debug", "type": bool}), diff --git a/radicale/storage/multifilesystem/__init__.py b/radicale/storage/multifilesystem/__init__.py index 6592b515..4e5271f5 100644 --- a/radicale/storage/multifilesystem/__init__.py +++ b/radicale/storage/multifilesystem/__init__.py @@ -97,6 +97,7 @@ def __init__(self, configuration: config.Configuration) -> None: logger.info("storage cache subfolder usage for 'item': %s", self._use_cache_subfolder_for_item) logger.info("storage cache subfolder usage for 'history': %s", self._use_cache_subfolder_for_history) logger.info("storage cache subfolder usage for 'sync-token': %s", self._use_cache_subfolder_for_synctoken) + logger.info("storage cache use mtime and size for 'item': %s", self._use_mtime_and_size_for_item_cache) logger.debug("storage cache action logging: %s", self._debug_cache_actions) if self._use_cache_subfolder_for_item is True or self._use_cache_subfolder_for_history is True or self._use_cache_subfolder_for_synctoken is True: logger.info("storage cache subfolder: %r", self._get_collection_cache_folder()) diff --git a/radicale/storage/multifilesystem/base.py b/radicale/storage/multifilesystem/base.py index a580cf19..394e89bf 100644 --- a/radicale/storage/multifilesystem/base.py +++ b/radicale/storage/multifilesystem/base.py @@ -74,6 +74,7 @@ class StorageBase(storage.BaseStorage): _use_cache_subfolder_for_item: bool _use_cache_subfolder_for_history: bool _use_cache_subfolder_for_synctoken: bool + _use_mtime_and_size_for_item_cache: bool _debug_cache_actions: bool _folder_umask: str _config_umask: int @@ -92,10 +93,12 @@ def __init__(self, configuration: config.Configuration) -> None: "storage", "use_cache_subfolder_for_history") self._use_cache_subfolder_for_synctoken = configuration.get( "storage", "use_cache_subfolder_for_synctoken") + self._use_mtime_and_size_for_item_cache = configuration.get( + "storage", "use_mtime_and_size_for_item_cache") self._folder_umask = configuration.get( "storage", "folder_umask") self._debug_cache_actions = configuration.get( - "logging", "storage_cache_actions") + "logging", "storage_cache_actions_on_debug") def _get_collection_root_folder(self) -> str: return os.path.join(self._filesystem_folder, "collection-root") diff --git a/radicale/storage/multifilesystem/cache.py b/radicale/storage/multifilesystem/cache.py index 1d90f975..158641e4 100644 --- a/radicale/storage/multifilesystem/cache.py +++ b/radicale/storage/multifilesystem/cache.py @@ -73,6 +73,10 @@ def _item_cache_hash(raw_text: bytes) -> str: _hash.update(raw_text) return _hash.hexdigest() + @staticmethod + def _item_cache_mtime_and_size(size: int, raw_text: int) -> str: + return str(storage.CACHE_VERSION.decode()) + "size=" + str(size) + ";mtime=" + str(raw_text) + def _item_cache_content(self, item: radicale_item.Item) -> CacheContent: return CacheContent(item.uid, item.etag, item.serialize(), item.name, item.component_name, *item.time_range) @@ -80,8 +84,11 @@ def _item_cache_content(self, item: radicale_item.Item) -> CacheContent: def _store_item_cache(self, href: str, item: radicale_item.Item, cache_hash: str = "") -> CacheContent: if not cache_hash: - cache_hash = self._item_cache_hash( - item.serialize().encode(self._encoding)) + if self._storage._use_mtime_and_size_for_item_cache is True: + raise RuntimeError("_store_item_cache called without cache_hash is not supported if [storage] 
use_mtime_and_size_for_item_cache is True") + else: + cache_hash = self._item_cache_hash( + item.serialize().encode(self._encoding)) cache_folder = self._storage._get_collection_cache_subfolder(self._filesystem_path, ".Radicale.cache", "item") content = self._item_cache_content(item) self._storage._makedirs_synced(cache_folder) @@ -96,12 +103,20 @@ def _store_item_cache(self, href: str, item: radicale_item.Item, def _load_item_cache(self, href: str, cache_hash: str ) -> Optional[CacheContent]: cache_folder = self._storage._get_collection_cache_subfolder(self._filesystem_path, ".Radicale.cache", "item") + path = os.path.join(cache_folder, href) try: - with open(os.path.join(cache_folder, href), "rb") as f: + with open(path, "rb") as f: hash_, *remainder = pickle.load(f) if hash_ and hash_ == cache_hash: + if self._storage._debug_cache_actions is True: + logger.debug("Item cache match : %r with hash %r", path, cache_hash) return CacheContent(*remainder) + else: + if self._storage._debug_cache_actions is True: + logger.debug("Item cache no match : %r with hash %r", path, cache_hash) except FileNotFoundError: + if self._storage._debug_cache_actions is True: + logger.debug("Item cache not found : %r with hash %r", path, cache_hash) pass except (pickle.UnpicklingError, ValueError) as e: logger.warning("Failed to load item cache entry %r in %r: %s", diff --git a/radicale/storage/multifilesystem/get.py b/radicale/storage/multifilesystem/get.py index 543f004a..f74c8fb6 100644 --- a/radicale/storage/multifilesystem/get.py +++ b/radicale/storage/multifilesystem/get.py @@ -80,11 +80,18 @@ def _get(self, href: str, verify_href: bool = True raise # The hash of the component in the file system. This is used to check, # if the entry in the cache is still valid. - cache_hash = self._item_cache_hash(raw_text) - if self._storage._debug_cache_actions is True: - logger.debug("Check cache for: %r with hash %r", path, cache_hash) + if self._storage._use_mtime_and_size_for_item_cache is True: + cache_hash = self._item_cache_mtime_and_size(os.stat(path).st_size, os.stat(path).st_mtime_ns) + if self._storage._debug_cache_actions is True: + logger.debug("Item cache check for: %r with mtime and size %r", path, cache_hash) + else: + cache_hash = self._item_cache_hash(raw_text) + if self._storage._debug_cache_actions is True: + logger.debug("Item cache check for: %r with hash %r", path, cache_hash) cache_content = self._load_item_cache(href, cache_hash) if cache_content is None: + if self._storage._debug_cache_actions is True: + logger.debug("Item cache miss for: %r", path) with self._acquire_cache_lock("item"): # Lock the item cache to prevent multiple processes from # generating the same data in parallel. 
@@ -101,6 +108,8 @@ def _get(self, href: str, verify_href: bool = True
                     vobject_item, = vobject_items
                     temp_item = radicale_item.Item(
                         collection=self, vobject_item=vobject_item)
+                    if self._storage._debug_cache_actions is True:
+                        logger.debug("Item cache store for: %r", path)
                     cache_content = self._store_item_cache(
                         href, temp_item, cache_hash)
                 except Exception as e:
@@ -115,6 +124,9 @@ def _get(self, href: str, verify_href: bool = True
             if not self._item_cache_cleaned:
                 self._item_cache_cleaned = True
                 self._clean_item_cache()
+        else:
+            if self._storage._debug_cache_actions is True:
+                logger.debug("Item cache hit for: %r", path)
         last_modified = time.strftime(
             "%a, %d %b %Y %H:%M:%S GMT",
             time.gmtime(os.path.getmtime(path)))
diff --git a/radicale/storage/multifilesystem/upload.py b/radicale/storage/multifilesystem/upload.py
index e9783e85..3814f428 100644
--- a/radicale/storage/multifilesystem/upload.py
+++ b/radicale/storage/multifilesystem/upload.py
@@ -41,19 +41,26 @@ def upload(self, href: str, item: radicale_item.Item
             raise pathutils.UnsafePathError(href)
         path = pathutils.path_to_filesystem(self._filesystem_path, href)
         try:
+            with self._atomic_write(path, newline="") as fo:  # type: ignore
+                f = cast(TextIO, fo)
+                f.write(item.serialize())
+        except Exception as e:
+            raise ValueError("Failed to store item %r in collection %r: %s" %
+                             (href, self.path, e)) from e
+        # store cache file
+        if self._storage._use_mtime_and_size_for_item_cache is True:
+            cache_hash = self._item_cache_mtime_and_size(os.stat(path).st_size, os.stat(path).st_mtime_ns)
+            if self._storage._debug_cache_actions is True:
+                logger.debug("Item cache store for: %r with mtime and size %r", path, cache_hash)
+        else:
             cache_hash = self._item_cache_hash(item.serialize().encode(self._encoding))
-            logger.debug("Store cache for: %r with hash %r", path, cache_hash)
+            if self._storage._debug_cache_actions is True:
+                logger.debug("Item cache store for: %r with hash %r", path, cache_hash)
+        try:
             self._store_item_cache(href, item, cache_hash)
         except Exception as e:
-            raise ValueError("Failed to store item %r in collection %r: %s" %
+            raise ValueError("Failed to store item cache of %r in collection %r: %s" %
                              (href, self.path, e)) from e
-        # TODO: better fix for "mypy"
-        with self._atomic_write(path, newline="") as fo:  # type: ignore
-            f = cast(TextIO, fo)
-            f.write(item.serialize())
-        # Clean the cache after the actual item is stored, or the cache entry
-        # will be removed again.
-        self._clean_item_cache()
         # Track the change
         self._update_history_etag(href, item)
         self._clean_history()
@@ -84,15 +91,11 @@ def get_safe_free_hrefs(uid: str) -> Iterator[str]:
         for item in items:
             uid = item.uid
             logger.debug("Store item from list with uid: '%s'" % uid)
-            try:
-                cache_content = self._item_cache_content(item)
-            except Exception as e:
-                raise ValueError(
-                    "Failed to store item %r in temporary collection %r: %s" %
-                    (uid, self.path, e)) from e
+            cache_content = self._item_cache_content(item)
             for href in get_safe_free_hrefs(uid):
+                path = os.path.join(self._filesystem_path, href)
                 try:
-                    f = open(os.path.join(self._filesystem_path, href),
+                    f = open(path,
                              "w", newline="", encoding=self._encoding)
                 except OSError as e:
                     if (sys.platform != "win32" and e.errno == errno.EINVAL or
@@ -104,14 +107,31 @@ def get_safe_free_hrefs(uid: str) -> Iterator[str]:
             else:
                 raise RuntimeError("No href found for item %r in temporary "
                                    "collection %r" % (uid, self.path))
-            with f:
-                f.write(item.serialize())
-                f.flush()
-                self._storage._fsync(f)
-            with open(os.path.join(cache_folder, href), "wb") as fb:
+
+            try:
+                with f:
+                    f.write(item.serialize())
+                    f.flush()
+                    self._storage._fsync(f)
+            except Exception as e:
+                raise ValueError(
+                    "Failed to store item %r in temporary collection %r: %s" %
+                    (uid, self.path, e)) from e
+
+            # store cache file
+            if self._storage._use_mtime_and_size_for_item_cache is True:
+                cache_hash = self._item_cache_mtime_and_size(os.stat(path).st_size, os.stat(path).st_mtime_ns)
+                if self._storage._debug_cache_actions is True:
+                    logger.debug("Item cache store for: %r with mtime and size %r", path, cache_hash)
+            else:
                 cache_hash = self._item_cache_hash(item.serialize().encode(self._encoding))
-                logger.debug("Store cache for: %r with hash %r", fb.name, cache_hash)
-                pickle.dump(cache_content, fb)
+                if self._storage._debug_cache_actions is True:
+                    logger.debug("Item cache store for: %r with hash %r", path, cache_hash)
+            path_cache = os.path.join(cache_folder, href)
+            if self._storage._debug_cache_actions is True:
+                logger.debug("Item cache store into: %r", path_cache)
+            with open(path_cache, "wb") as fb:
+                pickle.dump((cache_hash, *cache_content), fb)
                 fb.flush()
                 self._storage._fsync(fb)
             self._storage._sync_directory(cache_folder)
diff --git a/radicale/tests/test_storage.py b/radicale/tests/test_storage.py
index 5eca0753..1957a137 100644
--- a/radicale/tests/test_storage.py
+++ b/radicale/tests/test_storage.py
@@ -116,6 +116,22 @@ def test_item_cache_rebuild_subfolder(self) -> None:
         assert answer1 == answer2
         assert os.path.exists(os.path.join(cache_folder, "event1.ics"))

+    def test_item_cache_rebuild_mtime_and_size(self) -> None:
+        """Delete the item cache and verify that it is rebuilt."""
+        self.configure({"storage": {"use_mtime_and_size_for_item_cache": "True"}})
+        self.mkcalendar("/calendar.ics/")
+        event = get_file_content("event1.ics")
+        path = "/calendar.ics/event1.ics"
+        self.put(path, event)
+        _, answer1 = self.get(path)
+        cache_folder = os.path.join(self.colpath, "collection-root",
+                                    "calendar.ics", ".Radicale.cache", "item")
+        assert os.path.exists(os.path.join(cache_folder, "event1.ics"))
+        shutil.rmtree(cache_folder)
+        _, answer2 = self.get(path)
+        assert answer1 == answer2
+        assert os.path.exists(os.path.join(cache_folder, "event1.ics"))
+
     def test_put_whole_calendar_uids_used_as_file_names(self) -> None:
         """Test if UIDs are used as file names."""
         _TestBaseRequests.test_put_whole_calendar(
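Reviewer note, not part of the patch: a minimal standalone sketch of the cache-key scheme the new option enables. It is illustrative only; CACHE_VERSION below is a placeholder standing in for the bytes constant radicale.storage.CACHE_VERSION, and item_cache_key() mirrors the key layout of _item_cache_mtime_and_size() from the diff.

import os

CACHE_VERSION = b"1"  # assumption: placeholder for radicale.storage.CACHE_VERSION

def item_cache_key(path: str) -> str:
    # A single stat() call supplies both size and mtime_ns, so the two
    # values describe the same version of the file.
    st = os.stat(path)
    return CACHE_VERSION.decode() + "size=" + str(st.st_size) + ";mtime=" + str(st.st_mtime_ns)

A cached entry is reused only while this key matches the one stored alongside it, so a cache hit no longer has to read and SHA256-hash the file content. This is also why the documentation warns about mtime precision: on a filesystem with coarse mtime granularity, a same-size rewrite within one timestamp tick leaves the key unchanged and would be served stale from the cache.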