Skip to content

Commit

Permalink
Merge pull request #1655 from pbiering/item-cache-mtime-size
Browse files Browse the repository at this point in the history
Item cache mtime size option
  • Loading branch information
pbiering authored Dec 15, 2024
2 parents b7ae6b3 + dc51a74 commit c33e96c
Show file tree
Hide file tree
Showing 10 changed files with 124 additions and 36 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
* Improve: log important module versions on startup
* Improve: auth.ldap config shown on startup, terminate in case no password is supplied for bind user
* Add: option [auth] uc_username for uppercase conversion (similar to existing lc_username)
* Add: option [debug] storage_cache_action for conditional logging
* Add: option [logging] storage_cache_action_on_debug for conditional logging
* Fix: set PRODID on collection upload (instead of vobject is inserting default one)
* Add: option [storage] use_mtime_and_size_for_item_cache for changing cache lookup from SHA256 to mtime_ns + size
* Fix: buggy cache file content creation on collection upload

## 3.3.1

Expand Down
14 changes: 12 additions & 2 deletions DOCUMENTATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -1050,6 +1050,16 @@ Default: `False`

Note: use only on single-instance setup, will break consistency with client in multi-instance setup

##### use_mtime_and_size_for_item_cache

Use last modifiction time (nanoseconds) and size (bytes) for 'item' cache instead of SHA256 (improves speed)

Default: `False`

Note: check used filesystem mtime precision before enabling

Note: conversion is done on access, bulk conversion can be done offline using storage verification option `radicale --verify-storage`

##### folder_umask

Use configured umask for folder creation (not applicable for OS Windows)
Expand Down Expand Up @@ -1170,9 +1180,9 @@ Log rights rule which doesn't match on level=debug

Default: `False`

##### #storage_cache_actions
##### storage_cache_actions_on_debug

Log storage cache actions
Log storage cache actions on level=debug

Default: `False`

Expand Down
9 changes: 7 additions & 2 deletions config
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,11 @@
# Note: use only on single-instance setup, will break consistency with client in multi-instance setup
#use_cache_subfolder_for_synctoken = False

# Use last modifiction time (nanoseconds) and size (bytes) for 'item' cache instead of SHA256 (improves speed)
# Note: check used filesystem mtime precision before enabling
# Note: conversion is done on access, bulk conversion can be done offline using storage verification option: radicale --verify-storage
#use_mtime_and_size_for_item_cache=False

# Use configured umask for folder creation (not applicable for OS Windows)
# Useful value: 0077 | 0027 | 0007 | 0022
#folder_umask = (system default, usual 0022)
Expand Down Expand Up @@ -226,8 +231,8 @@
# Log rights rule which doesn't match on level=debug
#rights_rule_doesnt_match_on_debug = False

# Log storage cache actions
#storage_cache_actions = False
# Log storage cache actions on level=debug
#storage_cache_actions_on_debug = False

[headers]

Expand Down
6 changes: 5 additions & 1 deletion radicale/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,10 @@ def json_str(value: Any) -> dict:
"value": "False",
"help": "use subfolder 'collection-cache' for 'sync-token' cache file structure instead of inside collection folder",
"type": bool}),
("use_mtime_and_size_for_item_cache", {
"value": "False",
"help": "use mtime and file size instead of SHA256 for 'item' cache (improves speed)",
"type": bool}),
("folder_umask", {
"value": "",
"help": "umask for folder creation (empty: system default)",
Expand Down Expand Up @@ -376,7 +380,7 @@ def json_str(value: Any) -> dict:
"value": "False",
"help": "log rights rules which doesn't match on level=debug",
"type": bool}),
("storage_cache_actions", {
("storage_cache_actions_on_debug", {
"value": "False",
"help": "log storage cache action on level=debug",
"type": bool}),
Expand Down
1 change: 1 addition & 0 deletions radicale/storage/multifilesystem/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def __init__(self, configuration: config.Configuration) -> None:
logger.info("storage cache subfolder usage for 'item': %s", self._use_cache_subfolder_for_item)
logger.info("storage cache subfolder usage for 'history': %s", self._use_cache_subfolder_for_history)
logger.info("storage cache subfolder usage for 'sync-token': %s", self._use_cache_subfolder_for_synctoken)
logger.info("storage cache use mtime and size for 'item': %s", self._use_mtime_and_size_for_item_cache)
logger.debug("storage cache action logging: %s", self._debug_cache_actions)
if self._use_cache_subfolder_for_item is True or self._use_cache_subfolder_for_history is True or self._use_cache_subfolder_for_synctoken is True:
logger.info("storage cache subfolder: %r", self._get_collection_cache_folder())
Expand Down
5 changes: 4 additions & 1 deletion radicale/storage/multifilesystem/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class StorageBase(storage.BaseStorage):
_use_cache_subfolder_for_item: bool
_use_cache_subfolder_for_history: bool
_use_cache_subfolder_for_synctoken: bool
_use_mtime_and_size_for_item_cache: bool
_debug_cache_actions: bool
_folder_umask: str
_config_umask: int
Expand All @@ -92,10 +93,12 @@ def __init__(self, configuration: config.Configuration) -> None:
"storage", "use_cache_subfolder_for_history")
self._use_cache_subfolder_for_synctoken = configuration.get(
"storage", "use_cache_subfolder_for_synctoken")
self._use_mtime_and_size_for_item_cache = configuration.get(
"storage", "use_mtime_and_size_for_item_cache")
self._folder_umask = configuration.get(
"storage", "folder_umask")
self._debug_cache_actions = configuration.get(
"logging", "storage_cache_actions")
"logging", "storage_cache_actions_on_debug")

def _get_collection_root_folder(self) -> str:
return os.path.join(self._filesystem_folder, "collection-root")
Expand Down
21 changes: 18 additions & 3 deletions radicale/storage/multifilesystem/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,22 @@ def _item_cache_hash(raw_text: bytes) -> str:
_hash.update(raw_text)
return _hash.hexdigest()

@staticmethod
def _item_cache_mtime_and_size(size: int, raw_text: int) -> str:
return str(storage.CACHE_VERSION.decode()) + "size=" + str(size) + ";mtime=" + str(raw_text)

def _item_cache_content(self, item: radicale_item.Item) -> CacheContent:
return CacheContent(item.uid, item.etag, item.serialize(), item.name,
item.component_name, *item.time_range)

def _store_item_cache(self, href: str, item: radicale_item.Item,
cache_hash: str = "") -> CacheContent:
if not cache_hash:
cache_hash = self._item_cache_hash(
item.serialize().encode(self._encoding))
if self._storage._use_mtime_and_size_for_item_cache is True:
raise RuntimeError("_store_item_cache called without cache_hash is not supported if [storage] use_mtime_and_size_for_item_cache is True")
else:
cache_hash = self._item_cache_hash(
item.serialize().encode(self._encoding))
cache_folder = self._storage._get_collection_cache_subfolder(self._filesystem_path, ".Radicale.cache", "item")
content = self._item_cache_content(item)
self._storage._makedirs_synced(cache_folder)
Expand All @@ -96,12 +103,20 @@ def _store_item_cache(self, href: str, item: radicale_item.Item,
def _load_item_cache(self, href: str, cache_hash: str
) -> Optional[CacheContent]:
cache_folder = self._storage._get_collection_cache_subfolder(self._filesystem_path, ".Radicale.cache", "item")
path = os.path.join(cache_folder, href)
try:
with open(os.path.join(cache_folder, href), "rb") as f:
with open(path, "rb") as f:
hash_, *remainder = pickle.load(f)
if hash_ and hash_ == cache_hash:
if self._storage._debug_cache_actions is True:
logger.debug("Item cache match : %r with hash %r", path, cache_hash)
return CacheContent(*remainder)
else:
if self._storage._debug_cache_actions is True:
logger.debug("Item cache no match : %r with hash %r", path, cache_hash)
except FileNotFoundError:
if self._storage._debug_cache_actions is True:
logger.debug("Item cache not found : %r with hash %r", path, cache_hash)
pass
except (pickle.UnpicklingError, ValueError) as e:
logger.warning("Failed to load item cache entry %r in %r: %s",
Expand Down
18 changes: 15 additions & 3 deletions radicale/storage/multifilesystem/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,18 @@ def _get(self, href: str, verify_href: bool = True
raise
# The hash of the component in the file system. This is used to check,
# if the entry in the cache is still valid.
cache_hash = self._item_cache_hash(raw_text)
if self._storage._debug_cache_actions is True:
logger.debug("Check cache for: %r with hash %r", path, cache_hash)
if self._storage._use_mtime_and_size_for_item_cache is True:
cache_hash = self._item_cache_mtime_and_size(os.stat(path).st_size, os.stat(path).st_mtime_ns)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache check for: %r with mtime and size %r", path, cache_hash)
else:
cache_hash = self._item_cache_hash(raw_text)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache check for: %r with hash %r", path, cache_hash)
cache_content = self._load_item_cache(href, cache_hash)
if cache_content is None:
if self._storage._debug_cache_actions is True:
logger.debug("Item cache miss for: %r", path)
with self._acquire_cache_lock("item"):
# Lock the item cache to prevent multiple processes from
# generating the same data in parallel.
Expand All @@ -101,6 +108,8 @@ def _get(self, href: str, verify_href: bool = True
vobject_item, = vobject_items
temp_item = radicale_item.Item(
collection=self, vobject_item=vobject_item)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache store for: %r", path)
cache_content = self._store_item_cache(
href, temp_item, cache_hash)
except Exception as e:
Expand All @@ -115,6 +124,9 @@ def _get(self, href: str, verify_href: bool = True
if not self._item_cache_cleaned:
self._item_cache_cleaned = True
self._clean_item_cache()
else:
if self._storage._debug_cache_actions is True:
logger.debug("Item cache hit for: %r", path)
last_modified = time.strftime(
"%a, %d %b %Y %H:%M:%S GMT",
time.gmtime(os.path.getmtime(path)))
Expand Down
66 changes: 43 additions & 23 deletions radicale/storage/multifilesystem/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,26 @@ def upload(self, href: str, item: radicale_item.Item
raise pathutils.UnsafePathError(href)
path = pathutils.path_to_filesystem(self._filesystem_path, href)
try:
with self._atomic_write(path, newline="") as fo: # type: ignore
f = cast(TextIO, fo)
f.write(item.serialize())
except Exception as e:
raise ValueError("Failed to store item %r in collection %r: %s" %
(href, self.path, e)) from e
# store cache file
if self._storage._use_mtime_and_size_for_item_cache is True:
cache_hash = self._item_cache_mtime_and_size(os.stat(path).st_size, os.stat(path).st_mtime_ns)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache store for: %r with mtime and size %r", path, cache_hash)
else:
cache_hash = self._item_cache_hash(item.serialize().encode(self._encoding))
logger.debug("Store cache for: %r with hash %r", path, cache_hash)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache store for: %r with hash %r", path, cache_hash)
try:
self._store_item_cache(href, item, cache_hash)
except Exception as e:
raise ValueError("Failed to store item %r in collection %r: %s" %
raise ValueError("Failed to store item cache of %r in collection %r: %s" %
(href, self.path, e)) from e
# TODO: better fix for "mypy"
with self._atomic_write(path, newline="") as fo: # type: ignore
f = cast(TextIO, fo)
f.write(item.serialize())
# Clean the cache after the actual item is stored, or the cache entry
# will be removed again.
self._clean_item_cache()
# Track the change
self._update_history_etag(href, item)
self._clean_history()
Expand Down Expand Up @@ -84,15 +91,11 @@ def get_safe_free_hrefs(uid: str) -> Iterator[str]:
for item in items:
uid = item.uid
logger.debug("Store item from list with uid: '%s'" % uid)
try:
cache_content = self._item_cache_content(item)
except Exception as e:
raise ValueError(
"Failed to store item %r in temporary collection %r: %s" %
(uid, self.path, e)) from e
cache_content = self._item_cache_content(item)
for href in get_safe_free_hrefs(uid):
path = os.path.join(self._filesystem_path, href)
try:
f = open(os.path.join(self._filesystem_path, href),
f = open(path,
"w", newline="", encoding=self._encoding)
except OSError as e:
if (sys.platform != "win32" and e.errno == errno.EINVAL or
Expand All @@ -104,14 +107,31 @@ def get_safe_free_hrefs(uid: str) -> Iterator[str]:
else:
raise RuntimeError("No href found for item %r in temporary "
"collection %r" % (uid, self.path))
with f:
f.write(item.serialize())
f.flush()
self._storage._fsync(f)
with open(os.path.join(cache_folder, href), "wb") as fb:

try:
with f:
f.write(item.serialize())
f.flush()
self._storage._fsync(f)
except Exception as e:
raise ValueError(
"Failed to store item %r in temporary collection %r: %s" %
(uid, self.path, e)) from e

# store cache file
if self._storage._use_mtime_and_size_for_item_cache is True:
cache_hash = self._item_cache_mtime_and_size(os.stat(path).st_size, os.stat(path).st_mtime_ns)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache store for: %r with mtime and size %r", path, cache_hash)
else:
cache_hash = self._item_cache_hash(item.serialize().encode(self._encoding))
logger.debug("Store cache for: %r with hash %r", fb.name, cache_hash)
pickle.dump(cache_content, fb)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache store for: %r with hash %r", path, cache_hash)
path_cache = os.path.join(cache_folder, href)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache store into: %r", path_cache)
with open(os.path.join(cache_folder, href), "wb") as fb:
pickle.dump((cache_hash, *cache_content), fb)
fb.flush()
self._storage._fsync(fb)
self._storage._sync_directory(cache_folder)
Expand Down
16 changes: 16 additions & 0 deletions radicale/tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,22 @@ def test_item_cache_rebuild_subfolder(self) -> None:
assert answer1 == answer2
assert os.path.exists(os.path.join(cache_folder, "event1.ics"))

def test_item_cache_rebuild_mtime_and_size(self) -> None:
"""Delete the item cache and verify that it is rebuild."""
self.configure({"storage": {"use_mtime_and_size_for_item_cache": "True"}})
self.mkcalendar("/calendar.ics/")
event = get_file_content("event1.ics")
path = "/calendar.ics/event1.ics"
self.put(path, event)
_, answer1 = self.get(path)
cache_folder = os.path.join(self.colpath, "collection-root",
"calendar.ics", ".Radicale.cache", "item")
assert os.path.exists(os.path.join(cache_folder, "event1.ics"))
shutil.rmtree(cache_folder)
_, answer2 = self.get(path)
assert answer1 == answer2
assert os.path.exists(os.path.join(cache_folder, "event1.ics"))

def test_put_whole_calendar_uids_used_as_file_names(self) -> None:
"""Test if UIDs are used as file names."""
_TestBaseRequests.test_put_whole_calendar(
Expand Down

0 comments on commit c33e96c

Please sign in to comment.