Merge pull request #1655 from pbiering/item-cache-mtime-size

Item cache mtime size option
Peter Bieringer 2024-12-15 11:59:33 +00:00 committed by GitHub
commit c33e96c5a3
10 changed files with 125 additions and 37 deletions


@@ -13,8 +13,10 @@
* Improve: log important module versions on startup
* Improve: show auth.ldap config on startup; terminate in case no password is supplied for the bind user
* Add: option [auth] uc_username for uppercase conversion (similar to existing lc_username)
* Add: option [debug] storage_cache_action for conditional logging
* Add: option [logging] storage_cache_action_on_debug for conditional logging
* Fix: set PRODID on collection upload (instead of letting vobject insert a default one)
* Add: option [storage] use_mtime_and_size_for_item_cache for switching the cache lookup key from SHA256 to mtime_ns + size
* Fix: buggy cache file content creation on collection upload
## 3.3.1


@@ -1050,6 +1050,16 @@ Default: `False`
Note: use only on single-instance setup, will break consistency with client in multi-instance setup
##### use_mtime_and_size_for_item_cache
Use last modification time (nanoseconds) and size (bytes) for the 'item' cache instead of SHA256 (improves speed)
Default: `False`
Note: check the mtime precision of the used filesystem before enabling
Note: conversion is done on access; a bulk conversion can be done offline using the storage verification option `radicale --verify-storage`
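Since the note above asks you to check the filesystem's mtime precision first, a quick probe can be run beforehand. This is a minimal sketch (not part of Radicale; the function name and the probed directory are illustrative):

```python
import os
import tempfile
import time

def has_subsecond_mtime(directory: str) -> bool:
    """Touch a temp file twice, 10 ms apart; on filesystems that only
    store whole seconds both stat() calls usually return the same
    st_mtime_ns (a rare second-boundary crossing can skew one run)."""
    fd, path = tempfile.mkstemp(dir=directory)
    os.close(fd)
    try:
        os.utime(path)
        first = os.stat(path).st_mtime_ns
        time.sleep(0.01)
        os.utime(path)
        return os.stat(path).st_mtime_ns > first
    finally:
        os.unlink(path)

# e.g. probe the Radicale storage folder (path is an assumption):
print(has_subsecond_mtime("/var/lib/radicale/collections"))
```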
##### folder_umask
Use configured umask for folder creation (not applicable for OS Windows)
@@ -1170,9 +1180,9 @@ Log rights rules which don't match on level=debug
Default: `False`
##### storage_cache_actions
##### storage_cache_actions_on_debug
Log storage cache actions
Log storage cache actions on level=debug
Default: `False`
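With this option enabled (and the log level set to debug), cache decisions become visible in the log. The message formats below are taken from the code changes later in this PR; the path and hash values are illustrative:

```
Item cache check for: '/path/to/event1.ics' with hash 'abc123…'
Item cache no match : '/path/to/event1.ics' with hash 'abc123…'
Item cache miss for: '/path/to/event1.ics'
Item cache store for: '/path/to/event1.ics'
```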

config

@@ -155,6 +155,11 @@
# Note: use only on single-instance setup, will break consistency with client in multi-instance setup
#use_cache_subfolder_for_synctoken = False
# Use last modification time (nanoseconds) and size (bytes) for the 'item' cache instead of SHA256 (improves speed)
# Note: check the mtime precision of the used filesystem before enabling
# Note: conversion is done on access; a bulk conversion can be done offline using the storage verification option: radicale --verify-storage
#use_mtime_and_size_for_item_cache = False
# Use configured umask for folder creation (not applicable for OS Windows)
# Useful values: 0077 | 0027 | 0007 | 0022
#folder_umask = (system default, usually 0022)
@@ -226,8 +231,8 @@
# Log rights rules which don't match on level=debug
#rights_rule_doesnt_match_on_debug = False
# Log storage cache actions
#storage_cache_actions = False
# Log storage cache actions on level=debug
#storage_cache_actions_on_debug = False
[headers]
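Putting the two new settings together, a minimal configuration enabling the faster cache lookup plus its debug logging might look like this (sketch; `level = debug` is the standard Radicale logging option, shown here only so the debug messages actually appear):

```ini
[storage]
use_mtime_and_size_for_item_cache = True

[logging]
level = debug
storage_cache_actions_on_debug = True
```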


@@ -299,6 +299,10 @@ DEFAULT_CONFIG_SCHEMA: types.CONFIG_SCHEMA = OrderedDict([
"value": "False",
"help": "use subfolder 'collection-cache' for 'sync-token' cache file structure instead of inside collection folder",
"type": bool}),
("use_mtime_and_size_for_item_cache", {
"value": "False",
"help": "use mtime and file size instead of SHA256 for 'item' cache (improves speed)",
"type": bool}),
("folder_umask", {
"value": "",
"help": "umask for folder creation (empty: system default)",
@@ -376,7 +380,7 @@ DEFAULT_CONFIG_SCHEMA: types.CONFIG_SCHEMA = OrderedDict([
"value": "False",
"help": "log rights rules which don't match on level=debug",
"type": bool}),
("storage_cache_actions", {
("storage_cache_actions_on_debug", {
"value": "False",
"help": "log storage cache actions on level=debug",
"type": bool}),


@@ -97,6 +97,7 @@ class Storage(
logger.info("storage cache subfolder usage for 'item': %s", self._use_cache_subfolder_for_item)
logger.info("storage cache subfolder usage for 'history': %s", self._use_cache_subfolder_for_history)
logger.info("storage cache subfolder usage for 'sync-token': %s", self._use_cache_subfolder_for_synctoken)
logger.info("storage cache use mtime and size for 'item': %s", self._use_mtime_and_size_for_item_cache)
logger.debug("storage cache action logging: %s", self._debug_cache_actions)
if self._use_cache_subfolder_for_item is True or self._use_cache_subfolder_for_history is True or self._use_cache_subfolder_for_synctoken is True:
logger.info("storage cache subfolder: %r", self._get_collection_cache_folder())


@@ -74,6 +74,7 @@ class StorageBase(storage.BaseStorage):
_use_cache_subfolder_for_item: bool
_use_cache_subfolder_for_history: bool
_use_cache_subfolder_for_synctoken: bool
_use_mtime_and_size_for_item_cache: bool
_debug_cache_actions: bool
_folder_umask: str
_config_umask: int
@@ -92,10 +93,12 @@ class StorageBase(storage.BaseStorage):
"storage", "use_cache_subfolder_for_history")
self._use_cache_subfolder_for_synctoken = configuration.get(
"storage", "use_cache_subfolder_for_synctoken")
self._use_mtime_and_size_for_item_cache = configuration.get(
"storage", "use_mtime_and_size_for_item_cache")
self._folder_umask = configuration.get(
"storage", "folder_umask")
self._debug_cache_actions = configuration.get(
"logging", "storage_cache_actions")
"logging", "storage_cache_actions_on_debug")
def _get_collection_root_folder(self) -> str:
return os.path.join(self._filesystem_folder, "collection-root")


@@ -73,6 +73,10 @@ class CollectionPartCache(CollectionBase):
_hash.update(raw_text)
return _hash.hexdigest()
@staticmethod
def _item_cache_mtime_and_size(size: int, mtime_ns: int) -> str:
return storage.CACHE_VERSION.decode() + "size=" + str(size) + ";mtime=" + str(mtime_ns)
def _item_cache_content(self, item: radicale_item.Item) -> CacheContent:
return CacheContent(item.uid, item.etag, item.serialize(), item.name,
item.component_name, *item.time_range)
@@ -80,8 +84,11 @@
def _store_item_cache(self, href: str, item: radicale_item.Item,
cache_hash: str = "") -> CacheContent:
if not cache_hash:
cache_hash = self._item_cache_hash(
item.serialize().encode(self._encoding))
if self._storage._use_mtime_and_size_for_item_cache is True:
raise RuntimeError("_store_item_cache called without cache_hash is not supported if [storage] use_mtime_and_size_for_item_cache is True")
else:
cache_hash = self._item_cache_hash(
item.serialize().encode(self._encoding))
cache_folder = self._storage._get_collection_cache_subfolder(self._filesystem_path, ".Radicale.cache", "item")
content = self._item_cache_content(item)
self._storage._makedirs_synced(cache_folder)
@@ -96,12 +103,20 @@
def _load_item_cache(self, href: str, cache_hash: str
) -> Optional[CacheContent]:
cache_folder = self._storage._get_collection_cache_subfolder(self._filesystem_path, ".Radicale.cache", "item")
path = os.path.join(cache_folder, href)
try:
with open(os.path.join(cache_folder, href), "rb") as f:
with open(path, "rb") as f:
hash_, *remainder = pickle.load(f)
if hash_ and hash_ == cache_hash:
if self._storage._debug_cache_actions is True:
logger.debug("Item cache match : %r with hash %r", path, cache_hash)
return CacheContent(*remainder)
else:
if self._storage._debug_cache_actions is True:
logger.debug("Item cache no match : %r with hash %r", path, cache_hash)
except FileNotFoundError:
if self._storage._debug_cache_actions is True:
logger.debug("Item cache not found : %r with hash %r", path, cache_hash)
except (pickle.UnpicklingError, ValueError) as e:
logger.warning("Failed to load item cache entry %r in %r: %s",


@@ -80,11 +80,18 @@ class CollectionPartGet(CollectionPartCache, CollectionPartLock,
raise
# The hash of the component in the file system. This is used to check,
# if the entry in the cache is still valid.
cache_hash = self._item_cache_hash(raw_text)
if self._storage._debug_cache_actions is True:
logger.debug("Check cache for: %r with hash %r", path, cache_hash)
if self._storage._use_mtime_and_size_for_item_cache is True:
_stat = os.stat(path)
cache_hash = self._item_cache_mtime_and_size(_stat.st_size, _stat.st_mtime_ns)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache check for: %r with mtime and size %r", path, cache_hash)
else:
cache_hash = self._item_cache_hash(raw_text)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache check for: %r with hash %r", path, cache_hash)
cache_content = self._load_item_cache(href, cache_hash)
if cache_content is None:
if self._storage._debug_cache_actions is True:
logger.debug("Item cache miss for: %r", path)
with self._acquire_cache_lock("item"):
# Lock the item cache to prevent multiple processes from
# generating the same data in parallel.
@@ -101,6 +108,8 @@ class CollectionPartGet(CollectionPartCache, CollectionPartLock,
vobject_item, = vobject_items
temp_item = radicale_item.Item(
collection=self, vobject_item=vobject_item)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache store for: %r", path)
cache_content = self._store_item_cache(
href, temp_item, cache_hash)
except Exception as e:
@@ -115,6 +124,9 @@ class CollectionPartGet(CollectionPartCache, CollectionPartLock,
if not self._item_cache_cleaned:
self._item_cache_cleaned = True
self._clean_item_cache()
else:
if self._storage._debug_cache_actions is True:
logger.debug("Item cache hit for: %r", path)
last_modified = time.strftime(
"%a, %d %b %Y %H:%M:%S GMT",
time.gmtime(os.path.getmtime(path)))
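The branch above is where the speedup comes from: the mtime+size key needs only file metadata, while the SHA256 key requires hashing the full serialized item. A condensed sketch of the decision (simplified names; the CACHE_VERSION prefix and error handling are omitted):

```python
import hashlib
import os

def lookup_key(path: str, raw_text: bytes, use_mtime_and_size: bool) -> str:
    if use_mtime_and_size:
        st = os.stat(path)  # cheap: metadata only, single stat call
        return f"size={st.st_size};mtime={st.st_mtime_ns}"
    # Otherwise hash the whole item body, which scales with item size.
    return hashlib.sha256(raw_text).hexdigest()
```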


@@ -41,19 +41,26 @@ class CollectionPartUpload(CollectionPartGet, CollectionPartCache,
raise pathutils.UnsafePathError(href)
path = pathutils.path_to_filesystem(self._filesystem_path, href)
try:
cache_hash = self._item_cache_hash(item.serialize().encode(self._encoding))
logger.debug("Store cache for: %r with hash %r", path, cache_hash)
self._store_item_cache(href, item, cache_hash)
with self._atomic_write(path, newline="") as fo: # type: ignore
f = cast(TextIO, fo)
f.write(item.serialize())
except Exception as e:
raise ValueError("Failed to store item %r in collection %r: %s" %
(href, self.path, e)) from e
# TODO: better fix for "mypy"
with self._atomic_write(path, newline="") as fo: # type: ignore
f = cast(TextIO, fo)
f.write(item.serialize())
# Clean the cache after the actual item is stored, or the cache entry
# will be removed again.
self._clean_item_cache()
# store cache file
if self._storage._use_mtime_and_size_for_item_cache is True:
_stat = os.stat(path)
cache_hash = self._item_cache_mtime_and_size(_stat.st_size, _stat.st_mtime_ns)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache store for: %r with mtime and size %r", path, cache_hash)
else:
cache_hash = self._item_cache_hash(item.serialize().encode(self._encoding))
if self._storage._debug_cache_actions is True:
logger.debug("Item cache store for: %r with hash %r", path, cache_hash)
try:
self._store_item_cache(href, item, cache_hash)
except Exception as e:
raise ValueError("Failed to store item cache of %r in collection %r: %s" %
(href, self.path, e)) from e
# Track the change
self._update_history_etag(href, item)
self._clean_history()
@@ -84,15 +91,11 @@ class CollectionPartUpload(CollectionPartGet, CollectionPartCache,
for item in items:
uid = item.uid
logger.debug("Store item from list with uid: '%s'" % uid)
try:
cache_content = self._item_cache_content(item)
except Exception as e:
raise ValueError(
"Failed to store item %r in temporary collection %r: %s" %
(uid, self.path, e)) from e
cache_content = self._item_cache_content(item)
for href in get_safe_free_hrefs(uid):
path = os.path.join(self._filesystem_path, href)
try:
f = open(os.path.join(self._filesystem_path, href),
f = open(path,
"w", newline="", encoding=self._encoding)
except OSError as e:
if (sys.platform != "win32" and e.errno == errno.EINVAL or
@@ -104,14 +107,31 @@ class CollectionPartUpload(CollectionPartGet, CollectionPartCache,
else:
raise RuntimeError("No href found for item %r in temporary "
"collection %r" % (uid, self.path))
with f:
f.write(item.serialize())
f.flush()
self._storage._fsync(f)
with open(os.path.join(cache_folder, href), "wb") as fb:
try:
with f:
f.write(item.serialize())
f.flush()
self._storage._fsync(f)
except Exception as e:
raise ValueError(
"Failed to store item %r in temporary collection %r: %s" %
(uid, self.path, e)) from e
# store cache file
if self._storage._use_mtime_and_size_for_item_cache is True:
_stat = os.stat(path)
cache_hash = self._item_cache_mtime_and_size(_stat.st_size, _stat.st_mtime_ns)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache store for: %r with mtime and size %r", path, cache_hash)
else:
cache_hash = self._item_cache_hash(item.serialize().encode(self._encoding))
logger.debug("Store cache for: %r with hash %r", fb.name, cache_hash)
pickle.dump(cache_content, fb)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache store for: %r with hash %r", path, cache_hash)
path_cache = os.path.join(cache_folder, href)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache store into: %r", path_cache)
with open(path_cache, "wb") as fb:
pickle.dump((cache_hash, *cache_content), fb)
fb.flush()
self._storage._fsync(fb)
self._storage._sync_directory(cache_folder)
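One detail worth seeing in isolation: in the mtime+size mode the cache entry must be keyed off the file that was just written, so the stat() has to happen after the (atomic) write, never before it. A minimal sketch of the ordering (the helper callables are stand-ins for the Radicale methods shown above):

```python
import os

def store_with_cache(path: str, payload: str, write_atomic, store_cache) -> None:
    # 1) publish the item file first (atomic write/rename)
    write_atomic(path, payload)
    # 2) only now does stat() describe the final file on disk
    st = os.stat(path)
    cache_key = f"size={st.st_size};mtime={st.st_mtime_ns}"
    # 3) store the cache entry under that key
    store_cache(path, cache_key)
```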


@@ -116,6 +116,22 @@ class TestMultiFileSystem(BaseTest):
assert answer1 == answer2
assert os.path.exists(os.path.join(cache_folder, "event1.ics"))
def test_item_cache_rebuild_mtime_and_size(self) -> None:
"""Delete the item cache and verify that it is rebuilt."""
self.configure({"storage": {"use_mtime_and_size_for_item_cache": "True"}})
self.mkcalendar("/calendar.ics/")
event = get_file_content("event1.ics")
path = "/calendar.ics/event1.ics"
self.put(path, event)
_, answer1 = self.get(path)
cache_folder = os.path.join(self.colpath, "collection-root",
"calendar.ics", ".Radicale.cache", "item")
assert os.path.exists(os.path.join(cache_folder, "event1.ics"))
shutil.rmtree(cache_folder)
_, answer2 = self.get(path)
assert answer1 == answer2
assert os.path.exists(os.path.join(cache_folder, "event1.ics"))
def test_put_whole_calendar_uids_used_as_file_names(self) -> None:
"""Test if UIDs are used as file names."""
_TestBaseRequests.test_put_whole_calendar(
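To run just the new test, something along these lines should work (assuming the usual pytest setup of the Radicale repository):

```bash
python -m pytest -k test_item_cache_rebuild_mtime_and_size
```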