Merge pull request #1655 from pbiering/item-cache-mtime-size

Item cache mtime size option
This commit is contained in:
Peter Bieringer 2024-12-15 11:59:33 +00:00 committed by GitHub
commit c33e96c5a3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 125 additions and 37 deletions

View file

@ -13,8 +13,10 @@
* Improve: log important module versions on startup * Improve: log important module versions on startup
* Improve: auth.ldap config shown on startup, terminate in case no password is supplied for bind user * Improve: auth.ldap config shown on startup, terminate in case no password is supplied for bind user
* Add: option [auth] uc_username for uppercase conversion (similar to existing lc_username) * Add: option [auth] uc_username for uppercase conversion (similar to existing lc_username)
* Add: option [debug] storage_cache_action for conditional logging * Add: option [logging] storage_cache_actions_on_debug for conditional logging
* Fix: set PRODID on collection upload (instead of vobject is inserting default one) * Fix: set PRODID on collection upload (instead of vobject is inserting default one)
* Add: option [storage] use_mtime_and_size_for_item_cache for changing cache lookup from SHA256 to mtime_ns + size
* Fix: buggy cache file content creation on collection upload
## 3.3.1 ## 3.3.1

View file

@ -1050,6 +1050,16 @@ Default: `False`
Note: use only on single-instance setup, will break consistency with client in multi-instance setup Note: use only on single-instance setup, will break consistency with client in multi-instance setup
##### use_mtime_and_size_for_item_cache
Use last modification time (nanoseconds) and size (bytes) for 'item' cache instead of SHA256 (improves speed)
Default: `False`
Note: check the mtime precision of the filesystem in use before enabling
Note: conversion is done on access, bulk conversion can be done offline using storage verification option `radicale --verify-storage`
##### folder_umask ##### folder_umask
Use configured umask for folder creation (not applicable for OS Windows) Use configured umask for folder creation (not applicable for OS Windows)
@ -1170,9 +1180,9 @@ Log rights rule which doesn't match on level=debug
Default: `False` Default: `False`
##### #storage_cache_actions ##### storage_cache_actions_on_debug
Log storage cache actions Log storage cache actions on level=debug
Default: `False` Default: `False`

9
config
View file

@ -155,6 +155,11 @@
# Note: use only on single-instance setup, will break consistency with client in multi-instance setup # Note: use only on single-instance setup, will break consistency with client in multi-instance setup
#use_cache_subfolder_for_synctoken = False #use_cache_subfolder_for_synctoken = False
# Use last modification time (nanoseconds) and size (bytes) for 'item' cache instead of SHA256 (improves speed)
# Note: check the mtime precision of the filesystem in use before enabling
# Note: conversion is done on access, bulk conversion can be done offline using storage verification option: radicale --verify-storage
#use_mtime_and_size_for_item_cache = False
# Use configured umask for folder creation (not applicable for OS Windows) # Use configured umask for folder creation (not applicable for OS Windows)
# Useful value: 0077 | 0027 | 0007 | 0022 # Useful value: 0077 | 0027 | 0007 | 0022
#folder_umask = (system default, usual 0022) #folder_umask = (system default, usual 0022)
@ -226,8 +231,8 @@
# Log rights rule which doesn't match on level=debug # Log rights rule which doesn't match on level=debug
#rights_rule_doesnt_match_on_debug = False #rights_rule_doesnt_match_on_debug = False
# Log storage cache actions # Log storage cache actions on level=debug
#storage_cache_actions = False #storage_cache_actions_on_debug = False
[headers] [headers]

View file

@ -299,6 +299,10 @@ DEFAULT_CONFIG_SCHEMA: types.CONFIG_SCHEMA = OrderedDict([
"value": "False", "value": "False",
"help": "use subfolder 'collection-cache' for 'sync-token' cache file structure instead of inside collection folder", "help": "use subfolder 'collection-cache' for 'sync-token' cache file structure instead of inside collection folder",
"type": bool}), "type": bool}),
("use_mtime_and_size_for_item_cache", {
"value": "False",
"help": "use mtime and file size instead of SHA256 for 'item' cache (improves speed)",
"type": bool}),
("folder_umask", { ("folder_umask", {
"value": "", "value": "",
"help": "umask for folder creation (empty: system default)", "help": "umask for folder creation (empty: system default)",
@ -376,7 +380,7 @@ DEFAULT_CONFIG_SCHEMA: types.CONFIG_SCHEMA = OrderedDict([
"value": "False", "value": "False",
"help": "log rights rules which doesn't match on level=debug", "help": "log rights rules which doesn't match on level=debug",
"type": bool}), "type": bool}),
("storage_cache_actions", { ("storage_cache_actions_on_debug", {
"value": "False", "value": "False",
"help": "log storage cache action on level=debug", "help": "log storage cache action on level=debug",
"type": bool}), "type": bool}),

View file

@ -97,6 +97,7 @@ class Storage(
logger.info("storage cache subfolder usage for 'item': %s", self._use_cache_subfolder_for_item) logger.info("storage cache subfolder usage for 'item': %s", self._use_cache_subfolder_for_item)
logger.info("storage cache subfolder usage for 'history': %s", self._use_cache_subfolder_for_history) logger.info("storage cache subfolder usage for 'history': %s", self._use_cache_subfolder_for_history)
logger.info("storage cache subfolder usage for 'sync-token': %s", self._use_cache_subfolder_for_synctoken) logger.info("storage cache subfolder usage for 'sync-token': %s", self._use_cache_subfolder_for_synctoken)
logger.info("storage cache use mtime and size for 'item': %s", self._use_mtime_and_size_for_item_cache)
logger.debug("storage cache action logging: %s", self._debug_cache_actions) logger.debug("storage cache action logging: %s", self._debug_cache_actions)
if self._use_cache_subfolder_for_item is True or self._use_cache_subfolder_for_history is True or self._use_cache_subfolder_for_synctoken is True: if self._use_cache_subfolder_for_item is True or self._use_cache_subfolder_for_history is True or self._use_cache_subfolder_for_synctoken is True:
logger.info("storage cache subfolder: %r", self._get_collection_cache_folder()) logger.info("storage cache subfolder: %r", self._get_collection_cache_folder())

View file

@ -74,6 +74,7 @@ class StorageBase(storage.BaseStorage):
_use_cache_subfolder_for_item: bool _use_cache_subfolder_for_item: bool
_use_cache_subfolder_for_history: bool _use_cache_subfolder_for_history: bool
_use_cache_subfolder_for_synctoken: bool _use_cache_subfolder_for_synctoken: bool
_use_mtime_and_size_for_item_cache: bool
_debug_cache_actions: bool _debug_cache_actions: bool
_folder_umask: str _folder_umask: str
_config_umask: int _config_umask: int
@ -92,10 +93,12 @@ class StorageBase(storage.BaseStorage):
"storage", "use_cache_subfolder_for_history") "storage", "use_cache_subfolder_for_history")
self._use_cache_subfolder_for_synctoken = configuration.get( self._use_cache_subfolder_for_synctoken = configuration.get(
"storage", "use_cache_subfolder_for_synctoken") "storage", "use_cache_subfolder_for_synctoken")
self._use_mtime_and_size_for_item_cache = configuration.get(
"storage", "use_mtime_and_size_for_item_cache")
self._folder_umask = configuration.get( self._folder_umask = configuration.get(
"storage", "folder_umask") "storage", "folder_umask")
self._debug_cache_actions = configuration.get( self._debug_cache_actions = configuration.get(
"logging", "storage_cache_actions") "logging", "storage_cache_actions_on_debug")
def _get_collection_root_folder(self) -> str: def _get_collection_root_folder(self) -> str:
return os.path.join(self._filesystem_folder, "collection-root") return os.path.join(self._filesystem_folder, "collection-root")

View file

@ -73,6 +73,10 @@ class CollectionPartCache(CollectionBase):
_hash.update(raw_text) _hash.update(raw_text)
return _hash.hexdigest() return _hash.hexdigest()
# Build the 'item' cache validation key from file metadata instead of a
# content hash. The result is the decoded storage.CACHE_VERSION followed by
# "size=<size>;mtime=<raw_text>".
# NOTE(review): despite its name, `raw_text` carries the modification time;
# the callers visible in this change pass os.stat(path).st_size and
# os.stat(path).st_mtime_ns positionally. A name such as `mtime_ns` would be
# clearer, but renaming changes the keyword interface.
# NOTE(review): the outer str() looks redundant, since .decode() already
# returns str (assuming CACHE_VERSION is bytes) - TODO confirm.
@staticmethod
def _item_cache_mtime_and_size(size: int, raw_text: int) -> str:
return str(storage.CACHE_VERSION.decode()) + "size=" + str(size) + ";mtime=" + str(raw_text)
def _item_cache_content(self, item: radicale_item.Item) -> CacheContent: def _item_cache_content(self, item: radicale_item.Item) -> CacheContent:
return CacheContent(item.uid, item.etag, item.serialize(), item.name, return CacheContent(item.uid, item.etag, item.serialize(), item.name,
item.component_name, *item.time_range) item.component_name, *item.time_range)
@ -80,8 +84,11 @@ class CollectionPartCache(CollectionBase):
def _store_item_cache(self, href: str, item: radicale_item.Item, def _store_item_cache(self, href: str, item: radicale_item.Item,
cache_hash: str = "") -> CacheContent: cache_hash: str = "") -> CacheContent:
if not cache_hash: if not cache_hash:
cache_hash = self._item_cache_hash( if self._storage._use_mtime_and_size_for_item_cache is True:
item.serialize().encode(self._encoding)) raise RuntimeError("_store_item_cache called without cache_hash is not supported if [storage] use_mtime_and_size_for_item_cache is True")
else:
cache_hash = self._item_cache_hash(
item.serialize().encode(self._encoding))
cache_folder = self._storage._get_collection_cache_subfolder(self._filesystem_path, ".Radicale.cache", "item") cache_folder = self._storage._get_collection_cache_subfolder(self._filesystem_path, ".Radicale.cache", "item")
content = self._item_cache_content(item) content = self._item_cache_content(item)
self._storage._makedirs_synced(cache_folder) self._storage._makedirs_synced(cache_folder)
@ -96,12 +103,20 @@ class CollectionPartCache(CollectionBase):
def _load_item_cache(self, href: str, cache_hash: str def _load_item_cache(self, href: str, cache_hash: str
) -> Optional[CacheContent]: ) -> Optional[CacheContent]:
cache_folder = self._storage._get_collection_cache_subfolder(self._filesystem_path, ".Radicale.cache", "item") cache_folder = self._storage._get_collection_cache_subfolder(self._filesystem_path, ".Radicale.cache", "item")
path = os.path.join(cache_folder, href)
try: try:
with open(os.path.join(cache_folder, href), "rb") as f: with open(path, "rb") as f:
hash_, *remainder = pickle.load(f) hash_, *remainder = pickle.load(f)
if hash_ and hash_ == cache_hash: if hash_ and hash_ == cache_hash:
if self._storage._debug_cache_actions is True:
logger.debug("Item cache match : %r with hash %r", path, cache_hash)
return CacheContent(*remainder) return CacheContent(*remainder)
else:
if self._storage._debug_cache_actions is True:
logger.debug("Item cache no match : %r with hash %r", path, cache_hash)
except FileNotFoundError: except FileNotFoundError:
if self._storage._debug_cache_actions is True:
logger.debug("Item cache not found : %r with hash %r", path, cache_hash)
pass pass
except (pickle.UnpicklingError, ValueError) as e: except (pickle.UnpicklingError, ValueError) as e:
logger.warning("Failed to load item cache entry %r in %r: %s", logger.warning("Failed to load item cache entry %r in %r: %s",

View file

@ -80,11 +80,18 @@ class CollectionPartGet(CollectionPartCache, CollectionPartLock,
raise raise
# The hash of the component in the file system. This is used to check, # The hash of the component in the file system. This is used to check,
# if the entry in the cache is still valid. # if the entry in the cache is still valid.
cache_hash = self._item_cache_hash(raw_text) if self._storage._use_mtime_and_size_for_item_cache is True:
if self._storage._debug_cache_actions is True: cache_hash = self._item_cache_mtime_and_size(os.stat(path).st_size, os.stat(path).st_mtime_ns)
logger.debug("Check cache for: %r with hash %r", path, cache_hash) if self._storage._debug_cache_actions is True:
logger.debug("Item cache check for: %r with mtime and size %r", path, cache_hash)
else:
cache_hash = self._item_cache_hash(raw_text)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache check for: %r with hash %r", path, cache_hash)
cache_content = self._load_item_cache(href, cache_hash) cache_content = self._load_item_cache(href, cache_hash)
if cache_content is None: if cache_content is None:
if self._storage._debug_cache_actions is True:
logger.debug("Item cache miss for: %r", path)
with self._acquire_cache_lock("item"): with self._acquire_cache_lock("item"):
# Lock the item cache to prevent multiple processes from # Lock the item cache to prevent multiple processes from
# generating the same data in parallel. # generating the same data in parallel.
@ -101,6 +108,8 @@ class CollectionPartGet(CollectionPartCache, CollectionPartLock,
vobject_item, = vobject_items vobject_item, = vobject_items
temp_item = radicale_item.Item( temp_item = radicale_item.Item(
collection=self, vobject_item=vobject_item) collection=self, vobject_item=vobject_item)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache store for: %r", path)
cache_content = self._store_item_cache( cache_content = self._store_item_cache(
href, temp_item, cache_hash) href, temp_item, cache_hash)
except Exception as e: except Exception as e:
@ -115,6 +124,9 @@ class CollectionPartGet(CollectionPartCache, CollectionPartLock,
if not self._item_cache_cleaned: if not self._item_cache_cleaned:
self._item_cache_cleaned = True self._item_cache_cleaned = True
self._clean_item_cache() self._clean_item_cache()
else:
if self._storage._debug_cache_actions is True:
logger.debug("Item cache hit for: %r", path)
last_modified = time.strftime( last_modified = time.strftime(
"%a, %d %b %Y %H:%M:%S GMT", "%a, %d %b %Y %H:%M:%S GMT",
time.gmtime(os.path.getmtime(path))) time.gmtime(os.path.getmtime(path)))

View file

@ -41,19 +41,26 @@ class CollectionPartUpload(CollectionPartGet, CollectionPartCache,
raise pathutils.UnsafePathError(href) raise pathutils.UnsafePathError(href)
path = pathutils.path_to_filesystem(self._filesystem_path, href) path = pathutils.path_to_filesystem(self._filesystem_path, href)
try: try:
cache_hash = self._item_cache_hash(item.serialize().encode(self._encoding)) with self._atomic_write(path, newline="") as fo: # type: ignore
logger.debug("Store cache for: %r with hash %r", path, cache_hash) f = cast(TextIO, fo)
self._store_item_cache(href, item, cache_hash) f.write(item.serialize())
except Exception as e: except Exception as e:
raise ValueError("Failed to store item %r in collection %r: %s" % raise ValueError("Failed to store item %r in collection %r: %s" %
(href, self.path, e)) from e (href, self.path, e)) from e
# TODO: better fix for "mypy" # store cache file
with self._atomic_write(path, newline="") as fo: # type: ignore if self._storage._use_mtime_and_size_for_item_cache is True:
f = cast(TextIO, fo) cache_hash = self._item_cache_mtime_and_size(os.stat(path).st_size, os.stat(path).st_mtime_ns)
f.write(item.serialize()) if self._storage._debug_cache_actions is True:
# Clean the cache after the actual item is stored, or the cache entry logger.debug("Item cache store for: %r with mtime and size %r", path, cache_hash)
# will be removed again. else:
self._clean_item_cache() cache_hash = self._item_cache_hash(item.serialize().encode(self._encoding))
if self._storage._debug_cache_actions is True:
logger.debug("Item cache store for: %r with hash %r", path, cache_hash)
try:
self._store_item_cache(href, item, cache_hash)
except Exception as e:
raise ValueError("Failed to store item cache of %r in collection %r: %s" %
(href, self.path, e)) from e
# Track the change # Track the change
self._update_history_etag(href, item) self._update_history_etag(href, item)
self._clean_history() self._clean_history()
@ -84,15 +91,11 @@ class CollectionPartUpload(CollectionPartGet, CollectionPartCache,
for item in items: for item in items:
uid = item.uid uid = item.uid
logger.debug("Store item from list with uid: '%s'" % uid) logger.debug("Store item from list with uid: '%s'" % uid)
try: cache_content = self._item_cache_content(item)
cache_content = self._item_cache_content(item)
except Exception as e:
raise ValueError(
"Failed to store item %r in temporary collection %r: %s" %
(uid, self.path, e)) from e
for href in get_safe_free_hrefs(uid): for href in get_safe_free_hrefs(uid):
path = os.path.join(self._filesystem_path, href)
try: try:
f = open(os.path.join(self._filesystem_path, href), f = open(path,
"w", newline="", encoding=self._encoding) "w", newline="", encoding=self._encoding)
except OSError as e: except OSError as e:
if (sys.platform != "win32" and e.errno == errno.EINVAL or if (sys.platform != "win32" and e.errno == errno.EINVAL or
@ -104,14 +107,31 @@ class CollectionPartUpload(CollectionPartGet, CollectionPartCache,
else: else:
raise RuntimeError("No href found for item %r in temporary " raise RuntimeError("No href found for item %r in temporary "
"collection %r" % (uid, self.path)) "collection %r" % (uid, self.path))
with f:
f.write(item.serialize()) try:
f.flush() with f:
self._storage._fsync(f) f.write(item.serialize())
with open(os.path.join(cache_folder, href), "wb") as fb: f.flush()
self._storage._fsync(f)
except Exception as e:
raise ValueError(
"Failed to store item %r in temporary collection %r: %s" %
(uid, self.path, e)) from e
# store cache file
if self._storage._use_mtime_and_size_for_item_cache is True:
cache_hash = self._item_cache_mtime_and_size(os.stat(path).st_size, os.stat(path).st_mtime_ns)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache store for: %r with mtime and size %r", path, cache_hash)
else:
cache_hash = self._item_cache_hash(item.serialize().encode(self._encoding)) cache_hash = self._item_cache_hash(item.serialize().encode(self._encoding))
logger.debug("Store cache for: %r with hash %r", fb.name, cache_hash) if self._storage._debug_cache_actions is True:
pickle.dump(cache_content, fb) logger.debug("Item cache store for: %r with hash %r", path, cache_hash)
path_cache = os.path.join(cache_folder, href)
if self._storage._debug_cache_actions is True:
logger.debug("Item cache store into: %r", path_cache)
with open(os.path.join(cache_folder, href), "wb") as fb:
pickle.dump((cache_hash, *cache_content), fb)
fb.flush() fb.flush()
self._storage._fsync(fb) self._storage._fsync(fb)
self._storage._sync_directory(cache_folder) self._storage._sync_directory(cache_folder)

View file

@ -116,6 +116,22 @@ class TestMultiFileSystem(BaseTest):
assert answer1 == answer2 assert answer1 == answer2
assert os.path.exists(os.path.join(cache_folder, "event1.ics")) assert os.path.exists(os.path.join(cache_folder, "event1.ics"))
# Item cache rebuild test with [storage] use_mtime_and_size_for_item_cache
# enabled.
def test_item_cache_rebuild_mtime_and_size(self) -> None:
"""Delete the item cache and verify that it is rebuild."""
# Switch the item cache key from SHA256 to mtime + size for this test.
self.configure({"storage": {"use_mtime_and_size_for_item_cache": "True"}})
self.mkcalendar("/calendar.ics/")
event = get_file_content("event1.ics")
path = "/calendar.ics/event1.ics"
self.put(path, event)
_, answer1 = self.get(path)
cache_folder = os.path.join(self.colpath, "collection-root",
"calendar.ics", ".Radicale.cache", "item")
# After the PUT and the first GET a cache entry for the item must exist.
assert os.path.exists(os.path.join(cache_folder, "event1.ics"))
# Remove the whole item cache folder; the next GET has to recreate it.
shutil.rmtree(cache_folder)
_, answer2 = self.get(path)
# The response must be unchanged and the cache entry must be back.
assert answer1 == answer2
assert os.path.exists(os.path.join(cache_folder, "event1.ics"))
def test_put_whole_calendar_uids_used_as_file_names(self) -> None: def test_put_whole_calendar_uids_used_as_file_names(self) -> None:
"""Test if UIDs are used as file names.""" """Test if UIDs are used as file names."""
_TestBaseRequests.test_put_whole_calendar( _TestBaseRequests.test_put_whole_calendar(