mod_storage_internal: Use a binary search for time based ranges

Iterating over an entire archive to find a few items in the far end from
where iteration started is expensive, and probably more expensive with
the lazy-loading of items added in the previous commit.

Since we can now efficiently read items in random order, we can now use
a binary search to find a better starting point for iteration.
This commit is contained in:
Kim Alvefur 2021-05-12 01:32:03 +02:00
parent f4be79f6d6
commit 7259b41e23

View file

@ -122,6 +122,31 @@ function archive:append(username, key, value, when, with)
return key;
end
local function binary_search(haystack, test, min, max)
if min == nil then
min = 1;
end
if max == nil then
max = #haystack;
end
local floor = math.floor;
while min < max do
local mid = floor((max + min) / 2);
local result = test(haystack[mid]);
if result < 0 then
max = mid;
elseif result > 0 then
min = mid + 1;
else
return mid, haystack[mid];
end
end
return min, nil;
end
function archive:find(username, query)
local list, err = datamanager.list_open(username, host, self.store);
if not list then
@ -171,16 +196,38 @@ function archive:find(username, query)
end, iter);
end
if query.start then
iter = it.filter(function(item)
local when = item.when or datetime.parse(item.attr.stamp);
return when >= query.start;
end, iter);
if not query.reverse then
local wi, exact = binary_search(list, function(item)
local when = item.when or datetime.parse(item.attr.stamp);
return query.start - when;
end);
if exact then
i = wi - 1;
elseif wi then
i = wi;
end
else
iter = it.filter(function(item)
local when = item.when or datetime.parse(item.attr.stamp);
return when >= query.start;
end, iter);
end
end
if query["end"] then
iter = it.filter(function(item)
local when = item.when or datetime.parse(item.attr.stamp);
return when <= query["end"];
end, iter);
if query.reverse then
local wi = binary_search(list, function(item)
local when = item.when or datetime.parse(item.attr.stamp);
return query["end"] - when;
end);
if wi then
i = wi + 1;
end
else
iter = it.filter(function(item)
local when = item.when or datetime.parse(item.attr.stamp);
return when <= query["end"];
end, iter);
end
end
if query.after then
local found = false;