prosody/plugins/mod_mam/mod_mam.lua
2025-03-23 19:59:45 +01:00

614 lines
20 KiB
Lua

-- Prosody IM
-- Copyright (C) 2008-2017 Matthew Wild
-- Copyright (C) 2008-2017 Waqas Hussain
-- Copyright (C) 2011-2021 Kim Alvefur
--
-- This project is MIT/X11 licensed. Please see the
-- COPYING file in the source package for more information.
--
-- XEP-0313: Message Archive Management for Prosody
--
local xmlns_mam = "urn:xmpp:mam:2";
local xmlns_mam_ext = "urn:xmpp:mam:2#extended";
local xmlns_delay = "urn:xmpp:delay";
local xmlns_forward = "urn:xmpp:forward:0";
local xmlns_st_id = "urn:xmpp:sid:0";
local um = require "prosody.core.usermanager";
local st = require "prosody.util.stanza";
local rsm = require "prosody.util.rsm";
local get_prefs = module:require"mamprefs".get;
local set_prefs = module:require"mamprefs".set;
local prefs_to_stanza = module:require"mamprefsxml".tostanza;
local prefs_from_stanza = module:require"mamprefsxml".fromstanza;
local jid_bare = require "prosody.util.jid".bare;
local jid_split = require "prosody.util.jid".split;
local jid_resource = require "prosody.util.jid".resource;
local jid_prepped_split = require "prosody.util.jid".prepped_split;
local dataform = require "prosody.util.dataforms".new;
local get_form_type = require "prosody.util.dataforms".get_type;
local host = module.host;
local rm_load_roster = require "prosody.core.rostermanager".load_roster;
local is_stanza = st.is_stanza;
local tostring = tostring;
local time_now = require "prosody.util.time".now;
local m_min = math.min;
local timestamp, datestamp = import( "util.datetime", "datetime", "date");
-- Module configuration.
-- default_max_items is the page size used when a query carries no RSM <max/>;
-- max_max_items is the upper bound applied to whatever the client asks for
-- (read from the "max_archive_query_results" option).
local default_max_items, max_max_items = 20, module:get_option_integer("max_archive_query_results", 50, 0);
-- Child elements in these namespaces are removed from the copy that is stored.
local strip_tags = module:get_option_set("dont_archive_namespaces", { "http://jabber.org/protocol/chatstates" });
-- Name of the store to open and the archive-type handle used by this module.
local archive_store = module:get_option_string("archive_store", "archive");
local archive = module:open_store(archive_store, "archive");
-- Retention period; compared against math.huge elsewhere in this file to
-- detect that expiry is disabled.
local cleanup_after = module:get_option_period("archive_expires_after", "1w");
-- Item limit: the backend's advertised quota when it has one, otherwise 1000,
-- unless overridden by the "storage_archive_item_limit" option.
local archive_item_limit = module:get_option_integer("storage_archive_item_limit", archive.caps and archive.caps.quota or 1000, 0);
-- Target passed as `truncate` when an over-quota archive is cut down
-- (99% of the item limit, rounded down).
local archive_truncate = math.floor(archive_item_limit * 0.99);
-- Abort module loading early if the opened store has no find() method.
if not archive.find then
error("mod_"..(archive._provided_by or archive.name and "storage_"..archive.name).." does not support archiving\n"
.."See https://prosody.im/doc/storage and https://prosody.im/doc/archiving for more information");
end
-- Whether queries ask the backend for a total item count. This value is
-- re-read with a different default further down when expiry is disabled.
local use_total = module:get_option_boolean("mam_include_total", true);
-- Placeholder for the per-user cleanup bookkeeping hook.
-- Parameters (both unused in this no-op version):
--   _username: user whose archive just received a message
--   _date: date stamp to record the message under
-- Intentionally not declared `local`: the name is reassigned further down in
-- this file when expiry is enabled and the backend needs per-user tracking.
function schedule_cleanup(_username, _date) -- luacheck: ignore 212
-- Called to make a note of which users have messages on which days, which in
-- turn is used to optimize the message expiry routine.
--
-- This noop is conditionally replaced later depending on retention settings
-- and storage backend capabilities.
end
-- Archiving preferences.
-- A "set" request first stores the submitted preferences; every request that
-- does not fail is answered with the preferences as currently stored.
module:hook("iq/self/"..xmlns_mam..":prefs", function(event)
	local session, iq = event.origin, event.stanza;
	local username = session.username;

	if iq.attr.type == "set" then
		local submitted = prefs_from_stanza(iq:get_child("prefs", xmlns_mam));
		local stored, store_err = set_prefs(username, submitted);
		if not stored then
			-- Storing failed: report it and do not echo any preferences back.
			local text = "Error storing preferences: "..tostring(store_err);
			session.send(st.error_reply(iq, "cancel", "internal-server-error", text));
			return true;
		end
	end

	-- Reply with the current preferences (also after a successful set).
	local current = prefs_to_stanza(get_prefs(username, true));
	session.send(st.reply(iq):add_child(current));
	return true;
end);
-- Data form describing the filters accepted in archive queries.
local query_form = dataform {
	{ name = "FORM_TYPE"; type = "hidden"; value = xmlns_mam };
	{ name = "with"; type = "jid-single" };
	{ name = "start"; type = "text-single"; datatype = "xs:dateTime" };
	{ name = "end"; type = "text-single"; datatype = "xs:dateTime" };
};

-- Extra fields are only offered when the storage backend reports the
-- corresponding capability.
do
	local caps = archive.caps or {};
	if caps.full_id_range then
		for _, field_name in ipairs({ "before-id", "after-id" }) do
			table.insert(query_form, { name = field_name; type = "text-single"; });
		end
	end
	if caps.ids then
		table.insert(query_form, { name = "ids"; type = "list-multi"; });
	end
end
-- Serve the query form: an iq-get on <query/> is answered with the data form
-- listing the supported search fields.
module:hook("iq-get/self/"..xmlns_mam..":query", function(event)
	local session, iq = event.origin, event.stanza;

	-- Result is discarded; the call is made for whatever side effect
	-- get_prefs has with its second argument set (see mamprefs).
	get_prefs(session.username, true);

	local reply = st.reply(iq):query(xmlns_mam);
	reply = reply:add_child(query_form:form());
	session.send(reply);
	return true;
end);
-- Handle archive queries
--
-- Answers an iq-set carrying a MAM <query/>:
--   1. parses the optional data form (with/start/end/before-id/after-id/ids),
--   2. combines it with the RSM paging parameters,
--   3. asks the archive store for up to one item more than the page size,
--   4. sends each hit as a forwarded <message/> and finishes with a <fin/>
--      result that carries the RSM first/last/count values.
-- Always returns true, i.e. the stanza is considered handled.
module:hook("iq-set/self/"..xmlns_mam..":query", function(event)
	local origin, stanza = event.origin, event.stanza;
	local query = stanza.tags[1];
	local qid = query.attr.queryid;

	-- Flag the session as a MAM user; the "message/offline/broadcast" hook in
	-- this file reads this flag.
	origin.mam_requested = true;

	get_prefs(origin.username, true);

	-- Search query parameters
	local qwith, qstart, qend, qbefore, qafter, qids;
	local form = query:get_child("x", "jabber:x:data");
	if form then
		local form_type, err = get_form_type(form);
		if not form_type then
			-- A failure can be reported without an accompanying message, in
			-- which case concatenating `err` directly would raise an error and
			-- the client would never receive a reply. Fall back to a fixed text.
			origin.send(st.error_reply(stanza, "modify", "bad-request",
				"Invalid dataform: "..(err or "unable to determine FORM_TYPE")));
			return true;
		elseif form_type ~= xmlns_mam then
			origin.send(st.error_reply(stanza, "modify", "bad-request", "Unexpected FORM_TYPE, expected '"..xmlns_mam.."'"));
			return true;
		end
		form, err = query_form:data(form);
		if err then
			-- err is a table of per-field problems; report the first one found.
			origin.send(st.error_reply(stanza, "modify", "bad-request", select(2, next(err))));
			return true;
		end
		qwith, qstart, qend = form["with"], form["start"], form["end"];
		qbefore, qafter = form["before-id"], form["after-id"];
		qids = form["ids"];
		qwith = qwith and jid_bare(qwith); -- dataforms does jidprep
	end

	-- RSM stuff
	local qset = rsm.get(query);
	local qmax = m_min(qset and qset.max or default_max_items, max_max_items);
	local reverse = qset and qset.before or false;
	-- RSM values take precedence over the before-id/after-id form fields.
	local before, after = qset and qset.before or qbefore, qset and qset.after or qafter;
	-- Only a string is usable as an item id; anything else is dropped here
	-- while `reverse` above still records that a <before/> was present.
	if type(before) ~= "string" then before = nil; end

	-- A reverse query needs to be flipped
	local flip = reverse;
	-- A flip-page query needs to be the opposite of that.
	if query:get_child("flip-page") then flip = not flip end

	module:log("debug", "Archive query by %s id=%s with=%s when=%s...%s rsm=%q",
		origin.username,
		qid or stanza.attr.id,
		qwith or "*",
		qstart and timestamp(qstart) or "",
		qend and timestamp(qend) or "",
		qset);

	-- Load all the data!
	local data, err = archive:find(origin.username, {
		start = qstart; ["end"] = qend; -- Time range
		with = qwith;
		-- One extra item is requested so we can tell whether more pages exist.
		limit = qmax == 0 and 0 or qmax + 1;
		before = before; after = after;
		ids = qids;
		reverse = reverse;
		total = use_total or qmax == 0;
	});

	if not data then
		module:log("debug", "Archive query id=%s failed: %s", qid or stanza.attr.id, err);
		if err == "item-not-found" then
			origin.send(st.error_reply(stanza, "modify", "item-not-found"));
		else
			origin.send(st.error_reply(stanza, "cancel", "internal-server-error"));
		end
		return true;
	end
	-- On success the second return value is used as the total count, if numeric.
	local total = tonumber(err);
	local msg_reply_attr = { to = stanza.attr.from, from = stanza.attr.to };

	local results = {};

	-- Wrap it in stuff and deliver
	local first, last;
	local count = 0;
	local complete = "true";
	for id, item, when in data do
		count = count + 1;
		if count > qmax then
			-- We requested qmax+1 items. If that many items are retrieved then
			-- there are more results to page through, so:
			complete = nil;
			break;
		end
		local fwd_st = st.message(msg_reply_attr)
			:tag("result", { xmlns = xmlns_mam, queryid = qid, id = id })
				:tag("forwarded", { xmlns = xmlns_forward })
					:tag("delay", { xmlns = xmlns_delay, stamp = timestamp(when) }):up();

		if not is_stanza(item) then
			item = st.deserialize(item);
		end
		item.attr.xmlns = "jabber:client";
		fwd_st:add_child(item);

		if not first then first = id; end
		last = id;

		if flip then
			-- Buffer so the page can be sent in the opposite order below.
			results[count] = fwd_st;
		else
			origin.send(fwd_st);
		end
	end

	if flip then
		for i = #results, 1, -1 do
			origin.send(results[i]);
		end
	end
	if reverse then
		-- Items were iterated newest-first, so swap the page boundaries.
		first, last = last, first;
	end

	origin.send(st.reply(stanza)
		:tag("fin", { xmlns = xmlns_mam, complete = complete })
			:add_child(rsm.generate {
				first = first, last = last, count = total }));

	-- That's all folks!
	-- When the page was cut short, `count` includes the extra probe item.
	module:log("debug", "Archive query id=%s completed, %d items returned", qid or stanza.attr.id, complete and count or count - 1);
	return true;
end);
-- Archive metadata: report the id and timestamp of the first and of the last
-- item in the requesting user's archive. Each lookup is a find() limited to a
-- single item; if either lookup fails the request is answered with an
-- internal-server-error instead.
module:hook("iq-get/self/"..xmlns_mam..":metadata", function (event)
	local session, iq = event.origin, event.stanza;
	local reply = st.reply(iq):tag("metadata", { xmlns = xmlns_mam });

	-- Looked up in this order: <start/> first, then <end/> (reverse query).
	local edges = {
		{ element = "start"; query = { limit = 1 } };
		{ element = "end"; query = { limit = 1, reverse = true } };
	};

	for _, edge in ipairs(edges) do
		local items = archive:find(session.username, edge.query);
		if not items then
			session.send(st.error_reply(iq, "cancel", "internal-server-error"));
			return true;
		end
		-- An empty archive yields no iterations, leaving the element out.
		for id, _, when in items do
			reply:tag(edge.element, { id = id, timestamp = timestamp(when) }):up();
		end
	end

	session.send(reply);
	return true;
end);
-- Look up `who` in the roster of local user `user`.
-- Returns the roster entry itself (truthy) when present, nil otherwise.
local function has_in_roster(user, who)
	local entry = rm_load_roster(user, host)[who];
	module:log("debug", "%s has %s in roster? %s", user, who, entry and "yes" or "no");
	return entry;
end
-- Decide from the user's preferences whether a conversation with `who`
-- should be archived for `user`.
-- Returns false for unknown users, the per-contact rule when one is set,
-- and otherwise the default rule (resolved against the roster when that
-- default is "roster").
local function shall_store(user, who)
	-- TODO Cache this?
	local account_exists = um.user_exists(user, host);
	if not account_exists then
		module:log("debug", "%s@%s does not exist", user, host)
		return false;
	end

	local user_prefs = get_prefs(user);
	local contact_rule = user_prefs[who];
	module:log("debug", "%s's rule for %s is %s", user, who, contact_rule);

	if contact_rule == nil then
		-- No explicit rule for this contact: fall back to the default,
		-- which is stored under the key `false`.
		-- Below could be done by a metatable
		local fallback = user_prefs[false];
		module:log("debug", "%s's default rule is %s", user, fallback);
		if fallback ~= "roster" then
			return fallback;
		end
		return has_in_roster(user, who);
	end

	return contact_rule;
end
-- Remove <stanza-id/> elements that claim to have been assigned by the bare
-- JID `user`@<this host>.
-- Returns the stanza unchanged (same object) when it carries no <stanza-id/>
-- at all; otherwise returns a filtered clone and leaves the input untouched.
local function strip_stanza_id(stanza, user)
	if not stanza:get_child("stanza-id", xmlns_st_id) then
		return stanza;
	end

	-- maptags callback: returning nil drops the tag, returning it keeps it.
	local function keep_unless_ours(tag)
		if tag.name ~= "stanza-id" or tag.attr.xmlns ~= xmlns_st_id then
			return tag;
		end
		local by_user, by_host, by_resource = jid_prepped_split(tag.attr.by);
		if by_resource or by_host ~= host or by_user ~= user then
			-- Assigned by somebody else (or by a full JID): not ours to remove.
			return tag;
		end
		return nil;
	end

	local cleaned = st.clone(stanza);
	cleaned:maptags(keep_unless_ours);
	return cleaned;
end
-- Default policy deciding whether a message stanza is worth archiving, based
-- on its type and payload. Checks run in a fixed order and the first match
-- wins. Returns a boolean plus a short reason string.
local function should_store(stanza) --> boolean, reason: string
	local kind = stanza.attr.type or "normal";
	local xmlns_hints = "urn:xmpp:hints";

	-- FIXME pass direction of stanza and use that along with bare/full JID addressing
	-- for more accurate MUC / type=groupchat check

	if kind == "headline" then
		-- Headline messages are ephemeral by definition
		return false, "headline";
	elseif kind == "error" then
		-- Errors not sent from a local client
		-- Why would a client send an error anyway?
		local to_full_jid = jid_resource(stanza.attr.to);
		if not to_full_jid then
			-- Skip errors for messages that come from your account, such as
			-- PEP notifications.
			return false, "bounce";
		end
		-- Store delivery failure notifications so you know if your own
		-- messages were not delivered.
		return true, "bounce";
	elseif kind == "groupchat" then
		-- MUC messages always go to the full JID, usually archived by the MUC
		return false, "groupchat";
	end

	-- Explicit processing hints; refusing storage is checked before requesting it.
	if stanza:get_child("no-store", xmlns_hints)
	or stanza:get_child("no-permanent-store", xmlns_hints) then
		return false, "hint";
	elseif stanza:get_child("store", xmlns_hints) then
		return true, "hint";
	end

	-- Payloads considered worth keeping.
	if stanza:get_child("body") then
		return true, "body";
	elseif stanza:get_child("subject") then
		-- XXX Who would send a message with a subject but without a body?
		return true, "subject";
	elseif stanza:get_child("encryption", "urn:xmpp:eme:0") then
		-- Since we can't know what an encrypted message contains, we assume it's important
		-- XXX Experimental XEP
		return true, "encrypted";
	elseif stanza:get_child(nil, "urn:xmpp:receipts") then
		-- If it's important enough to ask for a receipt then it's important enough to archive
		-- and the same applies to the receipt
		return true, "receipt";
	elseif stanza:get_child(nil, "urn:xmpp:chat-markers:0") then
		return true, "marker";
	elseif stanza:get_child("x", "jabber:x:conference")
	or stanza:find("{http://jabber.org/protocol/muc#user}x/invite") then
		return true, "invite";
	elseif stanza:get_child(nil, "urn:xmpp:jingle-message:0") or stanza:get_child(nil, "urn:xmpp:jingle-message:1") then
		-- XXX Experimental XEP
		return true, "jingle call";
	end

	-- The IM-NG thing to do here would be to return `not st_to_full`
	-- One day ...
	return false, "default";
end
-- Default "archive-should-store" handler, hooked at priority -1. It applies
-- should_store() and records the reason on the event for the caller to read.
module:hook("archive-should-store", function (event)
	local verdict, reason = should_store(event.stanza);
	event.reason = reason;
	return verdict;
end, -1)
-- Handle messages
--
-- Archives a message stanza on behalf of the local user it belongs to.
--   event: event table; reads event.origin and event.stanza, and replaces
--          event.stanza with a cleaned (and, once archived, tagged) copy.
--   c2s:   truthy when the stanza was sent by a local client, in which case
--          the archive owner is the sending session's user; otherwise the
--          owner is taken from the 'to' address.
-- Never returns a value.
local function message_handler(event, c2s)
	local origin, stanza = event.origin, event.stanza;
	local log = c2s and origin.log or module._log;
	local orig_from = stanza.attr.from;
	local orig_to = stanza.attr.to or orig_from;
	-- Stanza without 'to' are treated as if it was to their own bare jid

	-- Whose storage do we put it in?
	local store_user = c2s and origin.username or jid_split(orig_to);
	-- And who are they chatting with?
	local with = jid_bare(c2s and orig_to or orig_from);

	-- Filter out <stanza-id> that claim to be from us.
	-- The cleaned stanza must be the one used for everything below (policy
	-- event, stored copy, copy handed to later handlers); otherwise a forged
	-- <stanza-id by='user@host'/> would survive whenever the message is
	-- archived, since event.stanza is then replaced by a clone of `stanza`.
	stanza = strip_stanza_id(stanza, store_user);
	event.stanza = stanza;

	-- Let other modules (and the default policy in this file) decide.
	local event_payload = { stanza = stanza; session = origin };
	local should = module:fire_event("archive-should-store", event_payload);
	local why = event_payload.reason;

	if not should then
		log("debug", "Not archiving stanza: %s (%s)", stanza:top_tag(), why);
		return;
	end

	-- Build the copy that goes into storage, minus unwanted namespaces.
	local clone_for_storage;
	if not strip_tags:empty() then
		clone_for_storage = st.clone(stanza);
		clone_for_storage:maptags(function (tag)
			if strip_tags:contains(tag.attr.xmlns) then
				return nil;
			else
				return tag;
			end
		end);
		if #clone_for_storage.tags == 0 then
			log("debug", "Not archiving stanza: %s (empty when stripped)", stanza:top_tag());
			return;
		end
	else
		clone_for_storage = stanza;
	end

	-- Check with the users preferences
	if shall_store(store_user, with) then
		log("debug", "Archiving stanza: %s (%s)", stanza:top_tag(), why);

		-- And stash it
		local time = time_now();
		local ok, err = archive:append(store_user, nil, clone_for_storage, time, with);
		if not ok and err == "quota-limit" then
			-- First try to make room by expiring old messages, if expiry is enabled.
			if cleanup_after ~= math.huge then
				module:log("debug", "User '%s' over quota, cleaning archive", store_user);
				local cleaned = archive:delete(store_user, {
					["end"] = (os.time() - cleanup_after);
				});
				if cleaned then
					ok, err = archive:append(store_user, nil, clone_for_storage, time, with);
				end
			end
			-- Still failing: cut the archive down, if the backend can do that.
			if not ok and (archive.caps and archive.caps.truncate) then
				module:log("debug", "User '%s' over quota, truncating archive", store_user);
				local truncated = archive:delete(store_user, {
					truncate = archive_truncate;
				});
				if truncated then
					ok, err = archive:append(store_user, nil, clone_for_storage, time, with);
				end
			end
		end
		if ok then
			-- On success append() returns the archive id. Later handlers get a
			-- copy tagged with our own <stanza-id/>.
			local clone_for_other_handlers = st.clone(stanza);
			local id = ok;
			clone_for_other_handlers:tag("stanza-id", { xmlns = xmlns_st_id, by = store_user.."@"..host, id = id }):up();
			event.stanza = clone_for_other_handlers;
			schedule_cleanup(store_user);
			module:fire_event("archive-message-added", { origin = origin, stanza = clone_for_storage, for_user = store_user, id = id });
		else
			log("error", "Could not archive stanza: %s", err);
		end
	else
		log("debug", "Not archiving stanza: %s (prefs)", stanza:top_tag());
	end
end
-- Variant of message_handler for stanzas sent by local clients: delegates
-- with the c2s flag set.
local function c2s_message_handler(event)
	local from_local_client = true;
	return message_handler(event, from_local_client);
end
-- Filter out <stanza-id> before the message leaves the server to prevent privacy leak.
-- Hooked at priority -1 on both outgoing message events; it replaces
-- event.stanza with a copy lacking the sender's own <stanza-id/>.
local function strip_stanza_id_after_other_events(event)
	local sender = event.origin.username;
	event.stanza = strip_stanza_id(event.stanza, sender);
end

for _, event_name in ipairs({ "pre-message/bare", "pre-message/full" }) do
	module:hook(event_name, strip_stanza_id_after_other_events, -1);
end
-- Catch messages not stored by mod_offline and mark them as stored if they
-- have been archived. This would generally only happen if mod_offline is
-- disabled. Otherwise the message would generate a delivery failure report,
-- which would not be accurate because it has been archived.
--
-- "Archived" is detected by a <stanza-id/> whose 'by' attribute is the
-- recipient's bare JID.
module:hook("message/offline/handle", function(event)
	local owner_jid = event.username .. "@" .. host;
	local archived = event.stanza:get_child_with_attr("stanza-id", xmlns_st_id, "by", owner_jid);
	if archived then
		return true;
	end
end, -2);
-- Don't broadcast offline messages to clients that have queried the archive.
-- The flag checked here is set on the session by the archive query handler.
module:hook("message/offline/broadcast", function (event)
	local session = event.origin;
	if not session.mam_requested then
		return;
	end
	return true;
end);
-- Message expiry.
-- Only set up when a finite retention period is configured; otherwise the
-- else-branch at the bottom merely adjusts the default for `use_total`.
if cleanup_after ~= math.huge then
-- Two handles onto the same "archive_cleanup" store: a plain one and a map
-- one. The map handle is written with (date, username) pairs below.
local cleanup_storage = module:open_store("archive_cleanup");
local cleanup_map = module:open_store("archive_cleanup", "map");
module:log("debug", "archive_expires_after = %d -- in seconds", cleanup_after);
-- Expiry needs a backend that can delete; otherwise stop loading here.
if not archive.delete then
module:log("error", "archive_expires_after set but mod_%s does not support deleting", archive._provided_by);
return false;
end
-- For each day, store a set of users that have new messages. To expire
-- messages, we collect the union of sets of users from dates that fall
-- outside the cleanup range.
if not (archive.caps and archive.caps.wildcard_delete) then
-- Cache of the last date recorded per user, so that the map store is written
-- at most once per user and date while the entry stays cached.
local last_date = require "prosody.util.cache".new(module:get_option_integer("archive_cleanup_date_cache_size", 1000, 1));
-- Replaces the no-op schedule_cleanup defined near the top of the file.
-- `date` defaults to the current date stamp.
function schedule_cleanup(username, date)
date = date or datestamp();
if last_date:get(username) == date then return end
local ok = cleanup_map:set(date, username, true);
if ok then
-- Only remember the date once it has actually been persisted.
last_date:set(username, date);
end
end
end
local cleanup_time = module:measure("cleanup", "times");
local async = require "prosody.util.async";
-- Daily task that deletes messages older than the retention period.
module:daily("Remove expired messages", function ()
local cleanup_done = cleanup_time();
-- Fast path: the backend can delete across all users in a single call.
if archive.caps and archive.caps.wildcard_delete then
local ok, err = archive:delete(true, { ["end"] = os.time() - cleanup_after })
if ok then
local sum = tonumber(ok);
if sum then
module:log("info", "Deleted %d expired messages", sum);
else
-- driver did not tell
module:log("info", "Deleted all expired messages");
end
else
module:log("error", "Could not delete messages: %s", err);
end
cleanup_done();
return;
end
-- Slow path: collect the users recorded on or before the cut-off date.
local users = {};
local cut_off = datestamp(os.time() - cleanup_after);
-- NOTE(review): the keys of this store are date stamps (see
-- schedule_cleanup above), so users() presumably yields dates -- confirm.
for date in cleanup_storage:users() do
if date <= cut_off then
module:log("debug", "Messages from %q should be expired", date);
local messages_this_day = cleanup_storage:get(date);
if messages_this_day then
for user in pairs(messages_this_day) do
users[user] = true;
end
if date < cut_off then
-- Messages from the same day as the cut-off might not have expired yet,
-- but all earlier will have, so clear storage for those days.
cleanup_storage:set(date, nil);
end
end
end
end
local sum, num_users = 0, 0;
for user in pairs(users) do
local ok, err = archive:delete(user, { ["end"] = os.time() - cleanup_after; })
if ok then
num_users = num_users + 1;
sum = sum + (tonumber(ok) or 0);
else
-- Re-record the user under the cut-off date so a later run retries.
cleanup_map:set(cut_off, user, true);
module:log("error", "Could not delete messages for user '%s': %s", user, err);
end
-- Wait on a 0.01 timer before handling the next user (presumably so a
-- large cleanup does not monopolise the server -- confirm).
local wait, done = async.waiter();
module:add_timer(0.01, done);
wait();
end
module:log("info", "Deleted %d expired messages for %d users", sum, num_users);
cleanup_done();
end);
else
module:log("debug", "Archive expiry disabled");
-- Don't ask the backend to count the potentially unbounded number of items,
-- it'll get slow.
use_total = module:get_option_boolean("mam_include_total", false);
end
-- Hook up archiving at priority 0 for both bare- and full-JID addressing.
-- Stanzas sent by local clients
for _, address_kind in ipairs({ "bare", "full" }) do
	module:hook("pre-message/"..address_kind, c2s_message_handler, 0);
end

-- Stanzas to local clients
for _, address_kind in ipairs({ "bare", "full" }) do
	module:hook("message/"..address_kind, message_handler, 0);
end
-- The extended MAM feature is only advertised when the backend supports both
-- full id ranges and lookups by id.
local advertise_extended = archive.caps and archive.caps.full_id_range and archive.caps.ids;

-- Add our features to account disco#info, in this order: MAM, optionally
-- extended MAM, then stanza-id.
module:hook("account-disco-info", function(event)
	local features = { xmlns_mam };
	if advertise_extended then
		features[#features + 1] = xmlns_mam_ext;
	end
	features[#features + 1] = xmlns_st_id;

	local target = event.reply or event.stanza;
	for _, feature_var in ipairs(features) do
		target:tag("feature", {var=feature_var}):up();
	end
end);