prosody/util/xml.lua
Jonas Schäfer e0b15fcba3 util.xml: Do not allow doctypes, comments or processing instructions
Yes. This is as bad as it sounds. CVE pending.

In Prosody itself, this only affects mod_websocket, which uses util.xml
to parse the <open/> frame, thus allowing unauthenticated remote DoS
using Billion Laughs. However, third-party modules using util.xml may
also be affected by this.

This commit installs handlers which disallow the use of doctype
declarations and processing instructions without any escape hatch. It,
by default, also introduces such a handler for comments, however, there
is a way to enable comments nontheless.

This is because util.xml is used to parse human-facing data, where
comments are generally a desirable feature, and also because comments
are generally harmless.
2022-01-10 18:23:54 +01:00

102 lines
2.6 KiB
Lua

local st = require "util.stanza";
local lxp = require "lxp";
local t_insert = table.insert;
local t_remove = table.remove;
local error = error;
local _ENV = nil;
-- luacheck: std none
local parse_xml = (function()
local ns_prefixes = {
["http://www.w3.org/XML/1998/namespace"] = "xml";
};
local ns_separator = "\1";
local ns_pattern = "^([^"..ns_separator.."]*)"..ns_separator.."?(.*)$";
return function(xml, options)
--luacheck: ignore 212/self
local handler = {};
local stanza = st.stanza("root");
local namespaces = {};
local prefixes = {};
function handler:StartNamespaceDecl(prefix, url)
if prefix ~= nil then
t_insert(namespaces, url);
t_insert(prefixes, prefix);
end
end
function handler:EndNamespaceDecl(prefix)
if prefix ~= nil then
-- we depend on each StartNamespaceDecl having a paired EndNamespaceDecl
t_remove(namespaces);
t_remove(prefixes);
end
end
function handler:StartElement(tagname, attr)
local curr_ns,name = tagname:match(ns_pattern);
if name == "" then
curr_ns, name = "", curr_ns;
end
if curr_ns ~= "" then
attr.xmlns = curr_ns;
end
for i=1,#attr do
local k = attr[i];
attr[i] = nil;
local ns, nm = k:match(ns_pattern);
if nm ~= "" then
ns = ns_prefixes[ns];
if ns then
attr[ns..":"..nm] = attr[k];
attr[k] = nil;
end
end
end
local n = {}
for i=1,#namespaces do
n[prefixes[i]] = namespaces[i];
end
stanza:tag(name, attr, n);
end
function handler:CharacterData(data)
stanza:text(data);
end
function handler:EndElement()
stanza:up();
end
local parser;
-- SECURITY: These two handlers, especially the Doctype one, are required to prevent exploits such as Billion Laughs.
function handler:StartDoctypeDecl()
if not parser.stop or not parser:stop() then
error("Failed to abort parsing");
end
end
function handler:ProcessingInstruction()
if not parser.stop or not parser:stop() then
error("Failed to abort parsing");
end
end
if not options or not options.allow_comments then
-- NOTE: comments are generally harmless and can be useful when parsing configuration files or other data, even user-provided data
function handler:Comment()
if not parser.stop or not parser:stop() then
error("Failed to abort parsing");
end
end
end
parser = lxp.new(handler, ns_separator);
local ok, err, line, col = parser:parse(xml);
if ok then ok, err, line, col = parser:parse(); end
--parser:close();
if ok then
return stanza.tags[1];
else
return ok, ("%s (line %d, col %d))"):format(err, line, col);
end
end;
end)();
return {
parse = parse_xml;
};