util.stanza: Allow U+7F

U+7F (DEL) is allowed by XML despite arguably being a control character.

Drops the upper bound of the high-byte range (\128-\247), which was meant to
rule out octets that are invalid in UTF-8 (\247, i.e. 0xF7, is the highest
lead byte of a 4-byte sequence), since UTF-8 correctness is validated by
util.encodings.utf8.valid().
This commit is contained in:
Kim Alvefur 2022-11-22 23:56:01 +01:00
parent 6bd9bed561
commit 4fa3808e8d

View file

@ -45,8 +45,12 @@ local _ENV = nil;
-- Metatable table for stanzas. `__name` is the field Lua 5.3+ reads when it
-- needs a type name for a value carrying this metatable (e.g. in error
-- messages). The table is its own `__index`, so fields added to it later are
-- found by lookups on any table that uses it as a metatable.
local stanza_mt = {
	__name = "stanza",
};
stanza_mt.__index = stanza_mt;
-- Basic check for valid XML character data.
-- Disallows the control characters below U+20, except that tab (U+09),
-- newline (U+0A) and carriage return (U+0D) are allowed.
-- U+7F (DEL) is permitted by XML and is therefore accepted.
-- Every byte >= 0x80 is accepted: this is a byte-level check only and does
-- not verify that the string is well-formed UTF-8.
-- For attributes, allow the \1 separator between namespace and name.
--
-- str:  the string to check.
-- attr: truthy when checking an attribute name/value (permits "\1").
-- Returns true when no forbidden byte is present, false otherwise.
local function valid_xml_cdata(str, attr)
	-- Lua "\ddd" escapes are decimal: space (U+20) must be written "\32".
	-- Writing "\20" would start the range at U+14 and let the control
	-- characters U+14..U+1F through.
	local forbidden = attr and "[^\1\9\10\13\32-\255]" or "[^\9\10\13\32-\255]";
	return not s_find(str, forbidden);
end
local function check_name(name, name_type)