util.format: Escape invalid UTF-8 by passing through serialization

Should prevent invalid UTF-8 from making it into the logs, which can
cause trouble with terminals or log viewers or other tools, such as when
grep determines that log files are binary.
This commit is contained in:
Kim Alvefur 2021-12-10 22:48:45 +01:00
parent ab4991e829
commit d4c1451794
2 changed files with 14 additions and 1 deletions

View file

@ -20,5 +20,9 @@ describe("util.format", function()
assert.equal("", format("%s", "\1"));
end);
-- Byte \195 (0xC3) is a UTF-8 lead byte that must be followed by a
-- continuation byte; here it is followed by "r", so the input is not valid
-- UTF-8. The expected result is the string in quoted form with that byte
-- written out as the escape \195.
it("escapes invalid UTF-8", function ()
assert.equal("\"Hello w\\195rld\"", format("%s", "Hello w\195rld"));
end);
end);
end);

View file

@ -5,6 +5,7 @@
local tostring = tostring;
local unpack = table.unpack or unpack; -- luacheck: ignore 113/unpack
local pack = require "util.table".pack; -- TODO table.pack in 5.2+
local valid_utf8 = require "util.encodings".utf8.valid;
local type = type;
local dump = require "util.serialization".new("debug");
local num_type = math.type or function (n)
@ -60,10 +61,18 @@ local function format(formatstring, ...)
args[i] = dump(arg);
spec = "%s";
elseif option == "s" then
args[i] = tostring(arg):gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t");
arg = tostring(arg);
if arg:find("[\128-\255]") and not valid_utf8(arg) then
args[i] = dump(arg);
else
args[i] = arg:gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t");
end
elseif type(arg) ~= "number" then -- arg isn't number as expected?
args[i] = tostring(arg);
spec = "[%s]";
option = "s";
spec = "[%s]";
t = "string";
elseif expects_integer[option] and num_type(arg) ~= "integer" then
args[i] = tostring(arg);
spec = "[%s]";