mirror of
https://github.com/bjc/prosody.git
synced 2025-04-03 21:27:38 +03:00
util.json: New, faster, stricter, more compliant JSON decoder. Now returns nil,err instead of throwing errors on invalid input.
This commit is contained in:
parent
11684a2d6c
commit
d960ec8fb7
1 changed files with 168 additions and 205 deletions
373
util/json.lua
373
util/json.lua
|
@ -185,214 +185,177 @@ end
|
|||
-----------------------------------
|
||||
|
||||
|
||||
-- Returns the position of the first byte at or after `index` that is not
-- JSON whitespace (space, tab, CR or LF). When no such byte exists the
-- original `index` is returned unchanged.
local function _skip_whitespace(json, index)
	local first_non_ws = json:find("[^ \t\r\n]", index);
	if first_non_ws then
		return first_non_ws;
	end
	return index;
end
|
||||
-- Expands the reserved "__array" and "__hash" members of a decoded object,
-- in place, and returns the same table.
--   * obj.__array: each element is appended to obj's sequence part.
--   * obj.__hash:  read as a flat list of alternating keys and values
--                  ({k1, v1, k2, v2, ...}); each pair is stored as obj[k] = v.
--                  A trailing key without a value is dropped.
-- A reserved member is only expanded (and removed) when it is a table.
-- Anything else (e.g. `{"__array": 1}`) is left in place as ordinary data,
-- instead of making ipairs() raise on attacker-controlled input.
local function _fixobject(obj)
	local __array = obj.__array;
	if type(__array) == "table" then
		obj.__array = nil;
		for _, v in ipairs(__array) do
			t_insert(obj, v);
		end
	end
	local __hash = obj.__hash;
	if type(__hash) == "table" then
		obj.__hash = nil;
		local k; -- pending key; nil while we are waiting for the next key
		for _, v in ipairs(__hash) do
			if k ~= nil then
				obj[k] = v; k = nil;
			else
				k = v;
			end
		end
	end
	return obj;
end
|
||||
local _readvalue, _readstring;
|
||||
-- Parses a JSON object. `index` must point at the opening "{".
-- Returns the decoded table and the index just past the closing "}",
-- or nil plus an error message.
-- Every successful return goes through _fixobject, including the "}" that is
-- reached after a trailing comma, so reserved members are expanded
-- consistently regardless of how the object was terminated.
-- NOTE(review): a trailing comma (`{"a":1,}`) is still accepted here, as in
-- the previous implementation, although strict JSON forbids it.
local function _readobject(json, index)
	local o = {};
	while true do
		local key, val;
		-- step over "{" on the first pass and over "," on later passes
		index = _skip_whitespace(json, index + 1);
		local first = json:byte(index);
		if first ~= 0x22 then -- "\""
			if first == 0x7d then return _fixobject(o), index + 1; end -- "}"
			return nil, "key expected";
		end
		key, index = _readstring(json, index);
		if key == nil then return nil, index; end -- index carries the error message
		index = _skip_whitespace(json, index);
		if json:byte(index) ~= 0x3a then return nil, "colon expected"; end -- ":"
		val, index = _readvalue(json, index + 1);
		if val == nil then return nil, index; end -- index carries the error message
		o[key] = val;
		index = _skip_whitespace(json, index);
		local b = json:byte(index);
		if b == 0x7d then return _fixobject(o), index + 1; end -- "}"
		if b ~= 0x2c then return nil, "object eof"; end -- ","
	end
end
|
||||
-- Parses a JSON array. `index` must point at the opening "[".
-- Returns the decoded sequence (tagged with array_mt) and the index just past
-- the closing "]", or nil plus an error message.
-- The empty-array check runs after skipping whitespace, so "[ ]" is accepted
-- just like "[]". A "]" directly after a comma is still rejected, because the
-- value reader fails on it.
local function _readarray(json, index)
	local a = {};
	index = _skip_whitespace(json, index + 1); -- step over "["
	if json:byte(index) == 0x5d then return setmetatable(a, array_mt), index + 1; end -- "]"
	while true do
		local val;
		val, index = _readvalue(json, index);
		if val == nil then return nil, index; end -- index carries the error message
		t_insert(a, val);
		index = _skip_whitespace(json, index);
		local b = json:byte(index);
		if b == 0x5d then return setmetatable(a, array_mt), index + 1; end -- "]"
		if b ~= 0x2c then return nil, "array eof"; end -- ","
		index = index + 1; -- step over ","
	end
end
|
||||
local _unescape_error;
|
||||
-- Converts one escaped UTF-16 surrogate pair, given as the 12-character text
-- "\uXXXX\uYYYY", into the 4-byte UTF-8 encoding of the code point it denotes.
local function _unescape_surrogate_func(x)
	local high = tonumber(x:sub(3, 6), 16);  -- hex digits of the first escape
	local low = tonumber(x:sub(9, 12), 16);  -- hex digits of the second escape
	-- (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000, with the constants folded
	local rest = high * 0x400 + low - 0x35FDC00;

	-- peel off three 6-bit groups, least significant first
	local bits_0 = rest % 64;
	rest = (rest - bits_0) / 64;
	local bits_1 = rest % 64;
	rest = (rest - bits_1) / 64;
	local bits_2 = rest % 64;
	rest = (rest - bits_2) / 64;

	-- what is left goes into the lead byte; the groups become continuation bytes
	return s_char(0xF0 + rest, 0x80 + bits_2, 0x80 + bits_1, 0x80 + bits_0);
end
|
||||
-- gsub callback for one "\u" escape (the text matched by "\\u.?.?.?.?").
-- When four hex digits follow the "\u", returns whatever codepoint_to_utf8
-- produces for that code point. Otherwise flags the failure through
-- _unescape_error and returns nothing, so gsub keeps the original text.
local function _unescape_func(x)
	local hex = x:match("%x%x%x%x", 3);
	if not hex then
		_unescape_error = true;
		return;
	end
	--if hex >= 0xD800 and hex <= 0xDFFF then _unescape_error = true; end -- bad surrogate pair
	return codepoint_to_utf8(tonumber(hex, 16));
end
|
||||
-- Parses a JSON string. `index` must point at the opening quote.
-- Returns the unescaped string and the index just past the closing quote,
-- or nil plus an error message ("string eof" / "invalid escape").
-- The closing quote is found with a plain search for the next '"'. This
-- relies on the caller having already rewritten every two-character escape
-- (including \") into \uXXXX form, as json.decode does via first_escape.
function _readstring(json, index)
	index = index + 1; -- step over the opening quote
	local endindex = json:find("\"", index, true);
	if not endindex then
		return nil, "string eof";
	end
	local s = json:sub(index, endindex - 1);
	--if s:find("[%z-\31]") then return nil, "control char in string"; end
	-- FIXME handle control characters
	_unescape_error = nil;
	-- Escaped surrogate pairs must be combined before the generic pass below,
	-- otherwise each half would be converted on its own and characters beyond
	-- the BMP would not come out as a single code point.
	s = s:gsub("\\u[dD][89abAB]%x%x\\u[dD][cdefCDEF]%x%x", _unescape_surrogate_func);
	-- FIXME lone surrogates are still passed to the generic handler
	s = s:gsub("\\u.?.?.?.?", _unescape_func);
	if _unescape_error then return nil, "invalid escape"; end
	return s, endindex + 1;
end
|
||||
-- Parses a JSON number starting exactly at `index`.
-- Returns the number and the index just past its last character,
-- or nil plus "invalid number".
-- The run of number-like characters is anchored at `index` and then handed
-- to tonumber(). Both "no such run" and "run is not a number" (e.g. "1.2.3",
-- "-") are reported as an error string. Previously the first raised on `#m`
-- and the second returned a numeric index in the error slot.
local function _readnumber(json, index)
	local m = json:match("^[0-9%.%-eE%+]+", index); -- FIXME do strict checking (e.g. "01", "1." are still accepted)
	if m == nil then
		return nil, "invalid number";
	end
	local n = tonumber(m);
	if n == nil then
		return nil, "invalid number";
	end
	return n, index + #m;
end
|
||||
-- Parses the literal `null`. `index` points at the leading "n", which the
-- caller has already checked; only the remaining "ull" is verified here.
-- Returns the `null` sentinel and the index just past the literal,
-- or nil plus "null parse failed".
local function _readnull(json, index)
	if json:sub(index + 1, index + 3) ~= "ull" then
		return nil, "null parse failed";
	end
	return null, index + 4;
end
|
||||
-- Parses the literal `true`. `index` points at the leading "t", which the
-- caller has already checked; only the remaining "rue" is verified here.
-- Returns true and the index just past the literal,
-- or nil plus "true parse failed".
local function _readtrue(json, index)
	if json:sub(index + 1, index + 3) ~= "rue" then
		return nil, "true parse failed";
	end
	return true, index + 4;
end
|
||||
-- Parses the literal `false`. `index` points at the leading "f", which the
-- caller has already checked; only the remaining "alse" is verified here.
-- Returns false and the index just past the literal,
-- or nil plus "false parse failed".
local function _readfalse(json, index)
	if json:sub(index + 1, index + 4) ~= "alse" then
		return nil, "false parse failed";
	end
	return false, index + 5;
end
|
||||
-- Parses any JSON value starting at the first non-whitespace byte at or
-- after `index`, dispatching on that byte to the matching reader.
-- Returns whatever the selected reader returns (value and next index, or nil
-- and an error message), or nil plus "value expected" when the byte cannot
-- start a value or the input has ended.
function _readvalue(json, index)
	index = _skip_whitespace(json, index);
	local lead = json:byte(index);
	-- TODO try table lookup instead of if-else?
	if lead == nil then
		return nil, "value expected";
	end
	if lead == 0x7B then return _readobject(json, index); end -- "{"
	if lead == 0x5B then return _readarray(json, index); end -- "["
	if lead == 0x22 then return _readstring(json, index); end -- "\""
	if lead == 0x2d or (lead >= 0x30 and lead <= 0x39) then -- "-" or "0"-"9"
		return _readnumber(json, index);
	end
	if lead == 0x6e then return _readnull(json, index); end -- "n"
	if lead == 0x74 then return _readtrue(json, index); end -- "t"
	if lead == 0x66 then return _readfalse(json, index); end -- "f"
	return nil, "value expected";
end
|
||||
-- Lookup table for the first decoding pass: maps every two-character JSON
-- escape to its \uXXXX equivalent, so that later stages only need to handle
-- the \u form. Keys and values are the literal escape text, backslash included.
local first_escape = {
	["\\\""] = "\\u0022", -- quotation mark
	["\\\\"] = "\\u005c", -- reverse solidus
	["\\/"]  = "\\u002f", -- solidus
	["\\b"]  = "\\u0008", -- backspace
	["\\f"]  = "\\u000C", -- form feed
	["\\n"]  = "\\u000A", -- line feed
	["\\r"]  = "\\u000D", -- carriage return
	["\\t"]  = "\\u0009", -- tab
	["\\u"]  = "\\u",     -- already in \u form: keep as is
};
|
||||
|
||||
-- Decodes a JSON document.
-- @param json  string holding the JSON text
-- @return the decoded value on success (a JSON `null` yields the `null`
--         sentinel produced by _readnull), or nil followed by an error
--         message when the input is not acceptable. Malformed input is
--         reported through the return values rather than by raising.
function json.decode(json)
	-- Rewrite every two-character escape into \uXXXX form, so string parsing
	-- can locate the closing quote with a plain search. Escapes that are not
	-- keys of first_escape are left as they are (gsub keeps the match when the
	-- table lookup yields nil).
	json = json:gsub("\\.", first_escape);
		--:gsub("[\r\n]", "\t"); -- \r\n\t are equivalent, we care about none of them, and none of them can be in strings

	-- TODO do encoding verification

	local val, index = _readvalue(json, 1);
	if val == nil then return val, index; end -- index carries the error message
	-- only whitespace may follow the top-level value
	if json:find("[^ \t\r\n]", index) then return nil, "garbage at eof"; end

	return val;
end
|
||||
|
||||
function json.test(object)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue