util.json: New, faster, stricter, more compliant JSON decoder. Now returns nil,err instead of throwing errors on invalid input.

This commit is contained in:
Waqas Hussain 2013-05-07 10:42:44 -04:00
parent 11684a2d6c
commit d960ec8fb7

View file

@@ -185,214 +185,177 @@ end
-----------------------------------
-- Returns the position of the first byte at or after `index` that is not a
-- space, tab, CR or LF. If no such byte exists, `index` is returned unchanged.
local function _skip_whitespace(json, index)
	local first_nonspace = json:find("[^ \t\r\n]", index);
	if first_nonspace then
		return first_nonspace;
	end
	return index;
end
-- Post-processes a decoded object in place and returns it.
--   * The list stored under "__array" is appended to the object, item by
--     item, with t_insert; the "__array" key itself is removed.
--   * The list stored under "__hash" is read as alternating key, value items;
--     each pair is assigned onto the object and the "__hash" key is removed.
--     A final key with no value after it is discarded.
local function _fixobject(obj)
	local positional = obj.__array;
	if positional then
		obj.__array = nil;
		for _, item in ipairs(positional) do
			t_insert(obj, item);
		end
	end
	local flattened = obj.__hash;
	if flattened then
		obj.__hash = nil;
		local pending_key;
		for _, item in ipairs(flattened) do
			if pending_key == nil then
				-- odd position: remember the key until its value arrives
				pending_key = item;
			else
				obj[pending_key] = item;
				pending_key = nil;
			end
		end
	end
	return obj;
end
-- Forward declarations: _readobject and _readarray below call these before their definitions appear
local _readvalue, _readstring;
-- Parses a JSON object. `index` must point at the opening "{".
-- Returns the decoded table and the index just past the closing "}", or
-- nil plus an error message string when the input is malformed.
-- A "}" in key position is accepted only directly after "{" (empty object);
-- after a "," it would be a trailing comma, which JSON does not allow.
local function _readobject(json, index)
	local o = {};
	local first = true;
	while true do
		local key, val;
		-- index sits on "{" (first pass) or "," (later passes); step over it
		index = _skip_whitespace(json, index + 1);
		if json:byte(index) ~= 0x22 then -- "\""
			if first and json:byte(index) == 0x7d then return o, index + 1; end -- "}"
			return nil, "key expected";
		end
		first = false;
		key, index = _readstring(json, index);
		if key == nil then return nil, index; end -- index carries the error message here
		index = _skip_whitespace(json, index);
		if json:byte(index) ~= 0x3a then return nil, "colon expected"; end -- ":"
		val, index = _readvalue(json, index + 1);
		if val == nil then return nil, index; end -- index carries the error message here
		o[key] = val;
		index = _skip_whitespace(json, index);
		local b = json:byte(index);
		if b == 0x7d then return _fixobject(o), index + 1; end -- "}"
		if b ~= 0x2c then return nil, "object eof"; end -- ","
	end
end
-- Parses a JSON array. `index` must point at the opening "[".
-- Returns the decoded list (with array_mt, defined outside this block, set as
-- its metatable) and the index just past the closing "]", or nil plus an
-- error message string when the input is malformed.
local function _readarray(json, index)
	local a = {};
	-- Detect the empty array up front, allowing whitespace between the
	-- brackets ("[ ]" is valid JSON, not just "[]").
	local first = _skip_whitespace(json, index + 1);
	if json:byte(first) == 0x5d then return setmetatable(a, array_mt), first + 1; end -- "]"
	while true do
		local val;
		-- index sits on "[" (first pass) or "," (later passes); step over it
		val, index = _readvalue(json, index + 1);
		if val == nil then return val, index; end -- index carries the error message here
		t_insert(a, val);
		index = _skip_whitespace(json, index);
		local b = json:byte(index);
		if b == 0x5d then return setmetatable(a, array_mt), index + 1; end -- "]"
		if b ~= 0x2c then return nil, "array eof"; end -- ","
	end
end
-- Flag shared by the string reader: _unescape_func sets it to true on a malformed \u escape; _readstring clears it before unescaping and checks it afterwards
local _unescape_error;
-- Turns a UTF-16 surrogate pair written as "\uXXXX\uXXXX" (lead escape in
-- characters 1-6, trail escape in characters 7-12) into the 4-byte UTF-8
-- encoding of the code point it represents.
local function _unescape_surrogate_func(x)
	local high = tonumber(x:sub(3, 6), 16);
	local low = tonumber(x:sub(9, 12), 16);
	-- 0x35FDC00 == 0xD800 * 0x400 + 0xDC00 - 0x10000
	local cp = high * 0x400 + low - 0x35FDC00;
	-- Peel off three 6-bit groups, lowest first, into continuation bytes 4..2
	local bytes = {};
	for pos = 4, 2, -1 do
		local six = cp % 64;
		bytes[pos] = 0x80 + six;
		cp = (cp - six) / 64;
	end
	-- What remains of cp is the top bits, carried by the lead byte
	return s_char(0xF0 + cp, bytes[2], bytes[3], bytes[4]);
end
-- gsub callback for a "\u" escape: `x` is the text matched by "\\u.?.?.?.?".
-- When four hex digits follow the "\u", returns their code point encoded by
-- codepoint_to_utf8. Otherwise sets _unescape_error and returns nothing, so
-- gsub keeps the original text. Code points in the surrogate range are not
-- rejected here.
local function _unescape_func(x)
	local hex = x:match("%x%x%x%x", 3);
	if not hex then
		_unescape_error = true;
		return;
	end
	return codepoint_to_utf8(tonumber(hex, 16));
end
-- Parses a JSON string literal. `index` must point at the opening quote.
-- Expects input that json.decode has already run through first_escape, so the
-- only escape form left is \uXXXX and the closing quote can be found with a
-- plain search.
-- Returns the decoded string and the index just past the closing quote, or
-- nil plus an error message string ("string eof" / "invalid escape").
function _readstring(json, index)
	index = index + 1;
	local endindex = json:find("\"", index, true);
	if not endindex then return nil, "string eof"; end
	local s = json:sub(index, endindex - 1);
	--if s:find("[%z-\31]") then return nil, "control char in string"; end
	-- FIXME handle control characters
	-- Every valid escape has become \uXXXX by now, so a backslash followed by
	-- anything else (or by nothing) is an escape JSON does not define.
	if s:find("\\[^u]") or s:find("\\$") then return nil, "invalid escape"; end
	_unescape_error = nil;
	-- Combine surrogate pairs first, so characters beyond the BMP become one
	-- code point rather than two separately encoded halves.
	s = s:gsub("\\u[dD][89abAB]%x%x\\u[dD][cdefCDEF]%x%x", _unescape_surrogate_func);
	-- FIXME handle lone surrogates
	s = s:gsub("\\u.?.?.?.?", _unescape_func);
	if _unescape_error then return nil, "invalid escape"; end
	return s, endindex + 1;
end
-- Parses a JSON number starting exactly at `index`.
-- Returns the number and the index just past it, or nil plus the error
-- message "invalid number" when the text does not follow the JSON grammar:
--   -? ( 0 | [1-9][0-9]* ) ( . [0-9]+ )? ( [eE] [+-]? [0-9]+ )?
local function _readnumber(json, index)
	-- Take the whole run of number-like characters so that malformed input
	-- such as "1.2.3" is rejected as a unit instead of being split.
	local m = json:match("^[0-9%.%-eE%+]+", index);
	if not m then return nil, "invalid number"; end
	local int, frac, exp = m:match("^%-?(%d+)(%.?%d*)([eE]?[%+%-]?%d*)$");
	if not int
	or (#int > 1 and int:byte(1) == 0x30) -- leading zero, e.g. "01"
	or (frac ~= "" and not frac:find("^%.%d+$")) -- "." needs digits after it
	or (exp ~= "" and not exp:find("^[eE][%+%-]?%d+$")) then -- exponent needs digits
		return nil, "invalid number";
	end
	local n = tonumber(m);
	if n == nil then return nil, "invalid number"; end
	return n, index + #m;
end
-- Parses the literal `null`. `index` points at the leading "n", which the
-- caller has already matched, so only the remaining "ull" is compared.
-- Returns the `null` value (defined outside this block) and the index just
-- past the literal, or nil plus an error message.
local function _readnull(json, index)
	if json:sub(index + 1, index + 3) ~= "ull" then
		return nil, "null parse failed";
	end
	return null, index + 4;
end
-- Parses the literal `true`. `index` points at the leading "t", which the
-- caller has already matched, so only the remaining "rue" is compared.
-- Returns true and the index just past the literal, or nil plus an error
-- message.
local function _readtrue(json, index)
	if json:sub(index + 1, index + 3) ~= "rue" then
		return nil, "true parse failed";
	end
	return true, index + 4;
end
-- Parses the literal `false`. `index` points at the leading "f", which the
-- caller has already matched, so only the remaining "alse" is compared.
-- Returns false and the index just past the literal, or nil plus an error
-- message.
local function _readfalse(json, index)
	if json:sub(index + 1, index + 4) ~= "alse" then
		return nil, "false parse failed";
	end
	return false, index + 5;
end
-- Parses any JSON value. Leading whitespace from `index` is skipped, then the
-- first significant byte selects the specialised reader, whose results are
-- returned unchanged. When no reader applies (including end of input) the
-- result is nil, "value expected".
function _readvalue(json, index)
	index = _skip_whitespace(json, index);
	local lead = json:byte(index);
	if lead == nil then return nil, "value expected"; end
	if lead == 0x7B then return _readobject(json, index); end -- "{"
	if lead == 0x5B then return _readarray(json, index); end -- "["
	if lead == 0x22 then return _readstring(json, index); end -- "\""
	if lead == 0x2d or (lead >= 0x30 and lead <= 0x39) then -- "-" or "0"-"9"
		return _readnumber(json, index);
	end
	if lead == 0x6e then return _readnull(json, index); end -- "n"
	if lead == 0x74 then return _readtrue(json, index); end -- "t"
	if lead == 0x66 then return _readfalse(json, index); end -- "f"
	return nil, "value expected";
end
-- Replacement table for json.decode's gsub over "\\.": rewrites each
-- two-character JSON escape into the equivalent \uXXXX escape, so the string
-- reader only has to understand the \u form. Two-character sequences with no
-- entry here are left as they are by gsub.
local first_escape = {
["\\\""] = "\\u0022";
["\\\\"] = "\\u005c";
["\\/" ] = "\\u002f";
["\\b" ] = "\\u0008";
["\\f" ] = "\\u000C";
["\\n" ] = "\\u000A";
["\\r" ] = "\\u000D";
["\\t" ] = "\\u0009";
["\\u" ] = "\\u"; -- already in the target form; maps to itself
};
-- Decodes a JSON document.
-- @param json the JSON text
-- @return the decoded value on success (a decoded `false` is returned as
--         false, so callers should test the first result against nil);
--         on failure nil followed by an error message. Malformed input is
--         reported this way instead of being raised with error().
function json.decode(json)
	json = json.." "; -- appending a space ensures valid json wouldn't touch EOF

	-- Get rid of all escapes except \uXXXX, making string parsing much simpler:
	-- once \" has become \u0022, the next '"' always terminates the string.
	-- gsub's second result (the match count) is dropped by the assignment.
	json = json:gsub("\\.", first_escape);

	local val, index = _readvalue(json, 1);
	if val == nil then return val, index; end -- index carries the error message here
	-- Only whitespace may follow the top-level value
	if json:find("[^ \t\r\n]", index) then return nil, "garbage at eof"; end
	return val;
end
function json.test(object)