Commit | Line | Data |
51650966 |
1 | ----------------------------------------------------------------------------- |
2 | -- JSON4Lua: JSON encoding / decoding support for the Lua language. |
3 | -- json Module. |
4 | -- Author: Craig Mason-Jones |
5 | -- Homepage: http://json.luaforge.net/ |
6 | -- Version: 0.9.50 |
7 | -- This module is released under the MIT License (MIT). |
8 | -- Please see LICENCE.txt for details. |
9 | -- |
10 | -- USAGE: |
11 | -- This module exposes two functions: |
12 | -- encode(o) |
13 | -- Returns the table / string / boolean / number / nil / json.null value as a JSON-encoded string. |
14 | -- decode(json_string) |
15 | -- Returns a Lua object populated with the data encoded in the JSON string json_string. |
16 | -- |
17 | -- REQUIREMENTS: |
18 | -- compat-5.1 if using Lua 5.0 |
19 | -- |
20 | -- CHANGELOG |
21 | -- 0.9.50 Radical performance improvement on decode from Eike Decker. Many thanks! |
22 | -- 0.9.40 Changed licence to MIT License (MIT) |
23 | -- 0.9.20 Introduction of local Lua functions for private functions (removed _ function prefix). |
24 | -- Fixed Lua 5.1 compatibility issues. |
25 | -- Introduced json.null to have null values in associative arrays. |
26 | -- encode() performance improvement (more than 50%) through table.concat rather than .. |
27 | -- Introduced decode ability to ignore /**/ comments in the JSON string. |
28 | -- 0.9.10 Fix to array encoding / decoding to correctly manage nil/null values in arrays. |
29 | ----------------------------------------------------------------------------- |
30 | |
31 | ----------------------------------------------------------------------------- |
32 | -- Imports and dependencies |
33 | ----------------------------------------------------------------------------- |
34 | local math = require('math') |
35 | local string = require("string") |
36 | local table = require("table") |
37 | local tostring = tostring |
38 | |
39 | local base = _G |
40 | |
41 | ----------------------------------------------------------------------------- |
42 | -- Module declaration |
43 | ----------------------------------------------------------------------------- |
44 | module("json") |
45 | |
46 | -- Public functions |
47 | |
48 | -- Private functions |
-- Forward declarations for the module-private helpers, so that the public
-- functions defined below can refer to them before their bodies are assigned.
local decode_scanArray, decode_scanComment, decode_scanConstant,
      decode_scanNumber, decode_scanObject, decode_scanString,
      decode_scanWhitespace,
      encodeString, isArray, isEncodable
59 | |
60 | ----------------------------------------------------------------------------- |
61 | -- PUBLIC FUNCTIONS |
62 | ----------------------------------------------------------------------------- |
--- Encodes an arbitrary Lua object / variable as JSON text.
-- nil and json.null become "null"; strings are quoted and escaped; booleans
-- and finite numbers are written with tostring. Tables are written as a JSON
-- array when isArray accepts them and as a JSON object otherwise; object
-- members whose key or value is not encodable are skipped.
-- @param v The Lua object / variable to be JSON encoded.
-- @return String containing the JSON encoding in internal Lua string format (i.e. not unicode)
-- Raises an error for any other type (functions other than json.null, userdata, threads).
function encode (v)
  -- Handle nil values
  if v == nil then
    return "null"
  end

  local vtype = base.type(v)

  -- Handle strings
  if vtype == 'string' then
    return '"' .. encodeString(v) .. '"'
  end

  -- Handle numbers
  if vtype == 'number' then
    -- NaN and +/-infinity have no JSON representation; tostring would emit
    -- "nan" / "inf", which no JSON parser accepts, so write null instead.
    if v ~= v or v == math.huge or v == -math.huge then
      return 'null'
    end
    return base.tostring(v)
  end

  -- Handle booleans
  if vtype == 'boolean' then
    return base.tostring(v)
  end

  -- Handle tables
  if vtype == 'table' then
    local rval = {}
    -- Consider arrays separately
    local bArray, maxCount = isArray(v)
    if bArray then
      -- Walk 1..maxCount explicitly so that holes are written as null.
      for i = 1, maxCount do
        rval[i] = encode(v[i])
      end
      return '[' .. table.concat(rval, ',') .. ']'
    end

    -- An object, not an array
    for key, value in base.pairs(v) do
      if isEncodable(key) and isEncodable(value) then
        rval[#rval + 1] = '"' .. encodeString(key) .. '":' .. encode(value)
      end
    end
    return '{' .. table.concat(rval, ',') .. '}'
  end

  -- Handle null values
  if vtype == 'function' and v == null then
    return 'null'
  end

  -- Level 0 keeps the message free of position information, exactly as the
  -- previous assert(false, msg) reported it.
  base.error('encode attempt to encode unsupported type ' .. vtype .. ':' .. base.tostring(v), 0)
end
114 | |
115 | |
--- Decodes a JSON string and returns the decoded value as a Lua data structure / value.
-- This is only a placeholder that keeps the public functions grouped together;
-- the working implementation is assigned to the same name further down, once
-- the token tables it depends on have been built.
-- @param s The string to scan.
-- @return Lua object that was scanned, as a Lua table / string / number / boolean or nil.
decode = function (s)
  return null
end
124 | |
--- Sentinel for a JSON null inside an associative array.
-- Assigning nil to a table field removes the field, so use this function
-- value instead, e.g. t = { first=json.null }. encode() writes it as null.
-- @return The null function itself, so json.null() can be used interchangeably with json.null.
null = function ()
  return null
end
130 | |
131 | ----------------------------------------------------------------------------- |
132 | -- Internal, PRIVATE functions. |
133 | ----------------------------------------------------------------------------- |
134 | |
--- Escapes a value so that it can be placed between the quotes of a JSON string.
-- Backslash, double quote and the control characters that have a short JSON
-- escape (\n, \r, \t, \b, \f) are replaced by their two-character escape.
-- Other control characters are passed through unchanged.
-- @param s The value to escape; it is converted with tostring first.
-- @return The escaped string (exactly one return value).
local qrep = {
  ["\\"] = "\\\\",
  ['"']  = '\\"',
  ['\n'] = '\\n',
  ['\r'] = '\\r',
  ['\t'] = '\\t',
  ['\b'] = '\\b',
  ['\f'] = '\\f',
}
function encodeString(s)
  -- The outer parentheses drop gsub's second result (the replacement count),
  -- so callers never receive a stray extra value.
  return (tostring(s):gsub('["\\\n\r\t\b\f]', qrep))
end
143 | |
--- Decides whether a table should be written as a JSON array or a JSON object.
-- A table counts as an array when every key is either a whole number >= 1
-- holding an encodable value, the key 'n' holding table.getn(t), or a key
-- whose value is not encodable (such entries are ignored by the encoder).
-- @param t The table to evaluate as an array
-- @return boolean, number True if the table can be represented as an array, false otherwise. If true,
-- the second returned value is the largest whole-number index found (0 for a table without any).
function isArray(t)
  local highest = 0
  for key, value in base.pairs(t) do
    local indexed = base.type(key) == 'number' and math.floor(key) == key and key >= 1
    if indexed then
      -- every array element must be encodable
      if not isEncodable(value) then
        return false
      end
      highest = math.max(highest, key)
    elseif key == 'n' then
      -- an 'n' field is tolerated only when it holds the element count
      if value ~= table.getn(t) then
        return false
      end
    elseif isEncodable(value) then
      -- any other key carrying real data makes this an object
      return false
    end
  end
  return true, highest
end
170 | |
--- Tells whether a Lua value can be written as JSON by this module.
-- Encodable values are strings, booleans, numbers, nil, tables and the
-- json.null sentinel function; every other value is ignored by the encoder.
-- @param o The object to examine.
-- @return boolean True if the object should be JSON encoded, false if it should be ignored.
function isEncodable(o)
  local kind = base.type(o)
  if kind == 'function' then
    -- the only encodable function is the null sentinel
    return o == null
  end
  return kind == 'string' or kind == 'boolean' or kind == 'number'
      or kind == 'nil' or kind == 'table'
end
180 | |
181 | -- Radical performance improvement for decode from Eike Decker! |
182 | do |
183 | local type = base.type |
184 | local error = base.error |
185 | local assert = base.assert |
186 | local print = base.print |
187 | local tonumber = base.tonumber |
188 | -- initialize some values to be used in decoding function |
189 | |
-- Builds a small fluent helper for filling a token table.
-- A token table maps byte values to what the scanner should do when it meets
-- that byte: usually another token table, or `true` to mark the end of a
-- token sequence. Scanning can then run over bytes until something of
-- interest turns up (e.g. a byte that has no entry at all).
-- Usage: init_token_table(tt) "name" :link(target) :to "chars" ...
--   "name"   is stored in tt.name and used in error messages
--   :link(x) selects the value the following :to calls will store
--   :to(s)   stores that value under every byte of the string s
local function init_token_table (tt)
  local target        -- value selected by the most recent :link call
  local chain = {}

  chain.link = function (_, other_tt)
    target = other_tt
    return chain
  end

  chain.to = function (_, chars)
    local idx = 1
    local code = chars:byte(idx)
    while code do
      tt[code] = target
      idx = idx + 1
      code = chars:byte(idx)
    end
    return chain
  end

  return function (name)
    tt.name = name
    return chain
  end
end
215 | |
-- Named byte values the decoder compares against:
-- backslash, then the letters e, l, r, u, f, a, s, and finally the slash.
local c_esc, c_e, c_l, c_r, c_u, c_f, c_a, c_s, c_slash =
  string.byte("\\elrufas/", 1, -1)
227 | |
-- Token tables, one per scanner state. They are created empty here and
-- filled in below, because several of them refer to each other.
local tt_object_key         = {}
local tt_object_colon       = {}
local tt_object_value       = {}
local tt_doublequote_string = {}  -- strDoubleQuot
local tt_singlequote_string = {}  -- strSingleQuot
local tt_array_value        = {}
local tt_array_seperator    = {}
local tt_numeric            = {}
local tt_boolean            = {}
local tt_null               = {}
local tt_comment_start      = {}
local tt_comment_middle     = {}
local tt_ignore             = {}  -- special: bytes mapped to tt_ignore are skipped
244 | |
-- Character sets used when filling the token tables:
--   strchars - every byte 0..255 except line feed and carriage return
--              (the bytes allowed unescaped inside a string)
--   allchars - every byte 0..255 (the bytes allowed inside a comment)
local strchars, allchars
--local escapechar = {}
do
  local every, inline = {}, {}
  for code = 0, 0xff do
    local ch = string.char(code)
    every[#every + 1] = ch
    if not (ch == "\n" or ch == "\r") then
      inline[#inline + 1] = ch
    end
    --escapechar[code] = "\\" .. string.char(code)
  end
  strchars = table.concat(inline)
  allchars = table.concat(every)
end
255 | |
256 | --[[ |
257 | charstounescape = "\"\'\\bfnrt/"; |
258 | unescapechars = "\"'\\\b\f\n\r\t\/"; |
259 | for i=1,#charstounescape do |
260 | escapechar[ charstounescape:byte(i) ] = unescapechars:sub(i,i) |
261 | end |
262 | ]]-- |
263 | |
-- Fills one token table from a flat list { value1, chars1, value2, chars2, ... }.
-- The pairs are applied in order, so a later pair overrides an earlier one
-- for any byte they have in common.
local function fill_token_table (tt, name, links)
  local chain = init_token_table(tt)(name)
  for i = 1, #links, 2 do
    chain:link(links[i]):to(links[i + 1])
  end
end

local whitespace = " \t\r\n"
local number_start = "0123456789.-"

-- obj key reader, expects the end of the object or a quoted string as key
fill_token_table(tt_object_key, "object (' or \" or } or , expected)", {
  tt_singlequote_string, "'",
  tt_doublequote_string, '"',
  true,                  "}",
  tt_object_key,         ",",
  tt_comment_start,      "/",
  tt_ignore,             whitespace,
})

-- after the key, a colon is expected (or comment)
fill_token_table(tt_object_colon, "object (: expected)", {
  tt_object_value,  ":",
  tt_comment_start, "/",
  tt_ignore,        whitespace,
})

-- as values, anything is possible, numbers, arrays, objects, boolean, null, strings
fill_token_table(tt_object_value, "object ({ or [ or ' or \" or number or boolean or null expected)", {
  tt_object_key,         "{",
  tt_array_seperator,    "[",
  tt_singlequote_string, "'",
  tt_doublequote_string, '"',
  tt_numeric,            number_start,
  tt_boolean,            "tf",
  tt_null,               "n",
  tt_comment_start,      "/",
  tt_ignore,             whitespace,
})

-- token tables for reading strings: every string character is skipped first,
-- then the backslash and the closing quote are overridden
fill_token_table(tt_doublequote_string, "double quoted string", {
  tt_ignore, strchars,
  c_esc,     "\\",
  true,      '"',
})

fill_token_table(tt_singlequote_string, "single quoted string", {
  tt_ignore, strchars,
  c_esc,     "\\",
  true,      "'",
})

-- array reader that expects termination of the array or a comma that indicates the next value
fill_token_table(tt_array_value, "array (, or ] expected)", {
  tt_array_seperator, ",",
  true,               "]",
  tt_comment_start,   "/",
  tt_ignore,          whitespace,
})

-- a value, pretty similar to tt_object_value
fill_token_table(tt_array_seperator, "array ({ or [ or ' or \" or number or boolean or null expected)", {
  tt_object_key,         "{",
  tt_array_seperator,    "[",
  tt_singlequote_string, "'",
  tt_doublequote_string, '"',
  tt_comment_start,      "/",
  tt_numeric,            number_start,
  tt_boolean,            "tf",
  tt_null,               "n",
  tt_ignore,             whitespace,
})

-- valid number tokens
fill_token_table(tt_numeric, "number", {
  tt_ignore, "0123456789.-Ee",
})

-- once a comment has been started with /, a * is expected
fill_token_table(tt_comment_start, "comment start (* expected)", {
  tt_comment_middle, "*",
})

-- now everything is allowed, watch out for * though. The next char is then checked manually
fill_token_table(tt_comment_middle, "comment end", {
  tt_ignore, allchars,
  true,      "*",
})
334 | |
-- Extra byte values needed by decode: 't', '+', '*' and ']'.
local c_t, c_plus, c_star, c_rbracket = ("t+*]"):byte(1,4)

-- Maps the byte that follows a backslash inside a string to the character it
-- stands for. \u is handled separately; any byte not listed here is kept as is.
local unescape = {}
do
  local letters, meaning = "\"'\\/bfnrt", "\"'\\/\b\f\n\r\t"
  for i = 1, #letters do
    unescape[letters:byte(i)] = meaning:sub(i, i)
  end
end

-- Returns the UTF-8 byte sequence for the code point cp (0 .. 0x10FFFF).
-- Written with floor/modulo arithmetic because Lua 5.1 has no bit operators.
local function utf8_encode (cp)
  if cp < 0x80 then
    return string.char(cp)
  elseif cp < 0x800 then
    return string.char(0xC0 + math.floor(cp / 0x40),
                       0x80 + cp % 0x40)
  elseif cp < 0x10000 then
    return string.char(0xE0 + math.floor(cp / 0x1000),
                       0x80 + math.floor(cp / 0x40) % 0x40,
                       0x80 + cp % 0x40)
  end
  return string.char(0xF0 + math.floor(cp / 0x40000),
                     0x80 + math.floor(cp / 0x1000) % 0x40,
                     0x80 + math.floor(cp / 0x40) % 0x40,
                     0x80 + cp % 0x40)
end

--- Decodes a JSON string and returns the decoded value as a Lua data structure / value.
-- Besides standard JSON, single quoted strings and /* */ comments are accepted.
-- JSON null decodes to nil, so null array elements and object members are absent
-- from the resulting tables. \uXXXX escapes (including surrogate pairs) are
-- returned as UTF-8. Characters left over after the first complete value are ignored.
-- @param js_string The string to scan.
-- @return Lua object that was scanned, as a Lua table / string / number / boolean or nil.
-- Raises an error if js_string is not a string or is not well formed.
function decode (js_string)
  if type(js_string) ~= "string" then
    error("decode expects a string, got "..type(js_string), 2)
  end

  local pos = 1 -- position of the next unread byte in the string

  -- read the next byte value (nil once the end has been passed)
  local function next_byte ()
    pos = pos + 1
    return js_string:byte(pos-1)
  end

  -- in case of error, report the location using line numbers
  local function location ()
    local n = ("\n"):byte()
    local line,lpos = 1,0
    for i=1,pos do
      if js_string:byte(i) == n then
        line,lpos = line + 1,1
      else
        lpos = lpos + 1
      end
    end
    return "Line "..line.." character "..lpos
  end

  -- read the next token, according to the passed token table: bytes mapped to
  -- tt_ignore are skipped, the first other mapping is returned, and a byte
  -- without any mapping is a syntax error
  local function next_token (tok)
    while pos <= #js_string do
      local b = js_string:byte(pos)
      local t = tok[b]
      if not t then
        error("Unexpected character at "..location()..": "..
          string.char(b).." ("..b..") when reading "..tok.name.."\nContext: \n"..
          js_string:sub(math.max(1,pos-30),pos+30).."\n"..(" "):rep(pos+math.min(-1,30-pos)).."^")
      end
      pos = pos + 1
      if t~=tt_ignore then return t end
    end
    error("unexpected termination of JSON while looking for "..tok.name)
  end

  -- read the four hex digits of a \u escape and return their numeric value
  local function read_hex4 ()
    local hex = js_string:match("^%x%x%x%x", pos)
    if not hex then
      error("Invalid unicode escape at "..location())
    end
    pos = pos + 4
    return tonumber(hex, 16)
  end

  -- read a string, double and single quoted ones; the opening quote has
  -- already been consumed and tok is the token table for that quote style.
  -- Escapes are resolved here rather than by evaluating the literal as Lua
  -- code, so \uXXXX is decoded properly and no input text is ever executed.
  local function read_string (tok)
    local buf, n = {}, 0   -- finished pieces of the result
    local start = pos      -- start of the current run of literal characters
    while true do
      local t = next_token(tok)
      if t == true then
        -- closing quote (at pos-1)
        if n == 0 then
          -- fast path: no escapes were seen
          return js_string:sub(start, pos-2)
        end
        n = n + 1
        buf[n] = js_string:sub(start, pos-2)
        return table.concat(buf)
      end

      -- t == c_esc: the backslash is at pos-1, the escaped byte at pos
      n = n + 1
      buf[n] = js_string:sub(start, pos-2)
      local e = next_byte()
      if not e then
        error("unexpected termination of JSON while looking for "..tok.name)
      end
      n = n + 1
      if e == c_u then
        local cp = read_hex4()
        if cp >= 0xD800 and cp <= 0xDBFF
           and js_string:byte(pos) == c_esc and js_string:byte(pos+1) == c_u then
          -- a high surrogate followed by another \u escape: combine the two
          -- when the second one really is a low surrogate
          local resume = pos
          pos = pos + 2
          local low = read_hex4()
          if low >= 0xDC00 and low <= 0xDFFF then
            cp = 0x10000 + (cp - 0xD800) * 0x400 + (low - 0xDC00)
          else
            pos = resume -- unrelated escape, handled by the next iteration
          end
        end
        buf[n] = utf8_encode(cp)
      else
        -- unknown escapes keep the escaped character itself
        buf[n] = unescape[e] or string.char(e)
      end
      start = pos
    end
  end

  -- read a number whose first character has already been consumed
  local function read_num ()
    local start = pos - 1
    while pos <= #js_string do
      local b = js_string:byte(pos)
      -- '+' is not in tt_numeric but is legal in an exponent such as 1e+5
      if not tt_numeric[b] and b ~= c_plus then break end
      pos = pos + 1
    end
    local text = js_string:sub(start, pos-1)
    local num = tonumber(text)
    if not num then
      error("Invalid number '"..text.."' at "..location())
    end
    return num
  end

  -- read true / false; the first letter (at pos-1) has already been consumed.
  -- Byte values are compared directly instead of extracting a substring.
  local function read_bool ()
    if js_string:byte(pos-1) == c_t then
      local r,u,e = js_string:byte(pos,pos+2)
      if r == c_r and u == c_u and e == c_e then
        pos = pos + 3
        return true
      end
    else
      local a,l,s,e = js_string:byte(pos,pos+3)
      if a == c_a and l == c_l and s == c_s and e == c_e then
        pos = pos + 4
        return false
      end
    end
    error("Invalid boolean: "..js_string:sub(math.max(1,pos-5),pos+5))
  end

  -- read null; the leading 'n' has already been consumed
  local function read_null ()
    local u,l1,l2 = js_string:byte(pos,pos+2)
    if u == c_u and l1 == c_l and l2 == c_l then
      pos = pos + 3
      return nil
    end
    error("Invalid value (expected null): "..js_string:sub(pos-1,pos+2).." at "..location())
  end

  local read_object_value,read_object_key,read_array,read_value,read_comment

  -- read a value depending on what token was returned; fromt is the token
  -- table that produced t and is needed to resume after a comment
  function read_value (t,fromt)
    if t == tt_object_key then return read_object_key({}) end
    if t == tt_array_seperator then return read_array({}) end
    if t == tt_singlequote_string or
       t == tt_doublequote_string then return read_string(t) end
    if t == tt_numeric then return read_num() end
    if t == tt_boolean then return read_bool() end
    if t == tt_null then return read_null() end
    if t == tt_comment_start then return read_value(read_comment(fromt),fromt) end
    error("unexpected termination - "..js_string:sub(math.max(1,pos-10),pos+10))
  end

  -- read comments until something noncomment like surfaces, using the token
  -- table which was in use when the comment was met. The leading '/' has
  -- already been consumed. Returns the first non-comment token after it.
  function read_comment (fromt)
    while true do
      next_token(tt_comment_start) -- the '*' that must follow the '/'
      repeat
        next_token(tt_comment_middle) -- consumes up to and including a '*'
        -- swallow any further stars, so that a comment ending in "**/" is
        -- still recognised as closed
        while js_string:byte(pos) == c_star do
          pos = pos + 1
        end
      until next_byte() == c_slash
      local t = next_token(fromt)
      if t ~= tt_comment_start then return t end
      -- another comment follows directly: go round again
    end
  end

  -- read arrays, empty array expected as o arg; the '[' has been consumed
  function read_array (o,i)
    i = i or 1
    -- The value token table has no entry for ']', so an empty array has to
    -- be detected by looking ahead past any whitespace.
    while tt_array_seperator[js_string:byte(pos)] == tt_ignore do
      pos = pos + 1
    end
    if js_string:byte(pos) == c_rbracket then
      pos = pos + 1
      return o
    end
    -- loop until ...
    while true do
      o[i] = read_value(next_token(tt_array_seperator),tt_array_seperator)
      local t = next_token(tt_array_value)
      if t == tt_comment_start then
        t = read_comment(tt_array_value)
      end
      if t == true then -- ... we found a terminator token
        return o
      end
      i = i + 1
    end
  end

  -- object value reading
  function read_object_value (o)
    local t = next_token(tt_object_value)
    return read_value(t,tt_object_value)
  end

  -- object key reading, might also terminate the object
  function read_object_key (o)
    while true do
      local t = next_token(tt_object_key)
      if t == tt_comment_start then
        t = read_comment(tt_object_key)
      end
      if t == true then return o end
      -- a ',' (tt_object_key) only separates members; anything else left
      -- here is the opening quote of a key
      if t ~= tt_object_key then
        local k = read_string(t)

        if next_token(tt_object_colon) == tt_comment_start then
          read_comment(tt_object_colon) -- returns once the ':' has been consumed
        end

        o[k] = read_object_value(o)
      end
    end
  end

  -- now let's read data from our string and pretend it's an object value
  local r = read_object_value()
  -- characters after the first complete value are deliberately ignored

  return r
end
521 | end |