Slightly more informative closing message
[scpubgit/stemmatology.git] / analysis / idp_server / json.lua
CommitLineData
51650966 1-----------------------------------------------------------------------------
2-- JSON4Lua: JSON encoding / decoding support for the Lua language.
3-- json Module.
4-- Author: Craig Mason-Jones
5-- Homepage: http://json.luaforge.net/
6-- Version: 0.9.50
7-- This module is released under the MIT License (MIT).
8-- Please see LICENCE.txt for details.
9--
10-- USAGE:
11-- This module exposes two functions:
12-- encode(o)
13-- Returns the table / string / boolean / number / nil / json.null value as a JSON-encoded string.
14-- decode(json_string)
15-- Returns a Lua object populated with the data encoded in the JSON string json_string.
16--
17-- REQUIREMENTS:
18-- compat-5.1 if using Lua 5.0
19--
20-- CHANGELOG
21-- 0.9.50 Radical performance improvement on decode from Eike Decker. Many thanks!
22-- 0.9.40 Changed licence to MIT License (MIT)
23-- 0.9.20 Introduction of local Lua functions for private functions (removed _ function prefix).
24-- Fixed Lua 5.1 compatibility issues.
25-- Introduced json.null to have null values in associative arrays.
26-- encode() performance improvement (more than 50%) through table.concat rather than ..
27-- Introduced decode ability to ignore /**/ comments in the JSON string.
28-- 0.9.10 Fix to array encoding / decoding to correctly manage nil/null values in arrays.
29-----------------------------------------------------------------------------
30
31-----------------------------------------------------------------------------
32-- Imports and dependencies
33-----------------------------------------------------------------------------
34local math = require('math')
35local string = require("string")
36local table = require("table")
37local tostring = tostring
38
39local base = _G
40
41-----------------------------------------------------------------------------
42-- Module declaration
43-----------------------------------------------------------------------------
44module("json")
45
46-- Public functions
47
-- Private functions.
-- They are forward-declared as locals so that the public functions defined
-- below can call them before their bodies appear further down the file, and so
-- that `function encodeString(...)` etc. assign to these locals instead of
-- becoming fields of the module.
-- (The decode_scan* declarations that used to sit here were never defined or
-- referenced anywhere in this file and have been dropped.)
local encodeString
local isArray
local isEncodable
59
60-----------------------------------------------------------------------------
61-- PUBLIC FUNCTIONS
62-----------------------------------------------------------------------------
--- Encodes an arbitrary Lua object / variable as JSON text.
-- nil and json.null become `null`; strings are quoted and escaped by
-- encodeString(); numbers and booleans are written with tostring(). A table
-- that isArray() accepts is written as a JSON array covering indexes
-- 1..maxCount (missing indexes are written as null); any other table is
-- written as a JSON object, silently skipping pairs whose key or value is not
-- encodable.
-- @param v The Lua object / variable to be JSON encoded.
-- @return String containing the JSON encoding in internal Lua string format (i.e. not unicode)
-- Raises an error for values of an unsupported type (functions other than
-- json.null, userdata, threads).
function encode (v)
  -- Handle nil values
  if v == nil then
    return "null"
  end

  local vtype = base.type(v)

  -- Handle strings
  if vtype == 'string' then
    return '"' .. encodeString(v) .. '"'
  end

  -- Handle numbers
  if vtype == 'number' then
    -- NaN (the only value not equal to itself) and +/-infinity cannot be
    -- written as JSON numbers; tostring() would emit `nan` / `inf`, which no
    -- JSON parser accepts, so they are written as null instead.
    if v ~= v or v == math.huge or v == -math.huge then
      return 'null'
    end
    return base.tostring(v)
  end

  -- Handle booleans
  if vtype == 'boolean' then
    return base.tostring(v)
  end

  -- Handle tables
  if vtype == 'table' then
    local parts = {}
    local bArray, maxCount = isArray(v)
    if bArray then
      for i = 1, maxCount do
        parts[i] = encode(v[i])
      end
      return '[' .. table.concat(parts, ',') .. ']'
    end
    -- An object, not an array
    for key, value in base.pairs(v) do
      if isEncodable(key) and isEncodable(value) then
        parts[#parts + 1] = '"' .. encodeString(key) .. '":' .. encode(value)
      end
    end
    return '{' .. table.concat(parts, ',') .. '}'
  end

  -- Handle null values (json.null is the module's `null` function)
  if vtype == 'function' and v == null then
    return 'null'
  end

  base.assert(false, 'encode attempt to encode unsupported type ' .. vtype .. ':' .. base.tostring(v))
end
114
115
--- Decodes a JSON string and returns the decoded value as a Lua data structure / value.
-- This definition is only a placeholder that keeps the public API together in
-- one place: the name is assigned again further down the file, once the token
-- tables the real decoder relies on have been built.
-- @param s The string to scan (ignored by this placeholder).
-- @return Lua object that was scanned, as a Lua table / string / number / boolean or nil.
-- The placeholder itself simply returns the module's `null`.
function decode(s)
  return null
end
124
--- Marker value that stands for JSON null.
-- Assigning nil to a table field removes the field, so a null inside an
-- associative array has to be spelled explicitly: t = { first = json.null }.
-- encode() writes this marker as `null`.
-- @return The marker itself, so that json.null() == json.null.
function null()
  local marker = null
  return marker
end
130
131-----------------------------------------------------------------------------
132-- Internal, PRIVATE functions.
133-----------------------------------------------------------------------------
134
--- Escapes a string so that it can be placed between double quotes in JSON text.
-- The backslash, the double quote and every control character below 0x20 are
-- escaped: \b \f \n \r \t use their short forms, all other control characters
-- use the generic \u00XX form. Every other byte is copied unchanged.
-- A reader has to understand \uXXXX escapes to get such control characters back.
-- @param s The value to escape; it is converted with tostring() first.
-- @return The escaped string, without the surrounding quotes.
local qrep = {
  ["\\"] = "\\\\", ['"'] = '\\"',
  ['\b'] = '\\b', ['\f'] = '\\f', ['\n'] = '\\n', ['\r'] = '\\r', ['\t'] = '\\t',
}
-- Fill in the \u00XX form for the control characters that have no short escape.
for code = 0, 31 do
  local ch = string.char(code)
  if not qrep[ch] then
    qrep[ch] = string.format("\\u%04x", code)
  end
end
function encodeString(s)
  -- %c can also match bytes outside 0x00-0x1f (DEL, or more depending on the
  -- C locale); those have no entry in qrep, and gsub keeps the original text
  -- whenever the table lookup yields nil.
  -- The outer parentheses drop gsub's second result (the substitution count).
  return (tostring(s):gsub('[%c"\\]', qrep))
end
143
--- Determines whether the given table should be encoded as a JSON array.
-- A table counts as an array when every key is either
--   * a positive integer whose value is encodable, or
--   * the string 'n' holding the table's length (Lua 5.0 style arrays), or
--   * any other key whose value is NOT encodable (such pairs are ignored).
-- Indexes do not have to be contiguous; an empty table is an array as well.
-- @param t The table to evaluate as an array
-- @return boolean, number True if the table can be represented as an array, false otherwise. If true,
-- the second returned value is the largest integer index found (0 for an
-- empty table).
function isArray(t)
  local maxIndex = 0
  for k, v in base.pairs(t) do
    if base.type(k) == 'number' and math.floor(k) == k and 1 <= k then
      -- k,v is an indexed pair: all array elements must be encodable
      if not isEncodable(v) then return false end
      maxIndex = math.max(maxIndex, k)
    elseif k == 'n' then
      -- 'n' is tolerated only when it holds the number of elements. The length
      -- operator replaces table.getn, which is deprecated in Lua 5.1 and no
      -- longer exists in later versions.
      if v ~= #t then return false end
    elseif isEncodable(v) then
      -- an encodable value under a non-index key would be lost in an array
      return false
    end
  end
  return true, maxIndex
end
170
--- Tells whether a Lua value can be written as JSON by this module.
-- Encodable values are strings, booleans, numbers, nil, tables and the
-- json.null marker; everything else (other functions, userdata, threads) is
-- ignored by the encoder.
-- @param o The object to examine.
-- @return boolean True if the object should be JSON encoded, false if it should be ignored.
function isEncodable(o)
  local kind = base.type(o)
  if kind == 'function' then
    -- the only function that may be encoded is the null marker
    return o == null
  end
  return kind == 'string' or kind == 'boolean' or kind == 'number' or kind == 'nil' or kind == 'table'
end
180
181-- Radical performance improvement for decode from Eike Decker!
182do
183 local type = base.type
184 local error = base.error
185 local assert = base.assert
186 local print = base.print
187 local tonumber = base.tonumber
188 -- initialize some values to be used in decoding function
189
190 -- initializes a table to contain a byte=>table mapping
191 -- the table contains tokens (byte values) as keys and maps them on other
192 -- token tables (mostly, the boolean value 'true' is used to indicate termination
193 -- of a token sequence)
194 -- the token table's purpose is, that it allows scanning a sequence of bytes
195 -- until something interesting has been found (e.g. a token that is not expected)
196 -- name is a descriptor for the table to be printed in error messages
197 local function init_token_table (tt)
198 local struct = {}
199 local value
200 function struct:link(other_tt)
201 value = other_tt
202 return struct
203 end
204 function struct:to(chars)
205 for i=1,#chars do
206 tt[chars:byte(i)] = value
207 end
208 return struct
209 end
210 return function (name)
211 tt.name = name
212 return struct
213 end
214 end
215
216 -- keep "named" byte values at hands
217 local
218 c_esc,
219 c_e,
220 c_l,
221 c_r,
222 c_u,
223 c_f,
224 c_a,
225 c_s,
226 c_slash = ("\\elrufas/"):byte(1,9)
227
228 -- token tables - tt_doublequote_string = strDoubleQuot, tt_singlequote_string = strSingleQuot
229 local
230 tt_object_key,
231 tt_object_colon,
232 tt_object_value,
233 tt_doublequote_string,
234 tt_singlequote_string,
235 tt_array_value,
236 tt_array_seperator,
237 tt_numeric,
238 tt_boolean,
239 tt_null,
240 tt_comment_start,
241 tt_comment_middle,
242 tt_ignore --< tt_ignore is special - marked tokens will be tt_ignored
243 = {},{},{},{},{},{},{},{},{},{},{},{},{}
244
245 -- strings to be used in certain token tables
246 local strchars = "" -- all valid string characters (all except newlines)
247 local allchars = "" -- all characters that are valid in comments
248 --local escapechar = {}
249 for i=0,0xff do
250 local c = string.char(i)
251 if c~="\n" and c~="\r" then strchars = strchars .. c end
252 allchars = allchars .. c
253 --escapechar[i] = "\\" .. string.char(i)
254 end
255
256--[[
257 charstounescape = "\"\'\\bfnrt/";
258 unescapechars = "\"'\\\b\f\n\r\t\/";
259 for i=1,#charstounescape do
260 escapechar[ charstounescape:byte(i) ] = unescapechars:sub(i,i)
261 end
262]]--
263
264 -- obj key reader, expects the end of the object or a quoted string as key
265 init_token_table (tt_object_key) "object (' or \" or } or , expected)"
266 :link(tt_singlequote_string) :to "'"
267 :link(tt_doublequote_string) :to '"'
268 :link(true) :to "}"
269 :link(tt_object_key) :to ","
270 :link(tt_comment_start) :to "/"
271 :link(tt_ignore) :to " \t\r\n"
272
273
274 -- after the key, a colon is expected (or comment)
275 init_token_table (tt_object_colon) "object (: expected)"
276 :link(tt_object_value) :to ":"
277 :link(tt_comment_start) :to "/"
278 :link(tt_ignore) :to" \t\r\n"
279
280 -- as values, anything is possible, numbers, arrays, objects, boolean, null, strings
281 init_token_table (tt_object_value) "object ({ or [ or ' or \" or number or boolean or null expected)"
282 :link(tt_object_key) :to "{"
283 :link(tt_array_seperator) :to "["
284 :link(tt_singlequote_string) :to "'"
285 :link(tt_doublequote_string) :to '"'
286 :link(tt_numeric) :to "0123456789.-"
287 :link(tt_boolean) :to "tf"
288 :link(tt_null) :to "n"
289 :link(tt_comment_start) :to "/"
290 :link(tt_ignore) :to " \t\r\n"
291
292 -- token tables for reading strings
293 init_token_table (tt_doublequote_string) "double quoted string"
294 :link(tt_ignore) :to (strchars)
295 :link(c_esc) :to "\\"
296 :link(true) :to '"'
297
298 init_token_table (tt_singlequote_string) "single quoted string"
299 :link(tt_ignore) :to (strchars)
300 :link(c_esc) :to "\\"
301 :link(true) :to "'"
302
303 -- array reader that expects termination of the array or a comma that indicates the next value
304 init_token_table (tt_array_value) "array (, or ] expected)"
305 :link(tt_array_seperator) :to ","
306 :link(true) :to "]"
307 :link(tt_comment_start) :to "/"
308 :link(tt_ignore) :to " \t\r\n"
309
310 -- a value, pretty similar to tt_object_value
311 init_token_table (tt_array_seperator) "array ({ or [ or ' or \" or number or boolean or null expected)"
312 :link(tt_object_key) :to "{"
313 :link(tt_array_seperator) :to "["
314 :link(tt_singlequote_string) :to "'"
315 :link(tt_doublequote_string) :to '"'
316 :link(tt_comment_start) :to "/"
317 :link(tt_numeric) :to "0123456789.-"
318 :link(tt_boolean) :to "tf"
319 :link(tt_null) :to "n"
320 :link(tt_ignore) :to " \t\r\n"
321
322 -- valid number tokens
323 init_token_table (tt_numeric) "number"
324 :link(tt_ignore) :to "0123456789.-Ee"
325
326 -- once a comment has been started with /, a * is expected
327 init_token_table (tt_comment_start) "comment start (* expected)"
328 :link(tt_comment_middle) :to "*"
329
330 -- now everything is allowed, watch out for * though. The next char is then checked manually
331 init_token_table (tt_comment_middle) "comment end"
332 :link(tt_ignore) :to (allchars)
333 :link(true) :to "*"
334
335 function decode (js_string)
336 local pos = 1 -- position in the string
337
338 -- read the next byte value
339 local function next_byte () pos = pos + 1 return js_string:byte(pos-1) end
340
341 -- in case of error, report the location using line numbers
342 local function location ()
343 local n = ("\n"):byte()
344 local line,lpos = 1,0
345 for i=1,pos do
346 if js_string:byte(i) == n then
347 line,lpos = line + 1,1
348 else
349 lpos = lpos + 1
350 end
351 end
352 return "Line "..line.." character "..lpos
353 end
354
355 -- debug func
356 --local function status (str)
357 -- print(str.." ("..s:sub(math.max(1,p-10),p+10)..")")
358 --end
359
360 -- read the next token, according to the passed token table
361 local function next_token (tok)
362 while pos <= #js_string do
363 local b = js_string:byte(pos)
364 local t = tok[b]
365 if not t then
366 error("Unexpected character at "..location()..": "..
367 string.char(b).." ("..b..") when reading "..tok.name.."\nContext: \n"..
368 js_string:sub(math.max(1,pos-30),pos+30).."\n"..(" "):rep(pos+math.min(-1,30-pos)).."^")
369 end
370 pos = pos + 1
371 if t~=tt_ignore then return t end
372 end
373 error("unexpected termination of JSON while looking for "..tok.name)
374 end
375
376 -- read a string, double and single quoted ones
377 local function read_string (tok)
378 local start = pos
379 --local returnString = {}
380 repeat
381 local t = next_token(tok)
382 if t == c_esc then
383 --table.insert(returnString, js_string:sub(start, pos-2))
384 --table.insert(returnString, escapechar[ js_string:byte(pos) ])
385 pos = pos + 1
386 --start = pos
387 end -- jump over escaped chars, no matter what
388 until t == true
389 return (base.loadstring("return " .. js_string:sub(start-1, pos-1) ) ())
390
391 -- We consider the situation where no escaped chars were encountered separately,
392 -- and use the fastest possible return in this case.
393
394 --if 0 == #returnString then
395 -- return js_string:sub(start,pos-2)
396 --else
397 -- table.insert(returnString, js_string:sub(start,pos-2))
398 -- return table.concat(returnString,"");
399 --end
400 --return js_string:sub(start,pos-2)
401 end
402
403 local function read_num ()
404 local start = pos
405 while pos <= #js_string do
406 local b = js_string:byte(pos)
407 if not tt_numeric[b] then break end
408 pos = pos + 1
409 end
410 return tonumber(js_string:sub(start-1,pos-1))
411 end
412
413 -- read_bool and read_null are both making an assumption that I have not tested:
414 -- I would expect that the string extraction is more expensive than actually
415 -- making manual comparision of the byte values
416 local function read_bool ()
417 pos = pos + 3
418 local a,b,c,d = js_string:byte(pos-3,pos)
419 if a == c_r and b == c_u and c == c_e then return true end
420 pos = pos + 1
421 if a ~= c_a or b ~= c_l or c ~= c_s or d ~= c_e then
422 error("Invalid boolean: "..js_string:sub(math.max(1,pos-5),pos+5))
423 end
424 return false
425 end
426
427 -- same as read_bool: only last
428 local function read_null ()
429 pos = pos + 3
430 local u,l1,l2 = js_string:byte(pos-3,pos-1)
431 if u == c_u and l1 == c_l and l2 == c_l then return nil end
432 error("Invalid value (expected null):"..js_string:sub(pos-4,pos-1)..
433 " ("..js_string:byte(pos-1).."="..js_string:sub(pos-1,pos-1).." / "..c_l..")")
434 end
435
436 local read_object_value,read_object_key,read_array,read_value,read_comment
437
438 -- read a value depending on what token was returned, might require info what was used (in case of comments)
439 function read_value (t,fromt)
440 if t == tt_object_key then return read_object_key({}) end
441 if t == tt_array_seperator then return read_array({}) end
442 if t == tt_singlequote_string or
443 t == tt_doublequote_string then return read_string(t) end
444 if t == tt_numeric then return read_num() end
445 if t == tt_boolean then return read_bool() end
446 if t == tt_null then return read_null() end
447 if t == tt_comment_start then return read_value(read_comment(fromt)) end
448 error("unexpected termination - "..js_string:sub(math.max(1,pos-10),pos+10))
449 end
450
451 -- read comments until something noncomment like surfaces, using the token reader which was
452 -- used when stumbling over this comment
453 function read_comment (fromt)
454 while true do
455 next_token(tt_comment_start)
456 while true do
457 local t = next_token(tt_comment_middle)
458 if next_byte() == c_slash then
459 local t = next_token(fromt)
460 if t~= tt_comment_start then return t end
461 break
462 end
463 end
464 end
465 end
466
467 -- read arrays, empty array expected as o arg
468 function read_array (o,i)
469 --if not i then status "arr open" end
470 i = i or 1
471 -- loop until ...
472 while true do
473 o[i] = read_value(next_token(tt_array_seperator),tt_array_seperator)
474 local t = next_token(tt_array_value)
475 if t == tt_comment_start then
476 t = read_comment(tt_array_value)
477 end
478 if t == true then -- ... we found a terminator token
479 --status "arr close"
480 return o
481 end
482 i = i + 1
483 end
484 end
485
486 -- object value reading
487 function read_object_value (o)
488 local t = next_token(tt_object_value)
489 return read_value(t,tt_object_value)
490 end
491
492 -- object key reading, might also terminate the object
493 function read_object_key (o)
494 while true do
495 local t = next_token(tt_object_key)
496 if t == tt_comment_start then
497 t = read_comment(tt_object_key)
498 end
499 if t == true then return o end
500 if t == tt_object_key then return read_object_key(o) end
501 local k = read_string(t)
502
503 if next_token(tt_object_colon) == tt_comment_start then
504 t = read_comment(tt_object_colon)
505 end
506
507 local v = read_object_value(o)
508 o[k] = v
509 end
510 end
511
512 -- now let's read data from our string and pretend it's an object value
513 local r = read_object_value()
514 if pos<=#js_string then
515 -- not sure about what to do with dangling characters
516 --error("Dangling characters in JSON code ("..location()..")")
517 end
518
519 return r
520 end
521end