-
Notifications
You must be signed in to change notification settings - Fork 9
/
json.lua
402 lines (386 loc) · 10 KB
/
json.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
local modlib, setmetatable, pairs, assert, error, table_insert, table_concat, tonumber, tostring, math_huge, string, type, next
= modlib, setmetatable, pairs, assert, error, table.insert, table.concat, tonumber, tostring, math.huge, string, type, next
local _ENV = {}
setfenv(1, _ENV)
-- See https://tools.ietf.org/id/draft-ietf-json-rfc4627bis-09.html#unichars and https://json.org
-- Null
-- TODO consider using userdata (for ex. by using newproxy)
do
	-- Operations which must fail loudly when attempted on the null sentinel.
	-- eq is not among the metamethods, len won't work on 5.1
	local forbidden = {
		"add", "sub", "mul", "div", "mod", "pow", "unm", "concat",
		"len", "lt", "le", "index", "newindex", "call"
	}
	local null_metatable = {}
	for i = 1, #forbidden do
		local operation = forbidden[i]
		-- Each handler closes over its own operation name for the error message
		null_metatable["__" .. operation] = function()
			return error("attempt to " .. operation .. " a null value")
		end
	end
	-- Unique table standing in for JSON null; compared by identity
	null = setmetatable({}, null_metatable)
end
-- Metatable shared by all serializer instances: missing fields are looked up in this module
local metatable = {}
metatable.__index = _ENV
_ENV.metatable = metatable
-- Turns the table `self` into a serializer instance (in place) and returns it
function new(self)
	local instance = setmetatable(self, metatable)
	return instance
end
-- Characters the reader skips between tokens
local whitespace = modlib.table.set({"\t", "\r", "\n", " "})
-- Maps the character following a backslash to the character it stands for.
-- "\u" escapes are not listed here; the reader decodes them separately.
local decoding_escapes = {
	['"'] = '"',
	["\\"] = "\\",
	["/"] = "/",
	["b"] = "\b",
	["f"] = "\f",
	["n"] = "\n",
	["r"] = "\r",
	["t"] = "\t",
	-- TODO is this complete?
}
-- Set up a DFA for number syntax validations.
-- Each state is a table mapping a single character to the follow-up state. Additionally,
-- `final = true` marks accepting states and `expected` names what is missing in
-- non-accepting states (used for error messages).
local number_dfa
do -- as a RegEx: (0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-]?[0-9]+)?; does not need to handle the first sign
	-- TODO proper DFA utilities
	-- Registers, for every character of every key of `transitions`, a transition to the given state
	local function set_transitions(state, transitions)
		for chars, next_state in pairs(transitions) do
			for char in chars:gmatch"." do
				state[char] = next_state
			end
		end
	end
	local onenine = "123456789"
	local digit = "0" .. onenine
	local e = "eE"
	-- At least one exponent digit has been read
	local exponent = {final = true}
	set_transitions(exponent, {
		[digit] = exponent
	})
	-- An exponent sign has been read: a digit MUST follow, hence this state is not final
	local pre_exponent = {expected = "exponent"}
	set_transitions(pre_exponent, {
		[digit] = exponent
	})
	-- "e" / "E" has been read: either a sign or the first exponent digit follows.
	-- A sign leads to pre_exponent (not exponent) such that inputs like "1e+" are rejected.
	local exponent_sign = {expected = "exponent"}
	set_transitions(exponent_sign, {
		[digit] = exponent,
		["+"] = pre_exponent,
		["-"] = pre_exponent
	})
	-- At least one fraction digit has been read
	local fraction_final = {final = true}
	set_transitions(fraction_final, {
		[digit] = fraction_final,
		[e] = exponent_sign
	})
	-- The decimal point has been read: a digit must follow
	local fraction = {expected = "fraction"}
	set_transitions(fraction, {
		[digit] = fraction_final
	})
	-- Integer part starting with a nonzero digit
	local integer = {final = true}
	set_transitions(integer, {
		[digit] = integer,
		[e] = exponent_sign,
		["."] = fraction
	})
	-- Integer part consisting of a single zero: no further digits may follow,
	-- but a fraction and / or an exponent are allowed (e.g. "0.5", "0e5")
	local zero = {final = true}
	set_transitions(zero, {
		[e] = exponent_sign,
		["."] = fraction
	})
	-- Start state
	number_dfa = {}
	set_transitions(number_dfa, {
		[onenine] = integer,
		["0"] = zero
	})
end
-- Lookup table: hexadecimal digit character (either case) -> its numeric value
local hex_digit_values = {}
for digit_value = 0, 15 do
	local lowercase = ("%x"):format(digit_value)
	hex_digit_values[lowercase] = digit_value
	-- No-op for "0" - "9", adds "A" - "F" for the letters
	hex_digit_values[lowercase:upper()] = digit_value
end
-- TODO SAX vs DOM
local utf8_char = modlib.utf8.char
-- Reads a single JSON value from a character stream.
-- self: serializer instance; self.null is returned for JSON null
-- read_: function returning the next character on each call and nil once the input is exhausted
-- Returns the decoded value: a table for objects and arrays, a string, a number, a boolean or self.null
-- Raises "syntax error: <index>: <message>" for malformed input.
-- Does not check that the input ends after the value; callers have to do this themselves.
function read(self, read_)
	-- Number of characters requested so far, used for error messages only
	local index = 0
	-- Current character; nil at end of input
	local char
	-- TODO support read functions which provide additional debug output (such as row:column)
	local function read()
		index = index + 1
		char = read_()
		return char
	end
	local function syntax_error(errmsg)
		-- TODO ensure the index isn't off
		error("syntax error: " .. index .. ": " .. errmsg)
	end
	-- Like assert, but raises a syntax error
	local function syntax_assert(value, errmsg)
		if not value then
			syntax_error(errmsg or "assertion failed!")
		end
		return value
	end
	local function skip_whitespace()
		while whitespace[char] do
			read()
		end
	end
	-- Forward declaration
	local value
	-- Reads an unsigned number starting at the current character.
	-- Unlike the handlers in funcs, this leaves `char` at the first character AFTER the number,
	-- as the end of a number can only be detected by looking one character ahead.
	local function number()
		local state = number_dfa
		local num = {}
		while true do
			-- Will work for nil too
			local next_state = state[char]
			if not next_state then
				if not state.final then
					if state == number_dfa then
						syntax_error"expected a number"
					end
					syntax_error("invalid number: expected " .. state.expected)
				end
				return assert(tonumber(table_concat(num)))
			end
			table_insert(num, char)
			state = next_state
			read()
		end
	end
	local function utf8_codepoint(codepoint)
		return syntax_assert(utf8_char(codepoint), "invalid codepoint")
	end
	-- Reads a string; expects `char` to be the first character after the opening quote
	-- and leaves `char` at the closing quote.
	local function string()
		local chars = {}
		-- Pending high surrogate from the previous "\u" escape, waiting for its low surrogate
		local high_surrogate
		while true do
			local string_char, next_high_surrogate
			if char == '"' then
				if high_surrogate then
					-- Unpaired high surrogate at the end of the string
					table_insert(chars, utf8_codepoint(high_surrogate))
				end
				return table_concat(chars)
			end
			if char == "\\" then
				read()
				if char == "u" then
					-- Four hex digits, most significant first
					local codepoint = 0
					for i = 3, 0, -1 do
						codepoint = syntax_assert(hex_digit_values[read()], "expected a hex digit") * (16 ^ i) + codepoint
					end
					if high_surrogate and codepoint >= 0xDC00 and codepoint <= 0xDFFF then
						-- TODO strict mode: throw an error for single surrogates
						codepoint = 0x10000 + (high_surrogate - 0xD800) * 0x400 + codepoint - 0xDC00
						-- Don't write the high surrogate
						high_surrogate = nil
					end
					if codepoint >= 0xD800 and codepoint <= 0xDBFF then
						-- Defer writing until it is known whether a low surrogate follows
						next_high_surrogate = codepoint
					else
						string_char = utf8_codepoint(codepoint)
					end
				else
					string_char = syntax_assert(decoding_escapes[char], "invalid escape sequence")
				end
			else
				-- TODO check whether the character is one that must be escaped ("strict" mode)
				string_char = syntax_assert(char, "unclosed string")
			end
			if high_surrogate then
				-- The previous high surrogate was not followed by a low surrogate
				table_insert(chars, utf8_codepoint(high_surrogate))
			end
			high_surrogate = next_high_surrogate
			if string_char then
				table_insert(chars, string_char)
			end
			read()
		end
	end
	local element
	-- Handlers by first character of a value. Each handler is called with `char` at the character
	-- after the first one and must leave `char` at the LAST character of the value.
	-- Numbers (including negative ones) do not fit this convention and are handled in value().
	local funcs = {
		['"'] = string,
		["{"] = function()
			local dict = {}
			skip_whitespace()
			if char == "}" then return dict end
			while true do
				syntax_assert(char == '"', "key expected")
				read()
				local key = string()
				read()
				skip_whitespace()
				-- tostring: char is nil at end of input, which must not break the concatenation
				syntax_assert(char == ":", "colon expected, got " .. tostring(char))
				local val = element()
				dict[key] = val
				if char == "}" then return dict end
				syntax_assert(char == ",", "comma expected")
				read()
				skip_whitespace()
			end
		end,
		["["] = function()
			local list = {}
			skip_whitespace()
			if char == "]" then return list end
			while true do
				table_insert(list, value())
				skip_whitespace()
				if char == "]" then return list end
				syntax_assert(char == ",", "comma expected")
				read()
				skip_whitespace()
			end
		end,
	}
	-- Registers a handler for a literal word which evaluates to `value`
	local function expect_word(word, value)
		local msg = word .. " expected"
		funcs[word:sub(1, 1)] = function()
			syntax_assert(char == word:sub(2, 2), msg)
			for i = 3, #word do
				read()
				syntax_assert(char == word:sub(i, i), msg)
			end
			return value
		end
	end
	expect_word("true", true)
	expect_word("false", false)
	expect_word("null", self.null)
	-- Reads a value starting at the current character; leaves `char` at the first character after it
	function value()
		syntax_assert(char, "value expected")
		if char == "-" then
			-- Skip the sign. number() already stops after the number, so there must be no
			-- further read() afterwards; otherwise the character following a negative number
			-- (such as "," or "]") would be swallowed.
			read()
			return -number()
		end
		local func = funcs[char]
		if func then
			-- Advance after first char
			read()
			local val = func()
			-- Advance after last char
			read()
			return val
		end
		if char >= "0" and char <= "9" then
			return number()
		end
		syntax_error"value expected"
	end
	-- Advances by one character, then reads a value surrounded by optional whitespace
	function element()
		read()
		skip_whitespace()
		local val = value()
		skip_whitespace()
		return val
	end
	-- TODO consider asserting EOF as read() == nil, perhaps controlled by a parameter
	return element()
end
-- Character -> escape sequence table, derived from the decoding table
-- (presumably modlib.table.flip swaps keys and values - verify against modlib)
local encoding_escapes = modlib.table.flip(decoding_escapes)
-- Solidus does not need to be escaped
encoding_escapes["/"] = nil
-- Control characters. Note: U+0080 to U+009F and U+007F are not considered control characters.
-- NOTE(review): this loop also assigns \b, \t, \n, \f and \r (all below 0x20),
-- replacing whatever the flip produced for them with the \uXXXX form - confirm this is intended
for code = 0x00, 0x1F do
	encoding_escapes[string.char(code)] = ("u%04X"):format(code)
end
-- Prepend the backslash to every escape (assumes modlib.table.map modifies the table in place - TODO confirm)
modlib.table.map(encoding_escapes, function(escape_body)
	return "\\" .. escape_body
end)
-- Replaces every character which has an entry in encoding_escapes;
-- returns both results of gsub (the escaped string and the match count)
local function escape(str)
	return str:gsub(".", encoding_escapes)
end
-- Serializes `value` as JSON, handing the output piece by piece to the function `write`.
-- self.null is the value written as null; self.cache_escaped_strings enables caching of escaped strings.
-- Raises an error for nan, inf, -inf, tables mixing list & hash part,
-- non-string dictionary keys and unsupported types.
function write(self, value, write)
	local null = self.null
	-- Lazily filled cache: string -> escaped string (false / nil if caching is disabled)
	local escape_cache = self.cache_escaped_strings and setmetatable({}, {__index = function(cache, str)
		local escaped = escape(str)
		cache[str] = escaped
		return escaped
	end})
	-- Writes a quoted, escaped string
	local function string(str)
		write'"'
		write(escape_cache and escape_cache[str] or escape(str))
		return write'"'
	end
	local dump
	-- Writes a single `"key":value` pair of a dictionary
	local function write_kv(key, value)
		assert(type(key) == "string", "not a dictionary")
		string(key)
		write":"
		dump(value)
	end
	function dump(value)
		-- TODO improve null check (checking for equality doesn't allow using nan as null, for instance)
		if value == null then
			return write"null"
		elseif value == true then
			return write"true"
		elseif value == false then
			return write"false"
		end
		local type_ = type(value)
		if type_ == "number" then
			assert(value == value, "unsupported number value: nan")
			assert(value ~= math_huge, "unsupported number value: inf")
			assert(value ~= -math_huge, "unsupported number value: -inf")
			return write(("%.17g"):format(value))
		elseif type_ == "string" then
			return string(value)
		elseif type_ ~= "table" then
			error("unsupported type: " .. type_)
		end
		local len = #value
		if len ~= 0 then
			-- Nonempty list part: must not have any other entries
			assert(modlib.table.count(value) == len, "mixed list & hash part")
			write"["
			for i = 1, len do
				if i > 1 then
					write","
				end
				dump(value[i])
			end
			write"]"
			return
		end
		-- No list part: written as a dictionary (this includes the empty table)
		local first_key, first_value = next(value)
		write"{"
		if first_key ~= nil then
			write_kv(first_key, first_value)
		end
		-- Continue the traversal after the first entry; all further entries need a separator
		for key, val in next, value, first_key do
			write","
			write_kv(key, val)
		end
		write"}"
	end
	dump(value)
end
-- TODO get rid of this paste of write_file and write_string (see modlib.luon)
-- Serializes `value` using self:write, writing all output to `file` via file:write
function write_file(self, value, file)
	local function sink(text)
		file:write(text)
	end
	return self:write(value, sink)
end
-- Serializes `value` using self:write and returns the output as a single string
function write_string(self, value)
	-- Collect the pieces first, concatenate once at the end
	local pieces = {}
	local function collect(text)
		pieces[#pieces + 1] = text
	end
	self:write(value, collect)
	return table_concat(pieces)
end
-- TODO read_path (for other serializers too)
-- Reads a value using self:read, fetching the input from `file` one character at a time.
-- Does not close the file.
function read_file(self, file)
	local function next_char()
		return file:read(1)
	end
	-- TODO consider file:close()
	-- Parentheses: return exactly one value
	return (self:read(next_char))
end
-- Reads a value from `string` using self:read.
-- Raises "EOF expected" if the reader did not consume the entire string.
function read_string(self, string)
	-- TODO move the string -> one char read func pattern to modlib.text
	local length = #string
	-- Position of the character handed out last; exceeds length once EOF was signalled
	local index = 0
	local function next_char()
		index = index + 1
		if index <= length then
			return string:sub(index, index)
		end
	end
	local value = self:read(next_char)
	-- We just expect EOF for strings
	assert(index > length, "EOF expected")
	return value
end
return _ENV