--[[
	Licensed according to the included 'LICENSE' document
	Author: Thomas Harning Jr <harningt@gmail.com>
]]
5 local lpeg
= require("lpeg")
6 local jsonutil
= require("json.util")
7 local util
= require("json.decode.util")
8 local merge
= jsonutil
.merge
10 local tonumber = tonumber
11 local string_char
= require("string").char
12 local floor = require("math").floor
13 local table_concat
= require("table").concat
-- Build an LPeg pattern that reports a string-decoding error when the matcher
-- reaches it.
-- @param item Short description of the problem (e.g. "Illegal escape"); it is
--   prefixed to " in string [%q] @ %i:%i" to form the message template.
-- @return An LPeg pattern wrapping a match-time function.
local function get_error(item)
	local fmt_string = item .. " in string [%q] @ %i:%i"
	return lpeg.P(function(data, index)
		-- The helper returns a fourth value as well; it is not needed for the
		-- message. Presumably these are the line, the position within the line
		-- and the offending character at `index` -- verify against
		-- json.decode.util.
		local line, line_index, bad_char = util.get_invalid_character_info(data, index)
		local err = fmt_string:format(bad_char, line, line_index)
		-- NOTE(review): the tail of this function was missing from the reviewed
		-- text. Raising the message is reconstructed from the otherwise unused
		-- `err` local -- confirm against the upstream file.
		error(err)
	end)
end
-- Error patterns, one per kind of malformed content inside a string. Each is
-- produced by get_error with the message prefix shown.
local bad_unicode, bad_hex, bad_character, bad_escape =
	get_error("Illegal unicode escape"),
	get_error("Illegal hex escape"),
	get_error("Illegal character"),
	get_error("Illegal escape")
-- Replacement table for single-character escapes: doSimpleSub captures one
-- escape character and divides the capture by this table.
-- NOTE(review): the table's entries and closing brace are not visible in the
-- reviewed text, so its contents are not documented here.
33 local knownReplacements
= {
-- Encode a 16-bit code point, supplied as two hexadecimal strings, as UTF-8.
-- Layout according to the table at http://da.wikipedia.org/wiki/UTF-8
-- Values in the surrogate range are encoded like any other value; pairs are
-- not combined here.
-- @param code1 Hex string for the high byte of the code point (e.g. "20").
-- @param code2 Hex string for the low byte of the code point (e.g. "AC").
-- @return The UTF-8 byte sequence (one to three bytes) as a string.
local function utf8DecodeUnicode(code1, code2)
	code1, code2 = tonumber(code1, 16), tonumber(code2, 16)
	-- U+0000..U+007F: plain ASCII, one byte.
	if code1 == 0 and code2 < 0x80 then
		return string_char(code2)
	end
	-- U+0080..U+07FF: two bytes, 110xxxxx 10xxxxxx.
	if code1 < 0x08 then
		return string_char(
			0xC0 + code1 * 4 + floor(code2 / 64),
			0x80 + code2 % 64)
	end
	-- U+0800..U+FFFF: three bytes, 1110xxxx 10xxxxxx 10xxxxxx.
	return string_char(
		0xE0 + floor(code1 / 16),
		0x80 + (code1 % 16) * 4 + floor(code2 / 64),
		0x80 + code2 % 64)
end
-- Turn the hexadecimal digits of an \x escape into the byte they denote.
-- @param code Hexadecimal digits as a string, e.g. "41".
-- @return A one-character string holding that byte value.
local function decodeX(code)
	return string_char(tonumber(code, 16))
end
-- Sub-patterns for the text that follows a backslash inside a string.

-- Capture of one util.hexpair match (presumably two hex digits -- verify
-- against json.decode.util).
local hexPairCapture = lpeg.C(util.hexpair)

-- Single-character escapes: the captured character is looked up in
-- knownReplacements.
local doSimpleSub = lpeg.C(lpeg.S("'\"\\/bfnrtvz")) / knownReplacements

-- 'u' followed by two hex-pair captures; otherwise the bad_unicode error
-- pattern is tried.
local doUniSub = lpeg.P('u') * (hexPairCapture * hexPairCapture + bad_unicode)

-- 'x' followed by one hex-pair capture; otherwise the bad_hex error pattern
-- is tried.
local doXSub = lpeg.P('x') * (hexPairCapture + bad_hex)
-- Baseline option values for string decoding; mergeOptions hands this table
-- to jsonutil.doOptionMerge.
-- NOTE(review): the embedded listing numbers skip 74 and 78-79, so further
-- fields and the closing brace are not visible in the reviewed text.
73 local defaultOptions
= {
-- When set, generateLexer tries this pattern before the built-in escapes.
75 additionalEscapes
= false, -- disallow untranslated escapes
-- Lookahead (#patt consumes no input) over the characters accepted after a
-- backslash; generateLexer falls back to bad_escape when it does not match.
76 escapeCheck
= #lpeg
.S('bfnrtv/\\"xu\'z'), -- no check on valid characters
-- Function applied to the two hex-pair captures of a 'u' escape.
77 decodeUnicode
= utf8DecodeUnicode
,
-- Per-mode option overrides, indexed by mode name; mergeOptions looks up
-- modeOptions[mode] and passes it to jsonutil.doOptionMerge after the defaults.
81 local modeOptions
= {}
-- Overrides for the 'strict' mode.
-- NOTE(review): the embedded listing numbers skip 87-88, so further fields and
-- the closing brace are not visible in the reviewed text.
83 modeOptions
.strict
= {
-- Characters excluded from the plain-character set built by buildCaptureString.
84 badChars
= '\b\f\n\r\t\v',
85 additionalEscapes
= false, -- no additional escapes
-- Lookahead (#patt consumes no input) over the characters accepted after a
-- backslash in this mode.
86 escapeCheck
= #lpeg
.S('bfnrtv/\\"u'), --only these chars are allowed to be escaped
-- Merge the string-decoding defaults, plus any overrides registered for
-- `mode`, into `options` via jsonutil.doOptionMerge using the 'strings' key.
-- @param options Options table handed to jsonutil.doOptionMerge.
-- @param mode Optional mode name (e.g. 'strict'). When it is nil/false or has
--   no entry in modeOptions, a falsy value is passed as the override table.
local function mergeOptions(options, mode)
	local modeOverrides = mode and modeOptions[mode]
	-- The meaning of the second argument (false) is defined by
	-- jsonutil.doOptionMerge, which is not visible here.
	jsonutil.doOptionMerge(options, false, 'strings', defaultOptions, modeOverrides)
end
-- Build the pattern for one string literal delimited by `quote`.
-- @param quote The delimiter character, e.g. '"'.
-- @param badChars String of characters that may not appear unescaped.
-- @param escapeMatch Pattern applied to the text following a backslash.
-- @return Pattern matching quote, body, quote; the body is captured with
--   lpeg.Cs so the escape replacements are substituted into the result.
local function buildCaptureString(quote, badChars, escapeMatch)
	-- A plain character is anything except a backslash, a bad character or
	-- the delimiter itself.
	local plainChar = 1 - lpeg.S("\\" .. badChars .. quote)
	-- The backslash is replaced by "" and escapeMatch supplies the text for
	-- whatever follows it.
	local escapedChar = lpeg.P("\\") / "" * escapeMatch
	local captureChar = plainChar + escapedChar
	-- During error, force end
	-- NOTE(review): captureChar^0 cannot fail, so the alternative after `+`
	-- appears unreachable -- confirm whether sequencing (`*`) was intended.
	local captureString = captureChar^0 + (-#lpeg.P(quote) * bad_character + -1)
	return lpeg.P(quote) * lpeg.Cs(captureString) * lpeg.P(quote)
end
-- Build the string-matching pattern for a set of decoder options.
-- NOTE(review): several lines of this function were missing from the reviewed
-- text (block terminators, the `captureString` declaration, the first-pass
-- assignment with its `else`, and the final return). They are reconstructed
-- here -- confirm against the upstream file.
-- @param options Decoder options; only `options.strings` is read, using its
--   strict_quotes, decodeUnicode, escapeCheck, additionalEscapes and badChars
--   fields.
-- @return Pattern accepting a string in any of the enabled quote styles.
local function generateLexer(options)
	options = options.strings
	-- Double quotes are always accepted; single quotes only when not strict.
	local quotes = { '"' }
	if not options.strict_quotes then
		quotes[#quotes + 1] = "'"
	end
	-- Escape handling: simple escapes first, then \x, then \u.
	local escapeMatch = doSimpleSub
	escapeMatch = escapeMatch + doXSub / decodeX
	escapeMatch = escapeMatch + doUniSub / options.decodeUnicode
	if options.escapeCheck then
		-- Guard the escapes with the lookahead; anything else is an error.
		escapeMatch = options.escapeCheck * escapeMatch + bad_escape
	end
	if options.additionalEscapes then
		-- Caller-supplied escapes take priority over the built-in ones.
		escapeMatch = options.additionalEscapes + escapeMatch
	end
	-- Ordered choice over one capture pattern per quote style.
	local captureString
	for i = 1, #quotes do
		local cap = buildCaptureString(quotes[i], options.badChars, escapeMatch)
		if captureString == nil then
			captureString = cap
		else
			captureString = captureString + cap
		end
	end
	return captureString
end
-- Fields exposing this file's two entry points under their own names.
-- NOTE(review): the enclosing table constructor (its opening and closing
-- lines) is not visible in the reviewed text.
129 mergeOptions
= mergeOptions
,
130 generateLexer
= generateLexer