2 Licensed according to the included 'LICENSE' document
3 Author: Thomas Harning Jr <harningt@gmail.com>
5 local lpeg
= require("lpeg")
6 local util
= require("json.decode.util")
7 local merge
= require("json.util").merge
9 local tonumber = tonumber
11 local string_char
= string.char
12 local floor = math
.floor
13 local table_concat
= table.concat
16 module("json.decode.strings")
-- Builds an error-reporting LPeg pattern for one class of malformed string
-- content.
--   item: message prefix; it is concatenated with " in string [%q] @ %i:%i"
--         to form the message template.
-- The value returned is lpeg.P wrapped around a function of (data, index).
-- That function asks util.get_invalid_character_info for line, line_index,
-- bad_char and last_line at `index`, then formats bad_char, line and
-- line_index into the template (last_line is not used in the visible lines).
-- NOTE(review): the rest of this definition (how `err` is reported, and the
-- closing of the inner function and of get_error) is not visible in this
-- excerpt -- confirm against the full file.
17 local function get_error(item
)
18 local fmt_string
= item
.. " in string [%q] @ %i:%i"
19 return lpeg
.P(function(data
, index
)
20 local line
, line_index
, bad_char
, last_line
= util
.get_invalid_character_info(data
, index
)
21 local err
= fmt_string
:format(bad_char
, line
, line_index
)
-- Error patterns, one per class of malformed string content. Each is built
-- by get_error from the message prefix shown and is used as the fallback
-- alternative in the escape/character patterns of this file.
local bad_unicode = get_error("Illegal unicode escape")
local bad_hex = get_error("Illegal hex escape")
local bad_character = get_error("Illegal character")
local bad_escape = get_error("Illegal escape")
-- Lookup table applied to the single escape character captured by
-- doSimpleSub (see the `/ knownReplacements` there).
-- NOTE(review): the table entries and closing brace are not visible in this
-- excerpt.
31 local knownReplacements
= {
-- according to the table at http://da.wikipedia.org/wiki/UTF-8
--- Encode one 16-bit code point, given as two hex-digit pairs, as UTF-8.
-- @param code1 hex string for the high byte of the code point (e.g. "20")
-- @param code2 hex string for the low byte of the code point (e.g. "AC")
-- @return string of one, two or three bytes
-- NOTE(review): each escape is encoded on its own, so the two halves of a
-- UTF-16 surrogate pair are not combined into a single code point here.
local function utf8DecodeUnicode(code1, code2)
	code1, code2 = tonumber(code1, 16), tonumber(code2, 16)
	-- U+0000..U+007F: plain single byte
	if code1 == 0 and code2 < 0x80 then
		return string_char(code2)
	end
	-- U+0080..U+07FF: 110xxxxx 10xxxxxx
	if code1 < 0x08 then
		return string_char(
			0xC0 + code1 * 4 + floor(code2 / 64),
			0x80 + code2 % 64)
	end
	-- U+0800..U+FFFF: 1110xxxx 10xxxxxx 10xxxxxx
	return string_char(
		0xE0 + floor(code1 / 16),
		0x80 + (code1 % 16) * 4 + floor(code2 / 64),
		0x80 + code2 % 64)
end
--- Turn the hex digits of an 'x' escape into the single byte they denote.
-- @param code hex digits as a string (the util.hexpair capture from doXSub)
-- @return one-byte string
local function decodeX(code)
	return string_char(tonumber(code, 16))
end
-- Patterns for the part of an escape sequence that follows the backslash.

-- Single-character escapes: the captured character is looked up in
-- knownReplacements.
local doSimpleSub = lpeg.C(lpeg.S("'\"\\/bfnrtvz")) / knownReplacements

-- 'u' escapes: two util.hexpair matches captured separately; anything else
-- after the 'u' falls through to bad_unicode.
local doUniSub = lpeg.P('u') * (lpeg.C(util.hexpair) * lpeg.C(util.hexpair) + bad_unicode)

-- 'x' escapes: one util.hexpair capture; anything else after the 'x' falls
-- through to bad_hex.
local doXSub = lpeg.P('x') * (lpeg.C(util.hexpair) + bad_hex)
-- Option presets consumed by buildCapture, which merges caller-supplied
-- options over defaultOptions (or uses defaultOptions directly when no
-- options are given).
-- NOTE(review): several lines of these tables, including closing braces and
-- the header of the second preset, are not visible in this excerpt.
71 local defaultOptions
= {
-- additionalEscapes: when truthy, buildCapture adds it as one more
-- alternative to the escape matcher.
73 additionalEscapes
= false, -- disallow untranslated escapes
-- escapeCheck: lookahead on the escape character; buildCapture sequences it
-- in front of the escape matcher and falls back to bad_escape.
74 escapeCheck
= #lpeg
.S('bfnrtv/\\"xu\'z'), -- no check on valid characters
-- decodeUnicode: function applied to the two hex-pair captures of a 'u'
-- escape.
75 decodeUnicode
= utf8DecodeUnicode
,
79 default
= nil -- Let the buildCapture optimization take place
-- badChars: characters that buildCaptureString excludes from unescaped
-- string content.
82 badChars
= '\b\f\n\r\t\v',
83 additionalEscapes
= false, -- no additional escapes
84 escapeCheck
= #lpeg
.S('bfnrtv/\\"u'), --only these chars are allowed to be escaped
--- Build the pattern for one string literal delimited by `quote`.
-- @param quote the delimiter character (as a string)
-- @param badChars string of characters not allowed unescaped in the content
-- @param escapeMatch pattern for whatever may follow a backslash
-- @return pattern: quote, content as a substitution capture, quote
local function buildCaptureString(quote, badChars, escapeMatch)
	-- Either an ordinary character (not a backslash, bad character or the
	-- quote), or a backslash -- replaced by "" in the output -- followed by
	-- an escape body.
	local captureChar = (1 - lpeg.S("\\" .. badChars .. quote)) + (lpeg.P("\\") / "" * escapeMatch)
	-- During error, force end
	-- NOTE(review): captureChar^0 can match the empty string, so the ordered
	-- choice appears never to reach the alternative on the right of `+` --
	-- confirm the intended operator before changing it.
	local captureString = captureChar^0 + (-#lpeg.P(quote) * bad_character + -1)
	return lpeg.P(quote) * lpeg.Cs(captureString) * lpeg.P(quote)
end
--- Build the complete string pattern for the given decoding options.
-- @param options table of overrides merged over defaultOptions, or nil to
--        use defaultOptions unchanged
-- @return ordered choice of one string pattern per accepted quote character
local function buildCapture(options)
	options = options and merge({}, defaultOptions, options) or defaultOptions

	-- Double quotes are always accepted; single quotes only when not strict.
	local quotes = { '"' }
	if not options.strict_quotes then
		quotes[#quotes + 1] = "'"
	end

	-- Escape bodies, tried in order: simple letters, 'x' hex, 'u' unicode,
	-- then any caller-supplied extras.
	local escapeMatch = doSimpleSub
	escapeMatch = escapeMatch + doXSub / decodeX
	escapeMatch = escapeMatch + doUniSub / options.decodeUnicode
	if options.additionalEscapes then
		escapeMatch = escapeMatch + options.additionalEscapes
	end
	-- When a check is configured, an escape that does not pass it falls
	-- through to bad_escape.
	if options.escapeCheck then
		escapeMatch = options.escapeCheck * escapeMatch + bad_escape
	end

	-- Combine the per-quote patterns into one ordered choice.
	local captureString
	for i = 1, #quotes do
		local cap = buildCaptureString(quotes[i], options.badChars, escapeMatch)
		if captureString == nil then
			captureString = cap
		else
			captureString = captureString + cap
		end
	end
	return captureString
end
--- Register the "STRING" type name with the shared decoder utilities.
function register_types()
	util.register_type("STRING")
end
--- Install the string rule into a decoder grammar.
-- @param options string-decoding options, passed to buildCapture
-- @param global_options accepted for interface compatibility; not used here
-- @param grammar grammar table: receives the string pattern under the STRING
--        type id, and a reference to it is appended to its "VALUE" item
function load_types(options, global_options, grammar)
	local capture = buildCapture(options)
	local string_id = util.types.STRING
	grammar[string_id] = capture
	util.append_grammar_item(grammar, "VALUE", lpeg.V(string_id))
end