base: fixes regression test lua path
[luajson.git] / lua / json / decode / strings.lua
blob7fcb595f036b26851a603c37f58bad4eeb2ed3b8
1 --[[
2 Licensed according to the included 'LICENSE' document
3 Author: Thomas Harning Jr <harningt@gmail.com>
4 ]]
5 local lpeg = require("lpeg")
6 local util = require("json.decode.util")
7 local merge = require("json.util").merge
9 local tonumber = tonumber
10 local string = string
11 local string_char = string.char
12 local floor = math.floor
13 local table_concat = table.concat
15 local error = error
16 module("json.decode.strings")
-- Builds an LPeg pattern that aborts decoding with a descriptive error.
-- @param item  short description of the problem; it becomes the prefix of
--              the message "<item> in string [<char>] @ <line>:<column>"
-- @return an LPeg pattern whose match-time function always raises via error()
local function get_error(item)
	local template = item .. " in string [%q] @ %i:%i"
	-- LPeg calls this with the subject string and the current position.
	local function raise(subject, position)
		local line, column, bad_char = util.get_invalid_character_info(subject, position)
		error(template:format(bad_char, line, column))
	end
	-- The trailing "* 1" is never reached at match time because raise() always
	-- errors; presumably it keeps LPeg from treating the pattern as one that
	-- can match the empty string -- TODO confirm.
	return lpeg.P(raise) * 1
end
-- Error-raising patterns, one per kind of malformed string content.
local bad_unicode, bad_hex, bad_character, bad_escape =
	get_error("Illegal unicode escape"),
	get_error("Illegal hex escape"),
	get_error("Illegal character"),
	get_error("Illegal escape")
-- Maps the character that follows a backslash to the text it decodes to.
-- doSimpleSub uses this table as its LPeg replacement table.
local knownReplacements = {
	["'"] = "'",
	['"'] = '"',
	['\\'] = '\\',
	['/'] = '/',
	b = '\b',
	f = '\f',
	n = '\n',
	r = '\r',
	t = '\t',
	v = '\v',
	-- Written as a plain 'z' rather than '\z': PUC Lua 5.1 reads '\z' as "z",
	-- but Lua 5.2+ and LuaJIT treat \z as the skip-whitespace escape, which
	-- would silently turn this entry into the empty string.
	z = 'z'
}
-- Encodes one \uXXXX escape as UTF-8,
-- according to the table at http://da.wikipedia.org/wiki/UTF-8
-- @param code1  hex digits of the high-order byte of the code unit
-- @param code2  hex digits of the low-order byte of the code unit
-- @return the 1-, 2- or 3-byte UTF-8 encoding as a string
-- Each escape is encoded on its own; surrogate halves are not combined here.
local function utf8DecodeUnicode(code1, code2)
	local hi, lo = tonumber(code1, 16), tonumber(code2, 16)
	-- U+0000..U+007F: plain ASCII, one byte.
	if hi == 0 and lo < 0x80 then
		return string_char(lo)
	end
	-- Both multi-byte forms end with the same continuation byte and share
	-- the two top bits of the low byte.
	local lo_top_bits = floor(lo / 64)
	local last_byte = 0x80 + lo % 64
	-- U+0080..U+07FF: two bytes.
	if hi < 0x08 then
		return string_char(0xC0 + hi * 4 + lo_top_bits, last_byte)
	end
	-- U+0800..U+FFFF: three bytes.
	local lead_byte = 0xE0 + floor(hi / 16)
	local middle_byte = 0x80 + (hi % 16) * 4 + lo_top_bits
	return string_char(lead_byte, middle_byte, last_byte)
end
-- Decodes the hex digits of a \xHH escape into the single byte they denote.
-- @param code  hex digits as a string
-- @return one-character string holding that byte value
local function decodeX(code)
	local byte_value = tonumber(code, 16)
	return string_char(byte_value)
end
-- Escape bodies, i.e. what may follow the backslash inside a string:
--   doSimpleSub: a single character looked up in knownReplacements
--   doUniSub:    'u' plus two captured hex pairs, otherwise bad_unicode raises
--   doXSub:      'x' plus one captured hex pair, otherwise bad_hex raises
local doSimpleSub, doUniSub, doXSub
do
	local hexpairCapture = lpeg.C(util.hexpair)
	doSimpleSub = lpeg.C(lpeg.S("'\"\\/bfnrtvz")) / knownReplacements
	doUniSub = lpeg.P('u') * (hexpairCapture * hexpairCapture + bad_unicode)
	doXSub = lpeg.P('x') * (hexpairCapture + bad_hex)
end
-- Option set used by buildCapture when the caller passes no options, and as
-- the base that caller-supplied options are merged over.
local defaultOptions = {
	badChars = '', -- no extra raw characters are excluded from string bodies
	additionalEscapes = false, -- disallow untranslated escapes
	-- A backslash must be followed by one of these characters; anything else
	-- is reported through bad_escape.
	escapeCheck = #lpeg.S('bfnrtv/\\"xu\'z'),
	decodeUnicode = utf8DecodeUnicode,
	strict_quotes = false -- single-quoted strings are accepted as well
}
-- Exported presets. These are deliberately non-local: after module() they
-- become fields of the json.decode.strings module table.
default = nil -- Let the buildCapture optimization take place

-- Preset for strict decoding: raw control characters listed in badChars are
-- rejected inside strings and only double-quoted strings are accepted.
strict = {
	badChars = '\b\f\n\r\t\v',
	additionalEscapes = false, -- no additional escapes
	-- NOTE(review): RFC 8259 does not define a \v escape; confirm that
	-- accepting it in strict mode is intended.
	escapeCheck = #lpeg.S('bfnrtv/\\"u'), --only these chars are allowed to be escaped
	strict_quotes = true
}
-- Builds the pattern for one quoting style.
-- @param quote        the quote character delimiting the string
-- @param badChars     characters that may not appear unescaped in the body
-- @param escapeMatch  pattern matching what follows a backslash
-- @return pattern matching quote, body, quote and producing the body with
--         the escape substitutions applied (substitution capture)
local function buildCaptureString(quote, badChars, escapeMatch)
	local quoteMark = lpeg.P(quote)
	-- Any character except backslash, a forbidden character or the quote.
	local plainChar = 1 - lpeg.S("\\" .. badChars .. quote)
	-- The backslash itself is replaced by nothing; escapeMatch supplies the text.
	local escapedChar = lpeg.P("\\") / "" * escapeMatch
	local body = (plainChar + escapedChar)^0
	-- During error, force end
	-- NOTE(review): a ^0 repetition can always match zero items, so this
	-- fallback of the ordered choice looks unreachable -- confirm intent.
	local onError = -#quoteMark * bad_character + -1
	return quoteMark * lpeg.Cs(body + onError) * quoteMark
end
-- Builds the complete string-matching pattern for the given options.
-- @param options  table of overrides for defaultOptions, or nil to use
--                 defaultOptions directly without merging
-- @return pattern matching a string in any of the accepted quoting styles
local function buildCapture(options)
	-- Caller options are layered over the defaults (merge comes from json.util;
	-- presumably it fills and returns its first argument -- TODO confirm).
	options = options and merge({}, defaultOptions, options) or defaultOptions
	local quotes = { '"' }
	if not options.strict_quotes then
		quotes[#quotes + 1] = "'"
	end
	-- What may follow a backslash, tried in order.
	local escapeMatch = doSimpleSub
	escapeMatch = escapeMatch + doXSub / decodeX
	escapeMatch = escapeMatch + doUniSub / options.decodeUnicode
	if options.additionalEscapes then
		escapeMatch = escapeMatch + options.additionalEscapes
	end
	if options.escapeCheck then
		-- Only escapes that pass the lookahead are attempted; everything else
		-- raises through bad_escape.
		escapeMatch = options.escapeCheck * escapeMatch + bad_escape
	end
	-- Ordered choice over one capture pattern per accepted quote character.
	local captureString
	for i = 1, #quotes do
		local cap = buildCaptureString(quotes[i], options.badChars, escapeMatch)
		if captureString == nil then
			captureString = cap
		else
			captureString = captureString + cap
		end
	end
	return captureString
end
-- Registers the "STRING" type name with the shared decode utilities.
function register_types()
	util.register_type("STRING")
end
126 function load_types(options, global_options, grammar)
127 local capture = buildCapture(options)
128 local string_id = util.types.STRING
129 grammar[string_id] = capture
130 util.append_grammar_item(grammar, "VALUE", lpeg.V(string_id))