lua/json/decode/strings.lua

   1 --[[
   2         Licensed according to the included 'LICENSE' document
   3         Author: Thomas Harning Jr <harningt@gmail.com>
   4 ]]
   5 local lpeg = require("lpeg")
   6 local jsonutil = require("json.util")
   7 local util = require("json.decode.util")
   8 local merge = jsonutil.merge
   9
  10 local tonumber = tonumber
  11 local string_char = require("string").char
  12 local floor = require("math").floor
  13 local table_concat = require("table").concat
  14
  15 local error = error
  16
  17 local _ENV = nil
  18
  19 local function get_error(item)
  20         local fmt_string = item .. " in string [%q] @ %i:%i"
  21         return lpeg.P(function(data, index)
  22                 local line, line_index, bad_char, last_line = util.get_invalid_character_info(data, index)
  23                 local err = fmt_string:format(bad_char, line, line_index)
  24                 error(err)
  25         end) * 1
  26 end
  27
  28 local bad_unicode   = get_error("Illegal unicode escape")
  29 local bad_hex       = get_error("Illegal hex escape")
  30 local bad_character = get_error("Illegal character")
  31 local bad_escape    = get_error("Illegal escape")
  32
  33 local knownReplacements = {
  34         ["'"] = "'",
  35         ['"'] = '"',
  36         ['\\'] = '\\',
  37         ['/'] = '/',
  38         b = '\b',
  39         f = '\f',
  40         n = '\n',
  41         r = '\r',
  42         t = '\t',
  43         v = '\v',
  44         z = '\z'
  45 }
  46
  47 -- according to the table at http://da.wikipedia.org/wiki/UTF-8
  48 local function utf8DecodeUnicode(code1, code2)
  49         code1, code2 = tonumber(code1, 16), tonumber(code2, 16)
  50         if code1 == 0 and code2 < 0x80 then
  51                 return string_char(code2)
  52         end
  53         if code1 < 0x08 then
  54                 return string_char(
  55                         0xC0 + code1 * 4 + floor(code2 / 64),
  56                         0x80 + code2 % 64)
  57         end
  58         return string_char(
  59                 0xE0 + floor(code1 / 16),
  60                 0x80 + (code1 % 16) * 4 + floor(code2 / 64),
  61                 0x80 + code2 % 64)
  62 end
  63
  64 local function decodeX(code)
  65         code = tonumber(code, 16)
  66         return string_char(code)
  67 end
  68
  69 local doSimpleSub = lpeg.C(lpeg.S("'\"\\/bfnrtvz")) / knownReplacements
  70 local doUniSub = lpeg.P('u') * (lpeg.C(util.hexpair) * lpeg.C(util.hexpair) + bad_unicode)
  71 local doXSub = lpeg.P('x') * (lpeg.C(util.hexpair) + bad_hex)
  72
  73 local defaultOptions = {
  74         badChars = '',
  75         additionalEscapes = false, -- disallow untranslated escapes
  76         escapeCheck = #lpeg.S('bfnrtv/\\"xu\'z'), -- no check on valid characters
  77         decodeUnicode = utf8DecodeUnicode,
  78         strict_quotes = false
  79 }
  80
  81 local modeOptions = {}
  82
  83 modeOptions.strict = {
  84         badChars = '\b\f\n\r\t\v',
  85         additionalEscapes = false, -- no additional escapes
  86         escapeCheck = #lpeg.S('bfnrtv/\\"u'), --only these chars are allowed to be escaped
  87         strict_quotes = true
  88 }
  89
  90 local function mergeOptions(options, mode)
  91         jsonutil.doOptionMerge(options, false, 'strings', defaultOptions, mode and modeOptions[mode])
  92 end
  93
  94 local function buildCaptureString(quote, badChars, escapeMatch)
  95         local captureChar = (1 - lpeg.S("\\" .. badChars .. quote)) + (lpeg.P("\\") / "" * escapeMatch)
  96         -- During error, force end
  97         local captureString = captureChar^0 + (-#lpeg.P(quote) * bad_character + -1)
  98         return lpeg.P(quote) * lpeg.Cs(captureString) * lpeg.P(quote)
  99 end
 100
 101 local function generateLexer(options)
 102         options = options.strings
 103         local quotes = { '"' }
 104         if not options.strict_quotes then
 105                 quotes[#quotes + 1] = "'"
 106         end
 107         local escapeMatch = doSimpleSub
 108         escapeMatch = escapeMatch + doXSub / decodeX
 109         escapeMatch = escapeMatch + doUniSub / options.decodeUnicode
 110         if options.additionalEscapes then
 111                 escapeMatch = escapeMatch + options.additionalEscapes
 112         end
 113         if options.escapeCheck then
 114                 escapeMatch = options.escapeCheck * escapeMatch + bad_escape
 115         end
 116         local captureString
 117         for i = 1, #quotes do
 118                 local cap = buildCaptureString(quotes[i], options.badChars, escapeMatch)
 119                 if captureString == nil then
 120                         captureString = cap
 121                 else
 122                         captureString = captureString + cap
 123                 end
 124         end
 125         return captureString
 126 end
 127
 128 local strings = {
 129         mergeOptions = mergeOptions,
 130         generateLexer = generateLexer
 131 }
 132
 133 return strings