wsutil/wsjson.c

   1 /* wsjson.c
   2  * JSON parsing functions.
   3  *
   4  * Copyright 2016, Dario Lombardo
   5  *
   6  * Wireshark - Network traffic analyzer
   7  * By Gerald Combs <gerald@wireshark.org>
   8  * Copyright 1998 Gerald Combs
   9  *
  10  * SPDX-License-Identifier: GPL-2.0-or-later
  11  */
  12
  13 #include "config.h"
  14 #define WS_LOG_DOMAIN LOG_DOMAIN_MAIN
  15
  16 #include "wsjson.h"
  17
  18 #include <string.h>
  19 #include <errno.h>
  20 #include <wsutil/jsmn.h>
  21 #include <wsutil/str_util.h>
  22 #include <wsutil/unicode-utils.h>
  23 #include <wsutil/wslog.h>
  24
  25 bool
  26 json_validate(const uint8_t *buf, const size_t len)
  27 {
  28     bool ret = true;
  29     /* We expect no more than 1024 tokens */
  30     unsigned max_tokens = 1024;
  31     jsmntok_t* t;
  32     jsmn_parser p;
  33     int rcode;
  34
  35     /*
  36      * Make sure the buffer isn't empty and the first octet isn't a NUL;
  37      * otherwise, the parser will immediately stop parsing and not validate
  38      * anything after that, so it'll just think it was handed an empty string.
  39      *
  40      * XXX - should we check for NULs anywhere in the buffer?
  41      */
  42     if (len == 0) {
  43         ws_debug("JSON string is empty");
  44         return false;
  45     }
  46     if (buf[0] == '\0') {
  47         ws_debug("invalid character inside JSON string");
  48         return false;
  49     }
  50
  51     t = g_new0(jsmntok_t, max_tokens);
  52
  53     if (!t)
  54         return false;
  55
  56     jsmn_init(&p);
  57     rcode = jsmn_parse(&p, buf, len, t, max_tokens);
  58     if (rcode < 0) {
  59         switch (rcode) {
  60             case JSMN_ERROR_NOMEM:
  61                 ws_debug("not enough tokens were provided");
  62                 break;
  63             case JSMN_ERROR_INVAL:
  64                 ws_debug("invalid character inside JSON string");
  65                 break;
  66             case JSMN_ERROR_PART:
  67                 ws_debug("the string is not a full JSON packet, "
  68                     "more bytes expected");
  69                 break;
  70             default:
  71                 ws_debug("unexpected error");
  72                 break;
  73         }
  74         ret = false;
  75     }
  76
  77     g_free(t);
  78     return ret;
  79 }
  80
  81 int
  82 json_parse(const char *buf, jsmntok_t *tokens, unsigned int max_tokens)
  83 {
  84     jsmn_parser p;
  85
  86     jsmn_init(&p);
  87     return jsmn_parse(&p, buf, strlen(buf), tokens, max_tokens);
  88 }
  89
  90 static
  91 jsmntok_t *json_get_next_object(jsmntok_t *cur)
  92 {
  93     int i;
  94     jsmntok_t *next = cur+1;
  95
  96     for (i = 0; i < cur->size; i++) {
  97         next = json_get_next_object(next);
  98     }
  99     return next;
 100 }
 101
 102 jsmntok_t *json_get_object(const char *buf, jsmntok_t *parent, const char *name)
 103 {
 104     int i;
 105     jsmntok_t *cur = parent+1;
 106
 107     for (i = 0; i < parent->size; i++) {
 108         if (cur->type == JSMN_STRING &&
 109             !strncmp(&buf[cur->start], name, cur->end - cur->start)
 110             && strlen(name) == (size_t)(cur->end - cur->start) &&
 111             cur->size == 1 && (cur+1)->type == JSMN_OBJECT) {
 112             return cur+1;
 113         }
 114         cur = json_get_next_object(cur);
 115     }
 116     return NULL;
 117 }
 118
 119 jsmntok_t *json_get_array(const char *buf, jsmntok_t *parent, const char *name)
 120 {
 121     int i;
 122     jsmntok_t *cur = parent+1;
 123
 124     for (i = 0; i < parent->size; i++) {
 125         if (cur->type == JSMN_STRING &&
 126             !strncmp(&buf[cur->start], name, cur->end - cur->start)
 127             && strlen(name) == (size_t)(cur->end - cur->start) &&
 128             cur->size == 1 && (cur+1)->type == JSMN_ARRAY) {
 129             return cur+1;
 130         }
 131         cur = json_get_next_object(cur);
 132     }
 133     return NULL;
 134 }
 135
 136 int json_get_array_len(jsmntok_t *array)
 137 {
 138     if (array->type != JSMN_ARRAY)
 139         return -1;
 140     return array->size;
 141 }
 142
 143 jsmntok_t *json_get_array_index(jsmntok_t *array, int idx)
 144 {
 145     int i;
 146     jsmntok_t *cur = array+1;
 147
 148
 149     if (array->type != JSMN_ARRAY || idx < 0 || idx >= array->size)
 150         return NULL;
 151     for (i = 0; i < idx; i++)
 152         cur = json_get_next_object(cur);
 153     return cur;
 154 }
 155
 156 char *json_get_string(char *buf, jsmntok_t *parent, const char *name)
 157 {
 158     int i;
 159     jsmntok_t *cur = parent+1;
 160
 161     for (i = 0; i < parent->size; i++) {
 162         if (cur->type == JSMN_STRING &&
 163             !strncmp(&buf[cur->start], name, cur->end - cur->start)
 164             && strlen(name) == (size_t)(cur->end - cur->start) &&
 165             cur->size == 1 && (cur+1)->type == JSMN_STRING) {
 166             buf[(cur+1)->end] = '\0';
 167             if (!json_decode_string_inplace(&buf[(cur+1)->start]))
 168                 return NULL;
 169             return &buf[(cur+1)->start];
 170         }
 171         cur = json_get_next_object(cur);
 172     }
 173     return NULL;
 174 }
 175
 176 bool json_get_double(char *buf, jsmntok_t *parent, const char *name, double *val)
 177 {
 178     int i;
 179     jsmntok_t *cur = parent+1;
 180
 181     for (i = 0; i < parent->size; i++) {
 182         if (cur->type == JSMN_STRING &&
 183             !strncmp(&buf[cur->start], name, cur->end - cur->start)
 184             && strlen(name) == (size_t)(cur->end - cur->start) &&
 185             cur->size == 1 && (cur+1)->type == JSMN_PRIMITIVE) {
 186             buf[(cur+1)->end] = '\0';
 187             *val = g_ascii_strtod(&buf[(cur+1)->start], NULL);
 188             if (errno != 0)
 189                 return false;
 190             return true;
 191         }
 192         cur = json_get_next_object(cur);
 193     }
 194     return false;
 195 }
 196
 197 bool json_get_boolean(char *buf, jsmntok_t *parent, const char *name, bool *val)
 198 {
 199     int i;
 200     size_t tok_len;
 201     jsmntok_t *cur = parent+1;
 202
 203     for (i = 0; i < parent->size; i++) {
 204         if (cur->type == JSMN_STRING &&
 205             !strncmp(&buf[cur->start], name, cur->end - cur->start)
 206             && strlen(name) == (size_t)(cur->end - cur->start) &&
 207             cur->size == 1 && (cur+1)->type == JSMN_PRIMITIVE) {
 208             /* JSMN_STRICT guarantees that a primitive starts with the
 209              * correct character.
 210              */
 211             tok_len = (cur+1)->end - (cur+1)->start;
 212             switch (buf[(cur+1)->start]) {
 213             case 't':
 214                 if (tok_len == 4 && strncmp(&buf[(cur+1)->start], "true", tok_len) == 0) {
 215                     *val = true;
 216                     return true;
 217                 }
 218                 return false;
 219             case 'f':
 220                 if (tok_len == 5 && strncmp(&buf[(cur+1)->start], "false", tok_len) == 0) {
 221                     *val = false;
 222                     return true;
 223                 }
 224                 return false;
 225             default:
 226                 return false;
 227             }
 228         }
 229         cur = json_get_next_object(cur);
 230     }
 231     return false;
 232 }
 233
 234 bool
 235 json_decode_string_inplace(char *text)
 236 {
 237     const char *input = text;
 238     char *output = text;
 239     while (*input) {
 240         char ch = *input++;
 241
 242         if (ch == '\\') {
 243             ch = *input++;
 244
 245             switch (ch) {
 246                 case '\"':
 247                 case '\\':
 248                 case '/':
 249                     *output++ = ch;
 250                     break;
 251
 252                 case 'b':
 253                     *output++ = '\b';
 254                     break;
 255                 case 'f':
 256                     *output++ = '\f';
 257                     break;
 258                 case 'n':
 259                     *output++ = '\n';
 260                     break;
 261                 case 'r':
 262                     *output++ = '\r';
 263                     break;
 264                 case 't':
 265                     *output++ = '\t';
 266                     break;
 267
 268                 case 'u':
 269                 {
 270                     uint32_t unicode_hex = 0;
 271                     int k;
 272                     int bin;
 273
 274                     for (k = 0; k < 4; k++) {
 275                         unicode_hex <<= 4;
 276
 277                         ch = *input++;
 278                         bin = ws_xton(ch);
 279                         if (bin == -1)
 280                             return false;
 281                         unicode_hex |= bin;
 282                     }
 283
 284                     if ((IS_LEAD_SURROGATE(unicode_hex))) {
 285                         uint16_t lead_surrogate = unicode_hex;
 286                         uint16_t trail_surrogate = 0;
 287
 288                         if (input[0] != '\\' || input[1] != 'u')
 289                             return false;
 290                         input += 2;
 291
 292                         for (k = 0; k < 4; k++) {
 293                             trail_surrogate <<= 4;
 294
 295                             ch = *input++;
 296                             bin = ws_xton(ch);
 297                             if (bin == -1)
 298                                 return false;
 299                             trail_surrogate |= bin;
 300                         }
 301
 302                         if ((!IS_TRAIL_SURROGATE(trail_surrogate)))
 303                             return false;
 304
 305                         unicode_hex = SURROGATE_VALUE(lead_surrogate,trail_surrogate);
 306
 307                     } else if ((IS_TRAIL_SURROGATE(unicode_hex))) {
 308                         return false;
 309                     }
 310
 311                     if (!g_unichar_validate(unicode_hex))
 312                         return false;
 313
 314                     /* Don't allow NUL byte injection. */
 315                     if (unicode_hex == 0)
 316                         return false;
 317
 318                     /* \uXXXX => 6 bytes, and g_unichar_to_utf8() requires to have output buffer at least 6 bytes -> OK. */
 319                     k = g_unichar_to_utf8(unicode_hex, output);
 320                     output += k;
 321                     break;
 322                 }
 323
 324                 default:
 325                     return false;
 326             }
 327
 328         } else {
 329             *output = ch;
 330             output++;
 331         }
 332     }
 333
 334     *output = '\0';
 335     return true;
 336 }
 337
 338 /*
 339  * Editor modelines  -  https://www.wireshark.org/tools/modelines.html
 340  *
 341  * Local variables:
 342  * c-basic-offset: 4
 343  * tab-width: 8
 344  * indent-tabs-mode: nil
 345  * End:
 346  *
 347  * vi: set shiftwidth=4 tabstop=8 expandtab:
 348  * :indentSize=4:tabSize=8:noTabs=true:
 349  */