src/wps/upnp_xml.c

   1 /*
   2  * UPnP XML helper routines
   3  * Copyright (c) 2000-2003 Intel Corporation
   4  * Copyright (c) 2006-2007 Sony Corporation
   5  * Copyright (c) 2008-2009 Atheros Communications
   6  * Copyright (c) 2009, Jouni Malinen <j@w1.fi>
   7  *
   8  * See wps_upnp.c for more details on licensing and code history.
   9  */
  10
  11 #include "includes.h"
  12
  13 #include "common.h"
  14 #include "base64.h"
  15 #include "http.h"
  16 #include "upnp_xml.h"
  17
  18
  19 /*
  20  * XML parsing and formatting
  21  *
  22  * XML is a markup language based on unicode; usually (and in our case,
  23  * always!) based on utf-8. utf-8 uses a variable number of bytes per
  24  * character. utf-8 has the advantage that all non-ASCII unicode characters are
  25  * represented by sequences of non-ascii (high bit set) bytes, whereas ASCII
  26  * characters are single ascii bytes, thus we can use typical text processing.
  27  *
  28  * (One other interesting thing about utf-8 is that it is possible to look at
  29  * any random byte and determine whether it is the first byte of a character
  30  * or a continuation byte).
  31  *
  32  * The base syntax of XML uses a few ASCII punctuation characters; any
  33  * characters that would appear in the payload data are rewritten using
  34  * sequences, e.g., &amp; for ampersand(&) and &lt; for left angle bracket (<).
  35  * Five such escapes total (more can be defined but that does not apply to our
  36  * case). Thus we can safely parse for angle brackets etc.
  37  *
  38  * XML describes tree structures of tagged data, with each element beginning
  39  * with an opening tag <label> and ending with a closing tag </label> with
  40  * matching label. (There is also a self-closing tag <label/> which is supposed
  41  * to be equivalent to <label></label>, i.e., no payload, but we are unlikely
  42  * to see it for our purpose).
  43  *
  44  * Actually the opening tags are a little more complicated because they can
  45  * contain "attributes" after the label (delimited by ascii space or tab chars)
  46  * of the form attribute_label="value" or attribute_label='value'; as it turns
  47  * out we do not have to read any of these attributes, just ignore them.
  48  *
  49  * Labels are any sequence of chars other than space, tab, right angle bracket
  50  * (and ?), but may have an inner structure of <namespace><colon><plain_label>.
  51  * As it turns out, we can ignore the namespaces, in fact we can ignore the
  52  * entire tree hierarchy, because the plain labels we are looking for will be
  53  * unique (not in general, but for this application). We do however have to be
  54  * careful to skip over the namespaces.
  55  *
  56  * In generating XML we have to be more careful, but that is easy because
  57  * everything we do is pretty canned. The only real care to take is to escape
  58  * any special chars in our payload.
  59  */
  60
  61 /**
  62  * xml_next_tag - Advance to next tag
  63  * @in: Input
  64  * @out: OUT: start of tag just after '<'
  65  * @out_tagname: OUT: start of name of tag, skipping namespace
  66  * @end: OUT: one after tag
  67  * Returns: 0 on success, 1 on failure
  68  *
  69  * A tag has form:
  70  *     <left angle bracket><...><right angle bracket>
  71  * Within the angle brackets, there is an optional leading forward slash (which
  72  * makes the tag an ending tag), then an optional leading label (followed by
  73  * colon) and then the tag name itself.
  74  *
  75  * Note that angle brackets present in the original data must have been encoded
  76  * as &lt; and &gt; so they will not trouble us.
  77  */
  78 static int xml_next_tag(const char *in, const char **out,
  79                         const char **out_tagname, const char **end)
  80 {
  81         while (*in && *in != '<')
  82                 in++;
  83         if (*in != '<')
  84                 return 1;
  85         *out = ++in;
  86         if (*in == '/')
  87                 in++;
  88         *out_tagname = in; /* maybe */
  89         while (isalnum(*in) || *in == '-')
  90                 in++;
  91         if (*in == ':')
  92                 *out_tagname = ++in;
  93         while (*in && *in != '>')
  94                 in++;
  95         if (*in != '>')
  96                 return 1;
  97         *end = ++in;
  98         return 0;
  99 }
 100
 101
 102 /* xml_data_encode -- format data for xml file, escaping special characters.
 103  *
 104  * Note that we assume we are using utf8 both as input and as output!
 105  * In utf8, characters may be classed as follows:
 106  *     0xxxxxxx(2) -- 1 byte ascii char
 107  *     11xxxxxx(2) -- 1st byte of multi-byte char w/ unicode value >= 0x80
 108  *         110xxxxx(2) -- 1st byte of 2 byte sequence (5 payload bits here)
 109  *         1110xxxx(2) -- 1st byte of 3 byte sequence (4 payload bits here)
 110  *         11110xxx(2) -- 1st byte of 4 byte sequence (3 payload bits here)
 111  *      10xxxxxx(2) -- extension byte (6 payload bits per byte)
 112  *      Some values implied by the above are however illegal because they
 113  *      do not represent unicode chars or are not the shortest encoding.
 114  * Actually, we can almost entirely ignore the above and just do
 115  * text processing same as for ascii text.
 116  *
 117  * XML is written with arbitrary unicode characters, except that five
 118  * characters have special meaning and so must be escaped where they
 119  * appear in payload data... which we do here.
 120  */
 121 void xml_data_encode(struct wpabuf *buf, const char *data, int len)
 122 {
 123         int i;
 124         for (i = 0; i < len; i++) {
 125                 u8 c = ((u8 *) data)[i];
 126                 if (c == '<') {
 127                         wpabuf_put_str(buf, "&lt;");
 128                         continue;
 129                 }
 130                 if (c == '>') {
 131                         wpabuf_put_str(buf, "&gt;");
 132                         continue;
 133                 }
 134                 if (c == '&') {
 135                         wpabuf_put_str(buf, "&amp;");
 136                         continue;
 137                 }
 138                 if (c == '\'') {
 139                         wpabuf_put_str(buf, "&apos;");
 140                         continue;
 141                 }
 142                 if (c == '"') {
 143                         wpabuf_put_str(buf, "&quot;");
 144                         continue;
 145                 }
 146                 /*
 147                  * We could try to represent control characters using the
 148                  * sequence: &#x; where x is replaced by a hex numeral, but not
 149                  * clear why we would do this.
 150                  */
 151                 wpabuf_put_u8(buf, c);
 152         }
 153 }
 154
 155
 156 /* xml_add_tagged_data -- format tagged data as a new xml line.
 157  *
 158  * tag must not have any special chars.
 159  * data may have special chars, which are escaped.
 160  */
 161 void xml_add_tagged_data(struct wpabuf *buf, const char *tag, const char *data)
 162 {
 163         wpabuf_printf(buf, "<%s>", tag);
 164         xml_data_encode(buf, data, os_strlen(data));
 165         wpabuf_printf(buf, "</%s>\n", tag);
 166 }
 167
 168
 169 /* A POST body looks something like (per upnp spec):
 170  * <?xml version="1.0"?>
 171  * <s:Envelope
 172  *     xmlns:s="http://schemas.xmlsoap.org/soap/envelope/"
 173  *     s:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">
 174  *   <s:Body>
 175  *     <u:actionName xmlns:u="urn:schemas-upnp-org:service:serviceType:v">
 176  *       <argumentName>in arg value</argumentName>
 177  *       other in args and their values go here, if any
 178  *     </u:actionName>
 179  *   </s:Body>
 180  * </s:Envelope>
 181  *
 182  * where :
 183  *      s: might be some other namespace name followed by colon
 184  *      u: might be some other namespace name followed by colon
 185  *      actionName will be replaced according to action requested
 186  *      schema following actionName will be WFA scheme instead
 187  *      argumentName will be actual argument name
 188  *      (in arg value) will be actual argument value
 189  */
 190 char * xml_get_first_item(const char *doc, const char *item)
 191 {
 192         const char *match = item;
 193         int match_len = os_strlen(item);
 194         const char *tag, *tagname, *end;
 195         char *value;
 196
 197         /*
 198          * This is crude: ignore any possible tag name conflicts and go right
 199          * to the first tag of this name. This should be ok for the limited
 200          * domain of UPnP messages.
 201          */
 202         for (;;) {
 203                 if (xml_next_tag(doc, &tag, &tagname, &end))
 204                         return NULL;
 205                 doc = end;
 206                 if (!os_strncasecmp(tagname, match, match_len) &&
 207                     *tag != '/' &&
 208                     (tagname[match_len] == '>' ||
 209                      !isgraph(tagname[match_len]))) {
 210                         break;
 211                 }
 212         }
 213         end = doc;
 214         while (*end && *end != '<')
 215                 end++;
 216         value = os_zalloc(1 + (end - doc));
 217         if (value == NULL)
 218                 return NULL;
 219         os_memcpy(value, doc, end - doc);
 220         return value;
 221 }
 222
 223
 224 struct wpabuf * xml_get_base64_item(const char *data, const char *name,
 225                                     enum http_reply_code *ret)
 226 {
 227         char *msg;
 228         struct wpabuf *buf;
 229         unsigned char *decoded;
 230         size_t len;
 231
 232         msg = xml_get_first_item(data, name);
 233         if (msg == NULL) {
 234                 *ret = UPNP_ARG_VALUE_INVALID;
 235                 return NULL;
 236         }
 237
 238         decoded = base64_decode((unsigned char *) msg, os_strlen(msg), &len);
 239         os_free(msg);
 240         if (decoded == NULL) {
 241                 *ret = UPNP_OUT_OF_MEMORY;
 242                 return NULL;
 243         }
 244
 245         buf = wpabuf_alloc_ext_data(decoded, len);
 246         if (buf == NULL) {
 247                 os_free(decoded);
 248                 *ret = UPNP_OUT_OF_MEMORY;
 249                 return NULL;
 250         }
 251         return buf;
 252 }