2 * UPnP XML helper routines
3 * Copyright (c) 2000-2003 Intel Corporation
4 * Copyright (c) 2006-2007 Sony Corporation
5 * Copyright (c) 2008-2009 Atheros Communications
6 * Copyright (c) 2009, Jouni Malinen <j@w1.fi>
8 * See wps_upnp.c for more details on licensing and code history.
20 * XML parsing and formatting
22 * XML is a markup language based on unicode; usually (and in our case,
23 * always!) based on utf-8. utf-8 uses a variable number of bytes per
24 * character. utf-8 has the advantage that all non-ASCII unicode characters are
25 * represented by sequences of non-ascii (high bit set) bytes, whereas ASCII
26 * characters are single ascii bytes, thus we can use typical text processing.
28 * (One other interesting thing about utf-8 is that it is possible to look at
29 * any random byte and determine if it is the first byte of a character as
30 * opposed to a continuation byte).
32 * The base syntax of XML uses a few ASCII punctuation characters; any such
33 * characters that would appear in the payload data are rewritten using escape
34 * sequences, e.g., &amp; for ampersand (&) and &lt; for left angle bracket (<).
35 * Five such escapes total (more can be defined but that does not apply to our
36 * case). Thus we can safely parse for angle brackets etc.
38 * XML describes tree structures of tagged data, with each element beginning
39 * with an opening tag <label> and ending with a closing tag </label> with
40 * matching label. (There is also a self-closing tag <label/> which is supposed
41 * to be equivalent to <label></label>, i.e., no payload, but we are unlikely
42 * to see it for our purpose).
44 * Actually the opening tags are a little more complicated because they can
45 * contain "attributes" after the label (delimited by ascii space or tab chars)
46 * of the form attribute_label="value" or attribute_label='value'; as it turns
47 * out we do not have to read any of these attributes, just ignore them.
49 * Labels are any sequence of chars other than space, tab, right angle bracket
50 * (and ?), but may have an inner structure of <namespace><colon><plain_label>.
51 * As it turns out, we can ignore the namespaces, in fact we can ignore the
52 * entire tree hierarchy, because the plain labels we are looking for will be
53 * unique (not in general, but for this application). We do however have to be
54 * careful to skip over the namespaces.
56 * In generating XML we have to be more careful, but that is easy because
57 * everything we do is pretty canned. The only real care to take is to escape
58 * any special chars in our payload.
62 * xml_next_tag - Advance to next tag
64 * @out: OUT: start of tag just after '<'
65 * @out_tagname: OUT: start of name of tag, skipping namespace
66 * @end: OUT: one after tag
67 * Returns: 0 on success, 1 on failure
70 * <left angle bracket><...><right angle bracket>
71 * Within the angle brackets, there is an optional leading forward slash (which
72 * makes the tag an ending tag), then an optional leading label (followed by
73 * colon) and then the tag name itself.
75 * Note that angle brackets present in the original data must have been encoded
76 * as &lt; and &gt; so they will not trouble us.
78 static int xml_next_tag(const char *in
, const char **out
,
79 const char **out_tagname
, const char **end
)
81 while (*in
&& *in
!= '<')
88 *out_tagname
= in
; /* maybe */
89 while (isalnum(*in
) || *in
== '-')
93 while (*in
&& *in
!= '>')
102 /* xml_data_encode -- format data for xml file, escaping special characters.
104 * Note that we assume we are using utf8 both as input and as output!
105 * In utf8, characters may be classed as follows:
106 * 0xxxxxxx(2) -- 1 byte ascii char
107 * 11xxxxxx(2) -- 1st byte of multi-byte char w/ unicode value >= 0x80
108 * 110xxxxx(2) -- 1st byte of 2 byte sequence (5 payload bits here)
109 * 1110xxxx(2) -- 1st byte of 3 byte sequence (4 payload bits here)
110 * 11110xxx(2) -- 1st byte of 4 byte sequence (3 payload bits here)
111 * 10xxxxxx(2) -- extension byte (6 payload bits per byte)
112 * Some values implied by the above are however illegal because they
113 * do not represent unicode chars or are not the shortest encoding.
114 * Actually, we can almost entirely ignore the above and just do
115 * text processing same as for ascii text.
117 * XML is written with arbitrary unicode characters, except that five
118 * characters have special meaning and so must be escaped where they
119 * appear in payload data... which we do here.
121 void xml_data_encode(struct wpabuf
*buf
, const char *data
, int len
)
124 for (i
= 0; i
< len
; i
++) {
125 u8 c
= ((u8
*) data
)[i
];
127 wpabuf_put_str(buf
, "<");
131 wpabuf_put_str(buf
, ">");
135 wpabuf_put_str(buf
, "&");
139 wpabuf_put_str(buf
, "'");
143 wpabuf_put_str(buf
, """);
147 * We could try to represent control characters using the
148 * sequence: &#x; where x is replaced by a hex numeral, but it is not
149 * clear why we would do this.
151 wpabuf_put_u8(buf
, c
);
156 /* xml_add_tagged_data -- format tagged data as a new xml line.
158 * tag must not have any special chars.
159 * data may have special chars, which are escaped.
161 void xml_add_tagged_data(struct wpabuf
*buf
, const char *tag
, const char *data
)
163 wpabuf_printf(buf
, "<%s>", tag
);
164 xml_data_encode(buf
, data
, os_strlen(data
));
165 wpabuf_printf(buf
, "</%s>\n", tag
);
169 /* A POST body looks something like (per upnp spec):
170 * <?xml version="1.0"?>
172 * xmlns:s="http://schemas.xmlsoap.org/soap/envelope/"
173 * s:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">
175 * <u:actionName xmlns:u="urn:schemas-upnp-org:service:serviceType:v">
176 * <argumentName>in arg value</argumentName>
177 * other in args and their values go here, if any
183 * s: might be some other namespace name followed by colon
184 * u: might be some other namespace name followed by colon
185 * actionName will be replaced according to action requested
186 * schema following actionName will be WFA scheme instead
187 * argumentName will be actual argument name
188 * (in arg value) will be actual argument value
190 char * xml_get_first_item(const char *doc
, const char *item
)
192 const char *match
= item
;
193 int match_len
= os_strlen(item
);
194 const char *tag
, *tagname
, *end
;
198 * This is crude: ignore any possible tag name conflicts and go right
199 * to the first tag of this name. This should be ok for the limited
200 * domain of UPnP messages.
203 if (xml_next_tag(doc
, &tag
, &tagname
, &end
))
206 if (!os_strncasecmp(tagname
, match
, match_len
) &&
208 (tagname
[match_len
] == '>' ||
209 !isgraph(tagname
[match_len
]))) {
214 while (*end
&& *end
!= '<')
216 value
= os_zalloc(1 + (end
- doc
));
219 os_memcpy(value
, doc
, end
- doc
);
224 struct wpabuf
* xml_get_base64_item(const char *data
, const char *name
,
225 enum http_reply_code
*ret
)
229 unsigned char *decoded
;
232 msg
= xml_get_first_item(data
, name
);
234 *ret
= UPNP_ARG_VALUE_INVALID
;
238 decoded
= base64_decode((unsigned char *) msg
, os_strlen(msg
), &len
);
240 if (decoded
== NULL
) {
241 *ret
= UPNP_OUT_OF_MEMORY
;
245 buf
= wpabuf_alloc_ext_data(decoded
, len
);
248 *ret
= UPNP_OUT_OF_MEMORY
;