Bug 452317 - FeedConverter.js: QueryInterface should throw NS_ERROR_NO_INTERFACE...
[wine-gecko.git] / xpcom / io / nsEscape.cpp
blob704ee50887f28aec0bd6395459b2f41fa51f7f09
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
38 // First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c.
40 #include "nsEscape.h"
41 #include "nsMemory.h"
42 #include "nsCRT.h"
43 #include "nsReadableUtils.h"
/*
 * Per-byte permission table consulted through IS_OK(): entry c holds one bit
 * for every escape mask under which byte c may be copied through unescaped.
 * Only the three low bits are used (every entry is 0, 4 or 7):
 *
 *  Bit 0 (value 1)   xalpha   -- the alphas
 *  Bit 1 (value 2)   xpalpha  -- as xalpha but
 *                                converts spaces to plus and plus to %2B
 *  Bit 2 (value 4)   path     -- as xalphas but doesn't escape '/'
 *
 * Bytes 0x80..0xFF have no permission bits: entry 0x80 is written out and the
 * rest of the array is zero-initialized by the language.
 */
const int netCharType[256] =
{
/*  0 1 2 3 4 5 6 7 8 9 A B C D E F */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    /* 0x */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    /* 1x */
    0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4,    /* 2x   !"#$%&'()*+,-./  */
    7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0,    /* 3x  0123456789:;<=>?  */
    0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,    /* 4x  @ABCDEFGHIJKLMNO  */
    /* bits for '@' changed from 7 to 0 so '@' can be escaped */
    /* in usernames and passwords in publishing.              */
    7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7,    /* 5X  PQRSTUVWXYZ[\]^_  */
    0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,    /* 6x  `abcdefghijklmno  */
    7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,    /* 7X  pqrstuvwxyz{|}~  DEL */
    0                                   /* 0x80; 0x81..0xFF are implicitly 0 */
};
/* decode % escaped hex codes into character values
 */

/*
 * UNHEX(C): numeric value (0..15) of the hexadecimal digit C; any character
 * that is not a hex digit yields 0.  The argument is parenthesized so that an
 * expression such as UNHEX(c & 0x7f) is parsed as intended.  C is still
 * evaluated several times, so it must not have side effects.
 */
#define UNHEX(C) \
    (((C) >= '0' && (C) <= '9') ? (C) - '0' : \
    (((C) >= 'A' && (C) <= 'F') ? (C) - 'A' + 10 : \
    (((C) >= 'a' && (C) <= 'f') ? (C) - 'a' + 10 : 0)))

/*
 * IS_OK(C): non-zero when netCharType allows byte C to stay unescaped under
 * the mask held in a variable named `flags` at the point of use.
 */
#define IS_OK(C) (netCharType[((unsigned int) (C))] & (flags))

/* Character that introduces a %XX escape sequence. */
#define HEX_ESCAPE '%'
75 //----------------------------------------------------------------------------------------
76 static char* nsEscapeCount(
77 const char * str,
78 nsEscapeMask flags,
79 size_t* out_len)
80 //----------------------------------------------------------------------------------------
82 if (!str)
83 return 0;
85 size_t i, len = 0, charsToEscape = 0;
86 static const char hexChars[] = "0123456789ABCDEF";
88 register const unsigned char* src = (const unsigned char *) str;
89 while (*src)
91 len++;
92 if (!IS_OK(*src++))
93 charsToEscape++;
96 // calculate how much memory should be allocated
97 // original length + 2 bytes for each escaped character + terminating '\0'
98 // do the sum in steps to check for overflow
99 size_t dstSize = len + 1 + charsToEscape;
100 if (dstSize <= len)
101 return 0;
102 dstSize += charsToEscape;
103 if (dstSize < len)
104 return 0;
106 // fail if we need more than 4GB
107 // size_t is likely to be long unsigned int but nsMemory::Alloc(size_t)
108 // calls NS_Alloc_P(size_t) which calls PR_Malloc(PRUint32), so there is
109 // no chance to allocate more than 4GB using nsMemory::Alloc()
110 if (dstSize > PR_UINT32_MAX)
111 return 0;
113 char* result = (char *)nsMemory::Alloc(dstSize);
114 if (!result)
115 return 0;
117 register unsigned char* dst = (unsigned char *) result;
118 src = (const unsigned char *) str;
119 if (flags == url_XPAlphas)
121 for (i = 0; i < len; i++)
123 unsigned char c = *src++;
124 if (IS_OK(c))
125 *dst++ = c;
126 else if (c == ' ')
127 *dst++ = '+'; /* convert spaces to pluses */
128 else
130 *dst++ = HEX_ESCAPE;
131 *dst++ = hexChars[c >> 4]; /* high nibble */
132 *dst++ = hexChars[c & 0x0f]; /* low nibble */
136 else
138 for (i = 0; i < len; i++)
140 unsigned char c = *src++;
141 if (IS_OK(c))
142 *dst++ = c;
143 else
145 *dst++ = HEX_ESCAPE;
146 *dst++ = hexChars[c >> 4]; /* high nibble */
147 *dst++ = hexChars[c & 0x0f]; /* low nibble */
152 *dst = '\0'; /* tack on eos */
153 if(out_len)
154 *out_len = dst - (unsigned char *) result;
155 return result;
158 //----------------------------------------------------------------------------------------
159 NS_COM char* nsEscape(const char * str, nsEscapeMask flags)
160 //----------------------------------------------------------------------------------------
162 if(!str)
163 return NULL;
164 return nsEscapeCount(str, flags, NULL);
167 //----------------------------------------------------------------------------------------
168 NS_COM char* nsUnescape(char * str)
169 //----------------------------------------------------------------------------------------
171 nsUnescapeCount(str);
172 return str;
175 //----------------------------------------------------------------------------------------
176 NS_COM PRInt32 nsUnescapeCount(char * str)
177 //----------------------------------------------------------------------------------------
179 register char *src = str;
180 register char *dst = str;
181 static const char hexChars[] = "0123456789ABCDEFabcdef";
183 char c1[] = " ";
184 char c2[] = " ";
185 char* const pc1 = c1;
186 char* const pc2 = c2;
188 while (*src)
190 c1[0] = *(src+1);
191 if (*(src+1) == '\0')
192 c2[0] = '\0';
193 else
194 c2[0] = *(src+2);
196 if (*src != HEX_ESCAPE || PL_strpbrk(pc1, hexChars) == 0 ||
197 PL_strpbrk(pc2, hexChars) == 0 )
198 *dst++ = *src++;
199 else
201 src++; /* walk over escape */
202 if (*src)
204 *dst = UNHEX(*src) << 4;
205 src++;
207 if (*src)
209 *dst = (*dst + UNHEX(*src));
210 src++;
212 dst++;
216 *dst = 0;
217 return (int)(dst - str);
219 } /* NET_UnEscapeCnt */
222 NS_COM char *
223 nsEscapeHTML(const char * string)
225 /* XXX Hardcoded max entity len. The +1 is for the trailing null. */
226 char *rv = (char *) nsMemory::Alloc(strlen(string) * 6 + 1);
227 char *ptr = rv;
229 if(rv)
231 for(; *string != '\0'; string++)
233 if(*string == '<')
235 *ptr++ = '&';
236 *ptr++ = 'l';
237 *ptr++ = 't';
238 *ptr++ = ';';
240 else if(*string == '>')
242 *ptr++ = '&';
243 *ptr++ = 'g';
244 *ptr++ = 't';
245 *ptr++ = ';';
247 else if(*string == '&')
249 *ptr++ = '&';
250 *ptr++ = 'a';
251 *ptr++ = 'm';
252 *ptr++ = 'p';
253 *ptr++ = ';';
255 else if (*string == '"')
257 *ptr++ = '&';
258 *ptr++ = 'q';
259 *ptr++ = 'u';
260 *ptr++ = 'o';
261 *ptr++ = 't';
262 *ptr++ = ';';
264 else if (*string == '\'')
266 *ptr++ = '&';
267 *ptr++ = '#';
268 *ptr++ = '3';
269 *ptr++ = '9';
270 *ptr++ = ';';
272 else
274 *ptr++ = *string;
277 *ptr = '\0';
280 return(rv);
283 NS_COM PRUnichar *
284 nsEscapeHTML2(const PRUnichar *aSourceBuffer, PRInt32 aSourceBufferLen)
286 // if the caller didn't calculate the length
287 if (aSourceBufferLen == -1) {
288 aSourceBufferLen = nsCRT::strlen(aSourceBuffer); // ...then I will
291 /* XXX Hardcoded max entity len. */
292 PRUnichar *resultBuffer = (PRUnichar *)nsMemory::Alloc(aSourceBufferLen *
293 6 * sizeof(PRUnichar) + sizeof(PRUnichar('\0')));
294 PRUnichar *ptr = resultBuffer;
296 if (resultBuffer) {
297 PRInt32 i;
299 for(i = 0; i < aSourceBufferLen; i++) {
300 if(aSourceBuffer[i] == '<') {
301 *ptr++ = '&';
302 *ptr++ = 'l';
303 *ptr++ = 't';
304 *ptr++ = ';';
305 } else if(aSourceBuffer[i] == '>') {
306 *ptr++ = '&';
307 *ptr++ = 'g';
308 *ptr++ = 't';
309 *ptr++ = ';';
310 } else if(aSourceBuffer[i] == '&') {
311 *ptr++ = '&';
312 *ptr++ = 'a';
313 *ptr++ = 'm';
314 *ptr++ = 'p';
315 *ptr++ = ';';
316 } else if (aSourceBuffer[i] == '"') {
317 *ptr++ = '&';
318 *ptr++ = 'q';
319 *ptr++ = 'u';
320 *ptr++ = 'o';
321 *ptr++ = 't';
322 *ptr++ = ';';
323 } else if (aSourceBuffer[i] == '\'') {
324 *ptr++ = '&';
325 *ptr++ = '#';
326 *ptr++ = '3';
327 *ptr++ = '9';
328 *ptr++ = ';';
329 } else {
330 *ptr++ = aSourceBuffer[i];
333 *ptr = 0;
336 return resultBuffer;
339 //----------------------------------------------------------------------------------------
/*
 * Per-byte permission table consulted through NO_NEED_ESC(): entry c has a
 * bit set for every flag value under which byte c may stay unescaped.
 * Entries range from 0 (always escape) to 1023 (the ten low bits, values
 * 1 through 512, all set).  Bytes 0x80..0xFF carry no permission bits:
 * entry 0x80 is written out and the remainder is zero-initialized.
 */
const int EscapeChars[256] =
/*      0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F */
{
        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   /* 0x */
        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   /* 1x */
        0,1023,   0, 512,1023,   0,1023,   0,1023,1023,1023,1023,1023,1023, 953, 784,   /* 2x   !"#$%&'()*+,-./  */
     1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008, 912,   0,1008,   0, 768,   /* 3x  0123456789:;<=>?  */
     1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,   /* 4x  @ABCDEFGHIJKLMNO  */
     1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896, 896, 896, 896,1023,   /* 5x  PQRSTUVWXYZ[\]^_  */
        0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,   /* 6x  `abcdefghijklmno  */
     1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023,   0,   /* 7x  pqrstuvwxyz{|}~  DEL */
        0                                                                               /* 0x80 */
};

/*
 * NO_NEED_ESC(C): non-zero when EscapeChars allows byte C to stay unescaped
 * under the mask held in a variable named `flags` at the point of use.
 */
#define NO_NEED_ESC(C) (EscapeChars[((unsigned int) (C))] & (flags))
357 //----------------------------------------------------------------------------------------
359 /* returns an escaped string */
361 /* use the following flags to specify which
362 part of an URL you want to escape:
364 esc_Scheme = 1
365 esc_Username = 2
366 esc_Password = 4
367 esc_Host = 8
368 esc_Directory = 16
369 esc_FileBaseName = 32
370 esc_FileExtension = 64
371 esc_Param = 128
372 esc_Query = 256
373 esc_Ref = 512
376 /* by default this function will not escape parts of a string
377 that already look escaped, which means it already includes
378 a valid hexcode. This is done to avoid multiple escapes of
379 a string. Use the following flags to force escaping of a
380 string:
382 esc_Forced = 1024
385 NS_COM PRBool NS_EscapeURL(const char *part,
386 PRInt32 partLen,
387 PRUint32 flags,
388 nsACString &result)
390 if (!part) {
391 NS_NOTREACHED("null pointer");
392 return PR_FALSE;
395 int i = 0;
396 static const char hexChars[] = "0123456789ABCDEF";
397 if (partLen < 0)
398 partLen = strlen(part);
399 PRBool forced = !!(flags & esc_Forced);
400 PRBool ignoreNonAscii = !!(flags & esc_OnlyASCII);
401 PRBool ignoreAscii = !!(flags & esc_OnlyNonASCII);
402 PRBool writing = !!(flags & esc_AlwaysCopy);
403 PRBool colon = !!(flags & esc_Colon);
405 register const unsigned char* src = (const unsigned char *) part;
407 char tempBuffer[100];
408 unsigned int tempBufferPos = 0;
410 PRBool previousIsNonASCII = PR_FALSE;
411 for (i = 0; i < partLen; i++)
413 unsigned char c = *src++;
415 // if the char has not to be escaped or whatever follows % is
416 // a valid escaped string, just copy the char.
418 // Also the % will not be escaped until forced
419 // See bugzilla bug 61269 for details why we changed this
421 // And, we will not escape non-ascii characters if requested.
422 // On special request we will also escape the colon even when
423 // not covered by the matrix.
424 // ignoreAscii is not honored for control characters (C0 and DEL)
426 // And, we should escape the '|' character when it occurs after any
427 // non-ASCII character as it may be part of a multi-byte character.
429 // 0x20..0x7e are the valid ASCII characters. We also escape spaces
430 // (0x20) since they are not legal in URLs.
431 if ((NO_NEED_ESC(c) || (c == HEX_ESCAPE && !forced)
432 || (c > 0x7f && ignoreNonAscii)
433 || (c > 0x20 && c < 0x7f && ignoreAscii))
434 && !(c == ':' && colon)
435 && !(previousIsNonASCII && c == '|' && !ignoreNonAscii))
437 if (writing)
438 tempBuffer[tempBufferPos++] = c;
440 else /* do the escape magic */
442 if (!writing)
444 result.Append(part, i);
445 writing = PR_TRUE;
447 tempBuffer[tempBufferPos++] = HEX_ESCAPE;
448 tempBuffer[tempBufferPos++] = hexChars[c >> 4]; /* high nibble */
449 tempBuffer[tempBufferPos++] = hexChars[c & 0x0f]; /* low nibble */
452 if (tempBufferPos >= sizeof(tempBuffer) - 4)
454 NS_ASSERTION(writing, "should be writing");
455 tempBuffer[tempBufferPos] = '\0';
456 result += tempBuffer;
457 tempBufferPos = 0;
460 previousIsNonASCII = (c > 0x7f);
462 if (writing) {
463 tempBuffer[tempBufferPos] = '\0';
464 result += tempBuffer;
466 return writing;
469 #define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1)
471 NS_COM PRBool NS_UnescapeURL(const char *str, PRInt32 len, PRUint32 flags, nsACString &result)
473 if (!str) {
474 NS_NOTREACHED("null pointer");
475 return PR_FALSE;
478 if (len < 0)
479 len = strlen(str);
481 PRBool ignoreNonAscii = !!(flags & esc_OnlyASCII);
482 PRBool ignoreAscii = !!(flags & esc_OnlyNonASCII);
483 PRBool writing = !!(flags & esc_AlwaysCopy);
484 PRBool skipControl = !!(flags & esc_SkipControl);
486 static const char hexChars[] = "0123456789ABCDEFabcdef";
488 const char *last = str;
489 const char *p = str;
491 for (int i=0; i<len; ++i, ++p) {
492 //printf("%c [i=%d of len=%d]\n", *p, i, len);
493 if (*p == HEX_ESCAPE && i < len-2) {
494 unsigned char *p1 = ((unsigned char *) p) + 1;
495 unsigned char *p2 = ((unsigned char *) p) + 2;
496 if (ISHEX(*p1) && ISHEX(*p2) &&
497 ((*p1 < '8' && !ignoreAscii) || (*p1 >= '8' && !ignoreNonAscii)) &&
498 !(skipControl &&
499 (*p1 < '2' || (*p1 == '7' && (*p2 == 'f' || *p2 == 'F'))))) {
500 //printf("- p1=%c p2=%c\n", *p1, *p2);
501 writing = PR_TRUE;
502 if (p > last) {
503 //printf("- p=%p, last=%p\n", p, last);
504 result.Append(last, p - last);
505 last = p;
507 char u = (UNHEX(*p1) << 4) + UNHEX(*p2);
508 //printf("- u=%c\n", u);
509 result.Append(u);
510 i += 2;
511 p += 2;
512 last += 3;
516 if (writing && last < str + len)
517 result.Append(last, str + len - last);
519 return writing;