Bug 460926 A11y hierachy is broken on Ubuntu 8.10 (GNOME 2.24), r=Evan.Yan sr=roc
[wine-gecko.git] / security / nss / lib / base / utf8.c
blobe850ee873dfda55b08368457a34b415723b5b75a
1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
12 * License.
14 * The Original Code is the Netscape security libraries.
16 * The Initial Developer of the Original Code is
17 * Netscape Communications Corporation.
18 * Portions created by the Initial Developer are Copyright (C) 1994-2000
19 * the Initial Developer. All Rights Reserved.
21 * Contributor(s):
23 * Alternatively, the contents of this file may be used under the terms of
24 * either the GNU General Public License Version 2 or later (the "GPL"), or
25 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
26 * in which case the provisions of the GPL or the LGPL are applicable instead
27 * of those above. If you wish to allow use of your version of this file only
28 * under the terms of either the GPL or the LGPL, and not to allow others to
29 * use your version of this file under the terms of the MPL, indicate your
30 * decision by deleting the provisions above and replace them with the notice
31 * and other provisions required by the GPL or the LGPL. If you do not delete
32 * the provisions above, a recipient may use your version of this file under
33 * the terms of any one of the MPL, the GPL or the LGPL.
35 * ***** END LICENSE BLOCK ***** */
37 #ifdef DEBUG
38 static const char CVS_ID[] = "@(#) $RCSfile: utf8.c,v $ $Revision: 1.7 $ $Date: 2005/01/20 02:25:45 $";
39 #endif /* DEBUG */
42 * utf8.c
44 * This file contains some additional utility routines required for
45 * handling UTF8 strings.
48 #ifndef BASE_H
49 #include "base.h"
50 #endif /* BASE_H */
52 #include "plstr.h"
55 * NOTES:
57 * There's an "is hex string" function in pki1/atav.c. If we need
58 * it in more places, pull that one out.
62 * nssUTF8_CaseIgnoreMatch
64 * Returns true if the two UTF8-encoded strings pointed to by the
65 * two specified NSSUTF8 pointers differ only in typcase.
67 * The error may be one of the following values:
68 * NSS_ERROR_INVALID_POINTER
70 * Return value:
71 * PR_TRUE if the strings match, ignoring case
72 * PR_FALSE if they don't
73 * PR_FALSE upon error
76 NSS_IMPLEMENT PRBool
77 nssUTF8_CaseIgnoreMatch
79 const NSSUTF8 *a,
80 const NSSUTF8 *b,
81 PRStatus *statusOpt
84 #ifdef NSSDEBUG
85 if( ((const NSSUTF8 *)NULL == a) ||
86 ((const NSSUTF8 *)NULL == b) ) {
87 nss_SetError(NSS_ERROR_INVALID_POINTER);
88 if( (PRStatus *)NULL != statusOpt ) {
89 *statusOpt = PR_FAILURE;
91 return PR_FALSE;
93 #endif /* NSSDEBUG */
95 if( (PRStatus *)NULL != statusOpt ) {
96 *statusOpt = PR_SUCCESS;
100 * XXX fgmr
102 * This is, like, so wrong!
104 if( 0 == PL_strcasecmp((const char *)a, (const char *)b) ) {
105 return PR_TRUE;
106 } else {
107 return PR_FALSE;
112 * nssUTF8_PrintableMatch
114 * Returns true if the two Printable strings pointed to by the
115 * two specified NSSUTF8 pointers match when compared with the
116 * rules for Printable String (leading and trailing spaces are
117 * disregarded, extents of whitespace match irregardless of length,
118 * and case is not significant), then PR_TRUE will be returned.
119 * Otherwise, PR_FALSE will be returned. Upon failure, PR_FALSE
120 * will be returned. If the optional statusOpt argument is not
121 * NULL, then PR_SUCCESS or PR_FAILURE will be stored in that
122 * location.
124 * The error may be one of the following values:
125 * NSS_ERROR_INVALID_POINTER
127 * Return value:
128 * PR_TRUE if the strings match, ignoring case
129 * PR_FALSE if they don't
130 * PR_FALSE upon error
133 NSS_IMPLEMENT PRBool
134 nssUTF8_PrintableMatch
136 const NSSUTF8 *a,
137 const NSSUTF8 *b,
138 PRStatus *statusOpt
141 PRUint8 *c;
142 PRUint8 *d;
144 #ifdef NSSDEBUG
145 if( ((const NSSUTF8 *)NULL == a) ||
146 ((const NSSUTF8 *)NULL == b) ) {
147 nss_SetError(NSS_ERROR_INVALID_POINTER);
148 if( (PRStatus *)NULL != statusOpt ) {
149 *statusOpt = PR_FAILURE;
151 return PR_FALSE;
153 #endif /* NSSDEBUG */
155 if( (PRStatus *)NULL != statusOpt ) {
156 *statusOpt = PR_SUCCESS;
159 c = (PRUint8 *)a;
160 d = (PRUint8 *)b;
162 while( ' ' == *c ) {
163 c++;
166 while( ' ' == *d ) {
167 d++;
170 while( ('\0' != *c) && ('\0' != *d) ) {
171 PRUint8 e, f;
173 e = *c;
174 f = *d;
176 if( ('a' <= e) && (e <= 'z') ) {
177 e -= ('a' - 'A');
180 if( ('a' <= f) && (f <= 'z') ) {
181 f -= ('a' - 'A');
184 if( e != f ) {
185 return PR_FALSE;
188 c++;
189 d++;
191 if( ' ' == *c ) {
192 while( ' ' == *c ) {
193 c++;
195 c--;
198 if( ' ' == *d ) {
199 while( ' ' == *d ) {
200 d++;
202 d--;
206 while( ' ' == *c ) {
207 c++;
210 while( ' ' == *d ) {
211 d++;
214 if( *c == *d ) {
215 /* And both '\0', btw */
216 return PR_TRUE;
217 } else {
218 return PR_FALSE;
223 * nssUTF8_Duplicate
225 * This routine duplicates the UTF8-encoded string pointed to by the
226 * specified NSSUTF8 pointer. If the optional arenaOpt argument is
227 * not null, the memory required will be obtained from that arena;
228 * otherwise, the memory required will be obtained from the heap.
229 * A pointer to the new string will be returned. In case of error,
230 * an error will be placed on the error stack and NULL will be
231 * returned.
233 * The error may be one of the following values:
234 * NSS_ERROR_INVALID_POINTER
235 * NSS_ERROR_INVALID_ARENA
236 * NSS_ERROR_NO_MEMORY
239 NSS_IMPLEMENT NSSUTF8 *
240 nssUTF8_Duplicate
242 const NSSUTF8 *s,
243 NSSArena *arenaOpt
246 NSSUTF8 *rv;
247 PRUint32 len;
249 #ifdef NSSDEBUG
250 if( (const NSSUTF8 *)NULL == s ) {
251 nss_SetError(NSS_ERROR_INVALID_POINTER);
252 return (NSSUTF8 *)NULL;
255 if( (NSSArena *)NULL != arenaOpt ) {
256 if( PR_SUCCESS != nssArena_verifyPointer(arenaOpt) ) {
257 return (NSSUTF8 *)NULL;
260 #endif /* NSSDEBUG */
262 len = PL_strlen((const char *)s);
263 #ifdef PEDANTIC
264 if( '\0' != ((const char *)s)[ len ] ) {
265 /* must have wrapped, e.g., too big for PRUint32 */
266 nss_SetError(NSS_ERROR_NO_MEMORY);
267 return (NSSUTF8 *)NULL;
269 #endif /* PEDANTIC */
270 len++; /* zero termination */
272 rv = nss_ZAlloc(arenaOpt, len);
273 if( (void *)NULL == rv ) {
274 return (NSSUTF8 *)NULL;
277 (void)nsslibc_memcpy(rv, s, len);
278 return rv;
282 * nssUTF8_Size
284 * This routine returns the length in bytes (including the terminating
285 * null) of the UTF8-encoded string pointed to by the specified
286 * NSSUTF8 pointer. Zero is returned on error.
288 * The error may be one of the following values:
289 * NSS_ERROR_INVALID_POINTER
290 * NSS_ERROR_VALUE_TOO_LARGE
292 * Return value:
293 * 0 on error
294 * nonzero length of the string.
297 NSS_IMPLEMENT PRUint32
298 nssUTF8_Size
300 const NSSUTF8 *s,
301 PRStatus *statusOpt
304 PRUint32 sv;
306 #ifdef NSSDEBUG
307 if( (const NSSUTF8 *)NULL == s ) {
308 nss_SetError(NSS_ERROR_INVALID_POINTER);
309 if( (PRStatus *)NULL != statusOpt ) {
310 *statusOpt = PR_FAILURE;
312 return 0;
314 #endif /* NSSDEBUG */
316 sv = PL_strlen((const char *)s) + 1;
317 #ifdef PEDANTIC
318 if( '\0' != ((const char *)s)[ sv-1 ] ) {
319 /* wrapped */
320 nss_SetError(NSS_ERROR_VALUE_TOO_LARGE);
321 if( (PRStatus *)NULL != statusOpt ) {
322 *statusOpt = PR_FAILURE;
324 return 0;
326 #endif /* PEDANTIC */
328 if( (PRStatus *)NULL != statusOpt ) {
329 *statusOpt = PR_SUCCESS;
332 return sv;
336 * nssUTF8_Length
338 * This routine returns the length in characters (not including the
339 * terminating null) of the UTF8-encoded string pointed to by the
340 * specified NSSUTF8 pointer.
342 * The error may be one of the following values:
343 * NSS_ERROR_INVALID_POINTER
344 * NSS_ERROR_VALUE_TOO_LARGE
345 * NSS_ERROR_INVALID_STRING
347 * Return value:
348 * length of the string (which may be zero)
349 * 0 on error
352 NSS_IMPLEMENT PRUint32
353 nssUTF8_Length
355 const NSSUTF8 *s,
356 PRStatus *statusOpt
359 PRUint32 l = 0;
360 const PRUint8 *c = (const PRUint8 *)s;
362 #ifdef NSSDEBUG
363 if( (const NSSUTF8 *)NULL == s ) {
364 nss_SetError(NSS_ERROR_INVALID_POINTER);
365 goto loser;
367 #endif /* NSSDEBUG */
370 * From RFC 2044:
372 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
373 * 0000 0000-0000 007F 0xxxxxxx
374 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
375 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
376 * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
377 * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
378 * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx
381 while( 0 != *c ) {
382 PRUint32 incr;
383 if( (*c & 0x80) == 0 ) {
384 incr = 1;
385 } else if( (*c & 0xE0) == 0xC0 ) {
386 incr = 2;
387 } else if( (*c & 0xF0) == 0xE0 ) {
388 incr = 3;
389 } else if( (*c & 0xF8) == 0xF0 ) {
390 incr = 4;
391 } else if( (*c & 0xFC) == 0xF8 ) {
392 incr = 5;
393 } else if( (*c & 0xFE) == 0xFC ) {
394 incr = 6;
395 } else {
396 nss_SetError(NSS_ERROR_INVALID_STRING);
397 goto loser;
400 l += incr;
402 #ifdef PEDANTIC
403 if( l < incr ) {
404 /* Wrapped-- too big */
405 nss_SetError(NSS_ERROR_VALUE_TOO_LARGE);
406 goto loser;
410 PRUint8 *d;
411 for( d = &c[1]; d < &c[incr]; d++ ) {
412 if( (*d & 0xC0) != 0xF0 ) {
413 nss_SetError(NSS_ERROR_INVALID_STRING);
414 goto loser;
418 #endif /* PEDANTIC */
420 c += incr;
423 if( (PRStatus *)NULL != statusOpt ) {
424 *statusOpt = PR_SUCCESS;
427 return l;
429 loser:
430 if( (PRStatus *)NULL != statusOpt ) {
431 *statusOpt = PR_FAILURE;
434 return 0;
439 * nssUTF8_Create
441 * This routine creates a UTF8 string from a string in some other
442 * format. Some types of string may include embedded null characters,
443 * so for them the length parameter must be used. For string types
444 * that are null-terminated, the length parameter is optional; if it
445 * is zero, it will be ignored. If the optional arena argument is
446 * non-null, the memory used for the new string will be obtained from
447 * that arena, otherwise it will be obtained from the heap. This
448 * routine may return NULL upon error, in which case it will have
449 * placed an error on the error stack.
451 * The error may be one of the following:
452 * NSS_ERROR_INVALID_POINTER
453 * NSS_ERROR_NO_MEMORY
454 * NSS_ERROR_UNSUPPORTED_TYPE
456 * Return value:
457 * NULL upon error
458 * A non-null pointer to a new UTF8 string otherwise
461 extern const NSSError NSS_ERROR_INTERNAL_ERROR; /* XXX fgmr */
463 NSS_IMPLEMENT NSSUTF8 *
464 nssUTF8_Create
466 NSSArena *arenaOpt,
467 nssStringType type,
468 const void *inputString,
469 PRUint32 size /* in bytes, not characters */
472 NSSUTF8 *rv = NULL;
474 #ifdef NSSDEBUG
475 if( (NSSArena *)NULL != arenaOpt ) {
476 if( PR_SUCCESS != nssArena_verifyPointer(arenaOpt) ) {
477 return (NSSUTF8 *)NULL;
481 if( (const void *)NULL == inputString ) {
482 nss_SetError(NSS_ERROR_INVALID_POINTER);
483 return (NSSUTF8 *)NULL;
485 #endif /* NSSDEBUG */
487 switch( type ) {
488 case nssStringType_DirectoryString:
489 /* This is a composite type requiring BER */
490 nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
491 break;
492 case nssStringType_TeletexString:
494 * draft-ietf-pkix-ipki-part1-11 says in part:
496 * In addition, many legacy implementations support names encoded
497 * in the ISO 8859-1 character set (Latin1String) but tag them as
498 * TeletexString. The Latin1String includes characters used in
499 * Western European countries which are not part of the
500 * TeletexString charcter set. Implementations that process
501 * TeletexString SHOULD be prepared to handle the entire ISO
502 * 8859-1 character set.[ISO 8859-1].
504 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
505 break;
506 case nssStringType_PrintableString:
508 * PrintableString consists of A-Za-z0-9 ,()+,-./:=?
509 * This is a subset of ASCII, which is a subset of UTF8.
510 * So we can just duplicate the string over.
513 if( 0 == size ) {
514 rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt);
515 } else {
516 rv = nss_ZAlloc(arenaOpt, size+1);
517 if( (NSSUTF8 *)NULL == rv ) {
518 return (NSSUTF8 *)NULL;
521 (void)nsslibc_memcpy(rv, inputString, size);
524 break;
525 case nssStringType_UniversalString:
526 /* 4-byte unicode */
527 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
528 break;
529 case nssStringType_BMPString:
530 /* Base Multilingual Plane of Unicode */
531 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
532 break;
533 case nssStringType_UTF8String:
534 if( 0 == size ) {
535 rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt);
536 } else {
537 rv = nss_ZAlloc(arenaOpt, size+1);
538 if( (NSSUTF8 *)NULL == rv ) {
539 return (NSSUTF8 *)NULL;
542 (void)nsslibc_memcpy(rv, inputString, size);
545 break;
546 case nssStringType_PHGString:
548 * PHGString is an IA5String (with case-insensitive comparisons).
549 * IA5 is ~almost~ ascii; ascii has dollar-sign where IA5 has
550 * currency symbol.
552 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
553 break;
554 case nssStringType_GeneralString:
555 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
556 break;
557 default:
558 nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
559 break;
562 return rv;
565 NSS_IMPLEMENT NSSItem *
566 nssUTF8_GetEncoding
568 NSSArena *arenaOpt,
569 NSSItem *rvOpt,
570 nssStringType type,
571 NSSUTF8 *string
574 NSSItem *rv = (NSSItem *)NULL;
575 PRStatus status = PR_SUCCESS;
577 #ifdef NSSDEBUG
578 if( (NSSArena *)NULL != arenaOpt ) {
579 if( PR_SUCCESS != nssArena_verifyPointer(arenaOpt) ) {
580 return (NSSItem *)NULL;
584 if( (NSSUTF8 *)NULL == string ) {
585 nss_SetError(NSS_ERROR_INVALID_POINTER);
586 return (NSSItem *)NULL;
588 #endif /* NSSDEBUG */
590 switch( type ) {
591 case nssStringType_DirectoryString:
592 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
593 break;
594 case nssStringType_TeletexString:
595 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
596 break;
597 case nssStringType_PrintableString:
598 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
599 break;
600 case nssStringType_UniversalString:
601 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
602 break;
603 case nssStringType_BMPString:
604 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
605 break;
606 case nssStringType_UTF8String:
608 NSSUTF8 *dup = nssUTF8_Duplicate(string, arenaOpt);
609 if( (NSSUTF8 *)NULL == dup ) {
610 return (NSSItem *)NULL;
613 if( (NSSItem *)NULL == rvOpt ) {
614 rv = nss_ZNEW(arenaOpt, NSSItem);
615 if( (NSSItem *)NULL == rv ) {
616 (void)nss_ZFreeIf(dup);
617 return (NSSItem *)NULL;
619 } else {
620 rv = rvOpt;
623 rv->data = dup;
624 dup = (NSSUTF8 *)NULL;
625 rv->size = nssUTF8_Size(rv->data, &status);
626 if( (0 == rv->size) && (PR_SUCCESS != status) ) {
627 if( (NSSItem *)NULL == rvOpt ) {
628 (void)nss_ZFreeIf(rv);
630 return (NSSItem *)NULL;
633 break;
634 case nssStringType_PHGString:
635 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
636 break;
637 default:
638 nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
639 break;
642 return rv;
646 * nssUTF8_CopyIntoFixedBuffer
648 * This will copy a UTF8 string into a fixed-length buffer, making
649 * sure that the all characters are valid. Any remaining space will
650 * be padded with the specified ASCII character, typically either
651 * null or space.
653 * Blah, blah, blah.
656 NSS_IMPLEMENT PRStatus
657 nssUTF8_CopyIntoFixedBuffer
659 NSSUTF8 *string,
660 char *buffer,
661 PRUint32 bufferSize,
662 char pad
665 PRUint32 stringSize = 0;
667 #ifdef NSSDEBUG
668 if( (char *)NULL == buffer ) {
669 nss_SetError(NSS_ERROR_INVALID_POINTER);
670 return PR_FALSE;
673 if( 0 == bufferSize ) {
674 nss_SetError(NSS_ERROR_INVALID_ARGUMENT);
675 return PR_FALSE;
678 if( (pad & 0x80) != 0x00 ) {
679 nss_SetError(NSS_ERROR_INVALID_ARGUMENT);
680 return PR_FALSE;
682 #endif /* NSSDEBUG */
684 if( (NSSUTF8 *)NULL == string ) {
685 string = (NSSUTF8 *) "";
688 stringSize = nssUTF8_Size(string, (PRStatus *)NULL);
689 stringSize--; /* don't count the trailing null */
690 if( stringSize > bufferSize ) {
691 PRUint32 bs = bufferSize;
692 (void)nsslibc_memcpy(buffer, string, bufferSize);
694 if( ( ((buffer[ bs-1 ] & 0x80) == 0x00)) ||
695 ((bs > 1) && ((buffer[ bs-2 ] & 0xE0) == 0xC0)) ||
696 ((bs > 2) && ((buffer[ bs-3 ] & 0xF0) == 0xE0)) ||
697 ((bs > 3) && ((buffer[ bs-4 ] & 0xF8) == 0xF0)) ||
698 ((bs > 4) && ((buffer[ bs-5 ] & 0xFC) == 0xF8)) ||
699 ((bs > 5) && ((buffer[ bs-6 ] & 0xFE) == 0xFC)) ) {
700 /* It fit exactly */
701 return PR_SUCCESS;
704 /* Too long. We have to trim the last character */
705 for( /*bs*/; bs != 0; bs-- ) {
706 if( (buffer[bs-1] & 0xC0) != 0x80 ) {
707 buffer[bs-1] = pad;
708 break;
709 } else {
710 buffer[bs-1] = pad;
713 } else {
714 (void)nsslibc_memset(buffer, pad, bufferSize);
715 (void)nsslibc_memcpy(buffer, string, stringSize);
718 return PR_SUCCESS;
722 * nssUTF8_Equal
726 NSS_IMPLEMENT PRBool
727 nssUTF8_Equal
729 const NSSUTF8 *a,
730 const NSSUTF8 *b,
731 PRStatus *statusOpt
734 PRUint32 la, lb;
736 #ifdef NSSDEBUG
737 if( ((const NSSUTF8 *)NULL == a) ||
738 ((const NSSUTF8 *)NULL == b) ) {
739 nss_SetError(NSS_ERROR_INVALID_POINTER);
740 if( (PRStatus *)NULL != statusOpt ) {
741 *statusOpt = PR_FAILURE;
743 return PR_FALSE;
745 #endif /* NSSDEBUG */
747 la = nssUTF8_Size(a, statusOpt);
748 if( 0 == la ) {
749 return PR_FALSE;
752 lb = nssUTF8_Size(b, statusOpt);
753 if( 0 == lb ) {
754 return PR_FALSE;
757 if( la != lb ) {
758 return PR_FALSE;
761 return nsslibc_memequal(a, b, la, statusOpt);