1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
14 * The Original Code is the Netscape security libraries.
16 * The Initial Developer of the Original Code is
17 * Netscape Communications Corporation.
18 * Portions created by the Initial Developer are Copyright (C) 1994-2000
19 * the Initial Developer. All Rights Reserved.
23 * Alternatively, the contents of this file may be used under the terms of
24 * either the GNU General Public License Version 2 or later (the "GPL"), or
25 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
26 * in which case the provisions of the GPL or the LGPL are applicable instead
27 * of those above. If you wish to allow use of your version of this file only
28 * under the terms of either the GPL or the LGPL, and not to allow others to
29 * use your version of this file under the terms of the MPL, indicate your
30 * decision by deleting the provisions above and replace them with the notice
31 * and other provisions required by the GPL or the LGPL. If you do not delete
32 * the provisions above, a recipient may use your version of this file under
33 * the terms of any one of the MPL, the GPL or the LGPL.
35 * ***** END LICENSE BLOCK ***** */
38 static const char CVS_ID
[] = "@(#) $RCSfile: utf8.c,v $ $Revision: 1.7 $ $Date: 2005/01/20 02:25:45 $";
44 * This file contains some additional utility routines required for
45 * handling UTF8 strings.
57 * There's an "is hex string" function in pki1/atav.c. If we need
58 * it in more places, pull that one out.
62 * nssUTF8_CaseIgnoreMatch
64 * Returns true if the two UTF8-encoded strings pointed to by the
65 * two specified NSSUTF8 pointers differ only in letter case.
67 * The error may be one of the following values:
68 * NSS_ERROR_INVALID_POINTER
71 * PR_TRUE if the strings match, ignoring case
72 * PR_FALSE if they don't
77 nssUTF8_CaseIgnoreMatch
85 if( ((const NSSUTF8
*)NULL
== a
) ||
86 ((const NSSUTF8
*)NULL
== b
) ) {
87 nss_SetError(NSS_ERROR_INVALID_POINTER
);
88 if( (PRStatus
*)NULL
!= statusOpt
) {
89 *statusOpt
= PR_FAILURE
;
95 if( (PRStatus
*)NULL
!= statusOpt
) {
96 *statusOpt
= PR_SUCCESS
;
102 * This is, like, so wrong: PL_strcasecmp compares bytes, not UTF-8 characters!
104 if( 0 == PL_strcasecmp((const char *)a
, (const char *)b
) ) {
112 * nssUTF8_PrintableMatch
114 * If the two Printable strings pointed to by the
115 * two specified NSSUTF8 pointers match when compared with the
116 * rules for Printable String (leading and trailing spaces are
117 * disregarded, extents of whitespace match regardless of length,
118 * and case is not significant), then PR_TRUE will be returned.
119 * Otherwise, PR_FALSE will be returned. Upon failure, PR_FALSE
120 * will be returned. If the optional statusOpt argument is not
121 * NULL, then PR_SUCCESS or PR_FAILURE will be stored in that
124 * The error may be one of the following values:
125 * NSS_ERROR_INVALID_POINTER
128 * PR_TRUE if the strings match, ignoring case
129 * PR_FALSE if they don't
130 * PR_FALSE upon error
134 nssUTF8_PrintableMatch
145 if( ((const NSSUTF8
*)NULL
== a
) ||
146 ((const NSSUTF8
*)NULL
== b
) ) {
147 nss_SetError(NSS_ERROR_INVALID_POINTER
);
148 if( (PRStatus
*)NULL
!= statusOpt
) {
149 *statusOpt
= PR_FAILURE
;
153 #endif /* NSSDEBUG */
155 if( (PRStatus
*)NULL
!= statusOpt
) {
156 *statusOpt
= PR_SUCCESS
;
170 while( ('\0' != *c
) && ('\0' != *d
) ) {
176 if( ('a' <= e
) && (e
<= 'z') ) {
180 if( ('a' <= f
) && (f
<= 'z') ) {
215 /* And both '\0', btw */
225 * This routine duplicates the UTF8-encoded string pointed to by the
226 * specified NSSUTF8 pointer. If the optional arenaOpt argument is
227 * not null, the memory required will be obtained from that arena;
228 * otherwise, the memory required will be obtained from the heap.
229 * A pointer to the new string will be returned. In case of error,
230 * an error will be placed on the error stack and NULL will be
233 * The error may be one of the following values:
234 * NSS_ERROR_INVALID_POINTER
235 * NSS_ERROR_INVALID_ARENA
236 * NSS_ERROR_NO_MEMORY
239 NSS_IMPLEMENT NSSUTF8
*
250 if( (const NSSUTF8
*)NULL
== s
) {
251 nss_SetError(NSS_ERROR_INVALID_POINTER
);
252 return (NSSUTF8
*)NULL
;
255 if( (NSSArena
*)NULL
!= arenaOpt
) {
256 if( PR_SUCCESS
!= nssArena_verifyPointer(arenaOpt
) ) {
257 return (NSSUTF8
*)NULL
;
260 #endif /* NSSDEBUG */
262 len
= PL_strlen((const char *)s
);
264 if( '\0' != ((const char *)s
)[ len
] ) {
265 /* must have wrapped, e.g., too big for PRUint32 */
266 nss_SetError(NSS_ERROR_NO_MEMORY
);
267 return (NSSUTF8
*)NULL
;
269 #endif /* PEDANTIC */
270 len
++; /* zero termination */
272 rv
= nss_ZAlloc(arenaOpt
, len
);
273 if( (void *)NULL
== rv
) {
274 return (NSSUTF8
*)NULL
;
277 (void)nsslibc_memcpy(rv
, s
, len
);
284 * This routine returns the length in bytes (including the terminating
285 * null) of the UTF8-encoded string pointed to by the specified
286 * NSSUTF8 pointer. Zero is returned on error.
288 * The error may be one of the following values:
289 * NSS_ERROR_INVALID_POINTER
290 * NSS_ERROR_VALUE_TOO_LARGE
294 * nonzero length of the string.
297 NSS_IMPLEMENT PRUint32
307 if( (const NSSUTF8
*)NULL
== s
) {
308 nss_SetError(NSS_ERROR_INVALID_POINTER
);
309 if( (PRStatus
*)NULL
!= statusOpt
) {
310 *statusOpt
= PR_FAILURE
;
314 #endif /* NSSDEBUG */
316 sv
= PL_strlen((const char *)s
) + 1;
318 if( '\0' != ((const char *)s
)[ sv
-1 ] ) {
320 nss_SetError(NSS_ERROR_VALUE_TOO_LARGE
);
321 if( (PRStatus
*)NULL
!= statusOpt
) {
322 *statusOpt
= PR_FAILURE
;
326 #endif /* PEDANTIC */
328 if( (PRStatus
*)NULL
!= statusOpt
) {
329 *statusOpt
= PR_SUCCESS
;
338 * This routine returns the length in characters (not including the
339 * terminating null) of the UTF8-encoded string pointed to by the
340 * specified NSSUTF8 pointer.
342 * The error may be one of the following values:
343 * NSS_ERROR_INVALID_POINTER
344 * NSS_ERROR_VALUE_TOO_LARGE
345 * NSS_ERROR_INVALID_STRING
348 * length of the string (which may be zero)
352 NSS_IMPLEMENT PRUint32
360 const PRUint8
*c
= (const PRUint8
*)s
;
363 if( (const NSSUTF8
*)NULL
== s
) {
364 nss_SetError(NSS_ERROR_INVALID_POINTER
);
367 #endif /* NSSDEBUG */
372 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
373 * 0000 0000-0000 007F 0xxxxxxx
374 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
375 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
376 * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
377 * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
378 * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx
383 if( (*c
& 0x80) == 0 ) {
385 } else if( (*c
& 0xE0) == 0xC0 ) {
387 } else if( (*c
& 0xF0) == 0xE0 ) {
389 } else if( (*c
& 0xF8) == 0xF0 ) {
391 } else if( (*c
& 0xFC) == 0xF8 ) {
393 } else if( (*c
& 0xFE) == 0xFC ) {
396 nss_SetError(NSS_ERROR_INVALID_STRING
);
404 /* Wrapped-- too big */
405 nss_SetError(NSS_ERROR_VALUE_TOO_LARGE
);
411 for( d
= &c
[1]; d
< &c
[incr
]; d
++ ) {
412 if( (*d
& 0xC0) != 0xF0 ) {
413 nss_SetError(NSS_ERROR_INVALID_STRING
);
418 #endif /* PEDANTIC */
423 if( (PRStatus
*)NULL
!= statusOpt
) {
424 *statusOpt
= PR_SUCCESS
;
430 if( (PRStatus
*)NULL
!= statusOpt
) {
431 *statusOpt
= PR_FAILURE
;
441 * This routine creates a UTF8 string from a string in some other
442 * format. Some types of string may include embedded null characters,
443 * so for them the length parameter must be used. For string types
444 * that are null-terminated, the length parameter is optional; if it
445 * is zero, it will be ignored. If the optional arena argument is
446 * non-null, the memory used for the new string will be obtained from
447 * that arena, otherwise it will be obtained from the heap. This
448 * routine may return NULL upon error, in which case it will have
449 * placed an error on the error stack.
451 * The error may be one of the following:
452 * NSS_ERROR_INVALID_POINTER
453 * NSS_ERROR_NO_MEMORY
454 * NSS_ERROR_UNSUPPORTED_TYPE
458 * A non-null pointer to a new UTF8 string otherwise
461 extern const NSSError NSS_ERROR_INTERNAL_ERROR
; /* XXX fgmr */
463 NSS_IMPLEMENT NSSUTF8
*
468 const void *inputString
,
469 PRUint32 size
/* in bytes, not characters */
475 if( (NSSArena
*)NULL
!= arenaOpt
) {
476 if( PR_SUCCESS
!= nssArena_verifyPointer(arenaOpt
) ) {
477 return (NSSUTF8
*)NULL
;
481 if( (const void *)NULL
== inputString
) {
482 nss_SetError(NSS_ERROR_INVALID_POINTER
);
483 return (NSSUTF8
*)NULL
;
485 #endif /* NSSDEBUG */
488 case nssStringType_DirectoryString
:
489 /* This is a composite type requiring BER */
490 nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE
);
492 case nssStringType_TeletexString
:
494 * draft-ietf-pkix-ipki-part1-11 says in part:
496 * In addition, many legacy implementations support names encoded
497 * in the ISO 8859-1 character set (Latin1String) but tag them as
498 * TeletexString. The Latin1String includes characters used in
499 * Western European countries which are not part of the
500 * TeletexString character set. Implementations that process
501 * TeletexString SHOULD be prepared to handle the entire ISO
502 * 8859-1 character set.[ISO 8859-1].
504 nss_SetError(NSS_ERROR_INTERNAL_ERROR
); /* unimplemented */
506 case nssStringType_PrintableString
:
508 * PrintableString consists of A-Za-z0-9 '()+,-./:=?
509 * This is a subset of ASCII, which is a subset of UTF8.
510 * So we can just duplicate the string over.
514 rv
= nssUTF8_Duplicate((const NSSUTF8
*)inputString
, arenaOpt
);
516 rv
= nss_ZAlloc(arenaOpt
, size
+1);
517 if( (NSSUTF8
*)NULL
== rv
) {
518 return (NSSUTF8
*)NULL
;
521 (void)nsslibc_memcpy(rv
, inputString
, size
);
525 case nssStringType_UniversalString
:
527 nss_SetError(NSS_ERROR_INTERNAL_ERROR
); /* unimplemented */
529 case nssStringType_BMPString
:
530 /* Base Multilingual Plane of Unicode */
531 nss_SetError(NSS_ERROR_INTERNAL_ERROR
); /* unimplemented */
533 case nssStringType_UTF8String
:
535 rv
= nssUTF8_Duplicate((const NSSUTF8
*)inputString
, arenaOpt
);
537 rv
= nss_ZAlloc(arenaOpt
, size
+1);
538 if( (NSSUTF8
*)NULL
== rv
) {
539 return (NSSUTF8
*)NULL
;
542 (void)nsslibc_memcpy(rv
, inputString
, size
);
546 case nssStringType_PHGString
:
548 * PHGString is an IA5String (with case-insensitive comparisons).
549 * IA5 is ~almost~ ascii; ascii has dollar-sign where IA5 has
552 nss_SetError(NSS_ERROR_INTERNAL_ERROR
); /* unimplemented */
554 case nssStringType_GeneralString
:
555 nss_SetError(NSS_ERROR_INTERNAL_ERROR
); /* unimplemented */
558 nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE
);
565 NSS_IMPLEMENT NSSItem
*
574 NSSItem
*rv
= (NSSItem
*)NULL
;
575 PRStatus status
= PR_SUCCESS
;
578 if( (NSSArena
*)NULL
!= arenaOpt
) {
579 if( PR_SUCCESS
!= nssArena_verifyPointer(arenaOpt
) ) {
580 return (NSSItem
*)NULL
;
584 if( (NSSUTF8
*)NULL
== string
) {
585 nss_SetError(NSS_ERROR_INVALID_POINTER
);
586 return (NSSItem
*)NULL
;
588 #endif /* NSSDEBUG */
591 case nssStringType_DirectoryString
:
592 nss_SetError(NSS_ERROR_INTERNAL_ERROR
); /* unimplemented */
594 case nssStringType_TeletexString
:
595 nss_SetError(NSS_ERROR_INTERNAL_ERROR
); /* unimplemented */
597 case nssStringType_PrintableString
:
598 nss_SetError(NSS_ERROR_INTERNAL_ERROR
); /* unimplemented */
600 case nssStringType_UniversalString
:
601 nss_SetError(NSS_ERROR_INTERNAL_ERROR
); /* unimplemented */
603 case nssStringType_BMPString
:
604 nss_SetError(NSS_ERROR_INTERNAL_ERROR
); /* unimplemented */
606 case nssStringType_UTF8String
:
608 NSSUTF8
*dup
= nssUTF8_Duplicate(string
, arenaOpt
);
609 if( (NSSUTF8
*)NULL
== dup
) {
610 return (NSSItem
*)NULL
;
613 if( (NSSItem
*)NULL
== rvOpt
) {
614 rv
= nss_ZNEW(arenaOpt
, NSSItem
);
615 if( (NSSItem
*)NULL
== rv
) {
616 (void)nss_ZFreeIf(dup
);
617 return (NSSItem
*)NULL
;
624 dup
= (NSSUTF8
*)NULL
;
625 rv
->size
= nssUTF8_Size(rv
->data
, &status
);
626 if( (0 == rv
->size
) && (PR_SUCCESS
!= status
) ) {
627 if( (NSSItem
*)NULL
== rvOpt
) {
628 (void)nss_ZFreeIf(rv
);
630 return (NSSItem
*)NULL
;
634 case nssStringType_PHGString
:
635 nss_SetError(NSS_ERROR_INTERNAL_ERROR
); /* unimplemented */
638 nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE
);
646 * nssUTF8_CopyIntoFixedBuffer
648 * This will copy a UTF8 string into a fixed-length buffer, making
649 * sure that all the characters are valid. Any remaining space will
650 * be padded with the specified ASCII character, typically either
656 NSS_IMPLEMENT PRStatus
657 nssUTF8_CopyIntoFixedBuffer
665 PRUint32 stringSize
= 0;
668 if( (char *)NULL
== buffer
) {
669 nss_SetError(NSS_ERROR_INVALID_POINTER
);
673 if( 0 == bufferSize
) {
674 nss_SetError(NSS_ERROR_INVALID_ARGUMENT
);
678 if( (pad
& 0x80) != 0x00 ) {
679 nss_SetError(NSS_ERROR_INVALID_ARGUMENT
);
682 #endif /* NSSDEBUG */
684 if( (NSSUTF8
*)NULL
== string
) {
685 string
= (NSSUTF8
*) "";
688 stringSize
= nssUTF8_Size(string
, (PRStatus
*)NULL
);
689 stringSize
--; /* don't count the trailing null */
690 if( stringSize
> bufferSize
) {
691 PRUint32 bs
= bufferSize
;
692 (void)nsslibc_memcpy(buffer
, string
, bufferSize
);
694 if( ( ((buffer
[ bs
-1 ] & 0x80) == 0x00)) ||
695 ((bs
> 1) && ((buffer
[ bs
-2 ] & 0xE0) == 0xC0)) ||
696 ((bs
> 2) && ((buffer
[ bs
-3 ] & 0xF0) == 0xE0)) ||
697 ((bs
> 3) && ((buffer
[ bs
-4 ] & 0xF8) == 0xF0)) ||
698 ((bs
> 4) && ((buffer
[ bs
-5 ] & 0xFC) == 0xF8)) ||
699 ((bs
> 5) && ((buffer
[ bs
-6 ] & 0xFE) == 0xFC)) ) {
704 /* Too long. We have to trim the last character */
705 for( /*bs*/; bs
!= 0; bs
-- ) {
706 if( (buffer
[bs
-1] & 0xC0) != 0x80 ) {
714 (void)nsslibc_memset(buffer
, pad
, bufferSize
);
715 (void)nsslibc_memcpy(buffer
, string
, stringSize
);
737 if( ((const NSSUTF8
*)NULL
== a
) ||
738 ((const NSSUTF8
*)NULL
== b
) ) {
739 nss_SetError(NSS_ERROR_INVALID_POINTER
);
740 if( (PRStatus
*)NULL
!= statusOpt
) {
741 *statusOpt
= PR_FAILURE
;
745 #endif /* NSSDEBUG */
747 la
= nssUTF8_Size(a
, statusOpt
);
752 lb
= nssUTF8_Size(b
, statusOpt
);
761 return nsslibc_memequal(a
, b
, la
, statusOpt
);