* configure.ac: Fix resolver autoconf code so it works (fails)
[gnupg.git] / util / strgutil.c
blobcffdfcf776f22f9571f2b5e864da5c9d050c9a50
/* strgutil.c  -  string utilities
 * Copyright (C) 1994, 1998, 1999, 2000, 2001,
 *               2003, 2004, 2005 Free Software Foundation, Inc.
 *
 * This file is part of GnuPG.
 *
 * GnuPG is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GnuPG is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
 * USA.
 */
23 #include <config.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <ctype.h>
27 #include <errno.h>
28 #ifdef HAVE_LANGINFO_CODESET
29 #include <langinfo.h>
30 #endif
32 /* For W32 we use dynamic loading of the iconv dll and don't need any
33 * iconv headers at all. */
34 #ifndef _WIN32
35 # ifndef HAVE_ICONV
36 # undef USE_GNUPG_ICONV
37 # endif
38 #endif
40 #ifdef USE_GNUPG_ICONV
41 # include <limits.h>
42 # ifndef _WIN32
43 # include <iconv.h>
44 # endif
45 #endif
47 #include "types.h"
48 #include "util.h"
49 #include "memory.h"
50 #include "i18n.h"
51 #include "dynload.h"
54 #ifndef USE_GNUPG_ICONV
55 static ushort koi8_unicode[128] = {
56 0x2500,0x2502,0x250c,0x2510,0x2514,0x2518,0x251c,0x2524,
57 0x252c,0x2534,0x253c,0x2580,0x2584,0x2588,0x258c,0x2590,
58 0x2591,0x2592,0x2593,0x2320,0x25a0,0x2219,0x221a,0x2248,
59 0x2264,0x2265,0x00a0,0x2321,0x00b0,0x00b2,0x00b7,0x00f7,
60 0x2550,0x2551,0x2552,0x0451,0x2553,0x2554,0x2555,0x2556,
61 0x2557,0x2558,0x2559,0x255a,0x255b,0x255c,0x255d,0x255e,
62 0x255f,0x2560,0x2561,0x0401,0x2562,0x2563,0x2564,0x2565,
63 0x2566,0x2567,0x2568,0x2569,0x256a,0x256b,0x256c,0x00a9,
64 0x044e,0x0430,0x0431,0x0446,0x0434,0x0435,0x0444,0x0433,
65 0x0445,0x0438,0x0439,0x043a,0x043b,0x043c,0x043d,0x043e,
66 0x043f,0x044f,0x0440,0x0441,0x0442,0x0443,0x0436,0x0432,
67 0x044c,0x044b,0x0437,0x0448,0x044d,0x0449,0x0447,0x044a,
68 0x042e,0x0410,0x0411,0x0426,0x0414,0x0415,0x0424,0x0413,
69 0x0425,0x0418,0x0419,0x041a,0x041b,0x041c,0x041d,0x041e,
70 0x041f,0x042f,0x0420,0x0421,0x0422,0x0423,0x0416,0x0412,
71 0x042c,0x042b,0x0417,0x0428,0x042d,0x0429,0x0427,0x042a
74 static ushort latin2_unicode[128] = {
75 0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087,
76 0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F,
77 0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097,
78 0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F,
79 0x00A0,0x0104,0x02D8,0x0141,0x00A4,0x013D,0x015A,0x00A7,
80 0x00A8,0x0160,0x015E,0x0164,0x0179,0x00AD,0x017D,0x017B,
81 0x00B0,0x0105,0x02DB,0x0142,0x00B4,0x013E,0x015B,0x02C7,
82 0x00B8,0x0161,0x015F,0x0165,0x017A,0x02DD,0x017E,0x017C,
83 0x0154,0x00C1,0x00C2,0x0102,0x00C4,0x0139,0x0106,0x00C7,
84 0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD,0x00CE,0x010E,
85 0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6,0x00D7,
86 0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF,
87 0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7,
88 0x010D,0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F,
89 0x0111,0x0144,0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7,
90 0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9
92 #endif /*!USE_GNUPG_ICONV*/
95 #ifndef MB_LEN_MAX
96 #define MB_LEN_MAX 16
97 #endif
100 static const char *active_charset_name = "iso-8859-1";
101 static ushort *active_charset = NULL;
102 static int no_translation = 0;
103 static int use_iconv = 0;
#ifdef _WIN32
/* W32 does not use the iconv headers; the required type and the entry
 * points are declared here.  The function pointers are filled in by
 * load_libiconv(). */
typedef void* iconv_t;
#ifndef ICONV_CONST
#define ICONV_CONST const
#endif

iconv_t (* __stdcall iconv_open) (const char *tocode, const char *fromcode);
size_t  (* __stdcall iconv) (iconv_t cd,
                             const char **inbuf, size_t *inbytesleft,
                             char **outbuf, size_t *outbytesleft);
int     (* __stdcall iconv_close) (iconv_t cd);

#endif /*_WIN32*/
#ifdef _WIN32
/* Load iconv.dll and resolve the three iconv entry points.  The actual
 * loading is attempted only once; later calls just report the result
 * of that attempt.  Returns 0 if iconv is usable and -1 if not. */
static int
load_libiconv (void)
{
  static int done;
  void *handle;

  if (done)
    return iconv_open? 0: -1;

  done = 1; /* Do it right now because we might get called recursivly
               through gettext.  */

  handle = dlopen ("iconv.dll", RTLD_LAZY);
  if (handle)
    {
      /* Each symbol is only looked up if the previous one was found.  */
      iconv_open = dlsym (handle, "libiconv_open");
      if (iconv_open)
        iconv = dlsym (handle, "libiconv");
      if (iconv)
        iconv_close = dlsym (handle, "libiconv_close");
    }

  if (!handle || !iconv_close)
    {
      log_info (_("error loading `%s': %s\n"),
                "iconv.dll", dlerror ());
      log_info(_("please see http://www.gnupg.org/download/iconv.html "
                 "for more information\n"));
      iconv_open = NULL;
      iconv = NULL;
      iconv_close = NULL;
      if (handle)
        dlclose (handle);
    }

  return iconv_open? 0: -1;
}
#endif /* _WIN32 */
164 void
165 free_strlist( STRLIST sl )
167 STRLIST sl2;
169 for(; sl; sl = sl2 ) {
170 sl2 = sl->next;
171 xfree(sl);
176 STRLIST
177 add_to_strlist( STRLIST *list, const char *string )
179 STRLIST sl;
181 sl = xmalloc( sizeof *sl + strlen(string));
182 sl->flags = 0;
183 strcpy(sl->d, string);
184 sl->next = *list;
185 *list = sl;
186 return sl;
189 /****************
190 * Same as add_to_strlist() but if is_utf8 is *not* set a conversion
191 * to UTF8 is done
193 STRLIST
194 add_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
196 STRLIST sl;
198 if( is_utf8 )
199 sl = add_to_strlist( list, string );
200 else {
201 char *p = native_to_utf8( string );
202 sl = add_to_strlist( list, p );
203 xfree( p );
205 return sl;
208 STRLIST
209 append_to_strlist( STRLIST *list, const char *string )
211 STRLIST r, sl;
213 sl = xmalloc( sizeof *sl + strlen(string));
214 sl->flags = 0;
215 strcpy(sl->d, string);
216 sl->next = NULL;
217 if( !*list )
218 *list = sl;
219 else {
220 for( r = *list; r->next; r = r->next )
222 r->next = sl;
224 return sl;
227 STRLIST
228 append_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
230 STRLIST sl;
232 if( is_utf8 )
233 sl = append_to_strlist( list, string );
234 else {
235 char *p = native_to_utf8( string );
236 sl = append_to_strlist( list, p );
237 xfree( p );
239 return sl;
243 STRLIST
244 strlist_prev( STRLIST head, STRLIST node )
246 STRLIST n;
248 for(n=NULL; head && head != node; head = head->next )
249 n = head;
250 return n;
253 STRLIST
254 strlist_last( STRLIST node )
256 if( node )
257 for( ; node->next ; node = node->next )
259 return node;
262 char *
263 pop_strlist( STRLIST *list )
265 char *str=NULL;
266 STRLIST sl=*list;
268 if(sl)
270 str=xmalloc(strlen(sl->d)+1);
271 strcpy(str,sl->d);
273 *list=sl->next;
274 xfree(sl);
277 return str;
/****************
 * Look for the substring SUB in the buffer BUF of length BUFLEN and
 * return a pointer to the first occurrence in BUF or NULL if not found.
 * Comparison is case-insensitive (using toupper).  BUF may contain
 * NUL bytes; SUB is a C string.  An empty SUB never matches.
 */
const char *
memistr( const char *buf, size_t buflen, const char *sub )
{
    const unsigned char *hay = (const unsigned char *)buf;
    const unsigned char *needle = (const unsigned char *)sub;
    size_t sublen = strlen( sub );
    size_t pos, i;

    if( !sublen || sublen > buflen )
        return NULL;

    /* Only the bytes of SUB before its terminator take part in the
       comparison.  Thus a NUL byte in BUF can't be taken as a match
       for the terminator and we never look behind the end of SUB. */
    for( pos = 0; pos <= buflen - sublen; pos++ ) {
        for( i = 0; i < sublen; i++ )
            if( toupper( hay[pos+i] ) != toupper( needle[i] ) )
                break;
        if( i == sublen )
            return buf + pos;
    }

    return NULL;
}
/* Same as memistr() but the comparison uses the locale independent
 * ascii_toupper().  BUF may contain NUL bytes; SUB is a C string.  An
 * empty SUB never matches. */
const char *
ascii_memistr( const char *buf, size_t buflen, const char *sub )
{
    const unsigned char *hay = (const unsigned char *)buf;
    const unsigned char *needle = (const unsigned char *)sub;
    size_t sublen = strlen( sub );
    size_t pos, i;

    if( !sublen || sublen > buflen )
        return NULL;

    /* Only the bytes of SUB before its terminator take part in the
       comparison.  Thus a NUL byte in BUF can't be taken as a match
       for the terminator and we never look behind the end of SUB. */
    for( pos = 0; pos <= buflen - sublen; pos++ ) {
        for( i = 0; i < sublen; i++ )
            if( ascii_toupper( hay[pos+i] ) != ascii_toupper( needle[i] ) )
                break;
        if( i == sublen )
            return buf + pos;
    }

    return NULL;
}
/* Copy at most N-1 bytes from SRC to DEST, stopping at the first NUL
 * in SRC, and always terminate the result with a '\0'.  If N is 0
 * nothing is done at all and DEST is returned unchanged.  If DEST is
 * NULL a buffer of N bytes is allocated using xmalloc (note that
 * xmalloc is guaranteed to succeed or to abort the process).
 * Returns DEST or the allocated buffer. */
char *
mem2str( char *dest , const void *src , size_t n )
{
    const char *in = src;
    char *out;
    size_t room;

    if( !n )
        return dest;

    if( !dest )
        dest = xmalloc( n );

    out = dest;
    for( room = n - 1; room && *in; room-- )
        *out++ = *in++;
    *out = '\0';

    return dest;
}
/*
 * Remove leading and trailing white space from STR.  This is done in
 * place; STR is returned.
 */
char *
trim_spaces( char *str )
{
    const char *src = str;
    char *dst = str;
    char *tail = NULL;  /* Start of the current run of white space. */

    /* Skip the leading white space. */
    while( *src && isspace( (unsigned char)*src ) )
        src++;

    /* Shift the remaining characters to the front and remember where
       the last run of white space starts. */
    while( (*dst = *src) != '\0' ) {
        if( !isspace( (unsigned char)*src ) )
            tail = NULL;
        else if( !tail )
            tail = dst;
        dst++;
        src++;
    }

    if( tail )
        *tail = '\0';  /* Cut off the trailing white space. */

    return str;
}
376 unsigned int
377 trim_trailing_chars( byte *line, unsigned len, const char *trimchars )
379 byte *p, *mark;
380 unsigned n;
382 for(mark=NULL, p=line, n=0; n < len; n++, p++ ) {
383 if( strchr(trimchars, *p ) ) {
384 if( !mark )
385 mark = p;
387 else
388 mark = NULL;
391 if( mark ) {
392 *mark = 0;
393 return mark - line;
395 return len;
398 /****************
399 * Remove trailing white spaces and return the length of the buffer
401 unsigned
402 trim_trailing_ws( byte *line, unsigned len )
404 return trim_trailing_chars( line, len, " \t\r\n" );
408 unsigned int
409 check_trailing_chars( const byte *line, unsigned int len,
410 const char *trimchars )
412 const byte *p, *mark;
413 unsigned int n;
415 for(mark=NULL, p=line, n=0; n < len; n++, p++ ) {
416 if( strchr(trimchars, *p ) ) {
417 if( !mark )
418 mark = p;
420 else
421 mark = NULL;
424 if( mark ) {
425 return mark - line;
427 return len;
431 /****************
432 * Remove trailing white spaces and return the length of the buffer
434 unsigned int
435 check_trailing_ws( const byte *line, unsigned int len )
437 return check_trailing_chars( line, len, " \t\r\n" );
/* Return the number of characters in STRING which are equal to C. */
int
string_count_chr( const char *string, int c )
{
    const char *p;
    int count = 0;

    for( p = string; *p; p++ ) {
        if( *p == c )
            count++;
    }
    return count;
}
#ifdef USE_GNUPG_ICONV
/* Print a diagnostic for a failed iconv_open from the character set
 * FROM to TO, using the current value of ERRNO.  Each kind of message
 * is shown only once.  If USE_FALLBACK is true the native character
 * set is reset to Latin-1. */
static void
handle_iconv_error (const char *to, const char *from, int use_fallback)
{
  if (errno == EINVAL)
    {
      /* One flag for conversions to utf-8 and one for all others.  */
      static int shown1, shown2;
      int *flag = (to && !strcmp (to, "utf-8"))? &shown1 : &shown2;
      int already_shown = *flag;

      /* The flag is set before the message is printed.  */
      *flag = 1;
      if (!already_shown)
        log_info (_("conversion from `%s' to `%s' not available\n"),
                  from, to);
    }
  else
    {
      static int shown;

      if (!shown)
        log_info (_("iconv_open failed: %s\n"), strerror (errno));
      shown = 1;
    }

  if (!use_fallback)
    return;

  /* To avoid further error messages we fallback to Latin-1 for the
     native encoding.  This is justified as one can expect that on a
     utf-8 enabled system nl_langinfo() will work and thus we won't
     never get to here.  Thus Latin-1 seems to be a reasonable
     default.  */
  active_charset_name = "iso-8859-1";
  no_translation = 0;
  active_charset = NULL;
  use_iconv = 0;
}
#endif /*USE_GNUPG_ICONV*/
501 set_native_charset( const char *newset )
503 const char *full_newset;
505 if (!newset) {
506 #ifdef _WIN32
507 static char codepage[30];
508 unsigned int cpno;
509 const char *aliases;
511 /* We are a console program thus we need to use the
512 GetConsoleOutputCP function and not the the GetACP which
513 would give the codepage for a GUI program. Note this is
514 not a bulletproof detection because GetConsoleCP might
515 return a different one for console input. Not sure how to
516 cope with that. If the console Code page is not known we
517 fall back to the system code page. */
518 cpno = GetConsoleOutputCP ();
519 if (!cpno)
520 cpno = GetACP ();
521 sprintf (codepage, "CP%u", cpno );
522 /* Resolve alias. We use a long string string and not the
523 usual array to optimize if the code is taken to a DSO.
524 Taken from libiconv 1.9.2. */
525 newset = codepage;
526 for (aliases = ("CP936" "\0" "GBK" "\0"
527 "CP1361" "\0" "JOHAB" "\0"
528 "CP20127" "\0" "ASCII" "\0"
529 "CP20866" "\0" "KOI8-R" "\0"
530 "CP21866" "\0" "KOI8-RU" "\0"
531 "CP28591" "\0" "ISO-8859-1" "\0"
532 "CP28592" "\0" "ISO-8859-2" "\0"
533 "CP28593" "\0" "ISO-8859-3" "\0"
534 "CP28594" "\0" "ISO-8859-4" "\0"
535 "CP28595" "\0" "ISO-8859-5" "\0"
536 "CP28596" "\0" "ISO-8859-6" "\0"
537 "CP28597" "\0" "ISO-8859-7" "\0"
538 "CP28598" "\0" "ISO-8859-8" "\0"
539 "CP28599" "\0" "ISO-8859-9" "\0"
540 "CP28605" "\0" "ISO-8859-15" "\0"
541 "CP65001" "\0" "UTF-8" "\0");
542 *aliases;
543 aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
545 if (!strcmp (codepage, aliases) ||(*aliases == '*' && !aliases[1]))
547 newset = aliases + strlen (aliases) + 1;
548 break;
552 #else
553 #ifdef HAVE_LANGINFO_CODESET
554 newset = nl_langinfo (CODESET);
555 #else /* !HAVE_LANGINFO_CODESET */
556 /* Try to get the used charset from environment variables. */
557 static char codepage[30];
558 const char *lc, *dot, *mod;
560 strcpy (codepage, "iso-8859-1");
561 lc = getenv ("LC_ALL");
562 if (!lc || !*lc) {
563 lc = getenv ("LC_CTYPE");
564 if (!lc || !*lc)
565 lc = getenv ("LANG");
567 if (lc && *lc) {
568 dot = strchr (lc, '.');
569 if (dot) {
570 mod = strchr (++dot, '@');
571 if (!mod)
572 mod = dot + strlen (dot);
573 if (mod - dot < sizeof codepage && dot != mod) {
574 memcpy (codepage, dot, mod - dot);
575 codepage [mod - dot] = 0;
579 newset = codepage;
580 #endif /* !HAVE_LANGINFO_CODESET */
581 #endif
584 full_newset = newset;
585 if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3)) {
586 newset += 3;
587 if (*newset == '-' || *newset == '_')
588 newset++;
591 /* Note that we silently assume that plain ASCII is actually meant
592 as Latin-1. This makes sense because many Unix system don't
593 have their locale set up properly and thus would get annoying
594 error messages and we have to handle all the "bug"
595 reports. Latin-1 has always been the character set used for 8
596 bit characters on Unix systems. */
597 if( !*newset
598 || !ascii_strcasecmp (newset, "8859-1" )
599 || !ascii_strcasecmp (newset, "646" )
600 || !ascii_strcasecmp (newset, "ASCII" )
601 || !ascii_strcasecmp (newset, "ANSI_X3.4-1968" )
603 active_charset_name = "iso-8859-1";
604 no_translation = 0;
605 active_charset = NULL;
606 use_iconv = 0;
608 else if( !ascii_strcasecmp (newset, "utf8" )
609 || !ascii_strcasecmp(newset, "utf-8") ) {
610 active_charset_name = "utf-8";
611 no_translation = 1;
612 active_charset = NULL;
613 use_iconv = 0;
615 #ifdef USE_GNUPG_ICONV
616 else {
617 iconv_t cd;
619 #ifdef _WIN32
620 if (load_libiconv ())
621 return G10ERR_GENERAL;
622 #endif /*_WIN32*/
624 cd = iconv_open (full_newset, "utf-8");
625 if (cd == (iconv_t)-1) {
626 handle_iconv_error (full_newset, "utf-8", 0);
627 return G10ERR_GENERAL;
629 iconv_close (cd);
630 cd = iconv_open ("utf-8", full_newset);
631 if (cd == (iconv_t)-1) {
632 handle_iconv_error ("utf-8", full_newset, 0);
633 return G10ERR_GENERAL;
635 iconv_close (cd);
636 active_charset_name = full_newset;
637 no_translation = 0;
638 active_charset = NULL;
639 use_iconv = 1;
641 #else /*!USE_GNUPG_ICONV*/
642 else if( !ascii_strcasecmp( newset, "8859-2" ) ) {
643 active_charset_name = "iso-8859-2";
644 no_translation = 0;
645 active_charset = latin2_unicode;
646 use_iconv = 0;
648 else if( !ascii_strcasecmp( newset, "koi8-r" ) ) {
649 active_charset_name = "koi8-r";
650 no_translation = 0;
651 active_charset = koi8_unicode;
652 use_iconv = 0;
654 else
655 return G10ERR_GENERAL;
656 #endif /*!USE_GNUPG_ICONV*/
657 return 0;
660 const char*
661 get_native_charset()
663 return active_charset_name;
666 /****************
667 * Convert string, which is in native encoding to UTF8 and return the
668 * new allocated UTF8 string.
670 char *
671 native_to_utf8( const char *string )
673 const byte *s;
674 char *buffer;
675 byte *p;
676 size_t length=0;
678 if (no_translation)
679 { /* Already utf-8 encoded. */
680 buffer = xstrdup (string);
682 else if( !active_charset && !use_iconv) /* Shortcut implementation
683 for Latin-1. */
685 for(s=string; *s; s++ )
687 length++;
688 if( *s & 0x80 )
689 length++;
691 buffer = xmalloc( length + 1 );
692 for(p=buffer, s=string; *s; s++ )
694 if( *s & 0x80 )
696 *p++ = 0xc0 | ((*s >> 6) & 3);
697 *p++ = 0x80 | ( *s & 0x3f );
699 else
700 *p++ = *s;
702 *p = 0;
704 else /* Need to use a translation table. */
706 #ifdef USE_GNUPG_ICONV
707 iconv_t cd;
708 const char *inptr;
709 char *outptr;
710 size_t inbytes, outbytes;
712 cd = iconv_open ("utf-8", active_charset_name);
713 if (cd == (iconv_t)-1)
715 handle_iconv_error ("utf-8", active_charset_name, 1);
716 return native_to_utf8 (string);
719 for (s=string; *s; s++ )
721 length++;
722 if ((*s & 0x80))
723 length += 5; /* We may need up to 6 bytes for the utf8 output. */
725 buffer = xmalloc (length + 1);
727 inptr = string;
728 inbytes = strlen (string);
729 outptr = buffer;
730 outbytes = length;
731 if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
732 &outptr, &outbytes) == (size_t)-1)
734 static int shown;
736 if (!shown)
737 log_info (_("conversion from `%s' to `%s' failed: %s\n"),
738 active_charset_name, "utf-8", strerror (errno));
739 shown = 1;
740 /* We don't do any conversion at all but use the strings as is. */
741 strcpy (buffer, string);
743 else /* Success. */
745 *outptr = 0;
746 /* We could realloc the buffer now but I doubt that it makes
747 much sense given that it will get freed anyway soon
748 after. */
750 iconv_close (cd);
752 #else /*!USE_GNUPG_ICONV*/
753 for(s=string; *s; s++ )
755 length++;
756 if( *s & 0x80 )
757 length += 2; /* We may need up to 3 bytes. */
759 buffer = xmalloc( length + 1 );
760 for(p=buffer, s=string; *s; s++ ) {
761 if( *s & 0x80 ) {
762 ushort val = active_charset[ *s & 0x7f ];
763 if( val < 0x0800 ) {
764 *p++ = 0xc0 | ( (val >> 6) & 0x1f );
765 *p++ = 0x80 | ( val & 0x3f );
767 else {
768 *p++ = 0xe0 | ( (val >> 12) & 0x0f );
769 *p++ = 0x80 | ( (val >> 6) & 0x3f );
770 *p++ = 0x80 | ( val & 0x3f );
773 else
774 *p++ = *s;
776 *p = 0;
777 #endif /*!USE_GNUPG_ICONV*/
780 return buffer;
784 /****************
785 * Convert string, which is in UTF8 to native encoding. illegal
786 * encodings by some "\xnn" and quote all control characters. A
787 * character with value DELIM will always be quoted, it must be a
788 * vanilla ASCII character. A DELIM value of -1 is special: it disables
789 * all quoting of control characters.
791 char *
792 utf8_to_native( const char *string, size_t length, int delim )
794 int nleft;
795 int i;
796 byte encbuf[8];
797 int encidx;
798 const byte *s;
799 size_t n;
800 byte *buffer = NULL, *p = NULL;
801 unsigned long val = 0;
802 size_t slen;
803 int resync = 0;
805 /* 1. pass (p==NULL): count the extended utf-8 characters */
806 /* 2. pass (p!=NULL): create string */
807 for( ;; ) {
808 for( slen=length, nleft=encidx=0, n=0, s=string; slen; s++, slen-- ) {
809 if( resync ) {
810 if( !(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)) ) {
811 /* still invalid */
812 if( p ) {
813 sprintf(p, "\\x%02x", *s );
814 p += 4;
816 n += 4;
817 continue;
819 resync = 0;
821 if( !nleft ) {
822 if( !(*s & 0x80) ) { /* plain ascii */
823 if( delim != -1
824 && (*s < 0x20 || *s == 0x7f || *s == delim
825 || (delim && *s=='\\'))) {
826 n++;
827 if( p )
828 *p++ = '\\';
829 switch( *s ) {
830 case '\n': n++; if( p ) *p++ = 'n'; break;
831 case '\r': n++; if( p ) *p++ = 'r'; break;
832 case '\f': n++; if( p ) *p++ = 'f'; break;
833 case '\v': n++; if( p ) *p++ = 'v'; break;
834 case '\b': n++; if( p ) *p++ = 'b'; break;
835 case 0 : n++; if( p ) *p++ = '0'; break;
836 default:
837 n += 3;
838 if ( p ) {
839 sprintf( p, "x%02x", *s );
840 p += 3;
842 break;
845 else {
846 if( p ) *p++ = *s;
847 n++;
850 else if( (*s & 0xe0) == 0xc0 ) { /* 110x xxxx */
851 val = *s & 0x1f;
852 nleft = 1;
853 encidx = 0;
854 encbuf[encidx++] = *s;
856 else if( (*s & 0xf0) == 0xe0 ) { /* 1110 xxxx */
857 val = *s & 0x0f;
858 nleft = 2;
859 encidx = 0;
860 encbuf[encidx++] = *s;
862 else if( (*s & 0xf8) == 0xf0 ) { /* 1111 0xxx */
863 val = *s & 0x07;
864 nleft = 3;
865 encidx = 0;
866 encbuf[encidx++] = *s;
868 else if( (*s & 0xfc) == 0xf8 ) { /* 1111 10xx */
869 val = *s & 0x03;
870 nleft = 4;
871 encidx = 0;
872 encbuf[encidx++] = *s;
874 else if( (*s & 0xfe) == 0xfc ) { /* 1111 110x */
875 val = *s & 0x01;
876 nleft = 5;
877 encidx = 0;
878 encbuf[encidx++] = *s;
880 else { /* invalid encoding: print as \xnn */
881 if( p ) {
882 sprintf(p, "\\x%02x", *s );
883 p += 4;
885 n += 4;
886 resync = 1;
889 else if( *s < 0x80 || *s >= 0xc0 ) { /* invalid */
890 if( p ) {
891 for(i=0; i < encidx; i++ ) {
892 sprintf(p, "\\x%02x", encbuf[i] );
893 p += 4;
895 sprintf(p, "\\x%02x", *s );
896 p += 4;
898 n += 4 + 4*encidx;
899 nleft = 0;
900 encidx = 0;
901 resync = 1;
903 else {
904 encbuf[encidx++] = *s;
905 val <<= 6;
906 val |= *s & 0x3f;
907 if( !--nleft ) { /* ready */
908 if (no_translation) {
909 if( p ) {
910 for(i=0; i < encidx; i++ )
911 *p++ = encbuf[i];
913 n += encidx;
914 encidx = 0;
916 #ifdef USE_GNUPG_ICONV
917 else if(use_iconv) {
918 /* Our strategy for using iconv is a bit
919 * strange but it better keeps compatibility
920 * with previous versions in regard to how
921 * invalid encodings are displayed. What we
922 * do is to keep the utf-8 as is and have the
923 * real translation step then at the end.
924 * Yes, I know that this is ugly. However we
925 * are short of the 1.4 release and for this
926 * branch we should not mee too much around
927 * with iconv things. One reason for this is
928 * that we don't know enough about non-GNU
929 * iconv implementation and want to minimize
930 * the risk of breaking the code on too many
931 * platforms. */
932 if( p ) {
933 for(i=0; i < encidx; i++ )
934 *p++ = encbuf[i];
936 n += encidx;
937 encidx = 0;
939 #endif /*USE_GNUPG_ICONV*/
940 else if( active_charset ) { /* table lookup */
941 for(i=0; i < 128; i++ ) {
942 if( active_charset[i] == val )
943 break;
945 if( i < 128 ) { /* we can print this one */
946 if( p ) *p++ = i+128;
947 n++;
949 else { /* we do not have a translation: print utf8 */
950 if( p ) {
951 for(i=0; i < encidx; i++ ) {
952 sprintf(p, "\\x%02x", encbuf[i] );
953 p += 4;
956 n += encidx*4;
957 encidx = 0;
960 else { /* native set */
961 if( val >= 0x80 && val < 256 ) {
962 n++; /* we can simply print this character */
963 if( p ) *p++ = val;
965 else { /* we do not have a translation: print utf8 */
966 if( p ) {
967 for(i=0; i < encidx; i++ ) {
968 sprintf(p, "\\x%02x", encbuf[i] );
969 p += 4;
972 n += encidx*4;
973 encidx = 0;
980 if( !buffer ) { /* allocate the buffer after the first pass */
981 buffer = p = xmalloc( n + 1 );
983 #ifdef USE_GNUPG_ICONV
984 else if(use_iconv) {
985 /* Note: See above for comments. */
986 iconv_t cd;
987 const char *inptr;
988 char *outbuf, *outptr;
989 size_t inbytes, outbytes;
991 *p = 0; /* Terminate the buffer. */
993 cd = iconv_open (active_charset_name, "utf-8");
994 if (cd == (iconv_t)-1)
996 handle_iconv_error (active_charset_name, "utf-8", 1);
997 xfree (buffer);
998 return utf8_to_native (string, length, delim);
1001 /* Allocate a new buffer large enough to hold all possible
1002 * encodings. */
1003 n = p - buffer + 1;
1004 inbytes = n - 1;;
1005 inptr = buffer;
1006 outbytes = n * MB_LEN_MAX;
1007 if (outbytes / MB_LEN_MAX != n)
1008 BUG (); /* Actually an overflow. */
1009 outbuf = outptr = xmalloc (outbytes);
1010 if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
1011 &outptr, &outbytes) == (size_t)-1) {
1012 static int shown;
1014 if (!shown)
1015 log_info (_("conversion from `%s' to `%s' failed: %s\n"),
1016 "utf-8", active_charset_name, strerror (errno));
1017 shown = 1;
1018 /* Didn't worked out. Temporary disable the use of
1019 * iconv and fall back to our old code. */
1020 xfree (buffer);
1021 buffer = NULL;
1022 xfree (outbuf);
1023 use_iconv = 0;
1024 outbuf = utf8_to_native (string, length, delim);
1025 use_iconv = 1;
1027 else { /* Success. */
1028 *outptr = 0;
1029 /* We could realloc the buffer now but I doubt that it makes
1030 much sense given that it will get freed anyway soon
1031 after. */
1032 xfree (buffer);
1034 iconv_close (cd);
1035 return outbuf;
1037 #endif /*USE_GNUPG_ICONV*/
1038 else {
1039 *p = 0; /* make a string */
1040 return buffer;
/****************************************************
 ******** locale insensitive ctype functions ********
 ****************************************************/
/* FIXME: replace them by a table lookup and macros */

/* Return true if C is one of the ASCII letters 'A'..'Z'. */
int
ascii_isupper (int c)
{
    return 'A' <= c && c <= 'Z';
}
/* Return true if C is one of the ASCII letters 'a'..'z'. */
int
ascii_islower (int c)
{
    return 'a' <= c && c <= 'z';
}
/* Map the ASCII letters 'a'..'z' to upper case; all other values are
 * returned unchanged. */
int
ascii_toupper (int c)
{
    return ('a' <= c && c <= 'z')? (c & ~0x20) : c;
}
/* Map the ASCII letters 'A'..'Z' to lower case; all other values are
 * returned unchanged. */
int
ascii_tolower (int c)
{
    return ('A' <= c && c <= 'Z')? (c | 0x20) : c;
}
/* Compare the strings A and B ignoring the case of ASCII letters.
 * Returns 0 if they are equal, otherwise the difference of the first
 * two differing characters after mapping them to lower case. */
int
ascii_strcasecmp (const char *a, const char *b)
{
    const unsigned char *p1 = (const unsigned char *)a;
    const unsigned char *p2 = (const unsigned char *)b;
    unsigned char c1, c2;

    if (p1 == p2)
        return 0;

    for (;;) {
        c1 = ascii_tolower (*p1);
        c2 = ascii_tolower (*p2);

        /* Stop at the end of A or at the first difference. */
        if (c1 == '\0' || c1 != c2)
            break;

        ++p1;
        ++p2;
    }

    return c1 - c2;
}
/* Compare at most N characters of the strings A and B ignoring the
 * case of ASCII letters.  Returns 0 if they are equal, otherwise the
 * difference of the first two differing characters after mapping
 * them to lower case. */
int
ascii_strncasecmp (const char *a, const char *b, size_t n)
{
    const unsigned char *p1 = (const unsigned char *)a;
    const unsigned char *p2 = (const unsigned char *)b;
    unsigned char c1, c2;

    if (p1 == p2)
        return 0;

    for ( ; n; n--, p1++, p2++) {
        c1 = ascii_tolower (*p1);
        c2 = ascii_tolower (*p2);

        /* Stop at the end of A or at the first difference. */
        if (c1 == '\0' || c1 != c2)
            return c1 - c2;
    }

    return 0;
}
/* Compare the N bytes at A and B ignoring the case of ASCII letters.
 * Returns 0 if they are equal, otherwise the difference of the first
 * two differing bytes after mapping them to upper case. */
int
ascii_memcasecmp( const char *a, const char *b, size_t n )
{
    size_t i;

    if (a == b)
        return 0;

    for ( i = 0; i < n; i++ ) {
        int ua, ub;

        if( a[i] == b[i] )
            continue;
        ua = ascii_toupper (a[i]);
        ub = ascii_toupper (b[i]);
        if( ua != ub )
            return ua - ub;
    }
    return 0;
}
/*********************************************
 ********** missing string functions *********
 *********************************************/

#ifndef HAVE_STPCPY
/* Copy the string B including its terminating Nul to A and return a
 * pointer to the Nul stored in A. */
char *
stpcpy(char *a,const char *b)
{
    while( (*a = *b) != '\0' ) {
        a++;
        b++;
    }

    return a;
}
#endif
#ifndef HAVE_STRSEP
/* Return the next token of *STRINGP.  The token ends at the first
 * character which is part of DELIM; this character is replaced by a
 * Nul and *STRINGP is set right behind it.  If there is no delimiter
 * left, the remaining string is the token and *STRINGP is set to
 * NULL.  Returns NULL if *STRINGP is NULL.
 * (Same behaviour as the code from glibc-2.2.1/sysdeps/generic/strsep.c) */
char *
strsep (char **stringp, const char *delim)
{
  char *token = *stringp;
  char *stop;

  if (!token)
    return NULL;

  if (delim[0] == '\0')
    stop = NULL;      /* Without delimiters the string is one token.  */
  else if (delim[1] == '\0')
    /* A frequent case is when the delimiter string contains only one
       character.  Here we don't need to call the expensive `strpbrk'
       function and instead work using `strchr'.  */
    stop = strchr (token, delim[0]);
  else
    /* Find the end of the token.  */
    stop = strpbrk (token, delim);

  /* Terminate the token and set *STRINGP past the Nul character; with
     no more delimiters this is the last token and *STRINGP becomes
     NULL.  */
  if (stop)
    *stop++ = '\0';
  *stringp = stop;

  return token;
}
#endif /*HAVE_STRSEP*/
#ifndef HAVE_STRLWR
/* Convert the string S to lower case in place using tolower and
 * return S. */
char *
strlwr(char *s)
{
    size_t i;

    for( i = 0; s[i]; i++ )
        s[i] = tolower( (unsigned char)s[i] );
    return s;
}
#endif
#ifndef HAVE_STRCASECMP
/* Compare the strings A and B ignoring case.  Returns a value less
 * than, equal to or greater than zero if A is found to be less than,
 * equal to or greater than B.  The order is that of the strings after
 * mapping them to lower case with tolower; thus the sign of the
 * result does not depend on the case of the differing characters. */
int
strcasecmp( const char *a, const char *b )
{
    const unsigned char *p = (const unsigned char *)a;
    const unsigned char *q = (const unsigned char *)b;
    int ca, cb;

    do {
        ca = tolower( *p++ );
        cb = tolower( *q++ );
    } while( ca && ca == cb );

    /* Return the difference of the folded characters, not of the raw
       bytes. */
    return ca - cb;
}
#endif
#ifndef HAVE_STRNCASECMP
/* Compare at most N characters of the strings A and B ignoring case.
 * Returns a value less than, equal to or greater than zero if A is
 * found to be less than, equal to or greater than B.  The order is
 * that of the strings after mapping them to lower case with tolower;
 * thus the sign of the result does not depend on the case of the
 * differing characters. */
int
strncasecmp( const char *a, const char *b, size_t n )
{
    const unsigned char *p = (const unsigned char *)a;
    const unsigned char *q = (const unsigned char *)b;

    for( ; n; n-- ) {
        int ca = tolower( *p++ );
        int cb = tolower( *q++ );

        /* Return the difference of the folded characters, not of the
           raw bytes. */
        if( ca != cb )
            return ca - cb;
        if( !ca )
            break;
    }
    return 0;
}
#endif
1251 #ifdef _WIN32
1253 * Like vsprintf but provides a pointer to malloc'd storage, which
1254 * must be freed by the caller (xfree). Taken from libiberty as
1255 * found in gcc-2.95.2 and a little bit modernized.
1256 * FIXME: Write a new CRT for W32.
1259 vasprintf (char **result, const char *format, va_list args)
1261 const char *p = format;
1262 /* Add one to make sure that it is never zero, which might cause malloc
1263 to return NULL. */
1264 int total_width = strlen (format) + 1;
1265 va_list ap;
1267 /* this is not really portable but works under Windows */
1268 memcpy ( &ap, &args, sizeof (va_list));
1270 while (*p != '\0')
1272 if (*p++ == '%')
1274 while (strchr ("-+ #0", *p))
1275 ++p;
1276 if (*p == '*')
1278 ++p;
1279 total_width += abs (va_arg (ap, int));
1281 else
1283 char *endp;
1284 total_width += strtoul (p, &endp, 10);
1285 p = endp;
1287 if (*p == '.')
1289 ++p;
1290 if (*p == '*')
1292 ++p;
1293 total_width += abs (va_arg (ap, int));
1295 else
1297 char *endp;
1298 total_width += strtoul (p, &endp, 10);
1299 p = endp;
1302 while (strchr ("hlL", *p))
1303 ++p;
1304 /* Should be big enough for any format specifier except %s
1305 and floats. */
1306 total_width += 30;
1307 switch (*p)
1309 case 'd':
1310 case 'i':
1311 case 'o':
1312 case 'u':
1313 case 'x':
1314 case 'X':
1315 case 'c':
1316 (void) va_arg (ap, int);
1317 break;
1318 case 'f':
1319 case 'e':
1320 case 'E':
1321 case 'g':
1322 case 'G':
1323 (void) va_arg (ap, double);
1324 /* Since an ieee double can have an exponent of 307, we'll
1325 make the buffer wide enough to cover the gross case. */
1326 total_width += 307;
1328 case 's':
1329 total_width += strlen (va_arg (ap, char *));
1330 break;
1331 case 'p':
1332 case 'n':
1333 (void) va_arg (ap, char *);
1334 break;
1338 *result = xmalloc (total_width);
1339 if (*result != NULL)
1340 return vsprintf (*result, format, args);
1341 else
1342 return 0;
/* Like sprintf but the output is stored in a newly allocated buffer
 * returned at BUF.  This is a wrapper around vasprintf and returns
 * whatever vasprintf returns. */
int
asprintf (char **buf, const char *fmt, ...)
{
  va_list arg_ptr;
  int rc;

  va_start (arg_ptr, fmt);
  rc = vasprintf (buf, fmt, arg_ptr);
  va_end (arg_ptr);
  return rc;
}
1357 const char *
1358 w32_strerror (int w32_errno)
1360 static char strerr[256];
1361 int ec = (int)GetLastError ();
1363 if (w32_errno == 0)
1364 w32_errno = ec;
1365 FormatMessage (FORMAT_MESSAGE_FROM_SYSTEM, NULL, w32_errno,
1366 MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT),
1367 strerr, DIM (strerr)-1, NULL);
1368 return strerr;
1370 #endif /*_WIN32*/