util/strgutil.c

   1 /* strgutil.c -  string utilities
   2  * Copyright (C) 1994, 1998, 1999, 2000, 2001,
   3  *               2003, 2004, 2005 Free Software Foundation, Inc.
   4  *
   5  * This file is part of GnuPG.
   6  *
   7  * GnuPG is free software; you can redistribute it and/or modify
   8  * it under the terms of the GNU General Public License as published by
   9  * the Free Software Foundation; either version 2 of the License, or
  10  * (at your option) any later version.
  11  *
  12  * GnuPG is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with this program; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
  20  * USA.
  21  */
  22
  23 #include <config.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include <ctype.h>
  27 #include <errno.h>
  28 #ifdef HAVE_LANGINFO_CODESET
  29 #include <langinfo.h>
  30 #endif
  31
  32 /* For W32 we use dynamic loading of the iconv dll and don't need any
  33  * iconv headers at all. */
  34 #ifndef _WIN32
  35 # ifndef HAVE_ICONV
  36 #  undef USE_GNUPG_ICONV
  37 # endif
  38 #endif
  39
  40 #ifdef USE_GNUPG_ICONV
  41 # include <limits.h>
  42 # ifndef _WIN32
  43 #  include <iconv.h>
  44 # endif
  45 #endif
  46
  47 #include "types.h"
  48 #include "util.h"
  49 #include "memory.h"
  50 #include "i18n.h"
  51 #include "dynload.h"
  52
  53
  54 #ifndef USE_GNUPG_ICONV
  55 static ushort koi8_unicode[128] = {
  56     0x2500,0x2502,0x250c,0x2510,0x2514,0x2518,0x251c,0x2524,
  57     0x252c,0x2534,0x253c,0x2580,0x2584,0x2588,0x258c,0x2590,
  58     0x2591,0x2592,0x2593,0x2320,0x25a0,0x2219,0x221a,0x2248,
  59     0x2264,0x2265,0x00a0,0x2321,0x00b0,0x00b2,0x00b7,0x00f7,
  60     0x2550,0x2551,0x2552,0x0451,0x2553,0x2554,0x2555,0x2556,
  61     0x2557,0x2558,0x2559,0x255a,0x255b,0x255c,0x255d,0x255e,
  62     0x255f,0x2560,0x2561,0x0401,0x2562,0x2563,0x2564,0x2565,
  63     0x2566,0x2567,0x2568,0x2569,0x256a,0x256b,0x256c,0x00a9,
  64     0x044e,0x0430,0x0431,0x0446,0x0434,0x0435,0x0444,0x0433,
  65     0x0445,0x0438,0x0439,0x043a,0x043b,0x043c,0x043d,0x043e,
  66     0x043f,0x044f,0x0440,0x0441,0x0442,0x0443,0x0436,0x0432,
  67     0x044c,0x044b,0x0437,0x0448,0x044d,0x0449,0x0447,0x044a,
  68     0x042e,0x0410,0x0411,0x0426,0x0414,0x0415,0x0424,0x0413,
  69     0x0425,0x0418,0x0419,0x041a,0x041b,0x041c,0x041d,0x041e,
  70     0x041f,0x042f,0x0420,0x0421,0x0422,0x0423,0x0416,0x0412,
  71     0x042c,0x042b,0x0417,0x0428,0x042d,0x0429,0x0427,0x042a
  72 };
  73
  74 static ushort latin2_unicode[128] = {
  75     0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087,
  76     0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F,
  77     0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097,
  78     0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F,
  79     0x00A0,0x0104,0x02D8,0x0141,0x00A4,0x013D,0x015A,0x00A7,
  80     0x00A8,0x0160,0x015E,0x0164,0x0179,0x00AD,0x017D,0x017B,
  81     0x00B0,0x0105,0x02DB,0x0142,0x00B4,0x013E,0x015B,0x02C7,
  82     0x00B8,0x0161,0x015F,0x0165,0x017A,0x02DD,0x017E,0x017C,
  83     0x0154,0x00C1,0x00C2,0x0102,0x00C4,0x0139,0x0106,0x00C7,
  84     0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD,0x00CE,0x010E,
  85     0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6,0x00D7,
  86     0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF,
  87     0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7,
  88     0x010D,0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F,
  89     0x0111,0x0144,0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7,
  90     0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9
  91 };
  92 #endif /*!USE_GNUPG_ICONV*/
  93
  94
  95 #ifndef MB_LEN_MAX
  96 #define MB_LEN_MAX 16
  97 #endif
  98
  99
 100 static const char *active_charset_name = "iso-8859-1";
 101 static ushort *active_charset = NULL;
 102 static int no_translation = 0;
 103 static int use_iconv = 0;
 104
 105
 106 #ifdef _WIN32
 107 typedef void* iconv_t;
 108 #ifndef ICONV_CONST
 109 #define ICONV_CONST const
 110 #endif
 111
 112 iconv_t (* __stdcall iconv_open) (const char *tocode, const char *fromcode);
 113 size_t  (* __stdcall iconv) (iconv_t cd,
 114                              const char **inbuf, size_t *inbytesleft,
 115                              char **outbuf, size_t *outbytesleft);
 116 int     (* __stdcall iconv_close) (iconv_t cd);
 117
 118 #endif /*_WIN32*/
 119
 120
 121
 122 #ifdef _WIN32
 123 static int
 124 load_libiconv (void)
 125 {
 126   static int done;
 127
 128   if (!done)
 129     {
 130       void *handle;
 131
 132       done = 1; /* Do it right now because we might get called recursivly
 133                    through gettext.  */
 134
 135       handle = dlopen ("iconv.dll", RTLD_LAZY);
 136       if (handle)
 137         {
 138           iconv_open  = dlsym (handle, "libiconv_open");
 139           if (iconv_open)
 140             iconv      = dlsym (handle, "libiconv");
 141           if (iconv)
 142             iconv_close = dlsym (handle, "libiconv_close");
 143         }
 144       if (!handle || !iconv_close)
 145         {
 146           log_info (_("error loading `%s': %s\n"),
 147                      "iconv.dll",  dlerror ());
 148           log_info(_("please see http://www.gnupg.org/download/iconv.html "
 149                      "for more information\n"));
 150           iconv_open = NULL;
 151           iconv = NULL;
 152           iconv_close = NULL;
 153           if (handle)
 154               dlclose (handle);
 155         }
 156     }
 157   return iconv_open? 0: -1;
 158 }
 159 #endif /* _WIN32 */
 160
 161
 162
 163
 164 void
 165 free_strlist( STRLIST sl )
 166 {
 167     STRLIST sl2;
 168
 169     for(; sl; sl = sl2 ) {
 170         sl2 = sl->next;
 171         xfree(sl);
 172     }
 173 }
 174
 175
 176 STRLIST
 177 add_to_strlist( STRLIST *list, const char *string )
 178 {
 179     STRLIST sl;
 180
 181     sl = xmalloc( sizeof *sl + strlen(string));
 182     sl->flags = 0;
 183     strcpy(sl->d, string);
 184     sl->next = *list;
 185     *list = sl;
 186     return sl;
 187 }
 188
 189 /****************
 190  * Same as add_to_strlist() but if is_utf8 is *not* set a conversion
 191  * to UTF8 is done
 192  */
 193 STRLIST
 194 add_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
 195 {
 196     STRLIST sl;
 197
 198     if( is_utf8 )
 199         sl = add_to_strlist( list, string );
 200     else {
 201         char *p = native_to_utf8( string );
 202         sl = add_to_strlist( list, p );
 203         xfree( p );
 204     }
 205     return sl;
 206 }
 207
 208 STRLIST
 209 append_to_strlist( STRLIST *list, const char *string )
 210 {
 211     STRLIST r, sl;
 212
 213     sl = xmalloc( sizeof *sl + strlen(string));
 214     sl->flags = 0;
 215     strcpy(sl->d, string);
 216     sl->next = NULL;
 217     if( !*list )
 218         *list = sl;
 219     else {
 220         for( r = *list; r->next; r = r->next )
 221             ;
 222         r->next = sl;
 223     }
 224     return sl;
 225 }
 226
 227 STRLIST
 228 append_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
 229 {
 230     STRLIST sl;
 231
 232     if( is_utf8 )
 233         sl = append_to_strlist( list, string );
 234     else {
 235         char *p = native_to_utf8( string );
 236         sl = append_to_strlist( list, p );
 237         xfree( p );
 238     }
 239     return sl;
 240 }
 241
 242
 243 STRLIST
 244 strlist_prev( STRLIST head, STRLIST node )
 245 {
 246     STRLIST n;
 247
 248     for(n=NULL; head && head != node; head = head->next )
 249         n = head;
 250     return n;
 251 }
 252
 253 STRLIST
 254 strlist_last( STRLIST node )
 255 {
 256     if( node )
 257         for( ; node->next ; node = node->next )
 258             ;
 259     return node;
 260 }
 261
 262 char *
 263 pop_strlist( STRLIST *list )
 264 {
 265   char *str=NULL;
 266   STRLIST sl=*list;
 267
 268   if(sl)
 269     {
 270       str=xmalloc(strlen(sl->d)+1);
 271       strcpy(str,sl->d);
 272
 273       *list=sl->next;
 274       xfree(sl);
 275     }
 276
 277   return str;
 278 }
 279
 280 /****************
 281  * Look for the substring SUB in buffer and return a pointer to that
 282  * substring in BUF or NULL if not found.
 283  * Comparison is case-insensitive.
 284  */
 285 const char *
 286 memistr( const char *buf, size_t buflen, const char *sub )
 287 {
 288     const byte *t, *s ;
 289     size_t n;
 290
 291     for( t=buf, n=buflen, s=sub ; n ; t++, n-- )
 292         if( toupper(*t) == toupper(*s) ) {
 293             for( buf=t++, buflen = n--, s++;
 294                  n && toupper(*t) == toupper(*s); t++, s++, n-- )
 295                 ;
 296             if( !*s )
 297                 return buf;
 298             t = buf; n = buflen; s = sub ;
 299         }
 300
 301     return NULL ;
 302 }
 303
 304 const char *
 305 ascii_memistr( const char *buf, size_t buflen, const char *sub )
 306 {
 307     const byte *t, *s ;
 308     size_t n;
 309
 310     for( t=buf, n=buflen, s=sub ; n ; t++, n-- )
 311         if( ascii_toupper(*t) == ascii_toupper(*s) ) {
 312             for( buf=t++, buflen = n--, s++;
 313                  n && ascii_toupper(*t) == ascii_toupper(*s); t++, s++, n-- )
 314                 ;
 315             if( !*s )
 316                 return buf;
 317             t = buf; n = buflen; s = sub ;
 318         }
 319
 320     return NULL ;
 321 }
 322
 323
 324 /* Like strncpy() but copy at max N-1 bytes and append a '\0'.  With
 325  * N given as 0 nothing is copied at all. With DEST given as NULL
 326  * sufficient memory is allocated using xmalloc (note that xmalloc is
 327  * guaranteed to succeed or to abort the process).  */
 328 char *
 329 mem2str( char *dest , const void *src , size_t n )
 330 {
 331     char *d;
 332     const char *s;
 333
 334     if( n ) {
 335         if( !dest )
 336             dest = xmalloc( n ) ;
 337         d = dest;
 338         s = src ;
 339         for(n--; n && *s; n-- )
 340             *d++ = *s++;
 341         *d = '\0' ;
 342     }
 343
 344     return dest ;
 345 }
 346
 347
 348 /*
 349  * Remove leading and trailing white spaces
 350  */
 351 char *
 352 trim_spaces( char *str )
 353 {
 354     char *string, *p, *mark;
 355
 356     string = str;
 357     /* Find first non space character. */
 358     for( p=string; *p && isspace( *(byte*)p ) ; p++ )
 359         ;
 360     /* Move characters. */
 361     for( (mark = NULL); (*string = *p); string++, p++ )
 362         if( isspace( *(byte*)p ) ) {
 363             if( !mark )
 364                 mark = string ;
 365         }
 366         else
 367             mark = NULL ;
 368     if( mark )
 369         *mark = '\0' ;  /* Remove trailing spaces.  */
 370
 371     return str ;
 372 }
 373
 374
 375
 376 unsigned int
 377 trim_trailing_chars( byte *line, unsigned len, const char *trimchars )
 378 {
 379     byte *p, *mark;
 380     unsigned n;
 381
 382     for(mark=NULL, p=line, n=0; n < len; n++, p++ ) {
 383         if( strchr(trimchars, *p ) ) {
 384             if( !mark )
 385                 mark = p;
 386         }
 387         else
 388             mark = NULL;
 389     }
 390
 391     if( mark ) {
 392         *mark = 0;
 393         return mark - line;
 394     }
 395     return len;
 396 }
 397
 398 /****************
 399  * Remove trailing white spaces and return the length of the buffer
 400  */
 401 unsigned
 402 trim_trailing_ws( byte *line, unsigned len )
 403 {
 404     return trim_trailing_chars( line, len, " \t\r\n" );
 405 }
 406
 407
 408 unsigned int
 409 check_trailing_chars( const byte *line, unsigned int len,
 410                       const char *trimchars )
 411 {
 412     const byte *p, *mark;
 413     unsigned int n;
 414
 415     for(mark=NULL, p=line, n=0; n < len; n++, p++ ) {
 416         if( strchr(trimchars, *p ) ) {
 417             if( !mark )
 418                 mark = p;
 419         }
 420         else
 421             mark = NULL;
 422     }
 423
 424     if( mark ) {
 425         return mark - line;
 426     }
 427     return len;
 428 }
 429
 430
 431 /****************
 432  * Remove trailing white spaces and return the length of the buffer
 433  */
 434 unsigned int
 435 check_trailing_ws( const byte *line, unsigned int len )
 436 {
 437     return check_trailing_chars( line, len, " \t\r\n" );
 438 }
 439
 440
 441
 442 int
 443 string_count_chr( const char *string, int c )
 444 {
 445     int count;
 446     for(count=0; *string; string++ )
 447         if( *string == c )
 448             count++;
 449     return count;
 450 }
 451
 452 #ifdef USE_GNUPG_ICONV
 453 static void
 454 handle_iconv_error (const char *to, const char *from, int use_fallback)
 455 {
 456   if (errno == EINVAL)
 457     {
 458       static int shown1, shown2;
 459       int x;
 460
 461       if (to && !strcmp (to, "utf-8"))
 462         {
 463           x = shown1;
 464           shown1 = 1;
 465         }
 466       else
 467         {
 468           x = shown2;
 469           shown2 = 1;
 470         }
 471
 472       if (!x)
 473         log_info (_("conversion from `%s' to `%s' not available\n"),
 474                   from, to);
 475     }
 476   else
 477     {
 478       static int shown;
 479
 480       if (!shown)
 481         log_info (_("iconv_open failed: %s\n"), strerror (errno));
 482       shown = 1;
 483     }
 484
 485   if (use_fallback)
 486     {
 487       /* To avoid further error messages we fallback to Latin-1 for the
 488          native encoding.  This is justified as one can expect that on a
 489          utf-8 enabled system nl_langinfo() will work and thus we won't
 490          never get to here.  Thus Latin-1 seems to be a reasonable
 491          default.  */
 492       active_charset_name = "iso-8859-1";
 493       no_translation = 0;
 494       active_charset = NULL;
 495       use_iconv = 0;
 496     }
 497 }
 498 #endif /*USE_GNUPG_ICONV*/
 499
 500 int
 501 set_native_charset( const char *newset )
 502 {
 503     const char *full_newset;
 504
 505     if (!newset) {
 506 #ifdef _WIN32
 507         static char codepage[30];
 508         unsigned int cpno;
 509         const char *aliases;
 510
 511         /* We are a console program thus we need to use the
 512            GetConsoleOutputCP function and not the the GetACP which
 513            would give the codepage for a GUI program.  Note this is
 514            not a bulletproof detection because GetConsoleCP might
 515            return a different one for console input.  Not sure how to
 516            cope with that.  If the console Code page is not known we
 517            fall back to the system code page.  */
 518         cpno = GetConsoleOutputCP ();
 519         if (!cpno)
 520           cpno = GetACP ();
 521         sprintf (codepage, "CP%u", cpno );
 522         /* Resolve alias.  We use a long string string and not the
 523            usual array to optimize if the code is taken to a DSO.
 524            Taken from libiconv 1.9.2. */
 525         newset = codepage;
 526         for (aliases = ("CP936"   "\0" "GBK" "\0"
 527                         "CP1361"  "\0" "JOHAB" "\0"
 528                         "CP20127" "\0" "ASCII" "\0"
 529                         "CP20866" "\0" "KOI8-R" "\0"
 530                         "CP21866" "\0" "KOI8-RU" "\0"
 531                         "CP28591" "\0" "ISO-8859-1" "\0"
 532                         "CP28592" "\0" "ISO-8859-2" "\0"
 533                         "CP28593" "\0" "ISO-8859-3" "\0"
 534                         "CP28594" "\0" "ISO-8859-4" "\0"
 535                         "CP28595" "\0" "ISO-8859-5" "\0"
 536                         "CP28596" "\0" "ISO-8859-6" "\0"
 537                         "CP28597" "\0" "ISO-8859-7" "\0"
 538                         "CP28598" "\0" "ISO-8859-8" "\0"
 539                         "CP28599" "\0" "ISO-8859-9" "\0"
 540                         "CP28605" "\0" "ISO-8859-15" "\0"
 541                         "CP65001" "\0" "UTF-8" "\0");
 542              *aliases;
 543              aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
 544           {
 545             if (!strcmp (codepage, aliases) ||(*aliases == '*' && !aliases[1]))
 546               {
 547                 newset = aliases + strlen (aliases) + 1;
 548                 break;
 549               }
 550           }
 551
 552 #else
 553 #ifdef HAVE_LANGINFO_CODESET
 554         newset = nl_langinfo (CODESET);
 555 #else /* !HAVE_LANGINFO_CODESET */
 556         /* Try to get the used charset from environment variables.  */
 557         static char codepage[30];
 558         const char *lc, *dot, *mod;
 559
 560         strcpy (codepage, "iso-8859-1");
 561         lc = getenv ("LC_ALL");
 562         if (!lc || !*lc) {
 563             lc = getenv ("LC_CTYPE");
 564             if (!lc || !*lc)
 565                 lc = getenv ("LANG");
 566         }
 567         if (lc && *lc) {
 568             dot = strchr (lc, '.');
 569             if (dot) {
 570                 mod = strchr (++dot, '@');
 571                 if (!mod)
 572                     mod = dot + strlen (dot);
 573                 if (mod - dot < sizeof codepage && dot != mod) {
 574                     memcpy (codepage, dot, mod - dot);
 575                     codepage [mod - dot] = 0;
 576                 }
 577             }
 578         }
 579         newset = codepage;
 580 #endif  /* !HAVE_LANGINFO_CODESET */
 581 #endif
 582     }
 583
 584     full_newset = newset;
 585     if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3)) {
 586         newset += 3;
 587         if (*newset == '-' || *newset == '_')
 588             newset++;
 589     }
 590
 591     /* Note that we silently assume that plain ASCII is actually meant
 592        as Latin-1.  This makes sense because many Unix system don't
 593        have their locale set up properly and thus would get annoying
 594        error messages and we have to handle all the "bug"
 595        reports. Latin-1 has always been the character set used for 8
 596        bit characters on Unix systems. */
 597     if( !*newset
 598         || !ascii_strcasecmp (newset, "8859-1" )
 599         || !ascii_strcasecmp (newset, "646" )
 600         || !ascii_strcasecmp (newset, "ASCII" )
 601         || !ascii_strcasecmp (newset, "ANSI_X3.4-1968" )
 602         ) {
 603         active_charset_name = "iso-8859-1";
 604         no_translation = 0;
 605         active_charset = NULL;
 606         use_iconv = 0;
 607     }
 608     else if( !ascii_strcasecmp (newset, "utf8" )
 609              || !ascii_strcasecmp(newset, "utf-8") ) {
 610         active_charset_name = "utf-8";
 611         no_translation = 1;
 612         active_charset = NULL;
 613         use_iconv = 0;
 614     }
 615 #ifdef USE_GNUPG_ICONV
 616     else {
 617       iconv_t cd;
 618
 619 #ifdef _WIN32
 620       if (load_libiconv ())
 621           return G10ERR_GENERAL;
 622 #endif /*_WIN32*/
 623
 624       cd = iconv_open (full_newset, "utf-8");
 625       if (cd == (iconv_t)-1) {
 626           handle_iconv_error (full_newset, "utf-8", 0);
 627           return G10ERR_GENERAL;
 628       }
 629       iconv_close (cd);
 630       cd = iconv_open ("utf-8", full_newset);
 631       if (cd == (iconv_t)-1) {
 632           handle_iconv_error ("utf-8", full_newset, 0);
 633           return G10ERR_GENERAL;
 634       }
 635       iconv_close (cd);
 636       active_charset_name = full_newset;
 637       no_translation = 0;
 638       active_charset = NULL;
 639       use_iconv = 1;
 640     }
 641 #else /*!USE_GNUPG_ICONV*/
 642     else if( !ascii_strcasecmp( newset, "8859-2" ) ) {
 643         active_charset_name = "iso-8859-2";
 644         no_translation = 0;
 645         active_charset = latin2_unicode;
 646         use_iconv = 0;
 647     }
 648     else if( !ascii_strcasecmp( newset, "koi8-r" ) ) {
 649         active_charset_name = "koi8-r";
 650         no_translation = 0;
 651         active_charset = koi8_unicode;
 652         use_iconv = 0;
 653     }
 654     else
 655         return G10ERR_GENERAL;
 656 #endif /*!USE_GNUPG_ICONV*/
 657     return 0;
 658 }
 659
 660 const char*
 661 get_native_charset()
 662 {
 663     return active_charset_name;
 664 }
 665
 666 /****************
 667  * Convert string, which is in native encoding to UTF8 and return the
 668  * new allocated UTF8 string.
 669  */
 670 char *
 671 native_to_utf8( const char *string )
 672 {
 673   const byte *s;
 674   char *buffer;
 675   byte *p;
 676   size_t length=0;
 677
 678   if (no_translation)
 679     { /* Already utf-8 encoded. */
 680       buffer = xstrdup (string);
 681     }
 682   else if( !active_charset && !use_iconv) /* Shortcut implementation
 683                                              for Latin-1.  */
 684     {
 685       for(s=string; *s; s++ )
 686         {
 687           length++;
 688           if( *s & 0x80 )
 689             length++;
 690         }
 691       buffer = xmalloc( length + 1 );
 692       for(p=buffer, s=string; *s; s++ )
 693         {
 694           if( *s & 0x80 )
 695             {
 696               *p++ = 0xc0 | ((*s >> 6) & 3);
 697               *p++ = 0x80 | ( *s & 0x3f );
 698             }
 699           else
 700             *p++ = *s;
 701         }
 702       *p = 0;
 703     }
 704   else       /* Need to use a translation table. */
 705     {
 706 #ifdef USE_GNUPG_ICONV
 707       iconv_t cd;
 708       const char *inptr;
 709       char *outptr;
 710       size_t inbytes, outbytes;
 711
 712       cd = iconv_open ("utf-8", active_charset_name);
 713       if (cd == (iconv_t)-1)
 714         {
 715           handle_iconv_error ("utf-8", active_charset_name, 1);
 716           return native_to_utf8 (string);
 717         }
 718
 719       for (s=string; *s; s++ )
 720         {
 721           length++;
 722           if ((*s & 0x80))
 723             length += 5; /* We may need up to 6 bytes for the utf8 output. */
 724         }
 725       buffer = xmalloc (length + 1);
 726
 727       inptr = string;
 728       inbytes = strlen (string);
 729       outptr = buffer;
 730       outbytes = length;
 731       if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
 732                   &outptr, &outbytes) == (size_t)-1)
 733         {
 734           static int shown;
 735
 736           if (!shown)
 737             log_info (_("conversion from `%s' to `%s' failed: %s\n"),
 738                       active_charset_name, "utf-8", strerror (errno));
 739           shown = 1;
 740           /* We don't do any conversion at all but use the strings as is. */
 741           strcpy (buffer, string);
 742         }
 743       else /* Success.  */
 744         {
 745           *outptr = 0;
 746           /* We could realloc the buffer now but I doubt that it makes
 747              much sense given that it will get freed anyway soon
 748              after.  */
 749         }
 750       iconv_close (cd);
 751
 752 #else /*!USE_GNUPG_ICONV*/
 753       for(s=string; *s; s++ )
 754         {
 755           length++;
 756           if( *s & 0x80 )
 757             length += 2; /* We may need up to 3 bytes. */
 758         }
 759       buffer = xmalloc( length + 1 );
 760       for(p=buffer, s=string; *s; s++ ) {
 761         if( *s & 0x80 ) {
 762           ushort val = active_charset[ *s & 0x7f ];
 763           if( val < 0x0800 ) {
 764             *p++ = 0xc0 | ( (val >> 6) & 0x1f );
 765             *p++ = 0x80 | (  val & 0x3f );
 766           }
 767           else {
 768             *p++ = 0xe0 | ( (val >> 12) & 0x0f );
 769             *p++ = 0x80 | ( (val >>  6) & 0x3f );
 770             *p++ = 0x80 | (  val & 0x3f );
 771           }
 772         }
 773         else
 774           *p++ = *s;
 775       }
 776       *p = 0;
 777 #endif /*!USE_GNUPG_ICONV*/
 778
 779     }
 780   return buffer;
 781 }
 782
 783
 784 /****************
  785  * Convert string, which is in UTF8 to native encoding.  Replace illegal
  786  * encodings by some "\xnn" and quote all control characters. A
 787  * character with value DELIM will always be quoted, it must be a
 788  * vanilla ASCII character.  A DELIM value of -1 is special: it disables
 789  * all quoting of control characters.
 790  */
 791 char *
 792 utf8_to_native( const char *string, size_t length, int delim )
 793 {
 794     int nleft;
 795     int i;
 796     byte encbuf[8];
 797     int encidx;
 798     const byte *s;
 799     size_t n;
 800     byte *buffer = NULL, *p = NULL;
 801     unsigned long val = 0;
 802     size_t slen;
 803     int resync = 0;
 804
 805     /* 1. pass (p==NULL): count the extended utf-8 characters */
 806     /* 2. pass (p!=NULL): create string */
 807     for( ;; ) {
 808         for( slen=length, nleft=encidx=0, n=0, s=string; slen; s++, slen-- ) {
 809             if( resync ) {
 810                 if( !(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)) ) {
 811                     /* still invalid */
 812                     if( p ) {
 813                         sprintf(p, "\\x%02x", *s );
 814                         p += 4;
 815                     }
 816                     n += 4;
 817                     continue;
 818                 }
 819                 resync = 0;
 820             }
 821             if( !nleft ) {
 822                 if( !(*s & 0x80) ) { /* plain ascii */
 823                     if( delim != -1
 824                         && (*s < 0x20 || *s == 0x7f || *s == delim
 825                             || (delim && *s=='\\'))) {
 826                         n++;
 827                         if( p )
 828                             *p++ = '\\';
 829                         switch( *s ) {
 830                           case '\n': n++; if( p ) *p++ = 'n'; break;
 831                           case '\r': n++; if( p ) *p++ = 'r'; break;
 832                           case '\f': n++; if( p ) *p++ = 'f'; break;
 833                           case '\v': n++; if( p ) *p++ = 'v'; break;
 834                           case '\b': n++; if( p ) *p++ = 'b'; break;
 835                           case   0 : n++; if( p ) *p++ = '0'; break;
 836                           default:
 837                             n += 3;
 838                             if ( p ) {
 839                                 sprintf( p, "x%02x", *s );
 840                                 p += 3;
 841                             }
 842                             break;
 843                         }
 844                     }
 845                     else {
 846                         if( p ) *p++ = *s;
 847                         n++;
 848                     }
 849                 }
 850                 else if( (*s & 0xe0) == 0xc0 ) { /* 110x xxxx */
 851                     val = *s & 0x1f;
 852                     nleft = 1;
 853                     encidx = 0;
 854                     encbuf[encidx++] = *s;
 855                 }
 856                 else if( (*s & 0xf0) == 0xe0 ) { /* 1110 xxxx */
 857                     val = *s & 0x0f;
 858                     nleft = 2;
 859                     encidx = 0;
 860                     encbuf[encidx++] = *s;
 861                 }
 862                 else if( (*s & 0xf8) == 0xf0 ) { /* 1111 0xxx */
 863                     val = *s & 0x07;
 864                     nleft = 3;
 865                     encidx = 0;
 866                     encbuf[encidx++] = *s;
 867                 }
 868                 else if( (*s & 0xfc) == 0xf8 ) { /* 1111 10xx */
 869                     val = *s & 0x03;
 870                     nleft = 4;
 871                     encidx = 0;
 872                     encbuf[encidx++] = *s;
 873                 }
 874                 else if( (*s & 0xfe) == 0xfc ) { /* 1111 110x */
 875                     val = *s & 0x01;
 876                     nleft = 5;
 877                     encidx = 0;
 878                     encbuf[encidx++] = *s;
 879                 }
 880                 else {  /* invalid encoding: print as \xnn */
 881                     if( p ) {
 882                         sprintf(p, "\\x%02x", *s );
 883                         p += 4;
 884                     }
 885                     n += 4;
 886                     resync = 1;
 887                 }
 888             }
 889             else if( *s < 0x80 || *s >= 0xc0 ) { /* invalid */
 890                 if( p ) {
 891                     for(i=0; i < encidx; i++ ) {
 892                         sprintf(p, "\\x%02x", encbuf[i] );
 893                         p += 4;
 894                     }
 895                     sprintf(p, "\\x%02x", *s );
 896                     p += 4;
 897                 }
 898                 n += 4 + 4*encidx;
 899                 nleft = 0;
 900                 encidx = 0;
 901                 resync = 1;
 902             }
 903             else {
 904                 encbuf[encidx++] = *s;
 905                 val <<= 6;
 906                 val |= *s & 0x3f;
 907                 if( !--nleft ) { /* ready */
 908                     if (no_translation) {
 909                         if( p ) {
 910                             for(i=0; i < encidx; i++ )
 911                                 *p++ = encbuf[i];
 912                         }
 913                         n += encidx;
 914                         encidx = 0;
 915                     }
 916 #ifdef USE_GNUPG_ICONV
 917                     else if(use_iconv) {
 918                         /* Our strategy for using iconv is a bit
 919                          * strange but it better keeps compatibility
 920                          * with previous versions in regard to how
 921                          * invalid encodings are displayed.  What we
 922                          * do is to keep the utf-8 as is and have the
 923                          * real translation step then at the end.
 924                          * Yes, I know that this is ugly.  However we
 925                          * are short of the 1.4 release and for this
 926                          * branch we should not mee too much around
 927                          * with iconv things.  One reason for this is
 928                          * that we don't know enough about non-GNU
 929                          * iconv implementation and want to minimize
 930                          * the risk of breaking the code on too many
 931                          * platforms.  */
 932                         if( p ) {
 933                             for(i=0; i < encidx; i++ )
 934                                 *p++ = encbuf[i];
 935                         }
 936                         n += encidx;
 937                         encidx = 0;
 938                     }
 939 #endif /*USE_GNUPG_ICONV*/
 940                     else if( active_charset ) { /* table lookup */
 941                         for(i=0; i < 128; i++ ) {
 942                             if( active_charset[i] == val )
 943                                 break;
 944                         }
 945                         if( i < 128 ) { /* we can print this one */
 946                             if( p ) *p++ = i+128;
 947                             n++;
 948                         }
 949                         else { /* we do not have a translation: print utf8 */
 950                             if( p ) {
 951                                 for(i=0; i < encidx; i++ ) {
 952                                     sprintf(p, "\\x%02x", encbuf[i] );
 953                                     p += 4;
 954                                 }
 955                             }
 956                             n += encidx*4;
 957                             encidx = 0;
 958                         }
 959                     }
 960                     else { /* native set */
 961                         if( val >= 0x80 && val < 256 ) {
 962                             n++;    /* we can simply print this character */
 963                             if( p ) *p++ = val;
 964                         }
 965                         else { /* we do not have a translation: print utf8 */
 966                             if( p ) {
 967                                 for(i=0; i < encidx; i++ ) {
 968                                     sprintf(p, "\\x%02x", encbuf[i] );
 969                                     p += 4;
 970                                 }
 971                             }
 972                             n += encidx*4;
 973                             encidx = 0;
 974                         }
 975                     }
 976                 }
 977
 978             }
 979         }
 980         if( !buffer ) { /* allocate the buffer after the first pass */
 981             buffer = p = xmalloc( n + 1 );
 982         }
 983 #ifdef USE_GNUPG_ICONV
 984         else if(use_iconv) {
 985             /* Note: See above for comments.  */
 986             iconv_t cd;
 987             const char *inptr;
 988             char *outbuf, *outptr;
 989             size_t inbytes, outbytes;
 990
 991             *p = 0;  /* Terminate the buffer. */
 992
 993             cd = iconv_open (active_charset_name, "utf-8");
 994             if (cd == (iconv_t)-1)
 995                 {
 996                     handle_iconv_error (active_charset_name, "utf-8", 1);
 997                     xfree (buffer);
 998                     return utf8_to_native (string, length, delim);
 999                 }
1000
1001             /* Allocate a new buffer large enough to hold all possible
1002              * encodings. */
1003             n = p - buffer + 1;
1004             inbytes = n - 1;;
1005             inptr = buffer;
1006             outbytes = n * MB_LEN_MAX;
1007             if (outbytes / MB_LEN_MAX != n)
1008                 BUG (); /* Actually an overflow. */
1009             outbuf = outptr = xmalloc (outbytes);
1010             if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
1011                         &outptr, &outbytes) == (size_t)-1) {
1012                 static int shown;
1013
1014                 if (!shown)
1015                   log_info (_("conversion from `%s' to `%s' failed: %s\n"),
1016                             "utf-8", active_charset_name, strerror (errno));
1017                 shown = 1;
1018                 /* Didn't work out.  Temporarily disable the use of
1019                  * iconv and fall back to our old code. */
1020                 xfree (buffer);
1021                 buffer = NULL;
1022                 xfree (outbuf);
1023                 use_iconv = 0;
1024                 outbuf = utf8_to_native (string, length, delim);
1025                 use_iconv = 1;
1026             }
1027             else { /* Success.  */
1028                 *outptr = 0;
1029                 /* We could realloc the buffer now but I doubt that it makes
1030                    much sense given that it will get freed anyway soon
1031                    after.  */
1032                 xfree (buffer);
1033             }
1034             iconv_close (cd);
1035             return outbuf;
1036         }
1037 #endif /*USE_GNUPG_ICONV*/
1038         else {
1039             *p = 0; /* make a string */
1040             return buffer;
1041         }
1042     }
1043 }
1044
1045 /****************************************************
1046  ******** locale insensitive ctype functions ********
1047  ****************************************************/
1048 /* FIXME: replace them by a table lookup and macros */
/* Return 1 if C is one of the ASCII letters 'A'..'Z' and 0 otherwise.
 * The current locale is not consulted.  */
int
ascii_isupper (int c)
{
    return !(c < 'A' || c > 'Z');
}
1054
/* Return 1 if C is one of the ASCII letters 'a'..'z' and 0 otherwise.
 * The current locale is not consulted.  */
int
ascii_islower (int c)
{
    return !(c < 'a' || c > 'z');
}
1060
/* Map the ASCII letters 'a'..'z' to 'A'..'Z' by clearing bit 0x20;
 * every other value of C is returned unchanged.  The current locale
 * is not consulted.  */
int
ascii_toupper (int c)
{
    return (c < 'a' || c > 'z') ? c : (c & ~0x20);
}
1068
/* Map the ASCII letters 'A'..'Z' to 'a'..'z' by setting bit 0x20;
 * every other value of C is returned unchanged.  The current locale
 * is not consulted.  */
int
ascii_tolower (int c)
{
    return (c < 'A' || c > 'Z') ? c : (c | 0x20);
}
1076
1077
/* Compare the NUL terminated strings A and B, folding only the ASCII
 * letters (via ascii_tolower).  Returns 0 if the strings match, else
 * the difference of the first pair of folded bytes that differ, both
 * taken as unsigned char.  */
int
ascii_strcasecmp (const char *a, const char *b)
{
  const unsigned char *s1 = (const unsigned char *)a;
  const unsigned char *s2 = (const unsigned char *)b;
  unsigned char x, y;

  /* Identical pointers: nothing to compare.  */
  if (s1 == s2)
    return 0;

  for (;;)
    {
      x = ascii_tolower (*s1++);
      y = ascii_tolower (*s2++);

      /* Stop at the end of A or at the first mismatch.  */
      if (x == '\0' || x != y)
        break;
    }

  return x - y;
}
1103
/* Like ascii_strcasecmp but look at no more than N bytes of A and B.
 * Returns 0 if N is zero or if the inspected bytes match, else the
 * difference of the first pair of folded bytes that differ, both
 * taken as unsigned char.  */
int
ascii_strncasecmp (const char *a, const char *b, size_t n)
{
  const unsigned char *s1 = (const unsigned char *)a;
  const unsigned char *s2 = (const unsigned char *)b;
  unsigned char x, y;

  if (!n || s1 == s2)
    return 0;

  for (;;)
    {
      x = ascii_tolower (*s1++);
      y = ascii_tolower (*s2++);

      /* Stop once N bytes were seen, at the end of A, or at the
         first mismatch.  */
      if (--n == 0 || x == '\0' || x != y)
        break;
    }

  return x - y;
}
1129
1130
/* Compare the first N bytes of the buffers A and B, treating the
 * ASCII letters 'a'..'z' as equal to 'A'..'Z'.  NUL bytes are ordinary
 * data and do not stop the comparison.
 *
 * Returns 0 if all N bytes match (or N is 0), else the difference of
 * the first pair of folded bytes that differ.  The bytes are compared
 * as unsigned char, like ascii_strcasecmp does, so that the sign of
 * the result does not depend on whether plain char is signed.  */
int
ascii_memcasecmp( const char *a, const char *b, size_t n )
{
    const unsigned char *p1 = (const unsigned char *)a;
    const unsigned char *p2 = (const unsigned char *)b;

    if (p1 == p2)
        return 0;
    for ( ; n; n--, p1++, p2++ ) {
        int c1 = *p1;
        int c2 = *p2;

        /* Fold ASCII lowercase to uppercase; other bytes stay as is. */
        if (c1 >= 'a' && c1 <= 'z')
            c1 &= ~0x20;
        if (c2 >= 'a' && c2 <= 'z')
            c2 &= ~0x20;
        if (c1 != c2)
            return c1 - c2;
    }
    return 0;
}
1142
1143
1144
1145 /*********************************************
1146  ********** missing string functions *********
1147  *********************************************/
1148
1149 #ifndef HAVE_STPCPY
/* Copy the string B including its terminating NUL to A and return a
 * pointer to that NUL inside A, so that further text can be appended
 * without rescanning.  */
char *
stpcpy(char *a,const char *b)
{
    for( ; (*a = *b) != 0; a++, b++ )
        ;
    return a;
}
1159 #endif
1160
1161
1162 #ifndef HAVE_STRSEP
1163 /* code taken from glibc-2.2.1/sysdeps/generic/strsep.c */
/* Split off the next token from *STRINGP.  The token ends at the
 * first byte that occurs in DELIM; that byte is overwritten with NUL
 * and *STRINGP is advanced to the byte after it.  If no delimiter is
 * found the rest of the string is the token and *STRINGP is set to
 * NULL.  Returns the start of the token, or NULL if *STRINGP was
 * already NULL.  */
char *
strsep (char **stringp, const char *delim)
{
  char *token = *stringp;
  char *stop;

  if (!token)
    return NULL;

  if (!delim[0])
    stop = NULL;                    /* Empty set: nothing can match.  */
  else if (delim[1])
    stop = strpbrk (token, delim);  /* Several delimiters.  */
  else if (*token == delim[0])
    stop = token;                   /* Single delimiter, empty token.  */
  else if (!*token)
    stop = NULL;                    /* Single delimiter, empty input.  */
  else
    /* Single delimiter: the cheaper strchr is sufficient, and the
       first byte has already been checked above.  */
    stop = strchr (token + 1, delim[0]);

  if (!stop)
    {
      /* No more delimiters; this is the last token.  */
      *stringp = NULL;
      return token;
    }

  /* Terminate the token and continue behind the delimiter.  */
  *stop = '\0';
  *stringp = stop + 1;
  return token;
}
1208 #endif /*HAVE_STRSEP*/
1209
1210
1211 #ifndef HAVE_STRLWR
/* Convert the string S in place to lowercase using tolower, which
 * follows the current locale, and return S.  */
char *
strlwr(char *s)
{
    unsigned char *cur = (unsigned char *)s;

    while( *cur ) {
        *cur = tolower(*cur);
        cur++;
    }
    return s;
}
1220 #endif
1221
1222 #ifndef HAVE_STRCASECMP
/* Replacement for a missing strcasecmp: compare the NUL terminated
 * strings A and B without regard to case.  Each byte is taken as
 * unsigned char and folded with tolower (which follows the current
 * locale) before it is compared.
 *
 * Returns 0 if the strings match, else the difference of the first
 * pair of folded bytes that differ.  Using the folded bytes for the
 * result, and not the raw ones, gives a consistent case-insensitive
 * ordering: "a" sorts before "B".  */
int
strcasecmp( const char *a, const char *b )
{
    const unsigned char *p1 = (const unsigned char *)a;
    const unsigned char *p2 = (const unsigned char *)b;
    int c1, c2;

    do {
        c1 = tolower (*p1++);
        c2 = tolower (*p2++);
    } while( c1 && c1 == c2 );

    return c1 - c2;
}
1233 #endif
1234
1235 #ifndef HAVE_STRNCASECMP
/* Replacement for a missing strncasecmp: like strcasecmp but compare
 * no more than N bytes of A and B.  Each byte is taken as unsigned
 * char and folded with tolower (which follows the current locale)
 * before it is compared.
 *
 * Returns 0 if N is zero or if the inspected bytes match, else the
 * difference of the first pair of folded bytes that differ.  Using
 * the folded bytes for the result, and not the raw ones, gives a
 * consistent case-insensitive ordering: "a" sorts before "B".  */
int
strncasecmp( const char *a, const char *b, size_t n )
{
    const unsigned char *p1 = (const unsigned char *)a;
    const unsigned char *p2 = (const unsigned char *)b;

    for( ; n; n--, p1++, p2++ ) {
        int c1 = tolower (*p1);
        int c2 = tolower (*p2);

        if( c1 != c2 )
            return c1 - c2;
        if( !c1 )
            break;  /* Both strings end here. */
    }
    return 0;
}
1248 #endif
1249
1250
1251 #ifdef _WIN32
/*
 * Like vsprintf but provides a pointer to malloc'd storage, which
 * must be freed by the caller (xfree).  Taken from libiberty as
 * found in gcc-2.95.2 and a little bit modernized.
 *
 * FORMAT is scanned once to compute an upper bound for the length of
 * the output; a buffer of that size is then allocated, stored at
 * RESULT and filled by vsprintf.  Only the h, l and L length
 * modifiers are recognized and they are merely skipped: integer
 * arguments are always fetched as int and floating point arguments
 * as double while scanning.
 *
 * Returns the value of vsprintf, or 0 if no buffer was obtained.
 * FIXME: Write a new CRT for W32.
 */
int
vasprintf (char **result, const char *format, va_list args)
{
  const char *p = format;
  /* Add one to make sure that it is never zero, which might cause malloc
     to return NULL.  */
  size_t total_width = strlen (format) + 1;
  va_list ap;

  /* Scan with a private copy so that ARGS is still untouched when it
     is handed to vsprintf below.  */
#ifdef va_copy
  va_copy (ap, args);
#else
  /* this is not really portable but works under Windows */
  memcpy ( &ap, &args, sizeof (va_list));
#endif

  while (*p != '\0')
    {
      if (*p++ != '%')
        continue;

      /* Flags.  The explicit test for NUL is required because strchr
         also finds the terminator of its search set; without it a
         format ending in '%' would be scanned beyond its end.  */
      while (*p && strchr ("-+ #0", *p))
        ++p;

      /* Field width.  */
      if (*p == '*')
        {
          ++p;
          total_width += abs (va_arg (ap, int));
        }
      else
        {
          char *endp;
          total_width += strtoul (p, &endp, 10);
          p = endp;
        }

      /* Precision.  */
      if (*p == '.')
        {
          ++p;
          if (*p == '*')
            {
              ++p;
              total_width += abs (va_arg (ap, int));
            }
          else
            {
              char *endp;
              total_width += strtoul (p, &endp, 10);
              p = endp;
            }
        }

      /* Length modifiers.  */
      while (*p && strchr ("hlL", *p))
        ++p;

      /* Should be big enough for any format specifier except %s
         and floats.  */
      total_width += 30;
      switch (*p)
        {
        case 'd':
        case 'i':
        case 'o':
        case 'u':
        case 'x':
        case 'X':
        case 'c':
          (void) va_arg (ap, int);
          break;
        case 'f':
        case 'e':
        case 'E':
        case 'g':
        case 'G':
          (void) va_arg (ap, double);
          /* Since an ieee double can have an exponent of 307, we'll
             make the buffer wide enough to cover the gross case. */
          total_width += 307;
          /* Must not fall through: the 's' case would fetch one more
             argument and run strlen on it.  */
          break;
        case 's':
          {
            const char *s = va_arg (ap, char *);

            /* A NULL string has no length to add; whatever vsprintf
               prints for it has to fit into the 30 bytes above.  */
            if (s)
              total_width += strlen (s);
          }
          break;
        case 'p':
        case 'n':
          (void) va_arg (ap, char *);
          break;
        default:
          break;
        }

      /* Step over the conversion character so that the second '%' of
         a "%%" is not taken as the start of a new specification.  */
      if (*p != '\0')
        ++p;
    }

#ifdef va_copy
  va_end (ap);
#endif

  *result = xmalloc (total_width);
  if (*result != NULL)
    return vsprintf (*result, format, args);
  else
    return 0;
}
1344
1345 int
1346 asprintf (char **buf, const char *fmt, ...)
1347 {
1348   int status;
1349   va_list ap;
1350
1351   va_start (ap, fmt);
1352   status = vasprintf (buf, fmt, ap);
1353   va_end (ap);
1354   return status;
1355 }
1356
1357 const char *
1358 w32_strerror (int w32_errno)
1359 {
1360   static char strerr[256];
1361   int ec = (int)GetLastError ();
1362
1363   if (w32_errno == 0)
1364     w32_errno = ec;
1365   FormatMessage (FORMAT_MESSAGE_FROM_SYSTEM, NULL, w32_errno,
1366                  MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT),
1367                  strerr, DIM (strerr)-1, NULL);
1368   return strerr;
1369 }
1370 #endif /*_WIN32*/
1371
1372
1373