2008-06-18 Marcus Brinkmann <marcus@g10code.de>
[gnupg.git] / jnlib / stringhelp.c
blob02875052875a2d833fd5c7ef9ff207fd15acb413
/* stringhelp.c - standard string helper functions
 * Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005,
 *               2006, 2007  Free Software Foundation, Inc.
 *
 * This file is part of JNLIB.
 *
 * JNLIB is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 3 of
 * the License, or (at your option) any later version.
 *
 * JNLIB is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
21 #include <config.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <stdarg.h>
25 #include <ctype.h>
26 #ifdef HAVE_W32_SYSTEM
27 #include <windows.h>
28 #endif
30 #include "libjnlib-config.h"
31 #include "utf8conv.h"
32 #include "stringhelp.h"
/* Map the nibble value N (expected range 0..15) to its lowercase
   hexadecimal digit.  N is evaluated more than once, so it must not
   have side effects.  */
#define tohex_lower(n) ((n) < 10 ? ((n) + '0') : (((n) - 10) + 'a'))
/*
 * Look for the substring SUB in BUFFER and return a pointer to that
 * substring in BUFFER or NULL if not found.  BUFFER is BUFLEN bytes
 * long and need not be NUL terminated; SUB is a NUL terminated string.
 * Comparison is case-insensitive (using toupper).
 * An empty SUB is never found: NULL is returned.
 */
const char *
memistr (const void *buffer, size_t buflen, const char *sub)
{
  const unsigned char *start = buffer;
  const unsigned char *needle = (const unsigned char *)sub;

  /* Without this guard a NUL byte in BUFFER would "match" the
     terminator of an empty SUB and the scan below would then read
     past the end of SUB.  */
  if (!*needle)
    return NULL;

  for ( ; buflen; start++, buflen--)
    {
      const unsigned char *t = start;
      const unsigned char *s = needle;
      size_t n = buflen;

      if (toupper (*t) != toupper (*s))
        continue;

      /* First character matches; compare the rest but never look
         beyond the end of BUFFER.  */
      for (t++, s++, n--; n && toupper (*t) == toupper (*s); t++, s++, n--)
        ;
      if (!*s)
        return (const char *)start;  /* All of SUB was matched.  */
    }

  return NULL;
}
/*
 * Look for the NUL terminated string SUB in BUFFER (BUFLEN bytes, not
 * necessarily NUL terminated) and return a pointer to the first match
 * in BUFFER or NULL if not found.  Letters are compared
 * case-insensitively via ascii_toupper, i.e. independent of the
 * locale.  An empty SUB is never found: NULL is returned.
 */
const char *
ascii_memistr ( const void *buffer, size_t buflen, const char *sub )
{
  const unsigned char *start = buffer;
  const unsigned char *needle = (const unsigned char *)sub;

  /* Without this guard a NUL byte in BUFFER would "match" the
     terminator of an empty SUB and the scan below would then read
     past the end of SUB.  */
  if (!*needle)
    return NULL;

  for ( ; buflen; start++, buflen--)
    {
      const unsigned char *t = start;
      const unsigned char *s = needle;
      size_t n = buflen;

      if (ascii_toupper (*t) != ascii_toupper (*s))
        continue;

      /* First character matches; compare the rest but never look
         beyond the end of BUFFER.  */
      for (t++, s++, n--;
           n && ascii_toupper (*t) == ascii_toupper (*s);
           t++, s++, n--)
        ;
      if (!*s)
        return (const char *)start;  /* All of SUB was matched.  */
    }

  return NULL;
}
/* This function is similar to strncpy().  However it won't copy more
   than N - 1 characters and makes sure that a '\0' is appended.  With
   N given as 0, nothing will happen.  With DEST given as NULL, memory
   will be allocated using jnlib_xmalloc (i.e. if it runs out of core
   the function terminates).  Returns DEST or a pointer to the
   allocated memory.  Copying also stops at the first '\0' found in
   SRC.  */
char *
mem2str (char *dest, const void *src, size_t n)
{
  char *d;
  const char *s;

  if (n)
    {
      if (!dest)
        dest = jnlib_xmalloc (n);
      d = dest;
      s = src;
      /* Reserve one byte of the N available for the terminator.  */
      for (n--; n && *s; n--)
        *d++ = *s++;
      *d = '\0';
    }

  return dest;
}
/****************
 * Remove leading and trailing white spaces from STR.  The string is
 * modified in place and STR is returned.
 */
char *
trim_spaces (char *str)
{
  char *dst, *src, *mark;

  dst = str;
  /* Find first non space character.  */
  for (src = dst; *src && isspace (*(unsigned char *)src); src++)
    ;
  /* Move characters to the front.  MARK tracks the start of the
     current run of white space in the destination so that a run
     reaching the end of the string can be cut off afterwards.  */
  for (mark = NULL; (*dst = *src); dst++, src++)
    {
      if (isspace (*(unsigned char *)src))
        {
          if (!mark)
            mark = dst;
        }
      else
        mark = NULL;
    }
  if (mark)
    *mark = '\0';  /* Remove trailing spaces.  */

  return str;
}
/****************
 * Remove trailing white spaces from STRING.  The string is modified
 * in place and STRING is returned.
 */
char *
trim_trailing_spaces (char *string)
{
  char *p, *mark;

  /* MARK points to the start of the current run of white space, or
     is NULL while the last character seen was not white space.  */
  for (mark = NULL, p = string; *p; p++)
    {
      if (isspace (*(unsigned char *)p))
        {
          if (!mark)
            mark = p;
        }
      else
        mark = NULL;
    }
  if (mark)
    *mark = '\0';

  return string;
}
168 unsigned
169 trim_trailing_chars( byte *line, unsigned len, const char *trimchars )
171 byte *p, *mark;
172 unsigned n;
174 for(mark=NULL, p=line, n=0; n < len; n++, p++ ) {
175 if( strchr(trimchars, *p ) ) {
176 if( !mark )
177 mark = p;
179 else
180 mark = NULL;
183 if( mark ) {
184 *mark = 0;
185 return mark - line;
187 return len;
190 /****************
191 * remove trailing white spaces and return the length of the buffer
193 unsigned
194 trim_trailing_ws( byte *line, unsigned len )
196 return trim_trailing_chars( line, len, " \t\r\n" );
/* Return the length of the buffer LINE of length LEN without its
   trailing run of bytes listed in the string TRIMCHARS.  LINE is not
   modified.  Note that strchr also finds the terminator of
   TRIMCHARS, thus NUL bytes in LINE are treated like trim
   characters.  */
size_t
length_sans_trailing_chars (const unsigned char *line, size_t len,
                            const char *trimchars )
{
  const unsigned char *p, *mark;
  size_t n;

  /* MARK points to the start of the current run of trim characters,
     or is NULL while the last byte seen was not a trim character.  */
  for (mark = NULL, p = line, n = 0; n < len; n++, p++)
    {
      if (strchr (trimchars, *p))
        {
          if (!mark)
            mark = p;
        }
      else
        mark = NULL;
    }

  if (mark)
    return mark - line;
  return len;
}
/*
 * Return the length of the buffer LINE of length LEN ignoring
 * trailing white-space (space, tab, CR and LF).
 */
size_t
length_sans_trailing_ws (const unsigned char *line, size_t len)
{
  return length_sans_trailing_chars (line, len, " \t\r\n");
}
/***************
 * Extract from a given path the filename component.  The result is
 * a copy allocated with jnlib_xstrdup.  The component starts after
 * the last '/'; if HAVE_DRIVE_LETTERS is defined and there is no
 * '/', it starts after the last '\\' or, failing that, the last ':'.
 * Without any such separator a copy of FILEPATH is returned.
 * INPUTPATH is only used on RISC OS.
 */
char *
make_basename(const char *filepath, const char *inputpath)
{
  const char *p;

#ifdef __riscos__
  return riscos_make_basename(filepath, inputpath);
#endif

  (void)inputpath;

  if ( !(p = strrchr (filepath, '/')) )
#ifdef HAVE_DRIVE_LETTERS
    if ( !(p = strrchr (filepath, '\\')) )
      if ( !(p = strrchr (filepath, ':')) )
#endif
        {
          return jnlib_xstrdup (filepath);
        }

  return jnlib_xstrdup (p + 1);
}
/***************
 * Extract from a given filename the path prepended to it.
 * If there isn't a path prepended to the filename, a dot
 * is returned ('.').  The path is everything before the last '/'
 * (or, if HAVE_DRIVE_LETTERS is defined and there is no '/', before
 * the last '\\' or the last ':').  The result is newly allocated
 * using the jnlib_x* allocators.
 */
char *
make_dirname(const char *filepath)
{
  char *dirname;
  size_t dirname_length;
  const char *p;

  if ( !(p = strrchr (filepath, '/')) )
#ifdef HAVE_DRIVE_LETTERS
    if ( !(p = strrchr (filepath, '\\')) )
      if ( !(p = strrchr (filepath, ':')) )
#endif
        {
          return jnlib_xstrdup (".");
        }

  /* P points into FILEPATH, thus the first DIRNAME_LENGTH bytes
     contain no NUL and a plain memcpy is sufficient.  */
  dirname_length = p - filepath;
  dirname = jnlib_xmalloc (dirname_length + 1);
  memcpy (dirname, filepath, dirname_length);
  dirname[dirname_length] = 0;

  return dirname;
}
/****************
 * Construct a filename from the NULL terminated list of parts.
 * The parts are joined with a '/' between them.
 * Tilde expansion is done here: if FIRST_PART starts with "~/" and
 * the environment variable HOME is set and not empty, the '~' is
 * replaced by the value of HOME.  The result is allocated with
 * jnlib_xmalloc.
 */
char *
make_filename( const char *first_part, ... )
{
  va_list arg_ptr;
  size_t n;
  const char *s;
  char *name, *home, *p;

  /* First pass: compute the required size.  Each part accounts for
     its length plus one byte for the separator or the final NUL.  */
  va_start (arg_ptr, first_part);
  n = strlen (first_part) + 1;
  while ( (s = va_arg (arg_ptr, const char *)) )
    n += strlen (s) + 1;
  va_end (arg_ptr);

  home = NULL;
  if ( *first_part == '~' && first_part[1] == '/'
       && (home = getenv ("HOME")) && *home )
    n += strlen (home);

  /* Second pass: build the name.  */
  name = jnlib_xmalloc (n);
  p = (home
       ? stpcpy (stpcpy (name, home), first_part + 1)
       : stpcpy (name, first_part));

  va_start (arg_ptr, first_part);
  while ( (s = va_arg (arg_ptr, const char *)) )
    p = stpcpy (stpcpy (p, "/"), s);
  va_end (arg_ptr);

#ifdef HAVE_DRIVE_LETTERS
  /* We better avoid mixing slashes and backslashes and prefer
     backslashes.  There is usual no problem with mixing them, however
     a very few W32 API calls can't grok plain slashes.  Printing
     filenames with mixed slashes also looks a bit strange. */
  if (strchr (name, '\\'))
    {
      for (p = name; *p; p++)
        if (*p == '/')
          *p = '\\';
    }
#endif /*HAVE_DRIVE_LETTERS*/
  return name;
}
/* Compare whether the filenames are identical.  This is a
   special version of strcmp() taking the semantics of filenames in
   account.  Note that this function works only on the supplied names
   without considering any context like the current directory.  See
   also same_file_p().  With HAVE_DRIVE_LETTERS defined the
   comparison ignores case (using toupper) and treats '/' and '\\'
   as equal; otherwise it is a plain strcmp.  Returns 0 if the names
   are considered identical.  */
int
compare_filenames (const char *a, const char *b)
{
#ifdef HAVE_DRIVE_LETTERS
  for ( ; *a && *b; a++, b++ )
    {
      if (*a != *b
          && (toupper (*(const unsigned char*)a)
              != toupper (*(const unsigned char*)b) )
          && !((*a == '/' && *b == '\\') || (*a == '\\' && *b == '/')))
        break;
    }
  if ((*a == '/' && *b == '\\') || (*a == '\\' && *b == '/'))
    return 0;
  else
    return (toupper (*(const unsigned char*)a)
            - toupper (*(const unsigned char*)b));
#else
  return strcmp (a, b);
#endif
}
/* Convert 2 hex characters at S to a byte value.  Return this value
   or -1 if there is an error.  Upper and lower case digits are
   accepted.  The second character is only examined if the first one
   is a valid hex digit.  */
int
hextobyte (const char *s)
{
  int value = 0;
  int i;

  for (i = 0; i < 2; i++, s++)
    {
      int digit;

      if ( *s >= '0' && *s <= '9' )
        digit = *s - '0';
      else if ( *s >= 'A' && *s <= 'F' )
        digit = 10 + *s - 'A';
      else if ( *s >= 'a' && *s <= 'f' )
        digit = 10 + *s - 'a';
      else
        return -1;
      value = 16 * value + digit;
    }
  return value;
}
/* Print a BUFFER to stream FP while replacing all control characters
   and the characters DELIM and DELIM2 with standard C escape
   sequences.  If DELIM or DELIM2 is non-zero the backslash is
   escaped as well.  Bytes without a short escape are printed as
   "\xNN".  Returns the number of characters printed.  */
size_t
print_sanitized_buffer2 (FILE *fp, const void *buffer, size_t length,
                         int delim, int delim2)
{
  const unsigned char *p = buffer;
  size_t count = 0;

  /* COUNT is advanced only where something is actually written, so
     that it really is the number of characters printed.  */
  for (; length; length--, p++)
    {
      /* Fixme: Check whether *p < 0xa0 is correct for utf8 encoding. */
      if (*p < 0x20
          || (*p >= 0x7f && *p < 0xa0)
          || *p == delim
          || *p == delim2
          || ((delim || delim2) && *p=='\\'))
        {
          int esc;

          putc ('\\', fp);
          count++;

          if (*p == '\n')
            esc = 'n';
          else if (*p == '\r')
            esc = 'r';
          else if (*p == '\f')
            esc = 'f';
          else if (*p == '\v')
            esc = 'v';
          else if (*p == '\b')
            esc = 'b';
          else if (!*p)
            esc = '0';
          else
            esc = 0;  /* No short form; use a hex escape.  */

          if (esc)
            {
              putc (esc, fp);
              count++;
            }
          else
            {
              fprintf (fp, "x%02x", *p);
              count += 3;
            }
        }
      else
        {
          putc (*p, fp);
          count++;
        }
    }

  return count;
}
/* Same as print_sanitized_buffer2 but with just one delimiter: the
   second delimiter is passed as 0.  Returns the value returned by
   print_sanitized_buffer2.  */
size_t
print_sanitized_buffer (FILE *fp, const void *buffer, size_t length,
                        int delim)
{
  return print_sanitized_buffer2 (fp, buffer, length, delim, 0);
}
/* Print the UTF-8 encoded BUFFER of LENGTH bytes to the stream FP.
   If the buffer contains only bytes without the high bit set it is
   printed using print_sanitized_buffer; otherwise it is converted
   with utf8_to_native and the result is written as is.  DELIM is
   passed on to whichever function is used.  Returns the value of
   print_sanitized_buffer or the byte length of the converted
   string, respectively.  */
size_t
print_sanitized_utf8_buffer (FILE *fp, const void *buffer,
                             size_t length, int delim)
{
  const char *p = buffer;
  size_t i;

  /* We can handle plain ascii simpler, so check for it first. */
  for (i = 0; i < length; i++)
    {
      if ( (p[i] & 0x80) )
        break;
    }
  if (i < length)
    {
      char *buf = utf8_to_native (p, length, delim);
      /*(utf8 conversion already does the control character quoting)*/
      i = strlen (buf);
      fputs (buf, fp);
      jnlib_free (buf);
      return i;
    }
  else
    return print_sanitized_buffer (fp, p, length, delim);
}
/* Print the NUL terminated STRING using print_sanitized_buffer2 with
   the delimiters DELIM and DELIM2.  A NULL STRING prints nothing and
   returns 0.  */
size_t
print_sanitized_string2 (FILE *fp, const char *string, int delim, int delim2)
{
  if (!string)
    return 0;
  return print_sanitized_buffer2 (fp, string, strlen (string),
                                  delim, delim2);
}
/* Print the NUL terminated STRING using print_sanitized_buffer with
   the delimiter DELIM.  A NULL STRING prints nothing and returns 0.  */
size_t
print_sanitized_string (FILE *fp, const char *string, int delim)
{
  if (!string)
    return 0;
  return print_sanitized_buffer (fp, string, strlen (string), delim);
}
/* Print the NUL terminated STRING using print_sanitized_utf8_buffer
   with the delimiter DELIM.  A NULL STRING prints nothing and
   returns 0.  */
size_t
print_sanitized_utf8_string (FILE *fp, const char *string, int delim)
{
  if (!string)
    return 0;
  return print_sanitized_utf8_buffer (fp, string, strlen (string), delim);
}
/* Create a string from the buffer P_ARG of length N which is suitable for
   printing.  Bytes below 0x20, the byte 0x7f, the character DELIM
   and - if DELIM is not 0 - the backslash are replaced by C escape
   sequences ("\n", "\r", "\f", "\v", "\b", "\0" or "\xNN").  The
   result is allocated with jnlib_xmalloc.  Caller must release the
   created string using xfree. */
char *
sanitize_buffer (const void *p_arg, size_t n, int delim)
{
  const unsigned char *p = p_arg;
  size_t save_n, buflen;
  const unsigned char *save_p;
  char *buffer, *d;

  /* First count length.  BUFLEN starts at 1 for the terminating
     NUL.  A hex escape needs only 4 bytes, thus counting 5 for it
     merely over-allocates.  */
  for (save_n = n, save_p = p, buflen = 1; n; n--, p++)
    {
      if ( *p < 0x20 || *p == 0x7f || *p == delim || (delim && *p=='\\'))
        {
          if ( *p=='\n' || *p=='\r' || *p=='\f'
               || *p=='\v' || *p=='\b' || !*p )
            buflen += 2;
          else
            buflen += 5;
        }
      else
        buflen++;
    }
  p = save_p;
  n = save_n;
  /* And now make the string */
  d = buffer = jnlib_xmalloc (buflen);
  for ( ; n; n--, p++)
    {
      if (*p < 0x20 || *p == 0x7f || *p == delim || (delim && *p=='\\'))
        {
          *d++ = '\\';
          if (*p == '\n')
            *d++ = 'n';
          else if (*p == '\r')
            *d++ = 'r';
          else if (*p == '\f')
            *d++ = 'f';
          else if (*p == '\v')
            *d++ = 'v';
          else if (*p == '\b')
            *d++ = 'b';
          else if (!*p)
            *d++ = '0';
          else
            {
              /* Writes 'x' and two hex digits; the NUL written by
                 sprintf is overwritten by the next character.  */
              sprintf (d, "x%02x", *p);
              d += 3;
            }
        }
      else
        *d++ = *p;
    }
  *d = 0;
  return buffer;
}
/* Given a string containing an UTF-8 encoded text, return the number
   of characters in this string.  It differs from strlen in that it
   only counts complete UTF-8 characters.  Note, that this function
   does not take combined characters into account.  The count is
   obtained by counting all bytes which are not continuation
   bytes.  */
size_t
utf8_charcount (const char *s)
{
  size_t n;

  for (n = 0; *s; s++)
    {
      if ( (*s & 0xc0) != 0x80 ) /* Exclude continuation bytes: 10xxxxxx */
        n++;
    }

  return n;
}
/****************************************************
 **********  W32 specific functions  ****************
 ****************************************************/
#ifdef HAVE_W32_SYSTEM
/* Return the system's message text for the Windows error code EC.
   If EC is -1 the value of GetLastError is used instead.  The text
   is stored in a static buffer which is overwritten by the next
   call.  NOTE(review): the result of FormatMessage is not checked -
   presumably the buffer keeps its previous content if the call
   fails; confirm against the FormatMessage documentation.  */
const char *
w32_strerror (int ec)
{
  static char strerr[256];

  if (ec == -1)
    ec = (int)GetLastError ();
  FormatMessage (FORMAT_MESSAGE_FROM_SYSTEM, NULL, ec,
                 MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT),
                 strerr, DIM (strerr)-1, NULL);
  return strerr;
}
#endif /*HAVE_W32_SYSTEM*/
/****************************************************
 ******** Locale insensitive ctype functions ********
 ****************************************************/
/* FIXME: replace them by a table lookup and macros */
/* Return true if C is one of the ASCII letters 'A'..'Z'.  The
   locale is not consulted.  */
int
ascii_isupper (int c)
{
  return c >= 'A' && c <= 'Z';
}
/* Return true if C is one of the ASCII letters 'a'..'z'.  The
   locale is not consulted.  */
int
ascii_islower (int c)
{
  return c >= 'a' && c <= 'z';
}
/* Convert the ASCII letters 'a'..'z' to upper case; any other value
   of C is returned unchanged.  The locale is not consulted.  */
int
ascii_toupper (int c)
{
  if (c >= 'a' && c <= 'z')
    c &= ~0x20;  /* Upper and lower case differ only in bit 5.  */
  return c;
}
/* Convert the ASCII letters 'A'..'Z' to lower case; any other value
   of C is returned unchanged.  The locale is not consulted.  */
int
ascii_tolower (int c)
{
  if (c >= 'A' && c <= 'Z')
    c |= 0x20;  /* Upper and lower case differ only in bit 5.  */
  return c;
}
/* Compare the strings A and B ignoring the case of the ASCII
   letters (via ascii_toupper).  Returns 0 if they are equal,
   otherwise the difference of the upper-cased characters at the
   first mismatch.  */
int
ascii_strcasecmp( const char *a, const char *b )
{
  if (a == b)
    return 0;

  for (; *a && *b; a++, b++)
    {
      if (*a != *b && ascii_toupper (*a) != ascii_toupper (*b))
        break;
    }
  return *a == *b? 0 : (ascii_toupper (*a) - ascii_toupper (*b));
}
/* Compare at most N characters of the strings A and B ignoring the
   case of the ASCII letters (via ascii_tolower).  Returns 0 if the
   compared parts are equal, otherwise the difference of the
   lower-cased characters (taken as unsigned char) at the first
   mismatch.  */
int
ascii_strncasecmp (const char *a, const char *b, size_t n)
{
  const unsigned char *p1 = (const unsigned char *)a;
  const unsigned char *p2 = (const unsigned char *)b;
  unsigned char c1, c2;

  if (p1 == p2 || !n )
    return 0;

  do
    {
      c1 = ascii_tolower (*p1);
      c2 = ascii_tolower (*p2);

      /* Stop after N characters or at the end of A.  */
      if ( !--n || c1 == '\0')
        break;

      ++p1;
      ++p2;
    }
  while (c1 == c2);

  return c1 - c2;
}
/* Compare the first N bytes of the buffers A_ARG and B_ARG ignoring
   the case of the ASCII letters (via ascii_toupper).  Returns 0 if
   they are equal, otherwise the difference of the upper-cased bytes
   at the first mismatch.  */
int
ascii_memcasecmp (const void *a_arg, const void *b_arg, size_t n )
{
  const char *a = a_arg;
  const char *b = b_arg;

  if (a == b)
    return 0;
  for ( ; n; n--, a++, b++ )
    {
      if (*a != *b)
        {
          int diff = ascii_toupper (*a) - ascii_toupper (*b);

          if (diff)
            return diff;
        }
    }
  return 0;
}
/* Compare the strings A and B byte by byte.  Returns 0 if they are
   equal, otherwise the difference of the first mismatching bytes
   taken as signed char.  */
int
ascii_strcmp( const char *a, const char *b )
{
  if (a == b)
    return 0;

  for (; *a && *b; a++, b++)
    {
      if (*a != *b )
        break;
    }
  return *a == *b? 0 : (*(const signed char *)a - *(const signed char *)b);
}
/* Look for the buffer NEEDLE of length NNEEDLE in the buffer
   HAYSTACK of length NHAYSTACK, comparing with ascii_memcasecmp.
   Returns a pointer to the first match in HAYSTACK or NULL if there
   is none.  An empty NEEDLE matches at the start of HAYSTACK.  */
void *
ascii_memcasemem (const void *haystack, size_t nhaystack,
                  const void *needle, size_t nneedle)
{

  if (!nneedle)
    return (void*)haystack; /* finding an empty needle is really easy */
  if (nneedle <= nhaystack)
    {
      const char *a = haystack;
      /* Last position at which NEEDLE still fits into HAYSTACK.  */
      const char *b = a + nhaystack - nneedle;

      for (; a <= b; a++)
        {
          if ( !ascii_memcasecmp (a, needle, nneedle) )
            return (void *)a;
        }
    }
  return NULL;
}
/*********************************************
 ********** missing string functions *********
 *********************************************/
#ifndef HAVE_STPCPY
/* Replacement for systems without stpcpy: copy the string B
   including its terminating NUL to A and return a pointer to the
   NUL written into A.  */
char *
stpcpy(char *a,const char *b)
{
  while (*b)
    *a++ = *b++;
  *a = 0;

  return (char*)a;
}
#endif
#ifndef HAVE_STRSEP
/* Code taken from glibc-2.2.1/sysdeps/generic/strsep.c. */
/* Replacement for systems without strsep: return the token starting
   at *STRINGP and ending at the first character listed in DELIM.
   That delimiter is overwritten with a NUL and *STRINGP is set to
   the character following it.  If no delimiter is found the rest of
   the string is the token and *STRINGP is set to NULL.  Returns NULL
   if *STRINGP is NULL.  */
char *
strsep (char **stringp, const char *delim)
{
  char *begin, *end;

  begin = *stringp;
  if (begin == NULL)
    return NULL;

  /* A frequent case is when the delimiter string contains only one
     character.  Here we don't need to call the expensive `strpbrk'
     function and instead work using `strchr'.  */
  if (delim[0] == '\0' || delim[1] == '\0')
    {
      char ch = delim[0];

      if (ch == '\0')
        end = NULL;
      else
        {
          if (*begin == ch)
            end = begin;
          else if (*begin == '\0')
            end = NULL;
          else
            end = strchr (begin + 1, ch);
        }
    }
  else
    /* Find the end of the token.  */
    end = strpbrk (begin, delim);

  if (end)
    {
      /* Terminate the token and set *STRINGP past NUL character.  */
      *end++ = '\0';
      *stringp = end;
    }
  else
    /* No more delimiters; this is the last token.  */
    *stringp = NULL;

  return begin;
}
#endif /*HAVE_STRSEP*/
#ifndef HAVE_STRLWR
/* Replacement for systems without strlwr: convert the string S in
   place to lower case using tolower and return S.  */
char *
strlwr(char *s)
{
  char *p;

  /* tolower requires a value representable as unsigned char (or
     EOF); a plain char may be negative.  */
  for (p = s; *p; p++)
    *p = tolower (*(unsigned char *)p);
  return s;
}
#endif
#ifndef HAVE_STRCASECMP
/* Replacement for systems without strcasecmp: compare the strings A
   and B ignoring case.  Returns 0 if they are equal, otherwise the
   difference of the lower-cased characters at the first mismatch,
   so that the sign of the result agrees with the case-insensitive
   order of the strings.  */
int
strcasecmp( const char *a, const char *b )
{
  int ca, cb;

  for ( ; ; a++, b++)
    {
      /* The ctype functions need values in the range of unsigned
         char; a plain char may be negative.  */
      ca = tolower (*(const unsigned char *)a);
      cb = tolower (*(const unsigned char *)b);
      if (ca != cb || !ca)
        break;
    }
  return ca - cb;
}
#endif
/****************
 * mingw32/cpd has a memicmp()
 */
#ifndef HAVE_MEMICMP
/* Replacement for systems without memicmp: compare the first N
   bytes of A and B ignoring case.  Returns 0 if they are equal,
   otherwise the difference of the lower-cased bytes at the first
   mismatch, so that the sign of the result agrees with the
   case-insensitive order.  */
int
memicmp( const char *a, const char *b, size_t n )
{
  for ( ; n; n--, a++, b++)
    {
      int ca = tolower (*(const unsigned char *)a);
      int cb = tolower (*(const unsigned char *)b);

      if (ca != cb)
        return ca - cb;
    }
  return 0;
}
#endif
#ifndef HAVE_MEMRCHR
/* Replacement for systems without memrchr: return a pointer to the
   last byte with the value C (converted to unsigned char) within
   the first N bytes of BUFFER, or NULL if there is no such byte.  */
void *
memrchr (const void *buffer, int c, size_t n)
{
  const unsigned char *p = buffer;
  const unsigned char wanted = (unsigned char)c;

  /* Scan backwards starting right behind the last byte.  */
  for (p += n; n; n--)
    {
      if (*--p == wanted)
        return (void *)p;
    }
  return NULL;
}
#endif /*HAVE_MEMRCHR*/
/* Percent-escape the string STR by replacing colons with '%3a' and
   percent signs with '%25'.  If EXTRA is not NULL all characters in
   EXTRA are also escaped, using '%' followed by two lowercase hex
   digits.  With DIE set the result is allocated with jnlib_xmalloc,
   otherwise with jnlib_malloc and NULL is returned if that fails.
   Returns NULL if STR is NULL.  */
static char *
do_percent_escape (const char *str, const char *extra, int die)
{
  size_t len, nescape, idx;
  char *ptr;

  if (!str)
    return NULL;

  /* Count the characters and those which need to be escaped; each
     escape needs two extra bytes.  */
  for (len = nescape = 0; str[len]; len++)
    if (str[len] == ':' || str[len] == '%'
        || (extra && strchr (extra, str[len])))
      nescape++;
  if (die)
    ptr = jnlib_xmalloc (len + 2 * nescape + 1);
  else
    {
      ptr = jnlib_malloc (len + 2 * nescape + 1);
      if (!ptr)
        return NULL;
    }
  idx = 0;
  while (*str)
    {
      if (*str == ':')
        {
          ptr[idx++] = '%';
          ptr[idx++] = '3';
          ptr[idx++] = 'a';
        }
      else if (*str == '%')
        {
          ptr[idx++] = '%';
          ptr[idx++] = '2';
          ptr[idx++] = '5';
        }
      else if (extra && strchr (extra, *str))
        {
          const unsigned char c = *(const unsigned char *)str;

          ptr[idx++] = '%';
          ptr[idx++] = tohex_lower ((c >> 4) & 15);
          ptr[idx++] = tohex_lower (c & 15);
        }
      else
        ptr[idx++] = *str;
      str++;
    }
  ptr[idx] = '\0';

  return ptr;
}
/* Percent-escape the string STR by replacing colons with '%3a'.  If
   EXTRA is not NULL all characters in EXTRA are also escaped.  This
   calls do_percent_escape with its DIE argument set to 1.  */
char *
percent_escape (const char *str, const char *extra)
{
  return do_percent_escape (str, extra, 1);
}
/* Same as percent_escape but return NULL instead of exiting on memory
   error.  This calls do_percent_escape with its DIE argument set
   to 0.  */
char *
try_percent_escape (const char *str, const char *extra)
{
  return do_percent_escape (str, extra, 0);
}