* config/tc-arm.c (arm_cpus): Add entry for ARM Cortex-M0.
[binutils-gdb.git] / gdb / charset.c
blob14862e7ac1ed9a45338fcd823b4bbad8904396b0
1 /* Character set conversion support for GDB.
3 Copyright (C) 2001, 2003, 2007, 2008, 2009 Free Software Foundation, Inc.
5 This file is part of GDB.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 #include "defs.h"
21 #include "charset.h"
22 #include "gdbcmd.h"
23 #include "gdb_assert.h"
24 #include "gdb_obstack.h"
25 #include "charset-list.h"
26 #include "vec.h"
28 #include <stddef.h>
29 #include "gdb_string.h"
30 #include <ctype.h>
33 /* How GDB's character set support works
35 GDB has three global settings:
37 - The `current host character set' is the character set GDB should
38 use in talking to the user, and which (hopefully) the user's
39 terminal knows how to display properly. Most users should not
40 change this.
42 - The `current target character set' is the character set the
43 program being debugged uses.
45 - The `current target wide character set' is the wide character set
46 the program being debugged uses, that is, the encoding used for
47 wchar_t.
49 There are commands to set each of these, and mechanisms for
50 choosing reasonable default values. GDB has a global list of
51 character sets that it can use as its host or target character
52 sets.
54 The header file `charset.h' declares various functions that
55 different pieces of GDB need to perform tasks like:
57 - printing target strings and characters to the user's terminal
58 (mostly target->host conversions),
60 - building target-appropriate representations of strings and
61 characters the user enters in expressions (mostly host->target
62 conversions),
64 and so on.
66 To avoid excessive code duplication and maintenance efforts,
67 GDB simply requires a capable iconv function. Users on platforms
68 without a suitable iconv can use the GNU iconv library. */
71 #ifdef PHONY_ICONV
73 /* Provide a phony iconv that does as little as possible. Also,
74 arrange for there to be a single available character set. */
#undef GDB_DEFAULT_HOST_CHARSET
#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
#undef DEFAULT_CHARSET_NAMES
#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,

/* The phony conversion descriptor is a plain int: 1 when converting
   from UCS-4BE, 0 for a straight byte copy, and -1 when the requested
   conversion is not supported.  */
#undef iconv_t
#define iconv_t int
#undef iconv_open
#undef iconv
#undef iconv_close

#undef ICONV_CONST
#define ICONV_CONST const

/* Open a phony conversion from character set FROM to character set TO.
   Returns -1 if the pair is not supported; otherwise returns the flag
   that must be handed to iconv.  */

iconv_t
iconv_open (const char *to, const char *from)
{
  /* We allow conversions from UCS-4BE, wchar_t, and the host charset.
     We allow conversions to wchar_t and the host charset.  */
  if (strcmp (from, "UCS-4BE") && strcmp (from, "wchar_t")
      && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
    return -1;
  if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
    return -1;

  /* Return 1 if we are converting from UCS-4BE, 0 otherwise.  This is
     used as a flag in calls to iconv.  */
  return !strcmp (from, "UCS-4BE");
}

/* Close a phony descriptor.  There is nothing to release, so this
   always returns 0.  */

int
iconv_close (iconv_t arg)
{
  return 0;
}

/* Phony conversion routine.  UCS_FLAG is the value returned by
   iconv_open.  *INBUF / *INBYTESLEFT describe the remaining input and
   *OUTBUF / *OUTBYTESLEFT the remaining output space; all four are
   advanced past whatever was converted.

   Returns 0 when all input was consumed.  Otherwise returns
   (size_t) -1 with errno set to:
     EILSEQ - a UCS-4BE character does not fit in a single byte,
     E2BIG  - the output buffer is full,
     EINVAL - the UCS-4BE input ends with a partial character.  */

size_t
iconv (iconv_t ucs_flag, const char **inbuf, size_t *inbytesleft,
       char **outbuf, size_t *outbytesleft)
{
  if (ucs_flag)
    {
      while (*inbytesleft >= 4)
        {
          size_t j;
          unsigned long c = 0;

          /* Assemble one big-endian 32-bit character.  */
          for (j = 0; j < 4; ++j)
            {
              c <<= 8;
              c += (*inbuf)[j] & 0xff;
            }

          if (c >= 256)
            {
              errno = EILSEQ;
              return (size_t) -1;
            }

          /* Never write past the end of the caller's buffer.  */
          if (*outbytesleft < 1)
            {
              errno = E2BIG;
              return (size_t) -1;
            }

          **outbuf = c & 0xff;
          ++*outbuf;
          --*outbytesleft;

          /* One input character is four bytes wide.  */
          *inbuf += 4;
          *inbytesleft -= 4;
        }

      /* Only leftover bytes mean an incomplete character; fully
         consumed input is a success.  */
      if (*inbytesleft)
        {
          errno = EINVAL;
          return (size_t) -1;
        }
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
         output.  */
      size_t amt = *inbytesleft;
      if (amt > *outbytesleft)
        amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;

      if (*inbytesleft)
        {
          errno = E2BIG;
          return (size_t) -1;
        }
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
174 #endif
178 /* The global lists of character sets and translations. */
/* Fallback names for the target character set and the target wide
   character set, used only when nothing earlier has defined them.  */
#ifndef GDB_DEFAULT_TARGET_CHARSET
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#endif

#ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UCS-4"
#endif
189 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
190 static const char *host_charset_name = "auto";
191 static void
192 show_host_charset_name (struct ui_file *file, int from_tty,
193 struct cmd_list_element *c,
194 const char *value)
196 if (!strcmp (value, "auto"))
197 fprintf_filtered (file,
198 _("The host character set is \"auto; currently %s\".\n"),
199 auto_host_charset_name);
200 else
201 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
204 static const char *target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
205 static void
206 show_target_charset_name (struct ui_file *file, int from_tty,
207 struct cmd_list_element *c, const char *value)
209 fprintf_filtered (file, _("The target character set is \"%s\".\n"),
210 value);
213 static const char *target_wide_charset_name = GDB_DEFAULT_TARGET_WIDE_CHARSET;
214 static void
215 show_target_wide_charset_name (struct ui_file *file, int from_tty,
216 struct cmd_list_element *c, const char *value)
218 fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
219 value);
222 static const char *default_charset_names[] =
224 DEFAULT_CHARSET_NAMES
228 static const char **charset_enum;
/* If the target wide character set has big- or little-endian
   variants, these are the corresponding names; otherwise they are
   NULL.  Both are maintained by set_be_le_names.  */
static const char *target_wide_charset_be_name;
static const char *target_wide_charset_le_name;
236 /* A helper function for validate which sets the target wide big- and
237 little-endian character set names, if possible. */
239 static void
240 set_be_le_names (void)
242 int i, len;
244 target_wide_charset_le_name = NULL;
245 target_wide_charset_be_name = NULL;
247 len = strlen (target_wide_charset_name);
248 for (i = 0; charset_enum[i]; ++i)
250 if (strncmp (target_wide_charset_name, charset_enum[i], len))
251 continue;
252 if ((charset_enum[i][len] == 'B'
253 || charset_enum[i][len] == 'L')
254 && charset_enum[i][len + 1] == 'E'
255 && charset_enum[i][len + 2] == '\0')
257 if (charset_enum[i][len] == 'B')
258 target_wide_charset_be_name = charset_enum[i];
259 else
260 target_wide_charset_le_name = charset_enum[i];
265 /* 'Set charset', 'set host-charset', 'set target-charset', 'set
266 target-wide-charset', 'set charset' sfunc's. */
268 static void
269 validate (void)
271 iconv_t desc;
272 const char *host_cset = host_charset ();
274 desc = iconv_open (target_wide_charset_name, host_cset);
275 if (desc == (iconv_t) -1)
276 error ("Cannot convert between character sets `%s' and `%s'",
277 target_wide_charset_name, host_cset);
278 iconv_close (desc);
280 desc = iconv_open (target_charset_name, host_cset);
281 if (desc == (iconv_t) -1)
282 error ("Cannot convert between character sets `%s' and `%s'",
283 target_charset_name, host_cset);
284 iconv_close (desc);
286 set_be_le_names ();
289 /* This is the sfunc for the 'set charset' command. */
290 static void
291 set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
293 /* CAREFUL: set the target charset here as well. */
294 target_charset_name = host_charset_name;
295 validate ();
/* 'set host-charset' command sfunc.  We need a wrapper here because
   the function needs to have a specific signature.  All arguments are
   ignored; the work is done by validate.  */
static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  validate ();
}
/* Wrapper for the 'set target-charset' command.  All arguments are
   ignored; the work is done by validate.  */
static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  validate ();
}
/* Wrapper for the 'set target-wide-charset' command.  All arguments
   are ignored; the work is done by validate.  */
static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  validate ();
}
323 /* sfunc for the 'show charset' command. */
324 static void
325 show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
326 const char *name)
328 show_host_charset_name (file, from_tty, c, host_charset_name);
329 show_target_charset_name (file, from_tty, c, target_charset_name);
330 show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
334 /* Accessor functions. */
336 const char *
337 host_charset (void)
339 if (!strcmp (host_charset_name, "auto"))
340 return auto_host_charset_name;
341 return host_charset_name;
344 const char *
345 target_charset (void)
347 return target_charset_name;
350 const char *
351 target_wide_charset (void)
353 if (gdbarch_byte_order (current_gdbarch) == BFD_ENDIAN_BIG)
355 if (target_wide_charset_be_name)
356 return target_wide_charset_be_name;
358 else
360 if (target_wide_charset_le_name)
361 return target_wide_charset_le_name;
364 return target_wide_charset_name;
368 /* Host character set management. For the time being, we assume that
369 the host character set is some superset of ASCII. */
/* Return the control character corresponding to host character C.
   '?' maps to 0177; every other character is masked with 0237, which
   clears the 0100 and 040 bits.  */

char
host_letter_to_control_character (char c)
{
  if (c == '?')
    return 0177;
  return c & 0237;
}
/* Convert a host character, C, to its hex value.  C must already have
   been validated using isxdigit; any other character trips the
   assertion below.  */

int
host_hex_value (char c)
{
  /* The <ctype.h> functions require a value representable as unsigned
     char, so convert before classifying.  */
  if (isdigit ((unsigned char) c))
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
394 /* Public character management functions. */
396 /* A cleanup function which is run to close an iconv descriptor. */
398 static void
399 cleanup_iconv (void *p)
401 iconv_t *descp = p;
402 iconv_close (*descp);
405 void
406 convert_between_encodings (const char *from, const char *to,
407 const gdb_byte *bytes, unsigned int num_bytes,
408 int width, struct obstack *output,
409 enum transliterations translit)
411 iconv_t desc;
412 struct cleanup *cleanups;
413 size_t inleft;
414 char *inp;
415 unsigned int space_request;
417 /* Often, the host and target charsets will be the same. */
418 if (!strcmp (from, to))
420 obstack_grow (output, bytes, num_bytes);
421 return;
424 desc = iconv_open (to, from);
425 if (desc == (iconv_t) -1)
426 perror_with_name ("Converting character sets");
427 cleanups = make_cleanup (cleanup_iconv, &desc);
429 inleft = num_bytes;
430 inp = (char *) bytes;
432 space_request = num_bytes;
434 while (inleft > 0)
436 char *outp;
437 size_t outleft, r;
438 int old_size;
440 old_size = obstack_object_size (output);
441 obstack_blank (output, space_request);
443 outp = obstack_base (output) + old_size;
444 outleft = space_request;
446 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
448 /* Now make sure that the object on the obstack only includes
449 bytes we have converted. */
450 obstack_blank (output, - (int) outleft);
452 if (r == (size_t) -1)
454 switch (errno)
456 case EILSEQ:
458 int i;
460 /* Invalid input sequence. */
461 if (translit == translit_none)
462 error (_("Could not convert character to `%s' character set"),
463 to);
465 /* We emit escape sequence for the bytes, skip them,
466 and try again. */
467 for (i = 0; i < width; ++i)
469 char octal[5];
471 sprintf (octal, "\\%.3o", *inp & 0xff);
472 obstack_grow_str (output, octal);
474 ++inp;
475 --inleft;
478 break;
480 case E2BIG:
481 /* We ran out of space in the output buffer. Make it
482 bigger next time around. */
483 space_request *= 2;
484 break;
486 case EINVAL:
487 /* Incomplete input sequence. FIXME: ought to report this
488 to the caller somehow. */
489 inleft = 0;
490 break;
492 default:
493 perror_with_name ("Internal error while converting character sets");
498 do_cleanups (cleanups);
503 /* An iterator that returns host wchar_t's from a target string. */
504 struct wchar_iterator
506 /* The underlying iconv descriptor. */
507 iconv_t desc;
509 /* The input string. This is updated as convert characters. */
510 char *input;
511 /* The number of bytes remaining in the input. */
512 size_t bytes;
514 /* The width of an input character. */
515 size_t width;
517 /* The output buffer and its size. */
518 gdb_wchar_t *out;
519 size_t out_size;
522 /* Create a new iterator. */
523 struct wchar_iterator *
524 make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
525 size_t width)
527 struct wchar_iterator *result;
528 iconv_t desc;
530 desc = iconv_open ("wchar_t", charset);
531 if (desc == (iconv_t) -1)
532 perror_with_name ("Converting character sets");
534 result = XNEW (struct wchar_iterator);
535 result->desc = desc;
536 result->input = (char *) input;
537 result->bytes = bytes;
538 result->width = width;
540 result->out = XNEW (gdb_wchar_t);
541 result->out_size = 1;
543 return result;
546 static void
547 do_cleanup_iterator (void *p)
549 struct wchar_iterator *iter = p;
551 iconv_close (iter->desc);
552 xfree (iter->out);
553 xfree (iter);
556 struct cleanup *
557 make_cleanup_wchar_iterator (struct wchar_iterator *iter)
559 return make_cleanup (do_cleanup_iterator, iter);
563 wchar_iterate (struct wchar_iterator *iter,
564 enum wchar_iterate_result *out_result,
565 gdb_wchar_t **out_chars,
566 const gdb_byte **ptr,
567 size_t *len)
569 size_t out_request;
571 /* Try to convert some characters. At first we try to convert just
572 a single character. The reason for this is that iconv does not
573 necessarily update its outgoing arguments when it encounters an
574 invalid input sequence -- but we want to reliably report this to
575 our caller so it can emit an escape sequence. */
576 out_request = 1;
577 while (iter->bytes > 0)
579 char *outptr = (char *) &iter->out[0];
580 char *orig_inptr = iter->input;
581 size_t orig_in = iter->bytes;
582 size_t out_avail = out_request * sizeof (gdb_wchar_t);
583 size_t num;
584 gdb_wchar_t result;
586 size_t r = iconv (iter->desc,
587 (ICONV_CONST char **) &iter->input, &iter->bytes,
588 &outptr, &out_avail);
589 if (r == (size_t) -1)
591 switch (errno)
593 case EILSEQ:
594 /* Invalid input sequence. Skip it, and let the caller
595 know about it. */
596 *out_result = wchar_iterate_invalid;
597 *ptr = iter->input;
598 *len = iter->width;
599 iter->input += iter->width;
600 iter->bytes -= iter->width;
601 return 0;
603 case E2BIG:
604 /* We ran out of space. We still might have converted a
605 character; if so, return it. Otherwise, grow the
606 buffer and try again. */
607 if (out_avail < out_request * sizeof (gdb_wchar_t))
608 break;
610 ++out_request;
611 if (out_request > iter->out_size)
613 iter->out_size = out_request;
614 iter->out = xrealloc (iter->out,
615 out_request * sizeof (gdb_wchar_t));
617 continue;
619 case EINVAL:
620 /* Incomplete input sequence. Let the caller know, and
621 arrange for future calls to see EOF. */
622 *out_result = wchar_iterate_incomplete;
623 *ptr = iter->input;
624 *len = iter->bytes;
625 iter->bytes = 0;
626 return 0;
628 default:
629 perror_with_name ("Internal error while converting character sets");
633 /* We converted something. */
634 num = out_request - out_avail / sizeof (gdb_wchar_t);
635 *out_result = wchar_iterate_ok;
636 *out_chars = iter->out;
637 *ptr = orig_inptr;
638 *len = orig_in - iter->bytes;
639 return num;
642 /* Really done. */
643 *out_result = wchar_iterate_eof;
644 return -1;
648 /* The charset.c module initialization function. */
650 extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
652 typedef char *char_ptr;
653 DEF_VEC_P (char_ptr);
655 static VEC (char_ptr) *charsets;
657 #ifdef PHONY_ICONV
659 static void
660 find_charset_names (void)
662 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
663 VEC_safe_push (char_ptr, charsets, NULL);
666 #else /* PHONY_ICONV */
668 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
669 provides different symbols in the static and dynamic libraries.
670 So, configure may see libiconvlist but not iconvlist. But, calling
671 iconvlist is the right thing to do and will work. Hence we do a
672 check here but unconditionally call iconvlist below. */
673 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
675 /* A helper function that adds some character sets to the vector of
676 all character sets. This is a callback function for iconvlist. */
678 static int
679 add_one (unsigned int count, const char *const *names, void *data)
681 unsigned int i;
683 for (i = 0; i < count; ++i)
684 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
686 return 0;
689 static void
690 find_charset_names (void)
692 iconvlist (add_one, NULL);
693 VEC_safe_push (char_ptr, charsets, NULL);
696 #else
698 static void
699 find_charset_names (void)
701 FILE *in;
703 in = popen ("iconv -l", "r");
704 /* It is ok to ignore errors; we'll fall back on a default. */
705 if (!in)
706 return;
708 /* POSIX says that iconv -l uses an unspecified format. We parse
709 the glibc format; feel free to add others as needed. */
710 while (!feof (in))
712 /* The size of buf is chosen arbitrarily. A character set name
713 longer than this would not be very nice. */
714 char buf[80];
715 int len;
716 char *r = fgets (buf, sizeof (buf), in);
717 if (!r)
718 break;
719 len = strlen (r);
720 if (len <= 3)
721 continue;
722 if (buf[len - 2] == '/' && buf[len - 3] == '/')
723 buf[len - 3] = '\0';
724 VEC_safe_push (char_ptr, charsets, xstrdup (buf));
727 pclose (in);
729 VEC_safe_push (char_ptr, charsets, NULL);
732 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
733 #endif /* PHONY_ICONV */
735 void
736 _initialize_charset (void)
738 struct cmd_list_element *new_cmd;
740 /* The first element is always "auto"; then we skip it for the
741 commands where it is not allowed. */
742 VEC_safe_push (char_ptr, charsets, "auto");
743 find_charset_names ();
745 if (VEC_length (char_ptr, charsets) > 1)
746 charset_enum = (const char **) VEC_address (char_ptr, charsets);
747 else
748 charset_enum = default_charset_names;
750 #ifndef PHONY_ICONV
751 #ifdef HAVE_LANGINFO_CODESET
752 auto_host_charset_name = nl_langinfo (CODESET);
753 target_charset_name = auto_host_charset_name;
755 set_be_le_names ();
756 #endif
757 #endif
759 add_setshow_enum_cmd ("charset", class_support,
760 &charset_enum[1], &host_charset_name, _("\
761 Set the host and target character sets."), _("\
762 Show the host and target character sets."), _("\
763 The `host character set' is the one used by the system GDB is running on.\n\
764 The `target character set' is the one used by the program being debugged.\n\
765 You may only use supersets of ASCII for your host character set; GDB does\n\
766 not support any others.\n\
767 To see a list of the character sets GDB supports, type `set charset <TAB>'."),
768 /* Note that the sfunc below needs to set
769 target_charset_name, because the 'set
770 charset' command sets two variables. */
771 set_charset_sfunc,
772 show_charset,
773 &setlist, &showlist);
775 add_setshow_enum_cmd ("host-charset", class_support,
776 charset_enum, &host_charset_name, _("\
777 Set the host character set."), _("\
778 Show the host character set."), _("\
779 The `host character set' is the one used by the system GDB is running on.\n\
780 You may only use supersets of ASCII for your host character set; GDB does\n\
781 not support any others.\n\
782 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
783 set_host_charset_sfunc,
784 show_host_charset_name,
785 &setlist, &showlist);
787 add_setshow_enum_cmd ("target-charset", class_support,
788 &charset_enum[1], &target_charset_name, _("\
789 Set the target character set."), _("\
790 Show the target character set."), _("\
791 The `target character set' is the one used by the program being debugged.\n\
792 GDB translates characters and strings between the host and target\n\
793 character sets as needed.\n\
794 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
795 set_target_charset_sfunc,
796 show_target_charset_name,
797 &setlist, &showlist);
799 add_setshow_enum_cmd ("target-wide-charset", class_support,
800 &charset_enum[1], &target_wide_charset_name,
801 _("\
802 Set the target wide character set."), _("\
803 Show the target wide character set."), _("\
804 The `target wide character set' is the one used by the program being debugged.\n\
805 In particular it is the encoding used by `wchar_t'.\n\
806 GDB translates characters and strings between the host and target\n\
807 character sets as needed.\n\
808 To see a list of the character sets GDB supports, type\n\
809 `set target-wide-charset'<TAB>"),
810 set_target_wide_charset_sfunc,
811 show_target_wide_charset_name,
812 &setlist, &showlist);