Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / gettext / gettext-tools / src / format-python.c
bloba92054c942042e8b24dcbd5ddc165a04cebef6b4
1 /* Python format strings.
2 Copyright (C) 2001-2004 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
23 #include <stdbool.h>
24 #include <stdlib.h>
25 #include <string.h>
27 #include "format.h"
28 #include "c-ctype.h"
29 #include "xalloc.h"
30 #include "xerror.h"
31 #include "format-invalid.h"
32 #include "gettext.h"
34 #define _(str) gettext (str)
/* Python format strings are described in
     Python Library reference
     2. Built-in Types, Exceptions and Functions
     2.1. Built-in Types
     2.1.5. Sequence Types
     2.1.5.2. String Formatting Operations
   Any string or Unicode string can act as format string via the '%' operator,
   implemented in stringobject.c and unicodeobject.c.
   A directive
   - starts with '%'
   - is optionally followed by '(ident)' where ident is any sequence of
     characters with balanced left and right parentheses,
   - is optionally followed by any of the characters '-' (left justification),
     '+' (sign), ' ' (blank), '#' (alt), '0' (zero), each of which acts as a
     flag,
   - is optionally followed by a width specification: '*' (reads an argument)
     or a nonempty digit sequence,
   - is optionally followed by '.' and a precision specification: '*' (reads
     an argument) or a nonempty digit sequence,
   - is optionally followed by a size specifier, one of 'h' 'l' 'L'.
   - is finished by a specifier
       - '%', that needs no argument,
       - 'c', that needs a character argument,
       - 's', 'r', that need a string argument,
       - 'i', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
       - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
   Use of '(ident)' and use of unnamed argument specifications are exclusive,
   because the first requires a mapping as argument, while the second requires
   a tuple as argument.
 */
/* Kind of value a directive consumes.
   FAT_NONE marks a named argument whose uses have incompatible types;
   FAT_ANY is assigned to the '%' specifier and is compatible with every
   other type when duplicate named arguments are merged.  */
enum format_arg_type
{
  FAT_NONE,
  FAT_ANY,
  FAT_CHARACTER,
  FAT_STRING,
  FAT_INTEGER,
  FAT_FLOAT
};
77 struct named_arg
79 char *name;
80 enum format_arg_type type;
83 struct unnamed_arg
85 enum format_arg_type type;
/* Parsed description of a format string.
   Named and unnamed arguments are mutually exclusive, so at most one of
   the two arrays is in use and 'allocated' is the capacity of that one.  */
struct spec
{
  unsigned int directives;          /* number of '%' directives seen */
  unsigned int named_arg_count;     /* used entries in 'named' */
  unsigned int unnamed_arg_count;   /* used entries in 'unnamed' */
  unsigned int allocated;           /* capacity of the array in use */
  struct named_arg *named;          /* sorted by name once parsing is done */
  struct unnamed_arg *unnamed;      /* in order of appearance */
};
/* Locale independent test for a decimal digit.
   Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
   <ctype.h> isdigit must be an 'unsigned char'.)
   The unsigned cast folds "c < '0'" into the single range comparison:
   a negative difference wraps to a large value.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') < 10)
105 static int
106 named_arg_compare (const void *p1, const void *p2)
108 return strcmp (((const struct named_arg *) p1)->name,
109 ((const struct named_arg *) p2)->name);
/* Yields a freshly allocated (xstrdup) error message for a format string
   that uses both '%(name)' and positional directives.  */
#define INVALID_MIXES_NAMED_UNNAMED() \
  xstrdup (_("The string refers to arguments both through argument names and through unnamed argument specifications."))
115 static void *
116 format_parse (const char *format, bool translated, char **invalid_reason)
118 struct spec spec;
119 struct spec *result;
121 spec.directives = 0;
122 spec.named_arg_count = 0;
123 spec.unnamed_arg_count = 0;
124 spec.allocated = 0;
125 spec.named = NULL;
126 spec.unnamed = NULL;
128 for (; *format != '\0';)
129 if (*format++ == '%')
131 /* A directive. */
132 char *name = NULL;
133 enum format_arg_type type;
135 spec.directives++;
137 if (*format == '(')
139 unsigned int depth;
140 const char *name_start;
141 const char *name_end;
142 size_t n;
144 name_start = ++format;
145 depth = 0;
146 for (; *format != '\0'; format++)
148 if (*format == '(')
149 depth++;
150 else if (*format == ')')
152 if (depth == 0)
153 break;
154 else
155 depth--;
158 if (*format == '\0')
160 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
161 goto bad_format;
163 name_end = format++;
165 n = name_end - name_start;
166 name = (char *) xmalloc (n + 1);
167 memcpy (name, name_start, n);
168 name[n] = '\0';
171 while (*format == '-' || *format == '+' || *format == ' '
172 || *format == '#' || *format == '0')
173 format++;
175 if (*format == '*')
177 format++;
179 /* Named and unnamed specifications are exclusive. */
180 if (spec.named_arg_count > 0)
182 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
183 goto bad_format;
186 if (spec.allocated == spec.unnamed_arg_count)
188 spec.allocated = 2 * spec.allocated + 1;
189 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
191 spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
192 spec.unnamed_arg_count++;
194 else if (isdigit (*format))
196 do format++; while (isdigit (*format));
199 if (*format == '.')
201 format++;
203 if (*format == '*')
205 format++;
207 /* Named and unnamed specifications are exclusive. */
208 if (spec.named_arg_count > 0)
210 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
211 goto bad_format;
214 if (spec.allocated == spec.unnamed_arg_count)
216 spec.allocated = 2 * spec.allocated + 1;
217 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
219 spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
220 spec.unnamed_arg_count++;
222 else if (isdigit (*format))
224 do format++; while (isdigit (*format));
228 if (*format == 'h' || *format == 'l' || *format == 'L')
229 format++;
231 switch (*format)
233 case '%':
234 type = FAT_ANY;
235 break;
236 case 'c':
237 type = FAT_CHARACTER;
238 break;
239 case 's': case 'r':
240 type = FAT_STRING;
241 break;
242 case 'i': case 'd': case 'u': case 'o': case 'x': case 'X':
243 type = FAT_INTEGER;
244 break;
245 case 'e': case 'E': case 'f': case 'g': case 'G':
246 type = FAT_FLOAT;
247 break;
248 default:
249 *invalid_reason =
250 (*format == '\0'
251 ? INVALID_UNTERMINATED_DIRECTIVE ()
252 : INVALID_CONVERSION_SPECIFIER (spec.directives, *format));
253 goto bad_format;
256 if (name != NULL)
258 /* Named argument. */
260 /* Named and unnamed specifications are exclusive. */
261 if (spec.unnamed_arg_count > 0)
263 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
264 goto bad_format;
267 if (spec.allocated == spec.named_arg_count)
269 spec.allocated = 2 * spec.allocated + 1;
270 spec.named = (struct named_arg *) xrealloc (spec.named, spec.allocated * sizeof (struct named_arg));
272 spec.named[spec.named_arg_count].name = name;
273 spec.named[spec.named_arg_count].type = type;
274 spec.named_arg_count++;
276 else if (*format != '%')
278 /* Unnamed argument. */
280 /* Named and unnamed specifications are exclusive. */
281 if (spec.named_arg_count > 0)
283 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
284 goto bad_format;
287 if (spec.allocated == spec.unnamed_arg_count)
289 spec.allocated = 2 * spec.allocated + 1;
290 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
292 spec.unnamed[spec.unnamed_arg_count].type = type;
293 spec.unnamed_arg_count++;
296 format++;
299 /* Sort the named argument array, and eliminate duplicates. */
300 if (spec.named_arg_count > 1)
302 unsigned int i, j;
303 bool err;
305 qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
306 named_arg_compare);
308 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
309 err = false;
310 for (i = j = 0; i < spec.named_arg_count; i++)
311 if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
313 enum format_arg_type type1 = spec.named[i].type;
314 enum format_arg_type type2 = spec.named[j-1].type;
315 enum format_arg_type type_both;
317 if (type1 == type2 || type2 == FAT_ANY)
318 type_both = type1;
319 else if (type1 == FAT_ANY)
320 type_both = type2;
321 else
323 /* Incompatible types. */
324 type_both = FAT_NONE;
325 if (!err)
326 *invalid_reason =
327 xasprintf (_("The string refers to the argument named '%s' in incompatible ways."), spec.named[i].name);
328 err = true;
331 spec.named[j-1].type = type_both;
332 free (spec.named[i].name);
334 else
336 if (j < i)
338 spec.named[j].name = spec.named[i].name;
339 spec.named[j].type = spec.named[i].type;
341 j++;
343 spec.named_arg_count = j;
344 if (err)
345 /* *invalid_reason has already been set above. */
346 goto bad_format;
349 result = (struct spec *) xmalloc (sizeof (struct spec));
350 *result = spec;
351 return result;
353 bad_format:
354 if (spec.named != NULL)
356 unsigned int i;
357 for (i = 0; i < spec.named_arg_count; i++)
358 free (spec.named[i].name);
359 free (spec.named);
361 if (spec.unnamed != NULL)
362 free (spec.unnamed);
363 return NULL;
366 static void
367 format_free (void *descr)
369 struct spec *spec = (struct spec *) descr;
371 if (spec->named != NULL)
373 unsigned int i;
374 for (i = 0; i < spec->named_arg_count; i++)
375 free (spec->named[i].name);
376 free (spec->named);
378 if (spec->unnamed != NULL)
379 free (spec->unnamed);
380 free (spec);
383 static int
384 format_get_number_of_directives (void *descr)
386 struct spec *spec = (struct spec *) descr;
388 return spec->directives;
391 static bool
392 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
393 formatstring_error_logger_t error_logger,
394 const char *pretty_msgstr)
396 struct spec *spec1 = (struct spec *) msgid_descr;
397 struct spec *spec2 = (struct spec *) msgstr_descr;
398 bool err = false;
400 if (spec1->named_arg_count > 0 && spec2->unnamed_arg_count > 0)
402 if (error_logger)
403 error_logger (_("format specifications in 'msgid' expect a mapping, those in '%s' expect a tuple"),
404 pretty_msgstr);
405 err = true;
407 else if (spec1->unnamed_arg_count > 0 && spec2->named_arg_count > 0)
409 if (error_logger)
410 error_logger (_("format specifications in 'msgid' expect a tuple, those in '%s' expect a mapping"),
411 pretty_msgstr);
412 err = true;
414 else
416 if (spec1->named_arg_count + spec2->named_arg_count > 0)
418 unsigned int i, j;
419 unsigned int n1 = spec1->named_arg_count;
420 unsigned int n2 = spec2->named_arg_count;
422 /* Check the argument names are the same.
423 Both arrays are sorted. We search for the first difference. */
424 for (i = 0, j = 0; i < n1 || j < n2; )
426 int cmp = (i >= n1 ? 1 :
427 j >= n2 ? -1 :
428 strcmp (spec1->named[i].name, spec2->named[j].name));
430 if (cmp > 0)
432 if (error_logger)
433 error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in 'msgid'"),
434 spec2->named[j].name, pretty_msgstr);
435 err = true;
436 break;
438 else if (cmp < 0)
440 if (equality)
442 if (error_logger)
443 error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
444 spec1->named[i].name, pretty_msgstr);
445 err = true;
446 break;
448 else
449 i++;
451 else
452 j++, i++;
454 /* Check the argument types are the same. */
455 if (!err)
456 for (i = 0, j = 0; j < n2; )
458 if (strcmp (spec1->named[i].name, spec2->named[j].name) == 0)
460 if (spec1->named[i].type != spec2->named[j].type)
462 if (error_logger)
463 error_logger (_("format specifications in 'msgid' and '%s' for argument '%s' are not the same"),
464 pretty_msgstr, spec2->named[j].name);
465 err = true;
466 break;
468 j++, i++;
470 else
471 i++;
475 if (spec1->unnamed_arg_count + spec2->unnamed_arg_count > 0)
477 unsigned int i;
479 /* Check the argument types are the same. */
480 if (equality
481 ? spec1->unnamed_arg_count != spec2->unnamed_arg_count
482 : spec1->unnamed_arg_count < spec2->unnamed_arg_count)
484 if (error_logger)
485 error_logger (_("number of format specifications in 'msgid' and '%s' does not match"),
486 pretty_msgstr);
487 err = true;
489 else
490 for (i = 0; i < spec2->unnamed_arg_count; i++)
491 if (spec1->unnamed[i].type != spec2->unnamed[i].type)
493 if (error_logger)
494 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
495 pretty_msgstr, i + 1);
496 err = true;
501 return err;
505 struct formatstring_parser formatstring_python =
507 format_parse,
508 format_free,
509 format_get_number_of_directives,
510 format_check
514 #ifdef TEST
516 /* Test program: Print the argument list specification returned by
517 format_parse for strings read from standard input. */
519 #include <stdio.h>
520 #include "getline.h"
522 static void
523 format_print (void *descr)
525 struct spec *spec = (struct spec *) descr;
526 unsigned int i;
528 if (spec == NULL)
530 printf ("INVALID");
531 return;
534 if (spec->named_arg_count > 0)
536 if (spec->unnamed_arg_count > 0)
537 abort ();
539 printf ("{");
540 for (i = 0; i < spec->named_arg_count; i++)
542 if (i > 0)
543 printf (", ");
544 printf ("'%s':", spec->named[i].name);
545 switch (spec->named[i].type)
547 case FAT_ANY:
548 printf ("*");
549 break;
550 case FAT_CHARACTER:
551 printf ("c");
552 break;
553 case FAT_STRING:
554 printf ("s");
555 break;
556 case FAT_INTEGER:
557 printf ("i");
558 break;
559 case FAT_FLOAT:
560 printf ("f");
561 break;
562 default:
563 abort ();
566 printf ("}");
568 else
570 printf ("(");
571 for (i = 0; i < spec->unnamed_arg_count; i++)
573 if (i > 0)
574 printf (" ");
575 switch (spec->unnamed[i].type)
577 case FAT_ANY:
578 printf ("*");
579 break;
580 case FAT_CHARACTER:
581 printf ("c");
582 break;
583 case FAT_STRING:
584 printf ("s");
585 break;
586 case FAT_INTEGER:
587 printf ("i");
588 break;
589 case FAT_FLOAT:
590 printf ("f");
591 break;
592 default:
593 abort ();
596 printf (")");
601 main ()
603 for (;;)
605 char *line = NULL;
606 size_t line_size = 0;
607 int line_len;
608 char *invalid_reason;
609 void *descr;
611 line_len = getline (&line, &line_size, stdin);
612 if (line_len < 0)
613 break;
614 if (line_len > 0 && line[line_len - 1] == '\n')
615 line[--line_len] = '\0';
617 invalid_reason = NULL;
618 descr = format_parse (line, false, &invalid_reason);
620 format_print (descr);
621 printf ("\n");
622 if (descr == NULL)
623 printf ("%s\n", invalid_reason);
625 free (invalid_reason);
626 free (line);
629 return 0;
/*
 * For Emacs M-x compile
 * Local Variables:
 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-python.c ../lib/libgettextlib.la"
 * End:
 */
639 #endif /* TEST */