No empty .Rs/.Re
[netbsd-mini2440.git] / gnu / dist / gettext / gettext-tools / src / format-java.c
blobc43404b944ab03c7d518fdd0510d849463c6442c
1 /* Java format strings.
2 Copyright (C) 2001-2004 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 #include <alloca.h>
24 #include <stdbool.h>
25 #include <stdlib.h>
26 #include <string.h>
28 #include "format.h"
29 #include "c-ctype.h"
30 #include "xalloc.h"
31 #include "xallocsa.h"
32 #include "xerror.h"
33 #include "format-invalid.h"
34 #include "gettext.h"
/* Shorthand for looking up the translation of a user-visible message.  */
#define _(msgid) gettext (msgid)
/* Java format strings are described in java/text/MessageFormat.html.
   See also the ICU documentation class_MessageFormat.html.

   messageFormatPattern := string ( "{" messageFormatElement "}" string )*

   messageFormatElement := argument { "," elementFormat }

   elementFormat := "time" { "," datetimeStyle }
                  | "date" { "," datetimeStyle }
                  | "number" { "," numberStyle }
                  | "choice" { "," choiceStyle }

   datetimeStyle := "short"
                    | "medium"
                    | "long"
                    | "full"
                    | dateFormatPattern

   numberStyle := "currency"
                 | "percent"
                 | "integer"
                 | numberFormatPattern

   choiceStyle := choiceFormatPattern

   dateFormatPattern see SimpleDateFormat.applyPattern

   numberFormatPattern see DecimalFormat.applyPattern

   choiceFormatPattern see ChoiceFormat constructor

   In strings, literal curly braces can be used if quoted between single
   quotes.  A real single quote is represented by ''.

   If a pattern is used, then unquoted braces in the pattern, if any, must
   match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab {0'}' de" and
   "ab } de" are not.

   The argument is a number from 0 to 9, which corresponds to the arguments
   presented in an array to be formatted.

   It is ok to have unused arguments in the array.

   Adding a dateFormatPattern / numberFormatPattern / choiceFormatPattern
   to an elementFormat is equivalent to creating a SimpleDateFormat /
   DecimalFormat / ChoiceFormat and installing it with setFormat.  For
   example,

     MessageFormat form =
       new MessageFormat("The disk \"{1}\" contains {0,choice,0#no files|1#one file|2#{0,number} files}.");

   is equivalent to

     MessageFormat form = new MessageFormat("The disk \"{1}\" contains {0}.");
     form.setFormat(1, // Number of {} occurrence in the string!
                    new ChoiceFormat(new double[] { 0, 1, 2 },
                                     new String[] { "no files", "one file",
                                                    "{0,number} files" }));

   Note: The behaviour of quotes inside a choiceFormatPattern is not clear.
   Example 1:
     "abc{1,choice,0#{1,number,00';'000}}def"
       JDK 1.1.x: exception
       JDK 1.3.x: behaves like "abc{1,choice,0#{1,number,00;000}}def"
   Example 2:
     "abc{1,choice,0#{1,number,00';'}}def"
       JDK 1.1.x: interprets the semicolon as number suffix
       JDK 1.3.x: behaves like "abc{1,choice,0#{1,number,00;}}def"
 */
/* Kind of Java object that a directive requires for its argument.  */
enum format_arg_type
{
  /* No usable type; assigned by format_parse when one argument number is
     used with two incompatible types.  */
  FAT_NONE,
  /* java.lang.Object */
  FAT_OBJECT,
  /* java.lang.Number */
  FAT_NUMBER,
  /* java.util.Date */
  FAT_DATE
};
115 struct numbered_arg
117 unsigned int number;
118 enum format_arg_type type;
/* Result of parsing one format string.  */
struct spec
{
  /* Count of '{...}' directives encountered, nested ones included.  */
  unsigned int directives;
  /* Number of entries of 'numbered' that are in use.  */
  unsigned int numbered_arg_count;
  /* Number of entries for which 'numbered' has room.  */
  unsigned int allocated;
  /* Heap-allocated array of argument references, or NULL.  */
  struct numbered_arg *numbered;
};
/* Prototypes of the pattern validators that are defined further down but
   already called by message_format_parse.  */
static bool date_format_parse (const char *format);
static bool number_format_parse (const char *format);
static bool choice_format_parse (const char *format,
                                 struct spec *spec,
                                 char **invalid_reason);
/* Quote handling:
   - When we see a single-quote, skip it and toggle the quoting flag.
   - When we see a double single-quote, skip only the first of the two and
     leave the quoting flag alone.
   Reads and updates the local variables 'format' and 'quoting' of the
   function it is used in.
   The body is wrapped in do { } while (0) so that "HANDLE_QUOTE;" is exactly
   one statement and can be used as the body of an unbraced if/else.  */
#define HANDLE_QUOTE \
  do                                                    \
    {                                                   \
      if (*format == '\'' && *++format != '\'')         \
        quoting = !quoting;                             \
    }                                                   \
  while (0)
145 /* Note that message_format_parse and choice_format_parse are mutually
146 recursive. This is because MessageFormat can use some ChoiceFormats,
147 and a ChoiceFormat is made up from several MessageFormats. */
149 /* Return true if a format is a valid messageFormatPattern.
150 Extracts argument type information into spec. */
151 static bool
152 message_format_parse (const char *format, struct spec *spec,
153 char **invalid_reason)
155 bool quoting = false;
157 for (;;)
159 HANDLE_QUOTE;
160 if (!quoting && *format == '{')
162 unsigned int depth;
163 const char *element_start;
164 const char *element_end;
165 size_t n;
166 char *element_alloced;
167 char *element;
168 unsigned int number;
169 enum format_arg_type type;
171 spec->directives++;
173 element_start = ++format;
174 depth = 0;
175 for (; *format != '\0'; format++)
177 if (*format == '{')
178 depth++;
179 else if (*format == '}')
181 if (depth == 0)
182 break;
183 else
184 depth--;
187 if (*format == '\0')
189 *invalid_reason =
190 xstrdup (_("The string ends in the middle of a directive: found '{' without matching '}'."));
191 return false;
193 element_end = format++;
195 n = element_end - element_start;
196 element = element_alloced = (char *) xallocsa (n + 1);
197 memcpy (element, element_start, n);
198 element[n] = '\0';
200 if (!c_isdigit (*element))
202 *invalid_reason =
203 xasprintf (_("In the directive number %u, '{' is not followed by an argument number."), spec->directives);
204 freesa (element_alloced);
205 return false;
207 number = 0;
210 number = 10 * number + (*element - '0');
211 element++;
213 while (c_isdigit (*element));
215 type = FAT_OBJECT;
216 if (*element == '\0')
218 else if (strncmp (element, ",time", 5) == 0
219 || strncmp (element, ",date", 5) == 0)
221 type = FAT_DATE;
222 element += 5;
223 if (*element == '\0')
225 else if (*element == ',')
227 element++;
228 if (strcmp (element, "short") == 0
229 || strcmp (element, "medium") == 0
230 || strcmp (element, "long") == 0
231 || strcmp (element, "full") == 0
232 || date_format_parse (element))
234 else
236 *invalid_reason =
237 xasprintf (_("In the directive number %u, the substring \"%s\" is not a valid date/time style."), spec->directives, element);
238 freesa (element_alloced);
239 return false;
242 else
244 *element = '\0';
245 element -= 4;
246 *invalid_reason =
247 xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec->directives, element);
248 freesa (element_alloced);
249 return false;
252 else if (strncmp (element, ",number", 7) == 0)
254 type = FAT_NUMBER;
255 element += 7;
256 if (*element == '\0')
258 else if (*element == ',')
260 element++;
261 if (strcmp (element, "currency") == 0
262 || strcmp (element, "percent") == 0
263 || strcmp (element, "integer") == 0
264 || number_format_parse (element))
266 else
268 *invalid_reason =
269 xasprintf (_("In the directive number %u, the substring \"%s\" is not a valid number style."), spec->directives, element);
270 freesa (element_alloced);
271 return false;
274 else
276 *element = '\0';
277 element -= 6;
278 *invalid_reason =
279 xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec->directives, element);
280 freesa (element_alloced);
281 return false;
284 else if (strncmp (element, ",choice", 7) == 0)
286 type = FAT_NUMBER; /* because ChoiceFormat extends NumberFormat */
287 element += 7;
288 if (*element == '\0')
290 else if (*element == ',')
292 element++;
293 if (choice_format_parse (element, spec, invalid_reason))
295 else
297 freesa (element_alloced);
298 return false;
301 else
303 *element = '\0';
304 element -= 6;
305 *invalid_reason =
306 xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec->directives, element);
307 freesa (element_alloced);
308 return false;
311 else
313 *invalid_reason =
314 xasprintf (_("In the directive number %u, the argument number is not followed by a comma and one of \"%s\", \"%s\", \"%s\", \"%s\"."), spec->directives, "time", "date", "number", "choice");
315 freesa (element_alloced);
316 return false;
318 freesa (element_alloced);
320 if (spec->allocated == spec->numbered_arg_count)
322 spec->allocated = 2 * spec->allocated + 1;
323 spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, spec->allocated * sizeof (struct numbered_arg));
325 spec->numbered[spec->numbered_arg_count].number = number;
326 spec->numbered[spec->numbered_arg_count].type = type;
327 spec->numbered_arg_count++;
329 /* The doc says "ab}de" is invalid. Even though JDK accepts it. */
330 else if (!quoting && *format == '}')
332 *invalid_reason =
333 xstrdup (_("The string starts in the middle of a directive: found '}' without matching '{'."));
334 return false;
336 else if (*format != '\0')
337 format++;
338 else
339 break;
342 return true;
/* Check whether FORMAT is a valid dateFormatPattern.
   This always succeeds, because every string is one: a single-quote opens a
   quoted section that lasts until the next single-quote or the end of the
   string, a doubled single-quote stands for one single-quote, and unquoted
   ASCII letters are grouped into runs of equal letters (e.g. 'yyyy') which
   are then each interpreted according to some rules.  */
static bool
date_format_parse (const char *format)
{
  (void) format;
  return true;
}
357 /* Return true if a format is a valid numberFormatPattern. */
358 static bool
359 number_format_parse (const char *format)
361 /* Pattern Syntax:
362 pattern := pos_pattern{';' neg_pattern}
363 pos_pattern := {prefix}number{suffix}
364 neg_pattern := {prefix}number{suffix}
365 number := integer{'.' fraction}{exponent}
366 prefix := '\u0000'..'\uFFFD' - special_characters
367 suffix := '\u0000'..'\uFFFD' - special_characters
368 integer := min_int | '#' | '#' integer | '#' ',' integer
369 min_int := '0' | '0' min_int | '0' ',' min_int
370 fraction := '0'* '#'*
371 exponent := 'E' '0' '0'*
372 Notation:
373 X* 0 or more instances of X
374 { X } 0 or 1 instances of X
375 X | Y either X or Y
376 X..Y any character from X up to Y, inclusive
377 S - T characters in S, except those in T
378 Single-quote starts a quoted section, to be terminated at the next
379 single-quote or string end. Double single-quote gives a single
380 single-quote.
382 bool quoting = false;
383 bool seen_semicolon = false;
385 HANDLE_QUOTE;
386 for (;;)
388 /* Parse prefix. */
389 while (*format != '\0'
390 && !(!quoting && (*format == '0' || *format == '#')))
392 if (format[0] == '\\')
394 if (format[1] == 'u'
395 && c_isxdigit (format[2])
396 && c_isxdigit (format[3])
397 && c_isxdigit (format[4])
398 && c_isxdigit (format[5]))
399 format += 6;
400 else
401 format += 2;
403 else
404 format += 1;
405 HANDLE_QUOTE;
408 /* Parse integer. */
409 if (!(!quoting && (*format == '0' || *format == '#')))
410 return false;
411 while (!quoting && *format == '#')
413 format++;
414 HANDLE_QUOTE;
415 if (!quoting && *format == ',')
417 format++;
418 HANDLE_QUOTE;
421 while (!quoting && *format == '0')
423 format++;
424 HANDLE_QUOTE;
425 if (!quoting && *format == ',')
427 format++;
428 HANDLE_QUOTE;
432 /* Parse fraction. */
433 if (!quoting && *format == '.')
435 format++;
436 HANDLE_QUOTE;
437 while (!quoting && *format == '0')
439 format++;
440 HANDLE_QUOTE;
442 while (!quoting && *format == '#')
444 format++;
445 HANDLE_QUOTE;
449 /* Parse exponent. */
450 if (!quoting && *format == 'E')
452 const char *format_save = format;
453 format++;
454 HANDLE_QUOTE;
455 if (!quoting && *format == '0')
459 format++;
460 HANDLE_QUOTE;
462 while (!quoting && *format == '0');
464 else
466 /* Back up. */
467 format = format_save;
468 quoting = false;
472 /* Parse suffix. */
473 while (*format != '\0'
474 && (seen_semicolon || !(!quoting && *format == ';')))
476 if (format[0] == '\\')
478 if (format[1] == 'u'
479 && c_isxdigit (format[2])
480 && c_isxdigit (format[3])
481 && c_isxdigit (format[4])
482 && c_isxdigit (format[5]))
483 format += 6;
484 else
485 format += 2;
487 else
488 format += 1;
489 HANDLE_QUOTE;
492 if (seen_semicolon || !(!quoting && *format == ';'))
493 break;
496 return (*format == '\0');
499 /* Return true if a format is a valid choiceFormatPattern.
500 Extracts argument type information into spec. */
501 static bool
502 choice_format_parse (const char *format, struct spec *spec,
503 char **invalid_reason)
505 /* Pattern syntax:
506 pattern := | choice | choice '|' pattern
507 choice := number separator messageformat
508 separator := '<' | '#' | '\u2264'
509 Single-quote starts a quoted section, to be terminated at the next
510 single-quote or string end. Double single-quote gives a single
511 single-quote.
513 bool quoting = false;
515 HANDLE_QUOTE;
516 if (*format == '\0')
517 return true;
518 for (;;)
520 /* Don't bother looking too precisely into the syntax of the number.
521 It can contain various Unicode characters. */
522 bool number_nonempty;
523 char *msgformat;
524 char *mp;
525 bool msgformat_valid;
527 /* Parse number. */
528 number_nonempty = false;
529 while (*format != '\0'
530 && !(!quoting && (*format == '<' || *format == '#'
531 || strncmp (format, "\\u2264", 6) == 0
532 || *format == '|')))
534 if (format[0] == '\\')
536 if (format[1] == 'u'
537 && c_isxdigit (format[2])
538 && c_isxdigit (format[3])
539 && c_isxdigit (format[4])
540 && c_isxdigit (format[5]))
541 format += 6;
542 else
543 format += 2;
545 else
546 format += 1;
547 number_nonempty = true;
548 HANDLE_QUOTE;
551 /* Short clause at end of pattern is valid and is ignored! */
552 if (*format == '\0')
553 break;
555 if (!number_nonempty)
557 *invalid_reason =
558 xasprintf (_("In the directive number %u, a choice contains no number."), spec->directives);
559 return false;
562 if (*format == '<' || *format == '#')
563 format += 1;
564 else if (strncmp (format, "\\u2264", 6) == 0)
565 format += 6;
566 else
568 *invalid_reason =
569 xasprintf (_("In the directive number %u, a choice contains a number that is not followed by '<', '#' or '%s'."), spec->directives, "\\u2264");
570 return false;
572 HANDLE_QUOTE;
574 msgformat = (char *) xallocsa (strlen (format) + 1);
575 mp = msgformat;
577 while (*format != '\0' && !(!quoting && *format == '|'))
579 *mp++ = *format++;
580 HANDLE_QUOTE;
582 *mp = '\0';
584 msgformat_valid = message_format_parse (msgformat, spec, invalid_reason);
586 freesa (msgformat);
588 if (!msgformat_valid)
589 return false;
591 if (*format == '\0')
592 break;
594 format++;
595 HANDLE_QUOTE;
598 return true;
601 static int
602 numbered_arg_compare (const void *p1, const void *p2)
604 unsigned int n1 = ((const struct numbered_arg *) p1)->number;
605 unsigned int n2 = ((const struct numbered_arg *) p2)->number;
607 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
610 static void *
611 format_parse (const char *format, bool translated, char **invalid_reason)
613 struct spec spec;
614 struct spec *result;
616 spec.directives = 0;
617 spec.numbered_arg_count = 0;
618 spec.allocated = 0;
619 spec.numbered = NULL;
621 if (!message_format_parse (format, &spec, invalid_reason))
622 goto bad_format;
624 /* Sort the numbered argument array, and eliminate duplicates. */
625 if (spec.numbered_arg_count > 1)
627 unsigned int i, j;
628 bool err;
630 qsort (spec.numbered, spec.numbered_arg_count,
631 sizeof (struct numbered_arg), numbered_arg_compare);
633 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
634 err = false;
635 for (i = j = 0; i < spec.numbered_arg_count; i++)
636 if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
638 enum format_arg_type type1 = spec.numbered[i].type;
639 enum format_arg_type type2 = spec.numbered[j-1].type;
640 enum format_arg_type type_both;
642 if (type1 == type2 || type2 == FAT_OBJECT)
643 type_both = type1;
644 else if (type1 == FAT_OBJECT)
645 type_both = type2;
646 else
648 /* Incompatible types. */
649 type_both = FAT_NONE;
650 if (!err)
651 *invalid_reason =
652 INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
653 err = true;
656 spec.numbered[j-1].type = type_both;
658 else
660 if (j < i)
662 spec.numbered[j].number = spec.numbered[i].number;
663 spec.numbered[j].type = spec.numbered[i].type;
665 j++;
667 spec.numbered_arg_count = j;
668 if (err)
669 /* *invalid_reason has already been set above. */
670 goto bad_format;
673 result = (struct spec *) xmalloc (sizeof (struct spec));
674 *result = spec;
675 return result;
677 bad_format:
678 if (spec.numbered != NULL)
679 free (spec.numbered);
680 return NULL;
683 static void
684 format_free (void *descr)
686 struct spec *spec = (struct spec *) descr;
688 if (spec->numbered != NULL)
689 free (spec->numbered);
690 free (spec);
693 static int
694 format_get_number_of_directives (void *descr)
696 struct spec *spec = (struct spec *) descr;
698 return spec->directives;
701 static bool
702 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
703 formatstring_error_logger_t error_logger,
704 const char *pretty_msgstr)
706 struct spec *spec1 = (struct spec *) msgid_descr;
707 struct spec *spec2 = (struct spec *) msgstr_descr;
708 bool err = false;
710 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
712 unsigned int i, j;
713 unsigned int n1 = spec1->numbered_arg_count;
714 unsigned int n2 = spec2->numbered_arg_count;
716 /* Check the argument names are the same.
717 Both arrays are sorted. We search for the first difference. */
718 for (i = 0, j = 0; i < n1 || j < n2; )
720 int cmp = (i >= n1 ? 1 :
721 j >= n2 ? -1 :
722 spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
723 spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
726 if (cmp > 0)
728 if (error_logger)
729 error_logger (_("a format specification for argument {%u}, as in '%s', doesn't exist in 'msgid'"),
730 spec2->numbered[j].number, pretty_msgstr);
731 err = true;
732 break;
734 else if (cmp < 0)
736 if (equality)
738 if (error_logger)
739 error_logger (_("a format specification for argument {%u} doesn't exist in '%s'"),
740 spec1->numbered[i].number, pretty_msgstr);
741 err = true;
742 break;
744 else
745 i++;
747 else
748 j++, i++;
750 /* Check the argument types are the same. */
751 if (!err)
752 for (i = 0, j = 0; j < n2; )
754 if (spec1->numbered[i].number == spec2->numbered[j].number)
756 if (spec1->numbered[i].type != spec2->numbered[j].type)
758 if (error_logger)
759 error_logger (_("format specifications in 'msgid' and '%s' for argument {%u} are not the same"),
760 pretty_msgstr, spec2->numbered[j].number);
761 err = true;
762 break;
764 j++, i++;
766 else
767 i++;
771 return err;
775 struct formatstring_parser formatstring_java =
777 format_parse,
778 format_free,
779 format_get_number_of_directives,
780 format_check
784 #ifdef TEST
786 /* Test program: Print the argument list specification returned by
787 format_parse for strings read from standard input. */
789 #include <stdio.h>
790 #include "getline.h"
792 static void
793 format_print (void *descr)
795 struct spec *spec = (struct spec *) descr;
796 unsigned int last;
797 unsigned int i;
799 if (spec == NULL)
801 printf ("INVALID");
802 return;
805 printf ("(");
806 last = 0;
807 for (i = 0; i < spec->numbered_arg_count; i++)
809 unsigned int number = spec->numbered[i].number;
811 if (i > 0)
812 printf (" ");
813 if (number < last)
814 abort ();
815 for (; last < number; last++)
816 printf ("_ ");
817 switch (spec->numbered[i].type)
819 case FAT_OBJECT:
820 printf ("*");
821 break;
822 case FAT_NUMBER:
823 printf ("Number");
824 break;
825 case FAT_DATE:
826 printf ("Date");
827 break;
828 default:
829 abort ();
831 last = number + 1;
833 printf (")");
/* Read format strings from standard input, one per line.  For each one,
   print its argument list and, if it is invalid, the reason on a line of
   its own.  */
int
main ()
{
  for (;;)
    {
      char *buf = NULL;
      size_t buf_size = 0;
      int len;
      char *reason = NULL;
      void *descr;

      len = getline (&buf, &buf_size, stdin);
      if (len < 0)
        break;

      /* Drop the trailing newline, if there is one.  */
      if (len > 0 && buf[len - 1] == '\n')
        buf[--len] = '\0';

      descr = format_parse (buf, false, &reason);

      format_print (descr);
      putchar ('\n');
      if (descr == NULL)
        printf ("%s\n", reason);

      free (reason);
      free (buf);
    }

  return 0;
}
869 * For Emacs M-x compile
870 * Local Variables:
871 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-java.c ../lib/libgettextlib.la"
872 * End:
875 #endif /* TEST */