/* Java format strings.
   Copyright (C) 2001-2004 Free Software Foundation, Inc.
   Written by Bruno Haible <haible@clisp.cons.org>, 2001.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
33 #include "format-invalid.h"
36 #define _(str) gettext (str)
38 /* Java format strings are described in java/text/MessageFormat.html.
39 See also the ICU documentation class_MessageFormat.html.
41 messageFormatPattern := string ( "{" messageFormatElement "}" string )*
43 messageFormatElement := argument { "," elementFormat }
45 elementFormat := "time" { "," datetimeStyle }
46 | "date" { "," datetimeStyle }
47 | "number" { "," numberStyle }
48 | "choice" { "," choiceStyle }
50 datetimeStyle := "short"
56 numberStyle := "currency"
61 choiceStyle := choiceFormatPattern
63 dateFormatPattern see SimpleDateFormat.applyPattern
65 numberFormatPattern see DecimalFormat.applyPattern
67 choiceFormatPattern see ChoiceFormat constructor
69 In strings, literal curly braces can be used if quoted between single
70 quotes. A real single quote is represented by ''.
   If a pattern is used, then unquoted braces in the pattern, if any, must
   match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab {0'}' de" and
   "ab } de" are not.
76 The argument is a number from 0 to 9, which corresponds to the arguments
77 presented in an array to be formatted.
79 It is ok to have unused arguments in the array.
81 Adding a dateFormatPattern / numberFormatPattern / choiceFormatPattern
82 to an elementFormat is equivalent to creating a SimpleDateFormat /
83 DecimalFormat / ChoiceFormat and use of setFormat. For example,
86 new MessageFormat("The disk \"{1}\" contains {0,choice,0#no files|1#one file|2#{0,number} files}.");
90 MessageFormat form = new MessageFormat("The disk \"{1}\" contains {0}.");
91 form.setFormat(1, // Number of {} occurrence in the string!
92 new ChoiceFormat(new double[] { 0, 1, 2 },
93 new String[] { "no files", "one file",
94 "{0,number} files" }));
96 Note: The behaviour of quotes inside a choiceFormatPattern is not clear.
98 "abc{1,choice,0#{1,number,00';'000}}def"
100 JDK 1.3.x: behaves like "abc{1,choice,0#{1,number,00;000}}def"
102 "abc{1,choice,0#{1,number,00';'}}def"
103 JDK 1.1.x: interprets the semicolon as number suffix
104 JDK 1.3.x: behaves like "abc{1,choice,0#{1,number,00;}}def"
110 FAT_OBJECT
, /* java.lang.Object */
111 FAT_NUMBER
, /* java.lang.Number */
112 FAT_DATE
/* java.util.Date */
118 enum format_arg_type type
;
123 unsigned int directives
;
124 unsigned int numbered_arg_count
;
125 unsigned int allocated
;
126 struct numbered_arg
*numbered
;
130 /* Forward declaration of local functions. */
131 static bool date_format_parse (const char *format
);
132 static bool number_format_parse (const char *format
);
133 static bool choice_format_parse (const char *format
, struct spec
*spec
,
134 char **invalid_reason
);
138 - When we see a single-quote, ignore it, but toggle the quoting flag.
139 - When we see a double single-quote, ignore the first of the two.
140 Assumes local variables format, quoting. */
141 #define HANDLE_QUOTE \
142 if (*format == '\'' && *++format != '\'') \
145 /* Note that message_format_parse and choice_format_parse are mutually
146 recursive. This is because MessageFormat can use some ChoiceFormats,
147 and a ChoiceFormat is made up from several MessageFormats. */
149 /* Return true if a format is a valid messageFormatPattern.
150 Extracts argument type information into spec. */
152 message_format_parse (const char *format
, struct spec
*spec
,
153 char **invalid_reason
)
155 bool quoting
= false;
160 if (!quoting
&& *format
== '{')
163 const char *element_start
;
164 const char *element_end
;
166 char *element_alloced
;
169 enum format_arg_type type
;
173 element_start
= ++format
;
175 for (; *format
!= '\0'; format
++)
179 else if (*format
== '}')
190 xstrdup (_("The string ends in the middle of a directive: found '{' without matching '}'."));
193 element_end
= format
++;
195 n
= element_end
- element_start
;
196 element
= element_alloced
= (char *) xallocsa (n
+ 1);
197 memcpy (element
, element_start
, n
);
200 if (!c_isdigit (*element
))
203 xasprintf (_("In the directive number %u, '{' is not followed by an argument number."), spec
->directives
);
204 freesa (element_alloced
);
210 number
= 10 * number
+ (*element
- '0');
213 while (c_isdigit (*element
));
216 if (*element
== '\0')
218 else if (strncmp (element
, ",time", 5) == 0
219 || strncmp (element
, ",date", 5) == 0)
223 if (*element
== '\0')
225 else if (*element
== ',')
228 if (strcmp (element
, "short") == 0
229 || strcmp (element
, "medium") == 0
230 || strcmp (element
, "long") == 0
231 || strcmp (element
, "full") == 0
232 || date_format_parse (element
))
237 xasprintf (_("In the directive number %u, the substring \"%s\" is not a valid date/time style."), spec
->directives
, element
);
238 freesa (element_alloced
);
247 xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec
->directives
, element
);
248 freesa (element_alloced
);
252 else if (strncmp (element
, ",number", 7) == 0)
256 if (*element
== '\0')
258 else if (*element
== ',')
261 if (strcmp (element
, "currency") == 0
262 || strcmp (element
, "percent") == 0
263 || strcmp (element
, "integer") == 0
264 || number_format_parse (element
))
269 xasprintf (_("In the directive number %u, the substring \"%s\" is not a valid number style."), spec
->directives
, element
);
270 freesa (element_alloced
);
279 xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec
->directives
, element
);
280 freesa (element_alloced
);
284 else if (strncmp (element
, ",choice", 7) == 0)
286 type
= FAT_NUMBER
; /* because ChoiceFormat extends NumberFormat */
288 if (*element
== '\0')
290 else if (*element
== ',')
293 if (choice_format_parse (element
, spec
, invalid_reason
))
297 freesa (element_alloced
);
306 xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec
->directives
, element
);
307 freesa (element_alloced
);
314 xasprintf (_("In the directive number %u, the argument number is not followed by a comma and one of \"%s\", \"%s\", \"%s\", \"%s\"."), spec
->directives
, "time", "date", "number", "choice");
315 freesa (element_alloced
);
318 freesa (element_alloced
);
320 if (spec
->allocated
== spec
->numbered_arg_count
)
322 spec
->allocated
= 2 * spec
->allocated
+ 1;
323 spec
->numbered
= (struct numbered_arg
*) xrealloc (spec
->numbered
, spec
->allocated
* sizeof (struct numbered_arg
));
325 spec
->numbered
[spec
->numbered_arg_count
].number
= number
;
326 spec
->numbered
[spec
->numbered_arg_count
].type
= type
;
327 spec
->numbered_arg_count
++;
329 /* The doc says "ab}de" is invalid. Even though JDK accepts it. */
330 else if (!quoting
&& *format
== '}')
333 xstrdup (_("The string starts in the middle of a directive: found '}' without matching '{'."));
336 else if (*format
!= '\0')
345 /* Return true if a format is a valid dateFormatPattern. */
347 date_format_parse (const char *format
)
349 /* Any string is valid. Single-quote starts a quoted section, to be
350 terminated at the next single-quote or string end. Double single-quote
351 gives a single single-quote. Non-quoted ASCII letters are first grouped
352 into blocks of equal letters. Then each block (e.g. 'yyyy') is
353 interpreted according to some rules. */
357 /* Return true if a format is a valid numberFormatPattern. */
359 number_format_parse (const char *format
)
362 pattern := pos_pattern{';' neg_pattern}
363 pos_pattern := {prefix}number{suffix}
364 neg_pattern := {prefix}number{suffix}
365 number := integer{'.' fraction}{exponent}
366 prefix := '\u0000'..'\uFFFD' - special_characters
367 suffix := '\u0000'..'\uFFFD' - special_characters
368 integer := min_int | '#' | '#' integer | '#' ',' integer
369 min_int := '0' | '0' min_int | '0' ',' min_int
370 fraction := '0'* '#'*
371 exponent := 'E' '0' '0'*
373 X* 0 or more instances of X
374 { X } 0 or 1 instances of X
376 X..Y any character from X up to Y, inclusive
377 S - T characters in S, except those in T
378 Single-quote starts a quoted section, to be terminated at the next
379 single-quote or string end. Double single-quote gives a single
382 bool quoting
= false;
383 bool seen_semicolon
= false;
389 while (*format
!= '\0'
390 && !(!quoting
&& (*format
== '0' || *format
== '#')))
392 if (format
[0] == '\\')
395 && c_isxdigit (format
[2])
396 && c_isxdigit (format
[3])
397 && c_isxdigit (format
[4])
398 && c_isxdigit (format
[5]))
409 if (!(!quoting
&& (*format
== '0' || *format
== '#')))
411 while (!quoting
&& *format
== '#')
415 if (!quoting
&& *format
== ',')
421 while (!quoting
&& *format
== '0')
425 if (!quoting
&& *format
== ',')
432 /* Parse fraction. */
433 if (!quoting
&& *format
== '.')
437 while (!quoting
&& *format
== '0')
442 while (!quoting
&& *format
== '#')
449 /* Parse exponent. */
450 if (!quoting
&& *format
== 'E')
452 const char *format_save
= format
;
455 if (!quoting
&& *format
== '0')
462 while (!quoting
&& *format
== '0');
467 format
= format_save
;
473 while (*format
!= '\0'
474 && (seen_semicolon
|| !(!quoting
&& *format
== ';')))
476 if (format
[0] == '\\')
479 && c_isxdigit (format
[2])
480 && c_isxdigit (format
[3])
481 && c_isxdigit (format
[4])
482 && c_isxdigit (format
[5]))
492 if (seen_semicolon
|| !(!quoting
&& *format
== ';'))
496 return (*format
== '\0');
499 /* Return true if a format is a valid choiceFormatPattern.
500 Extracts argument type information into spec. */
502 choice_format_parse (const char *format
, struct spec
*spec
,
503 char **invalid_reason
)
506 pattern := | choice | choice '|' pattern
507 choice := number separator messageformat
508 separator := '<' | '#' | '\u2264'
509 Single-quote starts a quoted section, to be terminated at the next
510 single-quote or string end. Double single-quote gives a single
513 bool quoting
= false;
520 /* Don't bother looking too precisely into the syntax of the number.
521 It can contain various Unicode characters. */
522 bool number_nonempty
;
525 bool msgformat_valid
;
528 number_nonempty
= false;
529 while (*format
!= '\0'
530 && !(!quoting
&& (*format
== '<' || *format
== '#'
531 || strncmp (format
, "\\u2264", 6) == 0
534 if (format
[0] == '\\')
537 && c_isxdigit (format
[2])
538 && c_isxdigit (format
[3])
539 && c_isxdigit (format
[4])
540 && c_isxdigit (format
[5]))
547 number_nonempty
= true;
551 /* Short clause at end of pattern is valid and is ignored! */
555 if (!number_nonempty
)
558 xasprintf (_("In the directive number %u, a choice contains no number."), spec
->directives
);
562 if (*format
== '<' || *format
== '#')
564 else if (strncmp (format
, "\\u2264", 6) == 0)
569 xasprintf (_("In the directive number %u, a choice contains a number that is not followed by '<', '#' or '%s'."), spec
->directives
, "\\u2264");
574 msgformat
= (char *) xallocsa (strlen (format
) + 1);
577 while (*format
!= '\0' && !(!quoting
&& *format
== '|'))
584 msgformat_valid
= message_format_parse (msgformat
, spec
, invalid_reason
);
588 if (!msgformat_valid
)
602 numbered_arg_compare (const void *p1
, const void *p2
)
604 unsigned int n1
= ((const struct numbered_arg
*) p1
)->number
;
605 unsigned int n2
= ((const struct numbered_arg
*) p2
)->number
;
607 return (n1
> n2
? 1 : n1
< n2
? -1 : 0);
611 format_parse (const char *format
, bool translated
, char **invalid_reason
)
617 spec
.numbered_arg_count
= 0;
619 spec
.numbered
= NULL
;
621 if (!message_format_parse (format
, &spec
, invalid_reason
))
624 /* Sort the numbered argument array, and eliminate duplicates. */
625 if (spec
.numbered_arg_count
> 1)
630 qsort (spec
.numbered
, spec
.numbered_arg_count
,
631 sizeof (struct numbered_arg
), numbered_arg_compare
);
633 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
635 for (i
= j
= 0; i
< spec
.numbered_arg_count
; i
++)
636 if (j
> 0 && spec
.numbered
[i
].number
== spec
.numbered
[j
-1].number
)
638 enum format_arg_type type1
= spec
.numbered
[i
].type
;
639 enum format_arg_type type2
= spec
.numbered
[j
-1].type
;
640 enum format_arg_type type_both
;
642 if (type1
== type2
|| type2
== FAT_OBJECT
)
644 else if (type1
== FAT_OBJECT
)
648 /* Incompatible types. */
649 type_both
= FAT_NONE
;
652 INVALID_INCOMPATIBLE_ARG_TYPES (spec
.numbered
[i
].number
);
656 spec
.numbered
[j
-1].type
= type_both
;
662 spec
.numbered
[j
].number
= spec
.numbered
[i
].number
;
663 spec
.numbered
[j
].type
= spec
.numbered
[i
].type
;
667 spec
.numbered_arg_count
= j
;
669 /* *invalid_reason has already been set above. */
673 result
= (struct spec
*) xmalloc (sizeof (struct spec
));
678 if (spec
.numbered
!= NULL
)
679 free (spec
.numbered
);
684 format_free (void *descr
)
686 struct spec
*spec
= (struct spec
*) descr
;
688 if (spec
->numbered
!= NULL
)
689 free (spec
->numbered
);
694 format_get_number_of_directives (void *descr
)
696 struct spec
*spec
= (struct spec
*) descr
;
698 return spec
->directives
;
702 format_check (void *msgid_descr
, void *msgstr_descr
, bool equality
,
703 formatstring_error_logger_t error_logger
,
704 const char *pretty_msgstr
)
706 struct spec
*spec1
= (struct spec
*) msgid_descr
;
707 struct spec
*spec2
= (struct spec
*) msgstr_descr
;
710 if (spec1
->numbered_arg_count
+ spec2
->numbered_arg_count
> 0)
713 unsigned int n1
= spec1
->numbered_arg_count
;
714 unsigned int n2
= spec2
->numbered_arg_count
;
716 /* Check the argument names are the same.
717 Both arrays are sorted. We search for the first difference. */
718 for (i
= 0, j
= 0; i
< n1
|| j
< n2
; )
720 int cmp
= (i
>= n1
? 1 :
722 spec1
->numbered
[i
].number
> spec2
->numbered
[j
].number
? 1 :
723 spec1
->numbered
[i
].number
< spec2
->numbered
[j
].number
? -1 :
729 error_logger (_("a format specification for argument {%u}, as in '%s', doesn't exist in 'msgid'"),
730 spec2
->numbered
[j
].number
, pretty_msgstr
);
739 error_logger (_("a format specification for argument {%u} doesn't exist in '%s'"),
740 spec1
->numbered
[i
].number
, pretty_msgstr
);
750 /* Check the argument types are the same. */
752 for (i
= 0, j
= 0; j
< n2
; )
754 if (spec1
->numbered
[i
].number
== spec2
->numbered
[j
].number
)
756 if (spec1
->numbered
[i
].type
!= spec2
->numbered
[j
].type
)
759 error_logger (_("format specifications in 'msgid' and '%s' for argument {%u} are not the same"),
760 pretty_msgstr
, spec2
->numbered
[j
].number
);
775 struct formatstring_parser formatstring_java
=
779 format_get_number_of_directives
,
786 /* Test program: Print the argument list specification returned by
787 format_parse for strings read from standard input. */
793 format_print (void *descr
)
795 struct spec
*spec
= (struct spec
*) descr
;
807 for (i
= 0; i
< spec
->numbered_arg_count
; i
++)
809 unsigned int number
= spec
->numbered
[i
].number
;
815 for (; last
< number
; last
++)
817 switch (spec
->numbered
[i
].type
)
842 size_t line_size
= 0;
844 char *invalid_reason
;
847 line_len
= getline (&line
, &line_size
, stdin
);
850 if (line_len
> 0 && line
[line_len
- 1] == '\n')
851 line
[--line_len
] = '\0';
853 invalid_reason
= NULL
;
854 descr
= format_parse (line
, false, &invalid_reason
);
856 format_print (descr
);
859 printf ("%s\n", invalid_reason
);
861 free (invalid_reason
);
869 * For Emacs M-x compile
871 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-java.c ../lib/libgettextlib.la"