1 /* Perl format strings.
2 Copyright (C) 2004 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2003.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
30 #include "format-invalid.h"
/* Shorthand used around message strings: expands to a gettext () call on
   the given string.  */
33 #define _(str) gettext (str)
35 /* Perl format strings are implemented in function Perl_sv_vcatpvfn in
38 - starts with '%' or '%m$' where m is a positive integer starting with a
40 - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
41 each of which acts as a flag,
42 - is optionally followed by a vector specification: 'v' or '*v' (reads an
43 argument) or '*m$v' where m is a positive integer starting with a nonzero
45 - is optionally followed by a width specification: '*' (reads an argument)
46 or '*m$' where m is a positive integer starting with a nonzero digit or
47 a nonempty digit sequence starting with a nonzero digit,
48 - is optionally followed by '.' and a precision specification: '*' (reads
49 an argument) or '*m$' where m is a positive integer starting with a
50 nonzero digit or a digit sequence,
51 - is optionally followed by a size specifier, one of 'h' 'l' 'll' 'L' 'q'
53 - is finished by a specifier
54 - '%', that needs no argument,
55 - 'c', that needs a small integer argument,
56 - 's', that needs a string argument,
57 - '_', that needs a scalar vector argument,
58 - 'p', that needs a pointer argument,
59 - 'i', 'd', 'D', that need an integer argument,
60 - 'u', 'U', 'b', 'o', 'O', 'x', 'X', that need an unsigned integer
62 - 'e', 'E', 'f', 'F', 'g', 'G', that need a floating-point argument,
63 - 'n', that needs a pointer to integer.
64 So there can be numbered argument specifications:
65 - '%m$' for the format string,
66 - '*m$v' for the vector,
67 - '*m$' for the width,
68 - '.*m$' for the precision.
69 Numbered and unnumbered argument specifications can be used in the same
70 string. The effect of '%m$' is to take argument number m, without affecting
71 the current argument number. The current argument number is incremented
72 after processing a directive with an unnumbered argument specification.
/* Enumerators used as values of 'enum format_arg_type' in this file
   (the enum header itself is not among the lines shown here).
   Layout visible from the values below: basic type codes are small
   integers below 1 << 3, FAT_UNSIGNED is the single bit 1 << 3, and the
   size codes are multiples of 1 << 4.  */
83 FAT_SCALAR_VECTOR
= 5,
85 FAT_COUNT_POINTER
= 7,
/* Flag bit, OR-ed onto an integer type.  */
87 FAT_UNSIGNED
= 1 << 3,
/* Size codes, OR-ed onto a basic type.  */
88 FAT_SIZE_SHORT
= 1 << 4,
90 FAT_SIZE_PTR
= 3 << 4,
91 FAT_SIZE_LONG
= 4 << 4,
92 FAT_SIZE_LONGLONG
= 5 << 4,
/* Union of all size codes; AND-ing a type with this mask isolates its
   size part.  NOTE(review): the definition of FAT_SIZE_V is not visible
   in this excerpt.  */
94 FAT_SIZE_MASK
= (FAT_SIZE_SHORT
| FAT_SIZE_V
| FAT_SIZE_PTR
95 | FAT_SIZE_LONG
| FAT_SIZE_LONGLONG
)
/* Type recorded for one argument; read and written elsewhere in this file
   as numbered[i].type.  */
101 enum format_arg_type type
;
/* Members of the parse result ('struct spec').  */
/* Directive count; this is what format_get_number_of_directives returns.  */
106 unsigned int directives
;
/* Number of entries in use in 'numbered'.  */
107 unsigned int numbered_arg_count
;
/* Number of entries 'numbered' has room for.  */
108 unsigned int allocated
;
/* Argument array; may be NULL, released by format_free.  */
109 struct numbered_arg
*numbered
;
112 /* Locale independent test for a decimal digit.
113 Argument can be 'char' or 'unsigned char'. (Whereas the argument of
114 <ctype.h> isdigit must be an 'unsigned char'.) */
/* The cast to unsigned makes characters below '0' wrap around to large
   values, so a single '< 10' comparison checks both ends of the range.  */
116 #define isdigit(c) ((unsigned int) ((c) - '0') < 10)
118 /* Locale independent test for a nonzero decimal digit. */
/* Same unsigned wraparound technique as isdigit, but the accepted range
   is '1'..'9'.  */
119 #define isnonzerodigit(c) ((unsigned int) ((c) - '1') < 9)
/* Comparison callback passed to qsort in format_parse.
   P1 and P2 point to 'struct numbered_arg' elements; they are ordered by
   their 'number' member, ascending.
   Returns 1, -1 or 0 for greater, less or equal.  */
123 numbered_arg_compare (const void *p1
, const void *p2
)
125 unsigned int n1
= ((const struct numbered_arg
*) p1
)->number
;
126 unsigned int n2
= ((const struct numbered_arg
*) p2
)->number
;
/* Explicit comparisons instead of a subtraction: the operands are
   unsigned, so a difference could not be negative.  */
128 return (n1
> n2
? 1 : n1
< n2
? -1 : 0);
/* Parse the Perl format string FORMAT.
   Scans FORMAT for '%' directives and records every argument a directive
   consumes (vector, width, precision, and the value itself) as a
   (number, type) entry in a growable array.  The array is then sorted by
   argument number, entries with the same number are merged, and the
   outcome is stored in a freshly allocated 'struct spec'.
   On the error paths visible here, *INVALID_REASON receives a message.
   NOTE(review): the return statements are not among the lines shown here;
   TRANSLATED is not referenced in the visible lines.  */
132 format_parse (const char *format
, bool translated
, char **invalid_reason
)
134 unsigned int directives
;
135 unsigned int numbered_arg_count
;
136 unsigned int allocated
;
137 struct numbered_arg
*numbered
;
/* Counter handing out numbers to arguments given without 'm$'.  */
138 unsigned int unnumbered_arg_count
;
142 numbered_arg_count
= 0;
143 unnumbered_arg_count
= 0;
/* Main scan: advance through FORMAT; the character after each '%' starts
   a directive.  */
147 for (; *format
!= '\0';)
148 if (*format
++ == '%')
/* Per-directive state; number == 0 means no explicit argument number.  */
151 unsigned int number
= 0;
152 bool vectorize
= false;
153 enum format_arg_type type
;
154 enum format_arg_type size
;
/* A leading nonzero digit may start an 'm$' argument number; the digits
   are accumulated through the lookahead pointer f.  */
158 if (isnonzerodigit (*format
))
160 const char *f
= format
;
165 m
= 10 * m
+ (*f
- '0');
168 while (isdigit (*f
));
/* Flag characters.  */
178 while (*format
== ' ' || *format
== '+' || *format
== '-'
179 || *format
== '#' || *format
== '0')
/* Vector specification introduced by '*'.  */
188 else if (*format
== '*')
190 const char *f
= format
;
198 /* Unnumbered argument. */
/* Array full: grow the capacity to 2 * allocated + 1 before appending.
   The same idiom is repeated before every append below.  */
199 if (allocated
== numbered_arg_count
)
201 allocated
= 2 * allocated
+ 1;
202 numbered
= (struct numbered_arg
*) xrealloc (numbered
, allocated
* sizeof (struct numbered_arg
));
204 numbered
[numbered_arg_count
].number
= ++unnumbered_arg_count
;
205 numbered
[numbered_arg_count
].type
= FAT_SCALAR_VECTOR
; /* or FAT_STRING? */
206 numbered_arg_count
++;
208 else if (isnonzerodigit (*f
))
214 m
= 10 * m
+ (*f
- '0');
217 while (isdigit (*f
));
224 unsigned int vector_number
= m
;
229 /* Numbered argument. */
230 /* Note: As of perl-5.8.0, this is not correctly
231 implemented in perl's sv.c. */
232 if (allocated
== numbered_arg_count
)
234 allocated
= 2 * allocated
+ 1;
235 numbered
= (struct numbered_arg
*) xrealloc (numbered
, allocated
* sizeof (struct numbered_arg
));
237 numbered
[numbered_arg_count
].number
= vector_number
;
238 numbered
[numbered_arg_count
].type
= FAT_SCALAR_VECTOR
; /* or FAT_STRING? */
239 numbered_arg_count
++;
/* Entry of type FAT_SCALAR_VECTOR for the directive's own argument
   (explicit number if given, otherwise the next unnumbered one).
   NOTE(review): presumably only reached for vectorized directives; the
   guarding condition is not among the lines shown here.  */
247 /* Numbered or unnumbered argument. */
248 if (allocated
== numbered_arg_count
)
250 allocated
= 2 * allocated
+ 1;
251 numbered
= (struct numbered_arg
*) xrealloc (numbered
, allocated
* sizeof (struct numbered_arg
));
253 numbered
[numbered_arg_count
].number
= (number
? number
: ++unnumbered_arg_count
);
254 numbered
[numbered_arg_count
].type
= FAT_SCALAR_VECTOR
;
255 numbered_arg_count
++;
/* Width taken from an argument: recorded as an integer argument.  */
261 unsigned int width_number
= 0;
265 if (isnonzerodigit (*format
))
267 const char *f
= format
;
272 m
= 10 * m
+ (*f
- '0');
275 while (isdigit (*f
));
284 /* Numbered or unnumbered argument. */
285 /* Note: As of perl-5.8.0, this is not correctly
286 implemented in perl's sv.c. */
287 if (allocated
== numbered_arg_count
)
289 allocated
= 2 * allocated
+ 1;
290 numbered
= (struct numbered_arg
*) xrealloc (numbered
, allocated
* sizeof (struct numbered_arg
));
292 numbered
[numbered_arg_count
].number
= (width_number
? width_number
: ++unnumbered_arg_count
);
293 numbered
[numbered_arg_count
].type
= FAT_INTEGER
;
294 numbered_arg_count
++;
/* Literal width: the digits are skipped, no argument is consumed.  */
296 else if (isnonzerodigit (*format
))
298 do format
++; while (isdigit (*format
));
301 /* Parse precision. */
308 unsigned int precision_number
= 0;
312 if (isnonzerodigit (*format
))
314 const char *f
= format
;
319 m
= 10 * m
+ (*f
- '0');
322 while (isdigit (*f
));
326 precision_number
= m
;
331 /* Numbered or unnumbered argument. */
332 if (allocated
== numbered_arg_count
)
334 allocated
= 2 * allocated
+ 1;
335 numbered
= (struct numbered_arg
*) xrealloc (numbered
, allocated
* sizeof (struct numbered_arg
));
337 numbered
[numbered_arg_count
].number
= (precision_number
? precision_number
: ++unnumbered_arg_count
);
338 numbered
[numbered_arg_count
].type
= FAT_INTEGER
;
339 numbered_arg_count
++;
/* Literal precision: skip the digits.  */
343 while (isdigit (*format
)) format
++;
/* Size specifier: translated into one of the FAT_SIZE_* codes (or 0).  */
351 size
= FAT_SIZE_SHORT
;
354 else if (*format
== 'l')
/* "ll" must be told apart from a single 'l'.  */
356 if (format
[1] == 'l')
358 size
= FAT_SIZE_LONGLONG
;
363 size
= FAT_SIZE_LONG
;
367 else if (*format
== 'L' || *format
== 'q')
369 size
= FAT_SIZE_LONGLONG
;
372 else if (*format
== 'V')
/* 'I' followed by "64" or "32".  */
377 else if (*format
== 'I')
379 if (format
[1] == '6' && format
[2] == '4')
381 size
= FAT_SIZE_LONGLONG
;
384 else if (format
[1] == '3' && format
[2] == '2')
386 size
= 0; /* FAT_SIZE_INT */
/* Conversion specifier: determines the argument type; where a size
   applies, it is OR-ed onto the basic type.  */
408 type
= FAT_SCALAR_VECTOR
;
411 type
= FAT_INTEGER
| FAT_SIZE_V
;
414 type
= FAT_INTEGER
| size
;
417 type
= FAT_INTEGER
| FAT_UNSIGNED
| FAT_SIZE_V
;
419 case 'u': case 'b': case 'o': case 'x': case 'X':
420 type
= FAT_INTEGER
| FAT_UNSIGNED
| size
;
422 case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
/* The short and long sizes are diagnosed as incompatible with the
   floating-point conversions.  */
423 if (size
== FAT_SIZE_SHORT
|| size
== FAT_SIZE_LONG
)
426 xasprintf (_("In the directive number %u, the size specifier is incompatible with the conversion specifier '%c'."), directives
, *format
);
429 type
= FAT_DOUBLE
| size
;
435 type
= FAT_COUNT_POINTER
| size
;
/* Diagnostics for an unterminated directive or an unknown conversion
   specifier.  NOTE(review): the selecting condition is not visible here.  */
440 ? INVALID_UNTERMINATED_DIRECTIVE ()
441 : INVALID_CONVERSION_SPECIFIER (directives
, *format
));
/* Record the directive's own argument, unless the directive takes none
   (FAT_NONE) or is vectorized.  */
445 if (type
!= FAT_NONE
&& !vectorize
)
447 /* Numbered or unnumbered argument. */
448 if (allocated
== numbered_arg_count
)
450 allocated
= 2 * allocated
+ 1;
451 numbered
= (struct numbered_arg
*) xrealloc (numbered
, allocated
* sizeof (struct numbered_arg
));
453 numbered
[numbered_arg_count
].number
= (number
? number
: ++unnumbered_arg_count
);
454 numbered
[numbered_arg_count
].type
= type
;
455 numbered_arg_count
++;
461 /* Sort the numbered argument array, and eliminate duplicates. */
462 if (numbered_arg_count
> 1)
467 qsort (numbered
, numbered_arg_count
,
468 sizeof (struct numbered_arg
), numbered_arg_compare
);
470 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
472 for (i
= j
= 0; i
< numbered_arg_count
; i
++)
/* Same argument number as the last kept entry: merge the two types.  */
473 if (j
> 0 && numbered
[i
].number
== numbered
[j
-1].number
)
475 enum format_arg_type type1
= numbered
[i
].type
;
476 enum format_arg_type type2
= numbered
[j
-1].type
;
477 enum format_arg_type type_both
;
483 /* Incompatible types. */
484 type_both
= FAT_NONE
;
487 INVALID_INCOMPATIBLE_ARG_TYPES (numbered
[i
].number
);
491 numbered
[j
-1].type
= type_both
;
/* New argument number: keep the entry by copying it down to slot j.  */
497 numbered
[j
].number
= numbered
[i
].number
;
498 numbered
[j
].type
= numbered
[i
].type
;
/* j is now the number of distinct arguments.  */
502 numbered_arg_count
= j
;
504 /* *invalid_reason has already been set above. */
/* Success: hand the collected data over to a newly allocated spec.  */
508 result
= (struct spec
*) xmalloc (sizeof (struct spec
));
509 result
->directives
= directives
;
510 result
->numbered_arg_count
= numbered_arg_count
;
511 result
->allocated
= allocated
;
512 result
->numbered
= numbered
;
/* NOTE(review): presumably the failure path releasing the array; the
   statement guarded by this test is not among the lines shown here.  */
516 if (numbered
!= NULL
)
/* Release a parse result.
   DESCR is treated as a 'struct spec *'; its 'numbered' array is freed
   when non-NULL.
   NOTE(review): freeing of the spec object itself is not among the lines
   shown here.  */
522 format_free (void *descr
)
524 struct spec
*spec
= (struct spec
*) descr
;
526 if (spec
->numbered
!= NULL
)
527 free (spec
->numbered
);
/* Return the 'directives' member of the parse result DESCR, which is
   treated as a 'struct spec *'.  */
532 format_get_number_of_directives (void *descr
)
534 struct spec
*spec
= (struct spec
*) descr
;
536 return spec
->directives
;
/* Compare the parse result of a msgid (MSGID_DESCR) with the parse result
   of a msgstr (MSGSTR_DESCR).
   First pass: walk both sorted argument arrays in parallel and report
   argument numbers present on one side only.  Second pass: for matching
   argument numbers, report differing types.
   Problems are reported through ERROR_LOGGER; PRETTY_MSGSTR is the name
   of the msgstr inserted into the messages.
   NOTE(review): the use of EQUALITY and the return value are not among
   the lines shown here.  */
540 format_check (void *msgid_descr
, void *msgstr_descr
, bool equality
,
541 formatstring_error_logger_t error_logger
,
542 const char *pretty_msgstr
)
544 struct spec
*spec1
= (struct spec
*) msgid_descr
;
545 struct spec
*spec2
= (struct spec
*) msgstr_descr
;
/* Nothing to compare when neither string takes an argument.  */
548 if (spec1
->numbered_arg_count
+ spec2
->numbered_arg_count
> 0)
551 unsigned int n1
= spec1
->numbered_arg_count
;
552 unsigned int n2
= spec2
->numbered_arg_count
;
554 /* Check the argument names are the same.
555 Both arrays are sorted. We search for the first difference. */
556 for (i
= 0, j
= 0; i
< n1
|| j
< n2
; )
/* cmp orders the current argument numbers of the two arrays; an
   exhausted msgid array (i >= n1) yields 1.  */
558 int cmp
= (i
>= n1
? 1 :
560 spec1
->numbered
[i
].number
> spec2
->numbered
[j
].number
? 1 :
561 spec1
->numbered
[i
].number
< spec2
->numbered
[j
].number
? -1 :
/* Argument used by the msgstr but missing from the msgid.  */
567 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"),
568 spec2
->numbered
[j
].number
, pretty_msgstr
);
/* Argument used by the msgid but missing from the msgstr.  */
577 error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
578 spec1
->numbered
[i
].number
, pretty_msgstr
);
588 /* Check the argument types are the same. */
590 for (i
= 0, j
= 0; j
< n2
; )
592 if (spec1
->numbered
[i
].number
== spec2
->numbered
[j
].number
)
594 if (spec1
->numbered
[i
].type
!= spec2
->numbered
[j
].type
)
597 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
598 pretty_msgstr
, spec2
->numbered
[j
].number
);
/* Parser descriptor for Perl format strings: a 'struct formatstring_parser'
   whose initializer lists this file's functions (only
   format_get_number_of_directives is among the lines shown here).  */
613 struct formatstring_parser formatstring_perl
=
617 format_get_number_of_directives
,
624 /* Test program: Print the argument list specification returned by
625 format_parse for strings read from standard input. */
/* Test helper: print the argument list held in the parse result DESCR
   (treated as a 'struct spec *').
   For every recorded argument it prints an optional "[unsigned]" marker,
   then a size marker, then the basic type.  */
631 format_print (void *descr
)
633 struct spec
*spec
= (struct spec
*) descr
;
645 for (i
= 0; i
< spec
->numbered_arg_count
; i
++)
647 unsigned int number
= spec
->numbered
[i
].number
;
/* Step 'last' up to this argument's number.  NOTE(review): presumably
   prints a placeholder for each skipped number; the loop body is not
   among the lines shown here.  */
653 for (; last
< number
; last
++)
655 if (spec
->numbered
[i
].type
& FAT_UNSIGNED
)
656 printf ("[unsigned]");
/* Dispatch on the size part of the type.  */
657 switch (spec
->numbered
[i
].type
& FAT_SIZE_MASK
)
673 case FAT_SIZE_LONGLONG
:
674 printf ("[long long]");
/* Dispatch on the basic type, with the unsigned flag and the size part
   masked off.  */
679 switch (spec
->numbered
[i
].type
& ~(FAT_UNSIGNED
| FAT_SIZE_MASK
))
693 case FAT_SCALAR_VECTOR
:
699 case FAT_COUNT_POINTER
:
/* Test driver body: read a line from standard input with getline, strip
   the trailing newline, parse the line with format_parse, and print
   either the parsed argument list or the reason it is invalid.
   NOTE(review): the enclosing function header and loop are not among the
   lines shown here.  */
716 size_t line_size
= 0;
718 char *invalid_reason
;
721 line_len
= getline (&line
, &line_size
, stdin
);
/* Drop the newline that getline leaves at the end of the line.  */
724 if (line_len
> 0 && line
[line_len
- 1] == '\n')
725 line
[--line_len
] = '\0';
/* Start from NULL so that a reason is present only if the parser set
   one.  */
727 invalid_reason
= NULL
;
728 descr
= format_parse (line
, false, &invalid_reason
);
730 format_print (descr
);
733 printf ("%s\n", invalid_reason
);
735 free (invalid_reason
);
743 * For Emacs M-x compile
745 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-perl.c ../lib/libgettextlib.la"