1 /* xgettext YCP backend.
2 Copyright (C) 2001-2003 Free Software Foundation, Inc.
4 This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
39 #define _(s) gettext(s)
41 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
44 /* The YCP syntax is defined in libycp/doc/syntax.html.
45 See also libycp/src/scanner.ll. */
49 init_flag_table_ycp ()
51 xgettext_record_flag ("sformat:1:ycp-format");
52 xgettext_record_flag ("y2debug:1:ycp-format");
53 xgettext_record_flag ("y2milestone:1:ycp-format");
54 xgettext_record_flag ("y2warning:1:ycp-format");
55 xgettext_record_flag ("y2error:1:ycp-format");
56 xgettext_record_flag ("y2security:1:ycp-format");
57 xgettext_record_flag ("y2internal:1:ycp-format");
61 /* ======================== Reading of characters. ======================== */
64 /* Real filename, used in error messages about the input file. */
65 static const char *real_file_name
;
67 /* Logical filename and line number, used to label the extracted messages. */
68 static char *logical_file_name
;
69 static int line_number
;
70 static int char_in_line
;
72 /* The input file stream. */
75 /* These are for tracking whether comments count as immediately before
76 keyword. */
77 static int last_comment_line
;
78 static int last_non_comment_line
;
81 /* 1. line_number handling. */
91 error (EXIT_FAILURE
, errno
, _("error while reading \"%s\""),
107 /* Supports only one pushback character. */
109 phase1_ungetc (int c
)
116 char_in_line
= INT_MAX
;
126 /* 2. Replace each comment that is not inside a character constant or
127 string literal with a space character. We need to remember the
128 comment for later, because it may be attached to a keyword string.
129 YCP comments can be in C comment syntax, C++ comment syntax or sh
130 comment syntax. */
132 static unsigned char phase2_pushback
[1];
133 static int phase2_pushback_length
;
139 static size_t bufmax
;
145 if (phase2_pushback_length
)
146 return phase2_pushback
[--phase2_pushback_length
];
148 if (char_in_line
== 0)
150 /* Eat whitespace, to recognize ^[\t ]*# pattern. */
153 while (c
== '\t' || c
== ' ');
159 lineno
= line_number
;
163 if (c
== '\n' || c
== EOF
)
165 /* We skip all leading white space, but not EOLs. */
166 if (!(buflen
== 0 && (c
== ' ' || c
== '\t')))
168 if (buflen
>= bufmax
)
170 bufmax
= 2 * bufmax
+ 10;
171 buffer
= xrealloc (buffer
, bufmax
);
173 buffer
[buflen
++] = c
;
176 if (buflen
>= bufmax
)
178 bufmax
= 2 * bufmax
+ 10;
179 buffer
= xrealloc (buffer
, bufmax
);
181 buffer
[buflen
] = '\0';
182 xgettext_comment_add (buffer
);
183 last_comment_line
= lineno
;
203 lineno
= line_number
;
204 last_was_star
= false;
210 /* We skip all leading white space, but not EOLs. */
211 if (buflen
== 0 && (c
== ' ' || c
== '\t'))
213 if (buflen
>= bufmax
)
215 bufmax
= 2 * bufmax
+ 10;
216 buffer
= xrealloc (buffer
, bufmax
);
218 buffer
[buflen
++] = c
;
224 && (buffer
[buflen
- 1] == ' '
225 || buffer
[buflen
- 1] == '\t'))
227 buffer
[buflen
] = '\0';
228 xgettext_comment_add (buffer
);
230 lineno
= line_number
;
231 last_was_star
= false;
235 last_was_star
= true;
243 && (buffer
[buflen
- 1] == ' '
244 || buffer
[buflen
- 1] == '\t'))
246 buffer
[buflen
] = '\0';
247 xgettext_comment_add (buffer
);
253 last_was_star
= false;
258 last_comment_line
= lineno
;
264 lineno
= line_number
;
268 if (c
== '\n' || c
== EOF
)
270 /* We skip all leading white space, but not EOLs. */
271 if (!(buflen
== 0 && (c
== ' ' || c
== '\t')))
273 if (buflen
>= bufmax
)
275 bufmax
= 2 * bufmax
+ 10;
276 buffer
= xrealloc (buffer
, bufmax
);
278 buffer
[buflen
++] = c
;
281 if (buflen
>= bufmax
)
283 bufmax
= 2 * bufmax
+ 10;
284 buffer
= xrealloc (buffer
, bufmax
);
286 buffer
[buflen
] = '\0';
287 xgettext_comment_add (buffer
);
288 last_comment_line
= lineno
;
296 /* Supports only one pushback character. */
298 phase2_ungetc (int c
)
302 if (phase2_pushback_length
== SIZEOF (phase2_pushback
))
304 phase2_pushback
[phase2_pushback_length
++] = c
;
309 /* ========================== Reading of tokens. ========================== */
315 token_type_lparen
, /* ( */
316 token_type_rparen
, /* ) */
317 token_type_comma
, /* , */
318 token_type_i18n
, /* _( */
319 token_type_string_literal
, /* "abc" */
320 token_type_symbol
, /* symbol, number */
321 token_type_other
/* misc. operator */
323 typedef enum token_type_ty token_type_ty
;
325 typedef struct token_ty token_ty
;
329 char *string
; /* for token_type_string_literal, token_type_symbol */
334 /* 7. Replace escape sequences within character strings with their
335 single character equivalents. */
337 #define P7_QUOTES (1000 + '"')
346 /* Use phase 1, because phase 2 elides comments. */
368 /* FIXME: What is the octal escape syntax?
369 syntax.html says: [0] [0-7]+
370 scanner.ll says: [0-7] [0-7] [0-7]
373 case '0': case '1': case '2': case '3':
374 case '4': case '5': case '6': case '7':
379 for (j
= 0; j
< 3; ++j
)
388 case '0': case '1': case '2': case '3':
389 case '4': case '5': case '6': case '7':
406 /* Combine characters into tokens. Discard whitespace. */
409 x_ycp_lex (token_ty
*tp
)
418 tp
->line_number
= line_number
;
424 tp
->type
= token_type_eof
;
428 if (last_non_comment_line
> last_comment_line
)
429 xgettext_comment_reset ();
434 /* Ignore whitespace and comments. */
438 last_non_comment_line
= tp
->line_number
;
442 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
443 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
444 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
445 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
448 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
449 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
450 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
451 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
453 case '0': case '1': case '2': case '3': case '4':
454 case '5': case '6': case '7': case '8': case '9':
455 /* Symbol, or part of a number. */
459 if (bufpos
>= bufmax
)
461 bufmax
= 2 * bufmax
+ 10;
462 buffer
= xrealloc (buffer
, bufmax
);
464 buffer
[bufpos
++] = c
;
468 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
469 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
470 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
471 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
474 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
475 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
476 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
477 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
479 case '0': case '1': case '2': case '3': case '4':
480 case '5': case '6': case '7': case '8': case '9':
483 if (bufpos
== 1 && buffer
[0] == '_' && c
== '(')
485 tp
->type
= token_type_i18n
;
493 if (bufpos
>= bufmax
)
495 bufmax
= 2 * bufmax
+ 10;
496 buffer
= xrealloc (buffer
, bufmax
);
498 buffer
[bufpos
] = '\0';
499 tp
->string
= xstrdup (buffer
);
500 tp
->type
= token_type_symbol
;
508 if (c
== EOF
|| c
== P7_QUOTES
)
510 if (bufpos
>= bufmax
)
512 bufmax
= 2 * bufmax
+ 10;
513 buffer
= xrealloc (buffer
, bufmax
);
515 buffer
[bufpos
++] = c
;
517 if (bufpos
>= bufmax
)
519 bufmax
= 2 * bufmax
+ 10;
520 buffer
= xrealloc (buffer
, bufmax
);
522 buffer
[bufpos
] = '\0';
523 tp
->string
= xstrdup (buffer
);
524 tp
->type
= token_type_string_literal
;
528 tp
->type
= token_type_lparen
;
532 tp
->type
= token_type_rparen
;
536 tp
->type
= token_type_comma
;
540 /* We could carefully recognize each of the 2 and 3 character
541 operators, but it is not necessary, as we only need to recognize
542 gettext invocations. Don't bother. */
543 tp
->type
= token_type_other
;
550 /* ========================= Extracting strings. ========================== */
553 /* Context lookup table. */
554 static flag_context_list_table_ty
*flag_context_list_table
;
557 /* The file is broken into tokens.
559 Normal handling: Look for
560 [A] _( [B] msgid ... )
561 Plural handling: Look for
562 [A] _( [B] msgid [C] , [D] msgid_plural ... )
563 At point [A]: state == 0.
564 At point [B]: state == 1, plural_mp == NULL.
565 At point [C]: state == 2, plural_mp != NULL.
566 At point [D]: state == 1, plural_mp != NULL.
568 We use recursion because we have to set the context according to the given
569 flags. */
572 /* Extract messages until the next balanced closing parenthesis.
573 Extracted messages are added to MLP.
574 Return true upon eof, false upon closing parenthesis. */
576 extract_parenthesized (message_list_ty
*mlp
,
577 flag_context_ty outer_context
,
578 flag_context_list_iterator_ty context_iter
,
581 int state
; /* 1 or 2 inside _( ... ), otherwise 0 */
582 message_ty
*plural_mp
= NULL
; /* defined only when in states 1 and 2 */
583 /* Context iterator that will be used if the next token is a '('. */
584 flag_context_list_iterator_ty next_context_iter
=
585 passthrough_context_list_iterator
;
586 /* Current context. */
587 flag_context_ty inner_context
=
588 inherited_context (outer_context
,
589 flag_context_list_iterator_advance (&context_iter
));
591 /* Start state is 0 or 1. */
592 state
= (in_i18n
? 1 : 0);
601 case token_type_i18n
:
602 if (extract_parenthesized (mlp
, inner_context
, next_context_iter
,
605 next_context_iter
= null_context_list_iterator
;
609 case token_type_string_literal
:
613 pos
.file_name
= logical_file_name
;
614 pos
.line_number
= token
.line_number
;
616 if (plural_mp
== NULL
)
619 plural_mp
= remember_a_message (mlp
, token
.string
,
620 inner_context
, &pos
);
625 /* Seen an msgid_plural. */
626 remember_a_message_plural (plural_mp
, token
.string
,
627 inner_context
, &pos
);
636 next_context_iter
= null_context_list_iterator
;
639 case token_type_symbol
:
641 flag_context_list_iterator (
642 flag_context_list_table_lookup (
643 flag_context_list_table
,
644 token
.string
, strlen (token
.string
)));
649 case token_type_lparen
:
650 if (extract_parenthesized (mlp
, inner_context
, next_context_iter
,
653 next_context_iter
= null_context_list_iterator
;
657 case token_type_rparen
:
660 case token_type_comma
:
666 inherited_context (outer_context
,
667 flag_context_list_iterator_advance (
669 next_context_iter
= passthrough_context_list_iterator
;
672 case token_type_other
:
673 next_context_iter
= null_context_list_iterator
;
688 extract_ycp (FILE *f
,
689 const char *real_filename
, const char *logical_filename
,
690 flag_context_list_table_ty
*flag_table
,
691 msgdomain_list_ty
*mdlp
)
693 message_list_ty
*mlp
= mdlp
->item
[0]->messages
;
696 real_file_name
= real_filename
;
697 logical_file_name
= xstrdup (logical_filename
);
701 last_comment_line
= -1;
702 last_non_comment_line
= -1;
704 flag_context_list_table
= flag_table
;
706 /* Eat tokens until eof is seen. When extract_parenthesized returns
707 due to an unbalanced closing parenthesis, just restart it. */
708 while (!extract_parenthesized (mlp
, null_context
, null_context_list_iterator
,
713 real_file_name
= NULL
;
714 logical_file_name
= NULL
;