Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / openmp / runtime / tools / message-converter.pl
blob5cc297485ebf74aaaad84494b7cbacb1b534f1be
1 #!/usr/bin/env perl
4 #//===----------------------------------------------------------------------===//
5 #//
6 #// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
7 #// See https://llvm.org/LICENSE.txt for license information.
8 #// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9 #//
10 #//===----------------------------------------------------------------------===//
13 use strict;
14 use warnings;
16 use File::Glob ":glob";
17 use Encode qw{ encode };
19 use FindBin;
20 use lib "$FindBin::Bin/lib";
22 use tools;
24 our $VERSION = "0.04";
25 my $escape = qr{%};
26 my $placeholder = qr{(\d)\$(s|l?[du])};
27 my $target_os;
29 my $sections =
31 meta => { short => "prp" }, # "prp" stands for "property".
32 strings => { short => "str" },
33 formats => { short => "fmt" },
34 messages => { short => "msg" },
35 hints => { short => "hnt" },
37 my @sections = qw{ meta strings formats messages hints };
38 # Assign section properties: long name, set number, base number.
39 map( $sections->{ $sections[ $_ ] }->{ long } = $sections[ $_ ], ( 0 .. @sections - 1 ) );
40 map( $sections->{ $sections[ $_ ] }->{ set } = ( $_ + 1 ), ( 0 .. @sections - 1 ) );
41 map( $sections->{ $sections[ $_ ] }->{ base } = ( ( $_ + 1 ) << 16 ), ( 0 .. @sections - 1 ) );
43 # Properties of Meta section.
44 my @properties = qw{ Language Country LangId Version Revision };
47 sub _generate_comment($$$) {
49 my ( $data, $open, $close ) = @_;
50 my $bulk =
51 $open . " Do not edit this file! " . $close . "\n" .
52 $open . " The file was generated from " . get_file( $data->{ "%meta" }->{ source } ) .
53 " by " . $tool . " on " . localtime() . ". " . $close . "\n";
54 return $bulk;
56 }; # sub _generate_comment
59 sub msg2sgn($) {
61 # Convert message string to signature. Signature is a list of placeholders in sorted order.
62 # For example, signature of "%1$s value \"%2$s\" is invalid." is "%1$s %2$s".
64 my ( $msg ) = @_;
65 my @placeholders;
66 pos( $msg ) = 0;
67 while ( $msg =~ m{\G.*?$escape$placeholder}g ) {
68 $placeholders[ $1 - 1 ] = "%$1\$$2";
69 }; # while
70 for ( my $i = 1; $i <= @placeholders; ++ $i ) {
71 if ( not defined( $placeholders[ $i - 1 ] ) ) {
72 $placeholders[ $i - 1 ] = "%$i\$-";
73 }; # if
74 }; # for $i
75 return join( " ", @placeholders );
77 }; # sub msg2sgn
80 sub msg2src($) {
82 # Convert message string to a C string constant.
84 my ( $msg ) = @_;
85 if ( $target_os eq "win" ) {
86 $msg =~ s{$escape$placeholder}{\%$1!$2!}g;
87 }; # if
88 return $msg;
90 }; # sub msg2src
93 my $special =
95 "n" => "\n",
96 "t" => "\t",
99 sub msg2mc($) {
100 my ( $msg ) = @_;
101 $msg = msg2src( $msg ); # Get windows style placeholders.
102 $msg =~ s{\\(.)}{ exists( $special->{ $1 } ) ? $special->{ $1 } : $1 }ge;
103 return $msg;
104 }; # sub msg2mc
108 sub parse_message($) {
110 my ( $msg ) = @_;
111 pos( $msg ) = 0;
112 for ( ; ; ) {
113 if ( $msg !~ m{\G.*?$escape}gc ) {
114 last;
116 if ( $msg !~ m{\G$placeholder}gc ) {
117 return "Bad %-sequence near \"%" . substr( $msg, pos( $msg ), 7 ) . "\"";
118 }; # if
119 }; # forever
120 return undef;
122 }; # sub parse_message
125 sub parse_source($) {
127 my ( $name ) = @_;
129 my @bulk = read_file( $name, -layer => ":utf8" );
130 my $data = {};
132 my $line;
133 my $n = 0; # Line number.
134 my $obsolete = 0; # Counter of obsolete entries.
135 my $last_idx;
136 my %idents;
137 my $section;
139 my $error =
140 sub {
141 my ( $n, $line, $msg ) = @_;
142 runtime_error( "Error parsing $name line $n: " . "$msg:\n" . " $line" );
143 }; # sub
145 foreach $line ( @bulk ) {
146 ++ $n;
147 # Skip empty lines and comments.
148 if ( $line =~ m{\A\s*(\n|#)} ) {
149 $last_idx = undef;
150 next;
151 }; # if
152 # Parse section header.
153 if ( $line =~ m{\A-\*-\s*([A-Z_]*)\s*-\*-\s*\n\z}i ) {
154 $section = ( lc( $1 ) );
155 if ( not grep( $section eq $_, @sections ) ) {
156 $error->( $n, $line, "Unknown section \"$section\" specified" );
157 }; # if
158 if ( exists( $data->{ $section } ) ) {
159 $error->( $n, $line, "Multiple sections of the same type specified" );
160 }; # if
161 %idents = (); # Clean list of known message identifiers.
162 next;
163 }; # if
164 if ( not defined( $section ) ) {
165 $error->( $n, $line, "Section heading expected" );
166 }; # if
167 # Parse section body.
168 if ( $section eq "meta" ) {
169 if ( $line =~ m{\A([A-Z_][A-Z_0-9]*)\s+"(.*)"\s*?\n?\z}i ) {
170 # Parse meta properties (such as Language, Country, and LangId).
171 my ( $property, $value ) = ( $1, $2 );
172 if ( not grep( $_ eq $property , @properties ) ) {
173 $error->( $n, $line, "Unknown property \"$property\" specified" );
174 }; # if
175 if ( exists( $data->{ "%meta" }->{ $property } ) ) {
176 $error->( $n, $line, "Property \"$property\" has already been specified" );
177 }; # if
178 $data->{ "%meta" }->{ $property } = $value;
179 $last_idx = undef;
180 next;
181 }; # if
182 $error->( $n, $line, "Property line expected" );
183 }; # if
184 # Parse message.
185 if ( $line =~ m{\A([A-Z_][A-Z_0-9]*)\s+"(.*)"\s*?\n?\z}i ) {
186 my ( $ident, $message ) = ( $1, $2 );
187 if ( $ident eq "OBSOLETE" ) {
188 # If id is "OBSOLETE", add a unique suffix. It provides convenient way to mark
189 # obsolete messages.
190 ++ $obsolete;
191 $ident .= $obsolete;
192 }; # if
193 if ( exists( $idents{ $ident } ) ) {
194 $error->( $n, $line, "Identifier \"$ident\" is redefined" );
195 }; # if
196 # Check %-sequences.
197 my $err = parse_message( $message );
198 if ( $err ) {
199 $error->( $n, $line, $err );
200 }; # if
201 # Save message.
202 push( @{ $data->{ $section } }, [ $ident, $message ] );
203 $idents{ $ident } = 1;
204 $last_idx = @{ $data->{ $section } } - 1;
205 next;
206 }; # if
207 # Parse continuation line.
208 if ( $line =~ m{\A\s*"(.*)"\s*\z} ) {
209 my $message = $1;
210 if ( not defined( $last_idx ) ) {
211 $error->( $n, $line, "Unexpected continuation line" );
212 }; # if
213 # Check %-sequences.
214 my $err = parse_message( $message );
215 if ( $err ) {
216 $error->( $n, $line, $err );
217 }; # if
218 # Save continuation.
219 $data->{ $section }->[ $last_idx ]->[ 1 ] .= $message;
220 next;
221 }; # if
222 $error->( $n, $line, "Message definition expected" );
223 }; # foreach
224 $data->{ "%meta" }->{ source } = $name;
225 foreach my $section ( @sections ) {
226 if ( not exists( $data->{ $section } ) ) {
227 $data->{ $section } = [];
228 }; # if
229 }; # foreach $section
231 foreach my $property ( @properties ) {
232 if ( not defined( $data->{ "%meta" }->{ $property } ) ) {
233 runtime_error(
234 "Error parsing $name: " .
235 "Required \"$property\" property is not specified"
237 }; # if
238 push( @{ $data->{ meta } }, [ $property, $data->{ "%meta" }->{ $property } ] );
239 }; # foreach
241 return $data;
243 }; # sub parse_source
246 sub generate_enum($$$) {
248 my ( $data, $file, $prefix ) = @_;
249 my $bulk = "";
251 $bulk =
252 _generate_comment( $data, "//", "//" ) .
253 "\n" .
254 "enum ${prefix}_id {\n\n" .
255 " // A special id for absence of message.\n" .
256 " ${prefix}_null = 0,\n\n";
258 foreach my $section ( @sections ) {
259 my $props = $sections->{ $section }; # Section properties.
260 my $short = $props->{ short }; # Short section name, frequently used.
261 $bulk .=
262 " // Set #$props->{ set }, $props->{ long }.\n" .
263 " ${prefix}_${short}_first = $props->{ base },\n";
264 foreach my $item ( @{ $data->{ $section } } ) {
265 my ( $ident, undef ) = @$item;
266 $bulk .= " ${prefix}_${short}_${ident},\n";
267 }; # foreach
268 $bulk .= " ${prefix}_${short}_last,\n\n";
269 }; # foreach $type
270 $bulk .= " ${prefix}_xxx_lastest\n\n";
272 $bulk .=
273 "}; // enum ${prefix}_id\n" .
274 "\n" .
275 "typedef enum ${prefix}_id ${prefix}_id_t;\n" .
276 "\n";
278 $bulk .=
279 "\n" .
280 "// end of file //\n";
282 write_file( $file, \$bulk );
284 }; # sub generate_enum
287 sub generate_signature($$) {
289 my ( $data, $file ) = @_;
290 my $bulk = "";
292 $bulk .= "// message catalog signature file //\n\n";
294 foreach my $section ( @sections ) {
295 my $props = $sections->{ $section }; # Section properties.
296 my $short = $props->{ short }; # Short section name, frequently used.
297 $bulk .= "-*- " . uc( $props->{ long } ) . "-*-\n\n";
298 foreach my $item ( @{ $data->{ $section } } ) {
299 my ( $ident, $msg ) = @$item;
300 $bulk .= sprintf( "%-40s %s\n", $ident, msg2sgn( $msg ) );
301 }; # foreach
302 $bulk .= "\n";
303 }; # foreach $type
305 $bulk .= "// end of file //\n";
307 write_file( $file, \$bulk );
309 }; # sub generate_signature
312 sub generate_default($$$) {
314 my ( $data, $file, $prefix ) = @_;
315 my $bulk = "";
317 $bulk .=
318 _generate_comment( $data, "//", "//" ) .
319 "\n";
321 foreach my $section ( @sections ) {
322 $bulk .=
323 "static char const *\n" .
324 "__${prefix}_default_${section}" . "[] =\n" .
325 " {\n" .
326 " NULL,\n";
327 foreach my $item ( @{ $data->{ $section } } ) {
328 my ( undef, $msg ) = @$item;
329 $bulk .= " \"" . msg2src( $msg ) . "\",\n";
330 }; # while
331 $bulk .=
332 " NULL\n" .
333 " };\n" .
334 "\n";
335 }; # foreach $type
337 $bulk .=
338 "struct kmp_i18n_section {\n" .
339 " int size;\n" .
340 " char const ** str;\n" .
341 "}; // struct kmp_i18n_section\n" .
342 "typedef struct kmp_i18n_section kmp_i18n_section_t;\n" .
343 "\n" .
344 "static kmp_i18n_section_t\n" .
345 "__${prefix}_sections[] =\n" .
346 " {\n" .
347 " { 0, NULL },\n";
348 foreach my $section ( @sections ) {
349 $bulk .=
350 " { " . @{ $data->{ $section } } . ", __${prefix}_default_${section} },\n";
351 }; # foreach $type
352 $bulk .=
353 " { 0, NULL }\n" .
354 " };\n" .
355 "\n";
357 $bulk .=
358 "struct kmp_i18n_table {\n" .
359 " int size;\n" .
360 " kmp_i18n_section_t * sect;\n" .
361 "}; // struct kmp_i18n_table\n" .
362 "typedef struct kmp_i18n_table kmp_i18n_table_t;\n" .
363 "\n" .
364 "static kmp_i18n_table_t __kmp_i18n_default_table =\n" .
365 " {\n" .
366 " " . @sections . ",\n" .
367 " __kmp_i18n_sections\n" .
368 " };\n" .
369 "\n" .
370 "// end of file //\n";
372 write_file( $file, \$bulk );
374 }; # sub generate_default
377 sub generate_message_unix($$) {
379 my ( $data, $file ) = @_;
380 my $bulk = "";
382 $bulk .=
383 _generate_comment( $data, "\$", "\$" ) .
384 "\n" .
385 "\$quote \"\n\n";
387 foreach my $section ( @sections ) {
388 $bulk .=
389 "\$ " . ( "-" x 78 ) . "\n\$ $section\n\$ " . ( "-" x 78 ) . "\n\n" .
390 "\$set $sections->{ $section }->{ set }\n" .
391 "\n";
392 my $n = 0;
393 foreach my $item ( @{ $data->{ $section } } ) {
394 my ( undef, $msg ) = @$item;
395 ++ $n;
396 $bulk .= "$n \"" . msg2src( $msg ) . "\"\n";
397 }; # foreach
398 $bulk .= "\n";
399 }; # foreach $type
401 $bulk .=
402 "\n" .
403 "\$ end of file \$\n";
405 write_file( $file, \$bulk, -layer => ":utf8" );
407 }; # sub generate_message_linux
410 sub generate_message_windows($$) {
412 my ( $data, $file ) = @_;
413 my $bulk = "";
414 my $language = $data->{ "%meta" }->{ Language };
415 my $langid = $data->{ "%meta" }->{ LangId };
417 $bulk .=
418 _generate_comment( $data, ";", ";" ) .
419 "\n" .
420 "LanguageNames = ($language=$langid:msg_$langid)\n" .
421 "\n";
423 $bulk .=
424 "FacilityNames=(\n";
425 foreach my $section ( @sections ) {
426 my $props = $sections->{ $section }; # Section properties.
427 $bulk .=
428 " $props->{ short }=" . $props->{ set } ."\n";
429 }; # foreach $section
430 $bulk .=
431 ")\n\n";
433 foreach my $section ( @sections ) {
434 my $short = $sections->{ $section }->{ short };
435 my $n = 0;
436 foreach my $item ( @{ $data->{ $section } } ) {
437 my ( undef, $msg ) = @$item;
438 ++ $n;
439 $bulk .=
440 "MessageId=$n\n" .
441 "Facility=$short\n" .
442 "Language=$language\n" .
443 msg2mc( $msg ) . "\n.\n\n";
444 }; # foreach $item
445 }; # foreach $section
447 $bulk .=
448 "\n" .
449 "; end of file ;\n";
451 $bulk = encode( "UTF-16LE", $bulk ); # Convert text to UTF-16LE used in Windows* OS.
452 write_file( $file, \$bulk, -binary => 1 );
454 }; # sub generate_message_windows
458 # Parse command line.
461 my $input_file;
462 my $enum_file;
463 my $signature_file;
464 my $default_file;
465 my $message_file;
466 my $id;
467 my $prefix = "";
468 get_options(
469 "os=s" => \$target_os,
470 "enum-file=s" => \$enum_file,
471 "signature-file=s" => \$signature_file,
472 "default-file=s" => \$default_file,
473 "message-file=s" => \$message_file,
474 "id|lang-id" => \$id,
475 "prefix=s" => \$prefix,
477 if ( @ARGV == 0 ) {
478 cmdline_error( "No source file specified -- nothing to do" );
479 }; # if
480 if ( @ARGV > 1 ) {
481 cmdline_error( "Too many source files specified" );
482 }; # if
483 $input_file = $ARGV[ 0 ];
486 my $generate_message;
487 if ( $target_os =~ m{\A(?:lin|mac)\z} ) {
488 $generate_message = \&generate_message_unix;
489 } elsif ( $target_os eq "win" ) {
490 $generate_message = \&generate_message_windows;
491 } else {
492 runtime_error( "OS \"$target_os\" is not supported" );
493 }; # if
497 # Do the work.
500 my $data = parse_source( $input_file );
501 if ( defined( $id ) ) {
502 print( $data->{ "%meta" }->{ LangId }, "\n" );
503 }; # if
504 if ( defined( $enum_file ) ) {
505 generate_enum( $data, $enum_file, $prefix );
506 }; # if
507 if ( defined( $signature_file ) ) {
508 generate_signature( $data, $signature_file );
509 }; # if
510 if ( defined( $default_file ) ) {
511 generate_default( $data, $default_file, $prefix );
512 }; # if
513 if ( defined( $message_file ) ) {
514 $generate_message->( $data, $message_file );
515 }; # if
517 exit( 0 );
519 __END__
521 =pod
523 =head1 NAME
525 B<message-converter.pl> -- Convert message catalog source file into another text forms.
527 =head1 SYNOPSIS
529 B<message-converter.pl> I<option>... <file>
531 =head1 OPTIONS
533 =over
535 =item B<--enum-file=>I<file>
537 Generate enum file named I<file>.
539 =item B<--default-file=>I<file>
541 Generate default messages file named I<file>.
543 =item B<--lang-id>
545 Print language identifier of the message catalog source file.
547 =item B<--message-file=>I<file>
549 Generate message file.
551 =item B<--signature-file=>I<file>
553 Generate signature file.
555 Signatures are used for checking compatibility. For example, to check a primary
556 catalog and its translation to another language, signatures of both catalogs should be generated
557 and compared. If signatures are identical, catalogs are compatible.
559 =item B<--prefix=>I<prefix>
561 Prefix to be used for all C identifiers (type and variable names) in enum and default messages
562 files.
564 =item B<--os=>I<str>
566 Specify OS name the message formats to be converted for. If not specified explicitly, value of
567 LIBOMP_OS environment variable is used. If LIBOMP_OS is not defined, host OS is detected.
569 Depending on OS, B<message-converter.pl> converts message formats to GNU style or MS style.
571 =item Standard Options
573 =over
575 =item B<--doc>
577 =item B<--manual>
579 Print full documentation and exit.
581 =item B<--help>
583 Print short help message and exit.
585 =item B<--version>
587 Print version string and exit.
589 =back
591 =back
593 =head1 ARGUMENTS
595 =over
597 =item I<file>
599 A name of input file.
601 =back
603 =head1 DESCRIPTION
605 =head2 Message Catalog File Format
607 It is plain text file in UTF-8 encoding. Empty lines and lines beginning with sharp sign (C<#>) are
608 ignored. EBNF syntax of content:
610 catalog = { section };
611 section = header body;
612 header = "-*- " section-id " -*-" "\n";
613 body = { message };
614 message = message-id string "\n" { string "\n" };
615 section-id = identifier;
616 message-id = "OBSOLETE" | identifier;
617 identifier = letter { letter | digit | "_" };
618 string = """ { character } """;
620 Identifier starts with letter, with following letters, digits, and underscores. Identifiers are
621 case-sensitive. Setion identifiers are fixed: C<META>, C<STRINGS>, C<FORMATS>, C<MESSAGES> and
622 C<HINTS>. Message identifiers must be unique within section. Special C<OBSOLETE> pseudo-identifier
623 may be used many times.
625 String is a C string literal which must not cross line boundaries.
626 Long messages may occupy multiple lines, a string per line.
628 Message may include printf-like GNU-style placeholders for arguments: C<%I<n>$I<t>>,
629 where I<n> is argument number (C<1>, C<2>, ...),
630 I<t> -- argument type, C<s> (string) or C<d> (32-bit integer).
632 See also comments in F<i18n/en_US.txt>.
634 =head2 Output Files
636 This script can generate 3 different text files from single source:
638 =over
640 =item Enum file.
642 Enum file is a C include file, containing definitions of message identifiers, e. g.:
644 enum kmp_i18n_id {
646 // Set #1, meta.
647 kmp_i18n_prp_first = 65536,
648 kmp_i18n_prp_Language,
649 kmp_i18n_prp_Country,
650 kmp_i18n_prp_LangId,
651 kmp_i18n_prp_Version,
652 kmp_i18n_prp_Revision,
653 kmp_i18n_prp_last,
655 // Set #2, strings.
656 kmp_i18n_str_first = 131072,
657 kmp_i18n_str_Error,
658 kmp_i18n_str_UnknownFile,
659 kmp_i18n_str_NotANumber,
662 // Set #3, formats.
665 kmp_i18n_xxx_lastest
667 }; // enum kmp_i18n_id
669 typedef enum kmp_i18n_id kmp_i18n_id_t;
671 =item Default messages file.
673 Default messages file is a C include file containing default messages to be embedded into
674 application (and used if external message catalog does not exist or could not be open):
676 static char const *
677 __kmp_i18n_default_meta[] =
679 NULL,
680 "English",
681 "USA",
682 "1033",
683 "2",
684 "20090806",
685 NULL
688 static char const *
689 __kmp_i18n_default_strings[] =
691 "Error",
692 "(unknown file)",
693 "not a number",
695 NULL
700 =item Message file.
702 Message file is an input for message compiler, F<gencat> on Linux* OS and OS X*, or F<mc.exe> on
703 Windows* OS.
705 Here is the example of Linux* OS message file:
707 $quote "
708 1 "Japanese"
709 2 "Japan"
710 3 "1041"
711 4 "2"
712 5 "Based on English message catalog revision 20090806"
715 Example of Windows* OS message file:
717 LanguageNames = (Japanese=10041:msg_1041)
719 FacilityNames = (
720 prp=1
721 str=2
722 fmt=3
726 MessageId=1
727 Facility=prp
728 Language=Japanese
729 Japanese
734 =item Signature.
736 Signature is a processed source file: comments stripped, strings deleted, but placeholders kept and
737 sorted.
739 -*- FORMATS-*-
741 Info %1$d %2$s
742 Warning %1$d %2$s
743 Fatal %1$d %2$s
744 SysErr %1$d %2$s
745 Hint %1$- %2$s
746 Pragma %1$s %2$s %3$s %4$s
748 The purpose of signatures -- compare two message source files for compatibility. If signatures of
749 two message sources are the same, binary message catalogs will be compatible.
751 =back
753 =head1 EXAMPLES
755 Generate include file containing message identifiers:
757 $ message-converter.pl --enum-file=kmp_i18n_id.inc en_US.txt
759 Generate include file containing default messages:
761 $ message-converter.pl --default-file=kmp_i18n_default.inc en_US.txt
763 Generate input file for message compiler, Linux* OS example:
765 $ message-converter.pl --message-file=ru_RU.UTF-8.msg ru_RU.txt
767 Generate input file for message compiler, Windows* OS example:
769 > message-converter.pl --message-file=ru_RU.UTF-8.mc ru_RU.txt
771 =cut
773 # end of file #