3 # a shell script that converts between Amiga-style catalog
4 # description/translation files (.cd/.ct) and gettext-style translation
7 # Copyright 2013-2014 Jens Maus <mail@jens-maus.de>
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write to the Free Software
21 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 ########################################################
34 echo >&2 "cd2po.sh v${VERSION} - convert between Amiga-style and gettext translation files"
35 echo >&2 "Copyright (c) 2013-2014 Jens Maus <mail@jens-maus.de>"
37 echo >&2 "Usage: $0 <options> [inputfile (.cd/.ct/.pot/.po)]"
39 echo >&2 " -c <charset> : use <charset> when converting the input file"
40 echo >&2 " default: iso-8859-1"
43 ################################
45 ################################
47 # the following is an awk script that converts an
48 # Amiga-style catalog description file (.cd) to a gettext
49 # PO-style translation template file (.pot).
50 read -d '' cd2pot
<< 'EOF'
56 # get current date/time
57 cmd
="date +'%Y-%m-%d %H:%M%z'"
60 print
"# Translation catalog description file (pot-style)"
63 print
"# WARNING: This file was automatically generated by cd2po.sh"
69 if($0 ~
/^
#version .*/)
74 else if($0 ~
/^
#language .*/)
81 if($0 ~
/^MSG_.
*\
(.
*\
)/)
85 # this is the end of the current
86 # tag so let's output it in PO-format
88 #print "#: " msgcomment
89 if(length
(comment
) > 0)
93 print
"msgctxt \\"" msgctxt "\\""
102 print
"# version " version
103 print
"# language " language
107 print
"msgstr \\"\\""
108 print
"\\"Project-Id-Version
: " version "\\\\n
\\""
109 print
"\\"Report-Msgid-Bugs-To
: http
://URL
/\\\\n
\\""
110 print
"\\"POT-Creation-Date
: " date "\\\\n
\\""
111 print
"\\"PO-Revision-Date
: YEAR-MO-DA HO
:MI
+ZONE
\\\\n
\\""
112 print
"\\"Last-Translator
: FULL NAME
<EMAIL@ADDRESS
>\\\\n
\\""
113 print
"\\"Language-Team
: LANGUAGE
<LL@li.org
>\\\\n
\\""
114 print
"\\"MIME-Version
: 1.0\\\\n
\\""
115 print
"\\"Content-Type
: text
/plain
; charset
=UTF-8
\\\\n
\\""
116 print
"\\"Content-Transfer-Encoding
: 8bit
\\\\n
\\""
117 print
"\\"Language
: " language "\\\\n
\\""
125 # proceed with next word
132 # this is the end of the current
133 # tag so let's output it in PO-format
135 #print "#: " msgcomment
136 if(length
(comment
) > 0)
140 print
"msgctxt \\"" msgctxt "\\""
142 print
"msgstr \\"\\""
149 # proceed with next word
154 if(length
(comment
) > 0)
156 comment
= comment
"\\n"
161 # remove any leading white space
165 gsub
(/\\\\\\\\/, "\\\\", tmp
)
167 comment
= comment
"#. " tmp
170 # proceed with next word
176 # remove any backslash at the end of line
179 # replace \e with \033
180 gsub
(/\\\\\\e
/, "\\\\033")
182 # replace plain " with \" but make
183 # sure to check if \" is already there
184 gsub
(/\\\\"/, "\\"") # replace \" with "
185 gsub
(/"/, "\\\\\\"") # replace " with \"
188 gsub
(/\\\\\\\\/, "\\\\")
190 # we have to escape the \033 and other escape
192 gsub
(/\\\\0/, "\\\\\\\\\\\\0")
193 gsub
(/\\\\33/, "\\\\\\\\\\\\033")
197 # the .po format doesn't allow empty msgid
198 # strings, so let's escape them with <EMPTY>
201 msgid
="\\"<EMPTY
>\\""
212 msgid
=msgid
"\\n" "\\"" $0 "\\""
218 # the following is an awk script that converts a
219 # gettext PO-style translation template file (.pot)
220 # to an Amiga-style catalog description file (.cd)
221 read -d '' pot2cd
<< 'EOF'
226 print
"; Catalog description file (Amiga-cd format)"
229 print
"; WARNING: This file was automatically generated by cd2po.sh"
235 if($0 ~
/^
# version .*/)
240 else if($0 ~
/^
# language .*/)
247 if($0 ~
/^msgctxt
"MSG_.*/)
254 print "#version " version
255 print
"#language " language
261 # extract the tag "MSG_XXXXX (X//)" as tag
262 tag
=substr
($0, length
($1)+2)
264 # strip quotes (") from start&end
268 else if($0 ~
/^
#\. .*/)
270 if(length
(comment
) > 0)
272 comment
= comment
"\\n"
275 # replace \\033 with \033
276 gsub
(/\\\\\\\\0/, "\\\\0")
277 gsub
(/\\\\\\\\33/, "\\\\033")
280 gsub
(/\\\\\\\\/, "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\")
282 comment
= comment
"; " substr
($0, length
($1)+2)
284 else if(length
($0) == 0 && length
(tag
) != 0)
291 if(length
(comment
) > 0)
303 if($0 ~
/^msgid
".*/)
305 # get the msgid text only
306 tmp=substr($0, length($1)+2)
308 # strip quotes (") from start
&end
312 # replace \\033 with \033
313 gsub
(/\\\\\\\\0/, "\\\\0", tmp
)
314 gsub
(/\\\\\\\\33/, "\\\\033", tmp
)
317 gsub
(/\\\\\\\\/, "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\", tmp
)
321 # replace "<EMPTY>" with ""
322 gsub
(/<EMPTY
>.
*/, "", tmp
)
332 else if($0 ~
/^msgstr
".*/)
337 else if(msgidfound == 1)
339 # strip quotes (") from start
&end
343 # replace \\033 with \033
344 gsub
(/\\\\\\\\0/, "\\\\0")
345 gsub
(/\\\\\\\\33/, "\\\\033")
348 gsub
(/\\\\\\\\/, "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\")
350 if(length
(msgid
) > 0)
352 msgid
= msgid
"\\\\\\n" $0
366 if(length
(comment
) > 0)
375 # the following is an awk script that converts an
376 # Amiga-style catalog translation file (.ct) to a
377 # gettext PO-style translation file (.po)
378 read -d '' ct2po
<< 'EOF'
384 # get current date/time
385 cmd
="date +'%Y-%m-%d %H:%M%z'"
388 print
"# Catalog translation file (po-style)"
391 print
"# WARNING: This file was automatically generated by cd2po.sh"
397 if($0 ~
/^
## version .*/)
399 version
=substr
($0, length
($1)+length
($2)+3)
402 else if($0 ~
/^
## language .*/)
407 else if($0 ~
/^
## codeset .*/)
412 else if($0 ~
/^
## chunk AUTH .*/)
414 auth
=substr
($0, length
($1)+length
($2)+length
($3)+4)
429 print
"# version " version
430 print
"# language " language
431 print
"# codeset " codeset
432 print
"# chunk AUTH " auth
434 print
"# Translators:"
436 print
"msgstr \\"\\""
437 print
"\\"Project-Id-Version
: " version "\\\\n
\\""
438 print
"\\"Report-Msgid-Bugs-To
: http
://URL
/\\\\n
\\""
439 print
"\\"POT-Creation-Date
: " date "\\\\n
\\""
440 print
"\\"PO-Revision-Date
: " date "\\\\n
\\""
441 print
"\\"Last-Translator
: " auth "\\\\n
\\""
442 print
"\\"Language-Team
: " language "\\\\n
\\""
443 print
"\\"MIME-Version
: 1.0\\\\n
\\""
444 print
"\\"Content-Type
: text
/plain
; charset
=UTF-8
\\\\n
\\""
445 print
"\\"Content-Transfer-Encoding
: 8bit
\\\\n
\\""
446 print
"\\"Language
: " language "\\\\n
\\""
451 # now we have to search in the CD file for the same string
452 cmd
="sed -n '/^" $1 " (/,/^;$/p' *.cd"
453 while((cmd | getline output
) > 0)
455 if(output ~
/^MSG_.
*$
/)
459 else if(output ~
/^
;.
+$
/)
462 gsub
(/\\\\\\\\/, "\\\\", output
)
464 if(length
(comment
) > 0)
466 comment
= comment
"\\n"
469 tmp
=substr
(output
, 2)
471 comment
= comment
"#. " tmp
473 else if(output ~
/^
;$
/)
477 else if(length
(msgctxt
) > 0)
479 # remove any backslash at the end of line
480 gsub
(/\\\\$
/, "", output
)
482 # replace \e with \033
483 gsub
(/\\\\\\e
/, "\\\\033", output
)
485 # replace plain " with \" but make
486 # sure to check if \" is already there
487 gsub
(/\\\\"/, "\\"", output
) # replace \" with "
488 gsub
(/"/, "\\\\\\"", output
) # replace " with \"
491 gsub
(/\\\\\\\\/, "\\\\", output
)
493 # replace \033 with \\033
494 gsub
(/\\\\0/, "\\\\\\\\\\\\0", output
)
495 gsub
(/\\\\33/, "\\\\\\\\\\\\033", output
)
497 if(length
(msgid
) > 0)
502 msgid
= msgid
"\\"" output "\\""
507 if(length
(msgctxt
) == 0)
514 else if($0 ~
/^
;.
*$
/)
520 if(length
(comment
) > 0)
524 print
"msgctxt \\"" msgctxt "\\""
526 # the .po format doesn't allow empty msgid
527 # strings, so let's escape them with <EMPTY>
528 if(length
(msgid
) <= 2)
530 print
"msgid \\"<EMPTY
>\\""
534 # find out if this msgid is a multiline msgid or not
546 print
"msgstr \\"" msgstr "\\""
557 # remove any backslash at the end of line
560 # replace \e with \033
561 gsub
(/\\\\\\e
/, "\\\\033")
563 # replace plain " with \" but make
564 # sure to check if \" is already there
565 gsub
(/\\\\"/, "\\"") # replace \" with "
566 gsub
(/"/, "\\\\\\"") # replace " with \"
568 # replace \033 with \\033
569 gsub
(/\\\\0/, "\\\\\\\\\\\\0")
570 gsub
(/\\\\33/, "\\\\\\\\\\\\033")
585 # the following is an awk script that converts a
586 # gettext PO-style translation file (.po) to an
587 # Amiga-style catalog translation file (.ct).
588 read -d '' po2ct
<< 'EOF'
598 if($0 ~
/^
#? version .*/)
600 version
=substr
($0, length
($1)+length
($2)+3)
603 else if($0 ~
/^
# \\$Id: .* \\$$/)
605 revision
=$4 # get revision out of $Id$ SVN keyword
608 else if($0 ~
/^
## language .*/)
613 else if($0 ~
/^
## codeset .*/)
618 else if($0 ~
/^
## chunk AUTH .*/)
620 auth
=substr
($0, length
($1)+length
($2)+length
($3)+4)
623 else if($0 ~
/^
"PO-Revision-Date: .*"/)
625 revdate
=substr
($0, length
($1)+2)
626 gsub
(/\\\\n
"/, "", revdate);
628 # parse the revision date
629 cmd="date +'%d.%m.%Y' -d \\"" revdate
"\\""
630 cmd | getline revdate
634 else if($0 ~ /^"Language
: .
*"/)
636 language=substr($0, length($1)+2)
637 gsub(/\\\\n"/, "", language
);
640 else if($0 ~
/^
"Language-Team: .*"/)
642 auth
=substr
($0, length
($1)+2)
643 gsub
(/\\\\n
"/, "", auth);
648 if($0 ~ /^msgctxt "MSG_.
*/)
658 print
"## version $VER: XXXX.catalog " version
"." revision
" (" revdate
")"
659 print
"## language " lang
660 print
"## codeset " cset
661 print
"## chunk AUTH " auth
669 # strip quotes (") so that we get the plain MSG_XXXX
674 else if(length($0) == 0 && length(tag) != 0)
680 if(length(msgstr) > 0)
693 if($0 ~ /^msgid ".
*/)
695 # get the msgid text only
696 tmp
=substr
($0, length
($1)+2)
698 # strip quotes (") from start&end
702 # replace \\033 with \033
703 gsub
(/\\\\\\\\0/, "\\\\0", tmp
)
704 gsub
(/\\\\\\\\33/, "\\\\033", tmp
)
708 if(length
(msgid
) > 0)
710 msgid
= msgid
"\\\\\\n; " tmp
721 else if($0 ~
/^msgstr
".*/)
723 # get the msgstr text only
724 tmp=substr($0, length($1)+2)
726 # strip quotes (") from start
&end
730 # replace \\033 with \033
731 gsub
(/\\\\\\\\0/, "\\\\0", tmp
)
732 gsub
(/\\\\\\\\33/, "\\\\033", tmp
)
736 # replace "<EMPTY>" with ""
737 gsub
(/<EMPTY
>.
*/, "", tmp
)
739 if(length
(msgstr
) > 0)
741 msgstr
= msgstr
"\\\\\\n" tmp
752 else if(msgidfound
== 1)
754 # strip quotes (") from start&end
758 # replace \\033 with \033
759 gsub
(/\\\\\\\\0/, "\\\\0")
760 gsub
(/\\\\\\\\33/, "\\\\033")
764 if(length
(msgid
) > 0)
766 msgid
= msgid
"\\\\\\n; " $0
774 else if(msgstrfound
== 1)
776 # strip quotes (") from start&end
780 # replace \\033 with \033
781 gsub
(/\\\\\\\\0/, "\\\\0")
782 gsub
(/\\\\\\\\33/, "\\\\033")
786 if(length
(msgstr
) > 0)
788 msgstr
= msgstr
"\\\\\\n" $0
799 if(length
(tag
) != 0 && length
(msgstr
) > 0)
809 ###################################################
820 charset
="iso-8859-15"
823 charset
="iso-8859-16"
829 charset
="iso-8859-15"
832 charset
="iso-8859-15"
835 charset
="iso-8859-15"
838 charset
="iso-8859-15"
841 charset
="iso-8859-15"
847 charset
="iso-8859-16"
850 charset
="iso-8859-15"
853 charset
="iso-8859-15"
856 charset
="iso-8859-16"
859 charset
="windows-1251" # this should be "Amiga-1251" but iconv doesn't support it :(
862 charset
="iso-8859-16"
868 charset
="iso-8859-15"
871 charset
="iso-8859-15"
936 codeset
="2104" # 'Amiga-1251'
980 language
="nederlands"
983 language
="english-british"
1016 language
="português"
1019 language
="português-brasil"
1025 language
="slovensko"
1045 ###################################################
1049 # parse the command-line options
1050 while getopts "c:" opt
1053 c
) charset
="$OPTARG";;
1059 shift `expr $OPTIND - 1`
1061 if [ -z "${inputfile}" ]; then
1066 # let's identify by the file extension which operation to perform
1067 fname
=$
(basename "${inputfile}")
1068 filename
="${fname%.*}"
1069 extension
="${fname##*.}"
1070 case "${extension}" in
1071 cd) # convert from cd -> pot
1072 if [ -z "${charset}" ]; then
1073 charset
="iso-8859-1"
1075 iconv -c -f "${charset}" -t utf8 ${inputfile} | awk "${cd2pot}"
1077 ct
) # convert from ct -> po
1078 if [ -z "${charset}" ]; then
1079 charset
=$
(identifyCharset
${filename})
1081 iconv -c -f ${charset} -t utf8 ${inputfile} | awk "${ct2po}"
1083 po) # convert from po -> ct
1084 if [ -z "${charset}" ]; then
1085 charset=$(identifyCharset ${filename})
1086 codeset=$(identifyCodeset ${filename})
1088 lang=$(identifyLanguage ${filename})
1089 awk -v lang=${lang} -v cset=${codeset} "${po2ct}" ${inputfile} | iconv -c -f utf8 -t ${charset}
1091 pot) # convert from pot -> cd
1092 if [ -z "${charset}" ]; then
1093 charset="iso-8859-1
"
1095 awk "${pot2cd}" ${inputfile} | iconv -c -f utf8 -t ${charset}