4 * Copyright (C) 1993-1999 Jochen Wiedmann and Marcin Orlowski
5 * Copyright (C) 2002-2014 FlexCat Open Source Team
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or (at
10 * your option) any later version.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #include <proto/locale.h> /* This is to get locale.library/IsAlpha() */
32 #include "showfuncs.h"
33 #include "readprefs.h"
36 #include "createcat.h"
38 extern char *CatVersionString
;
39 extern char *CatLanguage
;
40 extern char *CatRcsId
;
43 extern int CT_Scanned
;
/*
 * IS_NUMBER_OR_LETTER(c) - evaluates to 1 when c lies in one of the
 * character ranges '0'..'9', 'a'..'z' or 'A'..'Z', and to 0 otherwise.
 *
 * Only plain range comparisons are performed, so the result does not
 * depend on any locale setting. The argument is expanded more than once;
 * do not pass an expression that has side effects.
 */
#define IS_NUMBER_OR_LETTER(c) \
  (((c) >= '0' && (c) <= '9') ? 1 : \
   ((c) >= 'a' && (c) <= 'z') ? 1 : \
   ((c) >= 'A' && (c) <= 'Z') ? 1 : 0)
49 #if defined(__amigaos3__) || defined(__MORPHOS__) || defined(WIN32) || defined(unix)
50 char *strptime(const char *string
, const char *fmt
, struct tm
*res
);
55 /* This function scans a PO-style format catalog description/translation file.
57 Inputs: pofile - name of the description/translation file to scan.
58 Result: TRUE if successful, FALSE otherwise.
60 int ScanPOFile(char *pofile
)
65 int CodeSet_checked
= FALSE
;
66 int revision_found
= FALSE
;
69 const char *PoSrcCharset
= "utf-8";
70 const char *CatDstCharset
= "iso-8859-1";
71 char CatVersionDate
[255] = "";
72 char CatProjectName
[255] = "";
73 struct CatString
*cs
= NULL
;
74 struct CatString
**csptr
= &FirstCatString
;
80 if((fp
= fopen(pofile
, "r")) == NULL
)
81 ShowErrorQuick(MSG_ERR_NOCATALOGTRANSLATION
, pofile
);
84 setvbuf(fp
, NULL
, _IOFBF
, buffer_size
);
86 while(!feof(fp
) && (line
= newline
= ReadLine(fp
, TRUE
)) != NULL
)
94 // we found the end of the header so lets check if we have all
95 // we require to continue
96 if(CatVersion
> 0 && CatVersionDate
[0] != '\0' && CatProjectName
[0] != '\0' &&
97 CatVersionString
== NULL
)
101 // warn about missing revision information
103 ShowWarn(MSG_ERR_NO_CAT_REVISION
);
105 if(strstr(CatProjectName
, ".catalog") != NULL
)
106 snprintf(buf
, sizeof(buf
), "$VER: %s %d.%d (%s)", CatProjectName
, CatVersion
, CatRevision
, CatVersionDate
);
108 snprintf(buf
, sizeof(buf
), "$VER: %s.catalog %d.%d (%s)", CatProjectName
, CatVersion
, CatRevision
, CatVersionDate
);
109 CatVersionString
= AllocString(buf
);
116 // comment lines start with #
117 // but they may contain some valuable information for catalog
118 // file creation. So lets parse these lines as well
119 while(*line
== '#' || *line
== ' ' || *line
== '\t')
122 if(Strnicmp(line
, "version", 7) == 0)
126 CatVersion
= strtol(line
, &line
, 0);
128 else if(Strnicmp(line
, "revision", 8) == 0)
132 CatRevision
= strtol(line
, &line
, 0);
133 revision_found
= TRUE
;
135 else if(revision_found
== FALSE
&&
136 Strnicmp(line
, "$Id: ", 5) == 0)
143 // search second space
148 CatRevision
= strtol(p
, &p
, 0);
151 else if(revision_found
== FALSE
&&
152 Strnicmp(line
, "$Revision: ", 11) == 0)
155 CatRevision
= strtol(line
, &line
, 0);
162 if(Strnicmp(line
, "\"Language: ", 11) == 0)
165 const char *language
= NULL
;
168 ShowError(MSG_ERR_DOUBLECTLANGUAGE
);
171 p
= strchr(line
, '\\');
175 if(Stricmp(line
, "bs") == 0) // bosnian
177 language
= "bosanski";
178 CatDstCharset
= "iso-8859-2";
180 else if(Stricmp(line
, "ca") == 0) // catalan
182 language
= "catalĂ ";
183 CatDstCharset
= "iso-8859-15";
185 else if(Stricmp(line
, "hr") == 0) // croatian
187 language
= "hrvatski";
188 CatDstCharset
= "iso-8859-16";
190 else if(Stricmp(line
, "cs") == 0) // czech
193 CatDstCharset
= "iso-8859-2";
195 else if(Stricmp(line
, "da") == 0) // danish
198 CatDstCharset
= "iso-8859-15";
200 else if(Stricmp(line
, "nl") == 0) // dutch
202 language
= "nederlands";
203 CatDstCharset
= "iso-8859-15";
205 else if(Stricmp(line
, "en_GB") == 0) // english-british
206 language
= "english-british";
207 else if(Stricmp(line
, "fi") == 0) // finnish
210 CatDstCharset
= "iso-8859-15";
212 else if(Stricmp(line
, "fr") == 0) // french
214 language
= "français";
215 CatDstCharset
= "iso-8859-15";
217 else if(Stricmp(line
, "de") == 0) // german
219 language
= "deutsch";
220 CatDstCharset
= "iso-8859-15";
222 else if(Stricmp(line
, "el") == 0) // greek
225 CatDstCharset
= "iso-8859-7";
227 else if(Stricmp(line
, "hu") == 0) // hungarian
230 CatDstCharset
= "iso-8859-16";
232 else if(Stricmp(line
, "it") == 0) // italian
234 language
= "italiano";
235 CatDstCharset
= "iso-8859-15";
237 else if(Stricmp(line
, "ja") == 0) // japanese
239 language
= "nihongo";
240 CatDstCharset
= "euc-jp";
242 else if(Stricmp(line
, "ko") == 0) // korean
245 CatDstCharset
= "euc-kr";
247 else if(Stricmp(line
, "no") == 0) // norwegian
250 CatDstCharset
= "iso-8859-15";
252 else if(Stricmp(line
, "fa") == 0) // persian
255 CatDstCharset
= "utf-8";
257 else if(Stricmp(line
, "pl") == 0) // polish
260 CatDstCharset
= "iso-8859-16";
262 else if(Stricmp(line
, "pt") == 0) // portuguese
263 language
= "portuguĂŞs";
264 else if(Stricmp(line
, "pt_BR") == 0) // portuguese-brazil
265 language
= "portuguĂŞs-brasil";
266 else if(Stricmp(line
, "ru") == 0) // russian
268 language
= "russian";
270 CatDstCharset
= "Amiga-1251";
272 CatDstCharset
= "windows-1251"; // iconv doesn't know anything about Amiga-1251 :(
275 else if(Stricmp(line
, "sr") == 0) // serbian
278 CatDstCharset
= "iso-8859-16";
280 else if(Stricmp(line
, "sl") == 0) // slovenian
282 language
= "slovensko";
283 CatDstCharset
= "iso-8859-2";
285 else if(Stricmp(line
, "es") == 0) // spanish
287 language
= "español";
288 CatDstCharset
= "iso-8859-15";
290 else if(Stricmp(line
, "sv") == 0) // swedish
292 language
= "svenska";
293 CatDstCharset
= "iso-8859-15";
295 else if(Stricmp(line
, "tr") == 0) // turkish
297 language
= "türkçe";
298 CatDstCharset
= "iso-8859-9";
302 CatLanguage
= AddCatalogChunk(strdup("LANG"), language
);
304 else if(Strnicmp(line
, "\"Language-Team: ", 16) == 0)
309 p
= strchr(line
, '\\');
313 AddCatalogChunk(strdup("AUTH"), line
);
315 else if(CodeSet_checked
== FALSE
&&
316 Strnicmp(line
, "\"Content-Type: ", 15) == 0)
321 p
= strstr(line
, "charset=");
332 PoSrcCharset
= strdup(p
);
335 CodeSet_checked
= TRUE
;
337 else if(Strnicmp(line
, "\"PO-Revision-Date: ", 19) == 0)
342 memset(&tm
, 0, sizeof(tm
));
343 strptime(line
, "%Y-%m-%d", &tm
);
344 strftime(CatVersionDate
, sizeof(CatVersionDate
), "%d.%m.%Y", &tm
);
346 else if(Strnicmp(line
, "\"Catalog-Name: ", 15) == 0)
351 p
= strchr(line
, '\\');
355 strcpy(CatProjectName
, line
);
357 else if(Strnicmp(line
, "\"Project-Id-Version: ", 21) == 0 && CatProjectName
[0] == '\0')
359 // fall back to the project ID as catalog name if it is not yet defined
363 p
= strchr(line
, '\\');
367 strcpy(CatProjectName
, line
);
375 // check if we found a line starting with "msgctxt" as that signals us
376 // a new catalog string should be added
377 if(Strnicmp(line
, "msgctxt \"", 9) == 0)
381 // we found a new 'msgctxt' lets clear cs
388 /* Check for blanks at the start of line. */
389 if(*line
== ' ' || *line
== '\t')
391 ShowError(MSG_ERR_UNEXPECTEDBLANKS
);
396 while(IS_NUMBER_OR_LETTER(*line
) || *line
== '_')
401 ShowError(MSG_ERR_NOIDENTIFIER
);
408 if((cs
= malloc(sizeof(*cs
))) == NULL
)
411 // search for the next catstring ID in case the ID
412 // specifier is missing "(//)" in the msgctxt
415 struct CatString
*scs
;
418 for(scs
= FirstCatString
; scs
!= NULL
; scs
= scs
->Next
)
420 if(scs
->ID
== NextID
)
428 while(found
== FALSE
);
434 cs
->CD_Str
= (char *)"";
439 if((cs
->ID_Str
= malloc((line
- idstr
) + 1)) == NULL
)
442 strncpy(cs
->ID_Str
, idstr
, line
- idstr
);
443 cs
->ID_Str
[line
- idstr
] = '\0';
446 /* Check if next char in line is '('? (//) */
449 ShowError(MSG_ERR_NO_LEADING_BRACKET
, cs
->ID_Str
);
454 struct CatString
*scs
;
459 /* Check for default config of line (//) */
463 NextID
= cs
->ID
= NextID
+ strtol(line
, &line
, 0);
464 else if(*line
== '$')
467 cs
->ID
= NextID
= strtol(line
, &line
, 16);
470 cs
->ID
= NextID
= strtol(line
, &line
, 0);
475 /* Check for already used identifier. */
476 for(scs
= FirstCatString
; scs
!= NULL
; scs
= scs
->Next
)
478 if(scs
->ID
== cs
->ID
)
480 ShowError(MSG_ERR_DOUBLE_ID
, cs
->ID_Str
);
483 if(strcmp(cs
->ID_Str
, scs
->ID_Str
) == 0)
485 ShowError(MSG_ERR_DOUBLE_IDENTIFIER
, cs
->ID_Str
);
490 /* Check for min/len values (//) */
493 ShowWarn(MSG_ERR_NO_MIN_LEN
, cs
->ID_Str
);
502 cs
->MinLen
= strtol(line
, &line
, 0);
507 ShowWarn(MSG_ERR_NO_MAX_LEN
, cs
->ID_Str
);
516 cs
->MaxLen
= strtol(line
, &line
, 0);
521 ShowError(MSG_ERR_NO_TRAILING_BRACKET
, cs
->ID_Str
);
528 if(*line
&& *line
!= '\"')
529 ShowError(MSG_ERR_EXTRA_CHARACTERS_ID
, cs
->ID_Str
);
535 //printf("ID_Str: '%s' (%d)\n", cs->ID_Str, cs->ID);
548 // if the user want to force a certain output (destination)
549 // codeset we set it here.
550 if(DestCodeset
[0] != '\0')
551 CatDstCharset
= DestCodeset
;
553 // Make sure double backslashes end up in a single backslash.
554 // We catch any double backslash followed by a zero character,
555 // which covers strings like "\\0" and "\\033" or "\\33" as these are
556 // common strings in MUI applications.
557 while((p
= strstr(line
, "\\\\0")) != NULL
|| (p
= strstr(line
, "\\\\33")) != NULL
)
558 memmove(p
, p
+1, strlen(p
));
560 // unquote the string
561 if(line
[strlen(line
)-1] == '"')
562 line
[strlen(line
)-1] = '\0';
564 if(Strnicmp(line
, "msgid \"", 7) == 0)
568 // if the string starts with <EMPTY> we ought to remove
569 // the rest of the string!
570 if(strncmp(line
, "<EMPTY>", 7) == 0)
574 cs
->CD_Str
= ConvertString(line
, PoSrcCharset
, CatDstCharset
);
577 cs
->CD_Str
= malloc(1);
578 cs
->CD_Str
[0] = '\0';
581 //printf("CD_Str: '%s' '%s'\n", cs->CD_Str, line);
586 else if(Strnicmp(line
, "msgstr \"", 8) == 0)
591 cs
->CT_Str
= ConvertString(line
, PoSrcCharset
, CatDstCharset
);
594 cs
->CT_Str
= malloc(1);
595 cs
->CT_Str
[0] = '\0';
600 //printf("CT_Str: '%s'\n", cs->CT_Str);
605 else if(*line
== '"') // line starts with "
611 char *t
= ConvertString(line
, PoSrcCharset
, CatDstCharset
);
613 cs
->CD_Str
= AddString(cs
->CD_Str
, t
);
615 //printf("CD_Str2: '%s' '%s'\n", cs->CD_Str, line);
619 else if(inMsgSTR
== TRUE
)
621 char *t
= ConvertString(line
, PoSrcCharset
, CatDstCharset
);
623 cs
->CT_Str
= AddString(cs
->CT_Str
, t
);
625 //printf("CT_Str2: '%s' '%s'\n", cs->CT_Str, line);
635 printf("CatVersion: %d.%d\n", CatVersion, CatRevision);
636 printf("CatVersionDate: '%s'\n", CatVersionDate);
637 printf("CatVersionString: '%s'\n", CatVersionString);
638 printf("CatLanguage: '%s'\n", CatLanguage);
639 printf("PoSrcCharset: '%s'\n", PoSrcCharset);
640 printf("CatDstCharset: '%s'\n", CatDstCharset);
644 ShowErrorQuick(MSG_ERR_NOCTCODESET
);
646 if(!(CatVersionString
|| (CatRcsId
&& CatName
)))
647 ShowErrorQuick(MSG_ERR_NOCTVERSION
);
649 // lets translate CatDstCharset to CodeSet number
650 if(Stricmp(CatDstCharset
, "iso-8859-1") == 0)
652 else if(Stricmp(CatDstCharset
, "iso-8859-2") == 0)
654 else if(Stricmp(CatDstCharset
, "iso-8859-7") == 0)
656 else if(Stricmp(CatDstCharset
, "iso-8859-9") == 0)
658 else if(Stricmp(CatDstCharset
, "utf-8") == 0 || Stricmp(CatDstCharset
, "utf8") == 0)
660 else if(Stricmp(CatDstCharset
, "iso-8859-15") == 0)
662 else if(Stricmp(CatDstCharset
, "iso-8859-16") == 0)
664 else if(Stricmp(CatDstCharset
, "amiga-1251") == 0 || Stricmp(CatDstCharset
, "windows-1251"))
669 // check consistency of translations found
670 for(cs
= FirstCatString
; cs
!= NULL
; cs
= cs
->Next
)
672 if(cs
->CT_Str
== NULL
)
673 ShowWarnQuick(MSG_ERR_MISSINGTRANSLATION
, cs
->ID_Str
);
679 /* Get string length */
680 reallen
= strlen(cs
->CT_Str
);
681 cd_len
= strlen(cs
->CD_Str
);
683 // check for empty translations
688 // for .po files empty strings are really missing translations
689 ShowWarnQuick(MSG_ERR_MISSINGTRANSLATION
, cs
->ID_Str
);
691 // now remove the cs from the list
695 else if(strcmp(cs
->CT_Str
, "<EMPTY>") == 0)
697 // string should be intentionally empty
698 cs
->CT_Str
[0] = '\0';
702 if(cs
->MinLen
> 0 && reallen
< (size_t)cs
->MinLen
)
703 ShowWarnQuick(MSG_ERR_STRING_TOO_SHORT
, cs
->ID_Str
);
705 if(cs
->MaxLen
> 0 && reallen
> (size_t)cs
->MaxLen
)
706 ShowWarnQuick(MSG_ERR_STRING_TOO_LONG
, cs
->ID_Str
);
708 /* Check for trailing ellipsis. */
709 if(reallen
>= 3 && cd_len
>= 3)
711 if(strcmp(&cs
->CD_Str
[cd_len
- 3], "...") == 0 &&
712 strcmp(&cs
->CT_Str
[reallen
- 3], "...") != 0)
714 ShowWarnQuick(MSG_ERR_TRAILING_ELLIPSIS
, cs
->ID_Str
);
717 if(strcmp(&cs
->CD_Str
[cd_len
- 3], "...") != 0 &&
718 strcmp(&cs
->CT_Str
[reallen
- 3], "...") == 0)
720 ShowWarnQuick(MSG_ERR_NO_TRAILING_ELLIPSIS
, cs
->ID_Str
);
724 /* Check for trailing spaces. */
725 if(reallen
>= 1 && cd_len
>= 1)
727 if(strcmp(&cs
->CD_Str
[cd_len
- 1], " ") == 0 &&
728 strcmp(&cs
->CT_Str
[reallen
- 1], " ") != 0)
731 ShowWarnQuick(MSG_ERR_TRAILING_BLANKS
, cs
->ID_Str
);
734 if(strcmp(&cs
->CD_Str
[cd_len
- 1], " ") != 0 &&
735 strcmp(&cs
->CT_Str
[reallen
- 1], " ") == 0)
738 ShowWarnQuick(MSG_ERR_NO_TRAILING_BLANKS
, cs
->ID_Str
);
742 /* Check for matching placeholders */
743 if(reallen
>= 1 && cd_len
>= 1)
745 char *cdP
= cs
->CD_Str
;
746 char *ctP
= cs
->CT_Str
;
750 cdP
= strchr(cdP
, '%');
751 ctP
= strchr(ctP
, '%');
753 if(cdP
== NULL
&& ctP
== NULL
)
755 // no more placeholders, bail out
758 else if(cdP
!= NULL
&& ctP
!= NULL
)
764 // check the placeholder only if the '%' is followed by an
765 // alpha-numerical character or another percent sign
766 if(IS_NUMBER_OR_LETTER(*cdP
) || *cdP
== '%')
770 ShowWarnQuick(MSG_ERR_MISMATCHING_PLACEHOLDERS
, cs
->ID_Str
);
775 // skip the second '%' sign
781 else if(IS_NUMBER_OR_LETTER(*ctP
) || *ctP
== '%')
783 // the translation uses a placeholder while the description
785 ShowWarnQuick(MSG_ERR_EXCESSIVE_PLACEHOLDERS
, cs
->ID_Str
);
790 else if(cdP
!= NULL
&& ctP
== NULL
)
795 // check if really a placeholder follows or just another percent sign
796 // the original string is allowed to contain more single percent signs than the translated string
797 if(IS_NUMBER_OR_LETTER(*cdP
) || *cdP
== '%')
799 // the description uses at least one more placeholder than the translation
800 ShowWarnQuick(MSG_ERR_MISSING_PLACEHOLDERS
, cs
->ID_Str
);
805 else if(cdP
== NULL
&& ctP
!= NULL
)
810 // check if really a placeholder follows or just another percent sign
811 // the translated string is allowed to contain more single percent signs than the original string
812 if(IS_NUMBER_OR_LETTER(*ctP
) || *ctP
== '%')
814 // the translation uses at least one more placeholder than the description
815 ShowWarnQuick(MSG_ERR_EXCESSIVE_PLACEHOLDERS
, cs
->ID_Str
);
833 for(cs
= FirstCatString
; cs
!= NULL
; cs
= cs
->Next
)
835 if(cs
->CT_Str
== NULL
)
837 ShowWarn(MSG_ERR_CTGAP
, cs
->ID_Str
);