4 * Copyright (C) 1993-1999 Jochen Wiedmann and Marcin Orlowski
5 * Copyright (C) 2002-2017 FlexCat Open Source Team
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or (at
10 * your option) any later version.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #include <proto/locale.h> /* This is to get locale.library/IsAlpha() */
32 #include "showfuncs.h"
33 #include "readprefs.h"
36 #include "createcat.h"
/* Catalog version string, defined in another translation unit. ScanPOFile()
   builds a "$VER: ..." string and stores it here when this is still NULL and
   both a version date and a project name have been found. */
38 extern char *CatVersionString
;
/* Catalog language, defined in another translation unit. ScanPOFile() assigns
   it the result of AddCatalogChunk() for the "LANG" chunk. */
39 extern char *CatLanguage
;
/* Defined in another translation unit. In the visible code it is only read:
   together with CatName it serves as the alternative to CatVersionString in
   the final "no version" check. */
40 extern char *CatRcsId
;
/* Defined in another translation unit.
   NOTE(review): not referenced in the visible part of this file - confirm it
   is still needed here. */
43 extern int CT_Scanned
;
/* Evaluates to non-zero when c lies in one of the ranges '0'..'9', 'a'..'z'
   or 'A'..'Z'. Only plain range comparisons are used, so no locale is
   consulted. The argument is expanded several times; pass only expressions
   without side effects. */
45 #define IS_NUMBER_OR_LETTER(c) (((c) >= '0' && (c) <= '9') || \
46 ((c) >= 'a' && (c) <= 'z') || \
47 ((c) >= 'A' && (c) <= 'Z'))
/* Local prototype for strptime(), declared only when __amigaos3__,
   __MORPHOS__ or WIN32 is defined. ScanPOFile() calls it with the format
   "%Y-%m-%d" to parse the "PO-Revision-Date" header.
   NOTE(review): presumably the C libraries on these platforms do not declare
   strptime() themselves - confirm. */
49 #if defined(__amigaos3__) || defined(__MORPHOS__) || defined(WIN32)
50 char *strptime(const char *string
, const char *fmt
, struct tm
*res
);
55 /* This function scans a PO-style format catalog description/translation file.
57 Inputs: pofile - name of the description/translation file to scan.
58 Result: TRUE if successful, FALSE otherwise.
60 int ScanPOFile(char *pofile
, int verwarning
)
65 int CodeSet_checked
= FALSE
;
68 const char *PoSrcCharset
= "utf-8";
69 const char *CatDstCharset
= "iso-8859-1";
70 char CatVersionDate
[255] = "";
71 char CatProjectName
[255] = "";
72 struct CatString
*cs
= NULL
;
73 struct CatString
**csptr
= &FirstCatString
;
79 if((fp
= fopen(pofile
, "r")) == NULL
)
80 ShowErrorQuick(MSG_ERR_NOCATALOGTRANSLATION
, pofile
);
83 setvbuf(fp
, NULL
, _IOFBF
, buffer_size
);
85 // initialize "readLineBuffer" ahead of the loop
86 // the loop will bail out early for empty files
87 readLineBuffer
= NULL
;
88 while(!feof(fp
) && (readLineBuffer
= ReadLine(fp
, TRUE
)) != NULL
)
90 // the buffer pointer will be modified all the way down, so better work with a copy,
91 // otherwise the free() call after the loop will free the wrong pointer
92 char *line
= readLineBuffer
;
100 // check that we have a valid version
104 ShowWarn(MSG_ERR_NO_CAT_VERSION
);
109 // we found the end of the header, so let's check if we have all
110 // we require to continue
111 if(CatVersionDate
[0] != '\0' && CatProjectName
[0] != '\0' &&
112 CatVersionString
== NULL
)
116 // warn about missing revision information
117 if(CatRevision
== -1)
120 ShowWarn(MSG_ERR_NO_CAT_REVISION
);
125 if(strstr(CatProjectName
, ".catalog") != NULL
)
126 snprintf(buf
, sizeof(buf
), "$VER: %s %d.%d (%s)", CatProjectName
, CatVersion
, CatRevision
, CatVersionDate
);
128 snprintf(buf
, sizeof(buf
), "$VER: %s.catalog %d.%d (%s)", CatProjectName
, CatVersion
, CatRevision
, CatVersionDate
);
130 CatVersionString
= AllocString(buf
);
137 // comment lines start with #
138 // but they may contain some valuable information for catalog
139 // file creation. So let's parse these lines as well
140 while(*line
== '#' || *line
== ' ' || *line
== '\t')
143 if(CatVersion
== -1 && Strnicmp(line
, "version", 7) == 0)
147 CatVersion
= strtol(line
, &line
, 0);
149 else if(CatRevision
== -1 && Strnicmp(line
, "revision", 8) == 0)
153 CatRevision
= strtol(line
, &line
, 0);
155 else if(CatRevision
== -1 &&
156 Strnicmp(line
, "$Id: ", 5) == 0)
163 // search second space
168 CatRevision
= strtol(p
, &p
, 0);
171 else if(CatRevision
== -1 &&
172 Strnicmp(line
, "$Revision: ", 11) == 0)
175 CatRevision
= strtol(line
, &line
, 0);
182 if(Strnicmp(line
, "\"Language: ", 11) == 0)
185 const char *language
= NULL
;
188 ShowError(MSG_ERR_DOUBLECTLANGUAGE
);
191 p
= strchr(line
, '\\');
195 if(Stricmp(line
, "bs") == 0) // bosnian
197 language
= "bosanski";
198 CatDstCharset
= "iso-8859-2";
200 else if(Stricmp(line
, "ca") == 0) // catalan
202 language
= "catal\xE0";
203 CatDstCharset
= "iso-8859-15";
205 else if(Stricmp(line
, "hr") == 0) // croatian
207 language
= "hrvatski";
208 CatDstCharset
= "iso-8859-16";
210 else if(Stricmp(line
, "cs") == 0) // czech
213 CatDstCharset
= "iso-8859-2";
215 else if(Stricmp(line
, "da") == 0) // danish
218 CatDstCharset
= "iso-8859-15";
220 else if(Stricmp(line
, "nl") == 0) // dutch
222 language
= "nederlands";
223 CatDstCharset
= "iso-8859-15";
225 else if(Stricmp(line
, "en_GB") == 0) // english-british
226 language
= "english-british";
227 else if(Stricmp(line
, "fi") == 0) // finnish
230 CatDstCharset
= "iso-8859-15";
232 else if(Stricmp(line
, "fr") == 0) // french
234 language
= "fran""\xE7""ais";
235 CatDstCharset
= "iso-8859-15";
237 else if(Stricmp(line
, "de") == 0) // german
239 language
= "deutsch";
240 CatDstCharset
= "iso-8859-15";
242 else if(Stricmp(line
, "el") == 0) // greek
245 CatDstCharset
= "iso-8859-7";
247 else if(Stricmp(line
, "hu") == 0) // hungarian
250 CatDstCharset
= "iso-8859-16";
252 else if(Stricmp(line
, "it") == 0) // italian
254 language
= "italiano";
255 CatDstCharset
= "iso-8859-15";
257 else if(Stricmp(line
, "ja") == 0) // japanese
259 language
= "nihongo";
260 CatDstCharset
= "euc-jp";
262 else if(Stricmp(line
, "ko") == 0) // korean
265 CatDstCharset
= "euc-kr";
267 else if(Stricmp(line
, "no") == 0) // norwegian
270 CatDstCharset
= "iso-8859-15";
272 else if(Stricmp(line
, "fa") == 0) // persian
275 CatDstCharset
= "utf-8";
277 else if(Stricmp(line
, "pl") == 0) // polish
280 CatDstCharset
= "iso-8859-2";
282 else if(Stricmp(line
, "pt") == 0) // portuguese
283 language
= "portugu""\xEA""s";
284 else if(Stricmp(line
, "pt_BR") == 0) // portuguese-brazil
285 language
= "portugu""\xEA""s-brasil";
286 else if(Stricmp(line
, "ru") == 0) // russian
288 language
= "russian";
290 CatDstCharset
= "Amiga-1251";
292 CatDstCharset
= "windows-1251"; // iconv doesn't know anything about Amiga-1251 :(
295 else if(Stricmp(line
, "sr") == 0) // serbian
298 CatDstCharset
= "iso-8859-16";
300 else if(Stricmp(line
, "sk") == 0) // slovakian
303 CatDstCharset
= "iso-8859-2";
305 else if(Stricmp(line
, "sl") == 0) // slovenian
307 language
= "slovensko";
308 CatDstCharset
= "iso-8859-2";
310 else if(Stricmp(line
, "es") == 0) // spanish
312 language
= "espa""\xF1""ol";
313 CatDstCharset
= "iso-8859-15";
315 else if(Stricmp(line
, "sv") == 0) // swedish
317 language
= "svenska";
318 CatDstCharset
= "iso-8859-15";
320 else if(Stricmp(line
, "tr") == 0) // turkish
322 language
= "t""\xFC""rk""\xE7""e";
323 CatDstCharset
= "iso-8859-9";
327 CatLanguage
= AddCatalogChunk(strdup("LANG"), language
);
329 else if(Strnicmp(line
, "\"Language-Team: ", 16) == 0)
334 p
= strchr(line
, '\\');
338 AddCatalogChunk(strdup("AUTH"), line
);
340 else if(CodeSet_checked
== FALSE
&&
341 Strnicmp(line
, "\"Content-Type: ", 15) == 0)
346 p
= strstr(line
, "charset=");
357 PoSrcCharset
= strdup(p
);
360 CodeSet_checked
= TRUE
;
362 else if(Strnicmp(line
, "\"PO-Revision-Date: ", 19) == 0)
367 memset(&tm
, 0, sizeof(tm
));
368 strptime(line
, "%Y-%m-%d", &tm
);
369 strftime(CatVersionDate
, sizeof(CatVersionDate
), "%d.%m.%Y", &tm
);
371 else if(Strnicmp(line
, "\"Catalog-Name: ", 15) == 0)
376 p
= strchr(line
, '\\');
380 strcpy(CatProjectName
, line
);
382 else if(Strnicmp(line
, "\"Project-Id-Version: ", 21) == 0 && CatProjectName
[0] == '\0')
384 // fall back to the project ID as catalog name if it is not yet defined
388 p
= strchr(line
, '\\');
392 strcpy(CatProjectName
, line
);
400 // check if we found a line starting with "msgctxt" as that signals us
401 // a new catalog string should be added
402 if(Strnicmp(line
, "msgctxt \"", 9) == 0)
406 // we found a new 'msgctxt' lets clear cs
413 /* Check for blanks at the start of line. */
414 if(*line
== ' ' || *line
== '\t')
416 ShowError(MSG_ERR_UNEXPECTEDBLANKS
);
421 while(IS_NUMBER_OR_LETTER(*line
) || *line
== '_')
426 ShowError(MSG_ERR_NOIDENTIFIER
);
433 if((cs
= malloc(sizeof(*cs
))) == NULL
)
436 // search for the next catstring ID in case the ID
437 // specifier is missing "(//)" in the msgctxt
440 struct CatString
*scs
;
443 for(scs
= FirstCatString
; scs
!= NULL
; scs
= scs
->Next
)
445 if(scs
->ID
== NextID
)
453 while(found
== FALSE
);
459 cs
->CD_Str
= (char *)"";
464 if((cs
->ID_Str
= malloc((line
- idstr
) + 1)) == NULL
)
467 strncpy(cs
->ID_Str
, idstr
, line
- idstr
);
468 cs
->ID_Str
[line
- idstr
] = '\0';
471 /* Check if next char in line is '('? (//) */
474 ShowError(MSG_ERR_NO_LEADING_BRACKET
, cs
->ID_Str
);
479 struct CatString
*scs
;
484 /* Check for default config of line (//) */
488 NextID
= cs
->ID
= NextID
+ strtol(line
, &line
, 0);
489 else if(*line
== '$')
492 cs
->ID
= NextID
= strtol(line
, &line
, 16);
495 cs
->ID
= NextID
= strtol(line
, &line
, 0);
500 /* Check for already used identifier. */
501 for(scs
= FirstCatString
; scs
!= NULL
; scs
= scs
->Next
)
503 if(scs
->ID
== cs
->ID
)
505 ShowError(MSG_ERR_DOUBLE_ID
, cs
->ID_Str
);
508 if(strcmp(cs
->ID_Str
, scs
->ID_Str
) == 0)
510 ShowError(MSG_ERR_DOUBLE_IDENTIFIER
, cs
->ID_Str
);
515 /* Check for min/len values (//) */
518 ShowWarn(MSG_ERR_NO_MIN_LEN
, cs
->ID_Str
);
527 cs
->MinLen
= strtol(line
, &line
, 0);
532 ShowWarn(MSG_ERR_NO_MAX_LEN
, cs
->ID_Str
);
541 cs
->MaxLen
= strtol(line
, &line
, 0);
546 ShowError(MSG_ERR_NO_TRAILING_BRACKET
, cs
->ID_Str
);
553 if(*line
&& *line
!= '\"')
554 ShowError(MSG_ERR_EXTRA_CHARACTERS_ID
, cs
->ID_Str
);
560 //printf("ID_Str: '%s' (%d)\n", cs->ID_Str, cs->ID);
573 // if the user wants to force a certain output (destination)
574 // codeset we set it here.
575 if(DestCodeset
[0] != '\0')
576 CatDstCharset
= DestCodeset
;
578 // Make sure double backslashes end up in a single backslash.
579 // We catch any double backslash followed by a zero character,
580 // which covers strings like "\\0" and "\\033" or "\\33" as these are
581 // common strings in MUI applications.
582 while((p
= strstr(line
, "\\\\0")) != NULL
|| (p
= strstr(line
, "\\\\33")) != NULL
)
583 memmove(p
, p
+1, strlen(p
));
585 // unquote the string
586 if(line
[0] != '\0' && line
[strlen(line
)-1] == '"')
587 line
[strlen(line
)-1] = '\0';
589 if(Strnicmp(line
, "msgid \"", 7) == 0)
593 // if the string starts with <EMPTY> we ought to remove
594 // the rest of the string!
595 if(strncmp(line
, "<EMPTY>", 7) == 0)
600 if((cs
->CD_Str
= ConvertString(line
, PoSrcCharset
, CatDstCharset
)) == NULL
)
601 ShowWarn(MSG_ERR_CONVERSION_FAILED
, cs
->ID_Str
);
605 cs
->CD_Str
= malloc(1);
606 cs
->CD_Str
[0] = '\0';
609 //printf("CD_Str: '%s' '%s'\n", cs->CD_Str, line);
614 else if(Strnicmp(line
, "msgstr \"", 8) == 0)
618 // don't search for "<EMPTY>" here, this will be done later
619 // during all other checks. If the string would be erased here
620 // we would be no longer able to tell it apart from really
621 // missing translations.
624 if((cs
->CT_Str
= ConvertString(line
, PoSrcCharset
, CatDstCharset
)) == NULL
)
625 ShowWarn(MSG_ERR_CONVERSION_FAILED
, cs
->ID_Str
);
629 cs
->CT_Str
= malloc(1);
630 cs
->CT_Str
[0] = '\0';
635 //printf("CT_Str: '%s'\n", cs->CT_Str);
640 else if(*line
== '"') // line starts with "
648 if((t
= ConvertString(line
, PoSrcCharset
, CatDstCharset
)) == NULL
)
649 ShowWarn(MSG_ERR_CONVERSION_FAILED
, cs
->ID_Str
);
651 cs
->CD_Str
= AddString(cs
->CD_Str
, t
);
653 //printf("CD_Str2: '%s' '%s'\n", cs->CD_Str, line);
657 else if(inMsgSTR
== TRUE
)
661 if((t
= ConvertString(line
, PoSrcCharset
, CatDstCharset
)) == NULL
)
662 ShowWarn(MSG_ERR_CONVERSION_FAILED
, cs
->ID_Str
);
664 cs
->CT_Str
= AddString(cs
->CT_Str
, t
);
666 //printf("CT_Str2: '%s' '%s'\n", cs->CT_Str, line);
675 free(readLineBuffer
);
679 printf("CatVersion: %d.%d\n", CatVersion, CatRevision);
680 printf("CatVersionDate: '%s'\n", CatVersionDate);
681 printf("CatVersionString: '%s'\n", CatVersionString);
682 printf("CatLanguage: '%s'\n", CatLanguage);
683 printf("PoSrcCharset: '%s'\n", PoSrcCharset);
684 printf("CatDstCharset: '%s'\n", CatDstCharset);
688 ShowErrorQuick(MSG_ERR_NOCTCODESET
);
690 if(!(CatVersionString
|| (CatRcsId
&& CatName
)))
691 ShowErrorQuick(MSG_ERR_NOCTVERSION
);
693 // lets translate CatDstCharset to CodeSet number
694 if(Stricmp(CatDstCharset
, "iso-8859-1") == 0)
696 else if(Stricmp(CatDstCharset
, "iso-8859-2") == 0)
698 else if(Stricmp(CatDstCharset
, "iso-8859-7") == 0)
700 else if(Stricmp(CatDstCharset
, "iso-8859-9") == 0)
702 else if(Stricmp(CatDstCharset
, "utf-8") == 0 || Stricmp(CatDstCharset
, "utf8") == 0)
704 else if(Stricmp(CatDstCharset
, "iso-8859-15") == 0)
706 else if(Stricmp(CatDstCharset
, "iso-8859-16") == 0)
708 else if(Stricmp(CatDstCharset
, "amiga-1251") == 0 || Stricmp(CatDstCharset
, "windows-1251") == 0)
713 // check consistency of translations found
714 for(cs
= FirstCatString
; cs
!= NULL
; cs
= cs
->Next
)
716 if(cs
->CT_Str
== NULL
)
717 ShowWarnQuick(MSG_ERR_MISSINGTRANSLATION
, cs
->ID_Str
);
724 // get string length for both ASCII and UTF8 encoding
725 // the length check must be done against the UTF8 length,
726 // which might be less than the ASCII length due to certain
727 // UTF8 characters which are encoded with up to 3 ASCII
729 reallen
= strlen(cs
->CT_Str
);
730 reallen_utf8
= utf8_strlen(cs
->CT_Str
);
731 cd_len
= strlen(cs
->CD_Str
);
733 // check for empty translations
738 // for .po files empty strings are really missing translations
739 ShowWarnQuick(MSG_ERR_MISSINGTRANSLATION
, cs
->ID_Str
);
741 // now remove the cs from the list
747 // check for intentionally empty translations
748 if(strncmp(cs
->CT_Str
, "<EMPTY>", 7) == 0)
749 cs
->CT_Str
[0] = '\0';
751 if(cs
->MinLen
> 0 && reallen_utf8
< (size_t)cs
->MinLen
)
752 ShowWarnQuick(MSG_ERR_STRING_TOO_SHORT
, cs
->ID_Str
);
754 if(cs
->MaxLen
> 0 && reallen_utf8
> (size_t)cs
->MaxLen
)
755 ShowWarnQuick(MSG_ERR_STRING_TOO_LONG
, cs
->ID_Str
);
757 // check for trailing ellipsis
758 if(reallen
>= 3 && cd_len
>= 3)
760 if(strcmp(&cs
->CD_Str
[cd_len
- 3], "...") == 0 &&
761 strcmp(&cs
->CT_Str
[reallen
- 3], "...") != 0)
763 ShowWarnQuick(MSG_ERR_TRAILING_ELLIPSIS
, cs
->ID_Str
);
766 if(strcmp(&cs
->CD_Str
[cd_len
- 3], "...") != 0 &&
767 strcmp(&cs
->CT_Str
[reallen
- 3], "...") == 0)
769 ShowWarnQuick(MSG_ERR_NO_TRAILING_ELLIPSIS
, cs
->ID_Str
);
773 // check for trailing spaces
774 if(reallen
>= 1 && cd_len
>= 1)
776 if(strcmp(&cs
->CD_Str
[cd_len
- 1], " ") == 0 &&
777 strcmp(&cs
->CT_Str
[reallen
- 1], " ") != 0)
780 ShowWarnQuick(MSG_ERR_TRAILING_BLANKS
, cs
->ID_Str
);
783 if(strcmp(&cs
->CD_Str
[cd_len
- 1], " ") != 0 &&
784 strcmp(&cs
->CT_Str
[reallen
- 1], " ") == 0)
787 ShowWarnQuick(MSG_ERR_NO_TRAILING_BLANKS
, cs
->ID_Str
);
791 // check for matching placeholders
792 if(reallen
>= 1 && cd_len
>= 1)
794 char *cdP
= cs
->CD_Str
;
795 char *ctP
= cs
->CT_Str
;
799 cdP
= strchr(cdP
, '%');
800 ctP
= strchr(ctP
, '%');
802 if(cdP
== NULL
&& ctP
== NULL
)
804 // no more placeholders, bail out
807 else if(cdP
!= NULL
&& ctP
!= NULL
)
813 // check the placeholder only if the '%' is followed by an
814 // alpha-numerical character or another percent sign
815 if(IS_NUMBER_OR_LETTER(*cdP
) || *cdP
== '%')
819 ShowWarnQuick(MSG_ERR_MISMATCHING_PLACEHOLDERS
, cs
->ID_Str
);
824 // skip the second '%' sign
830 else if(IS_NUMBER_OR_LETTER(*ctP
) || *ctP
== '%')
832 // the translation uses a placeholder while the description
834 ShowWarnQuick(MSG_ERR_EXCESSIVE_PLACEHOLDERS
, cs
->ID_Str
);
839 else if(cdP
!= NULL
&& ctP
== NULL
)
844 // check if really a placeholder follows or just another percent sign
845 // the original string is allowed to contain more single percent signs than the translated string
846 if(IS_NUMBER_OR_LETTER(*cdP
) || *cdP
== '%')
848 // the description uses at least one more placeholder than the translation
849 ShowWarnQuick(MSG_ERR_MISSING_PLACEHOLDERS
, cs
->ID_Str
);
854 else if(cdP
== NULL
&& ctP
!= NULL
)
859 // check if really a placeholder follows or just another percent sign
860 // the translated string is allowed to contain more single percent signs than the original string
861 if(IS_NUMBER_OR_LETTER(*ctP
) || *ctP
== '%')
863 // the translation uses at least one more placeholder than the description
864 ShowWarnQuick(MSG_ERR_EXCESSIVE_PLACEHOLDERS
, cs
->ID_Str
);
877 for(cs
= FirstCatString
; cs
!= NULL
; cs
= cs
->Next
)
879 if(cs
->CT_Str
== NULL
)
881 ShowWarn(MSG_ERR_CTGAP
, cs
->ID_Str
);