3 # This file is part of the LibreOffice project.
5 # This Source Code Form is subject to the terms of the Mozilla Public
6 # License, v. 2.0. If a copy of the MPL was not distributed with this
7 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 # This file incorporates work covered by the following license notice:
11 # Licensed to the Apache Software Foundation (ASF) under one or more
12 # contributor license agreements. See the NOTICE file distributed
13 # with this work for additional information regarding copyright
14 # ownership. The ASF licenses this file to you under the Apache
15 # License, Version 2.0 (the "License"); you may not use this file
16 # except in compliance with the License. You may obtain a copy of
17 # the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 # Utility to compare MS-LANGID definitions with those defined in ../../inc/i18nlangtag/lang.h
20 # Run in i18nlangtag/source/isolang
22 # outputs new #define LANGUAGE_... 0x... and also some commented out substrings
23 # that were matched in already existing defines.
25 # ATTENTION! The sed filter in the command line examples below ensures that a
26 # '|' border is drawn by html2text in data tables, and nowhere else, on which
27 # this awk script relies. This script also heavily relies on the column layout
28 # encountered. Should MS decide to change their layout or their CSS names
29 # ("data..."), this would probably break. Should html2text decide that the last
30 # border="..." attribute encountered wins instead of the first, this may break as well.
33 # sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g'
35 # After html2text it is best to clean up the file so that it contains _only_ the
36 # table entries; this is not strictly necessary, since other lines are filtered out. Check the output.
38 # Expects input from the saved page of one of the following sources:
41 # http://www.microsoft.com/globaldev/reference/lcid-all.mspx
42 # filtered through ``html2text -nobs ...'', generated table:
43 # blank,name,hex,dec,blank fields:
44 # |Afrikaans_-_South_Africa___|0436___|1078___|
46 # complete command line:
47 # lynx -dump -source http://www.microsoft.com/globaldev/reference/lcid-all.mspx | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile
51 # http://www.microsoft.com/globaldev/reference/winxp/xp-lcid.mspx
52 # filtered through ``html2text -nobs ...'', generated table:
53 # blank,name,hex,dec,inputlocales,collection,blank fields:
54 # |Afrikaans |0436 |1078 |0436:00000409, |Basic |
56 # complete command line:
57 # lynx -dump -source http://www.microsoft.com/globaldev/reference/winxp/xp-lcid.mspx | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile
61 # http://msdn.microsoft.com/library/en-us/intl/nls_238z.asp
62 # filtered through ``html2text -nobs ...'', generated table:
63 # blank,hex,locale,name,blank fields:
64 # |0x0436___|af-ZA___|Afrikaans_(South_Africa)___|
66 # complete command line:
67 # lynx -dump -source http://msdn.microsoft.com/library/en-us/intl/nls_238z.asp | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile
69 # Author: Eike Rathke <erack@sun.com>, <er@openoffice.org>
# Load the already existing LANGUAGE_* definitions from lang.h into lang[].
# The path is relative, so the script has to be started from the directory
# named in the file header. getline returns 0 at end of file and -1 on error;
# because the loop only continues while the result is > 0, an unreadable
# lang.h leaves lang[] empty without any diagnostic.
73 while ((getline < "../../inc/i18nlangtag/lang.h") > 0)
75 if ($
0 ~
/^
#define[ ]*LANGUAGE_[_A-Za-z0-9]*[ ]*0x[0-9a-fA-F]/)
# lang[HEX] = NAME: the key is the third field without its first two
# characters (the "0x" required by the pattern above), the value is the
# second field (the macro name); both are upper-cased.
78 lang
[toupper(substr($
3,3))] =
toupper($
2)
79 #print substr($3,3) "=" $2
# Lookup tables indexed by the file type variables filetype, lcid_all,
# xp_lcid and nls_238z (their values are assigned on lines not visible in
# this view):
#   filetypename[] - printable name of the input layout
#   namefield[]    - number of the field holding the language name
#   hexfield[]     - number of the field holding the hexadecimal LCID
#   locfield[]     - number of the field holding the locale string; 0 for
#                    lcid_all, which makes the later (loc) rule skip the
#                    locale comment
# Field numbers follow the layouts listed in the file header, where the
# first field of every table row is blank.
# NOTE(review): no hexfield[xp_lcid] / locfield[xp_lcid] assignments are
# visible in this view - confirm that they exist in the complete file.
82 # html2text table follows
88 filetypename
[filetype
] =
"unknown"
89 filetypename
[lcid_all
] =
"lcid_all"
90 filetypename
[xp_lcid
] =
"xp_lcid"
91 filetypename
[nls_238z
] =
"nls_238z"
92 namefield
[lcid_all
] =
2
93 namefield
[xp_lcid
] =
2
94 namefield
[nls_238z
] =
4
95 hexfield
[lcid_all
] =
3
97 hexfield
[nls_238z
] =
2
98 locfield
[lcid_all
] =
0
100 locfield
[nls_238z
] =
3
# Part of the file type detection: this branch tests whether the second field
# starts with "Afrikaans", which is the first table row in the name-first
# layouts shown in the file header. The preceding "if" and the body of this
# branch are not visible in this view.
110 else if ($
2 ~
/^Afrikaans
/)
# Cache the field numbers for the detected file type; the rules that follow
# access the fields as $name, $hex and $loc.
117 name = namefield
[filetype
]
118 hex = hexfield
[filetype
]
119 loc = locfield
[filetype
]
# Clean up the selected fields in place. The patterns are split across lines
# in this view; the descriptions below follow what can be read from them.
# Remove the leading part of the name field up to and including the first ':'.
123 gsub( /^
[^
:]*:/, "", $name
)
# Remove everything from the first '.' to the end of the name field.
124 gsub( /\..
*/, "", $name
)
# Trim leading and trailing blanks and underscores (html2text padding, see the
# sample rows in the file header) from the hex, name and locale fields.
125 gsub( /(^
[ _
]+)|([ _
]+$
)/, "", $hex
)
126 gsub( /(^
[ _
]+)|([ _
]+$
)/, "", $name
)
# NOTE(review): when loc is 0, $loc is the whole record ($0). Confirm that
# this call is guarded by a test on loc; no guard is visible in this view.
128 gsub( /(^
[ _
]+)|([ _
]+$
)/, "", $loc
)
# Per-record pattern/action rules; they rely on the field numbers hex, name
# and loc chosen for the detected file type.
# Drop a leading "0x" from the hex field.
131 ($hex ~
/^
0x
/) { $hex =
substr( $hex
, 3) }
133 # if only 464 instead of 0464, make it match lang.h
# Only a single "0" is prepended, so a value shorter than three digits still
# ends up with fewer than four digits.
134 (length($hex
) < 4) { $hex =
"0" $hex
}
# Anything whose hex field is not made up of hexadecimal digits only is not a
# table entry: remember the record in filtered[] for the final report and
# skip the remaining rules.
136 ($hex !~
/^
[0-9a
-fA
-F
][0-9a
-fA
-F
]*$
/) { filtered
[$hex
] = $
0; next }
# all[HEX] = name, for every accepted entry of the input file.
139 { all
[toupper($hex
)] = $name
}
# If the layout has a locale field, keep it as a C comment to be appended to
# the output line of this entry.
141 (loc
) { comment
[toupper($hex
)] =
" /* " $loc
" */" }
143 # new hex: newlang[HEX]=string
# Only values that are not already keys of lang[] (loaded from lang.h) count
# as new.
144 !
(toupper($hex
) in lang
) { newlang
[toupper($hex
)] = $name
}
# Final report. The enclosing block header, the loop headers and the
# conditions around these statements are not visible in this view; the
# comments below describe only what each visible statement does.
# Diagnostic written to standard error.
149 print "No file type recognized." >>"/dev/stderr"
# Report which input layout is being assumed.
152 print "// assuming " filetypename
[filetype
] " file"
# For an entry x of newlang[]: print its name, its hex value and the locale
# comment collected for it (if any) on a line marked "xxxxxxx".
156 printf( "xxxxxxx LANGUAGE_%-26s 0x%s%s\n", newlang
[x
], x
, comment
[x
])
# Split the language name into words at every character that is not an ASCII
# letter or digit; n receives the number of pieces stored in arr[].
157 n =
split(newlang
[x
],arr
,/[^A
-Za
-z0
-9]/)
163 # each identifier word of the language name
# Upper-cased copy of the word; lang[] holds upper-cased names.
166 aup =
toupper(arr
[i
])
170 # contained in already existing definitions?
# Commented-out hint line: the word, the existing definition lang[l] and its
# hex value l.
172 printf( "// %-50s %s\n", arr
[i
] ": " lang
[l
], l
)
# Proposed new definition; def is assembled on lines not visible in this view.
176 printf( "#define LANGUAGE_%-26s 0x%s\n", def
, x
)
178 print "\n// --- reverse check follows ----------------------------------\n"
# Prints an entry of lang[] (loaded from lang.h) as missing from the input
# file; the condition selecting x is not visible in this view.
182 print "// not in input file: " x
" " lang
[x
]
184 print "\n// --- filtered table entries follow (if any) -----------------\n"
# Prints a record that was rejected because its hex field was not hexadecimal.
186 print "// filtered: " x
" " filtered
[x
]