3 # Utility to compare MS-LANGID definitions with those defined in ../../inc/i18npool/lang.h
4 # Run in i18npool/source/isolang
6 # Outputs new #define LANGUAGE_... 0x... lines and also some commented-out
7 # substrings that were matched in already existing defines.
9 # ATTENTION! The sed filter in the command line examples below ensures that a
10 # '|' border is drawn by html2text in data tables, and nowhere else; this awk
11 # script relies on that. The script also relies heavily on the column layout
12 # encountered. Should MS decide to change their layout or their CSS names
13 # ("data..."), this would probably break. Should html2text decide that the last
14 # border="..." attribute encountered wins instead of the first, this may break too.
17 # sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g'
19 # After html2text, the file is best cleaned up to contain _only_ the table
20 # entries; this is not required, as other entries are filtered. Check output.
22 # Expects input from the saved page of one of the following:
25 # http://www.microsoft.com/globaldev/reference/lcid-all.mspx
26 # filtered through ``html2text -nobs ...'', generated table:
27 # blank,name,hex,dec,blank fields:
28 # |Afrikaans_-_South_Africa___|0436___|1078___|
30 # complete command line:
31 # lynx -dump -source http://www.microsoft.com/globaldev/reference/lcid-all.mspx | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile
35 # http://www.microsoft.com/globaldev/reference/winxp/xp-lcid.mspx
36 # filtered through ``html2text -nobs ...'', generated table:
37 # blank,name,hex,dec,inputlocales,collection,blank fields:
38 # |Afrikaans |0436 |1078 |0436:00000409, |Basic |
40 # complete command line:
41 # lynx -dump -source http://www.microsoft.com/globaldev/reference/winxp/xp-lcid.mspx | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile
45 # http://msdn.microsoft.com/library/en-us/intl/nls_238z.asp
46 # filtered through ``html2text -nobs ...'', generated table:
47 # blank,hex,locale,name,blank fields:
48 # |0x0436___|af-ZA___|Afrikaans_(South_Africa)___|
50 # complete command line:
51 # lynx -dump -source http://msdn.microsoft.com/library/en-us/intl/nls_238z.asp | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile
53 # Author: Eike Rathke <erack@sun.com>, <er@openoffice.org>
# Load the already existing definitions: read ../../inc/i18npool/lang.h line
# by line and, for every line matching "#define LANGUAGE_... 0x...", store the
# upper-cased macro name ($2) in lang[], keyed by the upper-cased value ($3)
# with its first two characters (presumably the "0x" prefix) cut off.
# NOTE(review): the braces around the loop and the if body are not visible in
# this excerpt.
57 while ((getline < "../../inc/i18npool/lang.h") > 0)
59 if ($
0 ~
/^
#define[ ]*LANGUAGE_[_A-Za-z0-9]*[ ]*0x[0-9a-fA-F]/)
62 lang
[toupper(substr($
3,3))] =
toupper($
2)
63 #print substr($3,3) "=" $2
66 # html2text table follows
# Lookup tables indexed by the file type values filetype, lcid_all, xp_lcid
# and nls_238z (the assignments of those values are not visible in this
# excerpt):
#   filetypename[] - label printed in the "// assuming ... file" output line
#   namefield[]    - number of the input field that holds the language name
#   hexfield[]     - number of the input field that holds the hex LCID
#   locfield[]     - number of the input field that holds the locale string;
#                    0 for lcid_all, whose table layout has no locale column
# NOTE(review): no hexfield[xp_lcid] or locfield[xp_lcid] entry is visible
# here - confirm they are set in the complete file.
72 filetypename
[filetype
] =
"unknown"
73 filetypename
[lcid_all
] =
"lcid_all"
74 filetypename
[xp_lcid
] =
"xp_lcid"
75 filetypename
[nls_238z
] =
"nls_238z"
76 namefield
[lcid_all
] =
2
77 namefield
[xp_lcid
] =
2
78 namefield
[nls_238z
] =
4
79 hexfield
[lcid_all
] =
3
81 hexfield
[nls_238z
] =
2
82 locfield
[lcid_all
] =
0
84 locfield
[nls_238z
] =
3
# Fragment of the file type detection: one tested condition is that the
# second field starts with "Afrikaans". The preceding branch and the statement
# this branch selects are not visible in this excerpt.
94 else if ($
2 ~
/^Afrikaans
/)
# Cache the field numbers for the current file type, so that the following
# rules can address the columns as $name, $hex and $loc.
101 name = namefield
[filetype
]
102 hex = hexfield
[filetype
]
103 loc = locfield
[filetype
]
# Clean up the fields of a table row:
#  - in the name field, remove everything up to and including the first ':'
#  - in the name field, remove everything from the first '.' to the end
#  - trim leading and trailing blanks and underscores from the hex field, the
#    name field and the field addressed by loc
# NOTE(review): loc may be 0, in which case $loc is the whole record; any
# guard around the last gsub is not visible in this excerpt - confirm.
107 gsub( /^
[^
:]*:/, "", $name
)
108 gsub( /\..
*/, "", $name
)
109 gsub( /(^
[ _
]+)|([ _
]+$
)/, "", $hex
)
110 gsub( /(^
[ _
]+)|([ _
]+$
)/, "", $name
)
112 gsub( /(^
[ _
]+)|([ _
]+$
)/, "", $loc
)
# Remove a leading "0x" from the hex field.
115 ($hex ~
/^
0x
/) { $hex =
substr( $hex
, 3) }
117 # if only 464 instead of 0464, make it match lang.h
118 (length($hex
) < 4) { $hex =
"0" $hex
}
# A row whose hex field does not consist solely of hexadecimal digits is
# remembered in filtered[] (key: that field, value: the whole input line) and
# takes no part in the remaining rules.
120 ($hex !~
/^
[0-9a
-fA
-F
][0-9a
-fA
-F
]*$
/) { filtered
[$hex
] = $
0; next }
# all[HEX]=string: every accepted row, keyed by the upper-cased hex value.
123 { all
[toupper($hex
)] = $name
}
# If the file type has a locale field (loc is non-zero), keep its content
# wrapped in a C comment; it is appended to the generated output line.
125 (loc
) { comment
[toupper($hex
)] =
" /* " $loc
" */" }
127 # new hex: newlang[HEX]=string
128 !
(toupper($hex
) in lang
) { newlang
[toupper($hex
)] = $name
}
# Report output. NOTE(review): the conditions and loops that enclose the
# following statements are not visible in this excerpt.
# Error message appended to /dev/stderr; presumably issued when no file type
# was detected - confirm against the enclosing condition.
133 print "No file type recognized." >>"/dev/stderr"
136 print "// assuming " filetypename
[filetype
] " file"
# Raw line for a new LCID x: the language name as found in the table, the hex
# value and the locale comment recorded for x (empty if none was stored).
140 printf( "xxxxxxx LANGUAGE_%-26s 0x%s%s\n", newlang
[x
], x
, comment
[x
])
# Split the language name at every non-alphanumeric character into arr[];
# n receives the number of resulting pieces.
141 n =
split(newlang
[x
],arr
,/[^A
-Za
-z0
-9]/)
147 # each identifier word of the language name
150 aup =
toupper(arr
[i
])
154 # contained in already existing definitions?
# Commented hint line: the name piece, an existing macro name and its hex key.
156 printf( "// %-50s %s\n", arr
[i
] ": " lang
[l
], l
)
# Proposed new definition for x; def is assembled outside the visible lines,
# presumably from the upper-cased name pieces - confirm.
160 printf( "#define LANGUAGE_%-26s 0x%s\n", def
, x
)
162 print "\n// --- reverse check follows ----------------------------------\n"
# Lists an existing lang.h definition (hex key x, macro name lang[x]) as
# missing from the input.
166 print "// not in input file: " x
" " lang
[x
]
168 print "\n// --- filtered table entries follow (if any) -----------------\n"
# Lists a row that was put aside in filtered[] by the hex digit check.
170 print "// filtered: " x
" " filtered
[x
]