Update ooo320-m1
[ooovba.git] / i18npool / source / localedata / data / currency-check.awk
blob9aef17636da2e12d02df25c42f985cb106b36a24
1 #!/usr/bin/gawk -f
# Usage: gawk -f currency-check.awk *.xml
# Check any
# <FormatCode>...[$xxx-...]...</FormatCode>
# against every
# <CurrencySymbol>xxx</CurrencySymbol>
# definition of the same XML file and output the symbols for which no match
# was found.
# For formatindex="12" to formatindex="15" and for formatindex="17" it is
# additionally checked whether the currency symbol used is the one flagged
# usedInCompatibleFormatCodes, because the number formatter needs that one.
# Also generates output if the generic currency symbol (UTF-8 bytes 0xC2 0xA4)
# is used instead of a real currency symbol.
# Author: Eike Rathke <er@openoffice.org>
# Start with no current file; the per-file rule compares this against
# FILENAME and only runs the checks once a file name has been recorded.
BEGIN { file = "" }
# Runs on the first record of every input file: finishes the checks for the
# previous file (if there was one) and resets all per-file state.
file != FILENAME {
    if ( file )
        checkIt()
    file = FILENAME
    line = 0
    nFormats = 0
    nCurrencies = 0
    bFormatAuto = 0
    sReplaceFrom = ""
    sReplaceTo = ""
    sMatchReplace = ""
    sRefCurrencyFromLocale = ""
    crlf = 0
    # The counters above only limit how far the arrays are read; the entries
    # themselves would survive.  A <Currency> lacking one of its child
    # elements would then silently be checked against the value a previous
    # file left at the same index, so drop everything collected so far.
    delete Formats
    delete FormatLine
    delete FormatAuto
    delete SymbolDefault
    delete SymbolCompati
    delete IDs
    delete IDLine
    delete Symbols
    delete SymbolLine
    delete BankSymbols
    delete BankSymbolLine
}
# Main rule, executed for every input record.  Counts lines, rejects CR/LF
# input, and dispatches on the element that starts the line:
#   <LC_FORMAT ...>      remembers the replaceFrom/replaceTo attributes
#   <FormatElement ...>  notes whether a CURRENCY format has one of the
#                        format indices 12-15 or 17
#   <FormatCode>...      records each currency symbol used in [$xxx-hex]
#   <LC_CURRENCY ...>    with ref="loc": reads the currencies of loc.xml
#   anything else        is handed to getCurrencyParams()
{
    ++line
    # Under Unix a trailing CR defeats the ...$ end-of-line checks below, so
    # input with DOS line endings is rejected outright.
    if ( /\x0D$/ )
    {
        print "Error: not Unix line ending in line " line
        crlf = 1    # tells the END rule not to run checkIt()
        exit(1)
    }
    if ( $1 ~ /^<LC_FORMAT(>|$)/ )
    {
        if ( $0 ~ /replaceFrom="\[CURRENCY\]"/ )
        {
            sReplaceFrom = "\\[CURRENCY\\]"
            # Allow leading blanks like every other element test in this
            # script, so indented <FormatCode> lines are matched as well.
            sMatchReplace = "^[[:blank:]]*<FormatCode>.*" sReplaceFrom
        }
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^replaceTo="/ )
            {
                # 11 characters of replaceTo=" plus the closing quote,
                # plus a trailing > or /> if the tag ends in this field.
                l = 12
                if ( $j ~ />$/ )
                    ++l
                if ( $j ~ /\/>$/ )
                    ++l
                sReplaceTo = substr( $j, 12, length($j)-l )
            }
        }
    }
    else if ( $1 ~ /^<FormatElement(>|$)/ )
    {
        if ( $0 ~ /usage="CURRENCY"/ )
        {
            if ( $0 ~ /formatindex="1[23457]"/ )
                bFormatAuto = 1
            else
                bFormatAuto = 0
        }
    }
    else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
            (sMatchReplace && $0 ~ sMatchReplace ) )
    {
        if ( sReplaceFrom )
            gsub( sReplaceFrom, sReplaceTo )
        split( $0, arr, /<|>/ )
        # Cut the element content at every "[$" and "-hex]"; the pieces that
        # are not number format code are the currency symbols.  Walk them by
        # index so they are recorded in the order they appear in the line.
        nCodes = split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
        for ( j=1; j<=nCodes; ++j )
        {
            if ( code[j] && code[j] !~ /[#0]|\[NatNum/ )
            {
                FormatLine[nFormats] = file " line " line
                FormatAuto[nFormats] = bFormatAuto
                Formats[nFormats++] = code[j]
            }
        }
        bFormatAuto = 0
    }
    else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
    {
        # Scan all attributes first: the getline below overwrites $0 and NF,
        # so it must not run while the fields of this line are still being
        # iterated.
        bHaveRef = 0
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^ref="/ )
            {
                # 5 characters of ref=" plus the closing quote, plus a
                # trailing > or /> if the tag ends in this field.
                l = 6
                if ( $j ~ />$/ )
                    ++l
                if ( $j ~ /\/>$/ )
                    ++l
                locale = substr( $j, 6, length($j)-l )
                bHaveRef = 1
            }
        }
        if ( bHaveRef )
        {
            sRefCurrencyFromLocale = file
            oldfile = file
            oldline = line
            # Look for the referenced locale next to the referencing file,
            # not in the current working directory.
            dir = oldfile
            sub( "[^/]*$", "", dir )
            file = dir locale ".xml"
            line = 0
            while ( (getline <file) > 0 )
            {
                ++line
                getCurrencyParams()
            }
            close( file )
            if ( !line )
                print "ref locale not available: " file \
                    " (from " oldfile " line " oldline ")"
            file = oldfile
            line = oldline
            sRefCurrencyFromLocale = ""
        }
    }
    else
        getCurrencyParams()
}
# After the last record: check the final file, unless no file was read at
# all or the run was aborted because of CR/LF line endings (crlf set).
END {
    if ( !crlf && file )
        checkIt()
}
# Returns the "file line N" position text for the current record, extended
# by the referencing file while a ref locale is being read.
function _currencyPos() {
    return file " line " line \
        ( sRefCurrencyFromLocale ? \
            " (referenced from " sRefCurrencyFromLocale ")" : "" )
}

# Collects the data of one <Currency> element from the current record ($0)
# into the arrays indexed by nCurrencies: the default and
# usedInCompatibleFormatCodes flags from the opening tag, then CurrencyID,
# CurrencySymbol and BankSymbol together with their positions.  The closing
# </Currency> tag advances nCurrencies.
# Assumes that each element is on a line on its own!
function getCurrencyParams() {
    if ( $1 ~ /^<Currency(>|$)/ )
    {
        SymbolDefault[nCurrencies] = ( $0 ~ /default="true"/ ) ? 1 : 0
        SymbolCompati[nCurrencies] = \
            ( $0 ~ /usedInCompatibleFormatCodes="true"/ ) ? 1 : 0
        return
    }
    if ( $0 ~ /^[[:blank:]]*<CurrencyID>/ )
    {
        # Splitting at < and > leaves the element content in arr[3].
        split( $0, arr, /<|>/ )
        IDLine[nCurrencies] = _currencyPos()
        IDs[nCurrencies] = arr[3]
        return
    }
    if ( $0 ~ /^[[:blank:]]*<CurrencySymbol>/ )
    {
        split( $0, arr, /<|>/ )
        SymbolLine[nCurrencies] = _currencyPos()
        Symbols[nCurrencies] = arr[3]
        return
    }
    if ( $0 ~ /^[[:blank:]]*<BankSymbol>/ )
    {
        split( $0, arr, /<|>/ )
        BankSymbolLine[nCurrencies] = _currencyPos()
        BankSymbols[nCurrencies] = arr[3]
        return
    }
    if ( $1 ~ /^<\/Currency>/ )
        ++nCurrencies
}
# Evaluates everything collected for the current file and prints findings.
#  - Each recorded format symbol is reported if it is the generic currency
#    symbol, if no <CurrencySymbol> matches it, or if it comes from a format
#    flagged in FormatAuto and is not the usedInCompatibleFormatCodes symbol.
#  - If any format was reported, all defined currency symbols are listed for
#    reference; otherwise the definitions themselves are checked for the
#    generic symbol and for exactly one default and one compatible currency.
#  - Every CurrencyID has to look like an ISO 4217 code and equal BankSymbol.
# A blank line terminates the output of a file that had findings.
function checkIt() {
    bad = 0
    for ( j=0; j<nFormats; ++j )
    {
        state = FormatInSymbol( Formats[j] )
        if ( Formats[j] == "\xc2\xa4" )
        {
            bad = 1
            print " bad: `" Formats[j] "' (" FormatLine[j] ")"
        }
        else if ( state == 0 )
        {
            bad = 1
            print "unknown: `" Formats[j] "' (" FormatLine[j] ")"
        }
        else if ( FormatAuto[j] && state < 2 )
        {
            bad = 1
            print "badauto: `" Formats[j] "' (" FormatLine[j] ")"
        }
    }
    if ( bad )
    {
        for ( j=0; j<nCurrencies; ++j )
        {
            bDef = 0
            if ( Symbols[j] == "\xc2\xa4" )
                print "def bad: `" Symbols[j] "' (" SymbolLine[j] ")"
            if ( SymbolDefault[j] )
            {
                bDef = 1
                print "default: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( SymbolCompati[j] )
            {
                bDef = 1
                print "compati: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( !bDef )
                print "defined: `" Symbols[j] "' (" SymbolLine[j] ")"
        }
    }
    else
    {
        bHasDefault = 0
        bHasCompati = 0
        for ( j=0; j<nCurrencies; ++j )
        {
            if ( Symbols[j] == "\xc2\xa4" )
            {
                bad = 1
                print "def bad: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( SymbolDefault[j] )
            {
                if ( !bHasDefault )
                    bHasDefault = 1
                else
                {
                    bad = 1
                    print "dupe default: `" Symbols[j] "' (" SymbolLine[j] ")"
                }
            }
            if ( SymbolCompati[j] )
            {
                if ( !bHasCompati )
                    bHasCompati = 1
                else
                {
                    bad = 1
                    print "dupe compati: `" Symbols[j] "' (" SymbolLine[j] ")"
                }
            }
        }
        if ( !bHasDefault )
        {
            bad = 1
            print " no default: (" file ")"
        }
        if ( !bHasCompati )
        {
            bad = 1
            print " no compati: (" file ")"
        }
    }
    for ( j=0; j<nCurrencies; ++j )
    {
        # Check if CurrencyID at least resembles some ISO 4217 code.
        # The only exception is zh_MO that had an erroneous original data set
        # with BankSymbol="P" (stored as ISO code in documents, hence copied to
        # CurrencyID now) and needs that entry for legacy documents.
        # The exception is matched on the base name so it also applies when
        # the file is passed with a directory prefix.
        # There is a strange bug in gawk 3.1.4 that does a match of [A-Z] on
        # lower case except 'a', regardless of IGNORECASE setting, hence this
        # ugly notation. [[:upper:]] wouldn't be correct since we want only
        # ASCII to match.
        if ( IDs[j] !~ /^[ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ]$/ \
              && !(file ~ "(^|/)zh_MO\\.xml$" && IDs[j] == "P") )
        {
            bad = 1
            print "no ISO 4217 code: `" IDs[j] "' (" IDLine[j] ")"
        }
        # CurrencyID should equal BankSymbol for now.
        if ( IDs[j] != BankSymbols[j] )
        {
            bad = 1
            print "not equal: CurrencyID `" IDs[j] "' != BankSymbol `" BankSymbols[j] \
                "' (" IDLine[j] " and " BankSymbolLine[j] ")"
        }
    }
    if ( bad )
        print ""
}
# Looks up a symbol used in a format code among the defined currency symbols.
# Returns 0 if no currency defines it, 1 if only currencies that are not
# flagged usedInCompatibleFormatCodes define it, and 2 as soon as a currency
# with that flag matches.
function FormatInSymbol( format ) {
    state = 0
    nSym = 0
    while ( nSym < nCurrencies )
    {
        if ( Symbols[nSym] == format )
        {
            # Two currencies can have the same symbol (e.g. az_AZ.xml 'man.'
            # for AZM and AZN), so a match that is not the compatible one
            # only counts as "found" and the search goes on.
            if ( SymbolCompati[nSym] )
                return 2
            state = 1
        }
        ++nSym
    }
    return state
}
318 # vim: ts=4 sw=4 expandtab