Version 4.0.0.1, tag libreoffice-4.0.0.1
[LibreOffice.git] / i18npool / source / localedata / data / currency-check.awk
blob1246211e18a7fffbc1560e413f43a9e171e11425
1 #!/usr/bin/gawk -f
3 # This file is part of the LibreOffice project.
5 # This Source Code Form is subject to the terms of the Mozilla Public
6 # License, v. 2.0. If a copy of the MPL was not distributed with this
7 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 # This file incorporates work covered by the following license notice:
11 # Licensed to the Apache Software Foundation (ASF) under one or more
12 # contributor license agreements. See the NOTICE file distributed
13 # with this work for additional information regarding copyright
14 # ownership. The ASF licenses this file to you under the Apache
15 # License, Version 2.0 (the "License"); you may not use this file
16 # except in compliance with the License. You may obtain a copy of
17 # the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 # Usage: gawk -f currency-check.awk *.xml
20 # Check any
21 # <FormatCode>...[$xxx-...]...</FormatCode>
22 # against every
23 # <CurrencySymbol>xxx</CurrencySymbol>
24 # definition of the same XML file and output symbols if no match was found.
25 # For formatindex="12" to formatindex="15" and for formatindex="17" it is
26 # checked if the used currency symbol is the usedInCompatibleFormatCodes
27 # currency symbol as it is needed by the number formatter.
28 # Also generates output if the generic currency symbol (UTF8 string 0xC2A4)
29 # is used instead of a real currency symbol.
30 # Author: Eike Rathke <er@openoffice.org>
BEGIN {
    # No input file has been seen yet; the rule below compares against this.
    file = ""
}

# Runs on the first record of every input file (FILENAME differs from the
# file remembered so far): report on the file just finished, then reset all
# per-file state before the new file's records are processed.
file != FILENAME {
    if ( file )
        checkIt()
    file = FILENAME
    line = 0
    nFormats = 0
    nCurrencies = 0
    bFormatAuto = 0
    sReplaceFrom = ""
    sReplaceTo = ""
    sMatchReplace = ""
    sRefCurrencyFromLocale = ""
    crlf = 0
    # The tables are indexed 0..n-1 per file.  Resetting only the counters
    # would let an element that is missing in this file silently inherit the
    # value a previous file stored at the same index, so clear them as well.
    delete Formats
    delete FormatLine
    delete FormatAuto
    delete SymbolDefault
    delete SymbolCompati
    delete IDs
    delete IDLine
    delete Symbols
    delete SymbolLine
    delete BankSymbols
    delete BankSymbolLine
}
# Per-record rule: classify the current line of the locale XML file and
# collect format-code currency symbols and currency definitions.
{
    ++line
    # If run under Unix a CrLf spoils ...$ line end checks, so files with DOS
    # line endings are rejected.  exit still runs END; crlf tells it to skip
    # the report.
    if ( /\x0D$/ )
    {
        print "Error: not Unix line ending in line " line
        crlf = 1
        exit(1)
    }
    if ( $1 ~ /^<LC_FORMAT(>|$)/ )
    {
        if ( $0 ~ /replaceFrom="\[CURRENCY\]"/ )
        {
            sReplaceFrom = "\\[CURRENCY\\]"
            # Allow leading blanks exactly like the literal <FormatCode>
            # pattern below; anchored at column 0 this alternative could
            # never match an indented line.
            sMatchReplace = "^[[:blank:]]*<FormatCode>.*" sReplaceFrom
        }
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^replaceTo="/ )
                sReplaceTo = attributeValue( $j, 11 )   # 11 == length of replaceTo="
        }
    }
    else if ( $1 ~ /^<FormatElement(>|$)/ )
    {
        # Format indices 12-15 and 17 must use the
        # usedInCompatibleFormatCodes currency symbol.
        if ( $0 ~ /usage="CURRENCY"/ )
            bFormatAuto = ( $0 ~ /formatindex="1[23457]"/ ) ? 1 : 0
    }
    else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
            (sMatchReplace && $0 ~ sMatchReplace ) )
    {
        if ( sReplaceFrom )
            gsub( sReplaceFrom, sReplaceTo )
        # arr[3] is the text between <FormatCode> and </FormatCode>.
        split( $0, arr, /<|>/ )
        # Cut the code at every "[$" and "-hex]" so that the currency
        # symbols end up as separate pieces.
        split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
        for ( j in code )
        {
            # Pieces containing '#', '0' or "[NatNum" are number format
            # syntax, not a currency symbol.  '#' needs no escape in a
            # regexp; the bracket expression matches the same characters.
            if ( code[j] && code[j] !~ /[#0]|\[NatNum/ )
            {
                FormatLine[nFormats] = file " line " line
                FormatAuto[nFormats] = bFormatAuto
                Formats[nFormats++] = code[j]
            }
        }
        bFormatAuto = 0
    }
    else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
    {
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^ref="/ )
                readRefCurrencies( attributeValue( $j, 5 ) )    # 5 == length of ref="
        }
    }
    else
        getCurrencyParams()
}

# Return the value of an attribute token of the form  name="value" , which
# may be followed by ">" or "/>" when it is the last token of its tag.
#   token      one whitespace separated field of the tag line
#   prefixLen  length of the leading  name="  part
# The value cannot contain blanks because the token comes from awk field
# splitting.
function attributeValue( token, prefixLen,    nTrim ) {
    nTrim = prefixLen + 1       # the  name="  prefix plus the closing quote
    if ( token ~ />$/ )
        ++nTrim
    if ( token ~ /\/>$/ )       # a "/>" ending already counted once above
        ++nTrim
    return substr( token, prefixLen + 1, length(token) - nTrim )
}

# Read <locale>.xml and feed each of its lines to getCurrencyParams() so the
# referenced currencies are recorded for the file currently being checked.
# While reading, file and line describe the referenced file; both are put
# back afterwards.  Prints a note if no line could be read.
function readRefCurrencies( locale,    oldfile, oldline, savedRecord ) {
    # Plain "getline <file" replaces $0 and NF; remember the current record
    # so the caller's loop over its fields continues on the right data.
    savedRecord = $0
    sRefCurrencyFromLocale = file
    oldfile = file
    oldline = line
    file = locale ".xml"
    line = 0
    while ( (getline <file) > 0 )
    {
        ++line
        getCurrencyParams()
    }
    close( file )
    if ( !line )
        print "ref locale not available: " file \
            " (from " oldfile " line " oldline ")"
    file = oldfile
    line = oldline
    sRefCurrencyFromLocale = ""
    $0 = savedRecord
}
# Report on the last input file.  END also runs after the exit(1) taken for
# a file with CR/LF line endings; crlf is set in that case and the report is
# skipped.
END {
    if ( !crlf && file )
        checkIt()
}
# Describe where the current line lives: "<file> line <n>", extended by the
# referencing file while a ref locale is being read.
function currencyLocation() {
    if ( sRefCurrencyFromLocale )
        return file " line " line \
            " (referenced from " sRefCurrencyFromLocale ")"
    return file " line " line
}

# Examine the current record ($0) for one piece of a <Currency> definition
# and store it in the tables at index nCurrencies:
#   <Currency ...>    -> SymbolDefault[], SymbolCompati[]
#   <CurrencyID>      -> IDs[], IDLine[]
#   <CurrencySymbol>  -> Symbols[], SymbolLine[]
#   <BankSymbol>      -> BankSymbols[], BankSymbolLine[]
#   </Currency>       -> ++nCurrencies
# Any other line is ignored.
function getCurrencyParams() {
    # Assumes that each element is on a line on its own!
    if ( $1 ~ /^<Currency(>|$)/ )
    {
        SymbolDefault[nCurrencies] = ( $0 ~ /default="true"/ ) ? 1 : 0
        SymbolCompati[nCurrencies] = ( $0 ~ /usedInCompatibleFormatCodes="true"/ ) ? 1 : 0
        return
    }
    if ( $0 ~ /^[[:blank:]]*<CurrencyID>/ )
    {
        # After the split arr[3] is the text between the opening and the
        # closing tag.
        split( $0, arr, /<|>/ )
        IDLine[nCurrencies] = currencyLocation()
        IDs[nCurrencies] = arr[3]
        return
    }
    if ( $0 ~ /^[[:blank:]]*<CurrencySymbol>/ )
    {
        split( $0, arr, /<|>/ )
        SymbolLine[nCurrencies] = currencyLocation()
        Symbols[nCurrencies] = arr[3]
        return
    }
    if ( $0 ~ /^[[:blank:]]*<BankSymbol>/ )
    {
        split( $0, arr, /<|>/ )
        BankSymbolLine[nCurrencies] = currencyLocation()
        BankSymbols[nCurrencies] = arr[3]
        return
    }
    if ( $1 ~ /^<\/Currency>/ )
        ++nCurrencies
}
# Compare every currency symbol collected from format codes against the
# defined currency symbols and print the offending ones.  Returns 1 if
# anything was printed, else 0.
function checkFormatSymbols(    i, failed ) {
    failed = 0
    for ( i = 0; i < nFormats; ++i )
    {
        state = FormatInSymbol( Formats[i] )
        if ( Formats[i] == "\xc2\xa4" )
            # The generic currency sign (UTF-8 0xC2A4) is never acceptable.
            print " bad: `" Formats[i] "' (" FormatLine[i] ")"
        else if ( state == 0 )
            print "unknown: `" Formats[i] "' (" FormatLine[i] ")"
        else if ( FormatAuto[i] && state < 2 )
            # Symbol is defined but is not the compatible one this format
            # index requires.
            print "badauto: `" Formats[i] "' (" FormatLine[i] ")"
        else
            continue
        failed = 1
    }
    return failed
}

# Print every defined currency symbol together with its role, as context for
# the format code problems reported before.
function listCurrencySymbols(    i ) {
    for ( i = 0; i < nCurrencies; ++i )
    {
        if ( Symbols[i] == "\xc2\xa4" )
            print "def bad: `" Symbols[i] "' (" SymbolLine[i] ")"
        if ( SymbolDefault[i] )
            print "default: `" Symbols[i] "' (" SymbolLine[i] ")"
        if ( SymbolCompati[i] )
            print "compati: `" Symbols[i] "' (" SymbolLine[i] ")"
        if ( !SymbolDefault[i] && !SymbolCompati[i] )
            print "defined: `" Symbols[i] "' (" SymbolLine[i] ")"
    }
}

# Check the currency definitions themselves: no generic currency sign, and
# exactly one default and one usedInCompatibleFormatCodes currency.
# Returns 1 if anything was printed, else 0.
function checkCurrencyFlags(    i, nDefault, nCompati, failed ) {
    failed = 0
    nDefault = 0
    nCompati = 0
    for ( i = 0; i < nCurrencies; ++i )
    {
        if ( Symbols[i] == "\xc2\xa4" )
        {
            failed = 1
            print "def bad: `" Symbols[i] "' (" SymbolLine[i] ")"
        }
        # The first flagged currency is fine, every further one is a dupe.
        if ( SymbolDefault[i] && ++nDefault > 1 )
        {
            failed = 1
            print "dupe default: `" Symbols[i] "' (" SymbolLine[i] ")"
        }
        if ( SymbolCompati[i] && ++nCompati > 1 )
        {
            failed = 1
            print "dupe compati: `" Symbols[i] "' (" SymbolLine[i] ")"
        }
    }
    if ( !nDefault )
    {
        failed = 1
        print " no default: (" file ")"
    }
    if ( !nCompati )
    {
        failed = 1
        print " no compati: (" file ")"
    }
    return failed
}

# Check each CurrencyID for ISO 4217 shape and equality with its BankSymbol.
# Returns 1 if anything was printed, else 0.
function checkCurrencyIDs(    i, failed ) {
    failed = 0
    for ( i = 0; i < nCurrencies; ++i )
    {
        # A CurrencyID has to look like an ISO 4217 code: three ASCII
        # capitals.  The single exception is zh_MO, whose erroneous original
        # data had BankSymbol="P"; that was stored as ISO code in documents,
        # was copied to CurrencyID and is still needed for legacy documents.
        # The letters are spelled out because gawk 3.1.4 matches [A-Z]
        # against lower case (except 'a') regardless of IGNORECASE, and
        # [[:upper:]] would accept more than ASCII.
        if ( !( IDs[i] ~ /^[ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ]$/ \
                || (file == "zh_MO.xml" && IDs[i] == "P") ) )
        {
            failed = 1
            print "no ISO 4217 code: `" IDs[i] "' (" IDLine[i] ")"
        }
        # CurrencyID should equal BankSymbol for now.
        if ( IDs[i] != BankSymbols[i] )
        {
            failed = 1
            print "not equal: CurrencyID `" IDs[i] "' != BankSymbol `" BankSymbols[i] \
                "' (" IDLine[i] " and " BankSymbolLine[i] ")"
        }
    }
    return failed
}

# Run all checks on the data collected for the current file and print the
# findings.  If format codes use bad symbols, the defined currencies are
# listed for reference; otherwise the definitions are checked for their
# default / compatible flags.  The CurrencyID checks always run.  A blank
# line ends the output of a file that had any finding.
function checkIt() {
    bad = checkFormatSymbols()
    if ( bad )
        listCurrencySymbols()
    else
        bad = checkCurrencyFlags()
    if ( checkCurrencyIDs() )
        bad = 1
    if ( bad )
        print ""
}
# Look up a currency symbol used in a format code among the currency symbols
# defined for the current file (Symbols[0..nCurrencies-1]).
#   format  the symbol text taken from a format code
# Returns 0 if no defined symbol equals it,
#         1 if it equals a defined symbol that is not flagged in
#           SymbolCompati[],
#         2 if it equals the symbol flagged in SymbolCompati[].
# state and nSym are locals (awk's extra-parameter idiom), so the function
# no longer overwrites the caller's global state variable.
function FormatInSymbol( format,    state, nSym ) {
    state = 0
    for ( nSym = 0; nSym < nCurrencies; ++nSym )
    {
        if ( format != Symbols[nSym] )
            continue
        # Two currencies can have the same symbol (e.g. az_AZ.xml 'man.'
        # for AZM and AZN), continue to lookup if the match isn't the
        # compatible one.
        if ( SymbolCompati[nSym] )
            return 2
        state = 1
    }
    return state
}
335 # vim: ts=4 sw=4 expandtab