2 # Usage: gawk -f currency-check.awk *.xml
4 # Checks every <FormatCode>...[$xxx-...]...</FormatCode>
6 # against each <CurrencySymbol>xxx</CurrencySymbol>
7 # definition of the same XML file and outputs the symbols for which no match was found.
8 # For formatindex="12" to formatindex="15" and for formatindex="17" it is
9 # checked if the used currency symbol is the usedInCompatibleFormatCodes
10 # currency symbol as it is needed by the number formatter.
11 # Also generates output if the generic currency symbol (U+00A4, UTF-8 bytes 0xC2 0xA4)
12 # is used instead of a real currency symbol.
13 # Author: Eike Rathke <er@openoffice.org>
# Per-line processing (excerpt): reset of the reference-locale marker, the
# line-ending check, and the handling of the <LC_FORMAT ...> start tag.
# NOTE(review): brace lines and the computation of `l` are not visible in this
# excerpt.
31 sRefCurrencyFromLocale =
""
37 # If run under Unix a CrLf spoils ...$ line end checks. DOS line endings
# are therefore reported as an error, see the message printed next.
41 print "Error: not Unix line ending in line " line
# <LC_FORMAT ...> start tag: when it carries replaceFrom="[CURRENCY]", remember
# the regex-escaped placeholder and a pattern that finds it in a <FormatCode>.
45 if ( $
1 ~
/^
<LC_FORMAT
(>|$
)/ )
47 if ( $
0 ~
/replaceFrom=
"\[CURRENCY\]"/ )
49 sReplaceFrom =
"\\[CURRENCY\\]"
50 sMatchReplace =
"^<FormatCode>.*" sReplaceFrom
# Scan the remaining fields of the tag for the replaceTo attribute.
52 for ( j=
2; j
<=
NF; ++j
)
54 if ( $j ~
/^replaceTo=
"/ )
# The value starts at character 12, right after the 11-character attribute
# prefix. `l` is set in lines not visible here; presumably it is the number of
# characters to drop (prefix, closing quote, tag end) -- TODO confirm.
61 sReplaceTo = substr( $j, 12, length($j)-l )
# <FormatElement>/<FormatCode> handling (excerpt): records the pieces of a
# format code that look like currency symbols in Formats[], together with their
# location in FormatLine[] and the current bFormatAuto flag in FormatAuto[].
# NOTE(review): brace lines and the statements that set bFormatAuto are not
# visible in this excerpt.
65 else if ( $1 ~ /^<FormatElement(>|$)/ )
67 if ( $0 ~ /usage="CURRENCY
"/ )
# formatindex 12, 13, 14, 15 or 17; the statement guarded by this test is not
# visible here.
69 if ( $0 ~ /formatindex="1[23457]"/ )
# A <FormatCode> line that contains a [$...-<hex digits>] part, or that matches
# the placeholder pattern when sMatchReplace is non-empty.
75 else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
76 (sMatchReplace && $0 ~ sMatchReplace ) )
# Substitute the placeholder in the current line, take the element text
# (arr[3], the part between the opening and closing tag) and cut it at every
# "[$" and "-<hex digits>]" delimiter.
79 gsub( sReplaceFrom, sReplaceTo )
80 split( $0, arr, /<|>/ )
81 split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
# Keep only truthy pieces that contain none of '#', '0' or '[NatNum'.
84 if ( code[j] && code[j] !~ /\#|0|\[NatNum/ )
86 FormatLine[nFormats] = file " line
" line
87 FormatAuto[nFormats] = bFormatAuto
88 Formats[nFormats++] = code[j]
# <LC_CURRENCY ...> start tag (excerpt): extracts a locale name from one of the
# tag's fields and reads another file line by line with getline.
# NOTE(review): the test that selects the field, the assignment that redirects
# `file`, and the loop body are not visible in this excerpt.
93 else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
95 for ( j=2; j<=NF; ++j )
# The value starts at character 6 -- presumably after a 5-character ref="
# prefix; TODO confirm against the missing condition.
104 locale =
substr( $j
, 6, length($j
)-l
)
# Remember the referencing file; getCurrencyParams appends
# "(referenced from ...)" to recorded locations while this is non-empty.
105 sRefCurrencyFromLocale = file
110 while ( (getline <file
) > 0 )
# Message for a reference that could not be followed (the guarding test is not
# visible here).
117 print "ref locale not available: " file \
118 " (from " oldfile
" line " oldline
")"
# Clear the marker again once the reference has been handled.
121 sRefCurrencyFromLocale =
""
# getCurrencyParams(): inspects the current input line and records currency
# data at index nCurrencies of several global arrays.
#   <Currency ...> start tag: SymbolDefault[] is set to 1 or 0 around the test
#       for default="true", SymbolCompati[] likewise for
#       usedInCompatibleFormatCodes="true" (the `else` lines between the two
#       assignments are not visible in this excerpt).
#   <CurrencyID>:     IDs[] = element text, IDLine[] = "file line N".
#   <CurrencySymbol>: Symbols[] = element text, SymbolLine[] = location.
#   <BankSymbol>:     BankSymbols[] = element text, BankSymbolLine[] = location.
#   </Currency>:      branch body not visible here; presumably it advances
#       nCurrencies -- TODO confirm.
# The element text is arr[3] after splitting the line on '<' and '>'. While
# sRefCurrencyFromLocale is non-empty, each location string additionally gets
# " (referenced from <that value>)" appended.
# Takes no parameters; reads $0, $1, file, line, nCurrencies and
# sRefCurrencyFromLocale.
136 function getCurrencyParams
() {
137 # Assumes that each element is on a line on its own!
138 if ( $
1 ~
/^
<Currency
(>|$
)/ )
140 if ( $
0 ~
/default=
"true"/ )
141 SymbolDefault
[nCurrencies
] =
1
143 SymbolDefault
[nCurrencies
] =
0
144 if ( $
0 ~
/usedInCompatibleFormatCodes=
"true"/ )
145 SymbolCompati
[nCurrencies
] =
1
147 SymbolCompati
[nCurrencies
] =
0
149 else if ( $
0 ~
/^
[[:blank
:]]*<CurrencyID
>/ )
151 split( $
0, arr
, /<|>/ )
152 if ( sRefCurrencyFromLocale
)
153 IDLine
[nCurrencies
] = file
" line " line \
154 " (referenced from " sRefCurrencyFromLocale
")"
156 IDLine
[nCurrencies
] = file
" line " line
157 IDs
[nCurrencies
] = arr
[3]
159 else if ( $
0 ~
/^
[[:blank
:]]*<CurrencySymbol
>/ )
161 split( $
0, arr
, /<|>/ )
162 if ( sRefCurrencyFromLocale
)
163 SymbolLine
[nCurrencies
] = file
" line " line \
164 " (referenced from " sRefCurrencyFromLocale
")"
166 SymbolLine
[nCurrencies
] = file
" line " line
167 Symbols
[nCurrencies
] = arr
[3]
169 else if ( $
0 ~
/^
[[:blank
:]]*<BankSymbol
>/ )
171 split( $
0, arr
, /<|>/ )
172 if ( sRefCurrencyFromLocale
)
173 BankSymbolLine
[nCurrencies
] = file
" line " line \
174 " (referenced from " sRefCurrencyFromLocale
")"
176 BankSymbolLine
[nCurrencies
] = file
" line " line
177 BankSymbols
[nCurrencies
] = arr
[3]
179 else if ( $
1 ~
/^
<\
/Currency
>/ )
# Format check loop (excerpt; the header of the enclosing definition is not
# visible here). For every collected format symbol Formats[j]:
#   - " bad"    is printed when the symbol equals the bytes "\xc2\xa4",
#   - "unknown" is printed when FormatInSymbol() returned 0,
#   - "badauto" is printed when FormatAuto[j] is set and the result is below 2.
# Each message carries the symbol and its recorded location FormatLine[j].
# NOTE(review): statements on lines missing from this excerpt (between the
# tests and the prints) may do additional bookkeeping.
188 for ( j=
0; j
<nFormats
; ++j
)
190 state = FormatInSymbol
( Formats
[j
] )
191 if ( Formats
[j
] ==
"\xc2\xa4" )
194 print " bad: `" Formats
[j
] "' (" FormatLine
[j
] ")"
196 else if ( state ==
0 )
199 print "unknown: `" Formats
[j
] "' (" FormatLine
[j
] ")"
201 else if ( FormatAuto
[j
] && state
< 2 )
204 print "badauto: `" Formats
[j
] "' (" FormatLine
[j
] ")"
# Currency listing loop (excerpt): walks Symbols[0..nCurrencies-1] and prints
# one labelled line per property with the symbol and its location SymbolLine[j]:
#   "def bad"  when the symbol equals the bytes "\xc2\xa4",
#   "default"  under the SymbolDefault[j] test,
#   "compati"  under the SymbolCompati[j] test,
#   "defined"  -- its guarding condition, if any, is not visible here.
# NOTE(review): the condition under which this whole loop runs is on lines
# missing from this excerpt.
209 for ( j=
0; j
<nCurrencies
; ++j
)
212 if ( Symbols
[j
] ==
"\xc2\xa4" )
213 print "def bad: `" Symbols
[j
] "' (" SymbolLine
[j
] ")"
214 if ( SymbolDefault
[j
] )
217 print "default: `" Symbols
[j
] "' (" SymbolLine
[j
] ")"
219 if ( SymbolCompati
[j
] )
222 print "compati: `" Symbols
[j
] "' (" SymbolLine
[j
] ")"
225 print "defined: `" Symbols
[j
] "' (" SymbolLine
[j
] ")"
# Default/compatible consistency loop (excerpt): walks all recorded currencies
# and prints "def bad" for a symbol equal to the bytes "\xc2\xa4", and the
# "dupe default" / "dupe compati" messages inside the SymbolDefault[j] and
# SymbolCompati[j] branches. After the loop, " no default" and " no compati"
# messages name the current file.
# NOTE(review): the counters or flags that decide when the "dupe ..." and
# " no ..." messages are actually printed are on lines missing from this
# excerpt -- presumably they track whether a default / compatible currency was
# already seen; confirm against the full file.
232 for ( j=
0; j
<nCurrencies
; ++j
)
234 if ( Symbols
[j
] ==
"\xc2\xa4" )
237 print "def bad: `" Symbols
[j
] "' (" SymbolLine
[j
] ")"
239 if ( SymbolDefault
[j
] )
246 print "dupe default: `" Symbols
[j
] "' (" SymbolLine
[j
] ")"
249 if ( SymbolCompati
[j
] )
256 print "dupe compati: `" Symbols
[j
] "' (" SymbolLine
[j
] ")"
263 print " no default: (" file
")"
268 print " no compati: (" file
")"
# CurrencyID checks (excerpt): for every recorded currency, reports an ID that
# is not exactly three ASCII capital letters (with one hard-coded exception)
# and an ID that differs from the BankSymbol recorded at the same index.
271 for ( j=
0; j
<nCurrencies
; ++j
)
273 # Check if CurrencyID at least resembles some ISO 4217 code.
274 # The only exception is zh_MO that had an erroneous original data set
275 # with BankSymbol="P" (stored as ISO code in documents, hence copied to
276 # CurrencyID now) and needs that entry for legacy documents.
277 # There is a strange bug in gawk 3.1.4 that does a match of [A-Z] on
278 # lower case except 'a', regardless of IGNORECASE setting, hence this
279 # ugly notation. [[:upper:]] wouldn't be correct since we want only
# the 26 ASCII capitals that are spelled out in the pattern below.
281 if ( IDs
[j
] !~
/^
[ABCDEFGHIJKLMNOPQRSTUVWXYZ
][ABCDEFGHIJKLMNOPQRSTUVWXYZ
][ABCDEFGHIJKLMNOPQRSTUVWXYZ
]$
/ \
282 && !
(file ==
"zh_MO.xml" && IDs
[j
] ==
"P") )
285 print "no ISO 4217 code: `" IDs
[j
] "' (" IDLine
[j
] ")"
287 # CurrencyID should equal BankSymbol for now.
288 if ( IDs
[j
] != BankSymbols
[j
] )
291 print "not equal: CurrencyID `" IDs
[j
] "' != BankSymbol `" BankSymbols
[j
] \
292 "' (" IDLine
[j
] " and " BankSymbolLine
[j
] ")"
# FormatInSymbol( format ): searches Symbols[0..nCurrencies-1] for an entry
# equal to `format` and, on a match, tests SymbolCompati[] of that entry.
# NOTE(review): the return statements are not visible in this excerpt. The
# caller prints "unknown" for a result of 0 and "badauto" for a result below 2
# on auto formats -- presumably 0 = no match, 1 = match without the
# compatibility flag, 2 = match with it; TODO confirm.
# nSym is not declared as a local parameter, so it is a global variable.
300 function FormatInSymbol
( format
) {
302 for ( nSym=
0; nSym
<nCurrencies
; ++nSym
)
304 if ( format == Symbols
[nSym
] )
306 # Two currencies can have the same symbol (e.g. az_AZ.xml 'man.'
307 # for AZM and AZN), continue to lookup if the match isn't the
# one flagged in SymbolCompati (tested right below).
309 if ( SymbolCompati
[nSym
] )
318 # vim: ts=4 sw=4 expandtab