Avoid potential negative array index access to cached text.
[LibreOffice.git] / sc / workben / celltrans / parse.py
blob937d92f64d2843f71f087642ec4595af336f9f53
1 #!/usr/bin/env python
3 # This file is part of the LibreOffice project.
5 # This Source Code Form is subject to the terms of the Mozilla Public
6 # License, v. 2.0. If a copy of the MPL was not distributed with this
7 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 # This file incorporates work covered by the following license notice:
11 # Licensed to the Apache Software Foundation (ASF) under one or more
12 # contributor license agreements. See the NOTICE file distributed
13 # with this work for additional information regarding copyright
14 # ownership. The ASF licenses this file to you under the Apache
15 # License, Version 2.0 (the "License"); you may not use this file
16 # except in compliance with the License. You may obtain a copy of
17 # the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 import sys
# Human-readable language names for the locale codes this tool knows about.
localeNames = {'fr': 'French', 'hu': 'Hungarian', 'de': 'German'}


def getLocaleName(code):
    """Return the language name registered for a locale code.

    Codes that are not keys of ``localeNames`` yield the placeholder
    string ``"(unknown locale)"`` instead of raising.
    """
    # Reading a module-level name needs no ``global`` declaration, and
    # ``dict.get`` performs the lookup and the fallback in a single step.
    return localeNames.get(code, "(unknown locale)")
def getAscii(ords):
    """Convert an iterable of integer character codes into a string.

    Every element is passed through ``chr``; an empty iterable gives ``''``.
    """
    # Join once instead of growing a string with ``+=`` in a loop; this also
    # avoids a local variable that shadowed the ``ascii`` builtin.
    return ''.join(map(chr, ords))
class LocaleData(object):
    """Keyword translations collected for one locale, with C++ code output.

    ``funcList`` maps a function name to a list of ``[localeName, engName]``
    pairs. Both entries of a pair are sequences of integer character codes.
    """

    def __init__(self, locale):
        self.locale = locale
        self.funcList = {}

    def addKeywordMap(self, funcName, localeName, engName):
        """Append a ``[localeName, engName]`` pair under ``funcName``."""
        # setdefault creates the per-function list on first use.
        self.funcList.setdefault(funcName, []).append([localeName, engName])

    def getLocaleFuncVarName(self, func, pair):
        """Return the identifier of the array holding one localized name.

        The identifier is ``<func>_<english name>_<locale>``; the function
        name and the english name are lower-cased, the locale is used as is.
        """
        return func.lower() + "_" + getAscii(pair[1]).lower() + "_" + self.locale

    def dumpCode(self):
        """Return the generated source text for this locale as one string.

        The text consists of a comment banner, the ``Locale`` constant, one
        null-terminated ``sal_Unicode`` array per recorded keyword, the
        ``TransItem`` table ending in a ``{NULL, NULL, ocNone}`` entry, and
        the closing ``addToMap`` call. Functions are emitted in sorted order.
        """
        localeVar = "a" + self.locale.capitalize()
        tableVar = "p" + self.locale.capitalize()
        rule = "// " + "-"*75 + "\n"

        # Collect the pieces and join once at the end rather than repeatedly
        # extending a string.
        out = []

        # locale output
        out.append(rule)
        out.append("// %s language locale (automatically generated)\n"
                   % getLocaleName(self.locale))
        out.append(rule)
        out.append("static const Locale " + localeVar + "(OUString(\"")
        out.append(self.locale)
        out.append("\"), OUString(), OUString());\n\n")

        # pre instantiations of localized function names.
        funcs = sorted(self.funcList)
        out.append("// pre instantiations of localized function names\n")
        for func in funcs:
            for item in self.funcList[func]:
                # Dump the code units, then null-terminate the string.
                units = ["0x%.4X" % uval for uval in item[0]]
                units.append("0x0000")
                out.append("static const sal_Unicode "
                           + self.getLocaleFuncVarName(func, item) + "[] = {\n")
                out.append(" ")
                out.append(", ".join(units))
                out.append("};\n")

        # map item instantiations
        out.append("\n")
        out.append("static const TransItem " + tableVar + "[] = {\n")
        for func in funcs:
            for item in self.funcList[func]:
                out.append(" ")
                out.append("{%s, \"%s\", %s},\n" % (
                    self.getLocaleFuncVarName(func, item),
                    getAscii(item[1]),
                    "oc" + func.capitalize()))

        out.append(" {NULL, NULL, ocNone}\n")
        out.append("};\n\n")

        # addToMap call
        out.append("addToMap(%s, %s);\n" % (tableVar, localeVar))

        return "".join(out)
class Parser(object):
    """Convert a UTF-16 little-endian keyword file into generated source.

    As handled by ``parse``: a ``$`` introduces a locale line, an ``@``
    introduces a function-name line, commas separate words, tabs and spaces
    are ignored, and a linefeed ends a line. When a line holds at least two
    words, the first two are recorded for the current function under the most
    recently declared locale. Pairs are only recorded when a linefeed is
    seen, so a final line without a terminating linefeed is not recorded.
    """

    def __init__(self, args):
        """Take ``sys.argv``-style ``args``: [program, infile, outfile]."""
        # default input & output files.
        self.infile = "./keywords_utf16.txt"
        self.outfile = "../../source/core/tool/cellkeywords.inl"

        if len(args) >= 2:
            self.infile = args[1]
        if len(args) >= 3:
            self.outfile = args[2]

    def getDByte(self):
        """Return the 16-bit value at the cursor and advance by two bytes."""
        # Assume little endian: the first byte is the low-order one.
        low = self.bytes[self.i]
        high = self.bytes[self.i+1]
        try:
            # Indexing yields 1-character strings on Python 2.
            dbyte = ord(high)*256 + ord(low)
        except TypeError:
            # Indexing bytes yields ints on Python 3, which ord() rejects.
            dbyte = high*256 + low
        self.i += 2
        return dbyte

    def parseLine(self):
        """Return the code units up to (not including) the next linefeed.

        The cursor is left just past the linefeed, or at the end of input.
        """
        buf = []
        while self.i < self.size:
            dbyte = self.getDByte()
            if dbyte == 0x000A:
                break
            buf.append(dbyte)
        return buf

    def dumpBuf(self, buf, linefeed=True):
        """Write each code in ``buf`` to stdout as a character."""
        for item in buf:
            sys.stdout.write(chr(item))
        if linefeed:
            print('')

    def parse(self):
        """Read ``self.infile``, parse it and write ``self.outfile``.

        A keyword line seen before any ``$`` locale line raises IndexError,
        because there is no locale object to attach the pair to.
        """
        # The context manager closes the file even if reading fails.
        with open(self.infile, 'rb') as infile:
            self.bytes = infile.read()

        # The input is consumed two bytes at a time; ignore a dangling
        # trailing byte instead of letting getDByte raise IndexError.
        self.size = len(self.bytes) - len(self.bytes) % 2
        self.i = 0

        localeList = []  # stores an array of locale data objects.
        funcName = None
        word = []
        wordPair = []

        while self.i < self.size:
            dbyte = self.getDByte()
            if dbyte == 0xFEFF and self.i == 2:
                # unicode signature - ignore it.
                pass
            elif dbyte == 0x0024:
                # $ - locale name
                localeList.append(LocaleData(getAscii(self.parseLine())))
            elif dbyte == 0x0040:
                # @ - function name
                funcName = getAscii(self.parseLine())
            elif dbyte == 0x002C:
                # , - comma separator
                if word:
                    wordPair.append(word)
                    word = []
            elif dbyte == 0x000A:
                # linefeed
                if word:
                    wordPair.append(word)
                    word = []
                if len(wordPair) >= 2:
                    localeList[-1].addKeywordMap(funcName, wordPair[0], wordPair[1])
                # NOTE(review): the pair buffer is reset on every linefeed,
                # whether or not the line was complete - confirm this is the
                # intended handling of lines with fewer than two words.
                wordPair = []
            elif dbyte in (0x0009, 0x0020):
                # whitespace - ignore it.
                pass
            else:
                word.append(dbyte)

        chars = "// This file has been automatically generated. Do not hand-edit this!\n"
        chars += "".join("\n" + obj.dumpCode() for obj in localeList)

        # Write to output file.
        with open(self.outfile, 'w') as outfile:
            outfile.write(chars)
# Script entry point: run the conversion using the command-line arguments.
if __name__ == '__main__':
    Parser(sys.argv).parse()