2 # -*- coding: utf-8 -*-
4 # Copyright 2002-2006 Zuza Software Foundation
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 """classes that hold units of .dtd files (dtdunit) or entire files (dtdfile)
23 these are specific .dtd files for localisation used by mozilla"""
25 from translate
.storage
import base
26 from translate
.misc
import quote
def quotefordtd(source):
    """Return source quoted for use as the value of a DTD entity definition.

    The quoting style depends on which quote characters occur in source:

    - no double quote: delegated to quote.quotestr;
    - double quote(s) but no apostrophe: delegated to quote.singlequotestr;
    - both: the value is wrapped in apostrophes and every embedded
      apostrophe is written as the &apos; entity, so that it cannot
      terminate the value early.

    unquotefromdtd is the intended inverse of this function.
    """
    if '"' not in source:
        return quote.quotestr(source)
    if "'" not in source:
        return quote.singlequotestr(source)
    # Both kinds of quote are present: neither delimiter is safe as-is, so
    # delimit with apostrophes and escape the embedded ones as an entity.
    return "'" + source.replace("'", "&apos;") + "'"
def unquotefromdtd(source):
    """unquotes a quoted dtd definition

    source is the quoted definition text; its first character is taken as
    the quote character. Returns the text between the quotes. For
    apostrophe-quoted values, &apos; entities are turned back into literal
    apostrophes (the reverse of the escaping done by quotefordtd).
    """
    # extract the string, get rid of quoting
    # an empty definition is handled as if it were an empty double-quoted string
    if len(source) == 0:
        source = '""'
    # the opening character decides which quote character delimits the value
    quotechar = source[0]
    # quotefinished is not used here: an unterminated quote is not reported
    extracted, quotefinished = quote.extractwithoutquotes(source, quotechar, quotechar, allowreentry=False)
    if quotechar == "'" and "&apos;" in extracted:
        extracted = extracted.replace("&apos;", "'")
    # the quote characters should be the first and last characters in the string
    # of course there could also be quote characters within the string; not handled here
    return extracted
53 class dtdunit(base
.TranslationUnit
):
54 """this class represents an entity definition from a dtd file (and possibly associated comments)"""
55 def __init__(self
, source
=""):
56 """construct the dtdunit, prepare it for parsing"""
57 super(dtdunit
, self
).__init
__(source
)
59 self
.unparsedlines
= []
62 self
.entity
= "FakeEntityOnlyForInitialisationAndTesting"
65 # Note that source and target are equivalent for monolingual units
66 def setsource(self
, source
):
67 """Sets the definition to the quoted value of source"""
68 self
.definition
= quotefordtd(source
)
71 """gets the unquoted source string"""
72 return unquotefromdtd(self
.definition
)
73 source
= property(getsource
, setsource
)
75 def settarget(self
, target
):
76 """Sets the definition to the quoted value of target"""
79 self
.definition
= quotefordtd(target
)
82 """gets the unquoted target string"""
83 return unquotefromdtd(self
.definition
)
84 target
= property(gettarget
, settarget
)
87 """returns whether this dtdunit doesn't actually have an entity definition"""
88 # for dtds, we currently return a blank string if there is no .entity (==location in other files)
89 # TODO: this needs to work better with base class expectations
90 return self
.entity
is None
92 def parse(self
, dtdsrc
):
93 """read the first dtd element from the source code into this object, return linesprocessed"""
95 # make all the lists the same
96 self
.locfilenotes
= self
.comments
97 self
.locgroupstarts
= self
.comments
98 self
.locgroupends
= self
.comments
99 self
.locnotes
= self
.comments
100 # self.locfilenotes = []
101 # self.locgroupstarts = []
102 # self.locgroupends = []
109 lines
= dtdsrc
.split("\n")
115 # print "line(%d,%d): " % (self.incomment,self.inentity),line[:-1]
116 if not self
.incomment
:
117 if (line
.find('<!--') != -1):
119 self
.continuecomment
= 0
120 # now work out the type of comment, and save it (remember we're not in the comment yet)
121 (comment
, dummy
) = quote
.extract(line
, "<!--", "-->", None, 0)
122 if comment
.find('LOCALIZATION NOTE') != -1:
123 l
= quote
.findend(comment
,'LOCALIZATION NOTE')
124 while (comment
[l
] == ' '): l
+= 1
125 if comment
.find('FILE', l
) == l
:
126 self
.commenttype
= "locfile"
127 elif comment
.find('BEGIN', l
) == l
:
128 self
.commenttype
= "locgroupstart"
129 elif comment
.find('END', l
) == l
:
130 self
.commenttype
= "locgroupend"
132 self
.commenttype
= "locnote"
135 self
.commenttype
= "comment"
138 # some kind of comment
139 (comment
, self
.incomment
) = quote
.extract(line
, "<!--", "-->", None, self
.continuecomment
)
140 # print "comment(%d,%d): " % (self.incomment,self.continuecomment),comment
141 self
.continuecomment
= self
.incomment
142 # strip the comment out of what will be parsed
143 line
= line
.replace(comment
, "", 1)
144 # add a end of line of this is the end of the comment
145 if not self
.incomment
:
151 # check if there's actually an entity definition that's commented out
152 # TODO: parse these, store as obsolete messages
153 # if comment.find('<!ENTITY') != -1:
154 # # remove the entity from the comment
155 # comment, dummy = quote.extractwithoutquotes(comment, ">", "<!ENTITY", None, 1)
156 # depending on the type of comment (worked out at the start), put it in the right place
157 # make it record the comment and type as a tuple
158 commentpair
= (self
.commenttype
, comment
)
159 if self
.commenttype
== "locfile":
160 self
.locfilenotes
.append(commentpair
)
161 elif self
.commenttype
== "locgroupstart":
162 self
.locgroupstarts
.append(commentpair
)
163 elif self
.commenttype
== "locgroupend":
164 self
.locgroupends
.append(commentpair
)
165 elif self
.commenttype
== "locnote":
166 self
.locnotes
.append(commentpair
)
167 elif self
.commenttype
== "comment":
168 self
.comments
.append(commentpair
)
170 if not self
.inentity
and not self
.incomment
:
171 entitypos
= line
.find('<!ENTITY')
174 beforeentity
= line
[:entitypos
].strip()
175 if beforeentity
.startswith("#"):
176 self
.hashprefix
= beforeentity
177 self
.entitypart
= "start"
179 self
.unparsedlines
.append(line
)
182 if self
.entitypart
== "start":
183 # the entity definition
184 e
= quote
.findend(line
,'<!ENTITY')
186 self
.entitypart
= "name"
187 self
.entitytype
= "internal"
188 if self
.entitypart
== "name":
190 while (e
< len(line
) and line
[e
].isspace()): e
+= 1
192 if (e
< len(line
) and line
[e
] == '%'):
193 self
.entitytype
= "external"
194 self
.entityparameter
= ""
196 while (e
< len(line
) and line
[e
].isspace()): e
+= 1
197 while (e
< len(line
) and not line
[e
].isspace()):
198 self
.entity
+= line
[e
]
200 while (e
< len(line
) and line
[e
].isspace()): e
+= 1
202 if self
.entitytype
== "external":
203 self
.entitypart
= "parameter"
205 self
.entitypart
= "definition"
206 # remember the start position and the quote character
208 self
.entityhelp
= None
210 elif self
.entitypart
== "definition":
211 self
.entityhelp
= (e
, line
[e
])
213 if self
.entitypart
== "parameter":
215 while (e
< len(line
) and line
[e
].isalnum()): e
+= 1
216 self
.entityparameter
+= line
[paramstart
:e
]
217 while (e
< len(line
) and line
[e
].isspace()): e
+= 1
222 if line
[0] in ('"', "'"):
223 self
.entitypart
= "definition"
224 self
.entityhelp
= (e
, line
[e
])
226 if self
.entitypart
== "definition":
227 if self
.entityhelp
is None:
229 while (e
< len(line
) and line
[e
].isspace()): e
+= 1
232 self
.entityhelp
= (e
, line
[e
])
234 # actually the lines below should remember instring, rather than using it as dummy
235 e
= self
.entityhelp
[0]
236 if (self
.entityhelp
[1] == "'"):
237 (defpart
, self
.instring
) = quote
.extract(line
[e
:], "'", "'", startinstring
=self
.instring
, allowreentry
=False)
238 elif (self
.entityhelp
[1] == '"'):
239 (defpart
, self
.instring
) = quote
.extract(line
[e
:], '"', '"', startinstring
=self
.instring
, allowreentry
=False)
241 raise ValueError("Unexpected quote character... %r" % (self
.entityhelp
[1]))
242 # for any following lines, start at the beginning of the line. remember the quote character
243 self
.entityhelp
= (0, self
.entityhelp
[1])
244 self
.definition
+= defpart
245 if not self
.instring
:
249 # uncomment this line to debug processing
251 for attr
in dir(self
):
252 r
= repr(getattr(self
, attr
))
253 if len(r
) > 60: r
= r
[:57]+"..."
254 self
.comments
.append(("comment", "self.%s = %s" % (attr
, r
) ))
255 return linesprocessed
258 """convert to a string. double check that unicode is handled somehow here"""
259 source
= self
.getoutput()
260 if isinstance(source
, unicode):
261 return source
.encode(getattr(self
, "encoding", "UTF-8"))
265 """convert the dtd entity back to string form"""
267 lines
.extend([comment
for commenttype
, comment
in self
.comments
])
268 lines
.extend(self
.unparsedlines
)
270 result
= "".join(lines
)
271 return result
.rstrip() + "\n"
272 # for f in self.locfilenotes: yield f
273 # for ge in self.locgroupends: yield ge
274 # for gs in self.locgroupstarts: yield gs
275 # for n in self.locnotes: yield n
276 if len(self
.entity
) > 0:
277 if getattr(self
, 'entitytype', None) == 'external':
278 entityline
= '<!ENTITY % '+self
.entity
+' '+self
.entityparameter
+' '+self
.definition
+'>'
280 entityline
= '<!ENTITY '+self
.entity
+' '+self
.definition
+'>'
281 if getattr(self
, 'hashprefix', None):
282 entityline
= self
.hashprefix
+ " " + entityline
283 if isinstance(entityline
, unicode):
284 entityline
= entityline
.encode('UTF-8')
285 lines
.append(entityline
+'\n')
286 return "".join(lines
)
288 class dtdfile(base
.TranslationStore
):
289 """this class represents a .dtd file, made up of dtdunits"""
291 def __init__(self
, inputfile
=None):
292 """construct a dtdfile, optionally reading in from inputfile"""
293 base
.TranslationStore
.__init
__(self
, unitclass
= self
.UnitClass
)
295 self
.filename
= getattr(inputfile
, 'name', '')
296 if inputfile
is not None:
297 dtdsrc
= inputfile
.read()
301 def parse(self
, dtdsrc
):
302 """read the source code of a dtd file in and include them as dtdunits in self.units (any existing units are lost)"""
306 lines
= dtdsrc
.split("\n")
307 while end
< len(lines
):
308 if (start
== end
): end
+= 1
310 while end
< len(lines
):
311 if end
>= len(lines
):
313 if lines
[end
].find('<!ENTITY') > -1:
315 if foundentity
and re
.match("[\"']\s*>", lines
[end
]):
319 # print "processing from %d to %d" % (start,end)
321 linesprocessed
= 1 # to initialise loop
322 while linesprocessed
>= 1:
325 linesprocessed
= newdtd
.parse("\n".join(lines
[start
:end
]))
326 if linesprocessed
>= 1 and (not newdtd
.isnull() or newdtd
.unparsedlines
):
327 self
.units
.append(newdtd
)
329 warnings
.warn("%s\nError occured between lines %d and %d:\n%s" % (e
, start
+1, end
, "\n".join(lines
[start
:end
])))
330 start
+= linesprocessed
333 """convert to a string. double check that unicode is handled somehow here"""
334 source
= self
.getoutput()
335 if isinstance(source
, unicode):
336 return source
.encode(getattr(self
, "encoding", "UTF-8"))
340 """convert the units back to source"""
341 sources
= [str(dtd
) for dtd
in self
.units
]
342 return "".join(sources
)
345 """makes self.index dictionary keyed on entities"""
347 for dtd
in self
.units
:
349 self
.index
[dtd
.entity
] = dtd
352 for dtd
in self
.units
:
353 lines
= dtd
.definition
.split("\n")
355 definition
= lines
[0]
356 for line
in lines
[1:]:
357 if definition
[-1:].isspace() or line
[:1].isspace():
360 definition
+= " " + line
361 dtd
.definition
= definition
363 if __name__
== "__main__":
365 d
= dtdfile(sys
.stdin
)
367 sys
.stdout
.write(str(d
))