"""Contains the functions used to read the input file and print the checkpoint
files with xml formatting.

Copyright (C) 2013, Joshua More and Michele Ceriotti

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.

Classes:
    xml_node: Class to handle a particular xml tag.
    xml_handler: Class giving general xml data reading methods.

Functions:
    xml_parse_string: Parses a string made from a section of a xml input file.
    xml_parse_file: Parses an entire xml input file.
    read_type: Reads a string and outputs data of a specified type.
    read_float: Reads a string and outputs a float.
    read_int: Reads a string and outputs an integer.
    read_bool: Reads a string and outputs a boolean.
    read_list: Reads a string and outputs a list.
    read_array: Reads a string and outputs an array.
    read_tuple: Reads a string and outputs a tuple.
    read_dict: Reads a string and outputs a dictionary.
    write_type: Writes a string from data of a specified type.
    write_list: Writes a string from a list.
    write_tuple: Writes a string from a tuple.
    write_float: Writes a string from a float.
    write_bool: Writes a string from a boolean.
    write_dict: Writes a string from a dictionary.
"""
# Public names exported by this module.
__all__ = ['xml_node', 'xml_handler', 'xml_parse_string', 'xml_parse_file',
           'read_type', 'read_float', 'read_int', 'read_bool', 'read_list',
           'read_array', 'read_tuple', 'read_dict', 'write_type', 'write_list',
           'write_tuple', 'write_float', 'write_bool', 'write_dict']
import string
from xml.sax import parseString, parse
from xml.sax.handler import ContentHandler

import numpy as np
class xml_node(object):
    """Class to handle a particular xml tag.

    Tags are generally written in the form
    <tag_name attribs="attrib_data"> main_data </tag_name>. This class holds
    tag_name, attrib_data and main_data separately so they can be used to
    create the objects with the appropriate names and data.

    Attributes:
        attribs: The attribute data for the tag.
        fields: The rest of the data. The xml handler in this module fills it
            with (name, value) pairs: one per child tag, plus a final
            ("_text", text) pair.
        name: The tag name.
    """

    def __init__(self, attribs=None, name="", fields=None):
        """Initialises xml_node.

        Args:
            attribs: An optional dictionary giving attribute data.
                Defaults to {}.
            fields: An optional container holding all the data between the
                start and end tags, including information about other nodes.
                Defaults to [].
            name: An optional string giving the tag name. Defaults to ''.
        """

        # None is the default so that each node gets its own fresh container
        # rather than sharing one mutable default between instances.
        if attribs is None:
            attribs = {}
        if fields is None:
            fields = []

        self.attribs = attribs
        self.name = name
        self.fields = fields
class xml_handler(ContentHandler):
    """Class giving general xml_reading methods.

    Uses the standard python xml_reader to read the different kinds of data.
    Keeps track of the hierarchical nature of an xml file by recording the
    level of nesting, so that the correct data and attributes can be
    associated with the correct tag.

    Attributes:
        root: An xml_node object for the root node.
        open: The list of the nodes whose start tag has been read but whose
            end tag has not been reached yet. open[0] is the root node.
        level: The level of nesting that the parser is currently at.
        buffer: A list with one entry per open level, each holding the pieces
            of text found between the tags at that level.
    """

    def __init__(self):
        """Initialises xml_handler."""

        ContentHandler.__init__(self)

        # root xml node with all the data
        self.root = xml_node(name="root", fields=[])
        self.open = [self.root]
        # current level of the hierarchy; 0 means directly inside the root
        self.level = 0
        # Holds all the data between each of the tags.
        # If level = 1, then buffer[0] holds all the data collected between
        # the root tags, and buffer[1] holds all the data collected between
        # the tags of the currently open child.
        self.buffer = [[""]]

    def startElement(self, name, attrs):
        """Reads an opening tag.

        Adds the opening tag to the list of open tags, adds a new space in the
        buffer, reads the appropriate attributes and adds a new level to the
        hierarchy.

        Args:
            name: The tag name.
            attrs: The attribute data.
        """

        # creates a new node, copying the attributes into a plain dictionary
        newnode = xml_node(attribs=dict(attrs.items()), name=name, fields=[])
        # adds it to the list of open nodes
        self.open.append(newnode)
        # adds it to the list of fields of the parent tag; self.level still
        # indexes the parent because it is only incremented below
        self.open[self.level].fields.append((name, newnode))
        # gets ready to read new data
        self.buffer.append([""])
        self.level += 1

    def characters(self, data):
        """Reads data.

        Adds the data to the buffer of the current level of the hierarchy.
        Data is read as a string, and needs to be converted to the required
        type later.

        Args:
            data: The data to be read.
        """

        self.buffer[self.level].append(data)

    def endElement(self, name):
        """Reads a closing tag.

        Once all the data has been read, and the closing tag found, the buffer
        is read into the appropriate field.

        Args:
            name: The tag name.
        """

        # all the text found between the tags is joined and stored in the
        # xml_node that is being closed
        text = ''.join(self.buffer.pop(self.level))
        # 'closes' the xml_node object, as we are no longer within its tags,
        # so there is no more data to be added to it. The node is still held
        # in the fields of its parent, so it is dropped from the open list.
        node = self.open.pop(self.level)
        node.fields.append(("_text", text))
        self.level -= 1
def xml_parse_string(buf):
    """Parses a string made from a section of a xml input file.

    Args:
        buf: A string in correct xml format.

    Returns:
        A xml_node for the root node of the file.
    """

    myhandle = xml_handler()
    parseString(buf, myhandle)
    # the handler has collected every tag under its root node
    return myhandle.root
def xml_parse_file(stream):
    """Parses an entire xml input file.

    Args:
        stream: The xml formatted input, passed straight to xml.sax.parse,
            which accepts either a file name or a file object.

    Returns:
        A xml_node for the root node of the file.
    """

    myhandle = xml_handler()
    parse(stream, myhandle)
    # the handler has collected every tag under its root node
    return myhandle.root
def read_type(type, data):
    """Reads a string and outputs data of a specified type.

    Args:
        type: The data type of the target container.
        data: The string to be read in.

    Raises:
        TypeError: Raised if it tries to read into a data type that has not
            been registered in readtype_funcs.

    Returns:
        An object of type type.
    """

    if type not in readtype_funcs:
        raise TypeError("Conversion not available for given type")
    # the parsed value is passed through the type itself so that the result
    # is exactly of the requested type
    return type(readtype_funcs[type](data))
def read_float(data):
    """Reads a string and outputs a float.

    Args:
        data: The string to be read in.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        A float.
    """

    return float(data)
def read_int(data):
    """Reads a string and outputs an integer.

    Args:
        data: The string to be read in.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        An integer.
    """

    return int(data)
def read_bool(data):
    """Reads a string and outputs a boolean.

    Takes a string of the form 'true' or 'false', and returns the appropriate
    boolean. Surrounding whitespace and letter case are ignored.

    Args:
        data: The string to be read in.

    Raises:
        ValueError: Raised if the string is not 'true' or 'false'.

    Returns:
        A boolean.
    """

    # normalise once instead of once per comparison
    token = data.strip().upper()
    if token == "TRUE":
        return True
    if token == "FALSE":
        return False
    raise ValueError(data + " does not represent a bool value")
def read_list(data, delims="[]", split=",", strip=" \n\t'"):
    """Reads a formatted string and outputs a list.

    The string must be formatted in the correct way.
    The list starts after the first occurrence of delims[0], ends at the
    next occurrence of delims[1], and each element must be split along
    the character split. Characters at the beginning or end of each element
    that are in strip are ignored. The standard list format is of the
    form '[array[0], array[1],..., array[n]]', which is used for actual lists.
    Other formats are used for tuples and dictionaries.

    Args:
        data: The string to be read in.
        delims: A string of two characters giving the first and last
            character of the list format. '[]' by default.
        split: The character between different elements of the list format.
            ',' by default.
        strip: Characters to be removed from the beginning and end of each
            element. " \\n\\t'" by default.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        A list of strings.
    """

    try:
        begin = data.index(delims[0])
        # The closing delimiter is searched for after the opening one, so a
        # stray closing character earlier in the string cannot produce an
        # empty slice, and identical opening/closing characters also work.
        end = data.index(delims[1], begin + 1)
    except ValueError:
        raise ValueError("Error in list syntax: could not locate delimiters")

    rlist = [item.strip(strip) for item in data[begin + 1:end].split(split)]

    # handles empty lists correctly: splitting an empty body gives ['']
    if rlist == [""]:
        return []

    return rlist
def read_array(dtype, data):
    """Reads a formatted string and outputs an array.

    The format is as for standard python arrays, which is
    [array[0], array[1], ... , array[n]]. Note the use of comma separators,
    and the use of square brackets.

    Args:
        dtype: The data type of the elements of the target array.
        data: The string to be read in.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        An array of data type dtype.
    """

    # each element string is converted with the reader registered for dtype
    rlist = [read_type(dtype, item) for item in read_list(data)]

    return np.array(rlist, dtype)
def read_tuple(data, delims="()", split=",", strip=" \n\t'", arg_type=int):
    """Reads a formatted string and outputs a tuple.

    The format is as for standard python tuples, which is
    (tuple[0], tuple[1], ... , tuple[n]). Note the comma
    separators, and the use of brackets.

    Args:
        data: The string to be read in.
        delims: A string of two characters giving the first and last
            character of the list format. '()' by default.
        split: The character between different elements of the list format.
            ',' by default.
        strip: Characters to be removed from the beginning and end of each
            element. " \\n\\t'" by default.
        arg_type: The strings in the input will be converted with this
            callable, and a tuple of arg_type will be returned. int by
            default.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        A tuple of elements of the specified data type.
    """

    rlist = read_list(data, delims=delims, split=split, strip=strip)
    return tuple(arg_type(item) for item in rlist)
def read_dict(data, delims="{}", split=",", key_split=":", strip=" \n\t"):
    """Reads a formatted string and outputs a dictionary.

    The format is as for standard python dictionaries, which is
    {keyword[0]: arg[0], keyword[1]: arg[1], ... , keyword[n]: arg[n]}. Note
    the comma separators, and the use of curly brackets.

    Args:
        data: The string to be read in.
        delims: A string of two characters giving the first and last
            character of the list format. '{}' by default.
        split: The character between different elements of the list format.
            ',' by default.
        key_split: The character between the key word and the value.
            ':' by default.
        strip: Characters to be removed from the beginning and end of each
            element. ' \\n\\t' by default.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        A dictionary of strings.
    """

    rlist = read_list(data, delims=delims, split=split, strip=strip)

    rdict = {}
    for item in rlist:
        # A real list is built here: map() returns a lazy iterator on
        # Python 3, which has no len().
        pair = [part.strip(strip) for part in item.split(key_split)]
        if len(pair) != 2:
            raise ValueError("Format for a key:value format is wrong for item " + item)
        rdict[pair[0]] = pair[1]

    return rdict
# Maps each supported target type to the function that parses a string into
# it; read_type looks the conversion up here.
# The str entry strips through the string's own method, because the
# string.strip function was removed from the string module in Python 3.
# NOTE(review): read_array takes (dtype, data) while read_type calls the
# registered function with data only -- confirm np.ndarray is never passed
# to read_type.
readtype_funcs = {
    np.ndarray: read_array,
    dict: read_dict,
    float: read_float,
    int: read_int,
    bool: read_bool,
    str: lambda data: data.strip(),
    tuple: read_tuple,
    np.uint: read_int,
}
def write_type(type, data):
    """Writes a formatted string from a value of a specified type.

    Args:
        type: The data type of the value.
        data: The value to be written out.

    Raises:
        TypeError: Raised if it tries to write from a data type that has not
            been registered in writetype_funcs.

    Returns:
        A formatted string.
    """

    if type not in writetype_funcs:
        raise TypeError("Conversion not available for given type")
    return writetype_funcs[type](data)
def write_list(data, delims="[]"):
    """Writes a formatted string from a list.

    The format of the output is as for a standard python list,
    [list[0], list[1],..., list[n]]. Note the space after the commas, and the
    use of square brackets.

    Args:
        data: The list to be written out.
        delims: An optional string of two characters giving the first and last
            character to be printed. Defaults to "[]".

    Returns:
        A formatted string.
    """

    # join() places the separator only between elements, so nothing has to be
    # stripped afterwards and an element that itself ends in ',' or ' ' is
    # written out unchanged.
    body = ", ".join(str(v) for v in data)
    return delims[0] + body + delims[1]
def write_tuple(data):
    """Writes a formatted string from a tuple.

    The format of the output is as for a standard python tuple,
    (tuple[0], tuple[1],..., tuple[n]). Note the space after the commas, and
    the use of brackets.

    Args:
        data: The tuple to be written out.

    Returns:
        A formatted string.
    """

    return write_list(data, delims="()")
def write_float(data):
    """Writes a formatted string from a float.

    Floats are printed out in exponential format, to 8 decimal places and
    padded on the left with spaces to a width of 16 characters.

    For example 1.0 --> '  1.00000000e+00'

    Args:
        data: The value to be written out.

    Returns:
        A formatted string.
    """

    return "%16.8e" % (data)
def write_bool(data):
    """Writes a formatted string from a boolean.

    The value is converted with str() and right-aligned in exactly 5
    characters, so booleans are printed as either ' True' or 'False'.

    Args:
        data: The value to be written out.

    Returns:
        A formatted string.
    """

    return "%5.5s" % (str(data))
def write_dict(data, delims="{}"):
    """Writes a formatted string from a dictionary.

    The format of the output is as for a standard python dictionary,
    {keyword[0]: arg[0], keyword[1]: arg[1],..., keyword[n]: arg[n]}. Note the
    space after the commas, and the use of curly brackets.

    Args:
        data: The dictionary to be written out.
        delims: An optional string of two characters giving the first and last
            character to be printed. Defaults to "{}".

    Returns:
        A formatted string.
    """

    # join() places the separator only between entries, so nothing has to be
    # stripped afterwards and a value that itself ends in ',' or ' ' is
    # written out unchanged.
    body = ", ".join(str(key) + ": " + str(data[key]) for key in data)
    return delims[0] + body + delims[1]
# Maps each supported value type to the function that formats it as a string;
# write_type looks the conversion up here.
# The str entry strips through the string's own method, because the
# string.strip function was removed from the string module in Python 3.
writetype_funcs = {
    float: write_float,
    dict: write_dict,
    int: str,
    bool: write_bool,
    str: lambda data: data.strip(),
    tuple: write_tuple,
    np.uint: str,
}