git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@16053 f3b2605a-c512-4ea7-a41b...
[lammps.git] / tools / i-pi / ipi / utils / io / io_xml.py
blob5e43854408c44247298277967e1d73cffa86686f
1 """Contains the functions used to read the input file and print the checkpoint
2 files with xml formatting.
4 Copyright (C) 2013, Joshua More and Michele Ceriotti
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 Functions:
21 xml_node: Class to handle a particular xml tag.
22 xml_handler: Class giving general xml data reading methods.
23 xml_parse_string: Parses a string made from a section of a xml input file.
24 xml_parse_file: Parses an entire xml input file.
25 read_type: Reads a string and outputs data of a specified type.
26 read_float: Reads a string and outputs a float.
27 read_int: Reads a string and outputs an integer.
28 read_bool: Reads a string and outputs a boolean.
29 read_list: Reads a string and outputs a list.
30 read_array: Reads a string and outputs an array.
31 read_tuple: Reads a string and outputs a tuple.
32 read_dict: Reads a string and outputs a dictionary.
33 write_type: Writes a string from data of a specified type.
34 write_list: Writes a string from a list.
35 write_tuple: Writes a string from a tuple.
36 write_float: Writes a string from a float.
37 write_bool: Writes a string from a boolean.
38 write_dict: Writes a string from a dictionary.
39 """
# Names exported by "from io_xml import *".
__all__ = [
    'xml_node',
    'xml_handler',
    'xml_parse_string',
    'xml_parse_file',
    'read_type',
    'read_float',
    'read_int',
    'read_bool',
    'read_list',
    'read_array',
    'read_tuple',
    'read_dict',
    'write_type',
    'write_list',
    'write_tuple',
    'write_float',
    'write_bool',
    'write_dict',
]
46 from xml.sax import parseString, parse
47 from xml.sax.handler import ContentHandler
48 import numpy as np
49 import string
class xml_node(object):
    """Container for the contents of a single xml tag.

    A tag has the general form
    <tag_name attribs="attrib_data"> main_data </tag_name>. The tag name, the
    attribute data and the enclosed data are stored separately so that they
    can be used to create objects with the appropriate names and data.

    Attributes:
        attribs: The attribute data for the tag.
        fields: The rest of the data.
        name: The tag name.
    """

    def __init__(self, attribs=None, name="", fields=None):
        """Initialises xml_node.

        Args:
            attribs: An optional dictionary giving attribute data. A new empty
                dictionary is created when None is given.
            name: An optional string giving the tag name. Defaults to ''.
            fields: An optional container holding all the data between the
                start and end tags, including information about other nodes.
                A new empty list is created when None is given.
        """
        # Fresh containers are built per instance so that nodes created with
        # the defaults never share state.
        self.name = name
        self.attribs = {} if attribs is None else attribs
        self.fields = [] if fields is None else fields
class xml_handler(ContentHandler):
    """SAX content handler that builds a tree of xml_node objects.

    Keeps track of the hierarchical nature of an xml file by recording the
    level of nesting, so that the correct data and attributes can be
    associated with the correct tag name.

    Attributes:
        root: An xml_node object for the root node.
        open: The list of the tags that the parser is currently between the
            start and end tags of.
        level: The level of nesting that the parser is currently at.
        buffer: A list of the data found between the tags at the different
            levels of nesting.
    """

    def __init__(self):
        """Initialises xml_handler."""
        # Initialise the base handler as well. It is called explicitly
        # (not through super()) so this also works where ContentHandler is
        # an old-style class.
        ContentHandler.__init__(self)

        # root xml node with all the data
        self.root = xml_node(name="root", fields=[])
        self.open = [self.root]
        # current level of the hierarchy
        self.level = 0
        # Holds all the data between each of the tags.
        # If level = 1, then buffer[0] holds all the data collected between
        # the root tags, and buffer[1] holds all the data collected between
        # the first child tag.
        self.buffer = [[""]]

    def startElement(self, name, attrs):
        """Reads an opening tag.

        Adds the opening tag to the list of open tags, adds a new space in
        the buffer, reads the appropriate attributes and adds a new level to
        the hierarchy.

        Args:
            name: The tag_name.
            attrs: The attribute data.
        """
        # creates a new node holding a plain-dict copy of the attributes
        newnode = xml_node(
            attribs=dict((key, attrs[key]) for key in attrs.keys()),
            name=name,
            fields=[],
        )
        # adds it to the list of fields of the parent tag, which is the
        # innermost node that is currently open
        self.open[self.level].fields.append((name, newnode))
        # adds it to the list of open nodes
        self.open.append(newnode)
        # gets ready to read new data
        self.buffer.append([""])
        self.level += 1

    def characters(self, data):
        """Reads data.

        Adds the data to the buffer of the current level of the hierarchy.
        Data is read as a string, and needs to be converted to the required
        type later.

        Args:
            data: The data to be read.
        """
        self.buffer[self.level].append(data)

    def endElement(self, name):
        """Reads a closing tag.

        Once all the data has been read, and the closing tag found, the
        buffer is read into the appropriate field.

        Args:
            name: The tag_name.
        """
        # All the text found between the tags is stored in the node being
        # closed, under the "_text" key. self.level always indexes the last
        # entry of both self.buffer and self.open, so popping that index
        # removes the innermost entry.
        text = ''.join(self.buffer.pop(self.level))
        closed = self.open.pop(self.level)
        closed.fields.append(("_text", text))
        # The node is now 'closed': it is still held within its parent's
        # fields, but no more data will be added to it.
        self.level -= 1
def xml_parse_string(buf):
    """Parses a string made from a section of a xml input file.

    Args:
        buf: A string in correct xml format.

    Returns:
        The xml_node that the handler uses as the root of the parsed data.
    """
    handler = xml_handler()
    # parseString() drives the handler callbacks, which fill handler.root.
    parseString(buf, handler)
    return handler.root
def xml_parse_file(stream):
    """Parses an entire xml input file.

    Args:
        stream: The xml source, handed unchanged to xml.sax.parse (which
            accepts a file name or a file-like object).

    Returns:
        The xml_node that the handler uses as the root of the parsed data.
    """
    handler = xml_handler()
    # parse() drives the handler callbacks, which fill handler.root.
    parse(stream, handler)
    return handler.root
def read_type(type, data):
    """Reads a string and outputs data of a specified type.

    The reader registered for the type in readtype_funcs is applied to the
    string and its result is passed through the type itself.

    Args:
        type: The data type of the target container.
        data: The string to be read in.

    Raises:
        TypeError: Raised if it tries to read into a data type that has not
            been implemented.

    Returns:
        An object of type type.
    """
    reader = readtype_funcs.get(type)
    if reader is None:
        raise TypeError("Conversion not available for given type")
    return type(reader(data))
def read_float(data):
    """Converts a string into a float.

    Args:
        data: The string to be read in.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        A float.
    """
    value = float(data)
    return value
def read_int(data):
    """Converts a string into an integer.

    Args:
        data: The string to be read in.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        An integer.
    """
    value = int(data)
    return value
def read_bool(data):
    """Converts a string into a boolean.

    The string must read 'true' or 'false'; surrounding whitespace and
    letter case are ignored.

    Args:
        data: The string to be read in.

    Raises:
        ValueError: Raised if the string is not 'true' or 'false'.

    Returns:
        A boolean.
    """
    token = data.strip().upper()
    if token not in ("TRUE", "FALSE"):
        raise ValueError(data + " does not represent a bool value")
    return token == "TRUE"
def read_list(data, delims="[]", split=",", strip=" \n\t'"):
    """Reads a formatted string and outputs a list.

    The list body is the text between the first occurrence of delims[0] and
    the first occurrence of delims[1] that follows it. The body is cut at
    every occurrence of split, and the characters in strip are removed from
    the beginning and end of each element. The standard list format is
    '[array[0], array[1],..., array[n]]', which is used for actual lists.
    Other formats are used for tuples and dictionaries.

    Args:
        data: The string to be read in.
        delims: A string of two characters giving the first and last
            character of the list format. '[]' by default.
        split: The character between different elements of the list format.
            ',' by default.
        strip: Characters to be removed from the beginning and end of each
            element. Space, newline, tab and single quote by default.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        A list of strings.
    """
    try:
        begin = data.index(delims[0])
        # The closing delimiter only counts if it comes after the opening
        # one; searching from the start of the string would pick up a stray
        # closer (or, with identical delimiters, the opener itself) and
        # silently produce an empty list.
        end = data.index(delims[1], begin + 1)
    except ValueError:
        raise ValueError("Error in list syntax: could not locate delimiters")

    rlist = [item.strip(strip) for item in data[begin + 1:end].split(split)]

    # Splitting an empty body gives a single empty string; report that as an
    # empty list.
    if rlist == [""]:
        return []

    return rlist
def read_array(dtype, data):
    """Reads a formatted string and outputs an array.

    The expected format is [array[0], array[1], ... , array[n]], i.e. comma
    separated elements enclosed in square brackets.

    Args:
        dtype: The data type of the elements of the target array.
        data: The string to be read in.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        An array of data type dtype.
    """
    # Each element string is converted on its own before the array is built.
    elements = [read_type(dtype, item) for item in read_list(data)]
    return np.array(elements, dtype)
def read_tuple(data, delims="()", split=",", strip=" \n\t'", arg_type=int):
    """Reads a formatted string and outputs a tuple.

    The expected format is (tuple[0], tuple[1], ... , tuple[n]), i.e. comma
    separated elements enclosed in brackets.

    Args:
        data: The string to be read in.
        delims: A string of two characters giving the first and last
            character of the tuple format. '()' by default.
        split: The character between different elements of the tuple format.
            ',' by default.
        strip: Characters to be removed from the beginning and end of each
            element. Space, newline, tab and single quote by default.
        arg_type: The callable used to convert each element string; the
            returned tuple holds its results. int by default.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        A tuple of elements of the specified data type.
    """
    items = read_list(data, delims=delims, split=split, strip=strip)
    return tuple(arg_type(item) for item in items)
def read_dict(data, delims="{}", split=",", key_split=":", strip=" \n\t"):
    """Reads a formatted string and outputs a dictionary.

    The expected format is
    {keyword[0]: arg[0], keyword[1]: arg[1], ... , keyword[n]: arg[n]}, i.e.
    comma separated key:value items enclosed in curly brackets.

    Args:
        data: The string to be read in.
        delims: A string of two characters giving the first and last
            character of the dictionary format. '{}' by default.
        split: The character between different items. ',' by default.
        key_split: The character between the key word and the value.
            ':' by default.
        strip: Characters to be removed from the beginning and end of each
            item, key and value. Space, newline and tab by default.

    Raises:
        ValueError: Raised if the input data is not of the correct format,
            including any item that does not split into exactly one key and
            one value.

    Returns:
        A dictionary of strings.
    """
    rdict = {}
    for item in read_list(data, delims=delims, split=split, strip=strip):
        # A real list is needed here: under Python 3, map() returns an
        # iterator, which supports neither len() nor indexing.
        parts = [part.strip(strip) for part in item.split(key_split)]
        if len(parts) != 2:
            raise ValueError("Format for a key:value format is wrong for item " + item)
        key, value = parts
        rdict[key] = value

    return rdict
# Maps each supported target type to the function read_type() uses to parse
# a string into it.
# NOTE(review): read_type() calls the reader with the string only, whereas
# read_array() takes the dtype as its first argument, so the np.ndarray entry
# cannot be used through read_type() as written.
readtype_funcs = {
    np.ndarray: read_array,
    dict: read_dict,
    float: read_float,
    int: read_int,
    bool: read_bool,
    # Calls the strip() method of whatever string object is passed in. The
    # module-level string.strip() function is not available on Python 3, and
    # referring to it made this module fail at import time there.
    str: lambda data: data.strip(),
    tuple: read_tuple,
    np.uint: read_int,
}
def write_type(type, data):
    """Writes a formatted string from a value of a specified type.

    The writer registered for the type in writetype_funcs is applied to the
    value.

    Args:
        type: The data type of the value.
        data: The value to be written out.

    Raises:
        TypeError: Raised if it tries to write from a data type that has not
            been implemented.

    Returns:
        A formatted string.
    """
    writer = writetype_funcs.get(type)
    if writer is None:
        raise TypeError("Conversion not available for given type")
    return writer(data)
def write_list(data, delims="[]"):
    """Writes a formatted string from a list.

    The format of the output is as for a standard python list,
    [list[0], list[1],..., list[n]]. Note the space after the commas, and
    the use of square brackets. Each element is converted with str().

    Args:
        data: The iterable of values to be written out.
        delims: An optional string of two characters giving the first and
            last character to be printed. Defaults to "[]".

    Returns:
        A formatted string.
    """
    # join() places the separator only between elements, so nothing has to
    # be trimmed afterwards. Trimming with rstrip(", ") removed every
    # trailing comma or space, including ones that belonged to the last
    # element itself.
    return delims[0] + ", ".join(str(v) for v in data) + delims[1]
def write_tuple(data):
    """Writes a formatted string from a tuple.

    The format of the output is as for a standard python tuple,
    (tuple[0], tuple[1],..., tuple[n]). Note the space after the commas, and
    the use of brackets.

    Args:
        data: The value to be written out.

    Returns:
        A formatted string.
    """
    # Same layout as a list, only with round brackets as delimiters.
    formatted = write_list(data, delims="()")
    return formatted
def write_float(data):
    """Writes a formatted string from a float.

    Floats are printed in exponential format with 8 decimal places, padded
    on the left with spaces to a width of 16 characters.

    For example 1.0 --> '  1.00000000e+00'

    Args:
        data: The value to be written out.

    Returns:
        A formatted string.
    """
    template = "%16.8e"
    return template % (data)
def write_bool(data):
    """Writes a formatted string from a boolean.

    The str() form of the value is cut to at most 5 characters and padded on
    the left with spaces to exactly 5 characters, so True is written as
    ' True' and False as 'False'.

    Args:
        data: The value to be written out.

    Returns:
        A formatted string.
    """
    text = str(data)
    return text[:5].rjust(5)
def write_dict(data, delims="{}"):
    """Writes a formatted string from a dictionary.

    The format of the output is as for a standard python dictionary,
    {keyword[0]: arg[0], keyword[1]: arg[1],..., keyword[n]: arg[n]}. Note
    the space after the commas, and the use of curly brackets. Keys and
    values are converted with str().

    Args:
        data: The dictionary to be written out.
        delims: An optional string of two characters giving the first and
            last character to be printed. Defaults to "{}".

    Returns:
        A formatted string.
    """
    # join() places the separator only between items, so nothing has to be
    # trimmed afterwards. Trimming with strip(", ") removed commas and
    # spaces from both ends of the string, including ones that belonged to
    # the last value.
    items = ", ".join(str(key) + ": " + str(data[key]) for key in data)
    return delims[0] + items + delims[1]
# Maps each supported type to the function write_type() uses to turn a value
# of that type into a string.
writetype_funcs = {
    float: write_float,
    dict: write_dict,
    int: str,
    bool: write_bool,
    # Calls the strip() method of whatever string object is passed in. The
    # module-level string.strip() function is not available on Python 3, and
    # referring to it made this module fail at import time there.
    str: lambda data: data.strip(),
    tuple: write_tuple,
    np.uint: str,
}