"""
Routines for reading PDML produced from TShark.

Copyright (c) 2003, 2013 by Gilbert Ramirez <gram@alumni.rice.edu>

SPDX-License-Identifier: GPL-2.0-or-later
"""
import sys
import xml.sax
from xml.sax.saxutils import quoteattr

import cStringIO as StringIO
class FoundItException(Exception):
    """Used internally for exiting a tree search."""


class PacketList(object):
    """Holds Packet objects, and has methods for finding items within it.

    Every child is expected to expose a ``name`` attribute and the same
    search methods as this class (``_item_exists`` and ``get_items``),
    because the searches below recurse into the children.
    """

    def __init__(self, children=None):
        """Wrap 'children' (a list); a new empty list is used when omitted."""
        # A fresh list per instance: ProtoTreeItem constructs us without
        # arguments and then appends to self.children.
        if children is None:
            children = []
        self.children = children

    def __getitem__(self, index):
        """We act like a list."""
        return self.children[index]

    def __len__(self):
        """Return the number of direct children."""
        return len(self.children)

    def item_exists(self, name):
        """Does an item with name 'name' exist in this
        PacketList? Returns True or False."""
        # Cheap pass over the direct children first.
        for child in self.children:
            if child.name == name:
                return True

        # Then search the subtrees. _item_exists() reports a hit by raising
        # FoundItException, which unwinds the whole recursion at once.
        try:
            for child in self.children:
                child._item_exists(name)
        except FoundItException:
            return True

        return False

    def _item_exists(self, name):
        """Raise FoundItException if any descendant is named 'name'.

        Returns None when nothing matches.
        """
        for child in self.children:
            if child.name == name:
                raise FoundItException
            child._item_exists(name)

    def get_items(self, name, items=None):
        """Return all items that match the name 'name'.
        They are returned in order of a depth-first-search.

        'items' is the accumulator used by the recursion; callers normally
        leave it as None. The top-level call returns a new PacketList,
        while recursive calls (items given) append to 'items' and return
        None.
        """
        top_level = items is None
        if top_level:
            items = []

        for child in self.children:
            if child.name == name:
                items.append(child)
            child.get_items(name, items)

        if top_level:
            return PacketList(items)
        return None

    def get_items_before(self, name, before_item, items=None):
        """Return all items that match the name 'name' that
        exist before the before_item. The before_item is an object.
        The results are returned in order of a depth-first-search.
        This function allows you to find fields from protocols that occur
        before other protocols. For example, if you have an HTTP
        protocol, you can find all tcp.dstport fields *before* that HTTP
        protocol. This helps analyze in the presence of tunneled protocols.

        As with get_items(), only the top-level call (items is None)
        returns a PacketList; otherwise matches are appended to 'items'
        and None is returned.
        """
        top_level = items is None
        if top_level:
            items = []

        for child in self.children:
            # Stop at before_item, but only while scanning the top level.
            if top_level and child == before_item:
                break
            if child.name == name:
                items.append(child)
            # Call get_items because the 'before_item' applies
            # only to the top level search.
            child.get_items(name, items)

        if top_level:
            return PacketList(items)
        return None
class ProtoTreeItem(PacketList):
    """A tree node whose fields are read from a mapping of XML attributes.

    Each of name, showname, pos, size, value, show and hide is looked up
    with ``xmlattrs.get(key, "")``, so an attribute that is absent is
    stored as the empty string.
    """

    def __init__(self, xmlattrs):
        """Start with no children and copy the attributes from 'xmlattrs'."""
        PacketList.__init__(self)

        self.name = xmlattrs.get("name", "")
        self.showname = xmlattrs.get("showname", "")
        self.pos = xmlattrs.get("pos", "")
        self.size = xmlattrs.get("size", "")
        self.value = xmlattrs.get("value", "")
        self.show = xmlattrs.get("show", "")
        self.hide = xmlattrs.get("hide", "")

    def add_child(self, child):
        """Append 'child' to this item's children."""
        self.children.append(child)

    # NOTE(review): only the header of get_showname() survived in this copy
    # of the file. Its body and the sibling getters below were restored as
    # plain attribute accessors -- confirm against the upstream source.
    def get_name(self):
        return self.name

    def get_showname(self):
        return self.showname

    def get_pos(self):
        return self.pos

    def get_size(self):
        return self.size

    def get_value(self):
        return self.value

    def get_show(self):
        return self.show

    def get_hide(self):
        return self.hide

    def dump(self, fh=sys.stdout):
        """Print this item's attributes to 'fh' as name="value" pairs.

        Values are quoted with xml.sax.saxutils.quoteattr. Every print ends
        with a trailing comma, so no newline is written; the caller is
        responsible for finishing the tag.
        """
        # The format strings are kept exactly as they were, including the
        # leading space on " name=%s".
        attributes = (
            (" name=%s", self.name),
            ("showname=%s", self.showname),
            ("pos=%s", self.pos),
            ("size=%s", self.size),
            ("value=%s", self.value),
            ("show=%s", self.show),
            ("hide=%s", self.hide),
        )
        for fmt, attr_value in attributes:
            # NOTE(review): the emptiness guard is restored; a line is
            # missing before each print in this copy of the file and ""
            # is the "attribute absent" default -- confirm upstream.
            if attr_value:
                print >> fh, fmt % (quoteattr(attr_value),),
class Packet(ProtoTreeItem, PacketList):
    """Tree item created for a <packet> element."""

    def dump(self, fh=sys.stdout, indent=0):
        """Print <packet>, every child's dump, then </packet> to 'fh'.

        'indent' is the nesting depth used to build the leading padding.
        """
        print >> fh, " " * indent, "<packet>"
        # NOTE(review): the increment is restored; one line is missing here
        # in this copy of the file and 'indent' is otherwise never changed.
        # The closing tag below therefore uses the incremented value --
        # confirm against the upstream source.
        indent += 1
        for child in self.children:
            child.dump(fh, indent)
        print >> fh, " " * indent, "</packet>"
class Protocol(ProtoTreeItem):
    """Tree item created for a <proto> element."""

    def dump(self, fh=sys.stdout, indent=0):
        """Print the <proto ...> tag, every child's dump, then </proto>.

        'indent' is the nesting depth used to build the leading padding.
        """
        # Trailing comma: stay on the same line so the attributes written
        # by ProtoTreeItem.dump() land inside the start tag.
        print >> fh, "%s<proto " % (" " * indent,),

        ProtoTreeItem.dump(self, fh)

        # NOTE(review): the two statements below are restored; lines are
        # missing here in this copy of the file. The start tag has to be
        # closed before children are written, and 'indent' is otherwise
        # never changed -- confirm against the upstream source.
        print >> fh, ">"

        indent += 1
        for child in self.children:
            child.dump(fh, indent)
        print >> fh, " " * indent, "</proto>"
class Field(ProtoTreeItem):
    """Tree item created for a <field> element."""

    def dump(self, fh=sys.stdout, indent=0):
        """Print the <field ...> tag, every child's dump, then </field>.

        'indent' is the nesting depth used to build the leading padding.
        """
        # Trailing comma: stay on the same line so the attributes written
        # by ProtoTreeItem.dump() land inside the start tag.
        print >> fh, "%s<field " % (" " * indent,),

        ProtoTreeItem.dump(self, fh)

        # NOTE(review): the two statements below are restored; lines are
        # missing here in this copy of the file. Further lines also appear
        # to be missing after the closing tag (possibly separate handling
        # of fields without children) -- confirm against the upstream
        # source before relying on the exact output format.
        print >> fh, ">"

        indent += 1
        for child in self.children:
            child.dump(fh, indent)
        print >> fh, " " * indent, "</field>"
# NOTE(review): the handler's own comments refer to "the single CaptureFile
# element", but no CaptureFile definition is visible in this copy of the
# file. It is defined here so the handler is self-contained; drop this
# definition if the file already provides one.
class CaptureFile(object):
    """Placeholder object pushed on the stack for the <pdml> root."""


class ParseXML(xml.sax.handler.ContentHandler):
    """SAX handler that builds one Packet tree at a time.

    Elements are kept on a stack while they are open. When a <packet>
    element closes, the finished Packet object is passed to the callback
    given to the constructor.
    """

    ELEMENT_FILE = "pdml"
    ELEMENT_FRAME = "packet"
    ELEMENT_PROTOCOL = "proto"
    ELEMENT_FIELD = "field"

    def __init__(self, cb):
        """Remember the per-packet callback 'cb' and start with no state."""
        xml.sax.handler.ContentHandler.__init__(self)
        self.cb = cb
        self.chars = ""
        self.element_stack = []

    def startElement(self, name, xmlattrs):
        """Create the object for element 'name' and push it on the stack.

        Calls sys.exit() with a message when the element is not one of
        the four known element names.
        """
        # NOTE(review): resetting the character buffer here and in
        # endElement() is restored (lines are missing at both places in
        # this copy of the file); without it the buffer would grow for the
        # whole document -- confirm against the upstream source.
        self.chars = ""

        if name == self.ELEMENT_FILE:
            # Eventually, we should check version number of pdml here
            elem = CaptureFile()

        elif name == self.ELEMENT_FRAME:
            elem = Packet(xmlattrs)

        elif name == self.ELEMENT_PROTOCOL:
            elem = Protocol(xmlattrs)

        elif name == self.ELEMENT_FIELD:
            elem = Field(xmlattrs)

        else:
            sys.exit("Unknown element: %s" % (name,))

        self.element_stack.append(elem)

    def endElement(self, name):
        """Pop the finished element, attach it to its parent, and hand
        completed Packet objects to the callback."""
        elem = self.element_stack.pop()

        # Add element as child to previous element as long
        # as there is more than 1 element in the stack. Only
        # one element in the stack means that the element in
        # the stack is the single CaptureFile element, and we don't
        # want to add this element to that, as we only want one
        # Packet element in memory at a time.
        if len(self.element_stack) > 1:
            parent_elem = self.element_stack[-1]
            parent_elem.add_child(elem)

        self.chars = ""

        # If we just finished a Packet element, hand it to the
        # user's callback.
        if isinstance(elem, Packet):
            self.cb(elem)

    def characters(self, chars):
        """Accumulate character data for the element being read."""
        self.chars = self.chars + chars
def _create_parser(cb):
    """Internal function for setting up the SAX parser.

    Returns a parser whose content handler is a ParseXML built around the
    callback 'cb'.
    """
    parser = xml.sax.make_parser()

    handler = ParseXML(cb)

    # Tell the parser to use our handler
    parser.setContentHandler(handler)

    # Don't fetch the DTD, in case it is listed
    parser.setFeature(xml.sax.handler.feature_external_ges, False)

    # parse_fh() uses the returned parser.
    return parser
def parse_fh(fh, cb):
    """Parse a PDML file, given filehandle, and call the callback function (cb),
    once for each Packet object."""
    parser = _create_parser(cb)

    # Parse the file
    parser.parse(fh)

    # parser.close() is deliberately not called here: closing the parser
    # was erroring out, and the cause is not known.
def parse_string(text, cb):
    """Parse the PDML contained in a string.

    'cb' is called as in parse_fh(); the text is wrapped in a file-like
    object and parsed through that function.
    """
    stream = StringIO.StringIO(text)
    parse_fh(stream, cb)
# NOTE(review): the enclosing function header and the test_cb callback are
# missing from this copy of the file; the name _test and the no-op callback
# body are reconstructions -- confirm against the upstream source.
def _test():
    """Parse the PDML file named by sys.argv[1], ignoring every packet."""

    def test_cb(obj):
        # Nothing to do per packet; this only exercises the parser.
        pass

    filename = sys.argv[1]
    # 'with' closes the file even when parsing fails.
    with open(filename, "r") as fh:
        parse_fh(fh, test_cb)


if __name__ == '__main__':
    _test()