1 # RFC-822 message manipulation class.
3 # XXX This is only a very rough sketch of a full RFC-822 parser;
4 # additional methods are needed to parse addresses and dates, and to
5 # tokenize lines according to various other syntax rules.
9 # To create a Message object: first open a file, e.g.:
10 # fp = open(file, 'r')
11 # (or use any other legal way of getting an open file object, e.g. use
12 # sys.stdin or call os.popen()).
13 # Then pass the open file object to the Message() constructor, e.g. m = Message(fp).
16 # To get the text of a particular header there are several methods:
17 # str = m.getheader(name)
18 # str = m.getrawheader(name)
19 # where name is the name of the header, e.g. 'Subject'.
20 # The difference is that getheader() strips the leading and trailing
21 # whitespace, while getrawheader() doesn't. Both functions retain
22 # embedded whitespace (including newlines) exactly as they are
23 # specified in the header, and leave the case of the text unchanged.
25 # See the class definition for lower level access methods.
27 # There are also some utility functions here.
36 # Initialize the class instance and read the headers.
38 def __init__(self
, fp
):
42 self
.startofheaders
= self
.fp
.tell()
44 self
.startofheaders
= None
49 self
.startofbody
= self
.fp
.tell()
51 self
.startofbody
= None
54 # Rewind the file to the start of the body (if seekable).
57 self
.fp
.seek(self
.startofbody
)
60 # Read header lines up to the entirely blank line that
61 # terminates them. The (normally blank) line that ends the
62 # headers is consumed, and is not included in the returned list.
63 # If a non-header line ends the headers (which is an error),
64 # an attempt is made to seek back over it; it is never
65 # included in the returned list.
67 # The variable self.status is set to the empty string if all
68 # went well, otherwise it is an error message.
69 # The variable self.headers is a completely uninterpreted list
70 # of lines contained in the header (so printing them will
71 # reproduce the header exactly as it appears in the file).
73 def readheaders(self
):
74 self
.headers
= list = []
78 line
= self
.fp
.readline()
80 self
.status
= 'EOF in headers'
84 elif headerseen
and line
[0] in ' \t':
85 # It's a continuation line.
87 elif regex
.match('^[!-9;-~]+:', line
):
92 # It's not a header line; stop here.
94 self
.status
= 'No headers'
96 self
.status
= 'Bad header'
97 # Try to undo the read.
99 self
.fp
.seek(-len(line
), 1)
102 self
.status
+ '; bad seek'
106 # Method to determine whether a line is a legal end of
107 # RFC-822 headers. You may override this method if your
108 # application wants to bend the rules, e.g. to accept lines
109 # ending in '\r\n', to strip trailing whitespace, or to
110 # recognise MH template separators ('--------').
112 def islast(self
, line
):
116 # Look through the list of headers and find all lines matching
117 # a given header name (and their continuation lines).
118 # A list of the lines is returned, without interpretation.
119 # If the header does not occur, an empty list is returned.
120 # If the header occurs multiple times, all occurrences are
121 # returned. Case is not important in the header name.
123 def getallmatchingheaders(self
, name
):
124 name
= string
.lower(name
) + ':'
128 for line
in self
.headers
:
129 if string
.lower(line
[:n
]) == name
:
131 elif line
[:1] not in string
.whitespace
:
138 # Similar, but return only the first matching header (and its
139 # continuation lines).
141 def getfirstmatchingheader(self
, name
):
142 name
= string
.lower(name
) + ':'
146 for line
in self
.headers
:
147 if string
.lower(line
[:n
]) == name
:
149 elif line
[:1] not in string
.whitespace
:
157 # A higher-level interface to getfirstmatchingheader().
158 # Return a string containing the literal text of the header
159 # but with the keyword stripped. All leading, trailing and
160 # embedded whitespace is kept in the string, however.
161 # Return None if the header does not occur.
163 def getrawheader(self
, name
):
164 list = self
.getfirstmatchingheader(name
)
167 list[0] = list[0][len(name
) + 1:]
168 return string
.joinfields(list, '')
171 # Going one step further: also strip leading and trailing whitespace.
174 def getheader(self
, name
):
175 text
= self
.getrawheader(name
)
178 return string
.strip(text
)
181 # XXX The next step would be to define self.getaddr(name)
182 # and self.getaddrlist(name) which would parse a header
183 # consisting of a single mail address and a number of mail
184 # addresses, respectively. Lower level functions would be
185 # parseaddr(string) and parseaddrlist(string).
187 # XXX Similarly, there would be a function self.getdate(name) to
188 # return a date in canonical form (perhaps a number compatible
189 # with time.time()) and a function parsedate(string).
191 # XXX The inverses of the parse functions may also be useful.
200 # Remove quotes from a string.
201 # XXX Should fix this to be really conformant.
205 if str[0] == '"' and str[-1:] == '"':
207 if str[0] == '<' and str[-1:] == '>':