# Import imapsplit-1.0RC1
# [imapsplit.git] / imapsplit.py
# blob d456f4649f83258cb0a61dd866f6cc2909b20aed
#!/usr/bin/env python2.4
# imapsplit: files the messages of IMAP mailboxes into target mailboxes
# according to the per-server split rules found in the user's config module.

__version__ = "1.0RC1"
__author__ = "Nedko Arnaudov (mailto:nedko@arnaudov.name)"
__copyright__ = "(C) 2005 Nedko Arnaudov. GNU GPL 2."

# Startup banner, written as soon as the module is executed.
print("====================")
print("imapsplit version %s %s" % (__version__, __copyright__))

# Defaults. The config module is star-imported further down in this file,
# so any of these names it defines replaces the value given here.
imap_servers = []               # server descriptions (dicts) to process
debug_fetch_parser = False      # trace the decisions made by parse_imap()
dump_fetch_result_flag = False  # dump every parsed FETCH response
dump_header_flag = True         # print Subject/From/List-Id of fetched headers
import base64
import email.Header
import email.Parser
import getpass
import imaplib
import locale
import quopri
import re
import types
# Charset printex() uses to encode unicode text for stdout: the locale's
# preferred encoding, or UTF-8 when the locale does not report one.
out_charset = locale.getpreferredencoding()
if out_charset is None:
    out_charset = "utf-8"

# Debug aid, normally disabled:
#print("Using charset %s for stdout" % out_charset)
def printex(str):
    """Print *str* on stdout without ever raising.

    Unicode text is encoded with ``out_charset``; characters the charset
    cannot represent are replaced.  Byte strings are written unchanged:
    calling ``.encode()`` on them would first decode them as ASCII, so any
    8-bit text (for example a raw mail header) used to be replaced by a
    "cannot print" message.

    The parameter shadows the builtin ``str``; the name is kept so that the
    signature stays the same for existing callers.
    """
    try:
        if isinstance(str, unicode):
            text = str.encode(out_charset, "replace")
        else:
            text = str
        print(text)
    except UnicodeDecodeError:
        # sys.exc_info() instead of "except X, e" keeps this handler valid
        # syntax on every Python version.
        import sys
        print("cannot print: %s" % sys.exc_info()[1])
    except Exception:
        print("cannot print")
# One MIME "encoded-word": =?charset?encoding?payload?=
_ENCODED_WORD_RE = re.compile(r"=\?([A-Za-z0-9-]+)\?([QqBb])\?([^?]+)\?=")


def _literal_text(raw):
    """Return literal header text in a form that is safe to concatenate.

    Text objects and pure-ASCII byte strings are returned unchanged.  A byte
    string holding 8-bit data is decoded as ASCII with replacement
    characters, because mixing it with decoded (unicode) words would raise.
    """
    if isinstance(raw, type(u"")):
        return raw
    try:
        raw.decode("ascii")
    except UnicodeError:
        return raw.decode("ascii", "replace")
    return raw


def _decode_encoded_word(charset, encoding, payload, debug=0):
    """Decode the payload of one encoded-word and return it as text.

    Raises LookupError for an unknown charset and TypeError/ValueError
    (including UnicodeError) when the payload cannot be decoded.
    """
    if encoding in ("B", "b"):
        label = "BASE64"
        raw = base64.b64decode(payload)
    else:
        label = "Quoted-Printable"
        # header=True also turns "_" into a space, as header encoding does.
        raw = quopri.decodestring(payload, True)
    if debug >= 1:
        printex("Found rfc1522 \"encoded-word\"")
        printex("Charset: %s" % charset)
        printex("Encoding: %s" % label)
        printex("%s data: \"%s\"" % (label, payload))
    text = raw.decode(charset)
    if debug >= 2:
        printex("Object: %s (%s)" % (repr(raw), repr(type(raw))))
        printex("Object: %s (%s)" % (repr(text), repr(type(text))))
    if debug >= 1:
        printex("Decoded data: \"%s\"" % text)
    return text


def decode_rfc1252(data):
    """Decode the MIME encoded-words (``=?charset?B|Q?...?=``) in *data*.

    Despite the function name, the syntax handled is the "encoded-word" of
    RFC 1522 (as the debug trace calls it).  Text outside encoded-words is
    copied through.  An encoded-word that cannot be decoded (unknown
    charset, malformed payload) is left in the output verbatim instead of
    aborting the whole decode.

    Returns the decoded header value; it is a unicode object as soon as at
    least one encoded-word was decoded.
    """
    debug = 0  # set to 1 or 2 to trace the decoding through printex()
    pieces = []
    pos = 0
    # finditer() yields the same left-to-right, non-overlapping matches a
    # character-by-character scan would find, without re-slicing the input.
    for m in _ENCODED_WORD_RE.finditer(data):
        if m.start() > pos:
            pieces.append(_literal_text(data[pos:m.start()]))
        try:
            pieces.append(_decode_encoded_word(m.group(1), m.group(2),
                                               m.group(3), debug))
        except (LookupError, TypeError, ValueError):
            pieces.append(_literal_text(m.group(0)))
        pos = m.end()
    if pos < len(data):
        pieces.append(_literal_text(data[pos:]))
    return "".join(pieces)
_IMAP_NUMBER_RE = re.compile(r"\d+")
_IMAP_LITERAL_RE = re.compile(r"\{(\d+)\}")
# A quoted string may be empty and may contain backslash-escaped characters.
_IMAP_QUOTED_RE = re.compile(r'"((?:[^"\\]|\\.)*)"')
_IMAP_FLAG_RE = re.compile(r"\\([A-Za-z]+)")
_IMAP_ATOM_RE = re.compile(r"[A-Za-z0-9_.-]+")


def _imap_match(pattern, data, pos):
    """Match *pattern* at *pos* or raise ValueError naming the position."""
    m = pattern.match(data, pos)
    if m is None:
        raise ValueError("cannot parse IMAP data at offset %d: %r"
                         % (pos, data[pos:pos + 20]))
    return m


def _imap_list_end(data, start):
    """Return the index of the ')' closing the list opened at data[start].

    Literals ({n} followed by n characters) and quoted strings are skipped
    as a whole, so parentheses inside them do not affect the nesting count.
    When the list is never closed an index past the end of *data* is
    returned, i.e. the list runs to the end of the input.
    """
    depth = 0
    j = start + 1
    while j < len(data):
        ch = data[j]
        if ch == ")":
            if depth == 0:
                return j
            depth -= 1
        elif ch == "(":
            depth += 1
        elif ch == "{":
            m = _IMAP_LITERAL_RE.match(data, j)
            if m is not None:
                j = m.end() + int(m.group(1))
                continue
        elif ch == "\"":
            m = _IMAP_QUOTED_RE.match(data, j)
            if m is not None:
                j = m.end()
                continue
        j += 1
    return j


def parse_imap(data):
    """Tokenize IMAP response text into a list of ``(kind, value)`` tuples.

    Kinds produced:
      'number'      run of digits, kept as a string
      'string'      {n} literal (the n characters after the brace) or quoted
                    string (quotes removed, backslash escapes resolved)
      'list'        parenthesized list; value is the parsed content
      'flag'        backslash-prefixed name, stored without the backslash
      'identifier'  run of letters, digits, '_', '.' and '-'

    Raises ValueError when the text at the current position is none of
    these.  Tracing is controlled by the module-level debug_fetch_parser.
    """
    result = []
    if debug_fetch_parser: printex("Parsing %s" % repr(data))
    i = 0
    while i < len(data):
        if debug_fetch_parser: printex("%u" % i)
        ch = data[i]
        if ch.isspace():
            if debug_fetch_parser: printex("Ignoring whitespace")
            i += 1
        elif ch.isdigit():
            if debug_fetch_parser: printex("Found number")
            m = _imap_match(_IMAP_NUMBER_RE, data, i)
            result.append(('number', m.group(0)))
            i = m.end()
        elif ch == "{":
            if debug_fetch_parser: printex("Found string literal")
            m = _imap_match(_IMAP_LITERAL_RE, data, i)
            size = int(m.group(1))
            # The literal's content follows the closing brace directly.
            result.append(('string', data[m.end():m.end() + size]))
            i = m.end() + size
        elif ch == "\"":
            if debug_fetch_parser: printex("Found quoted string")
            m = _imap_match(_IMAP_QUOTED_RE, data, i)
            result.append(('string', re.sub(r"\\(.)", r"\1", m.group(1))))
            i = m.end()
        elif ch == "(":
            if debug_fetch_parser: printex("Found list")
            j = _imap_list_end(data, i)
            result.append(('list', parse_imap(data[i + 1:j])))
            i = j + 1
        elif ch == "\\":
            if debug_fetch_parser: printex("Found flag")
            m = _imap_match(_IMAP_FLAG_RE, data, i)
            result.append(('flag', m.group(1)))
            i = m.end()
        else:
            if debug_fetch_parser: printex("Found identifier")
            m = _imap_match(_IMAP_ATOM_RE, data, i)
            result.append(('identifier', m.group(0)))
            i = m.end()
    return result
def parse_fetch_response(data):
    """Turn the data of a FETCH response into a dict.

    *data* is a sequence whose items are strings or tuples of strings (the
    shape fetch_header() receives from M.uid('fetch', ...)); all pieces are
    concatenated and tokenized with parse_imap().

    The result maps 'sequence_number' to the leading number and every
    identifier of the following list to its value.  'FLAGS' maps to the
    list of flag names; keyword flags, which carry no backslash and
    therefore tokenize as identifiers, are accepted as well.

    Returns None when *data* holds anything else or the response does not
    have the shape "number (identifier value ...)".
    """
    chunks = []
    for part in data:
        if isinstance(part, basestring):
            chunks.append(part)
        elif isinstance(part, tuple):
            chunks.extend(part)
        else:
            return None

    stage1 = parse_imap("".join(chunks))

    # printex('------- stage1 --------')
    # printex("%s" % (repr(stage1)))
    # printex('-----------------------')

    # A usable response has at least the sequence number and the item list.
    if len(stage1) < 2:
        return None
    if stage1[0][0] != 'number' or stage1[1][0] != 'list':
        return None

    result = {'sequence_number': stage1[0][1]}
    items = stage1[1][1]
    # Items come in name/value pairs; an unpaired trailing item is ignored.
    for k in range(0, len(items) - 1, 2):
        kind, name = items[k]
        value_kind, value = items[k + 1]
        if kind != 'identifier':
            return None
        if name == 'FLAGS':
            if value_kind != 'list':
                return None
            flags = []
            for flag_kind, flag_name in value:
                if flag_kind not in ('flag', 'identifier'):
                    return None
                flags.append(flag_name)
            result[name] = flags
        else:
            result[name] = value
    return result
def print_sequence_number(result):
    """Print the sequence number of a parsed fetch result, if present."""
    if 'sequence_number' not in result:
        return
    printex("sequence_number: %s" % result['sequence_number'])
def print_uid(result):
    """Print the UID of a parsed fetch result, if present."""
    if 'UID' not in result:
        return
    printex("UID: %s" % result['UID'])
def print_flags(result):
    """Print the repr of the FLAGS entry of a parsed fetch result, if present."""
    if 'FLAGS' not in result:
        return
    printex("FLAGS: %s" % repr(result['FLAGS']))
def print_internal_date(result):
    """Print the INTERNALDATE of a parsed fetch result, if present."""
    if 'INTERNALDATE' not in result:
        return
    printex("INTERNALDATE: %s" % result['INTERNALDATE'])
def print_header(result):
    """Print the RFC822.HEADER entry of a fetch result between two rulers."""
    if 'RFC822.HEADER' not in result:
        return
    ruler = '-----------------------'
    for line in ('Header:', ruler, result['RFC822.HEADER'], ruler):
        printex(line)
def print_body(result):
    """Print the RFC822.TEXT entry of a fetch result between two rulers."""
    if 'RFC822.TEXT' not in result:
        return
    ruler = '-----------------------'
    for line in ('Body:', ruler, result['RFC822.TEXT'], ruler):
        printex(line)
def dump_fetch_result(result):
    """Dump a parsed fetch result: first raw, then field by field.

    Every key is printed with the repr of its value between two rulers,
    followed by the output of each print_* helper for the fields it knows.
    """
    ruler = '-----------------------'
    printex(ruler)
    for key, value in result.iteritems():
        printex("'%s': '%s'" % (key, repr(value)))
    printex(ruler)

    printers = (print_sequence_number, print_uid, print_flags,
                print_internal_date, print_header, print_body)
    for printer in printers:
        printer(result)
# A folded header line break: CRLF followed by at least one space or tab.
_RFC822_FOLD_RE = re.compile(r"\r\n[ \t]+")


def unfold_rfc822(data):
    """Join the folded lines of a header value.

    Every CRLF that is followed by a space or tab is removed together with
    the whole run of spaces/tabs after it.  A CRLF not followed by
    whitespace, and a bare LF, are left untouched.

    A fold at the very end of *data* (CRLF plus whitespace and nothing
    after it) is handled; scanning past it used to raise IndexError.
    """
    return _RFC822_FOLD_RE.sub("", data)
def dump_header(header):
    """Print the Subject, From and List-Id fields of a raw message header.

    Each field that is present is unfolded, has its encoded-words decoded
    and is printed with printex().  Subject and List-Id are shown in double
    quotes, From is not.
    """
    msg = email.Parser.Parser().parsestr(header)
    layout = (
        ("Subject", "Subject: \"%s\""),
        ("From", "From: %s"),
        ("List-Id", "List-Id: \"%s\""),
    )
    for name, template in layout:
        raw = msg.get(name)
        if raw is None:
            # Field not present in this header.
            continue
        printex(template % decode_rfc1252(unfold_rfc822(raw)))
def fetch_header(uid):
    """Fetch the header of the message *uid* over the connection ``M``.

    Returns the raw RFC822.HEADER text, or None (after printing the reason)
    when the fetch fails, the response cannot be parsed or it carries no
    header.  Depending on the dump_* flags the parsed response and a short
    header summary are printed as well.
    """
    printex('... Fetching header of message with UID %s ...' % uid)
    # Wider request, useful together with dump_fetch_result_flag:
    # fetch_request = '(RFC822.SIZE INTERNALDATE FLAGS RFC822.HEADER RFC822.TEXT)'
    fetch_request = '(RFC822.HEADER)'
    status, data = M.uid('fetch', uid, fetch_request)
    if status != "OK":
        printex('Cannot fetch message with UID %s [%s]' % (uid, status))
        return None

    result = parse_fetch_response(data)
    if result is None:
        printex('Cannot parse fetch command response for message with UID %s' % uid)
        return None

    if dump_fetch_result_flag: dump_fetch_result(result)

    if 'RFC822.HEADER' not in result:
        printex('Server has not returned header we requested for message with UID %s' % uid)
        return None

    if dump_header_flag:
        # The summary is informational only, so a failure must not stop the
        # message from being processed -- but an interrupt or exit request
        # must still get through.
        try:
            dump_header(result['RFC822.HEADER'])
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            printex("Cannot dump header")

    return result['RFC822.HEADER']
def find_target(uid):
    """Return the target mailbox of the first split rule matching *uid*.

    The rules in ``split_rules`` are tried in order.  A rule whose 'match'
    is 'always' matches every message; a 'header' rule matches when its
    'match_regexp' is found (re.MULTILINE) in the message header with CRLF
    normalized to LF.  The header is fetched at most once, on first need.

    Returns None when no rule matches, when the header cannot be fetched,
    or as soon as an incomplete or unknown rule is met.

    NOTE(review): the messages for bad rules say "Skipping rule", yet the
    function returns and later rules are never tried.  Confirm which of the
    two is intended before changing either.
    """
    header = None

    for index, rule in enumerate(split_rules):
        # printex("Checking rule %u" % index)
        if 'target' not in rule:
            printex('Skipping rule %u because it has no target' % index)
            return None
        if 'match' not in rule:
            printex('Skipping rule %u because it has no match method' % index)
            return None

        method = rule['match']
        if method == 'always':
            return rule['target']

        if 'match_regexp' not in rule:
            printex('Skipping rule %u because it has no match regexp' % index)
            return None
        if method != 'header':
            printex('Skipping rule %u because of unknown match method' % index)
            return None

        if header is None:
            header = fetch_header(uid)
            if header is None:
                printex("Ignoring message with UID %s because cannot fetch its header from server" % uid)
                return None

        normalized = header.replace('\r\n', '\n')
        if re.search(rule['match_regexp'], normalized, re.MULTILINE) is not None:
            return rule['target']

    return None
def move_message(uid, target):
    """Move the message *uid* to the mailbox *target* over connection ``M``.

    The message is copied and then flagged as deleted.  When the copy is
    refused with a [TRYCREATE] response, the mailbox is created and the
    copy is attempted once more.

    Returns True when the message was copied and flagged, False when a step
    failed, and None when ``disable_move`` is set (nothing is done then).
    """
    printex('Moving message with UID %s to %s ...' % (uid, target))
    if disable_move:
        return

    printex('Copying message with UID %s to %s ...' % (uid, target))
    status, data = M.uid('copy', uid, target)
    if status != "OK":
        printex('Cannot copy message with UID %s to %s [%s]' % (uid, target, status + " " + data[0]))
        # Only a "NO [TRYCREATE] ..." answer is worth a second attempt.
        if status != 'NO' or re.match(r"^\[TRYCREATE\]", data[0]) is None:
            return False

        printex('Trying to create mailbox %s ...' % target)
        status, data = M.create(target)
        if status != "OK":
            printex('Cannot create mailbox %s [%s]' % (target, status + " " + data[0]))

        # The copy is retried even if the create call reported a failure.
        printex('Retried copy of message with UID %s to %s ...' % (uid, target))
        status, data = M.uid('copy', uid, target)
        if status != "OK":
            printex('Cannot copy message with UID %s to %s [%s]' % (uid, target, status + " " + data[0]))
            return False

    printex('Marking message with UID %s as deleted ...' % uid)
    status, data = M.uid('store', uid, '+FLAGS', '(\\Deleted)')
    if status != "OK":
        printex('Cannot set deleted flag to message with UID %s [%s]' % (uid, status))
        return False
    return True
# Read config file: star-import the module "config" from $HOME/.imapsplit.
import os
import sys

# The import search path is narrowed to the configuration directory while
# the config module is imported and put back right afterwards.
old_path = sys.path
sys.path = [os.environ['HOME'] + "/.imapsplit"]
from config import *
sys.path = old_path

# Assigned after the config import, so a value coming from config is
# replaced.  When true, move_message() announces the move and does nothing.
disable_move = False
# Main loop: process every configured server in turn.  The functions above
# read the globals M (the connection) and split_rules set here.
for server in imap_servers:
    printex("-------")
    split_rules = server['split_rules']

    deleted_count = 0

    hostname = server['host']
    user = server['user']
    server_type = server['type']

    # Open the connection; 'port' in the server entry overrides the default.
    if server_type == 'imaps':
        port = server.get('port', 993)
        M = imaplib.IMAP4_SSL(hostname, port)
    elif server_type == 'imap':
        port = server.get('port', 143)
        M = imaplib.IMAP4(hostname, port)
    else:
        printex("Unknown server type \"%s\"" % server_type)
        continue

    location = (server_type, user, hostname, port)
    printex("Splitting on %s://%s@%s:%u :" % location)

    # printex("Protocol: " + M.PROTOCOL_VERSION)

    # Ask for the password interactively unless the config provides one.
    if 'password' in server:
        password = server['password']
    else:
        password = getpass.getpass("Password for %s://%s@%s:%u : " % location)
    M.login(user, password)

    # select() without arguments opens the default mailbox.
    M.select()
    status, data = M.uid('search', None, 'ALL')
    uids = data[0].split()

    if not uids:
        printex("no new messages")
    else:
        printex("%u new message(s)" % len(uids))

    for uid in uids:
        printex('==========================================================================================')
        printex('... Processing message with UID %s ...' % uid)
        target = find_target(uid)
        if target is None:
            printex('Ignoring message with UID %s because no split rule matched.' % uid)
        elif move_message(uid, target):
            deleted_count += 1

    if uids:
        printex('==========================================================================================')

    # Only messages that were actually flagged as deleted need an expunge.
    if deleted_count > 0:
        printex('Expunging ...')
        M.expunge()

    M.close()

    M.logout()