Added 'list_only' option (and modified 'run()' to respect it).
[python/dscho.git] / Lib / pickle.py
blob7164eb1e395462b3e9046ad9e2719fe9abf95bc6
1 """create portable serialized representations of Python objects.
3 See module cPickle for a (much) faster implementation.
4 See module copy_reg for a mechanism for registering custom picklers.
6 Classes:
8 Pickler
9 Unpickler
11 Functions:
13 dump(object, file)
14 dumps(object) -> string
15 load(file) -> object
16 loads(string) -> object
18 Misc variables:
20 __version__
21 format_version
22 compatible_formats
24 """
26 __version__ = "$Revision$" # Code version
28 from types import *
29 from copy_reg import dispatch_table, safe_constructors
30 import string
31 import marshal
32 import sys
33 import struct
format_version = "1.3" # File format version we write
compatible_formats = ["1.0", "1.1", "1.2"] # Old format versions we can read

# Short aliases for the marshal routines.  The Pickler strips the leading
# type byte from mdumps(<int>) to obtain the raw bytes of an integer; the
# Unpickler prepends 'i' to raw bytes and hands them to mloads to decode.
mdumps = marshal.dumps
mloads = marshal.loads

# The module's exceptions are plain strings, raised in the form
# "raise PicklingError, message".
PicklingError = "pickle.PicklingError"
UnpicklingError = "pickle.UnpicklingError"

# PyStringMap is registered as an additional dictionary type in
# Pickler.dispatch when it can be imported; when the import fails it is
# None and that registration is skipped.
# NOTE(review): org.python.core looks like the Jython runtime package --
# confirm.
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None
# One-character opcodes of the pickle stream.  Each comment summarises what
# the matching Unpickler.load_* handler does when it reads the opcode.
MARK = '('             # push the unpickler's mark object onto the stack
STOP = '.'             # pop the top of the stack and end load() with it
POP = '0'              # discard the top of the stack
POP_MARK = '1'         # discard everything from the topmost mark upwards
DUP = '2'              # push the top of the stack a second time
FLOAT = 'F'            # float written as a text line
INT = 'I'              # integer written as a text line
BININT = 'J'           # integer stored in 4 raw bytes
BININT1 = 'K'          # integer stored in 1 raw byte
LONG = 'L'             # long integer written as a text line
BININT2 = 'M'          # integer stored in 2 raw bytes
NONE = 'N'             # push None
PERSID = 'P'           # persistent id given as a text line
BINPERSID = 'Q'        # persistent id taken from the top of the stack
REDUCE = 'R'           # call the callable at stack[-2] with args stack[-1]
STRING = 'S'           # string written as a Python literal on a text line
BINSTRING = 'T'        # string preceded by a 4-byte length
SHORT_BINSTRING = 'U'  # string preceded by a 1-byte length
APPEND = 'a'           # append the top of the stack to the list below it
BUILD = 'b'            # apply the state on top to the instance below it
GLOBAL = 'c'           # module line + name line; push the object found
DICT = 'd'             # build a dict from the key/value items above the mark
EMPTY_DICT = '}'       # push an empty dict
APPENDS = 'e'          # append all items above the mark to the list below it
GET = 'g'              # push a memo entry; key given as a text line
BINGET = 'h'           # push a memo entry; 1-byte index
INST = 'i'             # module line + name line; instantiate with mark args
LONG_BINGET = 'j'      # push a memo entry; 4-byte index
LIST = 'l'             # build a list from the items above the mark
EMPTY_LIST = ']'       # push an empty list
OBJ = 'o'              # instantiate: class and args both sit above the mark
PUT = 'p'              # store the top of the stack in the memo; text key
BINPUT = 'q'           # store the top of the stack in the memo; 1-byte index
LONG_BINPUT = 'r'      # store the top of the stack in the memo; 4-byte index
SETITEM = 's'          # dict[key] = value using the top two stack items
TUPLE = 't'            # build a tuple from the items above the mark
EMPTY_TUPLE = ')'      # push an empty tuple
SETITEMS = 'u'         # store all key/value pairs above the mark in a dict
BINFLOAT = 'G'         # float stored as 8 bytes in struct format '>d'
class Pickler:
    """Write a serialized representation of Python objects to a file.

    The class-level ``dispatch`` dictionary maps a type object to the
    unbound ``save_*`` method that knows how to write it.  ``self.memo``
    maps ``id(object)`` to ``(memo index, object)`` for every object that
    has been written with a PUT opcode, so that later occurrences can be
    written as a GET of that index instead of being serialized again.
    """

    def __init__(self, file, bin = 0):
        """Prepare to pickle onto `file`.

        file -- any object with a write(string) method
        bin  -- true selects the binary opcodes, false the text ones
        """
        self.write = file.write
        self.memo = {}
        self.bin = bin

    def dump(self, object):
        """Write `object` followed by the STOP opcode."""
        self.save(object)
        self.write(STOP)

    def put(self, i):
        """Return the opcode string that stores the stack top as memo `i`."""
        if (self.bin):
            # marshal's dump of an int is a type byte followed by the
            # integer's bytes; drop the type byte.
            s = mdumps(i)[1:]
            if (i < 256):
                return BINPUT + s[0]

            return LONG_BINPUT + s

        return PUT + `i` + '\n'

    def get(self, i):
        """Return the opcode string that fetches memo entry `i`."""
        if (self.bin):
            s = mdumps(i)[1:]

            if (i < 256):
                return BINGET + s[0]

            return LONG_BINGET + s

        return GET + `i` + '\n'

    def save(self, object, pers_save = 0):
        """Write `object`, choosing the writer by the object's type.

        Order of the checks:
          1. unless `pers_save` is true, persistent_id(); a non-None
             result is written with save_pers() and nothing else is done;
          2. the empty tuple, which is written without consulting the memo;
          3. the memo: an already written object becomes a GET;
          4. the dispatch table of this class;
          5. inst_persistent_id(), then copy_reg's dispatch_table, then
             the object's own __reduce__ method.

        Raises PicklingError when no way to pickle the object is found or
        when the reduce result is malformed.
        """
        memo = self.memo

        if (not pers_save):
            pid = self.persistent_id(object)
            if (pid is not None):
                self.save_pers(pid)
                return

        d = id(object)

        t = type(object)

        if ((t is TupleType) and (len(object) == 0)):
            if (self.bin):
                self.save_empty_tuple(object)
            else:
                self.save_tuple(object)
            return

        if memo.has_key(d):
            self.write(self.get(memo[d][0]))
            return

        try:
            f = self.dispatch[t]
        except KeyError:
            pid = self.inst_persistent_id(object)
            if pid is not None:
                self.save_pers(pid)
                return

            try:
                reduce = dispatch_table[t]
            except KeyError:
                try:
                    reduce = object.__reduce__
                except AttributeError:
                    raise PicklingError, \
                        "can't pickle %s objects" % `t.__name__`
                else:
                    # Bound method: called without arguments.
                    tup = reduce()
            else:
                # Function registered for the type: receives the object.
                tup = reduce(object)

            # A string result names a global; write it as a global reference.
            if type(tup) is StringType:
                self.save_global(object, tup)
                return

            if (type(tup) is not TupleType):
                raise PicklingError, "Value returned by %s must be a " \
                                     "tuple" % reduce

            l = len(tup)

            if ((l != 2) and (l != 3)):
                raise PicklingError, "tuple returned by %s must contain " \
                                     "only two or three elements" % reduce

            callable = tup[0]
            arg_tup = tup[1]

            if (l > 2):
                state = tup[2]
            else:
                state = None

            if type(arg_tup) is not TupleType and arg_tup is not None:
                raise PicklingError, "Second element of tuple returned " \
                                     "by %s must be a tuple" % reduce

            self.save_reduce(callable, arg_tup, state)
            # The memo index is the memo's size *before* the entry is added.
            memo_len = len(memo)
            self.write(self.put(memo_len))
            memo[d] = (memo_len, object)
            return

        f(self, object)

    def persistent_id(self, object):
        """Hook consulted by save() for every object; this version returns
        None, which means "no persistent id"."""
        return None

    def inst_persistent_id(self, object):
        """Hook consulted by save() only for objects whose type is not in
        `dispatch`; this version returns None."""
        return None

    def save_pers(self, pid):
        """Write the persistent id `pid`."""
        if (not self.bin):
            self.write(PERSID + str(pid) + '\n')
        else:
            # pers_save=1 keeps save() from asking persistent_id() about
            # the id object itself.
            self.save(pid, 1)
            self.write(BINPERSID)

    def save_reduce(self, callable, arg_tup, state = None):
        """Write `callable` and `arg_tup` followed by REDUCE; when `state`
        is not None, also write `state` followed by BUILD."""
        write = self.write
        save = self.save

        save(callable)
        save(arg_tup)
        write(REDUCE)

        if (state is not None):
            save(state)
            write(BUILD)

    # Maps a type object to the unbound method that writes it.  Each entry
    # is added right after the method it refers to.
    dispatch = {}

    def save_none(self, object):
        """Write None."""
        self.write(NONE)
    dispatch[NoneType] = save_none

    def save_int(self, object):
        """Write an integer, using the shortest binary opcode that fits
        when in binary mode."""
        if (self.bin):
            i = mdumps(object)[1:]
            # Trailing zero bytes can be dropped: two of them leave a
            # 2-byte value, three leave a 1-byte value.
            if (i[-2:] == '\000\000'):
                if (i[-3] == '\000'):
                    self.write(BININT1 + i[:-3])
                    return

                self.write(BININT2 + i[:-2])
                return

            self.write(BININT + i)
        else:
            self.write(INT + `object` + '\n')
    dispatch[IntType] = save_int

    def save_long(self, object):
        """Write a long integer as its repr on a text line (both modes)."""
        self.write(LONG + `object` + '\n')
    dispatch[LongType] = save_long

    def save_float(self, object, pack=struct.pack):
        """Write a float: 8 bytes in struct format '>d' in binary mode,
        its repr on a text line otherwise."""
        if self.bin:
            self.write(BINFLOAT + pack('>d', object))
        else:
            self.write(FLOAT + `object` + '\n')
    dispatch[FloatType] = save_float

    def save_string(self, object):
        """Write a string and record it in the memo."""
        d = id(object)
        memo = self.memo

        if (self.bin):
            l = len(object)
            s = mdumps(l)[1:]
            if (l < 256):
                self.write(SHORT_BINSTRING + s[0] + object)
            else:
                self.write(BINSTRING + s + object)
        else:
            self.write(STRING + `object` + '\n')

        memo_len = len(memo)
        self.write(self.put(memo_len))
        memo[d] = (memo_len, object)
    dispatch[StringType] = save_string

    def save_tuple(self, object):
        """Write a tuple as MARK, its elements, TUPLE, and memoize it."""

        write = self.write
        save = self.save
        memo = self.memo

        d = id(object)

        write(MARK)

        for element in object:
            save(element)

        # save() checks the memo before dispatching here, so finding the
        # tuple in the memo now means it was recorded while its elements
        # were being saved.  In that case the elements just written are
        # removed from the unpickler's stack again and the memoized tuple
        # is fetched instead.
        if (len(object) and memo.has_key(d)):
            if (self.bin):
                write(POP_MARK + self.get(memo[d][0]))
                return

            # Text mode: one POP per element plus one for the mark.
            write(POP * (len(object) + 1) + self.get(memo[d][0]))
            return

        memo_len = len(memo)
        self.write(TUPLE + self.put(memo_len))
        memo[d] = (memo_len, object)
    dispatch[TupleType] = save_tuple

    def save_empty_tuple(self, object):
        """Write the one-byte empty tuple opcode (used in binary mode)."""
        self.write(EMPTY_TUPLE)

    def save_list(self, object):
        """Write a list: first an empty list, then its elements appended."""
        d = id(object)

        write = self.write
        save = self.save
        memo = self.memo

        if (self.bin):
            write(EMPTY_LIST)
        else:
            write(MARK + LIST)

        # The list is memoized before its elements are written, so an
        # element that refers back to the list is written as a GET.
        memo_len = len(memo)
        write(self.put(memo_len))
        memo[d] = (memo_len, object)

        # Binary mode batches two or more elements under a single APPENDS.
        using_appends = (self.bin and (len(object) > 1))

        if (using_appends):
            write(MARK)

        for element in object:
            save(element)

            if (not using_appends):
                write(APPEND)

        if (using_appends):
            write(APPENDS)
    dispatch[ListType] = save_list

    def save_dict(self, object):
        """Write a dictionary: first an empty one, then its items."""
        d = id(object)

        write = self.write
        save = self.save
        memo = self.memo

        if (self.bin):
            write(EMPTY_DICT)
        else:
            write(MARK + DICT)

        # Memoized before the items are written (see save_list).
        memo_len = len(memo)
        self.write(self.put(memo_len))
        memo[d] = (memo_len, object)

        # Binary mode batches two or more items under a single SETITEMS.
        using_setitems = (self.bin and (len(object) > 1))

        if (using_setitems):
            write(MARK)

        items = object.items()
        for key, value in items:
            save(key)
            save(value)

            if (not using_setitems):
                write(SETITEM)

        if (using_setitems):
            write(SETITEMS)

    dispatch[DictionaryType] = save_dict
    if not PyStringMap is None:
        dispatch[PyStringMap] = save_dict

    def save_inst(self, object):
        """Write a class instance: constructor arguments, then state.

        The arguments come from __getinitargs__() when the instance has
        that method, otherwise they are empty.  The state comes from
        __getstate__() when present, otherwise it is the instance's
        __dict__; it is written after the instance and followed by BUILD.
        """
        d = id(object)
        cls = object.__class__

        memo = self.memo
        write = self.write
        save = self.save

        if hasattr(object, '__getinitargs__'):
            args = object.__getinitargs__()
            len(args) # XXX Assert it's a sequence
            # args may be a temporary; keep it referenced so that its id
            # stays unique for as long as the memo is in use.
            _keep_alive(args, memo)
        else:
            args = ()

        write(MARK)

        # Binary mode passes the class on the stack (OBJ); text mode names
        # it in the INST opcode below.
        if (self.bin):
            save(cls)

        for arg in args:
            save(arg)

        memo_len = len(memo)
        if (self.bin):
            write(OBJ + self.put(memo_len))
        else:
            write(INST + cls.__module__ + '\n' + cls.__name__ + '\n' +
                self.put(memo_len))

        # Memoized before the state is written, so state that refers back
        # to the instance is written as a GET.
        memo[d] = (memo_len, object)

        try:
            getstate = object.__getstate__
        except AttributeError:
            stuff = object.__dict__
        else:
            stuff = getstate()
            _keep_alive(stuff, memo)
        save(stuff)
        write(BUILD)
    dispatch[InstanceType] = save_inst

    def save_global(self, object, name = None):
        """Write `object` as a reference to module + name.

        `name` defaults to object.__name__.  The module is taken from
        object.__module__, or looked up with whichmodule() when the object
        has no such attribute.
        """
        write = self.write
        memo = self.memo

        if (name is None):
            name = object.__name__

        try:
            module = object.__module__
        except AttributeError:
            module = whichmodule(object, name)

        memo_len = len(memo)
        write(GLOBAL + module + '\n' + name + '\n' +
            self.put(memo_len))
        memo[id(object)] = (memo_len, object)
    dispatch[ClassType] = save_global
    dispatch[FunctionType] = save_global
    dispatch[BuiltinFunctionType] = save_global
437 def _keep_alive(x, memo):
438 """Keeps a reference to the object x in the memo.
440 Because we remember objects by their id, we have
441 to assure that possibly temporary objects are kept
442 alive by referencing them.
443 We store a reference at the id of the memo, which should
444 normally not be used unless someone tries to deepcopy
445 the memo itself...
447 try:
448 memo[id(memo)].append(x)
449 except KeyError:
450 # aha, this is the first one :-)
451 memo[id(memo)]=[x]
classmap = {}

# This is no longer used to find classes, but still for functions
def whichmodule(cls, clsname):
    """Return the name of the module in which `cls` can be found.

    Every module in sys.modules except __main__ is examined for an
    attribute called `clsname` that is the very object `cls`.  The first
    match wins; when there is none the answer is '__main__'.  Answers are
    cached in `classmap`, keyed by `cls`.
    """
    try:
        return classmap[cls]
    except KeyError:
        pass

    found = '__main__'
    for name, module in sys.modules.items():
        if name == '__main__':
            continue
        if hasattr(module, clsname) and getattr(module, clsname) is cls:
            found = name
            break
    classmap[cls] = found
    return found
479 class Unpickler:
481 def __init__(self, file):
482 self.readline = file.readline
483 self.read = file.read
484 self.memo = {}
486 def load(self):
487 self.mark = ['spam'] # Any new unique object
488 self.stack = []
489 self.append = self.stack.append
490 read = self.read
491 dispatch = self.dispatch
492 try:
493 while 1:
494 key = read(1)
495 dispatch[key](self)
496 except STOP, value:
497 return value
499 def marker(self):
500 stack = self.stack
501 mark = self.mark
502 k = len(stack)-1
503 while stack[k] is not mark: k = k-1
504 return k
506 dispatch = {}
508 def load_eof(self):
509 raise EOFError
510 dispatch[''] = load_eof
512 def load_persid(self):
513 pid = self.readline()[:-1]
514 self.append(self.persistent_load(pid))
515 dispatch[PERSID] = load_persid
517 def load_binpersid(self):
518 stack = self.stack
520 pid = stack[-1]
521 del stack[-1]
523 self.append(self.persistent_load(pid))
524 dispatch[BINPERSID] = load_binpersid
526 def load_none(self):
527 self.append(None)
528 dispatch[NONE] = load_none
530 def load_int(self):
531 self.append(string.atoi(self.readline()[:-1]))
532 dispatch[INT] = load_int
534 def load_binint(self):
535 self.append(mloads('i' + self.read(4)))
536 dispatch[BININT] = load_binint
538 def load_binint1(self):
539 self.append(mloads('i' + self.read(1) + '\000\000\000'))
540 dispatch[BININT1] = load_binint1
542 def load_binint2(self):
543 self.append(mloads('i' + self.read(2) + '\000\000'))
544 dispatch[BININT2] = load_binint2
546 def load_long(self):
547 self.append(string.atol(self.readline()[:-1], 0))
548 dispatch[LONG] = load_long
550 def load_float(self):
551 self.append(string.atof(self.readline()[:-1]))
552 dispatch[FLOAT] = load_float
554 def load_binfloat(self, unpack=struct.unpack):
555 self.append(unpack('>d', self.read(8))[0])
556 dispatch[BINFLOAT] = load_binfloat
558 def load_string(self):
559 self.append(eval(self.readline()[:-1],
560 {'__builtins__': {}})) # Let's be careful
561 dispatch[STRING] = load_string
563 def load_binstring(self):
564 len = mloads('i' + self.read(4))
565 self.append(self.read(len))
566 dispatch[BINSTRING] = load_binstring
568 def load_short_binstring(self):
569 len = mloads('i' + self.read(1) + '\000\000\000')
570 self.append(self.read(len))
571 dispatch[SHORT_BINSTRING] = load_short_binstring
573 def load_tuple(self):
574 k = self.marker()
575 self.stack[k:] = [tuple(self.stack[k+1:])]
576 dispatch[TUPLE] = load_tuple
578 def load_empty_tuple(self):
579 self.stack.append(())
580 dispatch[EMPTY_TUPLE] = load_empty_tuple
582 def load_empty_list(self):
583 self.stack.append([])
584 dispatch[EMPTY_LIST] = load_empty_list
586 def load_empty_dictionary(self):
587 self.stack.append({})
588 dispatch[EMPTY_DICT] = load_empty_dictionary
590 def load_list(self):
591 k = self.marker()
592 self.stack[k:] = [self.stack[k+1:]]
593 dispatch[LIST] = load_list
595 def load_dict(self):
596 k = self.marker()
597 d = {}
598 items = self.stack[k+1:]
599 for i in range(0, len(items), 2):
600 key = items[i]
601 value = items[i+1]
602 d[key] = value
603 self.stack[k:] = [d]
604 dispatch[DICT] = load_dict
606 def load_inst(self):
607 k = self.marker()
608 args = tuple(self.stack[k+1:])
609 del self.stack[k:]
610 module = self.readline()[:-1]
611 name = self.readline()[:-1]
612 klass = self.find_class(module, name)
613 instantiated = 0
614 if (not args and type(klass) is ClassType and
615 not hasattr(klass, "__getinitargs__")):
616 try:
617 value = _EmptyClass()
618 value.__class__ = klass
619 instantiated = 1
620 except RuntimeError:
621 # In restricted execution, assignment to inst.__class__ is
622 # prohibited
623 pass
624 if not instantiated:
625 try:
626 value = apply(klass, args)
627 except TypeError, err:
628 raise TypeError, "in constructor for %s: %s" % (
629 klass.__name__, str(err)), sys.exc_info()[2]
630 self.append(value)
631 dispatch[INST] = load_inst
633 def load_obj(self):
634 stack = self.stack
635 k = self.marker()
636 klass = stack[k + 1]
637 del stack[k + 1]
638 args = tuple(stack[k + 1:])
639 del stack[k:]
640 instantiated = 0
641 if (not args and type(klass) is ClassType and
642 not hasattr(klass, "__getinitargs__")):
643 try:
644 value = _EmptyClass()
645 value.__class__ = klass
646 instantiated = 1
647 except RuntimeError:
648 # In restricted execution, assignment to inst.__class__ is
649 # prohibited
650 pass
651 if not instantiated:
652 value = apply(klass, args)
653 self.append(value)
654 dispatch[OBJ] = load_obj
656 def load_global(self):
657 module = self.readline()[:-1]
658 name = self.readline()[:-1]
659 klass = self.find_class(module, name)
660 self.append(klass)
661 dispatch[GLOBAL] = load_global
663 def find_class(self, module, name):
664 try:
665 __import__(module)
666 mod = sys.modules[module]
667 klass = getattr(mod, name)
668 except (ImportError, KeyError, AttributeError):
669 raise SystemError, \
670 "Failed to import class %s from module %s" % \
671 (name, module)
672 return klass
674 def load_reduce(self):
675 stack = self.stack
677 callable = stack[-2]
678 arg_tup = stack[-1]
679 del stack[-2:]
681 if type(callable) is not ClassType:
682 if not safe_constructors.has_key(callable):
683 try:
684 safe = callable.__safe_for_unpickling__
685 except AttributeError:
686 safe = None
688 if (not safe):
689 raise UnpicklingError, "%s is not safe for " \
690 "unpickling" % callable
692 if arg_tup is None:
693 value = callable.__basicnew__()
694 else:
695 value = apply(callable, arg_tup)
696 self.append(value)
697 dispatch[REDUCE] = load_reduce
699 def load_pop(self):
700 del self.stack[-1]
701 dispatch[POP] = load_pop
703 def load_pop_mark(self):
704 k = self.marker()
705 del self.stack[k:]
706 dispatch[POP_MARK] = load_pop_mark
708 def load_dup(self):
709 self.append(self.stack[-1])
710 dispatch[DUP] = load_dup
712 def load_get(self):
713 self.append(self.memo[self.readline()[:-1]])
714 dispatch[GET] = load_get
716 def load_binget(self):
717 i = mloads('i' + self.read(1) + '\000\000\000')
718 self.append(self.memo[`i`])
719 dispatch[BINGET] = load_binget
721 def load_long_binget(self):
722 i = mloads('i' + self.read(4))
723 self.append(self.memo[`i`])
724 dispatch[LONG_BINGET] = load_long_binget
726 def load_put(self):
727 self.memo[self.readline()[:-1]] = self.stack[-1]
728 dispatch[PUT] = load_put
730 def load_binput(self):
731 i = mloads('i' + self.read(1) + '\000\000\000')
732 self.memo[`i`] = self.stack[-1]
733 dispatch[BINPUT] = load_binput
735 def load_long_binput(self):
736 i = mloads('i' + self.read(4))
737 self.memo[`i`] = self.stack[-1]
738 dispatch[LONG_BINPUT] = load_long_binput
740 def load_append(self):
741 stack = self.stack
742 value = stack[-1]
743 del stack[-1]
744 list = stack[-1]
745 list.append(value)
746 dispatch[APPEND] = load_append
748 def load_appends(self):
749 stack = self.stack
750 mark = self.marker()
751 list = stack[mark - 1]
752 for i in range(mark + 1, len(stack)):
753 list.append(stack[i])
755 del stack[mark:]
756 dispatch[APPENDS] = load_appends
758 def load_setitem(self):
759 stack = self.stack
760 value = stack[-1]
761 key = stack[-2]
762 del stack[-2:]
763 dict = stack[-1]
764 dict[key] = value
765 dispatch[SETITEM] = load_setitem
767 def load_setitems(self):
768 stack = self.stack
769 mark = self.marker()
770 dict = stack[mark - 1]
771 for i in range(mark + 1, len(stack), 2):
772 dict[stack[i]] = stack[i + 1]
774 del stack[mark:]
775 dispatch[SETITEMS] = load_setitems
777 def load_build(self):
778 stack = self.stack
779 value = stack[-1]
780 del stack[-1]
781 inst = stack[-1]
782 try:
783 setstate = inst.__setstate__
784 except AttributeError:
785 try:
786 inst.__dict__.update(value)
787 except RuntimeError:
788 # XXX In restricted execution, the instance's __dict__ is not
789 # accessible. Use the old way of unpickling the instance
790 # variables. This is a semantic different when unpickling in
791 # restricted vs. unrestricted modes.
792 for k, v in value.items():
793 setattr(inst, k, v)
794 else:
795 setstate(value)
796 dispatch[BUILD] = load_build
798 def load_mark(self):
799 self.append(self.mark)
800 dispatch[MARK] = load_mark
802 def load_stop(self):
803 value = self.stack[-1]
804 del self.stack[-1]
805 raise STOP, value
806 dispatch[STOP] = load_stop
# Helper class for load_inst/load_obj

class _EmptyClass:
    """Empty classic class with no __init__.

    The Unpickler creates an instance of it and then assigns the real
    class to the instance's __class__ attribute, which yields an instance
    of that class without its __init__ having been called.
    """
    pass
813 # Shorthands
815 from StringIO import StringIO
def dump(object, file, bin = 0):
    """Pickle `object` onto `file`; a true `bin` selects binary mode."""
    pickler = Pickler(file, bin)
    pickler.dump(object)
def dumps(object, bin = 0):
    """Return the pickle of `object` as a string; a true `bin` selects
    binary mode."""
    buf = StringIO()
    pickler = Pickler(buf, bin)
    pickler.dump(object)
    return buf.getvalue()
def load(file):
    """Read one pickled object from `file` and return it."""
    unpickler = Unpickler(file)
    return unpickler.load()
def loads(str):
    """Return the object pickled in the string `str`."""
    return Unpickler(StringIO(str)).load()
833 # The rest is used for testing only
835 class C:
836 def __cmp__(self, other):
837 return cmp(self.__dict__, other.__dict__)
839 def test():
840 fn = 'out'
841 c = C()
842 c.foo = 1
843 c.bar = 2
844 x = [0, 1, 2, 3]
845 y = ('abc', 'abc', c, c)
846 x.append(y)
847 x.append(y)
848 x.append(5)
849 f = open(fn, 'w')
850 F = Pickler(f)
851 F.dump(x)
852 f.close()
853 f = open(fn, 'r')
854 U = Unpickler(f)
855 x2 = U.load()
856 print x
857 print x2
858 print x == x2
859 print map(id, x)
860 print map(id, x2)
861 print F.memo
862 print U.memo
864 if __name__ == '__main__':
865 test()