1 """Create portable serialized representations of Python objects.
3 See module cPickle for a (much) faster implementation.
4 See module copy_reg for a mechanism for registering custom picklers.
14 dumps(object) -> string
16 loads(string) -> object
26 __version__
= "$Revision$" # Code version
29 from copy_reg
import dispatch_table
, safe_constructors
35 __all__
= ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
36 "Unpickler", "dump", "dumps", "load", "loads"]
38 format_version
= "1.3" # File format version we write
39 compatible_formats
= ["1.0", "1.1", "1.2"] # Old format versions we can read
41 mdumps
= marshal
.dumps
42 mloads
= marshal
.loads
class PickleError(Exception):
    """Common base class for the pickling and unpickling errors below."""


class PicklingError(PickleError):
    """PickleError subclass raised by the pickling code in this module."""


class UnpicklingError(PickleError):
    """PickleError subclass raised by the unpickling code in this module."""
48 class _Stop(Exception):
49 def __init__(self
, value
):
53 from org
.python
.core
import PyStringMap
105 __all__
.extend([x
for x
in dir() if re
.match("[A-Z][A-Z0-9_]+$",x
)])
109 def __init__(self
, file, bin
= 0):
110 self
.write
= file.write
114 def dump(self
, object):
124 return LONG_BINPUT
+ s
126 return PUT
+ `i`
+ '\n'
135 return LONG_BINGET
+ s
137 return GET
+ `i`
+ '\n'
139 def save(self
, object, pers_save
= 0):
143 pid
= self
.persistent_id(object)
152 if (t
is TupleType
) and (len(object) == 0):
154 self
.save_empty_tuple(object)
156 self
.save_tuple(object)
160 self
.write(self
.get(memo
[d
][0]))
166 pid
= self
.inst_persistent_id(object)
172 reduce = dispatch_table
[t
]
175 reduce = object.__reduce
__
176 except AttributeError:
177 raise PicklingError
, \
178 "can't pickle %s object: %s" % (`t
.__name
__`
,
185 if type(tup
) is StringType
:
186 self
.save_global(object, tup
)
189 if type(tup
) is not TupleType
:
190 raise PicklingError
, "Value returned by %s must be a " \
195 if (l
!= 2) and (l
!= 3):
196 raise PicklingError
, "tuple returned by %s must contain " \
197 "only two or three elements" % reduce
207 if type(arg_tup
) is not TupleType
and arg_tup
is not None:
208 raise PicklingError
, "Second element of tuple returned " \
209 "by %s must be a tuple" % reduce
211 self
.save_reduce(callable, arg_tup
, state
)
213 self
.write(self
.put(memo_len
))
214 memo
[d
] = (memo_len
, object)
219 def persistent_id(self
, object):
222 def inst_persistent_id(self
, object):
225 def save_pers(self
, pid
):
227 self
.write(PERSID
+ str(pid
) + '\n')
230 self
.write(BINPERSID
)
232 def save_reduce(self
, callable, arg_tup
, state
= None):
240 if state
is not None:
246 def save_none(self
, object):
248 dispatch
[NoneType
] = save_none
250 def save_int(self
, object):
252 # If the int is small enough to fit in a signed 4-byte 2's-comp
253 # format, we can store it more efficiently than the general
255 high_bits
= object >> 31 # note that Python shift sign-extends
256 if high_bits
== 0 or high_bits
== -1:
257 # All high bits are copies of bit 2**31, so the value
258 # fits in a 4-byte signed int.
259 i
= mdumps(object)[1:]
261 if i
[-2:] == '\000\000': # fits in 2-byte unsigned int
262 if i
[-3] == '\000': # fits in 1-byte unsigned int
263 self
.write(BININT1
+ i
[0])
265 self
.write(BININT2
+ i
[:2])
267 self
.write(BININT
+ i
)
269 # Text pickle, or int too big to fit in signed 4-byte format.
270 self
.write(INT
+ `
object`
+ '\n')
271 dispatch
[IntType
] = save_int
def save_long(self, object):
    """Write a long as the LONG opcode, then its repr(), then a newline."""
    text = repr(object)
    self.write(LONG + text + '\n')
dispatch[LongType] = save_long
277 def save_float(self
, object, pack
=struct
.pack
):
279 self
.write(BINFLOAT
+ pack('>d', object))
281 self
.write(FLOAT
+ `
object`
+ '\n')
282 dispatch
[FloatType
] = save_float
284 def save_string(self
, object):
292 self
.write(SHORT_BINSTRING
+ s
[0] + object)
294 self
.write(BINSTRING
+ s
+ object)
296 self
.write(STRING
+ `
object`
+ '\n')
299 self
.write(self
.put(memo_len
))
300 memo
[d
] = (memo_len
, object)
301 dispatch
[StringType
] = save_string
303 def save_unicode(self
, object):
308 encoding
= object.encode('utf-8')
311 self
.write(BINUNICODE
+ s
+ encoding
)
313 object = object.replace("\\", "\\u005c")
314 object = object.replace("\n", "\\u000a")
315 self
.write(UNICODE
+ object.encode('raw-unicode-escape') + '\n')
318 self
.write(self
.put(memo_len
))
319 memo
[d
] = (memo_len
, object)
320 dispatch
[UnicodeType
] = save_unicode
322 if StringType
== UnicodeType
:
323 # This is true for Jython
324 def save_string(self
, object):
327 unicode = object.isunicode()
331 object = object.encode("utf-8")
334 if l
< 256 and not unicode:
335 self
.write(SHORT_BINSTRING
+ s
[0] + object)
338 self
.write(BINUNICODE
+ s
+ object)
340 self
.write(BINSTRING
+ s
+ object)
343 object = object.replace("\\", "\\u005c")
344 object = object.replace("\n", "\\u000a")
345 object = object.encode('raw-unicode-escape')
346 self
.write(UNICODE
+ object + '\n')
348 self
.write(STRING
+ `
object`
+ '\n')
351 self
.write(self
.put(memo_len
))
352 memo
[d
] = (memo_len
, object)
353 dispatch
[StringType
] = save_string
355 def save_tuple(self
, object):
365 for element
in object:
368 if len(object) and memo
.has_key(d
):
370 write(POP_MARK
+ self
.get(memo
[d
][0]))
373 write(POP
* (len(object) + 1) + self
.get(memo
[d
][0]))
377 self
.write(TUPLE
+ self
.put(memo_len
))
378 memo
[d
] = (memo_len
, object)
379 dispatch
[TupleType
] = save_tuple
def save_empty_tuple(self, object):
    """Write the EMPTY_TUPLE opcode; the object argument is not used."""
    opcode = EMPTY_TUPLE
    self.write(opcode)
384 def save_list(self
, object):
397 write(self
.put(memo_len
))
398 memo
[d
] = (memo_len
, object)
400 using_appends
= (self
.bin
and (len(object) > 1))
405 for element
in object:
408 if not using_appends
:
413 dispatch
[ListType
] = save_list
415 def save_dict(self
, object):
428 self
.write(self
.put(memo_len
))
429 memo
[d
] = (memo_len
, object)
431 using_setitems
= (self
.bin
and (len(object) > 1))
436 items
= object.items()
437 for key
, value
in items
:
441 if not using_setitems
:
447 dispatch
[DictionaryType
] = save_dict
448 if not PyStringMap
is None:
449 dispatch
[PyStringMap
] = save_dict
451 def save_inst(self
, object):
453 cls
= object.__class
__
459 if hasattr(object, '__getinitargs__'):
460 args
= object.__getinitargs
__()
461 len(args
) # XXX Assert it's a sequence
462 _keep_alive(args
, memo
)
476 write(OBJ
+ self
.put(memo_len
))
478 write(INST
+ cls
.__module
__ + '\n' + cls
.__name
__ + '\n' +
481 memo
[d
] = (memo_len
, object)
484 getstate
= object.__getstate
__
485 except AttributeError:
486 stuff
= object.__dict
__
489 _keep_alive(stuff
, memo
)
492 dispatch
[InstanceType
] = save_inst
494 def save_global(self
, object, name
= None):
499 name
= object.__name
__
502 module
= object.__module
__
503 except AttributeError:
504 module
= whichmodule(object, name
)
508 mod
= sys
.modules
[module
]
509 klass
= getattr(mod
, name
)
510 except (ImportError, KeyError, AttributeError):
512 "Can't pickle %r: it's not found as %s.%s" %
513 (object, module
, name
))
515 if klass
is not object:
517 "Can't pickle %r: it's not the same object as %s.%s" %
518 (object, module
, name
))
521 write(GLOBAL
+ module
+ '\n' + name
+ '\n' +
523 memo
[id(object)] = (memo_len
, object)
524 dispatch
[ClassType
] = save_global
525 dispatch
[FunctionType
] = save_global
526 dispatch
[BuiltinFunctionType
] = save_global
527 dispatch
[TypeType
] = save_global
530 def _keep_alive(x
, memo
):
531 """Keeps a reference to the object x in the memo.
533 Because we remember objects by their id, we have
534 to assure that possibly temporary objects are kept
535 alive by referencing them.
536 We store a reference at the id of the memo, which should
537 normally not be used unless someone tries to deepcopy
541 memo
[id(memo
)].append(x
)
543 # aha, this is the first one :-)
549 # This is no longer used to find classes, but still for functions
550 def whichmodule(cls
, clsname
):
551 """Figure out the module in which a class occurs.
553 Search sys.modules for the module.
555 Return a module name.
556 If the class cannot be found, return __main__.
558 if classmap
.has_key(cls
):
561 for name
, module
in sys
.modules
.items():
562 if name
!= '__main__' and \
563 hasattr(module
, clsname
) and \
564 getattr(module
, clsname
) is cls
:
574 def __init__(self
, file):
575 self
.readline
= file.readline
576 self
.read
= file.read
580 self
.mark
= object() # any new unique object
582 self
.append
= self
.stack
.append
584 dispatch
= self
.dispatch
589 except _Stop
, stopinst
:
590 return stopinst
.value
596 while stack
[k
] is not mark
: k
= k
-1
603 dispatch
[''] = load_eof
def load_persid(self):
    """Read one line as a persistent id and push persistent_load(pid)."""
    line = self.readline()
    pid = line[:-1]  # drop the final character of the line
    resolved = self.persistent_load(pid)
    self.append(resolved)
dispatch[PERSID] = load_persid
610 def load_binpersid(self
):
616 self
.append(self
.persistent_load(pid
))
617 dispatch
[BINPERSID
] = load_binpersid
621 dispatch
[NONE
] = load_none
624 data
= self
.readline()
626 self
.append(int(data
))
628 self
.append(long(data
))
629 dispatch
[INT
] = load_int
def load_binint(self):
    """Read four bytes and push the int mloads decodes from 'i' + bytes."""
    raw = self.read(4)
    number = mloads('i' + raw)
    self.append(number)
dispatch[BININT] = load_binint
def load_binint1(self):
    """Read one byte and push it as an int.

    The byte is padded with three zero bytes so that mloads sees the
    same four-byte 'i' record that load_binint decodes.
    """
    raw = self.read(1)
    padded = 'i' + raw + '\000\000\000'
    self.append(mloads(padded))
dispatch[BININT1] = load_binint1
def load_binint2(self):
    """Read two bytes and push them as an int.

    The bytes are padded with two zero bytes so that mloads sees the
    same four-byte 'i' record that load_binint decodes.
    """
    raw = self.read(2)
    padded = 'i' + raw + '\000\000'
    self.append(mloads(padded))
dispatch[BININT2] = load_binint2
644 self
.append(long(self
.readline()[:-1], 0))
645 dispatch
[LONG
] = load_long
def load_float(self):
    """Read one line, drop its last character, and push it as a float."""
    text = self.readline()[:-1]
    number = float(text)
    self.append(number)
dispatch[FLOAT] = load_float
def load_binfloat(self, unpack=struct.unpack):
    """Read eight bytes and push the '>d' (big-endian double) they encode.

    unpack defaults to struct.unpack, bound once when the method is
    defined.
    """
    raw = self.read(8)
    fields = unpack('>d', raw)
    self.append(fields[0])
dispatch[BINFLOAT] = load_binfloat
def load_string(self):
    """Read a quoted string literal from one line and push its value.

    Raises ValueError("insecure string pickle") when _is_string_secure
    rejects the text.
    """
    rep = self.readline()[:-1]
    if not self._is_string_secure(rep):
        raise ValueError("insecure string pickle")
    # NOTE(review): this eval()s text taken from the pickle stream.  The
    # only guards are the _is_string_secure check above and the emptied
    # __builtins__ namespace -- let's be careful.
    restricted_globals = {'__builtins__': {}}
    value = eval(rep, restricted_globals)
    self.append(value)
dispatch[STRING] = load_string
663 def _is_string_secure(self
, s
):
664 """Return true if s contains a string that is safe to eval
666 The definition of secure string is based on the implementation
667 in cPickle. s is secure as long as it only contains a quoted
668 string and optional trailing whitespace.
671 if q
not in ("'", '"'):
673 # find the closing quote
678 i
= s
.index(q
, offset
)
680 # if there is an error the first time, there is no
686 # check to see if this one is escaped
689 while j
>= offset
and s
[j
] == '\\':
def load_binstring(self):
    """Read a four-byte length, then push that many bytes as a string."""
    # Decode the length the same way load_binint decodes an int.
    size = mloads('i' + self.read(4))
    payload = self.read(size)
    self.append(payload)
dispatch[BINSTRING] = load_binstring
def load_unicode(self):
    """Read one line and push it decoded with 'raw-unicode-escape'."""
    raw = self.readline()[:-1]  # drop the final character of the line
    text = unicode(raw, 'raw-unicode-escape')
    self.append(text)
dispatch[UNICODE] = load_unicode
def load_binunicode(self):
    """Read a four-byte length, then push that many bytes decoded as UTF-8."""
    size = mloads('i' + self.read(4))
    encoded = self.read(size)
    self.append(unicode(encoded, 'utf-8'))
dispatch[BINUNICODE] = load_binunicode
def load_short_binstring(self):
    """Read a one-byte length, then push that many bytes as a string."""
    # Pad the single length byte to the four-byte 'i' record mloads expects.
    size = mloads('i' + self.read(1) + '\000\000\000')
    payload = self.read(size)
    self.append(payload)
dispatch[SHORT_BINSTRING] = load_short_binstring
719 def load_tuple(self
):
721 self
.stack
[k
:] = [tuple(self
.stack
[k
+1:])]
722 dispatch
[TUPLE
] = load_tuple
def load_empty_tuple(self):
    """Push an empty tuple onto the stack."""
    stack = self.stack
    stack.append(())
dispatch[EMPTY_TUPLE] = load_empty_tuple
def load_empty_list(self):
    """Push a new empty list onto the stack."""
    stack = self.stack
    stack.append([])
dispatch[EMPTY_LIST] = load_empty_list
def load_empty_dictionary(self):
    """Push a new empty dictionary onto the stack."""
    stack = self.stack
    stack.append({})
dispatch[EMPTY_DICT] = load_empty_dictionary
738 self
.stack
[k
:] = [self
.stack
[k
+1:]]
739 dispatch
[LIST
] = load_list
744 items
= self
.stack
[k
+1:]
745 for i
in range(0, len(items
), 2):
750 dispatch
[DICT
] = load_dict
754 args
= tuple(self
.stack
[k
+1:])
756 module
= self
.readline()[:-1]
757 name
= self
.readline()[:-1]
758 klass
= self
.find_class(module
, name
)
760 if (not args
and type(klass
) is ClassType
and
761 not hasattr(klass
, "__getinitargs__")):
763 value
= _EmptyClass()
764 value
.__class
__ = klass
767 # In restricted execution, assignment to inst.__class__ is
772 if not hasattr(klass
, '__safe_for_unpickling__'):
773 raise UnpicklingError('%s is not safe for unpickling' %
775 value
= apply(klass
, args
)
776 except TypeError, err
:
777 raise TypeError, "in constructor for %s: %s" % (
778 klass
.__name
__, str(err
)), sys
.exc_info()[2]
780 dispatch
[INST
] = load_inst
787 args
= tuple(stack
[k
+ 1:])
790 if (not args
and type(klass
) is ClassType
and
791 not hasattr(klass
, "__getinitargs__")):
793 value
= _EmptyClass()
794 value
.__class
__ = klass
797 # In restricted execution, assignment to inst.__class__ is
801 value
= apply(klass
, args
)
803 dispatch
[OBJ
] = load_obj
805 def load_global(self
):
806 module
= self
.readline()[:-1]
807 name
= self
.readline()[:-1]
808 klass
= self
.find_class(module
, name
)
810 dispatch
[GLOBAL
] = load_global
812 def find_class(self
, module
, name
):
814 mod
= sys
.modules
[module
]
815 klass
= getattr(mod
, name
)
818 def load_reduce(self
):
825 if type(callable) is not ClassType
:
826 if not safe_constructors
.has_key(callable):
828 safe
= callable.__safe
_for
_unpickling
__
829 except AttributeError:
833 raise UnpicklingError
, "%s is not safe for " \
834 "unpickling" % callable
837 value
= callable.__basicnew
__()
839 value
= apply(callable, arg_tup
)
841 dispatch
[REDUCE
] = load_reduce
845 dispatch
[POP
] = load_pop
847 def load_pop_mark(self
):
850 dispatch
[POP_MARK
] = load_pop_mark
853 self
.append(self
.stack
[-1])
854 dispatch
[DUP
] = load_dup
857 self
.append(self
.memo
[self
.readline()[:-1]])
858 dispatch
[GET
] = load_get
def load_binget(self):
    """Read a one-byte memo index and push the memo entry stored under it."""
    index = mloads('i' + self.read(1) + '\000\000\000')
    # The memo is keyed by the repr() string of the index, not the int.
    key = repr(index)
    self.append(self.memo[key])
dispatch[BINGET] = load_binget
def load_long_binget(self):
    """Read a four-byte memo index and push the memo entry stored under it."""
    index = mloads('i' + self.read(4))
    # The memo is keyed by the repr() string of the index, not the int.
    key = repr(index)
    self.append(self.memo[key])
dispatch[LONG_BINGET] = load_long_binget
871 self
.memo
[self
.readline()[:-1]] = self
.stack
[-1]
872 dispatch
[PUT
] = load_put
def load_binput(self):
    """Read a one-byte memo index and store the top of the stack under it."""
    index = mloads('i' + self.read(1) + '\000\000\000')
    # The memo is keyed by the repr() string of the index, not the int.
    key = repr(index)
    self.memo[key] = self.stack[-1]
dispatch[BINPUT] = load_binput
def load_long_binput(self):
    """Read a four-byte memo index and store the top of the stack under it."""
    index = mloads('i' + self.read(4))
    # The memo is keyed by the repr() string of the index, not the int.
    key = repr(index)
    self.memo[key] = self.stack[-1]
dispatch[LONG_BINPUT] = load_long_binput
884 def load_append(self
):
890 dispatch
[APPEND
] = load_append
892 def load_appends(self
):
895 list = stack
[mark
- 1]
896 for i
in range(mark
+ 1, len(stack
)):
897 list.append(stack
[i
])
900 dispatch
[APPENDS
] = load_appends
902 def load_setitem(self
):
909 dispatch
[SETITEM
] = load_setitem
911 def load_setitems(self
):
914 dict = stack
[mark
- 1]
915 for i
in range(mark
+ 1, len(stack
), 2):
916 dict[stack
[i
]] = stack
[i
+ 1]
919 dispatch
[SETITEMS
] = load_setitems
921 def load_build(self
):
927 setstate
= inst
.__setstate
__
928 except AttributeError:
930 inst
.__dict
__.update(value
)
932 # XXX In restricted execution, the instance's __dict__ is not
933 # accessible. Use the old way of unpickling the instance
934 # variables. This is a semantic different when unpickling in
935 # restricted vs. unrestricted modes.
936 for k
, v
in value
.items():
940 dispatch
[BUILD
] = load_build
943 self
.append(self
.mark
)
944 dispatch
[MARK
] = load_mark
947 value
= self
.stack
[-1]
950 dispatch
[STOP
] = load_stop
952 # Helper class for load_inst/load_obj
960 from cStringIO
import StringIO
962 from StringIO
import StringIO
def dump(object, file, bin = 0):
    """Pickle object to file using a Pickler built from file and bin."""
    pickler = Pickler(file, bin)
    pickler.dump(object)
967 def dumps(object, bin
= 0):
969 Pickler(file, bin
).dump(object)
970 return file.getvalue()
973 return Unpickler(file).load()
977 return Unpickler(file).load()