2 /* Write Python objects to files and read them back.
3 This is intended for writing and reading compiled Python code only;
4 a true persistent storage facility would be much harder, since
5 it would have to take circular links and sharing into account. */
7 #define PY_SSIZE_T_CLEAN
10 #include "longintrepr.h"
14 /* High water mark to determine when the marshalled object is dangerously deep
15 * and risks coring the interpreter. When the object stack gets this deep,
16 * raise an exception instead of continuing.
18 #define MAX_MARSHAL_STACK_DEPTH 5000
/* Single-character type codes.  The writer emits one of these with
   w_byte() ahead of the data for each value it marshals; the reader
   fetches it back with r_byte() and compares against these constants.
   TYPE_UNKNOWN is what the writer emits when no other case applies. */
22 #define TYPE_FALSE 'F'
24 #define TYPE_STOPITER 'S'
25 #define TYPE_ELLIPSIS '.'
27 #define TYPE_INT64 'I'
28 #define TYPE_FLOAT 'f'
29 #define TYPE_BINARY_FLOAT 'g'
30 #define TYPE_COMPLEX 'x'
31 #define TYPE_BINARY_COMPLEX 'y'
33 #define TYPE_STRING 's'
34 #define TYPE_INTERNED 't'
35 #define TYPE_STRINGREF 'R'
36 #define TYPE_TUPLE '('
40 #define TYPE_UNICODE 'u'
41 #define TYPE_UNKNOWN '?'
43 #define TYPE_FROZENSET '>'
49 /* If fp == NULL, the following are valid: */
53 PyObject
*strings
; /* dict on marshal, list on unmarshal */
57 #define w_byte(c, p) if (((p)->fp)) putc((c), (p)->fp); \
58 else if ((p)->ptr != (p)->end) *(p)->ptr++ = (c); \
62 w_more(int c
, WFILE
*p
)
64 Py_ssize_t size
, newsize
;
66 return; /* An error already occurred */
67 size
= PyString_Size(p
->str
);
68 newsize
= size
+ 1024;
69 if (_PyString_Resize(&p
->str
, newsize
) != 0) {
70 p
->ptr
= p
->end
= NULL
;
73 p
->ptr
= PyString_AS_STRING((PyStringObject
*)p
->str
) + size
;
75 PyString_AS_STRING((PyStringObject
*)p
->str
) + newsize
;
76 *p
->ptr
++ = Py_SAFE_DOWNCAST(c
, int, char);
81 w_string(char *s
, int n
, WFILE
*p
)
84 fwrite(s
, 1, n
, p
->fp
);
95 w_short(int x
, WFILE
*p
)
97 w_byte((char)( x
& 0xff), p
);
98 w_byte((char)((x
>> 8) & 0xff), p
);
102 w_long(long x
, WFILE
*p
)
104 w_byte((char)( x
& 0xff), p
);
105 w_byte((char)((x
>> 8) & 0xff), p
);
106 w_byte((char)((x
>>16) & 0xff), p
);
107 w_byte((char)((x
>>24) & 0xff), p
);
112 w_long64(long x
, WFILE
*p
)
120 w_object(PyObject
*v
, WFILE
*p
)
126 if (p
->depth
> MAX_MARSHAL_STACK_DEPTH
) {
129 else if (v
== NULL
) {
130 w_byte(TYPE_NULL
, p
);
132 else if (v
== Py_None
) {
133 w_byte(TYPE_NONE
, p
);
135 else if (v
== PyExc_StopIteration
) {
136 w_byte(TYPE_STOPITER
, p
);
138 else if (v
== Py_Ellipsis
) {
139 w_byte(TYPE_ELLIPSIS
, p
);
141 else if (v
== Py_False
) {
142 w_byte(TYPE_FALSE
, p
);
144 else if (v
== Py_True
) {
145 w_byte(TYPE_TRUE
, p
);
147 else if (PyInt_Check(v
)) {
148 long x
= PyInt_AS_LONG((PyIntObject
*)v
);
150 long y
= Py_ARITHMETIC_RIGHT_SHIFT(long, x
, 31);
152 w_byte(TYPE_INT64
, p
);
162 else if (PyLong_Check(v
)) {
163 PyLongObject
*ob
= (PyLongObject
*)v
;
164 w_byte(TYPE_LONG
, p
);
169 for (i
= 0; i
< n
; i
++)
170 w_short(ob
->ob_digit
[i
], p
);
172 else if (PyFloat_Check(v
)) {
173 if (p
->version
> 1) {
174 unsigned char buf
[8];
175 if (_PyFloat_Pack8(PyFloat_AsDouble(v
),
180 w_byte(TYPE_BINARY_FLOAT
, p
);
181 w_string((char*)buf
, 8, p
);
184 char buf
[256]; /* Plenty to format any double */
185 PyFloat_AsReprString(buf
, (PyFloatObject
*)v
);
187 w_byte(TYPE_FLOAT
, p
);
189 w_string(buf
, (int)n
, p
);
192 #ifndef WITHOUT_COMPLEX
193 else if (PyComplex_Check(v
)) {
194 if (p
->version
> 1) {
195 unsigned char buf
[8];
196 if (_PyFloat_Pack8(PyComplex_RealAsDouble(v
),
201 w_byte(TYPE_BINARY_COMPLEX
, p
);
202 w_string((char*)buf
, 8, p
);
203 if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v
),
208 w_string((char*)buf
, 8, p
);
211 char buf
[256]; /* Plenty to format any double */
213 w_byte(TYPE_COMPLEX
, p
);
214 temp
= (PyFloatObject
*)PyFloat_FromDouble(
215 PyComplex_RealAsDouble(v
));
216 PyFloat_AsReprString(buf
, temp
);
220 w_string(buf
, (int)n
, p
);
221 temp
= (PyFloatObject
*)PyFloat_FromDouble(
222 PyComplex_ImagAsDouble(v
));
223 PyFloat_AsReprString(buf
, temp
);
227 w_string(buf
, (int)n
, p
);
231 else if (PyString_Check(v
)) {
232 if (p
->strings
&& PyString_CHECK_INTERNED(v
)) {
233 PyObject
*o
= PyDict_GetItem(p
->strings
, v
);
235 long w
= PyInt_AsLong(o
);
236 w_byte(TYPE_STRINGREF
, p
);
241 o
= PyInt_FromSsize_t(PyDict_Size(p
->strings
));
242 PyDict_SetItem(p
->strings
, v
, o
);
244 w_byte(TYPE_INTERNED
, p
);
248 w_byte(TYPE_STRING
, p
);
250 n
= PyString_GET_SIZE(v
);
252 /* huge strings are not supported */
258 w_string(PyString_AS_STRING(v
), (int)n
, p
);
260 #ifdef Py_USING_UNICODE
261 else if (PyUnicode_Check(v
)) {
263 utf8
= PyUnicode_AsUTF8String(v
);
269 w_byte(TYPE_UNICODE
, p
);
270 n
= PyString_GET_SIZE(utf8
);
277 w_string(PyString_AS_STRING(utf8
), (int)n
, p
);
281 else if (PyTuple_Check(v
)) {
282 w_byte(TYPE_TUPLE
, p
);
285 for (i
= 0; i
< n
; i
++) {
286 w_object(PyTuple_GET_ITEM(v
, i
), p
);
289 else if (PyList_Check(v
)) {
290 w_byte(TYPE_LIST
, p
);
291 n
= PyList_GET_SIZE(v
);
293 for (i
= 0; i
< n
; i
++) {
294 w_object(PyList_GET_ITEM(v
, i
), p
);
297 else if (PyDict_Check(v
)) {
299 PyObject
*key
, *value
;
300 w_byte(TYPE_DICT
, p
);
301 /* This one is NULL object terminated! */
303 while (PyDict_Next(v
, &pos
, &key
, &value
)) {
307 w_object((PyObject
*)NULL
, p
);
309 else if (PyAnySet_Check(v
)) {
310 PyObject
*value
, *it
;
312 if (PyObject_TypeCheck(v
, &PySet_Type
))
315 w_byte(TYPE_FROZENSET
, p
);
316 n
= PyObject_Size(v
);
323 it
= PyObject_GetIter(v
);
329 while ((value
= PyIter_Next(it
)) != NULL
) {
334 if (PyErr_Occurred()) {
340 else if (PyCode_Check(v
)) {
341 PyCodeObject
*co
= (PyCodeObject
*)v
;
342 w_byte(TYPE_CODE
, p
);
343 w_long(co
->co_argcount
, p
);
344 w_long(co
->co_nlocals
, p
);
345 w_long(co
->co_stacksize
, p
);
346 w_long(co
->co_flags
, p
);
347 w_object(co
->co_code
, p
);
348 w_object(co
->co_consts
, p
);
349 w_object(co
->co_names
, p
);
350 w_object(co
->co_varnames
, p
);
351 w_object(co
->co_freevars
, p
);
352 w_object(co
->co_cellvars
, p
);
353 w_object(co
->co_filename
, p
);
354 w_object(co
->co_name
, p
);
355 w_long(co
->co_firstlineno
, p
);
356 w_object(co
->co_lnotab
, p
);
358 else if (PyObject_CheckReadBuffer(v
)) {
359 /* Write unknown buffer-style objects as a string */
361 PyBufferProcs
*pb
= v
->ob_type
->tp_as_buffer
;
362 w_byte(TYPE_STRING
, p
);
363 n
= (*pb
->bf_getreadbuffer
)(v
, 0, (void **)&s
);
370 w_string(s
, (int)n
, p
);
373 w_byte(TYPE_UNKNOWN
, p
);
380 /* version currently has no effect for writing longs. */
382 PyMarshal_WriteLongToFile(long x
, FILE *fp
, int version
)
389 wf
.version
= version
;
394 PyMarshal_WriteObjectToFile(PyObject
*x
, FILE *fp
, int version
)
400 wf
.strings
= (version
> 0) ? PyDict_New() : NULL
;
401 wf
.version
= version
;
403 Py_XDECREF(wf
.strings
);
406 typedef WFILE RFILE
; /* Same struct with different invariants */
/* rs_byte(p): take the next byte from the in-memory buffer as an
   unsigned char and advance p->ptr; evaluates to EOF once p->ptr has
   reached p->end.  Note that the argument is expanded more than once. */
408 #define rs_byte(p) (((p)->ptr != (p)->end) ? (unsigned char)*(p)->ptr++ : EOF)
/* r_byte(p): read one byte with getc() when p->fp is set, otherwise
   fall back to rs_byte() on the in-memory buffer. */
410 #define r_byte(p) ((p)->fp ? getc((p)->fp) : rs_byte(p))
413 r_string(char *s
, int n
, RFILE
*p
)
416 /* The result fits into int because it must be <=n. */
417 return (int)fread(s
, 1, n
, p
->fp
);
418 if (p
->end
- p
->ptr
< n
)
419 n
= (int)(p
->end
- p
->ptr
);
420 memcpy(s
, p
->ptr
, n
);
431 /* Sign-extension, in case short is wider than 16 bits */
440 register FILE *fp
= p
->fp
;
443 x
|= (long)getc(fp
) << 8;
444 x
|= (long)getc(fp
) << 16;
445 x
|= (long)getc(fp
) << 24;
449 x
|= (long)rs_byte(p
) << 8;
450 x
|= (long)rs_byte(p
) << 16;
451 x
|= (long)rs_byte(p
) << 24;
454 /* Sign extension for 64-bit machines */
455 x
|= -(x
& 0x80000000L
);
460 /* r_long64 deals with the TYPE_INT64 code. On a machine with
461 sizeof(long) > 4, it returns a Python int object, else a Python long
462 object. Note that w_long64 writes out TYPE_INT if 32 bits is enough,
463 so there's no inefficiency here in returning a PyLong on 32-bit boxes
464 for everything written via TYPE_INT64 (i.e., if an int is written via
465 TYPE_INT64, it *needs* more than 32 bits).
470 long lo4
= r_long(p
);
471 long hi4
= r_long(p
);
473 long x
= (hi4
<< 32) | (lo4
& 0xFFFFFFFFL
);
474 return PyInt_FromLong(x
);
476 unsigned char buf
[8];
478 int is_little_endian
= (int)*(char*)&one
;
479 if (is_little_endian
) {
480 memcpy(buf
, &lo4
, 4);
481 memcpy(buf
+4, &hi4
, 4);
484 memcpy(buf
, &hi4
, 4);
485 memcpy(buf
+4, &lo4
, 4);
487 return _PyLong_FromByteArray(buf
, 8, is_little_endian
, 1);
494 /* NULL is a valid return value; it does not necessarily mean that
495 an exception is set. */
496 PyObject
*v
, *v2
, *v3
;
498 int type
= r_byte(p
);
503 PyErr_SetString(PyExc_EOFError
,
504 "EOF read where object expected");
515 Py_INCREF(PyExc_StopIteration
);
516 return PyExc_StopIteration
;
519 Py_INCREF(Py_Ellipsis
);
531 return PyInt_FromLong(r_long(p
));
542 ob
= _PyLong_New(size
);
546 for (i
= 0; i
< size
; i
++) {
547 int digit
= r_short(p
);
550 PyErr_SetString(PyExc_ValueError
,
554 ob
->ob_digit
[i
] = digit
;
556 return (PyObject
*)ob
;
564 if (n
== EOF
|| r_string(buf
, (int)n
, p
) != n
) {
565 PyErr_SetString(PyExc_EOFError
,
566 "EOF read where object expected");
570 PyFPE_START_PROTECT("atof", return 0)
571 dx
= PyOS_ascii_atof(buf
);
572 PyFPE_END_PROTECT(dx
)
573 return PyFloat_FromDouble(dx
);
576 case TYPE_BINARY_FLOAT
:
578 unsigned char buf
[8];
580 if (r_string((char*)buf
, 8, p
) != 8) {
581 PyErr_SetString(PyExc_EOFError
,
582 "EOF read where object expected");
585 x
= _PyFloat_Unpack8(buf
, 1);
586 if (x
== -1.0 && PyErr_Occurred()) {
589 return PyFloat_FromDouble(x
);
592 #ifndef WITHOUT_COMPLEX
598 if (n
== EOF
|| r_string(buf
, (int)n
, p
) != n
) {
599 PyErr_SetString(PyExc_EOFError
,
600 "EOF read where object expected");
604 PyFPE_START_PROTECT("atof", return 0)
605 c
.real
= PyOS_ascii_atof(buf
);
608 if (n
== EOF
|| r_string(buf
, (int)n
, p
) != n
) {
609 PyErr_SetString(PyExc_EOFError
,
610 "EOF read where object expected");
614 PyFPE_START_PROTECT("atof", return 0)
615 c
.imag
= PyOS_ascii_atof(buf
);
617 return PyComplex_FromCComplex(c
);
620 case TYPE_BINARY_COMPLEX
:
622 unsigned char buf
[8];
624 if (r_string((char*)buf
, 8, p
) != 8) {
625 PyErr_SetString(PyExc_EOFError
,
626 "EOF read where object expected");
629 c
.real
= _PyFloat_Unpack8(buf
, 1);
630 if (c
.real
== -1.0 && PyErr_Occurred()) {
633 if (r_string((char*)buf
, 8, p
) != 8) {
634 PyErr_SetString(PyExc_EOFError
,
635 "EOF read where object expected");
638 c
.imag
= _PyFloat_Unpack8(buf
, 1);
639 if (c
.imag
== -1.0 && PyErr_Occurred()) {
642 return PyComplex_FromCComplex(c
);
650 PyErr_SetString(PyExc_ValueError
, "bad marshal data");
653 v
= PyString_FromStringAndSize((char *)NULL
, n
);
656 if (r_string(PyString_AS_STRING(v
), (int)n
, p
) != n
) {
658 PyErr_SetString(PyExc_EOFError
,
659 "EOF read where object expected");
662 if (type
== TYPE_INTERNED
) {
663 PyString_InternInPlace(&v
);
664 PyList_Append(p
->strings
, v
);
670 if (n
< 0 || n
>= PyList_GET_SIZE(p
->strings
)) {
671 PyErr_SetString(PyExc_ValueError
, "bad marshal data");
674 v
= PyList_GET_ITEM(p
->strings
, n
);
678 #ifdef Py_USING_UNICODE
685 PyErr_SetString(PyExc_ValueError
, "bad marshal data");
688 buffer
= PyMem_NEW(char, n
);
690 return PyErr_NoMemory();
691 if (r_string(buffer
, (int)n
, p
) != n
) {
693 PyErr_SetString(PyExc_EOFError
,
694 "EOF read where object expected");
697 v
= PyUnicode_DecodeUTF8(buffer
, n
, NULL
);
706 PyErr_SetString(PyExc_ValueError
, "bad marshal data");
709 v
= PyTuple_New((int)n
);
712 for (i
= 0; i
< n
; i
++) {
715 if (!PyErr_Occurred())
716 PyErr_SetString(PyExc_TypeError
,
717 "NULL object in marshal data");
722 PyTuple_SET_ITEM(v
, (int)i
, v2
);
729 PyErr_SetString(PyExc_ValueError
, "bad marshal data");
732 v
= PyList_New((int)n
);
735 for (i
= 0; i
< n
; i
++) {
738 if (!PyErr_Occurred())
739 PyErr_SetString(PyExc_TypeError
,
740 "NULL object in marshal data");
745 PyList_SetItem(v
, (int)i
, v2
);
760 PyDict_SetItem(v
, key
, val
);
764 if (PyErr_Occurred()) {
774 PyErr_SetString(PyExc_ValueError
, "bad marshal data");
777 v
= PyTuple_New((int)n
);
780 for (i
= 0; i
< n
; i
++) {
783 if (!PyErr_Occurred())
784 PyErr_SetString(PyExc_TypeError
,
785 "NULL object in marshal data");
790 PyTuple_SET_ITEM(v
, (int)i
, v2
);
794 if (type
== TYPE_SET
)
797 v3
= PyFrozenSet_New(v
);
802 if (PyEval_GetRestricted()) {
803 PyErr_SetString(PyExc_RuntimeError
,
804 "cannot unmarshal code objects in "
805 "restricted execution mode");
813 PyObject
*code
= NULL
;
814 PyObject
*consts
= NULL
;
815 PyObject
*names
= NULL
;
816 PyObject
*varnames
= NULL
;
817 PyObject
*freevars
= NULL
;
818 PyObject
*cellvars
= NULL
;
819 PyObject
*filename
= NULL
;
820 PyObject
*name
= NULL
;
822 PyObject
*lnotab
= NULL
;
826 argcount
= r_long(p
);
828 stacksize
= r_long(p
);
833 consts
= r_object(p
);
839 varnames
= r_object(p
);
840 if (varnames
== NULL
)
842 freevars
= r_object(p
);
843 if (freevars
== NULL
)
845 cellvars
= r_object(p
);
846 if (cellvars
== NULL
)
848 filename
= r_object(p
);
849 if (filename
== NULL
)
854 firstlineno
= r_long(p
);
855 lnotab
= r_object(p
);
859 v
= (PyObject
*) PyCode_New(
860 argcount
, nlocals
, stacksize
, flags
,
861 code
, consts
, names
, varnames
,
862 freevars
, cellvars
, filename
, name
,
863 firstlineno
, lnotab
);
869 Py_XDECREF(varnames
);
870 Py_XDECREF(freevars
);
871 Py_XDECREF(cellvars
);
872 Py_XDECREF(filename
);
880 /* Bogus data got written, which isn't ideal.
881 This will let you keep working and recover. */
882 PyErr_SetString(PyExc_ValueError
, "bad marshal data");
889 read_object(RFILE
*p
)
892 if (PyErr_Occurred()) {
893 fprintf(stderr
, "XXX readobject called with exception set\n");
897 if (v
== NULL
&& !PyErr_Occurred())
898 PyErr_SetString(PyExc_TypeError
, "NULL object in marshal data");
903 PyMarshal_ReadShortFromFile(FILE *fp
)
909 rf
.end
= rf
.ptr
= NULL
;
914 PyMarshal_ReadLongFromFile(FILE *fp
)
923 /* Return size of file in bytes; < 0 if unknown. */
925 getfilesize(FILE *fp
)
928 if (fstat(fileno(fp
), &st
) != 0)
935 /* If we can get the size of the file up-front, and it's reasonably small,
936 * read it in one gulp and delegate to ...FromString() instead. Much quicker
937 * than reading a byte at a time from file; speeds .pyc imports.
938 * CAUTION: since this may read the entire remainder of the file, don't
939 * call it unless you know you're done with the file.
942 PyMarshal_ReadLastObjectFromFile(FILE *fp
)
944 /* 75% of 2.1's .pyc files can exploit SMALL_FILE_LIMIT.
945 * REASONABLE_FILE_LIMIT is by definition something big enough for Tkinter.pyc.
947 #define SMALL_FILE_LIMIT (1L << 14)
948 #define REASONABLE_FILE_LIMIT (1L << 18)
953 filesize
= getfilesize(fp
);
955 char buf
[SMALL_FILE_LIMIT
];
957 if (filesize
<= SMALL_FILE_LIMIT
)
959 else if (filesize
<= REASONABLE_FILE_LIMIT
)
960 pBuf
= (char *)PyMem_MALLOC(filesize
);
964 /* filesize must fit into an int, because it
965 is no larger than REASONABLE_FILE_LIMIT */
966 n
= fread(pBuf
, 1, (int)filesize
, fp
);
967 v
= PyMarshal_ReadObjectFromString(pBuf
, n
);
975 /* We don't have fstat, or we do but the file is larger than
976 * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
978 return PyMarshal_ReadObjectFromFile(fp
);
980 #undef SMALL_FILE_LIMIT
981 #undef REASONABLE_FILE_LIMIT
985 PyMarshal_ReadObjectFromFile(FILE *fp
)
990 rf
.strings
= PyList_New(0);
991 result
= r_object(&rf
);
992 Py_DECREF(rf
.strings
);
997 PyMarshal_ReadObjectFromString(char *str
, Py_ssize_t len
)
1004 rf
.strings
= PyList_New(0);
1005 result
= r_object(&rf
);
1006 Py_DECREF(rf
.strings
);
1011 PyMarshal_WriteObjectToString(PyObject
*x
, int version
)
1015 wf
.str
= PyString_FromStringAndSize((char *)NULL
, 50);
1018 wf
.ptr
= PyString_AS_STRING((PyStringObject
*)wf
.str
);
1019 wf
.end
= wf
.ptr
+ PyString_Size(wf
.str
);
1022 wf
.version
= version
;
1023 wf
.strings
= (version
> 0) ? PyDict_New() : NULL
;
1025 Py_XDECREF(wf
.strings
);
1027 _PyString_Resize(&wf
.str
,
1029 PyString_AS_STRING((PyStringObject
*)wf
.str
)));
1032 PyErr_SetString(PyExc_ValueError
,
1033 (wf
.error
==1)?"unmarshallable object"
1034 :"object too deeply nested to marshal");
1040 /* And an interface for Python programs... */
1043 marshal_dump(PyObject
*self
, PyObject
*args
)
1048 int version
= Py_MARSHAL_VERSION
;
1049 if (!PyArg_ParseTuple(args
, "OO|i:dump", &x
, &f
, &version
))
1051 if (!PyFile_Check(f
)) {
1052 PyErr_SetString(PyExc_TypeError
,
1053 "marshal.dump() 2nd arg must be file");
1056 wf
.fp
= PyFile_AsFile(f
);
1058 wf
.ptr
= wf
.end
= NULL
;
1061 wf
.strings
= (version
> 0) ? PyDict_New() : 0;
1062 wf
.version
= version
;
1064 Py_XDECREF(wf
.strings
);
1066 PyErr_SetString(PyExc_ValueError
,
1067 (wf
.error
==1)?"unmarshallable object"
1068 :"object too deeply nested to marshal");
1076 marshal_load(PyObject
*self
, PyObject
*f
)
1080 if (!PyFile_Check(f
)) {
1081 PyErr_SetString(PyExc_TypeError
,
1082 "marshal.load() arg must be file");
1085 rf
.fp
= PyFile_AsFile(f
);
1086 rf
.strings
= PyList_New(0);
1087 result
= read_object(&rf
);
1088 Py_DECREF(rf
.strings
);
1093 marshal_dumps(PyObject
*self
, PyObject
*args
)
1096 int version
= Py_MARSHAL_VERSION
;
1097 if (!PyArg_ParseTuple(args
, "O|i:dumps", &x
, &version
))
1099 return PyMarshal_WriteObjectToString(x
, version
);
1103 marshal_loads(PyObject
*self
, PyObject
*args
)
1109 if (!PyArg_ParseTuple(args
, "s#:loads", &s
, &n
))
1114 rf
.strings
= PyList_New(0);
1115 result
= read_object(&rf
);
1116 Py_DECREF(rf
.strings
);
1120 static PyMethodDef marshal_methods
[] = {
1121 {"dump", marshal_dump
, METH_VARARGS
},
1122 {"load", marshal_load
, METH_O
},
1123 {"dumps", marshal_dumps
, METH_VARARGS
},
1124 {"loads", marshal_loads
, METH_VARARGS
},
1125 {NULL
, NULL
} /* sentinel */
1129 PyMarshal_Init(void)
1131 PyObject
*mod
= Py_InitModule("marshal", marshal_methods
);
1134 PyModule_AddIntConstant(mod
, "version", Py_MARSHAL_VERSION
);