Updated for 2.1b2 distribution.
[python/dscho.git] / Python / marshal.c
blob1b9ab9af141d4b82ce52c1d5c64f8219f3ab8eed
2 /* Write Python objects to files and read them back.
3 This is intended for writing and reading compiled Python code only;
4 a true persistent storage facility would be much harder, since
5 it would have to take circular links and sharing into account. */
7 #include "Python.h"
8 #include "longintrepr.h"
9 #include "compile.h"
10 #include "marshal.h"
/* Nesting limit for w_object().  Once the recursion depth of the object
 * being written exceeds this value, the writer records error code 2
 * instead of descending further, so that a very deeply nested object
 * cannot run the interpreter out of C stack.
 */
#define MAX_MARSHAL_STACK_DEPTH 5000
/* One-byte tags written in front of every object in the marshal stream. */
enum {
	TYPE_NULL	= '0',
	TYPE_NONE	= 'N',
	TYPE_ELLIPSIS	= '.',
	TYPE_INT	= 'i',
	TYPE_INT64	= 'I',
	TYPE_FLOAT	= 'f',
	TYPE_COMPLEX	= 'x',
	TYPE_LONG	= 'l',
	TYPE_STRING	= 's',
	TYPE_TUPLE	= '(',
	TYPE_LIST	= '[',
	TYPE_DICT	= '{',
	TYPE_CODE	= 'c',
	TYPE_UNICODE	= 'u',
	TYPE_UNKNOWN	= '?'
};
34 typedef struct {
35 FILE *fp;
36 int error;
37 int depth;
38 /* If fp == NULL, the following are valid: */
39 PyObject *str;
40 char *ptr;
41 char *end;
42 } WFILE;
/* Emit a single byte: to the stream when there is one; otherwise into the
   string buffer, handing over to w_more() when the buffer is full. */
#define w_byte(c, p) \
	do { \
		if ((p)->fp != NULL) \
			putc((c), (p)->fp); \
		else if ((p)->ptr == (p)->end) \
			w_more(c, p); \
		else \
			*(p)->ptr++ = (c); \
	} while (0)
48 static void
49 w_more(int c, WFILE *p)
51 int size, newsize;
52 if (p->str == NULL)
53 return; /* An error already occurred */
54 size = PyString_Size(p->str);
55 newsize = size + 1024;
56 if (_PyString_Resize(&p->str, newsize) != 0) {
57 p->ptr = p->end = NULL;
59 else {
60 p->ptr = PyString_AS_STRING((PyStringObject *)p->str) + size;
61 p->end =
62 PyString_AS_STRING((PyStringObject *)p->str) + newsize;
63 *p->ptr++ = Py_SAFE_DOWNCAST(c, int, char);
67 static void
68 w_string(char *s, int n, WFILE *p)
70 if (p->fp != NULL) {
71 fwrite(s, 1, n, p->fp);
73 else {
74 while (--n >= 0) {
75 w_byte(*s, p);
76 s++;
81 static void
82 w_short(int x, WFILE *p)
84 w_byte( x & 0xff, p);
85 w_byte((x>> 8) & 0xff, p);
88 static void
89 w_long(long x, WFILE *p)
91 w_byte((int)( x & 0xff), p);
92 w_byte((int)((x>> 8) & 0xff), p);
93 w_byte((int)((x>>16) & 0xff), p);
94 w_byte((int)((x>>24) & 0xff), p);
#if SIZEOF_LONG > 4
/* Write a 64-bit value as two 32-bit little-endian words, low word first.
   Only compiled where a C long is wider than 4 bytes. */
static void
w_long64(long x, WFILE *p)
{
	long high_word = x >> 32;

	w_long(x, p);
	w_long(high_word, p);
}
#endif
106 static void
107 w_object(PyObject *v, WFILE *p)
109 int i, n;
110 PyBufferProcs *pb;
112 p->depth++;
114 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
115 p->error = 2;
117 else if (v == NULL) {
118 w_byte(TYPE_NULL, p);
120 else if (v == Py_None) {
121 w_byte(TYPE_NONE, p);
123 else if (v == Py_Ellipsis) {
124 w_byte(TYPE_ELLIPSIS, p);
126 else if (PyInt_Check(v)) {
127 long x = PyInt_AS_LONG((PyIntObject *)v);
128 #if SIZEOF_LONG > 4
129 long y = x>>31;
130 if (y && y != -1) {
131 w_byte(TYPE_INT64, p);
132 w_long64(x, p);
134 else
135 #endif
137 w_byte(TYPE_INT, p);
138 w_long(x, p);
141 else if (PyLong_Check(v)) {
142 PyLongObject *ob = (PyLongObject *)v;
143 w_byte(TYPE_LONG, p);
144 n = ob->ob_size;
145 w_long((long)n, p);
146 if (n < 0)
147 n = -n;
148 for (i = 0; i < n; i++)
149 w_short(ob->ob_digit[i], p);
151 else if (PyFloat_Check(v)) {
152 extern void PyFloat_AsString(char *, PyFloatObject *);
153 char buf[256]; /* Plenty to format any double */
154 PyFloat_AsString(buf, (PyFloatObject *)v);
155 n = strlen(buf);
156 w_byte(TYPE_FLOAT, p);
157 w_byte(n, p);
158 w_string(buf, n, p);
160 #ifndef WITHOUT_COMPLEX
161 else if (PyComplex_Check(v)) {
162 extern void PyFloat_AsString(char *, PyFloatObject *);
163 char buf[256]; /* Plenty to format any double */
164 PyFloatObject *temp;
165 w_byte(TYPE_COMPLEX, p);
166 temp = (PyFloatObject*)PyFloat_FromDouble(
167 PyComplex_RealAsDouble(v));
168 PyFloat_AsString(buf, temp);
169 Py_DECREF(temp);
170 n = strlen(buf);
171 w_byte(n, p);
172 w_string(buf, n, p);
173 temp = (PyFloatObject*)PyFloat_FromDouble(
174 PyComplex_ImagAsDouble(v));
175 PyFloat_AsString(buf, temp);
176 Py_DECREF(temp);
177 n = strlen(buf);
178 w_byte(n, p);
179 w_string(buf, n, p);
181 #endif
182 else if (PyString_Check(v)) {
183 w_byte(TYPE_STRING, p);
184 n = PyString_GET_SIZE(v);
185 w_long((long)n, p);
186 w_string(PyString_AS_STRING(v), n, p);
188 else if (PyUnicode_Check(v)) {
189 PyObject *utf8;
190 utf8 = PyUnicode_AsUTF8String(v);
191 if (utf8 == NULL) {
192 p->depth--;
193 p->error = 1;
194 return;
196 w_byte(TYPE_UNICODE, p);
197 n = PyString_GET_SIZE(utf8);
198 w_long((long)n, p);
199 w_string(PyString_AS_STRING(utf8), n, p);
200 Py_DECREF(utf8);
202 else if (PyTuple_Check(v)) {
203 w_byte(TYPE_TUPLE, p);
204 n = PyTuple_Size(v);
205 w_long((long)n, p);
206 for (i = 0; i < n; i++) {
207 w_object(PyTuple_GET_ITEM(v, i), p);
210 else if (PyList_Check(v)) {
211 w_byte(TYPE_LIST, p);
212 n = PyList_GET_SIZE(v);
213 w_long((long)n, p);
214 for (i = 0; i < n; i++) {
215 w_object(PyList_GET_ITEM(v, i), p);
218 else if (PyDict_Check(v)) {
219 int pos;
220 PyObject *key, *value;
221 w_byte(TYPE_DICT, p);
222 /* This one is NULL object terminated! */
223 pos = 0;
224 while (PyDict_Next(v, &pos, &key, &value)) {
225 w_object(key, p);
226 w_object(value, p);
228 w_object((PyObject *)NULL, p);
230 else if (PyCode_Check(v)) {
231 PyCodeObject *co = (PyCodeObject *)v;
232 w_byte(TYPE_CODE, p);
233 w_short(co->co_argcount, p);
234 w_short(co->co_nlocals, p);
235 w_short(co->co_stacksize, p);
236 w_short(co->co_flags, p);
237 w_object(co->co_code, p);
238 w_object(co->co_consts, p);
239 w_object(co->co_names, p);
240 w_object(co->co_varnames, p);
241 w_object(co->co_freevars, p);
242 w_object(co->co_cellvars, p);
243 w_object(co->co_filename, p);
244 w_object(co->co_name, p);
245 w_short(co->co_firstlineno, p);
246 w_object(co->co_lnotab, p);
248 else if ((pb = v->ob_type->tp_as_buffer) != NULL &&
249 pb->bf_getsegcount != NULL &&
250 pb->bf_getreadbuffer != NULL &&
251 (*pb->bf_getsegcount)(v, NULL) == 1)
253 /* Write unknown buffer-style objects as a string */
254 char *s;
255 w_byte(TYPE_STRING, p);
256 n = (*pb->bf_getreadbuffer)(v, 0, (void **)&s);
257 w_long((long)n, p);
258 w_string(s, n, p);
260 else {
261 w_byte(TYPE_UNKNOWN, p);
262 p->error = 1;
265 p->depth--;
268 void
269 PyMarshal_WriteLongToFile(long x, FILE *fp)
271 WFILE wf;
272 wf.fp = fp;
273 wf.error = 0;
274 wf.depth = 0;
275 w_long(x, &wf);
278 void
279 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp)
281 WFILE wf;
282 wf.fp = fp;
283 wf.error = 0;
284 wf.depth = 0;
285 w_object(x, &wf);
288 typedef WFILE RFILE; /* Same struct with different invariants */
290 #define rs_byte(p) (((p)->ptr != (p)->end) ? (unsigned char)*(p)->ptr++ : EOF)
292 #define r_byte(p) ((p)->fp ? getc((p)->fp) : rs_byte(p))
294 static int
295 r_string(char *s, int n, RFILE *p)
297 if (p->fp != NULL)
298 return fread(s, 1, n, p->fp);
299 if (p->end - p->ptr < n)
300 n = p->end - p->ptr;
301 memcpy(s, p->ptr, n);
302 p->ptr += n;
303 return n;
306 static int
307 r_short(RFILE *p)
309 register short x;
310 x = r_byte(p);
311 x |= r_byte(p) << 8;
312 /* Sign-extension, in case short greater than 16 bits */
313 x |= -(x & 0x8000);
314 return x;
317 static long
318 r_long(RFILE *p)
320 register long x;
321 register FILE *fp = p->fp;
322 if (fp) {
323 x = getc(fp);
324 x |= (long)getc(fp) << 8;
325 x |= (long)getc(fp) << 16;
326 x |= (long)getc(fp) << 24;
328 else {
329 x = rs_byte(p);
330 x |= (long)rs_byte(p) << 8;
331 x |= (long)rs_byte(p) << 16;
332 x |= (long)rs_byte(p) << 24;
334 #if SIZEOF_LONG > 4
335 /* Sign extension for 64-bit machines */
336 x |= -(x & 0x80000000L);
337 #endif
338 return x;
341 static long
342 r_long64(RFILE *p)
344 register long x;
345 x = r_long(p);
346 #if SIZEOF_LONG > 4
347 x = (x & 0xFFFFFFFFL) | (r_long(p) << 32);
348 #else
349 if (r_long(p) != 0) {
350 PyObject *f = PySys_GetObject("stderr");
351 if (f != NULL)
352 (void) PyFile_WriteString(
353 "Warning: un-marshal 64-bit int in 32-bit mode\n",
356 #endif
357 return x;
360 static PyObject *
361 r_object(RFILE *p)
363 PyObject *v, *v2;
364 long i, n;
365 int type = r_byte(p);
367 switch (type) {
369 case EOF:
370 PyErr_SetString(PyExc_EOFError,
371 "EOF read where object expected");
372 return NULL;
374 case TYPE_NULL:
375 return NULL;
377 case TYPE_NONE:
378 Py_INCREF(Py_None);
379 return Py_None;
381 case TYPE_ELLIPSIS:
382 Py_INCREF(Py_Ellipsis);
383 return Py_Ellipsis;
385 case TYPE_INT:
386 return PyInt_FromLong(r_long(p));
388 case TYPE_INT64:
389 return PyInt_FromLong(r_long64(p));
391 case TYPE_LONG:
393 int size;
394 PyLongObject *ob;
395 n = r_long(p);
396 size = n<0 ? -n : n;
397 ob = _PyLong_New(size);
398 if (ob == NULL)
399 return NULL;
400 ob->ob_size = n;
401 for (i = 0; i < size; i++)
402 ob->ob_digit[i] = r_short(p);
403 return (PyObject *)ob;
406 case TYPE_FLOAT:
408 char buf[256];
409 double dx;
410 n = r_byte(p);
411 if (r_string(buf, (int)n, p) != n) {
412 PyErr_SetString(PyExc_EOFError,
413 "EOF read where object expected");
414 return NULL;
416 buf[n] = '\0';
417 PyFPE_START_PROTECT("atof", return 0)
418 dx = atof(buf);
419 PyFPE_END_PROTECT(dx)
420 return PyFloat_FromDouble(dx);
423 #ifndef WITHOUT_COMPLEX
424 case TYPE_COMPLEX:
426 char buf[256];
427 Py_complex c;
428 n = r_byte(p);
429 if (r_string(buf, (int)n, p) != n) {
430 PyErr_SetString(PyExc_EOFError,
431 "EOF read where object expected");
432 return NULL;
434 buf[n] = '\0';
435 PyFPE_START_PROTECT("atof", return 0)
436 c.real = atof(buf);
437 PyFPE_END_PROTECT(c)
438 n = r_byte(p);
439 if (r_string(buf, (int)n, p) != n) {
440 PyErr_SetString(PyExc_EOFError,
441 "EOF read where object expected");
442 return NULL;
444 buf[n] = '\0';
445 PyFPE_START_PROTECT("atof", return 0)
446 c.imag = atof(buf);
447 PyFPE_END_PROTECT(c)
448 return PyComplex_FromCComplex(c);
450 #endif
452 case TYPE_STRING:
453 n = r_long(p);
454 if (n < 0) {
455 PyErr_SetString(PyExc_ValueError, "bad marshal data");
456 return NULL;
458 v = PyString_FromStringAndSize((char *)NULL, n);
459 if (v != NULL) {
460 if (r_string(PyString_AS_STRING(v), (int)n, p) != n) {
461 Py_DECREF(v);
462 v = NULL;
463 PyErr_SetString(PyExc_EOFError,
464 "EOF read where object expected");
467 return v;
469 case TYPE_UNICODE:
471 char *buffer;
473 n = r_long(p);
474 if (n < 0) {
475 PyErr_SetString(PyExc_ValueError, "bad marshal data");
476 return NULL;
478 buffer = PyMem_NEW(char, n);
479 if (buffer == NULL)
480 return PyErr_NoMemory();
481 if (r_string(buffer, (int)n, p) != n) {
482 PyMem_DEL(buffer);
483 PyErr_SetString(PyExc_EOFError,
484 "EOF read where object expected");
485 return NULL;
487 v = PyUnicode_DecodeUTF8(buffer, n, NULL);
488 PyMem_DEL(buffer);
489 return v;
492 case TYPE_TUPLE:
493 n = r_long(p);
494 if (n < 0) {
495 PyErr_SetString(PyExc_ValueError, "bad marshal data");
496 return NULL;
498 v = PyTuple_New((int)n);
499 if (v == NULL)
500 return v;
501 for (i = 0; i < n; i++) {
502 v2 = r_object(p);
503 if ( v2 == NULL ) {
504 Py_DECREF(v);
505 v = NULL;
506 break;
508 PyTuple_SET_ITEM(v, (int)i, v2);
510 return v;
512 case TYPE_LIST:
513 n = r_long(p);
514 if (n < 0) {
515 PyErr_SetString(PyExc_ValueError, "bad marshal data");
516 return NULL;
518 v = PyList_New((int)n);
519 if (v == NULL)
520 return v;
521 for (i = 0; i < n; i++) {
522 v2 = r_object(p);
523 if ( v2 == NULL ) {
524 Py_DECREF(v);
525 v = NULL;
526 break;
528 PyList_SetItem(v, (int)i, v2);
530 return v;
532 case TYPE_DICT:
533 v = PyDict_New();
534 if (v == NULL)
535 return NULL;
536 for (;;) {
537 PyObject *key, *val;
538 key = r_object(p);
539 if (key == NULL)
540 break; /* XXX Assume TYPE_NULL, not an error */
541 val = r_object(p);
542 if (val != NULL)
543 PyDict_SetItem(v, key, val);
544 Py_DECREF(key);
545 Py_XDECREF(val);
547 return v;
549 case TYPE_CODE:
551 int argcount = r_short(p);
552 int nlocals = r_short(p);
553 int stacksize = r_short(p);
554 int flags = r_short(p);
555 PyObject *code = NULL;
556 PyObject *consts = NULL;
557 PyObject *names = NULL;
558 PyObject *varnames = NULL;
559 PyObject *freevars = NULL;
560 PyObject *cellvars = NULL;
561 PyObject *filename = NULL;
562 PyObject *name = NULL;
563 int firstlineno = 0;
564 PyObject *lnotab = NULL;
566 code = r_object(p);
567 if (code) consts = r_object(p);
568 if (consts) names = r_object(p);
569 if (names) varnames = r_object(p);
570 if (varnames) freevars = r_object(p);
571 if (freevars) cellvars = r_object(p);
572 if (cellvars) filename = r_object(p);
573 if (filename) name = r_object(p);
574 if (name) {
575 firstlineno = r_short(p);
576 lnotab = r_object(p);
579 if (!PyErr_Occurred()) {
580 v = (PyObject *) PyCode_New(
581 argcount, nlocals, stacksize, flags,
582 code, consts, names, varnames,
583 freevars, cellvars, filename, name,
584 firstlineno, lnotab);
586 else
587 v = NULL;
588 Py_XDECREF(code);
589 Py_XDECREF(consts);
590 Py_XDECREF(names);
591 Py_XDECREF(varnames);
592 Py_XDECREF(freevars);
593 Py_XDECREF(cellvars);
594 Py_XDECREF(filename);
595 Py_XDECREF(name);
596 Py_XDECREF(lnotab);
599 return v;
601 default:
602 /* Bogus data got written, which isn't ideal.
603 This will let you keep working and recover. */
604 PyErr_SetString(PyExc_ValueError, "bad marshal data");
605 return NULL;
610 long
611 PyMarshal_ReadLongFromFile(FILE *fp)
613 RFILE rf;
614 rf.fp = fp;
615 return r_long(&rf);
#ifdef HAVE_FSTAT
/* Size in bytes of the file behind fp as reported by fstat(), or -1 when
   fstat() fails. */
static off_t
getfilesize(FILE *fp)
{
	struct stat info;

	return (fstat(fileno(fp), &info) == 0) ? info.st_size : -1;
}
#endif
631 /* If we can get the size of the file up-front, and it's reasonably small,
632 * read it in one gulp and delegate to ...FromString() instead. Much quicker
633 * than reading a byte at a time from file; speeds .pyc imports.
634 * CAUTION: since this may read the entire remainder of the file, don't
635 * call it unless you know you're done with the file.
637 PyObject *
638 PyMarshal_ReadLastObjectFromFile(FILE *fp)
640 /* 75% of 2.1's .pyc files can exploit SMALL_FILE_LIMIT.
641 * REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc.
643 #define SMALL_FILE_LIMIT (1L << 14)
644 #define REASONABLE_FILE_LIMIT (1L << 18)
645 #ifdef HAVE_FSTAT
646 off_t filesize;
647 #endif
648 if (PyErr_Occurred()) {
649 fprintf(stderr, "XXX rd_object called with exception set\n");
650 return NULL;
652 #ifdef HAVE_FSTAT
653 filesize = getfilesize(fp);
654 if (filesize > 0) {
655 char buf[SMALL_FILE_LIMIT];
656 char* pBuf = NULL;
657 if (filesize <= SMALL_FILE_LIMIT)
658 pBuf = buf;
659 else if (filesize <= REASONABLE_FILE_LIMIT)
660 pBuf = (char *)PyMem_MALLOC(filesize);
661 if (pBuf != NULL) {
662 PyObject* v;
663 size_t n = fread(pBuf, 1, filesize, fp);
664 v = PyMarshal_ReadObjectFromString(pBuf, n);
665 if (pBuf != buf)
666 PyMem_FREE(pBuf);
667 return v;
671 #endif
672 /* We don't have fstat, or we do but the file is larger than
673 * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
675 return PyMarshal_ReadObjectFromFile(fp);
677 #undef SMALL_FILE_LIMIT
678 #undef REASONABLE_FILE_LIMIT
681 PyObject *
682 PyMarshal_ReadObjectFromFile(FILE *fp)
684 RFILE rf;
685 if (PyErr_Occurred()) {
686 fprintf(stderr, "XXX rd_object called with exception set\n");
687 return NULL;
689 rf.fp = fp;
690 return r_object(&rf);
693 PyObject *
694 PyMarshal_ReadObjectFromString(char *str, int len)
696 RFILE rf;
697 if (PyErr_Occurred()) {
698 fprintf(stderr, "XXX rds_object called with exception set\n");
699 return NULL;
701 rf.fp = NULL;
702 rf.str = NULL;
703 rf.ptr = str;
704 rf.end = str + len;
705 return r_object(&rf);
708 PyObject *
709 PyMarshal_WriteObjectToString(PyObject *x) /* wrs_object() */
711 WFILE wf;
712 wf.fp = NULL;
713 wf.str = PyString_FromStringAndSize((char *)NULL, 50);
714 if (wf.str == NULL)
715 return NULL;
716 wf.ptr = PyString_AS_STRING((PyStringObject *)wf.str);
717 wf.end = wf.ptr + PyString_Size(wf.str);
718 wf.error = 0;
719 wf.depth = 0;
720 w_object(x, &wf);
721 if (wf.str != NULL)
722 _PyString_Resize(&wf.str,
723 (int) (wf.ptr -
724 PyString_AS_STRING((PyStringObject *)wf.str)));
725 if (wf.error) {
726 Py_XDECREF(wf.str);
727 PyErr_SetString(PyExc_ValueError,
728 (wf.error==1)?"unmarshallable object"
729 :"object too deeply nested to marshal");
730 return NULL;
732 return wf.str;
735 /* And an interface for Python programs... */
737 static PyObject *
738 marshal_dump(PyObject *self, PyObject *args)
740 WFILE wf;
741 PyObject *x;
742 PyObject *f;
743 if (!PyArg_ParseTuple(args, "OO:dump", &x, &f))
744 return NULL;
745 if (!PyFile_Check(f)) {
746 PyErr_SetString(PyExc_TypeError,
747 "marshal.dump() 2nd arg must be file");
748 return NULL;
750 wf.fp = PyFile_AsFile(f);
751 wf.str = NULL;
752 wf.ptr = wf.end = NULL;
753 wf.error = 0;
754 wf.depth = 0;
755 w_object(x, &wf);
756 if (wf.error) {
757 PyErr_SetString(PyExc_ValueError,
758 (wf.error==1)?"unmarshallable object"
759 :"object too deeply nested to marshal");
760 return NULL;
762 Py_INCREF(Py_None);
763 return Py_None;
766 static PyObject *
767 marshal_load(PyObject *self, PyObject *args)
769 RFILE rf;
770 PyObject *f;
771 PyObject *v;
772 if (!PyArg_ParseTuple(args, "O:load", &f))
773 return NULL;
774 if (!PyFile_Check(f)) {
775 PyErr_SetString(PyExc_TypeError,
776 "marshal.load() arg must be file");
777 return NULL;
779 rf.fp = PyFile_AsFile(f);
780 rf.str = NULL;
781 rf.ptr = rf.end = NULL;
782 PyErr_Clear();
783 v = r_object(&rf);
784 if (PyErr_Occurred()) {
785 Py_XDECREF(v);
786 v = NULL;
788 return v;
791 static PyObject *
792 marshal_dumps(PyObject *self, PyObject *args)
794 PyObject *x;
795 if (!PyArg_ParseTuple(args, "O:dumps", &x))
796 return NULL;
797 return PyMarshal_WriteObjectToString(x);
800 static PyObject *
801 marshal_loads(PyObject *self, PyObject *args)
803 RFILE rf;
804 PyObject *v;
805 char *s;
806 int n;
807 if (!PyArg_ParseTuple(args, "s#:loads", &s, &n))
808 return NULL;
809 rf.fp = NULL;
810 rf.str = args;
811 rf.ptr = s;
812 rf.end = s + n;
813 PyErr_Clear();
814 v = r_object(&rf);
815 if (PyErr_Occurred()) {
816 Py_XDECREF(v);
817 v = NULL;
819 return v;
822 static PyMethodDef marshal_methods[] = {
823 {"dump", marshal_dump, 1},
824 {"load", marshal_load, 1},
825 {"dumps", marshal_dumps, 1},
826 {"loads", marshal_loads, 1},
827 {NULL, NULL} /* sentinel */
830 void
831 PyMarshal_Init(void)
833 (void) Py_InitModule("marshal", marshal_methods);