1 /**********************************************************************
6 created at: Thu Apr 27 16:30:01 JST 1995
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "ruby/ruby.h"
15 #include "ruby/util.h"
16 #include "ruby/encoding.h"
26 #define BITSPERSHORT (2*CHAR_BIT)
27 #define SHORTMASK ((1<<BITSPERSHORT)-1)
28 #define SHORTDN(x) RSHIFT(x,BITSPERSHORT)
30 #if SIZEOF_SHORT == SIZEOF_BDIGITS
31 #define SHORTLEN(x) (x)
34 shortlen(long len
, BDIGIT
*ds
)
44 return (len
- 1)*sizeof(BDIGIT
)/2 + offset
;
46 #define SHORTLEN(x) shortlen((x),d)
49 #define MARSHAL_MAJOR 4
50 #define MARSHAL_MINOR 8
54 #define TYPE_FALSE 'F'
55 #define TYPE_FIXNUM 'i'
57 #define TYPE_EXTENDED 'e'
58 #define TYPE_UCLASS 'C'
59 #define TYPE_OBJECT 'o'
61 #define TYPE_USERDEF 'u'
62 #define TYPE_USRMARSHAL 'U'
63 #define TYPE_FLOAT 'f'
64 #define TYPE_BIGNUM 'l'
65 #define TYPE_STRING '"'
66 #define TYPE_REGEXP '/'
67 #define TYPE_ARRAY '['
69 #define TYPE_HASH_DEF '}'
70 #define TYPE_STRUCT 'S'
71 #define TYPE_MODULE_OLD 'M'
72 #define TYPE_CLASS 'c'
73 #define TYPE_MODULE 'm'
75 #define TYPE_SYMBOL ':'
76 #define TYPE_SYMLINK ';'
81 static ID s_dump
, s_load
, s_mdump
, s_mload
;
82 static ID s_dump_data
, s_load_data
, s_alloc
;
83 static ID s_getbyte
, s_read
, s_write
, s_binmode
;
85 ID
rb_id_encoding(void);
90 VALUE (*dumper
)(VALUE
);
91 VALUE (*loader
)(VALUE
, VALUE
);
94 static st_table
*compat_allocator_tbl
;
95 static VALUE compat_allocator_tbl_wrapper
;
98 mark_marshal_compat_i(st_data_t key
, st_data_t value
)
100 marshal_compat_t
*p
= (marshal_compat_t
*)value
;
101 rb_gc_mark(p
->newclass
);
102 rb_gc_mark(p
->oldclass
);
107 mark_marshal_compat_t(void *tbl
)
110 st_foreach(tbl
, mark_marshal_compat_i
, 0);
114 rb_marshal_define_compat(VALUE newclass
, VALUE oldclass
, VALUE (*dumper
)(VALUE
), VALUE (*loader
)(VALUE
, VALUE
))
116 marshal_compat_t
*compat
;
117 rb_alloc_func_t allocator
= rb_get_alloc_func(newclass
);
120 rb_raise(rb_eTypeError
, "no allocator");
123 compat
= ALLOC(marshal_compat_t
);
124 compat
->newclass
= Qnil
;
125 compat
->oldclass
= Qnil
;
126 compat
->newclass
= newclass
;
127 compat
->oldclass
= oldclass
;
128 compat
->dumper
= dumper
;
129 compat
->loader
= loader
;
131 st_insert(compat_allocator_tbl
, (st_data_t
)allocator
, (st_data_t
)compat
);
141 st_table
*compat_tbl
;
146 struct dump_call_arg
{
148 struct dump_arg
*arg
;
153 check_dump_arg(struct dump_arg
*arg
)
155 if (!DATA_PTR(arg
->wrapper
)) {
156 rb_raise(rb_eRuntimeError
, "Marshal.dump reentered");
161 mark_dump_arg(void *ptr
)
163 struct dump_arg
*p
= ptr
;
166 rb_mark_set(p
->data
);
167 rb_mark_hash(p
->compat_tbl
);
171 class2path(VALUE klass
)
173 VALUE path
= rb_class_path(klass
);
174 char *n
= RSTRING_PTR(path
);
177 rb_raise(rb_eTypeError
, "can't dump anonymous %s %s",
178 (TYPE(klass
) == T_CLASS
? "class" : "module"),
181 if (rb_path2class(n
) != rb_class_real(klass
)) {
182 rb_raise(rb_eTypeError
, "%s can't be referred", n
);
187 static void w_long(long, struct dump_arg
*);
190 w_nbyte(const char *s
, int n
, struct dump_arg
*arg
)
192 VALUE buf
= arg
->str
;
193 rb_str_buf_cat(buf
, s
, n
);
194 if (arg
->dest
&& RSTRING_LEN(buf
) >= BUFSIZ
) {
195 if (arg
->taint
) OBJ_TAINT(buf
);
196 if (arg
->untrust
) OBJ_UNTRUST(buf
);
197 rb_io_write(arg
->dest
, buf
);
198 rb_str_resize(buf
, 0);
203 w_byte(char c
, struct dump_arg
*arg
)
209 w_bytes(const char *s
, int n
, struct dump_arg
*arg
)
216 w_short(int x
, struct dump_arg
*arg
)
218 w_byte((char)((x
>> 0) & 0xff), arg
);
219 w_byte((char)((x
>> 8) & 0xff), arg
);
223 w_long(long x
, struct dump_arg
*arg
)
225 char buf
[sizeof(long)+1];
229 if (!(RSHIFT(x
, 31) == 0 || RSHIFT(x
, 31) == -1)) {
230 /* big long does not fit in 4 bytes */
231 rb_raise(rb_eTypeError
, "long too big to dump");
239 if (0 < x
&& x
< 123) {
240 w_byte((char)(x
+ 5), arg
);
243 if (-124 < x
&& x
< 0) {
244 w_byte((char)((x
- 5)&0xff), arg
);
247 for (i
=1;i
<sizeof(long)+1;i
++) {
260 for (i
=0;i
<=len
;i
++) {
266 #define DECIMAL_MANT (53-16) /* from IEEE754 double precision */
268 #if DBL_MANT_DIG > 32
270 #elif DBL_MANT_DIG > 24
272 #elif DBL_MANT_DIG > 16
279 save_mantissa(double d
, char *buf
)
285 d
= modf(ldexp(frexp(fabs(d
), &e
), DECIMAL_MANT
), &d
);
289 d
= modf(ldexp(d
, MANT_BITS
), &n
);
290 m
= (unsigned long)n
;
302 while (!buf
[i
- 1]) --i
;
308 load_mantissa(double d
, const char *buf
, int len
)
310 if (--len
> 0 && !*buf
++) { /* binary mantissa mark */
311 int e
, s
= d
< 0, dig
= 0;
314 modf(ldexp(frexp(fabs(d
), &e
), DECIMAL_MANT
), &d
);
318 default: m
= *buf
++ & 0xff;
320 case 3: m
= (m
<< 8) | (*buf
++ & 0xff);
323 case 2: m
= (m
<< 8) | (*buf
++ & 0xff);
326 case 1: m
= (m
<< 8) | (*buf
++ & 0xff);
329 dig
-= len
< MANT_BITS
/ 8 ? 8 * (unsigned)len
: MANT_BITS
;
330 d
+= ldexp((double)m
, dig
);
331 } while ((len
-= MANT_BITS
/ 8) > 0);
332 d
= ldexp(d
, e
- DECIMAL_MANT
);
338 #define load_mantissa(d, buf, len) (d)
339 #define save_mantissa(d, buf) 0
343 #define FLOAT_DIG (DBL_DIG+2)
349 w_float(double d
, struct dump_arg
*arg
)
351 char buf
[FLOAT_DIG
+ (DECIMAL_MANT
+ 7) / 8 + 10];
354 if (d
< 0) strcpy(buf
, "-inf");
355 else strcpy(buf
, "inf");
361 if (1.0/d
< 0) strcpy(buf
, "-0");
362 else strcpy(buf
, "0");
367 /* xxx: should not use system's sprintf(3) */
368 snprintf(buf
, sizeof(buf
), "%.*g", FLOAT_DIG
, d
);
370 w_bytes(buf
, len
+ save_mantissa(d
, buf
+ len
), arg
);
373 w_bytes(buf
, strlen(buf
), arg
);
377 w_symbol(ID id
, struct dump_arg
*arg
)
382 if (st_lookup(arg
->symbols
, id
, &num
)) {
383 w_byte(TYPE_SYMLINK
, arg
);
384 w_long((long)num
, arg
);
387 sym
= rb_id2name(id
);
389 rb_raise(rb_eTypeError
, "can't dump anonymous ID %ld", id
);
391 w_byte(TYPE_SYMBOL
, arg
);
392 w_bytes(sym
, strlen(sym
), arg
);
393 st_add_direct(arg
->symbols
, id
, arg
->symbols
->num_entries
);
398 w_unique(const char *s
, struct dump_arg
*arg
)
401 rb_raise(rb_eTypeError
, "can't dump anonymous class %s", s
);
403 w_symbol(rb_intern(s
), arg
);
406 static void w_object(VALUE
,struct dump_arg
*,int);
409 hash_each(VALUE key
, VALUE value
, struct dump_call_arg
*arg
)
411 w_object(key
, arg
->arg
, arg
->limit
);
412 w_object(value
, arg
->arg
, arg
->limit
);
417 w_extended(VALUE klass
, struct dump_arg
*arg
, int check
)
421 if (check
&& FL_TEST(klass
, FL_SINGLETON
)) {
422 if (RCLASS_M_TBL(klass
)->num_entries
||
423 (RCLASS_IV_TBL(klass
) && RCLASS_IV_TBL(klass
)->num_entries
> 1)) {
424 rb_raise(rb_eTypeError
, "singleton can't be dumped");
426 klass
= RCLASS_SUPER(klass
);
428 while (BUILTIN_TYPE(klass
) == T_ICLASS
) {
429 path
= rb_class2name(RBASIC(klass
)->klass
);
430 w_byte(TYPE_EXTENDED
, arg
);
432 klass
= RCLASS_SUPER(klass
);
437 w_class(char type
, VALUE obj
, struct dump_arg
*arg
, int check
)
444 if (st_lookup(arg
->compat_tbl
, (st_data_t
)obj
, &real_obj
)) {
445 obj
= (VALUE
)real_obj
;
447 klass
= CLASS_OF(obj
);
448 w_extended(klass
, arg
, check
);
450 p
= class2path(rb_class_real(klass
));
451 path
= RSTRING_PTR(p
);
456 w_uclass(VALUE obj
, VALUE super
, struct dump_arg
*arg
)
458 VALUE klass
= CLASS_OF(obj
);
460 w_extended(klass
, arg
, Qtrue
);
461 klass
= rb_class_real(klass
);
462 if (klass
!= super
) {
463 w_byte(TYPE_UCLASS
, arg
);
464 w_unique(RSTRING_PTR(class2path(klass
)), arg
);
469 w_obj_each(ID id
, VALUE value
, struct dump_call_arg
*arg
)
471 if (id
== rb_id_encoding()) return ST_CONTINUE
;
472 w_symbol(id
, arg
->arg
);
473 w_object(value
, arg
->arg
, arg
->limit
);
478 w_encoding(VALUE obj
, long num
, struct dump_call_arg
*arg
)
480 int encidx
= rb_enc_get_index(obj
);
481 rb_encoding
*enc
= 0;
484 if (encidx
<= 0 || !(enc
= rb_enc_from_index(encidx
))) {
485 w_long(num
, arg
->arg
);
488 w_long(num
+ 1, arg
->arg
);
489 w_symbol(rb_id_encoding(), arg
->arg
);
491 if (!arg
->arg
->encodings
)
492 arg
->arg
->encodings
= st_init_strcasetable();
493 else if (st_lookup(arg
->arg
->encodings
, (st_data_t
)rb_enc_name(enc
), &name
))
495 name
= (st_data_t
)rb_str_new2(rb_enc_name(enc
));
496 st_insert(arg
->arg
->encodings
, (st_data_t
)rb_enc_name(enc
), name
);
498 w_object(name
, arg
->arg
, arg
->limit
);
502 w_ivar(VALUE obj
, st_table
*tbl
, struct dump_call_arg
*arg
)
504 long num
= tbl
? tbl
->num_entries
: 0;
506 w_encoding(obj
, num
, arg
);
508 st_foreach_safe(tbl
, w_obj_each
, (st_data_t
)arg
);
513 w_objivar(VALUE obj
, struct dump_call_arg
*arg
)
518 len
= ROBJECT_NUMIV(obj
);
519 ptr
= ROBJECT_IVPTR(obj
);
521 for (i
= 0; i
< len
; i
++)
522 if (ptr
[i
] != Qundef
)
525 w_encoding(obj
, num
, arg
);
527 rb_ivar_foreach(obj
, w_obj_each
, (st_data_t
)arg
);
532 w_object(VALUE obj
, struct dump_arg
*arg
, int limit
)
534 struct dump_call_arg c_arg
;
538 #define has_ivars(obj, ivtbl) ((ivtbl = rb_generic_ivar_table(obj)) != 0 || \
539 (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj)))
542 rb_raise(rb_eArgError
, "exceed depth limit");
549 if (st_lookup(arg
->data
, obj
, &num
)) {
550 w_byte(TYPE_LINK
, arg
);
551 w_long((long)num
, arg
);
555 if ((hasiv
= has_ivars(obj
, ivtbl
)) != 0) {
556 w_byte(TYPE_IVAR
, arg
);
559 w_byte(TYPE_NIL
, arg
);
561 else if (obj
== Qtrue
) {
562 w_byte(TYPE_TRUE
, arg
);
564 else if (obj
== Qfalse
) {
565 w_byte(TYPE_FALSE
, arg
);
567 else if (FIXNUM_P(obj
)) {
569 w_byte(TYPE_FIXNUM
, arg
);
570 w_long(FIX2INT(obj
), arg
);
572 if (RSHIFT((long)obj
, 31) == 0 || RSHIFT((long)obj
, 31) == -1) {
573 w_byte(TYPE_FIXNUM
, arg
);
574 w_long(FIX2LONG(obj
), arg
);
577 w_object(rb_int2big(FIX2LONG(obj
)), arg
, limit
);
581 else if (SYMBOL_P(obj
)) {
582 w_symbol(SYM2ID(obj
), arg
);
585 if (OBJ_TAINTED(obj
)) arg
->taint
= Qtrue
;
586 if (OBJ_UNTRUSTED(obj
)) arg
->untrust
= Qtrue
;
588 if (rb_respond_to(obj
, s_mdump
)) {
591 st_add_direct(arg
->data
, obj
, arg
->data
->num_entries
);
593 v
= rb_funcall(obj
, s_mdump
, 0, 0);
595 w_class(TYPE_USRMARSHAL
, obj
, arg
, Qfalse
);
596 w_object(v
, arg
, limit
);
597 if (hasiv
) w_ivar(obj
, 0, &c_arg
);
600 if (rb_respond_to(obj
, s_dump
)) {
602 st_table
*ivtbl2
= 0;
605 v
= rb_funcall(obj
, s_dump
, 1, INT2NUM(limit
));
607 if (TYPE(v
) != T_STRING
) {
608 rb_raise(rb_eTypeError
, "_dump() must return string");
610 if ((hasiv2
= has_ivars(v
, ivtbl2
)) != 0 && !hasiv
) {
611 w_byte(TYPE_IVAR
, arg
);
613 w_class(TYPE_USERDEF
, obj
, arg
, Qfalse
);
614 w_bytes(RSTRING_PTR(v
), RSTRING_LEN(v
), arg
);
616 w_ivar(v
, ivtbl2
, &c_arg
);
619 w_ivar(obj
, ivtbl
, &c_arg
);
621 st_add_direct(arg
->data
, obj
, arg
->data
->num_entries
);
625 st_add_direct(arg
->data
, obj
, arg
->data
->num_entries
);
628 st_data_t compat_data
;
629 rb_alloc_func_t allocator
= rb_get_alloc_func(RBASIC(obj
)->klass
);
630 if (st_lookup(compat_allocator_tbl
,
631 (st_data_t
)allocator
,
633 marshal_compat_t
*compat
= (marshal_compat_t
*)compat_data
;
634 VALUE real_obj
= obj
;
635 obj
= compat
->dumper(real_obj
);
636 st_insert(arg
->compat_tbl
, (st_data_t
)obj
, (st_data_t
)real_obj
);
640 switch (BUILTIN_TYPE(obj
)) {
642 if (FL_TEST(obj
, FL_SINGLETON
)) {
643 rb_raise(rb_eTypeError
, "singleton class can't be dumped");
645 w_byte(TYPE_CLASS
, arg
);
647 volatile VALUE path
= class2path(obj
);
648 w_bytes(RSTRING_PTR(path
), RSTRING_LEN(path
), arg
);
653 w_byte(TYPE_MODULE
, arg
);
655 VALUE path
= class2path(obj
);
656 w_bytes(RSTRING_PTR(path
), RSTRING_LEN(path
), arg
);
661 w_byte(TYPE_FLOAT
, arg
);
662 w_float(RFLOAT_VALUE(obj
), arg
);
666 w_byte(TYPE_BIGNUM
, arg
);
668 char sign
= RBIGNUM_SIGN(obj
) ? '+' : '-';
669 long len
= RBIGNUM_LEN(obj
);
670 BDIGIT
*d
= RBIGNUM_DIGITS(obj
);
673 w_long(SHORTLEN(len
), arg
); /* w_short? */
675 #if SIZEOF_BDIGITS > SIZEOF_SHORT
679 for (i
=0; i
<SIZEOF_BDIGITS
; i
+=SIZEOF_SHORT
) {
680 w_short(num
& SHORTMASK
, arg
);
682 if (len
== 0 && num
== 0) break;
693 w_uclass(obj
, rb_cString
, arg
);
694 w_byte(TYPE_STRING
, arg
);
695 w_bytes(RSTRING_PTR(obj
), RSTRING_LEN(obj
), arg
);
699 w_uclass(obj
, rb_cRegexp
, arg
);
700 w_byte(TYPE_REGEXP
, arg
);
702 int opts
= rb_reg_options(obj
);
703 w_bytes(RREGEXP_SRC_PTR(obj
), RREGEXP_SRC_LEN(obj
), arg
);
704 w_byte((char)opts
, arg
);
709 w_uclass(obj
, rb_cArray
, arg
);
710 w_byte(TYPE_ARRAY
, arg
);
712 long i
, len
= RARRAY_LEN(obj
);
715 for (i
=0; i
<RARRAY_LEN(obj
); i
++) {
716 w_object(RARRAY_PTR(obj
)[i
], arg
, limit
);
717 if (len
!= RARRAY_LEN(obj
)) {
718 rb_raise(rb_eRuntimeError
, "array modified during dump");
725 w_uclass(obj
, rb_cHash
, arg
);
726 if (NIL_P(RHASH(obj
)->ifnone
)) {
727 w_byte(TYPE_HASH
, arg
);
729 else if (FL_TEST(obj
, FL_USER2
)) {
730 /* FL_USER2 means HASH_PROC_DEFAULT (see hash.c) */
731 rb_raise(rb_eTypeError
, "can't dump hash with default proc");
734 w_byte(TYPE_HASH_DEF
, arg
);
736 w_long(RHASH_SIZE(obj
), arg
);
737 rb_hash_foreach(obj
, hash_each
, (st_data_t
)&c_arg
);
738 if (!NIL_P(RHASH(obj
)->ifnone
)) {
739 w_object(RHASH(obj
)->ifnone
, arg
, limit
);
744 w_class(TYPE_STRUCT
, obj
, arg
, Qtrue
);
746 long len
= RSTRUCT_LEN(obj
);
751 mem
= rb_struct_members(obj
);
752 for (i
=0; i
<len
; i
++) {
753 w_symbol(SYM2ID(RARRAY_PTR(mem
)[i
]), arg
);
754 w_object(RSTRUCT_PTR(obj
)[i
], arg
, limit
);
760 w_class(TYPE_OBJECT
, obj
, arg
, Qtrue
);
761 w_objivar(obj
, &c_arg
);
768 if (!rb_respond_to(obj
, s_dump_data
)) {
769 rb_raise(rb_eTypeError
,
770 "no marshal_dump is defined for class %s",
771 rb_obj_classname(obj
));
773 v
= rb_funcall(obj
, s_dump_data
, 0);
775 w_class(TYPE_DATA
, obj
, arg
, Qtrue
);
776 w_object(v
, arg
, limit
);
781 rb_raise(rb_eTypeError
, "can't dump %s",
782 rb_obj_classname(obj
));
787 w_ivar(obj
, ivtbl
, &c_arg
);
792 dump(struct dump_call_arg
*arg
)
794 w_object(arg
->obj
, arg
->arg
, arg
->limit
);
795 if (arg
->arg
->dest
) {
796 rb_io_write(arg
->arg
->dest
, arg
->arg
->str
);
797 rb_str_resize(arg
->arg
->str
, 0);
803 dump_ensure(struct dump_arg
*arg
)
805 if (!DATA_PTR(arg
->wrapper
)) return 0;
806 st_free_table(arg
->symbols
);
807 st_free_table(arg
->data
);
808 st_free_table(arg
->compat_tbl
);
809 if (arg
->encodings
) st_free_table(arg
->encodings
);
810 DATA_PTR(arg
->wrapper
) = 0;
816 OBJ_UNTRUST(arg
->str
);
823 * dump( obj [, anIO] , limit=--1 ) => anIO
825 * Serializes obj and all descendent objects. If anIO is
826 * specified, the serialized data will be written to it, otherwise the
827 * data will be returned as a String. If limit is specified, the
828 * traversal of subobjects will be limited to that depth. If limit is
829 * negative, no checking of depth will be performed.
832 * def initialize(str)
840 * (produces no output)
842 * o = Klass.new("hello\n")
843 * data = Marshal.dump(o)
844 * obj = Marshal.load(data)
845 * obj.sayHello #=> "hello\n"
848 marshal_dump(int argc
, VALUE
*argv
)
850 VALUE obj
, port
, a1
, a2
;
853 struct dump_call_arg c_arg
;
856 rb_scan_args(argc
, argv
, "12", &obj
, &a1
, &a2
);
858 if (!NIL_P(a2
)) limit
= NUM2INT(a2
);
859 if (NIL_P(a1
)) goto type_error
;
862 else if (argc
== 2) {
863 if (FIXNUM_P(a1
)) limit
= FIX2INT(a1
);
864 else if (NIL_P(a1
)) goto type_error
;
869 if (!rb_respond_to(port
, s_write
)) {
871 rb_raise(rb_eTypeError
, "instance of IO needed");
873 arg
.str
= rb_str_buf_new(0);
875 if (rb_respond_to(port
, s_binmode
)) {
876 rb_funcall2(port
, s_binmode
, 0, 0);
880 port
= rb_str_buf_new(0);
884 arg
.symbols
= st_init_numtable();
885 arg
.data
= st_init_numtable();
887 arg
.untrust
= Qfalse
;
888 arg
.compat_tbl
= st_init_numtable();
889 arg
.wrapper
= Data_Wrap_Struct(rb_cData
, mark_dump_arg
, 0, &arg
);
895 w_byte(MARSHAL_MAJOR
, &arg
);
896 w_byte(MARSHAL_MINOR
, &arg
);
898 rb_ensure(dump
, (VALUE
)&c_arg
, dump_ensure
, (VALUE
)&arg
);
911 st_table
*compat_tbl
;
912 VALUE compat_tbl_wrapper
;
916 check_load_arg(struct load_arg
*arg
)
918 if (!DATA_PTR(arg
->compat_tbl_wrapper
)) {
919 rb_raise(rb_eRuntimeError
, "Marshal.load reentered");
923 static VALUE
r_entry(VALUE v
, struct load_arg
*arg
);
924 static VALUE
r_object(struct load_arg
*arg
);
925 static VALUE
path2class(const char *path
);
928 r_byte(struct load_arg
*arg
)
932 if (TYPE(arg
->src
) == T_STRING
) {
933 if (RSTRING_LEN(arg
->src
) > arg
->offset
) {
934 c
= (unsigned char)RSTRING_PTR(arg
->src
)[arg
->offset
++];
937 rb_raise(rb_eArgError
, "marshal data too short");
941 VALUE src
= arg
->src
;
942 VALUE v
= rb_funcall2(src
, s_getbyte
, 0, 0);
944 if (NIL_P(v
)) rb_eof_error();
945 c
= (unsigned char)NUM2CHR(v
);
951 long_toobig(int size
)
953 rb_raise(rb_eTypeError
, "long too big for this architecture (size "
954 STRINGIZE(SIZEOF_LONG
)", given %d)", size
);
957 #undef SIGN_EXTEND_CHAR
959 # define SIGN_EXTEND_CHAR(c) ((signed char)(c))
960 #else /* not __STDC__ */
961 /* As in Harbison and Steele. */
962 # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
966 r_long(struct load_arg
*arg
)
969 int c
= SIGN_EXTEND_CHAR(r_byte(arg
));
972 if (c
== 0) return 0;
974 if (4 < c
&& c
< 128) {
977 if (c
> sizeof(long)) long_toobig(c
);
980 x
|= (long)r_byte(arg
) << (8*i
);
984 if (-129 < c
&& c
< -4) {
988 if (c
> sizeof(long)) long_toobig(c
);
991 x
&= ~((long)0xff << (8*i
));
992 x
|= (long)r_byte(arg
) << (8*i
);
998 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
1001 r_bytes0(long len
, struct load_arg
*arg
)
1005 if (len
== 0) return rb_str_new(0, 0);
1006 if (TYPE(arg
->src
) == T_STRING
) {
1007 if (RSTRING_LEN(arg
->src
) - arg
->offset
>= len
) {
1008 str
= rb_str_new(RSTRING_PTR(arg
->src
)+arg
->offset
, len
);
1013 rb_raise(rb_eArgError
, "marshal data too short");
1017 VALUE src
= arg
->src
;
1018 VALUE n
= LONG2NUM(len
);
1019 str
= rb_funcall2(src
, s_read
, 1, &n
);
1020 check_load_arg(arg
);
1021 if (NIL_P(str
)) goto too_short
;
1023 if (RSTRING_LEN(str
) != len
) goto too_short
;
1024 if (OBJ_TAINTED(str
)) arg
->taint
= Qtrue
;
1025 if (OBJ_UNTRUSTED(str
)) arg
->untrust
= Qtrue
;
1031 r_symlink(struct load_arg
*arg
)
1034 long num
= r_long(arg
);
1036 if (st_lookup(arg
->symbols
, num
, &id
)) {
1039 rb_raise(rb_eArgError
, "bad symbol");
1043 r_symreal(struct load_arg
*arg
)
1045 volatile VALUE s
= r_bytes(arg
);
1046 ID id
= rb_intern(RSTRING_PTR(s
));
1048 st_insert(arg
->symbols
, arg
->symbols
->num_entries
, id
);
1054 r_symbol(struct load_arg
*arg
)
1058 switch ((type
= r_byte(arg
))) {
1060 return r_symreal(arg
);
1062 return r_symlink(arg
);
1064 rb_raise(rb_eArgError
, "dump format error(0x%x)", type
);
1070 r_unique(struct load_arg
*arg
)
1072 return rb_id2name(r_symbol(arg
));
1076 r_string(struct load_arg
*arg
)
1078 return r_bytes(arg
);
1082 r_entry(VALUE v
, struct load_arg
*arg
)
1084 st_data_t real_obj
= (VALUE
)Qundef
;
1085 if (st_lookup(arg
->compat_tbl
, v
, &real_obj
)) {
1086 rb_hash_aset(arg
->data
, INT2FIX(RHASH_SIZE(arg
->data
)), (VALUE
)real_obj
);
1089 rb_hash_aset(arg
->data
, INT2FIX(RHASH_SIZE(arg
->data
)), v
);
1093 if ((VALUE
)real_obj
!= Qundef
)
1094 OBJ_TAINT((VALUE
)real_obj
);
1098 if ((VALUE
)real_obj
!= Qundef
)
1099 OBJ_UNTRUST((VALUE
)real_obj
);
1105 r_leave(VALUE v
, struct load_arg
*arg
)
1108 if (st_lookup(arg
->compat_tbl
, v
, &data
)) {
1109 VALUE real_obj
= (VALUE
)data
;
1110 rb_alloc_func_t allocator
= rb_get_alloc_func(CLASS_OF(real_obj
));
1112 if (st_lookup(compat_allocator_tbl
, (st_data_t
)allocator
, &data
)) {
1113 marshal_compat_t
*compat
= (marshal_compat_t
*)data
;
1114 compat
->loader(real_obj
, v
);
1116 st_delete(arg
->compat_tbl
, &key
, 0);
1120 v
= rb_funcall(arg
->proc
, rb_intern("call"), 1, v
);
1121 check_load_arg(arg
);
1127 r_ivar(VALUE obj
, struct load_arg
*arg
)
1134 ID id
= r_symbol(arg
);
1135 VALUE val
= r_object(arg
);
1136 if (id
== rb_id_encoding()) {
1137 int idx
= rb_enc_find_index(StringValueCStr(val
));
1138 if (idx
> 0) rb_enc_associate_index(obj
, idx
);
1141 rb_ivar_set(obj
, id
, val
);
1148 path2class(const char *path
)
1150 VALUE v
= rb_path2class(path
);
1152 if (TYPE(v
) != T_CLASS
) {
1153 rb_raise(rb_eArgError
, "%s does not refer class", path
);
1159 path2module(const char *path
)
1161 VALUE v
= rb_path2class(path
);
1163 if (TYPE(v
) != T_MODULE
) {
1164 rb_raise(rb_eArgError
, "%s does not refer module", path
);
1170 obj_alloc_by_path(const char *path
, struct load_arg
*arg
)
1174 rb_alloc_func_t allocator
;
1176 klass
= path2class(path
);
1178 allocator
= rb_get_alloc_func(klass
);
1179 if (st_lookup(compat_allocator_tbl
, (st_data_t
)allocator
, &data
)) {
1180 marshal_compat_t
*compat
= (marshal_compat_t
*)data
;
1181 VALUE real_obj
= rb_obj_alloc(klass
);
1182 VALUE obj
= rb_obj_alloc(compat
->oldclass
);
1183 st_insert(arg
->compat_tbl
, (st_data_t
)obj
, (st_data_t
)real_obj
);
1187 return rb_obj_alloc(klass
);
1191 r_object0(struct load_arg
*arg
, int *ivp
, VALUE extmod
)
1194 int type
= r_byte(arg
);
1200 v
= rb_hash_aref(arg
->data
, LONG2FIX(id
));
1201 check_load_arg(arg
);
1203 rb_raise(rb_eArgError
, "dump format error (unlinked)");
1206 v
= rb_funcall(arg
->proc
, rb_intern("call"), 1, v
);
1207 check_load_arg(arg
);
1215 v
= r_object0(arg
, &ivar
, extmod
);
1216 if (ivar
) r_ivar(v
, arg
);
1222 VALUE m
= path2module(r_unique(arg
));
1224 if (NIL_P(extmod
)) extmod
= rb_ary_new2(0);
1225 rb_ary_push(extmod
, m
);
1227 v
= r_object0(arg
, 0, extmod
);
1228 while (RARRAY_LEN(extmod
) > 0) {
1229 m
= rb_ary_pop(extmod
);
1230 rb_extend_object(v
, m
);
1237 VALUE c
= path2class(r_unique(arg
));
1239 if (FL_TEST(c
, FL_SINGLETON
)) {
1240 rb_raise(rb_eTypeError
, "singleton can't be loaded");
1242 v
= r_object0(arg
, 0, extmod
);
1243 if (rb_special_const_p(v
) || TYPE(v
) == T_OBJECT
|| TYPE(v
) == T_CLASS
) {
1245 rb_raise(rb_eArgError
, "dump format error (user class)");
1247 if (TYPE(v
) == T_MODULE
|| !RTEST(rb_class_inherited_p(c
, RBASIC(v
)->klass
))) {
1248 VALUE tmp
= rb_obj_alloc(c
);
1250 if (TYPE(v
) != TYPE(tmp
)) goto format_error
;
1252 RBASIC(v
)->klass
= c
;
1258 v
= r_leave(v
, arg
);
1263 v
= r_leave(v
, arg
);
1268 v
= r_leave(v
, arg
);
1273 long i
= r_long(arg
);
1276 v
= r_leave(v
, arg
);
1282 VALUE str
= r_bytes(arg
);
1283 const char *ptr
= RSTRING_PTR(str
);
1285 if (strcmp(ptr
, "nan") == 0) {
1288 else if (strcmp(ptr
, "inf") == 0) {
1291 else if (strcmp(ptr
, "-inf") == 0) {
1296 d
= strtod(ptr
, &e
);
1297 d
= load_mantissa(d
, e
, RSTRING_LEN(str
) - (e
- ptr
));
1300 v
= r_entry(v
, arg
);
1301 v
= r_leave(v
, arg
);
1309 volatile VALUE data
;
1311 NEWOBJ(big
, struct RBignum
);
1312 OBJSETUP(big
, rb_cBignum
, T_BIGNUM
);
1313 RBIGNUM_SET_SIGN(big
, (r_byte(arg
) == '+'));
1315 data
= r_bytes0(len
* 2, arg
);
1316 #if SIZEOF_BDIGITS == SIZEOF_SHORT
1317 rb_big_resize((VALUE
)big
, len
);
1319 rb_big_resize((VALUE
)big
, (len
+ 1) * 2 / sizeof(BDIGIT
));
1321 digits
= RBIGNUM_DIGITS(big
);
1322 MEMCPY(digits
, RSTRING_PTR(data
), char, len
* 2);
1323 #if SIZEOF_BDIGITS > SIZEOF_SHORT
1324 MEMZERO((char *)digits
+ len
* 2, char,
1325 RBIGNUM_LEN(big
) * sizeof(BDIGIT
) - len
* 2);
1327 len
= RBIGNUM_LEN(big
);
1329 unsigned char *p
= (unsigned char *)digits
;
1331 #if SIZEOF_BDIGITS > SIZEOF_SHORT
1335 for (i
=0; i
<SIZEOF_BDIGITS
; i
++) {
1336 num
|= (int)p
[i
] << shift
;
1340 num
= p
[0] | (p
[1] << 8);
1345 v
= rb_big_norm((VALUE
)big
);
1346 v
= r_entry(v
, arg
);
1347 v
= r_leave(v
, arg
);
1352 v
= r_entry(r_string(arg
), arg
);
1353 v
= r_leave(v
, arg
);
1358 volatile VALUE str
= r_bytes(arg
);
1359 int options
= r_byte(arg
);
1360 v
= r_entry(rb_reg_new_str(str
, options
), arg
);
1361 v
= r_leave(v
, arg
);
1367 volatile long len
= r_long(arg
); /* gcc 2.7.2.3 -O2 bug?? */
1369 v
= rb_ary_new2(len
);
1370 v
= r_entry(v
, arg
);
1372 rb_ary_push(v
, r_object(arg
));
1374 v
= r_leave(v
, arg
);
1381 long len
= r_long(arg
);
1384 v
= r_entry(v
, arg
);
1386 VALUE key
= r_object(arg
);
1387 VALUE value
= r_object(arg
);
1388 rb_hash_aset(v
, key
, value
);
1390 if (type
== TYPE_HASH_DEF
) {
1391 RHASH(v
)->ifnone
= r_object(arg
);
1393 v
= r_leave(v
, arg
);
1401 volatile long i
; /* gcc 2.7.2.3 -O2 bug?? */
1405 klass
= path2class(r_unique(arg
));
1408 v
= rb_obj_alloc(klass
);
1409 if (TYPE(v
) != T_STRUCT
) {
1410 rb_raise(rb_eTypeError
, "class %s not a struct", rb_class2name(klass
));
1412 mem
= rb_struct_s_members(klass
);
1413 if (RARRAY_LEN(mem
) != len
) {
1414 rb_raise(rb_eTypeError
, "struct %s not compatible (struct size differs)",
1415 rb_class2name(klass
));
1418 v
= r_entry(v
, arg
);
1419 values
= rb_ary_new2(len
);
1420 for (i
=0; i
<len
; i
++) {
1421 slot
= r_symbol(arg
);
1423 if (RARRAY_PTR(mem
)[i
] != ID2SYM(slot
)) {
1424 rb_raise(rb_eTypeError
, "struct %s not compatible (:%s for :%s)",
1425 rb_class2name(klass
),
1427 rb_id2name(SYM2ID(RARRAY_PTR(mem
)[i
])));
1429 rb_ary_push(values
, r_object(arg
));
1431 rb_struct_initialize(v
, values
);
1432 v
= r_leave(v
, arg
);
1438 VALUE klass
= path2class(r_unique(arg
));
1441 if (!rb_respond_to(klass
, s_load
)) {
1442 rb_raise(rb_eTypeError
, "class %s needs to have method `_load'",
1443 rb_class2name(klass
));
1445 data
= r_string(arg
);
1450 v
= rb_funcall(klass
, s_load
, 1, data
);
1451 check_load_arg(arg
);
1452 v
= r_entry(v
, arg
);
1453 v
= r_leave(v
, arg
);
1457 case TYPE_USRMARSHAL
:
1459 VALUE klass
= path2class(r_unique(arg
));
1462 v
= rb_obj_alloc(klass
);
1463 if (!NIL_P(extmod
)) {
1464 while (RARRAY_LEN(extmod
) > 0) {
1465 VALUE m
= rb_ary_pop(extmod
);
1466 rb_extend_object(v
, m
);
1469 if (!rb_respond_to(v
, s_mload
)) {
1470 rb_raise(rb_eTypeError
, "instance of %s needs to have method `marshal_load'",
1471 rb_class2name(klass
));
1473 v
= r_entry(v
, arg
);
1474 data
= r_object(arg
);
1475 rb_funcall(v
, s_mload
, 1, data
);
1476 check_load_arg(arg
);
1477 v
= r_leave(v
, arg
);
1483 v
= obj_alloc_by_path(r_unique(arg
), arg
);
1484 if (TYPE(v
) != T_OBJECT
) {
1485 rb_raise(rb_eArgError
, "dump format error");
1487 v
= r_entry(v
, arg
);
1489 v
= r_leave(v
, arg
);
1495 VALUE klass
= path2class(r_unique(arg
));
1496 if (rb_respond_to(klass
, s_alloc
)) {
1497 static int warn
= Qtrue
;
1499 rb_warn("define `allocate' instead of `_alloc'");
1502 v
= rb_funcall(klass
, s_alloc
, 0);
1503 check_load_arg(arg
);
1506 v
= rb_obj_alloc(klass
);
1508 if (TYPE(v
) != T_DATA
) {
1509 rb_raise(rb_eArgError
, "dump format error");
1511 v
= r_entry(v
, arg
);
1512 if (!rb_respond_to(v
, s_load_data
)) {
1513 rb_raise(rb_eTypeError
,
1514 "class %s needs to have instance method `_load_data'",
1515 rb_class2name(klass
));
1517 rb_funcall(v
, s_load_data
, 1, r_object0(arg
, 0, extmod
));
1518 check_load_arg(arg
);
1519 v
= r_leave(v
, arg
);
1523 case TYPE_MODULE_OLD
:
1525 volatile VALUE str
= r_bytes(arg
);
1527 v
= rb_path2class(RSTRING_PTR(str
));
1528 v
= r_entry(v
, arg
);
1529 v
= r_leave(v
, arg
);
1535 volatile VALUE str
= r_bytes(arg
);
1537 v
= path2class(RSTRING_PTR(str
));
1538 v
= r_entry(v
, arg
);
1539 v
= r_leave(v
, arg
);
1545 volatile VALUE str
= r_bytes(arg
);
1547 v
= path2module(RSTRING_PTR(str
));
1548 v
= r_entry(v
, arg
);
1549 v
= r_leave(v
, arg
);
1554 v
= ID2SYM(r_symreal(arg
));
1555 v
= r_leave(v
, arg
);
1559 v
= ID2SYM(r_symlink(arg
));
1563 rb_raise(rb_eArgError
, "dump format error(0x%x)", type
);
1570 r_object(struct load_arg
*arg
)
1572 return r_object0(arg
, 0, Qnil
);
1576 load(struct load_arg
*arg
)
1578 return r_object(arg
);
1582 load_ensure(struct load_arg
*arg
)
1584 if (!DATA_PTR(arg
->compat_tbl_wrapper
)) return 0;
1585 st_free_table(arg
->symbols
);
1586 st_free_table(arg
->compat_tbl
);
1587 DATA_PTR(arg
->compat_tbl_wrapper
) = 0;
1588 arg
->compat_tbl_wrapper
= 0;
1594 * load( source [, proc] ) => obj
1595 * restore( source [, proc] ) => obj
1597 * Returns the result of converting the serialized data in source into a
1598 * Ruby object (possibly with associated subordinate objects). source
1599 * may be either an instance of IO or an object that responds to
1600 * to_str. If proc is specified, it will be passed each object as it
1604 marshal_load(int argc
, VALUE
*argv
)
1609 struct load_arg arg
;
1611 rb_scan_args(argc
, argv
, "11", &port
, &proc
);
1612 v
= rb_check_string_type(port
);
1614 arg
.taint
= OBJ_TAINTED(port
); /* original taintedness */
1617 else if (rb_respond_to(port
, s_getbyte
) && rb_respond_to(port
, s_read
)) {
1618 if (rb_respond_to(port
, s_binmode
)) {
1619 rb_funcall2(port
, s_binmode
, 0, 0);
1624 rb_raise(rb_eTypeError
, "instance of IO needed");
1628 arg
.compat_tbl
= st_init_numtable();
1629 arg
.compat_tbl_wrapper
= Data_Wrap_Struct(rb_cData
, rb_mark_tbl
, 0, arg
.compat_tbl
);
1631 major
= r_byte(&arg
);
1632 minor
= r_byte(&arg
);
1633 if (major
!= MARSHAL_MAJOR
|| minor
> MARSHAL_MINOR
) {
1634 rb_raise(rb_eTypeError
, "incompatible marshal file format (can't be read)\n\
1635 \tformat version %d.%d required; %d.%d given",
1636 MARSHAL_MAJOR
, MARSHAL_MINOR
, major
, minor
);
1638 if (RTEST(ruby_verbose
) && minor
!= MARSHAL_MINOR
) {
1639 rb_warn("incompatible marshal file format (can be read)\n\
1640 \tformat version %d.%d required; %d.%d given",
1641 MARSHAL_MAJOR
, MARSHAL_MINOR
, major
, minor
);
1644 arg
.symbols
= st_init_numtable();
1645 arg
.data
= rb_hash_new();
1646 RBASIC(arg
.data
)->klass
= 0;
1647 if (NIL_P(proc
)) arg
.proc
= 0;
1648 else arg
.proc
= proc
;
1649 v
= rb_ensure(load
, (VALUE
)&arg
, load_ensure
, (VALUE
)&arg
);
1655 * The marshaling library converts collections of Ruby objects into a
1656 * byte stream, allowing them to be stored outside the currently
1657 * active script. This data may subsequently be read and the original
1658 * objects reconstituted.
1659 * Marshaled data has major and minor version numbers stored along
1660 * with the object information. In normal use, marshaling can only
1661 * load data written with the same major version number and an equal
1662 * or lower minor version number. If Ruby's ``verbose'' flag is set
1663 * (normally using -d, -v, -w, or --verbose), a warning is issued when
1664 * the minor numbers differ. Marshal versioning is independent of
1665 * Ruby's version numbers. You can extract the version by reading the
1666 * first two bytes of marshaled data.
1668 * str = Marshal.dump("thing")
1669 * RUBY_VERSION #=> "1.9.0"
1673 * Some objects cannot be dumped: if the objects to be dumped include
1674 * bindings, procedure or method objects, instances of class IO, or
1675 * singleton objects, a TypeError will be raised.
1676 * If your class has special serialization needs (for example, if you
1677 * want to serialize in some specific format), or if it contains
1678 * objects that would otherwise not be serializable, you can implement
1679 * your own serialization strategy by defining two methods, _dump and
1681 * The instance method _dump should return a String object containing
1682 * all the information necessary to reconstitute objects of this class
1683 * and all referenced objects up to a maximum depth given as an integer
1684 * parameter (a value of -1 implies that you should disable depth checking).
1685 * The class method _load should take a String and return an object of this class.
1692 VALUE rb_mMarshal
= rb_define_module("Marshal");
1694 s_dump
= rb_intern("_dump");
1695 s_load
= rb_intern("_load");
1696 s_mdump
= rb_intern("marshal_dump");
1697 s_mload
= rb_intern("marshal_load");
1698 s_dump_data
= rb_intern("_dump_data");
1699 s_load_data
= rb_intern("_load_data");
1700 s_alloc
= rb_intern("_alloc");
1701 s_getbyte
= rb_intern("getbyte");
1702 s_read
= rb_intern("read");
1703 s_write
= rb_intern("write");
1704 s_binmode
= rb_intern("binmode");
1706 rb_define_module_function(rb_mMarshal
, "dump", marshal_dump
, -1);
1707 rb_define_module_function(rb_mMarshal
, "load", marshal_load
, -1);
1708 rb_define_module_function(rb_mMarshal
, "restore", marshal_load
, -1);
1710 rb_define_const(rb_mMarshal
, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR
));
1711 rb_define_const(rb_mMarshal
, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR
));
1713 compat_allocator_tbl
= st_init_numtable();
1714 rb_gc_register_address(&compat_allocator_tbl_wrapper
);
1715 compat_allocator_tbl_wrapper
=
1716 Data_Wrap_Struct(rb_cData
, mark_marshal_compat_t
, 0, compat_allocator_tbl
);
1720 rb_marshal_dump(VALUE obj
, VALUE port
)
1727 if (!NIL_P(port
)) argc
= 2;
1728 return marshal_dump(argc
, argv
);
1732 rb_marshal_load(VALUE port
)
1734 return marshal_load(1, &port
);