1 /**********************************************************************
6 created at: Thu Apr 27 16:30:01 JST 1995
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "ruby/ruby.h"
15 #include "ruby/util.h"
16 #include "ruby/encoding.h"
26 #define BITSPERSHORT (2*CHAR_BIT)
27 #define SHORTMASK ((1<<BITSPERSHORT)-1)
28 #define SHORTDN(x) RSHIFT(x,BITSPERSHORT)
30 #if SIZEOF_SHORT == SIZEOF_BDIGITS
31 #define SHORTLEN(x) (x)
34 shortlen(long len
, BDIGIT
*ds
)
44 return (len
- 1)*sizeof(BDIGIT
)/2 + offset
;
46 #define SHORTLEN(x) shortlen((x),d)
49 #define MARSHAL_MAJOR 4
50 #define MARSHAL_MINOR 8
54 #define TYPE_FALSE 'F'
55 #define TYPE_FIXNUM 'i'
57 #define TYPE_EXTENDED 'e'
58 #define TYPE_UCLASS 'C'
59 #define TYPE_OBJECT 'o'
61 #define TYPE_USERDEF 'u'
62 #define TYPE_USRMARSHAL 'U'
63 #define TYPE_FLOAT 'f'
64 #define TYPE_BIGNUM 'l'
65 #define TYPE_STRING '"'
66 #define TYPE_REGEXP '/'
67 #define TYPE_ARRAY '['
69 #define TYPE_HASH_DEF '}'
70 #define TYPE_STRUCT 'S'
71 #define TYPE_MODULE_OLD 'M'
72 #define TYPE_CLASS 'c'
73 #define TYPE_MODULE 'm'
75 #define TYPE_SYMBOL ':'
76 #define TYPE_SYMLINK ';'
81 static ID s_dump
, s_load
, s_mdump
, s_mload
;
82 static ID s_dump_data
, s_load_data
, s_alloc
;
83 static ID s_getbyte
, s_read
, s_write
, s_binmode
;
85 ID
rb_id_encoding(void);
90 VALUE (*dumper
)(VALUE
);
91 VALUE (*loader
)(VALUE
, VALUE
);
94 static st_table
*compat_allocator_tbl
;
95 static VALUE compat_allocator_tbl_wrapper
;
98 mark_marshal_compat_i(st_data_t key
, st_data_t value
)
100 marshal_compat_t
*p
= (marshal_compat_t
*)value
;
101 rb_gc_mark(p
->newclass
);
102 rb_gc_mark(p
->oldclass
);
107 mark_marshal_compat_t(void *tbl
)
110 st_foreach(tbl
, mark_marshal_compat_i
, 0);
114 rb_marshal_define_compat(VALUE newclass
, VALUE oldclass
, VALUE (*dumper
)(VALUE
), VALUE (*loader
)(VALUE
, VALUE
))
116 marshal_compat_t
*compat
;
117 rb_alloc_func_t allocator
= rb_get_alloc_func(newclass
);
120 rb_raise(rb_eTypeError
, "no allocator");
123 compat
= ALLOC(marshal_compat_t
);
124 compat
->newclass
= Qnil
;
125 compat
->oldclass
= Qnil
;
126 compat
->newclass
= newclass
;
127 compat
->oldclass
= oldclass
;
128 compat
->dumper
= dumper
;
129 compat
->loader
= loader
;
131 st_insert(compat_allocator_tbl
, (st_data_t
)allocator
, (st_data_t
)compat
);
140 st_table
*compat_tbl
;
145 struct dump_call_arg
{
147 struct dump_arg
*arg
;
152 mark_dump_arg(void *ptr
)
154 struct dump_arg
*p
= ptr
;
157 rb_mark_set(p
->data
);
158 rb_mark_hash(p
->compat_tbl
);
162 class2path(VALUE klass
)
164 VALUE path
= rb_class_path(klass
);
165 char *n
= RSTRING_PTR(path
);
168 rb_raise(rb_eTypeError
, "can't dump anonymous %s %s",
169 (TYPE(klass
) == T_CLASS
? "class" : "module"),
172 if (rb_path2class(n
) != rb_class_real(klass
)) {
173 rb_raise(rb_eTypeError
, "%s can't be referred", n
);
178 static void w_long(long, struct dump_arg
*);
181 w_nbyte(const char *s
, int n
, struct dump_arg
*arg
)
183 VALUE buf
= arg
->str
;
184 rb_str_buf_cat(buf
, s
, n
);
185 if (arg
->dest
&& RSTRING_LEN(buf
) >= BUFSIZ
) {
186 if (arg
->taint
) OBJ_TAINT(buf
);
187 rb_io_write(arg
->dest
, buf
);
188 rb_str_resize(buf
, 0);
193 w_byte(char c
, struct dump_arg
*arg
)
199 w_bytes(const char *s
, int n
, struct dump_arg
*arg
)
206 w_short(int x
, struct dump_arg
*arg
)
208 w_byte((char)((x
>> 0) & 0xff), arg
);
209 w_byte((char)((x
>> 8) & 0xff), arg
);
213 w_long(long x
, struct dump_arg
*arg
)
215 char buf
[sizeof(long)+1];
219 if (!(RSHIFT(x
, 31) == 0 || RSHIFT(x
, 31) == -1)) {
220 /* big long does not fit in 4 bytes */
221 rb_raise(rb_eTypeError
, "long too big to dump");
229 if (0 < x
&& x
< 123) {
230 w_byte((char)(x
+ 5), arg
);
233 if (-124 < x
&& x
< 0) {
234 w_byte((char)((x
- 5)&0xff), arg
);
237 for (i
=1;i
<sizeof(long)+1;i
++) {
250 for (i
=0;i
<=len
;i
++) {
256 #define DECIMAL_MANT (53-16) /* from IEEE754 double precision */
258 #if DBL_MANT_DIG > 32
260 #elif DBL_MANT_DIG > 24
262 #elif DBL_MANT_DIG > 16
269 save_mantissa(double d
, char *buf
)
275 d
= modf(ldexp(frexp(fabs(d
), &e
), DECIMAL_MANT
), &d
);
279 d
= modf(ldexp(d
, MANT_BITS
), &n
);
280 m
= (unsigned long)n
;
292 while (!buf
[i
- 1]) --i
;
298 load_mantissa(double d
, const char *buf
, int len
)
300 if (--len
> 0 && !*buf
++) { /* binary mantissa mark */
301 int e
, s
= d
< 0, dig
= 0;
304 modf(ldexp(frexp(fabs(d
), &e
), DECIMAL_MANT
), &d
);
308 default: m
= *buf
++ & 0xff;
310 case 3: m
= (m
<< 8) | (*buf
++ & 0xff);
313 case 2: m
= (m
<< 8) | (*buf
++ & 0xff);
316 case 1: m
= (m
<< 8) | (*buf
++ & 0xff);
319 dig
-= len
< MANT_BITS
/ 8 ? 8 * (unsigned)len
: MANT_BITS
;
320 d
+= ldexp((double)m
, dig
);
321 } while ((len
-= MANT_BITS
/ 8) > 0);
322 d
= ldexp(d
, e
- DECIMAL_MANT
);
328 #define load_mantissa(d, buf, len) (d)
329 #define save_mantissa(d, buf) 0
333 #define FLOAT_DIG (DBL_DIG+2)
339 w_float(double d
, struct dump_arg
*arg
)
341 char buf
[FLOAT_DIG
+ (DECIMAL_MANT
+ 7) / 8 + 10];
344 if (d
< 0) strcpy(buf
, "-inf");
345 else strcpy(buf
, "inf");
351 if (1.0/d
< 0) strcpy(buf
, "-0");
352 else strcpy(buf
, "0");
357 /* xxx: should not use system's snprintf(3) */
358 snprintf(buf
, sizeof(buf
), "%.*g", FLOAT_DIG
, d
);
360 w_bytes(buf
, len
+ save_mantissa(d
, buf
+ len
), arg
);
363 w_bytes(buf
, strlen(buf
), arg
);
367 w_symbol(ID id
, struct dump_arg
*arg
)
372 if (st_lookup(arg
->symbols
, id
, &num
)) {
373 w_byte(TYPE_SYMLINK
, arg
);
374 w_long((long)num
, arg
);
377 sym
= rb_id2name(id
);
379 rb_raise(rb_eTypeError
, "can't dump anonymous ID %ld", id
);
381 w_byte(TYPE_SYMBOL
, arg
);
382 w_bytes(sym
, strlen(sym
), arg
);
383 st_add_direct(arg
->symbols
, id
, arg
->symbols
->num_entries
);
388 w_unique(const char *s
, struct dump_arg
*arg
)
391 rb_raise(rb_eTypeError
, "can't dump anonymous class %s", s
);
393 w_symbol(rb_intern(s
), arg
);
396 static void w_object(VALUE
,struct dump_arg
*,int);
399 hash_each(VALUE key
, VALUE value
, struct dump_call_arg
*arg
)
401 w_object(key
, arg
->arg
, arg
->limit
);
402 w_object(value
, arg
->arg
, arg
->limit
);
407 w_extended(VALUE klass
, struct dump_arg
*arg
, int check
)
411 if (check
&& FL_TEST(klass
, FL_SINGLETON
)) {
412 if (RCLASS_M_TBL(klass
)->num_entries
||
413 (RCLASS_IV_TBL(klass
) && RCLASS_IV_TBL(klass
)->num_entries
> 1)) {
414 rb_raise(rb_eTypeError
, "singleton can't be dumped");
416 klass
= RCLASS_SUPER(klass
);
418 while (BUILTIN_TYPE(klass
) == T_ICLASS
) {
419 path
= rb_class2name(RBASIC(klass
)->klass
);
420 w_byte(TYPE_EXTENDED
, arg
);
422 klass
= RCLASS_SUPER(klass
);
427 w_class(char type
, VALUE obj
, struct dump_arg
*arg
, int check
)
434 if (st_lookup(arg
->compat_tbl
, (st_data_t
)obj
, &real_obj
)) {
435 obj
= (VALUE
)real_obj
;
437 klass
= CLASS_OF(obj
);
438 w_extended(klass
, arg
, check
);
440 p
= class2path(rb_class_real(klass
));
441 path
= RSTRING_PTR(p
);
446 w_uclass(VALUE obj
, VALUE super
, struct dump_arg
*arg
)
448 VALUE klass
= CLASS_OF(obj
);
450 w_extended(klass
, arg
, Qtrue
);
451 klass
= rb_class_real(klass
);
452 if (klass
!= super
) {
453 w_byte(TYPE_UCLASS
, arg
);
454 w_unique(RSTRING_PTR(class2path(klass
)), arg
);
459 w_obj_each(ID id
, VALUE value
, struct dump_call_arg
*arg
)
461 if (id
== rb_id_encoding()) return ST_CONTINUE
;
462 w_symbol(id
, arg
->arg
);
463 w_object(value
, arg
->arg
, arg
->limit
);
468 w_encoding(VALUE obj
, long num
, struct dump_call_arg
*arg
)
470 int encidx
= rb_enc_get_index(obj
);
471 rb_encoding
*enc
= 0;
474 if (encidx
<= 0 || !(enc
= rb_enc_from_index(encidx
))) {
475 w_long(num
, arg
->arg
);
478 w_long(num
+ 1, arg
->arg
);
479 w_symbol(rb_id_encoding(), arg
->arg
);
481 if (!arg
->arg
->encodings
)
482 arg
->arg
->encodings
= st_init_strcasetable();
483 else if (st_lookup(arg
->arg
->encodings
, (st_data_t
)rb_enc_name(enc
), &name
))
485 name
= (st_data_t
)rb_str_new2(rb_enc_name(enc
));
486 st_insert(arg
->arg
->encodings
, (st_data_t
)rb_enc_name(enc
), name
);
488 w_object(name
, arg
->arg
, arg
->limit
);
492 w_ivar(VALUE obj
, st_table
*tbl
, struct dump_call_arg
*arg
)
494 long num
= tbl
? tbl
->num_entries
: 0;
496 w_encoding(obj
, num
, arg
);
498 st_foreach_safe(tbl
, w_obj_each
, (st_data_t
)arg
);
503 w_objivar(VALUE obj
, struct dump_call_arg
*arg
)
508 len
= ROBJECT_NUMIV(obj
);
509 ptr
= ROBJECT_IVPTR(obj
);
511 for (i
= 0; i
< len
; i
++)
512 if (ptr
[i
] != Qundef
)
515 w_encoding(obj
, num
, arg
);
517 rb_ivar_foreach(obj
, w_obj_each
, (st_data_t
)arg
);
522 w_object(VALUE obj
, struct dump_arg
*arg
, int limit
)
524 struct dump_call_arg c_arg
;
528 #define has_ivars(obj, ivtbl) ((ivtbl = rb_generic_ivar_table(obj)) != 0 || \
529 (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj)))
532 rb_raise(rb_eArgError
, "exceed depth limit");
539 if (st_lookup(arg
->data
, obj
, &num
)) {
540 w_byte(TYPE_LINK
, arg
);
541 w_long((long)num
, arg
);
545 if ((hasiv
= has_ivars(obj
, ivtbl
)) != 0) {
546 w_byte(TYPE_IVAR
, arg
);
549 w_byte(TYPE_NIL
, arg
);
551 else if (obj
== Qtrue
) {
552 w_byte(TYPE_TRUE
, arg
);
554 else if (obj
== Qfalse
) {
555 w_byte(TYPE_FALSE
, arg
);
557 else if (FIXNUM_P(obj
)) {
559 w_byte(TYPE_FIXNUM
, arg
);
560 w_long(FIX2INT(obj
), arg
);
562 if (RSHIFT((long)obj
, 31) == 0 || RSHIFT((long)obj
, 31) == -1) {
563 w_byte(TYPE_FIXNUM
, arg
);
564 w_long(FIX2LONG(obj
), arg
);
567 w_object(rb_int2big(FIX2LONG(obj
)), arg
, limit
);
571 else if (SYMBOL_P(obj
)) {
572 w_symbol(SYM2ID(obj
), arg
);
575 if (OBJ_TAINTED(obj
)) arg
->taint
= Qtrue
;
577 if (rb_respond_to(obj
, s_mdump
)) {
580 st_add_direct(arg
->data
, obj
, arg
->data
->num_entries
);
582 v
= rb_funcall(obj
, s_mdump
, 0, 0);
583 w_class(TYPE_USRMARSHAL
, obj
, arg
, Qfalse
);
584 w_object(v
, arg
, limit
);
585 if (hasiv
) w_ivar(obj
, 0, &c_arg
);
588 if (rb_respond_to(obj
, s_dump
)) {
590 st_table
*ivtbl2
= 0;
593 v
= rb_funcall(obj
, s_dump
, 1, INT2NUM(limit
));
594 if (TYPE(v
) != T_STRING
) {
595 rb_raise(rb_eTypeError
, "_dump() must return string");
597 if ((hasiv2
= has_ivars(v
, ivtbl2
)) != 0 && !hasiv
) {
598 w_byte(TYPE_IVAR
, arg
);
600 w_class(TYPE_USERDEF
, obj
, arg
, Qfalse
);
601 w_bytes(RSTRING_PTR(v
), RSTRING_LEN(v
), arg
);
603 w_ivar(v
, ivtbl2
, &c_arg
);
606 w_ivar(obj
, ivtbl
, &c_arg
);
608 st_add_direct(arg
->data
, obj
, arg
->data
->num_entries
);
612 st_add_direct(arg
->data
, obj
, arg
->data
->num_entries
);
615 st_data_t compat_data
;
616 rb_alloc_func_t allocator
= rb_get_alloc_func(RBASIC(obj
)->klass
);
617 if (st_lookup(compat_allocator_tbl
,
618 (st_data_t
)allocator
,
620 marshal_compat_t
*compat
= (marshal_compat_t
*)compat_data
;
621 VALUE real_obj
= obj
;
622 obj
= compat
->dumper(real_obj
);
623 st_insert(arg
->compat_tbl
, (st_data_t
)obj
, (st_data_t
)real_obj
);
627 switch (BUILTIN_TYPE(obj
)) {
629 if (FL_TEST(obj
, FL_SINGLETON
)) {
630 rb_raise(rb_eTypeError
, "singleton class can't be dumped");
632 w_byte(TYPE_CLASS
, arg
);
634 volatile VALUE path
= class2path(obj
);
635 w_bytes(RSTRING_PTR(path
), RSTRING_LEN(path
), arg
);
640 w_byte(TYPE_MODULE
, arg
);
642 VALUE path
= class2path(obj
);
643 w_bytes(RSTRING_PTR(path
), RSTRING_LEN(path
), arg
);
648 w_byte(TYPE_FLOAT
, arg
);
649 w_float(RFLOAT_VALUE(obj
), arg
);
653 w_byte(TYPE_BIGNUM
, arg
);
655 char sign
= RBIGNUM_SIGN(obj
) ? '+' : '-';
656 long len
= RBIGNUM_LEN(obj
);
657 BDIGIT
*d
= RBIGNUM_DIGITS(obj
);
660 w_long(SHORTLEN(len
), arg
); /* w_short? */
662 #if SIZEOF_BDIGITS > SIZEOF_SHORT
666 for (i
=0; i
<SIZEOF_BDIGITS
; i
+=SIZEOF_SHORT
) {
667 w_short(num
& SHORTMASK
, arg
);
669 if (len
== 0 && num
== 0) break;
680 w_uclass(obj
, rb_cString
, arg
);
681 w_byte(TYPE_STRING
, arg
);
682 w_bytes(RSTRING_PTR(obj
), RSTRING_LEN(obj
), arg
);
686 w_uclass(obj
, rb_cRegexp
, arg
);
687 w_byte(TYPE_REGEXP
, arg
);
688 w_bytes(RREGEXP(obj
)->str
, RREGEXP(obj
)->len
, arg
);
689 w_byte((char)rb_reg_options(obj
), arg
);
693 w_uclass(obj
, rb_cArray
, arg
);
694 w_byte(TYPE_ARRAY
, arg
);
696 long len
= RARRAY_LEN(obj
);
697 VALUE
*ptr
= RARRAY_PTR(obj
);
701 w_object(*ptr
, arg
, limit
);
708 w_uclass(obj
, rb_cHash
, arg
);
709 if (NIL_P(RHASH(obj
)->ifnone
)) {
710 w_byte(TYPE_HASH
, arg
);
712 else if (FL_TEST(obj
, FL_USER2
)) {
713 /* FL_USER2 means HASH_PROC_DEFAULT (see hash.c) */
714 rb_raise(rb_eTypeError
, "can't dump hash with default proc");
717 w_byte(TYPE_HASH_DEF
, arg
);
719 w_long(RHASH_SIZE(obj
), arg
);
720 rb_hash_foreach(obj
, hash_each
, (st_data_t
)&c_arg
);
721 if (!NIL_P(RHASH(obj
)->ifnone
)) {
722 w_object(RHASH(obj
)->ifnone
, arg
, limit
);
727 w_class(TYPE_STRUCT
, obj
, arg
, Qtrue
);
729 long len
= RSTRUCT_LEN(obj
);
734 mem
= rb_struct_members(obj
);
735 for (i
=0; i
<len
; i
++) {
736 w_symbol(SYM2ID(RARRAY_PTR(mem
)[i
]), arg
);
737 w_object(RSTRUCT_PTR(obj
)[i
], arg
, limit
);
743 w_class(TYPE_OBJECT
, obj
, arg
, Qtrue
);
744 w_objivar(obj
, &c_arg
);
751 if (!rb_respond_to(obj
, s_dump_data
)) {
752 rb_raise(rb_eTypeError
,
753 "no marshal_dump is defined for class %s",
754 rb_obj_classname(obj
));
756 v
= rb_funcall(obj
, s_dump_data
, 0);
757 w_class(TYPE_DATA
, obj
, arg
, Qtrue
);
758 w_object(v
, arg
, limit
);
763 rb_raise(rb_eTypeError
, "can't dump %s",
764 rb_obj_classname(obj
));
769 w_ivar(obj
, ivtbl
, &c_arg
);
774 dump(struct dump_call_arg
*arg
)
776 w_object(arg
->obj
, arg
->arg
, arg
->limit
);
777 if (arg
->arg
->dest
) {
778 rb_io_write(arg
->arg
->dest
, arg
->arg
->str
);
779 rb_str_resize(arg
->arg
->str
, 0);
785 dump_ensure(struct dump_arg
*arg
)
787 st_free_table(arg
->symbols
);
788 st_free_table(arg
->data
);
789 st_free_table(arg
->compat_tbl
);
790 DATA_PTR(arg
->wrapper
) = 0;
800 * dump( obj [, anIO] , limit=-1 ) => anIO
802 * Serializes obj and all descendant objects. If anIO is
803 * specified, the serialized data will be written to it, otherwise the
804 * data will be returned as a String. If limit is specified, the
805 * traversal of subobjects will be limited to that depth. If limit is
806 * negative, no checking of depth will be performed.
809 * def initialize(str)
817 * (produces no output)
819 * o = Klass.new("hello\n")
820 * data = Marshal.dump(o)
821 * obj = Marshal.load(data)
822 * obj.sayHello #=> "hello\n"
825 marshal_dump(int argc
, VALUE
*argv
)
827 VALUE obj
, port
, a1
, a2
;
830 struct dump_call_arg c_arg
;
833 rb_scan_args(argc
, argv
, "12", &obj
, &a1
, &a2
);
835 if (!NIL_P(a2
)) limit
= NUM2INT(a2
);
836 if (NIL_P(a1
)) goto type_error
;
839 else if (argc
== 2) {
840 if (FIXNUM_P(a1
)) limit
= FIX2INT(a1
);
841 else if (NIL_P(a1
)) goto type_error
;
846 if (!rb_respond_to(port
, s_write
)) {
848 rb_raise(rb_eTypeError
, "instance of IO needed");
850 arg
.str
= rb_str_buf_new(0);
852 if (rb_respond_to(port
, s_binmode
)) {
853 rb_funcall2(port
, s_binmode
, 0, 0);
857 port
= rb_str_buf_new(0);
861 arg
.symbols
= st_init_numtable();
862 arg
.data
= st_init_numtable();
864 arg
.compat_tbl
= st_init_numtable();
865 arg
.wrapper
= Data_Wrap_Struct(rb_cData
, mark_dump_arg
, 0, &arg
);
871 w_byte(MARSHAL_MAJOR
, &arg
);
872 w_byte(MARSHAL_MINOR
, &arg
);
874 rb_ensure(dump
, (VALUE
)&c_arg
, dump_ensure
, (VALUE
)&arg
);
886 st_table
*compat_tbl
;
887 VALUE compat_tbl_wrapper
;
890 static VALUE
r_entry(VALUE v
, struct load_arg
*arg
);
891 static VALUE
r_object(struct load_arg
*arg
);
892 static VALUE
path2class(const char *path
);
895 r_byte(struct load_arg
*arg
)
899 if (TYPE(arg
->src
) == T_STRING
) {
900 if (RSTRING_LEN(arg
->src
) > arg
->offset
) {
901 c
= (unsigned char)RSTRING_PTR(arg
->src
)[arg
->offset
++];
904 rb_raise(rb_eArgError
, "marshal data too short");
908 VALUE src
= arg
->src
;
909 VALUE v
= rb_funcall2(src
, s_getbyte
, 0, 0);
910 if (NIL_P(v
)) rb_eof_error();
911 c
= (unsigned char)NUM2CHR(v
);
917 long_toobig(int size
)
919 rb_raise(rb_eTypeError
, "long too big for this architecture (size %d, given %d)",
923 #undef SIGN_EXTEND_CHAR
925 # define SIGN_EXTEND_CHAR(c) ((signed char)(c))
926 #else /* not __STDC__ */
927 /* As in Harbison and Steele. */
928 # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
932 r_long(struct load_arg
*arg
)
935 int c
= SIGN_EXTEND_CHAR(r_byte(arg
));
938 if (c
== 0) return 0;
940 if (4 < c
&& c
< 128) {
943 if (c
> sizeof(long)) long_toobig(c
);
946 x
|= (long)r_byte(arg
) << (8*i
);
950 if (-129 < c
&& c
< -4) {
954 if (c
> sizeof(long)) long_toobig(c
);
957 x
&= ~((long)0xff << (8*i
));
958 x
|= (long)r_byte(arg
) << (8*i
);
964 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
967 r_bytes0(long len
, struct load_arg
*arg
)
971 if (len
== 0) return rb_str_new(0, 0);
972 if (TYPE(arg
->src
) == T_STRING
) {
973 if (RSTRING_LEN(arg
->src
) - arg
->offset
>= len
) {
974 str
= rb_str_new(RSTRING_PTR(arg
->src
)+arg
->offset
, len
);
979 rb_raise(rb_eArgError
, "marshal data too short");
983 VALUE src
= arg
->src
;
984 VALUE n
= LONG2NUM(len
);
985 str
= rb_funcall2(src
, s_read
, 1, &n
);
986 if (NIL_P(str
)) goto too_short
;
988 if (RSTRING_LEN(str
) != len
) goto too_short
;
989 if (OBJ_TAINTED(str
)) arg
->taint
= Qtrue
;
995 r_symlink(struct load_arg
*arg
)
998 long num
= r_long(arg
);
1000 if (st_lookup(arg
->symbols
, num
, &id
)) {
1003 rb_raise(rb_eArgError
, "bad symbol");
1007 r_symreal(struct load_arg
*arg
)
1009 volatile VALUE s
= r_bytes(arg
);
1010 ID id
= rb_intern(RSTRING_PTR(s
));
1012 st_insert(arg
->symbols
, arg
->symbols
->num_entries
, id
);
1018 r_symbol(struct load_arg
*arg
)
1022 switch ((type
= r_byte(arg
))) {
1024 return r_symreal(arg
);
1026 return r_symlink(arg
);
1028 rb_raise(rb_eArgError
, "dump format error(0x%x)", type
);
1034 r_unique(struct load_arg
*arg
)
1036 return rb_id2name(r_symbol(arg
));
1040 r_string(struct load_arg
*arg
)
1042 return r_bytes(arg
);
1046 r_entry(VALUE v
, struct load_arg
*arg
)
1048 st_data_t real_obj
= (VALUE
)Qundef
;
1049 if (st_lookup(arg
->compat_tbl
, v
, &real_obj
)) {
1050 rb_hash_aset(arg
->data
, INT2FIX(RHASH_SIZE(arg
->data
)), (VALUE
)real_obj
);
1053 rb_hash_aset(arg
->data
, INT2FIX(RHASH_SIZE(arg
->data
)), v
);
1057 if ((VALUE
)real_obj
!= Qundef
)
1058 OBJ_TAINT((VALUE
)real_obj
);
1064 r_leave(VALUE v
, struct load_arg
*arg
)
1067 if (st_lookup(arg
->compat_tbl
, v
, &data
)) {
1068 VALUE real_obj
= (VALUE
)data
;
1069 rb_alloc_func_t allocator
= rb_get_alloc_func(CLASS_OF(real_obj
));
1071 if (st_lookup(compat_allocator_tbl
, (st_data_t
)allocator
, &data
)) {
1072 marshal_compat_t
*compat
= (marshal_compat_t
*)data
;
1073 compat
->loader(real_obj
, v
);
1075 st_delete(arg
->compat_tbl
, &key
, 0);
1079 v
= rb_funcall(arg
->proc
, rb_intern("call"), 1, v
);
1085 r_ivar(VALUE obj
, struct load_arg
*arg
)
1092 ID id
= r_symbol(arg
);
1093 VALUE val
= r_object(arg
);
1094 if (id
== rb_id_encoding()) {
1095 int idx
= rb_enc_find_index(StringValueCStr(val
));
1096 if (idx
> 0) rb_enc_associate_index(obj
, idx
);
1099 rb_ivar_set(obj
, id
, val
);
1106 path2class(const char *path
)
1108 VALUE v
= rb_path2class(path
);
1110 if (TYPE(v
) != T_CLASS
) {
1111 rb_raise(rb_eArgError
, "%s does not refer class", path
);
1117 path2module(const char *path
)
1119 VALUE v
= rb_path2class(path
);
1121 if (TYPE(v
) != T_MODULE
) {
1122 rb_raise(rb_eArgError
, "%s does not refer module", path
);
1128 obj_alloc_by_path(const char *path
, struct load_arg
*arg
)
1132 rb_alloc_func_t allocator
;
1134 klass
= path2class(path
);
1136 allocator
= rb_get_alloc_func(klass
);
1137 if (st_lookup(compat_allocator_tbl
, (st_data_t
)allocator
, &data
)) {
1138 marshal_compat_t
*compat
= (marshal_compat_t
*)data
;
1139 VALUE real_obj
= rb_obj_alloc(klass
);
1140 VALUE obj
= rb_obj_alloc(compat
->oldclass
);
1141 st_insert(arg
->compat_tbl
, (st_data_t
)obj
, (st_data_t
)real_obj
);
1145 return rb_obj_alloc(klass
);
1149 r_object0(struct load_arg
*arg
, int *ivp
, VALUE extmod
)
1152 int type
= r_byte(arg
);
1158 v
= rb_hash_aref(arg
->data
, LONG2FIX(id
));
1160 rb_raise(rb_eArgError
, "dump format error (unlinked)");
1163 v
= rb_funcall(arg
->proc
, rb_intern("call"), 1, v
);
1171 v
= r_object0(arg
, &ivar
, extmod
);
1172 if (ivar
) r_ivar(v
, arg
);
1178 VALUE m
= path2module(r_unique(arg
));
1180 if (NIL_P(extmod
)) extmod
= rb_ary_new2(0);
1181 rb_ary_push(extmod
, m
);
1183 v
= r_object0(arg
, 0, extmod
);
1184 while (RARRAY_LEN(extmod
) > 0) {
1185 m
= rb_ary_pop(extmod
);
1186 rb_extend_object(v
, m
);
1193 VALUE c
= path2class(r_unique(arg
));
1195 if (FL_TEST(c
, FL_SINGLETON
)) {
1196 rb_raise(rb_eTypeError
, "singleton can't be loaded");
1198 v
= r_object0(arg
, 0, extmod
);
1199 if (rb_special_const_p(v
) || TYPE(v
) == T_OBJECT
|| TYPE(v
) == T_CLASS
) {
1201 rb_raise(rb_eArgError
, "dump format error (user class)");
1203 if (TYPE(v
) == T_MODULE
|| !RTEST(rb_class_inherited_p(c
, RBASIC(v
)->klass
))) {
1204 VALUE tmp
= rb_obj_alloc(c
);
1206 if (TYPE(v
) != TYPE(tmp
)) goto format_error
;
1208 RBASIC(v
)->klass
= c
;
1214 v
= r_leave(v
, arg
);
1219 v
= r_leave(v
, arg
);
1224 v
= r_leave(v
, arg
);
1229 long i
= r_long(arg
);
1232 v
= r_leave(v
, arg
);
1238 VALUE str
= r_bytes(arg
);
1239 const char *ptr
= RSTRING_PTR(str
);
1241 if (strcmp(ptr
, "nan") == 0) {
1244 else if (strcmp(ptr
, "inf") == 0) {
1247 else if (strcmp(ptr
, "-inf") == 0) {
1252 d
= strtod(ptr
, &e
);
1253 d
= load_mantissa(d
, e
, RSTRING_LEN(str
) - (e
- ptr
));
1256 v
= r_entry(v
, arg
);
1257 v
= r_leave(v
, arg
);
1265 volatile VALUE data
;
1267 NEWOBJ(big
, struct RBignum
);
1268 OBJSETUP(big
, rb_cBignum
, T_BIGNUM
);
1269 RBIGNUM_SET_SIGN(big
, (r_byte(arg
) == '+'));
1271 data
= r_bytes0(len
* 2, arg
);
1272 #if SIZEOF_BDIGITS == SIZEOF_SHORT
1273 rb_big_resize((VALUE
)big
, len
);
1275 rb_big_resize((VALUE
)big
, (len
+ 1) * 2 / sizeof(BDIGIT
));
1277 digits
= RBIGNUM_DIGITS(big
);
1278 MEMCPY(digits
, RSTRING_PTR(data
), char, len
* 2);
1279 #if SIZEOF_BDIGITS > SIZEOF_SHORT
1280 MEMZERO((char *)digits
+ len
* 2, char,
1281 RBIGNUM_LEN(big
) * sizeof(BDIGIT
) - len
* 2);
1283 len
= RBIGNUM_LEN(big
);
1285 unsigned char *p
= (unsigned char *)digits
;
1287 #if SIZEOF_BDIGITS > SIZEOF_SHORT
1291 for (i
=0; i
<SIZEOF_BDIGITS
; i
++) {
1292 num
|= (int)p
[i
] << shift
;
1296 num
= p
[0] | (p
[1] << 8);
1301 v
= rb_big_norm((VALUE
)big
);
1302 v
= r_entry(v
, arg
);
1303 v
= r_leave(v
, arg
);
1308 v
= r_entry(r_string(arg
), arg
);
1309 v
= r_leave(v
, arg
);
1314 volatile VALUE str
= r_bytes(arg
);
1315 int options
= r_byte(arg
);
1316 v
= r_entry(rb_reg_new_str(str
, options
), arg
);
1317 v
= r_leave(v
, arg
);
1323 volatile long len
= r_long(arg
); /* gcc 2.7.2.3 -O2 bug?? */
1325 v
= rb_ary_new2(len
);
1326 v
= r_entry(v
, arg
);
1328 rb_ary_push(v
, r_object(arg
));
1330 v
= r_leave(v
, arg
);
1337 long len
= r_long(arg
);
1340 v
= r_entry(v
, arg
);
1342 VALUE key
= r_object(arg
);
1343 VALUE value
= r_object(arg
);
1344 rb_hash_aset(v
, key
, value
);
1346 if (type
== TYPE_HASH_DEF
) {
1347 RHASH(v
)->ifnone
= r_object(arg
);
1349 v
= r_leave(v
, arg
);
1357 volatile long i
; /* gcc 2.7.2.3 -O2 bug?? */
1361 klass
= path2class(r_unique(arg
));
1364 v
= rb_obj_alloc(klass
);
1365 if (TYPE(v
) != T_STRUCT
) {
1366 rb_raise(rb_eTypeError
, "class %s not a struct", rb_class2name(klass
));
1368 mem
= rb_struct_s_members(klass
);
1369 if (RARRAY_LEN(mem
) != len
) {
1370 rb_raise(rb_eTypeError
, "struct %s not compatible (struct size differs)",
1371 rb_class2name(klass
));
1374 v
= r_entry(v
, arg
);
1375 values
= rb_ary_new2(len
);
1376 for (i
=0; i
<len
; i
++) {
1377 slot
= r_symbol(arg
);
1379 if (RARRAY_PTR(mem
)[i
] != ID2SYM(slot
)) {
1380 rb_raise(rb_eTypeError
, "struct %s not compatible (:%s for :%s)",
1381 rb_class2name(klass
),
1383 rb_id2name(SYM2ID(RARRAY_PTR(mem
)[i
])));
1385 rb_ary_push(values
, r_object(arg
));
1387 rb_struct_initialize(v
, values
);
1388 v
= r_leave(v
, arg
);
1394 VALUE klass
= path2class(r_unique(arg
));
1397 if (!rb_respond_to(klass
, s_load
)) {
1398 rb_raise(rb_eTypeError
, "class %s needs to have method `_load'",
1399 rb_class2name(klass
));
1401 data
= r_string(arg
);
1406 v
= rb_funcall(klass
, s_load
, 1, data
);
1407 v
= r_entry(v
, arg
);
1408 v
= r_leave(v
, arg
);
1412 case TYPE_USRMARSHAL
:
1414 VALUE klass
= path2class(r_unique(arg
));
1417 v
= rb_obj_alloc(klass
);
1418 if (!NIL_P(extmod
)) {
1419 while (RARRAY_LEN(extmod
) > 0) {
1420 VALUE m
= rb_ary_pop(extmod
);
1421 rb_extend_object(v
, m
);
1424 if (!rb_respond_to(v
, s_mload
)) {
1425 rb_raise(rb_eTypeError
, "instance of %s needs to have method `marshal_load'",
1426 rb_class2name(klass
));
1428 v
= r_entry(v
, arg
);
1429 data
= r_object(arg
);
1430 rb_funcall(v
, s_mload
, 1, data
);
1431 v
= r_leave(v
, arg
);
1437 v
= obj_alloc_by_path(r_unique(arg
), arg
);
1438 if (TYPE(v
) != T_OBJECT
) {
1439 rb_raise(rb_eArgError
, "dump format error");
1441 v
= r_entry(v
, arg
);
1443 v
= r_leave(v
, arg
);
1449 VALUE klass
= path2class(r_unique(arg
));
1450 if (rb_respond_to(klass
, s_alloc
)) {
1451 static int warn
= Qtrue
;
1453 rb_warn("define `allocate' instead of `_alloc'");
1456 v
= rb_funcall(klass
, s_alloc
, 0);
1459 v
= rb_obj_alloc(klass
);
1461 if (TYPE(v
) != T_DATA
) {
1462 rb_raise(rb_eArgError
, "dump format error");
1464 v
= r_entry(v
, arg
);
1465 if (!rb_respond_to(v
, s_load_data
)) {
1466 rb_raise(rb_eTypeError
,
1467 "class %s needs to have instance method `_load_data'",
1468 rb_class2name(klass
));
1470 rb_funcall(v
, s_load_data
, 1, r_object0(arg
, 0, extmod
));
1471 v
= r_leave(v
, arg
);
1475 case TYPE_MODULE_OLD
:
1477 volatile VALUE str
= r_bytes(arg
);
1479 v
= rb_path2class(RSTRING_PTR(str
));
1480 v
= r_entry(v
, arg
);
1481 v
= r_leave(v
, arg
);
1487 volatile VALUE str
= r_bytes(arg
);
1489 v
= path2class(RSTRING_PTR(str
));
1490 v
= r_entry(v
, arg
);
1491 v
= r_leave(v
, arg
);
1497 volatile VALUE str
= r_bytes(arg
);
1499 v
= path2module(RSTRING_PTR(str
));
1500 v
= r_entry(v
, arg
);
1501 v
= r_leave(v
, arg
);
1506 v
= ID2SYM(r_symreal(arg
));
1507 v
= r_leave(v
, arg
);
1511 v
= ID2SYM(r_symlink(arg
));
1515 rb_raise(rb_eArgError
, "dump format error(0x%x)", type
);
1522 r_object(struct load_arg
*arg
)
1524 return r_object0(arg
, 0, Qnil
);
1528 load(struct load_arg
*arg
)
1530 return r_object(arg
);
1534 load_ensure(struct load_arg
*arg
)
1536 st_free_table(arg
->symbols
);
1537 st_free_table(arg
->compat_tbl
);
1538 DATA_PTR(arg
->compat_tbl_wrapper
) = 0;
1539 arg
->compat_tbl_wrapper
= 0;
1545 * load( source [, proc] ) => obj
1546 * restore( source [, proc] ) => obj
1548 * Returns the result of converting the serialized data in source into a
1549 * Ruby object (possibly with associated subordinate objects). source
1550 * may be either an instance of IO or an object that responds to
1551 * to_str. If proc is specified, it will be passed each object as it is deserialized.
1555 marshal_load(int argc
, VALUE
*argv
)
1560 struct load_arg arg
;
1562 rb_scan_args(argc
, argv
, "11", &port
, &proc
);
1563 if (rb_respond_to(port
, rb_intern("to_str"))) {
1564 arg
.taint
= OBJ_TAINTED(port
); /* original taintedness */
1565 StringValue(port
); /* possible conversion */
1567 else if (rb_respond_to(port
, s_getbyte
) && rb_respond_to(port
, s_read
)) {
1568 if (rb_respond_to(port
, s_binmode
)) {
1569 rb_funcall2(port
, s_binmode
, 0, 0);
1574 rb_raise(rb_eTypeError
, "instance of IO needed");
1578 arg
.compat_tbl
= st_init_numtable();
1579 arg
.compat_tbl_wrapper
= Data_Wrap_Struct(rb_cData
, rb_mark_tbl
, 0, arg
.compat_tbl
);
1581 major
= r_byte(&arg
);
1582 minor
= r_byte(&arg
);
1583 if (major
!= MARSHAL_MAJOR
|| minor
> MARSHAL_MINOR
) {
1584 rb_raise(rb_eTypeError
, "incompatible marshal file format (can't be read)\n\
1585 \tformat version %d.%d required; %d.%d given",
1586 MARSHAL_MAJOR
, MARSHAL_MINOR
, major
, minor
);
1588 if (RTEST(ruby_verbose
) && minor
!= MARSHAL_MINOR
) {
1589 rb_warn("incompatible marshal file format (can be read)\n\
1590 \tformat version %d.%d required; %d.%d given",
1591 MARSHAL_MAJOR
, MARSHAL_MINOR
, major
, minor
);
1594 arg
.symbols
= st_init_numtable();
1595 arg
.data
= rb_hash_new();
1596 if (NIL_P(proc
)) arg
.proc
= 0;
1597 else arg
.proc
= proc
;
1598 v
= rb_ensure(load
, (VALUE
)&arg
, load_ensure
, (VALUE
)&arg
);
1604 * The marshaling library converts collections of Ruby objects into a
1605 * byte stream, allowing them to be stored outside the currently
1606 * active script. This data may subsequently be read and the original
1607 * objects reconstituted.
1608 * Marshaled data has major and minor version numbers stored along
1609 * with the object information. In normal use, marshaling can only
1610 * load data written with the same major version number and an equal
1611 * or lower minor version number. If Ruby's ``verbose'' flag is set
1612 * (normally using -d, -v, -w, or --verbose) the major and minor
1613 * numbers must match exactly. Marshal versioning is independent of
1614 * Ruby's version numbers. You can extract the version by reading the
1615 * first two bytes of marshaled data.
1617 * str = Marshal.dump("thing")
1618 * RUBY_VERSION #=> "1.9.0"
1622 * Some objects cannot be dumped: if the objects to be dumped include
1623 * bindings, procedure or method objects, instances of class IO, or
1624 * singleton objects, a TypeError will be raised.
1625 * If your class has special serialization needs (for example, if you
1626 * want to serialize in some specific format), or if it contains
1627 * objects that would otherwise not be serializable, you can implement
1628 * your own serialization strategy by defining two methods, _dump and _load.
1630 * The instance method _dump should return a String object containing
1631 * all the information necessary to reconstitute objects of this class
1632 * and all referenced objects up to a maximum depth given as an integer
1633 * parameter (a value of -1 implies that you should disable depth checking).
1634 * The class method _load should take a String and return an object of this class.
1639 VALUE rb_mMarshal
= rb_define_module("Marshal");
1641 s_dump
= rb_intern("_dump");
1642 s_load
= rb_intern("_load");
1643 s_mdump
= rb_intern("marshal_dump");
1644 s_mload
= rb_intern("marshal_load");
1645 s_dump_data
= rb_intern("_dump_data");
1646 s_load_data
= rb_intern("_load_data");
1647 s_alloc
= rb_intern("_alloc");
1648 s_getbyte
= rb_intern("getbyte");
1649 s_read
= rb_intern("read");
1650 s_write
= rb_intern("write");
1651 s_binmode
= rb_intern("binmode");
1653 rb_define_module_function(rb_mMarshal
, "dump", marshal_dump
, -1);
1654 rb_define_module_function(rb_mMarshal
, "load", marshal_load
, -1);
1655 rb_define_module_function(rb_mMarshal
, "restore", marshal_load
, -1);
1657 rb_define_const(rb_mMarshal
, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR
));
1658 rb_define_const(rb_mMarshal
, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR
));
1660 compat_allocator_tbl
= st_init_numtable();
1661 rb_gc_register_address(&compat_allocator_tbl_wrapper
);
1662 compat_allocator_tbl_wrapper
=
1663 Data_Wrap_Struct(rb_cData
, mark_marshal_compat_t
, 0, compat_allocator_tbl
);
1667 rb_marshal_dump(VALUE obj
, VALUE port
)
1674 if (!NIL_P(port
)) argc
= 2;
1675 return marshal_dump(argc
, argv
);
1679 rb_marshal_load(VALUE port
)
1681 return marshal_load(1, &port
);