1 /**********************************************************************
6 created at: Thu Apr 27 16:30:01 JST 1995
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "ruby/ruby.h"
15 #include "ruby/util.h"
16 #include "ruby/encoding.h"
26 #define BITSPERSHORT (2*CHAR_BIT) /* number of bits in a two-char unit */
27 #define SHORTMASK ((1<<BITSPERSHORT)-1) /* mask selecting the low BITSPERSHORT bits */
28 #define SHORTDN(x) RSHIFT(x,BITSPERSHORT) /* shift x right by BITSPERSHORT bits, dropping the low unit */
30 #if SIZEOF_SHORT == SIZEOF_BDIGITS
31 #define SHORTLEN(x) (x)
34 shortlen(long len
, BDIGIT
*ds
)
44 return (len
- 1)*sizeof(BDIGIT
)/2 + offset
;
46 #define SHORTLEN(x) shortlen((x),d)
49 #define MARSHAL_MAJOR 4 /* format major version: first byte written by marshal_dump; marshal_load raises TypeError when the data's major differs */
50 #define MARSHAL_MINOR 8 /* format minor version: second byte written; marshal_load raises TypeError when the data's minor is greater, and only warns (verbose mode) when it merely differs */
54 #define TYPE_FALSE 'F' /* tag byte written for Qfalse */
55 #define TYPE_FIXNUM 'i' /* Fixnum small enough to dump directly; followed by a w_long value */
57 #define TYPE_EXTENDED 'e' /* emitted by w_extended once per T_ICLASS found above the object's class */
58 #define TYPE_UCLASS 'C' /* emitted by w_uclass when the object's real class differs from the expected built-in class */
59 #define TYPE_OBJECT 'o' /* plain object: class followed by its instance variables (w_objivar) */
61 #define TYPE_USERDEF 'u' /* object dumped through its _dump method; payload is the returned String */
62 #define TYPE_USRMARSHAL 'U' /* object dumped through marshal_dump; payload is the dumped return value */
63 #define TYPE_FLOAT 'f' /* Float, body written by w_float */
64 #define TYPE_BIGNUM 'l' /* Bignum: sign, length, then 16-bit digit units */
65 #define TYPE_STRING '"' /* String contents */
66 #define TYPE_REGEXP '/' /* Regexp: source bytes followed by one options byte */
67 #define TYPE_ARRAY '[' /* Array: elements dumped in order */
69 #define TYPE_HASH_DEF '}' /* Hash with a non-nil default value; the default is dumped after the pairs */
70 #define TYPE_STRUCT 'S' /* Struct: member symbol/value pairs */
71 #define TYPE_MODULE_OLD 'M' /* only read in the visible code: resolved with rb_path2class, no class/module check */
72 #define TYPE_CLASS 'c' /* Class, identified by its path */
73 #define TYPE_MODULE 'm' /* Module, identified by its path */
75 #define TYPE_SYMBOL ':' /* first occurrence of a symbol: its name bytes */
76 #define TYPE_SYMLINK ';' /* repeated symbol: index into the table of symbols already seen */
81 static ID s_dump
, s_load
, s_mdump
, s_mload
;
82 static ID s_dump_data
, s_load_data
, s_alloc
;
83 static ID s_getbyte
, s_read
, s_write
, s_binmode
;
85 ID
rb_id_encoding(void);
90 VALUE (*dumper
)(VALUE
);
91 VALUE (*loader
)(VALUE
, VALUE
);
94 static st_table
*compat_allocator_tbl
;
95 static VALUE compat_allocator_tbl_wrapper
;
98 mark_marshal_compat_i(st_data_t key
, st_data_t value
)
100 marshal_compat_t
*p
= (marshal_compat_t
*)value
;
101 rb_gc_mark(p
->newclass
);
102 rb_gc_mark(p
->oldclass
);
107 mark_marshal_compat_t(void *tbl
)
110 st_foreach(tbl
, mark_marshal_compat_i
, 0);
114 rb_marshal_define_compat(VALUE newclass
, VALUE oldclass
, VALUE (*dumper
)(VALUE
), VALUE (*loader
)(VALUE
, VALUE
))
116 marshal_compat_t
*compat
;
117 rb_alloc_func_t allocator
= rb_get_alloc_func(newclass
);
120 rb_raise(rb_eTypeError
, "no allocator");
123 compat
= ALLOC(marshal_compat_t
);
124 compat
->newclass
= Qnil
;
125 compat
->oldclass
= Qnil
;
126 compat
->newclass
= newclass
;
127 compat
->oldclass
= oldclass
;
128 compat
->dumper
= dumper
;
129 compat
->loader
= loader
;
131 st_insert(compat_allocator_tbl
, (st_data_t
)allocator
, (st_data_t
)compat
);
140 st_table
*compat_tbl
;
145 struct dump_call_arg
{
147 struct dump_arg
*arg
;
152 check_dump_arg(struct dump_arg
*arg
)
154 if (!DATA_PTR(arg
->wrapper
)) {
155 rb_raise(rb_eRuntimeError
, "Marshal.dump reentered");
160 mark_dump_arg(void *ptr
)
162 struct dump_arg
*p
= ptr
;
165 rb_mark_set(p
->data
);
166 rb_mark_hash(p
->compat_tbl
);
170 class2path(VALUE klass
)
172 VALUE path
= rb_class_path(klass
);
173 char *n
= RSTRING_PTR(path
);
176 rb_raise(rb_eTypeError
, "can't dump anonymous %s %s",
177 (TYPE(klass
) == T_CLASS
? "class" : "module"),
180 if (rb_path2class(n
) != rb_class_real(klass
)) {
181 rb_raise(rb_eTypeError
, "%s can't be referred", n
);
186 static void w_long(long, struct dump_arg
*);
189 w_nbyte(const char *s
, int n
, struct dump_arg
*arg
)
191 VALUE buf
= arg
->str
;
192 rb_str_buf_cat(buf
, s
, n
);
193 if (arg
->dest
&& RSTRING_LEN(buf
) >= BUFSIZ
) {
194 if (arg
->taint
) OBJ_TAINT(buf
);
195 rb_io_write(arg
->dest
, buf
);
196 rb_str_resize(buf
, 0);
201 w_byte(char c
, struct dump_arg
*arg
)
207 w_bytes(const char *s
, int n
, struct dump_arg
*arg
)
214 w_short(int x
, struct dump_arg
*arg
)
216 w_byte((char)((x
>> 0) & 0xff), arg
);
217 w_byte((char)((x
>> 8) & 0xff), arg
);
221 w_long(long x
, struct dump_arg
*arg
)
223 char buf
[sizeof(long)+1];
227 if (!(RSHIFT(x
, 31) == 0 || RSHIFT(x
, 31) == -1)) {
228 /* big long does not fit in 4 bytes */
229 rb_raise(rb_eTypeError
, "long too big to dump");
237 if (0 < x
&& x
< 123) {
238 w_byte((char)(x
+ 5), arg
);
241 if (-124 < x
&& x
< 0) {
242 w_byte((char)((x
- 5)&0xff), arg
);
245 for (i
=1;i
<sizeof(long)+1;i
++) {
258 for (i
=0;i
<=len
;i
++) {
264 #define DECIMAL_MANT (53-16) /* from IEEE754 double precision */
266 #if DBL_MANT_DIG > 32
268 #elif DBL_MANT_DIG > 24
270 #elif DBL_MANT_DIG > 16
277 save_mantissa(double d
, char *buf
)
283 d
= modf(ldexp(frexp(fabs(d
), &e
), DECIMAL_MANT
), &d
);
287 d
= modf(ldexp(d
, MANT_BITS
), &n
);
288 m
= (unsigned long)n
;
300 while (!buf
[i
- 1]) --i
;
306 load_mantissa(double d
, const char *buf
, int len
)
308 if (--len
> 0 && !*buf
++) { /* binary mantissa mark */
309 int e
, s
= d
< 0, dig
= 0;
312 modf(ldexp(frexp(fabs(d
), &e
), DECIMAL_MANT
), &d
);
316 default: m
= *buf
++ & 0xff;
318 case 3: m
= (m
<< 8) | (*buf
++ & 0xff);
321 case 2: m
= (m
<< 8) | (*buf
++ & 0xff);
324 case 1: m
= (m
<< 8) | (*buf
++ & 0xff);
327 dig
-= len
< MANT_BITS
/ 8 ? 8 * (unsigned)len
: MANT_BITS
;
328 d
+= ldexp((double)m
, dig
);
329 } while ((len
-= MANT_BITS
/ 8) > 0);
330 d
= ldexp(d
, e
- DECIMAL_MANT
);
336 #define load_mantissa(d, buf, len) (d)
337 #define save_mantissa(d, buf) 0
341 #define FLOAT_DIG (DBL_DIG+2)
347 w_float(double d
, struct dump_arg
*arg
)
349 char buf
[FLOAT_DIG
+ (DECIMAL_MANT
+ 7) / 8 + 10];
352 if (d
< 0) strcpy(buf
, "-inf");
353 else strcpy(buf
, "inf");
359 if (1.0/d
< 0) strcpy(buf
, "-0");
360 else strcpy(buf
, "0");
365 /* xxx: should not use system's sprintf(3) */
366 snprintf(buf
, sizeof(buf
), "%.*g", FLOAT_DIG
, d
);
368 w_bytes(buf
, len
+ save_mantissa(d
, buf
+ len
), arg
);
371 w_bytes(buf
, strlen(buf
), arg
);
375 w_symbol(ID id
, struct dump_arg
*arg
)
380 if (st_lookup(arg
->symbols
, id
, &num
)) {
381 w_byte(TYPE_SYMLINK
, arg
);
382 w_long((long)num
, arg
);
385 sym
= rb_id2name(id
);
387 rb_raise(rb_eTypeError
, "can't dump anonymous ID %ld", id
);
389 w_byte(TYPE_SYMBOL
, arg
);
390 w_bytes(sym
, strlen(sym
), arg
);
391 st_add_direct(arg
->symbols
, id
, arg
->symbols
->num_entries
);
396 w_unique(const char *s
, struct dump_arg
*arg
)
399 rb_raise(rb_eTypeError
, "can't dump anonymous class %s", s
);
401 w_symbol(rb_intern(s
), arg
);
404 static void w_object(VALUE
,struct dump_arg
*,int);
407 hash_each(VALUE key
, VALUE value
, struct dump_call_arg
*arg
)
409 w_object(key
, arg
->arg
, arg
->limit
);
410 w_object(value
, arg
->arg
, arg
->limit
);
415 w_extended(VALUE klass
, struct dump_arg
*arg
, int check
)
419 if (check
&& FL_TEST(klass
, FL_SINGLETON
)) {
420 if (RCLASS_M_TBL(klass
)->num_entries
||
421 (RCLASS_IV_TBL(klass
) && RCLASS_IV_TBL(klass
)->num_entries
> 1)) {
422 rb_raise(rb_eTypeError
, "singleton can't be dumped");
424 klass
= RCLASS_SUPER(klass
);
426 while (BUILTIN_TYPE(klass
) == T_ICLASS
) {
427 path
= rb_class2name(RBASIC(klass
)->klass
);
428 w_byte(TYPE_EXTENDED
, arg
);
430 klass
= RCLASS_SUPER(klass
);
435 w_class(char type
, VALUE obj
, struct dump_arg
*arg
, int check
)
442 if (st_lookup(arg
->compat_tbl
, (st_data_t
)obj
, &real_obj
)) {
443 obj
= (VALUE
)real_obj
;
445 klass
= CLASS_OF(obj
);
446 w_extended(klass
, arg
, check
);
448 p
= class2path(rb_class_real(klass
));
449 path
= RSTRING_PTR(p
);
454 w_uclass(VALUE obj
, VALUE super
, struct dump_arg
*arg
)
456 VALUE klass
= CLASS_OF(obj
);
458 w_extended(klass
, arg
, Qtrue
);
459 klass
= rb_class_real(klass
);
460 if (klass
!= super
) {
461 w_byte(TYPE_UCLASS
, arg
);
462 w_unique(RSTRING_PTR(class2path(klass
)), arg
);
467 w_obj_each(ID id
, VALUE value
, struct dump_call_arg
*arg
)
469 if (id
== rb_id_encoding()) return ST_CONTINUE
;
470 w_symbol(id
, arg
->arg
);
471 w_object(value
, arg
->arg
, arg
->limit
);
476 w_encoding(VALUE obj
, long num
, struct dump_call_arg
*arg
)
478 int encidx
= rb_enc_get_index(obj
);
479 rb_encoding
*enc
= 0;
482 if (encidx
<= 0 || !(enc
= rb_enc_from_index(encidx
))) {
483 w_long(num
, arg
->arg
);
486 w_long(num
+ 1, arg
->arg
);
487 w_symbol(rb_id_encoding(), arg
->arg
);
489 if (!arg
->arg
->encodings
)
490 arg
->arg
->encodings
= st_init_strcasetable();
491 else if (st_lookup(arg
->arg
->encodings
, (st_data_t
)rb_enc_name(enc
), &name
))
493 name
= (st_data_t
)rb_str_new2(rb_enc_name(enc
));
494 st_insert(arg
->arg
->encodings
, (st_data_t
)rb_enc_name(enc
), name
);
496 w_object(name
, arg
->arg
, arg
->limit
);
500 w_ivar(VALUE obj
, st_table
*tbl
, struct dump_call_arg
*arg
)
502 long num
= tbl
? tbl
->num_entries
: 0;
504 w_encoding(obj
, num
, arg
);
506 st_foreach_safe(tbl
, w_obj_each
, (st_data_t
)arg
);
511 w_objivar(VALUE obj
, struct dump_call_arg
*arg
)
516 len
= ROBJECT_NUMIV(obj
);
517 ptr
= ROBJECT_IVPTR(obj
);
519 for (i
= 0; i
< len
; i
++)
520 if (ptr
[i
] != Qundef
)
523 w_encoding(obj
, num
, arg
);
525 rb_ivar_foreach(obj
, w_obj_each
, (st_data_t
)arg
);
530 w_object(VALUE obj
, struct dump_arg
*arg
, int limit
)
532 struct dump_call_arg c_arg
;
536 #define has_ivars(obj, ivtbl) ((ivtbl = rb_generic_ivar_table(obj)) != 0 || \
537 (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj)))
540 rb_raise(rb_eArgError
, "exceed depth limit");
547 if (st_lookup(arg
->data
, obj
, &num
)) {
548 w_byte(TYPE_LINK
, arg
);
549 w_long((long)num
, arg
);
553 if ((hasiv
= has_ivars(obj
, ivtbl
)) != 0) {
554 w_byte(TYPE_IVAR
, arg
);
557 w_byte(TYPE_NIL
, arg
);
559 else if (obj
== Qtrue
) {
560 w_byte(TYPE_TRUE
, arg
);
562 else if (obj
== Qfalse
) {
563 w_byte(TYPE_FALSE
, arg
);
565 else if (FIXNUM_P(obj
)) {
567 w_byte(TYPE_FIXNUM
, arg
);
568 w_long(FIX2INT(obj
), arg
);
570 if (RSHIFT((long)obj
, 31) == 0 || RSHIFT((long)obj
, 31) == -1) {
571 w_byte(TYPE_FIXNUM
, arg
);
572 w_long(FIX2LONG(obj
), arg
);
575 w_object(rb_int2big(FIX2LONG(obj
)), arg
, limit
);
579 else if (SYMBOL_P(obj
)) {
580 w_symbol(SYM2ID(obj
), arg
);
583 if (OBJ_TAINTED(obj
)) arg
->taint
= Qtrue
;
585 if (rb_respond_to(obj
, s_mdump
)) {
588 st_add_direct(arg
->data
, obj
, arg
->data
->num_entries
);
590 v
= rb_funcall(obj
, s_mdump
, 0, 0);
592 w_class(TYPE_USRMARSHAL
, obj
, arg
, Qfalse
);
593 w_object(v
, arg
, limit
);
594 if (hasiv
) w_ivar(obj
, 0, &c_arg
);
597 if (rb_respond_to(obj
, s_dump
)) {
599 st_table
*ivtbl2
= 0;
602 v
= rb_funcall(obj
, s_dump
, 1, INT2NUM(limit
));
604 if (TYPE(v
) != T_STRING
) {
605 rb_raise(rb_eTypeError
, "_dump() must return string");
607 if ((hasiv2
= has_ivars(v
, ivtbl2
)) != 0 && !hasiv
) {
608 w_byte(TYPE_IVAR
, arg
);
610 w_class(TYPE_USERDEF
, obj
, arg
, Qfalse
);
611 w_bytes(RSTRING_PTR(v
), RSTRING_LEN(v
), arg
);
613 w_ivar(v
, ivtbl2
, &c_arg
);
616 w_ivar(obj
, ivtbl
, &c_arg
);
618 st_add_direct(arg
->data
, obj
, arg
->data
->num_entries
);
622 st_add_direct(arg
->data
, obj
, arg
->data
->num_entries
);
625 st_data_t compat_data
;
626 rb_alloc_func_t allocator
= rb_get_alloc_func(RBASIC(obj
)->klass
);
627 if (st_lookup(compat_allocator_tbl
,
628 (st_data_t
)allocator
,
630 marshal_compat_t
*compat
= (marshal_compat_t
*)compat_data
;
631 VALUE real_obj
= obj
;
632 obj
= compat
->dumper(real_obj
);
633 st_insert(arg
->compat_tbl
, (st_data_t
)obj
, (st_data_t
)real_obj
);
637 switch (BUILTIN_TYPE(obj
)) {
639 if (FL_TEST(obj
, FL_SINGLETON
)) {
640 rb_raise(rb_eTypeError
, "singleton class can't be dumped");
642 w_byte(TYPE_CLASS
, arg
);
644 volatile VALUE path
= class2path(obj
);
645 w_bytes(RSTRING_PTR(path
), RSTRING_LEN(path
), arg
);
650 w_byte(TYPE_MODULE
, arg
);
652 VALUE path
= class2path(obj
);
653 w_bytes(RSTRING_PTR(path
), RSTRING_LEN(path
), arg
);
658 w_byte(TYPE_FLOAT
, arg
);
659 w_float(RFLOAT_VALUE(obj
), arg
);
663 w_byte(TYPE_BIGNUM
, arg
);
665 char sign
= RBIGNUM_SIGN(obj
) ? '+' : '-';
666 long len
= RBIGNUM_LEN(obj
);
667 BDIGIT
*d
= RBIGNUM_DIGITS(obj
);
670 w_long(SHORTLEN(len
), arg
); /* w_short? */
672 #if SIZEOF_BDIGITS > SIZEOF_SHORT
676 for (i
=0; i
<SIZEOF_BDIGITS
; i
+=SIZEOF_SHORT
) {
677 w_short(num
& SHORTMASK
, arg
);
679 if (len
== 0 && num
== 0) break;
690 w_uclass(obj
, rb_cString
, arg
);
691 w_byte(TYPE_STRING
, arg
);
692 w_bytes(RSTRING_PTR(obj
), RSTRING_LEN(obj
), arg
);
696 w_uclass(obj
, rb_cRegexp
, arg
);
697 w_byte(TYPE_REGEXP
, arg
);
699 int opts
= rb_reg_options(obj
);
700 w_bytes(RREGEXP_SRC_PTR(obj
), RREGEXP_SRC_LEN(obj
), arg
);
701 w_byte((char)opts
, arg
);
706 w_uclass(obj
, rb_cArray
, arg
);
707 w_byte(TYPE_ARRAY
, arg
);
709 long i
, len
= RARRAY_LEN(obj
);
712 for (i
=0; i
<RARRAY_LEN(obj
); i
++) {
713 w_object(RARRAY_PTR(obj
)[i
], arg
, limit
);
714 if (len
!= RARRAY_LEN(obj
)) {
715 rb_raise(rb_eRuntimeError
, "array modified during dump");
722 w_uclass(obj
, rb_cHash
, arg
);
723 if (NIL_P(RHASH(obj
)->ifnone
)) {
724 w_byte(TYPE_HASH
, arg
);
726 else if (FL_TEST(obj
, FL_USER2
)) {
727 /* FL_USER2 means HASH_PROC_DEFAULT (see hash.c) */
728 rb_raise(rb_eTypeError
, "can't dump hash with default proc");
731 w_byte(TYPE_HASH_DEF
, arg
);
733 w_long(RHASH_SIZE(obj
), arg
);
734 rb_hash_foreach(obj
, hash_each
, (st_data_t
)&c_arg
);
735 if (!NIL_P(RHASH(obj
)->ifnone
)) {
736 w_object(RHASH(obj
)->ifnone
, arg
, limit
);
741 w_class(TYPE_STRUCT
, obj
, arg
, Qtrue
);
743 long len
= RSTRUCT_LEN(obj
);
748 mem
= rb_struct_members(obj
);
749 for (i
=0; i
<len
; i
++) {
750 w_symbol(SYM2ID(RARRAY_PTR(mem
)[i
]), arg
);
751 w_object(RSTRUCT_PTR(obj
)[i
], arg
, limit
);
757 w_class(TYPE_OBJECT
, obj
, arg
, Qtrue
);
758 w_objivar(obj
, &c_arg
);
765 if (!rb_respond_to(obj
, s_dump_data
)) {
766 rb_raise(rb_eTypeError
,
767 "no marshal_dump is defined for class %s",
768 rb_obj_classname(obj
));
770 v
= rb_funcall(obj
, s_dump_data
, 0);
772 w_class(TYPE_DATA
, obj
, arg
, Qtrue
);
773 w_object(v
, arg
, limit
);
778 rb_raise(rb_eTypeError
, "can't dump %s",
779 rb_obj_classname(obj
));
784 w_ivar(obj
, ivtbl
, &c_arg
);
789 dump(struct dump_call_arg
*arg
)
791 w_object(arg
->obj
, arg
->arg
, arg
->limit
);
792 if (arg
->arg
->dest
) {
793 rb_io_write(arg
->arg
->dest
, arg
->arg
->str
);
794 rb_str_resize(arg
->arg
->str
, 0);
800 dump_ensure(struct dump_arg
*arg
)
802 if (!DATA_PTR(arg
->wrapper
)) return 0;
803 st_free_table(arg
->symbols
);
804 st_free_table(arg
->data
);
805 st_free_table(arg
->compat_tbl
);
806 if (arg
->encodings
) st_free_table(arg
->encodings
);
807 DATA_PTR(arg
->wrapper
) = 0;
817 * dump( obj [, anIO] , limit=-1 ) => anIO
819 * Serializes obj and all descendent objects. If anIO is
820 * specified, the serialized data will be written to it, otherwise the
821 * data will be returned as a String. If limit is specified, the
822 * traversal of subobjects will be limited to that depth. If limit is
823 * negative, no checking of depth will be performed.
826 * def initialize(str)
834 * (produces no output)
836 * o = Klass.new("hello\n")
837 * data = Marshal.dump(o)
838 * obj = Marshal.load(data)
839 * obj.sayHello #=> "hello\n"
842 marshal_dump(int argc
, VALUE
*argv
)
844 VALUE obj
, port
, a1
, a2
;
847 struct dump_call_arg c_arg
;
850 rb_scan_args(argc
, argv
, "12", &obj
, &a1
, &a2
);
852 if (!NIL_P(a2
)) limit
= NUM2INT(a2
);
853 if (NIL_P(a1
)) goto type_error
;
856 else if (argc
== 2) {
857 if (FIXNUM_P(a1
)) limit
= FIX2INT(a1
);
858 else if (NIL_P(a1
)) goto type_error
;
863 if (!rb_respond_to(port
, s_write
)) {
865 rb_raise(rb_eTypeError
, "instance of IO needed");
867 arg
.str
= rb_str_buf_new(0);
869 if (rb_respond_to(port
, s_binmode
)) {
870 rb_funcall2(port
, s_binmode
, 0, 0);
874 port
= rb_str_buf_new(0);
878 arg
.symbols
= st_init_numtable();
879 arg
.data
= st_init_numtable();
881 arg
.compat_tbl
= st_init_numtable();
882 arg
.wrapper
= Data_Wrap_Struct(rb_cData
, mark_dump_arg
, 0, &arg
);
888 w_byte(MARSHAL_MAJOR
, &arg
);
889 w_byte(MARSHAL_MINOR
, &arg
);
891 rb_ensure(dump
, (VALUE
)&c_arg
, dump_ensure
, (VALUE
)&arg
);
903 st_table
*compat_tbl
;
904 VALUE compat_tbl_wrapper
;
908 check_load_arg(struct load_arg
*arg
)
910 if (!DATA_PTR(arg
->compat_tbl_wrapper
)) {
911 rb_raise(rb_eRuntimeError
, "Marshal.load reentered");
915 static VALUE
r_entry(VALUE v
, struct load_arg
*arg
);
916 static VALUE
r_object(struct load_arg
*arg
);
917 static VALUE
path2class(const char *path
);
920 r_byte(struct load_arg
*arg
)
924 if (TYPE(arg
->src
) == T_STRING
) {
925 if (RSTRING_LEN(arg
->src
) > arg
->offset
) {
926 c
= (unsigned char)RSTRING_PTR(arg
->src
)[arg
->offset
++];
929 rb_raise(rb_eArgError
, "marshal data too short");
933 VALUE src
= arg
->src
;
934 VALUE v
= rb_funcall2(src
, s_getbyte
, 0, 0);
936 if (NIL_P(v
)) rb_eof_error();
937 c
= (unsigned char)NUM2CHR(v
);
943 long_toobig(int size
)
945 rb_raise(rb_eTypeError
, "long too big for this architecture (size "
946 STRINGIZE(SIZEOF_LONG
)", given %d)", size
);
949 #undef SIGN_EXTEND_CHAR
951 # define SIGN_EXTEND_CHAR(c) ((signed char)(c))
952 #else /* not __STDC__ */
953 /* As in Harbison and Steele. */
954 # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
958 r_long(struct load_arg
*arg
)
961 int c
= SIGN_EXTEND_CHAR(r_byte(arg
));
964 if (c
== 0) return 0;
966 if (4 < c
&& c
< 128) {
969 if (c
> sizeof(long)) long_toobig(c
);
972 x
|= (long)r_byte(arg
) << (8*i
);
976 if (-129 < c
&& c
< -4) {
980 if (c
> sizeof(long)) long_toobig(c
);
983 x
&= ~((long)0xff << (8*i
));
984 x
|= (long)r_byte(arg
) << (8*i
);
990 #define r_bytes(arg) r_bytes0(r_long(arg), (arg)) /* read a length with r_long, then that many bytes via r_bytes0; note that arg is evaluated twice */
993 r_bytes0(long len
, struct load_arg
*arg
)
997 if (len
== 0) return rb_str_new(0, 0);
998 if (TYPE(arg
->src
) == T_STRING
) {
999 if (RSTRING_LEN(arg
->src
) - arg
->offset
>= len
) {
1000 str
= rb_str_new(RSTRING_PTR(arg
->src
)+arg
->offset
, len
);
1005 rb_raise(rb_eArgError
, "marshal data too short");
1009 VALUE src
= arg
->src
;
1010 VALUE n
= LONG2NUM(len
);
1011 str
= rb_funcall2(src
, s_read
, 1, &n
);
1012 check_load_arg(arg
);
1013 if (NIL_P(str
)) goto too_short
;
1015 if (RSTRING_LEN(str
) != len
) goto too_short
;
1016 if (OBJ_TAINTED(str
)) arg
->taint
= Qtrue
;
1022 r_symlink(struct load_arg
*arg
)
1025 long num
= r_long(arg
);
1027 if (st_lookup(arg
->symbols
, num
, &id
)) {
1030 rb_raise(rb_eArgError
, "bad symbol");
1034 r_symreal(struct load_arg
*arg
)
1036 volatile VALUE s
= r_bytes(arg
);
1037 ID id
= rb_intern(RSTRING_PTR(s
));
1039 st_insert(arg
->symbols
, arg
->symbols
->num_entries
, id
);
1045 r_symbol(struct load_arg
*arg
)
1049 switch ((type
= r_byte(arg
))) {
1051 return r_symreal(arg
);
1053 return r_symlink(arg
);
1055 rb_raise(rb_eArgError
, "dump format error(0x%x)", type
);
1061 r_unique(struct load_arg
*arg
)
1063 return rb_id2name(r_symbol(arg
));
1067 r_string(struct load_arg
*arg
)
1069 return r_bytes(arg
);
1073 r_entry(VALUE v
, struct load_arg
*arg
)
1075 st_data_t real_obj
= (VALUE
)Qundef
;
1076 if (st_lookup(arg
->compat_tbl
, v
, &real_obj
)) {
1077 rb_hash_aset(arg
->data
, INT2FIX(RHASH_SIZE(arg
->data
)), (VALUE
)real_obj
);
1080 rb_hash_aset(arg
->data
, INT2FIX(RHASH_SIZE(arg
->data
)), v
);
1084 if ((VALUE
)real_obj
!= Qundef
)
1085 OBJ_TAINT((VALUE
)real_obj
);
1091 r_leave(VALUE v
, struct load_arg
*arg
)
1094 if (st_lookup(arg
->compat_tbl
, v
, &data
)) {
1095 VALUE real_obj
= (VALUE
)data
;
1096 rb_alloc_func_t allocator
= rb_get_alloc_func(CLASS_OF(real_obj
));
1098 if (st_lookup(compat_allocator_tbl
, (st_data_t
)allocator
, &data
)) {
1099 marshal_compat_t
*compat
= (marshal_compat_t
*)data
;
1100 compat
->loader(real_obj
, v
);
1102 st_delete(arg
->compat_tbl
, &key
, 0);
1106 v
= rb_funcall(arg
->proc
, rb_intern("call"), 1, v
);
1107 check_load_arg(arg
);
1113 r_ivar(VALUE obj
, struct load_arg
*arg
)
1120 ID id
= r_symbol(arg
);
1121 VALUE val
= r_object(arg
);
1122 if (id
== rb_id_encoding()) {
1123 int idx
= rb_enc_find_index(StringValueCStr(val
));
1124 if (idx
> 0) rb_enc_associate_index(obj
, idx
);
1127 rb_ivar_set(obj
, id
, val
);
1134 path2class(const char *path
)
1136 VALUE v
= rb_path2class(path
);
1138 if (TYPE(v
) != T_CLASS
) {
1139 rb_raise(rb_eArgError
, "%s does not refer class", path
);
1145 path2module(const char *path
)
1147 VALUE v
= rb_path2class(path
);
1149 if (TYPE(v
) != T_MODULE
) {
1150 rb_raise(rb_eArgError
, "%s does not refer module", path
);
1156 obj_alloc_by_path(const char *path
, struct load_arg
*arg
)
1160 rb_alloc_func_t allocator
;
1162 klass
= path2class(path
);
1164 allocator
= rb_get_alloc_func(klass
);
1165 if (st_lookup(compat_allocator_tbl
, (st_data_t
)allocator
, &data
)) {
1166 marshal_compat_t
*compat
= (marshal_compat_t
*)data
;
1167 VALUE real_obj
= rb_obj_alloc(klass
);
1168 VALUE obj
= rb_obj_alloc(compat
->oldclass
);
1169 st_insert(arg
->compat_tbl
, (st_data_t
)obj
, (st_data_t
)real_obj
);
1173 return rb_obj_alloc(klass
);
1177 r_object0(struct load_arg
*arg
, int *ivp
, VALUE extmod
)
1180 int type
= r_byte(arg
);
1186 v
= rb_hash_aref(arg
->data
, LONG2FIX(id
));
1187 check_load_arg(arg
);
1189 rb_raise(rb_eArgError
, "dump format error (unlinked)");
1192 v
= rb_funcall(arg
->proc
, rb_intern("call"), 1, v
);
1193 check_load_arg(arg
);
1201 v
= r_object0(arg
, &ivar
, extmod
);
1202 if (ivar
) r_ivar(v
, arg
);
1208 VALUE m
= path2module(r_unique(arg
));
1210 if (NIL_P(extmod
)) extmod
= rb_ary_new2(0);
1211 rb_ary_push(extmod
, m
);
1213 v
= r_object0(arg
, 0, extmod
);
1214 while (RARRAY_LEN(extmod
) > 0) {
1215 m
= rb_ary_pop(extmod
);
1216 rb_extend_object(v
, m
);
1223 VALUE c
= path2class(r_unique(arg
));
1225 if (FL_TEST(c
, FL_SINGLETON
)) {
1226 rb_raise(rb_eTypeError
, "singleton can't be loaded");
1228 v
= r_object0(arg
, 0, extmod
);
1229 if (rb_special_const_p(v
) || TYPE(v
) == T_OBJECT
|| TYPE(v
) == T_CLASS
) {
1231 rb_raise(rb_eArgError
, "dump format error (user class)");
1233 if (TYPE(v
) == T_MODULE
|| !RTEST(rb_class_inherited_p(c
, RBASIC(v
)->klass
))) {
1234 VALUE tmp
= rb_obj_alloc(c
);
1236 if (TYPE(v
) != TYPE(tmp
)) goto format_error
;
1238 RBASIC(v
)->klass
= c
;
1244 v
= r_leave(v
, arg
);
1249 v
= r_leave(v
, arg
);
1254 v
= r_leave(v
, arg
);
1259 long i
= r_long(arg
);
1262 v
= r_leave(v
, arg
);
1268 VALUE str
= r_bytes(arg
);
1269 const char *ptr
= RSTRING_PTR(str
);
1271 if (strcmp(ptr
, "nan") == 0) {
1274 else if (strcmp(ptr
, "inf") == 0) {
1277 else if (strcmp(ptr
, "-inf") == 0) {
1282 d
= strtod(ptr
, &e
);
1283 d
= load_mantissa(d
, e
, RSTRING_LEN(str
) - (e
- ptr
));
1286 v
= r_entry(v
, arg
);
1287 v
= r_leave(v
, arg
);
1295 volatile VALUE data
;
1297 NEWOBJ(big
, struct RBignum
);
1298 OBJSETUP(big
, rb_cBignum
, T_BIGNUM
);
1299 RBIGNUM_SET_SIGN(big
, (r_byte(arg
) == '+'));
1301 data
= r_bytes0(len
* 2, arg
);
1302 #if SIZEOF_BDIGITS == SIZEOF_SHORT
1303 rb_big_resize((VALUE
)big
, len
);
1305 rb_big_resize((VALUE
)big
, (len
+ 1) * 2 / sizeof(BDIGIT
));
1307 digits
= RBIGNUM_DIGITS(big
);
1308 MEMCPY(digits
, RSTRING_PTR(data
), char, len
* 2);
1309 #if SIZEOF_BDIGITS > SIZEOF_SHORT
1310 MEMZERO((char *)digits
+ len
* 2, char,
1311 RBIGNUM_LEN(big
) * sizeof(BDIGIT
) - len
* 2);
1313 len
= RBIGNUM_LEN(big
);
1315 unsigned char *p
= (unsigned char *)digits
;
1317 #if SIZEOF_BDIGITS > SIZEOF_SHORT
1321 for (i
=0; i
<SIZEOF_BDIGITS
; i
++) {
1322 num
|= (int)p
[i
] << shift
;
1326 num
= p
[0] | (p
[1] << 8);
1331 v
= rb_big_norm((VALUE
)big
);
1332 v
= r_entry(v
, arg
);
1333 v
= r_leave(v
, arg
);
1338 v
= r_entry(r_string(arg
), arg
);
1339 v
= r_leave(v
, arg
);
1344 volatile VALUE str
= r_bytes(arg
);
1345 int options
= r_byte(arg
);
1346 v
= r_entry(rb_reg_new_str(str
, options
), arg
);
1347 v
= r_leave(v
, arg
);
1353 volatile long len
= r_long(arg
); /* gcc 2.7.2.3 -O2 bug?? */
1355 v
= rb_ary_new2(len
);
1356 v
= r_entry(v
, arg
);
1358 rb_ary_push(v
, r_object(arg
));
1360 v
= r_leave(v
, arg
);
1367 long len
= r_long(arg
);
1370 v
= r_entry(v
, arg
);
1372 VALUE key
= r_object(arg
);
1373 VALUE value
= r_object(arg
);
1374 rb_hash_aset(v
, key
, value
);
1376 if (type
== TYPE_HASH_DEF
) {
1377 RHASH(v
)->ifnone
= r_object(arg
);
1379 v
= r_leave(v
, arg
);
1387 volatile long i
; /* gcc 2.7.2.3 -O2 bug?? */
1391 klass
= path2class(r_unique(arg
));
1394 v
= rb_obj_alloc(klass
);
1395 if (TYPE(v
) != T_STRUCT
) {
1396 rb_raise(rb_eTypeError
, "class %s not a struct", rb_class2name(klass
));
1398 mem
= rb_struct_s_members(klass
);
1399 if (RARRAY_LEN(mem
) != len
) {
1400 rb_raise(rb_eTypeError
, "struct %s not compatible (struct size differs)",
1401 rb_class2name(klass
));
1404 v
= r_entry(v
, arg
);
1405 values
= rb_ary_new2(len
);
1406 for (i
=0; i
<len
; i
++) {
1407 slot
= r_symbol(arg
);
1409 if (RARRAY_PTR(mem
)[i
] != ID2SYM(slot
)) {
1410 rb_raise(rb_eTypeError
, "struct %s not compatible (:%s for :%s)",
1411 rb_class2name(klass
),
1413 rb_id2name(SYM2ID(RARRAY_PTR(mem
)[i
])));
1415 rb_ary_push(values
, r_object(arg
));
1417 rb_struct_initialize(v
, values
);
1418 v
= r_leave(v
, arg
);
1424 VALUE klass
= path2class(r_unique(arg
));
1427 if (!rb_respond_to(klass
, s_load
)) {
1428 rb_raise(rb_eTypeError
, "class %s needs to have method `_load'",
1429 rb_class2name(klass
));
1431 data
= r_string(arg
);
1436 v
= rb_funcall(klass
, s_load
, 1, data
);
1437 check_load_arg(arg
);
1438 v
= r_entry(v
, arg
);
1439 v
= r_leave(v
, arg
);
1443 case TYPE_USRMARSHAL
:
1445 VALUE klass
= path2class(r_unique(arg
));
1448 v
= rb_obj_alloc(klass
);
1449 if (!NIL_P(extmod
)) {
1450 while (RARRAY_LEN(extmod
) > 0) {
1451 VALUE m
= rb_ary_pop(extmod
);
1452 rb_extend_object(v
, m
);
1455 if (!rb_respond_to(v
, s_mload
)) {
1456 rb_raise(rb_eTypeError
, "instance of %s needs to have method `marshal_load'",
1457 rb_class2name(klass
));
1459 v
= r_entry(v
, arg
);
1460 data
= r_object(arg
);
1461 rb_funcall(v
, s_mload
, 1, data
);
1462 check_load_arg(arg
);
1463 v
= r_leave(v
, arg
);
1469 v
= obj_alloc_by_path(r_unique(arg
), arg
);
1470 if (TYPE(v
) != T_OBJECT
) {
1471 rb_raise(rb_eArgError
, "dump format error");
1473 v
= r_entry(v
, arg
);
1475 v
= r_leave(v
, arg
);
1481 VALUE klass
= path2class(r_unique(arg
));
1482 if (rb_respond_to(klass
, s_alloc
)) {
1483 static int warn
= Qtrue
;
1485 rb_warn("define `allocate' instead of `_alloc'");
1488 v
= rb_funcall(klass
, s_alloc
, 0);
1489 check_load_arg(arg
);
1492 v
= rb_obj_alloc(klass
);
1494 if (TYPE(v
) != T_DATA
) {
1495 rb_raise(rb_eArgError
, "dump format error");
1497 v
= r_entry(v
, arg
);
1498 if (!rb_respond_to(v
, s_load_data
)) {
1499 rb_raise(rb_eTypeError
,
1500 "class %s needs to have instance method `_load_data'",
1501 rb_class2name(klass
));
1503 rb_funcall(v
, s_load_data
, 1, r_object0(arg
, 0, extmod
));
1504 check_load_arg(arg
);
1505 v
= r_leave(v
, arg
);
1509 case TYPE_MODULE_OLD
:
1511 volatile VALUE str
= r_bytes(arg
);
1513 v
= rb_path2class(RSTRING_PTR(str
));
1514 v
= r_entry(v
, arg
);
1515 v
= r_leave(v
, arg
);
1521 volatile VALUE str
= r_bytes(arg
);
1523 v
= path2class(RSTRING_PTR(str
));
1524 v
= r_entry(v
, arg
);
1525 v
= r_leave(v
, arg
);
1531 volatile VALUE str
= r_bytes(arg
);
1533 v
= path2module(RSTRING_PTR(str
));
1534 v
= r_entry(v
, arg
);
1535 v
= r_leave(v
, arg
);
1540 v
= ID2SYM(r_symreal(arg
));
1541 v
= r_leave(v
, arg
);
1545 v
= ID2SYM(r_symlink(arg
));
1549 rb_raise(rb_eArgError
, "dump format error(0x%x)", type
);
1556 r_object(struct load_arg
*arg
)
1558 return r_object0(arg
, 0, Qnil
);
1562 load(struct load_arg
*arg
)
1564 return r_object(arg
);
1568 load_ensure(struct load_arg
*arg
)
1570 if (!DATA_PTR(arg
->compat_tbl_wrapper
)) return 0;
1571 st_free_table(arg
->symbols
);
1572 st_free_table(arg
->compat_tbl
);
1573 DATA_PTR(arg
->compat_tbl_wrapper
) = 0;
1574 arg
->compat_tbl_wrapper
= 0;
1580 * load( source [, proc] ) => obj
1581 * restore( source [, proc] ) => obj
1583 * Returns the result of converting the serialized data in source into a
1584 * Ruby object (possibly with associated subordinate objects). source
1585 * may be either an instance of IO or an object that responds to
1586 * to_str. If proc is specified, it will be passed each object as it
1590 marshal_load(int argc
, VALUE
*argv
)
1595 struct load_arg arg
;
1597 rb_scan_args(argc
, argv
, "11", &port
, &proc
);
1598 v
= rb_check_string_type(port
);
1600 arg
.taint
= OBJ_TAINTED(port
); /* original taintedness */
1603 else if (rb_respond_to(port
, s_getbyte
) && rb_respond_to(port
, s_read
)) {
1604 if (rb_respond_to(port
, s_binmode
)) {
1605 rb_funcall2(port
, s_binmode
, 0, 0);
1610 rb_raise(rb_eTypeError
, "instance of IO needed");
1614 arg
.compat_tbl
= st_init_numtable();
1615 arg
.compat_tbl_wrapper
= Data_Wrap_Struct(rb_cData
, rb_mark_tbl
, 0, arg
.compat_tbl
);
1617 major
= r_byte(&arg
);
1618 minor
= r_byte(&arg
);
1619 if (major
!= MARSHAL_MAJOR
|| minor
> MARSHAL_MINOR
) {
1620 rb_raise(rb_eTypeError
, "incompatible marshal file format (can't be read)\n\
1621 \tformat version %d.%d required; %d.%d given",
1622 MARSHAL_MAJOR
, MARSHAL_MINOR
, major
, minor
);
1624 if (RTEST(ruby_verbose
) && minor
!= MARSHAL_MINOR
) {
1625 rb_warn("incompatible marshal file format (can be read)\n\
1626 \tformat version %d.%d required; %d.%d given",
1627 MARSHAL_MAJOR
, MARSHAL_MINOR
, major
, minor
);
1630 arg
.symbols
= st_init_numtable();
1631 arg
.data
= rb_hash_new();
1632 RBASIC(arg
.data
)->klass
= 0;
1633 if (NIL_P(proc
)) arg
.proc
= 0;
1634 else arg
.proc
= proc
;
1635 v
= rb_ensure(load
, (VALUE
)&arg
, load_ensure
, (VALUE
)&arg
);
1641 * The marshaling library converts collections of Ruby objects into a
1642 * byte stream, allowing them to be stored outside the currently
1643 * active script. This data may subsequently be read and the original
1644 * objects reconstituted.
1645 * Marshaled data has major and minor version numbers stored along
1646 * with the object information. In normal use, marshaling can only
1647 * load data written with the same major version number and an equal
1648 * or lower minor version number. If Ruby's ``verbose'' flag is set
1649 * (normally using -d, -v, -w, or --verbose), a warning is issued when
1650 * the minor numbers do not match exactly. Marshal versioning is independent of
1651 * Ruby's version numbers. You can extract the version by reading the
1652 * first two bytes of marshaled data.
1654 * str = Marshal.dump("thing")
1655 * RUBY_VERSION #=> "1.9.0"
1659 * Some objects cannot be dumped: if the objects to be dumped include
1660 * bindings, procedure or method objects, instances of class IO, or
1661 * singleton objects, a TypeError will be raised.
1662 * If your class has special serialization needs (for example, if you
1663 * want to serialize in some specific format), or if it contains
1664 * objects that would otherwise not be serializable, you can implement
1665 * your own serialization strategy by defining two methods, _dump and
1667 * The instance method _dump should return a String object containing
1668 * all the information necessary to reconstitute objects of this class
1669 * and all referenced objects up to a maximum depth given as an integer
1670 * parameter (a value of -1 implies that you should disable depth checking).
1671 * The class method _load should take a String and return an object of this class.
1678 VALUE rb_mMarshal
= rb_define_module("Marshal");
1680 s_dump
= rb_intern("_dump");
1681 s_load
= rb_intern("_load");
1682 s_mdump
= rb_intern("marshal_dump");
1683 s_mload
= rb_intern("marshal_load");
1684 s_dump_data
= rb_intern("_dump_data");
1685 s_load_data
= rb_intern("_load_data");
1686 s_alloc
= rb_intern("_alloc");
1687 s_getbyte
= rb_intern("getbyte");
1688 s_read
= rb_intern("read");
1689 s_write
= rb_intern("write");
1690 s_binmode
= rb_intern("binmode");
1692 rb_define_module_function(rb_mMarshal
, "dump", marshal_dump
, -1);
1693 rb_define_module_function(rb_mMarshal
, "load", marshal_load
, -1);
1694 rb_define_module_function(rb_mMarshal
, "restore", marshal_load
, -1);
1696 rb_define_const(rb_mMarshal
, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR
));
1697 rb_define_const(rb_mMarshal
, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR
));
1699 compat_allocator_tbl
= st_init_numtable();
1700 rb_gc_register_address(&compat_allocator_tbl_wrapper
);
1701 compat_allocator_tbl_wrapper
=
1702 Data_Wrap_Struct(rb_cData
, mark_marshal_compat_t
, 0, compat_allocator_tbl
);
1706 rb_marshal_dump(VALUE obj
, VALUE port
)
1713 if (!NIL_P(port
)) argc
= 2;
1714 return marshal_dump(argc
, argv
);
1718 rb_marshal_load(VALUE port
)
1720 return marshal_load(1, &port
);