1 /**********************************************************************
6 created at: Thu Apr 27 16:30:01 JST 1995
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "ruby/ruby.h"
15 #include "ruby/util.h"
16 #include "ruby/encoding.h"
/*
 * Helpers for emitting data in "short"-sized pieces.
 * BITSPERSHORT is twice CHAR_BIT; SHORTMASK selects the low BITSPERSHORT
 * bits of a value (the bignum dump path passes `num & SHORTMASK` to
 * w_short()); SHORTDN(x) shifts x right by BITSPERSHORT bits using RSHIFT.
 */
26 #define BITSPERSHORT (2*CHAR_BIT)
27 #define SHORTMASK ((1<<BITSPERSHORT)-1)
28 #define SHORTDN(x) RSHIFT(x,BITSPERSHORT)
30 #if SIZEOF_SHORT == SIZEOF_BDIGITS
31 #define SHORTLEN(x) (x)
34 shortlen(long len
, BDIGIT
*ds
)
44 return (len
- 1)*sizeof(BDIGIT
)/2 + offset
;
46 #define SHORTLEN(x) shortlen((x),d)
/*
 * Marshal format version.  marshal_dump() writes these two values as the
 * first two bytes of the output (w_byte(MARSHAL_MAJOR), w_byte(MARSHAL_MINOR)).
 * marshal_load() reads them back and raises TypeError when the major number
 * differs or the stream's minor number is greater than MARSHAL_MINOR.
 * They are also exported as Marshal::MAJOR_VERSION / Marshal::MINOR_VERSION.
 */
49 #define MARSHAL_MAJOR 4
50 #define MARSHAL_MINOR 8
/*
 * One-character type tags of the marshal stream.  The dump side emits a tag
 * with w_byte() ahead of the value's payload (for example
 * w_byte(TYPE_FIXNUM, arg) followed by w_long()); the load side reads the
 * tag with r_byte() and dispatches on it.
 * NOTE(review): other tags used by the code in this file (TYPE_NIL,
 * TYPE_TRUE, TYPE_LINK, TYPE_IVAR, TYPE_HASH, TYPE_DATA) are not defined on
 * the lines visible here.
 */
54 #define TYPE_FALSE 'F'
55 #define TYPE_FIXNUM 'i'
57 #define TYPE_EXTENDED 'e' /* precedes the name of a module the object is extended with */
58 #define TYPE_UCLASS 'C' /* written by w_uclass() when the real class differs from the expected built-in class */
59 #define TYPE_OBJECT 'o'
61 #define TYPE_USERDEF 'u' /* payload is the string returned by _dump */
62 #define TYPE_USRMARSHAL 'U' /* payload is the object returned by marshal_dump */
63 #define TYPE_FLOAT 'f'
64 #define TYPE_BIGNUM 'l'
65 #define TYPE_STRING '"'
66 #define TYPE_REGEXP '/'
67 #define TYPE_ARRAY '['
69 #define TYPE_HASH_DEF '}' /* hash whose ifnone is non-nil; the ifnone value follows the pairs */
70 #define TYPE_STRUCT 'S'
71 #define TYPE_MODULE_OLD 'M' /* loader resolves the path with rb_path2class(), without the class/module type check */
72 #define TYPE_CLASS 'c'
73 #define TYPE_MODULE 'm'
75 #define TYPE_SYMBOL ':'
76 #define TYPE_SYMLINK ';' /* back-reference: index of a symbol already written to the stream */
81 static ID s_dump
, s_load
, s_mdump
, s_mload
;
82 static ID s_dump_data
, s_load_data
, s_alloc
;
83 static ID s_getbyte
, s_read
, s_write
, s_binmode
;
85 ID
rb_id_encoding(void);
90 VALUE (*dumper
)(VALUE
);
91 VALUE (*loader
)(VALUE
, VALUE
);
94 static st_table
*compat_allocator_tbl
;
95 static VALUE compat_allocator_tbl_wrapper
;
98 mark_marshal_compat_i(st_data_t key
, st_data_t value
)
100 marshal_compat_t
*p
= (marshal_compat_t
*)value
;
101 rb_gc_mark(p
->newclass
);
102 rb_gc_mark(p
->oldclass
);
107 mark_marshal_compat_t(void *tbl
)
110 st_foreach(tbl
, mark_marshal_compat_i
, 0);
114 rb_marshal_define_compat(VALUE newclass
, VALUE oldclass
, VALUE (*dumper
)(VALUE
), VALUE (*loader
)(VALUE
, VALUE
))
116 marshal_compat_t
*compat
;
117 rb_alloc_func_t allocator
= rb_get_alloc_func(newclass
);
120 rb_raise(rb_eTypeError
, "no allocator");
123 compat
= ALLOC(marshal_compat_t
);
124 compat
->newclass
= Qnil
;
125 compat
->oldclass
= Qnil
;
126 compat
->newclass
= newclass
;
127 compat
->oldclass
= oldclass
;
128 compat
->dumper
= dumper
;
129 compat
->loader
= loader
;
131 st_insert(compat_allocator_tbl
, (st_data_t
)allocator
, (st_data_t
)compat
);
140 st_table
*compat_tbl
;
145 struct dump_call_arg
{
147 struct dump_arg
*arg
;
152 check_dump_arg(struct dump_arg
*arg
)
154 if (!DATA_PTR(arg
->wrapper
)) {
155 rb_raise(rb_eRuntimeError
, "Marshal.dump reentered");
160 mark_dump_arg(void *ptr
)
162 struct dump_arg
*p
= ptr
;
165 rb_mark_set(p
->data
);
166 rb_mark_hash(p
->compat_tbl
);
170 class2path(VALUE klass
)
172 VALUE path
= rb_class_path(klass
);
173 char *n
= RSTRING_PTR(path
);
176 rb_raise(rb_eTypeError
, "can't dump anonymous %s %s",
177 (TYPE(klass
) == T_CLASS
? "class" : "module"),
180 if (rb_path2class(n
) != rb_class_real(klass
)) {
181 rb_raise(rb_eTypeError
, "%s can't be referred", n
);
186 static void w_long(long, struct dump_arg
*);
189 w_nbyte(const char *s
, int n
, struct dump_arg
*arg
)
191 VALUE buf
= arg
->str
;
192 rb_str_buf_cat(buf
, s
, n
);
193 if (arg
->dest
&& RSTRING_LEN(buf
) >= BUFSIZ
) {
194 if (arg
->taint
) OBJ_TAINT(buf
);
195 rb_io_write(arg
->dest
, buf
);
196 rb_str_resize(buf
, 0);
201 w_byte(char c
, struct dump_arg
*arg
)
207 w_bytes(const char *s
, int n
, struct dump_arg
*arg
)
214 w_short(int x
, struct dump_arg
*arg
)
216 w_byte((char)((x
>> 0) & 0xff), arg
);
217 w_byte((char)((x
>> 8) & 0xff), arg
);
221 w_long(long x
, struct dump_arg
*arg
)
223 char buf
[sizeof(long)+1];
227 if (!(RSHIFT(x
, 31) == 0 || RSHIFT(x
, 31) == -1)) {
228 /* big long does not fit in 4 bytes */
229 rb_raise(rb_eTypeError
, "long too big to dump");
237 if (0 < x
&& x
< 123) {
238 w_byte((char)(x
+ 5), arg
);
241 if (-124 < x
&& x
< 0) {
242 w_byte((char)((x
- 5)&0xff), arg
);
245 for (i
=1;i
<sizeof(long)+1;i
++) {
258 for (i
=0;i
<=len
;i
++) {
264 #define DECIMAL_MANT (53-16) /* from IEEE754 double precision */
266 #if DBL_MANT_DIG > 32
268 #elif DBL_MANT_DIG > 24
270 #elif DBL_MANT_DIG > 16
277 save_mantissa(double d
, char *buf
)
283 d
= modf(ldexp(frexp(fabs(d
), &e
), DECIMAL_MANT
), &d
);
287 d
= modf(ldexp(d
, MANT_BITS
), &n
);
288 m
= (unsigned long)n
;
300 while (!buf
[i
- 1]) --i
;
306 load_mantissa(double d
, const char *buf
, int len
)
308 if (--len
> 0 && !*buf
++) { /* binary mantissa mark */
309 int e
, s
= d
< 0, dig
= 0;
312 modf(ldexp(frexp(fabs(d
), &e
), DECIMAL_MANT
), &d
);
316 default: m
= *buf
++ & 0xff;
318 case 3: m
= (m
<< 8) | (*buf
++ & 0xff);
321 case 2: m
= (m
<< 8) | (*buf
++ & 0xff);
324 case 1: m
= (m
<< 8) | (*buf
++ & 0xff);
327 dig
-= len
< MANT_BITS
/ 8 ? 8 * (unsigned)len
: MANT_BITS
;
328 d
+= ldexp((double)m
, dig
);
329 } while ((len
-= MANT_BITS
/ 8) > 0);
330 d
= ldexp(d
, e
- DECIMAL_MANT
);
336 #define load_mantissa(d, buf, len) (d)
337 #define save_mantissa(d, buf) 0
341 #define FLOAT_DIG (DBL_DIG+2)
347 w_float(double d
, struct dump_arg
*arg
)
349 char buf
[FLOAT_DIG
+ (DECIMAL_MANT
+ 7) / 8 + 10];
352 if (d
< 0) strcpy(buf
, "-inf");
353 else strcpy(buf
, "inf");
359 if (1.0/d
< 0) strcpy(buf
, "-0");
360 else strcpy(buf
, "0");
365 /* xxx: should not use system's sprintf(3) */
366 snprintf(buf
, sizeof(buf
), "%.*g", FLOAT_DIG
, d
);
368 w_bytes(buf
, len
+ save_mantissa(d
, buf
+ len
), arg
);
371 w_bytes(buf
, strlen(buf
), arg
);
375 w_symbol(ID id
, struct dump_arg
*arg
)
380 if (st_lookup(arg
->symbols
, id
, &num
)) {
381 w_byte(TYPE_SYMLINK
, arg
);
382 w_long((long)num
, arg
);
385 sym
= rb_id2name(id
);
387 rb_raise(rb_eTypeError
, "can't dump anonymous ID %ld", id
);
389 w_byte(TYPE_SYMBOL
, arg
);
390 w_bytes(sym
, strlen(sym
), arg
);
391 st_add_direct(arg
->symbols
, id
, arg
->symbols
->num_entries
);
396 w_unique(const char *s
, struct dump_arg
*arg
)
399 rb_raise(rb_eTypeError
, "can't dump anonymous class %s", s
);
401 w_symbol(rb_intern(s
), arg
);
404 static void w_object(VALUE
,struct dump_arg
*,int);
407 hash_each(VALUE key
, VALUE value
, struct dump_call_arg
*arg
)
409 w_object(key
, arg
->arg
, arg
->limit
);
410 w_object(value
, arg
->arg
, arg
->limit
);
415 w_extended(VALUE klass
, struct dump_arg
*arg
, int check
)
419 if (check
&& FL_TEST(klass
, FL_SINGLETON
)) {
420 if (RCLASS_M_TBL(klass
)->num_entries
||
421 (RCLASS_IV_TBL(klass
) && RCLASS_IV_TBL(klass
)->num_entries
> 1)) {
422 rb_raise(rb_eTypeError
, "singleton can't be dumped");
424 klass
= RCLASS_SUPER(klass
);
426 while (BUILTIN_TYPE(klass
) == T_ICLASS
) {
427 path
= rb_class2name(RBASIC(klass
)->klass
);
428 w_byte(TYPE_EXTENDED
, arg
);
430 klass
= RCLASS_SUPER(klass
);
435 w_class(char type
, VALUE obj
, struct dump_arg
*arg
, int check
)
442 if (st_lookup(arg
->compat_tbl
, (st_data_t
)obj
, &real_obj
)) {
443 obj
= (VALUE
)real_obj
;
445 klass
= CLASS_OF(obj
);
446 w_extended(klass
, arg
, check
);
448 p
= class2path(rb_class_real(klass
));
449 path
= RSTRING_PTR(p
);
454 w_uclass(VALUE obj
, VALUE super
, struct dump_arg
*arg
)
456 VALUE klass
= CLASS_OF(obj
);
458 w_extended(klass
, arg
, Qtrue
);
459 klass
= rb_class_real(klass
);
460 if (klass
!= super
) {
461 w_byte(TYPE_UCLASS
, arg
);
462 w_unique(RSTRING_PTR(class2path(klass
)), arg
);
467 w_obj_each(ID id
, VALUE value
, struct dump_call_arg
*arg
)
469 if (id
== rb_id_encoding()) return ST_CONTINUE
;
470 w_symbol(id
, arg
->arg
);
471 w_object(value
, arg
->arg
, arg
->limit
);
476 w_encoding(VALUE obj
, long num
, struct dump_call_arg
*arg
)
478 int encidx
= rb_enc_get_index(obj
);
479 rb_encoding
*enc
= 0;
482 if (encidx
<= 0 || !(enc
= rb_enc_from_index(encidx
))) {
483 w_long(num
, arg
->arg
);
486 w_long(num
+ 1, arg
->arg
);
487 w_symbol(rb_id_encoding(), arg
->arg
);
489 if (!arg
->arg
->encodings
)
490 arg
->arg
->encodings
= st_init_strcasetable();
491 else if (st_lookup(arg
->arg
->encodings
, (st_data_t
)rb_enc_name(enc
), &name
))
493 name
= (st_data_t
)rb_str_new2(rb_enc_name(enc
));
494 st_insert(arg
->arg
->encodings
, (st_data_t
)rb_enc_name(enc
), name
);
496 w_object(name
, arg
->arg
, arg
->limit
);
500 w_ivar(VALUE obj
, st_table
*tbl
, struct dump_call_arg
*arg
)
502 long num
= tbl
? tbl
->num_entries
: 0;
504 w_encoding(obj
, num
, arg
);
506 st_foreach_safe(tbl
, w_obj_each
, (st_data_t
)arg
);
511 w_objivar(VALUE obj
, struct dump_call_arg
*arg
)
516 len
= ROBJECT_NUMIV(obj
);
517 ptr
= ROBJECT_IVPTR(obj
);
519 for (i
= 0; i
< len
; i
++)
520 if (ptr
[i
] != Qundef
)
523 w_encoding(obj
, num
, arg
);
525 rb_ivar_foreach(obj
, w_obj_each
, (st_data_t
)arg
);
530 w_object(VALUE obj
, struct dump_arg
*arg
, int limit
)
532 struct dump_call_arg c_arg
;
/*
 * has_ivars(obj, ivtbl): nonzero when obj has to be dumped with trailing
 * instance-variable data.
 * Side effect: assigns rb_generic_ivar_table(obj) to ivtbl.
 * The result is true if that table is non-null, or if obj is not a special
 * constant and ENCODING_IS_ASCII8BIT(obj) is false (presumably so that the
 * encoding can be recorded by w_encoding() -- confirm against the caller).
 * Note: obj is expanded more than once, so pass a side-effect-free expression.
 */
536 #define has_ivars(obj, ivtbl) ((ivtbl = rb_generic_ivar_table(obj)) != 0 || \
537 (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj)))
540 rb_raise(rb_eArgError
, "exceed depth limit");
547 if (st_lookup(arg
->data
, obj
, &num
)) {
548 w_byte(TYPE_LINK
, arg
);
549 w_long((long)num
, arg
);
553 if ((hasiv
= has_ivars(obj
, ivtbl
)) != 0) {
554 w_byte(TYPE_IVAR
, arg
);
557 w_byte(TYPE_NIL
, arg
);
559 else if (obj
== Qtrue
) {
560 w_byte(TYPE_TRUE
, arg
);
562 else if (obj
== Qfalse
) {
563 w_byte(TYPE_FALSE
, arg
);
565 else if (FIXNUM_P(obj
)) {
567 w_byte(TYPE_FIXNUM
, arg
);
568 w_long(FIX2INT(obj
), arg
);
570 if (RSHIFT((long)obj
, 31) == 0 || RSHIFT((long)obj
, 31) == -1) {
571 w_byte(TYPE_FIXNUM
, arg
);
572 w_long(FIX2LONG(obj
), arg
);
575 w_object(rb_int2big(FIX2LONG(obj
)), arg
, limit
);
579 else if (SYMBOL_P(obj
)) {
580 w_symbol(SYM2ID(obj
), arg
);
583 if (OBJ_TAINTED(obj
)) arg
->taint
= Qtrue
;
585 if (rb_obj_respond_to(obj
, s_mdump
, Qtrue
)) {
588 st_add_direct(arg
->data
, obj
, arg
->data
->num_entries
);
590 v
= rb_funcall(obj
, s_mdump
, 0, 0);
592 w_class(TYPE_USRMARSHAL
, obj
, arg
, Qfalse
);
593 w_object(v
, arg
, limit
);
594 if (hasiv
) w_ivar(obj
, 0, &c_arg
);
597 if (rb_obj_respond_to(obj
, s_dump
, Qtrue
)) {
599 st_table
*ivtbl2
= 0;
602 v
= rb_funcall(obj
, s_dump
, 1, INT2NUM(limit
));
604 if (TYPE(v
) != T_STRING
) {
605 rb_raise(rb_eTypeError
, "_dump() must return string");
607 if ((hasiv2
= has_ivars(v
, ivtbl2
)) != 0 && !hasiv
) {
608 w_byte(TYPE_IVAR
, arg
);
610 w_class(TYPE_USERDEF
, obj
, arg
, Qfalse
);
611 w_bytes(RSTRING_PTR(v
), RSTRING_LEN(v
), arg
);
613 w_ivar(v
, ivtbl2
, &c_arg
);
616 w_ivar(obj
, ivtbl
, &c_arg
);
618 st_add_direct(arg
->data
, obj
, arg
->data
->num_entries
);
622 st_add_direct(arg
->data
, obj
, arg
->data
->num_entries
);
625 st_data_t compat_data
;
626 rb_alloc_func_t allocator
= rb_get_alloc_func(RBASIC(obj
)->klass
);
627 if (st_lookup(compat_allocator_tbl
,
628 (st_data_t
)allocator
,
630 marshal_compat_t
*compat
= (marshal_compat_t
*)compat_data
;
631 VALUE real_obj
= obj
;
632 obj
= compat
->dumper(real_obj
);
633 st_insert(arg
->compat_tbl
, (st_data_t
)obj
, (st_data_t
)real_obj
);
637 switch (BUILTIN_TYPE(obj
)) {
639 if (FL_TEST(obj
, FL_SINGLETON
)) {
640 rb_raise(rb_eTypeError
, "singleton class can't be dumped");
642 w_byte(TYPE_CLASS
, arg
);
644 volatile VALUE path
= class2path(obj
);
645 w_bytes(RSTRING_PTR(path
), RSTRING_LEN(path
), arg
);
650 w_byte(TYPE_MODULE
, arg
);
652 VALUE path
= class2path(obj
);
653 w_bytes(RSTRING_PTR(path
), RSTRING_LEN(path
), arg
);
658 w_byte(TYPE_FLOAT
, arg
);
659 w_float(RFLOAT_VALUE(obj
), arg
);
663 w_byte(TYPE_BIGNUM
, arg
);
665 char sign
= RBIGNUM_SIGN(obj
) ? '+' : '-';
666 long len
= RBIGNUM_LEN(obj
);
667 BDIGIT
*d
= RBIGNUM_DIGITS(obj
);
670 w_long(SHORTLEN(len
), arg
); /* w_short? */
672 #if SIZEOF_BDIGITS > SIZEOF_SHORT
676 for (i
=0; i
<SIZEOF_BDIGITS
; i
+=SIZEOF_SHORT
) {
677 w_short(num
& SHORTMASK
, arg
);
679 if (len
== 0 && num
== 0) break;
690 w_uclass(obj
, rb_cString
, arg
);
691 w_byte(TYPE_STRING
, arg
);
692 w_bytes(RSTRING_PTR(obj
), RSTRING_LEN(obj
), arg
);
696 w_uclass(obj
, rb_cRegexp
, arg
);
697 w_byte(TYPE_REGEXP
, arg
);
698 w_bytes(RREGEXP(obj
)->str
, RREGEXP(obj
)->len
, arg
);
699 w_byte((char)rb_reg_options(obj
), arg
);
703 w_uclass(obj
, rb_cArray
, arg
);
704 w_byte(TYPE_ARRAY
, arg
);
706 long len
= RARRAY_LEN(obj
);
707 VALUE
*ptr
= RARRAY_PTR(obj
);
711 w_object(*ptr
, arg
, limit
);
718 w_uclass(obj
, rb_cHash
, arg
);
719 if (NIL_P(RHASH(obj
)->ifnone
)) {
720 w_byte(TYPE_HASH
, arg
);
722 else if (FL_TEST(obj
, FL_USER2
)) {
723 /* FL_USER2 means HASH_PROC_DEFAULT (see hash.c) */
724 rb_raise(rb_eTypeError
, "can't dump hash with default proc");
727 w_byte(TYPE_HASH_DEF
, arg
);
729 w_long(RHASH_SIZE(obj
), arg
);
730 rb_hash_foreach(obj
, hash_each
, (st_data_t
)&c_arg
);
731 if (!NIL_P(RHASH(obj
)->ifnone
)) {
732 w_object(RHASH(obj
)->ifnone
, arg
, limit
);
737 w_class(TYPE_STRUCT
, obj
, arg
, Qtrue
);
739 long len
= RSTRUCT_LEN(obj
);
744 mem
= rb_struct_members(obj
);
745 for (i
=0; i
<len
; i
++) {
746 w_symbol(SYM2ID(RARRAY_PTR(mem
)[i
]), arg
);
747 w_object(RSTRUCT_PTR(obj
)[i
], arg
, limit
);
753 w_class(TYPE_OBJECT
, obj
, arg
, Qtrue
);
754 w_objivar(obj
, &c_arg
);
761 if (!rb_obj_respond_to(obj
, s_dump_data
, Qtrue
)) {
762 rb_raise(rb_eTypeError
,
763 "no marshal_dump is defined for class %s",
764 rb_obj_classname(obj
));
766 v
= rb_funcall(obj
, s_dump_data
, 0);
768 w_class(TYPE_DATA
, obj
, arg
, Qtrue
);
769 w_object(v
, arg
, limit
);
774 rb_raise(rb_eTypeError
, "can't dump %s",
775 rb_obj_classname(obj
));
780 w_ivar(obj
, ivtbl
, &c_arg
);
785 dump(struct dump_call_arg
*arg
)
787 w_object(arg
->obj
, arg
->arg
, arg
->limit
);
788 if (arg
->arg
->dest
) {
789 rb_io_write(arg
->arg
->dest
, arg
->arg
->str
);
790 rb_str_resize(arg
->arg
->str
, 0);
796 dump_ensure(struct dump_arg
*arg
)
798 if (!DATA_PTR(arg
->wrapper
)) return 0;
799 st_free_table(arg
->symbols
);
800 st_free_table(arg
->data
);
801 st_free_table(arg
->compat_tbl
);
802 DATA_PTR(arg
->wrapper
) = 0;
812 * dump( obj [, anIO] , limit=-1 ) => anIO
814 * Serializes obj and all descendant objects. If anIO is
815 * specified, the serialized data will be written to it, otherwise the
816 * data will be returned as a String. If limit is specified, the
817 * traversal of subobjects will be limited to that depth. If limit is
818 * negative, no checking of depth will be performed.
821 * def initialize(str)
829 * (produces no output)
831 * o = Klass.new("hello\n")
832 * data = Marshal.dump(o)
833 * obj = Marshal.load(data)
834 * obj.sayHello #=> "hello\n"
837 marshal_dump(int argc
, VALUE
*argv
)
839 VALUE obj
, port
, a1
, a2
;
842 struct dump_call_arg c_arg
;
845 rb_scan_args(argc
, argv
, "12", &obj
, &a1
, &a2
);
847 if (!NIL_P(a2
)) limit
= NUM2INT(a2
);
848 if (NIL_P(a1
)) goto type_error
;
851 else if (argc
== 2) {
852 if (FIXNUM_P(a1
)) limit
= FIX2INT(a1
);
853 else if (NIL_P(a1
)) goto type_error
;
858 if (!rb_obj_respond_to(port
, s_write
, Qtrue
)) {
860 rb_raise(rb_eTypeError
, "instance of IO needed");
862 arg
.str
= rb_str_buf_new(0);
864 if (rb_obj_respond_to(port
, s_binmode
, Qtrue
)) {
865 rb_funcall2(port
, s_binmode
, 0, 0);
869 port
= rb_str_buf_new(0);
873 arg
.symbols
= st_init_numtable();
874 arg
.data
= st_init_numtable();
876 arg
.compat_tbl
= st_init_numtable();
877 arg
.wrapper
= Data_Wrap_Struct(rb_cData
, mark_dump_arg
, 0, &arg
);
883 w_byte(MARSHAL_MAJOR
, &arg
);
884 w_byte(MARSHAL_MINOR
, &arg
);
886 rb_ensure(dump
, (VALUE
)&c_arg
, dump_ensure
, (VALUE
)&arg
);
898 st_table
*compat_tbl
;
899 VALUE compat_tbl_wrapper
;
903 check_load_arg(struct load_arg
*arg
)
905 if (!DATA_PTR(arg
->compat_tbl_wrapper
)) {
906 rb_raise(rb_eRuntimeError
, "Marshal.load reentered");
910 static VALUE
r_entry(VALUE v
, struct load_arg
*arg
);
911 static VALUE
r_object(struct load_arg
*arg
);
912 static VALUE
path2class(const char *path
);
915 r_byte(struct load_arg
*arg
)
919 if (TYPE(arg
->src
) == T_STRING
) {
920 if (RSTRING_LEN(arg
->src
) > arg
->offset
) {
921 c
= (unsigned char)RSTRING_PTR(arg
->src
)[arg
->offset
++];
924 rb_raise(rb_eArgError
, "marshal data too short");
928 VALUE src
= arg
->src
;
929 VALUE v
= rb_funcall2(src
, s_getbyte
, 0, 0);
931 if (NIL_P(v
)) rb_eof_error();
932 c
= (unsigned char)NUM2CHR(v
);
938 long_toobig(int size
)
940 rb_raise(rb_eTypeError
, "long too big for this architecture (size %d, given %d)",
944 #undef SIGN_EXTEND_CHAR
946 # define SIGN_EXTEND_CHAR(c) ((signed char)(c))
947 #else /* not __STDC__ */
948 /* As in Harbison and Steele. */
949 # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
953 r_long(struct load_arg
*arg
)
956 int c
= SIGN_EXTEND_CHAR(r_byte(arg
));
959 if (c
== 0) return 0;
961 if (4 < c
&& c
< 128) {
964 if (c
> sizeof(long)) long_toobig(c
);
967 x
|= (long)r_byte(arg
) << (8*i
);
971 if (-129 < c
&& c
< -4) {
975 if (c
> sizeof(long)) long_toobig(c
);
978 x
&= ~((long)0xff << (8*i
));
979 x
|= (long)r_byte(arg
) << (8*i
);
/*
 * r_bytes(arg): read a length-prefixed byte string from the load source.
 * The length is read with r_long(arg) and handed to r_bytes0(), which
 * returns that many bytes as a String.
 * Note: the macro argument is expanded twice.
 */
985 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
988 r_bytes0(long len
, struct load_arg
*arg
)
992 if (len
== 0) return rb_str_new(0, 0);
993 if (TYPE(arg
->src
) == T_STRING
) {
994 if (RSTRING_LEN(arg
->src
) - arg
->offset
>= len
) {
995 str
= rb_str_new(RSTRING_PTR(arg
->src
)+arg
->offset
, len
);
1000 rb_raise(rb_eArgError
, "marshal data too short");
1004 VALUE src
= arg
->src
;
1005 VALUE n
= LONG2NUM(len
);
1006 str
= rb_funcall2(src
, s_read
, 1, &n
);
1007 check_load_arg(arg
);
1008 if (NIL_P(str
)) goto too_short
;
1010 if (RSTRING_LEN(str
) != len
) goto too_short
;
1011 if (OBJ_TAINTED(str
)) arg
->taint
= Qtrue
;
1017 r_symlink(struct load_arg
*arg
)
1020 long num
= r_long(arg
);
1022 if (st_lookup(arg
->symbols
, num
, &id
)) {
1025 rb_raise(rb_eArgError
, "bad symbol");
1029 r_symreal(struct load_arg
*arg
)
1031 volatile VALUE s
= r_bytes(arg
);
1032 ID id
= rb_intern(RSTRING_PTR(s
));
1034 st_insert(arg
->symbols
, arg
->symbols
->num_entries
, id
);
1040 r_symbol(struct load_arg
*arg
)
1044 switch ((type
= r_byte(arg
))) {
1046 return r_symreal(arg
);
1048 return r_symlink(arg
);
1050 rb_raise(rb_eArgError
, "dump format error(0x%x)", type
);
1056 r_unique(struct load_arg
*arg
)
1058 return rb_id2name(r_symbol(arg
));
1062 r_string(struct load_arg
*arg
)
1064 return r_bytes(arg
);
1068 r_entry(VALUE v
, struct load_arg
*arg
)
1070 st_data_t real_obj
= (VALUE
)Qundef
;
1071 if (st_lookup(arg
->compat_tbl
, v
, &real_obj
)) {
1072 rb_hash_aset(arg
->data
, INT2FIX(RHASH_SIZE(arg
->data
)), (VALUE
)real_obj
);
1075 rb_hash_aset(arg
->data
, INT2FIX(RHASH_SIZE(arg
->data
)), v
);
1079 if ((VALUE
)real_obj
!= Qundef
)
1080 OBJ_TAINT((VALUE
)real_obj
);
1086 r_leave(VALUE v
, struct load_arg
*arg
)
1089 if (st_lookup(arg
->compat_tbl
, v
, &data
)) {
1090 VALUE real_obj
= (VALUE
)data
;
1091 rb_alloc_func_t allocator
= rb_get_alloc_func(CLASS_OF(real_obj
));
1093 if (st_lookup(compat_allocator_tbl
, (st_data_t
)allocator
, &data
)) {
1094 marshal_compat_t
*compat
= (marshal_compat_t
*)data
;
1095 compat
->loader(real_obj
, v
);
1097 st_delete(arg
->compat_tbl
, &key
, 0);
1101 v
= rb_funcall(arg
->proc
, rb_intern("call"), 1, v
);
1102 check_load_arg(arg
);
1108 r_ivar(VALUE obj
, struct load_arg
*arg
)
1115 ID id
= r_symbol(arg
);
1116 VALUE val
= r_object(arg
);
1117 if (id
== rb_id_encoding()) {
1118 int idx
= rb_enc_find_index(StringValueCStr(val
));
1119 if (idx
> 0) rb_enc_associate_index(obj
, idx
);
1122 rb_ivar_set(obj
, id
, val
);
1129 path2class(const char *path
)
1131 VALUE v
= rb_path2class(path
);
1133 if (TYPE(v
) != T_CLASS
) {
1134 rb_raise(rb_eArgError
, "%s does not refer class", path
);
1140 path2module(const char *path
)
1142 VALUE v
= rb_path2class(path
);
1144 if (TYPE(v
) != T_MODULE
) {
1145 rb_raise(rb_eArgError
, "%s does not refer module", path
);
1151 obj_alloc_by_path(const char *path
, struct load_arg
*arg
)
1155 rb_alloc_func_t allocator
;
1157 klass
= path2class(path
);
1159 allocator
= rb_get_alloc_func(klass
);
1160 if (st_lookup(compat_allocator_tbl
, (st_data_t
)allocator
, &data
)) {
1161 marshal_compat_t
*compat
= (marshal_compat_t
*)data
;
1162 VALUE real_obj
= rb_obj_alloc(klass
);
1163 VALUE obj
= rb_obj_alloc(compat
->oldclass
);
1164 st_insert(arg
->compat_tbl
, (st_data_t
)obj
, (st_data_t
)real_obj
);
1168 return rb_obj_alloc(klass
);
1172 r_object0(struct load_arg
*arg
, int *ivp
, VALUE extmod
)
1175 int type
= r_byte(arg
);
1181 v
= rb_hash_aref(arg
->data
, LONG2FIX(id
));
1182 check_load_arg(arg
);
1184 rb_raise(rb_eArgError
, "dump format error (unlinked)");
1187 v
= rb_funcall(arg
->proc
, rb_intern("call"), 1, v
);
1188 check_load_arg(arg
);
1196 v
= r_object0(arg
, &ivar
, extmod
);
1197 if (ivar
) r_ivar(v
, arg
);
1203 VALUE m
= path2module(r_unique(arg
));
1205 if (NIL_P(extmod
)) extmod
= rb_ary_new2(0);
1206 rb_ary_push(extmod
, m
);
1208 v
= r_object0(arg
, 0, extmod
);
1209 while (RARRAY_LEN(extmod
) > 0) {
1210 m
= rb_ary_pop(extmod
);
1211 rb_extend_object(v
, m
);
1218 VALUE c
= path2class(r_unique(arg
));
1220 if (FL_TEST(c
, FL_SINGLETON
)) {
1221 rb_raise(rb_eTypeError
, "singleton can't be loaded");
1223 v
= r_object0(arg
, 0, extmod
);
1224 if (rb_special_const_p(v
) || TYPE(v
) == T_OBJECT
|| TYPE(v
) == T_CLASS
) {
1226 rb_raise(rb_eArgError
, "dump format error (user class)");
1228 if (TYPE(v
) == T_MODULE
|| !RTEST(rb_class_inherited_p(c
, RBASIC(v
)->klass
))) {
1229 VALUE tmp
= rb_obj_alloc(c
);
1231 if (TYPE(v
) != TYPE(tmp
)) goto format_error
;
1233 RBASIC(v
)->klass
= c
;
1239 v
= r_leave(v
, arg
);
1244 v
= r_leave(v
, arg
);
1249 v
= r_leave(v
, arg
);
1254 long i
= r_long(arg
);
1257 v
= r_leave(v
, arg
);
1263 VALUE str
= r_bytes(arg
);
1264 const char *ptr
= RSTRING_PTR(str
);
1266 if (strcmp(ptr
, "nan") == 0) {
1269 else if (strcmp(ptr
, "inf") == 0) {
1272 else if (strcmp(ptr
, "-inf") == 0) {
1277 d
= strtod(ptr
, &e
);
1278 d
= load_mantissa(d
, e
, RSTRING_LEN(str
) - (e
- ptr
));
1281 v
= r_entry(v
, arg
);
1282 v
= r_leave(v
, arg
);
1290 volatile VALUE data
;
1292 NEWOBJ(big
, struct RBignum
);
1293 OBJSETUP(big
, rb_cBignum
, T_BIGNUM
);
1294 RBIGNUM_SET_SIGN(big
, (r_byte(arg
) == '+'));
1296 data
= r_bytes0(len
* 2, arg
);
1297 #if SIZEOF_BDIGITS == SIZEOF_SHORT
1298 rb_big_resize((VALUE
)big
, len
);
1300 rb_big_resize((VALUE
)big
, (len
+ 1) * 2 / sizeof(BDIGIT
));
1302 digits
= RBIGNUM_DIGITS(big
);
1303 MEMCPY(digits
, RSTRING_PTR(data
), char, len
* 2);
1304 #if SIZEOF_BDIGITS > SIZEOF_SHORT
1305 MEMZERO((char *)digits
+ len
* 2, char,
1306 RBIGNUM_LEN(big
) * sizeof(BDIGIT
) - len
* 2);
1308 len
= RBIGNUM_LEN(big
);
1310 unsigned char *p
= (unsigned char *)digits
;
1312 #if SIZEOF_BDIGITS > SIZEOF_SHORT
1316 for (i
=0; i
<SIZEOF_BDIGITS
; i
++) {
1317 num
|= (int)p
[i
] << shift
;
1321 num
= p
[0] | (p
[1] << 8);
1326 v
= rb_big_norm((VALUE
)big
);
1327 v
= r_entry(v
, arg
);
1328 v
= r_leave(v
, arg
);
1333 v
= r_entry(r_string(arg
), arg
);
1334 v
= r_leave(v
, arg
);
1339 volatile VALUE str
= r_bytes(arg
);
1340 int options
= r_byte(arg
);
1341 v
= r_entry(rb_reg_new_str(str
, options
), arg
);
1342 v
= r_leave(v
, arg
);
1348 volatile long len
= r_long(arg
); /* gcc 2.7.2.3 -O2 bug?? */
1350 v
= rb_ary_new2(len
);
1351 v
= r_entry(v
, arg
);
1353 rb_ary_push(v
, r_object(arg
));
1355 v
= r_leave(v
, arg
);
1362 long len
= r_long(arg
);
1365 v
= r_entry(v
, arg
);
1367 VALUE key
= r_object(arg
);
1368 VALUE value
= r_object(arg
);
1369 rb_hash_aset(v
, key
, value
);
1371 if (type
== TYPE_HASH_DEF
) {
1372 RHASH(v
)->ifnone
= r_object(arg
);
1374 v
= r_leave(v
, arg
);
1382 volatile long i
; /* gcc 2.7.2.3 -O2 bug?? */
1386 klass
= path2class(r_unique(arg
));
1389 v
= rb_obj_alloc(klass
);
1390 if (TYPE(v
) != T_STRUCT
) {
1391 rb_raise(rb_eTypeError
, "class %s not a struct", rb_class2name(klass
));
1393 mem
= rb_struct_s_members(klass
);
1394 if (RARRAY_LEN(mem
) != len
) {
1395 rb_raise(rb_eTypeError
, "struct %s not compatible (struct size differs)",
1396 rb_class2name(klass
));
1399 v
= r_entry(v
, arg
);
1400 values
= rb_ary_new2(len
);
1401 for (i
=0; i
<len
; i
++) {
1402 slot
= r_symbol(arg
);
1404 if (RARRAY_PTR(mem
)[i
] != ID2SYM(slot
)) {
1405 rb_raise(rb_eTypeError
, "struct %s not compatible (:%s for :%s)",
1406 rb_class2name(klass
),
1408 rb_id2name(SYM2ID(RARRAY_PTR(mem
)[i
])));
1410 rb_ary_push(values
, r_object(arg
));
1412 rb_struct_initialize(v
, values
);
1413 v
= r_leave(v
, arg
);
1419 VALUE klass
= path2class(r_unique(arg
));
1422 if (!rb_obj_respond_to(klass
, s_load
, Qtrue
)) {
1423 rb_raise(rb_eTypeError
, "class %s needs to have method `_load'",
1424 rb_class2name(klass
));
1426 data
= r_string(arg
);
1431 v
= rb_funcall(klass
, s_load
, 1, data
);
1432 check_load_arg(arg
);
1433 v
= r_entry(v
, arg
);
1434 v
= r_leave(v
, arg
);
1438 case TYPE_USRMARSHAL
:
1440 VALUE klass
= path2class(r_unique(arg
));
1443 v
= rb_obj_alloc(klass
);
1444 if (!NIL_P(extmod
)) {
1445 while (RARRAY_LEN(extmod
) > 0) {
1446 VALUE m
= rb_ary_pop(extmod
);
1447 rb_extend_object(v
, m
);
1450 if (!rb_obj_respond_to(v
, s_mload
, Qtrue
)) {
1451 rb_raise(rb_eTypeError
, "instance of %s needs to have method `marshal_load'",
1452 rb_class2name(klass
));
1454 v
= r_entry(v
, arg
);
1455 data
= r_object(arg
);
1456 rb_funcall(v
, s_mload
, 1, data
);
1457 check_load_arg(arg
);
1458 v
= r_leave(v
, arg
);
1464 v
= obj_alloc_by_path(r_unique(arg
), arg
);
1465 if (TYPE(v
) != T_OBJECT
) {
1466 rb_raise(rb_eArgError
, "dump format error");
1468 v
= r_entry(v
, arg
);
1470 v
= r_leave(v
, arg
);
1476 VALUE klass
= path2class(r_unique(arg
));
1477 if (rb_obj_respond_to(klass
, s_alloc
, Qtrue
)) {
1478 static int warn
= Qtrue
;
1480 rb_warn("define `allocate' instead of `_alloc'");
1483 v
= rb_funcall(klass
, s_alloc
, 0);
1484 check_load_arg(arg
);
1487 v
= rb_obj_alloc(klass
);
1489 if (TYPE(v
) != T_DATA
) {
1490 rb_raise(rb_eArgError
, "dump format error");
1492 v
= r_entry(v
, arg
);
1493 if (!rb_obj_respond_to(v
, s_load_data
, Qtrue
)) {
1494 rb_raise(rb_eTypeError
,
1495 "class %s needs to have instance method `_load_data'",
1496 rb_class2name(klass
));
1498 rb_funcall(v
, s_load_data
, 1, r_object0(arg
, 0, extmod
));
1499 check_load_arg(arg
);
1500 v
= r_leave(v
, arg
);
1504 case TYPE_MODULE_OLD
:
1506 volatile VALUE str
= r_bytes(arg
);
1508 v
= rb_path2class(RSTRING_PTR(str
));
1509 v
= r_entry(v
, arg
);
1510 v
= r_leave(v
, arg
);
1516 volatile VALUE str
= r_bytes(arg
);
1518 v
= path2class(RSTRING_PTR(str
));
1519 v
= r_entry(v
, arg
);
1520 v
= r_leave(v
, arg
);
1526 volatile VALUE str
= r_bytes(arg
);
1528 v
= path2module(RSTRING_PTR(str
));
1529 v
= r_entry(v
, arg
);
1530 v
= r_leave(v
, arg
);
1535 v
= ID2SYM(r_symreal(arg
));
1536 v
= r_leave(v
, arg
);
1540 v
= ID2SYM(r_symlink(arg
));
1544 rb_raise(rb_eArgError
, "dump format error(0x%x)", type
);
1551 r_object(struct load_arg
*arg
)
1553 return r_object0(arg
, 0, Qnil
);
1557 load(struct load_arg
*arg
)
1559 return r_object(arg
);
1563 load_ensure(struct load_arg
*arg
)
1565 if (!DATA_PTR(arg
->compat_tbl_wrapper
)) return 0;
1566 st_free_table(arg
->symbols
);
1567 st_free_table(arg
->compat_tbl
);
1568 DATA_PTR(arg
->compat_tbl_wrapper
) = 0;
1569 arg
->compat_tbl_wrapper
= 0;
1575 * load( source [, proc] ) => obj
1576 * restore( source [, proc] ) => obj
1578 * Returns the result of converting the serialized data in source into a
1579 * Ruby object (possibly with associated subordinate objects). source
1580 * may be either an instance of IO or an object that responds to
1581 * to_str. If proc is specified, it will be passed each object as it
1585 marshal_load(int argc
, VALUE
*argv
)
1590 struct load_arg arg
;
1592 rb_scan_args(argc
, argv
, "11", &port
, &proc
);
1593 v
= rb_check_string_type(port
);
1595 arg
.taint
= OBJ_TAINTED(port
); /* original taintedness */
1598 else if (rb_obj_respond_to(port
, s_getbyte
, Qtrue
) && rb_obj_respond_to(port
, s_read
, Qtrue
)) {
1599 if (rb_obj_respond_to(port
, s_binmode
, Qtrue
)) {
1600 rb_funcall2(port
, s_binmode
, 0, 0);
1605 rb_raise(rb_eTypeError
, "instance of IO needed");
1609 arg
.compat_tbl
= st_init_numtable();
1610 arg
.compat_tbl_wrapper
= Data_Wrap_Struct(rb_cData
, rb_mark_tbl
, 0, arg
.compat_tbl
);
1612 major
= r_byte(&arg
);
1613 minor
= r_byte(&arg
);
1614 if (major
!= MARSHAL_MAJOR
|| minor
> MARSHAL_MINOR
) {
1615 rb_raise(rb_eTypeError
, "incompatible marshal file format (can't be read)\n\
1616 \tformat version %d.%d required; %d.%d given",
1617 MARSHAL_MAJOR
, MARSHAL_MINOR
, major
, minor
);
1619 if (RTEST(ruby_verbose
) && minor
!= MARSHAL_MINOR
) {
1620 rb_warn("incompatible marshal file format (can be read)\n\
1621 \tformat version %d.%d required; %d.%d given",
1622 MARSHAL_MAJOR
, MARSHAL_MINOR
, major
, minor
);
1625 arg
.symbols
= st_init_numtable();
1626 arg
.data
= rb_hash_new();
1627 RBASIC(arg
.data
)->klass
= 0;
1628 if (NIL_P(proc
)) arg
.proc
= 0;
1629 else arg
.proc
= proc
;
1630 v
= rb_ensure(load
, (VALUE
)&arg
, load_ensure
, (VALUE
)&arg
);
1636 * The marshaling library converts collections of Ruby objects into a
1637 * byte stream, allowing them to be stored outside the currently
1638 * active script. This data may subsequently be read and the original
1639 * objects reconstituted.
1640 * Marshaled data has major and minor version numbers stored along
1641 * with the object information. In normal use, marshaling can only
1642 * load data written with the same major version number and an equal
1643 * or lower minor version number. If Ruby's ``verbose'' flag is set
1644 * (normally using -d, -v, -w, or --verbose) a warning is printed unless
1645 * the minor numbers also match exactly. Marshal versioning is independent of
1646 * Ruby's version numbers. You can extract the version by reading the
1647 * first two bytes of marshaled data.
1649 * str = Marshal.dump("thing")
1650 * RUBY_VERSION #=> "1.9.0"
1654 * Some objects cannot be dumped: if the objects to be dumped include
1655 * bindings, procedure or method objects, instances of class IO, or
1656 * singleton objects, a TypeError will be raised.
1657 * If your class has special serialization needs (for example, if you
1658 * want to serialize in some specific format), or if it contains
1659 * objects that would otherwise not be serializable, you can implement
1660 * your own serialization strategy by defining two methods, _dump and
1662 * The instance method _dump should return a String object containing
1663 * all the information necessary to reconstitute objects of this class
1664 * and all referenced objects up to a maximum depth given as an integer
1665 * parameter (a value of -1 implies that you should disable depth checking).
1666 * The class method _load should take a String and return an object of this class.
1671 VALUE rb_mMarshal
= rb_define_module("Marshal");
1673 s_dump
= rb_intern("_dump");
1674 s_load
= rb_intern("_load");
1675 s_mdump
= rb_intern("marshal_dump");
1676 s_mload
= rb_intern("marshal_load");
1677 s_dump_data
= rb_intern("_dump_data");
1678 s_load_data
= rb_intern("_load_data");
1679 s_alloc
= rb_intern("_alloc");
1680 s_getbyte
= rb_intern("getbyte");
1681 s_read
= rb_intern("read");
1682 s_write
= rb_intern("write");
1683 s_binmode
= rb_intern("binmode");
1685 rb_define_module_function(rb_mMarshal
, "dump", marshal_dump
, -1);
1686 rb_define_module_function(rb_mMarshal
, "load", marshal_load
, -1);
1687 rb_define_module_function(rb_mMarshal
, "restore", marshal_load
, -1);
1689 rb_define_const(rb_mMarshal
, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR
));
1690 rb_define_const(rb_mMarshal
, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR
));
1692 compat_allocator_tbl
= st_init_numtable();
1693 rb_gc_register_address(&compat_allocator_tbl_wrapper
);
1694 compat_allocator_tbl_wrapper
=
1695 Data_Wrap_Struct(rb_cData
, mark_marshal_compat_t
, 0, compat_allocator_tbl
);
1699 rb_marshal_dump(VALUE obj
, VALUE port
)
1706 if (!NIL_P(port
)) argc
= 2;
1707 return marshal_dump(argc
, argv
);
1711 rb_marshal_load(VALUE port
)
1713 return marshal_load(1, &port
);