1 /**********************************************************************
6 created at: Thu Feb 10 15:17:05 JST 1994
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "ruby/ruby.h"
13 #include <sys/types.h>
/*
 * Helpers for packing 16/32-bit fields, selected when short is not 2 bytes
 * or long is not 4 bytes.  The natint-aware forms read a variable named
 * natint that must be in scope wherever the macro is expanded:
 *   - OFF16B/OFF32B return p (as char*) unchanged when natint is non-zero;
 *     otherwise they advance it by sizeof(short)-SIZE16 or
 *     sizeof(long)-SIZE32 bytes.
 *   - NATINT_LEN yields sizeof(type) when natint is non-zero, else len.
 */
19 #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4
24 # define OFF16B(p) ((char*)(p) + (natint?0:(sizeof(short) - SIZE16)))
25 # define OFF32B(p) ((char*)(p) + (natint?0:(sizeof(long) - SIZE32)))
26 # define NATINT_LEN(type,len) (natint?sizeof(type):(len))
27 # ifdef WORDS_BIGENDIAN
28 # define OFF16(p) OFF16B(p)
29 # define OFF32(p) OFF32B(p)
/* Choose the native-width or the fixed-width byte-order conversion, keyed on natint. */
31 # define NATINT_HTOVS(x) (natint?htovs(x):htov16(x))
32 # define NATINT_HTOVL(x) (natint?htovl(x):htov32(x))
33 # define NATINT_HTONS(x) (natint?htons(x):hton16(x))
34 # define NATINT_HTONL(x) (natint?htonl(x):hton32(x))
/* Variants that ignore natint: always the native type's size and conversions. */
36 # define NATINT_LEN(type,len) sizeof(type)
37 # define NATINT_HTOVS(x) htovs(x)
38 # define NATINT_HTOVL(x) htovl(x)
39 # define NATINT_HTONS(x) htons(x)
40 # define NATINT_HTONL(x) htonl(x)
/* Variants that apply no offset: the pointer is only cast to char*. */
44 # define OFF16(p) (char*)(p)
45 # define OFF32(p) (char*)(p)
48 # define OFF16B(p) (char*)(p)
49 # define OFF32B(p) (char*)(p)
52 #define define_swapx(x, xtype) \
54 TOKEN_PASTE(swap,x)(xtype z) \
58 unsigned char *s, *t; \
61 zp = xmalloc(sizeof(xtype)); \
63 s = (unsigned char*)zp; \
64 t = xmalloc(sizeof(xtype)); \
65 for (i=0; i<sizeof(xtype); i++) { \
66 t[sizeof(xtype)-i-1] = s[i]; \
/*
 * Exchange the two low-order bytes of x; bits above bit 15 are discarded.
 * x is evaluated twice, so do not pass an expression with side effects.
 */
75 #define swap16(x) ((((x)&0xFF)<<8) | (((x)>>8)&0xFF))
78 #define swaps(x) swap16(x)
81 #define swaps(x) ((((x)&0xFF)<<24) \
83 |(((x)&0x0000FF00)<<8) \
84 |(((x)&0x00FF0000)>>8) )
91 #define swap32(x) ((((x)&0xFF)<<24) \
93 |(((x)&0x0000FF00)<<8) \
94 |(((x)&0x00FF0000)>>8) )
97 #define swapl(x) swap32(x)
/*
 * Reverse the byte order of a 64-bit value: each of the eight bytes is
 * masked out and shifted to its mirrored position (byte 0 <-> 7, 1 <-> 6,
 * 2 <-> 5, 3 <-> 4).  x is evaluated eight times.
 */
100 #define swapl(x) ((((x)&0x00000000000000FF)<<56) \
101 |(((x)&0xFF00000000000000)>>56) \
102 |(((x)&0x000000000000FF00)<<40) \
103 |(((x)&0x00FF000000000000)>>40) \
104 |(((x)&0x0000000000FF0000)<<24) \
105 |(((x)&0x0000FF0000000000)>>24) \
106 |(((x)&0x00000000FF000000)<<8) \
107 |(((x)&0x000000FF00000000)>>8))
113 #if SIZEOF_FLOAT == 4
114 #if SIZEOF_LONG == 4 /* SIZEOF_FLOAT == 4 == SIZEOF_LONG */
115 #define swapf(x) swapl(x)
116 #define FLOAT_SWAPPER unsigned long
118 #if SIZEOF_SHORT == 4 /* SIZEOF_FLOAT == 4 == SIZEOF_SHORT */
119 #define swapf(x) swaps(x)
120 #define FLOAT_SWAPPER unsigned short
121 #else /* SIZEOF_FLOAT == 4 but neither long nor short is 4 bytes */
122 define_swapx(f
,float)
123 #endif /* #if SIZEOF_SHORT == 4 */
124 #endif /* #if SIZEOF_LONG == 4 */
125 #else /* SIZEOF_FLOAT != 4 */
126 define_swapx(f
,float)
127 #endif /* #if SIZEOF_FLOAT == 4 */
129 #if SIZEOF_DOUBLE == 8
130 #if SIZEOF_LONG == 8 /* SIZEOF_DOUBLE == 8 == SIZEOF_LONG */
131 #define swapd(x) swapl(x)
132 #define DOUBLE_SWAPPER unsigned long
134 #if SIZEOF_LONG == 4 /* SIZEOF_DOUBLE == 8 && 4 == SIZEOF_LONG */
136 swapd(const double d
)
139 unsigned long utmp
[2];
142 utmp
[0] = 0; utmp
[1] = 0;
143 memcpy(utmp
,&dtmp
,sizeof(double));
145 utmp
[0] = swapl(utmp
[1]);
146 utmp
[1] = swapl(utmp0
);
147 memcpy(&dtmp
,utmp
,sizeof(double));
151 #if SIZEOF_SHORT == 4 /* SIZEOF_DOUBLE == 8 && 4 == SIZEOF_SHORT */
153 swapd(const double d
)
156 unsigned short utmp
[2];
157 unsigned short utmp0
;
159 utmp
[0] = 0; utmp
[1] = 0;
160 memcpy(utmp
,&dtmp
,sizeof(double));
162 utmp
[0] = swaps(utmp
[1]);
163 utmp
[1] = swaps(utmp0
);
164 memcpy(&dtmp
,utmp
,sizeof(double));
167 #else /* SIZEOF_DOUBLE == 8 but no long/short size handled above matches */
168 define_swapx(d
, double)
169 #endif /* #if SIZEOF_SHORT == 4 */
170 #endif /* #if SIZEOF_LONG == 4 */
171 #endif /* #if SIZEOF_LONG == 8 */
172 #else /* SIZEOF_DOUBLE != 8 */
173 define_swapx(d
, double)
174 #endif /* #if SIZEOF_DOUBLE == 8 */
178 #ifdef DYNAMIC_ENDIAN
189 static int endian_value
;
192 if (init
) return endian_value
;
195 return endian_value
= p
[0]?0:1;
/*
 * Byte-order conversions decided at run time by endian().
 * When endian() returns non-zero:
 *   - the network-order macros (ntoh*, hton*, hton16/32) return x unchanged;
 *   - the 'v' macros (htov*, vtoh*, htov16/32) byte-swap x.
 * When endian() returns zero the two groups trade roles.
 * Each macro calls endian() on every expansion.
 */
198 #define ntohs(x) (endian()?(x):swaps(x))
199 #define ntohl(x) (endian()?(x):swapl(x))
200 #define ntohf(x) (endian()?(x):swapf(x))
201 #define ntohd(x) (endian()?(x):swapd(x))
202 #define htons(x) (endian()?(x):swaps(x))
203 #define htonl(x) (endian()?(x):swapl(x))
204 #define htonf(x) (endian()?(x):swapf(x))
205 #define htond(x) (endian()?(x):swapd(x))
206 #define htovs(x) (endian()?swaps(x):(x))
207 #define htovl(x) (endian()?swapl(x):(x))
208 #define htovf(x) (endian()?swapf(x):(x))
209 #define htovd(x) (endian()?swapd(x):(x))
210 #define vtohs(x) (endian()?swaps(x):(x))
211 #define vtohl(x) (endian()?swapl(x):(x))
212 #define vtohf(x) (endian()?swapf(x):(x))
213 #define vtohd(x) (endian()?swapd(x):(x))
/* Fixed 16/32-bit counterparts of the macros above. */
215 #define htov16(x) (endian()?swap16(x):(x))
216 #define htov32(x) (endian()?swap32(x):(x))
217 #define hton16(x) (endian()?(x):swap16(x))
218 #define hton32(x) (endian()?(x):swap32(x))
/* Byte-order conversions decided at compile time from WORDS_BIGENDIAN. */
221 #ifdef WORDS_BIGENDIAN
/* Big-endian host: every 'v' conversion is a byte swap ... */
232 #define htovs(x) swaps(x)
233 #define htovl(x) swapl(x)
234 #define htovf(x) swapf(x)
235 #define htovd(x) swapd(x)
236 #define vtohs(x) swaps(x)
237 #define vtohl(x) swapl(x)
238 #define vtohf(x) swapf(x)
239 #define vtohd(x) swapd(x)
241 #define htov16(x) swap16(x)
242 #define htov32(x) swap32(x)
/* ... and the fixed-width network-order conversions are the identity. */
243 #define hton16(x) (x)
244 #define hton32(x) (x)
246 #else /* LITTLE ENDIAN */
/* Little-endian host: every network-order conversion is a byte swap ... */
253 #define ntohs(x) swaps(x)
254 #define ntohl(x) swapl(x)
255 #define htons(x) swaps(x)
256 #define htonl(x) swapl(x)
257 #define ntohf(x) swapf(x)
258 #define ntohd(x) swapd(x)
259 #define htonf(x) swapf(x)
260 #define htond(x) swapd(x)
/* ... and the fixed-width 'v' conversions are the identity. */
270 #define htov16(x) (x)
271 #define htov32(x) (x)
272 #define hton16(x) swap16(x)
273 #define hton32(x) swap32(x)
279 #define FLOAT_CONVWITH(y) FLOAT_SWAPPER y;
280 #define HTONF(x,y) (memcpy(&y,&x,sizeof(float)), \
281 y = htonf((FLOAT_SWAPPER)y), \
282 memcpy(&x,&y,sizeof(float)), \
284 #define HTOVF(x,y) (memcpy(&y,&x,sizeof(float)), \
285 y = htovf((FLOAT_SWAPPER)y), \
286 memcpy(&x,&y,sizeof(float)), \
288 #define NTOHF(x,y) (memcpy(&y,&x,sizeof(float)), \
289 y = ntohf((FLOAT_SWAPPER)y), \
290 memcpy(&x,&y,sizeof(float)), \
292 #define VTOHF(x,y) (memcpy(&y,&x,sizeof(float)), \
293 y = vtohf((FLOAT_SWAPPER)y), \
294 memcpy(&x,&y,sizeof(float)), \
297 #define FLOAT_CONVWITH(y)
298 #define HTONF(x,y) htonf(x)
299 #define HTOVF(x,y) htovf(x)
300 #define NTOHF(x,y) ntohf(x)
301 #define VTOHF(x,y) vtohf(x)
304 #ifdef DOUBLE_SWAPPER
305 #define DOUBLE_CONVWITH(y) DOUBLE_SWAPPER y;
306 #define HTOND(x,y) (memcpy(&y,&x,sizeof(double)), \
307 y = htond((DOUBLE_SWAPPER)y), \
308 memcpy(&x,&y,sizeof(double)), \
310 #define HTOVD(x,y) (memcpy(&y,&x,sizeof(double)), \
311 y = htovd((DOUBLE_SWAPPER)y), \
312 memcpy(&x,&y,sizeof(double)), \
314 #define NTOHD(x,y) (memcpy(&y,&x,sizeof(double)), \
315 y = ntohd((DOUBLE_SWAPPER)y), \
316 memcpy(&x,&y,sizeof(double)), \
318 #define VTOHD(x,y) (memcpy(&y,&x,sizeof(double)), \
319 y = vtohd((DOUBLE_SWAPPER)y), \
320 memcpy(&x,&y,sizeof(double)), \
323 #define DOUBLE_CONVWITH(y)
324 #define HTOND(x,y) htond(x)
325 #define HTOVD(x,y) htovd(x)
326 #define NTOHD(x,y) ntohd(x)
327 #define VTOHD(x,y) vtohd(x)
330 unsigned long rb_big2ulong_pack(VALUE x
);
335 x
= rb_to_int(x
); /* is nil OK? (should not) */
337 if (FIXNUM_P(x
)) return FIX2LONG(x
);
338 if (TYPE(x
) == T_BIGNUM
) {
339 return rb_big2ulong_pack(x
);
341 rb_raise(rb_eTypeError
, "can't convert %s to `integer'", rb_obj_classname(x
));
342 return 0; /* not reached */
345 #if SIZEOF_LONG == SIZE32
/*
 * EXTEND32(x): when natint is zero, reassigns x to
 * ((2^31 - 1) - x) XOR (2^31 - 1), computed in long.  For 0 <= x < 2^31
 * the value is unchanged; for 2^31 <= x < 2^32 held in a wider long the
 * result is x - 2^32, i.e. x sign-extended from bit 31.  Does nothing when
 * natint is non-zero.  Reads the variable natint from the expansion site
 * and evaluates x more than once.
 */
348 /* invariant in modulo 1<<31 */
349 # define EXTEND32(x) do { if (!natint) {(x) = (((1L<<31)-1-(x))^~(~0L<<31));}} while(0)
351 #if SIZEOF_SHORT == SIZE16
/*
 * EXTEND16(x): 16-bit counterpart of EXTEND32.  When natint is zero,
 * reassigns x to (short)(((2^15 - 1) - x) XOR (2^15 - 1)), so that
 * 0 <= x < 2^15 is unchanged and 2^15 <= x < 2^16 becomes x - 2^16
 * (sign extension from bit 15).  Does nothing when natint is non-zero.
 */
354 # define EXTEND16(x) do { if (!natint) {(x) = (short)(((1<<15)-1-(x))^~(~0<<15));}} while(0)
357 #ifdef HAVE_LONG_LONG
358 # define QUAD_SIZE sizeof(LONG_LONG)
/* Message raised (via the TOO_FEW macro) when the array runs out of items. */
362 static const char toofew
[] = "too few arguments";
/* Forward declarations of file-local helpers. */
364 static void encodes(VALUE
,const char*,long,int);
365 static void qpencode(VALUE
,VALUE
,long);
367 static unsigned long utf8_to_uv(const char*,long*);
371 * arr.pack ( aTemplateString ) -> aBinaryString
373 * Packs the contents of <i>arr</i> into a binary sequence according to
374 * the directives in <i>aTemplateString</i> (see the table below).
375 * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count,
376 * which gives the width of the resulting field. The remaining
377 * directives also may take a count, indicating the number of array
378 * elements to convert. If the count is an asterisk
379 * (``<code>*</code>''), all remaining array elements will be
380 * converted. Any of the directives ``<code>sSiIlL</code>'' may be
381 * followed by an underscore (``<code>_</code>'') to use the underlying
382 * platform's native size for the specified type; otherwise, they use a
383 * platform-independent size. Spaces are ignored in the template
384 * string. See also <code>String#unpack</code>.
386 * a = [ "a", "b", "c" ]
387 * n = [ 65, 66, 67 ]
388 * a.pack("A3A3A3") #=> "a  b  c  "
389 * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000"
390 * n.pack("ccc") #=> "ABC"
392 * Directives for +pack+.
395 * ---------------------------------------------------------------
396 * @ | Moves to absolute position
397 * A | arbitrary binary string (space padded, count is width)
398 * a | arbitrary binary string (null padded, count is width)
399 * B | Bit string (descending bit order)
400 * b | Bit string (ascending bit order)
401 * C | Unsigned byte (C unsigned char)
403 * D, d | Double-precision float, native format
404 * E | Double-precision float, little-endian byte order
405 * e | Single-precision float, little-endian byte order
406 * F, f | Single-precision float, native format
407 * G | Double-precision float, network (big-endian) byte order
408 * g | Single-precision float, network (big-endian) byte order
409 * H | Hex string (high nibble first)
410 * h | Hex string (low nibble first)
411 * I | Unsigned integer
415 * M | Quoted printable, MIME encoding (see RFC2045)
416 * m | Base64 encoded string
417 * N | Long, network (big-endian) byte order
418 * n | Short, network (big-endian) byte-order
419 * P | Pointer to a structure (fixed-length string)
420 * p | Pointer to a null-terminated string
421 * Q, q | 64-bit number
425 * u | UU-encoded string
426 * V | Long, little-endian byte order
427 * v | Short, little-endian byte order
428 * w | BER-compressed integer
431 * Z | Same as ``a'', except that null is added with *
435 pack_pack(VALUE ary
, VALUE fmt
)
437 static const char nul10
[] = "\0\0\0\0\0\0\0\0\0\0";
438 static const char spc10
[] = " ";
439 const char *p
, *pend
;
440 VALUE res
, from
, associates
= 0;
442 long items
, len
, idx
, plen
;
445 int natint
; /* native integer */
449 p
= RSTRING_PTR(fmt
);
450 pend
= p
+ RSTRING_LEN(fmt
);
451 res
= rb_str_buf_new(0);
453 items
= RARRAY_LEN(ary
);
/*
 * Argument-fetch helpers; they use the enclosing function's ary, items and
 * idx variables.
 *   TOO_FEW  - raises rb_eArgError with the toofew message; the trailing 0
 *              only gives the comma expression a value so it can serve as
 *              a ternary operand.
 *   THISFROM - the element at idx without consuming it, or TOO_FEW when
 *              items is not positive.
 *   NEXTFROM - the element at idx, then advances idx; items is decremented
 *              on every evaluation, even when it is already <= 0 and
 *              TOO_FEW is taken.
 */
456 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
457 #define THISFROM (items > 0 ? RARRAY_PTR(ary)[idx] : TOO_FEW)
458 #define NEXTFROM (items-- > 0 ? RARRAY_PTR(ary)[idx++] : TOO_FEW)
461 if (RSTRING_PTR(fmt
) + RSTRING_LEN(fmt
) != pend
) {
462 rb_raise(rb_eRuntimeError
, "format string modified");
464 type
= *p
++; /* get data type */
469 if (ISSPACE(type
)) continue;
471 while ((p
< pend
) && (*p
!= '\n')) {
476 if (*p
== '_' || *p
== '!') {
477 const char *natstr
= "sSiIlL";
479 if (strchr(natstr
, type
)) {
486 rb_raise(rb_eArgError
, "'%c' allowed only after types %s", *p
, natstr
);
489 if (*p
== '*') { /* set data length */
490 len
= strchr("@Xxu", type
) ? 0 : items
;
493 else if (ISDIGIT(*p
)) {
494 len
= STRTOUL(p
, (char**)&p
, 10);
501 case 'A': case 'a': case 'Z':
511 ptr
= RSTRING_PTR(from
);
512 plen
= RSTRING_LEN(from
);
513 OBJ_INFECT(res
, from
);
520 case 'a': /* arbitrary binary string (null padded) */
521 case 'A': /* arbitrary binary string (ASCII space padded) */
522 case 'Z': /* null terminated string */
524 rb_str_buf_cat(res
, ptr
, len
);
525 if (p
[-1] == '*' && type
== 'Z')
526 rb_str_buf_cat(res
, nul10
, 1);
529 rb_str_buf_cat(res
, ptr
, plen
);
532 rb_str_buf_cat(res
, (type
== 'A')?spc10
:nul10
, 10);
535 rb_str_buf_cat(res
, (type
== 'A')?spc10
:nul10
, len
);
539 case 'b': /* bit string (ascending) */
545 j
= (len
- plen
+ 1)/2;
548 for (i
=0; i
++ < len
; ptr
++) {
554 char c
= byte
& 0xff;
555 rb_str_buf_cat(res
, &c
, 1);
561 byte
>>= 7 - (len
& 7);
563 rb_str_buf_cat(res
, &c
, 1);
570 case 'B': /* bit string (descending) */
576 j
= (len
- plen
+ 1)/2;
579 for (i
=0; i
++ < len
; ptr
++) {
584 char c
= byte
& 0xff;
585 rb_str_buf_cat(res
, &c
, 1);
591 byte
<<= 7 - (len
& 7);
593 rb_str_buf_cat(res
, &c
, 1);
600 case 'h': /* hex string (low nibble first) */
606 j
= (len
- plen
+ 1)/2;
609 for (i
=0; i
++ < len
; ptr
++) {
611 byte
|= (((*ptr
& 15) + 9) & 15) << 4;
613 byte
|= (*ptr
& 15) << 4;
617 char c
= byte
& 0xff;
618 rb_str_buf_cat(res
, &c
, 1);
623 char c
= byte
& 0xff;
624 rb_str_buf_cat(res
, &c
, 1);
631 case 'H': /* hex string (high nibble first) */
637 j
= (len
- plen
+ 1)/2;
640 for (i
=0; i
++ < len
; ptr
++) {
642 byte
|= ((*ptr
& 15) + 9) & 15;
648 char c
= byte
& 0xff;
649 rb_str_buf_cat(res
, &c
, 1);
654 char c
= byte
& 0xff;
655 rb_str_buf_cat(res
, &c
, 1);
664 case 'c': /* signed char */
665 case 'C': /* unsigned char */
671 rb_str_buf_cat(res
, &c
, sizeof(char));
675 case 's': /* signed short */
676 case 'S': /* unsigned short */
682 rb_str_buf_cat(res
, OFF16(&s
), NATINT_LEN(short,2));
686 case 'i': /* signed int */
687 case 'I': /* unsigned int */
693 rb_str_buf_cat(res
, OFF32(&i
), NATINT_LEN(int,4));
697 case 'l': /* signed long */
698 case 'L': /* unsigned long */
704 rb_str_buf_cat(res
, OFF32(&l
), NATINT_LEN(long,4));
708 case 'q': /* signed quad (64bit) int */
709 case 'Q': /* unsigned quad (64bit) int */
714 rb_quad_pack(tmp
, from
);
715 rb_str_buf_cat(res
, (char*)&tmp
, QUAD_SIZE
);
719 case 'n': /* unsigned short (network byte-order) */
726 rb_str_buf_cat(res
, OFF16(&s
), NATINT_LEN(short,2));
730 case 'N': /* unsigned long (network byte-order) */
737 rb_str_buf_cat(res
, OFF32(&l
), NATINT_LEN(long,4));
741 case 'v': /* unsigned short (VAX byte-order) */
748 rb_str_buf_cat(res
, OFF16(&s
), NATINT_LEN(short,2));
752 case 'V': /* unsigned long (VAX byte-order) */
759 rb_str_buf_cat(res
, OFF32(&l
), NATINT_LEN(long,4));
763 case 'f': /* single precision float in native format */
764 case 'F': /* ditto */
769 f
= RFLOAT_VALUE(rb_Float(from
));
770 rb_str_buf_cat(res
, (char*)&f
, sizeof(float));
774 case 'e': /* single precision float in VAX byte-order */
777 FLOAT_CONVWITH(ftmp
);
780 f
= RFLOAT_VALUE(rb_Float(from
));
782 rb_str_buf_cat(res
, (char*)&f
, sizeof(float));
786 case 'E': /* double precision float in VAX byte-order */
789 DOUBLE_CONVWITH(dtmp
);
792 d
= RFLOAT_VALUE(rb_Float(from
));
794 rb_str_buf_cat(res
, (char*)&d
, sizeof(double));
798 case 'd': /* double precision float in native format */
799 case 'D': /* ditto */
804 d
= RFLOAT_VALUE(rb_Float(from
));
805 rb_str_buf_cat(res
, (char*)&d
, sizeof(double));
809 case 'g': /* single precision float in network byte-order */
812 FLOAT_CONVWITH(ftmp
);
815 f
= RFLOAT_VALUE(rb_Float(from
));
817 rb_str_buf_cat(res
, (char*)&f
, sizeof(float));
821 case 'G': /* double precision float in network byte-order */
824 DOUBLE_CONVWITH(dtmp
);
827 d
= RFLOAT_VALUE(rb_Float(from
));
829 rb_str_buf_cat(res
, (char*)&d
, sizeof(double));
833 case 'x': /* null byte */
836 rb_str_buf_cat(res
, nul10
, 10);
839 rb_str_buf_cat(res
, nul10
, len
);
842 case 'X': /* back up byte */
844 plen
= RSTRING_LEN(res
);
846 rb_raise(rb_eArgError
, "X outside of string");
847 rb_str_set_len(res
, plen
- len
);
850 case '@': /* null fill to absolute position */
851 len
-= RSTRING_LEN(res
);
852 if (len
> 0) goto grow
;
854 if (len
> 0) goto shrink
;
858 rb_raise(rb_eArgError
, "%% is not supported");
861 case 'U': /* Unicode character */
868 from
= rb_to_int(from
);
871 rb_raise(rb_eRangeError
, "pack(U): value out of range");
873 le
= rb_uv_to_utf8(buf
, l
);
874 rb_str_buf_cat(res
, (char*)buf
, le
);
878 case 'u': /* uuencoded string */
879 case 'm': /* base64 encoded string */
882 ptr
= RSTRING_PTR(from
);
883 plen
= RSTRING_LEN(from
);
896 encodes(res
, ptr
, todo
, type
);
902 case 'M': /* quoted-printable encoded string */
903 from
= rb_obj_as_string(NEXTFROM
);
906 qpencode(res
, from
, len
);
909 case 'P': /* pointer to packed byte string */
913 if (RSTRING_LEN(from
) < len
) {
914 rb_raise(rb_eArgError
, "too short buffer for P(%ld for %ld)",
915 RSTRING_LEN(from
), len
);
920 case 'p': /* pointer to string */
928 t
= StringValuePtr(from
);
931 associates
= rb_ary_new();
933 rb_ary_push(associates
, from
);
935 rb_str_buf_cat(res
, (char*)&t
, sizeof(char*));
939 case 'w': /* BER compressed integer */
942 VALUE buf
= rb_str_new(0, 0);
943 char c
, *bufs
, *bufe
;
946 if (TYPE(from
) == T_BIGNUM
) {
947 VALUE big128
= rb_uint2big(128);
948 while (TYPE(from
) == T_BIGNUM
) {
949 from
= rb_big_divmod(from
, big128
);
950 c
= NUM2INT(RARRAY_PTR(from
)[1]) | 0x80; /* mod */
951 rb_str_buf_cat(buf
, &c
, sizeof(char));
952 from
= RARRAY_PTR(from
)[0]; /* div */
957 long l
= NUM2LONG(from
);
959 rb_raise(rb_eArgError
, "can't compress negative numbers");
965 c
= ((ul
& 0x7f) | 0x80);
966 rb_str_buf_cat(buf
, &c
, sizeof(char));
970 if (RSTRING_LEN(buf
)) {
971 bufs
= RSTRING_PTR(buf
);
972 bufe
= bufs
+ RSTRING_LEN(buf
) - 1;
973 *bufs
&= 0x7f; /* clear continue bit */
974 while (bufs
< bufe
) { /* reverse */
979 rb_str_buf_cat(res
, RSTRING_PTR(buf
), RSTRING_LEN(buf
));
983 rb_str_buf_cat(res
, &c
, sizeof(char));
994 rb_str_associate(res
, associates
);
/*
 * 64-character uuencode alphabet, indexed by a 6-bit value.  Index 0 maps
 * to '`' (not to a space); indices 1..63 map to '!' through '_'.
 */
999 static const char uu_table
[] =
1000 "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
/* 64-character base64 alphabet (A-Z, a-z, 0-9, '+', '/'), indexed by a 6-bit value. */
1001 static const char b64_table
[] =
1002 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1005 encodes(VALUE str
, const char *s
, long len
, int type
)
1007 char *buff
= ALLOCA_N(char, len
* 4 / 3 + 6);
1009 const char *trans
= type
== 'u' ? uu_table
: b64_table
;
1013 buff
[i
++] = len
+ ' ';
1020 buff
[i
++] = trans
[077 & (*s
>> 2)];
1021 buff
[i
++] = trans
[077 & (((*s
<< 4) & 060) | ((s
[1] >> 4) & 017))];
1022 buff
[i
++] = trans
[077 & (((s
[1] << 2) & 074) | ((s
[2] >> 6) & 03))];
1023 buff
[i
++] = trans
[077 & s
[2]];
1028 buff
[i
++] = trans
[077 & (*s
>> 2)];
1029 buff
[i
++] = trans
[077 & (((*s
<< 4) & 060) | ((s
[1] >> 4) & 017))];
1030 buff
[i
++] = trans
[077 & (((s
[1] << 2) & 074) | (('\0' >> 6) & 03))];
1031 buff
[i
++] = padding
;
1033 else if (len
== 1) {
1034 buff
[i
++] = trans
[077 & (*s
>> 2)];
1035 buff
[i
++] = trans
[077 & (((*s
<< 4) & 060) | (('\0' >> 4) & 017))];
1036 buff
[i
++] = padding
;
1037 buff
[i
++] = padding
;
1040 rb_str_buf_cat(str
, buff
, i
);
/*
 * Upper-case hexadecimal digits, indexed by a 4-bit value; qpencode looks
 * up the high and the low nibble of a byte here.
 */
1043 static const char hex_table
[] = "0123456789ABCDEF";
1046 qpencode(VALUE str
, VALUE from
, long len
)
1049 long i
= 0, n
= 0, prev
= EOF
;
1050 unsigned char *s
= (unsigned char*)RSTRING_PTR(from
);
1051 unsigned char *send
= s
+ RSTRING_LEN(from
);
1055 (*s
< 32 && *s
!= '\n' && *s
!= '\t') ||
1058 buff
[i
++] = hex_table
[*s
>> 4];
1059 buff
[i
++] = hex_table
[*s
& 0x0f];
1063 else if (*s
== '\n') {
1064 if (prev
== ' ' || prev
== '\t') {
1084 rb_str_buf_cat(str
, buff
, i
);
1094 rb_str_buf_cat(str
, buff
, i
);
1102 case '0': case '1': case '2': case '3': case '4':
1103 case '5': case '6': case '7': case '8': case '9':
1105 case 'a': case 'b': case 'c':
1106 case 'd': case 'e': case 'f':
1107 return c
- 'a' + 10;
1108 case 'A': case 'B': case 'C':
1109 case 'D': case 'E': case 'F':
1110 return c
- 'A' + 10;
1116 #define PACK_LENGTH_ADJUST_SIZE(sz) do { \
1118 if (len > (send-s)/sz) { \
1120 tmp = len-(send-s)/sz; \
1122 len = (send-s)/sz; \
1127 #define PACK_LENGTH_ADJUST(type,sz) do { \
1128 int t__len = NATINT_LEN(type,(sz)); \
1129 PACK_LENGTH_ADJUST_SIZE(t__len); \
1132 #define PACK_LENGTH_ADJUST(type,sz) \
1133 PACK_LENGTH_ADJUST_SIZE(sizeof(type))
1136 #define PACK_ITEM_ADJUST() while (tmp--) rb_ary_push(ary, Qnil)
1139 infected_str_new(const char *ptr
, long len
, VALUE str
)
1141 VALUE s
= rb_str_new(ptr
, len
);
1149 * str.unpack(format) => anArray
1151 * Decodes <i>str</i> (which may contain binary data) according to the
1152 * format string, returning an array of each value extracted. The
1153 * format string consists of a sequence of single-character directives,
1154 * summarized in the table at the end of this entry.
1155 * Each directive may be followed
1156 * by a number, indicating the number of times to repeat with this
1157 * directive. An asterisk (``<code>*</code>'') will use up all
1158 * remaining elements. The directives <code>sSiIlL</code> may each be
1159 * followed by an underscore (``<code>_</code>'') to use the underlying
1160 * platform's native size for the specified type; otherwise, it uses a
1161 * platform-independent consistent size. Spaces are ignored in the
1162 * format string. See also <code>Array#pack</code>.
1164 * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
1165 * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
1166 * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
1167 * "aa".unpack('b8B8') #=> ["10000110", "01100001"]
1168 * "aaa".unpack('h2H2c') #=> ["16", "61", 97]
1169 * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
1170 * "now=20is".unpack('M*') #=> ["now is"]
1171 * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
1173 * This table summarizes the various formats and the Ruby classes
1176 * Format | Returns | Function
1177 * -------+---------+-----------------------------------------
1178 * A | String | arbitrary binary string with trailing
1179 * | | nulls and ASCII spaces removed
1180 * -------+---------+-----------------------------------------
1181 * a | String | arbitrary binary string
1182 * -------+---------+-----------------------------------------
1183 * B | String | extract bits from each character (msb first)
1184 * -------+---------+-----------------------------------------
1185 * b | String | extract bits from each character (lsb first)
1186 * -------+---------+-----------------------------------------
1187 * C | Fixnum | extract a byte (C char) as an unsigned integer
1188 * -------+---------+-----------------------------------------
1189 * c | Fixnum | extract a byte (C char) as an integer
1190 * -------+---------+-----------------------------------------
1191 * d,D | Float | treat sizeof(double) characters as
1192 * | | a native double
1193 * -------+---------+-----------------------------------------
1194 * E | Float | treat sizeof(double) characters as
1195 * | | a double in little-endian byte order
1196 * -------+---------+-----------------------------------------
1197 * e | Float | treat sizeof(float) characters as
1198 * | | a float in little-endian byte order
1199 * -------+---------+-----------------------------------------
1200 * f,F | Float | treat sizeof(float) characters as
1201 * | | a native float
1202 * -------+---------+-----------------------------------------
1203 * G | Float | treat sizeof(double) characters as
1204 * | | a double in network byte order
1205 * -------+---------+-----------------------------------------
1206 * g | Float | treat sizeof(float) characters as a
1207 * | | float in network byte order
1208 * -------+---------+-----------------------------------------
1209 * H | String | extract hex nibbles from each character
1210 * | | (most significant first)
1211 * -------+---------+-----------------------------------------
1212 * h | String | extract hex nibbles from each character
1213 * | | (least significant first)
1214 * -------+---------+-----------------------------------------
1215 * I | Integer | treat sizeof(int) (modified by _)
1216 * | | successive characters as an unsigned
1217 * | | native integer
1218 * -------+---------+-----------------------------------------
1219 * i | Integer | treat sizeof(int) (modified by _)
1220 * | | successive characters as a signed
1221 * | | native integer
1222 * -------+---------+-----------------------------------------
1223 * L | Integer | treat four (modified by _) successive
1224 * | | characters as an unsigned native
1226 * -------+---------+-----------------------------------------
1227 * l | Integer | treat four (modified by _) successive
1228 * | | characters as a signed native
1230 * -------+---------+-----------------------------------------
1231 * M | String | quoted-printable
1232 * -------+---------+-----------------------------------------
1233 * m | String | base64-encoded
1234 * -------+---------+-----------------------------------------
1235 * N | Integer | treat four characters as an unsigned
1236 * | | long in network byte order
1237 * -------+---------+-----------------------------------------
1238 * n | Fixnum | treat two characters as an unsigned
1239 * | | short in network byte order
1240 * -------+---------+-----------------------------------------
1241 * P | String | treat sizeof(char *) characters as a
1242 * | | pointer, and return <i>len</i> characters
1243 * | | from the referenced location
1244 * -------+---------+-----------------------------------------
1245 * p | String | treat sizeof(char *) characters as a
1246 * | | pointer to a null-terminated string
1247 * -------+---------+-----------------------------------------
1248 * Q | Integer | treat 8 characters as an unsigned
1249 * | | quad word (64 bits)
1250 * -------+---------+-----------------------------------------
1251 * q | Integer | treat 8 characters as a signed
1252 * | | quad word (64 bits)
1253 * -------+---------+-----------------------------------------
1254 * S | Fixnum | treat two (different if _ used)
1255 * | | successive characters as an unsigned
1256 * | | short in native byte order
1257 * -------+---------+-----------------------------------------
1258 * s | Fixnum | treat two (different if _ used)
1259 * | | successive characters as a signed short
1260 * | | in native byte order
1261 * -------+---------+-----------------------------------------
1262 * U | Integer | UTF-8 characters as unsigned integers
1263 * -------+---------+-----------------------------------------
1264 * u | String | UU-encoded
1265 * -------+---------+-----------------------------------------
1266 * V | Fixnum | treat four characters as an unsigned
1267 * | | long in little-endian byte order
1268 * -------+---------+-----------------------------------------
1269 * v | Fixnum | treat two characters as an unsigned
1270 * | | short in little-endian byte order
1271 * -------+---------+-----------------------------------------
1272 * w | Integer | BER-compressed integer (see Array.pack)
1273 * -------+---------+-----------------------------------------
1274 * X | --- | skip backward one character
1275 * -------+---------+-----------------------------------------
1276 * x | --- | skip forward one character
1277 * -------+---------+-----------------------------------------
1278 * Z | String | with trailing nulls removed
1279 * | | up to the first null with *
1280 * -------+---------+-----------------------------------------
1281 * @ | --- | skip to the offset given by the
1282 * | | length argument
1283 * -------+---------+-----------------------------------------
1287 pack_unpack(VALUE str
, VALUE fmt
)
1289 static const char *hexdigits
= "0123456789abcdef0123456789ABCDEFx";
1297 int natint
; /* native integer */
1299 int block_p
= rb_block_given_p();
1300 #define UNPACK_PUSH(item) do {\
1301 VALUE item_val = (item);\
1303 rb_yield(item_val);\
1306 rb_ary_push(ary, item_val);\
1312 s
= RSTRING_PTR(str
);
1313 send
= s
+ RSTRING_LEN(str
);
1314 p
= RSTRING_PTR(fmt
);
1315 pend
= p
+ RSTRING_LEN(fmt
);
1317 ary
= block_p
? Qnil
: rb_ary_new();
1324 if (ISSPACE(type
)) continue;
1326 while ((p
< pend
) && (*p
!= '\n')) {
1332 if (*p
== '_' || *p
== '!') {
1333 const char *natstr
= "sSiIlL";
1335 if (strchr(natstr
, type
)) {
1342 rb_raise(rb_eArgError
, "'%c' allowed only after types %s", *p
, natstr
);
1347 else if (*p
== '*') {
1352 else if (ISDIGIT(*p
)) {
1353 len
= STRTOUL(p
, (char**)&p
, 10);
1356 len
= (type
!= '@');
1361 rb_raise(rb_eArgError
, "%% is not supported");
1365 if (len
> send
- s
) len
= send
- s
;
1368 char *t
= s
+ len
- 1;
1371 if (*t
!= ' ' && *t
!= '\0') break;
1374 UNPACK_PUSH(infected_str_new(s
, len
, str
));
1383 if (len
> send
-s
) len
= send
-s
;
1384 while (t
< s
+len
&& *t
) t
++;
1385 UNPACK_PUSH(infected_str_new(s
, t
-s
, str
));
1387 s
= star
? t
: s
+len
;
1392 if (len
> send
- s
) len
= send
- s
;
1393 UNPACK_PUSH(infected_str_new(s
, len
, str
));
1404 if (p
[-1] == '*' || len
> (send
- s
) * 8)
1405 len
= (send
- s
) * 8;
1407 UNPACK_PUSH(bitstr
= rb_str_new(0, len
));
1408 t
= RSTRING_PTR(bitstr
);
1409 for (i
=0; i
<len
; i
++) {
1410 if (i
& 7) bits
>>= 1;
1412 *t
++ = (bits
& 1) ? '1' : '0';
1424 if (p
[-1] == '*' || len
> (send
- s
) * 8)
1425 len
= (send
- s
) * 8;
1427 UNPACK_PUSH(bitstr
= rb_str_new(0, len
));
1428 t
= RSTRING_PTR(bitstr
);
1429 for (i
=0; i
<len
; i
++) {
1430 if (i
& 7) bits
<<= 1;
1432 *t
++ = (bits
& 128) ? '1' : '0';
1444 if (p
[-1] == '*' || len
> (send
- s
) * 2)
1445 len
= (send
- s
) * 2;
1447 UNPACK_PUSH(bitstr
= rb_str_new(0, len
));
1448 t
= RSTRING_PTR(bitstr
);
1449 for (i
=0; i
<len
; i
++) {
1454 *t
++ = hexdigits
[bits
& 15];
1466 if (p
[-1] == '*' || len
> (send
- s
) * 2)
1467 len
= (send
- s
) * 2;
1469 UNPACK_PUSH(bitstr
= rb_str_new(0, len
));
1470 t
= RSTRING_PTR(bitstr
);
1471 for (i
=0; i
<len
; i
++) {
1476 *t
++ = hexdigits
[(bits
>> 4) & 15];
1482 PACK_LENGTH_ADJUST(char,sizeof(char));
1485 if (c
> (char)127) c
-=256;
1486 UNPACK_PUSH(INT2FIX(c
));
1492 PACK_LENGTH_ADJUST(unsigned char,sizeof(unsigned char));
1494 unsigned char c
= *s
++;
1495 UNPACK_PUSH(INT2FIX(c
));
1501 PACK_LENGTH_ADJUST(short,2);
1504 memcpy(OFF16(&tmp
), s
, NATINT_LEN(short,2));
1506 s
+= NATINT_LEN(short,2);
1507 UNPACK_PUSH(INT2FIX(tmp
));
1513 PACK_LENGTH_ADJUST(unsigned short,2);
1515 unsigned short tmp
= 0;
1516 memcpy(OFF16(&tmp
), s
, NATINT_LEN(unsigned short,2));
1517 s
+= NATINT_LEN(unsigned short,2);
1518 UNPACK_PUSH(INT2FIX(tmp
));
1524 PACK_LENGTH_ADJUST(int,sizeof(int));
1527 memcpy(&tmp
, s
, sizeof(int));
1529 UNPACK_PUSH(INT2NUM(tmp
));
1535 PACK_LENGTH_ADJUST(unsigned int,sizeof(unsigned int));
1538 memcpy(&tmp
, s
, sizeof(unsigned int));
1539 s
+= sizeof(unsigned int);
1540 UNPACK_PUSH(UINT2NUM(tmp
));
1546 PACK_LENGTH_ADJUST(long,4);
1549 memcpy(OFF32(&tmp
), s
, NATINT_LEN(long,4));
1551 s
+= NATINT_LEN(long,4);
1552 UNPACK_PUSH(LONG2NUM(tmp
));
1557 PACK_LENGTH_ADJUST(unsigned long,4);
1559 unsigned long tmp
= 0;
1560 memcpy(OFF32(&tmp
), s
, NATINT_LEN(unsigned long,4));
1561 s
+= NATINT_LEN(unsigned long,4);
1562 UNPACK_PUSH(ULONG2NUM(tmp
));
1568 PACK_LENGTH_ADJUST_SIZE(QUAD_SIZE
);
1570 char *tmp
= (char*)s
;
1572 UNPACK_PUSH(rb_quad_unpack(tmp
, 1));
1577 PACK_LENGTH_ADJUST_SIZE(QUAD_SIZE
);
1579 char *tmp
= (char*)s
;
1581 UNPACK_PUSH(rb_quad_unpack(tmp
, 0));
1586 PACK_LENGTH_ADJUST(unsigned short,2);
1588 unsigned short tmp
= 0;
1589 memcpy(OFF16B(&tmp
), s
, NATINT_LEN(unsigned short,2));
1590 s
+= NATINT_LEN(unsigned short,2);
1591 UNPACK_PUSH(UINT2NUM(ntohs(tmp
)));
1597 PACK_LENGTH_ADJUST(unsigned long,4);
1599 unsigned long tmp
= 0;
1600 memcpy(OFF32B(&tmp
), s
, NATINT_LEN(unsigned long,4));
1601 s
+= NATINT_LEN(unsigned long,4);
1602 UNPACK_PUSH(ULONG2NUM(ntohl(tmp
)));
1608 PACK_LENGTH_ADJUST(unsigned short,2);
1610 unsigned short tmp
= 0;
1611 memcpy(OFF16(&tmp
), s
, NATINT_LEN(unsigned short,2));
1612 s
+= NATINT_LEN(unsigned short,2);
1613 UNPACK_PUSH(UINT2NUM(vtohs(tmp
)));
1619 PACK_LENGTH_ADJUST(unsigned long,4);
1621 unsigned long tmp
= 0;
1622 memcpy(OFF32(&tmp
), s
, NATINT_LEN(long,4));
1623 s
+= NATINT_LEN(long,4);
1624 UNPACK_PUSH(ULONG2NUM(vtohl(tmp
)));
1631 PACK_LENGTH_ADJUST(float,sizeof(float));
1634 memcpy(&tmp
, s
, sizeof(float));
1636 UNPACK_PUSH(DOUBLE2NUM((double)tmp
));
1642 PACK_LENGTH_ADJUST(float,sizeof(float));
1645 FLOAT_CONVWITH(ftmp
);
1647 memcpy(&tmp
, s
, sizeof(float));
1649 tmp
= VTOHF(tmp
,ftmp
);
1650 UNPACK_PUSH(DOUBLE2NUM((double)tmp
));
1656 PACK_LENGTH_ADJUST(double,sizeof(double));
1659 DOUBLE_CONVWITH(dtmp
);
1661 memcpy(&tmp
, s
, sizeof(double));
1662 s
+= sizeof(double);
1663 tmp
= VTOHD(tmp
,dtmp
);
1664 UNPACK_PUSH(DOUBLE2NUM(tmp
));
1671 PACK_LENGTH_ADJUST(double,sizeof(double));
1674 memcpy(&tmp
, s
, sizeof(double));
1675 s
+= sizeof(double);
1676 UNPACK_PUSH(DOUBLE2NUM(tmp
));
1682 PACK_LENGTH_ADJUST(float,sizeof(float));
1685 FLOAT_CONVWITH(ftmp
;)
1687 memcpy(&tmp
, s
, sizeof(float));
1689 tmp
= NTOHF(tmp
,ftmp
);
1690 UNPACK_PUSH(DOUBLE2NUM((double)tmp
));
1696 PACK_LENGTH_ADJUST(double,sizeof(double));
1699 DOUBLE_CONVWITH(dtmp
);
1701 memcpy(&tmp
, s
, sizeof(double));
1702 s
+= sizeof(double);
1703 tmp
= NTOHD(tmp
,dtmp
);
1704 UNPACK_PUSH(DOUBLE2NUM(tmp
));
1710 if (len
> send
- s
) len
= send
- s
;
1711 while (len
> 0 && s
< send
) {
1712 long alen
= send
- s
;
1715 l
= utf8_to_uv(s
, &alen
);
1717 UNPACK_PUSH(ULONG2NUM(l
));
1723 VALUE buf
= infected_str_new(0, (send
- s
)*3/4, str
);
1724 char *ptr
= RSTRING_PTR(buf
);
1727 while (s
< send
&& *s
> ' ' && *s
< 'a') {
1732 len
= (*s
++ - ' ') & 077;
1734 if (total
> RSTRING_LEN(buf
)) {
1735 len
-= total
- RSTRING_LEN(buf
);
1736 total
= RSTRING_LEN(buf
);
1740 long mlen
= len
> 3 ? 3 : len
;
1742 if (s
< send
&& *s
>= ' ')
1743 a
= (*s
++ - ' ') & 077;
1746 if (s
< send
&& *s
>= ' ')
1747 b
= (*s
++ - ' ') & 077;
1750 if (s
< send
&& *s
>= ' ')
1751 c
= (*s
++ - ' ') & 077;
1754 if (s
< send
&& *s
>= ' ')
1755 d
= (*s
++ - ' ') & 077;
1758 hunk
[0] = a
<< 2 | b
>> 4;
1759 hunk
[1] = b
<< 4 | c
>> 2;
1760 hunk
[2] = c
<< 6 | d
;
1761 memcpy(ptr
, hunk
, mlen
);
1765 if (*s
== '\r') s
++;
1766 if (*s
== '\n') s
++;
1767 else if (s
< send
&& (s
+1 == send
|| s
[1] == '\n'))
1768 s
+= 2; /* possible checksum byte */
1771 rb_str_set_len(buf
, total
);
1778 VALUE buf
= infected_str_new(0, (send
- s
)*3/4, str
);
1779 char *ptr
= RSTRING_PTR(buf
);
1780 int a
= -1,b
= -1,c
= 0,d
;
1781 static int first
= 1;
1782 static int b64_xtable
[256];
1788 for (i
= 0; i
< 256; i
++) {
1791 for (i
= 0; i
< 64; i
++) {
1792 b64_xtable
[(int)b64_table
[i
]] = i
;
1797 while((a
= b64_xtable
[(int)(*(unsigned char*)s
)]) == -1 && s
< send
) { s
++; }
1798 if( s
>= send
) break;
1800 while((b
= b64_xtable
[(int)(*(unsigned char*)s
)]) == -1 && s
< send
) { s
++; }
1801 if( s
>= send
) break;
1803 while((c
= b64_xtable
[(int)(*(unsigned char*)s
)]) == -1 && s
< send
) { if( *s
== '=' ) break; s
++; }
1804 if( *s
== '=' || s
>= send
) break;
1806 while((d
= b64_xtable
[(int)(*(unsigned char*)s
)]) == -1 && s
< send
) { if( *s
== '=' ) break; s
++; }
1807 if( *s
== '=' || s
>= send
) break;
1809 *ptr
++ = a
<< 2 | b
>> 4;
1810 *ptr
++ = b
<< 4 | c
>> 2;
1811 *ptr
++ = c
<< 6 | d
;
1813 if (a
!= -1 && b
!= -1) {
1814 if (c
== -1 && *s
== '=')
1815 *ptr
++ = a
<< 2 | b
>> 4;
1816 else if (c
!= -1 && *s
== '=') {
1817 *ptr
++ = a
<< 2 | b
>> 4;
1818 *ptr
++ = b
<< 4 | c
>> 2;
1821 rb_str_set_len(buf
, ptr
- RSTRING_PTR(buf
));
1828 VALUE buf
= infected_str_new(0, send
- s
, str
);
1829 char *ptr
= RSTRING_PTR(buf
);
1834 if (++s
== send
) break;
1835 if (s
+1 < send
&& *s
== '\r' && *(s
+1) == '\n')
1838 if ((c1
= hex2num(*s
)) == -1) break;
1839 if (++s
== send
) break;
1840 if ((c2
= hex2num(*s
)) == -1) break;
1841 *ptr
++ = c1
<< 4 | c2
;
1849 rb_str_set_len(buf
, ptr
- RSTRING_PTR(buf
));
1855 if (len
> RSTRING_LEN(str
))
1856 rb_raise(rb_eArgError
, "@ outside of string");
1857 s
= RSTRING_PTR(str
) + len
;
1861 if (len
> s
- RSTRING_PTR(str
))
1862 rb_raise(rb_eArgError
, "X outside of string");
1868 rb_raise(rb_eArgError
, "x outside of string");
1873 if (sizeof(char *) <= send
- s
) {
1877 memcpy(&t
, s
, sizeof(char *));
1878 s
+= sizeof(char *);
1883 if (!(a
= rb_str_associated(str
))) {
1884 rb_raise(rb_eArgError
, "no associated pointer");
1887 pend
= p
+ RARRAY_LEN(a
);
1889 if (TYPE(*p
) == T_STRING
&& RSTRING_PTR(*p
) == t
) {
1890 if (len
< RSTRING_LEN(*p
)) {
1891 tmp
= rb_tainted_str_new(t
, len
);
1892 rb_str_associate(tmp
, a
);
1902 rb_raise(rb_eArgError
, "non associated pointer");
1910 if (len
> (send
- s
) / sizeof(char *))
1911 len
= (send
- s
) / sizeof(char *);
1913 if (send
- s
< sizeof(char *))
1919 memcpy(&t
, s
, sizeof(char *));
1920 s
+= sizeof(char *);
1925 if (!(a
= rb_str_associated(str
))) {
1926 rb_raise(rb_eArgError
, "no associated pointer");
1929 pend
= p
+ RARRAY_LEN(a
);
1931 if (TYPE(*p
) == T_STRING
&& RSTRING_PTR(*p
) == t
) {
1938 rb_raise(rb_eArgError
, "non associated pointer");
1948 unsigned long ul
= 0;
1949 unsigned long ulmask
= 0xfeUL
<< ((sizeof(unsigned long) - 1) * 8);
1951 while (len
> 0 && s
< send
) {
1954 if (!(*s
++ & 0x80)) {
1955 UNPACK_PUSH(ULONG2NUM(ul
));
1959 else if (ul
& ulmask
) {
1960 VALUE big
= rb_uint2big(ul
);
1961 VALUE big128
= rb_uint2big(128);
1963 big
= rb_big_mul(big
, big128
);
1964 big
= rb_big_plus(big
, rb_uint2big(*s
& 0x7f));
1965 if (!(*s
++ & 0x80)) {
1988 rb_uv_to_utf8(char buf
[6], unsigned long uv
)
1995 buf
[0] = ((uv
>>6)&0xff)|0xc0;
1996 buf
[1] = (uv
&0x3f)|0x80;
2000 buf
[0] = ((uv
>>12)&0xff)|0xe0;
2001 buf
[1] = ((uv
>>6)&0x3f)|0x80;
2002 buf
[2] = (uv
&0x3f)|0x80;
2005 if (uv
<= 0x1fffff) {
2006 buf
[0] = ((uv
>>18)&0xff)|0xf0;
2007 buf
[1] = ((uv
>>12)&0x3f)|0x80;
2008 buf
[2] = ((uv
>>6)&0x3f)|0x80;
2009 buf
[3] = (uv
&0x3f)|0x80;
2012 if (uv
<= 0x3ffffff) {
2013 buf
[0] = ((uv
>>24)&0xff)|0xf8;
2014 buf
[1] = ((uv
>>18)&0x3f)|0x80;
2015 buf
[2] = ((uv
>>12)&0x3f)|0x80;
2016 buf
[3] = ((uv
>>6)&0x3f)|0x80;
2017 buf
[4] = (uv
&0x3f)|0x80;
2020 if (uv
<= 0x7fffffff) {
2021 buf
[0] = ((uv
>>30)&0xff)|0xfc;
2022 buf
[1] = ((uv
>>24)&0x3f)|0x80;
2023 buf
[2] = ((uv
>>18)&0x3f)|0x80;
2024 buf
[3] = ((uv
>>12)&0x3f)|0x80;
2025 buf
[4] = ((uv
>>6)&0x3f)|0x80;
2026 buf
[5] = (uv
&0x3f)|0x80;
2029 rb_raise(rb_eRangeError
, "pack(U): value out of range");
2032 static const unsigned long utf8_limits
[] = {
2042 static unsigned long
2043 utf8_to_uv(const char *p
, long *lenp
)
2045 int c
= *p
++ & 0xff;
2046 unsigned long uv
= c
;
2055 rb_raise(rb_eArgError
, "malformed UTF-8 character");
2058 if (!(uv
& 0x20)) { n
= 2; uv
&= 0x1f; }
2059 else if (!(uv
& 0x10)) { n
= 3; uv
&= 0x0f; }
2060 else if (!(uv
& 0x08)) { n
= 4; uv
&= 0x07; }
2061 else if (!(uv
& 0x04)) { n
= 5; uv
&= 0x03; }
2062 else if (!(uv
& 0x02)) { n
= 6; uv
&= 0x01; }
2065 rb_raise(rb_eArgError
, "malformed UTF-8 character");
2068 rb_raise(rb_eArgError
, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
2075 if ((c
& 0xc0) != 0x80) {
2077 rb_raise(rb_eArgError
, "malformed UTF-8 character");
2086 if (uv
< utf8_limits
[n
]) {
2087 rb_raise(rb_eArgError
, "redundant UTF-8 sequence");
2095 rb_define_method(rb_cArray
, "pack", pack_pack
, 1);
2096 rb_define_method(rb_cString
, "unpack", pack_unpack
, 1);