1 /**********************************************************************
6 created at: Thu Feb 10 15:17:05 JST 1994
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "ruby/ruby.h"
13 #include <sys/types.h>
20 #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4
/*
 * Helpers for the case selected by the preceding #if (short is not 2 bytes
 * or long is not 4 bytes).  Each one reads a variable named `natint` that
 * must be in scope wherever the macro is expanded.
 *
 * OFF16B(p) / OFF32B(p): yield p as a char*.  When natint is nonzero the
 * pointer is unchanged; otherwise it is advanced by
 * sizeof(short) - SIZE16 (resp. sizeof(long) - SIZE32) bytes.
 *
 * NATINT_LEN(type,len): sizeof(type) when natint is nonzero, else len.
 */
25 # define OFF16B(p) ((char*)(p) + (natint?0:(sizeof(short) - SIZE16)))
26 # define OFF32B(p) ((char*)(p) + (natint?0:(sizeof(long) - SIZE32)))
27 # define NATINT_LEN(type,len) (natint?sizeof(type):(len))
28 # ifdef WORDS_BIGENDIAN
29 # define OFF16(p) OFF16B(p)
30 # define OFF32(p) OFF32B(p)
/*
 * natint-aware byte-order conversions: with natint nonzero they use the
 * native-width htovs/htovl/htons/htonl, otherwise the fixed-width
 * htov16/htov32/hton16/hton32.  `natint` must be in scope at the use site.
 */
32 # define NATINT_HTOVS(x) (natint?htovs(x):htov16(x))
33 # define NATINT_HTOVL(x) (natint?htovl(x):htov32(x))
34 # define NATINT_HTONS(x) (natint?htons(x):hton16(x))
35 # define NATINT_HTONL(x) (natint?htonl(x):hton32(x))
/*
 * natint-independent variants: the length is always sizeof(type) (the len
 * argument is ignored) and the conversions always use htovs/htovl/htons/htonl.
 * NOTE(review): presumably the alternative branch of the size check above;
 * the selecting preprocessor line is not visible here -- confirm.
 */
37 # define NATINT_LEN(type,len) sizeof(type)
38 # define NATINT_HTOVS(x) htovs(x)
39 # define NATINT_HTOVL(x) htovl(x)
40 # define NATINT_HTONS(x) htons(x)
41 # define NATINT_HTONL(x) htonl(x)
45 # define OFF16(p) (char*)(p)
46 # define OFF32(p) (char*)(p)
49 # define OFF16B(p) (char*)(p)
50 # define OFF32B(p) (char*)(p)
53 #define define_swapx(x, xtype) \
55 TOKEN_PASTE(swap,x)(xtype z) \
59 unsigned char *s, *t; \
62 zp = xmalloc(sizeof(xtype)); \
64 s = (unsigned char*)zp; \
65 t = xmalloc(sizeof(xtype)); \
66 for (i=0; i<sizeof(xtype); i++) { \
67 t[sizeof(xtype)-i-1] = s[i]; \
76 #define swap16(x) ((((x)&0xFF)<<8) | (((x)>>8)&0xFF))
79 #define swaps(x) swap16(x)
82 #define swaps(x) ((((x)&0xFF)<<24) \
84 |(((x)&0x0000FF00)<<8) \
85 |(((x)&0x00FF0000)>>8) )
92 #define swap32(x) ((((x)&0xFF)<<24) \
94 |(((x)&0x0000FF00)<<8) \
95 |(((x)&0x00FF0000)>>8) )
98 #define swapl(x) swap32(x)
/*
 * swapl(x), 8-byte form: reverses the byte order of a 64-bit value by
 * exchanging byte 0 with 7, 1 with 6, 2 with 5 and 3 with 4 (byte 0 being
 * the least significant).  x is expanded eight times, so the argument must
 * be free of side effects.
 * NOTE(review): presumably the definition used when long is 8 bytes; the
 * selecting #if is not visible here -- confirm.
 */
101 #define swapl(x) ((((x)&0x00000000000000FF)<<56) \
102 |(((x)&0xFF00000000000000)>>56) \
103 |(((x)&0x000000000000FF00)<<40) \
104 |(((x)&0x00FF000000000000)>>40) \
105 |(((x)&0x0000000000FF0000)<<24) \
106 |(((x)&0x0000FF0000000000)>>24) \
107 |(((x)&0x00000000FF000000)<<8) \
108 |(((x)&0x000000FF00000000)>>8))
114 #if SIZEOF_FLOAT == 4
115 #if SIZEOF_LONG == 4 /* SIZEOF_FLOAT == 4 == SIZEOF_LONG */
116 #define swapf(x) swapl(x)
117 #define FLOAT_SWAPPER unsigned long
119 #if SIZEOF_SHORT == 4 /* SIZEOF_FLOAT == 4 == SIZEOF_SHORT */
120 #define swapf(x) swaps(x)
121 #define FLOAT_SWAPPER unsigned short
122 #else /* SIZEOF_FLOAT == 4 but neither long nor short is 4 bytes */
123 define_swapx(f
,float)
124 #endif /* #if SIZEOF_SHORT == 4 */
125 #endif /* #if SIZEOF_LONG == 4 */
126 #else /* SIZEOF_FLOAT != 4 */
127 define_swapx(f
,float)
128 #endif /* #if SIZEOF_FLOAT == 4 */
130 #if SIZEOF_DOUBLE == 8
131 #if SIZEOF_LONG == 8 /* SIZEOF_DOUBLE == 8 == SIZEOF_LONG */
132 #define swapd(x) swapl(x)
133 #define DOUBLE_SWAPPER unsigned long
135 #if SIZEOF_LONG == 4 /* SIZEOF_DOUBLE == 8 && 4 == SIZEOF_LONG */
137 swapd(const double d
)
140 unsigned long utmp
[2];
143 utmp
[0] = 0; utmp
[1] = 0;
144 memcpy(utmp
,&dtmp
,sizeof(double));
146 utmp
[0] = swapl(utmp
[1]);
147 utmp
[1] = swapl(utmp0
);
148 memcpy(&dtmp
,utmp
,sizeof(double));
152 #if SIZEOF_SHORT == 4 /* SIZEOF_DOUBLE == 8 && 4 == SIZEOF_SHORT */
154 swapd(const double d
)
157 unsigned short utmp
[2];
158 unsigned short utmp0
;
160 utmp
[0] = 0; utmp
[1] = 0;
161 memcpy(utmp
,&dtmp
,sizeof(double));
163 utmp
[0] = swaps(utmp
[1]);
164 utmp
[1] = swaps(utmp0
);
165 memcpy(&dtmp
,utmp
,sizeof(double));
168 #else /* SIZEOF_DOUBLE == 8 but long is neither 8 nor 4 bytes and short is not 4 bytes */
169 define_swapx(d
, double)
170 #endif /* #if SIZEOF_SHORT == 4 */
171 #endif /* #if SIZEOF_LONG == 4 */
172 #endif /* #if SIZEOF_LONG == 8 */
173 #else /* SIZEOF_DOUBLE != 8 */
174 define_swapx(d
, double)
175 #endif /* #if SIZEOF_DOUBLE == 8 */
179 #ifdef DYNAMIC_ENDIAN
190 static int endian_value
;
193 if (init
) return endian_value
;
196 return endian_value
= p
[0]?0:1;
/*
 * Byte-order conversions decided at run time by calling endian().
 *
 *   ntoh* / hton*  (network order): return x unchanged when endian() is
 *                                   nonzero, otherwise byte-swap it.
 *   htov* / vtoh*  (VAX order):     byte-swap x when endian() is nonzero,
 *                                   otherwise return it unchanged.
 *
 * The suffix picks the swapper: s -> swaps, l -> swapl, f -> swapf,
 * d -> swapd.  Each macro calls endian() once per expansion and mentions
 * x in both arms of the conditional, only one of which is evaluated.
 */
199 #define ntohs(x) (endian()?(x):swaps(x))
200 #define ntohl(x) (endian()?(x):swapl(x))
201 #define ntohf(x) (endian()?(x):swapf(x))
202 #define ntohd(x) (endian()?(x):swapd(x))
203 #define htons(x) (endian()?(x):swaps(x))
204 #define htonl(x) (endian()?(x):swapl(x))
205 #define htonf(x) (endian()?(x):swapf(x))
206 #define htond(x) (endian()?(x):swapd(x))
207 #define htovs(x) (endian()?swaps(x):(x))
208 #define htovl(x) (endian()?swapl(x):(x))
209 #define htovf(x) (endian()?swapf(x):(x))
210 #define htovd(x) (endian()?swapd(x):(x))
211 #define vtohs(x) (endian()?swaps(x):(x))
212 #define vtohl(x) (endian()?swapl(x):(x))
213 #define vtohf(x) (endian()?swapf(x):(x))
214 #define vtohd(x) (endian()?swapd(x):(x))
/* Fixed-width (16/32-bit) counterparts, built on swap16/swap32. */
216 #define htov16(x) (endian()?swap16(x):(x))
217 #define htov32(x) (endian()?swap32(x):(x))
218 #define hton16(x) (endian()?(x):swap16(x))
219 #define hton32(x) (endian()?(x):swap32(x))
222 #ifdef WORDS_BIGENDIAN
/*
 * WORDS_BIGENDIAN branch: every VAX-order conversion (htov* / vtoh*) is an
 * unconditional byte swap, while the fixed-width network-order forms
 * hton16/hton32 return x unchanged.
 */
233 #define htovs(x) swaps(x)
234 #define htovl(x) swapl(x)
235 #define htovf(x) swapf(x)
236 #define htovd(x) swapd(x)
237 #define vtohs(x) swaps(x)
238 #define vtohl(x) swapl(x)
239 #define vtohf(x) swapf(x)
240 #define vtohd(x) swapd(x)
242 #define htov16(x) swap16(x)
243 #define htov32(x) swap32(x)
244 #define hton16(x) (x)
245 #define hton32(x) (x)
247 #else /* LITTLE ENDIAN */
/*
 * Little-endian branch: every network-order conversion (ntoh* / hton*) is
 * an unconditional byte swap, while the fixed-width VAX-order forms
 * htov16/htov32 return x unchanged.
 */
254 #define ntohs(x) swaps(x)
255 #define ntohl(x) swapl(x)
256 #define htons(x) swaps(x)
257 #define htonl(x) swapl(x)
258 #define ntohf(x) swapf(x)
259 #define ntohd(x) swapd(x)
260 #define htonf(x) swapf(x)
261 #define htond(x) swapd(x)
271 #define htov16(x) (x)
272 #define htov32(x) (x)
273 #define hton16(x) swap16(x)
274 #define hton32(x) swap32(x)
280 #define FLOAT_CONVWITH(y) FLOAT_SWAPPER y;
281 #define HTONF(x,y) (memcpy(&y,&x,sizeof(float)), \
282 y = htonf((FLOAT_SWAPPER)y), \
283 memcpy(&x,&y,sizeof(float)), \
285 #define HTOVF(x,y) (memcpy(&y,&x,sizeof(float)), \
286 y = htovf((FLOAT_SWAPPER)y), \
287 memcpy(&x,&y,sizeof(float)), \
289 #define NTOHF(x,y) (memcpy(&y,&x,sizeof(float)), \
290 y = ntohf((FLOAT_SWAPPER)y), \
291 memcpy(&x,&y,sizeof(float)), \
293 #define VTOHF(x,y) (memcpy(&y,&x,sizeof(float)), \
294 y = vtohf((FLOAT_SWAPPER)y), \
295 memcpy(&x,&y,sizeof(float)), \
298 #define FLOAT_CONVWITH(y)
/*
 * Pass-through float conversions: the scratch argument y is ignored and x
 * is handed directly to htonf/htovf/ntohf/vtohf.
 * NOTE(review): presumably the forms used when FLOAT_SWAPPER is not
 * defined; the selecting preprocessor line is not visible here -- confirm.
 */
299 #define HTONF(x,y) htonf(x)
300 #define HTOVF(x,y) htovf(x)
301 #define NTOHF(x,y) ntohf(x)
302 #define VTOHF(x,y) vtohf(x)
305 #ifdef DOUBLE_SWAPPER
306 #define DOUBLE_CONVWITH(y) DOUBLE_SWAPPER y;
307 #define HTOND(x,y) (memcpy(&y,&x,sizeof(double)), \
308 y = htond((DOUBLE_SWAPPER)y), \
309 memcpy(&x,&y,sizeof(double)), \
311 #define HTOVD(x,y) (memcpy(&y,&x,sizeof(double)), \
312 y = htovd((DOUBLE_SWAPPER)y), \
313 memcpy(&x,&y,sizeof(double)), \
315 #define NTOHD(x,y) (memcpy(&y,&x,sizeof(double)), \
316 y = ntohd((DOUBLE_SWAPPER)y), \
317 memcpy(&x,&y,sizeof(double)), \
319 #define VTOHD(x,y) (memcpy(&y,&x,sizeof(double)), \
320 y = vtohd((DOUBLE_SWAPPER)y), \
321 memcpy(&x,&y,sizeof(double)), \
324 #define DOUBLE_CONVWITH(y)
/*
 * Pass-through double conversions: the scratch argument y is ignored and x
 * is handed directly to htond/htovd/ntohd/vtohd.
 * NOTE(review): presumably the forms used when DOUBLE_SWAPPER is not
 * defined; the selecting #else is not visible here -- confirm.
 */
325 #define HTOND(x,y) htond(x)
326 #define HTOVD(x,y) htovd(x)
327 #define NTOHD(x,y) ntohd(x)
328 #define VTOHD(x,y) vtohd(x)
331 unsigned long rb_big2ulong_pack(VALUE x
);
336 x
= rb_to_int(x
); /* is nil OK? (should not) */
338 if (FIXNUM_P(x
)) return FIX2LONG(x
);
339 if (TYPE(x
) == T_BIGNUM
) {
340 return rb_big2ulong_pack(x
);
342 rb_raise(rb_eTypeError
, "can't convert %s to `integer'", rb_obj_classname(x
));
343 return 0; /* not reached */
346 #if SIZEOF_LONG == SIZE32
349 /* invariant in modulo 1<<31 */
/*
 * EXTEND32(x): statement macro; x must be an assignable lvalue and a
 * variable named natint must be in scope.  When natint is zero, x is
 * replaced by (0x7fffffff - x) XOR 0x7fffffff.  For 0 <= x < 2^31 this
 * leaves x as it was; for a wider long holding 2^31 <= x < 2^32 it yields
 * x - 2^32, i.e. bit 31 is propagated as the sign.  When natint is nonzero
 * x is left untouched.
 * NOTE(review): ~0L<<31 left-shifts a negative value, which ISO C leaves
 * undefined; ((1L<<31)-1) denotes the same mask -- confirm before changing.
 */
350 # define EXTEND32(x) do { if (!natint) {(x) = (((1L<<31)-1-(x))^~(~0L<<31));}} while(0)
352 #if SIZEOF_SHORT == SIZE16
/*
 * EXTEND16(x): 16-bit counterpart of EXTEND32.  When natint is zero, x is
 * replaced by (short)((0x7fff - x) XOR 0x7fff): values 0..0x7fff are kept
 * and values 0x8000..0xffff become x - 0x10000 before the cast to short.
 * When natint is nonzero x is left untouched.
 * NOTE(review): ~0<<15 left-shifts a negative value, which ISO C leaves
 * undefined; ((1<<15)-1) denotes the same mask -- confirm before changing.
 */
355 # define EXTEND16(x) do { if (!natint) {(x) = (short)(((1<<15)-1-(x))^~(~0<<15));}} while(0)
358 #ifdef HAVE_LONG_LONG
359 # define QUAD_SIZE sizeof(LONG_LONG)
363 static const char toofew
[] = "too few arguments";
365 static void encodes(VALUE
,const char*,long,int);
366 static void qpencode(VALUE
,VALUE
,long);
368 static unsigned long utf8_to_uv(const char*,long*);
372 * arr.pack ( aTemplateString ) -> aBinaryString
374 * Packs the contents of <i>arr</i> into a binary sequence according to
375 * the directives in <i>aTemplateString</i> (see the table below).
376 * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count,
377 * which gives the width of the resulting field. The remaining
378 * directives also may take a count, indicating the number of array
379 * elements to convert. If the count is an asterisk
380 * (``<code>*</code>''), all remaining array elements will be
381 * converted. Any of the directives ``<code>sSiIlL</code>'' may be
382 * followed by an underscore (``<code>_</code>'') to use the underlying
383 * platform's native size for the specified type; otherwise, they use a
384 * platform-independent size. Spaces are ignored in the template
385 * string. See also <code>String#unpack</code>.
387 * a = [ "a", "b", "c" ]
389 * a.pack("A3A3A3") #=> "a b c "
390 * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000"
391 * n.pack("ccc") #=> "ABC"
393 * Directives for +pack+.
396 * ---------------------------------------------------------------
397 * @ | Moves to absolute position
398 * A | arbitrary binary string (space padded, count is width)
399 * a | arbitrary binary string (null padded, count is width)
400 * B | Bit string (descending bit order)
401 * b | Bit string (ascending bit order)
402 * C | Unsigned byte (C unsigned char)
404 * D, d | Double-precision float, native format
405 * E | Double-precision float, little-endian byte order
406 * e | Single-precision float, little-endian byte order
407 * F, f | Single-precision float, native format
408 * G | Double-precision float, network (big-endian) byte order
409 * g | Single-precision float, network (big-endian) byte order
410 * H | Hex string (high nibble first)
411 * h | Hex string (low nibble first)
412 * I | Unsigned integer
416 * M | Quoted printable, MIME encoding (see RFC2045)
417 * m | Base64 encoded string
418 * N | Long, network (big-endian) byte order
419 * n | Short, network (big-endian) byte-order
420 * P | Pointer to a structure (fixed-length string)
421 * p | Pointer to a null-terminated string
422 * Q, q | 64-bit number
426 * u | UU-encoded string
427 * V | Long, little-endian byte order
428 * v | Short, little-endian byte order
429 * w | BER-compressed integer
432 * Z | Same as ``a'', except that null is added with *
436 pack_pack(VALUE ary
, VALUE fmt
)
438 static const char nul10
[] = "\0\0\0\0\0\0\0\0\0\0";
439 static const char spc10
[] = " ";
440 const char *p
, *pend
;
441 VALUE res
, from
, associates
= 0;
443 long items
, len
, idx
, plen
;
446 int natint
; /* native integer */
450 p
= RSTRING_PTR(fmt
);
451 pend
= p
+ RSTRING_LEN(fmt
);
452 res
= rb_str_buf_new(0);
454 items
= RARRAY_LEN(ary
);
/*
 * Argument-fetching helpers; they rely on the local variables ary, items
 * and idx.
 *
 * TOO_FEW:  raises rb_eArgError with the `toofew` message.  The trailing
 *           ", 0" only gives the comma expression a value so it can sit
 *           in the else-arm of the conditionals below.
 * THISFROM: the element at idx, without consuming it; TOO_FEW if
 *           items <= 0.
 * NEXTFROM: the element at idx, then advances idx; items is decremented
 *           as part of the test.  TOO_FEW if no items were left.
 */
457 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
458 #define THISFROM (items > 0 ? RARRAY_PTR(ary)[idx] : TOO_FEW)
459 #define NEXTFROM (items-- > 0 ? RARRAY_PTR(ary)[idx++] : TOO_FEW)
462 if (RSTRING_PTR(fmt
) + RSTRING_LEN(fmt
) != pend
) {
463 rb_raise(rb_eRuntimeError
, "format string modified");
465 type
= *p
++; /* get data type */
470 if (ISSPACE(type
)) continue;
472 while ((p
< pend
) && (*p
!= '\n')) {
477 if (*p
== '_' || *p
== '!') {
478 static const char natstr
[] = "sSiIlL";
480 if (strchr(natstr
, type
)) {
487 rb_raise(rb_eArgError
, "'%c' allowed only after types %s", *p
, natstr
);
490 if (*p
== '*') { /* set data length */
491 len
= strchr("@Xxu", type
) ? 0 : items
;
494 else if (ISDIGIT(*p
)) {
496 len
= STRTOUL(p
, (char**)&p
, 10);
498 rb_raise(rb_eRangeError
, "pack length too big");
506 case 'A': case 'a': case 'Z':
516 ptr
= RSTRING_PTR(from
);
517 plen
= RSTRING_LEN(from
);
518 OBJ_INFECT(res
, from
);
525 case 'a': /* arbitrary binary string (null padded) */
526 case 'A': /* arbitrary binary string (ASCII space padded) */
527 case 'Z': /* null terminated string */
529 rb_str_buf_cat(res
, ptr
, len
);
530 if (p
[-1] == '*' && type
== 'Z')
531 rb_str_buf_cat(res
, nul10
, 1);
534 rb_str_buf_cat(res
, ptr
, plen
);
537 rb_str_buf_cat(res
, (type
== 'A')?spc10
:nul10
, 10);
540 rb_str_buf_cat(res
, (type
== 'A')?spc10
:nul10
, len
);
544 case 'b': /* bit string (ascending) */
550 j
= (len
- plen
+ 1)/2;
553 for (i
=0; i
++ < len
; ptr
++) {
559 char c
= byte
& 0xff;
560 rb_str_buf_cat(res
, &c
, 1);
566 byte
>>= 7 - (len
& 7);
568 rb_str_buf_cat(res
, &c
, 1);
575 case 'B': /* bit string (descending) */
581 j
= (len
- plen
+ 1)/2;
584 for (i
=0; i
++ < len
; ptr
++) {
589 char c
= byte
& 0xff;
590 rb_str_buf_cat(res
, &c
, 1);
596 byte
<<= 7 - (len
& 7);
598 rb_str_buf_cat(res
, &c
, 1);
605 case 'h': /* hex string (low nibble first) */
611 j
= (len
- plen
+ 1)/2;
614 for (i
=0; i
++ < len
; ptr
++) {
616 byte
|= (((*ptr
& 15) + 9) & 15) << 4;
618 byte
|= (*ptr
& 15) << 4;
622 char c
= byte
& 0xff;
623 rb_str_buf_cat(res
, &c
, 1);
628 char c
= byte
& 0xff;
629 rb_str_buf_cat(res
, &c
, 1);
636 case 'H': /* hex string (high nibble first) */
642 j
= (len
- plen
+ 1)/2;
645 for (i
=0; i
++ < len
; ptr
++) {
647 byte
|= ((*ptr
& 15) + 9) & 15;
653 char c
= byte
& 0xff;
654 rb_str_buf_cat(res
, &c
, 1);
659 char c
= byte
& 0xff;
660 rb_str_buf_cat(res
, &c
, 1);
669 case 'c': /* signed char */
670 case 'C': /* unsigned char */
676 rb_str_buf_cat(res
, &c
, sizeof(char));
680 case 's': /* signed short */
681 case 'S': /* unsigned short */
687 rb_str_buf_cat(res
, OFF16(&s
), NATINT_LEN(short,2));
691 case 'i': /* signed int */
692 case 'I': /* unsigned int */
698 rb_str_buf_cat(res
, (char*)&i
, sizeof(int));
702 case 'l': /* signed long */
703 case 'L': /* unsigned long */
709 rb_str_buf_cat(res
, OFF32(&l
), NATINT_LEN(long,4));
713 case 'q': /* signed quad (64bit) int */
714 case 'Q': /* unsigned quad (64bit) int */
719 rb_quad_pack(tmp
, from
);
720 rb_str_buf_cat(res
, (char*)&tmp
, QUAD_SIZE
);
724 case 'n': /* unsigned short (network byte-order) */
731 rb_str_buf_cat(res
, OFF16(&s
), NATINT_LEN(short,2));
735 case 'N': /* unsigned long (network byte-order) */
742 rb_str_buf_cat(res
, OFF32(&l
), NATINT_LEN(long,4));
746 case 'v': /* unsigned short (VAX byte-order) */
753 rb_str_buf_cat(res
, OFF16(&s
), NATINT_LEN(short,2));
757 case 'V': /* unsigned long (VAX byte-order) */
764 rb_str_buf_cat(res
, OFF32(&l
), NATINT_LEN(long,4));
768 case 'f': /* single precision float in native format */
769 case 'F': /* ditto */
774 f
= RFLOAT_VALUE(rb_Float(from
));
775 rb_str_buf_cat(res
, (char*)&f
, sizeof(float));
779 case 'e': /* single precision float in VAX byte-order */
782 FLOAT_CONVWITH(ftmp
);
785 f
= RFLOAT_VALUE(rb_Float(from
));
787 rb_str_buf_cat(res
, (char*)&f
, sizeof(float));
791 case 'E': /* double precision float in VAX byte-order */
794 DOUBLE_CONVWITH(dtmp
);
797 d
= RFLOAT_VALUE(rb_Float(from
));
799 rb_str_buf_cat(res
, (char*)&d
, sizeof(double));
803 case 'd': /* double precision float in native format */
804 case 'D': /* ditto */
809 d
= RFLOAT_VALUE(rb_Float(from
));
810 rb_str_buf_cat(res
, (char*)&d
, sizeof(double));
814 case 'g': /* single precision float in network byte-order */
817 FLOAT_CONVWITH(ftmp
);
820 f
= RFLOAT_VALUE(rb_Float(from
));
822 rb_str_buf_cat(res
, (char*)&f
, sizeof(float));
826 case 'G': /* double precision float in network byte-order */
829 DOUBLE_CONVWITH(dtmp
);
832 d
= RFLOAT_VALUE(rb_Float(from
));
834 rb_str_buf_cat(res
, (char*)&d
, sizeof(double));
838 case 'x': /* null byte */
841 rb_str_buf_cat(res
, nul10
, 10);
844 rb_str_buf_cat(res
, nul10
, len
);
847 case 'X': /* back up byte */
849 plen
= RSTRING_LEN(res
);
851 rb_raise(rb_eArgError
, "X outside of string");
852 rb_str_set_len(res
, plen
- len
);
855 case '@': /* null fill to absolute position */
856 len
-= RSTRING_LEN(res
);
857 if (len
> 0) goto grow
;
859 if (len
> 0) goto shrink
;
863 rb_raise(rb_eArgError
, "%% is not supported");
866 case 'U': /* Unicode character */
873 from
= rb_to_int(from
);
876 rb_raise(rb_eRangeError
, "pack(U): value out of range");
878 le
= rb_uv_to_utf8(buf
, l
);
879 rb_str_buf_cat(res
, (char*)buf
, le
);
883 case 'u': /* uuencoded string */
884 case 'm': /* base64 encoded string */
887 ptr
= RSTRING_PTR(from
);
888 plen
= RSTRING_LEN(from
);
901 encodes(res
, ptr
, todo
, type
);
907 case 'M': /* quoted-printable encoded string */
908 from
= rb_obj_as_string(NEXTFROM
);
911 qpencode(res
, from
, len
);
914 case 'P': /* pointer to packed byte string */
918 if (RSTRING_LEN(from
) < len
) {
919 rb_raise(rb_eArgError
, "too short buffer for P(%ld for %ld)",
920 RSTRING_LEN(from
), len
);
925 case 'p': /* pointer to string */
933 t
= StringValuePtr(from
);
936 associates
= rb_ary_new();
938 rb_ary_push(associates
, from
);
940 rb_str_buf_cat(res
, (char*)&t
, sizeof(char*));
944 case 'w': /* BER compressed integer */
947 VALUE buf
= rb_str_new(0, 0);
948 char c
, *bufs
, *bufe
;
951 if (TYPE(from
) == T_BIGNUM
) {
952 VALUE big128
= rb_uint2big(128);
953 while (TYPE(from
) == T_BIGNUM
) {
954 from
= rb_big_divmod(from
, big128
);
955 c
= NUM2INT(RARRAY_PTR(from
)[1]) | 0x80; /* mod */
956 rb_str_buf_cat(buf
, &c
, sizeof(char));
957 from
= RARRAY_PTR(from
)[0]; /* div */
962 long l
= NUM2LONG(from
);
964 rb_raise(rb_eArgError
, "can't compress negative numbers");
970 c
= ((ul
& 0x7f) | 0x80);
971 rb_str_buf_cat(buf
, &c
, sizeof(char));
975 if (RSTRING_LEN(buf
)) {
976 bufs
= RSTRING_PTR(buf
);
977 bufe
= bufs
+ RSTRING_LEN(buf
) - 1;
978 *bufs
&= 0x7f; /* clear continue bit */
979 while (bufs
< bufe
) { /* reverse */
984 rb_str_buf_cat(res
, RSTRING_PTR(buf
), RSTRING_LEN(buf
));
988 rb_str_buf_cat(res
, &c
, sizeof(char));
999 rb_str_associate(res
, associates
);
1004 static const char uu_table
[] =
1005 "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1006 static const char b64_table
[] =
1007 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1010 encodes(VALUE str
, const char *s
, long len
, int type
)
1012 char *buff
= ALLOCA_N(char, len
* 4 / 3 + 6);
1014 const char *trans
= type
== 'u' ? uu_table
: b64_table
;
1018 buff
[i
++] = len
+ ' ';
1025 buff
[i
++] = trans
[077 & (*s
>> 2)];
1026 buff
[i
++] = trans
[077 & (((*s
<< 4) & 060) | ((s
[1] >> 4) & 017))];
1027 buff
[i
++] = trans
[077 & (((s
[1] << 2) & 074) | ((s
[2] >> 6) & 03))];
1028 buff
[i
++] = trans
[077 & s
[2]];
1033 buff
[i
++] = trans
[077 & (*s
>> 2)];
1034 buff
[i
++] = trans
[077 & (((*s
<< 4) & 060) | ((s
[1] >> 4) & 017))];
1035 buff
[i
++] = trans
[077 & (((s
[1] << 2) & 074) | (('\0' >> 6) & 03))];
1036 buff
[i
++] = padding
;
1038 else if (len
== 1) {
1039 buff
[i
++] = trans
[077 & (*s
>> 2)];
1040 buff
[i
++] = trans
[077 & (((*s
<< 4) & 060) | (('\0' >> 4) & 017))];
1041 buff
[i
++] = padding
;
1042 buff
[i
++] = padding
;
1045 rb_str_buf_cat(str
, buff
, i
);
1048 static const char hex_table
[] = "0123456789ABCDEF";
1051 qpencode(VALUE str
, VALUE from
, long len
)
1054 long i
= 0, n
= 0, prev
= EOF
;
1055 unsigned char *s
= (unsigned char*)RSTRING_PTR(from
);
1056 unsigned char *send
= s
+ RSTRING_LEN(from
);
1060 (*s
< 32 && *s
!= '\n' && *s
!= '\t') ||
1063 buff
[i
++] = hex_table
[*s
>> 4];
1064 buff
[i
++] = hex_table
[*s
& 0x0f];
1068 else if (*s
== '\n') {
1069 if (prev
== ' ' || prev
== '\t') {
1089 rb_str_buf_cat(str
, buff
, i
);
1099 rb_str_buf_cat(str
, buff
, i
);
1107 case '0': case '1': case '2': case '3': case '4':
1108 case '5': case '6': case '7': case '8': case '9':
1110 case 'a': case 'b': case 'c':
1111 case 'd': case 'e': case 'f':
1112 return c
- 'a' + 10;
1113 case 'A': case 'B': case 'C':
1114 case 'D': case 'E': case 'F':
1115 return c
- 'A' + 10;
1121 #define PACK_LENGTH_ADJUST_SIZE(sz) do { \
1123 if (len > (send-s)/sz) { \
1125 tmp = len-(send-s)/sz; \
1127 len = (send-s)/sz; \
1132 #define PACK_LENGTH_ADJUST(type,sz) do { \
1133 int t__len = NATINT_LEN(type,(sz)); \
1134 PACK_LENGTH_ADJUST_SIZE(t__len); \
1137 #define PACK_LENGTH_ADJUST(type,sz) \
1138 PACK_LENGTH_ADJUST_SIZE(sizeof(type))
/*
 * PACK_ITEM_ADJUST(): pushes Qnil onto ary once per unit of tmp, counting
 * tmp down; both names come from the enclosing scope.  The loop stops when
 * tmp reaches 0 and the post-decrement then leaves it at -1.  Expands to a
 * bare while statement (no do/while(0) wrapper).
 * NOTE(review): assumes tmp >= 0 on entry -- confirm against callers.
 */
1141 #define PACK_ITEM_ADJUST() while (tmp--) rb_ary_push(ary, Qnil)
1144 infected_str_new(const char *ptr
, long len
, VALUE str
)
1146 VALUE s
= rb_str_new(ptr
, len
);
1154 * str.unpack(format) => anArray
1156 * Decodes <i>str</i> (which may contain binary data) according to the
1157 * format string, returning an array of each value extracted. The
1158 * format string consists of a sequence of single-character directives,
1159 * summarized in the table at the end of this entry.
1160 * Each directive may be followed
1161 * by a number, indicating the number of times to repeat with this
1162 * directive. An asterisk (``<code>*</code>'') will use up all
1163 * remaining elements. The directives <code>sSiIlL</code> may each be
1164 * followed by an underscore (``<code>_</code>'') to use the underlying
1165 * platform's native size for the specified type; otherwise, it uses a
1166 * platform-independent consistent size. Spaces are ignored in the
1167 * format string. See also <code>Array#pack</code>.
1169 * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
1170 * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
1171 * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
1172 * "aa".unpack('b8B8') #=> ["10000110", "01100001"]
1173 * "aaa".unpack('h2H2c') #=> ["16", "61", 97]
1174 * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
1175 * "now=20is".unpack('M*') #=> ["now is"]
1176 * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
1178 * This table summarizes the various formats and the Ruby classes
1181 * Format | Returns | Function
1182 * -------+---------+-----------------------------------------
1183 * A | String | arbitrary binary string with trailing
1184 * | | nulls and ASCII spaces removed
1185 * -------+---------+-----------------------------------------
1186 * a | String | arbitrary binary string
1187 * -------+---------+-----------------------------------------
1188 * B | String | extract bits from each character (msb first)
1189 * -------+---------+-----------------------------------------
1190 * b | String | extract bits from each character (lsb first)
1191 * -------+---------+-----------------------------------------
1192 * C | Fixnum | extract a byte (C char) as an unsigned integer
1193 * -------+---------+-----------------------------------------
1194 * c | Fixnum | extract a byte (C char) as an integer
1195 * -------+---------+-----------------------------------------
1196 * d,D | Float | treat sizeof(double) characters as
1197 * | | a native double
1198 * -------+---------+-----------------------------------------
1199 * E | Float | treat sizeof(double) characters as
1200 * | | a double in little-endian byte order
1201 * -------+---------+-----------------------------------------
1202 * e | Float | treat sizeof(float) characters as
1203 * | | a float in little-endian byte order
1204 * -------+---------+-----------------------------------------
1205 * f,F | Float | treat sizeof(float) characters as
1206 * | | a native float
1207 * -------+---------+-----------------------------------------
1208 * G | Float | treat sizeof(double) characters as
1209 * | | a double in network byte order
1210 * -------+---------+-----------------------------------------
1211 * g | Float | treat sizeof(float) characters as a
1212 * | | float in network byte order
1213 * -------+---------+-----------------------------------------
1214 * H | String | extract hex nibbles from each character
1215 * | | (most significant first)
1216 * -------+---------+-----------------------------------------
1217 * h | String | extract hex nibbles from each character
1218 * | | (least significant first)
1219 * -------+---------+-----------------------------------------
1220 * I | Integer | treat sizeof(int) (modified by _)
1221 * | | successive characters as an unsigned
1222 * | | native integer
1223 * -------+---------+-----------------------------------------
1224 * i | Integer | treat sizeof(int) (modified by _)
1225 * | | successive characters as a signed
1226 * | | native integer
1227 * -------+---------+-----------------------------------------
1228 * L | Integer | treat four (modified by _) successive
1229 * | | characters as an unsigned native
1231 * -------+---------+-----------------------------------------
1232 * l | Integer | treat four (modified by _) successive
1233 * | | characters as a signed native
1235 * -------+---------+-----------------------------------------
1236 * M | String | quoted-printable
1237 * -------+---------+-----------------------------------------
1238 * m | String | base64-encoded
1239 * -------+---------+-----------------------------------------
1240 * N | Integer | treat four characters as an unsigned
1241 * | | long in network byte order
1242 * -------+---------+-----------------------------------------
1243 * n | Fixnum | treat two characters as an unsigned
1244 * | | short in network byte order
1245 * -------+---------+-----------------------------------------
1246 * P | String | treat sizeof(char *) characters as a
1247 * | | pointer, and return len characters
1248 * | | from the referenced location
1249 * -------+---------+-----------------------------------------
1250 * p | String | treat sizeof(char *) characters as a
1251 * | | pointer to a null-terminated string
1252 * -------+---------+-----------------------------------------
1253 * Q | Integer | treat 8 characters as an unsigned
1254 * | | quad word (64 bits)
1255 * -------+---------+-----------------------------------------
1256 * q | Integer | treat 8 characters as a signed
1257 * | | quad word (64 bits)
1258 * -------+---------+-----------------------------------------
1259 * S | Fixnum | treat two (different if _ used)
1260 * | | successive characters as an unsigned
1261 * | | short in native byte order
1262 * -------+---------+-----------------------------------------
1263 * s | Fixnum | treat two (different if _ used)
1264 * | | successive characters as a signed short
1265 * | | in native byte order
1266 * -------+---------+-----------------------------------------
1267 * U | Integer | UTF-8 characters as unsigned integers
1268 * -------+---------+-----------------------------------------
1269 * u | String | UU-encoded
1270 * -------+---------+-----------------------------------------
1271 * V | Fixnum | treat four characters as an unsigned
1272 * | | long in little-endian byte order
1273 * -------+---------+-----------------------------------------
1274 * v | Fixnum | treat two characters as an unsigned
1275 * | | short in little-endian byte order
1276 * -------+---------+-----------------------------------------
1277 * w | Integer | BER-compressed integer (see Array#pack)
1278 * -------+---------+-----------------------------------------
1279 * X | --- | skip backward one character
1280 * -------+---------+-----------------------------------------
1281 * x | --- | skip forward one character
1282 * -------+---------+-----------------------------------------
1283 * Z | String | with trailing nulls removed
1284 * | | up to the first null with *
1285 * -------+---------+-----------------------------------------
1286 * @ | --- | skip to the offset given by the
1287 * | | length argument
1288 * -------+---------+-----------------------------------------
1292 pack_unpack(VALUE str
, VALUE fmt
)
1294 static const char hexdigits
[] = "0123456789abcdef";
1302 int natint
; /* native integer */
1304 int block_p
= rb_block_given_p();
1305 #define UNPACK_PUSH(item) do {\
1306 VALUE item_val = (item);\
1308 rb_yield(item_val);\
1311 rb_ary_push(ary, item_val);\
1317 s
= RSTRING_PTR(str
);
1318 send
= s
+ RSTRING_LEN(str
);
1319 p
= RSTRING_PTR(fmt
);
1320 pend
= p
+ RSTRING_LEN(fmt
);
1322 ary
= block_p
? Qnil
: rb_ary_new();
1329 if (ISSPACE(type
)) continue;
1331 while ((p
< pend
) && (*p
!= '\n')) {
1337 if (*p
== '_' || *p
== '!') {
1338 static const char natstr
[] = "sSiIlL";
1340 if (strchr(natstr
, type
)) {
1347 rb_raise(rb_eArgError
, "'%c' allowed only after types %s", *p
, natstr
);
1352 else if (*p
== '*') {
1357 else if (ISDIGIT(*p
)) {
1359 len
= STRTOUL(p
, (char**)&p
, 10);
1361 rb_raise(rb_eRangeError
, "pack length too big");
1365 len
= (type
!= '@');
1370 rb_raise(rb_eArgError
, "%% is not supported");
1374 if (len
> send
- s
) len
= send
- s
;
1377 char *t
= s
+ len
- 1;
1380 if (*t
!= ' ' && *t
!= '\0') break;
1383 UNPACK_PUSH(infected_str_new(s
, len
, str
));
1392 if (len
> send
-s
) len
= send
-s
;
1393 while (t
< s
+len
&& *t
) t
++;
1394 UNPACK_PUSH(infected_str_new(s
, t
-s
, str
));
1396 s
= star
? t
: s
+len
;
1401 if (len
> send
- s
) len
= send
- s
;
1402 UNPACK_PUSH(infected_str_new(s
, len
, str
));
1413 if (p
[-1] == '*' || len
> (send
- s
) * 8)
1414 len
= (send
- s
) * 8;
1416 UNPACK_PUSH(bitstr
= rb_str_new(0, len
));
1417 t
= RSTRING_PTR(bitstr
);
1418 for (i
=0; i
<len
; i
++) {
1419 if (i
& 7) bits
>>= 1;
1421 *t
++ = (bits
& 1) ? '1' : '0';
1433 if (p
[-1] == '*' || len
> (send
- s
) * 8)
1434 len
= (send
- s
) * 8;
1436 UNPACK_PUSH(bitstr
= rb_str_new(0, len
));
1437 t
= RSTRING_PTR(bitstr
);
1438 for (i
=0; i
<len
; i
++) {
1439 if (i
& 7) bits
<<= 1;
1441 *t
++ = (bits
& 128) ? '1' : '0';
1453 if (p
[-1] == '*' || len
> (send
- s
) * 2)
1454 len
= (send
- s
) * 2;
1456 UNPACK_PUSH(bitstr
= rb_str_new(0, len
));
1457 t
= RSTRING_PTR(bitstr
);
1458 for (i
=0; i
<len
; i
++) {
1463 *t
++ = hexdigits
[bits
& 15];
1475 if (p
[-1] == '*' || len
> (send
- s
) * 2)
1476 len
= (send
- s
) * 2;
1478 UNPACK_PUSH(bitstr
= rb_str_new(0, len
));
1479 t
= RSTRING_PTR(bitstr
);
1480 for (i
=0; i
<len
; i
++) {
1485 *t
++ = hexdigits
[(bits
>> 4) & 15];
1491 PACK_LENGTH_ADJUST(char,sizeof(char));
1494 if (c
> (char)127) c
-=256;
1495 UNPACK_PUSH(INT2FIX(c
));
1501 PACK_LENGTH_ADJUST(unsigned char,sizeof(unsigned char));
1503 unsigned char c
= *s
++;
1504 UNPACK_PUSH(INT2FIX(c
));
1510 PACK_LENGTH_ADJUST(short,2);
1513 memcpy(OFF16(&tmp
), s
, NATINT_LEN(short,2));
1515 s
+= NATINT_LEN(short,2);
1516 UNPACK_PUSH(INT2FIX(tmp
));
1522 PACK_LENGTH_ADJUST(unsigned short,2);
1524 unsigned short tmp
= 0;
1525 memcpy(OFF16(&tmp
), s
, NATINT_LEN(unsigned short,2));
1526 s
+= NATINT_LEN(unsigned short,2);
1527 UNPACK_PUSH(INT2FIX(tmp
));
1533 PACK_LENGTH_ADJUST(int,sizeof(int));
1536 memcpy(&tmp
, s
, sizeof(int));
1538 UNPACK_PUSH(INT2NUM(tmp
));
1544 PACK_LENGTH_ADJUST(unsigned int,sizeof(unsigned int));
1547 memcpy(&tmp
, s
, sizeof(unsigned int));
1548 s
+= sizeof(unsigned int);
1549 UNPACK_PUSH(UINT2NUM(tmp
));
1555 PACK_LENGTH_ADJUST(long,4);
1558 memcpy(OFF32(&tmp
), s
, NATINT_LEN(long,4));
1560 s
+= NATINT_LEN(long,4);
1561 UNPACK_PUSH(LONG2NUM(tmp
));
1566 PACK_LENGTH_ADJUST(unsigned long,4);
1568 unsigned long tmp
= 0;
1569 memcpy(OFF32(&tmp
), s
, NATINT_LEN(unsigned long,4));
1570 s
+= NATINT_LEN(unsigned long,4);
1571 UNPACK_PUSH(ULONG2NUM(tmp
));
1577 PACK_LENGTH_ADJUST_SIZE(QUAD_SIZE
);
1579 char *tmp
= (char*)s
;
1581 UNPACK_PUSH(rb_quad_unpack(tmp
, 1));
1586 PACK_LENGTH_ADJUST_SIZE(QUAD_SIZE
);
1588 char *tmp
= (char*)s
;
1590 UNPACK_PUSH(rb_quad_unpack(tmp
, 0));
1595 PACK_LENGTH_ADJUST(unsigned short,2);
1597 unsigned short tmp
= 0;
1598 memcpy(OFF16B(&tmp
), s
, NATINT_LEN(unsigned short,2));
1599 s
+= NATINT_LEN(unsigned short,2);
1600 UNPACK_PUSH(UINT2NUM(ntohs(tmp
)));
1606 PACK_LENGTH_ADJUST(unsigned long,4);
1608 unsigned long tmp
= 0;
1609 memcpy(OFF32B(&tmp
), s
, NATINT_LEN(unsigned long,4));
1610 s
+= NATINT_LEN(unsigned long,4);
1611 UNPACK_PUSH(ULONG2NUM(ntohl(tmp
)));
1617 PACK_LENGTH_ADJUST(unsigned short,2);
1619 unsigned short tmp
= 0;
1620 memcpy(&tmp
, s
, NATINT_LEN(unsigned short,2));
1621 s
+= NATINT_LEN(unsigned short,2);
1622 UNPACK_PUSH(UINT2NUM(vtohs(tmp
)));
1628 PACK_LENGTH_ADJUST(unsigned long,4);
1630 unsigned long tmp
= 0;
1631 memcpy(&tmp
, s
, NATINT_LEN(long,4));
1632 s
+= NATINT_LEN(long,4);
1633 UNPACK_PUSH(ULONG2NUM(vtohl(tmp
)));
1640 PACK_LENGTH_ADJUST(float,sizeof(float));
1643 memcpy(&tmp
, s
, sizeof(float));
1645 UNPACK_PUSH(DOUBLE2NUM((double)tmp
));
1651 PACK_LENGTH_ADJUST(float,sizeof(float));
1654 FLOAT_CONVWITH(ftmp
);
1656 memcpy(&tmp
, s
, sizeof(float));
1658 tmp
= VTOHF(tmp
,ftmp
);
1659 UNPACK_PUSH(DOUBLE2NUM((double)tmp
));
1665 PACK_LENGTH_ADJUST(double,sizeof(double));
1668 DOUBLE_CONVWITH(dtmp
);
1670 memcpy(&tmp
, s
, sizeof(double));
1671 s
+= sizeof(double);
1672 tmp
= VTOHD(tmp
,dtmp
);
1673 UNPACK_PUSH(DOUBLE2NUM(tmp
));
1680 PACK_LENGTH_ADJUST(double,sizeof(double));
1683 memcpy(&tmp
, s
, sizeof(double));
1684 s
+= sizeof(double);
1685 UNPACK_PUSH(DOUBLE2NUM(tmp
));
1691 PACK_LENGTH_ADJUST(float,sizeof(float));
1694 FLOAT_CONVWITH(ftmp
;)
1696 memcpy(&tmp
, s
, sizeof(float));
1698 tmp
= NTOHF(tmp
,ftmp
);
1699 UNPACK_PUSH(DOUBLE2NUM((double)tmp
));
1705 PACK_LENGTH_ADJUST(double,sizeof(double));
1708 DOUBLE_CONVWITH(dtmp
);
1710 memcpy(&tmp
, s
, sizeof(double));
1711 s
+= sizeof(double);
1712 tmp
= NTOHD(tmp
,dtmp
);
1713 UNPACK_PUSH(DOUBLE2NUM(tmp
));
1719 if (len
> send
- s
) len
= send
- s
;
1720 while (len
> 0 && s
< send
) {
1721 long alen
= send
- s
;
1724 l
= utf8_to_uv(s
, &alen
);
1726 UNPACK_PUSH(ULONG2NUM(l
));
1732 VALUE buf
= infected_str_new(0, (send
- s
)*3/4, str
);
1733 char *ptr
= RSTRING_PTR(buf
);
1736 while (s
< send
&& *s
> ' ' && *s
< 'a') {
1741 len
= (*s
++ - ' ') & 077;
1743 if (total
> RSTRING_LEN(buf
)) {
1744 len
-= total
- RSTRING_LEN(buf
);
1745 total
= RSTRING_LEN(buf
);
1749 long mlen
= len
> 3 ? 3 : len
;
1751 if (s
< send
&& *s
>= ' ')
1752 a
= (*s
++ - ' ') & 077;
1755 if (s
< send
&& *s
>= ' ')
1756 b
= (*s
++ - ' ') & 077;
1759 if (s
< send
&& *s
>= ' ')
1760 c
= (*s
++ - ' ') & 077;
1763 if (s
< send
&& *s
>= ' ')
1764 d
= (*s
++ - ' ') & 077;
1767 hunk
[0] = a
<< 2 | b
>> 4;
1768 hunk
[1] = b
<< 4 | c
>> 2;
1769 hunk
[2] = c
<< 6 | d
;
1770 memcpy(ptr
, hunk
, mlen
);
1774 if (*s
== '\r') s
++;
1775 if (*s
== '\n') s
++;
1776 else if (s
< send
&& (s
+1 == send
|| s
[1] == '\n'))
1777 s
+= 2; /* possible checksum byte */
1780 rb_str_set_len(buf
, total
);
1787 VALUE buf
= infected_str_new(0, (send
- s
)*3/4, str
);
1788 char *ptr
= RSTRING_PTR(buf
);
1789 int a
= -1,b
= -1,c
= 0,d
;
1790 static signed char b64_xtable
[256];
1792 if (b64_xtable
['/'] <= 0) {
1795 for (i
= 0; i
< 256; i
++) {
1798 for (i
= 0; i
< 64; i
++) {
1799 b64_xtable
[(unsigned char)b64_table
[i
]] = i
;
1804 while ((a
= b64_xtable
[(unsigned char)*s
]) == -1 && s
< send
) {s
++;}
1805 if (s
>= send
) break;
1807 while ((b
= b64_xtable
[(unsigned char)*s
]) == -1 && s
< send
) {s
++;}
1808 if (s
>= send
) break;
1810 while ((c
= b64_xtable
[(unsigned char)*s
]) == -1 && s
< send
) {if (*s
== '=') break; s
++;}
1811 if (*s
== '=' || s
>= send
) break;
1813 while ((d
= b64_xtable
[(unsigned char)*s
]) == -1 && s
< send
) {if (*s
== '=') break; s
++;}
1814 if (*s
== '=' || s
>= send
) break;
1816 *ptr
++ = a
<< 2 | b
>> 4;
1817 *ptr
++ = b
<< 4 | c
>> 2;
1818 *ptr
++ = c
<< 6 | d
;
1820 if (a
!= -1 && b
!= -1) {
1821 if (c
== -1 && *s
== '=')
1822 *ptr
++ = a
<< 2 | b
>> 4;
1823 else if (c
!= -1 && *s
== '=') {
1824 *ptr
++ = a
<< 2 | b
>> 4;
1825 *ptr
++ = b
<< 4 | c
>> 2;
1828 rb_str_set_len(buf
, ptr
- RSTRING_PTR(buf
));
1835 VALUE buf
= infected_str_new(0, send
- s
, str
);
1836 char *ptr
= RSTRING_PTR(buf
);
1841 if (++s
== send
) break;
1842 if (s
+1 < send
&& *s
== '\r' && *(s
+1) == '\n')
1845 if ((c1
= hex2num(*s
)) == -1) break;
1846 if (++s
== send
) break;
1847 if ((c2
= hex2num(*s
)) == -1) break;
1848 *ptr
++ = c1
<< 4 | c2
;
1856 rb_str_set_len(buf
, ptr
- RSTRING_PTR(buf
));
1862 if (len
> RSTRING_LEN(str
))
1863 rb_raise(rb_eArgError
, "@ outside of string");
1864 s
= RSTRING_PTR(str
) + len
;
1868 if (len
> s
- RSTRING_PTR(str
))
1869 rb_raise(rb_eArgError
, "X outside of string");
1875 rb_raise(rb_eArgError
, "x outside of string");
1880 if (sizeof(char *) <= send
- s
) {
1884 memcpy(&t
, s
, sizeof(char *));
1885 s
+= sizeof(char *);
1890 if (!(a
= rb_str_associated(str
))) {
1891 rb_raise(rb_eArgError
, "no associated pointer");
1894 pend
= p
+ RARRAY_LEN(a
);
1896 if (TYPE(*p
) == T_STRING
&& RSTRING_PTR(*p
) == t
) {
1897 if (len
< RSTRING_LEN(*p
)) {
1898 tmp
= rb_tainted_str_new(t
, len
);
1899 rb_str_associate(tmp
, a
);
1909 rb_raise(rb_eArgError
, "non associated pointer");
1917 if (len
> (send
- s
) / sizeof(char *))
1918 len
= (send
- s
) / sizeof(char *);
1920 if (send
- s
< sizeof(char *))
1926 memcpy(&t
, s
, sizeof(char *));
1927 s
+= sizeof(char *);
1932 if (!(a
= rb_str_associated(str
))) {
1933 rb_raise(rb_eArgError
, "no associated pointer");
1936 pend
= p
+ RARRAY_LEN(a
);
1938 if (TYPE(*p
) == T_STRING
&& RSTRING_PTR(*p
) == t
) {
1945 rb_raise(rb_eArgError
, "non associated pointer");
1955 unsigned long ul
= 0;
1956 unsigned long ulmask
= 0xfeUL
<< ((sizeof(unsigned long) - 1) * 8);
1958 while (len
> 0 && s
< send
) {
1961 if (!(*s
++ & 0x80)) {
1962 UNPACK_PUSH(ULONG2NUM(ul
));
1966 else if (ul
& ulmask
) {
1967 VALUE big
= rb_uint2big(ul
);
1968 VALUE big128
= rb_uint2big(128);
1970 big
= rb_big_mul(big
, big128
);
1971 big
= rb_big_plus(big
, rb_uint2big(*s
& 0x7f));
1972 if (!(*s
++ & 0x80)) {
1995 rb_uv_to_utf8(char buf
[6], unsigned long uv
)
2002 buf
[0] = ((uv
>>6)&0xff)|0xc0;
2003 buf
[1] = (uv
&0x3f)|0x80;
2007 buf
[0] = ((uv
>>12)&0xff)|0xe0;
2008 buf
[1] = ((uv
>>6)&0x3f)|0x80;
2009 buf
[2] = (uv
&0x3f)|0x80;
2012 if (uv
<= 0x1fffff) {
2013 buf
[0] = ((uv
>>18)&0xff)|0xf0;
2014 buf
[1] = ((uv
>>12)&0x3f)|0x80;
2015 buf
[2] = ((uv
>>6)&0x3f)|0x80;
2016 buf
[3] = (uv
&0x3f)|0x80;
2019 if (uv
<= 0x3ffffff) {
2020 buf
[0] = ((uv
>>24)&0xff)|0xf8;
2021 buf
[1] = ((uv
>>18)&0x3f)|0x80;
2022 buf
[2] = ((uv
>>12)&0x3f)|0x80;
2023 buf
[3] = ((uv
>>6)&0x3f)|0x80;
2024 buf
[4] = (uv
&0x3f)|0x80;
2027 if (uv
<= 0x7fffffff) {
2028 buf
[0] = ((uv
>>30)&0xff)|0xfc;
2029 buf
[1] = ((uv
>>24)&0x3f)|0x80;
2030 buf
[2] = ((uv
>>18)&0x3f)|0x80;
2031 buf
[3] = ((uv
>>12)&0x3f)|0x80;
2032 buf
[4] = ((uv
>>6)&0x3f)|0x80;
2033 buf
[5] = (uv
&0x3f)|0x80;
2036 rb_raise(rb_eRangeError
, "pack(U): value out of range");
2039 static const unsigned long utf8_limits
[] = {
2049 static unsigned long
2050 utf8_to_uv(const char *p
, long *lenp
)
2052 int c
= *p
++ & 0xff;
2053 unsigned long uv
= c
;
2062 rb_raise(rb_eArgError
, "malformed UTF-8 character");
2065 if (!(uv
& 0x20)) { n
= 2; uv
&= 0x1f; }
2066 else if (!(uv
& 0x10)) { n
= 3; uv
&= 0x0f; }
2067 else if (!(uv
& 0x08)) { n
= 4; uv
&= 0x07; }
2068 else if (!(uv
& 0x04)) { n
= 5; uv
&= 0x03; }
2069 else if (!(uv
& 0x02)) { n
= 6; uv
&= 0x01; }
2072 rb_raise(rb_eArgError
, "malformed UTF-8 character");
2075 rb_raise(rb_eArgError
, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
2082 if ((c
& 0xc0) != 0x80) {
2084 rb_raise(rb_eArgError
, "malformed UTF-8 character");
2093 if (uv
< utf8_limits
[n
]) {
2094 rb_raise(rb_eArgError
, "redundant UTF-8 sequence");
2102 rb_define_method(rb_cArray
, "pack", pack_pack
, 1);
2103 rb_define_method(rb_cString
, "unpack", pack_unpack
, 1);