1 /**********************************************************************
6 created at: Tue Oct 30 16:10:22 JST 2007
8 Copyright (C) 2007 Martin Duerst
10 **********************************************************************/
12 #include "ruby/ruby.h"
13 #include "ruby/encoding.h"
15 #include "transcode_data.h"
18 VALUE rb_eConversionUndefined
;
19 VALUE rb_eInvalidByteSequence
;
21 VALUE rb_cEncodingConverter
;
23 static VALUE sym_invalid
, sym_undef
, sym_ignore
, sym_replace
;
24 #define INVALID_IGNORE 0x1
25 #define INVALID_REPLACE 0x2
26 #define UNDEF_IGNORE 0x10
27 #define UNDEF_REPLACE 0x20
30 * Dispatch data and logic
36 const char *lib
; /* may be NULL, which means that no library should be loaded. */
37 const rb_transcoder
*transcoder
;
40 static st_table
*transcoder_table
;
42 static transcoder_entry_t
*
43 make_transcoder_entry(const char *from
, const char *to
)
48 if (!st_lookup(transcoder_table
, (st_data_t
)from
, &val
)) {
49 val
= (st_data_t
)st_init_strcasetable();
50 st_add_direct(transcoder_table
, (st_data_t
)from
, val
);
52 table2
= (st_table
*)val
;
53 if (!st_lookup(table2
, (st_data_t
)to
, &val
)) {
54 transcoder_entry_t
*entry
= ALLOC(transcoder_entry_t
);
58 entry
->transcoder
= NULL
;
59 val
= (st_data_t
)entry
;
60 st_add_direct(table2
, (st_data_t
)to
, val
);
62 return (transcoder_entry_t
*)val
;
65 static transcoder_entry_t
*
66 get_transcoder_entry(const char *from
, const char *to
)
71 if (!st_lookup(transcoder_table
, (st_data_t
)from
, &val
)) {
74 table2
= (st_table
*)val
;
75 if (!st_lookup(table2
, (st_data_t
)to
, &val
)) {
78 return (transcoder_entry_t
*)val
;
82 rb_register_transcoder(const rb_transcoder
*tr
)
84 const char *const from_e
= tr
->from_encoding
;
85 const char *const to_e
= tr
->to_encoding
;
87 transcoder_entry_t
*entry
;
89 entry
= make_transcoder_entry(from_e
, to_e
);
90 if (entry
->transcoder
) {
91 rb_raise(rb_eArgError
, "transcoder from %s to %s has been already registered",
95 entry
->transcoder
= tr
;
99 declare_transcoder(const char *to
, const char *from
, const char *lib
)
101 transcoder_entry_t
*entry
;
103 entry
= make_transcoder_entry(from
, to
);
107 #define MAX_TRANSCODER_LIBNAME_LEN 64
108 static const char transcoder_lib_prefix
[] = "enc/trans/";
111 rb_declare_transcoder(const char *enc1
, const char *enc2
, const char *lib
)
113 if (!lib
|| strlen(lib
) > MAX_TRANSCODER_LIBNAME_LEN
) {
114 rb_raise(rb_eArgError
, "invalid library name - %s",
115 lib
? lib
: "(null)");
117 declare_transcoder(enc1
, enc2
, lib
);
118 declare_transcoder(enc2
, enc1
, lib
);
121 #define encoding_equal(enc1, enc2) (STRCASECMP(enc1, enc2) == 0)
123 typedef struct search_path_queue_tag
{
124 struct search_path_queue_tag
*next
;
126 } search_path_queue_t
;
130 search_path_queue_t
*queue
;
131 search_path_queue_t
**queue_last_ptr
;
132 const char *base_enc
;
136 transcode_search_path_i(st_data_t key
, st_data_t val
, st_data_t arg
)
138 const char *to
= (const char *)key
;
139 search_path_bfs_t
*bfs
= (search_path_bfs_t
*)arg
;
140 search_path_queue_t
*q
;
142 if (st_lookup(bfs
->visited
, (st_data_t
)to
, &val
)) {
146 q
= ALLOC(search_path_queue_t
);
149 *bfs
->queue_last_ptr
= q
;
150 bfs
->queue_last_ptr
= &q
->next
;
152 st_add_direct(bfs
->visited
, (st_data_t
)to
, (st_data_t
)bfs
->base_enc
);
157 transcode_search_path(const char *from
, const char *to
,
158 void (*callback
)(const char *from
, const char *to
, int depth
, void *arg
),
161 search_path_bfs_t bfs
;
162 search_path_queue_t
*q
;
168 if (encoding_equal(from
, to
))
171 q
= ALLOC(search_path_queue_t
);
174 bfs
.queue_last_ptr
= &q
->next
;
177 bfs
.visited
= st_init_strcasetable();
178 st_add_direct(bfs
.visited
, (st_data_t
)from
, (st_data_t
)NULL
);
184 bfs
.queue_last_ptr
= &bfs
.queue
;
186 if (!st_lookup(transcoder_table
, (st_data_t
)q
->enc
, &val
)) {
190 table2
= (st_table
*)val
;
192 if (st_lookup(table2
, (st_data_t
)to
, &val
)) {
193 st_add_direct(bfs
.visited
, (st_data_t
)to
, (st_data_t
)q
->enc
);
199 bfs
.base_enc
= q
->enc
;
200 st_foreach(table2
, transcode_search_path_i
, (st_data_t
)&bfs
);
215 const char *enc
= to
;
219 st_lookup(bfs
.visited
, (st_data_t
)enc
, &val
);
223 enc
= (const char *)val
;
228 st_lookup(bfs
.visited
, (st_data_t
)enc
, &val
);
231 callback((const char *)val
, enc
, --depth
, arg
);
232 enc
= (const char *)val
;
236 st_free_table(bfs
.visited
);
244 static const rb_transcoder
*
245 load_transcoder_entry(transcoder_entry_t
*entry
)
247 if (entry
->transcoder
)
248 return entry
->transcoder
;
251 const char *lib
= entry
->lib
;
252 int len
= strlen(lib
);
253 char path
[sizeof(transcoder_lib_prefix
) + MAX_TRANSCODER_LIBNAME_LEN
];
257 if (len
> MAX_TRANSCODER_LIBNAME_LEN
)
259 memcpy(path
, transcoder_lib_prefix
, sizeof(transcoder_lib_prefix
) - 1);
260 memcpy(path
+ sizeof(transcoder_lib_prefix
) - 1, lib
, len
+ 1);
261 if (!rb_require(path
))
265 if (entry
->transcoder
)
266 return entry
->transcoder
;
272 get_replacement_character(rb_encoding
*enc
, int *len_ret
, const char **repl_enc_ptr
)
274 static rb_encoding
*utf16be_encoding
, *utf16le_encoding
;
275 static rb_encoding
*utf32be_encoding
, *utf32le_encoding
;
276 if (!utf16be_encoding
) {
277 utf16be_encoding
= rb_enc_find("UTF-16BE");
278 utf16le_encoding
= rb_enc_find("UTF-16LE");
279 utf32be_encoding
= rb_enc_find("UTF-32BE");
280 utf32le_encoding
= rb_enc_find("UTF-32LE");
282 if (rb_utf8_encoding() == enc
) {
284 *repl_enc_ptr
= "UTF-8";
285 return "\xEF\xBF\xBD";
287 else if (utf16be_encoding
== enc
) {
289 *repl_enc_ptr
= "UTF-16BE";
292 else if (utf16le_encoding
== enc
) {
294 *repl_enc_ptr
= "UTF-16LE";
297 else if (utf32be_encoding
== enc
) {
299 *repl_enc_ptr
= "UTF-32BE";
300 return "\x00\x00\xFF\xFD";
302 else if (utf32le_encoding
== enc
) {
304 *repl_enc_ptr
= "UTF-32LE";
305 return "\xFD\xFF\x00\x00";
309 *repl_enc_ptr
= "US-ASCII";
315 * Transcoding engine logic
318 static const unsigned char *
319 transcode_char_start(rb_transcoding
*tc
,
320 const unsigned char *in_start
,
321 const unsigned char *inchar_start
,
322 const unsigned char *in_p
,
323 size_t *char_len_ptr
)
325 const unsigned char *ptr
;
326 if (inchar_start
- in_start
< tc
->recognized_len
) {
327 MEMCPY(TRANSCODING_READBUF(tc
) + tc
->recognized_len
,
328 inchar_start
, unsigned char, in_p
- inchar_start
);
329 ptr
= TRANSCODING_READBUF(tc
);
332 ptr
= inchar_start
- tc
->recognized_len
;
334 *char_len_ptr
= tc
->recognized_len
+ (in_p
- inchar_start
);
338 static rb_econv_result_t
339 transcode_restartable0(const unsigned char **in_pos
, unsigned char **out_pos
,
340 const unsigned char *in_stop
, unsigned char *out_stop
,
345 const rb_transcoder
*tr
= tc
->transcoder
;
346 int unitlen
= tr
->input_unit_length
;
347 int readagain_len
= 0;
349 const unsigned char *inchar_start
;
350 const unsigned char *in_p
;
352 unsigned char *out_p
;
354 unsigned char empty_buf
;
355 unsigned char *empty_ptr
= &empty_buf
;
358 in_pos
= (const unsigned char **)&empty_ptr
;
363 out_pos
= &empty_ptr
;
364 out_stop
= empty_ptr
;
367 in_p
= inchar_start
= *in_pos
;
371 #define SUSPEND(ret, num) \
373 tc->resume_position = (num); \
374 if (0 < in_p - inchar_start) \
375 MEMMOVE(TRANSCODING_READBUF(tc)+tc->recognized_len, \
376 inchar_start, unsigned char, in_p - inchar_start); \
379 tc->recognized_len += in_p - inchar_start; \
380 if (readagain_len) { \
381 tc->recognized_len -= readagain_len; \
382 tc->readagain_len = readagain_len; \
385 resume_label ## num:; \
387 #define SUSPEND_OBUF(num) \
389 while (out_stop - out_p < 1) { SUSPEND(econv_destination_buffer_full, num); } \
392 #define SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(num) \
393 if ((opt & ECONV_OUTPUT_FOLLOWED_BY_INPUT) && *out_pos != out_p) { \
394 SUSPEND(econv_output_followed_by_input, num); \
397 #define next_table (tc->next_table)
398 #define next_info (tc->next_info)
399 #define next_byte (tc->next_byte)
400 #define writebuf_len (tc->writebuf_len)
401 #define writebuf_off (tc->writebuf_off)
403 switch (tc
->resume_position
) {
405 case 1: goto resume_label1
;
406 case 2: goto resume_label2
;
407 case 3: goto resume_label3
;
408 case 4: goto resume_label4
;
409 case 5: goto resume_label5
;
410 case 6: goto resume_label6
;
411 case 7: goto resume_label7
;
412 case 8: goto resume_label8
;
413 case 9: goto resume_label9
;
414 case 10: goto resume_label10
;
415 case 11: goto resume_label11
;
416 case 12: goto resume_label12
;
417 case 13: goto resume_label13
;
418 case 14: goto resume_label14
;
419 case 15: goto resume_label15
;
420 case 16: goto resume_label16
;
421 case 17: goto resume_label17
;
422 case 18: goto resume_label18
;
423 case 19: goto resume_label19
;
424 case 20: goto resume_label20
;
425 case 21: goto resume_label21
;
426 case 22: goto resume_label22
;
427 case 23: goto resume_label23
;
428 case 24: goto resume_label24
;
429 case 25: goto resume_label25
;
430 case 26: goto resume_label26
;
435 tc
->recognized_len
= 0;
436 next_table
= tr
->conv_tree_start
;
438 SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(24);
440 if (in_stop
<= in_p
) {
441 if (!(opt
& ECONV_PARTIAL_INPUT
))
443 SUSPEND(econv_source_buffer_empty
, 7);
447 next_byte
= (unsigned char)*in_p
++;
449 if (next_byte
< next_table
->base
[0] || next_table
->base
[1] < next_byte
)
452 unsigned int next_offset
= next_table
->base
[2+next_byte
-next_table
->base
[0]];
453 next_info
= (VALUE
)next_table
->info
[next_offset
];
456 switch (next_info
& 0x1F) {
457 case NOMAP
: /* xxx: copy last byte only? */
458 SUSPEND_OBUF(3); *out_p
++ = next_byte
;
460 case 0x00: case 0x04: case 0x08: case 0x0C:
461 case 0x10: case 0x14: case 0x18: case 0x1C:
462 SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(25);
463 while (in_p
>= in_stop
) {
464 if (!(opt
& ECONV_PARTIAL_INPUT
))
466 SUSPEND(econv_source_buffer_empty
, 5);
468 next_byte
= (unsigned char)*in_p
++;
469 next_table
= (const BYTE_LOOKUP
*)next_info
;
471 case ZERObt
: /* drop input */
474 SUSPEND_OBUF(9); *out_p
++ = getBT1(next_info
);
477 SUSPEND_OBUF(10); *out_p
++ = getBT1(next_info
);
478 SUSPEND_OBUF(21); *out_p
++ = getBT2(next_info
);
481 SUSPEND_OBUF(11); *out_p
++ = getBT1(next_info
);
482 SUSPEND_OBUF(15); *out_p
++ = getBT2(next_info
);
483 SUSPEND_OBUF(16); *out_p
++ = getBT3(next_info
);
486 SUSPEND_OBUF(12); *out_p
++ = getBT0(next_info
);
487 SUSPEND_OBUF(17); *out_p
++ = getBT1(next_info
);
488 SUSPEND_OBUF(18); *out_p
++ = getBT2(next_info
);
489 SUSPEND_OBUF(19); *out_p
++ = getBT3(next_info
);
492 next_info
= (VALUE
)(*tr
->func_ii
)(tc
, next_info
);
496 const unsigned char *char_start
;
498 char_start
= transcode_char_start(tc
, *in_pos
, inchar_start
, in_p
, &char_len
);
499 next_info
= (VALUE
)(*tr
->func_si
)(tc
, char_start
, (size_t)char_len
);
504 if (tr
->max_output
<= out_stop
- out_p
)
505 out_p
+= (VALUE
)(*tr
->func_io
)(tc
, next_info
, out_p
);
507 writebuf_len
= (VALUE
)(*tr
->func_io
)(tc
, next_info
, TRANSCODING_WRITEBUF(tc
));
509 while (writebuf_off
< writebuf_len
) {
511 *out_p
++ = TRANSCODING_WRITEBUF(tc
)[writebuf_off
++];
517 const unsigned char *char_start
;
520 if (tr
->max_output
<= out_stop
- out_p
) {
521 char_start
= transcode_char_start(tc
, *in_pos
, inchar_start
, in_p
, &char_len
);
522 out_p
+= (VALUE
)(*tr
->func_so
)(tc
, char_start
, (size_t)char_len
, out_p
);
525 char_start
= transcode_char_start(tc
, *in_pos
, inchar_start
, in_p
, &char_len
);
526 writebuf_len
= (VALUE
)(*tr
->func_so
)(tc
, char_start
, (size_t)char_len
, TRANSCODING_WRITEBUF(tc
));
528 while (writebuf_off
< writebuf_len
) {
530 *out_p
++ = TRANSCODING_WRITEBUF(tc
)[writebuf_off
++];
536 if (tc
->recognized_len
+ (in_p
- inchar_start
) <= unitlen
) {
537 if (tc
->recognized_len
+ (in_p
- inchar_start
) < unitlen
)
538 SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(26);
539 while ((opt
& ECONV_PARTIAL_INPUT
) && tc
->recognized_len
+ (in_stop
- inchar_start
) < unitlen
) {
541 SUSPEND(econv_source_buffer_empty
, 8);
543 if (tc
->recognized_len
+ (in_stop
- inchar_start
) <= unitlen
) {
547 in_p
= inchar_start
+ (unitlen
- tc
->recognized_len
);
551 int invalid_len
; /* including the last byte which causes invalid */
553 invalid_len
= tc
->recognized_len
+ (in_p
- inchar_start
);
554 discard_len
= ((invalid_len
- 1) / unitlen
) * unitlen
;
555 readagain_len
= invalid_len
- discard_len
;
564 SUSPEND(econv_invalid_byte_sequence
, 1);
568 SUSPEND(econv_undefined_conversion
, 2);
573 if (tr
->finish_func
) {
575 if (tr
->max_output
<= out_stop
- out_p
) {
576 out_p
+= tr
->finish_func(tc
, out_p
);
579 writebuf_len
= tr
->finish_func(tc
, TRANSCODING_WRITEBUF(tc
));
581 while (writebuf_off
< writebuf_len
) {
583 *out_p
++ = TRANSCODING_WRITEBUF(tc
)[writebuf_off
++];
588 SUSPEND(econv_finished
, 6);
597 static rb_econv_result_t
598 transcode_restartable(const unsigned char **in_pos
, unsigned char **out_pos
,
599 const unsigned char *in_stop
, unsigned char *out_stop
,
603 if (tc
->readagain_len
) {
604 unsigned char *readagain_buf
= ALLOCA_N(unsigned char, tc
->readagain_len
);
605 const unsigned char *readagain_pos
= readagain_buf
;
606 const unsigned char *readagain_stop
= readagain_buf
+ tc
->readagain_len
;
607 rb_econv_result_t res
;
609 MEMCPY(readagain_buf
, TRANSCODING_READBUF(tc
) + tc
->recognized_len
,
610 unsigned char, tc
->readagain_len
);
611 tc
->readagain_len
= 0;
612 res
= transcode_restartable0(&readagain_pos
, out_pos
, readagain_stop
, out_stop
, tc
, opt
|ECONV_PARTIAL_INPUT
);
613 if (res
!= econv_source_buffer_empty
) {
614 MEMCPY(TRANSCODING_READBUF(tc
) + tc
->recognized_len
+ tc
->readagain_len
,
615 readagain_pos
, unsigned char, readagain_stop
- readagain_pos
);
616 tc
->readagain_len
+= readagain_stop
- readagain_pos
;
620 return transcode_restartable0(in_pos
, out_pos
, in_stop
, out_stop
, tc
, opt
);
623 static rb_transcoding
*
624 rb_transcoding_open_by_transcoder(const rb_transcoder
*tr
, int flags
)
628 tc
= ALLOC(rb_transcoding
);
631 memset(tc
->stateful
, 0, sizeof(tc
->stateful
));
632 tc
->resume_position
= 0;
633 tc
->recognized_len
= 0;
634 tc
->readagain_len
= 0;
635 tc
->writebuf_len
= 0;
636 tc
->writebuf_off
= 0;
637 if (sizeof(tc
->readbuf
.ary
) < tr
->max_input
) {
638 tc
->readbuf
.ptr
= xmalloc(tr
->max_input
);
640 if (sizeof(tc
->writebuf
.ary
) < tr
->max_output
) {
641 tc
->writebuf
.ptr
= xmalloc(tr
->max_output
);
646 static rb_econv_result_t
647 rb_transcoding_convert(rb_transcoding
*tc
,
648 const unsigned char **input_ptr
, const unsigned char *input_stop
,
649 unsigned char **output_ptr
, unsigned char *output_stop
,
652 return transcode_restartable(
653 input_ptr
, output_ptr
,
654 input_stop
, output_stop
,
659 rb_transcoding_close(rb_transcoding
*tc
)
661 const rb_transcoder
*tr
= tc
->transcoder
;
662 if (sizeof(tc
->readbuf
.ary
) < tr
->max_input
)
663 xfree(tc
->readbuf
.ptr
);
664 if (sizeof(tc
->writebuf
.ary
) < tr
->max_output
)
665 xfree(tc
->writebuf
.ptr
);
670 rb_econv_open_by_transcoder_entries(int n
, transcoder_entry_t
**entries
)
675 for (i
= 0; i
< n
; i
++) {
676 const rb_transcoder
*tr
;
677 tr
= load_transcoder_entry(entries
[i
]);
682 ec
= ALLOC(rb_econv_t
);
683 ec
->source_encoding_name
= NULL
;
684 ec
->destination_encoding_name
= NULL
;
685 ec
->in_buf_start
= NULL
;
686 ec
->in_data_start
= NULL
;
687 ec
->in_data_end
= NULL
;
688 ec
->in_buf_end
= NULL
;
690 ec
->elems
= ALLOC_N(rb_econv_elem_t
, ec
->num_trans
);
691 ec
->num_finished
= 0;
693 ec
->last_trans_index
= -1;
694 ec
->source_encoding
= NULL
;
695 ec
->destination_encoding
= NULL
;
696 for (i
= 0; i
< ec
->num_trans
; i
++) {
697 const rb_transcoder
*tr
= load_transcoder_entry(entries
[i
]);
698 ec
->elems
[i
].tc
= rb_transcoding_open_by_transcoder(tr
, 0);
699 ec
->elems
[i
].out_buf_start
= NULL
;
700 ec
->elems
[i
].out_data_start
= NULL
;
701 ec
->elems
[i
].out_data_end
= NULL
;
702 ec
->elems
[i
].out_buf_end
= NULL
;
703 ec
->elems
[i
].last_result
= econv_source_buffer_empty
;
705 ec
->last_tc
= ec
->elems
[ec
->num_trans
-1].tc
;
706 ec
->last_trans_index
= ec
->num_trans
-1;
708 for (i
= 0; i
< ec
->num_trans
-1; i
++) {
711 p
= xmalloc(bufsize
);
712 ec
->elems
[i
].out_buf_start
= p
;
713 ec
->elems
[i
].out_buf_end
= p
+ bufsize
;
714 ec
->elems
[i
].out_data_start
= p
;
715 ec
->elems
[i
].out_data_end
= p
;
722 trans_open_i(const char *from
, const char *to
, int depth
, void *arg
)
724 transcoder_entry_t
***entries_ptr
= arg
;
725 transcoder_entry_t
**entries
;
728 entries
= ALLOC_N(transcoder_entry_t
*, depth
+1+2);
729 *entries_ptr
= entries
;
732 entries
= *entries_ptr
;
734 entries
[depth
] = get_transcoder_entry(from
, to
);
738 rb_econv_open(const char *from
, const char *to
, int flags
)
740 transcoder_entry_t
**entries
= NULL
;
742 static rb_econv_t
*ec
;
744 num_trans
= transcode_search_path(from
, to
, trans_open_i
, (void *)&entries
);
746 if (num_trans
< 0 || !entries
)
749 if (flags
& (ECONV_CRLF_NEWLINE_ENCODER
|ECONV_CR_NEWLINE_ENCODER
)) {
750 const char *name
= (flags
& ECONV_CRLF_NEWLINE_ENCODER
) ? "crlf_newline" : "cr_newline";
751 transcoder_entry_t
*e
= get_transcoder_entry("", name
);
754 MEMMOVE(entries
+1, entries
, transcoder_entry_t
*, num_trans
);
759 if (flags
& ECONV_UNIVERSAL_NEWLINE_DECODER
) {
760 transcoder_entry_t
*e
= get_transcoder_entry("universal_newline", "");
763 entries
[num_trans
++] = e
;
766 ec
= rb_econv_open_by_transcoder_entries(num_trans
, entries
);
768 rb_raise(rb_eArgError
, "encoding conversion not supported (from %s to %s)", from
, to
);
770 ec
->source_encoding_name
= from
;
771 ec
->destination_encoding_name
= to
;
773 if (flags
& ECONV_UNIVERSAL_NEWLINE_DECODER
) {
774 ec
->last_tc
= ec
->elems
[ec
->num_trans
-2].tc
;
775 ec
->last_trans_index
= ec
->num_trans
-2;
782 trans_sweep(rb_econv_t
*ec
,
783 const unsigned char **input_ptr
, const unsigned char *input_stop
,
784 unsigned char **output_ptr
, unsigned char *output_stop
,
791 const unsigned char **ipp
, *is
, *iold
;
792 unsigned char **opp
, *os
, *oold
;
793 rb_econv_result_t res
;
798 for (i
= start
; i
< ec
->num_trans
; i
++) {
799 rb_econv_elem_t
*te
= &ec
->elems
[i
];
806 rb_econv_elem_t
*prev_te
= &ec
->elems
[i
-1];
807 ipp
= (const unsigned char **)&prev_te
->out_data_start
;
808 is
= prev_te
->out_data_end
;
811 if (i
== ec
->num_trans
-1) {
816 if (te
->out_buf_start
!= te
->out_data_start
) {
817 int len
= te
->out_data_end
- te
->out_data_start
;
818 int off
= te
->out_data_start
- te
->out_buf_start
;
819 MEMMOVE(te
->out_buf_start
, te
->out_data_start
, unsigned char, len
);
820 te
->out_data_start
= te
->out_buf_start
;
821 te
->out_data_end
-= off
;
823 opp
= &te
->out_data_end
;
824 os
= te
->out_buf_end
;
828 if (ec
->num_finished
!= i
)
829 f
|= ECONV_PARTIAL_INPUT
;
830 if (i
== 0 && (flags
& ECONV_OUTPUT_FOLLOWED_BY_INPUT
)) {
832 flags
&= ~ECONV_OUTPUT_FOLLOWED_BY_INPUT
;
835 f
&= ~ECONV_OUTPUT_FOLLOWED_BY_INPUT
;
838 te
->last_result
= res
= rb_transcoding_convert(te
->tc
, ipp
, is
, opp
, os
, f
);
839 if (iold
!= *ipp
|| oold
!= *opp
)
843 case econv_invalid_byte_sequence
:
844 case econv_undefined_conversion
:
845 case econv_output_followed_by_input
:
848 case econv_destination_buffer_full
:
849 case econv_source_buffer_empty
:
853 ec
->num_finished
= i
+1;
861 static rb_econv_result_t
862 rb_trans_conv(rb_econv_t
*ec
,
863 const unsigned char **input_ptr
, const unsigned char *input_stop
,
864 unsigned char **output_ptr
, unsigned char *output_stop
,
866 int *result_position_ptr
)
869 int needreport_index
;
872 unsigned char empty_buf
;
873 unsigned char *empty_ptr
= &empty_buf
;
876 input_ptr
= (const unsigned char **)&empty_ptr
;
877 input_stop
= empty_ptr
;
881 output_ptr
= &empty_ptr
;
882 output_stop
= empty_ptr
;
885 if (ec
->elems
[0].last_result
== econv_output_followed_by_input
)
886 ec
->elems
[0].last_result
= econv_source_buffer_empty
;
888 needreport_index
= -1;
889 for (i
= ec
->num_trans
-1; 0 <= i
; i
--) {
890 switch (ec
->elems
[i
].last_result
) {
891 case econv_invalid_byte_sequence
:
892 case econv_undefined_conversion
:
893 case econv_output_followed_by_input
:
896 needreport_index
= i
;
897 goto found_needreport
;
899 case econv_destination_buffer_full
:
900 case econv_source_buffer_empty
:
904 rb_bug("unexpected transcode last result");
908 /* /^[sd]+$/ is confirmed. but actually /^s*d*$/. */
910 if (ec
->elems
[ec
->num_trans
-1].last_result
== econv_destination_buffer_full
&&
911 (flags
& ECONV_OUTPUT_FOLLOWED_BY_INPUT
)) {
912 rb_econv_result_t res
;
914 res
= rb_trans_conv(ec
, NULL
, NULL
, output_ptr
, output_stop
,
915 (flags
& ~ECONV_OUTPUT_FOLLOWED_BY_INPUT
)|ECONV_PARTIAL_INPUT
,
916 result_position_ptr
);
918 if (res
== econv_source_buffer_empty
)
919 return econv_output_followed_by_input
;
928 needreport_index
= trans_sweep(ec
, input_ptr
, input_stop
, output_ptr
, output_stop
, flags
, sweep_start
);
929 sweep_start
= needreport_index
+ 1;
930 } while (needreport_index
!= -1 && needreport_index
!= ec
->num_trans
-1);
932 for (i
= ec
->num_trans
-1; 0 <= i
; i
--) {
933 if (ec
->elems
[i
].last_result
!= econv_source_buffer_empty
) {
934 rb_econv_result_t res
= ec
->elems
[i
].last_result
;
935 if (res
== econv_invalid_byte_sequence
||
936 res
== econv_undefined_conversion
||
937 res
== econv_output_followed_by_input
) {
938 ec
->elems
[i
].last_result
= econv_source_buffer_empty
;
940 if (result_position_ptr
)
941 *result_position_ptr
= i
;
945 if (result_position_ptr
)
946 *result_position_ptr
= -1;
947 return econv_source_buffer_empty
;
951 rb_econv_convert(rb_econv_t
*ec
,
952 const unsigned char **input_ptr
, const unsigned char *input_stop
,
953 unsigned char **output_ptr
, unsigned char *output_stop
,
956 rb_econv_result_t res
;
960 memset(&ec
->last_error
, 0, sizeof(ec
->last_error
));
962 if (ec
->elems
[ec
->num_trans
-1].out_data_start
) {
963 unsigned char *data_start
= ec
->elems
[ec
->num_trans
-1].out_data_start
;
964 unsigned char *data_end
= ec
->elems
[ec
->num_trans
-1].out_data_end
;
965 if (data_start
!= data_end
) {
967 if (output_stop
- *output_ptr
< data_end
- data_start
) {
968 len
= output_stop
- *output_ptr
;
969 memcpy(*output_ptr
, data_start
, len
);
970 *output_ptr
= output_stop
;
971 ec
->elems
[ec
->num_trans
-1].out_data_start
+= len
;
972 res
= econv_destination_buffer_full
;
975 len
= data_end
- data_start
;
976 memcpy(*output_ptr
, data_start
, len
);
978 ec
->elems
[ec
->num_trans
-1].out_data_start
=
979 ec
->elems
[ec
->num_trans
-1].out_data_end
=
980 ec
->elems
[ec
->num_trans
-1].out_buf_start
;
985 if (ec
->in_buf_start
&&
986 ec
->in_data_start
!= ec
->in_data_end
) {
987 res
= rb_trans_conv(ec
, (const unsigned char **)&ec
->in_data_start
, ec
->in_data_end
, output_ptr
, output_stop
,
988 (flags
&~ECONV_OUTPUT_FOLLOWED_BY_INPUT
)|ECONV_PARTIAL_INPUT
, &result_position
);
989 if (res
!= econv_source_buffer_empty
)
994 (flags
& ECONV_OUTPUT_FOLLOWED_BY_INPUT
) &&
995 *input_ptr
!= input_stop
) {
996 input_stop
= *input_ptr
;
997 res
= rb_trans_conv(ec
, input_ptr
, input_stop
, output_ptr
, output_stop
, flags
, &result_position
);
998 if (res
== econv_source_buffer_empty
)
999 res
= econv_output_followed_by_input
;
1001 else if ((flags
& ECONV_OUTPUT_FOLLOWED_BY_INPUT
) ||
1002 ec
->num_trans
== 1) {
1003 res
= rb_trans_conv(ec
, input_ptr
, input_stop
, output_ptr
, output_stop
, flags
, &result_position
);
1006 flags
|= ECONV_OUTPUT_FOLLOWED_BY_INPUT
;
1008 res
= rb_trans_conv(ec
, input_ptr
, input_stop
, output_ptr
, output_stop
, flags
, &result_position
);
1009 } while (res
== econv_output_followed_by_input
);
1013 ec
->last_error
.result
= res
;
1014 ec
->last_error
.partial_input
= flags
& ECONV_PARTIAL_INPUT
;
1015 if (res
== econv_invalid_byte_sequence
||
1016 res
== econv_undefined_conversion
) {
1017 rb_transcoding
*error_tc
= ec
->elems
[result_position
].tc
;
1018 ec
->last_error
.error_tc
= error_tc
;
1019 ec
->last_error
.source_encoding
= error_tc
->transcoder
->from_encoding
;
1020 ec
->last_error
.destination_encoding
= error_tc
->transcoder
->to_encoding
;
1021 ec
->last_error
.error_bytes_start
= TRANSCODING_READBUF(error_tc
);
1022 ec
->last_error
.error_bytes_len
= error_tc
->recognized_len
;
1023 ec
->last_error
.readagain_len
= error_tc
->readagain_len
;
1030 rb_econv_encoding_to_insert_output(rb_econv_t
*ec
)
1032 rb_transcoding
*tc
= ec
->last_tc
;
1033 const rb_transcoder
*tr
= tc
->transcoder
;
1035 if (tr
->stateful_type
== stateful_encoder
)
1036 return tr
->from_encoding
;
1037 return tr
->to_encoding
;
1040 static unsigned char *
1041 allocate_converted_string(const char *str_encoding
, const char *insert_encoding
,
1042 const unsigned char *str
, size_t len
,
1043 size_t *dst_len_ptr
)
1045 unsigned char *dst_str
;
1047 size_t dst_bufsize
= len
;
1050 rb_econv_result_t res
;
1052 const unsigned char *sp
;
1055 if (dst_bufsize
== 0)
1058 ec
= rb_econv_open(str_encoding
, insert_encoding
, 0);
1061 dst_str
= xmalloc(dst_bufsize
);
1064 dp
= dst_str
+dst_len
;
1065 res
= rb_econv_convert(ec
, &sp
, str
+len
, &dp
, dst_str
+dst_bufsize
, 0);
1066 dst_len
= dp
- dst_str
;
1067 while (res
== econv_destination_buffer_full
) {
1068 if (dst_bufsize
* 2 < dst_bufsize
) {
1074 dst_str
= xrealloc(dst_str
, dst_bufsize
);
1075 dp
= dst_str
+dst_len
;
1076 res
= rb_econv_convert(ec
, &sp
, str
+len
, &dp
, dst_str
+dst_bufsize
, 0);
1077 dst_len
= dp
- dst_str
;
1079 if (res
!= econv_finished
) {
1085 *dst_len_ptr
= dst_len
;
1089 /* result: 0:success -1:failure */
1091 rb_econv_insert_output(rb_econv_t
*ec
,
1092 const unsigned char *str
, size_t len
, const char *str_encoding
)
1094 const char *insert_encoding
= rb_econv_encoding_to_insert_output(ec
);
1095 const unsigned char *insert_str
;
1099 const rb_transcoder
*tr
;
1101 unsigned char **buf_start_p
;
1102 unsigned char **data_start_p
;
1103 unsigned char **data_end_p
;
1104 unsigned char **buf_end_p
;
1111 if (encoding_equal(insert_encoding
, str_encoding
)) {
1116 insert_str
= allocate_converted_string(str_encoding
, insert_encoding
, str
, len
, &insert_len
);
1117 if (insert_str
== NULL
)
1122 tr
= tc
->transcoder
;
1125 if (tr
->stateful_type
== stateful_encoder
) {
1126 need
+= tc
->readagain_len
;
1127 if (need
< insert_len
)
1129 if (ec
->last_trans_index
== 0) {
1130 buf_start_p
= &ec
->in_buf_start
;
1131 data_start_p
= &ec
->in_data_start
;
1132 data_end_p
= &ec
->in_data_end
;
1133 buf_end_p
= &ec
->in_buf_end
;
1136 rb_econv_elem_t
*ee
= &ec
->elems
[ec
->last_trans_index
-1];
1137 buf_start_p
= &ee
->out_buf_start
;
1138 data_start_p
= &ee
->out_data_start
;
1139 data_end_p
= &ee
->out_data_end
;
1140 buf_end_p
= &ee
->out_buf_end
;
1144 rb_econv_elem_t
*ee
= &ec
->elems
[ec
->last_trans_index
];
1145 buf_start_p
= &ee
->out_buf_start
;
1146 data_start_p
= &ee
->out_data_start
;
1147 data_end_p
= &ee
->out_data_end
;
1148 buf_end_p
= &ee
->out_buf_end
;
1151 if (*buf_start_p
== NULL
) {
1152 unsigned char *buf
= xmalloc(need
);
1154 *data_start_p
= buf
;
1156 *buf_end_p
= buf
+need
;
1158 else if (*buf_end_p
- *data_end_p
< need
) {
1159 MEMMOVE(*buf_start_p
, *data_start_p
, unsigned char, *data_end_p
- *data_start_p
);
1160 *data_end_p
= *buf_start_p
+ (*data_end_p
- *data_start_p
);
1161 *data_start_p
= *buf_start_p
;
1162 if (*buf_end_p
- *data_end_p
< need
) {
1164 size_t s
= (*data_end_p
- *buf_start_p
) + need
;
1167 buf
= xrealloc(*buf_start_p
, s
);
1168 *data_start_p
= buf
;
1169 *data_end_p
= buf
+ (*data_end_p
- *buf_start_p
);
1171 *buf_end_p
= buf
+ s
;
1175 if (tr
->stateful_type
== stateful_encoder
) {
1176 memcpy(*data_end_p
, TRANSCODING_READBUF(tc
)+tc
->recognized_len
, tc
->readagain_len
);
1177 *data_end_p
+= tc
->readagain_len
;
1178 tc
->readagain_len
= 0;
1180 memcpy(*data_end_p
, insert_str
, insert_len
);
1181 *data_end_p
+= insert_len
;
1183 if (insert_str
!= str
)
1184 xfree((void*)insert_str
);
1188 if (insert_str
!= str
)
1189 xfree((void*)insert_str
);
1194 rb_econv_close(rb_econv_t
*ec
)
1198 for (i
= 0; i
< ec
->num_trans
; i
++) {
1199 rb_transcoding_close(ec
->elems
[i
].tc
);
1200 if (ec
->elems
[i
].out_buf_start
)
1201 xfree(ec
->elems
[i
].out_buf_start
);
1209 rb_econv_putbackable(rb_econv_t
*ec
)
1211 return ec
->elems
[0].tc
->readagain_len
;
1215 rb_econv_putback(rb_econv_t
*ec
, unsigned char *p
, int n
)
1217 rb_transcoding
*tc
= ec
->elems
[0].tc
;
1218 memcpy(p
, TRANSCODING_READBUF(tc
) + tc
->recognized_len
, n
);
1219 tc
->readagain_len
-= n
;
1222 struct stateless_encoding_t
{
1223 const char *stateless_enc
;
1224 const char *stateful_enc
;
1228 stateless_encoding_i(st_data_t key
, st_data_t val
, st_data_t arg
)
1230 struct stateless_encoding_t
*data
= (struct stateless_encoding_t
*)arg
;
1231 st_table
*table2
= (st_table
*)val
;
1234 if (st_lookup(table2
, (st_data_t
)data
->stateful_enc
, &v
)) {
1235 transcoder_entry_t
*entry
= (transcoder_entry_t
*)v
;
1236 const rb_transcoder
*tr
= load_transcoder_entry(entry
);
1237 if (tr
&& tr
->stateful_type
== stateful_encoder
) {
1238 data
->stateless_enc
= tr
->from_encoding
;
1246 rb_econv_stateless_encoding(const char *stateful_enc
)
1248 struct stateless_encoding_t data
;
1249 data
.stateful_enc
= stateful_enc
;
1250 data
.stateless_enc
= NULL
;
1251 st_foreach(transcoder_table
, stateless_encoding_i
, (st_data_t
)&data
);
1252 if (data
.stateless_enc
)
1253 return data
.stateless_enc
;
1258 rb_econv_string(rb_econv_t
*ec
, VALUE src
, long off
, long len
, VALUE dst
, int flags
)
1260 unsigned const char *ss
, *sp
, *se
;
1261 unsigned char *ds
, *dp
, *de
;
1262 rb_econv_result_t res
;
1265 dst
= rb_str_buf_new(len
);
1268 res
= econv_destination_buffer_full
;
1269 while (res
== econv_destination_buffer_full
) {
1270 long dlen
= RSTRING_LEN(dst
);
1271 int max_output
= ec
->last_tc
->transcoder
->max_output
;
1272 if (rb_str_capacity(dst
) - dlen
< (size_t)len
+ max_output
) {
1273 unsigned long new_capa
= (unsigned long)dlen
+ len
+ max_output
;
1274 if (LONG_MAX
< new_capa
)
1275 rb_raise(rb_eArgError
, "too long string");
1276 rb_str_resize(dst
, new_capa
);
1277 rb_str_set_len(dst
, dlen
);
1279 ss
= sp
= (const unsigned char *)RSTRING_PTR(src
) + off
;
1281 ds
= dp
= (unsigned char *)RSTRING_PTR(dst
) + dlen
;
1282 de
= ds
+ rb_str_capacity(dst
);
1283 res
= rb_econv_convert(ec
, &sp
, se
, &dp
, de
, flags
);
1286 rb_str_set_len(dst
, dlen
+ (dp
- ds
));
1287 rb_econv_check_error(ec
);
1295 make_econv_exception(rb_econv_t
*ec
)
1298 if (ec
->last_error
.result
== econv_invalid_byte_sequence
) {
1299 VALUE bytes
= rb_str_new((const char *)ec
->last_error
.error_bytes_start
,
1300 ec
->last_error
.error_bytes_len
);
1302 dumped
= rb_str_dump(bytes
);
1303 mesg
= rb_sprintf("invalid byte sequence: %s on %s",
1304 StringValueCStr(dumped
),
1305 ec
->last_error
.source_encoding
);
1306 exc
= rb_exc_new3(rb_eInvalidByteSequence
, mesg
);
1307 rb_ivar_set(exc
, rb_intern("source_encoding"), rb_str_new2(ec
->last_error
.source_encoding
));
1308 rb_ivar_set(exc
, rb_intern("destination_encoding"), rb_str_new2(ec
->last_error
.destination_encoding
));
1309 rb_ivar_set(exc
, rb_intern("error_bytes"), bytes
);
1312 if (ec
->last_error
.result
== econv_undefined_conversion
) {
1313 VALUE bytes
= rb_str_new((const char *)ec
->last_error
.error_bytes_start
,
1314 ec
->last_error
.error_bytes_len
);
1317 dumped
= rb_str_dump(bytes
);
1318 mesg
= rb_sprintf("conversion undefined: %s from %s to %s",
1319 StringValueCStr(dumped
),
1320 ec
->last_error
.source_encoding
,
1321 ec
->last_error
.destination_encoding
);
1322 exc
= rb_exc_new3(rb_eConversionUndefined
, mesg
);
1323 idx
= rb_enc_find_index(ec
->last_error
.source_encoding
);
1324 rb_ivar_set(exc
, rb_intern("source_encoding"), rb_str_new2(ec
->last_error
.source_encoding
));
1325 rb_ivar_set(exc
, rb_intern("destination_encoding"), rb_str_new2(ec
->last_error
.destination_encoding
));
1326 idx
= rb_enc_find_index(ec
->last_error
.source_encoding
);
1328 rb_enc_associate_index(bytes
, idx
);
1329 rb_ivar_set(exc
, rb_intern("error_char"), bytes
);
1338 unsigned char *(*resize_destination
)(VALUE
, int, int),
1340 unsigned char **out_start_ptr
,
1341 unsigned char **out_pos
,
1342 unsigned char **out_stop_ptr
)
1344 size_t len
= (*out_pos
- *out_start_ptr
);
1345 size_t new_len
= (len
+ max_output
) * 2;
1346 *out_start_ptr
= resize_destination(destination
, len
, new_len
);
1347 *out_pos
= *out_start_ptr
+ len
;
1348 *out_stop_ptr
= *out_start_ptr
+ new_len
;
1352 output_replacement_character(rb_econv_t
*ec
)
1354 rb_transcoding
*tc
= ec
->last_tc
;
1355 const rb_transcoder
*tr
;
1357 const unsigned char *replacement
;
1358 const char *repl_enc
;
1362 tr
= tc
->transcoder
;
1363 enc
= rb_enc_find(tr
->to_encoding
);
1365 replacement
= (const unsigned char *)get_replacement_character(enc
, &len
, &repl_enc
);
1367 ret
= rb_econv_insert_output(ec
, replacement
, len
, repl_enc
);
1376 transcode_loop(const unsigned char **in_pos
, unsigned char **out_pos
,
1377 const unsigned char *in_stop
, unsigned char *out_stop
,
1379 unsigned char *(*resize_destination
)(VALUE
, int, int),
1380 const char *from_encoding
,
1381 const char *to_encoding
,
1385 rb_transcoding
*last_tc
;
1386 rb_econv_result_t ret
;
1387 unsigned char *out_start
= *out_pos
;
1391 ec
= rb_econv_open(from_encoding
, to_encoding
, 0);
1393 rb_raise(rb_eArgError
, "transcoding not supported (from %s to %s)", from_encoding
, to_encoding
);
1395 last_tc
= ec
->last_tc
;
1396 max_output
= last_tc
->transcoder
->max_output
;
1399 ret
= rb_econv_convert(ec
, in_pos
, in_stop
, out_pos
, out_stop
, opt
);
1400 if (ret
== econv_invalid_byte_sequence
) {
1401 /* deal with invalid byte sequence */
1402 /* todo: add more alternative behaviors */
1403 if (opt
&INVALID_IGNORE
) {
1406 else if (opt
&INVALID_REPLACE
) {
1407 if (output_replacement_character(ec
) == 0)
1410 exc
= make_econv_exception(ec
);
1414 if (ret
== econv_undefined_conversion
) {
1415 /* valid character in from encoding
1416 * but no related character(s) in to encoding */
1417 /* todo: add more alternative behaviors */
1418 if (opt
&UNDEF_IGNORE
) {
1421 else if (opt
&UNDEF_REPLACE
) {
1422 if (output_replacement_character(ec
) == 0)
1425 exc
= make_econv_exception(ec
);
1429 if (ret
== econv_destination_buffer_full
) {
1430 more_output_buffer(destination
, resize_destination
, max_output
, &out_start
, out_pos
, &out_stop
);
1438 /* sample transcode_loop implementation in byte-by-byte stream style */
1440 transcode_loop(const unsigned char **in_pos
, unsigned char **out_pos
,
1441 const unsigned char *in_stop
, unsigned char *out_stop
,
1443 unsigned char *(*resize_destination
)(VALUE
, int, int),
1444 const char *from_encoding
,
1445 const char *to_encoding
,
1449 rb_transcoding
*last_tc
;
1450 rb_econv_result_t ret
;
1451 unsigned char *out_start
= *out_pos
;
1452 const unsigned char *ptr
;
1456 ec
= rb_econv_open(from_encoding
, to_encoding
, 0);
1458 rb_raise(rb_eArgError
, "transcoding not supported (from %s to %s)", from_encoding
, to_encoding
);
1460 last_tc
= ec
->last_tc
;
1461 max_output
= ec
->elems
[ec
->num_trans
-1].tc
->transcoder
->max_output
;
1463 ret
= econv_source_buffer_empty
;
1465 while (ret
!= econv_finished
) {
1466 unsigned char input_byte
;
1467 const unsigned char *p
= &input_byte
;
1469 if (ret
== econv_source_buffer_empty
) {
1470 if (ptr
< in_stop
) {
1472 ret
= rb_econv_convert(ec
, &p
, p
+1, out_pos
, out_stop
, ECONV_PARTIAL_INPUT
);
1475 ret
= rb_econv_convert(ec
, NULL
, NULL
, out_pos
, out_stop
, 0);
1479 ret
= rb_econv_convert(ec
, NULL
, NULL
, out_pos
, out_stop
, ECONV_PARTIAL_INPUT
);
1481 if (&input_byte
!= p
)
1482 ptr
+= p
- &input_byte
;
1484 case econv_invalid_byte_sequence
:
1485 /* deal with invalid byte sequence */
1486 /* todo: add more alternative behaviors */
1487 if (opt
&INVALID_IGNORE
) {
1490 else if (opt
&INVALID_REPLACE
) {
1491 if (output_replacement_character(ec
) == 0)
1494 exc
= make_econv_exception(ec
);
1499 case econv_undefined_conversion
:
1500 /* valid character in from encoding
1501 * but no related character(s) in to encoding */
1502 /* todo: add more alternative behaviors */
1503 if (opt
&UNDEF_IGNORE
) {
1506 else if (opt
&UNDEF_REPLACE
) {
1507 if (output_replacement_character(ec
) == 0)
1510 exc
= make_econv_exception(ec
);
1515 case econv_destination_buffer_full
:
1516 more_output_buffer(destination
, resize_destination
, max_output
, &out_start
, out_pos
, &out_stop
);
1519 case econv_source_buffer_empty
:
1522 case econv_finished
:
1534 * String-specific code
1537 static unsigned char *
1538 str_transcoding_resize(VALUE destination
, int len
, int new_len
)
1540 rb_str_resize(destination
, new_len
);
1541 return (unsigned char *)RSTRING_PTR(destination
);
1545 str_transcode(int argc
, VALUE
*argv
, VALUE
*self
)
1550 unsigned char *buf
, *bp
, *sp
;
1551 const unsigned char *fromp
;
1552 rb_encoding
*from_enc
, *to_enc
;
1553 const char *from_e
, *to_e
;
1554 int from_encidx
, to_encidx
;
1555 VALUE from_encval
, to_encval
;
1559 opt
= rb_check_convert_type(argv
[argc
-1], T_HASH
, "Hash", "to_hash");
1564 v
= rb_hash_aref(opt
, sym_invalid
);
1567 else if (v
==sym_ignore
) {
1568 options
|= INVALID_IGNORE
;
1570 else if (v
==sym_replace
) {
1571 options
|= INVALID_REPLACE
;
1572 v
= rb_hash_aref(opt
, sym_replace
);
1575 rb_raise(rb_eArgError
, "unknown value for invalid character option");
1577 v
= rb_hash_aref(opt
, sym_undef
);
1580 else if (v
==sym_ignore
) {
1581 options
|= UNDEF_IGNORE
;
1583 else if (v
==sym_replace
) {
1584 options
|= UNDEF_REPLACE
;
1587 rb_raise(rb_eArgError
, "unknown value for undefined character option");
1590 if (argc
< 1 || argc
> 2) {
1591 rb_raise(rb_eArgError
, "wrong number of arguments (%d for 1..2)", argc
);
1593 if ((to_encidx
= rb_to_encoding_index(to_encval
= argv
[0])) < 0) {
1596 to_e
= StringValueCStr(to_encval
);
1599 to_enc
= rb_enc_from_index(to_encidx
);
1600 to_e
= rb_enc_name(to_enc
);
1603 from_encidx
= rb_enc_get_index(str
);
1604 from_enc
= rb_enc_from_index(from_encidx
);
1605 from_e
= rb_enc_name(from_enc
);
1607 else if ((from_encidx
= rb_to_encoding_index(from_encval
= argv
[1])) < 0) {
1609 from_e
= StringValueCStr(from_encval
);
1612 from_enc
= rb_enc_from_index(from_encidx
);
1613 from_e
= rb_enc_name(from_enc
);
1616 if (from_enc
&& from_enc
== to_enc
) {
1619 if (from_enc
&& to_enc
&& rb_enc_asciicompat(from_enc
) && rb_enc_asciicompat(to_enc
)) {
1620 if (ENC_CODERANGE(str
) == ENC_CODERANGE_7BIT
) {
1624 if (encoding_equal(from_e
, to_e
)) {
1628 fromp
= sp
= (unsigned char *)RSTRING_PTR(str
);
1629 slen
= RSTRING_LEN(str
);
1630 blen
= slen
+ 30; /* len + margin */
1631 dest
= rb_str_tmp_new(blen
);
1632 bp
= (unsigned char *)RSTRING_PTR(dest
);
1634 transcode_loop(&fromp
, &bp
, (sp
+slen
), (bp
+blen
), dest
, str_transcoding_resize
, from_e
, to_e
, options
);
1635 if (fromp
!= sp
+slen
) {
1636 rb_raise(rb_eArgError
, "not fully converted, %"PRIdPTRDIFF
" bytes left", sp
+slen
-fromp
);
1638 buf
= (unsigned char *)RSTRING_PTR(dest
);
1640 rb_str_set_len(dest
, bp
- buf
);
1644 to_encidx
= rb_define_dummy_encoding(to_e
);
1652 str_encode_associate(VALUE str
, int encidx
)
1656 rb_enc_associate_index(str
, encidx
);
1658 /* a transcoded string is never broken. */
1659 if (rb_enc_asciicompat(rb_enc_from_index(encidx
))) {
1660 rb_str_coderange_scan_restartable(RSTRING_PTR(str
), RSTRING_END(str
), 0, &cr
);
1663 cr
= ENC_CODERANGE_VALID
;
1665 ENC_CODERANGE_SET(str
, cr
);
1671 * str.encode!(encoding [, options] ) => str
1672 * str.encode!(to_encoding, from_encoding [, options] ) => str
1674 * The first form transcodes the contents of <i>str</i> from
1675 * str.encoding to +encoding+.
1676 * The second form transcodes the contents of <i>str</i> from
1677 * from_encoding to to_encoding.
1678 * The options Hash gives details for conversion. See String#encode
1680 * Returns the string even if no changes were made.
1684 str_encode_bang(int argc
, VALUE
*argv
, VALUE str
)
1687 int encidx
= str_transcode(argc
, argv
, &newstr
);
1689 if (encidx
< 0) return str
;
1690 rb_str_shared_replace(str
, newstr
);
1691 return str_encode_associate(str
, encidx
);
1696 * str.encode(encoding [, options] ) => str
1697 * str.encode(to_encoding, from_encoding [, options] ) => str
1699 * The first form returns a copy of <i>str</i> transcoded
1700 * to encoding +encoding+.
1701 * The second form returns a copy of <i>str</i> transcoded
1702 * from from_encoding to to_encoding.
1703 * The options Hash gives details for conversion. Details
1708 str_encode(int argc
, VALUE
*argv
, VALUE str
)
1711 int encidx
= str_transcode(argc
, argv
, &newstr
);
1713 if (encidx
< 0) return rb_str_dup(str
);
1714 RBASIC(newstr
)->klass
= rb_obj_class(str
);
1715 return str_encode_associate(newstr
, encidx
);
1719 rb_str_transcode(VALUE str
, VALUE to
)
1721 return str_encode(1, &to
, str
);
1725 econv_free(rb_econv_t
*ec
)
1731 econv_s_allocate(VALUE klass
)
1733 return Data_Wrap_Struct(klass
, NULL
, econv_free
, NULL
);
1736 static rb_encoding
*
1737 make_dummy_encoding(const char *name
)
1741 idx
= rb_define_dummy_encoding(name
);
1742 enc
= rb_enc_from_index(idx
);
1748 * Encoding::Converter.new(source_encoding, destination_encoding)
1749 * Encoding::Converter.new(source_encoding, destination_encoding, flags)
1752 * Encoding::Converter::UNIVERSAL_NEWLINE_DECODER # convert CRLF and CR to LF at last
1753 * Encoding::Converter::CRLF_NEWLINE_ENCODER # convert LF to CRLF at first
1754 * Encoding::Converter::CR_NEWLINE_ENCODER # convert LF to CR at first
1756 * Encoding::Converter.new creates an instance of Encoding::Converter.
1758 * source_encoding and destination_encoding should be strings.
1759 * flags should be an integer.
1762 * # UTF-16BE to UTF-8
1763 * ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
1765 * # (1) convert UTF-16BE to UTF-8
1766 * # (2) convert CRLF and CR to LF
1767 * ec = Encoding::Converter.new("UTF-16BE", "UTF-8", Encoding::Converter::UNIVERSAL_NEWLINE_DECODER)
1769 * # (1) convert LF to CRLF
1770 * # (2) convert UTF-8 to UTF-16BE
1771 * ec = Encoding::Converter.new("UTF-8", "UTF-16BE", Encoding::Converter::CRLF_NEWLINE_ENCODER)
1775 econv_init(int argc
, VALUE
*argv
, VALUE self
)
1777 VALUE source_encoding
, destination_encoding
, flags_v
;
1779 const char *sname
, *dname
;
1780 rb_encoding
*senc
, *denc
;
1784 rb_scan_args(argc
, argv
, "21", &source_encoding
, &destination_encoding
, &flags_v
);
1786 if (flags_v
== Qnil
)
1789 flags
= NUM2INT(flags_v
);
1792 sidx
= rb_to_encoding_index(source_encoding
);
1794 senc
= rb_enc_from_index(sidx
);
1797 StringValue(source_encoding
);
1801 didx
= rb_to_encoding_index(destination_encoding
);
1803 denc
= rb_enc_from_index(didx
);
1806 StringValue(destination_encoding
);
1809 sname
= senc
? senc
->name
: StringValueCStr(source_encoding
);
1810 dname
= denc
? denc
->name
: StringValueCStr(destination_encoding
);
1812 if (DATA_PTR(self
)) {
1813 rb_raise(rb_eTypeError
, "already initialized");
1816 ec
= rb_econv_open(sname
, dname
, flags
);
1818 rb_raise(rb_eArgError
, "encoding convewrter not supported (from %s to %s)", sname
, dname
);
1821 if (*sname
&& *dname
) { /* check "" to "universal_newline" */
1823 senc
= make_dummy_encoding(sname
);
1825 denc
= make_dummy_encoding(dname
);
1828 ec
->source_encoding
= senc
;
1829 ec
->destination_encoding
= denc
;
1831 ec
->source_encoding_name
= ec
->elems
[0].tc
->transcoder
->from_encoding
;
1832 ec
->destination_encoding_name
= ec
->last_tc
->transcoder
->to_encoding
;
1834 DATA_PTR(self
) = ec
;
1840 econv_inspect(VALUE self
)
1842 const char *cname
= rb_obj_classname(self
);
1843 rb_econv_t
*ec
= DATA_PTR(self
);
1846 return rb_sprintf("#<%s: uninitialized>", cname
);
1848 return rb_sprintf("#<%s: %s to %s>", cname
,
1849 ec
->source_encoding_name
,
1850 ec
->destination_encoding_name
);
1853 #define IS_ECONV(obj) (RDATA(obj)->dfree == (RUBY_DATA_FUNC)econv_free)
1856 check_econv(VALUE self
)
1858 Check_Type(self
, T_DATA
);
1859 if (!IS_ECONV(self
)) {
1860 rb_raise(rb_eTypeError
, "wrong argument type %s (expected Encoding::Converter)",
1861 rb_class2name(CLASS_OF(self
)));
1863 if (!DATA_PTR(self
)) {
1864 rb_raise(rb_eTypeError
, "uninitialized encoding converter");
1866 return DATA_PTR(self
);
1871 * source_encoding -> encoding
1873 * returns source encoding as Encoding object.
1876 econv_source_encoding(VALUE self
)
1878 rb_econv_t
*ec
= check_econv(self
);
1879 if (!ec
->source_encoding
)
1881 return rb_enc_from_encoding(ec
->source_encoding
);
1886 * destination_encoding -> encoding
1888 * returns destination encoding as Encoding object.
1891 econv_destination_encoding(VALUE self
)
1893 rb_econv_t
*ec
= check_econv(self
);
1894 if (!ec
->destination_encoding
)
1896 return rb_enc_from_encoding(ec
->destination_encoding
);
1900 econv_result_to_symbol(rb_econv_result_t res
)
1903 case econv_invalid_byte_sequence
: return ID2SYM(rb_intern("invalid_byte_sequence"));
1904 case econv_undefined_conversion
: return ID2SYM(rb_intern("undefined_conversion"));
1905 case econv_destination_buffer_full
: return ID2SYM(rb_intern("destination_buffer_full"));
1906 case econv_source_buffer_empty
: return ID2SYM(rb_intern("source_buffer_empty"));
1907 case econv_finished
: return ID2SYM(rb_intern("finished"));
1908 case econv_output_followed_by_input
: return ID2SYM(rb_intern("output_followed_by_input"));
1909 default: return INT2NUM(res
); /* should not be reached */
1915 * primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol
1916 * primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, flags) -> symbol
1919 * Encoding::Converter::PARTIAL_INPUT # source buffer may be part of larger source
1920 * Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT # stop conversion after output before input
1923 * :invalid_byte_sequence
1924 * :undefined_conversion
1925 * :output_followed_by_input
1926 * :destination_buffer_full
1927 * :source_buffer_empty
1930 * primitive_convert converts source_buffer into destination_buffer.
1932 * source_buffer and destination_buffer should be strings.
1933 * destination_byteoffset should be an integer or nil.
1934 * destination_bytesize and flags should be integers.
1936 * primitive_convert converts the content of source_buffer from the beginning
1937 * and stores the result into destination_buffer.
1939 * destination_byteoffset and destination_bytesize specify the region in which
1940 * the converted result is stored.
1941 * destination_byteoffset specifies the start position in destination_buffer in bytes.
1942 * If destination_byteoffset is nil,
1943 * destination_buffer.bytesize is used for appending the result.
1944 * destination_bytesize specifies the maximum number of bytes.
1945 * After conversion, destination_buffer is resized to
1946 * destination_byteoffset + actually converted number of bytes.
1947 * Also destination_buffer's encoding is set to destination_encoding.
1949 * primitive_convert drops the first part of source_buffer.
1950 * The dropped part is either converted into destination_buffer or
1951 * buffered in the Encoding::Converter object.
1953 * primitive_convert stops conversion when one of the following conditions is met.
1954 * - invalid byte sequence found in source buffer (:invalid_byte_sequence)
1955 * - character not representable in output encoding (:undefined_conversion)
1956 * - after some output is generated, before input is done (:output_followed_by_input)
1957 * this occurs only when OUTPUT_FOLLOWED_BY_INPUT is specified.
1958 * - destination buffer is full (:destination_buffer_full)
1959 * - source buffer is empty (:source_buffer_empty)
1960 * this occurs only when PARTIAL_INPUT is specified.
1961 * - conversion is finished (:finished)
1964 * ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
1965 * ret = ec.primitive_convert(src="pi", dst="", nil, 100)
1966 * p [ret, src, dst] #=> [:finished, "", "\x00p\x00i"]
1968 * ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
1969 * ret = ec.primitive_convert(src="pi", dst="", nil, 1)
1970 * p [ret, src, dst] #=> [:destination_buffer_full, "i", "\x00"]
1971 * ret = ec.primitive_convert(src, dst="", nil, 1)
1972 * p [ret, src, dst] #=> [:destination_buffer_full, "", "p"]
1973 * ret = ec.primitive_convert(src, dst="", nil, 1)
1974 * p [ret, src, dst] #=> [:destination_buffer_full, "", "\x00"]
1975 * ret = ec.primitive_convert(src, dst="", nil, 1)
1976 * p [ret, src, dst] #=> [:finished, "", "i"]
1980 econv_primitive_convert(int argc
, VALUE
*argv
, VALUE self
)
1982 VALUE input
, output
, output_byteoffset_v
, output_bytesize_v
, flags_v
;
1983 rb_econv_t
*ec
= check_econv(self
);
1984 rb_econv_result_t res
;
1985 const unsigned char *ip
, *is
;
1986 unsigned char *op
, *os
;
1987 long output_byteoffset
, output_bytesize
;
1988 unsigned long output_byteend
;
1991 rb_scan_args(argc
, argv
, "41", &input
, &output
, &output_byteoffset_v
, &output_bytesize_v
, &flags_v
);
1993 if (output_byteoffset_v
== Qnil
)
1994 output_byteoffset
= 0;
1996 output_byteoffset
= NUM2LONG(output_byteoffset_v
);
1998 output_bytesize
= NUM2LONG(output_bytesize_v
);
2000 if (flags_v
== Qnil
)
2003 flags
= NUM2INT(flags_v
);
2005 StringValue(output
);
2007 rb_str_modify(output
);
2009 if (output_byteoffset_v
== Qnil
)
2010 output_byteoffset
= RSTRING_LEN(output
);
2012 if (output_byteoffset
< 0)
2013 rb_raise(rb_eArgError
, "negative output_byteoffset");
2015 if (RSTRING_LEN(output
) < output_byteoffset
)
2016 rb_raise(rb_eArgError
, "output_byteoffset too big");
2018 if (output_bytesize
< 0)
2019 rb_raise(rb_eArgError
, "negative output_bytesize");
2021 output_byteend
= (unsigned long)output_byteoffset
+
2022 (unsigned long)output_bytesize
;
2024 if (output_byteend
< (unsigned long)output_byteoffset
||
2025 LONG_MAX
< output_byteend
)
2026 rb_raise(rb_eArgError
, "output_byteoffset+output_bytesize too big");
2028 if (rb_str_capacity(output
) < output_byteend
)
2029 rb_str_resize(output
, output_byteend
);
2031 ip
= (const unsigned char *)RSTRING_PTR(input
);
2032 is
= ip
+ RSTRING_LEN(input
);
2034 op
= (unsigned char *)RSTRING_PTR(output
) + output_byteoffset
;
2035 os
= op
+ output_bytesize
;
2037 res
= rb_econv_convert(ec
, &ip
, is
, &op
, os
, flags
);
2038 rb_str_set_len(output
, op
-(unsigned char *)RSTRING_PTR(output
));
2039 rb_str_drop_bytes(input
, ip
- (unsigned char *)RSTRING_PTR(input
));
2041 if (ec
->destination_encoding
) {
2042 rb_enc_associate(output
, ec
->destination_encoding
);
2045 return econv_result_to_symbol(res
);
2050 * primitive_errinfo -> array
2052 * primitive_errinfo returns detailed information about the last error result
2053 * as a 6-element array:
2055 * [result, enc1, enc2, error_bytes, readagain_bytes, partial_input]
2057 * result is the last result of primitive_convert.
2059 * partial_input is :partial_input or nil.
2060 * :partial_input means that Encoding::Converter::PARTIAL_INPUT is specified
2061 * for primitive_convert.
2063 * Other elements are only meaningful when result is
2064 * :invalid_byte_sequence or :undefined_conversion.
2066 * enc1 and enc2 indicate a conversion step as a pair of strings.
2067 * For example, EUC-JP to ISO-8859-1 is
2068 * converted as EUC-JP -> UTF-8 -> ISO-8859-1.
2069 * So [enc1, enc2] is ["EUC-JP", "UTF-8"] or ["UTF-8", "ISO-8859-1"].
2071 * error_bytes and readagain_bytes indicate the byte sequences which caused the error.
2072 * error_bytes is the discarded portion.
2073 * readagain_bytes is the buffered portion which is read again on the next conversion.
2077 * # \xff is invalid as EUC-JP.
2078 * ec = Encoding::Converter.new("EUC-JP", "Shift_JIS")
2079 * ec.primitive_convert(src="\xff", dst="", nil, 10)
2080 * p ec.primitive_errinfo
2081 * #=> [:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xFF", "", nil]
2083 * # HIRAGANA LETTER A (\xa4\xa2 in EUC-JP) is not representable in ISO-8859-1.
2084 * # Since this error occurs in the UTF-8 to ISO-8859-1 conversion,
2085 * # error_bytes is HIRAGANA LETTER A in UTF-8 (\xE3\x81\x82).
2086 * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
2087 * ec.primitive_convert(src="\xa4\xa2", dst="", nil, 10)
2088 * p ec.primitive_errinfo
2089 * #=> [:undefined_conversion, "UTF-8", "ISO-8859-1", "\xE3\x81\x82", "", nil]
2091 * # partial character is invalid
2092 * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
2093 * ec.primitive_convert(src="\xa4", dst="", nil, 10)
2094 * p ec.primitive_errinfo
2095 * #=> [:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xA4", "", nil]
2097 * # Encoding::Converter::PARTIAL_INPUT prevents invalid errors by
2098 * # partial characters.
2099 * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
2100 * ec.primitive_convert(src="\xa4", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
2101 * p ec.primitive_errinfo
2102 * #=> [:source_buffer_empty, nil, nil, nil, nil, :partial_input]
2104 * # \xd8\x00\x00@ is invalid as UTF-16BE because
2105 * # no low surrogate after high surrogate (\xd8\x00).
2106 * # It is detected by the 3rd byte (\x00) which is part of the next character.
2107 * # So the high surrogate (\xd8\x00) is discarded and
2108 * # the 3rd byte is read again later.
2109 * # Since the byte is buffered in ec, it is dropped from src.
2110 * ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
2111 * ec.primitive_convert(src="\xd8\x00\x00@", dst="", nil, 10)
2112 * p ec.primitive_errinfo
2113 * #=> [:invalid_byte_sequence, "UTF-16BE", "UTF-8", "\xD8\x00", "\x00", nil]
2117 * # Similar to UTF-16BE, \x00\xd8@\x00 is invalid as UTF-16LE.
2118 * # The problem is detected by the 4th byte.
2119 * ec = Encoding::Converter.new("UTF-16LE", "UTF-8")
2120 * ec.primitive_convert(src="\x00\xd8@\x00", dst="", nil, 10)
2121 * p ec.primitive_errinfo
2122 * #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "@\x00", nil]
2128 econv_primitive_errinfo(VALUE self
)
2130 rb_econv_t
*ec
= check_econv(self
);
2134 ary
= rb_ary_new2(6);
2136 rb_ary_store(ary
, 0, econv_result_to_symbol(ec
->last_error
.result
));
2138 if (ec
->last_error
.source_encoding
)
2139 rb_ary_store(ary
, 1, rb_str_new2(ec
->last_error
.source_encoding
));
2141 if (ec
->last_error
.destination_encoding
)
2142 rb_ary_store(ary
, 2, rb_str_new2(ec
->last_error
.destination_encoding
));
2144 if (ec
->last_error
.error_bytes_start
) {
2145 rb_ary_store(ary
, 3, rb_str_new((const char *)ec
->last_error
.error_bytes_start
, ec
->last_error
.error_bytes_len
));
2146 rb_ary_store(ary
, 4, rb_str_new((const char *)ec
->last_error
.error_bytes_start
+ ec
->last_error
.error_bytes_len
, ec
->last_error
.readagain_len
));
2149 rb_ary_store(ary
, 5, ec
->last_error
.partial_input
? ID2SYM(rb_intern("partial_input")) : Qnil
);
2155 econv_primitive_insert_output(VALUE self
, VALUE string
)
2157 const char *insert_enc
;
2161 rb_econv_t
*ec
= check_econv(self
);
2163 StringValue(string
);
2164 insert_enc
= rb_econv_encoding_to_insert_output(ec
);
2165 string
= rb_str_transcode(string
, rb_enc_from_encoding(rb_enc_find(insert_enc
)));
2167 ret
= rb_econv_insert_output(ec
, (const unsigned char *)RSTRING_PTR(string
), RSTRING_LEN(string
), insert_enc
);
2175 econv_primitive_putback(VALUE self
, VALUE max
)
2177 rb_econv_t
*ec
= check_econv(self
);
2183 n
= rb_econv_putbackable(ec
);
2186 putbackable
= rb_econv_putbackable(ec
);
2187 if (putbackable
< n
)
2191 str
= rb_str_new(NULL
, n
);
2192 rb_econv_putback(ec
, (unsigned char *)RSTRING_PTR(str
), n
);
2198 rb_econv_check_error(rb_econv_t
*ec
)
2202 exc
= make_econv_exception(ec
);
2209 ecerr_source_encoding(VALUE self
)
2211 return rb_attr_get(self
, rb_intern("source_encoding"));
2215 ecerr_destination_encoding(VALUE self
)
2217 return rb_attr_get(self
, rb_intern("destination_encoding"));
2221 ecerr_error_char(VALUE self
)
2223 return rb_attr_get(self
, rb_intern("error_char"));
2227 ecerr_error_bytes(VALUE self
)
2229 return rb_attr_get(self
, rb_intern("error_bytes"));
2233 Init_transcode(void)
2235 rb_eConversionUndefined
= rb_define_class_under(rb_cEncoding
, "ConversionUndefined", rb_eStandardError
);
2236 rb_eInvalidByteSequence
= rb_define_class_under(rb_cEncoding
, "InvalidByteSequence", rb_eStandardError
);
2238 transcoder_table
= st_init_strcasetable();
2240 sym_invalid
= ID2SYM(rb_intern("invalid"));
2241 sym_undef
= ID2SYM(rb_intern("undef"));
2242 sym_ignore
= ID2SYM(rb_intern("ignore"));
2243 sym_replace
= ID2SYM(rb_intern("replace"));
2245 rb_define_method(rb_cString
, "encode", str_encode
, -1);
2246 rb_define_method(rb_cString
, "encode!", str_encode_bang
, -1);
2248 rb_cEncodingConverter
= rb_define_class_under(rb_cEncoding
, "Converter", rb_cData
);
2249 rb_define_alloc_func(rb_cEncodingConverter
, econv_s_allocate
);
2250 rb_define_method(rb_cEncodingConverter
, "initialize", econv_init
, -1);
2251 rb_define_method(rb_cEncodingConverter
, "inspect", econv_inspect
, 0);
2252 rb_define_method(rb_cEncodingConverter
, "source_encoding", econv_source_encoding
, 0);
2253 rb_define_method(rb_cEncodingConverter
, "destination_encoding", econv_destination_encoding
, 0);
2254 rb_define_method(rb_cEncodingConverter
, "primitive_convert", econv_primitive_convert
, -1);
2255 rb_define_method(rb_cEncodingConverter
, "primitive_errinfo", econv_primitive_errinfo
, 0);
2256 rb_define_method(rb_cEncodingConverter
, "primitive_insert_output", econv_primitive_insert_output
, 1);
2257 rb_define_method(rb_cEncodingConverter
, "primitive_putback", econv_primitive_putback
, 1);
2258 rb_define_const(rb_cEncodingConverter
, "PARTIAL_INPUT", INT2FIX(ECONV_PARTIAL_INPUT
));
2259 rb_define_const(rb_cEncodingConverter
, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(ECONV_OUTPUT_FOLLOWED_BY_INPUT
));
2260 rb_define_const(rb_cEncodingConverter
, "UNIVERSAL_NEWLINE_DECODER", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECODER
));
2261 rb_define_const(rb_cEncodingConverter
, "CRLF_NEWLINE_ENCODER", INT2FIX(ECONV_CRLF_NEWLINE_ENCODER
));
2262 rb_define_const(rb_cEncodingConverter
, "CR_NEWLINE_ENCODER", INT2FIX(ECONV_CR_NEWLINE_ENCODER
));
2264 rb_define_method(rb_eConversionUndefined
, "source_encoding", ecerr_source_encoding
, 0);
2265 rb_define_method(rb_eConversionUndefined
, "destination_encoding", ecerr_destination_encoding
, 0);
2266 rb_define_method(rb_eConversionUndefined
, "error_char", ecerr_error_char
, 0);
2268 rb_define_method(rb_eInvalidByteSequence
, "source_encoding", ecerr_source_encoding
, 0);
2269 rb_define_method(rb_eInvalidByteSequence
, "destination_encoding", ecerr_destination_encoding
, 0);
2270 rb_define_method(rb_eInvalidByteSequence
, "error_bytes", ecerr_error_bytes
, 0);