4 require_relative 'envutil'
6 class TestIO_M17N < Test::Unit::TestCase
22 def with_pipe(enc=nil)
32 def generate_file(path, content)
33 open(path, "wb") {|f| f.write content }
37 "#{str.dump}.force_encoding(#{str.encoding.name.dump})"
40 def assert_str_equal(expected, actual, message=nil)
41 full_message = build_message(message, <<EOT)
42 #{encdump expected} expected but not equal to
45 assert_block(full_message) { expected == actual }
50 generate_file('tmp', "")
52 assert_equal(Encoding.default_external, f.external_encoding)
53 assert_equal(nil, f.internal_encoding)
60 generate_file('tmp', "")
61 open("tmp", "rb") {|f|
62 assert_equal(Encoding.default_external, f.external_encoding)
63 assert_equal(nil, f.internal_encoding)
70 generate_file('tmp', "")
71 open("tmp", "r:euc-jp") {|f|
72 assert_equal(Encoding::EUC_JP, f.external_encoding)
73 assert_equal(nil, f.internal_encoding)
78 def test_open_r_enc_enc
80 generate_file('tmp', "")
81 open("tmp", "r:euc-jp:utf-8") {|f|
82 assert_equal(Encoding::EUC_JP, f.external_encoding)
83 assert_equal(Encoding::UTF_8, f.internal_encoding)
91 assert_equal(nil, f.external_encoding)
92 assert_equal(nil, f.internal_encoding)
99 open("tmp", "wb") {|f|
100 assert_equal(nil, f.external_encoding)
101 assert_equal(nil, f.internal_encoding)
108 open("tmp", "w:euc-jp") {|f|
109 assert_equal(Encoding::EUC_JP, f.external_encoding)
110 assert_equal(nil, f.internal_encoding)
115 def test_open_w_enc_enc
117 open("tmp", "w:euc-jp:utf-8") {|f|
118 assert_equal(Encoding::EUC_JP, f.external_encoding)
119 assert_equal(Encoding::UTF_8, f.internal_encoding)
124 def test_open_w_enc_enc_perm
126 open("tmp", "w:euc-jp:utf-8", 0600) {|f|
127 assert_equal(Encoding::EUC_JP, f.external_encoding)
128 assert_equal(Encoding::UTF_8, f.internal_encoding)
134 assert_equal(Encoding.default_external, STDIN.external_encoding)
135 assert_equal(nil, STDIN.internal_encoding)
139 assert_equal(nil, STDOUT.external_encoding)
140 assert_equal(nil, STDOUT.internal_encoding)
144 assert_equal(nil, STDERR.external_encoding)
145 assert_equal(nil, STDERR.internal_encoding)
148 def test_terminator_conversion
150 generate_file('tmp', "before \u00FF after")
151 s = open("tmp", "r:utf-8:iso-8859-1") {|f|
152 f.gets("\xFF".force_encoding("iso-8859-1"))
154 assert_equal(Encoding.find("iso-8859-1"), s.encoding)
155 assert_str_equal("before \xFF".force_encoding("iso-8859-1"), s, '[ruby-core:14288]')
159 def test_terminator_conversion2
161 generate_file('tmp', "before \xA1\xA2\xA2\xA3 after")
162 s = open("tmp", "r:euc-jp:utf-8") {|f|
163 f.gets("\xA2\xA2".force_encoding("euc-jp").encode("utf-8"))
165 assert_equal(Encoding.find("utf-8"), s.encoding)
166 assert_str_equal("before \xA1\xA2\xA2\xA3 after".force_encoding("euc-jp").encode("utf-8"), s, '[ruby-core:14319]')
170 def test_terminator_stateful_conversion
172 src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
173 generate_file('tmp', src)
174 s = open("tmp", "r:iso-2022-jp:euc-jp") {|f|
175 f.gets("0".force_encoding("euc-jp"))
177 assert_equal(Encoding.find("euc-jp"), s.encoding)
178 assert_str_equal(src.encode("euc-jp"), s)
182 def test_nonascii_terminator
184 generate_file('tmp', "before \xA2\xA2 after")
185 open("tmp", "r:euc-jp") {|f|
186 assert_raise(ArgumentError) {
187 f.gets("\xA2\xA2".force_encoding("utf-8"))
193 def test_pipe_terminator_conversion
194 with_pipe("euc-jp:utf-8") {|r, w|
195 w.write "before \xa2\xa2 after"
196 rs = "\xA2\xA2".encode("utf-8", "euc-jp")
199 assert_equal("before \xa2\xa2".encode("utf-8", "euc-jp"),
205 def test_pipe_conversion
206 with_pipe("euc-jp:utf-8") {|r, w|
208 assert_equal("\xa1\xa1".encode("utf-8", "euc-jp"), r.getc)
212 def test_pipe_convert_partial_read
213 with_pipe("euc-jp:utf-8") {|r, w|
220 assert_equal("\xa1\xa1".encode("utf-8", "euc-jp"), r.getc)
# Checks getc on a pipe opened as "euc-jp:utf-8": the visible assertions expect
# Encoding::InvalidByteSequence from getc, error_bytes equal to the single byte
# "\xA1" (as ASCII-8BIT), and a following read(10) that returns "xyz".
# NOTE(review): a second `def test_getc_invalid` (using "utf-8:euc-jp") appears
# later in this class. In Ruby a later definition with the same name replaces
# the earlier one, so this test would never run -- confirm and give one of the
# two a distinct name.
# NOTE(review): the lines that write to the pipe are not visible in this
# excerpt; verify the input bytes against the full file.
227 def test_getc_invalid
228 with_pipe("euc-jp:utf-8") {|r, w|
231 err = assert_raise(Encoding::InvalidByteSequence) { r.getc }
232 assert_equal("\xA1".force_encoding("ascii-8bit"), err.error_bytes)
233 assert_equal("xyz", r.read(10))
237 def test_getc_stateful_conversion
239 src = "\e$B\x23\x30\x23\x31\e(B".force_encoding("iso-2022-jp")
240 generate_file('tmp', src)
241 open("tmp", "r:iso-2022-jp:euc-jp") {|f|
242 assert_equal("\xa3\xb0".force_encoding("euc-jp"), f.getc)
243 assert_equal("\xa3\xb1".force_encoding("euc-jp"), f.getc)
248 def test_ungetc_stateful_conversion
250 src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
251 generate_file('tmp', src)
252 s = open("tmp", "r:iso-2022-jp:euc-jp") {|f|
253 f.ungetc("0".force_encoding("euc-jp"))
256 assert_equal(Encoding.find("euc-jp"), s.encoding)
257 assert_str_equal("0" + src.encode("euc-jp"), s)
261 def test_ungetc_stateful_conversion2
263 src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
264 former = "before \e$B\x23\x30\e(B".force_encoding("iso-2022-jp")
265 rs = "\e$B\x23\x30\e(B".force_encoding("iso-2022-jp")
266 latter = "\e$B\x23\x31\e(B after".force_encoding("iso-2022-jp")
267 generate_file('tmp', src)
268 s = open("tmp", "r:iso-2022-jp:euc-jp") {|f|
269 assert_equal(former.encode("euc-jp", "iso-2022-jp"),
270 f.gets(rs.encode("euc-jp", "iso-2022-jp")))
274 assert_equal(Encoding.find("euc-jp"), s.encoding)
275 assert_str_equal("0" + latter.encode("euc-jp"), s)
282 generate_file('tmp', "abc\n")
284 s = open('tmp', "r:#{enc}") {|f| f.gets }
285 assert_equal(enc, s.encoding)
286 assert_str_equal(src, s)
291 def test_open_nonascii
294 generate_file('tmp', src)
296 content = src.dup.force_encoding(enc)
297 s = open('tmp', "r:#{enc}") {|f| f.gets }
298 assert_equal(enc, s.encoding)
299 assert_str_equal(content, s)
304 def test_read_encoding
306 src = "\xc2\xa1\n".force_encoding("ASCII-8BIT")
307 generate_file('tmp', "\xc2\xa1\n")
309 content = src.dup.force_encoding(enc)
310 open('tmp', "r:#{enc}") {|f|
312 assert_equal(enc, s.encoding)
313 assert_str_equal(content[0], s)
315 open('tmp', "r:#{enc}") {|f|
317 assert_equal(enc, s.encoding)
318 assert_str_equal(content[0], s)
320 open('tmp', "r:#{enc}") {|f|
322 assert_equal(enc, s.encoding)
323 assert_str_equal(content, s)
325 open('tmp', "r:#{enc}") {|f|
327 assert_equal(enc, s.encoding)
328 assert_str_equal(content, s)
330 open('tmp', "r:#{enc}") {|f|
332 assert_equal(1, lines.length)
334 assert_equal(enc, s.encoding)
335 assert_str_equal(content, s)
337 open('tmp', "r:#{enc}") {|f|
339 assert_equal(enc, s.encoding)
340 assert_str_equal(content, s)
343 open('tmp', "r:#{enc}") {|f|
345 assert_equal(enc, s.encoding)
346 assert_str_equal(content, s)
348 open('tmp', "r:#{enc}") {|f|
350 assert_equal(Encoding::ASCII_8BIT, s.encoding)
351 assert_str_equal(src[0], s)
353 open('tmp', "r:#{enc}") {|f|
355 assert_equal(Encoding::ASCII_8BIT, s.encoding)
356 assert_str_equal(src[0], s)
358 open('tmp', "r:#{enc}") {|f|
360 assert_equal(Encoding::ASCII_8BIT, s.encoding)
361 assert_str_equal(src[0], s)
368 src = "\xc2\xa1\n".force_encoding("ascii-8bit")
370 open('tmp', "w") {|f|
372 f.write src.dup.force_encoding(enc)
375 open('tmp', 'r:ascii-8bit') {|f|
376 assert_equal(src*ENCS.length, f.read)
381 def test_write_conversion
383 eucjp = "\xb3\xa2".force_encoding("EUC-JP")
385 open('tmp', "w:EUC-JP") {|f|
386 assert_equal(Encoding::EUC_JP, f.external_encoding)
387 assert_equal(nil, f.internal_encoding)
390 assert_equal(eucjp, File.read('tmp').force_encoding("EUC-JP"))
391 open('tmp', 'r:EUC-JP:UTF-8') {|f|
392 assert_equal(Encoding::EUC_JP, f.external_encoding)
393 assert_equal(Encoding::UTF_8, f.internal_encoding)
394 assert_equal(utf8, f.read)
401 eucjp = "\xb3\xa2".force_encoding("EUC-JP")
404 assert_equal(Encoding.default_external, r.external_encoding)
405 assert_equal(nil, r.internal_encoding)
409 assert_equal(Encoding.default_external, s.encoding)
410 assert_str_equal(utf8.dup.force_encoding(Encoding.default_external), s)
413 with_pipe("EUC-JP") {|r,w|
414 assert_equal(Encoding::EUC_JP, r.external_encoding)
415 assert_equal(nil, r.internal_encoding)
418 assert_equal(eucjp, r.read)
421 with_pipe("UTF-8:EUC-JP") {|r,w|
422 assert_equal(Encoding::UTF_8, r.external_encoding)
423 assert_equal(Encoding::EUC_JP, r.internal_encoding)
426 assert_equal(eucjp, r.read)
430 with_pipe(enc) {|r, w|
434 assert_equal(enc, s.encoding)
439 next if enc == Encoding::ASCII_8BIT
440 next if enc == Encoding::UTF_8
441 with_pipe("#{enc}:UTF-8") {|r, w|
445 assert_equal(Encoding::UTF_8, s.encoding)
446 assert_equal(s.encode("UTF-8"), s)
453 with_pipe("EUC-JP") {|r, w|
455 Marshal.dump(data, w)
458 assert_nothing_raised("[ruby-dev:33264]") { result = Marshal.load(r) }
459 assert_equal(data, result)
464 with_pipe("UTF-8:EUC-JP") {|r, w|
468 assert_equal("\u{3042}".encode("euc-jp"), result)
473 with_pipe("euc-jp") {|r, w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close
474 assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.gets(1))
476 with_pipe("euc-jp") {|r, w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close
477 assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.gets(2))
479 with_pipe("euc-jp") {|r, w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close
480 assert_equal("\xa4\xa2\xa4\xa4".force_encoding("euc-jp"), r.gets(3))
482 with_pipe("euc-jp") {|r, w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close
483 assert_equal("\xa4\xa2\xa4\xa4".force_encoding("euc-jp"), r.gets(4))
485 with_pipe("euc-jp") {|r, w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close
486 assert_equal("\xa4\xa2\xa4\xa4\xa4\xa6".force_encoding("euc-jp"), r.gets(5))
488 with_pipe("euc-jp") {|r, w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close
489 assert_equal("\xa4\xa2\xa4\xa4\xa4\xa6".force_encoding("euc-jp"), r.gets(6))
491 with_pipe("euc-jp") {|r, w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close
492 assert_equal("\xa4\xa2\xa4\xa4\xa4\xa6\n".force_encoding("euc-jp"), r.gets(7))
494 with_pipe("euc-jp") {|r, w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close
495 assert_equal("\xa4\xa2\xa4\xa4\xa4\xa6\n".force_encoding("euc-jp"), r.gets(8))
497 with_pipe("euc-jp") {|r, w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close
498 assert_equal("\xa4\xa2\xa4\xa4\xa4\xa6\n".force_encoding("euc-jp"), r.gets(9))
502 def test_gets_invalid
503 with_pipe("utf-8:euc-jp") {|r, w|
504 before = "\u{3042}\u{3044}"
505 invalid = "\x80".force_encoding("utf-8")
506 after = "\u{3046}\u{3048}"
507 w << before + invalid + after
509 err = assert_raise(Encoding::InvalidByteSequence) { r.gets }
510 assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
511 assert_equal(after.encode("euc-jp"), r.gets)
# Checks getc on a pipe opened as "utf-8:euc-jp" when the stream contains an
# invalid byte ("\x80") between valid characters: the two characters before it
# are returned converted to EUC-JP, the invalid byte raises
# Encoding::InvalidByteSequence with error_bytes equal to that byte, and the
# two characters after it are then returned converted to EUC-JP.
# NOTE(review): `test_getc_invalid` is also defined earlier in this class
# (the "euc-jp:utf-8" variant). This definition replaces that one, so the
# earlier test would never run -- confirm and rename one of them.
# NOTE(review): the definitions of before1/before2/after1/after2 are not
# visible in this excerpt.
515 def test_getc_invalid
516 with_pipe("utf-8:euc-jp") {|r, w|
519 invalid = "\x80".force_encoding("utf-8")
522 w << before1 + before2 + invalid + after1 + after2
524 assert_equal(before1.encode("euc-jp"), r.getc)
525 assert_equal(before2.encode("euc-jp"), r.getc)
526 err = assert_raise(Encoding::InvalidByteSequence) { r.getc }
527 assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
528 assert_equal(after1.encode("euc-jp"), r.getc)
529 assert_equal(after2.encode("euc-jp"), r.getc)
533 def test_getc_invalid2
534 with_pipe("utf-16le:euc-jp") {|r, w|
535 before1 = "\x42\x30".force_encoding("utf-16le")
536 before2 = "\x44\x30".force_encoding("utf-16le")
537 invalid = "\x00\xd8".force_encoding("utf-16le")
538 after1 = "\x46\x30".force_encoding("utf-16le")
539 after2 = "\x48\x30".force_encoding("utf-16le")
540 w << before1 + before2 + invalid + after1 + after2
542 assert_equal(before1.encode("euc-jp"), r.getc)
543 assert_equal(before2.encode("euc-jp"), r.getc)
544 err = assert_raise(Encoding::InvalidByteSequence) { r.getc }
545 assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
546 assert_equal(after1.encode("euc-jp"), r.getc)
547 assert_equal(after2.encode("euc-jp"), r.getc)
552 with_pipe("utf-8:euc-jp") {|r, w|
556 assert_equal(str.encode("euc-jp"), r.read)
560 def test_read_all_invalid
561 with_pipe("utf-8:euc-jp") {|r, w|
562 before = "\u{3042}\u{3044}"
563 invalid = "\x80".force_encoding("utf-8")
564 after = "\u{3046}\u{3048}"
565 w << before + invalid + after
567 err = assert_raise(Encoding::InvalidByteSequence) { r.read }
568 assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
569 assert_equal(after.encode("euc-jp"), r.read)
573 def test_file_foreach
575 generate_file('tst', 'a' * 8191 + "\xa1\xa1")
576 assert_nothing_raised {
577 File.foreach('tst', :encoding=>"euc-jp") {|line| line.inspect }
582 def test_set_encoding
583 with_pipe("utf-8:euc-jp") {|r, w|
584 s = "\u3042".force_encoding("ascii-8bit")
585 s << "\x82\xa0".force_encoding("ascii-8bit")
588 assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
589 r.set_encoding("shift_jis:euc-jp")
590 assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
594 def test_set_encoding2
595 with_pipe("utf-8:euc-jp") {|r, w|
596 s = "\u3042".force_encoding("ascii-8bit")
597 s << "\x82\xa0".force_encoding("ascii-8bit")
600 assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
601 r.set_encoding("shift_jis", "euc-jp")
602 assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
606 def test_set_encoding_nil
607 with_pipe("utf-8:euc-jp") {|r, w|
608 s = "\u3042".force_encoding("ascii-8bit")
609 s << "\x82\xa0".force_encoding("ascii-8bit")
612 assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
614 assert_equal("\x82\xa0".force_encoding(Encoding.default_external), r.read)
618 def test_set_encoding_enc
619 with_pipe("utf-8:euc-jp") {|r, w|
620 s = "\u3042".force_encoding("ascii-8bit")
621 s << "\x82\xa0".force_encoding("ascii-8bit")
624 assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
625 r.set_encoding(Encoding::Shift_JIS)
626 assert_equal("\x82\xa0".force_encoding(Encoding::Shift_JIS), r.getc)
630 def test_write_conversion_fixenc
632 w.set_encoding("iso-2022-jp:utf-8")
633 t = Thread.new { r.read.force_encoding("ascii-8bit") }
637 assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), t.value)
641 def test_write_conversion_anyenc_stateful
643 w.set_encoding("iso-2022-jp")
644 t = Thread.new { r.read.force_encoding("ascii-8bit") }
646 w << "\x82\xa2".force_encoding("sjis")
648 assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), t.value)
652 def test_write_conversion_anyenc_stateless
654 w.set_encoding("euc-jp")
655 t = Thread.new { r.read.force_encoding("ascii-8bit") }
657 w << "\x82\xa2".force_encoding("sjis")
659 assert_equal("\xa4\xa2\xa4\xa4".force_encoding("ascii-8bit"), t.value)
663 def test_write_conversion_anyenc_stateful_nosync
666 w.set_encoding("iso-2022-jp")
667 t = Thread.new { r.read.force_encoding("ascii-8bit") }
669 w << "\x82\xa2".force_encoding("sjis")
671 assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), t.value)
675 def test_stdin_external_encoding_with_reopen
677 open("tst", "w+") {|f|
678 pid = spawn(EnvUtil.rubybin, '-e', <<-'End', 10=>f)
679 io = IO.new(10, "r+")
681 STDIN.external_encoding
687 result = f.read.force_encoding("ascii-8bit")
688 assert_equal("\u3042".force_encoding("ascii-8bit"), result)