4 class TestM17N < Test::Unit::TestCase
5 def assert_encoding(encname, actual, message=nil) # Asserts that +actual+ equals the Encoding looked up by the name +encname+.
6 assert_equal(Encoding.find(encname), actual, message)
10 def a(str) str.dup.force_encoding("ASCII-8BIT") end # Returns a copy of +str+ tagged as ASCII-8BIT; +str+ itself is not modified.
11 def e(str) str.dup.force_encoding("EUC-JP") end # Returns a copy of +str+ tagged as EUC-JP; +str+ itself is not modified.
12 def s(str) str.dup.force_encoding("Windows-31J") end # Returns a copy of +str+ tagged as Windows-31J; +str+ itself is not modified.
13 def u(str) str.dup.force_encoding("UTF-8") end # Returns a copy of +str+ tagged as UTF-8; +str+ itself is not modified.
18 def assert_strenc(bytes, enc, actual, message=nil) # Asserts +actual+ is a String with encoding +enc+ and the same raw bytes as +bytes+.
19 assert_instance_of(String, actual, message)
20 enc = Encoding.find(enc) if String === enc # +enc+ may be an encoding name or an Encoding object
21 assert_equal(enc, actual.encoding, message)
22 assert_equal(a(bytes), a(actual), message) # both sides are retagged ASCII-8BIT via a(), so only the bytes are compared
25 def assert_warning(pat, mesg=nil)
28 $stderr = StringIO.new(warn = '')
33 assert_match(pat, warn, mesg)
36 def assert_regexp_generic_encoding(r) # Asserts +r+ is not fixed-encoding and can be matched against each listed encoding without raising.
37 assert(!r.fixed_encoding?)
38 %w[ASCII-8BIT EUC-JP Windows-31J UTF-8].each {|ename|
39 # "\xc2\xa1" is a valid sequence for ASCII-8BIT, EUC-JP, Windows-31J and UTF-8.
40 assert_nothing_raised { r =~ "\xc2\xa1".force_encoding(ename) }
44 def assert_regexp_fixed_encoding(r)
45 assert(r.fixed_encoding?)
46 %w[ASCII-8BIT EUC-JP Windows-31J UTF-8].each {|ename|
47 enc = Encoding.find(ename)
49 assert_nothing_raised { r =~ "\xc2\xa1".force_encoding(enc) }
51 assert_raise(ArgumentError) { r =~ "\xc2\xa1".force_encoding(enc) }
56 def assert_regexp_generic_ascii(r) # Asserts +r+ has encoding US-ASCII and passes assert_regexp_generic_encoding.
57 assert_encoding("US-ASCII", r.encoding)
58 assert_regexp_generic_encoding(r)
61 def assert_regexp_fixed_ascii8bit(r) # Asserts +r+ has encoding ASCII-8BIT and passes assert_regexp_fixed_encoding.
62 assert_encoding("ASCII-8BIT", r.encoding)
63 assert_regexp_fixed_encoding(r)
66 def assert_regexp_fixed_eucjp(r) # Asserts +r+ has encoding EUC-JP and passes assert_regexp_fixed_encoding.
67 assert_encoding("EUC-JP", r.encoding)
68 assert_regexp_fixed_encoding(r)
71 def assert_regexp_fixed_sjis(r) # Asserts +r+ has encoding Windows-31J and passes assert_regexp_fixed_encoding.
72 assert_encoding("Windows-31J", r.encoding)
73 assert_regexp_fixed_encoding(r)
76 def assert_regexp_fixed_utf8(r) # Asserts +r+ has encoding UTF-8 and passes assert_regexp_fixed_encoding.
77 assert_encoding("UTF-8", r.encoding)
78 assert_regexp_fixed_encoding(r)
81 def assert_regexp_usascii_literal(r, enc, ex = nil)
82 code = "# -*- encoding: US-ASCII -*-\n#{r}.encoding"
84 assert_raise(ex) { eval(code) }
86 assert_equal(enc, eval(code))
92 if /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/ =~ d
95 "#{d}.force_encoding(#{str.encoding.name.dump})"
101 args.each_with_index {|a, i|
113 def assert_str_enc_propagation(t, s1, s2)
115 assert_equal(s1.encoding, t.encoding)
116 elsif !s2.ascii_only?
117 assert_equal(s2.encoding, t.encoding)
119 assert([s1.encoding, s2.encoding].include?(t.encoding))
123 def assert_same_result(expected_proc, actual_proc)
126 t = expected_proc.call
131 assert_raise(e.class) { actual_proc.call }
133 assert_equal(t, actual_proc.call)
137 def str_enc_compatible?(*strs)
140 encs << s.encoding if !s.ascii_only?
148 def test_string_ascii_literal # ASCII-only string literals evaluated from ASCII-8BIT source are expected to be US-ASCII.
149 assert_encoding("US-ASCII", eval(a(%{""})).encoding)
150 assert_encoding("US-ASCII", eval(a(%{"a"})).encoding)
153 def test_string_eucjp_literal # In EUC-JP source, ASCII-only literals are expected to be US-ASCII; non-ASCII bytes (raw or \x-escaped) make the literal EUC-JP.
154 assert_encoding("US-ASCII", eval(e(%{""})).encoding)
155 assert_encoding("US-ASCII", eval(e(%{"a"})).encoding)
156 assert_encoding("EUC-JP", eval(e(%{"\xa1\xa1"})).encoding)
157 assert_encoding("EUC-JP", eval(e(%{"\\xa1\\xa1"})).encoding)
158 assert_encoding("US-ASCII", eval(e(%{"\\x20"})).encoding)
159 assert_encoding("US-ASCII", eval(e(%{"\\n"})).encoding)
160 assert_encoding("EUC-JP", eval(e(%{"\\x80"})).encoding)
163 def test_utf8_literal # A \u escape is expected to yield a UTF-8 string; evaluating the ASCII-8BIT text \u3052\x80 is expected to raise SyntaxError.
164 assert_equal(Encoding::UTF_8, "\u3042".encoding, "[ruby-dev:33406] \"\\u3042\".encoding")
165 assert_raise(SyntaxError) { eval(a('\u3052\x80')) }
168 def test_string_mixed_unicode # A literal mixing raw \xc2\xa1 bytes with a \u{6666} escape (either order) is expected to be a SyntaxError unless the source is UTF-8.
169 assert_raise(SyntaxError) { eval(a(%{"\xc2\xa1\\u{6666}"})) }
170 assert_raise(SyntaxError) { eval(e(%{"\xc2\xa1\\u{6666}"})) }
171 assert_raise(SyntaxError) { eval(s(%{"\xc2\xa1\\u{6666}"})) }
172 assert_nothing_raised { eval(u(%{"\xc2\xa1\\u{6666}"})) }
173 assert_raise(SyntaxError) { eval(a(%{"\\u{6666}\xc2\xa1"})) }
174 assert_raise(SyntaxError) { eval(e(%{"\\u{6666}\xc2\xa1"})) }
175 assert_raise(SyntaxError) { eval(s(%{"\\u{6666}\xc2\xa1"})) }
176 assert_nothing_raised { eval(u(%{"\\u{6666}\xc2\xa1"})) }
179 def test_string_inspect
180 assert_equal('"\xFE"', e("\xfe").inspect)
181 assert_equal('"\x8E"', e("\x8e").inspect)
182 assert_equal('"\x8F"', e("\x8f").inspect)
183 assert_equal('"\x8F\xA1"', e("\x8f\xa1").inspect)
184 assert_equal('"\xEF"', s("\xef").inspect)
185 assert_equal('"\xC2"', u("\xc2").inspect)
186 assert_equal('"\xE0\x80"', u("\xe0\x80").inspect)
187 assert_equal('"\xF0\x80\x80"', u("\xf0\x80\x80").inspect)
188 assert_equal('"\xF8\x80\x80\x80"', u("\xf8\x80\x80\x80").inspect)
189 assert_equal('"\xFC\x80\x80\x80\x80"', u("\xfc\x80\x80\x80\x80").inspect)
191 assert_equal('"\xFE "', e("\xfe ").inspect)
192 assert_equal('"\x8E "', e("\x8e ").inspect)
193 assert_equal('"\x8F "', e("\x8f ").inspect)
194 assert_equal('"\x8F\xA1 "', e("\x8f\xa1 ").inspect)
195 assert_equal('"\xEF "', s("\xef ").inspect)
196 assert_equal('"\xC2 "', u("\xc2 ").inspect)
197 assert_equal('"\xE0\x80 "', u("\xe0\x80 ").inspect)
198 assert_equal('"\xF0\x80\x80 "', u("\xf0\x80\x80 ").inspect)
199 assert_equal('"\xF8\x80\x80\x80 "', u("\xf8\x80\x80\x80 ").inspect)
200 assert_equal('"\xFC\x80\x80\x80\x80 "', u("\xfc\x80\x80\x80\x80 ").inspect)
203 assert_equal(e("\"\\xA1\x8f\xA1\xA1\""), e("\xa1\x8f\xa1\xa1").inspect)
205 assert_equal('"\x81."', s("\x81.").inspect)
206 assert_equal(s("\"\x81@\""), s("\x81@").inspect)
208 assert_equal('"\xFC"', u("\xfc").inspect)
221 u("\xf8\x80\x80\x80"),
222 u("\xfc\x80\x80\x80\x80"),
232 u("\xf8\x80\x80\x80 "),
233 u("\xfc\x80\x80\x80\x80 "),
236 e("\xa1\x8f\xa1\xa1"),
245 assert_equal(str, eval(str.dump), "[ruby-dev:33142]")
249 def test_validate_redundant_utf8
250 bits_0x10ffff = "11110100 10001111 10111111 10111111"
254 "1110XXXX 10Xxxxxx 10xxxxxx",
255 "11110XXX 10XXxxxx 10xxxxxx 10xxxxxx",
256 "111110XX 10XXXxxx 10xxxxxx 10xxxxxx 10xxxxxx",
257 "1111110X 10XXXXxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx",
258 "11111110 10XXXXXx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx",
259 "11111111 10XXXXXX 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx",
266 pat1.sub(/X([^X]*)\z/, '1\1').gsub(/X/, "0"),
269 s = [pat2.gsub(/ /, "")].pack("B*").force_encoding("utf-8")
270 if pat2 <= bits_0x10ffff
271 assert(s.valid_encoding?, "#{pat2}")
273 assert(!s.valid_encoding?, "#{pat2}")
277 pat3 = pat1.gsub(/X/, "0")
278 s = [pat3.gsub(/ /, "")].pack("B*").force_encoding("utf-8")
279 assert(!s.valid_encoding?, "#{pat3}")
285 def test_validate_surrogate
286 # 1110XXXX 10Xxxxxx 10xxxxxx : 3 bytes UTF-8
288 "11101101 10011111 10111111", # just before surrogate high
289 "11101101 1010xxxx 10xxxxxx", # surrogate high
290 "11101101 1011xxxx 10xxxxxx", # surrogate low
291 "11101110 10000000 10000000", # just after surrogate low
293 pats.values_at(1,2).each {|pat0|
298 s = [pat1.gsub(/ /, "")].pack("B*").force_encoding("utf-8")
299 assert(!s.valid_encoding?, "#{pat1}")
302 pats.values_at(0,3).each {|pat|
303 s = [pat.gsub(/ /, "")].pack("B*").force_encoding("utf-8")
304 assert(s.valid_encoding?, "#{pat}")
308 def test_regexp_too_short_multibyte_character
309 assert_raise(SyntaxError) { eval('/\xfe/e') }
310 assert_raise(SyntaxError) { eval('/\x8e/e') }
311 assert_raise(SyntaxError) { eval('/\x8f/e') }
312 assert_raise(SyntaxError) { eval('/\x8f\xa1/e') }
313 assert_raise(SyntaxError) { eval('/\xef/s') }
314 assert_raise(SyntaxError) { eval('/\xc2/u') }
315 assert_raise(SyntaxError) { eval('/\xe0\x80/u') }
316 assert_raise(SyntaxError) { eval('/\xf0\x80\x80/u') }
317 assert_raise(SyntaxError) { eval('/\xf8\x80\x80\x80/u') }
318 assert_raise(SyntaxError) { eval('/\xfc\x80\x80\x80\x80/u') }
321 assert_raise(SyntaxError) { eval("/\xfe/e") }
322 assert_raise(SyntaxError) { eval("/\xc2/u") }
325 assert_raise(SyntaxError) { eval('/\xc2\xff/u') }
326 assert_raise(SyntaxError) { eval('/\xc2 /u') }
327 assert_raise(SyntaxError) { eval('/\xc2\x20/u') }
330 def test_regexp_generic
331 assert_regexp_generic_ascii(/a/)
332 assert_regexp_generic_ascii(Regexp.new(a("a")))
333 assert_regexp_generic_ascii(Regexp.new(e("a")))
334 assert_regexp_generic_ascii(Regexp.new(s("a")))
335 assert_regexp_generic_ascii(Regexp.new(u("a")))
337 [/a/, Regexp.new(a("a"))].each {|r|
338 assert_equal(0, r =~ a("a"))
339 assert_equal(0, r =~ e("a"))
340 assert_equal(0, r =~ s("a"))
341 assert_equal(0, r =~ u("a"))
342 assert_equal(nil, r =~ a("\xc2\xa1"))
343 assert_equal(nil, r =~ e("\xc2\xa1"))
344 assert_equal(nil, r =~ s("\xc2\xa1"))
345 assert_equal(nil, r =~ u("\xc2\xa1"))
349 def test_regexp_ascii_none
352 assert_warning(%r{regexp match /.../n against to}) {
353 assert_regexp_generic_ascii(r)
356 assert_equal(0, r =~ a("a"))
357 assert_equal(0, r =~ e("a"))
358 assert_equal(0, r =~ s("a"))
359 assert_equal(0, r =~ u("a"))
360 assert_equal(nil, r =~ a("\xc2\xa1"))
361 assert_warning(%r{regexp match /.../n against to EUC-JP string}) {
362 assert_equal(nil, r =~ e("\xc2\xa1"))
364 assert_warning(%r{regexp match /.../n against to Windows-31J string}) {
365 assert_equal(nil, r =~ s("\xc2\xa1"))
367 assert_warning(%r{regexp match /.../n against to UTF-8 string}) {
368 assert_equal(nil, r =~ u("\xc2\xa1"))
371 assert_nothing_raised { eval(e("/\\x80/n")) }
374 def test_regexp_ascii
375 assert_regexp_fixed_ascii8bit(/\xc2\xa1/n)
376 assert_regexp_fixed_ascii8bit(eval(a(%{/\xc2\xa1/})))
377 assert_regexp_fixed_ascii8bit(eval(a(%{/\xc2\xa1/n})))
378 assert_regexp_fixed_ascii8bit(eval(a(%q{/\xc2\xa1/})))
380 assert_raise(SyntaxError) { eval("/\xa1\xa1/n".force_encoding("euc-jp")) }
382 [/\xc2\xa1/n, eval(a(%{/\xc2\xa1/})), eval(a(%{/\xc2\xa1/n}))].each {|r|
383 assert_equal(nil, r =~ a("a"))
384 assert_equal(nil, r =~ e("a"))
385 assert_equal(nil, r =~ s("a"))
386 assert_equal(nil, r =~ u("a"))
387 assert_equal(0, r =~ a("\xc2\xa1"))
388 assert_raise(ArgumentError) { r =~ e("\xc2\xa1") }
389 assert_raise(ArgumentError) { r =~ s("\xc2\xa1") }
390 assert_raise(ArgumentError) { r =~ u("\xc2\xa1") }
395 assert_regexp_fixed_eucjp(/a/e)
396 assert_regexp_fixed_eucjp(/\xc2\xa1/e)
397 assert_regexp_fixed_eucjp(eval(e(%{/\xc2\xa1/})))
398 assert_regexp_fixed_eucjp(eval(e(%q{/\xc2\xa1/})))
401 assert_equal(0, r =~ a("a"))
402 assert_equal(0, r =~ e("a"))
403 assert_equal(0, r =~ s("a"))
404 assert_equal(0, r =~ u("a"))
405 assert_raise(ArgumentError) { r =~ a("\xc2\xa1") }
406 assert_equal(nil, r =~ e("\xc2\xa1"))
407 assert_raise(ArgumentError) { r =~ s("\xc2\xa1") }
408 assert_raise(ArgumentError) { r =~ u("\xc2\xa1") }
411 [/\xc2\xa1/e, eval(e(%{/\xc2\xa1/})), eval(e(%q{/\xc2\xa1/}))].each {|r|
412 assert_equal(nil, r =~ a("a"))
413 assert_equal(nil, r =~ e("a"))
414 assert_equal(nil, r =~ s("a"))
415 assert_equal(nil, r =~ u("a"))
416 assert_raise(ArgumentError) { r =~ a("\xc2\xa1") }
417 assert_equal(0, r =~ e("\xc2\xa1"))
418 assert_raise(ArgumentError) { r =~ s("\xc2\xa1") }
419 assert_raise(ArgumentError) { r =~ u("\xc2\xa1") }
424 assert_regexp_fixed_sjis(/a/s)
425 assert_regexp_fixed_sjis(/\xc2\xa1/s)
426 assert_regexp_fixed_sjis(eval(s(%{/\xc2\xa1/})))
427 assert_regexp_fixed_sjis(eval(s(%q{/\xc2\xa1/})))
430 def test_regexp_windows_31j
432 Regexp.new("\xa1".force_encoding("windows-31j")) =~ "\xa1\xa1".force_encoding("euc-jp")
436 assert_match(/windows-31j/i, err.message)
439 def test_regexp_embed
440 r = eval(e("/\xc2\xa1/"))
441 assert_raise(ArgumentError) { eval(s("/\xc2\xa1\#{r}/s")) }
442 assert_raise(ArgumentError) { eval(s("/\#{r}\xc2\xa1/s")) }
445 assert_raise(ArgumentError) { eval(s("/\xc2\xa1\#{r}/s")) }
446 assert_raise(ArgumentError) { eval(s("/\#{r}\xc2\xa1/s")) }
448 r = eval(e("/\xc2\xa1/"))
449 assert_raise(ArgumentError) { /\xc2\xa1#{r}/s }
452 assert_raise(ArgumentError) { /\xc2\xa1#{r}/s }
454 r1 = Regexp.new('foo'.force_encoding("ascii-8bit"))
455 r2 = eval('/bar#{r1}/'.force_encoding('ascii-8bit'))
456 assert_equal(Encoding::US_ASCII, r2.encoding)
458 r1 = Regexp.new('foo'.force_encoding("us-ascii"))
459 r2 = eval('/bar#{r1}/'.force_encoding('ascii-8bit'))
460 assert_equal(Encoding::US_ASCII, r2.encoding)
462 r1 = Regexp.new('foo'.force_encoding("ascii-8bit"))
463 r2 = eval('/bar#{r1}/'.force_encoding('us-ascii'))
464 assert_equal(Encoding::US_ASCII, r2.encoding)
466 r1 = Regexp.new('foo'.force_encoding("us-ascii"))
467 r2 = eval('/bar#{r1}/'.force_encoding('us-ascii'))
468 assert_equal(Encoding::US_ASCII, r2.encoding)
470 r1 = Regexp.new('\xa1'.force_encoding("ascii-8bit"))
471 r2 = eval('/bar#{r1}/'.force_encoding('ascii-8bit'))
472 assert_equal(Encoding::ASCII_8BIT, r2.encoding)
474 r1 = Regexp.new('\xa1'.force_encoding("ascii-8bit"))
475 r2 = eval('/bar#{r1}/'.force_encoding('us-ascii'))
476 assert_equal(Encoding::ASCII_8BIT, r2.encoding)
478 r1 = Regexp.new('foo'.force_encoding("ascii-8bit"))
479 r2 = eval('/\xa1#{r1}/'.force_encoding('ascii-8bit'))
480 assert_equal(Encoding::ASCII_8BIT, r2.encoding)
482 r1 = Regexp.new('foo'.force_encoding("us-ascii"))
483 r2 = eval('/\xa1#{r1}/'.force_encoding('ascii-8bit'))
484 assert_equal(Encoding::ASCII_8BIT, r2.encoding)
486 r1 = Regexp.new('\xa1'.force_encoding("ascii-8bit"))
487 r2 = eval('/\xa1#{r1}/'.force_encoding('ascii-8bit'))
488 assert_equal(Encoding::ASCII_8BIT, r2.encoding)
491 def test_regexp_named_class # [[:space:]] is expected to match U+00A0 both with and without the /u flag.
492 assert_match(/[[:space:]]/u, "\u{00a0}")
493 assert_match(/[[:space:]]/, "\u{00a0}")
496 def test_regexp_property
497 s = '\p{Hiragana}'.force_encoding("euc-jp")
498 assert_equal(Encoding::EUC_JP, s.encoding)
500 assert_nothing_raised {
503 assert(r.fixed_encoding?)
504 assert_match(r, "\xa4\xa2".force_encoding("euc-jp"))
506 r = eval('/\p{Hiragana}/'.force_encoding("euc-jp"))
507 assert(r.fixed_encoding?)
508 assert_match(r, "\xa4\xa2".force_encoding("euc-jp"))
511 assert(r.fixed_encoding?)
512 assert_match(r, "\xa4\xa2".force_encoding("euc-jp"))
515 def test_regexp_embed_preprocess
518 assert(r2.source.include?(r1.source))
521 def test_begin_end_offset
522 str = e("\244\242\244\244\244\246\244\250\244\252a")
524 assert_equal("a", $&)
525 assert_equal(5, $~.begin(0))
526 assert_equal(6, $~.end(0))
527 assert_equal([5,6], $~.offset(0))
528 assert_equal(5, $~.begin(1))
529 assert_equal(6, $~.end(1))
530 assert_equal([5,6], $~.offset(1))
533 def test_begin_end_offset_sjis
536 assert_equal(s("\x81@"), $`)
537 assert_equal("@", $&)
539 assert_equal([1,2], $~.offset(0))
543 assert_regexp_generic_ascii(/#{Regexp.quote(a("a"))}#{Regexp.quote(e("e"))}/)
545 assert_encoding("US-ASCII", Regexp.quote(a("")).encoding)
546 assert_encoding("US-ASCII", Regexp.quote(e("")).encoding)
547 assert_encoding("US-ASCII", Regexp.quote(s("")).encoding)
548 assert_encoding("US-ASCII", Regexp.quote(u("")).encoding)
549 assert_encoding("US-ASCII", Regexp.quote(a("a")).encoding)
550 assert_encoding("US-ASCII", Regexp.quote(e("a")).encoding)
551 assert_encoding("US-ASCII", Regexp.quote(s("a")).encoding)
552 assert_encoding("US-ASCII", Regexp.quote(u("a")).encoding)
554 assert_encoding("ASCII-8BIT", Regexp.quote(a("\xc2\xa1")).encoding)
555 assert_encoding("EUC-JP", Regexp.quote(e("\xc2\xa1")).encoding)
556 assert_encoding("Windows-31J", Regexp.quote(s("\xc2\xa1")).encoding)
557 assert_encoding("UTF-8", Regexp.quote(u("\xc2\xa1")).encoding)
562 assert_regexp_generic_ascii(r)
569 def test_union_1_asciionly_string # Regexp.union of one ASCII-only string (empty, "a" or tab) in any of the four encodings is expected to be a generic US-ASCII regexp.
570 assert_regexp_generic_ascii(Regexp.union(a("")))
571 assert_regexp_generic_ascii(Regexp.union(e("")))
572 assert_regexp_generic_ascii(Regexp.union(s("")))
573 assert_regexp_generic_ascii(Regexp.union(u("")))
574 assert_regexp_generic_ascii(Regexp.union(a("a")))
575 assert_regexp_generic_ascii(Regexp.union(e("a")))
576 assert_regexp_generic_ascii(Regexp.union(s("a")))
577 assert_regexp_generic_ascii(Regexp.union(u("a")))
578 assert_regexp_generic_ascii(Regexp.union(a("\t")))
579 assert_regexp_generic_ascii(Regexp.union(e("\t")))
580 assert_regexp_generic_ascii(Regexp.union(s("\t")))
581 assert_regexp_generic_ascii(Regexp.union(u("\t")))
584 def test_union_1_nonascii_string # Regexp.union of one non-ASCII string is expected to be fixed to that string's encoding.
585 assert_regexp_fixed_ascii8bit(Regexp.union(a("\xc2\xa1")))
586 assert_regexp_fixed_eucjp(Regexp.union(e("\xc2\xa1")))
587 assert_regexp_fixed_sjis(Regexp.union(s("\xc2\xa1")))
588 assert_regexp_fixed_utf8(Regexp.union(u("\xc2\xa1")))
591 def test_union_1_regexp
592 assert_regexp_generic_ascii(Regexp.union(//))
593 assert_warning(%r{regexp match /.../n against to}) {
594 assert_regexp_generic_ascii(Regexp.union(//n))
596 assert_regexp_fixed_eucjp(Regexp.union(//e))
597 assert_regexp_fixed_sjis(Regexp.union(//s))
598 assert_regexp_fixed_utf8(Regexp.union(//u))
603 a(""), e(""), s(""), u(""),
604 a("\xc2\xa1"), e("\xc2\xa1"), s("\xc2\xa1"), u("\xc2\xa1")
610 assert_regexp_generic_ascii(Regexp.union(s1, s2))
612 r = Regexp.union(s1, s2)
613 assert_regexp_fixed_encoding(r)
614 assert_equal(s2.encoding, r.encoding)
618 r = Regexp.union(s1, s2)
619 assert_regexp_fixed_encoding(r)
620 assert_equal(s1.encoding, r.encoding)
622 if s1.encoding == s2.encoding
623 r = Regexp.union(s1, s2)
624 assert_regexp_fixed_encoding(r)
625 assert_equal(s1.encoding, r.encoding)
627 assert_raise(ArgumentError) { Regexp.union(s1, s2) }
635 def test_dynamic_ascii_regexp
636 assert_warning(%r{regexp match /.../n against to}) {
637 assert_regexp_generic_ascii(/#{}/n)
639 assert_regexp_fixed_ascii8bit(/#{}\xc2\xa1/n)
640 assert_regexp_fixed_ascii8bit(/\xc2\xa1#{}/n)
641 assert_nothing_raised { s1, s2 = a('\xc2'), a('\xa1'); /#{s1}#{s2}/ }
644 def test_dynamic_eucjp_regexp # Interpolated /e regexps are expected to be fixed EUC-JP; a lone \xc2 beside an interpolation (or \xc2 and \xa1 separated by one) must raise.
645 assert_regexp_fixed_eucjp(/#{}/e)
646 assert_regexp_fixed_eucjp(/#{}\xc2\xa1/e)
647 assert_regexp_fixed_eucjp(/\xc2\xa1#{}/e)
648 assert_raise(SyntaxError) { eval('/\xc2#{}/e') }
649 assert_raise(SyntaxError) { eval('/#{}\xc2/e') }
650 assert_raise(SyntaxError) { eval('/\xc2#{}\xa1/e') }
651 assert_raise(ArgumentError) { s1, s2 = e('\xc2'), e('\xa1'); /#{s1}#{s2}/ }
654 def test_dynamic_sjis_regexp # Interpolated /s regexps are expected to be fixed Windows-31J; a lone \x81 beside an interpolation (or \x81 and \xa1 separated by one) must raise.
655 assert_regexp_fixed_sjis(/#{}/s)
656 assert_regexp_fixed_sjis(/#{}\xc2\xa1/s)
657 assert_regexp_fixed_sjis(/\xc2\xa1#{}/s)
658 assert_raise(SyntaxError) { eval('/\x81#{}/s') }
659 assert_raise(SyntaxError) { eval('/#{}\x81/s') }
660 assert_raise(SyntaxError) { eval('/\x81#{}\xa1/s') }
661 assert_raise(ArgumentError) { s1, s2 = s('\x81'), s('\xa1'); /#{s1}#{s2}/ }
664 def test_dynamic_utf8_regexp # Interpolated /u regexps are expected to be fixed UTF-8; a lone \xc2 beside an interpolation (or \xc2 and \xa1 separated by one) must raise.
665 assert_regexp_fixed_utf8(/#{}/u)
666 assert_regexp_fixed_utf8(/#{}\xc2\xa1/u)
667 assert_regexp_fixed_utf8(/\xc2\xa1#{}/u)
668 assert_raise(SyntaxError) { eval('/\xc2#{}/u') }
669 assert_raise(SyntaxError) { eval('/#{}\xc2/u') }
670 assert_raise(SyntaxError) { eval('/\xc2#{}\xa1/u') }
671 assert_raise(ArgumentError) { s1, s2 = u('\xc2'), u('\xa1'); /#{s1}#{s2}/ }
674 def test_regexp_unicode # \u{...} in a /u regexp: U+0000, U+D7FF, U+E000 and U+10FFFF are accepted; U+D800, U+DFFF and U+110000 must raise SyntaxError.
675 assert_nothing_raised { eval '/\u{0}/u' }
676 assert_nothing_raised { eval '/\u{D7FF}/u' }
677 assert_raise(SyntaxError) { eval '/\u{D800}/u' }
678 assert_raise(SyntaxError) { eval '/\u{DFFF}/u' }
679 assert_nothing_raised { eval '/\u{E000}/u' }
680 assert_nothing_raised { eval '/\u{10FFFF}/u' }
681 assert_raise(SyntaxError) { eval '/\u{110000}/u' }
684 def test_regexp_mixed_unicode
685 assert_raise(SyntaxError) { eval(a(%{/\xc2\xa1\\u{6666}/})) }
686 assert_raise(SyntaxError) { eval(e(%{/\xc2\xa1\\u{6666}/})) }
687 assert_raise(SyntaxError) { eval(s(%{/\xc2\xa1\\u{6666}/})) }
688 assert_nothing_raised { eval(u(%{/\xc2\xa1\\u{6666}/})) }
689 assert_raise(SyntaxError) { eval(a(%{/\\u{6666}\xc2\xa1/})) }
690 assert_raise(SyntaxError) { eval(e(%{/\\u{6666}\xc2\xa1/})) }
691 assert_raise(SyntaxError) { eval(s(%{/\\u{6666}\xc2\xa1/})) }
692 assert_nothing_raised { eval(u(%{/\\u{6666}\xc2\xa1/})) }
694 assert_raise(SyntaxError) { eval(a(%{/\\xc2\\xa1\\u{6666}/})) }
695 assert_raise(SyntaxError) { eval(e(%{/\\xc2\\xa1\\u{6666}/})) }
696 assert_raise(SyntaxError) { eval(s(%{/\\xc2\\xa1\\u{6666}/})) }
697 assert_nothing_raised { eval(u(%{/\\xc2\\xa1\\u{6666}/})) }
698 assert_raise(SyntaxError) { eval(a(%{/\\u{6666}\\xc2\\xa1/})) }
699 assert_raise(SyntaxError) { eval(e(%{/\\u{6666}\\xc2\\xa1/})) }
700 assert_raise(SyntaxError) { eval(s(%{/\\u{6666}\\xc2\\xa1/})) }
701 assert_nothing_raised { eval(u(%{/\\u{6666}\\xc2\\xa1/})) }
703 assert_raise(SyntaxError) { eval(a(%{/\xc2\xa1#{}\\u{6666}/})) }
704 assert_raise(SyntaxError) { eval(e(%{/\xc2\xa1#{}\\u{6666}/})) }
705 assert_raise(SyntaxError) { eval(s(%{/\xc2\xa1#{}\\u{6666}/})) }
706 assert_nothing_raised { eval(u(%{/\xc2\xa1#{}\\u{6666}/})) }
707 assert_raise(SyntaxError) { eval(a(%{/\\u{6666}#{}\xc2\xa1/})) }
708 assert_raise(SyntaxError) { eval(e(%{/\\u{6666}#{}\xc2\xa1/})) }
709 assert_raise(SyntaxError) { eval(s(%{/\\u{6666}#{}\xc2\xa1/})) }
710 assert_nothing_raised { eval(u(%{/\\u{6666}#{}\xc2\xa1/})) }
712 assert_raise(SyntaxError) { eval(a(%{/\\xc2\\xa1#{}\\u{6666}/})) }
713 assert_raise(SyntaxError) { eval(e(%{/\\xc2\\xa1#{}\\u{6666}/})) }
714 assert_raise(SyntaxError) { eval(s(%{/\\xc2\\xa1#{}\\u{6666}/})) }
715 assert_nothing_raised { eval(u(%{/\\xc2\\xa1#{}\\u{6666}/})) }
716 assert_raise(SyntaxError) { eval(a(%{/\\u{6666}#{}\\xc2\\xa1/})) }
717 assert_raise(SyntaxError) { eval(e(%{/\\u{6666}#{}\\xc2\\xa1/})) }
718 assert_raise(SyntaxError) { eval(s(%{/\\u{6666}#{}\\xc2\\xa1/})) }
719 assert_nothing_raised { eval(u(%{/\\u{6666}#{}\\xc2\\xa1/})) }
722 def test_str_allocate
724 assert_equal(Encoding::ASCII_8BIT, s.encoding)
729 assert_equal(Encoding::US_ASCII, s.encoding)
733 assert_strenc("\x80", 'ASCII-8BIT', a("%c") % 128)
734 #assert_raise(ArgumentError) { a("%c") % 0xc2a1 }
735 assert_strenc("\xc2\xa1", 'EUC-JP', e("%c") % 0xc2a1)
736 assert_raise(ArgumentError) { e("%c") % 0xc2 }
737 assert_strenc("\xc2", 'Windows-31J', s("%c") % 0xc2)
738 #assert_raise(ArgumentError) { s("%c") % 0xc2a1 }
739 assert_strenc("\u{c2a1}", 'UTF-8', u("%c") % 0xc2a1)
740 assert_strenc("\u{c2}", 'UTF-8', u("%c") % 0xc2)
741 assert_raise(ArgumentError) {
742 "%s%s" % [s("\xc2\xa1"), e("\xc2\xa1")]
747 assert_strenc('""', 'ASCII-8BIT', a("%p") % a(""))
748 assert_strenc('""', 'EUC-JP', e("%p") % e(""))
749 assert_strenc('""', 'Windows-31J', s("%p") % s(""))
750 assert_strenc('""', 'UTF-8', u("%p") % u(""))
752 assert_strenc('"a"', 'ASCII-8BIT', a("%p") % a("a"))
753 assert_strenc('"a"', 'EUC-JP', e("%p") % e("a"))
754 assert_strenc('"a"', 'Windows-31J', s("%p") % s("a"))
755 assert_strenc('"a"', 'UTF-8', u("%p") % u("a"))
757 assert_strenc('"\xC2\xA1"', 'ASCII-8BIT', a("%p") % a("\xc2\xa1"))
758 assert_strenc("\"\xC2\xA1\"", 'EUC-JP', e("%p") % e("\xc2\xa1"))
759 #assert_strenc("\"\xC2\xA1\"", 'Windows-31J', s("%p") % s("\xc2\xa1"))
760 assert_strenc("\"\xC2\xA1\"", 'UTF-8', u("%p") % u("\xc2\xa1"))
762 assert_strenc('"\xC2\xA1"', 'US-ASCII', "%10p" % a("\xc2\xa1"))
763 assert_strenc(" \"\xC2\xA1\"", 'EUC-JP', "%10p" % e("\xc2\xa1"))
764 #assert_strenc(" \"\xC2\xA1\"", 'Windows-31J', "%10p" % s("\xc2\xa1"))
765 assert_strenc(" \"\xC2\xA1\"", 'UTF-8', "%10p" % u("\xc2\xa1"))
767 assert_strenc('"\x00"', 'ASCII-8BIT', a("%p") % a("\x00"))
768 assert_strenc('"\x00"', 'EUC-JP', e("%p") % e("\x00"))
769 assert_strenc('"\x00"', 'Windows-31J', s("%p") % s("\x00"))
770 assert_strenc('"\x00"', 'UTF-8', u("%p") % u("\x00"))
774 assert_strenc('', 'ASCII-8BIT', a("%s") % a(""))
775 assert_strenc('', 'EUC-JP', e("%s") % e(""))
776 assert_strenc('', 'Windows-31J', s("%s") % s(""))
777 assert_strenc('', 'UTF-8', u("%s") % u(""))
779 assert_strenc('a', 'ASCII-8BIT', a("%s") % a("a"))
780 assert_strenc('a', 'EUC-JP', e("%s") % e("a"))
781 assert_strenc('a', 'Windows-31J', s("%s") % s("a"))
782 assert_strenc('a', 'UTF-8', u("%s") % u("a"))
784 assert_strenc("\xC2\xA1", 'ASCII-8BIT', a("%s") % a("\xc2\xa1"))
785 assert_strenc("\xC2\xA1", 'EUC-JP', e("%s") % e("\xc2\xa1"))
786 #assert_strenc("\xC2\xA1", 'Windows-31J', s("%s") % s("\xc2\xa1"))
787 assert_strenc("\xC2\xA1", 'UTF-8', u("%s") % u("\xc2\xa1"))
789 assert_strenc(" \xC2\xA1", 'ASCII-8BIT', "%10s" % a("\xc2\xa1"))
790 assert_strenc(" \xA1\xA1", 'EUC-JP', "%10s" % e("\xa1\xa1"))
791 #assert_strenc(" \xC2\xA1", 'Windows-31J', "%10s" % s("\xc2\xa1"))
792 assert_strenc(" \xC2\xA1", 'UTF-8', "%10s" % u("\xc2\xa1"))
794 assert_strenc("\x00", 'ASCII-8BIT', a("%s") % a("\x00"))
795 assert_strenc("\x00", 'EUC-JP', e("%s") % e("\x00"))
796 assert_strenc("\x00", 'Windows-31J', s("%s") % s("\x00"))
797 assert_strenc("\x00", 'UTF-8', u("%s") % u("\x00"))
798 assert_equal("EUC-JP", (e("\xc2\xa1 %s") % "foo").encoding.name)
802 assert(a("a") < a("\xa1"))
803 assert(a("a") < s("\xa1"))
804 assert(s("a") < a("\xa1"))
807 def test_str_multiply
809 assert_equal(true, (str * 0).ascii_only?, "[ruby-dev:33895]")
810 assert_equal(false, (str * 1).ascii_only?)
811 assert_equal(false, (str * 2).ascii_only?)
815 assert_equal(a("\xc2"), a("\xc2\xa1")[0])
816 assert_equal(a("\xa1"), a("\xc2\xa1")[1])
817 assert_equal(nil, a("\xc2\xa1")[2])
818 assert_equal(e("\xc2\xa1"), e("\xc2\xa1")[0])
819 assert_equal(nil, e("\xc2\xa1")[1])
820 assert_equal(s("\xc2"), s("\xc2\xa1")[0])
821 assert_equal(s("\xa1"), s("\xc2\xa1")[1])
822 assert_equal(nil, s("\xc2\xa1")[2])
823 assert_equal(u("\xc2\xa1"), u("\xc2\xa1")[0])
824 assert_equal(nil, u("\xc2\xa1")[1])
827 assert_equal(true, str[0, 0].ascii_only?, "[ruby-dev:33895]")
828 assert_equal(false, str[0, 1].ascii_only?)
829 assert_equal(false, str[0..-1].ascii_only?)
832 def test_utf8str_aref # Integer indexing of a string with an ASCII prefix followed by \u{30xx} characters is expected to select by character position.
833 s = "abcdefghijklmnopqrstuvwxyz\u{3042 3044 3046 3048 304A}"
834 assert_equal("a", s[0])
835 assert_equal("h", s[7])
836 assert_equal("i", s[8])
837 assert_equal("j", s[9])
838 assert_equal("\u{3044}", s[27])
839 assert_equal("\u{3046}", s[28])
840 assert_equal("\u{3048}", s[29])
841 s = "abcdefghijklmnopqrstuvw\u{3042 3044 3046 3048 304A}"
842 assert_equal("\u{3044}", s[24])
845 def test_str_aref_len
846 assert_equal(a("\xa1"), a("\xc2\xa1\xc2\xa2\xc2\xa3")[1, 1])
847 assert_equal(a("\xa1\xc2"), a("\xc2\xa1\xc2\xa2\xc2\xa3")[1, 2])
849 assert_equal(e("\xc2\xa2"), e("\xc2\xa1\xc2\xa2\xc2\xa3")[1, 1])
850 assert_equal(e("\xc2\xa2\xc2\xa3"), e("\xc2\xa1\xc2\xa2\xc2\xa3")[1, 2])
852 assert_equal(s("\xa1"), s("\xc2\xa1\xc2\xa2\xc2\xa3")[1, 1])
853 assert_equal(s("\xa1\xc2"), s("\xc2\xa1\xc2\xa2\xc2\xa3")[1, 2])
855 assert_equal(u("\xc2\xa2"), u("\xc2\xa1\xc2\xa2\xc2\xa3")[1, 1])
856 assert_equal(u("\xc2\xa2\xc2\xa3"), u("\xc2\xa1\xc2\xa2\xc2\xa3")[1, 2])
859 def test_str_aref_substr
860 assert_equal(a("\xa1\xc2"), a("\xc2\xa1\xc2\xa2\xc2\xa3")[a("\xa1\xc2")])
861 assert_raise(ArgumentError) { a("\xc2\xa1\xc2\xa2\xc2\xa3")[e("\xa1\xc2")] }
863 assert_equal(nil, e("\xc2\xa1\xc2\xa2\xc2\xa3")[e("\xa1\xc2")])
864 assert_raise(ArgumentError) { e("\xc2\xa1\xc2\xa2\xc2\xa3")[s("\xa1\xc2")] }
866 assert_equal(s("\xa1\xc2"), s("\xc2\xa1\xc2\xa2\xc2\xa3")[s("\xa1\xc2")])
867 assert_raise(ArgumentError) { s("\xc2\xa1\xc2\xa2\xc2\xa3")[u("\xa1\xc2")] }
869 assert_equal(nil, u("\xc2\xa1\xc2\xa2\xc2\xa3")[u("\xa1\xc2")])
870 assert_raise(ArgumentError) { u("\xc2\xa1\xc2\xa2\xc2\xa3")[a("\xa1\xc2")] }
871 assert_nil(e("\xa1\xa2\xa3\xa4")[e("\xa2\xa3")])
875 s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
876 assert_raise(ArgumentError){s["\xb0\xa3"] = "foo"}
880 assert_encoding("EUC-JP", "a".center(5, e("\xa1\xa2")).encoding)
881 assert_encoding("EUC-JP", e("\xa3\xb0").center(10).encoding)
885 s = e("\xa3\xb0\xa3\xb1\xa3\xb1\xa3\xb3\xa3\xb4")
886 assert_equal(e("\xa3\xb0\xa3\xb1\xa3\xb3\xa3\xb4"), s.squeeze)
891 assert_equal(s.tr("A", "B"), s)
892 assert_equal(s.tr_s("A", "B"), s)
894 assert_nothing_raised {
895 "a".force_encoding("ASCII-8BIT").tr(a("a"), a("a"))
898 assert_equal(e("\xA1\xA1"), a("a").tr(a("a"), e("\xA1\xA1")))
900 assert_equal("X\u3042\u3044X", "A\u3042\u3044\u3046".tr("^\u3042\u3044", "X"))
901 assert_equal("\u3042\u3046" * 100, ("\u3042\u3044" * 100).tr("\u3044", "\u3046"))
905 assert_equal("\xA1\xA1".force_encoding("EUC-JP"),
906 "a".force_encoding("ASCII-8BIT").tr("a".force_encoding("ASCII-8BIT"), "\xA1\xA1".force_encoding("EUC-JP")))
910 assert_equal(0, e("\xa1\xa2").count("z"))
911 s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
912 assert_raise(ArgumentError){s.count(a("\xa3\xb0"))}
916 assert_equal(1, e("\xa1\xa2").delete("z").length)
917 s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
918 assert_raise(ArgumentError){s.delete(a("\xa3\xb2"))}
920 a = "\u3042\u3044\u3046\u3042\u3044\u3046"
921 a.delete!("\u3042\u3044", "^\u3044")
922 assert_equal("\u3044\u3046\u3044\u3046", a)
926 assert_equal(false, e("\xa1\xa2\xa3\xa4").include?(e("\xa3")))
927 s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
928 assert_equal(false, s.include?(e("\xb0\xa3")))
932 s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
933 assert_nil(s.index(e("\xb3\xa3")))
934 assert_nil(e("\xa1\xa2\xa3\xa4").index(e("\xa3")))
935 assert_nil(e("\xa1\xa2\xa3\xa4").rindex(e("\xa3")))
936 s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
937 assert_raise(ArgumentError){s.rindex(a("\xb1\xa3"))}
943 (94*94+94).times { s2.next! }
944 assert_not_equal(s1, s2)
948 s = "abc".sub(/b/, "\xa1\xa1".force_encoding("euc-jp"))
949 assert_encoding("EUC-JP", s.encoding)
950 assert_equal(Encoding::EUC_JP, "\xa4\xa2".force_encoding("euc-jp").sub(/./, '\&').encoding)
951 assert_equal(Encoding::EUC_JP, "\xa4\xa2".force_encoding("euc-jp").gsub(/./, '\&').encoding)
955 s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
956 assert_equal(e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4a"), s.insert(-1, "a"))
960 assert_equal(["a"], e("\xa1\xa2a\xa3\xa4").scan(/a/))
964 s1 = e("\xa4\xa2")*100
965 s2 = s1.dup.force_encoding("ascii-8bit")
967 assert_equal(Encoding::ASCII_8BIT, f.encoding)
972 s1 = e("\xa4\xa2")*100
973 s2 = s1.dup.force_encoding("ascii-8bit")
974 assert_equal(Encoding::ASCII_8BIT, s2[10..-1].encoding)
980 assert_raise(ArgumentError){s1.upto(s2) {|x| break }}
986 assert_not_equal(0, s1.casecmp(s2))
990 assert_equal(u("\xf0jihgfedcba"), u("abcdefghij\xf0").reverse)
993 def test_reverse_bang
994 s = u("abcdefghij\xf0")
996 assert_equal(u("\xf0jihgfedcba"), s)
1000 assert_raise(ArgumentError){u("\xe3\x81\x82") + a("\xa1")}
1004 s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
1005 assert_raise(ArgumentError){s.chomp(s("\xa3\xb4"))}
1011 s.gsub!(/b/, "\x80")
1012 assert_equal(false, s.ascii_only?, "[ruby-core:14566] reported by Sam Ruby")
1014 s = "abc".force_encoding(Encoding::ASCII_8BIT)
1015 t = s.gsub(/b/, "\xa1\xa1".force_encoding("euc-jp"))
1016 assert_equal(Encoding::ASCII_8BIT, s.encoding)
1018 assert_raise(ArgumentError) {
1019 "abc".gsub(/[ac]/) {
1020 $& == "a" ? "\xc2\xa1".force_encoding("euc-jp") :
1021 "\xc2\xa1".force_encoding("utf-8")
1024 s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
1025 assert_equal(e("\xa3\xb0z\xa3\xb2\xa3\xb3\xa3\xb4"), s.gsub(/\xa3\xb1/e, "z"))
1027 assert_equal(Encoding::EUC_JP, (a("").gsub(//) { e("") }.encoding))
1028 assert_equal(Encoding::EUC_JP, (a("a").gsub(/a/) { e("") }.encoding))
1034 assert_equal(false, s1.end_with?(s2), "#{encdump s1}.end_with?(#{encdump s2})")
1038 s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
1039 assert_raise(ArgumentError){s.each_line(a("\xa3\xb1")) {|l| }}
1040 s = e("\xa4\xa2\nfoo")
1043 s.each_line {|line| actual << line }
1044 expected = [e("\xa4\xa2\n"), e("foo")]
1045 assert_equal(expected, actual)
1049 a = [e("\xa4\xa2"), "b", e("\xa4\xa4"), "c"]
1050 s = "\xa4\xa2b\xa4\xa4c".force_encoding("euc-jp")
1051 assert_equal(a, s.each_char.to_a, "[ruby-dev:33211] #{encdump s}.each_char.to_a")
1054 def test_regexp_match # Empty regexp: matching an EUC-JP string from position -1 is expected to give offset [0,0], and =~ against the symbol :a to return 0.
1055 assert_equal([0,0], //.match("\xa1\xa1".force_encoding("euc-jp"),-1).offset(0))
1056 assert_equal(0, // =~ :a)
1060 assert_equal(e("\xa1\xa2\xa1\xa3").split(//),
1061 [e("\xa1\xa2"), e("\xa1\xa3")],
1065 def test_nonascii_method_name # Methods named with the same bytes (\xc2\xa1) in EUC-JP and in UTF-8 are expected to be distinct methods.
1066 eval(e("def \xc2\xa1() @nonascii_method_name = :e end"))
1067 eval(u("def \xc2\xa1() @nonascii_method_name = :u end"))
1068 eval(e("\xc2\xa1()"))
1069 assert_equal(:e, @nonascii_method_name)
1070 eval(u("\xc2\xa1()"))
1071 assert_equal(:u, @nonascii_method_name)
1072 me = method(e("\xc2\xa1"))
1073 mu = method(u("\xc2\xa1"))
1074 assert_not_equal(me.name, mu.name)
1075 assert_not_equal(me.inspect, mu.inspect)
1076 assert_equal(e("\xc2\xa1"), me.name.to_s)
1077 assert_equal(u("\xc2\xa1"), mu.name.to_s)
1081 s1 = "\xc2\xa1".force_encoding("euc-jp").intern
1082 s2 = "\xc2\xa1".force_encoding("utf-8").intern
1083 assert_not_equal(s1, s2)
1088 .. ... + - +(binary) -(binary) * / % ** +@ -@ | ^ & ! <=> > >= < <= ==
1089 === != =~ !~ ~ ! [] []= << >> :: `
1092 assert_equal(Encoding::US_ASCII, op.intern.encoding, "[ruby-dev:33449]")
1098 assert_equal([b].pack("C"), b.chr)
1103 s1 = "\xa1\xa1".force_encoding("euc-jp")
1104 s2 = Marshal.load(Marshal.dump(s1))
1105 assert_equal(s1, s2)
1110 assert_equal(Encoding::ASCII_8BIT, k.encoding)
1111 assert_equal(Encoding::ASCII_8BIT, v.encoding)
# Expects an empty string literal to report US-ASCII as its encoding.
# NOTE(review): a literal's encoding presumably follows this file's source
# encoding, which is not visible here - confirm.
1115 def test_empty_string
1116 assert_equal(Encoding::US_ASCII, "".encoding)
1120 assert_equal(Encoding::US_ASCII, nil.to_s.encoding)
# Expects the string returned by nil.inspect to be encoded as US-ASCII.
1123 def test_nil_inspect
1124 assert_equal(Encoding::US_ASCII, nil.inspect.encoding)
1128 assert_equal(Encoding::US_ASCII, true.to_s.encoding)
1132 assert_equal(Encoding::US_ASCII, false.to_s.encoding)
# Expects the string form of a small integer (1) to be encoded as US-ASCII.
1135 def test_fixnum_to_s
1136 assert_equal(Encoding::US_ASCII, 1.to_s.encoding)
1140 assert_equal(Encoding::US_ASCII, 1.0.to_s.encoding)
# Expects the string form of a large integer (1 shifted left by 129 bits)
# to be encoded as US-ASCII.
1143 def test_bignum_to_s
1144 assert_equal(Encoding::US_ASCII, (1<<129).to_s.encoding)
1148 assert_equal(Encoding::US_ASCII, [].to_s.encoding)
1149 assert_equal(Encoding::US_ASCII, [nil].to_s.encoding)
1150 assert_equal(Encoding::US_ASCII, [1].to_s.encoding)
1151 assert_equal(Encoding::US_ASCII, [""].to_s.encoding)
1152 assert_equal(Encoding::US_ASCII, ["a"].to_s.encoding)
1153 assert_equal(Encoding::US_ASCII, [nil,1,"","a","\x20",[]].to_s.encoding)
1157 assert_equal(Encoding::US_ASCII, {}.to_s.encoding)
1158 assert_equal(Encoding::US_ASCII, {1=>nil,"foo"=>""}.to_s.encoding)
# Expects Encoding.find to raise TypeError for arguments that are not
# encoding names: nil, an Integer, an Array and a Hash.
1161 def test_encoding_find
1162 assert_raise(TypeError) {Encoding.find(nil)}
1163 assert_raise(TypeError) {Encoding.find(0)}
1164 assert_raise(TypeError) {Encoding.find([])}
1165 assert_raise(TypeError) {Encoding.find({})}
# Expects both to_s and inspect of an Encoding object (here US_ASCII) to
# return strings that are themselves encoded as US-ASCII.
1168 def test_encoding_to_s
1169 assert_equal(Encoding::US_ASCII, Encoding::US_ASCII.to_s.encoding)
1170 assert_equal(Encoding::US_ASCII, Encoding::US_ASCII.inspect.encoding)
# Regression check tagged [ruby-dev:33377] in the failure message.
# NOTE(review): the statements that assign `t` are not visible in this
# extract; judging by the failure message it is presumably
# Regexp.new(s).source - confirm against the full file.
1173 def test_regexp_source
# Two bytes tagged as EUC-JP, used as the pattern source.
1174 s = "\xa4\xa2".force_encoding("euc-jp")
# `t` must compare equal to the original string `s`; `encdump` (defined
# outside this view) is only used to build the failure message.
1177 assert_equal(s, t, "[ruby-dev:33377] Regexp.new(#{encdump s}).source")
# Checks what __ENCODING__ evaluates to inside eval'ed source, with and
# without an "encoding:" magic comment.
1180 def test_magic_comment
# Without a magic comment, __ENCODING__ is expected to equal the encoding
# the source string itself is tagged with.
1181 assert_equal(Encoding::US_ASCII, eval("__ENCODING__".force_encoding("US-ASCII")))
1182 assert_equal(Encoding::ASCII_8BIT, eval("__ENCODING__".force_encoding("ASCII-8BIT")))
# With a magic comment, the encoding named in the comment is expected to
# win over the encoding the source string is tagged with.
1183 assert_equal(Encoding::US_ASCII, eval("# -*- encoding: US-ASCII -*-\n__ENCODING__".force_encoding("ASCII-8BIT")))
1184 assert_equal(Encoding::ASCII_8BIT, eval("# -*- encoding: ASCII-8BIT -*-\n__ENCODING__".force_encoding("US-ASCII")))
# Checks the encoding of regexp literals evaluated under a US-ASCII magic
# comment. Each case goes through assert_regexp_usascii_literal, which
# prepends "# -*- encoding: US-ASCII -*-" to the literal, evals
# "<literal>.encoding", and either compares the result with the expected
# encoding or, when an exception class is given, expects eval to raise it.
# The literals are single-quoted here, so backslash escapes and #{...}
# reach eval verbatim.
1187 def test_regexp_usascii
# Empty pattern, alone and combined with interpolations: ASCII-only content
# stays US-ASCII, a \x80 escape arriving via %q"..." yields ASCII-8BIT, and
# a "\x80" string literal in the US-ASCII source is expected to be a
# SyntaxError.
1188 assert_regexp_usascii_literal('//', Encoding::US_ASCII)
1189 assert_regexp_usascii_literal('/#{}/', Encoding::US_ASCII)
1190 assert_regexp_usascii_literal('/#{"a"}/', Encoding::US_ASCII)
1191 assert_regexp_usascii_literal('/#{%q"\x80"}/', Encoding::ASCII_8BIT)
1192 assert_regexp_usascii_literal('/#{"\x80"}/', nil, SyntaxError)
# Same matrix with an ASCII character ("a") in front of the interpolation.
1194 assert_regexp_usascii_literal('/a/', Encoding::US_ASCII)
1195 assert_regexp_usascii_literal('/a#{}/', Encoding::US_ASCII)
1196 assert_regexp_usascii_literal('/a#{"a"}/', Encoding::US_ASCII)
1197 assert_regexp_usascii_literal('/a#{%q"\x80"}/', Encoding::ASCII_8BIT)
1198 assert_regexp_usascii_literal('/a#{"\x80"}/', nil, SyntaxError)
# Same matrix with a \x80 escape in the regexp itself: the result is
# expected to be ASCII-8BIT whenever the literal is accepted at all.
1200 assert_regexp_usascii_literal('/\x80/', Encoding::ASCII_8BIT)
1201 assert_regexp_usascii_literal('/\x80#{}/', Encoding::ASCII_8BIT)
1202 assert_regexp_usascii_literal('/\x80#{"a"}/', Encoding::ASCII_8BIT)
1203 assert_regexp_usascii_literal('/\x80#{%q"\x80"}/', Encoding::ASCII_8BIT)
1204 assert_regexp_usascii_literal('/\x80#{"\x80"}/', nil, SyntaxError)
# Same matrix with a \u escape: the result is expected to be UTF-8, and
# mixing \u1234 with a \x80 escape is expected to be rejected.
1206 assert_regexp_usascii_literal('/\u1234/', Encoding::UTF_8)
1207 assert_regexp_usascii_literal('/\u1234#{}/', Encoding::UTF_8)
1208 assert_regexp_usascii_literal('/\u1234#{"a"}/', Encoding::UTF_8)
1209 assert_regexp_usascii_literal('/\u1234#{%q"\x80"}/', nil, SyntaxError)
1210 assert_regexp_usascii_literal('/\u1234#{"\x80"}/', nil, SyntaxError)
1211 assert_regexp_usascii_literal('/\u1234\x80/', nil, SyntaxError)
# This case expects ArgumentError instead of SyntaxError.
# NOTE(review): presumably because the interpolation defers building the
# regexp to run time - confirm.
1212 assert_regexp_usascii_literal('/\u1234#{}\x80/', nil, ArgumentError)
1216 assert_equal("", "\x81\x40".force_encoding("GBK").chop)
1220 assert_equal("a", "a\x8e\xa2\xa1\xa1".force_encoding("euc-tw").chop)
# Checks that String#valid_encoding? is recomputed for strings built from
# byte sequences that are invalid on their own.
1223 def test_valid_encoding
# A lone "\xa1" byte tagged EUC-JP is expected to be invalid.
1224 s = "\xa1".force_encoding("euc-jp")
1225 assert_equal(false, s.valid_encoding?)
# Two copies of that byte joined together are expected to be valid,
# whichever operation joins them: +, *, format (%), << on a copy, or
# center padding. Two of the cases cite [ruby-dev:33826].
1226 assert_equal(true, (s+s).valid_encoding?, "[ruby-dev:33826]")
1227 assert_equal(true, (s*2).valid_encoding?, "[ruby-dev:33826]")
1228 assert_equal(true, ("%s%s" % [s, s]).valid_encoding?)
1229 assert_equal(true, (s.dup << s).valid_encoding?)
1230 assert_equal(true, "".center(2, s).valid_encoding?)
# "\xa1\xa1\x8f" tagged EUC-JP is expected to be invalid, while its
# reverse is expected to be valid.
# NOTE(review): presumably because reversing moves the trailing "\x8f"
# to the front, where it starts a valid sequence - confirm.
1232 s = "\xa1\xa1\x8f".force_encoding("euc-jp")
1233 assert_equal(false, s.valid_encoding?)
1234 assert_equal(true, s.reverse.valid_encoding?)
1238 assert_equal(0x82, u("\xE3\x81\x82\xE3\x81\x84").getbyte(2))
1239 assert_equal(0x82, u("\xE3\x81\x82\xE3\x81\x84").getbyte(-4))
1240 assert_nil(u("\xE3\x81\x82\xE3\x81\x84").getbyte(100))
1244 s = u("\xE3\x81\x82\xE3\x81\x84")
1246 assert_equal(u("\xE3\x81\x84\xE3\x81\x84"), s)
1248 s = u("\xE3\x81\x82\xE3\x81\x84")
1249 assert_raise(IndexError) { s.setbyte(100, 0) }
1251 s = u("\xE3\x81\x82\xE3\x81\x84")
1253 assert_equal(u("\xE3\x81\x84\xE3\x81\x84"), s)
1257 assert_equal(nil, Encoding.compatible?("",0), "moved from btest/knownbug")
# Regression check moved from btest/knownbug ([ruby-dev:33807], per the
# failure message).
1260 def test_force_encoding
# The parenthesised expression only evaluates to true if padding an empty
# string with a "\x80" byte tagged UTF-8 returns without raising.
1261 assert(("".center(1, "\x80".force_encoding("utf-8")); true),
1262 "moved from btest/knownbug, [ruby-dev:33807]")