* io.c (rb_open_file): encoding in mode string was ignored if perm is
[ruby-svn.git] / lib / rexml / encoding.rb
blob608c69cd65a8607443922433641088b7d0c59208
# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
module REXML
  # Mixin that gives an object a switchable +encoding+ attribute and a helper
  # for sniffing the encoding of raw XML text.
  #
  # Per-encoding behaviour is supplied by blocks registered through
  # Encoding.register; #encoding= loads the matching definition file and runs
  # the registered block against the receiver.
  # NOTE(review): the definition files under rexml/encodings/ are not visible
  # here; presumably each one calls Encoding.register -- confirm.
  module Encoding
    # Registry: encoding name => setup block (see .register).
    @encoding_methods = {}

    # Stores +block+ as the setup routine for the encoding named +enc+.
    # Registering the same name again replaces the previous block.
    def self.register(enc, &block)
      @encoding_methods[enc] = block
    end

    # Invokes the setup block registered under +enc+, passing it +obj+.
    # Raises NoMethodError when no block has been registered for +enc+.
    def self.apply(obj, enc)
      @encoding_methods[enc][obj]
    end

    # Returns the setup block registered under +enc+, or nil if there is none.
    def self.encoding_method(enc)
      @encoding_methods[enc]
    end

    # Native, default format is UTF-8, so it is declared here rather than in
    # an encodings/ definition.
    UTF_8 = 'UTF-8'
    UTF_16 = 'UTF-16'
    UNILE = 'UNILE'

    # ID ---> Encoding name
    attr_reader :encoding

    # Switches the receiver to the encoding named +enc+ (case-insensitive;
    # nil selects UTF-8).
    #
    # Returns false when +enc+ is already the current encoding, true after a
    # successful switch.
    #
    # Raises ArgumentError when the name contains anything other than word
    # characters and hyphens, or when no decoder can be loaded for it. A
    # rejected name leaves the current encoding untouched.
    def encoding=(enc)
      old_verbosity = $VERBOSE
      begin
        # Silence warnings while encoding definitions are (re)loaded.
        $VERBOSE = false
        enc = enc.upcase unless enc.nil?
        # Parentheses are required: `defined? a && b` would test the whole
        # expression rather than just the instance variable.
        return false if defined?(@encoding) && enc == @encoding

        if enc && enc != UTF_8
          # The name is interpolated into a require path below, so it must be
          # validated against the WHOLE string (\A..\z). With ^..$ a value such
          # as "UTF-16\n../../x" would pass because those anchors are per-line.
          unless enc =~ /\A[\w-]+\z/
            raise ArgumentError, "Bad encoding name #{enc}"
          end
          # Taint tracking is absent from current Rubies; only call it where
          # it still exists.
          enc.untaint if enc.respond_to?(:untaint)
          # Assigned before the decoder is applied.
          # NOTE(review): presumably the registered blocks read the receiver's
          # encoding -- confirm against rexml/encodings/*.
          @encoding = enc
          begin
            require 'rexml/encodings/ICONV.rb'
            Encoding.apply(self, "ICONV")
          rescue ScriptError, StandardError
            # Best-effort fallback: any ordinary failure of the generic iconv
            # path falls through to an encoding-specific definition. Interrupt,
            # SystemExit and NoMemoryError are deliberately NOT swallowed.
            begin
              enc_file = File.join("rexml", "encodings", "#{@encoding}.rb")
              require enc_file
              Encoding.apply(self, @encoding)
            rescue LoadError => err
              # Diagnostics belong on stderr, not in the program's stdout.
              warn err.message
              raise ArgumentError, "No decoder found for encoding #@encoding.  Please install iconv."
            end
          end
        else
          @encoding = UTF_8
          require 'rexml/encodings/UTF-8.rb'
          Encoding.apply(self, @encoding)
        end
      ensure
        $VERBOSE = old_verbosity
      end
      true
    end

    # Guesses the encoding of the XML text in +str+.
    #
    # A leading UTF-16 byte order mark is removed from +str+ IN PLACE and
    # UTF_16 (bytes FE FF) or UNILE (bytes FF FE) is returned. Otherwise the
    # upcased value of the +encoding+ pseudo-attribute of an XML declaration
    # is returned, falling back to UTF_8 when there is no such declaration.
    def check_encoding(str)
      # We have to recognize UTF-16, LSB UTF-16, and UTF-8
      #
      # Compare byte values rather than strings: on encoding-aware Rubies a
      # binary "\xfe\xff" never equals a "\xfe\xff" literal tagged with the
      # source encoding, so string comparison would miss the BOM.
      case str.unpack('C2')
      when [0xfe, 0xff]
        str[0, 2] = ""
        return UTF_16
      when [0xff, 0xfe]
        str[0, 2] = ""
        return UNILE
      end

      if str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m
        $3.upcase
      else
        UTF_8
      end
    end
  end
end