Fix the tag.
[python/dscho.git] / Lib / encodings / utf_16.py
blob5500c0623c916b9831ac18bd57a38fee090c80d9
1 """ Python 'utf-16' Codec
4 Written by Marc-Andre Lemburg (mal@lemburg.com).
6 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
8 """
9 import codecs, sys
11 ### Codec APIs
# Stateless encoder: the generic C-level UTF-16 codec function is exposed
# unchanged under the module-level name ``encode``.
encode = codecs.utf_16_encode


def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string in one call.

    The third positional argument handed to ``codecs.utf_16_decode`` is
    its ``final`` flag; it is always true here because a stateless decode
    has no later chunk that could complete the input.

    Returns whatever ``codecs.utf_16_decode`` returns: a tuple of the
    decoded text and the number of bytes consumed.
    """
    final = True
    return codecs.utf_16_decode(input, errors, final)
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    ``self.encoder`` is ``None`` until the first chunk has been encoded.
    That first chunk goes through the generic ``codecs.utf_16_encode``;
    every later chunk goes through the little- or big-endian specific
    encoder selected from ``sys.byteorder``.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # No byte-order specific encoder has been selected yet.
        self.encoder = None

    def encode(self, input, final=False):
        """Encode *input* and return the resulting bytes."""
        if self.encoder is not None:
            # Not the first chunk: use the encoder chosen earlier.
            return self.encoder(input, self.errors)[0]

        # First chunk: encode with the generic codec, and only once that
        # has succeeded switch to the platform-order encoder.
        encoded = codecs.utf_16_encode(input, self.errors)[0]
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
        return encoded

    def reset(self):
        """Return to the initial state (no encoder selected)."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return the encoder state as an integer.

        0: stream is in natural order for this platform
        2: endianness hasn't been determined yet
        (this encoder never writes in unnatural order)
        """
        if self.encoder is None:
            return 2
        return 0

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        if state:
            # Any truthy state means "nothing encoded yet".
            self.encoder = None
        elif sys.byteorder == 'little':
            self.encoder = codecs.utf_16_le_encode
        else:
            self.encoder = codecs.utf_16_be_encode
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that picks its byte order from the BOM.

    ``self.decoder`` is ``None`` until ``codecs.utf_16_ex_decode`` has
    reported a byte order; afterwards it is the matching little- or
    big-endian decode function.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # Byte order not determined yet.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode *input*; return ``(text, bytes_consumed)``.

        Raises UnicodeError when at least two bytes were consumed
        without a byte order having been detected.
        """
        if self.decoder is not None:
            return self.decoder(input, self.errors, final)

        text, used, order = codecs.utf_16_ex_decode(input, errors, 0, final)
        if order == -1:
            self.decoder = codecs.utf_16_le_decode
        elif order == 1:
            self.decoder = codecs.utf_16_be_decode
        elif used >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (text, used)

    def reset(self):
        """Forget the detected byte order along with the base-class state."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return ``(buffered_input, flag)`` describing the decoder.

        Only the first item of the base-class state is kept; its second
        item is not passed along to the caller.  The flag is:

        0: stream is in natural order for this platform
        1: stream is in unnatural order
        2: endianness hasn't been determined yet
        """
        pending = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        if self.decoder is None:
            return (pending, 2)
        platform_is_big = sys.byteorder == "big"
        stream_is_big = self.decoder is codecs.utf_16_be_decode
        return (pending, int(platform_is_big != stream_is_big))

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        # The base class restores its own part of the state; the flag in
        # state[1] is interpreted below.
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        flag = state[1]
        platform_is_big = sys.byteorder == "big"
        if flag == 0:
            # Natural order: same endianness as the platform.
            if platform_is_big:
                self.decoder = codecs.utf_16_be_decode
            else:
                self.decoder = codecs.utf_16_le_decode
        elif flag == 1:
            # Unnatural order: opposite endianness to the platform.
            if platform_is_big:
                self.decoder = codecs.utf_16_le_decode
            else:
                self.decoder = codecs.utf_16_be_decode
        else:
            self.decoder = None
class StreamWriter(codecs.StreamWriter):
    """Stream writer for UTF-16.

    The first chunk is encoded with the generic ``codecs.utf_16_encode``;
    later chunks use the byte-order specific encoder selected from
    ``sys.byteorder``.

    The selected encoder is kept in ``self.encoder`` rather than being
    installed over the ``encode`` method, so that ``reset()`` can return
    the writer to its initial state.

    Attributes:
        bom_written: True once the first chunk has been encoded; kept for
            backward compatibility with code that inspects it.
        encoder: ``None`` before the first chunk, afterwards the
            little- or big-endian encode function.
    """

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        self.bom_written = False
        self.encoder = None

    def reset(self):
        """Reset the writer so the next write starts a fresh stream.

        Without this override the byte-order specific encoder chosen by
        the first ``encode()`` call would stay active after a reset.
        """
        codecs.StreamWriter.reset(self)
        self.bom_written = False
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode *input*; return ``(bytes, length_consumed)``."""
        if self.encoder is not None:
            return self.encoder(input, errors)

        # First chunk: use the generic codec, then switch to the
        # platform-order encoder for everything that follows.  State is
        # only updated after the encode has succeeded.
        result = codecs.utf_16_encode(input, errors)
        self.bom_written = True
        if sys.byteorder == 'little':
            self.encoder = codecs.utf_16_le_encode
        else:
            self.encoder = codecs.utf_16_be_encode
        return result
class StreamReader(codecs.StreamReader):
    """Stream reader for UTF-16 that picks its byte order from the BOM.

    Once ``codecs.utf_16_ex_decode`` has reported a byte order, the
    matching decode function is stored on the instance under the name
    ``decode``, hiding the method below until ``reset()`` removes it.
    """

    def reset(self):
        """Reset the reader and forget any detected byte order."""
        codecs.StreamReader.reset(self)
        try:
            # Remove the per-instance override so the class-level
            # decode() below is used again.
            del self.decode
        except AttributeError:
            # No byte order had been detected yet; nothing to remove.
            pass

    def decode(self, input, errors='strict'):
        """Decode *input*; return ``(text, bytes_consumed)``.

        Raises UnicodeError when at least two bytes were consumed
        without a byte order having been detected.
        """
        text, used, order = codecs.utf_16_ex_decode(input, errors, 0, False)
        by_order = {
            -1: codecs.utf_16_le_decode,
            1: codecs.utf_16_be_decode,
        }
        chosen = by_order.get(order)
        if chosen is not None:
            self.decode = chosen
        elif used >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (text, used)
138 ### encodings module API
140 def getregentry():
141 return codecs.CodecInfo(
142 name='utf-16',
143 encode=encode,
144 decode=decode,
145 incrementalencoder=IncrementalEncoder,
146 incrementaldecoder=IncrementalDecoder,
147 streamreader=StreamReader,
148 streamwriter=StreamWriter,