1 /****************************************************************************
2 ** libebml : parse EBML files, see http://ebml.sourceforge.net/
4 ** <file/class description>
6 ** Copyright (C) 2002-2005 Steve Lhomme. All rights reserved.
8 ** This file is part of libebml.
10 ** This library is free software; you can redistribute it and/or
11 ** modify it under the terms of the GNU Lesser General Public
12 ** License as published by the Free Software Foundation; either
13 ** version 2.1 of the License, or (at your option) any later version.
15 ** This library is distributed in the hope that it will be useful,
16 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ** Lesser General Public License for more details.
20 ** You should have received a copy of the GNU Lesser General Public
21 ** License along with this library; if not, write to the Free Software
22 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 ** See http://www.matroska.org/license/lgpl/ for LGPL licensing information.
26 ** Contact license@matroska.org if any conditions of this licensing are not clear to you.
29 **********************************************************************/
33 \version \$Id: EbmlUnicodeString.cpp 1079 2005-03-03 13:18:14Z robux4 $
34 \author Steve Lhomme <robux4 @ users.sf.net>
35 \author Jory Stone <jcsston @ toughguy.net>
40 #if __GNUC__ == 2 && ! defined ( __OpenBSD__ )
44 #include "ebml/EbmlUnicodeString.h"
46 START_LIBEBML_NAMESPACE
48 // ===================== UTFstring class ===================
// Default constructor of UTFstring (takes no arguments).
// NOTE(review): the initializer list and body are not present in this extract;
// restore them from the upstream file before building.
50 UTFstring::UTFstring()
// Constructs a UTFstring from a wchar_t pointer (_aBuf).
// NOTE(review): the body is not present in this extract; presumably it copies
// the zero-terminated string via the wchar_t* assignment operator -- confirm.
55 UTFstring::UTFstring(const wchar_t * _aBuf
)
// Destructor of UTFstring.
// NOTE(review): the body is not present in this extract; _Data is allocated
// with new[] elsewhere in this file, so it presumably has to be released here -- confirm.
62 UTFstring::~UTFstring()
// Copy constructor: assigns the wide string returned by the source's c_str()
// to *this, i.e. the copy is performed by an assignment operator.
// NOTE(review): the member initializer list is not present in this extract;
// the members must be initialized before the assignment runs -- confirm.
67 UTFstring::UTFstring(const UTFstring
& _aBuf
)
71 *this = _aBuf
.c_str();
74 UTFstring
& UTFstring::operator=(const UTFstring
& _aBuf
)
76 *this = _aBuf
.c_str();
80 UTFstring
& UTFstring::operator=(const wchar_t * _aBuf
)
84 _Data
= new wchar_t[1];
91 for (aLen
=0; _aBuf
[aLen
] != 0; aLen
++);
93 _Data
= new wchar_t[_Length
+1];
94 for (aLen
=0; _aBuf
[aLen
] != 0; aLen
++) {
95 _Data
[aLen
] = _aBuf
[aLen
];
102 UTFstring
& UTFstring::operator=(wchar_t _aChar
)
105 _Data
= new wchar_t[2];
113 bool UTFstring::operator==(const UTFstring
& _aStr
) const
115 if ((_Data
== NULL
) && (_aStr
._Data
== NULL
))
117 if ((_Data
== NULL
) || (_aStr
._Data
== NULL
))
119 return wcscmp_internal(_Data
, _aStr
._Data
);
// Sets the string from UTF-8 encoded text (_aStr).
// NOTE(review): the body is not present in this extract; presumably it stores
// _aStr in UTF8string and calls UpdateFromUTF8() -- confirm against upstream.
122 void UTFstring::SetUTF8(const std::string
& _aStr
)
131 void UTFstring::UpdateFromUTF8()
134 // find the size of the final UCS-2 string
136 for (_Length
=0, i
=0; i
<UTF8string
.length(); _Length
++) {
137 if ((UTF8string
[i
] & 0x80) == 0) {
139 } else if ((UTF8string
[i
] & 0x20) == 0) {
141 } else if ((UTF8string
[i
] & 0x10) == 0) {
145 _Data
= new wchar_t[_Length
+1];
147 for (j
=0, i
=0; i
<UTF8string
.length(); j
++) {
148 if ((UTF8string
[i
] & 0x80) == 0) {
149 _Data
[j
] = UTF8string
[i
];
151 } else if ((UTF8string
[i
] & 0x20) == 0) {
152 _Data
[j
] = ((UTF8string
[i
] & 0x1F) << 6) + (UTF8string
[i
+1] & 0x3F);
154 } else if ((UTF8string
[i
] & 0x10) == 0) {
155 _Data
[j
] = ((UTF8string
[i
] & 0x0F) << 12) + ((UTF8string
[i
+1] & 0x3F) << 6) + (UTF8string
[i
+2] & 0x3F);
162 void UTFstring::UpdateFromUCS2()
164 // find the size of the final UTF-8 string
166 for (i
=0; i
<_Length
; i
++)
168 if (_Data
[i
] < 0x80) {
170 } else if (_Data
[i
] < 0x800) {
172 } else if (_Data
[i
] < 0x10000) {
176 std::string::value_type
*tmpStr
= new std::string::value_type
[Size
+1];
177 for (i
=0, Size
=0; i
<_Length
; i
++)
179 if (_Data
[i
] < 0x80) {
180 tmpStr
[Size
++] = _Data
[i
];
181 } else if (_Data
[i
] < 0x800) {
182 tmpStr
[Size
++] = 0xC0 | (_Data
[i
] >> 6);
183 tmpStr
[Size
++] = 0x80 | (_Data
[i
] & 0x3F);
184 } else if (_Data
[i
] < 0x10000) {
185 tmpStr
[Size
++] = 0xE0 | (_Data
[i
] >> 12);
186 tmpStr
[Size
++] = 0x80 | ((_Data
[i
] >> 6) & 0x3F);
187 tmpStr
[Size
++] = 0x80 | (_Data
[i
] & 0x3F);
191 UTF8string
= tmpStr
; // implicit conversion
196 bool UTFstring::wcscmp_internal(const wchar_t *str1
, const wchar_t *str2
)
199 while (str1
[Index
] == str2
[Index
] && str1
[Index
] != 0) {
202 return (str1
[Index
] == str2
[Index
]);
205 // ===================== EbmlUnicodeString class ===================
// Default constructor: forwards (0, false) to the EbmlElement base constructor.
// NOTE(review): the constructor body is not present in this extract -- restore
// it from the upstream file before building.
207 EbmlUnicodeString::EbmlUnicodeString()
208 :EbmlElement(0, false)
// Constructs the element from a default value: forwards (0, true) to the
// EbmlElement base constructor and initializes both Value and DefaultValue
// with aDefaultValue.
// NOTE(review): the constructor body is not present in this extract -- restore
// it from the upstream file before building.
213 EbmlUnicodeString::EbmlUnicodeString(const UTFstring
& aDefaultValue
)
214 :EbmlElement(0, true), Value(aDefaultValue
), DefaultValue(aDefaultValue
)
// Copy constructor: copy-constructs the EbmlElement base from ElementToClone
// and copies its Value and DefaultValue members.
// NOTE(review): the constructor body is not present in this extract
// (presumably empty) -- confirm against the upstream file.
220 EbmlUnicodeString::EbmlUnicodeString(const EbmlUnicodeString
& ElementToClone
)
221 :EbmlElement(ElementToClone
)
222 ,Value(ElementToClone
.Value
)
223 ,DefaultValue(ElementToClone
.DefaultValue
)
228 \note limited to UCS-2
229 \todo handle exception on errors
231 uint32
EbmlUnicodeString::RenderData(IOCallback
& output
, bool bForceRender
, bool bKeepIntact
)
233 uint32 Result
= Value
.GetUTF8().length();
236 output
.writeFully(Value
.GetUTF8().c_str(), Result
);
239 if (Result
< DefaultSize
) {
240 // pad the rest with 0
241 binary
*Pad
= new binary
[DefaultSize
- Result
];
243 memset(Pad
, 0x00, DefaultSize
- Result
);
244 output
.writeFully(Pad
, DefaultSize
- Result
);
246 Result
= DefaultSize
;
// Assigns a new string (NewString) to this element and returns the element.
// NOTE(review): the body is not present in this extract; presumably it stores
// NewString in Value and returns *this -- confirm against the upstream file.
254 EbmlUnicodeString
& EbmlUnicodeString::operator=(const UTFstring
& NewString
)
262 \note limited to UCS-2
264 uint64
EbmlUnicodeString::UpdateSize(bool bKeepIntact
, bool bForceRender
)
266 if (!bKeepIntact
&& IsDefaultValue())
269 Size
= Value
.GetUTF8().length();
270 if (Size
< DefaultSize
)
277 \note limited to UCS-2
// Reads the element payload from `input` and stores it in Value as UTF-8,
// unless the caller passes SCOPE_NO_DATA.
// NOTE(review): several lines of this function (braces, the condition guarding
// the empty-value branch, buffer termination/release, the return statement)
// are not present in this extract -- restore them from upstream before building.
279 uint64
EbmlUnicodeString::ReadData(IOCallback
& input
, ScopeMode ReadFully
)
// nothing is read when the caller asked for no data
281 if (ReadFully
!= SCOPE_NO_DATA
)
// assigns a single zero wide character to Value; the condition selecting this
// branch is not visible here (presumably Size == 0 -- confirm)
284 Value
= UTFstring::value_type(0);
// scratch byte buffer, one byte larger than Size
287 char *Buffer
= new char[Size
+1];
// NOTE(review): a plain new[] reports failure by throwing std::bad_alloc, so
// this NULL test is presumably never true with a conforming compiler -- confirm
288 if (Buffer
== NULL
) {
289 // impossible to read, skip it
290 input
.setFilePointer(Size
, seek_current
);
// read exactly Size bytes of payload into the scratch buffer
292 input
.readFully(Buffer
, Size
);
// checks whether the last payload byte is already a zero terminator;
// NOTE(review): indexes Size-1, which assumes Size > 0 on this path -- confirm
293 if (Buffer
[Size
-1] != 0) {
// hand the bytes to Value, which interprets them as UTF-8
297 Value
.SetUTF8(Buffer
); // implicit conversion to std::string
307 END_LIBEBML_NAMESPACE