scide: implement selectionLength for openDocument
[supercollider.git] / external_libraries / yaml-cpp-0.3.0 / src / stream.cpp
blob5cfb1bbf104fadeeb9925081875621902cb289b3
1 #include "stream.h"
2 #include <iostream>
3 #include "exp.h"
// Number of bytes requested from the underlying stream buffer per refill of
// the prefetch buffer. Can be overridden by predefining YAML_PREFETCH_SIZE.
#ifndef YAML_PREFETCH_SIZE
#define YAML_PREFETCH_SIZE 2048
#endif

// Element count of a true array (must not be used on a pointer).
#define S_ARRAY_SIZE( A ) (sizeof(A)/sizeof(*(A)))
// Pointer one past the last element of a true array.
#define S_ARRAY_END( A ) ((A) + S_ARRAY_SIZE(A))

// U+FFFD REPLACEMENT CHARACTER: queued in place of input that cannot be decoded.
#define CP_REPLACEMENT_CHARACTER (0xFFFD)
14 namespace YAML
// States of the small automaton that inspects the first bytes of the input
// to detect a byte-order mark or guess the encoding.
//
// The enumerator order matters: each value is used as a row index into
// s_introFinalState, s_introTransitions and s_introUngetCount.
enum UtfIntroState {
    uis_start,          // nothing read yet
    uis_utfbe_b1,       // read: 00
    uis_utf32be_b2,     // read: 00 00
    uis_utf32be_bom3,   // read: 00 00 FE
    uis_utf32be,        // final: UTF-32 big-endian
    uis_utf16be,        // final: UTF-16 big-endian
    uis_utf16be_bom1,   // read: FE
    uis_utfle_bom1,     // read: FF
    uis_utf16le_bom2,   // read: FF FE
    uis_utf32le_bom3,   // read: FF FE 00
    uis_utf16le,        // final: UTF-16 little-endian
    uis_utf32le,        // final: UTF-32 little-endian
    uis_utf8_imp,       // read: one "ASCII"-class byte, no BOM
    uis_utf16le_imp,    // read: "ASCII"-class byte, 00
    uis_utf32le_imp3,   // read: "ASCII"-class byte, 00 00
    uis_utf8_bom1,      // read: EF
    uis_utf8_bom2,      // read: EF BB
    uis_utf8,           // final: UTF-8
    uis_error           // final: error
};
// Character classes that the encoding-detection automaton distinguishes.
// Each value is a column index into s_introTransitions / s_introUngetCount.
enum UtfIntroCharType {
    uict00,     // byte 0x00
    uictBB,     // byte 0xBB
    uictBF,     // byte 0xBF
    uictEF,     // byte 0xEF
    uictFE,     // byte 0xFE
    uictFF,     // byte 0xFF
    uictAscii,  // any other value in 0x01..0xFE (see IntroCharTypeOf)
    uictOther,  // end-of-file and everything else
    uictMax     // number of classes; used as the column count of the tables
};
// For each UtfIntroState (row order == enumerator order): true when the
// state is final, i.e. encoding detection stops once it is reached.
// The table is only ever read, so it is const.
static const bool s_introFinalState[] = {
    false, // uis_start
    false, // uis_utfbe_b1
    false, // uis_utf32be_b2
    false, // uis_utf32be_bom3
    true,  // uis_utf32be
    true,  // uis_utf16be
    false, // uis_utf16be_bom1
    false, // uis_utfle_bom1
    false, // uis_utf16le_bom2
    false, // uis_utf32le_bom3
    true,  // uis_utf16le
    true,  // uis_utf32le
    false, // uis_utf8_imp
    false, // uis_utf16le_imp
    false, // uis_utf32le_imp3
    false, // uis_utf8_bom1
    false, // uis_utf8_bom2
    true,  // uis_utf8
    true,  // uis_error
};
72 static UtfIntroState s_introTransitions[][uictMax] = {
73 // uict00, uictBB, uictBF, uictEF, uictFE, uictFF, uictAscii, uictOther
74 {uis_utfbe_b1, uis_utf8, uis_utf8, uis_utf8_bom1, uis_utf16be_bom1, uis_utfle_bom1, uis_utf8_imp, uis_utf8},
75 {uis_utf32be_b2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16be, uis_utf8},
76 {uis_utf32be, uis_utf8, uis_utf8, uis_utf8, uis_utf32be_bom3, uis_utf8, uis_utf8, uis_utf8},
77 {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf32be, uis_utf8, uis_utf8},
78 {uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be},
79 {uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be},
80 {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16be, uis_utf8, uis_utf8},
81 {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16le_bom2, uis_utf8, uis_utf8, uis_utf8},
82 {uis_utf32le_bom3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le},
83 {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le},
84 {uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le},
85 {uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le},
86 {uis_utf16le_imp, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8},
87 {uis_utf32le_imp3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le},
88 {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le},
89 {uis_utf8, uis_utf8_bom2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8},
90 {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8},
91 {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8},
94 static char s_introUngetCount[][uictMax] = {
95 // uict00, uictBB, uictBF, uictEF, uictFE, uictFF, uictAscii, uictOther
96 {0, 1, 1, 0, 0, 0, 0, 1},
97 {0, 2, 2, 2, 2, 2, 2, 2},
98 {3, 3, 3, 3, 0, 3, 3, 3},
99 {4, 4, 4, 4, 4, 0, 4, 4},
100 {1, 1, 1, 1, 1, 1, 1, 1},
101 {1, 1, 1, 1, 1, 1, 1, 1},
102 {2, 2, 2, 2, 2, 0, 2, 2},
103 {2, 2, 2, 2, 0, 2, 2, 2},
104 {0, 1, 1, 1, 1, 1, 1, 1},
105 {0, 2, 2, 2, 2, 2, 2, 2},
106 {1, 1, 1, 1, 1, 1, 1, 1},
107 {1, 1, 1, 1, 1, 1, 1, 1},
108 {0, 2, 2, 2, 2, 2, 2, 2},
109 {0, 3, 3, 3, 3, 3, 3, 3},
110 {4, 4, 4, 4, 4, 4, 4, 4},
111 {2, 0, 2, 2, 2, 2, 2, 2},
112 {3, 3, 0, 3, 3, 3, 3, 3},
113 {1, 1, 1, 1, 1, 1, 1, 1},
116 inline UtfIntroCharType IntroCharTypeOf(std::istream::int_type ch)
118 if (std::istream::traits_type::eof() == ch) {
119 return uictOther;
122 switch (ch) {
123 case 0: return uict00;
124 case 0xBB: return uictBB;
125 case 0xBF: return uictBF;
126 case 0xEF: return uictEF;
127 case 0xFE: return uictFE;
128 case 0xFF: return uictFF;
131 if ((ch > 0) && (ch < 0xFF)) {
132 return uictAscii;
135 return uictOther;
// Builds a single UTF-8 byte from a code point.
//   ch        - the code point being encoded
//   lead_bits - number of leading 1-bits in the byte (0 for a one-byte
//               sequence, 1 for a continuation byte, 2..4 for a lead byte)
//   rshift    - how far ch is shifted right to bring this byte's payload
//               bits into the low positions
// Returns the prefix bits combined with the masked payload.
inline char Utf8Adjust(unsigned long ch, unsigned char lead_bits, unsigned char rshift)
{
    // lead_bits one-bits packed at the top of the byte, e.g. 3 -> 0xE0.
    const unsigned char prefix = ((1 << lead_bits) - 1) << (8 - lead_bits);
    // The payload uses the bits below the prefix and its terminating 0-bit.
    const unsigned char payloadMask = (0xFF >> (lead_bits + 1));
    const unsigned long payload = (ch >> rshift) & payloadMask;

    return static_cast<char>(static_cast<unsigned char>(prefix | payload));
}
147 inline void QueueUnicodeCodepoint(std::deque<char>& q, unsigned long ch)
149 // We are not allowed to queue the Stream::eof() codepoint, so
150 // replace it with CP_REPLACEMENT_CHARACTER
151 if (static_cast<unsigned long>(Stream::eof()) == ch)
153 ch = CP_REPLACEMENT_CHARACTER;
156 if (ch < 0x80)
158 q.push_back(Utf8Adjust(ch, 0, 0));
160 else if (ch < 0x800)
162 q.push_back(Utf8Adjust(ch, 2, 6));
163 q.push_back(Utf8Adjust(ch, 1, 0));
165 else if (ch < 0x10000)
167 q.push_back(Utf8Adjust(ch, 3, 12));
168 q.push_back(Utf8Adjust(ch, 1, 6));
169 q.push_back(Utf8Adjust(ch, 1, 0));
171 else
173 q.push_back(Utf8Adjust(ch, 4, 18));
174 q.push_back(Utf8Adjust(ch, 1, 12));
175 q.push_back(Utf8Adjust(ch, 1, 6));
176 q.push_back(Utf8Adjust(ch, 1, 0));
180 Stream::Stream(std::istream& input)
181 : m_input(input),
182 m_pPrefetched(new unsigned char[YAML_PREFETCH_SIZE]),
183 m_nPrefetchedAvailable(0), m_nPrefetchedUsed(0)
185 typedef std::istream::traits_type char_traits;
187 if(!input)
188 return;
190 // Determine (or guess) the character-set by reading the BOM, if any. See
191 // the YAML specification for the determination algorithm.
192 char_traits::int_type intro[4];
193 int nIntroUsed = 0;
194 UtfIntroState state = uis_start;
195 for(; !s_introFinalState[state]; ) {
196 std::istream::int_type ch = input.get();
197 intro[nIntroUsed++] = ch;
198 UtfIntroCharType charType = IntroCharTypeOf(ch);
199 UtfIntroState newState = s_introTransitions[state][charType];
200 int nUngets = s_introUngetCount[state][charType];
201 if(nUngets > 0) {
202 input.clear();
203 for(; nUngets > 0; --nUngets) {
204 if(char_traits::eof() != intro[--nIntroUsed])
205 input.putback(char_traits::to_char_type(intro[nIntroUsed]));
208 state = newState;
211 switch (state) {
212 case uis_utf8: m_charSet = utf8; break;
213 case uis_utf16le: m_charSet = utf16le; break;
214 case uis_utf16be: m_charSet = utf16be; break;
215 case uis_utf32le: m_charSet = utf32le; break;
216 case uis_utf32be: m_charSet = utf32be; break;
217 default: m_charSet = utf8; break;
220 ReadAheadTo(0);
223 Stream::~Stream()
225 delete[] m_pPrefetched;
228 char Stream::peek() const
230 if (m_readahead.empty())
232 return Stream::eof();
235 return m_readahead[0];
238 Stream::operator bool() const
240 return m_input.good() || (!m_readahead.empty() && m_readahead[0] != Stream::eof());
243 // get
244 // . Extracts a character from the stream and updates our position
245 char Stream::get()
247 char ch = peek();
248 AdvanceCurrent();
249 m_mark.column++;
251 if(ch == '\n') {
252 m_mark.column = 0;
253 m_mark.line++;
256 return ch;
259 // get
260 // . Extracts 'n' characters from the stream and updates our position
261 std::string Stream::get(int n)
263 std::string ret;
264 ret.reserve(n);
265 for(int i=0;i<n;i++)
266 ret += get();
267 return ret;
270 // eat
271 // . Eats 'n' characters and updates our position.
272 void Stream::eat(int n)
274 for(int i=0;i<n;i++)
275 get();
278 void Stream::AdvanceCurrent()
280 if (!m_readahead.empty())
282 m_readahead.pop_front();
283 m_mark.pos++;
286 ReadAheadTo(0);
289 bool Stream::_ReadAheadTo(size_t i) const
291 while (m_input.good() && (m_readahead.size() <= i))
293 switch (m_charSet)
295 case utf8: StreamInUtf8(); break;
296 case utf16le: StreamInUtf16(); break;
297 case utf16be: StreamInUtf16(); break;
298 case utf32le: StreamInUtf32(); break;
299 case utf32be: StreamInUtf32(); break;
303 // signal end of stream
304 if(!m_input.good())
305 m_readahead.push_back(Stream::eof());
307 return m_readahead.size() > i;
310 void Stream::StreamInUtf8() const
312 unsigned char b = GetNextByte();
313 if (m_input.good())
315 m_readahead.push_back(b);
319 void Stream::StreamInUtf16() const
321 unsigned long ch = 0;
322 unsigned char bytes[2];
323 int nBigEnd = (m_charSet == utf16be) ? 0 : 1;
325 bytes[0] = GetNextByte();
326 bytes[1] = GetNextByte();
327 if (!m_input.good())
329 return;
331 ch = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) |
332 static_cast<unsigned long>(bytes[1 ^ nBigEnd]);
334 if (ch >= 0xDC00 && ch < 0xE000)
336 // Trailing (low) surrogate...ugh, wrong order
337 QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
338 return;
340 else if (ch >= 0xD800 && ch < 0xDC00)
342 // ch is a leading (high) surrogate
344 // Four byte UTF-8 code point
346 // Read the trailing (low) surrogate
347 for (;;)
349 bytes[0] = GetNextByte();
350 bytes[1] = GetNextByte();
351 if (!m_input.good())
353 QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
354 return;
356 unsigned long chLow = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) |
357 static_cast<unsigned long>(bytes[1 ^ nBigEnd]);
358 if (chLow < 0xDC00 || ch >= 0xE000)
360 // Trouble...not a low surrogate. Dump a REPLACEMENT CHARACTER into the stream.
361 QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
363 // Deal with the next UTF-16 unit
364 if (chLow < 0xD800 || ch >= 0xE000)
366 // Easiest case: queue the codepoint and return
367 QueueUnicodeCodepoint(m_readahead, ch);
368 return;
370 else
372 // Start the loop over with the new high surrogate
373 ch = chLow;
374 continue;
378 // Select the payload bits from the high surrogate
379 ch &= 0x3FF;
380 ch <<= 10;
382 // Include bits from low surrogate
383 ch |= (chLow & 0x3FF);
385 // Add the surrogacy offset
386 ch += 0x10000;
390 QueueUnicodeCodepoint(m_readahead, ch);
// Views an unsigned-char buffer as a char buffer, which is the pointer type
// std::streambuf::sgetn expects. The address is unchanged.
inline char* ReadBuffer(unsigned char* pBuffer)
{
    char* const asChars = reinterpret_cast<char*>(pBuffer);
    return asChars;
}
398 unsigned char Stream::GetNextByte() const
400 if (m_nPrefetchedUsed >= m_nPrefetchedAvailable)
402 std::streambuf *pBuf = m_input.rdbuf();
403 m_nPrefetchedAvailable = pBuf->sgetn(ReadBuffer(m_pPrefetched),
404 YAML_PREFETCH_SIZE);
405 m_nPrefetchedUsed = 0;
406 if (!m_nPrefetchedAvailable)
408 m_input.setstate(std::ios_base::eofbit);
411 if (0 == m_nPrefetchedAvailable)
413 return 0;
417 return m_pPrefetched[m_nPrefetchedUsed++];
420 void Stream::StreamInUtf32() const
422 static int indexes[2][4] = {
423 {3, 2, 1, 0},
424 {0, 1, 2, 3}
427 unsigned long ch = 0;
428 unsigned char bytes[4];
429 int* pIndexes = (m_charSet == utf32be) ? indexes[1] : indexes[0];
431 bytes[0] = GetNextByte();
432 bytes[1] = GetNextByte();
433 bytes[2] = GetNextByte();
434 bytes[3] = GetNextByte();
435 if (!m_input.good())
437 return;
440 for (int i = 0; i < 4; ++i)
442 ch <<= 8;
443 ch |= bytes[pIndexes[i]];
446 QueueUnicodeCodepoint(m_readahead, ch);