2 * Copyright (C) 2003-2006 Gabest
3 * http://www.gabest.org
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with GNU Make; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 * http://www.gnu.org/copyleft/gpl.html
// Constructor: takes the encoding `e` requested by the caller.
// The visible part of the initializer list stores `e` in m_defaultencoding,
// which Open() later copies into m_encoding.
// NOTE(review): other member initializers are not visible here - confirm.
28 CTextFile::CTextFile(enc e
)
30 , m_defaultencoding(e
)
// Opens lpszFileName for reading (binary mode, no share restrictions) and
// inspects the first bytes of the file before deciding how to read it.
// Returns false when a read of the leading bytes comes up short (the file is
// closed first in that case).
35 bool CTextFile::Open(LPCTSTR lpszFileName
)
37 if (!__super::Open(lpszFileName
, modeRead
| typeBinary
| shareDenyNone
)) {
// Start from the encoding given at construction time.
41 m_encoding
= m_defaultencoding
;
// Only look for a leading marker when the file holds at least two bytes.
44 if(__super::GetLength() >= 2)
// A short read of the leading word is treated as failure.
47 if(sizeof(w
) != Read(&w
, sizeof(w
)))
48 return Close(), false;
// 0xbbef is the byte pair EF BB (start of the 3-byte mark written by Save())
// when read as a little-endian 16-bit value; a third byte is then required.
// NOTE(review): assumes w is a 16-bit word - its declaration is not visible here.
60 else if(w
== 0xbbef && __super::GetLength() >= 3)
63 if(sizeof(b
) != Read(&b
, sizeof(b
)))
64 return Close(), false;
// ASCII files are reopened in text mode via ReopenAsText().
74 if (m_encoding
== ASCII
) {
75 if (!ReopenAsText()) {
78 } else if (m_offset
== 0) { // No BOM detected, ensure the file is read from the beginning
85 bool CTextFile::ReopenAsText()
87 __super::Close(); // CWebTextFile::Close() would delete the temp file if we called it...
89 return __super::Open(m_strFileName
, modeRead
| typeText
| shareDenyNone
)==TRUE
;
// Creates (or truncates) lpszFileName for writing with encoding `e`.
// The file is opened in text mode when e == ASCII and in binary mode otherwise,
// and other writers are denied while it is open.
92 bool CTextFile::Save(LPCTSTR lpszFileName
, enc e
)
94 if(!__super::Open(lpszFileName
, modeCreate
|modeWrite
|shareDenyWrite
|(e
==ASCII
?typeText
:typeBinary
)))
// Byte-order marks: EF BB BF is the UTF-8 BOM, FF FE the UTF-16 little-endian
// BOM and FE FF the UTF-16 big-endian BOM.
// NOTE(review): the conditions selecting each array are not visible here -
// presumably chosen by `e`; confirm.
99 BYTE b
[3] = {0xef,0xbb,0xbf};
104 BYTE b
[2] = {0xff,0xfe};
109 BYTE b
[2] = {0xfe,0xff};
// Setter taking an encoding value `e`.
// NOTE(review): body not visible here - presumably stores `e` in m_encoding; confirm.
118 void CTextFile::SetEncoding(enc e
)
// Getter returning a CTextFile::enc value.
// NOTE(review): body not visible here - presumably returns m_encoding; confirm.
123 CTextFile::enc
CTextFile::GetEncoding()
128 bool CTextFile::IsUnicode()
130 return m_encoding
== UTF8
|| m_encoding
== LE16
|| m_encoding
== BE16
;
135 CString
CTextFile::GetFilePath() const
137 // to avoid a CException coming from CTime
138 return m_strFileName
; // __super::GetFilePath();
143 ULONGLONG
CTextFile::GetPosition() const
145 return(CStdioFile::GetPosition() - m_offset
);
148 ULONGLONG
CTextFile::GetLength() const
150 return(CStdioFile::GetLength() - m_offset
);
153 ULONGLONG
CTextFile::Seek(LONGLONG lOff
, UINT nFrom
)
155 ULONGLONG pos
= GetPosition();
156 ULONGLONG len
= GetLength();
161 case begin
: lOff
= lOff
; break;
162 case current
: lOff
= pos
+ lOff
; break;
163 case end
: lOff
= len
- lOff
; break;
166 lOff
= max(min((ULONGLONG
)lOff
, len
), 0) + m_offset
;
168 pos
= CStdioFile::Seek(lOff
, begin
) - m_offset
;
173 void CTextFile::WriteString(LPCSTR lpsz
/*CStringA str*/)
177 if(m_encoding
== ASCII
)
179 __super::WriteString(AToT(str
));
181 else if(m_encoding
== ANSI
)
183 str
.Replace("\n", "\r\n");
184 Write((LPCSTR
)str
, str
.GetLength());
186 else if(m_encoding
== UTF8
)
188 WriteString(AToW(str
));
190 else if(m_encoding
== LE16
)
192 WriteString(AToW(str
));
194 else if(m_encoding
== BE16
)
196 WriteString(AToW(str
));
// Writes a wide string using the current encoding (m_encoding).
// Every branch except ASCII first expands "\n" to "\r\n".
200 void CTextFile::WriteString(LPCWSTR lpsz
/*CStringW str*/)
// ASCII: convert with WToT() and let the base class write the text.
204 if(m_encoding
== ASCII
)
206 __super::WriteString(WToT(str
));
// ANSI: convert to a narrow string through CString and write its bytes.
208 else if(m_encoding
== ANSI
)
210 str
.Replace(L
"\n", L
"\r\n");
211 CStringA stra
= CStringA(CString(str
)); // TODO: codepage
212 Write((LPCSTR
)stra
, stra
.GetLength());
// UTF8: encode each 16-bit code unit by hand into 1, 2 or 3 bytes.
214 else if(m_encoding
== UTF8
)
216 str
.Replace(L
"\n", L
"\r\n");
217 for (unsigned int i
= 0, l
= str
.GetLength(); i
< l
; i
++)
// c holds one 16-bit code unit widened to a DWORD so the encoded bytes fit in it.
219 DWORD c
= (WORD
)str
[i
];
221 if(0 <= c
&& c
< 0x80) // 0xxxxxxx
225 else if(0x80 <= c
&& c
< 0x800) // 110xxxxx 10xxxxxx
// Build both UTF-8 bytes inside c, then write them one at a time starting
// from the higher-order byte (addressed as (BYTE*)&c + 1).
227 c
= 0xc080|((c
<<2)&0x1f00)|(c
&0x003f);
228 Write((BYTE
*)&c
+1, 1);
// NOTE(review): the upper bound is exclusive, so 0xFFFF itself does not take this
// branch; code units are encoded individually, so surrogate pairs are not
// combined into a 4-byte sequence - confirm whether that is intended.
231 else if(0x800 <= c
&& c
< 0xFFFF) // 1110xxxx 10xxxxxx 10xxxxxx
// Same technique with three bytes packed into c.
233 c
= 0xe08080|((c
<<4)&0x0f0000)|((c
<<2)&0x3f00)|(c
&0x003f);
234 Write((BYTE
*)&c
+2, 1);
235 Write((BYTE
*)&c
+1, 1);
// LE16: write the 16-bit code units unchanged, two bytes per character.
245 else if(m_encoding
== LE16
)
247 str
.Replace(L
"\n", L
"\r\n");
248 Write((LPCWSTR
)str
, str
.GetLength()*2);
// BE16: swap the two bytes of every code unit in place, then write the buffer.
250 else if(m_encoding
== BE16
)
252 str
.Replace(L
"\n", L
"\r\n");
253 for (unsigned int i
= 0, l
= str
.GetLength(); i
< l
; i
++) {
254 str
.SetAt(i
, ((str
[i
] >> 8) & 0x00ff) | ((str
[i
] << 8) & 0xff00));
256 Write((LPCWSTR
)str
, str
.GetLength() * 2);
// Reads one line into a narrow string, decoding according to m_encoding.
// NOTE(review): the return statement is not visible here - presumably derived
// from fEOF; confirm.
260 BOOL
CTextFile::ReadString(CStringA
& str
)
// ASCII: delegate to the base-class ReadString().
266 if(m_encoding
== ASCII
)
269 fEOF
= !__super::ReadString(s
);
271 // For consistency with other encodings, we continue reading
272 // the file even when a NUL char is encountered.
// While the base class reports end of file but one more char can still be
// read, keep going and try to read the rest of the line again.
274 while (fEOF
&& (Read(&c
, sizeof(c
)) == sizeof(c
))) {
276 fEOF
= !__super::ReadString(s
);
// ANSI: read one char at a time; carriage returns are skipped.
280 else if(m_encoding
== ANSI
)
283 while(Read(&c
, sizeof(c
)) == sizeof(c
))
286 if(c
== '\r') continue;
// UTF8: read the lead byte, then as many continuation bytes as it announces.
291 else if(m_encoding
== UTF8
)
297 while (Read(&buffer
[0], sizeof(buffer
[0])) == sizeof(buffer
[0]))
// Single-byte sequence: keep the low 7 bits.
303 if (Utf8::isSingleByte(buffer
[0]))
305 c
= buffer
[0] & 0x7f;
307 else if (Utf8::isFirstOfMultibyte(buffer
[0]))
309 int nContinuationBytes
= Utf8::continuationBytes(buffer
[0]);
// Sequences with more than two continuation bytes are rejected.
310 bValid
= (nContinuationBytes
<= 2);
312 // We don't support characters wider than 16 bits
// Read the continuation bytes in one call; a short read marks the data invalid.
314 UINT nRead
= Read(&buffer
[1], nContinuationBytes
* sizeof(buffer
[1]));
// nBytesRead is used further down to rewind when the data is not valid UTF-8.
315 nBytesRead
+= nContinuationBytes
;
316 bValid
= (nRead
== nContinuationBytes
* sizeof(buffer
[1]));
// Every byte after the lead byte must be a continuation byte.
319 for (int i
= 0; i
< nContinuationBytes
; i
++) {
320 if (!Utf8::isContinuation(buffer
[i
+ 1])) {
325 switch (nContinuationBytes
) {
327 c
= buffer
[0] & 0x7f;
// Multi-byte characters cannot be stored in the narrow output string.
329 case 1: // 110xxxxx 10xxxxxx
330 case 2: // 1110xxxx 10xxxxxx 10xxxxxx
331 // Unsupported for non unicode strings
// Fallback path: go back by the number of bytes consumed, reopen the file in
// text mode, restore the position and read the line again through this function.
354 // Switch to text and read again
356 // Rewind to the end of the line and save the position
357 Seek(-nBytesRead
, current
);
358 ULONGLONG currentPosition
= GetPosition();
360 fEOF
= !ReopenAsText();
364 // Seek back at the beginning of the line where we stopped
365 Seek(currentPosition
, begin
);
367 fEOF
= !ReadString(str
);
// LE16: read 16-bit words; c is taken from the low byte when the high byte is zero.
374 else if (m_encoding
== LE16
)
377 while(Read(&w
, sizeof(w
)) == sizeof(w
))
381 if(!(w
&0xff00)) c
= w
&0xff;
382 if(c
== '\r') continue;
// BE16: same idea with the bytes in the opposite order - c is taken from the
// high byte of w when its low byte is zero.
387 else if(m_encoding
== BE16
)
390 while(Read(&w
, sizeof(w
)) == sizeof(w
))
394 if(!(w
&0xff)) c
= w
>>8;
395 if(c
== '\r') continue;
// Reads one line into a wide string, decoding according to m_encoding.
// NOTE(review): the return statement is not visible here - presumably derived
// from fEOF; confirm.
404 BOOL
CTextFile::ReadString(CStringW
& str
)
// ASCII: delegate to the base-class ReadString().
410 if(m_encoding
== ASCII
)
413 fEOF
= !__super::ReadString(s
);
415 // For consistency with other encodings, we continue reading
416 // the file even when a NUL char is encountered.
// While the base class reports end of file but one more char can still be
// read, keep going and try to read the rest of the line again.
418 while (fEOF
&& (Read(&c
, sizeof(c
)) == sizeof(c
))) {
420 fEOF
= !__super::ReadString(s
);
// ANSI: read one char at a time (carriage returns are skipped), then widen the
// collected narrow string through CString.
424 else if(m_encoding
== ANSI
)
428 while(Read(&c
, sizeof(c
)) == sizeof(c
))
431 if(c
== '\r') continue;
435 str
= CStringW(CString(stra
)); // TODO: codepage
// UTF8: read the lead byte, then as many continuation bytes as it announces.
437 else if(m_encoding
== UTF8
)
443 while (Read(&buffer
[0], sizeof(buffer
[0])) == sizeof(buffer
[0])) {
448 if (Utf8::isSingleByte(buffer
[0])) { // 0xxxxxxx
449 c
= buffer
[0] & 0x7f;
450 } else if (Utf8::isFirstOfMultibyte(buffer
[0])) {
451 int nContinuationBytes
= Utf8::continuationBytes(buffer
[0]);
// Sequences with more than two continuation bytes are rejected.
452 bValid
= (nContinuationBytes
<= 2);
454 // We don't support characters wider than 16 bits
// Read the continuation bytes in one call; a short read marks the data invalid.
456 UINT nRead
= Read(&buffer
[1], nContinuationBytes
* sizeof(buffer
[1]));
// nBytesRead is used further down to rewind when the data is not valid UTF-8.
457 nBytesRead
+= nContinuationBytes
;
458 bValid
= (nRead
== nContinuationBytes
* sizeof(buffer
[1]));
// Every byte after the lead byte must be a continuation byte.
461 for (int i
= 0; i
< nContinuationBytes
; i
++) {
462 if (!Utf8::isContinuation(buffer
[i
+ 1])) {
// Assemble the character from the payload bits of each byte.
467 switch (nContinuationBytes
) {
469 c
= buffer
[0] & 0x7f;
// Two-byte sequence: 5 bits from the lead byte, 6 from the continuation byte.
471 case 1: // 110xxxxx 10xxxxxx
472 c
= (buffer
[0] & 0x1f) << 6 | (buffer
[1] & 0x3f);
// Three-byte sequence: 4 bits from the lead byte, 6 from each continuation byte.
474 case 2: // 1110xxxx 10xxxxxx 10xxxxxx
475 c
= (buffer
[0] & 0x0f) << 12 | (buffer
[1] & 0x3f) << 6 | (buffer
[2] & 0x3f);
// Fallback path: go back by the number of bytes consumed, reopen the file in
// text mode, restore the position and read the line again through this function.
493 // Switch to text and read again
495 // Rewind to the end of the line and save the position
496 Seek(-nBytesRead
, current
);
497 ULONGLONG currentPosition
= GetPosition();
499 fEOF
= !ReopenAsText();
502 // Seek back to the beginning of the line where we stopped
503 Seek(currentPosition
, begin
);
505 fEOF
= !ReadString(str
);
// LE16: read 16-bit code units as stored; skip '\r' and stop at '\n'.
512 else if(m_encoding
== LE16
)
515 while(Read(&wc
, sizeof(wc
)) == sizeof(wc
))
518 if(wc
== '\r') continue;
519 if(wc
== '\n') break;
// BE16: swap the two bytes of each code unit first, then apply the same rules.
523 else if(m_encoding
== BE16
)
526 while(Read(&wc
, sizeof(wc
)) == sizeof(wc
))
529 wc
= ((wc
>>8)&0x00ff)|((wc
<<8)&0xff00);
530 if(wc
== '\r') continue;
531 if(wc
== '\n') break;
539 UINT
CTextFile::Read( void* lpBuf
, UINT nCount
)
541 return __super::Read(lpBuf
,nCount
);
// Constructor: takes the encoding `e` and a size limit llMaxSize, stored in
// m_llMaxSize. Open() treats a negative m_llMaxSize as "no limit".
// NOTE(review): the base-class initializer is not visible here - presumably
// receives `e`; confirm.
547 CWebTextFile::CWebTextFile(CTextFile::enc e
, LONGLONG llMaxSize
)
549 , m_llMaxSize(llMaxSize
)
// Opens a local file or, when the name starts with "http://", downloads the
// resource into a file under the temp directory and opens that copy instead.
553 bool CWebTextFile::Open(LPCTSTR lpszFileName
)
555 CString
fn(lpszFileName
);
// Anything that does not start with "http://" is handled by the base class.
// NOTE(review): the prefix test is case-sensitive and does not cover
// "https://" - confirm whether that is intended.
557 if(fn
.Find(_T("http://")) != 0)
558 return __super::Open(lpszFileName
);
// Request the URL as a binary transfer.
564 CAutoPtr
<CStdioFile
> f(is
.OpenURL(fn
, 1, INTERNET_FLAG_TRANSFER_BINARY
|INTERNET_FLAG_EXISTING_CONNECT
));
565 if(!f
) return(false);
567 TCHAR path
[MAX_PATH
];
568 GetTempPath(MAX_PATH
, path
);
// Temp file name = temp directory + the part of the URL after the last '/',
// cut at the first '?' found.
570 fn
= path
+ fn
.Mid(fn
.ReverseFind('/')+1);
571 int i
= fn
.Find(_T("?"));
572 if(i
> 0) fn
= fn
.Left(i
);
574 if(!temp
.Open(fn
, modeCreate
|modeWrite
|typeBinary
|shareDenyWrite
))
// Copy in 1024-byte chunks while full chunks keep arriving and the size limit
// (if m_llMaxSize is non-negative) has not been reached.
// NOTE(review): the loop ends on the first read shorter than 1024 bytes; if the
// stream can return a short read before the end of the data, the copy would be
// truncated - confirm.
582 while((len
= f
->Read(buff
, 1024)) == 1024 && (m_llMaxSize
< 0 || (total
+=1024) < m_llMaxSize
))
583 temp
.Write(buff
, len
);
// Write the chunk that made the loop stop, if it holds any data.
584 if(len
> 0) temp
.Write(buff
, len
);
588 f
->Close(); // must close it because the destructor doesn't seem to do it and we will get an exception when "is" is destroyed
590 catch(CInternetException
* ie
)
// Finally open the downloaded copy through the base class.
596 return __super::Open(m_tempfn
);
// Save override for CWebTextFile, which is read-only (see the comment below).
// NOTE(review): the return statement is not visible here - presumably reports
// failure; confirm.
599 bool CWebTextFile::Save(LPCTSTR lpszFileName
, enc e
)
601 // CWebTextFile is read-only...
// Close override: performs extra work only when a temp file name was recorded
// by Open() (m_tempfn is not empty).
// NOTE(review): the guarded statements are not visible here - a comment in
// CTextFile::ReopenAsText() says this function deletes the temp file; confirm.
606 void CWebTextFile::Close()
610 if(!m_tempfn
.IsEmpty())
617 ///////////////////////////////////////////////////////////////
619 CStringW
AToW(const CStringA
& str
)
622 for(int i
= 0, j
= str
.GetLength(); i
< j
; i
++)
623 ret
+= (WCHAR
)(BYTE
)str
[i
];
627 CStringA
WToA(const CStringW
& str
)
630 for(int i
= 0, j
= str
.GetLength(); i
< j
; i
++)
631 ret
+= (CHAR
)(WORD
)str
[i
];
635 CString
AToT(const CStringA
& str
)
638 for(int i
= 0, j
= str
.GetLength(); i
< j
; i
++)
639 ret
+= (TCHAR
)(BYTE
)str
[i
];
// Converts a wide string to a CString. The visible loop appends each code unit
// cast to a TCHAR.
// NOTE(review): lines around the loop are not visible here - there may be a
// build-dependent alternative path; confirm.
643 CString
WToT(const CStringW
& str
)
649 for(int i
= 0, j
= str
.GetLength(); i
< j
; i
++)
650 ret
+= (TCHAR
)(WORD
)str
[i
];
// Converts a CString to a narrow string. The visible loop appends each
// character cast through BYTE to CHAR, keeping only its low byte.
// NOTE(review): lines around the loop are not visible here - there may be a
// build-dependent alternative path; confirm.
655 CStringA
TToA(const CString
& str
)
659 for(int i
= 0, j
= str
.GetLength(); i
< j
; i
++)
660 ret
+= (CHAR
)(BYTE
)str
[i
];
// Converts a CString to a wide string. The visible loop appends each character
// cast through BYTE to WCHAR, keeping only its low byte.
// NOTE(review): lines around the loop are not visible here - there may be a
// build-dependent alternative path; confirm. Unlike the sibling helpers, this
// loop indexes with size_t rather than int.
667 CStringW
TToW(const CString
& str
)
673 for(size_t i
= 0, j
= str
.GetLength(); i
< j
; i
++)
674 ret
+= (WCHAR
)(BYTE
)str
[i
];