1 #include "ACEXML/common/FileCharStream.h"
3 #include "ace/Log_Msg.h"
4 #include "ace/OS_NS_stdio.h"
5 #include "ace/OS_NS_sys_stat.h"
6 #include "ace/Truncate.h"
8 #if defined (ACE_USES_WCHAR)
9 # include "ace/OS_NS_wchar.h"
10 #endif /* ACE_USES_WCHAR */
// Default constructor: the stream starts detached from any file.
// filename_, encoding_, size_, infile_ and peek_ are zero-initialised and
// close_infile_ is set to true.
// NOTE(review): the constructor body is not visible in this listing.
12 ACEXML_FileCharStream::ACEXML_FileCharStream ()
13 : filename_ (0), encoding_ (0), size_ (0), infile_ (0),
14 close_infile_ (true), peek_ (0)
// Destructor.
// NOTE(review): the body is not visible in this listing; presumably it
// releases the stream and the owned strings (e.g. via close()) -- confirm.
18 ACEXML_FileCharStream::~ACEXML_FileCharStream ()
// Shared implementation behind open() and use_stream().
//
// open_file: stream to read from; it is stored in infile_ and then tested
//            against 0.
// name:      path the stream was opened under; it is passed to stat() to
//            obtain the size and is copied into filename_.
//
// Returns the result of determine_encoding() when the visible checks pass.
// NOTE(review): the statements taken when infile_ is 0 or when stat() fails
// are not visible in this listing -- presumably an error return; confirm.
24 ACEXML_FileCharStream::use_stream_i (FILE* open_file
, const ACEXML_Char
*name
)
// Drop the strings left over from any previous use of this object.
26 delete[] this->filename_
;
29 delete[] this->encoding_
;
32 this->infile_
= open_file
;
33 if (this->infile_
== 0)
// The size is looked up by path, not through the FILE handle.
// NOTE(review): the declaration of statbuf is not visible in this listing.
37 if (ACE_OS::stat (name
, &statbuf
) < 0)
40 this->size_
= ACE_Utils::truncate_cast
<ACE_OFF_T
> (statbuf
.st_size
);
41 this->filename_
= ACE::strnew (name
);
42 return this->determine_encoding();
// Attach a stream that the caller has already opened.
//
// The stream is rewound, close_infile_ is cleared (close() only calls
// fclose when that flag is set), and the work is handed to use_stream_i(),
// whose return value is passed through.
// NOTE(review): any guard around the rewind (for example a null check on
// open_file) is not visible in this listing -- confirm that one exists.
46 ACEXML_FileCharStream::use_stream (FILE* open_file
, const ACEXML_Char
*name
)
49 ACE_OS::rewind(open_file
);
51 this->close_infile_
= false;
52 return use_stream_i(open_file
, name
);
// Open the file called `name` for reading and attach it to this stream.
//
// close_infile_ is set so that close() will fclose the handle. The result
// of ACE_OS::fopen (mode "r") is forwarded unchecked to use_stream_i(),
// which tests it against 0; that function's return value is returned.
56 ACEXML_FileCharStream::open (const ACEXML_Char
*name
)
58 this->close_infile_
= true;
59 return use_stream_i(ACE_OS::fopen (name
, ACE_TEXT ("r")), name
);
// Work out the file's encoding from its leading bytes and record it in
// encoding_.
//
// Visible steps: read up to four bytes through getchar_i(), rewind the
// file, ask ACEXML_Encoding::get_encoding() for an encoding name, store a
// copy of that name, then read up to three further bytes and push one back
// with ungetc().
// NOTE(review): the declarations of input, retval, i and ch, every return
// statement, the check on temp, and the end of the byte comparison are not
// visible in this listing.
63 ACEXML_FileCharStream::determine_encoding ()
65 if (this->infile_
== 0)
// Fill input[0..3]; the loop stops early once getchar_i() reports -1.
71 for (; i
< 4 && retval
!= -1; ++i
)
72 retval
= this->getchar_i(input
[i
]);
// Go back to the start so the sampled bytes are not lost to later reads.
77 ACE_OS::rewind (this->infile_
);
79 const ACEXML_Char
* temp
= ACEXML_Encoding::get_encoding (input
);
// Replace any previously stored encoding name with a copy of temp.
84 delete [] this->encoding_
;
85 this->encoding_
= ACE::strnew (temp
);
86 // ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("File's encoding is %s\n"),
89 // Move over the byte-order-mark if present.
// At most three bytes are examined.
91 for (int j
= 0; j
< 3; ++j
)
93 if (this->getchar_i (ch
) < 0)
// 0xFF/0xFE and 0xEF/0xBB are byte values that occur in UTF-16 and UTF-8
// byte-order marks.
// NOTE(review): the remainder of this condition and the branch structure
// around the ungetc below are not visible; presumably a non-matching byte
// is pushed back and the loop ends -- confirm.
95 if (ch
== '\xFF' || ch
== '\xFE' || ch
== '\xEF' || ch
== '\xBB' ||
100 ACE_OS::ungetc (ch
, this->infile_
);
// Reposition the stream at the start of the file and re-run
// determine_encoding(). The result of determine_encoding() is discarded
// here.
// NOTE(review): the statement taken when infile_ is 0 is not visible in
// this listing.
108 ACEXML_FileCharStream::rewind()
110 if (this->infile_
== 0)
112 ACE_OS::rewind (this->infile_
);
113 this->determine_encoding();
// Report how much of the file lies beyond the current read position.
//
// Returns size_ (recorded by use_stream_i) minus the current ftell()
// offset, narrowed to int.
// NOTE(review): the declaration of curr and the statements taken when
// infile_ is 0 or ftell() fails are not visible in this listing.
117 ACEXML_FileCharStream::available ()
119 if (this->infile_
== 0)
123 if ((curr
= ACE_OS::ftell (this->infile_
)) < 0)
125 return static_cast<int> (this->size_
- curr
);
// Detach from the current file and free the owned strings.
//
// fclose is only called when close_infile_ is set; use_stream() clears
// that flag and open() sets it.
// NOTE(review): the brace nesting, any resetting of members after the
// deletes, and the return statement are not visible in this listing.
129 ACEXML_FileCharStream::close ()
131 if (this->infile_
!= 0)
133 if (this->close_infile_
)
135 ACE_OS::fclose (this->infile_
);
139 delete[] this->filename_
;
141 delete[] this->encoding_
;
// Read a single byte from infile_ into ch.
//
// The fgetc() result is narrowed to char before being stored. Returns -1
// when the stream's end-of-file indicator is set after the read, otherwise
// 0. Only feof() is consulted, so a read error that does not set
// end-of-file is reported as 0.
// infile_ is used without a null check here.
150 ACEXML_FileCharStream::getchar_i (char& ch
)
152 ch
= static_cast<char> (ACE_OS::fgetc (this->infile_
));
153 return (feof(this->infile_
) ? -1 : 0);
// Bulk read: copy up to len elements of sizeof(ACEXML_Char) bytes each from
// infile_ into str using fread().
//
// Returns the fread() element count narrowed to int.
// NOTE(review): the rest of the parameter list (presumably the declaration
// of len) and the statement taken when infile_ is 0 are not visible in
// this listing.
157 ACEXML_FileCharStream::read (ACEXML_Char
*str
,
160 if (this->infile_
== 0)
163 return static_cast<int> (ACE_OS::fread (str
, sizeof (ACEXML_Char
), len
, this->infile_
));
// Read the next character into ch.
//
// With ACE_USES_WCHAR defined the work is delegated to get_i(). The other
// visible path reads one byte with fgetc() and returns -1 if the
// end-of-file indicator is set afterwards, otherwise 0.
// NOTE(review): the statement taken when infile_ is 0 and the #else that
// separates the two paths are not visible in this listing.
167 ACEXML_FileCharStream::get (ACEXML_Char
& ch
)
169 if (this->infile_
== 0)
171 #if defined (ACE_USES_WCHAR)
172 return this->get_i (ch
);
174 ch
= (ACEXML_Char
) ACE_OS::fgetc (this->infile_
);
175 return (feof(this->infile_
) ? -1 : 0);
176 #endif /* ACE_USES_WCHAR */
// Look at the next character without consuming it.
//
// With ACE_USES_WCHAR defined the work is delegated to peek_i(). The other
// visible path reads one byte with fgetc() and immediately pushes it back
// with ungetc().
// NOTE(review): the value pushed back is the fgetc() result after
// narrowing to ACEXML_Char, so an end-of-file result is narrowed as well --
// check how that case is handled. The statement taken when infile_ is 0,
// the #else, and the final return are not visible in this listing.
180 ACEXML_FileCharStream::peek ()
182 if (this->infile_
== 0)
184 #if defined (ACE_USES_WCHAR)
185 return this->peek_i();
188 ACEXML_Char ch
= static_cast<ACEXML_Char
> (ACE_OS::fgetc (this->infile_
));
189 ACE_OS::ungetc (ch
, this->infile_
);
191 #endif /* ACE_USES_WCHAR */
194 #if defined (ACE_USES_WCHAR)
// Wide-character implementation of get(); the surrounding #if compiles it
// only when ACE_USES_WCHAR is defined.
//
// For a "UTF-8" encoding one byte is read with fgetc() and the function
// returns -1 at end-of-file, otherwise 0. For any other encoding two units
// are read with fgetwc() and merged into ch, with the order chosen by
// whether encoding_ equals "UTF-16BE".
// NOTE(review): the body of the peek_ branch, the declaration of i, the
// handling of a short read, and the final return are not visible in this
// listing.
196 ACEXML_FileCharStream::get_i (ACEXML_Char
& ch
)
198 if (ACE_OS::strcmp (this->encoding_
, ACE_TEXT ("UTF-8")) == 0)
200 ch
= (ACEXML_Char
) ACE_OS::fgetc (this->infile_
);
201 return (feof(this->infile_
) ? -1 : 0);
203 // If we have a value in peek_, return it.
204 if (this->peek_
!= 0)
// BE is 1 only for "UTF-16BE"; every other encoding name takes the
// little-endian combination below.
211 int BE
= (ACE_OS::strcmp (this->encoding_
,
212 ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
213 ACEXML_Char input
[2];
// Read up to two units, stopping early if end-of-file is reached.
215 for (; i
< 2 && !feof (this->infile_
); ++i
)
217 input
[i
] = ACE_OS::fgetwc (this->infile_
);
// << binds tighter than |, and | tighter than ?:, so each arm is
// (first << 8) | second with the operands swapped between the two arms.
224 ch
= BE
? input
[0] << 8 | input
[1] : input
[1] << 8 | input
[0];
// Wide-character implementation of peek(); the surrounding #if compiles it
// only when ACE_USES_WCHAR is defined.
//
// For a "UTF-8" encoding one byte is read with fgetc() and pushed back with
// ungetc(). For any other encoding two units are read with fgetwc(),
// merged using the same byte-order rule as get_i(), and stored in peek_.
// NOTE(review): every return statement, the body of the peek_ branch, the
// declaration of i, and the handling of a short read are not visible in
// this listing.
229 ACEXML_FileCharStream::peek_i ()
231 // If we are reading a UTF-8 encoded file, just use the plain unget.
232 if (ACE_OS::strcmp (this->encoding_
, ACE_TEXT ("UTF-8")) == 0)
234 ACEXML_Char ch
= (ACEXML_Char
) ACE_OS::fgetc (this->infile_
);
235 ACE_OS::ungetc (ch
, this->infile_
);
239 // If somebody had already called peek() and not consumed it, return the
240 // value held in this->peek_.
241 if (this->peek_
!= 0)
244 // Peek into the stream. This reads two characters off the stream, keeps
// BE is 1 only for "UTF-16BE"; every other encoding name takes the
// little-endian combination below.
246 int BE
= (ACE_OS::strcmp (this->encoding_
,
247 ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
248 ACEXML_Char input
[2];
// Read up to two units, stopping early if end-of-file is reached.
250 for (; i
< 2 && !feof (this->infile_
); ++i
)
252 input
[i
] = ACE_OS::fgetwc (this->infile_
);
// The merged value is cached in peek_; the test above treats a non-zero
// peek_ as a pending character.
259 this->peek_
= BE
? input
[0] << 8 | input
[1] : input
[1] << 8 | input
[0];
262 #endif /* ACE_USES_WCHAR */
// Accessor for the encoding name stored by determine_encoding().
// Returns the encoding_ pointer itself, not a copy; the constructor sets it
// to 0 and close() deletes it.
265 ACEXML_FileCharStream::getEncoding ()
267 return this->encoding_
;
// Accessor for the file name copied by use_stream_i().
// Returns the filename_ pointer itself, not a copy; the constructor sets it
// to 0 and close() deletes it.
271 ACEXML_FileCharStream::getSystemId ()
273 return this->filename_
;