1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <boost/scoped_array.hpp>
21 #include <tools/stream.hxx>
22 #include <hintids.hxx>
23 #include <rtl/tencinfo.h>
24 #include <sfx2/printer.hxx>
25 #include <editeng/fontitem.hxx>
26 #include <editeng/langitem.hxx>
27 #include <editeng/formatbreakitem.hxx>
28 #include <editeng/scripttypeitem.hxx>
29 #include <shellio.hxx>
31 #include <swtypes.hxx>
36 #include <pagedesc.hxx>
37 #include <breakit.hxx>
39 #include <statstr.hrc> // ResId for the status bar
40 #include <mdiexp.hxx> // ...Percent()
41 #include <poolfmt.hxx>
43 #include "vcl/metric.hxx"
45 #define ASC_BUFFLEN 4096
53 const SwAsciiOptions
& rOpt
;
59 sal_uLong
ReadChars();
60 void InsertText( const String
& rStr
);
63 SwASCIIParser( SwDoc
* pD
, const SwPaM
& rCrsr
, SvStream
& rIn
,
64 int bReadNewDoc
, const SwAsciiOptions
& rOpts
);
67 sal_uLong
CallParser();
71 // Call for the general reader interface
72 sal_uLong
AsciiReader::Read( SwDoc
&rDoc
, const String
&, SwPaM
&rPam
, const String
& )
76 OSL_ENSURE( !this, "ASCII read without a stream" );
77 return ERR_SWG_READ_ERROR
;
80 SwASCIIParser
* pParser
= new SwASCIIParser( &rDoc
, rPam
, *pStrm
,
81 !bInsertMode
, aOpt
.GetASCIIOpts() );
82 sal_uLong nRet
= pParser
->CallParser();
85 // after Read reset the options
86 aOpt
.ResetASCIIOpts();
90 SwASCIIParser::SwASCIIParser(SwDoc
* pD
, const SwPaM
& rCrsr
, SvStream
& rIn
,
91 int bReadNewDoc
, const SwAsciiOptions
& rOpts
)
92 : pDoc(pD
), rInput(rIn
), rOpt(rOpts
), nFileSize(0), nScript(0)
93 , bNewDoc(bReadNewDoc
)
95 pPam
= new SwPaM( *rCrsr
.GetPoint() );
96 pArr
= new sal_Char
[ ASC_BUFFLEN
+ 2 ];
98 pItemSet
= new SfxItemSet( pDoc
->GetAttrPool(),
99 RES_CHRATR_FONT
, RES_CHRATR_LANGUAGE
,
100 RES_CHRATR_CJK_FONT
, RES_CHRATR_CJK_LANGUAGE
,
101 RES_CHRATR_CTL_FONT
, RES_CHRATR_CTL_LANGUAGE
,
104 // set defaults from the options
105 if( rOpt
.GetLanguage() )
107 SvxLanguageItem
aLang( (LanguageType
)rOpt
.GetLanguage(),
108 RES_CHRATR_LANGUAGE
);
109 pItemSet
->Put( aLang
);
110 pItemSet
->Put( aLang
, RES_CHRATR_CJK_LANGUAGE
);
111 pItemSet
->Put( aLang
, RES_CHRATR_CTL_LANGUAGE
);
113 if( rOpt
.GetFontName().Len() )
115 Font
aTextFont( rOpt
.GetFontName(), Size( 0, 10 ) );
116 if( pDoc
->getPrinter( false ) )
117 aTextFont
= pDoc
->getPrinter( false )->GetFontMetric( aTextFont
);
118 SvxFontItem
aFont( aTextFont
.GetFamily(), aTextFont
.GetName(),
119 aEmptyStr
, aTextFont
.GetPitch(), aTextFont
.GetCharSet(), RES_CHRATR_FONT
);
120 pItemSet
->Put( aFont
);
121 pItemSet
->Put( aFont
, RES_CHRATR_CJK_FONT
);
122 pItemSet
->Put( aFont
, RES_CHRATR_CTL_FONT
);
126 SwASCIIParser::~SwASCIIParser()
134 // Calling the parser
135 sal_uLong
SwASCIIParser::CallParser()
137 rInput
.Seek(STREAM_SEEK_TO_END
);
140 nFileSize
= rInput
.Tell();
141 rInput
.Seek(STREAM_SEEK_TO_BEGIN
);
144 ::StartProgress( STR_STATSTR_W4WREAD
, 0, nFileSize
, pDoc
->GetDocShell() );
147 xub_StrLen nSttCntnt
= 0;
150 const SwNodeIndex
& rTmp
= pPam
->GetPoint()->nNode
;
151 pInsPam
= new SwPaM( rTmp
, rTmp
, 0, -1 );
152 nSttCntnt
= pPam
->GetPoint()->nContent
.GetIndex();
155 SwTxtFmtColl
*pColl
= 0;
159 pColl
= pDoc
->GetTxtCollFromPool(RES_POOLCOLL_HTML_PRE
, false);
161 pColl
= pDoc
->GetTxtCollFromPool(RES_POOLCOLL_STANDARD
,false);
163 pDoc
->SetTxtFmtColl(*pPam
, pColl
);
166 sal_uLong nError
= ReadChars();
170 // keep only the attributes for the scripts that were found while scanning.
171 if( !( SCRIPTTYPE_LATIN
& nScript
))
173 pItemSet
->ClearItem( RES_CHRATR_FONT
);
174 pItemSet
->ClearItem( RES_CHRATR_LANGUAGE
);
176 if( !( SCRIPTTYPE_ASIAN
& nScript
))
178 pItemSet
->ClearItem( RES_CHRATR_CJK_FONT
);
179 pItemSet
->ClearItem( RES_CHRATR_CJK_LANGUAGE
);
181 if( !( SCRIPTTYPE_COMPLEX
& nScript
))
183 pItemSet
->ClearItem( RES_CHRATR_CTL_FONT
);
184 pItemSet
->ClearItem( RES_CHRATR_CTL_LANGUAGE
);
186 if( pItemSet
->Count() )
192 // Using the pool defaults for the font causes significant
193 // trouble for the HTML filter, because it is not able
194 // to export the pool defaults (or to be more precise:
195 // the HTML filter is not able to detect whether a pool
196 // default has changed or not). Even a comparison with the
197 // HTML template does not work, because the defaults are
198 // not copied when a new doc is created. The result of
199 // comparing pool defaults therefore would be that the
200 // defaults are exported always if they have changed for
201 // text documents in general. That's not sensible, as well
202 // as it is not sensible to export them always.
203 sal_uInt16 aWhichIds
[4] =
205 RES_CHRATR_FONT
, RES_CHRATR_CJK_FONT
,
206 RES_CHRATR_CTL_FONT
, 0
208 sal_uInt16
*pWhichIds
= aWhichIds
;
211 const SfxPoolItem
*pItem
;
212 if (SFX_ITEM_SET
== pItemSet
->GetItemState(*pWhichIds
,
215 pColl
->SetFmtAttr( *pItem
);
216 pItemSet
->ClearItem( *pWhichIds
);
221 if (pItemSet
->Count())
222 pDoc
->SetDefault(*pItemSet
);
226 // then apply the defined attributes to the inserted range
227 *pInsPam
->GetMark() = *pPam
->GetPoint();
228 pInsPam
->GetPoint()->nNode
++;
229 pInsPam
->GetPoint()->nContent
.Assign(
230 pInsPam
->GetCntntNode(), nSttCntnt
);
233 OSL_ENSURE( !this, "Have to change - hard attr. to para. style" );
234 pDoc
->InsertItemSet( *pInsPam
, *pItemSet
, 0 );
237 delete pItemSet
, pItemSet
= 0;
242 ::EndProgress( pDoc
->GetDocShell() );
246 sal_uLong
SwASCIIParser::ReadChars()
248 sal_Unicode
*pStt
= 0, *pEnd
= 0, *pLastStt
= 0;
249 long nReadCnt
= 0, nLineLen
= 0;
250 sal_Unicode cLastCR
= 0;
251 bool bSwapUnicode
= false;
253 const SwAsciiOptions
*pUseMe
=&rOpt
;
254 SwAsciiOptions aEmpty
;
255 if (nFileSize
>= 2 &&
256 aEmpty
.GetFontName() == rOpt
.GetFontName() &&
257 aEmpty
.GetCharSet() == rOpt
.GetCharSet() &&
258 aEmpty
.GetLanguage() == rOpt
.GetLanguage() &&
259 aEmpty
.GetParaFlags() == rOpt
.GetParaFlags())
261 sal_uLong nLen
, nOrig
;
262 nOrig
= nLen
= rInput
.Read(pArr
, ASC_BUFFLEN
);
264 bool bRet
= SwIoSystem::IsDetectableText(pArr
, nLen
, &eCharSet
, &bSwapUnicode
);
265 OSL_ENSURE(bRet
, "Autodetect of text import without nag dialog must "
267 if (bRet
&& eCharSet
!= RTL_TEXTENCODING_DONTKNOW
)
269 aEmpty
.SetCharSet(eCharSet
);
270 rInput
.SeekRel(-(long(nLen
)));
273 rInput
.SeekRel(-(long(nOrig
)));
277 rtl_TextToUnicodeConverter hConverter
=0;
278 rtl_TextToUnicodeContext hContext
=0;
279 CharSet currentCharSet
= pUseMe
->GetCharSet();
280 if (RTL_TEXTENCODING_UCS2
!= currentCharSet
)
282 if( currentCharSet
== RTL_TEXTENCODING_DONTKNOW
)
283 currentCharSet
= RTL_TEXTENCODING_ASCII_US
;
284 hConverter
= rtl_createTextToUnicodeConverter( currentCharSet
);
285 OSL_ENSURE( hConverter
, "no string convert available" );
287 return ERROR_SW_READ_BASE
;
288 bSwapUnicode
= false;
289 hContext
= rtl_createTextToUnicodeContext( hConverter
);
291 else if (pUseMe
!= &aEmpty
) //Already successfully figured out type
293 rInput
.StartReadingUnicodeText( currentCharSet
);
294 bSwapUnicode
= rInput
.IsEndianSwap();
297 boost::scoped_array
<sal_Unicode
> aWork
;
298 sal_uLong nArrOffset
= 0;
303 if( pLastStt
!= pStt
)
304 InsertText( OUString( pLastStt
));
308 if( SVSTREAM_OK
!= rInput
.GetError() || 0 == (lGCount
=
309 rInput
.Read( pArr
+ nArrOffset
,
310 ASC_BUFFLEN
- nArrOffset
)))
311 break; // break from the while loop
314 If there were some unconverted bytes on the last cycle then they
315 were put at the beginning of the array, so total bytes available
316 to convert this cycle includes them. If we found 0 following bytes
317 then we ignore the previous partial character.
324 sal_Size nNewLen
= lGCount
, nCntBytes
;
325 aWork
.reset(new sal_Unicode
[nNewLen
+ 1]); // add 1 for '\0'
326 sal_Unicode
* pBuf
= aWork
.get();
328 nNewLen
= rtl_convertTextToUnicode( hConverter
, hContext
,
329 pArr
, lGCount
, pBuf
, nNewLen
,
331 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT
|
332 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT
|
333 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
|
334 RTL_TEXTTOUNICODE_FLAGS_GLOBAL_SIGNATURE
338 if( 0 != ( nArrOffset
= lGCount
- nCntBytes
) )
339 memmove( pArr
, pArr
+ nCntBytes
, nArrOffset
);
341 pStt
= pLastStt
= aWork
.get();
342 pEnd
= pStt
+ nNewLen
;
346 pStt
= pLastStt
= (sal_Unicode
*)pArr
;
347 pEnd
= (sal_Unicode
*)(pArr
+ lGCount
);
351 sal_Char
* pF
= pArr
, *pN
= pArr
+ 1;
352 for( sal_uLong n
= 0; n
< lGCount
; n
+= 2, pF
+= 2, pN
+= 2 )
364 ::SetProgressState( nReadCnt
, pDoc
->GetDocShell() );
368 if( 0x0a == *pStt
&& 0x0d == cLastCR
)
372 // We skip the last one at the end
373 if( !rInput
.IsEof() || !(pEnd
== pStt
||
374 ( !*pEnd
&& pEnd
== pStt
+1 ) ) )
375 pDoc
->SplitNode( *pPam
->GetPoint(), false );
379 bool bIns
= true, bSplitNode
= false;
383 case 0x0a: if( LINEEND_LF
== pUseMe
->GetParaFlags() )
389 // We skip the last one at the end
390 if( !rInput
.IsEof() || pEnd
!= pStt
)
395 case 0x0d: if( LINEEND_LF
!= pUseMe
->GetParaFlags() )
401 bool bChkSplit
= false;
402 if( LINEEND_CRLF
== pUseMe
->GetParaFlags() )
406 else if( 0x0a == *pStt
)
415 // We skip the last one at the end
416 if( bChkSplit
&& ( !rInput
.IsEof() || pEnd
!= pStt
))
423 // Insert a hard page break
427 InsertText( OUString( pLastStt
));
429 pDoc
->SplitNode( *pPam
->GetPoint(), false );
430 pDoc
->InsertPoolItem( *pPam
, SvxFmtBreakItem(
431 SVX_BREAK_PAGE_BEFORE
, RES_BREAK
), 0);
439 if( nReadCnt
== nFileSize
&& pStt
+1 == pEnd
)
442 *pStt
= '#'; // Replacement visualisation
449 // Found control char, replace with '#'
456 if( ( nLineLen
>= MAX_ASCII_PARA
- 100 ) &&
457 ( ( *pStt
== ' ' ) || ( nLineLen
>= MAX_ASCII_PARA
- 1 ) ) )
459 sal_Unicode c
= *pStt
;
461 InsertText( OUString( pLastStt
));
462 pDoc
->SplitNode( *pPam
->GetPoint(), false );
470 else if( bSplitNode
)
472 // We found a CR/LF, thus save the text
473 InsertText( OUString( pLastStt
));
474 pDoc
->SplitNode( *pPam
->GetPoint(), false );
482 rtl_destroyTextToUnicodeContext( hConverter
, hContext
);
483 rtl_destroyTextToUnicodeConverter( hConverter
);
488 void SwASCIIParser::InsertText( const String
& rStr
)
490 pDoc
->InsertString( *pPam
, rStr
);
491 pDoc
->UpdateRsid( *pPam
, rStr
.Len() );
492 pDoc
->UpdateParRsid( pPam
->GetPoint()->nNode
.GetNode().GetTxtNode() );
494 if( pItemSet
&& g_pBreakIt
&& nScript
!= ( SCRIPTTYPE_LATIN
|
496 SCRIPTTYPE_COMPLEX
) )
497 nScript
|= g_pBreakIt
->GetAllScriptsOfText( rStr
);
500 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */