bump product version to 4.1.6.2
[LibreOffice.git] / sw / source / filter / ascii / parasc.cxx
blob a6800fdc8e36925c285271d991a5a8353da15b41
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <boost/scoped_array.hpp>
21 #include <tools/stream.hxx>
22 #include <hintids.hxx>
23 #include <rtl/tencinfo.h>
24 #include <sfx2/printer.hxx>
25 #include <editeng/fontitem.hxx>
26 #include <editeng/langitem.hxx>
27 #include <editeng/formatbreakitem.hxx>
28 #include <editeng/scripttypeitem.hxx>
29 #include <shellio.hxx>
30 #include <doc.hxx>
31 #include <swtypes.hxx>
32 #include <ndtxt.hxx>
33 #include <pam.hxx>
34 #include <frmatr.hxx>
35 #include <fltini.hxx>
36 #include <pagedesc.hxx>
37 #include <breakit.hxx>
38 #include <swerror.h>
39 #include <statstr.hrc> // ResId for the status bar
40 #include <mdiexp.hxx> // ...Percent()
41 #include <poolfmt.hxx>
43 #include "vcl/metric.hxx"
45 #define ASC_BUFFLEN 4096
47 class SwASCIIParser
49 SwDoc* pDoc;
50 SwPaM* pPam;
51 SvStream& rInput;
52 sal_Char* pArr;
53 const SwAsciiOptions& rOpt;
54 SfxItemSet* pItemSet;
55 long nFileSize;
56 sal_uInt16 nScript;
57 bool bNewDoc;
59 sal_uLong ReadChars();
60 void InsertText( const String& rStr );
62 public:
63 SwASCIIParser( SwDoc* pD, const SwPaM& rCrsr, SvStream& rIn,
64 int bReadNewDoc, const SwAsciiOptions& rOpts );
65 ~SwASCIIParser();
67 sal_uLong CallParser();
71 // Call for the general reader interface
72 sal_uLong AsciiReader::Read( SwDoc &rDoc, const String&, SwPaM &rPam, const String & )
74 if( !pStrm )
76 OSL_ENSURE( !this, "ASCII read without a stream" );
77 return ERR_SWG_READ_ERROR;
80 SwASCIIParser* pParser = new SwASCIIParser( &rDoc, rPam, *pStrm,
81 !bInsertMode, aOpt.GetASCIIOpts() );
82 sal_uLong nRet = pParser->CallParser();
84 delete pParser;
85 // after Read reset the options
86 aOpt.ResetASCIIOpts();
87 return nRet;
90 SwASCIIParser::SwASCIIParser(SwDoc* pD, const SwPaM& rCrsr, SvStream& rIn,
91 int bReadNewDoc, const SwAsciiOptions& rOpts)
92 : pDoc(pD), rInput(rIn), rOpt(rOpts), nFileSize(0), nScript(0)
93 , bNewDoc(bReadNewDoc)
95 pPam = new SwPaM( *rCrsr.GetPoint() );
96 pArr = new sal_Char [ ASC_BUFFLEN + 2 ];
98 pItemSet = new SfxItemSet( pDoc->GetAttrPool(),
99 RES_CHRATR_FONT, RES_CHRATR_LANGUAGE,
100 RES_CHRATR_CJK_FONT, RES_CHRATR_CJK_LANGUAGE,
101 RES_CHRATR_CTL_FONT, RES_CHRATR_CTL_LANGUAGE,
102 0 );
104 // set defaults from the options
105 if( rOpt.GetLanguage() )
107 SvxLanguageItem aLang( (LanguageType)rOpt.GetLanguage(),
108 RES_CHRATR_LANGUAGE );
109 pItemSet->Put( aLang );
110 pItemSet->Put( aLang, RES_CHRATR_CJK_LANGUAGE );
111 pItemSet->Put( aLang, RES_CHRATR_CTL_LANGUAGE );
113 if( rOpt.GetFontName().Len() )
115 Font aTextFont( rOpt.GetFontName(), Size( 0, 10 ) );
116 if( pDoc->getPrinter( false ) )
117 aTextFont = pDoc->getPrinter( false )->GetFontMetric( aTextFont );
118 SvxFontItem aFont( aTextFont.GetFamily(), aTextFont.GetName(),
119 aEmptyStr, aTextFont.GetPitch(), aTextFont.GetCharSet(), RES_CHRATR_FONT );
120 pItemSet->Put( aFont );
121 pItemSet->Put( aFont, RES_CHRATR_CJK_FONT );
122 pItemSet->Put( aFont, RES_CHRATR_CTL_FONT );
126 SwASCIIParser::~SwASCIIParser()
128 delete pPam;
129 delete [] pArr;
130 delete pItemSet;
134 // Calling the parser
135 sal_uLong SwASCIIParser::CallParser()
137 rInput.Seek(STREAM_SEEK_TO_END);
138 rInput.ResetError();
140 nFileSize = rInput.Tell();
141 rInput.Seek(STREAM_SEEK_TO_BEGIN);
142 rInput.ResetError();
144 ::StartProgress( STR_STATSTR_W4WREAD, 0, nFileSize, pDoc->GetDocShell() );
146 SwPaM* pInsPam = 0;
147 xub_StrLen nSttCntnt = 0;
148 if (!bNewDoc)
150 const SwNodeIndex& rTmp = pPam->GetPoint()->nNode;
151 pInsPam = new SwPaM( rTmp, rTmp, 0, -1 );
152 nSttCntnt = pPam->GetPoint()->nContent.GetIndex();
155 SwTxtFmtColl *pColl = 0;
157 if (bNewDoc)
159 pColl = pDoc->GetTxtCollFromPool(RES_POOLCOLL_HTML_PRE, false);
160 if (!pColl)
161 pColl = pDoc->GetTxtCollFromPool(RES_POOLCOLL_STANDARD,false);
162 if (pColl)
163 pDoc->SetTxtFmtColl(*pPam, pColl);
166 sal_uLong nError = ReadChars();
168 if( pItemSet )
170 // set only the attribute, for scanned scripts.
171 if( !( SCRIPTTYPE_LATIN & nScript ))
173 pItemSet->ClearItem( RES_CHRATR_FONT );
174 pItemSet->ClearItem( RES_CHRATR_LANGUAGE );
176 if( !( SCRIPTTYPE_ASIAN & nScript ))
178 pItemSet->ClearItem( RES_CHRATR_CJK_FONT );
179 pItemSet->ClearItem( RES_CHRATR_CJK_LANGUAGE );
181 if( !( SCRIPTTYPE_COMPLEX & nScript ))
183 pItemSet->ClearItem( RES_CHRATR_CTL_FONT );
184 pItemSet->ClearItem( RES_CHRATR_CTL_LANGUAGE );
186 if( pItemSet->Count() )
188 if( bNewDoc )
190 if (pColl)
192 // Using the pool defaults for the font causes significant
193 // trouble for the HTML filter, because it is not able
194 // to export the pool defaults (or to be more precise:
195 // the HTML filter is not able to detect whether a pool
196 // default has changed or not. Even a comparison with the
197 // HTMLi template does not work, because the defaults are
198 // not copied when a new doc is created. The result of
199 // comparing pool defaults therefor would be that the
200 // defaults are exported always if the have changed for
201 // text documents in general. That's not sensible, as well
202 // as it is not sensible to export them always.
203 sal_uInt16 aWhichIds[4] =
205 RES_CHRATR_FONT, RES_CHRATR_CJK_FONT,
206 RES_CHRATR_CTL_FONT, 0
208 sal_uInt16 *pWhichIds = aWhichIds;
209 while (*pWhichIds)
211 const SfxPoolItem *pItem;
212 if (SFX_ITEM_SET == pItemSet->GetItemState(*pWhichIds,
213 false, &pItem))
215 pColl->SetFmtAttr( *pItem );
216 pItemSet->ClearItem( *pWhichIds );
218 ++pWhichIds;
221 if (pItemSet->Count())
222 pDoc->SetDefault(*pItemSet);
224 else if( pInsPam )
226 // then set over the insert range the defined attributes
227 *pInsPam->GetMark() = *pPam->GetPoint();
228 pInsPam->GetPoint()->nNode++;
229 pInsPam->GetPoint()->nContent.Assign(
230 pInsPam->GetCntntNode(), nSttCntnt );
232 // !!!!!
233 OSL_ENSURE( !this, "Have to change - hard attr. to para. style" );
234 pDoc->InsertItemSet( *pInsPam, *pItemSet, 0 );
237 delete pItemSet, pItemSet = 0;
240 delete pInsPam;
242 ::EndProgress( pDoc->GetDocShell() );
243 return nError;
// Reads rInput block by block through pArr, turns each block into UTF-16 text
// and inserts it at pPam. Paragraphs are split at line ends (according to the
// configured line-end mode), at form feeds (page break) and when a line gets
// longer than MAX_ASCII_PARA allows.
//
// Returns ERROR_SW_READ_BASE if no text converter could be created, else 0.
// NOTE(review): a stream error only ends the read loop; it is not reflected
// in the return value.
246 sal_uLong SwASCIIParser::ReadChars()
// pStt: current character, pEnd: end of the current block,
// pLastStt: start of the text not yet handed to InsertText().
248 sal_Unicode *pStt = 0, *pEnd = 0, *pLastStt = 0;
// nReadCnt: bytes accounted for so far (progress), nLineLen: characters in
// the current paragraph since the last split.
249 long nReadCnt = 0, nLineLen = 0;
// Set to 0x0d when a block ended right after a CR in CRLF mode, so that the
// next block can check for the matching LF.
250 sal_Unicode cLastCR = 0;
251 bool bSwapUnicode = false;
// Auto-detection is attempted only when the file has at least two bytes and
// the caller's options equal a default-constructed SwAsciiOptions in font
// name, charset, language and line-end flags.
253 const SwAsciiOptions *pUseMe=&rOpt;
254 SwAsciiOptions aEmpty;
255 if (nFileSize >= 2 &&
256 aEmpty.GetFontName() == rOpt.GetFontName() &&
257 aEmpty.GetCharSet() == rOpt.GetCharSet() &&
258 aEmpty.GetLanguage() == rOpt.GetLanguage() &&
259 aEmpty.GetParaFlags() == rOpt.GetParaFlags())
261 sal_uLong nLen, nOrig;
// Read up to one buffer as a sample for the detection.
262 nOrig = nLen = rInput.Read(pArr, ASC_BUFFLEN);
263 CharSet eCharSet;
// NOTE(review): nLen is presumably adjusted by IsDetectableText (nOrig keeps
// the raw byte count separately, and the two rewinds below differ) - confirm
// against SwIoSystem::IsDetectableText.
264 bool bRet = SwIoSystem::IsDetectableText(pArr, nLen, &eCharSet, &bSwapUnicode);
265 OSL_ENSURE(bRet, "Autodetect of text import without nag dialog must "
266 "have failed");
267 if (bRet && eCharSet != RTL_TEXTENCODING_DONTKNOW)
// Detection succeeded: use the detected charset and rewind by nLen.
269 aEmpty.SetCharSet(eCharSet);
270 rInput.SeekRel(-(long(nLen)));
272 else
// Detection failed: rewind by everything that was read.
273 rInput.SeekRel(-(long(nOrig)));
// From here on the local (possibly updated) default options are used.
274 pUseMe=&aEmpty;
// Any charset other than UCS2 is converted with an rtl text converter; an
// unknown charset falls back to US-ASCII. Byte swapping is then never done
// here (bSwapUnicode is cleared).
277 rtl_TextToUnicodeConverter hConverter=0;
278 rtl_TextToUnicodeContext hContext=0;
279 CharSet currentCharSet = pUseMe->GetCharSet();
280 if (RTL_TEXTENCODING_UCS2 != currentCharSet)
282 if( currentCharSet == RTL_TEXTENCODING_DONTKNOW )
283 currentCharSet = RTL_TEXTENCODING_ASCII_US;
284 hConverter = rtl_createTextToUnicodeConverter( currentCharSet );
285 OSL_ENSURE( hConverter, "no string convert available" );
286 if (!hConverter)
287 return ERROR_SW_READ_BASE;
288 bSwapUnicode = false;
289 hContext = rtl_createTextToUnicodeContext( hConverter );
// UCS2 requested by the caller's own options (not by auto-detection): ask the
// stream whether the data has to be byte-swapped.
291 else if (pUseMe != &aEmpty) //Already successfully figured out type
293 rInput.StartReadingUnicodeText( currentCharSet );
294 bSwapUnicode = rInput.IsEndianSwap();
// aWork holds the converted UTF-16 text of the current block; nArrOffset is
// the number of unconverted trailing bytes carried over to the next block.
297 boost::scoped_array<sal_Unicode> aWork;
298 sal_uLong nArrOffset = 0;
300 do {
// Current block used up (also true on the first pass, where both are 0):
// flush the pending text and fetch the next block.
301 if( pStt >= pEnd )
303 if( pLastStt != pStt )
304 InsertText( OUString( pLastStt ));
306 // Read a new block
307 sal_uLong lGCount;
// Stop on a stream error or when no further bytes could be read. New bytes
// are appended behind the nArrOffset bytes kept from the previous block.
308 if( SVSTREAM_OK != rInput.GetError() || 0 == (lGCount =
309 rInput.Read( pArr + nArrOffset,
310 ASC_BUFFLEN - nArrOffset )))
311 break; // break from the while loop
314 If there was some unconverted bytes on the last cycle then they
315 were put at the beginning of the array, so total bytes available
316 to convert this cycle includes them. If we found 0 following bytes
317 then we ignore the previous partial character.
319 lGCount+=nArrOffset;
321 if( hConverter )
323 sal_uInt32 nInfo;
324 sal_Size nNewLen = lGCount, nCntBytes;
// The output buffer is sized to one sal_Unicode per input byte, plus the
// terminator written through pEnd below.
325 aWork.reset(new sal_Unicode[nNewLen + 1]); // add 1 for '\0'
326 sal_Unicode* pBuf = aWork.get();
// nNewLen becomes the number of characters written, nCntBytes the number of
// source bytes consumed.
328 nNewLen = rtl_convertTextToUnicode( hConverter, hContext,
329 pArr, lGCount, pBuf, nNewLen,
331 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
332 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
333 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT |
334 RTL_TEXTTOUNICODE_FLAGS_GLOBAL_SIGNATURE
336 &nInfo,
337 &nCntBytes );
// Bytes that were not consumed are moved to the front of pArr for the next
// round.
338 if( 0 != ( nArrOffset = lGCount - nCntBytes ) )
339 memmove( pArr, pArr + nCntBytes, nArrOffset );
341 pStt = pLastStt = aWork.get();
342 pEnd = pStt + nNewLen;
344 else
// No converter: the bytes in pArr are used directly as sal_Unicode.
346 pStt = pLastStt = (sal_Unicode*)pArr;
347 pEnd = (sal_Unicode*)(pArr + lGCount);
349 if( bSwapUnicode )
// Exchange the two bytes of every 16-bit unit in place.
351 sal_Char* pF = pArr, *pN = pArr + 1;
352 for( sal_uLong n = 0; n < lGCount; n += 2, pF += 2, pN += 2 )
354 sal_Char c = *pF;
355 *pF = *pN;
356 *pN = c;
// Terminate the block so that OUString( pLastStt ) stops at its end. Both
// buffers leave room for this: aWork has nNewLen + 1 elements, and pArr is
// allocated with ASC_BUFFLEN + 2 bytes in the constructor.
361 *pEnd = 0;
362 nReadCnt += lGCount;
364 ::SetProgressState( nReadCnt, pDoc->GetDocShell() );
// The previous block ended with a CR whose LF could not be checked yet:
// swallow a leading LF now and perform the postponed paragraph split.
366 if( cLastCR )
368 if( 0x0a == *pStt && 0x0d == cLastCR )
369 pLastStt = ++pStt;
370 cLastCR = 0;
371 nLineLen = 0;
372 // We skip the last one at the end
373 if( !rInput.IsEof() || !(pEnd == pStt ||
374 ( !*pEnd && pEnd == pStt+1 ) ) )
375 pDoc->SplitNode( *pPam->GetPoint(), false );
// Classify the current character. bIns: it is ordinary text and stays in the
// pending run; bSplitNode: a line end was consumed and the paragraph must be
// split after the pending run has been inserted.
379 bool bIns = true, bSplitNode = false;
380 switch( *pStt )
// LF ends a paragraph only in LF mode; in the other modes it falls through to
// the bIns handling below as a normal character. The character is overwritten
// with 0 so the pending run ends here.
383 case 0x0a: if( LINEEND_LF == pUseMe->GetParaFlags() )
385 bIns = false;
386 *pStt = 0;
387 ++pStt;
389 // We skip the last one at the end
390 if( !rInput.IsEof() || pEnd != pStt )
391 bSplitNode = true;
393 break;
// CR ends a paragraph in every mode except LF. In CRLF mode a directly
// following LF is consumed as well; if the block ends right after the CR the
// decision is postponed via cLastCR. A CR not followed by LF in CRLF mode
// does not split.
395 case 0x0d: if( LINEEND_LF != pUseMe->GetParaFlags() )
397 bIns = false;
398 *pStt = 0;
399 ++pStt;
401 bool bChkSplit = false;
402 if( LINEEND_CRLF == pUseMe->GetParaFlags() )
404 if( pStt == pEnd )
405 cLastCR = 0x0d;
406 else if( 0x0a == *pStt )
408 ++pStt;
409 bChkSplit = true;
412 else
413 bChkSplit = true;
415 // We skip the last one at the end
416 if( bChkSplit && ( !rInput.IsEof() || pEnd != pStt ))
417 bSplitNode = true;
419 break;
// Form feed: flush the pending text (if the line is not empty), start a new
// paragraph and give it a "page break before" attribute.
421 case 0x0c:
423 // Insert a hard page break
424 *pStt++ = 0;
425 if( nLineLen )
427 InsertText( OUString( pLastStt ));
429 pDoc->SplitNode( *pPam->GetPoint(), false );
430 pDoc->InsertPoolItem( *pPam, SvxFmtBreakItem(
431 SVX_BREAK_PAGE_BEFORE, RES_BREAK ), 0);
432 pLastStt = pStt;
433 nLineLen = 0;
434 bIns = false;
436 break;
// 0x1a is dropped (replaced by the terminator) when it is the very last
// character of the file, otherwise shown as '#'.
438 case 0x1a:
439 if( nReadCnt == nFileSize && pStt+1 == pEnd )
440 *pStt = 0;
441 else
442 *pStt = '#'; // Replacement visualisation
443 break;
// Tabs are kept as they are.
445 case '\t': break;
447 default:
448 if( ' ' > *pStt )
449 // Found control char, replace with '#'
450 *pStt = '#';
451 break;
// Ordinary character: keep it in the pending run, but break over-long lines.
// Within the last 100 characters before MAX_ASCII_PARA the break happens at
// the next space; at MAX_ASCII_PARA - 1 it is forced. The character is
// temporarily replaced by 0 so that only the text before it is inserted.
454 if( bIns )
456 if( ( nLineLen >= MAX_ASCII_PARA - 100 ) &&
457 ( ( *pStt == ' ' ) || ( nLineLen >= MAX_ASCII_PARA - 1 ) ) )
459 sal_Unicode c = *pStt;
460 *pStt = 0;
461 InsertText( OUString( pLastStt ));
462 pDoc->SplitNode( *pPam->GetPoint(), false );
463 pLastStt = pStt;
464 nLineLen = 0;
465 *pStt = c;
467 ++pStt;
468 ++nLineLen;
470 else if( bSplitNode )
472 // We found a CR/LF, thus save the text
473 InsertText( OUString( pLastStt ));
474 pDoc->SplitNode( *pPam->GetPoint(), false );
475 pLastStt = pStt;
476 nLineLen = 0;
478 } while(true);
// Release the converter, if one was created.
480 if( hConverter )
482 rtl_destroyTextToUnicodeContext( hConverter, hContext );
483 rtl_destroyTextToUnicodeConverter( hConverter );
485 return 0;
488 void SwASCIIParser::InsertText( const String& rStr )
490 pDoc->InsertString( *pPam, rStr );
491 pDoc->UpdateRsid( *pPam, rStr.Len() );
492 pDoc->UpdateParRsid( pPam->GetPoint()->nNode.GetNode().GetTxtNode() );
494 if( pItemSet && g_pBreakIt && nScript != ( SCRIPTTYPE_LATIN |
495 SCRIPTTYPE_ASIAN |
496 SCRIPTTYPE_COMPLEX ) )
497 nScript |= g_pBreakIt->GetAllScriptsOfText( rStr );
500 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */