update dev300-m58
[ooovba.git] / sw / source / filter / ascii / parasc.cxx
blob96d23eabee06fe0a350cae70dbfd2bca23da829f
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: parasc.cxx,v $
10 * $Revision: 1.30 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_sw.hxx"
33 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil -*- */
36 #include <tools/stream.hxx>
37 #include <hintids.hxx>
38 #include <rtl/tencinfo.h>
39 #include <sfx2/printer.hxx>
40 #include <svx/fontitem.hxx>
41 #include <svx/langitem.hxx>
42 #include <svx/brkitem.hxx>
43 #include <svx/scripttypeitem.hxx>
44 #include <shellio.hxx>
45 #include <doc.hxx>
46 #include <swtypes.hxx>
47 #include <ndtxt.hxx>
48 #include <pam.hxx>
49 #include <frmatr.hxx>
50 #include <fltini.hxx>
51 #include <pagedesc.hxx>
52 #include <breakit.hxx>
53 #include <swerror.h>
54 #ifndef _STATSTR_HRC
55 #include <statstr.hrc> // ResId fuer Statusleiste
56 #endif
57 #include <mdiexp.hxx> // ...Percent()
58 #include <poolfmt.hxx>
60 #define ASC_BUFFLEN 4096
62 class SwASCIIParser
64 SwDoc* pDoc;
65 SwPaM* pPam;
66 SvStream& rInput;
67 sal_Char* pArr;
68 const SwAsciiOptions& rOpt;
69 SfxItemSet* pItemSet;
70 long nFileSize;
71 USHORT nScript;
72 bool bNewDoc;
74 ULONG ReadChars();
75 void InsertText( const String& rStr );
77 public:
78 SwASCIIParser( SwDoc* pD, const SwPaM& rCrsr, SvStream& rIn,
79 int bReadNewDoc, const SwAsciiOptions& rOpts );
80 ~SwASCIIParser();
82 ULONG CallParser();
86 // Aufruf fuer die allg. Reader-Schnittstelle
87 ULONG AsciiReader::Read( SwDoc &rDoc, const String&, SwPaM &rPam, const String & )
89 if( !pStrm )
91 ASSERT( !this, "ASCII-Read ohne Stream" );
92 return ERR_SWG_READ_ERROR;
95 //JP 18.01.96: Alle Ueberschriften sind normalerweise ohne
96 // Kapitelnummer. Darum hier explizit abschalten
97 // weil das Default jetzt wieder auf AN ist.
98 if( !bInsertMode )
99 Reader::SetNoOutlineNum( rDoc );
101 SwASCIIParser* pParser = new SwASCIIParser( &rDoc, rPam, *pStrm,
102 !bInsertMode, aOpt.GetASCIIOpts() );
103 ULONG nRet = pParser->CallParser();
105 delete pParser;
106 // after Read reset the options
107 aOpt.ResetASCIIOpts();
108 return nRet;
111 SwASCIIParser::SwASCIIParser(SwDoc* pD, const SwPaM& rCrsr, SvStream& rIn,
112 int bReadNewDoc, const SwAsciiOptions& rOpts)
113 : pDoc(pD), rInput(rIn), rOpt(rOpts), nScript(0), bNewDoc(bReadNewDoc)
115 pPam = new SwPaM( *rCrsr.GetPoint() );
116 pArr = new sal_Char [ ASC_BUFFLEN + 2 ];
118 pItemSet = new SfxItemSet( pDoc->GetAttrPool(),
119 RES_CHRATR_FONT, RES_CHRATR_LANGUAGE,
120 RES_CHRATR_CJK_FONT, RES_CHRATR_CJK_LANGUAGE,
121 RES_CHRATR_CTL_FONT, RES_CHRATR_CTL_LANGUAGE,
122 0 );
124 // set defaults from the options
125 if( rOpt.GetLanguage() )
127 SvxLanguageItem aLang( (LanguageType)rOpt.GetLanguage(),
128 RES_CHRATR_LANGUAGE );
129 pItemSet->Put( aLang );
130 pItemSet->Put( aLang, RES_CHRATR_CJK_LANGUAGE );
131 pItemSet->Put( aLang, RES_CHRATR_CTL_LANGUAGE );
133 if( rOpt.GetFontName().Len() )
135 bool bDelete = false;
136 const SfxFont* pFnt = 0;
137 if( pDoc->getPrinter( false ) )
138 pFnt = pDoc->getPrinter( false )->GetFontByName( rOpt.GetFontName() );
140 if( !pFnt )
142 pFnt = new SfxFont( FAMILY_DONTKNOW, rOpt.GetFontName() );
143 bDelete = true;
145 SvxFontItem aFont( pFnt->GetFamily(), pFnt->GetName(),
146 aEmptyStr, pFnt->GetPitch(), pFnt->GetCharSet(), RES_CHRATR_FONT );
147 pItemSet->Put( aFont );
148 pItemSet->Put( aFont, RES_CHRATR_CJK_FONT );
149 pItemSet->Put( aFont, RES_CHRATR_CTL_FONT );
151 if( bDelete )
152 delete (SfxFont*)pFnt;
156 SwASCIIParser::~SwASCIIParser()
158 delete pPam;
159 delete [] pArr;
160 delete pItemSet;
164 // Aufruf des Parsers
165 ULONG SwASCIIParser::CallParser()
167 rInput.Seek(STREAM_SEEK_TO_END);
168 rInput.ResetError();
170 nFileSize = rInput.Tell();
171 rInput.Seek(STREAM_SEEK_TO_BEGIN);
172 rInput.ResetError();
174 ::StartProgress( STR_STATSTR_W4WREAD, 0, nFileSize, pDoc->GetDocShell() );
176 SwPaM* pInsPam = 0;
177 xub_StrLen nSttCntnt = 0;
178 if (!bNewDoc)
180 const SwNodeIndex& rTmp = pPam->GetPoint()->nNode;
181 pInsPam = new SwPaM( rTmp, rTmp, 0, -1 );
182 nSttCntnt = pPam->GetPoint()->nContent.GetIndex();
185 SwTxtFmtColl *pColl = 0;
187 if (bNewDoc)
189 pColl = pDoc->GetTxtCollFromPool(RES_POOLCOLL_HTML_PRE, false);
190 if (!pColl)
191 pColl = pDoc->GetTxtCollFromPool(RES_POOLCOLL_STANDARD,false);
192 if (pColl)
193 pDoc->SetTxtFmtColl(*pPam, pColl);
196 ULONG nError = ReadChars();
198 if( pItemSet )
200 // set only the attribute, for scanned scripts.
201 if( !( SCRIPTTYPE_LATIN & nScript ))
203 pItemSet->ClearItem( RES_CHRATR_FONT );
204 pItemSet->ClearItem( RES_CHRATR_LANGUAGE );
206 if( !( SCRIPTTYPE_ASIAN & nScript ))
208 pItemSet->ClearItem( RES_CHRATR_CJK_FONT );
209 pItemSet->ClearItem( RES_CHRATR_CJK_LANGUAGE );
211 if( !( SCRIPTTYPE_COMPLEX & nScript ))
213 pItemSet->ClearItem( RES_CHRATR_CTL_FONT );
214 pItemSet->ClearItem( RES_CHRATR_CTL_LANGUAGE );
216 if( pItemSet->Count() )
218 if( bNewDoc )
220 if (pColl)
222 // Using the pool defaults for the font causes significant
223 // trouble for the HTML filter, because it is not able
224 // to export the pool defaults (or to be more precice:
225 // the HTML filter is not able to detect whether a pool
226 // default has changed or not. Even a comparison with the
227 // HTMLi template does not work, because the defaults are
228 // not copied when a new doc is created. The result of
229 // comparing pool defaults therfor would be that the
230 // defaults are exported always if the have changed for
231 // text documents in general. That's not sensible, as well
232 // as it is not sensible to export them always.
233 sal_uInt16 aWhichIds[4] =
235 RES_CHRATR_FONT, RES_CHRATR_CJK_FONT,
236 RES_CHRATR_CTL_FONT, 0
238 sal_uInt16 *pWhichIds = aWhichIds;
239 while (*pWhichIds)
241 const SfxPoolItem *pItem;
242 if (SFX_ITEM_SET == pItemSet->GetItemState(*pWhichIds,
243 false, &pItem))
245 pColl->SetFmtAttr( *pItem );
246 pItemSet->ClearItem( *pWhichIds );
248 ++pWhichIds;
251 if (pItemSet->Count())
252 pDoc->SetDefault(*pItemSet);
254 else if( pInsPam )
256 // then set over the insert range the defined attributes
257 *pInsPam->GetMark() = *pPam->GetPoint();
258 pInsPam->GetPoint()->nNode++;
259 pInsPam->GetPoint()->nContent.Assign(
260 pInsPam->GetCntntNode(), nSttCntnt );
262 // !!!!!
263 ASSERT( !this, "Have to change - hard attr. to para. style" );
264 pDoc->Insert( *pInsPam, *pItemSet, 0 );
267 delete pItemSet, pItemSet = 0;
270 if( pInsPam )
271 delete pInsPam;
273 ::EndProgress( pDoc->GetDocShell() );
274 return nError;
277 ULONG SwASCIIParser::ReadChars()
279 sal_Unicode *pStt = 0, *pEnd = 0, *pLastStt = 0;
280 long nReadCnt = 0, nLineLen = 0;
281 sal_Unicode cLastCR = 0;
282 bool bSwapUnicode = false;
284 const SwAsciiOptions *pUseMe=&rOpt;
285 SwAsciiOptions aEmpty;
286 if (nFileSize >= 2 &&
287 aEmpty.GetFontName() == rOpt.GetFontName() &&
288 aEmpty.GetCharSet() == rOpt.GetCharSet() &&
289 aEmpty.GetLanguage() == rOpt.GetLanguage() &&
290 aEmpty.GetParaFlags() == rOpt.GetParaFlags())
292 ULONG nLen, nOrig;
293 nOrig = nLen = rInput.Read(pArr, ASC_BUFFLEN);
294 CharSet eCharSet;
295 bool bRet = SwIoSystem::IsDetectableText(pArr, nLen, &eCharSet, &bSwapUnicode);
296 ASSERT(bRet, "Autodetect of text import without nag dialog must "
297 "have failed");
298 if (bRet && eCharSet != RTL_TEXTENCODING_DONTKNOW)
300 aEmpty.SetCharSet(eCharSet);
301 rInput.SeekRel(-(long(nLen)));
303 else
304 rInput.SeekRel(-(long(nOrig)));
305 pUseMe=&aEmpty;
308 rtl_TextToUnicodeConverter hConverter=0;
309 rtl_TextToUnicodeContext hContext=0;
310 CharSet currentCharSet = pUseMe->GetCharSet();
311 if (RTL_TEXTENCODING_UCS2 != currentCharSet)
313 if( currentCharSet == RTL_TEXTENCODING_DONTKNOW )
314 currentCharSet = RTL_TEXTENCODING_ASCII_US;
315 hConverter = rtl_createTextToUnicodeConverter( currentCharSet );
316 ASSERT( hConverter, "no string convert avaiable" );
317 if (!hConverter)
318 return ERROR_SW_READ_BASE;
319 bSwapUnicode = false;
320 hContext = rtl_createTextToUnicodeContext( hConverter );
322 else if (pUseMe != &aEmpty) //Already successfully figured out type
324 rInput.StartReadingUnicodeText();
325 bSwapUnicode = rInput.IsEndianSwap();
328 String sWork;
329 ULONG nArrOffset = 0;
331 do {
332 if( pStt >= pEnd )
334 if( pLastStt != pStt )
335 InsertText( String( pLastStt ));
337 // lese einen neuen Block ein
338 ULONG lGCount;
339 if( SVSTREAM_OK != rInput.GetError() || 0 == (lGCount =
340 rInput.Read( pArr + nArrOffset,
341 ASC_BUFFLEN - nArrOffset )))
342 break; // aus der WHILE-Schleife heraus
345 #98380#
346 If there was some unconverted bytes on the last cycle then they
347 were put at the beginning of the array, so total bytes available
348 to convert this cycle includes them. If we found 0 following bytes
349 then we ignore the previous partial character.
351 lGCount+=nArrOffset;
353 if( hConverter )
355 sal_uInt32 nInfo;
356 sal_Size nNewLen = lGCount, nCntBytes;
357 sal_Unicode* pBuf = sWork.AllocBuffer( static_cast< xub_StrLen >(nNewLen) );
359 nNewLen = rtl_convertTextToUnicode( hConverter, hContext,
360 pArr, lGCount, pBuf, nNewLen,
362 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
363 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
364 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT |
365 RTL_TEXTTOUNICODE_FLAGS_GLOBAL_SIGNATURE
367 &nInfo,
368 &nCntBytes );
369 if( 0 != ( nArrOffset = lGCount - nCntBytes ) )
370 memmove( pArr, pArr + nCntBytes, nArrOffset );
371 sWork.ReleaseBufferAccess( static_cast< xub_StrLen >(nNewLen) );
373 pStt = pLastStt = sWork.GetBufferAccess();
374 pEnd = pStt + nNewLen;
376 else
378 pStt = pLastStt = (sal_Unicode*)pArr;
379 pEnd = (sal_Unicode*)(pArr + lGCount);
381 if( bSwapUnicode )
383 sal_Char* pF = pArr, *pN = pArr + 1;
384 for( ULONG n = 0; n < lGCount; n += 2, pF += 2, pN += 2 )
386 sal_Char c = *pF;
387 *pF = *pN;
388 *pN = c;
393 *pEnd = 0;
394 nReadCnt += lGCount;
396 ::SetProgressState( nReadCnt, pDoc->GetDocShell() );
398 if( cLastCR )
400 if( 0x0a == *pStt && 0x0d == cLastCR )
401 pLastStt = ++pStt;
402 cLastCR = 0;
403 nLineLen = 0;
404 // JP 03.04.96: das letze am Ende nehmen wir nicht
405 if( !rInput.IsEof() || !(pEnd == pStt ||
406 ( !*pEnd && pEnd == pStt+1 ) ) )
407 pDoc->SplitNode( *pPam->GetPoint(), false );
411 bool bIns = true, bSplitNode = false;
412 switch( *pStt )
414 //JP 12.11.2001: task 94636 - don't ignore all behind the zero character,
415 // change it to the default "control character"
416 // case 0:
417 // pEnd = pStt;
418 // bIns = false ;
419 // break;
421 case 0x0a: if( LINEEND_LF == pUseMe->GetParaFlags() )
423 bIns = false;
424 *pStt = 0;
425 ++pStt;
427 // JP 03.04.96: das letze am Ende nehmen wir nicht
428 if( !rInput.IsEof() || pEnd != pStt )
429 bSplitNode = true;
431 break;
433 case 0x0d: if( LINEEND_LF != pUseMe->GetParaFlags() )
435 bIns = false;
436 *pStt = 0;
437 ++pStt;
439 bool bChkSplit = false;
440 if( LINEEND_CRLF == pUseMe->GetParaFlags() )
442 if( pStt == pEnd )
443 cLastCR = 0x0d;
444 else if( 0x0a == *pStt )
446 ++pStt;
447 bChkSplit = true;
450 else
451 bChkSplit = true;
453 // JP 03.04.96: das letze am Ende nehmen wir nicht
454 if( bChkSplit && ( !rInput.IsEof() || pEnd != pStt ))
455 bSplitNode = true;
457 break;
459 case 0x0c:
461 // dann mal einen harten Seitenumbruch einfuegen
462 *pStt++ = 0;
463 if( nLineLen )
465 // Change to charset system!!!!
466 //rOpt.GetCharSet();
467 InsertText( String( pLastStt ));
469 pDoc->SplitNode( *pPam->GetPoint(), false );
470 pDoc->Insert( *pPam, SvxFmtBreakItem(
471 SVX_BREAK_PAGE_BEFORE, RES_BREAK ), 0);
472 pLastStt = pStt;
473 nLineLen = 0;
474 bIns = false;
476 break;
478 case 0x1a:
479 if( nReadCnt == nFileSize && pStt+1 == pEnd )
480 *pStt = 0;
481 else
482 *pStt = '#'; // Ersatzdarstellung
483 break;
485 case '\t': break;
487 default:
488 if( ' ' > *pStt )
489 // Ctrl-Zchn gefunden ersetze durch '#'
490 *pStt = '#';
491 break;
494 if( bIns )
496 if( ( nLineLen >= MAX_ASCII_PARA - 100 ) &&
497 ( ( *pStt == ' ' ) || ( nLineLen >= MAX_ASCII_PARA - 1 ) ) )
499 sal_Unicode c = *pStt;
500 *pStt = 0;
501 InsertText( String( pLastStt ));
502 pDoc->SplitNode( *pPam->GetPoint(), false );
503 pLastStt = pStt;
504 nLineLen = 0;
505 *pStt = c;
507 ++pStt;
508 ++nLineLen;
510 else if( bSplitNode )
512 // es wurde ein CR/LF erkannt, also speichere den Text
514 InsertText( String( pLastStt ));
515 pDoc->SplitNode( *pPam->GetPoint(), false );
516 pLastStt = pStt;
517 nLineLen = 0;
519 } while(true);
521 if( hConverter )
523 rtl_destroyTextToUnicodeContext( hConverter, hContext );
524 rtl_destroyTextToUnicodeConverter( hConverter );
526 return 0;
529 void SwASCIIParser::InsertText( const String& rStr )
531 pDoc->Insert( *pPam, rStr, true );
532 if( pItemSet && pBreakIt && nScript != ( SCRIPTTYPE_LATIN |
533 SCRIPTTYPE_ASIAN |
534 SCRIPTTYPE_COMPLEX ) )
535 nScript |= pBreakIt->GetAllScriptsOfText( rStr );
538 /* vi:set tabstop=4 shiftwidth=4 expandtab: */