1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: parasc.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_sw.hxx"
33 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil -*- */
36 #include <tools/stream.hxx>
37 #include <hintids.hxx>
38 #include <rtl/tencinfo.h>
39 #include <sfx2/printer.hxx>
40 #include <svx/fontitem.hxx>
41 #include <svx/langitem.hxx>
42 #include <svx/brkitem.hxx>
43 #include <svx/scripttypeitem.hxx>
44 #include <shellio.hxx>
46 #include <swtypes.hxx>
51 #include <pagedesc.hxx>
52 #include <breakit.hxx>
55 #include <statstr.hrc> // ResId fuer Statusleiste
57 #include <mdiexp.hxx> // ...Percent()
58 #include <poolfmt.hxx>
60 #define ASC_BUFFLEN 4096
// Fragment of the SwASCIIParser class declaration. The class header and the
// remaining members are not visible in this excerpt.
//
// Import options; the constructor binds this reference to its rOpts argument.
68 const SwAsciiOptions
& rOpt
;
// Inserts rStr into the document at the parser's own cursor.
75 void InsertText( const String
& rStr
);
// Creates a parser reading from rIn into pD, starting at the point of rCrsr.
// bReadNewDoc is stored in the bNewDoc member; rOpts supplies the import
// options (language, font name, character set, paragraph flags).
78 SwASCIIParser( SwDoc
* pD
, const SwPaM
& rCrsr
, SvStream
& rIn
,
79 int bReadNewDoc
, const SwAsciiOptions
& rOpts
);
86 // Entry point for the general Reader interface.
// Builds an SwASCIIParser over *pStrm for rDoc/rPam, runs CallParser() on it
// and afterwards resets the ASCII options held in aOpt. Both String
// parameters are unnamed and therefore unused in this function.
87 ULONG
AsciiReader::Read( SwDoc
&rDoc
, const String
&, SwPaM
&rPam
, const String
& )
// NOTE(review): the condition guarding this assert / early error return is not
// visible in this excerpt; judging by the message it is presumably the
// "no input stream" case -- confirm against the full file.
91 ASSERT( !this, "ASCII-Read ohne Stream" );
92 return ERR_SWG_READ_ERROR
;
95 //JP 18.01.96: All headings normally come without a
96 // chapter number. So switch it off explicitly here,
97 // because the default is now back to ON.
99 Reader::SetNoOutlineNum( rDoc
);
// The parser's bReadNewDoc argument is the negation of bInsertMode.
101 SwASCIIParser
* pParser
= new SwASCIIParser( &rDoc
, rPam
, *pStrm
,
102 !bInsertMode
, aOpt
.GetASCIIOpts() );
103 ULONG nRet
= pParser
->CallParser();
106 // after Read reset the options
107 aOpt
.ResetASCIIOpts();
// NOTE(review): disposal of pParser and the return of nRet are not visible in
// this excerpt.
// Constructs the parser: stores document, input stream and options, creates a
// private cursor at the point of rCrsr, allocates the raw read buffer and
// fills an item set with the language/font defaults taken from the options.
111 SwASCIIParser::SwASCIIParser(SwDoc
* pD
, const SwPaM
& rCrsr
, SvStream
& rIn
,
112 int bReadNewDoc
, const SwAsciiOptions
& rOpts
)
113 : pDoc(pD
), rInput(rIn
), rOpt(rOpts
), nScript(0), bNewDoc(bReadNewDoc
)
// Own cursor, positioned at the point of the caller's cursor.
115 pPam
= new SwPaM( *rCrsr
.GetPoint() );
// Raw byte buffer of ASC_BUFFLEN bytes plus two spare bytes.
116 pArr
= new sal_Char
[ ASC_BUFFLEN
+ 2 ];
// Item set built from the FONT..LANGUAGE which-id pairs of the plain
// (RES_CHRATR_*), CJK and CTL attribute groups.
118 pItemSet
= new SfxItemSet( pDoc
->GetAttrPool(),
119 RES_CHRATR_FONT
, RES_CHRATR_LANGUAGE
,
120 RES_CHRATR_CJK_FONT
, RES_CHRATR_CJK_LANGUAGE
,
121 RES_CHRATR_CTL_FONT
, RES_CHRATR_CTL_LANGUAGE
,
124 // set defaults from the options
125 if( rOpt
.GetLanguage() )
// The same language value is stored under all three language which-ids.
127 SvxLanguageItem
aLang( (LanguageType
)rOpt
.GetLanguage(),
128 RES_CHRATR_LANGUAGE
);
129 pItemSet
->Put( aLang
);
130 pItemSet
->Put( aLang
, RES_CHRATR_CJK_LANGUAGE
);
131 pItemSet
->Put( aLang
, RES_CHRATR_CTL_LANGUAGE
);
133 if( rOpt
.GetFontName().Len() )
// NOTE(review): bDelete presumably records whether pFnt was allocated here and
// has to be freed below; the assignments to it are not visible in this excerpt.
135 bool bDelete
= false;
136 const SfxFont
* pFnt
= 0;
// Ask the document's printer (if any) for the font with the requested name.
137 if( pDoc
->getPrinter( false ) )
138 pFnt
= pDoc
->getPrinter( false )->GetFontByName( rOpt
.GetFontName() );
// Fallback font with unknown family, created on the heap. The condition that
// selects this fallback is not visible in this excerpt.
142 pFnt
= new SfxFont( FAMILY_DONTKNOW
, rOpt
.GetFontName() );
// One font item built from pFnt is stored under all three font which-ids.
145 SvxFontItem
aFont( pFnt
->GetFamily(), pFnt
->GetName(),
146 aEmptyStr
, pFnt
->GetPitch(), pFnt
->GetCharSet(), RES_CHRATR_FONT
);
147 pItemSet
->Put( aFont
);
148 pItemSet
->Put( aFont
, RES_CHRATR_CJK_FONT
);
149 pItemSet
->Put( aFont
, RES_CHRATR_CTL_FONT
);
// Frees the font; the cast removes const so that delete can be applied.
// NOTE(review): the guard for this delete is not visible in this excerpt.
152 delete (SfxFont
*)pFnt
;
// Destructor.
// NOTE(review): the body is not visible in this excerpt. The constructor
// allocates pPam, pArr and pItemSet with new, so the body presumably releases
// them -- confirm against the full file.
156 SwASCIIParser::~SwASCIIParser()
164 // Invoke the parser
// Determines the stream size, starts the progress display, assigns a paragraph
// style to the insert position, reads the text through ReadChars() and finally
// applies the font/language attributes collected in pItemSet, restricted to
// the script types that were actually seen (nScript).
// NOTE(review): braces, several conditions and the return statement are not
// visible in this excerpt.
165 ULONG
SwASCIIParser::CallParser()
// Measure the stream: seek to the end, remember the position, rewind.
167 rInput
.Seek(STREAM_SEEK_TO_END
);
170 nFileSize
= rInput
.Tell();
171 rInput
.Seek(STREAM_SEEK_TO_BEGIN
);
174 ::StartProgress( STR_STATSTR_W4WREAD
, 0, nFileSize
, pDoc
->GetDocShell() );
// Content index of the cursor before reading; used further down to position
// pInsPam when the attributes are applied to the inserted range.
177 xub_StrLen nSttCntnt
= 0;
// NOTE(review): the condition under which pInsPam is created is not visible in
// this excerpt.
180 const SwNodeIndex
& rTmp
= pPam
->GetPoint()->nNode
;
181 pInsPam
= new SwPaM( rTmp
, rTmp
, 0, -1 );
182 nSttCntnt
= pPam
->GetPoint()->nContent
.GetIndex();
185 SwTxtFmtColl
*pColl
= 0;
// Either the HTML "PRE" pool style or the standard pool style is fetched; the
// condition selecting between the two is not visible in this excerpt.
189 pColl
= pDoc
->GetTxtCollFromPool(RES_POOLCOLL_HTML_PRE
, false);
191 pColl
= pDoc
->GetTxtCollFromPool(RES_POOLCOLL_STANDARD
,false);
193 pDoc
->SetTxtFmtColl(*pPam
, pColl
);
196 ULONG nError
= ReadChars();
200 // set only the attribute, for scanned scripts.
// For every script type whose flag is missing in nScript, the matching
// font/language pair is removed from the item set.
201 if( !( SCRIPTTYPE_LATIN
& nScript
))
203 pItemSet
->ClearItem( RES_CHRATR_FONT
);
204 pItemSet
->ClearItem( RES_CHRATR_LANGUAGE
);
206 if( !( SCRIPTTYPE_ASIAN
& nScript
))
208 pItemSet
->ClearItem( RES_CHRATR_CJK_FONT
);
209 pItemSet
->ClearItem( RES_CHRATR_CJK_LANGUAGE
);
211 if( !( SCRIPTTYPE_COMPLEX
& nScript
))
213 pItemSet
->ClearItem( RES_CHRATR_CTL_FONT
);
214 pItemSet
->ClearItem( RES_CHRATR_CTL_LANGUAGE
);
216 if( pItemSet
->Count() )
222 // Using the pool defaults for the font causes significant
223 // trouble for the HTML filter, because it is not able
224 // to export the pool defaults (or to be more precise:
225 // the HTML filter is not able to detect whether a pool
226 // default has changed or not. Even a comparison with the
227 // HTMLi template does not work, because the defaults are
228 // not copied when a new doc is created. The result of
229 // comparing pool defaults therefore would be that the
230 // defaults are exported always if they have changed for
231 // text documents in general. That's not sensible, as well
232 // as it is not sensible to export them always.
// Zero-terminated list of the three font which-ids.
233 sal_uInt16 aWhichIds
[4] =
235 RES_CHRATR_FONT
, RES_CHRATR_CJK_FONT
,
236 RES_CHRATR_CTL_FONT
, 0
238 sal_uInt16
*pWhichIds
= aWhichIds
;
// A font item that is set in pItemSet is moved onto the paragraph style pColl
// and then cleared from the set.
// NOTE(review): the loop over pWhichIds and the rest of the GetItemState call
// are not visible in this excerpt.
241 const SfxPoolItem
*pItem
;
242 if (SFX_ITEM_SET
== pItemSet
->GetItemState(*pWhichIds
,
245 pColl
->SetFmtAttr( *pItem
);
246 pItemSet
->ClearItem( *pWhichIds
);
// Whatever is still left in the set becomes a document default.
251 if (pItemSet
->Count())
252 pDoc
->SetDefault(*pItemSet
);
256 // then set over the insert range the defined attributes
// pInsPam is stretched from the current point (mark) back to the node after
// its original node at content index nSttCntnt (point).
257 *pInsPam
->GetMark() = *pPam
->GetPoint();
258 pInsPam
->GetPoint()->nNode
++;
259 pInsPam
->GetPoint()->nContent
.Assign(
260 pInsPam
->GetCntntNode(), nSttCntnt
);
263 ASSERT( !this, "Have to change - hard attr. to para. style" );
264 pDoc
->InsertItemSet( *pInsPam
, *pItemSet
, 0 );
// The item set is no longer needed; the pointer is reset so later checks of
// pItemSet (see InsertText) see a null pointer.
267 delete pItemSet
, pItemSet
= 0;
273 ::EndProgress( pDoc
->GetDocShell() );
// Reads the input stream block by block into pArr, converts each block to
// Unicode (unless the data is already UCS2), scans the characters for line
// ends and other control characters, and hands the text to InsertText(),
// splitting paragraphs via SwDoc::SplitNode where required.
// Returns ERROR_SW_READ_BASE on the visible early-return path.
// NOTE(review): loop headers, braces, switch/case labels and a number of
// statements are not visible in this excerpt; the comments below describe
// only what the visible lines show.
277 ULONG
SwASCIIParser::ReadChars()
// pStt: scan position, pEnd: end of the converted block, pLastStt: start of the
// text that has not been inserted yet.
279 sal_Unicode
*pStt
= 0, *pEnd
= 0, *pLastStt
= 0;
280 long nReadCnt
= 0, nLineLen
= 0;
281 sal_Unicode cLastCR
= 0;
282 bool bSwapUnicode
= false;
284 const SwAsciiOptions
*pUseMe
=&rOpt
;
// aEmpty is a default-constructed option set. If the caller's options are
// still identical to it and the stream holds at least two bytes, the character
// set is auto-detected from the first block.
285 SwAsciiOptions aEmpty
;
286 if (nFileSize
>= 2 &&
287 aEmpty
.GetFontName() == rOpt
.GetFontName() &&
288 aEmpty
.GetCharSet() == rOpt
.GetCharSet() &&
289 aEmpty
.GetLanguage() == rOpt
.GetLanguage() &&
290 aEmpty
.GetParaFlags() == rOpt
.GetParaFlags())
// Read one probe block; nOrig keeps the number of bytes actually read.
293 nOrig
= nLen
= rInput
.Read(pArr
, ASC_BUFFLEN
);
295 bool bRet
= SwIoSystem::IsDetectableText(pArr
, nLen
, &eCharSet
, &bSwapUnicode
);
296 ASSERT(bRet
, "Autodetect of text import without nag dialog must "
298 if (bRet
&& eCharSet
!= RTL_TEXTENCODING_DONTKNOW
)
// Detection succeeded: record the detected charset in aEmpty and step back by
// nLen bytes.
// NOTE(review): the statement that switches pUseMe to &aEmpty is presumably
// here but is not visible in this excerpt.
300 aEmpty
.SetCharSet(eCharSet
);
301 rInput
.SeekRel(-(long(nLen
)));
// Otherwise rewind the whole probe block.
304 rInput
.SeekRel(-(long(nOrig
)));
308 rtl_TextToUnicodeConverter hConverter
=0;
309 rtl_TextToUnicodeContext hContext
=0;
310 CharSet currentCharSet
= pUseMe
->GetCharSet();
// Anything but UCS2 needs a text-to-Unicode converter; an unknown charset is
// treated as US-ASCII.
311 if (RTL_TEXTENCODING_UCS2
!= currentCharSet
)
313 if( currentCharSet
== RTL_TEXTENCODING_DONTKNOW
)
314 currentCharSet
= RTL_TEXTENCODING_ASCII_US
;
315 hConverter
= rtl_createTextToUnicodeConverter( currentCharSet
);
316 ASSERT( hConverter
, "no string convert avaiable" );
// NOTE(review): the guard for this early return is not visible; presumably it
// is taken when no converter could be created.
318 return ERROR_SW_READ_BASE
;
319 bSwapUnicode
= false;
320 hContext
= rtl_createTextToUnicodeContext( hConverter
);
// UCS2 input with options that were not auto-detected: let the stream prepare
// for Unicode text and take the byte-swap flag from it.
322 else if (pUseMe
!= &aEmpty
) //Already successfully figured out type
324 rInput
.StartReadingUnicodeText();
325 bSwapUnicode
= rInput
.IsEndianSwap();
// Number of unconverted bytes carried over from the previous block; they sit
// at the start of pArr (see the memmove further down).
329 ULONG nArrOffset
= 0;
// Flush text that has been scanned but not inserted yet.
334 if( pLastStt
!= pStt
)
335 InsertText( String( pLastStt
));
337 // read in a new block
// The block is read behind any carried-over bytes; a stream error or a read of
// zero bytes ends the loop.
339 if( SVSTREAM_OK
!= rInput
.GetError() || 0 == (lGCount
=
340 rInput
.Read( pArr
+ nArrOffset
,
341 ASC_BUFFLEN
- nArrOffset
)))
342 break; // leave the WHILE loop
// The next four lines appear to be the text of an explanatory block comment
// whose delimiters are not visible in this excerpt; they are left untouched.
346 If there was some unconverted bytes on the last cycle then they
347 were put at the beginning of the array, so total bytes available
348 to convert this cycle includes them. If we found 0 following bytes
349 then we ignore the previous partial character.
// Converter path: convert lGCount bytes from pArr into the buffer of sWork.
// nCntBytes is compared with lGCount below to find bytes left unconverted.
// NOTE(review): the tail of the rtl_convertTextToUnicode call (its remaining
// arguments) is not visible in this excerpt.
356 sal_Size nNewLen
= lGCount
, nCntBytes
;
357 sal_Unicode
* pBuf
= sWork
.AllocBuffer( static_cast< xub_StrLen
>(nNewLen
) );
359 nNewLen
= rtl_convertTextToUnicode( hConverter
, hContext
,
360 pArr
, lGCount
, pBuf
, nNewLen
,
362 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT
|
363 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT
|
364 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
|
365 RTL_TEXTTOUNICODE_FLAGS_GLOBAL_SIGNATURE
// Bytes that were not consumed are moved to the front of pArr so that the next
// read appends to them.
369 if( 0 != ( nArrOffset
= lGCount
- nCntBytes
) )
370 memmove( pArr
, pArr
+ nCntBytes
, nArrOffset
);
371 sWork
.ReleaseBufferAccess( static_cast< xub_StrLen
>(nNewLen
) );
373 pStt
= pLastStt
= sWork
.GetBufferAccess();
374 pEnd
= pStt
+ nNewLen
;
// UCS2 path: the raw byte buffer is scanned in place as sal_Unicode.
378 pStt
= pLastStt
= (sal_Unicode
*)pArr
;
379 pEnd
= (sal_Unicode
*)(pArr
+ lGCount
);
// Walks the buffer in byte pairs (pF = first byte, pN = second byte).
// NOTE(review): the loop body is not visible; presumably it swaps the two
// bytes when bSwapUnicode is set -- confirm.
383 sal_Char
* pF
= pArr
, *pN
= pArr
+ 1;
384 for( ULONG n
= 0; n
< lGCount
; n
+= 2, pF
+= 2, pN
+= 2 )
396 ::SetProgressState( nReadCnt
, pDoc
->GetDocShell() );
// A line feed at the start of a block whose predecessor ended with a carriage
// return (cLastCR).
400 if( 0x0a == *pStt
&& 0x0d == cLastCR
)
404 // JP 03.04.96: we do not take the last one at the end
405 if( !rInput
.IsEof() || !(pEnd
== pStt
||
406 ( !*pEnd
&& pEnd
== pStt
+1 ) ) )
407 pDoc
->SplitNode( *pPam
->GetPoint(), false );
// bIns: per-character flag; bSplitNode: the current character ends a paragraph
// (see the "else if( bSplitNode )" branch below).
// NOTE(review): the code that sets and reads bIns is not visible here.
411 bool bIns
= true, bSplitNode
= false;
414 //JP 12.11.2001: task 94636 - don't ignore all behind the zero character,
415 // change it to the default "control character"
// Line feed: examined when the paragraph flags are LINEEND_LF.
421 case 0x0a: if( LINEEND_LF
== pUseMe
->GetParaFlags() )
427 // JP 03.04.96: we do not take the last one at the end
428 if( !rInput
.IsEof() || pEnd
!= pStt
)
// Carriage return: examined when the paragraph flags are not LINEEND_LF.
433 case 0x0d: if( LINEEND_LF
!= pUseMe
->GetParaFlags() )
439 bool bChkSplit
= false;
440 if( LINEEND_CRLF
== pUseMe
->GetParaFlags() )
444 else if( 0x0a == *pStt
)
453 // JP 03.04.96: we do not take the last one at the end
454 if( bChkSplit
&& ( !rInput
.IsEof() || pEnd
!= pStt
))
461 // then insert a hard page break
465 // Change to charset system!!!!
// Pending text is inserted, the paragraph is split and a "page break before"
// item is set at the cursor.
// NOTE(review): the case label for this branch is not visible in this excerpt.
467 InsertText( String( pLastStt
));
469 pDoc
->SplitNode( *pPam
->GetPoint(), false );
470 pDoc
->InsertPoolItem( *pPam
, SvxFmtBreakItem(
471 SVX_BREAK_PAGE_BEFORE
, RES_BREAK
), 0);
// True only for the very last character of the file.
// NOTE(review): what is done in that case is not visible in this excerpt.
479 if( nReadCnt
== nFileSize
&& pStt
+1 == pEnd
)
482 *pStt
= '#'; // substitute representation
489 // control character found, replace it with '#'
// Over-long lines: within the last 100 characters before MAX_ASCII_PARA the
// line is broken at a space, and at MAX_ASCII_PARA - 1 regardless of the
// character.
496 if( ( nLineLen
>= MAX_ASCII_PARA
- 100 ) &&
497 ( ( *pStt
== ' ' ) || ( nLineLen
>= MAX_ASCII_PARA
- 1 ) ) )
// The current character is saved in c before the pending text is inserted.
// NOTE(review): the statements that overwrite and restore *pStt are not
// visible in this excerpt.
499 sal_Unicode c
= *pStt
;
501 InsertText( String( pLastStt
));
502 pDoc
->SplitNode( *pPam
->GetPoint(), false );
510 else if( bSplitNode
)
512 // a CR/LF was recognised, so store the text
514 InsertText( String( pLastStt
));
515 pDoc
->SplitNode( *pPam
->GetPoint(), false );
// Release the conversion context and the converter created above.
523 rtl_destroyTextToUnicodeContext( hConverter
, hContext
);
524 rtl_destroyTextToUnicodeConverter( hConverter
);
// Inserts rStr into the document at the parser's cursor (*pPam) and records
// which script types occur in the text by OR-ing them into nScript.
// CallParser() later uses nScript to drop attributes for scripts never seen.
529 void SwASCIIParser::InsertText( const String
& rStr
)
531 pDoc
->InsertString( *pPam
, rStr
);
// Script detection runs only while an item set exists, pBreakIt is available
// and nScript does not yet equal the mask on the right-hand side.
// NOTE(review): one operand line of that mask is not visible in this excerpt.
532 if( pItemSet
&& pBreakIt
&& nScript
!= ( SCRIPTTYPE_LATIN
|
534 SCRIPTTYPE_COMPLEX
) )
535 nScript
|= pBreakIt
->GetAllScriptsOfText( rStr
);
538 /* vi:set tabstop=4 shiftwidth=4 expandtab: */