update dev300-m58
[ooovba.git] / sw / source / core / text / guess.cxx
blob049ea79078b7db9f72a1ca6a5db5f72fe61fbe00
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: guess.cxx,v $
10 * $Revision: 1.50 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_sw.hxx"
35 #include <ctype.h>
36 #include <svx/unolingu.hxx>
37 #include <tools/shl.hxx> // needed for SW_MOD() macro
38 #include <errhdl.hxx> // ASSERTs
39 #include <dlelstnr.hxx>
40 #include <swmodule.hxx>
41 #include <IDocumentSettingAccess.hxx>
42 #include <txtcfg.hxx>
43 #include <guess.hxx>
44 #include <inftxt.hxx>
45 #include <pagefrm.hxx>
46 #include <pagedesc.hxx> // SwPageDesc
47 #include <tgrditem.hxx>
48 #include <com/sun/star/i18n/BreakType.hpp>
49 #include <com/sun/star/i18n/WordType.hpp>
50 #include <unotools/charclass.hxx>
51 #include <porfld.hxx>
53 using ::rtl::OUString;
54 using namespace ::com::sun::star;
55 using namespace ::com::sun::star::uno;
56 using namespace ::com::sun::star::i18n;
57 using namespace ::com::sun::star::beans;
58 using namespace ::com::sun::star::linguistic2;
// U+3000 IDEOGRAPHIC SPACE (the full-width blank). In this file it is tested
// side by side with CH_BLANK wherever blanks around a break position are
// skipped.
60 #define CH_FULL_BLANK 0x3000
62 /*************************************************************************
63 * SwTxtGuess::Guess
 *
65 * provides information for line break calculation
66 * returns true if no line break has to be performed
67 * otherwise possible break or hyphenation position is determined
 *
 * Parameters:
 *   rPor       - the text portion being formatted. It is queried with
 *                InFldGrp(), IsDropPortion() and IsFtnPortion(), and its
 *                address is the key handed to rInf.SetMaxWidthDiff().
 *   rInf       - the formatting info (text, current index, line start,
 *                font, width). If the previous portion is an expanded
 *                field, the text and index of rInf are patched in place
 *                while the break iterator runs and are restored before
 *                the function returns.
 *   nPorHeight - height of the portion. It is used for the italic
 *                allowance (nPorHeight / 12) and for the cheap
 *                "everything fits" pre-check.
 *
 * Returns:
 *   sal_True   - the text of the portion fits into the width that is left
 *                on the line; nCutPos and nBreakWidth have been set.
 *   sal_False  - the text is empty, a footnote portion has to underflow,
 *                or a break position had to be searched. In the last case
 *                nBreakPos, nBreakStart, nBreakWidth, nCutPos, xHyphWord
 *                and pHanging describe the result.
 *
 * NOTE(review): in this listing the brace-only lines appear to be missing
 * (for example no "{" follows the signature) and every line starts with
 * the line number of the original file. This looks like an extraction
 * artefact; confirm against the repository copy before editing the code.
68 *************************************************************************/
70 sal_Bool SwTxtGuess::Guess( const SwTxtPortion& rPor, SwTxtFormatInfo &rInf,
71 const KSHORT nPorHeight )
73 nCutPos = rInf.GetIdx();
75 // Empty strings are always 0
76 if( !rInf.GetLen() || !rInf.GetTxt().Len() )
77 return sal_False;
79 ASSERT( rInf.GetIdx() < rInf.GetTxt().Len(),
80 "+SwTxtGuess::Guess: invalid SwTxtFormatInfo" );
82 ASSERT( nPorHeight, "+SwTxtGuess::Guess: no height" );
// out-parameters of rInf.GetTxtSize(); only valid after such a call
84 USHORT nMinSize;
85 USHORT nMaxSizeDiff;
87 const SwScriptInfo& rSI =
88 ((SwParaPortion*)rInf.GetParaPortion())->GetScriptInfo();
// nMaxComp is 10000 only if the actual font is the CJK font,
// rSI.CountCompChg() is non-zero and we are neither inside a multi portion,
// a field group nor a drop portion; in every other case it is 0.
90 USHORT nMaxComp = ( SW_CJK == rInf.GetFont()->GetActual() ) &&
91 rSI.CountCompChg() &&
92 ! rInf.IsMulti() &&
93 ! rPor.InFldGrp() &&
94 ! rPor.IsDropPortion() ?
95 10000 :
96 0 ;
// width between the current position rInf.X() and rInf.Width(), and the
// number of characters from the current index up to the end of the text
98 SwTwips nLineWidth = rInf.Width() - rInf.X();
99 xub_StrLen nMaxLen = rInf.GetTxt().Len() - rInf.GetIdx();
// never look at more characters than rInf.GetLen() announces
101 if ( rInf.GetLen() < nMaxLen )
102 nMaxLen = rInf.GetLen();
104 if( !nMaxLen )
105 return sal_False;
107 KSHORT nItalic = 0;
108 if( ITALIC_NONE != rInf.GetFont()->GetItalic() && !rInf.NotEOL() )
110 sal_Bool bAddItalic = sal_True;
112 // do not add extra italic value if we have an active character grid
113 if ( rInf.SnapToGrid() )
115 GETGRID( rInf.GetTxtFrm()->FindPageFrm() )
116 bAddItalic = !pGrid || GRID_LINES_CHARS != pGrid->GetGridType();
119 // do not add extra italic value for an isolated blank:
120 if ( 1 == rInf.GetLen() &&
121 CH_BLANK == rInf.GetTxt().GetChar( rInf.GetIdx() ) )
122 bAddItalic = sal_False;
// the italic allowance is one twelfth of the portion height
124 nItalic = bAddItalic ? nPorHeight / 12 : 0;
126 nLineWidth -= nItalic;
128 // --> FME 2005-05-13 #i46524# LineBreak bug with italics
129 if ( nLineWidth < 0 ) nLineWidth = 0;
130 // <--
133 // first check if everything fits to line
// (cheap pre-check: the text is only measured if twice the line width
// exceeds nMaxLen * nPorHeight)
134 if ( long ( nLineWidth ) * 2 > long ( nMaxLen ) * nPorHeight )
136 // call GetTxtSize with maximum compression (for kanas)
137 rInf.GetTxtSize( &rSI, rInf.GetIdx(), nMaxLen,
138 nMaxComp, nMinSize, nMaxSizeDiff );
140 nBreakWidth = nMinSize;
142 if ( nBreakWidth <= nLineWidth )
144 // portion fits to line
145 nCutPos = rInf.GetIdx() + nMaxLen;
146 if( nItalic &&
147 ( nCutPos >= rInf.GetTxt().Len() ||
148 // --> FME 2005-05-13 #i48035# Needed for CalcFitToContent
149 // if first line ends with a manual line break
150 rInf.GetTxt().GetChar( nCutPos ) == CH_BREAK ) )
151 // <--
152 nBreakWidth = nBreakWidth + nItalic;
154 // save maximum width for later use
155 if ( nMaxSizeDiff )
156 rInf.SetMaxWidthDiff( (ULONG)&rPor, nMaxSizeDiff );
158 return sal_True;
162 sal_Bool bHyph = rInf.IsHyphenate() && !rInf.IsHyphForbud();
163 xub_StrLen nHyphPos = 0;
165 // nCutPos is the first character not fitting to the current line
166 // nHyphPos is the first character not fitting to the current line,
167 // considering an additional "-" for hyphenation
168 if( bHyph )
170 nCutPos = rInf.GetTxtBreak( nLineWidth, nMaxLen, nMaxComp, nHyphPos );
// no hyphenation position reported: fall back to the character in front of
// the current index (if there is one)
172 if ( !nHyphPos && rInf.GetIdx() )
173 nHyphPos = rInf.GetIdx() - 1;
175 else
177 nCutPos = rInf.GetTxtBreak( nLineWidth, nMaxLen, nMaxComp );
// non-product builds only: verify that the text in front of nCutPos
// really fits into the line width
179 #ifndef PRODUCT
180 if ( STRING_LEN != nCutPos )
182 rInf.GetTxtSize( &rSI, rInf.GetIdx(), nCutPos - rInf.GetIdx(),
183 nMaxComp, nMinSize, nMaxSizeDiff );
184 ASSERT( nMinSize <= nLineWidth, "What a Guess!!!" );
186 #endif
189 if( nCutPos > rInf.GetIdx() + nMaxLen )
191 // second check if everything fits to line
192 nCutPos = nBreakPos = rInf.GetIdx() + nMaxLen - 1;
193 rInf.GetTxtSize( &rSI, rInf.GetIdx(), nMaxLen, nMaxComp,
194 nMinSize, nMaxSizeDiff );
196 nBreakWidth = nMinSize;
198 // The following comparison should actually always yield sal_True,
199 // otherwise GetTxtBreak probably produced a pixel rounding error...
200 if ( nBreakWidth <= nLineWidth )
202 if( nItalic && ( nBreakPos + 1 ) >= rInf.GetTxt().Len() )
203 nBreakWidth = nBreakWidth + nItalic;
205 // save maximum width for later use
206 if ( nMaxSizeDiff )
207 rInf.SetMaxWidthDiff( (ULONG)&rPor, nMaxSizeDiff );
209 return sal_True;
213 // we have to trigger an underflow for a footnote portion
214 // which does not fit to the current line
215 if ( rPor.IsFtnPortion() )
217 nBreakPos = rInf.GetIdx();
218 nCutPos = rInf.GetLen();
219 return sal_False;
// length of the text that stays on the current line; computed below
222 xub_StrLen nPorLen = 0;
223 // do not call the break iterator if nCutPos is a blank
224 xub_Unicode cCutChar = rInf.GetTxt().GetChar( nCutPos );
225 if( CH_BLANK == cCutChar || CH_FULL_BLANK == cCutChar )
227 nBreakPos = nCutPos;
228 xub_StrLen nX = nBreakPos;
230 // we step back until a non blank character has been found
231 // or there is only one more character left
232 while( nX && nBreakPos > rInf.GetLineStart() + 1 &&
233 ( CH_BLANK == ( cCutChar = rInf.GetChar( --nX ) ) ||
234 CH_FULL_BLANK == cCutChar ) )
235 --nBreakPos;
// the portion covers the text in front of the run of blanks
237 if( nBreakPos > rInf.GetIdx() )
238 nPorLen = nBreakPos - rInf.GetIdx();
// skip all blanks behind the cut position; nBreakStart becomes the first
// non-blank character behind it (or the end of the text)
239 while( ++nCutPos < rInf.GetTxt().Len() &&
240 ( CH_BLANK == ( cCutChar = rInf.GetChar( nCutPos ) ) ||
241 CH_FULL_BLANK == cCutChar ) )
242 ; // nothing
244 nBreakStart = nCutPos;
246 else if( pBreakIt->GetBreakIter().is() )
248 // New: We should have a look into the last portion, if it was a
249 // field portion. For this, we expand the text of the field portion
250 // into our string. If the line break position is inside or before
251 // the field portion, we trigger an underflow.
253 xub_StrLen nOldIdx = rInf.GetIdx();
254 xub_Unicode cFldChr = 0;
256 #if OSL_DEBUG_LEVEL > 1
257 XubString aDebugString;
258 #endif
260 // be careful: a field portion can be both: 0x01 (common field)
261 // or 0x02 (the follow of a footnote)
262 if ( rInf.GetLast() && rInf.GetLast()->InFldGrp() &&
263 ! rInf.GetLast()->IsFtnPortion() &&
264 rInf.GetIdx() > rInf.GetLineStart() &&
265 CH_TXTATR_BREAKWORD ==
266 ( cFldChr = rInf.GetTxt().GetChar( rInf.GetIdx() - 1 ) ) )
268 SwFldPortion* pFld = (SwFldPortion*)rInf.GetLast();
269 XubString aTxt;
270 pFld->GetExpTxt( rInf, aTxt );
272 if ( aTxt.Len() )
// the single placeholder character is replaced by the expanded field
// text, so every position behind it moves by aTxt.Len() - 1
274 nFieldDiff = aTxt.Len() - 1;
275 nCutPos = nCutPos + nFieldDiff;
276 nHyphPos = nHyphPos + nFieldDiff;
278 #if OSL_DEBUG_LEVEL > 1
279 aDebugString = rInf.GetTxt();
280 #endif
// NOTE: the cast removes the constness of rInf's text so that it can be
// patched in place; the original text is restored further down
282 XubString& rOldTxt = (XubString&)rInf.GetTxt();
283 rOldTxt.Erase( rInf.GetIdx() - 1, 1 );
284 rOldTxt.Insert( aTxt, rInf.GetIdx() - 1 );
285 rInf.SetIdx( rInf.GetIdx() + nFieldDiff );
287 else
288 cFldChr = 0;
291 LineBreakHyphenationOptions aHyphOpt;
292 Reference< XHyphenator > xHyph;
293 if( bHyph )
295 xHyph = ::GetHyphenator();
296 aHyphOpt = LineBreakHyphenationOptions( xHyph,
297 rInf.GetHyphValues(), nHyphPos );
300 // Get Language for break iterator.
301 // We have to switch the current language if we have a script
302 // change at nCutPos. Otherwise LATIN punctuation would never
303 // be allowed to be hanging punctuation.
304 // NEVER call GetLang if the string has been modified!!!
305 LanguageType aLang = rInf.GetFont()->GetLanguage();
307 // If we are inside a field portion, we use a temporary string which
308 // differs from the string at the textnode. Therefore we are not allowed
309 // to call the GetLang function.
310 if ( nCutPos && ! rPor.InFldGrp() )
312 const CharClass& rCC = GetAppCharClass();
314 // step back until a non-punctuation character is reached
315 xub_StrLen nLangIndex = nCutPos;
317 // If a field has been expanded right in front of us we do not
318 // step further than the beginning of the expanded field
319 // (which is the position of the field placeholder in our
320 // original string).
// NOTE(review): the else-branch of this conditional expression is not
// visible in this listing (the line behind the ':' seems to have been lost
// during extraction) - confirm the value against the repository copy.
321 const xub_StrLen nDoNotStepOver = CH_TXTATR_BREAKWORD == cFldChr ?
322 rInf.GetIdx() - nFieldDiff - 1:
325 while ( nLangIndex > nDoNotStepOver &&
326 ! rCC.isLetterNumeric( rInf.GetTxt(), nLangIndex ) )
327 --nLangIndex;
329 // last "real" character is not inside our current portion
330 // we have to check the script type of the last "real" character
331 if ( nLangIndex < rInf.GetIdx() )
333 USHORT nScript = pBreakIt->GetRealScriptOfText( rInf.GetTxt(),
334 nLangIndex );
335 ASSERT( nScript, "Script is not between 1 and 4" );
337 // compare current script with script from last "real" character
338 if ( nScript - 1 != rInf.GetFont()->GetActual() )
339 aLang = rInf.GetTxtFrm()->GetTxtNode()->GetLang(
340 CH_TXTATR_BREAKWORD == cFldChr ?
341 nDoNotStepOver :
342 nLangIndex, 0, nScript );
// forbidden begin/end-of-line characters for the language chosen above
346 const ForbiddenCharacters aForbidden(
347 *rInf.GetTxtFrm()->GetNode()->getIDocumentSettingAccess()->getForbiddenCharacters( aLang, true ) );
// hanging punctuation is only offered outside of multi portions and
// field groups
349 const sal_Bool bAllowHanging = rInf.IsHanging() && ! rInf.IsMulti() &&
350 ! rPor.InFldGrp();
352 LineBreakUserOptions aUserOpt(
353 aForbidden.beginLine, aForbidden.endLine,
354 rInf.HasForbiddenChars(), bAllowHanging, sal_False );
356 //! register listener to LinguServiceEvents now in order to get
357 //! notified about relevant changes in the future
358 SwModule *pModule = SW_MOD();
359 if (!pModule->GetLngSvcEvtListener().is())
360 pModule->CreateLngSvcEvtListener();
362 // !!! We must have a local copy of the locale, because inside
363 // getLineBreak the LinguEventListener can trigger a new formatting,
364 // which can corrupt the locale pointer inside pBreakIt.
365 const lang::Locale aLocale = pBreakIt->GetLocale( aLang );
367 // determines first possible line break from nRightPos to
368 // start index of current line
369 LineBreakResults aResult = pBreakIt->GetBreakIter()->getLineBreak(
370 rInf.GetTxt(), nCutPos, aLocale,
371 rInf.GetLineStart(), aHyphOpt, aUserOpt );
373 nBreakPos = (xub_StrLen)aResult.breakIndex;
375 // if we are formatting multi portions we want to allow line breaks
376 // at the border between single line and multi line portion
377 // we have to be careful with footnote portions, they always come in
378 // with an index 0
379 if ( nBreakPos < rInf.GetLineStart() && rInf.IsFirstMulti() &&
380 ! rInf.IsFtnInside() )
381 nBreakPos = rInf.GetLineStart();
383 nBreakStart = nBreakPos;
// from here on bHyph tells whether the break iterator reported a
// hyphenation, no longer whether hyphenation was requested
385 bHyph = BreakType::HYPHENATION == aResult.breakType;
387 if ( bHyph && nBreakPos != STRING_LEN)
389 // found hyphenation position within line
390 // nBreakPos is set to the hyphenation position
391 xHyphWord = aResult.rHyphenatedWord;
// presumably breakIndex is the start of the hyphenated word in this case,
// so this moves nBreakPos behind the hyphenation position - verify against
// the break iterator documentation
392 nBreakPos += xHyphWord->getHyphenationPos() + 1;
394 #if OSL_DEBUG_LEVEL > 1
395 // e.g., Schif-fahrt, refers to our string
396 const String aWord = xHyphWord->getWord();
397 // e.g., Schiff-fahrt, refers to the word after hyphenation
398 const String aHyphenatedWord = xHyphWord->getHyphenatedWord();
399 // e.g., Schif-fahrt: 5, refers to our string
400 const USHORT nHyphenationPos = xHyphWord->getHyphenationPos();
401 (void)nHyphenationPos;
402 // e.g., Schiff-fahrt: 6, refers to the word after hyphenation
403 const USHORT nHyphenPos = xHyphWord->getHyphenPos();
404 (void)nHyphenPos;
405 #endif
407 // if not in interactive mode, we have to break behind a soft hyphen
408 if ( ! rInf.IsInterHyph() && rInf.GetIdx() )
410 const long nSoftHyphPos =
411 xHyphWord->getWord().indexOf( CHAR_SOFTHYPHEN );
413 if ( nSoftHyphPos >= 0 &&
414 nBreakStart + nSoftHyphPos <= nBreakPos &&
415 nBreakPos > rInf.GetLineStart() )
416 nBreakPos = rInf.GetIdx() - 1;
419 if( nBreakPos >= rInf.GetIdx() )
421 nPorLen = nBreakPos - rInf.GetIdx();
// the text already has a '-' in front of the break position, so the
// hyphenated word is dropped
422 if( '-' == rInf.GetTxt().GetChar( nBreakPos - 1 ) )
423 xHyphWord = NULL;
426 else if ( !bHyph && nBreakPos >= rInf.GetLineStart() )
428 ASSERT( nBreakPos != STRING_LEN, "we should have found a break pos" );
430 // found break position within line
431 xHyphWord = NULL;
433 // check, if break position is soft hyphen and an underflow
434 // has to be triggered
435 if( nBreakPos > rInf.GetLineStart() && rInf.GetIdx() &&
436 CHAR_SOFTHYPHEN == rInf.GetTxt().GetChar( nBreakPos - 1 ) )
437 nBreakPos = rInf.GetIdx() - 1;
439 // Delete any blanks at the end of a line, but be careful:
440 // If a field has been expanded, we do not want to delete any
441 // blanks inside the field portion. This would cause an unwanted
442 // underflow
443 xub_StrLen nX = nBreakPos;
444 while( nX > rInf.GetLineStart() &&
445 ( CH_TXTATR_BREAKWORD != cFldChr || nX > rInf.GetIdx() ) &&
446 ( CH_BLANK == rInf.GetChar( --nX ) ||
447 CH_FULL_BLANK == rInf.GetChar( nX ) ) )
448 nBreakPos = nX;
449 if( nBreakPos > rInf.GetIdx() )
450 nPorLen = nBreakPos - rInf.GetIdx();
452 else
454 // no line break found, setting nBreakPos to STRING_LEN
455 // causes a break cut
456 nBreakPos = STRING_LEN;
457 ASSERT( nCutPos >= rInf.GetIdx(), "Deep cut" );
458 nPorLen = nCutPos - rInf.GetIdx();
// the break lies behind the cut position: the characters in between are
// put into a hanging portion and the text portion ends at nCutPos
461 if( nBreakPos > nCutPos && nBreakPos != STRING_LEN )
463 const xub_StrLen nHangingLen = nBreakPos - nCutPos;
464 SwPosSize aTmpSize = rInf.GetTxtSize( &rSI, nCutPos,
465 nHangingLen, 0 );
466 ASSERT( !pHanging, "A hanging portion is hanging around" );
467 pHanging = new SwHangingPortion( aTmpSize );
468 pHanging->SetLen( nHangingLen );
469 nPorLen = nCutPos - rInf.GetIdx();
472 // If we expanded a field, we must repair the original string.
473 // In case we do not trigger an underflow, we correct the nBreakPos
474 // value, but we cannot correct the nBreakStart value:
475 // If we have found a hyphenation position, nBreakStart can lie before
476 // the field.
477 if ( CH_TXTATR_BREAKWORD == cFldChr )
// a break in front of the (shifted) index means underflow: report the
// position of the field placeholder in the original string
479 if ( nBreakPos < rInf.GetIdx() )
480 nBreakPos = nOldIdx - 1;
481 else if ( STRING_LEN != nBreakPos )
483 ASSERT( nBreakPos >= nFieldDiff, "I've got field trouble!" );
484 nBreakPos = nBreakPos - nFieldDiff;
487 ASSERT( nCutPos >= rInf.GetIdx() && nCutPos >= nFieldDiff,
488 "I've got field trouble, part2!" );
489 nCutPos = nCutPos - nFieldDiff;
// remove the expanded field text again and put the placeholder back
491 XubString& rOldTxt = (XubString&)rInf.GetTxt();
492 rOldTxt.Erase( nOldIdx - 1, nFieldDiff + 1 );
493 rOldTxt.Insert( cFldChr, nOldIdx - 1 );
494 rInf.SetIdx( nOldIdx );
496 #if OSL_DEBUG_LEVEL > 1
497 ASSERT( aDebugString == rInf.GetTxt(),
498 "Somebody, somebody, somebody put something in my string" );
499 #endif
// measure the part of the text that stays on this line
503 if( nPorLen )
505 rInf.GetTxtSize( &rSI, rInf.GetIdx(), nPorLen,
506 nMaxComp, nMinSize, nMaxSizeDiff );
508 // save maximum width for later use
509 if ( nMaxSizeDiff )
510 rInf.SetMaxWidthDiff( (ULONG)&rPor, nMaxSizeDiff );
512 nBreakWidth = nItalic + nMinSize;
514 else
515 nBreakWidth = 0;
// with a hanging portion the break position is reported at the cut position
517 if( pHanging )
518 nBreakPos = nCutPos;
520 return sal_False;
523 /*************************************************************************
524 * SwTxtGuess::AlternativeSpelling
525 *************************************************************************/
527 // returns true if word at position nPos has a diffenrent spelling
528 // if hyphenated at this position (old german spelling)
530 sal_Bool SwTxtGuess::AlternativeSpelling( const SwTxtFormatInfo &rInf,
531 const xub_StrLen nPos )
533 // get word boundaries
534 xub_StrLen nWordLen;
536 Boundary aBound =
537 pBreakIt->GetBreakIter()->getWordBoundary( rInf.GetTxt(), nPos,
538 pBreakIt->GetLocale( rInf.GetFont()->GetLanguage() ),
539 WordType::DICTIONARY_WORD, sal_True );
540 nBreakStart = (xub_StrLen)aBound.startPos;
541 nWordLen = static_cast<xub_StrLen>(aBound.endPos - nBreakStart);
543 // if everything else fails, we want to cut at nPos
544 nCutPos = nPos;
546 XubString aTxt( rInf.GetTxt().Copy( nBreakStart, nWordLen ) );
548 // check, if word has alternative spelling
549 Reference< XHyphenator > xHyph( ::GetHyphenator() );
550 ASSERT( xHyph.is(), "Hyphenator is missing");
551 //! subtract 1 since the UNO-interface is 0 based
552 xHyphWord = xHyph->queryAlternativeSpelling( OUString(aTxt),
553 pBreakIt->GetLocale( rInf.GetFont()->GetLanguage() ),
554 nPos - nBreakStart, rInf.GetHyphValues() );
555 return xHyphWord.is() && xHyphWord->isAlternativeSpelling();