/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2008 by Sun Microsystems, Inc.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * $RCSfile: _XBreakIterator.java,v $
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org.  If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
 ************************************************************************/
import java.util.Vector;

import lib.MultiMethodTest;
import lib.Status;
import lib.StatusException;

import com.sun.star.i18n.Boundary;
import com.sun.star.i18n.LineBreakHyphenationOptions;
import com.sun.star.i18n.LineBreakResults;
import com.sun.star.i18n.LineBreakUserOptions;
import com.sun.star.i18n.ScriptType;
import com.sun.star.i18n.WordType;
import com.sun.star.i18n.XBreakIterator;
import com.sun.star.lang.Locale;
/**
 * Testing <code>com.sun.star.i18n.XBreakIterator</code>
 * interface methods:
 * <ul>
 *  <li><code> nextCharacters()</code></li>
 *  <li><code> previousCharacters()</code></li>
 *  <li><code> nextWord()</code></li>
 *  <li><code> previousWord()</code></li>
 *  <li><code> getWordBoundary()</code></li>
 *  <li><code> getWordType()</code></li>
 *  <li><code> isBeginWord()</code></li>
 *  <li><code> isEndWord()</code></li>
 *  <li><code> beginOfSentence()</code></li>
 *  <li><code> endOfSentence()</code></li>
 *  <li><code> getLineBreak()</code></li>
 *  <li><code> beginOfScript()</code></li>
 *  <li><code> endOfScript()</code></li>
 *  <li><code> nextScript()</code></li>
 *  <li><code> previousScript()</code></li>
 *  <li><code> getScriptType()</code></li>
 *  <li><code> beginOfCharBlock()</code></li>
 *  <li><code> endOfCharBlock()</code></li>
 *  <li><code> nextCharBlock()</code></li>
 *  <li><code> previousCharBlock()</code></li>
 * </ul> <p>
 * This test needs the following object relations :
 * <ul>
 *  <li> <code>'Locale'</code>
 *   (of type <code>com.sun.star.lang.Locale</code>):
 *   this locale is used as locale argument for tested methods.
 *  </li>
 *  <li> <code>'UnicodeString'</code>
 *   (of type <code>String</code>): Unicode string which is passed
 *   to methods except 'CharacterBlock' methods.
 *  </li>
 * </ul> <p>
 * @see com.sun.star.i18n.XBreakIterator
 */
86 public class _XBreakIterator
extends MultiMethodTest
{
88 public XBreakIterator oObj
= null;
91 String UnicodeString
= null;
93 short wordType
= WordType
.ANYWORD_IGNOREWHITESPACES
;
96 * Retrieves object relations.
97 * @throws StatusException If one of relations not found.
99 protected void before() {
100 locale
= (Locale
)tEnv
.getObjRelation("Locale");
101 if (locale
== null) {
102 throw new StatusException
103 (Status
.failed("Relation 'Locale' not found")) ;
106 UnicodeString
= (String
)tEnv
.getObjRelation("UnicodeString");
107 if (UnicodeString
== null) {
108 throw new StatusException(Status
.failed
109 ("Relation 'UnicodeString' not found")) ;
114 * Compares returned next character positions with expected values. <p>
116 * Has <b>OK</b> status if position after travel and traveled length
117 * has expected values.
119 public void _nextCharacters() {
120 short nCharacterIteratorMode
=
121 com
.sun
.star
.i18n
.CharacterIteratorMode
.SKIPCHARACTER
;
123 int strLength
= UnicodeString
.length();
125 //Start from position : Travel ... chars :
126 // Actual position after : How many chars traveled
127 int[][] nextCharacters
= {
128 { 1, 5000, strLength
, strLength
- 1 },
133 for(int i
= 0; i
< nextCharacters
.length
; i
++) {
134 int[] lDone
= new int[1];
135 long lRes
= oObj
.nextCharacters(UnicodeString
, nextCharacters
[i
][0],
136 locale
, nCharacterIteratorMode
, nextCharacters
[i
][1], lDone
);
137 log
.println("Expected result is: lRes = " + nextCharacters
[i
][2] +
138 "; lDone = " + nextCharacters
[i
][3] );
139 log
.println("Actual result is: lRes = " + lRes
+
140 "; lDone = " + lDone
[0] );
142 bRes
= bRes
&& lRes
== nextCharacters
[i
][2];
143 bRes
= bRes
&& lDone
[0] == nextCharacters
[i
][3];
146 tRes
.tested("nextCharacters()", bRes
);
150 * Compares returned previous character positions with expected values. <p>
152 * Has <b>OK</b> status if position after travel and traveled length
153 * has expected values.
155 public void _previousCharacters() {
156 short nCharacterIteratorMode
=
157 com
.sun
.star
.i18n
.CharacterIteratorMode
.SKIPCHARACTER
;
160 //Start from position : Travel ... chars : Actual position after :
161 //How many chars traveled
162 int[][] previousCharacters
= {
167 for(int i
= 0; i
< previousCharacters
.length
; i
++) {
168 int[] lDone
= new int[1];
169 int lRes
= oObj
.previousCharacters(UnicodeString
,
170 previousCharacters
[i
][0],
171 locale
, nCharacterIteratorMode
,
172 previousCharacters
[i
][1], lDone
);
173 log
.println("Expected result is: lRes = " + previousCharacters
[i
][2]
174 + "; lDone = " + previousCharacters
[i
][3] );
175 log
.println("Actual result is: lRes = " + lRes
176 + "; lDone = " + lDone
[0]);
178 bRes
= bRes
&& lRes
== previousCharacters
[i
][2];
179 bRes
= bRes
&& lDone
[0] == previousCharacters
[i
][3];
182 tRes
.tested("previousCharacters()", bRes
);
185 Vector vBounds
= new Vector();
188 * Saves bounds of all returned words for the future tests. <p>
189 * Has <b>OK</b> status.
191 public void _nextWord() {
194 while( i
< UnicodeString
.length() - 1 ) {
195 Boundary bounds
= oObj
.nextWord
196 (UnicodeString
, i
, locale
, wordType
);
197 if (bounds
.endPos
- bounds
.startPos
> 3) {
198 vBounds
.add( bounds
);
199 log
.println("Word " + vBounds
.size() + "("
200 + bounds
.startPos
+ "," + bounds
.endPos
+ "): '" +
201 UnicodeString
.substring(bounds
.startPos
,
202 bounds
.endPos
) + "'");
204 i
= bounds
.endPos
- 1;
206 log
.println("In text there are " + vBounds
.size()
207 + " words, if count from left to right");
208 tRes
.tested("nextWord()", true);
212 * Compares number of word bounds with number of word bounds saved
213 * by the method _nextWord().<p>
214 * Has <b>OK</b> status if number of word bounds are equal.
216 public void _previousWord() {
217 requiredMethod("nextWord()");
219 int i
= UnicodeString
.length() - 1;
220 Vector vPrevBounds
= new Vector();
223 oObj
.previousWord(UnicodeString
, i
, locale
, wordType
);
224 if (bounds
.endPos
- bounds
.startPos
> 3) {
225 vPrevBounds
.add( bounds
);
226 log
.println("Word " + vPrevBounds
.size() + "("
227 + bounds
.startPos
+ "," + bounds
.endPos
+ "): '"
228 + UnicodeString
.substring(bounds
.startPos
, bounds
.endPos
)
233 log
.println("In text there are " + vPrevBounds
.size()
234 + " words, if count from right to left");
235 tRes
.tested("previousWord()", vPrevBounds
.size() == vBounds
.size() );
239 * For every word in array obtained by <code>nextWord</code> method test
240 * computes bounds of the word, passing its internal character position.<p>
242 * Has <b>OK</b> status if bounds calculated by <code>getWordBoundary()</code>
243 * method are the same as bounds obtained by <code>nextWord</code> method.
245 public void _getWordBoundary() {
246 requiredMethod("nextWord()");
250 for(int i
= 0; i
< vBounds
.size(); i
++) {
251 // calculate middle of the word
252 Boundary iBounds
= (Boundary
)vBounds
.get(i
);
253 int iPos
= (iBounds
.endPos
- iBounds
.startPos
) / 2
255 Boundary bounds
= oObj
.getWordBoundary(UnicodeString
, iPos
,
256 locale
, wordType
, true);
257 log
.println("Expected result is: startPos = " + iBounds
.startPos
+
258 "; endPos = " + iBounds
.endPos
);
259 log
.println("Actual result is: startPos = " + bounds
.startPos
260 + "; endPos = " + bounds
.endPos
+ " Word is: '"
261 + UnicodeString
.substring(bounds
.startPos
, bounds
.endPos
) + "'");
263 bRes
= bRes
&& iBounds
.startPos
== bounds
.startPos
;
264 bRes
= bRes
&& iBounds
.endPos
== bounds
.endPos
;
267 tRes
.tested("getWordBoundary()", bRes
);
271 * For every word in array obtained by <code>nextWord</code> method test
272 * get its type, passing its internal character position.<p>
274 * Has <b>OK</b> status if every word has type <code>WordType.ANY_WORD</code>
276 public void _getWordType() {
277 requiredMethod("nextWord()");
281 for(int i
= 0; i
< vBounds
.size(); i
++) {
282 // calculate middle of the word
283 Boundary iBounds
= (Boundary
)vBounds
.get(i
);
284 int iPos
= (iBounds
.endPos
- iBounds
.startPos
) / 2
287 short type
= oObj
.getWordType(UnicodeString
, iPos
, locale
);
289 bRes
= bRes
&& type
== WordType
.ANY_WORD
;
292 tRes
.tested("getWordType()", bRes
);
296 * For every word in array obtained by <code>nextWord</code> method test
297 * tries to determine if the character at a position starts a word.
298 * First word starting position is passed, then internal character
299 * position is passed. <p>
300 * Has <b>OK</b> status if in the first case <code>true</code>
301 * returned and in the second - <code>false</code> for every word.
303 public void _isBeginWord() {
304 requiredMethod("nextWord()");
308 for(int i
= 0; i
< vBounds
.size(); i
++) {
309 Boundary iBounds
= (Boundary
)vBounds
.get(i
);
310 boolean isBegin
= oObj
.isBeginWord(UnicodeString
, iBounds
.startPos
,
311 locale
, WordType
.ANY_WORD
);
312 bRes
= bRes
&& isBegin
;
313 boolean isNotBegin
= !oObj
.isBeginWord(UnicodeString
,
314 iBounds
.startPos
+ 1, locale
, WordType
.ANY_WORD
);
315 bRes
= bRes
&& isNotBegin
;
317 log
.println("At position + " + iBounds
.startPos
318 + " isBeginWord? " + isBegin
);
319 log
.println("At position + " + (iBounds
.startPos
+ 1)
320 + " isBeginWord? " + !isNotBegin
);
323 tRes
.tested("isBeginWord()", bRes
);
327 * For every word in array obtained by <code>nextWord</code> method test
328 * tries to determine if the character at a position ends a word.
329 * First word ending position is passed, then internal character
330 * position is passed. <p>
332 * Has <b>OK</b> status if in the first case <code>true</code>
333 * returned and in the second - <code>false</code> for every word.
335 public void _isEndWord() {
336 requiredMethod("nextWord()");
340 for(int i
= 0; i
< vBounds
.size(); i
++) {
341 Boundary iBounds
= (Boundary
)vBounds
.get(i
);
342 boolean isEnd
= oObj
.isEndWord(UnicodeString
, iBounds
.endPos
,
343 locale
, WordType
.ANY_WORD
);
344 bRes
= bRes
&& isEnd
;
345 boolean isNotEnd
= !oObj
.isEndWord(UnicodeString
,
346 iBounds
.endPos
- 1, locale
, WordType
.ANY_WORD
);
347 bRes
= bRes
&& isNotEnd
;
349 log
.println("At position + " + iBounds
.endPos
350 + " isEndWord? " + isEnd
);
351 log
.println("At position + " + (iBounds
.endPos
- 1)
352 + " isEndWord? " + !isNotEnd
);
355 tRes
.tested("isEndWord()", bRes
);
358 Vector vSentenceStart
= new Vector();
360 * Tries to find all sentences starting positions passing every character
361 * as position parameter and stores them. Then tries to pass invalid
362 * position parameters.
364 * Has <b>OK</b> status if -1 is returned for wrong position arguments.
366 public void _beginOfSentence() {
368 while( iPos
< UnicodeString
.length() ) {
369 Integer start
= new Integer( oObj
.beginOfSentence(UnicodeString
,
371 if (start
.intValue() >= 0 && !vSentenceStart
.contains(start
) ) {
372 vSentenceStart
.add( start
);
373 log
.println("Sentence " + vSentenceStart
.size()
374 + " : start from position " + start
);
379 //test for invalid nStartPosition
380 boolean bRes
= oObj
.beginOfSentence(UnicodeString
, -10, locale
) == -1;
381 bRes
&= oObj
.beginOfSentence(UnicodeString
,
382 UnicodeString
.length() + 1, locale
) == -1;
385 log
.println("When invalid position, returned value isn't equal to -1");
388 tRes
.tested("beginOfSentence()", bRes
);
392 * For every sentence starting position found in
393 * <code>beginOfSentence()</code> test tries to compute end
394 * position of a sentence and checks that the end position is
395 * greater than starting.
396 * Then wrong position arguments are passed.
398 * Has <b>OK</b> status if the end position of every sentence
399 * greater than starting and -1 returned for invalid arguments.
401 public void _endOfSentence() {
403 for(int i
= 0; i
< vSentenceStart
.size(); i
++) {
404 int start
= ((Integer
)vSentenceStart
.get(i
)).intValue();
405 int end
= oObj
.endOfSentence(UnicodeString
, start
, locale
);
407 log
.println("Sentence " + i
+ " range is [" + start
+ ", "
411 //test for invalid nStartPosition
412 boolean bInvRes
= oObj
.endOfSentence(UnicodeString
, -10, locale
) == -1;
413 bInvRes
&= oObj
.endOfSentence(UnicodeString
,
414 UnicodeString
.length() + 1, locale
) == -1;
417 log
.println("When invalid position, returned value isn't equal to -1");
420 tRes
.tested("endOfSentence()", bRes
&& bInvRes
);
424 * Tries to break a string in position other than 0 iterating characters
425 * from the string beginning (Hyphenation is not used for a while). <p>
427 * Has <b>OK</b> status if non-zero break position was found and it is
428 * less or equal than position we trying to break.
430 public void _getLineBreak() {
432 LineBreakResults lineBreakResults
;
433 LineBreakHyphenationOptions lineBreakHyphenationOptions
=
434 new LineBreakHyphenationOptions();
435 LineBreakUserOptions lineBreakUserOptions
= new LineBreakUserOptions();
437 lineBreakUserOptions
.applyForbiddenRules
= false;
438 lineBreakUserOptions
.allowHyphenateEnglish
= false;
443 while(breakPos
== 0 && pos
< UnicodeString
.length() ) {
444 lineBreakResults
= oObj
.getLineBreak(UnicodeString
, pos
,
445 locale
, 0, lineBreakHyphenationOptions
, lineBreakUserOptions
);
446 breakPos
= lineBreakResults
.breakIndex
;
450 // finally the position of break must be found in the middle and
451 // it must be before the break position specified
452 bRes
= breakPos
<= pos
&& breakPos
> 0;
455 log
.println("The last position was: " + pos
456 + ", and the break position was: " + breakPos
);
459 tRes
.tested("getLineBreak()", bRes
);
463 private static String katakana
= new String(new char[] {0x30A1, 0x30A2}) ;
465 private static String arrows
= new String(new char[] {0x2190, 0x2191}) ;
466 // Complex type script
467 private static String arabic
= new String(new char[] {0x0641, 0x0642}) ;
470 * Tries to find the begining of the nearest script specified
471 * relatively to position passed. <p>
472 * Has <b>OK</b> status if the starting position of script is returned.
474 public void _beginOfScript() {
475 String multiScript
= "ab" + katakana
;
477 int pos
= oObj
.beginOfScript(multiScript
, 3, ScriptType
.ASIAN
) ;
479 log
.println("Position = " + pos
) ;
481 tRes
.tested("beginOfScript()", pos
== 2) ;
485 * Tries to find the end of the nearest script specified
486 * relatively to position passed. <p>
487 * Has <b>OK</b> status if the end position of script is returned.
489 public void _endOfScript() {
490 String multiScript
= "ab" + katakana
+ "cd" ;
492 int pos
= oObj
.endOfScript(multiScript
, 2, ScriptType
.ASIAN
) ;
494 log
.println("Position = " + pos
) ;
496 tRes
.tested("endOfScript()", pos
== 4) ;
500 * Tries to find the next script starting position specified
501 * relatively to position passed. <p>
502 * Has <b>OK</b> status if the appropriate position is returned.
504 public void _nextScript() {
505 String multiScript
= "ab" + katakana
+ "cd" ;
507 int pos
= oObj
.nextScript(multiScript
, 0, ScriptType
.LATIN
) ;
509 log
.println("Position = " + pos
) ;
511 tRes
.tested("nextScript()", pos
== 4) ;
515 * Tries to find the previous script starting position specified
516 * relatively to position passed. <p>
517 * Has <b>OK</b> status if the appropriate position is returned.
519 public void _previousScript() {
520 String multiScript
= "ab" + katakana
+ "cd" ;
522 int pos
= oObj
.previousScript(multiScript
, 5, ScriptType
.ASIAN
) ;
524 log
.println("Position = " + pos
) ;
526 tRes
.tested("previousScript()", pos
== 2) ;
530 * Tries to determine script type (of all four types). <p>
531 * Has <b>OK</b> status if <code>LATIN</code> type returned
532 * for ACSII character, <code>ASIAN</code> for Katakana Unicode
533 * codepoints, <code>COMPLEX</code> for Arabic Unicode
534 * codepoints and <code>WEAK</code> for codepoints from Arrows
537 public void _getScriptType() {
540 res
&= oObj
.getScriptType("abcd", 0) == ScriptType
.LATIN
;
541 res
&= oObj
.getScriptType(katakana
, 0) == ScriptType
.ASIAN
;
542 res
&= oObj
.getScriptType(arabic
, 0) == ScriptType
.COMPLEX
;
543 res
&= oObj
.getScriptType(arrows
, 0) == ScriptType
.WEAK
;
545 tRes
.tested("getScriptType()", res
) ;
548 boolean bCharBlockRes
= true;
550 protected short getCharBlockType(int pos
) {
554 if (oObj
.beginOfCharBlock(UnicodeString
, pos
, locale
, i
) != -1) {
564 Vector vCharBlockBounds
= new Vector();
565 Vector vCharBlockTypes
= new Vector();
568 * Creates array of all char blocks with their boundaries and
569 * types using <code>beginOfCharBlock()</code> and
570 * <code>endOfCharBlock()</code> methods. <p>
572 * Has <b>OK</b> status if the end of each boundary is the same
573 * as start of the next one and if the start of the first block
574 * has position 0 and the end of the last block is at the end
575 * of the whole string.
577 public void _beginOfCharBlock() {
580 while( iPos
< UnicodeString
.length() && iPos
> -1) {
581 short charType
= getCharBlockType(iPos
);
582 int startPos
= oObj
.beginOfCharBlock(UnicodeString
, iPos
,
584 int endPos
= oObj
.endOfCharBlock(UnicodeString
, iPos
,
587 vCharBlockBounds
.add(new Boundary(startPos
, endPos
));
588 log
.println("" + vCharBlockBounds
.size() + "). Bounds: ["
589 + startPos
+ "," + endPos
+ "]; Type = " + charType
);
590 vCharBlockTypes
.add(new Short(charType
));
593 for(int i
= 0; i
< vCharBlockBounds
.size() - 1; i
++) {
594 int endPos
= ((Boundary
)vCharBlockBounds
.get(i
)).endPos
;
595 int startPos
= ((Boundary
)vCharBlockBounds
.get(i
+ 1)).startPos
;
596 bCharBlockRes
&= endPos
== startPos
;
599 log
.println("Testing for no intersections : " + bCharBlockRes
);
600 int startPos
= ((Boundary
)vCharBlockBounds
.get(0)).startPos
;
601 bCharBlockRes
&= startPos
== 0;
602 int endPos
= ((Boundary
)vCharBlockBounds
.get
603 (vCharBlockBounds
.size() - 1)).endPos
;
604 bCharBlockRes
&= endPos
== UnicodeString
.length();
605 log
.println("Regions should starts with 0 and ends with "
606 + UnicodeString
.length());
608 tRes
.tested("beginOfCharBlock()", bCharBlockRes
);
612 * Testing of this method is performed in <code>beginOfCharBlock()</code>
615 * Has the status same as <code>beginOfCharBlock()</code> method status.
617 public void _endOfCharBlock() {
618 requiredMethod("beginOfCharBlock()");
619 tRes
.tested("endOfCharBlock()", bCharBlockRes
);
623 * For every character block obtained in <code>beginOfCharBlock()</code>
624 * method test (except the first) tries to find its starting position
625 * by mean of <code>nextCharBlock()</code> method passing as position
626 * argument the position before the start of a block. <p>
628 * Has <b>OK</b> status if the start of every block was found and it's
629 * equal to this block boundary start.
631 public void _nextCharBlock() {
632 requiredMethod("beginOfCharBlock()");
635 for(int i
= 0; i
< vCharBlockBounds
.size(); i
++) {
636 Boundary bounds
= (Boundary
)vCharBlockBounds
.get(i
);
637 Short type
= (Short
)vCharBlockTypes
.get(i
);
638 if (bounds
.startPos
- 1 < 0) continue;
639 int iPos
= oObj
.nextCharBlock(UnicodeString
, bounds
.startPos
- 1,
640 locale
, type
.shortValue());
641 if (iPos
!= bounds
.startPos
) {
643 log
.println("nextCharBlock(UnicodeString, "
644 + (bounds
.startPos
- 1) + ", locale, " + type
645 + ") should return " + bounds
.startPos
);
646 log
.println("... and actual value is " + iPos
);
650 tRes
.tested("nextCharBlock()", bRes
);
654 * For every character block obtained in <code>beginOfCharBlock()</code>
655 * method test (except the first) tries to find its starting position
656 * by mean of <code>previousCharBlock()</code> method passing as position
657 * argument the position after the end of a block. <p>
659 * Has <b>OK</b> status if the start of every block was found and it's
660 * equal to this block boundary start.
662 public void _previousCharBlock() {
663 requiredMethod("beginOfCharBlock()");
666 for(int i
= 0; i
< vCharBlockBounds
.size(); i
++) {
667 Boundary bounds
= (Boundary
)vCharBlockBounds
.get(i
);
668 Short type
= (Short
)vCharBlockTypes
.get(i
);
669 int iPos
= oObj
.previousCharBlock(UnicodeString
,
670 bounds
.endPos
+ 1, locale
, type
.shortValue());
671 if (iPos
!= bounds
.startPos
) {
673 log
.println("previousCharBlock(UnicodeString, "
674 + (bounds
.endPos
+ 1) + ", locale, " + type
675 + ") should return " + bounds
.startPos
);
676 log
.println("... and actual value is " + iPos
);
680 tRes
.tested("previousCharBlock()", bRes
);