/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
import java.util.ArrayList;

import lib.MultiMethodTest;
import lib.Status;
import lib.StatusException;

import com.sun.star.i18n.Boundary;
import com.sun.star.i18n.LineBreakHyphenationOptions;
import com.sun.star.i18n.LineBreakResults;
import com.sun.star.i18n.LineBreakUserOptions;
import com.sun.star.i18n.ScriptType;
import com.sun.star.i18n.WordType;
import com.sun.star.i18n.XBreakIterator;
import com.sun.star.lang.Locale;
37 * Testing <code>com.sun.star.i18n.XBreakIterator</code>
40 * <li><code> nextCharacters()</code></li>
41 * <li><code> previousCharacters()</code></li>
42 * <li><code> nextWord()</code></li>
43 * <li><code> previousWord()</code></li>
44 * <li><code> getWordBoundary()</code></li>
45 * <li><code> getWordType()</code></li>
46 * <li><code> isBeginWord()</code></li>
47 * <li><code> isEndWord()</code></li>
48 * <li><code> beginOfSentence()</code></li>
49 * <li><code> endOfSentence()</code></li>
50 * <li><code> getLineBreak()</code></li>
51 * <li><code> beginOfScript()</code></li>
52 * <li><code> endOfScript()</code></li>
53 * <li><code> nextScript()</code></li>
54 * <li><code> previousScript()</code></li>
55 * <li><code> getScriptType()</code></li>
56 * <li><code> beginOfCharBlock()</code></li>
57 * <li><code> endOfCharBlock()</code></li>
58 * <li><code> nextCharBlock()</code></li>
59 * <li><code> previousCharBlock()</code></li>
61 * This test needs the following object relations :
63 * <li> <code>'Locale'</code>
64 * (of type <code>com.sun.star.lang.Locale</code>):
65 * this locale is used as locale argument for tested methods.
67 * <li> <code>'UnicodeString'</code>
68 * (of type <code>String</code>): Unicode string which is passed
69 * to methods except 'CharacterBlock' methods.
72 * @see com.sun.star.i18n.XBreakIterator
74 public class _XBreakIterator
extends MultiMethodTest
{
76 public XBreakIterator oObj
= null;
79 String UnicodeString
= null;
81 short wordType
= WordType
.ANYWORD_IGNOREWHITESPACES
;
84 * Retrieves object relations.
85 * @throws StatusException If one of relations not found.
87 protected void before() {
88 locale
= (Locale
)tEnv
.getObjRelation("Locale");
90 throw new StatusException
91 (Status
.failed("Relation 'Locale' not found")) ;
94 UnicodeString
= (String
)tEnv
.getObjRelation("UnicodeString");
95 if (UnicodeString
== null) {
96 throw new StatusException(Status
.failed
97 ("Relation 'UnicodeString' not found")) ;
/**
 * Compares returned next character positions with expected values. <p>
 *
 * Has <b>OK</b> status if the position after travel and the traveled
 * length have the expected values.
 */
// Test for nextCharacters(): for every row of the nextCharacters table it
// calls oObj.nextCharacters() and compares the returned position and the
// count reported in lDone[0] with the expected values stored in that row.
// NOTE(review): this listing is a damaged extraction. The leading numbers
// are original line numbers fused into the text, and the original lines
// 117-120, 129, 132-133 and 135 onwards are not present here. Restore
// them from the upstream file before relying on this method.
107 public void _nextCharacters() {
// Iteration mode handed unchanged to every nextCharacters() call below.
108 short nCharacterIteratorMode
=
109 com
.sun
.star
.i18n
.CharacterIteratorMode
.SKIPCHARACTER
;
// Length of the test string; used to build the expected values of the
// visible table row.
111 int strLength
= UnicodeString
.length();
// Column meaning of each table row, in order:
// [0] start position, [1] number of characters to travel,
// [2] expected position after travel, [3] expected number traveled.
113 //Start from position : Travel ... chars :
114 // Actual position after : How many chars traveled
115 int[][] nextCharacters
= {
116 { 1, 5000, strLength
, strLength
- 1 },
// NOTE(review): the end of the table initializer and the declaration of
// bRes are not visible here (original lines 117-120 are missing). Whether
// the table held further rows cannot be told from this listing.
121 for(int i
= 0; i
< nextCharacters
.length
; i
++) {
// One-element array used as an out-parameter: lDone[0] is read after
// the call.
122 int[] lDone
= new int[1];
// lRes is declared long here, whereas _previousCharacters() stores the
// corresponding result in an int.
123 long lRes
= oObj
.nextCharacters(UnicodeString
, nextCharacters
[i
][0],
124 locale
, nCharacterIteratorMode
, nextCharacters
[i
][1], lDone
);
125 log
.println("Expected result is: lRes = " + nextCharacters
[i
][2] +
126 "; lDone = " + nextCharacters
[i
][3] );
127 log
.println("Actual result is: lRes = " + lRes
+
128 "; lDone = " + lDone
[0] );
// bRes stays true only while both the position and the traveled count
// match the expected values of every row processed so far.
130 bRes
= bRes
&& lRes
== nextCharacters
[i
][2];
131 bRes
= bRes
&& lDone
[0] == nextCharacters
[i
][3];
// Report the accumulated result for this method.
134 tRes
.tested("nextCharacters()", bRes
);
/**
 * Compares returned previous character positions with expected values. <p>
 *
 * Has <b>OK</b> status if the position after travel and the traveled
 * length have the expected values.
 */
// Test for previousCharacters(): for every row of the previousCharacters
// table it calls oObj.previousCharacters() and compares the returned
// position and the count reported in lDone[0] with the expected values.
// NOTE(review): this listing is a damaged extraction. The leading numbers
// are original line numbers fused into the text, and the original lines
// 151-154, 165, 168-169 and 171 onwards are not present here. Restore
// them from the upstream file before relying on this method.
143 public void _previousCharacters() {
// Iteration mode handed unchanged to every previousCharacters() call below.
144 short nCharacterIteratorMode
=
145 com
.sun
.star
.i18n
.CharacterIteratorMode
.SKIPCHARACTER
;
// Column meaning of each table row, in order:
// [0] start position, [1] number of characters to travel,
// [2] expected position after travel, [3] expected number traveled.
148 //Start from position : Travel ... chars : Actual position after :
149 //How many chars traveled
150 int[][] previousCharacters
= {
// NOTE(review): no table rows, no closing of the initializer and no
// declaration of bRes are visible here (original lines 151-154 are
// missing). The expected values must be restored from the upstream file.
155 for(int i
= 0; i
< previousCharacters
.length
; i
++) {
// One-element array used as an out-parameter: lDone[0] is read after
// the call.
156 int[] lDone
= new int[1];
157 int lRes
= oObj
.previousCharacters(UnicodeString
,
158 previousCharacters
[i
][0],
159 locale
, nCharacterIteratorMode
,
160 previousCharacters
[i
][1], lDone
);
161 log
.println("Expected result is: lRes = " + previousCharacters
[i
][2]
162 + "; lDone = " + previousCharacters
[i
][3] );
163 log
.println("Actual result is: lRes = " + lRes
164 + "; lDone = " + lDone
[0]);
// bRes stays true only while both the position and the traveled count
// match the expected values of every row processed so far.
166 bRes
= bRes
&& lRes
== previousCharacters
[i
][2];
167 bRes
= bRes
&& lDone
[0] == previousCharacters
[i
][3];
// Report the accumulated result for this method.
170 tRes
.tested("previousCharacters()", bRes
);
173 ArrayList
<Boundary
> vBounds
= new ArrayList
<Boundary
>();
176 * Saves bounds of all returned words for the future tests. <p>
177 * Has <b>OK</b> status.
179 public void _nextWord() {
182 while( i
< UnicodeString
.length() - 1 ) {
183 Boundary bounds
= oObj
.nextWord
184 (UnicodeString
, i
, locale
, wordType
);
185 if (bounds
.endPos
- bounds
.startPos
> 3) {
186 vBounds
.add( bounds
);
187 log
.println("Word " + vBounds
.size() + "("
188 + bounds
.startPos
+ "," + bounds
.endPos
+ "): '" +
189 UnicodeString
.substring(bounds
.startPos
,
190 bounds
.endPos
) + "'");
192 i
= bounds
.endPos
- 1;
194 log
.println("In text there are " + vBounds
.size()
195 + " words, if count from left to right");
196 tRes
.tested("nextWord()", true);
200 * Compares number of word bounds with number of word bounds saved
201 * by the method _nextWord().<p>
202 * Has <b>OK</b> status if number of word bounds are equal.
204 public void _previousWord() {
205 requiredMethod("nextWord()");
207 int i
= UnicodeString
.length() - 1;
208 ArrayList
<Boundary
> vPrevBounds
= new ArrayList
<Boundary
>();
211 oObj
.previousWord(UnicodeString
, i
, locale
, wordType
);
212 if (bounds
.endPos
- bounds
.startPos
> 3) {
213 vPrevBounds
.add( bounds
);
214 log
.println("Word " + vPrevBounds
.size() + "("
215 + bounds
.startPos
+ "," + bounds
.endPos
+ "): '"
216 + UnicodeString
.substring(bounds
.startPos
, bounds
.endPos
)
221 log
.println("In text there are " + vPrevBounds
.size()
222 + " words, if count from right to left");
223 tRes
.tested("previousWord()", vPrevBounds
.size() == vBounds
.size() );
227 * For every word in array obtained by <code>nextWord</code> method test
228 * computes bounds of the word, passing its internal character position.<p>
230 * Has <b>OK</b> status if bounds calculated by <code>getWordBoundary()</code>
231 * method are the same as bounds obtained by <code>nextWord</code> method.
233 public void _getWordBoundary() {
234 requiredMethod("nextWord()");
238 for(int i
= 0; i
< vBounds
.size(); i
++) {
239 // calculate middle of the word
240 Boundary iBounds
= vBounds
.get(i
);
241 int iPos
= (iBounds
.endPos
- iBounds
.startPos
) / 2
243 Boundary bounds
= oObj
.getWordBoundary(UnicodeString
, iPos
,
244 locale
, wordType
, true);
245 log
.println("Expected result is: startPos = " + iBounds
.startPos
+
246 "; endPos = " + iBounds
.endPos
);
247 log
.println("Actual result is: startPos = " + bounds
.startPos
248 + "; endPos = " + bounds
.endPos
+ " Word is: '"
249 + UnicodeString
.substring(bounds
.startPos
, bounds
.endPos
) + "'");
251 bRes
= bRes
&& iBounds
.startPos
== bounds
.startPos
;
252 bRes
= bRes
&& iBounds
.endPos
== bounds
.endPos
;
255 tRes
.tested("getWordBoundary()", bRes
);
259 * For every word in array obtained by <code>nextWord</code> method test
260 * get its type, passing its internal character position.<p>
262 * Has <b>OK</b> status if every word has type <code>WordType.ANY_WORD</code>
264 public void _getWordType() {
265 requiredMethod("nextWord()");
269 for(int i
= 0; i
< vBounds
.size(); i
++) {
270 // calculate middle of the word
271 Boundary iBounds
= vBounds
.get(i
);
272 int iPos
= (iBounds
.endPos
- iBounds
.startPos
) / 2
275 short type
= oObj
.getWordType(UnicodeString
, iPos
, locale
);
277 bRes
= bRes
&& type
== WordType
.ANY_WORD
;
280 tRes
.tested("getWordType()", bRes
);
284 * For every word in array obtained by <code>nextWord</code> method test
285 * tries to determine if the character at a position starts a word.
286 * First word starting position is passed, then internal character
287 * position is passed. <p>
288 * Has <b>OK</b> status if in the first case <code>true</code>
289 * returned and in the second - <code>false</code> for every word.
291 public void _isBeginWord() {
292 requiredMethod("nextWord()");
296 for(int i
= 0; i
< vBounds
.size(); i
++) {
297 Boundary iBounds
= vBounds
.get(i
);
298 boolean isBegin
= oObj
.isBeginWord(UnicodeString
, iBounds
.startPos
,
299 locale
, WordType
.ANY_WORD
);
300 bRes
= bRes
&& isBegin
;
301 boolean isNotBegin
= !oObj
.isBeginWord(UnicodeString
,
302 iBounds
.startPos
+ 1, locale
, WordType
.ANY_WORD
);
303 bRes
= bRes
&& isNotBegin
;
305 log
.println("At position + " + iBounds
.startPos
306 + " isBeginWord? " + isBegin
);
307 log
.println("At position + " + (iBounds
.startPos
+ 1)
308 + " isBeginWord? " + !isNotBegin
);
311 tRes
.tested("isBeginWord()", bRes
);
315 * For every word in array obtained by <code>nextWord</code> method test
316 * tries to determine if the character at a position ends a word.
317 * First word ending position is passed, then internal character
318 * position is passed. <p>
320 * Has <b>OK</b> status if in the first case <code>true</code>
321 * returned and in the second - <code>false</code> for every word.
323 public void _isEndWord() {
324 requiredMethod("nextWord()");
328 for(int i
= 0; i
< vBounds
.size(); i
++) {
329 Boundary iBounds
= vBounds
.get(i
);
330 boolean isEnd
= oObj
.isEndWord(UnicodeString
, iBounds
.endPos
,
331 locale
, WordType
.ANY_WORD
);
332 bRes
= bRes
&& isEnd
;
333 boolean isNotEnd
= !oObj
.isEndWord(UnicodeString
,
334 iBounds
.endPos
- 1, locale
, WordType
.ANY_WORD
);
335 bRes
= bRes
&& isNotEnd
;
337 log
.println("At position + " + iBounds
.endPos
338 + " isEndWord? " + isEnd
);
339 log
.println("At position + " + (iBounds
.endPos
- 1)
340 + " isEndWord? " + !isNotEnd
);
343 tRes
.tested("isEndWord()", bRes
);
346 ArrayList
<Integer
> vSentenceStart
= new ArrayList
<Integer
>();
348 * Tries to find all sentences starting positions passing every character
349 * as position parameter and stores them. Then tries to pass invalid
350 * position parameters.
352 * Has <b>OK</b> status if -1 is returned for wrong position arguments.
354 public void _beginOfSentence() {
356 while( iPos
< UnicodeString
.length() ) {
357 Integer start
= new Integer( oObj
.beginOfSentence(UnicodeString
,
359 if (start
.intValue() >= 0 && !vSentenceStart
.contains(start
) ) {
360 vSentenceStart
.add( start
);
361 log
.println("Sentence " + vSentenceStart
.size()
362 + " : start from position " + start
);
367 //test for invalid nStartPosition
368 boolean bRes
= oObj
.beginOfSentence(UnicodeString
, -10, locale
) == -1;
369 bRes
&= oObj
.beginOfSentence(UnicodeString
,
370 UnicodeString
.length() + 1, locale
) == -1;
373 log
.println("When invalid position, returned value isn't equal to -1");
376 tRes
.tested("beginOfSentence()", bRes
);
380 * For every sentence starting position found in
381 * <code>beginOfSentence()</code> test tries to compute end
382 * position of a sentence and checks that the end position is
383 * greater than starting.
384 * Then wrong position arguments are passed.
386 * Has <b>OK</b> status if the end position of every sentence
387 * greater than starting and -1 returned for invalid arguments.
389 public void _endOfSentence() {
391 for(int i
= 0; i
< vSentenceStart
.size(); i
++) {
392 int start
= vSentenceStart
.get(i
).intValue();
393 int end
= oObj
.endOfSentence(UnicodeString
, start
, locale
);
395 log
.println("Sentence " + i
+ " range is [" + start
+ ", "
399 //test for invalid nStartPosition
400 boolean bInvRes
= oObj
.endOfSentence(UnicodeString
, -10, locale
) == -1;
401 bInvRes
&= oObj
.endOfSentence(UnicodeString
,
402 UnicodeString
.length() + 1, locale
) == -1;
405 log
.println("When invalid position, returned value isn't equal to -1");
408 tRes
.tested("endOfSentence()", bRes
&& bInvRes
);
412 * Tries to break a string in position other than 0 iterating characters
413 * from the string beginning (Hyphenation is not used for a while). <p>
415 * Has <b>OK</b> status if non-zero break position was found and it is
416 * less or equal than position we trying to break.
418 public void _getLineBreak() {
420 LineBreakResults lineBreakResults
;
421 LineBreakHyphenationOptions lineBreakHyphenationOptions
=
422 new LineBreakHyphenationOptions();
423 LineBreakUserOptions lineBreakUserOptions
= new LineBreakUserOptions();
425 lineBreakUserOptions
.applyForbiddenRules
= false;
426 lineBreakUserOptions
.allowHyphenateEnglish
= false;
431 while(breakPos
== 0 && pos
< UnicodeString
.length() ) {
432 lineBreakResults
= oObj
.getLineBreak(UnicodeString
, pos
,
433 locale
, 0, lineBreakHyphenationOptions
, lineBreakUserOptions
);
434 breakPos
= lineBreakResults
.breakIndex
;
438 // finally the position of break must be found in the middle and
439 // it must be before the break position specified
440 bRes
= breakPos
<= pos
&& breakPos
> 0;
443 log
.println("The last position was: " + pos
444 + ", and the break position was: " + breakPos
);
447 tRes
.tested("getLineBreak()", bRes
);
451 private static String katakana
= new String(new char[] {0x30A1, 0x30A2}) ;
453 private static String arrows
= new String(new char[] {0x2190, 0x2191}) ;
454 // Complex type script
455 private static String arabic
= new String(new char[] {0x0641, 0x0642}) ;
458 * Tries to find the beginning of the nearest script specified
459 * relatively to position passed. <p>
460 * Has <b>OK</b> status if the starting position of script is returned.
462 public void _beginOfScript() {
463 String multiScript
= "ab" + katakana
;
465 int pos
= oObj
.beginOfScript(multiScript
, 3, ScriptType
.ASIAN
) ;
467 log
.println("Position = " + pos
) ;
469 tRes
.tested("beginOfScript()", pos
== 2) ;
473 * Tries to find the end of the nearest script specified
474 * relatively to position passed. <p>
475 * Has <b>OK</b> status if the end position of script is returned.
477 public void _endOfScript() {
478 String multiScript
= "ab" + katakana
+ "cd" ;
480 int pos
= oObj
.endOfScript(multiScript
, 2, ScriptType
.ASIAN
) ;
482 log
.println("Position = " + pos
) ;
484 tRes
.tested("endOfScript()", pos
== 4) ;
488 * Tries to find the next script starting position specified
489 * relatively to position passed. <p>
490 * Has <b>OK</b> status if the appropriate position is returned.
492 public void _nextScript() {
493 String multiScript
= "ab" + katakana
+ "cd" ;
495 int pos
= oObj
.nextScript(multiScript
, 0, ScriptType
.LATIN
) ;
497 log
.println("Position = " + pos
) ;
499 tRes
.tested("nextScript()", pos
== 4) ;
503 * Tries to find the previous script starting position specified
504 * relatively to position passed. <p>
505 * Has <b>OK</b> status if the appropriate position is returned.
507 public void _previousScript() {
508 String multiScript
= "ab" + katakana
+ "cd" ;
510 int pos
= oObj
.previousScript(multiScript
, 5, ScriptType
.ASIAN
) ;
512 log
.println("Position = " + pos
) ;
514 tRes
.tested("previousScript()", pos
== 2) ;
518 * Tries to determine script type (of all four types). <p>
519 * Has <b>OK</b> status if <code>LATIN</code> type returned
520 * for ACSII character, <code>ASIAN</code> for Katakana Unicode
521 * codepoints, <code>COMPLEX</code> for Arabic Unicode
522 * codepoints and <code>WEAK</code> for codepoints from Arrows
525 public void _getScriptType() {
528 res
&= oObj
.getScriptType("abcd", 0) == ScriptType
.LATIN
;
529 res
&= oObj
.getScriptType(katakana
, 0) == ScriptType
.ASIAN
;
530 res
&= oObj
.getScriptType(arabic
, 0) == ScriptType
.COMPLEX
;
531 res
&= oObj
.getScriptType(arrows
, 0) == ScriptType
.WEAK
;
533 tRes
.tested("getScriptType()", res
) ;
536 boolean bCharBlockRes
= true;
538 protected short getCharBlockType(int pos
) {
542 if (oObj
.beginOfCharBlock(UnicodeString
, pos
, locale
, i
) != -1) {
552 ArrayList
<Boundary
> vCharBlockBounds
= new ArrayList
<Boundary
>();
553 ArrayList
<Short
> vCharBlockTypes
= new ArrayList
<Short
>();
556 * Creates array of all char blocks with their boundaries and
557 * types using <code>beginOfCharBlock()</code> and
558 * <code>endOfCharBlock()</code> methods. <p>
560 * Has <b>OK</b> status if the end of each boundary is the same
561 * as start of the next one and if the start of the first block
562 * has position 0 and the end of the last block is at the end
563 * of the whole string.
565 public void _beginOfCharBlock() {
568 while( iPos
< UnicodeString
.length() && iPos
> -1) {
569 short charType
= getCharBlockType(iPos
);
570 int startPos
= oObj
.beginOfCharBlock(UnicodeString
, iPos
,
572 int endPos
= oObj
.endOfCharBlock(UnicodeString
, iPos
,
575 vCharBlockBounds
.add(new Boundary(startPos
, endPos
));
576 log
.println("" + vCharBlockBounds
.size() + "). Bounds: ["
577 + startPos
+ "," + endPos
+ "]; Type = " + charType
);
578 vCharBlockTypes
.add(new Short(charType
));
581 for(int i
= 0; i
< vCharBlockBounds
.size() - 1; i
++) {
582 int endPos
= vCharBlockBounds
.get(i
).endPos
;
583 int startPos
= vCharBlockBounds
.get(i
+ 1).startPos
;
584 bCharBlockRes
&= endPos
== startPos
;
587 log
.println("Testing for no intersections : " + bCharBlockRes
);
588 int startPos
= vCharBlockBounds
.get(0).startPos
;
589 bCharBlockRes
&= startPos
== 0;
590 int endPos
= vCharBlockBounds
.get
591 (vCharBlockBounds
.size() - 1).endPos
;
592 bCharBlockRes
&= endPos
== UnicodeString
.length();
593 log
.println("Regions should starts with 0 and ends with "
594 + UnicodeString
.length());
596 tRes
.tested("beginOfCharBlock()", bCharBlockRes
);
600 * Testing of this method is performed in <code>beginOfCharBlock()</code>
603 * Has the status same as <code>beginOfCharBlock()</code> method status.
605 public void _endOfCharBlock() {
606 requiredMethod("beginOfCharBlock()");
607 tRes
.tested("endOfCharBlock()", bCharBlockRes
);
611 * For every character block obtained in <code>beginOfCharBlock()</code>
612 * method test (except the first) tries to find its starting position
613 * by mean of <code>nextCharBlock()</code> method passing as position
614 * argument the position before the start of a block. <p>
616 * Has <b>OK</b> status if the start of every block was found and it's
617 * equal to this block boundary start.
619 public void _nextCharBlock() {
620 requiredMethod("beginOfCharBlock()");
623 for(int i
= 0; i
< vCharBlockBounds
.size(); i
++) {
624 Boundary bounds
= vCharBlockBounds
.get(i
);
625 Short type
= vCharBlockTypes
.get(i
);
626 if (bounds
.startPos
- 1 < 0) continue;
627 int iPos
= oObj
.nextCharBlock(UnicodeString
, bounds
.startPos
- 1,
628 locale
, type
.shortValue());
629 if (iPos
!= bounds
.startPos
) {
631 log
.println("nextCharBlock(UnicodeString, "
632 + (bounds
.startPos
- 1) + ", locale, " + type
633 + ") should return " + bounds
.startPos
);
634 log
.println("... and actual value is " + iPos
);
638 tRes
.tested("nextCharBlock()", bRes
);
642 * For every character block obtained in <code>beginOfCharBlock()</code>
643 * method test (except the first) tries to find its starting position
644 * by mean of <code>previousCharBlock()</code> method passing as position
645 * argument the position after the end of a block. <p>
647 * Has <b>OK</b> status if the start of every block was found and it's
648 * equal to this block boundary start.
650 public void _previousCharBlock() {
651 requiredMethod("beginOfCharBlock()");
654 for(int i
= 0; i
< vCharBlockBounds
.size(); i
++) {
655 Boundary bounds
= vCharBlockBounds
.get(i
);
656 Short type
= vCharBlockTypes
.get(i
);
657 int iPos
= oObj
.previousCharBlock(UnicodeString
,
658 bounds
.endPos
+ 1, locale
, type
.shortValue());
659 if (iPos
!= bounds
.startPos
) {
661 log
.println("previousCharBlock(UnicodeString, "
662 + (bounds
.endPos
+ 1) + ", locale, " + type
663 + ") should return " + bounds
.startPos
);
664 log
.println("... and actual value is " + iPos
);
668 tRes
.tested("previousCharBlock()", bRes
);