2 * This file is part of the LibreOffice project.
4 * This Source Code Form is subject to the terms of the Mozilla Public
5 * License, v. 2.0. If a copy of the MPL was not distributed with this
6 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 * This file incorporates work covered by the following license notice:
10 * Licensed to the Apache Software Foundation (ASF) under one or more
11 * contributor license agreements. See the NOTICE file distributed
12 * with this work for additional information regarding copyright
13 * ownership. The ASF licenses this file to you under the Apache
14 * License, Version 2.0 (the "License"); you may not use this file
15 * except in compliance with the License. You may obtain a copy of
16 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 import java
.util
.ArrayList
;
23 import lib
.MultiMethodTest
;
25 import lib
.StatusException
;
27 import com
.sun
.star
.i18n
.Boundary
;
28 import com
.sun
.star
.i18n
.LineBreakHyphenationOptions
;
29 import com
.sun
.star
.i18n
.LineBreakResults
;
30 import com
.sun
.star
.i18n
.LineBreakUserOptions
;
31 import com
.sun
.star
.i18n
.ScriptType
;
32 import com
.sun
.star
.i18n
.WordType
;
33 import com
.sun
.star
.i18n
.XBreakIterator
;
34 import com
.sun
.star
.lang
.Locale
;
37 * Testing <code>com.sun.star.i18n.XBreakIterator</code>
40 * <li><code> nextCharacters()</code></li>
41 * <li><code> previousCharacters()</code></li>
42 * <li><code> nextWord()</code></li>
43 * <li><code> previousWord()</code></li>
44 * <li><code> getWordBoundary()</code></li>
45 * <li><code> getWordType()</code></li>
46 * <li><code> isBeginWord()</code></li>
47 * <li><code> isEndWord()</code></li>
48 * <li><code> beginOfSentence()</code></li>
49 * <li><code> endOfSentence()</code></li>
50 * <li><code> getLineBreak()</code></li>
51 * <li><code> beginOfScript()</code></li>
52 * <li><code> endOfScript()</code></li>
53 * <li><code> nextScript()</code></li>
54 * <li><code> previousScript()</code></li>
55 * <li><code> getScriptType()</code></li>
56 * <li><code> beginOfCharBlock()</code></li>
57 * <li><code> endOfCharBlock()</code></li>
58 * <li><code> nextCharBlock()</code></li>
59 * <li><code> previousCharBlock()</code></li>
61 * This test needs the following object relations :
63 * <li> <code>'Locale'</code>
64 * (of type <code>com.sun.star.lang.Locale</code>):
65 * this locale is used as locale argument for tested methods.
67 * <li> <code>'UnicodeString'</code>
68 * (of type <code>String</code>): Unicode string which is passed
69 * to methods except 'CharacterBlock' methods.
72 * @see com.sun.star.i18n.XBreakIterator
74 public class _XBreakIterator
extends MultiMethodTest
{
76 public XBreakIterator oObj
= null;
79 String UnicodeString
= null;
81 short wordType
= WordType
.ANYWORD_IGNOREWHITESPACES
;
84 * Retrieves object relations.
85 * @throws StatusException If one of relations not found.
88 protected void before() {
89 locale
= (Locale
)tEnv
.getObjRelation("Locale");
91 throw new StatusException
92 (Status
.failed("Relation 'Locale' not found")) ;
95 UnicodeString
= (String
)tEnv
.getObjRelation("UnicodeString");
96 if (UnicodeString
== null) {
97 throw new StatusException(Status
.failed
98 ("Relation 'UnicodeString' not found")) ;
103 * Compares returned next character positions with expected values. <p>
105 * Has <b>OK</b> status if position after travel and traveled length
106 * has expected values.
108 public void _nextCharacters() {
109 short nCharacterIteratorMode
=
110 com
.sun
.star
.i18n
.CharacterIteratorMode
.SKIPCHARACTER
;
112 int strLength
= UnicodeString
.length();
114 //Start from position : Travel ... chars :
115 // Actual position after : How many chars traveled
116 int[][] nextCharacters
= {
117 { 1, 5000, strLength
, strLength
- 1 },
122 for(int i
= 0; i
< nextCharacters
.length
; i
++) {
123 int[] lDone
= new int[1];
124 long lRes
= oObj
.nextCharacters(UnicodeString
, nextCharacters
[i
][0],
125 locale
, nCharacterIteratorMode
, nextCharacters
[i
][1], lDone
);
126 log
.println("Expected result is: lRes = " + nextCharacters
[i
][2] +
127 "; lDone = " + nextCharacters
[i
][3] );
128 log
.println("Actual result is: lRes = " + lRes
+
129 "; lDone = " + lDone
[0] );
131 bRes
= bRes
&& lRes
== nextCharacters
[i
][2];
132 bRes
= bRes
&& lDone
[0] == nextCharacters
[i
][3];
135 tRes
.tested("nextCharacters()", bRes
);
139 * Compares returned previous character positions with expected values. <p>
141 * Has <b>OK</b> status if position after travel and traveled length
142 * has expected values.
144 public void _previousCharacters() {
145 short nCharacterIteratorMode
=
146 com
.sun
.star
.i18n
.CharacterIteratorMode
.SKIPCHARACTER
;
149 //Start from position : Travel ... chars : Actual position after :
150 //How many chars traveled
151 int[][] previousCharacters
= {
156 for(int i
= 0; i
< previousCharacters
.length
; i
++) {
157 int[] lDone
= new int[1];
158 int lRes
= oObj
.previousCharacters(UnicodeString
,
159 previousCharacters
[i
][0],
160 locale
, nCharacterIteratorMode
,
161 previousCharacters
[i
][1], lDone
);
162 log
.println("Expected result is: lRes = " + previousCharacters
[i
][2]
163 + "; lDone = " + previousCharacters
[i
][3] );
164 log
.println("Actual result is: lRes = " + lRes
165 + "; lDone = " + lDone
[0]);
167 bRes
= bRes
&& lRes
== previousCharacters
[i
][2];
168 bRes
= bRes
&& lDone
[0] == previousCharacters
[i
][3];
171 tRes
.tested("previousCharacters()", bRes
);
174 ArrayList
<Boundary
> vBounds
= new ArrayList
<Boundary
>();
177 * Saves bounds of all returned words for the future tests. <p>
178 * Has <b>OK</b> status.
180 public void _nextWord() {
183 while( i
< UnicodeString
.length() - 1 ) {
184 Boundary bounds
= oObj
.nextWord
185 (UnicodeString
, i
, locale
, wordType
);
186 if (bounds
.endPos
- bounds
.startPos
> 3) {
187 vBounds
.add( bounds
);
188 log
.println("Word " + vBounds
.size() + "("
189 + bounds
.startPos
+ "," + bounds
.endPos
+ "): '" +
190 UnicodeString
.substring(bounds
.startPos
,
191 bounds
.endPos
) + "'");
193 i
= bounds
.endPos
- 1;
195 log
.println("In text there are " + vBounds
.size()
196 + " words, if count from left to right");
197 tRes
.tested("nextWord()", true);
201 * Compares number of word bounds with number of word bounds saved
202 * by the method _nextWord().<p>
203 * Has <b>OK</b> status if number of word bounds are equal.
205 public void _previousWord() {
206 requiredMethod("nextWord()");
208 int i
= UnicodeString
.length() - 1;
209 ArrayList
<Boundary
> vPrevBounds
= new ArrayList
<Boundary
>();
212 oObj
.previousWord(UnicodeString
, i
, locale
, wordType
);
213 if (bounds
.endPos
- bounds
.startPos
> 3) {
214 vPrevBounds
.add( bounds
);
215 log
.println("Word " + vPrevBounds
.size() + "("
216 + bounds
.startPos
+ "," + bounds
.endPos
+ "): '"
217 + UnicodeString
.substring(bounds
.startPos
, bounds
.endPos
)
222 log
.println("In text there are " + vPrevBounds
.size()
223 + " words, if count from right to left");
224 tRes
.tested("previousWord()", vPrevBounds
.size() == vBounds
.size() );
228 * For every word in array obtained by <code>nextWord</code> method test
229 * computes bounds of the word, passing its internal character position.<p>
231 * Has <b>OK</b> status if bounds calculated by <code>getWordBoundary()</code>
232 * method are the same as bounds obtained by <code>nextWord</code> method.
234 public void _getWordBoundary() {
235 requiredMethod("nextWord()");
239 for(int i
= 0; i
< vBounds
.size(); i
++) {
240 // calculate middle of the word
241 Boundary iBounds
= vBounds
.get(i
);
242 int iPos
= (iBounds
.endPos
- iBounds
.startPos
) / 2
244 Boundary bounds
= oObj
.getWordBoundary(UnicodeString
, iPos
,
245 locale
, wordType
, true);
246 log
.println("Expected result is: startPos = " + iBounds
.startPos
+
247 "; endPos = " + iBounds
.endPos
);
248 log
.println("Actual result is: startPos = " + bounds
.startPos
249 + "; endPos = " + bounds
.endPos
+ " Word is: '"
250 + UnicodeString
.substring(bounds
.startPos
, bounds
.endPos
) + "'");
252 bRes
= bRes
&& iBounds
.startPos
== bounds
.startPos
;
253 bRes
= bRes
&& iBounds
.endPos
== bounds
.endPos
;
256 tRes
.tested("getWordBoundary()", bRes
);
260 * For every word in array obtained by <code>nextWord</code> method test
261 * get its type, passing its internal character position.<p>
263 * Has <b>OK</b> status if every word has type <code>WordType.ANY_WORD</code>
265 public void _getWordType() {
266 requiredMethod("nextWord()");
270 for(int i
= 0; i
< vBounds
.size(); i
++) {
271 // calculate middle of the word
272 Boundary iBounds
= vBounds
.get(i
);
273 int iPos
= (iBounds
.endPos
- iBounds
.startPos
) / 2
276 short type
= oObj
.getWordType(UnicodeString
, iPos
, locale
);
278 bRes
= bRes
&& type
== WordType
.ANY_WORD
;
281 tRes
.tested("getWordType()", bRes
);
285 * For every word in array obtained by <code>nextWord</code> method test
286 * tries to determine if the character at a position starts a word.
287 * First word starting position is passed, then internal character
288 * position is passed. <p>
289 * Has <b>OK</b> status if in the first case <code>true</code>
290 * returned and in the second - <code>false</code> for every word.
292 public void _isBeginWord() {
293 requiredMethod("nextWord()");
297 for(int i
= 0; i
< vBounds
.size(); i
++) {
298 Boundary iBounds
= vBounds
.get(i
);
299 boolean isBegin
= oObj
.isBeginWord(UnicodeString
, iBounds
.startPos
,
300 locale
, WordType
.ANY_WORD
);
301 bRes
= bRes
&& isBegin
;
302 boolean isNotBegin
= !oObj
.isBeginWord(UnicodeString
,
303 iBounds
.startPos
+ 1, locale
, WordType
.ANY_WORD
);
304 bRes
= bRes
&& isNotBegin
;
306 log
.println("At position + " + iBounds
.startPos
307 + " isBeginWord? " + isBegin
);
308 log
.println("At position + " + (iBounds
.startPos
+ 1)
309 + " isBeginWord? " + !isNotBegin
);
312 tRes
.tested("isBeginWord()", bRes
);
316 * For every word in array obtained by <code>nextWord</code> method test
317 * tries to determine if the character at a position ends a word.
318 * First word ending position is passed, then internal character
319 * position is passed. <p>
321 * Has <b>OK</b> status if in the first case <code>true</code>
322 * returned and in the second - <code>false</code> for every word.
324 public void _isEndWord() {
325 requiredMethod("nextWord()");
329 for(int i
= 0; i
< vBounds
.size(); i
++) {
330 Boundary iBounds
= vBounds
.get(i
);
331 boolean isEnd
= oObj
.isEndWord(UnicodeString
, iBounds
.endPos
,
332 locale
, WordType
.ANY_WORD
);
333 bRes
= bRes
&& isEnd
;
334 boolean isNotEnd
= !oObj
.isEndWord(UnicodeString
,
335 iBounds
.endPos
- 1, locale
, WordType
.ANY_WORD
);
336 bRes
= bRes
&& isNotEnd
;
338 log
.println("At position + " + iBounds
.endPos
339 + " isEndWord? " + isEnd
);
340 log
.println("At position + " + (iBounds
.endPos
- 1)
341 + " isEndWord? " + !isNotEnd
);
344 tRes
.tested("isEndWord()", bRes
);
347 ArrayList
<Integer
> vSentenceStart
= new ArrayList
<Integer
>();
349 * Tries to find all sentences starting positions passing every character
350 * as position parameter and stores them. Then tries to pass invalid
351 * position parameters.
353 * Has <b>OK</b> status if -1 is returned for wrong position arguments.
355 public void _beginOfSentence() {
357 while( iPos
< UnicodeString
.length() ) {
358 Integer start
= Integer
.valueOf( oObj
.beginOfSentence(UnicodeString
,
360 if (start
.intValue() >= 0 && !vSentenceStart
.contains(start
) ) {
361 vSentenceStart
.add( start
);
362 log
.println("Sentence " + vSentenceStart
.size()
363 + " : start from position " + start
);
368 //test for invalid nStartPosition
369 boolean bRes
= oObj
.beginOfSentence(UnicodeString
, -10, locale
) == -1;
370 bRes
&= oObj
.beginOfSentence(UnicodeString
,
371 UnicodeString
.length() + 1, locale
) == -1;
374 log
.println("When invalid position, returned value isn't equal to -1");
377 tRes
.tested("beginOfSentence()", bRes
);
381 * For every sentence starting position found in
382 * <code>beginOfSentence()</code> test tries to compute end
383 * position of a sentence and checks that the end position is
384 * greater than starting.
385 * Then wrong position arguments are passed.
387 * Has <b>OK</b> status if the end position of every sentence
388 * greater than starting and -1 returned for invalid arguments.
390 public void _endOfSentence() {
392 for(int i
= 0; i
< vSentenceStart
.size(); i
++) {
393 int start
= vSentenceStart
.get(i
).intValue();
394 int end
= oObj
.endOfSentence(UnicodeString
, start
, locale
);
396 log
.println("Sentence " + i
+ " range is [" + start
+ ", "
400 //test for invalid nStartPosition
401 boolean bInvRes
= oObj
.endOfSentence(UnicodeString
, -10, locale
) == -1;
402 bInvRes
&= oObj
.endOfSentence(UnicodeString
,
403 UnicodeString
.length() + 1, locale
) == -1;
406 log
.println("When invalid position, returned value isn't equal to -1");
409 tRes
.tested("endOfSentence()", bRes
&& bInvRes
);
413 * Tries to break a string in position other than 0 iterating characters
414 * from the string beginning (Hyphenation is not used for a while). <p>
416 * Has <b>OK</b> status if non-zero break position was found and it is
417 * less or equal than position we trying to break.
419 public void _getLineBreak() {
421 LineBreakResults lineBreakResults
;
422 LineBreakHyphenationOptions lineBreakHyphenationOptions
=
423 new LineBreakHyphenationOptions();
424 LineBreakUserOptions lineBreakUserOptions
= new LineBreakUserOptions();
426 lineBreakUserOptions
.applyForbiddenRules
= false;
427 lineBreakUserOptions
.allowHyphenateEnglish
= false;
432 while(breakPos
== 0 && pos
< UnicodeString
.length() ) {
433 lineBreakResults
= oObj
.getLineBreak(UnicodeString
, pos
,
434 locale
, 0, lineBreakHyphenationOptions
, lineBreakUserOptions
);
435 breakPos
= lineBreakResults
.breakIndex
;
439 // finally the position of break must be found in the middle and
440 // it must be before the break position specified
441 bRes
= breakPos
<= pos
&& breakPos
> 0;
444 log
.println("The last position was: " + pos
445 + ", and the break position was: " + breakPos
);
448 tRes
.tested("getLineBreak()", bRes
);
452 private static String katakana
= new String(new char[] {0x30A1, 0x30A2}) ;
454 private static String arrows
= new String(new char[] {0x2190, 0x2191}) ;
455 // Complex type script
456 private static String arabic
= new String(new char[] {0x0641, 0x0642}) ;
459 * Tries to find the beginning of the nearest script specified
460 * relatively to position passed. <p>
461 * Has <b>OK</b> status if the starting position of script is returned.
463 public void _beginOfScript() {
464 String multiScript
= "ab" + katakana
;
466 int pos
= oObj
.beginOfScript(multiScript
, 3, ScriptType
.ASIAN
) ;
468 log
.println("Position = " + pos
) ;
470 tRes
.tested("beginOfScript()", pos
== 2) ;
474 * Tries to find the end of the nearest script specified
475 * relatively to position passed. <p>
476 * Has <b>OK</b> status if the end position of script is returned.
478 public void _endOfScript() {
479 String multiScript
= "ab" + katakana
+ "cd" ;
481 int pos
= oObj
.endOfScript(multiScript
, 2, ScriptType
.ASIAN
) ;
483 log
.println("Position = " + pos
) ;
485 tRes
.tested("endOfScript()", pos
== 4) ;
489 * Tries to find the next script starting position specified
490 * relatively to position passed. <p>
491 * Has <b>OK</b> status if the appropriate position is returned.
493 public void _nextScript() {
494 String multiScript
= "ab" + katakana
+ "cd" ;
496 int pos
= oObj
.nextScript(multiScript
, 0, ScriptType
.LATIN
) ;
498 log
.println("Position = " + pos
) ;
500 tRes
.tested("nextScript()", pos
== 4) ;
504 * Tries to find the previous script starting position specified
505 * relatively to position passed. <p>
506 * Has <b>OK</b> status if the appropriate position is returned.
508 public void _previousScript() {
509 String multiScript
= "ab" + katakana
+ "cd" ;
511 int pos
= oObj
.previousScript(multiScript
, 5, ScriptType
.ASIAN
) ;
513 log
.println("Position = " + pos
) ;
515 tRes
.tested("previousScript()", pos
== 2) ;
519 * Tries to determine script type (of all four types). <p>
520 * Has <b>OK</b> status if <code>LATIN</code> type returned
521 * for ASCII character, <code>ASIAN</code> for Katakana Unicode
522 * codepoints, <code>COMPLEX</code> for Arabic Unicode
523 * codepoints and <code>WEAK</code> for codepoints from Arrows
526 public void _getScriptType() {
529 res
&= oObj
.getScriptType("abcd", 0) == ScriptType
.LATIN
;
530 res
&= oObj
.getScriptType(katakana
, 0) == ScriptType
.ASIAN
;
531 res
&= oObj
.getScriptType(arabic
, 0) == ScriptType
.COMPLEX
;
532 res
&= oObj
.getScriptType(arrows
, 0) == ScriptType
.WEAK
;
534 tRes
.tested("getScriptType()", res
) ;
537 boolean bCharBlockRes
= true;
539 protected short getCharBlockType(int pos
) {
543 if (oObj
.beginOfCharBlock(UnicodeString
, pos
, locale
, i
) != -1) {
553 ArrayList
<Boundary
> vCharBlockBounds
= new ArrayList
<Boundary
>();
554 ArrayList
<Short
> vCharBlockTypes
= new ArrayList
<Short
>();
557 * Creates array of all char blocks with their boundaries and
558 * types using <code>beginOfCharBlock()</code> and
559 * <code>endOfCharBlock()</code> methods. <p>
561 * Has <b>OK</b> status if the end of each boundary is the same
562 * as start of the next one and if the start of the first block
563 * has position 0 and the end of the last block is at the end
564 * of the whole string.
566 public void _beginOfCharBlock() {
569 while( iPos
< UnicodeString
.length() && iPos
> -1) {
570 short charType
= getCharBlockType(iPos
);
571 int startPos
= oObj
.beginOfCharBlock(UnicodeString
, iPos
,
573 int endPos
= oObj
.endOfCharBlock(UnicodeString
, iPos
,
576 vCharBlockBounds
.add(new Boundary(startPos
, endPos
));
577 log
.println(vCharBlockBounds
.size() + "). Bounds: ["
578 + startPos
+ "," + endPos
+ "]; Type = " + charType
);
579 vCharBlockTypes
.add(Short
.valueOf(charType
));
582 for(int i
= 0; i
< vCharBlockBounds
.size() - 1; i
++) {
583 int endPos
= vCharBlockBounds
.get(i
).endPos
;
584 int startPos
= vCharBlockBounds
.get(i
+ 1).startPos
;
585 if (endPos
!= startPos
) {
586 bCharBlockRes
= false;
590 log
.println("Testing for no intersections : " + bCharBlockRes
);
591 int startPos
= vCharBlockBounds
.get(0).startPos
;
593 bCharBlockRes
= false;
595 int endPos
= vCharBlockBounds
.get
596 (vCharBlockBounds
.size() - 1).endPos
;
597 if (endPos
!= UnicodeString
.length()) {
598 bCharBlockRes
= false;
600 log
.println("Regions should starts with 0 and ends with "
601 + UnicodeString
.length());
603 tRes
.tested("beginOfCharBlock()", bCharBlockRes
);
607 * Testing of this method is performed in <code>beginOfCharBlock()</code>
610 * Has the status same as <code>beginOfCharBlock()</code> method status.
612 public void _endOfCharBlock() {
613 requiredMethod("beginOfCharBlock()");
614 tRes
.tested("endOfCharBlock()", bCharBlockRes
);
618 * For every character block obtained in <code>beginOfCharBlock()</code>
619 * method test (except the first) tries to find its starting position
620 * by mean of <code>nextCharBlock()</code> method passing as position
621 * argument the position before the start of a block. <p>
623 * Has <b>OK</b> status if the start of every block was found and it's
624 * equal to this block boundary start.
626 public void _nextCharBlock() {
627 requiredMethod("beginOfCharBlock()");
630 for(int i
= 0; i
< vCharBlockBounds
.size(); i
++) {
631 Boundary bounds
= vCharBlockBounds
.get(i
);
632 Short type
= vCharBlockTypes
.get(i
);
633 if (bounds
.startPos
- 1 < 0) continue;
634 int iPos
= oObj
.nextCharBlock(UnicodeString
, bounds
.startPos
- 1,
635 locale
, type
.shortValue());
636 if (iPos
!= bounds
.startPos
) {
638 log
.println("nextCharBlock(UnicodeString, "
639 + (bounds
.startPos
- 1) + ", locale, " + type
640 + ") should return " + bounds
.startPos
);
641 log
.println("... and actual value is " + iPos
);
645 tRes
.tested("nextCharBlock()", bRes
);
649 * For every character block obtained in <code>beginOfCharBlock()</code>
650 * method test (except the first) tries to find its starting position
651 * by mean of <code>previousCharBlock()</code> method passing as position
652 * argument the position after the end of a block. <p>
654 * Has <b>OK</b> status if the start of every block was found and it's
655 * equal to this block boundary start.
657 public void _previousCharBlock() {
658 requiredMethod("beginOfCharBlock()");
661 for(int i
= 0; i
< vCharBlockBounds
.size(); i
++) {
662 Boundary bounds
= vCharBlockBounds
.get(i
);
663 Short type
= vCharBlockTypes
.get(i
);
664 int iPos
= oObj
.previousCharBlock(UnicodeString
,
665 bounds
.endPos
+ 1, locale
, type
.shortValue());
666 if (iPos
!= bounds
.startPos
) {
668 log
.println("previousCharBlock(UnicodeString, "
669 + (bounds
.endPos
+ 1) + ", locale, " + type
670 + ") should return " + bounds
.startPos
);
671 log
.println("... and actual value is " + iPos
);
675 tRes
.tested("previousCharBlock()", bRes
);