Avoid potential negative array index access to cached text.
[LibreOffice.git] / qadevOOo / tests / java / ifc / i18n / _XBreakIterator.java
blobd33165fd78a236f874d8599996b0634855e8208c
1 /*
2 * This file is part of the LibreOffice project.
4 * This Source Code Form is subject to the terms of the Mozilla Public
5 * License, v. 2.0. If a copy of the MPL was not distributed with this
6 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 * This file incorporates work covered by the following license notice:
10 * Licensed to the Apache Software Foundation (ASF) under one or more
11 * contributor license agreements. See the NOTICE file distributed
12 * with this work for additional information regarding copyright
13 * ownership. The ASF licenses this file to you under the Apache
14 * License, Version 2.0 (the "License"); you may not use this file
15 * except in compliance with the License. You may obtain a copy of
16 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 package ifc.i18n;
21 import java.util.ArrayList;
23 import lib.MultiMethodTest;
24 import lib.Status;
25 import lib.StatusException;
27 import com.sun.star.i18n.Boundary;
28 import com.sun.star.i18n.LineBreakHyphenationOptions;
29 import com.sun.star.i18n.LineBreakResults;
30 import com.sun.star.i18n.LineBreakUserOptions;
31 import com.sun.star.i18n.ScriptType;
32 import com.sun.star.i18n.WordType;
33 import com.sun.star.i18n.XBreakIterator;
34 import com.sun.star.lang.Locale;
36 /**
37 * Testing <code>com.sun.star.i18n.XBreakIterator</code>
38 * interface methods :
39 * <ul>
40 * <li><code> nextCharacters()</code></li>
41 * <li><code> previousCharacters()</code></li>
42 * <li><code> nextWord()</code></li>
43 * <li><code> previousWord()</code></li>
44 * <li><code> getWordBoundary()</code></li>
45 * <li><code> getWordType()</code></li>
46 * <li><code> isBeginWord()</code></li>
47 * <li><code> isEndWord()</code></li>
48 * <li><code> beginOfSentence()</code></li>
49 * <li><code> endOfSentence()</code></li>
50 * <li><code> getLineBreak()</code></li>
51 * <li><code> beginOfScript()</code></li>
52 * <li><code> endOfScript()</code></li>
53 * <li><code> nextScript()</code></li>
54 * <li><code> previousScript()</code></li>
55 * <li><code> getScriptType()</code></li>
56 * <li><code> beginOfCharBlock()</code></li>
57 * <li><code> endOfCharBlock()</code></li>
58 * <li><code> nextCharBlock()</code></li>
59 * <li><code> previousCharBlock()</code></li>
60 * </ul> <p>
61 * This test needs the following object relations :
62 * <ul>
63 * <li> <code>'Locale'</code>
64 * (of type <code>com.sun.star.lang.Locale</code>):
65 * this locale is used as locale argument for tested methods.
66 * </li>
67 * <li> <code>'UnicodeString'</code>
68 * (of type <code>String</code>): Unicode string which is passed
69 * to methods except 'CharacterBlock' methods.
70 * </li>
71 * </ul> <p>
72 * @see com.sun.star.i18n.XBreakIterator
74 public class _XBreakIterator extends MultiMethodTest {
76 public XBreakIterator oObj = null;
78 Locale locale = null;
79 String UnicodeString = null;
81 short wordType = WordType.ANYWORD_IGNOREWHITESPACES;
83 /**
84 * Retrieves object relations.
85 * @throws StatusException If one of relations not found.
87 @Override
88 protected void before() {
89 locale = (Locale)tEnv.getObjRelation("Locale");
90 if (locale == null) {
91 throw new StatusException
92 (Status.failed("Relation 'Locale' not found")) ;
95 UnicodeString = (String)tEnv.getObjRelation("UnicodeString");
96 if (UnicodeString == null) {
97 throw new StatusException(Status.failed
98 ("Relation 'UnicodeString' not found")) ;
103 * Compares returned next character positions with expected values. <p>
105 * Has <b>OK</b> status if position after travel and traveled length
106 * has expected values.
108 public void _nextCharacters() {
109 short nCharacterIteratorMode =
110 com.sun.star.i18n.CharacterIteratorMode.SKIPCHARACTER;
112 int strLength = UnicodeString.length();
114 //Start from position : Travel ... chars :
115 // Actual position after : How many chars traveled
116 int[][] nextCharacters = {
117 { 1, 5000, strLength , strLength - 1 },
118 { 10, 6, 16, 6}};
120 boolean bRes = true;
122 for(int i = 0; i < nextCharacters.length; i++) {
123 int[] lDone = new int[1];
124 long lRes = oObj.nextCharacters(UnicodeString, nextCharacters[i][0],
125 locale, nCharacterIteratorMode, nextCharacters[i][1], lDone);
126 log.println("Expected result is: lRes = " + nextCharacters[i][2] +
127 "; lDone = " + nextCharacters[i][3] );
128 log.println("Actual result is: lRes = " + lRes +
129 "; lDone = " + lDone[0] );
131 bRes = bRes && lRes == nextCharacters[i][2];
132 bRes = bRes && lDone[0] == nextCharacters[i][3];
135 tRes.tested("nextCharacters()", bRes);
139 * Compares returned previous character positions with expected values. <p>
141 * Has <b>OK</b> status if position after travel and traveled length
142 * has expected values.
144 public void _previousCharacters() {
145 short nCharacterIteratorMode =
146 com.sun.star.i18n.CharacterIteratorMode.SKIPCHARACTER;
149 //Start from position : Travel ... chars : Actual position after :
150 //How many chars traveled
151 int[][] previousCharacters = {
152 {5, 5000, 0, 5},
153 {10, 6, 4, 6}};
155 boolean bRes = true;
156 for(int i = 0; i < previousCharacters.length; i++) {
157 int[] lDone = new int[1];
158 int lRes = oObj.previousCharacters(UnicodeString,
159 previousCharacters[i][0],
160 locale, nCharacterIteratorMode,
161 previousCharacters[i][1], lDone);
162 log.println("Expected result is: lRes = " + previousCharacters[i][2]
163 + "; lDone = " + previousCharacters[i][3] );
164 log.println("Actual result is: lRes = " + lRes
165 + "; lDone = " + lDone[0]);
167 bRes = bRes && lRes == previousCharacters[i][2];
168 bRes = bRes && lDone[0] == previousCharacters[i][3];
171 tRes.tested("previousCharacters()", bRes);
174 ArrayList<Boundary> vBounds = new ArrayList<Boundary>();
177 * Saves bounds of all returned words for the future tests. <p>
178 * Has <b>OK</b> status.
180 public void _nextWord() {
181 int i = 0;
183 while( i < UnicodeString.length() - 1 ) {
184 Boundary bounds = oObj.nextWord
185 (UnicodeString, i, locale, wordType);
186 if (bounds.endPos - bounds.startPos > 3) {
187 vBounds.add( bounds );
188 log.println("Word " + vBounds.size() + "("
189 + bounds.startPos + "," + bounds.endPos + "): '" +
190 UnicodeString.substring(bounds.startPos,
191 bounds.endPos) + "'");
193 i = bounds.endPos - 1;
195 log.println("In text there are " + vBounds.size()
196 + " words, if count from left to right");
197 tRes.tested("nextWord()", true);
201 * Compares number of word bounds with number of word bounds saved
202 * by the method _nextWord().<p>
203 * Has <b>OK</b> status if number of word bounds are equal.
205 public void _previousWord() {
206 requiredMethod("nextWord()");
208 int i = UnicodeString.length() - 1;
209 ArrayList<Boundary> vPrevBounds = new ArrayList<Boundary>();
210 while( i > 0 ) {
211 Boundary bounds =
212 oObj.previousWord(UnicodeString, i, locale, wordType);
213 if (bounds.endPos - bounds.startPos > 3) {
214 vPrevBounds.add( bounds );
215 log.println("Word " + vPrevBounds.size() + "("
216 + bounds.startPos + "," + bounds.endPos + "): '"
217 + UnicodeString.substring(bounds.startPos, bounds.endPos)
218 + "'");
220 i = bounds.startPos;
222 log.println("In text there are " + vPrevBounds.size()
223 + " words, if count from right to left");
224 tRes.tested("previousWord()", vPrevBounds.size() == vBounds.size() );
228 * For every word in array obtained by <code>nextWord</code> method test
229 * computes bounds of the word, passing its internal character position.<p>
231 * Has <b>OK</b> status if bounds calculated by <code>getWordBoundary()</code>
232 * method are the same as bounds obtained by <code>nextWord</code> method.
234 public void _getWordBoundary() {
235 requiredMethod("nextWord()");
237 boolean bRes = true;
239 for(int i = 0; i < vBounds.size(); i++) {
240 // calculate middle of the word
241 Boundary iBounds = vBounds.get(i);
242 int iPos = (iBounds.endPos - iBounds.startPos) / 2
243 + iBounds.startPos;
244 Boundary bounds = oObj.getWordBoundary(UnicodeString, iPos,
245 locale, wordType, true);
246 log.println("Expected result is: startPos = " + iBounds.startPos +
247 "; endPos = " + iBounds.endPos);
248 log.println("Actual result is: startPos = " + bounds.startPos
249 + "; endPos = " + bounds.endPos + " Word is: '"
250 + UnicodeString.substring(bounds.startPos, bounds.endPos) + "'");
252 bRes = bRes && iBounds.startPos == bounds.startPos;
253 bRes = bRes && iBounds.endPos == bounds.endPos;
256 tRes.tested("getWordBoundary()", bRes);
260 * For every word in array obtained by <code>nextWord</code> method test
261 * get its type, passing its internal character position.<p>
263 * Has <b>OK</b> status if every word has type <code>WordType.ANY_WORD</code>
265 public void _getWordType() {
266 requiredMethod("nextWord()");
268 boolean bRes = true;
270 for(int i = 0; i < vBounds.size(); i++) {
271 // calculate middle of the word
272 Boundary iBounds = vBounds.get(i);
273 int iPos = (iBounds.endPos - iBounds.startPos) / 2
274 + iBounds.startPos;
276 short type = oObj.getWordType(UnicodeString, iPos, locale);
278 bRes = bRes && type == WordType.ANY_WORD;
281 tRes.tested("getWordType()", bRes);
285 * For every word in array obtained by <code>nextWord</code> method test
286 * tries to determine if the character at a position starts a word.
287 * First word starting position is passed, then internal character
288 * position is passed. <p>
289 * Has <b>OK</b> status if in the first case <code>true</code>
290 * returned and in the second - <code>false</code> for every word.
292 public void _isBeginWord() {
293 requiredMethod("nextWord()");
295 boolean bRes = true;
297 for(int i = 0; i < vBounds.size(); i++) {
298 Boundary iBounds = vBounds.get(i);
299 boolean isBegin = oObj.isBeginWord(UnicodeString, iBounds.startPos,
300 locale, WordType.ANY_WORD);
301 bRes = bRes && isBegin;
302 boolean isNotBegin = !oObj.isBeginWord(UnicodeString,
303 iBounds.startPos + 1, locale, WordType.ANY_WORD);
304 bRes = bRes && isNotBegin;
306 log.println("At position + " + iBounds.startPos
307 + " isBeginWord? " + isBegin);
308 log.println("At position + " + (iBounds.startPos + 1)
309 + " isBeginWord? " + !isNotBegin);
312 tRes.tested("isBeginWord()", bRes);
316 * For every word in array obtained by <code>nextWord</code> method test
317 * tries to determine if the character at a position ends a word.
318 * First word ending position is passed, then internal character
319 * position is passed. <p>
321 * Has <b>OK</b> status if in the first case <code>true</code>
322 * returned and in the second - <code>false</code> for every word.
324 public void _isEndWord() {
325 requiredMethod("nextWord()");
327 boolean bRes = true;
329 for(int i = 0; i < vBounds.size(); i++) {
330 Boundary iBounds = vBounds.get(i);
331 boolean isEnd = oObj.isEndWord(UnicodeString, iBounds.endPos,
332 locale, WordType.ANY_WORD);
333 bRes = bRes && isEnd;
334 boolean isNotEnd = !oObj.isEndWord(UnicodeString,
335 iBounds.endPos - 1, locale, WordType.ANY_WORD);
336 bRes = bRes && isNotEnd;
338 log.println("At position + " + iBounds.endPos
339 + " isEndWord? " + isEnd);
340 log.println("At position + " + (iBounds.endPos - 1)
341 + " isEndWord? " + !isNotEnd);
344 tRes.tested("isEndWord()", bRes);
347 ArrayList<Integer> vSentenceStart = new ArrayList<Integer>();
349 * Tries to find all sentences starting positions passing every character
350 * as position parameter and stores them. Then tries to pass invalid
351 * position parameters.
353 * Has <b>OK</b> status if -1 is returned for wrong position arguments.
355 public void _beginOfSentence() {
356 int iPos = 0;
357 while( iPos < UnicodeString.length() ) {
358 Integer start = Integer.valueOf( oObj.beginOfSentence(UnicodeString,
359 iPos, locale) );
360 if (start.intValue() >= 0 && !vSentenceStart.contains(start) ) {
361 vSentenceStart.add( start );
362 log.println("Sentence " + vSentenceStart.size()
363 + " : start from position " + start);
365 iPos++;
368 //test for invalid nStartPosition
369 boolean bRes = oObj.beginOfSentence(UnicodeString, -10, locale) == -1;
370 bRes &= oObj.beginOfSentence(UnicodeString,
371 UnicodeString.length() + 1, locale) == -1;
373 if (!bRes) {
374 log.println("When invalid position, returned value isn't equal to -1");
377 tRes.tested("beginOfSentence()", bRes);
381 * For every sentence starting position found in
382 * <code>beginOfSentence()</code> test tries to compute end
383 * position of a sentence and checks that the end position is
384 * greater than starting.
385 * Then wrong position arguments are passed.
387 * Has <b>OK</b> status if the end position of every sentence
388 * greater than starting and -1 returned for invalid arguments.
390 public void _endOfSentence() {
391 boolean bRes = true;
392 for(int i = 0; i < vSentenceStart.size(); i++) {
393 int start = vSentenceStart.get(i).intValue();
394 int end = oObj.endOfSentence(UnicodeString, start, locale);
395 bRes &= end > start;
396 log.println("Sentence " + i + " range is [" + start + ", "
397 + end + "]");
400 //test for invalid nStartPosition
401 boolean bInvRes = oObj.endOfSentence(UnicodeString, -10, locale) == -1;
402 bInvRes &= oObj.endOfSentence(UnicodeString,
403 UnicodeString.length() + 1, locale) == -1;
405 if (!bInvRes) {
406 log.println("When invalid position, returned value isn't equal to -1");
409 tRes.tested("endOfSentence()", bRes && bInvRes);
413 * Tries to break a string in position other than 0 iterating characters
414 * from the string beginning (Hyphenation is not used for a while). <p>
416 * Has <b>OK</b> status if non-zero break position was found and it is
417 * less or equal than position we trying to break.
419 public void _getLineBreak() {
420 boolean bRes = true;
421 LineBreakResults lineBreakResults;
422 LineBreakHyphenationOptions lineBreakHyphenationOptions =
423 new LineBreakHyphenationOptions();
424 LineBreakUserOptions lineBreakUserOptions = new LineBreakUserOptions();
426 lineBreakUserOptions.applyForbiddenRules = false;
427 lineBreakUserOptions.allowHyphenateEnglish = false;
429 int breakPos = 0;
430 int pos = 0;
432 while(breakPos == 0 && pos < UnicodeString.length() ) {
433 lineBreakResults = oObj.getLineBreak(UnicodeString, pos,
434 locale, 0, lineBreakHyphenationOptions, lineBreakUserOptions);
435 breakPos = lineBreakResults.breakIndex;
436 pos++;
439 // finally the position of break must be found in the middle and
440 // it must be before the break position specified
441 bRes = breakPos <= pos && breakPos > 0;
443 if (!bRes) {
444 log.println("The last position was: " + pos
445 + ", and the break position was: " + breakPos);
448 tRes.tested("getLineBreak()", bRes);
451 // Asian type script
452 private static String katakana = new String(new char[] {0x30A1, 0x30A2}) ;
453 // Weak type script
454 private static String arrows = new String(new char[] {0x2190, 0x2191}) ;
455 // Complex type script
456 private static String arabic = new String(new char[] {0x0641, 0x0642}) ;
459 * Tries to find the beginning of the nearest script specified
460 * relatively to position passed. <p>
461 * Has <b>OK</b> status if the starting position of script is returned.
463 public void _beginOfScript() {
464 String multiScript = "ab" + katakana ;
466 int pos = oObj.beginOfScript(multiScript, 3, ScriptType.ASIAN) ;
468 log.println("Position = " + pos) ;
470 tRes.tested("beginOfScript()", pos == 2) ;
474 * Tries to find the end of the nearest script specified
475 * relatively to position passed. <p>
476 * Has <b>OK</b> status if the end position of script is returned.
478 public void _endOfScript() {
479 String multiScript = "ab" + katakana + "cd" ;
481 int pos = oObj.endOfScript(multiScript, 2, ScriptType.ASIAN) ;
483 log.println("Position = " + pos) ;
485 tRes.tested("endOfScript()", pos == 4) ;
489 * Tries to find the next script starting position specified
490 * relatively to position passed. <p>
491 * Has <b>OK</b> status if the appropriate position is returned.
493 public void _nextScript() {
494 String multiScript = "ab" + katakana + "cd" ;
496 int pos = oObj.nextScript(multiScript, 0, ScriptType.LATIN) ;
498 log.println("Position = " + pos) ;
500 tRes.tested("nextScript()", pos == 4) ;
504 * Tries to find the previous script starting position specified
505 * relatively to position passed. <p>
506 * Has <b>OK</b> status if the appropriate position is returned.
508 public void _previousScript() {
509 String multiScript = "ab" + katakana + "cd" ;
511 int pos = oObj.previousScript(multiScript, 5, ScriptType.ASIAN) ;
513 log.println("Position = " + pos) ;
515 tRes.tested("previousScript()", pos == 2) ;
519 * Tries to determine script type (of all four types). <p>
520 * Has <b>OK</b> status if <code>LATIN</code> type returned
521 * for ASCII character, <code>ASIAN</code> for Katakana Unicode
522 * codepoints, <code>COMPLEX</code> for Arabic Unicode
523 * codepoints and <code>WEAK</code> for codepoints from Arrows
524 * Unicode block.
526 public void _getScriptType() {
527 boolean res = true ;
529 res &= oObj.getScriptType("abcd", 0) == ScriptType.LATIN ;
530 res &= oObj.getScriptType(katakana, 0) == ScriptType.ASIAN;
531 res &= oObj.getScriptType(arabic, 0) == ScriptType.COMPLEX ;
532 res &= oObj.getScriptType(arrows, 0) == ScriptType.WEAK ;
534 tRes.tested("getScriptType()", res) ;
537 boolean bCharBlockRes = true;
539 protected short getCharBlockType(int pos) {
540 short i = 1;
541 short cType = 0;
542 while (i < 31) {
543 if (oObj.beginOfCharBlock(UnicodeString, pos, locale, i) != -1) {
544 cType = i;
545 i = 100;
547 i++;
550 return cType;
553 ArrayList<Boundary> vCharBlockBounds = new ArrayList<Boundary>();
554 ArrayList<Short> vCharBlockTypes = new ArrayList<Short>();
557 * Creates array of all char blocks with their boundaries and
558 * types using <code>beginOfCharBlock()</code> and
559 * <code>endOfCharBlock()</code> methods. <p>
561 * Has <b>OK</b> status if the end of each boundary is the same
562 * as start of the next one and if the start of the first block
563 * has position 0 and the end of the last block is at the end
564 * of the whole string.
566 public void _beginOfCharBlock() {
567 int iPos = 0;
569 while( iPos < UnicodeString.length() && iPos > -1) {
570 short charType = getCharBlockType(iPos);
571 int startPos = oObj.beginOfCharBlock(UnicodeString, iPos,
572 locale, charType);
573 int endPos = oObj.endOfCharBlock(UnicodeString, iPos,
574 locale, charType);
575 iPos = endPos;
576 vCharBlockBounds.add(new Boundary(startPos, endPos));
577 log.println(vCharBlockBounds.size() + "). Bounds: ["
578 + startPos + "," + endPos + "]; Type = " + charType);
579 vCharBlockTypes.add(Short.valueOf(charType));
582 for(int i = 0; i < vCharBlockBounds.size() - 1; i++) {
583 int endPos = vCharBlockBounds.get(i).endPos;
584 int startPos = vCharBlockBounds.get(i + 1).startPos;
585 if (endPos != startPos) {
586 bCharBlockRes = false;
590 log.println("Testing for no intersections : " + bCharBlockRes);
591 int startPos = vCharBlockBounds.get(0).startPos;
592 if (startPos != 0) {
593 bCharBlockRes = false;
595 int endPos = vCharBlockBounds.get
596 (vCharBlockBounds.size() - 1).endPos;
597 if (endPos != UnicodeString.length()) {
598 bCharBlockRes = false;
600 log.println("Regions should starts with 0 and ends with "
601 + UnicodeString.length());
603 tRes.tested("beginOfCharBlock()", bCharBlockRes);
607 * Testing of this method is performed in <code>beginOfCharBlock()</code>
608 * method test. <p>
610 * Has the status same as <code>beginOfCharBlock()</code> method status.
612 public void _endOfCharBlock() {
613 requiredMethod("beginOfCharBlock()");
614 tRes.tested("endOfCharBlock()", bCharBlockRes);
618 * For every character block obtained in <code>beginOfCharBlock()</code>
619 * method test (except the first) tries to find its starting position
620 * by mean of <code>nextCharBlock()</code> method passing as position
621 * argument the position before the start of a block. <p>
623 * Has <b>OK</b> status if the start of every block was found and it's
624 * equal to this block boundary start.
626 public void _nextCharBlock() {
627 requiredMethod("beginOfCharBlock()");
629 boolean bRes = true;
630 for(int i = 0; i < vCharBlockBounds.size(); i++) {
631 Boundary bounds = vCharBlockBounds.get(i);
632 Short type = vCharBlockTypes.get(i);
633 if (bounds.startPos - 1 < 0) continue;
634 int iPos = oObj.nextCharBlock(UnicodeString, bounds.startPos - 1,
635 locale, type.shortValue());
636 if (iPos != bounds.startPos) {
637 bRes = false;
638 log.println("nextCharBlock(UnicodeString, "
639 + (bounds.startPos - 1) + ", locale, " + type
640 + ") should return " + bounds.startPos);
641 log.println("... and actual value is " + iPos);
645 tRes.tested("nextCharBlock()", bRes);
649 * For every character block obtained in <code>beginOfCharBlock()</code>
650 * method test (except the first) tries to find its starting position
651 * by mean of <code>previousCharBlock()</code> method passing as position
652 * argument the position after the end of a block. <p>
654 * Has <b>OK</b> status if the start of every block was found and it's
655 * equal to this block boundary start.
657 public void _previousCharBlock() {
658 requiredMethod("beginOfCharBlock()");
660 boolean bRes = true;
661 for(int i = 0; i < vCharBlockBounds.size(); i++) {
662 Boundary bounds = vCharBlockBounds.get(i);
663 Short type = vCharBlockTypes.get(i);
664 int iPos = oObj.previousCharBlock(UnicodeString,
665 bounds.endPos + 1, locale, type.shortValue());
666 if (iPos != bounds.startPos) {
667 bRes = false;
668 log.println("previousCharBlock(UnicodeString, "
669 + (bounds.endPos + 1) + ", locale, " + type
670 + ") should return " + bounds.startPos);
671 log.println("... and actual value is " + iPos);
675 tRes.tested("previousCharBlock()", bRes);