1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
27 #ifndef __com_sun_star_i18n_XBreakIterator_idl__
28 #define __com_sun_star_i18n_XBreakIterator_idl__
30 #ifndef __com_sun_star_lang_Locale_idl__
31 #include
<com
/sun
/star
/lang
/Locale.idl
>
34 #ifndef __com_sun_star_i18n_LineBreakUserOptions_idl__
35 #include
<com
/sun
/star
/i18n
/LineBreakUserOptions.idl
>
38 #ifndef __com_sun_star_i18n_LineBreakHyphenationOptions_idl__
39 #include
<com
/sun
/star
/i18n
/LineBreakHyphenationOptions.idl
>
42 #ifndef __com_sun_star_i18n_LineBreakResults_idl__
43 #include
<com
/sun
/star
/i18n
/LineBreakResults.idl
>
46 #ifndef __com_sun_star_i18n_Boundary_idl__
47 #include
<com
/sun
/star
/i18n
/Boundary.idl
>
50 //============================================================================
52 module com
{ module sun
{ module star
{ module i18n
{
54 //============================================================================
57 contains the base routines for iterating over a Unicode string. Iterates
58 over characters, words, sentences and line breaks.
60 <p> Assumption: StartPos is inclusive and EndPos is exclusive. </p>
63 published
interface XBreakIterator
: com
::sun
::star
::uno
::XInterface
65 //------------------------------------------------------------------------
66 /** Traverses specified number of characters/cells in Text from
67 <em>nStartPos</em> forwards.
68 <type>CharacterIteratorMode</type> can be cell based or
69 character based. A cell can be made up of more than one character.
72 Number of characters to traverse, it should not be less than 0.
73 If you want to traverse in the opposite direction use
74 <member>XBreakIterator::previousCharacters()</member> instead.
76 long nextCharacters  // traverses characters/cells in aText from nStartPos forwards
( [in] string aText  // text to traverse
, [in] long nStartPos  // position the traversal starts from
,
77 [in] ::com
::sun
::star
::lang
::Locale aLocale  // locale argument (role not described in the visible documentation)
,
78 [in] short nCharacterIteratorMode  // a CharacterIteratorMode value: cell based or character based
,
79 [in] long nCount  // number of characters to traverse; should not be less than 0
, [out] long nDone  // NOTE(review): presumably the number actually traversed -- confirm
);  // NOTE(review): the long result is presumably the position reached -- confirm
81 //------------------------------------------------------------------------
82 /** Traverses specified number of characters/cells in Text from
83 <em>nStartPos</em> backwards.
84 <type>CharacterIteratorMode</type> can be cell based or
85 character based. A cell can be made up of more than one character.
88 Number of characters to traverse, it should not be less than 0.
89 If you want to traverse in the opposite direction use
90 <member>XBreakIterator::nextCharacters()</member> instead.
92 long previousCharacters  // traverses characters/cells in aText from nStartPos backwards
( [in] string aText  // text to traverse
, [in] long nStartPos  // position the traversal starts from
,
93 [in] ::com
::sun
::star
::lang
::Locale aLocale  // locale argument (role not described in the visible documentation)
,
94 [in] short nCharacterIteratorMode  // a CharacterIteratorMode value: cell based or character based
,
95 [in] long nCount  // number of characters to traverse; should not be less than 0
, [out] long nDone  // NOTE(review): presumably the number actually traversed -- confirm
);  // NOTE(review): the long result is presumably the position reached -- confirm
97 //------------------------------------------------------------------------
98 /** Traverses one word in Text from <em>nStartPos</em> forwards.
101 One of <type>WordType</type>, specifies the type of
105 The <type>Boundary</type> of the found word. Normally used for
108 Boundary nextWord  // traverses one word forwards; result is the Boundary of the found word
( [in] string aText  // text to traverse
, [in] long nStartPos  // position the traversal starts from
,
109 [in] ::com
::sun
::star
::lang
::Locale aLocale  // locale argument (role not described in the visible documentation)
,
110 [in] short nWordType  // one of the WordType values
);
112 //------------------------------------------------------------------------
113 /** Traverses one word in Text from <em>nStartPos</em> backwards.
116 The locale of the character preceding <em>nStartPos</em>.
118 <p> If the previous character is a space character and
119 <em>nWordType</em> indicates spaces should be skipped, and
120 if the first non-space character is an Asian character,
121 then, since Asian word break needs language specific
122 wordbreak dictionaries, the method will return -1 in
123 <member>Boundary::endPos</member> and the position after the
124 Asian character (i.e. the space character) in
125 <member>Boundary::startPos</member>. The caller then has to
126 call this method again with a correct <em>aLocale</em>
127 referring to the Asian character, which is then the previous
128 character of the space character where <em>nStartPos</em>
131 <p> <b>Note</b> that the OpenOffice.org 1.0 / StarOffice 6.0
132 / StarSuite 6.0 i18n framework doesn't behave like this and
133 mixed Western/CJK text may lead to wrong word iteration.
134 This is fixed in later versions. </p>
137 One of <type>WordType</type>, specifies the type of
141 The <type>Boundary</type> of the found word. Normally used for
144 Boundary previousWord  // traverses one word backwards; result is the Boundary of the found word
( [in] string aText  // text to traverse
, [in] long nStartPos  // position the traversal starts from
,
145 [in] ::com
::sun
::star
::lang
::Locale aLocale  // locale of the character preceding nStartPos
,
146 [in] short nWordType  // one of the WordType values
);  // Boundary::endPos may be -1 (Asian character before skipped spaces); the caller then retries with a matching aLocale
148 //------------------------------------------------------------------------
149 /** Identifies StartPos and EndPos of current word.
151 <p> If <em>nPos</em> is the boundary of a word, it is StartPos
152 of one word and EndPos of previous word. In this situation, the
153 outcome of the algorithm can be indeterminate. In this situation
154 the <em>bPreferForward</em> flag is used. If bPreferForward ==
155 <FALSE/>, <em>nPos</em> is considered to be the end of the word
156 and we look backwards for beginning of word, otherwise
157 <em>nPos</em> is considered to be the start of the next word and
158 we look forwards for the end of the word. </p>
161 One of <type>WordType</type>.
164 The Boundary of the current word.
166 Boundary getWordBoundary  // identifies StartPos and EndPos of the current word
( [in] string aText  // text containing the word
, [in] long nPos  // position to examine; may lie exactly on a word boundary
,
167 [in] ::com
::sun
::star
::lang
::Locale aLocale  // locale argument (role not described in the visible documentation)
,
168 [in] short nWordType  // one of the WordType values
,
169 [in] boolean bPreferForward  // FALSE: treat nPos as the end of a word and look backwards; otherwise treat it as the start of the next word and look forwards
);  // result is the Boundary of the current word
171 //------------------------------------------------------------------------
173 Get the <type>WordType</type> of the word that starts at
174 position <em>nPos</em>.
176 <p> This method is mis-defined, since <type>WordType</type>
177 is not an attribute of a word, but a way to break words,
178 like excluding or including tail spaces for spellchecker
179 or cursor traveling. It always returns 0.
182 short getWordType  // documented as mis-defined: WordType is a way to break words, not a word attribute; always returns 0
( [in] string aText  // text containing the word
, [in] long nPos  // position at which the word starts
,
183 [in] ::com
::sun
::star
::lang
::Locale aLocale  // locale argument (role not described in the visible documentation)
);
185 //------------------------------------------------------------------------
186 /** If a word starts at position <em>nPos</em>.
188 <p> It is possible that both this method
189 and the following method <em>isEndWord</em> return
190 <TRUE/>, since StartPos of a word is inclusive
191 while EndPos of a word is exclusive.
195 boolean isBeginWord  // tells whether a word starts at nPos; isEndWord may also be TRUE for the same position
( [in] string aText  // text to examine
, [in] long nPos  // position to test
,
196 [in] ::com
::sun
::star
::lang
::Locale aLocale  // locale argument (role not described in the visible documentation)
,
197 [in] short nWordType  // NOTE(review): presumably a WordType value, as in the other word methods -- confirm
);
199 //------------------------------------------------------------------------
200 /** If a word ends at position <em>nPos</em>.
202 boolean isEndWord  // tells whether a word ends at nPos
( [in] string aText  // text to examine
, [in] long nPos  // position to test
,
203 [in] ::com
::sun
::star
::lang
::Locale aLocale  // locale argument (role not described in the visible documentation)
,
204 [in] short nWordType  // NOTE(review): presumably a WordType value, as in the other word methods -- confirm
);
206 //------------------------------------------------------------------------
207 /** Traverses in Text from <em>nStartPos</em> to the start of a
211 The position where the sentence starts.
213 long beginOfSentence  // result is the position where the sentence starts
( [in] string aText  // text to traverse
, [in] long nStartPos  // position the traversal starts from
,
214 [in] ::com
::sun
::star
::lang
::Locale aLocale  // locale argument (role not described in the visible documentation)
);
216 //------------------------------------------------------------------------
217 /** Traverses in Text from <em>nStartPos</em> to the end of a
221 The position where the sentence ends.
223 long endOfSentence  // result is the position where the sentence ends
( [in] string aText  // text to traverse
, [in] long nStartPos  // position the traversal starts from
,
224 [in] ::com
::sun
::star
::lang
::Locale aLocale  // locale argument (role not described in the visible documentation)
);
226 //------------------------------------------------------------------------
227 /** Calculate the line break position in the Text from the specified
231 Defines a minimum break position for hyphenated line break.
232 When the position for hyphenated line break is less than
233 <em>nMinBreakPos</em>, break position in
234 <type>LineBreakResults</type> is set to -1.
237 Defines if the hyphenator is to be used.
240 Defines how to handle hanging punctuations and forbidden
241 characters at the start/end of a line.
244 The <type>LineBreakResults</type> contain the break
245 position of the line, <type>BreakType</type> and
246 <type scope="com::sun::star::linguistic2">XHyphenatedWord</type>
248 LineBreakResults getLineBreak  // calculates the line break position in aText
( [in] string aText  // text in which the line break is calculated
, [in] long nStartPos  // NOTE(review): the description of this position is cut off in the visible text -- confirm its meaning
,
249 [in] ::com
::sun
::star
::lang
::Locale aLocale  // locale argument (role not described in the visible documentation)
,
250 [in] long nMinBreakPos  // minimum position for a hyphenated break; a hyphenated break below it yields break position -1
,
251 [in] LineBreakHyphenationOptions aHyphOptions  // defines if the hyphenator is to be used
,
252 [in] LineBreakUserOptions aUserOptions  // handling of hanging punctuation and forbidden characters at line start/end
);  // result carries the break position, the BreakType and the XHyphenatedWord
254 //------------------------------------------------------------------------
255 /** Traverses in Text from <em>nStartPos</em> to the beginning of
256 the specified script type.
259 One of <type>ScriptType</type>.
262 The position where the script type starts.
264 long beginOfScript  // result is the position where the script type starts
( [in] string aText  // text to traverse
, [in] long nStartPos  // position the traversal starts from
,
265 [in] short nScriptType  // one of the ScriptType values
);
267 //------------------------------------------------------------------------
268 /** Traverses in Text from <em>nStartPos</em> to the end of the
269 specified script type.
272 One of <type>ScriptType</type>.
275 The position where the script type ends.
277 long endOfScript  // result is the position where the script type ends
( [in] string aText  // text to traverse
, [in] long nStartPos  // position the traversal starts from
,
278 [in] short nScriptType  // one of the ScriptType values
);
280 //------------------------------------------------------------------------
281 /** Traverses in Text from <em>nStartPos</em> to the next start of
282 the specified script type.
285 One of <type>ScriptType</type>.
288 The position where the next script type starts.
290 long nextScript  // result is the position where the next script type starts
( [in] string aText  // text to traverse
, [in] long nStartPos  // position the traversal starts from
,
291 [in] short nScriptType  // one of the ScriptType values
);
293 //------------------------------------------------------------------------
294 /** Traverses in Text from <em>nStartPos</em> to the previous start
295 of the specified script type.
298 One of <type>ScriptType</type>.
301 The position where the previous script type starts.
303 long previousScript  // result is the position where the previous script type starts
( [in] string aText  // text to traverse
, [in] long nStartPos  // position the traversal starts from
,
304 [in] short nScriptType  // one of the ScriptType values
);
306 //------------------------------------------------------------------------
307 /** Get the script type of the character at position <em>nPos</em>.
310 One of <type>ScriptType</type>.
312 short getScriptType  // result is one of the ScriptType values
( [in] string aText  // text containing the character
, [in] long nPos  // position of the character whose script type is queried
);
314 //------------------------------------------------------------------------
315 /** Traverses in Text from <em>nStartPos</em> to the beginning of
316 the specified character type.
319 One of <type>CharType</type>.
322 The position where the character type starts.
324 long beginOfCharBlock  // result is the position where the character type starts
( [in] string aText  // text to traverse
, [in] long nStartPos  // position the traversal starts from
,
325 [in] ::com
::sun
::star
::lang
::Locale aLocale  // locale argument (role not described in the visible documentation)
,
326 [in] short nCharType  // one of the CharType values
);
328 //------------------------------------------------------------------------
329 /** Traverses in Text from <em>nStartPos</em> to the end of the
330 specified character type.
333 One of <type>CharType</type>.
336 The position where the character type ends.
338 long endOfCharBlock  // result is the position where the character type ends
( [in] string aText  // text to traverse
, [in] long nStartPos  // position the traversal starts from
,
339 [in] ::com
::sun
::star
::lang
::Locale aLocale  // locale argument (role not described in the visible documentation)
,
340 [in] short nCharType  // one of the CharType values
);
342 //------------------------------------------------------------------------
343 /** Traverses in Text from <em>nStartPos</em> to the next start of
344 the specified character type.
347 One of <type>CharType</type>.
350 The position where the next character type starts.
352 long nextCharBlock  // result is the position where the next character type starts
( [in] string aText  // text to traverse
, [in] long nStartPos  // position the traversal starts from
,
353 [in] ::com
::sun
::star
::lang
::Locale aLocale  // locale argument (role not described in the visible documentation)
,
354 [in] short nCharType  // one of the CharType values
);
356 //------------------------------------------------------------------------
357 /** Traverses in Text from <em>nStartPos</em> to the previous start
358 of the specified character type.
361 One of <type>CharType</type>.
364 The position where the previous character type starts.
366 long previousCharBlock  // result is the position where the previous character type starts
( [in] string aText  // text to traverse
, [in] long nStartPos  // position the traversal starts from
,
367 [in] ::com
::sun
::star
::lang
::Locale aLocale  // locale argument (role not described in the visible documentation)
,
368 [in] short nCharType  // one of the CharType values
);
371 //============================================================================