Update ooo320-m1
[ooovba.git] / transex3 / source / wtratree.cxx
blob05e3f2e69a46d1c325df9006134a7a87daad27bc
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: wtratree.cxx,v $
10 * $Revision: 1.5 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_transex3.hxx"
35 #include "wtratree.hxx"
39 /** @ATTENTION
40 For reasons of speed, class WordTransTree works with two simple
41 char arrays, sOutput and sInput, instead of secure containers or
42 streams. So be extremely careful, when changing this code!!!
43 **/
47 // NOT FULLY DECLARED SERVICES
48 #include <string.h>
49 #include <stdio.h>
50 #include <ctype.h>
51 #include "wtranode.hxx"
54 const BRANCH_T BR_END = 0;
55 const BRANCH_T BR_NONALPHA = 1;
56 const BRANCH_T BR_HOTKEY = 2;
57 const BRANCH_T BR_BACKSLASH = 3;
58 const BRANCH_T BR_ALPHABASE = 4; /// @ATTENTION All branches not valid for words must be smaller than this value!
59 const BRANCH_T BR_AE = 30;
60 const BRANCH_T BR_OE = 31;
61 const BRANCH_T BR_UE = 32;
62 const BRANCH_T BR_SZ = 33;
63 const BRANCH_T BR_MAX = 34; /// @ATTENTION Must be updated always!
65 const BRANCH_T BR_START = 0;
71 WordTransTree::WordTransTree(CharSet i_nWorkingCharSet)
72 : sInput(0),
73 nInputLength(0),
74 pInputEnd(0),
75 sOutput(0),
76 nOutputMaxLength(0),
77 dpParsingTreeTop(0),
78 pUnknownAlpha(0),
79 // cChar2Branch
80 c_AE(u_char('\xC4')), c_OE(u_char('\xD6')), c_UE(u_char('\xDC')),
81 c_ae(u_char('\xE4')), c_oe(u_char('\xF6')), c_ue(u_char('\xFC')),
82 pInputCurTokenStart(0),
83 pInputPosition(0),
84 pOutputPosition(0),
85 pCurParseNode(0),
86 eCurResult(OK),
87 cCurHotkey(0),
88 cCurHotkeySign(u_char('~'))
90 // Initialize parsing tree:
91 pUnknownAlpha = new WTT_Node(BR_ALPHABASE,0,0); // This will be deleted as part of the parsing tree.
92 for ( UINT8 i = BR_ALPHABASE; i < C_NR_OF_BRANCHES; i++)
94 pUnknownAlpha->SetBranch(i,pUnknownAlpha);
95 } // end for
97 dpParsingTreeTop = new WTT_Node(BR_START,0,pUnknownAlpha);
99 WTT_Node * dpNonAlpha = new WTT_Node(BR_NONALPHA,0,0);
101 dpNonAlpha->SetBranch(BR_NONALPHA,dpNonAlpha);
102 dpParsingTreeTop->SetBranch(BR_NONALPHA,dpNonAlpha);
104 WTT_Node * dpBackslash = new WTT_Node(BR_BACKSLASH,dpNonAlpha,dpNonAlpha);
105 dpBackslash->SetBranch(BR_END,0);
107 dpParsingTreeTop->SetBranch(BR_BACKSLASH,dpBackslash);
108 dpNonAlpha->SetBranch(BR_BACKSLASH,dpBackslash);
111 // Initialize character set:
112 SetCharSet(i_nWorkingCharSet);
114 if (C_BR_ALPHABASE != BR_ALPHABASE || C_NR_OF_BRANCHES != BR_MAX)
116 fprintf(stderr, "Assertion failed: file %s line %d.", __FILE__, __LINE__);
117 exit(1);
121 void
122 WordTransTree::SetCharSet(CharSet i_nWorkingCharSet)
124 ByteString sConvert("\xC4\xD6\xDC\xE4\xF6\xFC\xDF");
125 const u_char * pConvert = (const u_char * ) ( sConvert.Convert(RTL_TEXTENCODING_MS_1252, i_nWorkingCharSet).GetBuffer() );
127 INT16 i = 0;
128 for ( ; i < C_NR_OF_POSSIBLE_CHARS; ++i )
130 cChar2Branch[i] = BR_NONALPHA;
131 } // end for
132 for ( i = 'a'; i <= 'z'; ++i )
134 cChar2Branch[i] = BR_ALPHABASE + i - 'a';
135 } // end for
136 for ( i = 'A'; i <= 'Z'; ++i )
138 cChar2Branch[i] = BR_ALPHABASE + i - 'A';
139 } // end for
140 cChar2Branch[pConvert[0]] = BR_AE;
141 cChar2Branch[pConvert[1]] = BR_OE;
142 cChar2Branch[pConvert[2]] = BR_UE;
143 cChar2Branch[pConvert[3]] = BR_AE;
144 cChar2Branch[pConvert[4]] = BR_OE;
145 cChar2Branch[pConvert[5]] = BR_UE;
146 cChar2Branch[pConvert[6]] = BR_SZ;
148 cChar2Branch[u_char('~')] = BR_HOTKEY;
149 cChar2Branch[u_char('&')] = BR_HOTKEY;
152 c_AE = pConvert[0];
153 c_OE = pConvert[1];
154 c_UE = pConvert[2];
155 c_ae = pConvert[3];
156 c_oe = pConvert[4];
157 c_ue = pConvert[5];
160 WordTransTree::~WordTransTree()
162 delete dpParsingTreeTop;
163 if (sOutput != 0)
164 delete [] sOutput;
167 void
168 WordTransTree::AddWordPair( const ByteString & i_sOldString,
169 const ByteString & i_sReplaceString )
171 if (i_sOldString.Len() == 0)
172 return;
174 pCurParseNode = dpParsingTreeTop;
175 WTT_Node * pBranch = 0;
176 char cBranch = 0;
178 for ( constr pOld = i_sOldString.GetBuffer();
179 *pOld != 0;
180 pOld++ )
182 cBranch = CalculateBranch(*pOld);
183 pBranch = pCurParseNode->GetNextNode(cBranch);
184 if (pBranch == 0 || pBranch == pUnknownAlpha)
186 pBranch = new WTT_Node(cBranch,0,pUnknownAlpha);
187 pCurParseNode->SetBranch(cBranch,pBranch);
189 pCurParseNode = pBranch;
190 } // end for
191 pCurParseNode->SetAsTokenToReplace(i_sReplaceString);
194 void
195 WordTransTree::InitTransformation( const char * i_sInput,
196 UINT32 i_nInputLength,
197 UINT32 i_nOutputMaxLength )
199 sInput = (const u_char *)i_sInput;
200 nInputLength = i_nInputLength;
201 pInputEnd = &sInput[i_nInputLength];
203 pInputCurTokenStart = sInput;
204 pInputPosition = sInput;
206 if (nOutputMaxLength < i_nOutputMaxLength)
208 if (sOutput != 0)
209 delete [] sOutput;
210 sOutput = new unsigned char[i_nOutputMaxLength];
211 nOutputMaxLength = i_nOutputMaxLength;
213 pOutputPosition = sOutput;
216 /** pInputCurTokenStart and CurParseNode are updated just when
217 starting this function. After its end they must not be changed
218 till this functon is called again.
219 Outside this function pInputPositon and pOutputPosition are both
220 on the first not transformed char in their respective array.
222 WordTransTree::E_Result
223 WordTransTree::TransformNextToken()
225 pInputCurTokenStart = pInputPosition;
226 pCurParseNode = dpParsingTreeTop;
227 cCurHotkey = 0;
228 eCurResult = OK;
230 WTT_Node * pBranch = 0;
231 UINT8 cBranch = 0;
233 for ( pCurParseNode = dpParsingTreeTop;
234 pInputPosition != pInputEnd;
235 ++pInputPosition )
237 cBranch = CalculateBranch(*pInputPosition);
238 pBranch = pCurParseNode->GetNextNode( cBranch );
239 if (pBranch != 0)
241 pCurParseNode = pBranch;
243 else
245 if (cBranch == BR_HOTKEY) // current letter is '~' or '&'.
247 // Logic of the following. There are 9 possible cases -
248 // A = alphabetic letter, NA = non alphabetic, TB = token begin,
249 // Eot = end of text:
250 // 1. A~A set hotkey to following letter, continue
251 // 2. A~NA token end
252 // 3. A~Eot token end
253 // 4. NA~A token end
254 // 5. NA~NA continue
255 // 6. A~Eof continue
256 // 7. TB~A set hotkey to following letter, continue
257 // 8. TB~NA continue
258 // 9. TB~Eot continue
260 // bNext and Prev are true, if there are alphabetic letters:
261 BOOL bNext = pInputPosition + 1 != pInputEnd
262 ? CalculateBranch(pInputPosition[1]) >= BR_ALPHABASE
263 : FALSE;
264 BOOL bPrev = pCurParseNode->Value() >= BR_ALPHABASE;
266 if ( bNext && (bPrev || pCurParseNode == dpParsingTreeTop) )
267 { // case 1. and 7.
268 Handle_Hotkey();
269 continue;
271 else if (!bPrev && !bNext)
272 { // case 5.,6.,8.,9.
273 continue;
276 // Case 2.,3.,4. :
277 // so this should be handled as an end of a token.
279 if (pCurParseNode->TokenType() == WTT_Node::token_to_keep)
281 Handle_TokenToKeep();
282 return eCurResult;
284 else
286 Handle_TokenToTransform();
287 return eCurResult;
288 } // endif (pCurParseNode->TokenType() == WTT_Node::token_to_keep)
289 } // endif (pBranch == 0) else
290 } // end for
292 // If here, the text end is reached
293 if (pCurParseNode->TokenType() == WTT_Node::token_to_keep)
295 Handle_TokenToKeep();
296 return eCurResult;
298 else
300 Handle_TokenToTransform();
301 return eCurResult;
305 ByteString
306 WordTransTree::CurReplacingString() const
308 return pCurParseNode->ReplaceString();
311 void
312 WordTransTree::Handle_Hotkey()
314 if (cCurHotkey == 0) // Avoid to replace the first found hotkey by
315 // a later one - though this shouldn't happen anyway.
317 cCurHotkey = (pInputPosition+1) != pInputEnd ? pInputPosition[1] : 0;
318 cCurHotkeySign = *pInputPosition;
322 void
323 WordTransTree::Handle_TokenToKeep()
325 UINT32 nTokenLength = pInputPosition-pInputCurTokenStart;
327 memcpy(pOutputPosition,pInputCurTokenStart,nTokenLength);
329 pOutputPosition += nTokenLength;
330 *pOutputPosition = '\0';
333 void
334 WordTransTree::Handle_TokenToTransform()
336 BOOL bHaveHotkey = CalculateBranch(cCurHotkey) >= BR_ALPHABASE;
337 const ByteString & rReplace = pCurParseNode->ReplaceString();
339 // Find position of hotkey in replace-string:
340 USHORT nHotkeyPos = bHaveHotkey
341 ? rReplace.Search(char(cCurHotkey))
342 : STRING_NOTFOUND;
343 if (nHotkeyPos == STRING_NOTFOUND && bHaveHotkey)
345 if (cCurHotkey < 128)
347 if (islower(cCurHotkey))
348 nHotkeyPos = rReplace.Search(toupper(char(cCurHotkey)));
349 else
350 nHotkeyPos = rReplace.Search(tolower(char(cCurHotkey)));
352 else // cCurHotkey >= 128
354 if (cCurHotkey == c_ae)
355 nHotkeyPos = rReplace.Search(char(c_AE));
356 else if (cCurHotkey == c_oe)
357 nHotkeyPos = rReplace.Search(char(c_OE));
358 else if (cCurHotkey == c_ue)
359 nHotkeyPos = rReplace.Search(char(c_UE));
360 else if (cCurHotkey == c_AE)
361 nHotkeyPos = rReplace.Search(char(c_ae));
362 else if (cCurHotkey == c_OE)
363 nHotkeyPos = rReplace.Search(char(c_oe));
364 else if (cCurHotkey == c_UE)
365 nHotkeyPos = rReplace.Search(char(c_ue));
366 } // endif (cCurHotkey < 128) else
368 if (nHotkeyPos == STRING_NOTFOUND)
370 eCurResult = HOTKEY_LOST;
371 bHaveHotkey = FALSE;
373 } // endif (nHotkeyPos == STRING_NOT_FOUND && bHaveHotkey)
376 UINT32 nOutputTokenLength = rReplace.Len() + (bHaveHotkey ? 1 : 0);
378 if (bHaveHotkey)
380 memcpy( pOutputPosition,
381 pCurParseNode->ReplaceString().GetBuffer(),
382 nHotkeyPos );
383 *(pOutputPosition + nHotkeyPos) = cCurHotkeySign;
384 memcpy( pOutputPosition + nHotkeyPos + 1,
385 pCurParseNode->ReplaceString().GetBuffer() + nHotkeyPos,
386 nOutputTokenLength - nHotkeyPos - 1);
388 else
390 memcpy( pOutputPosition,
391 pCurParseNode->ReplaceString().GetBuffer(),
392 nOutputTokenLength );
395 // Convert first letter into upper if necessary:
396 u_char cInStart = CalculateBranch(*pInputCurTokenStart) == BR_HOTKEY
397 ? pInputCurTokenStart[1]
398 : pInputCurTokenStart[0] ;
399 u_char * pOutStart = nHotkeyPos == 0
400 ? pOutputPosition + 1
401 : pOutputPosition ;
402 if (isupper(cInStart) || cInStart > 127)
403 { // Possibly cInStart is upper character:
404 if (isupper(cInStart) || cInStart == c_AE || cInStart == c_OE || cInStart == c_UE)
405 { // Surely cInStart is upper character:
406 u_char cOutStart = *pOutStart;
407 if (cOutStart < 128)
408 *pOutStart = toupper(cOutStart);
409 else if (cOutStart == c_ae)
410 *pOutStart = c_AE;
411 else if (cOutStart == c_oe)
412 *pOutStart = c_OE;
413 else if (cOutStart == c_ue)
414 *pOutStart = c_UE;
416 } // endif (isupper(cInStart) || cInStart > 127)
418 pOutputPosition += nOutputTokenLength;
419 *pOutputPosition = '\0';