bump product version to 5.0.4.1
[LibreOffice.git] / svl / source / numbers / zforfind.hxx
blob4afe81dfd9b4d2703de2b4fc00f8c9c5ec9a886a
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
21 #define INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
23 #include <com/sun/star/uno/Sequence.hxx>
24 #include <rtl/ustring.hxx>
26 class Date;
27 class SvNumberformat;
28 class SvNumberFormatter;
30 #define SV_MAX_ANZ_INPUT_STRINGS 20 // max count of substrings in input scanner
// Input scanner used for number recognition: IsNumberFormat() converts an
// input string to a number; the private members hold the provisional scan
// results and the helper routines that work on them.
// NOTE(review): the leading number on each line is a listing number; gaps in
// that numbering suggest that brace-only and comment-terminator lines were
// dropped when this text was extracted -- confirm against the repository copy.
32 class ImpSvNumberInputScan
34 public:
35 ImpSvNumberInputScan( SvNumberFormatter* pFormatter );
36 ~ImpSvNumberInputScan();
38 /*!*/ void ChangeIntl(); // MUST be called if language changes
40 /// set reference date for offset calculation
41 void ChangeNullDate( const sal_uInt16 nDay,
42 const sal_uInt16 nMonth,
43 const sal_uInt16 nYear );
45 /// convert input string to number
46 bool IsNumberFormat( const OUString& rString, /// input string
47 short& F_Type, /// format type (in + out)
48 double& fOutNumber, /// value determined (out)
49 const SvNumberformat* pFormat = NULL); /// optional number format to compare against
51 /// after IsNumberFormat: get decimal position
52 short GetDecPos() const { return nDecPos; }
53 /// after IsNumberFormat: get count of numeric substrings in input string
54 sal_uInt16 GetAnzNums() const { return nAnzNums; }
56 /// set threshold of two-digit year input
57 void SetYear2000( sal_uInt16 nVal ) { nYear2000 = nVal; }
58 /// get threshold of two-digit year input
59 sal_uInt16 GetYear2000() const { return nYear2000; }
61 /** Whether input can be forced to ISO 8601 format.
63 Depends on locale's date separator and a specific date format order.
65 @param eDateFormat
66 Evaluated only on first call during one scan process, subsequent
67 calls return state of nCanForceToIso8601!
69 @see nCanForceToIso8601
71 bool CanForceToIso8601( DateFormat eDateFormat );
// NOTE(review): undocumented here; presumably discards the cached
// sDateAcceptancePatterns so they get fetched again -- confirm in the
// implementation file.
73 void InvalidateDateAcceptancePatterns();
75 private:
76 SvNumberFormatter* pFormatter;
77 OUString* pUpperMonthText; //* Array of month names, uppercase
78 OUString* pUpperAbbrevMonthText; //* Array of month names, abbreviated, uppercase
79 OUString* pUpperGenitiveMonthText; //* Array of genitive month names, uppercase
80 OUString* pUpperGenitiveAbbrevMonthText; //* Array of genitive month names, abbreviated, uppercase
81 OUString* pUpperPartitiveMonthText; //* Array of partitive month names, uppercase
82 OUString* pUpperPartitiveAbbrevMonthText; //* Array of partitive month names, abbreviated, uppercase
83 OUString* pUpperDayText; //* Array of day of week names, uppercase
84 OUString* pUpperAbbrevDayText; //* Array of day of week names, abbreviated, uppercase
85 OUString aUpperCurrSymbol; //* Currency symbol, uppercase
86 bool bTextInitialized; //* Whether days and months are initialized
87 bool bScanGenitiveMonths; //* Whether to scan an input for genitive months
88 bool bScanPartitiveMonths; //* Whether to scan an input for partitive months
89 Date* pNullDate; //* 30Dec1899
90 // Variables for provisional results:
// The three arrays below are all sized by SV_MAX_ANZ_INPUT_STRINGS.
91 OUString sStrArray[SV_MAX_ANZ_INPUT_STRINGS]; //* Array of scanned substrings
92 bool IsNum[SV_MAX_ANZ_INPUT_STRINGS]; //* Whether a substring is numeric
93 sal_uInt16 nNums[SV_MAX_ANZ_INPUT_STRINGS]; //* Sequence of offsets to numeric strings
94 sal_uInt16 nAnzStrings; //* Total count of scanned substrings
95 sal_uInt16 nAnzNums; //* Count of numeric substrings
96 bool bDecSepInDateSeps; //* True <=> DecSep in {.,-,/,DateSep}
97 sal_uInt8 nMatchedAllStrings; //* Scan...String() matched all substrings,
99 // bit mask of nMatched... constants
100 static const sal_uInt8 nMatchedEndString; // 0x01
101 static const sal_uInt8 nMatchedMidString; // 0x02
102 static const sal_uInt8 nMatchedStartString; // 0x04
103 static const sal_uInt8 nMatchedVirgin; // 0x08
104 static const sal_uInt8 nMatchedUsedAsReturn; // 0x10
106 int nSign; // Sign of number
107 int nMonth; // Month (1..x) if date
108 // negative => short format
109 short nMonthPos; // 1 = front, 2 = middle
110 // 3 = end
111 int nDayOfWeek; // Temporary (!) day of week (1..7,-1..-7) if date
112 sal_uInt16 nTimePos; // Index of first time separator (+1)
113 short nDecPos; // Index of substring containing "," (+1)
114 short nNegCheck; // '( )' for negative
115 short nESign; // Sign of exponent
116 short nAmPm; // +1 AM, -1 PM, 0 if none
117 short nLogical; // -1 => False, 1 => True
118 sal_uInt16 nThousand; // Count of group (AKA thousand) separators
119 sal_uInt16 nPosThousandString; // Position of concatenated 000,000,000 string
120 short eScannedType; // Scanned type
121 short eSetType; // Preset Type
123 sal_uInt16 nStringScanNumFor; // Fixed strings recognized in
124 // pFormat->NumFor[nStringScanNumFor]
125 short nStringScanSign; // Sign resulting of FixString
126 sal_uInt16 nYear2000; // Two-digit threshold
127 // Year as 20xx
128 // default 18
129 // number <= nYear2000 => 20xx
130 // number > nYear2000 => 19xx
131 sal_uInt16 nTimezonePos; // Index of timezone separator (+1)
133 /** State of ISO 8601 detection.
135 0:= don't know yet
136 1:= no
137 2:= yes, <=2 digits in year
138 3:= yes, 3 digits in year
139 4:= yes, >=4 digits in year
141 @see MayBeIso8601()
143 sal_uInt8 nMayBeIso8601;
145 /** State of ISO 8601 can be forced.
147 0:= don't know yet
148 1:= no
149 2:= yes
151 @see CanForceToIso8601()
153 sal_uInt8 nCanForceToIso8601;
155 /** State of dd-month-yy or yy-month-dd detection, with month name.
157 0:= don't know yet
158 1:= no
159 2:= yes, dd-month-yy
160 3:= yes, yy-month-dd
162 @see MayBeMonthDate()
164 sal_uInt8 nMayBeMonthDate;
166 /** Input matched this locale dependent date acceptance pattern.
167 -2 if not checked yet, -1 if no match, >=0 matched pattern.
169 @see IsAcceptedDatePattern()
171 sal_Int32 nAcceptedDatePattern;
172 com::sun::star::uno::Sequence< OUString > sDateAcceptancePatterns;
174 /** If input matched a date acceptance pattern that starts at input
175 particle sStrArray[nDatePatternStart].
177 @see IsAcceptedDatePattern()
179 sal_uInt16 nDatePatternStart;
181 /** Count of numbers that matched the accepted pattern, if any, else 0.
183 @see GetDatePatternNumbers()
185 sal_uInt16 nDatePatternNumbers;
187 void Reset(); // Reset all variables before start of analysis
189 void InitText(); // Init of months and days of week
191 // Convert string to double.
192 // Only simple unsigned floating point values without any error detection,
193 // decimal separator has to be '.'
194 // If bForceFraction==true the string is taken to be the fractional part
195 // of 0.1234 without the leading 0. (thus being just "1234").
196 static double StringToDouble( const OUString& rStr,
197 bool bForceFraction = false );
199 // Next number/string symbol
200 static bool NextNumberStringSymbol( const sal_Unicode*& pStr,
201 OUString& rSymbol );
203 // Concatenate ,000,23 blocks
204 // in input to 000123
205 bool SkipThousands( const sal_Unicode*& pStr, OUString& rSymbol );
207 // Divide numbers/strings into
208 // arrays and variables above.
209 // Leading blanks and blanks
210 // after numbers are thrown away
211 void NumberStringDivision( const OUString& rString );
214 /** Whether rString contains word (!) rWhat at nPos.
215 rWhat will not be matched if it is a substring of a word.
217 bool StringContainsWord( const OUString& rWhat,
218 const OUString& rString,
219 sal_Int32 nPos );
221 // optimized substring versions
223 // Whether rString contains rWhat at nPos
224 static inline bool StringContains( const OUString& rWhat,
225 const OUString& rString,
226 sal_Int32 nPos )
// Nothing can match if rWhat is empty or nPos is at or beyond the end of
// rString; this guard also keeps the index accesses below in range.
228 if (rWhat.isEmpty() || rString.getLength() <= nPos)
230 return false;
232 // mostly used with one character
// so compare the first character inline and call the Impl only if it matches
233 if ( rWhat[ 0 ] != rString[ nPos ] )
235 return false;
237 return StringContainsImpl( rWhat, rString, nPos );
240 // Whether pString contains rWhat at nPos
241 static inline bool StringPtrContains( const OUString& rWhat,
242 const sal_Unicode* pString,
243 sal_Int32 nPos ) // nPos MUST be a valid offset from pString
// NOTE(review): unlike StringContains() there is no rWhat.isEmpty() guard
// before rWhat[ 0 ] is read -- callers presumably never pass an empty
// rWhat; confirm.
245 // mostly used with one character
246 if ( rWhat[ 0 ] != pString[ nPos ] )
248 return false;
250 return StringPtrContainsImpl( rWhat, pString, nPos );
253 //! DO NOT use directly
254 static bool StringContainsImpl( const OUString& rWhat,
255 const OUString& rString,
256 sal_Int32 nPos );
257 //! DO NOT use directly
258 static bool StringPtrContainsImpl( const OUString& rWhat,
259 const sal_Unicode* pString,
260 sal_Int32 nPos );
262 // Skip a special character
263 static inline bool SkipChar( sal_Unicode c,
264 const OUString& rString,
265 sal_Int32& nPos );
267 // Skip blank
268 static inline void SkipBlanks( const OUString& rString,
269 sal_Int32& nPos );
271 // Jump over rWhat in rString at nPos
272 static inline bool SkipString( const OUString& rWhat,
273 const OUString& rString,
274 sal_Int32& nPos );
276 // Recognizes exactly ,111 as group separator
277 inline bool GetThousandSep( const OUString& rString,
278 sal_Int32& nPos,
279 sal_uInt16 nStringPos );
280 // Get boolean value
281 short GetLogical( const OUString& rString );
283 // Get month and advance string position
284 short GetMonth( const OUString& rString,
285 sal_Int32& nPos );
287 // Get day of week and advance string position
288 int GetDayOfWeek( const OUString& rString,
289 sal_Int32& nPos );
291 // Get currency symbol and advance string position
292 bool GetCurrency( const OUString& rString,
293 sal_Int32& nPos,
294 const SvNumberformat* pFormat = NULL ); // optional number format to match against
296 // Get symbol AM or PM and advance string position
297 bool GetTimeAmPm( const OUString& rString,
298 sal_Int32& nPos );
300 // Get decimal separator and advance string position
301 inline bool GetDecSep( const OUString& rString,
302 sal_Int32& nPos );
304 // Get hundredth seconds separator and advance string position
305 inline bool GetTime100SecSep( const OUString& rString,
306 sal_Int32& nPos );
308 // Get sign and advance string position
309 // Including special case '('
310 int GetSign( const OUString& rString,
311 sal_Int32& nPos );
313 // Get sign of exponent and advance string position
314 static short GetESign( const OUString& rString,
315 sal_Int32& nPos );
317 // Get next number as array offset
318 inline bool GetNextNumber( sal_uInt16& i,
319 sal_uInt16& j );
321 /** Converts time -> double (only decimals)
323 @return TRUE if time, FALSE if not (e.g. hours >12 with AM/PM)
325 bool GetTimeRef( double& fOutNumber, // result as double
326 sal_uInt16 nIndex, // Index of hour in input
327 sal_uInt16 nAnz ); // Count of time substrings in input
328 sal_uInt16 ImplGetDay ( sal_uInt16 nIndex ); // Day input, 0 if no match
329 sal_uInt16 ImplGetMonth( sal_uInt16 nIndex ); // Month input, zero based return, NumberOfMonths if no match
330 sal_uInt16 ImplGetYear ( sal_uInt16 nIndex ); // Year input, 0 if no match
332 // Conversion of date to number
333 bool GetDateRef( double& fDays, // OUT: days diff to null date
334 sal_uInt16& nCounter, // Count of date substrings
335 const SvNumberformat* pFormat = NULL ); // optional number format to match against
337 // Analyze start of string
338 bool ScanStartString( const OUString& rString,
339 const SvNumberformat* pFormat = NULL );
341 // Analyze middle substring
342 bool ScanMidString( const OUString& rString,
343 sal_uInt16 nStringPos,
344 const SvNumberformat* pFormat = NULL );
347 // Analyze end of string
348 bool ScanEndString( const OUString& rString,
349 const SvNumberformat* pFormat = NULL );
351 // Compare rString to substring of array indexed by nString
352 // nString == 0xFFFF => last substring
353 bool ScanStringNumFor( const OUString& rString,
354 sal_Int32 nPos,
355 const SvNumberformat* pFormat,
356 sal_uInt16 nString,
357 bool bDontDetectNegation = false );
359 // if nMatchedAllStrings set nMatchedUsedAsReturn and return true,
360 // else do nothing and return false
361 bool MatchedReturn();
363 //! Be sure that the string to be analyzed is already converted to upper
364 //! case and if it contained native number digits that they are already
365 //! converted to ASCII.
367 // Main analyzing function
368 bool IsNumberFormatMain( const OUString& rString,
369 const SvNumberformat* pFormat = NULL); // optional number format to match against
371 static inline bool MyIsdigit( sal_Unicode c );
373 /** Whether input matches locale dependent date acceptance pattern.
375 @param nStartPatternAt
376 The pattern matching starts at input particle
377 sStrArray[nStartPatternAt].
379 NOTE: once called the result is remembered, subsequent calls with
380 different parameters do not check for a match and do not lead to a
381 different result.
383 bool IsAcceptedDatePattern( sal_uInt16 nStartPatternAt );
385 /** Sets (not advances!) rPos to sStrArray[nParticle].getLength() if string
386 matches separator in pattern at nParticle.
388 @returns TRUE if separator matched.
390 bool SkipDatePatternSeparator( sal_uInt16 nParticle, sal_Int32 & rPos );
392 /** Returns count of numbers in accepted date pattern.
394 sal_uInt16 GetDatePatternNumbers();
396 /** Obtain order of accepted date pattern coded as, for example,
397 ('D'<<16)|('M'<<8)|'Y'
399 sal_uInt32 GetDatePatternOrder();
401 /** Obtain date format order, from accepted date pattern if available or
402 otherwise the locale's default order.
404 DateFormat GetDateOrder();
406 /** Whether input may be an ISO 8601 date format, yyyy-mm-dd...
408 Checks if input has at least 3 numbers for yyyy-mm-dd and the separator
409 is '-', and 1<=mm<=12 and 1<=dd<=31.
411 @see nMayBeIso8601
413 bool MayBeIso8601();
415 /** Whether input may be a dd-month-yy format, with month name, not
416 number.
418 @see nMayBeMonthDate
420 bool MayBeMonthDate();
423 #endif // INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
425 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */