1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
23 #include <com/sun/star/uno/Sequence.hxx>
24 #include <rtl/ustring.hxx>
28 class SvNumberFormatter
;
// Also the fixed length of the sStrArray, IsNum and nNums member arrays of ImpSvNumberInputScan.
30 #define SV_MAX_ANZ_INPUT_STRINGS 20 // max count of substrings in input scanner
// Scanner that converts an input string to a number (see IsNumberFormat()),
// keeping the intermediate scan state in the member variables of this class.
32 class ImpSvNumberInputScan
// NOTE(review): the formatter is presumably stored in the pFormatter member -- confirm in the implementation.
35 ImpSvNumberInputScan( SvNumberFormatter
* pFormatter
);
36 ~ImpSvNumberInputScan();
38 /*!*/ void ChangeIntl(); // MUST be called if language changes
// NOTE(review): presumably updates the date held by pNullDate -- confirm in the implementation.
40 /// set reference date for offset calculation
41 void ChangeNullDate( const sal_uInt16 nDay
,
42 const sal_uInt16 nMonth
,
43 const sal_uInt16 nYear
);
// NOTE(review): the bool result presumably reports whether rString was recognized -- confirm in the implementation.
45 /// convert input string to number
46 bool IsNumberFormat( const OUString
& rString
, /// input string
47 short& F_Type
, /// format type (in + out)
48 double& fOutNumber
, /// value determined (out)
49 const SvNumberformat
* pFormat
= NULL
); /// optional number format to compare against
51 /// after IsNumberFormat: get decimal position (value of nDecPos)
52 short GetDecPos() const { return nDecPos
; }
53 /// after IsNumberFormat: get count of numeric substrings in input string
54 sal_uInt16
GetAnzNums() const { return nAnzNums
; }
56 /// set threshold of two-digit year input (number <= threshold => 20xx, number > threshold => 19xx)
57 void SetYear2000( sal_uInt16 nVal
) { nYear2000
= nVal
; }
58 /// get threshold of two-digit year input
59 sal_uInt16
GetYear2000() const { return nYear2000
; }
61 /** Whether input can be forced to ISO 8601 format.
63 Depends on locale's date separator and a specific date format order.
66 Evaluated only on first call during one scan process, subsequent
67 calls return state of nCanForceToIso8601!
@param eDateFormat
The date format order to check.
69 @see nCanForceToIso8601
71 bool CanForceToIso8601( DateFormat eDateFormat
);
// NOTE(review): presumably discards the cached sDateAcceptancePatterns -- confirm in the implementation.
73 void InvalidateDateAcceptancePatterns();
// Locale dependent text data. NOTE(review): the month and day arrays are presumably
// filled by InitText() and guarded by bTextInitialized -- confirm in the implementation.
76 SvNumberFormatter
* pFormatter
; //* Formatter (constructor argument of the same name)
77 OUString
* pUpperMonthText
; //* Array of month names, uppercase
78 OUString
* pUpperAbbrevMonthText
; //* Array of month names, abbreviated, uppercase
79 OUString
* pUpperGenitiveMonthText
; //* Array of genitive month names, uppercase
80 OUString
* pUpperGenitiveAbbrevMonthText
; //* Array of genitive month names, abbreviated, uppercase
81 OUString
* pUpperPartitiveMonthText
; //* Array of partitive month names, uppercase
82 OUString
* pUpperPartitiveAbbrevMonthText
; //* Array of partitive month names, abbreviated, uppercase
83 OUString
* pUpperDayText
; //* Array of day of week names, uppercase
84 OUString
* pUpperAbbrevDayText
; //* Array of day of week names, abbreviated, uppercase
85 OUString aUpperCurrSymbol
; //* Currency symbol, uppercase
86 bool bTextInitialized
; //* Whether days and months are initialized
87 bool bScanGenitiveMonths
; //* Whether to scan an input for genitive months
88 bool bScanPartitiveMonths
; //* Whether to scan an input for partitive months
89 Date
* pNullDate
; //* Null date (reference date for offset calculation), 30Dec1899
90 // Variables for provisional results:
// The three arrays below all have SV_MAX_ANZ_INPUT_STRINGS elements.
91 OUString sStrArray
[SV_MAX_ANZ_INPUT_STRINGS
]; //* Array of scanned substrings
92 bool IsNum
[SV_MAX_ANZ_INPUT_STRINGS
]; //* Whether a substring is numeric
93 sal_uInt16 nNums
[SV_MAX_ANZ_INPUT_STRINGS
]; //* Sequence of offsets to numeric strings (presumably indices into sStrArray -- TODO confirm)
94 sal_uInt16 nAnzStrings
; //* Total count of scanned substrings
95 sal_uInt16 nAnzNums
; //* Count of numeric substrings
96 bool bDecSepInDateSeps
; //* True <=> decimal separator is one of {.,-,/,DateSep}
97 sal_uInt8 nMatchedAllStrings
; //* Scan...String() matched all substrings,
99 // bit mask of nMatched... constants
// Bit values for nMatchedAllStrings. They are declared without initializer here;
// the value noted on each line is presumably assigned at the out-of-line definition.
100 static const sal_uInt8 nMatchedEndString
; // 0x01
101 static const sal_uInt8 nMatchedMidString
; // 0x02
102 static const sal_uInt8 nMatchedStartString
; // 0x04
103 static const sal_uInt8 nMatchedVirgin
; // 0x08
104 static const sal_uInt8 nMatchedUsedAsReturn
; // 0x10
106 int nSign
; // Sign of number
107 short nMonth
; // Month (1..x) if date
108 // negative => short format
109 short nMonthPos
; // 1 = front, 2 = middle
111 sal_uInt16 nTimePos
; // Index of first time separator (+1)
112 short nDecPos
; // Index of substring containing the decimal separator (+1)
113 short nNegCheck
; // '( )' for negative
114 short nESign
; // Sign of exponent
115 short nAmPm
; // +1 AM, -1 PM, 0 if none
116 short nLogical
; // -1 => False, 1 => True
117 sal_uInt16 nThousand
; // Count of group (AKA thousand) separators
118 sal_uInt16 nPosThousandString
; // Position of concatenated 000,000,000 string
119 short eScannedType
; // Scanned type
120 short eSetType
; // Preset Type
122 sal_uInt16 nStringScanNumFor
; // Fixed strings recognized in
123 // pFormat->NumFor[nStringScanNumFor]
124 short nStringScanSign
; // Sign resulting from FixString
125 sal_uInt16 nYear2000
; // Two-digit threshold
128 // number <= nYear2000 => 20xx
129 // number > nYear2000 => 19xx
130 sal_uInt16 nTimezonePos
; // Index of timezone separator (+1)
132 /** State of ISO 8601 detection.
136 2:= yes, <=2 digits in year
137 3:= yes, 3 digits in year
138 4:= yes, >=4 digits in year
142 sal_uInt8 nMayBeIso8601
;
144 /** State of whether ISO 8601 can be forced.
150 @see CanForceToIso8601()
152 sal_uInt8 nCanForceToIso8601
;
154 /** State of dd-month-yy or yy-month-dd detection, with month name.
161 @see MayBeMonthDate()
163 sal_uInt8 nMayBeMonthDate
;
165 /** Input matched this locale dependent date acceptance pattern.
166 -2 if not checked yet, -1 if no match, >=0 matched pattern.
168 @see IsAcceptedDatePattern()
170 sal_Int32 nAcceptedDatePattern
;
// NOTE(review): presumably the sequence of patterns that nAcceptedDatePattern indexes into;
// see InvalidateDateAcceptancePatterns() -- confirm in the implementation.
171 com::sun::star::uno::Sequence
< OUString
> sDateAcceptancePatterns
;
173 /** If input matched a date acceptance pattern, the pattern starts at input
174 particle sStrArray[nDatePatternStart].
176 @see IsAcceptedDatePattern()
178 sal_uInt16 nDatePatternStart
;
180 /** Count of numbers that matched the accepted pattern, if any, else 0.
182 @see GetDatePatternNumbers()
184 sal_uInt16 nDatePatternNumbers
;
186 void Reset(); // Reset all variables before start of analysis
188 void InitText(); // Init of months and days of week
190 // Convert string to double.
191 // Only simple unsigned floating point values without any error detection,
192 // decimal separator has to be '.'
193 // If bForceFraction==true the string is taken to be the fractional part
194 // of 0.1234 without the leading "0." (thus being just "1234").
195 double StringToDouble( const OUString
& rStr
,
196 bool bForceFraction
= false );
198 // Next number/string symbol
// NOTE(review): pStr is a reference to the scan pointer, presumably advanced past the symbol -- confirm.
199 bool NextNumberStringSymbol( const sal_Unicode
*& pStr
,
202 // Concatenate ,000,23 blocks
203 // in input to 000123
// (the concatenated result goes to rSymbol -- presumably; see also nPosThousandString)
204 bool SkipThousands( const sal_Unicode
*& pStr
, OUString
& rSymbol
);
206 // Divide numbers/strings into
207 // arrays and variables above.
208 // Leading blanks and blanks
209 // after numbers are thrown away
210 void NumberStringDivision( const OUString
& rString
);
213 // optimized substring versions
215 // Whether rString contains rWhat at nPos
216 static inline bool StringContains( const OUString
& rWhat
,
217 const OUString
& rString
,
// guard: nothing to compare if rWhat is empty or nPos is not inside rString
220 if (rWhat
.isEmpty() || rString
.getLength() <= nPos
)
224 // mostly used with one character, so compare the first character
// before handing over to StringContainsImpl()
225 if ( rWhat
[ 0 ] != rString
[ nPos
] )
229 return StringContainsImpl( rWhat
, rString
, nPos
);
232 // Whether pString contains rWhat at nPos
// NOTE(review): unlike StringContains() no isEmpty() check on rWhat is visible here
// before rWhat[ 0 ] is read -- confirm that callers never pass an empty rWhat.
233 static inline bool StringPtrContains( const OUString
& rWhat
,
234 const sal_Unicode
* pString
,
235 sal_Int32 nPos
) // nPos MUST be a valid offset from pString
237 // mostly used with one character, so compare the first character
// before handing over to StringPtrContainsImpl()
238 if ( rWhat
[ 0 ] != pString
[ nPos
] )
242 return StringPtrContainsImpl( rWhat
, pString
, nPos
);
245 //! DO NOT use directly, call StringContains() instead
246 static bool StringContainsImpl( const OUString
& rWhat
,
247 const OUString
& rString
,
249 //! DO NOT use directly, call StringPtrContains() instead
250 static bool StringPtrContainsImpl( const OUString
& rWhat
,
251 const sal_Unicode
* pString
,
254 // Skip a special character c in rString
255 static inline bool SkipChar( sal_Unicode c
,
256 const OUString
& rString
,
// Skip blanks in rString
260 static inline void SkipBlanks( const OUString
& rString
,
263 // Jump over rWhat in rString at nPos
264 static inline bool SkipString( const OUString
& rWhat
,
265 const OUString
& rString
,
268 // Recognizes exactly ,111 as group separator
269 inline bool GetThousandSep( const OUString
& rString
,
271 sal_uInt16 nStringPos
);
// Get logical (boolean) value of rString
// NOTE(review): result is presumably coded like nLogical (-1 => False, 1 => True) -- confirm.
273 short GetLogical( const OUString
& rString
);
275 // Get month and advance string position
// NOTE(review): result is presumably coded like nMonth (negative => short format) -- confirm.
276 short GetMonth( const OUString
& rString
,
279 // Get day of week and advance string position
280 int GetDayOfWeek( const OUString
& rString
,
283 // Get currency symbol and advance string position
284 bool GetCurrency( const OUString
& rString
,
286 const SvNumberformat
* pFormat
= NULL
); // optional number format to match against
288 // Get symbol AM or PM and advance string position
289 bool GetTimeAmPm( const OUString
& rString
,
292 // Get decimal separator and advance string position
293 inline bool GetDecSep( const OUString
& rString
,
296 // Get hundredth seconds separator and advance string position
297 inline bool GetTime100SecSep( const OUString
& rString
,
300 // Get sign and advance string position
301 // Including special case '('
302 int GetSign( const OUString
& rString
,
305 // Get sign of exponent and advance string position
306 short GetESign( const OUString
& rString
,
309 // Get next number as array offset
310 inline bool GetNextNumber( sal_uInt16
& i
,
313 /** Converts time to double (only the decimals, i.e. the fractional part)
315 @return TRUE if time, FALSE if not (e.g. hours >12 with AM/PM)
317 bool GetTimeRef( double& fOutNumber
, // OUT: result as double
318 sal_uInt16 nIndex
, // Index of hour in input
319 sal_uInt16 nAnz
); // Count of time substrings in input
320 sal_uInt16
ImplGetDay ( sal_uInt16 nIndex
); // Day input, 0 if no match
321 sal_uInt16
ImplGetMonth( sal_uInt16 nIndex
); // Month input, zero based return, NumberOfMonths if no match
322 sal_uInt16
ImplGetYear ( sal_uInt16 nIndex
); // Year input, 0 if no match
324 // Conversion of date to number
325 bool GetDateRef( double& fDays
, // OUT: difference in days to the null date
326 sal_uInt16
& nCounter
, // Count of date substrings
327 const SvNumberformat
* pFormat
= NULL
); // optional number format to match against
// The three Scan...String() functions below record their matches in
// nMatchedAllStrings (see the nMatched... constants).
// NOTE(review): pFormat is presumably the same optional number format to match
// against as documented for GetCurrency() and GetDateRef() -- confirm.
329 // Analyze start of string
330 bool ScanStartString( const OUString
& rString
,
331 const SvNumberformat
* pFormat
= NULL
);
333 // Analyze middle substring
334 bool ScanMidString( const OUString
& rString
,
335 sal_uInt16 nStringPos
,
336 const SvNumberformat
* pFormat
= NULL
);
339 // Analyze end of string
340 bool ScanEndString( const OUString
& rString
,
341 const SvNumberformat
* pFormat
= NULL
);
343 // Compare rString to substring of array indexed by nString
344 // nString == 0xFFFF => last substring
345 bool ScanStringNumFor( const OUString
& rString
,
347 const SvNumberformat
* pFormat
,
349 bool bDontDetectNegation
= false );
351 // If nMatchedAllStrings is set, set nMatchedUsedAsReturn and return true,
352 // else do nothing and return false.
353 bool MatchedReturn();
355 //! Be sure that the string to be analyzed is already converted to upper
356 //! case and if it contained native number digits that they are already
357 //! converted to ASCII.
359 // Main analyzing function
360 bool IsNumberFormatMain( const OUString
& rString
,
361 const SvNumberformat
* pFormat
= NULL
); // optional number format to match against
// Whether c is a digit
// NOTE(review): presumably tests for ASCII '0'..'9' only -- confirm in the implementation.
363 static inline bool MyIsdigit( sal_Unicode c
);
365 // native number transliteration if necessary (rString is modified in place)
366 void TransformInput( OUString
& rString
);
368 /** Whether input matches locale dependent date acceptance pattern.
370 @param nStartPatternAt
371 The pattern matching starts at input particle
372 sStrArray[nStartPatternAt].
374 NOTE: once called the result is remembered, subsequent calls with
375 different parameters do not check for a match and do not lead to a
different result.
@see nAcceptedDatePattern
378 bool IsAcceptedDatePattern( sal_uInt16 nStartPatternAt
);
380 /** Sets (not advances!) rPos to sStrArray[nParticle].getLength() if string
381 matches separator in pattern at nParticle.
383 @returns TRUE if separator matched.
385 bool SkipDatePatternSeparator( sal_uInt16 nParticle
, sal_Int32
& rPos
);
387 /** Returns count of numbers in accepted date pattern.
@see nDatePatternNumbers
389 sal_uInt16
GetDatePatternNumbers();
391 /** Obtain order of accepted date pattern coded as, for example,
392 ('D'<<16)|('M'<<8)|'Y'
394 sal_uInt32
GetDatePatternOrder();
396 /** Obtain date format order, from accepted date pattern if available or
397 otherwise the locale's default order.
399 DateFormat
GetDateOrder();
401 /** Whether input may be an ISO 8601 date format, yyyy-mm-dd...
403 Checks if input has at least 3 numbers for yyyy-mm-dd and the separator
404 is '-', and 1<=mm<=12 and 1<=dd<=31.
@see nMayBeIso8601
410 /** Whether input may be a dd-month-yy format, with month name, not
@see nMayBeMonthDate
415 bool MayBeMonthDate();
418 #endif // _ZFORFIND_HXX
420 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */