1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
21 #define INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
23 #include <com/sun/star/uno/Sequence.hxx>
24 #include <rtl/ustring.hxx>
25 #include <svl/zforlist.hxx>
26 #include <tools/date.hxx>
// Forward declaration; within this header the type is only used by reference
// (constructor parameter and the mrCurrentLanguageData member).
31 class SvNFLanguageData
;
// Opaque declaration of the scoped enum, fixing its underlying type to sal_Int16.
32 enum class SvNumFormatType
: sal_Int16
;
// Fixed capacity of the per-scan arrays sStrArray, IsNum and nNums.
34 #define SV_MAX_COUNT_INPUT_STRINGS 20 // max count of substrings in input scanner
// Scanner for user input: IsNumberFormat() converts an input string to a
// number, and the members keep the intermediate scan state (substrings,
// sign, month, time, currency, logical value, date patterns).
36 class ImpSvNumberInputScan
// Takes the language data by reference.
// NOTE(review): presumably kept in mrCurrentLanguageData, so the referenced
// object would have to outlive the scanner -- confirm in the implementation.
39 explicit ImpSvNumberInputScan(SvNFLanguageData
& rCurrentLanguage
);
// Out-of-line destructor; the definition is not part of this header.
40 ~ImpSvNumberInputScan();
// Refresh hook for language changes; takes no arguments and returns nothing.
42 /*!*/ void ChangeIntl(); // MUST be called if language changes
44 /// set reference date for offset calculation
/// The date is passed as separate day, month and year values.
/// NOTE(review): presumably stored in moNullDate -- confirm in the implementation.
45 void ChangeNullDate( const sal_uInt16 nDay
,
46 const sal_uInt16 nMonth
,
47 const sal_Int16 nYear
);
49 /// convert input string to number
/// The value is returned through fOutNumber and the detected type through
/// F_Type, which is also read as input (see the parameter notes).
/// NOTE(review): presumably the bool result tells whether the input was
/// recognized -- confirm in the implementation.
50 bool IsNumberFormat( const OUString
& rString
, /// input string
51 SvNumFormatType
& F_Type
, /// format type (in + out)
52 double& fOutNumber
, /// value determined (out)
53 const SvNumberformat
* pFormat
, /// number format to compare against
54 const NativeNumberWrapper
& rNatNum
, /// NOTE(review): presumably used to map native number digits -- confirm
55 SvNumInputOptions eInputOptions
); /// scan options (same type as the eInputOptions parameter of GetTimeRef())
57 /// after IsNumberFormat: get decimal position
/// Returns nDecPos unchanged; that member is documented as the index of the
/// substring containing the decimal separator, plus one.
58 short GetDecPos() const { return nDecPos
; }
59 /// after IsNumberFormat: get count of numeric substrings in input string
/// Returns nNumericsCnt unchanged.
60 sal_uInt16
GetNumericsCount() const { return nNumericsCnt
; }
62 /// set threshold of two-digit year input
/// Stores nVal in nYear2000 as is; no range check is done here.
63 void SetYear2000( sal_uInt16 nVal
) { nYear2000
= nVal
; }
64 /// get threshold of two-digit year input
/// Returns nYear2000 unchanged.
65 sal_uInt16
GetYear2000() const { return nYear2000
; }
67 /** Whether input can be forced to ISO 8601 format.
69 Depends on locale's date separator and a specific date format order.
// eDateOrder is the date format order to check against.
71 bool CanForceToIso8601( DateOrder eDateOrder
);
// NOTE(review): presumably discards sDateAcceptancePatterns so that they get
// fetched again -- confirm in the implementation.
73 void InvalidateDateAcceptancePatterns();
75 /** Whether 'T' separator was detected in an ISO 8601 date+time format.
// Inline accessor: returns bIso8601Tsep unchanged.
77 bool HasIso8601Tsep() const { return bIso8601Tsep
; }
// Language data this scanner refers to; a reference member, so it cannot be
// re-bound after construction.
80 SvNFLanguageData
& mrCurrentLanguageData
;
81 const SvNumberformat
* mpFormat
; //* The format to compare against, if any
// The name tables below are heap arrays owned through
// std::unique_ptr<OUString[]>; their element counts are not stored alongside.
// All of them hold upper-case text (see the note on IsNumberFormatMain about
// upper-cased input).
82 std::unique_ptr
<OUString
[]> pUpperMonthText
; //* Array of month names, uppercase
83 std::unique_ptr
<OUString
[]> pUpperAbbrevMonthText
; //* Array of month names, abbreviated, uppercase
84 std::unique_ptr
<OUString
[]> pUpperGenitiveMonthText
; //* Array of genitive month names, uppercase
85 std::unique_ptr
<OUString
[]> pUpperGenitiveAbbrevMonthText
; //* Array of genitive month names, abbreviated, uppercase
86 std::unique_ptr
<OUString
[]> pUpperPartitiveMonthText
; //* Array of partitive month names, uppercase
87 std::unique_ptr
<OUString
[]> pUpperPartitiveAbbrevMonthText
;//* Array of partitive month names, abbreviated, uppercase
88 std::unique_ptr
<OUString
[]> pUpperDayText
; //* Array of day of week names, uppercase
89 std::unique_ptr
<OUString
[]> pUpperAbbrevDayText
; //* Array of day of week names, abbreviated, uppercase
90 OUString aUpperCurrSymbol
; //* Currency symbol, uppercase
// NOTE(review): presumably set by InitText() -- confirm in the implementation.
91 bool bTextInitialized
; //* Whether days and months are initialized
92 bool bScanGenitiveMonths
; //* Whether to scan an input for genitive months
93 bool bScanPartitiveMonths
; //* Whether to scan an input for partitive months
// Held in a std::optional, so the date may be unset.
// NOTE(review): presumably the null date that GetDateRef() measures against -- confirm.
94 std::optional
<Date
> moNullDate
; //* 30Dec1899
95 // Variables for provisional results:
// The three arrays share the capacity SV_MAX_COUNT_INPUT_STRINGS.
// NOTE(review): presumably sStrArray and IsNum are indexed alike (one entry
// per scanned substring) and nNums holds indices into them -- confirm.
96 OUString sStrArray
[SV_MAX_COUNT_INPUT_STRINGS
];//* Array of scanned substrings
97 bool IsNum
[SV_MAX_COUNT_INPUT_STRINGS
]; //* Whether a substring is numeric
98 sal_uInt16 nNums
[SV_MAX_COUNT_INPUT_STRINGS
]; //* Sequence of offsets to numeric strings
99 sal_uInt16 nStringsCnt
; //* Total count of scanned substrings
// Exposed read-only through GetNumericsCount().
100 sal_uInt16 nNumericsCnt
; //* Count of numeric substrings
101 bool bDecSepInDateSeps
; //* True <=> DecSep in {.,-,/,DateSep}
102 sal_uInt8 nMatchedAllStrings
; //* Scan...String() matched all substrings,
104 // bit mask of nMatched... constants
// The flag constants are only declared here (no initializer); the trailing
// comments record the bit value of each one.
105 static const sal_uInt8 nMatchedEndString
; // 0x01
106 static const sal_uInt8 nMatchedMidString
; // 0x02
107 static const sal_uInt8 nMatchedStartString
; // 0x04
108 static const sal_uInt8 nMatchedVirgin
; // 0x08
109 static const sal_uInt8 nMatchedUsedAsReturn
; // 0x10
// Per-scan results, one member per recognized feature of the input.
111 int nSign
; // Sign of number
112 int nMonth
; // Month (1..x) if date
113 // negative => short format
114 short nMonthPos
; // 1 = front, 2 = middle
116 int nDayOfWeek
; // Temporary (!) day of week (1..7,-1..-7) if date
117 sal_uInt16 nTimePos
; // Index of first time separator (+1)
// Exposed read-only through GetDecPos().
118 short nDecPos
; // Index of substring containing "," (+1)
119 bool bNegCheck
; // '( )' for negative
120 short nESign
; // Sign of exponent
121 short nAmPm
; // +1 AM, -1 PM, 0 if none
122 short nLogical
; // -1 => False, 1 => True
123 bool mbEraCE
; // Era if date, false => BCE, true => CE (currently only Gregorian)
124 sal_uInt16 nThousand
; // Count of group (AKA thousand) separators
125 sal_uInt16 nPosThousandString
; // Position of concatenated 000,000,000 string
126 SvNumFormatType eScannedType
; // Scanned type
127 SvNumFormatType eSetType
; // Preset Type
129 sal_uInt16 nStringScanNumFor
; // Fixed strings recognized in
130 // pFormat->NumFor[nStringScanNumFor]
131 short nStringScanSign
; // Sign resulting of FixString
// Written by SetYear2000() and read by GetYear2000().
132 sal_uInt16 nYear2000
; // Two-digit threshold
135 // number <= nYear2000 => 20xx
136 // number > nYear2000 => 19xx
138 /** State of ISO 8601 detection.
142 2:= yes, <=2 digits in year
143 3:= yes, 3 digits in year
144 4:= yes, >=4 digits in year
// Small state code; the values 2, 3 and 4 listed above differ only in the
// number of year digits seen.
148 sal_uInt8 nMayBeIso8601
;
150 /** Whether the 'T' time separator was detected in an ISO 8601 string. */
153 /** State of dd-month-yy or yy-month-dd detection, with month name.
160 @see MayBeMonthDate()
162 sal_uInt8 nMayBeMonthDate
;
164 /** Input matched this locale dependent date acceptance pattern.
165 -2 if not checked yet, -1 if no match, >=0 matched pattern.
167 @see IsAcceptedDatePattern()
// Signed type, because the negative values -2 and -1 serve as sentinels.
169 sal_Int32 nAcceptedDatePattern
;
// Sequence of pattern strings.
// NOTE(review): presumably the locale's date acceptance patterns that a
// non-negative nAcceptedDatePattern indexes into -- confirm.
170 css::uno::Sequence
< OUString
> sDateAcceptancePatterns
;
172 /** If input matched a date acceptance pattern that starts at input
173 particle sStrArray[nDatePatternStart].
175 @see IsAcceptedDatePattern()
177 sal_uInt16 nDatePatternStart
;
179 /** Count of numbers that matched the accepted pattern, if any, else 0.
181 @see GetDatePatternNumbers()
183 sal_uInt16 nDatePatternNumbers
;
185 // Copying is forbidden: copy constructor and copy assignment are both deleted.
186 ImpSvNumberInputScan (const ImpSvNumberInputScan
&) = delete;
187 ImpSvNumberInputScan
& operator= (const ImpSvNumberInputScan
&) = delete;
189 void Reset(); // Reset all variables before start of analysis
// NOTE(review): presumably fills the upper-case month and day name arrays and
// sets bTextInitialized -- confirm in the implementation.
191 void InitText(); // Init of months and days of week
193 // Convert string to double.
194 // Only simple unsigned floating point values without any error detection,
195 // decimal separator has to be '.'
196 // If bForceFraction==true the string is taken to be the fractional part
197 // of 0.1234 without the leading 0. (thus being just "1234").
// Static helper, so it works without scanner state; bForceFraction defaults
// to false.
198 static double StringToDouble( std::u16string_view aStr
,
199 bool bForceFraction
= false );
201 // Next number/string symbol
// pStr is a reference to the read pointer, which lets the callee move it.
202 static bool NextNumberStringSymbol( const sal_Unicode
*& pStr
,
205 // Concatenate ,000,23 blocks
206 // in input to 000123
// const member; pStr and rSymbol are non-const references, so both can be
// updated by the call.
207 bool SkipThousands( const sal_Unicode
*& pStr
, OUString
& rSymbol
) const;
209 // Divide numbers/strings into
210 // arrays and variables above.
211 // Leading blanks and blanks
212 // after numbers are thrown away
// "arrays above" refers to the provisional result members such as sStrArray,
// IsNum and nNums.
213 void NumberStringDivision( const OUString
& rString
);
216 /** Whether rString contains word (!) rWhat at nPos.
217 rWhat will not be matched if it is a substring of a word.
// Unlike the static StringContains() helper this is a const member function.
// NOTE(review): presumably because word boundary detection needs the language
// data -- confirm in the implementation.
219 bool StringContainsWord( const OUString
& rWhat
,
220 const OUString
& rString
,
221 sal_Int32 nPos
) const;
223 // optimized substring versions
225 // Whether rString contains rWhat at nPos
// Inline front end for StringContainsImpl(): a guard and a first-character
// comparison run before the out-of-line call.
226 static bool StringContains( const OUString
& rWhat
,
227 const OUString
& rString
,
// Guard: an empty rWhat, or nPos at or past the end of rString, is caught
// before any character is indexed.
// NOTE(review): presumably answers false in that case; a negative nPos is not
// checked here -- confirm.
230 if (rWhat
.isEmpty() || rString
.getLength() <= nPos
)
234 // mostly used with one character
// Hence the first characters are compared inline.
// NOTE(review): presumably a mismatch answers false right away -- confirm.
235 if ( rWhat
[ 0 ] != rString
[ nPos
] )
// The rest of the comparison is delegated with unchanged arguments.
239 return StringContainsImpl( rWhat
, rString
, nPos
);
242 // Whether pString contains rWhat at nPos
// Raw pointer variant of StringContains(): pString carries no length, so the
// bounds requirement on nPos stated below falls to the caller.
243 static bool StringPtrContains( const OUString
& rWhat
,
244 const sal_Unicode
* pString
,
245 sal_Int32 nPos
) // nPos MUST be a valid offset from pString
247 // mostly used with one character
// First characters are compared inline before the out-of-line call.
// NOTE(review): rWhat[0] is read without a visible emptiness check, unlike in
// StringContains() -- confirm that callers never pass an empty rWhat.
248 if ( rWhat
[ 0 ] != pString
[ nPos
] )
// The rest of the comparison is delegated with unchanged arguments.
252 return StringPtrContainsImpl( rWhat
, pString
, nPos
);
255 //! DO NOT use directly
//! Reached from StringContains() once its guard and first-character check
//! have passed.
256 static bool StringContainsImpl( const OUString
& rWhat
,
257 const OUString
& rString
,
259 //! DO NOT use directly
//! Reached from StringPtrContains() once its first-character check has passed.
260 static bool StringPtrContainsImpl( const OUString
& rWhat
,
261 const sal_Unicode
* pString
,
264 // Skip a special character
// c is the character to skip; rString is passed as a non-owning view.
265 static inline bool SkipChar( sal_Unicode c
,
266 std::u16string_view rString
,
// NOTE(review): presumably skips blanks in rString -- confirm in the
// implementation.
270 static inline bool SkipBlanks( const OUString
& rString
,
273 // Jump over rWhat in rString at nPos
274 static inline bool SkipString( const OUString
& rWhat
,
275 const OUString
& rString
,
278 // Recognizes exactly ,111 as group separator
// const member, unlike the static Skip... helpers above.
279 inline bool GetThousandSep( std::u16string_view rString
,
281 sal_uInt16 nStringPos
) const;
// NOTE(review): presumably yields the coding documented for nLogical
// (-1 => False, 1 => True) -- confirm in the implementation.
283 short GetLogical( std::u16string_view rString
) const;
// Token getters: according to their comments each one recognizes one kind of
// token and advances the string position.
285 // Get month and advance string position
// NOTE(review): presumably the short result follows the nMonth coding
// (negative => short format) -- confirm.
286 short GetMonth( const OUString
& rString
,
289 // Get day of week and advance string position
290 int GetDayOfWeek( const OUString
& rString
,
293 // Get currency symbol and advance string position
294 bool GetCurrency( const OUString
& rString
,
297 // Get symbol AM or PM and advance string position
298 bool GetTimeAmPm( const OUString
& rString
,
301 // Get decimal separator and advance string position
// nPos is a non-const reference; that is how the position is handed back.
302 inline bool GetDecSep( std::u16string_view rString
,
303 sal_Int32
& nPos
) const;
305 // Get hundredth seconds separator and advance string position
// Same shape as GetDecSep(): view in, position by reference, const member.
306 inline bool GetTime100SecSep( std::u16string_view rString
,
307 sal_Int32
& nPos
) const;
309 // Get sign and advance string position
310 // Including special case '('
// NOTE(review): the '(' case presumably ties in with bNegCheck -- confirm.
311 int GetSign( std::u16string_view rString
,
314 // Get sign of exponent and advance string position
// Static, so it works without scanner state.
315 static short GetESign( std::u16string_view rString
,
318 // Get next number as array offset
// i and j are both in/out references.
// NOTE(review): presumably i walks the substrings and j the numeric ones -- confirm.
319 inline bool GetNextNumber( sal_uInt16
& i
,
320 sal_uInt16
& j
) const;
322 /** Converts time -> double (only decimals)
324 @return TRUE if time, FALSE if not (e.g. hours >12 with AM/PM)
326 bool GetTimeRef( double& fOutNumber
, // result as double
327 sal_uInt16 nIndex
, // Index of hour in input
328 sal_uInt16 nCnt
, // Count of time substrings in input
329 SvNumInputOptions eInputOptions
) const;
// The no-match values differ: ImplGetDay and ImplGetYear report 0, while
// ImplGetMonth reports NumberOfMonths because its result is zero based.
// ImplGetYear is the only non-const one of the three.
330 sal_uInt16
ImplGetDay ( sal_uInt16 nIndex
) const; // Day input, 0 if no match
331 sal_uInt16
ImplGetMonth( sal_uInt16 nIndex
) const; // Month input, zero based return, NumberOfMonths if no match
332 sal_uInt16
ImplGetYear ( sal_uInt16 nIndex
); // Year input, 0 if no match
334 // Conversion of date to number
// Both parameters are references written by the call; the bool result is not
// documented here.
335 bool GetDateRef( double& fDays
, // OUT: days diff to null date
336 sal_uInt16
& nCounter
); // Count of date substrings
// NOTE(review): the three Scan...String() functions presumably correspond to
// the nMatchedStartString, nMatchedMidString and nMatchedEndString bits of
// nMatchedAllStrings -- confirm in the implementation.
338 // Analyze start of string
339 bool ScanStartString( const OUString
& rString
);
341 // Analyze middle substring
342 bool ScanMidString( const OUString
& rString
,
343 sal_uInt16 nStringPos
,
344 sal_uInt16 nCurNumCount
);
347 // Analyze end of string
348 bool ScanEndString( const OUString
& rString
);
350 // Compare rString to substring of array indexed by nString
351 // nString == 0xFFFF => last substring
// bDontDetectNegation defaults to false.
352 bool ScanStringNumFor( const OUString
& rString
,
355 bool bDontDetectNegation
= false );
357 // if nMatchedAllStrings set nMatchedUsedAsReturn and return true,
358 // else do nothing and return false
359 bool MatchedReturn();
361 //! Be sure that the string to be analyzed is already converted to upper
362 //! case and if it contained native number digits that they are already
363 //! converted to ASCII.
365 // Main analyzing function
// Takes the prepared string plus the optional format; the preconditions on
// rString are the ones stated in the //! note above.
366 bool IsNumberFormatMain( const OUString
& rString
,
367 const SvNumberformat
* pFormat
); // number format to match against
369 /** Whether input matches locale dependent date acceptance pattern.
371 @param nStartPatternAt
372 The pattern matching starts at input particle
373 sStrArray[nStartPatternAt].
375 NOTE: once called the result is remembered, subsequent calls with
376 different parameters do not check for a match and do not lead to a different result.
// NOTE(review): the remembered result is presumably nAcceptedDatePattern,
// whose value -2 is documented as "not checked yet" -- confirm.
379 bool IsAcceptedDatePattern( sal_uInt16 nStartPatternAt
);
381 /** Sets (not advances!) rPos to sStrArray[nParticle].getLength() if string
382 matches separator in pattern at nParticle.
384 Also detects a signed year case like M/D/-Y
386 @returns TRUE if separator matched.
// rPos and rSignedYear are both non-const references written by the call.
388 bool SkipDatePatternSeparator( sal_uInt16 nParticle
, sal_Int32
& rPos
, bool & rSignedYear
);
390 /** Returns count of numbers in accepted date pattern.
// Non-const.
// NOTE(review): presumably caches its result in nDatePatternNumbers -- confirm.
392 sal_uInt16
GetDatePatternNumbers();
394 /** Whether numeric string nNumber is of type cType in accepted date
395 pattern, 'Y', 'M' or 'D'.
397 bool IsDatePatternNumberOfType( sal_uInt16 nNumber
, sal_Unicode cType
);
399 /** Obtain order of accepted date pattern coded as, for example,
400 ('D'<<16)|('M'<<8)|'Y'
// In the example above each pattern letter occupies one byte of the result.
402 sal_uInt32
GetDatePatternOrder();
404 /** Obtain date format order, from accepted date pattern if available or
405 otherwise the locale's default order.
407 @param bFromFormatIfNoPattern
408 If <TRUE/> and no pattern was matched, obtain date order from
409 format if available, instead from format's or current locale.
// bFromFormatIfNoPattern defaults to false.
411 DateOrder
GetDateOrder( bool bFromFormatIfNoPattern
= false );
413 /** Whether input may be an ISO 8601 date format, yyyy-mm-dd...
415 Checks if input has at least 3 numbers for yyyy-mm-dd and the separator
416 is '-', and 1<=mm<=12 and 1<=dd<=31.
422 /** Whether input may be a dd-month-yy format, with month name, not
// Non-const.
// NOTE(review): presumably caches its state in nMayBeMonthDate, whose comment
// refers back to this function -- confirm.
427 bool MayBeMonthDate();
429 /** Whether input is acceptable as ISO 8601 date format in the current
430 NfEvalDateFormat setting.
432 bool IsAcceptableIso8601();
434 /** If month name in the middle was parsed, get the corresponding
435 LongDateOrder in GetDateRef().
// pLoc is passed as a pointer.
// NOTE(review): whether it may be null is not visible here -- confirm.
437 LongDateOrder
GetMiddleMonthLongDateOrder( bool bFormatTurn
,
438 const LocaleDataWrapper
* pLoc
,
439 DateOrder eDateOrder
);
442 #endif // INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
444 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */