1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <i18nlangtag/languagetag.hxx>
21 #include <i18nlangtag/mslangid.hxx>
22 #include <rtl/ustrbuf.hxx>
23 #include <sal/macros.h>
24 #include <nativenumbersupplier.hxx>
25 #include <localedata.hxx>
26 #include "data/numberchar.h"
27 #include <comphelper/processfactory.hxx>
28 #include <cppuhelper/supportsservice.hxx>
29 #include <o3tl/string_view.hxx>
34 #include <string_view>
35 #include <unordered_map>
36 #include <com/sun/star/i18n/CharacterClassification.hpp>
37 #include <com/sun/star/i18n/NativeNumberMode.hpp>
38 #include <com/sun/star/linguistic2/NumberText.hpp>
40 using namespace ::com::sun::star::uno
;
41 using namespace ::com::sun::star::i18n
;
42 using namespace ::com::sun::star::lang
;
48 const sal_Unicode
*multiplierChar
;
50 sal_Int16 exponentCount
;
51 const sal_Int16
*multiplierExponent
;
// Bit flags tested against Number::numberFlag by AsciiToNative() and
// AsciiToNative_numberMaker(); each one suppresses a digit character in the
// generated text.
// Set: no zero digit character is written for a zero digit inside a number.
56 #define NUMBER_OMIT_ZERO (1 << 0)
// Set: no zero digit character is written for a number without any non-zero digit.
57 #define NUMBER_OMIT_ONLY_ZERO (1 << 1)
// NUMBER_OMIT_ONE_<n>, set: the digit one is not written in front of the
// multiplier whose index is n-1 (tested through NUMBER_OMIT_ONE_CHECK).
58 #define NUMBER_OMIT_ONE_1 (1 << 2)
59 #define NUMBER_OMIT_ONE_2 (1 << 3)
60 #define NUMBER_OMIT_ONE_3 (1 << 4)
61 #define NUMBER_OMIT_ONE_4 (1 << 5)
62 #define NUMBER_OMIT_ONE_5 (1 << 6)
63 #define NUMBER_OMIT_ONE_6 (1 << 7)
64 #define NUMBER_OMIT_ONE_7 (1 << 8)
// All seven NUMBER_OMIT_ONE_<n> bits together.
65 #define NUMBER_OMIT_ONE (NUMBER_OMIT_ONE_1|NUMBER_OMIT_ONE_2|NUMBER_OMIT_ONE_3|NUMBER_OMIT_ONE_4|NUMBER_OMIT_ONE_5|NUMBER_OMIT_ONE_6|NUMBER_OMIT_ONE_7)
// Bit for the zero-based multiplier index `bit`: index 0 yields NUMBER_OMIT_ONE_1.
66 #define NUMBER_OMIT_ONE_CHECK(bit) (1 << (2 + bit))
// Convenience combinations used by the natnum tables.
67 #define NUMBER_OMIT_ALL ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE|NUMBER_OMIT_ONLY_ZERO )
68 #define NUMBER_OMIT_ZERO_ONE ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE )
69 #define NUMBER_OMIT_ONE_67 (NUMBER_OMIT_ONE_6|NUMBER_OMIT_ONE_7)
70 #define NUMBER_OMIT_ZERO_ONE_67 ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE_67 )
// Mutex locked by getLocaleSeparators() and getNumberText() while they use
// their function-local static caches.
76 std::mutex theNatNumMutex
;
// Forward declarations of the two script-specific converters that
// getNativeNumberString() calls for the NumberChar_he and NumberChar_cu digit sets.
80 static OUString
getHebrewNativeNumberString(const OUString
& aNumberString
, bool useGeresh
);
82 static OUString
getCyrillicNativeNumberString(const OUString
& aNumberString
);
// Converts the first nCount characters of inStr one by one to the digit set
// selected by `number` (used as row index into NumberChar, SeparatorChar,
// DecimalChar and MinusChar).
//  - An ASCII digit becomes NumberChar[number][digit].
//  - A non-digit that is directly followed by a digit is looked at as a
//    thousands separator (only if a digit also precedes it), a decimal point
//    or a minus sign; it is replaced by the table entry for `number` when
//    that entry is non-zero and kept as is otherwise.
//  - Any other character is copied unchanged.
// The output buffer is allocated with exactly nCount characters and its
// ownership is passed to the returned OUString (SAL_NO_ACQUIRE).
// NOTE(review): several lines of this function are missing from this listing
// (e.g. the branch keywords between the assignments). In particular the
// realloc on pOffset appears here without a null check although the next
// statement treats pOffset as nullable - confirm the guard in the full file.
84 /// @throws RuntimeException
85 static OUString
AsciiToNativeChar( const OUString
& inStr
, sal_Int32 nCount
,
86 Sequence
< sal_Int32
>* pOffset
, sal_Int16 number
)
88 const sal_Unicode
*src
= inStr
.getStr();
89 rtl_uString
*newStr
= rtl_uString_alloc(nCount
);
91 pOffset
->realloc(nCount
);
92 auto ppOffset
= pOffset
? pOffset
->getArray() : nullptr;
94 for (sal_Int32 i
= 0; i
< nCount
; i
++)
96 sal_Unicode ch
= src
[i
];
// digit: take the corresponding character of the requested digit set
98 newStr
->buffer
[i
] = NumberChar
[number
][ ch
- NUMBER_ZERO
];
// non-digit directly in front of a digit
99 else if (i
+1 < nCount
&& isNumber(src
[i
+1])) {
100 if (i
> 0 && isNumber(src
[i
-1]) && isSeparator(ch
))
101 newStr
->buffer
[i
] = SeparatorChar
[number
] ? SeparatorChar
[number
] : ch
;
103 newStr
->buffer
[i
] = isDecimal(ch
) ? (DecimalChar
[number
] ? DecimalChar
[number
] : ch
) :
104 isMinus(ch
) ? (MinusChar
[number
] ? MinusChar
[number
] : ch
) : ch
;
// everything else is copied unchanged
107 newStr
->buffer
[i
] = ch
;
111 return OUString(newStr
, SAL_NO_ACQUIRE
); // take ownership
// Recursive helper of AsciiToNative(): writes the native text for the digit
// run str[begin .. begin+len) to dst, advancing `count`, and records source
// positions (begin + startPos) in pOffset's array next to the written
// characters.
//   multiChar_index  index into number->multiplierChar of the multiplier that
//                    belongs to this run, or -1 for "no multiplier"
//   number           conversion description (multiplier characters, exponents, flags)
//   numberChar       the ten digit characters of the target digit set
// Visible behaviour:
//  - If len does not exceed the last entry of number->multiplierExponent the
//    run is written directly: either digit by digit (last exponent > 1) or as
//    a single digit, honouring the NUMBER_OMIT_ONE_* and NUMBER_OMIT_ZERO
//    flags, followed by the multiplier character.
//  - Otherwise the run is split along number->multiplierExponent and each
//    part is handled by a recursive call; the results are OR-ed into bPrintPower.
// The direct branch returns whether str[begin] is a non-zero digit.
// NOTE(review): counter increments, offset guards and several returns are not
// part of this listing; verify details against the full file before relying
// on this summary.
114 static bool AsciiToNative_numberMaker(const sal_Unicode
*str
, sal_Int32 begin
, sal_Int32 len
,
115 sal_Unicode
*dst
, sal_Int32
& count
, sal_Int16 multiChar_index
, Sequence
< sal_Int32
>* pOffset
, sal_Int32 startPos
,
116 const Number
*number
, const sal_Unicode
* numberChar
)
// 0 stands for "no multiplier character"
118 sal_Unicode multiChar
= (multiChar_index
== -1 ? 0 : number
->multiplierChar
[multiChar_index
]);
119 auto ppOffset
= pOffset
? pOffset
->getArray() : nullptr;
120 if ( len
<= number
->multiplierExponent
[number
->exponentCount
-1] ) {
121 if (number
->multiplierExponent
[number
->exponentCount
-1] > 1) {
122 bool bNotZero
= false;
123 for (const sal_Int32 end
= begin
+len
; begin
< end
; begin
++) {
// leading zeros are skipped until the first non-zero digit was seen
124 if (bNotZero
|| str
[begin
] != NUMBER_ZERO
) {
125 dst
[count
] = numberChar
[str
[begin
] - NUMBER_ZERO
];
127 ppOffset
[count
] = begin
+ startPos
;
132 if (bNotZero
&& multiChar
> 0) {
133 dst
[count
] = multiChar
;
135 ppOffset
[count
] = begin
+ startPos
;
139 } else if (str
[begin
] != NUMBER_ZERO
) {
// the digit one is left out when the flag for this multiplier asks for it
140 if (!(number
->numberFlag
& (multiChar_index
< 0 ? 0 : NUMBER_OMIT_ONE_CHECK(multiChar_index
))) || str
[begin
] != NUMBER_ONE
) {
141 dst
[count
] = numberChar
[str
[begin
] - NUMBER_ZERO
];
143 ppOffset
[count
] = begin
+ startPos
;
147 dst
[count
] = multiChar
;
149 ppOffset
[count
] = begin
+ startPos
;
// zero digit: written only if zeros are not omitted and the previous
// character is not already a zero
152 } else if (!(number
->numberFlag
& NUMBER_OMIT_ZERO
) && count
> 0 && dst
[count
-1] != numberChar
[0]) {
153 dst
[count
] = numberChar
[0];
155 ppOffset
[count
] = begin
+ startPos
;
158 return str
[begin
] != NUMBER_ZERO
;
160 bool bPrintPower
= false;
161 // sal_Int16 last = 0;
162 for (sal_Int16 i
= 1; i
<= number
->exponentCount
; i
++) {
163 sal_Int32 tmp
= len
- (i
== number
->exponentCount
? 0 : number
->multiplierExponent
[i
]);
165 bPrintPower
|= AsciiToNative_numberMaker(str
, begin
, tmp
, dst
, count
,
166 (i
== number
->exponentCount
? -1 : i
), pOffset
, startPos
, number
, numberChar
);
172 if (count
> 0 && number
->multiplierExponent
[number
->exponentCount
-1] == 1 &&
173 dst
[count
-1] == numberChar
[0])
176 dst
[count
] = multiChar
;
178 ppOffset
[count
] = begin
+ startPos
;
// Converts the ASCII numbers contained in the first nCount characters of inStr
// to native number text as described by `number` (digit set number->number,
// multiplier characters and exponents, NUMBER_OMIT_* flags).
// Visible behaviour:
//  - Digits are collected (thousands separators inside a number are skipped)
//    in srcStr; when the digit run ends it is converted group-wise, each group
//    being number->multiplierExponent[0] digits long, by
//    AsciiToNative_numberMaker().
//  - The main loop runs up to and including i == nCount so that a number at
//    the very end of the input is converted as well.
//  - A number without any non-zero digit yields a single zero character unless
//    NUMBER_OMIT_ONLY_ZERO is set.
//  - A decimal point, minus sign or thousands separator in front of a digit
//    is replaced by the DecimalChar/MinusChar/SeparatorChar entry of the digit
//    set if that entry is non-zero; other characters are copied.
//  - The output buffer holds nCount * 2 + 1 characters; the result is built
//    from its first `count` characters and pOffset is shrunk to that length.
// NOTE(review): parts of this function (counter updates, offset guards, some
// branch keywords, the declaration of aRet) are missing from this listing;
// the pOffset->realloc calls appear here without a visible null check.
186 /// @throws RuntimeException
187 static OUString
AsciiToNative( const OUString
& inStr
, sal_Int32 nCount
,
188 Sequence
< sal_Int32
>* pOffset
, const Number
* number
)
192 sal_Int32 strLen
= inStr
.getLength();
193 const sal_Unicode
*numberChar
= NumberChar
[number
->number
];
200 const sal_Unicode
*str
= inStr
.getStr();
201 std::unique_ptr
<sal_Unicode
[]> newStr(new sal_Unicode
[nCount
* 2 + 1]);
202 std::unique_ptr
<sal_Unicode
[]> srcStr(new sal_Unicode
[nCount
+ 1]); // for keeping number without comma
203 sal_Int32 i
, len
= 0, count
= 0;
206 pOffset
->realloc( nCount
* 2 );
207 auto ppOffset
= pOffset
? pOffset
->getArray() : nullptr;
208 bool bDoDecimal
= false;
// i == nCount is an extra round that flushes a number ending the input
210 for (i
= 0; i
<= nCount
; i
++)
212 if (i
< nCount
&& isNumber(str
[i
])) {
214 newStr
[count
] = numberChar
[str
[i
] - NUMBER_ZERO
];
220 srcStr
[len
++] = str
[i
];
223 if (i
< nCount
-1 && isSeparator(str
[i
]) && isNumber(str
[i
+1]))
224 continue; // skip comma inside number string
225 bool bNotZero
= false;
// walk the collected digits group by group; the first group takes the
// remainder so that all following groups have the full length
226 for (sal_Int32 begin
= 0, end
= len
% number
->multiplierExponent
[0];
227 end
<= len
; begin
= end
, end
+= number
->multiplierExponent
[0]) {
228 if (end
== 0) continue;
229 sal_Int32 _count
= count
;
230 bNotZero
|= AsciiToNative_numberMaker(srcStr
.get(), begin
, end
- begin
, newStr
.get(), count
,
231 end
== len
? -1 : 0, pOffset
, i
- len
, number
, numberChar
);
232 if (count
> 0 && number
->multiplierExponent
[number
->exponentCount
-1] == 1 &&
233 newStr
[count
-1] == numberChar
[0])
235 if (bNotZero
&& _count
== count
&& end
!= len
) {
236 newStr
[count
] = number
->multiplierChar
[0];
238 ppOffset
[count
] = i
- len
;
// the number consisted of zeros only
242 if (! bNotZero
&& ! (number
->numberFlag
& NUMBER_OMIT_ONLY_ZERO
)) {
243 newStr
[count
] = numberChar
[0];
245 ppOffset
[count
] = i
- len
;
251 bDoDecimal
= (!bDoDecimal
&& i
< nCount
-1 && isDecimal(str
[i
]) && isNumber(str
[i
+1]));
253 newStr
[count
] = (DecimalChar
[number
->number
] ? DecimalChar
[number
->number
] : str
[i
]);
254 else if (i
< nCount
-1 && isMinus(str
[i
]) && isNumber(str
[i
+1]))
255 newStr
[count
] = (MinusChar
[number
->number
] ? MinusChar
[number
->number
] : str
[i
]);
256 else if (i
< nCount
-1 && isSeparator(str
[i
]) && isNumber(str
[i
+1]))
257 newStr
[count
] = (SeparatorChar
[number
->number
] ? SeparatorChar
[number
->number
] : str
[i
]);
259 newStr
[count
] = str
[i
];
268 pOffset
->realloc(count
);
269 aRet
= OUString(newStr
.get(), count
);
// Recursive helper of NativeToAscii(): continues reading str after a
// multiplier character and writes ASCII digits to dst.
//   max, prev       exponents handed down by the caller (NativeToAscii passes
//                   the exponent of the multiplier it just read for both)
//   i               read position in str, advanced by this function
//   count           write position in dst, advanced by this function
//   numberChar      all known digit characters, multiplierChar all known
//                   multiplier characters; indexOf() on them classifies str[i]
// Visible behaviour: a multiplier character is translated to its exponent via
// MultiplierExponent_7_CJK; if the exponent is smaller than `prev` and no
// digit preceded it, the digit one is assumed. Zeros are written to fill the
// positions up to `prev`, and for a larger multiplier the function recurses.
// NOTE(review): a large part of the body is missing from this listing, so this
// summary is necessarily incomplete - consult the full file.
276 void NativeToAscii_numberMaker(sal_Int16 max
, sal_Int16 prev
, const sal_Unicode
*str
,
277 sal_Int32
& i
, sal_Int32 nCount
, sal_Unicode
*dst
, sal_Int32
& count
, Sequence
< sal_Int32
>* pOffset
,
278 OUString
& numberChar
, OUString
& multiplierChar
)
280 auto ppOffset
= pOffset
? pOffset
->getArray() : nullptr;
281 sal_Int16 curr
= 0, num
= 0, end
= 0, shift
= 0;
282 while (++i
< nCount
) {
283 if ((curr
= sal::static_int_cast
<sal_Int16
>( numberChar
.indexOf(str
[i
]) )) >= 0) {
287 } else if ((curr
= sal::static_int_cast
<sal_Int16
>( multiplierChar
.indexOf(str
[i
]) )) >= 0) {
// from here on curr is the exponent that belongs to the multiplier character
288 curr
= MultiplierExponent_7_CJK
[curr
% ExponentCount_7_CJK
];
289 if (prev
> curr
&& num
== 0) num
= 1; // One may be omitted in informal format
293 else if (curr
> prev
)
297 while (end
++ < prev
) {
298 dst
[count
] = NUMBER_ZERO
+ (end
== prev
? num
: 0);
305 for (const sal_Int32 countEnd
= count
+shift
; count
< countEnd
; count
++) {
306 dst
[count
] = dst
[count
+ curr
];
308 ppOffset
[count
] = ppOffset
[count
+ curr
];
312 NativeToAscii_numberMaker(max
, curr
, str
, i
, nCount
, dst
,
313 count
, pOffset
, numberChar
, multiplierChar
);
318 while (end
++ < prev
) {
319 dst
[count
] = NUMBER_ZERO
+ (end
== prev
? num
: 0);
321 ppOffset
[count
] = i
- 1;
// Converts native number text in the first nCount characters of inStr back to
// ASCII digits.
// Visible behaviour:
//  - Lookup strings are built from the character tables: all digit characters
//    (10 per digit set), all CJK multiplier characters, and the decimal, minus
//    and separator characters of every digit set.
//  - A multiplier character gets the digit one put in front of it when no
//    digit precedes it in the output, and is then expanded by
//    NativeToAscii_numberMaker().
//  - A digit character is mapped to NUMBER_ZERO + (index % 10).
//  - A separator, decimal or minus character is replaced by its half-width
//    counterpart only when a digit or multiplier character follows.
//  - All other characters are copied unchanged.
//  - The output buffer holds nCount * MultiplierExponent_7_CJK[0] + 2
//    characters; the result is built from its first `count` characters and
//    pOffset is shrunk to that length.
// NOTE(review): some lines (counter updates, offset guards, declarations of i
// and aRet) are missing from this listing; the pOffset->realloc calls appear
// here without a visible null check.
326 /// @throws RuntimeException
327 OUString
NativeToAscii(const OUString
& inStr
,
328 sal_Int32 nCount
, Sequence
< sal_Int32
>* pOffset
)
332 sal_Int32 strLen
= inStr
.getLength();
338 const sal_Unicode
*str
= inStr
.getStr();
339 std::unique_ptr
<sal_Unicode
[]> newStr(new sal_Unicode
[nCount
* MultiplierExponent_7_CJK
[0] + 2]);
341 pOffset
->realloc( nCount
* MultiplierExponent_7_CJK
[0] + 1 );
342 auto ppOffset
= pOffset
? pOffset
->getArray() : nullptr;
343 sal_Int32 count
= 0, index
;
346 OUString numberChar
, multiplierChar
, decimalChar
, separatorChar
;
347 numberChar
= OUString(NumberChar
[0], 10*NumberChar_Count
);
348 multiplierChar
= OUString(MultiplierChar_7_CJK
[0], ExponentCount_7_CJK
*Multiplier_Count
);
349 decimalChar
= OUString(DecimalChar
, NumberChar_Count
);
350 std::u16string_view
const minusChar(MinusChar
, NumberChar_Count
);
351 separatorChar
= OUString(
352 reinterpret_cast<sal_Unicode
*>(SeparatorChar
), NumberChar_Count
);
354 for ( i
= 0; i
< nCount
; i
++) {
355 if ((index
= multiplierChar
.indexOf(str
[i
])) >= 0) {
356 if (count
== 0 || !isNumber(newStr
[count
-1])) { // add 1 in front of multiplier
357 newStr
[count
] = NUMBER_ONE
;
// turn the position in multiplierChar into the multiplier's exponent
362 index
= MultiplierExponent_7_CJK
[index
% ExponentCount_7_CJK
];
363 NativeToAscii_numberMaker(
364 sal::static_int_cast
<sal_Int16
>( index
), sal::static_int_cast
<sal_Int16
>( index
),
365 str
, i
, nCount
, newStr
.get(), count
, pOffset
,
366 numberChar
, multiplierChar
);
368 if ((index
= numberChar
.indexOf(str
[i
])) >= 0)
369 newStr
[count
] = sal::static_int_cast
<sal_Unicode
>( (index
% 10) + NUMBER_ZERO
);
370 else if (separatorChar
.indexOf(str
[i
]) >= 0 &&
371 (i
< nCount
-1 && (numberChar
.indexOf(str
[i
+1]) >= 0 ||
372 multiplierChar
.indexOf(str
[i
+1]) >= 0)))
373 newStr
[count
] = SeparatorChar
[NumberChar_HalfWidth
];
374 else if (decimalChar
.indexOf(str
[i
]) >= 0 &&
375 (i
< nCount
-1 && (numberChar
.indexOf(str
[i
+1]) >= 0 ||
376 multiplierChar
.indexOf(str
[i
+1]) >= 0)))
377 // Only when the decimal point is followed by numbers,
378 // it is converted to the ASCII decimal point
379 newStr
[count
] = DecimalChar
[NumberChar_HalfWidth
];
380 else if (minusChar
.find(str
[i
]) != std::u16string_view::npos
&&
381 (i
< nCount
-1 && (numberChar
.indexOf(str
[i
+1]) >= 0 ||
382 multiplierChar
.indexOf(str
[i
+1]) >= 0)))
383 // Only when the minus sign is followed by numbers,
384 // it is converted to the ASCII minus sign
385 newStr
[count
] = MinusChar
[NumberChar_HalfWidth
];
387 newStr
[count
] = str
[i
];
395 pOffset
->realloc(count
);
397 aRet
= OUString(newStr
.get(), count
);
402 const Number natnum4
[4] = {
403 { NumberChar_Lower_zh
, MultiplierChar_6_CJK
[Multiplier_Lower_zh
], 0,
404 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
405 { NumberChar_Lower_zh
, MultiplierChar_6_CJK
[Multiplier_Lower_zh_TW
], 0,
406 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
407 { NumberChar_Modern_ja
, MultiplierChar_7_CJK
[Multiplier_Modern_ja
], NUMBER_OMIT_ZERO_ONE_67
,
408 ExponentCount_7_CJK
, MultiplierExponent_7_CJK
},
409 { NumberChar_Lower_ko
, MultiplierChar_6_CJK
[Multiplier_Lower_ko
], NUMBER_OMIT_ZERO
,
410 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
413 const Number natnum5
[4] = {
414 { NumberChar_Upper_zh
, MultiplierChar_6_CJK
[Multiplier_Upper_zh
], 0,
415 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
416 { NumberChar_Upper_zh_TW
, MultiplierChar_6_CJK
[Multiplier_Upper_zh_TW
], 0,
417 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
418 { NumberChar_Traditional_ja
, MultiplierChar_7_CJK
[Multiplier_Traditional_ja
], NUMBER_OMIT_ZERO_ONE_67
,
419 ExponentCount_7_CJK
, MultiplierExponent_7_CJK
},
420 { NumberChar_Upper_ko
, MultiplierChar_6_CJK
[Multiplier_Upper_ko
], 0,
421 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
424 const Number natnum6
[4] = {
425 { NumberChar_FullWidth
, MultiplierChar_6_CJK
[Multiplier_Lower_zh
], 0,
426 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
427 { NumberChar_FullWidth
, MultiplierChar_6_CJK
[Multiplier_Lower_zh_TW
], 0,
428 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
429 { NumberChar_FullWidth
, MultiplierChar_7_CJK
[Multiplier_Modern_ja
], NUMBER_OMIT_ZERO_ONE_67
,
430 ExponentCount_7_CJK
, MultiplierExponent_7_CJK
},
431 { NumberChar_FullWidth
, MultiplierChar_6_CJK
[Multiplier_Hangul_ko
], NUMBER_OMIT_ZERO
,
432 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
435 const Number natnum7
[4] = {
436 { NumberChar_Lower_zh
, MultiplierChar_6_CJK
[Multiplier_Lower_zh
], NUMBER_OMIT_ALL
,
437 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
438 { NumberChar_Lower_zh
, MultiplierChar_6_CJK
[Multiplier_Lower_zh_TW
], NUMBER_OMIT_ALL
,
439 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
440 { NumberChar_Modern_ja
, MultiplierChar_2_CJK
[Multiplier_Modern_ja
], NUMBER_OMIT_ZERO_ONE
,
441 ExponentCount_2_CJK
, MultiplierExponent_2_CJK
},
442 { NumberChar_Lower_ko
, MultiplierChar_6_CJK
[Multiplier_Lower_ko
], NUMBER_OMIT_ALL
,
443 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
446 const Number natnum8
[4] = {
447 { NumberChar_Upper_zh
, MultiplierChar_6_CJK
[Multiplier_Upper_zh
], NUMBER_OMIT_ALL
,
448 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
449 { NumberChar_Upper_zh_TW
, MultiplierChar_6_CJK
[Multiplier_Upper_zh_TW
], NUMBER_OMIT_ALL
,
450 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
451 { NumberChar_Traditional_ja
, MultiplierChar_2_CJK
[Multiplier_Traditional_ja
], NUMBER_OMIT_ZERO_ONE
,
452 ExponentCount_2_CJK
, MultiplierExponent_2_CJK
},
453 { NumberChar_Upper_ko
, MultiplierChar_6_CJK
[Multiplier_Upper_ko
], NUMBER_OMIT_ALL
,
454 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
457 const Number natnum10
= { NumberChar_Hangul_ko
, MultiplierChar_6_CJK
[Multiplier_Hangul_ko
], NUMBER_OMIT_ZERO
,
458 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
};
459 const Number natnum11
= { NumberChar_Hangul_ko
, MultiplierChar_6_CJK
[Multiplier_Hangul_ko
], NUMBER_OMIT_ALL
,
460 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
};
// Language table: the position of a language in natnum1Locales is its
// "language number" (see getLanguageNumber()), which in turn indexes natnum1
// and natnum2 below as well as the natnum4..natnum8 tables.
// NOTE(review): the table rows are missing from this listing.
462 //! ATTENTION: Do not change order of elements!
463 //! Append new languages to the end of the list!
464 const char *natnum1Locales
[] = {
492 const sal_Int16 nbOfLocale
= SAL_N_ELEMENTS(natnum1Locales
);
// Digit set (row of NumberChar) used for NatNum1, indexed by language number.
494 //! ATTENTION: Do not change order of elements!
495 //! Number and order must match elements of natnum1Locales!
496 const sal_Int16 natnum1
[] = {
499 NumberChar_Modern_ja
,
521 NumberChar_EastIndic_ar
,
524 const sal_Int16 sizeof_natnum1
= SAL_N_ELEMENTS(natnum1
);
// Digit set (row of NumberChar) used for NatNum2, indexed by language number.
526 //! ATTENTION: Do not change order of elements!
527 //! Order must match first elements of natnum1Locales!
528 const sal_Int16 natnum2
[] = {
530 NumberChar_Upper_zh_TW
,
531 NumberChar_Traditional_ja
,
535 const sal_Int16 sizeof_natnum2
= SAL_N_ELEMENTS(natnum2
);
537 sal_Int16
getLanguageNumber( const Locale
& rLocale
)
539 // return zh_TW for TW, HK and MO, return zh_CN for other zh locales.
540 if (rLocale
.Language
== "zh") return MsLangId::isTraditionalChinese(rLocale
) ? 1 : 0;
542 for (sal_Int16 i
= 2; i
< nbOfLocale
; i
++)
543 if (rLocale
.Language
.equalsAsciiL(natnum1Locales
[i
], 2))
551 sal_Unicode DecimalSeparator
;
552 sal_Unicode ThousandSeparator
;
553 Separators(const Locale
& rLocale
)
555 LocaleDataItem aLocaleItem
= LocaleDataImpl::get()->getLocaleItem(rLocale
);
556 DecimalSeparator
= aLocaleItem
.decimalSeparator
.toChar();
557 ThousandSeparator
= aLocaleItem
.thousandSeparator
.toChar();
561 Separators
getLocaleSeparators(const Locale
& rLocale
, const OUString
& rLocStr
)
563 // Guard the static variable below.
564 std::scoped_lock
aGuard(theNatNumMutex
);
565 // Maximum a couple hundred of pairs with 4-byte structs - so no need for smart managing
566 static std::unordered_map
<OUString
, Separators
> aLocaleSeparatorsBuf
;
567 auto it
= aLocaleSeparatorsBuf
.find(rLocStr
);
568 if (it
== aLocaleSeparatorsBuf
.end())
570 it
= aLocaleSeparatorsBuf
.emplace(rLocStr
, Separators(rLocale
)).first
;
// Spells out the number at the start of rNumberString for rLocale using the
// NumberText service; sNumberTextParams selects the kind of number text
// ("cardinal" or empty means no prefix is passed to the service).
// Visible behaviour:
//  - The input is scanned for digits, the locale's decimal separator, the
//    locale's thousands separator (only after at least one counted character)
//    and a minus sign (only before any counted character). If nothing was
//    counted, the whole input string is passed to the service instead (used
//    for month and day names).
//  - Conversion results are kept in a static two-level map: number string
//    first, then prefix + BCP 47 locale string.
//  - Whatever follows the scanned part of rNumberString is appended unchanged.
// NOTE(review): the bodies of the scanning branches, the condition around the
// service call and the return statement are missing from this listing.
575 OUString
getNumberText(const Locale
& rLocale
, const OUString
& rNumberString
,
576 std::u16string_view sNumberTextParams
)
578 sal_Int32 i
, count
= 0;
579 const sal_Int32 len
= rNumberString
.getLength();
580 const sal_Unicode
* src
= rNumberString
.getStr();
582 OUString aLoc
= LanguageTag::convertToBcp47(rLocale
);
583 Separators aSeparators
= getLocaleSeparators(rLocale
, aLoc
);
585 OUStringBuffer
sBuf(len
);
586 for (i
= 0; i
< len
; i
++)
588 sal_Unicode ch
= src
[i
];
589 if (isNumber(ch
) || ch
== aSeparators
.DecimalSeparator
)
594 else if (ch
== aSeparators
.ThousandSeparator
&& count
> 0)
596 else if (isMinus(ch
) && count
== 0)
602 // Handle also month and day names for NatNum12 date formatting
603 const OUString aNumberStr
= (count
== 0) ? rNumberString
: sBuf
.makeStringAndClear();
// the service instance is created once, on first use
605 static auto xNumberText
606 = css::linguistic2::NumberText::create(comphelper::getProcessComponentContext());
608 // Guard the static variables below.
609 std::scoped_lock
aGuard( theNatNumMutex
);
611 OUString numbertext_prefix
;
612 // default "cardinal" gets empty prefix
613 if (!sNumberTextParams
.empty() && sNumberTextParams
!= u
"cardinal")
614 numbertext_prefix
= OUString::Concat(sNumberTextParams
) + " ";
615 // Several hundreds of headings could result typing lags because
616 // of the continuous update of the multiple number names during typing.
617 // We fix this by buffering the result of the conversion.
618 static std::unordered_map
<OUString
, std::map
<OUString
, OUString
>> aBuff
;
619 auto& rItems
= aBuff
[aNumberStr
];
620 auto& rItem
= rItems
[numbertext_prefix
+ aLoc
];
623 rItem
= xNumberText
->getNumberText(numbertext_prefix
+ aNumberStr
, rLocale
);
624 // use number at missing number to text conversion
628 OUString sResult
= rItem
;
// append the part of the input that was not scanned as a number
629 if (i
!= 0 && i
< len
)
630 sResult
+= rNumberString
.subView(i
);
// Converts aNumberString to the native number representation selected by
// nNativeNumberMode for rLocale.
//   pOffset              optional; passed on to the converters, which store
//                        source positions of the generated characters in it
//   rNativeNumberParams  only used for NATNUM12: optional casing prefix
//                        ("capitalize", "upper", "lower", "title") followed by
//                        the parameters for getNumberText()
// Visible behaviour:
//  - If the mode is not valid for the locale the input is returned unchanged.
//  - NATNUM12: a casing prefix counts only if it is the whole parameter string
//    or is followed by a space; the rest is passed to getNumberText() and the
//    result is re-cased. For English, title case additionally capitalizes the
//    parts of hyphenated words and keeps " and " in lower case.
//  - Other modes select either a digit set (`num`) or a Number description
//    (`number`). Before converting, the half-width and full-width decimal and
//    thousands separator characters are refreshed from the locale data when
//    rLocale differs from the member aLocale.
//  - Conversion is then done by AsciiToNative(), the Hebrew or Cyrillic
//    converter, or AsciiToNativeChar(); otherwise the input is returned as is.
// NOTE(review): a number of lines (local declarations, break statements, the
// NATNUM10/NATNUM11 assignments, branch keywords) are missing from this listing.
635 OUString
NativeNumberSupplierService::getNativeNumberString(const OUString
& aNumberString
, const Locale
& rLocale
,
636 sal_Int16 nNativeNumberMode
,
637 Sequence
<sal_Int32
>* pOffset
,
638 std::u16string_view rNativeNumberParams
)
640 if (!isValidNatNumImpl(rLocale
, nNativeNumberMode
))
641 return aNumberString
;
643 if (nNativeNumberMode
== NativeNumberMode::NATNUM12
)
645 // handle capitalization prefixes "capitalize", "upper", "lower" and "title"
657 std::u16string_view aLiteral
;
661 static const CasingEntry Casings
[] =
663 { std::u16string_view(u
"capitalize"), CAPITALIZE
},
664 { std::u16string_view(u
"upper"), UPPER
},
665 { std::u16string_view(u
"lower"), LOWER
},
666 { std::u16string_view(u
"title"), TITLE
}
669 std::size_t nStripCase
= 0;
671 for (nCasing
= 0; nCasing
< std::size(Casings
); ++nCasing
)
673 if (o3tl::starts_with(rNativeNumberParams
, Casings
[nCasing
].aLiteral
))
675 nStripCase
= Casings
[nCasing
].aLiteral
.size();
// the prefix must be the whole parameter or be followed by a space; the
// post-increment makes nStripCase skip that space
680 if (nStripCase
> 0 && (rNativeNumberParams
.size() == nStripCase
||
681 rNativeNumberParams
[nStripCase
++] == ' '))
683 OUString aStr
= getNumberText(rLocale
, aNumberString
, rNativeNumberParams
.substr(nStripCase
));
685 if (!xCharClass
.is())
686 xCharClass
= CharacterClassification::create(comphelper::getProcessComponentContext());
// NOTE(review): the casing calls below pass the member aLocale, whereas the
// text was generated for rLocale and the English check uses rLocale - confirm
// that aLocale is meant here.
688 switch (Casings
[nCasing
].eCasing
)
691 return xCharClass
->toTitle(aStr
, 0, 1, aLocale
) +
692 (aStr
.getLength() > 1 ? aStr
.subView(1) : u
"");
694 return xCharClass
->toUpper(aStr
, 0, aStr
.getLength(), aLocale
);
696 return xCharClass
->toLower(aStr
, 0, aStr
.getLength(), aLocale
);
699 if ( rLocale
.Language
== "en" )
701 // title case is common in English, so fix bugs of toTitle():
702 // not "One Dollar *And* *Twenty-two* Cents", but
703 // "One Dollar *and* *Twenty-Two* Cents".
705 // Add spaces after hyphens to separate the elements of the
706 // hyphenated compound words temporarily, allowing their
707 // capitalization by toTitle()
708 aStr
= aStr
.replaceAll("-", "- ");
709 aStr
= xCharClass
->toTitle(aStr
, 0, aStr
.getLength(), aLocale
);
710 return aStr
.replaceAll("- ", "-").replaceAll(" And ", " and ");
713 return xCharClass
->toTitle(aStr
, 0, aStr
.getLength(), aLocale
);
// no (valid) casing prefix: pass the parameters through unchanged
719 return getNumberText(rLocale
, aNumberString
, rNativeNumberParams
);
723 sal_Int16 langnum
= getLanguageNumber(rLocale
);
725 return aNumberString
;
727 const Number
*number
= nullptr;
730 switch (nNativeNumberMode
)
732 case NativeNumberMode::NATNUM0
: // Ascii
733 return NativeToAscii(aNumberString
, aNumberString
.getLength(), pOffset
);
734 case NativeNumberMode::NATNUM1
: // Char, Lower
735 num
= natnum1
[langnum
];
737 case NativeNumberMode::NATNUM2
: // Char, Upper
738 num
= natnum2
[langnum
];
740 case NativeNumberMode::NATNUM3
: // Char, FullWidth
741 num
= NumberChar_FullWidth
;
743 case NativeNumberMode::NATNUM4
: // Text, Lower, Long
744 number
= &natnum4
[langnum
];
746 case NativeNumberMode::NATNUM5
: // Text, Upper, Long
747 number
= &natnum5
[langnum
];
749 case NativeNumberMode::NATNUM6
: // Text, FullWidth
750 number
= &natnum6
[langnum
];
752 case NativeNumberMode::NATNUM7
: // Text, Lower, Short
753 number
= &natnum7
[langnum
];
755 case NativeNumberMode::NATNUM8
: // Text, Upper, Short
756 number
= &natnum8
[langnum
];
758 case NativeNumberMode::NATNUM9
: // Char, Hangul
759 num
= NumberChar_Hangul_ko
;
761 case NativeNumberMode::NATNUM10
: // Text, Hangul, Long
764 case NativeNumberMode::NATNUM11
: // Text, Hangul, Short
771 if (number
|| num
>= 0) {
772 if (aLocale
.Language
!= rLocale
.Language
||
773 aLocale
.Country
!= rLocale
.Country
||
774 aLocale
.Variant
!= rLocale
.Variant
) {
775 LocaleDataItem item
= LocaleDataImpl::get()->getLocaleItem( rLocale
);
// NOTE(review): DecimalChar and SeparatorChar are written here and read by the
// converter functions; no locking is visible in this listing - confirm whether
// concurrent callers are possible.
// Full-width counterpart: printable ASCII (0x21..0x7E) is shifted by 0xFEE0,
// anything else falls back to a fixed full-width character.
777 DecimalChar
[NumberChar_HalfWidth
]=item
.decimalSeparator
.toChar();
778 if (DecimalChar
[NumberChar_HalfWidth
] > 0x7E || DecimalChar
[NumberChar_HalfWidth
] < 0x21)
779 DecimalChar
[NumberChar_FullWidth
]=0xFF0E;
781 DecimalChar
[NumberChar_FullWidth
]=DecimalChar
[NumberChar_HalfWidth
]+0xFEE0;
782 SeparatorChar
[NumberChar_HalfWidth
]=item
.thousandSeparator
.toChar();
783 if (SeparatorChar
[NumberChar_HalfWidth
] > 0x7E || SeparatorChar
[NumberChar_HalfWidth
] < 0x21)
784 SeparatorChar
[NumberChar_FullWidth
]=0xFF0C;
786 SeparatorChar
[NumberChar_FullWidth
]=SeparatorChar
[NumberChar_HalfWidth
]+0xFEE0;
789 return AsciiToNative( aNumberString
, aNumberString
.getLength(), pOffset
, number
);
790 else if (num
== NumberChar_he
)
// the geresh style is requested for NATNUM2 only
791 return getHebrewNativeNumberString(aNumberString
,
792 nNativeNumberMode
== NativeNumberMode::NATNUM2
);
793 else if (num
== NumberChar_cu
)
794 return getCyrillicNativeNumberString(aNumberString
);
796 return AsciiToNativeChar(aNumberString
, aNumberString
.getLength(), pOffset
, num
);
799 return aNumberString
;
802 OUString SAL_CALL
NativeNumberSupplierService::getNativeNumberString(const OUString
& aNumberString
, const Locale
& rLocale
,
803 sal_Int16 nNativeNumberMode
)
805 return getNativeNumberString(aNumberString
, rLocale
, nNativeNumberMode
, nullptr);
808 OUString SAL_CALL
NativeNumberSupplierService::getNativeNumberStringParams(
809 const OUString
& rNumberString
, const css::lang::Locale
& rLocale
, sal_Int16 nNativeNumberMode
,
810 const OUString
& rNativeNumberParams
)
812 return getNativeNumberString(rNumberString
, rLocale
, nNativeNumberMode
, nullptr, rNativeNumberParams
);
// Converts the single character inChar for rLocale and nNativeNumberMode.
// Visible behaviour:
//  - NATNUM0 walks over every row of NumberChar and its ten digit characters
//    (presumably to map a native digit back to ASCII - the loop body is not
//    part of this listing).
//  - For the other modes the input is checked with isNumber() and the mode is
//    validated for the locale, then the digit is looked up in the digit set
//    that belongs to the mode: natnum1 for the "lower" modes, natnum2 for the
//    "upper" modes, the full-width set, or the Hangul set.
// NOTE(review): the statements executed when a check fails and the end of the
// function are missing from this listing.
815 sal_Unicode
NativeNumberSupplierService::getNativeNumberChar( const sal_Unicode inChar
, const Locale
& rLocale
, sal_Int16 nNativeNumberMode
)
817 if (nNativeNumberMode
== NativeNumberMode::NATNUM0
) { // Ascii
818 for (const auto & i
: NumberChar
)
819 for (sal_Int16 j
= 0; j
< 10; j
++)
825 if (!isNumber(inChar
))
828 if (!isValidNatNumImpl(rLocale
, nNativeNumberMode
))
831 sal_Int16 langnum
= getLanguageNumber(rLocale
);
835 switch (nNativeNumberMode
)
837 case NativeNumberMode::NATNUM1
: // Char, Lower
838 case NativeNumberMode::NATNUM4
: // Text, Lower, Long
839 case NativeNumberMode::NATNUM7
: // Text, Lower, Short
840 return NumberChar
[natnum1
[langnum
]][inChar
- NUMBER_ZERO
];
841 case NativeNumberMode::NATNUM2
: // Char, Upper
842 case NativeNumberMode::NATNUM5
: // Text, Upper, Long
843 case NativeNumberMode::NATNUM8
: // Text, Upper, Short
844 return NumberChar
[natnum2
[langnum
]][inChar
- NUMBER_ZERO
];
845 case NativeNumberMode::NATNUM3
: // Char, FullWidth
846 case NativeNumberMode::NATNUM6
: // Text, FullWidth
847 return NumberChar
[NumberChar_FullWidth
][inChar
- NUMBER_ZERO
];
848 case NativeNumberMode::NATNUM9
: // Char, Hangul
849 case NativeNumberMode::NATNUM10
: // Text, Hangul, Long
850 case NativeNumberMode::NATNUM11
: // Text, Hangul, Short
851 return NumberChar
[NumberChar_Hangul_ko
][inChar
- NUMBER_ZERO
];
859 bool NativeNumberSupplierService::isValidNatNumImpl( const Locale
& rLocale
, sal_Int16 nNativeNumberMode
)
861 sal_Int16 langnum
= getLanguageNumber(rLocale
);
863 switch (nNativeNumberMode
) {
864 case NativeNumberMode::NATNUM0
: // Ascii
865 case NativeNumberMode::NATNUM3
: // Char, FullWidth
866 case NativeNumberMode::NATNUM12
: // spell out numbers, dates and money amounts
868 case NativeNumberMode::NATNUM1
: // Char, Lower
869 return (langnum
>= 0);
870 case NativeNumberMode::NATNUM2
: // Char, Upper
871 if (langnum
== 4) // Hebrew numbering
874 case NativeNumberMode::NATNUM4
: // Text, Lower, Long
875 case NativeNumberMode::NATNUM5
: // Text, Upper, Long
876 case NativeNumberMode::NATNUM6
: // Text, FullWidth
877 case NativeNumberMode::NATNUM7
: // Text. Lower, Short
878 case NativeNumberMode::NATNUM8
: // Text, Upper, Short
879 return (langnum
>= 0 && langnum
< 4); // CJK numbering
880 case NativeNumberMode::NATNUM9
: // Char, Hangul
881 case NativeNumberMode::NATNUM10
: // Text, Hangul, Long
882 case NativeNumberMode::NATNUM11
: // Text, Hangul, Short
883 return (langnum
== 3); // Korean numbering
// Describes nNativeNumberMode for rLocale as NativeNumberXmlAttributes: the
// locale, a one-character format string (the character for the digit one of
// the chosen digit set) and a style string taken from attType ("short",
// "medium" or "long").
// The defaults are the half-width digit set and "short"; the switch is only
// entered when the mode is valid for the locale.
// NOTE(review): the assignments to `type` and the break statements of most
// cases are missing from this listing, so the style chosen per mode cannot be
// read off here; presumably it mirrors convertFromXmlAttributes() - verify
// against the full file.
888 NativeNumberXmlAttributes SAL_CALL
NativeNumberSupplierService::convertToXmlAttributes( const Locale
& rLocale
, sal_Int16 nNativeNumberMode
)
890 static const sal_Int16 attShort
= 0;
891 static const sal_Int16 attMedium
= 1;
892 static const sal_Int16 attLong
= 2;
// indexed by attShort / attMedium / attLong
893 static const char *attType
[] = { "short", "medium", "long" };
895 sal_Int16 number
= NumberChar_HalfWidth
, type
= attShort
;
897 sal_Int16 langnum
= -1;
898 if (isValidNatNum(rLocale
, nNativeNumberMode
)) {
899 langnum
= getLanguageNumber(rLocale
);
902 switch (nNativeNumberMode
) {
903 case NativeNumberMode::NATNUM0
: // Ascii
904 number
= NumberChar_HalfWidth
;
907 case NativeNumberMode::NATNUM1
: // Char, Lower
908 number
= natnum1
[langnum
];
911 case NativeNumberMode::NATNUM2
: // Char, Upper
912 number
= natnum2
[langnum
];
// Hebrew is the one digit set reported as "medium" for NATNUM2
913 type
= number
== NumberChar_he
? attMedium
: attShort
;
915 case NativeNumberMode::NATNUM3
: // Char, FullWidth
916 number
= NumberChar_FullWidth
;
919 case NativeNumberMode::NATNUM4
: // Text, Lower, Long
920 number
= natnum1
[langnum
];
923 case NativeNumberMode::NATNUM5
: // Text, Upper, Long
924 number
= natnum2
[langnum
];
927 case NativeNumberMode::NATNUM6
: // Text, FullWidth
928 number
= NumberChar_FullWidth
;
931 case NativeNumberMode::NATNUM7
: // Text, Lower, Short
932 number
= natnum1
[langnum
];
935 case NativeNumberMode::NATNUM8
: // Text, Upper, Short
936 number
= natnum2
[langnum
];
939 case NativeNumberMode::NATNUM9
: // Char, Hangul
940 number
= NumberChar_Hangul_ko
;
943 case NativeNumberMode::NATNUM10
: // Text, Hangul, Long
944 number
= NumberChar_Hangul_ko
;
947 case NativeNumberMode::NATNUM11
: // Text, Hangul, Short
948 number
= NumberChar_Hangul_ko
;
// the format string is the digit one of the selected digit set
955 return NativeNumberXmlAttributes(rLocale
, OUString(&NumberChar
[number
][1], 1),
956 OUString::createFromAscii(attType
[type
]));
959 static bool natNumIn(sal_Int16 num
, const sal_Int16 natnum
[], sal_Int16 len
)
961 for (sal_Int16 i
= 0; i
< len
; i
++)
962 if (natnum
[i
] == num
)
967 sal_Int16 SAL_CALL
NativeNumberSupplierService::convertFromXmlAttributes( const NativeNumberXmlAttributes
& aAttr
)
969 sal_Unicode numberChar
[NumberChar_Count
];
970 for (sal_Int16 i
= 0; i
< NumberChar_Count
; i
++)
971 numberChar
[i
] = NumberChar
[i
][1];
972 OUString
number(numberChar
, NumberChar_Count
);
974 sal_Int16 num
= sal::static_int_cast
<sal_Int16
>( number
.indexOf(aAttr
.Format
) );
976 if ( aAttr
.Style
== "short" ) {
977 if (num
== NumberChar_FullWidth
)
978 return NativeNumberMode::NATNUM3
;
979 else if (num
== NumberChar_Hangul_ko
)
980 return NativeNumberMode::NATNUM9
;
981 else if (natNumIn(num
, natnum1
, sizeof_natnum1
))
982 return NativeNumberMode::NATNUM1
;
983 else if (natNumIn(num
, natnum2
, sizeof_natnum2
))
984 return NativeNumberMode::NATNUM2
;
985 } else if ( aAttr
.Style
== "medium" ) {
986 if (num
== NumberChar_Hangul_ko
)
987 return NativeNumberMode::NATNUM11
;
988 else if (num
== NumberChar_he
)
989 return NativeNumberMode::NATNUM2
;
990 else if (natNumIn(num
, natnum1
, sizeof_natnum1
))
991 return NativeNumberMode::NATNUM7
;
992 else if (natNumIn(num
, natnum2
, sizeof_natnum2
))
993 return NativeNumberMode::NATNUM8
;
994 } else if ( aAttr
.Style
== "long" ) {
995 if (num
== NumberChar_FullWidth
)
996 return NativeNumberMode::NATNUM6
;
997 else if (num
== NumberChar_Hangul_ko
)
998 return NativeNumberMode::NATNUM10
;
999 else if (natNumIn(num
, natnum1
, sizeof_natnum1
))
1000 return NativeNumberMode::NATNUM4
;
1001 else if (natNumIn(num
, natnum2
, sizeof_natnum2
))
1002 return NativeNumberMode::NATNUM5
;
1004 throw RuntimeException();
1006 return NativeNumberMode::NATNUM0
;
1010 // The following code generates Hebrew numbers.
1011 // For details of the Hebrew numbering system see
1012 // http://smontagu.org/writings/HebrewNumbers.html
// One entry of the value table used by makeHebrewNumber(); the code reads the
// members `code` (character to append) and `value` (amount it stands for).
// NOTE(review): the member declarations and the table rows are missing from
// this listing.
1016 struct HebrewNumberChar
{
1023 HebrewNumberChar
const HebrewNumberCharArray
[] = {
// Zero-terminated strings appended for the thousands part of a number
// (see makeHebrewNumber()).
1049 const sal_Unicode thousand
[] = {0x05d0, 0x05dc, 0x05e3, 0x0};
1050 const sal_Unicode thousands
[] = {0x05d0, 0x05dc, 0x05e4, 0x05d9, 0x0};
1051 const sal_Unicode thousands_last
[] = {0x05d0, 0x05dc, 0x05e4, 0x05d9, 0x05dd, 0x0};
// Punctuation marks: geresh is appended after a single-letter number,
// gershayim is inserted before the last letter of a multi-letter number.
1052 const sal_Unicode geresh
= 0x05f3;
1053 const sal_Unicode gershayim
= 0x05f4;
// Appends the Hebrew numeral representation of `value` to `output`.
//
// The number is processed in groups of three decimal digits: the higher groups
// are produced by a recursive call on value / 1000, the lowest group
// (value % 1000) is spelled with the letters of HebrewNumberCharArray.
//
//   value      number to convert
//   output     buffer the characters are appended to
//   isLast     selects `thousands_last` instead of `thousands` for a zero group
//   useGeresh  forwarded unchanged to the recursive call
//
// NOTE(review): this listing omits several lines of the function (among them
// the guards around the recursion, around the thousands word and around the
// geresh/gershayim handling); the comments below describe only what is visible.
1055 static void makeHebrewNumber(sal_Int64 value
, OUStringBuffer
& output
, bool isLast
, bool useGeresh
)
// Lowest three decimal digits of the value.
1057 sal_Int16 num
= sal::static_int_cast
<sal_Int16
>(value
% 1000);
// Higher-order groups first; the recursive call is told it is "last" only
// when the current group is non-zero.
1060 makeHebrewNumber(value
/ 1000, output
, num
!= 0, useGeresh
);
// Word for "thousand(s)": singular form for exactly 1000, otherwise the form
// selected by isLast.
1064 output
.append(value
== 1000 ? thousand
: isLast
? thousands_last
: thousands
);
1066 sal_Int16 nbOfChar
= 0;
// Walk the table while something remains; take an entry whenever its value
// still fits into the remainder.
1067 for (sal_Int32 j
= 0; num
> 0 && j
< sal_Int32(std::size(HebrewNumberCharArray
)); j
++) {
1068 if (num
- HebrewNumberCharArray
[j
].value
>= 0) {
1070 // https://en.wikipedia.org/wiki/Hebrew_numerals#Key_exceptions
1071 // By convention, the numbers 15 and 16 are represented as 9 + 6 and 9 + 7
1072 if (num
== 15 || num
== 16) // substitution for 15 and 16
1074 assert(j
< sal_Int32(SAL_N_ELEMENTS(HebrewNumberCharArray
)));
// Consume the entry: reduce the remainder and emit its letter.
1075 num
= sal::static_int_cast
<sal_Int16
>( num
- HebrewNumberCharArray
[j
].value
);
1076 output
.append(HebrewNumberCharArray
[j
].code
);
// Punctuation: gershayim goes before the last letter of a multi-letter
// number, geresh after a single letter.
1080 if (nbOfChar
> 1) // a number is written as more than one character
1081 output
.insert(output
.getLength() - 1, gershayim
);
1082 else if (nbOfChar
== 1) // a number is written as a single character
1083 output
.append(geresh
);
// Converts the decimal number at the start of aNumberString into Hebrew
// numerals (via makeHebrewNumber) and returns the result followed by the
// unparsed rest of the input.
//
//   aNumberString  text starting with the number to convert
//   useGeresh      forwarded to makeHebrewNumber()
//
// Returns aNumberString unchanged when the digit count reaches 20, and on the
// final fall-through path.
// NOTE(review): the listing omits the digit test, the loop exit and the guard
// around the conversion; presumably the fall-through is taken when no positive
// value was parsed -- confirm against the full file.
1088 OUString
getHebrewNativeNumberString(const OUString
& aNumberString
, bool useGeresh
)
1090 sal_Int64 value
= 0;
1091 sal_Int32 i
, count
= 0, len
= aNumberString
.getLength();
1092 const sal_Unicode
*src
= aNumberString
.getStr();
// Accumulate the decimal value; `count` is the number of digits seen so far.
1094 for (i
= 0; i
< len
; i
++) {
1095 sal_Unicode ch
= src
[i
];
1097 if (++count
>= 20) // Number is too long, could not be handled.
1098 return aNumberString
;
1099 value
= value
* 10 + (ch
- NUMBER_ZERO
);
// Separators are skipped once digits have started, a minus sign only before
// the first digit.
1101 else if (isSeparator(ch
) && count
> 0) continue;
1102 else if (isMinus(ch
) && count
== 0) continue;
// Initial capacity: room for the converted digits plus the unparsed tail.
1107 OUStringBuffer
output(count
*2 + 2 + len
- i
);
1109 makeHebrewNumber(value
, output
, true, useGeresh
);
// Keep whatever followed the number in the input.
1112 output
.append(aNumberString
.subView(i
));
1114 return output
.makeStringAndClear();
1117 return aNumberString
;
1120 // Support for Cyrillic Numerals
1121 // See UTN 41 for implementation information
1122 // http://www.unicode.org/notes/tn41/
1124 const sal_Unicode cyrillicThousandsMark
= 0x0482;
1125 const sal_Unicode cyrillicTitlo
= 0x0483;
1126 const sal_Unicode cyrillicTen
= 0x0456;
// Element type of the Cyrillic numeral table. makeCyrillicNumber() reads two
// members of each entry: `value` (compared with and subtracted from the
// remaining number) and `code` (appended to the output).
// NOTE(review): the member declarations themselves are not visible in this listing.
1130 struct CyrillicNumberChar
{
// Table walked front to back by makeCyrillicNumber().
// NOTE(review): the entries are not visible here; presumably ordered from the
// largest value to the smallest -- confirm against the full file.
1137 CyrillicNumberChar
const CyrillicNumberCharArray
[] = {
// Appends the Cyrillic numeral representation of `value` to `output`.
//
//   value     number to convert
//   output    buffer the characters are appended to
//   addTitlo  the top-level caller passes true, the recursive calls pass false
//
// NOTE(review): this listing omits several lines of the function (bodies of
// two `if` statements in the thousands branch, statements inside the 11..19
// branch, `else` lines and the guard that uses addTitlo); the comments below
// describe only what is visible.
1167 static void makeCyrillicNumber(sal_Int64 value
, OUStringBuffer
& output
, bool addTitlo
)
// Lowest three decimal digits of the value.
1169 sal_Int16 num
= sal::static_int_cast
<sal_Int16
>(value
% 1000);
// Thousands: emit the thousands sign, then the thousands count recursively.
1170 if (value
>= 1000) {
1171 output
.append(cyrillicThousandsMark
);
1172 makeCyrillicNumber(value
/ 1000, output
, false);
1173 if (value
>= 10000 && (value
- 10000) % 1000 != 0) {
1176 if (value
% 1000 == 0)
// Spell the remaining part with the table letters.
1180 for (sal_Int32 j
= 0; num
> 0 && j
< sal_Int32(std::size(CyrillicNumberCharArray
)); j
++) {
// 11..19: the recursive call is emitted first and the letter for ten follows it.
1181 if (num
< 20 && num
> 10) {
1183 makeCyrillicNumber(num
, output
, false);
1184 output
.append(cyrillicTen
);
// Take an entry whenever its value still fits into the remainder.
1188 if (CyrillicNumberCharArray
[j
].value
<= num
) {
1189 output
.append(CyrillicNumberCharArray
[j
].code
);
1190 num
= sal::static_int_cast
<sal_Int16
>( num
- CyrillicNumberCharArray
[j
].value
);
// Titlo placement depends on how many characters have been produced.
1197 if (output
.getLength() == 1) {
1198 output
.append(cyrillicTitlo
);
1199 } else if (output
.getLength() == 2) {
1200 if (value
> 800 && value
< 900) {
1201 output
.append(cyrillicTitlo
);
1203 output
.insert(1, cyrillicTitlo
);
1205 } else if (output
.getLength() > 2) {
// A space in the second-to-last position puts the titlo at the very end;
// the other visible statement inserts it before the last character.
1206 if (output
.indexOf(" ") == output
.getLength() - 2) {
1207 output
.append(cyrillicTitlo
);
1209 output
.insert(output
.getLength() - 1, cyrillicTitlo
);
// Converts the decimal number at the start of aNumberString into Cyrillic
// numerals (via makeCyrillicNumber) and returns the result followed by the
// unparsed rest of the input.
//
//   aNumberString  text starting with the number to convert
//
// Returns aNumberString unchanged when the digit count reaches 8, and on the
// final fall-through path.
// NOTE(review): the listing omits the digit test, the loop exit and the guard
// around the conversion; presumably the fall-through is taken when no positive
// value was parsed -- confirm against the full file.
1214 OUString
getCyrillicNativeNumberString(const OUString
& aNumberString
)
1216 sal_Int64 value
= 0;
1217 sal_Int32 i
, count
= 0, len
= aNumberString
.getLength();
1218 const sal_Unicode
*src
= aNumberString
.getStr();
// Accumulate the decimal value; `count` is the number of digits seen so far.
1220 for (i
= 0; i
< len
; i
++) {
1221 sal_Unicode ch
= src
[i
];
1223 if (++count
>= 8) // Number is too long, could not be handled.
1224 return aNumberString
;
1225 value
= value
* 10 + (ch
- NUMBER_ZERO
);
// Separators are skipped once digits have started, a minus sign only before
// the first digit.
1227 else if (isSeparator(ch
) && count
> 0) continue;
1228 else if (isMinus(ch
) && count
== 0) continue;
// Initial capacity: room for the converted digits plus the unparsed tail.
1233 OUStringBuffer
output(count
*2 + 2 + len
- i
);
1235 makeCyrillicNumber(value
, output
, true);
// Keep whatever followed the number in the input.
1238 output
.append(aNumberString
.subView(i
));
1240 return output
.makeStringAndClear();
1243 return aNumberString
;
1246 constexpr OUString implementationName
= u
"com.sun.star.i18n.NativeNumberSupplier"_ustr
;
1248 OUString SAL_CALL
NativeNumberSupplierService::getImplementationName()
1250 return implementationName
;
// Reports whether this object supports the service named rServiceName;
// the decision is delegated to cppu::supportsService().
// NOTE(review): the line carrying the return type of this definition is not
// visible in this listing.
1254 NativeNumberSupplierService::supportsService(const OUString
& rServiceName
)
1256 return cppu::supportsService(this, rServiceName
);
1259 Sequence
< OUString
> SAL_CALL
1260 NativeNumberSupplierService::getSupportedServiceNames()
1262 return {implementationName
, u
"com.sun.star.i18n.NativeNumberSupplier2"_ustr
};
1267 extern "C" SAL_DLLPUBLIC_EXPORT
css::uno::XInterface
*
1268 com_sun_star_i18n_NativeNumberSupplier_get_implementation(
1269 css::uno::XComponentContext
*,
1270 css::uno::Sequence
<css::uno::Any
> const &)
1272 return cppu::acquire(new i18npool::NativeNumberSupplierService());
1275 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */