/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <i18nlangtag/languagetag.hxx>
21 #include <i18nlangtag/mslangid.hxx>
22 #include <rtl/ustrbuf.hxx>
23 #include <sal/macros.h>
24 #include <nativenumbersupplier.hxx>
25 #include <localedata.hxx>
26 #include "data/numberchar.h"
27 #include <comphelper/processfactory.hxx>
28 #include <cppuhelper/supportsservice.hxx>
31 #include <string_view>
32 #include <unordered_map>
33 #include <com/sun/star/i18n/CharacterClassification.hpp>
34 #include <com/sun/star/i18n/NativeNumberMode.hpp>
35 #include <com/sun/star/linguistic2/NumberText.hpp>
37 using namespace ::com::sun::star::uno
;
38 using namespace ::com::sun::star::i18n
;
39 using namespace ::com::sun::star::lang
;
43 const sal_Unicode
*multiplierChar
;
45 sal_Int16 exponentCount
;
46 const sal_Int16
*multiplierExponent
;
// Bit flags stored in Number::numberFlag. They tell the ASCII -> native text
// conversion which characters may be left out of the generated number.

// Do not emit a zero digit for an empty position inside a number.
#define NUMBER_OMIT_ZERO (1 << 0)
// Emit nothing at all when the whole number is zero.
#define NUMBER_OMIT_ONLY_ZERO (1 << 1)
// NUMBER_OMIT_ONE_<n>: do not emit the digit "one" in front of the multiplier
// character with index n-1 (see NUMBER_OMIT_ONE_CHECK below).
#define NUMBER_OMIT_ONE_1 (1 << 2)
#define NUMBER_OMIT_ONE_2 (1 << 3)
#define NUMBER_OMIT_ONE_3 (1 << 4)
#define NUMBER_OMIT_ONE_4 (1 << 5)
#define NUMBER_OMIT_ONE_5 (1 << 6)
#define NUMBER_OMIT_ONE_6 (1 << 7)
#define NUMBER_OMIT_ONE_7 (1 << 8)
// All seven "omit one" bits together.
#define NUMBER_OMIT_ONE (NUMBER_OMIT_ONE_1|NUMBER_OMIT_ONE_2|NUMBER_OMIT_ONE_3|NUMBER_OMIT_ONE_4|NUMBER_OMIT_ONE_5|NUMBER_OMIT_ONE_6|NUMBER_OMIT_ONE_7)
// Flag bit that belongs to the multiplier with the zero-based index `bit`.
// The argument is parenthesized so that an expression such as `a ? 1 : 0`
// or `x | y` is evaluated as a whole before it is added to the base shift.
#define NUMBER_OMIT_ONE_CHECK(bit) (1 << (2 + (bit)))
#define NUMBER_OMIT_ALL ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE|NUMBER_OMIT_ONLY_ZERO )
#define NUMBER_OMIT_ZERO_ONE ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE )
#define NUMBER_OMIT_ONE_67 (NUMBER_OMIT_ONE_6|NUMBER_OMIT_ONE_7)
#define NUMBER_OMIT_ZERO_ONE_67 ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE_67 )
68 struct theNatNumMutex
: public rtl::Static
<osl::Mutex
, theNatNumMutex
> {};
70 static OUString
getHebrewNativeNumberString(const OUString
& aNumberString
, bool useGeresh
);
72 static OUString
getCyrillicNativeNumberString(const OUString
& aNumberString
);
74 /// @throws RuntimeException
75 static OUString
AsciiToNativeChar( const OUString
& inStr
, sal_Int32 nCount
,
76 Sequence
< sal_Int32
>& offset
, bool useOffset
, sal_Int16 number
)
78 const sal_Unicode
*src
= inStr
.getStr();
79 rtl_uString
*newStr
= rtl_uString_alloc(nCount
);
81 offset
.realloc(nCount
);
83 for (sal_Int32 i
= 0; i
< nCount
; i
++)
85 sal_Unicode ch
= src
[i
];
87 newStr
->buffer
[i
] = NumberChar
[number
][ ch
- NUMBER_ZERO
];
88 else if (i
+1 < nCount
&& isNumber(src
[i
+1])) {
89 if (i
> 0 && isNumber(src
[i
-1]) && isSeparator(ch
))
90 newStr
->buffer
[i
] = SeparatorChar
[number
] ? SeparatorChar
[number
] : ch
;
92 newStr
->buffer
[i
] = isDecimal(ch
) ? (DecimalChar
[number
] ? DecimalChar
[number
] : ch
) :
93 isMinus(ch
) ? (MinusChar
[number
] ? MinusChar
[number
] : ch
) : ch
;
96 newStr
->buffer
[i
] = ch
;
100 return OUString(newStr
, SAL_NO_ACQUIRE
); // take ownership
103 static bool AsciiToNative_numberMaker(const sal_Unicode
*str
, sal_Int32 begin
, sal_Int32 len
,
104 sal_Unicode
*dst
, sal_Int32
& count
, sal_Int16 multiChar_index
, Sequence
< sal_Int32
>& offset
, bool useOffset
, sal_Int32 startPos
,
105 const Number
*number
, const sal_Unicode
* numberChar
)
107 sal_Unicode multiChar
= (multiChar_index
== -1 ? 0 : number
->multiplierChar
[multiChar_index
]);
108 if ( len
<= number
->multiplierExponent
[number
->exponentCount
-1] ) {
109 if (number
->multiplierExponent
[number
->exponentCount
-1] > 1) {
110 bool bNotZero
= false;
111 for (const sal_Int32 end
= begin
+len
; begin
< end
; begin
++) {
112 if (bNotZero
|| str
[begin
] != NUMBER_ZERO
) {
113 dst
[count
] = numberChar
[str
[begin
] - NUMBER_ZERO
];
115 offset
[count
] = begin
+ startPos
;
120 if (bNotZero
&& multiChar
> 0) {
121 dst
[count
] = multiChar
;
123 offset
[count
] = begin
+ startPos
;
127 } else if (str
[begin
] != NUMBER_ZERO
) {
128 if (!(number
->numberFlag
& (multiChar_index
< 0 ? 0 : NUMBER_OMIT_ONE_CHECK(multiChar_index
))) || str
[begin
] != NUMBER_ONE
) {
129 dst
[count
] = numberChar
[str
[begin
] - NUMBER_ZERO
];
131 offset
[count
] = begin
+ startPos
;
135 dst
[count
] = multiChar
;
137 offset
[count
] = begin
+ startPos
;
140 } else if (!(number
->numberFlag
& NUMBER_OMIT_ZERO
) && count
> 0 && dst
[count
-1] != numberChar
[0]) {
141 dst
[count
] = numberChar
[0];
143 offset
[count
] = begin
+ startPos
;
146 return str
[begin
] != NUMBER_ZERO
;
148 bool bPrintPower
= false;
149 // sal_Int16 last = 0;
150 for (sal_Int16 i
= 1; i
<= number
->exponentCount
; i
++) {
151 sal_Int32 tmp
= len
- (i
== number
->exponentCount
? 0 : number
->multiplierExponent
[i
]);
153 bPrintPower
|= AsciiToNative_numberMaker(str
, begin
, tmp
, dst
, count
,
154 (i
== number
->exponentCount
? -1 : i
), offset
, useOffset
, startPos
, number
, numberChar
);
160 if (count
> 0 && number
->multiplierExponent
[number
->exponentCount
-1] == 1 &&
161 dst
[count
-1] == numberChar
[0])
164 dst
[count
] = multiChar
;
166 offset
[count
] = begin
+ startPos
;
174 /// @throws RuntimeException
175 static OUString
AsciiToNative( const OUString
& inStr
, sal_Int32 nCount
,
176 Sequence
< sal_Int32
>& offset
, bool useOffset
, const Number
* number
)
180 sal_Int32 strLen
= inStr
.getLength();
181 const sal_Unicode
*numberChar
= NumberChar
[number
->number
];
188 const sal_Unicode
*str
= inStr
.getStr();
189 std::unique_ptr
<sal_Unicode
[]> newStr(new sal_Unicode
[nCount
* 2 + 1]);
190 std::unique_ptr
<sal_Unicode
[]> srcStr(new sal_Unicode
[nCount
+ 1]); // for keeping number without comma
191 sal_Int32 i
, len
= 0, count
= 0;
194 offset
.realloc( nCount
* 2 );
195 bool bDoDecimal
= false;
197 for (i
= 0; i
<= nCount
; i
++)
199 if (i
< nCount
&& isNumber(str
[i
])) {
201 newStr
[count
] = numberChar
[str
[i
] - NUMBER_ZERO
];
207 srcStr
[len
++] = str
[i
];
210 if (i
< nCount
-1 && isSeparator(str
[i
]) && isNumber(str
[i
+1]))
211 continue; // skip comma inside number string
212 bool bNotZero
= false;
213 for (sal_Int32 begin
= 0, end
= len
% number
->multiplierExponent
[0];
214 end
<= len
; begin
= end
, end
+= number
->multiplierExponent
[0]) {
215 if (end
== 0) continue;
216 sal_Int32 _count
= count
;
217 bNotZero
|= AsciiToNative_numberMaker(srcStr
.get(), begin
, end
- begin
, newStr
.get(), count
,
218 end
== len
? -1 : 0, offset
, useOffset
, i
- len
, number
, numberChar
);
219 if (count
> 0 && number
->multiplierExponent
[number
->exponentCount
-1] == 1 &&
220 newStr
[count
-1] == numberChar
[0])
222 if (bNotZero
&& _count
== count
&& end
!= len
) {
223 newStr
[count
] = number
->multiplierChar
[0];
225 offset
[count
] = i
- len
;
229 if (! bNotZero
&& ! (number
->numberFlag
& NUMBER_OMIT_ONLY_ZERO
)) {
230 newStr
[count
] = numberChar
[0];
232 offset
[count
] = i
- len
;
238 bDoDecimal
= (!bDoDecimal
&& i
< nCount
-1 && isDecimal(str
[i
]) && isNumber(str
[i
+1]));
240 newStr
[count
] = (DecimalChar
[number
->number
] ? DecimalChar
[number
->number
] : str
[i
]);
241 else if (i
< nCount
-1 && isMinus(str
[i
]) && isNumber(str
[i
+1]))
242 newStr
[count
] = (MinusChar
[number
->number
] ? MinusChar
[number
->number
] : str
[i
]);
243 else if (i
< nCount
-1 && isSeparator(str
[i
]) && isNumber(str
[i
+1]))
244 newStr
[count
] = (SeparatorChar
[number
->number
] ? SeparatorChar
[number
->number
] : str
[i
]);
246 newStr
[count
] = str
[i
];
255 offset
.realloc(count
);
256 aRet
= OUString(newStr
.get(), count
);
263 void NativeToAscii_numberMaker(sal_Int16 max
, sal_Int16 prev
, const sal_Unicode
*str
,
264 sal_Int32
& i
, sal_Int32 nCount
, sal_Unicode
*dst
, sal_Int32
& count
, Sequence
< sal_Int32
>& offset
, bool useOffset
,
265 OUString
& numberChar
, OUString
& multiplierChar
)
267 sal_Int16 curr
= 0, num
= 0, end
= 0, shift
= 0;
268 while (++i
< nCount
) {
269 if ((curr
= sal::static_int_cast
<sal_Int16
>( numberChar
.indexOf(str
[i
]) )) >= 0) {
273 } else if ((curr
= sal::static_int_cast
<sal_Int16
>( multiplierChar
.indexOf(str
[i
]) )) >= 0) {
274 curr
= MultiplierExponent_7_CJK
[curr
% ExponentCount_7_CJK
];
275 if (prev
> curr
&& num
== 0) num
= 1; // One may be omitted in informal format
279 else if (curr
> prev
)
283 while (end
++ < prev
) {
284 dst
[count
] = NUMBER_ZERO
+ (end
== prev
? num
: 0);
291 for (const sal_Int32 countEnd
= count
+shift
; count
< countEnd
; count
++) {
292 dst
[count
] = dst
[count
+ curr
];
294 offset
[count
] = offset
[count
+ curr
];
298 NativeToAscii_numberMaker(max
, curr
, str
, i
, nCount
, dst
,
299 count
, offset
, useOffset
, numberChar
, multiplierChar
);
304 while (end
++ < prev
) {
305 dst
[count
] = NUMBER_ZERO
+ (end
== prev
? num
: 0);
307 offset
[count
] = i
- 1;
312 /// @throws RuntimeException
313 OUString
NativeToAscii(const OUString
& inStr
,
314 sal_Int32 nCount
, Sequence
< sal_Int32
>& offset
, bool useOffset
)
318 sal_Int32 strLen
= inStr
.getLength();
324 const sal_Unicode
*str
= inStr
.getStr();
325 std::unique_ptr
<sal_Unicode
[]> newStr(new sal_Unicode
[nCount
* MultiplierExponent_7_CJK
[0] + 2]);
327 offset
.realloc( nCount
* MultiplierExponent_7_CJK
[0] + 1 );
328 sal_Int32 count
= 0, index
;
331 OUString numberChar
, multiplierChar
, decimalChar
, minusChar
, separatorChar
;
332 numberChar
= OUString(NumberChar
[0], 10*NumberChar_Count
);
333 multiplierChar
= OUString(MultiplierChar_7_CJK
[0], ExponentCount_7_CJK
*Multiplier_Count
);
334 decimalChar
= OUString(DecimalChar
, NumberChar_Count
);
335 minusChar
= OUString(MinusChar
, NumberChar_Count
);
336 separatorChar
= OUString(
337 reinterpret_cast<sal_Unicode
*>(SeparatorChar
), NumberChar_Count
);
339 for ( i
= 0; i
< nCount
; i
++) {
340 if ((index
= multiplierChar
.indexOf(str
[i
])) >= 0) {
341 if (count
== 0 || !isNumber(newStr
[count
-1])) { // add 1 in front of multiplier
342 newStr
[count
] = NUMBER_ONE
;
347 index
= MultiplierExponent_7_CJK
[index
% ExponentCount_7_CJK
];
348 NativeToAscii_numberMaker(
349 sal::static_int_cast
<sal_Int16
>( index
), sal::static_int_cast
<sal_Int16
>( index
),
350 str
, i
, nCount
, newStr
.get(), count
, offset
, useOffset
,
351 numberChar
, multiplierChar
);
353 if ((index
= numberChar
.indexOf(str
[i
])) >= 0)
354 newStr
[count
] = sal::static_int_cast
<sal_Unicode
>( (index
% 10) + NUMBER_ZERO
);
355 else if ((index
= separatorChar
.indexOf(str
[i
])) >= 0 &&
356 (i
< nCount
-1 && (numberChar
.indexOf(str
[i
+1]) >= 0 ||
357 multiplierChar
.indexOf(str
[i
+1]) >= 0)))
358 newStr
[count
] = SeparatorChar
[NumberChar_HalfWidth
];
359 else if ((index
= decimalChar
.indexOf(str
[i
])) >= 0 &&
360 (i
< nCount
-1 && (numberChar
.indexOf(str
[i
+1]) >= 0 ||
361 multiplierChar
.indexOf(str
[i
+1]) >= 0)))
// Only when the decimal point is followed by a number
// is it converted to the ASCII decimal point.
364 newStr
[count
] = DecimalChar
[NumberChar_HalfWidth
];
365 else if ((index
= minusChar
.indexOf(str
[i
])) >= 0 &&
366 (i
< nCount
-1 && (numberChar
.indexOf(str
[i
+1]) >= 0 ||
367 multiplierChar
.indexOf(str
[i
+1]) >= 0)))
// Only when the minus sign is followed by a number
// is it converted to the ASCII minus sign.
370 newStr
[count
] = MinusChar
[NumberChar_HalfWidth
];
372 newStr
[count
] = str
[i
];
380 offset
.realloc(count
);
382 aRet
= OUString(newStr
.get(), count
);
387 const Number natnum4
[4] = {
388 { NumberChar_Lower_zh
, MultiplierChar_6_CJK
[Multiplier_Lower_zh
], 0,
389 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
390 { NumberChar_Lower_zh
, MultiplierChar_6_CJK
[Multiplier_Lower_zh_TW
], 0,
391 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
392 { NumberChar_Modern_ja
, MultiplierChar_7_CJK
[Multiplier_Modern_ja
], NUMBER_OMIT_ZERO_ONE_67
,
393 ExponentCount_7_CJK
, MultiplierExponent_7_CJK
},
394 { NumberChar_Lower_ko
, MultiplierChar_6_CJK
[Multiplier_Lower_ko
], NUMBER_OMIT_ZERO
,
395 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
398 const Number natnum5
[4] = {
399 { NumberChar_Upper_zh
, MultiplierChar_6_CJK
[Multiplier_Upper_zh
], 0,
400 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
401 { NumberChar_Upper_zh_TW
, MultiplierChar_6_CJK
[Multiplier_Upper_zh_TW
], 0,
402 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
403 { NumberChar_Traditional_ja
, MultiplierChar_7_CJK
[Multiplier_Traditional_ja
], NUMBER_OMIT_ZERO_ONE_67
,
404 ExponentCount_7_CJK
, MultiplierExponent_7_CJK
},
405 { NumberChar_Upper_ko
, MultiplierChar_6_CJK
[Multiplier_Upper_zh_TW
], NUMBER_OMIT_ZERO
,
406 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
409 const Number natnum6
[4] = {
410 { NumberChar_FullWidth
, MultiplierChar_6_CJK
[Multiplier_Lower_zh
], 0,
411 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
412 { NumberChar_FullWidth
, MultiplierChar_6_CJK
[Multiplier_Lower_zh_TW
], 0,
413 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
414 { NumberChar_FullWidth
, MultiplierChar_7_CJK
[Multiplier_Modern_ja
], NUMBER_OMIT_ZERO_ONE_67
,
415 ExponentCount_7_CJK
, MultiplierExponent_7_CJK
},
416 { NumberChar_FullWidth
, MultiplierChar_6_CJK
[Multiplier_Hangul_ko
], NUMBER_OMIT_ZERO
,
417 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
420 const Number natnum7
[4] = {
421 { NumberChar_Lower_zh
, MultiplierChar_6_CJK
[Multiplier_Lower_zh
], NUMBER_OMIT_ALL
,
422 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
423 { NumberChar_Lower_zh
, MultiplierChar_6_CJK
[Multiplier_Lower_zh_TW
], NUMBER_OMIT_ALL
,
424 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
425 { NumberChar_Modern_ja
, MultiplierChar_2_CJK
[Multiplier_Modern_ja
], NUMBER_OMIT_ZERO_ONE
,
426 ExponentCount_2_CJK
, MultiplierExponent_2_CJK
},
427 { NumberChar_Lower_ko
, MultiplierChar_6_CJK
[Multiplier_Lower_ko
], NUMBER_OMIT_ALL
,
428 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
431 const Number natnum8
[4] = {
432 { NumberChar_Upper_zh
, MultiplierChar_6_CJK
[Multiplier_Upper_zh
], NUMBER_OMIT_ALL
,
433 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
434 { NumberChar_Upper_zh_TW
, MultiplierChar_6_CJK
[Multiplier_Upper_zh_TW
], NUMBER_OMIT_ALL
,
435 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
436 { NumberChar_Traditional_ja
, MultiplierChar_2_CJK
[Multiplier_Traditional_ja
], NUMBER_OMIT_ZERO_ONE
,
437 ExponentCount_2_CJK
, MultiplierExponent_2_CJK
},
438 { NumberChar_Upper_ko
, MultiplierChar_6_CJK
[Multiplier_Upper_zh_TW
], NUMBER_OMIT_ALL
,
439 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
},
442 const Number natnum10
= { NumberChar_Hangul_ko
, MultiplierChar_6_CJK
[Multiplier_Hangul_ko
], NUMBER_OMIT_ZERO
,
443 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
};
444 const Number natnum11
= { NumberChar_Hangul_ko
, MultiplierChar_6_CJK
[Multiplier_Hangul_ko
], NUMBER_OMIT_ALL
,
445 ExponentCount_6_CJK
, MultiplierExponent_6_CJK
};
447 //! ATTENTION: Do not change order of elements!
448 //! Append new languages to the end of the list!
449 const sal_Char
*natnum1Locales
[] = {
477 const sal_Int16 nbOfLocale
= SAL_N_ELEMENTS(natnum1Locales
);
479 //! ATTENTION: Do not change order of elements!
480 //! Number and order must match elements of natnum1Locales!
481 const sal_Int16 natnum1
[] = {
484 NumberChar_Modern_ja
,
506 NumberChar_EastIndic_ar
,
509 const sal_Int16 sizeof_natnum1
= SAL_N_ELEMENTS(natnum1
);
511 //! ATTENTION: Do not change order of elements!
512 //! Order must match first elements of natnum1Locales!
513 const sal_Int16 natnum2
[] = {
515 NumberChar_Upper_zh_TW
,
516 NumberChar_Traditional_ja
,
520 const sal_Int16 sizeof_natnum2
= SAL_N_ELEMENTS(natnum2
);
522 sal_Int16
getLanguageNumber( const Locale
& rLocale
)
524 // return zh_TW for TW, HK and MO, return zh_CN for other zh locales.
525 if (rLocale
.Language
== "zh") return MsLangId::isTraditionalChinese(rLocale
) ? 1 : 0;
527 for (sal_Int16 i
= 2; i
< nbOfLocale
; i
++)
528 if (rLocale
.Language
.equalsAsciiL(natnum1Locales
[i
], 2))
536 sal_Unicode DecimalSeparator
;
537 sal_Unicode ThousandSeparator
;
538 Separators(const Locale
& rLocale
)
540 LocaleDataItem aLocaleItem
= LocaleDataImpl::get()->getLocaleItem(rLocale
);
541 DecimalSeparator
= aLocaleItem
.decimalSeparator
.toChar();
542 ThousandSeparator
= aLocaleItem
.thousandSeparator
.toChar();
546 Separators
getLocaleSeparators(const Locale
& rLocale
, const OUString
& rLocStr
)
548 // Guard the static variable below.
549 osl::MutexGuard
aGuard(theNatNumMutex::get());
550 // Maximum a couple hundred of pairs with 4-byte structs - so no need for smart managing
551 static std::unordered_map
<OUString
, Separators
> aLocaleSeparatorsBuf
;
552 auto it
= aLocaleSeparatorsBuf
.find(rLocStr
);
553 if (it
== aLocaleSeparatorsBuf
.end())
555 it
= aLocaleSeparatorsBuf
.emplace(rLocStr
, Separators(rLocale
)).first
;
560 OUString
getNumberText(const Locale
& rLocale
, const OUString
& rNumberString
,
561 const OUString
& sNumberTextParams
)
563 sal_Int32 i
, count
= 0;
564 const sal_Int32 len
= rNumberString
.getLength();
565 const sal_Unicode
* src
= rNumberString
.getStr();
567 OUString aLoc
= LanguageTag::convertToBcp47(rLocale
);
568 Separators aSeparators
= getLocaleSeparators(rLocale
, aLoc
);
570 OUStringBuffer
sBuf(len
);
571 for (i
= 0; i
< len
; i
++)
573 sal_Unicode ch
= src
[i
];
579 else if (ch
== aSeparators
.DecimalSeparator
)
580 // Convert any decimal separator to point - in case libnumbertext has a different one
581 // for this locale (it seems that point is supported for all locales in libnumbertext)
583 else if (ch
== aSeparators
.ThousandSeparator
&& count
> 0)
585 else if (isMinus(ch
) && count
== 0)
591 // Handle also month and day names for NatNum12 date formatting
592 const OUString
& rNumberStr
= (count
== 0) ? rNumberString
: sBuf
.makeStringAndClear();
594 // Guard the static variables below.
595 osl::MutexGuard
aGuard( theNatNumMutex::get());
597 static auto xNumberText
598 = css::linguistic2::NumberText::create(comphelper::getProcessComponentContext());
599 OUString numbertext_prefix
;
600 // default "cardinal" gets empty prefix
601 if (!sNumberTextParams
.isEmpty() && sNumberTextParams
!= "cardinal")
602 numbertext_prefix
= sNumberTextParams
+ " ";
// Several hundred headings could cause typing lags, because all of
// their number names are updated continuously while typing.
// Avoid this by caching the results of the conversion.
606 static std::unordered_map
<OUString
, std::map
<OUString
, OUString
>> aBuff
;
607 auto& rItems
= aBuff
[rNumberStr
];
608 auto& rItem
= rItems
[numbertext_prefix
+ aLoc
];
611 rItem
= xNumberText
->getNumberText(numbertext_prefix
+ rNumberStr
, rLocale
);
// fall back to the number itself when no number-to-text conversion is available
616 OUString sResult
= rItem
;
617 if (i
!= 0 && i
< len
)
618 sResult
+= rNumberString
.copy(i
);
623 OUString
NativeNumberSupplierService::getNativeNumberString(const OUString
& aNumberString
, const Locale
& rLocale
,
624 sal_Int16 nNativeNumberMode
,
625 Sequence
<sal_Int32
>& offset
,
626 const OUString
& rNativeNumberParams
)
628 if (!isValidNatNum(rLocale
, nNativeNumberMode
))
629 return aNumberString
;
631 if (nNativeNumberMode
== NativeNumberMode::NATNUM12
)
633 // handle capitalization prefixes "capitalize", "upper" and "title"
644 OUStringLiteral aLiteral
;
648 static const CasingEntry Casings
[] =
650 { OUStringLiteral("capitalize"), CAPITALIZE
},
651 { OUStringLiteral("upper"), UPPER
},
652 { OUStringLiteral("title"), TITLE
}
655 sal_Int32 nStripCase
= 0;
657 for (nCasing
= 0; nCasing
< SAL_N_ELEMENTS(Casings
); ++nCasing
)
659 if (rNativeNumberParams
.startsWith( Casings
[nCasing
].aLiteral
))
661 nStripCase
= Casings
[nCasing
].aLiteral
.size
;
666 if (nStripCase
> 0 && (rNativeNumberParams
.getLength() == nStripCase
||
667 rNativeNumberParams
[nStripCase
++] == ' '))
669 OUString aStr
= getNumberText(rLocale
, aNumberString
, rNativeNumberParams
.copy(nStripCase
));
671 if (!xCharClass
.is())
672 xCharClass
= CharacterClassification::create(comphelper::getProcessComponentContext());
674 switch (Casings
[nCasing
].eCasing
)
677 return xCharClass
->toTitle(aStr
, 0, 1, aLocale
) +
678 (aStr
.getLength() > 1 ? aStr
.copy(1) : OUString());
680 return xCharClass
->toUpper(aStr
, 0, aStr
.getLength(), aLocale
);
682 return xCharClass
->toTitle(aStr
, 0, aStr
.getLength(), aLocale
);
687 return getNumberText(rLocale
, aNumberString
, rNativeNumberParams
);
691 sal_Int16 langnum
= getLanguageNumber(rLocale
);
693 return aNumberString
;
695 const Number
*number
= nullptr;
698 switch (nNativeNumberMode
)
700 case NativeNumberMode::NATNUM0
: // Ascii
701 return NativeToAscii(aNumberString
, aNumberString
.getLength(), offset
, useOffset
);
702 case NativeNumberMode::NATNUM1
: // Char, Lower
703 num
= natnum1
[langnum
];
705 case NativeNumberMode::NATNUM2
: // Char, Upper
706 num
= natnum2
[langnum
];
708 case NativeNumberMode::NATNUM3
: // Char, FullWidth
709 num
= NumberChar_FullWidth
;
711 case NativeNumberMode::NATNUM4
: // Text, Lower, Long
712 number
= &natnum4
[langnum
];
714 case NativeNumberMode::NATNUM5
: // Text, Upper, Long
715 number
= &natnum5
[langnum
];
717 case NativeNumberMode::NATNUM6
: // Text, FullWidth
718 number
= &natnum6
[langnum
];
720 case NativeNumberMode::NATNUM7
: // Text. Lower, Short
721 number
= &natnum7
[langnum
];
723 case NativeNumberMode::NATNUM8
: // Text, Upper, Short
724 number
= &natnum8
[langnum
];
726 case NativeNumberMode::NATNUM9
: // Char, Hangul
727 num
= NumberChar_Hangul_ko
;
729 case NativeNumberMode::NATNUM10
: // Text, Hangul, Long
732 case NativeNumberMode::NATNUM11
: // Text, Hangul, Short
739 if (number
|| num
>= 0) {
740 if (aLocale
.Language
!= rLocale
.Language
||
741 aLocale
.Country
!= rLocale
.Country
||
742 aLocale
.Variant
!= rLocale
.Variant
) {
743 LocaleDataItem item
= LocaleDataImpl::get()->getLocaleItem( rLocale
);
745 DecimalChar
[NumberChar_HalfWidth
]=item
.decimalSeparator
.toChar();
746 if (DecimalChar
[NumberChar_HalfWidth
] > 0x7E || DecimalChar
[NumberChar_HalfWidth
] < 0x21)
747 DecimalChar
[NumberChar_FullWidth
]=0xFF0E;
749 DecimalChar
[NumberChar_FullWidth
]=DecimalChar
[NumberChar_HalfWidth
]+0xFEE0;
750 SeparatorChar
[NumberChar_HalfWidth
]=item
.thousandSeparator
.toChar();
751 if (SeparatorChar
[NumberChar_HalfWidth
] > 0x7E || SeparatorChar
[NumberChar_HalfWidth
] < 0x21)
752 SeparatorChar
[NumberChar_FullWidth
]=0xFF0C;
754 SeparatorChar
[NumberChar_FullWidth
]=SeparatorChar
[NumberChar_HalfWidth
]+0xFEE0;
757 return AsciiToNative( aNumberString
, aNumberString
.getLength(), offset
, useOffset
, number
);
758 else if (num
== NumberChar_he
)
759 return getHebrewNativeNumberString(aNumberString
,
760 nNativeNumberMode
== NativeNumberMode::NATNUM2
);
761 else if (num
== NumberChar_cu
)
762 return getCyrillicNativeNumberString(aNumberString
);
764 return AsciiToNativeChar(aNumberString
, aNumberString
.getLength(), offset
, useOffset
, num
);
767 return aNumberString
;
770 OUString SAL_CALL
NativeNumberSupplierService::getNativeNumberString(const OUString
& aNumberString
, const Locale
& rLocale
,
771 sal_Int16 nNativeNumberMode
)
773 Sequence
< sal_Int32
> offset
;
774 return getNativeNumberString(aNumberString
, rLocale
, nNativeNumberMode
, offset
);
777 OUString SAL_CALL
NativeNumberSupplierService::getNativeNumberStringParams(
778 const OUString
& rNumberString
, const css::lang::Locale
& rLocale
, sal_Int16 nNativeNumberMode
,
779 const OUString
& rNativeNumberParams
)
781 Sequence
<sal_Int32
> offset
;
782 return getNativeNumberString(rNumberString
, rLocale
, nNativeNumberMode
, offset
, rNativeNumberParams
);
785 sal_Unicode
NativeNumberSupplierService::getNativeNumberChar( const sal_Unicode inChar
, const Locale
& rLocale
, sal_Int16 nNativeNumberMode
)
787 if (nNativeNumberMode
== NativeNumberMode::NATNUM0
) { // Ascii
788 for (const auto & i
: NumberChar
)
789 for (sal_Int16 j
= 0; j
< 10; j
++)
795 if (!isNumber(inChar
))
798 if (!isValidNatNum(rLocale
, nNativeNumberMode
))
801 sal_Int16 langnum
= getLanguageNumber(rLocale
);
805 switch (nNativeNumberMode
)
807 case NativeNumberMode::NATNUM1
: // Char, Lower
808 case NativeNumberMode::NATNUM4
: // Text, Lower, Long
809 case NativeNumberMode::NATNUM7
: // Text. Lower, Short
810 return NumberChar
[natnum1
[langnum
]][inChar
- NUMBER_ZERO
];
811 case NativeNumberMode::NATNUM2
: // Char, Upper
812 case NativeNumberMode::NATNUM5
: // Text, Upper, Long
813 case NativeNumberMode::NATNUM8
: // Text, Upper, Short
814 return NumberChar
[natnum2
[langnum
]][inChar
- NUMBER_ZERO
];
815 case NativeNumberMode::NATNUM3
: // Char, FullWidth
816 case NativeNumberMode::NATNUM6
: // Text, FullWidth
817 return NumberChar
[NumberChar_FullWidth
][inChar
- NUMBER_ZERO
];
818 case NativeNumberMode::NATNUM9
: // Char, Hangul
819 case NativeNumberMode::NATNUM10
: // Text, Hangul, Long
820 case NativeNumberMode::NATNUM11
: // Text, Hangul, Short
821 return NumberChar
[NumberChar_Hangul_ko
][inChar
- NUMBER_ZERO
];
829 sal_Bool SAL_CALL
NativeNumberSupplierService::isValidNatNum( const Locale
& rLocale
, sal_Int16 nNativeNumberMode
)
831 sal_Int16 langnum
= getLanguageNumber(rLocale
);
833 switch (nNativeNumberMode
) {
834 case NativeNumberMode::NATNUM0
: // Ascii
835 case NativeNumberMode::NATNUM3
: // Char, FullWidth
836 case NativeNumberMode::NATNUM12
: // spell out numbers, dates and money amounts
838 case NativeNumberMode::NATNUM1
: // Char, Lower
839 return (langnum
>= 0);
840 case NativeNumberMode::NATNUM2
: // Char, Upper
841 if (langnum
== 4) // Hebrew numbering
844 case NativeNumberMode::NATNUM4
: // Text, Lower, Long
845 case NativeNumberMode::NATNUM5
: // Text, Upper, Long
846 case NativeNumberMode::NATNUM6
: // Text, FullWidth
847 case NativeNumberMode::NATNUM7
: // Text. Lower, Short
848 case NativeNumberMode::NATNUM8
: // Text, Upper, Short
849 return (langnum
>= 0 && langnum
< 4); // CJK numbering
850 case NativeNumberMode::NATNUM9
: // Char, Hangul
851 case NativeNumberMode::NATNUM10
: // Text, Hangul, Long
852 case NativeNumberMode::NATNUM11
: // Text, Hangul, Short
853 return (langnum
== 3); // Korean numbering
858 NativeNumberXmlAttributes SAL_CALL
NativeNumberSupplierService::convertToXmlAttributes( const Locale
& rLocale
, sal_Int16 nNativeNumberMode
)
860 static const sal_Int16 attShort
= 0;
861 static const sal_Int16 attMedium
= 1;
862 static const sal_Int16 attLong
= 2;
863 static const sal_Char
*attType
[] = { "short", "medium", "long" };
865 sal_Int16 number
= NumberChar_HalfWidth
, type
= attShort
;
867 sal_Int16 langnum
= -1;
868 if (isValidNatNum(rLocale
, nNativeNumberMode
)) {
869 langnum
= getLanguageNumber(rLocale
);
872 switch (nNativeNumberMode
) {
873 case NativeNumberMode::NATNUM0
: // Ascii
874 number
= NumberChar_HalfWidth
;
877 case NativeNumberMode::NATNUM1
: // Char, Lower
878 number
= natnum1
[langnum
];
881 case NativeNumberMode::NATNUM2
: // Char, Upper
882 number
= natnum2
[langnum
];
883 type
= number
== NumberChar_he
? attMedium
: attShort
;
885 case NativeNumberMode::NATNUM3
: // Char, FullWidth
886 number
= NumberChar_FullWidth
;
889 case NativeNumberMode::NATNUM4
: // Text, Lower, Long
890 number
= natnum1
[langnum
];
893 case NativeNumberMode::NATNUM5
: // Text, Upper, Long
894 number
= natnum2
[langnum
];
897 case NativeNumberMode::NATNUM6
: // Text, FullWidth
898 number
= NumberChar_FullWidth
;
901 case NativeNumberMode::NATNUM7
: // Text. Lower, Short
902 number
= natnum1
[langnum
];
905 case NativeNumberMode::NATNUM8
: // Text, Upper, Short
906 number
= natnum2
[langnum
];
909 case NativeNumberMode::NATNUM9
: // Char, Hangul
910 number
= NumberChar_Hangul_ko
;
913 case NativeNumberMode::NATNUM10
: // Text, Hangul, Long
914 number
= NumberChar_Hangul_ko
;
917 case NativeNumberMode::NATNUM11
: // Text, Hangul, Short
918 number
= NumberChar_Hangul_ko
;
925 return NativeNumberXmlAttributes(rLocale
, OUString(&NumberChar
[number
][1], 1),
926 OUString::createFromAscii(attType
[type
]));
929 static bool natNumIn(sal_Int16 num
, const sal_Int16 natnum
[], sal_Int16 len
)
931 for (sal_Int16 i
= 0; i
< len
; i
++)
932 if (natnum
[i
] == num
)
937 sal_Int16 SAL_CALL
NativeNumberSupplierService::convertFromXmlAttributes( const NativeNumberXmlAttributes
& aAttr
)
939 sal_Unicode numberChar
[NumberChar_Count
];
940 for (sal_Int16 i
= 0; i
< NumberChar_Count
; i
++)
941 numberChar
[i
] = NumberChar
[i
][1];
942 OUString
number(numberChar
, NumberChar_Count
);
944 sal_Int16 num
= sal::static_int_cast
<sal_Int16
>( number
.indexOf(aAttr
.Format
) );
946 if ( aAttr
.Style
== "short" ) {
947 if (num
== NumberChar_FullWidth
)
948 return NativeNumberMode::NATNUM3
;
949 else if (num
== NumberChar_Hangul_ko
)
950 return NativeNumberMode::NATNUM9
;
951 else if (natNumIn(num
, natnum1
, sizeof_natnum1
))
952 return NativeNumberMode::NATNUM1
;
953 else if (natNumIn(num
, natnum2
, sizeof_natnum2
))
954 return NativeNumberMode::NATNUM2
;
955 } else if ( aAttr
.Style
== "medium" ) {
956 if (num
== NumberChar_Hangul_ko
)
957 return NativeNumberMode::NATNUM11
;
958 else if (num
== NumberChar_he
)
959 return NativeNumberMode::NATNUM2
;
960 else if (natNumIn(num
, natnum1
, sizeof_natnum1
))
961 return NativeNumberMode::NATNUM7
;
962 else if (natNumIn(num
, natnum2
, sizeof_natnum2
))
963 return NativeNumberMode::NATNUM8
;
964 } else if ( aAttr
.Style
== "long" ) {
965 if (num
== NumberChar_FullWidth
)
966 return NativeNumberMode::NATNUM6
;
967 else if (num
== NumberChar_Hangul_ko
)
968 return NativeNumberMode::NATNUM10
;
969 else if (natNumIn(num
, natnum1
, sizeof_natnum1
))
970 return NativeNumberMode::NATNUM4
;
971 else if (natNumIn(num
, natnum2
, sizeof_natnum2
))
972 return NativeNumberMode::NATNUM5
;
974 throw RuntimeException();
976 return NativeNumberMode::NATNUM0
;
980 // Following code generates Hebrew Number,
981 // see numerical system in the Hebrew Numbering System in following link for details,
982 // http://smontagu.org/writings/HebrewNumbers.html
984 struct HebrewNumberChar
{
987 } const HebrewNumberCharArray
[] = {
1013 static const sal_Unicode thousand
[] = {0x05d0, 0x05dc, 0x05e3, 0x0};
1014 static const sal_Unicode thousands
[] = {0x05d0, 0x05dc, 0x05e4, 0x05d9, 0x0};
1015 static const sal_Unicode thousands_last
[] = {0x05d0, 0x05dc, 0x05e4, 0x05d9, 0x05dd, 0x0};
1016 static const sal_Unicode geresh
= 0x05f3;
1017 static const sal_Unicode gershayim
= 0x05f4;
// Appends a Hebrew-numeral rendering of `value` to `output`.
//
// value     - number to render. It is processed in groups of three decimal
//             digits: the lowest group here, the rest by recursion on
//             value / 1000.
// output    - buffer that receives the generated characters.
// isLast    - selects `thousands_last` over `thousands` when a word for
//             "thousands" is appended; the recursive call passes num != 0.
// useGeresh - forwarded unchanged to the recursive call.
//             NOTE(review): presumably it also gates the geresh/gershayim
//             step at the end; the guarding line is not visible here.
//
// NOTE(review): this listing omits several lines of the body (braces and
// some guard conditions), so the comments below cover visible statements only.
1019 static void makeHebrewNumber(sal_Int64 value
, OUStringBuffer
& output
, bool isLast
, bool useGeresh
)
// num holds the lowest three decimal digits of value.
1021 sal_Int16 num
= sal::static_int_cast
<sal_Int16
>(value
% 1000);
// Render the higher-order digits first, so they precede this group in output.
// NOTE(review): the condition guarding this recursion is not visible here.
1024 makeHebrewNumber(value
/ 1000, output
, num
!= 0, useGeresh
);
// Append a word for "thousand(s)": singular for exactly 1000, otherwise the
// form chosen by isLast.
// NOTE(review): the condition selecting this branch is not visible here.
1028 output
.append(value
== 1000 ? thousand
: isLast
? thousands_last
: thousands
);
// nbOfChar is tested after the loop to choose between geresh and gershayim.
// NOTE(review): the statement that increments it is not visible here.
1030 sal_Int16 nbOfChar
= 0;
// Single front-to-back pass over the letter table; stops as soon as num
// reaches zero or the table is exhausted.
1031 for (sal_Int32 j
= 0; num
> 0 && j
< sal_Int32(SAL_N_ELEMENTS(HebrewNumberCharArray
)); j
++) {
// Entry j is usable when its value does not exceed what is left of num.
1032 if (num
- HebrewNumberCharArray
[j
].value
>= 0) {
1034 // https://en.wikipedia.org/wiki/Hebrew_numerals#Key_exceptions
1035 // By convention, the numbers 15 and 16 are represented as 9 + 6 and 9 + 7
// NOTE(review): the statement controlled by this `if` is not visible; the
// assert that follows suggests it advances j to the next table entry -- confirm.
1036 if (num
== 15 || num
== 16) // substitution for 15 and 16
1038 assert(j
< sal_Int32(SAL_N_ELEMENTS(HebrewNumberCharArray
)));
// Consume the entry: subtract its value from num and emit its letter.
1039 num
= sal::static_int_cast
<sal_Int16
>( num
- HebrewNumberCharArray
[j
].value
);
1040 output
.append(HebrewNumberCharArray
[j
].code
);
// Punctuation: gershayim is inserted at index getLength() - 1, i.e. directly
// before the last character in the buffer; geresh is appended at the end.
1044 if (nbOfChar
> 1) // a number is written as more than one character
1045 output
.insert(output
.getLength() - 1, gershayim
);
1046 else if (nbOfChar
== 1) // a number is written as a single character
1047 output
.append(geresh
);
// Converts the decimal number at the start of aNumberString to Hebrew
// numerals by accumulating its digits and passing the result to
// makeHebrewNumber.
//
// aNumberString - input text; scanned from index 0.
// useGeresh     - forwarded to makeHebrewNumber.
//
// Returns the buffer filled by makeHebrewNumber, followed by the part of
// aNumberString starting at the index where scanning stopped. Returns
// aNumberString unchanged once 20 digits have been counted, and on the final
// fallback path.
// NOTE(review): the conditions selecting the digit branch, the end of the
// scan and the fallback return are not visible in this listing.
1052 OUString
getHebrewNativeNumberString(const OUString
& aNumberString
, bool useGeresh
)
1054 sal_Int64 value
= 0;
// i: scan position, count: digits consumed so far, len: input length.
1055 sal_Int32 i
, count
= 0, len
= aNumberString
.getLength();
1056 const sal_Unicode
*src
= aNumberString
.getStr();
1058 for (i
= 0; i
< len
; i
++) {
1059 sal_Unicode ch
= src
[i
];
// The 20th counted digit makes the function give up and return the input.
1061 if (++count
>= 20) // Number is too long, could not be handled.
1062 return aNumberString
;
// Accumulate the digit; presumably ch is a decimal digit here (the guard is
// not visible).
// NOTE(review): up to 19 digits are accumulated into a sal_Int64 with no
// range check, so a 19-digit input above the sal_Int64 maximum would
// overflow -- confirm whether callers can pass such strings.
1063 value
= value
* 10 + (ch
- NUMBER_ZERO
);
// Separators are skipped once at least one digit has been counted; a minus
// sign is skipped only before the first digit.
1065 else if (isSeparator(ch
) && count
> 0) continue;
1066 else if (isMinus(ch
) && count
== 0) continue;
// Initial buffer size: two characters per counted digit, plus two, plus the
// number of input characters from index i onwards.
1071 OUStringBuffer
output(count
*2 + 2 + len
- i
);
// Top-level call, so isLast is true.
1073 makeHebrewNumber(value
, output
, true, useGeresh
);
// Append the part of the input from index i onwards.
1076 output
.append(std::u16string_view(aNumberString
).substr(i
));
1078 return output
.makeStringAndClear();
// Fallback: hand the input back unchanged.
1081 return aNumberString
;
1084 // Support for Cyrillic Numerals
1085 // See UTN 41 for implementation information
1086 // http://www.unicode.org/notes/tn41/
// U+0482 CYRILLIC THOUSANDS SIGN: appended by makeCyrillicNumber before the
// rendering of value / 1000.
1088 static const sal_Unicode cyrillicThousandsMark
= 0x0482;
// U+0483 COMBINING CYRILLIC TITLO: appended or inserted near the end of the
// output by makeCyrillicNumber.
1089 static const sal_Unicode cyrillicTitlo
= 0x0483;
// U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I: the letter that
// makeCyrillicNumber appends after the units for numbers between 11 and 19.
1090 static const sal_Unicode cyrillicTen
= 0x0456;
// Entry type and lookup table for makeCyrillicNumber, which reads a `value`
// and a `code` member from each entry (value is compared with and subtracted
// from the running number, code is appended to the output buffer).
// NOTE(review): the member declarations and the table rows are not visible
// in this listing. makeCyrillicNumber walks the table once from index 0, so
// the rows are presumably ordered by descending value -- confirm.
1092 struct CyrillicNumberChar
{
1095 } const CyrillicNumberCharArray
[] = {
// Appends a Cyrillic-numeral rendering of `value` to `output`.
//
// value    - number to render. The lowest three decimal digits are handled
//            here, the rest by recursion on value / 1000.
// output   - buffer that receives the generated characters.
// addTitlo - the recursive calls pass false and the top-level caller passes
//            true.
//            NOTE(review): presumably it gates the titlo placement at the
//            end; the guarding line is not visible in this listing.
//
// NOTE(review): this listing omits several lines of the body (braces, `else`
// lines and some statements), so the comments below cover visible statements
// only.
1125 static void makeCyrillicNumber(sal_Int64 value
, OUStringBuffer
& output
, bool addTitlo
)
// num holds the lowest three decimal digits of value.
1127 sal_Int16 num
= sal::static_int_cast
<sal_Int16
>(value
% 1000);
// Thousands: emit the thousands sign, then render value / 1000 without titlo.
1128 if (value
>= 1000) {
1129 output
.append(cyrillicThousandsMark
);
1130 makeCyrillicNumber(value
/ 1000, output
, false);
// NOTE(review): the statements controlled by the next two conditions are not
// visible in this listing.
1131 if (value
>= 10000 && (value
- 10000) % 1000 != 0) {
1134 if (value
% 1000 == 0)
// Single front-to-back pass over the letter table; stops as soon as num
// reaches zero or the table is exhausted.
1138 for (sal_Int32 j
= 0; num
> 0 && j
< sal_Int32(SAL_N_ELEMENTS(CyrillicNumberCharArray
)); j
++) {
// 11..19: the recursive call's output comes first, then the letter for ten.
// NOTE(review): presumably num is reduced by ten before the recursive call
// and the loop is left afterwards; those lines are not visible here.
1139 if (num
< 20 && num
> 10) {
1141 makeCyrillicNumber(num
, output
, false);
1142 output
.append(cyrillicTen
);
// Entry j fits into what is left of num: emit its letter and subtract its
// value.
1146 if (CyrillicNumberCharArray
[j
].value
<= num
) {
1147 output
.append(CyrillicNumberCharArray
[j
].code
);
1148 num
= sal::static_int_cast
<sal_Int16
>( num
- CyrillicNumberCharArray
[j
].value
);
// Titlo placement depends on the current buffer length.
// Length 1: titlo follows the only character.
1153 if (output
.getLength() == 1) {
1154 output
.append(cyrillicTitlo
);
// Length 2: titlo goes at the end for values strictly between 800 and 900.
1155 } else if (output
.getLength() == 2) {
1156 if (value
> 800 && value
< 900) {
1157 output
.append(cyrillicTitlo
);
// Presumably the alternative branch (its `else` line is not visible): titlo
// goes between the two characters.
1159 output
.insert(1, cyrillicTitlo
);
// Longer output: titlo goes at the end when the first space sits at index
// getLength() - 2.
1161 } else if (output
.getLength() > 2) {
1162 if (output
.indexOf(" ") == output
.getLength() - 2) {
1163 output
.append(cyrillicTitlo
);
// Presumably the alternative branch (its `else` line is not visible): titlo
// goes directly before the last character.
1165 output
.insert(output
.getLength() - 1, cyrillicTitlo
);
// Converts the decimal number at the start of aNumberString to Cyrillic
// numerals by accumulating its digits and passing the result to
// makeCyrillicNumber.
//
// aNumberString - input text; scanned from index 0.
//
// Returns the buffer filled by makeCyrillicNumber, followed by the part of
// aNumberString starting at the index where scanning stopped. Returns
// aNumberString unchanged once 8 digits have been counted, and on the final
// fallback path.
// NOTE(review): the conditions selecting the digit branch, the end of the
// scan and the fallback return are not visible in this listing.
1171 OUString
getCyrillicNativeNumberString(const OUString
& aNumberString
)
1173 sal_Int64 value
= 0;
// i: scan position, count: digits consumed so far, len: input length.
1174 sal_Int32 i
, count
= 0, len
= aNumberString
.getLength();
1175 const sal_Unicode
*src
= aNumberString
.getStr();
1177 for (i
= 0; i
< len
; i
++) {
1178 sal_Unicode ch
= src
[i
];
// The 8th counted digit makes the function give up and return the input.
1180 if (++count
>= 8) // Number is too long, could not be handled.
1181 return aNumberString
;
// Accumulate the digit; presumably ch is a decimal digit here (the guard is
// not visible).
1182 value
= value
* 10 + (ch
- NUMBER_ZERO
);
// Separators are skipped once at least one digit has been counted; a minus
// sign is skipped only before the first digit.
1184 else if (isSeparator(ch
) && count
> 0) continue;
1185 else if (isMinus(ch
) && count
== 0) continue;
// Initial buffer size: two characters per counted digit, plus two, plus the
// number of input characters from index i onwards.
1190 OUStringBuffer
output(count
*2 + 2 + len
- i
);
// Top-level call, so addTitlo is true.
1192 makeCyrillicNumber(value
, output
, true);
// Append the part of the input from index i onwards.
1195 output
.append(std::u16string_view(aNumberString
).substr(i
));
1197 return output
.makeStringAndClear();
// Fallback: hand the input back unchanged.
1200 return aNumberString
;
// Name returned by getImplementationName() and listed first by
// getSupportedServiceNames().
1203 static const sal_Char implementationName
[] = "com.sun.star.i18n.NativeNumberSupplier";
// Returns the file-level implementationName constant as an OUString.
1205 OUString SAL_CALL
NativeNumberSupplierService::getImplementationName()
1207 return implementationName
;
// Delegates to cppu::supportsService with this object and rServiceName, and
// returns its result.
// NOTE(review): the return-type line of this definition is not visible in
// this listing.
1211 NativeNumberSupplierService::supportsService(const OUString
& rServiceName
)
1213 return cppu::supportsService(this, rServiceName
);
// Returns a two-element sequence: implementationName followed by
// "com.sun.star.i18n.NativeNumberSupplier2".
1216 Sequence
< OUString
> SAL_CALL
1217 NativeNumberSupplierService::getSupportedServiceNames()
1219 return {implementationName
, "com.sun.star.i18n.NativeNumberSupplier2"};
// Factory function declared extern "C" with SAL_DLLPUBLIC_EXPORT.
// Both parameters (a component context pointer and a sequence of Any) are
// unnamed and are not used by the visible body.
// Returns the result of cppu::acquire on a newly allocated
// i18npool::NativeNumberSupplierService.
1224 extern "C" SAL_DLLPUBLIC_EXPORT
css::uno::XInterface
*
1225 com_sun_star_i18n_NativeNumberSupplier_get_implementation(
1226 css::uno::XComponentContext
*,
1227 css::uno::Sequence
<css::uno::Any
> const &)
1229 return cppu::acquire(new i18npool::NativeNumberSupplierService());
1232 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */