//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// Copyright 2018 Ulf Adams
// Copyright (c) Microsoft Corporation. All rights reserved.
// Boost Software License - Version 1.0 - August 17th, 2003

// Permission is hereby granted, free of charge, to any person or organization
// obtaining a copy of the software and accompanying documentation covered by
// this license (the "Software") to use, reproduce, display, distribute,
// execute, and transmit the Software, and to prepare derivative works of the
// Software, and to permit third-parties to whom the Software is furnished to
// do so, all subject to the following:

// The copyright notices in the Software and this entire statement, including
// the above license grant, this restriction and the following disclaimer,
// must be included in all copies of the Software, in whole or in part, and
// all derivative works of the Software, unless such copies or derivative
// works are solely in the form of machine-executable object code generated by
// a source language processor.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
#ifndef _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H
#define _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H
// Avoid formatting to keep the changes with the original code minimal.
// clang-format off

#include <__assert>
#include <__config>
#include <cstdint>

#include "include/ryu/ryu.h"
_LIBCPP_BEGIN_NAMESPACE_STD

#if defined(_M_X64) && defined(_MSC_VER)
#define _LIBCPP_INTRINSIC128 1
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  return _umul128(__a, __b, __productHi);
}
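
// Illustrative note: _umul128 returns the low 64 bits of the full 128-bit
// product and writes the high 64 bits to *__productHi. For instance,
// multiplying 0xFFFFFFFFFFFFFFFF by 2 yields *__productHi == 1 and a return
// value of 0xFFFFFFFFFFFFFFFE.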

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // For the __shiftright128 intrinsic, the shift value is always
  // modulo 64.
  // In the current implementation of the double-precision version
  // of Ryu, the shift value is always < 64.
  // (The shift value is in the range [49, 58].)
  // Check this here in case a future change requires larger shift
  // values. In this case this function needs to be adjusted.
  _LIBCPP_ASSERT_UNCATEGORIZED(__dist < 64, "");
  return __shiftright128(__lo, __hi, static_cast<unsigned char>(__dist));
}
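
// Illustrative note: with __dist < 64, the call above returns bits
// [__dist, __dist + 64) of the 128-bit value (__hi << 64) | __lo, i.e. the
// 64-bit window that starts __dist bits above the least significant bit.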

// ^^^ intrinsics available ^^^ / vvv __int128 available vvv
#elif defined(__SIZEOF_INT128__) && ( \
    (defined(__clang__) && !defined(_MSC_VER)) || \
    (defined(__GNUC__) && !defined(__clang__) && !defined(__CUDACC__)))
#define _LIBCPP_INTRINSIC128 1
// We have __uint128 support in clang or gcc
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  auto __temp = __a * (unsigned __int128)__b;
  *__productHi = __temp >> 64;
  return static_cast<uint64_t>(__temp);
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // In the current implementation of the double-precision version
  // of Ryu, the shift value is always < 64.
  // (The shift value is in the range [49, 58].)
  // Check this here in case a future change requires larger shift
  // values. In this case this function needs to be adjusted.
  _LIBCPP_ASSERT_UNCATEGORIZED(__dist < 64, "");
  auto __temp = __lo | ((unsigned __int128)__hi << 64);
  // For x64 128-bit shifts using the `shrd` instruction and two 64-bit
  // registers, the shift value is modulo 64. Thus the `& 63` is free.
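  // Illustrative note: the Ryu shift values lie in [49, 58], so
  // (__dist & 63) == __dist and the mask below does not change the result;
  // it only mirrors the hardware's modulo-64 shift-count semantics.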
  return static_cast<uint64_t>(__temp >> (__dist & 63));
}
#else // ^^^ __int128 available ^^^ / vvv intrinsics unavailable vvv

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_ALWAYS_INLINE uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  // TRANSITION, VSO-634761
  // The casts here help MSVC to avoid calls to the __allmul library function.
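  // Sketch of the technique used below (schoolbook multiplication on 32-bit
  // halves): writing __a == __aHi * 2^32 + __aLo and __b == __bHi * 2^32 + __bLo,
  //   __a * __b == __b11 * 2^64 + (__b01 + __b10) * 2^32 + __b00
  // where each partial product fits in 64 bits. The __mid1/__mid2 sums below
  // fold the carries from the middle terms into the high half.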
  const uint32_t __aLo = static_cast<uint32_t>(__a);
  const uint32_t __aHi = static_cast<uint32_t>(__a >> 32);
  const uint32_t __bLo = static_cast<uint32_t>(__b);
  const uint32_t __bHi = static_cast<uint32_t>(__b >> 32);

  const uint64_t __b00 = static_cast<uint64_t>(__aLo) * __bLo;
  const uint64_t __b01 = static_cast<uint64_t>(__aLo) * __bHi;
  const uint64_t __b10 = static_cast<uint64_t>(__aHi) * __bLo;
  const uint64_t __b11 = static_cast<uint64_t>(__aHi) * __bHi;

  const uint32_t __b00Lo = static_cast<uint32_t>(__b00);
  const uint32_t __b00Hi = static_cast<uint32_t>(__b00 >> 32);

  const uint64_t __mid1 = __b10 + __b00Hi;
  const uint32_t __mid1Lo = static_cast<uint32_t>(__mid1);
  const uint32_t __mid1Hi = static_cast<uint32_t>(__mid1 >> 32);

  const uint64_t __mid2 = __b01 + __mid1Lo;
  const uint32_t __mid2Lo = static_cast<uint32_t>(__mid2);
  const uint32_t __mid2Hi = static_cast<uint32_t>(__mid2 >> 32);

  const uint64_t __pHi = __b11 + __mid1Hi + __mid2Hi;
  const uint64_t __pLo = (static_cast<uint64_t>(__mid2Lo) << 32) | __b00Lo;

  *__productHi = __pHi;
  return __pLo;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // We don't need to handle the case __dist >= 64 here (see above).
  _LIBCPP_ASSERT_UNCATEGORIZED(__dist < 64, "");
#ifdef _LIBCPP_64_BIT
  _LIBCPP_ASSERT_UNCATEGORIZED(__dist > 0, "");
  return (__hi << (64 - __dist)) | (__lo >> __dist);
#else // ^^^ 64-bit ^^^ / vvv 32-bit vvv
  // Avoid a 64-bit shift by taking advantage of the range of shift values.
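  // Illustrative note: since 32 <= __dist < 64, the low 32 bits of __lo are
  // shifted out entirely, so __lo >> __dist equals the high 32 bits of __lo
  // shifted right by (__dist - 32), which needs only 32-bit shifts.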
  _LIBCPP_ASSERT_UNCATEGORIZED(__dist >= 32, "");
  return (__hi << (64 - __dist)) | (static_cast<uint32_t>(__lo >> 32) >> (__dist - 32));
#endif // ^^^ 32-bit ^^^
}

#endif // ^^^ intrinsics unavailable ^^^

#ifndef _LIBCPP_64_BIT

// Returns the high 64 bits of the 128-bit product of __a and __b.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __umulh(const uint64_t __a, const uint64_t __b) {
  // Reuse the __ryu_umul128 implementation.
  // Optimizers will likely eliminate the instructions used to compute the
  // low part of the product.
  uint64_t __hi;
  (void) __ryu_umul128(__a, __b, &__hi);
  return __hi;
}

// On 32-bit platforms, compilers typically generate calls to library
// functions for 64-bit divisions, even if the divisor is a constant.

// TRANSITION, LLVM-37932

// The functions here perform division-by-constant using multiplications
// in the same way as 64-bit compilers would do.

// The multipliers and shift values are the ones generated by clang x64
// for expressions like x/5, x/10, etc.
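
// Worked example of the multiply-shift scheme used below: for division by 10,
// the multiplier 0xCCCCCCCCCCCCCCCD equals (2^66 + 1) / 5, so
//   __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 3 == floor(__x * (2^66 + 1) / (5 * 2^67)),
// which equals __x / 10 for every uint64_t __x (the error term __x / (5 * 2^67)
// stays below 1/40). For instance, __div10(12345) == 1234.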

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) {
  return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 2;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) {
  return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 3;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) {
  return __umulh(__x >> 2, 0x28F5C28F5C28F5C3u) >> 2;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) {
  return __umulh(__x, 0xABCC77118461CEFDu) >> 26;
}
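
// Note on __div1e9 below: 10^9 == 2^9 * 5^9, so shifting __x right by 9 first
// removes the power-of-two factor; the remaining multiply-shift divides the
// (at most 55-bit) result by 5^9 == 1953125, with 0x44B82FA09B5A53 == ceil(2^75 / 5^9).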
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) {
  return __umulh(__x >> 9, 0x44B82FA09B5A53u) >> 11;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) {
  // Avoid 64-bit math as much as possible.
  // Returning static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x)) would
  // perform 32x64-bit multiplication and 64-bit subtraction.
  // __x and 1000000000 * __div1e9(__x) are guaranteed to differ by
  // less than 10^9, so their highest 32 bits must be identical,
  // so we can truncate both sides to uint32_t before subtracting.
  // We can also simplify static_cast<uint32_t>(1000000000 * __div1e9(__x)).
  // We can truncate before multiplying instead of after, as multiplying
  // the highest 32 bits of __div1e9(__x) can't affect the lowest 32 bits.
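  // Worked example (arithmetic modulo 2^32 on uint32_t): for
  // __x == 1234567890123456789, __div1e9(__x) == 1234567890 and the
  // expression below yields 123456789, matching __x % 1000000000.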
  return static_cast<uint32_t>(__x) - 1000000000 * static_cast<uint32_t>(__div1e9(__x));
}

#else // ^^^ 32-bit ^^^ / vvv 64-bit vvv

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) {
  return __x / 5;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) {
  return __x / 10;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) {
  return __x / 100;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) {
  return __x / 100000000;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) {
  return __x / 1000000000;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) {
  return static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x));
}

#endif // ^^^ 64-bit ^^^
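
// Returns the number of factors of 5 in __value (which must be non-zero);
// e.g. __pow5Factor(4000) == 3 because 4000 == 2^5 * 5^3.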
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __pow5Factor(uint64_t __value) {
  uint32_t __count = 0;
  for (;;) {
    _LIBCPP_ASSERT_UNCATEGORIZED(__value != 0, "");
    const uint64_t __q = __div5(__value);
    const uint32_t __r = static_cast<uint32_t>(__value) - 5 * static_cast<uint32_t>(__q);
    if (__r != 0) {
      break;
    }
    __value = __q;
    ++__count;
  }
  return __count;
}

// Returns true if __value is divisible by 5^__p.
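// For example, __multipleOfPowerOf5(750, 2) is true (750 == 2 * 3 * 5^3),
// while __multipleOfPowerOf5(750, 4) is false.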
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf5(const uint64_t __value, const uint32_t __p) {
  // I tried a case distinction on __p, but there was no performance difference.
  return __pow5Factor(__value) >= __p;
}

// Returns true if __value is divisible by 2^__p.
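// The check below masks off the low __p bits: __value is divisible by 2^__p
// exactly when those bits are all zero. For example,
// __multipleOfPowerOf2(96, 5) is true (96 == 2^5 * 3) and
// __multipleOfPowerOf2(96, 6) is false.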
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) {
  _LIBCPP_ASSERT_UNCATEGORIZED(__value != 0, "");
  _LIBCPP_ASSERT_UNCATEGORIZED(__p < 64, "");
  // __builtin_ctzll doesn't appear to be faster here.
  return (__value & ((1ull << __p) - 1)) == 0;
}

_LIBCPP_END_NAMESPACE_STD

// clang-format on

#endif // _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H