arduino-create-agent: init at 1.6.1 (#300694)
[NixPkgs.git] / pkgs / development / rocm-modules / 5 / clr / add-missing-operators.patch
blob64ac0c081a029ce39be449d0837670b6c84e46e4
1 From 86bd518981b364c138f9901b28a529899d8654f3 Mon Sep 17 00:00:00 2001
2 From: Jatin Chaudhary <JatinJaikishan.Chaudhary@amd.com>
3 Date: Wed, 11 Oct 2023 23:19:29 +0100
4 Subject: [PATCH] SWDEV-367537 - Add missing operators to __hip_bfloat16
5 implementation
7 Add __host__ and __device__ to bunch of operator/function matching CUDA
8 Fix some bugs seen in __hisinf
10 Change-Id: I9e67e3e3eb2083b463158f3e250e5221c89b2896
11 ---
12 hipamd/include/hip/amd_detail/amd_hip_bf16.h | 533 ++++++++++++++++---
13 1 file changed, 446 insertions(+), 87 deletions(-)
15 diff --git a/hipamd/include/hip/amd_detail/amd_hip_bf16.h b/hipamd/include/hip/amd_detail/amd_hip_bf16.h
16 index 757cb7ada..b15ea3b65 100644
17 --- a/hipamd/include/hip/amd_detail/amd_hip_bf16.h
18 +++ b/hipamd/include/hip/amd_detail/amd_hip_bf16.h
19 @@ -96,10 +96,20 @@
20 #if defined(__HIPCC_RTC__)
21 #define __HOST_DEVICE__ __device__
22 #else
23 +#include <algorithm>
24 #include <climits>
25 -#define __HOST_DEVICE__ __host__ __device__
26 +#include <cmath>
27 +#define __HOST_DEVICE__ __host__ __device__ inline
28 #endif
30 +#define HIPRT_ONE_BF16 __float2bfloat16(1.0f)
31 +#define HIPRT_ZERO_BF16 __float2bfloat16(0.0f)
32 +#define HIPRT_INF_BF16 __ushort_as_bfloat16((unsigned short)0x7F80U)
33 +#define HIPRT_MAX_NORMAL_BF16 __ushort_as_bfloat16((unsigned short)0x7F7FU)
34 +#define HIPRT_MIN_DENORM_BF16 __ushort_as_bfloat16((unsigned short)0x0001U)
35 +#define HIPRT_NAN_BF16 __ushort_as_bfloat16((unsigned short)0x7FFFU)
36 +#define HIPRT_NEG_ZERO_BF16 __ushort_as_bfloat16((unsigned short)0x8000U)
38 // Since we are using unsigned short to represent data in bfloat16, it can be of different sizes on
39 // different machines. These naive checks should prevent some undefined behavior on systems which
40 // have different sizes for basic types.
41 @@ -189,7 +199,7 @@ __HOST_DEVICE__ float2 __bfloat1622float2(const __hip_bfloat162 a) {
42 * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
43 * \brief Moves bfloat16 value to bfloat162
45 -__device__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a) {
46 +__HOST_DEVICE__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a) {
47 return __hip_bfloat162{a, a};
50 @@ -197,13 +207,13 @@ __device__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a) {
51 * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
52 * \brief Reinterprets bits in a __hip_bfloat16 as a signed short integer
54 -__device__ short int __bfloat16_as_short(const __hip_bfloat16 h) { return (short)h.data; }
55 +__HOST_DEVICE__ short int __bfloat16_as_short(const __hip_bfloat16 h) { return (short)h.data; }
57 /**
58 * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
59 * \brief Reinterprets bits in a __hip_bfloat16 as an unsigned signed short integer
61 -__device__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h) { return h.data; }
62 +__HOST_DEVICE__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h) { return h.data; }
64 /**
65 * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
66 @@ -225,7 +235,7 @@ __HOST_DEVICE__ __hip_bfloat162 __float22bfloat162_rn(const float2 a) {
67 * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
68 * \brief Combine two __hip_bfloat16 to __hip_bfloat162
70 -__device__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b) {
71 +__HOST_DEVICE__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b) {
72 return __hip_bfloat162{a, b};
75 @@ -233,13 +243,13 @@ __device__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hi
76 * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
77 * \brief Returns high 16 bits of __hip_bfloat162
79 -__device__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a) { return a.y; }
80 +__HOST_DEVICE__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a) { return a.y; }
82 /**
83 * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
84 * \brief Returns high 16 bits of __hip_bfloat162
86 -__device__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a) {
87 +__HOST_DEVICE__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a) {
88 return __hip_bfloat162{a.y, a.y};
91 @@ -253,7 +263,8 @@ __HOST_DEVICE__ float __high2float(const __hip_bfloat162 a) { return __bfloat162
92 * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
93 * \brief Extracts high 16 bits from each and combines them
95 -__device__ __hip_bfloat162 __highs2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b) {
96 +__HOST_DEVICE__ __hip_bfloat162 __highs2bfloat162(const __hip_bfloat162 a,
97 + const __hip_bfloat162 b) {
98 return __hip_bfloat162{a.y, b.y};
101 @@ -261,13 +272,13 @@ __device__ __hip_bfloat162 __highs2bfloat162(const __hip_bfloat162 a, const __hi
102 * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
103 * \brief Returns low 16 bits of __hip_bfloat162
105 -__device__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a) { return a.x; }
106 +__HOST_DEVICE__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a) { return a.x; }
109 * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
110 * \brief Returns low 16 bits of __hip_bfloat162
112 -__device__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a) {
113 +__HOST_DEVICE__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a) {
114 return __hip_bfloat162{a.x, a.x};
117 @@ -281,7 +292,7 @@ __HOST_DEVICE__ float __low2float(const __hip_bfloat162 a) { return __bfloat162f
118 * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
119 * \brief Swaps both halves
121 -__device__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a) {
122 +__HOST_DEVICE__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a) {
123 return __hip_bfloat162{a.y, a.x};
126 @@ -289,7 +300,7 @@ __device__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a) {
127 * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
128 * \brief Extracts low 16 bits from each and combines them
130 -__device__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b) {
131 +__HOST_DEVICE__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b) {
132 return __hip_bfloat162{a.x, b.x};
135 @@ -297,7 +308,7 @@ __device__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip
136 * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
137 * \brief Reinterprets short int into a bfloat16
139 -__device__ __hip_bfloat16 __short_as_bfloat16(const short int a) {
140 +__HOST_DEVICE__ __hip_bfloat16 __short_as_bfloat16(const short int a) {
141 return __hip_bfloat16{(unsigned short)a};
144 @@ -305,7 +316,7 @@ __device__ __hip_bfloat16 __short_as_bfloat16(const short int a) {
145 * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
146 * \brief Reinterprets unsigned short int into a bfloat16
148 -__device__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a) {
149 +__HOST_DEVICE__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a) {
150 return __hip_bfloat16{a};
153 @@ -314,7 +325,7 @@ __device__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a) {
154 * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
155 * \brief Adds two bfloat16 values
157 -__device__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b) {
158 +__HOST_DEVICE__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b) {
159 return __float2bfloat16(__bfloat162float(a) + __bfloat162float(b));
162 @@ -322,7 +333,7 @@ __device__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b)
163 * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
164 * \brief Subtracts two bfloat16 values
166 -__device__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b) {
167 +__HOST_DEVICE__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b) {
168 return __float2bfloat16(__bfloat162float(a) - __bfloat162float(b));
171 @@ -330,7 +341,7 @@ __device__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b)
172 * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
173 * \brief Divides two bfloat16 values
175 -__device__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b) {
176 +__HOST_DEVICE__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b) {
177 return __float2bfloat16(__bfloat162float(a) / __bfloat162float(b));
180 @@ -348,7 +359,7 @@ __device__ __hip_bfloat16 __hfma(const __hip_bfloat16 a, const __hip_bfloat16 b,
181 * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
182 * \brief Multiplies two bfloat16 values
184 -__device__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b) {
185 +__HOST_DEVICE__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b) {
186 return __float2bfloat16(__bfloat162float(a) * __bfloat162float(b));
189 @@ -356,7 +367,7 @@ __device__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b)
190 * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
191 * \brief Negate a bfloat16 value
193 -__device__ __hip_bfloat16 __hneg(const __hip_bfloat16 a) {
194 +__HOST_DEVICE__ __hip_bfloat16 __hneg(const __hip_bfloat16 a) {
195 auto ret = a;
196 ret.data ^= 0x8000;
197 return ret;
198 @@ -366,7 +377,7 @@ __device__ __hip_bfloat16 __hneg(const __hip_bfloat16 a) {
199 * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
200 * \brief Returns absolute of a bfloat16
202 -__device__ __hip_bfloat16 __habs(const __hip_bfloat16 a) {
203 +__HOST_DEVICE__ __hip_bfloat16 __habs(const __hip_bfloat16 a) {
204 auto ret = a;
205 ret.data &= 0x7FFF;
206 return ret;
207 @@ -376,7 +387,7 @@ __device__ __hip_bfloat16 __habs(const __hip_bfloat16 a) {
208 * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
209 * \brief Divides bfloat162 values
211 -__device__ __hip_bfloat162 __h2div(const __hip_bfloat162 a, const __hip_bfloat162 b) {
212 +__HOST_DEVICE__ __hip_bfloat162 __h2div(const __hip_bfloat162 a, const __hip_bfloat162 b) {
213 return __hip_bfloat162{__float2bfloat16(__bfloat162float(a.x) / __bfloat162float(b.x)),
214 __float2bfloat16(__bfloat162float(a.y) / __bfloat162float(b.y))};
216 @@ -385,7 +396,7 @@ __device__ __hip_bfloat162 __h2div(const __hip_bfloat162 a, const __hip_bfloat16
217 * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
218 * \brief Returns absolute of a bfloat162
220 -__device__ __hip_bfloat162 __habs2(const __hip_bfloat162 a) {
221 +__HOST_DEVICE__ __hip_bfloat162 __habs2(const __hip_bfloat162 a) {
222 return __hip_bfloat162{__habs(a.x), __habs(a.y)};
225 @@ -393,7 +404,7 @@ __device__ __hip_bfloat162 __habs2(const __hip_bfloat162 a) {
226 * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
227 * \brief Adds two bfloat162 values
229 -__device__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
230 +__HOST_DEVICE__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
231 return __hip_bfloat162{__hadd(a.x, b.x), __hadd(a.y, b.y)};
234 @@ -410,7 +421,7 @@ __device__ __hip_bfloat162 __hfma2(const __hip_bfloat162 a, const __hip_bfloat16
235 * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
236 * \brief Multiplies two bfloat162 values
238 -__device__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
239 +__HOST_DEVICE__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
240 return __hip_bfloat162{__hmul(a.x, b.x), __hmul(a.y, b.y)};
243 @@ -418,7 +429,7 @@ __device__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat16
244 * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
245 * \brief Converts a bfloat162 into negative
247 -__device__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a) {
248 +__HOST_DEVICE__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a) {
249 return __hip_bfloat162{__hneg(a.x), __hneg(a.y)};
252 @@ -426,15 +437,251 @@ __device__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a) {
253 * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
254 * \brief Subtracts two bfloat162 values
256 -__device__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
257 +__HOST_DEVICE__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
258 return __hip_bfloat162{__hsub(a.x, b.x), __hsub(a.y, b.y)};
261 +/**
262 + * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
263 + * \brief Operator to multiply two __hip_bfloat16 numbers
264 + */
265 +__HOST_DEVICE__ __hip_bfloat16 operator*(const __hip_bfloat16& l, const __hip_bfloat16& r) {
266 + return __hmul(l, r);
269 +/**
270 + * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
271 + * \brief Operator to multiply-assign two __hip_bfloat16 numbers
272 + */
273 +__HOST_DEVICE__ __hip_bfloat16 operator*=(__hip_bfloat16& l, const __hip_bfloat16& r) {
274 + l = __hmul(l, r);
275 + return l;
278 +/**
279 + * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
280 + * \brief Operator to unary+ on a __hip_bfloat16 number
281 + */
282 +__HOST_DEVICE__ __hip_bfloat16 operator+(const __hip_bfloat16& l) { return l; }
284 +/**
285 + * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
286 + * \brief Operator to add two __hip_bfloat16 numbers
287 + */
288 +__HOST_DEVICE__ __hip_bfloat16 operator+(const __hip_bfloat16& l, const __hip_bfloat16& r) {
289 + return __hadd(l, r);
292 +/**
293 + * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
294 + * \brief Operator to negate a __hip_bfloat16 number
295 + */
296 +__HOST_DEVICE__ __hip_bfloat16 operator-(const __hip_bfloat16& l) { return __hneg(l); }
298 +/**
299 + * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
300 + * \brief Operator to subtract two __hip_bfloat16 numbers
301 + */
302 +__HOST_DEVICE__ __hip_bfloat16 operator-(const __hip_bfloat16& l, const __hip_bfloat16& r) {
303 + return __hsub(l, r);
306 +/**
307 + * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
308 + * \brief Operator to post increment a __hip_bfloat16 number
309 + */
310 +__HOST_DEVICE__ __hip_bfloat16 operator++(__hip_bfloat16& l, const int) {
311 + auto ret = l;
312 + l = __hadd(l, HIPRT_ONE_BF16);
313 + return ret;
316 +/**
317 + * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
318 + * \brief Operator to pre increment a __hip_bfloat16 number
319 + */
320 +__HOST_DEVICE__ __hip_bfloat16& operator++(__hip_bfloat16& l) {
321 + l = __hadd(l, HIPRT_ONE_BF16);
322 + return l;
325 +/**
326 + * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
327 + * \brief Operator to post decrement a __hip_bfloat16 number
328 + */
329 +__HOST_DEVICE__ __hip_bfloat16 operator--(__hip_bfloat16& l, const int) {
330 + auto ret = l;
331 + l = __hsub(l, HIPRT_ONE_BF16);
332 + return ret;
335 +/**
336 + * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
337 + * \brief Operator to pre decrement a __hip_bfloat16 number
338 + */
339 +__HOST_DEVICE__ __hip_bfloat16& operator--(__hip_bfloat16& l) {
340 + l = __hsub(l, HIPRT_ONE_BF16);
341 + return l;
344 +/**
345 + * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
346 + * \brief Operator to add-assign two __hip_bfloat16 numbers
347 + */
348 +__HOST_DEVICE__ __hip_bfloat16& operator+=(__hip_bfloat16& l, const __hip_bfloat16& r) {
349 + l = l + r;
350 + return l;
353 +/**
354 + * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
355 + * \brief Operator to subtract-assign two __hip_bfloat16 numbers
356 + */
357 +__HOST_DEVICE__ __hip_bfloat16& operator-=(__hip_bfloat16& l, const __hip_bfloat16& r) {
358 + l = l - r;
359 + return l;
362 +/**
363 + * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
364 + * \brief Operator to divide two __hip_bfloat16 numbers
365 + */
366 +__HOST_DEVICE__ __hip_bfloat16 operator/(const __hip_bfloat16& l, const __hip_bfloat16& r) {
367 + return __hdiv(l, r);
370 +/**
371 + * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
372 + * \brief Operator to divide-assign two __hip_bfloat16 numbers
373 + */
374 +__HOST_DEVICE__ __hip_bfloat16& operator/=(__hip_bfloat16& l, const __hip_bfloat16& r) {
375 + l = l / r;
376 + return l;
379 +/**
380 + * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
381 + * \brief Operator to multiply two __hip_bfloat162 numbers
382 + */
383 +__HOST_DEVICE__ __hip_bfloat162 operator*(const __hip_bfloat162& l, const __hip_bfloat162& r) {
384 + return __hmul2(l, r);
387 +/**
388 + * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
389 + * \brief Operator to multiply-assign two __hip_bfloat162 numbers
390 + */
391 +__HOST_DEVICE__ __hip_bfloat162 operator*=(__hip_bfloat162& l, const __hip_bfloat162& r) {
392 + l = __hmul2(l, r);
393 + return l;
396 +/**
397 + * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
398 + * \brief Operator to unary+ on a __hip_bfloat162 number
399 + */
400 +__HOST_DEVICE__ __hip_bfloat162 operator+(const __hip_bfloat162& l) { return l; }
402 +/**
403 + * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
404 + * \brief Operator to add two __hip_bfloat162 numbers
405 + */
406 +__HOST_DEVICE__ __hip_bfloat162 operator+(const __hip_bfloat162& l, const __hip_bfloat162& r) {
407 + return __hadd2(l, r);
410 +/**
411 + * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
412 + * \brief Operator to negate a __hip_bfloat162 number
413 + */
414 +__HOST_DEVICE__ __hip_bfloat162 operator-(const __hip_bfloat162& l) { return __hneg2(l); }
416 +/**
417 + * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
418 + * \brief Operator to subtract two __hip_bfloat162 numbers
419 + */
420 +__HOST_DEVICE__ __hip_bfloat162 operator-(const __hip_bfloat162& l, const __hip_bfloat162& r) {
421 + return __hsub2(l, r);
424 +/**
425 + * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
426 + * \brief Operator to post increment a __hip_bfloat162 number
427 + */
428 +__HOST_DEVICE__ __hip_bfloat162 operator++(__hip_bfloat162& l, const int) {
429 + auto ret = l;
430 + l = __hadd2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
431 + return ret;
434 +/**
435 + * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
436 + * \brief Operator to pre increment a __hip_bfloat162 number
437 + */
438 +__HOST_DEVICE__ __hip_bfloat162& operator++(__hip_bfloat162& l) {
439 + l = __hadd2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
440 + return l;
443 +/**
444 + * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
445 + * \brief Operator to post decrement a __hip_bfloat162 number
446 + */
447 +__HOST_DEVICE__ __hip_bfloat162 operator--(__hip_bfloat162& l, const int) {
448 + auto ret = l;
449 + l = __hsub2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
450 + return ret;
453 +/**
454 + * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
455 + * \brief Operator to pre decrement a __hip_bfloat162 number
456 + */
457 +__HOST_DEVICE__ __hip_bfloat162& operator--(__hip_bfloat162& l) {
458 + l = __hsub2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
459 + return l;
462 +/**
463 + * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
464 + * \brief Operator to add-assign two __hip_bfloat162 numbers
465 + */
466 +__HOST_DEVICE__ __hip_bfloat162& operator+=(__hip_bfloat162& l, const __hip_bfloat162& r) {
467 + l = l + r;
468 + return l;
471 +/**
472 + * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
473 + * \brief Operator to subtract-assign two __hip_bfloat162 numbers
474 + */
475 +__HOST_DEVICE__ __hip_bfloat162& operator-=(__hip_bfloat162& l, const __hip_bfloat162& r) {
476 + l = l - r;
477 + return l;
480 +/**
481 + * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
482 + * \brief Operator to divide two __hip_bfloat162 numbers
483 + */
484 +__HOST_DEVICE__ __hip_bfloat162 operator/(const __hip_bfloat162& l, const __hip_bfloat162& r) {
485 + return __h2div(l, r);
488 +/**
489 + * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
490 + * \brief Operator to divide-assign two __hip_bfloat162 numbers
491 + */
492 +__HOST_DEVICE__ __hip_bfloat162& operator/=(__hip_bfloat162& l, const __hip_bfloat162& r) {
493 + l = l / r;
494 + return l;
498 * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
499 * \brief Compare two bfloat162 values
501 -__device__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b) {
502 +__HOST_DEVICE__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b) {
503 return __bfloat162float(a) == __bfloat162float(b);
506 @@ -442,7 +689,7 @@ __device__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b) {
507 * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
508 * \brief Compare two bfloat162 values - unordered equal
510 -__device__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b) {
511 +__HOST_DEVICE__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b) {
512 return !(__bfloat162float(a) < __bfloat162float(b)) &&
513 !(__bfloat162float(a) > __bfloat162float(b));
515 @@ -451,7 +698,7 @@ __device__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b) {
516 * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
517 * \brief Compare two bfloat162 values - greater than
519 -__device__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
520 +__HOST_DEVICE__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
521 return __bfloat162float(a) > __bfloat162float(b);
524 @@ -459,7 +706,7 @@ __device__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
525 * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
526 * \brief Compare two bfloat162 values - unordered greater than
528 -__device__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
529 +__HOST_DEVICE__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
530 return !(__bfloat162float(a) <= __bfloat162float(b));
533 @@ -467,7 +714,7 @@ __device__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
534 * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
535 * \brief Compare two bfloat162 values - greater than equal
537 -__device__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b) {
538 +__HOST_DEVICE__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b) {
539 return __bfloat162float(a) >= __bfloat162float(b);
542 @@ -475,7 +722,7 @@ __device__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b) {
543 * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
544 * \brief Compare two bfloat162 values - unordered greater than equal
546 -__device__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
547 +__HOST_DEVICE__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
548 return !(__bfloat162float(a) < __bfloat162float(b));
551 @@ -483,7 +730,7 @@ __device__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
552 * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
553 * \brief Compare two bfloat162 values - not equal
555 -__device__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b) {
556 +__HOST_DEVICE__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b) {
557 return __bfloat162float(a) != __bfloat162float(b);
560 @@ -491,7 +738,7 @@ __device__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b) {
561 * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
562 * \brief Compare two bfloat162 values - unordered not equal
564 -__device__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
565 +__HOST_DEVICE__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
566 return !(__bfloat162float(a) == __bfloat162float(b));
569 @@ -499,23 +746,31 @@ __device__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
570 * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
571 * \brief Compare two bfloat162 values - return max
573 -__device__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b) {
574 +__HOST_DEVICE__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b) {
575 +#if __HIP_DEVICE_COMPILE__
576 return __float2bfloat16(__ocml_fmax_f32(__bfloat162float(a), __bfloat162float(b)));
577 +#else
578 + return __float2bfloat16(std::max(__bfloat162float(a), __bfloat162float(b)));
579 +#endif
583 * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
584 * \brief Compare two bfloat162 values - return min
586 -__device__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b) {
587 +__HOST_DEVICE__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b) {
588 +#if __HIP_DEVICE_COMPILE__
589 return __float2bfloat16(__ocml_fmin_f32(__bfloat162float(a), __bfloat162float(b)));
590 +#else
591 + return __float2bfloat16(std::min(__bfloat162float(a), __bfloat162float(b)));
592 +#endif
596 * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
597 * \brief Compare two bfloat162 values - less than operator
599 -__device__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
600 +__HOST_DEVICE__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
601 return __bfloat162float(a) < __bfloat162float(b);
604 @@ -523,15 +778,15 @@ __device__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
605 * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
606 * \brief Compare two bfloat162 values - unordered less than
608 -__device__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
609 +__HOST_DEVICE__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
610 return !(__bfloat162float(a) >= __bfloat162float(b));
614 * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
615 - * \brief Compare two bfloat162 values - less than
616 + * \brief Compare two bfloat162 values - less than equal
618 -__device__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b) {
619 +__HOST_DEVICE__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b) {
620 return __bfloat162float(a) <= __bfloat162float(b);
623 @@ -539,7 +794,7 @@ __device__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b) {
624 * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
625 * \brief Compare two bfloat162 values - unordered less than equal
627 -__device__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
628 +__HOST_DEVICE__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
629 return !(__bfloat162float(a) > __bfloat162float(b));
632 @@ -547,19 +802,33 @@ __device__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
633 * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
634 * \brief Checks if number is inf
636 -__device__ int __hisinf(const __hip_bfloat16 a) { return __ocml_isinf_f32(__bfloat162float(a)); }
637 +__HOST_DEVICE__ int __hisinf(const __hip_bfloat16 a) {
638 + unsigned short sign = a.data & 0x8000U;
639 +#if __HIP_DEVICE_COMPILE__
640 + int res = __ocml_isinf_f32(__bfloat162float(a));
641 +#else
642 + int res = std::isinf(__bfloat162float(a)) ? 1 : 0;
643 +#endif
644 + return (res == 0) ? res : ((sign != 0U) ? -res : res);
648 * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
649 * \brief Checks if number is nan
651 -__device__ bool __hisnan(const __hip_bfloat16 a) { return __ocml_isnan_f32(__bfloat162float(a)); }
652 +__HOST_DEVICE__ bool __hisnan(const __hip_bfloat16 a) {
653 +#if __HIP_DEVICE_COMPILE__
654 + return __ocml_isnan_f32(__bfloat162float(a));
655 +#else
656 + return std::isnan(__bfloat162float(a));
657 +#endif
661 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
662 * \brief Checks if two numbers are equal
664 -__device__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
665 +__HOST_DEVICE__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
666 return __heq(a.x, b.x) && __heq(a.y, b.y);
669 @@ -567,7 +836,7 @@ __device__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
670 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
671 * \brief Checks if two numbers are equal - unordered
673 -__device__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
674 +__HOST_DEVICE__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
675 return __hequ(a.x, b.x) && __hequ(a.y, b.y);
678 @@ -575,7 +844,7 @@ __device__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
679 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
680 * \brief Check for a >= b
682 -__device__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
683 +__HOST_DEVICE__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
684 return __hge(a.x, b.x) && __hge(a.y, b.y);
687 @@ -583,7 +852,7 @@ __device__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
688 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
689 * \brief Check for a >= b - unordered
691 -__device__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
692 +__HOST_DEVICE__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
693 return __hgeu(a.x, b.x) && __hgeu(a.y, b.y);
696 @@ -591,7 +860,7 @@ __device__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
697 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
698 * \brief Check for a > b
700 -__device__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
701 +__HOST_DEVICE__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
702 return __hgt(a.x, b.x) && __hgt(a.y, b.y);
705 @@ -599,7 +868,7 @@ __device__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
706 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
707 * \brief Check for a > b - unordered
709 -__device__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
710 +__HOST_DEVICE__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
711 return __hgtu(a.x, b.x) && __hgtu(a.y, b.y);
714 @@ -607,7 +876,7 @@ __device__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
715 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
716 * \brief Check for a <= b
718 -__device__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
719 +__HOST_DEVICE__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
720 return __hle(a.x, b.x) && __hle(a.y, b.y);
723 @@ -615,7 +884,7 @@ __device__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
724 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
725 * \brief Check for a <= b - unordered
727 -__device__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
728 +__HOST_DEVICE__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
729 return __hleu(a.x, b.x) && __hleu(a.y, b.y);
732 @@ -623,7 +892,7 @@ __device__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
733 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
734 * \brief Check for a < b
736 -__device__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
737 +__HOST_DEVICE__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
738 return __hlt(a.x, b.x) && __hlt(a.y, b.y);
741 @@ -631,7 +900,7 @@ __device__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
742 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
743 * \brief Check for a < b - unordered
745 -__device__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
746 +__HOST_DEVICE__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
747 return __hltu(a.x, b.x) && __hltu(a.y, b.y);
750 @@ -639,7 +908,7 @@ __device__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
751 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
752 * \brief Check for a != b
754 -__device__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
755 +__HOST_DEVICE__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
756 return __hne(a.x, b.x) && __hne(a.y, b.y);
759 @@ -647,7 +916,7 @@ __device__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
760 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
761 * \brief Check for a != b
763 -__device__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
764 +__HOST_DEVICE__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
765 return __hneu(a.x, b.x) && __hneu(a.y, b.y);
768 @@ -655,84 +924,175 @@ __device__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
769 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
770 * \brief Check for a != b, returns 1.0 if equal, otherwise 0.0
772 -__device__ __hip_bfloat162 __heq2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
773 - return __hip_bfloat162{{__heq(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
774 - {__heq(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
775 +__HOST_DEVICE__ __hip_bfloat162 __heq2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
776 + return __hip_bfloat162{{__heq(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
777 + {__heq(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
781 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
782 * \brief Check for a >= b, returns 1.0 if greater than equal, otherwise 0.0
784 -__device__ __hip_bfloat162 __hge2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
785 - return __hip_bfloat162{{__hge(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
786 - {__hge(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
787 +__HOST_DEVICE__ __hip_bfloat162 __hge2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
788 + return __hip_bfloat162{{__hge(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
789 + {__hge(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
793 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
794 * \brief Check for a > b, returns 1.0 if greater than equal, otherwise 0.0
796 -__device__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
797 - return __hip_bfloat162{{__hgt(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
798 - {__hgt(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
799 +__HOST_DEVICE__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
800 + return __hip_bfloat162{{__hgt(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
801 + {__hgt(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ONE_BF16}};
805 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
806 * \brief Check for a is NaN, returns 1.0 if NaN, otherwise 0.0
808 -__device__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a) {
809 - return __hip_bfloat162{
810 - {__ocml_isnan_f32(__bfloat162float(a.x)) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
811 - {__ocml_isnan_f32(__bfloat162float(a.y)) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
812 +__HOST_DEVICE__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a) {
813 + return __hip_bfloat162{{__hisnan(a.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
814 + {__hisnan(a.y) ? HIPRT_ONE_BF16 : HIPRT_ONE_BF16}};
818 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
819 * \brief Check for a <= b, returns 1.0 if greater than equal, otherwise 0.0
821 -__device__ __hip_bfloat162 __hle2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
822 - return __hip_bfloat162{{__hle(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
823 - {__hle(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
824 +__HOST_DEVICE__ __hip_bfloat162 __hle2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
825 + return __hip_bfloat162{{__hle(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
826 + {__hle(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
830 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
831 * \brief Check for a < b, returns 1.0 if greater than equal, otherwise 0.0
833 -__device__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
834 - return __hip_bfloat162{{__hlt(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
835 - {__hlt(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
836 +__HOST_DEVICE__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
837 + return __hip_bfloat162{{__hlt(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
838 + {__hlt(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
842 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
843 * \brief Returns max of two elements
845 -__device__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
846 - return __hip_bfloat162{
847 - __float2bfloat16(__ocml_fmax_f32(__bfloat162float(a.x), __bfloat162float(b.x))),
848 - __float2bfloat16(__ocml_fmax_f32(__bfloat162float(a.y), __bfloat162float(b.y)))};
849 +__HOST_DEVICE__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
850 + return __hip_bfloat162{__hmax(a.x, b.x), __hmax(a.y, b.y)};
854 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
855 * \brief Returns min of two elements
857 -__device__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
858 - return __hip_bfloat162{
859 - __float2bfloat16(__ocml_fmin_f32(__bfloat162float(a.x), __bfloat162float(b.x))),
860 - __float2bfloat16(__ocml_fmin_f32(__bfloat162float(a.y), __bfloat162float(b.y)))};
861 +__HOST_DEVICE__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
862 + return __hip_bfloat162{__hmin(a.x, b.x), __hmin(a.y, b.y)};
866 * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
867 * \brief Checks for not equal to
869 -__device__ __hip_bfloat162 __hne2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
870 - return __hip_bfloat162{{__hne(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
871 - {__hne(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
872 +__HOST_DEVICE__ __hip_bfloat162 __hne2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
873 + return __hip_bfloat162{{__hne(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
874 + {__hne(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
877 +/**
878 + * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
879 + * \brief Operator to perform an equal compare on two __hip_bfloat16 numbers
880 + */
881 +__HOST_DEVICE__ bool operator==(const __hip_bfloat16& l, const __hip_bfloat16& r) {
882 + return __heq(l, r);
885 +/**
886 + * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
887 + * \brief Operator to perform a not equal on two __hip_bfloat16 numbers
888 + */
889 +__HOST_DEVICE__ bool operator!=(const __hip_bfloat16& l, const __hip_bfloat16& r) {
890 + return __hne(l, r);
893 +/**
894 + * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
895 + * \brief Operator to perform a less than on two __hip_bfloat16 numbers
896 + */
897 +__HOST_DEVICE__ bool operator<(const __hip_bfloat16& l, const __hip_bfloat16& r) {
898 + return __hlt(l, r);
901 +/**
902 + * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
903 + * \brief Operator to perform a less than equal on two __hip_bfloat16 numbers
904 + */
905 +__HOST_DEVICE__ bool operator<=(const __hip_bfloat16& l, const __hip_bfloat16& r) {
906 + return __hle(l, r);
909 +/**
910 + * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
911 + * \brief Operator to perform a greater than on two __hip_bfloat16 numbers
912 + */
913 +__HOST_DEVICE__ bool operator>(const __hip_bfloat16& l, const __hip_bfloat16& r) {
914 + return __hgt(l, r);
917 +/**
918 + * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
919 + * \brief Operator to perform a greater than equal on two __hip_bfloat16 numbers
920 + */
921 +__HOST_DEVICE__ bool operator>=(const __hip_bfloat16& l, const __hip_bfloat16& r) {
922 + return __hge(l, r);
925 +/**
926 + * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
927 + * \brief Operator to perform an equal compare on two __hip_bfloat16 numbers
928 + */
929 +__HOST_DEVICE__ bool operator==(const __hip_bfloat162& l, const __hip_bfloat162& r) {
930 + return __heq(l.x, r.x) && __heq(l.y, r.y);
933 +/**
934 + * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
935 + * \brief Operator to perform a not equal on two __hip_bfloat16 numbers
936 + */
937 +__HOST_DEVICE__ bool operator!=(const __hip_bfloat162& l, const __hip_bfloat162& r) {
938 + return __hne(l.x, r.x) || __hne(l.y, r.y);
941 +/**
942 + * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
943 + * \brief Operator to perform a less than on two __hip_bfloat16 numbers
944 + */
945 +__HOST_DEVICE__ bool operator<(const __hip_bfloat162& l, const __hip_bfloat162& r) {
946 + return __hlt(l.x, r.x) && __hlt(l.y, r.y);
949 +/**
950 + * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
951 + * \brief Operator to perform a less than equal on two __hip_bfloat16 numbers
952 + */
953 +__HOST_DEVICE__ bool operator<=(const __hip_bfloat162& l, const __hip_bfloat162& r) {
954 + return __hle(l.x, r.x) && __hle(l.y, r.y);
957 +/**
958 + * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
959 + * \brief Operator to perform a greater than on two __hip_bfloat16 numbers
960 + */
961 +__HOST_DEVICE__ bool operator>(const __hip_bfloat162& l, const __hip_bfloat162& r) {
962 + return __hgt(l.x, r.x) && __hgt(l.y, r.y);
965 +/**
966 + * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
967 + * \brief Operator to perform a greater than equal on two __hip_bfloat16 numbers
968 + */
969 +__HOST_DEVICE__ bool operator>=(const __hip_bfloat162& l, const __hip_bfloat162& r) {
970 + return __hge(l.x, r.x) && __hge(l.y, r.y);
974 @@ -974,5 +1334,4 @@ __device__ __hip_bfloat162 h2sqrt(const __hip_bfloat162 h) {
975 __device__ __hip_bfloat162 h2trunc(const __hip_bfloat162 h) {
976 return __hip_bfloat162{htrunc(h.x), htrunc(h.y)};
979 #endif