1 # OpenCL built-in library: type conversion functions
3 # Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
4 # Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>
5 # Copyright (c) 2024 Romaric Jodin <rjodin@chromium.org>
7 # Permission is hereby granted, free of charge, to any person obtaining a copy
8 # of this software and associated documentation files (the "Software"), to deal
9 # in the Software without restriction, including without limitation the rights
10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 # copies of the Software, and to permit persons to whom the Software is
12 # furnished to do so, subject to the following conditions:
14 # The above copyright notice and this permission notice shall be included in
15 # all copies or substantial portions of the Software.
17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 # This script generates the file convert_type.cl, which contains all of the
26 # OpenCL functions in the form:
28 # convert_<destTypen><_sat><_roundingMode>(<sourceTypen>)
32 parser
= argparse
.ArgumentParser()
34 "--clspv", action
="store_true", help="Generate the clspv variant of the code"
36 args
= parser
.parse_args()
53 int_types
= ["char", "uchar", "short", "ushort", "int", "uint", "long", "ulong"]
54 unsigned_types
= ["uchar", "ushort", "uint", "ulong"]
55 float_types
= ["half", "float", "double"]
56 int64_types
= ["long", "ulong"]
57 float64_types
= ["double"]
58 float16_types
= ["half"]
59 vector_sizes
= ["", "2", "3", "4", "8", "16"]
60 half_sizes
= [("2", ""), ("4", "2"), ("8", "4"), ("16", "8")]
62 saturation
= ["", "_sat"]
63 rounding_modes
= ["_rtz", "_rte", "_rtp", "_rtn"]
106 "uchar": "UCHAR_MAX",
108 "ushort": "USHRT_MAX",
112 "ulong": "ULONG_MAX",
113 "half": "0x1.ffcp+15",
125 "half": "-0x1.ffcp+15",
129 def conditional_guard(src
, dst
):
133 if src
in int64_types
:
134 int64_count
= int64_count
+ 1
135 elif src
in float64_types
:
136 float64_count
= float64_count
+ 1
137 elif src
in float16_types
:
138 float16_count
= float16_count
+ 1
139 if dst
in int64_types
:
140 int64_count
= int64_count
+ 1
141 elif dst
in float64_types
:
142 float64_count
= float64_count
+ 1
143 elif dst
in float16_types
:
144 float16_count
= float16_count
+ 1
145 if float64_count
> 0 and float16_count
> 0:
146 print("#if defined(cl_khr_fp16) && defined(cl_khr_fp64)")
148 elif float64_count
> 0:
149 # In embedded profile, if cl_khr_fp64 is supported cles_khr_int64 has to be
150 print("#ifdef cl_khr_fp64")
152 elif float16_count
> 0:
153 print("#if defined cl_khr_fp16")
155 elif int64_count
> 0:
156 print("#if defined cles_khr_int64 || !defined(__EMBEDDED_PROFILE__)")
162 """/* !!!! AUTOGENERATED FILE generated by convert_type.py !!!!!
164 DON'T CHANGE THIS FILE. MAKE YOUR CHANGES TO convert_type.py AND RUN:
165 $ ./generate-conversion-type-cl.sh
167 OpenCL type conversion functions
169 Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
170 Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>
172 Permission is hereby granted, free of charge, to any person obtaining a copy
173 of this software and associated documentation files (the "Software"), to deal
174 in the Software without restriction, including without limitation the rights
175 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
176 copies of the Software, and to permit persons to whom the Software is
177 furnished to do so, subject to the following conditions:
179 The above copyright notice and this permission notice shall be included in
180 all copies or substantial portions of the Software.
182 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
183 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
184 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
185 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
186 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
187 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
194 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
198 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
200 #if defined(__EMBEDDED_PROFILE__) && !defined(cles_khr_int64)
201 #error Embedded profile that supports cl_khr_fp64 also has to support cles_khr_int64
206 #ifdef cles_khr_int64
207 #pragma OPENCL EXTENSION cles_khr_int64 : enable
214 # Default Conversions
216 # All conversions are in accordance with the OpenCL specification,
217 # which cites the C99 conversion rules.
219 # Casting from floating point to integer results in conversions
220 # with truncation, so it should be suitable for the default convert
223 # Conversions from integer to floating-point, and floating-point to
224 # floating-point through casting are done with the default rounding
225 # mode. While C99 allows dynamically changing the rounding mode
226 # during runtime, it is not a supported feature in OpenCL according
227 # to Section 7.1 - Rounding Modes in the OpenCL 1.2 specification.
229 # Therefore, we can assume for optimization purposes that the
230 # rounding mode is fixed to round-to-nearest-even. Platform target
231 # authors should ensure that the rounding-control registers remain
232 # in this state, and that this invariant holds.
234 # Also note, even though the OpenCL specification isn't entirely
235 # clear on this matter, we implement all rounding mode combinations
236 # even for integer-to-integer conversions. When such a conversion
237 # is used, the rounding mode is ignored.
241 def generate_default_conversion(src
, dst
, mode
):
242 close_conditional
= conditional_guard(src
, dst
)
244 for size
in vector_sizes
:
247 f
"""_CLC_DEF _CLC_OVERLOAD {dst} convert_{dst}{mode}({src} x) {{
254 f
"""_CLC_DEF _CLC_OVERLOAD {dst}{size} convert_{dst}{size}{mode}({src}{size} x) {{
255 return __builtin_convertvector(x, {dst}{size});
260 if close_conditional
:
264 # Do not generate default conversion for clspv as they are handled natively
268 generate_default_conversion(src
, dst
, "")
270 for src
in int_types
:
271 for dst
in int_types
:
272 for mode
in rounding_modes
:
273 # Do not generate "_rte" conversion for clspv as they are handled
275 if clspv
and mode
== "_rte":
277 generate_default_conversion(src
, dst
, mode
)
280 # Saturated Conversions To Integers
282 # These functions are dependent on the unsaturated conversion functions
283 # generated above, and use clamp, max, min, and select to eliminate
284 # branching and vectorize the conversions.
286 # Again, as above, we allow all rounding modes for integer-to-integer
287 # conversions with saturation.
291 def generate_saturated_conversion(src
, dst
, size
):
293 close_conditional
= conditional_guard(src
, dst
)
295 """_CLC_DEF _CLC_OVERLOAD
296 {DST}{N} convert_{DST}{N}_sat({SRC}{N} x)
298 DST
=dst
, SRC
=src
, N
=size
302 # FIXME: This is a workaround for the lack of a select function with
303 # signed third argument when the first two arguments are unsigned types.
304 # We cast to the signed type for sign-extension, then do a bitcast to
306 if dst
in unsigned_types
:
307 bool_prefix
= "as_{DST}{N}(convert_{BOOL}{N}".format(
308 DST
=dst
, BOOL
=bool_type
[dst
], N
=size
312 bool_prefix
= "convert_{BOOL}{N}".format(BOOL
=bool_type
[dst
], N
=size
)
318 # Conversion between same types
321 elif src
in float_types
:
324 # Conversion from float to int
326 """ {DST}{N} y = convert_{DST}{N}(x);
327 y = select(y, ({DST}{N}){DST_MIN}, {BP}(x <= ({SRC}{N}){DST_MIN}){BS});
328 y = select(y, ({DST}{N}){DST_MAX}, {BP}(x >= ({SRC}{N}){DST_MAX}){BS});
333 DST_MIN
=limit_min
[dst
],
334 DST_MAX
=limit_max
[dst
],
340 # Conversion from float to int
342 """ {DST}{N} y = convert_{DST}{N}(x);
343 y = select(y, ({DST}{N}){DST_MIN}, {BP}(x < ({SRC}{N}){DST_MIN}){BS});
344 y = select(y, ({DST}{N}){DST_MAX}, {BP}(x > ({SRC}{N}){DST_MAX}){BS});
349 DST_MIN
=limit_min
[dst
],
350 DST_MAX
=limit_max
[dst
],
358 # Integer to integer conversion with sizeof(src) == sizeof(dst)
359 if sizeof_type
[src
] == sizeof_type
[dst
]:
360 if src
in unsigned_types
:
362 " x = min(x, ({SRC}){DST_MAX});".format(
363 SRC
=src
, DST_MAX
=limit_max
[dst
]
367 print(" x = max(x, ({SRC})0);".format(SRC
=src
))
369 # Integer to integer conversion where sizeof(src) > sizeof(dst)
370 elif sizeof_type
[src
] > sizeof_type
[dst
]:
371 if src
in unsigned_types
:
373 " x = min(x, ({SRC}){DST_MAX});".format(
374 SRC
=src
, DST_MAX
=limit_max
[dst
]
379 " x = clamp(x, ({SRC}){DST_MIN}, ({SRC}){DST_MAX});".format(
380 SRC
=src
, DST_MIN
=limit_min
[dst
], DST_MAX
=limit_max
[dst
]
384 # Integer to integer conversion where sizeof(src) < sizeof(dst)
385 elif src
not in unsigned_types
and dst
in unsigned_types
:
386 print(" x = max(x, ({SRC})0);".format(SRC
=src
))
388 print(" return convert_{DST}{N}(x);".format(DST
=dst
, N
=size
))
392 if close_conditional
:
397 for dst
in int_types
:
398 for size
in vector_sizes
:
399 generate_saturated_conversion(src
, dst
, size
)
402 def generate_saturated_conversion_with_rounding(src
, dst
, size
, mode
):
404 close_conditional
= conditional_guard(src
, dst
)
408 """_CLC_DEF _CLC_OVERLOAD
409 {DST}{N} convert_{DST}{N}_sat{M}({SRC}{N} x)
411 return convert_{DST}{N}_sat(x);
414 DST
=dst
, SRC
=src
, N
=size
, M
=mode
419 if close_conditional
:
423 for src
in int_types
:
424 for dst
in int_types
:
425 for size
in vector_sizes
:
426 for mode
in rounding_modes
:
427 generate_saturated_conversion_with_rounding(src
, dst
, size
, mode
)
430 # Conversions To/From Floating-Point With Rounding
432 # Note that we assume as above that casts from floating-point to
433 # integer are done with truncation, and that the default rounding
434 # mode is fixed to round-to-nearest-even, as per C99 and OpenCL
437 # These functions rely on the use of abs, ceil, fabs, floor,
438 # nextafter, sign, rint and the above generated conversion functions.
440 # Only conversions to integers can have saturation.
444 def generate_float_conversion(src
, dst
, size
, mode
, sat
):
446 close_conditional
= conditional_guard(src
, dst
)
448 """_CLC_DEF _CLC_OVERLOAD
449 {DST}{N} convert_{DST}{N}{S}{M}({SRC}{N} x)
451 SRC
=src
, DST
=dst
, N
=size
, M
=mode
, S
=sat
458 print(" x = rint(x);")
460 print(" x = ceil(x);")
462 print(" x = floor(x);")
463 print(" return convert_{DST}{N}{S}(x);".format(DST
=dst
, N
=size
, S
=sat
))
465 print(" return convert_{DST}{N}(x);".format(DST
=dst
, N
=size
))
467 print(" {DST}{N} r = convert_{DST}{N}(x);".format(DST
=dst
, N
=size
))
469 print(" {SRC}{N} y = convert_{SRC}{N}_sat(r);".format(SRC
=src
, N
=size
))
471 print(" {SRC}{N} y = convert_{SRC}{N}(r);".format(SRC
=src
, N
=size
))
475 " {USRC}{N} abs_x = abs(x);".format(
476 USRC
=unsigned_type
[src
], N
=size
480 " {USRC}{N} abs_y = abs(y);".format(
481 USRC
=unsigned_type
[src
], N
=size
485 print(" {SRC}{N} abs_x = fabs(x);".format(SRC
=src
, N
=size
))
486 print(" {SRC}{N} abs_y = fabs(y);".format(SRC
=src
, N
=size
))
489 " {BOOL}{N} c = convert_{BOOL}{N}(abs_y > abs_x);".format(
490 BOOL
=bool_type
[dst
], N
=size
493 if sizeof_type
[src
] >= 4 and src
in int_types
:
495 " c = c || convert_{BOOL}{N}(({SRC}{N}){SRC_MAX} == x);".format(
496 BOOL
=bool_type
[dst
], N
=size
, SRC
=src
, SRC_MAX
=limit_max
[src
]
500 " {DST}{N} sel = select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), c);".format(
501 DST
=dst
, N
=size
, BOOL
=bool_type
[dst
], SRC
=src
506 " {DST}{N} sel = select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));".format(
507 DST
=dst
, N
=size
, BOOL
=bool_type
[dst
]
510 if dst
== "half" and src
in int_types
and sizeof_type
[src
] >= 2:
511 dst_max
= limit_max
[dst
]
512 # short is 16 bits signed, so the maximum value rounded to zero is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767)
514 dst_max
= "0x1.ffcp+14"
516 " return clamp(sel, ({DST}{N}){DST_MIN}, ({DST}{N}){DST_MAX});".format(
517 DST
=dst
, N
=size
, DST_MIN
=limit_min
[dst
], DST_MAX
=dst_max
521 print(" return sel;")
524 " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));".format(
525 DST
=dst
, N
=size
, BOOL
=bool_type
[dst
]
528 if dst
== "half" and src
in int_types
and sizeof_type
[src
] >= 2:
530 " return max(sel, ({DST}{N}){DST_MIN});".format(
531 DST
=dst
, N
=size
, DST_MIN
=limit_min
[dst
]
535 print(" return sel;")
539 " {BOOL}{N} c = convert_{BOOL}{N}(y > x);".format(
540 BOOL
=bool_type
[dst
], N
=size
543 if sizeof_type
[src
] >= 4 and src
in int_types
:
545 " c = c || convert_{BOOL}{N}(({SRC}{N}){SRC_MAX} == x);".format(
546 BOOL
=bool_type
[dst
], N
=size
, SRC
=src
, SRC_MAX
=limit_max
[src
]
550 " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})-INFINITY), c);".format(
551 DST
=dst
, N
=size
, BOOL
=bool_type
[dst
], SRC
=src
556 " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));".format(
557 DST
=dst
, N
=size
, BOOL
=bool_type
[dst
]
560 if dst
== "half" and src
in int_types
and sizeof_type
[src
] >= 2:
561 dst_max
= limit_max
[dst
]
562 # short is 16 bits signed, so the maximum value rounded to negative infinity is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767)
564 dst_max
= "0x1.ffcp+14"
566 " return min(sel, ({DST}{N}){DST_MAX});".format(
567 DST
=dst
, N
=size
, DST_MAX
=dst_max
571 print(" return sel;")
575 if close_conditional
:
579 for src
in float_types
:
580 for dst
in int_types
:
581 for size
in vector_sizes
:
582 for mode
in rounding_modes
:
583 for sat
in saturation
:
584 generate_float_conversion(src
, dst
, size
, mode
, sat
)
588 for dst
in float_types
:
589 for size
in vector_sizes
:
590 for mode
in rounding_modes
:
591 # Do not generate "_rte" conversion for clspv as they are
593 if clspv
and mode
== "_rte":
595 generate_float_conversion(src
, dst
, size
, mode
, "")