libclc/generic/lib/gen_convert.py

   1 # OpenCL built-in library: type conversion functions
   2 #
   3 # Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
   4 # Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>
   5 # Copyright (c) 2024 Romaric Jodin <rjodin@chromium.org>
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the "Software"), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 # THE SOFTWARE.
  24
  25 # This script generates the file convert_type.cl, which contains all of the
  26 # OpenCL functions in the form:
  27 #
  28 # convert_<destTypen><_sat><_roundingMode>(<sourceTypen>)
  29
  30 import argparse
  31
  32 parser = argparse.ArgumentParser()
  33 parser.add_argument(
  34     "--clspv", action="store_true", help="Generate the clspv variant of the code"
  35 )
  36 args = parser.parse_args()
  37
  38 clspv = args.clspv
  39
  40 types = [
  41     "char",
  42     "uchar",
  43     "short",
  44     "ushort",
  45     "int",
  46     "uint",
  47     "long",
  48     "ulong",
  49     "half",
  50     "float",
  51     "double",
  52 ]
  53 int_types = ["char", "uchar", "short", "ushort", "int", "uint", "long", "ulong"]
  54 unsigned_types = ["uchar", "ushort", "uint", "ulong"]
  55 float_types = ["half", "float", "double"]
  56 int64_types = ["long", "ulong"]
  57 float64_types = ["double"]
  58 float16_types = ["half"]
  59 vector_sizes = ["", "2", "3", "4", "8", "16"]
  60 half_sizes = [("2", ""), ("4", "2"), ("8", "4"), ("16", "8")]
  61
  62 saturation = ["", "_sat"]
  63 rounding_modes = ["_rtz", "_rte", "_rtp", "_rtn"]
  64
  65 bool_type = {
  66     "char": "char",
  67     "uchar": "char",
  68     "short": "short",
  69     "ushort": "short",
  70     "int": "int",
  71     "uint": "int",
  72     "long": "long",
  73     "ulong": "long",
  74     "half": "short",
  75     "float": "int",
  76     "double": "long",
  77 }
  78
  79 unsigned_type = {
  80     "char": "uchar",
  81     "uchar": "uchar",
  82     "short": "ushort",
  83     "ushort": "ushort",
  84     "int": "uint",
  85     "uint": "uint",
  86     "long": "ulong",
  87     "ulong": "ulong",
  88 }
  89
  90 sizeof_type = {
  91     "char": 1,
  92     "uchar": 1,
  93     "short": 2,
  94     "ushort": 2,
  95     "int": 4,
  96     "uint": 4,
  97     "long": 8,
  98     "ulong": 8,
  99     "half": 2,
 100     "float": 4,
 101     "double": 8,
 102 }
 103
 104 limit_max = {
 105     "char": "CHAR_MAX",
 106     "uchar": "UCHAR_MAX",
 107     "short": "SHRT_MAX",
 108     "ushort": "USHRT_MAX",
 109     "int": "INT_MAX",
 110     "uint": "UINT_MAX",
 111     "long": "LONG_MAX",
 112     "ulong": "ULONG_MAX",
 113     "half": "0x1.ffcp+15",
 114 }
 115
 116 limit_min = {
 117     "char": "CHAR_MIN",
 118     "uchar": "0",
 119     "short": "SHRT_MIN",
 120     "ushort": "0",
 121     "int": "INT_MIN",
 122     "uint": "0",
 123     "long": "LONG_MIN",
 124     "ulong": "0",
 125     "half": "-0x1.ffcp+15",
 126 }
 127
 128
 129 def conditional_guard(src, dst):
 130     int64_count = 0
 131     float64_count = 0
 132     float16_count = 0
 133     if src in int64_types:
 134         int64_count = int64_count + 1
 135     elif src in float64_types:
 136         float64_count = float64_count + 1
 137     elif src in float16_types:
 138         float16_count = float16_count + 1
 139     if dst in int64_types:
 140         int64_count = int64_count + 1
 141     elif dst in float64_types:
 142         float64_count = float64_count + 1
 143     elif dst in float16_types:
 144         float16_count = float16_count + 1
 145     if float64_count > 0 and float16_count > 0:
 146         print("#if defined(cl_khr_fp16) && defined(cl_khr_fp64)")
 147         return True
 148     elif float64_count > 0:
 149         # In embedded profile, if cl_khr_fp64 is supported cles_khr_int64 has to be
 150         print("#ifdef cl_khr_fp64")
 151         return True
 152     elif float16_count > 0:
 153         print("#if defined cl_khr_fp16")
 154         return True
 155     elif int64_count > 0:
 156         print("#if defined cles_khr_int64 || !defined(__EMBEDDED_PROFILE__)")
 157         return True
 158     return False
 159
 160
 161 print(
 162     """/* !!!! AUTOGENERATED FILE generated by convert_type.py !!!!!
 163
 164    DON'T CHANGE THIS FILE. MAKE YOUR CHANGES TO convert_type.py AND RUN:
 165    $ ./generate-conversion-type-cl.sh
 166
 167    OpenCL type conversion functions
 168
 169    Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
 170    Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>
 171
 172    Permission is hereby granted, free of charge, to any person obtaining a copy
 173    of this software and associated documentation files (the "Software"), to deal
 174    in the Software without restriction, including without limitation the rights
 175    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 176    copies of the Software, and to permit persons to whom the Software is
 177    furnished to do so, subject to the following conditions:
 178
 179    The above copyright notice and this permission notice shall be included in
 180    all copies or substantial portions of the Software.
 181
 182    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 183    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 184    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 185    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 186    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 187    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 188    THE SOFTWARE.
 189 */
 190
 191 #include <clc/clc.h>
 192
 193 #ifdef cl_khr_fp16
 194 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 195 #endif
 196
 197 #ifdef cl_khr_fp64
 198 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 199
 200 #if defined(__EMBEDDED_PROFILE__) && !defined(cles_khr_int64)
 201 #error Embedded profile that supports cl_khr_fp64 also has to support cles_khr_int64
 202 #endif
 203
 204 #endif
 205
 206 #ifdef cles_khr_int64
 207 #pragma OPENCL EXTENSION cles_khr_int64 : enable
 208 #endif
 209
 210 """
 211 )
 212
 213 #
 214 # Default Conversions
 215 #
 216 # All conversions are in accordance with the OpenCL specification,
 217 # which cites the C99 conversion rules.
 218 #
 219 # Casting from floating point to integer results in conversions
 220 # with truncation, so it should be suitable for the default convert
 221 # functions.
 222 #
 223 # Conversions from integer to floating-point, and floating-point to
 224 # floating-point through casting is done with the default rounding
 225 # mode. While C99 allows dynamically changing the rounding mode
 226 # during runtime, it is not a supported feature in OpenCL according
 227 # to Section 7.1 - Rounding Modes in the OpenCL 1.2 specification.
 228 #
 229 # Therefore, we can assume for optimization purposes that the
 230 # rounding mode is fixed to round-to-nearest-even. Platform target
 231 # authors should ensure that the rounding-control registers remain
 232 # in this state, and that this invariant holds.
 233 #
 234 # Also note, even though the OpenCL specification isn't entirely
 235 # clear on this matter, we implement all rounding mode combinations
 236 # even for integer-to-integer conversions. When such a conversion
 237 # is used, the rounding mode is ignored.
 238 #
 239
 240
 241 def generate_default_conversion(src, dst, mode):
 242     close_conditional = conditional_guard(src, dst)
 243
 244     for size in vector_sizes:
 245         if not size:
 246             print(
 247                 f"""_CLC_DEF _CLC_OVERLOAD {dst} convert_{dst}{mode}({src} x) {{
 248   return ({dst})x;
 249 }}
 250 """
 251             )
 252         else:
 253             print(
 254                 f"""_CLC_DEF _CLC_OVERLOAD {dst}{size} convert_{dst}{size}{mode}({src}{size} x) {{
 255   return __builtin_convertvector(x, {dst}{size});
 256 }}
 257 """
 258             )
 259
 260     if close_conditional:
 261         print("#endif")
 262
 263
 264 # Do not generate default conversion for clspv as they are handled natively
 265 if not clspv:
 266     for src in types:
 267         for dst in types:
 268             generate_default_conversion(src, dst, "")
 269
 270 for src in int_types:
 271     for dst in int_types:
 272         for mode in rounding_modes:
 273             # Do not generate "_rte" conversion for clspv as they are handled
 274             # natively
 275             if clspv and mode == "_rte":
 276                 continue
 277             generate_default_conversion(src, dst, mode)
 278
 279 #
 280 # Saturated Conversions To Integers
 281 #
 282 # These functions are dependent on the unsaturated conversion functions
 283 # generated above, and use clamp, max, min, and select to eliminate
 284 # branching and vectorize the conversions.
 285 #
 286 # Again, as above, we allow all rounding modes for integer-to-integer
 287 # conversions with saturation.
 288 #
 289
 290
 291 def generate_saturated_conversion(src, dst, size):
 292     # Header
 293     close_conditional = conditional_guard(src, dst)
 294     print(
 295         """_CLC_DEF _CLC_OVERLOAD
 296 {DST}{N} convert_{DST}{N}_sat({SRC}{N} x)
 297 {{""".format(
 298             DST=dst, SRC=src, N=size
 299         )
 300     )
 301
 302     # FIXME: This is a work around for lack of select function with
 303     # signed third argument when the first two arguments are unsigned types.
 304     # We cast to the signed type for sign-extension, then do a bitcast to
 305     # the unsigned type.
 306     if dst in unsigned_types:
 307         bool_prefix = "as_{DST}{N}(convert_{BOOL}{N}".format(
 308             DST=dst, BOOL=bool_type[dst], N=size
 309         )
 310         bool_suffix = ")"
 311     else:
 312         bool_prefix = "convert_{BOOL}{N}".format(BOOL=bool_type[dst], N=size)
 313         bool_suffix = ""
 314
 315     # Body
 316     if src == dst:
 317
 318         # Conversion between same types
 319         print("  return x;")
 320
 321     elif src in float_types:
 322
 323         if clspv:
 324             # Conversion from float to int
 325             print(
 326                 """  {DST}{N} y = convert_{DST}{N}(x);
 327                 y = select(y, ({DST}{N}){DST_MIN}, {BP}(x <= ({SRC}{N}){DST_MIN}){BS});
 328                 y = select(y, ({DST}{N}){DST_MAX}, {BP}(x >= ({SRC}{N}){DST_MAX}){BS});
 329                 return y;""".format(
 330                     SRC=src,
 331                     DST=dst,
 332                     N=size,
 333                     DST_MIN=limit_min[dst],
 334                     DST_MAX=limit_max[dst],
 335                     BP=bool_prefix,
 336                     BS=bool_suffix,
 337                 )
 338             )
 339         else:
 340             # Conversion from float to int
 341             print(
 342                 """  {DST}{N} y = convert_{DST}{N}(x);
 343                 y = select(y, ({DST}{N}){DST_MIN}, {BP}(x < ({SRC}{N}){DST_MIN}){BS});
 344                 y = select(y, ({DST}{N}){DST_MAX}, {BP}(x > ({SRC}{N}){DST_MAX}){BS});
 345                 return y;""".format(
 346                     SRC=src,
 347                     DST=dst,
 348                     N=size,
 349                     DST_MIN=limit_min[dst],
 350                     DST_MAX=limit_max[dst],
 351                     BP=bool_prefix,
 352                     BS=bool_suffix,
 353                 )
 354             )
 355
 356     else:
 357
 358         # Integer to integer convesion with sizeof(src) == sizeof(dst)
 359         if sizeof_type[src] == sizeof_type[dst]:
 360             if src in unsigned_types:
 361                 print(
 362                     "  x = min(x, ({SRC}){DST_MAX});".format(
 363                         SRC=src, DST_MAX=limit_max[dst]
 364                     )
 365                 )
 366             else:
 367                 print("  x = max(x, ({SRC})0);".format(SRC=src))
 368
 369         # Integer to integer conversion where sizeof(src) > sizeof(dst)
 370         elif sizeof_type[src] > sizeof_type[dst]:
 371             if src in unsigned_types:
 372                 print(
 373                     "  x = min(x, ({SRC}){DST_MAX});".format(
 374                         SRC=src, DST_MAX=limit_max[dst]
 375                     )
 376                 )
 377             else:
 378                 print(
 379                     "  x = clamp(x, ({SRC}){DST_MIN}, ({SRC}){DST_MAX});".format(
 380                         SRC=src, DST_MIN=limit_min[dst], DST_MAX=limit_max[dst]
 381                     )
 382                 )
 383
 384         # Integer to integer conversion where sizeof(src) < sizeof(dst)
 385         elif src not in unsigned_types and dst in unsigned_types:
 386             print("  x = max(x, ({SRC})0);".format(SRC=src))
 387
 388         print("  return convert_{DST}{N}(x);".format(DST=dst, N=size))
 389
 390     # Footer
 391     print("}")
 392     if close_conditional:
 393         print("#endif")
 394
 395
 396 for src in types:
 397     for dst in int_types:
 398         for size in vector_sizes:
 399             generate_saturated_conversion(src, dst, size)
 400
 401
 402 def generate_saturated_conversion_with_rounding(src, dst, size, mode):
 403     # Header
 404     close_conditional = conditional_guard(src, dst)
 405
 406     # Body
 407     print(
 408         """_CLC_DEF _CLC_OVERLOAD
 409 {DST}{N} convert_{DST}{N}_sat{M}({SRC}{N} x)
 410 {{
 411   return convert_{DST}{N}_sat(x);
 412 }}
 413 """.format(
 414             DST=dst, SRC=src, N=size, M=mode
 415         )
 416     )
 417
 418     # Footer
 419     if close_conditional:
 420         print("#endif")
 421
 422
 423 for src in int_types:
 424     for dst in int_types:
 425         for size in vector_sizes:
 426             for mode in rounding_modes:
 427                 generate_saturated_conversion_with_rounding(src, dst, size, mode)
 428
 429 #
 430 # Conversions To/From Floating-Point With Rounding
 431 #
 432 # Note that we assume as above that casts from floating-point to
 433 # integer are done with truncation, and that the default rounding
 434 # mode is fixed to round-to-nearest-even, as per C99 and OpenCL
 435 # rounding rules.
 436 #
 437 # These functions rely on the use of abs, ceil, fabs, floor,
 438 # nextafter, sign, rint and the above generated conversion functions.
 439 #
 440 # Only conversions to integers can have saturation.
 441 #
 442
 443
 444 def generate_float_conversion(src, dst, size, mode, sat):
 445     # Header
 446     close_conditional = conditional_guard(src, dst)
 447     print(
 448         """_CLC_DEF _CLC_OVERLOAD
 449 {DST}{N} convert_{DST}{N}{S}{M}({SRC}{N} x)
 450 {{""".format(
 451             SRC=src, DST=dst, N=size, M=mode, S=sat
 452         )
 453     )
 454
 455     # Perform conversion
 456     if dst in int_types:
 457         if mode == "_rte":
 458             print("  x = rint(x);")
 459         elif mode == "_rtp":
 460             print("  x = ceil(x);")
 461         elif mode == "_rtn":
 462             print("  x = floor(x);")
 463         print("  return convert_{DST}{N}{S}(x);".format(DST=dst, N=size, S=sat))
 464     elif mode == "_rte":
 465         print("  return convert_{DST}{N}(x);".format(DST=dst, N=size))
 466     else:
 467         print("  {DST}{N} r = convert_{DST}{N}(x);".format(DST=dst, N=size))
 468         if clspv:
 469             print("  {SRC}{N} y = convert_{SRC}{N}_sat(r);".format(SRC=src, N=size))
 470         else:
 471             print("  {SRC}{N} y = convert_{SRC}{N}(r);".format(SRC=src, N=size))
 472         if mode == "_rtz":
 473             if src in int_types:
 474                 print(
 475                     "  {USRC}{N} abs_x = abs(x);".format(
 476                         USRC=unsigned_type[src], N=size
 477                     )
 478                 )
 479                 print(
 480                     "  {USRC}{N} abs_y = abs(y);".format(
 481                         USRC=unsigned_type[src], N=size
 482                     )
 483                 )
 484             else:
 485                 print("  {SRC}{N} abs_x = fabs(x);".format(SRC=src, N=size))
 486                 print("  {SRC}{N} abs_y = fabs(y);".format(SRC=src, N=size))
 487             if clspv:
 488                 print(
 489                     "  {BOOL}{N} c = convert_{BOOL}{N}(abs_y > abs_x);".format(
 490                         BOOL=bool_type[dst], N=size
 491                     )
 492                 )
 493                 if sizeof_type[src] >= 4 and src in int_types:
 494                     print(
 495                         "  c = c || convert_{BOOL}{N}(({SRC}{N}){SRC_MAX} == x);".format(
 496                             BOOL=bool_type[dst], N=size, SRC=src, SRC_MAX=limit_max[src]
 497                         )
 498                     )
 499                 print(
 500                     "  {DST}{N} sel = select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), c);".format(
 501                         DST=dst, N=size, BOOL=bool_type[dst], SRC=src
 502                     )
 503                 )
 504             else:
 505                 print(
 506                     "  {DST}{N} sel = select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));".format(
 507                         DST=dst, N=size, BOOL=bool_type[dst]
 508                     )
 509                 )
 510             if dst == "half" and src in int_types and sizeof_type[src] >= 2:
 511                 dst_max = limit_max[dst]
 512                 # short is 16 bits signed, so the maximum value rounded to zero is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767)
 513                 if src == "short":
 514                     dst_max = "0x1.ffcp+14"
 515                 print(
 516                     "  return clamp(sel, ({DST}{N}){DST_MIN}, ({DST}{N}){DST_MAX});".format(
 517                         DST=dst, N=size, DST_MIN=limit_min[dst], DST_MAX=dst_max
 518                     )
 519                 )
 520             else:
 521                 print("  return sel;")
 522         if mode == "_rtp":
 523             print(
 524                 "  {DST}{N} sel = select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));".format(
 525                     DST=dst, N=size, BOOL=bool_type[dst]
 526                 )
 527             )
 528             if dst == "half" and src in int_types and sizeof_type[src] >= 2:
 529                 print(
 530                     "  return max(sel, ({DST}{N}){DST_MIN});".format(
 531                         DST=dst, N=size, DST_MIN=limit_min[dst]
 532                     )
 533                 )
 534             else:
 535                 print("  return sel;")
 536         if mode == "_rtn":
 537             if clspv:
 538                 print(
 539                     "  {BOOL}{N} c = convert_{BOOL}{N}(y > x);".format(
 540                         BOOL=bool_type[dst], N=size
 541                     )
 542                 )
 543                 if sizeof_type[src] >= 4 and src in int_types:
 544                     print(
 545                         "  c = c || convert_{BOOL}{N}(({SRC}{N}){SRC_MAX} == x);".format(
 546                             BOOL=bool_type[dst], N=size, SRC=src, SRC_MAX=limit_max[src]
 547                         )
 548                     )
 549                 print(
 550                     "  {DST}{N} sel = select(r, nextafter(r, ({DST}{N})-INFINITY), c);".format(
 551                         DST=dst, N=size, BOOL=bool_type[dst], SRC=src
 552                     )
 553                 )
 554             else:
 555                 print(
 556                     "  {DST}{N} sel = select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));".format(
 557                         DST=dst, N=size, BOOL=bool_type[dst]
 558                     )
 559                 )
 560             if dst == "half" and src in int_types and sizeof_type[src] >= 2:
 561                 dst_max = limit_max[dst]
 562                 # short is 16 bits signed, so the maximum value rounded to negative infinity is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767)
 563                 if src == "short":
 564                     dst_max = "0x1.ffcp+14"
 565                 print(
 566                     "  return min(sel, ({DST}{N}){DST_MAX});".format(
 567                         DST=dst, N=size, DST_MAX=dst_max
 568                     )
 569                 )
 570             else:
 571                 print("  return sel;")
 572
 573     # Footer
 574     print("}")
 575     if close_conditional:
 576         print("#endif")
 577
 578
 579 for src in float_types:
 580     for dst in int_types:
 581         for size in vector_sizes:
 582             for mode in rounding_modes:
 583                 for sat in saturation:
 584                     generate_float_conversion(src, dst, size, mode, sat)
 585
 586
 587 for src in types:
 588     for dst in float_types:
 589         for size in vector_sizes:
 590             for mode in rounding_modes:
 591                 # Do not generate "_rte" conversion for clspv as they are
 592                 # handled natively
 593                 if clspv and mode == "_rte":
 594                     continue
 595                 generate_float_conversion(src, dst, size, mode, "")