src/common/tuklib_integer.h

   1 ///////////////////////////////////////////////////////////////////////////////
   2 //
   3 /// \file       tuklib_integer.h
   4 /// \brief      Various integer and bit operations
   5 ///
   6 /// This file provides macros or functions to do some basic integer and bit
   7 /// operations.
   8 ///
   9 /// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l):
  10 ///   - Byte swapping: bswapXX(num)
  11 ///   - Byte order conversions to/from native: convXXYe(num)
  12 ///   - Aligned reads: readXXYe(ptr)
  13 ///   - Aligned writes: writeXXYe(ptr, num)
  14 ///   - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr)
  15 ///   - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num)
  16 ///
  17 /// Since they can macros, the arguments should have no side effects since
  18 /// they may be evaluated more than once.
  19 ///
  20 /// \todo       PowerPC and possibly some other architectures support
  21 ///             byte swapping load and store instructions. This file
  22 ///             doesn't take advantage of those instructions.
  23 ///
  24 /// Bit scan operations for non-zero 32-bit integers:
  25 ///   - Bit scan reverse (find highest non-zero bit): bsr32(num)
  26 ///   - Count leading zeros: clz32(num)
  27 ///   - Count trailing zeros: ctz32(num)
  28 ///   - Bit scan forward (simply an alias for ctz32()): bsf32(num)
  29 ///
  30 /// The above bit scan operations return 0-31. If num is zero,
  31 /// the result is undefined.
  32 //
  33 //  Authors:    Lasse Collin
  34 //              Joachim Henke
  35 //
  36 //  This file has been put into the public domain.
  37 //  You can do whatever you want with this file.
  38 //
  39 ///////////////////////////////////////////////////////////////////////////////
  40
  41 #ifndef TUKLIB_INTEGER_H
  42 #define TUKLIB_INTEGER_H
  43
  44 #include "tuklib_common.h"
  45
  46
  47 ////////////////////////////////////////
  48 // Operating system specific features //
  49 ////////////////////////////////////////
  50
  51 #if defined(HAVE_BYTESWAP_H)
  52         // glibc, uClibc, dietlibc
  53 #       include <byteswap.h>
  54 #       ifdef HAVE_BSWAP_16
  55 #               define bswap16(num) bswap_16(num)
  56 #       endif
  57 #       ifdef HAVE_BSWAP_32
  58 #               define bswap32(num) bswap_32(num)
  59 #       endif
  60 #       ifdef HAVE_BSWAP_64
  61 #               define bswap64(num) bswap_64(num)
  62 #       endif
  63
  64 #elif defined(HAVE_SYS_ENDIAN_H)
  65         // *BSDs and Darwin
  66 #       include <sys/endian.h>
  67
  68 #elif defined(HAVE_SYS_BYTEORDER_H)
  69         // Solaris
  70 #       include <sys/byteorder.h>
  71 #       ifdef BSWAP_16
  72 #               define bswap16(num) BSWAP_16(num)
  73 #       endif
  74 #       ifdef BSWAP_32
  75 #               define bswap32(num) BSWAP_32(num)
  76 #       endif
  77 #       ifdef BSWAP_64
  78 #               define bswap64(num) BSWAP_64(num)
  79 #       endif
  80 #       ifdef BE_16
  81 #               define conv16be(num) BE_16(num)
  82 #       endif
  83 #       ifdef BE_32
  84 #               define conv32be(num) BE_32(num)
  85 #       endif
  86 #       ifdef BE_64
  87 #               define conv64be(num) BE_64(num)
  88 #       endif
  89 #       ifdef LE_16
  90 #               define conv16le(num) LE_16(num)
  91 #       endif
  92 #       ifdef LE_32
  93 #               define conv32le(num) LE_32(num)
  94 #       endif
  95 #       ifdef LE_64
  96 #               define conv64le(num) LE_64(num)
  97 #       endif
  98 #endif
  99
 100
 101 ////////////////////////////////
 102 // Compiler-specific features //
 103 ////////////////////////////////
 104
 105 // Newer Intel C compilers require immintrin.h for _bit_scan_reverse()
 106 // and such functions.
 107 #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500)
 108 #       include <immintrin.h>
 109 #endif
 110
 111
 112 ///////////////////
 113 // Byte swapping //
 114 ///////////////////
 115
 116 #ifndef bswap16
 117 #       define bswap16(num) \
 118                 (((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8))
 119 #endif
 120
 121 #ifndef bswap32
 122 #       define bswap32(num) \
 123                 ( (((uint32_t)(num) << 24)                       ) \
 124                 | (((uint32_t)(num) <<  8) & UINT32_C(0x00FF0000)) \
 125                 | (((uint32_t)(num) >>  8) & UINT32_C(0x0000FF00)) \
 126                 | (((uint32_t)(num) >> 24)                       ) )
 127 #endif
 128
 129 #ifndef bswap64
 130 #       define bswap64(num) \
 131                 ( (((uint64_t)(num) << 56)                               ) \
 132                 | (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \
 133                 | (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \
 134                 | (((uint64_t)(num) <<  8) & UINT64_C(0x000000FF00000000)) \
 135                 | (((uint64_t)(num) >>  8) & UINT64_C(0x00000000FF000000)) \
 136                 | (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \
 137                 | (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \
 138                 | (((uint64_t)(num) >> 56)                               ) )
 139 #endif
 140
 141 // Define conversion macros using the basic byte swapping macros.
 142 #ifdef WORDS_BIGENDIAN
 143 #       ifndef conv16be
 144 #               define conv16be(num) ((uint16_t)(num))
 145 #       endif
 146 #       ifndef conv32be
 147 #               define conv32be(num) ((uint32_t)(num))
 148 #       endif
 149 #       ifndef conv64be
 150 #               define conv64be(num) ((uint64_t)(num))
 151 #       endif
 152 #       ifndef conv16le
 153 #               define conv16le(num) bswap16(num)
 154 #       endif
 155 #       ifndef conv32le
 156 #               define conv32le(num) bswap32(num)
 157 #       endif
 158 #       ifndef conv64le
 159 #               define conv64le(num) bswap64(num)
 160 #       endif
 161 #else
 162 #       ifndef conv16be
 163 #               define conv16be(num) bswap16(num)
 164 #       endif
 165 #       ifndef conv32be
 166 #               define conv32be(num) bswap32(num)
 167 #       endif
 168 #       ifndef conv64be
 169 #               define conv64be(num) bswap64(num)
 170 #       endif
 171 #       ifndef conv16le
 172 #               define conv16le(num) ((uint16_t)(num))
 173 #       endif
 174 #       ifndef conv32le
 175 #               define conv32le(num) ((uint32_t)(num))
 176 #       endif
 177 #       ifndef conv64le
 178 #               define conv64le(num) ((uint64_t)(num))
 179 #       endif
 180 #endif
 181
 182
 183 //////////////////////////////
 184 // Aligned reads and writes //
 185 //////////////////////////////
 186
 187 static inline uint16_t
 188 read16be(const uint8_t *buf)
 189 {
 190         uint16_t num = *(const uint16_t *)buf;
 191         return conv16be(num);
 192 }
 193
 194
 195 static inline uint16_t
 196 read16le(const uint8_t *buf)
 197 {
 198         uint16_t num = *(const uint16_t *)buf;
 199         return conv16le(num);
 200 }
 201
 202
 203 static inline uint32_t
 204 read32be(const uint8_t *buf)
 205 {
 206         uint32_t num = *(const uint32_t *)buf;
 207         return conv32be(num);
 208 }
 209
 210
 211 static inline uint32_t
 212 read32le(const uint8_t *buf)
 213 {
 214         uint32_t num = *(const uint32_t *)buf;
 215         return conv32le(num);
 216 }
 217
 218
 219 static inline uint64_t
 220 read64be(const uint8_t *buf)
 221 {
 222         uint64_t num = *(const uint64_t *)buf;
 223         return conv64be(num);
 224 }
 225
 226
 227 static inline uint64_t
 228 read64le(const uint8_t *buf)
 229 {
 230         uint64_t num = *(const uint64_t *)buf;
 231         return conv64le(num);
 232 }
 233
 234
 235 // NOTE: Possible byte swapping must be done in a macro to allow GCC
 236 // to optimize byte swapping of constants when using glibc's or *BSD's
 237 // byte swapping macros. The actual write is done in an inline function
 238 // to make type checking of the buf pointer possible similarly to readXXYe()
 239 // functions.
 240
 241 #define write16be(buf, num) write16ne((buf), conv16be(num))
 242 #define write16le(buf, num) write16ne((buf), conv16le(num))
 243 #define write32be(buf, num) write32ne((buf), conv32be(num))
 244 #define write32le(buf, num) write32ne((buf), conv32le(num))
 245 #define write64be(buf, num) write64ne((buf), conv64be(num))
 246 #define write64le(buf, num) write64ne((buf), conv64le(num))
 247
 248
 249 static inline void
 250 write16ne(uint8_t *buf, uint16_t num)
 251 {
 252         *(uint16_t *)buf = num;
 253         return;
 254 }
 255
 256
 257 static inline void
 258 write32ne(uint8_t *buf, uint32_t num)
 259 {
 260         *(uint32_t *)buf = num;
 261         return;
 262 }
 263
 264
 265 static inline void
 266 write64ne(uint8_t *buf, uint64_t num)
 267 {
 268         *(uint64_t *)buf = num;
 269         return;
 270 }
 271
 272
 273 ////////////////////////////////
 274 // Unaligned reads and writes //
 275 ////////////////////////////////
 276
 277 // NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and
 278 // 32-bit unaligned integer loads and stores. It's possible that 64-bit
 279 // unaligned access doesn't work or is slower than byte-by-byte access.
 280 // Since unaligned 64-bit is probably not needed as often as 16-bit or
 281 // 32-bit, we simply don't support 64-bit unaligned access for now.
 282 #ifdef TUKLIB_FAST_UNALIGNED_ACCESS
 283 #       define unaligned_read16be read16be
 284 #       define unaligned_read16le read16le
 285 #       define unaligned_read32be read32be
 286 #       define unaligned_read32le read32le
 287 #       define unaligned_write16be write16be
 288 #       define unaligned_write16le write16le
 289 #       define unaligned_write32be write32be
 290 #       define unaligned_write32le write32le
 291
 292 #else
 293
 294 static inline uint16_t
 295 unaligned_read16be(const uint8_t *buf)
 296 {
 297         uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
 298         return num;
 299 }
 300
 301
 302 static inline uint16_t
 303 unaligned_read16le(const uint8_t *buf)
 304 {
 305         uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
 306         return num;
 307 }
 308
 309
 310 static inline uint32_t
 311 unaligned_read32be(const uint8_t *buf)
 312 {
 313         uint32_t num = (uint32_t)buf[0] << 24;
 314         num |= (uint32_t)buf[1] << 16;
 315         num |= (uint32_t)buf[2] << 8;
 316         num |= (uint32_t)buf[3];
 317         return num;
 318 }
 319
 320
 321 static inline uint32_t
 322 unaligned_read32le(const uint8_t *buf)
 323 {
 324         uint32_t num = (uint32_t)buf[0];
 325         num |= (uint32_t)buf[1] << 8;
 326         num |= (uint32_t)buf[2] << 16;
 327         num |= (uint32_t)buf[3] << 24;
 328         return num;
 329 }
 330
 331
 332 static inline void
 333 unaligned_write16be(uint8_t *buf, uint16_t num)
 334 {
 335         buf[0] = (uint8_t)(num >> 8);
 336         buf[1] = (uint8_t)num;
 337         return;
 338 }
 339
 340
 341 static inline void
 342 unaligned_write16le(uint8_t *buf, uint16_t num)
 343 {
 344         buf[0] = (uint8_t)num;
 345         buf[1] = (uint8_t)(num >> 8);
 346         return;
 347 }
 348
 349
 350 static inline void
 351 unaligned_write32be(uint8_t *buf, uint32_t num)
 352 {
 353         buf[0] = (uint8_t)(num >> 24);
 354         buf[1] = (uint8_t)(num >> 16);
 355         buf[2] = (uint8_t)(num >> 8);
 356         buf[3] = (uint8_t)num;
 357         return;
 358 }
 359
 360
 361 static inline void
 362 unaligned_write32le(uint8_t *buf, uint32_t num)
 363 {
 364         buf[0] = (uint8_t)num;
 365         buf[1] = (uint8_t)(num >> 8);
 366         buf[2] = (uint8_t)(num >> 16);
 367         buf[3] = (uint8_t)(num >> 24);
 368         return;
 369 }
 370
 371 #endif
 372
 373
 374 static inline uint32_t
 375 bsr32(uint32_t n)
 376 {
 377         // Check for ICC first, since it tends to define __GNUC__ too.
 378 #if defined(__INTEL_COMPILER)
 379         return _bit_scan_reverse(n);
 380
 381 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
 382         // GCC >= 3.4 has __builtin_clz(), which gives good results on
 383         // multiple architectures. On x86, __builtin_clz() ^ 31U becomes
 384         // either plain BSR (so the XOR gets optimized away) or LZCNT and
 385         // XOR (if -march indicates that SSE4a instructions are supported).
 386         return __builtin_clz(n) ^ 31U;
 387
 388 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
 389         uint32_t i;
 390         __asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
 391         return i;
 392
 393 #elif defined(_MSC_VER) && _MSC_VER >= 1400
 394         // MSVC isn't supported by tuklib, but since this code exists,
 395         // it doesn't hurt to have it here anyway.
 396         uint32_t i;
 397         _BitScanReverse((DWORD *)&i, n);
 398         return i;
 399
 400 #else
 401         uint32_t i = 31;
 402
 403         if ((n & UINT32_C(0xFFFF0000)) == 0) {
 404                 n <<= 16;
 405                 i = 15;
 406         }
 407
 408         if ((n & UINT32_C(0xFF000000)) == 0) {
 409                 n <<= 8;
 410                 i -= 8;
 411         }
 412
 413         if ((n & UINT32_C(0xF0000000)) == 0) {
 414                 n <<= 4;
 415                 i -= 4;
 416         }
 417
 418         if ((n & UINT32_C(0xC0000000)) == 0) {
 419                 n <<= 2;
 420                 i -= 2;
 421         }
 422
 423         if ((n & UINT32_C(0x80000000)) == 0)
 424                 --i;
 425
 426         return i;
 427 #endif
 428 }
 429
 430
 431 static inline uint32_t
 432 clz32(uint32_t n)
 433 {
 434 #if defined(__INTEL_COMPILER)
 435         return _bit_scan_reverse(n) ^ 31U;
 436
 437 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
 438         return __builtin_clz(n);
 439
 440 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
 441         uint32_t i;
 442         __asm__("bsrl %1, %0\n\t"
 443                 "xorl $31, %0"
 444                 : "=r" (i) : "rm" (n));
 445         return i;
 446
 447 #elif defined(_MSC_VER) && _MSC_VER >= 1400
 448         uint32_t i;
 449         _BitScanReverse((DWORD *)&i, n);
 450         return i ^ 31U;
 451
 452 #else
 453         uint32_t i = 0;
 454
 455         if ((n & UINT32_C(0xFFFF0000)) == 0) {
 456                 n <<= 16;
 457                 i = 16;
 458         }
 459
 460         if ((n & UINT32_C(0xFF000000)) == 0) {
 461                 n <<= 8;
 462                 i += 8;
 463         }
 464
 465         if ((n & UINT32_C(0xF0000000)) == 0) {
 466                 n <<= 4;
 467                 i += 4;
 468         }
 469
 470         if ((n & UINT32_C(0xC0000000)) == 0) {
 471                 n <<= 2;
 472                 i += 2;
 473         }
 474
 475         if ((n & UINT32_C(0x80000000)) == 0)
 476                 ++i;
 477
 478         return i;
 479 #endif
 480 }
 481
 482
 483 static inline uint32_t
 484 ctz32(uint32_t n)
 485 {
 486 #if defined(__INTEL_COMPILER)
 487         return _bit_scan_forward(n);
 488
 489 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
 490         return __builtin_ctz(n);
 491
 492 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
 493         uint32_t i;
 494         __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
 495         return i;
 496
 497 #elif defined(_MSC_VER) && _MSC_VER >= 1400
 498         uint32_t i;
 499         _BitScanForward((DWORD *)&i, n);
 500         return i;
 501
 502 #else
 503         uint32_t i = 0;
 504
 505         if ((n & UINT32_C(0x0000FFFF)) == 0) {
 506                 n >>= 16;
 507                 i = 16;
 508         }
 509
 510         if ((n & UINT32_C(0x000000FF)) == 0) {
 511                 n >>= 8;
 512                 i += 8;
 513         }
 514
 515         if ((n & UINT32_C(0x0000000F)) == 0) {
 516                 n >>= 4;
 517                 i += 4;
 518         }
 519
 520         if ((n & UINT32_C(0x00000003)) == 0) {
 521                 n >>= 2;
 522                 i += 2;
 523         }
 524
 525         if ((n & UINT32_C(0x00000001)) == 0)
 526                 ++i;
 527
 528         return i;
 529 #endif
 530 }
 531
 532 #define bsf32 ctz32
 533
 534 #endif