[AMDGPU][True16][CodeGen] true16 codegen pattern for v_med3_u/i16 (#121850)
[llvm-project.git] / compiler-rt / lib / builtins / popcountdi2.c
blob 20dd0b0239efc874b001ae9ecd40886c132bcd71
1 //===-- popcountdi2.c - Implement __popcountdi2 ---------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements __popcountdi2 for the compiler_rt library.
11 //===----------------------------------------------------------------------===//
13 #include "int_lib.h"
15 // Returns: count of 1 bits
17 COMPILER_RT_ABI int __popcountdi2(di_int a) {
18 du_int x2 = (du_int)a;
19 x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL);
20 // Every 2 bits holds the sum of every pair of bits (32)
21 x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL);
22 // Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16)
23 x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL;
24 // Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8)
25 su_int x = (su_int)(x2 + (x2 >> 32));
26 // The lower 32 bits hold four 16 bit sums (5 significant bits).
27 // Upper 32 bits are garbage
28 x = x + (x >> 16);
29 // The lower 16 bits hold two 32 bit sums (6 significant bits).
30 // Upper 16 bits are garbage
31 return (x + (x >> 8)) & 0x0000007F; // (7 significant bits)