/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/x86/lib/hweight.S -- software population-count ("hamming weight")
 * fallbacks, used when the CPU does not provide the POPCNT instruction.
 */
#include <linux/export.h>
#include <linux/linkage.h>

#include <asm/asm.h>

/*
 * unsigned int __sw_hweight32(unsigned int w)
 * %rdi: w (64-bit); on 32-bit kernels the argument arrives in %eax
 */
/*
 * Software popcount of a 32-bit word via the classic SWAR
 * (SIMD-within-a-register) algorithm: sum adjacent bit fields of
 * doubling width (1->2->4->8 bits), then gather the four per-byte
 * counts into the top byte with one multiply.
 *
 * In:  %eax = w (32-bit kernels) / %edi = w (64-bit kernels)
 * Out: %eax = number of set bits in w
 * NOTE(review): %edx is explicitly saved/restored — presumably the
 * __arch_hweight32() caller uses a non-standard convention that only
 * allows %eax to be clobbered; confirm against asm/arch_hweight.h.
 */
SYM_FUNC_START(__sw_hweight32)
#ifdef CONFIG_X86_64
	movl %edi, %eax				# w (arg is in %edi on 64-bit)
#endif
	__ASM_SIZE(push,) %__ASM_REG(dx)	# preserve dx (edx/rdx per bitness)
	movl %eax, %edx				# w -> t
	shrl %edx				# t >>= 1
	andl $0x55555555, %edx			# t &= 0x55555555
	subl %edx, %eax				# w -= t; each 2-bit field = its popcount

	movl %eax, %edx				# w -> t
	shrl $2, %eax				# w_tmp >>= 2
	andl $0x33333333, %edx			# t     &= 0x33333333
	andl $0x33333333, %eax			# w_tmp &= 0x33333333
	addl %edx, %eax				# w = w_tmp + t; 4-bit fields (max 4)

	movl %eax, %edx				# w -> t
	shrl $4, %edx				# t >>= 4
	addl %edx, %eax				# w_tmp += t
	andl  $0x0f0f0f0f, %eax			# w_tmp &= 0x0f0f0f0f; bytes (max 8)
	imull $0x01010101, %eax, %eax		# w_tmp *= 0x01010101; top byte = byte sum
	shrl $24, %eax				# w = w_tmp >> 24
	__ASM_SIZE(pop,) %__ASM_REG(dx)
	RET
SYM_FUNC_END(__sw_hweight32)
EXPORT_SYMBOL(__sw_hweight32)
/*
 * No 32-bit variant, because it's implemented as an inline wrapper
 * on top of __arch_hweight32():
 */
/*
 * unsigned long __sw_hweight64(__u64 w)
 * Software popcount of a 64-bit word; same SWAR algorithm as
 * __sw_hweight32 widened to 64-bit fields and masks.
 *
 * In:  %rdi = w
 * Out: %rax = number of set bits in w
 * NOTE(review): %rdi and %rdx are saved/restored — presumably the
 * __arch_hweight64() caller expects only %rax clobbered; confirm
 * against asm/arch_hweight.h.
 */
#ifdef CONFIG_X86_64
SYM_FUNC_START(__sw_hweight64)
	pushq   %rdi				# preserve caller's w
	pushq   %rdx				# preserve scratch register

	movq    %rdi, %rdx			# w -> t
	movabsq $0x5555555555555555, %rax
	shrq    %rdx				# t >>= 1
	andq    %rdx, %rax			# t &= 0x5555555555555555
	movabsq $0x3333333333333333, %rdx
	subq    %rax, %rdi			# w -= t; each 2-bit field = its popcount

	movq    %rdi, %rax			# w -> t
	shrq    $2, %rdi			# w_tmp >>= 2
	andq    %rdx, %rax			# t     &= 0x3333333333333333
	andq    %rdi, %rdx			# w_tmp &= 0x3333333333333333
	addq    %rdx, %rax			# w = w_tmp + t; 4-bit fields (max 4)

	movq    %rax, %rdx			# w -> t
	shrq    $4, %rdx			# t >>= 4
	addq    %rdx, %rax			# w_tmp += t
	movabsq $0x0f0f0f0f0f0f0f0f, %rdx
	andq    %rdx, %rax			# w_tmp &= 0x0f0f0f0f0f0f0f0f; bytes (max 8)
	movabsq $0x0101010101010101, %rdx
	imulq   %rdx, %rax			# w_tmp *= 0x0101010101010101; top byte = byte sum
	shrq    $56, %rax			# w = w_tmp >> 56

	popq    %rdx
	popq    %rdi
	RET
SYM_FUNC_END(__sw_hweight64)
EXPORT_SYMBOL(__sw_hweight64)
#endif /* CONFIG_X86_64 */