1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
3 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
5 ; Loosely based on test/CodeGen/{X86,AArch64}/extract-lowbits.ll,
6 ; but with all the 64-bit tests, and all the tests with loads, dropped.
9 ; a) x & (1 << nbits) - 1
10 ; b) x & ~(-1 << nbits)
11 ; c) x & (-1 >> (32 - nbits))
12 ; d) x << (32 - nbits) >> (32 - nbits)
15 ; ---------------------------------------------------------------------------- ;
17 ; ---------------------------------------------------------------------------- ;
; Pattern a, baseline: mask = (1 << numlowbits) - 1, applied as mask & val.
; The shared GCN checks (used by both RUN lines) expect the shl/add/and
; sequence to be emitted as one v_bfe_u32 with offset 0 and width v1.
19 define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
20 ; GCN-LABEL: bzhi32_a0:
22 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23 ; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
24 ; GCN-NEXT: s_setpc_b64 s[30:31]
25 %onebit = shl i32 1, %numlowbits ; 1 << numlowbits
26 %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1
27 %masked = and i32 %mask, %val ; mask is the left operand here
; Pattern a with a narrow bit count: %numlowbits is an i8 (zeroext) argument
; that is zero-extended to i32 before the shift. The shared GCN checks still
; expect a single v_bfe_u32.
31 define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
32 ; GCN-LABEL: bzhi32_a1_indexzext:
34 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35 ; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
36 ; GCN-NEXT: s_setpc_b64 s[30:31]
37 %conv = zext i8 %numlowbits to i32 ; widen the i8 count to the shift type
38 %onebit = shl i32 1, %conv ; 1 << conv
39 %mask = add nsw i32 %onebit, -1 ; (1 << conv) - 1
40 %masked = and i32 %mask, %val
; Pattern a with the operands of the final 'and' swapped (val & mask instead
; of mask & val). The shared GCN checks expect the same single v_bfe_u32 as
; the unswapped form.
44 define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
45 ; GCN-LABEL: bzhi32_a4_commutative:
47 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48 ; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
49 ; GCN-NEXT: s_setpc_b64 s[30:31]
50 %onebit = shl i32 1, %numlowbits ; 1 << numlowbits
51 %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1
52 %masked = and i32 %val, %mask ; swapped order
56 ; ---------------------------------------------------------------------------- ;
58 ; ---------------------------------------------------------------------------- ;
; Pattern b, baseline: mask = ~(-1 << numlowbits), applied as mask & val.
; The shared GCN checks (used by both RUN lines) expect the shl/xor/and
; sequence to be emitted as one v_bfe_u32 with offset 0 and width v1.
60 define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
61 ; GCN-LABEL: bzhi32_b0:
63 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64 ; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
65 ; GCN-NEXT: s_setpc_b64 s[30:31]
66 %notmask = shl i32 -1, %numlowbits ; -1 << numlowbits
67 %mask = xor i32 %notmask, -1 ; bitwise NOT of notmask
68 %masked = and i32 %mask, %val ; mask is the left operand here
; Pattern b with a narrow bit count: %numlowbits is an i8 (zeroext) argument
; that is zero-extended to i32 before the shift. The shared GCN checks still
; expect a single v_bfe_u32.
72 define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
73 ; GCN-LABEL: bzhi32_b1_indexzext:
75 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76 ; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
77 ; GCN-NEXT: s_setpc_b64 s[30:31]
78 %conv = zext i8 %numlowbits to i32 ; widen the i8 count to the shift type
79 %notmask = shl i32 -1, %conv ; -1 << conv
80 %mask = xor i32 %notmask, -1 ; bitwise NOT of notmask
81 %masked = and i32 %mask, %val
; Pattern b with the operands of the final 'and' swapped (val & mask instead
; of mask & val). The shared GCN checks expect the same single v_bfe_u32 as
; the unswapped form.
85 define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
86 ; GCN-LABEL: bzhi32_b4_commutative:
88 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89 ; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
90 ; GCN-NEXT: s_setpc_b64 s[30:31]
91 %notmask = shl i32 -1, %numlowbits ; -1 << numlowbits
92 %mask = xor i32 %notmask, -1 ; bitwise NOT of notmask
93 %masked = and i32 %val, %mask ; swapped order
97 ; ---------------------------------------------------------------------------- ;
99 ; ---------------------------------------------------------------------------- ;
; Pattern c, baseline: mask = -1 >> (32 - numlowbits) using a logical shift,
; applied as mask & val. The shared GCN checks (used by both RUN lines)
; expect the sub/lshr/and sequence to be emitted as one v_bfe_u32.
101 define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
102 ; GCN-LABEL: bzhi32_c0:
104 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105 ; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
106 ; GCN-NEXT: s_setpc_b64 s[30:31]
107 %numhighbits = sub i32 32, %numlowbits ; 32 - numlowbits
108 %mask = lshr i32 -1, %numhighbits ; -1 >> numhighbits (logical)
109 %masked = and i32 %mask, %val ; mask is the left operand here
; Pattern c with a narrow bit count: %numlowbits is a plain i8 argument, the
; subtraction 32 - numlowbits is done in i8, and only the result is
; zero-extended to i32 for the shift.
; Unlike the i32 variants, the checks here are split per target (SI and VI
; prefixes) and expect an explicit subtract / shift / and sequence rather
; than a single v_bfe_u32.
113 define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
114 ; SI-LABEL: bzhi32_c1_indexzext:
116 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117 ; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
118 ; SI-NEXT: v_and_b32_e32 v1, 0xff, v1
119 ; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
120 ; SI-NEXT: v_and_b32_e32 v0, v1, v0
121 ; SI-NEXT: s_setpc_b64 s[30:31]
123 ; VI-LABEL: bzhi32_c1_indexzext:
125 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126 ; VI-NEXT: v_sub_u16_e32 v1, 32, v1
127 ; VI-NEXT: v_mov_b32_e32 v2, -1
128 ; VI-NEXT: v_lshrrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
129 ; VI-NEXT: v_and_b32_e32 v0, v1, v0
130 ; VI-NEXT: s_setpc_b64 s[30:31]
131 %numhighbits = sub i8 32, %numlowbits ; 32 - numlowbits, computed in i8
132 %sh_prom = zext i8 %numhighbits to i32 ; widen the i8 shift amount to i32
133 %mask = lshr i32 -1, %sh_prom ; -1 >> sh_prom (logical)
134 %masked = and i32 %mask, %val
; Pattern c with the operands of the final 'and' swapped (val & mask instead
; of mask & val). The shared GCN checks expect the same single v_bfe_u32 as
; the unswapped form.
138 define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
139 ; GCN-LABEL: bzhi32_c4_commutative:
141 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142 ; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
143 ; GCN-NEXT: s_setpc_b64 s[30:31]
144 %numhighbits = sub i32 32, %numlowbits ; 32 - numlowbits
145 %mask = lshr i32 -1, %numhighbits ; -1 >> numhighbits (logical)
146 %masked = and i32 %val, %mask ; swapped order
150 ; ---------------------------------------------------------------------------- ;
152 ; ---------------------------------------------------------------------------- ;
; Pattern d, baseline: no explicit mask; val is shifted left by
; (32 - numlowbits) and then logically shifted right by the same amount.
; The shared GCN checks (used by both RUN lines) expect the sub/shl/lshr
; sequence to be emitted as one v_bfe_u32.
154 define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
155 ; GCN-LABEL: bzhi32_d0:
157 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
158 ; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
159 ; GCN-NEXT: s_setpc_b64 s[30:31]
160 %numhighbits = sub i32 32, %numlowbits ; 32 - numlowbits
161 %highbitscleared = shl i32 %val, %numhighbits ; val << numhighbits
162 %masked = lshr i32 %highbitscleared, %numhighbits ; logical shift back right by the same amount
; Pattern d with a narrow bit count: %numlowbits is a plain i8 argument, the
; subtraction 32 - numlowbits is done in i8, and only the result is
; zero-extended to i32 for the two shifts.
; Unlike the i32 variant, the checks here are split per target (SI and VI
; prefixes) and expect an explicit subtract / and 0xff / shift-left /
; shift-right sequence rather than a single v_bfe_u32.
166 define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
167 ; SI-LABEL: bzhi32_d1_indexzext:
169 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170 ; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
171 ; SI-NEXT: v_and_b32_e32 v1, 0xff, v1
172 ; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
173 ; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
174 ; SI-NEXT: s_setpc_b64 s[30:31]
176 ; VI-LABEL: bzhi32_d1_indexzext:
178 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179 ; VI-NEXT: v_sub_u16_e32 v1, 32, v1
180 ; VI-NEXT: v_and_b32_e32 v1, 0xff, v1
181 ; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
182 ; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
183 ; VI-NEXT: s_setpc_b64 s[30:31]
184 %numhighbits = sub i8 32, %numlowbits ; 32 - numlowbits, computed in i8
185 %sh_prom = zext i8 %numhighbits to i32 ; widen the i8 shift amount to i32
186 %highbitscleared = shl i32 %val, %sh_prom ; val << sh_prom
187 %masked = lshr i32 %highbitscleared, %sh_prom ; logical shift back right by the same amount