1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI2,X86-BASELINE
3 ; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI2,X86-BMI1
4 ; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI2,X86-BMI1
5 ; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2
6 ; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2
7 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI2
8 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI2
9 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI2
10 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2
11 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2
16 ; are equivalent, but we prefer the second variant if we have BMI2.
18 ; We do not test the variant where y = (bitwidth - z), e.g. (32 - z) for i32, because that is BMI2's BZHI.
20 ; ---------------------------------------------------------------------------- ;
22 ; ---------------------------------------------------------------------------- ;
24 define i8 @clear_highbits8_c0(i8 %val, i8 %numhighbits) nounwind {
25 ; X86-LABEL: clear_highbits8_c0:
27 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
28 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
29 ; X86-NEXT: shlb %cl, %al
30 ; X86-NEXT: shrb %cl, %al
33 ; X64-LABEL: clear_highbits8_c0:
35 ; X64-NEXT: movl %esi, %ecx
36 ; X64-NEXT: movl %edi, %eax
37 ; X64-NEXT: shlb %cl, %al
38 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
39 ; X64-NEXT: shrb %cl, %al
40 ; X64-NEXT: # kill: def $al killed $al killed $eax
42 %mask = lshr i8 -1, %numhighbits
43 %masked = and i8 %mask, %val
47 define i8 @clear_highbits8_c2_load(ptr %w, i8 %numhighbits) nounwind {
48 ; X86-LABEL: clear_highbits8_c2_load:
50 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
51 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
52 ; X86-NEXT: movzbl (%eax), %eax
53 ; X86-NEXT: shlb %cl, %al
54 ; X86-NEXT: shrb %cl, %al
57 ; X64-LABEL: clear_highbits8_c2_load:
59 ; X64-NEXT: movl %esi, %ecx
60 ; X64-NEXT: movzbl (%rdi), %eax
61 ; X64-NEXT: shlb %cl, %al
62 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
63 ; X64-NEXT: shrb %cl, %al
65 %val = load i8, ptr %w
66 %mask = lshr i8 -1, %numhighbits
67 %masked = and i8 %mask, %val
71 define i8 @clear_highbits8_c4_commutative(i8 %val, i8 %numhighbits) nounwind {
72 ; X86-LABEL: clear_highbits8_c4_commutative:
74 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
75 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
76 ; X86-NEXT: shlb %cl, %al
77 ; X86-NEXT: shrb %cl, %al
80 ; X64-LABEL: clear_highbits8_c4_commutative:
82 ; X64-NEXT: movl %esi, %ecx
83 ; X64-NEXT: movl %edi, %eax
84 ; X64-NEXT: shlb %cl, %al
85 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
86 ; X64-NEXT: shrb %cl, %al
87 ; X64-NEXT: # kill: def $al killed $al killed $eax
89 %mask = lshr i8 -1, %numhighbits
90 %masked = and i8 %val, %mask ; swapped order
94 ; ---------------------------------------------------------------------------- ;
96 ; ---------------------------------------------------------------------------- ;
98 define i16 @clear_highbits16_c0(i16 %val, i16 %numhighbits) nounwind {
99 ; X86-NOBMI2-LABEL: clear_highbits16_c0:
100 ; X86-NOBMI2: # %bb.0:
101 ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
102 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
103 ; X86-NOBMI2-NEXT: shll %cl, %eax
104 ; X86-NOBMI2-NEXT: movzwl %ax, %eax
105 ; X86-NOBMI2-NEXT: shrl %cl, %eax
106 ; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
107 ; X86-NOBMI2-NEXT: retl
109 ; X86-BMI2-LABEL: clear_highbits16_c0:
111 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
112 ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx
113 ; X86-BMI2-NEXT: movzwl %cx, %ecx
114 ; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
115 ; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
116 ; X86-BMI2-NEXT: retl
118 ; X64-NOBMI2-LABEL: clear_highbits16_c0:
119 ; X64-NOBMI2: # %bb.0:
120 ; X64-NOBMI2-NEXT: movl %esi, %ecx
121 ; X64-NOBMI2-NEXT: shll %cl, %edi
122 ; X64-NOBMI2-NEXT: movzwl %di, %eax
123 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
124 ; X64-NOBMI2-NEXT: shrl %cl, %eax
125 ; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
126 ; X64-NOBMI2-NEXT: retq
128 ; X64-BMI2-LABEL: clear_highbits16_c0:
130 ; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
131 ; X64-BMI2-NEXT: movzwl %ax, %eax
132 ; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
133 ; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
134 ; X64-BMI2-NEXT: retq
135 %mask = lshr i16 -1, %numhighbits
136 %masked = and i16 %mask, %val
140 define i16 @clear_highbits16_c1_indexzext(i16 %val, i8 %numhighbits) nounwind {
141 ; X86-NOBMI2-LABEL: clear_highbits16_c1_indexzext:
142 ; X86-NOBMI2: # %bb.0:
143 ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
144 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
145 ; X86-NOBMI2-NEXT: shll %cl, %eax
146 ; X86-NOBMI2-NEXT: movzwl %ax, %eax
147 ; X86-NOBMI2-NEXT: shrl %cl, %eax
148 ; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
149 ; X86-NOBMI2-NEXT: retl
151 ; X86-BMI2-LABEL: clear_highbits16_c1_indexzext:
153 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
154 ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx
155 ; X86-BMI2-NEXT: movzwl %cx, %ecx
156 ; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
157 ; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
158 ; X86-BMI2-NEXT: retl
160 ; X64-NOBMI2-LABEL: clear_highbits16_c1_indexzext:
161 ; X64-NOBMI2: # %bb.0:
162 ; X64-NOBMI2-NEXT: movl %esi, %ecx
163 ; X64-NOBMI2-NEXT: shll %cl, %edi
164 ; X64-NOBMI2-NEXT: movzwl %di, %eax
165 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
166 ; X64-NOBMI2-NEXT: shrl %cl, %eax
167 ; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
168 ; X64-NOBMI2-NEXT: retq
170 ; X64-BMI2-LABEL: clear_highbits16_c1_indexzext:
172 ; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
173 ; X64-BMI2-NEXT: movzwl %ax, %eax
174 ; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
175 ; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
176 ; X64-BMI2-NEXT: retq
177 %sh_prom = zext i8 %numhighbits to i16
178 %mask = lshr i16 -1, %sh_prom
179 %masked = and i16 %mask, %val
183 define i16 @clear_highbits16_c2_load(ptr %w, i16 %numhighbits) nounwind {
184 ; X86-NOBMI2-LABEL: clear_highbits16_c2_load:
185 ; X86-NOBMI2: # %bb.0:
186 ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
187 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
188 ; X86-NOBMI2-NEXT: movzwl (%eax), %eax
189 ; X86-NOBMI2-NEXT: shll %cl, %eax
190 ; X86-NOBMI2-NEXT: movzwl %ax, %eax
191 ; X86-NOBMI2-NEXT: shrl %cl, %eax
192 ; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
193 ; X86-NOBMI2-NEXT: retl
195 ; X86-BMI2-LABEL: clear_highbits16_c2_load:
197 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
198 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
199 ; X86-BMI2-NEXT: movzwl (%ecx), %ecx
200 ; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx
201 ; X86-BMI2-NEXT: movzwl %cx, %ecx
202 ; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
203 ; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
204 ; X86-BMI2-NEXT: retl
206 ; X64-NOBMI2-LABEL: clear_highbits16_c2_load:
207 ; X64-NOBMI2: # %bb.0:
208 ; X64-NOBMI2-NEXT: movl %esi, %ecx
209 ; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
210 ; X64-NOBMI2-NEXT: shll %cl, %eax
211 ; X64-NOBMI2-NEXT: movzwl %ax, %eax
212 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
213 ; X64-NOBMI2-NEXT: shrl %cl, %eax
214 ; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
215 ; X64-NOBMI2-NEXT: retq
217 ; X64-BMI2-LABEL: clear_highbits16_c2_load:
219 ; X64-BMI2-NEXT: movzwl (%rdi), %eax
220 ; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
221 ; X64-BMI2-NEXT: movzwl %ax, %eax
222 ; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
223 ; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
224 ; X64-BMI2-NEXT: retq
225 %val = load i16, ptr %w
226 %mask = lshr i16 -1, %numhighbits
227 %masked = and i16 %mask, %val
231 define i16 @clear_highbits16_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind {
232 ; X86-NOBMI2-LABEL: clear_highbits16_c3_load_indexzext:
233 ; X86-NOBMI2: # %bb.0:
234 ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
235 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
236 ; X86-NOBMI2-NEXT: movzwl (%eax), %eax
237 ; X86-NOBMI2-NEXT: shll %cl, %eax
238 ; X86-NOBMI2-NEXT: movzwl %ax, %eax
239 ; X86-NOBMI2-NEXT: shrl %cl, %eax
240 ; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
241 ; X86-NOBMI2-NEXT: retl
243 ; X86-BMI2-LABEL: clear_highbits16_c3_load_indexzext:
245 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
246 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
247 ; X86-BMI2-NEXT: movzwl (%ecx), %ecx
248 ; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx
249 ; X86-BMI2-NEXT: movzwl %cx, %ecx
250 ; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
251 ; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
252 ; X86-BMI2-NEXT: retl
254 ; X64-NOBMI2-LABEL: clear_highbits16_c3_load_indexzext:
255 ; X64-NOBMI2: # %bb.0:
256 ; X64-NOBMI2-NEXT: movl %esi, %ecx
257 ; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
258 ; X64-NOBMI2-NEXT: shll %cl, %eax
259 ; X64-NOBMI2-NEXT: movzwl %ax, %eax
260 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
261 ; X64-NOBMI2-NEXT: shrl %cl, %eax
262 ; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
263 ; X64-NOBMI2-NEXT: retq
265 ; X64-BMI2-LABEL: clear_highbits16_c3_load_indexzext:
267 ; X64-BMI2-NEXT: movzwl (%rdi), %eax
268 ; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
269 ; X64-BMI2-NEXT: movzwl %ax, %eax
270 ; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
271 ; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
272 ; X64-BMI2-NEXT: retq
273 %val = load i16, ptr %w
274 %sh_prom = zext i8 %numhighbits to i16
275 %mask = lshr i16 -1, %sh_prom
276 %masked = and i16 %mask, %val
280 define i16 @clear_highbits16_c4_commutative(i16 %val, i16 %numhighbits) nounwind {
281 ; X86-NOBMI2-LABEL: clear_highbits16_c4_commutative:
282 ; X86-NOBMI2: # %bb.0:
283 ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
284 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
285 ; X86-NOBMI2-NEXT: shll %cl, %eax
286 ; X86-NOBMI2-NEXT: movzwl %ax, %eax
287 ; X86-NOBMI2-NEXT: shrl %cl, %eax
288 ; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
289 ; X86-NOBMI2-NEXT: retl
291 ; X86-BMI2-LABEL: clear_highbits16_c4_commutative:
293 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
294 ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx
295 ; X86-BMI2-NEXT: movzwl %cx, %ecx
296 ; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
297 ; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
298 ; X86-BMI2-NEXT: retl
300 ; X64-NOBMI2-LABEL: clear_highbits16_c4_commutative:
301 ; X64-NOBMI2: # %bb.0:
302 ; X64-NOBMI2-NEXT: movl %esi, %ecx
303 ; X64-NOBMI2-NEXT: shll %cl, %edi
304 ; X64-NOBMI2-NEXT: movzwl %di, %eax
305 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
306 ; X64-NOBMI2-NEXT: shrl %cl, %eax
307 ; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
308 ; X64-NOBMI2-NEXT: retq
310 ; X64-BMI2-LABEL: clear_highbits16_c4_commutative:
312 ; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
313 ; X64-BMI2-NEXT: movzwl %ax, %eax
314 ; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
315 ; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
316 ; X64-BMI2-NEXT: retq
317 %mask = lshr i16 -1, %numhighbits
318 %masked = and i16 %val, %mask ; swapped order
322 ; ---------------------------------------------------------------------------- ;
324 ; ---------------------------------------------------------------------------- ;
326 define i32 @clear_highbits32_c0(i32 %val, i32 %numhighbits) nounwind {
327 ; X86-NOBMI2-LABEL: clear_highbits32_c0:
328 ; X86-NOBMI2: # %bb.0:
329 ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
330 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
331 ; X86-NOBMI2-NEXT: shll %cl, %eax
332 ; X86-NOBMI2-NEXT: shrl %cl, %eax
333 ; X86-NOBMI2-NEXT: retl
335 ; X86-BMI2-LABEL: clear_highbits32_c0:
337 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
338 ; X86-BMI2-NEXT: movl $32, %ecx
339 ; X86-BMI2-NEXT: subl %eax, %ecx
340 ; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax
341 ; X86-BMI2-NEXT: retl
343 ; X64-NOBMI2-LABEL: clear_highbits32_c0:
344 ; X64-NOBMI2: # %bb.0:
345 ; X64-NOBMI2-NEXT: movl %esi, %ecx
346 ; X64-NOBMI2-NEXT: movl %edi, %eax
347 ; X64-NOBMI2-NEXT: shll %cl, %eax
348 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
349 ; X64-NOBMI2-NEXT: shrl %cl, %eax
350 ; X64-NOBMI2-NEXT: retq
352 ; X64-BMI2-LABEL: clear_highbits32_c0:
354 ; X64-BMI2-NEXT: movl $32, %eax
355 ; X64-BMI2-NEXT: subl %esi, %eax
356 ; X64-BMI2-NEXT: bzhil %eax, %edi, %eax
357 ; X64-BMI2-NEXT: retq
358 %mask = lshr i32 -1, %numhighbits
359 %masked = and i32 %mask, %val
363 define i32 @clear_highbits32_c1_indexzext(i32 %val, i8 %numhighbits) nounwind {
364 ; X86-NOBMI2-LABEL: clear_highbits32_c1_indexzext:
365 ; X86-NOBMI2: # %bb.0:
366 ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
367 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
368 ; X86-NOBMI2-NEXT: shll %cl, %eax
369 ; X86-NOBMI2-NEXT: shrl %cl, %eax
370 ; X86-NOBMI2-NEXT: retl
372 ; X86-BMI2-LABEL: clear_highbits32_c1_indexzext:
374 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
375 ; X86-BMI2-NEXT: movl $32, %ecx
376 ; X86-BMI2-NEXT: subl %eax, %ecx
377 ; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax
378 ; X86-BMI2-NEXT: retl
380 ; X64-NOBMI2-LABEL: clear_highbits32_c1_indexzext:
381 ; X64-NOBMI2: # %bb.0:
382 ; X64-NOBMI2-NEXT: movl %esi, %ecx
383 ; X64-NOBMI2-NEXT: movl %edi, %eax
384 ; X64-NOBMI2-NEXT: shll %cl, %eax
385 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
386 ; X64-NOBMI2-NEXT: shrl %cl, %eax
387 ; X64-NOBMI2-NEXT: retq
389 ; X64-BMI2-LABEL: clear_highbits32_c1_indexzext:
391 ; X64-BMI2-NEXT: movl $32, %eax
392 ; X64-BMI2-NEXT: subl %esi, %eax
393 ; X64-BMI2-NEXT: bzhil %eax, %edi, %eax
394 ; X64-BMI2-NEXT: retq
395 %sh_prom = zext i8 %numhighbits to i32
396 %mask = lshr i32 -1, %sh_prom
397 %masked = and i32 %mask, %val
401 define i32 @clear_highbits32_c2_load(ptr %w, i32 %numhighbits) nounwind {
402 ; X86-NOBMI2-LABEL: clear_highbits32_c2_load:
403 ; X86-NOBMI2: # %bb.0:
404 ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
405 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
406 ; X86-NOBMI2-NEXT: movl (%eax), %eax
407 ; X86-NOBMI2-NEXT: shll %cl, %eax
408 ; X86-NOBMI2-NEXT: shrl %cl, %eax
409 ; X86-NOBMI2-NEXT: retl
411 ; X86-BMI2-LABEL: clear_highbits32_c2_load:
413 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
414 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
415 ; X86-BMI2-NEXT: movl $32, %edx
416 ; X86-BMI2-NEXT: subl %ecx, %edx
417 ; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax
418 ; X86-BMI2-NEXT: retl
420 ; X64-NOBMI2-LABEL: clear_highbits32_c2_load:
421 ; X64-NOBMI2: # %bb.0:
422 ; X64-NOBMI2-NEXT: movl %esi, %ecx
423 ; X64-NOBMI2-NEXT: movl (%rdi), %eax
424 ; X64-NOBMI2-NEXT: shll %cl, %eax
425 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
426 ; X64-NOBMI2-NEXT: shrl %cl, %eax
427 ; X64-NOBMI2-NEXT: retq
429 ; X64-BMI2-LABEL: clear_highbits32_c2_load:
431 ; X64-BMI2-NEXT: movl $32, %eax
432 ; X64-BMI2-NEXT: subl %esi, %eax
433 ; X64-BMI2-NEXT: bzhil %eax, (%rdi), %eax
434 ; X64-BMI2-NEXT: retq
435 %val = load i32, ptr %w
436 %mask = lshr i32 -1, %numhighbits
437 %masked = and i32 %mask, %val
441 define i32 @clear_highbits32_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind {
442 ; X86-NOBMI2-LABEL: clear_highbits32_c3_load_indexzext:
443 ; X86-NOBMI2: # %bb.0:
444 ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
445 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
446 ; X86-NOBMI2-NEXT: movl (%eax), %eax
447 ; X86-NOBMI2-NEXT: shll %cl, %eax
448 ; X86-NOBMI2-NEXT: shrl %cl, %eax
449 ; X86-NOBMI2-NEXT: retl
451 ; X86-BMI2-LABEL: clear_highbits32_c3_load_indexzext:
453 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
454 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
455 ; X86-BMI2-NEXT: movl $32, %edx
456 ; X86-BMI2-NEXT: subl %ecx, %edx
457 ; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax
458 ; X86-BMI2-NEXT: retl
460 ; X64-NOBMI2-LABEL: clear_highbits32_c3_load_indexzext:
461 ; X64-NOBMI2: # %bb.0:
462 ; X64-NOBMI2-NEXT: movl %esi, %ecx
463 ; X64-NOBMI2-NEXT: movl (%rdi), %eax
464 ; X64-NOBMI2-NEXT: shll %cl, %eax
465 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
466 ; X64-NOBMI2-NEXT: shrl %cl, %eax
467 ; X64-NOBMI2-NEXT: retq
469 ; X64-BMI2-LABEL: clear_highbits32_c3_load_indexzext:
471 ; X64-BMI2-NEXT: movl $32, %eax
472 ; X64-BMI2-NEXT: subl %esi, %eax
473 ; X64-BMI2-NEXT: bzhil %eax, (%rdi), %eax
474 ; X64-BMI2-NEXT: retq
475 %val = load i32, ptr %w
476 %sh_prom = zext i8 %numhighbits to i32
477 %mask = lshr i32 -1, %sh_prom
478 %masked = and i32 %mask, %val
482 define i32 @clear_highbits32_c4_commutative(i32 %val, i32 %numhighbits) nounwind {
483 ; X86-NOBMI2-LABEL: clear_highbits32_c4_commutative:
484 ; X86-NOBMI2: # %bb.0:
485 ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
486 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
487 ; X86-NOBMI2-NEXT: shll %cl, %eax
488 ; X86-NOBMI2-NEXT: shrl %cl, %eax
489 ; X86-NOBMI2-NEXT: retl
491 ; X86-BMI2-LABEL: clear_highbits32_c4_commutative:
493 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
494 ; X86-BMI2-NEXT: movl $32, %ecx
495 ; X86-BMI2-NEXT: subl %eax, %ecx
496 ; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax
497 ; X86-BMI2-NEXT: retl
499 ; X64-NOBMI2-LABEL: clear_highbits32_c4_commutative:
500 ; X64-NOBMI2: # %bb.0:
501 ; X64-NOBMI2-NEXT: movl %esi, %ecx
502 ; X64-NOBMI2-NEXT: movl %edi, %eax
503 ; X64-NOBMI2-NEXT: shll %cl, %eax
504 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
505 ; X64-NOBMI2-NEXT: shrl %cl, %eax
506 ; X64-NOBMI2-NEXT: retq
508 ; X64-BMI2-LABEL: clear_highbits32_c4_commutative:
510 ; X64-BMI2-NEXT: movl $32, %eax
511 ; X64-BMI2-NEXT: subl %esi, %eax
512 ; X64-BMI2-NEXT: bzhil %eax, %edi, %eax
513 ; X64-BMI2-NEXT: retq
514 %mask = lshr i32 -1, %numhighbits
515 %masked = and i32 %val, %mask ; swapped order
519 ; ---------------------------------------------------------------------------- ;
521 ; ---------------------------------------------------------------------------- ;
523 define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind {
524 ; X86-BASELINE-LABEL: clear_highbits64_c0:
525 ; X86-BASELINE: # %bb.0:
526 ; X86-BASELINE-NEXT: pushl %esi
527 ; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
528 ; X86-BASELINE-NEXT: movl $-1, %eax
529 ; X86-BASELINE-NEXT: movl $-1, %esi
530 ; X86-BASELINE-NEXT: shrl %cl, %esi
531 ; X86-BASELINE-NEXT: xorl %edx, %edx
532 ; X86-BASELINE-NEXT: testb $32, %cl
533 ; X86-BASELINE-NEXT: jne .LBB13_1
534 ; X86-BASELINE-NEXT: # %bb.2:
535 ; X86-BASELINE-NEXT: movl %esi, %edx
536 ; X86-BASELINE-NEXT: jmp .LBB13_3
537 ; X86-BASELINE-NEXT: .LBB13_1:
538 ; X86-BASELINE-NEXT: movl %esi, %eax
539 ; X86-BASELINE-NEXT: .LBB13_3:
540 ; X86-BASELINE-NEXT: andl {{[0-9]+}}(%esp), %eax
541 ; X86-BASELINE-NEXT: andl {{[0-9]+}}(%esp), %edx
542 ; X86-BASELINE-NEXT: popl %esi
543 ; X86-BASELINE-NEXT: retl
545 ; X86-BMI1-LABEL: clear_highbits64_c0:
547 ; X86-BMI1-NEXT: pushl %esi
548 ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
549 ; X86-BMI1-NEXT: movl $-1, %esi
550 ; X86-BMI1-NEXT: movl $-1, %eax
551 ; X86-BMI1-NEXT: shrl %cl, %eax
552 ; X86-BMI1-NEXT: xorl %edx, %edx
553 ; X86-BMI1-NEXT: testb $32, %cl
554 ; X86-BMI1-NEXT: cmovel %eax, %edx
555 ; X86-BMI1-NEXT: cmovel %esi, %eax
556 ; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax
557 ; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx
558 ; X86-BMI1-NEXT: popl %esi
559 ; X86-BMI1-NEXT: retl
561 ; X86-BMI2-LABEL: clear_highbits64_c0:
563 ; X86-BMI2-NEXT: pushl %esi
564 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
565 ; X86-BMI2-NEXT: movl $-1, %eax
566 ; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi
567 ; X86-BMI2-NEXT: xorl %edx, %edx
568 ; X86-BMI2-NEXT: testb $32, %cl
569 ; X86-BMI2-NEXT: cmovel %esi, %edx
570 ; X86-BMI2-NEXT: cmovnel %esi, %eax
571 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
572 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
573 ; X86-BMI2-NEXT: popl %esi
574 ; X86-BMI2-NEXT: retl
576 ; X64-NOBMI2-LABEL: clear_highbits64_c0:
577 ; X64-NOBMI2: # %bb.0:
578 ; X64-NOBMI2-NEXT: movq %rsi, %rcx
579 ; X64-NOBMI2-NEXT: movq %rdi, %rax
580 ; X64-NOBMI2-NEXT: shlq %cl, %rax
581 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
582 ; X64-NOBMI2-NEXT: shrq %cl, %rax
583 ; X64-NOBMI2-NEXT: retq
585 ; X64-BMI2-LABEL: clear_highbits64_c0:
587 ; X64-BMI2-NEXT: movl $64, %eax
588 ; X64-BMI2-NEXT: subl %esi, %eax
589 ; X64-BMI2-NEXT: bzhiq %rax, %rdi, %rax
590 ; X64-BMI2-NEXT: retq
591 %mask = lshr i64 -1, %numhighbits
592 %masked = and i64 %mask, %val
596 define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind {
597 ; X86-BASELINE-LABEL: clear_highbits64_c1_indexzext:
598 ; X86-BASELINE: # %bb.0:
599 ; X86-BASELINE-NEXT: pushl %esi
600 ; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
601 ; X86-BASELINE-NEXT: movl $-1, %eax
602 ; X86-BASELINE-NEXT: movl $-1, %esi
603 ; X86-BASELINE-NEXT: shrl %cl, %esi
604 ; X86-BASELINE-NEXT: xorl %edx, %edx
605 ; X86-BASELINE-NEXT: testb $32, %cl
606 ; X86-BASELINE-NEXT: jne .LBB14_1
607 ; X86-BASELINE-NEXT: # %bb.2:
608 ; X86-BASELINE-NEXT: movl %esi, %edx
609 ; X86-BASELINE-NEXT: jmp .LBB14_3
610 ; X86-BASELINE-NEXT: .LBB14_1:
611 ; X86-BASELINE-NEXT: movl %esi, %eax
612 ; X86-BASELINE-NEXT: .LBB14_3:
613 ; X86-BASELINE-NEXT: andl {{[0-9]+}}(%esp), %eax
614 ; X86-BASELINE-NEXT: andl {{[0-9]+}}(%esp), %edx
615 ; X86-BASELINE-NEXT: popl %esi
616 ; X86-BASELINE-NEXT: retl
618 ; X86-BMI1-LABEL: clear_highbits64_c1_indexzext:
620 ; X86-BMI1-NEXT: pushl %esi
621 ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
622 ; X86-BMI1-NEXT: movl $-1, %esi
623 ; X86-BMI1-NEXT: movl $-1, %eax
624 ; X86-BMI1-NEXT: shrl %cl, %eax
625 ; X86-BMI1-NEXT: xorl %edx, %edx
626 ; X86-BMI1-NEXT: testb $32, %cl
627 ; X86-BMI1-NEXT: cmovel %eax, %edx
628 ; X86-BMI1-NEXT: cmovel %esi, %eax
629 ; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax
630 ; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx
631 ; X86-BMI1-NEXT: popl %esi
632 ; X86-BMI1-NEXT: retl
634 ; X86-BMI2-LABEL: clear_highbits64_c1_indexzext:
636 ; X86-BMI2-NEXT: pushl %esi
637 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
638 ; X86-BMI2-NEXT: movl $-1, %eax
639 ; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi
640 ; X86-BMI2-NEXT: xorl %edx, %edx
641 ; X86-BMI2-NEXT: testb $32, %cl
642 ; X86-BMI2-NEXT: cmovel %esi, %edx
643 ; X86-BMI2-NEXT: cmovnel %esi, %eax
644 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
645 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
646 ; X86-BMI2-NEXT: popl %esi
647 ; X86-BMI2-NEXT: retl
649 ; X64-NOBMI2-LABEL: clear_highbits64_c1_indexzext:
650 ; X64-NOBMI2: # %bb.0:
651 ; X64-NOBMI2-NEXT: movl %esi, %ecx
652 ; X64-NOBMI2-NEXT: movq %rdi, %rax
653 ; X64-NOBMI2-NEXT: shlq %cl, %rax
654 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
655 ; X64-NOBMI2-NEXT: shrq %cl, %rax
656 ; X64-NOBMI2-NEXT: retq
658 ; X64-BMI2-LABEL: clear_highbits64_c1_indexzext:
660 ; X64-BMI2-NEXT: movl $64, %eax
661 ; X64-BMI2-NEXT: subl %esi, %eax
662 ; X64-BMI2-NEXT: bzhiq %rax, %rdi, %rax
663 ; X64-BMI2-NEXT: retq
664 %sh_prom = zext i8 %numhighbits to i64
665 %mask = lshr i64 -1, %sh_prom
666 %masked = and i64 %mask, %val
670 define i64 @clear_highbits64_c2_load(ptr %w, i64 %numhighbits) nounwind {
671 ; X86-BASELINE-LABEL: clear_highbits64_c2_load:
672 ; X86-BASELINE: # %bb.0:
673 ; X86-BASELINE-NEXT: pushl %edi
674 ; X86-BASELINE-NEXT: pushl %esi
675 ; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %esi
676 ; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
677 ; X86-BASELINE-NEXT: movl $-1, %eax
678 ; X86-BASELINE-NEXT: movl $-1, %edi
679 ; X86-BASELINE-NEXT: shrl %cl, %edi
680 ; X86-BASELINE-NEXT: xorl %edx, %edx
681 ; X86-BASELINE-NEXT: testb $32, %cl
682 ; X86-BASELINE-NEXT: jne .LBB15_1
683 ; X86-BASELINE-NEXT: # %bb.2:
684 ; X86-BASELINE-NEXT: movl %edi, %edx
685 ; X86-BASELINE-NEXT: jmp .LBB15_3
686 ; X86-BASELINE-NEXT: .LBB15_1:
687 ; X86-BASELINE-NEXT: movl %edi, %eax
688 ; X86-BASELINE-NEXT: .LBB15_3:
689 ; X86-BASELINE-NEXT: andl (%esi), %eax
690 ; X86-BASELINE-NEXT: andl 4(%esi), %edx
691 ; X86-BASELINE-NEXT: popl %esi
692 ; X86-BASELINE-NEXT: popl %edi
693 ; X86-BASELINE-NEXT: retl
695 ; X86-BMI1-LABEL: clear_highbits64_c2_load:
697 ; X86-BMI1-NEXT: pushl %edi
698 ; X86-BMI1-NEXT: pushl %esi
699 ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
700 ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
701 ; X86-BMI1-NEXT: movl $-1, %edi
702 ; X86-BMI1-NEXT: movl $-1, %eax
703 ; X86-BMI1-NEXT: shrl %cl, %eax
704 ; X86-BMI1-NEXT: xorl %edx, %edx
705 ; X86-BMI1-NEXT: testb $32, %cl
706 ; X86-BMI1-NEXT: cmovel %eax, %edx
707 ; X86-BMI1-NEXT: cmovel %edi, %eax
708 ; X86-BMI1-NEXT: andl (%esi), %eax
709 ; X86-BMI1-NEXT: andl 4(%esi), %edx
710 ; X86-BMI1-NEXT: popl %esi
711 ; X86-BMI1-NEXT: popl %edi
712 ; X86-BMI1-NEXT: retl
714 ; X86-BMI2-LABEL: clear_highbits64_c2_load:
716 ; X86-BMI2-NEXT: pushl %ebx
717 ; X86-BMI2-NEXT: pushl %esi
718 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
719 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
720 ; X86-BMI2-NEXT: movl $-1, %eax
721 ; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi
722 ; X86-BMI2-NEXT: xorl %edx, %edx
723 ; X86-BMI2-NEXT: testb $32, %bl
724 ; X86-BMI2-NEXT: cmovel %esi, %edx
725 ; X86-BMI2-NEXT: cmovnel %esi, %eax
726 ; X86-BMI2-NEXT: andl (%ecx), %eax
727 ; X86-BMI2-NEXT: andl 4(%ecx), %edx
728 ; X86-BMI2-NEXT: popl %esi
729 ; X86-BMI2-NEXT: popl %ebx
730 ; X86-BMI2-NEXT: retl
732 ; X64-NOBMI2-LABEL: clear_highbits64_c2_load:
733 ; X64-NOBMI2: # %bb.0:
734 ; X64-NOBMI2-NEXT: movq %rsi, %rcx
735 ; X64-NOBMI2-NEXT: movq (%rdi), %rax
736 ; X64-NOBMI2-NEXT: shlq %cl, %rax
737 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
738 ; X64-NOBMI2-NEXT: shrq %cl, %rax
739 ; X64-NOBMI2-NEXT: retq
741 ; X64-BMI2-LABEL: clear_highbits64_c2_load:
743 ; X64-BMI2-NEXT: movl $64, %eax
744 ; X64-BMI2-NEXT: subl %esi, %eax
745 ; X64-BMI2-NEXT: bzhiq %rax, (%rdi), %rax
746 ; X64-BMI2-NEXT: retq
747 %val = load i64, ptr %w
748 %mask = lshr i64 -1, %numhighbits
749 %masked = and i64 %mask, %val
753 define i64 @clear_highbits64_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind {
754 ; X86-BASELINE-LABEL: clear_highbits64_c3_load_indexzext:
755 ; X86-BASELINE: # %bb.0:
756 ; X86-BASELINE-NEXT: pushl %edi
757 ; X86-BASELINE-NEXT: pushl %esi
758 ; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %esi
759 ; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
760 ; X86-BASELINE-NEXT: movl $-1, %eax
761 ; X86-BASELINE-NEXT: movl $-1, %edi
762 ; X86-BASELINE-NEXT: shrl %cl, %edi
763 ; X86-BASELINE-NEXT: xorl %edx, %edx
764 ; X86-BASELINE-NEXT: testb $32, %cl
765 ; X86-BASELINE-NEXT: jne .LBB16_1
766 ; X86-BASELINE-NEXT: # %bb.2:
767 ; X86-BASELINE-NEXT: movl %edi, %edx
768 ; X86-BASELINE-NEXT: jmp .LBB16_3
769 ; X86-BASELINE-NEXT: .LBB16_1:
770 ; X86-BASELINE-NEXT: movl %edi, %eax
771 ; X86-BASELINE-NEXT: .LBB16_3:
772 ; X86-BASELINE-NEXT: andl (%esi), %eax
773 ; X86-BASELINE-NEXT: andl 4(%esi), %edx
774 ; X86-BASELINE-NEXT: popl %esi
775 ; X86-BASELINE-NEXT: popl %edi
776 ; X86-BASELINE-NEXT: retl
778 ; X86-BMI1-LABEL: clear_highbits64_c3_load_indexzext:
780 ; X86-BMI1-NEXT: pushl %edi
781 ; X86-BMI1-NEXT: pushl %esi
782 ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
783 ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
784 ; X86-BMI1-NEXT: movl $-1, %edi
785 ; X86-BMI1-NEXT: movl $-1, %eax
786 ; X86-BMI1-NEXT: shrl %cl, %eax
787 ; X86-BMI1-NEXT: xorl %edx, %edx
788 ; X86-BMI1-NEXT: testb $32, %cl
789 ; X86-BMI1-NEXT: cmovel %eax, %edx
790 ; X86-BMI1-NEXT: cmovel %edi, %eax
791 ; X86-BMI1-NEXT: andl (%esi), %eax
792 ; X86-BMI1-NEXT: andl 4(%esi), %edx
793 ; X86-BMI1-NEXT: popl %esi
794 ; X86-BMI1-NEXT: popl %edi
795 ; X86-BMI1-NEXT: retl
797 ; X86-BMI2-LABEL: clear_highbits64_c3_load_indexzext:
799 ; X86-BMI2-NEXT: pushl %ebx
800 ; X86-BMI2-NEXT: pushl %esi
801 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
802 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
803 ; X86-BMI2-NEXT: movl $-1, %eax
804 ; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi
805 ; X86-BMI2-NEXT: xorl %edx, %edx
806 ; X86-BMI2-NEXT: testb $32, %bl
807 ; X86-BMI2-NEXT: cmovel %esi, %edx
808 ; X86-BMI2-NEXT: cmovnel %esi, %eax
809 ; X86-BMI2-NEXT: andl (%ecx), %eax
810 ; X86-BMI2-NEXT: andl 4(%ecx), %edx
811 ; X86-BMI2-NEXT: popl %esi
812 ; X86-BMI2-NEXT: popl %ebx
813 ; X86-BMI2-NEXT: retl
815 ; X64-NOBMI2-LABEL: clear_highbits64_c3_load_indexzext:
816 ; X64-NOBMI2: # %bb.0:
817 ; X64-NOBMI2-NEXT: movl %esi, %ecx
818 ; X64-NOBMI2-NEXT: movq (%rdi), %rax
819 ; X64-NOBMI2-NEXT: shlq %cl, %rax
820 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
821 ; X64-NOBMI2-NEXT: shrq %cl, %rax
822 ; X64-NOBMI2-NEXT: retq
824 ; X64-BMI2-LABEL: clear_highbits64_c3_load_indexzext:
826 ; X64-BMI2-NEXT: movl $64, %eax
827 ; X64-BMI2-NEXT: subl %esi, %eax
828 ; X64-BMI2-NEXT: bzhiq %rax, (%rdi), %rax
829 ; X64-BMI2-NEXT: retq
830 %val = load i64, ptr %w
831 %sh_prom = zext i8 %numhighbits to i64
832 %mask = lshr i64 -1, %sh_prom
833 %masked = and i64 %mask, %val
837 define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind {
838 ; X86-BASELINE-LABEL: clear_highbits64_c4_commutative:
839 ; X86-BASELINE: # %bb.0:
840 ; X86-BASELINE-NEXT: pushl %esi
841 ; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
842 ; X86-BASELINE-NEXT: movl $-1, %eax
843 ; X86-BASELINE-NEXT: movl $-1, %esi
844 ; X86-BASELINE-NEXT: shrl %cl, %esi
845 ; X86-BASELINE-NEXT: xorl %edx, %edx
846 ; X86-BASELINE-NEXT: testb $32, %cl
847 ; X86-BASELINE-NEXT: jne .LBB17_1
848 ; X86-BASELINE-NEXT: # %bb.2:
849 ; X86-BASELINE-NEXT: movl %esi, %edx
850 ; X86-BASELINE-NEXT: jmp .LBB17_3
851 ; X86-BASELINE-NEXT: .LBB17_1:
852 ; X86-BASELINE-NEXT: movl %esi, %eax
853 ; X86-BASELINE-NEXT: .LBB17_3:
854 ; X86-BASELINE-NEXT: andl {{[0-9]+}}(%esp), %eax
855 ; X86-BASELINE-NEXT: andl {{[0-9]+}}(%esp), %edx
856 ; X86-BASELINE-NEXT: popl %esi
857 ; X86-BASELINE-NEXT: retl
859 ; X86-BMI1-LABEL: clear_highbits64_c4_commutative:
861 ; X86-BMI1-NEXT: pushl %esi
862 ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
863 ; X86-BMI1-NEXT: movl $-1, %esi
864 ; X86-BMI1-NEXT: movl $-1, %eax
865 ; X86-BMI1-NEXT: shrl %cl, %eax
866 ; X86-BMI1-NEXT: xorl %edx, %edx
867 ; X86-BMI1-NEXT: testb $32, %cl
868 ; X86-BMI1-NEXT: cmovel %eax, %edx
869 ; X86-BMI1-NEXT: cmovel %esi, %eax
870 ; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax
871 ; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx
872 ; X86-BMI1-NEXT: popl %esi
873 ; X86-BMI1-NEXT: retl
875 ; X86-BMI2-LABEL: clear_highbits64_c4_commutative:
877 ; X86-BMI2-NEXT: pushl %esi
878 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
879 ; X86-BMI2-NEXT: movl $-1, %eax
880 ; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi
881 ; X86-BMI2-NEXT: xorl %edx, %edx
882 ; X86-BMI2-NEXT: testb $32, %cl
883 ; X86-BMI2-NEXT: cmovel %esi, %edx
884 ; X86-BMI2-NEXT: cmovnel %esi, %eax
885 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
886 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
887 ; X86-BMI2-NEXT: popl %esi
888 ; X86-BMI2-NEXT: retl
890 ; X64-NOBMI2-LABEL: clear_highbits64_c4_commutative:
891 ; X64-NOBMI2: # %bb.0:
892 ; X64-NOBMI2-NEXT: movq %rsi, %rcx
893 ; X64-NOBMI2-NEXT: movq %rdi, %rax
894 ; X64-NOBMI2-NEXT: shlq %cl, %rax
895 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
896 ; X64-NOBMI2-NEXT: shrq %cl, %rax
897 ; X64-NOBMI2-NEXT: retq
899 ; X64-BMI2-LABEL: clear_highbits64_c4_commutative:
901 ; X64-BMI2-NEXT: movl $64, %eax
902 ; X64-BMI2-NEXT: subl %esi, %eax
903 ; X64-BMI2-NEXT: bzhiq %rax, %rdi, %rax
904 ; X64-BMI2-NEXT: retq
905 %mask = lshr i64 -1, %numhighbits
906 %masked = and i64 %val, %mask ; swapped order
910 ; ---------------------------------------------------------------------------- ;
912 ; ---------------------------------------------------------------------------- ;
; Mask-then-AND form where the mask has a second use: besides feeding the
; 'and', %mask is also stored through %escape.
; Every configuration below is expected to build the mask from -1 with a right
; shift (shrl, or shrxl when BMI2 is enabled), store it, and then AND it with
; %val.
914 define i32 @oneuse32_c(i32 %val, i32 %numhighbits, ptr %escape) nounwind {
915 ; X86-NOBMI2-LABEL: oneuse32_c:
916 ; X86-NOBMI2: # %bb.0:
917 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
918 ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
919 ; X86-NOBMI2-NEXT: movl $-1, %eax
920 ; X86-NOBMI2-NEXT: shrl %cl, %eax
921 ; X86-NOBMI2-NEXT: movl %eax, (%edx)
922 ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
923 ; X86-NOBMI2-NEXT: retl
925 ; X86-BMI2-LABEL: oneuse32_c:
927 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
928 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
929 ; X86-BMI2-NEXT: movl $-1, %edx
930 ; X86-BMI2-NEXT: shrxl %eax, %edx, %eax
931 ; X86-BMI2-NEXT: movl %eax, (%ecx)
932 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
933 ; X86-BMI2-NEXT: retl
935 ; X64-NOBMI2-LABEL: oneuse32_c:
936 ; X64-NOBMI2: # %bb.0:
937 ; X64-NOBMI2-NEXT: movl %esi, %ecx
938 ; X64-NOBMI2-NEXT: movl $-1, %eax
939 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
940 ; X64-NOBMI2-NEXT: shrl %cl, %eax
941 ; X64-NOBMI2-NEXT: movl %eax, (%rdx)
942 ; X64-NOBMI2-NEXT: andl %edi, %eax
943 ; X64-NOBMI2-NEXT: retq
945 ; X64-BMI2-LABEL: oneuse32_c:
947 ; X64-BMI2-NEXT: movl $-1, %eax
948 ; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
949 ; X64-BMI2-NEXT: movl %eax, (%rdx)
950 ; X64-BMI2-NEXT: andl %edi, %eax
951 ; X64-BMI2-NEXT: retq
952 %mask = lshr i32 -1, %numhighbits
; The store below is the mask's second use.
953 store i32 %mask, ptr %escape
954 %masked = and i32 %mask, %val
; i64 counterpart of the mask-with-extra-use test: %mask is stored through
; %escape and also ANDed with %val.
; In the i686 configurations the expected code handles the 64-bit mask as two
; 32-bit halves (stored at offsets 0 and 4) and picks the halves by testing
; bit 5 of the shift amount (testb $32). The BASELINE run does this with a
; branch, the BMI1 and BMI2 runs with cmov. The x86-64 configurations use a
; single 64-bit shift (shrq or shrxq).
958 define i64 @oneuse64_c(i64 %val, i64 %numhighbits, ptr %escape) nounwind {
959 ; X86-BASELINE-LABEL: oneuse64_c:
960 ; X86-BASELINE: # %bb.0:
961 ; X86-BASELINE-NEXT: pushl %esi
962 ; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %esi
963 ; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
964 ; X86-BASELINE-NEXT: movl $-1, %eax
965 ; X86-BASELINE-NEXT: movl $-1, %edx
966 ; X86-BASELINE-NEXT: shrl %cl, %edx
967 ; X86-BASELINE-NEXT: testb $32, %cl
968 ; X86-BASELINE-NEXT: je .LBB19_2
969 ; X86-BASELINE-NEXT: # %bb.1:
970 ; X86-BASELINE-NEXT: movl %edx, %eax
971 ; X86-BASELINE-NEXT: xorl %edx, %edx
972 ; X86-BASELINE-NEXT: .LBB19_2:
973 ; X86-BASELINE-NEXT: movl %edx, 4(%esi)
974 ; X86-BASELINE-NEXT: movl %eax, (%esi)
975 ; X86-BASELINE-NEXT: andl {{[0-9]+}}(%esp), %eax
976 ; X86-BASELINE-NEXT: andl {{[0-9]+}}(%esp), %edx
977 ; X86-BASELINE-NEXT: popl %esi
978 ; X86-BASELINE-NEXT: retl
980 ; X86-BMI1-LABEL: oneuse64_c:
982 ; X86-BMI1-NEXT: pushl %edi
983 ; X86-BMI1-NEXT: pushl %esi
984 ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
985 ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
986 ; X86-BMI1-NEXT: movl $-1, %eax
987 ; X86-BMI1-NEXT: movl $-1, %edi
988 ; X86-BMI1-NEXT: shrl %cl, %edi
989 ; X86-BMI1-NEXT: xorl %edx, %edx
990 ; X86-BMI1-NEXT: testb $32, %cl
991 ; X86-BMI1-NEXT: cmovnel %edi, %eax
992 ; X86-BMI1-NEXT: cmovel %edi, %edx
993 ; X86-BMI1-NEXT: movl %edx, 4(%esi)
994 ; X86-BMI1-NEXT: movl %eax, (%esi)
995 ; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax
996 ; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx
997 ; X86-BMI1-NEXT: popl %esi
998 ; X86-BMI1-NEXT: popl %edi
999 ; X86-BMI1-NEXT: retl
1001 ; X86-BMI2-LABEL: oneuse64_c:
1002 ; X86-BMI2: # %bb.0:
1003 ; X86-BMI2-NEXT: pushl %ebx
1004 ; X86-BMI2-NEXT: pushl %esi
1005 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
1006 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
1007 ; X86-BMI2-NEXT: movl $-1, %eax
1008 ; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi
1009 ; X86-BMI2-NEXT: xorl %edx, %edx
1010 ; X86-BMI2-NEXT: testb $32, %bl
1011 ; X86-BMI2-NEXT: cmovnel %esi, %eax
1012 ; X86-BMI2-NEXT: cmovel %esi, %edx
1013 ; X86-BMI2-NEXT: movl %edx, 4(%ecx)
1014 ; X86-BMI2-NEXT: movl %eax, (%ecx)
1015 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
1016 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
1017 ; X86-BMI2-NEXT: popl %esi
1018 ; X86-BMI2-NEXT: popl %ebx
1019 ; X86-BMI2-NEXT: retl
1021 ; X64-NOBMI2-LABEL: oneuse64_c:
1022 ; X64-NOBMI2: # %bb.0:
1023 ; X64-NOBMI2-NEXT: movq %rsi, %rcx
1024 ; X64-NOBMI2-NEXT: movq $-1, %rax
1025 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
1026 ; X64-NOBMI2-NEXT: shrq %cl, %rax
1027 ; X64-NOBMI2-NEXT: movq %rax, (%rdx)
1028 ; X64-NOBMI2-NEXT: andq %rdi, %rax
1029 ; X64-NOBMI2-NEXT: retq
1031 ; X64-BMI2-LABEL: oneuse64_c:
1032 ; X64-BMI2: # %bb.0:
1033 ; X64-BMI2-NEXT: movq $-1, %rax
1034 ; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
1035 ; X64-BMI2-NEXT: movq %rax, (%rdx)
1036 ; X64-BMI2-NEXT: andq %rdi, %rax
1037 ; X64-BMI2-NEXT: retq
1038 %mask = lshr i64 -1, %numhighbits
; The store below is the mask's second use.
1039 store i64 %mask, ptr %escape
1040 %masked = and i64 %mask, %val
; Shift-pair form: %val is shifted left and then logically shifted right by the
; same amount. The intermediate left-shift result has a second use, because it
; is also stored through %escape.
; The expected code keeps both shifts in every configuration: shll/shrl without
; BMI2, shlxl/shrxl with BMI2.
1044 define i32 @oneuse32_d(i32 %val, i32 %numhighbits, ptr %escape) nounwind {
1045 ; X86-NOBMI2-LABEL: oneuse32_d:
1046 ; X86-NOBMI2: # %bb.0:
1047 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
1048 ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1049 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
1050 ; X86-NOBMI2-NEXT: shll %cl, %eax
1051 ; X86-NOBMI2-NEXT: movl %eax, (%edx)
1052 ; X86-NOBMI2-NEXT: shrl %cl, %eax
1053 ; X86-NOBMI2-NEXT: retl
1055 ; X86-BMI2-LABEL: oneuse32_d:
1056 ; X86-BMI2: # %bb.0:
1057 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
1058 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1059 ; X86-BMI2-NEXT: shlxl %ecx, {{[0-9]+}}(%esp), %edx
1060 ; X86-BMI2-NEXT: movl %edx, (%eax)
1061 ; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax
1062 ; X86-BMI2-NEXT: retl
1064 ; X64-NOBMI2-LABEL: oneuse32_d:
1065 ; X64-NOBMI2: # %bb.0:
1066 ; X64-NOBMI2-NEXT: movl %esi, %ecx
1067 ; X64-NOBMI2-NEXT: movl %edi, %eax
1068 ; X64-NOBMI2-NEXT: shll %cl, %eax
1069 ; X64-NOBMI2-NEXT: movl %eax, (%rdx)
1070 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
1071 ; X64-NOBMI2-NEXT: shrl %cl, %eax
1072 ; X64-NOBMI2-NEXT: retq
1074 ; X64-BMI2-LABEL: oneuse32_d:
1075 ; X64-BMI2: # %bb.0:
1076 ; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
1077 ; X64-BMI2-NEXT: movl %eax, (%rdx)
1078 ; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
1079 ; X64-BMI2-NEXT: retq
1080 %sh1 = shl i32 %val, %numhighbits
; The store below is the second use of the intermediate shift result.
1081 store i32 %sh1, ptr %escape
1082 %masked = lshr i32 %sh1, %numhighbits
; i64 counterpart of the shift-pair test: the intermediate left-shift result is
; stored through %escape before being shifted right by the same amount.
; In the i686 configurations the expected code expands each 64-bit shift into
; 32-bit shift / shldl / shrdl instructions and fixes up the halves by testing
; bit 5 of the shift amount (testb $32). The BASELINE run uses branches for the
; fix-ups, the BMI1 and BMI2 runs use cmov. The x86-64 configurations use one
; shift pair (shlq/shrq, or shlxq/shrxq with BMI2).
1086 define i64 @oneusei64_d(i64 %val, i64 %numhighbits, ptr %escape) nounwind {
1087 ; X86-BASELINE-LABEL: oneusei64_d:
1088 ; X86-BASELINE: # %bb.0:
1089 ; X86-BASELINE-NEXT: pushl %ebx
1090 ; X86-BASELINE-NEXT: pushl %edi
1091 ; X86-BASELINE-NEXT: pushl %esi
1092 ; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1093 ; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %edx
1094 ; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %eax
1095 ; X86-BASELINE-NEXT: movl %edx, %edi
1096 ; X86-BASELINE-NEXT: shll %cl, %edi
1097 ; X86-BASELINE-NEXT: shldl %cl, %edx, %eax
1098 ; X86-BASELINE-NEXT: testb $32, %cl
1099 ; X86-BASELINE-NEXT: movl %edi, %esi
1100 ; X86-BASELINE-NEXT: jne .LBB21_2
1101 ; X86-BASELINE-NEXT: # %bb.1:
1102 ; X86-BASELINE-NEXT: movl %eax, %esi
1103 ; X86-BASELINE-NEXT: .LBB21_2:
1104 ; X86-BASELINE-NEXT: movl %esi, %eax
1105 ; X86-BASELINE-NEXT: shrl %cl, %eax
1106 ; X86-BASELINE-NEXT: xorl %ebx, %ebx
1107 ; X86-BASELINE-NEXT: testb $32, %cl
1108 ; X86-BASELINE-NEXT: movl $0, %edx
1109 ; X86-BASELINE-NEXT: jne .LBB21_4
1110 ; X86-BASELINE-NEXT: # %bb.3:
1111 ; X86-BASELINE-NEXT: movl %edi, %ebx
1112 ; X86-BASELINE-NEXT: movl %eax, %edx
1113 ; X86-BASELINE-NEXT: .LBB21_4:
1114 ; X86-BASELINE-NEXT: movl %ebx, %edi
1115 ; X86-BASELINE-NEXT: shrdl %cl, %esi, %edi
1116 ; X86-BASELINE-NEXT: testb $32, %cl
1117 ; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %ecx
1118 ; X86-BASELINE-NEXT: movl %ebx, (%ecx)
1119 ; X86-BASELINE-NEXT: movl %esi, 4(%ecx)
1120 ; X86-BASELINE-NEXT: jne .LBB21_6
1121 ; X86-BASELINE-NEXT: # %bb.5:
1122 ; X86-BASELINE-NEXT: movl %edi, %eax
1123 ; X86-BASELINE-NEXT: .LBB21_6:
1124 ; X86-BASELINE-NEXT: popl %esi
1125 ; X86-BASELINE-NEXT: popl %edi
1126 ; X86-BASELINE-NEXT: popl %ebx
1127 ; X86-BASELINE-NEXT: retl
1129 ; X86-BMI1-LABEL: oneusei64_d:
1130 ; X86-BMI1: # %bb.0:
1131 ; X86-BMI1-NEXT: pushl %ebx
1132 ; X86-BMI1-NEXT: pushl %edi
1133 ; X86-BMI1-NEXT: pushl %esi
1134 ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1135 ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
1136 ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
1137 ; X86-BMI1-NEXT: movl %edx, %eax
1138 ; X86-BMI1-NEXT: shll %cl, %eax
1139 ; X86-BMI1-NEXT: shldl %cl, %edx, %esi
1140 ; X86-BMI1-NEXT: testb $32, %cl
1141 ; X86-BMI1-NEXT: cmovnel %eax, %esi
1142 ; X86-BMI1-NEXT: movl %esi, %edi
1143 ; X86-BMI1-NEXT: shrl %cl, %edi
1144 ; X86-BMI1-NEXT: xorl %edx, %edx
1145 ; X86-BMI1-NEXT: testb $32, %cl
1146 ; X86-BMI1-NEXT: cmovnel %edx, %eax
1147 ; X86-BMI1-NEXT: cmovel %edi, %edx
1148 ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx
1149 ; X86-BMI1-NEXT: movl %eax, (%ebx)
1150 ; X86-BMI1-NEXT: shrdl %cl, %esi, %eax
1151 ; X86-BMI1-NEXT: testb $32, %cl
1152 ; X86-BMI1-NEXT: movl %esi, 4(%ebx)
1153 ; X86-BMI1-NEXT: cmovnel %edi, %eax
1154 ; X86-BMI1-NEXT: popl %esi
1155 ; X86-BMI1-NEXT: popl %edi
1156 ; X86-BMI1-NEXT: popl %ebx
1157 ; X86-BMI1-NEXT: retl
1159 ; X86-BMI2-LABEL: oneusei64_d:
1160 ; X86-BMI2: # %bb.0:
1161 ; X86-BMI2-NEXT: pushl %ebx
1162 ; X86-BMI2-NEXT: pushl %edi
1163 ; X86-BMI2-NEXT: pushl %esi
1164 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1165 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
1166 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
1167 ; X86-BMI2-NEXT: shldl %cl, %eax, %esi
1168 ; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
1169 ; X86-BMI2-NEXT: xorl %edx, %edx
1170 ; X86-BMI2-NEXT: testb $32, %cl
1171 ; X86-BMI2-NEXT: cmovnel %eax, %esi
1172 ; X86-BMI2-NEXT: cmovnel %edx, %eax
1173 ; X86-BMI2-NEXT: shrxl %ecx, %esi, %edi
1174 ; X86-BMI2-NEXT: cmovel %edi, %edx
1175 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebx
1176 ; X86-BMI2-NEXT: movl %eax, (%ebx)
1177 ; X86-BMI2-NEXT: shrdl %cl, %esi, %eax
1178 ; X86-BMI2-NEXT: testb $32, %cl
1179 ; X86-BMI2-NEXT: movl %esi, 4(%ebx)
1180 ; X86-BMI2-NEXT: cmovnel %edi, %eax
1181 ; X86-BMI2-NEXT: popl %esi
1182 ; X86-BMI2-NEXT: popl %edi
1183 ; X86-BMI2-NEXT: popl %ebx
1184 ; X86-BMI2-NEXT: retl
1186 ; X64-NOBMI2-LABEL: oneusei64_d:
1187 ; X64-NOBMI2: # %bb.0:
1188 ; X64-NOBMI2-NEXT: movq %rsi, %rcx
1189 ; X64-NOBMI2-NEXT: movq %rdi, %rax
1190 ; X64-NOBMI2-NEXT: shlq %cl, %rax
1191 ; X64-NOBMI2-NEXT: movq %rax, (%rdx)
1192 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
1193 ; X64-NOBMI2-NEXT: shrq %cl, %rax
1194 ; X64-NOBMI2-NEXT: retq
1196 ; X64-BMI2-LABEL: oneusei64_d:
1197 ; X64-BMI2: # %bb.0:
1198 ; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
1199 ; X64-BMI2-NEXT: movq %rax, (%rdx)
1200 ; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
1201 ; X64-BMI2-NEXT: retq
1202 %sh1 = shl i64 %val, %numhighbits
; The store below is the second use of the intermediate shift result.
1203 store i64 %sh1, ptr %escape
1204 %masked = lshr i64 %sh1, %numhighbits
1208 ; ---------------------------------------------------------------------------- ;
1211 ; Variation of pattern
1212 ; c) x & (-1 >> (C - y))
1213 ; but with C != bitwidth(x)
1214 ; ---------------------------------------------------------------------------- ;
; The shift amount is computed as 16 - %numlowbits, so the constant differs
; from the bit width of the i32 operand.
; Without BMI2 the expected code computes that amount in %cl and applies a
; shll/shrl pair to %val. With BMI2 it computes 32 minus that amount and uses
; the result as the index operand of bzhil.
1216 define i32 @clear_highbits32_16(i32 %val, i32 %numlowbits) nounwind {
1217 ; X86-NOBMI2-LABEL: clear_highbits32_16:
1218 ; X86-NOBMI2: # %bb.0:
1219 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
1220 ; X86-NOBMI2-NEXT: movb $16, %cl
1221 ; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
1222 ; X86-NOBMI2-NEXT: shll %cl, %eax
1223 ; X86-NOBMI2-NEXT: shrl %cl, %eax
1224 ; X86-NOBMI2-NEXT: retl
1226 ; X86-BMI2-LABEL: clear_highbits32_16:
1227 ; X86-BMI2: # %bb.0:
1228 ; X86-BMI2-NEXT: movb $16, %al
1229 ; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
1230 ; X86-BMI2-NEXT: movl $32, %ecx
1231 ; X86-BMI2-NEXT: subl %eax, %ecx
1232 ; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax
1233 ; X86-BMI2-NEXT: retl
1235 ; X64-NOBMI2-LABEL: clear_highbits32_16:
1236 ; X64-NOBMI2: # %bb.0:
1237 ; X64-NOBMI2-NEXT: movl %edi, %eax
1238 ; X64-NOBMI2-NEXT: movb $16, %cl
1239 ; X64-NOBMI2-NEXT: subb %sil, %cl
1240 ; X64-NOBMI2-NEXT: shll %cl, %eax
1241 ; X64-NOBMI2-NEXT: shrl %cl, %eax
1242 ; X64-NOBMI2-NEXT: retq
1244 ; X64-BMI2-LABEL: clear_highbits32_16:
1245 ; X64-BMI2: # %bb.0:
1246 ; X64-BMI2-NEXT: movb $16, %al
1247 ; X64-BMI2-NEXT: subb %sil, %al
1248 ; X64-BMI2-NEXT: movl $32, %ecx
1249 ; X64-BMI2-NEXT: subl %eax, %ecx
1250 ; X64-BMI2-NEXT: bzhil %ecx, %edi, %eax
1251 ; X64-BMI2-NEXT: retq
1252 %numhighbits = sub i32 16, %numlowbits
1253 %mask = lshr i32 -1, %numhighbits
1254 %masked = and i32 %mask, %val
; Same shape as the 16 - %numlowbits test, but the constant is 48, which is
; larger than the bit width of the i32 operand.
; Without BMI2 the expected code computes 48 - %numlowbits in %cl and applies a
; shll/shrl pair to %val. With BMI2 it computes 32 minus that amount and uses
; the result as the index operand of bzhil.
1257 define i32 @clear_highbits32_48(i32 %val, i32 %numlowbits) nounwind {
1258 ; X86-NOBMI2-LABEL: clear_highbits32_48:
1259 ; X86-NOBMI2: # %bb.0:
1260 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
1261 ; X86-NOBMI2-NEXT: movb $48, %cl
1262 ; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
1263 ; X86-NOBMI2-NEXT: shll %cl, %eax
1264 ; X86-NOBMI2-NEXT: shrl %cl, %eax
1265 ; X86-NOBMI2-NEXT: retl
1267 ; X86-BMI2-LABEL: clear_highbits32_48:
1268 ; X86-BMI2: # %bb.0:
1269 ; X86-BMI2-NEXT: movb $48, %al
1270 ; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
1271 ; X86-BMI2-NEXT: movl $32, %ecx
1272 ; X86-BMI2-NEXT: subl %eax, %ecx
1273 ; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax
1274 ; X86-BMI2-NEXT: retl
1276 ; X64-NOBMI2-LABEL: clear_highbits32_48:
1277 ; X64-NOBMI2: # %bb.0:
1278 ; X64-NOBMI2-NEXT: movl %edi, %eax
1279 ; X64-NOBMI2-NEXT: movb $48, %cl
1280 ; X64-NOBMI2-NEXT: subb %sil, %cl
1281 ; X64-NOBMI2-NEXT: shll %cl, %eax
1282 ; X64-NOBMI2-NEXT: shrl %cl, %eax
1283 ; X64-NOBMI2-NEXT: retq
1285 ; X64-BMI2-LABEL: clear_highbits32_48:
1286 ; X64-BMI2: # %bb.0:
1287 ; X64-BMI2-NEXT: movb $48, %al
1288 ; X64-BMI2-NEXT: subb %sil, %al
1289 ; X64-BMI2-NEXT: movl $32, %ecx
1290 ; X64-BMI2-NEXT: subl %eax, %ecx
1291 ; X64-BMI2-NEXT: bzhil %ecx, %edi, %eax
1292 ; X64-BMI2-NEXT: retq
1293 %numhighbits = sub i32 48, %numlowbits
1294 %mask = lshr i32 -1, %numhighbits
1295 %masked = and i32 %mask, %val
; Shift amount 16 - %numlowbits again, but here %mask has a second use: it is
; stored through %escape as well as ANDed with %val.
; The expected code builds the mask from -1 with a right shift (shrl, or shrxl
; with BMI2), stores it, and ANDs it with %val. No bzhil appears in any
; configuration.
1299 define i32 @clear_highbits32_16_extrause(i32 %val, i32 %numlowbits, ptr %escape) nounwind {
1300 ; X86-NOBMI2-LABEL: clear_highbits32_16_extrause:
1301 ; X86-NOBMI2: # %bb.0:
1302 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
1303 ; X86-NOBMI2-NEXT: movb $16, %cl
1304 ; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
1305 ; X86-NOBMI2-NEXT: movl $-1, %eax
1306 ; X86-NOBMI2-NEXT: shrl %cl, %eax
1307 ; X86-NOBMI2-NEXT: movl %eax, (%edx)
1308 ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
1309 ; X86-NOBMI2-NEXT: retl
1311 ; X86-BMI2-LABEL: clear_highbits32_16_extrause:
1312 ; X86-BMI2: # %bb.0:
1313 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
1314 ; X86-BMI2-NEXT: movb $16, %al
1315 ; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
1316 ; X86-BMI2-NEXT: movl $-1, %edx
1317 ; X86-BMI2-NEXT: shrxl %eax, %edx, %eax
1318 ; X86-BMI2-NEXT: movl %eax, (%ecx)
1319 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
1320 ; X86-BMI2-NEXT: retl
1322 ; X64-NOBMI2-LABEL: clear_highbits32_16_extrause:
1323 ; X64-NOBMI2: # %bb.0:
1324 ; X64-NOBMI2-NEXT: movb $16, %cl
1325 ; X64-NOBMI2-NEXT: subb %sil, %cl
1326 ; X64-NOBMI2-NEXT: movl $-1, %eax
1327 ; X64-NOBMI2-NEXT: shrl %cl, %eax
1328 ; X64-NOBMI2-NEXT: movl %eax, (%rdx)
1329 ; X64-NOBMI2-NEXT: andl %edi, %eax
1330 ; X64-NOBMI2-NEXT: retq
1332 ; X64-BMI2-LABEL: clear_highbits32_16_extrause:
1333 ; X64-BMI2: # %bb.0:
1334 ; X64-BMI2-NEXT: movb $16, %al
1335 ; X64-BMI2-NEXT: subb %sil, %al
1336 ; X64-BMI2-NEXT: movl $-1, %ecx
1337 ; X64-BMI2-NEXT: shrxl %eax, %ecx, %eax
1338 ; X64-BMI2-NEXT: movl %eax, (%rdx)
1339 ; X64-BMI2-NEXT: andl %edi, %eax
1340 ; X64-BMI2-NEXT: retq
1341 %numhighbits = sub i32 16, %numlowbits
1342 %mask = lshr i32 -1, %numhighbits
; The store below is the mask's second use.
1343 store i32 %mask, ptr %escape
1344 %masked = and i32 %mask, %val
1347 define i32 @clear_highbits32_48_extrause(i32 %val, i32 %numlowbits, ptr %escape) nounwind {
1348 ; X86-NOBMI2-LABEL: clear_highbits32_48_extrause:
1349 ; X86-NOBMI2: # %bb.0:
1350 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
1351 ; X86-NOBMI2-NEXT: movb $48, %cl
1352 ; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
1353 ; X86-NOBMI2-NEXT: movl $-1, %eax
1354 ; X86-NOBMI2-NEXT: shrl %cl, %eax
1355 ; X86-NOBMI2-NEXT: movl %eax, (%edx)
1356 ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
1357 ; X86-NOBMI2-NEXT: retl
1359 ; X86-BMI2-LABEL: clear_highbits32_48_extrause:
1360 ; X86-BMI2: # %bb.0:
1361 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
1362 ; X86-BMI2-NEXT: movb $48, %al
1363 ; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
1364 ; X86-BMI2-NEXT: movl $-1, %edx
1365 ; X86-BMI2-NEXT: shrxl %eax, %edx, %eax
1366 ; X86-BMI2-NEXT: movl %eax, (%ecx)
1367 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
1368 ; X86-BMI2-NEXT: retl
1370 ; X64-NOBMI2-LABEL: clear_highbits32_48_extrause:
1371 ; X64-NOBMI2: # %bb.0:
1372 ; X64-NOBMI2-NEXT: movb $48, %cl
1373 ; X64-NOBMI2-NEXT: subb %sil, %cl
1374 ; X64-NOBMI2-NEXT: movl $-1, %eax
1375 ; X64-NOBMI2-NEXT: shrl %cl, %eax
1376 ; X64-NOBMI2-NEXT: movl %eax, (%rdx)
1377 ; X64-NOBMI2-NEXT: andl %edi, %eax
1378 ; X64-NOBMI2-NEXT: retq
1380 ; X64-BMI2-LABEL: clear_highbits32_48_extrause:
1381 ; X64-BMI2: # %bb.0:
1382 ; X64-BMI2-NEXT: movb $48, %al
1383 ; X64-BMI2-NEXT: subb %sil, %al
1384 ; X64-BMI2-NEXT: movl $-1, %ecx
1385 ; X64-BMI2-NEXT: shrxl %eax, %ecx, %eax
1386 ; X64-BMI2-NEXT: movl %eax, (%rdx)
1387 ; X64-BMI2-NEXT: andl %edi, %eax
1388 ; X64-BMI2-NEXT: retq
1389 %numhighbits = sub i32 48, %numlowbits
1390 %mask = lshr i32 -1, %numhighbits
1391 store i32 %mask, ptr %escape
1392 %masked = and i32 %mask, %val