; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck -check-prefix=SI %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=VI %s
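
; On SI the sub-32-bit operations below are left unchanged; on VI (tonga)
; the -amdgpu-codegenprepare pass widens them to i32 by zero- or
; sign-extending the operands, performing the operation in 32 bits, and
; truncating the result back to the original narrow type.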

define amdgpu_kernel void @add_i3(i3 %a, i3 %b) {
; SI-LABEL: @add_i3(
; SI-NEXT:    [[R:%.*]] = add i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @add_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = add i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @add_nsw_i3(
; SI-NEXT:    [[R:%.*]] = add nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nsw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = add nsw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nuw_i3(i3 %a, i3 %b) {
; SI-LABEL: @add_nuw_i3(
; SI-NEXT:    [[R:%.*]] = add nuw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nuw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = add nuw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nuw_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @add_nuw_nsw_i3(
; SI-NEXT:    [[R:%.*]] = add nuw nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nuw_nsw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = add nuw nsw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_i3(i3 %a, i3 %b) {
; SI-LABEL: @sub_i3(
; SI-NEXT:    [[R:%.*]] = sub i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = sub i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @sub_nsw_i3(
; SI-NEXT:    [[R:%.*]] = sub nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nsw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = sub nsw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nuw_i3(i3 %a, i3 %b) {
; SI-LABEL: @sub_nuw_i3(
; SI-NEXT:    [[R:%.*]] = sub nuw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nuw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = sub nuw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nuw_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @sub_nuw_nsw_i3(
; SI-NEXT:    [[R:%.*]] = sub nuw nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nuw_nsw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = sub nuw nsw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_i3(i3 %a, i3 %b) {
; SI-LABEL: @mul_i3(
; SI-NEXT:    [[R:%.*]] = mul i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = mul i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @mul_nsw_i3(
; SI-NEXT:    [[R:%.*]] = mul nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nsw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = mul nsw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nuw_i3(i3 %a, i3 %b) {
; SI-LABEL: @mul_nuw_i3(
; SI-NEXT:    [[R:%.*]] = mul nuw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nuw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = mul nuw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nuw_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @mul_nuw_nsw_i3(
; SI-NEXT:    [[R:%.*]] = mul nuw nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nuw_nsw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = mul nuw nsw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_i3(i3 %a, i3 %b) {
; SI-LABEL: @shl_i3(
; SI-NEXT:    [[R:%.*]] = shl i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = shl i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @shl_nsw_i3(
; SI-NEXT:    [[R:%.*]] = shl nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_nsw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = shl nsw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nuw_i3(i3 %a, i3 %b) {
; SI-LABEL: @shl_nuw_i3(
; SI-NEXT:    [[R:%.*]] = shl nuw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_nuw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = shl nuw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nuw_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @shl_nuw_nsw_i3(
; SI-NEXT:    [[R:%.*]] = shl nuw nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_nuw_nsw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = shl nuw nsw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @lshr_i3(i3 %a, i3 %b) {
; SI-LABEL: @lshr_i3(
; SI-NEXT:    [[R:%.*]] = lshr i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @lshr_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = lshr i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @lshr_exact_i3(i3 %a, i3 %b) {
; SI-LABEL: @lshr_exact_i3(
; SI-NEXT:    [[R:%.*]] = lshr exact i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @lshr_exact_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = lshr exact i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = lshr exact i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @ashr_i3(i3 %a, i3 %b) {
; SI-LABEL: @ashr_i3(
; SI-NEXT:    [[R:%.*]] = ashr i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @ashr_i3(
; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = ashr i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = ashr i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @ashr_exact_i3(i3 %a, i3 %b) {
; SI-LABEL: @ashr_exact_i3(
; SI-NEXT:    [[R:%.*]] = ashr exact i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @ashr_exact_i3(
; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = ashr exact i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = ashr exact i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @and_i3(i3 %a, i3 %b) {
; SI-LABEL: @and_i3(
; SI-NEXT:    [[R:%.*]] = and i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @and_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = and i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = and i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @or_i3(i3 %a, i3 %b) {
; SI-LABEL: @or_i3(
; SI-NEXT:    [[R:%.*]] = or i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @or_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = or i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @xor_i3(i3 %a, i3 %b) {
; SI-LABEL: @xor_i3(
; SI-NEXT:    [[R:%.*]] = xor i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @xor_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = xor i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_eq_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_eq_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp eq i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_eq_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp eq i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ne_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_ne_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp ne i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ne_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp ne i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ugt_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_ugt_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp ugt i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ugt_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp ugt i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp ugt i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_uge_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_uge_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp uge i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_uge_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp uge i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp uge i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ult_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_ult_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp ult i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ult_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp ult i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ule_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_ule_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp ule i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ule_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp ule i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sgt_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_sgt_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp sgt i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_sgt_i3(
; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = sext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp sgt i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sge_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_sge_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp sge i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_sge_i3(
; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp sge i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = sext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp sge i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_slt_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_slt_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp slt i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_slt_i3(
; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = sext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp slt i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sle_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_sle_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp sle i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_sle_i3(
; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = sext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp sle i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

declare i3 @llvm.bitreverse.i3(i3)
define amdgpu_kernel void @bitreverse_i3(i3 %a) {
; SI-LABEL: @bitreverse_i3(
; SI-NEXT:    [[BREV:%.*]] = call i3 @llvm.bitreverse.i3(i3 [[A:%.*]])
; SI-NEXT:    store volatile i3 [[BREV]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @bitreverse_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP1]])
; VI-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP2]], 29
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %brev = call i3 @llvm.bitreverse.i3(i3 %a)
  store volatile i3 %brev, ptr addrspace(1) undef
  ret void
}
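
; The same operations, repeated on i16.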

define amdgpu_kernel void @add_i16(i16 %a, i16 %b) {
; SI-LABEL: @add_i16(
; SI-NEXT:    [[R:%.*]] = add i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @add_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = add i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @constant_add_i16() {
; SI-LABEL: @constant_add_i16(
; SI-NEXT:    [[R:%.*]] = add i16 1, 2
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @constant_add_i16(
; VI-NEXT:    store volatile i16 3, ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = add i16 1, 2
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @constant_add_nsw_i16() {
; SI-LABEL: @constant_add_nsw_i16(
; SI-NEXT:    [[R:%.*]] = add nsw i16 1, 2
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @constant_add_nsw_i16(
; VI-NEXT:    store volatile i16 3, ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = add nsw i16 1, 2
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @constant_add_nuw_i16() {
; SI-LABEL: @constant_add_nuw_i16(
; SI-NEXT:    [[R:%.*]] = add nsw i16 1, 2
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @constant_add_nuw_i16(
; VI-NEXT:    store volatile i16 3, ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = add nsw i16 1, 2
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @add_nsw_i16(
; SI-NEXT:    [[R:%.*]] = add nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nsw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = add nsw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nuw_i16(i16 %a, i16 %b) {
; SI-LABEL: @add_nuw_i16(
; SI-NEXT:    [[R:%.*]] = add nuw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nuw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = add nuw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nuw_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @add_nuw_nsw_i16(
; SI-NEXT:    [[R:%.*]] = add nuw nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nuw_nsw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = add nuw nsw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_i16(i16 %a, i16 %b) {
; SI-LABEL: @sub_i16(
; SI-NEXT:    [[R:%.*]] = sub i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = sub i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @sub_nsw_i16(
; SI-NEXT:    [[R:%.*]] = sub nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nsw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = sub nsw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nuw_i16(i16 %a, i16 %b) {
; SI-LABEL: @sub_nuw_i16(
; SI-NEXT:    [[R:%.*]] = sub nuw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nuw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = sub nuw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nuw_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @sub_nuw_nsw_i16(
; SI-NEXT:    [[R:%.*]] = sub nuw nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nuw_nsw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = sub nuw nsw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_i16(i16 %a, i16 %b) {
; SI-LABEL: @mul_i16(
; SI-NEXT:    [[R:%.*]] = mul i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = mul i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @mul_nsw_i16(
; SI-NEXT:    [[R:%.*]] = mul nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nsw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = mul nsw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nuw_i16(i16 %a, i16 %b) {
; SI-LABEL: @mul_nuw_i16(
; SI-NEXT:    [[R:%.*]] = mul nuw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nuw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = mul nuw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nuw_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @mul_nuw_nsw_i16(
; SI-NEXT:    [[R:%.*]] = mul nuw nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nuw_nsw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = mul nuw nsw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_i16(i16 %a, i16 %b) {
; SI-LABEL: @shl_i16(
; SI-NEXT:    [[R:%.*]] = shl i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = shl i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @shl_nsw_i16(
; SI-NEXT:    [[R:%.*]] = shl nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_nsw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = shl nsw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nuw_i16(i16 %a, i16 %b) {
; SI-LABEL: @shl_nuw_i16(
; SI-NEXT:    [[R:%.*]] = shl nuw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_nuw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = shl nuw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nuw_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @shl_nuw_nsw_i16(
; SI-NEXT:    [[R:%.*]] = shl nuw nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_nuw_nsw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = shl nuw nsw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @lshr_i16(i16 %a, i16 %b) {
; SI-LABEL: @lshr_i16(
; SI-NEXT:    [[R:%.*]] = lshr i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @lshr_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = lshr i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @lshr_exact_i16(i16 %a, i16 %b) {
; SI-LABEL: @lshr_exact_i16(
; SI-NEXT:    [[R:%.*]] = lshr exact i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @lshr_exact_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = lshr exact i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = lshr exact i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @ashr_i16(i16 %a, i16 %b) {
; SI-LABEL: @ashr_i16(
; SI-NEXT:    [[R:%.*]] = ashr i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @ashr_i16(
; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = ashr i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = ashr i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @ashr_exact_i16(i16 %a, i16 %b) {
; SI-LABEL: @ashr_exact_i16(
; SI-NEXT:    [[R:%.*]] = ashr exact i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @ashr_exact_i16(
; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = ashr exact i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = ashr exact i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @constant_lshr_exact_i16(i16 %a, i16 %b) {
; SI-LABEL: @constant_lshr_exact_i16(
; SI-NEXT:    [[R:%.*]] = lshr exact i16 4, 1
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @constant_lshr_exact_i16(
; VI-NEXT:    store volatile i16 2, ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = lshr exact i16 4, 1
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @and_i16(i16 %a, i16 %b) {
; SI-LABEL: @and_i16(
; SI-NEXT:    [[R:%.*]] = and i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @and_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = and i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = and i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @or_i16(i16 %a, i16 %b) {
; SI-LABEL: @or_i16(
; SI-NEXT:    [[R:%.*]] = or i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @or_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = or i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @xor_i16(i16 %a, i16 %b) {
; SI-LABEL: @xor_i16(
; SI-NEXT:    [[R:%.*]] = xor i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @xor_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = xor i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_eq_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_eq_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp eq i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_eq_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp eq i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ne_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_ne_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp ne i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ne_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp ne i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ugt_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_ugt_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp ugt i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ugt_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp ugt i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp ugt i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_uge_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_uge_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp uge i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_uge_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp uge i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp uge i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ult_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_ult_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp ult i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ult_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp ult i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ule_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_ule_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp ule i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ule_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp ule i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sgt_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_sgt_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp sgt i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_sgt_i16(
; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = sext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp sgt i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sge_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_sge_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp sge i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_sge_i16(
; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp sge i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = sext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp sge i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_slt_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_slt_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_slt_i16(
; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = sext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp slt i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sle_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_sle_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp sle i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_sle_i16(
; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = sext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp sle i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

declare i16 @llvm.bitreverse.i16(i16)

define amdgpu_kernel void @bitreverse_i16(i16 %a) {
; SI-LABEL: @bitreverse_i16(
; SI-NEXT:    [[BREV:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[A:%.*]])
; SI-NEXT:    store volatile i16 [[BREV]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @bitreverse_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP1]])
; VI-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP2]], 16
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %brev = call i16 @llvm.bitreverse.i16(i16 %a)
  store volatile i16 %brev, ptr addrspace(1) undef
  ret void
}
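
; The same operations, repeated on the vector type <3 x i15>.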

define amdgpu_kernel void @add_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @add_3xi15(
; SI-NEXT:    [[R:%.*]] = add <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @add_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = add <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @add_nsw_3xi15(
; SI-NEXT:    [[R:%.*]] = add nsw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nsw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = add nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @add_nuw_3xi15(
; SI-NEXT:    [[R:%.*]] = add nuw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nuw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = add nuw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @add_nuw_nsw_3xi15(
; SI-NEXT:    [[R:%.*]] = add nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nuw_nsw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = add nuw nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @sub_3xi15(
; SI-NEXT:    [[R:%.*]] = sub <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = sub <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @sub_nsw_3xi15(
; SI-NEXT:    [[R:%.*]] = sub nsw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nsw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = sub nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @sub_nuw_3xi15(
; SI-NEXT:    [[R:%.*]] = sub nuw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nuw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = sub nuw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @sub_nuw_nsw_3xi15(
; SI-NEXT:    [[R:%.*]] = sub nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nuw_nsw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = sub nuw nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @mul_3xi15(
; SI-NEXT:    [[R:%.*]] = mul <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = mul <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @mul_nsw_3xi15(
; SI-NEXT:    [[R:%.*]] = mul nsw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nsw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = mul nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @mul_nuw_3xi15(
; SI-NEXT:    [[R:%.*]] = mul nuw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nuw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = mul nuw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @mul_nuw_nsw_3xi15(
; SI-NEXT:    [[R:%.*]] = mul nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nuw_nsw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = mul nuw nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}
define amdgpu_kernel void @shl_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @shl_3xi15(
; SI-NEXT: [[R:%.*]] = shl <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = shl <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @shl_nsw_3xi15(
; SI-NEXT: [[R:%.*]] = shl nsw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nsw_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = shl nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @shl_nuw_3xi15(
; SI-NEXT: [[R:%.*]] = shl nuw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = shl nuw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @shl_nuw_nsw_3xi15(
; SI-NEXT: [[R:%.*]] = shl nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_nsw_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = shl nuw nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @lshr_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @lshr_3xi15(
; SI-NEXT: [[R:%.*]] = lshr <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = lshr <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @lshr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @lshr_exact_3xi15(
; SI-NEXT: [[R:%.*]] = lshr exact <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_exact_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = lshr exact <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = lshr exact <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @ashr_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @ashr_3xi15(
; SI-NEXT: [[R:%.*]] = ashr <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_3xi15(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = ashr <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = ashr <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @ashr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @ashr_exact_3xi15(
; SI-NEXT: [[R:%.*]] = ashr exact <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_exact_3xi15(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = ashr exact <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = ashr exact <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @and_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @and_3xi15(
; SI-NEXT: [[R:%.*]] = and <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @and_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = and <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = and <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @or_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @or_3xi15(
; SI-NEXT: [[R:%.*]] = or <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @or_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = or <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = or <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @xor_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @xor_3xi15(
; SI-NEXT: [[R:%.*]] = xor <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @xor_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = xor <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = xor <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

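; The select cases below widen both the compared and the selected operands on
; VI as shown by the checks: equality and unsigned predicates extend with
; zext, signed predicates with sext.
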
define amdgpu_kernel void @select_eq_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_eq_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp eq <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_eq_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp eq <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp eq <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ne_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_ne_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp ne <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ne_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp ne <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ugt_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_ugt_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp ugt <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ugt_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp ugt <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp ugt <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_uge_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_uge_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp uge <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_uge_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp uge <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp uge <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ult_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_ult_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp ult <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ult_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp ult <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp ult <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ule_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_ule_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp ule <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ule_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp ule <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp ule <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sgt_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_sgt_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp sgt <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sgt_3xi15(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp sgt <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp sgt <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sge_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_sge_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp sge <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sge_3xi15(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp sge <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp sge <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_slt_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_slt_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp slt <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_slt_3xi15(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp slt <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp slt <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sle_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_sle_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp sle <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sle_3xi15(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp sle <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp sle <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

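; For bitreverse, the widened form reverses the zero-extended 32-bit value and
; shifts right to realign the reversed bits: by 32 - 15 = 17 bits here, and by
; 32 - 16 = 16 bits in the <3 x i16> variant near the end of the file.
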
declare <3 x i15> @llvm.bitreverse.v3i15(<3 x i15>)
define amdgpu_kernel void @bitreverse_3xi15(<3 x i15> %a) {
; SI-LABEL: @bitreverse_3xi15(
; SI-NEXT: [[BREV:%.*]] = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> [[A:%.*]])
; SI-NEXT: store volatile <3 x i15> [[BREV]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @bitreverse_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> [[TMP1]])
; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], <i32 17, i32 17, i32 17>
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %brev = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> %a)
  store volatile <3 x i15> %brev, ptr addrspace(1) undef
  ret void
}

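; The <3 x i16> cases below mirror the <3 x i15> cases above: on VI each
; operation is widened to <3 x i32> (operands extended, wrap flags inferred on
; the wide operation, result truncated back), while on SI the original narrow
; operation is left in place.
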
define amdgpu_kernel void @add_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @add_3xi16(
; SI-NEXT: [[R:%.*]] = add <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @add_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = add <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @add_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = add nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @add_nsw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = add nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @add_nuw_3xi16(
; SI-NEXT: [[R:%.*]] = add nuw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @add_nuw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = add nuw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @add_nuw_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = add nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @add_nuw_nsw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = add nuw nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @sub_3xi16(
; SI-NEXT: [[R:%.*]] = sub <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @sub_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = sub <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @sub_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = sub nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nsw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = sub nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @sub_nuw_3xi16(
; SI-NEXT: [[R:%.*]] = sub nuw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nuw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = sub nuw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @sub_nuw_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = sub nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nuw_nsw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = sub nuw nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @mul_3xi16(
; SI-NEXT: [[R:%.*]] = mul <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @mul_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = mul <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @mul_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = mul nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nsw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = mul nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @mul_nuw_3xi16(
; SI-NEXT: [[R:%.*]] = mul nuw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nuw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = mul nuw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @mul_nuw_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = mul nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nuw_nsw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = mul nuw nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @shl_3xi16(
; SI-NEXT: [[R:%.*]] = shl <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = shl <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @shl_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = shl nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nsw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = shl nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @shl_nuw_3xi16(
; SI-NEXT: [[R:%.*]] = shl nuw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = shl nuw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @shl_nuw_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = shl nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_nsw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = shl nuw nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @lshr_3xi16(
; SI-NEXT: [[R:%.*]] = lshr <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = lshr <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @lshr_exact_3xi16(
; SI-NEXT: [[R:%.*]] = lshr exact <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_exact_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = lshr exact <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = lshr exact <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @ashr_3xi16(
; SI-NEXT: [[R:%.*]] = ashr <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_3xi16(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = ashr <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = ashr <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @ashr_exact_3xi16(
; SI-NEXT: [[R:%.*]] = ashr exact <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_exact_3xi16(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = ashr exact <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = ashr exact <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @and_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @and_3xi16(
; SI-NEXT: [[R:%.*]] = and <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @and_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = and <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = and <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @or_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @or_3xi16(
; SI-NEXT: [[R:%.*]] = or <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @or_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = or <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = or <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @xor_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @xor_3xi16(
; SI-NEXT: [[R:%.*]] = xor <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @xor_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = xor <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = xor <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_eq_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp eq <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_eq_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp eq <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp eq <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_ne_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp ne <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ne_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp ne <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_ugt_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp ugt <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ugt_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp ugt <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp ugt <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_uge_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp uge <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_uge_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp uge <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp uge <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_ult_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp ult <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ult_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp ult <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp ult <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_ule_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp ule <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ule_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp ule <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp ule <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_sgt_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp sgt <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sgt_3xi16(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp sgt <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp sgt <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_sge_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp sge <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sge_3xi16(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp sge <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp sge <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_slt_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp slt <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_slt_3xi16(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp slt <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp slt <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_sle_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp sle <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sle_3xi16(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp sle <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp sle <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

declare <3 x i16> @llvm.bitreverse.v3i16(<3 x i16>)

define amdgpu_kernel void @bitreverse_3xi16(<3 x i16> %a) {
; SI-LABEL: @bitreverse_3xi16(
; SI-NEXT: [[BREV:%.*]] = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> [[A:%.*]])
; SI-NEXT: store volatile <3 x i16> [[BREV]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @bitreverse_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> [[TMP1]])
; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], <i32 16, i32 16, i32 16>
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a)
  store volatile <3 x i16> %brev, ptr addrspace(1) undef
  ret void
}