1 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
5 ; SI: %r = add i3 %a, %b
6 ; SI-NEXT: store volatile i3 %r
7 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
8 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
9 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
10 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
11 ; VI-NEXT: store volatile i3 %[[R_3]]
12 define amdgpu_kernel void @add_i3(i3 %a, i3 %b) {
14 store volatile i3 %r, i3 addrspace(1)* undef
18 ; GCN-LABEL: @add_nsw_i3(
19 ; SI: %r = add nsw i3 %a, %b
20 ; SI-NEXT: store volatile i3 %r
21 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
22 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
23 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
24 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
25 ; VI-NEXT: store volatile i3 %[[R_3]]
26 define amdgpu_kernel void @add_nsw_i3(i3 %a, i3 %b) {
27 %r = add nsw i3 %a, %b
28 store volatile i3 %r, i3 addrspace(1)* undef
32 ; GCN-LABEL: @add_nuw_i3(
33 ; SI: %r = add nuw i3 %a, %b
34 ; SI-NEXT: store volatile i3 %r
35 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
36 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
37 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
38 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
39 ; VI-NEXT: store volatile i3 %[[R_3]]
40 define amdgpu_kernel void @add_nuw_i3(i3 %a, i3 %b) {
41 %r = add nuw i3 %a, %b
42 store volatile i3 %r, i3 addrspace(1)* undef
46 ; GCN-LABEL: @add_nuw_nsw_i3(
47 ; SI: %r = add nuw nsw i3 %a, %b
48 ; SI-NEXT: store volatile i3 %r
49 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
50 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
51 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
52 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
53 ; VI-NEXT: store volatile i3 %[[R_3]]
54 define amdgpu_kernel void @add_nuw_nsw_i3(i3 %a, i3 %b) {
55 %r = add nuw nsw i3 %a, %b
56 store volatile i3 %r, i3 addrspace(1)* undef
61 ; SI: %r = sub i3 %a, %b
62 ; SI-NEXT: store volatile i3 %r
63 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
64 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
65 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]]
66 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
67 ; VI-NEXT: store volatile i3 %[[R_3]]
68 define amdgpu_kernel void @sub_i3(i3 %a, i3 %b) {
70 store volatile i3 %r, i3 addrspace(1)* undef
74 ; GCN-LABEL: @sub_nsw_i3(
75 ; SI: %r = sub nsw i3 %a, %b
76 ; SI-NEXT: store volatile i3 %r
77 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
78 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
79 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]]
80 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
81 ; VI-NEXT: store volatile i3 %[[R_3]]
82 define amdgpu_kernel void @sub_nsw_i3(i3 %a, i3 %b) {
83 %r = sub nsw i3 %a, %b
84 store volatile i3 %r, i3 addrspace(1)* undef
88 ; GCN-LABEL: @sub_nuw_i3(
89 ; SI: %r = sub nuw i3 %a, %b
90 ; SI-NEXT: store volatile i3 %r
91 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
92 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
93 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]]
94 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
95 ; VI-NEXT: store volatile i3 %[[R_3]]
96 define amdgpu_kernel void @sub_nuw_i3(i3 %a, i3 %b) {
97 %r = sub nuw i3 %a, %b
98 store volatile i3 %r, i3 addrspace(1)* undef
102 ; GCN-LABEL: @sub_nuw_nsw_i3(
103 ; SI: %r = sub nuw nsw i3 %a, %b
104 ; SI-NEXT: store volatile i3 %r
105 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
106 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
107 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]]
108 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
109 ; VI-NEXT: store volatile i3 %[[R_3]]
110 define amdgpu_kernel void @sub_nuw_nsw_i3(i3 %a, i3 %b) {
111 %r = sub nuw nsw i3 %a, %b
112 store volatile i3 %r, i3 addrspace(1)* undef
116 ; GCN-LABEL: @mul_i3(
117 ; SI: %r = mul i3 %a, %b
118 ; SI-NEXT: store volatile i3 %r
119 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
120 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
121 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]]
122 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
123 ; VI-NEXT: store volatile i3 %[[R_3]]
124 define amdgpu_kernel void @mul_i3(i3 %a, i3 %b) {
126 store volatile i3 %r, i3 addrspace(1)* undef
130 ; GCN-LABEL: @mul_nsw_i3(
131 ; SI: %r = mul nsw i3 %a, %b
132 ; SI-NEXT: store volatile i3 %r
133 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
134 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
135 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]]
136 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
137 ; VI-NEXT: store volatile i3 %[[R_3]]
138 define amdgpu_kernel void @mul_nsw_i3(i3 %a, i3 %b) {
139 %r = mul nsw i3 %a, %b
140 store volatile i3 %r, i3 addrspace(1)* undef
144 ; GCN-LABEL: @mul_nuw_i3(
145 ; SI: %r = mul nuw i3 %a, %b
146 ; SI-NEXT: store volatile i3 %r
147 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
148 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
149 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]]
150 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
151 ; VI-NEXT: store volatile i3 %[[R_3]]
152 define amdgpu_kernel void @mul_nuw_i3(i3 %a, i3 %b) {
153 %r = mul nuw i3 %a, %b
154 store volatile i3 %r, i3 addrspace(1)* undef
158 ; GCN-LABEL: @mul_nuw_nsw_i3(
159 ; SI: %r = mul nuw nsw i3 %a, %b
160 ; SI-NEXT: store volatile i3 %r
161 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
162 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
163 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]]
164 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
165 ; VI-NEXT: store volatile i3 %[[R_3]]
166 define amdgpu_kernel void @mul_nuw_nsw_i3(i3 %a, i3 %b) {
167 %r = mul nuw nsw i3 %a, %b
168 store volatile i3 %r, i3 addrspace(1)* undef
172 ; GCN-LABEL: @shl_i3(
173 ; SI: %r = shl i3 %a, %b
174 ; SI-NEXT: store volatile i3 %r
175 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
176 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
177 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
178 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
179 ; VI-NEXT: store volatile i3 %[[R_3]]
180 define amdgpu_kernel void @shl_i3(i3 %a, i3 %b) {
182 store volatile i3 %r, i3 addrspace(1)* undef
186 ; GCN-LABEL: @shl_nsw_i3(
187 ; SI: %r = shl nsw i3 %a, %b
188 ; SI-NEXT: store volatile i3 %r
189 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
190 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
191 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
192 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
193 ; VI-NEXT: store volatile i3 %[[R_3]]
194 define amdgpu_kernel void @shl_nsw_i3(i3 %a, i3 %b) {
195 %r = shl nsw i3 %a, %b
196 store volatile i3 %r, i3 addrspace(1)* undef
200 ; GCN-LABEL: @shl_nuw_i3(
201 ; SI: %r = shl nuw i3 %a, %b
202 ; SI-NEXT: store volatile i3 %r
203 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
204 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
205 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
206 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
207 ; VI-NEXT: store volatile i3 %[[R_3]]
208 define amdgpu_kernel void @shl_nuw_i3(i3 %a, i3 %b) {
209 %r = shl nuw i3 %a, %b
210 store volatile i3 %r, i3 addrspace(1)* undef
214 ; GCN-LABEL: @shl_nuw_nsw_i3(
215 ; SI: %r = shl nuw nsw i3 %a, %b
216 ; SI-NEXT: store volatile i3 %r
217 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
218 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
219 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
220 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
221 ; VI-NEXT: store volatile i3 %[[R_3]]
222 define amdgpu_kernel void @shl_nuw_nsw_i3(i3 %a, i3 %b) {
223 %r = shl nuw nsw i3 %a, %b
224 store volatile i3 %r, i3 addrspace(1)* undef
228 ; GCN-LABEL: @lshr_i3(
229 ; SI: %r = lshr i3 %a, %b
230 ; SI-NEXT: store volatile i3 %r
231 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
232 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
233 ; VI-NEXT: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]]
234 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
235 ; VI-NEXT: store volatile i3 %[[R_3]]
236 define amdgpu_kernel void @lshr_i3(i3 %a, i3 %b) {
238 store volatile i3 %r, i3 addrspace(1)* undef
242 ; GCN-LABEL: @lshr_exact_i3(
243 ; SI: %r = lshr exact i3 %a, %b
244 ; SI-NEXT: store volatile i3 %r
245 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
246 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
247 ; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]]
248 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
249 ; VI-NEXT: store volatile i3 %[[R_3]]
250 define amdgpu_kernel void @lshr_exact_i3(i3 %a, i3 %b) {
251 %r = lshr exact i3 %a, %b
252 store volatile i3 %r, i3 addrspace(1)* undef
256 ; GCN-LABEL: @ashr_i3(
257 ; SI: %r = ashr i3 %a, %b
258 ; SI-NEXT: store volatile i3 %r
259 ; VI: %[[A_32:[0-9]+]] = sext i3 %a to i32
260 ; VI-NEXT: %[[B_32:[0-9]+]] = sext i3 %b to i32
261 ; VI-NEXT: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]]
262 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
263 ; VI-NEXT: store volatile i3 %[[R_3]]
264 define amdgpu_kernel void @ashr_i3(i3 %a, i3 %b) {
266 store volatile i3 %r, i3 addrspace(1)* undef
270 ; GCN-LABEL: @ashr_exact_i3(
271 ; SI: %r = ashr exact i3 %a, %b
272 ; SI-NEXT: store volatile i3 %r
273 ; VI: %[[A_32:[0-9]+]] = sext i3 %a to i32
274 ; VI-NEXT: %[[B_32:[0-9]+]] = sext i3 %b to i32
275 ; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]]
276 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
277 ; VI-NEXT: store volatile i3 %[[R_3]]
278 define amdgpu_kernel void @ashr_exact_i3(i3 %a, i3 %b) {
279 %r = ashr exact i3 %a, %b
280 store volatile i3 %r, i3 addrspace(1)* undef
284 ; GCN-LABEL: @and_i3(
285 ; SI: %r = and i3 %a, %b
286 ; SI-NEXT: store volatile i3 %r
287 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
288 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
289 ; VI-NEXT: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]]
290 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
291 ; VI-NEXT: store volatile i3 %[[R_3]]
292 define amdgpu_kernel void @and_i3(i3 %a, i3 %b) {
294 store volatile i3 %r, i3 addrspace(1)* undef
299 ; SI: %r = or i3 %a, %b
300 ; SI-NEXT: store volatile i3 %r
301 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
302 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
303 ; VI-NEXT: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]]
304 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
305 ; VI-NEXT: store volatile i3 %[[R_3]]
306 define amdgpu_kernel void @or_i3(i3 %a, i3 %b) {
308 store volatile i3 %r, i3 addrspace(1)* undef
312 ; GCN-LABEL: @xor_i3(
313 ; SI: %r = xor i3 %a, %b
314 ; SI-NEXT: store volatile i3 %r
315 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
316 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
317 ; VI-NEXT: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]]
318 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
319 ; VI-NEXT: store volatile i3 %[[R_3]]
320 define amdgpu_kernel void @xor_i3(i3 %a, i3 %b) {
322 store volatile i3 %r, i3 addrspace(1)* undef
326 ; GCN-LABEL: @select_eq_i3(
327 ; SI: %cmp = icmp eq i3 %a, %b
328 ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
329 ; SI-NEXT: store volatile i3 %sel
330 ; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
331 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
332 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]]
333 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32
334 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
335 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
336 ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
337 ; VI-NEXT: store volatile i3 %[[SEL_3]]
338 define amdgpu_kernel void @select_eq_i3(i3 %a, i3 %b) {
339 %cmp = icmp eq i3 %a, %b
340 %sel = select i1 %cmp, i3 %a, i3 %b
341 store volatile i3 %sel, i3 addrspace(1)* undef
345 ; GCN-LABEL: @select_ne_i3(
346 ; SI: %cmp = icmp ne i3 %a, %b
347 ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
348 ; SI-NEXT: store volatile i3 %sel
349 ; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
350 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
351 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]]
352 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32
353 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
354 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
355 ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
356 ; VI-NEXT: store volatile i3 %[[SEL_3]]
357 define amdgpu_kernel void @select_ne_i3(i3 %a, i3 %b) {
358 %cmp = icmp ne i3 %a, %b
359 %sel = select i1 %cmp, i3 %a, i3 %b
360 store volatile i3 %sel, i3 addrspace(1)* undef
364 ; GCN-LABEL: @select_ugt_i3(
365 ; SI: %cmp = icmp ugt i3 %a, %b
366 ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
367 ; SI-NEXT: store volatile i3 %sel
368 ; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
369 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
370 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]]
371 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32
372 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
373 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
374 ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
375 ; VI-NEXT: store volatile i3 %[[SEL_3]]
376 define amdgpu_kernel void @select_ugt_i3(i3 %a, i3 %b) {
377 %cmp = icmp ugt i3 %a, %b
378 %sel = select i1 %cmp, i3 %a, i3 %b
379 store volatile i3 %sel, i3 addrspace(1)* undef
383 ; GCN-LABEL: @select_uge_i3(
384 ; SI: %cmp = icmp uge i3 %a, %b
385 ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
386 ; SI-NEXT: store volatile i3 %sel
387 ; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
388 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
389 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]]
390 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32
391 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
392 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
393 ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
394 ; VI-NEXT: store volatile i3 %[[SEL_3]]
395 define amdgpu_kernel void @select_uge_i3(i3 %a, i3 %b) {
396 %cmp = icmp uge i3 %a, %b
397 %sel = select i1 %cmp, i3 %a, i3 %b
398 store volatile i3 %sel, i3 addrspace(1)* undef
402 ; GCN-LABEL: @select_ult_i3(
403 ; SI: %cmp = icmp ult i3 %a, %b
404 ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
405 ; SI-NEXT: store volatile i3 %sel
406 ; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
407 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
408 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]]
409 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32
410 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
411 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
412 ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
413 ; VI-NEXT: store volatile i3 %[[SEL_3]]
414 define amdgpu_kernel void @select_ult_i3(i3 %a, i3 %b) {
415 %cmp = icmp ult i3 %a, %b
416 %sel = select i1 %cmp, i3 %a, i3 %b
417 store volatile i3 %sel, i3 addrspace(1)* undef
421 ; GCN-LABEL: @select_ule_i3(
422 ; SI: %cmp = icmp ule i3 %a, %b
423 ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
424 ; SI-NEXT: store volatile i3 %sel
425 ; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
426 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
427 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]]
428 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32
429 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
430 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
431 ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
432 ; VI-NEXT: store volatile i3 %[[SEL_3]]
433 define amdgpu_kernel void @select_ule_i3(i3 %a, i3 %b) {
434 %cmp = icmp ule i3 %a, %b
435 %sel = select i1 %cmp, i3 %a, i3 %b
436 store volatile i3 %sel, i3 addrspace(1)* undef
440 ; GCN-LABEL: @select_sgt_i3(
441 ; SI: %cmp = icmp sgt i3 %a, %b
442 ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
443 ; SI-NEXT: store volatile i3 %sel
444 ; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32
445 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32
446 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]]
447 ; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i3 %a to i32
448 ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32
449 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
450 ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
451 ; VI-NEXT: store volatile i3 %[[SEL_3]]
452 define amdgpu_kernel void @select_sgt_i3(i3 %a, i3 %b) {
453 %cmp = icmp sgt i3 %a, %b
454 %sel = select i1 %cmp, i3 %a, i3 %b
455 store volatile i3 %sel, i3 addrspace(1)* undef
459 ; GCN-LABEL: @select_sge_i3(
460 ; SI: %cmp = icmp sge i3 %a, %b
461 ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
462 ; SI-NEXT: store volatile i3 %sel
463 ; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32
464 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32
465 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]]
466 ; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i3 %a to i32
467 ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32
468 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
469 ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
470 ; VI-NEXT: store volatile i3 %[[SEL_3]]
471 define amdgpu_kernel void @select_sge_i3(i3 %a, i3 %b) {
472 %cmp = icmp sge i3 %a, %b
473 %sel = select i1 %cmp, i3 %a, i3 %b
474 store volatile i3 %sel, i3 addrspace(1)* undef
478 ; GCN-LABEL: @select_slt_i3(
479 ; SI: %cmp = icmp slt i3 %a, %b
480 ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
481 ; SI-NEXT: store volatile i3 %sel
482 ; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32
483 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32
484 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]]
485 ; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i3 %a to i32
486 ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32
487 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
488 ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
489 ; VI-NEXT: store volatile i3 %[[SEL_3]]
490 define amdgpu_kernel void @select_slt_i3(i3 %a, i3 %b) {
491 %cmp = icmp slt i3 %a, %b
492 %sel = select i1 %cmp, i3 %a, i3 %b
493 store volatile i3 %sel, i3 addrspace(1)* undef
497 ; GCN-LABEL: @select_sle_i3(
498 ; SI: %cmp = icmp sle i3 %a, %b
499 ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
500 ; SI-NEXT: store volatile i3 %sel
501 ; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32
502 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32
503 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]]
504 ; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i3 %a to i32
505 ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32
506 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
507 ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
508 ; VI-NEXT: store volatile i3 %[[SEL_3]]
509 define amdgpu_kernel void @select_sle_i3(i3 %a, i3 %b) {
510 %cmp = icmp sle i3 %a, %b
511 %sel = select i1 %cmp, i3 %a, i3 %b
512 store volatile i3 %sel, i3 addrspace(1)* undef
516 declare i3 @llvm.bitreverse.i3(i3)
517 ; GCN-LABEL: @bitreverse_i3(
518 ; SI: %brev = call i3 @llvm.bitreverse.i3(i3 %a)
519 ; SI-NEXT: store volatile i3 %brev
520 ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
521 ; VI-NEXT: %[[R_32:[0-9]+]] = call i32 @llvm.bitreverse.i32(i32 %[[A_32]])
522 ; VI-NEXT: %[[S_32:[0-9]+]] = lshr i32 %[[R_32]], 29
523 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[S_32]] to i3
524 ; VI-NEXT: store volatile i3 %[[R_3]]
525 define amdgpu_kernel void @bitreverse_i3(i3 %a) {
526 %brev = call i3 @llvm.bitreverse.i3(i3 %a)
527 store volatile i3 %brev, i3 addrspace(1)* undef
531 ; GCN-LABEL: @add_i16(
532 ; SI: %r = add i16 %a, %b
533 ; SI-NEXT: store volatile i16 %r
534 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
535 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
536 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
537 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
538 ; VI-NEXT: store volatile i16 %[[R_16]]
539 define amdgpu_kernel void @add_i16(i16 %a, i16 %b) {
541 store volatile i16 %r, i16 addrspace(1)* undef
545 ; GCN-LABEL: @constant_add_i16(
546 ; VI: store volatile i16 3
547 define amdgpu_kernel void @constant_add_i16() {
549 store volatile i16 %r, i16 addrspace(1)* undef
553 ; GCN-LABEL: @constant_add_nsw_i16(
554 ; VI: store volatile i16 3
555 define amdgpu_kernel void @constant_add_nsw_i16() {
556 %r = add nsw i16 1, 2
557 store volatile i16 %r, i16 addrspace(1)* undef
561 ; GCN-LABEL: @constant_add_nuw_i16(
562 ; VI: store volatile i16 3
563 define amdgpu_kernel void @constant_add_nuw_i16() {
564 %r = add nuw i16 1, 2 ; was 'nsw' — typo; this test must exercise the nuw flag (cf. @constant_add_nsw_i16); folded result is still 3, matching the VI CHECK line
565 store volatile i16 %r, i16 addrspace(1)* undef
569 ; GCN-LABEL: @add_nsw_i16(
570 ; SI: %r = add nsw i16 %a, %b
571 ; SI-NEXT: store volatile i16 %r
572 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
573 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
574 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
575 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
576 ; VI-NEXT: store volatile i16 %[[R_16]]
577 define amdgpu_kernel void @add_nsw_i16(i16 %a, i16 %b) {
578 %r = add nsw i16 %a, %b
579 store volatile i16 %r, i16 addrspace(1)* undef
583 ; GCN-LABEL: @add_nuw_i16(
584 ; SI: %r = add nuw i16 %a, %b
585 ; SI-NEXT: store volatile i16 %r
586 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
587 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
588 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
589 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
590 ; VI-NEXT: store volatile i16 %[[R_16]]
591 define amdgpu_kernel void @add_nuw_i16(i16 %a, i16 %b) {
592 %r = add nuw i16 %a, %b
593 store volatile i16 %r, i16 addrspace(1)* undef
597 ; GCN-LABEL: @add_nuw_nsw_i16(
598 ; SI: %r = add nuw nsw i16 %a, %b
599 ; SI-NEXT: store volatile i16 %r
600 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
601 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
602 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
603 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
604 ; VI-NEXT: store volatile i16 %[[R_16]]
605 define amdgpu_kernel void @add_nuw_nsw_i16(i16 %a, i16 %b) {
606 %r = add nuw nsw i16 %a, %b
607 store volatile i16 %r, i16 addrspace(1)* undef
611 ; GCN-LABEL: @sub_i16(
612 ; SI: %r = sub i16 %a, %b
613 ; SI-NEXT: store volatile i16 %r
614 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
615 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
616 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]]
617 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
618 ; VI-NEXT: store volatile i16 %[[R_16]]
619 define amdgpu_kernel void @sub_i16(i16 %a, i16 %b) {
621 store volatile i16 %r, i16 addrspace(1)* undef
625 ; GCN-LABEL: @sub_nsw_i16(
626 ; SI: %r = sub nsw i16 %a, %b
627 ; SI-NEXT: store volatile i16 %r
628 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
629 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
630 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]]
631 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
632 ; VI-NEXT: store volatile i16 %[[R_16]]
633 define amdgpu_kernel void @sub_nsw_i16(i16 %a, i16 %b) {
634 %r = sub nsw i16 %a, %b
635 store volatile i16 %r, i16 addrspace(1)* undef
639 ; GCN-LABEL: @sub_nuw_i16(
640 ; SI: %r = sub nuw i16 %a, %b
641 ; SI-NEXT: store volatile i16 %r
642 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
643 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
644 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]]
645 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
646 ; VI-NEXT: store volatile i16 %[[R_16]]
647 define amdgpu_kernel void @sub_nuw_i16(i16 %a, i16 %b) {
648 %r = sub nuw i16 %a, %b
649 store volatile i16 %r, i16 addrspace(1)* undef
653 ; GCN-LABEL: @sub_nuw_nsw_i16(
654 ; SI: %r = sub nuw nsw i16 %a, %b
655 ; SI-NEXT: store volatile i16 %r
656 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
657 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
658 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]]
659 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
660 ; VI-NEXT: store volatile i16 %[[R_16]]
661 define amdgpu_kernel void @sub_nuw_nsw_i16(i16 %a, i16 %b) {
662 %r = sub nuw nsw i16 %a, %b
663 store volatile i16 %r, i16 addrspace(1)* undef
667 ; GCN-LABEL: @mul_i16(
668 ; SI: %r = mul i16 %a, %b
669 ; SI-NEXT: store volatile i16 %r
670 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
671 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
672 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]]
673 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
674 ; VI-NEXT: store volatile i16 %[[R_16]]
675 define amdgpu_kernel void @mul_i16(i16 %a, i16 %b) {
677 store volatile i16 %r, i16 addrspace(1)* undef
681 ; GCN-LABEL: @mul_nsw_i16(
682 ; SI: %r = mul nsw i16 %a, %b
683 ; SI-NEXT: store volatile i16 %r
684 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
685 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
686 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]]
687 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
688 ; VI-NEXT: store volatile i16 %[[R_16]]
689 define amdgpu_kernel void @mul_nsw_i16(i16 %a, i16 %b) {
690 %r = mul nsw i16 %a, %b
691 store volatile i16 %r, i16 addrspace(1)* undef
695 ; GCN-LABEL: @mul_nuw_i16(
696 ; SI: %r = mul nuw i16 %a, %b
697 ; SI-NEXT: store volatile i16 %r
698 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
699 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
700 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]]
701 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
702 ; VI-NEXT: store volatile i16 %[[R_16]]
703 define amdgpu_kernel void @mul_nuw_i16(i16 %a, i16 %b) {
704 %r = mul nuw i16 %a, %b
705 store volatile i16 %r, i16 addrspace(1)* undef
709 ; GCN-LABEL: @mul_nuw_nsw_i16(
710 ; SI: %r = mul nuw nsw i16 %a, %b
711 ; SI-NEXT: store volatile i16 %r
712 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
713 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
714 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]]
715 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
716 ; VI-NEXT: store volatile i16 %[[R_16]]
717 define amdgpu_kernel void @mul_nuw_nsw_i16(i16 %a, i16 %b) {
718 %r = mul nuw nsw i16 %a, %b
719 store volatile i16 %r, i16 addrspace(1)* undef
723 ; GCN-LABEL: @shl_i16(
724 ; SI: %r = shl i16 %a, %b
725 ; SI-NEXT: store volatile i16 %r
726 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
727 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
728 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
729 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
730 ; VI-NEXT: store volatile i16 %[[R_16]]
731 define amdgpu_kernel void @shl_i16(i16 %a, i16 %b) {
733 store volatile i16 %r, i16 addrspace(1)* undef
737 ; GCN-LABEL: @shl_nsw_i16(
738 ; SI: %r = shl nsw i16 %a, %b
739 ; SI-NEXT: store volatile i16 %r
740 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
741 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
742 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
743 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
744 ; VI-NEXT: store volatile i16 %[[R_16]]
745 define amdgpu_kernel void @shl_nsw_i16(i16 %a, i16 %b) {
746 %r = shl nsw i16 %a, %b
747 store volatile i16 %r, i16 addrspace(1)* undef
751 ; GCN-LABEL: @shl_nuw_i16(
752 ; SI: %r = shl nuw i16 %a, %b
753 ; SI-NEXT: store volatile i16 %r
754 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
755 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
756 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
757 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
758 ; VI-NEXT: store volatile i16 %[[R_16]]
759 define amdgpu_kernel void @shl_nuw_i16(i16 %a, i16 %b) {
760 %r = shl nuw i16 %a, %b
761 store volatile i16 %r, i16 addrspace(1)* undef
765 ; GCN-LABEL: @shl_nuw_nsw_i16(
766 ; SI: %r = shl nuw nsw i16 %a, %b
767 ; SI-NEXT: store volatile i16 %r
768 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
769 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
770 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
771 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
772 ; VI-NEXT: store volatile i16 %[[R_16]]
773 define amdgpu_kernel void @shl_nuw_nsw_i16(i16 %a, i16 %b) {
774 %r = shl nuw nsw i16 %a, %b
775 store volatile i16 %r, i16 addrspace(1)* undef
779 ; GCN-LABEL: @lshr_i16(
780 ; SI: %r = lshr i16 %a, %b
781 ; SI-NEXT: store volatile i16 %r
782 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
783 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
784 ; VI-NEXT: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]]
785 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
786 ; VI-NEXT: store volatile i16 %[[R_16]]
787 define amdgpu_kernel void @lshr_i16(i16 %a, i16 %b) {
789 store volatile i16 %r, i16 addrspace(1)* undef
793 ; GCN-LABEL: @lshr_exact_i16(
794 ; SI: %r = lshr exact i16 %a, %b
795 ; SI-NEXT: store volatile i16 %r
796 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
797 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
798 ; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]]
799 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
800 ; VI-NEXT: store volatile i16 %[[R_16]]
801 define amdgpu_kernel void @lshr_exact_i16(i16 %a, i16 %b) {
802 %r = lshr exact i16 %a, %b
803 store volatile i16 %r, i16 addrspace(1)* undef
807 ; GCN-LABEL: @ashr_i16(
808 ; SI: %r = ashr i16 %a, %b
809 ; SI-NEXT: store volatile i16 %r
810 ; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32
811 ; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32
812 ; VI-NEXT: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]]
813 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
814 ; VI-NEXT: store volatile i16 %[[R_16]]
815 define amdgpu_kernel void @ashr_i16(i16 %a, i16 %b) {
817 store volatile i16 %r, i16 addrspace(1)* undef
821 ; GCN-LABEL: @ashr_exact_i16(
822 ; SI: %r = ashr exact i16 %a, %b
823 ; SI-NEXT: store volatile i16 %r
824 ; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32
825 ; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32
826 ; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]]
827 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
828 ; VI-NEXT: store volatile i16 %[[R_16]]
829 define amdgpu_kernel void @ashr_exact_i16(i16 %a, i16 %b) {
830 %r = ashr exact i16 %a, %b
831 store volatile i16 %r, i16 addrspace(1)* undef
835 ; GCN-LABEL: @constant_lshr_exact_i16(
836 ; VI: store volatile i16 2
837 define amdgpu_kernel void @constant_lshr_exact_i16(i16 %a, i16 %b) {
838 %r = lshr exact i16 4, 1
839 store volatile i16 %r, i16 addrspace(1)* undef
843 ; GCN-LABEL: @and_i16(
844 ; SI: %r = and i16 %a, %b
845 ; SI-NEXT: store volatile i16 %r
846 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
847 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
848 ; VI-NEXT: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]]
849 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
850 ; VI-NEXT: store volatile i16 %[[R_16]]
851 define amdgpu_kernel void @and_i16(i16 %a, i16 %b) {
853 store volatile i16 %r, i16 addrspace(1)* undef
857 ; GCN-LABEL: @or_i16(
858 ; SI: %r = or i16 %a, %b
859 ; SI-NEXT: store volatile i16 %r
860 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
861 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
862 ; VI-NEXT: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]]
863 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
864 ; VI-NEXT: store volatile i16 %[[R_16]]
865 define amdgpu_kernel void @or_i16(i16 %a, i16 %b) {
867 store volatile i16 %r, i16 addrspace(1)* undef
871 ; GCN-LABEL: @xor_i16(
872 ; SI: %r = xor i16 %a, %b
873 ; SI-NEXT: store volatile i16 %r
874 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
875 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
876 ; VI-NEXT: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]]
877 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
878 ; VI-NEXT: store volatile i16 %[[R_16]]
879 define amdgpu_kernel void @xor_i16(i16 %a, i16 %b) {
881 store volatile i16 %r, i16 addrspace(1)* undef
885 ; GCN-LABEL: @select_eq_i16(
886 ; SI: %cmp = icmp eq i16 %a, %b
887 ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
888 ; SI-NEXT: store volatile i16 %sel
889 ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
890 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
891 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]]
892 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
893 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
894 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
895 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
896 ; VI-NEXT: store volatile i16 %[[SEL_16]]
897 define amdgpu_kernel void @select_eq_i16(i16 %a, i16 %b) {
898 %cmp = icmp eq i16 %a, %b
899 %sel = select i1 %cmp, i16 %a, i16 %b
900 store volatile i16 %sel, i16 addrspace(1)* undef
904 ; GCN-LABEL: @select_ne_i16(
905 ; SI: %cmp = icmp ne i16 %a, %b
906 ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
907 ; SI-NEXT: store volatile i16 %sel
908 ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
909 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
910 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]]
911 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
912 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
913 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
914 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
915 ; VI-NEXT: store volatile i16 %[[SEL_16]]
916 define amdgpu_kernel void @select_ne_i16(i16 %a, i16 %b) {
917 %cmp = icmp ne i16 %a, %b
918 %sel = select i1 %cmp, i16 %a, i16 %b
919 store volatile i16 %sel, i16 addrspace(1)* undef
923 ; GCN-LABEL: @select_ugt_i16(
924 ; SI: %cmp = icmp ugt i16 %a, %b
925 ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
926 ; SI-NEXT: store volatile i16 %sel
927 ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
928 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
929 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]]
930 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
931 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
932 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
933 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
934 ; VI-NEXT: store volatile i16 %[[SEL_16]]
935 define amdgpu_kernel void @select_ugt_i16(i16 %a, i16 %b) {
936 %cmp = icmp ugt i16 %a, %b
937 %sel = select i1 %cmp, i16 %a, i16 %b
938 store volatile i16 %sel, i16 addrspace(1)* undef
942 ; GCN-LABEL: @select_uge_i16(
943 ; SI: %cmp = icmp uge i16 %a, %b
944 ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
945 ; SI-NEXT: store volatile i16 %sel
946 ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
947 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
948 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]]
949 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
950 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
951 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
952 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
953 ; VI-NEXT: store volatile i16 %[[SEL_16]]
954 define amdgpu_kernel void @select_uge_i16(i16 %a, i16 %b) {
955 %cmp = icmp uge i16 %a, %b
956 %sel = select i1 %cmp, i16 %a, i16 %b
957 store volatile i16 %sel, i16 addrspace(1)* undef
961 ; GCN-LABEL: @select_ult_i16(
962 ; SI: %cmp = icmp ult i16 %a, %b
963 ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
964 ; SI-NEXT: store volatile i16 %sel
965 ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
966 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
967 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]]
968 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
969 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
970 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
971 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
972 ; VI-NEXT: store volatile i16 %[[SEL_16]]
973 define amdgpu_kernel void @select_ult_i16(i16 %a, i16 %b) {
974 %cmp = icmp ult i16 %a, %b
975 %sel = select i1 %cmp, i16 %a, i16 %b
976 store volatile i16 %sel, i16 addrspace(1)* undef
980 ; GCN-LABEL: @select_ule_i16(
981 ; SI: %cmp = icmp ule i16 %a, %b
982 ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
983 ; SI-NEXT: store volatile i16 %sel
984 ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
985 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
986 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]]
987 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
988 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
989 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
990 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
991 ; VI-NEXT: store volatile i16 %[[SEL_16]]
992 define amdgpu_kernel void @select_ule_i16(i16 %a, i16 %b) {
993 %cmp = icmp ule i16 %a, %b
994 %sel = select i1 %cmp, i16 %a, i16 %b
995 store volatile i16 %sel, i16 addrspace(1)* undef
999 ; GCN-LABEL: @select_sgt_i16(
1000 ; SI: %cmp = icmp sgt i16 %a, %b
1001 ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
1002 ; SI-NEXT: store volatile i16 %sel
1003 ; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
1004 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
1005 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]]
1006 ; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
1007 ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
1008 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
1009 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
1010 ; VI-NEXT: store volatile i16 %[[SEL_16]]
1011 define amdgpu_kernel void @select_sgt_i16(i16 %a, i16 %b) {
1012 %cmp = icmp sgt i16 %a, %b
1013 %sel = select i1 %cmp, i16 %a, i16 %b
1014 store volatile i16 %sel, i16 addrspace(1)* undef
1018 ; GCN-LABEL: @select_sge_i16(
1019 ; SI: %cmp = icmp sge i16 %a, %b
1020 ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
1021 ; SI-NEXT: store volatile i16 %sel
1022 ; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
1023 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
1024 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]]
1025 ; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
1026 ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
1027 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
1028 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
1029 ; VI-NEXT: store volatile i16 %[[SEL_16]]
1030 define amdgpu_kernel void @select_sge_i16(i16 %a, i16 %b) {
1031 %cmp = icmp sge i16 %a, %b
1032 %sel = select i1 %cmp, i16 %a, i16 %b
1033 store volatile i16 %sel, i16 addrspace(1)* undef
1037 ; GCN-LABEL: @select_slt_i16(
1038 ; SI: %cmp = icmp slt i16 %a, %b
1039 ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
1040 ; SI-NEXT: store volatile i16 %sel
1041 ; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
1042 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
1043 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]]
1044 ; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
1045 ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
1046 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
1047 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
1048 ; VI-NEXT: store volatile i16 %[[SEL_16]]
1049 define amdgpu_kernel void @select_slt_i16(i16 %a, i16 %b) {
1050 %cmp = icmp slt i16 %a, %b
1051 %sel = select i1 %cmp, i16 %a, i16 %b
1052 store volatile i16 %sel, i16 addrspace(1)* undef
1056 ; GCN-LABEL: @select_sle_i16(
1057 ; SI: %cmp = icmp sle i16 %a, %b
1058 ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
1059 ; SI-NEXT: store volatile i16 %sel
1060 ; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
1061 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
1062 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]]
1063 ; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
1064 ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
1065 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
1066 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
1067 ; VI-NEXT: store volatile i16 %[[SEL_16]]
1068 define amdgpu_kernel void @select_sle_i16(i16 %a, i16 %b) {
1069 %cmp = icmp sle i16 %a, %b
1070 %sel = select i1 %cmp, i16 %a, i16 %b
1071 store volatile i16 %sel, i16 addrspace(1)* undef
; Intrinsic declaration used by @bitreverse_i16 below. On VI the
; amdgpu-codegenprepare pass widens this call to @llvm.bitreverse.i32
; followed by a lshr-by-16 and trunc (see the VI CHECK lines); on SI the
; i16 call is left untouched.
declare i16 @llvm.bitreverse.i16(i16)
1077 ; GCN-LABEL: @bitreverse_i16(
1078 ; SI: %brev = call i16 @llvm.bitreverse.i16(i16 %a)
1079 ; SI-NEXT: store volatile i16 %brev
1080 ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
1081 ; VI-NEXT: %[[R_32:[0-9]+]] = call i32 @llvm.bitreverse.i32(i32 %[[A_32]])
1082 ; VI-NEXT: %[[S_32:[0-9]+]] = lshr i32 %[[R_32]], 16
1083 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[S_32]] to i16
1084 ; VI-NEXT: store volatile i16 %[[R_16]]
1085 define amdgpu_kernel void @bitreverse_i16(i16 %a) {
1086 %brev = call i16 @llvm.bitreverse.i16(i16 %a)
1087 store volatile i16 %brev, i16 addrspace(1)* undef
1091 ; GCN-LABEL: @add_3xi15(
1092 ; SI: %r = add <3 x i15> %a, %b
1093 ; SI-NEXT: store volatile <3 x i15> %r
1094 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1095 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1096 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1097 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1098 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1099 define amdgpu_kernel void @add_3xi15(<3 x i15> %a, <3 x i15> %b) {
1100 %r = add <3 x i15> %a, %b
1101 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1105 ; GCN-LABEL: @add_nsw_3xi15(
1106 ; SI: %r = add nsw <3 x i15> %a, %b
1107 ; SI-NEXT: store volatile <3 x i15> %r
1108 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1109 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1110 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1111 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1112 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1113 define amdgpu_kernel void @add_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1114 %r = add nsw <3 x i15> %a, %b
1115 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1119 ; GCN-LABEL: @add_nuw_3xi15(
1120 ; SI: %r = add nuw <3 x i15> %a, %b
1121 ; SI-NEXT: store volatile <3 x i15> %r
1122 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1123 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1124 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1125 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1126 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1127 define amdgpu_kernel void @add_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1128 %r = add nuw <3 x i15> %a, %b
1129 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1133 ; GCN-LABEL: @add_nuw_nsw_3xi15(
1134 ; SI: %r = add nuw nsw <3 x i15> %a, %b
1135 ; SI-NEXT: store volatile <3 x i15> %r
1136 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1137 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1138 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1139 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1140 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1141 define amdgpu_kernel void @add_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1142 %r = add nuw nsw <3 x i15> %a, %b
1143 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1147 ; GCN-LABEL: @sub_3xi15(
1148 ; SI: %r = sub <3 x i15> %a, %b
1149 ; SI-NEXT: store volatile <3 x i15> %r
1150 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1151 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1152 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]]
1153 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1154 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1155 define amdgpu_kernel void @sub_3xi15(<3 x i15> %a, <3 x i15> %b) {
1156 %r = sub <3 x i15> %a, %b
1157 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1161 ; GCN-LABEL: @sub_nsw_3xi15(
1162 ; SI: %r = sub nsw <3 x i15> %a, %b
1163 ; SI-NEXT: store volatile <3 x i15> %r
1164 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1165 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1166 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]]
1167 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1168 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1169 define amdgpu_kernel void @sub_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1170 %r = sub nsw <3 x i15> %a, %b
1171 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1175 ; GCN-LABEL: @sub_nuw_3xi15(
1176 ; SI: %r = sub nuw <3 x i15> %a, %b
1177 ; SI-NEXT: store volatile <3 x i15> %r
1178 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1179 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1180 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1181 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1182 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1183 define amdgpu_kernel void @sub_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1184 %r = sub nuw <3 x i15> %a, %b
1185 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1189 ; GCN-LABEL: @sub_nuw_nsw_3xi15(
1190 ; SI: %r = sub nuw nsw <3 x i15> %a, %b
1191 ; SI-NEXT: store volatile <3 x i15> %r
1192 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1193 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1194 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1195 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1196 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1197 define amdgpu_kernel void @sub_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1198 %r = sub nuw nsw <3 x i15> %a, %b
1199 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1203 ; GCN-LABEL: @mul_3xi15(
1204 ; SI: %r = mul <3 x i15> %a, %b
1205 ; SI-NEXT: store volatile <3 x i15> %r
1206 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1207 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1208 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]]
1209 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1210 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1211 define amdgpu_kernel void @mul_3xi15(<3 x i15> %a, <3 x i15> %b) {
1212 %r = mul <3 x i15> %a, %b
1213 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1217 ; GCN-LABEL: @mul_nsw_3xi15(
1218 ; SI: %r = mul nsw <3 x i15> %a, %b
1219 ; SI-NEXT: store volatile <3 x i15> %r
1220 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1221 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1222 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]]
1223 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1224 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1225 define amdgpu_kernel void @mul_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1226 %r = mul nsw <3 x i15> %a, %b
1227 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1231 ; GCN-LABEL: @mul_nuw_3xi15(
1232 ; SI: %r = mul nuw <3 x i15> %a, %b
1233 ; SI-NEXT: store volatile <3 x i15> %r
1234 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1235 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1236 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1237 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1238 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1239 define amdgpu_kernel void @mul_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1240 %r = mul nuw <3 x i15> %a, %b
1241 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1245 ; GCN-LABEL: @mul_nuw_nsw_3xi15(
1246 ; SI: %r = mul nuw nsw <3 x i15> %a, %b
1247 ; SI-NEXT: store volatile <3 x i15> %r
1248 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1249 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1250 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1251 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1252 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1253 define amdgpu_kernel void @mul_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1254 %r = mul nuw nsw <3 x i15> %a, %b
1255 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1259 ; GCN-LABEL: @shl_3xi15(
1260 ; SI: %r = shl <3 x i15> %a, %b
1261 ; SI-NEXT: store volatile <3 x i15> %r
1262 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1263 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1264 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1265 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1266 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1267 define amdgpu_kernel void @shl_3xi15(<3 x i15> %a, <3 x i15> %b) {
1268 %r = shl <3 x i15> %a, %b
1269 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1273 ; GCN-LABEL: @shl_nsw_3xi15(
1274 ; SI: %r = shl nsw <3 x i15> %a, %b
1275 ; SI-NEXT: store volatile <3 x i15> %r
1276 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1277 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1278 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1279 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1280 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1281 define amdgpu_kernel void @shl_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1282 %r = shl nsw <3 x i15> %a, %b
1283 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1287 ; GCN-LABEL: @shl_nuw_3xi15(
1288 ; SI: %r = shl nuw <3 x i15> %a, %b
1289 ; SI-NEXT: store volatile <3 x i15> %r
1290 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1291 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1292 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1293 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1294 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1295 define amdgpu_kernel void @shl_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1296 %r = shl nuw <3 x i15> %a, %b
1297 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1301 ; GCN-LABEL: @shl_nuw_nsw_3xi15(
1302 ; SI: %r = shl nuw nsw <3 x i15> %a, %b
1303 ; SI-NEXT: store volatile <3 x i15> %r
1304 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1305 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1306 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1307 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1308 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1309 define amdgpu_kernel void @shl_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1310 %r = shl nuw nsw <3 x i15> %a, %b
1311 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1315 ; GCN-LABEL: @lshr_3xi15(
1316 ; SI: %r = lshr <3 x i15> %a, %b
1317 ; SI-NEXT: store volatile <3 x i15> %r
1318 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1319 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1320 ; VI-NEXT: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]]
1321 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1322 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1323 define amdgpu_kernel void @lshr_3xi15(<3 x i15> %a, <3 x i15> %b) {
1324 %r = lshr <3 x i15> %a, %b
1325 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1329 ; GCN-LABEL: @lshr_exact_3xi15(
1330 ; SI: %r = lshr exact <3 x i15> %a, %b
1331 ; SI-NEXT: store volatile <3 x i15> %r
1332 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1333 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1334 ; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]]
1335 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1336 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1337 define amdgpu_kernel void @lshr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
1338 %r = lshr exact <3 x i15> %a, %b
1339 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1343 ; GCN-LABEL: @ashr_3xi15(
1344 ; SI: %r = ashr <3 x i15> %a, %b
1345 ; SI-NEXT: store volatile <3 x i15> %r
1346 ; VI: %[[A_32:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1347 ; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1348 ; VI-NEXT: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]]
1349 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1350 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1351 define amdgpu_kernel void @ashr_3xi15(<3 x i15> %a, <3 x i15> %b) {
1352 %r = ashr <3 x i15> %a, %b
1353 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1357 ; GCN-LABEL: @ashr_exact_3xi15(
1358 ; SI: %r = ashr exact <3 x i15> %a, %b
1359 ; SI-NEXT: store volatile <3 x i15> %r
1360 ; VI: %[[A_32:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1361 ; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1362 ; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]]
1363 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1364 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1365 define amdgpu_kernel void @ashr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
1366 %r = ashr exact <3 x i15> %a, %b
1367 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1371 ; GCN-LABEL: @and_3xi15(
1372 ; SI: %r = and <3 x i15> %a, %b
1373 ; SI-NEXT: store volatile <3 x i15> %r
1374 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1375 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1376 ; VI-NEXT: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]]
1377 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1378 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1379 define amdgpu_kernel void @and_3xi15(<3 x i15> %a, <3 x i15> %b) {
1380 %r = and <3 x i15> %a, %b
1381 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1385 ; GCN-LABEL: @or_3xi15(
1386 ; SI: %r = or <3 x i15> %a, %b
1387 ; SI-NEXT: store volatile <3 x i15> %r
1388 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1389 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1390 ; VI-NEXT: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]]
1391 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1392 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1393 define amdgpu_kernel void @or_3xi15(<3 x i15> %a, <3 x i15> %b) {
1394 %r = or <3 x i15> %a, %b
1395 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1399 ; GCN-LABEL: @xor_3xi15(
1400 ; SI: %r = xor <3 x i15> %a, %b
1401 ; SI-NEXT: store volatile <3 x i15> %r
1402 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1403 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1404 ; VI-NEXT: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]]
1405 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
1406 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1407 define amdgpu_kernel void @xor_3xi15(<3 x i15> %a, <3 x i15> %b) {
1408 %r = xor <3 x i15> %a, %b
1409 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1413 ; GCN-LABEL: @select_eq_3xi15(
1414 ; SI: %cmp = icmp eq <3 x i15> %a, %b
1415 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1416 ; SI-NEXT: store volatile <3 x i15> %sel
1417 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1418 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1419 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]]
1420 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1421 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1422 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1423 ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1424 ; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
1425 define amdgpu_kernel void @select_eq_3xi15(<3 x i15> %a, <3 x i15> %b) {
1426 %cmp = icmp eq <3 x i15> %a, %b
1427 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1428 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1432 ; GCN-LABEL: @select_ne_3xi15(
1433 ; SI: %cmp = icmp ne <3 x i15> %a, %b
1434 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1435 ; SI-NEXT: store volatile <3 x i15> %sel
1436 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1437 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1438 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]]
1439 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1440 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1441 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1442 ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1443 ; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
1444 define amdgpu_kernel void @select_ne_3xi15(<3 x i15> %a, <3 x i15> %b) {
1445 %cmp = icmp ne <3 x i15> %a, %b
1446 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1447 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1451 ; GCN-LABEL: @select_ugt_3xi15(
1452 ; SI: %cmp = icmp ugt <3 x i15> %a, %b
1453 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1454 ; SI-NEXT: store volatile <3 x i15> %sel
1455 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1456 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1457 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]]
1458 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1459 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1460 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1461 ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1462 ; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
1463 define amdgpu_kernel void @select_ugt_3xi15(<3 x i15> %a, <3 x i15> %b) {
1464 %cmp = icmp ugt <3 x i15> %a, %b
1465 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1466 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1470 ; GCN-LABEL: @select_uge_3xi15(
1471 ; SI: %cmp = icmp uge <3 x i15> %a, %b
1472 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1473 ; SI-NEXT: store volatile <3 x i15> %sel
1474 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1475 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1476 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]]
1477 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1478 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1479 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1480 ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1481 ; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
1482 define amdgpu_kernel void @select_uge_3xi15(<3 x i15> %a, <3 x i15> %b) {
1483 %cmp = icmp uge <3 x i15> %a, %b
1484 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1485 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1489 ; GCN-LABEL: @select_ult_3xi15(
1490 ; SI: %cmp = icmp ult <3 x i15> %a, %b
1491 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1492 ; SI-NEXT: store volatile <3 x i15> %sel
1493 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1494 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1495 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]]
1496 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1497 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1498 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1499 ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1500 ; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
1501 define amdgpu_kernel void @select_ult_3xi15(<3 x i15> %a, <3 x i15> %b) {
1502 %cmp = icmp ult <3 x i15> %a, %b
1503 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1504 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1508 ; GCN-LABEL: @select_ule_3xi15(
1509 ; SI: %cmp = icmp ule <3 x i15> %a, %b
1510 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1511 ; SI-NEXT: store volatile <3 x i15> %sel
1512 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1513 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1514 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]]
1515 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1516 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1517 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1518 ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1519 ; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
1520 define amdgpu_kernel void @select_ule_3xi15(<3 x i15> %a, <3 x i15> %b) {
1521 %cmp = icmp ule <3 x i15> %a, %b
1522 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1523 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1527 ; GCN-LABEL: @select_sgt_3xi15(
; NOTE(review): chunk resumes mid-test — the label line for the first test
; (@select_sgt_3xi15) is above this chunk. These tests cover signed icmp +
; select over <3 x i15>. Under the gfx8 run line (VI prefix) the
; amdgpu-codegenprepare pass widens the illegal i15 vector ops to i32,
; using sext for signed predicates so the i15 sign is preserved; the
; operands are extended twice, once for the compare and once for the
; select, before the result is truncated back. Under the SI prefix the
; IR is expected to pass through unchanged.
1528 ; SI: %cmp = icmp sgt <3 x i15> %a, %b
1529 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1530 ; SI-NEXT: store volatile <3 x i15> %sel
1531 ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1532 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1533 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], %[[B_32_0]]
1534 ; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1535 ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1536 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1537 ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1538 ; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
1539 define amdgpu_kernel void @select_sgt_3xi15(<3 x i15> %a, <3 x i15> %b) {
1540 %cmp = icmp sgt <3 x i15> %a, %b
1541 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1542 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1546 ; GCN-LABEL: @select_sge_3xi15(
1547 ; SI: %cmp = icmp sge <3 x i15> %a, %b
1548 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1549 ; SI-NEXT: store volatile <3 x i15> %sel
1550 ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1551 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1552 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]]
1553 ; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1554 ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1555 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1556 ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1557 ; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
1558 define amdgpu_kernel void @select_sge_3xi15(<3 x i15> %a, <3 x i15> %b) {
1559 %cmp = icmp sge <3 x i15> %a, %b
1560 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1561 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1565 ; GCN-LABEL: @select_slt_3xi15(
1566 ; SI: %cmp = icmp slt <3 x i15> %a, %b
1567 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1568 ; SI-NEXT: store volatile <3 x i15> %sel
1569 ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1570 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1571 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]]
1572 ; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1573 ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1574 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1575 ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1576 ; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
1577 define amdgpu_kernel void @select_slt_3xi15(<3 x i15> %a, <3 x i15> %b) {
1578 %cmp = icmp slt <3 x i15> %a, %b
1579 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1580 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1584 ; GCN-LABEL: @select_sle_3xi15(
1585 ; SI: %cmp = icmp sle <3 x i15> %a, %b
1586 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1587 ; SI-NEXT: store volatile <3 x i15> %sel
1588 ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1589 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1590 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]]
1591 ; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1592 ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1593 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1594 ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1595 ; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
1596 define amdgpu_kernel void @select_sle_3xi15(<3 x i15> %a, <3 x i15> %b) {
1597 %cmp = icmp sle <3 x i15> %a, %b
1598 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1599 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
; Widened bitreverse: zext puts the i15 payload in the low bits, the i32
; bitreverse leaves the reversed payload in the HIGH bits, so a logical
; shift right by 17 (= 32 - 15) moves it back down before truncating.
1603 declare <3 x i15> @llvm.bitreverse.v3i15(<3 x i15>)
1604 ; GCN-LABEL: @bitreverse_3xi15(
1605 ; SI: %brev = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> %a)
1606 ; SI-NEXT: store volatile <3 x i15> %brev
1607 ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1608 ; VI-NEXT: %[[R_32:[0-9]+]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> %[[A_32]])
1609 ; VI-NEXT: %[[S_32:[0-9]+]] = lshr <3 x i32> %[[R_32]], <i32 17, i32 17, i32 17>
1610 ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[S_32]] to <3 x i15>
1611 ; VI-NEXT: store volatile <3 x i15> %[[R_15]]
1612 define amdgpu_kernel void @bitreverse_3xi15(<3 x i15> %a) {
1613 %brev = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> %a)
1614 store volatile <3 x i15> %brev, <3 x i15> addrspace(1)* undef
; add/sub/mul over <3 x i16>, each in four wrap-flag variants (none, nsw,
; nuw, nuw nsw). The checks below show the flags the pass places on the
; widened i32 op regardless of the source flags:
;   - add of two zext'd i16 values cannot wrap i32 either way -> nuw nsw.
;   - sub of zext'd i16 values cannot overflow signed i32 -> nsw; nuw is
;     added only when the source sub already carried nuw.
;   - mul of two zext'd i16 values fits in 32 unsigned bits -> nuw; nsw
;     is added only when the source mul carried nuw (product < 2^31).
1618 ; GCN-LABEL: @add_3xi16(
1619 ; SI: %r = add <3 x i16> %a, %b
1620 ; SI-NEXT: store volatile <3 x i16> %r
1621 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1622 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1623 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1624 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1625 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1626 define amdgpu_kernel void @add_3xi16(<3 x i16> %a, <3 x i16> %b) {
1627 %r = add <3 x i16> %a, %b
1628 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1632 ; GCN-LABEL: @add_nsw_3xi16(
1633 ; SI: %r = add nsw <3 x i16> %a, %b
1634 ; SI-NEXT: store volatile <3 x i16> %r
1635 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1636 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1637 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1638 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1639 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1640 define amdgpu_kernel void @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
1641 %r = add nsw <3 x i16> %a, %b
1642 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1646 ; GCN-LABEL: @add_nuw_3xi16(
1647 ; SI: %r = add nuw <3 x i16> %a, %b
1648 ; SI-NEXT: store volatile <3 x i16> %r
1649 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1650 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1651 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1652 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1653 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1654 define amdgpu_kernel void @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
1655 %r = add nuw <3 x i16> %a, %b
1656 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1660 ; GCN-LABEL: @add_nuw_nsw_3xi16(
1661 ; SI: %r = add nuw nsw <3 x i16> %a, %b
1662 ; SI-NEXT: store volatile <3 x i16> %r
1663 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1664 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1665 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1666 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1667 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1668 define amdgpu_kernel void @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
1669 %r = add nuw nsw <3 x i16> %a, %b
1670 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1674 ; GCN-LABEL: @sub_3xi16(
1675 ; SI: %r = sub <3 x i16> %a, %b
1676 ; SI-NEXT: store volatile <3 x i16> %r
1677 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1678 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1679 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]]
1680 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1681 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1682 define amdgpu_kernel void @sub_3xi16(<3 x i16> %a, <3 x i16> %b) {
1683 %r = sub <3 x i16> %a, %b
1684 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1688 ; GCN-LABEL: @sub_nsw_3xi16(
1689 ; SI: %r = sub nsw <3 x i16> %a, %b
1690 ; SI-NEXT: store volatile <3 x i16> %r
1691 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1692 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1693 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]]
1694 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1695 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1696 define amdgpu_kernel void @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
1697 %r = sub nsw <3 x i16> %a, %b
1698 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1702 ; GCN-LABEL: @sub_nuw_3xi16(
1703 ; SI: %r = sub nuw <3 x i16> %a, %b
1704 ; SI-NEXT: store volatile <3 x i16> %r
1705 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1706 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1707 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1708 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1709 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1710 define amdgpu_kernel void @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
1711 %r = sub nuw <3 x i16> %a, %b
1712 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1716 ; GCN-LABEL: @sub_nuw_nsw_3xi16(
1717 ; SI: %r = sub nuw nsw <3 x i16> %a, %b
1718 ; SI-NEXT: store volatile <3 x i16> %r
1719 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1720 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1721 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1722 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1723 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1724 define amdgpu_kernel void @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
1725 %r = sub nuw nsw <3 x i16> %a, %b
1726 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1730 ; GCN-LABEL: @mul_3xi16(
1731 ; SI: %r = mul <3 x i16> %a, %b
1732 ; SI-NEXT: store volatile <3 x i16> %r
1733 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1734 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1735 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]]
1736 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1737 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1738 define amdgpu_kernel void @mul_3xi16(<3 x i16> %a, <3 x i16> %b) {
1739 %r = mul <3 x i16> %a, %b
1740 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1744 ; GCN-LABEL: @mul_nsw_3xi16(
1745 ; SI: %r = mul nsw <3 x i16> %a, %b
1746 ; SI-NEXT: store volatile <3 x i16> %r
1747 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1748 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1749 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]]
1750 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1751 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1752 define amdgpu_kernel void @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
1753 %r = mul nsw <3 x i16> %a, %b
1754 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1758 ; GCN-LABEL: @mul_nuw_3xi16(
1759 ; SI: %r = mul nuw <3 x i16> %a, %b
1760 ; SI-NEXT: store volatile <3 x i16> %r
1761 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1762 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1763 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1764 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1765 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1766 define amdgpu_kernel void @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
1767 %r = mul nuw <3 x i16> %a, %b
1768 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1772 ; GCN-LABEL: @mul_nuw_nsw_3xi16(
1773 ; SI: %r = mul nuw nsw <3 x i16> %a, %b
1774 ; SI-NEXT: store volatile <3 x i16> %r
1775 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1776 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1777 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1778 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1779 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1780 define amdgpu_kernel void @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
1781 %r = mul nuw nsw <3 x i16> %a, %b
1782 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
; Shift widening over <3 x i16>:
;   - shl of zext'd operands is marked nuw nsw on the widened op in every
;     source-flag variant (the truncated low 16 bits are what is kept).
;   - lshr zero-extends its operands (unsigned semantics); ashr
;     sign-extends (signed semantics). The exact flag, when present on
;     the source shift, is carried over to the widened shift.
1786 ; GCN-LABEL: @shl_3xi16(
1787 ; SI: %r = shl <3 x i16> %a, %b
1788 ; SI-NEXT: store volatile <3 x i16> %r
1789 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1790 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1791 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1792 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1793 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1794 define amdgpu_kernel void @shl_3xi16(<3 x i16> %a, <3 x i16> %b) {
1795 %r = shl <3 x i16> %a, %b
1796 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1800 ; GCN-LABEL: @shl_nsw_3xi16(
1801 ; SI: %r = shl nsw <3 x i16> %a, %b
1802 ; SI-NEXT: store volatile <3 x i16> %r
1803 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1804 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1805 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1806 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1807 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1808 define amdgpu_kernel void @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
1809 %r = shl nsw <3 x i16> %a, %b
1810 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1814 ; GCN-LABEL: @shl_nuw_3xi16(
1815 ; SI: %r = shl nuw <3 x i16> %a, %b
1816 ; SI-NEXT: store volatile <3 x i16> %r
1817 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1818 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1819 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1820 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1821 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1822 define amdgpu_kernel void @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
1823 %r = shl nuw <3 x i16> %a, %b
1824 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1828 ; GCN-LABEL: @shl_nuw_nsw_3xi16(
1829 ; SI: %r = shl nuw nsw <3 x i16> %a, %b
1830 ; SI-NEXT: store volatile <3 x i16> %r
1831 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1832 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1833 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1834 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1835 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1836 define amdgpu_kernel void @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
1837 %r = shl nuw nsw <3 x i16> %a, %b
1838 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1842 ; GCN-LABEL: @lshr_3xi16(
1843 ; SI: %r = lshr <3 x i16> %a, %b
1844 ; SI-NEXT: store volatile <3 x i16> %r
1845 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1846 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1847 ; VI-NEXT: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]]
1848 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1849 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1850 define amdgpu_kernel void @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) {
1851 %r = lshr <3 x i16> %a, %b
1852 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1856 ; GCN-LABEL: @lshr_exact_3xi16(
1857 ; SI: %r = lshr exact <3 x i16> %a, %b
1858 ; SI-NEXT: store volatile <3 x i16> %r
1859 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1860 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1861 ; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]]
1862 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1863 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1864 define amdgpu_kernel void @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
1865 %r = lshr exact <3 x i16> %a, %b
1866 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1870 ; GCN-LABEL: @ashr_3xi16(
1871 ; SI: %r = ashr <3 x i16> %a, %b
1872 ; SI-NEXT: store volatile <3 x i16> %r
1873 ; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
1874 ; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
1875 ; VI-NEXT: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]]
1876 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1877 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1878 define amdgpu_kernel void @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) {
1879 %r = ashr <3 x i16> %a, %b
1880 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1884 ; GCN-LABEL: @ashr_exact_3xi16(
1885 ; SI: %r = ashr exact <3 x i16> %a, %b
1886 ; SI-NEXT: store volatile <3 x i16> %r
1887 ; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
1888 ; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
1889 ; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]]
1890 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1891 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1892 define amdgpu_kernel void @ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
1893 %r = ashr exact <3 x i16> %a, %b
1894 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
; Bitwise ops over <3 x i16>: operands are zero-extended (either extension
; would do for bitwise logic; zext is what the pass emits) and the widened
; op carries no wrap flags, since and/or/xor cannot overflow.
1898 ; GCN-LABEL: @and_3xi16(
1899 ; SI: %r = and <3 x i16> %a, %b
1900 ; SI-NEXT: store volatile <3 x i16> %r
1901 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1902 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1903 ; VI-NEXT: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]]
1904 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1905 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1906 define amdgpu_kernel void @and_3xi16(<3 x i16> %a, <3 x i16> %b) {
1907 %r = and <3 x i16> %a, %b
1908 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1912 ; GCN-LABEL: @or_3xi16(
1913 ; SI: %r = or <3 x i16> %a, %b
1914 ; SI-NEXT: store volatile <3 x i16> %r
1915 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1916 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1917 ; VI-NEXT: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]]
1918 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1919 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1920 define amdgpu_kernel void @or_3xi16(<3 x i16> %a, <3 x i16> %b) {
1921 %r = or <3 x i16> %a, %b
1922 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
1926 ; GCN-LABEL: @xor_3xi16(
1927 ; SI: %r = xor <3 x i16> %a, %b
1928 ; SI-NEXT: store volatile <3 x i16> %r
1929 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1930 ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1931 ; VI-NEXT: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]]
1932 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1933 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
1934 define amdgpu_kernel void @xor_3xi16(<3 x i16> %a, <3 x i16> %b) {
1935 %r = xor <3 x i16> %a, %b
1936 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
; icmp + select over <3 x i16>, one test per predicate. The widened form
; extends the operands twice — once for the compare, once for the select
; arms — then truncates back to i16. Equality and the unsigned predicates
; (eq, ne, ugt, uge, ult, ule) use zext; the signed predicates (sgt, sge,
; slt, sle) use sext so i32 ordering matches i16 signed ordering.
1940 ; GCN-LABEL: @select_eq_3xi16(
1941 ; SI: %cmp = icmp eq <3 x i16> %a, %b
1942 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
1943 ; SI-NEXT: store volatile <3 x i16> %sel
1944 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1945 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1946 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]]
1947 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1948 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1949 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1950 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
1951 ; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
1952 define amdgpu_kernel void @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) {
1953 %cmp = icmp eq <3 x i16> %a, %b
1954 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
1955 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
1959 ; GCN-LABEL: @select_ne_3xi16(
1960 ; SI: %cmp = icmp ne <3 x i16> %a, %b
1961 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
1962 ; SI-NEXT: store volatile <3 x i16> %sel
1963 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1964 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1965 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]]
1966 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1967 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1968 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1969 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
1970 ; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
1971 define amdgpu_kernel void @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) {
1972 %cmp = icmp ne <3 x i16> %a, %b
1973 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
1974 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
1978 ; GCN-LABEL: @select_ugt_3xi16(
1979 ; SI: %cmp = icmp ugt <3 x i16> %a, %b
1980 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
1981 ; SI-NEXT: store volatile <3 x i16> %sel
1982 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1983 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1984 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]]
1985 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1986 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1987 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1988 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
1989 ; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
1990 define amdgpu_kernel void @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) {
1991 %cmp = icmp ugt <3 x i16> %a, %b
1992 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
1993 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
1997 ; GCN-LABEL: @select_uge_3xi16(
1998 ; SI: %cmp = icmp uge <3 x i16> %a, %b
1999 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2000 ; SI-NEXT: store volatile <3 x i16> %sel
2001 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
2002 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
2003 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]]
2004 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
2005 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
2006 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
2007 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
2008 ; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
2009 define amdgpu_kernel void @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) {
2010 %cmp = icmp uge <3 x i16> %a, %b
2011 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2012 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
2016 ; GCN-LABEL: @select_ult_3xi16(
2017 ; SI: %cmp = icmp ult <3 x i16> %a, %b
2018 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2019 ; SI-NEXT: store volatile <3 x i16> %sel
2020 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
2021 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
2022 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]]
2023 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
2024 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
2025 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
2026 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
2027 ; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
2028 define amdgpu_kernel void @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) {
2029 %cmp = icmp ult <3 x i16> %a, %b
2030 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2031 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
2035 ; GCN-LABEL: @select_ule_3xi16(
2036 ; SI: %cmp = icmp ule <3 x i16> %a, %b
2037 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2038 ; SI-NEXT: store volatile <3 x i16> %sel
2039 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
2040 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
2041 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]]
2042 ; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
2043 ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
2044 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
2045 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
2046 ; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
2047 define amdgpu_kernel void @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) {
2048 %cmp = icmp ule <3 x i16> %a, %b
2049 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2050 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
2054 ; GCN-LABEL: @select_sgt_3xi16(
2055 ; SI: %cmp = icmp sgt <3 x i16> %a, %b
2056 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2057 ; SI-NEXT: store volatile <3 x i16> %sel
2058 ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
2059 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
2060 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], %[[B_32_0]]
2061 ; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
2062 ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
2063 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
2064 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
2065 ; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
2066 define amdgpu_kernel void @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) {
2067 %cmp = icmp sgt <3 x i16> %a, %b
2068 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2069 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
2073 ; GCN-LABEL: @select_sge_3xi16(
2074 ; SI: %cmp = icmp sge <3 x i16> %a, %b
2075 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2076 ; SI-NEXT: store volatile <3 x i16> %sel
2077 ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
2078 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
2079 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]]
2080 ; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
2081 ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
2082 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
2083 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
2084 ; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
2085 define amdgpu_kernel void @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) {
2086 %cmp = icmp sge <3 x i16> %a, %b
2087 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2088 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
2092 ; GCN-LABEL: @select_slt_3xi16(
2093 ; SI: %cmp = icmp slt <3 x i16> %a, %b
2094 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2095 ; SI-NEXT: store volatile <3 x i16> %sel
2096 ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
2097 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
2098 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]]
2099 ; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
2100 ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
2101 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
2102 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
2103 ; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
2104 define amdgpu_kernel void @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) {
2105 %cmp = icmp slt <3 x i16> %a, %b
2106 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2107 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
2111 ; GCN-LABEL: @select_sle_3xi16(
2112 ; SI: %cmp = icmp sle <3 x i16> %a, %b
2113 ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2114 ; SI-NEXT: store volatile <3 x i16> %sel
2115 ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
2116 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
2117 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]]
2118 ; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
2119 ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
2120 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
2121 ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
2122 ; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
2123 define amdgpu_kernel void @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) {
2124 %cmp = icmp sle <3 x i16> %a, %b
2125 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2126 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
; Same pattern as the i15 bitreverse test above, for <3 x i16>: the i32
; bitreverse leaves the reversed 16-bit payload in the high half, so the
; logical shift right is by 16 (= 32 - 16) before truncating.
; NOTE(review): the function body continues past this chunk.
2130 declare <3 x i16> @llvm.bitreverse.v3i16(<3 x i16>)
2132 ; GCN-LABEL: @bitreverse_3xi16(
2133 ; SI: %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a)
2134 ; SI-NEXT: store volatile <3 x i16> %brev
2135 ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
2136 ; VI-NEXT: %[[R_32:[0-9]+]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> %[[A_32]])
2137 ; VI-NEXT: %[[S_32:[0-9]+]] = lshr <3 x i32> %[[R_32]], <i32 16, i32 16, i32 16>
2138 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[S_32]] to <3 x i16>
2139 ; VI-NEXT: store volatile <3 x i16> %[[R_16]]
2140 define amdgpu_kernel void @bitreverse_3xi16(<3 x i16> %a) {
2141 %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a)
2142 store volatile <3 x i16> %brev, <3 x i16> addrspace(1)* undef