1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \
3 ; RUN: | FileCheck %s -check-prefix=RV64I
4 ; RUN: llc -mtriple=riscv64 -global-isel -mattr=+zbkb -verify-machineinstrs < %s \
5 ; RUN: | FileCheck %s -check-prefix=RV64ZBKB
; i32: OR the low 16 bits of %a (and with 65535) with %b shifted left by 16.
; Neither run expects a pack-style instruction here; with +zbkb only the
; 16-bit mask changes, from an slli/srli pair to zext.h.
8 define signext i32 @pack_i32(i32 signext %a, i32 signext %b) nounwind {
9 ; RV64I-LABEL: pack_i32:
11 ; RV64I-NEXT: slli a0, a0, 48
12 ; RV64I-NEXT: srli a0, a0, 48
13 ; RV64I-NEXT: slliw a1, a1, 16
14 ; RV64I-NEXT: or a0, a1, a0
17 ; RV64ZBKB-LABEL: pack_i32:
19 ; RV64ZBKB-NEXT: zext.h a0, a0
20 ; RV64ZBKB-NEXT: slliw a1, a1, 16
21 ; RV64ZBKB-NEXT: or a0, a1, a0
23 %shl = and i32 %a, 65535
24 %shl1 = shl i32 %b, 16
25 %or = or i32 %shl1, %shl
; Same i32 combine as above, but the halves arrive as zeroext i16 arguments
; and are widened with zext, so no explicit masking is expected. Both runs
; check the identical slli/or/sext.w sequence.
30 define signext i32 @pack_i32_2(i16 zeroext %a, i16 zeroext %b) nounwind {
31 ; RV64I-LABEL: pack_i32_2:
33 ; RV64I-NEXT: slli a1, a1, 16
34 ; RV64I-NEXT: or a0, a1, a0
35 ; RV64I-NEXT: sext.w a0, a0
38 ; RV64ZBKB-LABEL: pack_i32_2:
40 ; RV64ZBKB-NEXT: slli a1, a1, 16
41 ; RV64ZBKB-NEXT: or a0, a1, a0
42 ; RV64ZBKB-NEXT: sext.w a0, a0
44 %zexta = zext i16 %a to i32
45 %zextb = zext i16 %b to i32
46 %shl1 = shl i32 %zextb, 16
47 %or = or i32 %shl1, %zexta
51 ; Test case where we don't have a sign_extend_inreg after the or.
; Per the checks, the or result feeds an addw with the third argument, and
; both runs expect the same slli/or/addw sequence.
53 define signext i32 @pack_i32_3(i16 zeroext %0, i16 zeroext %1, i32 signext %2) {
54 ; RV64I-LABEL: pack_i32_3:
56 ; RV64I-NEXT: slli a0, a0, 16
57 ; RV64I-NEXT: or a0, a0, a1
58 ; RV64I-NEXT: addw a0, a0, a2
61 ; RV64ZBKB-LABEL: pack_i32_3:
63 ; RV64ZBKB-NEXT: slli a0, a0, 16
64 ; RV64ZBKB-NEXT: or a0, a0, a1
65 ; RV64ZBKB-NEXT: addw a0, a0, a2
67 %4 = zext i16 %0 to i32
68 %5 = shl nuw i32 %4, 16
69 %6 = zext i16 %1 to i32
; i64: OR the low 32 bits of %a (and with 4294967295) with %b shifted left
; by 32. The base run expects shifts plus or; the +zbkb run expects a single
; pack instruction.
75 define i64 @pack_i64(i64 %a, i64 %b) nounwind {
76 ; RV64I-LABEL: pack_i64:
78 ; RV64I-NEXT: slli a0, a0, 32
79 ; RV64I-NEXT: srli a0, a0, 32
80 ; RV64I-NEXT: slli a1, a1, 32
81 ; RV64I-NEXT: or a0, a1, a0
84 ; RV64ZBKB-LABEL: pack_i64:
86 ; RV64ZBKB-NEXT: pack a0, a0, a1
88 %shl = and i64 %a, 4294967295
89 %shl1 = shl i64 %b, 32
90 %or = or i64 %shl1, %shl
94 ; FIXME: The slli+srli pair emitted before pack in the +zbkb output isn't
; needed with pack.
; The i64 combine is built from two signext i32 arguments that are widened
; with zext before the shift-by-32 and or.
95 define i64 @pack_i64_2(i32 signext %a, i32 signext %b) nounwind {
96 ; RV64I-LABEL: pack_i64_2:
98 ; RV64I-NEXT: slli a0, a0, 32
99 ; RV64I-NEXT: slli a1, a1, 32
100 ; RV64I-NEXT: srli a0, a0, 32
101 ; RV64I-NEXT: srli a1, a1, 32
102 ; RV64I-NEXT: slli a1, a1, 32
103 ; RV64I-NEXT: or a0, a1, a0
106 ; RV64ZBKB-LABEL: pack_i64_2:
108 ; RV64ZBKB-NEXT: slli a1, a1, 32
109 ; RV64ZBKB-NEXT: srli a1, a1, 32
110 ; RV64ZBKB-NEXT: pack a0, a0, a1
112 %zexta = zext i32 %a to i64
113 %zextb = zext i32 %b to i64
114 %shl1 = shl i64 %zextb, 32
115 %or = or i64 %shl1, %zexta
; Both 32-bit halves are loaded from memory and zero-extended to i64. The
; checks expect lwu loads in both runs, followed by slli+or in the base run
; and by a single pack in the +zbkb run.
119 define i64 @pack_i64_3(ptr %0, ptr %1) {
120 ; RV64I-LABEL: pack_i64_3:
122 ; RV64I-NEXT: lwu a0, 0(a0)
123 ; RV64I-NEXT: lwu a1, 0(a1)
124 ; RV64I-NEXT: slli a0, a0, 32
125 ; RV64I-NEXT: or a0, a0, a1
128 ; RV64ZBKB-LABEL: pack_i64_3:
130 ; RV64ZBKB-NEXT: lwu a0, 0(a0)
131 ; RV64ZBKB-NEXT: lwu a1, 0(a1)
132 ; RV64ZBKB-NEXT: pack a0, a1, a0
134 %3 = load i32, ptr %0, align 4
135 %4 = zext i32 %3 to i64
137 %6 = load i32, ptr %1, align 4
138 %7 = zext i32 %6 to i64
; i32: OR the low byte of %a (and with 255) with (%b << 8) masked by 65280
; (0xff00), i.e. shift first, then mask. Both runs check the same
; lui/andi/addiw/slli/and/or sequence; no packh is expected for this form.
144 define signext i32 @packh_i32(i32 signext %a, i32 signext %b) nounwind {
145 ; RV64I-LABEL: packh_i32:
147 ; RV64I-NEXT: lui a2, 16
148 ; RV64I-NEXT: andi a0, a0, 255
149 ; RV64I-NEXT: addiw a2, a2, -256
150 ; RV64I-NEXT: slli a1, a1, 8
151 ; RV64I-NEXT: and a1, a1, a2
152 ; RV64I-NEXT: or a0, a1, a0
155 ; RV64ZBKB-LABEL: packh_i32:
157 ; RV64ZBKB-NEXT: lui a2, 16
158 ; RV64ZBKB-NEXT: andi a0, a0, 255
159 ; RV64ZBKB-NEXT: addiw a2, a2, -256
160 ; RV64ZBKB-NEXT: slli a1, a1, 8
161 ; RV64ZBKB-NEXT: and a1, a1, a2
162 ; RV64ZBKB-NEXT: or a0, a1, a0
164 %and = and i32 %a, 255
165 %and1 = shl i32 %b, 8
166 %shl = and i32 %and1, 65280
167 %or = or i32 %shl, %and
; i32: both operands are masked to their low byte first, then %b's byte is
; shifted left by 8 and ORed in. The base run expects andi/andi/slli/or; the
; +zbkb run expects a single packh.
171 define i32 @packh_i32_2(i32 %a, i32 %b) nounwind {
172 ; RV64I-LABEL: packh_i32_2:
174 ; RV64I-NEXT: andi a0, a0, 255
175 ; RV64I-NEXT: andi a1, a1, 255
176 ; RV64I-NEXT: slli a1, a1, 8
177 ; RV64I-NEXT: or a0, a1, a0
180 ; RV64ZBKB-LABEL: packh_i32_2:
182 ; RV64ZBKB-NEXT: packh a0, a0, a1
184 %and = and i32 %a, 255
185 %and1 = and i32 %b, 255
186 %shl = shl i32 %and1, 8
187 %or = or i32 %shl, %and
; i64 variant of the shift-then-mask byte combine: (%a & 255) ORed with
; (%b << 8) & 65280 (0xff00). Both runs check the same
; lui/andi/addiw/slli/and/or sequence; no packh is expected for this form.
192 define i64 @packh_i64(i64 %a, i64 %b) nounwind {
193 ; RV64I-LABEL: packh_i64:
195 ; RV64I-NEXT: lui a2, 16
196 ; RV64I-NEXT: andi a0, a0, 255
197 ; RV64I-NEXT: addiw a2, a2, -256
198 ; RV64I-NEXT: slli a1, a1, 8
199 ; RV64I-NEXT: and a1, a1, a2
200 ; RV64I-NEXT: or a0, a1, a0
203 ; RV64ZBKB-LABEL: packh_i64:
205 ; RV64ZBKB-NEXT: lui a2, 16
206 ; RV64ZBKB-NEXT: andi a0, a0, 255
207 ; RV64ZBKB-NEXT: addiw a2, a2, -256
208 ; RV64ZBKB-NEXT: slli a1, a1, 8
209 ; RV64ZBKB-NEXT: and a1, a1, a2
210 ; RV64ZBKB-NEXT: or a0, a1, a0
212 %and = and i64 %a, 255
213 %and1 = shl i64 %b, 8
214 %shl = and i64 %and1, 65280
215 %or = or i64 %shl, %and
; i64 variant of the mask-then-shift byte combine: both operands are masked
; with 255 before %b's byte is shifted left by 8 and ORed in. The base run
; expects andi/andi/slli/or; the +zbkb run expects a single packh.
219 define i64 @packh_i64_2(i64 %a, i64 %b) nounwind {
220 ; RV64I-LABEL: packh_i64_2:
222 ; RV64I-NEXT: andi a0, a0, 255
223 ; RV64I-NEXT: andi a1, a1, 255
224 ; RV64I-NEXT: slli a1, a1, 8
225 ; RV64I-NEXT: or a0, a1, a0
228 ; RV64ZBKB-LABEL: packh_i64_2:
230 ; RV64ZBKB-NEXT: packh a0, a0, a1
232 %and = and i64 %a, 255
233 %and1 = and i64 %b, 255
234 %shl = shl i64 %and1, 8
235 %or = or i64 %shl, %and
; i16 built from two zeroext i8 arguments: %b is widened, shifted left by 8
; and ORed with the widened %a. The base run expects slli+or; the +zbkb run
; expects a single packh.
239 define zeroext i16 @packh_i16(i8 zeroext %a, i8 zeroext %b) nounwind {
240 ; RV64I-LABEL: packh_i16:
242 ; RV64I-NEXT: slli a1, a1, 8
243 ; RV64I-NEXT: or a0, a1, a0
246 ; RV64ZBKB-LABEL: packh_i16:
248 ; RV64ZBKB-NEXT: packh a0, a0, a1
250 %zext = zext i8 %a to i16
251 %zext1 = zext i8 %b to i16
252 %shl = shl i16 %zext1, 8
253 %or = or i16 %shl, %zext
; Per the checks, the high byte comes from an add of the first two arguments
; and the low byte from the third argument. The base run masks the sum with
; andi 255 before slli+or; the +zbkb run expects add followed by packh with
; no separate mask.
257 define zeroext i16 @packh_i16_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2) {
258 ; RV64I-LABEL: packh_i16_2:
260 ; RV64I-NEXT: add a0, a1, a0
261 ; RV64I-NEXT: andi a0, a0, 255
262 ; RV64I-NEXT: slli a0, a0, 8
263 ; RV64I-NEXT: or a0, a0, a2
266 ; RV64ZBKB-LABEL: packh_i16_2:
268 ; RV64ZBKB-NEXT: add a0, a1, a0
269 ; RV64ZBKB-NEXT: packh a0, a2, a0
272 %5 = zext i8 %4 to i16
274 %7 = zext i8 %2 to i16
; Per the checks, this is the same add-then-combine byte pattern, but the
; 16-bit result is stored through %p (sh) instead of being returned. The
; +zbkb run still expects packh ahead of the store.
279 define void @packh_i16_3(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, ptr %p) {
280 ; RV64I-LABEL: packh_i16_3:
282 ; RV64I-NEXT: add a0, a1, a0
283 ; RV64I-NEXT: andi a0, a0, 255
284 ; RV64I-NEXT: slli a0, a0, 8
285 ; RV64I-NEXT: or a0, a0, a2
286 ; RV64I-NEXT: sh a0, 0(a3)
289 ; RV64ZBKB-LABEL: packh_i16_3:
291 ; RV64ZBKB-NEXT: add a0, a1, a0
292 ; RV64ZBKB-NEXT: packh a0, a2, a0
293 ; RV64ZBKB-NEXT: sh a0, 0(a3)
296 %5 = zext i8 %4 to i16
298 %7 = zext i8 %2 to i16
; Per the checks, the upper 32-bit half comes from an add of the first two
; arguments and the lower half from the third. The +zbkb run expects the sum
; to be zero-extended with slli/srli and then combined with pack.
304 define i64 @pack_i64_allWUsers(i32 signext %0, i32 signext %1, i32 signext %2) {
305 ; RV64I-LABEL: pack_i64_allWUsers:
307 ; RV64I-NEXT: add a0, a1, a0
308 ; RV64I-NEXT: slli a2, a2, 32
309 ; RV64I-NEXT: slli a0, a0, 32
310 ; RV64I-NEXT: srli a0, a0, 32
311 ; RV64I-NEXT: slli a0, a0, 32
312 ; RV64I-NEXT: srli a2, a2, 32
313 ; RV64I-NEXT: or a0, a0, a2
316 ; RV64ZBKB-LABEL: pack_i64_allWUsers:
318 ; RV64ZBKB-NEXT: add a0, a1, a0
319 ; RV64ZBKB-NEXT: slli a0, a0, 32
320 ; RV64ZBKB-NEXT: srli a0, a0, 32
321 ; RV64ZBKB-NEXT: pack a0, a2, a0
324 %5 = zext i32 %4 to i64
326 %7 = zext i32 %2 to i64
; Per the checks, the upper 16-bit half comes from an add of the first two
; arguments and the lower half from the third. Neither run expects a
; pack-style instruction; the +zbkb run only replaces the slli/srli mask of
; the sum with zext.h.
331 define signext i32 @pack_i32_allWUsers(i16 zeroext %0, i16 zeroext %1, i16 zeroext %2) {
332 ; RV64I-LABEL: pack_i32_allWUsers:
334 ; RV64I-NEXT: add a0, a1, a0
335 ; RV64I-NEXT: slli a0, a0, 48
336 ; RV64I-NEXT: srli a0, a0, 48
337 ; RV64I-NEXT: slli a0, a0, 16
338 ; RV64I-NEXT: or a0, a0, a2
339 ; RV64I-NEXT: sext.w a0, a0
342 ; RV64ZBKB-LABEL: pack_i32_allWUsers:
344 ; RV64ZBKB-NEXT: add a0, a1, a0
345 ; RV64ZBKB-NEXT: zext.h a0, a0
346 ; RV64ZBKB-NEXT: slli a0, a0, 16
347 ; RV64ZBKB-NEXT: or a0, a0, a2
348 ; RV64ZBKB-NEXT: sext.w a0, a0
351 %5 = zext i16 %4 to i32
353 %7 = zext i16 %2 to i32
; Materializes a 64-bit constant whose upper and lower 32-bit halves are
; equal. The base run expects a lui/slli/addi chain; the +zbkb run expects
; the 32-bit half to be built with lui+addi and duplicated with
; pack a0, a0, a0.
358 define i64 @pack_i64_imm() {
359 ; RV64I-LABEL: pack_i64_imm:
361 ; RV64I-NEXT: lui a0, 65793
362 ; RV64I-NEXT: slli a0, a0, 4
363 ; RV64I-NEXT: addi a0, a0, 257
364 ; RV64I-NEXT: slli a0, a0, 16
365 ; RV64I-NEXT: addi a0, a0, 257
366 ; RV64I-NEXT: slli a0, a0, 12
367 ; RV64I-NEXT: addi a0, a0, 16
370 ; RV64ZBKB-LABEL: pack_i64_imm:
372 ; RV64ZBKB-NEXT: lui a0, 65793
373 ; RV64ZBKB-NEXT: addi a0, a0, 16
374 ; RV64ZBKB-NEXT: pack a0, a0, a0
376 ret i64 1157442765409226768 ; 0x1010101010101010
; i32 and with 65535 (0xffff). The base run expects an slli/srli by 48 pair;
; the +zbkb run expects zext.h.
379 define i32 @zexth_i32(i32 %a) nounwind {
380 ; RV64I-LABEL: zexth_i32:
382 ; RV64I-NEXT: slli a0, a0, 48
383 ; RV64I-NEXT: srli a0, a0, 48
386 ; RV64ZBKB-LABEL: zexth_i32:
388 ; RV64ZBKB-NEXT: zext.h a0, a0
390 %and = and i32 %a, 65535
; i64 and with 65535 (0xffff). The base run expects an slli/srli by 48 pair;
; the +zbkb run expects zext.h.
394 define i64 @zexth_i64(i64 %a) nounwind {
395 ; RV64I-LABEL: zexth_i64:
397 ; RV64I-NEXT: slli a0, a0, 48
398 ; RV64I-NEXT: srli a0, a0, 48
401 ; RV64ZBKB-LABEL: zexth_i64:
403 ; RV64ZBKB-NEXT: zext.h a0, a0
405 %and = and i64 %a, 65535
; Explicit zext from i16 to i32. The base run expects an slli/srli by 48
; pair; the +zbkb run expects zext.h.
409 define i32 @zext_i16_to_i32(i16 %a) nounwind {
410 ; RV64I-LABEL: zext_i16_to_i32:
412 ; RV64I-NEXT: slli a0, a0, 48
413 ; RV64I-NEXT: srli a0, a0, 48
416 ; RV64ZBKB-LABEL: zext_i16_to_i32:
418 ; RV64ZBKB-NEXT: zext.h a0, a0
420 %1 = zext i16 %a to i32
; Explicit zext from i16 to i64. The base run expects an slli/srli by 48
; pair; the +zbkb run expects zext.h.
424 define i64 @zext_i16_to_i64(i16 %a) nounwind {
425 ; RV64I-LABEL: zext_i16_to_i64:
427 ; RV64I-NEXT: slli a0, a0, 48
428 ; RV64I-NEXT: srli a0, a0, 48
431 ; RV64ZBKB-LABEL: zext_i16_to_i64:
433 ; RV64ZBKB-NEXT: zext.h a0, a0
435 %1 = zext i16 %a to i64
439 ; This creates an i16->i32 G_ZEXT that we need to be able to select.
; An i16 is loaded from %x, zero-extended, and chosen against %y by a select
; on %z. Per the checks, the extension after the lh is an slli/srli by 48
; pair in the base run and zext.h in the +zbkb run.
440 define i32 @zext_i16_i32_2(i1 %z, ptr %x, i32 %y) {
441 ; RV64I-LABEL: zext_i16_i32_2:
443 ; RV64I-NEXT: andi a3, a0, 1
444 ; RV64I-NEXT: bnez a3, .LBB20_2
445 ; RV64I-NEXT: # %bb.1:
446 ; RV64I-NEXT: mv a0, a2
448 ; RV64I-NEXT: .LBB20_2:
449 ; RV64I-NEXT: lh a0, 0(a1)
450 ; RV64I-NEXT: slli a0, a0, 48
451 ; RV64I-NEXT: srli a0, a0, 48
454 ; RV64ZBKB-LABEL: zext_i16_i32_2:
456 ; RV64ZBKB-NEXT: andi a3, a0, 1
457 ; RV64ZBKB-NEXT: bnez a3, .LBB20_2
458 ; RV64ZBKB-NEXT: # %bb.1:
459 ; RV64ZBKB-NEXT: mv a0, a2
461 ; RV64ZBKB-NEXT: .LBB20_2:
462 ; RV64ZBKB-NEXT: lh a0, 0(a1)
463 ; RV64ZBKB-NEXT: zext.h a0, a0
465 %w = load i16, ptr %x
467 %b = zext i16 %a to i32
468 %c = select i1 %z, i32 %b, i32 %y
472 ; This creates an i16->i32 G_SEXT that we need to be able to select.
; An i16 is loaded from %x, sign-extended, and chosen against %y by a select
; on %z. Per the checks, both runs expect the same slli/srai by 48 pair after
; the lh.
473 define i32 @sext_i16_i32(i1 %z, ptr %x, i32 %y) {
474 ; RV64I-LABEL: sext_i16_i32:
476 ; RV64I-NEXT: andi a3, a0, 1
477 ; RV64I-NEXT: bnez a3, .LBB21_2
478 ; RV64I-NEXT: # %bb.1:
479 ; RV64I-NEXT: mv a0, a2
481 ; RV64I-NEXT: .LBB21_2:
482 ; RV64I-NEXT: lh a0, 0(a1)
483 ; RV64I-NEXT: slli a0, a0, 48
484 ; RV64I-NEXT: srai a0, a0, 48
487 ; RV64ZBKB-LABEL: sext_i16_i32:
489 ; RV64ZBKB-NEXT: andi a3, a0, 1
490 ; RV64ZBKB-NEXT: bnez a3, .LBB21_2
491 ; RV64ZBKB-NEXT: # %bb.1:
492 ; RV64ZBKB-NEXT: mv a0, a2
494 ; RV64ZBKB-NEXT: .LBB21_2:
495 ; RV64ZBKB-NEXT: lh a0, 0(a1)
496 ; RV64ZBKB-NEXT: slli a0, a0, 48
497 ; RV64ZBKB-NEXT: srai a0, a0, 48
499 %w = load i16, ptr %x
501 %b = sext i16 %a to i32
502 %c = select i1 %z, i32 %b, i32 %y