1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3 ; RUN: | FileCheck -check-prefixes=ALL,SLOW,RV32I %s
4 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
5 ; RUN: | FileCheck -check-prefixes=ALL,SLOW,RV64I %s
6 ; RUN: llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -verify-machineinstrs < %s \
7 ; RUN: | FileCheck -check-prefixes=ALL,FAST,RV32I-FAST %s
8 ; RUN: llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -verify-machineinstrs < %s \
9 ; RUN: | FileCheck -check-prefixes=ALL,FAST,RV64I-FAST %s
11 ; A collection of cases showing codegen for unaligned loads and stores
; Byte load with align 1. The shared check line expects a single lbu in every
; RUN configuration.
13 define i8 @load_i8(ptr %p) {
16 ; ALL-NEXT: lbu a0, 0(a0)
18 %res = load i8, ptr %p, align 1
; i16 load with align 1. Without +unaligned-scalar-mem the checks expect two
; lbu loads (offsets 0 and 1) combined with slli/or; with it, a single lh.
22 define i16 @load_i16(ptr %p) {
23 ; SLOW-LABEL: load_i16:
25 ; SLOW-NEXT: lbu a1, 1(a0)
26 ; SLOW-NEXT: lbu a0, 0(a0)
27 ; SLOW-NEXT: slli a1, a1, 8
28 ; SLOW-NEXT: or a0, a1, a0
31 ; FAST-LABEL: load_i16:
33 ; FAST-NEXT: lh a0, 0(a0)
35 %res = load i16, ptr %p, align 1
; i24 load with align 1. Without +unaligned-scalar-mem the checks expect three
; lbu loads (offsets 0, 1, 2) merged with slli/or; with it, an lhu at offset 0
; plus an lbu at offset 2, merged with slli 16 / or.
39 define i24 @load_i24(ptr %p) {
40 ; SLOW-LABEL: load_i24:
42 ; SLOW-NEXT: lbu a1, 1(a0)
43 ; SLOW-NEXT: lbu a2, 0(a0)
44 ; SLOW-NEXT: lbu a0, 2(a0)
45 ; SLOW-NEXT: slli a1, a1, 8
46 ; SLOW-NEXT: or a1, a1, a2
47 ; SLOW-NEXT: slli a0, a0, 16
48 ; SLOW-NEXT: or a0, a1, a0
51 ; FAST-LABEL: load_i24:
53 ; FAST-NEXT: lbu a1, 2(a0)
54 ; FAST-NEXT: lhu a0, 0(a0)
55 ; FAST-NEXT: slli a1, a1, 16
56 ; FAST-NEXT: or a0, a0, a1
58 %res = load i24, ptr %p, align 1
; i32 load with align 1. Without +unaligned-scalar-mem the checks expect four
; lbu loads (offsets 0..3) shifted by 8/16/24 and or-ed together; with it, a
; single lw.
62 define i32 @load_i32(ptr %p) {
63 ; SLOW-LABEL: load_i32:
65 ; SLOW-NEXT: lbu a1, 1(a0)
66 ; SLOW-NEXT: lbu a2, 0(a0)
67 ; SLOW-NEXT: lbu a3, 2(a0)
68 ; SLOW-NEXT: lbu a0, 3(a0)
69 ; SLOW-NEXT: slli a1, a1, 8
70 ; SLOW-NEXT: or a1, a1, a2
71 ; SLOW-NEXT: slli a3, a3, 16
72 ; SLOW-NEXT: slli a0, a0, 24
73 ; SLOW-NEXT: or a0, a0, a3
74 ; SLOW-NEXT: or a0, a0, a1
77 ; FAST-LABEL: load_i32:
79 ; FAST-NEXT: lw a0, 0(a0)
81 %res = load i32, ptr %p, align 1
; i64 load with align 1. Expected codegen differs per RUN configuration:
;  - riscv32, no unaligned support: each 32-bit half is assembled from four
;    lbu loads; bytes 0..3 end up in a0 (via mv from a2), bytes 4..7 in a1.
;  - riscv64, no unaligned support: eight lbu loads; the word built from
;    bytes 4..7 is shifted left by 32 and or-ed with the word from bytes 0..3.
;  - riscv32 with +unaligned-scalar-mem: two lw loads (offsets 0 and 4).
;  - riscv64 with +unaligned-scalar-mem: a single ld.
85 define i64 @load_i64(ptr %p) {
86 ; RV32I-LABEL: load_i64:
88 ; RV32I-NEXT: lbu a1, 1(a0)
89 ; RV32I-NEXT: lbu a2, 0(a0)
90 ; RV32I-NEXT: lbu a3, 2(a0)
91 ; RV32I-NEXT: lbu a4, 3(a0)
92 ; RV32I-NEXT: slli a1, a1, 8
93 ; RV32I-NEXT: or a1, a1, a2
94 ; RV32I-NEXT: slli a3, a3, 16
95 ; RV32I-NEXT: slli a4, a4, 24
96 ; RV32I-NEXT: or a2, a4, a3
97 ; RV32I-NEXT: or a2, a2, a1
98 ; RV32I-NEXT: lbu a1, 5(a0)
99 ; RV32I-NEXT: lbu a3, 4(a0)
100 ; RV32I-NEXT: lbu a4, 6(a0)
101 ; RV32I-NEXT: lbu a0, 7(a0)
102 ; RV32I-NEXT: slli a1, a1, 8
103 ; RV32I-NEXT: or a1, a1, a3
104 ; RV32I-NEXT: slli a4, a4, 16
105 ; RV32I-NEXT: slli a0, a0, 24
106 ; RV32I-NEXT: or a0, a0, a4
107 ; RV32I-NEXT: or a1, a0, a1
108 ; RV32I-NEXT: mv a0, a2
111 ; RV64I-LABEL: load_i64:
113 ; RV64I-NEXT: lbu a1, 1(a0)
114 ; RV64I-NEXT: lbu a2, 0(a0)
115 ; RV64I-NEXT: lbu a3, 2(a0)
116 ; RV64I-NEXT: lbu a4, 3(a0)
117 ; RV64I-NEXT: slli a1, a1, 8
118 ; RV64I-NEXT: or a1, a1, a2
119 ; RV64I-NEXT: slli a3, a3, 16
120 ; RV64I-NEXT: slli a4, a4, 24
121 ; RV64I-NEXT: or a3, a4, a3
122 ; RV64I-NEXT: or a1, a3, a1
123 ; RV64I-NEXT: lbu a2, 5(a0)
124 ; RV64I-NEXT: lbu a3, 4(a0)
125 ; RV64I-NEXT: lbu a4, 6(a0)
126 ; RV64I-NEXT: lbu a0, 7(a0)
127 ; RV64I-NEXT: slli a2, a2, 8
128 ; RV64I-NEXT: or a2, a2, a3
129 ; RV64I-NEXT: slli a4, a4, 16
130 ; RV64I-NEXT: slli a0, a0, 24
131 ; RV64I-NEXT: or a0, a0, a4
132 ; RV64I-NEXT: or a0, a0, a2
133 ; RV64I-NEXT: slli a0, a0, 32
134 ; RV64I-NEXT: or a0, a0, a1
137 ; RV32I-FAST-LABEL: load_i64:
138 ; RV32I-FAST: # %bb.0:
139 ; RV32I-FAST-NEXT: lw a2, 0(a0)
140 ; RV32I-FAST-NEXT: lw a1, 4(a0)
141 ; RV32I-FAST-NEXT: mv a0, a2
142 ; RV32I-FAST-NEXT: ret
144 ; RV64I-FAST-LABEL: load_i64:
145 ; RV64I-FAST: # %bb.0:
146 ; RV64I-FAST-NEXT: ld a0, 0(a0)
147 ; RV64I-FAST-NEXT: ret
148 %res = load i64, ptr %p, align 1
; Byte store with align 1. The shared check lines expect a single sb in every
; RUN configuration.
152 define void @store_i8(ptr %p, i8 %v) {
153 ; ALL-LABEL: store_i8:
155 ; ALL-NEXT: sb a1, 0(a0)
157 store i8 %v, ptr %p, align 1
; i16 store with align 1. Without +unaligned-scalar-mem the checks expect two
; sb stores (low byte, then the value shifted right by 8); with it, a single sh.
161 define void @store_i16(ptr %p, i16 %v) {
162 ; SLOW-LABEL: store_i16:
164 ; SLOW-NEXT: sb a1, 0(a0)
165 ; SLOW-NEXT: srli a1, a1, 8
166 ; SLOW-NEXT: sb a1, 1(a0)
169 ; FAST-LABEL: store_i16:
171 ; FAST-NEXT: sh a1, 0(a0)
173 store i16 %v, ptr %p, align 1
; i24 store with align 1. Without +unaligned-scalar-mem the checks expect three
; sb stores (offsets 0, 1, 2) fed by srli 8 and srli 16; with it, an sh at
; offset 0 plus an sb of the value shifted right by 16 at offset 2.
177 define void @store_i24(ptr %p, i24 %v) {
178 ; SLOW-LABEL: store_i24:
180 ; SLOW-NEXT: sb a1, 0(a0)
181 ; SLOW-NEXT: srli a2, a1, 8
182 ; SLOW-NEXT: sb a2, 1(a0)
183 ; SLOW-NEXT: srli a1, a1, 16
184 ; SLOW-NEXT: sb a1, 2(a0)
187 ; FAST-LABEL: store_i24:
189 ; FAST-NEXT: sh a1, 0(a0)
190 ; FAST-NEXT: srli a1, a1, 16
191 ; FAST-NEXT: sb a1, 2(a0)
193 store i24 %v, ptr %p, align 1
; i32 store with align 1. Without +unaligned-scalar-mem the checks expect four
; sb stores (offsets 0..3) fed by srli 8/16/24; with it, a single sw.
197 define void @store_i32(ptr %p, i32 %v) {
198 ; SLOW-LABEL: store_i32:
200 ; SLOW-NEXT: sb a1, 0(a0)
201 ; SLOW-NEXT: srli a2, a1, 24
202 ; SLOW-NEXT: sb a2, 3(a0)
203 ; SLOW-NEXT: srli a2, a1, 16
204 ; SLOW-NEXT: sb a2, 2(a0)
205 ; SLOW-NEXT: srli a1, a1, 8
206 ; SLOW-NEXT: sb a1, 1(a0)
209 ; FAST-LABEL: store_i32:
211 ; FAST-NEXT: sw a1, 0(a0)
213 store i32 %v, ptr %p, align 1
; i64 store with align 1. Expected codegen differs per RUN configuration:
;  - riscv32, no unaligned support: eight sb stores; a1 supplies bytes 0..3
;    and a2 supplies bytes 4..7, each via srli 8/16/24.
;  - riscv64, no unaligned support: eight sb stores from a1 via srli 8..56.
;  - riscv32 with +unaligned-scalar-mem: two sw stores (a1 at 0, a2 at 4).
;  - riscv64 with +unaligned-scalar-mem: a single sd.
217 define void @store_i64(ptr %p, i64 %v) {
218 ; RV32I-LABEL: store_i64:
220 ; RV32I-NEXT: sb a2, 4(a0)
221 ; RV32I-NEXT: sb a1, 0(a0)
222 ; RV32I-NEXT: srli a3, a2, 24
223 ; RV32I-NEXT: sb a3, 7(a0)
224 ; RV32I-NEXT: srli a3, a2, 16
225 ; RV32I-NEXT: sb a3, 6(a0)
226 ; RV32I-NEXT: srli a2, a2, 8
227 ; RV32I-NEXT: sb a2, 5(a0)
228 ; RV32I-NEXT: srli a2, a1, 24
229 ; RV32I-NEXT: sb a2, 3(a0)
230 ; RV32I-NEXT: srli a2, a1, 16
231 ; RV32I-NEXT: sb a2, 2(a0)
232 ; RV32I-NEXT: srli a1, a1, 8
233 ; RV32I-NEXT: sb a1, 1(a0)
236 ; RV64I-LABEL: store_i64:
238 ; RV64I-NEXT: sb a1, 0(a0)
239 ; RV64I-NEXT: srli a2, a1, 56
240 ; RV64I-NEXT: sb a2, 7(a0)
241 ; RV64I-NEXT: srli a2, a1, 48
242 ; RV64I-NEXT: sb a2, 6(a0)
243 ; RV64I-NEXT: srli a2, a1, 40
244 ; RV64I-NEXT: sb a2, 5(a0)
245 ; RV64I-NEXT: srli a2, a1, 32
246 ; RV64I-NEXT: sb a2, 4(a0)
247 ; RV64I-NEXT: srli a2, a1, 24
248 ; RV64I-NEXT: sb a2, 3(a0)
249 ; RV64I-NEXT: srli a2, a1, 16
250 ; RV64I-NEXT: sb a2, 2(a0)
251 ; RV64I-NEXT: srli a1, a1, 8
252 ; RV64I-NEXT: sb a1, 1(a0)
255 ; RV32I-FAST-LABEL: store_i64:
256 ; RV32I-FAST: # %bb.0:
257 ; RV32I-FAST-NEXT: sw a2, 4(a0)
258 ; RV32I-FAST-NEXT: sw a1, 0(a0)
259 ; RV32I-FAST-NEXT: ret
261 ; RV64I-FAST-LABEL: store_i64:
262 ; RV64I-FAST: # %bb.0:
263 ; RV64I-FAST-NEXT: sd a1, 0(a0)
264 ; RV64I-FAST-NEXT: ret
265 store i64 %v, ptr %p, align 1
; Store-merging case, byte granularity up to 16 bits. Expected codegen: without
; +unaligned-scalar-mem, two separate sb stores of zero at offsets 0 and 1;
; with it, a single sh of zero at offset 0.
269 define void @merge_stores_i8_i16(ptr %p) {
270 ; SLOW-LABEL: merge_stores_i8_i16:
272 ; SLOW-NEXT: sb zero, 0(a0)
273 ; SLOW-NEXT: sb zero, 1(a0)
276 ; FAST-LABEL: merge_stores_i8_i16:
278 ; FAST-NEXT: sh zero, 0(a0)
281 %p2 = getelementptr i8, ptr %p, i32 1
; Store-merging case, byte granularity up to 32 bits. Expected codegen: without
; +unaligned-scalar-mem, four separate sb stores of zero at offsets 0..3; with
; it, a single sw of zero at offset 0.
286 define void @merge_stores_i8_i32(ptr %p) {
287 ; SLOW-LABEL: merge_stores_i8_i32:
289 ; SLOW-NEXT: sb zero, 0(a0)
290 ; SLOW-NEXT: sb zero, 1(a0)
291 ; SLOW-NEXT: sb zero, 2(a0)
292 ; SLOW-NEXT: sb zero, 3(a0)
295 ; FAST-LABEL: merge_stores_i8_i32:
297 ; FAST-NEXT: sw zero, 0(a0)
300 %p2 = getelementptr i8, ptr %p, i32 1
302 %p3 = getelementptr i8, ptr %p, i32 2
304 %p4 = getelementptr i8, ptr %p, i32 3
; Store-merging case, byte granularity up to 64 bits. Expected codegen:
;  - no unaligned support (both targets): eight sb stores of zero, offsets 0..7.
;  - riscv32 with +unaligned-scalar-mem: two sw stores of zero (offsets 0, 4).
;  - riscv64 with +unaligned-scalar-mem: a single sd of zero.
309 define void @merge_stores_i8_i64(ptr %p) {
310 ; SLOW-LABEL: merge_stores_i8_i64:
312 ; SLOW-NEXT: sb zero, 0(a0)
313 ; SLOW-NEXT: sb zero, 1(a0)
314 ; SLOW-NEXT: sb zero, 2(a0)
315 ; SLOW-NEXT: sb zero, 3(a0)
316 ; SLOW-NEXT: sb zero, 4(a0)
317 ; SLOW-NEXT: sb zero, 5(a0)
318 ; SLOW-NEXT: sb zero, 6(a0)
319 ; SLOW-NEXT: sb zero, 7(a0)
322 ; RV32I-FAST-LABEL: merge_stores_i8_i64:
323 ; RV32I-FAST: # %bb.0:
324 ; RV32I-FAST-NEXT: sw zero, 0(a0)
325 ; RV32I-FAST-NEXT: sw zero, 4(a0)
326 ; RV32I-FAST-NEXT: ret
328 ; RV64I-FAST-LABEL: merge_stores_i8_i64:
329 ; RV64I-FAST: # %bb.0:
330 ; RV64I-FAST-NEXT: sd zero, 0(a0)
331 ; RV64I-FAST-NEXT: ret
333 %p2 = getelementptr i8, ptr %p, i32 1
335 %p3 = getelementptr i8, ptr %p, i32 2
337 %p4 = getelementptr i8, ptr %p, i32 3
339 %p5 = getelementptr i8, ptr %p, i32 4
341 %p6 = getelementptr i8, ptr %p, i32 5
343 %p7 = getelementptr i8, ptr %p, i32 6
345 %p8 = getelementptr i8, ptr %p, i32 7
; Store-merging case, halfword granularity up to 32 bits. Expected codegen:
; without +unaligned-scalar-mem, two sh stores of zero at offsets 0 and 2; with
; it, a single sw of zero at offset 0.
350 define void @merge_stores_i16_i32(ptr %p) {
351 ; SLOW-LABEL: merge_stores_i16_i32:
353 ; SLOW-NEXT: sh zero, 0(a0)
354 ; SLOW-NEXT: sh zero, 2(a0)
357 ; FAST-LABEL: merge_stores_i16_i32:
359 ; FAST-NEXT: sw zero, 0(a0)
362 %p2 = getelementptr i16, ptr %p, i32 1
; Store-merging case, halfword granularity up to 64 bits. Expected codegen:
;  - no unaligned support (both targets): four sh stores of zero (0, 2, 4, 6).
;  - riscv32 with +unaligned-scalar-mem: two sw stores of zero (offsets 0, 4).
;  - riscv64 with +unaligned-scalar-mem: a single sd of zero.
367 define void @merge_stores_i16_i64(ptr %p) {
368 ; SLOW-LABEL: merge_stores_i16_i64:
370 ; SLOW-NEXT: sh zero, 0(a0)
371 ; SLOW-NEXT: sh zero, 2(a0)
372 ; SLOW-NEXT: sh zero, 4(a0)
373 ; SLOW-NEXT: sh zero, 6(a0)
376 ; RV32I-FAST-LABEL: merge_stores_i16_i64:
377 ; RV32I-FAST: # %bb.0:
378 ; RV32I-FAST-NEXT: sw zero, 0(a0)
379 ; RV32I-FAST-NEXT: sw zero, 4(a0)
380 ; RV32I-FAST-NEXT: ret
382 ; RV64I-FAST-LABEL: merge_stores_i16_i64:
383 ; RV64I-FAST: # %bb.0:
384 ; RV64I-FAST-NEXT: sd zero, 0(a0)
385 ; RV64I-FAST-NEXT: ret
387 %p2 = getelementptr i16, ptr %p, i32 1
389 %p3 = getelementptr i16, ptr %p, i32 2
391 %p4 = getelementptr i16, ptr %p, i32 3
; Store-merging case, word granularity up to 64 bits. Expected codegen:
;  - no unaligned support (both targets): two sw stores of zero (offsets 0, 4).
;  - riscv32 with +unaligned-scalar-mem: still two sw stores of zero.
;  - riscv64 with +unaligned-scalar-mem: a single sd of zero.
396 define void @merge_stores_i32_i64(ptr %p) {
397 ; SLOW-LABEL: merge_stores_i32_i64:
399 ; SLOW-NEXT: sw zero, 0(a0)
400 ; SLOW-NEXT: sw zero, 4(a0)
403 ; RV32I-FAST-LABEL: merge_stores_i32_i64:
404 ; RV32I-FAST: # %bb.0:
405 ; RV32I-FAST-NEXT: sw zero, 0(a0)
406 ; RV32I-FAST-NEXT: sw zero, 4(a0)
407 ; RV32I-FAST-NEXT: ret
409 ; RV64I-FAST-LABEL: merge_stores_i32_i64:
410 ; RV64I-FAST: # %bb.0:
411 ; RV64I-FAST-NEXT: sd zero, 0(a0)
412 ; RV64I-FAST-NEXT: ret
414 %p2 = getelementptr i32, ptr %p, i32 1
; Stores the i64 constant 18364758544493064720 (0xFEDCBA9876543210) with
; align 1. Expected codegen:
;  - no unaligned support (both targets): eight li/sb pairs, one per byte
;    (0x10 at offset 0 up to 0xFE, materialized as -2, at offset 7).
;  - riscv32 with +unaligned-scalar-mem: each 32-bit half is materialized with
;    lui/addi and stored with sw (offsets 4 and 0).
;  - riscv64 with +unaligned-scalar-mem: the value is loaded from the
;    .LCPI16_0 label with lui %hi / ld %lo and stored with a single sd.
419 define void @store_large_constant(ptr %x) {
420 ; SLOW-LABEL: store_large_constant:
422 ; SLOW-NEXT: li a1, -2
423 ; SLOW-NEXT: sb a1, 7(a0)
424 ; SLOW-NEXT: li a1, 220
425 ; SLOW-NEXT: sb a1, 6(a0)
426 ; SLOW-NEXT: li a1, 186
427 ; SLOW-NEXT: sb a1, 5(a0)
428 ; SLOW-NEXT: li a1, 152
429 ; SLOW-NEXT: sb a1, 4(a0)
430 ; SLOW-NEXT: li a1, 118
431 ; SLOW-NEXT: sb a1, 3(a0)
432 ; SLOW-NEXT: li a1, 84
433 ; SLOW-NEXT: sb a1, 2(a0)
434 ; SLOW-NEXT: li a1, 50
435 ; SLOW-NEXT: sb a1, 1(a0)
436 ; SLOW-NEXT: li a1, 16
437 ; SLOW-NEXT: sb a1, 0(a0)
440 ; RV32I-FAST-LABEL: store_large_constant:
441 ; RV32I-FAST: # %bb.0:
442 ; RV32I-FAST-NEXT: lui a1, 1043916
443 ; RV32I-FAST-NEXT: addi a1, a1, -1384
444 ; RV32I-FAST-NEXT: sw a1, 4(a0)
445 ; RV32I-FAST-NEXT: lui a1, 484675
446 ; RV32I-FAST-NEXT: addi a1, a1, 528
447 ; RV32I-FAST-NEXT: sw a1, 0(a0)
448 ; RV32I-FAST-NEXT: ret
450 ; RV64I-FAST-LABEL: store_large_constant:
451 ; RV64I-FAST: # %bb.0:
452 ; RV64I-FAST-NEXT: lui a1, %hi(.LCPI16_0)
453 ; RV64I-FAST-NEXT: ld a1, %lo(.LCPI16_0)(a1)
454 ; RV64I-FAST-NEXT: sd a1, 0(a0)
455 ; RV64I-FAST-NEXT: ret
456 store i64 18364758544493064720, ptr %x, align 1