1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
4 ; Test SIMD loads and stores
6 target triple = "wasm32-unknown-unknown"
; ==============================================================================
; 16 x i8
; ==============================================================================
define <16 x i8> @load_v16i8(ptr %p) {
; CHECK-LABEL: load_v16i8:
; CHECK:         .functype load_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, ptr %p
  ret <16 x i8> %v
}
define <16 x i8> @load_splat_v16i8(ptr %p) {
; CHECK-LABEL: load_splat_v16i8:
; CHECK:         .functype load_splat_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, ptr %p
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
define <16 x i8> @load_v16i8_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_v16i8_with_folded_offset:
; CHECK:         .functype load_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <16 x i8>, ptr %s
  ret <16 x i8> %v
}
define <16 x i8> @load_splat_v16i8_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load i8, ptr %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
define <16 x i8> @load_v16i8_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, ptr %p, i32 1
  %v = load <16 x i8>, ptr %s
  ret <16 x i8> %v
}
define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 1
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, ptr %p, i32 1
  %e = load i8, ptr %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, ptr %p, i32 -1
  %v = load <16 x i8>, ptr %s
  ret <16 x i8> %v
}
define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, ptr %p, i32 -1
  %e = load i8, ptr %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
define <16 x i8> @load_v16i8_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_offset:
; CHECK:         .functype load_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <16 x i8>, ptr %s
  ret <16 x i8> %v
}
define <16 x i8> @load_splat_v16i8_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load i8, ptr %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
define <16 x i8> @load_v16i8_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, ptr %p, i32 1
  %v = load <16 x i8>, ptr %s
  ret <16 x i8> %v
}
define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i8, ptr %p, i32 1
  %e = load i8, ptr %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
define <16 x i8> @load_v16i8_from_numeric_address() {
; CHECK-LABEL: load_v16i8_from_numeric_address:
; CHECK:         .functype load_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <16 x i8>, ptr %s
  ret <16 x i8> %v
}
define <16 x i8> @load_splat_v16i8_from_numeric_address() {
; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
; CHECK:         .functype load_splat_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load8_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %e = load i8, ptr %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
211 @gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <16 x i8> @load_v16i8_from_global_address() {
; CHECK-LABEL: load_v16i8_from_global_address:
; CHECK:         .functype load_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, ptr @gv_v16i8
  ret <16 x i8> %v
}
223 @gv_i8 = global i8 42
define <16 x i8> @load_splat_v16i8_from_global_address() {
; CHECK-LABEL: load_splat_v16i8_from_global_address:
; CHECK:         .functype load_splat_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load8_splat gv_i8
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, ptr @gv_i8
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
define void @store_v16i8(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_v16i8:
; CHECK:         .functype store_v16i8 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , ptr %p
  ret void
}
define void @store_v16i8_with_folded_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_v16i8_with_folded_offset:
; CHECK:         .functype store_v16i8_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <16 x i8> %v , ptr %s
  ret void
}
define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
; CHECK:         .functype store_v16i8_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, ptr %p, i32 1
  store <16 x i8> %v , ptr %s
  ret void
}
define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, ptr %p, i32 -1
  store <16 x i8> %v , ptr %s
  ret void
}
define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_offset:
; CHECK:         .functype store_v16i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <16 x i8> %v , ptr %s
  ret void
}
define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, ptr %p, i32 1
  store <16 x i8> %v , ptr %s
  ret void
}
define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_numeric_address:
; CHECK:         .functype store_v16i8_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  store <16 x i8> %v , ptr %s
  ret void
}
define void @store_v16i8_to_global_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_global_address:
; CHECK:         .functype store_v16i8_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , ptr @gv_v16i8
  ret void
}
; ==============================================================================
; 8 x i16
; ==============================================================================
define <8 x i16> @load_v8i16(ptr %p) {
; CHECK-LABEL: load_v8i16:
; CHECK:         .functype load_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, ptr %p
  ret <8 x i16> %v
}
define <8 x i16> @load_splat_v8i16(ptr %p) {
; CHECK-LABEL: load_splat_v8i16:
; CHECK:         .functype load_splat_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, ptr %p
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}
define <8 x i16> @load_sext_v8i16(ptr %p) {
; CHECK-LABEL: load_sext_v8i16:
; CHECK:         .functype load_sext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, ptr %p
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
define <8 x i16> @load_zext_v8i16(ptr %p) {
; CHECK-LABEL: load_zext_v8i16:
; CHECK:         .functype load_zext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, ptr %p
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
define <8 x i8> @load_ext_v8i16(ptr %p) {
; CHECK-LABEL: load_ext_v8i16:
; CHECK:         .functype load_ext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, ptr %p
  ret <8 x i8> %v
}
define <8 x i16> @load_v8i16_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_v8i16_with_folded_offset:
; CHECK:         .functype load_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <8 x i16>, ptr %s
  ret <8 x i16> %v
}
define <8 x i16> @load_splat_v8i16_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load i16, ptr %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}
define <8 x i16> @load_sext_v8i16_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <8 x i8>, ptr %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
define <8 x i16> @load_zext_v8i16_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <8 x i8>, ptr %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
define <8 x i8> @load_ext_v8i16_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <8 x i8>, ptr %s
  ret <8 x i8> %v
}
define <8 x i16> @load_v8i16_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, ptr %p, i32 1
  %v = load <8 x i16>, ptr %s
  ret <8 x i16> %v
}
define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 2
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, ptr %p, i32 1
  %e = load i16, ptr %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}
define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 1
  %v = load <8 x i8>, ptr %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 1
  %v = load <8 x i8>, ptr %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 1
  %v = load <8 x i8>, ptr %s
  ret <8 x i8> %v
}
define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, ptr %p, i32 -1
  %v = load <8 x i16>, ptr %s
  ret <8 x i16> %v
}
define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, ptr %p, i32 -1
  %e = load i16, ptr %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}
define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 -1
  %v = load <8 x i8>, ptr %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 -1
  %v = load <8 x i8>, ptr %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 -1
  %v = load <8 x i8>, ptr %s
  ret <8 x i8> %v
}
define <8 x i16> @load_v8i16_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_offset:
; CHECK:         .functype load_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <8 x i16>, ptr %s
  ret <8 x i16> %v
}
define <8 x i16> @load_splat_v8i16_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load i16, ptr %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}
define <8 x i16> @load_sext_v8i16_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <8 x i8>, ptr %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
define <8 x i16> @load_zext_v8i16_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <8 x i8>, ptr %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
define <8 x i8> @load_ext_v8i16_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <8 x i8>, ptr %s
  ret <8 x i8> %v
}
define <8 x i16> @load_v8i16_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, ptr %p, i32 1
  %v = load <8 x i16>, ptr %s
  ret <8 x i16> %v
}
define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i16, ptr %p, i32 1
  %e = load i16, ptr %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}
define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, ptr %p, i32 1
  %v = load <8 x i8>, ptr %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, ptr %p, i32 1
  %v = load <8 x i8>, ptr %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, ptr %p, i32 1
  %v = load <8 x i8>, ptr %s
  ret <8 x i8> %v
}
define <8 x i16> @load_v8i16_from_numeric_address() {
; CHECK-LABEL: load_v8i16_from_numeric_address:
; CHECK:         .functype load_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <8 x i16>, ptr %s
  ret <8 x i16> %v
}
define <8 x i16> @load_splat_v8i16_from_numeric_address() {
; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
; CHECK:         .functype load_splat_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load16_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %e = load i16, ptr %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}
define <8 x i16> @load_sext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
; CHECK:         .functype load_sext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <8 x i8>, ptr %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
define <8 x i16> @load_zext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
; CHECK:         .functype load_zext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <8 x i8>, ptr %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
define <8 x i8> @load_ext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
; CHECK:         .functype load_ext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_zero 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <8 x i8>, ptr %s
  ret <8 x i8> %v
}
845 @gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
define <8 x i16> @load_v8i16_from_global_address() {
; CHECK-LABEL: load_v8i16_from_global_address:
; CHECK:         .functype load_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, ptr @gv_v8i16
  ret <8 x i16> %v
}
857 @gv_i16 = global i16 42
define <8 x i16> @load_splat_v8i16_from_global_address() {
; CHECK-LABEL: load_splat_v8i16_from_global_address:
; CHECK:         .functype load_splat_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load16_splat gv_i16
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, ptr @gv_i16
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}
871 @gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <8 x i16> @load_sext_v8i16_from_global_address() {
; CHECK-LABEL: load_sext_v8i16_from_global_address:
; CHECK:         .functype load_sext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, ptr @gv_v8i8
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
define <8 x i16> @load_zext_v8i16_from_global_address() {
; CHECK-LABEL: load_zext_v8i16_from_global_address:
; CHECK:         .functype load_zext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, ptr @gv_v8i8
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
define <8 x i8> @load_ext_v8i16_from_global_address() {
; CHECK-LABEL: load_ext_v8i16_from_global_address:
; CHECK:         .functype load_ext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_zero gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, ptr @gv_v8i8
  ret <8 x i8> %v
}
define void @store_v8i16(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_v8i16:
; CHECK:         .functype store_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , ptr %p
  ret void
}
define void @store_narrowing_v8i16(<8 x i8> %v, ptr %p) {
; CHECK-LABEL: store_narrowing_v8i16:
; CHECK:         .functype store_narrowing_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i8> %v, ptr %p
  ret void
}
define void @store_v8i16_with_folded_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_v8i16_with_folded_offset:
; CHECK:         .functype store_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <8 x i16> %v , ptr %s
  ret void
}
947 define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, ptr %p) {
948 ; CHECK-LABEL: store_narrowing_v8i16_with_folded_offset:
949 ; CHECK: .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
950 ; CHECK-NEXT: # %bb.0:
951 ; CHECK-NEXT: local.get 1
952 ; CHECK-NEXT: local.get 0
953 ; CHECK-NEXT: v128.store64_lane 16, 0
954 ; CHECK-NEXT: # fallthrough-return
955 %q = ptrtoint ptr %p to i32
956 %r = add nuw i32 %q, 16
957 %s = inttoptr i32 %r to ptr
958 store <8 x i8> %v , ptr %s
; An inbounds GEP with a positive index folds into the store's offset
; immediate, scaled by the element size (16 for <8 x i16>, 8 for <8 x i8>).
962 define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, ptr %p) {
963 ; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
964 ; CHECK: .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
965 ; CHECK-NEXT: # %bb.0:
966 ; CHECK-NEXT: local.get 1
967 ; CHECK-NEXT: local.get 0
968 ; CHECK-NEXT: v128.store 16
969 ; CHECK-NEXT: # fallthrough-return
970 %s = getelementptr inbounds <8 x i16>, ptr %p, i32 1
971 store <8 x i16> %v , ptr %s
975 define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, ptr %p) {
976 ; CHECK-LABEL: store_narrowing_v8i16_with_folded_gep_offset:
977 ; CHECK: .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> ()
978 ; CHECK-NEXT: # %bb.0:
979 ; CHECK-NEXT: local.get 1
980 ; CHECK-NEXT: local.get 0
981 ; CHECK-NEXT: v128.store64_lane 8, 0
982 ; CHECK-NEXT: # fallthrough-return
983 %s = getelementptr inbounds <8 x i8>, ptr %p, i32 1
984 store <8 x i8> %v , ptr %s
; Negative GEP offsets are not folded into the offset immediate; an explicit
; i32.add with a negative constant computes the address instead.
988 define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, ptr %p) {
989 ; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
990 ; CHECK: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
991 ; CHECK-NEXT: # %bb.0:
992 ; CHECK-NEXT: local.get 1
993 ; CHECK-NEXT: i32.const -16
994 ; CHECK-NEXT: i32.add
995 ; CHECK-NEXT: local.get 0
996 ; CHECK-NEXT: v128.store 0
997 ; CHECK-NEXT: # fallthrough-return
998 %s = getelementptr inbounds <8 x i16>, ptr %p, i32 -1
999 store <8 x i16> %v , ptr %s
1003 define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v, ptr %p) {
1004 ; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_negative_offset:
1005 ; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
1006 ; CHECK-NEXT: # %bb.0:
1007 ; CHECK-NEXT: local.get 1
1008 ; CHECK-NEXT: i32.const -8
1009 ; CHECK-NEXT: i32.add
1010 ; CHECK-NEXT: local.get 0
1011 ; CHECK-NEXT: v128.store64_lane 0, 0
1012 ; CHECK-NEXT: # fallthrough-return
1013 %s = getelementptr inbounds <8 x i8>, ptr %p, i32 -1
1014 store <8 x i8> %v , ptr %s
; An add carrying only nsw (not nuw) is not folded into the offset immediate
; (contrast with the nuw tests above, which fold); the address is computed
; with an explicit i32.add.
1018 define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, ptr %p) {
1019 ; CHECK-LABEL: store_v8i16_with_unfolded_offset:
1020 ; CHECK: .functype store_v8i16_with_unfolded_offset (v128, i32) -> ()
1021 ; CHECK-NEXT: # %bb.0:
1022 ; CHECK-NEXT: local.get 1
1023 ; CHECK-NEXT: i32.const 16
1024 ; CHECK-NEXT: i32.add
1025 ; CHECK-NEXT: local.get 0
1026 ; CHECK-NEXT: v128.store 0
1027 ; CHECK-NEXT: # fallthrough-return
1028 %q = ptrtoint ptr %p to i32
1029 %r = add nsw i32 %q, 16
1030 %s = inttoptr i32 %r to ptr
1031 store <8 x i16> %v , ptr %s
1035 define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, ptr %p) {
1036 ; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_offset:
1037 ; CHECK: .functype store_narrowing_v8i16_with_unfolded_offset (v128, i32) -> ()
1038 ; CHECK-NEXT: # %bb.0:
1039 ; CHECK-NEXT: local.get 1
1040 ; CHECK-NEXT: i32.const 16
1041 ; CHECK-NEXT: i32.add
1042 ; CHECK-NEXT: local.get 0
1043 ; CHECK-NEXT: v128.store64_lane 0, 0
1044 ; CHECK-NEXT: # fallthrough-return
1045 %q = ptrtoint ptr %p to i32
1046 %r = add nsw i32 %q, 16
1047 %s = inttoptr i32 %r to ptr
1048 store <8 x i8> %v , ptr %s
; A GEP without inbounds is likewise not folded (contrast with the
; folded_gep tests above); an explicit i32.add is emitted.
1052 define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, ptr %p) {
1053 ; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
1054 ; CHECK: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
1055 ; CHECK-NEXT: # %bb.0:
1056 ; CHECK-NEXT: local.get 1
1057 ; CHECK-NEXT: i32.const 16
1058 ; CHECK-NEXT: i32.add
1059 ; CHECK-NEXT: local.get 0
1060 ; CHECK-NEXT: v128.store 0
1061 ; CHECK-NEXT: # fallthrough-return
1062 %s = getelementptr <8 x i16>, ptr %p, i32 1
1063 store <8 x i16> %v , ptr %s
1067 define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, ptr %p) {
1068 ; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_offset:
1069 ; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
1070 ; CHECK-NEXT: # %bb.0:
1071 ; CHECK-NEXT: local.get 1
1072 ; CHECK-NEXT: i32.const 8
1073 ; CHECK-NEXT: i32.add
1074 ; CHECK-NEXT: local.get 0
1075 ; CHECK-NEXT: v128.store64_lane 0, 0
1076 ; CHECK-NEXT: # fallthrough-return
1077 %s = getelementptr <8 x i8>, ptr %p, i32 1
1078 store <8 x i8> %v , ptr %s
; A store to a constant numeric address lowers to an i32.const 0 base with
; the absolute address (32) as the offset immediate.
1082 define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
1083 ; CHECK-LABEL: store_v8i16_to_numeric_address:
1084 ; CHECK: .functype store_v8i16_to_numeric_address (v128) -> ()
1085 ; CHECK-NEXT: # %bb.0:
1086 ; CHECK-NEXT: i32.const 0
1087 ; CHECK-NEXT: local.get 0
1088 ; CHECK-NEXT: v128.store 32
1089 ; CHECK-NEXT: # fallthrough-return
1090 %s = inttoptr i32 32 to ptr
1091 store <8 x i16> %v , ptr %s
1095 define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, ptr %p) {
1096 ; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address:
1097 ; CHECK: .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> ()
1098 ; CHECK-NEXT: # %bb.0:
1099 ; CHECK-NEXT: i32.const 0
1100 ; CHECK-NEXT: local.get 0
1101 ; CHECK-NEXT: v128.store64_lane 32, 0
1102 ; CHECK-NEXT: # fallthrough-return
1103 %s = inttoptr i32 32 to ptr
1104 store <8 x i8> %v , ptr %s
; A store to a global folds the symbol into the offset field.
1108 define void @store_v8i16_to_global_address(<8 x i16> %v) {
1109 ; CHECK-LABEL: store_v8i16_to_global_address:
1110 ; CHECK: .functype store_v8i16_to_global_address (v128) -> ()
1111 ; CHECK-NEXT: # %bb.0:
1112 ; CHECK-NEXT: i32.const 0
1113 ; CHECK-NEXT: local.get 0
1114 ; CHECK-NEXT: v128.store gv_v8i16
1115 ; CHECK-NEXT: # fallthrough-return
1116 store <8 x i16> %v , ptr @gv_v8i16
1120 define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) {
1121 ; CHECK-LABEL: store_narrowing_v8i16_to_global_address:
1122 ; CHECK: .functype store_narrowing_v8i16_to_global_address (v128) -> ()
1123 ; CHECK-NEXT: # %bb.0:
1124 ; CHECK-NEXT: i32.const 0
1125 ; CHECK-NEXT: local.get 0
1126 ; CHECK-NEXT: v128.store64_lane gv_v8i8, 0
1127 ; CHECK-NEXT: # fallthrough-return
1128 store <8 x i8> %v , ptr @gv_v8i8
1132 ; ==============================================================================
1133 ; 4 x i32
1134 ; ==============================================================================
; Basic <4 x i32> loads: full v128.load, 32-bit splat load, and the
; widening/extending load forms.
1135 define <4 x i32> @load_v4i32(ptr %p) {
1136 ; CHECK-LABEL: load_v4i32:
1137 ; CHECK: .functype load_v4i32 (i32) -> (v128)
1138 ; CHECK-NEXT: # %bb.0:
1139 ; CHECK-NEXT: local.get 0
1140 ; CHECK-NEXT: v128.load 0
1141 ; CHECK-NEXT: # fallthrough-return
1142 %v = load <4 x i32>, ptr %p
1146 define <4 x i32> @load_splat_v4i32(ptr %addr) {
1147 ; CHECK-LABEL: load_splat_v4i32:
1148 ; CHECK: .functype load_splat_v4i32 (i32) -> (v128)
1149 ; CHECK-NEXT: # %bb.0:
1150 ; CHECK-NEXT: local.get 0
1151 ; CHECK-NEXT: v128.load32_splat 0
1152 ; CHECK-NEXT: # fallthrough-return
1153 %e = load i32, ptr %addr, align 4
1154 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1155 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
; i16 -> i32 extensions map directly onto i32x4.load16x4_s/_u.
1159 define <4 x i32> @load_sext_v4i16_to_v4i32(ptr %p) {
1160 ; CHECK-LABEL: load_sext_v4i16_to_v4i32:
1161 ; CHECK: .functype load_sext_v4i16_to_v4i32 (i32) -> (v128)
1162 ; CHECK-NEXT: # %bb.0:
1163 ; CHECK-NEXT: local.get 0
1164 ; CHECK-NEXT: i32x4.load16x4_s 0
1165 ; CHECK-NEXT: # fallthrough-return
1166 %v = load <4 x i16>, ptr %p
1167 %v2 = sext <4 x i16> %v to <4 x i32>
1171 define <4 x i32> @load_zext_v4i16_to_v4i32(ptr %p) {
1172 ; CHECK-LABEL: load_zext_v4i16_to_v4i32:
1173 ; CHECK: .functype load_zext_v4i16_to_v4i32 (i32) -> (v128)
1174 ; CHECK-NEXT: # %bb.0:
1175 ; CHECK-NEXT: local.get 0
1176 ; CHECK-NEXT: i32x4.load16x4_u 0
1177 ; CHECK-NEXT: # fallthrough-return
1178 %v = load <4 x i16>, ptr %p
1179 %v2 = zext <4 x i16> %v to <4 x i32>
; There is no single i8 -> i32 extending load, so the lowering is
; v128.load32_zero followed by two extend_low steps.
1183 define <4 x i32> @load_sext_v4i8_to_v4i32(ptr %p) {
1184 ; CHECK-LABEL: load_sext_v4i8_to_v4i32:
1185 ; CHECK: .functype load_sext_v4i8_to_v4i32 (i32) -> (v128)
1186 ; CHECK-NEXT: # %bb.0:
1187 ; CHECK-NEXT: local.get 0
1188 ; CHECK-NEXT: v128.load32_zero 0
1189 ; CHECK-NEXT: i16x8.extend_low_i8x16_s
1190 ; CHECK-NEXT: i32x4.extend_low_i16x8_s
1191 ; CHECK-NEXT: # fallthrough-return
1192 %v = load <4 x i8>, ptr %p
1193 %v2 = sext <4 x i8> %v to <4 x i32>
1197 define <4 x i32> @load_zext_v4i8_to_v4i32(ptr %p) {
1198 ; CHECK-LABEL: load_zext_v4i8_to_v4i32:
1199 ; CHECK: .functype load_zext_v4i8_to_v4i32 (i32) -> (v128)
1200 ; CHECK-NEXT: # %bb.0:
1201 ; CHECK-NEXT: local.get 0
1202 ; CHECK-NEXT: v128.load32_zero 0
1203 ; CHECK-NEXT: i16x8.extend_low_i8x16_u
1204 ; CHECK-NEXT: i32x4.extend_low_i16x8_u
1205 ; CHECK-NEXT: # fallthrough-return
1206 %v = load <4 x i8>, ptr %p
1207 %v2 = zext <4 x i8> %v to <4 x i32>
; A bare <4 x i16> load (64-bit payload) lowers to v128.load64_zero.
1211 define <4 x i16> @load_ext_v4i32(ptr %p) {
1212 ; CHECK-LABEL: load_ext_v4i32:
1213 ; CHECK: .functype load_ext_v4i32 (i32) -> (v128)
1214 ; CHECK-NEXT: # %bb.0:
1215 ; CHECK-NEXT: local.get 0
1216 ; CHECK-NEXT: v128.load64_zero 0
1217 ; CHECK-NEXT: # fallthrough-return
1218 %v = load <4 x i16>, ptr %p
; A constant offset added with nuw folds into each load's offset immediate
; (the "16" operand on the load instructions below), for every load form.
1222 define <4 x i32> @load_v4i32_with_folded_offset(ptr %p) {
1223 ; CHECK-LABEL: load_v4i32_with_folded_offset:
1224 ; CHECK: .functype load_v4i32_with_folded_offset (i32) -> (v128)
1225 ; CHECK-NEXT: # %bb.0:
1226 ; CHECK-NEXT: local.get 0
1227 ; CHECK-NEXT: v128.load 16
1228 ; CHECK-NEXT: # fallthrough-return
1229 %q = ptrtoint ptr %p to i32
1230 %r = add nuw i32 %q, 16
1231 %s = inttoptr i32 %r to ptr
1232 %v = load <4 x i32>, ptr %s
1236 define <4 x i32> @load_splat_v4i32_with_folded_offset(ptr %p) {
1237 ; CHECK-LABEL: load_splat_v4i32_with_folded_offset:
1238 ; CHECK: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128)
1239 ; CHECK-NEXT: # %bb.0:
1240 ; CHECK-NEXT: local.get 0
1241 ; CHECK-NEXT: v128.load32_splat 16
1242 ; CHECK-NEXT: # fallthrough-return
1243 %q = ptrtoint ptr %p to i32
1244 %r = add nuw i32 %q, 16
1245 %s = inttoptr i32 %r to ptr
1246 %e = load i32, ptr %s
1247 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1248 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1252 define <4 x i32> @load_sext_v4i16_to_v4i32_with_folded_offset(ptr %p) {
1253 ; CHECK-LABEL: load_sext_v4i16_to_v4i32_with_folded_offset:
1254 ; CHECK: .functype load_sext_v4i16_to_v4i32_with_folded_offset (i32) -> (v128)
1255 ; CHECK-NEXT: # %bb.0:
1256 ; CHECK-NEXT: local.get 0
1257 ; CHECK-NEXT: i32x4.load16x4_s 16
1258 ; CHECK-NEXT: # fallthrough-return
1259 %q = ptrtoint ptr %p to i32
1260 %r = add nuw i32 %q, 16
1261 %s = inttoptr i32 %r to ptr
1262 %v = load <4 x i16>, ptr %s
1263 %v2 = sext <4 x i16> %v to <4 x i32>
1267 define <4 x i32> @load_zext_v4i16_to_v4i32_with_folded_offset(ptr %p) {
1268 ; CHECK-LABEL: load_zext_v4i16_to_v4i32_with_folded_offset:
1269 ; CHECK: .functype load_zext_v4i16_to_v4i32_with_folded_offset (i32) -> (v128)
1270 ; CHECK-NEXT: # %bb.0:
1271 ; CHECK-NEXT: local.get 0
1272 ; CHECK-NEXT: i32x4.load16x4_u 16
1273 ; CHECK-NEXT: # fallthrough-return
1274 %q = ptrtoint ptr %p to i32
1275 %r = add nuw i32 %q, 16
1276 %s = inttoptr i32 %r to ptr
1277 %v = load <4 x i16>, ptr %s
1278 %v2 = zext <4 x i16> %v to <4 x i32>
1282 define <4 x i32> @load_sext_v4i8_to_v4i32_with_folded_offset(ptr %p) {
1283 ; CHECK-LABEL: load_sext_v4i8_to_v4i32_with_folded_offset:
1284 ; CHECK: .functype load_sext_v4i8_to_v4i32_with_folded_offset (i32) -> (v128)
1285 ; CHECK-NEXT: # %bb.0:
1286 ; CHECK-NEXT: local.get 0
1287 ; CHECK-NEXT: v128.load32_zero 16
1288 ; CHECK-NEXT: i16x8.extend_low_i8x16_s
1289 ; CHECK-NEXT: i32x4.extend_low_i16x8_s
1290 ; CHECK-NEXT: # fallthrough-return
1291 %q = ptrtoint ptr %p to i32
1292 %r = add nuw i32 %q, 16
1293 %s = inttoptr i32 %r to ptr
1294 %v = load <4 x i8>, ptr %s
1295 %v2 = sext <4 x i8> %v to <4 x i32>
1299 define <4 x i32> @load_zext_v4i8_to_v4i32_with_folded_offset(ptr %p) {
1300 ; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_folded_offset:
1301 ; CHECK: .functype load_zext_v4i8_to_v4i32_with_folded_offset (i32) -> (v128)
1302 ; CHECK-NEXT: # %bb.0:
1303 ; CHECK-NEXT: local.get 0
1304 ; CHECK-NEXT: v128.load32_zero 16
1305 ; CHECK-NEXT: i16x8.extend_low_i8x16_u
1306 ; CHECK-NEXT: i32x4.extend_low_i16x8_u
1307 ; CHECK-NEXT: # fallthrough-return
1308 %q = ptrtoint ptr %p to i32
1309 %r = add nuw i32 %q, 16
1310 %s = inttoptr i32 %r to ptr
1311 %v = load <4 x i8>, ptr %s
1312 %v2 = zext <4 x i8> %v to <4 x i32>
1316 define <4 x i16> @load_ext_v4i32_with_folded_offset(ptr %p) {
1317 ; CHECK-LABEL: load_ext_v4i32_with_folded_offset:
1318 ; CHECK: .functype load_ext_v4i32_with_folded_offset (i32) -> (v128)
1319 ; CHECK-NEXT: # %bb.0:
1320 ; CHECK-NEXT: local.get 0
1321 ; CHECK-NEXT: v128.load64_zero 16
1322 ; CHECK-NEXT: # fallthrough-return
1323 %q = ptrtoint ptr %p to i32
1324 %r = add nuw i32 %q, 16
1325 %s = inttoptr i32 %r to ptr
1326 %v = load <4 x i16>, ptr %s
; Inbounds GEP offsets fold into each load's offset immediate, scaled by the
; element size (16, 4, 8, 4, and 8 bytes respectively below).
1330 define <4 x i32> @load_v4i32_with_folded_gep_offset(ptr %p) {
1331 ; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
1332 ; CHECK: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128)
1333 ; CHECK-NEXT: # %bb.0:
1334 ; CHECK-NEXT: local.get 0
1335 ; CHECK-NEXT: v128.load 16
1336 ; CHECK-NEXT: # fallthrough-return
1337 %s = getelementptr inbounds <4 x i32>, ptr %p, i32 1
1338 %v = load <4 x i32>, ptr %s
1342 define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(ptr %p) {
1343 ; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset:
1344 ; CHECK: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128)
1345 ; CHECK-NEXT: # %bb.0:
1346 ; CHECK-NEXT: local.get 0
1347 ; CHECK-NEXT: v128.load32_splat 4
1348 ; CHECK-NEXT: # fallthrough-return
1349 %s = getelementptr inbounds i32, ptr %p, i32 1
1350 %e = load i32, ptr %s
1351 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1352 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1356 define <4 x i32> @load_sext_v4i16_to_v4i32_with_folded_gep_offset(ptr %p) {
1357 ; CHECK-LABEL: load_sext_v4i16_to_v4i32_with_folded_gep_offset:
1358 ; CHECK: .functype load_sext_v4i16_to_v4i32_with_folded_gep_offset (i32) -> (v128)
1359 ; CHECK-NEXT: # %bb.0:
1360 ; CHECK-NEXT: local.get 0
1361 ; CHECK-NEXT: i32x4.load16x4_s 8
1362 ; CHECK-NEXT: # fallthrough-return
1363 %s = getelementptr inbounds <4 x i16>, ptr %p, i32 1
1364 %v = load <4 x i16>, ptr %s
1365 %v2 = sext <4 x i16> %v to <4 x i32>
1369 define <4 x i32> @load_zext_v4i16_to_v4i32_with_folded_gep_offset(ptr %p) {
1370 ; CHECK-LABEL: load_zext_v4i16_to_v4i32_with_folded_gep_offset:
1371 ; CHECK: .functype load_zext_v4i16_to_v4i32_with_folded_gep_offset (i32) -> (v128)
1372 ; CHECK-NEXT: # %bb.0:
1373 ; CHECK-NEXT: local.get 0
1374 ; CHECK-NEXT: i32x4.load16x4_u 8
1375 ; CHECK-NEXT: # fallthrough-return
1376 %s = getelementptr inbounds <4 x i16>, ptr %p, i32 1
1377 %v = load <4 x i16>, ptr %s
1378 %v2 = zext <4 x i16> %v to <4 x i32>
1382 define <4 x i32> @load_sext_v4i8_to_v4i32_with_folded_gep_offset(ptr %p) {
1383 ; CHECK-LABEL: load_sext_v4i8_to_v4i32_with_folded_gep_offset:
1384 ; CHECK: .functype load_sext_v4i8_to_v4i32_with_folded_gep_offset (i32) -> (v128)
1385 ; CHECK-NEXT: # %bb.0:
1386 ; CHECK-NEXT: local.get 0
1387 ; CHECK-NEXT: v128.load32_zero 4
1388 ; CHECK-NEXT: i16x8.extend_low_i8x16_s
1389 ; CHECK-NEXT: i32x4.extend_low_i16x8_s
1390 ; CHECK-NEXT: # fallthrough-return
1391 %s = getelementptr inbounds <4 x i8>, ptr %p, i32 1
1392 %v = load <4 x i8>, ptr %s
1393 %v2 = sext <4 x i8> %v to <4 x i32>
1397 define <4 x i32> @load_zext_v4i8_to_v4i32_with_folded_gep_offset(ptr %p) {
1398 ; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_folded_gep_offset:
1399 ; CHECK: .functype load_zext_v4i8_to_v4i32_with_folded_gep_offset (i32) -> (v128)
1400 ; CHECK-NEXT: # %bb.0:
1401 ; CHECK-NEXT: local.get 0
1402 ; CHECK-NEXT: v128.load32_zero 4
1403 ; CHECK-NEXT: i16x8.extend_low_i8x16_u
1404 ; CHECK-NEXT: i32x4.extend_low_i16x8_u
1405 ; CHECK-NEXT: # fallthrough-return
1406 %s = getelementptr inbounds <4 x i8>, ptr %p, i32 1
1407 %v = load <4 x i8>, ptr %s
1408 %v2 = zext <4 x i8> %v to <4 x i32>
1412 define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(ptr %p) {
1413 ; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset:
1414 ; CHECK: .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128)
1415 ; CHECK-NEXT: # %bb.0:
1416 ; CHECK-NEXT: local.get 0
1417 ; CHECK-NEXT: v128.load64_zero 8
1418 ; CHECK-NEXT: # fallthrough-return
1419 %s = getelementptr inbounds <4 x i16>, ptr %p, i32 1
1420 %v = load <4 x i16>, ptr %s
; Negative GEP offsets are not folded into the load's offset immediate; an
; explicit i32.add with a negative constant computes the address first.
1424 define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(ptr %p) {
1425 ; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
1426 ; CHECK: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1427 ; CHECK-NEXT: # %bb.0:
1428 ; CHECK-NEXT: local.get 0
1429 ; CHECK-NEXT: i32.const -16
1430 ; CHECK-NEXT: i32.add
1431 ; CHECK-NEXT: v128.load 0
1432 ; CHECK-NEXT: # fallthrough-return
1433 %s = getelementptr inbounds <4 x i32>, ptr %p, i32 -1
1434 %v = load <4 x i32>, ptr %s
1438 define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(ptr %p) {
1439 ; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset:
1440 ; CHECK: .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1441 ; CHECK-NEXT: # %bb.0:
1442 ; CHECK-NEXT: local.get 0
1443 ; CHECK-NEXT: i32.const -4
1444 ; CHECK-NEXT: i32.add
1445 ; CHECK-NEXT: v128.load32_splat 0
1446 ; CHECK-NEXT: # fallthrough-return
1447 %s = getelementptr inbounds i32, ptr %p, i32 -1
1448 %e = load i32, ptr %s
1449 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1450 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1454 define <4 x i32> @load_sext_v4i16_to_v4i32_with_unfolded_gep_negative_offset(ptr %p) {
1455 ; CHECK-LABEL: load_sext_v4i16_to_v4i32_with_unfolded_gep_negative_offset:
1456 ; CHECK: .functype load_sext_v4i16_to_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1457 ; CHECK-NEXT: # %bb.0:
1458 ; CHECK-NEXT: local.get 0
1459 ; CHECK-NEXT: i32.const -8
1460 ; CHECK-NEXT: i32.add
1461 ; CHECK-NEXT: i32x4.load16x4_s 0
1462 ; CHECK-NEXT: # fallthrough-return
1463 %s = getelementptr inbounds <4 x i16>, ptr %p, i32 -1
1464 %v = load <4 x i16>, ptr %s
1465 %v2 = sext <4 x i16> %v to <4 x i32>
1469 define <4 x i32> @load_zext_v4i16_to_v4i32_with_unfolded_gep_negative_offset(ptr %p) {
1470 ; CHECK-LABEL: load_zext_v4i16_to_v4i32_with_unfolded_gep_negative_offset:
1471 ; CHECK: .functype load_zext_v4i16_to_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1472 ; CHECK-NEXT: # %bb.0:
1473 ; CHECK-NEXT: local.get 0
1474 ; CHECK-NEXT: i32.const -8
1475 ; CHECK-NEXT: i32.add
1476 ; CHECK-NEXT: i32x4.load16x4_u 0
1477 ; CHECK-NEXT: # fallthrough-return
1478 %s = getelementptr inbounds <4 x i16>, ptr %p, i32 -1
1479 %v = load <4 x i16>, ptr %s
1480 %v2 = zext <4 x i16> %v to <4 x i32>
1484 define <4 x i32> @load_sext_v4i8_to_v4i32_with_unfolded_gep_negative_offset(ptr %p) {
1485 ; CHECK-LABEL: load_sext_v4i8_to_v4i32_with_unfolded_gep_negative_offset:
1486 ; CHECK: .functype load_sext_v4i8_to_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1487 ; CHECK-NEXT: # %bb.0:
1488 ; CHECK-NEXT: local.get 0
1489 ; CHECK-NEXT: i32.const -4
1490 ; CHECK-NEXT: i32.add
1491 ; CHECK-NEXT: v128.load32_zero 0
1492 ; CHECK-NEXT: i16x8.extend_low_i8x16_s
1493 ; CHECK-NEXT: i32x4.extend_low_i16x8_s
1494 ; CHECK-NEXT: # fallthrough-return
1495 %s = getelementptr inbounds <4 x i8>, ptr %p, i32 -1
1496 %v = load <4 x i8>, ptr %s
1497 %v2 = sext <4 x i8> %v to <4 x i32>
1501 define <4 x i32> @load_zext_v4i8_to_v4i32_with_unfolded_gep_negative_offset(ptr %p) {
1502 ; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_unfolded_gep_negative_offset:
1503 ; CHECK: .functype load_zext_v4i8_to_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1504 ; CHECK-NEXT: # %bb.0:
1505 ; CHECK-NEXT: local.get 0
1506 ; CHECK-NEXT: i32.const -4
1507 ; CHECK-NEXT: i32.add
1508 ; CHECK-NEXT: v128.load32_zero 0
1509 ; CHECK-NEXT: i16x8.extend_low_i8x16_u
1510 ; CHECK-NEXT: i32x4.extend_low_i16x8_u
1511 ; CHECK-NEXT: # fallthrough-return
1512 %s = getelementptr inbounds <4 x i8>, ptr %p, i32 -1
1513 %v = load <4 x i8>, ptr %s
1514 %v2 = zext <4 x i8> %v to <4 x i32>
1518 define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(ptr %p) {
1519 ; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset:
1520 ; CHECK: .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1521 ; CHECK-NEXT: # %bb.0:
1522 ; CHECK-NEXT: local.get 0
1523 ; CHECK-NEXT: i32.const -8
1524 ; CHECK-NEXT: i32.add
1525 ; CHECK-NEXT: v128.load64_zero 0
1526 ; CHECK-NEXT: # fallthrough-return
1527 %s = getelementptr inbounds <4 x i16>, ptr %p, i32 -1
1528 %v = load <4 x i16>, ptr %s
; An add carrying only nsw (not nuw) is not folded into the offset immediate
; (contrast with the nuw folded-offset tests above); an explicit i32.add
; computes the address for every load form.
1532 define <4 x i32> @load_v4i32_with_unfolded_offset(ptr %p) {
1533 ; CHECK-LABEL: load_v4i32_with_unfolded_offset:
1534 ; CHECK: .functype load_v4i32_with_unfolded_offset (i32) -> (v128)
1535 ; CHECK-NEXT: # %bb.0:
1536 ; CHECK-NEXT: local.get 0
1537 ; CHECK-NEXT: i32.const 16
1538 ; CHECK-NEXT: i32.add
1539 ; CHECK-NEXT: v128.load 0
1540 ; CHECK-NEXT: # fallthrough-return
1541 %q = ptrtoint ptr %p to i32
1542 %r = add nsw i32 %q, 16
1543 %s = inttoptr i32 %r to ptr
1544 %v = load <4 x i32>, ptr %s
1548 define <4 x i32> @load_splat_v4i32_with_unfolded_offset(ptr %p) {
1549 ; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset:
1550 ; CHECK: .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128)
1551 ; CHECK-NEXT: # %bb.0:
1552 ; CHECK-NEXT: local.get 0
1553 ; CHECK-NEXT: i32.const 16
1554 ; CHECK-NEXT: i32.add
1555 ; CHECK-NEXT: v128.load32_splat 0
1556 ; CHECK-NEXT: # fallthrough-return
1557 %q = ptrtoint ptr %p to i32
1558 %r = add nsw i32 %q, 16
1559 %s = inttoptr i32 %r to ptr
1560 %e = load i32, ptr %s
1561 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1562 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1566 define <4 x i32> @load_sext_v4i16_to_v4i32_with_unfolded_offset(ptr %p) {
1567 ; CHECK-LABEL: load_sext_v4i16_to_v4i32_with_unfolded_offset:
1568 ; CHECK: .functype load_sext_v4i16_to_v4i32_with_unfolded_offset (i32) -> (v128)
1569 ; CHECK-NEXT: # %bb.0:
1570 ; CHECK-NEXT: local.get 0
1571 ; CHECK-NEXT: i32.const 16
1572 ; CHECK-NEXT: i32.add
1573 ; CHECK-NEXT: i32x4.load16x4_s 0
1574 ; CHECK-NEXT: # fallthrough-return
1575 %q = ptrtoint ptr %p to i32
1576 %r = add nsw i32 %q, 16
1577 %s = inttoptr i32 %r to ptr
1578 %v = load <4 x i16>, ptr %s
1579 %v2 = sext <4 x i16> %v to <4 x i32>
1583 define <4 x i32> @load_zext_v4i16_to_v4i32_with_unfolded_offset(ptr %p) {
1584 ; CHECK-LABEL: load_zext_v4i16_to_v4i32_with_unfolded_offset:
1585 ; CHECK: .functype load_zext_v4i16_to_v4i32_with_unfolded_offset (i32) -> (v128)
1586 ; CHECK-NEXT: # %bb.0:
1587 ; CHECK-NEXT: local.get 0
1588 ; CHECK-NEXT: i32.const 16
1589 ; CHECK-NEXT: i32.add
1590 ; CHECK-NEXT: i32x4.load16x4_u 0
1591 ; CHECK-NEXT: # fallthrough-return
1592 %q = ptrtoint ptr %p to i32
1593 %r = add nsw i32 %q, 16
1594 %s = inttoptr i32 %r to ptr
1595 %v = load <4 x i16>, ptr %s
1596 %v2 = zext <4 x i16> %v to <4 x i32>
1600 define <4 x i32> @load_sext_v4i8_to_v4i32_with_unfolded_offset(ptr %p) {
1601 ; CHECK-LABEL: load_sext_v4i8_to_v4i32_with_unfolded_offset:
1602 ; CHECK: .functype load_sext_v4i8_to_v4i32_with_unfolded_offset (i32) -> (v128)
1603 ; CHECK-NEXT: # %bb.0:
1604 ; CHECK-NEXT: local.get 0
1605 ; CHECK-NEXT: i32.const 16
1606 ; CHECK-NEXT: i32.add
1607 ; CHECK-NEXT: v128.load32_zero 0
1608 ; CHECK-NEXT: i16x8.extend_low_i8x16_s
1609 ; CHECK-NEXT: i32x4.extend_low_i16x8_s
1610 ; CHECK-NEXT: # fallthrough-return
1611 %q = ptrtoint ptr %p to i32
1612 %r = add nsw i32 %q, 16
1613 %s = inttoptr i32 %r to ptr
1614 %v = load <4 x i8>, ptr %s
1615 %v2 = sext <4 x i8> %v to <4 x i32>
1619 define <4 x i32> @load_zext_v4i8_to_v4i32_with_unfolded_offset(ptr %p) {
1620 ; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_unfolded_offset:
1621 ; CHECK: .functype load_zext_v4i8_to_v4i32_with_unfolded_offset (i32) -> (v128)
1622 ; CHECK-NEXT: # %bb.0:
1623 ; CHECK-NEXT: local.get 0
1624 ; CHECK-NEXT: i32.const 16
1625 ; CHECK-NEXT: i32.add
1626 ; CHECK-NEXT: v128.load32_zero 0
1627 ; CHECK-NEXT: i16x8.extend_low_i8x16_u
1628 ; CHECK-NEXT: i32x4.extend_low_i16x8_u
1629 ; CHECK-NEXT: # fallthrough-return
1630 %q = ptrtoint ptr %p to i32
1631 %r = add nsw i32 %q, 16
1632 %s = inttoptr i32 %r to ptr
1633 %v = load <4 x i8>, ptr %s
1634 %v2 = zext <4 x i8> %v to <4 x i32>
1638 define <4 x i16> @load_ext_v4i32_with_unfolded_offset(ptr %p) {
1639 ; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset:
1640 ; CHECK: .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128)
1641 ; CHECK-NEXT: # %bb.0:
1642 ; CHECK-NEXT: local.get 0
1643 ; CHECK-NEXT: i32.const 16
1644 ; CHECK-NEXT: i32.add
1645 ; CHECK-NEXT: v128.load64_zero 0
1646 ; CHECK-NEXT: # fallthrough-return
1647 %q = ptrtoint ptr %p to i32
1648 %r = add nsw i32 %q, 16
1649 %s = inttoptr i32 %r to ptr
1650 %v = load <4 x i16>, ptr %s
; A GEP without inbounds is not folded into the offset immediate (contrast
; with the folded_gep tests above); an explicit i32.add is emitted.
1654 define <4 x i32> @load_v4i32_with_unfolded_gep_offset(ptr %p) {
1655 ; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
1656 ; CHECK: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1657 ; CHECK-NEXT: # %bb.0:
1658 ; CHECK-NEXT: local.get 0
1659 ; CHECK-NEXT: i32.const 16
1660 ; CHECK-NEXT: i32.add
1661 ; CHECK-NEXT: v128.load 0
1662 ; CHECK-NEXT: # fallthrough-return
1663 %s = getelementptr <4 x i32>, ptr %p, i32 1
1664 %v = load <4 x i32>, ptr %s
1668 define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(ptr %p) {
1669 ; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset:
1670 ; CHECK: .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1671 ; CHECK-NEXT: # %bb.0:
1672 ; CHECK-NEXT: local.get 0
1673 ; CHECK-NEXT: i32.const 4
1674 ; CHECK-NEXT: i32.add
1675 ; CHECK-NEXT: v128.load32_splat 0
1676 ; CHECK-NEXT: # fallthrough-return
1677 %s = getelementptr i32, ptr %p, i32 1
1678 %e = load i32, ptr %s
1679 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1680 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1684 define <4 x i32> @load_sext_v4i16_to_v4i32_with_unfolded_gep_offset(ptr %p) {
1685 ; CHECK-LABEL: load_sext_v4i16_to_v4i32_with_unfolded_gep_offset:
1686 ; CHECK: .functype load_sext_v4i16_to_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1687 ; CHECK-NEXT: # %bb.0:
1688 ; CHECK-NEXT: local.get 0
1689 ; CHECK-NEXT: i32.const 8
1690 ; CHECK-NEXT: i32.add
1691 ; CHECK-NEXT: i32x4.load16x4_s 0
1692 ; CHECK-NEXT: # fallthrough-return
1693 %s = getelementptr <4 x i16>, ptr %p, i32 1
1694 %v = load <4 x i16>, ptr %s
1695 %v2 = sext <4 x i16> %v to <4 x i32>
1699 define <4 x i32> @load_zext_v4i16_to_v4i32_with_unfolded_gep_offset(ptr %p) {
1700 ; CHECK-LABEL: load_zext_v4i16_to_v4i32_with_unfolded_gep_offset:
1701 ; CHECK: .functype load_zext_v4i16_to_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1702 ; CHECK-NEXT: # %bb.0:
1703 ; CHECK-NEXT: local.get 0
1704 ; CHECK-NEXT: i32.const 8
1705 ; CHECK-NEXT: i32.add
1706 ; CHECK-NEXT: i32x4.load16x4_u 0
1707 ; CHECK-NEXT: # fallthrough-return
1708 %s = getelementptr <4 x i16>, ptr %p, i32 1
1709 %v = load <4 x i16>, ptr %s
1710 %v2 = zext <4 x i16> %v to <4 x i32>
1714 define <4 x i32> @load_sext_v4i8_to_v4i32_with_unfolded_gep_offset(ptr %p) {
1715 ; CHECK-LABEL: load_sext_v4i8_to_v4i32_with_unfolded_gep_offset:
1716 ; CHECK: .functype load_sext_v4i8_to_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1717 ; CHECK-NEXT: # %bb.0:
1718 ; CHECK-NEXT: local.get 0
1719 ; CHECK-NEXT: i32.const 4
1720 ; CHECK-NEXT: i32.add
1721 ; CHECK-NEXT: v128.load32_zero 0
1722 ; CHECK-NEXT: i16x8.extend_low_i8x16_s
1723 ; CHECK-NEXT: i32x4.extend_low_i16x8_s
1724 ; CHECK-NEXT: # fallthrough-return
1725 %s = getelementptr <4 x i8>, ptr %p, i32 1
1726 %v = load <4 x i8>, ptr %s
1727 %v2 = sext <4 x i8> %v to <4 x i32>
1731 define <4 x i32> @load_zext_v4i8_to_v4i32_with_unfolded_gep_offset(ptr %p) {
1732 ; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_unfolded_gep_offset:
1733 ; CHECK: .functype load_zext_v4i8_to_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1734 ; CHECK-NEXT: # %bb.0:
1735 ; CHECK-NEXT: local.get 0
1736 ; CHECK-NEXT: i32.const 4
1737 ; CHECK-NEXT: i32.add
1738 ; CHECK-NEXT: v128.load32_zero 0
1739 ; CHECK-NEXT: i16x8.extend_low_i8x16_u
1740 ; CHECK-NEXT: i32x4.extend_low_i16x8_u
1741 ; CHECK-NEXT: # fallthrough-return
1742 %s = getelementptr <4 x i8>, ptr %p, i32 1
1743 %v = load <4 x i8>, ptr %s
1744 %v2 = zext <4 x i8> %v to <4 x i32>
1748 define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(ptr %p) {
1749 ; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset:
1750 ; CHECK: .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1751 ; CHECK-NEXT: # %bb.0:
1752 ; CHECK-NEXT: local.get 0
1753 ; CHECK-NEXT: i32.const 8
1754 ; CHECK-NEXT: i32.add
1755 ; CHECK-NEXT: v128.load64_zero 0
1756 ; CHECK-NEXT: # fallthrough-return
1757 %s = getelementptr <4 x i16>, ptr %p, i32 1
1758 %v = load <4 x i16>, ptr %s
; Loads from a constant numeric address lower to an i32.const 0 base with
; the absolute address (32) as the offset immediate, for every load form.
1762 define <4 x i32> @load_v4i32_from_numeric_address() {
1763 ; CHECK-LABEL: load_v4i32_from_numeric_address:
1764 ; CHECK: .functype load_v4i32_from_numeric_address () -> (v128)
1765 ; CHECK-NEXT: # %bb.0:
1766 ; CHECK-NEXT: i32.const 0
1767 ; CHECK-NEXT: v128.load 32
1768 ; CHECK-NEXT: # fallthrough-return
1769 %s = inttoptr i32 32 to ptr
1770 %v = load <4 x i32>, ptr %s
1774 define <4 x i32> @load_splat_v4i32_from_numeric_address() {
1775 ; CHECK-LABEL: load_splat_v4i32_from_numeric_address:
1776 ; CHECK: .functype load_splat_v4i32_from_numeric_address () -> (v128)
1777 ; CHECK-NEXT: # %bb.0:
1778 ; CHECK-NEXT: i32.const 0
1779 ; CHECK-NEXT: v128.load32_splat 32
1780 ; CHECK-NEXT: # fallthrough-return
1781 %s = inttoptr i32 32 to ptr
1782 %e = load i32, ptr %s
1783 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1784 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1788 define <4 x i32> @load_sext_v4i16_to_v4i32_from_numeric_address() {
1789 ; CHECK-LABEL: load_sext_v4i16_to_v4i32_from_numeric_address:
1790 ; CHECK: .functype load_sext_v4i16_to_v4i32_from_numeric_address () -> (v128)
1791 ; CHECK-NEXT: # %bb.0:
1792 ; CHECK-NEXT: i32.const 0
1793 ; CHECK-NEXT: i32x4.load16x4_s 32
1794 ; CHECK-NEXT: # fallthrough-return
1795 %s = inttoptr i32 32 to ptr
1796 %v = load <4 x i16>, ptr %s
1797 %v2 = sext <4 x i16> %v to <4 x i32>
1801 define <4 x i32> @load_zext_v4i16_to_v4i32_from_numeric_address() {
1802 ; CHECK-LABEL: load_zext_v4i16_to_v4i32_from_numeric_address:
1803 ; CHECK: .functype load_zext_v4i16_to_v4i32_from_numeric_address () -> (v128)
1804 ; CHECK-NEXT: # %bb.0:
1805 ; CHECK-NEXT: i32.const 0
1806 ; CHECK-NEXT: i32x4.load16x4_u 32
1807 ; CHECK-NEXT: # fallthrough-return
1808 %s = inttoptr i32 32 to ptr
1809 %v = load <4 x i16>, ptr %s
1810 %v2 = zext <4 x i16> %v to <4 x i32>
1814 define <4 x i32> @load_sext_v4i8_to_v4i32_from_numeric_address() {
1815 ; CHECK-LABEL: load_sext_v4i8_to_v4i32_from_numeric_address:
1816 ; CHECK: .functype load_sext_v4i8_to_v4i32_from_numeric_address () -> (v128)
1817 ; CHECK-NEXT: # %bb.0:
1818 ; CHECK-NEXT: i32.const 0
1819 ; CHECK-NEXT: v128.load32_zero 32
1820 ; CHECK-NEXT: i16x8.extend_low_i8x16_s
1821 ; CHECK-NEXT: i32x4.extend_low_i16x8_s
1822 ; CHECK-NEXT: # fallthrough-return
1823 %s = inttoptr i32 32 to ptr
1824 %v = load <4 x i8>, ptr %s
1825 %v2 = sext <4 x i8> %v to <4 x i32>
1829 define <4 x i32> @load_zext_v4i8_to_v4i32_from_numeric_address() {
1830 ; CHECK-LABEL: load_zext_v4i8_to_v4i32_from_numeric_address:
1831 ; CHECK: .functype load_zext_v4i8_to_v4i32_from_numeric_address () -> (v128)
1832 ; CHECK-NEXT: # %bb.0:
1833 ; CHECK-NEXT: i32.const 0
1834 ; CHECK-NEXT: v128.load32_zero 32
1835 ; CHECK-NEXT: i16x8.extend_low_i8x16_u
1836 ; CHECK-NEXT: i32x4.extend_low_i16x8_u
1837 ; CHECK-NEXT: # fallthrough-return
1838 %s = inttoptr i32 32 to ptr
1839 %v = load <4 x i8>, ptr %s
1840 %v2 = zext <4 x i8> %v to <4 x i32>
1844 define <4 x i16> @load_ext_v4i32_from_numeric_address() {
1845 ; CHECK-LABEL: load_ext_v4i32_from_numeric_address:
1846 ; CHECK: .functype load_ext_v4i32_from_numeric_address () -> (v128)
1847 ; CHECK-NEXT: # %bb.0:
1848 ; CHECK-NEXT: i32.const 0
1849 ; CHECK-NEXT: v128.load64_zero 32
1850 ; CHECK-NEXT: # fallthrough-return
1851 %s = inttoptr i32 32 to ptr
1852 %v = load <4 x i16>, ptr %s
1856 @gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>
1857 define <4 x i32> @load_v4i32_from_global_address() {
1858 ; CHECK-LABEL: load_v4i32_from_global_address:
1859 ; CHECK: .functype load_v4i32_from_global_address () -> (v128)
1860 ; CHECK-NEXT: # %bb.0:
1861 ; CHECK-NEXT: i32.const 0
1862 ; CHECK-NEXT: v128.load gv_v4i32
1863 ; CHECK-NEXT: # fallthrough-return
1864 %v = load <4 x i32>, ptr @gv_v4i32
1868 @gv_i32 = global i32 42
1869 define <4 x i32> @load_splat_v4i32_from_global_address() {
1870 ; CHECK-LABEL: load_splat_v4i32_from_global_address:
1871 ; CHECK: .functype load_splat_v4i32_from_global_address () -> (v128)
1872 ; CHECK-NEXT: # %bb.0:
1873 ; CHECK-NEXT: i32.const 0
1874 ; CHECK-NEXT: v128.load32_splat gv_i32
1875 ; CHECK-NEXT: # fallthrough-return
1876 %e = load i32, ptr @gv_i32
1877 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1878 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1882 @gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42>
1883 define <4 x i32> @load_sext_v4i16_to_v4i32_from_global_address() {
1884 ; CHECK-LABEL: load_sext_v4i16_to_v4i32_from_global_address:
1885 ; CHECK: .functype load_sext_v4i16_to_v4i32_from_global_address () -> (v128)
1886 ; CHECK-NEXT: # %bb.0:
1887 ; CHECK-NEXT: i32.const 0
1888 ; CHECK-NEXT: i32x4.load16x4_s gv_v4i16
1889 ; CHECK-NEXT: # fallthrough-return
1890 %v = load <4 x i16>, ptr @gv_v4i16
1891 %v2 = sext <4 x i16> %v to <4 x i32>
1895 define <4 x i32> @load_zext_v4i16_to_v4i32_from_global_address() {
1896 ; CHECK-LABEL: load_zext_v4i16_to_v4i32_from_global_address:
1897 ; CHECK: .functype load_zext_v4i16_to_v4i32_from_global_address () -> (v128)
1898 ; CHECK-NEXT: # %bb.0:
1899 ; CHECK-NEXT: i32.const 0
1900 ; CHECK-NEXT: i32x4.load16x4_u gv_v4i16
1901 ; CHECK-NEXT: # fallthrough-return
1902 %v = load <4 x i16>, ptr @gv_v4i16
1903 %v2 = zext <4 x i16> %v to <4 x i32>
1907 @gv_v4i8 = global <4 x i8> <i8 42, i8 42, i8 42, i8 42>
1908 define <4 x i32> @load_sext_v4i8_to_v4i32_from_global_address() {
1909 ; CHECK-LABEL: load_sext_v4i8_to_v4i32_from_global_address:
1910 ; CHECK: .functype load_sext_v4i8_to_v4i32_from_global_address () -> (v128)
1911 ; CHECK-NEXT: # %bb.0:
1912 ; CHECK-NEXT: i32.const 0
1913 ; CHECK-NEXT: v128.load32_zero gv_v4i8
1914 ; CHECK-NEXT: i16x8.extend_low_i8x16_s
1915 ; CHECK-NEXT: i32x4.extend_low_i16x8_s
1916 ; CHECK-NEXT: # fallthrough-return
1917 %v = load <4 x i8>, ptr @gv_v4i8
1918 %v2 = sext <4 x i8> %v to <4 x i32>
1922 define <4 x i32> @load_zext_v4i8_to_v4i32_from_global_address() {
1923 ; CHECK-LABEL: load_zext_v4i8_to_v4i32_from_global_address:
1924 ; CHECK: .functype load_zext_v4i8_to_v4i32_from_global_address () -> (v128)
1925 ; CHECK-NEXT: # %bb.0:
1926 ; CHECK-NEXT: i32.const 0
1927 ; CHECK-NEXT: v128.load32_zero gv_v4i8
1928 ; CHECK-NEXT: i16x8.extend_low_i8x16_u
1929 ; CHECK-NEXT: i32x4.extend_low_i16x8_u
1930 ; CHECK-NEXT: # fallthrough-return
1931 %v = load <4 x i8>, ptr @gv_v4i8
1932 %v2 = zext <4 x i8> %v to <4 x i32>
1936 define <4 x i16> @load_ext_v4i32_from_global_address() {
1937 ; CHECK-LABEL: load_ext_v4i32_from_global_address:
1938 ; CHECK: .functype load_ext_v4i32_from_global_address () -> (v128)
1939 ; CHECK-NEXT: # %bb.0:
1940 ; CHECK-NEXT: i32.const 0
1941 ; CHECK-NEXT: v128.load64_zero gv_v4i16
1942 ; CHECK-NEXT: # fallthrough-return
1943 %v = load <4 x i16>, ptr @gv_v4i16
1947 define void @store_v4i32(<4 x i32> %v, ptr %p) {
1948 ; CHECK-LABEL: store_v4i32:
1949 ; CHECK: .functype store_v4i32 (v128, i32) -> ()
1950 ; CHECK-NEXT: # %bb.0:
1951 ; CHECK-NEXT: local.get 1
1952 ; CHECK-NEXT: local.get 0
1953 ; CHECK-NEXT: v128.store 0
1954 ; CHECK-NEXT: # fallthrough-return
1955 store <4 x i32> %v , ptr %p
1959 define void @store_narrowing_v4i32(<4 x i16> %v, ptr %p) {
1960 ; CHECK-LABEL: store_narrowing_v4i32:
1961 ; CHECK: .functype store_narrowing_v4i32 (v128, i32) -> ()
1962 ; CHECK-NEXT: # %bb.0:
1963 ; CHECK-NEXT: local.get 1
1964 ; CHECK-NEXT: local.get 0
1965 ; CHECK-NEXT: v128.store64_lane 0, 0
1966 ; CHECK-NEXT: # fallthrough-return
1967 store <4 x i16> %v , ptr %p
1971 define void @store_v4i32_with_folded_offset(<4 x i32> %v, ptr %p) {
1972 ; CHECK-LABEL: store_v4i32_with_folded_offset:
1973 ; CHECK: .functype store_v4i32_with_folded_offset (v128, i32) -> ()
1974 ; CHECK-NEXT: # %bb.0:
1975 ; CHECK-NEXT: local.get 1
1976 ; CHECK-NEXT: local.get 0
1977 ; CHECK-NEXT: v128.store 16
1978 ; CHECK-NEXT: # fallthrough-return
1979 %q = ptrtoint ptr %p to i32
1980 %r = add nuw i32 %q, 16
1981 %s = inttoptr i32 %r to ptr
1982 store <4 x i32> %v , ptr %s
1986 define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, ptr %p) {
1987 ; CHECK-LABEL: store_narrowing_v4i32_with_folded_offset:
1988 ; CHECK: .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> ()
1989 ; CHECK-NEXT: # %bb.0:
1990 ; CHECK-NEXT: local.get 1
1991 ; CHECK-NEXT: local.get 0
1992 ; CHECK-NEXT: v128.store64_lane 16, 0
1993 ; CHECK-NEXT: # fallthrough-return
1994 %q = ptrtoint ptr %p to i32
1995 %r = add nuw i32 %q, 16
1996 %s = inttoptr i32 %r to ptr
1997 store <4 x i16> %v , ptr %s
2001 define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, ptr %p) {
2002 ; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
2003 ; CHECK: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> ()
2004 ; CHECK-NEXT: # %bb.0:
2005 ; CHECK-NEXT: local.get 1
2006 ; CHECK-NEXT: local.get 0
2007 ; CHECK-NEXT: v128.store 16
2008 ; CHECK-NEXT: # fallthrough-return
2009 %s = getelementptr inbounds <4 x i32>, ptr %p, i32 1
2010 store <4 x i32> %v , ptr %s
2014 define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, ptr %p) {
2015 ; CHECK-LABEL: store_narrowing_v4i32_with_folded_gep_offset:
2016 ; CHECK: .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> ()
2017 ; CHECK-NEXT: # %bb.0:
2018 ; CHECK-NEXT: local.get 1
2019 ; CHECK-NEXT: local.get 0
2020 ; CHECK-NEXT: v128.store64_lane 8, 0
2021 ; CHECK-NEXT: # fallthrough-return
2022 %s = getelementptr inbounds <4 x i16>, ptr %p, i32 1
2023 store <4 x i16> %v , ptr %s
2027 define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, ptr %p) {
2028 ; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
2029 ; CHECK: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
2030 ; CHECK-NEXT: # %bb.0:
2031 ; CHECK-NEXT: local.get 1
2032 ; CHECK-NEXT: i32.const -16
2033 ; CHECK-NEXT: i32.add
2034 ; CHECK-NEXT: local.get 0
2035 ; CHECK-NEXT: v128.store 0
2036 ; CHECK-NEXT: # fallthrough-return
2037 %s = getelementptr inbounds <4 x i32>, ptr %p, i32 -1
2038 store <4 x i32> %v , ptr %s
2042 define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %v, ptr %p) {
2043 ; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_negative_offset:
2044 ; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
2045 ; CHECK-NEXT: # %bb.0:
2046 ; CHECK-NEXT: local.get 1
2047 ; CHECK-NEXT: i32.const -8
2048 ; CHECK-NEXT: i32.add
2049 ; CHECK-NEXT: local.get 0
2050 ; CHECK-NEXT: v128.store64_lane 0, 0
2051 ; CHECK-NEXT: # fallthrough-return
2052 %s = getelementptr inbounds <4 x i16>, ptr %p, i32 -1
2053 store <4 x i16> %v , ptr %s
2057 define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, ptr %p) {
2058 ; CHECK-LABEL: store_v4i32_with_unfolded_offset:
2059 ; CHECK: .functype store_v4i32_with_unfolded_offset (v128, i32) -> ()
2060 ; CHECK-NEXT: # %bb.0:
2061 ; CHECK-NEXT: local.get 1
2062 ; CHECK-NEXT: i32.const 16
2063 ; CHECK-NEXT: i32.add
2064 ; CHECK-NEXT: local.get 0
2065 ; CHECK-NEXT: v128.store 0
2066 ; CHECK-NEXT: # fallthrough-return
2067 %q = ptrtoint ptr %p to i32
2068 %r = add nsw i32 %q, 16
2069 %s = inttoptr i32 %r to ptr
2070 store <4 x i32> %v , ptr %s
2074 define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, ptr %p) {
2075 ; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_offset:
2076 ; CHECK: .functype store_narrowing_v4i32_with_unfolded_offset (v128, i32) -> ()
2077 ; CHECK-NEXT: # %bb.0:
2078 ; CHECK-NEXT: local.get 1
2079 ; CHECK-NEXT: i32.const 16
2080 ; CHECK-NEXT: i32.add
2081 ; CHECK-NEXT: local.get 0
2082 ; CHECK-NEXT: v128.store64_lane 0, 0
2083 ; CHECK-NEXT: # fallthrough-return
2084 %q = ptrtoint ptr %p to i32
2085 %r = add nsw i32 %q, 16
2086 %s = inttoptr i32 %r to ptr
2087 store <4 x i16> %v , ptr %s
2091 define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, ptr %p) {
2092 ; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
2093 ; CHECK: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
2094 ; CHECK-NEXT: # %bb.0:
2095 ; CHECK-NEXT: local.get 1
2096 ; CHECK-NEXT: i32.const 16
2097 ; CHECK-NEXT: i32.add
2098 ; CHECK-NEXT: local.get 0
2099 ; CHECK-NEXT: v128.store 0
2100 ; CHECK-NEXT: # fallthrough-return
2101 %s = getelementptr <4 x i32>, ptr %p, i32 1
2102 store <4 x i32> %v , ptr %s
2106 define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, ptr %p) {
2107 ; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_offset:
2108 ; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
2109 ; CHECK-NEXT: # %bb.0:
2110 ; CHECK-NEXT: local.get 1
2111 ; CHECK-NEXT: i32.const 8
2112 ; CHECK-NEXT: i32.add
2113 ; CHECK-NEXT: local.get 0
2114 ; CHECK-NEXT: v128.store64_lane 0, 0
2115 ; CHECK-NEXT: # fallthrough-return
2116 %s = getelementptr <4 x i16>, ptr %p, i32 1
2117 store <4 x i16> %v , ptr %s
2121 define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
2122 ; CHECK-LABEL: store_v4i32_to_numeric_address:
2123 ; CHECK: .functype store_v4i32_to_numeric_address (v128) -> ()
2124 ; CHECK-NEXT: # %bb.0:
2125 ; CHECK-NEXT: i32.const 0
2126 ; CHECK-NEXT: local.get 0
2127 ; CHECK-NEXT: v128.store 32
2128 ; CHECK-NEXT: # fallthrough-return
2129 %s = inttoptr i32 32 to ptr
2130 store <4 x i32> %v , ptr %s
2134 define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) {
2135 ; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address:
2136 ; CHECK: .functype store_narrowing_v4i32_to_numeric_address (v128) -> ()
2137 ; CHECK-NEXT: # %bb.0:
2138 ; CHECK-NEXT: i32.const 0
2139 ; CHECK-NEXT: local.get 0
2140 ; CHECK-NEXT: v128.store64_lane 32, 0
2141 ; CHECK-NEXT: # fallthrough-return
2142 %s = inttoptr i32 32 to ptr
2143 store <4 x i16> %v , ptr %s
2147 define void @store_v4i32_to_global_address(<4 x i32> %v) {
2148 ; CHECK-LABEL: store_v4i32_to_global_address:
2149 ; CHECK: .functype store_v4i32_to_global_address (v128) -> ()
2150 ; CHECK-NEXT: # %bb.0:
2151 ; CHECK-NEXT: i32.const 0
2152 ; CHECK-NEXT: local.get 0
2153 ; CHECK-NEXT: v128.store gv_v4i32
2154 ; CHECK-NEXT: # fallthrough-return
2155 store <4 x i32> %v , ptr @gv_v4i32
2159 define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) {
2160 ; CHECK-LABEL: store_narrowing_v4i32_to_global_address:
2161 ; CHECK: .functype store_narrowing_v4i32_to_global_address (v128) -> ()
2162 ; CHECK-NEXT: # %bb.0:
2163 ; CHECK-NEXT: i32.const 0
2164 ; CHECK-NEXT: local.get 0
2165 ; CHECK-NEXT: v128.store64_lane gv_v4i16, 0
2166 ; CHECK-NEXT: # fallthrough-return
2167 store <4 x i16> %v , ptr @gv_v4i16
2171 ; ==============================================================================
; 2 x i64
2173 ; ==============================================================================
2174 define <2 x i64> @load_v2i64(ptr %p) {
2175 ; CHECK-LABEL: load_v2i64:
2176 ; CHECK: .functype load_v2i64 (i32) -> (v128)
2177 ; CHECK-NEXT: # %bb.0:
2178 ; CHECK-NEXT: local.get 0
2179 ; CHECK-NEXT: v128.load 0
2180 ; CHECK-NEXT: # fallthrough-return
2181 %v = load <2 x i64>, ptr %p
2185 define <2 x i64> @load_splat_v2i64(ptr %p) {
2186 ; CHECK-LABEL: load_splat_v2i64:
2187 ; CHECK: .functype load_splat_v2i64 (i32) -> (v128)
2188 ; CHECK-NEXT: # %bb.0:
2189 ; CHECK-NEXT: local.get 0
2190 ; CHECK-NEXT: v128.load64_splat 0
2191 ; CHECK-NEXT: # fallthrough-return
2192 %e = load i64, ptr %p
2193 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2194 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2198 define <2 x i64> @load_sext_v2i64(ptr %p) {
2199 ; CHECK-LABEL: load_sext_v2i64:
2200 ; CHECK: .functype load_sext_v2i64 (i32) -> (v128)
2201 ; CHECK-NEXT: # %bb.0:
2202 ; CHECK-NEXT: local.get 0
2203 ; CHECK-NEXT: i64x2.load32x2_s 0
2204 ; CHECK-NEXT: # fallthrough-return
2205 %v = load <2 x i32>, ptr %p
2206 %v2 = sext <2 x i32> %v to <2 x i64>
2210 define <2 x i64> @load_zext_v2i64(ptr %p) {
2211 ; CHECK-LABEL: load_zext_v2i64:
2212 ; CHECK: .functype load_zext_v2i64 (i32) -> (v128)
2213 ; CHECK-NEXT: # %bb.0:
2214 ; CHECK-NEXT: local.get 0
2215 ; CHECK-NEXT: i64x2.load32x2_u 0
2216 ; CHECK-NEXT: # fallthrough-return
2217 %v = load <2 x i32>, ptr %p
2218 %v2 = zext <2 x i32> %v to <2 x i64>
2222 define <2 x i32> @load_ext_v2i64(ptr %p) {
2223 ; CHECK-LABEL: load_ext_v2i64:
2224 ; CHECK: .functype load_ext_v2i64 (i32) -> (v128)
2225 ; CHECK-NEXT: # %bb.0:
2226 ; CHECK-NEXT: local.get 0
2227 ; CHECK-NEXT: v128.load64_zero 0
2228 ; CHECK-NEXT: # fallthrough-return
2229 %v = load <2 x i32>, ptr %p
2233 define <2 x i64> @load_v2i64_with_folded_offset(ptr %p) {
2234 ; CHECK-LABEL: load_v2i64_with_folded_offset:
2235 ; CHECK: .functype load_v2i64_with_folded_offset (i32) -> (v128)
2236 ; CHECK-NEXT: # %bb.0:
2237 ; CHECK-NEXT: local.get 0
2238 ; CHECK-NEXT: v128.load 16
2239 ; CHECK-NEXT: # fallthrough-return
2240 %q = ptrtoint ptr %p to i32
2241 %r = add nuw i32 %q, 16
2242 %s = inttoptr i32 %r to ptr
2243 %v = load <2 x i64>, ptr %s
2247 define <2 x i64> @load_splat_v2i64_with_folded_offset(ptr %p) {
2248 ; CHECK-LABEL: load_splat_v2i64_with_folded_offset:
2249 ; CHECK: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128)
2250 ; CHECK-NEXT: # %bb.0:
2251 ; CHECK-NEXT: local.get 0
2252 ; CHECK-NEXT: v128.load64_splat 16
2253 ; CHECK-NEXT: # fallthrough-return
2254 %q = ptrtoint ptr %p to i32
2255 %r = add nuw i32 %q, 16
2256 %s = inttoptr i32 %r to ptr
2257 %e = load i64, ptr %s
2258 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2259 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2263 define <2 x i64> @load_sext_v2i64_with_folded_offset(ptr %p) {
2264 ; CHECK-LABEL: load_sext_v2i64_with_folded_offset:
2265 ; CHECK: .functype load_sext_v2i64_with_folded_offset (i32) -> (v128)
2266 ; CHECK-NEXT: # %bb.0:
2267 ; CHECK-NEXT: local.get 0
2268 ; CHECK-NEXT: i64x2.load32x2_s 16
2269 ; CHECK-NEXT: # fallthrough-return
2270 %q = ptrtoint ptr %p to i32
2271 %r = add nuw i32 %q, 16
2272 %s = inttoptr i32 %r to ptr
2273 %v = load <2 x i32>, ptr %s
2274 %v2 = sext <2 x i32> %v to <2 x i64>
2278 define <2 x i64> @load_zext_v2i64_with_folded_offset(ptr %p) {
2279 ; CHECK-LABEL: load_zext_v2i64_with_folded_offset:
2280 ; CHECK: .functype load_zext_v2i64_with_folded_offset (i32) -> (v128)
2281 ; CHECK-NEXT: # %bb.0:
2282 ; CHECK-NEXT: local.get 0
2283 ; CHECK-NEXT: i64x2.load32x2_u 16
2284 ; CHECK-NEXT: # fallthrough-return
2285 %q = ptrtoint ptr %p to i32
2286 %r = add nuw i32 %q, 16
2287 %s = inttoptr i32 %r to ptr
2288 %v = load <2 x i32>, ptr %s
2289 %v2 = zext <2 x i32> %v to <2 x i64>
2293 define <2 x i32> @load_ext_v2i64_with_folded_offset(ptr %p) {
2294 ; CHECK-LABEL: load_ext_v2i64_with_folded_offset:
2295 ; CHECK: .functype load_ext_v2i64_with_folded_offset (i32) -> (v128)
2296 ; CHECK-NEXT: # %bb.0:
2297 ; CHECK-NEXT: local.get 0
2298 ; CHECK-NEXT: v128.load64_zero 16
2299 ; CHECK-NEXT: # fallthrough-return
2300 %q = ptrtoint ptr %p to i32
2301 %r = add nuw i32 %q, 16
2302 %s = inttoptr i32 %r to ptr
2303 %v = load <2 x i32>, ptr %s
2307 define <2 x i64> @load_v2i64_with_folded_gep_offset(ptr %p) {
2308 ; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
2309 ; CHECK: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128)
2310 ; CHECK-NEXT: # %bb.0:
2311 ; CHECK-NEXT: local.get 0
2312 ; CHECK-NEXT: v128.load 16
2313 ; CHECK-NEXT: # fallthrough-return
2314 %s = getelementptr inbounds <2 x i64>, ptr %p, i32 1
2315 %v = load <2 x i64>, ptr %s
2319 define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(ptr %p) {
2320 ; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset:
2321 ; CHECK: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128)
2322 ; CHECK-NEXT: # %bb.0:
2323 ; CHECK-NEXT: local.get 0
2324 ; CHECK-NEXT: v128.load64_splat 8
2325 ; CHECK-NEXT: # fallthrough-return
2326 %s = getelementptr inbounds i64, ptr %p, i32 1
2327 %e = load i64, ptr %s
2328 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2329 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2333 define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(ptr %p) {
2334 ; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset:
2335 ; CHECK: .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128)
2336 ; CHECK-NEXT: # %bb.0:
2337 ; CHECK-NEXT: local.get 0
2338 ; CHECK-NEXT: i64x2.load32x2_s 8
2339 ; CHECK-NEXT: # fallthrough-return
2340 %s = getelementptr inbounds <2 x i32>, ptr %p, i32 1
2341 %v = load <2 x i32>, ptr %s
2342 %v2 = sext <2 x i32> %v to <2 x i64>
2346 define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(ptr %p) {
2347 ; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset:
2348 ; CHECK: .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128)
2349 ; CHECK-NEXT: # %bb.0:
2350 ; CHECK-NEXT: local.get 0
2351 ; CHECK-NEXT: i64x2.load32x2_u 8
2352 ; CHECK-NEXT: # fallthrough-return
2353 %s = getelementptr inbounds <2 x i32>, ptr %p, i32 1
2354 %v = load <2 x i32>, ptr %s
2355 %v2 = zext <2 x i32> %v to <2 x i64>
2359 define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(ptr %p) {
2360 ; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset:
2361 ; CHECK: .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128)
2362 ; CHECK-NEXT: # %bb.0:
2363 ; CHECK-NEXT: local.get 0
2364 ; CHECK-NEXT: v128.load64_zero 8
2365 ; CHECK-NEXT: # fallthrough-return
2366 %s = getelementptr inbounds <2 x i32>, ptr %p, i32 1
2367 %v = load <2 x i32>, ptr %s
2371 define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(ptr %p) {
2372 ; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
2373 ; CHECK: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2374 ; CHECK-NEXT: # %bb.0:
2375 ; CHECK-NEXT: local.get 0
2376 ; CHECK-NEXT: i32.const -16
2377 ; CHECK-NEXT: i32.add
2378 ; CHECK-NEXT: v128.load 0
2379 ; CHECK-NEXT: # fallthrough-return
2380 %s = getelementptr inbounds <2 x i64>, ptr %p, i32 -1
2381 %v = load <2 x i64>, ptr %s
2385 define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(ptr %p) {
2386 ; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset:
2387 ; CHECK: .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2388 ; CHECK-NEXT: # %bb.0:
2389 ; CHECK-NEXT: local.get 0
2390 ; CHECK-NEXT: i32.const -8
2391 ; CHECK-NEXT: i32.add
2392 ; CHECK-NEXT: v128.load64_splat 0
2393 ; CHECK-NEXT: # fallthrough-return
2394 %s = getelementptr inbounds i64, ptr %p, i32 -1
2395 %e = load i64, ptr %s
2396 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2397 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2401 define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(ptr %p) {
2402 ; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset:
2403 ; CHECK: .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2404 ; CHECK-NEXT: # %bb.0:
2405 ; CHECK-NEXT: local.get 0
2406 ; CHECK-NEXT: i32.const -8
2407 ; CHECK-NEXT: i32.add
2408 ; CHECK-NEXT: i64x2.load32x2_s 0
2409 ; CHECK-NEXT: # fallthrough-return
2410 %s = getelementptr inbounds <2 x i32>, ptr %p, i32 -1
2411 %v = load <2 x i32>, ptr %s
2412 %v2 = sext <2 x i32> %v to <2 x i64>
2416 define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(ptr %p) {
2417 ; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset:
2418 ; CHECK: .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2419 ; CHECK-NEXT: # %bb.0:
2420 ; CHECK-NEXT: local.get 0
2421 ; CHECK-NEXT: i32.const -8
2422 ; CHECK-NEXT: i32.add
2423 ; CHECK-NEXT: i64x2.load32x2_u 0
2424 ; CHECK-NEXT: # fallthrough-return
2425 %s = getelementptr inbounds <2 x i32>, ptr %p, i32 -1
2426 %v = load <2 x i32>, ptr %s
2427 %v2 = zext <2 x i32> %v to <2 x i64>
2431 define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(ptr %p) {
2432 ; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset:
2433 ; CHECK: .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2434 ; CHECK-NEXT: # %bb.0:
2435 ; CHECK-NEXT: local.get 0
2436 ; CHECK-NEXT: i32.const -8
2437 ; CHECK-NEXT: i32.add
2438 ; CHECK-NEXT: v128.load64_zero 0
2439 ; CHECK-NEXT: # fallthrough-return
2440 %s = getelementptr inbounds <2 x i32>, ptr %p, i32 -1
2441 %v = load <2 x i32>, ptr %s
2445 define <2 x i64> @load_v2i64_with_unfolded_offset(ptr %p) {
2446 ; CHECK-LABEL: load_v2i64_with_unfolded_offset:
2447 ; CHECK: .functype load_v2i64_with_unfolded_offset (i32) -> (v128)
2448 ; CHECK-NEXT: # %bb.0:
2449 ; CHECK-NEXT: local.get 0
2450 ; CHECK-NEXT: i32.const 16
2451 ; CHECK-NEXT: i32.add
2452 ; CHECK-NEXT: v128.load 0
2453 ; CHECK-NEXT: # fallthrough-return
2454 %q = ptrtoint ptr %p to i32
2455 %r = add nsw i32 %q, 16
2456 %s = inttoptr i32 %r to ptr
2457 %v = load <2 x i64>, ptr %s
2461 define <2 x i64> @load_splat_v2i64_with_unfolded_offset(ptr %p) {
2462 ; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset:
2463 ; CHECK: .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128)
2464 ; CHECK-NEXT: # %bb.0:
2465 ; CHECK-NEXT: local.get 0
2466 ; CHECK-NEXT: i32.const 16
2467 ; CHECK-NEXT: i32.add
2468 ; CHECK-NEXT: v128.load64_splat 0
2469 ; CHECK-NEXT: # fallthrough-return
2470 %q = ptrtoint ptr %p to i32
2471 %r = add nsw i32 %q, 16
2472 %s = inttoptr i32 %r to ptr
2473 %e = load i64, ptr %s
2474 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2475 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2479 define <2 x i64> @load_sext_v2i64_with_unfolded_offset(ptr %p) {
2480 ; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset:
2481 ; CHECK: .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128)
2482 ; CHECK-NEXT: # %bb.0:
2483 ; CHECK-NEXT: local.get 0
2484 ; CHECK-NEXT: i32.const 16
2485 ; CHECK-NEXT: i32.add
2486 ; CHECK-NEXT: i64x2.load32x2_s 0
2487 ; CHECK-NEXT: # fallthrough-return
2488 %q = ptrtoint ptr %p to i32
2489 %r = add nsw i32 %q, 16
2490 %s = inttoptr i32 %r to ptr
2491 %v = load <2 x i32>, ptr %s
2492 %v2 = sext <2 x i32> %v to <2 x i64>
2496 define <2 x i64> @load_zext_v2i64_with_unfolded_offset(ptr %p) {
2497 ; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset:
2498 ; CHECK: .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128)
2499 ; CHECK-NEXT: # %bb.0:
2500 ; CHECK-NEXT: local.get 0
2501 ; CHECK-NEXT: i32.const 16
2502 ; CHECK-NEXT: i32.add
2503 ; CHECK-NEXT: i64x2.load32x2_u 0
2504 ; CHECK-NEXT: # fallthrough-return
2505 %q = ptrtoint ptr %p to i32
2506 %r = add nsw i32 %q, 16
2507 %s = inttoptr i32 %r to ptr
2508 %v = load <2 x i32>, ptr %s
2509 %v2 = zext <2 x i32> %v to <2 x i64>
2513 define <2 x i32> @load_ext_v2i64_with_unfolded_offset(ptr %p) {
2514 ; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset:
2515 ; CHECK: .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128)
2516 ; CHECK-NEXT: # %bb.0:
2517 ; CHECK-NEXT: local.get 0
2518 ; CHECK-NEXT: i32.const 16
2519 ; CHECK-NEXT: i32.add
2520 ; CHECK-NEXT: v128.load64_zero 0
2521 ; CHECK-NEXT: # fallthrough-return
2522 %q = ptrtoint ptr %p to i32
2523 %r = add nsw i32 %q, 16
2524 %s = inttoptr i32 %r to ptr
2525 %v = load <2 x i32>, ptr %s
2529 define <2 x i64> @load_v2i64_with_unfolded_gep_offset(ptr %p) {
2530 ; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
2531 ; CHECK: .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2532 ; CHECK-NEXT: # %bb.0:
2533 ; CHECK-NEXT: local.get 0
2534 ; CHECK-NEXT: i32.const 16
2535 ; CHECK-NEXT: i32.add
2536 ; CHECK-NEXT: v128.load 0
2537 ; CHECK-NEXT: # fallthrough-return
2538 %s = getelementptr <2 x i64>, ptr %p, i32 1
2539 %v = load <2 x i64>, ptr %s
2543 define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(ptr %p) {
2544 ; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset:
2545 ; CHECK: .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2546 ; CHECK-NEXT: # %bb.0:
2547 ; CHECK-NEXT: local.get 0
2548 ; CHECK-NEXT: i32.const 8
2549 ; CHECK-NEXT: i32.add
2550 ; CHECK-NEXT: v128.load64_splat 0
2551 ; CHECK-NEXT: # fallthrough-return
2552 %s = getelementptr i64, ptr %p, i32 1
2553 %e = load i64, ptr %s
2554 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2555 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2559 define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(ptr %p) {
2560 ; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset:
2561 ; CHECK: .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2562 ; CHECK-NEXT: # %bb.0:
2563 ; CHECK-NEXT: local.get 0
2564 ; CHECK-NEXT: i32.const 8
2565 ; CHECK-NEXT: i32.add
2566 ; CHECK-NEXT: i64x2.load32x2_s 0
2567 ; CHECK-NEXT: # fallthrough-return
2568 %s = getelementptr <2 x i32>, ptr %p, i32 1
2569 %v = load <2 x i32>, ptr %s
2570 %v2 = sext <2 x i32> %v to <2 x i64>
2574 define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(ptr %p) {
2575 ; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset:
2576 ; CHECK: .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2577 ; CHECK-NEXT: # %bb.0:
2578 ; CHECK-NEXT: local.get 0
2579 ; CHECK-NEXT: i32.const 8
2580 ; CHECK-NEXT: i32.add
2581 ; CHECK-NEXT: i64x2.load32x2_u 0
2582 ; CHECK-NEXT: # fallthrough-return
2583 %s = getelementptr <2 x i32>, ptr %p, i32 1
2584 %v = load <2 x i32>, ptr %s
2585 %v2 = zext <2 x i32> %v to <2 x i64>
2589 define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(ptr %p) {
2590 ; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset:
2591 ; CHECK: .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2592 ; CHECK-NEXT: # %bb.0:
2593 ; CHECK-NEXT: local.get 0
2594 ; CHECK-NEXT: i32.const 8
2595 ; CHECK-NEXT: i32.add
2596 ; CHECK-NEXT: v128.load64_zero 0
2597 ; CHECK-NEXT: # fallthrough-return
2598 %s = getelementptr <2 x i32>, ptr %p, i32 1
2599 %v = load <2 x i32>, ptr %s
2603 define <2 x i64> @load_v2i64_from_numeric_address() {
2604 ; CHECK-LABEL: load_v2i64_from_numeric_address:
2605 ; CHECK: .functype load_v2i64_from_numeric_address () -> (v128)
2606 ; CHECK-NEXT: # %bb.0:
2607 ; CHECK-NEXT: i32.const 0
2608 ; CHECK-NEXT: v128.load 32
2609 ; CHECK-NEXT: # fallthrough-return
2610 %s = inttoptr i32 32 to ptr
2611 %v = load <2 x i64>, ptr %s
2615 define <2 x i64> @load_splat_v2i64_from_numeric_address() {
2616 ; CHECK-LABEL: load_splat_v2i64_from_numeric_address:
2617 ; CHECK: .functype load_splat_v2i64_from_numeric_address () -> (v128)
2618 ; CHECK-NEXT: # %bb.0:
2619 ; CHECK-NEXT: i32.const 0
2620 ; CHECK-NEXT: v128.load64_splat 32
2621 ; CHECK-NEXT: # fallthrough-return
2622 %s = inttoptr i32 32 to ptr
2623 %e = load i64, ptr %s
2624 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2625 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2629 define <2 x i64> @load_sext_v2i64_from_numeric_address() {
2630 ; CHECK-LABEL: load_sext_v2i64_from_numeric_address:
2631 ; CHECK: .functype load_sext_v2i64_from_numeric_address () -> (v128)
2632 ; CHECK-NEXT: # %bb.0:
2633 ; CHECK-NEXT: i32.const 0
2634 ; CHECK-NEXT: i64x2.load32x2_s 32
2635 ; CHECK-NEXT: # fallthrough-return
2636 %s = inttoptr i32 32 to ptr
2637 %v = load <2 x i32>, ptr %s
2638 %v2 = sext <2 x i32> %v to <2 x i64>
2642 define <2 x i64> @load_zext_v2i64_from_numeric_address() {
2643 ; CHECK-LABEL: load_zext_v2i64_from_numeric_address:
2644 ; CHECK: .functype load_zext_v2i64_from_numeric_address () -> (v128)
2645 ; CHECK-NEXT: # %bb.0:
2646 ; CHECK-NEXT: i32.const 0
2647 ; CHECK-NEXT: i64x2.load32x2_u 32
2648 ; CHECK-NEXT: # fallthrough-return
2649 %s = inttoptr i32 32 to ptr
2650 %v = load <2 x i32>, ptr %s
2651 %v2 = zext <2 x i32> %v to <2 x i64>
2655 define <2 x i32> @load_ext_v2i64_from_numeric_address() {
2656 ; CHECK-LABEL: load_ext_v2i64_from_numeric_address:
2657 ; CHECK: .functype load_ext_v2i64_from_numeric_address () -> (v128)
2658 ; CHECK-NEXT: # %bb.0:
2659 ; CHECK-NEXT: i32.const 0
2660 ; CHECK-NEXT: v128.load64_zero 32
2661 ; CHECK-NEXT: # fallthrough-return
2662 %s = inttoptr i32 32 to ptr
2663 %v = load <2 x i32>, ptr %s
2667 @gv_v2i64 = global <2 x i64> <i64 42, i64 42>
2668 define <2 x i64> @load_v2i64_from_global_address() {
2669 ; CHECK-LABEL: load_v2i64_from_global_address:
2670 ; CHECK: .functype load_v2i64_from_global_address () -> (v128)
2671 ; CHECK-NEXT: # %bb.0:
2672 ; CHECK-NEXT: i32.const 0
2673 ; CHECK-NEXT: v128.load gv_v2i64
2674 ; CHECK-NEXT: # fallthrough-return
2675 %v = load <2 x i64>, ptr @gv_v2i64
2679 @gv_i64 = global i64 42
2680 define <2 x i64> @load_splat_v2i64_from_global_address() {
2681 ; CHECK-LABEL: load_splat_v2i64_from_global_address:
2682 ; CHECK: .functype load_splat_v2i64_from_global_address () -> (v128)
2683 ; CHECK-NEXT: # %bb.0:
2684 ; CHECK-NEXT: i32.const 0
2685 ; CHECK-NEXT: v128.load64_splat gv_i64
2686 ; CHECK-NEXT: # fallthrough-return
2687 %e = load i64, ptr @gv_i64
2688 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2689 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2693 @gv_v2i32 = global <2 x i32> <i32 42, i32 42>
2694 define <2 x i64> @load_sext_v2i64_from_global_address() {
2695 ; CHECK-LABEL: load_sext_v2i64_from_global_address:
2696 ; CHECK: .functype load_sext_v2i64_from_global_address () -> (v128)
2697 ; CHECK-NEXT: # %bb.0:
2698 ; CHECK-NEXT: i32.const 0
2699 ; CHECK-NEXT: i64x2.load32x2_s gv_v2i32
2700 ; CHECK-NEXT: # fallthrough-return
2701 %v = load <2 x i32>, ptr @gv_v2i32
2702 %v2 = sext <2 x i32> %v to <2 x i64>
2706 define <2 x i64> @load_zext_v2i64_from_global_address() {
2707 ; CHECK-LABEL: load_zext_v2i64_from_global_address:
2708 ; CHECK: .functype load_zext_v2i64_from_global_address () -> (v128)
2709 ; CHECK-NEXT: # %bb.0:
2710 ; CHECK-NEXT: i32.const 0
2711 ; CHECK-NEXT: i64x2.load32x2_u gv_v2i32
2712 ; CHECK-NEXT: # fallthrough-return
2713 %v = load <2 x i32>, ptr @gv_v2i32
2714 %v2 = zext <2 x i32> %v to <2 x i64>
2718 define <2 x i32> @load_ext_v2i64_from_global_address() {
2719 ; CHECK-LABEL: load_ext_v2i64_from_global_address:
2720 ; CHECK: .functype load_ext_v2i64_from_global_address () -> (v128)
2721 ; CHECK-NEXT: # %bb.0:
2722 ; CHECK-NEXT: i32.const 0
2723 ; CHECK-NEXT: v128.load64_zero gv_v2i32
2724 ; CHECK-NEXT: # fallthrough-return
2725 %v = load <2 x i32>, ptr @gv_v2i32
2729 define void @store_v2i64(<2 x i64> %v, ptr %p) {
2730 ; CHECK-LABEL: store_v2i64:
2731 ; CHECK: .functype store_v2i64 (v128, i32) -> ()
2732 ; CHECK-NEXT: # %bb.0:
2733 ; CHECK-NEXT: local.get 1
2734 ; CHECK-NEXT: local.get 0
2735 ; CHECK-NEXT: v128.store 0
2736 ; CHECK-NEXT: # fallthrough-return
2737 store <2 x i64> %v , ptr %p
2741 define void @store_v2i64_with_folded_offset(<2 x i64> %v, ptr %p) {
2742 ; CHECK-LABEL: store_v2i64_with_folded_offset:
2743 ; CHECK: .functype store_v2i64_with_folded_offset (v128, i32) -> ()
2744 ; CHECK-NEXT: # %bb.0:
2745 ; CHECK-NEXT: local.get 1
2746 ; CHECK-NEXT: local.get 0
2747 ; CHECK-NEXT: v128.store 16
2748 ; CHECK-NEXT: # fallthrough-return
2749 %q = ptrtoint ptr %p to i32
2750 %r = add nuw i32 %q, 16
2751 %s = inttoptr i32 %r to ptr
2752 store <2 x i64> %v , ptr %s
2756 define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, ptr %p) {
2757 ; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
2758 ; CHECK: .functype store_v2i64_with_folded_gep_offset (v128, i32) -> ()
2759 ; CHECK-NEXT: # %bb.0:
2760 ; CHECK-NEXT: local.get 1
2761 ; CHECK-NEXT: local.get 0
2762 ; CHECK-NEXT: v128.store 16
2763 ; CHECK-NEXT: # fallthrough-return
2764 %s = getelementptr inbounds <2 x i64>, ptr %p, i32 1
2765 store <2 x i64> %v , ptr %s
2769 define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, ptr %p) {
2770 ; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
2771 ; CHECK: .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> ()
2772 ; CHECK-NEXT: # %bb.0:
2773 ; CHECK-NEXT: local.get 1
2774 ; CHECK-NEXT: i32.const -16
2775 ; CHECK-NEXT: i32.add
2776 ; CHECK-NEXT: local.get 0
2777 ; CHECK-NEXT: v128.store 0
2778 ; CHECK-NEXT: # fallthrough-return
2779 %s = getelementptr inbounds <2 x i64>, ptr %p, i32 -1
2780 store <2 x i64> %v , ptr %s
2784 define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, ptr %p) {
2785 ; CHECK-LABEL: store_v2i64_with_unfolded_offset:
2786 ; CHECK: .functype store_v2i64_with_unfolded_offset (v128, i32) -> ()
2787 ; CHECK-NEXT: # %bb.0:
2788 ; CHECK-NEXT: local.get 1
2789 ; CHECK-NEXT: i32.const 16
2790 ; CHECK-NEXT: i32.add
2791 ; CHECK-NEXT: local.get 0
2792 ; CHECK-NEXT: v128.store 0
2793 ; CHECK-NEXT: # fallthrough-return
2794 %q = ptrtoint ptr %p to i32
2795 %r = add nsw i32 %q, 16
2796 %s = inttoptr i32 %r to ptr
2797 store <2 x i64> %v , ptr %s
2801 define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, ptr %p) {
2802 ; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
2803 ; CHECK: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> ()
2804 ; CHECK-NEXT: # %bb.0:
2805 ; CHECK-NEXT: local.get 1
2806 ; CHECK-NEXT: i32.const 16
2807 ; CHECK-NEXT: i32.add
2808 ; CHECK-NEXT: local.get 0
2809 ; CHECK-NEXT: v128.store 0
2810 ; CHECK-NEXT: # fallthrough-return
2811 %s = getelementptr <2 x i64>, ptr %p, i32 1
2812 store <2 x i64> %v , ptr %s
2816 define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
2817 ; CHECK-LABEL: store_v2i64_to_numeric_address:
2818 ; CHECK: .functype store_v2i64_to_numeric_address (v128) -> ()
2819 ; CHECK-NEXT: # %bb.0:
2820 ; CHECK-NEXT: i32.const 0
2821 ; CHECK-NEXT: local.get 0
2822 ; CHECK-NEXT: v128.store 32
2823 ; CHECK-NEXT: # fallthrough-return
2824 %s = inttoptr i32 32 to ptr
2825 store <2 x i64> %v , ptr %s
2829 define void @store_v2i64_to_global_address(<2 x i64> %v) {
2830 ; CHECK-LABEL: store_v2i64_to_global_address:
2831 ; CHECK: .functype store_v2i64_to_global_address (v128) -> ()
2832 ; CHECK-NEXT: # %bb.0:
2833 ; CHECK-NEXT: i32.const 0
2834 ; CHECK-NEXT: local.get 0
2835 ; CHECK-NEXT: v128.store gv_v2i64
2836 ; CHECK-NEXT: # fallthrough-return
2837 store <2 x i64> %v , ptr @gv_v2i64
2841 ; ==============================================================================
2843 ; ==============================================================================
2844 define <4 x float> @load_v4f32(ptr %p) {
2845 ; CHECK-LABEL: load_v4f32:
2846 ; CHECK: .functype load_v4f32 (i32) -> (v128)
2847 ; CHECK-NEXT: # %bb.0:
2848 ; CHECK-NEXT: local.get 0
2849 ; CHECK-NEXT: v128.load 0
2850 ; CHECK-NEXT: # fallthrough-return
2851 %v = load <4 x float>, ptr %p
2855 define <4 x float> @load_splat_v4f32(ptr %p) {
2856 ; CHECK-LABEL: load_splat_v4f32:
2857 ; CHECK: .functype load_splat_v4f32 (i32) -> (v128)
2858 ; CHECK-NEXT: # %bb.0:
2859 ; CHECK-NEXT: local.get 0
2860 ; CHECK-NEXT: v128.load32_splat 0
2861 ; CHECK-NEXT: # fallthrough-return
2862 %e = load float, ptr %p
2863 %v1 = insertelement <4 x float> undef, float %e, i32 0
2864 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2868 define <4 x float> @load_v4f32_with_folded_offset(ptr %p) {
2869 ; CHECK-LABEL: load_v4f32_with_folded_offset:
2870 ; CHECK: .functype load_v4f32_with_folded_offset (i32) -> (v128)
2871 ; CHECK-NEXT: # %bb.0:
2872 ; CHECK-NEXT: local.get 0
2873 ; CHECK-NEXT: v128.load 16
2874 ; CHECK-NEXT: # fallthrough-return
2875 %q = ptrtoint ptr %p to i32
2876 %r = add nuw i32 %q, 16
2877 %s = inttoptr i32 %r to ptr
2878 %v = load <4 x float>, ptr %s
2882 define <4 x float> @load_splat_v4f32_with_folded_offset(ptr %p) {
2883 ; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
2884 ; CHECK: .functype load_splat_v4f32_with_folded_offset (i32) -> (v128)
2885 ; CHECK-NEXT: # %bb.0:
2886 ; CHECK-NEXT: local.get 0
2887 ; CHECK-NEXT: v128.load32_splat 16
2888 ; CHECK-NEXT: # fallthrough-return
2889 %q = ptrtoint ptr %p to i32
2890 %r = add nuw i32 %q, 16
2891 %s = inttoptr i32 %r to ptr
2892 %e = load float, ptr %s
2893 %v1 = insertelement <4 x float> undef, float %e, i32 0
2894 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2898 define <4 x float> @load_v4f32_with_folded_gep_offset(ptr %p) {
2899 ; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
2900 ; CHECK: .functype load_v4f32_with_folded_gep_offset (i32) -> (v128)
2901 ; CHECK-NEXT: # %bb.0:
2902 ; CHECK-NEXT: local.get 0
2903 ; CHECK-NEXT: v128.load 16
2904 ; CHECK-NEXT: # fallthrough-return
2905 %s = getelementptr inbounds <4 x float>, ptr %p, i32 1
2906 %v = load <4 x float>, ptr %s
2910 define <4 x float> @load_splat_v4f32_with_folded_gep_offset(ptr %p) {
2911 ; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
2912 ; CHECK: .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128)
2913 ; CHECK-NEXT: # %bb.0:
2914 ; CHECK-NEXT: local.get 0
2915 ; CHECK-NEXT: v128.load32_splat 4
2916 ; CHECK-NEXT: # fallthrough-return
2917 %s = getelementptr inbounds float, ptr %p, i32 1
2918 %e = load float, ptr %s
2919 %v1 = insertelement <4 x float> undef, float %e, i32 0
2920 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2924 define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(ptr %p) {
2925 ; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
2926 ; CHECK: .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
2927 ; CHECK-NEXT: # %bb.0:
2928 ; CHECK-NEXT: local.get 0
2929 ; CHECK-NEXT: i32.const -16
2930 ; CHECK-NEXT: i32.add
2931 ; CHECK-NEXT: v128.load 0
2932 ; CHECK-NEXT: # fallthrough-return
2933 %s = getelementptr inbounds <4 x float>, ptr %p, i32 -1
2934 %v = load <4 x float>, ptr %s
2938 define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(ptr %p) {
2939 ; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
2940 ; CHECK: .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
2941 ; CHECK-NEXT: # %bb.0:
2942 ; CHECK-NEXT: local.get 0
2943 ; CHECK-NEXT: i32.const -4
2944 ; CHECK-NEXT: i32.add
2945 ; CHECK-NEXT: v128.load32_splat 0
2946 ; CHECK-NEXT: # fallthrough-return
2947 %s = getelementptr inbounds float, ptr %p, i32 -1
2948 %e = load float, ptr %s
2949 %v1 = insertelement <4 x float> undef, float %e, i32 0
2950 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2954 define <4 x float> @load_v4f32_with_unfolded_offset(ptr %p) {
2955 ; CHECK-LABEL: load_v4f32_with_unfolded_offset:
2956 ; CHECK: .functype load_v4f32_with_unfolded_offset (i32) -> (v128)
2957 ; CHECK-NEXT: # %bb.0:
2958 ; CHECK-NEXT: local.get 0
2959 ; CHECK-NEXT: i32.const 16
2960 ; CHECK-NEXT: i32.add
2961 ; CHECK-NEXT: v128.load 0
2962 ; CHECK-NEXT: # fallthrough-return
2963 %q = ptrtoint ptr %p to i32
2964 %r = add nsw i32 %q, 16
2965 %s = inttoptr i32 %r to ptr
2966 %v = load <4 x float>, ptr %s
2970 define <4 x float> @load_splat_v4f32_with_unfolded_offset(ptr %p) {
2971 ; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
2972 ; CHECK: .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128)
2973 ; CHECK-NEXT: # %bb.0:
2974 ; CHECK-NEXT: local.get 0
2975 ; CHECK-NEXT: i32.const 16
2976 ; CHECK-NEXT: i32.add
2977 ; CHECK-NEXT: v128.load32_splat 0
2978 ; CHECK-NEXT: # fallthrough-return
2979 %q = ptrtoint ptr %p to i32
2980 %r = add nsw i32 %q, 16
2981 %s = inttoptr i32 %r to ptr
2982 %e = load float, ptr %s
2983 %v1 = insertelement <4 x float> undef, float %e, i32 0
2984 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2988 define <4 x float> @load_v4f32_with_unfolded_gep_offset(ptr %p) {
2989 ; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
2990 ; CHECK: .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128)
2991 ; CHECK-NEXT: # %bb.0:
2992 ; CHECK-NEXT: local.get 0
2993 ; CHECK-NEXT: i32.const 16
2994 ; CHECK-NEXT: i32.add
2995 ; CHECK-NEXT: v128.load 0
2996 ; CHECK-NEXT: # fallthrough-return
2997 %s = getelementptr <4 x float>, ptr %p, i32 1
2998 %v = load <4 x float>, ptr %s
3002 define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(ptr %p) {
3003 ; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
3004 ; CHECK: .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128)
3005 ; CHECK-NEXT: # %bb.0:
3006 ; CHECK-NEXT: local.get 0
3007 ; CHECK-NEXT: i32.const 4
3008 ; CHECK-NEXT: i32.add
3009 ; CHECK-NEXT: v128.load32_splat 0
3010 ; CHECK-NEXT: # fallthrough-return
3011 %s = getelementptr float, ptr %p, i32 1
3012 %e = load float, ptr %s
3013 %v1 = insertelement <4 x float> undef, float %e, i32 0
3014 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
3018 define <4 x float> @load_v4f32_from_numeric_address() {
3019 ; CHECK-LABEL: load_v4f32_from_numeric_address:
3020 ; CHECK: .functype load_v4f32_from_numeric_address () -> (v128)
3021 ; CHECK-NEXT: # %bb.0:
3022 ; CHECK-NEXT: i32.const 0
3023 ; CHECK-NEXT: v128.load 32
3024 ; CHECK-NEXT: # fallthrough-return
3025 %s = inttoptr i32 32 to ptr
3026 %v = load <4 x float>, ptr %s
3030 define <4 x float> @load_splat_v4f32_from_numeric_address() {
3031 ; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
3032 ; CHECK: .functype load_splat_v4f32_from_numeric_address () -> (v128)
3033 ; CHECK-NEXT: # %bb.0:
3034 ; CHECK-NEXT: i32.const 0
3035 ; CHECK-NEXT: v128.load32_splat 32
3036 ; CHECK-NEXT: # fallthrough-return
3037 %s = inttoptr i32 32 to ptr
3038 %e = load float, ptr %s
3039 %v1 = insertelement <4 x float> undef, float %e, i32 0
3040 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
3044 @gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
3045 define <4 x float> @load_v4f32_from_global_address() {
3046 ; CHECK-LABEL: load_v4f32_from_global_address:
3047 ; CHECK: .functype load_v4f32_from_global_address () -> (v128)
3048 ; CHECK-NEXT: # %bb.0:
3049 ; CHECK-NEXT: i32.const 0
3050 ; CHECK-NEXT: v128.load gv_v4f32
3051 ; CHECK-NEXT: # fallthrough-return
3052 %v = load <4 x float>, ptr @gv_v4f32
3056 @gv_f32 = global float 42.
3057 define <4 x float> @load_splat_v4f32_from_global_address() {
3058 ; CHECK-LABEL: load_splat_v4f32_from_global_address:
3059 ; CHECK: .functype load_splat_v4f32_from_global_address () -> (v128)
3060 ; CHECK-NEXT: # %bb.0:
3061 ; CHECK-NEXT: i32.const 0
3062 ; CHECK-NEXT: v128.load32_splat gv_f32
3063 ; CHECK-NEXT: # fallthrough-return
3064 %e = load float, ptr @gv_f32
3065 %v1 = insertelement <4 x float> undef, float %e, i32 0
3066 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
3070 define void @store_v4f32(<4 x float> %v, ptr %p) {
3071 ; CHECK-LABEL: store_v4f32:
3072 ; CHECK: .functype store_v4f32 (v128, i32) -> ()
3073 ; CHECK-NEXT: # %bb.0:
3074 ; CHECK-NEXT: local.get 1
3075 ; CHECK-NEXT: local.get 0
3076 ; CHECK-NEXT: v128.store 0
3077 ; CHECK-NEXT: # fallthrough-return
3078 store <4 x float> %v , ptr %p
3082 define void @store_v4f32_with_folded_offset(<4 x float> %v, ptr %p) {
3083 ; CHECK-LABEL: store_v4f32_with_folded_offset:
3084 ; CHECK: .functype store_v4f32_with_folded_offset (v128, i32) -> ()
3085 ; CHECK-NEXT: # %bb.0:
3086 ; CHECK-NEXT: local.get 1
3087 ; CHECK-NEXT: local.get 0
3088 ; CHECK-NEXT: v128.store 16
3089 ; CHECK-NEXT: # fallthrough-return
3090 %q = ptrtoint ptr %p to i32
3091 %r = add nuw i32 %q, 16
3092 %s = inttoptr i32 %r to ptr
3093 store <4 x float> %v , ptr %s
3097 define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, ptr %p) {
3098 ; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
3099 ; CHECK: .functype store_v4f32_with_folded_gep_offset (v128, i32) -> ()
3100 ; CHECK-NEXT: # %bb.0:
3101 ; CHECK-NEXT: local.get 1
3102 ; CHECK-NEXT: local.get 0
3103 ; CHECK-NEXT: v128.store 16
3104 ; CHECK-NEXT: # fallthrough-return
3105 %s = getelementptr inbounds <4 x float>, ptr %p, i32 1
3106 store <4 x float> %v , ptr %s
3110 define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, ptr %p) {
3111 ; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
3112 ; CHECK: .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> ()
3113 ; CHECK-NEXT: # %bb.0:
3114 ; CHECK-NEXT: local.get 1
3115 ; CHECK-NEXT: i32.const -16
3116 ; CHECK-NEXT: i32.add
3117 ; CHECK-NEXT: local.get 0
3118 ; CHECK-NEXT: v128.store 0
3119 ; CHECK-NEXT: # fallthrough-return
3120 %s = getelementptr inbounds <4 x float>, ptr %p, i32 -1
3121 store <4 x float> %v , ptr %s
3125 define void @store_v4f32_with_unfolded_offset(<4 x float> %v, ptr %p) {
3126 ; CHECK-LABEL: store_v4f32_with_unfolded_offset:
3127 ; CHECK: .functype store_v4f32_with_unfolded_offset (v128, i32) -> ()
3128 ; CHECK-NEXT: # %bb.0:
3129 ; CHECK-NEXT: local.get 1
3130 ; CHECK-NEXT: i32.const 16
3131 ; CHECK-NEXT: i32.add
3132 ; CHECK-NEXT: local.get 0
3133 ; CHECK-NEXT: v128.store 0
3134 ; CHECK-NEXT: # fallthrough-return
3135 %q = ptrtoint ptr %p to i32
3136 %r = add nsw i32 %q, 16
3137 %s = inttoptr i32 %r to ptr
3138 store <4 x float> %v , ptr %s
3142 define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, ptr %p) {
3143 ; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
3144 ; CHECK: .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> ()
3145 ; CHECK-NEXT: # %bb.0:
3146 ; CHECK-NEXT: local.get 1
3147 ; CHECK-NEXT: i32.const 16
3148 ; CHECK-NEXT: i32.add
3149 ; CHECK-NEXT: local.get 0
3150 ; CHECK-NEXT: v128.store 0
3151 ; CHECK-NEXT: # fallthrough-return
3152 %s = getelementptr <4 x float>, ptr %p, i32 1
3153 store <4 x float> %v , ptr %s
3157 define void @store_v4f32_to_numeric_address(<4 x float> %v) {
3158 ; CHECK-LABEL: store_v4f32_to_numeric_address:
3159 ; CHECK: .functype store_v4f32_to_numeric_address (v128) -> ()
3160 ; CHECK-NEXT: # %bb.0:
3161 ; CHECK-NEXT: i32.const 0
3162 ; CHECK-NEXT: local.get 0
3163 ; CHECK-NEXT: v128.store 32
3164 ; CHECK-NEXT: # fallthrough-return
3165 %s = inttoptr i32 32 to ptr
3166 store <4 x float> %v , ptr %s
3170 define void @store_v4f32_to_global_address(<4 x float> %v) {
3171 ; CHECK-LABEL: store_v4f32_to_global_address:
3172 ; CHECK: .functype store_v4f32_to_global_address (v128) -> ()
3173 ; CHECK-NEXT: # %bb.0:
3174 ; CHECK-NEXT: i32.const 0
3175 ; CHECK-NEXT: local.get 0
3176 ; CHECK-NEXT: v128.store gv_v4f32
3177 ; CHECK-NEXT: # fallthrough-return
3178 store <4 x float> %v , ptr @gv_v4f32
3182 ; ==============================================================================
3184 ; ==============================================================================
3185 define <2 x double> @load_v2f64(ptr %p) {
3186 ; CHECK-LABEL: load_v2f64:
3187 ; CHECK: .functype load_v2f64 (i32) -> (v128)
3188 ; CHECK-NEXT: # %bb.0:
3189 ; CHECK-NEXT: local.get 0
3190 ; CHECK-NEXT: v128.load 0
3191 ; CHECK-NEXT: # fallthrough-return
3192 %v = load <2 x double>, ptr %p
3196 define <2 x double> @load_splat_v2f64(ptr %p) {
3197 ; CHECK-LABEL: load_splat_v2f64:
3198 ; CHECK: .functype load_splat_v2f64 (i32) -> (v128)
3199 ; CHECK-NEXT: # %bb.0:
3200 ; CHECK-NEXT: local.get 0
3201 ; CHECK-NEXT: v128.load64_splat 0
3202 ; CHECK-NEXT: # fallthrough-return
3203 %e = load double, ptr %p
3204 %v1 = insertelement <2 x double> undef, double %e, i32 0
3205 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3206 ret <2 x double> %v2
3209 define <2 x double> @load_promote_v2f64(ptr %p) {
3210 ; CHECK-LABEL: load_promote_v2f64:
3211 ; CHECK: .functype load_promote_v2f64 (i32) -> (v128)
3212 ; CHECK-NEXT: # %bb.0:
3213 ; CHECK-NEXT: local.get 0
3214 ; CHECK-NEXT: v128.load64_zero 0
3215 ; CHECK-NEXT: f64x2.promote_low_f32x4
3216 ; CHECK-NEXT: # fallthrough-return
3217 %e = load <2 x float>, ptr %p
3218 %v = fpext <2 x float> %e to <2 x double>
3222 define <2 x double> @load_v2f64_with_folded_offset(ptr %p) {
3223 ; CHECK-LABEL: load_v2f64_with_folded_offset:
3224 ; CHECK: .functype load_v2f64_with_folded_offset (i32) -> (v128)
3225 ; CHECK-NEXT: # %bb.0:
3226 ; CHECK-NEXT: local.get 0
3227 ; CHECK-NEXT: v128.load 16
3228 ; CHECK-NEXT: # fallthrough-return
3229 %q = ptrtoint ptr %p to i32
3230 %r = add nuw i32 %q, 16
3231 %s = inttoptr i32 %r to ptr
3232 %v = load <2 x double>, ptr %s
3236 define <2 x double> @load_splat_v2f64_with_folded_offset(ptr %p) {
3237 ; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
3238 ; CHECK: .functype load_splat_v2f64_with_folded_offset (i32) -> (v128)
3239 ; CHECK-NEXT: # %bb.0:
3240 ; CHECK-NEXT: local.get 0
3241 ; CHECK-NEXT: v128.load64_splat 16
3242 ; CHECK-NEXT: # fallthrough-return
3243 %q = ptrtoint ptr %p to i32
3244 %r = add nuw i32 %q, 16
3245 %s = inttoptr i32 %r to ptr
3246 %e = load double, ptr %s
3247 %v1 = insertelement <2 x double> undef, double %e, i32 0
3248 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3249 ret <2 x double> %v2
3252 define <2 x double> @load_promote_v2f64_with_folded_offset(ptr %p) {
3253 ; CHECK-LABEL: load_promote_v2f64_with_folded_offset:
3254 ; CHECK: .functype load_promote_v2f64_with_folded_offset (i32) -> (v128)
3255 ; CHECK-NEXT: # %bb.0:
3256 ; CHECK-NEXT: local.get 0
3257 ; CHECK-NEXT: i32.const 16
3258 ; CHECK-NEXT: i32.add
3259 ; CHECK-NEXT: v128.load64_zero 0
3260 ; CHECK-NEXT: f64x2.promote_low_f32x4
3261 ; CHECK-NEXT: # fallthrough-return
3262 %q = ptrtoint ptr %p to i32
3263 %r = add nuw i32 %q, 16
3264 %s = inttoptr i32 %r to ptr
3265 %e = load <2 x float>, ptr %s
3266 %v = fpext <2 x float> %e to <2 x double>
3270 define <2 x double> @load_v2f64_with_folded_gep_offset(ptr %p) {
3271 ; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
3272 ; CHECK: .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
3273 ; CHECK-NEXT: # %bb.0:
3274 ; CHECK-NEXT: local.get 0
3275 ; CHECK-NEXT: v128.load 16
3276 ; CHECK-NEXT: # fallthrough-return
3277 %s = getelementptr inbounds <2 x double>, ptr %p, i32 1
3278 %v = load <2 x double>, ptr %s
3282 define <2 x double> @load_splat_v2f64_with_folded_gep_offset(ptr %p) {
3283 ; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
3284 ; CHECK: .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128)
3285 ; CHECK-NEXT: # %bb.0:
3286 ; CHECK-NEXT: local.get 0
3287 ; CHECK-NEXT: v128.load64_splat 8
3288 ; CHECK-NEXT: # fallthrough-return
3289 %s = getelementptr inbounds double, ptr %p, i32 1
3290 %e = load double, ptr %s
3291 %v1 = insertelement <2 x double> undef, double %e, i32 0
3292 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3293 ret <2 x double> %v2
3296 define <2 x double> @load_promote_v2f64_with_folded_gep_offset(ptr %p) {
3297 ; CHECK-LABEL: load_promote_v2f64_with_folded_gep_offset:
3298 ; CHECK: .functype load_promote_v2f64_with_folded_gep_offset (i32) -> (v128)
3299 ; CHECK-NEXT: # %bb.0:
3300 ; CHECK-NEXT: local.get 0
3301 ; CHECK-NEXT: i32.const 8
3302 ; CHECK-NEXT: i32.add
3303 ; CHECK-NEXT: v128.load64_zero 0
3304 ; CHECK-NEXT: f64x2.promote_low_f32x4
3305 ; CHECK-NEXT: # fallthrough-return
3306 %s = getelementptr inbounds <2 x float>, ptr %p, i32 1
3307 %e = load <2 x float>, ptr %s
3308 %v = fpext <2 x float> %e to <2 x double>
3312 define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(ptr %p) {
3313 ; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
3314 ; CHECK: .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
3315 ; CHECK-NEXT: # %bb.0:
3316 ; CHECK-NEXT: local.get 0
3317 ; CHECK-NEXT: i32.const -16
3318 ; CHECK-NEXT: i32.add
3319 ; CHECK-NEXT: v128.load 0
3320 ; CHECK-NEXT: # fallthrough-return
3321 %s = getelementptr inbounds <2 x double>, ptr %p, i32 -1
3322 %v = load <2 x double>, ptr %s
3326 define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(ptr %p) {
3327 ; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
3328 ; CHECK: .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
3329 ; CHECK-NEXT: # %bb.0:
3330 ; CHECK-NEXT: local.get 0
3331 ; CHECK-NEXT: i32.const -8
3332 ; CHECK-NEXT: i32.add
3333 ; CHECK-NEXT: v128.load64_splat 0
3334 ; CHECK-NEXT: # fallthrough-return
3335 %s = getelementptr inbounds double, ptr %p, i32 -1
3336 %e = load double, ptr %s
3337 %v1 = insertelement <2 x double> undef, double %e, i32 0
3338 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3339 ret <2 x double> %v2
3342 define <2 x double> @load_promote_v2f64_with_unfolded_gep_negative_offset(ptr %p) {
3343 ; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_negative_offset:
3344 ; CHECK: .functype load_promote_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
3345 ; CHECK-NEXT: # %bb.0:
3346 ; CHECK-NEXT: local.get 0
3347 ; CHECK-NEXT: i32.const -8
3348 ; CHECK-NEXT: i32.add
3349 ; CHECK-NEXT: v128.load64_zero 0
3350 ; CHECK-NEXT: f64x2.promote_low_f32x4
3351 ; CHECK-NEXT: # fallthrough-return
3352 %s = getelementptr inbounds <2 x float>, ptr %p, i32 -1
3353 %e = load <2 x float>, ptr %s
3354 %v = fpext <2 x float> %e to <2 x double>
3358 define <2 x double> @load_v2f64_with_unfolded_offset(ptr %p) {
3359 ; CHECK-LABEL: load_v2f64_with_unfolded_offset:
3360 ; CHECK: .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
3361 ; CHECK-NEXT: # %bb.0:
3362 ; CHECK-NEXT: local.get 0
3363 ; CHECK-NEXT: i32.const 16
3364 ; CHECK-NEXT: i32.add
3365 ; CHECK-NEXT: v128.load 0
3366 ; CHECK-NEXT: # fallthrough-return
3367 %q = ptrtoint ptr %p to i32
3368 %r = add nsw i32 %q, 16
3369 %s = inttoptr i32 %r to ptr
3370 %v = load <2 x double>, ptr %s
3374 define <2 x double> @load_splat_v2f64_with_unfolded_offset(ptr %p) {
3375 ; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
3376 ; CHECK: .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128)
3377 ; CHECK-NEXT: # %bb.0:
3378 ; CHECK-NEXT: local.get 0
3379 ; CHECK-NEXT: i32.const 16
3380 ; CHECK-NEXT: i32.add
3381 ; CHECK-NEXT: v128.load64_splat 0
3382 ; CHECK-NEXT: # fallthrough-return
3383 %q = ptrtoint ptr %p to i32
3384 %r = add nsw i32 %q, 16
3385 %s = inttoptr i32 %r to ptr
3386 %e = load double, ptr %s
3387 %v1 = insertelement <2 x double> undef, double %e, i32 0
3388 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3389 ret <2 x double> %v2
3392 define <2 x double> @load_promote_v2f64_with_unfolded_offset(ptr %p) {
3393 ; CHECK-LABEL: load_promote_v2f64_with_unfolded_offset:
3394 ; CHECK: .functype load_promote_v2f64_with_unfolded_offset (i32) -> (v128)
3395 ; CHECK-NEXT: # %bb.0:
3396 ; CHECK-NEXT: local.get 0
3397 ; CHECK-NEXT: i32.const 16
3398 ; CHECK-NEXT: i32.add
3399 ; CHECK-NEXT: v128.load64_zero 0
3400 ; CHECK-NEXT: f64x2.promote_low_f32x4
3401 ; CHECK-NEXT: # fallthrough-return
3402 %q = ptrtoint ptr %p to i32
3403 %r = add nsw i32 %q, 16
3404 %s = inttoptr i32 %r to ptr
3405 %e = load <2 x float>, ptr %s
3406 %v = fpext <2 x float> %e to <2 x double>
3410 define <2 x double> @load_v2f64_with_unfolded_gep_offset(ptr %p) {
3411 ; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
3412 ; CHECK: .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
3413 ; CHECK-NEXT: # %bb.0:
3414 ; CHECK-NEXT: local.get 0
3415 ; CHECK-NEXT: i32.const 16
3416 ; CHECK-NEXT: i32.add
3417 ; CHECK-NEXT: v128.load 0
3418 ; CHECK-NEXT: # fallthrough-return
3419 %s = getelementptr <2 x double>, ptr %p, i32 1
3420 %v = load <2 x double>, ptr %s
3424 define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(ptr %p) {
3425 ; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
3426 ; CHECK: .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128)
3427 ; CHECK-NEXT: # %bb.0:
3428 ; CHECK-NEXT: local.get 0
3429 ; CHECK-NEXT: i32.const 8
3430 ; CHECK-NEXT: i32.add
3431 ; CHECK-NEXT: v128.load64_splat 0
3432 ; CHECK-NEXT: # fallthrough-return
3433 %s = getelementptr double, ptr %p, i32 1
3434 %e = load double, ptr %s
3435 %v1 = insertelement <2 x double> undef, double %e, i32 0
3436 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3437 ret <2 x double> %v2
3440 define <2 x double> @load_promote_v2f64_with_unfolded_gep_offset(ptr %p) {
3441 ; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_offset:
3442 ; CHECK: .functype load_promote_v2f64_with_unfolded_gep_offset (i32) -> (v128)
3443 ; CHECK-NEXT: # %bb.0:
3444 ; CHECK-NEXT: local.get 0
3445 ; CHECK-NEXT: i32.const 8
3446 ; CHECK-NEXT: i32.add
3447 ; CHECK-NEXT: v128.load64_zero 0
3448 ; CHECK-NEXT: f64x2.promote_low_f32x4
3449 ; CHECK-NEXT: # fallthrough-return
3450 %s = getelementptr <2 x float>, ptr %p, i32 1
3451 %e = load <2 x float>, ptr %s
3452 %v = fpext <2 x float> %e to <2 x double>
3456 define <2 x double> @load_v2f64_from_numeric_address() {
3457 ; CHECK-LABEL: load_v2f64_from_numeric_address:
3458 ; CHECK: .functype load_v2f64_from_numeric_address () -> (v128)
3459 ; CHECK-NEXT: # %bb.0:
3460 ; CHECK-NEXT: i32.const 0
3461 ; CHECK-NEXT: v128.load 32
3462 ; CHECK-NEXT: # fallthrough-return
3463 %s = inttoptr i32 32 to ptr
3464 %v = load <2 x double>, ptr %s
3468 define <2 x double> @load_splat_v2f64_from_numeric_address() {
3469 ; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
3470 ; CHECK: .functype load_splat_v2f64_from_numeric_address () -> (v128)
3471 ; CHECK-NEXT: # %bb.0:
3472 ; CHECK-NEXT: i32.const 0
3473 ; CHECK-NEXT: v128.load64_splat 32
3474 ; CHECK-NEXT: # fallthrough-return
3475 %s = inttoptr i32 32 to ptr
3476 %e = load double, ptr %s
3477 %v1 = insertelement <2 x double> undef, double %e, i32 0
3478 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3479 ret <2 x double> %v2
3482 define <2 x double> @load_promote_v2f64_from_numeric_address() {
3483 ; CHECK-LABEL: load_promote_v2f64_from_numeric_address:
3484 ; CHECK: .functype load_promote_v2f64_from_numeric_address () -> (v128)
3485 ; CHECK-NEXT: # %bb.0:
3486 ; CHECK-NEXT: i32.const 32
3487 ; CHECK-NEXT: v128.load64_zero 0
3488 ; CHECK-NEXT: f64x2.promote_low_f32x4
3489 ; CHECK-NEXT: # fallthrough-return
3490 %s = inttoptr i32 32 to ptr
3491 %e = load <2 x float>, ptr %s
3492 %v = fpext <2 x float> %e to <2 x double>
3496 @gv_v2f64 = global <2 x double> <double 42., double 42.>
3497 define <2 x double> @load_v2f64_from_global_address() {
3498 ; CHECK-LABEL: load_v2f64_from_global_address:
3499 ; CHECK: .functype load_v2f64_from_global_address () -> (v128)
3500 ; CHECK-NEXT: # %bb.0:
3501 ; CHECK-NEXT: i32.const 0
3502 ; CHECK-NEXT: v128.load gv_v2f64
3503 ; CHECK-NEXT: # fallthrough-return
3504 %v = load <2 x double>, ptr @gv_v2f64
3508 @gv_f64 = global double 42.
3509 define <2 x double> @load_splat_v2f64_from_global_address() {
3510 ; CHECK-LABEL: load_splat_v2f64_from_global_address:
3511 ; CHECK: .functype load_splat_v2f64_from_global_address () -> (v128)
3512 ; CHECK-NEXT: # %bb.0:
3513 ; CHECK-NEXT: i32.const 0
3514 ; CHECK-NEXT: v128.load64_splat gv_f64
3515 ; CHECK-NEXT: # fallthrough-return
3516 %e = load double, ptr @gv_f64
3517 %v1 = insertelement <2 x double> undef, double %e, i32 0
3518 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3519 ret <2 x double> %v2
3522 @gv_v2f32 = global <2 x float> <float 42., float 42.>
3523 define <2 x double> @load_promote_v2f64_from_global_address() {
3524 ; CHECK-LABEL: load_promote_v2f64_from_global_address:
3525 ; CHECK: .functype load_promote_v2f64_from_global_address () -> (v128)
3526 ; CHECK-NEXT: # %bb.0:
3527 ; CHECK-NEXT: i32.const gv_v2f32
3528 ; CHECK-NEXT: v128.load64_zero 0
3529 ; CHECK-NEXT: f64x2.promote_low_f32x4
3530 ; CHECK-NEXT: # fallthrough-return
3531 %e = load <2 x float>, ptr @gv_v2f32
3532 %v = fpext <2 x float> %e to <2 x double>
3536 define void @store_v2f64(<2 x double> %v, ptr %p) {
3537 ; CHECK-LABEL: store_v2f64:
3538 ; CHECK: .functype store_v2f64 (v128, i32) -> ()
3539 ; CHECK-NEXT: # %bb.0:
3540 ; CHECK-NEXT: local.get 1
3541 ; CHECK-NEXT: local.get 0
3542 ; CHECK-NEXT: v128.store 0
3543 ; CHECK-NEXT: # fallthrough-return
3544 store <2 x double> %v , ptr %p
3548 define void @store_v2f64_with_folded_offset(<2 x double> %v, ptr %p) {
3549 ; CHECK-LABEL: store_v2f64_with_folded_offset:
3550 ; CHECK: .functype store_v2f64_with_folded_offset (v128, i32) -> ()
3551 ; CHECK-NEXT: # %bb.0:
3552 ; CHECK-NEXT: local.get 1
3553 ; CHECK-NEXT: local.get 0
3554 ; CHECK-NEXT: v128.store 16
3555 ; CHECK-NEXT: # fallthrough-return
3556 %q = ptrtoint ptr %p to i32
3557 %r = add nuw i32 %q, 16
3558 %s = inttoptr i32 %r to ptr
3559 store <2 x double> %v , ptr %s
3563 define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, ptr %p) {
3564 ; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
3565 ; CHECK: .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
3566 ; CHECK-NEXT: # %bb.0:
3567 ; CHECK-NEXT: local.get 1
3568 ; CHECK-NEXT: local.get 0
3569 ; CHECK-NEXT: v128.store 16
3570 ; CHECK-NEXT: # fallthrough-return
3571 %s = getelementptr inbounds <2 x double>, ptr %p, i32 1
3572 store <2 x double> %v , ptr %s
3576 define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, ptr %p) {
3577 ; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
3578 ; CHECK: .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
3579 ; CHECK-NEXT: # %bb.0:
3580 ; CHECK-NEXT: local.get 1
3581 ; CHECK-NEXT: i32.const -16
3582 ; CHECK-NEXT: i32.add
3583 ; CHECK-NEXT: local.get 0
3584 ; CHECK-NEXT: v128.store 0
3585 ; CHECK-NEXT: # fallthrough-return
3586 %s = getelementptr inbounds <2 x double>, ptr %p, i32 -1
3587 store <2 x double> %v , ptr %s
3591 define void @store_v2f64_with_unfolded_offset(<2 x double> %v, ptr %p) {
3592 ; CHECK-LABEL: store_v2f64_with_unfolded_offset:
3593 ; CHECK: .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
3594 ; CHECK-NEXT: # %bb.0:
3595 ; CHECK-NEXT: local.get 1
3596 ; CHECK-NEXT: i32.const 16
3597 ; CHECK-NEXT: i32.add
3598 ; CHECK-NEXT: local.get 0
3599 ; CHECK-NEXT: v128.store 0
3600 ; CHECK-NEXT: # fallthrough-return
3601 %q = ptrtoint ptr %p to i32
3602 %r = add nsw i32 %q, 16
3603 %s = inttoptr i32 %r to ptr
3604 store <2 x double> %v , ptr %s
3608 define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, ptr %p) {
3609 ; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
3610 ; CHECK: .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
3611 ; CHECK-NEXT: # %bb.0:
3612 ; CHECK-NEXT: local.get 1
3613 ; CHECK-NEXT: i32.const 16
3614 ; CHECK-NEXT: i32.add
3615 ; CHECK-NEXT: local.get 0
3616 ; CHECK-NEXT: v128.store 0
3617 ; CHECK-NEXT: # fallthrough-return
3618 %s = getelementptr <2 x double>, ptr %p, i32 1
3619 store <2 x double> %v , ptr %s
3623 define void @store_v2f64_to_numeric_address(<2 x double> %v) {
3624 ; CHECK-LABEL: store_v2f64_to_numeric_address:
3625 ; CHECK: .functype store_v2f64_to_numeric_address (v128) -> ()
3626 ; CHECK-NEXT: # %bb.0:
3627 ; CHECK-NEXT: i32.const 0
3628 ; CHECK-NEXT: local.get 0
3629 ; CHECK-NEXT: v128.store 32
3630 ; CHECK-NEXT: # fallthrough-return
3631 %s = inttoptr i32 32 to ptr
3632 store <2 x double> %v , ptr %s
3636 define void @store_v2f64_to_global_address(<2 x double> %v) {
3637 ; CHECK-LABEL: store_v2f64_to_global_address:
3638 ; CHECK: .functype store_v2f64_to_global_address (v128) -> ()
3639 ; CHECK-NEXT: # %bb.0:
3640 ; CHECK-NEXT: i32.const 0
3641 ; CHECK-NEXT: local.get 0
3642 ; CHECK-NEXT: v128.store gv_v2f64
3643 ; CHECK-NEXT: # fallthrough-return
3644 store <2 x double> %v , ptr @gv_v2f64