; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s

; Test SIMD v128.load{32,64}_zero instructions

target triple = "wasm32-unknown-unknown"

;===----------------------------------------------------------------------------
; v128.load32_zero
;===----------------------------------------------------------------------------
; Loads an i32 through %p and inserts it into lane 0 of an all-zero <4 x i32>.
; Expected to select a single v128.load32_zero with offset immediate 0.
define <4 x i32> @load_zero_i32_no_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_no_offset:
; CHECK:         .functype load_zero_i32_no_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_zero 0
; CHECK-NEXT:    # fallthrough-return
  %x = load i32, i32* %p
  %v = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
  ret <4 x i32> %v
}
; The address is %p + 24 computed with an `add nuw` on the integer value of
; the pointer. The constant is expected to fold into the load's offset
; immediate (v128.load32_zero 24) with no separate add.
define <4 x i32> @load_zero_i32_with_folded_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_with_folded_offset:
; CHECK:         .functype load_zero_i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_zero 24
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %x = load i32, i32* %s
  %t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
  ret <4 x i32> %t
}
; The address is an inbounds GEP of 6 i32 elements (24 bytes) past %p. The
; byte offset is expected to fold into the load's offset immediate.
define <4 x i32> @load_zero_i32_with_folded_gep_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_with_folded_gep_offset:
; CHECK:         .functype load_zero_i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_zero 24
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 6
  %x = load i32, i32* %s
  %t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
  ret <4 x i32> %t
}
; The address is an inbounds GEP of -6 i32 elements (-24 bytes) from %p. The
; negative offset is expected NOT to fold: the address is formed with an
; explicit i32.const -24 / i32.add and the load uses offset immediate 0.
define <4 x i32> @load_zero_i32_with_unfolded_gep_negative_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zero_i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_zero 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 -6
  %x = load i32, i32* %s
  %t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
  ret <4 x i32> %t
}
; The address is %p + 24 computed with an `add nsw` (no nuw) on the integer
; value of the pointer. The constant is expected NOT to fold: the add stays
; as an explicit i32.add and the load uses offset immediate 0.
define <4 x i32> @load_zero_i32_with_unfolded_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_with_unfolded_offset:
; CHECK:         .functype load_zero_i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_zero 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %x = load i32, i32* %s
  %t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
  ret <4 x i32> %t
}
; The address is a GEP of 6 i32 elements (24 bytes) past %p WITHOUT the
; inbounds flag. The offset is expected NOT to fold: the address is formed
; with an explicit i32.const 24 / i32.add and the load uses offset 0.
define <4 x i32> @load_zero_i32_with_unfolded_gep_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_with_unfolded_gep_offset:
; CHECK:         .functype load_zero_i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_zero 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i32, i32* %p, i32 6
  %x = load i32, i32* %s
  %t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
  ret <4 x i32> %t
}
; Loads from the constant address 42. The whole address is expected to be
; carried by the load's offset immediate, with a zero base (i32.const 0).
define <4 x i32> @load_zero_i32_from_numeric_address() {
; CHECK-LABEL: load_zero_i32_from_numeric_address:
; CHECK:         .functype load_zero_i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_zero 42
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to i32*
  %x = load i32, i32* %s
  %t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
  ret <4 x i32> %t
}
; Loads from the global @gv_i32. The symbol is expected to appear as the
; load's offset operand, with a zero base (i32.const 0).
@gv_i32 = global i32 0
define <4 x i32> @load_zero_i32_from_global_address() {
; CHECK-LABEL: load_zero_i32_from_global_address:
; CHECK:         .functype load_zero_i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_zero gv_i32
; CHECK-NEXT:    # fallthrough-return
  %x = load i32, i32* @gv_i32
  %t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
  ret <4 x i32> %t
}
;===----------------------------------------------------------------------------
; v128.load64_zero
;===----------------------------------------------------------------------------
; Loads an i64 through %p and inserts it into lane 0 of an all-zero <2 x i64>.
; Expected to select a single v128.load64_zero with offset immediate 0.
define <2 x i64> @load_zero_i64_no_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_no_offset:
; CHECK:         .functype load_zero_i64_no_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %x = load i64, i64* %p
  %v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
  ret <2 x i64> %v
}
; The address is %p + 24 computed with an `add nuw` on the integer value of
; the pointer. The constant is expected to fold into the load's offset
; immediate (v128.load64_zero 24) with no separate add.
define <2 x i64> @load_zero_i64_with_folded_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_with_folded_offset:
; CHECK:         .functype load_zero_i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 24
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %x = load i64, i64* %s
  %t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
  ret <2 x i64> %t
}
; The address is an inbounds GEP of 6 i64 elements (48 bytes) past %p. The
; byte offset is expected to fold into the load's offset immediate.
define <2 x i64> @load_zero_i64_with_folded_gep_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_with_folded_gep_offset:
; CHECK:         .functype load_zero_i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 48
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i64 6
  %x = load i64, i64* %s
  %t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
  ret <2 x i64> %t
}
; The address is an inbounds GEP of -6 i64 elements (-48 bytes) from %p. The
; negative offset is expected NOT to fold: the address is formed with an
; explicit i32.const -48 / i32.add and the load uses offset immediate 0.
define <2 x i64> @load_zero_i64_with_unfolded_gep_negative_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zero_i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i64 -6
  %x = load i64, i64* %s
  %t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
  ret <2 x i64> %t
}
; The address is %p + 24 computed with an `add nsw` (no nuw) on the integer
; value of the pointer. The constant is expected NOT to fold: the add stays
; as an explicit i32.add and the load uses offset immediate 0.
define <2 x i64> @load_zero_i64_with_unfolded_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_with_unfolded_offset:
; CHECK:         .functype load_zero_i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %x = load i64, i64* %s
  %t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
  ret <2 x i64> %t
}
; The address is a GEP of 6 i64 elements (48 bytes) past %p WITHOUT the
; inbounds flag. The offset is expected NOT to fold: the address is formed
; with an explicit i32.const 48 / i32.add and the load uses offset 0.
define <2 x i64> @load_zero_i64_with_unfolded_gep_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_with_unfolded_gep_offset:
; CHECK:         .functype load_zero_i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i64, i64* %p, i64 6
  %x = load i64, i64* %s
  %t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
  ret <2 x i64> %t
}
; Loads from the constant address 42. The whole address is expected to be
; carried by the load's offset immediate, with a zero base (i32.const 0).
define <2 x i64> @load_zero_i64_from_numeric_address() {
; CHECK-LABEL: load_zero_i64_from_numeric_address:
; CHECK:         .functype load_zero_i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_zero 42
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to i64*
  %x = load i64, i64* %s
  %t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
  ret <2 x i64> %t
}
; Loads from the global @gv_i64. The symbol is expected to appear as the
; load's offset operand, with a zero base (i32.const 0).
@gv_i64 = global i64 0
define <2 x i64> @load_zero_i64_from_global_address() {
; CHECK-LABEL: load_zero_i64_from_global_address:
; CHECK:         .functype load_zero_i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_zero gv_i64
; CHECK-NEXT:    # fallthrough-return
  %x = load i64, i64* @gv_i64
  %t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
  ret <2 x i64> %t
}