1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
4 ; Test wide load+promote patterns, which after combines and legalization are
5 ; represented differently than 128-bit load+promote patterns.
; Every function in this file is compiled for the 32-bit WebAssembly target
; named here; the expected output below uses i32 values as addresses.
7 target triple = "wasm32-unknown-unknown"
; Baseline case: load a <4 x float> straight from %p and fpext it to
; <4 x double>. The expected output performs the work as two halves, each a
; v128.load64_zero followed by f64x2.promote_low_f32x4 and a v128.store; the
; half addressed via the constant 8 is stored at offset 16 and the other half
; at offset 0, both through local 0.
; NOTE(review): the functype lists two i32 parameters and no result although
; the IR signature takes one ptr and returns a vector -- presumably local 0 is
; a pointer for the returned value; confirm.
; NOTE(review): no `i32.add` is visible between `i32.const 8` and the first
; load, and no `ret` / closing `}` is visible after the fpext -- confirm that
; nothing was dropped from this copy of the test.
9 define <4 x double> @load_promote_v2f64(ptr %p) {
10 ; CHECK-LABEL: load_promote_v2f64:
11 ; CHECK: .functype load_promote_v2f64 (i32, i32) -> ()
12 ; CHECK-NEXT: # %bb.0:
13 ; CHECK-NEXT: local.get 0
14 ; CHECK-NEXT: local.get 1
15 ; CHECK-NEXT: i32.const 8
17 ; CHECK-NEXT: v128.load64_zero 0
18 ; CHECK-NEXT: f64x2.promote_low_f32x4
19 ; CHECK-NEXT: v128.store 16
20 ; CHECK-NEXT: local.get 0
21 ; CHECK-NEXT: local.get 1
22 ; CHECK-NEXT: v128.load64_zero 0
23 ; CHECK-NEXT: f64x2.promote_low_f32x4
24 ; CHECK-NEXT: v128.store 0
25 ; CHECK-NEXT: # fallthrough-return
26 %e = load <4 x float>, ptr %p
27 %v = fpext <4 x float> %e to <4 x double>
; Address formed with integer arithmetic: %p is converted to i32, 16 is added
; with the `nuw` flag, and the sum is converted back to a pointer before the
; <4 x float> load and fpext. The expected output uses the constants 24 and 16
; for the two halves, and both loads keep an immediate offset of 0.
; NOTE(review): no `i32.add` is visible after either constant, and no `ret` /
; closing `}` is visible after the fpext -- confirm that nothing was dropped
; from this copy of the test.
31 define <4 x double> @load_promote_v2f64_with_folded_offset(ptr %p) {
32 ; CHECK-LABEL: load_promote_v2f64_with_folded_offset:
33 ; CHECK: .functype load_promote_v2f64_with_folded_offset (i32, i32) -> ()
34 ; CHECK-NEXT: # %bb.0:
35 ; CHECK-NEXT: local.get 0
36 ; CHECK-NEXT: local.get 1
37 ; CHECK-NEXT: i32.const 24
39 ; CHECK-NEXT: v128.load64_zero 0
40 ; CHECK-NEXT: f64x2.promote_low_f32x4
41 ; CHECK-NEXT: v128.store 16
42 ; CHECK-NEXT: local.get 0
43 ; CHECK-NEXT: local.get 1
44 ; CHECK-NEXT: i32.const 16
46 ; CHECK-NEXT: v128.load64_zero 0
47 ; CHECK-NEXT: f64x2.promote_low_f32x4
48 ; CHECK-NEXT: v128.store 0
49 ; CHECK-NEXT: # fallthrough-return
50 %q = ptrtoint ptr %p to i32
51 %r = add nuw i32 %q, 16
52 %s = inttoptr i32 %r to ptr
53 %e = load <4 x float>, ptr %s
54 %v = fpext <4 x float> %e to <4 x double>
; Address formed with an `inbounds` getelementptr that steps one <4 x float>
; (16 bytes) past %p before the load and fpext. The expected output uses the
; constants 24 and 16 for the two halves, and both loads keep an immediate
; offset of 0.
; NOTE(review): no `i32.add` is visible after either constant, and no `ret` /
; closing `}` is visible after the fpext -- confirm that nothing was dropped
; from this copy of the test.
58 define <4 x double> @load_promote_v2f64_with_folded_gep_offset(ptr %p) {
59 ; CHECK-LABEL: load_promote_v2f64_with_folded_gep_offset:
60 ; CHECK: .functype load_promote_v2f64_with_folded_gep_offset (i32, i32) -> ()
61 ; CHECK-NEXT: # %bb.0:
62 ; CHECK-NEXT: local.get 0
63 ; CHECK-NEXT: local.get 1
64 ; CHECK-NEXT: i32.const 24
66 ; CHECK-NEXT: v128.load64_zero 0
67 ; CHECK-NEXT: f64x2.promote_low_f32x4
68 ; CHECK-NEXT: v128.store 16
69 ; CHECK-NEXT: local.get 0
70 ; CHECK-NEXT: local.get 1
71 ; CHECK-NEXT: i32.const 16
73 ; CHECK-NEXT: v128.load64_zero 0
74 ; CHECK-NEXT: f64x2.promote_low_f32x4
75 ; CHECK-NEXT: v128.store 0
76 ; CHECK-NEXT: # fallthrough-return
77 %s = getelementptr inbounds <4 x float>, ptr %p, i32 1
78 %e = load <4 x float>, ptr %s
79 %v = fpext <4 x float> %e to <4 x double>
; Address formed with an `inbounds` getelementptr that steps one <4 x float>
; (16 bytes) *before* %p. The expected output uses the constant -16, saves the
; resulting address back into local 1 with local.tee, and stores that half at
; offset 0 first; the second half reuses local 1 with the constant 8 and is
; stored at offset 16. Unlike the positive-offset tests in this file, the
; offset-0 store comes first here.
; NOTE(review): no `i32.add` is visible after `i32.const -16` or after
; `i32.const 8`, and no `ret` / closing `}` is visible after the fpext --
; confirm that nothing was dropped from this copy of the test.
83 define <4 x double> @load_promote_v2f64_with_unfolded_gep_negative_offset(ptr %p) {
84 ; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_negative_offset:
85 ; CHECK: .functype load_promote_v2f64_with_unfolded_gep_negative_offset (i32, i32) -> ()
86 ; CHECK-NEXT: # %bb.0:
87 ; CHECK-NEXT: local.get 0
88 ; CHECK-NEXT: local.get 1
89 ; CHECK-NEXT: i32.const -16
91 ; CHECK-NEXT: local.tee 1
92 ; CHECK-NEXT: v128.load64_zero 0
93 ; CHECK-NEXT: f64x2.promote_low_f32x4
94 ; CHECK-NEXT: v128.store 0
95 ; CHECK-NEXT: local.get 0
96 ; CHECK-NEXT: local.get 1
97 ; CHECK-NEXT: i32.const 8
99 ; CHECK-NEXT: v128.load64_zero 0
100 ; CHECK-NEXT: f64x2.promote_low_f32x4
101 ; CHECK-NEXT: v128.store 16
102 ; CHECK-NEXT: # fallthrough-return
103 %s = getelementptr inbounds <4 x float>, ptr %p, i32 -1
104 %e = load <4 x float>, ptr %s
105 %v = fpext <4 x float> %e to <4 x double>
; Same integer address arithmetic as the folded-offset test, except that the
; add of 16 carries `nsw` instead of `nuw`. The expected output computes each
; half's address explicitly: local 1 plus the constant 24 (stored at offset 16)
; and local 1 plus the constant 16 (stored at offset 0), each via i32.add, with
; an immediate offset of 0 on both loads.
; NOTE(review): no `ret` / closing `}` is visible after the fpext -- confirm
; that nothing was dropped from this copy of the test.
109 define <4 x double> @load_promote_v2f64_with_unfolded_offset(ptr %p) {
110 ; CHECK-LABEL: load_promote_v2f64_with_unfolded_offset:
111 ; CHECK: .functype load_promote_v2f64_with_unfolded_offset (i32, i32) -> ()
112 ; CHECK-NEXT: # %bb.0:
113 ; CHECK-NEXT: local.get 0
114 ; CHECK-NEXT: local.get 1
115 ; CHECK-NEXT: i32.const 24
116 ; CHECK-NEXT: i32.add
117 ; CHECK-NEXT: v128.load64_zero 0
118 ; CHECK-NEXT: f64x2.promote_low_f32x4
119 ; CHECK-NEXT: v128.store 16
120 ; CHECK-NEXT: local.get 0
121 ; CHECK-NEXT: local.get 1
122 ; CHECK-NEXT: i32.const 16
123 ; CHECK-NEXT: i32.add
124 ; CHECK-NEXT: v128.load64_zero 0
125 ; CHECK-NEXT: f64x2.promote_low_f32x4
126 ; CHECK-NEXT: v128.store 0
127 ; CHECK-NEXT: # fallthrough-return
128 %q = ptrtoint ptr %p to i32
129 %r = add nsw i32 %q, 16
130 %s = inttoptr i32 %r to ptr
131 %e = load <4 x float>, ptr %s
132 %v = fpext <4 x float> %e to <4 x double>
; Same one-element getelementptr as the folded-gep test, but without the
; `inbounds` keyword. The expected output computes each half's address
; explicitly: local 1 plus the constant 24 (stored at offset 16) and local 1
; plus the constant 16 (stored at offset 0), each via i32.add, with an
; immediate offset of 0 on both loads.
; NOTE(review): no `ret` / closing `}` is visible after the fpext -- confirm
; that nothing was dropped from this copy of the test.
136 define <4 x double> @load_promote_v2f64_with_unfolded_gep_offset(ptr %p) {
137 ; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_offset:
138 ; CHECK: .functype load_promote_v2f64_with_unfolded_gep_offset (i32, i32) -> ()
139 ; CHECK-NEXT: # %bb.0:
140 ; CHECK-NEXT: local.get 0
141 ; CHECK-NEXT: local.get 1
142 ; CHECK-NEXT: i32.const 24
143 ; CHECK-NEXT: i32.add
144 ; CHECK-NEXT: v128.load64_zero 0
145 ; CHECK-NEXT: f64x2.promote_low_f32x4
146 ; CHECK-NEXT: v128.store 16
147 ; CHECK-NEXT: local.get 0
148 ; CHECK-NEXT: local.get 1
149 ; CHECK-NEXT: i32.const 16
150 ; CHECK-NEXT: i32.add
151 ; CHECK-NEXT: v128.load64_zero 0
152 ; CHECK-NEXT: f64x2.promote_low_f32x4
153 ; CHECK-NEXT: v128.store 0
154 ; CHECK-NEXT: # fallthrough-return
155 %s = getelementptr <4 x float>, ptr %p, i32 1
156 %e = load <4 x float>, ptr %s
157 %v = fpext <4 x float> %e to <4 x double>
; Load from a constant address: the integer 32 is converted to a pointer and
; used for the <4 x float> load. The function takes no IR parameters, and the
; expected functype has a single i32. The constants 40 and 32 feed the two
; loads directly as addresses (no base local, no add), with results stored at
; offsets 16 and 0 through local 0.
; NOTE(review): no `ret` / closing `}` is visible after the fpext -- confirm
; that nothing was dropped from this copy of the test.
161 define <4 x double> @load_promote_v2f64_from_numeric_address() {
162 ; CHECK-LABEL: load_promote_v2f64_from_numeric_address:
163 ; CHECK: .functype load_promote_v2f64_from_numeric_address (i32) -> ()
164 ; CHECK-NEXT: # %bb.0:
165 ; CHECK-NEXT: local.get 0
166 ; CHECK-NEXT: i32.const 40
167 ; CHECK-NEXT: v128.load64_zero 0
168 ; CHECK-NEXT: f64x2.promote_low_f32x4
169 ; CHECK-NEXT: v128.store 16
170 ; CHECK-NEXT: local.get 0
171 ; CHECK-NEXT: i32.const 32
172 ; CHECK-NEXT: v128.load64_zero 0
173 ; CHECK-NEXT: f64x2.promote_low_f32x4
174 ; CHECK-NEXT: v128.store 0
175 ; CHECK-NEXT: # fallthrough-return
176 %s = inttoptr i32 32 to ptr
177 %e = load <4 x float>, ptr %s
178 %v = fpext <4 x float> %e to <4 x double>
; Load from a global: @gv_v4f32 is a <4 x float> global whose four lanes are
; all initialised to 42.0, and the function loads it and fpexts the value. In
; the expected output the symbol is materialised with `i32.const gv_v4f32`;
; the half stored at offset 16 adds the constant 8 to the symbol with i32.add,
; and the half stored at offset 0 uses the symbol address as-is.
; NOTE(review): no `ret` / closing `}` is visible after the fpext -- confirm
; that nothing was dropped from this copy of the test.
182 @gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
183 define <4 x double> @load_promote_v2f64_from_global_address() {
184 ; CHECK-LABEL: load_promote_v2f64_from_global_address:
185 ; CHECK: .functype load_promote_v2f64_from_global_address (i32) -> ()
186 ; CHECK-NEXT: # %bb.0:
187 ; CHECK-NEXT: local.get 0
188 ; CHECK-NEXT: i32.const gv_v4f32
189 ; CHECK-NEXT: i32.const 8
190 ; CHECK-NEXT: i32.add
191 ; CHECK-NEXT: v128.load64_zero 0
192 ; CHECK-NEXT: f64x2.promote_low_f32x4
193 ; CHECK-NEXT: v128.store 16
194 ; CHECK-NEXT: local.get 0
195 ; CHECK-NEXT: i32.const gv_v4f32
196 ; CHECK-NEXT: v128.load64_zero 0
197 ; CHECK-NEXT: f64x2.promote_low_f32x4
198 ; CHECK-NEXT: v128.store 0
199 ; CHECK-NEXT: # fallthrough-return
200 %e = load <4 x float>, ptr @gv_v4f32
201 %v = fpext <4 x float> %e to <4 x double>