; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mcpu=pwr10 < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-p:64:64-n32:64-v256:256:256-v512:512:512"
target triple = "powerpc64le-unknown-linux-gnu"

%_elem_type_of_a = type <{ double }>
%_elem_type_of_x = type <{ double }>
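
; The kernel below loads paired vectors with llvm.ppc.vsx.lxvp, splits each
; pair with llvm.ppc.vsx.disassemble.pair, and accumulates the halves with
; llvm.fma.v2f64 inside a two-deep loop nest.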
define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_type_of_x]* %.x, i32* %.l, <2 x double>* %.vy01, <2 x double>* %.vy02, <2 x double>* %.vy03, <2 x double>* %.vy04, <2 x double>* %.vy05, <2 x double>* %.vy06, <2 x double>* %.vy07, <2 x double>* %.vy08, <2 x double>* %.vy09, <2 x double>* %.vy0a, <2 x double>* %.vy0b, <2 x double>* %.vy0c, <2 x double>* %.vy21, <2 x double>* %.vy22, <2 x double>* %.vy23, <2 x double>* %.vy24, <2 x double>* %.vy25, <2 x double>* %.vy26, <2 x double>* %.vy27, <2 x double>* %.vy28, <2 x double>* %.vy29, <2 x double>* %.vy2a, <2 x double>* %.vy2b, <2 x double>* %.vy2c) {
; CHECK-LABEL: foo:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stdu 1, -576(1)
; CHECK-NEXT:    .cfi_def_cfa_offset 576
; CHECK-NEXT:    .cfi_offset r14, -160
; CHECK-NEXT:    .cfi_offset r15, -152
; CHECK-NEXT:    .cfi_offset r16, -144
; CHECK-NEXT:    .cfi_offset r17, -136
; CHECK-NEXT:    .cfi_offset r18, -128
; CHECK-NEXT:    .cfi_offset r19, -120
; CHECK-NEXT:    .cfi_offset r20, -112
; CHECK-NEXT:    .cfi_offset r21, -104
; CHECK-NEXT:    .cfi_offset r22, -96
; CHECK-NEXT:    .cfi_offset r23, -88
; CHECK-NEXT:    .cfi_offset r24, -80
; CHECK-NEXT:    .cfi_offset r25, -72
; CHECK-NEXT:    .cfi_offset r26, -64
; CHECK-NEXT:    .cfi_offset r27, -56
; CHECK-NEXT:    .cfi_offset r28, -48
; CHECK-NEXT:    .cfi_offset r29, -40
; CHECK-NEXT:    .cfi_offset r30, -32
; CHECK-NEXT:    .cfi_offset r31, -24
; CHECK-NEXT:    .cfi_offset f30, -16
; CHECK-NEXT:    .cfi_offset f31, -8
; CHECK-NEXT:    .cfi_offset v20, -352
; CHECK-NEXT:    .cfi_offset v21, -336
; CHECK-NEXT:    .cfi_offset v22, -320
; CHECK-NEXT:    .cfi_offset v23, -304
; CHECK-NEXT:    .cfi_offset v24, -288
; CHECK-NEXT:    .cfi_offset v25, -272
; CHECK-NEXT:    .cfi_offset v26, -256
; CHECK-NEXT:    .cfi_offset v27, -240
; CHECK-NEXT:    .cfi_offset v28, -224
; CHECK-NEXT:    .cfi_offset v29, -208
; CHECK-NEXT:    .cfi_offset v30, -192
; CHECK-NEXT:    .cfi_offset v31, -176
; CHECK-NEXT:    lwz 4, 0(4)
; CHECK-NEXT:    std 14, 416(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 15, 424(1) # 8-byte Folded Spill
; CHECK-NEXT:    stxv 52, 224(1) # 16-byte Folded Spill
; CHECK-NEXT:    stxv 53, 240(1) # 16-byte Folded Spill
; CHECK-NEXT:    stxv 54, 256(1) # 16-byte Folded Spill
; CHECK-NEXT:    std 16, 432(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 17, 440(1) # 8-byte Folded Spill
; CHECK-NEXT:    stxv 55, 272(1) # 16-byte Folded Spill
; CHECK-NEXT:    std 18, 448(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 19, 456(1) # 8-byte Folded Spill
; CHECK-NEXT:    stxv 56, 288(1) # 16-byte Folded Spill
; CHECK-NEXT:    stxv 57, 304(1) # 16-byte Folded Spill
; CHECK-NEXT:    std 20, 464(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 21, 472(1) # 8-byte Folded Spill
; CHECK-NEXT:    stxv 58, 320(1) # 16-byte Folded Spill
; CHECK-NEXT:    std 22, 480(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 23, 488(1) # 8-byte Folded Spill
; CHECK-NEXT:    stxv 59, 336(1) # 16-byte Folded Spill
; CHECK-NEXT:    stxv 60, 352(1) # 16-byte Folded Spill
; CHECK-NEXT:    std 24, 496(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 25, 504(1) # 8-byte Folded Spill
; CHECK-NEXT:    stxv 61, 368(1) # 16-byte Folded Spill
; CHECK-NEXT:    std 26, 512(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 27, 520(1) # 8-byte Folded Spill
; CHECK-NEXT:    stxv 62, 384(1) # 16-byte Folded Spill
; CHECK-NEXT:    stxv 63, 400(1) # 16-byte Folded Spill
; CHECK-NEXT:    std 28, 528(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 29, 536(1) # 8-byte Folded Spill
; CHECK-NEXT:    cmpwi 4, 1
; CHECK-NEXT:    std 30, 544(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 31, 552(1) # 8-byte Folded Spill
; CHECK-NEXT:    stfd 30, 560(1) # 8-byte Folded Spill
; CHECK-NEXT:    stfd 31, 568(1) # 8-byte Folded Spill
; CHECK-NEXT:    blt 0, .LBB0_7
; CHECK-NEXT:  # %bb.1: # %_loop_1_do_.lr.ph
; CHECK-NEXT:    mr 23, 5
; CHECK-NEXT:    lwz 5, 0(3)
; CHECK-NEXT:    cmpwi 5, 1
; CHECK-NEXT:    blt 0, .LBB0_7
; CHECK-NEXT:  # %bb.2: # %_loop_1_do_.preheader
; CHECK-NEXT:    addi 5, 5, 1
; CHECK-NEXT:    li 20, 9
; CHECK-NEXT:    ld 28, 824(1)
; CHECK-NEXT:    ld 19, 712(1)
; CHECK-NEXT:    lwa 3, 0(7)
; CHECK-NEXT:    ld 7, 784(1)
; CHECK-NEXT:    ld 12, 776(1)
; CHECK-NEXT:    ld 11, 768(1)
; CHECK-NEXT:    ld 2, 760(1)
; CHECK-NEXT:    ld 29, 832(1)
; CHECK-NEXT:    cmpldi 5, 9
; CHECK-NEXT:    ld 27, 816(1)
; CHECK-NEXT:    ld 26, 808(1)
; CHECK-NEXT:    ld 25, 800(1)
; CHECK-NEXT:    ld 24, 792(1)
; CHECK-NEXT:    iselgt 5, 5, 20
; CHECK-NEXT:    ld 30, 752(1)
; CHECK-NEXT:    ld 22, 744(1)
; CHECK-NEXT:    ld 21, 736(1)
; CHECK-NEXT:    ld 20, 728(1)
; CHECK-NEXT:    ld 18, 704(1)
; CHECK-NEXT:    ld 17, 696(1)
; CHECK-NEXT:    ld 16, 688(1)
; CHECK-NEXT:    ld 14, 680(1)
; CHECK-NEXT:    sldi 0, 3, 2
; CHECK-NEXT:    std 5, 216(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 28, 208(1) # 8-byte Folded Spill
; CHECK-NEXT:    mr 5, 4
; CHECK-NEXT:    ld 4, 720(1)
; CHECK-NEXT:    std 19, 96(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 4, 104(1) # 8-byte Folded Spill
; CHECK-NEXT:    lxv 11, 0(4)
; CHECK-NEXT:    mr 4, 5
; CHECK-NEXT:    ld 5, 216(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 15, 672(1)
; CHECK-NEXT:    sldi 31, 3, 1
; CHECK-NEXT:    std 8, 32(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 9, 40(1) # 8-byte Folded Spill
; CHECK-NEXT:    lxv 43, 0(8)
; CHECK-NEXT:    mr 8, 6
; CHECK-NEXT:    sldi 6, 3, 3
; CHECK-NEXT:    std 2, 144(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 11, 152(1) # 8-byte Folded Spill
; CHECK-NEXT:    lxv 3, 0(2)
; CHECK-NEXT:    lxv 2, 0(11)
; CHECK-NEXT:    lxv 0, 0(7)
; CHECK-NEXT:    add 6, 6, 23
; CHECK-NEXT:    lxv 7, 0(28)
; CHECK-NEXT:    add 28, 3, 31
; CHECK-NEXT:    lxv 42, 0(9)
; CHECK-NEXT:    lxv 41, 0(10)
; CHECK-NEXT:    lxv 40, 0(15)
; CHECK-NEXT:    lxv 39, 0(14)
; CHECK-NEXT:    lxv 38, 0(16)
; CHECK-NEXT:    lxv 33, 0(17)
; CHECK-NEXT:    lxv 37, 0(18)
; CHECK-NEXT:    lxv 13, 0(19)
; CHECK-NEXT:    lxv 10, 0(20)
; CHECK-NEXT:    lxv 8, 0(21)
; CHECK-NEXT:    lxv 6, 0(22)
; CHECK-NEXT:    lxv 4, 0(30)
; CHECK-NEXT:    lxv 1, 0(12)
; CHECK-NEXT:    lxv 32, 0(24)
; CHECK-NEXT:    lxv 36, 0(25)
; CHECK-NEXT:    lxv 12, 0(26)
; CHECK-NEXT:    lxv 9, 0(27)
; CHECK-NEXT:    lxv 5, 0(29)
; CHECK-NEXT:    addi 5, 5, -2
; CHECK-NEXT:    sldi 11, 3, 4
; CHECK-NEXT:    std 12, 160(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 7, 168(1) # 8-byte Folded Spill
; CHECK-NEXT:    add 7, 3, 0
; CHECK-NEXT:    add 12, 11, 23
; CHECK-NEXT:    addi 11, 6, 32
; CHECK-NEXT:    addi 12, 12, 32
; CHECK-NEXT:    std 22, 128(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 30, 136(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 26, 192(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 27, 200(1) # 8-byte Folded Spill
; CHECK-NEXT:    mulli 26, 3, 48
; CHECK-NEXT:    mulli 22, 3, 6
; CHECK-NEXT:    sldi 6, 7, 3
; CHECK-NEXT:    add 30, 23, 6
; CHECK-NEXT:    std 29, 216(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 24, 176(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 25, 184(1) # 8-byte Folded Spill
; CHECK-NEXT:    li 25, 1
; CHECK-NEXT:    li 24, 0
; CHECK-NEXT:    std 10, 48(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 15, 56(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 14, 64(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 16, 72(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 17, 80(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 18, 88(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 20, 112(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 21, 120(1) # 8-byte Folded Spill
; CHECK-NEXT:    rldicl 5, 5, 61, 3
; CHECK-NEXT:    addi 2, 5, 1
; CHECK-NEXT:    sldi 5, 3, 5
; CHECK-NEXT:    add 29, 23, 5
; CHECK-NEXT:    sldi 5, 28, 3
; CHECK-NEXT:    add 27, 23, 5
; CHECK-NEXT:    mr 5, 23
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  .LBB0_3: # %_loop_2_do_.lr.ph
; CHECK-NEXT:    # =>This Loop Header: Depth=1
; CHECK-NEXT:    # Child Loop BB0_4 Depth 2
; CHECK-NEXT:    maddld 6, 22, 24, 7
; CHECK-NEXT:    maddld 20, 22, 24, 0
; CHECK-NEXT:    mtctr 2
; CHECK-NEXT:    sldi 6, 6, 3
; CHECK-NEXT:    add 21, 23, 6
; CHECK-NEXT:    sldi 6, 20, 3
; CHECK-NEXT:    add 20, 23, 6
; CHECK-NEXT:    maddld 6, 22, 24, 28
; CHECK-NEXT:    sldi 6, 6, 3
; CHECK-NEXT:    add 19, 23, 6
; CHECK-NEXT:    maddld 6, 22, 24, 31
; CHECK-NEXT:    sldi 6, 6, 3
; CHECK-NEXT:    add 18, 23, 6
; CHECK-NEXT:    maddld 6, 22, 24, 3
; CHECK-NEXT:    sldi 6, 6, 3
; CHECK-NEXT:    add 17, 23, 6
; CHECK-NEXT:    mulld 6, 22, 24
; CHECK-NEXT:    sldi 6, 6, 3
; CHECK-NEXT:    add 16, 23, 6
; CHECK-NEXT:    mr 6, 8
; CHECK-NEXT:    .p2align 5
; CHECK-NEXT:  .LBB0_4: # %_loop_2_do_
; CHECK-NEXT:    # Parent Loop BB0_3 Depth=1
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=2
; CHECK-NEXT:    lxvp 34, 0(6)
; CHECK-NEXT:    lxvp 44, 0(16)
; CHECK-NEXT:    lxvp 46, 0(17)
; CHECK-NEXT:    lxvp 48, 0(18)
; CHECK-NEXT:    lxvp 50, 0(19)
; CHECK-NEXT:    lxvp 62, 0(20)
; CHECK-NEXT:    lxvp 60, 0(21)
; CHECK-NEXT:    lxvp 58, 32(6)
; CHECK-NEXT:    lxvp 56, 32(16)
; CHECK-NEXT:    lxvp 54, 32(17)
; CHECK-NEXT:    lxvp 52, 32(18)
; CHECK-NEXT:    lxvp 30, 32(19)
; CHECK-NEXT:    addi 6, 6, 64
; CHECK-NEXT:    addi 16, 16, 64
; CHECK-NEXT:    addi 17, 17, 64
; CHECK-NEXT:    addi 18, 18, 64
; CHECK-NEXT:    addi 19, 19, 64
; CHECK-NEXT:    xvmaddadp 43, 45, 35
; CHECK-NEXT:    xvmaddadp 42, 47, 35
; CHECK-NEXT:    xvmaddadp 41, 49, 35
; CHECK-NEXT:    xvmaddadp 40, 51, 35
; CHECK-NEXT:    xvmaddadp 39, 63, 35
; CHECK-NEXT:    xvmaddadp 38, 61, 35
; CHECK-NEXT:    xvmaddadp 33, 44, 34
; CHECK-NEXT:    xvmaddadp 37, 46, 34
; CHECK-NEXT:    xvmaddadp 13, 48, 34
; CHECK-NEXT:    xvmaddadp 11, 50, 34
; CHECK-NEXT:    xvmaddadp 10, 62, 34
; CHECK-NEXT:    xvmaddadp 8, 60, 34
; CHECK-NEXT:    lxvp 34, 32(20)
; CHECK-NEXT:    lxvp 44, 32(21)
; CHECK-NEXT:    addi 20, 20, 64
; CHECK-NEXT:    addi 21, 21, 64
; CHECK-NEXT:    xvmaddadp 6, 57, 59
; CHECK-NEXT:    xvmaddadp 4, 55, 59
; CHECK-NEXT:    xvmaddadp 3, 53, 59
; CHECK-NEXT:    xvmaddadp 2, 31, 59
; CHECK-NEXT:    xvmaddadp 32, 56, 58
; CHECK-NEXT:    xvmaddadp 36, 54, 58
; CHECK-NEXT:    xvmaddadp 12, 52, 58
; CHECK-NEXT:    xvmaddadp 9, 30, 58
; CHECK-NEXT:    xvmaddadp 1, 35, 59
; CHECK-NEXT:    xvmaddadp 0, 45, 59
; CHECK-NEXT:    xvmaddadp 7, 34, 58
; CHECK-NEXT:    xvmaddadp 5, 44, 58
; CHECK-NEXT:    bdnz .LBB0_4
; CHECK-NEXT:  # %bb.5: # %_loop_2_endl_
; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    addi 25, 25, 6
; CHECK-NEXT:    add 5, 5, 26
; CHECK-NEXT:    add 11, 11, 26
; CHECK-NEXT:    add 30, 30, 26
; CHECK-NEXT:    add 12, 12, 26
; CHECK-NEXT:    add 29, 29, 26
; CHECK-NEXT:    add 27, 27, 26
; CHECK-NEXT:    addi 24, 24, 1
; CHECK-NEXT:    cmpld 25, 4
; CHECK-NEXT:    ble 0, .LBB0_3
; CHECK-NEXT:  # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit
; CHECK-NEXT:    ld 3, 32(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 43, 0(3)
; CHECK-NEXT:    ld 3, 40(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 42, 0(3)
; CHECK-NEXT:    ld 3, 48(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 41, 0(3)
; CHECK-NEXT:    ld 3, 56(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 40, 0(3)
; CHECK-NEXT:    ld 3, 64(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 39, 0(3)
; CHECK-NEXT:    ld 3, 72(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 38, 0(3)
; CHECK-NEXT:    ld 3, 80(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 33, 0(3)
; CHECK-NEXT:    ld 3, 88(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 37, 0(3)
; CHECK-NEXT:    ld 3, 96(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 13, 0(3)
; CHECK-NEXT:    ld 3, 104(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 11, 0(3)
; CHECK-NEXT:    ld 3, 112(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 10, 0(3)
; CHECK-NEXT:    ld 3, 120(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 8, 0(3)
; CHECK-NEXT:    ld 3, 128(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 6, 0(3)
; CHECK-NEXT:    ld 3, 136(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 4, 0(3)
; CHECK-NEXT:    ld 3, 144(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 3, 0(3)
; CHECK-NEXT:    ld 3, 152(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 2, 0(3)
; CHECK-NEXT:    ld 3, 160(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 1, 0(3)
; CHECK-NEXT:    ld 3, 168(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 0, 0(3)
; CHECK-NEXT:    ld 3, 176(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 32, 0(3)
; CHECK-NEXT:    ld 3, 184(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 36, 0(3)
; CHECK-NEXT:    ld 3, 192(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 12, 0(3)
; CHECK-NEXT:    ld 3, 200(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 9, 0(3)
; CHECK-NEXT:    ld 3, 208(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 7, 0(3)
; CHECK-NEXT:    ld 3, 216(1) # 8-byte Folded Reload
; CHECK-NEXT:    stxv 5, 0(3)
; CHECK-NEXT:  .LBB0_7: # %_return_bb
; CHECK-NEXT:    lxv 63, 400(1) # 16-byte Folded Reload
; CHECK-NEXT:    lxv 62, 384(1) # 16-byte Folded Reload
; CHECK-NEXT:    lxv 61, 368(1) # 16-byte Folded Reload
; CHECK-NEXT:    lxv 60, 352(1) # 16-byte Folded Reload
; CHECK-NEXT:    lxv 59, 336(1) # 16-byte Folded Reload
; CHECK-NEXT:    lxv 58, 320(1) # 16-byte Folded Reload
; CHECK-NEXT:    lxv 57, 304(1) # 16-byte Folded Reload
; CHECK-NEXT:    lxv 56, 288(1) # 16-byte Folded Reload
; CHECK-NEXT:    lxv 55, 272(1) # 16-byte Folded Reload
; CHECK-NEXT:    lxv 54, 256(1) # 16-byte Folded Reload
; CHECK-NEXT:    lxv 53, 240(1) # 16-byte Folded Reload
; CHECK-NEXT:    lxv 52, 224(1) # 16-byte Folded Reload
; CHECK-NEXT:    lfd 31, 568(1) # 8-byte Folded Reload
; CHECK-NEXT:    lfd 30, 560(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 31, 552(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 30, 544(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 29, 536(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 28, 528(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 27, 520(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 26, 512(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 25, 504(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 24, 496(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 23, 488(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 22, 480(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 21, 472(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 20, 464(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 19, 456(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 18, 448(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 17, 440(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 16, 432(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 15, 424(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 14, 416(1) # 8-byte Folded Reload
; CHECK-NEXT:    addi 1, 1, 576
; CHECK-NEXT:    blr
entry:
  %_val_l_ = load i32, i32* %.l, align 4
  %_conv = sext i32 %_val_l_ to i64
  %_mult_tmp = shl nsw i64 %_conv, 3
  %_sub_tmp4 = sub nuw nsw i64 -8, %_mult_tmp
  %_val_n_ = load i32, i32* %.n, align 4
  %_leq_tmp.not116 = icmp slt i32 %_val_n_, 1
  br i1 %_leq_tmp.not116, label %_return_bb, label %_loop_1_do_.lr.ph

_loop_1_do_.lr.ph:                                ; preds = %entry
  %_val_m_ = load i32, i32* %.m, align 4
  %_leq_tmp6.not114 = icmp slt i32 %_val_m_, 1
  br i1 %_leq_tmp6.not114, label %_return_bb, label %_loop_1_do_.preheader

_loop_1_do_.preheader:                            ; preds = %_loop_1_do_.lr.ph
  %x_rvo_based_addr_112 = getelementptr inbounds [0 x %_elem_type_of_x], [0 x %_elem_type_of_x]* %.x, i64 0, i64 -1
  %a_byte_ptr_ = bitcast [0 x %_elem_type_of_a]* %.a to i8*
  %a_rvo_based_addr_ = getelementptr inbounds i8, i8* %a_byte_ptr_, i64 %_sub_tmp4
  %.vy01.promoted = load <2 x double>, <2 x double>* %.vy01, align 16
  %.vy02.promoted = load <2 x double>, <2 x double>* %.vy02, align 16
  %.vy03.promoted = load <2 x double>, <2 x double>* %.vy03, align 16
  %.vy04.promoted = load <2 x double>, <2 x double>* %.vy04, align 16
  %.vy05.promoted = load <2 x double>, <2 x double>* %.vy05, align 16
  %.vy06.promoted = load <2 x double>, <2 x double>* %.vy06, align 16
  %.vy07.promoted = load <2 x double>, <2 x double>* %.vy07, align 16
  %.vy08.promoted = load <2 x double>, <2 x double>* %.vy08, align 16
  %.vy09.promoted = load <2 x double>, <2 x double>* %.vy09, align 16
  %.vy0a.promoted = load <2 x double>, <2 x double>* %.vy0a, align 16
  %.vy0b.promoted = load <2 x double>, <2 x double>* %.vy0b, align 16
  %.vy0c.promoted = load <2 x double>, <2 x double>* %.vy0c, align 16
  %.vy21.promoted = load <2 x double>, <2 x double>* %.vy21, align 16
  %.vy22.promoted = load <2 x double>, <2 x double>* %.vy22, align 16
  %.vy23.promoted = load <2 x double>, <2 x double>* %.vy23, align 16
  %.vy24.promoted = load <2 x double>, <2 x double>* %.vy24, align 16
  %.vy25.promoted = load <2 x double>, <2 x double>* %.vy25, align 16
  %.vy26.promoted = load <2 x double>, <2 x double>* %.vy26, align 16
  %.vy27.promoted = load <2 x double>, <2 x double>* %.vy27, align 16
  %.vy28.promoted = load <2 x double>, <2 x double>* %.vy28, align 16
  %.vy29.promoted = load <2 x double>, <2 x double>* %.vy29, align 16
  %.vy2a.promoted = load <2 x double>, <2 x double>* %.vy2a, align 16
  %.vy2b.promoted = load <2 x double>, <2 x double>* %.vy2b, align 16
  %.vy2c.promoted = load <2 x double>, <2 x double>* %.vy2c, align 16
  %0 = zext i32 %_val_m_ to i64
  %1 = zext i32 %_val_n_ to i64
  br label %_loop_2_do_.lr.ph

_loop_2_do_.lr.ph:                                ; preds = %_loop_2_endl_, %_loop_1_do_.preheader
  %indvars.iv212 = phi i64 [ %indvars.iv.next213, %_loop_2_endl_ ], [ 1, %_loop_1_do_.preheader ]
  %2 = phi <2 x double> [ %142, %_loop_2_endl_ ], [ %.vy2c.promoted, %_loop_1_do_.preheader ]
  %3 = phi <2 x double> [ %140, %_loop_2_endl_ ], [ %.vy2b.promoted, %_loop_1_do_.preheader ]
  %4 = phi <2 x double> [ %138, %_loop_2_endl_ ], [ %.vy2a.promoted, %_loop_1_do_.preheader ]
  %5 = phi <2 x double> [ %136, %_loop_2_endl_ ], [ %.vy29.promoted, %_loop_1_do_.preheader ]
  %6 = phi <2 x double> [ %134, %_loop_2_endl_ ], [ %.vy28.promoted, %_loop_1_do_.preheader ]
  %7 = phi <2 x double> [ %132, %_loop_2_endl_ ], [ %.vy27.promoted, %_loop_1_do_.preheader ]
  %8 = phi <2 x double> [ %129, %_loop_2_endl_ ], [ %.vy26.promoted, %_loop_1_do_.preheader ]
  %9 = phi <2 x double> [ %127, %_loop_2_endl_ ], [ %.vy25.promoted, %_loop_1_do_.preheader ]
  %10 = phi <2 x double> [ %125, %_loop_2_endl_ ], [ %.vy24.promoted, %_loop_1_do_.preheader ]
  %11 = phi <2 x double> [ %123, %_loop_2_endl_ ], [ %.vy23.promoted, %_loop_1_do_.preheader ]
  %12 = phi <2 x double> [ %121, %_loop_2_endl_ ], [ %.vy22.promoted, %_loop_1_do_.preheader ]
  %13 = phi <2 x double> [ %119, %_loop_2_endl_ ], [ %.vy21.promoted, %_loop_1_do_.preheader ]
  %14 = phi <2 x double> [ %116, %_loop_2_endl_ ], [ %.vy0c.promoted, %_loop_1_do_.preheader ]
  %15 = phi <2 x double> [ %114, %_loop_2_endl_ ], [ %.vy0b.promoted, %_loop_1_do_.preheader ]
  %16 = phi <2 x double> [ %112, %_loop_2_endl_ ], [ %.vy0a.promoted, %_loop_1_do_.preheader ]
  %17 = phi <2 x double> [ %110, %_loop_2_endl_ ], [ %.vy09.promoted, %_loop_1_do_.preheader ]
  %18 = phi <2 x double> [ %108, %_loop_2_endl_ ], [ %.vy08.promoted, %_loop_1_do_.preheader ]
  %19 = phi <2 x double> [ %106, %_loop_2_endl_ ], [ %.vy07.promoted, %_loop_1_do_.preheader ]
  %20 = phi <2 x double> [ %81, %_loop_2_endl_ ], [ %.vy06.promoted, %_loop_1_do_.preheader ]
  %21 = phi <2 x double> [ %79, %_loop_2_endl_ ], [ %.vy05.promoted, %_loop_1_do_.preheader ]
  %22 = phi <2 x double> [ %77, %_loop_2_endl_ ], [ %.vy04.promoted, %_loop_1_do_.preheader ]
  %23 = phi <2 x double> [ %75, %_loop_2_endl_ ], [ %.vy03.promoted, %_loop_1_do_.preheader ]
  %24 = phi <2 x double> [ %73, %_loop_2_endl_ ], [ %.vy02.promoted, %_loop_1_do_.preheader ]
  %25 = phi <2 x double> [ %71, %_loop_2_endl_ ], [ %.vy01.promoted, %_loop_1_do_.preheader ]
  %_ix_x_len10 = mul i64 %_mult_tmp, %indvars.iv212
  %a_ix_dim_0_ = getelementptr inbounds i8, i8* %a_rvo_based_addr_, i64 %_ix_x_len10
  %26 = add nuw nsw i64 %indvars.iv212, 1
  %_ix_x_len24 = mul i64 %_mult_tmp, %26
  %a_ix_dim_0_25 = getelementptr inbounds i8, i8* %a_rvo_based_addr_, i64 %_ix_x_len24
  %27 = add nuw nsw i64 %indvars.iv212, 2
  %_ix_x_len40 = mul i64 %_mult_tmp, %27
  %a_ix_dim_0_41 = getelementptr inbounds i8, i8* %a_rvo_based_addr_, i64 %_ix_x_len40
  %28 = add nuw nsw i64 %indvars.iv212, 3
  %_ix_x_len56 = mul i64 %_mult_tmp, %28
  %a_ix_dim_0_57 = getelementptr inbounds i8, i8* %a_rvo_based_addr_, i64 %_ix_x_len56
  %29 = add nuw nsw i64 %indvars.iv212, 4
  %_ix_x_len72 = mul i64 %_mult_tmp, %29
  %a_ix_dim_0_73 = getelementptr inbounds i8, i8* %a_rvo_based_addr_, i64 %_ix_x_len72
  %30 = add nuw nsw i64 %indvars.iv212, 5
  %_ix_x_len88 = mul i64 %_mult_tmp, %30
  %a_ix_dim_0_89 = getelementptr inbounds i8, i8* %a_rvo_based_addr_, i64 %_ix_x_len88
  br label %_loop_2_do_

_loop_2_do_:                                      ; preds = %_loop_2_do_.lr.ph, %_loop_2_do_
  %indvars.iv = phi i64 [ 1, %_loop_2_do_.lr.ph ], [ %indvars.iv.next, %_loop_2_do_ ]
  %31 = phi <2 x double> [ %2, %_loop_2_do_.lr.ph ], [ %142, %_loop_2_do_ ]
  %32 = phi <2 x double> [ %3, %_loop_2_do_.lr.ph ], [ %140, %_loop_2_do_ ]
  %33 = phi <2 x double> [ %4, %_loop_2_do_.lr.ph ], [ %138, %_loop_2_do_ ]
  %34 = phi <2 x double> [ %5, %_loop_2_do_.lr.ph ], [ %136, %_loop_2_do_ ]
  %35 = phi <2 x double> [ %6, %_loop_2_do_.lr.ph ], [ %134, %_loop_2_do_ ]
  %36 = phi <2 x double> [ %7, %_loop_2_do_.lr.ph ], [ %132, %_loop_2_do_ ]
  %37 = phi <2 x double> [ %8, %_loop_2_do_.lr.ph ], [ %129, %_loop_2_do_ ]
  %38 = phi <2 x double> [ %9, %_loop_2_do_.lr.ph ], [ %127, %_loop_2_do_ ]
  %39 = phi <2 x double> [ %10, %_loop_2_do_.lr.ph ], [ %125, %_loop_2_do_ ]
  %40 = phi <2 x double> [ %11, %_loop_2_do_.lr.ph ], [ %123, %_loop_2_do_ ]
  %41 = phi <2 x double> [ %12, %_loop_2_do_.lr.ph ], [ %121, %_loop_2_do_ ]
  %42 = phi <2 x double> [ %13, %_loop_2_do_.lr.ph ], [ %119, %_loop_2_do_ ]
  %43 = phi <2 x double> [ %14, %_loop_2_do_.lr.ph ], [ %116, %_loop_2_do_ ]
  %44 = phi <2 x double> [ %15, %_loop_2_do_.lr.ph ], [ %114, %_loop_2_do_ ]
  %45 = phi <2 x double> [ %16, %_loop_2_do_.lr.ph ], [ %112, %_loop_2_do_ ]
  %46 = phi <2 x double> [ %17, %_loop_2_do_.lr.ph ], [ %110, %_loop_2_do_ ]
  %47 = phi <2 x double> [ %18, %_loop_2_do_.lr.ph ], [ %108, %_loop_2_do_ ]
  %48 = phi <2 x double> [ %19, %_loop_2_do_.lr.ph ], [ %106, %_loop_2_do_ ]
  %49 = phi <2 x double> [ %20, %_loop_2_do_.lr.ph ], [ %81, %_loop_2_do_ ]
  %50 = phi <2 x double> [ %21, %_loop_2_do_.lr.ph ], [ %79, %_loop_2_do_ ]
  %51 = phi <2 x double> [ %22, %_loop_2_do_.lr.ph ], [ %77, %_loop_2_do_ ]
  %52 = phi <2 x double> [ %23, %_loop_2_do_.lr.ph ], [ %75, %_loop_2_do_ ]
  %53 = phi <2 x double> [ %24, %_loop_2_do_.lr.ph ], [ %73, %_loop_2_do_ ]
  %54 = phi <2 x double> [ %25, %_loop_2_do_.lr.ph ], [ %71, %_loop_2_do_ ]
  %_ix_x_len = shl nuw nsw i64 %indvars.iv, 3
  %x_ix_dim_0_113 = getelementptr inbounds %_elem_type_of_x, %_elem_type_of_x* %x_rvo_based_addr_112, i64 %indvars.iv
  %x_ix_dim_0_ = bitcast %_elem_type_of_x* %x_ix_dim_0_113 to i8*
  %55 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %x_ix_dim_0_)
  %a_ix_dim_1_ = getelementptr inbounds i8, i8* %a_ix_dim_0_, i64 %_ix_x_len
  %56 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_)
  %a_ix_dim_1_29 = getelementptr inbounds i8, i8* %a_ix_dim_0_25, i64 %_ix_x_len
  %57 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_29)
  %a_ix_dim_1_45 = getelementptr inbounds i8, i8* %a_ix_dim_0_41, i64 %_ix_x_len
  %58 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_45)
  %a_ix_dim_1_61 = getelementptr inbounds i8, i8* %a_ix_dim_0_57, i64 %_ix_x_len
  %59 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_61)
  %a_ix_dim_1_77 = getelementptr inbounds i8, i8* %a_ix_dim_0_73, i64 %_ix_x_len
  %60 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_77)
  %a_ix_dim_1_93 = getelementptr inbounds i8, i8* %a_ix_dim_0_89, i64 %_ix_x_len
  %61 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_93)
  %62 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %55)
  %.fca.0.extract35 = extractvalue { <16 x i8>, <16 x i8> } %62, 0
  %.fca.1.extract36 = extractvalue { <16 x i8>, <16 x i8> } %62, 1
  %63 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %56)
  %.fca.0.extract29 = extractvalue { <16 x i8>, <16 x i8> } %63, 0
  %.fca.1.extract30 = extractvalue { <16 x i8>, <16 x i8> } %63, 1
  %64 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %57)
  %.fca.0.extract23 = extractvalue { <16 x i8>, <16 x i8> } %64, 0
  %.fca.1.extract24 = extractvalue { <16 x i8>, <16 x i8> } %64, 1
  %65 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %58)
  %.fca.0.extract17 = extractvalue { <16 x i8>, <16 x i8> } %65, 0
  %.fca.1.extract18 = extractvalue { <16 x i8>, <16 x i8> } %65, 1
  %66 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %59)
  %.fca.0.extract11 = extractvalue { <16 x i8>, <16 x i8> } %66, 0
  %.fca.1.extract12 = extractvalue { <16 x i8>, <16 x i8> } %66, 1
  %67 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %60)
  %.fca.0.extract5 = extractvalue { <16 x i8>, <16 x i8> } %67, 0
  %.fca.1.extract6 = extractvalue { <16 x i8>, <16 x i8> } %67, 1
  %68 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %61)
  %.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %68, 0
  %.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %68, 1
  %69 = bitcast <16 x i8> %.fca.0.extract29 to <2 x double>
  %70 = bitcast <16 x i8> %.fca.0.extract35 to <2 x double>
  %71 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %69, <2 x double> %70, <2 x double> %54)
  %72 = bitcast <16 x i8> %.fca.0.extract23 to <2 x double>
  %73 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %72, <2 x double> %70, <2 x double> %53)
  %74 = bitcast <16 x i8> %.fca.0.extract17 to <2 x double>
  %75 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %74, <2 x double> %70, <2 x double> %52)
  %76 = bitcast <16 x i8> %.fca.0.extract11 to <2 x double>
  %77 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %76, <2 x double> %70, <2 x double> %51)
  %78 = bitcast <16 x i8> %.fca.0.extract5 to <2 x double>
  %79 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %78, <2 x double> %70, <2 x double> %50)
  %80 = bitcast <16 x i8> %.fca.0.extract to <2 x double>
  %81 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %80, <2 x double> %70, <2 x double> %49)
  %82 = getelementptr %_elem_type_of_x, %_elem_type_of_x* %x_ix_dim_0_113, i64 4
  %83 = bitcast %_elem_type_of_x* %82 to i8*
  %84 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %83)
  %85 = getelementptr i8, i8* %a_ix_dim_1_, i64 32
  %86 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %85)
  %87 = getelementptr i8, i8* %a_ix_dim_1_29, i64 32
  %88 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %87)
  %89 = getelementptr i8, i8* %a_ix_dim_1_45, i64 32
  %90 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %89)
  %91 = getelementptr i8, i8* %a_ix_dim_1_61, i64 32
  %92 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %91)
  %93 = getelementptr i8, i8* %a_ix_dim_1_77, i64 32
  %94 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %93)
  %95 = getelementptr i8, i8* %a_ix_dim_1_93, i64 32
  %96 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %95)
  %97 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %84)
  %.fca.0.extract37 = extractvalue { <16 x i8>, <16 x i8> } %97, 0
  %.fca.1.extract39 = extractvalue { <16 x i8>, <16 x i8> } %97, 1
  %98 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %86)
  %.fca.0.extract31 = extractvalue { <16 x i8>, <16 x i8> } %98, 0
  %.fca.1.extract33 = extractvalue { <16 x i8>, <16 x i8> } %98, 1
  %99 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %88)
  %.fca.0.extract25 = extractvalue { <16 x i8>, <16 x i8> } %99, 0
  %.fca.1.extract27 = extractvalue { <16 x i8>, <16 x i8> } %99, 1
  %100 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %90)
  %.fca.0.extract19 = extractvalue { <16 x i8>, <16 x i8> } %100, 0
  %.fca.1.extract21 = extractvalue { <16 x i8>, <16 x i8> } %100, 1
  %101 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %92)
  %.fca.0.extract13 = extractvalue { <16 x i8>, <16 x i8> } %101, 0
  %.fca.1.extract15 = extractvalue { <16 x i8>, <16 x i8> } %101, 1
  %102 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %94)
  %.fca.0.extract7 = extractvalue { <16 x i8>, <16 x i8> } %102, 0
  %.fca.1.extract9 = extractvalue { <16 x i8>, <16 x i8> } %102, 1
  %103 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %96)
  %.fca.0.extract1 = extractvalue { <16 x i8>, <16 x i8> } %103, 0
  %.fca.1.extract3 = extractvalue { <16 x i8>, <16 x i8> } %103, 1
  %104 = bitcast <16 x i8> %.fca.1.extract30 to <2 x double>
  %105 = bitcast <16 x i8> %.fca.1.extract36 to <2 x double>
  %106 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %104, <2 x double> %105, <2 x double> %48)
  %107 = bitcast <16 x i8> %.fca.1.extract24 to <2 x double>
  %108 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %107, <2 x double> %105, <2 x double> %47)
  %109 = bitcast <16 x i8> %.fca.1.extract18 to <2 x double>
  %110 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %109, <2 x double> %105, <2 x double> %46)
  %111 = bitcast <16 x i8> %.fca.1.extract12 to <2 x double>
  %112 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %111, <2 x double> %105, <2 x double> %45)
  %113 = bitcast <16 x i8> %.fca.1.extract6 to <2 x double>
  %114 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %113, <2 x double> %105, <2 x double> %44)
  %115 = bitcast <16 x i8> %.fca.1.extract to <2 x double>
  %116 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %115, <2 x double> %105, <2 x double> %43)
  %117 = bitcast <16 x i8> %.fca.0.extract31 to <2 x double>
  %118 = bitcast <16 x i8> %.fca.0.extract37 to <2 x double>
  %119 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %117, <2 x double> %118, <2 x double> %42)
  %120 = bitcast <16 x i8> %.fca.0.extract25 to <2 x double>
  %121 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %120, <2 x double> %118, <2 x double> %41)
  %122 = bitcast <16 x i8> %.fca.0.extract19 to <2 x double>
  %123 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %122, <2 x double> %118, <2 x double> %40)
  %124 = bitcast <16 x i8> %.fca.0.extract13 to <2 x double>
  %125 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %124, <2 x double> %118, <2 x double> %39)
  %126 = bitcast <16 x i8> %.fca.0.extract7 to <2 x double>
  %127 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %126, <2 x double> %118, <2 x double> %38)
  %128 = bitcast <16 x i8> %.fca.0.extract1 to <2 x double>
  %129 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %128, <2 x double> %118, <2 x double> %37)
  %130 = bitcast <16 x i8> %.fca.1.extract33 to <2 x double>
  %131 = bitcast <16 x i8> %.fca.1.extract39 to <2 x double>
  %132 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %130, <2 x double> %131, <2 x double> %36)
  %133 = bitcast <16 x i8> %.fca.1.extract27 to <2 x double>
  %134 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %133, <2 x double> %131, <2 x double> %35)
  %135 = bitcast <16 x i8> %.fca.1.extract21 to <2 x double>
  %136 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %135, <2 x double> %131, <2 x double> %34)
  %137 = bitcast <16 x i8> %.fca.1.extract15 to <2 x double>
  %138 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %137, <2 x double> %131, <2 x double> %33)
  %139 = bitcast <16 x i8> %.fca.1.extract9 to <2 x double>
  %140 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %139, <2 x double> %131, <2 x double> %32)
  %141 = bitcast <16 x i8> %.fca.1.extract3 to <2 x double>
  %142 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %141, <2 x double> %131, <2 x double> %31)
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 8
  %_leq_tmp6.not = icmp ugt i64 %indvars.iv.next, %0
  br i1 %_leq_tmp6.not, label %_loop_2_endl_, label %_loop_2_do_

_loop_2_endl_:                                    ; preds = %_loop_2_do_
  %indvars.iv.next213 = add nuw nsw i64 %indvars.iv212, 6
  %_leq_tmp.not = icmp ugt i64 %indvars.iv.next213, %1
  br i1 %_leq_tmp.not, label %_loop_1_loopHeader_._return_bb_crit_edge.loopexit, label %_loop_2_do_.lr.ph

_loop_1_loopHeader_._return_bb_crit_edge.loopexit: ; preds = %_loop_2_endl_
  store <2 x double> %71, <2 x double>* %.vy01, align 16
  store <2 x double> %73, <2 x double>* %.vy02, align 16
  store <2 x double> %75, <2 x double>* %.vy03, align 16
  store <2 x double> %77, <2 x double>* %.vy04, align 16
  store <2 x double> %79, <2 x double>* %.vy05, align 16
  store <2 x double> %81, <2 x double>* %.vy06, align 16
  store <2 x double> %106, <2 x double>* %.vy07, align 16
  store <2 x double> %108, <2 x double>* %.vy08, align 16
  store <2 x double> %110, <2 x double>* %.vy09, align 16
  store <2 x double> %112, <2 x double>* %.vy0a, align 16
  store <2 x double> %114, <2 x double>* %.vy0b, align 16
  store <2 x double> %116, <2 x double>* %.vy0c, align 16
  store <2 x double> %119, <2 x double>* %.vy21, align 16
  store <2 x double> %121, <2 x double>* %.vy22, align 16
  store <2 x double> %123, <2 x double>* %.vy23, align 16
  store <2 x double> %125, <2 x double>* %.vy24, align 16
  store <2 x double> %127, <2 x double>* %.vy25, align 16
  store <2 x double> %129, <2 x double>* %.vy26, align 16
  store <2 x double> %132, <2 x double>* %.vy27, align 16
  store <2 x double> %134, <2 x double>* %.vy28, align 16
  store <2 x double> %136, <2 x double>* %.vy29, align 16
  store <2 x double> %138, <2 x double>* %.vy2a, align 16
  store <2 x double> %140, <2 x double>* %.vy2b, align 16
  store <2 x double> %142, <2 x double>* %.vy2c, align 16
  br label %_return_bb

_return_bb:                                       ; preds = %_loop_1_do_.lr.ph, %_loop_1_loopHeader_._return_bb_crit_edge.loopexit, %entry
  ret void
}

declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*)
declare { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1>)
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)