1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=MISCHED %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -misched=gcn-iterative-ilp -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-ILP %s
4 ; Test the scheduler when only one wave is requested. The result should be high register usage and max ILP.
6 ; We expect a three-digit VGPR usage here since only one wave is requested.
8 ; GCN-ILP: NumVgprs: {{[0-9][0-9][0-9]$}}
10 ; FIXME: The machine scheduler is doing a poor job of maximizing ILP here.
11 ; However, if we had not requested only one wave, register usage would indeed
12 ; be much lower; demonstrating that difference is the purpose of this test.
14 ; MISCHED: NumVgprs: {{[7-9][0-9]$}}
16 define amdgpu_kernel void @load_fma_store(ptr addrspace(3) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) #1 {
18 %tmp = getelementptr inbounds float, ptr addrspace(3) %arg, i32 1
19 %tmp2 = load float, ptr addrspace(3) %tmp, align 4
20 %tmp3 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 2
21 %tmp4 = load float, ptr addrspace(3) %tmp3, align 4
22 %tmp5 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 3
23 %tmp6 = load float, ptr addrspace(3) %tmp5, align 4
24 %tmp7 = tail call float @llvm.fmuladd.f32(float %tmp2, float %tmp4, float %tmp6)
25 %tmp8 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 5
26 %tmp9 = load float, ptr addrspace(3) %tmp8, align 4
27 %tmp10 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 6
28 %tmp11 = load float, ptr addrspace(3) %tmp10, align 4
29 %tmp12 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 7
30 %tmp13 = load float, ptr addrspace(3) %tmp12, align 4
31 %tmp14 = tail call float @llvm.fmuladd.f32(float %tmp9, float %tmp11, float %tmp13)
32 %tmp15 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 9
33 %tmp16 = load float, ptr addrspace(3) %tmp15, align 4
34 %tmp17 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 10
35 %tmp18 = load float, ptr addrspace(3) %tmp17, align 4
36 %tmp19 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 11
37 %tmp20 = load float, ptr addrspace(3) %tmp19, align 4
38 %tmp21 = tail call float @llvm.fmuladd.f32(float %tmp16, float %tmp18, float %tmp20)
39 %tmp22 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 13
40 %tmp23 = load float, ptr addrspace(3) %tmp22, align 4
41 %tmp24 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 14
42 %tmp25 = load float, ptr addrspace(3) %tmp24, align 4
43 %tmp26 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 15
44 %tmp27 = load float, ptr addrspace(3) %tmp26, align 4
45 %tmp28 = tail call float @llvm.fmuladd.f32(float %tmp23, float %tmp25, float %tmp27)
46 %tmp29 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 17
47 %tmp30 = load float, ptr addrspace(3) %tmp29, align 4
48 %tmp31 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 18
49 %tmp32 = load float, ptr addrspace(3) %tmp31, align 4
50 %tmp33 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 19
51 %tmp34 = load float, ptr addrspace(3) %tmp33, align 4
52 %tmp35 = tail call float @llvm.fmuladd.f32(float %tmp30, float %tmp32, float %tmp34)
53 %tmp36 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 21
54 %tmp37 = load float, ptr addrspace(3) %tmp36, align 4
55 %tmp38 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 22
56 %tmp39 = load float, ptr addrspace(3) %tmp38, align 4
57 %tmp40 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 23
58 %tmp41 = load float, ptr addrspace(3) %tmp40, align 4
59 %tmp42 = tail call float @llvm.fmuladd.f32(float %tmp37, float %tmp39, float %tmp41)
60 %tmp43 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 25
61 %tmp44 = load float, ptr addrspace(3) %tmp43, align 4
62 %tmp45 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 26
63 %tmp46 = load float, ptr addrspace(3) %tmp45, align 4
64 %tmp47 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 27
65 %tmp48 = load float, ptr addrspace(3) %tmp47, align 4
66 %tmp49 = tail call float @llvm.fmuladd.f32(float %tmp44, float %tmp46, float %tmp48)
67 %tmp50 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 29
68 %tmp51 = load float, ptr addrspace(3) %tmp50, align 4
69 %tmp52 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 30
70 %tmp53 = load float, ptr addrspace(3) %tmp52, align 4
71 %tmp54 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 31
72 %tmp55 = load float, ptr addrspace(3) %tmp54, align 4
73 %tmp56 = tail call float @llvm.fmuladd.f32(float %tmp51, float %tmp53, float %tmp55)
74 %tmp57 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 33
75 %tmp58 = load float, ptr addrspace(3) %tmp57, align 4
76 %tmp59 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 34
77 %tmp60 = load float, ptr addrspace(3) %tmp59, align 4
78 %tmp61 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 35
79 %tmp62 = load float, ptr addrspace(3) %tmp61, align 4
80 %tmp63 = tail call float @llvm.fmuladd.f32(float %tmp58, float %tmp60, float %tmp62)
81 %tmp64 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 37
82 %tmp65 = load float, ptr addrspace(3) %tmp64, align 4
83 %tmp66 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 38
84 %tmp67 = load float, ptr addrspace(3) %tmp66, align 4
85 %tmp68 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 39
86 %tmp69 = load float, ptr addrspace(3) %tmp68, align 4
87 %tmp70 = tail call float @llvm.fmuladd.f32(float %tmp65, float %tmp67, float %tmp69)
88 %tmp71 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 41
89 %tmp72 = load float, ptr addrspace(3) %tmp71, align 4
90 %tmp73 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 42
91 %tmp74 = load float, ptr addrspace(3) %tmp73, align 4
92 %tmp75 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 43
93 %tmp76 = load float, ptr addrspace(3) %tmp75, align 4
94 %tmp77 = tail call float @llvm.fmuladd.f32(float %tmp72, float %tmp74, float %tmp76)
95 %tmp78 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 45
96 %tmp79 = load float, ptr addrspace(3) %tmp78, align 4
97 %tmp80 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 46
98 %tmp81 = load float, ptr addrspace(3) %tmp80, align 4
99 %tmp82 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 47
100 %tmp83 = load float, ptr addrspace(3) %tmp82, align 4
101 %tmp84 = tail call float @llvm.fmuladd.f32(float %tmp79, float %tmp81, float %tmp83)
102 %tmp85 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 49
103 %tmp86 = load float, ptr addrspace(3) %tmp85, align 4
104 %tmp87 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 50
105 %tmp88 = load float, ptr addrspace(3) %tmp87, align 4
106 %tmp89 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 51
107 %tmp90 = load float, ptr addrspace(3) %tmp89, align 4
108 %tmp91 = tail call float @llvm.fmuladd.f32(float %tmp86, float %tmp88, float %tmp90)
109 %tmp92 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 53
110 %tmp93 = load float, ptr addrspace(3) %tmp92, align 4
111 %tmp94 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 54
112 %tmp95 = load float, ptr addrspace(3) %tmp94, align 4
113 %tmp96 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 55
114 %tmp97 = load float, ptr addrspace(3) %tmp96, align 4
115 %tmp98 = tail call float @llvm.fmuladd.f32(float %tmp93, float %tmp95, float %tmp97)
116 %tmp99 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 57
117 %tmp100 = load float, ptr addrspace(3) %tmp99, align 4
118 %tmp101 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 58
119 %tmp102 = load float, ptr addrspace(3) %tmp101, align 4
120 %tmp103 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 59
121 %tmp104 = load float, ptr addrspace(3) %tmp103, align 4
122 %tmp105 = tail call float @llvm.fmuladd.f32(float %tmp100, float %tmp102, float %tmp104)
123 %tmp106 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 61
124 %tmp107 = load float, ptr addrspace(3) %tmp106, align 4
125 %tmp108 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 62
126 %tmp109 = load float, ptr addrspace(3) %tmp108, align 4
127 %tmp110 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 63
128 %tmp111 = load float, ptr addrspace(3) %tmp110, align 4
129 %tmp112 = tail call float @llvm.fmuladd.f32(float %tmp107, float %tmp109, float %tmp111)
130 %tmp113 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 65
131 %tmp114 = load float, ptr addrspace(3) %tmp113, align 4
132 %tmp115 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 66
133 %tmp116 = load float, ptr addrspace(3) %tmp115, align 4
134 %tmp117 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 67
135 %tmp118 = load float, ptr addrspace(3) %tmp117, align 4
136 %tmp119 = tail call float @llvm.fmuladd.f32(float %tmp114, float %tmp116, float %tmp118)
137 %tmp120 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 69
138 %tmp121 = load float, ptr addrspace(3) %tmp120, align 4
139 %tmp122 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 70
140 %tmp123 = load float, ptr addrspace(3) %tmp122, align 4
141 %tmp124 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 71
142 %tmp125 = load float, ptr addrspace(3) %tmp124, align 4
143 %tmp126 = tail call float @llvm.fmuladd.f32(float %tmp121, float %tmp123, float %tmp125)
144 %tmp127 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 73
145 %tmp128 = load float, ptr addrspace(3) %tmp127, align 4
146 %tmp129 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 74
147 %tmp130 = load float, ptr addrspace(3) %tmp129, align 4
148 %tmp131 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 75
149 %tmp132 = load float, ptr addrspace(3) %tmp131, align 4
150 %tmp133 = tail call float @llvm.fmuladd.f32(float %tmp128, float %tmp130, float %tmp132)
151 %tmp134 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 77
152 %tmp135 = load float, ptr addrspace(3) %tmp134, align 4
153 %tmp136 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 78
154 %tmp137 = load float, ptr addrspace(3) %tmp136, align 4
155 %tmp138 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 79
156 %tmp139 = load float, ptr addrspace(3) %tmp138, align 4
157 %tmp140 = tail call float @llvm.fmuladd.f32(float %tmp135, float %tmp137, float %tmp139)
158 %tmp141 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 81
159 %tmp142 = load float, ptr addrspace(3) %tmp141, align 4
160 %tmp143 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 82
161 %tmp144 = load float, ptr addrspace(3) %tmp143, align 4
162 %tmp145 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 83
163 %tmp146 = load float, ptr addrspace(3) %tmp145, align 4
164 %tmp147 = tail call float @llvm.fmuladd.f32(float %tmp142, float %tmp144, float %tmp146)
165 %tmp148 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 85
166 %tmp149 = load float, ptr addrspace(3) %tmp148, align 4
167 %tmp150 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 86
168 %tmp151 = load float, ptr addrspace(3) %tmp150, align 4
169 %tmp152 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 87
170 %tmp153 = load float, ptr addrspace(3) %tmp152, align 4
171 %tmp154 = tail call float @llvm.fmuladd.f32(float %tmp149, float %tmp151, float %tmp153)
172 %tmp155 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 89
173 %tmp156 = load float, ptr addrspace(3) %tmp155, align 4
174 %tmp157 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 90
175 %tmp158 = load float, ptr addrspace(3) %tmp157, align 4
176 %tmp159 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 91
177 %tmp160 = load float, ptr addrspace(3) %tmp159, align 4
178 %tmp161 = tail call float @llvm.fmuladd.f32(float %tmp156, float %tmp158, float %tmp160)
179 %tmp162 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 93
180 %tmp163 = load float, ptr addrspace(3) %tmp162, align 4
181 %tmp164 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 94
182 %tmp165 = load float, ptr addrspace(3) %tmp164, align 4
183 %tmp166 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 95
184 %tmp167 = load float, ptr addrspace(3) %tmp166, align 4
185 %tmp168 = tail call float @llvm.fmuladd.f32(float %tmp163, float %tmp165, float %tmp167)
186 %tmp169 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 97
187 %tmp170 = load float, ptr addrspace(3) %tmp169, align 4
188 %tmp171 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 98
189 %tmp172 = load float, ptr addrspace(3) %tmp171, align 4
190 %tmp173 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 99
191 %tmp174 = load float, ptr addrspace(3) %tmp173, align 4
192 %tmp175 = tail call float @llvm.fmuladd.f32(float %tmp170, float %tmp172, float %tmp174)
193 %tmp176 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 101
194 %tmp177 = load float, ptr addrspace(3) %tmp176, align 4
195 %tmp178 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 102
196 %tmp179 = load float, ptr addrspace(3) %tmp178, align 4
197 %tmp180 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 103
198 %tmp181 = load float, ptr addrspace(3) %tmp180, align 4
199 %tmp182 = tail call float @llvm.fmuladd.f32(float %tmp177, float %tmp179, float %tmp181)
200 %tmp183 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 105
201 %tmp184 = load float, ptr addrspace(3) %tmp183, align 4
202 %tmp185 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 106
203 %tmp186 = load float, ptr addrspace(3) %tmp185, align 4
204 %tmp187 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 107
205 %tmp188 = load float, ptr addrspace(3) %tmp187, align 4
206 %tmp189 = tail call float @llvm.fmuladd.f32(float %tmp184, float %tmp186, float %tmp188)
207 %tmp190 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 109
208 %tmp191 = load float, ptr addrspace(3) %tmp190, align 4
209 %tmp192 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 110
210 %tmp193 = load float, ptr addrspace(3) %tmp192, align 4
211 %tmp194 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 111
212 %tmp195 = load float, ptr addrspace(3) %tmp194, align 4
213 %tmp196 = tail call float @llvm.fmuladd.f32(float %tmp191, float %tmp193, float %tmp195)
214 %tmp197 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 113
215 %tmp198 = load float, ptr addrspace(3) %tmp197, align 4
216 %tmp199 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 114
217 %tmp200 = load float, ptr addrspace(3) %tmp199, align 4
218 %tmp201 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 115
219 %tmp202 = load float, ptr addrspace(3) %tmp201, align 4
220 %tmp203 = tail call float @llvm.fmuladd.f32(float %tmp198, float %tmp200, float %tmp202)
221 %tmp204 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 117
222 %tmp205 = load float, ptr addrspace(3) %tmp204, align 4
223 %tmp206 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 118
224 %tmp207 = load float, ptr addrspace(3) %tmp206, align 4
225 %tmp208 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 119
226 %tmp209 = load float, ptr addrspace(3) %tmp208, align 4
227 %tmp210 = tail call float @llvm.fmuladd.f32(float %tmp205, float %tmp207, float %tmp209)
228 %tmp211 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 121
229 %tmp212 = load float, ptr addrspace(3) %tmp211, align 4
230 %tmp213 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 122
231 %tmp214 = load float, ptr addrspace(3) %tmp213, align 4
232 %tmp215 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 123
233 %tmp216 = load float, ptr addrspace(3) %tmp215, align 4
234 %tmp217 = tail call float @llvm.fmuladd.f32(float %tmp212, float %tmp214, float %tmp216)
235 %tmp218 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 125
236 %tmp219 = load float, ptr addrspace(3) %tmp218, align 4
237 %tmp220 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 126
238 %tmp221 = load float, ptr addrspace(3) %tmp220, align 4
239 %tmp222 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 127
240 %tmp223 = load float, ptr addrspace(3) %tmp222, align 4
241 %tmp224 = tail call float @llvm.fmuladd.f32(float %tmp219, float %tmp221, float %tmp223)
242 %tmp225 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 129
243 %tmp226 = load float, ptr addrspace(3) %tmp225, align 4
244 %tmp227 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 130
245 %tmp228 = load float, ptr addrspace(3) %tmp227, align 4
246 %tmp229 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 131
247 %tmp230 = load float, ptr addrspace(3) %tmp229, align 4
248 %tmp231 = tail call float @llvm.fmuladd.f32(float %tmp226, float %tmp228, float %tmp230)
249 %tmp232 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 133
250 %tmp233 = load float, ptr addrspace(3) %tmp232, align 4
251 %tmp234 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 134
252 %tmp235 = load float, ptr addrspace(3) %tmp234, align 4
253 %tmp236 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 135
254 %tmp237 = load float, ptr addrspace(3) %tmp236, align 4
255 %tmp238 = tail call float @llvm.fmuladd.f32(float %tmp233, float %tmp235, float %tmp237)
256 %tmp239 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 137
257 %tmp240 = load float, ptr addrspace(3) %tmp239, align 4
258 %tmp241 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 138
259 %tmp242 = load float, ptr addrspace(3) %tmp241, align 4
260 %tmp243 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 139
261 %tmp244 = load float, ptr addrspace(3) %tmp243, align 4
262 %tmp245 = tail call float @llvm.fmuladd.f32(float %tmp240, float %tmp242, float %tmp244)
263 %tmp246 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 141
264 %tmp247 = load float, ptr addrspace(3) %tmp246, align 4
265 %tmp248 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 142
266 %tmp249 = load float, ptr addrspace(3) %tmp248, align 4
267 %tmp250 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 143
268 %tmp251 = load float, ptr addrspace(3) %tmp250, align 4
269 %tmp252 = tail call float @llvm.fmuladd.f32(float %tmp247, float %tmp249, float %tmp251)
270 %tmp253 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 145
271 %tmp254 = load float, ptr addrspace(3) %tmp253, align 4
272 %tmp255 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 146
273 %tmp256 = load float, ptr addrspace(3) %tmp255, align 4
274 %tmp257 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 147
275 %tmp258 = load float, ptr addrspace(3) %tmp257, align 4
276 %tmp259 = tail call float @llvm.fmuladd.f32(float %tmp254, float %tmp256, float %tmp258)
277 %tmp260 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 149
278 %tmp261 = load float, ptr addrspace(3) %tmp260, align 4
279 %tmp262 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 150
280 %tmp263 = load float, ptr addrspace(3) %tmp262, align 4
281 %tmp264 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 151
282 %tmp265 = load float, ptr addrspace(3) %tmp264, align 4
283 %tmp266 = tail call float @llvm.fmuladd.f32(float %tmp261, float %tmp263, float %tmp265)
284 %tmp267 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 153
285 %tmp268 = load float, ptr addrspace(3) %tmp267, align 4
286 %tmp269 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 154
287 %tmp270 = load float, ptr addrspace(3) %tmp269, align 4
288 %tmp271 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 155
289 %tmp272 = load float, ptr addrspace(3) %tmp271, align 4
290 %tmp273 = tail call float @llvm.fmuladd.f32(float %tmp268, float %tmp270, float %tmp272)
291 %tmp274 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 157
292 %tmp275 = load float, ptr addrspace(3) %tmp274, align 4
293 %tmp276 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 158
294 %tmp277 = load float, ptr addrspace(3) %tmp276, align 4
295 %tmp278 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 159
296 %tmp279 = load float, ptr addrspace(3) %tmp278, align 4
297 %tmp280 = tail call float @llvm.fmuladd.f32(float %tmp275, float %tmp277, float %tmp279)
298 %tmp281 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 161
299 %tmp282 = load float, ptr addrspace(3) %tmp281, align 4
300 %tmp283 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 162
301 %tmp284 = load float, ptr addrspace(3) %tmp283, align 4
302 %tmp285 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 163
303 %tmp286 = load float, ptr addrspace(3) %tmp285, align 4
304 %tmp287 = tail call float @llvm.fmuladd.f32(float %tmp282, float %tmp284, float %tmp286)
305 %tmp288 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 165
306 %tmp289 = load float, ptr addrspace(3) %tmp288, align 4
307 %tmp290 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 166
308 %tmp291 = load float, ptr addrspace(3) %tmp290, align 4
309 %tmp292 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 167
310 %tmp293 = load float, ptr addrspace(3) %tmp292, align 4
311 %tmp294 = tail call float @llvm.fmuladd.f32(float %tmp289, float %tmp291, float %tmp293)
312 %tmp295 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 169
313 %tmp296 = load float, ptr addrspace(3) %tmp295, align 4
314 %tmp297 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 170
315 %tmp298 = load float, ptr addrspace(3) %tmp297, align 4
316 %tmp299 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 171
317 %tmp300 = load float, ptr addrspace(3) %tmp299, align 4
318 %tmp301 = tail call float @llvm.fmuladd.f32(float %tmp296, float %tmp298, float %tmp300)
319 %tmp302 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 173
320 %tmp303 = load float, ptr addrspace(3) %tmp302, align 4
321 %tmp304 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 174
322 %tmp305 = load float, ptr addrspace(3) %tmp304, align 4
323 %tmp306 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 175
324 %tmp307 = load float, ptr addrspace(3) %tmp306, align 4
325 %tmp308 = tail call float @llvm.fmuladd.f32(float %tmp303, float %tmp305, float %tmp307)
326 %tmp309 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 177
327 %tmp310 = load float, ptr addrspace(3) %tmp309, align 4
328 %tmp311 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 178
329 %tmp312 = load float, ptr addrspace(3) %tmp311, align 4
330 %tmp313 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 179
331 %tmp314 = load float, ptr addrspace(3) %tmp313, align 4
332 %tmp315 = tail call float @llvm.fmuladd.f32(float %tmp310, float %tmp312, float %tmp314)
333 %tmp316 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 181
334 %tmp317 = load float, ptr addrspace(3) %tmp316, align 4
335 %tmp318 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 182
336 %tmp319 = load float, ptr addrspace(3) %tmp318, align 4
337 %tmp320 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 183
338 %tmp321 = load float, ptr addrspace(3) %tmp320, align 4
339 %tmp322 = tail call float @llvm.fmuladd.f32(float %tmp317, float %tmp319, float %tmp321)
340 %tmp323 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 185
341 %tmp324 = load float, ptr addrspace(3) %tmp323, align 4
342 %tmp325 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 186
343 %tmp326 = load float, ptr addrspace(3) %tmp325, align 4
344 %tmp327 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 187
345 %tmp328 = load float, ptr addrspace(3) %tmp327, align 4
346 %tmp329 = tail call float @llvm.fmuladd.f32(float %tmp324, float %tmp326, float %tmp328)
347 %tmp330 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 189
348 %tmp331 = load float, ptr addrspace(3) %tmp330, align 4
349 %tmp332 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 190
350 %tmp333 = load float, ptr addrspace(3) %tmp332, align 4
351 %tmp334 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 191
352 %tmp335 = load float, ptr addrspace(3) %tmp334, align 4
353 %tmp336 = tail call float @llvm.fmuladd.f32(float %tmp331, float %tmp333, float %tmp335)
354 %tmp337 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 193
355 %tmp338 = load float, ptr addrspace(3) %tmp337, align 4
356 %tmp339 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 194
357 %tmp340 = load float, ptr addrspace(3) %tmp339, align 4
358 %tmp341 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 195
359 %tmp342 = load float, ptr addrspace(3) %tmp341, align 4
360 %tmp343 = tail call float @llvm.fmuladd.f32(float %tmp338, float %tmp340, float %tmp342)
361 %tmp344 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 197
362 %tmp345 = load float, ptr addrspace(3) %tmp344, align 4
363 %tmp346 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 198
364 %tmp347 = load float, ptr addrspace(3) %tmp346, align 4
365 %tmp348 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 199
366 %tmp349 = load float, ptr addrspace(3) %tmp348, align 4
367 %tmp350 = tail call float @llvm.fmuladd.f32(float %tmp345, float %tmp347, float %tmp349)
368 %tmp351 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 201
369 %tmp352 = load float, ptr addrspace(3) %tmp351, align 4
370 %tmp353 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 202
371 %tmp354 = load float, ptr addrspace(3) %tmp353, align 4
372 %tmp355 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 203
373 %tmp356 = load float, ptr addrspace(3) %tmp355, align 4
374 %tmp357 = tail call float @llvm.fmuladd.f32(float %tmp352, float %tmp354, float %tmp356)
375 %tmp358 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 205
376 %tmp359 = load float, ptr addrspace(3) %tmp358, align 4
377 %tmp360 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 206
378 %tmp361 = load float, ptr addrspace(3) %tmp360, align 4
379 %tmp362 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 207
380 %tmp363 = load float, ptr addrspace(3) %tmp362, align 4
381 %tmp364 = tail call float @llvm.fmuladd.f32(float %tmp359, float %tmp361, float %tmp363)
382 %tmp365 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 209
383 %tmp366 = load float, ptr addrspace(3) %tmp365, align 4
384 %tmp367 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 210
385 %tmp368 = load float, ptr addrspace(3) %tmp367, align 4
386 %tmp369 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 211
387 %tmp370 = load float, ptr addrspace(3) %tmp369, align 4
388 %tmp371 = tail call float @llvm.fmuladd.f32(float %tmp366, float %tmp368, float %tmp370)
389 %tmp372 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 213
390 %tmp373 = load float, ptr addrspace(3) %tmp372, align 4
391 %tmp374 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 214
392 %tmp375 = load float, ptr addrspace(3) %tmp374, align 4
393 %tmp376 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 215
394 %tmp377 = load float, ptr addrspace(3) %tmp376, align 4
395 %tmp378 = tail call float @llvm.fmuladd.f32(float %tmp373, float %tmp375, float %tmp377)
396 %tmp379 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 217
397 %tmp380 = load float, ptr addrspace(3) %tmp379, align 4
398 %tmp381 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 218
399 %tmp382 = load float, ptr addrspace(3) %tmp381, align 4
400 %tmp383 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 219
401 %tmp384 = load float, ptr addrspace(3) %tmp383, align 4
402 %tmp385 = tail call float @llvm.fmuladd.f32(float %tmp380, float %tmp382, float %tmp384)
403 %tmp386 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 221
404 %tmp387 = load float, ptr addrspace(3) %tmp386, align 4
405 %tmp388 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 222
406 %tmp389 = load float, ptr addrspace(3) %tmp388, align 4
407 %tmp390 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 223
408 %tmp391 = load float, ptr addrspace(3) %tmp390, align 4
409 %tmp392 = tail call float @llvm.fmuladd.f32(float %tmp387, float %tmp389, float %tmp391)
410 %tmp393 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 225
411 %tmp394 = load float, ptr addrspace(3) %tmp393, align 4
412 %tmp395 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 226
413 %tmp396 = load float, ptr addrspace(3) %tmp395, align 4
414 %tmp397 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 227
415 %tmp398 = load float, ptr addrspace(3) %tmp397, align 4
416 %tmp399 = tail call float @llvm.fmuladd.f32(float %tmp394, float %tmp396, float %tmp398)
417 %tmp400 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 229
418 %tmp401 = load float, ptr addrspace(3) %tmp400, align 4
419 %tmp402 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 230
420 %tmp403 = load float, ptr addrspace(3) %tmp402, align 4
421 %tmp404 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 231
422 %tmp405 = load float, ptr addrspace(3) %tmp404, align 4
423 %tmp406 = tail call float @llvm.fmuladd.f32(float %tmp401, float %tmp403, float %tmp405)
424 %tmp407 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 233
425 %tmp408 = load float, ptr addrspace(3) %tmp407, align 4
426 %tmp409 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 234
427 %tmp410 = load float, ptr addrspace(3) %tmp409, align 4
428 %tmp411 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 235
429 %tmp412 = load float, ptr addrspace(3) %tmp411, align 4
430 %tmp413 = tail call float @llvm.fmuladd.f32(float %tmp408, float %tmp410, float %tmp412)
431 %tmp414 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 237
432 %tmp415 = load float, ptr addrspace(3) %tmp414, align 4
433 %tmp416 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 238
434 %tmp417 = load float, ptr addrspace(3) %tmp416, align 4
435 %tmp418 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 239
436 %tmp419 = load float, ptr addrspace(3) %tmp418, align 4
437 %tmp420 = tail call float @llvm.fmuladd.f32(float %tmp415, float %tmp417, float %tmp419)
438 %tmp421 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 241
439 %tmp422 = load float, ptr addrspace(3) %tmp421, align 4
440 %tmp423 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 242
441 %tmp424 = load float, ptr addrspace(3) %tmp423, align 4
442 %tmp425 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 243
443 %tmp426 = load float, ptr addrspace(3) %tmp425, align 4
444 %tmp427 = tail call float @llvm.fmuladd.f32(float %tmp422, float %tmp424, float %tmp426)
445 %tmp428 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 245
446 %tmp429 = load float, ptr addrspace(3) %tmp428, align 4
447 %tmp430 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 246
448 %tmp431 = load float, ptr addrspace(3) %tmp430, align 4
449 %tmp432 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 247
450 %tmp433 = load float, ptr addrspace(3) %tmp432, align 4
451 %tmp434 = tail call float @llvm.fmuladd.f32(float %tmp429, float %tmp431, float %tmp433)
452 %tmp435 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 249
453 %tmp436 = load float, ptr addrspace(3) %tmp435, align 4
454 %tmp437 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 250
455 %tmp438 = load float, ptr addrspace(3) %tmp437, align 4
456 %tmp439 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 251
457 %tmp440 = load float, ptr addrspace(3) %tmp439, align 4
458 %tmp441 = tail call float @llvm.fmuladd.f32(float %tmp436, float %tmp438, float %tmp440)
459 %tmp442 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 253
460 %tmp443 = load float, ptr addrspace(3) %tmp442, align 4
461 %tmp444 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 254
462 %tmp445 = load float, ptr addrspace(3) %tmp444, align 4
463 %tmp446 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 255
464 %tmp447 = load float, ptr addrspace(3) %tmp446, align 4
465 %tmp448 = tail call float @llvm.fmuladd.f32(float %tmp443, float %tmp445, float %tmp447)
466 store float %tmp7, ptr addrspace(1) %arg1, align 4
467 %tmp449 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 1
468 store float %tmp14, ptr addrspace(1) %tmp449, align 4
469 %tmp450 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 2
470 store float %tmp21, ptr addrspace(1) %tmp450, align 4
471 %tmp451 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 3
472 store float %tmp28, ptr addrspace(1) %tmp451, align 4
473 %tmp452 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 4
474 store float %tmp35, ptr addrspace(1) %tmp452, align 4
475 %tmp453 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 5
476 store float %tmp42, ptr addrspace(1) %tmp453, align 4
477 %tmp454 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 6
478 store float %tmp49, ptr addrspace(1) %tmp454, align 4
479 %tmp455 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 7
480 store float %tmp56, ptr addrspace(1) %tmp455, align 4
481 %tmp456 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 8
482 store float %tmp63, ptr addrspace(1) %tmp456, align 4
483 %tmp457 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 9
484 store float %tmp70, ptr addrspace(1) %tmp457, align 4
485 %tmp458 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 10
486 store float %tmp77, ptr addrspace(1) %tmp458, align 4
487 %tmp459 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 11
488 store float %tmp84, ptr addrspace(1) %tmp459, align 4
489 %tmp460 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 12
490 store float %tmp91, ptr addrspace(1) %tmp460, align 4
491 %tmp461 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 13
492 store float %tmp98, ptr addrspace(1) %tmp461, align 4
493 %tmp462 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 14
494 store float %tmp105, ptr addrspace(1) %tmp462, align 4
495 %tmp463 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 15
496 store float %tmp112, ptr addrspace(1) %tmp463, align 4
497 %tmp464 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 16
498 store float %tmp119, ptr addrspace(1) %tmp464, align 4
499 %tmp465 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 17
500 store float %tmp126, ptr addrspace(1) %tmp465, align 4
501 %tmp466 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 18
502 store float %tmp133, ptr addrspace(1) %tmp466, align 4
503 %tmp467 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 19
504 store float %tmp140, ptr addrspace(1) %tmp467, align 4
505 %tmp468 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 20
506 store float %tmp147, ptr addrspace(1) %tmp468, align 4
507 %tmp469 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 21
508 store float %tmp154, ptr addrspace(1) %tmp469, align 4
509 %tmp470 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 22
510 store float %tmp161, ptr addrspace(1) %tmp470, align 4
511 %tmp471 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 23
512 store float %tmp168, ptr addrspace(1) %tmp471, align 4
513 %tmp472 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 24
514 store float %tmp175, ptr addrspace(1) %tmp472, align 4
515 %tmp473 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 25
516 store float %tmp182, ptr addrspace(1) %tmp473, align 4
517 %tmp474 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 26
518 store float %tmp189, ptr addrspace(1) %tmp474, align 4
519 %tmp475 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 27
520 store float %tmp196, ptr addrspace(1) %tmp475, align 4
521 %tmp476 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 28
522 store float %tmp203, ptr addrspace(1) %tmp476, align 4
523 %tmp477 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 29
524 store float %tmp210, ptr addrspace(1) %tmp477, align 4
525 %tmp478 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 30
526 store float %tmp217, ptr addrspace(1) %tmp478, align 4
527 %tmp479 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 31
528 store float %tmp224, ptr addrspace(1) %tmp479, align 4
529 %tmp480 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 32
530 store float %tmp231, ptr addrspace(1) %tmp480, align 4
531 %tmp481 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 33
532 store float %tmp238, ptr addrspace(1) %tmp481, align 4
533 %tmp482 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 34
534 store float %tmp245, ptr addrspace(1) %tmp482, align 4
535 %tmp483 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 35
536 store float %tmp252, ptr addrspace(1) %tmp483, align 4
537 %tmp484 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 36
538 store float %tmp259, ptr addrspace(1) %tmp484, align 4
539 %tmp485 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 37
540 store float %tmp266, ptr addrspace(1) %tmp485, align 4
541 %tmp486 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 38
542 store float %tmp273, ptr addrspace(1) %tmp486, align 4
543 %tmp487 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 39
544 store float %tmp280, ptr addrspace(1) %tmp487, align 4
545 %tmp488 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 40
546 store float %tmp287, ptr addrspace(1) %tmp488, align 4
547 %tmp489 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 41
548 store float %tmp294, ptr addrspace(1) %tmp489, align 4
549 %tmp490 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 42
550 store float %tmp301, ptr addrspace(1) %tmp490, align 4
551 %tmp491 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 43
552 store float %tmp308, ptr addrspace(1) %tmp491, align 4
553 %tmp492 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 44
554 store float %tmp315, ptr addrspace(1) %tmp492, align 4
555 %tmp493 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 45
556 store float %tmp322, ptr addrspace(1) %tmp493, align 4
557 %tmp494 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 46
558 store float %tmp329, ptr addrspace(1) %tmp494, align 4
559 %tmp495 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 47
560 store float %tmp336, ptr addrspace(1) %tmp495, align 4
561 %tmp496 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 48
562 store float %tmp343, ptr addrspace(1) %tmp496, align 4
563 %tmp497 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 49
564 store float %tmp350, ptr addrspace(1) %tmp497, align 4
565 %tmp498 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 50
566 store float %tmp357, ptr addrspace(1) %tmp498, align 4
567 %tmp499 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 51
568 store float %tmp364, ptr addrspace(1) %tmp499, align 4
569 %tmp500 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 52
570 store float %tmp371, ptr addrspace(1) %tmp500, align 4
571 %tmp501 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 53
572 store float %tmp378, ptr addrspace(1) %tmp501, align 4
573 %tmp502 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 54
574 store float %tmp385, ptr addrspace(1) %tmp502, align 4
575 %tmp503 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 55
576 store float %tmp392, ptr addrspace(1) %tmp503, align 4
577 %tmp504 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 56
578 store float %tmp399, ptr addrspace(1) %tmp504, align 4
579 %tmp505 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 57
580 store float %tmp406, ptr addrspace(1) %tmp505, align 4
581 %tmp506 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 58
582 store float %tmp413, ptr addrspace(1) %tmp506, align 4
583 %tmp507 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 59
584 store float %tmp420, ptr addrspace(1) %tmp507, align 4
585 %tmp508 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 60
586 store float %tmp427, ptr addrspace(1) %tmp508, align 4
587 %tmp509 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 61
588 store float %tmp434, ptr addrspace(1) %tmp509, align 4
589 %tmp510 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 62
590 store float %tmp441, ptr addrspace(1) %tmp510, align 4
591 %tmp511 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 63
592 store float %tmp448, ptr addrspace(1) %tmp511, align 4
596 ; Function Attrs: nounwind readnone
; Declaration of the @llvm.fmuladd.f32 intrinsic that the kernel body calls for
; each group of three loaded floats. It takes three float operands, returns a
; float, and uses attribute group #0.
597 declare float @llvm.fmuladd.f32(float, float, float) #0
; Attribute group #0: referenced by the @llvm.fmuladd.f32 declaration.
599 attributes #0 = { nounwind readnone }
; Attribute group #1: referenced by the @load_fma_store kernel definition.
; NOTE(review): the "1,1" value of "amdgpu-waves-per-eu" appears to be what
; requests the single wave mentioned in the test's header comment, and so what
; the NumVgprs checks depend on - confirm before changing either string.
600 attributes #1 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }