/* drivers/gpu/drm/radeon/cik.c — CIK (Sea Islands) support for the radeon DRM driver */
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
45 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
46 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
47 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
48 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
49 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
50 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
51 MODULE_FIRMWARE("radeon/KABINI_me.bin");
52 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
53 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
54 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
55 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
57 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
58 extern void r600_ih_ring_fini(struct radeon_device *rdev);
59 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
60 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
61 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
62 extern void sumo_rlc_fini(struct radeon_device *rdev);
63 extern int sumo_rlc_init(struct radeon_device *rdev);
64 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
65 extern void si_rlc_reset(struct radeon_device *rdev);
66 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
67 extern int cik_sdma_resume(struct radeon_device *rdev);
68 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
69 extern void cik_sdma_fini(struct radeon_device *rdev);
70 extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
71 struct radeon_ib *ib,
72 uint64_t pe,
73 uint64_t addr, unsigned count,
74 uint32_t incr, uint32_t flags);
75 static void cik_rlc_stop(struct radeon_device *rdev);
76 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
77 static void cik_program_aspm(struct radeon_device *rdev);
78 static void cik_init_pg(struct radeon_device *rdev);
79 static void cik_init_cg(struct radeon_device *rdev);
80 static void cik_fini_pg(struct radeon_device *rdev);
81 static void cik_fini_cg(struct radeon_device *rdev);
82 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
83 bool enable);
85 /* get temperature in millidegrees */
86 int ci_get_temp(struct radeon_device *rdev)
88 u32 temp;
89 int actual_temp = 0;
91 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
92 CTF_TEMP_SHIFT;
94 if (temp & 0x200)
95 actual_temp = 255;
96 else
97 actual_temp = temp & 0x1ff;
99 actual_temp = actual_temp * 1000;
101 return actual_temp;
104 /* get temperature in millidegrees */
105 int kv_get_temp(struct radeon_device *rdev)
107 u32 temp;
108 int actual_temp = 0;
110 temp = RREG32_SMC(0xC0300E0C);
112 if (temp)
113 actual_temp = (temp / 8) - 49;
114 else
115 actual_temp = 0;
117 actual_temp = actual_temp * 1000;
119 return actual_temp;
123 * Indirect registers accessor
125 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
127 unsigned long flags;
128 u32 r;
130 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
131 WREG32(PCIE_INDEX, reg);
132 (void)RREG32(PCIE_INDEX);
133 r = RREG32(PCIE_DATA);
134 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
135 return r;
138 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
140 unsigned long flags;
142 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
143 WREG32(PCIE_INDEX, reg);
144 (void)RREG32(PCIE_INDEX);
145 WREG32(PCIE_DATA, v);
146 (void)RREG32(PCIE_DATA);
147 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
150 static const u32 spectre_rlc_save_restore_register_list[] =
152 (0x0e00 << 16) | (0xc12c >> 2),
153 0x00000000,
154 (0x0e00 << 16) | (0xc140 >> 2),
155 0x00000000,
156 (0x0e00 << 16) | (0xc150 >> 2),
157 0x00000000,
158 (0x0e00 << 16) | (0xc15c >> 2),
159 0x00000000,
160 (0x0e00 << 16) | (0xc168 >> 2),
161 0x00000000,
162 (0x0e00 << 16) | (0xc170 >> 2),
163 0x00000000,
164 (0x0e00 << 16) | (0xc178 >> 2),
165 0x00000000,
166 (0x0e00 << 16) | (0xc204 >> 2),
167 0x00000000,
168 (0x0e00 << 16) | (0xc2b4 >> 2),
169 0x00000000,
170 (0x0e00 << 16) | (0xc2b8 >> 2),
171 0x00000000,
172 (0x0e00 << 16) | (0xc2bc >> 2),
173 0x00000000,
174 (0x0e00 << 16) | (0xc2c0 >> 2),
175 0x00000000,
176 (0x0e00 << 16) | (0x8228 >> 2),
177 0x00000000,
178 (0x0e00 << 16) | (0x829c >> 2),
179 0x00000000,
180 (0x0e00 << 16) | (0x869c >> 2),
181 0x00000000,
182 (0x0600 << 16) | (0x98f4 >> 2),
183 0x00000000,
184 (0x0e00 << 16) | (0x98f8 >> 2),
185 0x00000000,
186 (0x0e00 << 16) | (0x9900 >> 2),
187 0x00000000,
188 (0x0e00 << 16) | (0xc260 >> 2),
189 0x00000000,
190 (0x0e00 << 16) | (0x90e8 >> 2),
191 0x00000000,
192 (0x0e00 << 16) | (0x3c000 >> 2),
193 0x00000000,
194 (0x0e00 << 16) | (0x3c00c >> 2),
195 0x00000000,
196 (0x0e00 << 16) | (0x8c1c >> 2),
197 0x00000000,
198 (0x0e00 << 16) | (0x9700 >> 2),
199 0x00000000,
200 (0x0e00 << 16) | (0xcd20 >> 2),
201 0x00000000,
202 (0x4e00 << 16) | (0xcd20 >> 2),
203 0x00000000,
204 (0x5e00 << 16) | (0xcd20 >> 2),
205 0x00000000,
206 (0x6e00 << 16) | (0xcd20 >> 2),
207 0x00000000,
208 (0x7e00 << 16) | (0xcd20 >> 2),
209 0x00000000,
210 (0x8e00 << 16) | (0xcd20 >> 2),
211 0x00000000,
212 (0x9e00 << 16) | (0xcd20 >> 2),
213 0x00000000,
214 (0xae00 << 16) | (0xcd20 >> 2),
215 0x00000000,
216 (0xbe00 << 16) | (0xcd20 >> 2),
217 0x00000000,
218 (0x0e00 << 16) | (0x89bc >> 2),
219 0x00000000,
220 (0x0e00 << 16) | (0x8900 >> 2),
221 0x00000000,
222 0x3,
223 (0x0e00 << 16) | (0xc130 >> 2),
224 0x00000000,
225 (0x0e00 << 16) | (0xc134 >> 2),
226 0x00000000,
227 (0x0e00 << 16) | (0xc1fc >> 2),
228 0x00000000,
229 (0x0e00 << 16) | (0xc208 >> 2),
230 0x00000000,
231 (0x0e00 << 16) | (0xc264 >> 2),
232 0x00000000,
233 (0x0e00 << 16) | (0xc268 >> 2),
234 0x00000000,
235 (0x0e00 << 16) | (0xc26c >> 2),
236 0x00000000,
237 (0x0e00 << 16) | (0xc270 >> 2),
238 0x00000000,
239 (0x0e00 << 16) | (0xc274 >> 2),
240 0x00000000,
241 (0x0e00 << 16) | (0xc278 >> 2),
242 0x00000000,
243 (0x0e00 << 16) | (0xc27c >> 2),
244 0x00000000,
245 (0x0e00 << 16) | (0xc280 >> 2),
246 0x00000000,
247 (0x0e00 << 16) | (0xc284 >> 2),
248 0x00000000,
249 (0x0e00 << 16) | (0xc288 >> 2),
250 0x00000000,
251 (0x0e00 << 16) | (0xc28c >> 2),
252 0x00000000,
253 (0x0e00 << 16) | (0xc290 >> 2),
254 0x00000000,
255 (0x0e00 << 16) | (0xc294 >> 2),
256 0x00000000,
257 (0x0e00 << 16) | (0xc298 >> 2),
258 0x00000000,
259 (0x0e00 << 16) | (0xc29c >> 2),
260 0x00000000,
261 (0x0e00 << 16) | (0xc2a0 >> 2),
262 0x00000000,
263 (0x0e00 << 16) | (0xc2a4 >> 2),
264 0x00000000,
265 (0x0e00 << 16) | (0xc2a8 >> 2),
266 0x00000000,
267 (0x0e00 << 16) | (0xc2ac >> 2),
268 0x00000000,
269 (0x0e00 << 16) | (0xc2b0 >> 2),
270 0x00000000,
271 (0x0e00 << 16) | (0x301d0 >> 2),
272 0x00000000,
273 (0x0e00 << 16) | (0x30238 >> 2),
274 0x00000000,
275 (0x0e00 << 16) | (0x30250 >> 2),
276 0x00000000,
277 (0x0e00 << 16) | (0x30254 >> 2),
278 0x00000000,
279 (0x0e00 << 16) | (0x30258 >> 2),
280 0x00000000,
281 (0x0e00 << 16) | (0x3025c >> 2),
282 0x00000000,
283 (0x4e00 << 16) | (0xc900 >> 2),
284 0x00000000,
285 (0x5e00 << 16) | (0xc900 >> 2),
286 0x00000000,
287 (0x6e00 << 16) | (0xc900 >> 2),
288 0x00000000,
289 (0x7e00 << 16) | (0xc900 >> 2),
290 0x00000000,
291 (0x8e00 << 16) | (0xc900 >> 2),
292 0x00000000,
293 (0x9e00 << 16) | (0xc900 >> 2),
294 0x00000000,
295 (0xae00 << 16) | (0xc900 >> 2),
296 0x00000000,
297 (0xbe00 << 16) | (0xc900 >> 2),
298 0x00000000,
299 (0x4e00 << 16) | (0xc904 >> 2),
300 0x00000000,
301 (0x5e00 << 16) | (0xc904 >> 2),
302 0x00000000,
303 (0x6e00 << 16) | (0xc904 >> 2),
304 0x00000000,
305 (0x7e00 << 16) | (0xc904 >> 2),
306 0x00000000,
307 (0x8e00 << 16) | (0xc904 >> 2),
308 0x00000000,
309 (0x9e00 << 16) | (0xc904 >> 2),
310 0x00000000,
311 (0xae00 << 16) | (0xc904 >> 2),
312 0x00000000,
313 (0xbe00 << 16) | (0xc904 >> 2),
314 0x00000000,
315 (0x4e00 << 16) | (0xc908 >> 2),
316 0x00000000,
317 (0x5e00 << 16) | (0xc908 >> 2),
318 0x00000000,
319 (0x6e00 << 16) | (0xc908 >> 2),
320 0x00000000,
321 (0x7e00 << 16) | (0xc908 >> 2),
322 0x00000000,
323 (0x8e00 << 16) | (0xc908 >> 2),
324 0x00000000,
325 (0x9e00 << 16) | (0xc908 >> 2),
326 0x00000000,
327 (0xae00 << 16) | (0xc908 >> 2),
328 0x00000000,
329 (0xbe00 << 16) | (0xc908 >> 2),
330 0x00000000,
331 (0x4e00 << 16) | (0xc90c >> 2),
332 0x00000000,
333 (0x5e00 << 16) | (0xc90c >> 2),
334 0x00000000,
335 (0x6e00 << 16) | (0xc90c >> 2),
336 0x00000000,
337 (0x7e00 << 16) | (0xc90c >> 2),
338 0x00000000,
339 (0x8e00 << 16) | (0xc90c >> 2),
340 0x00000000,
341 (0x9e00 << 16) | (0xc90c >> 2),
342 0x00000000,
343 (0xae00 << 16) | (0xc90c >> 2),
344 0x00000000,
345 (0xbe00 << 16) | (0xc90c >> 2),
346 0x00000000,
347 (0x4e00 << 16) | (0xc910 >> 2),
348 0x00000000,
349 (0x5e00 << 16) | (0xc910 >> 2),
350 0x00000000,
351 (0x6e00 << 16) | (0xc910 >> 2),
352 0x00000000,
353 (0x7e00 << 16) | (0xc910 >> 2),
354 0x00000000,
355 (0x8e00 << 16) | (0xc910 >> 2),
356 0x00000000,
357 (0x9e00 << 16) | (0xc910 >> 2),
358 0x00000000,
359 (0xae00 << 16) | (0xc910 >> 2),
360 0x00000000,
361 (0xbe00 << 16) | (0xc910 >> 2),
362 0x00000000,
363 (0x0e00 << 16) | (0xc99c >> 2),
364 0x00000000,
365 (0x0e00 << 16) | (0x9834 >> 2),
366 0x00000000,
367 (0x0000 << 16) | (0x30f00 >> 2),
368 0x00000000,
369 (0x0001 << 16) | (0x30f00 >> 2),
370 0x00000000,
371 (0x0000 << 16) | (0x30f04 >> 2),
372 0x00000000,
373 (0x0001 << 16) | (0x30f04 >> 2),
374 0x00000000,
375 (0x0000 << 16) | (0x30f08 >> 2),
376 0x00000000,
377 (0x0001 << 16) | (0x30f08 >> 2),
378 0x00000000,
379 (0x0000 << 16) | (0x30f0c >> 2),
380 0x00000000,
381 (0x0001 << 16) | (0x30f0c >> 2),
382 0x00000000,
383 (0x0600 << 16) | (0x9b7c >> 2),
384 0x00000000,
385 (0x0e00 << 16) | (0x8a14 >> 2),
386 0x00000000,
387 (0x0e00 << 16) | (0x8a18 >> 2),
388 0x00000000,
389 (0x0600 << 16) | (0x30a00 >> 2),
390 0x00000000,
391 (0x0e00 << 16) | (0x8bf0 >> 2),
392 0x00000000,
393 (0x0e00 << 16) | (0x8bcc >> 2),
394 0x00000000,
395 (0x0e00 << 16) | (0x8b24 >> 2),
396 0x00000000,
397 (0x0e00 << 16) | (0x30a04 >> 2),
398 0x00000000,
399 (0x0600 << 16) | (0x30a10 >> 2),
400 0x00000000,
401 (0x0600 << 16) | (0x30a14 >> 2),
402 0x00000000,
403 (0x0600 << 16) | (0x30a18 >> 2),
404 0x00000000,
405 (0x0600 << 16) | (0x30a2c >> 2),
406 0x00000000,
407 (0x0e00 << 16) | (0xc700 >> 2),
408 0x00000000,
409 (0x0e00 << 16) | (0xc704 >> 2),
410 0x00000000,
411 (0x0e00 << 16) | (0xc708 >> 2),
412 0x00000000,
413 (0x0e00 << 16) | (0xc768 >> 2),
414 0x00000000,
415 (0x0400 << 16) | (0xc770 >> 2),
416 0x00000000,
417 (0x0400 << 16) | (0xc774 >> 2),
418 0x00000000,
419 (0x0400 << 16) | (0xc778 >> 2),
420 0x00000000,
421 (0x0400 << 16) | (0xc77c >> 2),
422 0x00000000,
423 (0x0400 << 16) | (0xc780 >> 2),
424 0x00000000,
425 (0x0400 << 16) | (0xc784 >> 2),
426 0x00000000,
427 (0x0400 << 16) | (0xc788 >> 2),
428 0x00000000,
429 (0x0400 << 16) | (0xc78c >> 2),
430 0x00000000,
431 (0x0400 << 16) | (0xc798 >> 2),
432 0x00000000,
433 (0x0400 << 16) | (0xc79c >> 2),
434 0x00000000,
435 (0x0400 << 16) | (0xc7a0 >> 2),
436 0x00000000,
437 (0x0400 << 16) | (0xc7a4 >> 2),
438 0x00000000,
439 (0x0400 << 16) | (0xc7a8 >> 2),
440 0x00000000,
441 (0x0400 << 16) | (0xc7ac >> 2),
442 0x00000000,
443 (0x0400 << 16) | (0xc7b0 >> 2),
444 0x00000000,
445 (0x0400 << 16) | (0xc7b4 >> 2),
446 0x00000000,
447 (0x0e00 << 16) | (0x9100 >> 2),
448 0x00000000,
449 (0x0e00 << 16) | (0x3c010 >> 2),
450 0x00000000,
451 (0x0e00 << 16) | (0x92a8 >> 2),
452 0x00000000,
453 (0x0e00 << 16) | (0x92ac >> 2),
454 0x00000000,
455 (0x0e00 << 16) | (0x92b4 >> 2),
456 0x00000000,
457 (0x0e00 << 16) | (0x92b8 >> 2),
458 0x00000000,
459 (0x0e00 << 16) | (0x92bc >> 2),
460 0x00000000,
461 (0x0e00 << 16) | (0x92c0 >> 2),
462 0x00000000,
463 (0x0e00 << 16) | (0x92c4 >> 2),
464 0x00000000,
465 (0x0e00 << 16) | (0x92c8 >> 2),
466 0x00000000,
467 (0x0e00 << 16) | (0x92cc >> 2),
468 0x00000000,
469 (0x0e00 << 16) | (0x92d0 >> 2),
470 0x00000000,
471 (0x0e00 << 16) | (0x8c00 >> 2),
472 0x00000000,
473 (0x0e00 << 16) | (0x8c04 >> 2),
474 0x00000000,
475 (0x0e00 << 16) | (0x8c20 >> 2),
476 0x00000000,
477 (0x0e00 << 16) | (0x8c38 >> 2),
478 0x00000000,
479 (0x0e00 << 16) | (0x8c3c >> 2),
480 0x00000000,
481 (0x0e00 << 16) | (0xae00 >> 2),
482 0x00000000,
483 (0x0e00 << 16) | (0x9604 >> 2),
484 0x00000000,
485 (0x0e00 << 16) | (0xac08 >> 2),
486 0x00000000,
487 (0x0e00 << 16) | (0xac0c >> 2),
488 0x00000000,
489 (0x0e00 << 16) | (0xac10 >> 2),
490 0x00000000,
491 (0x0e00 << 16) | (0xac14 >> 2),
492 0x00000000,
493 (0x0e00 << 16) | (0xac58 >> 2),
494 0x00000000,
495 (0x0e00 << 16) | (0xac68 >> 2),
496 0x00000000,
497 (0x0e00 << 16) | (0xac6c >> 2),
498 0x00000000,
499 (0x0e00 << 16) | (0xac70 >> 2),
500 0x00000000,
501 (0x0e00 << 16) | (0xac74 >> 2),
502 0x00000000,
503 (0x0e00 << 16) | (0xac78 >> 2),
504 0x00000000,
505 (0x0e00 << 16) | (0xac7c >> 2),
506 0x00000000,
507 (0x0e00 << 16) | (0xac80 >> 2),
508 0x00000000,
509 (0x0e00 << 16) | (0xac84 >> 2),
510 0x00000000,
511 (0x0e00 << 16) | (0xac88 >> 2),
512 0x00000000,
513 (0x0e00 << 16) | (0xac8c >> 2),
514 0x00000000,
515 (0x0e00 << 16) | (0x970c >> 2),
516 0x00000000,
517 (0x0e00 << 16) | (0x9714 >> 2),
518 0x00000000,
519 (0x0e00 << 16) | (0x9718 >> 2),
520 0x00000000,
521 (0x0e00 << 16) | (0x971c >> 2),
522 0x00000000,
523 (0x0e00 << 16) | (0x31068 >> 2),
524 0x00000000,
525 (0x4e00 << 16) | (0x31068 >> 2),
526 0x00000000,
527 (0x5e00 << 16) | (0x31068 >> 2),
528 0x00000000,
529 (0x6e00 << 16) | (0x31068 >> 2),
530 0x00000000,
531 (0x7e00 << 16) | (0x31068 >> 2),
532 0x00000000,
533 (0x8e00 << 16) | (0x31068 >> 2),
534 0x00000000,
535 (0x9e00 << 16) | (0x31068 >> 2),
536 0x00000000,
537 (0xae00 << 16) | (0x31068 >> 2),
538 0x00000000,
539 (0xbe00 << 16) | (0x31068 >> 2),
540 0x00000000,
541 (0x0e00 << 16) | (0xcd10 >> 2),
542 0x00000000,
543 (0x0e00 << 16) | (0xcd14 >> 2),
544 0x00000000,
545 (0x0e00 << 16) | (0x88b0 >> 2),
546 0x00000000,
547 (0x0e00 << 16) | (0x88b4 >> 2),
548 0x00000000,
549 (0x0e00 << 16) | (0x88b8 >> 2),
550 0x00000000,
551 (0x0e00 << 16) | (0x88bc >> 2),
552 0x00000000,
553 (0x0400 << 16) | (0x89c0 >> 2),
554 0x00000000,
555 (0x0e00 << 16) | (0x88c4 >> 2),
556 0x00000000,
557 (0x0e00 << 16) | (0x88c8 >> 2),
558 0x00000000,
559 (0x0e00 << 16) | (0x88d0 >> 2),
560 0x00000000,
561 (0x0e00 << 16) | (0x88d4 >> 2),
562 0x00000000,
563 (0x0e00 << 16) | (0x88d8 >> 2),
564 0x00000000,
565 (0x0e00 << 16) | (0x8980 >> 2),
566 0x00000000,
567 (0x0e00 << 16) | (0x30938 >> 2),
568 0x00000000,
569 (0x0e00 << 16) | (0x3093c >> 2),
570 0x00000000,
571 (0x0e00 << 16) | (0x30940 >> 2),
572 0x00000000,
573 (0x0e00 << 16) | (0x89a0 >> 2),
574 0x00000000,
575 (0x0e00 << 16) | (0x30900 >> 2),
576 0x00000000,
577 (0x0e00 << 16) | (0x30904 >> 2),
578 0x00000000,
579 (0x0e00 << 16) | (0x89b4 >> 2),
580 0x00000000,
581 (0x0e00 << 16) | (0x3c210 >> 2),
582 0x00000000,
583 (0x0e00 << 16) | (0x3c214 >> 2),
584 0x00000000,
585 (0x0e00 << 16) | (0x3c218 >> 2),
586 0x00000000,
587 (0x0e00 << 16) | (0x8904 >> 2),
588 0x00000000,
589 0x5,
590 (0x0e00 << 16) | (0x8c28 >> 2),
591 (0x0e00 << 16) | (0x8c2c >> 2),
592 (0x0e00 << 16) | (0x8c30 >> 2),
593 (0x0e00 << 16) | (0x8c34 >> 2),
594 (0x0e00 << 16) | (0x9600 >> 2),
597 static const u32 kalindi_rlc_save_restore_register_list[] =
599 (0x0e00 << 16) | (0xc12c >> 2),
600 0x00000000,
601 (0x0e00 << 16) | (0xc140 >> 2),
602 0x00000000,
603 (0x0e00 << 16) | (0xc150 >> 2),
604 0x00000000,
605 (0x0e00 << 16) | (0xc15c >> 2),
606 0x00000000,
607 (0x0e00 << 16) | (0xc168 >> 2),
608 0x00000000,
609 (0x0e00 << 16) | (0xc170 >> 2),
610 0x00000000,
611 (0x0e00 << 16) | (0xc204 >> 2),
612 0x00000000,
613 (0x0e00 << 16) | (0xc2b4 >> 2),
614 0x00000000,
615 (0x0e00 << 16) | (0xc2b8 >> 2),
616 0x00000000,
617 (0x0e00 << 16) | (0xc2bc >> 2),
618 0x00000000,
619 (0x0e00 << 16) | (0xc2c0 >> 2),
620 0x00000000,
621 (0x0e00 << 16) | (0x8228 >> 2),
622 0x00000000,
623 (0x0e00 << 16) | (0x829c >> 2),
624 0x00000000,
625 (0x0e00 << 16) | (0x869c >> 2),
626 0x00000000,
627 (0x0600 << 16) | (0x98f4 >> 2),
628 0x00000000,
629 (0x0e00 << 16) | (0x98f8 >> 2),
630 0x00000000,
631 (0x0e00 << 16) | (0x9900 >> 2),
632 0x00000000,
633 (0x0e00 << 16) | (0xc260 >> 2),
634 0x00000000,
635 (0x0e00 << 16) | (0x90e8 >> 2),
636 0x00000000,
637 (0x0e00 << 16) | (0x3c000 >> 2),
638 0x00000000,
639 (0x0e00 << 16) | (0x3c00c >> 2),
640 0x00000000,
641 (0x0e00 << 16) | (0x8c1c >> 2),
642 0x00000000,
643 (0x0e00 << 16) | (0x9700 >> 2),
644 0x00000000,
645 (0x0e00 << 16) | (0xcd20 >> 2),
646 0x00000000,
647 (0x4e00 << 16) | (0xcd20 >> 2),
648 0x00000000,
649 (0x5e00 << 16) | (0xcd20 >> 2),
650 0x00000000,
651 (0x6e00 << 16) | (0xcd20 >> 2),
652 0x00000000,
653 (0x7e00 << 16) | (0xcd20 >> 2),
654 0x00000000,
655 (0x0e00 << 16) | (0x89bc >> 2),
656 0x00000000,
657 (0x0e00 << 16) | (0x8900 >> 2),
658 0x00000000,
659 0x3,
660 (0x0e00 << 16) | (0xc130 >> 2),
661 0x00000000,
662 (0x0e00 << 16) | (0xc134 >> 2),
663 0x00000000,
664 (0x0e00 << 16) | (0xc1fc >> 2),
665 0x00000000,
666 (0x0e00 << 16) | (0xc208 >> 2),
667 0x00000000,
668 (0x0e00 << 16) | (0xc264 >> 2),
669 0x00000000,
670 (0x0e00 << 16) | (0xc268 >> 2),
671 0x00000000,
672 (0x0e00 << 16) | (0xc26c >> 2),
673 0x00000000,
674 (0x0e00 << 16) | (0xc270 >> 2),
675 0x00000000,
676 (0x0e00 << 16) | (0xc274 >> 2),
677 0x00000000,
678 (0x0e00 << 16) | (0xc28c >> 2),
679 0x00000000,
680 (0x0e00 << 16) | (0xc290 >> 2),
681 0x00000000,
682 (0x0e00 << 16) | (0xc294 >> 2),
683 0x00000000,
684 (0x0e00 << 16) | (0xc298 >> 2),
685 0x00000000,
686 (0x0e00 << 16) | (0xc2a0 >> 2),
687 0x00000000,
688 (0x0e00 << 16) | (0xc2a4 >> 2),
689 0x00000000,
690 (0x0e00 << 16) | (0xc2a8 >> 2),
691 0x00000000,
692 (0x0e00 << 16) | (0xc2ac >> 2),
693 0x00000000,
694 (0x0e00 << 16) | (0x301d0 >> 2),
695 0x00000000,
696 (0x0e00 << 16) | (0x30238 >> 2),
697 0x00000000,
698 (0x0e00 << 16) | (0x30250 >> 2),
699 0x00000000,
700 (0x0e00 << 16) | (0x30254 >> 2),
701 0x00000000,
702 (0x0e00 << 16) | (0x30258 >> 2),
703 0x00000000,
704 (0x0e00 << 16) | (0x3025c >> 2),
705 0x00000000,
706 (0x4e00 << 16) | (0xc900 >> 2),
707 0x00000000,
708 (0x5e00 << 16) | (0xc900 >> 2),
709 0x00000000,
710 (0x6e00 << 16) | (0xc900 >> 2),
711 0x00000000,
712 (0x7e00 << 16) | (0xc900 >> 2),
713 0x00000000,
714 (0x4e00 << 16) | (0xc904 >> 2),
715 0x00000000,
716 (0x5e00 << 16) | (0xc904 >> 2),
717 0x00000000,
718 (0x6e00 << 16) | (0xc904 >> 2),
719 0x00000000,
720 (0x7e00 << 16) | (0xc904 >> 2),
721 0x00000000,
722 (0x4e00 << 16) | (0xc908 >> 2),
723 0x00000000,
724 (0x5e00 << 16) | (0xc908 >> 2),
725 0x00000000,
726 (0x6e00 << 16) | (0xc908 >> 2),
727 0x00000000,
728 (0x7e00 << 16) | (0xc908 >> 2),
729 0x00000000,
730 (0x4e00 << 16) | (0xc90c >> 2),
731 0x00000000,
732 (0x5e00 << 16) | (0xc90c >> 2),
733 0x00000000,
734 (0x6e00 << 16) | (0xc90c >> 2),
735 0x00000000,
736 (0x7e00 << 16) | (0xc90c >> 2),
737 0x00000000,
738 (0x4e00 << 16) | (0xc910 >> 2),
739 0x00000000,
740 (0x5e00 << 16) | (0xc910 >> 2),
741 0x00000000,
742 (0x6e00 << 16) | (0xc910 >> 2),
743 0x00000000,
744 (0x7e00 << 16) | (0xc910 >> 2),
745 0x00000000,
746 (0x0e00 << 16) | (0xc99c >> 2),
747 0x00000000,
748 (0x0e00 << 16) | (0x9834 >> 2),
749 0x00000000,
750 (0x0000 << 16) | (0x30f00 >> 2),
751 0x00000000,
752 (0x0000 << 16) | (0x30f04 >> 2),
753 0x00000000,
754 (0x0000 << 16) | (0x30f08 >> 2),
755 0x00000000,
756 (0x0000 << 16) | (0x30f0c >> 2),
757 0x00000000,
758 (0x0600 << 16) | (0x9b7c >> 2),
759 0x00000000,
760 (0x0e00 << 16) | (0x8a14 >> 2),
761 0x00000000,
762 (0x0e00 << 16) | (0x8a18 >> 2),
763 0x00000000,
764 (0x0600 << 16) | (0x30a00 >> 2),
765 0x00000000,
766 (0x0e00 << 16) | (0x8bf0 >> 2),
767 0x00000000,
768 (0x0e00 << 16) | (0x8bcc >> 2),
769 0x00000000,
770 (0x0e00 << 16) | (0x8b24 >> 2),
771 0x00000000,
772 (0x0e00 << 16) | (0x30a04 >> 2),
773 0x00000000,
774 (0x0600 << 16) | (0x30a10 >> 2),
775 0x00000000,
776 (0x0600 << 16) | (0x30a14 >> 2),
777 0x00000000,
778 (0x0600 << 16) | (0x30a18 >> 2),
779 0x00000000,
780 (0x0600 << 16) | (0x30a2c >> 2),
781 0x00000000,
782 (0x0e00 << 16) | (0xc700 >> 2),
783 0x00000000,
784 (0x0e00 << 16) | (0xc704 >> 2),
785 0x00000000,
786 (0x0e00 << 16) | (0xc708 >> 2),
787 0x00000000,
788 (0x0e00 << 16) | (0xc768 >> 2),
789 0x00000000,
790 (0x0400 << 16) | (0xc770 >> 2),
791 0x00000000,
792 (0x0400 << 16) | (0xc774 >> 2),
793 0x00000000,
794 (0x0400 << 16) | (0xc798 >> 2),
795 0x00000000,
796 (0x0400 << 16) | (0xc79c >> 2),
797 0x00000000,
798 (0x0e00 << 16) | (0x9100 >> 2),
799 0x00000000,
800 (0x0e00 << 16) | (0x3c010 >> 2),
801 0x00000000,
802 (0x0e00 << 16) | (0x8c00 >> 2),
803 0x00000000,
804 (0x0e00 << 16) | (0x8c04 >> 2),
805 0x00000000,
806 (0x0e00 << 16) | (0x8c20 >> 2),
807 0x00000000,
808 (0x0e00 << 16) | (0x8c38 >> 2),
809 0x00000000,
810 (0x0e00 << 16) | (0x8c3c >> 2),
811 0x00000000,
812 (0x0e00 << 16) | (0xae00 >> 2),
813 0x00000000,
814 (0x0e00 << 16) | (0x9604 >> 2),
815 0x00000000,
816 (0x0e00 << 16) | (0xac08 >> 2),
817 0x00000000,
818 (0x0e00 << 16) | (0xac0c >> 2),
819 0x00000000,
820 (0x0e00 << 16) | (0xac10 >> 2),
821 0x00000000,
822 (0x0e00 << 16) | (0xac14 >> 2),
823 0x00000000,
824 (0x0e00 << 16) | (0xac58 >> 2),
825 0x00000000,
826 (0x0e00 << 16) | (0xac68 >> 2),
827 0x00000000,
828 (0x0e00 << 16) | (0xac6c >> 2),
829 0x00000000,
830 (0x0e00 << 16) | (0xac70 >> 2),
831 0x00000000,
832 (0x0e00 << 16) | (0xac74 >> 2),
833 0x00000000,
834 (0x0e00 << 16) | (0xac78 >> 2),
835 0x00000000,
836 (0x0e00 << 16) | (0xac7c >> 2),
837 0x00000000,
838 (0x0e00 << 16) | (0xac80 >> 2),
839 0x00000000,
840 (0x0e00 << 16) | (0xac84 >> 2),
841 0x00000000,
842 (0x0e00 << 16) | (0xac88 >> 2),
843 0x00000000,
844 (0x0e00 << 16) | (0xac8c >> 2),
845 0x00000000,
846 (0x0e00 << 16) | (0x970c >> 2),
847 0x00000000,
848 (0x0e00 << 16) | (0x9714 >> 2),
849 0x00000000,
850 (0x0e00 << 16) | (0x9718 >> 2),
851 0x00000000,
852 (0x0e00 << 16) | (0x971c >> 2),
853 0x00000000,
854 (0x0e00 << 16) | (0x31068 >> 2),
855 0x00000000,
856 (0x4e00 << 16) | (0x31068 >> 2),
857 0x00000000,
858 (0x5e00 << 16) | (0x31068 >> 2),
859 0x00000000,
860 (0x6e00 << 16) | (0x31068 >> 2),
861 0x00000000,
862 (0x7e00 << 16) | (0x31068 >> 2),
863 0x00000000,
864 (0x0e00 << 16) | (0xcd10 >> 2),
865 0x00000000,
866 (0x0e00 << 16) | (0xcd14 >> 2),
867 0x00000000,
868 (0x0e00 << 16) | (0x88b0 >> 2),
869 0x00000000,
870 (0x0e00 << 16) | (0x88b4 >> 2),
871 0x00000000,
872 (0x0e00 << 16) | (0x88b8 >> 2),
873 0x00000000,
874 (0x0e00 << 16) | (0x88bc >> 2),
875 0x00000000,
876 (0x0400 << 16) | (0x89c0 >> 2),
877 0x00000000,
878 (0x0e00 << 16) | (0x88c4 >> 2),
879 0x00000000,
880 (0x0e00 << 16) | (0x88c8 >> 2),
881 0x00000000,
882 (0x0e00 << 16) | (0x88d0 >> 2),
883 0x00000000,
884 (0x0e00 << 16) | (0x88d4 >> 2),
885 0x00000000,
886 (0x0e00 << 16) | (0x88d8 >> 2),
887 0x00000000,
888 (0x0e00 << 16) | (0x8980 >> 2),
889 0x00000000,
890 (0x0e00 << 16) | (0x30938 >> 2),
891 0x00000000,
892 (0x0e00 << 16) | (0x3093c >> 2),
893 0x00000000,
894 (0x0e00 << 16) | (0x30940 >> 2),
895 0x00000000,
896 (0x0e00 << 16) | (0x89a0 >> 2),
897 0x00000000,
898 (0x0e00 << 16) | (0x30900 >> 2),
899 0x00000000,
900 (0x0e00 << 16) | (0x30904 >> 2),
901 0x00000000,
902 (0x0e00 << 16) | (0x89b4 >> 2),
903 0x00000000,
904 (0x0e00 << 16) | (0x3e1fc >> 2),
905 0x00000000,
906 (0x0e00 << 16) | (0x3c210 >> 2),
907 0x00000000,
908 (0x0e00 << 16) | (0x3c214 >> 2),
909 0x00000000,
910 (0x0e00 << 16) | (0x3c218 >> 2),
911 0x00000000,
912 (0x0e00 << 16) | (0x8904 >> 2),
913 0x00000000,
914 0x5,
915 (0x0e00 << 16) | (0x8c28 >> 2),
916 (0x0e00 << 16) | (0x8c2c >> 2),
917 (0x0e00 << 16) | (0x8c30 >> 2),
918 (0x0e00 << 16) | (0x8c34 >> 2),
919 (0x0e00 << 16) | (0x9600 >> 2),
922 static const u32 bonaire_golden_spm_registers[] =
924 0x30800, 0xe0ffffff, 0xe0000000
927 static const u32 bonaire_golden_common_registers[] =
929 0xc770, 0xffffffff, 0x00000800,
930 0xc774, 0xffffffff, 0x00000800,
931 0xc798, 0xffffffff, 0x00007fbf,
932 0xc79c, 0xffffffff, 0x00007faf
935 static const u32 bonaire_golden_registers[] =
937 0x3354, 0x00000333, 0x00000333,
938 0x3350, 0x000c0fc0, 0x00040200,
939 0x9a10, 0x00010000, 0x00058208,
940 0x3c000, 0xffff1fff, 0x00140000,
941 0x3c200, 0xfdfc0fff, 0x00000100,
942 0x3c234, 0x40000000, 0x40000200,
943 0x9830, 0xffffffff, 0x00000000,
944 0x9834, 0xf00fffff, 0x00000400,
945 0x9838, 0x0002021c, 0x00020200,
946 0xc78, 0x00000080, 0x00000000,
947 0x5bb0, 0x000000f0, 0x00000070,
948 0x5bc0, 0xf0311fff, 0x80300000,
949 0x98f8, 0x73773777, 0x12010001,
950 0x350c, 0x00810000, 0x408af000,
951 0x7030, 0x31000111, 0x00000011,
952 0x2f48, 0x73773777, 0x12010001,
953 0x220c, 0x00007fb6, 0x0021a1b1,
954 0x2210, 0x00007fb6, 0x002021b1,
955 0x2180, 0x00007fb6, 0x00002191,
956 0x2218, 0x00007fb6, 0x002121b1,
957 0x221c, 0x00007fb6, 0x002021b1,
958 0x21dc, 0x00007fb6, 0x00002191,
959 0x21e0, 0x00007fb6, 0x00002191,
960 0x3628, 0x0000003f, 0x0000000a,
961 0x362c, 0x0000003f, 0x0000000a,
962 0x2ae4, 0x00073ffe, 0x000022a2,
963 0x240c, 0x000007ff, 0x00000000,
964 0x8a14, 0xf000003f, 0x00000007,
965 0x8bf0, 0x00002001, 0x00000001,
966 0x8b24, 0xffffffff, 0x00ffffff,
967 0x30a04, 0x0000ff0f, 0x00000000,
968 0x28a4c, 0x07ffffff, 0x06000000,
969 0x4d8, 0x00000fff, 0x00000100,
970 0x3e78, 0x00000001, 0x00000002,
971 0x9100, 0x03000000, 0x0362c688,
972 0x8c00, 0x000000ff, 0x00000001,
973 0xe40, 0x00001fff, 0x00001fff,
974 0x9060, 0x0000007f, 0x00000020,
975 0x9508, 0x00010000, 0x00010000,
976 0xac14, 0x000003ff, 0x000000f3,
977 0xac0c, 0xffffffff, 0x00001032
980 static const u32 bonaire_mgcg_cgcg_init[] =
982 0xc420, 0xffffffff, 0xfffffffc,
983 0x30800, 0xffffffff, 0xe0000000,
984 0x3c2a0, 0xffffffff, 0x00000100,
985 0x3c208, 0xffffffff, 0x00000100,
986 0x3c2c0, 0xffffffff, 0xc0000100,
987 0x3c2c8, 0xffffffff, 0xc0000100,
988 0x3c2c4, 0xffffffff, 0xc0000100,
989 0x55e4, 0xffffffff, 0x00600100,
990 0x3c280, 0xffffffff, 0x00000100,
991 0x3c214, 0xffffffff, 0x06000100,
992 0x3c220, 0xffffffff, 0x00000100,
993 0x3c218, 0xffffffff, 0x06000100,
994 0x3c204, 0xffffffff, 0x00000100,
995 0x3c2e0, 0xffffffff, 0x00000100,
996 0x3c224, 0xffffffff, 0x00000100,
997 0x3c200, 0xffffffff, 0x00000100,
998 0x3c230, 0xffffffff, 0x00000100,
999 0x3c234, 0xffffffff, 0x00000100,
1000 0x3c250, 0xffffffff, 0x00000100,
1001 0x3c254, 0xffffffff, 0x00000100,
1002 0x3c258, 0xffffffff, 0x00000100,
1003 0x3c25c, 0xffffffff, 0x00000100,
1004 0x3c260, 0xffffffff, 0x00000100,
1005 0x3c27c, 0xffffffff, 0x00000100,
1006 0x3c278, 0xffffffff, 0x00000100,
1007 0x3c210, 0xffffffff, 0x06000100,
1008 0x3c290, 0xffffffff, 0x00000100,
1009 0x3c274, 0xffffffff, 0x00000100,
1010 0x3c2b4, 0xffffffff, 0x00000100,
1011 0x3c2b0, 0xffffffff, 0x00000100,
1012 0x3c270, 0xffffffff, 0x00000100,
1013 0x30800, 0xffffffff, 0xe0000000,
1014 0x3c020, 0xffffffff, 0x00010000,
1015 0x3c024, 0xffffffff, 0x00030002,
1016 0x3c028, 0xffffffff, 0x00040007,
1017 0x3c02c, 0xffffffff, 0x00060005,
1018 0x3c030, 0xffffffff, 0x00090008,
1019 0x3c034, 0xffffffff, 0x00010000,
1020 0x3c038, 0xffffffff, 0x00030002,
1021 0x3c03c, 0xffffffff, 0x00040007,
1022 0x3c040, 0xffffffff, 0x00060005,
1023 0x3c044, 0xffffffff, 0x00090008,
1024 0x3c048, 0xffffffff, 0x00010000,
1025 0x3c04c, 0xffffffff, 0x00030002,
1026 0x3c050, 0xffffffff, 0x00040007,
1027 0x3c054, 0xffffffff, 0x00060005,
1028 0x3c058, 0xffffffff, 0x00090008,
1029 0x3c05c, 0xffffffff, 0x00010000,
1030 0x3c060, 0xffffffff, 0x00030002,
1031 0x3c064, 0xffffffff, 0x00040007,
1032 0x3c068, 0xffffffff, 0x00060005,
1033 0x3c06c, 0xffffffff, 0x00090008,
1034 0x3c070, 0xffffffff, 0x00010000,
1035 0x3c074, 0xffffffff, 0x00030002,
1036 0x3c078, 0xffffffff, 0x00040007,
1037 0x3c07c, 0xffffffff, 0x00060005,
1038 0x3c080, 0xffffffff, 0x00090008,
1039 0x3c084, 0xffffffff, 0x00010000,
1040 0x3c088, 0xffffffff, 0x00030002,
1041 0x3c08c, 0xffffffff, 0x00040007,
1042 0x3c090, 0xffffffff, 0x00060005,
1043 0x3c094, 0xffffffff, 0x00090008,
1044 0x3c098, 0xffffffff, 0x00010000,
1045 0x3c09c, 0xffffffff, 0x00030002,
1046 0x3c0a0, 0xffffffff, 0x00040007,
1047 0x3c0a4, 0xffffffff, 0x00060005,
1048 0x3c0a8, 0xffffffff, 0x00090008,
1049 0x3c000, 0xffffffff, 0x96e00200,
1050 0x8708, 0xffffffff, 0x00900100,
1051 0xc424, 0xffffffff, 0x0020003f,
1052 0x38, 0xffffffff, 0x0140001c,
1053 0x3c, 0x000f0000, 0x000f0000,
1054 0x220, 0xffffffff, 0xC060000C,
1055 0x224, 0xc0000fff, 0x00000100,
1056 0xf90, 0xffffffff, 0x00000100,
1057 0xf98, 0x00000101, 0x00000000,
1058 0x20a8, 0xffffffff, 0x00000104,
1059 0x55e4, 0xff000fff, 0x00000100,
1060 0x30cc, 0xc0000fff, 0x00000104,
1061 0xc1e4, 0x00000001, 0x00000001,
1062 0xd00c, 0xff000ff0, 0x00000100,
1063 0xd80c, 0xff000ff0, 0x00000100
1066 static const u32 spectre_golden_spm_registers[] =
1068 0x30800, 0xe0ffffff, 0xe0000000
1071 static const u32 spectre_golden_common_registers[] =
1073 0xc770, 0xffffffff, 0x00000800,
1074 0xc774, 0xffffffff, 0x00000800,
1075 0xc798, 0xffffffff, 0x00007fbf,
1076 0xc79c, 0xffffffff, 0x00007faf
1079 static const u32 spectre_golden_registers[] =
1081 0x3c000, 0xffff1fff, 0x96940200,
1082 0x3c00c, 0xffff0001, 0xff000000,
1083 0x3c200, 0xfffc0fff, 0x00000100,
1084 0x6ed8, 0x00010101, 0x00010000,
1085 0x9834, 0xf00fffff, 0x00000400,
1086 0x9838, 0xfffffffc, 0x00020200,
1087 0x5bb0, 0x000000f0, 0x00000070,
1088 0x5bc0, 0xf0311fff, 0x80300000,
1089 0x98f8, 0x73773777, 0x12010001,
1090 0x9b7c, 0x00ff0000, 0x00fc0000,
1091 0x2f48, 0x73773777, 0x12010001,
1092 0x8a14, 0xf000003f, 0x00000007,
1093 0x8b24, 0xffffffff, 0x00ffffff,
1094 0x28350, 0x3f3f3fff, 0x00000082,
1095 0x28354, 0x0000003f, 0x00000000,
1096 0x3e78, 0x00000001, 0x00000002,
1097 0x913c, 0xffff03df, 0x00000004,
1098 0xc768, 0x00000008, 0x00000008,
1099 0x8c00, 0x000008ff, 0x00000800,
1100 0x9508, 0x00010000, 0x00010000,
1101 0xac0c, 0xffffffff, 0x54763210,
1102 0x214f8, 0x01ff01ff, 0x00000002,
1103 0x21498, 0x007ff800, 0x00200000,
1104 0x2015c, 0xffffffff, 0x00000f40,
1105 0x30934, 0xffffffff, 0x00000001
/* Spectre (Kaveri) medium/coarse grain clock gating init values; u32
 * triples consumed by radeon_program_register_sequence() and applied
 * first in cik_init_golden_registers().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
/* Kalindi (Kabini) SPM golden settings; u32 triples consumed by
 * radeon_program_register_sequence() in cik_init_golden_registers().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
/* Kalindi (Kabini) common golden settings; u32 triples consumed by
 * radeon_program_register_sequence() in cik_init_golden_registers().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
/* Kalindi (Kabini) golden register settings; u32 triples consumed by
 * radeon_program_register_sequence() in cik_init_golden_registers().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
/* Kalindi (Kabini) medium/coarse grain clock gating init values; u32
 * triples consumed by radeon_program_register_sequence() and applied
 * first in cik_init_golden_registers().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
/**
 * cik_init_golden_registers - program the "golden" register tables
 *
 * @rdev: radeon_device pointer
 *
 * Applies the per-family register tables via
 * radeon_program_register_sequence().  For each family the tables are
 * applied in a fixed order: clock gating init, golden, common, then SPM;
 * keep that ordering when modifying this function.  Unknown families are
 * left untouched.
 */
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}
1356 * cik_get_xclk - get the xclk
1358 * @rdev: radeon_device pointer
1360 * Returns the reference clock used by the gfx engine
1361 * (CIK).
1363 u32 cik_get_xclk(struct radeon_device *rdev)
1365 u32 reference_clock = rdev->clock.spll.reference_freq;
1367 if (rdev->flags & RADEON_IS_IGP) {
1368 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1369 return reference_clock / 2;
1370 } else {
1371 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1372 return reference_clock / 4;
1374 return reference_clock;
1378 * cik_mm_rdoorbell - read a doorbell dword
1380 * @rdev: radeon_device pointer
1381 * @offset: byte offset into the aperture
1383 * Returns the value in the doorbell aperture at the
1384 * requested offset (CIK).
1386 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1388 if (offset < rdev->doorbell.size) {
1389 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1390 } else {
1391 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1392 return 0;
1397 * cik_mm_wdoorbell - write a doorbell dword
1399 * @rdev: radeon_device pointer
1400 * @offset: byte offset into the aperture
1401 * @v: value to write
1403 * Writes @v to the doorbell aperture at the
1404 * requested offset (CIK).
1406 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1408 if (offset < rdev->doorbell.size) {
1409 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1410 } else {
1411 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC IO debug setup: {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * before the MC ucode itself is loaded.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1458 * cik_srbm_select - select specific register instances
1460 * @rdev: radeon_device pointer
1461 * @me: selected ME (micro engine)
1462 * @pipe: pipe
1463 * @queue: queue
1464 * @vmid: VMID
1466 * Switches the currently active registers instances. Some
1467 * registers are instanced per VMID, others are instanced per
1468 * me/pipe/queue combination.
1470 static void cik_srbm_select(struct radeon_device *rdev,
1471 u32 me, u32 pipe, u32 queue, u32 vmid)
1473 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1474 MEID(me & 0x3) |
1475 VMID(vmid & 0xf) |
1476 QUEUEID(queue & 0x7));
1477 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1480 /* ucode loading */
1482 * ci_mc_load_microcode - load MC ucode into the hw
1484 * @rdev: radeon_device pointer
1486 * Load the GDDR MC ucode into the hw (CIK).
1487 * Returns 0 on success, error on failure.
1489 static int ci_mc_load_microcode(struct radeon_device *rdev)
1491 const __be32 *fw_data;
1492 u32 running, blackout = 0;
1493 u32 *io_mc_regs;
1494 int i, ucode_size, regs_size;
1496 if (!rdev->mc_fw)
1497 return -EINVAL;
1499 switch (rdev->family) {
1500 case CHIP_BONAIRE:
1501 default:
1502 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1503 ucode_size = CIK_MC_UCODE_SIZE;
1504 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1505 break;
1508 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1510 if (running == 0) {
1511 if (running) {
1512 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1513 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1516 /* reset the engine and set to writable */
1517 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1518 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1520 /* load mc io regs */
1521 for (i = 0; i < regs_size; i++) {
1522 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1523 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1525 /* load the MC ucode */
1526 fw_data = (const __be32 *)rdev->mc_fw->data;
1527 for (i = 0; i < ucode_size; i++)
1528 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1530 /* put the engine back into the active state */
1531 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1532 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1533 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1535 /* wait for training to complete */
1536 for (i = 0; i < rdev->usec_timeout; i++) {
1537 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1538 break;
1539 udelay(1);
1541 for (i = 0; i < rdev->usec_timeout; i++) {
1542 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1543 break;
1544 udelay(1);
1547 if (running)
1548 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1551 return 0;
1555 * cik_init_microcode - load ucode images from disk
1557 * @rdev: radeon_device pointer
1559 * Use the firmware interface to load the ucode images into
1560 * the driver (not loaded into hw).
1561 * Returns 0 on success, error on failure.
1563 static int cik_init_microcode(struct radeon_device *rdev)
1565 const char *chip_name;
1566 size_t pfp_req_size, me_req_size, ce_req_size,
1567 mec_req_size, rlc_req_size, mc_req_size,
1568 sdma_req_size, smc_req_size;
1569 char fw_name[30];
1570 int err;
1572 DRM_DEBUG("\n");
1574 switch (rdev->family) {
1575 case CHIP_BONAIRE:
1576 chip_name = "BONAIRE";
1577 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1578 me_req_size = CIK_ME_UCODE_SIZE * 4;
1579 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1580 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1581 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1582 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1583 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1584 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1585 break;
1586 case CHIP_KAVERI:
1587 chip_name = "KAVERI";
1588 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1589 me_req_size = CIK_ME_UCODE_SIZE * 4;
1590 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1591 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1592 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1593 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1594 break;
1595 case CHIP_KABINI:
1596 chip_name = "KABINI";
1597 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1598 me_req_size = CIK_ME_UCODE_SIZE * 4;
1599 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1600 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1601 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1602 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1603 break;
1604 default: BUG();
1607 DRM_INFO("Loading %s Microcode\n", chip_name);
1609 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1610 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1611 if (err)
1612 goto out;
1613 if (rdev->pfp_fw->size != pfp_req_size) {
1614 printk(KERN_ERR
1615 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1616 rdev->pfp_fw->size, fw_name);
1617 err = -EINVAL;
1618 goto out;
1621 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1622 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1623 if (err)
1624 goto out;
1625 if (rdev->me_fw->size != me_req_size) {
1626 printk(KERN_ERR
1627 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1628 rdev->me_fw->size, fw_name);
1629 err = -EINVAL;
1632 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1633 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1634 if (err)
1635 goto out;
1636 if (rdev->ce_fw->size != ce_req_size) {
1637 printk(KERN_ERR
1638 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1639 rdev->ce_fw->size, fw_name);
1640 err = -EINVAL;
1643 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1644 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1645 if (err)
1646 goto out;
1647 if (rdev->mec_fw->size != mec_req_size) {
1648 printk(KERN_ERR
1649 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1650 rdev->mec_fw->size, fw_name);
1651 err = -EINVAL;
1654 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1655 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1656 if (err)
1657 goto out;
1658 if (rdev->rlc_fw->size != rlc_req_size) {
1659 printk(KERN_ERR
1660 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1661 rdev->rlc_fw->size, fw_name);
1662 err = -EINVAL;
1665 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1666 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1667 if (err)
1668 goto out;
1669 if (rdev->sdma_fw->size != sdma_req_size) {
1670 printk(KERN_ERR
1671 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1672 rdev->sdma_fw->size, fw_name);
1673 err = -EINVAL;
1676 /* No SMC, MC ucode on APUs */
1677 if (!(rdev->flags & RADEON_IS_IGP)) {
1678 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1679 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1680 if (err)
1681 goto out;
1682 if (rdev->mc_fw->size != mc_req_size) {
1683 printk(KERN_ERR
1684 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1685 rdev->mc_fw->size, fw_name);
1686 err = -EINVAL;
1689 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1690 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1691 if (err) {
1692 printk(KERN_ERR
1693 "smc: error loading firmware \"%s\"\n",
1694 fw_name);
1695 release_firmware(rdev->smc_fw);
1696 rdev->smc_fw = NULL;
1697 err = 0;
1698 } else if (rdev->smc_fw->size != smc_req_size) {
1699 printk(KERN_ERR
1700 "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1701 rdev->smc_fw->size, fw_name);
1702 err = -EINVAL;
1706 out:
1707 if (err) {
1708 if (err != -EINVAL)
1709 printk(KERN_ERR
1710 "cik_cp: Failed to load firmware \"%s\"\n",
1711 fw_name);
1712 release_firmware(rdev->pfp_fw);
1713 rdev->pfp_fw = NULL;
1714 release_firmware(rdev->me_fw);
1715 rdev->me_fw = NULL;
1716 release_firmware(rdev->ce_fw);
1717 rdev->ce_fw = NULL;
1718 release_firmware(rdev->rlc_fw);
1719 rdev->rlc_fw = NULL;
1720 release_firmware(rdev->mc_fw);
1721 rdev->mc_fw = NULL;
1722 release_firmware(rdev->smc_fw);
1723 rdev->smc_fw = NULL;
1725 return err;
1729 * Core functions
1732 * cik_tiling_mode_table_init - init the hw tiling table
1734 * @rdev: radeon_device pointer
1736 * Starting with SI, the tiling setup is done globally in a
1737 * set of 32 tiling modes. Rather than selecting each set of
1738 * parameters per surface as on older asics, we just select
1739 * which index in the tiling table we want to use, and the
1740 * surface uses those parameters (CIK).
1742 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1744 const u32 num_tile_mode_states = 32;
1745 const u32 num_secondary_tile_mode_states = 16;
1746 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1747 u32 num_pipe_configs;
1748 u32 num_rbs = rdev->config.cik.max_backends_per_se *
1749 rdev->config.cik.max_shader_engines;
1751 switch (rdev->config.cik.mem_row_size_in_kb) {
1752 case 1:
1753 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1754 break;
1755 case 2:
1756 default:
1757 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1758 break;
1759 case 4:
1760 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1761 break;
1764 num_pipe_configs = rdev->config.cik.max_tile_pipes;
1765 if (num_pipe_configs > 8)
1766 num_pipe_configs = 8; /* ??? */
1768 if (num_pipe_configs == 8) {
1769 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1770 switch (reg_offset) {
1771 case 0:
1772 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1773 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1774 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1775 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1776 break;
1777 case 1:
1778 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1779 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1780 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1781 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1782 break;
1783 case 2:
1784 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1785 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1786 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1787 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1788 break;
1789 case 3:
1790 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1791 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1792 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1793 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1794 break;
1795 case 4:
1796 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1797 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1798 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1799 TILE_SPLIT(split_equal_to_row_size));
1800 break;
1801 case 5:
1802 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1803 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1804 break;
1805 case 6:
1806 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1807 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1808 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1809 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1810 break;
1811 case 7:
1812 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1813 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1814 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1815 TILE_SPLIT(split_equal_to_row_size));
1816 break;
1817 case 8:
1818 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1819 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1820 break;
1821 case 9:
1822 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1823 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1824 break;
1825 case 10:
1826 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1827 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1828 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1829 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1830 break;
1831 case 11:
1832 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1833 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1834 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1835 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1836 break;
1837 case 12:
1838 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1839 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1840 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1841 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1842 break;
1843 case 13:
1844 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1845 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1846 break;
1847 case 14:
1848 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1849 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1850 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1851 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1852 break;
1853 case 16:
1854 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1855 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1856 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1857 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1858 break;
1859 case 17:
1860 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1861 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1862 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1863 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1864 break;
1865 case 27:
1866 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1867 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1868 break;
1869 case 28:
1870 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1871 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1872 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1873 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1874 break;
1875 case 29:
1876 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1877 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1878 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1879 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1880 break;
1881 case 30:
1882 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1883 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1884 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1885 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1886 break;
1887 default:
1888 gb_tile_moden = 0;
1889 break;
1891 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1892 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1894 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1895 switch (reg_offset) {
1896 case 0:
1897 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1898 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1899 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1900 NUM_BANKS(ADDR_SURF_16_BANK));
1901 break;
1902 case 1:
1903 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1904 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1905 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1906 NUM_BANKS(ADDR_SURF_16_BANK));
1907 break;
1908 case 2:
1909 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1910 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1911 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1912 NUM_BANKS(ADDR_SURF_16_BANK));
1913 break;
1914 case 3:
1915 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1916 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1917 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1918 NUM_BANKS(ADDR_SURF_16_BANK));
1919 break;
1920 case 4:
1921 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1922 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1923 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1924 NUM_BANKS(ADDR_SURF_8_BANK));
1925 break;
1926 case 5:
1927 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1928 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1929 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1930 NUM_BANKS(ADDR_SURF_4_BANK));
1931 break;
1932 case 6:
1933 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1934 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1935 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1936 NUM_BANKS(ADDR_SURF_2_BANK));
1937 break;
1938 case 8:
1939 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1940 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1941 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1942 NUM_BANKS(ADDR_SURF_16_BANK));
1943 break;
1944 case 9:
1945 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1946 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1947 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1948 NUM_BANKS(ADDR_SURF_16_BANK));
1949 break;
1950 case 10:
1951 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1952 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1953 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1954 NUM_BANKS(ADDR_SURF_16_BANK));
1955 break;
1956 case 11:
1957 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1958 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1959 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1960 NUM_BANKS(ADDR_SURF_16_BANK));
1961 break;
1962 case 12:
1963 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1964 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1965 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1966 NUM_BANKS(ADDR_SURF_8_BANK));
1967 break;
1968 case 13:
1969 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1970 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1971 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1972 NUM_BANKS(ADDR_SURF_4_BANK));
1973 break;
1974 case 14:
1975 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1976 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1977 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1978 NUM_BANKS(ADDR_SURF_2_BANK));
1979 break;
1980 default:
1981 gb_tile_moden = 0;
1982 break;
1984 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1986 } else if (num_pipe_configs == 4) {
1987 if (num_rbs == 4) {
1988 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1989 switch (reg_offset) {
1990 case 0:
1991 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1992 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1993 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1994 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1995 break;
1996 case 1:
1997 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1998 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1999 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2000 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2001 break;
2002 case 2:
2003 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2004 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2005 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2006 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2007 break;
2008 case 3:
2009 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2010 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2011 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2012 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2013 break;
2014 case 4:
2015 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2016 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2017 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2018 TILE_SPLIT(split_equal_to_row_size));
2019 break;
2020 case 5:
2021 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2022 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2023 break;
2024 case 6:
2025 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2026 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2027 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2028 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2029 break;
2030 case 7:
2031 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2032 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2033 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2034 TILE_SPLIT(split_equal_to_row_size));
2035 break;
2036 case 8:
2037 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2038 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2039 break;
2040 case 9:
2041 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2042 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2043 break;
2044 case 10:
2045 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2046 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2047 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2049 break;
2050 case 11:
2051 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2052 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2053 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2054 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2055 break;
2056 case 12:
2057 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2058 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2059 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2060 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2061 break;
2062 case 13:
2063 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2064 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2065 break;
2066 case 14:
2067 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2068 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2069 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071 break;
2072 case 16:
2073 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2074 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2075 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077 break;
2078 case 17:
2079 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2080 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2081 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2082 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2083 break;
2084 case 27:
2085 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2086 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2087 break;
2088 case 28:
2089 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2090 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2091 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093 break;
2094 case 29:
2095 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2096 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2097 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2099 break;
2100 case 30:
2101 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2102 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2103 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2105 break;
2106 default:
2107 gb_tile_moden = 0;
2108 break;
2110 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2111 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2113 } else if (num_rbs < 4) {
2114 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2115 switch (reg_offset) {
2116 case 0:
2117 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2118 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2119 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2120 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2121 break;
2122 case 1:
2123 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2125 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2126 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2127 break;
2128 case 2:
2129 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2130 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2131 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2132 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2133 break;
2134 case 3:
2135 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2136 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2137 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2138 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2139 break;
2140 case 4:
2141 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2142 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2143 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2144 TILE_SPLIT(split_equal_to_row_size));
2145 break;
2146 case 5:
2147 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2148 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2149 break;
2150 case 6:
2151 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2152 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2153 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2154 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2155 break;
2156 case 7:
2157 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2158 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2159 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2160 TILE_SPLIT(split_equal_to_row_size));
2161 break;
2162 case 8:
2163 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2164 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2165 break;
2166 case 9:
2167 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2168 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2169 break;
2170 case 10:
2171 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2172 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2173 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2174 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2175 break;
2176 case 11:
2177 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2178 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2179 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2180 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2181 break;
2182 case 12:
2183 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2184 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2185 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2186 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2187 break;
2188 case 13:
2189 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2190 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2191 break;
2192 case 14:
2193 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2194 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2197 break;
2198 case 16:
2199 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2200 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2202 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2203 break;
2204 case 17:
2205 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2206 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2207 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2209 break;
2210 case 27:
2211 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2212 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2213 break;
2214 case 28:
2215 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2216 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2217 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2219 break;
2220 case 29:
2221 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2222 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2223 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2224 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2225 break;
2226 case 30:
2227 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2228 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2229 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2231 break;
2232 default:
2233 gb_tile_moden = 0;
2234 break;
2236 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2237 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2240 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2241 switch (reg_offset) {
2242 case 0:
2243 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2245 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2246 NUM_BANKS(ADDR_SURF_16_BANK));
2247 break;
2248 case 1:
2249 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2251 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252 NUM_BANKS(ADDR_SURF_16_BANK));
2253 break;
2254 case 2:
2255 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2257 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258 NUM_BANKS(ADDR_SURF_16_BANK));
2259 break;
2260 case 3:
2261 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264 NUM_BANKS(ADDR_SURF_16_BANK));
2265 break;
2266 case 4:
2267 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2269 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2270 NUM_BANKS(ADDR_SURF_16_BANK));
2271 break;
2272 case 5:
2273 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2276 NUM_BANKS(ADDR_SURF_8_BANK));
2277 break;
2278 case 6:
2279 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2281 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2282 NUM_BANKS(ADDR_SURF_4_BANK));
2283 break;
2284 case 8:
2285 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2286 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2287 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288 NUM_BANKS(ADDR_SURF_16_BANK));
2289 break;
2290 case 9:
2291 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2292 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2293 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294 NUM_BANKS(ADDR_SURF_16_BANK));
2295 break;
2296 case 10:
2297 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2299 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2300 NUM_BANKS(ADDR_SURF_16_BANK));
2301 break;
2302 case 11:
2303 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2305 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2306 NUM_BANKS(ADDR_SURF_16_BANK));
2307 break;
2308 case 12:
2309 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2310 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2311 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2312 NUM_BANKS(ADDR_SURF_16_BANK));
2313 break;
2314 case 13:
2315 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2316 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2317 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2318 NUM_BANKS(ADDR_SURF_8_BANK));
2319 break;
2320 case 14:
2321 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2323 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2324 NUM_BANKS(ADDR_SURF_4_BANK));
2325 break;
2326 default:
2327 gb_tile_moden = 0;
2328 break;
2330 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2332 } else if (num_pipe_configs == 2) {
2333 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2334 switch (reg_offset) {
2335 case 0:
2336 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2338 PIPE_CONFIG(ADDR_SURF_P2) |
2339 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2340 break;
2341 case 1:
2342 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2344 PIPE_CONFIG(ADDR_SURF_P2) |
2345 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2346 break;
2347 case 2:
2348 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2350 PIPE_CONFIG(ADDR_SURF_P2) |
2351 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2352 break;
2353 case 3:
2354 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2355 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2356 PIPE_CONFIG(ADDR_SURF_P2) |
2357 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2358 break;
2359 case 4:
2360 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2361 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2362 PIPE_CONFIG(ADDR_SURF_P2) |
2363 TILE_SPLIT(split_equal_to_row_size));
2364 break;
2365 case 5:
2366 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2367 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2368 break;
2369 case 6:
2370 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2371 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372 PIPE_CONFIG(ADDR_SURF_P2) |
2373 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2374 break;
2375 case 7:
2376 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2377 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2378 PIPE_CONFIG(ADDR_SURF_P2) |
2379 TILE_SPLIT(split_equal_to_row_size));
2380 break;
2381 case 8:
2382 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2383 break;
2384 case 9:
2385 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2386 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2387 break;
2388 case 10:
2389 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2391 PIPE_CONFIG(ADDR_SURF_P2) |
2392 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2393 break;
2394 case 11:
2395 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2396 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2397 PIPE_CONFIG(ADDR_SURF_P2) |
2398 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2399 break;
2400 case 12:
2401 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2402 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2403 PIPE_CONFIG(ADDR_SURF_P2) |
2404 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405 break;
2406 case 13:
2407 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2408 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2409 break;
2410 case 14:
2411 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2412 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2413 PIPE_CONFIG(ADDR_SURF_P2) |
2414 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415 break;
2416 case 16:
2417 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2418 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2419 PIPE_CONFIG(ADDR_SURF_P2) |
2420 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421 break;
2422 case 17:
2423 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2424 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2425 PIPE_CONFIG(ADDR_SURF_P2) |
2426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427 break;
2428 case 27:
2429 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2431 break;
2432 case 28:
2433 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2434 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435 PIPE_CONFIG(ADDR_SURF_P2) |
2436 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437 break;
2438 case 29:
2439 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2441 PIPE_CONFIG(ADDR_SURF_P2) |
2442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2443 break;
2444 case 30:
2445 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2446 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2447 PIPE_CONFIG(ADDR_SURF_P2) |
2448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2449 break;
2450 default:
2451 gb_tile_moden = 0;
2452 break;
2454 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2455 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2457 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2458 switch (reg_offset) {
2459 case 0:
2460 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2461 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2462 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2463 NUM_BANKS(ADDR_SURF_16_BANK));
2464 break;
2465 case 1:
2466 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2467 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2468 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2469 NUM_BANKS(ADDR_SURF_16_BANK));
2470 break;
2471 case 2:
2472 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2474 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2475 NUM_BANKS(ADDR_SURF_16_BANK));
2476 break;
2477 case 3:
2478 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2481 NUM_BANKS(ADDR_SURF_16_BANK));
2482 break;
2483 case 4:
2484 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2487 NUM_BANKS(ADDR_SURF_16_BANK));
2488 break;
2489 case 5:
2490 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2493 NUM_BANKS(ADDR_SURF_16_BANK));
2494 break;
2495 case 6:
2496 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2499 NUM_BANKS(ADDR_SURF_8_BANK));
2500 break;
2501 case 8:
2502 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2503 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2504 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2505 NUM_BANKS(ADDR_SURF_16_BANK));
2506 break;
2507 case 9:
2508 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2511 NUM_BANKS(ADDR_SURF_16_BANK));
2512 break;
2513 case 10:
2514 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2515 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2516 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2517 NUM_BANKS(ADDR_SURF_16_BANK));
2518 break;
2519 case 11:
2520 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2523 NUM_BANKS(ADDR_SURF_16_BANK));
2524 break;
2525 case 12:
2526 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2529 NUM_BANKS(ADDR_SURF_16_BANK));
2530 break;
2531 case 13:
2532 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2535 NUM_BANKS(ADDR_SURF_16_BANK));
2536 break;
2537 case 14:
2538 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2541 NUM_BANKS(ADDR_SURF_8_BANK));
2542 break;
2543 default:
2544 gb_tile_moden = 0;
2545 break;
2547 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2549 } else
2550 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2554 * cik_select_se_sh - select which SE, SH to address
2556 * @rdev: radeon_device pointer
2557 * @se_num: shader engine to address
2558 * @sh_num: sh block to address
2560 * Select which SE, SH combinations to address. Certain
2561 * registers are instanced per SE or SH. 0xffffffff means
2562 * broadcast to all SEs or SHs (CIK).
2564 static void cik_select_se_sh(struct radeon_device *rdev,
2565 u32 se_num, u32 sh_num)
2567 u32 data = INSTANCE_BROADCAST_WRITES;
2569 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2570 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2571 else if (se_num == 0xffffffff)
2572 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2573 else if (sh_num == 0xffffffff)
2574 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2575 else
2576 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2577 WREG32(GRBM_GFX_INDEX, data);
2581 * cik_create_bitmask - create a bitmask
2583 * @bit_width: length of the mask
2585 * create a variable length bit mask (CIK).
2586 * Returns the bitmask.
2588 static u32 cik_create_bitmask(u32 bit_width)
2590 u32 i, mask = 0;
2592 for (i = 0; i < bit_width; i++) {
2593 mask <<= 1;
2594 mask |= 1;
2596 return mask;
2600 * cik_select_se_sh - select which SE, SH to address
2602 * @rdev: radeon_device pointer
2603 * @max_rb_num: max RBs (render backends) for the asic
2604 * @se_num: number of SEs (shader engines) for the asic
2605 * @sh_per_se: number of SH blocks per SE for the asic
2607 * Calculates the bitmask of disabled RBs (CIK).
2608 * Returns the disabled RB bitmask.
2610 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2611 u32 max_rb_num_per_se,
2612 u32 sh_per_se)
2614 u32 data, mask;
2616 data = RREG32(CC_RB_BACKEND_DISABLE);
2617 if (data & 1)
2618 data &= BACKEND_DISABLE_MASK;
2619 else
2620 data = 0;
2621 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2623 data >>= BACKEND_DISABLE_SHIFT;
2625 mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
2627 return data & mask;
2631 * cik_setup_rb - setup the RBs on the asic
2633 * @rdev: radeon_device pointer
2634 * @se_num: number of SEs (shader engines) for the asic
2635 * @sh_per_se: number of SH blocks per SE for the asic
2636 * @max_rb_num: max RBs (render backends) for the asic
2638 * Configures per-SE/SH RB registers (CIK).
2640 static void cik_setup_rb(struct radeon_device *rdev,
2641 u32 se_num, u32 sh_per_se,
2642 u32 max_rb_num_per_se)
2644 int i, j;
2645 u32 data, mask;
2646 u32 disabled_rbs = 0;
2647 u32 enabled_rbs = 0;
2649 for (i = 0; i < se_num; i++) {
2650 for (j = 0; j < sh_per_se; j++) {
2651 cik_select_se_sh(rdev, i, j);
2652 data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
2653 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2656 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2658 mask = 1;
2659 for (i = 0; i < max_rb_num_per_se * se_num; i++) {
2660 if (!(disabled_rbs & mask))
2661 enabled_rbs |= mask;
2662 mask <<= 1;
2665 rdev->config.cik.backend_enable_mask = enabled_rbs;
2667 for (i = 0; i < se_num; i++) {
2668 cik_select_se_sh(rdev, i, 0xffffffff);
2669 data = 0;
2670 for (j = 0; j < sh_per_se; j++) {
2671 switch (enabled_rbs & 3) {
2672 case 1:
2673 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2674 break;
2675 case 2:
2676 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2677 break;
2678 case 3:
2679 default:
2680 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2681 break;
2683 enabled_rbs >>= 2;
2685 WREG32(PA_SC_RASTER_CONFIG, data);
2687 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2691 * cik_gpu_init - setup the 3D engine
2693 * @rdev: radeon_device pointer
2695 * Configures the 3D engine and tiling configuration
2696 * registers so that the 3D engine is usable.
2698 static void cik_gpu_init(struct radeon_device *rdev)
2700 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2701 u32 mc_shared_chmap, mc_arb_ramcfg;
2702 u32 hdp_host_path_cntl;
2703 u32 tmp;
2704 int i, j;
2706 switch (rdev->family) {
2707 case CHIP_BONAIRE:
2708 rdev->config.cik.max_shader_engines = 2;
2709 rdev->config.cik.max_tile_pipes = 4;
2710 rdev->config.cik.max_cu_per_sh = 7;
2711 rdev->config.cik.max_sh_per_se = 1;
2712 rdev->config.cik.max_backends_per_se = 2;
2713 rdev->config.cik.max_texture_channel_caches = 4;
2714 rdev->config.cik.max_gprs = 256;
2715 rdev->config.cik.max_gs_threads = 32;
2716 rdev->config.cik.max_hw_contexts = 8;
2718 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2719 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2720 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2721 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2722 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2723 break;
2724 case CHIP_KAVERI:
2725 rdev->config.cik.max_shader_engines = 1;
2726 rdev->config.cik.max_tile_pipes = 4;
2727 if ((rdev->pdev->device == 0x1304) ||
2728 (rdev->pdev->device == 0x1305) ||
2729 (rdev->pdev->device == 0x130C) ||
2730 (rdev->pdev->device == 0x130F) ||
2731 (rdev->pdev->device == 0x1310) ||
2732 (rdev->pdev->device == 0x1311) ||
2733 (rdev->pdev->device == 0x131C)) {
2734 rdev->config.cik.max_cu_per_sh = 8;
2735 rdev->config.cik.max_backends_per_se = 2;
2736 } else if ((rdev->pdev->device == 0x1309) ||
2737 (rdev->pdev->device == 0x130A) ||
2738 (rdev->pdev->device == 0x130D) ||
2739 (rdev->pdev->device == 0x1313) ||
2740 (rdev->pdev->device == 0x131D)) {
2741 rdev->config.cik.max_cu_per_sh = 6;
2742 rdev->config.cik.max_backends_per_se = 2;
2743 } else if ((rdev->pdev->device == 0x1306) ||
2744 (rdev->pdev->device == 0x1307) ||
2745 (rdev->pdev->device == 0x130B) ||
2746 (rdev->pdev->device == 0x130E) ||
2747 (rdev->pdev->device == 0x1315) ||
2748 (rdev->pdev->device == 0x1318) ||
2749 (rdev->pdev->device == 0x131B)) {
2750 rdev->config.cik.max_cu_per_sh = 4;
2751 rdev->config.cik.max_backends_per_se = 1;
2752 } else {
2753 rdev->config.cik.max_cu_per_sh = 3;
2754 rdev->config.cik.max_backends_per_se = 1;
2756 rdev->config.cik.max_sh_per_se = 1;
2757 rdev->config.cik.max_texture_channel_caches = 4;
2758 rdev->config.cik.max_gprs = 256;
2759 rdev->config.cik.max_gs_threads = 16;
2760 rdev->config.cik.max_hw_contexts = 8;
2762 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2763 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2764 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2765 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2766 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2767 break;
2768 case CHIP_KABINI:
2769 default:
2770 rdev->config.cik.max_shader_engines = 1;
2771 rdev->config.cik.max_tile_pipes = 2;
2772 rdev->config.cik.max_cu_per_sh = 2;
2773 rdev->config.cik.max_sh_per_se = 1;
2774 rdev->config.cik.max_backends_per_se = 1;
2775 rdev->config.cik.max_texture_channel_caches = 2;
2776 rdev->config.cik.max_gprs = 256;
2777 rdev->config.cik.max_gs_threads = 16;
2778 rdev->config.cik.max_hw_contexts = 8;
2780 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2781 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2782 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2783 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2784 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2785 break;
2788 /* Initialize HDP */
2789 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2790 WREG32((0x2c14 + j), 0x00000000);
2791 WREG32((0x2c18 + j), 0x00000000);
2792 WREG32((0x2c1c + j), 0x00000000);
2793 WREG32((0x2c20 + j), 0x00000000);
2794 WREG32((0x2c24 + j), 0x00000000);
2797 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2799 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2801 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2802 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2804 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2805 rdev->config.cik.mem_max_burst_length_bytes = 256;
2806 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2807 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2808 if (rdev->config.cik.mem_row_size_in_kb > 4)
2809 rdev->config.cik.mem_row_size_in_kb = 4;
2810 /* XXX use MC settings? */
2811 rdev->config.cik.shader_engine_tile_size = 32;
2812 rdev->config.cik.num_gpus = 1;
2813 rdev->config.cik.multi_gpu_tile_size = 64;
2815 /* fix up row size */
2816 gb_addr_config &= ~ROW_SIZE_MASK;
2817 switch (rdev->config.cik.mem_row_size_in_kb) {
2818 case 1:
2819 default:
2820 gb_addr_config |= ROW_SIZE(0);
2821 break;
2822 case 2:
2823 gb_addr_config |= ROW_SIZE(1);
2824 break;
2825 case 4:
2826 gb_addr_config |= ROW_SIZE(2);
2827 break;
2830 /* setup tiling info dword. gb_addr_config is not adequate since it does
2831 * not have bank info, so create a custom tiling dword.
2832 * bits 3:0 num_pipes
2833 * bits 7:4 num_banks
2834 * bits 11:8 group_size
2835 * bits 15:12 row_size
2837 rdev->config.cik.tile_config = 0;
2838 switch (rdev->config.cik.num_tile_pipes) {
2839 case 1:
2840 rdev->config.cik.tile_config |= (0 << 0);
2841 break;
2842 case 2:
2843 rdev->config.cik.tile_config |= (1 << 0);
2844 break;
2845 case 4:
2846 rdev->config.cik.tile_config |= (2 << 0);
2847 break;
2848 case 8:
2849 default:
2850 /* XXX what about 12? */
2851 rdev->config.cik.tile_config |= (3 << 0);
2852 break;
2854 rdev->config.cik.tile_config |=
2855 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
2856 rdev->config.cik.tile_config |=
2857 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2858 rdev->config.cik.tile_config |=
2859 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2861 WREG32(GB_ADDR_CONFIG, gb_addr_config);
2862 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2863 WREG32(DMIF_ADDR_CALC, gb_addr_config);
2864 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2865 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2866 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2867 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2868 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2870 cik_tiling_mode_table_init(rdev);
2872 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2873 rdev->config.cik.max_sh_per_se,
2874 rdev->config.cik.max_backends_per_se);
2876 /* set HW defaults for 3D engine */
2877 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2879 WREG32(SX_DEBUG_1, 0x20);
2881 WREG32(TA_CNTL_AUX, 0x00010000);
2883 tmp = RREG32(SPI_CONFIG_CNTL);
2884 tmp |= 0x03000000;
2885 WREG32(SPI_CONFIG_CNTL, tmp);
2887 WREG32(SQ_CONFIG, 1);
2889 WREG32(DB_DEBUG, 0);
2891 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2892 tmp |= 0x00000400;
2893 WREG32(DB_DEBUG2, tmp);
2895 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2896 tmp |= 0x00020200;
2897 WREG32(DB_DEBUG3, tmp);
2899 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2900 tmp |= 0x00018208;
2901 WREG32(CB_HW_CONTROL, tmp);
2903 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2905 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2906 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2907 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2908 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2910 WREG32(VGT_NUM_INSTANCES, 1);
2912 WREG32(CP_PERFMON_CNTL, 0);
2914 WREG32(SQ_CONFIG, 0);
2916 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2917 FORCE_EOV_MAX_REZ_CNT(255)));
2919 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2920 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2922 WREG32(VGT_GS_VERTEX_REUSE, 16);
2923 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2925 tmp = RREG32(HDP_MISC_CNTL);
2926 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2927 WREG32(HDP_MISC_CNTL, tmp);
2929 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2930 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2932 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2933 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2935 udelay(50);
2939 * GPU scratch registers helpers function.
2942 * cik_scratch_init - setup driver info for CP scratch regs
2944 * @rdev: radeon_device pointer
2946 * Set up the number and offset of the CP scratch registers.
2947 * NOTE: use of CP scratch registers is a legacy inferface and
2948 * is not used by default on newer asics (r6xx+). On newer asics,
2949 * memory buffers are used for fences rather than scratch regs.
2951 static void cik_scratch_init(struct radeon_device *rdev)
2953 int i;
2955 rdev->scratch.num_reg = 7;
2956 rdev->scratch.reg_base = SCRATCH_REG0;
2957 for (i = 0; i < rdev->scratch.num_reg; i++) {
2958 rdev->scratch.free[i] = true;
2959 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2964 * cik_ring_test - basic gfx ring test
2966 * @rdev: radeon_device pointer
2967 * @ring: radeon_ring structure holding ring information
2969 * Allocate a scratch register and write to it using the gfx ring (CIK).
2970 * Provides a basic gfx ring test to verify that the ring is working.
2971 * Used by cik_cp_gfx_resume();
2972 * Returns 0 on success, error on failure.
2974 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2976 uint32_t scratch;
2977 uint32_t tmp = 0;
2978 unsigned i;
2979 int r;
2981 r = radeon_scratch_get(rdev, &scratch);
2982 if (r) {
2983 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2984 return r;
2986 WREG32(scratch, 0xCAFEDEAD);
2987 r = radeon_ring_lock(rdev, ring, 3);
2988 if (r) {
2989 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2990 radeon_scratch_free(rdev, scratch);
2991 return r;
2993 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2994 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2995 radeon_ring_write(ring, 0xDEADBEEF);
2996 radeon_ring_unlock_commit(rdev, ring);
2998 for (i = 0; i < rdev->usec_timeout; i++) {
2999 tmp = RREG32(scratch);
3000 if (tmp == 0xDEADBEEF)
3001 break;
3002 DRM_UDELAY(1);
3004 if (i < rdev->usec_timeout) {
3005 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3006 } else {
3007 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3008 ring->idx, scratch, tmp);
3009 r = -EINVAL;
3011 radeon_scratch_free(rdev, scratch);
3012 return r;
3016 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3018 * @rdev: radeon_device pointer
3019 * @fence: radeon fence object
3021 * Emits a fence sequnce number on the gfx ring and flushes
3022 * GPU caches.
3024 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3025 struct radeon_fence *fence)
3027 struct radeon_ring *ring = &rdev->ring[fence->ring];
3028 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3030 /* EVENT_WRITE_EOP - flush caches, send int */
3031 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3032 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3033 EOP_TC_ACTION_EN |
3034 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3035 EVENT_INDEX(5)));
3036 radeon_ring_write(ring, addr & 0xfffffffc);
3037 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3038 radeon_ring_write(ring, fence->seq);
3039 radeon_ring_write(ring, 0);
3040 /* HDP flush */
3041 /* We should be using the new WAIT_REG_MEM special op packet here
3042 * but it causes the CP to hang
3044 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3045 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3046 WRITE_DATA_DST_SEL(0)));
3047 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3048 radeon_ring_write(ring, 0);
3049 radeon_ring_write(ring, 0);
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address the fence value is written back to */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL selects what to write back, INT_SEL the interrupt behaviour */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	/* low bits of the fence address; must be dword aligned */
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
/**
 * cik_semaphore_ring_emit - emit a semaphore command on a ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring buffer object
 * @semaphore: radeon semaphore object
 * @emit_wait: true to emit a semaphore wait, false to emit a signal
 *
 * Emits a MEM_SEMAPHORE packet that waits on (or signals) the
 * semaphore located at @semaphore->gpu_addr.
 */
void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	/* select wait vs. signal operation in the packet control dword */
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}
/*
 * IB stuff
 */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for the SET_UCONFIG_REG below + 4 for the IB packet */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for the WRITE_DATA below + 4 for the IB packet */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB size in dwords plus the VM id to execute it under (0 = kernel) */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3163 * cik_ib_test - basic gfx ring IB test
3165 * @rdev: radeon_device pointer
3166 * @ring: radeon_ring structure holding ring information
3168 * Allocate an IB and execute it on the gfx ring (CIK).
3169 * Provides a basic gfx ring test to verify that IBs are working.
3170 * Returns 0 on success, error on failure.
3172 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3174 struct radeon_ib ib;
3175 uint32_t scratch;
3176 uint32_t tmp = 0;
3177 unsigned i;
3178 int r;
3180 r = radeon_scratch_get(rdev, &scratch);
3181 if (r) {
3182 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3183 return r;
3185 WREG32(scratch, 0xCAFEDEAD);
3186 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3187 if (r) {
3188 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3189 radeon_scratch_free(rdev, scratch);
3190 return r;
3192 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3193 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3194 ib.ptr[2] = 0xDEADBEEF;
3195 ib.length_dw = 3;
3196 r = radeon_ib_schedule(rdev, &ib, NULL);
3197 if (r) {
3198 radeon_scratch_free(rdev, scratch);
3199 radeon_ib_free(rdev, &ib);
3200 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3201 return r;
3203 r = radeon_fence_wait(ib.fence, false);
3204 if (r) {
3205 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3206 radeon_scratch_free(rdev, scratch);
3207 radeon_ib_free(rdev, &ib);
3208 return r;
3210 for (i = 0; i < rdev->usec_timeout; i++) {
3211 tmp = RREG32(scratch);
3212 if (tmp == 0xDEADBEEF)
3213 break;
3214 DRM_UDELAY(1);
3216 if (i < rdev->usec_timeout) {
3217 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3218 } else {
3219 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3220 scratch, tmp);
3221 r = -EINVAL;
3223 radeon_scratch_free(rdev, scratch);
3224 radeon_ib_free(rdev, &ib);
3225 return r;
3229 * CP.
 * On CIK, gfx and compute now have independent command processors.
3232 * GFX
3233 * Gfx consists of a single ring and can process both gfx jobs and
3234 * compute jobs. The gfx CP consists of three microengines (ME):
3235 * PFP - Pre-Fetch Parser
3236 * ME - Micro Engine
3237 * CE - Constant Engine
3238 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3240 * used by the DE so that they can be loaded into cache in parallel
3241 * while the DE is processing state update packets.
3243 * Compute
3244 * The compute CP consists of two microengines (ME):
3245 * MEC1 - Compute MicroEngine 1
3246 * MEC2 - Compute MicroEngine 2
3247 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3248 * The queues are exposed to userspace and are programmed directly
3249 * by the compute runtime.
3252 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3254 * @rdev: radeon_device pointer
3255 * @enable: enable or disable the MEs
3257 * Halts or unhalts the gfx MEs.
3259 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3261 if (enable)
3262 WREG32(CP_ME_CNTL, 0);
3263 else {
3264 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3265 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3267 udelay(50);
/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	/* all three gfx ucode images must have been fetched already */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	/* halt the MEs before touching their ucode RAM */
	cik_cp_gfx_enable(rdev, false);

	/* PFP */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);	/* reset write address, auto-increments */
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	/* leave all ucode RAM addresses reset to 0 */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* reserve room for the default state plus 17 dwords of init packets */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the generated default (golden) register state */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the MEs before freeing the ring */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* drop RB_RPTR_WR_ENA again now that the pointers are reset */
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base address, 256-byte aligned */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}
3464 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3465 struct radeon_ring *ring)
3467 u32 rptr;
3471 if (rdev->wb.enabled) {
3472 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3473 } else {
3474 mutex_lock(&rdev->srbm_mutex);
3475 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3476 rptr = RREG32(CP_HQD_PQ_RPTR);
3477 cik_srbm_select(rdev, 0, 0, 0, 0);
3478 mutex_unlock(&rdev->srbm_mutex);
3481 return rptr;
3484 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3485 struct radeon_ring *ring)
3487 u32 wptr;
3489 if (rdev->wb.enabled) {
3490 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3491 } else {
3492 mutex_lock(&rdev->srbm_mutex);
3493 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3494 wptr = RREG32(CP_HQD_PQ_WPTR);
3495 cik_srbm_select(rdev, 0, 0, 0, 0);
3496 mutex_unlock(&rdev->srbm_mutex);
3499 return wptr;
/**
 * cik_compute_ring_set_wptr - commit the write pointer of a compute ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring buffer object
 *
 * Publishes ring->wptr to the writeback buffer and rings the queue's
 * doorbell so the CP picks up the new work.
 */
void cik_compute_ring_set_wptr(struct radeon_device *rdev,
			       struct radeon_ring *ring)
{
	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
	WDOORBELL32(ring->doorbell_offset, ring->wptr);
}
3510 * cik_cp_compute_enable - enable/disable the compute CP MEs
3512 * @rdev: radeon_device pointer
3513 * @enable: enable or disable the MEs
3515 * Halts or unhalts the compute MEs.
3517 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3519 if (enable)
3520 WREG32(CP_MEC_CNTL, 0);
3521 else {
3522 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3523 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3524 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3526 udelay(50);
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before touching their ucode RAM */
	cik_cp_compute_enable(rdev, false);

	/* MEC1 */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);	/* reset write address, auto-increments */
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	if (rdev->family == CHIP_KAVERI) {
		/* MEC2: only KAVERI has a second MEC; it is loaded with
		 * the same ucode image as MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* unhalt the MECs; queue setup itself happens in cik_cp_compute_resume() */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
3582 * cik_cp_compute_fini - stop the compute queues
3584 * @rdev: radeon_device pointer
3586 * Stop the compute queues and tear down the driver queue
3587 * info.
3589 static void cik_cp_compute_fini(struct radeon_device *rdev)
3591 int i, idx, r;
3593 cik_cp_compute_enable(rdev, false);
3595 for (i = 0; i < 2; i++) {
3596 if (i == 0)
3597 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3598 else
3599 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3601 if (rdev->ring[idx].mqd_obj) {
3602 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3603 if (unlikely(r != 0))
3604 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3606 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3607 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3609 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3610 rdev->ring[idx].mqd_obj = NULL;
/* Tear down the MEC HPD EOP buffer object allocated by cik_mec_init(). */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
/* bytes of HPD EOP buffer space reserved per compute pipe (allocated x2) */
#define MEC_HPD_SIZE 2048

/* Set up MEC topology info and allocate/pin/clear the shared HPD EOP buffer. */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* one 2*MEC_HPD_SIZE chunk per pipe, GTT domain */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
/* CPU-side shadow of the per-queue CP_MQD_*/
/* and CP_HQD_* registers used when programming a compute hardware queue.
 * Embedded in struct bonaire_mqd, which is written into a GPU-visible BO,
 * so the field layout must not be reordered. */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
/* Memory Queue Descriptor (MQD) for a CIK compute queue.  This structure
 * is written into a GPU-visible BO whose address is handed to the CP via
 * CP_MQD_BASE_ADDR (see cik_cp_compute_resume()), so the layout must not
 * be reordered. */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* register shadow, see struct hqd_registers */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
3757 * cik_cp_compute_resume - setup the compute queue registers
3759 * @rdev: radeon_device pointer
3761 * Program the compute queues and test them to make sure they
3762 * are working.
3763 * Returns 0 for success, error for failure.
3765 static int cik_cp_compute_resume(struct radeon_device *rdev)
3767 int r, i, idx;
3768 u32 tmp;
3769 bool use_doorbell = true;
3770 u64 hqd_gpu_addr;
3771 u64 mqd_gpu_addr;
3772 u64 eop_gpu_addr;
3773 u64 wb_gpu_addr;
3774 u32 *buf;
3775 struct bonaire_mqd *mqd;
3777 r = cik_cp_compute_start(rdev);
3778 if (r)
3779 return r;
3781 /* fix up chicken bits */
3782 tmp = RREG32(CP_CPF_DEBUG);
3783 tmp |= (1 << 23);
3784 WREG32(CP_CPF_DEBUG, tmp);
3786 /* init the pipes */
3787 mutex_lock(&rdev->srbm_mutex);
3788 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3789 int me = (i < 4) ? 1 : 2;
3790 int pipe = (i < 4) ? i : (i - 4);
3792 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3794 cik_srbm_select(rdev, me, pipe, 0, 0);
3796 /* write the EOP addr */
3797 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3798 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3800 /* set the VMID assigned */
3801 WREG32(CP_HPD_EOP_VMID, 0);
3803 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3804 tmp = RREG32(CP_HPD_EOP_CONTROL);
3805 tmp &= ~EOP_SIZE_MASK;
3806 tmp |= order_base_2(MEC_HPD_SIZE / 8);
3807 WREG32(CP_HPD_EOP_CONTROL, tmp);
3809 cik_srbm_select(rdev, 0, 0, 0, 0);
3810 mutex_unlock(&rdev->srbm_mutex);
3812 /* init the queues. Just two for now. */
3813 for (i = 0; i < 2; i++) {
3814 if (i == 0)
3815 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3816 else
3817 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3819 if (rdev->ring[idx].mqd_obj == NULL) {
3820 r = radeon_bo_create(rdev,
3821 sizeof(struct bonaire_mqd),
3822 PAGE_SIZE, true,
3823 RADEON_GEM_DOMAIN_GTT, NULL,
3824 &rdev->ring[idx].mqd_obj);
3825 if (r) {
3826 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3827 return r;
3831 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3832 if (unlikely(r != 0)) {
3833 cik_cp_compute_fini(rdev);
3834 return r;
3836 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3837 &mqd_gpu_addr);
3838 if (r) {
3839 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3840 cik_cp_compute_fini(rdev);
3841 return r;
3843 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3844 if (r) {
3845 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3846 cik_cp_compute_fini(rdev);
3847 return r;
3850 /* doorbell offset */
3851 rdev->ring[idx].doorbell_offset =
3852 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3854 /* init the mqd struct */
3855 memset(buf, 0, sizeof(struct bonaire_mqd));
3857 mqd = (struct bonaire_mqd *)buf;
3858 mqd->header = 0xC0310800;
3859 mqd->static_thread_mgmt01[0] = 0xffffffff;
3860 mqd->static_thread_mgmt01[1] = 0xffffffff;
3861 mqd->static_thread_mgmt23[0] = 0xffffffff;
3862 mqd->static_thread_mgmt23[1] = 0xffffffff;
3864 mutex_lock(&rdev->srbm_mutex);
3865 cik_srbm_select(rdev, rdev->ring[idx].me,
3866 rdev->ring[idx].pipe,
3867 rdev->ring[idx].queue, 0);
3869 /* disable wptr polling */
3870 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3871 tmp &= ~WPTR_POLL_EN;
3872 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3874 /* enable doorbell? */
3875 mqd->queue_state.cp_hqd_pq_doorbell_control =
3876 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3877 if (use_doorbell)
3878 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3879 else
3880 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3881 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3882 mqd->queue_state.cp_hqd_pq_doorbell_control);
3884 /* disable the queue if it's active */
3885 mqd->queue_state.cp_hqd_dequeue_request = 0;
3886 mqd->queue_state.cp_hqd_pq_rptr = 0;
3887 mqd->queue_state.cp_hqd_pq_wptr= 0;
3888 if (RREG32(CP_HQD_ACTIVE) & 1) {
3889 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3890 for (i = 0; i < rdev->usec_timeout; i++) {
3891 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3892 break;
3893 udelay(1);
3895 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3896 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3897 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3900 /* set the pointer to the MQD */
3901 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3902 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3903 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3904 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3905 /* set MQD vmid to 0 */
3906 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3907 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3908 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3910 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3911 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3912 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3913 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3914 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3915 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3917 /* set up the HQD, this is similar to CP_RB0_CNTL */
3918 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3919 mqd->queue_state.cp_hqd_pq_control &=
3920 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3922 mqd->queue_state.cp_hqd_pq_control |=
3923 order_base_2(rdev->ring[idx].ring_size / 8);
3924 mqd->queue_state.cp_hqd_pq_control |=
3925 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3926 #ifdef __BIG_ENDIAN
3927 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3928 #endif
3929 mqd->queue_state.cp_hqd_pq_control &=
3930 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3931 mqd->queue_state.cp_hqd_pq_control |=
3932 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3933 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3935 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3936 if (i == 0)
3937 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3938 else
3939 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3940 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3941 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3942 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3943 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3944 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3946 /* set the wb address wether it's enabled or not */
3947 if (i == 0)
3948 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3949 else
3950 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3951 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3952 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3953 upper_32_bits(wb_gpu_addr) & 0xffff;
3954 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3955 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3956 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3957 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3959 /* enable the doorbell if requested */
3960 if (use_doorbell) {
3961 mqd->queue_state.cp_hqd_pq_doorbell_control =
3962 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3963 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3964 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3965 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3966 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3967 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3968 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3970 } else {
3971 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3973 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3974 mqd->queue_state.cp_hqd_pq_doorbell_control);
3976 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3977 rdev->ring[idx].wptr = 0;
3978 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3979 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3980 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3981 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3983 /* set the vmid for the queue */
3984 mqd->queue_state.cp_hqd_vmid = 0;
3985 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3987 /* activate the queue */
3988 mqd->queue_state.cp_hqd_active = 1;
3989 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3991 cik_srbm_select(rdev, 0, 0, 0, 0);
3992 mutex_unlock(&rdev->srbm_mutex);
3994 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3995 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3997 rdev->ring[idx].ready = true;
3998 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3999 if (r)
4000 rdev->ring[idx].ready = false;
4003 return 0;
/* Enable or disable both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
/* Load the gfx and compute CP microcode images.
 * Returns 0 on success, or the first error encountered. */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret;

	ret = cik_cp_gfx_load_microcode(rdev);
	if (ret)
		return ret;

	return cik_cp_compute_load_microcode(rdev);
}
/* Tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4032 static int cik_cp_resume(struct radeon_device *rdev)
4034 int r;
4036 cik_enable_gui_idle_interrupt(rdev, false);
4038 r = cik_cp_load_microcode(rdev);
4039 if (r)
4040 return r;
4042 r = cik_cp_gfx_resume(rdev);
4043 if (r)
4044 return r;
4045 r = cik_cp_compute_resume(rdev);
4046 if (r)
4047 return r;
4049 cik_enable_gui_idle_interrupt(rdev, true);
4051 return 0;
/**
 * cik_print_gpu_status_regs - dump GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Dumps the GRBM/SRBM/SDMA/CP status registers to the kernel log,
 * used when diagnosing a hung GPU around a soft reset.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
		 RREG32(GRBM_STATUS));
	dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
		 RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
		 RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
		 RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
		 RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
		 RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
		 RREG32(SRBM_STATUS));
	dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
		 RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce the memory controller before asserting resets */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the block mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC soft reset only on discrete parts, never on IGPs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		/* read back to post the write before delaying */
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		/* deassert the reset bits we set */
		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		/* read back to post the write before delaying */
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
/**
 * cik_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 *
 * Look up which blocks are hung and attempt
 * to reset them.
 * Returns 0 for success.
 */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* mark the engine hung in the BIOS scratch regs while we reset */
	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	cik_gpu_soft_reset(rdev, reset_mask);

	/* re-check; only clear the hung flag if everything is idle now */
	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
/**
 * cik_gfx_is_lockup - check if the 3D engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the 3D engine is locked up (CIK).
 * Returns true if the engine is locked, false if not.
 */
bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);

	/* nothing gfx/compute/CP related is busy -> not locked up */
	if (!(reset_mask & (RADEON_RESET_GFX |
			    RADEON_RESET_COMPUTE |
			    RADEON_RESET_CP))) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force CP activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* MC must be idle while we reprogram the apertures */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end address in bits 31:16, start in bits 15:0
	 * (both in 16MB units, hence >> 24) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture disabled (BOT > TOP range is empty) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
/**
 * cik_mc_init - initialize the memory controller driver params
 *
 * @rdev: radeon_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space (CIK).
 * Returns 0 for success.
 */
static int cik_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM informations */
	rdev->mc.vram_is_ddr = true;
	/* per-channel width in bits */
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	/* decode the number of memory channels from MC_SHARED_CHMAP */
	tmp = RREG32(MC_SHARED_CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* size in MB on si */
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	/* placement logic is shared with SI */
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}
/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0 (the kernel's GART mapping) */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers — purpose unclear from here */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* VMIDs 1-7 and 8-15 use two separate register ranges */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* TC cache setup ??? */
	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L1_STORE_POLICY, 0);

	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);

	WREG32(TC_CFG_L1_VOLATILE, 0);
	WREG32(TC_CFG_L2_VOLATILE, 0);

	/* KV: make sure VM translation is not bypassed */
	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		/* select the per-VMID SRBM state before writing SH_MEM_* */
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	/* restore the selection to VMID 0 */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control (L1 TLB left disabled) */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache (ENABLE_L2_CACHE intentionally omitted) */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
/*
 * vm
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_vm_init - cik vm init callback
 *
 * @rdev: radeon_device pointer
 *
 * Inits cik specific vm parameters (number of VMs, base of vram for
 * VMIDs 1-15) (CIK).
 * Returns 0 for success.
 */
int cik_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	if (rdev->flags & RADEON_IS_IGP) {
		/* IGPs: FB offset register shifted up by 22 — presumably
		 * the register holds the offset in 4MB units */
		u64 tmp = RREG32(MC_VM_FB_OFFSET);
		tmp <<= 22;
		rdev->vm_manager.vram_base_offset = tmp;
	} else
		rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
/**
 * cik_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: faulting MC client id — packed 4-char ASCII block name
 *             (presumably the MCCLIENT fault register; verify at caller)
 *
 * Print human readable fault information (CIK).
 */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 addr, u32 mc_client)
{
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	/* unpack the 4 ASCII bytes of the client name, NUL-terminated */
	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_client, mc_id);
}
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit the flush on
 * @vm: VM to flush for, may be NULL (no-op)
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write the page directory base for this VMID; VMIDs 0-7 and
	 * 8-15 live in two separate register ranges */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* select this VMID's SRBM state before touching SH_MEM_* */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* restore the SRBM selection to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	/* We should be using the WAIT_REG_MEM packet here like in
	 * cik_fence_ring_emit(), but it causes the CP to hang in this
	 * context...
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
/**
 * cik_vm_set_page - update the page tables using sDMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using CP or sDMA (CIK).
 */
void cik_vm_set_page(struct radeon_device *rdev,
		     struct radeon_ib *ib,
		     uint64_t pe,
		     uint64_t addr, unsigned count,
		     uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP */
		while (count) {
			/* 2 dwords of destination address + 2 dwords per
			 * 64-bit PTE, capped to the max WRITE_DATA payload */
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
						    WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system page: translate through the GART,
					 * keep only the page-aligned address bits */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
	}
}
/*
 * RLC
 * The RLC is a multi-purpose microengine that handles a
 * variety of functions, the most important of which is
 * the interrupt controller.
 */
/* Enable/disable the context busy/empty ("GUI idle") interrupt sources
 * in the ring0 CP interrupt control register. */
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
}
4917 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4919 u32 tmp;
4921 tmp = RREG32(RLC_LB_CNTL);
4922 if (enable)
4923 tmp |= LOAD_BALANCE_ENABLE;
4924 else
4925 tmp &= ~LOAD_BALANCE_ENABLE;
4926 WREG32(RLC_LB_CNTL, tmp);
/* Wait (bounded by rdev->usec_timeout per poll loop) for the RLC serdes
 * units to go idle: first the per-CU masters for every SE/SH pair, then
 * the non-CU masters. */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast selection of all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
4954 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4956 u32 tmp;
4958 tmp = RREG32(RLC_CNTL);
4959 if (tmp != rlc)
4960 WREG32(RLC_CNTL, rlc);
/* Disable the RLC if it is running and wait (bounded) for it to go idle,
 * then for the serdes to settle.  Returns the original RLC_CNTL value so
 * the caller can restore it later via cik_update_rlc(). */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait for the GPM to report idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
/* Request RLC "safe mode" via RLC_GPR_REG2 and wait (bounded) for the
 * gfx power/clock status bits, then for the RLC to ack by clearing REQ. */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* post the enter-safe-mode request */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait for gfx power and clock status to assert */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to consume the request (REQ bit cleared) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
/* Ask the RLC to leave safe mode; unlike entry, no ack is awaited. */
void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);
}
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	cik_wait_for_rlc_serdes(rdev);
}
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* give the RLC a moment to come up */
	udelay(50);
}
/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	/* ucode size (in dwords) varies per asic family */
	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		size = BONAIRE_RLC_UCODE_SIZE;
		break;
	case CHIP_KAVERI:
		size = KV_RLC_UCODE_SIZE;
		break;
	case CHIP_KABINI:
		size = KB_RLC_UCODE_SIZE;
		break;
	}

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* broadcast the load-balancing setup to all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* load the RLC microcode; firmware words are stored big-endian */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	WREG32(RLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < size; i++)
		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(RLC_GPM_UCODE_ADDR, 0);

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
/* Enable/disable GFX coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS), honoring rdev->cg_flags. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while programming the serdes override */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		/* restore the RLC to its previous state */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* repeated dummy reads — NOTE(review): purpose (settle/flush?)
		 * is not documented here */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
/* Enable/disable GFX medium-grain clock gating (MGCG) plus the related
 * CP/RLC memory light sleep and CGTS features, per rdev->cg_flags. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear the MGCG override bit (bit 1) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while programming the serdes override */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set the MGCG override bit */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC and CP memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
/* MC/HUB clock-gating control registers iterated by the MC light-sleep
 * and MGCG enable helpers below. */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5246 static void cik_enable_mc_ls(struct radeon_device *rdev,
5247 bool enable)
5249 int i;
5250 u32 orig, data;
5252 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5253 orig = data = RREG32(mc_cg_registers[i]);
5254 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5255 data |= MC_LS_ENABLE;
5256 else
5257 data &= ~MC_LS_ENABLE;
5258 if (data != orig)
5259 WREG32(mc_cg_registers[i], data);
5263 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5264 bool enable)
5266 int i;
5267 u32 orig, data;
5269 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5270 orig = data = RREG32(mc_cg_registers[i]);
5271 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5272 data |= MC_CG_ENABLE;
5273 else
5274 data &= ~MC_CG_ENABLE;
5275 if (data != orig)
5276 WREG32(mc_cg_registers[i], data);
/* Enable/disable SDMA medium-grain clock gating on both engines via
 * their clock control registers. */
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
				 bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
	} else {
		/* set the top byte — NOTE(review): presumably the clock
		 * gating override bits; verify against the register spec */
		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
	}
}
/* Enable/disable SDMA memory light sleep via bit 8 (0x100) of each
 * engine's power control register; writes only on change. */
static void cik_enable_sdma_mgls(struct radeon_device *rdev,
				 bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
		data |= 0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
		data |= 0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
	} else {
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
		data &= ~0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
		data &= ~0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
	}
}
/* Toggle UVD medium-grain clock gating: program the UVD context register
 * UVD_CGC_MEM_CTRL and the DCM bit of UVD_CGC_CTRL.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value read above is immediately discarded by
		 * the assignment below; presumably the read is kept for its side
		 * effect, or this was meant to be "data |= 0xfff" — confirm
		 * against the UVD register programming docs before changing.
		 */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* disable: clear the gating field and the DCM bit */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
5355 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5356 bool enable)
5358 u32 orig, data;
5360 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5362 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5363 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5364 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5365 else
5366 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5367 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5369 if (orig != data)
5370 WREG32_PCIE_PORT(PCIE_CNTL2, data);
5373 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5374 bool enable)
5376 u32 orig, data;
5378 orig = data = RREG32(HDP_HOST_PATH_CNTL);
5380 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5381 data &= ~CLOCK_GATING_DIS;
5382 else
5383 data |= CLOCK_GATING_DIS;
5385 if (orig != data)
5386 WREG32(HDP_HOST_PATH_CNTL, data);
5389 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5390 bool enable)
5392 u32 orig, data;
5394 orig = data = RREG32(HDP_MEM_POWER_LS);
5396 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5397 data |= HDP_LS_ENABLE;
5398 else
5399 data &= ~HDP_LS_ENABLE;
5401 if (orig != data)
5402 WREG32(HDP_MEM_POWER_LS, data);
/**
 * cik_update_cg - enable/disable clock gating for a set of IP blocks
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable (true) or disable (false) clock gating
 *
 * For GFX, the GUI idle interrupt is masked around the change and the
 * MGCG/CGCG enable order is reversed on disable — the sequencing is
 * deliberate ("order matters!"), do not reorder.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC clock gating is only programmed on discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}
}
5449 static void cik_init_cg(struct radeon_device *rdev)
5452 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
5454 if (rdev->has_uvd)
5455 si_init_uvd_internal_cg(rdev);
5457 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5458 RADEON_CG_BLOCK_SDMA |
5459 RADEON_CG_BLOCK_BIF |
5460 RADEON_CG_BLOCK_UVD |
5461 RADEON_CG_BLOCK_HDP), true);
5464 static void cik_fini_cg(struct radeon_device *rdev)
5466 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5467 RADEON_CG_BLOCK_SDMA |
5468 RADEON_CG_BLOCK_BIF |
5469 RADEON_CG_BLOCK_UVD |
5470 RADEON_CG_BLOCK_HDP), false);
5472 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
5475 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5476 bool enable)
5478 u32 data, orig;
5480 orig = data = RREG32(RLC_PG_CNTL);
5481 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5482 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5483 else
5484 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5485 if (orig != data)
5486 WREG32(RLC_PG_CNTL, data);
5489 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5490 bool enable)
5492 u32 data, orig;
5494 orig = data = RREG32(RLC_PG_CNTL);
5495 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5496 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5497 else
5498 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5499 if (orig != data)
5500 WREG32(RLC_PG_CNTL, data);
5503 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5505 u32 data, orig;
5507 orig = data = RREG32(RLC_PG_CNTL);
5508 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5509 data &= ~DISABLE_CP_PG;
5510 else
5511 data |= DISABLE_CP_PG;
5512 if (orig != data)
5513 WREG32(RLC_PG_CNTL, data);
5516 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5518 u32 data, orig;
5520 orig = data = RREG32(RLC_PG_CNTL);
5521 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5522 data &= ~DISABLE_GDS_PG;
5523 else
5524 data |= DISABLE_GDS_PG;
5525 if (orig != data)
5526 WREG32(RLC_PG_CNTL, data);
5529 #define CP_ME_TABLE_SIZE 96
5530 #define CP_ME_TABLE_OFFSET 2048
5531 #define CP_MEC_TABLE_OFFSET 4096
/**
 * cik_init_cp_pg_table - copy the CP micro-engine RLC tables into the BO
 * @rdev: radeon_device pointer
 *
 * Copies a CP_ME_TABLE_SIZE-dword table slice out of each CP firmware
 * image (CE, PFP, ME, then the MECs) into the RLC cp_table buffer,
 * byte-swapping from big-endian firmware order.  KAVERI has one extra
 * micro engine (5 instead of 4).  No-op if the table BO is not mapped.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset;

	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		/* pick the firmware image and the table offset inside it */
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			/* remaining engines are MECs */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
			dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
		}
		bo_offset += CP_ME_TABLE_SIZE;
	}
}
/* Enable/disable gfx coarse-grain power gating (CGPG) via the RLC:
 * GFX_PG_ENABLE in RLC_PG_CNTL plus automatic PG in RLC_AUTO_PG_CTRL.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): the result of this read is discarded — it looks
		 * like a read-back to flush/settle the writes above; confirm
		 * with the hardware docs before removing.
		 */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
5601 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5603 u32 mask = 0, tmp, tmp1;
5604 int i;
5606 cik_select_se_sh(rdev, se, sh);
5607 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5608 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5609 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5611 tmp &= 0xffff0000;
5613 tmp |= tmp1;
5614 tmp >>= 16;
5616 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
5617 mask <<= 1;
5618 mask |= 1;
5621 return (~tmp) & mask;
5624 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5626 u32 i, j, k, active_cu_number = 0;
5627 u32 mask, counter, cu_bitmap;
5628 u32 tmp = 0;
5630 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5631 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5632 mask = 1;
5633 cu_bitmap = 0;
5634 counter = 0;
5635 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5636 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5637 if (counter < 2)
5638 cu_bitmap |= mask;
5639 counter ++;
5641 mask <<= 1;
5644 active_cu_number += counter;
5645 tmp |= (cu_bitmap << (i * 16 + j * 8));
5649 WREG32(RLC_PG_AO_CU_MASK, tmp);
5651 tmp = RREG32(RLC_MAX_PG_CU);
5652 tmp &= ~MAX_PU_CU_MASK;
5653 tmp |= MAX_PU_CU(active_cu_number);
5654 WREG32(RLC_MAX_PG_CU, tmp);
5657 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5658 bool enable)
5660 u32 data, orig;
5662 orig = data = RREG32(RLC_PG_CNTL);
5663 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5664 data |= STATIC_PER_CU_PG_ENABLE;
5665 else
5666 data &= ~STATIC_PER_CU_PG_ENABLE;
5667 if (orig != data)
5668 WREG32(RLC_PG_CNTL, data);
5671 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5672 bool enable)
5674 u32 data, orig;
5676 orig = data = RREG32(RLC_PG_CNTL);
5677 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5678 data |= DYN_PER_CU_PG_ENABLE;
5679 else
5680 data &= ~DYN_PER_CU_PG_ENABLE;
5681 if (orig != data)
5682 WREG32(RLC_PG_CNTL, data);
5685 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5686 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
/* One-time gfx power-gating setup: publish the clear-state descriptor and
 * save/restore register list to the RLC scratch space, point the RLC at
 * the save/restore and CP-table buffers, and program the PG delay /
 * idle-poll timing parameters.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear-state buffer: zero the three descriptor dwords */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* save/restore register list for PG */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* buffers are 256-byte aligned; registers take addr >> 8 */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);
}
/* Toggle all gfx power-gating features together:
 * coarse-grain PG, static per-CU MGPG, and dynamic per-CU MGPG.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
5744 u32 cik_get_csb_size(struct radeon_device *rdev)
5746 u32 count = 0;
5747 const struct cs_section_def *sect = NULL;
5748 const struct cs_extent_def *ext = NULL;
5750 if (rdev->rlc.cs_data == NULL)
5751 return 0;
5753 /* begin clear state */
5754 count += 2;
5755 /* context control state */
5756 count += 3;
5758 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5759 for (ext = sect->section; ext->extent != NULL; ++ext) {
5760 if (sect->id == SECT_CONTEXT)
5761 count += 2 + ext->reg_count;
5762 else
5763 return 0;
5766 /* pa_sc_raster_config/pa_sc_raster_config1 */
5767 count += 4;
5768 /* end clear state */
5769 count += 2;
5770 /* clear state */
5771 count += 2;
5773 return count;
/**
 * cik_get_csb_buffer - emit the clear-state buffer packets
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (must hold cik_get_csb_size() dwords)
 *
 * Builds the PM4 clear-state stream: preamble begin, context control,
 * all SECT_CONTEXT register extents, the per-family raster config pair,
 * preamble end, and a final CLEAR_STATE packet.  The packet layout must
 * stay in sync with cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;

	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
	buffer[count++] = 0x80000000;
	buffer[count++] = 0x80000000;

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
				/* context registers are addressed relative to 0xa000 */
				buffer[count++] = ext->reg_index - 0xa000;
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = ext->extent[i];
			} else {
				/* only context sections are supported; bail out */
				return;
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1 — per-family values */
	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = 0x16000012;
		buffer[count++] = 0x00000000;
		break;
	case CHIP_KAVERI:
		buffer[count++] = 0x00000000; /* XXX */
		buffer[count++] = 0x00000000;
		break;
	case CHIP_KABINI:
		buffer[count++] = 0x00000000; /* XXX */
		buffer[count++] = 0x00000000;
		break;
	default:
		buffer[count++] = 0x00000000;
		buffer[count++] = 0x00000000;
		break;
	}

	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;

	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
	buffer[count++] = 0;
}
5835 static void cik_init_pg(struct radeon_device *rdev)
5837 if (rdev->pg_flags) {
5838 cik_enable_sck_slowdown_on_pu(rdev, true);
5839 cik_enable_sck_slowdown_on_pd(rdev, true);
5840 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5841 cik_init_gfx_cgpg(rdev);
5842 cik_enable_cp_pg(rdev, true);
5843 cik_enable_gds_pg(rdev, true);
5845 cik_init_ao_cu_mask(rdev);
5846 cik_update_gfx_pg(rdev, true);
5850 static void cik_fini_pg(struct radeon_device *rdev)
5852 if (rdev->pg_flags) {
5853 cik_update_gfx_pg(rdev, false);
5854 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5855 cik_enable_cp_pg(rdev, false);
5856 cik_enable_gds_pg(rdev, false);
5862 * Interrupts
5863 * Starting with r6xx, interrupts are handled via a ring buffer.
5864 * Ring buffers are areas of GPU accessible memory that the GPU
5865 * writes interrupt vectors into and the host reads vectors out of.
5866 * There is a rptr (read pointer) that determines where the
5867 * host is currently reading, and a wptr (write pointer)
5868 * which determines where the GPU has written. When the
5869 * pointers are equal, the ring is idle. When the GPU
5870 * writes vectors to the ring buffer, it increments the
5871 * wptr. When there is an interrupt, the host then starts
5872 * fetching commands and processing them until the pointers are
5873 * equal again at which point it updates the rptr.
5877 * cik_enable_interrupts - Enable the interrupt ring buffer
5879 * @rdev: radeon_device pointer
5881 * Enable the interrupt ring buffer (CIK).
5883 static void cik_enable_interrupts(struct radeon_device *rdev)
5885 u32 ih_cntl = RREG32(IH_CNTL);
5886 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5888 ih_cntl |= ENABLE_INTR;
5889 ih_rb_cntl |= IH_RB_ENABLE;
5890 WREG32(IH_CNTL, ih_cntl);
5891 WREG32(IH_RB_CNTL, ih_rb_cntl);
5892 rdev->ih.enabled = true;
5896 * cik_disable_interrupts - Disable the interrupt ring buffer
5898 * @rdev: radeon_device pointer
5900 * Disable the interrupt ring buffer (CIK).
5902 static void cik_disable_interrupts(struct radeon_device *rdev)
5904 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5905 u32 ih_cntl = RREG32(IH_CNTL);
5907 ih_rb_cntl &= ~IH_RB_ENABLE;
5908 ih_cntl &= ~ENABLE_INTR;
5909 WREG32(IH_RB_CNTL, ih_rb_cntl);
5910 WREG32(IH_CNTL, ih_cntl);
5911 /* set rptr, wptr to 0 */
5912 WREG32(IH_RB_RPTR, 0);
5913 WREG32(IH_RB_WPTR, 0);
5914 rdev->ih.enabled = false;
5915 rdev->ih.rptr = 0;
5919 * cik_disable_interrupt_state - Disable all interrupt sources
5921 * @rdev: radeon_device pointer
5923 * Clear all interrupt enable bits used by the driver (CIK).
/* Mask every interrupt source programmed by the driver: gfx ring, both
 * SDMA engines, all compute pipes, GRBM, per-CRTC vblank/vline and
 * pageflip, DAC autodetect, and the six HPD pads (HPD keeps only its
 * polarity bit).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug — preserve only the polarity bit */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);
}
5994 * cik_irq_init - init and enable the interrupt ring
5996 * @rdev: radeon_device pointer
5998 * Allocate a ring buffer for the interrupt controller,
5999 * enable the RLC, disable interrupts, enable the IH
6000 * ring buffer and enable it (CIK).
6001  * Called at device load and resume.
6002 * Returns 0 for success, errors for failure.
/* Allocate and program the IH ring: resume the RLC, configure the ring
 * base/size/writeback, leave all sources masked, then enable the IH.
 * Returns 0 on success; on RLC failure the IH ring is freed again.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	/* ring size is encoded as log2(dwords), shifted into the size field */
	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6076 * cik_irq_set - enable/disable interrupt sources
6078 * @rdev: radeon_device pointer
6080 * Enable interrupt sources on the GPU (vblanks, hpd,
6081 * etc.) (CIK).
6082 * Returns 0 for success, errors for failure.
6084 int cik_irq_set(struct radeon_device *rdev)
6086 u32 cp_int_cntl;
6087 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6088 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6089 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6090 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6091 u32 grbm_int_cntl = 0;
6092 u32 dma_cntl, dma_cntl1;
6093 u32 thermal_int;
6095 if (!rdev->irq.installed) {
6096 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6097 return -EINVAL;
6099 /* don't enable anything if the ih is disabled */
6100 if (!rdev->ih.enabled) {
6101 cik_disable_interrupts(rdev);
6102 /* force the active interrupt state to all disabled */
6103 cik_disable_interrupt_state(rdev);
6104 return 0;
6107 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6108 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6109 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6111 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6112 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6113 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6114 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6115 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6116 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6118 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6119 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6121 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6122 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6123 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6124 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6125 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6126 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6127 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6128 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6130 if (rdev->flags & RADEON_IS_IGP)
6131 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6132 ~(THERM_INTH_MASK | THERM_INTL_MASK);
6133 else
6134 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6135 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6137 /* enable CP interrupts on all rings */
6138 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6139 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6140 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6142 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6143 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6144 DRM_DEBUG("si_irq_set: sw int cp1\n");
6145 if (ring->me == 1) {
6146 switch (ring->pipe) {
6147 case 0:
6148 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6149 break;
6150 case 1:
6151 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6152 break;
6153 case 2:
6154 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6155 break;
6156 case 3:
6157 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6158 break;
6159 default:
6160 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6161 break;
6163 } else if (ring->me == 2) {
6164 switch (ring->pipe) {
6165 case 0:
6166 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6167 break;
6168 case 1:
6169 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6170 break;
6171 case 2:
6172 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6173 break;
6174 case 3:
6175 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6176 break;
6177 default:
6178 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6179 break;
6181 } else {
6182 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6185 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6186 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6187 DRM_DEBUG("si_irq_set: sw int cp2\n");
6188 if (ring->me == 1) {
6189 switch (ring->pipe) {
6190 case 0:
6191 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6192 break;
6193 case 1:
6194 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6195 break;
6196 case 2:
6197 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6198 break;
6199 case 3:
6200 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6201 break;
6202 default:
6203 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6204 break;
6206 } else if (ring->me == 2) {
6207 switch (ring->pipe) {
6208 case 0:
6209 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6210 break;
6211 case 1:
6212 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6213 break;
6214 case 2:
6215 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6216 break;
6217 case 3:
6218 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6219 break;
6220 default:
6221 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6222 break;
6224 } else {
6225 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6229 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6230 DRM_DEBUG("cik_irq_set: sw int dma\n");
6231 dma_cntl |= TRAP_ENABLE;
6234 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6235 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6236 dma_cntl1 |= TRAP_ENABLE;
6239 if (rdev->irq.crtc_vblank_int[0] ||
6240 atomic_read(&rdev->irq.pflip[0])) {
6241 DRM_DEBUG("cik_irq_set: vblank 0\n");
6242 crtc1 |= VBLANK_INTERRUPT_MASK;
6244 if (rdev->irq.crtc_vblank_int[1] ||
6245 atomic_read(&rdev->irq.pflip[1])) {
6246 DRM_DEBUG("cik_irq_set: vblank 1\n");
6247 crtc2 |= VBLANK_INTERRUPT_MASK;
6249 if (rdev->irq.crtc_vblank_int[2] ||
6250 atomic_read(&rdev->irq.pflip[2])) {
6251 DRM_DEBUG("cik_irq_set: vblank 2\n");
6252 crtc3 |= VBLANK_INTERRUPT_MASK;
6254 if (rdev->irq.crtc_vblank_int[3] ||
6255 atomic_read(&rdev->irq.pflip[3])) {
6256 DRM_DEBUG("cik_irq_set: vblank 3\n");
6257 crtc4 |= VBLANK_INTERRUPT_MASK;
6259 if (rdev->irq.crtc_vblank_int[4] ||
6260 atomic_read(&rdev->irq.pflip[4])) {
6261 DRM_DEBUG("cik_irq_set: vblank 4\n");
6262 crtc5 |= VBLANK_INTERRUPT_MASK;
6264 if (rdev->irq.crtc_vblank_int[5] ||
6265 atomic_read(&rdev->irq.pflip[5])) {
6266 DRM_DEBUG("cik_irq_set: vblank 5\n");
6267 crtc6 |= VBLANK_INTERRUPT_MASK;
6269 if (rdev->irq.hpd[0]) {
6270 DRM_DEBUG("cik_irq_set: hpd 1\n");
6271 hpd1 |= DC_HPDx_INT_EN;
6273 if (rdev->irq.hpd[1]) {
6274 DRM_DEBUG("cik_irq_set: hpd 2\n");
6275 hpd2 |= DC_HPDx_INT_EN;
6277 if (rdev->irq.hpd[2]) {
6278 DRM_DEBUG("cik_irq_set: hpd 3\n");
6279 hpd3 |= DC_HPDx_INT_EN;
6281 if (rdev->irq.hpd[3]) {
6282 DRM_DEBUG("cik_irq_set: hpd 4\n");
6283 hpd4 |= DC_HPDx_INT_EN;
6285 if (rdev->irq.hpd[4]) {
6286 DRM_DEBUG("cik_irq_set: hpd 5\n");
6287 hpd5 |= DC_HPDx_INT_EN;
6289 if (rdev->irq.hpd[5]) {
6290 DRM_DEBUG("cik_irq_set: hpd 6\n");
6291 hpd6 |= DC_HPDx_INT_EN;
6294 if (rdev->irq.dpm_thermal) {
6295 DRM_DEBUG("dpm thermal\n");
6296 if (rdev->flags & RADEON_IS_IGP)
6297 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6298 else
6299 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6302 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6304 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6305 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6307 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6308 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6309 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6310 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6311 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6312 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6313 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6314 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6316 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6318 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6319 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6320 if (rdev->num_crtc >= 4) {
6321 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6322 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6324 if (rdev->num_crtc >= 6) {
6325 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6326 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6329 if (rdev->num_crtc >= 2) {
6330 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6331 GRPH_PFLIP_INT_MASK);
6332 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6333 GRPH_PFLIP_INT_MASK);
6335 if (rdev->num_crtc >= 4) {
6336 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6337 GRPH_PFLIP_INT_MASK);
6338 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6339 GRPH_PFLIP_INT_MASK);
6341 if (rdev->num_crtc >= 6) {
6342 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6343 GRPH_PFLIP_INT_MASK);
6344 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6345 GRPH_PFLIP_INT_MASK);
6348 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6349 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6350 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6351 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6352 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6353 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6355 if (rdev->flags & RADEON_IS_IGP)
6356 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6357 else
6358 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6360 return 0;
6364 * cik_irq_ack - ack interrupt sources
6366 * @rdev: radeon_device pointer
6368 * Ack interrupt sources on the GPU (vblanks, hpd,
6369 * etc.) (CIK). Certain interrupts sources are sw
6370 * generated and do not require an explicit ack.
6372 static inline void cik_irq_ack(struct radeon_device *rdev)
6374 u32 tmp;
6376 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6377 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6378 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6379 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6380 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6381 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6382 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6384 rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
6385 EVERGREEN_CRTC0_REGISTER_OFFSET);
6386 rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
6387 EVERGREEN_CRTC1_REGISTER_OFFSET);
6388 if (rdev->num_crtc >= 4) {
6389 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
6390 EVERGREEN_CRTC2_REGISTER_OFFSET);
6391 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
6392 EVERGREEN_CRTC3_REGISTER_OFFSET);
6394 if (rdev->num_crtc >= 6) {
6395 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
6396 EVERGREEN_CRTC4_REGISTER_OFFSET);
6397 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
6398 EVERGREEN_CRTC5_REGISTER_OFFSET);
6401 if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6402 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
6403 GRPH_PFLIP_INT_CLEAR);
6404 if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6405 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
6406 GRPH_PFLIP_INT_CLEAR);
6407 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6408 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6409 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6410 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6411 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6412 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6413 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6414 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6416 if (rdev->num_crtc >= 4) {
6417 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6418 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
6419 GRPH_PFLIP_INT_CLEAR);
6420 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6421 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
6422 GRPH_PFLIP_INT_CLEAR);
6423 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6424 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6425 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6426 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6427 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6428 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6429 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6430 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6433 if (rdev->num_crtc >= 6) {
6434 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6435 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
6436 GRPH_PFLIP_INT_CLEAR);
6437 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6438 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
6439 GRPH_PFLIP_INT_CLEAR);
6440 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6441 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6442 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6443 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6444 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6445 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6446 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6447 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6450 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6451 tmp = RREG32(DC_HPD1_INT_CONTROL);
6452 tmp |= DC_HPDx_INT_ACK;
6453 WREG32(DC_HPD1_INT_CONTROL, tmp);
6455 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6456 tmp = RREG32(DC_HPD2_INT_CONTROL);
6457 tmp |= DC_HPDx_INT_ACK;
6458 WREG32(DC_HPD2_INT_CONTROL, tmp);
6460 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6461 tmp = RREG32(DC_HPD3_INT_CONTROL);
6462 tmp |= DC_HPDx_INT_ACK;
6463 WREG32(DC_HPD3_INT_CONTROL, tmp);
6465 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6466 tmp = RREG32(DC_HPD4_INT_CONTROL);
6467 tmp |= DC_HPDx_INT_ACK;
6468 WREG32(DC_HPD4_INT_CONTROL, tmp);
6470 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6471 tmp = RREG32(DC_HPD5_INT_CONTROL);
6472 tmp |= DC_HPDx_INT_ACK;
6473 WREG32(DC_HPD5_INT_CONTROL, tmp);
6475 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6476 tmp = RREG32(DC_HPD5_INT_CONTROL);
6477 tmp |= DC_HPDx_INT_ACK;
6478 WREG32(DC_HPD6_INT_CONTROL, tmp);
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* then clear the per-source enable state */
	cik_disable_interrupt_state(rdev);
}
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6528 * cik_get_ih_wptr - get the IH ring buffer wptr
6530 * @rdev: radeon_device pointer
6532 * Get the IH ring buffer wptr from either the register
6533 * or the writeback memory buffer (CIK). Also check for
6534 * ring buffer overflow and deal with it.
6535 * Used by cik_irq_process().
6536 * Returns the value of the wptr.
6538 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6540 u32 wptr, tmp;
6542 if (rdev->wb.enabled)
6543 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6544 else
6545 wptr = RREG32(IH_RB_WPTR);
6547 if (wptr & RB_OVERFLOW) {
6548 /* When a ring buffer overflow happen start parsing interrupt
6549 * from the last not overwritten vector (wptr + 16). Hopefully
6550 * this should allow us to catchup.
6552 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6553 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6554 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6555 tmp = RREG32(IH_RB_CNTL);
6556 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6557 WREG32(IH_RB_CNTL, tmp);
6558 wptr &= ~RB_OVERFLOW;
6560 return (wptr & rdev->ih.ptr_mask);
6563 /* CIK IV Ring
6564 * Each IV ring entry is 128 bits:
6565 * [7:0] - interrupt source id
6566 * [31:8] - reserved
6567 * [59:32] - interrupt source data
6568 * [63:60] - reserved
6569 * [71:64] - RINGID
6570 * CP:
6571 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6572 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6573 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6574 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6575 * PIPE_ID - ME0 0=3D
6576 * - ME1&2 compute dispatcher (4 pipes each)
6577 * SDMA:
6578 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
6579 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
6580 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6581 * [79:72] - VMID
6582 * [95:80] - PASID
6583 * [127:96] - reserved
6586 * cik_irq_process - interrupt handler
6588 * @rdev: radeon_device pointer
6590 * Interrupt hander (CIK). Walk the IH ring,
6591 * ack interrupts and schedule work to handle
6592 * interrupt events.
6593 * Returns irq process return code.
6595 int cik_irq_process(struct radeon_device *rdev)
6597 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6598 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6599 u32 wptr;
6600 u32 rptr;
6601 u32 src_id, src_data, ring_id;
6602 u8 me_id, pipe_id, queue_id;
6603 u32 ring_index;
6604 bool queue_hotplug = false;
6605 bool queue_reset = false;
6606 u32 addr, status, mc_client;
6607 bool queue_thermal = false;
6609 if (!rdev->ih.enabled || rdev->shutdown)
6610 return IRQ_NONE;
6612 wptr = cik_get_ih_wptr(rdev);
6614 restart_ih:
6615 /* is somebody else already processing irqs? */
6616 if (atomic_xchg(&rdev->ih.lock, 1))
6617 return IRQ_NONE;
6619 rptr = rdev->ih.rptr;
6620 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6622 /* Order reading of wptr vs. reading of IH ring data */
6623 rmb();
6625 /* display interrupts */
6626 cik_irq_ack(rdev);
6628 while (rptr != wptr) {
6629 /* wptr/rptr are in bytes! */
6630 ring_index = rptr / 4;
6631 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6632 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6633 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6635 switch (src_id) {
6636 case 1: /* D1 vblank/vline */
6637 switch (src_data) {
6638 case 0: /* D1 vblank */
6639 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6640 if (rdev->irq.crtc_vblank_int[0]) {
6641 drm_handle_vblank(rdev->ddev, 0);
6642 rdev->pm.vblank_sync = true;
6643 wake_up(&rdev->irq.vblank_queue);
6645 if (atomic_read(&rdev->irq.pflip[0]))
6646 radeon_crtc_handle_flip(rdev, 0);
6647 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6648 DRM_DEBUG("IH: D1 vblank\n");
6650 break;
6651 case 1: /* D1 vline */
6652 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6653 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6654 DRM_DEBUG("IH: D1 vline\n");
6656 break;
6657 default:
6658 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6659 break;
6661 break;
6662 case 2: /* D2 vblank/vline */
6663 switch (src_data) {
6664 case 0: /* D2 vblank */
6665 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6666 if (rdev->irq.crtc_vblank_int[1]) {
6667 drm_handle_vblank(rdev->ddev, 1);
6668 rdev->pm.vblank_sync = true;
6669 wake_up(&rdev->irq.vblank_queue);
6671 if (atomic_read(&rdev->irq.pflip[1]))
6672 radeon_crtc_handle_flip(rdev, 1);
6673 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6674 DRM_DEBUG("IH: D2 vblank\n");
6676 break;
6677 case 1: /* D2 vline */
6678 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6679 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6680 DRM_DEBUG("IH: D2 vline\n");
6682 break;
6683 default:
6684 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6685 break;
6687 break;
6688 case 3: /* D3 vblank/vline */
6689 switch (src_data) {
6690 case 0: /* D3 vblank */
6691 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6692 if (rdev->irq.crtc_vblank_int[2]) {
6693 drm_handle_vblank(rdev->ddev, 2);
6694 rdev->pm.vblank_sync = true;
6695 wake_up(&rdev->irq.vblank_queue);
6697 if (atomic_read(&rdev->irq.pflip[2]))
6698 radeon_crtc_handle_flip(rdev, 2);
6699 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6700 DRM_DEBUG("IH: D3 vblank\n");
6702 break;
6703 case 1: /* D3 vline */
6704 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6705 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6706 DRM_DEBUG("IH: D3 vline\n");
6708 break;
6709 default:
6710 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6711 break;
6713 break;
6714 case 4: /* D4 vblank/vline */
6715 switch (src_data) {
6716 case 0: /* D4 vblank */
6717 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6718 if (rdev->irq.crtc_vblank_int[3]) {
6719 drm_handle_vblank(rdev->ddev, 3);
6720 rdev->pm.vblank_sync = true;
6721 wake_up(&rdev->irq.vblank_queue);
6723 if (atomic_read(&rdev->irq.pflip[3]))
6724 radeon_crtc_handle_flip(rdev, 3);
6725 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6726 DRM_DEBUG("IH: D4 vblank\n");
6728 break;
6729 case 1: /* D4 vline */
6730 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6731 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6732 DRM_DEBUG("IH: D4 vline\n");
6734 break;
6735 default:
6736 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6737 break;
6739 break;
6740 case 5: /* D5 vblank/vline */
6741 switch (src_data) {
6742 case 0: /* D5 vblank */
6743 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6744 if (rdev->irq.crtc_vblank_int[4]) {
6745 drm_handle_vblank(rdev->ddev, 4);
6746 rdev->pm.vblank_sync = true;
6747 wake_up(&rdev->irq.vblank_queue);
6749 if (atomic_read(&rdev->irq.pflip[4]))
6750 radeon_crtc_handle_flip(rdev, 4);
6751 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6752 DRM_DEBUG("IH: D5 vblank\n");
6754 break;
6755 case 1: /* D5 vline */
6756 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6757 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6758 DRM_DEBUG("IH: D5 vline\n");
6760 break;
6761 default:
6762 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6763 break;
6765 break;
6766 case 6: /* D6 vblank/vline */
6767 switch (src_data) {
6768 case 0: /* D6 vblank */
6769 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6770 if (rdev->irq.crtc_vblank_int[5]) {
6771 drm_handle_vblank(rdev->ddev, 5);
6772 rdev->pm.vblank_sync = true;
6773 wake_up(&rdev->irq.vblank_queue);
6775 if (atomic_read(&rdev->irq.pflip[5]))
6776 radeon_crtc_handle_flip(rdev, 5);
6777 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6778 DRM_DEBUG("IH: D6 vblank\n");
6780 break;
6781 case 1: /* D6 vline */
6782 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6783 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6784 DRM_DEBUG("IH: D6 vline\n");
6786 break;
6787 default:
6788 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6789 break;
6791 break;
6792 case 8: /* D1 page flip */
6793 case 10: /* D2 page flip */
6794 case 12: /* D3 page flip */
6795 case 14: /* D4 page flip */
6796 case 16: /* D5 page flip */
6797 case 18: /* D6 page flip */
6798 DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6799 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6800 break;
6801 case 42: /* HPD hotplug */
6802 switch (src_data) {
6803 case 0:
6804 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6805 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6806 queue_hotplug = true;
6807 DRM_DEBUG("IH: HPD1\n");
6809 break;
6810 case 1:
6811 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6812 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6813 queue_hotplug = true;
6814 DRM_DEBUG("IH: HPD2\n");
6816 break;
6817 case 2:
6818 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6819 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6820 queue_hotplug = true;
6821 DRM_DEBUG("IH: HPD3\n");
6823 break;
6824 case 3:
6825 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6826 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6827 queue_hotplug = true;
6828 DRM_DEBUG("IH: HPD4\n");
6830 break;
6831 case 4:
6832 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6833 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6834 queue_hotplug = true;
6835 DRM_DEBUG("IH: HPD5\n");
6837 break;
6838 case 5:
6839 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6840 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6841 queue_hotplug = true;
6842 DRM_DEBUG("IH: HPD6\n");
6844 break;
6845 default:
6846 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6847 break;
6849 break;
6850 case 124: /* UVD */
6851 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6852 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6853 break;
6854 case 146:
6855 case 147:
6856 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6857 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6858 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6859 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6860 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
6861 addr);
6862 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6863 status);
6864 cik_vm_decode_fault(rdev, status, addr, mc_client);
6865 /* reset addr and status */
6866 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6867 break;
6868 case 176: /* GFX RB CP_INT */
6869 case 177: /* GFX IB CP_INT */
6870 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6871 break;
6872 case 181: /* CP EOP event */
6873 DRM_DEBUG("IH: CP EOP\n");
6874 /* XXX check the bitfield order! */
6875 me_id = (ring_id & 0x60) >> 5;
6876 pipe_id = (ring_id & 0x18) >> 3;
6877 queue_id = (ring_id & 0x7) >> 0;
6878 switch (me_id) {
6879 case 0:
6880 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6881 break;
6882 case 1:
6883 case 2:
6884 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
6885 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6886 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
6887 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6888 break;
6890 break;
6891 case 184: /* CP Privileged reg access */
6892 DRM_ERROR("Illegal register access in command stream\n");
6893 /* XXX check the bitfield order! */
6894 me_id = (ring_id & 0x60) >> 5;
6895 pipe_id = (ring_id & 0x18) >> 3;
6896 queue_id = (ring_id & 0x7) >> 0;
6897 switch (me_id) {
6898 case 0:
6899 /* This results in a full GPU reset, but all we need to do is soft
6900 * reset the CP for gfx
6902 queue_reset = true;
6903 break;
6904 case 1:
6905 /* XXX compute */
6906 queue_reset = true;
6907 break;
6908 case 2:
6909 /* XXX compute */
6910 queue_reset = true;
6911 break;
6913 break;
6914 case 185: /* CP Privileged inst */
6915 DRM_ERROR("Illegal instruction in command stream\n");
6916 /* XXX check the bitfield order! */
6917 me_id = (ring_id & 0x60) >> 5;
6918 pipe_id = (ring_id & 0x18) >> 3;
6919 queue_id = (ring_id & 0x7) >> 0;
6920 switch (me_id) {
6921 case 0:
6922 /* This results in a full GPU reset, but all we need to do is soft
6923 * reset the CP for gfx
6925 queue_reset = true;
6926 break;
6927 case 1:
6928 /* XXX compute */
6929 queue_reset = true;
6930 break;
6931 case 2:
6932 /* XXX compute */
6933 queue_reset = true;
6934 break;
6936 break;
6937 case 224: /* SDMA trap event */
6938 /* XXX check the bitfield order! */
6939 me_id = (ring_id & 0x3) >> 0;
6940 queue_id = (ring_id & 0xc) >> 2;
6941 DRM_DEBUG("IH: SDMA trap\n");
6942 switch (me_id) {
6943 case 0:
6944 switch (queue_id) {
6945 case 0:
6946 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6947 break;
6948 case 1:
6949 /* XXX compute */
6950 break;
6951 case 2:
6952 /* XXX compute */
6953 break;
6955 break;
6956 case 1:
6957 switch (queue_id) {
6958 case 0:
6959 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6960 break;
6961 case 1:
6962 /* XXX compute */
6963 break;
6964 case 2:
6965 /* XXX compute */
6966 break;
6968 break;
6970 break;
6971 case 230: /* thermal low to high */
6972 DRM_DEBUG("IH: thermal low to high\n");
6973 rdev->pm.dpm.thermal.high_to_low = false;
6974 queue_thermal = true;
6975 break;
6976 case 231: /* thermal high to low */
6977 DRM_DEBUG("IH: thermal high to low\n");
6978 rdev->pm.dpm.thermal.high_to_low = true;
6979 queue_thermal = true;
6980 break;
6981 case 233: /* GUI IDLE */
6982 DRM_DEBUG("IH: GUI idle\n");
6983 break;
6984 case 241: /* SDMA Privileged inst */
6985 case 247: /* SDMA Privileged inst */
6986 DRM_ERROR("Illegal instruction in SDMA command stream\n");
6987 /* XXX check the bitfield order! */
6988 me_id = (ring_id & 0x3) >> 0;
6989 queue_id = (ring_id & 0xc) >> 2;
6990 switch (me_id) {
6991 case 0:
6992 switch (queue_id) {
6993 case 0:
6994 queue_reset = true;
6995 break;
6996 case 1:
6997 /* XXX compute */
6998 queue_reset = true;
6999 break;
7000 case 2:
7001 /* XXX compute */
7002 queue_reset = true;
7003 break;
7005 break;
7006 case 1:
7007 switch (queue_id) {
7008 case 0:
7009 queue_reset = true;
7010 break;
7011 case 1:
7012 /* XXX compute */
7013 queue_reset = true;
7014 break;
7015 case 2:
7016 /* XXX compute */
7017 queue_reset = true;
7018 break;
7020 break;
7022 break;
7023 default:
7024 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7025 break;
7028 /* wptr/rptr are in bytes! */
7029 rptr += 16;
7030 rptr &= rdev->ih.ptr_mask;
7032 if (queue_hotplug)
7033 schedule_work(&rdev->hotplug_work);
7034 if (queue_reset)
7035 schedule_work(&rdev->reset_work);
7036 if (queue_thermal)
7037 schedule_work(&rdev->pm.dpm.thermal.work);
7038 rdev->ih.rptr = rptr;
7039 WREG32(IH_RB_RPTR, rdev->ih.rptr);
7040 atomic_set(&rdev->ih.lock, 0);
7042 /* make sure wptr hasn't changed while processing */
7043 wptr = cik_get_ih_wptr(rdev);
7044 if (wptr != rptr)
7045 goto restart_ih;
7047 return IRQ_HANDLED;
7051 * startup/shutdown callbacks
/**
 * cik_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called by cik_init() and cik_resume().
 * Returns 0 for success, error for failure.
 */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPUs need the MC microcode loaded; IGPs take the !RADEON_IS_IGP
	 * branch in cik_irq_set/cik_startup and have no separate MC ucode.
	 */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* Kaveri uses the "spectre" register list, other IGPs
		 * (Kabini) use the "kalindi" list.
		 */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on each ring used: gfx, two compute
	 * rings, two SDMA rings and (optionally) UVD.
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on any failure the UVD ring size is zeroed so
	 * the ring-init block below skips it rather than failing startup.
	 */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
/**
 * cik_resume - resume the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called at resume.
 * Returns 0 for success, error for failure.
 */
int cik_resume(struct radeon_device *rdev)
{
	int r;

	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	cik_init_golden_registers(rdev);

	/* accel_working must be set before cik_startup() so the
	 * startup path runs with acceleration assumed available;
	 * it is cleared again if startup fails.
	 */
	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		DRM_ERROR("cik startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;
}
/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the engines before tearing down irqs, writeback and GART */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	/* disable powergating/clockgating before interrupt teardown */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
7325 /* Plan is to move initialization in that function and use
7326 * helper function so that radeon_device_init pretty much
7327 * do nothing more than calling asic specific function. This
7328 * should also allow to remove a bunch of callback function
7329 * like vram_info.
7332 * cik_init - asic specific driver and hw init
7334 * @rdev: radeon_device pointer
7336 * Setup asic specific driver variables and program the hw
7337 * to a functional state (CIK).
7338 * Called at driver startup.
7339 * Returns 0 for success, errors for failure.
7341 int cik_init(struct radeon_device *rdev)
7343 struct radeon_ring *ring;
7344 int r;
7346 /* Read BIOS */
7347 if (!radeon_get_bios(rdev)) {
7348 if (ASIC_IS_AVIVO(rdev))
7349 return -EINVAL;
7351 /* Must be an ATOMBIOS */
7352 if (!rdev->is_atom_bios) {
7353 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7354 return -EINVAL;
7356 r = radeon_atombios_init(rdev);
7357 if (r)
7358 return r;
7360 /* Post card if necessary */
7361 if (!radeon_card_posted(rdev)) {
7362 if (!rdev->bios) {
7363 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7364 return -EINVAL;
7366 DRM_INFO("GPU not posted. posting now...\n");
7367 atom_asic_init(rdev->mode_info.atom_context);
7369 /* init golden registers */
7370 cik_init_golden_registers(rdev);
7371 /* Initialize scratch registers */
7372 cik_scratch_init(rdev);
7373 /* Initialize surface registers */
7374 radeon_surface_init(rdev);
7375 /* Initialize clocks */
7376 radeon_get_clock_info(rdev->ddev);
7378 /* Fence driver */
7379 r = radeon_fence_driver_init(rdev);
7380 if (r)
7381 return r;
7383 /* initialize memory controller */
7384 r = cik_mc_init(rdev);
7385 if (r)
7386 return r;
7387 /* Memory manager */
7388 r = radeon_bo_init(rdev);
7389 if (r)
7390 return r;
7392 if (rdev->flags & RADEON_IS_IGP) {
7393 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7394 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
7395 r = cik_init_microcode(rdev);
7396 if (r) {
7397 DRM_ERROR("Failed to load firmware!\n");
7398 return r;
7401 } else {
7402 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7403 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7404 !rdev->mc_fw) {
7405 r = cik_init_microcode(rdev);
7406 if (r) {
7407 DRM_ERROR("Failed to load firmware!\n");
7408 return r;
7413 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7414 ring->ring_obj = NULL;
7415 r600_ring_init(rdev, ring, 1024 * 1024);
7417 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7418 ring->ring_obj = NULL;
7419 r600_ring_init(rdev, ring, 1024 * 1024);
7420 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7421 if (r)
7422 return r;
7424 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7425 ring->ring_obj = NULL;
7426 r600_ring_init(rdev, ring, 1024 * 1024);
7427 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7428 if (r)
7429 return r;
7431 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7432 ring->ring_obj = NULL;
7433 r600_ring_init(rdev, ring, 256 * 1024);
7435 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7436 ring->ring_obj = NULL;
7437 r600_ring_init(rdev, ring, 256 * 1024);
7439 r = radeon_uvd_init(rdev);
7440 if (!r) {
7441 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7442 ring->ring_obj = NULL;
7443 r600_ring_init(rdev, ring, 4096);
7446 rdev->ih.ring_obj = NULL;
7447 r600_ih_ring_init(rdev, 64 * 1024);
7449 r = r600_pcie_gart_init(rdev);
7450 if (r)
7451 return r;
7453 rdev->accel_working = true;
7454 r = cik_startup(rdev);
7455 if (r) {
7456 dev_err(rdev->dev, "disabling GPU acceleration\n");
7457 cik_cp_fini(rdev);
7458 cik_sdma_fini(rdev);
7459 cik_irq_fini(rdev);
7460 sumo_rlc_fini(rdev);
7461 cik_mec_fini(rdev);
7462 radeon_wb_fini(rdev);
7463 radeon_ib_pool_fini(rdev);
7464 radeon_vm_manager_fini(rdev);
7465 radeon_irq_kms_fini(rdev);
7466 cik_pcie_gart_fini(rdev);
7467 rdev->accel_working = false;
7470 /* Don't start up if the MC ucode is missing.
7471 * The default clocks and voltages before the MC ucode
7472 * is loaded are not suffient for advanced operations.
7474 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7475 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7476 return -EINVAL;
7479 return 0;
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	/* teardown mirrors the cik_init()/cik_startup() bring-up order,
	 * in reverse: engines first, then irq/rlc/mec, then memory
	 * management, finally BIOS/atombios state.
	 */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
7516 /* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Setup up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	u32 tmp, buffer_alloc, i;
	/* DMIF buffer control registers are spaced 0x20 apart per pipe */
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controllers.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width. For display widths larger
	 * than 4096, you need use to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		/* tmp selects the LB partition config, buffer_alloc the
		 * number of DMIF buffers; wider modes need more buffers */
		if (mode->crtc_hdisplay < 1920) {
			tmp = 1;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 2560) {
			tmp = 2;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 4096) {
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		} else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		}
	} else {
		/* crtc disabled: minimal config, no DMIF buffers */
		tmp = 1;
		buffer_alloc = 0;
	}

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* wait (bounded by usec_timeout) for the hw to ack the allocation */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		/* report the usable LB size in pixels for the chosen config */
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
7592 * cik_get_number_of_dram_channels - get the number of dram channels
7594 * @rdev: radeon_device pointer
7596 * Look up the number of video ram channels (CIK).
7597 * Used for display watermark bandwidth calculations
7598 * Returns the number of dram channels
7600 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7602 u32 tmp = RREG32(MC_SHARED_CHMAP);
7604 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7605 case 0:
7606 default:
7607 return 1;
7608 case 1:
7609 return 2;
7610 case 2:
7611 return 4;
7612 case 3:
7613 return 8;
7614 case 4:
7615 return 3;
7616 case 5:
7617 return 6;
7618 case 6:
7619 return 10;
7620 case 7:
7621 return 12;
7622 case 8:
7623 return 16;
/* Inputs for the DCE8 display watermark calculations below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk; /* bandwidth per dram data pin in kHz */
	u32 sclk; /* engine clock in kHz */
	u32 disp_clk; /* display clock in kHz */
	u32 src_width; /* viewport width */
	u32 active_time; /* active display time in ns */
	u32 blank_time; /* blank time in ns */
	bool interlaced; /* mode is interlaced */
	fixed20_12 vsc; /* vertical scale ratio */
	u32 num_heads; /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size; /* line buffer allocated to pipe */
	u32 vtaps; /* vertical scaler taps */
};
7644 * dce8_dram_bandwidth - get the dram bandwidth
7646 * @wm: watermark calculation data
7648 * Calculate the raw dram bandwidth (CIK).
7649 * Used for display watermark bandwidth calculations
7650 * Returns the dram bandwidth in MBytes/s
7652 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7654 /* Calculate raw DRAM Bandwidth */
7655 fixed20_12 dram_efficiency; /* 0.7 */
7656 fixed20_12 yclk, dram_channels, bandwidth;
7657 fixed20_12 a;
7659 a.full = dfixed_const(1000);
7660 yclk.full = dfixed_const(wm->yclk);
7661 yclk.full = dfixed_div(yclk, a);
7662 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7663 a.full = dfixed_const(10);
7664 dram_efficiency.full = dfixed_const(7);
7665 dram_efficiency.full = dfixed_div(dram_efficiency, a);
7666 bandwidth.full = dfixed_mul(dram_channels, yclk);
7667 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7669 return dfixed_trunc(bandwidth);
7673 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7675 * @wm: watermark calculation data
7677 * Calculate the dram bandwidth used for display (CIK).
7678 * Used for display watermark bandwidth calculations
7679 * Returns the dram bandwidth for display in MBytes/s
7681 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7683 /* Calculate DRAM Bandwidth and the part allocated to display. */
7684 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7685 fixed20_12 yclk, dram_channels, bandwidth;
7686 fixed20_12 a;
7688 a.full = dfixed_const(1000);
7689 yclk.full = dfixed_const(wm->yclk);
7690 yclk.full = dfixed_div(yclk, a);
7691 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7692 a.full = dfixed_const(10);
7693 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
7694 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7695 bandwidth.full = dfixed_mul(dram_channels, yclk);
7696 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7698 return dfixed_trunc(bandwidth);
7702 * dce8_data_return_bandwidth - get the data return bandwidth
7704 * @wm: watermark calculation data
7706 * Calculate the data return bandwidth used for display (CIK).
7707 * Used for display watermark bandwidth calculations
7708 * Returns the data return bandwidth in MBytes/s
7710 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7712 /* Calculate the display Data return Bandwidth */
7713 fixed20_12 return_efficiency; /* 0.8 */
7714 fixed20_12 sclk, bandwidth;
7715 fixed20_12 a;
7717 a.full = dfixed_const(1000);
7718 sclk.full = dfixed_const(wm->sclk);
7719 sclk.full = dfixed_div(sclk, a);
7720 a.full = dfixed_const(10);
7721 return_efficiency.full = dfixed_const(8);
7722 return_efficiency.full = dfixed_div(return_efficiency, a);
7723 a.full = dfixed_const(32);
7724 bandwidth.full = dfixed_mul(a, sclk);
7725 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7727 return dfixed_trunc(bandwidth);
7731 * dce8_dmif_request_bandwidth - get the dmif bandwidth
7733 * @wm: watermark calculation data
7735 * Calculate the dmif bandwidth used for display (CIK).
7736 * Used for display watermark bandwidth calculations
7737 * Returns the dmif bandwidth in MBytes/s
7739 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7741 /* Calculate the DMIF Request Bandwidth */
7742 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7743 fixed20_12 disp_clk, bandwidth;
7744 fixed20_12 a, b;
7746 a.full = dfixed_const(1000);
7747 disp_clk.full = dfixed_const(wm->disp_clk);
7748 disp_clk.full = dfixed_div(disp_clk, a);
7749 a.full = dfixed_const(32);
7750 b.full = dfixed_mul(a, disp_clk);
7752 a.full = dfixed_const(10);
7753 disp_clk_request_efficiency.full = dfixed_const(8);
7754 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7756 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7758 return dfixed_trunc(bandwidth);
7762 * dce8_available_bandwidth - get the min available bandwidth
7764 * @wm: watermark calculation data
7766 * Calculate the min available bandwidth used for display (CIK).
7767 * Used for display watermark bandwidth calculations
7768 * Returns the min available bandwidth in MBytes/s
7770 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7772 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
7773 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7774 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7775 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7777 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7781 * dce8_average_bandwidth - get the average available bandwidth
7783 * @wm: watermark calculation data
7785 * Calculate the average available bandwidth used for display (CIK).
7786 * Used for display watermark bandwidth calculations
7787 * Returns the average available bandwidth in MBytes/s
7789 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7791 /* Calculate the display mode Average Bandwidth
7792 * DisplayMode should contain the source and destination dimensions,
7793 * timing, etc.
7795 fixed20_12 bpp;
7796 fixed20_12 line_time;
7797 fixed20_12 src_width;
7798 fixed20_12 bandwidth;
7799 fixed20_12 a;
7801 a.full = dfixed_const(1000);
7802 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7803 line_time.full = dfixed_div(line_time, a);
7804 bpp.full = dfixed_const(wm->bytes_per_pixel);
7805 src_width.full = dfixed_const(wm->src_width);
7806 bandwidth.full = dfixed_mul(src_width, bpp);
7807 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7808 bandwidth.full = dfixed_div(bandwidth, line_time);
7810 return dfixed_trunc(bandwidth);
/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	/* NOTE(review): the divisions below assume available_bandwidth and
	 * disp_clk are non-zero; the num_heads == 0 guard comes only after
	 * these initializers run. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscale/interlace/multi-tap modes fetch more src lines
	 * per destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = per-head share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / round-trip time, an alternative fill-rate bound */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk * bytes_per_pixel: max rate the pipe can consume */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line-buffer fill bandwidth is the smaller of supply and demand */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line cannot be filled within the active period, the excess
	 * adds to the latency the watermark must cover */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);
}
7886 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7887 * average and available dram bandwidth
7889 * @wm: watermark calculation data
7891 * Check if the display average bandwidth fits in the display
7892 * dram bandwidth (CIK).
7893 * Used for display watermark bandwidth calculations
7894 * Returns true if the display fits, false if not.
7896 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7898 if (dce8_average_bandwidth(wm) <=
7899 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7900 return true;
7901 else
7902 return false;
7906 * dce8_average_bandwidth_vs_available_bandwidth - check
7907 * average and available bandwidth
7909 * @wm: watermark calculation data
7911 * Check if the display average bandwidth fits in the display
7912 * available bandwidth (CIK).
7913 * Used for display watermark bandwidth calculations
7914 * Returns true if the display fits, false if not.
7916 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7918 if (dce8_average_bandwidth(wm) <=
7919 (dce8_available_bandwidth(wm) / wm->num_heads))
7920 return true;
7921 else
7922 return false;
7926 * dce8_check_latency_hiding - check latency hiding
7928 * @wm: watermark calculation data
7930 * Check latency hiding (CIK).
7931 * Used for display watermark bandwidth calculations
7932 * Returns true if the display fits, false if not.
7934 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7936 u32 lb_partitions = wm->lb_size / wm->src_width;
7937 u32 line_time = wm->active_time + wm->blank_time;
7938 u32 latency_tolerant_lines;
7939 u32 latency_hiding;
7940 fixed20_12 a;
7942 a.full = dfixed_const(1);
7943 if (wm->vsc.full > a.full)
7944 latency_tolerant_lines = 1;
7945 else {
7946 if (lb_partitions <= (wm->vtaps + 1))
7947 latency_tolerant_lines = 1;
7948 else
7949 latency_tolerant_lines = 2;
7952 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7954 if (dce8_latency_watermark(wm) <= latency_hiding)
7955 return true;
7956 else
7957 return false;
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		/* LATENCY_HIGH_WATERMARK field is 16 bits wide, clamp to it */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
8094 * dce8_bandwidth_update - program display watermarks
8096 * @rdev: radeon_device pointer
8098 * Calculate and program the display watermarks and line
8099 * buffer allocation (CIK).
8101 void dce8_bandwidth_update(struct radeon_device *rdev)
8103 struct drm_display_mode *mode = NULL;
8104 u32 num_heads = 0, lb_size;
8105 int i;
8107 radeon_update_display_priority(rdev);
8109 for (i = 0; i < rdev->num_crtc; i++) {
8110 if (rdev->mode_info.crtcs[i]->base.enabled)
8111 num_heads++;
8113 for (i = 0; i < rdev->num_crtc; i++) {
8114 mode = &rdev->mode_info.crtcs[i]->base.mode;
8115 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8116 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (SI).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	/* the capture-write plus two halved reads must not interleave with
	 * another caller, hence the mutex */
	mutex_lock(&rdev->gpu_clock_mutex);
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}
/**
 * cik_set_uvd_clock - program one UVD clock (vclk or dclk)
 *
 * @rdev: radeon_device pointer
 * @clock: target clock frequency
 * @cntl_reg: SMC control register for this clock
 * @status_reg: SMC status register for this clock
 *
 * Looks up the atom clock dividers for @clock, programs the post
 * divider, then polls the status register until the clock reports
 * ready. Returns 0 on success, negative error code from the atom
 * lookup, or -ETIMEDOUT if the clock never settles.
 */
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	/* read-modify-write: only the divider field changes */
	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	/* wait up to 100 * 10ms for the clock to report stable */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
8168 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8170 int r = 0;
8172 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8173 if (r)
8174 return r;
8176 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8177 return r;
/**
 * cik_pcie_gen3_enable - try to raise the PCIe link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * Checks the bridge/GPU speed capabilities, optionally re-runs gen3
 * equalization, then requests a software-initiated link speed change
 * and waits for it to complete. Skipped for IGPs, non-PCIE parts, or
 * when disabled via the radeon.pcie_gen2 module parameter.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current_data_rate: 1 = gen2, 2 = gen3 already active */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the current link-control state of both ends,
			 * then force hw autonomous width disable during
			 * equalization */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate up to the max detected width if the
			 * link is currently running narrower */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries, 100ms apart */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link and redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bits */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore enter-compliance (bit 4)
				 * and transmit-margin (bits 9-11) fields */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* target link speed field (low 4 bits of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* hw clears the initiate bit when the speed change completes */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
8337 static void cik_program_aspm(struct radeon_device *rdev)
8339 u32 data, orig;
8340 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8341 bool disable_clkreq = false;
8343 if (radeon_aspm == 0)
8344 return;
8346 /* XXX double check IGPs */
8347 if (rdev->flags & RADEON_IS_IGP)
8348 return;
8350 if (!(rdev->flags & RADEON_IS_PCIE))
8351 return;
8353 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8354 data &= ~LC_XMIT_N_FTS_MASK;
8355 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8356 if (orig != data)
8357 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8359 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8360 data |= LC_GO_TO_RECOVERY;
8361 if (orig != data)
8362 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8364 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8365 data |= P_IGNORE_EDB_ERR;
8366 if (orig != data)
8367 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8369 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8370 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8371 data |= LC_PMI_TO_L1_DIS;
8372 if (!disable_l0s)
8373 data |= LC_L0S_INACTIVITY(7);
8375 if (!disable_l1) {
8376 data |= LC_L1_INACTIVITY(7);
8377 data &= ~LC_PMI_TO_L1_DIS;
8378 if (orig != data)
8379 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8381 if (!disable_plloff_in_l1) {
8382 bool clk_req_support;
8384 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8385 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8386 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8387 if (orig != data)
8388 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8390 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8391 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8392 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8393 if (orig != data)
8394 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8396 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8397 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8398 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8399 if (orig != data)
8400 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8402 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8403 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8404 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8405 if (orig != data)
8406 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8408 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8409 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8410 data |= LC_DYN_LANES_PWR_STATE(3);
8411 if (orig != data)
8412 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8414 if (!disable_clkreq) {
8415 struct pci_dev *root = rdev->pdev->bus->self;
8416 u32 lnkcap;
8418 clk_req_support = false;
8419 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8420 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8421 clk_req_support = true;
8422 } else {
8423 clk_req_support = false;
8426 if (clk_req_support) {
8427 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8428 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8429 if (orig != data)
8430 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8432 orig = data = RREG32_SMC(THM_CLK_CNTL);
8433 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8434 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8435 if (orig != data)
8436 WREG32_SMC(THM_CLK_CNTL, data);
8438 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8439 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8440 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8441 if (orig != data)
8442 WREG32_SMC(MISC_CLK_CTRL, data);
8444 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8445 data &= ~BCLK_AS_XCLK;
8446 if (orig != data)
8447 WREG32_SMC(CG_CLKPIN_CNTL, data);
8449 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8450 data &= ~FORCE_BIF_REFCLK_EN;
8451 if (orig != data)
8452 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8454 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8455 data &= ~MPLL_CLKOUT_SEL_MASK;
8456 data |= MPLL_CLKOUT_SEL(4);
8457 if (orig != data)
8458 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8461 } else {
8462 if (orig != data)
8463 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8466 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8467 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8468 if (orig != data)
8469 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8471 if (!disable_l0s) {
8472 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8473 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8474 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8475 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8476 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8477 data &= ~LC_L0S_INACTIVITY_MASK;
8478 if (orig != data)
8479 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);