2 #include "nouveau_drv.h"
6 #include "pscnv_chan.h"
8 static int nv50_vm_map_kernel(struct pscnv_bo
*bo
);
9 static void nv50_vm_takedown(struct drm_device
*dev
);
10 static int nv50_vspace_do_unmap (struct pscnv_vspace
*vs
, uint64_t offset
, uint64_t length
);
/*
 * nv50_vm_flush: trigger a TLB flush for one MMU client unit.
 * Writes (unit << 16) | 1 into PFB register 0x100c80, then polls (2 s
 * timeout) for the busy bit to clear; logs an error on timeout.
 * NOTE(review): extraction is missing lines here (embedded numbering
 * jumps 16 -> 22): the return type, error return and closing brace are
 * not visible — recover them from the upstream pscnv nv50_vm.c.
 */
13 nv50_vm_flush(struct drm_device
*dev
, int unit
) {
14 nv_wr32(dev
, 0x100c80, unit
<< 16 | 1);
15 if (!nouveau_wait_until(dev
, 2000000000ULL, 0x100c80, 1, 0)) {
16 NV_ERROR(dev
, "TLB flush fail on unit %d!\n", unit
);
/*
 * nv50_vspace_tlb_flush: flush the TLBs of every unit that can hold
 * translations for this vspace. PFIFO (unit 5) is flushed
 * unconditionally, then each engine that holds a reference on this
 * vspace (engref[i] non-zero) gets its own tlb_flush callback invoked.
 * NOTE(review): declarations of `i` and `ret`, the engine NULL check
 * (if any), and the return statements fall in the missing lines
 * (17-21, 24, 30-34 of the original) — do not assume their shape.
 */
22 static int nv50_vspace_tlb_flush (struct pscnv_vspace
*vs
) {
23 struct drm_nouveau_private
*dev_priv
= vs
->dev
->dev_private
;
25 nv50_vm_flush(vs
->dev
, 5); /* PFIFO always active */
26 for (i
= 0; i
< PSCNV_ENGINES_NUM
; i
++) {
27 struct pscnv_engine
*eng
= dev_priv
->engines
[i
];
/* only engines actually using this vspace need their TLBs flushed */
28 if (nv50_vs(vs
)->engref
[i
])
29 if ((ret
= eng
->tlb_flush(eng
, vs
)))
/*
 * nv50_vspace_fill_pd_slot: lazily allocate the page table backing one
 * page-directory entry of a vspace.
 * - allocates a contiguous BO of NV50_VM_SPTE_COUNT * 8 bytes (one
 *   8-byte PTE per small page) with magic cookie 0xa9e7ab1e,
 * - maps it into the kernel BAR so nv_wv32 can reach it,
 * - zeroes every PTE,
 * - then writes the new PDE (pt start address | 3) into the page
 *   directory of every channel bound to this vspace, high word first.
 * The PD offset inside the channel BO differs between NV50
 * (NV50_CHAN_PD) and NV84+ (NV84_CHAN_PD).
 * NOTE(review): the allocation-failure branch, declarations of `i` and
 * `chan_pd`, the `else` keyword and the trailing return are in lines
 * missing from this extraction — verify against upstream.
 */
36 nv50_vspace_fill_pd_slot (struct pscnv_vspace
*vs
, uint32_t pdenum
) {
37 struct drm_nouveau_private
*dev_priv
= vs
->dev
->dev_private
;
38 struct list_head
*pos
;
41 nv50_vs(vs
)->pt
[pdenum
] = pscnv_mem_alloc(vs
->dev
, NV50_VM_SPTE_COUNT
* 8, PSCNV_GEM_CONTIG
, 0, 0xa9e7ab1e);
42 if (!nv50_vs(vs
)->pt
[pdenum
]) {
47 nv50_vm_map_kernel(nv50_vs(vs
)->pt
[pdenum
]);
/* clear all PTEs in the freshly allocated table */
49 for (i
= 0; i
< NV50_VM_SPTE_COUNT
; i
++)
50 nv_wv32(nv50_vs(vs
)->pt
[pdenum
], i
* 8, 0);
/* PD location inside the channel BO is chipset-dependent */
52 if (dev_priv
->chipset
== 0x50)
53 chan_pd
= NV50_CHAN_PD
;
55 chan_pd
= NV84_CHAN_PD
;
/* publish the new PDE into every channel sharing this vspace */
57 list_for_each(pos
, &nv50_vs(vs
)->chan_list
) {
58 struct pscnv_chan
*ch
= list_entry(pos
, struct pscnv_chan
, vspace_list
);
59 uint64_t pde
= nv50_vs(vs
)->pt
[pdenum
]->start
| 3;
60 nv_wv32(ch
->bo
, chan_pd
+ pdenum
* 8 + 4, pde
>> 32);
61 nv_wv32(ch
->bo
, chan_pd
+ pdenum
* 8, pde
);
/*
 * nv50_vspace_place_map: pick a virtual-address range for a BO by
 * delegating to the generic allocator. `back` selects allocation from
 * the top of the range (PSCNV_MM_FROMBACK); the chosen node is
 * returned through *res.
 * NOTE(review): the return type line and closing brace are outside
 * this extraction.
 */
67 nv50_vspace_place_map (struct pscnv_vspace
*vs
, struct pscnv_bo
*bo
,
68 uint64_t start
, uint64_t end
, int back
,
69 struct pscnv_mm_node
**res
) {
70 return pscnv_mm_alloc(vs
->mm
, bo
->size
, back
?PSCNV_MM_FROMBACK
:0, start
, end
, res
);
/*
 * nv50_vspace_map_contig_range: write PTEs covering a physically
 * contiguous range starting at `pte` into the vspace at `offset`.
 * pgnum/pdenum/ptenum locate the first PTE; `lev` (declared in a
 * missing line) grows while size and alignment allow, so contiguous
 * runs of up to 1 << 7 pages are encoded in each PTE's bits 7..9
 * (pte | lev << 7), letting hardware fetch fewer PTEs.
 * Missing PD slots are filled on demand via nv50_vspace_fill_pd_slot.
 * NOTE(review): the `lp` (large page) parameter is accepted but unused
 * here — the XXX below marks that; LP support is not implemented.
 * NOTE(review): declarations of `lev`, `ret`, `i`, the outer while
 * loop around the PTE writes, and the return are in missing lines.
 */
73 static int nv50_vspace_map_contig_range (struct pscnv_vspace
*vs
, uint64_t offset
, uint64_t pte
, uint64_t size
, int lp
) {
75 /* XXX: add LP support */
78 uint32_t pgnum
= offset
/ 0x1000;
79 uint32_t pdenum
= pgnum
/ NV50_VM_SPTE_COUNT
;
80 uint32_t ptenum
= pgnum
% NV50_VM_SPTE_COUNT
;
/* raise lev while the remaining size covers the next power of two
 * and the offset is aligned to it */
83 while (lev
< 7 && size
>= (0x1000 << (lev
+ 1)) && !(offset
& (1 << (lev
+ 12))))
85 if (!nv50_vs(vs
)->pt
[pdenum
])
86 if ((ret
= nv50_vspace_fill_pd_slot (vs
, pdenum
)))
/* write 1 << lev identical PTEs, high word first */
88 for (i
= 0; i
< (1 << lev
); i
++) {
89 nv_wv32(nv50_vs(vs
)->pt
[pdenum
], (ptenum
+ i
) * 8 + 4, pte
>> 32);
90 nv_wv32(nv50_vs(vs
)->pt
[pdenum
], (ptenum
+ i
) * 8, pte
| lev
<< 7);
91 if (pscnv_vm_debug
>= 3)
92 NV_INFO(vs
->dev
, "VM: [%08x][%08x] = %016llx\n", pdenum
, ptenum
+ i
, pte
| lev
<< 7);
/* advance past the chunk just mapped */
94 size
-= (0x1000 << lev
);
95 offset
+= (0x1000 << lev
);
96 pte
+= (0x1000 << lev
);
/*
 * nv50_vspace_do_map: install page-table entries mapping a BO at
 * `offset` in the given vspace, dispatching on the BO's memory type.
 * VRAM: walk the allocator node chain; each node's start address
 *   becomes the PTE base. On the IGP chipsets 0xaa/0xac/0xaf VRAM is
 *   carved from system RAM, so vram_sys_base is added. Tile flags go
 *   into PTE bits 40+, bit 0 marks the page present.
 * SYSRAM (snoop / nosnoop): one PTE per PAGE_SIZE page from the BO's
 *   dmapages[] DMA addresses; the SNOOP variant gets extra PTE bits
 *   set on a line missing from this extraction.
 * On any map_contig_range failure the partially built mapping is torn
 * down with nv50_vspace_do_unmap before propagating the error.
 * Ends with a BAR flush so CPU-visible mappings are coherent.
 * NOTE(review): declarations of `ret`/`roff`/`i`, the `roff`
 * accumulation, `break`s, default case and return statements are in
 * lines missing here — verify against upstream before relying on
 * control flow.
 */
102 nv50_vspace_do_map (struct pscnv_vspace
*vs
, struct pscnv_bo
*bo
, uint64_t offset
) {
103 struct drm_nouveau_private
*dev_priv
= vs
->dev
->dev_private
;
104 struct pscnv_mm_node
*n
;
107 switch (bo
->flags
& PSCNV_GEM_MEMTYPE_MASK
) {
108 case PSCNV_GEM_VRAM_SMALL
:
109 case PSCNV_GEM_VRAM_LARGE
:
110 for (n
= bo
->mmnode
; n
; n
= n
->next
) {
111 /* XXX: add LP support */
112 uint64_t pte
= n
->start
;
/* IGPs address "VRAM" through a system-RAM window */
113 if (dev_priv
->chipset
== 0xaa || dev_priv
->chipset
== 0xac || dev_priv
->chipset
== 0xaf) {
114 pte
+= dev_priv
->vram_sys_base
;
117 pte
|= (uint64_t)bo
->tile_flags
<< 40;
118 pte
|= 1; /* present */
119 if ((ret
= nv50_vspace_map_contig_range(vs
, offset
+ roff
, pte
, n
->size
, 0))) {
120 nv50_vspace_do_unmap (vs
, offset
, bo
->size
);
126 case PSCNV_GEM_SYSRAM_SNOOP
:
127 case PSCNV_GEM_SYSRAM_NOSNOOP
:
128 for (i
= 0; i
< (bo
->size
>> PAGE_SHIFT
); i
++) {
129 uint64_t pte
= bo
->dmapages
[i
];
130 pte
|= (uint64_t)bo
->tile_flags
<< 40;
/* SNOOP pages get additional PTE bits (set in a missing line) */
132 if ((bo
->flags
& PSCNV_GEM_MEMTYPE_MASK
) == PSCNV_GEM_SYSRAM_SNOOP
)
136 if ((ret
= nv50_vspace_map_contig_range(vs
, offset
+ roff
, pte
, PAGE_SIZE
, 0))) {
137 nv50_vspace_do_unmap (vs
, offset
, bo
->size
);
/* make the new PTEs visible through the BARs */
146 dev_priv
->vm
->bar_flush(vs
->dev
);
/*
 * nv50_vspace_do_unmap: clear the PTEs covering [offset, offset+length)
 * in this vspace, then flush. PD slots that were never allocated
 * (pt[pdenum] == NULL) are skipped — there is nothing to clear.
 * After clearing, the BAR is flushed; then either the BAR-unit TLB
 * (unit 6) is flushed or the full per-engine vspace flush is run —
 * the if/else selecting between the two calls below (presumably the
 * BAR vspace vs. a normal vspace — confirm upstream) is in lines
 * missing from this extraction, as are the page-advance loop and the
 * declarations.
 */
151 nv50_vspace_do_unmap (struct pscnv_vspace
*vs
, uint64_t offset
, uint64_t length
) {
152 struct drm_nouveau_private
*dev_priv
= vs
->dev
->dev_private
;
154 uint32_t pgnum
= offset
/ 0x1000;
155 uint32_t pdenum
= pgnum
/ NV50_VM_SPTE_COUNT
;
156 uint32_t ptenum
= pgnum
% NV50_VM_SPTE_COUNT
;
157 if (nv50_vs(vs
)->pt
[pdenum
]) {
158 nv_wv32(nv50_vs(vs
)->pt
[pdenum
], ptenum
* 8, 0);
163 dev_priv
->vm
->bar_flush(vs
->dev
);
165 return nv50_vm_flush(vs
->dev
, 6);
167 nv50_vspace_tlb_flush(vs
);
/*
 * nv50_vspace_new: per-vspace constructor for the NV50 VM engine.
 * Rejects vspaces larger than 1 TiB (40-bit VA limit), allocates the
 * engine-private struct nv50_vspace into vs->engdata, initializes its
 * channel list, and sets up the address allocator over [0, vs->size)
 * with 4 KiB granularity (0x10000 / 0x20000000 are allocator tuning
 * parameters passed straight through to pscnv_mm_init).
 * NOTE(review): the error returns after the size check and the kzalloc
 * failure, and the final return of `ret`, are in missing lines.
 */
172 static int nv50_vspace_new(struct pscnv_vspace
*vs
) {
175 /* XXX: could actually use it some day... */
176 if (vs
->size
> 1ull << 40)
179 vs
->engdata
= kzalloc(sizeof(struct nv50_vspace
), GFP_KERNEL
);
181 NV_ERROR(vs
->dev
, "VM: Couldn't alloc vspace eng\n");
184 INIT_LIST_HEAD(&nv50_vs(vs
)->chan_list
);
185 ret
= pscnv_mm_init(vs
->dev
, 0, vs
->size
, 0x1000, 0x10000, 0x20000000, &vs
->mm
);
/*
 * nv50_vspace_free: per-vspace destructor — release every page table
 * BO that was lazily allocated by nv50_vspace_fill_pd_slot.
 * NOTE(review): declaration of `i`, the mm takedown and the kfree of
 * vs->engdata (expected counterparts of nv50_vspace_new) are in lines
 * missing from this extraction.
 */
191 static void nv50_vspace_free(struct pscnv_vspace
*vs
) {
193 for (i
= 0; i
< NV50_VM_PDE_COUNT
; i
++) {
194 if (nv50_vs(vs
)->pt
[i
]) {
195 pscnv_mem_free(nv50_vs(vs
)->pt
[i
]);
/*
 * nv50_vm_map_user: map a BO into the user-visible BAR1 window, i.e.
 * the [0, fb_size) range of the shared BAR vspace; the resulting node
 * is stored in bo->map1.
 * NOTE(review): lines 204-205 of the original are missing here —
 * presumably an "already mapped" early return; confirm upstream.
 */
201 static int nv50_vm_map_user(struct pscnv_bo
*bo
) {
202 struct drm_nouveau_private
*dev_priv
= bo
->dev
->dev_private
;
203 struct nv50_vm_engine
*vme
= nv50_vm(dev_priv
->vm
);
206 return pscnv_vspace_map(vme
->barvm
, bo
, 0, dev_priv
->fb_size
, 0, &bo
->map1
);
/*
 * nv50_vm_map_kernel: map a BO into the kernel-only BAR3/RAMIN window,
 * i.e. the [fb_size, fb_size + ramin_size) range of the shared BAR
 * vspace; the resulting node is stored in bo->map3. Used internally
 * for page tables and channel BOs so nv_wv32 can reach them.
 * NOTE(review): lines 212-213 of the original are missing here —
 * presumably an "already mapped" early return; confirm upstream.
 */
209 static int nv50_vm_map_kernel(struct pscnv_bo
*bo
) {
210 struct drm_nouveau_private
*dev_priv
= bo
->dev
->dev_private
;
211 struct nv50_vm_engine
*vme
= nv50_vm(dev_priv
->vm
);
214 return pscnv_vspace_map(vme
->barvm
, bo
, dev_priv
->fb_size
, dev_priv
->fb_size
+ dev_priv
->ramin_size
, 0, &bo
->map3
);
/*
 * nv50_vm_bar_flush: NV50-specific BAR write-post flush — poke
 * register 0x330c and wait (2 s) for bit 1 to clear; log on timeout.
 * NOTE(review): return type line and closing braces are outside this
 * extraction.
 */
218 nv50_vm_bar_flush(struct drm_device
*dev
) {
219 nv_wr32(dev
, 0x330c, 1);
220 if (!nouveau_wait_until(dev
, 2000000000ULL, 0x330c, 2, 0)) {
221 NV_ERROR(dev
, "BAR flush timeout!\n");
/*
 * nv84_vm_bar_flush: NV84+ variant of the BAR flush — same protocol as
 * nv50_vm_bar_flush but the register moved to 0x70000.
 * NOTE(review): return type line and closing braces are outside this
 * extraction.
 */
226 nv84_vm_bar_flush(struct drm_device
*dev
) {
227 nv_wr32(dev
, 0x70000, 1);
228 if (!nouveau_wait_until(dev
, 2000000000ULL, 0x70000, 2, 0)) {
229 NV_ERROR(dev
, "BAR flush timeout!\n");
/*
 * nv50_vm_init: bring up the NV50 VM engine.
 * 1. Allocate the engine struct and fill its vtable with the nv50_*
 *    callbacks; bar_flush is chipset-dependent (0x50 vs NV84+).
 * 2. Publish it as dev_priv->vm, record RAMIN base, init the lock.
 * 3. Program 0x100c90 with a chipset-specific magic so trap status
 *    reads are meaningful (values are not understood — see comment).
 * 4. Create the BAR vspace covering fb_size + ramin_size, and a
 *    channel in it whose page directory backs the BARs; point 0x1704
 *    at that channel's BO.
 * 5. Create DMA objects for BAR1 ([0, fb_size)) and BAR3
 *    ([fb_size, fb_size + ramin_size)) and install them via
 *    0x1708/0x170c.
 * 6. Map the channel BO and the first page table into the kernel
 *    window so the CPU can write them.
 * NOTE(review): error-handling branches (kzalloc/vspace/chan failure),
 * the switch case labels for the 0x100c90 values, and the final return
 * are in lines missing from this extraction — the cleanup ordering on
 * the error paths cannot be reviewed from here.
 */
234 nv50_vm_init(struct drm_device
*dev
) {
235 struct drm_nouveau_private
*dev_priv
= dev
->dev_private
;
236 int bar1dma
, bar3dma
;
237 struct nv50_vm_engine
*vme
= kzalloc(sizeof *vme
, GFP_KERNEL
);
239 NV_ERROR(dev
, "VM: Couldn't alloc engine\n");
/* wire up the engine vtable */
242 vme
->base
.takedown
= nv50_vm_takedown
;
243 vme
->base
.do_vspace_new
= nv50_vspace_new
;
244 vme
->base
.do_vspace_free
= nv50_vspace_free
;
245 vme
->base
.place_map
= nv50_vspace_place_map
;
246 vme
->base
.do_map
= nv50_vspace_do_map
;
247 vme
->base
.do_unmap
= nv50_vspace_do_unmap
;
248 vme
->base
.map_user
= nv50_vm_map_user
;
249 vme
->base
.map_kernel
= nv50_vm_map_kernel
;
250 if (dev_priv
->chipset
== 0x50)
251 vme
->base
.bar_flush
= nv50_vm_bar_flush
;
253 vme
->base
.bar_flush
= nv84_vm_bar_flush
;
254 dev_priv
->vm
= &vme
->base
;
256 dev_priv
->vm_ramin_base
= dev_priv
->fb_size
;
257 spin_lock_init(&dev_priv
->vm
->vs_lock
);
259 /* This is needed to get meaningful information from 100c90
260 * on traps. No idea what these values mean exactly. */
261 switch (dev_priv
->chipset
) {
263 nv_wr32(dev
, 0x100c90, 0x0707ff);
269 nv_wr32(dev
, 0x100c90, 0x0d0fff);
272 nv_wr32(dev
, 0x100c90, 0x1d07ff);
/* BAR vspace spans VRAM followed by RAMIN */
275 vme
->barvm
= pscnv_vspace_new (dev
, dev_priv
->fb_size
+ dev_priv
->ramin_size
, 0, 1);
281 vme
->barch
= pscnv_chan_new (dev
, vme
->barvm
, 1);
283 pscnv_vspace_unref(vme
->barvm
);
/* point the BAR page-directory base at the channel BO */
288 nv_wr32(dev
, 0x1704, 0x40000000 | vme
->barch
->bo
->start
>> 12);
289 bar1dma
= nv50_chan_dmaobj_new(vme
->barch
, 0x7fc00000, 0, dev_priv
->fb_size
);
290 bar3dma
= nv50_chan_dmaobj_new(vme
->barch
, 0x7fc00000, dev_priv
->fb_size
, dev_priv
->ramin_size
);
291 nv_wr32(dev
, 0x1708, 0x80000000 | bar1dma
>> 4);
292 nv_wr32(dev
, 0x170c, 0x80000000 | bar3dma
>> 4);
/* make the channel BO and first page table CPU-writable */
294 nv50_vm_map_kernel(vme
->barch
->bo
);
295 nv50_vm_map_kernel(nv50_vs(vme
->barvm
)->pt
[0]);
/*
 * nv50_vm_takedown: tear down the NV50 VM engine — disable the BAR
 * DMA objects and page-directory pointer (0x1704-0x1710), then drop
 * the references to the BAR channel and vspace created by
 * nv50_vm_init.
 * NOTE(review): the tail (expected kfree of vme / clearing of
 * dev_priv->vm) is in lines missing from this extraction.
 */
300 nv50_vm_takedown(struct drm_device
*dev
) {
301 struct drm_nouveau_private
*dev_priv
= dev
->dev_private
;
302 struct nv50_vm_engine
*vme
= nv50_vm(dev_priv
->vm
);
/* disable BARs before freeing what backs them */
305 nv_wr32(dev
, 0x1708, 0);
306 nv_wr32(dev
, 0x170c, 0);
307 nv_wr32(dev
, 0x1710, 0);
308 nv_wr32(dev
, 0x1704, 0);
309 pscnv_chan_unref(vme
->barch
);
310 pscnv_vspace_unref(vme
->barvm
);
315 /* VM trap handling on NV50 is some kind of a fucking joke.
317 * So, there's this little bugger called MMU, which is in PFB area near
318 * 0x100c80 and contains registers to flush the TLB caches, and to report
321 * And you have several units making use of that MMU. The known ones atm
322 * include PGRAPH, PFIFO, the BARs, and the PEEPHOLE. Each of these has its
323 * own TLBs. And most of them have several subunits, each having a separate
326 * Now, if you use an address that is bad in some way, the MMU responds "NO
327 * PAGE!!!11!1". And stores the relevant address + unit + channel into
328 * 0x100c90 area, where you can read it. However, it does NOT report an
329 * interrupt - this is done by the faulting unit.
331 * Now, if you get several page faults at once, which is not that uncommon
332 * if you fuck up something in your code, all but the first trap is lost.
333 * The unit reporting the trap may or may not also store the address on its
336 * So we report the trap in two pieces. First we go through all the possible
337 * faulters and report their status, which may range anywhere from full access
338 * info [like TPDMA] to just "oh! a trap!" [like VFETCH]. Then we ask the MMU
339 * for whatever trap it remembers. Then the user can look at dmesg and maybe
340 * match them using the MMU status field. Which we should decode someday, but
343 * As for the Holy Grail of Demand Paging - hah. Who the hell knows. Given the
344 * fucked up reporting, the only hope lies in getting all individual units to
345 * cooperate. BAR accesses quite obviously cannot be demand paged [not a big
346 * problem - that's what host page tables are for]. PFIFO accesses all seem
347 * restartable just fine. As for PGRAPH... some, like TPDMA, are already dead
348 * when they happen, but maybe there's a DEBUG bit somewhere that changes it.
349 * Some others, like M2MF, hang on fault, and are therefore promising. But
350 * this requires shitloads of RE repeated for every unit. Have fun.
/*
 * Trap-decoding tables: each pscnv_enumval maps a raw hardware field
 * value to a human-readable name, with an optional `data` pointer to a
 * nested table for the next-level field (unit -> subunit ->
 * subsubunit). Terminated by an entry with a NULL name (see
 * pscnv_enum_find). Names come from reverse engineering; gaps are
 * annotated inline.
 * NOTE(review): the struct's member declarations and numerous table
 * entries (e.g. trap reason 5, most subunit tables' contents and their
 * terminators) are in lines missing from this extraction — do not
 * treat the visible entries as complete.
 */
354 struct pscnv_enumval
{
360 static struct pscnv_enumval vm_trap_reasons
[] = {
361 { 0, "PT_NOT_PRESENT", 0},
362 { 1, "PT_TOO_SHORT", 0 },
363 { 2, "PAGE_NOT_PRESENT", 0 },
364 { 3, "PAGE_SYSTEM_ONLY", 0 },
365 { 4, "PAGE_READ_ONLY", 0 },
367 { 6, "NULL_DMAOBJ", 0 },
368 { 7, "WRONG_MEMTYPE", 0 },
369 /* 8-0xa never seen */
370 { 0xb, "VRAM_LIMIT", 0 },
371 /* 0xc-0xe never seen */
372 { 0xf, "DMAOBJ_LIMIT", 0 },
376 static struct pscnv_enumval vm_dispatch_subsubunits
[] = {
383 { 6, "M2M_NOTIFY", 0 },
387 static struct pscnv_enumval vm_ccache_subsubunits
[] = {
394 static struct pscnv_enumval vm_tprop_subsubunits
[] = {
405 { 0xa, "GLOBAL", 0 },
411 static struct pscnv_enumval vm_pgraph_subunits
[] = {
413 { 3, "DISPATCH", vm_dispatch_subsubunits
},
414 { 5, "CCACHE", vm_ccache_subsubunits
},
417 { 0xa, "TEXTURE", 0 },
418 { 0xb, "TPROP", vm_tprop_subsubunits
},
422 static struct pscnv_enumval vm_crypt_subsubunits
[] = {
430 static struct pscnv_enumval vm_pcrypt_subunits
[] = {
431 { 0xe, "CRYPT", vm_crypt_subsubunits
},
435 static struct pscnv_enumval vm_pfifo_subsubunits
[] = {
437 { 1, "SEMAPHORE", 0 },
438 /* 3 seen. also on semaphore. but couldn't reproduce. */
442 static struct pscnv_enumval vm_pfifo_subunits
[] = {
444 { 8, "FIFO", vm_pfifo_subsubunits
},
448 static struct pscnv_enumval vm_peephole_subunits
[] = {
449 /* even more curious. */
455 static struct pscnv_enumval vm_bar_subsubunits
[] = {
461 static struct pscnv_enumval vm_bar_subunits
[] = {
462 /* even more curious. */
463 { 4, "WRITE", vm_bar_subsubunits
},
464 { 8, "READ", vm_bar_subsubunits
},
465 /* 0xa also seen. some kind of write. */
469 static struct pscnv_enumval vm_units
[] = {
470 { 0, "PGRAPH", vm_pgraph_subunits
},
472 /* 2, 3 never seen */
473 { 4, "PEEPHOLE", vm_peephole_subunits
},
474 { 5, "PFIFO", vm_pfifo_subunits
},
475 { 6, "BAR", vm_bar_subunits
},
479 { 0xa, "PCRYPT", vm_pcrypt_subunits
},
480 /* 0xb, 0xc never seen */
482 { 0xe, "PDAEMON", 0 },
/*
 * pscnv_enum_find: linear scan of a pscnv_enumval table for an entry
 * whose value equals `val`. Tables are terminated by an entry with a
 * NULL name, so the loop stops either on a match or at the sentinel.
 * NOTE(review): the list-advance statement and the return (match vs.
 * not-found result — presumably NULL or the sentinel) are in lines
 * missing from this extraction.
 */
486 static struct pscnv_enumval
*pscnv_enum_find (struct pscnv_enumval
*list
, int val
) {
487 while (list
->value
!= val
&& list
->name
)
/*
 * nv50_vm_trap: read and report the MMU's remembered page fault.
 * If 0x100c90 has its valid bit (bit 31) set, read the six trap words
 * by indexing 0x100c94 through bits 24-26 of 0x100c90. Field layout of
 * trap[0] differs by chipset: pre-NVA3 (and the 0xaa-0xac IGPs) pack
 * s1/s2/s3 as nibbles at bits 4/8/12, newer chips as bytes at bits
 * 8/16/24. s0..s3 are then decoded through the enum tables
 * (vm_trap_reasons for the reason, vm_units -> subunits ->
 * subsubunits for the faulting unit path), falling back to hex when a
 * value is unknown. The channel is identified by looking up the
 * handle built from trap[2]/trap[1]; the fault address is
 * trap[5]/trap[4]/trap[3], and trap[5] bit 8 distinguishes read from
 * write. Finally the trap is acknowledged by writing the valid bit
 * back to 0x100c90.
 * NOTE(review): declarations (trap[], chan, reason/unit1/unit2/unit3
 * buffers, i), the s0 extraction, the else keywords between the
 * snprintf pairs, and the if/else around the two NV_INFO calls
 * (known vs. UNKNOWN channel) are in lines missing from this
 * extraction.
 */
495 void nv50_vm_trap(struct drm_device
*dev
) {
496 struct drm_nouveau_private
*dev_priv
= dev
->dev_private
;
499 uint32_t idx
= nv_rd32(dev
, 0x100c90);
500 uint32_t s0
, s1
, s2
, s3
;
505 struct pscnv_enumval
*ev
;
507 if (idx
& 0x80000000) {
/* fetch the six status words via the index window */
509 for (i
= 0; i
< 6; i
++) {
510 nv_wr32(dev
, 0x100c90, idx
| i
<< 24);
511 trap
[i
] = nv_rd32(dev
, 0x100c94);
/* field widths in trap[0] depend on the chipset generation */
513 if (dev_priv
->chipset
< 0xa3 || (dev_priv
->chipset
>= 0xaa && dev_priv
->chipset
<= 0xac)) {
515 s1
= (trap
[0] >> 4) & 0xf;
516 s2
= (trap
[0] >> 8) & 0xf;
517 s3
= (trap
[0] >> 12) & 0xf;
520 s1
= (trap
[0] >> 8) & 0xff;
521 s2
= (trap
[0] >> 16) & 0xff;
522 s3
= (trap
[0] >> 24) & 0xff;
/* decode each field, falling back to raw hex for unknown values */
524 ev
= pscnv_enum_find(vm_trap_reasons
, s1
);
526 snprintf(reason
, sizeof(reason
), "%s", ev
->name
);
528 snprintf(reason
, sizeof(reason
), "0x%x", s1
);
529 ev
= pscnv_enum_find(vm_units
, s0
);
531 snprintf(unit1
, sizeof(unit1
), "%s", ev
->name
);
533 snprintf(unit1
, sizeof(unit1
), "0x%x", s0
);
534 if (ev
&& (ev
= ev
->data
) && (ev
= pscnv_enum_find(ev
, s2
)))
535 snprintf(unit2
, sizeof(unit2
), "%s", ev
->name
);
537 snprintf(unit2
, sizeof(unit2
), "0x%x", s2
);
538 if (ev
&& (ev
= ev
->data
) && (ev
= pscnv_enum_find(ev
, s3
)))
539 snprintf(unit3
, sizeof(unit3
), "%s", ev
->name
);
541 snprintf(unit3
, sizeof(unit3
), "0x%x", s3
);
542 chan
= pscnv_chan_handle_lookup(dev
, trap
[2] << 16 | trap
[1]);
544 NV_INFO(dev
, "VM: Trapped %s at %02x%04x%04x ch %d on %s/%s/%s, reason %s\n",
545 (trap
[5]&0x100?"read":"write"),
546 trap
[5]&0xff, trap
[4]&0xffff,
547 trap
[3]&0xffff, chan
, unit1
, unit2
, unit3
, reason
);
549 NV_INFO(dev
, "VM: Trapped %s at %02x%04x%04x UNKNOWN ch %08x on %s/%s/%s, reason %s\n",
550 (trap
[5]&0x100?"read":"write"),
551 trap
[5]&0xff, trap
[4]&0xffff,
552 trap
[3]&0xffff, trap
[2] << 16 | trap
[1], unit1
, unit2
, unit3
, reason
);
/* ack the trap so the MMU can latch the next one */
554 nv_wr32(dev
, 0x100c90, idx
| 0x80000000);