// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/log2.h>

#include "gen6_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "intel_gt.h"

/* Write pde (index) from the page directory @pd to the page table @pt */
static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
                                  const unsigned int pde,
                                  const struct i915_page_table *pt)
{
        dma_addr_t addr = pt ? px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]);

        /* Caller needs to make sure the write completes if necessary */
        iowrite32(GEN6_PDE_ADDR_ENCODE(addr) | GEN6_PDE_VALID,
                  ppgtt->pd_addr + pde);
}

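/*
 * Enable the per-process GTT for gen7: program the ECO/ECOCHK cacheability
 * bits and then set GFX_PPGTT_ENABLE in each engine's GFX_MODE register,
 * which is per-ring on gen7+.
 */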
void gen7_ppgtt_enable(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        struct intel_uncore *uncore = gt->uncore;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        u32 ecochk;

        intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);

        ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
        if (IS_HASWELL(i915)) {
                ecochk |= ECOCHK_PPGTT_WB_HSW;
        } else {
                ecochk |= ECOCHK_PPGTT_LLC_IVB;
                ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
        }
        intel_uncore_write(uncore, GAM_ECOCHK, ecochk);

        for_each_engine(engine, gt, id) {
                /* GFX_MODE is per-ring on gen7+ */
                ENGINE_WRITE(engine,
                             RING_MODE_GEN7,
                             _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
        }
}

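/*
 * Enable the per-process GTT for gen6: program the ECO/GAB/ECOCHK
 * cacheability and fault-handling bits, then set GFX_PPGTT_ENABLE in the
 * global GFX_MODE register unless PPGTT is unavailable (e.g. for VT-d).
 */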
void gen6_ppgtt_enable(struct intel_gt *gt)
{
        struct intel_uncore *uncore = gt->uncore;

        intel_uncore_rmw(uncore,
                         GAC_ECO_BITS,
                         0,
                         ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);

        intel_uncore_rmw(uncore,
                         GAB_CTL,
                         0,
                         GAB_CTL_CONT_AFTER_PAGEFAULT);

        intel_uncore_rmw(uncore,
                         GAM_ECOCHK,
                         0,
                         ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

        if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
                intel_uncore_write(uncore,
                                   GFX_MODE,
                                   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}

/* PPGTT support for Sandybridge/Gen6 and later */
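/*
 * Clear a range by pointing every PTE in it back at the scratch page. The
 * hardware caches PDEs inside the context image, so page tables are only
 * flagged for later reclaim (scan_for_unused_pt) rather than removed here.
 */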
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
                                   u64 start, u64 length)
{
        struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
        const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
        const gen6_pte_t scratch_pte = vm->scratch[0]->encode;
        unsigned int pde = first_entry / GEN6_PTES;
        unsigned int pte = first_entry % GEN6_PTES;
        unsigned int num_entries = length / I915_GTT_PAGE_SIZE;

        while (num_entries) {
                struct i915_page_table * const pt =
                        i915_pt_entry(ppgtt->base.pd, pde++);
                const unsigned int count = min(num_entries, GEN6_PTES - pte);
                gen6_pte_t *vaddr;

                num_entries -= count;

                GEM_BUG_ON(count > atomic_read(&pt->used));
                if (!atomic_sub_return(count, &pt->used))
                        ppgtt->scan_for_unused_pt = true;

                /*
                 * Note that the hw doesn't support removing PDE on the fly
                 * (they are cached inside the context with no means to
                 * invalidate the cache), so we can only reset the PTE
                 * entries back to scratch.
                 */

                vaddr = kmap_atomic_px(pt);
                memset32(vaddr + pte, scratch_pte, count);
                kunmap_atomic(vaddr);

                pte = 0;
        }
}

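/*
 * Write PTEs for every page backing @vma, walking the scatterlist one 4K
 * page at a time and switching to the next page table whenever a page-table
 * boundary (GEN6_PTES entries) is crossed. The page tables covering the
 * range must already have been allocated.
 */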
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
                                      struct i915_vma *vma,
                                      enum i915_cache_level cache_level,
                                      u32 flags)
{
        struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
        struct i915_page_directory * const pd = ppgtt->pd;
        unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
        unsigned int act_pt = first_entry / GEN6_PTES;
        unsigned int act_pte = first_entry % GEN6_PTES;
        const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
        struct sgt_dma iter = sgt_dma(vma);
        gen6_pte_t *vaddr;

        GEM_BUG_ON(!pd->entry[act_pt]);

        vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
        do {
                GEM_BUG_ON(sg_dma_len(iter.sg) < I915_GTT_PAGE_SIZE);
                vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);

                iter.dma += I915_GTT_PAGE_SIZE;
                if (iter.dma == iter.max) {
                        iter.sg = __sg_next(iter.sg);
                        if (!iter.sg || sg_dma_len(iter.sg) == 0)
                                break;

                        iter.dma = sg_dma_address(iter.sg);
                        iter.max = iter.dma + sg_dma_len(iter.sg);
                }

                if (++act_pte == GEN6_PTES) {
                        kunmap_atomic(vaddr);
                        vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
                        act_pte = 0;
                }
        } while (1);
        kunmap_atomic(vaddr);

        vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}

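/*
 * Rewrite the PDEs covering [start, end) into the GGTT-resident page
 * directory and invalidate the GGTT so the hardware picks up the newly
 * installed page tables.
 */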
static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
{
        struct i915_page_directory * const pd = ppgtt->base.pd;
        struct i915_page_table *pt;
        unsigned int pde;

        start = round_down(start, SZ_64K);
        end = round_up(end, SZ_64K) - start;

        mutex_lock(&ppgtt->flush);

        gen6_for_each_pde(pt, pd, start, end, pde)
                gen6_write_pde(ppgtt, pde, pt);

        mb();
        ioread32(ppgtt->pd_addr + pde - 1);
        gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
        mb();

        mutex_unlock(&ppgtt->flush);
}

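/*
 * Ensure page tables exist for [start, start + length): take preallocated
 * tables from @stash for any missing PDE, initialise them to scratch and
 * bump the use count of every table covering the range. If new tables were
 * installed while the PD is bound in the GGTT, flush the updated PDEs.
 */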
static void gen6_alloc_va_range(struct i915_address_space *vm,
                                struct i915_vm_pt_stash *stash,
                                u64 start, u64 length)
{
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
        struct i915_page_directory * const pd = ppgtt->base.pd;
        struct i915_page_table *pt;
        bool flush = false;
        u64 from = start;
        unsigned int pde;

        spin_lock(&pd->lock);
        gen6_for_each_pde(pt, pd, start, length, pde) {
                const unsigned int count = gen6_pte_count(start, length);

                if (!pt) {
                        spin_unlock(&pd->lock);

                        pt = stash->pt[0];
                        __i915_gem_object_pin_pages(pt->base);
                        i915_gem_object_make_unshrinkable(pt->base);

                        fill32_px(pt, vm->scratch[0]->encode);

                        spin_lock(&pd->lock);
                        if (!pd->entry[pde]) {
                                stash->pt[0] = pt->stash;
                                atomic_set(&pt->used, 0);
                                pd->entry[pde] = pt;
                        } else {
                                pt = pd->entry[pde];
                        }

                        flush = true;
                }

                atomic_add(count, &pt->used);
        }
        spin_unlock(&pd->lock);

        if (flush && i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
                intel_wakeref_t wakeref;

                with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref)
                        gen6_flush_pd(ppgtt, from, start);
        }
}

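/*
 * Set up the scratch page (level 0) and a scratch page table (level 1)
 * whose entries all point at the read-only scratch page, so that unused
 * PDEs and PTEs always decode to valid memory.
 */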
static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
        struct i915_address_space * const vm = &ppgtt->base.vm;
        int ret;

        ret = setup_scratch_page(vm);
        if (ret)
                return ret;

        vm->scratch[0]->encode =
                vm->pte_encode(px_dma(vm->scratch[0]),
                               I915_CACHE_NONE, PTE_READ_ONLY);

        vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
        if (IS_ERR(vm->scratch[1])) {
                ret = PTR_ERR(vm->scratch[1]);
                goto err_scratch0;
        }

        ret = pin_pt_dma(vm, vm->scratch[1]);
        if (ret)
                goto err_scratch1;

        fill32_px(vm->scratch[1], vm->scratch[0]->encode);

        return 0;

err_scratch1:
        i915_gem_object_put(vm->scratch[1]);
err_scratch0:
        i915_gem_object_put(vm->scratch[0]);
        return ret;
}

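/* Release every page table still attached to the page directory. */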
static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
{
        struct i915_page_directory * const pd = ppgtt->base.pd;
        struct i915_page_table *pt;
        u32 pde;

        gen6_for_all_pdes(pt, pd, pde)
                if (pt)
                        free_pt(&ppgtt->base.vm, pt);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));

        __i915_vma_put(ppgtt->vma);

        gen6_ppgtt_free_pd(ppgtt);
        free_scratch(vm);

        mutex_destroy(&ppgtt->flush);
        mutex_destroy(&ppgtt->pin_mutex);

        free_pd(&ppgtt->base.vm, ppgtt->base.pd);
}

static int pd_vma_set_pages(struct i915_vma *vma)
{
        vma->pages = ERR_PTR(-ENODEV);

        return 0;
}

static void pd_vma_clear_pages(struct i915_vma *vma)
{
        GEM_BUG_ON(!vma->pages);

        vma->pages = NULL;
}

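/*
 * Binding the PD vma publishes the page directory to the hardware: cache
 * the GGTT offset used for PP_DIR_BASE (ppgtt->pp_dir) and the iomem
 * address of the PDEs within the GGTT, then write out every PDE.
 */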
static void pd_vma_bind(struct i915_address_space *vm,
                        struct i915_vm_pt_stash *stash,
                        struct i915_vma *vma,
                        enum i915_cache_level cache_level,
                        u32 unused)
{
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
        struct gen6_ppgtt *ppgtt = vma->private;
        u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;

        ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10;
        ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;

        gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
}

static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
{
        struct gen6_ppgtt *ppgtt = vma->private;
        struct i915_page_directory * const pd = ppgtt->base.pd;
        struct i915_page_table *pt;
        unsigned int pde;

        if (!ppgtt->scan_for_unused_pt)
                return;

        /* Free all no longer used page tables */
        gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
                if (!pt || atomic_read(&pt->used))
                        continue;

                free_pt(&ppgtt->base.vm, pt);
                pd->entry[pde] = NULL;
        }

        ppgtt->scan_for_unused_pt = false;
}

static const struct i915_vma_ops pd_vma_ops = {
        .set_pages = pd_vma_set_pages,
        .clear_pages = pd_vma_clear_pages,
        .bind_vma = pd_vma_bind,
        .unbind_vma = pd_vma_unbind,
};

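/*
 * Create a standalone GGTT vma wrapping the page directory itself, so the
 * PD can be pinned into the GGTT (and so addressed by the hardware) like
 * any other object. It carries no GEM object or backing pages of its own.
 */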
static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
{
        struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
        struct i915_vma *vma;

        GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
        GEM_BUG_ON(size > ggtt->vm.total);

        vma = i915_vma_alloc();
        if (!vma)
                return ERR_PTR(-ENOMEM);

        i915_active_init(&vma->active, NULL, NULL);

        kref_init(&vma->ref);
        mutex_init(&vma->pages_mutex);
        vma->vm = i915_vm_get(&ggtt->vm);
        vma->ops = &pd_vma_ops;
        vma->private = ppgtt;

        vma->size = size;
        vma->fence_size = size;
        atomic_set(&vma->flags, I915_VMA_GGTT);
        vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */

        INIT_LIST_HEAD(&vma->obj_link);
        INIT_LIST_HEAD(&vma->closed_link);

        return vma;
}

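/*
 * Pin the page-directory vma into the GGTT. Fast path: if the PD is already
 * pinned, just bump the pin count; otherwise take pin_mutex and perform the
 * first GGTT pin at the top of the GTT.
 */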
int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
{
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
        int err;

        GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));

        /*
         * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
         * which will be pinned into every active context.
         * (When vma->pin_count becomes atomic, I expect we will naturally
         * need a larger, unpacked, type and kill this redundancy.)
         */
        if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
                return 0;

        if (mutex_lock_interruptible(&ppgtt->pin_mutex))
                return -EINTR;

        /*
         * PPGTT PDEs reside in the GGTT and consist of 512 entries. The
         * allocator works in address space sizes, so it's multiplied by page
         * size. We allocate at the top of the GTT to avoid fragmentation.
         */
        err = 0;
        if (!atomic_read(&ppgtt->pin_count))
                err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);
        if (!err)
                atomic_inc(&ppgtt->pin_count);
        mutex_unlock(&ppgtt->pin_mutex);

        return err;
}

void gen6_ppgtt_unpin(struct i915_ppgtt *base)
{
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);

        GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
        if (atomic_dec_and_test(&ppgtt->pin_count))
                i915_vma_unpin(ppgtt->vma);
}

void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
{
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);

        if (!atomic_read(&ppgtt->pin_count))
                return;

        i915_vma_unpin(ppgtt->vma);
        atomic_set(&ppgtt->pin_count, 0);
}

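/*
 * Construct a gen6 ppgtt: initialise the base address space, install the
 * gen6 vm hooks, allocate the page directory and scratch pages, and wrap
 * the PD in a GGTT vma so it can be pinned when the ppgtt is used.
 */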
struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
{
        struct i915_ggtt * const ggtt = gt->ggtt;
        struct gen6_ppgtt *ppgtt;
        int err;

        ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
        if (!ppgtt)
                return ERR_PTR(-ENOMEM);

        mutex_init(&ppgtt->flush);
        mutex_init(&ppgtt->pin_mutex);

        ppgtt_init(&ppgtt->base, gt);
        ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
        ppgtt->base.vm.top = 1;

        ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
        ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
        ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
        ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
        ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;

        ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
        ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;

        ppgtt->base.pd = __alloc_pd(I915_PDES);
        if (!ppgtt->base.pd) {
                err = -ENOMEM;
                goto err_free;
        }

        err = gen6_ppgtt_init_scratch(ppgtt);
        if (err)
                goto err_pd;

        ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
        if (IS_ERR(ppgtt->vma)) {
                err = PTR_ERR(ppgtt->vma);
                goto err_scratch;
        }

        return &ppgtt->base;

err_scratch:
        free_scratch(&ppgtt->base.vm);
err_pd:
        free_pd(&ppgtt->base.vm, ppgtt->base.pd);
err_free:
        mutex_destroy(&ppgtt->pin_mutex);
        mutex_destroy(&ppgtt->flush);
        kfree(ppgtt);
        return ERR_PTR(err);
}