// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/log2.h>

#include "gen6_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "intel_gt.h"

/* Write pde (index) from the page directory @pd to the page table @pt */
static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
				  const unsigned int pde,
				  const struct i915_page_table *pt)
{
	/* Caller needs to make sure the write completes if necessary */
	iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
		  ppgtt->pd_addr + pde);
}

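/*
 * The page directory itself lives in the GGTT, so gen6_write_pde() above is
 * a plain MMIO store through ppgtt->pd_addr. The write is posted; callers
 * that need it visible to the GPU follow up with a read-back and a GGTT
 * invalidation (see gen6_flush_pd()).
 */
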
void gen7_ppgtt_enable(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 ecochk;

	intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);

	ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
	if (IS_HASWELL(i915)) {
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	intel_uncore_write(uncore, GAM_ECOCHK, ecochk);

	for_each_engine(engine, gt, id) {
		/* GFX_MODE is per-ring on gen7+ */
		ENGINE_WRITE(engine,
			     RING_MODE_GEN7,
			     _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

void gen6_ppgtt_enable(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;

	intel_uncore_rmw(uncore,
			 GAC_ECO_BITS,
			 0,
			 ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);

	intel_uncore_rmw(uncore,
			 GAB_CTL,
			 0,
			 GAB_CTL_CONT_AFTER_PAGEFAULT);

	intel_uncore_rmw(uncore,
			 GAM_ECOCHK,
			 0,
			 ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
		intel_uncore_write(uncore,
				   GFX_MODE,
				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}

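/*
 * GFX_MODE/RING_MODE are masked registers: the upper 16 bits select which of
 * the lower 16 bits are written. That is why the enable paths above use
 * _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE) instead of a read-modify-write.
 */
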
/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   u64 start, u64 length)
{
	struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
	const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	const gen6_pte_t scratch_pte = vm->scratch[0].encode;
	unsigned int pde = first_entry / GEN6_PTES;
	unsigned int pte = first_entry % GEN6_PTES;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;

	while (num_entries) {
		struct i915_page_table * const pt =
			i915_pt_entry(ppgtt->base.pd, pde++);
		const unsigned int count = min(num_entries, GEN6_PTES - pte);
		gen6_pte_t *vaddr;

		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));

		num_entries -= count;

		GEM_BUG_ON(count > atomic_read(&pt->used));
		if (!atomic_sub_return(count, &pt->used))
			ppgtt->scan_for_unused_pt = true;

		/*
		 * Note that the hw doesn't support removing PDE on the fly
		 * (they are cached inside the context with no means to
		 * invalidate the cache), so we can only reset the PTE
		 * entries back to scratch.
		 */

		vaddr = kmap_atomic_px(pt);
		memset32(vaddr + pte, scratch_pte, count);
		kunmap_atomic(vaddr);

		pte = 0;
	}
}

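/*
 * Clearing a range only rewrites the affected PTEs to point at the scratch
 * page; the page tables themselves stay in place (the PDEs are cached in the
 * context image and cannot be revoked on the fly). Tables whose used count
 * drops to zero are merely flagged via scan_for_unused_pt and reclaimed
 * later in pd_vma_unbind().
 */
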
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct i915_vma *vma,
				      enum i915_cache_level cache_level,
				      u32 flags)
{
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_page_directory * const pd = ppgtt->pd;
	unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
	unsigned int act_pt = first_entry / GEN6_PTES;
	unsigned int act_pte = first_entry % GEN6_PTES;
	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
	struct sgt_dma iter = sgt_dma(vma);
	gen6_pte_t *vaddr;

	GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);

	vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
	do {
		GEM_BUG_ON(iter.sg->length < I915_GTT_PAGE_SIZE);
		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);

		iter.dma += I915_GTT_PAGE_SIZE;
		if (iter.dma == iter.max) {
			iter.sg = __sg_next(iter.sg);
			if (!iter.sg)
				break;

			iter.dma = sg_dma_address(iter.sg);
			iter.max = iter.dma + iter.sg->length;
		}

		if (++act_pte == GEN6_PTES) {
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
			act_pte = 0;
		}
	} while (1);
	kunmap_atomic(vaddr);

	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}

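/*
 * Illustrative helper (hypothetical, not used by the driver): how a GTT
 * offset splits into the PDE/PTE indices used by gen6_ppgtt_insert_entries()
 * and gen6_ppgtt_clear_range() above. It relies only on I915_GTT_PAGE_SIZE
 * and GEN6_PTES, which are already used throughout this file.
 */
static inline void gen6_offset_to_pd_idx(u64 offset,
					 unsigned int *pde,
					 unsigned int *pte)
{
	const unsigned int first_entry = offset / I915_GTT_PAGE_SIZE;

	*pde = first_entry / GEN6_PTES; /* which page table */
	*pte = first_entry % GEN6_PTES; /* slot within that page table */
}
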
static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
{
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_table *pt;
	unsigned int pde;

	start = round_down(start, SZ_64K);
	end = round_up(end, SZ_64K) - start;

	mutex_lock(&ppgtt->flush);

	gen6_for_each_pde(pt, pd, start, end, pde)
		gen6_write_pde(ppgtt, pde, pt);

	mb();
	ioread32(ppgtt->pd_addr + pde - 1);
	gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
	mb();

	mutex_unlock(&ppgtt->flush);
}

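/*
 * The ioread32() of the last PDE written above appears to serve as a posting
 * read, making sure the PDE stores have reached the GGTT before the GGTT
 * TLBs are invalidated.
 */
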
static int gen6_alloc_va_range(struct i915_address_space *vm,
			       u64 start, u64 length)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_table *pt, *alloc = NULL;
	intel_wakeref_t wakeref;
	u64 from = start;
	unsigned int pde;
	int ret = 0;

	wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);

	spin_lock(&pd->lock);
	gen6_for_each_pde(pt, pd, start, length, pde) {
		const unsigned int count = gen6_pte_count(start, length);

		if (px_base(pt) == px_base(&vm->scratch[1])) {
			spin_unlock(&pd->lock);

			pt = fetch_and_zero(&alloc);
			if (!pt)
				pt = alloc_pt(vm);
			if (IS_ERR(pt)) {
				ret = PTR_ERR(pt);
				goto unwind_out;
			}

			fill32_px(pt, vm->scratch[0].encode);

			spin_lock(&pd->lock);
			if (pd->entry[pde] == &vm->scratch[1]) {
				pd->entry[pde] = pt;
			} else {
				alloc = pt;
				pt = pd->entry[pde];
			}
		}

		atomic_add(count, &pt->used);
	}
	spin_unlock(&pd->lock);

	if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND))
		gen6_flush_pd(ppgtt, from, start);

	goto out;

unwind_out:
	gen6_ppgtt_clear_range(vm, from, start - from);
out:
	if (alloc)
		free_px(vm, alloc);
	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
	return ret;
}

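/*
 * gen6_alloc_va_range() drops pd->lock around the page-table allocation and
 * retakes it before publishing the entry; if another thread installed a
 * table for the same PDE in the meantime, the freshly allocated one is
 * parked in 'alloc' and either reused for the next empty PDE or freed on
 * the way out.
 */
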
static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
	struct i915_address_space * const vm = &ppgtt->base.vm;
	struct i915_page_directory * const pd = ppgtt->base.pd;
	int ret;

	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
	if (ret)
		return ret;

	vm->scratch[0].encode =
		vm->pte_encode(px_dma(&vm->scratch[0]),
			       I915_CACHE_NONE, PTE_READ_ONLY);

	if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
		cleanup_scratch_page(vm);
		return -ENOMEM;
	}

	fill32_px(&vm->scratch[1], vm->scratch[0].encode);
	memset_p(pd->entry, &vm->scratch[1], I915_PDES);

	return 0;
}

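/*
 * Scratch layout set up above: scratch[0] is a single scratch page plus its
 * ready-made PTE encoding, scratch[1] is a scratch page table whose PTEs all
 * point at scratch[0]. Every PDE initially points at scratch[1], so reads
 * through unallocated ranges land on the scratch page.
 */
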
static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
{
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_dma * const scratch =
		px_base(&ppgtt->base.vm.scratch[1]);
	struct i915_page_table *pt;
	u32 pde;

	gen6_for_all_pdes(pt, pd, pde)
		if (px_base(pt) != scratch)
			free_px(&ppgtt->base.vm, pt);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));

	__i915_vma_put(ppgtt->vma);

	gen6_ppgtt_free_pd(ppgtt);
	free_scratch(vm);

	mutex_destroy(&ppgtt->flush);
	mutex_destroy(&ppgtt->pin_mutex);
	kfree(ppgtt->base.pd);
}

static int pd_vma_set_pages(struct i915_vma *vma)
{
	vma->pages = ERR_PTR(-ENODEV);

	return 0;
}

static void pd_vma_clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	vma->pages = NULL;
}

static int pd_vma_bind(struct i915_vma *vma,
		       enum i915_cache_level cache_level,
		       u32 unused)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
	struct gen6_ppgtt *ppgtt = vma->private;
	u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;

	px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;

	gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
	return 0;
}

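/*
 * Once the PD vma is bound, ppgtt->pd_addr points at the PDE slots inside
 * the CPU mapping of the GGTT (ggtt->gsm), which is what gen6_write_pde()
 * writes through; binding therefore finishes with a full gen6_flush_pd().
 */
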
static void pd_vma_unbind(struct i915_vma *vma)
{
	struct gen6_ppgtt *ppgtt = vma->private;
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_dma * const scratch =
		px_base(&ppgtt->base.vm.scratch[1]);
	struct i915_page_table *pt;
	unsigned int pde;

	if (!ppgtt->scan_for_unused_pt)
		return;

	/* Free all no longer used page tables */
	gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
		if (px_base(pt) == scratch || atomic_read(&pt->used))
			continue;

		free_px(&ppgtt->base.vm, pt);
		pd->entry[pde] = scratch;
	}

	ppgtt->scan_for_unused_pt = false;
}

static const struct i915_vma_ops pd_vma_ops = {
	.set_pages = pd_vma_set_pages,
	.clear_pages = pd_vma_clear_pages,
	.bind_vma = pd_vma_bind,
	.unbind_vma = pd_vma_unbind,
};

static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
{
	struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
	struct i915_vma *vma;

	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(size > ggtt->vm.total);

	vma = i915_vma_alloc();
	if (!vma)
		return ERR_PTR(-ENOMEM);

	i915_active_init(&vma->active, NULL, NULL);

	kref_init(&vma->ref);
	mutex_init(&vma->pages_mutex);
	vma->vm = i915_vm_get(&ggtt->vm);
	vma->ops = &pd_vma_ops;
	vma->private = ppgtt;

	vma->size = size;
	vma->fence_size = size;
	atomic_set(&vma->flags, I915_VMA_GGTT);
	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */

	INIT_LIST_HEAD(&vma->obj_link);
	INIT_LIST_HEAD(&vma->closed_link);

	return vma;
}

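/*
 * The vma built above is a bare GGTT vma with no backing object:
 * vma->pages is ERR_PTR(-ENODEV) via pd_vma_set_pages(), and the ROTATED
 * ggtt_view type is used solely to keep fences away from it.
 */
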
int gen6_ppgtt_pin(struct i915_ppgtt *base)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
	int err;

	GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));

	/*
	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
	 * which will be pinned into every active context.
	 * (When vma->pin_count becomes atomic, I expect we will naturally
	 * need a larger, unpacked, type and kill this redundancy.)
	 */
	if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
		return 0;

	if (mutex_lock_interruptible(&ppgtt->pin_mutex))
		return -EINTR;

	/*
	 * PPGTT PDEs reside in the GGTT and consist of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
	 */
	err = 0;
	if (!atomic_read(&ppgtt->pin_count))
		err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
	if (!err)
		atomic_inc(&ppgtt->pin_count);
	mutex_unlock(&ppgtt->pin_mutex);

	return err;
}

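/*
 * gen6_ppgtt_pin() takes the fast path with atomic_add_unless(), which only
 * succeeds while pin_count is already non-zero; the first pin falls through
 * to pin_mutex and performs the actual GGTT pin of the page directory.
 */
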
void gen6_ppgtt_unpin(struct i915_ppgtt *base)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);

	GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
	if (atomic_dec_and_test(&ppgtt->pin_count))
		i915_vma_unpin(ppgtt->vma);
}

void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);

	if (!atomic_read(&ppgtt->pin_count))
		return;

	i915_vma_unpin(ppgtt->vma);
	atomic_set(&ppgtt->pin_count, 0);
}

struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
{
	struct i915_ggtt * const ggtt = gt->ggtt;
	struct gen6_ppgtt *ppgtt;
	int err;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	mutex_init(&ppgtt->flush);
	mutex_init(&ppgtt->pin_mutex);

	ppgtt_init(&ppgtt->base, gt);
	ppgtt->base.vm.top = 1;

	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;

	ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;

	ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
	if (!ppgtt->base.pd) {
		err = -ENOMEM;
		goto err_free;
	}

	err = gen6_ppgtt_init_scratch(ppgtt);
	if (err)
		goto err_pd;

	ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
	if (IS_ERR(ppgtt->vma)) {
		err = PTR_ERR(ppgtt->vma);
		goto err_scratch;
	}

	return &ppgtt->base;

err_scratch:
	free_scratch(&ppgtt->base.vm);
err_pd:
	kfree(ppgtt->base.pd);
err_free:
	mutex_destroy(&ppgtt->pin_mutex);
	kfree(ppgtt);
	return ERR_PTR(err);
}