// SPDX-License-Identifier: GPL-2.0 OR MIT
/**************************************************************************
 *
 * Copyright 2019 VMware, Inc., Palo Alto, CA., USA
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
#include "vmwgfx_drv.h"

/*
 * Different methods for tracking dirty:
 * VMW_BO_DIRTY_PAGETABLE - Scan the pagetable for hardware dirty bits
 * VMW_BO_DIRTY_MKWRITE - Write-protect page table entries and record write-
 * accesses in the VM mkwrite() callback
 */
enum vmw_bo_dirty_method {
	VMW_BO_DIRTY_PAGETABLE,
	VMW_BO_DIRTY_MKWRITE
};

/*
 * No dirtied pages at scan trigger a transition to the _MKWRITE method,
 * similarly a certain percentage of dirty pages trigger a transition to
 * the _PAGETABLE method. How many triggers should we wait for before
 * changing method?
 */
#define VMW_DIRTY_NUM_CHANGE_TRIGGERS 2

/* Percentage to trigger a transition to the _PAGETABLE method */
#define VMW_DIRTY_PERCENTAGE 10
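
/*
 * Worked example of the heuristic above, assuming a buffer object of 256
 * pages: a pagetable scan that finds no dirty bits counts as one trigger
 * towards _MKWRITE, while an mkwrite scan that finds more than
 * VMW_DIRTY_PERCENTAGE percent of the pages written (integer percentage,
 * so at least 29 of 256 pages) counts as one trigger towards _PAGETABLE.
 * The method only changes after more than VMW_DIRTY_NUM_CHANGE_TRIGGERS
 * consecutive triggers of the same kind.
 */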

/**
 * struct vmw_bo_dirty - Dirty information for buffer objects
 * @start: First currently dirty bit
 * @end: Last currently dirty bit + 1
 * @method: The currently used dirty method
 * @change_count: Number of consecutive method change triggers
 * @ref_count: Reference count for this structure
 * @bitmap_size: The size of the bitmap in bits. Typically equal to the
 * number of pages in the bo.
 * @size: The accounting size for this struct.
 * @bitmap: A bitmap where each bit represents a page. A set bit means a
 * dirty page.
 */
struct vmw_bo_dirty {
	pgoff_t start;
	pgoff_t end;
	enum vmw_bo_dirty_method method;
	unsigned int change_count;
	unsigned int ref_count;
	unsigned long bitmap_size;
	size_t size;
	unsigned long bitmap[];
};
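
/*
 * Note on @start and @end above: they track a conservative dirty window in
 * units of pages within the bo, and an empty window is encoded as
 * start >= end (vmw_bo_dirty_add() initializes start to bitmap_size and
 * end to 0). The flexible @bitmap array is allocated together with the
 * struct as BITS_TO_LONGS(bitmap_size) longs.
 */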

/**
 * vmw_bo_dirty_scan_pagetable - Perform a pagetable scan for dirty bits
 * @vbo: The buffer object to scan
 *
 * Scans the pagetable for dirty bits. Clear those bits and modify the
 * dirty structure with the results. This function may change the
 * dirty-tracking method.
 */
static void vmw_bo_dirty_scan_pagetable(struct vmw_buffer_object *vbo)
{
	struct vmw_bo_dirty *dirty = vbo->dirty;
	pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node);
	struct address_space *mapping = vbo->base.bdev->dev_mapping;
	pgoff_t num_marked;

	num_marked = clean_record_shared_mapping_range
		(mapping,
		 offset, dirty->bitmap_size,
		 offset, &dirty->bitmap[0],
		 &dirty->start, &dirty->end);
	if (num_marked == 0)
		dirty->change_count++;
	else
		dirty->change_count = 0;

	if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
		dirty->change_count = 0;
		dirty->method = VMW_BO_DIRTY_MKWRITE;
		wp_shared_mapping_range(mapping,
					offset, dirty->bitmap_size);
		clean_record_shared_mapping_range(mapping,
						  offset, dirty->bitmap_size,
						  offset, &dirty->bitmap[0],
						  &dirty->start, &dirty->end);
	}
}

/**
 * vmw_bo_dirty_scan_mkwrite - Reset the mkwrite dirty-tracking method
 * @vbo: The buffer object to scan
 *
 * Write-protect pages written to so that consecutive write accesses will
 * trigger a call to mkwrite.
 *
 * This function may change the dirty-tracking method.
 */
static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo)
{
	struct vmw_bo_dirty *dirty = vbo->dirty;
	unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
	struct address_space *mapping = vbo->base.bdev->dev_mapping;
	pgoff_t num_marked;

	if (dirty->end <= dirty->start)
		return;

	num_marked = wp_shared_mapping_range(vbo->base.bdev->dev_mapping,
					     dirty->start + offset,
					     dirty->end - dirty->start);

	if (100UL * num_marked / dirty->bitmap_size >
	    VMW_DIRTY_PERCENTAGE) {
		dirty->change_count++;
	} else {
		dirty->change_count = 0;
	}

	if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
		pgoff_t start = 0;
		pgoff_t end = dirty->bitmap_size;

		dirty->method = VMW_BO_DIRTY_PAGETABLE;
		clean_record_shared_mapping_range(mapping, offset, end, offset,
						  &dirty->bitmap[0],
						  &start, &end);
		bitmap_clear(&dirty->bitmap[0], 0, dirty->bitmap_size);
		if (dirty->start < dirty->end)
			bitmap_set(&dirty->bitmap[0], dirty->start,
				   dirty->end - dirty->start);
		dirty->change_count = 0;
	}
}
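
/*
 * Note on the switch back to _PAGETABLE above: the per-page bits gathered
 * while in _MKWRITE mode are dropped and the whole previously tracked
 * window [start, end) is re-marked dirty wholesale, which is conservative
 * but never loses a dirty page.
 */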

/**
 * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty
 * tracking structure
 * @vbo: The buffer object to scan
 *
 * This function may change the dirty tracking method.
 */
void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo)
{
	struct vmw_bo_dirty *dirty = vbo->dirty;

	if (dirty->method == VMW_BO_DIRTY_PAGETABLE)
		vmw_bo_dirty_scan_pagetable(vbo);
	else
		vmw_bo_dirty_scan_mkwrite(vbo);
}

/**
 * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before
 * an unmap_mapping_range operation.
 * @vbo: The buffer object,
 * @start: First page of the range within the buffer object.
 * @end: Last page of the range within the buffer object + 1.
 *
 * If we're using the _PAGETABLE scan method, we may leak dirty pages
 * when calling unmap_mapping_range(). This function makes sure we pick
 * up all dirty pages.
 */
static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo,
				   pgoff_t start, pgoff_t end)
{
	struct vmw_bo_dirty *dirty = vbo->dirty;
	unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
	struct address_space *mapping = vbo->base.bdev->dev_mapping;

	if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end)
		return;

	wp_shared_mapping_range(mapping, start + offset, end - start);
	clean_record_shared_mapping_range(mapping, start + offset,
					  end - start, offset,
					  &dirty->bitmap[0], &dirty->start,
					  &dirty->end);
}

/**
 * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo
 * @vbo: The buffer object,
 * @start: First page of the range within the buffer object.
 * @end: Last page of the range within the buffer object + 1.
 *
 * This is similar to ttm_bo_unmap_virtual() except it takes a subrange.
 */
void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
			pgoff_t start, pgoff_t end)
{
	unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
	struct address_space *mapping = vbo->base.bdev->dev_mapping;

	vmw_bo_dirty_pre_unmap(vbo, start, end);
	unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT,
				   (loff_t) (end - start) << PAGE_SHIFT);
}
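
/*
 * Note on the arithmetic above: unmap_shared_mapping_range() takes byte
 * offsets, so the page offsets are shifted left by PAGE_SHIFT, and the
 * length is widened to loff_t before the shift so it cannot overflow.
 */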

/**
 * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object
 * @vbo: The buffer object
 *
 * This function registers a dirty-tracking user to a buffer object.
 * A user can be for example a resource or a vma in a special user-space
 * mapping.
 *
 * Return: Zero on success, -ENOMEM on memory allocation failure.
 */
int vmw_bo_dirty_add(struct vmw_buffer_object *vbo)
{
	struct vmw_bo_dirty *dirty = vbo->dirty;
	pgoff_t num_pages = vbo->base.num_pages;
	size_t size, acc_size;
	int ret;
	static struct ttm_operation_ctx ctx = {
		.interruptible = false,
		.no_wait_gpu = false
	};

	if (dirty) {
		dirty->ref_count++;
		return 0;
	}

	size = sizeof(*dirty) + BITS_TO_LONGS(num_pages) * sizeof(long);
	acc_size = ttm_round_pot(size);
	ret = ttm_mem_global_alloc(&ttm_mem_glob, acc_size, &ctx);
	if (ret) {
		VMW_DEBUG_USER("Out of graphics memory for buffer object "
			       "dirty tracker.\n");
		return ret;
	}
	dirty = kvzalloc(size, GFP_KERNEL);
	if (!dirty) {
		ret = -ENOMEM;
		goto out_no_dirty;
	}

	dirty->size = acc_size;
	dirty->bitmap_size = num_pages;
	dirty->start = dirty->bitmap_size;
	dirty->end = 0;
	dirty->ref_count = 1;
	if (num_pages < PAGE_SIZE / sizeof(pte_t)) {
		dirty->method = VMW_BO_DIRTY_PAGETABLE;
	} else {
		struct address_space *mapping = vbo->base.bdev->dev_mapping;
		pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node);

		dirty->method = VMW_BO_DIRTY_MKWRITE;

		/* Write-protect and then pick up already dirty bits */
		wp_shared_mapping_range(mapping, offset, num_pages);
		clean_record_shared_mapping_range(mapping, offset, num_pages,
						  offset,
						  &dirty->bitmap[0],
						  &dirty->start, &dirty->end);
	}

	vbo->dirty = dirty;

	return 0;

out_no_dirty:
	ttm_mem_global_free(&ttm_mem_glob, acc_size);
	return ret;
}

/**
 * vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object
 * @vbo: The buffer object
 *
 * This function releases a dirty-tracking user from a buffer object.
 * If the reference count reaches zero, then the dirty-tracking object is
 * freed and the pointer to it cleared.
 */
void vmw_bo_dirty_release(struct vmw_buffer_object *vbo)
{
	struct vmw_bo_dirty *dirty = vbo->dirty;

	if (dirty && --dirty->ref_count == 0) {
		size_t acc_size = dirty->size;

		kvfree(dirty);
		ttm_mem_global_free(&ttm_mem_glob, acc_size);
		vbo->dirty = NULL;
	}
}
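
/*
 * Illustrative pairing of vmw_bo_dirty_add() and vmw_bo_dirty_release()
 * (a minimal sketch only; the real callers live elsewhere in the driver):
 *
 *	int ret = vmw_bo_dirty_add(vbo);
 *
 *	if (ret)
 *		return ret;
 *	...
 *	vmw_bo_dirty_scan(vbo);
 *	...
 *	vmw_bo_dirty_release(vbo);
 *
 * Each user takes its own reference; the tracking state is freed when the
 * last user releases it.
 */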

/**
 * vmw_bo_dirty_transfer_to_res - Pick up a resource's dirty region from
 * its backing mob.
 * @res: The resource
 *
 * This function will pick up all dirty ranges affecting the resource from
 * its backup mob, and call vmw_resource_dirty_update() once for each
 * range. The transferred ranges will be cleared from the backing mob's
 * dirty tracking.
 */
void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res)
{
	struct vmw_buffer_object *vbo = res->backup;
	struct vmw_bo_dirty *dirty = vbo->dirty;
	pgoff_t start, cur, end;
	unsigned long res_start = res->backup_offset;
	unsigned long res_end = res->backup_offset + res->backup_size;

	WARN_ON_ONCE(res_start & ~PAGE_MASK);
	res_start >>= PAGE_SHIFT;
	res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);

	if (res_start >= dirty->end || res_end <= dirty->start)
		return;

	cur = max(res_start, dirty->start);
	res_end = max(res_end, dirty->end);
	while (cur < res_end) {
		unsigned long num;

		start = find_next_bit(&dirty->bitmap[0], res_end, cur);
		if (start >= res_end)
			break;

		end = find_next_zero_bit(&dirty->bitmap[0], res_end, start + 1);
		cur = end + 1;
		num = end - start;
		bitmap_clear(&dirty->bitmap[0], start, num);
		vmw_resource_dirty_update(res, start, end);
	}

	if (res_start <= dirty->start && res_end > dirty->start)
		dirty->start = res_end;
	if (res_start < dirty->end && res_end >= dirty->end)
		dirty->end = res_start;
}

/**
 * vmw_bo_dirty_clear_res - Clear a resource's dirty region from
 * its backing mob.
 * @res: The resource
 *
 * This function will clear all dirty ranges affecting the resource from
 * its backup mob's dirty tracking.
 */
void vmw_bo_dirty_clear_res(struct vmw_resource *res)
{
	unsigned long res_start = res->backup_offset;
	unsigned long res_end = res->backup_offset + res->backup_size;
	struct vmw_buffer_object *vbo = res->backup;
	struct vmw_bo_dirty *dirty = vbo->dirty;

	res_start >>= PAGE_SHIFT;
	res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);

	if (res_start >= dirty->end || res_end <= dirty->start)
		return;

	res_start = max(res_start, dirty->start);
	res_end = min(res_end, dirty->end);
	bitmap_clear(&dirty->bitmap[0], res_start, res_end - res_start);

	if (res_start <= dirty->start && res_end > dirty->start)
		dirty->start = res_end;
	if (res_start < dirty->end && res_end >= dirty->end)
		dirty->end = res_start;
}

vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
	    vma->vm_private_data;
	vm_fault_t ret;
	unsigned long page_offset;
	unsigned int save_flags;
	struct vmw_buffer_object *vbo =
		container_of(bo, typeof(*vbo), base);

	/*
	 * mkwrite() doesn't handle the VM_FAULT_RETRY return value correctly.
	 * So make sure the TTM helpers are aware.
	 */
	save_flags = vmf->flags;
	vmf->flags &= ~FAULT_FLAG_ALLOW_RETRY;
	ret = ttm_bo_vm_reserve(bo, vmf);
	vmf->flags = save_flags;
	if (ret)
		return ret;

	page_offset = vmf->pgoff - drm_vma_node_start(&bo->base.vma_node);
	if (unlikely(page_offset >= bo->num_pages)) {
		ret = VM_FAULT_SIGBUS;
		goto out_unlock;
	}

	if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE &&
	    !test_bit(page_offset, &vbo->dirty->bitmap[0])) {
		struct vmw_bo_dirty *dirty = vbo->dirty;

		__set_bit(page_offset, &dirty->bitmap[0]);
		dirty->start = min(dirty->start, page_offset);
		dirty->end = max(dirty->end, page_offset + 1);
	}

out_unlock:
	dma_resv_unlock(bo->base.resv);
	return ret;
}
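
/*
 * Note on the mkwrite handler above: it only records the faulting page in
 * the dirty bitmap and widens the tracked window; making the page-table
 * entry writable again is left to the core mm fault path once the handler
 * returns.
 */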

vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
	    vma->vm_private_data;
	struct vmw_buffer_object *vbo =
		container_of(bo, struct vmw_buffer_object, base);
	pgoff_t num_prefault;
	pgprot_t prot;
	vm_fault_t ret;

	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	num_prefault = (vma->vm_flags & VM_RAND_READ) ? 1 :
		TTM_BO_VM_NUM_PREFAULT;

	if (vbo->dirty) {
		pgoff_t allowed_prefault;
		unsigned long page_offset;

		page_offset = vmf->pgoff -
			drm_vma_node_start(&bo->base.vma_node);
		if (page_offset >= bo->num_pages ||
		    vmw_resources_clean(vbo, page_offset,
					page_offset + PAGE_SIZE,
					&allowed_prefault)) {
			ret = VM_FAULT_SIGBUS;
			goto out_unlock;
		}

		num_prefault = min(num_prefault, allowed_prefault);
	}

	/*
	 * If we don't track dirty using the MKWRITE method, make sure
	 * the page protection is write-enabled so we don't get
	 * a lot of unnecessary write faults.
	 */
	if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE)
		prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
	else
		prot = vm_get_page_prot(vma->vm_flags);

	ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault, 1);
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;

out_unlock:
	dma_resv_unlock(bo->base.resv);

	return ret;
}
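
/*
 * Note on the protection choice above: when dirty tracking uses the
 * _MKWRITE method, passing vm_flags with VM_SHARED cleared to
 * vm_get_page_prot() yields a write-protected page protection, so the
 * first write to each page faults into vmw_bo_vm_mkwrite() and gets
 * recorded.
 */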

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
				enum page_entry_size pe_size)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
	    vma->vm_private_data;
	struct vmw_buffer_object *vbo =
		container_of(bo, struct vmw_buffer_object, base);
	pgprot_t prot;
	vm_fault_t ret;
	pgoff_t fault_page_size;
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	bool is_cow_mapping =
		(vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;

	switch (pe_size) {
	case PE_SIZE_PMD:
		fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT;
		break;
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
	case PE_SIZE_PUD:
		fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT;
		break;
#endif
	default:
		WARN_ON_ONCE(1);
		return VM_FAULT_FALLBACK;
	}

	/* Always do write dirty-tracking and COW on PTE level. */
	if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping))
		return VM_FAULT_FALLBACK;

	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	if (vbo->dirty) {
		pgoff_t allowed_prefault;
		unsigned long page_offset;

		page_offset = vmf->pgoff -
			drm_vma_node_start(&bo->base.vma_node);
		if (page_offset >= bo->num_pages ||
		    vmw_resources_clean(vbo, page_offset,
					page_offset + PAGE_SIZE,
					&allowed_prefault)) {
			ret = VM_FAULT_SIGBUS;
			goto out_unlock;
		}

		/*
		 * Write protect, so we get a new fault on write, and can
		 * split.
		 */
		prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
	} else {
		prot = vm_get_page_prot(vma->vm_flags);
	}

	ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size);
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;

out_unlock:
	dma_resv_unlock(bo->base.resv);

	return ret;
}
#endif