/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include <linux/pagevec.h>
#include <linux/swap.h>

#include "gem/i915_gem_region.h"
#include "i915_drv.h"
#include "i915_gemfs.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"

/*
 * Move pages to appropriate lru and release the pagevec, decrementing the
 * ref count of those pages.
 */
static void check_release_pagevec(struct pagevec *pvec)
{
	check_move_unevictable_pages(pvec);
	__pagevec_release(pvec);
	cond_resched();
}

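/*
 * Pin the shmemfs pages backing the object into an sg_table, coalescing
 * physically contiguous pages into single scatterlist entries where the
 * segment size limit allows.
 */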
static int shmem_get_pages(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct intel_memory_region *mem = obj->mm.region;
	const unsigned long page_count = obj->base.size / PAGE_SIZE;
	unsigned long i;
	struct address_space *mapping;
	struct sg_table *st;
	struct scatterlist *sg;
	struct sgt_iter sgt_iter;
	struct page *page;
	unsigned long last_pfn = 0;	/* suppress gcc warning */
	unsigned int max_segment = i915_sg_segment_size();
	unsigned int sg_page_sizes;
	struct pagevec pvec;
	gfp_t noreclaim;
	int ret;

	/*
	 * Assert that the object is not currently in any GPU domain. As it
	 * wasn't in the GTT, there shouldn't be any way it could have been in
	 * a GPU cache
	 */
	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);

	/*
	 * If there's no chance of allocating enough pages for the whole
	 * object, bail early.
	 */
	if (obj->base.size > resource_size(&mem->region))
		return -ENOMEM;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		return -ENOMEM;

rebuild_st:
	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
		kfree(st);
		return -ENOMEM;
	}

	/*
	 * Get the list of pages out of our struct file. They'll be pinned
	 * at this point until we release them.
	 *
	 * Fail silently without starting the shrinker
	 */
	mapping = obj->base.filp->f_mapping;
	mapping_set_unevictable(mapping);
	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;

	sg = st->sgl;
	st->nents = 0;
	sg_page_sizes = 0;
	for (i = 0; i < page_count; i++) {
		const unsigned int shrink[] = {
			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
			0,
		}, *s = shrink;
		gfp_t gfp = noreclaim;

		do {
			cond_resched();
			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
			if (!IS_ERR(page))
				break;

			if (!*s) {
				ret = PTR_ERR(page);
				goto err_sg;
			}

			i915_gem_shrink(i915, 2 * page_count, NULL, *s++);

			/*
			 * We've tried hard to allocate the memory by reaping
			 * our own buffer, now let the real VM do its job and
			 * go down in flames if truly OOM.
			 *
			 * However, since graphics tend to be disposable,
			 * defer the oom here by reporting the ENOMEM back
			 * to userspace.
			 */
			if (!*s) {
				/* reclaim and warn, but no oom */
				gfp = mapping_gfp_mask(mapping);

				/*
				 * Our bo are always dirty and so we require
				 * kswapd to reclaim our pages (direct reclaim
				 * does not effectively begin pageout of our
				 * buffers on its own). However, direct reclaim
				 * only waits for kswapd when under allocation
				 * congestion. So as a result __GFP_RECLAIM is
				 * unreliable and fails to actually reclaim our
				 * dirty pages -- unless you try over and over
				 * again with !__GFP_NORETRY. However, we still
				 * want to fail this allocation rather than
				 * trigger the out-of-memory killer and for
				 * this we want __GFP_RETRY_MAYFAIL.
				 */
				gfp |= __GFP_RETRY_MAYFAIL;
			}
		} while (1);

		if (!i ||
		    sg->length >= max_segment ||
		    page_to_pfn(page) != last_pfn + 1) {
			if (i) {
				sg_page_sizes |= sg->length;
				sg = sg_next(sg);
			}
			st->nents++;
			sg_set_page(sg, page, PAGE_SIZE, 0);
		} else {
			sg->length += PAGE_SIZE;
		}
		last_pfn = page_to_pfn(page);

		/* Check that the i965g/gm workaround works. */
		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
	}
	if (sg) { /* loop terminated early; short sg table */
		sg_page_sizes |= sg->length;
		sg_mark_end(sg);
	}

	/* Trim unused sg entries to avoid wasting memory. */
	i915_sg_trim(st);

	ret = i915_gem_gtt_prepare_pages(obj, st);
	if (ret) {
		/*
		 * DMA remapping failed? One possible cause is that
		 * it could not reserve enough large entries, asking
		 * for PAGE_SIZE chunks instead may be helpful.
		 */
		if (max_segment > PAGE_SIZE) {
			for_each_sgt_page(page, sgt_iter, st)
				put_page(page);
			sg_free_table(st);

			max_segment = PAGE_SIZE;
			goto rebuild_st;
		} else {
			dev_warn(&i915->drm.pdev->dev,
				 "Failed to DMA remap %lu pages\n",
				 page_count);
			goto err_pages;
		}
	}

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_do_bit_17_swizzle(obj, st);

	__i915_gem_object_set_pages(obj, st, sg_page_sizes);

	return 0;

err_sg:
	sg_mark_end(sg);
err_pages:
	mapping_clear_unevictable(mapping);
	pagevec_init(&pvec);
	for_each_sgt_page(page, sgt_iter, st) {
		if (!pagevec_add(&pvec, page))
			check_release_pagevec(&pvec);
	}
	if (pagevec_count(&pvec))
		check_release_pagevec(&pvec);
	sg_free_table(st);
	kfree(st);

	/*
	 * shmemfs first checks if there is enough memory to allocate the page
	 * and reports ENOSPC should there be insufficient, along with the usual
	 * ENOMEM for a genuine allocation failure.
	 *
	 * We use ENOSPC in our driver to mean that we have run out of aperture
	 * space and so want to translate the error from shmemfs back to our
	 * usual understanding of ENOMEM.
	 */
	if (ret == -ENOSPC)
		ret = -ENOMEM;

	return ret;
}

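/*
 * Discard the backing storage entirely, e.g. for an object marked DONTNEED:
 * the contents are lost and any later attempt to use the pages sees -EFAULT.
 */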
static void
shmem_truncate(struct drm_i915_gem_object *obj)
{
	/*
	 * Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct the shmfs to drop all of its
	 * backing pages, *now*.
	 */
	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
	obj->mm.madv = __I915_MADV_PURGED;
	obj->mm.pages = ERR_PTR(-EFAULT);
}

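/*
 * Start asynchronous writeback of the object's dirty pages so that the
 * system can swap them out, rather than waiting for the VM to find the
 * pages by itself.
 */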
static void
shmem_writeback(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
		.nr_to_write = SWAP_CLUSTER_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
		.for_reclaim = 1,
	};
	unsigned long i;

	/*
	 * Leave mmapings intact (GTT will have been revoked on unbinding,
	 * leaving only CPU mmapings around) and add those pages to the LRU
	 * instead of invoking writeback so they are aged and paged out
	 * as normal.
	 */
	mapping = obj->base.filp->f_mapping;

	/* Begin writeback on each dirty page */
	for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
		struct page *page;

		page = find_lock_entry(mapping, i);
		if (!page || xa_is_value(page))
			continue;

		if (!page_mapped(page) && clear_page_dirty_for_io(page)) {
			int ret;

			SetPageReclaim(page);
			ret = mapping->a_ops->writepage(page, &wbc);
			if (!PageWriteback(page))
				ClearPageReclaim(page);
			if (!ret)
				goto put;
		}
		unlock_page(page);
put:
		put_page(page);
	}
}

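/*
 * Common teardown before the backing pages are released: drop the dirty
 * tracking for DONTNEED objects and flush the CPU cache if the pages may
 * still hold data that is not coherent for reads.
 */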
void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
				struct sg_table *pages,
				bool needs_clflush)
{
	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);

	if (obj->mm.madv == I915_MADV_DONTNEED)
		obj->mm.dirty = false;

	if (needs_clflush &&
	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
		drm_clflush_sg(pages);

	__start_cpu_write(obj);
}

static void
shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages)
{
	struct sgt_iter sgt_iter;
	struct pagevec pvec;
	struct page *page;

	__i915_gem_object_release_shmem(obj, pages, true);

	i915_gem_gtt_finish_pages(obj, pages);

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj, pages);

	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);

	pagevec_init(&pvec);
	for_each_sgt_page(page, sgt_iter, pages) {
		if (obj->mm.dirty)
			set_page_dirty(page);

		if (obj->mm.madv == I915_MADV_WILLNEED)
			mark_page_accessed(page);

		if (!pagevec_add(&pvec, page))
			check_release_pagevec(&pvec);
	}
	if (pagevec_count(&pvec))
		check_release_pagevec(&pvec);
	obj->mm.dirty = false;

	sg_free_table(pages);
	kfree(pages);
}

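/*
 * pwrite fast path used before the backing store has been instantiated:
 * write straight through the pagecache instead of pinning all the pages.
 */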
static int
shmem_pwrite(struct drm_i915_gem_object *obj,
	     const struct drm_i915_gem_pwrite *arg)
{
	struct address_space *mapping = obj->base.filp->f_mapping;
	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
	u64 remain, offset;
	unsigned int pg;

	/* Caller already validated user args */
	GEM_BUG_ON(!access_ok(user_data, arg->size));

	/*
	 * Before we instantiate/pin the backing store for our use, we
	 * can prepopulate the shmemfs filp efficiently using a write into
	 * the pagecache. We avoid the penalty of instantiating all the
	 * pages, important if the user is just writing to a few and never
	 * uses the object on the GPU, and using a direct write into shmemfs
	 * allows it to avoid the cost of retrieving a page (either swapin
	 * or clearing-before-use) before it is overwritten.
	 */
	if (i915_gem_object_has_pages(obj))
		return -ENODEV;

	if (obj->mm.madv != I915_MADV_WILLNEED)
		return -EFAULT;

	/*
	 * Before the pages are instantiated the object is treated as being
	 * in the CPU domain. The pages will be clflushed as required before
	 * use, and we can freely write into the pages directly. If userspace
	 * races pwrite with any other operation; corruption will ensue -
	 * that is userspace's prerogative!
	 */

	remain = arg->size;
	offset = arg->offset;
	pg = offset_in_page(offset);

	do {
		unsigned int len, unwritten;
		struct page *page;
		void *data, *vaddr;
		int err;
		char c;

		len = PAGE_SIZE - pg;
		if (len > remain)
			len = remain;

		/* Prefault the user page to reduce potential recursion */
		err = __get_user(c, user_data);
		if (err)
			return err;

		err = __get_user(c, user_data + len - 1);
		if (err)
			return err;

		err = pagecache_write_begin(obj->base.filp, mapping,
					    offset, len, 0,
					    &page, &data);
		if (err < 0)
			return err;

		vaddr = kmap_atomic(page);
		unwritten = __copy_from_user_inatomic(vaddr + pg,
						      user_data,
						      len);
		kunmap_atomic(vaddr);

		err = pagecache_write_end(obj->base.filp, mapping,
					  offset, len, len - unwritten,
					  page, data);
		if (err < 0)
			return err;

		/* We don't handle -EFAULT, leave it to the caller to check */
		if (unwritten)
			return -ENODEV;

		remain -= len;
		user_data += len;
		offset += len;
		pg = 0;
	} while (remain);

	return 0;
}

static void shmem_release(struct drm_i915_gem_object *obj)
{
	i915_gem_object_release_memory_region(obj);

	fput(obj->base.filp);
}

const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
		 I915_GEM_OBJECT_IS_SHRINKABLE,

	.get_pages = shmem_get_pages,
	.put_pages = shmem_put_pages,
	.truncate = shmem_truncate,
	.writeback = shmem_writeback,

	.pwrite = shmem_pwrite,

	.release = shmem_release,
};

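/*
 * Create the shmemfs file that provides the backing store, preferring the
 * private gemfs mount when it is available (assumed to be the mount set up
 * by i915_gemfs_init() for huge-page support).
 */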
static int __create_shmem(struct drm_i915_private *i915,
			  struct drm_gem_object *obj,
			  resource_size_t size)
{
	unsigned long flags = VM_NORESERVE;
	struct file *filp;

	drm_gem_private_object_init(&i915->drm, obj, size);

	if (i915->mm.gemfs)
		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
						 flags);
	else
		filp = shmem_file_setup("i915", size, flags);
	if (IS_ERR(filp))
		return PTR_ERR(filp);

	obj->filp = filp;
	return 0;
}

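/* The create_object hook for the system memory region. */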
static struct drm_i915_gem_object *
create_shmem(struct intel_memory_region *mem,
	     resource_size_t size,
	     unsigned int flags)
{
	static struct lock_class_key lock_class;
	struct drm_i915_private *i915 = mem->i915;
	struct drm_i915_gem_object *obj;
	struct address_space *mapping;
	unsigned int cache_level;
	gfp_t mask;
	int ret;

	obj = i915_gem_object_alloc();
	if (!obj)
		return ERR_PTR(-ENOMEM);

	ret = __create_shmem(i915, &obj->base, size);
	if (ret)
		goto fail;

	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
	if (IS_I965GM(i915) || IS_I965G(i915)) {
		/* 965gm cannot relocate objects above 4GiB. */
		mask &= ~__GFP_HIGHMEM;
		mask |= __GFP_DMA32;
	}

	mapping = obj->base.filp->f_mapping;
	mapping_set_gfp_mask(mapping, mask);
	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));

	i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class);

	obj->write_domain = I915_GEM_DOMAIN_CPU;
	obj->read_domains = I915_GEM_DOMAIN_CPU;

	if (HAS_LLC(i915))
		/* On some devices, we can have the GPU use the LLC (the CPU
		 * cache) for about a 10% performance improvement
		 * compared to uncached. Graphics requests other than
		 * display scanout are coherent with the CPU in
		 * accessing this cache. This means in this mode we
		 * don't need to clflush on the CPU side, and on the
		 * GPU side we only need to flush internal caches to
		 * get data visible to the CPU.
		 *
		 * However, we maintain the display planes as UC, and so
		 * need to rebind when first used as such.
		 */
		cache_level = I915_CACHE_LLC;
	else
		cache_level = I915_CACHE_NONE;

	i915_gem_object_set_cache_coherency(obj, cache_level);

	i915_gem_object_init_memory_region(obj, mem, 0);

	return obj;

fail:
	i915_gem_object_free(obj);
	return ERR_PTR(ret);
}

struct drm_i915_gem_object *
i915_gem_object_create_shmem(struct drm_i915_private *i915,
			     resource_size_t size)
{
	return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM],
					     size, 0);
}

/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
				       const void *data, resource_size_t size)
{
	struct drm_i915_gem_object *obj;
	struct file *file;
	resource_size_t offset;
	int err;

	obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
	if (IS_ERR(obj))
		return obj;

	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

	file = obj->base.filp;
	offset = 0;
	do {
		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
		struct page *page;
		void *pgdata, *vaddr;

		err = pagecache_write_begin(file, file->f_mapping,
					    offset, len, 0,
					    &page, &pgdata);
		if (err < 0)
			goto fail;

		vaddr = kmap(page);
		memcpy(vaddr, data, len);
		kunmap(page);

		err = pagecache_write_end(file, file->f_mapping,
					  offset, len, len,
					  page, pgdata);
		if (err < 0)
			goto fail;

		size -= len;
		data += len;
		offset += len;
	} while (size);

	return obj;

fail:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

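/*
 * Region init: try to set up the private tmpfs mount; failure only means
 * hugepage support is unavailable, so it is not treated as fatal.
 */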
static int init_shmem(struct intel_memory_region *mem)
{
	int err;

	err = i915_gemfs_init(mem->i915);
	if (err) {
		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n",
			 err);
	}

	intel_memory_region_set_name(mem, "system");

	return 0; /* Don't error, we can simply fallback to the kernel mnt */
}

static void release_shmem(struct intel_memory_region *mem)
{
	i915_gemfs_fini(mem->i915);
}

static const struct intel_memory_region_ops shmem_region_ops = {
	.init = init_shmem,
	.release = release_shmem,
	.create_object = create_shmem,
};

struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915)
{
	return intel_memory_region_create(i915, 0,
					  totalram_pages() << PAGE_SHIFT,
					  PAGE_SIZE, 0,
					  &shmem_region_ops);
}