2 * Copyright © 2008-2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Eric Anholt <eric@anholt.net>
29 #include <drm/drm_vma_manager.h>
30 #include <drm/i915_drm.h>
32 #include "i915_vgpu.h"
33 #include "i915_trace.h"
34 #include "intel_drv.h"
35 #include <linux/shmem_fs.h>
36 #include <linux/slab.h>
37 #include <linux/swap.h>
38 #include <linux/pci.h>
39 #include <linux/dma-buf.h>
41 #define RQ_BUG_ON(expr)
43 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object
*obj
);
44 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object
*obj
);
46 i915_gem_object_retire__write(struct drm_i915_gem_object
*obj
);
48 i915_gem_object_retire__read(struct drm_i915_gem_object
*obj
, int ring
);
49 static void i915_gem_write_fence(struct drm_device
*dev
, int reg
,
50 struct drm_i915_gem_object
*obj
);
51 static void i915_gem_object_update_fence(struct drm_i915_gem_object
*obj
,
52 struct drm_i915_fence_reg
*fence
,
55 static bool cpu_cache_is_coherent(struct drm_device
*dev
,
56 enum i915_cache_level level
)
58 return HAS_LLC(dev
) || level
!= I915_CACHE_NONE
;
61 static bool cpu_write_needs_clflush(struct drm_i915_gem_object
*obj
)
63 if (!cpu_cache_is_coherent(obj
->base
.dev
, obj
->cache_level
))
66 return obj
->pin_display
;
69 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object
*obj
)
72 i915_gem_release_mmap(obj
);
74 /* As we do not have an associated fence register, we will force
75 * a tiling change if we ever need to acquire one.
77 obj
->fence_dirty
= false;
78 obj
->fence_reg
= I915_FENCE_REG_NONE
;
81 /* some bookkeeping */
82 static void i915_gem_info_add_obj(struct drm_i915_private
*dev_priv
,
85 spin_lock(&dev_priv
->mm
.object_stat_lock
);
86 dev_priv
->mm
.object_count
++;
87 dev_priv
->mm
.object_memory
+= size
;
88 spin_unlock(&dev_priv
->mm
.object_stat_lock
);
91 static void i915_gem_info_remove_obj(struct drm_i915_private
*dev_priv
,
94 spin_lock(&dev_priv
->mm
.object_stat_lock
);
95 dev_priv
->mm
.object_count
--;
96 dev_priv
->mm
.object_memory
-= size
;
97 spin_unlock(&dev_priv
->mm
.object_stat_lock
);
101 i915_gem_wait_for_error(struct i915_gpu_error
*error
)
105 #define EXIT_COND (!i915_reset_in_progress(error) || \
106 i915_terminally_wedged(error))
111 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
112 * userspace. If it takes that long something really bad is going on and
113 * we should simply try to bail out and fail as gracefully as possible.
115 ret
= wait_event_interruptible_timeout(error
->reset_queue
,
119 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
121 } else if (ret
< 0) {
129 int i915_mutex_lock_interruptible(struct drm_device
*dev
)
131 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
134 ret
= i915_gem_wait_for_error(&dev_priv
->gpu_error
);
138 ret
= mutex_lock_interruptible(&dev
->struct_mutex
);
142 WARN_ON(i915_verify_lists(dev
));
147 i915_gem_get_aperture_ioctl(struct drm_device
*dev
, void *data
,
148 struct drm_file
*file
)
150 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
151 struct drm_i915_gem_get_aperture
*args
= data
;
152 struct drm_i915_gem_object
*obj
;
156 mutex_lock(&dev
->struct_mutex
);
157 list_for_each_entry(obj
, &dev_priv
->mm
.bound_list
, global_list
)
158 if (i915_gem_obj_is_pinned(obj
))
159 pinned
+= i915_gem_obj_ggtt_size(obj
);
160 mutex_unlock(&dev
->struct_mutex
);
162 args
->aper_size
= dev_priv
->gtt
.base
.total
;
163 args
->aper_available_size
= args
->aper_size
- pinned
;
169 i915_gem_object_get_pages_phys(struct drm_i915_gem_object
*obj
)
171 struct address_space
*mapping
= file_inode(obj
->base
.filp
)->i_mapping
;
172 char *vaddr
= obj
->phys_handle
->vaddr
;
174 struct scatterlist
*sg
;
177 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj
)))
180 for (i
= 0; i
< obj
->base
.size
/ PAGE_SIZE
; i
++) {
184 page
= shmem_read_mapping_page(mapping
, i
);
186 return PTR_ERR(page
);
188 src
= kmap_atomic(page
);
189 memcpy(vaddr
, src
, PAGE_SIZE
);
190 drm_clflush_virt_range(vaddr
, PAGE_SIZE
);
193 page_cache_release(page
);
197 i915_gem_chipset_flush(obj
->base
.dev
);
199 st
= kmalloc(sizeof(*st
), GFP_KERNEL
);
203 if (sg_alloc_table(st
, 1, GFP_KERNEL
)) {
210 sg
->length
= obj
->base
.size
;
212 sg_dma_address(sg
) = obj
->phys_handle
->busaddr
;
213 sg_dma_len(sg
) = obj
->base
.size
;
220 i915_gem_object_put_pages_phys(struct drm_i915_gem_object
*obj
)
224 BUG_ON(obj
->madv
== __I915_MADV_PURGED
);
226 ret
= i915_gem_object_set_to_cpu_domain(obj
, true);
228 /* In the event of a disaster, abandon all caches and
231 WARN_ON(ret
!= -EIO
);
232 obj
->base
.read_domains
= obj
->base
.write_domain
= I915_GEM_DOMAIN_CPU
;
235 if (obj
->madv
== I915_MADV_DONTNEED
)
239 struct address_space
*mapping
= file_inode(obj
->base
.filp
)->i_mapping
;
240 char *vaddr
= obj
->phys_handle
->vaddr
;
243 for (i
= 0; i
< obj
->base
.size
/ PAGE_SIZE
; i
++) {
247 page
= shmem_read_mapping_page(mapping
, i
);
251 dst
= kmap_atomic(page
);
252 drm_clflush_virt_range(vaddr
, PAGE_SIZE
);
253 memcpy(dst
, vaddr
, PAGE_SIZE
);
256 set_page_dirty(page
);
257 if (obj
->madv
== I915_MADV_WILLNEED
)
258 mark_page_accessed(page
);
259 page_cache_release(page
);
265 sg_free_table(obj
->pages
);
270 i915_gem_object_release_phys(struct drm_i915_gem_object
*obj
)
272 drm_pci_free(obj
->base
.dev
, obj
->phys_handle
);
275 static const struct drm_i915_gem_object_ops i915_gem_phys_ops
= {
276 .get_pages
= i915_gem_object_get_pages_phys
,
277 .put_pages
= i915_gem_object_put_pages_phys
,
278 .release
= i915_gem_object_release_phys
,
282 drop_pages(struct drm_i915_gem_object
*obj
)
284 struct i915_vma
*vma
, *next
;
287 drm_gem_object_reference(&obj
->base
);
288 list_for_each_entry_safe(vma
, next
, &obj
->vma_list
, vma_link
)
289 if (i915_vma_unbind(vma
))
292 ret
= i915_gem_object_put_pages(obj
);
293 drm_gem_object_unreference(&obj
->base
);
299 i915_gem_object_attach_phys(struct drm_i915_gem_object
*obj
,
302 drm_dma_handle_t
*phys
;
305 if (obj
->phys_handle
) {
306 if ((unsigned long)obj
->phys_handle
->vaddr
& (align
-1))
312 if (obj
->madv
!= I915_MADV_WILLNEED
)
315 if (obj
->base
.filp
== NULL
)
318 ret
= drop_pages(obj
);
322 /* create a new object */
323 phys
= drm_pci_alloc(obj
->base
.dev
, obj
->base
.size
, align
);
327 obj
->phys_handle
= phys
;
328 obj
->ops
= &i915_gem_phys_ops
;
330 return i915_gem_object_get_pages(obj
);
334 i915_gem_phys_pwrite(struct drm_i915_gem_object
*obj
,
335 struct drm_i915_gem_pwrite
*args
,
336 struct drm_file
*file_priv
)
338 struct drm_device
*dev
= obj
->base
.dev
;
339 void *vaddr
= obj
->phys_handle
->vaddr
+ args
->offset
;
340 char __user
*user_data
= to_user_ptr(args
->data_ptr
);
343 /* We manually control the domain here and pretend that it
344 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
346 ret
= i915_gem_object_wait_rendering(obj
, false);
350 intel_fb_obj_invalidate(obj
, NULL
, ORIGIN_CPU
);
351 if (__copy_from_user_inatomic_nocache(vaddr
, user_data
, args
->size
)) {
352 unsigned long unwritten
;
354 /* The physical object once assigned is fixed for the lifetime
355 * of the obj, so we can safely drop the lock and continue
358 mutex_unlock(&dev
->struct_mutex
);
359 unwritten
= copy_from_user(vaddr
, user_data
, args
->size
);
360 mutex_lock(&dev
->struct_mutex
);
367 drm_clflush_virt_range(vaddr
, args
->size
);
368 i915_gem_chipset_flush(dev
);
371 intel_fb_obj_flush(obj
, false);
375 void *i915_gem_object_alloc(struct drm_device
*dev
)
377 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
378 return kmem_cache_zalloc(dev_priv
->objects
, GFP_KERNEL
);
381 void i915_gem_object_free(struct drm_i915_gem_object
*obj
)
383 struct drm_i915_private
*dev_priv
= obj
->base
.dev
->dev_private
;
384 kmem_cache_free(dev_priv
->objects
, obj
);
388 i915_gem_create(struct drm_file
*file
,
389 struct drm_device
*dev
,
393 struct drm_i915_gem_object
*obj
;
397 size
= roundup(size
, PAGE_SIZE
);
401 /* Allocate the new object */
402 obj
= i915_gem_alloc_object(dev
, size
);
406 ret
= drm_gem_handle_create(file
, &obj
->base
, &handle
);
407 /* drop reference from allocate - handle holds it now */
408 drm_gem_object_unreference_unlocked(&obj
->base
);
417 i915_gem_dumb_create(struct drm_file
*file
,
418 struct drm_device
*dev
,
419 struct drm_mode_create_dumb
*args
)
421 /* have to work out size/pitch and return them */
422 args
->pitch
= ALIGN(args
->width
* DIV_ROUND_UP(args
->bpp
, 8), 64);
423 args
->size
= args
->pitch
* args
->height
;
424 return i915_gem_create(file
, dev
,
425 args
->size
, &args
->handle
);
429 * Creates a new mm object and returns a handle to it.
432 i915_gem_create_ioctl(struct drm_device
*dev
, void *data
,
433 struct drm_file
*file
)
435 struct drm_i915_gem_create
*args
= data
;
437 return i915_gem_create(file
, dev
,
438 args
->size
, &args
->handle
);
442 __copy_to_user_swizzled(char __user
*cpu_vaddr
,
443 const char *gpu_vaddr
, int gpu_offset
,
446 int ret
, cpu_offset
= 0;
449 int cacheline_end
= ALIGN(gpu_offset
+ 1, 64);
450 int this_length
= min(cacheline_end
- gpu_offset
, length
);
451 int swizzled_gpu_offset
= gpu_offset
^ 64;
453 ret
= __copy_to_user(cpu_vaddr
+ cpu_offset
,
454 gpu_vaddr
+ swizzled_gpu_offset
,
459 cpu_offset
+= this_length
;
460 gpu_offset
+= this_length
;
461 length
-= this_length
;
468 __copy_from_user_swizzled(char *gpu_vaddr
, int gpu_offset
,
469 const char __user
*cpu_vaddr
,
472 int ret
, cpu_offset
= 0;
475 int cacheline_end
= ALIGN(gpu_offset
+ 1, 64);
476 int this_length
= min(cacheline_end
- gpu_offset
, length
);
477 int swizzled_gpu_offset
= gpu_offset
^ 64;
479 ret
= __copy_from_user(gpu_vaddr
+ swizzled_gpu_offset
,
480 cpu_vaddr
+ cpu_offset
,
485 cpu_offset
+= this_length
;
486 gpu_offset
+= this_length
;
487 length
-= this_length
;
494 * Pins the specified object's pages and synchronizes the object with
495 * GPU accesses. Sets needs_clflush to non-zero if the caller should
496 * flush the object from the CPU cache.
498 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object
*obj
,
508 if (!(obj
->base
.read_domains
& I915_GEM_DOMAIN_CPU
)) {
509 /* If we're not in the cpu read domain, set ourself into the gtt
510 * read domain and manually flush cachelines (if required). This
511 * optimizes for the case when the gpu will dirty the data
512 * anyway again before the next pread happens. */
513 *needs_clflush
= !cpu_cache_is_coherent(obj
->base
.dev
,
515 ret
= i915_gem_object_wait_rendering(obj
, true);
520 ret
= i915_gem_object_get_pages(obj
);
524 i915_gem_object_pin_pages(obj
);
529 /* Per-page copy function for the shmem pread fastpath.
530 * Flushes invalid cachelines before reading the target if
531 * needs_clflush is set. */
533 shmem_pread_fast(struct page
*page
, int shmem_page_offset
, int page_length
,
534 char __user
*user_data
,
535 bool page_do_bit17_swizzling
, bool needs_clflush
)
540 if (unlikely(page_do_bit17_swizzling
))
543 vaddr
= kmap_atomic(page
);
545 drm_clflush_virt_range(vaddr
+ shmem_page_offset
,
547 ret
= __copy_to_user_inatomic(user_data
,
548 vaddr
+ shmem_page_offset
,
550 kunmap_atomic(vaddr
);
552 return ret
? -EFAULT
: 0;
556 shmem_clflush_swizzled_range(char *addr
, unsigned long length
,
559 if (unlikely(swizzled
)) {
560 unsigned long start
= (unsigned long) addr
;
561 unsigned long end
= (unsigned long) addr
+ length
;
563 /* For swizzling simply ensure that we always flush both
564 * channels. Lame, but simple and it works. Swizzled
565 * pwrite/pread is far from a hotpath - current userspace
566 * doesn't use it at all. */
567 start
= round_down(start
, 128);
568 end
= round_up(end
, 128);
570 drm_clflush_virt_range((void *)start
, end
- start
);
572 drm_clflush_virt_range(addr
, length
);
577 /* Only difference to the fast-path function is that this can handle bit17
578 * and uses non-atomic copy and kmap functions. */
580 shmem_pread_slow(struct page
*page
, int shmem_page_offset
, int page_length
,
581 char __user
*user_data
,
582 bool page_do_bit17_swizzling
, bool needs_clflush
)
589 shmem_clflush_swizzled_range(vaddr
+ shmem_page_offset
,
591 page_do_bit17_swizzling
);
593 if (page_do_bit17_swizzling
)
594 ret
= __copy_to_user_swizzled(user_data
,
595 vaddr
, shmem_page_offset
,
598 ret
= __copy_to_user(user_data
,
599 vaddr
+ shmem_page_offset
,
603 return ret
? - EFAULT
: 0;
607 i915_gem_shmem_pread(struct drm_device
*dev
,
608 struct drm_i915_gem_object
*obj
,
609 struct drm_i915_gem_pread
*args
,
610 struct drm_file
*file
)
612 char __user
*user_data
;
615 int shmem_page_offset
, page_length
, ret
= 0;
616 int obj_do_bit17_swizzling
, page_do_bit17_swizzling
;
618 int needs_clflush
= 0;
619 struct sg_page_iter sg_iter
;
621 user_data
= to_user_ptr(args
->data_ptr
);
624 obj_do_bit17_swizzling
= i915_gem_object_needs_bit17_swizzle(obj
);
626 ret
= i915_gem_obj_prepare_shmem_read(obj
, &needs_clflush
);
630 offset
= args
->offset
;
632 for_each_sg_page(obj
->pages
->sgl
, &sg_iter
, obj
->pages
->nents
,
633 offset
>> PAGE_SHIFT
) {
634 struct page
*page
= sg_page_iter_page(&sg_iter
);
639 /* Operation in this page
641 * shmem_page_offset = offset within page in shmem file
642 * page_length = bytes to copy for this page
644 shmem_page_offset
= offset_in_page(offset
);
645 page_length
= remain
;
646 if ((shmem_page_offset
+ page_length
) > PAGE_SIZE
)
647 page_length
= PAGE_SIZE
- shmem_page_offset
;
649 page_do_bit17_swizzling
= obj_do_bit17_swizzling
&&
650 (page_to_phys(page
) & (1 << 17)) != 0;
652 ret
= shmem_pread_fast(page
, shmem_page_offset
, page_length
,
653 user_data
, page_do_bit17_swizzling
,
658 mutex_unlock(&dev
->struct_mutex
);
660 if (likely(!i915
.prefault_disable
) && !prefaulted
) {
661 ret
= fault_in_multipages_writeable(user_data
, remain
);
662 /* Userspace is tricking us, but we've already clobbered
663 * its pages with the prefault and promised to write the
664 * data up to the first fault. Hence ignore any errors
665 * and just continue. */
670 ret
= shmem_pread_slow(page
, shmem_page_offset
, page_length
,
671 user_data
, page_do_bit17_swizzling
,
674 mutex_lock(&dev
->struct_mutex
);
680 remain
-= page_length
;
681 user_data
+= page_length
;
682 offset
+= page_length
;
686 i915_gem_object_unpin_pages(obj
);
692 * Reads data from the object referenced by handle.
694 * On error, the contents of *data are undefined.
697 i915_gem_pread_ioctl(struct drm_device
*dev
, void *data
,
698 struct drm_file
*file
)
700 struct drm_i915_gem_pread
*args
= data
;
701 struct drm_i915_gem_object
*obj
;
707 if (!access_ok(VERIFY_WRITE
,
708 to_user_ptr(args
->data_ptr
),
712 ret
= i915_mutex_lock_interruptible(dev
);
716 obj
= to_intel_bo(drm_gem_object_lookup(dev
, file
, args
->handle
));
717 if (&obj
->base
== NULL
) {
722 /* Bounds check source. */
723 if (args
->offset
> obj
->base
.size
||
724 args
->size
> obj
->base
.size
- args
->offset
) {
729 /* prime objects have no backing filp to GEM pread/pwrite
732 if (!obj
->base
.filp
) {
737 trace_i915_gem_object_pread(obj
, args
->offset
, args
->size
);
739 ret
= i915_gem_shmem_pread(dev
, obj
, args
, file
);
742 drm_gem_object_unreference(&obj
->base
);
744 mutex_unlock(&dev
->struct_mutex
);
748 /* This is the fast write path which cannot handle
749 * page faults in the source data
753 fast_user_write(struct io_mapping
*mapping
,
754 loff_t page_base
, int page_offset
,
755 char __user
*user_data
,
758 void __iomem
*vaddr_atomic
;
760 unsigned long unwritten
;
762 vaddr_atomic
= io_mapping_map_atomic_wc(mapping
, page_base
);
763 /* We can use the cpu mem copy function because this is X86. */
764 vaddr
= (void __force
*)vaddr_atomic
+ page_offset
;
765 unwritten
= __copy_from_user_inatomic_nocache(vaddr
,
767 io_mapping_unmap_atomic(vaddr_atomic
);
772 * This is the fast pwrite path, where we copy the data directly from the
773 * user into the GTT, uncached.
776 i915_gem_gtt_pwrite_fast(struct drm_device
*dev
,
777 struct drm_i915_gem_object
*obj
,
778 struct drm_i915_gem_pwrite
*args
,
779 struct drm_file
*file
)
781 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
783 loff_t offset
, page_base
;
784 char __user
*user_data
;
785 int page_offset
, page_length
, ret
;
787 ret
= i915_gem_obj_ggtt_pin(obj
, 0, PIN_MAPPABLE
| PIN_NONBLOCK
);
791 ret
= i915_gem_object_set_to_gtt_domain(obj
, true);
795 ret
= i915_gem_object_put_fence(obj
);
799 user_data
= to_user_ptr(args
->data_ptr
);
802 offset
= i915_gem_obj_ggtt_offset(obj
) + args
->offset
;
804 intel_fb_obj_invalidate(obj
, NULL
, ORIGIN_GTT
);
807 /* Operation in this page
809 * page_base = page offset within aperture
810 * page_offset = offset within page
811 * page_length = bytes to copy for this page
813 page_base
= offset
& PAGE_MASK
;
814 page_offset
= offset_in_page(offset
);
815 page_length
= remain
;
816 if ((page_offset
+ remain
) > PAGE_SIZE
)
817 page_length
= PAGE_SIZE
- page_offset
;
819 /* If we get a fault while copying data, then (presumably) our
820 * source page isn't available. Return the error and we'll
821 * retry in the slow path.
823 if (fast_user_write(dev_priv
->gtt
.mappable
, page_base
,
824 page_offset
, user_data
, page_length
)) {
829 remain
-= page_length
;
830 user_data
+= page_length
;
831 offset
+= page_length
;
835 intel_fb_obj_flush(obj
, false);
837 i915_gem_object_ggtt_unpin(obj
);
842 /* Per-page copy function for the shmem pwrite fastpath.
843 * Flushes invalid cachelines before writing to the target if
844 * needs_clflush_before is set and flushes out any written cachelines after
845 * writing if needs_clflush is set. */
847 shmem_pwrite_fast(struct page
*page
, int shmem_page_offset
, int page_length
,
848 char __user
*user_data
,
849 bool page_do_bit17_swizzling
,
850 bool needs_clflush_before
,
851 bool needs_clflush_after
)
856 if (unlikely(page_do_bit17_swizzling
))
859 vaddr
= kmap_atomic(page
);
860 if (needs_clflush_before
)
861 drm_clflush_virt_range(vaddr
+ shmem_page_offset
,
863 ret
= __copy_from_user_inatomic(vaddr
+ shmem_page_offset
,
864 user_data
, page_length
);
865 if (needs_clflush_after
)
866 drm_clflush_virt_range(vaddr
+ shmem_page_offset
,
868 kunmap_atomic(vaddr
);
870 return ret
? -EFAULT
: 0;
873 /* Only difference to the fast-path function is that this can handle bit17
874 * and uses non-atomic copy and kmap functions. */
876 shmem_pwrite_slow(struct page
*page
, int shmem_page_offset
, int page_length
,
877 char __user
*user_data
,
878 bool page_do_bit17_swizzling
,
879 bool needs_clflush_before
,
880 bool needs_clflush_after
)
886 if (unlikely(needs_clflush_before
|| page_do_bit17_swizzling
))
887 shmem_clflush_swizzled_range(vaddr
+ shmem_page_offset
,
889 page_do_bit17_swizzling
);
890 if (page_do_bit17_swizzling
)
891 ret
= __copy_from_user_swizzled(vaddr
, shmem_page_offset
,
895 ret
= __copy_from_user(vaddr
+ shmem_page_offset
,
898 if (needs_clflush_after
)
899 shmem_clflush_swizzled_range(vaddr
+ shmem_page_offset
,
901 page_do_bit17_swizzling
);
904 return ret
? -EFAULT
: 0;
908 i915_gem_shmem_pwrite(struct drm_device
*dev
,
909 struct drm_i915_gem_object
*obj
,
910 struct drm_i915_gem_pwrite
*args
,
911 struct drm_file
*file
)
915 char __user
*user_data
;
916 int shmem_page_offset
, page_length
, ret
= 0;
917 int obj_do_bit17_swizzling
, page_do_bit17_swizzling
;
918 int hit_slowpath
= 0;
919 int needs_clflush_after
= 0;
920 int needs_clflush_before
= 0;
921 struct sg_page_iter sg_iter
;
923 user_data
= to_user_ptr(args
->data_ptr
);
926 obj_do_bit17_swizzling
= i915_gem_object_needs_bit17_swizzle(obj
);
928 if (obj
->base
.write_domain
!= I915_GEM_DOMAIN_CPU
) {
929 /* If we're not in the cpu write domain, set ourself into the gtt
930 * write domain and manually flush cachelines (if required). This
931 * optimizes for the case when the gpu will use the data
932 * right away and we therefore have to clflush anyway. */
933 needs_clflush_after
= cpu_write_needs_clflush(obj
);
934 ret
= i915_gem_object_wait_rendering(obj
, false);
938 /* Same trick applies to invalidate partially written cachelines read
940 if ((obj
->base
.read_domains
& I915_GEM_DOMAIN_CPU
) == 0)
941 needs_clflush_before
=
942 !cpu_cache_is_coherent(dev
, obj
->cache_level
);
944 ret
= i915_gem_object_get_pages(obj
);
948 intel_fb_obj_invalidate(obj
, NULL
, ORIGIN_CPU
);
950 i915_gem_object_pin_pages(obj
);
952 offset
= args
->offset
;
955 for_each_sg_page(obj
->pages
->sgl
, &sg_iter
, obj
->pages
->nents
,
956 offset
>> PAGE_SHIFT
) {
957 struct page
*page
= sg_page_iter_page(&sg_iter
);
958 int partial_cacheline_write
;
963 /* Operation in this page
965 * shmem_page_offset = offset within page in shmem file
966 * page_length = bytes to copy for this page
968 shmem_page_offset
= offset_in_page(offset
);
970 page_length
= remain
;
971 if ((shmem_page_offset
+ page_length
) > PAGE_SIZE
)
972 page_length
= PAGE_SIZE
- shmem_page_offset
;
974 /* If we don't overwrite a cacheline completely we need to be
975 * careful to have up-to-date data by first clflushing. Don't
976 * overcomplicate things and flush the entire patch. */
977 partial_cacheline_write
= needs_clflush_before
&&
978 ((shmem_page_offset
| page_length
)
979 & (boot_cpu_data
.x86_clflush_size
- 1));
981 page_do_bit17_swizzling
= obj_do_bit17_swizzling
&&
982 (page_to_phys(page
) & (1 << 17)) != 0;
984 ret
= shmem_pwrite_fast(page
, shmem_page_offset
, page_length
,
985 user_data
, page_do_bit17_swizzling
,
986 partial_cacheline_write
,
987 needs_clflush_after
);
992 mutex_unlock(&dev
->struct_mutex
);
993 ret
= shmem_pwrite_slow(page
, shmem_page_offset
, page_length
,
994 user_data
, page_do_bit17_swizzling
,
995 partial_cacheline_write
,
996 needs_clflush_after
);
998 mutex_lock(&dev
->struct_mutex
);
1004 remain
-= page_length
;
1005 user_data
+= page_length
;
1006 offset
+= page_length
;
1010 i915_gem_object_unpin_pages(obj
);
1014 * Fixup: Flush cpu caches in case we didn't flush the dirty
1015 * cachelines in-line while writing and the object moved
1016 * out of the cpu write domain while we've dropped the lock.
1018 if (!needs_clflush_after
&&
1019 obj
->base
.write_domain
!= I915_GEM_DOMAIN_CPU
) {
1020 if (i915_gem_clflush_object(obj
, obj
->pin_display
))
1021 i915_gem_chipset_flush(dev
);
1025 if (needs_clflush_after
)
1026 i915_gem_chipset_flush(dev
);
1028 intel_fb_obj_flush(obj
, false);
1033 * Writes data to the object referenced by handle.
1035 * On error, the contents of the buffer that were to be modified are undefined.
1038 i915_gem_pwrite_ioctl(struct drm_device
*dev
, void *data
,
1039 struct drm_file
*file
)
1041 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
1042 struct drm_i915_gem_pwrite
*args
= data
;
1043 struct drm_i915_gem_object
*obj
;
1046 if (args
->size
== 0)
1049 if (!access_ok(VERIFY_READ
,
1050 to_user_ptr(args
->data_ptr
),
1054 if (likely(!i915
.prefault_disable
)) {
1055 ret
= fault_in_multipages_readable(to_user_ptr(args
->data_ptr
),
1061 intel_runtime_pm_get(dev_priv
);
1063 ret
= i915_mutex_lock_interruptible(dev
);
1067 obj
= to_intel_bo(drm_gem_object_lookup(dev
, file
, args
->handle
));
1068 if (&obj
->base
== NULL
) {
1073 /* Bounds check destination. */
1074 if (args
->offset
> obj
->base
.size
||
1075 args
->size
> obj
->base
.size
- args
->offset
) {
1080 /* prime objects have no backing filp to GEM pread/pwrite
1083 if (!obj
->base
.filp
) {
1088 trace_i915_gem_object_pwrite(obj
, args
->offset
, args
->size
);
1091 /* We can only do the GTT pwrite on untiled buffers, as otherwise
1092 * it would end up going through the fenced access, and we'll get
1093 * different detiling behavior between reading and writing.
1094 * pread/pwrite currently are reading and writing from the CPU
1095 * perspective, requiring manual detiling by the client.
1097 if (obj
->tiling_mode
== I915_TILING_NONE
&&
1098 obj
->base
.write_domain
!= I915_GEM_DOMAIN_CPU
&&
1099 cpu_write_needs_clflush(obj
)) {
1100 ret
= i915_gem_gtt_pwrite_fast(dev
, obj
, args
, file
);
1101 /* Note that the gtt paths might fail with non-page-backed user
1102 * pointers (e.g. gtt mappings when moving data between
1103 * textures). Fallback to the shmem path in that case. */
1106 if (ret
== -EFAULT
|| ret
== -ENOSPC
) {
1107 if (obj
->phys_handle
)
1108 ret
= i915_gem_phys_pwrite(obj
, args
, file
);
1110 ret
= i915_gem_shmem_pwrite(dev
, obj
, args
, file
);
1114 drm_gem_object_unreference(&obj
->base
);
1116 mutex_unlock(&dev
->struct_mutex
);
1118 intel_runtime_pm_put(dev_priv
);
1124 i915_gem_check_wedge(struct i915_gpu_error
*error
,
1127 if (i915_reset_in_progress(error
)) {
1128 /* Non-interruptible callers can't handle -EAGAIN, hence return
1129 * -EIO unconditionally for these. */
1133 /* Recovery complete, but the reset failed ... */
1134 if (i915_terminally_wedged(error
))
1138 * Check if GPU Reset is in progress - we need intel_ring_begin
1139 * to work properly to reinit the hw state while the gpu is
1140 * still marked as reset-in-progress. Handle this with a flag.
1142 if (!error
->reload_in_reset
)
1150 * Compare arbitrary request against outstanding lazy request. Emit on match.
1153 i915_gem_check_olr(struct drm_i915_gem_request
*req
)
1157 WARN_ON(!mutex_is_locked(&req
->ring
->dev
->struct_mutex
));
1160 if (req
== req
->ring
->outstanding_lazy_request
)
1161 ret
= i915_add_request(req
->ring
);
1166 static void fake_irq(unsigned long data
)
1168 wake_up_process((struct task_struct
*)data
);
1171 static bool missed_irq(struct drm_i915_private
*dev_priv
,
1172 struct intel_engine_cs
*ring
)
1174 return test_bit(ring
->id
, &dev_priv
->gpu_error
.missed_irq_rings
);
1177 static int __i915_spin_request(struct drm_i915_gem_request
*req
)
1179 unsigned long timeout
;
1181 if (i915_gem_request_get_ring(req
)->irq_refcount
)
1184 timeout
= jiffies
+ 1;
1185 while (!need_resched()) {
1186 if (i915_gem_request_completed(req
, true))
1189 if (time_after_eq(jiffies
, timeout
))
1192 cpu_relax_lowlatency();
1194 if (i915_gem_request_completed(req
, false))
1201 * __i915_wait_request - wait until execution of request has finished
1203 * @reset_counter: reset sequence associated with the given request
1204 * @interruptible: do an interruptible wait (normally yes)
1205 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1207 * Note: It is of utmost importance that the passed in seqno and reset_counter
1208 * values have been read by the caller in an smp safe manner. Where read-side
1209 * locks are involved, it is sufficient to read the reset_counter before
1210 * unlocking the lock that protects the seqno. For lockless tricks, the
1211 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
1214 * Returns 0 if the request was found within the alloted time. Else returns the
1215 * errno with remaining time filled in timeout argument.
1217 int __i915_wait_request(struct drm_i915_gem_request
*req
,
1218 unsigned reset_counter
,
1221 struct intel_rps_client
*rps
)
1223 struct intel_engine_cs
*ring
= i915_gem_request_get_ring(req
);
1224 struct drm_device
*dev
= ring
->dev
;
1225 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
1226 const bool irq_test_in_progress
=
1227 ACCESS_ONCE(dev_priv
->gpu_error
.test_irq_rings
) & intel_ring_flag(ring
);
1229 unsigned long timeout_expire
;
1233 WARN(!intel_irqs_enabled(dev_priv
), "IRQs disabled");
1235 if (list_empty(&req
->list
))
1238 if (i915_gem_request_completed(req
, true))
1241 timeout_expire
= timeout
?
1242 jiffies
+ nsecs_to_jiffies_timeout((u64
)*timeout
) : 0;
1244 if (INTEL_INFO(dev_priv
)->gen
>= 6)
1245 gen6_rps_boost(dev_priv
, rps
, req
->emitted_jiffies
);
1247 /* Record current time in case interrupted by signal, or wedged */
1248 trace_i915_gem_request_wait_begin(req
);
1249 before
= ktime_get_raw_ns();
1251 /* Optimistic spin for the next jiffie before touching IRQs */
1252 ret
= __i915_spin_request(req
);
1256 if (!irq_test_in_progress
&& WARN_ON(!ring
->irq_get(ring
))) {
1262 struct timer_list timer
;
1264 prepare_to_wait(&ring
->irq_queue
, &wait
,
1265 interruptible
? TASK_INTERRUPTIBLE
: TASK_UNINTERRUPTIBLE
);
1267 /* We need to check whether any gpu reset happened in between
1268 * the caller grabbing the seqno and now ... */
1269 if (reset_counter
!= atomic_read(&dev_priv
->gpu_error
.reset_counter
)) {
1270 /* ... but upgrade the -EAGAIN to an -EIO if the gpu
1271 * is truely gone. */
1272 ret
= i915_gem_check_wedge(&dev_priv
->gpu_error
, interruptible
);
1278 if (i915_gem_request_completed(req
, false)) {
1283 if (interruptible
&& signal_pending(current
)) {
1288 if (timeout
&& time_after_eq(jiffies
, timeout_expire
)) {
1293 timer
.function
= NULL
;
1294 if (timeout
|| missed_irq(dev_priv
, ring
)) {
1295 unsigned long expire
;
1297 setup_timer_on_stack(&timer
, fake_irq
, (unsigned long)current
);
1298 expire
= missed_irq(dev_priv
, ring
) ? jiffies
+ 1 : timeout_expire
;
1299 mod_timer(&timer
, expire
);
1304 if (timer
.function
) {
1305 del_singleshot_timer_sync(&timer
);
1306 destroy_timer_on_stack(&timer
);
1309 if (!irq_test_in_progress
)
1310 ring
->irq_put(ring
);
1312 finish_wait(&ring
->irq_queue
, &wait
);
1315 now
= ktime_get_raw_ns();
1316 trace_i915_gem_request_wait_end(req
);
1319 s64 tres
= *timeout
- (now
- before
);
1321 *timeout
= tres
< 0 ? 0 : tres
;
1324 * Apparently ktime isn't accurate enough and occasionally has a
1325 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
1326 * things up to make the test happy. We allow up to 1 jiffy.
1328 * This is a regrssion from the timespec->ktime conversion.
1330 if (ret
== -ETIME
&& *timeout
< jiffies_to_usecs(1)*1000)
1338 i915_gem_request_remove_from_client(struct drm_i915_gem_request
*request
)
1340 struct drm_i915_file_private
*file_priv
= request
->file_priv
;
1345 spin_lock(&file_priv
->mm
.lock
);
1346 list_del(&request
->client_list
);
1347 request
->file_priv
= NULL
;
1348 spin_unlock(&file_priv
->mm
.lock
);
1351 static void i915_gem_request_retire(struct drm_i915_gem_request
*request
)
1353 trace_i915_gem_request_retire(request
);
1355 /* We know the GPU must have read the request to have
1356 * sent us the seqno + interrupt, so use the position
1357 * of tail of the request to update the last known position
1360 * Note this requires that we are always called in request
1363 request
->ringbuf
->last_retired_head
= request
->postfix
;
1365 list_del_init(&request
->list
);
1366 i915_gem_request_remove_from_client(request
);
1368 put_pid(request
->pid
);
1370 i915_gem_request_unreference(request
);
1374 __i915_gem_request_retire__upto(struct drm_i915_gem_request
*req
)
1376 struct intel_engine_cs
*engine
= req
->ring
;
1377 struct drm_i915_gem_request
*tmp
;
1379 lockdep_assert_held(&engine
->dev
->struct_mutex
);
1381 if (list_empty(&req
->list
))
1385 tmp
= list_first_entry(&engine
->request_list
,
1386 typeof(*tmp
), list
);
1388 i915_gem_request_retire(tmp
);
1389 } while (tmp
!= req
);
1391 WARN_ON(i915_verify_lists(engine
->dev
));
1395 * Waits for a request to be signaled, and cleans up the
1396 * request and object lists appropriately for that event.
1399 i915_wait_request(struct drm_i915_gem_request
*req
)
1401 struct drm_device
*dev
;
1402 struct drm_i915_private
*dev_priv
;
1406 BUG_ON(req
== NULL
);
1408 dev
= req
->ring
->dev
;
1409 dev_priv
= dev
->dev_private
;
1410 interruptible
= dev_priv
->mm
.interruptible
;
1412 BUG_ON(!mutex_is_locked(&dev
->struct_mutex
));
1414 ret
= i915_gem_check_wedge(&dev_priv
->gpu_error
, interruptible
);
1418 ret
= i915_gem_check_olr(req
);
1422 ret
= __i915_wait_request(req
,
1423 atomic_read(&dev_priv
->gpu_error
.reset_counter
),
1424 interruptible
, NULL
, NULL
);
1428 __i915_gem_request_retire__upto(req
);
1433 * Ensures that all rendering to the object has completed and the object is
1434 * safe to unbind from the GTT or access from the CPU.
1437 i915_gem_object_wait_rendering(struct drm_i915_gem_object
*obj
,
1446 if (obj
->last_write_req
!= NULL
) {
1447 ret
= i915_wait_request(obj
->last_write_req
);
1451 i
= obj
->last_write_req
->ring
->id
;
1452 if (obj
->last_read_req
[i
] == obj
->last_write_req
)
1453 i915_gem_object_retire__read(obj
, i
);
1455 i915_gem_object_retire__write(obj
);
1458 for (i
= 0; i
< I915_NUM_RINGS
; i
++) {
1459 if (obj
->last_read_req
[i
] == NULL
)
1462 ret
= i915_wait_request(obj
->last_read_req
[i
]);
1466 i915_gem_object_retire__read(obj
, i
);
1468 RQ_BUG_ON(obj
->active
);
1475 i915_gem_object_retire_request(struct drm_i915_gem_object
*obj
,
1476 struct drm_i915_gem_request
*req
)
1478 int ring
= req
->ring
->id
;
1480 if (obj
->last_read_req
[ring
] == req
)
1481 i915_gem_object_retire__read(obj
, ring
);
1482 else if (obj
->last_write_req
== req
)
1483 i915_gem_object_retire__write(obj
);
1485 __i915_gem_request_retire__upto(req
);
1488 /* A nonblocking variant of the above wait. This is a highly dangerous routine
1489 * as the object state may change during this call.
1491 static __must_check
int
1492 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object
*obj
,
1493 struct intel_rps_client
*rps
,
1496 struct drm_device
*dev
= obj
->base
.dev
;
1497 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
1498 struct drm_i915_gem_request
*requests
[I915_NUM_RINGS
];
1499 unsigned reset_counter
;
1502 BUG_ON(!mutex_is_locked(&dev
->struct_mutex
));
1503 BUG_ON(!dev_priv
->mm
.interruptible
);
1508 ret
= i915_gem_check_wedge(&dev_priv
->gpu_error
, true);
1512 reset_counter
= atomic_read(&dev_priv
->gpu_error
.reset_counter
);
1515 struct drm_i915_gem_request
*req
;
1517 req
= obj
->last_write_req
;
1521 ret
= i915_gem_check_olr(req
);
1525 requests
[n
++] = i915_gem_request_reference(req
);
1527 for (i
= 0; i
< I915_NUM_RINGS
; i
++) {
1528 struct drm_i915_gem_request
*req
;
1530 req
= obj
->last_read_req
[i
];
1534 ret
= i915_gem_check_olr(req
);
1538 requests
[n
++] = i915_gem_request_reference(req
);
1542 mutex_unlock(&dev
->struct_mutex
);
1543 for (i
= 0; ret
== 0 && i
< n
; i
++)
1544 ret
= __i915_wait_request(requests
[i
], reset_counter
, true,
1546 mutex_lock(&dev
->struct_mutex
);
1549 for (i
= 0; i
< n
; i
++) {
1551 i915_gem_object_retire_request(obj
, requests
[i
]);
1552 i915_gem_request_unreference(requests
[i
]);
1558 static struct intel_rps_client
*to_rps_client(struct drm_file
*file
)
1560 struct drm_i915_file_private
*fpriv
= file
->driver_priv
;
1565 * Called when user space prepares to use an object with the CPU, either
1566 * through the mmap ioctl's mapping or a GTT mapping.
1569 i915_gem_set_domain_ioctl(struct drm_device
*dev
, void *data
,
1570 struct drm_file
*file
)
1572 struct drm_i915_gem_set_domain
*args
= data
;
1573 struct drm_i915_gem_object
*obj
;
1574 uint32_t read_domains
= args
->read_domains
;
1575 uint32_t write_domain
= args
->write_domain
;
1578 /* Only handle setting domains to types used by the CPU. */
1579 if (write_domain
& I915_GEM_GPU_DOMAINS
)
1582 if (read_domains
& I915_GEM_GPU_DOMAINS
)
1585 /* Having something in the write domain implies it's in the read
1586 * domain, and only that read domain. Enforce that in the request.
1588 if (write_domain
!= 0 && read_domains
!= write_domain
)
1591 ret
= i915_mutex_lock_interruptible(dev
);
1595 obj
= to_intel_bo(drm_gem_object_lookup(dev
, file
, args
->handle
));
1596 if (&obj
->base
== NULL
) {
1601 /* Try to flush the object off the GPU without holding the lock.
1602 * We will repeat the flush holding the lock in the normal manner
1603 * to catch cases where we are gazumped.
1605 ret
= i915_gem_object_wait_rendering__nonblocking(obj
,
1606 to_rps_client(file
),
1611 if (read_domains
& I915_GEM_DOMAIN_GTT
)
1612 ret
= i915_gem_object_set_to_gtt_domain(obj
, write_domain
!= 0);
1614 ret
= i915_gem_object_set_to_cpu_domain(obj
, write_domain
!= 0);
1617 drm_gem_object_unreference(&obj
->base
);
1619 mutex_unlock(&dev
->struct_mutex
);
1624 * Called when user space has done writes to this buffer
1627 i915_gem_sw_finish_ioctl(struct drm_device
*dev
, void *data
,
1628 struct drm_file
*file
)
1630 struct drm_i915_gem_sw_finish
*args
= data
;
1631 struct drm_i915_gem_object
*obj
;
1634 ret
= i915_mutex_lock_interruptible(dev
);
1638 obj
= to_intel_bo(drm_gem_object_lookup(dev
, file
, args
->handle
));
1639 if (&obj
->base
== NULL
) {
1644 /* Pinned buffers may be scanout, so flush the cache */
1645 if (obj
->pin_display
)
1646 i915_gem_object_flush_cpu_write_domain(obj
);
1648 drm_gem_object_unreference(&obj
->base
);
1650 mutex_unlock(&dev
->struct_mutex
);
1655 * Maps the contents of an object, returning the address it is mapped
1658 * While the mapping holds a reference on the contents of the object, it doesn't
1659 * imply a ref on the object itself.
1663 * DRM driver writers who look a this function as an example for how to do GEM
1664 * mmap support, please don't implement mmap support like here. The modern way
1665 * to implement DRM mmap support is with an mmap offset ioctl (like
1666 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1667 * That way debug tooling like valgrind will understand what's going on, hiding
1668 * the mmap call in a driver private ioctl will break that. The i915 driver only
1669 * does cpu mmaps this way because we didn't know better.
1672 i915_gem_mmap_ioctl(struct drm_device
*dev
, void *data
,
1673 struct drm_file
*file
)
1675 struct drm_i915_gem_mmap
*args
= data
;
1676 struct drm_gem_object
*obj
;
1679 if (args
->flags
& ~(I915_MMAP_WC
))
1682 if (args
->flags
& I915_MMAP_WC
&& !cpu_has_pat
)
1685 obj
= drm_gem_object_lookup(dev
, file
, args
->handle
);
1689 /* prime objects have no backing filp to GEM mmap
1693 drm_gem_object_unreference_unlocked(obj
);
1697 addr
= vm_mmap(obj
->filp
, 0, args
->size
,
1698 PROT_READ
| PROT_WRITE
, MAP_SHARED
,
1700 if (args
->flags
& I915_MMAP_WC
) {
1701 struct mm_struct
*mm
= current
->mm
;
1702 struct vm_area_struct
*vma
;
1704 down_write(&mm
->mmap_sem
);
1705 vma
= find_vma(mm
, addr
);
1708 pgprot_writecombine(vm_get_page_prot(vma
->vm_flags
));
1711 up_write(&mm
->mmap_sem
);
1713 drm_gem_object_unreference_unlocked(obj
);
1714 if (IS_ERR((void *)addr
))
1717 args
->addr_ptr
= (uint64_t) addr
;
1723 * i915_gem_fault - fault a page into the GTT
1724 * vma: VMA in question
1727 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1728 * from userspace. The fault handler takes care of binding the object to
1729 * the GTT (if needed), allocating and programming a fence register (again,
1730 * only if needed based on whether the old reg is still valid or the object
1731 * is tiled) and inserting a new PTE into the faulting process.
1733 * Note that the faulting process may involve evicting existing objects
1734 * from the GTT and/or fence registers to make room. So performance may
1735 * suffer if the GTT working set is large or there are few fence registers
1738 int i915_gem_fault(struct vm_area_struct
*vma
, struct vm_fault
*vmf
)
1740 struct drm_i915_gem_object
*obj
= to_intel_bo(vma
->vm_private_data
);
1741 struct drm_device
*dev
= obj
->base
.dev
;
1742 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
1743 struct i915_ggtt_view view
= i915_ggtt_view_normal
;
1744 pgoff_t page_offset
;
1747 bool write
= !!(vmf
->flags
& FAULT_FLAG_WRITE
);
1749 intel_runtime_pm_get(dev_priv
);
1751 /* We don't use vmf->pgoff since that has the fake offset */
1752 page_offset
= ((unsigned long)vmf
->virtual_address
- vma
->vm_start
) >>
1755 ret
= i915_mutex_lock_interruptible(dev
);
1759 trace_i915_gem_object_fault(obj
, page_offset
, true, write
);
1761 /* Try to flush the object off the GPU first without holding the lock.
1762 * Upon reacquiring the lock, we will perform our sanity checks and then
1763 * repeat the flush holding the lock in the normal manner to catch cases
1764 * where we are gazumped.
1766 ret
= i915_gem_object_wait_rendering__nonblocking(obj
, NULL
, !write
);
1770 /* Access to snoopable pages through the GTT is incoherent. */
1771 if (obj
->cache_level
!= I915_CACHE_NONE
&& !HAS_LLC(dev
)) {
1776 /* Use a partial view if the object is bigger than the aperture. */
1777 if (obj
->base
.size
>= dev_priv
->gtt
.mappable_end
&&
1778 obj
->tiling_mode
== I915_TILING_NONE
) {
1779 static const unsigned int chunk_size
= 256; // 1 MiB
1781 memset(&view
, 0, sizeof(view
));
1782 view
.type
= I915_GGTT_VIEW_PARTIAL
;
1783 view
.params
.partial
.offset
= rounddown(page_offset
, chunk_size
);
1784 view
.params
.partial
.size
=
1787 (vma
->vm_end
- vma
->vm_start
)/PAGE_SIZE
-
1788 view
.params
.partial
.offset
);
1791 /* Now pin it into the GTT if needed */
1792 ret
= i915_gem_object_ggtt_pin(obj
, &view
, 0, PIN_MAPPABLE
);
1796 ret
= i915_gem_object_set_to_gtt_domain(obj
, write
);
1800 ret
= i915_gem_object_get_fence(obj
);
1804 /* Finally, remap it using the new GTT offset */
1805 pfn
= dev_priv
->gtt
.mappable_base
+
1806 i915_gem_obj_ggtt_offset_view(obj
, &view
);
1809 if (unlikely(view
.type
== I915_GGTT_VIEW_PARTIAL
)) {
1810 /* Overriding existing pages in partial view does not cause
1811 * us any trouble as TLBs are still valid because the fault
1812 * is due to userspace losing part of the mapping or never
1813 * having accessed it before (at this partials' range).
1815 unsigned long base
= vma
->vm_start
+
1816 (view
.params
.partial
.offset
<< PAGE_SHIFT
);
1819 for (i
= 0; i
< view
.params
.partial
.size
; i
++) {
1820 ret
= vm_insert_pfn(vma
, base
+ i
* PAGE_SIZE
, pfn
+ i
);
1825 obj
->fault_mappable
= true;
1827 if (!obj
->fault_mappable
) {
1828 unsigned long size
= min_t(unsigned long,
1829 vma
->vm_end
- vma
->vm_start
,
1833 for (i
= 0; i
< size
>> PAGE_SHIFT
; i
++) {
1834 ret
= vm_insert_pfn(vma
,
1835 (unsigned long)vma
->vm_start
+ i
* PAGE_SIZE
,
1841 obj
->fault_mappable
= true;
1843 ret
= vm_insert_pfn(vma
,
1844 (unsigned long)vmf
->virtual_address
,
1848 i915_gem_object_ggtt_unpin_view(obj
, &view
);
1850 mutex_unlock(&dev
->struct_mutex
);
1855 * We eat errors when the gpu is terminally wedged to avoid
1856 * userspace unduly crashing (gl has no provisions for mmaps to
1857 * fail). But any other -EIO isn't ours (e.g. swap in failure)
1858 * and so needs to be reported.
1860 if (!i915_terminally_wedged(&dev_priv
->gpu_error
)) {
1861 ret
= VM_FAULT_SIGBUS
;
1866 * EAGAIN means the gpu is hung and we'll wait for the error
1867 * handler to reset everything when re-faulting in
1868 * i915_mutex_lock_interruptible.
1875 * EBUSY is ok: this just means that another thread
1876 * already did the job.
1878 ret
= VM_FAULT_NOPAGE
;
1885 ret
= VM_FAULT_SIGBUS
;
1888 WARN_ONCE(ret
, "unhandled error in i915_gem_fault: %i\n", ret
);
1889 ret
= VM_FAULT_SIGBUS
;
1893 intel_runtime_pm_put(dev_priv
);
1898 * i915_gem_release_mmap - remove physical page mappings
1899 * @obj: obj in question
1901 * Preserve the reservation of the mmapping with the DRM core code, but
1902 * relinquish ownership of the pages back to the system.
1904 * It is vital that we remove the page mapping if we have mapped a tiled
1905 * object through the GTT and then lose the fence register due to
1906 * resource pressure. Similarly if the object has been moved out of the
1907 * aperture, than pages mapped into userspace must be revoked. Removing the
1908 * mapping will then trigger a page fault on the next user access, allowing
1909 * fixup by i915_gem_fault().
1912 i915_gem_release_mmap(struct drm_i915_gem_object
*obj
)
1914 if (!obj
->fault_mappable
)
1917 drm_vma_node_unmap(&obj
->base
.vma_node
,
1918 obj
->base
.dev
->anon_inode
->i_mapping
);
1919 obj
->fault_mappable
= false;
1923 i915_gem_release_all_mmaps(struct drm_i915_private
*dev_priv
)
1925 struct drm_i915_gem_object
*obj
;
1927 list_for_each_entry(obj
, &dev_priv
->mm
.bound_list
, global_list
)
1928 i915_gem_release_mmap(obj
);
1932 i915_gem_get_gtt_size(struct drm_device
*dev
, uint32_t size
, int tiling_mode
)
1936 if (INTEL_INFO(dev
)->gen
>= 4 ||
1937 tiling_mode
== I915_TILING_NONE
)
1940 /* Previous chips need a power-of-two fence region when tiling */
1941 if (INTEL_INFO(dev
)->gen
== 3)
1942 gtt_size
= 1024*1024;
1944 gtt_size
= 512*1024;
1946 while (gtt_size
< size
)
1953 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1954 * @obj: object to check
1956 * Return the required GTT alignment for an object, taking into account
1957 * potential fence register mapping.
1960 i915_gem_get_gtt_alignment(struct drm_device
*dev
, uint32_t size
,
1961 int tiling_mode
, bool fenced
)
1964 * Minimum alignment is 4k (GTT page size), but might be greater
1965 * if a fence register is needed for the object.
1967 if (INTEL_INFO(dev
)->gen
>= 4 || (!fenced
&& IS_G33(dev
)) ||
1968 tiling_mode
== I915_TILING_NONE
)
1972 * Previous chips need to be aligned to the size of the smallest
1973 * fence register that can contain the object.
1975 return i915_gem_get_gtt_size(dev
, size
, tiling_mode
);
1978 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object
*obj
)
1980 struct drm_i915_private
*dev_priv
= obj
->base
.dev
->dev_private
;
1983 if (drm_vma_node_has_offset(&obj
->base
.vma_node
))
1986 dev_priv
->mm
.shrinker_no_lock_stealing
= true;
1988 ret
= drm_gem_create_mmap_offset(&obj
->base
);
1992 /* Badly fragmented mmap space? The only way we can recover
1993 * space is by destroying unwanted objects. We can't randomly release
1994 * mmap_offsets as userspace expects them to be persistent for the
1995 * lifetime of the objects. The closest we can is to release the
1996 * offsets on purgeable objects by truncating it and marking it purged,
1997 * which prevents userspace from ever using that object again.
1999 i915_gem_shrink(dev_priv
,
2000 obj
->base
.size
>> PAGE_SHIFT
,
2002 I915_SHRINK_UNBOUND
|
2003 I915_SHRINK_PURGEABLE
);
2004 ret
= drm_gem_create_mmap_offset(&obj
->base
);
2008 i915_gem_shrink_all(dev_priv
);
2009 ret
= drm_gem_create_mmap_offset(&obj
->base
);
2011 dev_priv
->mm
.shrinker_no_lock_stealing
= false;
2016 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object
*obj
)
2018 drm_gem_free_mmap_offset(&obj
->base
);
2022 i915_gem_mmap_gtt(struct drm_file
*file
,
2023 struct drm_device
*dev
,
2027 struct drm_i915_gem_object
*obj
;
2030 ret
= i915_mutex_lock_interruptible(dev
);
2034 obj
= to_intel_bo(drm_gem_object_lookup(dev
, file
, handle
));
2035 if (&obj
->base
== NULL
) {
2040 if (obj
->madv
!= I915_MADV_WILLNEED
) {
2041 DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
2046 ret
= i915_gem_object_create_mmap_offset(obj
);
2050 *offset
= drm_vma_node_offset_addr(&obj
->base
.vma_node
);
2053 drm_gem_object_unreference(&obj
->base
);
2055 mutex_unlock(&dev
->struct_mutex
);
2060 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2062 * @data: GTT mapping ioctl data
2063 * @file: GEM object info
2065 * Simply returns the fake offset to userspace so it can mmap it.
2066 * The mmap call will end up in drm_gem_mmap(), which will set things
2067 * up so we can get faults in the handler above.
2069 * The fault handler will take care of binding the object into the GTT
2070 * (since it may have been evicted to make room for something), allocating
2071 * a fence register, and mapping the appropriate aperture address into
2075 i915_gem_mmap_gtt_ioctl(struct drm_device
*dev
, void *data
,
2076 struct drm_file
*file
)
2078 struct drm_i915_gem_mmap_gtt
*args
= data
;
2080 return i915_gem_mmap_gtt(file
, dev
, args
->handle
, &args
->offset
);
2083 /* Immediately discard the backing storage */
2085 i915_gem_object_truncate(struct drm_i915_gem_object
*obj
)
2087 i915_gem_object_free_mmap_offset(obj
);
2089 if (obj
->base
.filp
== NULL
)
2092 /* Our goal here is to return as much of the memory as
2093 * is possible back to the system as we are called from OOM.
2094 * To do this we must instruct the shmfs to drop all of its
2095 * backing pages, *now*.
2097 shmem_truncate_range(file_inode(obj
->base
.filp
), 0, (loff_t
)-1);
2098 obj
->madv
= __I915_MADV_PURGED
;
2101 /* Try to discard unwanted pages */
2103 i915_gem_object_invalidate(struct drm_i915_gem_object
*obj
)
2105 struct address_space
*mapping
;
2107 switch (obj
->madv
) {
2108 case I915_MADV_DONTNEED
:
2109 i915_gem_object_truncate(obj
);
2110 case __I915_MADV_PURGED
:
2114 if (obj
->base
.filp
== NULL
)
2117 mapping
= file_inode(obj
->base
.filp
)->i_mapping
,
2118 invalidate_mapping_pages(mapping
, 0, (loff_t
)-1);
2122 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object
*obj
)
2124 struct sg_page_iter sg_iter
;
2127 BUG_ON(obj
->madv
== __I915_MADV_PURGED
);
2129 ret
= i915_gem_object_set_to_cpu_domain(obj
, true);
2131 /* In the event of a disaster, abandon all caches and
2132 * hope for the best.
2134 WARN_ON(ret
!= -EIO
);
2135 i915_gem_clflush_object(obj
, true);
2136 obj
->base
.read_domains
= obj
->base
.write_domain
= I915_GEM_DOMAIN_CPU
;
2139 i915_gem_gtt_finish_object(obj
);
2141 if (i915_gem_object_needs_bit17_swizzle(obj
))
2142 i915_gem_object_save_bit_17_swizzle(obj
);
2144 if (obj
->madv
== I915_MADV_DONTNEED
)
2147 for_each_sg_page(obj
->pages
->sgl
, &sg_iter
, obj
->pages
->nents
, 0) {
2148 struct page
*page
= sg_page_iter_page(&sg_iter
);
2151 set_page_dirty(page
);
2153 if (obj
->madv
== I915_MADV_WILLNEED
)
2154 mark_page_accessed(page
);
2156 page_cache_release(page
);
2160 sg_free_table(obj
->pages
);
2165 i915_gem_object_put_pages(struct drm_i915_gem_object
*obj
)
2167 const struct drm_i915_gem_object_ops
*ops
= obj
->ops
;
2169 if (obj
->pages
== NULL
)
2172 if (obj
->pages_pin_count
)
2175 BUG_ON(i915_gem_obj_bound_any(obj
));
2177 /* ->put_pages might need to allocate memory for the bit17 swizzle
2178 * array, hence protect them from being reaped by removing them from gtt
2180 list_del(&obj
->global_list
);
2182 ops
->put_pages(obj
);
2185 i915_gem_object_invalidate(obj
);
2191 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object
*obj
)
2193 struct drm_i915_private
*dev_priv
= obj
->base
.dev
->dev_private
;
2195 struct address_space
*mapping
;
2196 struct sg_table
*st
;
2197 struct scatterlist
*sg
;
2198 struct sg_page_iter sg_iter
;
2200 unsigned long last_pfn
= 0; /* suppress gcc warning */
2204 /* Assert that the object is not currently in any GPU domain. As it
2205 * wasn't in the GTT, there shouldn't be any way it could have been in
2208 BUG_ON(obj
->base
.read_domains
& I915_GEM_GPU_DOMAINS
);
2209 BUG_ON(obj
->base
.write_domain
& I915_GEM_GPU_DOMAINS
);
2211 st
= kmalloc(sizeof(*st
), GFP_KERNEL
);
2215 page_count
= obj
->base
.size
/ PAGE_SIZE
;
2216 if (sg_alloc_table(st
, page_count
, GFP_KERNEL
)) {
2221 /* Get the list of pages out of our struct file. They'll be pinned
2222 * at this point until we release them.
2224 * Fail silently without starting the shrinker
2226 mapping
= file_inode(obj
->base
.filp
)->i_mapping
;
2227 gfp
= mapping_gfp_mask(mapping
);
2228 gfp
|= __GFP_NORETRY
| __GFP_NOWARN
| __GFP_NO_KSWAPD
;
2229 gfp
&= ~(__GFP_IO
| __GFP_WAIT
);
2232 for (i
= 0; i
< page_count
; i
++) {
2233 page
= shmem_read_mapping_page_gfp(mapping
, i
, gfp
);
2235 i915_gem_shrink(dev_priv
,
2238 I915_SHRINK_UNBOUND
|
2239 I915_SHRINK_PURGEABLE
);
2240 page
= shmem_read_mapping_page_gfp(mapping
, i
, gfp
);
2243 /* We've tried hard to allocate the memory by reaping
2244 * our own buffer, now let the real VM do its job and
2245 * go down in flames if truly OOM.
2247 i915_gem_shrink_all(dev_priv
);
2248 page
= shmem_read_mapping_page(mapping
, i
);
2250 ret
= PTR_ERR(page
);
2254 #ifdef CONFIG_SWIOTLB
2255 if (swiotlb_nr_tbl()) {
2257 sg_set_page(sg
, page
, PAGE_SIZE
, 0);
2262 if (!i
|| page_to_pfn(page
) != last_pfn
+ 1) {
2266 sg_set_page(sg
, page
, PAGE_SIZE
, 0);
2268 sg
->length
+= PAGE_SIZE
;
2270 last_pfn
= page_to_pfn(page
);
2272 /* Check that the i965g/gm workaround works. */
2273 WARN_ON((gfp
& __GFP_DMA32
) && (last_pfn
>= 0x00100000UL
));
2275 #ifdef CONFIG_SWIOTLB
2276 if (!swiotlb_nr_tbl())
2281 ret
= i915_gem_gtt_prepare_object(obj
);
2285 if (i915_gem_object_needs_bit17_swizzle(obj
))
2286 i915_gem_object_do_bit_17_swizzle(obj
);
2288 if (obj
->tiling_mode
!= I915_TILING_NONE
&&
2289 dev_priv
->quirks
& QUIRK_PIN_SWIZZLED_PAGES
)
2290 i915_gem_object_pin_pages(obj
);
2296 for_each_sg_page(st
->sgl
, &sg_iter
, st
->nents
, 0)
2297 page_cache_release(sg_page_iter_page(&sg_iter
));
2301 /* shmemfs first checks if there is enough memory to allocate the page
2302 * and reports ENOSPC should there be insufficient, along with the usual
2303 * ENOMEM for a genuine allocation failure.
2305 * We use ENOSPC in our driver to mean that we have run out of aperture
2306 * space and so want to translate the error from shmemfs back to our
2307 * usual understanding of ENOMEM.
2315 /* Ensure that the associated pages are gathered from the backing storage
2316 * and pinned into our object. i915_gem_object_get_pages() may be called
2317 * multiple times before they are released by a single call to
2318 * i915_gem_object_put_pages() - once the pages are no longer referenced
2319 * either as a result of memory pressure (reaping pages under the shrinker)
2320 * or as the object is itself released.
2323 i915_gem_object_get_pages(struct drm_i915_gem_object
*obj
)
2325 struct drm_i915_private
*dev_priv
= obj
->base
.dev
->dev_private
;
2326 const struct drm_i915_gem_object_ops
*ops
= obj
->ops
;
2332 if (obj
->madv
!= I915_MADV_WILLNEED
) {
2333 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2337 BUG_ON(obj
->pages_pin_count
);
2339 ret
= ops
->get_pages(obj
);
2343 list_add_tail(&obj
->global_list
, &dev_priv
->mm
.unbound_list
);
2345 obj
->get_page
.sg
= obj
->pages
->sgl
;
2346 obj
->get_page
.last
= 0;
2351 void i915_vma_move_to_active(struct i915_vma
*vma
,
2352 struct intel_engine_cs
*ring
)
2354 struct drm_i915_gem_object
*obj
= vma
->obj
;
2356 /* Add a reference if we're newly entering the active list. */
2357 if (obj
->active
== 0)
2358 drm_gem_object_reference(&obj
->base
);
2359 obj
->active
|= intel_ring_flag(ring
);
2361 list_move_tail(&obj
->ring_list
[ring
->id
], &ring
->active_list
);
2362 i915_gem_request_assign(&obj
->last_read_req
[ring
->id
],
2363 intel_ring_get_request(ring
));
2365 list_move_tail(&vma
->mm_list
, &vma
->vm
->active_list
);
2369 i915_gem_object_retire__write(struct drm_i915_gem_object
*obj
)
2371 RQ_BUG_ON(obj
->last_write_req
== NULL
);
2372 RQ_BUG_ON(!(obj
->active
& intel_ring_flag(obj
->last_write_req
->ring
)));
2374 i915_gem_request_assign(&obj
->last_write_req
, NULL
);
2375 intel_fb_obj_flush(obj
, true);
2379 i915_gem_object_retire__read(struct drm_i915_gem_object
*obj
, int ring
)
2381 struct i915_vma
*vma
;
2383 RQ_BUG_ON(obj
->last_read_req
[ring
] == NULL
);
2384 RQ_BUG_ON(!(obj
->active
& (1 << ring
)));
2386 list_del_init(&obj
->ring_list
[ring
]);
2387 i915_gem_request_assign(&obj
->last_read_req
[ring
], NULL
);
2389 if (obj
->last_write_req
&& obj
->last_write_req
->ring
->id
== ring
)
2390 i915_gem_object_retire__write(obj
);
2392 obj
->active
&= ~(1 << ring
);
2396 list_for_each_entry(vma
, &obj
->vma_list
, vma_link
) {
2397 if (!list_empty(&vma
->mm_list
))
2398 list_move_tail(&vma
->mm_list
, &vma
->vm
->inactive_list
);
2401 i915_gem_request_assign(&obj
->last_fenced_req
, NULL
);
2402 drm_gem_object_unreference(&obj
->base
);
i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;

	/* Carefully retire all requests without writing to the rings */
	for_each_ring(ring, dev_priv, i) {
		ret = intel_ring_idle(ring);

	i915_gem_retire_requests(dev);

	/* Finally reset hw state */
	for_each_ring(ring, dev_priv, i) {
		intel_ring_init_seqno(ring, seqno);

		for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++)
			ring->semaphore.sync_seqno[j] = 0;

int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* HWS page needs to be set less than what we
	 * will inject to ring
	 */
	ret = i915_gem_init_seqno(dev, seqno - 1);

	/* Carefully set the last_seqno value so that wrap
	 * detection still works
	 */
	dev_priv->next_seqno = seqno;
	dev_priv->last_seqno = seqno - 1;
	if (dev_priv->last_seqno == 0)
		dev_priv->last_seqno--;
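/* Illustrative sketch (assumption, not taken from this file): the "wrap
 * detection" mentioned above relies on comparing sequence numbers via the
 * signed difference of two u32 values, so a seqno that has wrapped past zero
 * still compares as "after" a slightly older one, provided the two are less
 * than 2^31 apart.
 */
#if 0
static bool sketch_seqno_passed(u32 seq1, u32 seq2)
{
	/* true if seq1 is at or after seq2, modulo u32 wrap-around */
	return (s32)(seq1 - seq2) >= 0;
}
#endif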
i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* reserve 0 for non-seqno */
	if (dev_priv->next_seqno == 0) {
		int ret = i915_gem_init_seqno(dev, 0);

		dev_priv->next_seqno = 1;

	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
int __i915_add_request(struct intel_engine_cs *ring,
		       struct drm_file *file,
		       struct drm_i915_gem_object *obj)
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	struct drm_i915_gem_request *request;
	struct intel_ringbuffer *ringbuf;

	request = ring->outstanding_lazy_request;
	if (WARN_ON(request == NULL))

	if (i915.enable_execlists) {
		ringbuf = request->ctx->engine[ring->id].ringbuf;
		ringbuf = ring->buffer;

	request_start = intel_ring_get_tail(ringbuf);
	/*
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
	 * after having emitted the batchbuffer command. Hence we need to fix
	 * things up similar to emitting the lazy request. The difference here
	 * is that the flush _must_ happen before the next request, no matter
	 */
	if (i915.enable_execlists) {
		ret = logical_ring_flush_all_caches(ringbuf, request->ctx);
		ret = intel_ring_flush_all_caches(ring);

	/* Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	request->postfix = intel_ring_get_tail(ringbuf);

	if (i915.enable_execlists) {
		ret = ring->emit_request(ringbuf, request);
		ret = ring->add_request(ring);

	request->tail = intel_ring_get_tail(ringbuf);

	request->head = request_start;

	/* Whilst this request exists, batch_obj will be on the
	 * active_list, and so will hold the active reference. Only when this
	 * request is retired will the batch_obj be moved onto the
	 * inactive_list and lose its active reference. Hence we do not need
	 * to explicitly hold another reference here.
	 */
	request->batch_obj = obj;

	if (!i915.enable_execlists) {
		/* Hold a reference to the current context so that we can inspect
		 * it later in case a hangcheck error event fires.
		 */
		request->ctx = ring->last_context;
		i915_gem_context_reference(request->ctx);

	request->emitted_jiffies = jiffies;
	ring->last_submitted_seqno = request->seqno;
	list_add_tail(&request->list, &ring->request_list);
	request->file_priv = NULL;

		struct drm_i915_file_private *file_priv = file->driver_priv;

		spin_lock(&file_priv->mm.lock);
		request->file_priv = file_priv;
		list_add_tail(&request->client_list,
			      &file_priv->mm.request_list);
		spin_unlock(&file_priv->mm.lock);

		request->pid = get_pid(task_pid(current));

	trace_i915_gem_request_add(request);
	ring->outstanding_lazy_request = NULL;

	i915_queue_hangcheck(ring->dev);

	queue_delayed_work(dev_priv->wq,
			   &dev_priv->mm.retire_work,
			   round_jiffies_up_relative(HZ));
	intel_mark_busy(dev_priv->dev);
2580 static bool i915_context_is_banned(struct drm_i915_private
*dev_priv
,
2581 const struct intel_context
*ctx
)
2583 unsigned long elapsed
;
2585 elapsed
= get_seconds() - ctx
->hang_stats
.guilty_ts
;
2587 if (ctx
->hang_stats
.banned
)
2590 if (ctx
->hang_stats
.ban_period_seconds
&&
2591 elapsed
<= ctx
->hang_stats
.ban_period_seconds
) {
2592 if (!i915_gem_context_is_default(ctx
)) {
2593 DRM_DEBUG("context hanging too fast, banning!\n");
2595 } else if (i915_stop_ring_allow_ban(dev_priv
)) {
2596 if (i915_stop_ring_allow_warn(dev_priv
))
2597 DRM_ERROR("gpu hanging too fast, banning!\n");
2605 static void i915_set_reset_status(struct drm_i915_private
*dev_priv
,
2606 struct intel_context
*ctx
,
2609 struct i915_ctx_hang_stats
*hs
;
2614 hs
= &ctx
->hang_stats
;
2617 hs
->banned
= i915_context_is_banned(dev_priv
, ctx
);
2619 hs
->guilty_ts
= get_seconds();
2621 hs
->batch_pending
++;
2625 void i915_gem_request_free(struct kref
*req_ref
)
2627 struct drm_i915_gem_request
*req
= container_of(req_ref
,
2629 struct intel_context
*ctx
= req
->ctx
;
2632 if (i915
.enable_execlists
) {
2633 struct intel_engine_cs
*ring
= req
->ring
;
2635 if (ctx
!= ring
->default_context
)
2636 intel_lr_context_unpin(ring
, ctx
);
2639 i915_gem_context_unreference(ctx
);
2642 kmem_cache_free(req
->i915
->requests
, req
);
2645 int i915_gem_request_alloc(struct intel_engine_cs
*ring
,
2646 struct intel_context
*ctx
)
2648 struct drm_i915_private
*dev_priv
= to_i915(ring
->dev
);
2649 struct drm_i915_gem_request
*req
;
2652 if (ring
->outstanding_lazy_request
)
2655 req
= kmem_cache_zalloc(dev_priv
->requests
, GFP_KERNEL
);
2659 kref_init(&req
->ref
);
2660 req
->i915
= dev_priv
;
2662 ret
= i915_gem_get_seqno(ring
->dev
, &req
->seqno
);
2668 if (i915
.enable_execlists
)
2669 ret
= intel_logical_ring_alloc_request_extras(req
, ctx
);
2671 ret
= intel_ring_alloc_request_extras(req
);
2675 ring
->outstanding_lazy_request
= req
;
2679 kmem_cache_free(dev_priv
->requests
, req
);
2683 struct drm_i915_gem_request
*
2684 i915_gem_find_active_request(struct intel_engine_cs
*ring
)
2686 struct drm_i915_gem_request
*request
;
2688 list_for_each_entry(request
, &ring
->request_list
, list
) {
2689 if (i915_gem_request_completed(request
, false))
2698 static void i915_gem_reset_ring_status(struct drm_i915_private
*dev_priv
,
2699 struct intel_engine_cs
*ring
)
2701 struct drm_i915_gem_request
*request
;
2704 request
= i915_gem_find_active_request(ring
);
2706 if (request
== NULL
)
2709 ring_hung
= ring
->hangcheck
.score
>= HANGCHECK_SCORE_RING_HUNG
;
2711 i915_set_reset_status(dev_priv
, request
->ctx
, ring_hung
);
2713 list_for_each_entry_continue(request
, &ring
->request_list
, list
)
2714 i915_set_reset_status(dev_priv
, request
->ctx
, false);
2717 static void i915_gem_reset_ring_cleanup(struct drm_i915_private
*dev_priv
,
2718 struct intel_engine_cs
*ring
)
2720 while (!list_empty(&ring
->active_list
)) {
2721 struct drm_i915_gem_object
*obj
;
2723 obj
= list_first_entry(&ring
->active_list
,
2724 struct drm_i915_gem_object
,
2725 ring_list
[ring
->id
]);
2727 i915_gem_object_retire__read(obj
, ring
->id
);
2731 * Clear the execlists queue up before freeing the requests, as those
2732 * are the ones that keep the context and ringbuffer backing objects
2735 while (!list_empty(&ring
->execlist_queue
)) {
2736 struct drm_i915_gem_request
*submit_req
;
2738 submit_req
= list_first_entry(&ring
->execlist_queue
,
2739 struct drm_i915_gem_request
,
2741 list_del(&submit_req
->execlist_link
);
2743 if (submit_req
->ctx
!= ring
->default_context
)
2744 intel_lr_context_unpin(ring
, submit_req
->ctx
);
2746 i915_gem_request_unreference(submit_req
);
2750 * We must free the requests after all the corresponding objects have
2751 * been moved off active lists. Which is the same order as the normal
2752 * retire_requests function does. This is important if object hold
2753 * implicit references on things like e.g. ppgtt address spaces through
2756 while (!list_empty(&ring
->request_list
)) {
2757 struct drm_i915_gem_request
*request
;
2759 request
= list_first_entry(&ring
->request_list
,
2760 struct drm_i915_gem_request
,
2763 i915_gem_request_retire(request
);
2766 /* This may not have been flushed before the reset, so clean it now */
2767 i915_gem_request_assign(&ring
->outstanding_lazy_request
, NULL
);
void i915_gem_restore_fences(struct drm_device *dev)
	struct drm_i915_private *dev_priv = dev->dev_private;

	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];

		/*
		 * Commit delayed tiling changes if we have an object still
		 * attached to the fence, otherwise just clear the fence.
		 */
			i915_gem_object_update_fence(reg->obj, reg,
						     reg->obj->tiling_mode);
			i915_gem_write_fence(dev, i, NULL);

void i915_gem_reset(struct drm_device *dev)
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;

	/*
	 * Before we free the objects from the requests, we need to inspect
	 * them for finding the guilty party. As the requests only borrow
	 * their reference to the objects, the inspection must be done first.
	 */
	for_each_ring(ring, dev_priv, i)
		i915_gem_reset_ring_status(dev_priv, ring);

	for_each_ring(ring, dev_priv, i)
		i915_gem_reset_ring_cleanup(dev_priv, ring);

	i915_gem_context_reset(dev);

	i915_gem_restore_fences(dev);

	WARN_ON(i915_verify_lists(dev));
2816 * This function clears the request list as sequence numbers are passed.
2819 i915_gem_retire_requests_ring(struct intel_engine_cs
*ring
)
2821 WARN_ON(i915_verify_lists(ring
->dev
));
2823 /* Retire requests first as we use it above for the early return.
2824 * If we retire requests last, we may use a later seqno and so clear
2825 * the requests lists without clearing the active list, leading to
2828 while (!list_empty(&ring
->request_list
)) {
2829 struct drm_i915_gem_request
*request
;
2831 request
= list_first_entry(&ring
->request_list
,
2832 struct drm_i915_gem_request
,
2835 if (!i915_gem_request_completed(request
, true))
2838 i915_gem_request_retire(request
);
2841 /* Move any buffers on the active list that are no longer referenced
2842 * by the ringbuffer to the flushing/inactive lists as appropriate,
2843 * before we free the context associated with the requests.
2845 while (!list_empty(&ring
->active_list
)) {
2846 struct drm_i915_gem_object
*obj
;
2848 obj
= list_first_entry(&ring
->active_list
,
2849 struct drm_i915_gem_object
,
2850 ring_list
[ring
->id
]);
2852 if (!list_empty(&obj
->last_read_req
[ring
->id
]->list
))
2855 i915_gem_object_retire__read(obj
, ring
->id
);
2858 if (unlikely(ring
->trace_irq_req
&&
2859 i915_gem_request_completed(ring
->trace_irq_req
, true))) {
2860 ring
->irq_put(ring
);
2861 i915_gem_request_assign(&ring
->trace_irq_req
, NULL
);
2864 WARN_ON(i915_verify_lists(ring
->dev
));
2868 i915_gem_retire_requests(struct drm_device
*dev
)
2870 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
2871 struct intel_engine_cs
*ring
;
2875 for_each_ring(ring
, dev_priv
, i
) {
2876 i915_gem_retire_requests_ring(ring
);
2877 idle
&= list_empty(&ring
->request_list
);
2878 if (i915
.enable_execlists
) {
2879 unsigned long flags
;
2881 spin_lock_irqsave(&ring
->execlist_lock
, flags
);
2882 idle
&= list_empty(&ring
->execlist_queue
);
2883 spin_unlock_irqrestore(&ring
->execlist_lock
, flags
);
2885 intel_execlists_retire_requests(ring
);
2890 mod_delayed_work(dev_priv
->wq
,
2891 &dev_priv
->mm
.idle_work
,
2892 msecs_to_jiffies(100));
2898 i915_gem_retire_work_handler(struct work_struct
*work
)
2900 struct drm_i915_private
*dev_priv
=
2901 container_of(work
, typeof(*dev_priv
), mm
.retire_work
.work
);
2902 struct drm_device
*dev
= dev_priv
->dev
;
2905 /* Come back later if the device is busy... */
2907 if (mutex_trylock(&dev
->struct_mutex
)) {
2908 idle
= i915_gem_retire_requests(dev
);
2909 mutex_unlock(&dev
->struct_mutex
);
2912 queue_delayed_work(dev_priv
->wq
, &dev_priv
->mm
.retire_work
,
2913 round_jiffies_up_relative(HZ
));
2917 i915_gem_idle_work_handler(struct work_struct
*work
)
2919 struct drm_i915_private
*dev_priv
=
2920 container_of(work
, typeof(*dev_priv
), mm
.idle_work
.work
);
2921 struct drm_device
*dev
= dev_priv
->dev
;
2922 struct intel_engine_cs
*ring
;
2925 for_each_ring(ring
, dev_priv
, i
)
2926 if (!list_empty(&ring
->request_list
))
2929 intel_mark_idle(dev
);
2931 if (mutex_trylock(&dev
->struct_mutex
)) {
2932 struct intel_engine_cs
*ring
;
2935 for_each_ring(ring
, dev_priv
, i
)
2936 i915_gem_batch_pool_fini(&ring
->batch_pool
);
2938 mutex_unlock(&dev
->struct_mutex
);
2943 * Ensures that an object will eventually get non-busy by flushing any required
2944 * write domains, emitting any outstanding lazy request and retiring and
2945 * completed requests.
2948 i915_gem_object_flush_active(struct drm_i915_gem_object
*obj
)
2955 for (i
= 0; i
< I915_NUM_RINGS
; i
++) {
2956 struct drm_i915_gem_request
*req
;
2958 req
= obj
->last_read_req
[i
];
2962 if (list_empty(&req
->list
))
2965 ret
= i915_gem_check_olr(req
);
2969 if (i915_gem_request_completed(req
, true)) {
2970 __i915_gem_request_retire__upto(req
);
2972 i915_gem_object_retire__read(obj
, i
);
/**
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
 * @DRM_IOCTL_ARGS: standard ioctl arguments
 *
 * Returns 0 if successful, else an error is returned with the remaining time in
 * the timeout parameter.
 * -ETIME: object is still busy after timeout
 * -ERESTARTSYS: signal interrupted the wait
 * -ENOENT: object doesn't exist
 * Also possible, but rare:
 * -EAGAIN: GPU wedged
 * -ENODEV: Internal IRQ fail
 * -E?: The add request failed
 *
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 * non-zero timeout parameter the wait ioctl will wait for the given number of
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
 * without holding struct_mutex the object may become re-busied before this
 * function completes. A similar but shorter race condition exists in the busy
 * ioctl.
 */
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_wait *args = data;
	struct drm_i915_gem_object *obj;
	struct drm_i915_gem_request *req[I915_NUM_RINGS];
	unsigned reset_counter;

	if (args->flags != 0)

	ret = i915_mutex_lock_interruptible(dev);

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
	if (&obj->base == NULL) {
		mutex_unlock(&dev->struct_mutex);

	/* Need to make sure the object gets inactive eventually. */
	ret = i915_gem_object_flush_active(obj);

	/* Do this after OLR check to make sure we make forward progress polling
	 * on this IOCTL with a timeout == 0 (like busy ioctl)
	 */
	if (args->timeout_ns == 0) {

	drm_gem_object_unreference(&obj->base);
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);

	for (i = 0; i < I915_NUM_RINGS; i++) {
		if (obj->last_read_req[i] == NULL)

		req[n++] = i915_gem_request_reference(obj->last_read_req[i]);

	mutex_unlock(&dev->struct_mutex);

	for (i = 0; i < n; i++) {
		ret = __i915_wait_request(req[i], reset_counter, true,
					  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
		i915_gem_request_unreference__unlocked(req[i]);

	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
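/* Illustrative userspace sketch (assumption, not part of this file): how the
 * wait ioctl described above is typically driven from a client. A timeout of
 * 0 turns it into a non-blocking busy query; a positive timeout_ns blocks up
 * to that many nanoseconds and the kernel writes the remaining time back.
 */
#if 0
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int sketch_wait_bo(int drm_fd, uint32_t handle, int64_t timeout_ns)
{
	struct drm_i915_gem_wait wait = {
		.bo_handle = handle,
		.flags = 0,		/* must be zero */
		.timeout_ns = timeout_ns,
	};

	/* Returns 0 once idle; -1 with errno == ETIME if still busy. */
	return ioctl(drm_fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
}
#endif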
3069 __i915_gem_object_sync(struct drm_i915_gem_object
*obj
,
3070 struct intel_engine_cs
*to
,
3071 struct drm_i915_gem_request
*req
)
3073 struct intel_engine_cs
*from
;
3076 from
= i915_gem_request_get_ring(req
);
3080 if (i915_gem_request_completed(req
, true))
3083 ret
= i915_gem_check_olr(req
);
3087 if (!i915_semaphore_is_enabled(obj
->base
.dev
)) {
3088 struct drm_i915_private
*i915
= to_i915(obj
->base
.dev
);
3089 ret
= __i915_wait_request(req
,
3090 atomic_read(&i915
->gpu_error
.reset_counter
),
3091 i915
->mm
.interruptible
,
3093 &i915
->rps
.semaphores
);
3097 i915_gem_object_retire_request(obj
, req
);
3099 int idx
= intel_ring_sync_index(from
, to
);
3100 u32 seqno
= i915_gem_request_get_seqno(req
);
3102 if (seqno
<= from
->semaphore
.sync_seqno
[idx
])
3105 trace_i915_gem_ring_sync_to(from
, to
, req
);
3106 ret
= to
->semaphore
.sync_to(to
, from
, seqno
);
3110 /* We use last_read_req because sync_to()
3111 * might have just caused seqno wrap under
3114 from
->semaphore
.sync_seqno
[idx
] =
3115 i915_gem_request_get_seqno(obj
->last_read_req
[from
->id
]);
3122 * i915_gem_object_sync - sync an object to a ring.
3124 * @obj: object which may be in use on another ring.
3125 * @to: ring we wish to use the object on. May be NULL.
3127 * This code is meant to abstract object synchronization with the GPU.
3128 * Calling with NULL implies synchronizing the object with the CPU
3129 * rather than a particular GPU ring. Conceptually we serialise writes
3130 * between engines inside the GPU. We only allow on engine to write
3131 * into a buffer at any time, but multiple readers. To ensure each has
3132 * a coherent view of memory, we must:
3134 * - If there is an outstanding write request to the object, the new
3135 * request must wait for it to complete (either CPU or in hw, requests
3136 * on the same ring will be naturally ordered).
3138 * - If we are a write request (pending_write_domain is set), the new
3139 * request must wait for outstanding read requests to complete.
3141 * Returns 0 if successful, else propagates up the lower layer error.
3144 i915_gem_object_sync(struct drm_i915_gem_object
*obj
,
3145 struct intel_engine_cs
*to
)
3147 const bool readonly
= obj
->base
.pending_write_domain
== 0;
3148 struct drm_i915_gem_request
*req
[I915_NUM_RINGS
];
3155 return i915_gem_object_wait_rendering(obj
, readonly
);
3159 if (obj
->last_write_req
)
3160 req
[n
++] = obj
->last_write_req
;
3162 for (i
= 0; i
< I915_NUM_RINGS
; i
++)
3163 if (obj
->last_read_req
[i
])
3164 req
[n
++] = obj
->last_read_req
[i
];
3166 for (i
= 0; i
< n
; i
++) {
3167 ret
= __i915_gem_object_sync(obj
, to
, req
[i
]);
3175 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object
*obj
)
3177 u32 old_write_domain
, old_read_domains
;
3179 /* Force a pagefault for domain tracking on next user access */
3180 i915_gem_release_mmap(obj
);
3182 if ((obj
->base
.read_domains
& I915_GEM_DOMAIN_GTT
) == 0)
3185 /* Wait for any direct GTT access to complete */
3188 old_read_domains
= obj
->base
.read_domains
;
3189 old_write_domain
= obj
->base
.write_domain
;
3191 obj
->base
.read_domains
&= ~I915_GEM_DOMAIN_GTT
;
3192 obj
->base
.write_domain
&= ~I915_GEM_DOMAIN_GTT
;
3194 trace_i915_gem_object_change_domain(obj
,
int i915_vma_unbind(struct i915_vma *vma)
	struct drm_i915_gem_object *obj = vma->obj;
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;

	if (list_empty(&vma->vma_link))

	if (!drm_mm_node_allocated(&vma->node)) {
		i915_gem_vma_destroy(vma);

	BUG_ON(obj->pages == NULL);

	ret = i915_gem_object_wait_rendering(obj, false);
	/* Continue on if we fail due to EIO, the GPU is hung so we
	 * should be safe and we need to cleanup or else we might
	 * cause memory corruption through use-after-free.
	 */

	if (i915_is_ggtt(vma->vm) &&
	    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
		i915_gem_object_finish_gtt(obj);

		/* release the fence reg _after_ flushing */
		ret = i915_gem_object_put_fence(obj);

	trace_i915_vma_unbind(vma);

	vma->vm->unbind_vma(vma);

	list_del_init(&vma->mm_list);
	if (i915_is_ggtt(vma->vm)) {
		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
			obj->map_and_fenceable = false;
		} else if (vma->ggtt_view.pages) {
			sg_free_table(vma->ggtt_view.pages);
			kfree(vma->ggtt_view.pages);
		vma->ggtt_view.pages = NULL;

	drm_mm_remove_node(&vma->node);
	i915_gem_vma_destroy(vma);

	/* Since the unbound list is global, only move to that list if
	 * no more VMAs exist. */
	if (list_empty(&obj->vma_list))
		list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);

	/* And finally now the object is completely decoupled from this vma,
	 * we can drop its hold on the backing storage and allow it to be
	 * reaped by the shrinker.
	 */
	i915_gem_object_unpin_pages(obj);
3269 int i915_gpu_idle(struct drm_device
*dev
)
3271 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
3272 struct intel_engine_cs
*ring
;
3275 /* Flush everything onto the inactive list. */
3276 for_each_ring(ring
, dev_priv
, i
) {
3277 if (!i915
.enable_execlists
) {
3278 ret
= i915_switch_context(ring
, ring
->default_context
);
3283 ret
= intel_ring_idle(ring
);
3288 WARN_ON(i915_verify_lists(dev
));
static void i965_write_fence_reg(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
	struct drm_i915_private *dev_priv = dev->dev_private;
	int fence_pitch_shift;

	if (INTEL_INFO(dev)->gen >= 6) {
		fence_reg = FENCE_REG_SANDYBRIDGE_0;
		fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
		fence_reg = FENCE_REG_965_0;
		fence_pitch_shift = I965_FENCE_PITCH_SHIFT;

	fence_reg += reg * 8;

	/* To w/a incoherency with non-atomic 64-bit register updates,
	 * we split the 64-bit update into two 32-bit writes. In order
	 * for a partial fence not to be evaluated between writes, we
	 * precede the update with a write to turn off the fence register,
	 * and only enable the fence as the last step.
	 *
	 * For extra levels of paranoia, we make sure each step lands
	 * before applying the next step.
	 */
	I915_WRITE(fence_reg, 0);
	POSTING_READ(fence_reg);

		u32 size = i915_gem_obj_ggtt_size(obj);

		/* Adjust fence size to match tiled area */
		if (obj->tiling_mode != I915_TILING_NONE) {
			uint32_t row_size = obj->stride *
				(obj->tiling_mode == I915_TILING_Y ? 32 : 8);
			size = (size / row_size) * row_size;

		val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
		val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
		val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
		if (obj->tiling_mode == I915_TILING_Y)
			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
		val |= I965_FENCE_REG_VALID;

		I915_WRITE(fence_reg + 4, val >> 32);
		POSTING_READ(fence_reg + 4);

		I915_WRITE(fence_reg + 0, val);
		POSTING_READ(fence_reg);

		I915_WRITE(fence_reg + 4, 0);
		POSTING_READ(fence_reg + 4);
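/* Illustrative sketch (assumption; the helper name and register arguments are
 * hypothetical): the general pattern described in the comment above for
 * safely updating a 64-bit register with two 32-bit MMIO writes. The register
 * is first invalidated, the half without the VALID bit is written next, and
 * the half carrying the VALID bit is written last, with a read-back after
 * each step so it lands before the next one.
 */
#if 0
static void sketch_update_split_reg64(void __iomem *reg_lo,
				      void __iomem *reg_hi,
				      u64 val, u32 valid_bit)
{
	writel(0, reg_lo);			/* 1. turn the fence off */
	readl(reg_lo);				/*    ...and flush the write */

	writel(upper_32_bits(val), reg_hi);	/* 2. half without VALID */
	readl(reg_hi);

	writel(lower_32_bits(val) | valid_bit, reg_lo);	/* 3. enable last */
	readl(reg_lo);
}
#endif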
3351 static void i915_write_fence_reg(struct drm_device
*dev
, int reg
,
3352 struct drm_i915_gem_object
*obj
)
3354 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
3358 u32 size
= i915_gem_obj_ggtt_size(obj
);
3362 WARN((i915_gem_obj_ggtt_offset(obj
) & ~I915_FENCE_START_MASK
) ||
3363 (size
& -size
) != size
||
3364 (i915_gem_obj_ggtt_offset(obj
) & (size
- 1)),
3365 "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
3366 i915_gem_obj_ggtt_offset(obj
), obj
->map_and_fenceable
, size
);
3368 if (obj
->tiling_mode
== I915_TILING_Y
&& HAS_128_BYTE_Y_TILING(dev
))
3373 /* Note: pitch better be a power of two tile widths */
3374 pitch_val
= obj
->stride
/ tile_width
;
3375 pitch_val
= ffs(pitch_val
) - 1;
3377 val
= i915_gem_obj_ggtt_offset(obj
);
3378 if (obj
->tiling_mode
== I915_TILING_Y
)
3379 val
|= 1 << I830_FENCE_TILING_Y_SHIFT
;
3380 val
|= I915_FENCE_SIZE_BITS(size
);
3381 val
|= pitch_val
<< I830_FENCE_PITCH_SHIFT
;
3382 val
|= I830_FENCE_REG_VALID
;
3387 reg
= FENCE_REG_830_0
+ reg
* 4;
3389 reg
= FENCE_REG_945_8
+ (reg
- 8) * 4;
3391 I915_WRITE(reg
, val
);
3395 static void i830_write_fence_reg(struct drm_device
*dev
, int reg
,
3396 struct drm_i915_gem_object
*obj
)
3398 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
3402 u32 size
= i915_gem_obj_ggtt_size(obj
);
3405 WARN((i915_gem_obj_ggtt_offset(obj
) & ~I830_FENCE_START_MASK
) ||
3406 (size
& -size
) != size
||
3407 (i915_gem_obj_ggtt_offset(obj
) & (size
- 1)),
3408 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n",
3409 i915_gem_obj_ggtt_offset(obj
), size
);
3411 pitch_val
= obj
->stride
/ 128;
3412 pitch_val
= ffs(pitch_val
) - 1;
3414 val
= i915_gem_obj_ggtt_offset(obj
);
3415 if (obj
->tiling_mode
== I915_TILING_Y
)
3416 val
|= 1 << I830_FENCE_TILING_Y_SHIFT
;
3417 val
|= I830_FENCE_SIZE_BITS(size
);
3418 val
|= pitch_val
<< I830_FENCE_PITCH_SHIFT
;
3419 val
|= I830_FENCE_REG_VALID
;
3423 I915_WRITE(FENCE_REG_830_0
+ reg
* 4, val
);
3424 POSTING_READ(FENCE_REG_830_0
+ reg
* 4);
3427 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object
*obj
)
3429 return obj
&& obj
->base
.read_domains
& I915_GEM_DOMAIN_GTT
;
3432 static void i915_gem_write_fence(struct drm_device
*dev
, int reg
,
3433 struct drm_i915_gem_object
*obj
)
3435 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
3437 /* Ensure that all CPU reads are completed before installing a fence
3438 * and all writes before removing the fence.
3440 if (i915_gem_object_needs_mb(dev_priv
->fence_regs
[reg
].obj
))
3443 WARN(obj
&& (!obj
->stride
|| !obj
->tiling_mode
),
3444 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
3445 obj
->stride
, obj
->tiling_mode
);
3448 i830_write_fence_reg(dev
, reg
, obj
);
3449 else if (IS_GEN3(dev
))
3450 i915_write_fence_reg(dev
, reg
, obj
);
3451 else if (INTEL_INFO(dev
)->gen
>= 4)
3452 i965_write_fence_reg(dev
, reg
, obj
);
3454 /* And similarly be paranoid that no direct access to this region
3455 * is reordered to before the fence is installed.
3457 if (i915_gem_object_needs_mb(obj
))
3461 static inline int fence_number(struct drm_i915_private
*dev_priv
,
3462 struct drm_i915_fence_reg
*fence
)
3464 return fence
- dev_priv
->fence_regs
;
3467 static void i915_gem_object_update_fence(struct drm_i915_gem_object
*obj
,
3468 struct drm_i915_fence_reg
*fence
,
3471 struct drm_i915_private
*dev_priv
= obj
->base
.dev
->dev_private
;
3472 int reg
= fence_number(dev_priv
, fence
);
3474 i915_gem_write_fence(obj
->base
.dev
, reg
, enable
? obj
: NULL
);
3477 obj
->fence_reg
= reg
;
3479 list_move_tail(&fence
->lru_list
, &dev_priv
->mm
.fence_list
);
3481 obj
->fence_reg
= I915_FENCE_REG_NONE
;
3483 list_del_init(&fence
->lru_list
);
3485 obj
->fence_dirty
= false;
3489 i915_gem_object_wait_fence(struct drm_i915_gem_object
*obj
)
3491 if (obj
->last_fenced_req
) {
3492 int ret
= i915_wait_request(obj
->last_fenced_req
);
3496 i915_gem_request_assign(&obj
->last_fenced_req
, NULL
);
3503 i915_gem_object_put_fence(struct drm_i915_gem_object
*obj
)
3505 struct drm_i915_private
*dev_priv
= obj
->base
.dev
->dev_private
;
3506 struct drm_i915_fence_reg
*fence
;
3509 ret
= i915_gem_object_wait_fence(obj
);
3513 if (obj
->fence_reg
== I915_FENCE_REG_NONE
)
3516 fence
= &dev_priv
->fence_regs
[obj
->fence_reg
];
3518 if (WARN_ON(fence
->pin_count
))
3521 i915_gem_object_fence_lost(obj
);
3522 i915_gem_object_update_fence(obj
, fence
, false);
3527 static struct drm_i915_fence_reg
*
3528 i915_find_fence_reg(struct drm_device
*dev
)
3530 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
3531 struct drm_i915_fence_reg
*reg
, *avail
;
3534 /* First try to find a free reg */
3536 for (i
= dev_priv
->fence_reg_start
; i
< dev_priv
->num_fence_regs
; i
++) {
3537 reg
= &dev_priv
->fence_regs
[i
];
3541 if (!reg
->pin_count
)
3548 /* None available, try to steal one or wait for a user to finish */
3549 list_for_each_entry(reg
, &dev_priv
->mm
.fence_list
, lru_list
) {
3557 /* Wait for completion of pending flips which consume fences */
3558 if (intel_has_pending_fb_unpin(dev
))
3559 return ERR_PTR(-EAGAIN
);
3561 return ERR_PTR(-EDEADLK
);
3565 * i915_gem_object_get_fence - set up fencing for an object
3566 * @obj: object to map through a fence reg
3568 * When mapping objects through the GTT, userspace wants to be able to write
3569 * to them without having to worry about swizzling if the object is tiled.
3570 * This function walks the fence regs looking for a free one for @obj,
3571 * stealing one if it can't find any.
3573 * It then sets up the reg based on the object's properties: address, pitch
3574 * and tiling format.
3576 * For an untiled surface, this removes any existing fence.
3579 i915_gem_object_get_fence(struct drm_i915_gem_object
*obj
)
3581 struct drm_device
*dev
= obj
->base
.dev
;
3582 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
3583 bool enable
= obj
->tiling_mode
!= I915_TILING_NONE
;
3584 struct drm_i915_fence_reg
*reg
;
3587 /* Have we updated the tiling parameters upon the object and so
3588 * will need to serialise the write to the associated fence register?
3590 if (obj
->fence_dirty
) {
3591 ret
= i915_gem_object_wait_fence(obj
);
3596 /* Just update our place in the LRU if our fence is getting reused. */
3597 if (obj
->fence_reg
!= I915_FENCE_REG_NONE
) {
3598 reg
= &dev_priv
->fence_regs
[obj
->fence_reg
];
3599 if (!obj
->fence_dirty
) {
3600 list_move_tail(®
->lru_list
,
3601 &dev_priv
->mm
.fence_list
);
3604 } else if (enable
) {
3605 if (WARN_ON(!obj
->map_and_fenceable
))
3608 reg
= i915_find_fence_reg(dev
);
3610 return PTR_ERR(reg
);
3613 struct drm_i915_gem_object
*old
= reg
->obj
;
3615 ret
= i915_gem_object_wait_fence(old
);
3619 i915_gem_object_fence_lost(old
);
3624 i915_gem_object_update_fence(obj
, reg
, enable
);
static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
				     unsigned long cache_level)
	struct drm_mm_node *gtt_space = &vma->node;
	struct drm_mm_node *other;

	/*
	 * On some machines we have to be careful when putting differing types
	 * of snoopable memory together to avoid the prefetcher crossing memory
	 * domains and dying. During vm initialisation, we decide whether or not
	 * these constraints apply and set the drm_mm.color_adjust
	 */
	if (vma->vm->mm.color_adjust == NULL)

	if (!drm_mm_node_allocated(gtt_space))

	if (list_empty(&gtt_space->node_list))

	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
	if (other->allocated && !other->hole_follows && other->color != cache_level)

	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
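/* Illustrative sketch (assumption; the real callback lives in the GTT setup
 * code and the drm_mm callback signature shown here is approximate): a
 * color_adjust hook keeps a one-page guard hole between nodes of differing
 * "color" (cache level), which is how the constraint described in the comment
 * above is enforced at allocation time rather than only checked afterwards.
 */
#if 0
static void sketch_color_adjust(struct drm_mm_node *node, unsigned long color,
				unsigned long *start, unsigned long *end)
{
	if (node->allocated && node->color != color)
		*start += 4096;		/* guard page after a differing neighbour */

	node = list_entry(node->node_list.next, struct drm_mm_node, node_list);
	if (node->allocated && node->color != color)
		*end -= 4096;		/* guard page before a differing neighbour */
}
#endif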
3663 * Finds free space in the GTT aperture and binds the object or a view of it
3666 static struct i915_vma
*
3667 i915_gem_object_bind_to_vm(struct drm_i915_gem_object
*obj
,
3668 struct i915_address_space
*vm
,
3669 const struct i915_ggtt_view
*ggtt_view
,
3673 struct drm_device
*dev
= obj
->base
.dev
;
3674 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
3675 u32 size
, fence_size
, fence_alignment
, unfenced_alignment
;
3676 unsigned long start
=
3677 flags
& PIN_OFFSET_BIAS
? flags
& PIN_OFFSET_MASK
: 0;
3679 flags
& PIN_MAPPABLE
? dev_priv
->gtt
.mappable_end
: vm
->total
;
3680 struct i915_vma
*vma
;
3683 if (i915_is_ggtt(vm
)) {
3686 if (WARN_ON(!ggtt_view
))
3687 return ERR_PTR(-EINVAL
);
3689 view_size
= i915_ggtt_view_size(obj
, ggtt_view
);
3691 fence_size
= i915_gem_get_gtt_size(dev
,
3694 fence_alignment
= i915_gem_get_gtt_alignment(dev
,
3698 unfenced_alignment
= i915_gem_get_gtt_alignment(dev
,
3702 size
= flags
& PIN_MAPPABLE
? fence_size
: view_size
;
3704 fence_size
= i915_gem_get_gtt_size(dev
,
3707 fence_alignment
= i915_gem_get_gtt_alignment(dev
,
3711 unfenced_alignment
=
3712 i915_gem_get_gtt_alignment(dev
,
3716 size
= flags
& PIN_MAPPABLE
? fence_size
: obj
->base
.size
;
3720 alignment
= flags
& PIN_MAPPABLE
? fence_alignment
:
3722 if (flags
& PIN_MAPPABLE
&& alignment
& (fence_alignment
- 1)) {
3723 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
3724 ggtt_view
? ggtt_view
->type
: 0,
3726 return ERR_PTR(-EINVAL
);
3729 /* If binding the object/GGTT view requires more space than the entire
3730 * aperture has, reject it early before evicting everything in a vain
3731 * attempt to find space.
3734 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%u > %s aperture=%lu\n",
3735 ggtt_view
? ggtt_view
->type
: 0,
3737 flags
& PIN_MAPPABLE
? "mappable" : "total",
3739 return ERR_PTR(-E2BIG
);
3742 ret
= i915_gem_object_get_pages(obj
);
3744 return ERR_PTR(ret
);
3746 i915_gem_object_pin_pages(obj
);
3748 vma
= ggtt_view
? i915_gem_obj_lookup_or_create_ggtt_vma(obj
, ggtt_view
) :
3749 i915_gem_obj_lookup_or_create_vma(obj
, vm
);
3755 ret
= drm_mm_insert_node_in_range_generic(&vm
->mm
, &vma
->node
,
3759 DRM_MM_SEARCH_DEFAULT
,
3760 DRM_MM_CREATE_DEFAULT
);
3762 ret
= i915_gem_evict_something(dev
, vm
, size
, alignment
,
3771 if (WARN_ON(!i915_gem_valid_gtt_space(vma
, obj
->cache_level
))) {
3773 goto err_remove_node
;
3776 trace_i915_vma_bind(vma
, flags
);
3777 ret
= i915_vma_bind(vma
, obj
->cache_level
, flags
);
3779 goto err_remove_node
;
3781 list_move_tail(&obj
->global_list
, &dev_priv
->mm
.bound_list
);
3782 list_add_tail(&vma
->mm_list
, &vm
->inactive_list
);
3787 drm_mm_remove_node(&vma
->node
);
3789 i915_gem_vma_destroy(vma
);
3792 i915_gem_object_unpin_pages(obj
);
3797 i915_gem_clflush_object(struct drm_i915_gem_object
*obj
,
3800 /* If we don't have a page list set up, then we're not pinned
3801 * to GPU, and we can ignore the cache flush because it'll happen
3802 * again at bind time.
3804 if (obj
->pages
== NULL
)
3808 * Stolen memory is always coherent with the GPU as it is explicitly
3809 * marked as wc by the system, or the system is cache-coherent.
3811 if (obj
->stolen
|| obj
->phys_handle
)
3814 /* If the GPU is snooping the contents of the CPU cache,
3815 * we do not need to manually clear the CPU cache lines. However,
3816 * the caches are only snooped when the render cache is
3817 * flushed/invalidated. As we always have to emit invalidations
3818 * and flushes when moving into and out of the RENDER domain, correct
3819 * snooping behaviour occurs naturally as the result of our domain
3822 if (!force
&& cpu_cache_is_coherent(obj
->base
.dev
, obj
->cache_level
)) {
3823 obj
->cache_dirty
= true;
3827 trace_i915_gem_object_clflush(obj
);
3828 drm_clflush_sg(obj
->pages
);
3829 obj
->cache_dirty
= false;
3834 /** Flushes the GTT write domain for the object if it's dirty. */
3836 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object
*obj
)
3838 uint32_t old_write_domain
;
3840 if (obj
->base
.write_domain
!= I915_GEM_DOMAIN_GTT
)
3843 /* No actual flushing is required for the GTT write domain. Writes
3844 * to it immediately go to main memory as far as we know, so there's
3845 * no chipset flush. It also doesn't land in render cache.
3847 * However, we do have to enforce the order so that all writes through
3848 * the GTT land before any writes to the device, such as updates to
3853 old_write_domain
= obj
->base
.write_domain
;
3854 obj
->base
.write_domain
= 0;
3856 intel_fb_obj_flush(obj
, false);
3858 trace_i915_gem_object_change_domain(obj
,
3859 obj
->base
.read_domains
,
3863 /** Flushes the CPU write domain for the object if it's dirty. */
3865 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object
*obj
)
3867 uint32_t old_write_domain
;
3869 if (obj
->base
.write_domain
!= I915_GEM_DOMAIN_CPU
)
3872 if (i915_gem_clflush_object(obj
, obj
->pin_display
))
3873 i915_gem_chipset_flush(obj
->base
.dev
);
3875 old_write_domain
= obj
->base
.write_domain
;
3876 obj
->base
.write_domain
= 0;
3878 intel_fb_obj_flush(obj
, false);
3880 trace_i915_gem_object_change_domain(obj
,
3881 obj
->base
.read_domains
,
3886 * Moves a single object to the GTT read, and possibly write domain.
3888 * This function returns when the move is complete, including waiting on
3892 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object
*obj
, bool write
)
3894 uint32_t old_write_domain
, old_read_domains
;
3895 struct i915_vma
*vma
;
3898 if (obj
->base
.write_domain
== I915_GEM_DOMAIN_GTT
)
3901 ret
= i915_gem_object_wait_rendering(obj
, !write
);
3905 /* Flush and acquire obj->pages so that we are coherent through
3906 * direct access in memory with previous cached writes through
3907 * shmemfs and that our cache domain tracking remains valid.
3908 * For example, if the obj->filp was moved to swap without us
3909 * being notified and releasing the pages, we would mistakenly
3910 * continue to assume that the obj remained out of the CPU cached
3913 ret
= i915_gem_object_get_pages(obj
);
3917 i915_gem_object_flush_cpu_write_domain(obj
);
3919 /* Serialise direct access to this object with the barriers for
3920 * coherent writes from the GPU, by effectively invalidating the
3921 * GTT domain upon first access.
3923 if ((obj
->base
.read_domains
& I915_GEM_DOMAIN_GTT
) == 0)
3926 old_write_domain
= obj
->base
.write_domain
;
3927 old_read_domains
= obj
->base
.read_domains
;
3929 /* It should now be out of any other write domains, and we can update
3930 * the domain values for our changes.
3932 BUG_ON((obj
->base
.write_domain
& ~I915_GEM_DOMAIN_GTT
) != 0);
3933 obj
->base
.read_domains
|= I915_GEM_DOMAIN_GTT
;
3935 obj
->base
.read_domains
= I915_GEM_DOMAIN_GTT
;
3936 obj
->base
.write_domain
= I915_GEM_DOMAIN_GTT
;
3941 intel_fb_obj_invalidate(obj
, NULL
, ORIGIN_GTT
);
3943 trace_i915_gem_object_change_domain(obj
,
3947 /* And bump the LRU for this access */
3948 vma
= i915_gem_obj_to_ggtt(obj
);
3949 if (vma
&& drm_mm_node_allocated(&vma
->node
) && !obj
->active
)
3950 list_move_tail(&vma
->mm_list
,
3951 &to_i915(obj
->base
.dev
)->gtt
.base
.inactive_list
);
3956 int i915_gem_object_set_cache_level(struct drm_i915_gem_object
*obj
,
3957 enum i915_cache_level cache_level
)
3959 struct drm_device
*dev
= obj
->base
.dev
;
3960 struct i915_vma
*vma
, *next
;
3963 if (obj
->cache_level
== cache_level
)
3966 if (i915_gem_obj_is_pinned(obj
)) {
3967 DRM_DEBUG("can not change the cache level of pinned objects\n");
3971 list_for_each_entry_safe(vma
, next
, &obj
->vma_list
, vma_link
) {
3972 if (!i915_gem_valid_gtt_space(vma
, cache_level
)) {
3973 ret
= i915_vma_unbind(vma
);
3979 if (i915_gem_obj_bound_any(obj
)) {
3980 ret
= i915_gem_object_wait_rendering(obj
, false);
3984 i915_gem_object_finish_gtt(obj
);
3986 /* Before SandyBridge, you could not use tiling or fence
3987 * registers with snooped memory, so relinquish any fences
3988 * currently pointing to our region in the aperture.
3990 if (INTEL_INFO(dev
)->gen
< 6) {
3991 ret
= i915_gem_object_put_fence(obj
);
3996 list_for_each_entry(vma
, &obj
->vma_list
, vma_link
)
3997 if (drm_mm_node_allocated(&vma
->node
)) {
3998 ret
= i915_vma_bind(vma
, cache_level
,
4005 list_for_each_entry(vma
, &obj
->vma_list
, vma_link
)
4006 vma
->node
.color
= cache_level
;
4007 obj
->cache_level
= cache_level
;
4009 if (obj
->cache_dirty
&&
4010 obj
->base
.write_domain
!= I915_GEM_DOMAIN_CPU
&&
4011 cpu_write_needs_clflush(obj
)) {
4012 if (i915_gem_clflush_object(obj
, true))
4013 i915_gem_chipset_flush(obj
->base
.dev
);
4019 int i915_gem_get_caching_ioctl(struct drm_device
*dev
, void *data
,
4020 struct drm_file
*file
)
4022 struct drm_i915_gem_caching
*args
= data
;
4023 struct drm_i915_gem_object
*obj
;
4025 obj
= to_intel_bo(drm_gem_object_lookup(dev
, file
, args
->handle
));
4026 if (&obj
->base
== NULL
)
4029 switch (obj
->cache_level
) {
4030 case I915_CACHE_LLC
:
4031 case I915_CACHE_L3_LLC
:
4032 args
->caching
= I915_CACHING_CACHED
;
4036 args
->caching
= I915_CACHING_DISPLAY
;
4040 args
->caching
= I915_CACHING_NONE
;
4044 drm_gem_object_unreference_unlocked(&obj
->base
);
4048 int i915_gem_set_caching_ioctl(struct drm_device
*dev
, void *data
,
4049 struct drm_file
*file
)
4051 struct drm_i915_gem_caching
*args
= data
;
4052 struct drm_i915_gem_object
*obj
;
4053 enum i915_cache_level level
;
4056 switch (args
->caching
) {
4057 case I915_CACHING_NONE
:
4058 level
= I915_CACHE_NONE
;
4060 case I915_CACHING_CACHED
:
4061 level
= I915_CACHE_LLC
;
4063 case I915_CACHING_DISPLAY
:
4064 level
= HAS_WT(dev
) ? I915_CACHE_WT
: I915_CACHE_NONE
;
4070 ret
= i915_mutex_lock_interruptible(dev
);
4074 obj
= to_intel_bo(drm_gem_object_lookup(dev
, file
, args
->handle
));
4075 if (&obj
->base
== NULL
) {
4080 ret
= i915_gem_object_set_cache_level(obj
, level
);
4082 drm_gem_object_unreference(&obj
->base
);
4084 mutex_unlock(&dev
->struct_mutex
);
4089 * Prepare buffer for display plane (scanout, cursors, etc).
4090 * Can be called from an uninterruptible phase (modesetting) and allows
4091 * any flushes to be pipelined (for pageflips).
4094 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object
*obj
,
4096 struct intel_engine_cs
*pipelined
,
4097 const struct i915_ggtt_view
*view
)
4099 u32 old_read_domains
, old_write_domain
;
4102 ret
= i915_gem_object_sync(obj
, pipelined
);
4106 /* Mark the pin_display early so that we account for the
4107 * display coherency whilst setting up the cache domains.
4111 /* The display engine is not coherent with the LLC cache on gen6. As
4112 * a result, we make sure that the pinning that is about to occur is
4113 * done with uncached PTEs. This is lowest common denominator for all
4116 * However for gen6+, we could do better by using the GFDT bit instead
4117 * of uncaching, which would allow us to flush all the LLC-cached data
4118 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
4120 ret
= i915_gem_object_set_cache_level(obj
,
4121 HAS_WT(obj
->base
.dev
) ? I915_CACHE_WT
: I915_CACHE_NONE
);
4123 goto err_unpin_display
;
4125 /* As the user may map the buffer once pinned in the display plane
4126 * (e.g. libkms for the bootup splash), we have to ensure that we
4127 * always use map_and_fenceable for all scanout buffers.
4129 ret
= i915_gem_object_ggtt_pin(obj
, view
, alignment
,
4130 view
->type
== I915_GGTT_VIEW_NORMAL
?
4133 goto err_unpin_display
;
4135 i915_gem_object_flush_cpu_write_domain(obj
);
4137 old_write_domain
= obj
->base
.write_domain
;
4138 old_read_domains
= obj
->base
.read_domains
;
4140 /* It should now be out of any other write domains, and we can update
4141 * the domain values for our changes.
4143 obj
->base
.write_domain
= 0;
4144 obj
->base
.read_domains
|= I915_GEM_DOMAIN_GTT
;
4146 trace_i915_gem_object_change_domain(obj
,
4158 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object
*obj
,
4159 const struct i915_ggtt_view
*view
)
4161 if (WARN_ON(obj
->pin_display
== 0))
4164 i915_gem_object_ggtt_unpin_view(obj
, view
);
4170 * Moves a single object to the CPU read, and possibly write domain.
4172 * This function returns when the move is complete, including waiting on
4176 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object
*obj
, bool write
)
4178 uint32_t old_write_domain
, old_read_domains
;
4181 if (obj
->base
.write_domain
== I915_GEM_DOMAIN_CPU
)
4184 ret
= i915_gem_object_wait_rendering(obj
, !write
);
4188 i915_gem_object_flush_gtt_write_domain(obj
);
4190 old_write_domain
= obj
->base
.write_domain
;
4191 old_read_domains
= obj
->base
.read_domains
;
4193 /* Flush the CPU cache if it's still invalid. */
4194 if ((obj
->base
.read_domains
& I915_GEM_DOMAIN_CPU
) == 0) {
4195 i915_gem_clflush_object(obj
, false);
4197 obj
->base
.read_domains
|= I915_GEM_DOMAIN_CPU
;
4200 /* It should now be out of any other write domains, and we can update
4201 * the domain values for our changes.
4203 BUG_ON((obj
->base
.write_domain
& ~I915_GEM_DOMAIN_CPU
) != 0);
4205 /* If we're writing through the CPU, then the GPU read domains will
4206 * need to be invalidated at next use.
4209 obj
->base
.read_domains
= I915_GEM_DOMAIN_CPU
;
4210 obj
->base
.write_domain
= I915_GEM_DOMAIN_CPU
;
4214 intel_fb_obj_invalidate(obj
, NULL
, ORIGIN_CPU
);
4216 trace_i915_gem_object_change_domain(obj
,
4223 /* Throttle our rendering by waiting until the ring has completed our requests
4224 * emitted over 20 msec ago.
4226 * Note that if we were to use the current jiffies each time around the loop,
4227 * we wouldn't escape the function with any frames outstanding if the time to
4228 * render a frame was over 20ms.
4230 * This should get us reasonable parallelism between CPU and GPU but also
4231 * relatively low latency when blocking on a particular request to finish.
4234 i915_gem_ring_throttle(struct drm_device
*dev
, struct drm_file
*file
)
4236 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
4237 struct drm_i915_file_private
*file_priv
= file
->driver_priv
;
4238 unsigned long recent_enough
= jiffies
- DRM_I915_THROTTLE_JIFFIES
;
4239 struct drm_i915_gem_request
*request
, *target
= NULL
;
4240 unsigned reset_counter
;
4243 ret
= i915_gem_wait_for_error(&dev_priv
->gpu_error
);
4247 ret
= i915_gem_check_wedge(&dev_priv
->gpu_error
, false);
4251 spin_lock(&file_priv
->mm
.lock
);
4252 list_for_each_entry(request
, &file_priv
->mm
.request_list
, client_list
) {
4253 if (time_after_eq(request
->emitted_jiffies
, recent_enough
))
4258 reset_counter
= atomic_read(&dev_priv
->gpu_error
.reset_counter
);
4260 i915_gem_request_reference(target
);
4261 spin_unlock(&file_priv
->mm
.lock
);
4266 ret
= __i915_wait_request(target
, reset_counter
, true, NULL
, NULL
);
4268 queue_delayed_work(dev_priv
->wq
, &dev_priv
->mm
.retire_work
, 0);
4270 i915_gem_request_unreference__unlocked(target
);
4276 i915_vma_misplaced(struct i915_vma
*vma
, uint32_t alignment
, uint64_t flags
)
4278 struct drm_i915_gem_object
*obj
= vma
->obj
;
4281 vma
->node
.start
& (alignment
- 1))
4284 if (flags
& PIN_MAPPABLE
&& !obj
->map_and_fenceable
)
4287 if (flags
& PIN_OFFSET_BIAS
&&
4288 vma
->node
.start
< (flags
& PIN_OFFSET_MASK
))
4295 i915_gem_object_do_pin(struct drm_i915_gem_object
*obj
,
4296 struct i915_address_space
*vm
,
4297 const struct i915_ggtt_view
*ggtt_view
,
4301 struct drm_i915_private
*dev_priv
= obj
->base
.dev
->dev_private
;
4302 struct i915_vma
*vma
;
4306 if (WARN_ON(vm
== &dev_priv
->mm
.aliasing_ppgtt
->base
))
4309 if (WARN_ON(flags
& (PIN_GLOBAL
| PIN_MAPPABLE
) && !i915_is_ggtt(vm
)))
4312 if (WARN_ON((flags
& (PIN_MAPPABLE
| PIN_GLOBAL
)) == PIN_MAPPABLE
))
4315 if (WARN_ON(i915_is_ggtt(vm
) != !!ggtt_view
))
4318 vma
= ggtt_view
? i915_gem_obj_to_ggtt_view(obj
, ggtt_view
) :
4319 i915_gem_obj_to_vma(obj
, vm
);
4322 return PTR_ERR(vma
);
4325 if (WARN_ON(vma
->pin_count
== DRM_I915_GEM_OBJECT_MAX_PIN_COUNT
))
4328 if (i915_vma_misplaced(vma
, alignment
, flags
)) {
4329 unsigned long offset
;
4330 offset
= ggtt_view
? i915_gem_obj_ggtt_offset_view(obj
, ggtt_view
) :
4331 i915_gem_obj_offset(obj
, vm
);
4332 WARN(vma
->pin_count
,
4333 "bo is already pinned in %s with incorrect alignment:"
4334 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
4335 " obj->map_and_fenceable=%d\n",
4336 ggtt_view
? "ggtt" : "ppgtt",
4339 !!(flags
& PIN_MAPPABLE
),
4340 obj
->map_and_fenceable
);
4341 ret
= i915_vma_unbind(vma
);
4349 bound
= vma
? vma
->bound
: 0;
4350 if (vma
== NULL
|| !drm_mm_node_allocated(&vma
->node
)) {
4351 vma
= i915_gem_object_bind_to_vm(obj
, vm
, ggtt_view
, alignment
,
4354 return PTR_ERR(vma
);
4356 ret
= i915_vma_bind(vma
, obj
->cache_level
, flags
);
4361 if (ggtt_view
&& ggtt_view
->type
== I915_GGTT_VIEW_NORMAL
&&
4362 (bound
^ vma
->bound
) & GLOBAL_BIND
) {
4363 bool mappable
, fenceable
;
4364 u32 fence_size
, fence_alignment
;
4366 fence_size
= i915_gem_get_gtt_size(obj
->base
.dev
,
4369 fence_alignment
= i915_gem_get_gtt_alignment(obj
->base
.dev
,
4374 fenceable
= (vma
->node
.size
== fence_size
&&
4375 (vma
->node
.start
& (fence_alignment
- 1)) == 0);
4377 mappable
= (vma
->node
.start
+ fence_size
<=
4378 dev_priv
->gtt
.mappable_end
);
4380 obj
->map_and_fenceable
= mappable
&& fenceable
;
4382 WARN_ON(flags
& PIN_MAPPABLE
&& !obj
->map_and_fenceable
);
4390 i915_gem_object_pin(struct drm_i915_gem_object
*obj
,
4391 struct i915_address_space
*vm
,
4395 return i915_gem_object_do_pin(obj
, vm
,
4396 i915_is_ggtt(vm
) ? &i915_ggtt_view_normal
: NULL
,
4401 i915_gem_object_ggtt_pin(struct drm_i915_gem_object
*obj
,
4402 const struct i915_ggtt_view
*view
,
4406 if (WARN_ONCE(!view
, "no view specified"))
4409 return i915_gem_object_do_pin(obj
, i915_obj_to_ggtt(obj
), view
,
4410 alignment
, flags
| PIN_GLOBAL
);
4414 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object
*obj
,
4415 const struct i915_ggtt_view
*view
)
4417 struct i915_vma
*vma
= i915_gem_obj_to_ggtt_view(obj
, view
);
4420 WARN_ON(vma
->pin_count
== 0);
4421 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj
, view
));
4427 i915_gem_object_pin_fence(struct drm_i915_gem_object
*obj
)
4429 if (obj
->fence_reg
!= I915_FENCE_REG_NONE
) {
4430 struct drm_i915_private
*dev_priv
= obj
->base
.dev
->dev_private
;
4431 struct i915_vma
*ggtt_vma
= i915_gem_obj_to_ggtt(obj
);
4433 WARN_ON(!ggtt_vma
||
4434 dev_priv
->fence_regs
[obj
->fence_reg
].pin_count
>
4435 ggtt_vma
->pin_count
);
4436 dev_priv
->fence_regs
[obj
->fence_reg
].pin_count
++;
4443 i915_gem_object_unpin_fence(struct drm_i915_gem_object
*obj
)
4445 if (obj
->fence_reg
!= I915_FENCE_REG_NONE
) {
4446 struct drm_i915_private
*dev_priv
= obj
->base
.dev
->dev_private
;
4447 WARN_ON(dev_priv
->fence_regs
[obj
->fence_reg
].pin_count
<= 0);
4448 dev_priv
->fence_regs
[obj
->fence_reg
].pin_count
--;
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
	struct drm_i915_gem_busy *args = data;
	struct drm_i915_gem_object *obj;

	ret = i915_mutex_lock_interruptible(dev);

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {

	/* Count all active objects as busy, even if they are currently not used
	 * by the gpu. Users of this interface expect objects to eventually
	 * become non-busy without any further actions, therefore emit any
	 * necessary flushes here.
	 */
	ret = i915_gem_object_flush_active(obj);

	BUILD_BUG_ON(I915_NUM_RINGS > 16);
	args->busy = obj->active << 16;
	if (obj->last_write_req)
		args->busy |= obj->last_write_req->ring->id;

	drm_gem_object_unreference(&obj->base);

	mutex_unlock(&dev->struct_mutex);
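/* Illustrative sketch (assumption, not part of this file): how a caller can
 * decode the busy word built above - the mask of rings still reading the
 * object is packed into the upper 16 bits, and the id of the last ring to
 * write it (if any) sits in the lower 16 bits.
 */
#if 0
static void sketch_decode_busy(u32 busy)
{
	u16 read_ring_mask = busy >> 16;	/* one bit per active ring */
	u16 write_ring_id = busy & 0xffff;	/* only meaningful if busy != 0 */

	(void)read_ring_mask;
	(void)write_ring_id;
}
#endif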
4492 i915_gem_throttle_ioctl(struct drm_device
*dev
, void *data
,
4493 struct drm_file
*file_priv
)
4495 return i915_gem_ring_throttle(dev
, file_priv
);
4499 i915_gem_madvise_ioctl(struct drm_device
*dev
, void *data
,
4500 struct drm_file
*file_priv
)
4502 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
4503 struct drm_i915_gem_madvise
*args
= data
;
4504 struct drm_i915_gem_object
*obj
;
4507 switch (args
->madv
) {
4508 case I915_MADV_DONTNEED
:
4509 case I915_MADV_WILLNEED
:
4515 ret
= i915_mutex_lock_interruptible(dev
);
4519 obj
= to_intel_bo(drm_gem_object_lookup(dev
, file_priv
, args
->handle
));
4520 if (&obj
->base
== NULL
) {
4525 if (i915_gem_obj_is_pinned(obj
)) {
4531 obj
->tiling_mode
!= I915_TILING_NONE
&&
4532 dev_priv
->quirks
& QUIRK_PIN_SWIZZLED_PAGES
) {
4533 if (obj
->madv
== I915_MADV_WILLNEED
)
4534 i915_gem_object_unpin_pages(obj
);
4535 if (args
->madv
== I915_MADV_WILLNEED
)
4536 i915_gem_object_pin_pages(obj
);
4539 if (obj
->madv
!= __I915_MADV_PURGED
)
4540 obj
->madv
= args
->madv
;
4542 /* if the object is no longer attached, discard its backing storage */
4543 if (obj
->madv
== I915_MADV_DONTNEED
&& obj
->pages
== NULL
)
4544 i915_gem_object_truncate(obj
);
4546 args
->retained
= obj
->madv
!= __I915_MADV_PURGED
;
4549 drm_gem_object_unreference(&obj
->base
);
4551 mutex_unlock(&dev
->struct_mutex
);
4555 void i915_gem_object_init(struct drm_i915_gem_object
*obj
,
4556 const struct drm_i915_gem_object_ops
*ops
)
4560 INIT_LIST_HEAD(&obj
->global_list
);
4561 for (i
= 0; i
< I915_NUM_RINGS
; i
++)
4562 INIT_LIST_HEAD(&obj
->ring_list
[i
]);
4563 INIT_LIST_HEAD(&obj
->obj_exec_link
);
4564 INIT_LIST_HEAD(&obj
->vma_list
);
4565 INIT_LIST_HEAD(&obj
->batch_pool_link
);
4569 obj
->fence_reg
= I915_FENCE_REG_NONE
;
4570 obj
->madv
= I915_MADV_WILLNEED
;
4572 i915_gem_info_add_obj(obj
->base
.dev
->dev_private
, obj
->base
.size
);
4575 static const struct drm_i915_gem_object_ops i915_gem_object_ops
= {
4576 .get_pages
= i915_gem_object_get_pages_gtt
,
4577 .put_pages
= i915_gem_object_put_pages_gtt
,
4580 struct drm_i915_gem_object
*i915_gem_alloc_object(struct drm_device
*dev
,
4583 struct drm_i915_gem_object
*obj
;
4584 struct address_space
*mapping
;
4587 obj
= i915_gem_object_alloc(dev
);
4591 if (drm_gem_object_init(dev
, &obj
->base
, size
) != 0) {
4592 i915_gem_object_free(obj
);
4596 mask
= GFP_HIGHUSER
| __GFP_RECLAIMABLE
;
4597 if (IS_CRESTLINE(dev
) || IS_BROADWATER(dev
)) {
4598 /* 965gm cannot relocate objects above 4GiB. */
4599 mask
&= ~__GFP_HIGHMEM
;
4600 mask
|= __GFP_DMA32
;
4603 mapping
= file_inode(obj
->base
.filp
)->i_mapping
;
4604 mapping_set_gfp_mask(mapping
, mask
);
4606 i915_gem_object_init(obj
, &i915_gem_object_ops
);
4608 obj
->base
.write_domain
= I915_GEM_DOMAIN_CPU
;
4609 obj
->base
.read_domains
= I915_GEM_DOMAIN_CPU
;
4612 /* On some devices, we can have the GPU use the LLC (the CPU
4613 * cache) for about a 10% performance improvement
4614 * compared to uncached. Graphics requests other than
4615 * display scanout are coherent with the CPU in
4616 * accessing this cache. This means in this mode we
4617 * don't need to clflush on the CPU side, and on the
4618 * GPU side we only need to flush internal caches to
4619 * get data visible to the CPU.
4621 * However, we maintain the display planes as UC, and so
4622 * need to rebind when first used as such.
4624 obj
->cache_level
= I915_CACHE_LLC
;
4626 obj
->cache_level
= I915_CACHE_NONE
;
4628 trace_i915_gem_object_create(obj
);
static bool discard_backing_storage(struct drm_i915_gem_object *obj)
	/* If we are the last user of the backing storage (be it shmemfs
	 * pages or stolen etc), we know that the pages are going to be
	 * immediately released. In this case, we can then skip copying
	 * back the contents from the GPU.
	 */
	if (obj->madv != I915_MADV_WILLNEED)

	if (obj->base.filp == NULL)

	/* At first glance, this looks racy, but then again so would be
	 * userspace racing mmap against close. However, the first external
	 * reference to the filp can only be obtained through the
	 * i915_gem_mmap_ioctl() which safeguards us against the user
	 * acquiring such a reference whilst we are in the middle of
	 * freeing the object.
	 */
	return atomic_long_read(&obj->base.filp->f_count) == 1;
4657 void i915_gem_free_object(struct drm_gem_object
*gem_obj
)
4659 struct drm_i915_gem_object
*obj
= to_intel_bo(gem_obj
);
4660 struct drm_device
*dev
= obj
->base
.dev
;
4661 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
4662 struct i915_vma
*vma
, *next
;
4664 intel_runtime_pm_get(dev_priv
);
4666 trace_i915_gem_object_destroy(obj
);
4668 list_for_each_entry_safe(vma
, next
, &obj
->vma_list
, vma_link
) {
4672 ret
= i915_vma_unbind(vma
);
4673 if (WARN_ON(ret
== -ERESTARTSYS
)) {
4674 bool was_interruptible
;
4676 was_interruptible
= dev_priv
->mm
.interruptible
;
4677 dev_priv
->mm
.interruptible
= false;
4679 WARN_ON(i915_vma_unbind(vma
));
4681 dev_priv
->mm
.interruptible
= was_interruptible
;
4685 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
4686 * before progressing. */
4688 i915_gem_object_unpin_pages(obj
);
4690 WARN_ON(obj
->frontbuffer_bits
);
4692 if (obj
->pages
&& obj
->madv
== I915_MADV_WILLNEED
&&
4693 dev_priv
->quirks
& QUIRK_PIN_SWIZZLED_PAGES
&&
4694 obj
->tiling_mode
!= I915_TILING_NONE
)
4695 i915_gem_object_unpin_pages(obj
);
4697 if (WARN_ON(obj
->pages_pin_count
))
4698 obj
->pages_pin_count
= 0;
4699 if (discard_backing_storage(obj
))
4700 obj
->madv
= I915_MADV_DONTNEED
;
4701 i915_gem_object_put_pages(obj
);
4702 i915_gem_object_free_mmap_offset(obj
);
4706 if (obj
->base
.import_attach
)
4707 drm_prime_gem_destroy(&obj
->base
, NULL
);
4709 if (obj
->ops
->release
)
4710 obj
->ops
->release(obj
);
4712 drm_gem_object_release(&obj
->base
);
4713 i915_gem_info_remove_obj(dev_priv
, obj
->base
.size
);
4716 i915_gem_object_free(obj
);
4718 intel_runtime_pm_put(dev_priv
);
4721 struct i915_vma
*i915_gem_obj_to_vma(struct drm_i915_gem_object
*obj
,
4722 struct i915_address_space
*vm
)
4724 struct i915_vma
*vma
;
4725 list_for_each_entry(vma
, &obj
->vma_list
, vma_link
) {
4726 if (i915_is_ggtt(vma
->vm
) &&
4727 vma
->ggtt_view
.type
!= I915_GGTT_VIEW_NORMAL
)
4735 struct i915_vma
*i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object
*obj
,
4736 const struct i915_ggtt_view
*view
)
4738 struct i915_address_space
*ggtt
= i915_obj_to_ggtt(obj
);
4739 struct i915_vma
*vma
;
4741 if (WARN_ONCE(!view
, "no view specified"))
4742 return ERR_PTR(-EINVAL
);
4744 list_for_each_entry(vma
, &obj
->vma_list
, vma_link
)
4745 if (vma
->vm
== ggtt
&&
4746 i915_ggtt_view_equal(&vma
->ggtt_view
, view
))
void i915_gem_vma_destroy(struct i915_vma *vma)
{
	struct i915_address_space *vm = NULL;

	WARN_ON(vma->node.allocated);

	/* Keep the vma as a placeholder in the execbuffer reservation lists */
	if (!list_empty(&vma->exec_list))
		return;

	vm = vma->vm;

	if (!i915_is_ggtt(vm))
		i915_ppgtt_put(i915_vm_to_ppgtt(vm));

	list_del(&vma->vma_link);

	kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
}

static void
i915_gem_stop_ringbuffers(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		dev_priv->gt.stop_ring(ring);
}

int
i915_gem_suspend(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = 0;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gpu_idle(dev);
	if (ret)
		goto err;

	i915_gem_retire_requests(dev);

	i915_gem_stop_ringbuffers(dev);
	mutex_unlock(&dev->struct_mutex);

	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
	flush_delayed_work(&dev_priv->mm.idle_work);

	/* Assert that we successfully flushed all the work and
	 * reset the GPU back to its idle, low power state.
	 */
	WARN_ON(dev_priv->mm.busy);

	return 0;

err:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

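/*
 * Illustrative sketch (editor's example, not driver code): the expected
 * ordering in a system-suspend path is to quiesce GEM before any further
 * power-down work. The surrounding steps are placeholders, not actual
 * driver calls.
 */
#if 0
static int example_driver_suspend(struct drm_device *dev)
{
	int ret;

	/* Idle the GPU, stop the rings and flush the deferred work above. */
	ret = i915_gem_suspend(dev);
	if (ret)
		return ret;

	/* ... disable display, save registers, drop power wells, etc. ... */
	return 0;
}
#endif
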
int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
	int i, ret;

	if (!HAS_L3_DPF(dev) || !remap_info)
		return 0;

	ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
	if (ret)
		return ret;

	/*
	 * Note: We do not worry about the concurrent register cacheline hang
	 * here because no other code should access these registers other than
	 * at initialization time.
	 */
	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, reg_base + i);
		intel_ring_emit(ring, remap_info[i/4]);
	}

	intel_ring_advance(ring);

	return ret;
}

void i915_gem_init_swizzling(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (INTEL_INFO(dev)->gen < 5 ||
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
		return;

	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
				 DISP_TILE_SURFACE_SWIZZLING);

	if (IS_GEN5(dev))
		return;

	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
	if (IS_GEN6(dev))
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
	else if (IS_GEN7(dev))
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
	else if (IS_GEN8(dev))
		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
	else
		BUG();
}

static bool
intel_enable_blt(struct drm_device *dev)
{
	if (!HAS_BLT(dev))
		return false;

	/* The blitter was dysfunctional on early prototypes */
	if (IS_GEN6(dev) && dev->pdev->revision < 8) {
		DRM_INFO("BLT not supported on this pre-production hardware;"
			 " graphics performance will be degraded.\n");
		return false;
	}

	return true;
}

static void init_unused_ring(struct drm_device *dev, u32 base)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	I915_WRITE(RING_CTL(base), 0);
	I915_WRITE(RING_HEAD(base), 0);
	I915_WRITE(RING_TAIL(base), 0);
	I915_WRITE(RING_START(base), 0);
}

static void init_unused_rings(struct drm_device *dev)
{
	if (IS_I830(dev)) {
		init_unused_ring(dev, PRB1_BASE);
		init_unused_ring(dev, SRB0_BASE);
		init_unused_ring(dev, SRB1_BASE);
		init_unused_ring(dev, SRB2_BASE);
		init_unused_ring(dev, SRB3_BASE);
	} else if (IS_GEN2(dev)) {
		init_unused_ring(dev, SRB0_BASE);
		init_unused_ring(dev, SRB1_BASE);
	} else if (IS_GEN3(dev)) {
		init_unused_ring(dev, PRB1_BASE);
		init_unused_ring(dev, PRB2_BASE);
	}
}

int i915_gem_init_rings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = intel_init_render_ring_buffer(dev);
	if (ret)
		return ret;

	if (HAS_BSD(dev)) {
		ret = intel_init_bsd_ring_buffer(dev);
		if (ret)
			goto cleanup_render_ring;
	}

	if (intel_enable_blt(dev)) {
		ret = intel_init_blt_ring_buffer(dev);
		if (ret)
			goto cleanup_bsd_ring;
	}

	if (HAS_VEBOX(dev)) {
		ret = intel_init_vebox_ring_buffer(dev);
		if (ret)
			goto cleanup_blt_ring;
	}

	if (HAS_BSD2(dev)) {
		ret = intel_init_bsd2_ring_buffer(dev);
		if (ret)
			goto cleanup_vebox_ring;
	}

	ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
	if (ret)
		goto cleanup_bsd2_ring;

	return 0;

cleanup_bsd2_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]);
cleanup_vebox_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
cleanup_blt_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
cleanup_bsd_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
cleanup_render_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);

	return ret;
}

int
i915_gem_init_hw(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int ret, i;

	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
		return -EIO;

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	if (dev_priv->ellc_size)
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));

	if (IS_HASWELL(dev))
		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	if (HAS_PCH_NOP(dev)) {
		if (IS_IVYBRIDGE(dev)) {
			u32 temp = I915_READ(GEN7_MSG_CTL);
			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
			I915_WRITE(GEN7_MSG_CTL, temp);
		} else if (INTEL_INFO(dev)->gen >= 7) {
			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
		}
	}

	i915_gem_init_swizzling(dev);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (i.e. head != tail) after resume which
	 * will prevent c3 entry. Makes sure all unused rings
	 * are totally idle.
	 */
	init_unused_rings(dev);

	for_each_ring(ring, dev_priv, i) {
		ret = ring->init_hw(ring);
		if (ret)
			goto out;
	}

	for (i = 0; i < NUM_L3_SLICES(dev); i++)
		i915_gem_l3_remap(&dev_priv->ring[RCS], i);

	ret = i915_ppgtt_init_hw(dev);
	if (ret && ret != -EIO) {
		DRM_ERROR("PPGTT enable failed %d\n", ret);
		i915_gem_cleanup_ringbuffer(dev);
	}

	ret = i915_gem_context_enable(dev_priv);
	if (ret && ret != -EIO) {
		DRM_ERROR("Context enable failed %d\n", ret);
		i915_gem_cleanup_ringbuffer(dev);

		goto out;
	}

out:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	return ret;
}

int i915_gem_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	i915.enable_execlists = intel_sanitize_enable_execlists(dev,
			i915.enable_execlists);

	mutex_lock(&dev->struct_mutex);

	if (IS_VALLEYVIEW(dev)) {
		/* VLVA0 (potential hack), BIOS isn't actually waking us */
		I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ);
		if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) &
			      VLV_GTLC_ALLOWWAKEACK), 10))
			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
	}

	if (!i915.enable_execlists) {
		dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
		dev_priv->gt.init_rings = i915_gem_init_rings;
		dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
		dev_priv->gt.stop_ring = intel_stop_ring_buffer;
	} else {
		dev_priv->gt.execbuf_submit = intel_execlists_submission;
		dev_priv->gt.init_rings = intel_logical_rings_init;
		dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
		dev_priv->gt.stop_ring = intel_logical_ring_stop;
	}

	/* This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	ret = i915_gem_init_userptr(dev);
	if (ret)
		goto out_unlock;

	i915_gem_init_global_gtt(dev);

	ret = i915_gem_context_init(dev);
	if (ret)
		goto out_unlock;

	ret = dev_priv->gt.init_rings(dev);
	if (ret)
		goto out_unlock;

	ret = i915_gem_init_hw(dev);
	if (ret == -EIO) {
		/* Allow ring initialisation to fail by marking the GPU as
		 * wedged. But we only want to do this where the GPU is angry,
		 * for all other failures, such as an allocation failure, bail.
		 */
		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
		atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
		ret = 0;
	}

out_unlock:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		dev_priv->gt.cleanup_ring(ring);
}

static void
init_ring_lists(struct intel_engine_cs *ring)
{
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
}

void i915_init_vm(struct drm_i915_private *dev_priv,
		  struct i915_address_space *vm)
{
	if (!i915_is_ggtt(vm))
		drm_mm_init(&vm->mm, vm->start, vm->total);
	vm->dev = dev_priv->dev;
	INIT_LIST_HEAD(&vm->active_list);
	INIT_LIST_HEAD(&vm->inactive_list);
	INIT_LIST_HEAD(&vm->global_link);
	list_add_tail(&vm->global_link, &dev_priv->vm_list);
}

void
i915_gem_load(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int i;

	dev_priv->objects =
		kmem_cache_create("i915_gem_object",
				  sizeof(struct drm_i915_gem_object), 0,
				  SLAB_HWCACHE_ALIGN,
				  NULL);
	dev_priv->vmas =
		kmem_cache_create("i915_gem_vma",
				  sizeof(struct i915_vma), 0,
				  SLAB_HWCACHE_ALIGN,
				  NULL);
	dev_priv->requests =
		kmem_cache_create("i915_gem_request",
				  sizeof(struct drm_i915_gem_request), 0,
				  SLAB_HWCACHE_ALIGN,
				  NULL);

	INIT_LIST_HEAD(&dev_priv->vm_list);
	i915_init_vm(dev_priv, &dev_priv->gtt.base);

	INIT_LIST_HEAD(&dev_priv->context_list);
	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	for (i = 0; i < I915_NUM_RINGS; i++)
		init_ring_lists(&dev_priv->ring[i]);
	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
			  i915_gem_retire_work_handler);
	INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
			  i915_gem_idle_work_handler);
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);

	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;

	if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
		dev_priv->num_fence_regs = 32;
	else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	if (intel_vgpu_active(dev))
		dev_priv->num_fence_regs =
				I915_READ(vgtif_reg(avail_rs.fence_num));

	/* Initialize fence registers to zero */
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	i915_gem_restore_fences(dev);

	i915_gem_detect_bit_6_swizzle(dev);
	init_waitqueue_head(&dev_priv->pending_flip_queue);

	dev_priv->mm.interruptible = true;

	i915_gem_shrinker_init(dev_priv);

	mutex_init(&dev_priv->fb_tracking.lock);
}

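/*
 * Illustrative sketch (editor's example, not driver code): the slab caches
 * created above are what the object, vma and request allocators are expected
 * to draw from. The dev_priv->objects field name is an assumption based on
 * this kernel's drm_i915_private layout; treat the helper below accordingly.
 */
#if 0
static struct drm_i915_gem_object *
example_alloc_object(struct drm_i915_private *dev_priv)
{
	/* Zeroed allocation from the dedicated GEM object cache. */
	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
}
#endif
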
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	while (!list_empty(&file_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&file_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   client_list);
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);

	if (!list_empty(&file_priv->rps.link)) {
		spin_lock(&to_i915(dev)->rps.client_lock);
		list_del(&file_priv->rps.link);
		spin_unlock(&to_i915(dev)->rps.client_lock);
	}
}

int i915_gem_open(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG_DRIVER("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = dev->dev_private;
	file_priv->file = file;
	INIT_LIST_HEAD(&file_priv->rps.link);

	spin_lock_init(&file_priv->mm.lock);
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	ret = i915_gem_context_open(dev, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	if (old) {
		WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
		WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
		old->frontbuffer_bits &= ~frontbuffer_bits;
	}

	if (new) {
		WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
		WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
		new->frontbuffer_bits |= frontbuffer_bits;
	}
}

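/*
 * Illustrative sketch (editor's example, not driver code): a page flip on a
 * primary plane is expected to hand the frontbuffer bits from the outgoing
 * scanout object to the incoming one. INTEL_FRONTBUFFER_PRIMARY() and the
 * objects below are assumptions for illustration only; struct_mutex must be
 * held, as the WARNs above assert.
 */
#if 0
static void example_flip_frontbuffer(struct drm_i915_gem_object *old_obj,
				     struct drm_i915_gem_object *new_obj,
				     enum pipe pipe)
{
	i915_gem_track_fb(old_obj, new_obj, INTEL_FRONTBUFFER_PRIMARY(pipe));
}
#endif
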
/* All the new VM stuff */
unsigned long
i915_gem_obj_offset(struct drm_i915_gem_object *o,
		    struct i915_address_space *vm)
{
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
	struct i915_vma *vma;

	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);

	list_for_each_entry(vma, &o->vma_list, vma_link) {
		if (i915_is_ggtt(vma->vm) &&
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
			continue;
		if (vma->vm == vm)
			return vma->node.start;
	}

	WARN(1, "%s vma for this object not found.\n",
	     i915_is_ggtt(vm) ? "global" : "ppgtt");
	return -1;
}

unsigned long
i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
			      const struct i915_ggtt_view *view)
{
	struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
	struct i915_vma *vma;

	list_for_each_entry(vma, &o->vma_list, vma_link)
		if (vma->vm == ggtt &&
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
			return vma->node.start;

	WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
	return -1;
}

bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
			struct i915_address_space *vm)
{
	struct i915_vma *vma;

	list_for_each_entry(vma, &o->vma_list, vma_link) {
		if (i915_is_ggtt(vma->vm) &&
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
			continue;
		if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
			return true;
	}

	return false;
}

bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
				  const struct i915_ggtt_view *view)
{
	struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
	struct i915_vma *vma;

	list_for_each_entry(vma, &o->vma_list, vma_link)
		if (vma->vm == ggtt &&
		    i915_ggtt_view_equal(&vma->ggtt_view, view) &&
		    drm_mm_node_allocated(&vma->node))
			return true;

	return false;
}

bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
{
	struct i915_vma *vma;

	list_for_each_entry(vma, &o->vma_list, vma_link)
		if (drm_mm_node_allocated(&vma->node))
			return true;

	return false;
}

unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
				struct i915_address_space *vm)
{
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
	struct i915_vma *vma;

	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);

	BUG_ON(list_empty(&o->vma_list));

	list_for_each_entry(vma, &o->vma_list, vma_link) {
		if (i915_is_ggtt(vma->vm) &&
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
			continue;
		if (vma->vm == vm)
			return vma->node.size;
	}

	return 0;
}

bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;

	list_for_each_entry(vma, &obj->vma_list, vma_link)
		if (vma->pin_count > 0)