/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "../i915_selftest.h"

#include <linux/prime_numbers.h>

#include "i915_random.h"

static const unsigned int page_sizes[] = {
	I915_GTT_PAGE_SIZE_2M,
	I915_GTT_PAGE_SIZE_64K,
	I915_GTT_PAGE_SIZE_4K,
};

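/* Return the largest device-supported page size that still fits in @rem. */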
static unsigned int get_largest_page_size(struct drm_i915_private *i915,
					  u64 rem)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
		unsigned int page_size = page_sizes[i];

		if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size)
			return page_size;
	}

	return 0;
}

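/* Free every scatterlist segment allocated for a huge-page backed object. */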
static void huge_pages_free_pages(struct sg_table *st)
{
	struct scatterlist *sg;

	for (sg = st->sgl; sg; sg = __sg_next(sg)) {
		if (sg_page(sg))
			__free_pages(sg_page(sg), get_order(sg->length));
	}

	sg_free_table(st);
	kfree(st);
}

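/*
 * get_pages() backend for huge_page_ops: allocate real physical pages in
 * chunks dictated by obj->mm.page_mask, largest chunk size first.
 */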
static int get_huge_pages(struct drm_i915_gem_object *obj)
{
#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
	unsigned int page_mask = obj->mm.page_mask;
	struct sg_table *st;
	struct scatterlist *sg;
	unsigned int sg_page_sizes;
	u64 rem;

	st = kmalloc(sizeof(*st), GFP);
	if (!st)
		return -ENOMEM;

	if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
		kfree(st);
		return -ENOMEM;
	}

	rem = obj->base.size;
	sg = st->sgl;
	st->nents = 0;
	sg_page_sizes = 0;

	/*
	 * Our goal here is simple, we want to greedily fill the object from
	 * largest to smallest page-size, while ensuring that we use *every*
	 * page-size as per the given page-mask.
	 */
	do {
		unsigned int bit = ilog2(page_mask);
		unsigned int page_size = BIT(bit);
		int order = get_order(page_size);

		do {
			struct page *page;

			GEM_BUG_ON(order >= MAX_ORDER);
			page = alloc_pages(GFP | __GFP_ZERO, order);
			if (!page)
				goto err;

			sg_set_page(sg, page, page_size, 0);
			sg_page_sizes |= page_size;
			st->nents++;

			rem -= page_size;
			if (!rem) {
				sg_mark_end(sg);
				break;
			}

			sg = __sg_next(sg);
		} while ((rem - ((page_size - 1) & page_mask)) >= page_size);

		page_mask &= (page_size - 1);
	} while (page_mask);

	if (i915_gem_gtt_prepare_pages(obj, st))
		goto err;

	obj->mm.madv = I915_MADV_DONTNEED;

	GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask);
	__i915_gem_object_set_pages(obj, st, sg_page_sizes);

	return 0;

err:
	sg_set_page(sg, NULL, 0, 0);
	huge_pages_free_pages(st);

	return -ENOMEM;
}

static void put_huge_pages(struct drm_i915_gem_object *obj,
			   struct sg_table *pages)
{
	i915_gem_gtt_finish_pages(obj, pages);
	huge_pages_free_pages(pages);

	obj->mm.dirty = false;
	obj->mm.madv = I915_MADV_WILLNEED;
}

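/* Object ops for objects backed by the real huge-page allocator above. */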
static const struct drm_i915_gem_object_ops huge_page_ops = {
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
		 I915_GEM_OBJECT_IS_SHRINKABLE,
	.get_pages = get_huge_pages,
	.put_pages = put_huge_pages,
};

static struct drm_i915_gem_object *
huge_pages_object(struct drm_i915_private *i915,
		  u64 size,
		  unsigned int page_mask)
{
	struct drm_i915_gem_object *obj;

	GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask))));

	if (size >> PAGE_SHIFT > INT_MAX)
		return ERR_PTR(-E2BIG);

	if (overflows_type(size, obj->base.size))
		return ERR_PTR(-E2BIG);

	obj = i915_gem_object_alloc(i915);
	if (!obj)
		return ERR_PTR(-ENOMEM);

	drm_gem_private_object_init(&i915->drm, &obj->base, size);
	i915_gem_object_init(obj, &huge_page_ops);

	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
	obj->cache_level = I915_CACHE_NONE;

	obj->mm.page_mask = page_mask;

	return obj;
}

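/*
 * The "fake" backends below never allocate struct pages; they only fill in
 * sg_dma_len()/sg_dma_address(), with the dma address simply set to the chunk
 * page size so that it is suitably aligned for the GTT insertion paths.
 */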
static int fake_get_huge_pages(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	const u64 max_len = rounddown_pow_of_two(UINT_MAX);
	struct sg_table *st;
	struct scatterlist *sg;
	unsigned int sg_page_sizes;
	u64 rem;

	st = kmalloc(sizeof(*st), GFP);
	if (!st)
		return -ENOMEM;

	if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
		kfree(st);
		return -ENOMEM;
	}

	/* Use optimal page sized chunks to fill in the sg table */
	rem = obj->base.size;
	sg = st->sgl;
	st->nents = 0;
	sg_page_sizes = 0;
	do {
		unsigned int page_size = get_largest_page_size(i915, rem);
		unsigned int len = min(page_size * div_u64(rem, page_size),
				       max_len);

		GEM_BUG_ON(!page_size);

		sg->offset = 0;
		sg->length = len;
		sg_dma_len(sg) = len;
		sg_dma_address(sg) = page_size;

		sg_page_sizes |= len;

		st->nents++;

		rem -= len;
		if (!rem) {
			sg_mark_end(sg);
			break;
		}

		sg = sg_next(sg);
	} while (1);

	obj->mm.madv = I915_MADV_DONTNEED;

	__i915_gem_object_set_pages(obj, st, sg_page_sizes);

	return 0;
}

static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct sg_table *st;
	struct scatterlist *sg;
	unsigned int page_size;

	st = kmalloc(sizeof(*st), GFP);
	if (!st)
		return -ENOMEM;

	if (sg_alloc_table(st, 1, GFP)) {
		kfree(st);
		return -ENOMEM;
	}

	sg = st->sgl;
	st->nents = 1;

	page_size = get_largest_page_size(i915, obj->base.size);
	GEM_BUG_ON(!page_size);

	sg->offset = 0;
	sg->length = obj->base.size;
	sg_dma_len(sg) = obj->base.size;
	sg_dma_address(sg) = page_size;

	obj->mm.madv = I915_MADV_DONTNEED;

	__i915_gem_object_set_pages(obj, st, sg->length);

	return 0;
#undef GFP
}

static void fake_free_huge_pages(struct drm_i915_gem_object *obj,
				 struct sg_table *pages)
{
	sg_free_table(pages);
	kfree(pages);
}

static void fake_put_huge_pages(struct drm_i915_gem_object *obj,
				struct sg_table *pages)
{
	fake_free_huge_pages(obj, pages);
	obj->mm.dirty = false;
	obj->mm.madv = I915_MADV_WILLNEED;
}

static const struct drm_i915_gem_object_ops fake_ops = {
	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
	.get_pages = fake_get_huge_pages,
	.put_pages = fake_put_huge_pages,
};

static const struct drm_i915_gem_object_ops fake_ops_single = {
	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
	.get_pages = fake_get_huge_pages_single,
	.put_pages = fake_put_huge_pages,
};

static struct drm_i915_gem_object *
fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single)
{
	struct drm_i915_gem_object *obj;

	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));

	if (size >> PAGE_SHIFT > UINT_MAX)
		return ERR_PTR(-E2BIG);

	if (overflows_type(size, obj->base.size))
		return ERR_PTR(-E2BIG);

	obj = i915_gem_object_alloc(i915);
	if (!obj)
		return ERR_PTR(-ENOMEM);

	drm_gem_private_object_init(&i915->drm, &obj->base, size);

	if (single)
		i915_gem_object_init(obj, &fake_ops_single);
	else
		i915_gem_object_init(obj, &fake_ops);

	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
	obj->cache_level = I915_CACHE_NONE;

	return obj;
}

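/*
 * Verify that the vma's sg/gtt/phys page sizes are supported by the device
 * and consistent with the backing object.
 */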
static int igt_check_page_sizes(struct i915_vma *vma)
{
	struct drm_i915_private *i915 = to_i915(vma->obj->base.dev);
	unsigned int supported = INTEL_INFO(i915)->page_sizes;
	struct drm_i915_gem_object *obj = vma->obj;
	int err = 0;

	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) {
		pr_err("unsupported page_sizes.sg=%u, supported=%u\n",
		       vma->page_sizes.sg & ~supported, supported);
		err = -EINVAL;
	}

	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) {
		pr_err("unsupported page_sizes.gtt=%u, supported=%u\n",
		       vma->page_sizes.gtt & ~supported, supported);
		err = -EINVAL;
	}

	if (vma->page_sizes.phys != obj->mm.page_sizes.phys) {
		pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n",
		       vma->page_sizes.phys, obj->mm.page_sizes.phys);
		err = -EINVAL;
	}

	if (vma->page_sizes.sg != obj->mm.page_sizes.sg) {
		pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n",
		       vma->page_sizes.sg, obj->mm.page_sizes.sg);
		err = -EINVAL;
	}

	if (obj->mm.page_sizes.gtt) {
		pr_err("obj->page_sizes.gtt(%u) should never be set\n",
		       obj->mm.page_sizes.gtt);
		err = -EINVAL;
	}

	return err;
}

static int igt_mock_exhaust_device_supported_pages(void *arg)
{
	struct i915_hw_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->base.i915;
	unsigned int saved_mask = INTEL_INFO(i915)->page_sizes;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int i, j, single;
	int err;

	/*
	 * Sanity check creating objects with every valid page support
	 * combination for our mock device.
	 */
	for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) {
		unsigned int combination = 0;

		for (j = 0; j < ARRAY_SIZE(page_sizes); j++) {
			if (i & BIT(j))
				combination |= page_sizes[j];
		}

		mkwrite_device_info(i915)->page_sizes = combination;

		for (single = 0; single <= 1; ++single) {
			obj = fake_huge_pages_object(i915, combination, !!single);
			if (IS_ERR(obj)) {
				err = PTR_ERR(obj);
				goto out_device;
			}

			if (obj->base.size != combination) {
				pr_err("obj->base.size=%zu, expected=%u\n",
				       obj->base.size, combination);
				err = -EINVAL;
				goto out_put;
			}

			vma = i915_vma_instance(obj, &ppgtt->base, NULL);
			if (IS_ERR(vma)) {
				err = PTR_ERR(vma);
				goto out_put;
			}

			err = i915_vma_pin(vma, 0, 0, PIN_USER);
			if (err)
				goto out_put;

			err = igt_check_page_sizes(vma);

			if (vma->page_sizes.sg != combination) {
				pr_err("page_sizes.sg=%u, expected=%u\n",
				       vma->page_sizes.sg, combination);
				err = -EINVAL;
			}

			i915_vma_unpin(vma);
			i915_gem_object_put(obj);

			if (err)
				goto out_device;
		}
	}

	goto out_device;

out_put:
	i915_gem_object_put(obj);
out_device:
	mkwrite_device_info(i915)->page_sizes = saved_mask;

	return err;
}

static int igt_mock_ppgtt_misaligned_dma(void *arg)
{
	struct i915_hw_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->base.i915;
	unsigned long supported = INTEL_INFO(i915)->page_sizes;
	struct drm_i915_gem_object *obj;
	int bit;
	int err;

	/*
	 * Sanity check dma misalignment for huge pages -- the dma addresses we
	 * insert into the paging structures need to always respect the page
	 * size alignment.
	 */

	bit = ilog2(I915_GTT_PAGE_SIZE_64K);

	for_each_set_bit_from(bit, &supported,
			      ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
		IGT_TIMEOUT(end_time);
		unsigned int page_size = BIT(bit);
		unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
		unsigned int offset;
		unsigned int size =
			round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1;
		struct i915_vma *vma;

		obj = fake_huge_pages_object(i915, size, true);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		if (obj->base.size != size) {
			pr_err("obj->base.size=%zu, expected=%u\n",
			       obj->base.size, size);
			err = -EINVAL;
			goto out_put;
		}

		err = i915_gem_object_pin_pages(obj);
		if (err)
			goto out_put;

		/* Force the page size for this object */
		obj->mm.page_sizes.sg = page_size;

		vma = i915_vma_instance(obj, &ppgtt->base, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_unpin;
		}

		err = i915_vma_pin(vma, 0, 0, flags);
		if (err)
			goto out_unpin;

		err = igt_check_page_sizes(vma);

		if (vma->page_sizes.gtt != page_size) {
			pr_err("page_sizes.gtt=%u, expected %u\n",
			       vma->page_sizes.gtt, page_size);
			err = -EINVAL;
		}

		i915_vma_unpin(vma);

		if (err)
			goto out_unpin;

		/*
		 * Try all the other valid offsets until the next
		 * boundary -- should always fall back to using 4K
		 * pages.
		 */
		for (offset = 4096; offset < page_size; offset += 4096) {
			err = i915_vma_unbind(vma);
			if (err)
				goto out_unpin;

			err = i915_vma_pin(vma, 0, 0, flags | offset);
			if (err)
				goto out_unpin;

			err = igt_check_page_sizes(vma);

			if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) {
				pr_err("page_sizes.gtt=%u, expected %lu\n",
				       vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K);
				err = -EINVAL;
			}

			i915_vma_unpin(vma);

			if (err)
				goto out_unpin;

			if (igt_timeout(end_time,
					"%s timed out at offset %x with page-size %x\n",
					__func__, offset, page_size))
				break;
		}

		i915_gem_object_unpin_pages(obj);
		i915_gem_object_put(obj);
	}

	return 0;

out_unpin:
	i915_gem_object_unpin_pages(obj);
out_put:
	i915_gem_object_put(obj);

	return err;
}

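/* Tear down the list of pinned objects built up by igt_mock_ppgtt_huge_fill(). */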
static void close_object_list(struct list_head *objects,
			      struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_gem_object *obj, *on;

	list_for_each_entry_safe(obj, on, objects, st_link) {
		struct i915_vma *vma;

		vma = i915_vma_instance(obj, &ppgtt->base, NULL);
		if (!IS_ERR(vma))
			i915_vma_close(vma);

		list_del(&obj->st_link);
		i915_gem_object_unpin_pages(obj);
		i915_gem_object_put(obj);
	}
}

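/*
 * Fill the mock ppgtt with objects of every prime number of pages and check
 * that the GTT page sizes chosen for each binding match what the sg layout
 * allows.
 */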
static int igt_mock_ppgtt_huge_fill(void *arg)
{
	struct i915_hw_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->base.i915;
	unsigned long max_pages = ppgtt->base.total >> PAGE_SHIFT;
	unsigned long page_num;
	bool single = false;
	LIST_HEAD(objects);
	IGT_TIMEOUT(end_time);
	int err = -ENODEV;

	for_each_prime_number_from(page_num, 1, max_pages) {
		struct drm_i915_gem_object *obj;
		u64 size = page_num << PAGE_SHIFT;
		struct i915_vma *vma;
		unsigned int expected_gtt = 0;
		int i;

		obj = fake_huge_pages_object(i915, size, single);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			break;
		}

		if (obj->base.size != size) {
			pr_err("obj->base.size=%zd, expected=%llu\n",
			       obj->base.size, size);
			i915_gem_object_put(obj);
			err = -EINVAL;
			break;
		}

		err = i915_gem_object_pin_pages(obj);
		if (err) {
			i915_gem_object_put(obj);
			break;
		}

		list_add(&obj->st_link, &objects);

		vma = i915_vma_instance(obj, &ppgtt->base, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			break;
		}

		err = i915_vma_pin(vma, 0, 0, PIN_USER);
		if (err)
			break;

		err = igt_check_page_sizes(vma);
		if (err) {
			i915_vma_unpin(vma);
			break;
		}

		/*
		 * Figure out the expected gtt page size knowing that we go from
		 * largest to smallest page size sg chunks, and that we align to
		 * the largest page size.
		 */
		for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
			unsigned int page_size = page_sizes[i];

			if (HAS_PAGE_SIZES(i915, page_size) &&
			    size >= page_size) {
				expected_gtt |= page_size;
				size &= page_size - 1;
			}
		}

		GEM_BUG_ON(!expected_gtt);
		GEM_BUG_ON(size);

		if (expected_gtt & I915_GTT_PAGE_SIZE_4K)
			expected_gtt &= ~I915_GTT_PAGE_SIZE_64K;

		i915_vma_unpin(vma);

		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
			if (!IS_ALIGNED(vma->node.start,
					I915_GTT_PAGE_SIZE_2M)) {
				pr_err("node.start(%llx) not aligned to 2M\n",
				       vma->node.start);
				err = -EINVAL;
				break;
			}

			if (!IS_ALIGNED(vma->node.size,
					I915_GTT_PAGE_SIZE_2M)) {
				pr_err("node.size(%llx) not aligned to 2M\n",
				       vma->node.size);
				err = -EINVAL;
				break;
			}
		}

		if (vma->page_sizes.gtt != expected_gtt) {
			pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n",
			       vma->page_sizes.gtt, expected_gtt,
			       obj->base.size, yesno(!!single));
			err = -EINVAL;
			break;
		}

		if (igt_timeout(end_time,
				"%s timed out at size %zd\n",
				__func__, obj->base.size))
			break;

		single = !single;
	}

	close_object_list(&objects, ppgtt);

	if (err == -ENOMEM || err == -ENOSPC)
		err = 0;

	return err;
}

static int igt_mock_ppgtt_64K(void *arg)
{
	struct i915_hw_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->base.i915;
	struct drm_i915_gem_object *obj;
	const struct object_info {
		unsigned int size;
		unsigned int gtt;
		unsigned int offset;
	} objects[] = {
		/* Cases with forced padding/alignment */
		{
			.size = SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		{
			.size = SZ_64K + SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_64K - SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_2M,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		{
			.size = SZ_2M - SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_2M + SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_2M + SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		{
			.size = SZ_2M - SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		/* Try without any forced padding/alignment */
		{
			.size = SZ_64K,
			.offset = SZ_2M,
			.gtt = I915_GTT_PAGE_SIZE_4K,
		},
		{
			.size = SZ_128K,
			.offset = SZ_2M - SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
		},
	};
	struct i915_vma *vma;
	int i, single;
	int err;

	/*
	 * Sanity check some of the trickiness with 64K pages -- either we can
	 * safely mark the whole page-table(2M block) as 64K, or we have to
	 * always fallback to 4K.
	 */

	if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K))
		return 0;

	for (i = 0; i < ARRAY_SIZE(objects); ++i) {
		unsigned int size = objects[i].size;
		unsigned int expected_gtt = objects[i].gtt;
		unsigned int offset = objects[i].offset;
		unsigned int flags = PIN_USER;

		for (single = 0; single <= 1; single++) {
			obj = fake_huge_pages_object(i915, size, !!single);
			if (IS_ERR(obj))
				return PTR_ERR(obj);

			err = i915_gem_object_pin_pages(obj);
			if (err)
				goto out_object_put;

			/*
			 * Disable 2M pages -- We only want to use 64K/4K pages
			 * for this test.
			 */
			obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M;

			vma = i915_vma_instance(obj, &ppgtt->base, NULL);
			if (IS_ERR(vma)) {
				err = PTR_ERR(vma);
				goto out_object_unpin;
			}

			if (offset)
				flags |= PIN_OFFSET_FIXED | offset;

			err = i915_vma_pin(vma, 0, 0, flags);
			if (err)
				goto out_vma_close;

			err = igt_check_page_sizes(vma);
			if (err)
				goto out_vma_unpin;

			if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
				if (!IS_ALIGNED(vma->node.start,
						I915_GTT_PAGE_SIZE_2M)) {
					pr_err("node.start(%llx) not aligned to 2M\n",
					       vma->node.start);
					err = -EINVAL;
					goto out_vma_unpin;
				}

				if (!IS_ALIGNED(vma->node.size,
						I915_GTT_PAGE_SIZE_2M)) {
					pr_err("node.size(%llx) not aligned to 2M\n",
					       vma->node.size);
					err = -EINVAL;
					goto out_vma_unpin;
				}
			}

			if (vma->page_sizes.gtt != expected_gtt) {
				pr_err("gtt=%u, expected=%u, i=%d, single=%s\n",
				       vma->page_sizes.gtt, expected_gtt, i,
				       yesno(!!single));
				err = -EINVAL;
				goto out_vma_unpin;
			}

			i915_vma_unpin(vma);
			i915_vma_close(vma);

			i915_gem_object_unpin_pages(obj);
			i915_gem_object_put(obj);
		}
	}

	return 0;

out_vma_unpin:
	i915_vma_unpin(vma);
out_vma_close:
	i915_vma_close(vma);
out_object_unpin:
	i915_gem_object_unpin_pages(obj);
out_object_put:
	i915_gem_object_put(obj);

	return err;
}

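/*
 * Build and pin a batch which writes @val into a dword of every page covered
 * by @vma, using MI_STORE_DWORD_IMM so the result can later be read back with
 * cpu_check().
 */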
static struct i915_vma *
gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val)
{
	struct drm_i915_private *i915 = to_i915(vma->obj->base.dev);
	const int gen = INTEL_GEN(vma->vm->i915);
	unsigned int count = vma->size >> PAGE_SHIFT;
	struct drm_i915_gem_object *obj;
	struct i915_vma *batch;
	unsigned int size;
	u32 *cmd;
	int n;
	int err;

	size = (1 + 4 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	offset += vma->node.start;

	for (n = 0; n < count; n++) {
		if (gen >= 8) {
			*cmd++ = MI_STORE_DWORD_IMM_GEN4;
			*cmd++ = lower_32_bits(offset);
			*cmd++ = upper_32_bits(offset);
			*cmd++ = val;
		} else if (gen >= 4) {
			*cmd++ = MI_STORE_DWORD_IMM_GEN4 |
				(gen < 6 ? 1 << 22 : 0);
			*cmd++ = 0;
			*cmd++ = offset;
			*cmd++ = val;
		} else {
			*cmd++ = MI_STORE_DWORD_IMM | 1 << 22;
			*cmd++ = offset;
			*cmd++ = val;
		}

		offset += PAGE_SIZE;
	}

	*cmd = MI_BATCH_BUFFER_END;

	i915_gem_object_unpin_map(obj);

	err = i915_gem_object_set_to_gtt_domain(obj, false);
	if (err)
		goto err;

	batch = i915_vma_instance(obj, vma->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (err)
		goto err;

	return batch;

err:
	i915_gem_object_put(obj);

	return ERR_PTR(err);
}

static int gpu_write(struct i915_vma *vma,
		     struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine,
		     u32 dword,
		     u32 value)
{
	struct drm_i915_gem_request *rq;
	struct i915_vma *batch;
	unsigned int flags = 0;
	int err;

	GEM_BUG_ON(!intel_engine_can_store_dword(engine));

	err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
	if (err)
		return err;

	rq = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	batch = gpu_write_dw(vma, dword * sizeof(u32), value);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_request;
	}

	i915_vma_move_to_active(batch, rq, 0);
	i915_gem_object_set_active_reference(batch->obj);
	i915_vma_unpin(batch);
	i915_vma_close(batch);

	err = engine->emit_bb_start(rq,
				    batch->node.start, batch->node.size,
				    flags);
	if (err)
		goto err_request;

	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);

	reservation_object_lock(vma->resv, NULL);
	reservation_object_add_excl_fence(vma->resv, &rq->fence);
	reservation_object_unlock(vma->resv);

err_request:
	__i915_add_request(rq, err == 0);

	return err;
}

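/* Read the object back through the CPU and verify the dword written by the GPU. */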
static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
{
	unsigned int needs_flush;
	unsigned long n;
	int err;

	err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
	if (err)
		return err;

	for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
		u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));

		if (needs_flush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(ptr, PAGE_SIZE);

		if (ptr[dword] != val) {
			pr_err("n=%lu ptr[%u]=%u, val=%u\n",
			       n, dword, ptr[dword], val);
			err = -EINVAL;
			kunmap_atomic(ptr);
			break;
		}

		kunmap_atomic(ptr);
	}

	i915_gem_obj_finish_shmem_access(obj);

	return err;
}

static int __igt_write_huge(struct i915_gem_context *ctx,
			    struct intel_engine_cs *engine,
			    struct drm_i915_gem_object *obj,
			    u64 size, u64 offset,
			    u32 dword, u32 val)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base;
	unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_unbind(vma);
	if (err)
		goto out_vma_close;

	err = i915_vma_pin(vma, size, 0, flags | offset);
	if (err) {
		/*
		 * The ggtt may have some pages reserved so
		 * refrain from erroring out.
		 */
		if (err == -ENOSPC && i915_is_ggtt(vm))
			err = 0;

		goto out_vma_close;
	}

	err = igt_check_page_sizes(vma);
	if (err)
		goto out_vma_unpin;

	err = gpu_write(vma, ctx, engine, dword, val);
	if (err) {
		pr_err("gpu-write failed at offset=%llx\n", offset);
		goto out_vma_unpin;
	}

	err = cpu_check(obj, dword, val);
	if (err) {
		pr_err("cpu-check failed at offset=%llx\n", offset);
		goto out_vma_unpin;
	}

out_vma_unpin:
	i915_vma_unpin(vma);
out_vma_close:
	i915_vma_close(vma);

	return err;
}

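/*
 * Exercise GPU writes against the object at ascending/descending offsets,
 * spreading the work across every engine that can store a dword.
 */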
static int igt_write_huge(struct i915_gem_context *ctx,
			  struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base;
	static struct intel_engine_cs *engines[I915_NUM_ENGINES];
	struct intel_engine_cs *engine;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	unsigned int max_page_size;
	unsigned int id;
	u64 max;
	u64 num;
	u64 size;
	int *order;
	int i, n;
	int err = 0;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	size = obj->base.size;
	if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
		size = round_up(size, I915_GTT_PAGE_SIZE_2M);

	max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
	max = div_u64((vm->total - size), max_page_size);

	n = 0;
	for_each_engine(engine, i915, id) {
		if (!intel_engine_can_store_dword(engine)) {
			pr_info("store-dword-imm not supported on engine=%u\n", id);
			continue;
		}
		engines[n++] = engine;
	}

	if (!n)
		return 0;

	/*
	 * To keep things interesting when alternating between engines in our
	 * randomized order, lets also make feeding to the same engine a few
	 * times in succession a possibility by enlarging the permutation array.
	 */
	order = i915_random_order(n * I915_NUM_ENGINES, &prng);
	if (!order)
		return -ENOMEM;

	/*
	 * Try various offsets in an ascending/descending fashion until we
	 * timeout -- we want to avoid issues hidden by effectively always using
	 * offset = 0.
	 */
	i = 0;
	for_each_prime_number_from(num, 0, max) {
		u64 offset_low = num * max_page_size;
		u64 offset_high = (max - num) * max_page_size;
		u32 dword = offset_in_page(num) / 4;

		engine = engines[order[i] % n];
		i = (i + 1) % (n * I915_NUM_ENGINES);

		err = __igt_write_huge(ctx, engine, obj, size, offset_low, dword, num + 1);
		if (err)
			break;

		err = __igt_write_huge(ctx, engine, obj, size, offset_high, dword, num + 1);
		if (err)
			break;

		if (igt_timeout(end_time,
				"%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n",
				__func__, engine->id, offset_low, offset_high, max_page_size))
			break;
	}

	kfree(order);

	return err;
}

static int igt_ppgtt_exhaust_huge(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *i915 = ctx->i915;
	unsigned long supported = INTEL_INFO(i915)->page_sizes;
	static unsigned int pages[ARRAY_SIZE(page_sizes)];
	struct drm_i915_gem_object *obj;
	unsigned int size_mask;
	unsigned int page_mask;
	int n, i;
	int err = -ENODEV;

	if (supported == I915_GTT_PAGE_SIZE_4K)
		return 0;

	/*
	 * Sanity check creating objects with a varying mix of page sizes --
	 * ensuring that our writes land in the right place.
	 */

	n = 0;
	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1)
		pages[n++] = BIT(i);

	for (size_mask = 2; size_mask < BIT(n); size_mask++) {
		unsigned int size = 0;

		for (i = 0; i < n; i++) {
			if (size_mask & BIT(i))
				size |= pages[i];
		}

		/*
		 * For our page mask we want to enumerate all the page-size
		 * combinations which will fit into our chosen object size.
		 */
		for (page_mask = 2; page_mask <= size_mask; page_mask++) {
			unsigned int page_sizes = 0;

			for (i = 0; i < n; i++) {
				if (page_mask & BIT(i))
					page_sizes |= pages[i];
			}

			/*
			 * Ensure that we can actually fill the given object
			 * with our chosen page mask.
			 */
			if (!IS_ALIGNED(size, BIT(__ffs(page_sizes))))
				continue;

			obj = huge_pages_object(i915, size, page_sizes);
			if (IS_ERR(obj)) {
				err = PTR_ERR(obj);
				goto out_device;
			}

			err = i915_gem_object_pin_pages(obj);
			if (err) {
				i915_gem_object_put(obj);

				if (err == -ENOMEM) {
					pr_info("unable to get pages, size=%u, pages=%u\n",
						size, page_sizes);
					err = 0;
					continue;
				}

				pr_err("pin_pages failed, size=%u, pages=%u\n",
				       size_mask, page_mask);

				goto out_device;
			}

			/* Force the page-size for the gtt insertion */
			obj->mm.page_sizes.sg = page_sizes;

			err = igt_write_huge(ctx, obj);
			if (err) {
				pr_err("exhaust write-huge failed with size=%u\n",
				       size);
				goto out_unpin;
			}

			i915_gem_object_unpin_pages(obj);
			i915_gem_object_put(obj);
		}
	}

	goto out_device;

out_unpin:
	i915_gem_object_unpin_pages(obj);
	i915_gem_object_put(obj);
out_device:
	mkwrite_device_info(i915)->page_sizes = supported;

	return err;
}

static int igt_ppgtt_internal_huge(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	static const unsigned int sizes[] = {
		SZ_64K,
		SZ_128K,
		SZ_256K,
		SZ_512K,
		SZ_1M,
		SZ_2M,
	};
	int i;
	int err;

	/*
	 * Sanity check that the HW uses huge pages correctly through internal
	 * -- ensure that our writes land in the right place.
	 */

	for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
		unsigned int size = sizes[i];

		obj = i915_gem_object_create_internal(i915, size);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		err = i915_gem_object_pin_pages(obj);
		if (err)
			goto out_put;

		if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) {
			pr_info("internal unable to allocate huge-page(s) with size=%u\n",
				size);
			goto out_unpin;
		}

		err = igt_write_huge(ctx, obj);
		if (err) {
			pr_err("internal write-huge failed with size=%u\n",
			       size);
			goto out_unpin;
		}

		i915_gem_object_unpin_pages(obj);
		i915_gem_object_put(obj);
	}

	return 0;

out_unpin:
	i915_gem_object_unpin_pages(obj);
out_put:
	i915_gem_object_put(obj);

	return err;
}

static inline bool igt_can_allocate_thp(struct drm_i915_private *i915)
{
	return i915->mm.gemfs && has_transparent_hugepage();
}

static int igt_ppgtt_gemfs_huge(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	static const unsigned int sizes[] = {
		SZ_2M,
		SZ_4M,
		SZ_8M,
		SZ_16M,
		SZ_32M,
	};
	int i;
	int err;

	/*
	 * Sanity check that the HW uses huge pages correctly through gemfs --
	 * ensure that our writes land in the right place.
	 */

	if (!igt_can_allocate_thp(i915)) {
		pr_info("missing THP support, skipping\n");
		return 0;
	}

	for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
		unsigned int size = sizes[i];

		obj = i915_gem_object_create(i915, size);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		err = i915_gem_object_pin_pages(obj);
		if (err)
			goto out_put;

		if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
			pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n",
				size);
			goto out_unpin;
		}

		err = igt_write_huge(ctx, obj);
		if (err) {
			pr_err("gemfs write-huge failed with size=%u\n",
			       size);
			goto out_unpin;
		}

		i915_gem_object_unpin_pages(obj);
		i915_gem_object_put(obj);
	}

	return 0;

out_unpin:
	i915_gem_object_unpin_pages(obj);
out_put:
	i915_gem_object_put(obj);

	return err;
}

static int igt_ppgtt_pin_update(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *dev_priv = ctx->i915;
	unsigned long supported = INTEL_INFO(dev_priv)->page_sizes;
	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
	int first, last;
	int err;

	/*
	 * Make sure there's no funny business when doing a PIN_UPDATE -- in the
	 * past we had a subtle issue with being able to incorrectly do multiple
	 * alloc va ranges on the same object when doing a PIN_UPDATE, which
	 * resulted in some pretty nasty bugs, though only when using
	 * huge-gtt-pages.
	 */

	if (!USES_FULL_48BIT_PPGTT(dev_priv)) {
		pr_info("48b PPGTT not supported, skipping\n");
		return 0;
	}

	first = ilog2(I915_GTT_PAGE_SIZE_64K);
	last = ilog2(I915_GTT_PAGE_SIZE_2M);

	for_each_set_bit_from(first, &supported, last + 1) {
		unsigned int page_size = BIT(first);

		obj = i915_gem_object_create_internal(dev_priv, page_size);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		vma = i915_vma_instance(obj, &ppgtt->base, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_put;
		}

		err = i915_vma_pin(vma, SZ_2M, 0, flags);
		if (err)
			goto out_close;

		if (vma->page_sizes.sg < page_size) {
			pr_info("Unable to allocate page-size %x, finishing test early\n",
				page_size);
			goto out_unpin;
		}

		err = igt_check_page_sizes(vma);
		if (err)
			goto out_unpin;

		if (vma->page_sizes.gtt != page_size) {
			dma_addr_t addr = i915_gem_object_get_dma_address(obj, 0);

			/*
			 * The only valid reason for this to ever fail would be
			 * if the dma-mapper screwed us over when we did the
			 * dma_map_sg(), since it has the final say over the dma
			 * address.
			 */
			if (IS_ALIGNED(addr, page_size)) {
				pr_err("page_sizes.gtt=%u, expected=%u\n",
				       vma->page_sizes.gtt, page_size);
				err = -EINVAL;
			} else {
				pr_info("dma address misaligned, finishing test early\n");
			}

			goto out_unpin;
		}

		err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE);
		if (err)
			goto out_unpin;

		i915_vma_unpin(vma);
		i915_vma_close(vma);

		i915_gem_object_put(obj);
	}

	obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &ppgtt->base, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_put;
	}

	err = i915_vma_pin(vma, 0, 0, flags);
	if (err)
		goto out_close;

	/*
	 * Make sure we don't end up with something like where the pde is still
	 * pointing to the 2M page, and the pt we just filled-in is dangling --
	 * we can check this by writing to the first page where it would then
	 * land in the now stale 2M page.
	 */

	err = gpu_write(vma, ctx, dev_priv->engine[RCS], 0, 0xdeadbeaf);
	if (err)
		goto out_unpin;

	err = cpu_check(obj, 0, 0xdeadbeaf);

out_unpin:
	i915_vma_unpin(vma);
out_close:
	i915_vma_close(vma);
out_put:
	i915_gem_object_put(obj);

	return err;
}

static int igt_tmpfs_fallback(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *i915 = ctx->i915;
	struct vfsmount *gemfs = i915->mm.gemfs;
	struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *vaddr;
	int err = 0;

	/*
	 * Make sure that we don't burst into a ball of flames upon falling back
	 * to tmpfs, which we rely on if on the off-chance we encounter a failure
	 * when setting up gemfs.
	 */

	i915->mm.gemfs = NULL;

	obj = i915_gem_object_create(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_restore;
	}

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_put;
	}
	*vaddr = 0xdeadbeaf;

	i915_gem_object_unpin_map(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out_close;

	err = igt_check_page_sizes(vma);

	i915_vma_unpin(vma);
out_close:
	i915_vma_close(vma);
out_put:
	i915_gem_object_put(obj);
out_restore:
	i915->mm.gemfs = gemfs;

	return err;
}

static int igt_shrink_thp(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *i915 = ctx->i915;
	struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned int flags = PIN_USER;
	int err;

	/*
	 * Sanity check shrinking huge-paged object -- make sure nothing blows
	 * up.
	 */

	if (!igt_can_allocate_thp(i915)) {
		pr_info("missing THP support, skipping\n");
		return 0;
	}

	obj = i915_gem_object_create(i915, SZ_2M);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_put;
	}

	err = i915_vma_pin(vma, 0, 0, flags);
	if (err)
		goto out_close;

	if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
		pr_info("failed to allocate THP, finishing test early\n");
		goto out_unpin;
	}

	err = igt_check_page_sizes(vma);
	if (err)
		goto out_unpin;

	err = gpu_write(vma, ctx, i915->engine[RCS], 0, 0xdeadbeaf);
	if (err)
		goto out_unpin;

	i915_vma_unpin(vma);

	/*
	 * Now that the pages are *unpinned* shrink-all should invoke
	 * shmem to truncate our pages.
	 */
	i915_gem_shrink_all(i915);
	if (i915_gem_object_has_pages(obj)) {
		pr_err("shrink-all didn't truncate the pages\n");
		err = -EINVAL;
		goto out_close;
	}

	if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) {
		pr_err("residual page-size bits left\n");
		err = -EINVAL;
		goto out_close;
	}

	err = i915_vma_pin(vma, 0, 0, flags);
	if (err)
		goto out_close;

	err = cpu_check(obj, 0, 0xdeadbeaf);

out_unpin:
	i915_vma_unpin(vma);
out_close:
	i915_vma_close(vma);
out_put:
	i915_gem_object_put(obj);

	return err;
}

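/* Selftest entry points, referenced by the mock and live selftest runners. */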
int i915_gem_huge_page_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_mock_exhaust_device_supported_pages),
		SUBTEST(igt_mock_ppgtt_misaligned_dma),
		SUBTEST(igt_mock_ppgtt_huge_fill),
		SUBTEST(igt_mock_ppgtt_64K),
	};
	int saved_ppgtt = i915_modparams.enable_ppgtt;
	struct drm_i915_private *dev_priv;
	struct pci_dev *pdev;
	struct i915_hw_ppgtt *ppgtt;
	int err;

	dev_priv = mock_gem_device();
	if (!dev_priv)
		return -ENOMEM;

	/* Pretend to be a device which supports the 48b PPGTT */
	i915_modparams.enable_ppgtt = 3;

	pdev = dev_priv->drm.pdev;
	dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(39));

	mutex_lock(&dev_priv->drm.struct_mutex);
	ppgtt = i915_ppgtt_create(dev_priv, ERR_PTR(-ENODEV), "mock");
	if (IS_ERR(ppgtt)) {
		err = PTR_ERR(ppgtt);
		goto out_unlock;
	}

	if (!i915_vm_is_48bit(&ppgtt->base)) {
		pr_err("failed to create 48b PPGTT\n");
		err = -EINVAL;
		goto out_close;
	}

	/* If we ever hit this then it's time to mock the 64K scratch */
	if (!i915_vm_has_scratch_64K(&ppgtt->base)) {
		pr_err("PPGTT missing 64K scratch page\n");
		err = -EINVAL;
		goto out_close;
	}

	err = i915_subtests(tests, ppgtt);

out_close:
	i915_ppgtt_close(&ppgtt->base);
	i915_ppgtt_put(ppgtt);

out_unlock:
	mutex_unlock(&dev_priv->drm.struct_mutex);

	i915_modparams.enable_ppgtt = saved_ppgtt;

	drm_dev_unref(&dev_priv->drm);

	return err;
}

int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_shrink_thp),
		SUBTEST(igt_ppgtt_pin_update),
		SUBTEST(igt_tmpfs_fallback),
		SUBTEST(igt_ppgtt_exhaust_huge),
		SUBTEST(igt_ppgtt_gemfs_huge),
		SUBTEST(igt_ppgtt_internal_huge),
	};
	struct drm_file *file;
	struct i915_gem_context *ctx;
	int err;

	if (!USES_PPGTT(dev_priv)) {
		pr_info("PPGTT not supported, skipping live-selftests\n");
		return 0;
	}

	file = mock_file(dev_priv);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_runtime_pm_get(dev_priv);

	ctx = live_context(dev_priv, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_unlock;
	}

	err = i915_subtests(tests, ctx);

out_unlock:
	intel_runtime_pm_put(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	mock_file_free(dev_priv, file);

	return err;
}