// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>

#include "i915_trace.h"
#include "intel_gt.h"
#include "intel_gtt.h"
struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	return i915_gem_object_create_internal(vm->i915, sz);
}
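
/*
 * Pin the backing store for a page-table object and mark it unshrinkable:
 * page tables must stay resident, so the shrinker should never pick them
 * as reclaim candidates while the vm is alive.
 */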
int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	int err;

	err = i915_gem_object_pin_pages(obj);
	if (err)
		return err;

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}
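
/*
 * Final close of an address space: once the last open reference is
 * dropped (under vm->mutex), forcibly unbind anything still left on the
 * bound_list so that no vma outlives its vm.
 */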
void __i915_vm_close(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;

	if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
		return;

	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
		if (!kref_get_unless_zero(&obj->base.refcount))
			continue;

		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
		WARN_ON(__i915_vma_unbind(vma));
		__i915_vma_put(vma);

		i915_gem_object_put(obj);
	}
	GEM_BUG_ON(!list_empty(&vm->bound_list));

	mutex_unlock(&vm->mutex);
}
void i915_address_space_fini(struct i915_address_space *vm)
{
	drm_mm_takedown(&vm->mm);
	mutex_destroy(&vm->mutex);
}
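
/*
 * This worker runs only after queue_rcu_work() in i915_vm_release()
 * below has waited out an RCU grace period, giving any RCU-protected
 * readers a chance to finish with the vm; the worker context is then
 * free to sleep while tearing it down.
 */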
static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, rcu.work);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	kfree(vm);
}
void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_rcu_work(vm->i915->wq, &vm->rcu);
}
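
/*
 * Common setup shared by all address spaces (GGTT and ppGTT alike):
 * reference counts, the reclaim-safe vm->mutex, and the drm_mm range
 * manager covering [0, vm->total).
 */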
void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);
	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
	atomic_set(&vm->open, 1);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);
	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);
	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
}
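
/*
 * Release a vma's page description: if the vma built its own sg_table
 * (e.g. for a partial or remapped view) it is freed here, otherwise the
 * pages belong to the object and are left untouched.
 */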
void clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	if (vma->pages != vma->obj->mm.pages) {
		sg_free_table(vma->pages);
		kfree(vma->pages);
	}
	vma->pages = NULL;

	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}
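
/*
 * Page-table objects are allocated as a single contiguous chunk, so the
 * helpers below may simply use the first (and only) sg entry.
 */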
dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_dma_address(p->mm.pages->sgl);
}
struct page *__px_page(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_page(p->mm.pages->sgl);
}
void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
	struct page *page = __px_page(p);
	void *vaddr;

	vaddr = kmap(page);
	memset64(vaddr, val, count);
	clflush_cache_range(vaddr, PAGE_SIZE);
	kunmap(page);
}
static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
	struct sgt_iter sgt;
	struct page *page;
	u8 val;

	val = 0;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		val = POISON_FREE;

	for_each_sgt_page(page, sgt, scratch->mm.pages) {
		void *vaddr;

		vaddr = kmap(page);
		memset(vaddr, val, PAGE_SIZE);
		kunmap(page);
	}
}
int setup_scratch_page(struct i915_address_space *vm)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
		size = I915_GTT_PAGE_SIZE_64K;

	do {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_pt_dma(vm, size);
		if (IS_ERR(obj))
			goto skip;

		if (pin_pt_dma(vm, obj))
			goto skip_obj;

		/* We need a single contiguous page for our scratch */
		if (obj->mm.page_sizes.sg < size)
			goto skip_obj;

		/* And it needs to be correspondingly aligned */
		if (__px_dma(obj) & (size - 1))
			goto skip_obj;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(obj);

		vm->scratch[0] = obj;
		vm->scratch_order = get_order(size);
		return 0;

skip_obj:
		i915_gem_object_put(obj);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
	} while (1);
}
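
/*
 * vm->scratch[0] is the scratch page itself; for ppGTTs the higher
 * entries, up to vm->top, hold the scratch page directories that fill
 * each level of the table, so all of them need to be released here.
 */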
void free_scratch(struct i915_address_space *vm)
{
	int i;

	for (i = 0; i <= vm->top; i++)
		i915_gem_object_put(vm->scratch[i]);
}
void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for gtt related workarounds. It is called on driver
	 * load and after a GPU reset, so workarounds placed here still apply
	 * even if they get overwritten by a GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    INTEL_GEN(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GEN_RANGE(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages, and not disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
				 intel_uncore_read(uncore,
						   HSW_GTT_CACHE_EN) == 0);
	}
}
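
/*
 * Gen12 programs each PAT entry through its own register
 * (GEN12_PAT_INDEX(n)), unlike the older gens below which pack all eight
 * entries into the 64-bit GEN8_PRIVATE_PAT_LO/HI pair.
 */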
static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}
static void cnl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}
/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* for scanout with eLLC */
	if (INTEL_GEN(i915) >= 9)
		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	else
		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
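
/*
 * GEN8_PPAT(i, x) places the 8-bit attribute x in byte i of the 64-bit
 * PAT value: entry 0 occupies bits 7:0, entry 7 bits 63:56. Hence the
 * split write, with entries 0-3 going to GEN8_PRIVATE_PAT_LO and
 * entries 4-7 to GEN8_PRIVATE_PAT_HI.
 */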
static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
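
/*
 * Pick the PPAT setup matching the hardware generation, newest first;
 * Cherryview and the gen9 LP parts take the snoop-based variant, and
 * everything else from gen8 onwards falls back to the BDW layout.
 */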
void setup_private_pat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;

	GEM_BUG_ON(INTEL_GEN(i915) < 8);

	if (INTEL_GEN(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (INTEL_GEN(i915) >= 10)
		cnl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif