drivers/gpu/drm/i915/gem/i915_gem_userptr.c

   1 /*
   2  * SPDX-License-Identifier: MIT
   3  *
   4  * Copyright © 2012-2014 Intel Corporation
   5  */
   6
   7 #include <linux/mmu_context.h>
   8 #include <linux/mmu_notifier.h>
   9 #include <linux/mempolicy.h>
  10 #include <linux/swap.h>
  11 #include <linux/sched/mm.h>
  12
  13 #include "i915_drv.h"
  14 #include "i915_gem_ioctls.h"
  15 #include "i915_gem_object.h"
  16 #include "i915_scatterlist.h"
  17
  18 struct i915_mm_struct {
  19         struct mm_struct *mm;
  20         struct drm_i915_private *i915;
  21         struct i915_mmu_notifier *mn;
  22         struct hlist_node node;
  23         struct kref kref;
  24         struct rcu_work work;
  25 };
  26
  27 #if defined(CONFIG_MMU_NOTIFIER)
  28 #include <linux/interval_tree.h>
  29
  30 struct i915_mmu_notifier {
  31         spinlock_t lock;
  32         struct hlist_node node;
  33         struct mmu_notifier mn;
  34         struct rb_root_cached objects;
  35         struct i915_mm_struct *mm;
  36 };
  37
  38 struct i915_mmu_object {
  39         struct i915_mmu_notifier *mn;
  40         struct drm_i915_gem_object *obj;
  41         struct interval_tree_node it;
  42 };
  43
  44 static void add_object(struct i915_mmu_object *mo)
  45 {
  46         GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
  47         interval_tree_insert(&mo->it, &mo->mn->objects);
  48 }
  49
  50 static void del_object(struct i915_mmu_object *mo)
  51 {
  52         if (RB_EMPTY_NODE(&mo->it.rb))
  53                 return;
  54
  55         interval_tree_remove(&mo->it, &mo->mn->objects);
  56         RB_CLEAR_NODE(&mo->it.rb);
  57 }
  58
  59 static void
  60 __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
  61 {
  62         struct i915_mmu_object *mo = obj->userptr.mmu_object;
  63
  64         /*
  65          * During mm_invalidate_range we need to cancel any userptr that
  66          * overlaps the range being invalidated. Doing so requires the
  67          * struct_mutex, and that risks recursion. In order to cause
  68          * recursion, the user must alias the userptr address space with
  69          * a GTT mmapping (possible with a MAP_FIXED) - then when we have
  70          * to invalidate that mmaping, mm_invalidate_range is called with
  71          * the userptr address *and* the struct_mutex held.  To prevent that
  72          * we set a flag under the i915_mmu_notifier spinlock to indicate
  73          * whether this object is valid.
  74          */
  75         if (!mo)
  76                 return;
  77
  78         spin_lock(&mo->mn->lock);
  79         if (value)
  80                 add_object(mo);
  81         else
  82                 del_object(mo);
  83         spin_unlock(&mo->mn->lock);
  84 }
  85
  86 static int
  87 userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
  88                                   const struct mmu_notifier_range *range)
  89 {
  90         struct i915_mmu_notifier *mn =
  91                 container_of(_mn, struct i915_mmu_notifier, mn);
  92         struct interval_tree_node *it;
  93         unsigned long end;
  94         int ret = 0;
  95
  96         if (RB_EMPTY_ROOT(&mn->objects.rb_root))
  97                 return 0;
  98
  99         /* interval ranges are inclusive, but invalidate range is exclusive */
 100         end = range->end - 1;
 101
 102         spin_lock(&mn->lock);
 103         it = interval_tree_iter_first(&mn->objects, range->start, end);
 104         while (it) {
 105                 struct drm_i915_gem_object *obj;
 106
 107                 if (!mmu_notifier_range_blockable(range)) {
 108                         ret = -EAGAIN;
 109                         break;
 110                 }
 111
 112                 /*
 113                  * The mmu_object is released late when destroying the
 114                  * GEM object so it is entirely possible to gain a
 115                  * reference on an object in the process of being freed
 116                  * since our serialisation is via the spinlock and not
 117                  * the struct_mutex - and consequently use it after it
 118                  * is freed and then double free it. To prevent that
 119                  * use-after-free we only acquire a reference on the
 120                  * object if it is not in the process of being destroyed.
 121                  */
 122                 obj = container_of(it, struct i915_mmu_object, it)->obj;
 123                 if (!kref_get_unless_zero(&obj->base.refcount)) {
 124                         it = interval_tree_iter_next(it, range->start, end);
 125                         continue;
 126                 }
 127                 spin_unlock(&mn->lock);
 128
 129                 ret = i915_gem_object_unbind(obj,
 130                                              I915_GEM_OBJECT_UNBIND_ACTIVE |
 131                                              I915_GEM_OBJECT_UNBIND_BARRIER);
 132                 if (ret == 0)
 133                         ret = __i915_gem_object_put_pages(obj);
 134                 i915_gem_object_put(obj);
 135                 if (ret)
 136                         return ret;
 137
 138                 spin_lock(&mn->lock);
 139
 140                 /*
 141                  * As we do not (yet) protect the mmu from concurrent insertion
 142                  * over this range, there is no guarantee that this search will
 143                  * terminate given a pathologic workload.
 144                  */
 145                 it = interval_tree_iter_first(&mn->objects, range->start, end);
 146         }
 147         spin_unlock(&mn->lock);
 148
 149         return ret;
 150
 151 }
 152
 153 static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
 154         .invalidate_range_start = userptr_mn_invalidate_range_start,
 155 };
 156
 157 static struct i915_mmu_notifier *
 158 i915_mmu_notifier_create(struct i915_mm_struct *mm)
 159 {
 160         struct i915_mmu_notifier *mn;
 161
 162         mn = kmalloc(sizeof(*mn), GFP_KERNEL);
 163         if (mn == NULL)
 164                 return ERR_PTR(-ENOMEM);
 165
 166         spin_lock_init(&mn->lock);
 167         mn->mn.ops = &i915_gem_userptr_notifier;
 168         mn->objects = RB_ROOT_CACHED;
 169         mn->mm = mm;
 170
 171         return mn;
 172 }
 173
 174 static void
 175 i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
 176 {
 177         struct i915_mmu_object *mo;
 178
 179         mo = fetch_and_zero(&obj->userptr.mmu_object);
 180         if (!mo)
 181                 return;
 182
 183         spin_lock(&mo->mn->lock);
 184         del_object(mo);
 185         spin_unlock(&mo->mn->lock);
 186         kfree(mo);
 187 }
 188
 189 static struct i915_mmu_notifier *
 190 i915_mmu_notifier_find(struct i915_mm_struct *mm)
 191 {
 192         struct i915_mmu_notifier *mn, *old;
 193         int err;
 194
 195         mn = READ_ONCE(mm->mn);
 196         if (likely(mn))
 197                 return mn;
 198
 199         mn = i915_mmu_notifier_create(mm);
 200         if (IS_ERR(mn))
 201                 return mn;
 202
 203         err = mmu_notifier_register(&mn->mn, mm->mm);
 204         if (err) {
 205                 kfree(mn);
 206                 return ERR_PTR(err);
 207         }
 208
 209         old = cmpxchg(&mm->mn, NULL, mn);
 210         if (old) {
 211                 mmu_notifier_unregister(&mn->mn, mm->mm);
 212                 kfree(mn);
 213                 mn = old;
 214         }
 215
 216         return mn;
 217 }
 218
 219 static int
 220 i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
 221                                     unsigned flags)
 222 {
 223         struct i915_mmu_notifier *mn;
 224         struct i915_mmu_object *mo;
 225
 226         if (flags & I915_USERPTR_UNSYNCHRONIZED)
 227                 return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
 228
 229         if (GEM_WARN_ON(!obj->userptr.mm))
 230                 return -EINVAL;
 231
 232         mn = i915_mmu_notifier_find(obj->userptr.mm);
 233         if (IS_ERR(mn))
 234                 return PTR_ERR(mn);
 235
 236         mo = kzalloc(sizeof(*mo), GFP_KERNEL);
 237         if (!mo)
 238                 return -ENOMEM;
 239
 240         mo->mn = mn;
 241         mo->obj = obj;
 242         mo->it.start = obj->userptr.ptr;
 243         mo->it.last = obj->userptr.ptr + obj->base.size - 1;
 244         RB_CLEAR_NODE(&mo->it.rb);
 245
 246         obj->userptr.mmu_object = mo;
 247         return 0;
 248 }
 249
 250 static void
 251 i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
 252                        struct mm_struct *mm)
 253 {
 254         if (mn == NULL)
 255                 return;
 256
 257         mmu_notifier_unregister(&mn->mn, mm);
 258         kfree(mn);
 259 }
 260
 261 #else
 262
 263 static void
 264 __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
 265 {
 266 }
 267
 268 static void
 269 i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
 270 {
 271 }
 272
 273 static int
 274 i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
 275                                     unsigned flags)
 276 {
 277         if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
 278                 return -ENODEV;
 279
 280         if (!capable(CAP_SYS_ADMIN))
 281                 return -EPERM;
 282
 283         return 0;
 284 }
 285
 286 static void
 287 i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
 288                        struct mm_struct *mm)
 289 {
 290 }
 291
 292 #endif
 293
 294 static struct i915_mm_struct *
 295 __i915_mm_struct_find(struct drm_i915_private *i915, struct mm_struct *real)
 296 {
 297         struct i915_mm_struct *it, *mm = NULL;
 298
 299         rcu_read_lock();
 300         hash_for_each_possible_rcu(i915->mm_structs,
 301                                    it, node,
 302                                    (unsigned long)real)
 303                 if (it->mm == real && kref_get_unless_zero(&it->kref)) {
 304                         mm = it;
 305                         break;
 306                 }
 307         rcu_read_unlock();
 308
 309         return mm;
 310 }
 311
 312 static int
 313 i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
 314 {
 315         struct drm_i915_private *i915 = to_i915(obj->base.dev);
 316         struct i915_mm_struct *mm, *new;
 317         int ret = 0;
 318
 319         /* During release of the GEM object we hold the struct_mutex. This
 320          * precludes us from calling mmput() at that time as that may be
 321          * the last reference and so call exit_mmap(). exit_mmap() will
 322          * attempt to reap the vma, and if we were holding a GTT mmap
 323          * would then call drm_gem_vm_close() and attempt to reacquire
 324          * the struct mutex. So in order to avoid that recursion, we have
 325          * to defer releasing the mm reference until after we drop the
 326          * struct_mutex, i.e. we need to schedule a worker to do the clean
 327          * up.
 328          */
 329         mm = __i915_mm_struct_find(i915, current->mm);
 330         if (mm)
 331                 goto out;
 332
 333         new = kmalloc(sizeof(*mm), GFP_KERNEL);
 334         if (!new)
 335                 return -ENOMEM;
 336
 337         kref_init(&new->kref);
 338         new->i915 = to_i915(obj->base.dev);
 339         new->mm = current->mm;
 340         new->mn = NULL;
 341
 342         spin_lock(&i915->mm_lock);
 343         mm = __i915_mm_struct_find(i915, current->mm);
 344         if (!mm) {
 345                 hash_add_rcu(i915->mm_structs,
 346                              &new->node,
 347                              (unsigned long)new->mm);
 348                 mmgrab(current->mm);
 349                 mm = new;
 350         }
 351         spin_unlock(&i915->mm_lock);
 352         if (mm != new)
 353                 kfree(new);
 354
 355 out:
 356         obj->userptr.mm = mm;
 357         return ret;
 358 }
 359
 360 static void
 361 __i915_mm_struct_free__worker(struct work_struct *work)
 362 {
 363         struct i915_mm_struct *mm = container_of(work, typeof(*mm), work.work);
 364
 365         i915_mmu_notifier_free(mm->mn, mm->mm);
 366         mmdrop(mm->mm);
 367         kfree(mm);
 368 }
 369
 370 static void
 371 __i915_mm_struct_free(struct kref *kref)
 372 {
 373         struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);
 374
 375         spin_lock(&mm->i915->mm_lock);
 376         hash_del_rcu(&mm->node);
 377         spin_unlock(&mm->i915->mm_lock);
 378
 379         INIT_RCU_WORK(&mm->work, __i915_mm_struct_free__worker);
 380         queue_rcu_work(system_wq, &mm->work);
 381 }
 382
 383 static void
 384 i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
 385 {
 386         if (obj->userptr.mm == NULL)
 387                 return;
 388
 389         kref_put(&obj->userptr.mm->kref, __i915_mm_struct_free);
 390         obj->userptr.mm = NULL;
 391 }
 392
 393 struct get_pages_work {
 394         struct work_struct work;
 395         struct drm_i915_gem_object *obj;
 396         struct task_struct *task;
 397 };
 398
 399 static struct sg_table *
 400 __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
 401                                struct page **pvec, unsigned long num_pages)
 402 {
 403         unsigned int max_segment = i915_sg_segment_size();
 404         struct sg_table *st;
 405         unsigned int sg_page_sizes;
 406         struct scatterlist *sg;
 407         int ret;
 408
 409         st = kmalloc(sizeof(*st), GFP_KERNEL);
 410         if (!st)
 411                 return ERR_PTR(-ENOMEM);
 412
 413 alloc_table:
 414         sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0,
 415                                          num_pages << PAGE_SHIFT, max_segment,
 416                                          NULL, 0, GFP_KERNEL);
 417         if (IS_ERR(sg)) {
 418                 kfree(st);
 419                 return ERR_CAST(sg);
 420         }
 421
 422         ret = i915_gem_gtt_prepare_pages(obj, st);
 423         if (ret) {
 424                 sg_free_table(st);
 425
 426                 if (max_segment > PAGE_SIZE) {
 427                         max_segment = PAGE_SIZE;
 428                         goto alloc_table;
 429                 }
 430
 431                 kfree(st);
 432                 return ERR_PTR(ret);
 433         }
 434
 435         sg_page_sizes = i915_sg_page_sizes(st->sgl);
 436
 437         __i915_gem_object_set_pages(obj, st, sg_page_sizes);
 438
 439         return st;
 440 }
 441
 442 static void
 443 __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 444 {
 445         struct get_pages_work *work = container_of(_work, typeof(*work), work);
 446         struct drm_i915_gem_object *obj = work->obj;
 447         const unsigned long npages = obj->base.size >> PAGE_SHIFT;
 448         unsigned long pinned;
 449         struct page **pvec;
 450         int ret;
 451
 452         ret = -ENOMEM;
 453         pinned = 0;
 454
 455         pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
 456         if (pvec != NULL) {
 457                 struct mm_struct *mm = obj->userptr.mm->mm;
 458                 unsigned int flags = 0;
 459                 int locked = 0;
 460
 461                 if (!i915_gem_object_is_readonly(obj))
 462                         flags |= FOLL_WRITE;
 463
 464                 ret = -EFAULT;
 465                 if (mmget_not_zero(mm)) {
 466                         while (pinned < npages) {
 467                                 if (!locked) {
 468                                         mmap_read_lock(mm);
 469                                         locked = 1;
 470                                 }
 471                                 ret = pin_user_pages_remote
 472                                         (mm,
 473                                          obj->userptr.ptr + pinned * PAGE_SIZE,
 474                                          npages - pinned,
 475                                          flags,
 476                                          pvec + pinned, NULL, &locked);
 477                                 if (ret < 0)
 478                                         break;
 479
 480                                 pinned += ret;
 481                         }
 482                         if (locked)
 483                                 mmap_read_unlock(mm);
 484                         mmput(mm);
 485                 }
 486         }
 487
 488         mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
 489         if (obj->userptr.work == &work->work) {
 490                 struct sg_table *pages = ERR_PTR(ret);
 491
 492                 if (pinned == npages) {
 493                         pages = __i915_gem_userptr_alloc_pages(obj, pvec,
 494                                                                npages);
 495                         if (!IS_ERR(pages)) {
 496                                 pinned = 0;
 497                                 pages = NULL;
 498                         }
 499                 }
 500
 501                 obj->userptr.work = ERR_CAST(pages);
 502                 if (IS_ERR(pages))
 503                         __i915_gem_userptr_set_active(obj, false);
 504         }
 505         mutex_unlock(&obj->mm.lock);
 506
 507         unpin_user_pages(pvec, pinned);
 508         kvfree(pvec);
 509
 510         i915_gem_object_put(obj);
 511         put_task_struct(work->task);
 512         kfree(work);
 513 }
 514
 515 static struct sg_table *
 516 __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
 517 {
 518         struct get_pages_work *work;
 519
 520         /* Spawn a worker so that we can acquire the
 521          * user pages without holding our mutex. Access
 522          * to the user pages requires mmap_lock, and we have
 523          * a strict lock ordering of mmap_lock, struct_mutex -
 524          * we already hold struct_mutex here and so cannot
 525          * call gup without encountering a lock inversion.
 526          *
 527          * Userspace will keep on repeating the operation
 528          * (thanks to EAGAIN) until either we hit the fast
 529          * path or the worker completes. If the worker is
 530          * cancelled or superseded, the task is still run
 531          * but the results ignored. (This leads to
 532          * complications that we may have a stray object
 533          * refcount that we need to be wary of when
 534          * checking for existing objects during creation.)
 535          * If the worker encounters an error, it reports
 536          * that error back to this function through
 537          * obj->userptr.work = ERR_PTR.
 538          */
 539         work = kmalloc(sizeof(*work), GFP_KERNEL);
 540         if (work == NULL)
 541                 return ERR_PTR(-ENOMEM);
 542
 543         obj->userptr.work = &work->work;
 544
 545         work->obj = i915_gem_object_get(obj);
 546
 547         work->task = current;
 548         get_task_struct(work->task);
 549
 550         INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
 551         queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);
 552
 553         return ERR_PTR(-EAGAIN);
 554 }
 555
 556 static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 557 {
 558         const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
 559         struct mm_struct *mm = obj->userptr.mm->mm;
 560         struct page **pvec;
 561         struct sg_table *pages;
 562         bool active;
 563         int pinned;
 564         unsigned int gup_flags = 0;
 565
 566         /* If userspace should engineer that these pages are replaced in
 567          * the vma between us binding this page into the GTT and completion
 568          * of rendering... Their loss. If they change the mapping of their
 569          * pages they need to create a new bo to point to the new vma.
 570          *
 571          * However, that still leaves open the possibility of the vma
 572          * being copied upon fork. Which falls under the same userspace
 573          * synchronisation issue as a regular bo, except that this time
 574          * the process may not be expecting that a particular piece of
 575          * memory is tied to the GPU.
 576          *
 577          * Fortunately, we can hook into the mmu_notifier in order to
 578          * discard the page references prior to anything nasty happening
 579          * to the vma (discard or cloning) which should prevent the more
 580          * egregious cases from causing harm.
 581          */
 582
 583         if (obj->userptr.work) {
 584                 /* active flag should still be held for the pending work */
 585                 if (IS_ERR(obj->userptr.work))
 586                         return PTR_ERR(obj->userptr.work);
 587                 else
 588                         return -EAGAIN;
 589         }
 590
 591         pvec = NULL;
 592         pinned = 0;
 593
 594         if (mm == current->mm) {
 595                 pvec = kvmalloc_array(num_pages, sizeof(struct page *),
 596                                       GFP_KERNEL |
 597                                       __GFP_NORETRY |
 598                                       __GFP_NOWARN);
 599                 if (pvec) {
 600                         /* defer to worker if malloc fails */
 601                         if (!i915_gem_object_is_readonly(obj))
 602                                 gup_flags |= FOLL_WRITE;
 603                         pinned = pin_user_pages_fast_only(obj->userptr.ptr,
 604                                                           num_pages, gup_flags,
 605                                                           pvec);
 606                 }
 607         }
 608
 609         active = false;
 610         if (pinned < 0) {
 611                 pages = ERR_PTR(pinned);
 612                 pinned = 0;
 613         } else if (pinned < num_pages) {
 614                 pages = __i915_gem_userptr_get_pages_schedule(obj);
 615                 active = pages == ERR_PTR(-EAGAIN);
 616         } else {
 617                 pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
 618                 active = !IS_ERR(pages);
 619         }
 620         if (active)
 621                 __i915_gem_userptr_set_active(obj, true);
 622
 623         if (IS_ERR(pages))
 624                 unpin_user_pages(pvec, pinned);
 625         kvfree(pvec);
 626
 627         return PTR_ERR_OR_ZERO(pages);
 628 }
 629
 630 static void
 631 i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
 632                            struct sg_table *pages)
 633 {
 634         struct sgt_iter sgt_iter;
 635         struct page *page;
 636
 637         /* Cancel any inflight work and force them to restart their gup */
 638         obj->userptr.work = NULL;
 639         __i915_gem_userptr_set_active(obj, false);
 640         if (!pages)
 641                 return;
 642
 643         __i915_gem_object_release_shmem(obj, pages, true);
 644         i915_gem_gtt_finish_pages(obj, pages);
 645
 646         /*
 647          * We always mark objects as dirty when they are used by the GPU,
 648          * just in case. However, if we set the vma as being read-only we know
 649          * that the object will never have been written to.
 650          */
 651         if (i915_gem_object_is_readonly(obj))
 652                 obj->mm.dirty = false;
 653
 654         for_each_sgt_page(page, sgt_iter, pages) {
 655                 if (obj->mm.dirty && trylock_page(page)) {
 656                         /*
 657                          * As this may not be anonymous memory (e.g. shmem)
 658                          * but exist on a real mapping, we have to lock
 659                          * the page in order to dirty it -- holding
 660                          * the page reference is not sufficient to
 661                          * prevent the inode from being truncated.
 662                          * Play safe and take the lock.
 663                          *
 664                          * However...!
 665                          *
 666                          * The mmu-notifier can be invalidated for a
 667                          * migrate_page, that is alreadying holding the lock
 668                          * on the page. Such a try_to_unmap() will result
 669                          * in us calling put_pages() and so recursively try
 670                          * to lock the page. We avoid that deadlock with
 671                          * a trylock_page() and in exchange we risk missing
 672                          * some page dirtying.
 673                          */
 674                         set_page_dirty(page);
 675                         unlock_page(page);
 676                 }
 677
 678                 mark_page_accessed(page);
 679                 unpin_user_page(page);
 680         }
 681         obj->mm.dirty = false;
 682
 683         sg_free_table(pages);
 684         kfree(pages);
 685 }
 686
 687 static void
 688 i915_gem_userptr_release(struct drm_i915_gem_object *obj)
 689 {
 690         i915_gem_userptr_release__mmu_notifier(obj);
 691         i915_gem_userptr_release__mm_struct(obj);
 692 }
 693
 694 static int
 695 i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
 696 {
 697         if (obj->userptr.mmu_object)
 698                 return 0;
 699
 700         return i915_gem_userptr_init__mmu_notifier(obj, 0);
 701 }
 702
 703 static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
 704         .name = "i915_gem_object_userptr",
 705         .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
 706                  I915_GEM_OBJECT_IS_SHRINKABLE |
 707                  I915_GEM_OBJECT_NO_MMAP |
 708                  I915_GEM_OBJECT_ASYNC_CANCEL,
 709         .get_pages = i915_gem_userptr_get_pages,
 710         .put_pages = i915_gem_userptr_put_pages,
 711         .dmabuf_export = i915_gem_userptr_dmabuf_export,
 712         .release = i915_gem_userptr_release,
 713 };
 714
 715 /*
 716  * Creates a new mm object that wraps some normal memory from the process
 717  * context - user memory.
 718  *
 719  * We impose several restrictions upon the memory being mapped
 720  * into the GPU.
 721  * 1. It must be page aligned (both start/end addresses, i.e ptr and size).
 722  * 2. It must be normal system memory, not a pointer into another map of IO
 723  *    space (e.g. it must not be a GTT mmapping of another object).
 724  * 3. We only allow a bo as large as we could in theory map into the GTT,
 725  *    that is we limit the size to the total size of the GTT.
 726  * 4. The bo is marked as being snoopable. The backing pages are left
 727  *    accessible directly by the CPU, but reads and writes by the GPU may
 728  *    incur the cost of a snoop (unless you have an LLC architecture).
 729  *
 730  * Synchronisation between multiple users and the GPU is left to userspace
 731  * through the normal set-domain-ioctl. The kernel will enforce that the
 732  * GPU relinquishes the VMA before it is returned back to the system
 733  * i.e. upon free(), munmap() or process termination. However, the userspace
 734  * malloc() library may not immediately relinquish the VMA after free() and
 735  * instead reuse it whilst the GPU is still reading and writing to the VMA.
 736  * Caveat emptor.
 737  *
 738  * Also note, that the object created here is not currently a "first class"
 739  * object, in that several ioctls are banned. These are the CPU access
 740  * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
 741  * direct access via your pointer rather than use those ioctls. Another
 742  * restriction is that we do not allow userptr surfaces to be pinned to the
 743  * hardware and so we reject any attempt to create a framebuffer out of a
 744  * userptr.
 745  *
 746  * If you think this is a good interface to use to pass GPU memory between
 747  * drivers, please use dma-buf instead. In fact, wherever possible use
 748  * dma-buf instead.
 749  */
 750 int
 751 i915_gem_userptr_ioctl(struct drm_device *dev,
 752                        void *data,
 753                        struct drm_file *file)
 754 {
 755         static struct lock_class_key lock_class;
 756         struct drm_i915_private *dev_priv = to_i915(dev);
 757         struct drm_i915_gem_userptr *args = data;
 758         struct drm_i915_gem_object *obj;
 759         int ret;
 760         u32 handle;
 761
 762         if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
 763                 /* We cannot support coherent userptr objects on hw without
 764                  * LLC and broken snooping.
 765                  */
 766                 return -ENODEV;
 767         }
 768
 769         if (args->flags & ~(I915_USERPTR_READ_ONLY |
 770                             I915_USERPTR_UNSYNCHRONIZED))
 771                 return -EINVAL;
 772
 773         /*
 774          * XXX: There is a prevalence of the assumption that we fit the
 775          * object's page count inside a 32bit _signed_ variable. Let's document
 776          * this and catch if we ever need to fix it. In the meantime, if you do
 777          * spot such a local variable, please consider fixing!
 778          *
 779          * Aside from our own locals (for which we have no excuse!):
 780          * - sg_table embeds unsigned int for num_pages
 781          * - get_user_pages*() mixed ints with longs
 782          */
 783
 784         if (args->user_size >> PAGE_SHIFT > INT_MAX)
 785                 return -E2BIG;
 786
 787         if (overflows_type(args->user_size, obj->base.size))
 788                 return -E2BIG;
 789
 790         if (!args->user_size)
 791                 return -EINVAL;
 792
 793         if (offset_in_page(args->user_ptr | args->user_size))
 794                 return -EINVAL;
 795
 796         if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
 797                 return -EFAULT;
 798
 799         if (args->flags & I915_USERPTR_READ_ONLY) {
 800                 /*
 801                  * On almost all of the older hw, we cannot tell the GPU that
 802                  * a page is readonly.
 803                  */
 804                 if (!dev_priv->gt.vm->has_read_only)
 805                         return -ENODEV;
 806         }
 807
 808         obj = i915_gem_object_alloc();
 809         if (obj == NULL)
 810                 return -ENOMEM;
 811
 812         drm_gem_private_object_init(dev, &obj->base, args->user_size);
 813         i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class);
 814         obj->read_domains = I915_GEM_DOMAIN_CPU;
 815         obj->write_domain = I915_GEM_DOMAIN_CPU;
 816         i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
 817
 818         obj->userptr.ptr = args->user_ptr;
 819         if (args->flags & I915_USERPTR_READ_ONLY)
 820                 i915_gem_object_set_readonly(obj);
 821
 822         /* And keep a pointer to the current->mm for resolving the user pages
 823          * at binding. This means that we need to hook into the mmu_notifier
 824          * in order to detect if the mmu is destroyed.
 825          */
 826         ret = i915_gem_userptr_init__mm_struct(obj);
 827         if (ret == 0)
 828                 ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
 829         if (ret == 0)
 830                 ret = drm_gem_handle_create(file, &obj->base, &handle);
 831
 832         /* drop reference from allocate - handle holds it now */
 833         i915_gem_object_put(obj);
 834         if (ret)
 835                 return ret;
 836
 837         args->handle = handle;
 838         return 0;
 839 }
 840
 841 int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
 842 {
 843         spin_lock_init(&dev_priv->mm_lock);
 844         hash_init(dev_priv->mm_structs);
 845
 846         dev_priv->mm.userptr_wq =
 847                 alloc_workqueue("i915-userptr-acquire",
 848                                 WQ_HIGHPRI | WQ_UNBOUND,
 849                                 0);
 850         if (!dev_priv->mm.userptr_wq)
 851                 return -ENOMEM;
 852
 853         return 0;
 854 }
 855
 856 void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
 857 {
 858         destroy_workqueue(dev_priv->mm.userptr_wq);
 859 }