arch/sparc/mm/gup.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Lockless get_user_pages_fast for sparc, cribbed from powerpc
   4  *
   5  * Copyright (C) 2008 Nick Piggin
   6  * Copyright (C) 2008 Novell Inc.
   7  */
   8
   9 #include <linux/sched.h>
  10 #include <linux/mm.h>
  11 #include <linux/vmstat.h>
  12 #include <linux/pagemap.h>
  13 #include <linux/rwsem.h>
  14 #include <asm/pgtable.h>
  15
  16 /*
  17  * The performance critical leaf functions are made noinline otherwise gcc
  18  * inlines everything into a single function which results in too much
  19  * register pressure.
  20  */
  21 static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
  22                 unsigned long end, int write, struct page **pages, int *nr)
  23 {
  24         unsigned long mask, result;
  25         pte_t *ptep;
  26
  27         if (tlb_type == hypervisor) {
  28                 result = _PAGE_PRESENT_4V|_PAGE_P_4V;
  29                 if (write)
  30                         result |= _PAGE_WRITE_4V;
  31         } else {
  32                 result = _PAGE_PRESENT_4U|_PAGE_P_4U;
  33                 if (write)
  34                         result |= _PAGE_WRITE_4U;
  35         }
  36         mask = result | _PAGE_SPECIAL;
  37
  38         ptep = pte_offset_kernel(&pmd, addr);
  39         do {
  40                 struct page *page, *head;
  41                 pte_t pte = *ptep;
  42
  43                 if ((pte_val(pte) & mask) != result)
  44                         return 0;
  45                 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
  46
  47                 /* The hugepage case is simplified on sparc64 because
  48                  * we encode the sub-page pfn offsets into the
  49                  * hugepage PTEs.  We could optimize this in the future
  50                  * use page_cache_add_speculative() for the hugepage case.
  51                  */
  52                 page = pte_page(pte);
  53                 head = compound_head(page);
  54                 if (!page_cache_get_speculative(head))
  55                         return 0;
  56                 if (unlikely(pte_val(pte) != pte_val(*ptep))) {
  57                         put_page(head);
  58                         return 0;
  59                 }
  60
  61                 pages[*nr] = page;
  62                 (*nr)++;
  63         } while (ptep++, addr += PAGE_SIZE, addr != end);
  64
  65         return 1;
  66 }
  67
  68 static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
  69                         unsigned long end, int write, struct page **pages,
  70                         int *nr)
  71 {
  72         struct page *head, *page;
  73         int refs;
  74
  75         if (!(pmd_val(pmd) & _PAGE_VALID))
  76                 return 0;
  77
  78         if (write && !pmd_write(pmd))
  79                 return 0;
  80
  81         refs = 0;
  82         page = pmd_page(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
  83         head = compound_head(page);
  84         do {
  85                 VM_BUG_ON(compound_head(page) != head);
  86                 pages[*nr] = page;
  87                 (*nr)++;
  88                 page++;
  89                 refs++;
  90         } while (addr += PAGE_SIZE, addr != end);
  91
  92         if (!page_cache_add_speculative(head, refs)) {
  93                 *nr -= refs;
  94                 return 0;
  95         }
  96
  97         if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
  98                 *nr -= refs;
  99                 while (refs--)
 100                         put_page(head);
 101                 return 0;
 102         }
 103
 104         return 1;
 105 }
 106
 107 static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
 108                         unsigned long end, int write, struct page **pages,
 109                         int *nr)
 110 {
 111         struct page *head, *page;
 112         int refs;
 113
 114         if (!(pud_val(pud) & _PAGE_VALID))
 115                 return 0;
 116
 117         if (write && !pud_write(pud))
 118                 return 0;
 119
 120         refs = 0;
 121         page = pud_page(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
 122         head = compound_head(page);
 123         do {
 124                 VM_BUG_ON(compound_head(page) != head);
 125                 pages[*nr] = page;
 126                 (*nr)++;
 127                 page++;
 128                 refs++;
 129         } while (addr += PAGE_SIZE, addr != end);
 130
 131         if (!page_cache_add_speculative(head, refs)) {
 132                 *nr -= refs;
 133                 return 0;
 134         }
 135
 136         if (unlikely(pud_val(pud) != pud_val(*pudp))) {
 137                 *nr -= refs;
 138                 while (refs--)
 139                         put_page(head);
 140                 return 0;
 141         }
 142
 143         return 1;
 144 }
 145
 146 static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 147                 int write, struct page **pages, int *nr)
 148 {
 149         unsigned long next;
 150         pmd_t *pmdp;
 151
 152         pmdp = pmd_offset(&pud, addr);
 153         do {
 154                 pmd_t pmd = *pmdp;
 155
 156                 next = pmd_addr_end(addr, end);
 157                 if (pmd_none(pmd))
 158                         return 0;
 159                 if (unlikely(pmd_large(pmd))) {
 160                         if (!gup_huge_pmd(pmdp, pmd, addr, next,
 161                                           write, pages, nr))
 162                                 return 0;
 163                 } else if (!gup_pte_range(pmd, addr, next, write,
 164                                           pages, nr))
 165                         return 0;
 166         } while (pmdp++, addr = next, addr != end);
 167
 168         return 1;
 169 }
 170
 171 static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
 172                 int write, struct page **pages, int *nr)
 173 {
 174         unsigned long next;
 175         pud_t *pudp;
 176
 177         pudp = pud_offset(&pgd, addr);
 178         do {
 179                 pud_t pud = *pudp;
 180
 181                 next = pud_addr_end(addr, end);
 182                 if (pud_none(pud))
 183                         return 0;
 184                 if (unlikely(pud_large(pud))) {
 185                         if (!gup_huge_pud(pudp, pud, addr, next,
 186                                           write, pages, nr))
 187                                 return 0;
 188                 } else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
 189                         return 0;
 190         } while (pudp++, addr = next, addr != end);
 191
 192         return 1;
 193 }
 194
 195 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 196                           struct page **pages)
 197 {
 198         struct mm_struct *mm = current->mm;
 199         unsigned long addr, len, end;
 200         unsigned long next, flags;
 201         pgd_t *pgdp;
 202         int nr = 0;
 203
 204         start &= PAGE_MASK;
 205         addr = start;
 206         len = (unsigned long) nr_pages << PAGE_SHIFT;
 207         end = start + len;
 208
 209         local_irq_save(flags);
 210         pgdp = pgd_offset(mm, addr);
 211         do {
 212                 pgd_t pgd = *pgdp;
 213
 214                 next = pgd_addr_end(addr, end);
 215                 if (pgd_none(pgd))
 216                         break;
 217                 if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
 218                         break;
 219         } while (pgdp++, addr = next, addr != end);
 220         local_irq_restore(flags);
 221
 222         return nr;
 223 }
 224
 225 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 226                         struct page **pages)
 227 {
 228         struct mm_struct *mm = current->mm;
 229         unsigned long addr, len, end;
 230         unsigned long next;
 231         pgd_t *pgdp;
 232         int nr = 0;
 233
 234         start &= PAGE_MASK;
 235         addr = start;
 236         len = (unsigned long) nr_pages << PAGE_SHIFT;
 237         end = start + len;
 238
 239         /*
 240          * XXX: batch / limit 'nr', to avoid large irq off latency
 241          * needs some instrumenting to determine the common sizes used by
 242          * important workloads (eg. DB2), and whether limiting the batch size
 243          * will decrease performance.
 244          *
 245          * It seems like we're in the clear for the moment. Direct-IO is
 246          * the main guy that batches up lots of get_user_pages, and even
 247          * they are limited to 64-at-a-time which is not so many.
 248          */
 249         /*
 250          * This doesn't prevent pagetable teardown, but does prevent
 251          * the pagetables from being freed on sparc.
 252          *
 253          * So long as we atomically load page table pointers versus teardown,
 254          * we can follow the address down to the the page and take a ref on it.
 255          */
 256         local_irq_disable();
 257
 258         pgdp = pgd_offset(mm, addr);
 259         do {
 260                 pgd_t pgd = *pgdp;
 261
 262                 next = pgd_addr_end(addr, end);
 263                 if (pgd_none(pgd))
 264                         goto slow;
 265                 if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
 266                         goto slow;
 267         } while (pgdp++, addr = next, addr != end);
 268
 269         local_irq_enable();
 270
 271         VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
 272         return nr;
 273
 274         {
 275                 int ret;
 276
 277 slow:
 278                 local_irq_enable();
 279
 280                 /* Try to get the remaining pages with get_user_pages */
 281                 start += nr << PAGE_SHIFT;
 282                 pages += nr;
 283
 284                 ret = get_user_pages_unlocked(start,
 285                         (end - start) >> PAGE_SHIFT, pages,
 286                         write ? FOLL_WRITE : 0);
 287
 288                 /* Have to be a bit careful with return values */
 289                 if (nr > 0) {
 290                         if (ret < 0)
 291                                 ret = nr;
 292                         else
 293                                 ret += nr;
 294                 }
 295
 296                 return ret;
 297         }
 298 }