mm/vmalloc.c

   1 /*
   2  *  linux/mm/vmalloc.c
   3  *
   4  *  Copyright (C) 1993  Linus Torvalds
   5  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
   6  *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
   7  *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
   8  *  Numa awareness, Christoph Lameter, SGI, June 2005
   9  */
  10
  11 #include <linux/mm.h>
  12 #include <linux/module.h>
  13 #include <linux/highmem.h>
  14 #include <linux/slab.h>
  15 #include <linux/spinlock.h>
  16 #include <linux/interrupt.h>
  17
  18 #include <linux/vmalloc.h>
  19
  20 #include <asm/uaccess.h>
  21 #include <asm/tlbflush.h>
  22
  23
/*
 * vmlist_lock guards the vmlist chain below.  Code in this file takes it
 * for writing when the list or an area's flags are modified, and for
 * reading when the list is only walked.
 */
DEFINE_RWLOCK(vmlist_lock);
/* Head of the singly linked list of vm_struct descriptors (via ->next). */
struct vm_struct *vmlist;

/*
 * Forward declaration: __vmalloc_area_node() calls this before the
 * definition further down in the file.
 */
static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
                            int node);
  29
  30 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
  31 {
  32         pte_t *pte;
  33
  34         pte = pte_offset_kernel(pmd, addr);
  35         do {
  36                 pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
  37                 WARN_ON(!pte_none(ptent) && !pte_present(ptent));
  38         } while (pte++, addr += PAGE_SIZE, addr != end);
  39 }
  40
  41 static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
  42                                                 unsigned long end)
  43 {
  44         pmd_t *pmd;
  45         unsigned long next;
  46
  47         pmd = pmd_offset(pud, addr);
  48         do {
  49                 next = pmd_addr_end(addr, end);
  50                 if (pmd_none_or_clear_bad(pmd))
  51                         continue;
  52                 vunmap_pte_range(pmd, addr, next);
  53         } while (pmd++, addr = next, addr != end);
  54 }
  55
  56 static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
  57                                                 unsigned long end)
  58 {
  59         pud_t *pud;
  60         unsigned long next;
  61
  62         pud = pud_offset(pgd, addr);
  63         do {
  64                 next = pud_addr_end(addr, end);
  65                 if (pud_none_or_clear_bad(pud))
  66                         continue;
  67                 vunmap_pmd_range(pud, addr, next);
  68         } while (pud++, addr = next, addr != end);
  69 }
  70
  71 void unmap_vm_area(struct vm_struct *area)
  72 {
  73         pgd_t *pgd;
  74         unsigned long next;
  75         unsigned long addr = (unsigned long) area->addr;
  76         unsigned long end = addr + area->size;
  77
  78         BUG_ON(addr >= end);
  79         pgd = pgd_offset_k(addr);
  80         flush_cache_vunmap(addr, end);
  81         do {
  82                 next = pgd_addr_end(addr, end);
  83                 if (pgd_none_or_clear_bad(pgd))
  84                         continue;
  85                 vunmap_pud_range(pgd, addr, next);
  86         } while (pgd++, addr = next, addr != end);
  87         flush_tlb_kernel_range((unsigned long) area->addr, end);
  88 }
  89
  90 static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
  91                         unsigned long end, pgprot_t prot, struct page ***pages)
  92 {
  93         pte_t *pte;
  94
  95         pte = pte_alloc_kernel(pmd, addr);
  96         if (!pte)
  97                 return -ENOMEM;
  98         do {
  99                 struct page *page = **pages;
 100                 WARN_ON(!pte_none(*pte));
 101                 if (!page)
 102                         return -ENOMEM;
 103                 set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
 104                 (*pages)++;
 105         } while (pte++, addr += PAGE_SIZE, addr != end);
 106         return 0;
 107 }
 108
 109 static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
 110                         unsigned long end, pgprot_t prot, struct page ***pages)
 111 {
 112         pmd_t *pmd;
 113         unsigned long next;
 114
 115         pmd = pmd_alloc(&init_mm, pud, addr);
 116         if (!pmd)
 117                 return -ENOMEM;
 118         do {
 119                 next = pmd_addr_end(addr, end);
 120                 if (vmap_pte_range(pmd, addr, next, prot, pages))
 121                         return -ENOMEM;
 122         } while (pmd++, addr = next, addr != end);
 123         return 0;
 124 }
 125
 126 static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
 127                         unsigned long end, pgprot_t prot, struct page ***pages)
 128 {
 129         pud_t *pud;
 130         unsigned long next;
 131
 132         pud = pud_alloc(&init_mm, pgd, addr);
 133         if (!pud)
 134                 return -ENOMEM;
 135         do {
 136                 next = pud_addr_end(addr, end);
 137                 if (vmap_pmd_range(pud, addr, next, prot, pages))
 138                         return -ENOMEM;
 139         } while (pud++, addr = next, addr != end);
 140         return 0;
 141 }
 142
 143 int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
 144 {
 145         pgd_t *pgd;
 146         unsigned long next;
 147         unsigned long addr = (unsigned long) area->addr;
 148         unsigned long end = addr + area->size - PAGE_SIZE;
 149         int err;
 150
 151         BUG_ON(addr >= end);
 152         pgd = pgd_offset_k(addr);
 153         do {
 154                 next = pgd_addr_end(addr, end);
 155                 err = vmap_pud_range(pgd, addr, next, prot, pages);
 156                 if (err)
 157                         break;
 158         } while (pgd++, addr = next, addr != end);
 159         flush_cache_vmap((unsigned long) area->addr, end);
 160         return err;
 161 }
 162
 163 static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags,
 164                                             unsigned long start, unsigned long end,
 165                                             int node, gfp_t gfp_mask)
 166 {
 167         struct vm_struct **p, *tmp, *area;
 168         unsigned long align = 1;
 169         unsigned long addr;
 170
 171         BUG_ON(in_interrupt());
 172         if (flags & VM_IOREMAP) {
 173                 int bit = fls(size);
 174
 175                 if (bit > IOREMAP_MAX_ORDER)
 176                         bit = IOREMAP_MAX_ORDER;
 177                 else if (bit < PAGE_SHIFT)
 178                         bit = PAGE_SHIFT;
 179
 180                 align = 1ul << bit;
 181         }
 182         addr = ALIGN(start, align);
 183         size = PAGE_ALIGN(size);
 184
 185         area = kmalloc_node(sizeof(*area), gfp_mask & GFP_LEVEL_MASK, node);
 186         if (unlikely(!area))
 187                 return NULL;
 188
 189         if (unlikely(!size))
 190                 return NULL;
 191
 192         /*
 193          * We always allocate a guard page.
 194          */
 195         size += PAGE_SIZE;
 196
 197         write_lock(&vmlist_lock);
 198         for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) {
 199                 if ((unsigned long)tmp->addr < addr) {
 200                         if((unsigned long)tmp->addr + tmp->size >= addr)
 201                                 addr = ALIGN(tmp->size +
 202                                              (unsigned long)tmp->addr, align);
 203                         continue;
 204                 }
 205                 if ((size + addr) < addr)
 206                         goto out;
 207                 if (size + addr <= (unsigned long)tmp->addr)
 208                         goto found;
 209                 addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
 210                 if (addr > end - size)
 211                         goto out;
 212         }
 213
 214 found:
 215         area->next = *p;
 216         *p = area;
 217
 218         area->flags = flags;
 219         area->addr = (void *)addr;
 220         area->size = size;
 221         area->pages = NULL;
 222         area->nr_pages = 0;
 223         area->phys_addr = 0;
 224         write_unlock(&vmlist_lock);
 225
 226         return area;
 227
 228 out:
 229         write_unlock(&vmlist_lock);
 230         kfree(area);
 231         if (printk_ratelimit())
 232                 printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n");
 233         return NULL;
 234 }
 235
 236 struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
 237                                 unsigned long start, unsigned long end)
 238 {
 239         return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL);
 240 }
 241
 242 /**
 243  *      get_vm_area  -  reserve a contingous kernel virtual area
 244  *      @size:          size of the area
 245  *      @flags:         %VM_IOREMAP for I/O mappings or VM_ALLOC
 246  *
 247  *      Search an area of @size in the kernel virtual mapping area,
 248  *      and reserved it for out purposes.  Returns the area descriptor
 249  *      on success or %NULL on failure.
 250  */
 251 struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
 252 {
 253         return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END);
 254 }
 255
 256 struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
 257                                    int node, gfp_t gfp_mask)
 258 {
 259         return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node,
 260                                   gfp_mask);
 261 }
 262
 263 /* Caller must hold vmlist_lock */
 264 static struct vm_struct *__find_vm_area(void *addr)
 265 {
 266         struct vm_struct *tmp;
 267
 268         for (tmp = vmlist; tmp != NULL; tmp = tmp->next) {
 269                  if (tmp->addr == addr)
 270                         break;
 271         }
 272
 273         return tmp;
 274 }
 275
 276 /* Caller must hold vmlist_lock */
 277 static struct vm_struct *__remove_vm_area(void *addr)
 278 {
 279         struct vm_struct **p, *tmp;
 280
 281         for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) {
 282                  if (tmp->addr == addr)
 283                          goto found;
 284         }
 285         return NULL;
 286
 287 found:
 288         unmap_vm_area(tmp);
 289         *p = tmp->next;
 290
 291         /*
 292          * Remove the guard page.
 293          */
 294         tmp->size -= PAGE_SIZE;
 295         return tmp;
 296 }
 297
 298 /**
 299  *      remove_vm_area  -  find and remove a contingous kernel virtual area
 300  *      @addr:          base address
 301  *
 302  *      Search for the kernel VM area starting at @addr, and remove it.
 303  *      This function returns the found VM area, but using it is NOT safe
 304  *      on SMP machines, except for its size or flags.
 305  */
 306 struct vm_struct *remove_vm_area(void *addr)
 307 {
 308         struct vm_struct *v;
 309         write_lock(&vmlist_lock);
 310         v = __remove_vm_area(addr);
 311         write_unlock(&vmlist_lock);
 312         return v;
 313 }
 314
 315 void __vunmap(void *addr, int deallocate_pages)
 316 {
 317         struct vm_struct *area;
 318
 319         if (!addr)
 320                 return;
 321
 322         if ((PAGE_SIZE-1) & (unsigned long)addr) {
 323                 printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
 324                 WARN_ON(1);
 325                 return;
 326         }
 327
 328         area = remove_vm_area(addr);
 329         if (unlikely(!area)) {
 330                 printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
 331                                 addr);
 332                 WARN_ON(1);
 333                 return;
 334         }
 335
 336         debug_check_no_locks_freed(addr, area->size);
 337
 338         if (deallocate_pages) {
 339                 int i;
 340
 341                 for (i = 0; i < area->nr_pages; i++) {
 342                         BUG_ON(!area->pages[i]);
 343                         __free_page(area->pages[i]);
 344                 }
 345
 346                 if (area->flags & VM_VPAGES)
 347                         vfree(area->pages);
 348                 else
 349                         kfree(area->pages);
 350         }
 351
 352         kfree(area);
 353         return;
 354 }
 355
 356 /**
 357  *      vfree  -  release memory allocated by vmalloc()
 358  *      @addr:          memory base address
 359  *
 360  *      Free the virtually contiguous memory area starting at @addr, as
 361  *      obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
 362  *      NULL, no operation is performed.
 363  *
 364  *      Must not be called in interrupt context.
 365  */
 366 void vfree(void *addr)
 367 {
 368         BUG_ON(in_interrupt());
 369         __vunmap(addr, 1);
 370 }
 371 EXPORT_SYMBOL(vfree);
 372
 373 /**
 374  *      vunmap  -  release virtual mapping obtained by vmap()
 375  *      @addr:          memory base address
 376  *
 377  *      Free the virtually contiguous memory area starting at @addr,
 378  *      which was created from the page array passed to vmap().
 379  *
 380  *      Must not be called in interrupt context.
 381  */
 382 void vunmap(void *addr)
 383 {
 384         BUG_ON(in_interrupt());
 385         __vunmap(addr, 0);
 386 }
 387 EXPORT_SYMBOL(vunmap);
 388
 389 /**
 390  *      vmap  -  map an array of pages into virtually contiguous space
 391  *      @pages:         array of page pointers
 392  *      @count:         number of pages to map
 393  *      @flags:         vm_area->flags
 394  *      @prot:          page protection for the mapping
 395  *
 396  *      Maps @count pages from @pages into contiguous kernel virtual
 397  *      space.
 398  */
 399 void *vmap(struct page **pages, unsigned int count,
 400                 unsigned long flags, pgprot_t prot)
 401 {
 402         struct vm_struct *area;
 403
 404         if (count > num_physpages)
 405                 return NULL;
 406
 407         area = get_vm_area((count << PAGE_SHIFT), flags);
 408         if (!area)
 409                 return NULL;
 410         if (map_vm_area(area, prot, &pages)) {
 411                 vunmap(area->addr);
 412                 return NULL;
 413         }
 414
 415         return area->addr;
 416 }
 417 EXPORT_SYMBOL(vmap);
 418
 419 void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 420                                 pgprot_t prot, int node)
 421 {
 422         struct page **pages;
 423         unsigned int nr_pages, array_size, i;
 424
 425         nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
 426         array_size = (nr_pages * sizeof(struct page *));
 427
 428         area->nr_pages = nr_pages;
 429         /* Please note that the recursion is strictly bounded. */
 430         if (array_size > PAGE_SIZE) {
 431                 pages = __vmalloc_node(array_size, gfp_mask, PAGE_KERNEL, node);
 432                 area->flags |= VM_VPAGES;
 433         } else {
 434                 pages = kmalloc_node(array_size,
 435                                 (gfp_mask & ~(__GFP_HIGHMEM | __GFP_ZERO)),
 436                                 node);
 437         }
 438         area->pages = pages;
 439         if (!area->pages) {
 440                 remove_vm_area(area->addr);
 441                 kfree(area);
 442                 return NULL;
 443         }
 444         memset(area->pages, 0, array_size);
 445
 446         for (i = 0; i < area->nr_pages; i++) {
 447                 if (node < 0)
 448                         area->pages[i] = alloc_page(gfp_mask);
 449                 else
 450                         area->pages[i] = alloc_pages_node(node, gfp_mask, 0);
 451                 if (unlikely(!area->pages[i])) {
 452                         /* Successfully allocated i pages, free them in __vunmap() */
 453                         area->nr_pages = i;
 454                         goto fail;
 455                 }
 456         }
 457
 458         if (map_vm_area(area, prot, &pages))
 459                 goto fail;
 460         return area->addr;
 461
 462 fail:
 463         vfree(area->addr);
 464         return NULL;
 465 }
 466
 467 void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
 468 {
 469         return __vmalloc_area_node(area, gfp_mask, prot, -1);
 470 }
 471
 472 /**
 473  *      __vmalloc_node  -  allocate virtually contiguous memory
 474  *      @size:          allocation size
 475  *      @gfp_mask:      flags for the page level allocator
 476  *      @prot:          protection mask for the allocated pages
 477  *      @node:          node to use for allocation or -1
 478  *
 479  *      Allocate enough pages to cover @size from the page level
 480  *      allocator with @gfp_mask flags.  Map them into contiguous
 481  *      kernel virtual space, using a pagetable protection of @prot.
 482  */
 483 static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
 484                             int node)
 485 {
 486         struct vm_struct *area;
 487
 488         size = PAGE_ALIGN(size);
 489         if (!size || (size >> PAGE_SHIFT) > num_physpages)
 490                 return NULL;
 491
 492         area = get_vm_area_node(size, VM_ALLOC, node, gfp_mask);
 493         if (!area)
 494                 return NULL;
 495
 496         return __vmalloc_area_node(area, gfp_mask, prot, node);
 497 }
 498
 499 void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
 500 {
 501         return __vmalloc_node(size, gfp_mask, prot, -1);
 502 }
 503 EXPORT_SYMBOL(__vmalloc);
 504
 505 /**
 506  *      vmalloc  -  allocate virtually contiguous memory
 507  *      @size:          allocation size
 508  *      Allocate enough pages to cover @size from the page level
 509  *      allocator and map them into contiguous kernel virtual space.
 510  *
 511  *      For tight control over page level allocator and protection flags
 512  *      use __vmalloc() instead.
 513  */
 514 void *vmalloc(unsigned long size)
 515 {
 516         return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
 517 }
 518 EXPORT_SYMBOL(vmalloc);
 519
 520 /**
 521  * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
 522  * @size: allocation size
 523  *
 524  * The resulting memory area is zeroed so it can be mapped to userspace
 525  * without leaking data.
 526  */
 527 void *vmalloc_user(unsigned long size)
 528 {
 529         struct vm_struct *area;
 530         void *ret;
 531
 532         ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
 533         if (ret) {
 534                 write_lock(&vmlist_lock);
 535                 area = __find_vm_area(ret);
 536                 area->flags |= VM_USERMAP;
 537                 write_unlock(&vmlist_lock);
 538         }
 539         return ret;
 540 }
 541 EXPORT_SYMBOL(vmalloc_user);
 542
 543 /**
 544  *      vmalloc_node  -  allocate memory on a specific node
 545  *      @size:          allocation size
 546  *      @node:          numa node
 547  *
 548  *      Allocate enough pages to cover @size from the page level
 549  *      allocator and map them into contiguous kernel virtual space.
 550  *
 551  *      For tight control over page level allocator and protection flags
 552  *      use __vmalloc() instead.
 553  */
 554 void *vmalloc_node(unsigned long size, int node)
 555 {
 556         return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, node);
 557 }
 558 EXPORT_SYMBOL(vmalloc_node);
 559
 560 #ifndef PAGE_KERNEL_EXEC
 561 # define PAGE_KERNEL_EXEC PAGE_KERNEL
 562 #endif
 563
 564 /**
 565  *      vmalloc_exec  -  allocate virtually contiguous, executable memory
 566  *      @size:          allocation size
 567  *
 568  *      Kernel-internal function to allocate enough pages to cover @size
 569  *      the page level allocator and map them into contiguous and
 570  *      executable kernel virtual space.
 571  *
 572  *      For tight control over page level allocator and protection flags
 573  *      use __vmalloc() instead.
 574  */
 575
 576 void *vmalloc_exec(unsigned long size)
 577 {
 578         return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
 579 }
 580
 581 /**
 582  *      vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
 583  *      @size:          allocation size
 584  *
 585  *      Allocate enough 32bit PA addressable pages to cover @size from the
 586  *      page level allocator and map them into contiguous kernel virtual space.
 587  */
 588 void *vmalloc_32(unsigned long size)
 589 {
 590         return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
 591 }
 592 EXPORT_SYMBOL(vmalloc_32);
 593
 594 /**
 595  * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
 596  *      @size:          allocation size
 597  *
 598  * The resulting memory area is 32bit addressable and zeroed so it can be
 599  * mapped to userspace without leaking data.
 600  */
 601 void *vmalloc_32_user(unsigned long size)
 602 {
 603         struct vm_struct *area;
 604         void *ret;
 605
 606         ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
 607         if (ret) {
 608                 write_lock(&vmlist_lock);
 609                 area = __find_vm_area(ret);
 610                 area->flags |= VM_USERMAP;
 611                 write_unlock(&vmlist_lock);
 612         }
 613         return ret;
 614 }
 615 EXPORT_SYMBOL(vmalloc_32_user);
 616
 617 long vread(char *buf, char *addr, unsigned long count)
 618 {
 619         struct vm_struct *tmp;
 620         char *vaddr, *buf_start = buf;
 621         unsigned long n;
 622
 623         /* Don't allow overflow */
 624         if ((unsigned long) addr + count < count)
 625                 count = -(unsigned long) addr;
 626
 627         read_lock(&vmlist_lock);
 628         for (tmp = vmlist; tmp; tmp = tmp->next) {
 629                 vaddr = (char *) tmp->addr;
 630                 if (addr >= vaddr + tmp->size - PAGE_SIZE)
 631                         continue;
 632                 while (addr < vaddr) {
 633                         if (count == 0)
 634                                 goto finished;
 635                         *buf = '\0';
 636                         buf++;
 637                         addr++;
 638                         count--;
 639                 }
 640                 n = vaddr + tmp->size - PAGE_SIZE - addr;
 641                 do {
 642                         if (count == 0)
 643                                 goto finished;
 644                         *buf = *addr;
 645                         buf++;
 646                         addr++;
 647                         count--;
 648                 } while (--n > 0);
 649         }
 650 finished:
 651         read_unlock(&vmlist_lock);
 652         return buf - buf_start;
 653 }
 654
 655 long vwrite(char *buf, char *addr, unsigned long count)
 656 {
 657         struct vm_struct *tmp;
 658         char *vaddr, *buf_start = buf;
 659         unsigned long n;
 660
 661         /* Don't allow overflow */
 662         if ((unsigned long) addr + count < count)
 663                 count = -(unsigned long) addr;
 664
 665         read_lock(&vmlist_lock);
 666         for (tmp = vmlist; tmp; tmp = tmp->next) {
 667                 vaddr = (char *) tmp->addr;
 668                 if (addr >= vaddr + tmp->size - PAGE_SIZE)
 669                         continue;
 670                 while (addr < vaddr) {
 671                         if (count == 0)
 672                                 goto finished;
 673                         buf++;
 674                         addr++;
 675                         count--;
 676                 }
 677                 n = vaddr + tmp->size - PAGE_SIZE - addr;
 678                 do {
 679                         if (count == 0)
 680                                 goto finished;
 681                         *addr = *buf;
 682                         buf++;
 683                         addr++;
 684                         count--;
 685                 } while (--n > 0);
 686         }
 687 finished:
 688         read_unlock(&vmlist_lock);
 689         return buf - buf_start;
 690 }
 691
 692 /**
 693  *      remap_vmalloc_range  -  map vmalloc pages to userspace
 694  *      @vma:           vma to cover (map full range of vma)
 695  *      @addr:          vmalloc memory
 696  *      @pgoff:         number of pages into addr before first page to map
 697  *      @returns:       0 for success, -Exxx on failure
 698  *
 699  *      This function checks that addr is a valid vmalloc'ed area, and
 700  *      that it is big enough to cover the vma. Will return failure if
 701  *      that criteria isn't met.
 702  *
 703  *      Similar to remap_pfn_range (see mm/memory.c)
 704  */
 705 int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 706                                                 unsigned long pgoff)
 707 {
 708         struct vm_struct *area;
 709         unsigned long uaddr = vma->vm_start;
 710         unsigned long usize = vma->vm_end - vma->vm_start;
 711         int ret;
 712
 713         if ((PAGE_SIZE-1) & (unsigned long)addr)
 714                 return -EINVAL;
 715
 716         read_lock(&vmlist_lock);
 717         area = __find_vm_area(addr);
 718         if (!area)
 719                 goto out_einval_locked;
 720
 721         if (!(area->flags & VM_USERMAP))
 722                 goto out_einval_locked;
 723
 724         if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
 725                 goto out_einval_locked;
 726         read_unlock(&vmlist_lock);
 727
 728         addr += pgoff << PAGE_SHIFT;
 729         do {
 730                 struct page *page = vmalloc_to_page(addr);
 731                 ret = vm_insert_page(vma, uaddr, page);
 732                 if (ret)
 733                         return ret;
 734
 735                 uaddr += PAGE_SIZE;
 736                 addr += PAGE_SIZE;
 737                 usize -= PAGE_SIZE;
 738         } while (usize > 0);
 739
 740         /* Prevent "things" like memory migration? VM_flags need a cleanup... */
 741         vma->vm_flags |= VM_RESERVED;
 742
 743         return ret;
 744
 745 out_einval_locked:
 746         read_unlock(&vmlist_lock);
 747         return -EINVAL;
 748 }
 749 EXPORT_SYMBOL(remap_vmalloc_range);
 750