// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2005-2017 Andes Technology Corporation

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/dma-noncoherent.h>
#include <linux/io.h>
#include <linux/cache.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/proc-fns.h>
/*
 * This is the page table (2MB) covering uncached, DMA consistent allocations
 */
static pte_t *consistent_pte;
static DEFINE_RAW_SPINLOCK(consistent_lock);
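/*
 * consistent_lock serializes all updates to the list of live regions hung
 * off consistent_head below, as well as the teardown of their page table
 * entries in arch_dma_free().
 */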
/*
 * VM region handling support.
 *
 * This should become something generic, handling VM region allocations for
 * vmalloc and similar (ioremap, module space, etc).
 *
 * I envisage vmalloc()'s supporting vm_struct becoming:
 *
 *  struct vm_struct {
 *    struct vm_region region;
 *    unsigned long    flags;
 *    struct page      **pages;
 *    unsigned int     nr_pages;
 *    unsigned long    phys_addr;
 *  };
 *
 * get_vm_area() would then call vm_region_alloc with an appropriate
 * struct vm_region head (eg):
 *
 *  struct vm_region vmalloc_head = {
 *    .vm_list  = LIST_HEAD_INIT(vmalloc_head.vm_list),
 *    .vm_start = VMALLOC_START,
 *    .vm_end   = VMALLOC_END,
 *  };
 *
 * However, vmalloc_head.vm_start is variable (typically, it is dependent on
 * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
 * would have to initialise this each time prior to calling vm_region_alloc().
 */
struct arch_vm_region {
	struct list_head vm_list;
	unsigned long vm_start;
	unsigned long vm_end;
	struct page *vm_pages;
};
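/*
 * consistent_head is a dummy region spanning the whole consistent area;
 * live allocations are linked on its vm_list in ascending address order.
 */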
static struct arch_vm_region consistent_head = {
	.vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
	.vm_start = CONSISTENT_BASE,
	.vm_end = CONSISTENT_END,
};
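/*
 * Carve a size-byte hole out of [head->vm_start, head->vm_end) by walking
 * the address-ordered region list, and insert a new arch_vm_region
 * describing it.  Returns the new region, or NULL if there is no space or
 * the bookkeeping allocation fails.
 */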
static struct arch_vm_region *vm_region_alloc(struct arch_vm_region *head,
					      size_t size, gfp_t gfp)
{
	unsigned long addr = head->vm_start, end = head->vm_end - size;
	unsigned long flags;
	struct arch_vm_region *c, *new;

	new = kmalloc(sizeof(struct arch_vm_region), gfp);
	if (!new)
		goto out;

	raw_spin_lock_irqsave(&consistent_lock, flags);

	list_for_each_entry(c, &head->vm_list, vm_list) {
		if ((addr + size) < addr)
			goto nospc;
		if ((addr + size) <= c->vm_start)
			goto found;
		addr = c->vm_end;
		if (addr > end)
			goto nospc;
	}

found:
	/*
	 * Insert this entry _before_ the one we found.
	 */
	list_add_tail(&new->vm_list, &c->vm_list);
	new->vm_start = addr;
	new->vm_end = addr + size;

	raw_spin_unlock_irqrestore(&consistent_lock, flags);
	return new;

nospc:
	raw_spin_unlock_irqrestore(&consistent_lock, flags);
	kfree(new);
out:
	return NULL;
}
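/*
 * Look up the region whose vm_start exactly matches addr.  Callers are
 * expected to hold consistent_lock.
 */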
static struct arch_vm_region *vm_region_find(struct arch_vm_region *head,
					     unsigned long addr)
{
	struct arch_vm_region *c;

	list_for_each_entry(c, &head->vm_list, vm_list) {
		if (c->vm_start == addr)
			return c;
	}

	return NULL;
}
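/*
 * arch_dma_alloc() is the backend used by the generic dma-noncoherent code
 * for coherent allocations on this architecture: it allocates pages, writes
 * back and invalidates them in the kernel direct mapping, then remaps them
 * uncached inside the CONSISTENT region.
 *
 * Drivers do not call it directly; a typical (illustrative) user goes
 * through the DMA API, e.g.:
 *
 *	dma_addr_t dma;
 *	void *cpu = dma_alloc_coherent(dev, SZ_4K, &dma, GFP_KERNEL);
 *	...
 *	dma_free_coherent(dev, SZ_4K, cpu, dma);
 */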
void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
		gfp_t gfp, unsigned long attrs)
{
	struct page *page;
	struct arch_vm_region *c;
	int order;
	u64 mask = ~0ULL, limit;
	pgprot_t prot = pgprot_noncached(PAGE_KERNEL);

	if (!consistent_pte) {
		pr_err("%s: not initialized\n", __func__);
		return NULL;
	}
	if (dev) {
		mask = dev->coherent_dma_mask;

		/*
		 * Sanity check the DMA mask - it must be non-zero, and
		 * must be able to be satisfied by a DMA allocation.
		 */
		if (!mask) {
			dev_warn(dev, "coherent DMA mask is unset\n");
			goto no_page;
		}
	}
	/*
	 * Sanity check the allocation size.
	 */
	size = PAGE_ALIGN(size);
	limit = (mask + 1) & ~mask;
	if ((limit && size >= limit) ||
	    size >= (CONSISTENT_END - CONSISTENT_BASE)) {
		pr_warn("coherent allocation too big (requested %#zx mask %#llx)\n",
			size, mask);
		goto no_page;
	}

	order = get_order(size);
	if (mask != 0xffffffff)
		gfp |= GFP_DMA;

	page = alloc_pages(gfp, order);
	if (!page)
		goto no_page;

	/*
	 * Invalidate any data that might be lurking in the
	 * kernel direct-mapped region for device DMA.
	 */
	{
		unsigned long kaddr = (unsigned long)page_address(page);

		memset(page_address(page), 0, size);
		cpu_dma_wbinval_range(kaddr, kaddr + size);
	}
	/*
	 * Allocate a virtual address in the consistent mapping region.
	 */
	c = vm_region_alloc(&consistent_head, size,
			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
	if (c) {
		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
		struct page *end = page + (1 << order);

		c->vm_pages = page;

		/*
		 * Set the "dma handle"
		 */
		*handle = page_to_phys(page);

		do {
			BUG_ON(!pte_none(*pte));

			/*
			 * x86 does not mark the pages reserved...
			 */
			SetPageReserved(page);
			set_pte(pte, mk_pte(page, prot));
			page++;
			pte++;
		} while (size -= PAGE_SIZE);

		/*
		 * Free the otherwise unused pages.
		 */
		while (page < end) {
			__free_page(page);
			page++;
		}

		return (void *)c->vm_start;
	}

	__free_pages(page, order);
no_page:
	*handle = ~0;
	return NULL;
}
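/*
 * arch_dma_free() undoes arch_dma_alloc(): it looks the region up by its
 * CPU address, tears down the uncached PTEs, releases the pages and flushes
 * the TLB over the freed range.
 */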
void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
		dma_addr_t handle, unsigned long attrs)
{
	struct arch_vm_region *c;
	unsigned long flags, addr;
	pte_t *ptep;

	size = PAGE_ALIGN(size);

	raw_spin_lock_irqsave(&consistent_lock, flags);

	c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
	if (!c)
		goto no_area;
	if ((c->vm_end - c->vm_start) != size) {
		pr_err("%s: freeing wrong coherent size (%lu != %zu)\n",
		       __func__, c->vm_end - c->vm_start, size);
		size = c->vm_end - c->vm_start;
	}
	ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
	addr = c->vm_start;
	do {
		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
		unsigned long pfn;

		ptep++;
		addr += PAGE_SIZE;

		if (!pte_none(pte) && pte_present(pte)) {
			pfn = pte_pfn(pte);

			if (pfn_valid(pfn)) {
				struct page *page = pfn_to_page(pfn);

				/*
				 * x86 does not mark the pages reserved...
				 */
				ClearPageReserved(page);

				__free_page(page);
				continue;
			}
		}

		pr_crit("%s: bad page in kernel page table\n", __func__);
	} while (size -= PAGE_SIZE);
	flush_tlb_kernel_range(c->vm_start, c->vm_end);

	list_del(&c->vm_list);

	raw_spin_unlock_irqrestore(&consistent_lock, flags);

	kfree(c);
	return;

no_area:
	raw_spin_unlock_irqrestore(&consistent_lock, flags);
	pr_err("%s: trying to free invalid coherent area: %p\n",
	       __func__, cpu_addr);
}
/*
 * Initialise the consistent memory allocation.
 */
static int __init consistent_init(void)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
	pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
	if (!pmd) {
		pr_err("%s: no pmd tables\n", __func__);
		return -ENOMEM;
	}
	/* The first level mapping may be created in somewhere.
	 * It's not necessary to warn here. */
	/* WARN_ON(!pmd_none(*pmd)); */

	pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
	if (!pte)
		return -ENOMEM;

	consistent_pte = pte;

	return 0;
}

core_initcall(consistent_init);
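/*
 * Apply a cache maintenance primitive to the physical range
 * [paddr, paddr + size).  Highmem pages are temporarily mapped with
 * kmap_atomic() one page at a time; lowmem is handled directly through
 * the kernel linear mapping.
 */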
static inline void cache_op(phys_addr_t paddr, size_t size,
		void (*fn)(unsigned long start, unsigned long end))
{
	struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
	unsigned offset = paddr & ~PAGE_MASK;
	size_t left = size;
	unsigned long start;

	do {
		size_t len = left;

		if (PageHighMem(page)) {
			void *addr;

			if (offset + len > PAGE_SIZE) {
				if (offset >= PAGE_SIZE) {
					page += offset >> PAGE_SHIFT;
					offset &= ~PAGE_MASK;
				}
				len = PAGE_SIZE - offset;
			}

			addr = kmap_atomic(page);
			start = (unsigned long)(addr + offset);
			fn(start, start + len);
			kunmap_atomic(addr);
		} else {
			start = (unsigned long)phys_to_virt(paddr);
			fn(start, start + size);
		}
		offset = 0;
		page++;
		left -= len;
	} while (left);
}
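/*
 * Streaming DMA: make a buffer visible to the device before it starts a
 * transfer.  For DMA_TO_DEVICE and DMA_BIDIRECTIONAL the dirty cache lines
 * are written back; DMA_FROM_DEVICE buffers are dealt with on the
 * arch_sync_dma_for_cpu() side instead.
 */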
void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir)
{
	switch (dir) {
	case DMA_FROM_DEVICE:
		break;
	case DMA_TO_DEVICE:
	case DMA_BIDIRECTIONAL:
		cache_op(paddr, size, cpu_dma_wb_range);
		break;
	default:
		BUG();
	}
}
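/*
 * Streaming DMA: make a buffer visible to the CPU after the device has
 * written it.  For DMA_FROM_DEVICE and DMA_BIDIRECTIONAL any stale cache
 * lines over the buffer are invalidated; DMA_TO_DEVICE needs no work here.
 */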
void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir)
{
	switch (dir) {
	case DMA_TO_DEVICE:
		break;
	case DMA_FROM_DEVICE:
	case DMA_BIDIRECTIONAL:
		cache_op(paddr, size, cpu_dma_inval_range);
		break;
	default:
		BUG();
	}
}