arch/nds32/mm/fault.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 // Copyright (C) 2005-2017 Andes Technology Corporation
   3
   4 #include <linux/extable.h>
   5 #include <linux/module.h>
   6 #include <linux/signal.h>
   7 #include <linux/ptrace.h>
   8 #include <linux/mm.h>
   9 #include <linux/init.h>
  10 #include <linux/hardirq.h>
  11 #include <linux/uaccess.h>
  12
  13 #include <asm/pgtable.h>
  14 #include <asm/tlbflush.h>
  15
  16 extern void die(const char *str, struct pt_regs *regs, long err);
  17
  18 /*
  19  * This is useful to dump out the page tables associated with
  20  * 'addr' in mm 'mm'.
  21  */
  22 void show_pte(struct mm_struct *mm, unsigned long addr)
  23 {
  24         pgd_t *pgd;
  25         if (!mm)
  26                 mm = &init_mm;
  27
  28         pr_alert("pgd = %p\n", mm->pgd);
  29         pgd = pgd_offset(mm, addr);
  30         pr_alert("[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));
  31
  32         do {
  33                 pmd_t *pmd;
  34
  35                 if (pgd_none(*pgd))
  36                         break;
  37
  38                 if (pgd_bad(*pgd)) {
  39                         pr_alert("(bad)");
  40                         break;
  41                 }
  42
  43                 pmd = pmd_offset(pgd, addr);
  44 #if PTRS_PER_PMD != 1
  45                 pr_alert(", *pmd=%08lx", pmd_val(*pmd));
  46 #endif
  47
  48                 if (pmd_none(*pmd))
  49                         break;
  50
  51                 if (pmd_bad(*pmd)) {
  52                         pr_alert("(bad)");
  53                         break;
  54                 }
  55
  56                 if (IS_ENABLED(CONFIG_HIGHMEM))
  57                 {
  58                         pte_t *pte;
  59                         /* We must not map this if we have highmem enabled */
  60                         pte = pte_offset_map(pmd, addr);
  61                         pr_alert(", *pte=%08lx", pte_val(*pte));
  62                         pte_unmap(pte);
  63                 }
  64         } while (0);
  65
  66         pr_alert("\n");
  67 }
  68
  69 void do_page_fault(unsigned long entry, unsigned long addr,
  70                    unsigned int error_code, struct pt_regs *regs)
  71 {
  72         struct task_struct *tsk;
  73         struct mm_struct *mm;
  74         struct vm_area_struct *vma;
  75         int si_code;
  76         int fault;
  77         unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;
  78         unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
  79
  80         error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE);
  81         tsk = current;
  82         mm = tsk->mm;
  83         si_code = SEGV_MAPERR;
  84         /*
  85          * We fault-in kernel-space virtual memory on-demand. The
  86          * 'reference' page table is init_mm.pgd.
  87          *
  88          * NOTE! We MUST NOT take any locks for this case. We may
  89          * be in an interrupt or a critical region, and should
  90          * only copy the information from the master page table,
  91          * nothing more.
  92          */
  93         if (addr >= TASK_SIZE) {
  94                 if (user_mode(regs))
  95                         goto bad_area_nosemaphore;
  96
  97                 if (addr >= TASK_SIZE && addr < VMALLOC_END
  98                     && (entry == ENTRY_PTE_NOT_PRESENT))
  99                         goto vmalloc_fault;
 100                 else
 101                         goto no_context;
 102         }
 103
 104         /* Send a signal to the task for handling the unalignment access. */
 105         if (entry == ENTRY_GENERAL_EXCPETION
 106             && error_code == ETYPE_ALIGNMENT_CHECK) {
 107                 if (user_mode(regs))
 108                         goto bad_area_nosemaphore;
 109                 else
 110                         goto no_context;
 111         }
 112
 113         /*
 114          * If we're in an interrupt or have no user
 115          * context, we must not take the fault..
 116          */
 117         if (unlikely(faulthandler_disabled() || !mm))
 118                 goto no_context;
 119
 120         /*
 121          * As per x86, we may deadlock here. However, since the kernel only
 122          * validly references user space from well defined areas of the code,
 123          * we can bug out early if this is from code which shouldn't.
 124          */
 125         if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
 126                 if (!user_mode(regs) &&
 127                     !search_exception_tables(instruction_pointer(regs)))
 128                         goto no_context;
 129 retry:
 130                 down_read(&mm->mmap_sem);
 131         } else {
 132                 /*
 133                  * The above down_read_trylock() might have succeeded in which
 134                  * case, we'll have missed the might_sleep() from down_read().
 135                  */
 136                 might_sleep();
 137                 if (IS_ENABLED(CONFIG_DEBUG_VM)) {
 138                         if (!user_mode(regs) &&
 139                             !search_exception_tables(instruction_pointer(regs)))
 140                                 goto no_context;
 141                 }
 142         }
 143
 144         vma = find_vma(mm, addr);
 145
 146         if (unlikely(!vma))
 147                 goto bad_area;
 148
 149         if (vma->vm_start <= addr)
 150                 goto good_area;
 151
 152         if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
 153                 goto bad_area;
 154
 155         if (unlikely(expand_stack(vma, addr)))
 156                 goto bad_area;
 157
 158         /*
 159          * Ok, we have a good vm_area for this memory access, so
 160          * we can handle it..
 161          */
 162
 163 good_area:
 164         si_code = SEGV_ACCERR;
 165
 166         /* first do some preliminary protection checks */
 167         if (entry == ENTRY_PTE_NOT_PRESENT) {
 168                 if (error_code & ITYPE_mskINST)
 169                         mask = VM_EXEC;
 170                 else {
 171                         mask = VM_READ | VM_WRITE;
 172                         if (vma->vm_flags & VM_WRITE)
 173                                 flags |= FAULT_FLAG_WRITE;
 174                 }
 175         } else if (entry == ENTRY_TLB_MISC) {
 176                 switch (error_code & ITYPE_mskETYPE) {
 177                 case RD_PROT:
 178                         mask = VM_READ;
 179                         break;
 180                 case WRT_PROT:
 181                         mask = VM_WRITE;
 182                         flags |= FAULT_FLAG_WRITE;
 183                         break;
 184                 case NOEXEC:
 185                         mask = VM_EXEC;
 186                         break;
 187                 case PAGE_MODIFY:
 188                         mask = VM_WRITE;
 189                         flags |= FAULT_FLAG_WRITE;
 190                         break;
 191                 case ACC_BIT:
 192                         BUG();
 193                 default:
 194                         break;
 195                 }
 196
 197         }
 198         if (!(vma->vm_flags & mask))
 199                 goto bad_area;
 200
 201         /*
 202          * If for any reason at all we couldn't handle the fault,
 203          * make sure we exit gracefully rather than endlessly redo
 204          * the fault.
 205          */
 206
 207         fault = handle_mm_fault(vma, addr, flags);
 208
 209         /*
 210          * If we need to retry but a fatal signal is pending, handle the
 211          * signal first. We do not need to release the mmap_sem because it
 212          * would already be released in __lock_page_or_retry in mm/filemap.c.
 213          */
 214         if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
 215                 if (!user_mode(regs))
 216                         goto no_context;
 217                 return;
 218         }
 219
 220         if (unlikely(fault & VM_FAULT_ERROR)) {
 221                 if (fault & VM_FAULT_OOM)
 222                         goto out_of_memory;
 223                 else if (fault & VM_FAULT_SIGBUS)
 224                         goto do_sigbus;
 225                 else
 226                         goto bad_area;
 227         }
 228
 229         /*
 230          * Major/minor page fault accounting is only done on the initial
 231          * attempt. If we go through a retry, it is extremely likely that the
 232          * page will be found in page cache at that point.
 233          */
 234         if (flags & FAULT_FLAG_ALLOW_RETRY) {
 235                 if (fault & VM_FAULT_MAJOR)
 236                         tsk->maj_flt++;
 237                 else
 238                         tsk->min_flt++;
 239                 if (fault & VM_FAULT_RETRY) {
 240                         flags &= ~FAULT_FLAG_ALLOW_RETRY;
 241                         flags |= FAULT_FLAG_TRIED;
 242
 243                         /* No need to up_read(&mm->mmap_sem) as we would
 244                          * have already released it in __lock_page_or_retry
 245                          * in mm/filemap.c.
 246                          */
 247                         goto retry;
 248                 }
 249         }
 250
 251         up_read(&mm->mmap_sem);
 252         return;
 253
 254         /*
 255          * Something tried to access memory that isn't in our memory map..
 256          * Fix it, but check if it's kernel or user first..
 257          */
 258 bad_area:
 259         up_read(&mm->mmap_sem);
 260
 261 bad_area_nosemaphore:
 262
 263         /* User mode accesses just cause a SIGSEGV */
 264
 265         if (user_mode(regs)) {
 266                 tsk->thread.address = addr;
 267                 tsk->thread.error_code = error_code;
 268                 tsk->thread.trap_no = entry;
 269                 force_sig_fault(SIGSEGV, si_code, (void __user *)addr, tsk);
 270                 return;
 271         }
 272
 273 no_context:
 274
 275         /* Are we prepared to handle this kernel fault?
 276          *
 277          * (The kernel has valid exception-points in the source
 278          *  when it acesses user-memory. When it fails in one
 279          *  of those points, we find it in a table and do a jump
 280          *  to some fixup code that loads an appropriate error
 281          *  code)
 282          */
 283
 284         {
 285                 const struct exception_table_entry *entry;
 286
 287                 if ((entry =
 288                      search_exception_tables(instruction_pointer(regs))) !=
 289                     NULL) {
 290                         /* Adjust the instruction pointer in the stackframe */
 291                         instruction_pointer(regs) = entry->fixup;
 292                         return;
 293                 }
 294         }
 295
 296         /*
 297          * Oops. The kernel tried to access some bad page. We'll have to
 298          * terminate things with extreme prejudice.
 299          */
 300
 301         bust_spinlocks(1);
 302         pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
 303                  (addr < PAGE_SIZE) ? "NULL pointer dereference" :
 304                  "paging request", addr);
 305
 306         show_pte(mm, addr);
 307         die("Oops", regs, error_code);
 308         bust_spinlocks(0);
 309         do_exit(SIGKILL);
 310
 311         return;
 312
 313         /*
 314          * We ran out of memory, or some other thing happened to us that made
 315          * us unable to handle the page fault gracefully.
 316          */
 317
 318 out_of_memory:
 319         up_read(&mm->mmap_sem);
 320         if (!user_mode(regs))
 321                 goto no_context;
 322         pagefault_out_of_memory();
 323         return;
 324
 325 do_sigbus:
 326         up_read(&mm->mmap_sem);
 327
 328         /* Kernel mode? Handle exceptions or die */
 329         if (!user_mode(regs))
 330                 goto no_context;
 331
 332         /*
 333          * Send a sigbus
 334          */
 335         tsk->thread.address = addr;
 336         tsk->thread.error_code = error_code;
 337         tsk->thread.trap_no = entry;
 338         force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr, tsk);
 339
 340         return;
 341
 342 vmalloc_fault:
 343         {
 344                 /*
 345                  * Synchronize this task's top level page-table
 346                  * with the 'reference' page table.
 347                  *
 348                  * Use current_pgd instead of tsk->active_mm->pgd
 349                  * since the latter might be unavailable if this
 350                  * code is executed in a misfortunately run irq
 351                  * (like inside schedule() between switch_mm and
 352                  *  switch_to...).
 353                  */
 354
 355                 unsigned int index = pgd_index(addr);
 356                 pgd_t *pgd, *pgd_k;
 357                 pud_t *pud, *pud_k;
 358                 pmd_t *pmd, *pmd_k;
 359                 pte_t *pte_k;
 360
 361                 pgd = (pgd_t *) __va(__nds32__mfsr(NDS32_SR_L1_PPTB)) + index;
 362                 pgd_k = init_mm.pgd + index;
 363
 364                 if (!pgd_present(*pgd_k))
 365                         goto no_context;
 366
 367                 pud = pud_offset(pgd, addr);
 368                 pud_k = pud_offset(pgd_k, addr);
 369                 if (!pud_present(*pud_k))
 370                         goto no_context;
 371
 372                 pmd = pmd_offset(pud, addr);
 373                 pmd_k = pmd_offset(pud_k, addr);
 374                 if (!pmd_present(*pmd_k))
 375                         goto no_context;
 376
 377                 if (!pmd_present(*pmd))
 378                         set_pmd(pmd, *pmd_k);
 379                 else
 380                         BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
 381
 382                 /*
 383                  * Since the vmalloc area is global, we don't
 384                  * need to copy individual PTE's, it is enough to
 385                  * copy the pgd pointer into the pte page of the
 386                  * root task. If that is there, we'll find our pte if
 387                  * it exists.
 388                  */
 389
 390                 /* Make sure the actual PTE exists as well to
 391                  * catch kernel vmalloc-area accesses to non-mapped
 392                  * addres. If we don't do this, this will just
 393                  * silently loop forever.
 394                  */
 395
 396                 pte_k = pte_offset_kernel(pmd_k, addr);
 397                 if (!pte_present(*pte_k))
 398                         goto no_context;
 399
 400                 return;
 401         }
 402 }