/*
 * Page fault handler for SH with an MMU.
 *
 *  Copyright (C) 1999  Niibe Yutaka
 *  Copyright (C) 2003 - 2012  Paul Mundt
 *
 *  Based on linux/arch/i386/mm/fault.c:
 *   Copyright (C) 1995  Linus Torvalds
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/kdebug.h>
#include <linux/uaccess.h>
#include <asm/io_trapped.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/traps.h>
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
	int ret = 0;

	if (kprobes_built_in() && !user_mode(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, trap))
			ret = 1;
		preempt_enable();
	}

	return ret;
}
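/*
 * A non-zero return from the helper above means a kprobes fault handler
 * claimed the fault (typically one raised while single-stepping a probed
 * instruction); do_page_fault() then returns immediately without touching
 * mmap_sem or the vma-based path.
 */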
static void
force_sig_info_fault(int si_signo, int si_code, unsigned long address,
		     struct task_struct *tsk)
{
	struct siginfo info;

	info.si_signo	= si_signo;
	info.si_errno	= 0;
	info.si_code	= si_code;
	info.si_addr	= (void __user *)address;

	force_sig_info(si_signo, &info, tsk);
}
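/*
 * The callers below use this helper to raise the fault signal against the
 * offending task, e.g. (sketch of the call made in __bad_area_nosemaphore()):
 *
 *	force_sig_info_fault(SIGSEGV, SEGV_MAPERR, address, current);
 *
 * si_addr carries the faulting virtual address, so a userspace signal
 * handler can inspect it through its siginfo_t argument.
 */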
/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
static void show_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;

	if (mm) {
		pgd = mm->pgd;
	} else {
		pgd = get_TTB();

		if (unlikely(!pgd))
			pgd = swapper_pg_dir;
	}

	printk(KERN_ALERT "pgd = %p\n", pgd);
	pgd += pgd_index(addr);
	printk(KERN_ALERT "[%08lx] *pgd=%0*Lx", addr,
	       (u32)(sizeof(*pgd) * 2), (u64)pgd_val(*pgd));

	do {
		pud_t *pud;
		pmd_t *pmd;
		pte_t *pte;

		if (pgd_none(*pgd))
			break;

		if (pgd_bad(*pgd)) {
			printk("(bad)");
			break;
		}

		pud = pud_offset(pgd, addr);
		if (PTRS_PER_PUD != 1)
			printk(", *pud=%0*Lx", (u32)(sizeof(*pud) * 2),
			       (u64)pud_val(*pud));

		if (pud_none(*pud))
			break;

		if (pud_bad(*pud)) {
			printk("(bad)");
			break;
		}

		pmd = pmd_offset(pud, addr);
		if (PTRS_PER_PMD != 1)
			printk(", *pmd=%0*Lx", (u32)(sizeof(*pmd) * 2),
			       (u64)pmd_val(*pmd));

		if (pmd_none(*pmd))
			break;

		if (pmd_bad(*pmd)) {
			printk("(bad)");
			break;
		}

		/* We must not map this if we have highmem enabled */
		if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
			break;

		pte = pte_offset_kernel(pmd, addr);
		printk(", *pte=%0*Lx", (u32)(sizeof(*pte) * 2),
		       (u64)pte_val(*pte));
	} while (0);

	printk("\n");
}
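/*
 * Purely illustrative sample of the dump produced above (values are made
 * up; field widths depend on the configured page table format, and the
 * *pud/*pmd columns only appear when those levels are not folded):
 *
 *	pgd = 8c3f0000
 *	[295f2000] *pgd=13f64001, *pmd=13f64001, *pte=13d48160
 */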
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
	unsigned index = pgd_index(address);
	pgd_t *pgd_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	pgd += index;
	pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k))
		return NULL;

	pud = pud_offset(pgd, address);
	pud_k = pud_offset(pgd_k, address);
	if (!pud_present(*pud_k))
		return NULL;

	if (!pud_present(*pud))
		set_pud(pud, *pud_k);

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);
	if (!pmd_present(*pmd_k))
		return NULL;

	if (!pmd_present(*pmd))
		set_pmd(pmd, *pmd_k);
	else {
		/*
		 * The page tables are fully synchronised so there must
		 * be another reason for the fault. Return NULL here to
		 * signal that we have not taken care of the fault.
		 */
		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
		return NULL;
	}

	return pmd_k;
}
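/*
 * vmalloc_sync_one() implements the usual lazy vmalloc synchronisation:
 * kernel mappings in the vmalloc/module area are instantiated only in
 * init_mm.pgd, and each process page table picks up the shared pud/pmd
 * entries the first time it faults on such an address. Only the
 * upper-level entries are copied; the leaf ptes remain shared with the
 * reference page table, so no further per-process work is needed once the
 * pmd is in place.
 */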
#ifdef CONFIG_SH_STORE_QUEUES
#define __FAULT_ADDR_LIMIT	P3_ADDR_MAX
#else
#define __FAULT_ADDR_LIMIT	VMALLOC_END
#endif
/*
 * Handle a fault on the vmalloc or module mapping area
 */
static noinline int vmalloc_fault(unsigned long address)
{
	pgd_t *pgd_k;
	pmd_t *pmd_k;
	pte_t *pte_k;

	/* Make sure we are in vmalloc/module/P3 area: */
	if (!(address >= VMALLOC_START && address < __FAULT_ADDR_LIMIT))
		return -1;

	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch..
	 */
	pgd_k = get_TTB();
	pmd_k = vmalloc_sync_one(pgd_k, address);
	if (!pmd_k)
		return -1;

	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;

	return 0;
}
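/*
 * Return convention for vmalloc_fault(): 0 when the fault was resolved by
 * copying entries from init_mm, -1 when the address is outside the
 * vmalloc/P3 window or the reference page table has no mapping either.
 * do_page_fault() treats any value >= 0 as "handled".
 */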
static void
show_fault_oops(struct pt_regs *regs, unsigned long address)
{
	if (!oops_may_print())
		return;

	printk(KERN_ALERT "BUG: unable to handle kernel ");
	if (address < PAGE_SIZE)
		printk(KERN_CONT "NULL pointer dereference");
	else
		printk(KERN_CONT "paging request");

	printk(KERN_CONT " at %08lx\n", address);
	printk(KERN_ALERT "PC:");
	printk_address(regs->pc, 1);

	show_pte(NULL, address);
}
static noinline void
no_context(struct pt_regs *regs, unsigned long error_code,
	   unsigned long address)
{
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	if (handle_trapped_io(regs, address))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);

	show_fault_oops(regs, address);

	die("Oops", regs, error_code);
	bust_spinlocks(0);
	do_exit(SIGKILL);
}
static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
		       unsigned long address, int si_code)
{
	struct task_struct *tsk = current;

	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		/*
		 * It's possible to have interrupts off here:
		 */
		local_irq_enable();

		force_sig_info_fault(SIGSEGV, si_code, address, tsk);

		return;
	}

	no_context(regs, error_code, address);
}
static noinline void
bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
		     unsigned long address)
{
	__bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
}
static void
__bad_area(struct pt_regs *regs, unsigned long error_code,
	   unsigned long address, int si_code)
{
	struct mm_struct *mm = current->mm;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
	up_read(&mm->mmap_sem);

	__bad_area_nosemaphore(regs, error_code, address, si_code);
}
static noinline void
bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
	__bad_area(regs, error_code, address, SEGV_MAPERR);
}
static noinline void
bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
		      unsigned long address)
{
	__bad_area(regs, error_code, address, SEGV_ACCERR);
}
static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;

	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die: */
	if (!user_mode(regs))
		no_context(regs, error_code, address);

	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
}
static noinline int
mm_fault_error(struct pt_regs *regs, unsigned long error_code,
	       unsigned long address, unsigned int fault)
{
	/*
	 * Pagefault was interrupted by SIGKILL. We have no reason to
	 * continue pagefault.
	 */
	if (fatal_signal_pending(current)) {
		if (!(fault & VM_FAULT_RETRY))
			up_read(&current->mm->mmap_sem);
		if (!user_mode(regs))
			no_context(regs, error_code, address);
		return 1;
	}

	if (!(fault & VM_FAULT_ERROR))
		return 0;

	if (fault & VM_FAULT_OOM) {
		/* Kernel mode? Handle exceptions or die: */
		if (!user_mode(regs)) {
			up_read(&current->mm->mmap_sem);
			no_context(regs, error_code, address);
			return 1;
		}

		up_read(&current->mm->mmap_sem);

		/*
		 * We ran out of memory, call the OOM killer, and return to
		 * userspace (which will retry the fault, or kill us if we
		 * got oom-killed):
		 */
		pagefault_out_of_memory();
	} else {
		if (fault & VM_FAULT_SIGBUS)
			do_sigbus(regs, error_code, address);
		else if (fault & VM_FAULT_SIGSEGV)
			bad_area(regs, error_code, address);
		else
			BUG();
	}

	return 1;
}
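/*
 * mm_fault_error() returns 1 once the fault has been fully dealt with
 * (fatal signal pending, OOM, SIGBUS or SIGSEGV delivered), telling
 * do_page_fault() to bail out; 0 means no VM_FAULT_ERROR bit was set and
 * the normal retry/accounting path should continue.
 */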
static inline int access_error(int error_code, struct vm_area_struct *vma)
{
	if (error_code & FAULT_CODE_WRITE) {
		/* write, present and write, not present: */
		if (unlikely(!(vma->vm_flags & VM_WRITE)))
			return 1;

		return 0;
	}

	/* ITLB miss on NX page */
	if (unlikely((error_code & FAULT_CODE_ITLB) &&
		     !(vma->vm_flags & VM_EXEC)))
		return 1;

	/* read, not present: */
	if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
		return 1;

	return 0;
}
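/*
 * Summary of the access checks above: writes need VM_WRITE, instruction
 * fetches (FAULT_CODE_ITLB set) need VM_EXEC, and plain reads merely need
 * the vma to be accessible at all (VM_READ | VM_EXEC | VM_WRITE). A
 * non-zero return makes do_page_fault() raise SIGSEGV with SEGV_ACCERR
 * via bad_area_access_error().
 */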
static int fault_in_kernel_space(unsigned long address)
{
	return address >= TASK_SIZE;
}
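/*
 * Everything at or above TASK_SIZE is treated as kernel space here. Such
 * faults never take mmap_sem: do_page_fault() below either repairs them
 * via vmalloc_fault() or hands them to no_context() through
 * bad_area_nosemaphore().
 */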
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
					unsigned long error_code,
					unsigned long address)
{
	unsigned long vec;
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	int fault;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	tsk = current;
	mm = tsk->mm;
	vec = lookup_exception_vector();
	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely(fault_in_kernel_space(address))) {
		if (vmalloc_fault(address) >= 0)
			return;
		if (notify_page_fault(regs, vec))
			return;

		bad_area_nosemaphore(regs, error_code, address);
		return;
	}
	if (unlikely(notify_page_fault(regs, vec)))
		return;

	/* Only enable interrupts if they were on before the fault */
	if ((regs->sr & SR_IMASK) != SR_IMASK)
		local_irq_enable();

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
	/*
	 * If we're in an interrupt, have no user context or are running
	 * with pagefaults disabled then we must not take the fault:
	 */
	if (unlikely(faulthandler_disabled() || !mm)) {
		bad_area_nosemaphore(regs, error_code, address);
		return;
	}

retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (unlikely(!vma)) {
		bad_area(regs, error_code, address);
		return;
	}
	if (likely(vma->vm_start <= address))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
		bad_area(regs, error_code, address);
		return;
	}
	if (unlikely(expand_stack(vma, address))) {
		bad_area(regs, error_code, address);
		return;
	}
	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */
good_area:
	if (unlikely(access_error(error_code, vma))) {
		bad_area_access_error(regs, error_code, address);
		return;
	}

	set_thread_fault_code(error_code);

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;
	if (error_code & FAULT_CODE_WRITE)
		flags |= FAULT_FLAG_WRITE;
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, address, flags);

	if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR)))
		if (mm_fault_error(regs, error_code, address, fault))
			return;
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
				      regs, address);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
				      regs, address);
		}
		if (fault & VM_FAULT_RETRY) {
			flags &= ~FAULT_FLAG_ALLOW_RETRY;
			flags |= FAULT_FLAG_TRIED;

			/*
			 * No need to up_read(&mm->mmap_sem) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */
			goto retry;
		}
	}

	up_read(&mm->mmap_sem);
}