arch/arm/mm/fault.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  *  linux/arch/arm/mm/fault.c
   4  *
   5  *  Copyright (C) 1995  Linus Torvalds
   6  *  Modifications for ARM processor (c) 1995-2004 Russell King
   7  */
   8 #include <linux/extable.h>
   9 #include <linux/signal.h>
  10 #include <linux/mm.h>
  11 #include <linux/hardirq.h>
  12 #include <linux/init.h>
  13 #include <linux/kprobes.h>
  14 #include <linux/uaccess.h>
  15 #include <linux/page-flags.h>
  16 #include <linux/sched/signal.h>
  17 #include <linux/sched/debug.h>
  18 #include <linux/highmem.h>
  19 #include <linux/perf_event.h>
  20
  21 #include <asm/pgtable.h>
  22 #include <asm/system_misc.h>
  23 #include <asm/system_info.h>
  24 #include <asm/tlbflush.h>
  25
  26 #include "fault.h"
  27
  28 #ifdef CONFIG_MMU
  29
  30 /*
  31  * This is useful to dump out the page tables associated with
  32  * 'addr' in mm 'mm'.
  33  */
  34 void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
  35 {
  36         pgd_t *pgd;
  37
  38         if (!mm)
  39                 mm = &init_mm;
  40
  41         printk("%spgd = %p\n", lvl, mm->pgd);
  42         pgd = pgd_offset(mm, addr);
  43         printk("%s[%08lx] *pgd=%08llx", lvl, addr, (long long)pgd_val(*pgd));
  44
  45         do {
  46                 pud_t *pud;
  47                 pmd_t *pmd;
  48                 pte_t *pte;
  49
  50                 if (pgd_none(*pgd))
  51                         break;
  52
  53                 if (pgd_bad(*pgd)) {
  54                         pr_cont("(bad)");
  55                         break;
  56                 }
  57
  58                 pud = pud_offset(pgd, addr);
  59                 if (PTRS_PER_PUD != 1)
  60                         pr_cont(", *pud=%08llx", (long long)pud_val(*pud));
  61
  62                 if (pud_none(*pud))
  63                         break;
  64
  65                 if (pud_bad(*pud)) {
  66                         pr_cont("(bad)");
  67                         break;
  68                 }
  69
  70                 pmd = pmd_offset(pud, addr);
  71                 if (PTRS_PER_PMD != 1)
  72                         pr_cont(", *pmd=%08llx", (long long)pmd_val(*pmd));
  73
  74                 if (pmd_none(*pmd))
  75                         break;
  76
  77                 if (pmd_bad(*pmd)) {
  78                         pr_cont("(bad)");
  79                         break;
  80                 }
  81
  82                 /* We must not map this if we have highmem enabled */
  83                 if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
  84                         break;
  85
  86                 pte = pte_offset_map(pmd, addr);
  87                 pr_cont(", *pte=%08llx", (long long)pte_val(*pte));
  88 #ifndef CONFIG_ARM_LPAE
  89                 pr_cont(", *ppte=%08llx",
  90                        (long long)pte_val(pte[PTE_HWTABLE_PTRS]));
  91 #endif
  92                 pte_unmap(pte);
  93         } while(0);
  94
  95         pr_cont("\n");
  96 }
  97 #else                                   /* CONFIG_MMU */
  98 void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
  99 { }
 100 #endif                                  /* CONFIG_MMU */
 101
 102 /*
 103  * Oops.  The kernel tried to access some page that wasn't present.
 104  */
 105 static void
 106 __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
 107                   struct pt_regs *regs)
 108 {
 109         /*
 110          * Are we prepared to handle this kernel fault?
 111          */
 112         if (fixup_exception(regs))
 113                 return;
 114
 115         /*
 116          * No handler, we'll have to terminate things with extreme prejudice.
 117          */
 118         bust_spinlocks(1);
 119         pr_alert("8<--- cut here ---\n");
 120         pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
 121                  (addr < PAGE_SIZE) ? "NULL pointer dereference" :
 122                  "paging request", addr);
 123
 124         show_pte(KERN_ALERT, mm, addr);
 125         die("Oops", regs, fsr);
 126         bust_spinlocks(0);
 127         do_exit(SIGKILL);
 128 }
 129
 130 /*
 131  * Something tried to access memory that isn't in our memory map..
 132  * User mode accesses just cause a SIGSEGV
 133  */
 134 static void
 135 __do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig,
 136                 int code, struct pt_regs *regs)
 137 {
 138         struct task_struct *tsk = current;
 139
 140         if (addr > TASK_SIZE)
 141                 harden_branch_predictor();
 142
 143 #ifdef CONFIG_DEBUG_USER
 144         if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
 145             ((user_debug & UDBG_BUS)  && (sig == SIGBUS))) {
 146                 pr_err("8<--- cut here ---\n");
 147                 pr_err("%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
 148                        tsk->comm, sig, addr, fsr);
 149                 show_pte(KERN_ERR, tsk->mm, addr);
 150                 show_regs(regs);
 151         }
 152 #endif
 153 #ifndef CONFIG_KUSER_HELPERS
 154         if ((sig == SIGSEGV) && ((addr & PAGE_MASK) == 0xffff0000))
 155                 printk_ratelimited(KERN_DEBUG
 156                                    "%s: CONFIG_KUSER_HELPERS disabled at 0x%08lx\n",
 157                                    tsk->comm, addr);
 158 #endif
 159
 160         tsk->thread.address = addr;
 161         tsk->thread.error_code = fsr;
 162         tsk->thread.trap_no = 14;
 163         force_sig_fault(sig, code, (void __user *)addr);
 164 }
 165
 166 void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 167 {
 168         struct task_struct *tsk = current;
 169         struct mm_struct *mm = tsk->active_mm;
 170
 171         /*
 172          * If we are in kernel mode at this point, we
 173          * have no context to handle this fault with.
 174          */
 175         if (user_mode(regs))
 176                 __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
 177         else
 178                 __do_kernel_fault(mm, addr, fsr, regs);
 179 }
 180
 181 #ifdef CONFIG_MMU
/*
 * Extra fault results produced by __do_page_fault() and consumed by
 * do_page_fault(); they travel in the same vm_fault_t value as the result
 * of handle_mm_fault().
 * NOTE(review): presumably chosen not to collide with the generic
 * VM_FAULT_* bits - confirm against the vm_fault_t definitions.
 */
/* No VMA covers the address (and the stack could not be grown to it) */
#define VM_FAULT_BADMAP         0x010000
/* A VMA covers the address but access_error() rejected the access */
#define VM_FAULT_BADACCESS      0x020000
 184
 185 /*
 186  * Check that the permissions on the VMA allow for the fault which occurred.
 187  * If we encountered a write fault, we must have write permission, otherwise
 188  * we allow any permission.
 189  */
 190 static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
 191 {
 192         unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;
 193
 194         if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
 195                 mask = VM_WRITE;
 196         if (fsr & FSR_LNX_PF)
 197                 mask = VM_EXEC;
 198
 199         return vma->vm_flags & mask ? false : true;
 200 }
 201
 202 static vm_fault_t __kprobes
 203 __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
 204                 unsigned int flags, struct task_struct *tsk)
 205 {
 206         struct vm_area_struct *vma;
 207         vm_fault_t fault;
 208
 209         vma = find_vma(mm, addr);
 210         fault = VM_FAULT_BADMAP;
 211         if (unlikely(!vma))
 212                 goto out;
 213         if (unlikely(vma->vm_start > addr))
 214                 goto check_stack;
 215
 216         /*
 217          * Ok, we have a good vm_area for this
 218          * memory access, so we can handle it.
 219          */
 220 good_area:
 221         if (access_error(fsr, vma)) {
 222                 fault = VM_FAULT_BADACCESS;
 223                 goto out;
 224         }
 225
 226         return handle_mm_fault(vma, addr & PAGE_MASK, flags);
 227
 228 check_stack:
 229         /* Don't allow expansion below FIRST_USER_ADDRESS */
 230         if (vma->vm_flags & VM_GROWSDOWN &&
 231             addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr))
 232                 goto good_area;
 233 out:
 234         return fault;
 235 }
 236
 237 static int __kprobes
 238 do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 239 {
 240         struct task_struct *tsk;
 241         struct mm_struct *mm;
 242         int sig, code;
 243         vm_fault_t fault;
 244         unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 245
 246         if (kprobe_page_fault(regs, fsr))
 247                 return 0;
 248
 249         tsk = current;
 250         mm  = tsk->mm;
 251
 252         /* Enable interrupts if they were enabled in the parent context. */
 253         if (interrupts_enabled(regs))
 254                 local_irq_enable();
 255
 256         /*
 257          * If we're in an interrupt or have no user
 258          * context, we must not take the fault..
 259          */
 260         if (faulthandler_disabled() || !mm)
 261                 goto no_context;
 262
 263         if (user_mode(regs))
 264                 flags |= FAULT_FLAG_USER;
 265         if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
 266                 flags |= FAULT_FLAG_WRITE;
 267
 268         /*
 269          * As per x86, we may deadlock here.  However, since the kernel only
 270          * validly references user space from well defined areas of the code,
 271          * we can bug out early if this is from code which shouldn't.
 272          */
 273         if (!down_read_trylock(&mm->mmap_sem)) {
 274                 if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
 275                         goto no_context;
 276 retry:
 277                 down_read(&mm->mmap_sem);
 278         } else {
 279                 /*
 280                  * The above down_read_trylock() might have succeeded in
 281                  * which case, we'll have missed the might_sleep() from
 282                  * down_read()
 283                  */
 284                 might_sleep();
 285 #ifdef CONFIG_DEBUG_VM
 286                 if (!user_mode(regs) &&
 287                     !search_exception_tables(regs->ARM_pc))
 288                         goto no_context;
 289 #endif
 290         }
 291
 292         fault = __do_page_fault(mm, addr, fsr, flags, tsk);
 293
 294         /* If we need to retry but a fatal signal is pending, handle the
 295          * signal first. We do not need to release the mmap_sem because
 296          * it would already be released in __lock_page_or_retry in
 297          * mm/filemap.c. */
 298         if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
 299                 if (!user_mode(regs))
 300                         goto no_context;
 301                 return 0;
 302         }
 303
 304         /*
 305          * Major/minor page fault accounting is only done on the
 306          * initial attempt. If we go through a retry, it is extremely
 307          * likely that the page will be found in page cache at that point.
 308          */
 309
 310         perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
 311         if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
 312                 if (fault & VM_FAULT_MAJOR) {
 313                         tsk->maj_flt++;
 314                         perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
 315                                         regs, addr);
 316                 } else {
 317                         tsk->min_flt++;
 318                         perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
 319                                         regs, addr);
 320                 }
 321                 if (fault & VM_FAULT_RETRY) {
 322                         /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 323                         * of starvation. */
 324                         flags &= ~FAULT_FLAG_ALLOW_RETRY;
 325                         flags |= FAULT_FLAG_TRIED;
 326                         goto retry;
 327                 }
 328         }
 329
 330         up_read(&mm->mmap_sem);
 331
 332         /*
 333          * Handle the "normal" case first - VM_FAULT_MAJOR
 334          */
 335         if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
 336                 return 0;
 337
 338         /*
 339          * If we are in kernel mode at this point, we
 340          * have no context to handle this fault with.
 341          */
 342         if (!user_mode(regs))
 343                 goto no_context;
 344
 345         if (fault & VM_FAULT_OOM) {
 346                 /*
 347                  * We ran out of memory, call the OOM killer, and return to
 348                  * userspace (which will retry the fault, or kill us if we
 349                  * got oom-killed)
 350                  */
 351                 pagefault_out_of_memory();
 352                 return 0;
 353         }
 354
 355         if (fault & VM_FAULT_SIGBUS) {
 356                 /*
 357                  * We had some memory, but were unable to
 358                  * successfully fix up this page fault.
 359                  */
 360                 sig = SIGBUS;
 361                 code = BUS_ADRERR;
 362         } else {
 363                 /*
 364                  * Something tried to access memory that
 365                  * isn't in our memory map..
 366                  */
 367                 sig = SIGSEGV;
 368                 code = fault == VM_FAULT_BADACCESS ?
 369                         SEGV_ACCERR : SEGV_MAPERR;
 370         }
 371
 372         __do_user_fault(addr, fsr, sig, code, regs);
 373         return 0;
 374
 375 no_context:
 376         __do_kernel_fault(mm, addr, fsr, regs);
 377         return 0;
 378 }
 379 #else                                   /* CONFIG_MMU */
 380 static int
 381 do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 382 {
 383         return 0;
 384 }
 385 #endif                                  /* CONFIG_MMU */
 386
 387 /*
 388  * First Level Translation Fault Handler
 389  *
 390  * We enter here because the first level page table doesn't contain
 391  * a valid entry for the address.
 392  *
 393  * If the address is in kernel space (>= TASK_SIZE), then we are
 394  * probably faulting in the vmalloc() area.
 395  *
 * If the init_task's first level page tables contain the relevant
 * entry, we copy it to this task.  If not, we send the process
 398  * a signal, fixup the exception, or oops the kernel.
 399  *
 400  * NOTE! We MUST NOT take any locks for this case. We may be in an
 401  * interrupt or a critical region, and should only copy the information
 402  * from the master page table, nothing more.
 403  */
 404 #ifdef CONFIG_MMU
 405 static int __kprobes
 406 do_translation_fault(unsigned long addr, unsigned int fsr,
 407                      struct pt_regs *regs)
 408 {
 409         unsigned int index;
 410         pgd_t *pgd, *pgd_k;
 411         pud_t *pud, *pud_k;
 412         pmd_t *pmd, *pmd_k;
 413
 414         if (addr < TASK_SIZE)
 415                 return do_page_fault(addr, fsr, regs);
 416
 417         if (user_mode(regs))
 418                 goto bad_area;
 419
 420         index = pgd_index(addr);
 421
 422         pgd = cpu_get_pgd() + index;
 423         pgd_k = init_mm.pgd + index;
 424
 425         if (pgd_none(*pgd_k))
 426                 goto bad_area;
 427         if (!pgd_present(*pgd))
 428                 set_pgd(pgd, *pgd_k);
 429
 430         pud = pud_offset(pgd, addr);
 431         pud_k = pud_offset(pgd_k, addr);
 432
 433         if (pud_none(*pud_k))
 434                 goto bad_area;
 435         if (!pud_present(*pud))
 436                 set_pud(pud, *pud_k);
 437
 438         pmd = pmd_offset(pud, addr);
 439         pmd_k = pmd_offset(pud_k, addr);
 440
 441 #ifdef CONFIG_ARM_LPAE
 442         /*
 443          * Only one hardware entry per PMD with LPAE.
 444          */
 445         index = 0;
 446 #else
 447         /*
 448          * On ARM one Linux PGD entry contains two hardware entries (see page
 449          * tables layout in pgtable.h). We normally guarantee that we always
 450          * fill both L1 entries. But create_mapping() doesn't follow the rule.
 451          * It can create inidividual L1 entries, so here we have to call
 452          * pmd_none() check for the entry really corresponded to address, not
 453          * for the first of pair.
 454          */
 455         index = (addr >> SECTION_SHIFT) & 1;
 456 #endif
 457         if (pmd_none(pmd_k[index]))
 458                 goto bad_area;
 459
 460         copy_pmd(pmd, pmd_k);
 461         return 0;
 462
 463 bad_area:
 464         do_bad_area(addr, fsr, regs);
 465         return 0;
 466 }
 467 #else                                   /* CONFIG_MMU */
 468 static int
 469 do_translation_fault(unsigned long addr, unsigned int fsr,
 470                      struct pt_regs *regs)
 471 {
 472         return 0;
 473 }
 474 #endif                                  /* CONFIG_MMU */
 475
 476 /*
 477  * Some section permission faults need to be handled gracefully.
 478  * They can happen due to a __{get,put}_user during an oops.
 479  */
 480 #ifndef CONFIG_ARM_LPAE
 481 static int
 482 do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 483 {
 484         do_bad_area(addr, fsr, regs);
 485         return 0;
 486 }
 487 #endif /* CONFIG_ARM_LPAE */
 488
 489 /*
 490  * This abort handler always returns "fault".
 491  */
 492 static int
 493 do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 494 {
 495         return 1;
 496 }
 497
/*
 * Describes how one fault status value is handled.  The tables of these
 * entries are indexed by fsr_fs() of the fault status register value.
 */
struct fsr_info {
        /* Handler for the abort; a return of 0 means it was dealt with */
        int     (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
        /* Signal passed to arm_notify_die() when fn returns non-zero */
        int     sig;
        /* Code value passed to arm_notify_die() together with sig */
        int     code;
        /* Description printed in the "Unhandled ..." message */
        const char *name;
};
 504
 505 /* FSR definition */
 506 #ifdef CONFIG_ARM_LPAE
 507 #include "fsr-3level.c"
 508 #else
 509 #include "fsr-2level.c"
 510 #endif
 511
 512 void __init
 513 hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
 514                 int sig, int code, const char *name)
 515 {
 516         if (nr < 0 || nr >= ARRAY_SIZE(fsr_info))
 517                 BUG();
 518
 519         fsr_info[nr].fn   = fn;
 520         fsr_info[nr].sig  = sig;
 521         fsr_info[nr].code = code;
 522         fsr_info[nr].name = name;
 523 }
 524
 525 /*
 526  * Dispatch a data abort to the relevant handler.
 527  */
 528 asmlinkage void
 529 do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 530 {
 531         const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
 532
 533         if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
 534                 return;
 535
 536         pr_alert("8<--- cut here ---\n");
 537         pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
 538                 inf->name, fsr, addr);
 539         show_pte(KERN_ALERT, current->mm, addr);
 540
 541         arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
 542                        fsr, 0);
 543 }
 544
 545 void __init
 546 hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
 547                  int sig, int code, const char *name)
 548 {
 549         if (nr < 0 || nr >= ARRAY_SIZE(ifsr_info))
 550                 BUG();
 551
 552         ifsr_info[nr].fn   = fn;
 553         ifsr_info[nr].sig  = sig;
 554         ifsr_info[nr].code = code;
 555         ifsr_info[nr].name = name;
 556 }
 557
 558 asmlinkage void
 559 do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
 560 {
 561         const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr);
 562
 563         if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
 564                 return;
 565
 566         pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
 567                 inf->name, ifsr, addr);
 568
 569         arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
 570                        ifsr, 0);
 571 }
 572
 573 /*
 574  * Abort handler to be used only during first unmasking of asynchronous aborts
 575  * on the boot CPU. This makes sure that the machine will not die if the
 576  * firmware/bootloader left an imprecise abort pending for us to trip over.
 577  */
 578 static int __init early_abort_handler(unsigned long addr, unsigned int fsr,
 579                                       struct pt_regs *regs)
 580 {
 581         pr_warn("Hit pending asynchronous external abort (FSR=0x%08x) during "
 582                 "first unmask, this is most likely caused by a "
 583                 "firmware/bootloader bug.\n", fsr);
 584
 585         return 0;
 586 }
 587
/*
 * Enable aborts via local_abt_enable() while the FSR_FS_AEA entry of the
 * data abort table temporarily points at early_abort_handler(), then put
 * do_bad() back as the handler for that entry.
 */
void __init early_abt_enable(void)
{
        /* Tolerate an abort that is already pending when we unmask */
        fsr_info[FSR_FS_AEA].fn = early_abort_handler;
        local_abt_enable();
        /* From here on such aborts are reported as unhandled again */
        fsr_info[FSR_FS_AEA].fn = do_bad;
}
 594
 595 #ifndef CONFIG_ARM_LPAE
 596 static int __init exceptions_init(void)
 597 {
 598         if (cpu_architecture() >= CPU_ARCH_ARMv6) {
 599                 hook_fault_code(4, do_translation_fault, SIGSEGV, SEGV_MAPERR,
 600                                 "I-cache maintenance fault");
 601         }
 602
 603         if (cpu_architecture() >= CPU_ARCH_ARMv7) {
 604                 /*
 605                  * TODO: Access flag faults introduced in ARMv6K.
 606                  * Runtime check for 'K' extension is needed
 607                  */
 608                 hook_fault_code(3, do_bad, SIGSEGV, SEGV_MAPERR,
 609                                 "section access flag fault");
 610                 hook_fault_code(6, do_bad, SIGSEGV, SEGV_MAPERR,
 611                                 "section access flag fault");
 612         }
 613
 614         return 0;
 615 }
 616
 617 arch_initcall(exceptions_init);
 618 #endif