arch/powerpc/mm/nohash/mmu_context.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * This file contains the routines for handling the MMU on those
   4  * PowerPC implementations where the MMU is not using the hash
   5  * table, such as 8xx, 4xx, BookE's etc...
   6  *
   7  * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
   8  *                IBM Corp.
   9  *
  10  *  Derived from previous arch/powerpc/mm/mmu_context.c
  11  *  and arch/powerpc/include/asm/mmu_context.h
  12  *
  13  * TODO:
  14  *
  15  *   - The global context lock will not scale very well
  16  *   - The maps should be dynamically allocated to allow for processors
  17  *     that support more PID bits at runtime
  18  *   - Implement flush_tlb_mm() by making the context stale and picking
  19  *     a new one
  20  *   - More aggressively clear stale map bits and maybe find some way to
  21  *     also clear mm->cpu_vm_mask bits when processes are migrated
  22  */
  23
  24 #include <linux/kernel.h>
  25 #include <linux/mm.h>
  26 #include <linux/init.h>
  27 #include <linux/spinlock.h>
  28 #include <linux/memblock.h>
  29 #include <linux/notifier.h>
  30 #include <linux/cpu.h>
  31 #include <linux/slab.h>
  32
  33 #include <asm/mmu_context.h>
  34 #include <asm/tlbflush.h>
  35 #include <asm/smp.h>
  36 #include <asm/kup.h>
  37
  38 #include <mm/mmu_decl.h>
  39
/*
 * Room for two PTE table pointers, usually the root page tables (pgdir)
 * of the kernel and of the current user address space.
 *
 * Slot [1] is refreshed by switch_mmu_context() with the pgd of the mm
 * being switched to when CONFIG_BDI_SWITCH is enabled. Slot [0] is not
 * written anywhere in this file.
 */
void *abatron_pteptrs[2];
  45
  46 /*
  47  * The MPC8xx has only 16 contexts. We rotate through them on each task switch.
  48  * A better way would be to keep track of tasks that own contexts, and implement
  49  * an LRU usage. That way very active tasks don't always have to pay the TLB
  50  * reload overhead. The kernel pages are mapped shared, so the kernel can run on
  51  * behalf of any task that makes a kernel entry. Shared does not mean they are
  52  * not protected, just that the ASID comparison is not performed. -- Dan
  53  *
  54  * The IBM4xx has 256 contexts, so we can just rotate through these as a way of
  55  * "switching" contexts. If the TID of the TLB is zero, the PID/TID comparison
  56  * is disabled, so we can use a TID of zero to represent all kernel pages as
  57  * shared among all contexts. -- Dan
  58  *
  59  * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We should
  60  * normally never have to steal though the facility is present if needed.
  61  * -- BenH
  62  */
  63 #define FIRST_CONTEXT 1
  64 #if defined(CONFIG_PPC_8xx)
  65 #define LAST_CONTEXT 16
  66 #elif defined(CONFIG_PPC_47x)
  67 #define LAST_CONTEXT 65535
  68 #else
  69 #define LAST_CONTEXT 255
  70 #endif
  71
  72 static unsigned int next_context, nr_free_contexts;
  73 static unsigned long *context_map;
  74 static unsigned long *stale_map[NR_CPUS];
  75 static struct mm_struct **context_mm;
  76 static DEFINE_RAW_SPINLOCK(context_lock);
  77
  78 #define CTX_MAP_SIZE    \
  79         (sizeof(unsigned long) * (LAST_CONTEXT / BITS_PER_LONG + 1))
  80
  81
  82 /* Steal a context from a task that has one at the moment.
  83  *
  84  * This is used when we are running out of available PID numbers
  85  * on the processors.
  86  *
  87  * This isn't an LRU system, it just frees up each context in
  88  * turn (sort-of pseudo-random replacement :).  This would be the
  89  * place to implement an LRU scheme if anyone was motivated to do it.
  90  *  -- paulus
  91  *
  92  * For context stealing, we use a slightly different approach for
  93  * SMP and UP. Basically, the UP one is simpler and doesn't use
  94  * the stale map as we can just flush the local CPU
  95  *  -- benh
  96  */
  97 static unsigned int steal_context_smp(unsigned int id)
  98 {
  99         struct mm_struct *mm;
 100         unsigned int cpu, max, i;
 101
 102         max = LAST_CONTEXT - FIRST_CONTEXT;
 103
 104         /* Attempt to free next_context first and then loop until we manage */
 105         while (max--) {
 106                 /* Pick up the victim mm */
 107                 mm = context_mm[id];
 108
 109                 /* We have a candidate victim, check if it's active, on SMP
 110                  * we cannot steal active contexts
 111                  */
 112                 if (mm->context.active) {
 113                         id++;
 114                         if (id > LAST_CONTEXT)
 115                                 id = FIRST_CONTEXT;
 116                         continue;
 117                 }
 118
 119                 /* Mark this mm has having no context anymore */
 120                 mm->context.id = MMU_NO_CONTEXT;
 121
 122                 /* Mark it stale on all CPUs that used this mm. For threaded
 123                  * implementations, we set it on all threads on each core
 124                  * represented in the mask. A future implementation will use
 125                  * a core map instead but this will do for now.
 126                  */
 127                 for_each_cpu(cpu, mm_cpumask(mm)) {
 128                         for (i = cpu_first_thread_sibling(cpu);
 129                              i <= cpu_last_thread_sibling(cpu); i++) {
 130                                 if (stale_map[i])
 131                                         __set_bit(id, stale_map[i]);
 132                         }
 133                         cpu = i - 1;
 134                 }
 135                 return id;
 136         }
 137
 138         /* This will happen if you have more CPUs than available contexts,
 139          * all we can do here is wait a bit and try again
 140          */
 141         raw_spin_unlock(&context_lock);
 142         cpu_relax();
 143         raw_spin_lock(&context_lock);
 144
 145         /* This will cause the caller to try again */
 146         return MMU_NO_CONTEXT;
 147 }
 148
 149 static unsigned int steal_all_contexts(void)
 150 {
 151         struct mm_struct *mm;
 152         int cpu = smp_processor_id();
 153         unsigned int id;
 154
 155         for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
 156                 /* Pick up the victim mm */
 157                 mm = context_mm[id];
 158
 159                 /* Mark this mm as having no context anymore */
 160                 mm->context.id = MMU_NO_CONTEXT;
 161                 if (id != FIRST_CONTEXT) {
 162                         context_mm[id] = NULL;
 163                         __clear_bit(id, context_map);
 164                 }
 165                 if (IS_ENABLED(CONFIG_SMP))
 166                         __clear_bit(id, stale_map[cpu]);
 167         }
 168
 169         /* Flush the TLB for all contexts (not to be used on SMP) */
 170         _tlbil_all();
 171
 172         nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT;
 173
 174         return FIRST_CONTEXT;
 175 }
 176
 177 /* Note that this will also be called on SMP if all other CPUs are
 178  * offlined, which means that it may be called for cpu != 0. For
 179  * this to work, we somewhat assume that CPUs that are onlined
 180  * come up with a fully clean TLB (or are cleaned when offlined)
 181  */
 182 static unsigned int steal_context_up(unsigned int id)
 183 {
 184         struct mm_struct *mm;
 185         int cpu = smp_processor_id();
 186
 187         /* Pick up the victim mm */
 188         mm = context_mm[id];
 189
 190         /* Flush the TLB for that context */
 191         local_flush_tlb_mm(mm);
 192
 193         /* Mark this mm has having no context anymore */
 194         mm->context.id = MMU_NO_CONTEXT;
 195
 196         /* XXX This clear should ultimately be part of local_flush_tlb_mm */
 197         if (IS_ENABLED(CONFIG_SMP))
 198                 __clear_bit(id, stale_map[cpu]);
 199
 200         return id;
 201 }
 202
 203 static void set_context(unsigned long id, pgd_t *pgd)
 204 {
 205         if (IS_ENABLED(CONFIG_PPC_8xx)) {
 206                 s16 offset = (s16)(__pa(swapper_pg_dir));
 207
 208                 /*
 209                  * Register M_TWB will contain base address of level 1 table minus the
 210                  * lower part of the kernel PGDIR base address, so that all accesses to
 211                  * level 1 table are done relative to lower part of kernel PGDIR base
 212                  * address.
 213                  */
 214                 mtspr(SPRN_M_TWB, __pa(pgd) - offset);
 215
 216                 /* Update context */
 217                 mtspr(SPRN_M_CASID, id - 1);
 218
 219                 /* sync */
 220                 mb();
 221         } else if (kuap_is_disabled()) {
 222                 mtspr(SPRN_PID, id);
 223                 isync();
 224         }
 225 }
 226
 227 void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
 228                         struct task_struct *tsk)
 229 {
 230         unsigned int id;
 231         unsigned int i, cpu = smp_processor_id();
 232         unsigned long *map;
 233
 234         /* No lockless fast path .. yet */
 235         raw_spin_lock(&context_lock);
 236
 237         if (IS_ENABLED(CONFIG_SMP)) {
 238                 /* Mark us active and the previous one not anymore */
 239                 next->context.active++;
 240                 if (prev) {
 241                         WARN_ON(prev->context.active < 1);
 242                         prev->context.active--;
 243                 }
 244         }
 245
 246  again:
 247
 248         /* If we already have a valid assigned context, skip all that */
 249         id = next->context.id;
 250         if (likely(id != MMU_NO_CONTEXT))
 251                 goto ctxt_ok;
 252
 253         /* We really don't have a context, let's try to acquire one */
 254         id = next_context;
 255         if (id > LAST_CONTEXT)
 256                 id = FIRST_CONTEXT;
 257         map = context_map;
 258
 259         /* No more free contexts, let's try to steal one */
 260         if (nr_free_contexts == 0) {
 261                 if (num_online_cpus() > 1) {
 262                         id = steal_context_smp(id);
 263                         if (id == MMU_NO_CONTEXT)
 264                                 goto again;
 265                         goto stolen;
 266                 }
 267                 if (IS_ENABLED(CONFIG_PPC_8xx))
 268                         id = steal_all_contexts();
 269                 else
 270                         id = steal_context_up(id);
 271                 goto stolen;
 272         }
 273         nr_free_contexts--;
 274
 275         /* We know there's at least one free context, try to find it */
 276         while (__test_and_set_bit(id, map)) {
 277                 id = find_next_zero_bit(map, LAST_CONTEXT+1, id);
 278                 if (id > LAST_CONTEXT)
 279                         id = FIRST_CONTEXT;
 280         }
 281  stolen:
 282         next_context = id + 1;
 283         context_mm[id] = next;
 284         next->context.id = id;
 285
 286  ctxt_ok:
 287
 288         /* If that context got marked stale on this CPU, then flush the
 289          * local TLB for it and unmark it before we use it
 290          */
 291         if (IS_ENABLED(CONFIG_SMP) && test_bit(id, stale_map[cpu])) {
 292                 local_flush_tlb_mm(next);
 293
 294                 /* XXX This clear should ultimately be part of local_flush_tlb_mm */
 295                 for (i = cpu_first_thread_sibling(cpu);
 296                      i <= cpu_last_thread_sibling(cpu); i++) {
 297                         if (stale_map[i])
 298                                 __clear_bit(id, stale_map[i]);
 299                 }
 300         }
 301
 302         /* Flick the MMU and release lock */
 303         if (IS_ENABLED(CONFIG_BDI_SWITCH))
 304                 abatron_pteptrs[1] = next->pgd;
 305         set_context(id, next->pgd);
 306 #if defined(CONFIG_BOOKE) && defined(CONFIG_PPC_KUAP)
 307         tsk->thread.pid = id;
 308 #endif
 309         raw_spin_unlock(&context_lock);
 310 }
 311
 312 /*
 313  * Set up the context for a new address space.
 314  */
 315 int init_new_context(struct task_struct *t, struct mm_struct *mm)
 316 {
 317         mm->context.id = MMU_NO_CONTEXT;
 318         mm->context.active = 0;
 319         pte_frag_set(&mm->context, NULL);
 320         return 0;
 321 }
 322
 323 /*
 324  * We're finished using the context for an address space.
 325  */
 326 void destroy_context(struct mm_struct *mm)
 327 {
 328         unsigned long flags;
 329         unsigned int id;
 330
 331         if (mm->context.id == MMU_NO_CONTEXT)
 332                 return;
 333
 334         WARN_ON(mm->context.active != 0);
 335
 336         raw_spin_lock_irqsave(&context_lock, flags);
 337         id = mm->context.id;
 338         if (id != MMU_NO_CONTEXT) {
 339                 __clear_bit(id, context_map);
 340                 mm->context.id = MMU_NO_CONTEXT;
 341                 context_mm[id] = NULL;
 342                 nr_free_contexts++;
 343         }
 344         raw_spin_unlock_irqrestore(&context_lock, flags);
 345 }
 346
 347 static int mmu_ctx_cpu_prepare(unsigned int cpu)
 348 {
 349         /* We don't touch CPU 0 map, it's allocated at aboot and kept
 350          * around forever
 351          */
 352         if (cpu == boot_cpuid)
 353                 return 0;
 354
 355         stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
 356         return 0;
 357 }
 358
 359 static int mmu_ctx_cpu_dead(unsigned int cpu)
 360 {
 361 #ifdef CONFIG_HOTPLUG_CPU
 362         if (cpu == boot_cpuid)
 363                 return 0;
 364
 365         kfree(stale_map[cpu]);
 366         stale_map[cpu] = NULL;
 367
 368         /* We also clear the cpu_vm_mask bits of CPUs going away */
 369         clear_tasks_mm_cpumask(cpu);
 370 #endif
 371         return 0;
 372 }
 373
 374 /*
 375  * Initialize the context management stuff.
 376  */
 377 void __init mmu_context_init(void)
 378 {
 379         /* Mark init_mm as being active on all possible CPUs since
 380          * we'll get called with prev == init_mm the first time
 381          * we schedule on a given CPU
 382          */
 383         init_mm.context.active = NR_CPUS;
 384
 385         /*
 386          * Allocate the maps used by context management
 387          */
 388         context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
 389         if (!context_map)
 390                 panic("%s: Failed to allocate %zu bytes\n", __func__,
 391                       CTX_MAP_SIZE);
 392         context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1),
 393                                     SMP_CACHE_BYTES);
 394         if (!context_mm)
 395                 panic("%s: Failed to allocate %zu bytes\n", __func__,
 396                       sizeof(void *) * (LAST_CONTEXT + 1));
 397         if (IS_ENABLED(CONFIG_SMP)) {
 398                 stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
 399                 if (!stale_map[boot_cpuid])
 400                         panic("%s: Failed to allocate %zu bytes\n", __func__,
 401                               CTX_MAP_SIZE);
 402
 403                 cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
 404                                           "powerpc/mmu/ctx:prepare",
 405                                           mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
 406         }
 407
 408         printk(KERN_INFO
 409                "MMU: Allocated %zu bytes of context maps for %d contexts\n",
 410                2 * CTX_MAP_SIZE + (sizeof(void *) * (LAST_CONTEXT + 1)),
 411                LAST_CONTEXT - FIRST_CONTEXT + 1);
 412
 413         /*
 414          * Some processors have too few contexts to reserve one for
 415          * init_mm, and require using context 0 for a normal task.
 416          * Other processors reserve the use of context zero for the kernel.
 417          * This code assumes FIRST_CONTEXT < 32.
 418          */
 419         context_map[0] = (1 << FIRST_CONTEXT) - 1;
 420         next_context = FIRST_CONTEXT;
 421         nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
 422 }