/*	$NetBSD: uvm_emap.c,v 1.5 2009/08/29 00:06:43 rmind Exp $	*/

/*-
 * Copyright (c) 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Mindaugas Rasiukevicius and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * UVM ephemeral mapping (emap) interface.
 *
 * Generic (more expensive) stubs are implemented for architectures whose
 * pmap does not support emap.
 *
 * Note that uvm_emap_update() is called from the lower pmap(9) layer,
 * while the other functions call into pmap(9).  The typical pattern of
 * an update in the pmap is:
 *
 *	u_int gen = uvm_emap_gen_return();
 *	tlbflush();
 *	uvm_emap_update(gen);
 *
 * This interface is also used from IPI context, therefore the functions
 * must be safe.
 */
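
/*
 * A minimal usage sketch (illustrative only, with assumptions: the pages
 * `pgs' and the count `npages' are supplied by the caller, and error
 * handling is omitted).  The producing side maps the pages and publishes
 * a generation number; the consuming side synchronises against it before
 * reading through the window:
 *
 *	vsize_t sz = npages * PAGE_SIZE;
 *	vaddr_t va = uvm_emap_alloc(sz, true);
 *	uvm_emap_enter(va, pgs, npages);
 *	u_int gen = uvm_emap_produce();
 *
 *	(pass `gen' to the consuming LWP, possibly on another CPU)
 *
 *	uvm_emap_consume(gen);
 *	(... read through the window at `va' ...)
 *	uvm_emap_remove(va, sz);
 *	uvm_emap_free(va, sz);
 */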

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_emap.c,v 1.5 2009/08/29 00:06:43 rmind Exp $");

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/lwp.h>
#include <sys/vmem.h>
#include <sys/types.h>

#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>

#ifdef _LP64	/* assumed 64-bit/32-bit split for the two sizes below */
#define	UVM_EMAP_SIZE		(128 * 1024 * 1024)	/* 128 MB */
#else
#define	UVM_EMAP_SIZE		(32 * 1024 * 1024)	/* 32 MB */
#endif
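
/*
 * The global generation counter gets a cache line of its own: the array
 * pads it out and __aligned() places it on a COHERENCY_UNIT boundary, so
 * that frequent updates do not cause false sharing with neighbouring data.
 */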
static u_int		_uvm_emap_gen[COHERENCY_UNIT - sizeof(u_int)]
    __aligned(COHERENCY_UNIT);

#define	uvm_emap_gen	(_uvm_emap_gen[0])

u_int			uvm_emap_size = UVM_EMAP_SIZE;
static vaddr_t		uvm_emap_va;
static vmem_t *		uvm_emap_vmem;

/*
 * uvm_emap_sysinit: initialize the subsystem.
 */
void
uvm_emap_sysinit(void)
{
	struct uvm_cpu *ucpu;
	size_t qmax;
	u_int i;

	uvm_emap_size = roundup(uvm_emap_size, PAGE_SIZE);
	qmax = 16 * PAGE_SIZE;
#if 0	/* XXX: emap KVA allocation disabled (reconstructed conditional) */
	uvm_emap_va = uvm_km_alloc(kernel_map, uvm_emap_size, 0,
	    UVM_KMF_VAONLY | UVM_KMF_WAITVA);
	if (uvm_emap_va == 0) {
		panic("uvm_emap_init: KVA allocation failed");
	}

	uvm_emap_vmem = vmem_create("emap", uvm_emap_va, uvm_emap_size,
	    PAGE_SIZE, NULL, NULL, NULL, qmax, VM_SLEEP, IPL_NONE);
	if (uvm_emap_vmem == NULL) {
		panic("uvm_emap_init: vmem creation failed");
	}
#else
	uvm_emap_va = 0;
	uvm_emap_vmem = NULL;
#endif
	/* Initial generation value is 1. */
	uvm_emap_gen = 1;
	for (i = 0; i < MAXCPUS; i++) {
		ucpu = &uvm.cpus[i];
		ucpu->emap_gen = 1;
	}
}

/*
 * uvm_emap_alloc: allocate a window.
 */
vaddr_t
uvm_emap_alloc(vsize_t size, bool waitok)
{

	KASSERT(round_page(size) == size);

	return vmem_alloc(uvm_emap_vmem, size,
	    VM_INSTANTFIT | (waitok ? VM_SLEEP : VM_NOSLEEP));
}

/*
 * uvm_emap_free: free a window.
 */
void
uvm_emap_free(vaddr_t va, size_t size)
{

	KASSERT(va >= uvm_emap_va);
	KASSERT(size <= uvm_emap_size);
	KASSERT(va + size <= uvm_emap_va + uvm_emap_size);

	vmem_free(uvm_emap_vmem, va, size);
}

#ifdef __HAVE_PMAP_EMAP

/*
 * uvm_emap_enter: enter a new mapping, without TLB flush.
 */
void
uvm_emap_enter(vaddr_t va, struct vm_page **pgs, u_int npages)
{
	paddr_t pa;
	u_int n;

	for (n = 0; n < npages; n++, va += PAGE_SIZE) {
		pa = VM_PAGE_TO_PHYS(pgs[n]);
		pmap_emap_enter(va, pa, VM_PROT_READ);
	}
}
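
/*
 * Note: no TLB flush is performed above, so the new mappings are not yet
 * guaranteed to be visible everywhere; the caller publishes them with
 * uvm_emap_produce() and hands the returned generation number to the
 * consuming side (see the sketch at the top of this file).
 */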

/*
 * uvm_emap_remove: remove a mapping.
 */
void
uvm_emap_remove(vaddr_t sva, vsize_t len)
{

	pmap_emap_remove(sva, len);
}

/*
 * uvm_emap_gen_return: get the global generation number.
 *
 * => can be called from IPI handler, therefore function must be safe.
 */
u_int
uvm_emap_gen_return(void)
{
	u_int gen;

	gen = uvm_emap_gen;
	if (__predict_false(gen == UVM_EMAP_INACTIVE)) {
		/*
		 * Instead of looping, just increase it on our side.
		 * Another thread could race and increase it again,
		 * but without any negative effect.
		 */
		gen = atomic_inc_uint_nv(&uvm_emap_gen);
	}
	KASSERT(gen != UVM_EMAP_INACTIVE);
	return gen;
}

/*
 * uvm_emap_switch: if the CPU is 'behind' the LWP in emap visibility,
 * perform a TLB flush and thus update the local view.  Its main purpose
 * is to handle kernel preemption while emap is in use.
 *
 * => called from mi_switch(), when LWP returns after block or preempt.
 */
void
uvm_emap_switch(lwp_t *l)
{
	struct uvm_cpu *ucpu;
	u_int curgen, gen;

	KASSERT(kpreempt_disabled());

	/* If LWP did not use emap, then nothing to do. */
	if (__predict_true(l->l_emap_gen == UVM_EMAP_INACTIVE)) {
		return;
	}

	/*
	 * No need to synchronise if the generation number of the current
	 * CPU is newer than the generation number of this LWP.
	 *
	 * This test assumes two's complement arithmetic and allows
	 * ~2B missed updates before it will produce bad results.
	 */
	ucpu = curcpu()->ci_data.cpu_uvm;
	curgen = ucpu->emap_gen;
	gen = l->l_emap_gen;
	if (__predict_true((signed int)(curgen - gen) >= 0)) {
		return;
	}

	/*
	 * See comments in uvm_emap_consume() about memory
	 * barriers and race conditions.
	 */
	curgen = uvm_emap_gen_return();
	pmap_emap_sync(false);
	ucpu->emap_gen = curgen;
}

/*
 * uvm_emap_consume: update the current CPU and LWP to the given generation
 * of the emap.  In the case of LWP migration to a different CPU after block
 * or preempt, uvm_emap_switch() will synchronise.
 *
 * => may be called from both interrupt and thread context.
 */
void
uvm_emap_consume(u_int gen)
{
	struct cpu_info *ci;
	struct uvm_cpu *ucpu;
	lwp_t *l = curlwp;
	u_int curgen;

	if (gen == UVM_EMAP_INACTIVE) {
		return;
	}

	/*
	 * No need to synchronise if the generation number of the current
	 * CPU is newer than the generation number of this LWP.
	 *
	 * This test assumes two's complement arithmetic and allows
	 * ~2B missed updates before it will produce bad results.
	 */
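	/*
	 * (Illustration, assuming a 32-bit u_int: after the counter wraps,
	 * ucpu->emap_gen may be 2 while gen is 0xfffffffe; then
	 * (signed int)(2 - 0xfffffffe) == 4 >= 0, so the wrapped value
	 * still compares as newer.)
	 */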
	kpreempt_disable();
	ci = curcpu();
	ucpu = ci->ci_data.cpu_uvm;
	if (__predict_true((signed int)(ucpu->emap_gen - gen) >= 0)) {
		l->l_emap_gen = ucpu->emap_gen;
		kpreempt_enable();
		return;
	}
	/*
	 * Record the current generation _before_ issuing the TLB flush.
	 * No need for a memory barrier before, as reading a stale value
	 * for uvm_emap_gen is not a problem.
	 *
	 * pmap_emap_sync() must implicitly perform a full memory barrier,
	 * which prevents us from fetching a value from after the TLB flush
	 * has occurred (which would be bad).
	 *
	 * We can race with an interrupt on the current CPU updating the
	 * counter to a newer value.  This could cause us to set a stale
	 * value into ucpu->emap_gen, overwriting a newer update from the
	 * interrupt.  However, it does not matter since:
	 * (1) interrupts always run to completion or block;
	 * (2) interrupts will only ever install a newer value; and
	 * (3) we will roll the value forward later.
	 */
	curgen = uvm_emap_gen_return();
	pmap_emap_sync(true);
	ucpu->emap_gen = curgen;
	l->l_emap_gen = curgen;
	KASSERT((signed int)(curgen - gen) >= 0);
	kpreempt_enable();
}

/*
 * uvm_emap_produce: increment the emap generation counter.
 *
 * => pmap updates must be globally visible.
 * => caller must have already entered mappings.
 * => may be called from both interrupt and thread context.
 */
u_int
uvm_emap_produce(void)
{
	u_int gen;
again:
	gen = atomic_inc_uint_nv(&uvm_emap_gen);
	if (__predict_false(gen == UVM_EMAP_INACTIVE)) {
		goto again;
	}
	return gen;
}
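
/*
 * Note: generation 0 is UVM_EMAP_INACTIVE, the "LWP is not using emap"
 * marker kept in l->l_emap_gen, which is why uvm_emap_produce() and
 * uvm_emap_gen_return() both skip over it when the counter wraps.
 */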

/*
 * uvm_emap_update: update the global emap generation number for the
 * current CPU.
 *
 * This function is called by MD code (e.g. pmap) to take advantage of TLB
 * flushes initiated for other reasons, which sync the emap as a side
 * effect.  Note that the generation number must be sampled _before_ the
 * actual TLB flush, to avoid racing with a newly generated number.
 *
 * => can be called from IPI handler, therefore function must be safe.
 * => should be called _after_ the TLB flush.
 * => emap generation number should be taken _before_ the TLB flush.
 * => must be called with preemption disabled.
 */
void
uvm_emap_update(u_int gen)
{
	struct uvm_cpu *ucpu;

	/*
	 * See comments in uvm_emap_consume() about memory barriers and
	 * race conditions.  The store is atomic if emap_gen is word-sized.
	 */
	CTASSERT(sizeof(ucpu->emap_gen) == sizeof(int));
	/* XXX: KASSERT(kpreempt_disabled()); */
	ucpu = curcpu()->ci_data.cpu_uvm;
	ucpu->emap_gen = gen;
}

#else

/*
 * Stubs for architectures which do not support emap.
 */

void
uvm_emap_enter(vaddr_t va, struct vm_page **pgs, u_int npages)
{
	paddr_t pa;
	u_int n;

	for (n = 0; n < npages; n++, va += PAGE_SIZE) {
		pa = VM_PAGE_TO_PHYS(pgs[n]);
		pmap_kenter_pa(va, pa, VM_PROT_READ, 0);
	}
	pmap_update(pmap_kernel());
}

void
uvm_emap_remove(vaddr_t sva, vsize_t len)
{

	pmap_kremove(sva, len);
	pmap_update(pmap_kernel());
}

#endif	/* __HAVE_PMAP_EMAP */