arch/mips/kvm/mmu.c

   1 /*
   2  * This file is subject to the terms and conditions of the GNU General Public
   3  * License.  See the file "COPYING" in the main directory of this archive
   4  * for more details.
   5  *
   6  * KVM/MIPS MMU handling in the KVM module.
   7  *
   8  * Copyright (C) 2012  MIPS Technologies, Inc.  All rights reserved.
   9  * Authors: Sanjay Lal <sanjayl@kymasys.com>
  10  */
  11
  12 #include <linux/highmem.h>
  13 #include <linux/kvm_host.h>
  14 #include <linux/uaccess.h>
  15 #include <asm/mmu_context.h>
  16 #include <asm/pgalloc.h>
  17
  18 /*
  19  * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels
  20  * for which pages need to be cached.
  21  */
  22 #if defined(__PAGETABLE_PMD_FOLDED)
  23 #define KVM_MMU_CACHE_MIN_PAGES 1
  24 #else
  25 #define KVM_MMU_CACHE_MIN_PAGES 2
  26 #endif
  27
  28 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
  29 {
  30         kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
  31 }
  32
  33 /**
  34  * kvm_pgd_init() - Initialise KVM GPA page directory.
  35  * @page:       Pointer to page directory (PGD) for KVM GPA.
  36  *
  37  * Initialise a KVM GPA page directory with pointers to the invalid table, i.e.
  38  * representing no mappings. This is similar to pgd_init(), however it
  39  * initialises all the page directory pointers, not just the ones corresponding
  40  * to the userland address space (since it is for the guest physical address
  41  * space rather than a virtual address space).
  42  */
  43 static void kvm_pgd_init(void *page)
  44 {
  45         unsigned long *p, *end;
  46         unsigned long entry;
  47
  48 #ifdef __PAGETABLE_PMD_FOLDED
  49         entry = (unsigned long)invalid_pte_table;
  50 #else
  51         entry = (unsigned long)invalid_pmd_table;
  52 #endif
  53
  54         p = (unsigned long *)page;
  55         end = p + PTRS_PER_PGD;
  56
  57         do {
  58                 p[0] = entry;
  59                 p[1] = entry;
  60                 p[2] = entry;
  61                 p[3] = entry;
  62                 p[4] = entry;
  63                 p += 8;
  64                 p[-3] = entry;
  65                 p[-2] = entry;
  66                 p[-1] = entry;
  67         } while (p != end);
  68 }
  69
  70 /**
  71  * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory.
  72  *
  73  * Allocate a blank KVM GPA page directory (PGD) for representing guest physical
  74  * to host physical page mappings.
  75  *
  76  * Returns:     Pointer to new KVM GPA page directory.
  77  *              NULL on allocation failure.
  78  */
  79 pgd_t *kvm_pgd_alloc(void)
  80 {
  81         pgd_t *ret;
  82
  83         ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_TABLE_ORDER);
  84         if (ret)
  85                 kvm_pgd_init(ret);
  86
  87         return ret;
  88 }
  89
  90 /**
  91  * kvm_mips_walk_pgd() - Walk page table with optional allocation.
  92  * @pgd:        Page directory pointer.
  93  * @addr:       Address to index page table using.
  94  * @cache:      MMU page cache to allocate new page tables from, or NULL.
  95  *
  96  * Walk the page tables pointed to by @pgd to find the PTE corresponding to the
  97  * address @addr. If page tables don't exist for @addr, they will be created
  98  * from the MMU cache if @cache is not NULL.
  99  *
 100  * Returns:     Pointer to pte_t corresponding to @addr.
 101  *              NULL if a page table doesn't exist for @addr and !@cache.
 102  *              NULL if a page table allocation failed.
 103  */
 104 static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
 105                                 unsigned long addr)
 106 {
 107         p4d_t *p4d;
 108         pud_t *pud;
 109         pmd_t *pmd;
 110
 111         pgd += pgd_index(addr);
 112         if (pgd_none(*pgd)) {
 113                 /* Not used on MIPS yet */
 114                 BUG();
 115                 return NULL;
 116         }
 117         p4d = p4d_offset(pgd, addr);
 118         pud = pud_offset(p4d, addr);
 119         if (pud_none(*pud)) {
 120                 pmd_t *new_pmd;
 121
 122                 if (!cache)
 123                         return NULL;
 124                 new_pmd = kvm_mmu_memory_cache_alloc(cache);
 125                 pmd_init(new_pmd);
 126                 pud_populate(NULL, pud, new_pmd);
 127         }
 128         pmd = pmd_offset(pud, addr);
 129         if (pmd_none(*pmd)) {
 130                 pte_t *new_pte;
 131
 132                 if (!cache)
 133                         return NULL;
 134                 new_pte = kvm_mmu_memory_cache_alloc(cache);
 135                 clear_page(new_pte);
 136                 pmd_populate_kernel(NULL, pmd, new_pte);
 137         }
 138         return pte_offset_kernel(pmd, addr);
 139 }
 140
 141 /* Caller must hold kvm->mm_lock */
 142 static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm,
 143                                    struct kvm_mmu_memory_cache *cache,
 144                                    unsigned long addr)
 145 {
 146         return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr);
 147 }
 148
 149 /*
 150  * kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}.
 151  * Flush a range of guest physical address space from the VM's GPA page tables.
 152  */
 153
 154 static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa,
 155                                    unsigned long end_gpa)
 156 {
 157         int i_min = pte_index(start_gpa);
 158         int i_max = pte_index(end_gpa);
 159         bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
 160         int i;
 161
 162         for (i = i_min; i <= i_max; ++i) {
 163                 if (!pte_present(pte[i]))
 164                         continue;
 165
 166                 set_pte(pte + i, __pte(0));
 167         }
 168         return safe_to_remove;
 169 }
 170
 171 static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa,
 172                                    unsigned long end_gpa)
 173 {
 174         pte_t *pte;
 175         unsigned long end = ~0ul;
 176         int i_min = pmd_index(start_gpa);
 177         int i_max = pmd_index(end_gpa);
 178         bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
 179         int i;
 180
 181         for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
 182                 if (!pmd_present(pmd[i]))
 183                         continue;
 184
 185                 pte = pte_offset_kernel(pmd + i, 0);
 186                 if (i == i_max)
 187                         end = end_gpa;
 188
 189                 if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) {
 190                         pmd_clear(pmd + i);
 191                         pte_free_kernel(NULL, pte);
 192                 } else {
 193                         safe_to_remove = false;
 194                 }
 195         }
 196         return safe_to_remove;
 197 }
 198
 199 static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa,
 200                                    unsigned long end_gpa)
 201 {
 202         pmd_t *pmd;
 203         unsigned long end = ~0ul;
 204         int i_min = pud_index(start_gpa);
 205         int i_max = pud_index(end_gpa);
 206         bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
 207         int i;
 208
 209         for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
 210                 if (!pud_present(pud[i]))
 211                         continue;
 212
 213                 pmd = pmd_offset(pud + i, 0);
 214                 if (i == i_max)
 215                         end = end_gpa;
 216
 217                 if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) {
 218                         pud_clear(pud + i);
 219                         pmd_free(NULL, pmd);
 220                 } else {
 221                         safe_to_remove = false;
 222                 }
 223         }
 224         return safe_to_remove;
 225 }
 226
 227 static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
 228                                    unsigned long end_gpa)
 229 {
 230         p4d_t *p4d;
 231         pud_t *pud;
 232         unsigned long end = ~0ul;
 233         int i_min = pgd_index(start_gpa);
 234         int i_max = pgd_index(end_gpa);
 235         bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
 236         int i;
 237
 238         for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
 239                 if (!pgd_present(pgd[i]))
 240                         continue;
 241
 242                 p4d = p4d_offset(pgd, 0);
 243                 pud = pud_offset(p4d + i, 0);
 244                 if (i == i_max)
 245                         end = end_gpa;
 246
 247                 if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) {
 248                         pgd_clear(pgd + i);
 249                         pud_free(NULL, pud);
 250                 } else {
 251                         safe_to_remove = false;
 252                 }
 253         }
 254         return safe_to_remove;
 255 }
 256
 257 /**
 258  * kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses.
 259  * @kvm:        KVM pointer.
 260  * @start_gfn:  Guest frame number of first page in GPA range to flush.
 261  * @end_gfn:    Guest frame number of last page in GPA range to flush.
 262  *
 263  * Flushes a range of GPA mappings from the GPA page tables.
 264  *
 265  * The caller must hold the @kvm->mmu_lock spinlock.
 266  *
 267  * Returns:     Whether its safe to remove the top level page directory because
 268  *              all lower levels have been removed.
 269  */
 270 bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
 271 {
 272         return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd,
 273                                       start_gfn << PAGE_SHIFT,
 274                                       end_gfn << PAGE_SHIFT);
 275 }
 276
 277 #define BUILD_PTE_RANGE_OP(name, op)                                    \
 278 static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start,       \
 279                                  unsigned long end)                     \
 280 {                                                                       \
 281         int ret = 0;                                                    \
 282         int i_min = pte_index(start);                           \
 283         int i_max = pte_index(end);                                     \
 284         int i;                                                          \
 285         pte_t old, new;                                                 \
 286                                                                         \
 287         for (i = i_min; i <= i_max; ++i) {                              \
 288                 if (!pte_present(pte[i]))                               \
 289                         continue;                                       \
 290                                                                         \
 291                 old = pte[i];                                           \
 292                 new = op(old);                                          \
 293                 if (pte_val(new) == pte_val(old))                       \
 294                         continue;                                       \
 295                 set_pte(pte + i, new);                                  \
 296                 ret = 1;                                                \
 297         }                                                               \
 298         return ret;                                                     \
 299 }                                                                       \
 300                                                                         \
 301 /* returns true if anything was done */                                 \
 302 static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start,       \
 303                                  unsigned long end)                     \
 304 {                                                                       \
 305         int ret = 0;                                                    \
 306         pte_t *pte;                                                     \
 307         unsigned long cur_end = ~0ul;                                   \
 308         int i_min = pmd_index(start);                           \
 309         int i_max = pmd_index(end);                                     \
 310         int i;                                                          \
 311                                                                         \
 312         for (i = i_min; i <= i_max; ++i, start = 0) {                   \
 313                 if (!pmd_present(pmd[i]))                               \
 314                         continue;                                       \
 315                                                                         \
 316                 pte = pte_offset_kernel(pmd + i, 0);                            \
 317                 if (i == i_max)                                         \
 318                         cur_end = end;                                  \
 319                                                                         \
 320                 ret |= kvm_mips_##name##_pte(pte, start, cur_end);      \
 321         }                                                               \
 322         return ret;                                                     \
 323 }                                                                       \
 324                                                                         \
 325 static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start,       \
 326                                  unsigned long end)                     \
 327 {                                                                       \
 328         int ret = 0;                                                    \
 329         pmd_t *pmd;                                                     \
 330         unsigned long cur_end = ~0ul;                                   \
 331         int i_min = pud_index(start);                           \
 332         int i_max = pud_index(end);                                     \
 333         int i;                                                          \
 334                                                                         \
 335         for (i = i_min; i <= i_max; ++i, start = 0) {                   \
 336                 if (!pud_present(pud[i]))                               \
 337                         continue;                                       \
 338                                                                         \
 339                 pmd = pmd_offset(pud + i, 0);                           \
 340                 if (i == i_max)                                         \
 341                         cur_end = end;                                  \
 342                                                                         \
 343                 ret |= kvm_mips_##name##_pmd(pmd, start, cur_end);      \
 344         }                                                               \
 345         return ret;                                                     \
 346 }                                                                       \
 347                                                                         \
 348 static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start,       \
 349                                  unsigned long end)                     \
 350 {                                                                       \
 351         int ret = 0;                                                    \
 352         p4d_t *p4d;                                                     \
 353         pud_t *pud;                                                     \
 354         unsigned long cur_end = ~0ul;                                   \
 355         int i_min = pgd_index(start);                                   \
 356         int i_max = pgd_index(end);                                     \
 357         int i;                                                          \
 358                                                                         \
 359         for (i = i_min; i <= i_max; ++i, start = 0) {                   \
 360                 if (!pgd_present(pgd[i]))                               \
 361                         continue;                                       \
 362                                                                         \
 363                 p4d = p4d_offset(pgd, 0);                               \
 364                 pud = pud_offset(p4d + i, 0);                           \
 365                 if (i == i_max)                                         \
 366                         cur_end = end;                                  \
 367                                                                         \
 368                 ret |= kvm_mips_##name##_pud(pud, start, cur_end);      \
 369         }                                                               \
 370         return ret;                                                     \
 371 }
 372
 373 /*
 374  * kvm_mips_mkclean_gpa_pt.
 375  * Mark a range of guest physical address space clean (writes fault) in the VM's
 376  * GPA page table to allow dirty page tracking.
 377  */
 378
 379 BUILD_PTE_RANGE_OP(mkclean, pte_mkclean)
 380
 381 /**
 382  * kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean.
 383  * @kvm:        KVM pointer.
 384  * @start_gfn:  Guest frame number of first page in GPA range to flush.
 385  * @end_gfn:    Guest frame number of last page in GPA range to flush.
 386  *
 387  * Make a range of GPA mappings clean so that guest writes will fault and
 388  * trigger dirty page logging.
 389  *
 390  * The caller must hold the @kvm->mmu_lock spinlock.
 391  *
 392  * Returns:     Whether any GPA mappings were modified, which would require
 393  *              derived mappings (GVA page tables & TLB enties) to be
 394  *              invalidated.
 395  */
 396 int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
 397 {
 398         return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd,
 399                                     start_gfn << PAGE_SHIFT,
 400                                     end_gfn << PAGE_SHIFT);
 401 }
 402
 403 /**
 404  * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
 405  * @kvm:        The KVM pointer
 406  * @slot:       The memory slot associated with mask
 407  * @gfn_offset: The gfn offset in memory slot
 408  * @mask:       The mask of dirty pages at offset 'gfn_offset' in this memory
 409  *              slot to be write protected
 410  *
 411  * Walks bits set in mask write protects the associated pte's. Caller must
 412  * acquire @kvm->mmu_lock.
 413  */
 414 void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 415                 struct kvm_memory_slot *slot,
 416                 gfn_t gfn_offset, unsigned long mask)
 417 {
 418         gfn_t base_gfn = slot->base_gfn + gfn_offset;
 419         gfn_t start = base_gfn +  __ffs(mask);
 420         gfn_t end = base_gfn + __fls(mask);
 421
 422         kvm_mips_mkclean_gpa_pt(kvm, start, end);
 423 }
 424
 425 /*
 426  * kvm_mips_mkold_gpa_pt.
 427  * Mark a range of guest physical address space old (all accesses fault) in the
 428  * VM's GPA page table to allow detection of commonly used pages.
 429  */
 430
 431 BUILD_PTE_RANGE_OP(mkold, pte_mkold)
 432
 433 static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn,
 434                                  gfn_t end_gfn)
 435 {
 436         return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd,
 437                                   start_gfn << PAGE_SHIFT,
 438                                   end_gfn << PAGE_SHIFT);
 439 }
 440
 441 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 442 {
 443         kvm_mips_flush_gpa_pt(kvm, range->start, range->end);
 444         return true;
 445 }
 446
 447 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 448 {
 449         return kvm_mips_mkold_gpa_pt(kvm, range->start, range->end);
 450 }
 451
 452 bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 453 {
 454         gpa_t gpa = range->start << PAGE_SHIFT;
 455         pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
 456
 457         if (!gpa_pte)
 458                 return false;
 459         return pte_young(*gpa_pte);
 460 }
 461
 462 /**
 463  * _kvm_mips_map_page_fast() - Fast path GPA fault handler.
 464  * @vcpu:               VCPU pointer.
 465  * @gpa:                Guest physical address of fault.
 466  * @write_fault:        Whether the fault was due to a write.
 467  * @out_entry:          New PTE for @gpa (written on success unless NULL).
 468  * @out_buddy:          New PTE for @gpa's buddy (written on success unless
 469  *                      NULL).
 470  *
 471  * Perform fast path GPA fault handling, doing all that can be done without
 472  * calling into KVM. This handles marking old pages young (for idle page
 473  * tracking), and dirtying of clean pages (for dirty page logging).
 474  *
 475  * Returns:     0 on success, in which case we can update derived mappings and
 476  *              resume guest execution.
 477  *              -EFAULT on failure due to absent GPA mapping or write to
 478  *              read-only page, in which case KVM must be consulted.
 479  */
 480 static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa,
 481                                    bool write_fault,
 482                                    pte_t *out_entry, pte_t *out_buddy)
 483 {
 484         struct kvm *kvm = vcpu->kvm;
 485         gfn_t gfn = gpa >> PAGE_SHIFT;
 486         pte_t *ptep;
 487         kvm_pfn_t pfn = 0;      /* silence bogus GCC warning */
 488         bool pfn_valid = false;
 489         int ret = 0;
 490
 491         spin_lock(&kvm->mmu_lock);
 492
 493         /* Fast path - just check GPA page table for an existing entry */
 494         ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
 495         if (!ptep || !pte_present(*ptep)) {
 496                 ret = -EFAULT;
 497                 goto out;
 498         }
 499
 500         /* Track access to pages marked old */
 501         if (!pte_young(*ptep)) {
 502                 set_pte(ptep, pte_mkyoung(*ptep));
 503                 pfn = pte_pfn(*ptep);
 504                 pfn_valid = true;
 505                 /* call kvm_set_pfn_accessed() after unlock */
 506         }
 507         if (write_fault && !pte_dirty(*ptep)) {
 508                 if (!pte_write(*ptep)) {
 509                         ret = -EFAULT;
 510                         goto out;
 511                 }
 512
 513                 /* Track dirtying of writeable pages */
 514                 set_pte(ptep, pte_mkdirty(*ptep));
 515                 pfn = pte_pfn(*ptep);
 516                 mark_page_dirty(kvm, gfn);
 517                 kvm_set_pfn_dirty(pfn);
 518         }
 519
 520         if (out_entry)
 521                 *out_entry = *ptep;
 522         if (out_buddy)
 523                 *out_buddy = *ptep_buddy(ptep);
 524
 525 out:
 526         spin_unlock(&kvm->mmu_lock);
 527         if (pfn_valid)
 528                 kvm_set_pfn_accessed(pfn);
 529         return ret;
 530 }
 531
 532 /**
 533  * kvm_mips_map_page() - Map a guest physical page.
 534  * @vcpu:               VCPU pointer.
 535  * @gpa:                Guest physical address of fault.
 536  * @write_fault:        Whether the fault was due to a write.
 537  * @out_entry:          New PTE for @gpa (written on success unless NULL).
 538  * @out_buddy:          New PTE for @gpa's buddy (written on success unless
 539  *                      NULL).
 540  *
 541  * Handle GPA faults by creating a new GPA mapping (or updating an existing
 542  * one).
 543  *
 544  * This takes care of marking pages young or dirty (idle/dirty page tracking),
 545  * asking KVM for the corresponding PFN, and creating a mapping in the GPA page
 546  * tables. Derived mappings (GVA page tables and TLBs) must be handled by the
 547  * caller.
 548  *
 549  * Returns:     0 on success, in which case the caller may use the @out_entry
 550  *              and @out_buddy PTEs to update derived mappings and resume guest
 551  *              execution.
 552  *              -EFAULT if there is no memory region at @gpa or a write was
 553  *              attempted to a read-only memory region. This is usually handled
 554  *              as an MMIO access.
 555  */
 556 static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
 557                              bool write_fault,
 558                              pte_t *out_entry, pte_t *out_buddy)
 559 {
 560         struct kvm *kvm = vcpu->kvm;
 561         struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
 562         gfn_t gfn = gpa >> PAGE_SHIFT;
 563         int srcu_idx, err;
 564         kvm_pfn_t pfn;
 565         pte_t *ptep, entry;
 566         bool writeable;
 567         unsigned long prot_bits;
 568         unsigned long mmu_seq;
 569
 570         /* Try the fast path to handle old / clean pages */
 571         srcu_idx = srcu_read_lock(&kvm->srcu);
 572         err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry,
 573                                       out_buddy);
 574         if (!err)
 575                 goto out;
 576
 577         /* We need a minimum of cached pages ready for page table creation */
 578         err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
 579         if (err)
 580                 goto out;
 581
 582 retry:
 583         /*
 584          * Used to check for invalidations in progress, of the pfn that is
 585          * returned by pfn_to_pfn_prot below.
 586          */
 587         mmu_seq = kvm->mmu_invalidate_seq;
 588         /*
 589          * Ensure the read of mmu_invalidate_seq isn't reordered with PTE reads
 590          * in gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't
 591          * risk the page we get a reference to getting unmapped before we have a
 592          * chance to grab the mmu_lock without mmu_invalidate_retry() noticing.
 593          *
 594          * This smp_rmb() pairs with the effective smp_wmb() of the combination
 595          * of the pte_unmap_unlock() after the PTE is zapped, and the
 596          * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before
 597          * mmu_invalidate_seq is incremented.
 598          */
 599         smp_rmb();
 600
 601         /* Slow path - ask KVM core whether we can access this GPA */
 602         pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable);
 603         if (is_error_noslot_pfn(pfn)) {
 604                 err = -EFAULT;
 605                 goto out;
 606         }
 607
 608         spin_lock(&kvm->mmu_lock);
 609         /* Check if an invalidation has taken place since we got pfn */
 610         if (mmu_invalidate_retry(kvm, mmu_seq)) {
 611                 /*
 612                  * This can happen when mappings are changed asynchronously, but
 613                  * also synchronously if a COW is triggered by
 614                  * gfn_to_pfn_prot().
 615                  */
 616                 spin_unlock(&kvm->mmu_lock);
 617                 kvm_release_pfn_clean(pfn);
 618                 goto retry;
 619         }
 620
 621         /* Ensure page tables are allocated */
 622         ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa);
 623
 624         /* Set up the PTE */
 625         prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default;
 626         if (writeable) {
 627                 prot_bits |= _PAGE_WRITE;
 628                 if (write_fault) {
 629                         prot_bits |= __WRITEABLE;
 630                         mark_page_dirty(kvm, gfn);
 631                         kvm_set_pfn_dirty(pfn);
 632                 }
 633         }
 634         entry = pfn_pte(pfn, __pgprot(prot_bits));
 635
 636         /* Write the PTE */
 637         set_pte(ptep, entry);
 638
 639         err = 0;
 640         if (out_entry)
 641                 *out_entry = *ptep;
 642         if (out_buddy)
 643                 *out_buddy = *ptep_buddy(ptep);
 644
 645         spin_unlock(&kvm->mmu_lock);
 646         kvm_release_pfn_clean(pfn);
 647         kvm_set_pfn_accessed(pfn);
 648 out:
 649         srcu_read_unlock(&kvm->srcu, srcu_idx);
 650         return err;
 651 }
 652
 653 int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr,
 654                                       struct kvm_vcpu *vcpu,
 655                                       bool write_fault)
 656 {
 657         int ret;
 658
 659         ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL);
 660         if (ret)
 661                 return ret;
 662
 663         /* Invalidate this entry in the TLB */
 664         return kvm_vz_host_tlb_inv(vcpu, badvaddr);
 665 }
 666
 667 /**
 668  * kvm_mips_migrate_count() - Migrate timer.
 669  * @vcpu:       Virtual CPU.
 670  *
 671  * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it
 672  * if it was running prior to being cancelled.
 673  *
 674  * Must be called when the VCPU is migrated to a different CPU to ensure that
 675  * timer expiry during guest execution interrupts the guest and causes the
 676  * interrupt to be delivered in a timely manner.
 677  */
 678 static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu)
 679 {
 680         if (hrtimer_cancel(&vcpu->arch.comparecount_timer))
 681                 hrtimer_restart(&vcpu->arch.comparecount_timer);
 682 }
 683
 684 /* Restore ASID once we are scheduled back after preemption */
 685 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 686 {
 687         unsigned long flags;
 688
 689         kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);
 690
 691         local_irq_save(flags);
 692
 693         vcpu->cpu = cpu;
 694         if (vcpu->arch.last_sched_cpu != cpu) {
 695                 kvm_debug("[%d->%d]KVM VCPU[%d] switch\n",
 696                           vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
 697                 /*
 698                  * Migrate the timer interrupt to the current CPU so that it
 699                  * always interrupts the guest and synchronously triggers a
 700                  * guest timer interrupt.
 701                  */
 702                 kvm_mips_migrate_count(vcpu);
 703         }
 704
 705         /* restore guest state to registers */
 706         kvm_mips_callbacks->vcpu_load(vcpu, cpu);
 707
 708         local_irq_restore(flags);
 709 }
 710
 711 /* ASID can change if another task is scheduled during preemption */
 712 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 713 {
 714         unsigned long flags;
 715         int cpu;
 716
 717         local_irq_save(flags);
 718
 719         cpu = smp_processor_id();
 720         vcpu->arch.last_sched_cpu = cpu;
 721         vcpu->cpu = -1;
 722
 723         /* save guest state in registers */
 724         kvm_mips_callbacks->vcpu_put(vcpu, cpu);
 725
 726         local_irq_restore(flags);
 727 }