/*	$NetBSD: x86_xpmap.c,v 1.16 2009/10/19 18:41:11 bouyer Exp $	*/

/*
 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*
 * Copyright (c) 2006, 2007 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.16 2009/10/19 18:41:11 bouyer Exp $");

#include <sys/param.h>
#include <sys/systm.h>

#include <machine/pmap.h>
#include <machine/gdt.h>
#include <xen/xenfunc.h>

#include <dev/isa/isareg.h>
#include <machine/isa_machdep.h>
/* #define XENDEBUG_SYNC */
/* #define XENDEBUG_LOW */

#ifdef XENDEBUG
#define	XENPRINTF(x) printf x
#define	XENPRINTK(x) printk x
#define	XENPRINTK2(x) /* printk x */

static char XBUF[256];
#else
#define	XENPRINTF(x)
#define	XENPRINTK(x)
#define	XENPRINTK2(x)
#endif
#define	PRINTF(x) printf x
#define	PRINTK(x) printk x
/* on x86_64 the kernel runs in ring 3 */
#ifdef __x86_64__
#define PG_k PG_u
#else
#define PG_k 0
#endif
volatile shared_info_t *HYPERVISOR_shared_info;
/* Xen requires the start_info struct to be page aligned */
union start_info_union start_info_union __aligned(PAGE_SIZE);
unsigned long *xpmap_phys_to_machine_mapping;

void xen_failsafe_handler(void);

#define HYPERVISOR_mmu_update_self(req, count, success_count) \
	HYPERVISOR_mmu_update((req), (count), (success_count), DOMID_SELF)
void
xen_failsafe_handler(void)
{

	panic("xen_failsafe_handler called!\n");
}
void
xen_set_ldt(vaddr_t base, uint32_t entries)
{
	vaddr_t va, end;
	pt_entry_t *ptp;
	int s;

#ifdef __x86_64__
	end = base + (entries << 3);
#else
	end = base + entries * sizeof(union descriptor);
#endif

	for (va = base; va < end; va += PAGE_SIZE) {
		KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
		ptp = kvtopte(va);
		XENPRINTF(("xen_set_ldt %p %d %p\n", (void *)base,
		    entries, ptp));
		pmap_pte_clearbits(ptp, PG_RW);
	}
	s = splvm();
	xpq_queue_set_ldt(base, entries);
	splx(s);
}
void xpq_debug_dump(void);

#define XPQUEUE_SIZE 2048
static mmu_update_t xpq_queue[XPQUEUE_SIZE];
static int xpq_idx = 0;
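/*
 * Page-table updates are not issued one at a time; they are accumulated
 * in xpq_queue[] and pushed to the hypervisor in a single
 * HYPERVISOR_mmu_update hypercall by xpq_flush_queue().  A caller
 * typically queues a few updates and then flushes, e.g. (illustration
 * only, not code from this file):
 *
 *	xpq_queue_pte_update(pte_ma, npte);
 *	xpq_queue_invlpg(va);
 *	xpq_flush_queue();
 */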
void
xpq_flush_queue(void)
{
	int i, ok;

	XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx));
	for (i = 0; i < xpq_idx; i++)
		XENPRINTK2(("%d: %p %08" PRIx64 "\n", i,
		    (uint64_t)xpq_queue[i].ptr, (uint64_t)xpq_queue[i].val));
	if (xpq_idx != 0 &&
	    HYPERVISOR_mmu_update_self(xpq_queue, xpq_idx, &ok) < 0) {
		printf("xpq_flush_queue: %d entries \n", xpq_idx);
		for (i = 0; i < xpq_idx; i++)
			printf("0x%016" PRIx64 ": 0x%016" PRIx64 "\n",
			    (uint64_t)xpq_queue[i].ptr,
			    (uint64_t)xpq_queue[i].val);
		panic("HYPERVISOR_mmu_update failed\n");
	}
	xpq_idx = 0;
}
static inline void
xpq_increment_idx(void)
{

	xpq_idx++;
	if (__predict_false(xpq_idx == XPQUEUE_SIZE))
		xpq_flush_queue();
}
void
xpq_queue_machphys_update(paddr_t ma, paddr_t pa)
{

	XENPRINTK2(("xpq_queue_machphys_update ma=0x%" PRIx64 " pa=0x%" PRIx64
	    "\n", (int64_t)ma, (int64_t)pa));
	xpq_queue[xpq_idx].ptr = ma | MMU_MACHPHYS_UPDATE;
	xpq_queue[xpq_idx].val = (pa - XPMAP_OFFSET) >> PAGE_SHIFT;
	xpq_increment_idx();
}
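/*
 * The entry queued above updates the machine-to-physical translation
 * table: ptr carries the machine address tagged with MMU_MACHPHYS_UPDATE,
 * and val the pseudo-physical frame number it should translate to.
 */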
void
xpq_queue_pte_update(paddr_t ptr, pt_entry_t val)
{

	KASSERT((ptr & 3) == 0);
	xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
	xpq_queue[xpq_idx].val = val;
	xpq_increment_idx();
}
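/*
 * For MMU_NORMAL_PT_UPDATE, ptr is the machine address of the PTE to
 * modify; the low two bits of ptr encode the command, hence the KASSERT
 * above that the address is 4-byte aligned.
 */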
void
xpq_queue_pt_switch(paddr_t pa)
{
	struct mmuext_op op;

	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_pt_switch: 0x%" PRIx64 " 0x%" PRIx64 "\n",
	    (int64_t)pa, (int64_t)pa));
	op.cmd = MMUEXT_NEW_BASEPTR;
	op.arg1.mfn = pa >> PAGE_SHIFT;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_pt_switch");
}
void
xpq_queue_pin_table(paddr_t pa)
{
	struct mmuext_op op;

	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_pin_table: 0x%" PRIx64 " 0x%" PRIx64 "\n",
	    (int64_t)pa, (int64_t)pa));
	op.arg1.mfn = pa >> PAGE_SHIFT;

#if defined(__x86_64__)
	op.cmd = MMUEXT_PIN_L4_TABLE;
#else
	op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_pin_table");
}
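/*
 * Pinning asks Xen to validate the page as a page table of the given
 * level and keep it typed that way, so it can later be installed as
 * (part of) a page-table base.  The queue is flushed first so that any
 * pending updates to the table are applied before validation.
 */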
void
xpq_queue_pin_l3_table(paddr_t pa)
{
	struct mmuext_op op;

	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_pin_l3_table: 0x%" PRIx64 " 0x%" PRIx64 "\n",
	    (int64_t)pa, (int64_t)pa));
	op.arg1.mfn = pa >> PAGE_SHIFT;
	op.cmd = MMUEXT_PIN_L3_TABLE;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_pin_l3_table");
}
void
xpq_queue_unpin_table(paddr_t pa)
{
	struct mmuext_op op;

	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_unpin_table: 0x%" PRIx64 " 0x%" PRIx64 "\n",
	    (int64_t)pa, (int64_t)pa));
	op.arg1.mfn = pa >> PAGE_SHIFT;
	op.cmd = MMUEXT_UNPIN_TABLE;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_unpin_table");
}
void
xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
{
	struct mmuext_op op;

	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_set_ldt\n"));
	KASSERT(va == (va & ~PAGE_MASK));
	op.cmd = MMUEXT_SET_LDT;
	op.arg1.linear_addr = va;
	op.arg2.nr_ents = entries;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_set_ldt");
}
void
xpq_queue_tlb_flush(void)
{
	struct mmuext_op op;

	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_tlb_flush\n"));
	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_tlb_flush");
}
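/*
 * Note that MMUEXT_TLB_FLUSH_LOCAL (and MMUEXT_INVLPG_LOCAL below) only
 * act on the TLB of the calling virtual CPU.
 */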
void
xpq_flush_cache(void)
{
	struct mmuext_op op;

	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_flush_cache\n"));
	op.cmd = MMUEXT_FLUSH_CACHE;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_flush_cache");
}
void
xpq_queue_invlpg(vaddr_t va)
{
	struct mmuext_op op;

	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
	op.cmd = MMUEXT_INVLPG_LOCAL;
	op.arg1.linear_addr = (va & ~PAGE_MASK);
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_invlpg");
}
int
xpq_update_foreign(paddr_t ptr, pt_entry_t val, int dom)
{
	mmu_update_t op;
	int ok;

	xpq_flush_queue();

	op.ptr = ptr;
	op.val = val;
	if (HYPERVISOR_mmu_update(&op, 1, &ok, dom) < 0)
		return EFAULT;
	return (0);
}
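/*
 * Unlike the queued updates above, xpq_update_foreign() issues a single
 * synchronous mmu_update against the page tables of another domain
 * (the dom argument).
 */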
#ifdef XENDEBUG
void
xpq_debug_dump(void)
{
	int i;

	XENPRINTK2(("idx: %d\n", xpq_idx));
	for (i = 0; i < xpq_idx; i++) {
		snprintf(XBUF, sizeof(XBUF), "%" PRIx64 " %08" PRIx64,
		    (uint64_t)xpq_queue[i].ptr, (uint64_t)xpq_queue[i].val);
		if (++i < xpq_idx)
			snprintf(XBUF + strlen(XBUF),
			    sizeof(XBUF) - strlen(XBUF),
			    "%" PRIx64 " %08" PRIx64,
			    (uint64_t)xpq_queue[i].ptr,
			    (uint64_t)xpq_queue[i].val);
		if (++i < xpq_idx)
			snprintf(XBUF + strlen(XBUF),
			    sizeof(XBUF) - strlen(XBUF),
			    "%" PRIx64 " %08" PRIx64,
			    (uint64_t)xpq_queue[i].ptr,
			    (uint64_t)xpq_queue[i].val);
		if (++i < xpq_idx)
			snprintf(XBUF + strlen(XBUF),
			    sizeof(XBUF) - strlen(XBUF),
			    "%" PRIx64 " %08" PRIx64,
			    (uint64_t)xpq_queue[i].ptr,
			    (uint64_t)xpq_queue[i].val);
		XENPRINTK2(("%d: %s\n", xpq_idx, XBUF));
	}
}
#endif /* XENDEBUG */
extern volatile struct xencons_interface *xencons_interface; /* XXX */
extern struct xenstore_domain_interface *xenstore_interface; /* XXX */

static void xen_bt_set_readonly (vaddr_t);
static void xen_bootstrap_tables (vaddr_t, vaddr_t, int, int, int);
/* How many PDEs ? */
#if L2_SLOT_KERNBASE > 0
#define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
#else
#define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
#endif
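/*
 * NKL2_KIMG_ENTRIES is the number of L2 entries needed to map the kernel
 * image (plus one spare entry).  When L2_SLOT_KERNBASE is non-zero the
 * kernel is also mapped at a second L2 slot range, hence twice as many
 * entries.
 */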
/*
 * Construct and switch to new pagetables.
 * first_avail is the first vaddr we can use after
 * we get rid of the Xen pagetables.
 */

vaddr_t xen_pmap_bootstrap (void);
/*
 * Function to get rid of Xen bootstrap tables
 */

/* How many PDP do we need: */
#ifdef PAE
/*
 * For PAE, we consider a single contiguous L2 "superpage" of 4 pages,
 * all of them mapped by the L3 page.  We also need a shadow page
 * for L3[3].
 */
static const int l2_4_count = 6;
#else
static const int l2_4_count = PTP_LEVELS - 1;
#endif
vaddr_t
xen_pmap_bootstrap(void)
{
	int count, oldcount;
	long mapsize;
	vaddr_t bootstrap_tables, init_tables;

	xpmap_phys_to_machine_mapping =
	    (unsigned long *)xen_start_info.mfn_list;
	init_tables = xen_start_info.pt_base;
	__PRINTK(("xen_arch_pmap_bootstrap init_tables=0x%lx\n", init_tables));
	/* Space after the Xen bootstrap tables should be free */
	bootstrap_tables = xen_start_info.pt_base +
	    (xen_start_info.nr_pt_frames * PAGE_SIZE);

	/*
	 * Calculate how much space we need:
	 * first, everything mapped before the Xen bootstrap tables.
	 */
	mapsize = init_tables - KERNTEXTOFF;
	/* after the tables we'll have:
	 *  - UAREA
	 *  - dummy user PGD (x86_64)
	 *  - HYPERVISOR_shared_info
	 *  - ISA I/O mem (if needed)
	 */
	mapsize += UPAGES * NBPG;
	if (xendomain_is_dom0()) {
		/* space for ISA I/O mem */
		mapsize += IOM_SIZE;
	}

	/* at this point mapsize doesn't include the table size */
#ifdef __x86_64__
	count = TABLE_L2_ENTRIES;
#else
	count = (mapsize + (NBPD_L2 - 1)) >> L2_SHIFT;
#endif /* __x86_64__ */
	/* now compute how many L2 pages we need exactly */
	XENPRINTK(("bootstrap_final mapsize 0x%lx count %d\n", mapsize, count));
	while (mapsize + (count + l2_4_count) * PAGE_SIZE + KERNTEXTOFF >
	    ((long)count << L2_SHIFT) + KERNBASE) {
		count++;
	}
	/*
	 * one more L2 page: we'll allocate several pages after kva_start
	 * in pmap_bootstrap() before pmap_growkernel(), which have not been
	 * counted here.  It's not a big issue to allocate one more L2 as
	 * pmap_growkernel() will be called anyway.
	 */
	count++;
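	/*
	 * The loop above keeps growing count until the VA range covered by
	 * count L2 entries (count << L2_SHIFT, starting at KERNBASE) is
	 * large enough to also hold the new page-table pages themselves,
	 * since each extra table page enlarges what has to be mapped.
	 */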
	/*
	 * Install the bootstrap pages.  We may need more L2 pages here than
	 * the final table will have, as the bootstrap tables are installed
	 * after the final table.
	 */
	oldcount = count;

bootstrap_again:
	XENPRINTK(("bootstrap_again oldcount %d\n", oldcount));
	/*
	 * The Xen space we'll reclaim may not be enough for our new page
	 * tables; move the bootstrap tables if necessary.
	 */
	if (bootstrap_tables < init_tables + ((count + l2_4_count) * PAGE_SIZE))
		bootstrap_tables = init_tables +
		    ((count + l2_4_count) * PAGE_SIZE);
	/* make sure we have enough to map the bootstrap_tables */
	if (bootstrap_tables + ((oldcount + l2_4_count) * PAGE_SIZE) >
	    ((long)oldcount << L2_SHIFT) + KERNBASE) {
		oldcount++;
		goto bootstrap_again;
	}
	/* Create temporary tables */
	xen_bootstrap_tables(xen_start_info.pt_base, bootstrap_tables,
	    xen_start_info.nr_pt_frames, oldcount, 0);

	/* Create final tables */
	xen_bootstrap_tables(bootstrap_tables, init_tables,
	    oldcount + l2_4_count, count, 1);

	/* zero out free space after tables */
	memset((void *)(init_tables + ((count + l2_4_count) * PAGE_SIZE)), 0,
	    (UPAGES + 1) * NBPG);
	return (init_tables + ((count + l2_4_count) * PAGE_SIZE));
}
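/*
 * The value returned above is the first VA free after the final tables;
 * the caller treats it as the first available address for further
 * bootstrap allocations (see the comment before the prototype).
 */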
/*
 * Build a new table and switch to it.
 * old_count is # of old tables (including PGD, PDTPE and PDE).
 * new_count is # of new tables (PTE only).
 * We assume the areas don't overlap.
 */
static void
xen_bootstrap_tables (vaddr_t old_pgd, vaddr_t new_pgd,
	int old_count, int new_count, int final)
{
	pd_entry_t *pdtpe, *pde, *pte;
	pd_entry_t *cur_pgd, *bt_pgd;
	paddr_t addr;
	vaddr_t page, avail, text_end, map_end;
	int i;
	extern char __data_start;
	__PRINTK(("xen_bootstrap_tables(0x%lx, 0x%lx, %d, %d)\n",
	    old_pgd, new_pgd, old_count, new_count));

	text_end = ((vaddr_t)&__data_start) & ~PAGE_MASK;
	/*
	 * size of R/W area after kernel text:
	 *  xencons_interface (if present)
	 *  xenstore_interface (if present)
	 *  table pages (new_count + l2_4_count entries)
	 * extra mappings (only when final is true):
	 *  UAREA
	 *  dummy user PGD (x86_64 only)/gdt page (i386 only)
	 *  HYPERVISOR_shared_info
	 *  ISA I/O mem (if needed)
	 */
	map_end = new_pgd + ((new_count + l2_4_count) * NBPG);
	if (final) {
		map_end += (UPAGES + 1) * NBPG;
		HYPERVISOR_shared_info = (shared_info_t *)map_end;
		map_end += NBPG;
	}
	/*
	 * we always set atdevbase, as it's used by init386 to find the first
	 * available VA.  map_end is updated only if we are dom0, so
	 * atdevbase -> atdevbase + IOM_SIZE will be mapped only in
	 * this case.
	 */
	if (final)
		atdevbase = map_end;

	if (final && xendomain_is_dom0()) {
		/* ISA I/O mem */
		map_end += IOM_SIZE;
	}

	__PRINTK(("xen_bootstrap_tables text_end 0x%lx map_end 0x%lx\n",
	    text_end, map_end));
	__PRINTK(("console 0x%lx ", xen_start_info.console.domU.mfn));
	__PRINTK(("xenstore 0x%lx\n", xen_start_info.store_mfn));
	/*
	 * Create bootstrap page tables.  What we need:
	 * - a PGD (level 4)
	 * - a PDTPE (level 3)
	 * - a PDE (level 2)
	 * - some PTEs (level 1)
	 */

	cur_pgd = (pd_entry_t *) old_pgd;
	bt_pgd = (pd_entry_t *) new_pgd;
	memset (bt_pgd, 0, PAGE_SIZE);
	avail = new_pgd + PAGE_SIZE;
#if PTP_LEVELS > 3
	/* Install level 3 */
	pdtpe = (pd_entry_t *) avail;
	memset (pdtpe, 0, PAGE_SIZE);
	avail += PAGE_SIZE;

	addr = ((u_long) pdtpe) - KERNBASE;
	bt_pgd[pl4_pi(KERNTEXTOFF)] =
	    xpmap_ptom_masked(addr) | PG_k | PG_RW | PG_V;

	__PRINTK(("L3 va 0x%lx pa 0x%" PRIx64 " entry 0x%" PRIx64
	    " -> L4[0x%x]\n",
	    pdtpe, (uint64_t)addr, (uint64_t)bt_pgd[pl4_pi(KERNTEXTOFF)],
	    pl4_pi(KERNTEXTOFF)));
#else
	pdtpe = bt_pgd;
#endif /* PTP_LEVELS > 3 */
#if PTP_LEVELS > 2
	/* Level 2 */
	pde = (pd_entry_t *) avail;
	memset(pde, 0, PAGE_SIZE);
	avail += PAGE_SIZE;

	addr = ((u_long) pde) - KERNBASE;
	pdtpe[pl3_pi(KERNTEXTOFF)] =
	    xpmap_ptom_masked(addr) | PG_k | PG_V | PG_RW;
	__PRINTK(("L2 va 0x%lx pa 0x%" PRIx64 " entry 0x%" PRIx64
	    " -> L3[0x%x]\n",
	    pde, (int64_t)addr, (int64_t)pdtpe[pl3_pi(KERNTEXTOFF)],
	    pl3_pi(KERNTEXTOFF)));
#elif defined(PAE)
	/* our PAE-style level 2: 5 contiguous pages (4 L2 + 1 shadow) */
	pde = (pd_entry_t *) avail;
	memset(pde, 0, PAGE_SIZE * 5);
	avail += PAGE_SIZE * 5;
	addr = ((u_long) pde) - KERNBASE;
	/*
	 * enter L2 pages in the L3.
	 * The real L2 kernel PD will be the last one (so that
	 * pde[L2_SLOT_KERN] always points to the shadow).
	 */
	for (i = 0; i < 3; i++, addr += PAGE_SIZE) {
		/*
		 * Xen doesn't want R/W mappings in L3 entries; it'll add
		 * them itself.
		 */
		pdtpe[i] = xpmap_ptom_masked(addr) | PG_k | PG_V;
		__PRINTK(("L2 va 0x%lx pa 0x%" PRIx64 " entry 0x%" PRIx64
		    " -> L3[0x%x]\n", (vaddr_t)pde + PAGE_SIZE * i,
		    (int64_t)addr, (int64_t)pdtpe[i], i));
	}
	addr += PAGE_SIZE;
	pdtpe[3] = xpmap_ptom_masked(addr) | PG_k | PG_V;
	__PRINTK(("L2 va 0x%lx pa 0x%" PRIx64 " entry 0x%" PRIx64
	    " -> L3[0x%x]\n", (vaddr_t)pde + PAGE_SIZE * 4,
	    (int64_t)addr, (int64_t)pdtpe[3], 3));
#else
	pde = bt_pgd;
#endif /* PTP_LEVELS > 2 */
	/* Level 1 */
	page = KERNTEXTOFF;
	for (i = 0; i < new_count; i ++) {
		vaddr_t cur_page = page;

		pte = (pd_entry_t *) avail;
		avail += PAGE_SIZE;

		memset(pte, 0, PAGE_SIZE);
		while (pl2_pi(page) == pl2_pi (cur_page)) {
			if (page >= map_end) {
				/* not mapped at all */
				pte[pl1_pi(page)] = 0;
				page += PAGE_SIZE;
				continue;
			}
			pte[pl1_pi(page)] = xpmap_ptom_masked(page - KERNBASE);
			if (page == (vaddr_t)HYPERVISOR_shared_info) {
				pte[pl1_pi(page)] = xen_start_info.shared_info;
				__PRINTK(("HYPERVISOR_shared_info "
				    "va 0x%lx pte 0x%" PRIx64 "\n",
				    HYPERVISOR_shared_info,
				    (int64_t)pte[pl1_pi(page)]));
			}
			if ((xpmap_ptom_masked(page - KERNBASE) >> PAGE_SHIFT)
			    == xen_start_info.console.domU.mfn) {
				xencons_interface = (void *)page;
				pte[pl1_pi(page)] =
				    xen_start_info.console.domU.mfn;
				pte[pl1_pi(page)] <<= PAGE_SHIFT;
				__PRINTK(("xencons_interface "
				    "va 0x%lx pte 0x%" PRIx64 "\n",
				    xencons_interface,
				    (int64_t)pte[pl1_pi(page)]));
			}
			if ((xpmap_ptom_masked(page - KERNBASE) >> PAGE_SHIFT)
			    == xen_start_info.store_mfn) {
				xenstore_interface = (void *)page;
				pte[pl1_pi(page)] = xen_start_info.store_mfn;
				pte[pl1_pi(page)] <<= PAGE_SHIFT;
				__PRINTK(("xenstore_interface "
				    "va 0x%lx pte 0x%" PRIx64 "\n",
				    xenstore_interface,
				    (int64_t)pte[pl1_pi(page)]));
			}
			if (page >= (vaddr_t)atdevbase &&
			    page < (vaddr_t)atdevbase + IOM_SIZE) {
				pte[pl1_pi(page)] =
				    IOM_BEGIN + (page - (vaddr_t)atdevbase);
			}
			pte[pl1_pi(page)] |= PG_k | PG_V;
			if (page < text_end) {
				/* map kernel text RO */
				pte[pl1_pi(page)] |= 0;
			} else if (page >= old_pgd
			    && page < old_pgd + (old_count * PAGE_SIZE)) {
				/* map old page tables RO */
				pte[pl1_pi(page)] |= 0;
			} else if (page >= new_pgd &&
			    page < new_pgd +
			    ((new_count + l2_4_count) * PAGE_SIZE)) {
				/* map new page tables RO */
				pte[pl1_pi(page)] |= 0;
			} else {
				/* map page RW */
				pte[pl1_pi(page)] |= PG_RW;
			}
			if ((page >= old_pgd &&
			    page < old_pgd + (old_count * PAGE_SIZE))
			    || page >= new_pgd) {
				__PRINTK(("va 0x%lx pa 0x%lx "
				    "entry 0x%" PRIx64 " -> L1[0x%x]\n",
				    page, page - KERNBASE,
				    (int64_t)pte[pl1_pi(page)], pl1_pi(page)));
			}
			page += PAGE_SIZE;
		}
		addr = ((u_long) pte) - KERNBASE;
		pde[pl2_pi(cur_page)] =
		    xpmap_ptom_masked(addr) | PG_k | PG_RW | PG_V;
		__PRINTK(("L1 va 0x%lx pa 0x%" PRIx64 " entry 0x%" PRIx64
		    " -> L2[0x%x]\n", pte, (int64_t)addr,
		    (int64_t)pde[pl2_pi(cur_page)], pl2_pi(cur_page)));
		/* Mark readonly */
		xen_bt_set_readonly((vaddr_t) pte);
	}
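	/*
	 * Each new L1 page was handed to xen_bt_set_readonly() above: Xen
	 * will refuse to use a page as a page table while the guest still
	 * holds writable mappings to it, so the tables must be mapped RO
	 * before being installed or pinned.
	 */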
	/* Install recursive page tables mapping */
#ifdef PAE
	/*
	 * we need a shadow page for the kernel's L2 page.
	 * The real L2 kernel PD will be the last one (so that
	 * pde[L2_SLOT_KERN] always points to the shadow).
	 */
	memcpy(&pde[L2_SLOT_KERN + NPDPG], &pde[L2_SLOT_KERN], PAGE_SIZE);
	pmap_kl2pd = &pde[L2_SLOT_KERN + NPDPG];
	pmap_kl2paddr = (u_long)pmap_kl2pd - KERNBASE;
	/*
	 * We don't enter a recursive entry from the L3 PD.  Instead,
	 * we enter the first 4 L2 pages, which include the kernel's L2
	 * shadow.  But we have to enter the shadow after switching
	 * %cr3, or Xen will refcount some PTE with the wrong type.
	 */
	addr = (u_long)pde - KERNBASE;
	for (i = 0; i < 3; i++, addr += PAGE_SIZE) {
		pde[PDIR_SLOT_PTE + i] = xpmap_ptom_masked(addr) | PG_k | PG_V;
		__PRINTK(("pde[%d] va 0x%lx pa 0x%lx entry 0x%" PRIx64 "\n",
		    (int)(PDIR_SLOT_PTE + i), pde + PAGE_SIZE * i, (long)addr,
		    (int64_t)pde[PDIR_SLOT_PTE + i]));
	}
	addr += PAGE_SIZE; /* point to shadow L2 */
	pde[PDIR_SLOT_PTE + 3] = xpmap_ptom_masked(addr) | PG_k | PG_V;
	__PRINTK(("pde[%d] va 0x%lx pa 0x%lx entry 0x%" PRIx64 "\n",
	    (int)(PDIR_SLOT_PTE + 3), pde + PAGE_SIZE * 4, (long)addr,
	    (int64_t)pde[PDIR_SLOT_PTE + 3]));
	/* Mark tables RO, and pin the kernel's shadow as L2 */
	addr = (u_long)pde - KERNBASE;
	for (i = 0; i < 5; i++, addr += PAGE_SIZE) {
		xen_bt_set_readonly(((vaddr_t)pde) + PAGE_SIZE * i);
		if (i == 2 || i == 3)
			continue;
		__PRINTK(("pin L2 %d addr 0x%" PRIx64 "\n", i, (int64_t)addr));
		xpq_queue_pin_table(xpmap_ptom_masked(addr));
	}

	addr = (u_long)pde - KERNBASE + 3 * PAGE_SIZE;
	__PRINTK(("pin L2 %d addr 0x%" PRIx64 "\n", 2, (int64_t)addr));
	xpq_queue_pin_table(xpmap_ptom_masked(addr));

	addr = (u_long)pde - KERNBASE + 2 * PAGE_SIZE;
	__PRINTK(("pin L2 %d addr 0x%" PRIx64 "\n", 2, (int64_t)addr));
	xpq_queue_pin_table(xpmap_ptom_masked(addr));
#else /* PAE */
	/* recursive entry in higher-level PD */
	bt_pgd[PDIR_SLOT_PTE] =
	    xpmap_ptom_masked(new_pgd - KERNBASE) | PG_k | PG_V;
	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] va 0x%lx pa 0x%" PRIx64
	    " entry 0x%" PRIx64 "\n", new_pgd, (int64_t)new_pgd - KERNBASE,
	    (int64_t)bt_pgd[PDIR_SLOT_PTE]));
	/* Mark tables RO */
	xen_bt_set_readonly((vaddr_t) pde);
#endif /* PAE */
#if PTP_LEVELS > 2 || defined(PAE)
	xen_bt_set_readonly((vaddr_t) pdtpe);
#endif
#if PTP_LEVELS > 3
	xen_bt_set_readonly(new_pgd);
#endif

	/* Pin the PGD */
	__PRINTK(("pin PGD\n"));
#ifdef PAE
	xpq_queue_pin_l3_table(xpmap_ptom_masked(new_pgd - KERNBASE));
#else
	xpq_queue_pin_table(xpmap_ptom_masked(new_pgd - KERNBASE));
#endif
	/* Save phys. addr of PDP, for libkvm. */
	PDPpaddr = (long)pde;

#ifdef PAE
	/* also save the address of the L3 page */
	pmap_l3paddr = (new_pgd - KERNBASE);
#endif /* PAE */
	/* Switch to new tables */
	__PRINTK(("switch to PGD\n"));
	xpq_queue_pt_switch(xpmap_ptom_masked(new_pgd - KERNBASE));
	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] now entry 0x%" PRIx64 "\n",
	    (int64_t)bt_pgd[PDIR_SLOT_PTE]));

#ifdef PAE
	if (final) {
		/* now enter the kernel's PTE mappings */
		addr = (u_long)pde - KERNBASE + PAGE_SIZE * 3;
		xpq_queue_pte_update(
		    xpmap_ptom(((vaddr_t)&pde[PDIR_SLOT_PTE + 3]) - KERNBASE),
		    xpmap_ptom_masked(addr) | PG_k | PG_V);
		xpq_flush_queue();
	}
#endif /* PAE */
	/* Now we can safely reclaim the space taken by the old tables */

	__PRINTK(("unpin old PGD\n"));
	/* Unpin old PGD */
	xpq_queue_unpin_table(xpmap_ptom_masked(old_pgd - KERNBASE));
	/* Mark old tables RW */
	page = old_pgd;
	addr = (paddr_t) pde[pl2_pi(page)] & PG_FRAME;
	addr = xpmap_mtop(addr);
	pte = (pd_entry_t *) ((u_long)addr + KERNBASE);
	pte += pl1_pi(page);
	__PRINTK(("*pde 0x%" PRIx64 " addr 0x%" PRIx64 " pte 0x%lx\n",
	    (int64_t)pde[pl2_pi(page)], (int64_t)addr, (long)pte));
	while (page < old_pgd + (old_count * PAGE_SIZE) && page < map_end) {
		addr = xpmap_ptom(((u_long) pte) - KERNBASE);
		XENPRINTK(("addr 0x%" PRIx64 " pte 0x%lx *pte 0x%" PRIx64 "\n",
		    (int64_t)addr, (long)pte, (int64_t)*pte));
		xpq_queue_pte_update(addr, *pte | PG_RW);
		page += PAGE_SIZE;
		/*
		 * Our PTEs are contiguous, so it's safe to just "++" here.
		 */
		pte++;
	}
	xpq_flush_queue();
}
/*
 * Bootstrap helper functions
 */

/*
 * Mark a page readonly
 * XXX: assuming vaddr = paddr + KERNBASE
 */
static void
xen_bt_set_readonly (vaddr_t page)
{
	pt_entry_t entry;

	entry = xpmap_ptom_masked(page - KERNBASE);
	entry |= PG_k | PG_V;

	HYPERVISOR_update_va_mapping (page, entry, UVMF_INVLPG);
}
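/*
 * The PTE written above simply lacks PG_RW, so the page becomes read-only
 * for the guest; UVMF_INVLPG makes Xen invalidate the TLB entry for that
 * VA as part of the same hypercall.
 */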
#ifdef __x86_64__
void
xen_set_user_pgd(paddr_t page)
{
	struct mmuext_op op;
	int s = splvm();

	xpq_flush_queue();
	op.cmd = MMUEXT_NEW_USER_BASEPTR;
	op.arg1.mfn = xpmap_phys_to_machine_mapping[page >> PAGE_SHIFT];
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xen_set_user_pgd: failed to install new user page"
		    " directory %lx", page);
	splx(s);
}
#endif /* __x86_64__ */