1 /* $NetBSD: pte.h,v 1.27 2011/02/01 20:09:08 chuck Exp $ */
4 * Copyright (c) 2001 Wasabi Systems, Inc.
7 * Written by Frank van der Linden for Wasabi Systems, Inc.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
39 * Copyright (c) 1997 Charles D. Cranor and Washington University.
40 * All rights reserved.
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
51 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
52 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
53 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
54 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
55 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
56 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
57 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
58 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
59 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
60 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64 * pte.h rewritten by chuck based on the jolitz version, plus random
65 * info on the pentium and other processors found on the net. the
66 * goal of this rewrite is to provide enough documentation on the MMU
67 * hardware that the reader will be able to understand it without having
68 * to refer to a hardware manual.
78 * i386 MMU hardware structure (without PAE extension):
80 * the i386 MMU is a two-level MMU which maps 4GB of virtual memory.
81 * the pagesize is 4K (4096 [0x1000] bytes), although newer pentium
82 * processors can support a 4MB pagesize as well.
84 * the first level table (segment table?) is called a "page directory"
85 * and it contains 1024 page directory entries (PDEs). each PDE is
86 * 4 bytes (an int), so a PD fits in a single 4K page. this page is
87 * the page directory page (PDP). each PDE in a PDP maps 4MB of space
88 * (1024 * 4MB = 4GB). a PDE contains the physical address of the
89 * second level table: the page table. or, if 4MB pages are being used,
90 * then the PDE contains the PA of the 4MB page being mapped.
92 * a page table consists of 1024 page table entries (PTEs). each PTE is
93 * 4 bytes (an int), so a page table also fits in a single 4K page. a
94 * 4K page being used as a page table is called a page table page (PTP).
95 * each PTE in a PTP maps one 4K page (1024 * 4K = 4MB). a PTE contains
96 * the physical address of the page it maps and some flag bits (described
99 * the processor has a special register, "cr3", which points to the
100 * PDP which is currently controlling the mappings of the virtual
103 * the following picture shows the translation process for a 4K page:
105 * %cr3 register [PA of PDP]
108 * | bits <31-22> of VA bits <21-12> of VA bits <11-0>
109 * | index the PDP (0 - 1023) index the PTP are the page offset
112 * +--->+----------+ | |
113 * | PD Page | PA of v |
114 * | |---PTP-------->+------------+ |
115 * | 1024 PDE | | page table |--PTE--+ |
116 * | entries | | (aka PTP) | | |
117 * +----------+ | 1024 PTE | | |
121 * bits <31-12> bits <11-0>
122 * p h y s i c a l a d d r
124 * the i386 caches PTEs in a TLB. it is important to flush out old
125 * TLB mappings when making a change to a mapping. writing to the
126 * %cr3 will flush the entire TLB. newer processors also have an
127 * instruction that will invalidate the mapping of a single page (which
128 * is useful if you are changing a single mapping because it preserves
129 * all the other cached TLB entries).
131 * as shown, bits 31-12 of the PTE contain the PA of the page being mapped.
132 * the rest of the PTE is defined as follows:
134 * 11 n/a available for OS use, hardware ignores it
135 * 10 n/a available for OS use, hardware ignores it
136 * 9 n/a available for OS use, hardware ignores it
137 * 8 G global bit (see discussion below)
138 * 7 PS page size [for PDEs] (0=4k, 1=4M <if supported>)
139 * 6 D dirty (modified) page
140 * 5 A accessed (referenced) page
141 * 4 PCD cache disable
142 * 3 PWT page write-through (1=write-through caching for this page)
143 * 2 U/S user/supervisor bit (0=supervisor only, 1=both u&s)
144 * 1 R/W read/write bit (0=read only, 1=read-write)
145 * 0 P present (valid)
148 * - PS is only supported on newer processors
149 * - PTEs with the G bit are global in the sense that they are not
150 * flushed from the TLB when %cr3 is written (to flush, use the
151 * "flush single page" instruction). this is only supported on
152 * newer processors. this bit can be used to keep the kernel's
153 * TLB entries around while context switching. since the kernel
154 * is mapped into all processes at the same place it does not make
155 * sense to flush these entries when switching from one process'
158 * The PAE extension extends the size of the PTE to 64 bits (52-bit physical
159 * address) and is compatible with the amd64 PTE format. The first level
160 * maps 2M, the second 1G, so a third level page table is introduced to
161 * map the 4GB virtual address space. This PD has only 4 entries.
162 * We can't use recursive mapping at level 3 to map the PD pages, as this
163 * would eat one GB of address space. In addition, Xen imposes restrictions
164 * on the entries we put in the L3 page (for example, the page pointed to by
165 * the last slot can't be shared among different L3 pages), which makes
166 * handling this L3 page in the same way we do for L2 on i386 (or L4 on amd64)
167 * difficult. For most things we'll just pretend to have only 2 levels,
168 * with the 2 high bits of the L2 index being in fact the index in the
172 #if !defined(_LOCORE)
175 * here we define the data types for PDEs and PTEs
178 typedef uint64_t pd_entry_t
; /* PDE: page directory entry, 64 bits wide (the entry size described above for PAE) */
179 typedef uint64_t pt_entry_t
; /* PTE: page table entry, 64 bits wide (the entry size described above for PAE) */
181 typedef uint32_t pd_entry_t
; /* PDE: page directory entry, 4 bytes (the non-PAE entry size described above) */
182 typedef uint32_t pt_entry_t
; /* PTE: page table entry, 4 bytes (the non-PAE entry size described above) */
188 * now we define various constants and macros for playing with virtual addresses
195 #define NBPD_L1 (1ULL << L1_SHIFT) /* # bytes mapped by L1 ent (4K) */
196 #define NBPD_L2 (1ULL << L2_SHIFT) /* # bytes mapped by L2 ent (2MB) */
197 #define NBPD_L3 (1ULL << L3_SHIFT) /* # bytes mapped by L3 ent (1GB) */
199 #define L3_MASK 0xc0000000 /* VA bits 31-30: L3 index */
200 #define L2_REALMASK 0x3fe00000 /* VA bits 29-21: real L2 index */
201 #define L2_MASK (L2_REALMASK | L3_MASK) /* VA bits 31-21: "extended" L2 index (L3 index in the 2 high bits) */
202 #define L1_MASK 0x001ff000 /* VA bits 20-12: L1 index */
204 #define L3_FRAME (L3_MASK) /* 0xc0000000 */
205 #define L2_FRAME (L3_FRAME | L2_MASK) /* 0xffe00000: every VA bit above the L1 index */
206 #define L1_FRAME (L2_FRAME|L1_MASK) /* 0xfffff000: every VA bit above the page offset */
208 #define PG_FRAME 0x000ffffffffff000ULL /* page frame mask (entry bits 51-12) */
209 #define PG_LGFRAME 0x000fffffffe00000ULL /* large (2MB) page frame mask (entry bits 51-21) */
211 /* macros to get real L2 and L3 index, from our "extended" L2 index */
212 #define l2tol3(idx) ((idx) >> (L3_SHIFT - L2_SHIFT)) /* shift out the real-L2 bits, leaving the L3 index from the high bits */
213 #define l2tol2(idx) ((idx) & (L2_REALMASK >> L2_SHIFT)) /* keep only the bits covered by L2_REALMASK: the real L2 index */
219 #define NBPD_L1 (1UL << L1_SHIFT) /* # bytes mapped by L1 ent (4K) */
220 #define NBPD_L2 (1UL << L2_SHIFT) /* # bytes mapped by L2 ent (4MB) */
222 #define L2_MASK 0xffc00000 /* VA bits 31-22: index into the page directory */
223 #define L1_MASK 0x003ff000 /* VA bits 21-12: index into the page table */
225 #define L2_FRAME (L2_MASK) /* 0xffc00000 */
226 #define L1_FRAME (L2_FRAME|L1_MASK) /* 0xfffff000: every VA bit above the page offset */
228 #define PG_FRAME 0xfffff000 /* page frame mask (entry bits 31-12) */
229 #define PG_LGFRAME 0xffc00000 /* large (4MB) page frame mask (entry bits 31-22) */
233 * here we define the bits of the PDE/PTE, as described above:
235 * XXXCDC: need to rename these (PG_u == ugly).
238 #define PG_V 0x00000001 /* bit 0 (P): valid entry */
239 #define PG_RO 0x00000000 /* read-only page: no bit set, i.e. PG_RW absent */
240 #define PG_RW 0x00000002 /* bit 1 (R/W): read-write page */
241 #define PG_u 0x00000004 /* bit 2 (U/S): user accessible page */
242 #define PG_PROT 0x00000806 /* all protection bits: bits 1, 2 and 11 (values of PG_RW, PG_u, PG_AVAIL3); NOTE(review): why bit 11 is included is not visible here */
243 #define PG_WT 0x00000008 /* bit 3 (PWT): write through */
244 #define PG_N 0x00000010 /* bit 4 (PCD): non-cacheable */
245 #define PG_U 0x00000020 /* bit 5 (A): has been used */
246 #define PG_M 0x00000040 /* bit 6 (D): has been modified */
247 #define PG_PAT 0x00000080 /* bit 7: PAT (on pte); same value as PG_PS */
248 #define PG_PS 0x00000080 /* bit 7 (PS, for PDEs): 4MB page size (2MB for PAE); same value as PG_PAT */
249 #define PG_G 0x00000100 /* bit 8 (G): global, don't TLB flush */
250 #define PG_AVAIL1 0x00000200 /* bit 9: ignored by hardware */
251 #define PG_AVAIL2 0x00000400 /* bit 10: ignored by hardware */
252 #define PG_AVAIL3 0x00000800 /* bit 11: ignored by hardware */
253 #define PG_LGPAT 0x00001000 /* bit 12: PAT on large pages */
256 * various short-hand protection codes
259 #define PG_KR 0x00000000 /* kernel read-only: neither PG_RW nor PG_u set */
260 #define PG_KW 0x00000002 /* kernel read-write: same value as PG_RW, PG_u not set */
263 #define PG_NX 0x8000000000000000ULL /* No-execute: bit 63, so it only fits a 64-bit entry */
265 #define PG_NX 0 /* dummy: zero, so OR-ing it into an entry changes nothing; NOTE(review): the conditional choosing between the two PG_NX definitions is not visible here */
270 #endif /* _I386_PTE_H_ */