/* This file contains code for initialization of protected mode, to initialize
 * code and data segment descriptors, and to initialize global descriptors
 * for local descriptors in the process table.
 */

#include <string.h>
#include <assert.h>
#include <libexec.h>

#include <minix/cpufeature.h>
#include <sys/types.h>
#include <machine/multiboot.h>
#include "kernel/kernel.h"

#include "archconst.h"
#include "arch_proto.h"
#define INT_GATE_TYPE	(INT_286_GATE | DESC_386_BIT)
#define TSS_TYPE	(AVL_286_TSS | DESC_386_BIT)
/* This is OK initially, when the 1:1 mapping is still there. */
char *video_mem = (char *) MULTIBOOT_VIDEO_BUFFER;

/* Storage for gdt, idt and tss. */
struct segdesc_s gdt[GDT_SIZE] __aligned(DESC_SIZE);
struct gatedesc_s idt[IDT_SIZE] __aligned(DESC_SIZE);
struct tss_s tss[CONFIG_MAX_CPUS];

u32_t k_percpu_stacks[CONFIG_MAX_CPUS];

int prot_init_done = 0;
phys_bytes vir2phys(void *vir)
{
	extern char _kern_vir_base, _kern_phys_base;	/* in kernel.lds */
	u32_t offset = (vir_bytes) &_kern_vir_base -
		(vir_bytes) &_kern_phys_base;
	return (phys_bytes)vir - offset;
}
/*===========================================================================*
 *				enable_iop				     *
 *===========================================================================*/
void enable_iop(struct proc *pp)
{
/* Allow a user process to use I/O instructions. Change the I/O Permission
 * Level bits in the psw. These specify the least-privileged Current
 * Privilege Level allowed to execute I/O instructions. Users and servers
 * have CPL 3. You can't have less privilege than that. Kernel has CPL 0,
 * tasks CPL 1.
 */
	pp->p_reg.psw |= 0x3000;
}
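/*
 * Added note: the 0x3000 OR'ed into the psw above is the IOPL field, bits 12
 * and 13 of EFLAGS; setting both bits raises the I/O privilege level to 3.
 * An equivalent spelled-out form (sketch only, the macro name is
 * hypothetical):
 *
 *	#define EFLAGS_IOPL_SHIFT	12
 *	pp->p_reg.psw |= (3 << EFLAGS_IOPL_SHIFT);
 */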
/*===========================================================================*
 *				sdesc					     *
 *===========================================================================*/
void sdesc(struct segdesc_s *segdp, phys_bytes base, vir_bytes size)
{
/* Fill in the size fields (base, limit and granularity) of a descriptor. */
	segdp->base_low = base;
	segdp->base_middle = base >> BASE_MIDDLE_SHIFT;
	segdp->base_high = base >> BASE_HIGH_SHIFT;

	--size;			/* convert to a limit, 0 size means 4G */
	if (size > BYTE_GRAN_MAX) {
		segdp->limit_low = size >> PAGE_GRAN_SHIFT;
		segdp->granularity = GRANULAR | (size >>
			(PAGE_GRAN_SHIFT + GRANULARITY_SHIFT));
	} else {
		segdp->limit_low = size;
		segdp->granularity = size >> GRANULARITY_SHIFT;
	}
	segdp->granularity |= DEFAULT;	/* means BIG for data seg */
}
/*===========================================================================*
 *				init_dataseg				     *
 *===========================================================================*/
void init_param_dataseg(register struct segdesc_s *segdp,
	phys_bytes base, vir_bytes size, const int privilege)
{
	/* Build descriptor for a data segment. */
	sdesc(segdp, base, size);
	segdp->access = (privilege << DPL_SHIFT) | (PRESENT | SEGMENT |
		WRITEABLE | ACCESSED);
	/* EXECUTABLE = 0, EXPAND_DOWN = 0 */
}

void init_dataseg(int index, const int privilege)
{
	init_param_dataseg(&gdt[index], 0, 0xFFFFFFFF, privilege);
}
/*===========================================================================*
 *				init_codeseg				     *
 *===========================================================================*/
static void init_codeseg(int index, int privilege)
{
	/* Build descriptor for a code segment. */
	sdesc(&gdt[index], 0, 0xFFFFFFFF);
	gdt[index].access = (privilege << DPL_SHIFT)
		| (PRESENT | SEGMENT | EXECUTABLE | READABLE);
	/* CONFORMING = 0, ACCESSED = 0 */
}
static struct gate_table_s gate_table_pic[] = {
	{ hwint00, VECTOR( 0), INTR_PRIVILEGE },
	{ hwint01, VECTOR( 1), INTR_PRIVILEGE },
	{ hwint02, VECTOR( 2), INTR_PRIVILEGE },
	{ hwint03, VECTOR( 3), INTR_PRIVILEGE },
	{ hwint04, VECTOR( 4), INTR_PRIVILEGE },
	{ hwint05, VECTOR( 5), INTR_PRIVILEGE },
	{ hwint06, VECTOR( 6), INTR_PRIVILEGE },
	{ hwint07, VECTOR( 7), INTR_PRIVILEGE },
	{ hwint08, VECTOR( 8), INTR_PRIVILEGE },
	{ hwint09, VECTOR( 9), INTR_PRIVILEGE },
	{ hwint10, VECTOR(10), INTR_PRIVILEGE },
	{ hwint11, VECTOR(11), INTR_PRIVILEGE },
	{ hwint12, VECTOR(12), INTR_PRIVILEGE },
	{ hwint13, VECTOR(13), INTR_PRIVILEGE },
	{ hwint14, VECTOR(14), INTR_PRIVILEGE },
	{ hwint15, VECTOR(15), INTR_PRIVILEGE },
	{ NULL, 0, 0 }	/* sentinel; idt_copy_vectors() stops here */
};
static struct gate_table_s gate_table_exceptions[] = {
	{ divide_error, DIVIDE_VECTOR, INTR_PRIVILEGE },
	{ single_step_exception, DEBUG_VECTOR, INTR_PRIVILEGE },
	{ nmi, NMI_VECTOR, INTR_PRIVILEGE },
	{ breakpoint_exception, BREAKPOINT_VECTOR, USER_PRIVILEGE },
	{ overflow, OVERFLOW_VECTOR, USER_PRIVILEGE },
	{ bounds_check, BOUNDS_VECTOR, INTR_PRIVILEGE },
	{ inval_opcode, INVAL_OP_VECTOR, INTR_PRIVILEGE },
	{ copr_not_available, COPROC_NOT_VECTOR, INTR_PRIVILEGE },
	{ double_fault, DOUBLE_FAULT_VECTOR, INTR_PRIVILEGE },
	{ copr_seg_overrun, COPROC_SEG_VECTOR, INTR_PRIVILEGE },
	{ inval_tss, INVAL_TSS_VECTOR, INTR_PRIVILEGE },
	{ segment_not_present, SEG_NOT_VECTOR, INTR_PRIVILEGE },
	{ stack_exception, STACK_FAULT_VECTOR, INTR_PRIVILEGE },
	{ general_protection, PROTECTION_VECTOR, INTR_PRIVILEGE },
	{ page_fault, PAGE_FAULT_VECTOR, INTR_PRIVILEGE },
	{ copr_error, COPROC_ERR_VECTOR, INTR_PRIVILEGE },
	{ alignment_check, ALIGNMENT_CHECK_VECTOR, INTR_PRIVILEGE },
	{ machine_check, MACHINE_CHECK_VECTOR, INTR_PRIVILEGE },
	{ simd_exception, SIMD_EXCEPTION_VECTOR, INTR_PRIVILEGE },
	{ ipc_entry_softint_orig, IPC_VECTOR_ORIG, USER_PRIVILEGE },
	{ kernel_call_entry_orig, KERN_CALL_VECTOR_ORIG, USER_PRIVILEGE },
	{ ipc_entry_softint_um, IPC_VECTOR_UM, USER_PRIVILEGE },
	{ kernel_call_entry_um, KERN_CALL_VECTOR_UM, USER_PRIVILEGE },
	{ NULL, 0, 0 }	/* sentinel; idt_copy_vectors() stops here */
};
int tss_init(unsigned cpu, void * kernel_stack)
{
	struct tss_s * t = &tss[cpu];
	int index = TSS_INDEX(cpu);
	struct segdesc_s *tssgdt;

	tssgdt = &gdt[index];

	init_param_dataseg(tssgdt, (phys_bytes) t,
		sizeof(struct tss_s), INTR_PRIVILEGE);
	tssgdt->access = PRESENT | (INTR_PRIVILEGE << DPL_SHIFT) | TSS_TYPE;

	/* Build TSS. */
	memset(t, 0, sizeof(*t));
	t->ds = t->es = t->fs = t->gs = t->ss0 = KERN_DS_SELECTOR;
	t->cs = KERN_CS_SELECTOR;
	t->iobase = sizeof(struct tss_s);	/* empty i/o permissions map */

	/*
	 * Make space for the process pointer and cpu id, and point sp0 at
	 * the first usable word below them.
	 */
	k_percpu_stacks[cpu] = t->sp0 =
		((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED;

	/*
	 * Set the cpu id at the top of the stack, so we know on which cpu
	 * this stack is in use when we trap to the kernel.
	 */
	*((reg_t *)(t->sp0 + 1 * sizeof(reg_t))) = cpu;
	/* Set up Intel SYSENTER support if available. */
	if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
		ia32_msr_write(INTEL_MSR_SYSENTER_CS, 0, KERN_CS_SELECTOR);
		ia32_msr_write(INTEL_MSR_SYSENTER_ESP, 0, t->sp0);
		ia32_msr_write(INTEL_MSR_SYSENTER_EIP, 0,
			(u32_t) ipc_entry_sysenter);
	}
	/* Set up AMD SYSCALL support if available. */
	if(minix_feature_flags & MKF_I386_AMD_SYSCALL) {
		u32_t msr_lo, msr_hi;

		/* set SYSCALL ENABLE bit in EFER MSR */
		ia32_msr_read(AMD_MSR_EFER, &msr_hi, &msr_lo);
		msr_lo |= AMD_EFER_SCE;
		ia32_msr_write(AMD_MSR_EFER, msr_hi, msr_lo);

		/* set STAR register value */
#define set_star_cpu(forcpu) if(cpu == forcpu) { \
		ia32_msr_write(AMD_MSR_STAR, \
			((u32_t)USER_CS_SELECTOR << 16) | (u32_t)KERN_CS_SELECTOR, \
			(u32_t) ipc_entry_syscall_cpu ## forcpu); }
		set_star_cpu(0);
		set_star_cpu(1);
		set_star_cpu(2);
		set_star_cpu(3);
		set_star_cpu(4);
		set_star_cpu(5);
		set_star_cpu(6);
		set_star_cpu(7);
		/* Beyond eight CPUs the STAR value would not be set. */

		assert(CONFIG_MAX_CPUS <= 8);
	}
	return SEG_SELECTOR(index);
}
phys_bytes init_segdesc(int gdt_index, void *base, int size)
{
	struct desctableptr_s *dtp = (struct desctableptr_s *) &gdt[gdt_index];
	dtp->limit = size - 1;
	dtp->base = (phys_bytes) base;

	return (phys_bytes) dtp;
}
void int_gate(struct gatedesc_s *tab,
	unsigned vec_nr, vir_bytes offset, unsigned dpl_type)
{
/* Build descriptor for an interrupt gate. */
	register struct gatedesc_s *idp;

	idp = &tab[vec_nr];
	idp->offset_low = offset;
	idp->selector = KERN_CS_SELECTOR;
	idp->p_dpl_type = dpl_type;
	idp->offset_high = offset >> OFFSET_HIGH_SHIFT;
}
void int_gate_idt(unsigned vec_nr, vir_bytes offset, unsigned dpl_type)
{
	int_gate(idt, vec_nr, offset, dpl_type);
}
void idt_copy_vectors(struct gate_table_s * first)
{
	struct gate_table_s *gtp;
	for (gtp = first; gtp->gate; gtp++) {
		int_gate(idt, gtp->vec_nr, (vir_bytes) gtp->gate,
			PRESENT | INT_GATE_TYPE |
			(gtp->privilege << DPL_SHIFT));
	}
}
void idt_copy_vectors_pic(void)
{
	idt_copy_vectors(gate_table_pic);
}

void idt_init(void)
{
	idt_copy_vectors_pic();
	idt_copy_vectors(gate_table_exceptions);
}
struct desctableptr_s gdt_desc, idt_desc;

void idt_reload(void)
{
	x86_lidt(&idt_desc);
}

multiboot_module_t *bootmod(int pnr)
{
	int i;

	assert(pnr >= 0);

	/* Search for the desired process in the boot process
	 * list. The first NR_TASKS ones do not correspond
	 * to a module, however, so we don't search those.
	 */
	for(i = NR_TASKS; i < NR_BOOT_PROCS; i++) {
		int p = i - NR_TASKS;
		if(image[i].proc_nr == pnr) {
			assert(p < MULTIBOOT_MAX_MODS);
			assert(p < kinfo.mbi.mi_mods_count);
			return &kinfo.module_list[p];
		}
	}

	panic("boot module %d not found", pnr);
}
int booting_cpu = 0;

void prot_load_selectors(void)
{
	/* This function is called both by prot_init on the BSP and by the
	 * early AP booting code in mpx.S on secondary CPUs. Everything is
	 * set up the same, except for the TSS, which is per-CPU.
	 */
	x86_lgdt(&gdt_desc);		/* Load gdt */
	idt_init();
	idt_reload();
	x86_lldt(LDT_SELECTOR);		/* Load bogus ldt */
	x86_ltr(TSS_SELECTOR(booting_cpu));

	x86_load_kerncs();
	x86_load_ds(KERN_DS_SELECTOR);
	x86_load_es(KERN_DS_SELECTOR);
	x86_load_fs(KERN_DS_SELECTOR);
	x86_load_gs(KERN_DS_SELECTOR);
	x86_load_ss(KERN_DS_SELECTOR);
}
/*===========================================================================*
 *				prot_init				     *
 *===========================================================================*/
void prot_init(void)
{
	extern char k_boot_stktop;

	if(_cpufeature(_CPUF_I386_SYSENTER))
		minix_feature_flags |= MKF_I386_INTEL_SYSENTER;
	if(_cpufeature(_CPUF_I386_SYSCALL))
		minix_feature_flags |= MKF_I386_AMD_SYSCALL;

	memset(gdt, 0, sizeof(gdt));
	memset(idt, 0, sizeof(idt));

	/* Build GDT, IDT, IDT descriptors. */
	gdt_desc.base = (u32_t) gdt;
	gdt_desc.limit = sizeof(gdt)-1;
	idt_desc.base = (u32_t) idt;
	idt_desc.limit = sizeof(idt)-1;
	tss_init(0, &k_boot_stktop);

	init_param_dataseg(&gdt[LDT_INDEX],
		(phys_bytes) 0, 0, INTR_PRIVILEGE);	/* unusable LDT */
	gdt[LDT_INDEX].access = PRESENT | LDT;
	init_codeseg(KERN_CS_INDEX, INTR_PRIVILEGE);
	init_dataseg(KERN_DS_INDEX, INTR_PRIVILEGE);
	init_codeseg(USER_CS_INDEX, USER_PRIVILEGE);
	init_dataseg(USER_DS_INDEX, USER_PRIVILEGE);

	/* Currently the multiboot segments are loaded, which is fine, but
	 * let's replace them with the ones from our own GDT so we test
	 * right away whether they work as expected.
	 */
	prot_load_selectors();

	/* Set up a new post-relocate bootstrap pagetable, so that we can
	 * map in VM and no longer rely on pre-relocated data.
	 */
	pg_clear();
	pg_identity(&kinfo);	/* Still need 1:1 for lapic and video mem and such. */
	pg_mapkernel();
	pg_load();

	prot_init_done = 1;
}
static int alloc_for_vm = 0;

void arch_post_init(void)
{
	/* Let the memory mapping code know what's going on at bootstrap time */
	struct proc *vm;
	vm = proc_addr(VM_PROC_NR);
	get_cpulocal_var(ptproc) = vm;
	pg_info(&vm->p_seg.p_cr3, &vm->p_seg.p_cr3_v);
}
static int libexec_pg_alloc(struct exec_info *execi, vir_bytes vaddr, size_t len)
{
	pg_map(PG_ALLOCATEME, vaddr, vaddr+len, &kinfo);
	pg_load();
	memset((char *) vaddr, 0, len);
	alloc_for_vm += len;
	return OK;
}
void arch_boot_proc(struct boot_image *ip, struct proc *rp)
{
	multiboot_module_t *mod;
	struct ps_strings *psp;
	char *sp;

	if(rp->p_nr < 0) return;

	mod = bootmod(rp->p_nr);

	/* Important special case: we put VM in the bootstrap pagetable
	 * so it can run.
	 */
	if(rp->p_nr == VM_PROC_NR) {
		struct exec_info execi;

		memset(&execi, 0, sizeof(execi));

		/* exec parameters */
		execi.stack_high = kinfo.user_sp;
		execi.stack_size = 64 * 1024;	/* not too crazy as it must be preallocated */
		execi.proc_e = ip->endpoint;
		execi.hdr = (char *) mod->mod_start;	/* phys mem direct */
		execi.filesize = execi.hdr_len = mod->mod_end - mod->mod_start;
		strlcpy(execi.progname, ip->proc_name, sizeof(execi.progname));
		/* callbacks for use in the kernel */
		execi.copymem = libexec_copy_memcpy;
		execi.clearmem = libexec_clear_memset;
		execi.allocmem_prealloc_junk = libexec_pg_alloc;
		execi.allocmem_prealloc_cleared = libexec_pg_alloc;
		execi.allocmem_ondemand = libexec_pg_alloc;
		execi.clearproc = NULL;

		/* parse VM ELF binary and alloc/map it into bootstrap pagetable */
		if(libexec_load_elf(&execi) != OK)
			panic("VM loading failed");
		/* Set up a ps_strings struct on the stack, pointing to the
		 * following argv, envp. */
		sp = (char *)execi.stack_high;
		sp -= sizeof(struct ps_strings);
		psp = (struct ps_strings *) sp;

		/* Take the stack pointer down three words to give startup code
		 * something to use as "argc", "argv" and "envp".
		 */
		sp -= (sizeof(void *) + sizeof(void *) + sizeof(int));

		// The stack is mapped in the current (VM's)
		// linear address space, so it is available.
		psp->ps_argvstr = (char **)(sp + sizeof(int));
		psp->ps_nargvstr = 0;
		psp->ps_envstr = psp->ps_argvstr + sizeof(void *);
		psp->ps_nenvstr = 0;

		arch_proc_init(rp, execi.pc, (vir_bytes)sp,
			execi.stack_high - sizeof(struct ps_strings),
			ip->proc_name);

		/* Free the VM blob that was just copied into existence. */
		add_memmap(&kinfo, mod->mod_start, mod->mod_end-mod->mod_start);
		mod->mod_end = mod->mod_start = 0;

		/* Remember how much memory was allocated for VM. */
		kinfo.vm_allocated_bytes = alloc_for_vm;
	}
}