kernel: separate state for trace-deferred syscalls
[minix.git] / kernel / arch / i386 / arch_system.c
blob15ffa5cd55524600a3f655fa7a1b43d8e4825572
1 /* system dependent functions for use inside the whole kernel. */
3 #include "kernel/kernel.h"
5 #include <unistd.h>
6 #include <ctype.h>
7 #include <string.h>
8 #include <machine/cmos.h>
9 #include <machine/bios.h>
10 #include <machine/cpu.h>
11 #include <minix/portio.h>
12 #include <minix/cpufeature.h>
13 #include <assert.h>
14 #include <signal.h>
15 #include <machine/vm.h>
17 #include <minix/u64.h>
19 #include "archconst.h"
20 #include "arch_proto.h"
21 #include "serial.h"
22 #include "oxpcie.h"
23 #include "direct_utils.h"
24 #include <machine/multiboot.h>
26 #include "glo.h"
28 #ifdef USE_APIC
29 #include "apic.h"
30 #endif
32 #ifdef USE_ACPI
33 #include "acpi.h"
34 #endif
/* Set to 1 by fpu_init() once CR4.OSFXSR has been enabled, i.e. when the
 * FXSAVE/FXRSTOR instructions (SSEx) are used for FPU context; 0 otherwise.
 */
static int osfxsr_feature;

/* CR0.MP and CR0.NE: handle FPU exceptions in native mode. */
#define CR0_MP_NE	0x0022
/* CR4.OSFXSR [bit 9]: set if FXSR is supported. */
#define CR4_OSFXSR	(1L<<9)
/* CR4.OSXMMEXCPT [bit 10]: set if we provide a #XM handler. */
#define CR4_OSXMMEXCPT	(1L<<10)

/* Base of the kernel stacks; initialized in arch_init(). */
void * k_stacks;

/* Forward declarations of the serial debug helpers defined further down. */
static void ser_debug(int c);
#ifdef CONFIG_SMP
static void ser_dump_proc_cpu(void);
#endif
#if !CONFIG_OXPCIE
static void ser_init(void);
#endif
55 void fpu_init(void)
57 unsigned short cw, sw;
59 fninit();
60 sw = fnstsw();
61 fnstcw(&cw);
63 if((sw & 0xff) == 0 &&
64 (cw & 0x103f) == 0x3f) {
65 /* We have some sort of FPU, but don't check exact model.
66 * Set CR0_NE and CR0_MP to handle fpu exceptions
67 * in native mode. */
68 write_cr0(read_cr0() | CR0_MP_NE);
69 get_cpulocal_var(fpu_presence) = 1;
70 if(_cpufeature(_CPUF_I386_FXSR)) {
71 u32_t cr4 = read_cr4() | CR4_OSFXSR; /* Enable FXSR. */
73 /* OSXMMEXCPT if supported
74 * FXSR feature can be available without SSE
76 if(_cpufeature(_CPUF_I386_SSE))
77 cr4 |= CR4_OSXMMEXCPT;
79 write_cr4(cr4);
80 osfxsr_feature = 1;
81 } else {
82 osfxsr_feature = 0;
84 } else {
85 /* No FPU presents. */
86 get_cpulocal_var(fpu_presence) = 0;
87 osfxsr_feature = 0;
88 return;
92 void save_local_fpu(struct proc *pr, int retain)
94 char *state = pr->p_seg.fpu_state;
96 /* Save process FPU context. If the 'retain' flag is set, keep the FPU
97 * state as is. If the flag is not set, the state is undefined upon
98 * return, and the caller is responsible for reloading a proper state.
101 if(!is_fpu())
102 return;
104 assert(state);
106 if(osfxsr_feature) {
107 fxsave(state);
108 } else {
109 fnsave(state);
110 if (retain)
111 (void) frstor(state);
115 void save_fpu(struct proc *pr)
117 #ifdef CONFIG_SMP
118 if (cpuid != pr->p_cpu) {
119 int stopped;
121 /* remember if the process was already stopped */
122 stopped = RTS_ISSET(pr, RTS_PROC_STOP);
124 /* stop the remote process and force its context to be saved */
125 smp_schedule_stop_proc_save_ctx(pr);
128 * If the process wasn't stopped let the process run again. The
129 * process is kept block by the fact that the kernel cannot run
130 * on its cpu
132 if (!stopped)
133 RTS_UNSET(pr, RTS_PROC_STOP);
135 return;
137 #endif
139 if (get_cpulocal_var(fpu_owner) == pr) {
140 disable_fpu_exception();
141 save_local_fpu(pr, TRUE /*retain*/);
145 /* reserve a chunk of memory for fpu state; every one has to
146 * be FPUALIGN-aligned.
148 static char fpu_state[NR_PROCS][FPU_XFP_SIZE] __aligned(FPUALIGN);
150 void arch_proc_reset(struct proc *pr)
152 char *v = NULL;
153 struct stackframe_s reg;
155 assert(pr->p_nr < NR_PROCS);
157 if(pr->p_nr >= 0) {
158 v = fpu_state[pr->p_nr];
159 /* verify alignment */
160 assert(!((vir_bytes)v % FPUALIGN));
161 /* initialize state */
162 memset(v, 0, FPU_XFP_SIZE);
165 /* Clear process state. */
166 memset(&reg, 0, sizeof(pr->p_reg));
167 if(iskerneln(pr->p_nr))
168 reg.psw = INIT_TASK_PSW;
169 else
170 reg.psw = INIT_PSW;
172 pr->p_seg.fpu_state = v;
174 /* Initialize the fundamentals that are (initially) the same for all
175 * processes - the segment selectors it gets to use.
177 pr->p_reg.cs = USER_CS_SELECTOR;
178 pr->p_reg.gs =
179 pr->p_reg.fs =
180 pr->p_reg.ss =
181 pr->p_reg.es =
182 pr->p_reg.ds = USER_DS_SELECTOR;
184 /* set full context and make sure it gets restored */
185 arch_proc_setcontext(pr, &reg, 0);
188 void arch_set_secondary_ipc_return(struct proc *p, u32_t val)
190 p->p_reg.bx = val;
193 int restore_fpu(struct proc *pr)
195 int failed;
196 char *state = pr->p_seg.fpu_state;
198 assert(state);
200 if(!proc_used_fpu(pr)) {
201 fninit();
202 pr->p_misc_flags |= MF_FPU_INITIALIZED;
203 } else {
204 if(osfxsr_feature) {
205 failed = fxrstor(state);
206 } else {
207 failed = frstor(state);
210 if (failed) return EINVAL;
213 return OK;
216 void cpu_identify(void)
218 u32_t eax, ebx, ecx, edx;
219 unsigned cpu = cpuid;
221 eax = 0;
222 _cpuid(&eax, &ebx, &ecx, &edx);
224 if (ebx == INTEL_CPUID_GEN_EBX && ecx == INTEL_CPUID_GEN_ECX &&
225 edx == INTEL_CPUID_GEN_EDX) {
226 cpu_info[cpu].vendor = CPU_VENDOR_INTEL;
227 } else if (ebx == AMD_CPUID_GEN_EBX && ecx == AMD_CPUID_GEN_ECX &&
228 edx == AMD_CPUID_GEN_EDX) {
229 cpu_info[cpu].vendor = CPU_VENDOR_AMD;
230 } else
231 cpu_info[cpu].vendor = CPU_VENDOR_UNKNOWN;
233 if (eax == 0)
234 return;
236 eax = 1;
237 _cpuid(&eax, &ebx, &ecx, &edx);
239 cpu_info[cpu].family = (eax >> 8) & 0xf;
240 if (cpu_info[cpu].family == 0xf)
241 cpu_info[cpu].family += (eax >> 20) & 0xff;
242 cpu_info[cpu].model = (eax >> 4) & 0xf;
243 if (cpu_info[cpu].model == 0xf || cpu_info[cpu].model == 0x6)
244 cpu_info[cpu].model += ((eax >> 16) & 0xf) << 4 ;
245 cpu_info[cpu].stepping = eax & 0xf;
246 cpu_info[cpu].flags[0] = ecx;
247 cpu_info[cpu].flags[1] = edx;
250 void arch_init(void)
252 k_stacks = (void*) &k_stacks_start;
253 assert(!((vir_bytes) k_stacks % K_STACK_SIZE));
255 #ifndef CONFIG_SMP
257 * use stack 0 and cpu id 0 on a single processor machine, SMP
258 * configuration does this in smp_init() for all cpus at once
260 tss_init(0, get_k_stack_top(0));
261 #endif
263 #if !CONFIG_OXPCIE
264 ser_init();
265 #endif
267 #ifdef USE_ACPI
268 acpi_init();
269 #endif
271 #if defined(USE_APIC) && !defined(CONFIG_SMP)
272 if (config_no_apic) {
273 BOOT_VERBOSE(printf("APIC disabled, using legacy PIC\n"));
275 else if (!apic_single_cpu_init()) {
276 BOOT_VERBOSE(printf("APIC not present, using legacy PIC\n"));
278 #endif
280 /* Reserve some BIOS ranges */
281 cut_memmap(&kinfo, BIOS_MEM_BEGIN, BIOS_MEM_END);
282 cut_memmap(&kinfo, BASE_MEM_TOP, UPPER_MEM_END);
285 /*===========================================================================*
286 * do_ser_debug *
287 *===========================================================================*/
288 void do_ser_debug()
290 u8_t c, lsr;
292 #if CONFIG_OXPCIE
294 int oxin;
295 if((oxin = oxpcie_in()) >= 0)
296 ser_debug(oxin);
298 #endif
300 lsr= inb(COM1_LSR);
301 if (!(lsr & LSR_DR))
302 return;
303 c = inb(COM1_RBR);
304 ser_debug(c);
307 static void ser_dump_queue_cpu(unsigned cpu)
309 int q;
310 struct proc ** rdy_head;
312 rdy_head = get_cpu_var(cpu, run_q_head);
314 for(q = 0; q < NR_SCHED_QUEUES; q++) {
315 struct proc *p;
316 if(rdy_head[q]) {
317 printf("%2d: ", q);
318 for(p = rdy_head[q]; p; p = p->p_nextready) {
319 printf("%s / %d ", p->p_name, p->p_endpoint);
321 printf("\n");
/* Dump the run queues: those of every cpu on SMP builds, those of cpu 0
 * otherwise.
 */
static void ser_dump_queues(void)
{
#ifndef CONFIG_SMP
	ser_dump_queue_cpu(0);
#else
	unsigned cpu = 0;

	printf("--- run queues ---\n");
	while (cpu < ncpus) {
		printf("CPU %d :\n", cpu);
		ser_dump_queue_cpu(cpu);
		cpu++;
	}
#endif
}
341 #ifdef CONFIG_SMP
342 static void dump_bkl_usage(void)
344 unsigned cpu;
346 printf("--- BKL usage ---\n");
347 for (cpu = 0; cpu < ncpus; cpu++) {
348 printf("cpu %3d kernel ticks 0x%x%08x bkl ticks 0x%x%08x succ %d tries %d\n", cpu,
349 ex64hi(kernel_ticks[cpu]),
350 ex64lo(kernel_ticks[cpu]),
351 ex64hi(bkl_ticks[cpu]),
352 ex64lo(bkl_ticks[cpu]),
353 bkl_succ[cpu], bkl_tries[cpu]);
357 static void reset_bkl_usage(void)
359 memset(kernel_ticks, 0, sizeof(kernel_ticks));
360 memset(bkl_ticks, 0, sizeof(bkl_ticks));
361 memset(bkl_tries, 0, sizeof(bkl_tries));
362 memset(bkl_succ, 0, sizeof(bkl_succ));
364 #endif
366 static void ser_debug(const int c)
368 serial_debug_active = 1;
370 switch(c)
372 case 'Q':
373 minix_shutdown(NULL);
374 NOT_REACHABLE;
375 #ifdef CONFIG_SMP
376 case 'B':
377 dump_bkl_usage();
378 break;
379 case 'b':
380 reset_bkl_usage();
381 break;
382 #endif
383 case '1':
384 ser_dump_proc();
385 break;
386 case '2':
387 ser_dump_queues();
388 break;
389 #ifdef CONFIG_SMP
390 case '4':
391 ser_dump_proc_cpu();
392 break;
393 #endif
394 #if DEBUG_TRACE
395 #define TOGGLECASE(ch, flag) \
396 case ch: { \
397 if(verboseflags & flag) { \
398 verboseflags &= ~flag; \
399 printf("%s disabled\n", #flag); \
400 } else { \
401 verboseflags |= flag; \
402 printf("%s enabled\n", #flag); \
404 break; \
406 TOGGLECASE('8', VF_SCHEDULING)
407 TOGGLECASE('9', VF_PICKPROC)
408 #endif
409 #ifdef USE_APIC
410 case 'I':
411 dump_apic_irq_state();
412 break;
413 #endif
415 serial_debug_active = 0;
418 #if DEBUG_SERIAL
419 void ser_dump_proc()
421 struct proc *pp;
423 for (pp= BEG_PROC_ADDR; pp < END_PROC_ADDR; pp++)
425 if (isemptyp(pp))
426 continue;
427 print_proc_recursive(pp);
431 #ifdef CONFIG_SMP
432 static void ser_dump_proc_cpu(void)
434 struct proc *pp;
435 unsigned cpu;
437 for (cpu = 0; cpu < ncpus; cpu++) {
438 printf("CPU %d processes : \n", cpu);
439 for (pp= BEG_USER_ADDR; pp < END_PROC_ADDR; pp++) {
440 if (isemptyp(pp) || pp->p_cpu != cpu)
441 continue;
442 print_proc(pp);
446 #endif
448 #endif /* DEBUG_SERIAL */
450 #if SPROFILE
452 int arch_init_profile_clock(const u32_t freq)
454 int r;
455 /* Set CMOS timer frequency. */
456 outb(RTC_INDEX, RTC_REG_A);
457 outb(RTC_IO, RTC_A_DV_OK | freq);
458 /* Enable CMOS timer interrupts. */
459 outb(RTC_INDEX, RTC_REG_B);
460 r = inb(RTC_IO);
461 outb(RTC_INDEX, RTC_REG_B);
462 outb(RTC_IO, r | RTC_B_PIE);
463 /* Mandatory read of CMOS register to enable timer interrupts. */
464 outb(RTC_INDEX, RTC_REG_C);
465 inb(RTC_IO);
467 return CMOS_CLOCK_IRQ;
470 void arch_stop_profile_clock(void)
472 int r;
473 /* Disable CMOS timer interrupts. */
474 outb(RTC_INDEX, RTC_REG_B);
475 r = inb(RTC_IO);
476 outb(RTC_INDEX, RTC_REG_B);
477 outb(RTC_IO, r & ~RTC_B_PIE);
480 void arch_ack_profile_clock(void)
482 /* Mandatory read of CMOS register to re-enable timer interrupts. */
483 outb(RTC_INDEX, RTC_REG_C);
484 inb(RTC_IO);
487 #endif
489 void arch_do_syscall(struct proc *proc)
491 /* do_ipc assumes that it's running because of the current process */
492 assert(proc == get_cpulocal_var(proc_ptr));
493 /* Make the system call, for real this time. */
494 assert(proc->p_misc_flags & MF_SC_DEFER);
495 proc->p_reg.retreg =
496 do_ipc(proc->p_defer.r1, proc->p_defer.r2, proc->p_defer.r3);
499 struct proc * arch_finish_switch_to_user(void)
501 char * stk;
502 struct proc * p;
504 #ifdef CONFIG_SMP
505 stk = (char *)tss[cpuid].sp0;
506 #else
507 stk = (char *)tss[0].sp0;
508 #endif
509 /* set pointer to the process to run on the stack */
510 p = get_cpulocal_var(proc_ptr);
511 *((reg_t *)stk) = (reg_t) p;
513 /* make sure IF is on in FLAGS so that interrupts won't be disabled
514 * once p's context is restored. this should not be possible.
516 assert(p->p_reg.psw & (1L << 9));
518 return p;
521 void arch_proc_setcontext(struct proc *p, struct stackframe_s *state, int isuser)
523 if(isuser) {
524 /* Restore user bits of psw from sc, maintain system bits
525 * from proc.
527 state->psw = (state->psw & X86_FLAGS_USER) |
528 (p->p_reg.psw & ~X86_FLAGS_USER);
531 /* someone wants to totally re-initialize process state */
532 assert(sizeof(p->p_reg) == sizeof(*state));
533 memcpy(&p->p_reg, state, sizeof(*state));
535 /* further code is instructed to not touch the context
536 * any more
538 p->p_misc_flags |= MF_CONTEXT_SET;
540 /* on x86 this requires returning using iret (KTS_INT)
541 * so that the full context is restored instead of relying on
542 * the userspace doing it (as it would do on SYSEXIT).
543 * as ESP and EIP are also reset, userspace won't try to
544 * restore bogus context after returning.
546 * if the process is not blocked, or the kernel will ignore
547 * our trap style, we needn't panic but things will probably
548 * not go well for the process (restored context will be ignored)
549 * and the situation should be debugged.
551 if(!(p->p_rts_flags)) {
552 printf("WARNINIG: setting full context of runnable process\n");
553 print_proc(p);
554 util_stacktrace();
556 if(p->p_seg.p_kern_trap_style == KTS_NONE)
557 printf("WARNINIG: setting full context of out-of-kernel process\n");
558 p->p_seg.p_kern_trap_style = KTS_FULLCONTEXT;
561 void restore_user_context(struct proc *p)
563 int trap_style = p->p_seg.p_kern_trap_style;
564 #if 0
565 #define TYPES 10
566 static int restores[TYPES], n = 0;
568 if(trap_style >= 0 && trap_style < TYPES)
569 restores[trap_style]++;
571 if(!(n++ % 500000)) {
572 int t;
573 for(t = 0; t < TYPES; t++)
574 if(restores[t])
575 printf("%d: %d ", t, restores[t]);
576 printf("\n");
578 #endif
580 p->p_seg.p_kern_trap_style = KTS_NONE;
582 if(trap_style == KTS_SYSENTER) {
583 restore_user_context_sysenter(p);
584 NOT_REACHABLE;
587 if(trap_style == KTS_SYSCALL) {
588 restore_user_context_syscall(p);
589 NOT_REACHABLE;
592 switch(trap_style) {
593 case KTS_NONE:
594 panic("no entry trap style known");
595 case KTS_INT_HARD:
596 case KTS_INT_UM:
597 case KTS_FULLCONTEXT:
598 case KTS_INT_ORIG:
599 restore_user_context_int(p);
600 NOT_REACHABLE;
601 default:
602 panic("unknown trap style recorded");
603 NOT_REACHABLE;
606 NOT_REACHABLE;
609 void fpu_sigcontext(struct proc *pr, struct sigframe *fr, struct sigcontext *sc)
611 int fp_error;
613 if (osfxsr_feature) {
614 fp_error = sc->sc_fpu_state.xfp_regs.fp_status &
615 ~sc->sc_fpu_state.xfp_regs.fp_control;
616 } else {
617 fp_error = sc->sc_fpu_state.fpu_regs.fp_status &
618 ~sc->sc_fpu_state.fpu_regs.fp_control;
621 if (fp_error & 0x001) { /* Invalid op */
623 * swd & 0x240 == 0x040: Stack Underflow
624 * swd & 0x240 == 0x240: Stack Overflow
625 * User must clear the SF bit (0x40) if set
627 fr->sf_code = FPE_FLTINV;
628 } else if (fp_error & 0x004) {
629 fr->sf_code = FPE_FLTDIV; /* Divide by Zero */
630 } else if (fp_error & 0x008) {
631 fr->sf_code = FPE_FLTOVF; /* Overflow */
632 } else if (fp_error & 0x012) {
633 fr->sf_code = FPE_FLTUND; /* Denormal, Underflow */
634 } else if (fp_error & 0x020) {
635 fr->sf_code = FPE_FLTRES; /* Precision */
636 } else {
637 fr->sf_code = 0; /* XXX - probably should be used for FPE_INTOVF or
638 * FPE_INTDIV */
642 reg_t arch_get_sp(struct proc *p) { return p->p_reg.sp; }
644 #if !CONFIG_OXPCIE
645 static void ser_init(void)
647 unsigned char lcr;
648 unsigned divisor;
650 /* keep BIOS settings if cttybaud is not set */
651 if (kinfo.serial_debug_baud <= 0) return;
653 /* set DLAB to make baud accessible */
654 lcr = LCR_8BIT | LCR_1STOP | LCR_NPAR;
655 outb(COM1_LCR, lcr | LCR_DLAB);
657 /* set baud rate */
658 divisor = UART_BASE_FREQ / kinfo.serial_debug_baud;
659 if (divisor < 1) divisor = 1;
660 if (divisor > 65535) divisor = 65535;
662 outb(COM1_DLL, divisor & 0xff);
663 outb(COM1_DLM, (divisor >> 8) & 0xff);
665 /* clear DLAB */
666 outb(COM1_LCR, lcr);
668 #endif