[aur-mirror.git] / rtai-kernel / hal-linux-2.6.35.7-x86-2.8-01.patch
1 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
2 index f942bb7..2a79581 100644
3 --- a/arch/x86/Kconfig
4 +++ b/arch/x86/Kconfig
5 @@ -18,6 +18,7 @@ config X86_64
6 ### Arch settings
7 config X86
8 def_bool y
9 + select HAVE_IPIPE_HOSTRT if IPIPE
10 select HAVE_AOUT if X86_32
11 select HAVE_READQ
12 select HAVE_WRITEQ
13 @@ -509,6 +510,7 @@ config SCHED_OMIT_FRAME_POINTER
15 menuconfig PARAVIRT_GUEST
16 bool "Paravirtualized guest support"
17 + depends on !IPIPE
18 ---help---
19 Say Y here to get to see options related to running Linux under
20 various hypervisors. This option alone does not add any kernel code.
21 @@ -560,6 +562,7 @@ source "arch/x86/lguest/Kconfig"
23 config PARAVIRT
24 bool "Enable paravirtualization code"
25 + depends on !IPIPE
26 ---help---
27 This changes the kernel so it can modify itself when it is run
28 under a hypervisor, potentially improving performance significantly
29 @@ -799,6 +802,8 @@ config SCHED_MC
31 source "kernel/Kconfig.preempt"
33 +source "kernel/ipipe/Kconfig"
35 config X86_UP_APIC
36 bool "Local APIC support on uniprocessors"
37 depends on X86_32 && !SMP && !X86_32_NON_STANDARD
38 diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
39 index 1fa03e0..bdc3afb 100644
40 --- a/arch/x86/include/asm/apic.h
41 +++ b/arch/x86/include/asm/apic.h
42 @@ -415,7 +415,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
44 #endif /* CONFIG_X86_LOCAL_APIC */
46 +#ifdef CONFIG_IPIPE
47 +#define ack_APIC_irq() do { } while(0)
48 +static inline void __ack_APIC_irq(void)
49 +#else /* !CONFIG_IPIPE */
50 +#define __ack_APIC_irq() ack_APIC_irq()
51 static inline void ack_APIC_irq(void)
52 +#endif /* CONFIG_IPIPE */
55 * ack_APIC_irq() actually gets compiled as a single instruction
56 diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
57 index 7fe3b30..0a471f1 100644
58 --- a/arch/x86/include/asm/apicdef.h
59 +++ b/arch/x86/include/asm/apicdef.h
60 @@ -149,6 +149,7 @@
61 # define MAX_LOCAL_APIC 32768
62 #endif
64 +#ifndef __ASSEMBLY__
66 * All x86-64 systems are xAPIC compatible.
67 * In the following, "apicid" is a physical APIC ID.
68 @@ -424,4 +425,7 @@ struct local_apic {
69 #else
70 #define BAD_APICID 0xFFFFu
71 #endif
73 +#endif /* !__ASSEMBLY__ */
75 #endif /* _ASM_X86_APICDEF_H */
76 diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
77 index 8e8ec66..b2f8e98 100644
78 --- a/arch/x86/include/asm/entry_arch.h
79 +++ b/arch/x86/include/asm/entry_arch.h
80 @@ -22,6 +22,7 @@ BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1,
81 smp_invalidate_interrupt)
82 BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2,
83 smp_invalidate_interrupt)
84 +#ifndef CONFIG_IPIPE
85 BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3,
86 smp_invalidate_interrupt)
87 BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4,
88 @@ -32,6 +33,7 @@ BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6,
89 smp_invalidate_interrupt)
90 BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
91 smp_invalidate_interrupt)
92 +#endif /* !CONFIG_IPIPE */
93 #endif
95 BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
96 diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
97 index 46c0fe0..0cf2ce3 100644
98 --- a/arch/x86/include/asm/hw_irq.h
99 +++ b/arch/x86/include/asm/hw_irq.h
100 @@ -35,6 +35,13 @@ extern void spurious_interrupt(void);
101 extern void thermal_interrupt(void);
102 extern void reschedule_interrupt(void);
103 extern void mce_self_interrupt(void);
104 +#ifdef CONFIG_IPIPE
105 +void ipipe_ipi0(void);
106 +void ipipe_ipi1(void);
107 +void ipipe_ipi2(void);
108 +void ipipe_ipi3(void);
109 +void ipipe_ipiX(void);
110 +#endif
112 extern void invalidate_interrupt(void);
113 extern void invalidate_interrupt0(void);
114 @@ -115,6 +122,7 @@ extern void smp_apic_timer_interrupt(struct pt_regs *);
115 extern void smp_spurious_interrupt(struct pt_regs *);
116 extern void smp_x86_platform_ipi(struct pt_regs *);
117 extern void smp_error_interrupt(struct pt_regs *);
118 +extern void smp_perf_pending_interrupt(struct pt_regs *);
119 #ifdef CONFIG_X86_IO_APIC
120 extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
121 #endif
122 @@ -127,6 +135,7 @@ extern void smp_invalidate_interrupt(struct pt_regs *);
123 #else
124 extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
125 #endif
126 +extern asmlinkage void smp_reboot_interrupt(void);
127 #endif
129 extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
130 diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
131 index c991b3a..fe511b2 100644
132 --- a/arch/x86/include/asm/i387.h
133 +++ b/arch/x86/include/asm/i387.h
134 @@ -331,11 +331,14 @@ static inline void __clear_fpu(struct task_struct *tsk)
135 static inline void kernel_fpu_begin(void)
137 struct thread_info *me = current_thread_info();
138 + unsigned long flags;
139 preempt_disable();
140 + local_irq_save_hw_cond(flags);
141 if (me->status & TS_USEDFPU)
142 __save_init_fpu(me->task);
143 else
144 clts();
145 + local_irq_restore_hw_cond(flags);
148 static inline void kernel_fpu_end(void)
149 diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
150 index 1655147..093fe80 100644
151 --- a/arch/x86/include/asm/i8259.h
152 +++ b/arch/x86/include/asm/i8259.h
153 @@ -24,7 +24,7 @@ extern unsigned int cached_irq_mask;
154 #define SLAVE_ICW4_DEFAULT 0x01
155 #define PIC_ICW4_AEOI 2
157 -extern raw_spinlock_t i8259A_lock;
158 +IPIPE_DECLARE_RAW_SPINLOCK(i8259A_lock);
160 /* the PIC may need a careful delay on some platforms, hence specific calls */
161 static inline unsigned char inb_pic(unsigned int port)
162 diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
163 index 0b72282..6574056 100644
164 --- a/arch/x86/include/asm/ipi.h
165 +++ b/arch/x86/include/asm/ipi.h
166 @@ -68,6 +68,9 @@ __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest
167 * to the APIC.
169 unsigned int cfg;
170 + unsigned long flags;
172 + local_irq_save_hw(flags);
175 * Wait for idle.
176 @@ -83,6 +86,8 @@ __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest
177 * Send the IPI. The write to APIC_ICR fires this off.
179 native_apic_mem_write(APIC_ICR, cfg);
181 + local_irq_restore_hw(flags);
185 diff --git a/arch/x86/include/asm/ipipe.h b/arch/x86/include/asm/ipipe.h
186 new file mode 100644
187 index 0000000..8af0104
188 --- /dev/null
189 +++ b/arch/x86/include/asm/ipipe.h
190 @@ -0,0 +1,157 @@
191 +/* -*- linux-c -*-
192 + * arch/x86/include/asm/ipipe.h
194 + * Copyright (C) 2007 Philippe Gerum.
196 + * This program is free software; you can redistribute it and/or modify
197 + * it under the terms of the GNU General Public License as published by
198 + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
199 + * USA; either version 2 of the License, or (at your option) any later
200 + * version.
202 + * This program is distributed in the hope that it will be useful,
203 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
204 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
205 + * GNU General Public License for more details.
207 + * You should have received a copy of the GNU General Public License
208 + * along with this program; if not, write to the Free Software
209 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
210 + */
212 +#ifndef __X86_IPIPE_H
213 +#define __X86_IPIPE_H
215 +#ifdef CONFIG_IPIPE
217 +#ifndef IPIPE_ARCH_STRING
218 +#define IPIPE_ARCH_STRING "2.8-01"
219 +#define IPIPE_MAJOR_NUMBER 2
220 +#define IPIPE_MINOR_NUMBER 8
221 +#define IPIPE_PATCH_NUMBER 1
222 +#endif
224 +DECLARE_PER_CPU(struct pt_regs, __ipipe_tick_regs);
226 +DECLARE_PER_CPU(unsigned long, __ipipe_cr2);
228 +static inline unsigned __ipipe_get_irq_vector(int irq)
230 +#ifdef CONFIG_X86_IO_APIC
231 + unsigned __ipipe_get_ioapic_irq_vector(int irq);
232 + return __ipipe_get_ioapic_irq_vector(irq);
233 +#elif defined(CONFIG_X86_LOCAL_APIC)
234 + return irq >= IPIPE_FIRST_APIC_IRQ && irq < IPIPE_NR_XIRQS ?
235 + ipipe_apic_irq_vector(irq) : irq + IRQ0_VECTOR;
236 +#else
237 + return irq + IRQ0_VECTOR;
238 +#endif
241 +#ifdef CONFIG_X86_32
242 +# include "ipipe_32.h"
243 +#else
244 +# include "ipipe_64.h"
245 +#endif
248 + * The logical processor id and the current Linux task are read from the PDA,
249 + * so this is always safe, regardless of the underlying stack.
250 + */
251 +#define ipipe_processor_id() raw_smp_processor_id()
252 +#define ipipe_safe_current() current
254 +#define prepare_arch_switch(next) \
255 +do { \
256 + ipipe_schedule_notify(current, next); \
257 + local_irq_disable_hw(); \
258 +} while(0)
260 +#define task_hijacked(p) \
261 + ({ int x = __ipipe_root_domain_p; \
262 + if (x) local_irq_enable_hw(); !x; })
264 +struct ipipe_domain;
266 +struct ipipe_sysinfo {
268 + int ncpus; /* Number of CPUs on board */
269 + u64 cpufreq; /* CPU frequency (in Hz) */
271 + /* Arch-dependent block */
273 + struct {
274 + unsigned tmirq; /* Timer tick IRQ */
275 + u64 tmfreq; /* Timer frequency */
276 + } archdep;
279 +/* Private interface -- Internal use only */
281 +#define __ipipe_check_platform() do { } while(0)
282 +#define __ipipe_init_platform() do { } while(0)
283 +#define __ipipe_enable_irq(irq) irq_to_desc(irq)->chip->enable(irq)
284 +#define __ipipe_disable_irq(irq) irq_to_desc(irq)->chip->disable(irq)
286 +#ifdef CONFIG_SMP
287 +void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd);
288 +#else
289 +#define __ipipe_hook_critical_ipi(ipd) do { } while(0)
290 +#endif
292 +#define __ipipe_disable_irqdesc(ipd, irq) do { } while(0)
294 +void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq);
296 +void __ipipe_enable_pipeline(void);
298 +void __ipipe_do_critical_sync(unsigned irq, void *cookie);
300 +void __ipipe_serial_debug(const char *fmt, ...);
302 +extern int __ipipe_tick_irq;
304 +#ifdef CONFIG_X86_LOCAL_APIC
305 +#define ipipe_update_tick_evtdev(evtdev) \
306 + do { \
307 + if (strcmp((evtdev)->name, "lapic") == 0) \
308 + __ipipe_tick_irq = \
309 + ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR); \
310 + else \
311 + __ipipe_tick_irq = 0; \
312 + } while (0)
313 +#else
314 +#define ipipe_update_tick_evtdev(evtdev) \
315 + __ipipe_tick_irq = 0
316 +#endif
318 +int __ipipe_check_lapic(void);
320 +int __ipipe_check_tickdev(const char *devname);
322 +#define __ipipe_syscall_watched_p(p, sc) \
323 + (ipipe_notifier_enabled_p(p) || (unsigned long)sc >= NR_syscalls)
325 +#define __ipipe_root_tick_p(regs) ((regs)->flags & X86_EFLAGS_IF)
327 +#else /* !CONFIG_IPIPE */
329 +#define ipipe_update_tick_evtdev(evtdev) do { } while (0)
330 +#define task_hijacked(p) 0
332 +#endif /* CONFIG_IPIPE */
334 +#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE)
335 +#define __ipipe_move_root_irq(irq) \
336 + do { \
337 + if (irq < NR_IRQS) { \
338 + struct irq_chip *chip = irq_to_desc(irq)->chip; \
339 + if (chip->move) \
340 + chip->move(irq); \
341 + } \
342 + } while (0)
343 +#else /* !(CONFIG_SMP && CONFIG_IPIPE) */
344 +#define __ipipe_move_root_irq(irq) do { } while (0)
345 +#endif /* !(CONFIG_SMP && CONFIG_IPIPE) */
347 +#endif /* !__X86_IPIPE_H */
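As an aside, a minimal sketch of what the prepare_arch_switch()/task_hijacked() pair defined above boils down to, written as plain C functions for readability. __ipipe_root_domain_p, ipipe_schedule_notify() and local_irq_enable_hw()/local_irq_disable_hw() are provided by the I-pipe core and are assumed here:

static inline void prepare_arch_switch_sketch(struct task_struct *next)
{
	ipipe_schedule_notify(current, next);	/* notify interested domains of the switch */
	local_irq_disable_hw();			/* hard-disable IRQs across the context switch */
}

static inline int task_hijacked_sketch(void)
{
	int root = __ipipe_root_domain_p;	/* still running over the root (Linux) domain? */
	if (root)
		local_irq_enable_hw();		/* safe to re-enable hardware IRQs */
	return !root;				/* non-zero: a higher-priority domain took the CPU */
}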
348 diff --git a/arch/x86/include/asm/ipipe_32.h b/arch/x86/include/asm/ipipe_32.h
349 new file mode 100644
350 index 0000000..4263a7f
351 --- /dev/null
352 +++ b/arch/x86/include/asm/ipipe_32.h
353 @@ -0,0 +1,136 @@
354 +/* -*- linux-c -*-
355 + * arch/x86/include/asm/ipipe_32.h
357 + * Copyright (C) 2002-2005 Philippe Gerum.
359 + * This program is free software; you can redistribute it and/or modify
360 + * it under the terms of the GNU General Public License as published by
361 + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
362 + * USA; either version 2 of the License, or (at your option) any later
363 + * version.
365 + * This program is distributed in the hope that it will be useful,
366 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
367 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
368 + * GNU General Public License for more details.
370 + * You should have received a copy of the GNU General Public License
371 + * along with this program; if not, write to the Free Software
372 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
373 + */
375 +#ifndef __X86_IPIPE_32_H
376 +#define __X86_IPIPE_32_H
378 +#include <linux/cpumask.h>
379 +#include <linux/list.h>
380 +#include <linux/threads.h>
381 +#include <linux/ipipe_percpu.h>
382 +#include <asm/ptrace.h>
384 +#define ipipe_read_tsc(t) __asm__ __volatile__("rdtsc" : "=A" (t))
385 +#define ipipe_cpu_freq() ({ unsigned long long __freq = cpu_has_tsc?(1000LL * cpu_khz):CLOCK_TICK_RATE; __freq; })
387 +#define ipipe_tsc2ns(t) \
388 +({ \
389 + unsigned long long delta = (t)*1000; \
390 + do_div(delta, cpu_khz/1000+1); \
391 + (unsigned long)delta; \
394 +#define ipipe_tsc2us(t) \
395 +({ \
396 + unsigned long long delta = (t); \
397 + do_div(delta, cpu_khz/1000+1); \
398 + (unsigned long)delta; \
401 +/* Private interface -- Internal use only */
403 +int __ipipe_handle_irq(struct pt_regs *regs);
405 +static inline unsigned long __ipipe_ffnz(unsigned long ul)
407 + __asm__("bsrl %1, %0":"=r"(ul)
408 + : "r"(ul));
409 + return ul;
412 +struct irq_desc;
414 +void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc);
416 +void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc);
418 +static inline void __do_root_xirq(ipipe_irq_handler_t handler,
419 + unsigned int irq)
421 + struct pt_regs *regs = &__raw_get_cpu_var(__ipipe_tick_regs);
423 + regs->orig_ax = ~__ipipe_get_irq_vector(irq);
425 + __asm__ __volatile__("pushfl\n\t"
426 + "pushl %%cs\n\t"
427 + "pushl $__xirq_end\n\t"
428 + "pushl %%eax\n\t"
429 + "pushl %%gs\n\t"
430 + "pushl %%fs\n\t"
431 + "pushl %%es\n\t"
432 + "pushl %%ds\n\t"
433 + "pushl %%eax\n\t"
434 + "pushl %%ebp\n\t"
435 + "pushl %%edi\n\t"
436 + "pushl %%esi\n\t"
437 + "pushl %%edx\n\t"
438 + "pushl %%ecx\n\t"
439 + "pushl %%ebx\n\t"
440 + "movl %2,%%eax\n\t"
441 + "call *%1\n\t"
442 + "jmp ret_from_intr\n\t"
443 + "__xirq_end: cli\n"
444 + : /* no output */
445 + : "a" (~irq), "r" (handler), "rm" (regs));
448 +#define __ipipe_do_root_xirq(ipd, irq) \
449 + __do_root_xirq((ipd)->irqs[irq].handler, irq)
451 +static inline void __do_root_virq(ipipe_irq_handler_t handler,
452 + void *cookie, unsigned int irq)
454 + void irq_enter(void);
455 + void irq_exit(void);
457 + irq_enter();
458 + __asm__ __volatile__("pushfl\n\t"
459 + "pushl %%cs\n\t"
460 + "pushl $__virq_end\n\t"
461 + "pushl $-1\n\t"
462 + "pushl %%gs\n\t"
463 + "pushl %%fs\n\t"
464 + "pushl %%es\n\t"
465 + "pushl %%ds\n\t"
466 + "pushl %%eax\n\t"
467 + "pushl %%ebp\n\t"
468 + "pushl %%edi\n\t"
469 + "pushl %%esi\n\t"
470 + "pushl %%edx\n\t"
471 + "pushl %%ecx\n\t"
472 + "pushl %%ebx\n\t"
473 + "pushl %2\n\t"
474 + "pushl %%eax\n\t"
475 + "call *%1\n\t"
476 + "addl $8,%%esp\n"
477 + : /* no output */
478 + : "a" (irq), "r" (handler), "d" (cookie));
479 + irq_exit();
480 + __asm__ __volatile__("jmp ret_from_intr\n\t"
481 + "__virq_end: cli\n"
482 + : /* no output */
483 + : /* no input */);
486 +#define __ipipe_do_root_virq(ipd, irq) \
487 + __do_root_virq((ipd)->irqs[irq].handler, (ipd)->irqs[irq].cookie, irq)
489 +#endif /* !__X86_IPIPE_32_H */
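For reference, the ipipe_tsc2ns()/ipipe_tsc2us() macros above reduce to the following integer arithmetic; a rough worked example, assuming a hypothetical cpu_khz of 2000000 (a 2 GHz TSC) and plain division in place of do_div():

/* ns = (ticks * 1000) / (cpu_khz/1000 + 1); the "+1" avoids a zero divisor
 * and biases the result slightly low. With cpu_khz = 2000000 the divisor is
 * 2001, so ~2001 ticks convert to ~1000 ns, matching a ~2 GHz clock to within
 * the rounding error introduced by the +1. */
unsigned long long ticks = 2001ULL;
unsigned long long ns = (ticks * 1000) / (2000000 / 1000 + 1);	/* == 1000 */
unsigned long long us = ticks / (2000000 / 1000 + 1);		/* == 1 */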
490 diff --git a/arch/x86/include/asm/ipipe_64.h b/arch/x86/include/asm/ipipe_64.h
491 new file mode 100644
492 index 0000000..b9367f6
493 --- /dev/null
494 +++ b/arch/x86/include/asm/ipipe_64.h
495 @@ -0,0 +1,141 @@
496 +/* -*- linux-c -*-
497 + * arch/x86/include/asm/ipipe_64.h
499 + * Copyright (C) 2007 Philippe Gerum.
501 + * This program is free software; you can redistribute it and/or modify
502 + * it under the terms of the GNU General Public License as published by
503 + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
504 + * USA; either version 2 of the License, or (at your option) any later
505 + * version.
507 + * This program is distributed in the hope that it will be useful,
508 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
509 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
510 + * GNU General Public License for more details.
512 + * You should have received a copy of the GNU General Public License
513 + * along with this program; if not, write to the Free Software
514 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
515 + */
517 +#ifndef __X86_IPIPE_64_H
518 +#define __X86_IPIPE_64_H
520 +#include <asm/ptrace.h>
521 +#include <asm/irq.h>
522 +#include <linux/cpumask.h>
523 +#include <linux/list.h>
524 +#include <linux/ipipe_percpu.h>
525 +#ifdef CONFIG_SMP
526 +#include <asm/mpspec.h>
527 +#include <linux/thread_info.h>
528 +#endif
530 +#define ipipe_read_tsc(t) do { \
531 + unsigned int __a,__d; \
532 + asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \
533 + (t) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \
534 +} while(0)
536 +extern unsigned cpu_khz;
537 +#define ipipe_cpu_freq() ({ unsigned long __freq = (1000UL * cpu_khz); __freq; })
538 +#define ipipe_tsc2ns(t) (((t) * 1000UL) / (ipipe_cpu_freq() / 1000000UL))
539 +#define ipipe_tsc2us(t) ((t) / (ipipe_cpu_freq() / 1000000UL))
541 +/* Private interface -- Internal use only */
543 +int __ipipe_handle_irq(struct pt_regs *regs);
545 +static inline unsigned long __ipipe_ffnz(unsigned long ul)
547 + __asm__("bsrq %1, %0":"=r"(ul)
548 + : "rm"(ul));
549 + return ul;
552 +struct irq_desc;
554 +void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc);
556 +void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc);
558 +static inline void __do_root_xirq(ipipe_irq_handler_t handler,
559 + unsigned int irq)
561 + struct pt_regs *regs = &__raw_get_cpu_var(__ipipe_tick_regs);
563 + regs->orig_ax = ~__ipipe_get_irq_vector(irq);
565 + __asm__ __volatile__("movq %%rsp, %%rax\n\t"
566 + "pushq $0\n\t"
567 + "pushq %%rax\n\t"
568 + "pushfq\n\t"
569 + "pushq %[kernel_cs]\n\t"
570 + "pushq $__xirq_end\n\t"
571 + "pushq %[vector]\n\t"
572 + "subq $9*8,%%rsp\n\t"
573 + "movq %%rdi,8*8(%%rsp)\n\t"
574 + "movq %%rsi,7*8(%%rsp)\n\t"
575 + "movq %%rdx,6*8(%%rsp)\n\t"
576 + "movq %%rcx,5*8(%%rsp)\n\t"
577 + "movq %%rax,4*8(%%rsp)\n\t"
578 + "movq %%r8,3*8(%%rsp)\n\t"
579 + "movq %%r9,2*8(%%rsp)\n\t"
580 + "movq %%r10,1*8(%%rsp)\n\t"
581 + "movq %%r11,(%%rsp)\n\t"
582 + "call *%[handler]\n\t"
583 + "cli\n\t"
584 + "jmp exit_intr\n\t"
585 + "__xirq_end: cli\n"
586 + : /* no output */
587 + : [kernel_cs] "i" (__KERNEL_CS),
588 + [vector] "rm" (regs->orig_ax),
589 + [handler] "r" (handler), "D" (regs)
590 + : "rax");
593 +#define __ipipe_do_root_xirq(ipd, irq) \
594 + __do_root_xirq((ipd)->irqs[irq].handler, irq)
596 +static inline void __do_root_virq(ipipe_irq_handler_t handler,
597 + void *cookie, unsigned int irq)
599 + void irq_enter(void);
600 + void irq_exit(void);
602 + irq_enter();
603 + __asm__ __volatile__("movq %%rsp, %%rax\n\t"
604 + "pushq $0\n\t"
605 + "pushq %%rax\n\t"
606 + "pushfq\n\t"
607 + "pushq %[kernel_cs]\n\t"
608 + "pushq $__virq_end\n\t"
609 + "pushq $-1\n\t"
610 + "subq $9*8,%%rsp\n\t"
611 + "movq %%rdi,8*8(%%rsp)\n\t"
612 + "movq %%rsi,7*8(%%rsp)\n\t"
613 + "movq %%rdx,6*8(%%rsp)\n\t"
614 + "movq %%rcx,5*8(%%rsp)\n\t"
615 + "movq %%rax,4*8(%%rsp)\n\t"
616 + "movq %%r8,3*8(%%rsp)\n\t"
617 + "movq %%r9,2*8(%%rsp)\n\t"
618 + "movq %%r10,1*8(%%rsp)\n\t"
619 + "movq %%r11,(%%rsp)\n\t"
620 + "call *%[handler]\n\t"
621 + : /* no output */
622 + : [kernel_cs] "i" (__KERNEL_CS),
623 + [handler] "r" (handler), "D" (irq), "S" (cookie)
624 + : "rax");
625 + irq_exit();
626 + __asm__ __volatile__("cli\n\t"
627 + "jmp exit_intr\n\t"
628 + "__virq_end: cli\n"
629 + : /* no output */
630 + : /* no input */);
633 +#define __ipipe_do_root_virq(ipd, irq) \
634 + __do_root_virq((ipd)->irqs[irq].handler, (ipd)->irqs[irq].cookie, irq)
636 +#endif /* !__X86_IPIPE_64_H */
637 diff --git a/arch/x86/include/asm/ipipe_base.h b/arch/x86/include/asm/ipipe_base.h
638 new file mode 100644
639 index 0000000..44c8c73
640 --- /dev/null
641 +++ b/arch/x86/include/asm/ipipe_base.h
642 @@ -0,0 +1,216 @@
643 +/* -*- linux-c -*-
644 + * arch/x86/include/asm/ipipe_base.h
646 + * Copyright (C) 2007-2009 Philippe Gerum.
648 + * This program is free software; you can redistribute it and/or modify
649 + * it under the terms of the GNU General Public License as published by
650 + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
651 + * USA; either version 2 of the License, or (at your option) any later
652 + * version.
654 + * This program is distributed in the hope that it will be useful,
655 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
656 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
657 + * GNU General Public License for more details.
659 + * You should have received a copy of the GNU General Public License
660 + * along with this program; if not, write to the Free Software
661 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
662 + */
664 +#ifndef __X86_IPIPE_BASE_H
665 +#define __X86_IPIPE_BASE_H
667 +#include <linux/threads.h>
668 +#include <asm/apicdef.h>
669 +#include <asm/irq_vectors.h>
670 +#include <asm/bitsperlong.h>
672 +#ifdef CONFIG_X86_32
673 +#define IPIPE_NR_FAULTS 33 /* 32 from IDT + iret_error */
674 +#else
675 +#define IPIPE_NR_FAULTS 32
676 +#endif
678 +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
680 + * System interrupts are mapped beyond the last defined external IRQ
681 + * number.
682 + */
683 +#define IPIPE_NR_XIRQS (NR_IRQS + 32)
684 +#define IPIPE_FIRST_APIC_IRQ NR_IRQS
685 +#define IPIPE_SERVICE_VECTOR0 (INVALIDATE_TLB_VECTOR_END + 1)
686 +#define IPIPE_SERVICE_IPI0 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0)
687 +#define IPIPE_SERVICE_VECTOR1 (INVALIDATE_TLB_VECTOR_END + 2)
688 +#define IPIPE_SERVICE_IPI1 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1)
689 +#define IPIPE_SERVICE_VECTOR2 (INVALIDATE_TLB_VECTOR_END + 3)
690 +#define IPIPE_SERVICE_IPI2 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2)
691 +#define IPIPE_SERVICE_VECTOR3 (INVALIDATE_TLB_VECTOR_END + 4)
692 +#define IPIPE_SERVICE_IPI3 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3)
693 +#ifdef CONFIG_SMP
694 +#define IPIPE_CRITICAL_VECTOR (INVALIDATE_TLB_VECTOR_END + 5)
695 +#define IPIPE_CRITICAL_IPI ipipe_apic_vector_irq(IPIPE_CRITICAL_VECTOR)
696 +#endif
697 +#define ipipe_apic_irq_vector(irq) ((irq) - IPIPE_FIRST_APIC_IRQ + FIRST_SYSTEM_VECTOR)
698 +#define ipipe_apic_vector_irq(vec) ((vec) - FIRST_SYSTEM_VECTOR + IPIPE_FIRST_APIC_IRQ)
699 +#else /* !(CONFIG_X86_64 || CONFIG_X86_LOCAL_APIC) */
700 +#define IPIPE_NR_XIRQS NR_IRQS
701 +#endif /* !(CONFIG_X86_64 || CONFIG_X86_LOCAL_APIC) */
703 +#define ipipe_ipi_p(ipi) \
704 + (ipi >= IPIPE_SERVICE_IPI0 && ipi <= IPIPE_SERVICE_IPI3)
706 +/* Pseudo-vectors used for kernel events */
707 +#define IPIPE_FIRST_EVENT IPIPE_NR_FAULTS
708 +#define IPIPE_EVENT_SYSCALL (IPIPE_FIRST_EVENT)
709 +#define IPIPE_EVENT_SCHEDULE (IPIPE_FIRST_EVENT + 1)
710 +#define IPIPE_EVENT_SIGWAKE (IPIPE_FIRST_EVENT + 2)
711 +#define IPIPE_EVENT_SETSCHED (IPIPE_FIRST_EVENT + 3)
712 +#define IPIPE_EVENT_INIT (IPIPE_FIRST_EVENT + 4)
713 +#define IPIPE_EVENT_EXIT (IPIPE_FIRST_EVENT + 5)
714 +#define IPIPE_EVENT_CLEANUP (IPIPE_FIRST_EVENT + 6)
715 +#define IPIPE_EVENT_RETURN (IPIPE_FIRST_EVENT + 7)
716 +#define IPIPE_EVENT_HOSTRT (IPIPE_FIRST_EVENT + 8)
717 +#define IPIPE_LAST_EVENT IPIPE_EVENT_HOSTRT
718 +#define IPIPE_NR_EVENTS (IPIPE_LAST_EVENT + 1)
720 +#define ex_do_divide_error 0
721 +#define ex_do_debug 1
722 +/* NMI not pipelined. */
723 +#define ex_do_int3 3
724 +#define ex_do_overflow 4
725 +#define ex_do_bounds 5
726 +#define ex_do_invalid_op 6
727 +#define ex_do_device_not_available 7
728 +/* Double fault not pipelined. */
729 +#define ex_do_coprocessor_segment_overrun 9
730 +#define ex_do_invalid_TSS 10
731 +#define ex_do_segment_not_present 11
732 +#define ex_do_stack_segment 12
733 +#define ex_do_general_protection 13
734 +#define ex_do_page_fault 14
735 +#define ex_do_spurious_interrupt_bug 15
736 +#define ex_do_coprocessor_error 16
737 +#define ex_do_alignment_check 17
738 +#define ex_machine_check_vector 18
739 +#define ex_reserved ex_machine_check_vector
740 +#define ex_do_simd_coprocessor_error 19
741 +#define ex_do_iret_error 32
743 +#ifndef __ASSEMBLY__
745 +#ifdef CONFIG_SMP
747 +#include <asm/alternative.h>
749 +#ifdef CONFIG_X86_32
750 +#define GET_ROOT_STATUS_ADDR \
751 + "pushfl; cli;" \
752 + "movl %%fs:this_cpu_off, %%eax;" \
753 + "lea ipipe_percpu_darray(%%eax), %%eax;"
754 +#define PUT_ROOT_STATUS_ADDR "popfl;"
755 +#define TEST_AND_SET_ROOT_STATUS \
756 + "btsl $0,(%%eax);"
757 +#define TEST_ROOT_STATUS \
758 + "btl $0,(%%eax);"
759 +#define ROOT_TEST_CLOBBER_LIST "eax"
760 +#else /* CONFIG_X86_64 */
761 +#define GET_ROOT_STATUS_ADDR \
762 + "pushfq; cli;" \
763 + "movq %%gs:this_cpu_off, %%rax;" \
764 + "lea ipipe_percpu_darray(%%rax), %%rax;"
765 +#define PUT_ROOT_STATUS_ADDR "popfq;"
766 +#define TEST_AND_SET_ROOT_STATUS \
767 + "btsl $0,(%%rax);"
768 +#define TEST_ROOT_STATUS \
769 + "btl $0,(%%rax);"
770 +#define ROOT_TEST_CLOBBER_LIST "rax"
771 +#endif /* CONFIG_X86_64 */
773 +static inline void __ipipe_stall_root(void)
775 + __asm__ __volatile__(GET_ROOT_STATUS_ADDR
776 + LOCK_PREFIX
777 + TEST_AND_SET_ROOT_STATUS
778 + PUT_ROOT_STATUS_ADDR
779 + : : : ROOT_TEST_CLOBBER_LIST, "memory");
782 +static inline unsigned long __ipipe_test_and_stall_root(void)
784 + int oldbit;
786 + __asm__ __volatile__(GET_ROOT_STATUS_ADDR
787 + LOCK_PREFIX
788 + TEST_AND_SET_ROOT_STATUS
789 + "sbbl %0,%0;"
790 + PUT_ROOT_STATUS_ADDR
791 + :"=r" (oldbit)
792 + : : ROOT_TEST_CLOBBER_LIST, "memory");
793 + return oldbit;
796 +static inline unsigned long __ipipe_test_root(void)
798 + int oldbit;
800 + __asm__ __volatile__(GET_ROOT_STATUS_ADDR
801 + TEST_ROOT_STATUS
802 + "sbbl %0,%0;"
803 + PUT_ROOT_STATUS_ADDR
804 + :"=r" (oldbit)
805 + : : ROOT_TEST_CLOBBER_LIST);
806 + return oldbit;
809 +#else /* !CONFIG_SMP */
811 +#if __GNUC__ >= 4
812 +/* Alias to ipipe_root_cpudom_var(status) */
813 +extern unsigned long __ipipe_root_status;
814 +#else
815 +extern unsigned long *const __ipipe_root_status_addr;
816 +#define __ipipe_root_status (*__ipipe_root_status_addr)
817 +#endif
819 +static inline void __ipipe_stall_root(void)
821 + volatile unsigned long *p = &__ipipe_root_status;
822 + __asm__ __volatile__("btsl $0,%0;"
823 + :"+m" (*p) : : "memory");
826 +static inline unsigned long __ipipe_test_and_stall_root(void)
828 + volatile unsigned long *p = &__ipipe_root_status;
829 + int oldbit;
831 + __asm__ __volatile__("btsl $0,%1;"
832 + "sbbl %0,%0;"
833 + :"=r" (oldbit), "+m" (*p)
834 + : : "memory");
835 + return oldbit;
838 +static inline unsigned long __ipipe_test_root(void)
840 + volatile unsigned long *p = &__ipipe_root_status;
841 + int oldbit;
843 + __asm__ __volatile__("btl $0,%1;"
844 + "sbbl %0,%0;"
845 + :"=r" (oldbit)
846 + :"m" (*p));
847 + return oldbit;
850 +#endif /* !CONFIG_SMP */
852 +void __ipipe_halt_root(void);
854 +void __ipipe_serial_debug(const char *fmt, ...);
856 +#endif /* !__ASSEMBLY__ */
858 +#endif /* !__X86_IPIPE_BASE_H */
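The stall helpers above all manipulate bit 0 of the root domain status word (__ipipe_root_status on UP, the per-CPU ipipe_percpu_darray slot on SMP), which stands in for the virtual interrupt-disable state of Linux. A behavioural sketch in portable C, ignoring the btsl/sbbl encoding, the LOCK prefix and the per-CPU addressing (the real code returns the old bit via sbbl, i.e. 0 or all-ones; callers only test zero/non-zero):

/* Bit 0 set == root domain "stalled" (virtual IRQs off for Linux). */
static inline void stall_root_sketch(unsigned long *status)
{
	*status |= 1UL;					/* btsl $0 */
}

static inline unsigned long test_and_stall_root_sketch(unsigned long *status)
{
	unsigned long was_stalled = *status & 1UL;	/* old bit, as btsl+sbbl report it */
	*status |= 1UL;
	return was_stalled;
}

static inline unsigned long test_root_sketch(unsigned long *status)
{
	return *status & 1UL;				/* btl + sbbl */
}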
859 diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
860 index 8767d99..1e05ee0 100644
861 --- a/arch/x86/include/asm/irq_vectors.h
862 +++ b/arch/x86/include/asm/irq_vectors.h
863 @@ -96,10 +96,17 @@
864 #define THRESHOLD_APIC_VECTOR 0xf9
865 #define REBOOT_VECTOR 0xf8
867 +#ifdef CONFIG_IPIPE
868 +/* f0-f2 used for TLB flush, f3-f7 reserved for the I-pipe */
869 +#define INVALIDATE_TLB_VECTOR_END 0xf2
870 +#define INVALIDATE_TLB_VECTOR_START 0xf0
871 +#define NUM_INVALIDATE_TLB_VECTORS 3
872 +#else /* !CONFIG_IPIPE */
873 /* f0-f7 used for spreading out TLB flushes: */
874 #define INVALIDATE_TLB_VECTOR_END 0xf7
875 #define INVALIDATE_TLB_VECTOR_START 0xf0
876 #define NUM_INVALIDATE_TLB_VECTORS 8
877 +#endif
880 * Local APIC timer IRQ vector is on a different priority level,
881 @@ -120,6 +127,9 @@
883 #define UV_BAU_MESSAGE 0xea
885 +/* I-pipe: Lowest number of vectors above */
886 +#define FIRST_SYSTEM_VECTOR 0xea
889 * Self IPI vector for machine checks
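Putting the constants from this hunk together with the ipipe_apic_vector_irq()/ipipe_apic_irq_vector() helpers from ipipe_base.h above, the APIC system vectors map onto pseudo-IRQ numbers just past NR_IRQS. A worked example under CONFIG_IPIPE (the 256 below is only a stand-in for NR_IRQS, which is configuration dependent):

/* INVALIDATE_TLB_VECTOR_END = 0xf2 under I-pipe, so
 *   IPIPE_SERVICE_VECTOR0 = 0xf2 + 1 = 0xf3
 * and with FIRST_SYSTEM_VECTOR = 0xea, IPIPE_FIRST_APIC_IRQ = NR_IRQS:
 *   IPIPE_SERVICE_IPI0 = ipipe_apic_vector_irq(0xf3)
 *                      = 0xf3 - 0xea + NR_IRQS = NR_IRQS + 9
 * ipipe_apic_irq_vector() is the exact inverse, so the two round-trip. */
#define FIRST_SYSTEM_VECTOR_EX	0xea
#define FIRST_APIC_IRQ_EX	256	/* hypothetical NR_IRQS */
#define vector_to_irq_ex(vec)	((vec) - FIRST_SYSTEM_VECTOR_EX + FIRST_APIC_IRQ_EX)
#define irq_to_vector_ex(irq)	((irq) - FIRST_APIC_IRQ_EX + FIRST_SYSTEM_VECTOR_EX)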
891 diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
892 index 9e2b952..59c8be8 100644
893 --- a/arch/x86/include/asm/irqflags.h
894 +++ b/arch/x86/include/asm/irqflags.h
895 @@ -4,6 +4,11 @@
896 #include <asm/processor-flags.h>
898 #ifndef __ASSEMBLY__
900 +#include <linux/ipipe_base.h>
901 +#include <linux/ipipe_trace.h>
902 +#include <linux/compiler.h>
905 * Interrupt control:
907 @@ -12,6 +17,10 @@ static inline unsigned long native_save_fl(void)
909 unsigned long flags;
911 +#ifdef CONFIG_IPIPE
912 + flags = (!__ipipe_test_root()) << 9;
913 + barrier();
914 +#else
916 * "=rm" is safe here, because "pop" adjusts the stack before
917 * it evaluates its effective address -- this is part of the
918 @@ -22,31 +31,53 @@ static inline unsigned long native_save_fl(void)
919 : "=rm" (flags)
920 : /* no input */
921 : "memory");
922 +#endif
924 return flags;
927 static inline void native_restore_fl(unsigned long flags)
929 +#ifdef CONFIG_IPIPE
930 + barrier();
931 + __ipipe_restore_root(!(flags & X86_EFLAGS_IF));
932 +#else
933 asm volatile("push %0 ; popf"
934 : /* no output */
935 :"g" (flags)
936 :"memory", "cc");
937 +#endif
940 static inline void native_irq_disable(void)
942 +#ifdef CONFIG_IPIPE
943 + ipipe_check_context(ipipe_root_domain);
944 + __ipipe_stall_root();
945 + barrier();
946 +#else
947 asm volatile("cli": : :"memory");
948 +#endif
951 static inline void native_irq_enable(void)
953 +#ifdef CONFIG_IPIPE
954 + barrier();
955 + __ipipe_unstall_root();
956 +#else
957 asm volatile("sti": : :"memory");
958 +#endif
961 static inline void native_safe_halt(void)
963 +#ifdef CONFIG_IPIPE
964 + barrier();
965 + __ipipe_halt_root();
966 +#else
967 asm volatile("sti; hlt": : :"memory");
968 +#endif
971 static inline void native_halt(void)
972 @@ -71,6 +102,71 @@ static inline void raw_local_irq_restore(unsigned long flags)
973 native_restore_fl(flags);
976 +static inline unsigned long raw_mangle_irq_bits(int virt, unsigned long real)
978 + /*
979 + * Merge virtual and real interrupt mask bits into a single
980 + * (32bit) word.
981 + */
982 + return (real & ~(1L << 31)) | ((virt != 0) << 31);
985 +static inline int raw_demangle_irq_bits(unsigned long *x)
987 + int virt = (*x & (1L << 31)) != 0;
988 + *x &= ~(1L << 31);
989 + return virt;
992 +#define local_irq_save_hw_notrace(x) \
993 + __asm__ __volatile__("pushf ; pop %0 ; cli":"=g" (x): /* no input */ :"memory")
994 +#define local_irq_restore_hw_notrace(x) \
995 + __asm__ __volatile__("push %0 ; popf": /* no output */ :"g" (x):"memory", "cc")
997 +#define local_save_flags_hw(x) __asm__ __volatile__("pushf ; pop %0":"=g" (x): /* no input */)
999 +#define irqs_disabled_hw() \
1000 + ({ \
1001 + unsigned long x; \
1002 + local_save_flags_hw(x); \
1003 + !((x) & X86_EFLAGS_IF); \
1004 + })
1006 +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
1007 +#define local_irq_disable_hw() do { \
1008 + if (!irqs_disabled_hw()) { \
1009 + local_irq_disable_hw_notrace(); \
1010 + ipipe_trace_begin(0x80000000); \
1011 + } \
1012 + } while (0)
1013 +#define local_irq_enable_hw() do { \
1014 + if (irqs_disabled_hw()) { \
1015 + ipipe_trace_end(0x80000000); \
1016 + local_irq_enable_hw_notrace(); \
1017 + } \
1018 + } while (0)
1019 +#define local_irq_save_hw(x) do { \
1020 + local_save_flags_hw(x); \
1021 + if ((x) & X86_EFLAGS_IF) { \
1022 + local_irq_disable_hw_notrace(); \
1023 + ipipe_trace_begin(0x80000001); \
1024 + } \
1025 + } while (0)
1026 +#define local_irq_restore_hw(x) do { \
1027 + if ((x) & X86_EFLAGS_IF) \
1028 + ipipe_trace_end(0x80000001); \
1029 + local_irq_restore_hw_notrace(x); \
1030 + } while (0)
1031 +#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */
1032 +#define local_irq_save_hw(x) local_irq_save_hw_notrace(x)
1033 +#define local_irq_restore_hw(x) local_irq_restore_hw_notrace(x)
1034 +#define local_irq_enable_hw() local_irq_enable_hw_notrace()
1035 +#define local_irq_disable_hw() local_irq_disable_hw_notrace()
1036 +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
1038 +#define local_irq_disable_hw_notrace() __asm__ __volatile__("cli": : :"memory")
1039 +#define local_irq_enable_hw_notrace() __asm__ __volatile__("sti": : :"memory")
1041 static inline void raw_local_irq_disable(void)
1043 native_irq_disable();
1044 @@ -104,16 +200,40 @@ static inline void halt(void)
1046 static inline unsigned long __raw_local_irq_save(void)
1048 +#ifdef CONFIG_IPIPE
1049 + unsigned long flags = (!__ipipe_test_and_stall_root()) << 9;
1050 + barrier();
1051 +#else
1052 unsigned long flags = __raw_local_save_flags();
1054 raw_local_irq_disable();
1055 +#endif
1057 return flags;
1059 #else
1061 -#define ENABLE_INTERRUPTS(x) sti
1062 -#define DISABLE_INTERRUPTS(x) cli
1063 +#ifdef CONFIG_IPIPE
1064 +#ifdef CONFIG_X86_32
1065 +#define DISABLE_INTERRUPTS(clobbers) PER_CPU(ipipe_percpu_darray, %eax); btsl $0,(%eax); sti
1066 +#define ENABLE_INTERRUPTS(clobbers) call __ipipe_unstall_root
1067 +#else /* CONFIG_X86_64 */
1068 +/* Not worth virtualizing in x86_64 mode. */
1069 +#define DISABLE_INTERRUPTS(clobbers) cli
1070 +#define ENABLE_INTERRUPTS(clobbers) sti
1071 +#endif /* CONFIG_X86_64 */
1072 +#define ENABLE_INTERRUPTS_HW_COND sti
1073 +#define DISABLE_INTERRUPTS_HW_COND cli
1074 +#define DISABLE_INTERRUPTS_HW(clobbers) cli
1075 +#define ENABLE_INTERRUPTS_HW(clobbers) sti
1076 +#else /* !CONFIG_IPIPE */
1077 +#define ENABLE_INTERRUPTS(x) sti
1078 +#define DISABLE_INTERRUPTS(x) cli
1079 +#define ENABLE_INTERRUPTS_HW_COND
1080 +#define DISABLE_INTERRUPTS_HW_COND
1081 +#define DISABLE_INTERRUPTS_HW(clobbers) DISABLE_INTERRUPTS(clobbers)
1082 +#define ENABLE_INTERRUPTS_HW(clobbers) ENABLE_INTERRUPTS(clobbers)
1083 +#endif /* !CONFIG_IPIPE */
1085 #ifdef CONFIG_X86_64
1086 #define SWAPGS swapgs
1087 @@ -156,8 +276,10 @@ static inline unsigned long __raw_local_irq_save(void)
1088 #define raw_local_save_flags(flags) \
1089 do { (flags) = __raw_local_save_flags(); } while (0)
1091 -#define raw_local_irq_save(flags) \
1092 - do { (flags) = __raw_local_irq_save(); } while (0)
1093 +#define raw_local_irq_save(flags) do { \
1094 + ipipe_check_context(ipipe_root_domain); \
1095 + (flags) = __raw_local_irq_save(); \
1096 + } while (0)
1098 static inline int raw_irqs_disabled_flags(unsigned long flags)
1100 @@ -189,7 +311,10 @@ static inline int raw_irqs_disabled(void)
1101 pushl %eax; \
1102 pushl %ecx; \
1103 pushl %edx; \
1104 + pushfl; \
1105 + sti; \
1106 call lockdep_sys_exit; \
1107 + popfl; \
1108 popl %edx; \
1109 popl %ecx; \
1110 popl %eax;
1111 @@ -198,8 +323,23 @@ static inline int raw_irqs_disabled(void)
1112 #endif
1114 #ifdef CONFIG_TRACE_IRQFLAGS
1115 +# if defined(CONFIG_IPIPE) && defined(CONFIG_X86_64)
1116 +# define TRACE_IRQS_ON \
1117 + call trace_hardirqs_on_thunk; \
1118 + pushq %rax; \
1119 + PER_CPU(ipipe_percpu_darray, %rax); \
1120 + btrl $0,(%rax); \
1121 + popq %rax
1122 +# define TRACE_IRQS_OFF \
1123 + pushq %rax; \
1124 + PER_CPU(ipipe_percpu_darray, %rax); \
1125 + btsl $0,(%rax); \
1126 + popq %rax; \
1127 + call trace_hardirqs_off_thunk
1128 +# else /* !(CONFIG_IPIPE && CONFIG_X86_64) */
1129 # define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
1130 # define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
1131 +# endif /* !(CONFIG_IPIPE && CONFIG_X86_64) */
1132 #else
1133 # define TRACE_IRQS_ON
1134 # define TRACE_IRQS_OFF
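Two small behavioural sketches of the flag handling this file introduces: the virtualized EFLAGS.IF that native_save_fl() synthesizes from the root stall bit, and the raw_mangle_irq_bits()/raw_demangle_irq_bits() round trip that packs the virtual mask state into bit 31 of the hardware flags word. Plain C, relying only on X86_EFLAGS_IF being bit 9 (0x200); the 0x202 flags value is a hypothetical example:

static void irqflags_roundtrip_sketch(void)
{
	unsigned long fake_flags, word;
	int virt;

	/* native_save_fl() under CONFIG_IPIPE: report IF as the inverse of the
	 * root stall bit, shifted into the usual EFLAGS position (bit 9). */
	fake_flags = (!__ipipe_test_root()) << 9;

	/* Bit 31 of the real EFLAGS is always clear on x86, so packing the
	 * virtual state there and unpacking it again is lossless. */
	word = raw_mangle_irq_bits(1, 0x202UL);	/* virt masked + example hw flags */
	virt = raw_demangle_irq_bits(&word);	/* virt == 1, word == 0x202 again */
	(void)fake_flags; (void)virt;
}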
1135 diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
1136 index 4a2d4e0..1ee45d4 100644
1137 --- a/arch/x86/include/asm/mmu_context.h
1138 +++ b/arch/x86/include/asm/mmu_context.h
1139 @@ -30,11 +30,14 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
1140 #endif
1143 -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
1144 - struct task_struct *tsk)
1145 +static inline void __switch_mm(struct mm_struct *prev, struct mm_struct *next,
1146 + struct task_struct *tsk)
1148 unsigned cpu = smp_processor_id();
1150 +#ifdef CONFIG_IPIPE_DEBUG_INTERNAL
1151 + WARN_ON_ONCE(!irqs_disabled_hw());
1152 +#endif
1153 if (likely(prev != next)) {
1154 /* stop flush ipis for the previous mm */
1155 cpumask_clear_cpu(cpu, mm_cpumask(prev));
1156 @@ -70,10 +73,23 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
1157 #endif
1160 +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
1161 + struct task_struct *tsk)
1163 + unsigned long flags;
1164 + local_irq_save_hw_cond(flags);
1165 + __switch_mm(prev, next, tsk);
1166 + local_irq_restore_hw_cond(flags);
1169 +#define ipipe_mm_switch_protect(flags) local_irq_save_hw_cond(flags)
1170 +#define ipipe_mm_switch_unprotect(flags) \
1171 + local_irq_restore_hw_cond(flags)
1173 #define activate_mm(prev, next) \
1174 do { \
1175 paravirt_activate_mm((prev), (next)); \
1176 - switch_mm((prev), (next), NULL); \
1177 + __switch_mm((prev), (next), NULL); \
1178 } while (0);
1180 #ifdef CONFIG_X86_32
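A short usage sketch of the helpers this hunk introduces; it is simply the body of the new switch_mm() wrapper spelled out with the same prev/next/tsk arguments, and assumes local_irq_save_hw_cond()/local_irq_restore_hw_cond() from the I-pipe core:

	unsigned long flags;

	ipipe_mm_switch_protect(flags);		/* hard-disable IRQs around the mm switch */
	__switch_mm(prev, next, tsk);		/* the original switch_mm() body */
	ipipe_mm_switch_unprotect(flags);	/* restore the previous hardware IRQ state */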
1181 diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
1182 index 93da9c3..c1ad7fc 100644
1183 --- a/arch/x86/include/asm/nmi.h
1184 +++ b/arch/x86/include/asm/nmi.h
1185 @@ -28,7 +28,7 @@ extern void setup_apic_nmi_watchdog(void *);
1186 extern void stop_apic_nmi_watchdog(void *);
1187 extern void disable_timer_nmi_watchdog(void);
1188 extern void enable_timer_nmi_watchdog(void);
1189 -extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
1190 +extern int (*nmi_watchdog_tick)(struct pt_regs *regs, unsigned reason);
1191 extern void cpu_nmi_set_wd_enabled(void);
1193 extern atomic_t nmi_active;
1194 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
1195 index 7e5c6a6..5fa356b 100644
1196 --- a/arch/x86/include/asm/processor.h
1197 +++ b/arch/x86/include/asm/processor.h
1198 @@ -438,6 +438,7 @@ struct thread_struct {
1199 unsigned short ds;
1200 unsigned short fsindex;
1201 unsigned short gsindex;
1202 + unsigned long rip;
1203 #endif
1204 #ifdef CONFIG_X86_32
1205 unsigned long ip;
1206 diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
1207 index e7f4d33..e197763 100644
1208 --- a/arch/x86/include/asm/system.h
1209 +++ b/arch/x86/include/asm/system.h
1210 @@ -127,8 +127,12 @@ do { \
1211 #define switch_to(prev, next, last) \
1212 asm volatile(SAVE_CONTEXT \
1213 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
1214 + "movq $thread_return,%P[threadrip](%[prev])\n\t" /* save RIP */ \
1215 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
1216 - "call __switch_to\n\t" \
1217 + "pushq %P[threadrip](%[next])\n\t" /* restore RIP */ \
1218 + "jmp __switch_to\n\t" \
1219 + ".globl thread_return\n\t" \
1220 + "thread_return:\n\t" \
1221 "movq "__percpu_arg([current_task])",%%rsi\n\t" \
1222 __switch_canary \
1223 "movq %P[thread_info](%%rsi),%%r8\n\t" \
1224 @@ -140,6 +144,7 @@ do { \
1225 __switch_canary_oparam \
1226 : [next] "S" (next), [prev] "D" (prev), \
1227 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
1228 + [threadrip] "i" (offsetof(struct task_struct, thread.rip)), \
1229 [ti_flags] "i" (offsetof(struct thread_info, flags)), \
1230 [_tif_fork] "i" (_TIF_FORK), \
1231 [thread_info] "i" (offsetof(struct task_struct, stack)), \
1232 @@ -307,8 +312,13 @@ static inline void native_wbinvd(void)
1233 #else
1234 #define read_cr0() (native_read_cr0())
1235 #define write_cr0(x) (native_write_cr0(x))
1236 +#ifdef CONFIG_IPIPE
1237 +#define read_cr2() __raw_get_cpu_var(__ipipe_cr2)
1238 +#define write_cr2(x) __raw_get_cpu_var(__ipipe_cr2) = (x)
1239 +#else /* !CONFIG_IPIPE */
1240 #define read_cr2() (native_read_cr2())
1241 #define write_cr2(x) (native_write_cr2(x))
1242 +#endif /* !CONFIG_IPIPE */
1243 #define read_cr3() (native_read_cr3())
1244 #define write_cr3(x) (native_write_cr3(x))
1245 #define read_cr4() (native_read_cr4())
1246 diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
1247 index f66cda5..021a149 100644
1248 --- a/arch/x86/include/asm/traps.h
1249 +++ b/arch/x86/include/asm/traps.h
1250 @@ -82,8 +82,8 @@ extern int panic_on_unrecovered_nmi;
1251 void math_error(struct pt_regs *, int, int);
1252 void math_emulate(struct math_emu_info *);
1253 #ifndef CONFIG_X86_32
1254 -asmlinkage void smp_thermal_interrupt(void);
1255 asmlinkage void mce_threshold_interrupt(void);
1256 #endif
1257 +asmlinkage void smp_thermal_interrupt(void);
1259 #endif /* _ASM_X86_TRAPS_H */
1260 diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
1261 index 1ca132f..f8b9a98 100644
1262 --- a/arch/x86/include/asm/tsc.h
1263 +++ b/arch/x86/include/asm/tsc.h
1264 @@ -14,6 +14,7 @@
1266 typedef unsigned long long cycles_t;
1268 +extern struct clocksource clocksource_tsc;
1269 extern unsigned int cpu_khz;
1270 extern unsigned int tsc_khz;
1272 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
1273 index e77b220..b6f755d 100644
1274 --- a/arch/x86/kernel/Makefile
1275 +++ b/arch/x86/kernel/Makefile
1276 @@ -83,6 +83,7 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
1277 obj-$(CONFIG_KGDB) += kgdb.o
1278 obj-$(CONFIG_VM86) += vm86_32.o
1279 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
1280 +obj-$(CONFIG_IPIPE) += ipipe.o
1282 obj-$(CONFIG_HPET_TIMER) += hpet.o
1283 obj-$(CONFIG_APB_TIMER) += apb_timer.o
1284 diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
1285 index c07e513..94fdbf9 100644
1286 --- a/arch/x86/kernel/apic/apic.c
1287 +++ b/arch/x86/kernel/apic/apic.c
1288 @@ -426,7 +426,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
1289 if (evt->features & CLOCK_EVT_FEAT_DUMMY)
1290 return;
1292 - local_irq_save(flags);
1293 + local_irq_save_hw(flags);
1295 switch (mode) {
1296 case CLOCK_EVT_MODE_PERIODIC:
1297 @@ -446,7 +446,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
1298 break;
1301 - local_irq_restore(flags);
1302 + local_irq_restore_hw(flags);
1306 @@ -962,7 +962,7 @@ void lapic_shutdown(void)
1307 if (!cpu_has_apic && !apic_from_smp_config())
1308 return;
1310 - local_irq_save(flags);
1311 + local_irq_save_hw(flags);
1313 #ifdef CONFIG_X86_32
1314 if (!enabled_via_apicbase)
1315 @@ -972,7 +972,7 @@ void lapic_shutdown(void)
1316 disable_local_APIC();
1319 - local_irq_restore(flags);
1320 + local_irq_restore_hw(flags);
1324 @@ -1146,6 +1146,10 @@ static void __cpuinit lapic_setup_esr(void)
1325 oldvalue, value);
1328 +int __ipipe_check_lapic(void)
1330 + return !(lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY);
1334 * setup_local_APIC - setup the local APIC
1335 @@ -1219,7 +1223,7 @@ void __cpuinit setup_local_APIC(void)
1336 value = apic_read(APIC_ISR + i*0x10);
1337 for (j = 31; j >= 0; j--) {
1338 if (value & (1<<j)) {
1339 - ack_APIC_irq();
1340 + __ack_APIC_irq();
1341 acked++;
1344 @@ -1738,7 +1742,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1346 v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
1347 if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
1348 - ack_APIC_irq();
1349 + __ack_APIC_irq();
1351 inc_irq_stat(irq_spurious_count);
1353 @@ -2007,13 +2011,13 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1354 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
1355 #endif
1357 - local_irq_save(flags);
1358 + local_irq_save_hw(flags);
1359 disable_local_APIC();
1361 if (intr_remapping_enabled)
1362 disable_intr_remapping();
1364 - local_irq_restore(flags);
1365 + local_irq_restore_hw(flags);
1366 return 0;
1369 @@ -2028,7 +2032,7 @@ static int lapic_resume(struct sys_device *dev)
1370 if (!apic_pm_state.active)
1371 return 0;
1373 - local_irq_save(flags);
1374 + local_irq_save_hw(flags);
1375 if (intr_remapping_enabled) {
1376 ioapic_entries = alloc_ioapic_entries();
1377 if (!ioapic_entries) {
1378 @@ -2094,7 +2098,7 @@ static int lapic_resume(struct sys_device *dev)
1379 free_ioapic_entries(ioapic_entries);
1381 restore:
1382 - local_irq_restore(flags);
1383 + local_irq_restore_hw(flags);
1385 return ret;
1387 diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
1388 index 09d3b17..0820b62 100644
1389 --- a/arch/x86/kernel/apic/apic_flat_64.c
1390 +++ b/arch/x86/kernel/apic/apic_flat_64.c
1391 @@ -72,9 +72,9 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
1393 unsigned long flags;
1395 - local_irq_save(flags);
1396 + local_irq_save_hw(flags);
1397 __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
1398 - local_irq_restore(flags);
1399 + local_irq_restore_hw(flags);
1402 static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
1403 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
1404 index f1efeba..fcd56a7 100644
1405 --- a/arch/x86/kernel/apic/io_apic.c
1406 +++ b/arch/x86/kernel/apic/io_apic.c
1407 @@ -74,8 +74,8 @@
1409 int sis_apic_bug = -1;
1411 -static DEFINE_RAW_SPINLOCK(ioapic_lock);
1412 -static DEFINE_RAW_SPINLOCK(vector_lock);
1413 +static IPIPE_DEFINE_RAW_SPINLOCK(ioapic_lock);
1414 +static IPIPE_DEFINE_RAW_SPINLOCK(vector_lock);
1417 * # of IRQ routing registers
1418 @@ -391,6 +391,8 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
1419 writel(value, &io_apic->data);
1422 +#if !defined(CONFIG_IPIPE) || defined(CONFIG_SMP)
1424 static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
1426 struct irq_pin_list *entry;
1427 @@ -414,6 +416,8 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
1428 return false;
1431 +#endif /* !CONFIG_IPIPE || CONFIG_SMP */
1433 union entry_union {
1434 struct { u32 w1, w2; };
1435 struct IO_APIC_route_entry entry;
1436 @@ -595,6 +599,7 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
1437 BUG_ON(!cfg);
1439 raw_spin_lock_irqsave(&ioapic_lock, flags);
1440 + ipipe_irq_lock(desc->irq);
1441 __mask_IO_APIC_irq(cfg);
1442 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1444 @@ -606,6 +611,7 @@ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
1446 raw_spin_lock_irqsave(&ioapic_lock, flags);
1447 __unmask_IO_APIC_irq(cfg);
1448 + ipipe_irq_unlock(desc->irq);
1449 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1452 @@ -2248,6 +2254,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
1454 cfg = irq_cfg(irq);
1455 __unmask_IO_APIC_irq(cfg);
1456 + ipipe_irq_unlock(irq);
1457 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1459 return was_pending;
1460 @@ -2542,11 +2549,13 @@ static inline void irq_complete_move(struct irq_desc **descp) {}
1462 static void ack_apic_edge(unsigned int irq)
1464 +#ifndef CONFIG_IPIPE
1465 struct irq_desc *desc = irq_to_desc(irq);
1467 irq_complete_move(&desc);
1468 move_native_irq(irq);
1469 - ack_APIC_irq();
1470 +#endif /* CONFIG_IPIPE */
1471 + __ack_APIC_irq();
1474 atomic_t irq_mis_count;
1475 @@ -2590,6 +2599,8 @@ static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
1479 +#if !defined(CONFIG_IPIPE) || defined(CONFIG_SMP)
1481 static void eoi_ioapic_irq(struct irq_desc *desc)
1483 struct irq_cfg *cfg;
1484 @@ -2604,12 +2615,44 @@ static void eoi_ioapic_irq(struct irq_desc *desc)
1485 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1488 +#endif /* !IPIPE || SMP */
1490 +#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP)
1492 +static void move_apic_irq(unsigned int irq)
1494 + struct irq_desc *desc = irq_to_desc(irq);
1495 + struct irq_cfg *cfg;
1497 + if (desc->handle_irq == &handle_edge_irq) {
1498 + raw_spin_lock(&desc->lock);
1499 + irq_complete_move(&desc);
1500 + move_native_irq(irq);
1501 + raw_spin_unlock(&desc->lock);
1502 + } else if (desc->handle_irq == &handle_fasteoi_irq) {
1503 + raw_spin_lock(&desc->lock);
1504 + irq_complete_move(&desc);
1505 + if (irq_remapped(irq))
1506 + eoi_ioapic_irq(desc);
1507 + if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
1508 + cfg = desc->chip_data;
1509 + if (!io_apic_level_ack_pending(cfg))
1510 + move_masked_irq(irq);
1511 + unmask_IO_APIC_irq_desc(desc);
1513 + raw_spin_unlock(&desc->lock);
1514 + } else
1515 + WARN_ON_ONCE(1);
1517 +#endif /* CONFIG_IPIPE && CONFIG_SMP */
1519 static void ack_apic_level(unsigned int irq)
1521 struct irq_desc *desc = irq_to_desc(irq);
1522 unsigned long v;
1523 int i;
1524 struct irq_cfg *cfg;
1525 +#ifndef CONFIG_IPIPE
1526 int do_unmask_irq = 0;
1528 irq_complete_move(&desc);
1529 @@ -2709,19 +2752,40 @@ static void ack_apic_level(unsigned int irq)
1530 move_masked_irq(irq);
1531 unmask_IO_APIC_irq_desc(desc);
1533 +#else /* CONFIG_IPIPE */
1534 + /*
1535 + * Prevent low priority IRQs grabbed by high priority domains
1536 + * from being delayed, waiting for a high priority interrupt
1537 + * handler running in a low priority domain to complete.
1538 + * This code assumes hw interrupts off.
1539 + */
1540 + cfg = desc->chip_data;
1541 + i = cfg->vector;
1542 + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
1543 + raw_spin_lock(&ioapic_lock);
1544 + if (unlikely(!(v & (1 << (i & 0x1f))))) {
1545 + /* IO-APIC erratum: see comment above. */
1546 + atomic_inc(&irq_mis_count);
1547 + __eoi_ioapic_irq(irq, cfg);
1548 + __mask_IO_APIC_irq(cfg);
1549 + } else
1550 + __mask_IO_APIC_irq(cfg);
1551 + raw_spin_unlock(&ioapic_lock);
1552 + __ack_APIC_irq();
1553 +#endif /* CONFIG_IPIPE */
1556 #ifdef CONFIG_INTR_REMAP
1557 static void ir_ack_apic_edge(unsigned int irq)
1559 - ack_APIC_irq();
1560 + __ack_APIC_irq();
1563 static void ir_ack_apic_level(unsigned int irq)
1565 struct irq_desc *desc = irq_to_desc(irq);
1567 - ack_APIC_irq();
1568 + __ack_APIC_irq();
1569 eoi_ioapic_irq(desc);
1571 #endif /* CONFIG_INTR_REMAP */
1572 @@ -2735,6 +2799,9 @@ static struct irq_chip ioapic_chip __read_mostly = {
1573 .eoi = ack_apic_level,
1574 #ifdef CONFIG_SMP
1575 .set_affinity = set_ioapic_affinity_irq,
1576 +#ifdef CONFIG_IPIPE
1577 + .move = move_apic_irq,
1578 +#endif
1579 #endif
1580 .retrigger = ioapic_retrigger_irq,
1582 @@ -2749,6 +2816,9 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
1583 .eoi = ir_ack_apic_level,
1584 #ifdef CONFIG_SMP
1585 .set_affinity = set_ir_ioapic_affinity_irq,
1586 +#ifdef CONFIG_IPIPE
1587 + .move = move_apic_irq,
1588 +#endif
1589 #endif
1590 #endif
1591 .retrigger = ioapic_retrigger_irq,
1592 @@ -2794,23 +2864,29 @@ static inline void init_IO_APIC_traps(void)
1594 static void mask_lapic_irq(unsigned int irq)
1596 - unsigned long v;
1597 + unsigned long v, flags;
1599 + local_irq_save_hw_cond(flags);
1600 + ipipe_irq_lock(irq);
1601 v = apic_read(APIC_LVT0);
1602 apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
1603 + local_irq_restore_hw_cond(flags);
1606 static void unmask_lapic_irq(unsigned int irq)
1608 - unsigned long v;
1609 + unsigned long v, flags;
1611 + local_irq_save_hw_cond(flags);
1612 v = apic_read(APIC_LVT0);
1613 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
1614 + ipipe_irq_unlock(irq);
1615 + local_irq_restore_hw_cond(flags);
1618 static void ack_lapic_irq(unsigned int irq)
1620 - ack_APIC_irq();
1621 + __ack_APIC_irq();
1624 static struct irq_chip lapic_chip __read_mostly = {
1625 @@ -2818,6 +2894,9 @@ static struct irq_chip lapic_chip __read_mostly = {
1626 .mask = mask_lapic_irq,
1627 .unmask = unmask_lapic_irq,
1628 .ack = ack_lapic_irq,
1629 +#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP)
1630 + .move = move_apic_irq,
1631 +#endif
1634 static void lapic_register_intr(int irq, struct irq_desc *desc)
1635 @@ -3065,6 +3144,10 @@ static inline void __init check_timer(void)
1636 "...trying to set up timer as Virtual Wire IRQ...\n");
1638 lapic_register_intr(0, desc);
1639 +#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_64)
1640 + irq_to_desc(0)->ipipe_ack = __ipipe_ack_edge_irq;
1641 + irq_to_desc(0)->ipipe_end = __ipipe_end_edge_irq;
1642 +#endif
1643 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
1644 legacy_pic->chip->unmask(0);
1646 @@ -3462,6 +3545,9 @@ static struct irq_chip msi_chip = {
1647 .ack = ack_apic_edge,
1648 #ifdef CONFIG_SMP
1649 .set_affinity = set_msi_irq_affinity,
1650 +#ifdef CONFIG_IPIPE
1651 + .move = move_apic_irq,
1652 +#endif
1653 #endif
1654 .retrigger = ioapic_retrigger_irq,
1656 @@ -3474,6 +3560,9 @@ static struct irq_chip msi_ir_chip = {
1657 .ack = ir_ack_apic_edge,
1658 #ifdef CONFIG_SMP
1659 .set_affinity = ir_set_msi_irq_affinity,
1660 +#ifdef CONFIG_IPIPE
1661 + .move = move_apic_irq,
1662 +#endif
1663 #endif
1664 #endif
1665 .retrigger = ioapic_retrigger_irq,
1666 @@ -3788,6 +3877,9 @@ static struct irq_chip ht_irq_chip = {
1667 .ack = ack_apic_edge,
1668 #ifdef CONFIG_SMP
1669 .set_affinity = set_ht_irq_affinity,
1670 +#ifdef CONFIG_IPIPE
1671 + .move = move_apic_irq,
1672 +#endif
1673 #endif
1674 .retrigger = ioapic_retrigger_irq,
1676 @@ -4088,6 +4180,14 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
1677 return 0;
1680 +#ifdef CONFIG_IPIPE
1681 +unsigned __ipipe_get_ioapic_irq_vector(int irq)
1683 + return irq >= IPIPE_FIRST_APIC_IRQ && irq < IPIPE_NR_XIRQS ?
1684 + ipipe_apic_irq_vector(irq) : irq_cfg(irq)->vector;
1686 +#endif /* CONFIG_IPIPE */
1689 * This function currently is only a helper for the i386 smp boot process where
1690 * we need to reprogram the ioredtbls to cater for the cpus which have come online
1691 diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
1692 index 08385e0..f5ad117 100644
1693 --- a/arch/x86/kernel/apic/ipi.c
1694 +++ b/arch/x86/kernel/apic/ipi.c
1695 @@ -29,12 +29,12 @@ void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
1696 * to an arbitrary mask, so I do a unicast to each CPU instead.
1697 * - mbligh
1699 - local_irq_save(flags);
1700 + local_irq_save_hw(flags);
1701 for_each_cpu(query_cpu, mask) {
1702 __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
1703 query_cpu), vector, APIC_DEST_PHYSICAL);
1705 - local_irq_restore(flags);
1706 + local_irq_restore_hw(flags);
1709 void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
1710 @@ -46,14 +46,14 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
1712 /* See Hack comment above */
1714 - local_irq_save(flags);
1715 + local_irq_save_hw(flags);
1716 for_each_cpu(query_cpu, mask) {
1717 if (query_cpu == this_cpu)
1718 continue;
1719 __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
1720 query_cpu), vector, APIC_DEST_PHYSICAL);
1722 - local_irq_restore(flags);
1723 + local_irq_restore_hw(flags);
1726 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
1727 @@ -68,12 +68,12 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
1728 * should be modified to do 1 message per cluster ID - mbligh
1731 - local_irq_save(flags);
1732 + local_irq_save_hw(flags);
1733 for_each_cpu(query_cpu, mask)
1734 __default_send_IPI_dest_field(
1735 apic->cpu_to_logical_apicid(query_cpu), vector,
1736 apic->dest_logical);
1737 - local_irq_restore(flags);
1738 + local_irq_restore_hw(flags);
1741 void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
1742 @@ -85,7 +85,7 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
1744 /* See Hack comment above */
1746 - local_irq_save(flags);
1747 + local_irq_save_hw(flags);
1748 for_each_cpu(query_cpu, mask) {
1749 if (query_cpu == this_cpu)
1750 continue;
1751 @@ -93,7 +93,7 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
1752 apic->cpu_to_logical_apicid(query_cpu), vector,
1753 apic->dest_logical);
1755 - local_irq_restore(flags);
1756 + local_irq_restore_hw(flags);
1759 #ifdef CONFIG_X86_32
1760 @@ -109,10 +109,10 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
1761 if (WARN_ONCE(!mask, "empty IPI mask"))
1762 return;
1764 - local_irq_save(flags);
1765 + local_irq_save_hw(flags);
1766 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
1767 __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
1768 - local_irq_restore(flags);
1769 + local_irq_restore_hw(flags);
1772 void default_send_IPI_allbutself(int vector)
1773 diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
1774 index 1edaf15..cc1f90f 100644
1775 --- a/arch/x86/kernel/apic/nmi.c
1776 +++ b/arch/x86/kernel/apic/nmi.c
1777 @@ -61,6 +61,10 @@ static unsigned int nmi_hz = HZ;
1778 static DEFINE_PER_CPU(short, wd_enabled);
1779 static int endflag __initdata;
1781 +static int default_nmi_watchdog_tick(struct pt_regs * regs, unsigned reason);
1782 +int (*nmi_watchdog_tick) (struct pt_regs * regs, unsigned reason) = &default_nmi_watchdog_tick;
1783 +EXPORT_SYMBOL(nmi_watchdog_tick);
1785 static inline unsigned int get_nmi_count(int cpu)
1787 return per_cpu(irq_stat, cpu).__nmi_count;
1788 @@ -389,7 +393,7 @@ void touch_nmi_watchdog(void)
1789 EXPORT_SYMBOL(touch_nmi_watchdog);
1791 notrace __kprobes int
1792 -nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
1793 +default_nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
1796 * Since current_thread_info()-> is always on the stack, and we
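Turning nmi_watchdog_tick into an exported function pointer is what lets a co-kernel interpose on NMIs and forward only the watchdog ticks it does not consume itself. A hypothetical out-of-tree module chaining onto the hook could look like this (handle_rt_nmi() is an assumed co-kernel callback, not part of this patch):

	#include <linux/module.h>
	#include <linux/ptrace.h>

	extern int (*nmi_watchdog_tick)(struct pt_regs *regs, unsigned reason);
	extern int handle_rt_nmi(struct pt_regs *regs);	/* assumed co-kernel hook */

	static int (*saved_tick)(struct pt_regs *regs, unsigned reason);

	static int my_nmi_tick(struct pt_regs *regs, unsigned reason)
	{
		if (handle_rt_nmi(regs))
			return 1;			/* consumed by the co-kernel */
		return saved_tick(regs, reason);	/* fall back to the Linux watchdog */
	}

	static int __init nmi_hook_init(void)
	{
		saved_tick = nmi_watchdog_tick;
		nmi_watchdog_tick = my_nmi_tick;
		return 0;
	}

	static void __exit nmi_hook_exit(void)
	{
		nmi_watchdog_tick = saved_tick;
	}

	module_init(nmi_hook_init);
	module_exit(nmi_hook_exit);
	MODULE_LICENSE("GPL");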
1797 diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
1798 index cf69c59..23baa6d 100644
1799 --- a/arch/x86/kernel/apic/x2apic_cluster.c
1800 +++ b/arch/x86/kernel/apic/x2apic_cluster.c
1801 @@ -61,13 +61,13 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
1803 x2apic_wrmsr_fence();
1805 - local_irq_save(flags);
1806 + local_irq_save_hw(flags);
1807 for_each_cpu(query_cpu, mask) {
1808 __x2apic_send_IPI_dest(
1809 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
1810 vector, apic->dest_logical);
1812 - local_irq_restore(flags);
1813 + local_irq_restore_hw(flags);
1816 static void
1817 @@ -79,7 +79,7 @@ static void
1819 x2apic_wrmsr_fence();
1821 - local_irq_save(flags);
1822 + local_irq_save_hw(flags);
1823 for_each_cpu(query_cpu, mask) {
1824 if (query_cpu == this_cpu)
1825 continue;
1826 @@ -87,7 +87,7 @@ static void
1827 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
1828 vector, apic->dest_logical);
1830 - local_irq_restore(flags);
1831 + local_irq_restore_hw(flags);
1834 static void x2apic_send_IPI_allbutself(int vector)
1835 @@ -98,7 +98,7 @@ static void x2apic_send_IPI_allbutself(int vector)
1837 x2apic_wrmsr_fence();
1839 - local_irq_save(flags);
1840 + local_irq_save_hw(flags);
1841 for_each_online_cpu(query_cpu) {
1842 if (query_cpu == this_cpu)
1843 continue;
1844 @@ -106,7 +106,7 @@ static void x2apic_send_IPI_allbutself(int vector)
1845 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
1846 vector, apic->dest_logical);
1848 - local_irq_restore(flags);
1849 + local_irq_restore_hw(flags);
1852 static void x2apic_send_IPI_all(int vector)
1853 diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
1854 index 8972f38..19bccee 100644
1855 --- a/arch/x86/kernel/apic/x2apic_phys.c
1856 +++ b/arch/x86/kernel/apic/x2apic_phys.c
1857 @@ -62,12 +62,12 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
1859 x2apic_wrmsr_fence();
1861 - local_irq_save(flags);
1862 + local_irq_save_hw(flags);
1863 for_each_cpu(query_cpu, mask) {
1864 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
1865 vector, APIC_DEST_PHYSICAL);
1867 - local_irq_restore(flags);
1868 + local_irq_restore_hw(flags);
1871 static void
1872 @@ -79,14 +79,14 @@ static void
1874 x2apic_wrmsr_fence();
1876 - local_irq_save(flags);
1877 + local_irq_save_hw(flags);
1878 for_each_cpu(query_cpu, mask) {
1879 if (query_cpu != this_cpu)
1880 __x2apic_send_IPI_dest(
1881 per_cpu(x86_cpu_to_apicid, query_cpu),
1882 vector, APIC_DEST_PHYSICAL);
1884 - local_irq_restore(flags);
1885 + local_irq_restore_hw(flags);
1888 static void x2apic_send_IPI_allbutself(int vector)
1889 @@ -97,14 +97,14 @@ static void x2apic_send_IPI_allbutself(int vector)
1891 x2apic_wrmsr_fence();
1893 - local_irq_save(flags);
1894 + local_irq_save_hw(flags);
1895 for_each_online_cpu(query_cpu) {
1896 if (query_cpu == this_cpu)
1897 continue;
1898 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
1899 vector, APIC_DEST_PHYSICAL);
1901 - local_irq_restore(flags);
1902 + local_irq_restore_hw(flags);
1905 static void x2apic_send_IPI_all(int vector)
1906 diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
1907 index 68a3343..f7e0927 100644
1908 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c
1909 +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
1910 @@ -18,7 +18,7 @@ cyrix_get_arr(unsigned int reg, unsigned long *base,
1912 arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
1914 - local_irq_save(flags);
1915 + local_irq_save_hw(flags);
1917 ccr3 = getCx86(CX86_CCR3);
1918 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
1919 @@ -28,7 +28,7 @@ cyrix_get_arr(unsigned int reg, unsigned long *base,
1920 rcr = getCx86(CX86_RCR_BASE + reg);
1921 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
1923 - local_irq_restore(flags);
1924 + local_irq_restore_hw(flags);
1926 shift = ((unsigned char *) base)[1] & 0x0f;
1927 *base >>= PAGE_SHIFT;
1928 @@ -178,6 +178,7 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
1929 unsigned long size, mtrr_type type)
1931 unsigned char arr, arr_type, arr_size;
1932 + unsigned long flags;
1934 arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
1936 @@ -221,6 +222,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
1940 + local_irq_save_hw(flags);
1942 prepare_set();
1944 base <<= PAGE_SHIFT;
1945 @@ -230,6 +233,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
1946 setCx86(CX86_RCR_BASE + reg, arr_type);
1948 post_set();
1950 + local_irq_restore_hw(flags);
1953 typedef struct {
1954 @@ -247,8 +252,10 @@ static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 };
1956 static void cyrix_set_all(void)
1958 + unsigned long flags;
1959 int i;
1961 + local_irq_save_hw(flags);
1962 prepare_set();
1964 /* the CCRs are not contiguous */
1965 @@ -263,6 +270,7 @@ static void cyrix_set_all(void)
1968 post_set();
1969 + local_irq_restore_hw(flags);
1972 static const struct mtrr_ops cyrix_mtrr_ops = {
1973 diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
1974 index fd31a44..9de6771 100644
1975 --- a/arch/x86/kernel/cpu/mtrr/generic.c
1976 +++ b/arch/x86/kernel/cpu/mtrr/generic.c
1977 @@ -634,7 +634,7 @@ static void generic_set_all(void)
1978 unsigned long mask, count;
1979 unsigned long flags;
1981 - local_irq_save(flags);
1982 + local_irq_save_hw(flags);
1983 prepare_set();
1985 /* Actually set the state */
1986 @@ -644,7 +644,7 @@ static void generic_set_all(void)
1987 pat_init();
1989 post_set();
1990 - local_irq_restore(flags);
1991 + local_irq_restore_hw(flags);
1993 /* Use the atomic bitops to update the global mask */
1994 for (count = 0; count < sizeof mask * 8; ++count) {
1995 @@ -668,12 +668,12 @@ static void generic_set_all(void)
1996 static void generic_set_mtrr(unsigned int reg, unsigned long base,
1997 unsigned long size, mtrr_type type)
1999 - unsigned long flags;
2000 + unsigned long flags, _flags;
2001 struct mtrr_var_range *vr;
2003 vr = &mtrr_state.var_ranges[reg];
2005 - local_irq_save(flags);
2006 + local_irq_save_full(flags, _flags);
2007 prepare_set();
2009 if (size == 0) {
2010 @@ -694,7 +694,7 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
2013 post_set();
2014 - local_irq_restore(flags);
2015 + local_irq_restore_full(flags, _flags);
2018 int generic_validate_add_page(unsigned long base, unsigned long size,
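generic_set_mtrr() is the one site in these hunks that needs both masks at once: the virtual (root-domain) flag so Linux-side code observes interrupts as disabled, and the hardware flag so no other domain can run while the MTRRs are being rewritten. The _full variants used above pair the two flag words; roughly (an illustration of the pairing, not the actual ipipe macros):

	/* Illustrative pairing of virtual + hardware IRQ state. */
	#define example_irq_save_full(vflags, hwflags)			\
		do {							\
			local_irq_save(vflags);		/* stall root domain */	\
			local_irq_save_hw(hwflags);	/* mask the CPU */	\
		} while (0)

	#define example_irq_restore_full(vflags, hwflags)		\
		do {							\
			local_irq_restore_hw(hwflags);			\
			local_irq_restore(vflags);			\
		} while (0)

The real macros, added by the ipipe core patches, also take care of ordering and irq-state tracing details that this sketch glosses over.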
2019 diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
2020 index c89a386..fa94518 100644
2021 --- a/arch/x86/kernel/dumpstack.c
2022 +++ b/arch/x86/kernel/dumpstack.c
2023 @@ -350,6 +350,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
2024 local_irq_enable();
2025 do_exit(SIGBUS);
2027 +EXPORT_SYMBOL_GPL(die_nmi);
2029 static int __init oops_setup(char *s)
2031 diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
2032 index 11540a1..7798c2a 100644
2033 --- a/arch/x86/kernel/dumpstack_32.c
2034 +++ b/arch/x86/kernel/dumpstack_32.c
2035 @@ -103,6 +103,9 @@ void show_registers(struct pt_regs *regs)
2036 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
2037 TASK_COMM_LEN, current->comm, task_pid_nr(current),
2038 current_thread_info(), current, task_thread_info(current));
2039 +#ifdef CONFIG_IPIPE
2040 + printk(KERN_EMERG "I-pipe domain %s\n", ipipe_current_domain->name);
2041 +#endif /* CONFIG_IPIPE */
2043 * When in-kernel, we also print out the stack and code at the
2044 * time of the fault..
2045 diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
2046 index 272c9f1..99333e8 100644
2047 --- a/arch/x86/kernel/dumpstack_64.c
2048 +++ b/arch/x86/kernel/dumpstack_64.c
2049 @@ -294,6 +294,11 @@ void show_registers(struct pt_regs *regs)
2050 printk("CPU %d ", cpu);
2051 print_modules();
2052 __show_regs(regs, 1);
2053 +#ifdef CONFIG_IPIPE
2054 + if (ipipe_current_domain != ipipe_root_domain)
2055 + printk("I-pipe domain %s\n", ipipe_current_domain->name);
2056 + else
2057 +#endif /* CONFIG_IPIPE */
2058 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
2059 cur->comm, cur->pid, task_thread_info(cur), cur);
2061 diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
2062 index cd49141..4fc229b 100644
2063 --- a/arch/x86/kernel/entry_32.S
2064 +++ b/arch/x86/kernel/entry_32.S
2065 @@ -44,6 +44,7 @@
2066 #include <linux/linkage.h>
2067 #include <asm/thread_info.h>
2068 #include <asm/irqflags.h>
2069 +#include <asm/ipipe_base.h>
2070 #include <asm/errno.h>
2071 #include <asm/segment.h>
2072 #include <asm/smp.h>
2073 @@ -80,8 +81,61 @@
2075 #define nr_syscalls ((syscall_table_size)/4)
2077 +#ifdef CONFIG_IPIPE
2078 +#define EMULATE_ROOT_IRET(bypass) \
2079 + call __ipipe_unstall_iret_root ; \
2080 + TRACE_IRQS_ON ; \
2081 + bypass: \
2082 + movl PT_EAX(%esp),%eax
2083 +#define TEST_PREEMPTIBLE(regs) call __ipipe_kpreempt_root ; testl %eax,%eax
2084 +#define CATCH_ROOT_SYSCALL(bypass1,bypass2) \
2085 + movl %esp,%eax ; \
2086 + call __ipipe_syscall_root ; \
2087 + testl %eax,%eax ; \
2088 + js bypass1 ; \
2089 + jne bypass2 ; \
2090 + movl PT_ORIG_EAX(%esp),%eax
2091 +#define PUSH_XCODE(v) pushl $ ex_ ## v
2092 +#define PUSH_XVEC(v) pushl $ ex_ ## v
2093 +#define HANDLE_EXCEPTION(code) movl %code,%ecx ; \
2094 + call __ipipe_handle_exception ; \
2095 + testl %eax,%eax ; \
2096 + jnz restore_ret
2097 +#define DIVERT_EXCEPTION(code) movl $(__USER_DS), %ecx ; \
2098 + movl %ecx, %ds ; \
2099 + movl %ecx, %es ; \
2100 + movl %esp, %eax ; \
2101 + movl $ex_ ## code,%edx ; \
2102 + call __ipipe_divert_exception ; \
2103 + testl %eax,%eax ; \
2104 + jnz restore_ret
2106 +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
2107 +# define IPIPE_TRACE_IRQ_ENTER \
2108 + lea PT_EIP-4(%esp), %ebp; \
2109 + movl PT_ORIG_EAX(%esp), %eax; \
2110 + call ipipe_trace_begin
2111 +# define IPIPE_TRACE_IRQ_EXIT \
2112 + pushl %eax; \
2113 + movl PT_ORIG_EAX+4(%esp), %eax; \
2114 + call ipipe_trace_end; \
2115 + popl %eax
2116 +#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */
2117 +#define IPIPE_TRACE_IRQ_ENTER
2118 +#define IPIPE_TRACE_IRQ_EXIT
2119 +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
2120 +#else /* !CONFIG_IPIPE */
2121 +#define EMULATE_ROOT_IRET(bypass)
2122 +#define TEST_PREEMPTIBLE(regs) testl $X86_EFLAGS_IF,PT_EFLAGS(regs)
2123 +#define CATCH_ROOT_SYSCALL(bypass1,bypass2)
2124 +#define PUSH_XCODE(v) pushl $v
2125 +#define PUSH_XVEC(v) pushl v
2126 +#define HANDLE_EXCEPTION(code) call *%code
2127 +#define DIVERT_EXCEPTION(code)
2128 +#endif /* CONFIG_IPIPE */
2130 #ifdef CONFIG_PREEMPT
2131 -#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
2132 +#define preempt_stop(clobbers) DISABLE_INTERRUPTS_HW(clobbers); TRACE_IRQS_OFF
2133 #else
2134 #define preempt_stop(clobbers)
2135 #define resume_kernel restore_all
2136 @@ -319,6 +373,7 @@
2137 .endm
2139 ENTRY(ret_from_fork)
2140 + ENABLE_INTERRUPTS_HW_COND
2141 CFI_STARTPROC
2142 pushl %eax
2143 CFI_ADJUST_CFA_OFFSET 4
2144 @@ -350,7 +405,7 @@ END(ret_from_fork)
2145 RING0_PTREGS_FRAME
2146 ret_from_exception:
2147 preempt_stop(CLBR_ANY)
2148 -ret_from_intr:
2149 +ENTRY(ret_from_intr)
2150 GET_THREAD_INFO(%ebp)
2151 check_userspace:
2152 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
2153 @@ -374,14 +429,13 @@ END(ret_from_exception)
2155 #ifdef CONFIG_PREEMPT
2156 ENTRY(resume_kernel)
2157 - DISABLE_INTERRUPTS(CLBR_ANY)
2158 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
2159 jnz restore_all
2160 need_resched:
2161 movl TI_flags(%ebp), %ecx # need_resched set ?
2162 testb $_TIF_NEED_RESCHED, %cl
2163 jz restore_all
2164 - testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
2165 + TEST_PREEMPTIBLE(%esp) # interrupts off (exception path) ?
2166 jz restore_all
2167 call preempt_schedule_irq
2168 jmp need_resched
2169 @@ -433,7 +487,7 @@ sysenter_past_esp:
2170 pushl %eax
2171 CFI_ADJUST_CFA_OFFSET 4
2172 SAVE_ALL
2173 - ENABLE_INTERRUPTS(CLBR_NONE)
2174 + ENABLE_INTERRUPTS_HW(CLBR_NONE)
2177 * Load the potential sixth argument from user stack.
2178 @@ -449,6 +503,7 @@ sysenter_past_esp:
2179 .previous
2181 GET_THREAD_INFO(%ebp)
2182 + CATCH_ROOT_SYSCALL(sysenter_tail,sysenter_out)
2184 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
2185 jnz sysenter_audit
2186 @@ -457,6 +512,7 @@ sysenter_do_call:
2187 jae syscall_badsys
2188 call *sys_call_table(,%eax,4)
2189 movl %eax,PT_EAX(%esp)
2190 +sysenter_tail:
2191 LOCKDEP_SYS_EXIT
2192 DISABLE_INTERRUPTS(CLBR_ANY)
2193 TRACE_IRQS_OFF
2194 @@ -465,10 +521,13 @@ sysenter_do_call:
2195 jne sysexit_audit
2196 sysenter_exit:
2197 /* if something modifies registers it must also disable sysexit */
2198 + EMULATE_ROOT_IRET(sysenter_out)
2199 movl PT_EIP(%esp), %edx
2200 movl PT_OLDESP(%esp), %ecx
2201 xorl %ebp,%ebp
2202 - TRACE_IRQS_ON
2203 +#ifndef CONFIG_IPIPE
2204 + TRACE_IRQS_ON
2205 +#endif
2206 1: mov PT_FS(%esp), %fs
2207 PTGS_TO_GS
2208 ENABLE_INTERRUPTS_SYSEXIT
2209 @@ -533,6 +592,7 @@ ENTRY(system_call)
2210 CFI_ADJUST_CFA_OFFSET 4
2211 SAVE_ALL
2212 GET_THREAD_INFO(%ebp)
2213 + CATCH_ROOT_SYSCALL(syscall_exit,restore_ret)
2214 # system call tracing in operation / emulation
2215 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
2216 jnz syscall_trace_entry
2217 @@ -565,6 +625,10 @@ restore_all_notrace:
2218 CFI_REMEMBER_STATE
2219 je ldt_ss # returning to user-space with LDT SS
2220 restore_nocheck:
2221 +#ifdef CONFIG_IPIPE
2222 + call __ipipe_unstall_iret_root
2223 +#endif /* CONFIG_IPIPE */
2224 +restore_ret:
2225 RESTORE_REGS 4 # skip orig_eax/error_code
2226 CFI_ADJUST_CFA_OFFSET -4
2227 irq_return:
2228 @@ -572,7 +636,7 @@ irq_return:
2229 .section .fixup,"ax"
2230 ENTRY(iret_exc)
2231 pushl $0 # no error code
2232 - pushl $do_iret_error
2233 + PUSH_XCODE(do_iret_error)
2234 jmp error_code
2235 .previous
2236 .section __ex_table,"a"
2237 @@ -626,7 +690,7 @@ ldt_ss:
2238 /* Disable interrupts, but do not irqtrace this section: we
2239 * will soon execute iret and the tracer was already set to
2240 * the irqstate after the iret */
2241 - DISABLE_INTERRUPTS(CLBR_EAX)
2242 + DISABLE_INTERRUPTS_HW(CLBR_EAX)
2243 lss (%esp), %esp /* switch to espfix segment */
2244 CFI_ADJUST_CFA_OFFSET -8
2245 jmp restore_nocheck
2246 @@ -640,6 +704,7 @@ work_pending:
2247 testb $_TIF_NEED_RESCHED, %cl
2248 jz work_notifysig
2249 work_resched:
2250 + ENABLE_INTERRUPTS_HW_COND
2251 call schedule
2252 LOCKDEP_SYS_EXIT
2253 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
2254 @@ -855,6 +920,49 @@ END(irq_entries_start)
2255 END(interrupt)
2256 .previous
2258 +#ifdef CONFIG_IPIPE
2259 + .p2align CONFIG_X86_L1_CACHE_SHIFT
2260 +common_interrupt:
2261 + addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
2262 + SAVE_ALL
2263 + IPIPE_TRACE_IRQ_ENTER
2264 + movl %esp, %eax
2265 + call *ipipe_irq_handler
2266 + IPIPE_TRACE_IRQ_EXIT
2267 + testl %eax,%eax
2268 + jnz ret_from_intr
2269 + jmp restore_ret
2270 + CFI_ENDPROC
2272 + .pushsection .kprobes.text, "ax"
2273 +#define BUILD_INTERRUPT3(name, nr, fn) \
2274 +ENTRY(name) \
2275 + RING0_INT_FRAME; \
2276 + pushl $~(nr); \
2277 + CFI_ADJUST_CFA_OFFSET 4; \
2278 + SAVE_ALL; \
2279 + IPIPE_TRACE_IRQ_ENTER; \
2280 + movl %esp, %eax; \
2281 + call *ipipe_irq_handler; \
2282 + IPIPE_TRACE_IRQ_EXIT; \
2283 + testl %eax,%eax; \
2284 + jnz ret_from_intr; \
2285 + jmp restore_ret; \
2286 + CFI_ENDPROC
2288 +#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
2290 +#ifdef CONFIG_X86_LOCAL_APIC
2291 + BUILD_INTERRUPT(ipipe_ipi0,IPIPE_SERVICE_VECTOR0)
2292 + BUILD_INTERRUPT(ipipe_ipi1,IPIPE_SERVICE_VECTOR1)
2293 + BUILD_INTERRUPT(ipipe_ipi2,IPIPE_SERVICE_VECTOR2)
2294 + BUILD_INTERRUPT(ipipe_ipi3,IPIPE_SERVICE_VECTOR3)
2295 +#ifdef CONFIG_SMP
2296 + BUILD_INTERRUPT(ipipe_ipiX,IPIPE_CRITICAL_VECTOR)
2297 +#endif
2298 +#endif
2300 +#else /* !CONFIG_IPIPE */
2302 * the CPU automatically disables interrupts when executing an IRQ vector,
2303 * so IRQ-flags tracing has to follow that:
2304 @@ -889,6 +997,8 @@ ENDPROC(name)
2306 #define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
2308 +#endif /* !CONFIG_IPIPE */
2310 /* The include is where all of the SMP etc. interrupts come from */
2311 #include <asm/entry_arch.h>
2313 @@ -896,7 +1006,7 @@ ENTRY(coprocessor_error)
2314 RING0_INT_FRAME
2315 pushl $0
2316 CFI_ADJUST_CFA_OFFSET 4
2317 - pushl $do_coprocessor_error
2318 + PUSH_XCODE(do_coprocessor_error)
2319 CFI_ADJUST_CFA_OFFSET 4
2320 jmp error_code
2321 CFI_ENDPROC
2322 @@ -919,11 +1029,11 @@ ENTRY(simd_coprocessor_error)
2323 .byte 664f-663f
2324 .previous
2325 .section .altinstr_replacement,"ax"
2326 -663: pushl $do_simd_coprocessor_error
2327 +663: PUSH_XCODE(do_simd_coprocessor_error)
2328 664:
2329 .previous
2330 #else
2331 - pushl $do_simd_coprocessor_error
2332 + PUSH_XCODE(do_simd_coprocessor_error)
2333 #endif
2334 CFI_ADJUST_CFA_OFFSET 4
2335 jmp error_code
2336 @@ -934,7 +1044,7 @@ ENTRY(device_not_available)
2337 RING0_INT_FRAME
2338 pushl $-1 # mark this as an int
2339 CFI_ADJUST_CFA_OFFSET 4
2340 - pushl $do_device_not_available
2341 + PUSH_XCODE(do_device_not_available)
2342 CFI_ADJUST_CFA_OFFSET 4
2343 jmp error_code
2344 CFI_ENDPROC
2345 @@ -959,7 +1069,7 @@ ENTRY(overflow)
2346 RING0_INT_FRAME
2347 pushl $0
2348 CFI_ADJUST_CFA_OFFSET 4
2349 - pushl $do_overflow
2350 + PUSH_XCODE(do_overflow)
2351 CFI_ADJUST_CFA_OFFSET 4
2352 jmp error_code
2353 CFI_ENDPROC
2354 @@ -969,7 +1079,7 @@ ENTRY(bounds)
2355 RING0_INT_FRAME
2356 pushl $0
2357 CFI_ADJUST_CFA_OFFSET 4
2358 - pushl $do_bounds
2359 + PUSH_XCODE(do_bounds)
2360 CFI_ADJUST_CFA_OFFSET 4
2361 jmp error_code
2362 CFI_ENDPROC
2363 @@ -979,7 +1089,7 @@ ENTRY(invalid_op)
2364 RING0_INT_FRAME
2365 pushl $0
2366 CFI_ADJUST_CFA_OFFSET 4
2367 - pushl $do_invalid_op
2368 + PUSH_XCODE(do_invalid_op)
2369 CFI_ADJUST_CFA_OFFSET 4
2370 jmp error_code
2371 CFI_ENDPROC
2372 @@ -989,7 +1099,7 @@ ENTRY(coprocessor_segment_overrun)
2373 RING0_INT_FRAME
2374 pushl $0
2375 CFI_ADJUST_CFA_OFFSET 4
2376 - pushl $do_coprocessor_segment_overrun
2377 + PUSH_XCODE(do_coprocessor_segment_overrun)
2378 CFI_ADJUST_CFA_OFFSET 4
2379 jmp error_code
2380 CFI_ENDPROC
2381 @@ -997,7 +1107,7 @@ END(coprocessor_segment_overrun)
2383 ENTRY(invalid_TSS)
2384 RING0_EC_FRAME
2385 - pushl $do_invalid_TSS
2386 + PUSH_XCODE(do_invalid_TSS)
2387 CFI_ADJUST_CFA_OFFSET 4
2388 jmp error_code
2389 CFI_ENDPROC
2390 @@ -1005,7 +1115,7 @@ END(invalid_TSS)
2392 ENTRY(segment_not_present)
2393 RING0_EC_FRAME
2394 - pushl $do_segment_not_present
2395 + PUSH_XCODE(do_segment_not_present)
2396 CFI_ADJUST_CFA_OFFSET 4
2397 jmp error_code
2398 CFI_ENDPROC
2399 @@ -1013,7 +1123,7 @@ END(segment_not_present)
2401 ENTRY(stack_segment)
2402 RING0_EC_FRAME
2403 - pushl $do_stack_segment
2404 + PUSH_XCODE(do_stack_segment)
2405 CFI_ADJUST_CFA_OFFSET 4
2406 jmp error_code
2407 CFI_ENDPROC
2408 @@ -1021,7 +1131,7 @@ END(stack_segment)
2410 ENTRY(alignment_check)
2411 RING0_EC_FRAME
2412 - pushl $do_alignment_check
2413 + PUSH_XCODE(do_alignment_check)
2414 CFI_ADJUST_CFA_OFFSET 4
2415 jmp error_code
2416 CFI_ENDPROC
2417 @@ -1031,7 +1141,7 @@ ENTRY(divide_error)
2418 RING0_INT_FRAME
2419 pushl $0 # no error code
2420 CFI_ADJUST_CFA_OFFSET 4
2421 - pushl $do_divide_error
2422 + PUSH_XCODE(do_divide_error)
2423 CFI_ADJUST_CFA_OFFSET 4
2424 jmp error_code
2425 CFI_ENDPROC
2426 @@ -1042,7 +1152,7 @@ ENTRY(machine_check)
2427 RING0_INT_FRAME
2428 pushl $0
2429 CFI_ADJUST_CFA_OFFSET 4
2430 - pushl machine_check_vector
2431 + PUSH_XVEC(machine_check_vector)
2432 CFI_ADJUST_CFA_OFFSET 4
2433 jmp error_code
2434 CFI_ENDPROC
2435 @@ -1053,7 +1163,7 @@ ENTRY(spurious_interrupt_bug)
2436 RING0_INT_FRAME
2437 pushl $0
2438 CFI_ADJUST_CFA_OFFSET 4
2439 - pushl $do_spurious_interrupt_bug
2440 + PUSH_XCODE(do_spurious_interrupt_bug)
2441 CFI_ADJUST_CFA_OFFSET 4
2442 jmp error_code
2443 CFI_ENDPROC
2444 @@ -1285,7 +1395,7 @@ syscall_table_size=(.-sys_call_table)
2446 ENTRY(page_fault)
2447 RING0_EC_FRAME
2448 - pushl $do_page_fault
2449 + PUSH_XCODE(do_page_fault)
2450 CFI_ADJUST_CFA_OFFSET 4
2451 ALIGN
2452 error_code:
2453 @@ -1333,9 +1443,11 @@ error_code:
2454 movl $(__USER_DS), %ecx
2455 movl %ecx, %ds
2456 movl %ecx, %es
2457 +#ifndef CONFIG_IPIPE
2458 TRACE_IRQS_OFF
2459 +#endif
2460 movl %esp,%eax # pt_regs pointer
2461 - call *%edi
2462 + HANDLE_EXCEPTION(edi)
2463 jmp ret_from_exception
2464 CFI_ENDPROC
2465 END(page_fault)
2466 @@ -1379,6 +1491,7 @@ debug_stack_correct:
2467 CFI_ADJUST_CFA_OFFSET 4
2468 SAVE_ALL
2469 TRACE_IRQS_OFF
2470 + DIVERT_EXCEPTION(do_debug)
2471 xorl %edx,%edx # error code 0
2472 movl %esp,%eax # pt_regs pointer
2473 call do_debug
2474 @@ -1479,6 +1592,7 @@ ENTRY(int3)
2475 CFI_ADJUST_CFA_OFFSET 4
2476 SAVE_ALL
2477 TRACE_IRQS_OFF
2478 + DIVERT_EXCEPTION(do_int3)
2479 xorl %edx,%edx # zero error code
2480 movl %esp,%eax # pt_regs pointer
2481 call do_int3
2482 @@ -1488,7 +1602,7 @@ END(int3)
2484 ENTRY(general_protection)
2485 RING0_EC_FRAME
2486 - pushl $do_general_protection
2487 + PUSH_XCODE(do_general_protection)
2488 CFI_ADJUST_CFA_OFFSET 4
2489 jmp error_code
2490 CFI_ENDPROC
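With CONFIG_IPIPE the 32-bit trap stubs no longer push the address of the C handler; PUSH_XCODE pushes a small ex_* identifier and HANDLE_EXCEPTION hands it to __ipipe_handle_exception(), which lets an attached domain claim the trap before Linux sees it. The C side of that dispatch appears further down in the new arch/x86/kernel/ipipe.c (the __ipipe_std_extable array); stripped of the details, the flow is roughly this sketch (not the real function):

	/* Simplified view of the ex_*-code dispatch. */
	typedef void (*exhandler_t)(struct pt_regs *regs, long error_code);
	extern exhandler_t std_extable[];	/* e.g. [ex_do_page_fault] = do_page_fault */

	int example_handle_exception(struct pt_regs *regs, long error_code, int vector)
	{
		if (ipipe_trap_notify(vector, regs))
			return 1;	/* a higher-priority domain consumed the trap */
		std_extable[vector](regs, error_code);
		return 0;		/* 0: take the regular ret_from_exception path */
	}

The assembly only tests the return value: non-zero means the pipeline already dealt with the fault and the stub jumps straight to restore_ret.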
2491 diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
2492 index 4db7c4d..2c7e86d 100644
2493 --- a/arch/x86/kernel/entry_64.S
2494 +++ b/arch/x86/kernel/entry_64.S
2495 @@ -48,6 +48,7 @@
2496 #include <asm/unistd.h>
2497 #include <asm/thread_info.h>
2498 #include <asm/hw_irq.h>
2499 +#include <asm/ipipe_base.h>
2500 #include <asm/page_types.h>
2501 #include <asm/irqflags.h>
2502 #include <asm/paravirt.h>
2503 @@ -61,6 +62,13 @@
2504 #define __AUDIT_ARCH_LE 0x40000000
2506 .code64
2508 +#ifdef CONFIG_IPIPE
2509 +#define PREEMPT_SCHEDULE_IRQ call __ipipe_preempt_schedule_irq
2510 +#else /* !CONFIG_IPIPE */
2511 +#define PREEMPT_SCHEDULE_IRQ call preempt_schedule_irq
2512 +#endif /* !CONFIG_IPIPE */
2514 #ifdef CONFIG_FUNCTION_TRACER
2515 #ifdef CONFIG_DYNAMIC_FTRACE
2516 ENTRY(mcount)
2517 @@ -336,7 +344,10 @@ ENTRY(save_args)
2519 * We entered an interrupt context - irqs are off:
2521 -2: TRACE_IRQS_OFF
2523 +#ifndef CONFIG_IPIPE
2524 + TRACE_IRQS_OFF
2525 +#endif
2527 CFI_ENDPROC
2528 END(save_args)
2529 @@ -402,6 +413,7 @@ ENTRY(ret_from_fork)
2530 CFI_ADJUST_CFA_OFFSET 8
2531 popf # reset kernel eflags
2532 CFI_ADJUST_CFA_OFFSET -8
2533 + ENABLE_INTERRUPTS_HW_COND
2535 call schedule_tail # rdi: 'prev' task parameter
2537 @@ -477,6 +489,17 @@ ENTRY(system_call_after_swapgs)
2538 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
2539 movq %rcx,RIP-ARGOFFSET(%rsp)
2540 CFI_REL_OFFSET rip,RIP-ARGOFFSET
2541 +#ifdef CONFIG_IPIPE
2542 + pushq %rdi
2543 + pushq %rax
2544 + leaq -(ARGOFFSET-16)(%rsp),%rdi # regs for handler
2545 + call __ipipe_syscall_root_thunk
2546 + testl %eax, %eax
2547 + popq %rax
2548 + popq %rdi
2549 + js ret_from_sys_call
2550 + jnz sysret_fastexit
2551 +#endif
2552 GET_THREAD_INFO(%rcx)
2553 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
2554 jnz tracesys
2555 @@ -506,6 +529,7 @@ sysret_check:
2556 * sysretq will re-enable interrupts:
2558 TRACE_IRQS_ON
2559 +sysret_fastexit:
2560 movq RIP-ARGOFFSET(%rsp),%rcx
2561 CFI_REGISTER rip,rcx
2562 RESTORE_ARGS 0,-ARG_SKIP,1
2563 @@ -517,6 +541,8 @@ sysret_check:
2564 /* Handle reschedules */
2565 /* edx: work, edi: workmask */
2566 sysret_careful:
2567 + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),%edx
2568 + jnz ret_from_sys_call_trace
2569 bt $TIF_NEED_RESCHED,%edx
2570 jnc sysret_signal
2571 TRACE_IRQS_ON
2572 @@ -528,6 +554,16 @@ sysret_careful:
2573 CFI_ADJUST_CFA_OFFSET -8
2574 jmp sysret_check
2576 +ret_from_sys_call_trace:
2577 + TRACE_IRQS_ON
2578 + sti
2579 + SAVE_REST
2580 + FIXUP_TOP_OF_STACK %rdi
2581 + movq %rsp,%rdi
2582 + LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
2583 + RESTORE_REST
2584 + jmp int_ret_from_sys_call
2586 /* Handle a signal */
2587 sysret_signal:
2588 TRACE_IRQS_ON
2589 @@ -800,7 +836,29 @@ END(interrupt)
2590 CFI_ADJUST_CFA_OFFSET 10*8
2591 call save_args
2592 PARTIAL_FRAME 0
2593 +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
2594 + pushq %rbp
2595 + leaq RIP-8(%rdi), %rbp # make interrupted address show up in trace
2596 + pushq %rdi
2597 + movq ORIG_RAX(%rdi), %rdi # IRQ number
2598 + notq %rdi # ...is inverted, fix up
2599 + call ipipe_trace_begin
2600 + popq %rdi
2601 + popq %rbp
2603 call \func
2605 + pushq %rbp
2606 + pushq %rax
2607 + movq 8-ARGOFFSET+ORIG_RAX(%rbp), %rdi
2608 + leaq 8-ARGOFFSET+RIP-8(%rbp), %rbp
2609 + notq %rdi
2610 + call ipipe_trace_end
2611 + popq %rax
2612 + popq %rbp
2613 +#else
2614 + call \func
2615 +#endif
2616 .endm
2619 @@ -813,9 +871,24 @@ END(interrupt)
2621 .p2align CONFIG_X86_L1_CACHE_SHIFT
2622 common_interrupt:
2623 +#ifdef CONFIG_IPIPE
2624 + XCPT_FRAME
2625 + addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
2626 + interrupt *ipipe_irq_handler
2627 + testl %eax, %eax
2628 + jnz ret_from_intr
2629 + decl PER_CPU_VAR(irq_count)
2630 + leaveq
2631 + CFI_DEF_CFA_REGISTER rsp
2632 + CFI_ADJUST_CFA_OFFSET -8
2633 + testl $3,CS-ARGOFFSET(%rsp)
2634 + jz restore_args
2635 + jmp retint_swapgs_notrace
2636 +#else /* !CONFIG_IPIPE */
2637 XCPT_FRAME
2638 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
2639 interrupt do_IRQ
2640 +#endif /* !CONFIG_IPIPE */
2641 /* 0(%rsp): old_rsp-ARGOFFSET */
2642 ret_from_intr:
2643 DISABLE_INTERRUPTS(CLBR_NONE)
2644 @@ -824,7 +897,7 @@ ret_from_intr:
2645 leaveq
2646 CFI_DEF_CFA_REGISTER rsp
2647 CFI_ADJUST_CFA_OFFSET -8
2648 -exit_intr:
2649 +ENTRY(exit_intr)
2650 GET_THREAD_INFO(%rcx)
2651 testl $3,CS-ARGOFFSET(%rsp)
2652 je retint_kernel
2653 @@ -844,20 +917,20 @@ retint_check:
2654 jnz retint_careful
2656 retint_swapgs: /* return to user-space */
2657 + TRACE_IRQS_IRETQ
2659 * The iretq could re-enable interrupts:
2661 - DISABLE_INTERRUPTS(CLBR_ANY)
2662 - TRACE_IRQS_IRETQ
2663 +retint_swapgs_notrace:
2664 SWAPGS
2665 +retint_noswapgs:
2666 jmp restore_args
2668 retint_restore_args: /* return to kernel space */
2669 - DISABLE_INTERRUPTS(CLBR_ANY)
2670 + TRACE_IRQS_IRETQ
2672 * The iretq could re-enable interrupts:
2674 - TRACE_IRQS_IRETQ
2675 restore_args:
2676 RESTORE_ARGS 0,8,0
2678 @@ -939,7 +1012,15 @@ ENTRY(retint_kernel)
2679 jnc retint_restore_args
2680 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
2681 jnc retint_restore_args
2682 - call preempt_schedule_irq
2683 +#ifdef CONFIG_IPIPE
2684 + /*
2685 + * We may have preempted call_softirq before __do_softirq raised or
2686 + * after it lowered the preemption counter.
2687 + */
2688 + cmpl $0,PER_CPU_VAR(irq_count)
2689 + jge retint_restore_args
2690 +#endif
2691 + PREEMPT_SCHEDULE_IRQ
2692 jmp exit_intr
2693 #endif
2695 @@ -953,16 +1034,31 @@ END(common_interrupt)
2697 * APIC interrupts.
2699 -.macro apicinterrupt num sym do_sym
2700 + .macro apicinterrupt num sym do_sym
2701 ENTRY(\sym)
2702 INTR_FRAME
2703 pushq $~(\num)
2704 CFI_ADJUST_CFA_OFFSET 8
2705 +#ifdef CONFIG_IPIPE
2706 + interrupt *ipipe_irq_handler
2707 + testl %eax, %eax
2708 + jnz ret_from_intr
2709 + decl PER_CPU_VAR(irq_count)
2710 + leaveq
2711 + CFI_DEF_CFA_REGISTER rsp
2712 + CFI_ADJUST_CFA_OFFSET -8
2713 + testl $3,CS-ARGOFFSET(%rsp)
2714 + jz restore_args
2715 + jmp retint_swapgs_notrace
2716 + CFI_ENDPROC
2717 + .endm
2718 +#else /* !CONFIG_IPIPE */
2719 interrupt \do_sym
2720 jmp ret_from_intr
2721 CFI_ENDPROC
2722 END(\sym)
2723 .endm
2724 +#endif /* !CONFIG_IPIPE */
2726 #ifdef CONFIG_SMP
2727 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
2728 @@ -987,6 +1083,7 @@ apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
2729 invalidate_interrupt1 smp_invalidate_interrupt
2730 apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
2731 invalidate_interrupt2 smp_invalidate_interrupt
2732 +#ifndef CONFIG_IPIPE
2733 apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
2734 invalidate_interrupt3 smp_invalidate_interrupt
2735 apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
2736 @@ -997,6 +1094,7 @@ apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
2737 invalidate_interrupt6 smp_invalidate_interrupt
2738 apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
2739 invalidate_interrupt7 smp_invalidate_interrupt
2740 +#endif /* !CONFIG_IPIPE */
2741 #endif
2743 apicinterrupt THRESHOLD_APIC_VECTOR \
2744 @@ -1031,7 +1129,7 @@ apicinterrupt LOCAL_PENDING_VECTOR \
2746 * Exception entry points.
2748 -.macro zeroentry sym do_sym
2749 +.macro zeroentry sym do_sym ex_code
2750 ENTRY(\sym)
2751 INTR_FRAME
2752 PARAVIRT_ADJUST_EXCEPTION_FRAME
2753 @@ -1042,13 +1140,28 @@ ENTRY(\sym)
2754 DEFAULT_FRAME 0
2755 movq %rsp,%rdi /* pt_regs pointer */
2756 xorl %esi,%esi /* no error code */
2757 +#ifdef CONFIG_IPIPE
2758 + movq $\ex_code,%rdx
2759 + call __ipipe_handle_exception /* handle(regs, error_code, ex_code) */
2760 + TRACE_IRQS_OFF
2761 + testl %eax, %eax
2762 + jz error_exit
2763 + movl %ebx,%eax
2764 + RESTORE_REST
2765 + DISABLE_INTERRUPTS(CLBR_NONE)
2766 + testl %eax,%eax
2767 + jne retint_noswapgs
2768 + jmp retint_swapgs_notrace
2769 +#else /* !CONFIG_IPIPE */
2770 + TRACE_IRQS_OFF
2771 call \do_sym
2772 +#endif /* !CONFIG_IPIPE */
2773 jmp error_exit /* %ebx: no swapgs flag */
2774 CFI_ENDPROC
2775 END(\sym)
2776 .endm
2778 -.macro paranoidzeroentry sym do_sym
2779 +.macro paranoidzeroentry sym do_sym ex_code=0
2780 ENTRY(\sym)
2781 INTR_FRAME
2782 PARAVIRT_ADJUST_EXCEPTION_FRAME
2783 @@ -1056,16 +1169,31 @@ ENTRY(\sym)
2784 CFI_ADJUST_CFA_OFFSET 8
2785 subq $15*8, %rsp
2786 call save_paranoid
2787 - TRACE_IRQS_OFF
2788 movq %rsp,%rdi /* pt_regs pointer */
2789 +#ifdef CONFIG_IPIPE
2790 + .if \ex_code
2791 + movq $\ex_code,%rsi
2792 + call __ipipe_divert_exception /* handle(regs, ex_code) */
2793 + TRACE_IRQS_OFF
2794 + testl %eax,%eax
2795 + jnz 1f
2796 + movq %rsp,%rdi
2797 + .endif
2798 +#else
2799 + TRACE_IRQS_OFF
2800 +#endif
2801 xorl %esi,%esi /* no error code */
2802 call \do_sym
2803 +#ifdef CONFIG_IPIPE
2804 + xorl %eax,%eax /* tell paranoid_exit to propagate the exception */
2806 +#endif
2807 jmp paranoid_exit /* %ebx: no swapgs flag */
2808 CFI_ENDPROC
2809 END(\sym)
2810 .endm
2812 -.macro paranoidzeroentry_ist sym do_sym ist
2813 +.macro paranoidzeroentry_ist sym do_sym ist ex_code=0
2814 ENTRY(\sym)
2815 INTR_FRAME
2816 PARAVIRT_ADJUST_EXCEPTION_FRAME
2817 @@ -1075,17 +1203,30 @@ ENTRY(\sym)
2818 call save_paranoid
2819 TRACE_IRQS_OFF
2820 movq %rsp,%rdi /* pt_regs pointer */
2821 +#ifdef CONFIG_IPIPE
2822 + .if \ex_code
2823 + movq $\ex_code,%rsi
2824 + call __ipipe_divert_exception /* handle(regs, ex_code) */
2825 + testl %eax,%eax
2826 + jnz 1f
2827 + movq %rsp,%rdi
2828 + .endif
2829 +#endif
2830 xorl %esi,%esi /* no error code */
2831 PER_CPU(init_tss, %r12)
2832 subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12)
2833 call \do_sym
2834 addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12)
2835 +#ifdef CONFIG_IPIPE
2836 + xorl %eax,%eax /* tell paranoid_exit to propagate the exception */
2838 +#endif
2839 jmp paranoid_exit /* %ebx: no swapgs flag */
2840 CFI_ENDPROC
2841 END(\sym)
2842 .endm
2844 -.macro errorentry sym do_sym
2845 +.macro errorentry sym do_sym ex_code
2846 ENTRY(\sym)
2847 XCPT_FRAME
2848 PARAVIRT_ADJUST_EXCEPTION_FRAME
2849 @@ -1096,14 +1237,29 @@ ENTRY(\sym)
2850 movq %rsp,%rdi /* pt_regs pointer */
2851 movq ORIG_RAX(%rsp),%rsi /* get error code */
2852 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
2853 +#ifdef CONFIG_IPIPE
2854 + movq $\ex_code,%rdx
2855 + call __ipipe_handle_exception /* handle(regs, error_code, ex_code) */
2856 + TRACE_IRQS_OFF
2857 + testl %eax, %eax
2858 + jz error_exit
2859 + movl %ebx,%eax
2860 + RESTORE_REST
2861 + DISABLE_INTERRUPTS(CLBR_NONE)
2862 + testl %eax,%eax
2863 + jne retint_noswapgs
2864 + jmp retint_swapgs_notrace
2865 +#else /* !CONFIG_IPIPE */
2866 + TRACE_IRQS_OFF
2867 call \do_sym
2868 +#endif /* !CONFIG_IPIPE */
2869 jmp error_exit /* %ebx: no swapgs flag */
2870 CFI_ENDPROC
2871 END(\sym)
2872 .endm
2874 /* error code is on the stack already */
2875 -.macro paranoiderrorentry sym do_sym
2876 +.macro paranoiderrorentry sym do_sym ex_code=0
2877 ENTRY(\sym)
2878 XCPT_FRAME
2879 PARAVIRT_ADJUST_EXCEPTION_FRAME
2880 @@ -1113,27 +1269,40 @@ ENTRY(\sym)
2881 DEFAULT_FRAME 0
2882 TRACE_IRQS_OFF
2883 movq %rsp,%rdi /* pt_regs pointer */
2884 +#ifdef CONFIG_IPIPE
2885 + .if \ex_code
2886 + movq $\ex_code,%rsi
2887 + call __ipipe_divert_exception /* handle(regs, ex_code) */
2888 + testl %eax,%eax
2889 + jnz 1f
2890 + movq %rsp,%rdi
2891 + .endif
2892 +#endif
2893 movq ORIG_RAX(%rsp),%rsi /* get error code */
2894 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
2895 call \do_sym
2896 +#ifdef CONFIG_IPIPE
2897 + xorl %eax,%eax /* tell paranoid_exit to propagate the exception */
2899 +#endif
2900 jmp paranoid_exit /* %ebx: no swapgs flag */
2901 CFI_ENDPROC
2902 END(\sym)
2903 .endm
2905 -zeroentry divide_error do_divide_error
2906 -zeroentry overflow do_overflow
2907 -zeroentry bounds do_bounds
2908 -zeroentry invalid_op do_invalid_op
2909 -zeroentry device_not_available do_device_not_available
2910 +zeroentry divide_error do_divide_error ex_do_divide_error
2911 +zeroentry overflow do_overflow ex_do_overflow
2912 +zeroentry bounds do_bounds ex_do_bounds
2913 +zeroentry invalid_op do_invalid_op ex_do_invalid_op
2914 +zeroentry device_not_available do_device_not_available ex_do_device_not_available
2915 paranoiderrorentry double_fault do_double_fault
2916 -zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
2917 -errorentry invalid_TSS do_invalid_TSS
2918 -errorentry segment_not_present do_segment_not_present
2919 -zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
2920 -zeroentry coprocessor_error do_coprocessor_error
2921 -errorentry alignment_check do_alignment_check
2922 -zeroentry simd_coprocessor_error do_simd_coprocessor_error
2923 +zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun ex_do_coprocessor_segment_overrun
2924 +errorentry invalid_TSS do_invalid_TSS ex_do_invalid_TSS
2925 +errorentry segment_not_present do_segment_not_present ex_do_segment_not_present
2926 +zeroentry spurious_interrupt_bug do_spurious_interrupt_bug ex_do_spurious_interrupt_bug
2927 +zeroentry coprocessor_error do_coprocessor_error ex_do_coprocessor_error
2928 +errorentry alignment_check do_alignment_check ex_do_alignment_check
2929 +zeroentry simd_coprocessor_error do_simd_coprocessor_error ex_do_simd_coprocessor_error
2931 /* Reload gs selector with exception handling */
2932 /* edi: new selector */
2933 @@ -1220,14 +1389,18 @@ ENTRY(call_softirq)
2934 CFI_REL_OFFSET rbp,0
2935 mov %rsp,%rbp
2936 CFI_DEF_CFA_REGISTER rbp
2937 + DISABLE_INTERRUPTS_HW_COND
2938 incl PER_CPU_VAR(irq_count)
2939 cmove PER_CPU_VAR(irq_stack_ptr),%rsp
2940 + ENABLE_INTERRUPTS_HW_COND
2941 push %rbp # backlink for old unwinder
2942 call __do_softirq
2943 + DISABLE_INTERRUPTS_HW_COND
2944 leaveq
2945 CFI_DEF_CFA_REGISTER rsp
2946 CFI_ADJUST_CFA_OFFSET -8
2947 decl PER_CPU_VAR(irq_count)
2948 + ENABLE_INTERRUPTS_HW_COND
2950 CFI_ENDPROC
2951 END(call_softirq)
2952 @@ -1336,16 +1509,16 @@ END(xen_failsafe_callback)
2954 .pushsection .kprobes.text, "ax"
2956 -paranoidzeroentry_ist debug do_debug DEBUG_STACK
2957 -paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
2958 +paranoidzeroentry_ist debug do_debug DEBUG_STACK ex_do_debug
2959 +paranoidzeroentry_ist int3 do_int3 DEBUG_STACK ex_do_int3
2960 paranoiderrorentry stack_segment do_stack_segment
2961 #ifdef CONFIG_XEN
2962 zeroentry xen_debug do_debug
2963 zeroentry xen_int3 do_int3
2964 errorentry xen_stack_segment do_stack_segment
2965 #endif
2966 -errorentry general_protection do_general_protection
2967 -errorentry page_fault do_page_fault
2968 +errorentry general_protection do_general_protection ex_do_general_protection
2969 +errorentry page_fault do_page_fault ex_do_page_fault
2970 #ifdef CONFIG_X86_MCE
2971 paranoidzeroentry machine_check *machine_check_vector(%rip)
2972 #endif
2973 @@ -1368,8 +1541,13 @@ ENTRY(paranoid_exit)
2974 INTR_FRAME
2975 DISABLE_INTERRUPTS(CLBR_NONE)
2976 TRACE_IRQS_OFF
2977 +paranoid_notrace:
2978 testl %ebx,%ebx /* swapgs needed? */
2979 jnz paranoid_restore
2980 +#ifdef CONFIG_IPIPE
2981 + testl %eax,%eax
2982 + jnz paranoid_swapgs
2983 +#endif
2984 testl $3,CS(%rsp)
2985 jnz paranoid_userspace
2986 paranoid_swapgs:
2987 @@ -1440,7 +1618,6 @@ ENTRY(error_entry)
2988 error_swapgs:
2989 SWAPGS
2990 error_sti:
2991 - TRACE_IRQS_OFF
2993 CFI_ENDPROC
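Both the 32-bit and the 64-bit entry paths now funnel every interrupt through *ipipe_irq_handler instead of calling do_IRQ directly, and the assembly only inspects the handler's return value to choose an exit route. The contract the modified stubs rely on can be sketched as follows (the helper names are hypothetical; the stubs implement this in assembly):

	/*
	 * return != 0: the root (Linux) domain actually ran IRQ work, so take
	 *              the normal ret_from_intr path (resched/signal checks);
	 * return == 0: the IRQ stayed in the pipeline (handled by a co-kernel
	 *              or merely logged), so just restore registers and iret.
	 */
	extern void linux_ret_from_intr(struct pt_regs *regs);		/* assumed */
	extern void fast_restore_and_iret(struct pt_regs *regs);	/* assumed */

	void example_interrupt_exit(struct pt_regs *regs, int root_handled)
	{
		if (root_handled)
			linux_ret_from_intr(regs);
		else
			fast_restore_and_iret(regs);
	}

This is also why TRACE_IRQS_OFF moves or disappears in several hunks: when a trap or IRQ may be diverted away from Linux, the root domain's irq-state tracing must not be touched until the code knows it is running on behalf of Linux.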
2995 diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
2996 index 2dfd315..3253978 100644
2997 --- a/arch/x86/kernel/i8253.c
2998 +++ b/arch/x86/kernel/i8253.c
2999 @@ -11,6 +11,7 @@
3000 #include <linux/delay.h>
3001 #include <linux/init.h>
3002 #include <linux/io.h>
3003 +#include <linux/ipipe.h>
3005 #include <asm/i8253.h>
3006 #include <asm/hpet.h>
3007 @@ -130,6 +131,12 @@ static cycle_t pit_read(struct clocksource *cs)
3008 int count;
3009 u32 jifs;
3011 +#ifdef CONFIG_IPIPE
3012 + if (!__ipipe_pipeline_head_p(ipipe_root_domain))
3013 + /* We don't really own the PIT. */
3014 + return (cycle_t)(jiffies * LATCH) + (LATCH - 1) - old_count;
3015 +#endif /* CONFIG_IPIPE */
3017 raw_spin_lock_irqsave(&i8253_lock, flags);
3019 * Although our caller may have the read side of xtime_lock,
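The pit_read() change covers the case where a co-kernel has taken over the PIT: latching the counter from Linux would race with the pipeline head, so the clocksource instead synthesizes a monotonic value from jiffies. The arithmetic mirrors what the normal path computes per tick, as the sketch below shows (same identifiers as the hunk; illustration only):

	/*
	 * Approximate elapsed PIT cycles without touching the hardware:
	 * every jiffy spans LATCH PIT cycles, and old_count (the last value
	 * read while Linux still owned the PIT) counts *down* within a tick,
	 * so the position inside the current tick is (LATCH - 1) - old_count.
	 */
	static cycle_t example_pit_fallback(unsigned long jifs, int old_count)
	{
		return (cycle_t)(jifs * LATCH) + (LATCH - 1) - old_count;
	}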
3020 diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
3021 index cafa7c8..28d72ce 100644
3022 --- a/arch/x86/kernel/i8259.c
3023 +++ b/arch/x86/kernel/i8259.c
3024 @@ -31,7 +31,7 @@
3027 static int i8259A_auto_eoi;
3028 -DEFINE_RAW_SPINLOCK(i8259A_lock);
3029 +IPIPE_DEFINE_RAW_SPINLOCK(i8259A_lock);
3030 static void mask_and_ack_8259A(unsigned int);
3031 static void mask_8259A(void);
3032 static void unmask_8259A(void);
3033 @@ -74,6 +74,7 @@ static void disable_8259A_irq(unsigned int irq)
3034 unsigned long flags;
3036 raw_spin_lock_irqsave(&i8259A_lock, flags);
3037 + ipipe_irq_lock(irq);
3038 cached_irq_mask |= mask;
3039 if (irq & 8)
3040 outb(cached_slave_mask, PIC_SLAVE_IMR);
3041 @@ -84,15 +85,18 @@ static void disable_8259A_irq(unsigned int irq)
3043 static void enable_8259A_irq(unsigned int irq)
3045 - unsigned int mask = ~(1 << irq);
3046 + unsigned int mask = (1 << irq);
3047 unsigned long flags;
3049 raw_spin_lock_irqsave(&i8259A_lock, flags);
3050 - cached_irq_mask &= mask;
3051 - if (irq & 8)
3052 - outb(cached_slave_mask, PIC_SLAVE_IMR);
3053 - else
3054 - outb(cached_master_mask, PIC_MASTER_IMR);
3055 + if (cached_irq_mask & mask) {
3056 + cached_irq_mask &= ~mask;
3057 + if (irq & 8)
3058 + outb(cached_slave_mask, PIC_SLAVE_IMR);
3059 + else
3060 + outb(cached_master_mask, PIC_MASTER_IMR);
3061 + ipipe_irq_unlock(irq);
3063 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
3066 @@ -173,6 +177,18 @@ static void mask_and_ack_8259A(unsigned int irq)
3068 if (cached_irq_mask & irqmask)
3069 goto spurious_8259A_irq;
3070 +#ifdef CONFIG_IPIPE
3071 + if (irq == 0) {
3072 + /*
3073 + * Fast timer ack -- don't mask (unless supposedly
3074 + * spurious). We trace outb's in order to detect
3075 + * broken hardware inducing large delays.
3076 + */
3077 + outb(0x60, PIC_MASTER_CMD); /* Specific EOI to master. */
3078 + raw_spin_unlock_irqrestore(&i8259A_lock, flags);
3079 + return;
3081 +#endif /* CONFIG_IPIPE */
3082 cached_irq_mask |= irqmask;
3084 handle_real_irq:
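The i8259 hunks keep the pipeline's view of a line in step with the PIC mask registers: disable_8259A_irq() now calls ipipe_irq_lock() so logged events for that line are not replayed while it is masked, and the rewritten enable path only clears the mask and calls ipipe_irq_unlock() when the line really was masked. The pairing can be summarized as below (the pic_* helpers are assumed stand-ins for the IMR accesses above):

	extern void pic_set_mask_bit(unsigned int irq);		/* assumed: set IMR bit */
	extern void pic_clear_mask_bit(unsigned int irq);	/* assumed: clear IMR bit */
	extern int  pic_line_masked(unsigned int irq);		/* assumed: test cached mask */

	static void example_mask_line(unsigned int irq)
	{
		ipipe_irq_lock(irq);		/* pipeline: stop replaying this line */
		pic_set_mask_bit(irq);
	}

	static void example_unmask_line(unsigned int irq)
	{
		if (pic_line_masked(irq)) {	/* unlock only if we actually locked */
			pic_clear_mask_bit(irq);
			ipipe_irq_unlock(irq);	/* pipeline: pending events may flow */
		}
	}

The irq == 0 special case in mask_and_ack_8259A() is the fast timer path: the tick is EOIed immediately and left unmasked so the timer never waits on the slow mask/unmask round-trip.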
3085 diff --git a/arch/x86/kernel/ipipe.c b/arch/x86/kernel/ipipe.c
3086 new file mode 100644
3087 index 0000000..6720ba8
3088 --- /dev/null
3089 +++ b/arch/x86/kernel/ipipe.c
3090 @@ -0,0 +1,971 @@
3091 +/* -*- linux-c -*-
3092 + * linux/arch/x86/kernel/ipipe.c
3094 + * Copyright (C) 2002-2007 Philippe Gerum.
3096 + * This program is free software; you can redistribute it and/or modify
3097 + * it under the terms of the GNU General Public License as published by
3098 + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
3099 + * USA; either version 2 of the License, or (at your option) any later
3100 + * version.
3102 + * This program is distributed in the hope that it will be useful,
3103 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
3104 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3105 + * GNU General Public License for more details.
3107 + * You should have received a copy of the GNU General Public License
3108 + * along with this program; if not, write to the Free Software
3109 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
3111 + * Architecture-dependent I-PIPE support for x86.
3112 + */
3114 +#include <linux/kernel.h>
3115 +#include <linux/smp.h>
3116 +#include <linux/module.h>
3117 +#include <linux/sched.h>
3118 +#include <linux/interrupt.h>
3119 +#include <linux/slab.h>
3120 +#include <linux/irq.h>
3121 +#include <linux/clockchips.h>
3122 +#include <linux/kprobes.h>
3123 +#include <asm/unistd.h>
3124 +#include <asm/system.h>
3125 +#include <asm/atomic.h>
3126 +#include <asm/hw_irq.h>
3127 +#include <asm/irq.h>
3128 +#include <asm/desc.h>
3129 +#include <asm/io.h>
3130 +#ifdef CONFIG_X86_LOCAL_APIC
3131 +#include <asm/tlbflush.h>
3132 +#include <asm/fixmap.h>
3133 +#include <asm/bitops.h>
3134 +#include <asm/mpspec.h>
3135 +#ifdef CONFIG_X86_IO_APIC
3136 +#include <asm/io_apic.h>
3137 +#endif /* CONFIG_X86_IO_APIC */
3138 +#include <asm/apic.h>
3139 +#endif /* CONFIG_X86_LOCAL_APIC */
3140 +#include <asm/traps.h>
3142 +int __ipipe_tick_irq = 0; /* Legacy timer */
3144 +DEFINE_PER_CPU(struct pt_regs, __ipipe_tick_regs);
3146 +DEFINE_PER_CPU(unsigned long, __ipipe_cr2);
3147 +EXPORT_PER_CPU_SYMBOL_GPL(__ipipe_cr2);
3150 + * ipipe_trigger_irq() -- Push the interrupt at the front of the pipeline
3151 + * just as if it had actually been received from a hw source. Also
3152 + * works for virtual interrupts.
3153 + */
3154 +int ipipe_trigger_irq(unsigned int irq)
3156 + struct pt_regs regs;
3157 + unsigned long flags;
3159 +#ifdef CONFIG_IPIPE_DEBUG
3160 + if (irq >= IPIPE_NR_IRQS)
3161 + return -EINVAL;
3162 + if (ipipe_virtual_irq_p(irq)) {
3163 + if (!test_bit(irq - IPIPE_VIRQ_BASE,
3164 + &__ipipe_virtual_irq_map))
3165 + return -EINVAL;
3166 + } else if (irq_to_desc(irq) == NULL)
3167 + return -EINVAL;
3168 +#endif
3169 + local_irq_save_hw(flags);
3170 + regs.flags = flags;
3171 + regs.orig_ax = irq; /* Positive value - IRQ won't be acked */
3172 + regs.cs = __KERNEL_CS;
3173 + __ipipe_handle_irq(&regs);
3174 + local_irq_restore_hw(flags);
3176 + return 1;
3179 +int ipipe_get_sysinfo(struct ipipe_sysinfo *info)
3181 + info->ncpus = num_online_cpus();
3182 + info->cpufreq = ipipe_cpu_freq();
3183 + info->archdep.tmirq = __ipipe_tick_irq;
3184 +#ifdef CONFIG_X86_TSC
3185 + info->archdep.tmfreq = ipipe_cpu_freq();
3186 +#else /* !CONFIG_X86_TSC */
3187 + info->archdep.tmfreq = CLOCK_TICK_RATE;
3188 +#endif /* CONFIG_X86_TSC */
3190 + return 0;
3193 +#ifdef CONFIG_X86_UV
3194 +asmlinkage void uv_bau_message_interrupt(struct pt_regs *regs);
3195 +#endif
3196 +#ifdef CONFIG_X86_MCE_THRESHOLD
3197 +asmlinkage void smp_threshold_interrupt(void);
3198 +#endif
3199 +#ifdef CONFIG_X86_NEW_MCE
3200 +asmlinkage void smp_mce_self_interrupt(void);
3201 +#endif
3203 +static void __ipipe_ack_irq(unsigned irq, struct irq_desc *desc)
3205 + desc->ipipe_ack(irq, desc);
3208 +void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq)
3210 + irq_to_desc(irq)->status &= ~IRQ_DISABLED;
3213 +#ifdef CONFIG_X86_LOCAL_APIC
3215 +static void __ipipe_noack_apic(unsigned irq, struct irq_desc *desc)
3219 +static void __ipipe_ack_apic(unsigned irq, struct irq_desc *desc)
3221 + __ack_APIC_irq();
3224 +static void __ipipe_null_handler(unsigned irq, void *cookie)
3228 +#endif /* CONFIG_X86_LOCAL_APIC */
3230 +/* __ipipe_enable_pipeline() -- We are running on the boot CPU, hw
3231 + interrupts are off, and secondary CPUs are still lost in space. */
3233 +void __init __ipipe_enable_pipeline(void)
3235 + unsigned int vector, irq;
3237 +#ifdef CONFIG_X86_LOCAL_APIC
3239 + /* Map the APIC system vectors. */
3241 + ipipe_virtualize_irq(ipipe_root_domain,
3242 + ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR),
3243 + (ipipe_irq_handler_t)&smp_apic_timer_interrupt,
3244 + NULL,
3245 + &__ipipe_ack_apic,
3246 + IPIPE_STDROOT_MASK);
3248 + ipipe_virtualize_irq(ipipe_root_domain,
3249 + ipipe_apic_vector_irq(SPURIOUS_APIC_VECTOR),
3250 + (ipipe_irq_handler_t)&smp_spurious_interrupt,
3251 + NULL,
3252 + &__ipipe_noack_apic,
3253 + IPIPE_STDROOT_MASK);
3255 + ipipe_virtualize_irq(ipipe_root_domain,
3256 + ipipe_apic_vector_irq(ERROR_APIC_VECTOR),
3257 + (ipipe_irq_handler_t)&smp_error_interrupt,
3258 + NULL,
3259 + &__ipipe_ack_apic,
3260 + IPIPE_STDROOT_MASK);
3262 + ipipe_virtualize_irq(ipipe_root_domain,
3263 + ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0),
3264 + &__ipipe_null_handler,
3265 + NULL,
3266 + &__ipipe_ack_apic,
3267 + IPIPE_STDROOT_MASK);
3269 + ipipe_virtualize_irq(ipipe_root_domain,
3270 + ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1),
3271 + &__ipipe_null_handler,
3272 + NULL,
3273 + &__ipipe_ack_apic,
3274 + IPIPE_STDROOT_MASK);
3276 + ipipe_virtualize_irq(ipipe_root_domain,
3277 + ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2),
3278 + &__ipipe_null_handler,
3279 + NULL,
3280 + &__ipipe_ack_apic,
3281 + IPIPE_STDROOT_MASK);
3283 + ipipe_virtualize_irq(ipipe_root_domain,
3284 + ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3),
3285 + &__ipipe_null_handler,
3286 + NULL,
3287 + &__ipipe_ack_apic,
3288 + IPIPE_STDROOT_MASK);
3290 +#ifdef CONFIG_X86_THERMAL_VECTOR
3291 + ipipe_virtualize_irq(ipipe_root_domain,
3292 + ipipe_apic_vector_irq(THERMAL_APIC_VECTOR),
3293 + (ipipe_irq_handler_t)&smp_thermal_interrupt,
3294 + NULL,
3295 + &__ipipe_ack_apic,
3296 + IPIPE_STDROOT_MASK);
3297 +#endif /* CONFIG_X86_THERMAL_VECTOR */
3299 +#ifdef CONFIG_X86_MCE_THRESHOLD
3300 + ipipe_virtualize_irq(ipipe_root_domain,
3301 + ipipe_apic_vector_irq(THRESHOLD_APIC_VECTOR),
3302 + (ipipe_irq_handler_t)&smp_threshold_interrupt,
3303 + NULL,
3304 + &__ipipe_ack_apic,
3305 + IPIPE_STDROOT_MASK);
3306 +#endif /* CONFIG_X86_MCE_THRESHOLD */
3308 +#ifdef CONFIG_X86_NEW_MCE
3309 + ipipe_virtualize_irq(ipipe_root_domain,
3310 + ipipe_apic_vector_irq(MCE_SELF_VECTOR),
3311 + (ipipe_irq_handler_t)&smp_mce_self_interrupt,
3312 + NULL,
3313 + &__ipipe_ack_apic,
3314 + IPIPE_STDROOT_MASK);
3315 +#endif /* CONFIG_X86_NEW_MCE */
3317 +#ifdef CONFIG_X86_UV
3318 + ipipe_virtualize_irq(ipipe_root_domain,
3319 + ipipe_apic_vector_irq(UV_BAU_MESSAGE),
3320 + (ipipe_irq_handler_t)&uv_bau_message_interrupt,
3321 + NULL,
3322 + &__ipipe_ack_apic,
3323 + IPIPE_STDROOT_MASK);
3324 +#endif /* CONFIG_X86_UV */
3326 + ipipe_virtualize_irq(ipipe_root_domain,
3327 + ipipe_apic_vector_irq(X86_PLATFORM_IPI_VECTOR),
3328 + (ipipe_irq_handler_t)&smp_x86_platform_ipi,
3329 + NULL,
3330 + &__ipipe_ack_apic,
3331 + IPIPE_STDROOT_MASK);
3333 +#ifdef CONFIG_PERF_EVENTS
3334 + ipipe_virtualize_irq(ipipe_root_domain,
3335 + ipipe_apic_vector_irq(LOCAL_PENDING_VECTOR),
3336 + (ipipe_irq_handler_t)&smp_perf_pending_interrupt,
3337 + NULL,
3338 + &__ipipe_ack_apic,
3339 + IPIPE_STDROOT_MASK);
3340 +#endif /* CONFIG_PERF_EVENTS */
3342 +#endif /* CONFIG_X86_LOCAL_APIC */
3344 +#ifdef CONFIG_SMP
3345 + ipipe_virtualize_irq(ipipe_root_domain,
3346 + ipipe_apic_vector_irq(RESCHEDULE_VECTOR),
3347 + (ipipe_irq_handler_t)&smp_reschedule_interrupt,
3348 + NULL,
3349 + &__ipipe_ack_apic,
3350 + IPIPE_STDROOT_MASK);
3352 + for (vector = INVALIDATE_TLB_VECTOR_START;
3353 + vector <= INVALIDATE_TLB_VECTOR_END; ++vector)
3354 + ipipe_virtualize_irq(ipipe_root_domain,
3355 + ipipe_apic_vector_irq(vector),
3356 + (ipipe_irq_handler_t)&smp_invalidate_interrupt,
3357 + NULL,
3358 + &__ipipe_ack_apic,
3359 + IPIPE_STDROOT_MASK);
3361 + ipipe_virtualize_irq(ipipe_root_domain,
3362 + ipipe_apic_vector_irq(CALL_FUNCTION_VECTOR),
3363 + (ipipe_irq_handler_t)&smp_call_function_interrupt,
3364 + NULL,
3365 + &__ipipe_ack_apic,
3366 + IPIPE_STDROOT_MASK);
3368 + ipipe_virtualize_irq(ipipe_root_domain,
3369 + ipipe_apic_vector_irq(CALL_FUNCTION_SINGLE_VECTOR),
3370 + (ipipe_irq_handler_t)&smp_call_function_single_interrupt,
3371 + NULL,
3372 + &__ipipe_ack_apic,
3373 + IPIPE_STDROOT_MASK);
3375 + ipipe_virtualize_irq(ipipe_root_domain,
3376 + IRQ_MOVE_CLEANUP_VECTOR,
3377 + (ipipe_irq_handler_t)&smp_irq_move_cleanup_interrupt,
3378 + NULL,
3379 + &__ipipe_ack_apic,
3380 + IPIPE_STDROOT_MASK);
3382 + ipipe_virtualize_irq(ipipe_root_domain,
3383 + ipipe_apic_vector_irq(REBOOT_VECTOR),
3384 + (ipipe_irq_handler_t)&smp_reboot_interrupt,
3385 + NULL,
3386 + &__ipipe_ack_apic,
3387 + IPIPE_STDROOT_MASK);
3388 +#else
3389 + (void)vector;
3390 +#endif /* CONFIG_SMP */
3392 + /* Finally, virtualize the remaining ISA and IO-APIC
3393 + * interrupts. Interrupts which have already been virtualized
3394 + * will just beget a silent -EPERM error since
3395 + * IPIPE_SYSTEM_MASK has been passed for them, that's ok. */
3397 + for (irq = 0; irq < NR_IRQS; irq++)
3398 + /*
3399 + * Fails for IPIPE_CRITICAL_IPI and IRQ_MOVE_CLEANUP_VECTOR,
3400 + * but that's ok.
3401 + */
3402 + ipipe_virtualize_irq(ipipe_root_domain,
3403 + irq,
3404 + (ipipe_irq_handler_t)&do_IRQ,
3405 + NULL,
3406 + &__ipipe_ack_irq,
3407 + IPIPE_STDROOT_MASK);
3409 +#ifdef CONFIG_X86_LOCAL_APIC
3410 + /* Eventually allow these vectors to be reprogrammed. */
3411 + ipipe_root_domain->irqs[IPIPE_SERVICE_IPI0].control &= ~IPIPE_SYSTEM_MASK;
3412 + ipipe_root_domain->irqs[IPIPE_SERVICE_IPI1].control &= ~IPIPE_SYSTEM_MASK;
3413 + ipipe_root_domain->irqs[IPIPE_SERVICE_IPI2].control &= ~IPIPE_SYSTEM_MASK;
3414 + ipipe_root_domain->irqs[IPIPE_SERVICE_IPI3].control &= ~IPIPE_SYSTEM_MASK;
3415 +#endif /* CONFIG_X86_LOCAL_APIC */
3418 +#ifdef CONFIG_SMP
3420 +cpumask_t __ipipe_set_irq_affinity(unsigned irq, cpumask_t cpumask)
3422 + cpumask_t oldmask;
3424 + if (irq_to_desc(irq)->chip->set_affinity == NULL)
3425 + return CPU_MASK_NONE;
3427 + if (cpus_empty(cpumask))
3428 + return CPU_MASK_NONE; /* Return mask value -- no change. */
3430 + cpus_and(cpumask, cpumask, cpu_online_map);
3431 + if (cpus_empty(cpumask))
3432 + return CPU_MASK_NONE; /* Error -- bad mask value or non-routable IRQ. */
3434 + cpumask_copy(&oldmask, irq_to_desc(irq)->affinity);
3435 + irq_to_desc(irq)->chip->set_affinity(irq, &cpumask);
3437 + return oldmask;
3440 +int __ipipe_send_ipi(unsigned ipi, cpumask_t cpumask)
3442 + unsigned long flags;
3443 + int self;
3445 + local_irq_save_hw(flags);
3447 + self = cpu_isset(ipipe_processor_id(),cpumask);
3448 + cpu_clear(ipipe_processor_id(), cpumask);
3450 + if (!cpus_empty(cpumask))
3451 + apic->send_IPI_mask(&cpumask, ipipe_apic_irq_vector(ipi));
3453 + if (self)
3454 + ipipe_trigger_irq(ipi);
3456 + local_irq_restore_hw(flags);
3458 + return 0;
3461 +void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd)
3463 + ipd->irqs[IPIPE_CRITICAL_IPI].acknowledge = &__ipipe_ack_apic;
3464 + ipd->irqs[IPIPE_CRITICAL_IPI].handler = &__ipipe_do_critical_sync;
3465 + ipd->irqs[IPIPE_CRITICAL_IPI].cookie = NULL;
3466 + /* Immediately handle in the current domain but *never* pass */
3467 + ipd->irqs[IPIPE_CRITICAL_IPI].control =
3468 + IPIPE_HANDLE_MASK|IPIPE_STICKY_MASK|IPIPE_SYSTEM_MASK;
3471 +#endif /* CONFIG_SMP */
3473 +static inline void __fixup_if(int s, struct pt_regs *regs)
3475 + /*
3476 + * Have the saved hw state look like the domain stall bit, so
3477 + * that __ipipe_unstall_iret_root() restores the proper
3478 + * pipeline state for the root stage upon exit.
3479 + */
3480 + if (s)
3481 + regs->flags &= ~X86_EFLAGS_IF;
3482 + else
3483 + regs->flags |= X86_EFLAGS_IF;
3486 +#ifdef CONFIG_X86_32
3489 + * Check the stall bit of the root domain to make sure the existing
3490 + * preemption opportunity upon in-kernel resumption could be
3491 + * exploited. In case a rescheduling could take place, the root stage
3492 + * is stalled before the hw interrupts are re-enabled. This routine
3493 + * must be called with hw interrupts off.
3494 + */
3496 +asmlinkage int __ipipe_kpreempt_root(struct pt_regs regs)
3498 + if (test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
3499 + /* Root stage is stalled: rescheduling denied. */
3500 + return 0;
3502 + __ipipe_stall_root();
3503 + trace_hardirqs_off();
3504 + local_irq_enable_hw_notrace();
3506 + return 1; /* Ok, may reschedule now. */
3509 +asmlinkage void __ipipe_unstall_iret_root(struct pt_regs regs)
3511 + struct ipipe_percpu_domain_data *p;
3513 + /* Emulate IRET's handling of the interrupt flag. */
3515 + local_irq_disable_hw();
3517 + p = ipipe_root_cpudom_ptr();
3519 + /*
3520 + * Restore the software state as it used to be on kernel
3521 + * entry. CAUTION: NMIs must *not* return through this
3522 + * emulation.
3523 + */
3524 + if (raw_irqs_disabled_flags(regs.flags)) {
3525 + if (!__test_and_set_bit(IPIPE_STALL_FLAG, &p->status))
3526 + trace_hardirqs_off();
3527 + if (!__ipipe_pipeline_head_p(ipipe_root_domain))
3528 + regs.flags |= X86_EFLAGS_IF;
3529 + } else {
3530 + if (test_bit(IPIPE_STALL_FLAG, &p->status)) {
3531 + trace_hardirqs_on();
3532 + __clear_bit(IPIPE_STALL_FLAG, &p->status);
3534 + /*
3535 + * We could have received and logged interrupts while
3536 + * stalled in the syscall path: play the log now to
3537 + * release any pending event. The SYNC_BIT prevents
3538 + * infinite recursion in case of flooding.
3539 + */
3540 + if (unlikely(__ipipe_ipending_p(p)))
3541 + __ipipe_sync_pipeline();
3543 +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
3544 + ipipe_trace_end(0x8000000D);
3545 +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
3548 +#else /* !CONFIG_X86_32 */
3550 +#ifdef CONFIG_PREEMPT
3552 +asmlinkage void preempt_schedule_irq(void);
3554 +void __ipipe_preempt_schedule_irq(void)
3556 + struct ipipe_percpu_domain_data *p;
3557 + unsigned long flags;
3558 + /*
3559 + * We have no IRQ state fixup on entry to exceptions in
3560 + * x86_64, so we have to stall the root stage before
3561 + * rescheduling.
3562 + */
3563 + BUG_ON(!irqs_disabled_hw());
3564 + local_irq_save(flags);
3565 + local_irq_enable_hw();
3566 + preempt_schedule_irq(); /* Ok, may reschedule now. */
3567 + local_irq_disable_hw();
3569 + /*
3570 + * Flush any pending interrupt that may have been logged after
3571 + * preempt_schedule_irq() stalled the root stage before
3572 + * returning to us, and now.
3573 + */
3574 + p = ipipe_root_cpudom_ptr();
3575 + if (unlikely(__ipipe_ipending_p(p))) {
3576 + add_preempt_count(PREEMPT_ACTIVE);
3577 + trace_hardirqs_on();
3578 + clear_bit(IPIPE_STALL_FLAG, &p->status);
3579 + __ipipe_sync_pipeline();
3580 + sub_preempt_count(PREEMPT_ACTIVE);
3581 + }
3583 + __local_irq_restore_nosync(flags);
3586 +#endif /* CONFIG_PREEMPT */
3588 +#endif /* !CONFIG_X86_32 */
3590 +void __ipipe_halt_root(void)
3592 + struct ipipe_percpu_domain_data *p;
3594 + /* Emulate sti+hlt sequence over the root domain. */
3596 + local_irq_disable_hw();
3598 + p = ipipe_root_cpudom_ptr();
3600 + trace_hardirqs_on();
3601 + clear_bit(IPIPE_STALL_FLAG, &p->status);
3603 + if (unlikely(__ipipe_ipending_p(p))) {
3604 + __ipipe_sync_pipeline();
3605 + local_irq_enable_hw();
3606 + } else {
3607 +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
3608 + ipipe_trace_end(0x8000000E);
3609 +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
3610 + asm volatile("sti; hlt": : :"memory");
3614 +static void do_machine_check_vector(struct pt_regs *regs, long error_code)
3616 +#ifdef CONFIG_X86_MCE
3617 +#ifdef CONFIG_X86_32
3618 + extern void (*machine_check_vector)(struct pt_regs *, long error_code);
3619 + machine_check_vector(regs, error_code);
3620 +#else
3621 + do_machine_check(regs, error_code);
3622 +#endif
3623 +#endif /* CONFIG_X86_MCE */
3626 +/* Work around genksyms's issue with over-qualification in decls. */
3628 +typedef void dotraplinkage __ipipe_exhandler(struct pt_regs *, long);
3630 +typedef __ipipe_exhandler *__ipipe_exptr;
3632 +static __ipipe_exptr __ipipe_std_extable[] = {
3634 + [ex_do_divide_error] = &do_divide_error,
3635 + [ex_do_overflow] = &do_overflow,
3636 + [ex_do_bounds] = &do_bounds,
3637 + [ex_do_invalid_op] = &do_invalid_op,
3638 + [ex_do_coprocessor_segment_overrun] = &do_coprocessor_segment_overrun,
3639 + [ex_do_invalid_TSS] = &do_invalid_TSS,
3640 + [ex_do_segment_not_present] = &do_segment_not_present,
3641 + [ex_do_stack_segment] = &do_stack_segment,
3642 + [ex_do_general_protection] = &do_general_protection,
3643 + [ex_do_page_fault] = (__ipipe_exptr)&do_page_fault,
3644 + [ex_do_spurious_interrupt_bug] = &do_spurious_interrupt_bug,
3645 + [ex_do_coprocessor_error] = &do_coprocessor_error,
3646 + [ex_do_alignment_check] = &do_alignment_check,
3647 + [ex_machine_check_vector] = &do_machine_check_vector,
3648 + [ex_do_simd_coprocessor_error] = &do_simd_coprocessor_error,
3649 + [ex_do_device_not_available] = &do_device_not_available,
3650 +#ifdef CONFIG_X86_32
3651 + [ex_do_iret_error] = &do_iret_error,
3652 +#endif
3655 +#ifdef CONFIG_KGDB
3656 +#include <linux/kgdb.h>
3658 +static int __ipipe_xlate_signo[] = {
3660 + [ex_do_divide_error] = SIGFPE,
3661 + [ex_do_debug] = SIGTRAP,
3662 + [2] = -1,
3663 + [ex_do_int3] = SIGTRAP,
3664 + [ex_do_overflow] = SIGSEGV,
3665 + [ex_do_bounds] = SIGSEGV,
3666 + [ex_do_invalid_op] = SIGILL,
3667 + [ex_do_device_not_available] = -1,
3668 + [8] = -1,
3669 + [ex_do_coprocessor_segment_overrun] = SIGFPE,
3670 + [ex_do_invalid_TSS] = SIGSEGV,
3671 + [ex_do_segment_not_present] = SIGBUS,
3672 + [ex_do_stack_segment] = SIGBUS,
3673 + [ex_do_general_protection] = SIGSEGV,
3674 + [ex_do_page_fault] = SIGSEGV,
3675 + [ex_do_spurious_interrupt_bug] = -1,
3676 + [ex_do_coprocessor_error] = -1,
3677 + [ex_do_alignment_check] = SIGBUS,
3678 + [ex_machine_check_vector] = -1,
3679 + [ex_do_simd_coprocessor_error] = -1,
3680 + [20 ... 31] = -1,
3681 +#ifdef CONFIG_X86_32
3682 + [ex_do_iret_error] = SIGSEGV,
3683 +#endif
3685 +#endif /* CONFIG_KGDB */
3687 +int __ipipe_handle_exception(struct pt_regs *regs, long error_code, int vector)
3689 + bool root_entry = false;
3690 + unsigned long flags = 0;
3691 + unsigned long cr2 = 0;
3693 + if (ipipe_root_domain_p) {
3694 + root_entry = true;
3696 + local_save_flags(flags);
3697 + /*
3698 + * Replicate hw interrupt state into the virtual mask
3699 + * before calling the I-pipe event handler over the
3700 + * root domain. Also required later when calling the
3701 + * Linux exception handler.
3702 + */
3703 + if (irqs_disabled_hw())
3704 + local_irq_disable();
3706 +#ifdef CONFIG_KGDB
3707 + /* catch exceptions KGDB is interested in over non-root domains */
3708 + else if (__ipipe_xlate_signo[vector] >= 0 &&
3709 + !kgdb_handle_exception(vector, __ipipe_xlate_signo[vector],
3710 + error_code, regs))
3711 + return 1;
3712 +#endif /* CONFIG_KGDB */
3714 + if (vector == ex_do_page_fault)
3715 + cr2 = native_read_cr2();
3717 + if (unlikely(ipipe_trap_notify(vector, regs))) {
3718 + if (root_entry)
3719 + local_irq_restore_nosync(flags);
3720 + return 1;
3723 + if (likely(ipipe_root_domain_p)) {
3724 + /*
3725 + * If root is not the topmost domain or in case we faulted in
3726 + * the iret path of x86-32, regs.flags does not match the root
3727 + * domain state. The fault handler or the low-level return
3728 + * code may evaluate it. So fix this up, either by the root
3729 + * state sampled on entry or, if we migrated to root, with the
3730 + * current state.
3731 + */
3732 + __fixup_if(root_entry ? raw_irqs_disabled_flags(flags) :
3733 + raw_irqs_disabled(), regs);
3734 + } else {
3735 + /* Detect unhandled faults over non-root domains. */
3736 + struct ipipe_domain *ipd = ipipe_current_domain;
3738 + /* Switch to root so that Linux can handle the fault cleanly. */
3739 + __ipipe_current_domain = ipipe_root_domain;
3741 + ipipe_trace_panic_freeze();
3743 + /* Always warn about user land and unfixable faults. */
3744 + if ((error_code & 4) || !search_exception_tables(instruction_pointer(regs))) {
3745 + printk(KERN_ERR "BUG: Unhandled exception over domain"
3746 + " %s at 0x%lx - switching to ROOT\n",
3747 + ipd->name, instruction_pointer(regs));
3748 + dump_stack();
3749 + ipipe_trace_panic_dump();
3750 +#ifdef CONFIG_IPIPE_DEBUG
3751 + /* Also report fixable ones when debugging is enabled. */
3752 + } else {
3753 + printk(KERN_WARNING "WARNING: Fixable exception over "
3754 + "domain %s at 0x%lx - switching to ROOT\n",
3755 + ipd->name, instruction_pointer(regs));
3756 + dump_stack();
3757 + ipipe_trace_panic_dump();
3758 +#endif /* CONFIG_IPIPE_DEBUG */
3762 + if (vector == ex_do_page_fault)
3763 + write_cr2(cr2);
3765 + __ipipe_std_extable[vector](regs, error_code);
3767 + /*
3768 + * Relevant for 64-bit: Restore root domain state as the low-level
3769 + * return code will not align it to regs.flags.
3770 + */
3771 + if (root_entry)
3772 + local_irq_restore_nosync(flags);
3774 + return 0;
3777 +int __ipipe_divert_exception(struct pt_regs *regs, int vector)
3779 + bool root_entry = false;
3780 + unsigned long flags = 0;
3782 + if (ipipe_root_domain_p) {
3783 + root_entry = true;
3785 + local_save_flags(flags);
3787 + if (irqs_disabled_hw()) {
3788 + /*
3789 + * Same root state handling as in
3790 + * __ipipe_handle_exception.
3791 + */
3792 + local_irq_disable();
3795 +#ifdef CONFIG_KGDB
3796 + /* catch int1 and int3 over non-root domains */
3797 + else {
3798 +#ifdef CONFIG_X86_32
3799 + if (vector != ex_do_device_not_available)
3800 +#endif
3802 + unsigned int condition = 0;
3804 + if (vector == 1)
3805 + get_debugreg(condition, 6);
3806 + if (!kgdb_handle_exception(vector, SIGTRAP, condition, regs))
3807 + return 1;
3810 +#endif /* CONFIG_KGDB */
3812 + if (unlikely(ipipe_trap_notify(vector, regs))) {
3813 + if (root_entry)
3814 + local_irq_restore_nosync(flags);
3815 + return 1;
3818 + /* see __ipipe_handle_exception */
3819 + if (likely(ipipe_root_domain_p))
3820 + __fixup_if(root_entry ? raw_irqs_disabled_flags(flags) :
3821 + raw_irqs_disabled(), regs);
3822 + /*
3823 + * No need to restore root state in the 64-bit case, the Linux handler
3824 + * and the return code will take care of it.
3825 + */
3827 + return 0;
3830 +int __ipipe_syscall_root(struct pt_regs *regs)
3832 + struct ipipe_percpu_domain_data *p;
3833 + unsigned long flags;
3834 + int ret;
3836 + /*
3837 + * This routine returns one of:
3838 + * 0 -- if the syscall is to be passed to Linux;
3839 + * >0 -- if the syscall should not be passed to Linux, and no
3840 + * tail work should be performed;
3841 + * <0 -- if the syscall should not be passed to Linux but the
3842 + * tail work has to be performed (for handling signals etc).
3843 + */
3845 + if (!__ipipe_syscall_watched_p(current, regs->orig_ax) ||
3846 + !__ipipe_event_monitored_p(IPIPE_EVENT_SYSCALL))
3847 + return 0;
3849 + ret = __ipipe_dispatch_event(IPIPE_EVENT_SYSCALL, regs);
3851 + local_irq_save_hw(flags);
3853 + if (current->ipipe_flags & PF_EVTRET) {
3854 + current->ipipe_flags &= ~PF_EVTRET;
3855 + __ipipe_dispatch_event(IPIPE_EVENT_RETURN, regs);
3858 + if (!ipipe_root_domain_p) {
3859 +#ifdef CONFIG_X86_32
3860 + local_irq_restore_hw(flags);
3861 +#endif
3862 + return 1;
3865 + p = ipipe_root_cpudom_ptr();
3866 +#ifdef CONFIG_X86_32
3867 + /*
3868 + * Fix-up only required on 32-bit as only here the IRET return code
3869 + * will evaluate the flags.
3870 + */
3871 + __fixup_if(test_bit(IPIPE_STALL_FLAG, &p->status), regs);
3872 +#endif
3873 + /*
3874 + * If allowed, sync pending VIRQs before _TIF_NEED_RESCHED is
3875 + * tested.
3876 + */
3877 + if (__ipipe_ipending_p(p))
3878 + __ipipe_sync_pipeline();
3879 +#ifdef CONFIG_X86_64
3880 + if (!ret)
3881 +#endif
3882 + local_irq_restore_hw(flags);
3884 + return -ret;
3888 + * __ipipe_handle_irq() -- IPIPE's generic IRQ handler. An optimistic
3889 + * interrupt protection log is maintained here for each domain. Hw
3890 + * interrupts are off on entry.
3891 + */
3892 +int __ipipe_handle_irq(struct pt_regs *regs)
3894 + struct ipipe_domain *this_domain, *next_domain;
3895 + unsigned int vector = regs->orig_ax, irq;
3896 + struct list_head *head, *pos;
3897 + struct pt_regs *tick_regs;
3898 + int m_ack;
3900 + if ((long)regs->orig_ax < 0) {
3901 + vector = ~vector;
3902 +#ifdef CONFIG_X86_LOCAL_APIC
3903 + if (vector >= FIRST_SYSTEM_VECTOR)
3904 + irq = ipipe_apic_vector_irq(vector);
3905 +#ifdef CONFIG_SMP
3906 + else if (vector == IRQ_MOVE_CLEANUP_VECTOR)
3907 + irq = vector;
3908 +#endif /* CONFIG_SMP */
3909 + else
3910 +#endif /* CONFIG_X86_LOCAL_APIC */
3911 + irq = __get_cpu_var(vector_irq)[vector];
3912 + m_ack = 0;
3913 + } else { /* This is a self-triggered one. */
3914 + irq = vector;
3915 + m_ack = 1;
3918 + this_domain = ipipe_current_domain;
3920 + if (test_bit(IPIPE_STICKY_FLAG, &this_domain->irqs[irq].control))
3921 + head = &this_domain->p_link;
3922 + else {
3923 + head = __ipipe_pipeline.next;
3924 + next_domain = list_entry(head, struct ipipe_domain, p_link);
3925 + if (likely(test_bit(IPIPE_WIRED_FLAG, &next_domain->irqs[irq].control))) {
3926 + if (!m_ack && next_domain->irqs[irq].acknowledge)
3927 + next_domain->irqs[irq].acknowledge(irq, irq_to_desc(irq));
3928 + __ipipe_dispatch_wired(next_domain, irq);
3929 + goto finalize_nosync;
3933 + /* Ack the interrupt. */
3935 + pos = head;
3937 + while (pos != &__ipipe_pipeline) {
3938 + next_domain = list_entry(pos, struct ipipe_domain, p_link);
3939 + if (test_bit(IPIPE_HANDLE_FLAG, &next_domain->irqs[irq].control)) {
3940 + __ipipe_set_irq_pending(next_domain, irq);
3941 + if (!m_ack && next_domain->irqs[irq].acknowledge) {
3942 + next_domain->irqs[irq].acknowledge(irq, irq_to_desc(irq));
3943 + m_ack = 1;
3946 + if (!test_bit(IPIPE_PASS_FLAG, &next_domain->irqs[irq].control))
3947 + break;
3948 + pos = next_domain->p_link.next;
3951 + /*
3952 + * If the interrupt preempted the head domain, then do not
3953 + * even try to walk the pipeline, unless an interrupt is
3954 + * pending for it.
3955 + */
3956 + if (test_bit(IPIPE_AHEAD_FLAG, &this_domain->flags) &&
3957 + !__ipipe_ipending_p(ipipe_head_cpudom_ptr()))
3958 + goto finalize_nosync;
3960 + /*
3961 + * Now walk the pipeline, yielding control to the highest
3962 + * priority domain that has pending interrupt(s) or
3963 + * immediately to the current domain if the interrupt has been
3964 + * marked as 'sticky'. This search does not go beyond the
3965 + * current domain in the pipeline.
3966 + */
3968 + __ipipe_walk_pipeline(head);
3970 +finalize_nosync:
3972 + /*
3973 + * Given our deferred dispatching model for regular IRQs, we
3974 + * only record CPU regs for the last timer interrupt, so that
3975 + * the timer handler charges CPU times properly. It is assumed
3976 + * that other interrupt handlers don't actually care for such
3977 + * information.
3978 + */
3980 + if (irq == __ipipe_tick_irq) {
3981 + tick_regs = &__raw_get_cpu_var(__ipipe_tick_regs);
3982 + tick_regs->flags = regs->flags;
3983 + tick_regs->cs = regs->cs;
3984 + tick_regs->ip = regs->ip;
3985 + tick_regs->bp = regs->bp;
3986 +#ifdef CONFIG_X86_64
3987 + tick_regs->ss = regs->ss;
3988 + tick_regs->sp = regs->sp;
3989 +#endif
3990 + if (!__ipipe_root_domain_p)
3991 + tick_regs->flags &= ~X86_EFLAGS_IF;
3994 + if (user_mode(regs) && (current->ipipe_flags & PF_EVTRET) != 0) {
3995 + current->ipipe_flags &= ~PF_EVTRET;
3996 + __ipipe_dispatch_event(IPIPE_EVENT_RETURN, regs);
3999 + if (!__ipipe_root_domain_p ||
4000 + test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
4001 + return 0;
4003 + return 1;
4006 +int __ipipe_check_tickdev(const char *devname)
4008 +#ifdef CONFIG_X86_LOCAL_APIC
4009 + if (!strcmp(devname, "lapic"))
4010 + return __ipipe_check_lapic();
4011 +#endif
4013 + return 1;
4016 +void *ipipe_irq_handler = __ipipe_handle_irq;
4017 +EXPORT_SYMBOL(ipipe_irq_handler);
4018 +EXPORT_SYMBOL(io_apic_irqs);
4019 +EXPORT_PER_CPU_SYMBOL(__ipipe_tick_regs);
4020 +__attribute__((regparm(3))) void do_notify_resume(struct pt_regs *, void *, __u32);
4021 +EXPORT_SYMBOL(do_notify_resume);
4022 +extern void *sys_call_table;
4023 +EXPORT_SYMBOL(sys_call_table);
4024 +#ifdef CONFIG_X86_32
4025 +extern void ret_from_intr(void);
4026 +EXPORT_SYMBOL(ret_from_intr);
4027 +extern spinlock_t i8259A_lock;
4028 +extern struct desc_struct idt_table[];
4029 +#else
4030 +extern ipipe_spinlock_t i8259A_lock;
4031 +extern gate_desc idt_table[];
4032 +#endif
4033 +EXPORT_PER_CPU_SYMBOL(vector_irq);
4034 +EXPORT_SYMBOL(idt_table);
4035 +EXPORT_SYMBOL(i8259A_lock);
4036 +EXPORT_SYMBOL(__ipipe_sync_stage);
4037 +EXPORT_SYMBOL(kill_proc_info);
4038 +EXPORT_SYMBOL(find_task_by_pid_ns);
4040 +EXPORT_SYMBOL(__ipipe_tick_irq);
4042 +EXPORT_SYMBOL_GPL(irq_to_desc);
4043 +struct task_struct *__switch_to(struct task_struct *prev_p,
4044 + struct task_struct *next_p);
4045 +EXPORT_SYMBOL_GPL(__switch_to);
4046 +EXPORT_SYMBOL_GPL(show_stack);
4048 +EXPORT_PER_CPU_SYMBOL_GPL(init_tss);
4049 +#ifdef CONFIG_SMP
4050 +EXPORT_PER_CPU_SYMBOL_GPL(cpu_tlbstate);
4051 +#endif /* CONFIG_SMP */
4053 +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
4054 +EXPORT_SYMBOL(tasklist_lock);
4055 +#endif /* CONFIG_SMP || CONFIG_DEBUG_SPINLOCK */
4057 +#if defined(CONFIG_CC_STACKPROTECTOR) && defined(CONFIG_X86_64)
4058 +EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union);
4059 +#endif
4061 +EXPORT_SYMBOL(__ipipe_halt_root);
4062 diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
4063 index 91fd0c7..d56b66f 100644
4064 --- a/arch/x86/kernel/irq.c
4065 +++ b/arch/x86/kernel/irq.c
4066 @@ -38,7 +38,7 @@ void ack_bad_irq(unsigned int irq)
4067 * completely.
4068 * But only ack when the APIC is enabled -AK
4070 - ack_APIC_irq();
4071 + __ack_APIC_irq();
4074 #define irq_stats(x) (&per_cpu(irq_stat, x))
4075 @@ -231,11 +231,12 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
4076 unsigned vector = ~regs->orig_ax;
4077 unsigned irq;
4079 + irq = __get_cpu_var(vector_irq)[vector];
4080 + __ipipe_move_root_irq(irq);
4082 exit_idle();
4083 irq_enter();
4085 - irq = __get_cpu_var(vector_irq)[vector];
4087 if (!handle_irq(irq, regs)) {
4088 ack_APIC_irq();
4090 diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
4091 index 990ae7c..5d371c8 100644
4092 --- a/arch/x86/kernel/irqinit.c
4093 +++ b/arch/x86/kernel/irqinit.c
4094 @@ -176,11 +176,13 @@ static void __init smp_intr_init(void)
4095 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
4096 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
4097 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
4098 +#ifndef CONFIG_IPIPE
4099 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
4100 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
4101 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
4102 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
4103 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
4104 +#endif
4106 /* IPI for generic function call */
4107 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
4108 @@ -195,6 +197,10 @@ static void __init smp_intr_init(void)
4110 /* IPI used for rebooting/stopping */
4111 alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt);
4112 +#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_32)
4113 + /* IPI for critical lock */
4114 + alloc_intr_gate(IPIPE_CRITICAL_VECTOR, ipipe_ipiX);
4115 +#endif
4116 #endif
4117 #endif /* CONFIG_SMP */
4119 @@ -229,6 +235,12 @@ static void __init apic_intr_init(void)
4120 alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
4121 # endif
4123 +#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_32)
4124 + alloc_intr_gate(IPIPE_SERVICE_VECTOR0, ipipe_ipi0);
4125 + alloc_intr_gate(IPIPE_SERVICE_VECTOR1, ipipe_ipi1);
4126 + alloc_intr_gate(IPIPE_SERVICE_VECTOR2, ipipe_ipi2);
4127 + alloc_intr_gate(IPIPE_SERVICE_VECTOR3, ipipe_ipi3);
4128 +#endif
4129 #endif
4132 diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
4133 index e7e3521..fd8bbdb 100644
4134 --- a/arch/x86/kernel/process.c
4135 +++ b/arch/x86/kernel/process.c
4136 @@ -40,6 +40,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
4137 if (ret)
4138 return ret;
4139 fpu_copy(&dst->thread.fpu, &src->thread.fpu);
4140 + } else {
4141 +#ifdef CONFIG_IPIPE
4142 + /* unconditionally allocate, RT domain may need it */
4143 + memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
4144 + ret = fpu_alloc(&dst->thread.fpu);
4145 + if (ret)
4146 + return ret;
4147 +#endif
4149 return 0;
4151 @@ -61,6 +69,10 @@ void arch_task_cache_init(void)
4152 kmem_cache_create("task_xstate", xstate_size,
4153 __alignof__(union thread_xstate),
4154 SLAB_PANIC | SLAB_NOTRACK, NULL);
4155 +#ifdef CONFIG_IPIPE
4156 + memset(&current->thread.fpu, 0, sizeof(current->thread.fpu));
4157 + fpu_alloc(&current->thread.fpu);
4158 +#endif
4162 @@ -396,7 +408,7 @@ EXPORT_SYMBOL(default_idle);
4164 void stop_this_cpu(void *dummy)
4166 - local_irq_disable();
4167 + local_irq_disable_hw();
4169 * Remove this CPU:
4171 @@ -623,6 +635,11 @@ static void c1e_idle(void)
4173 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
4175 +#ifdef CONFIG_IPIPE
4176 +#define default_to_mwait force_mwait
4177 +#else
4178 +#define default_to_mwait 1
4179 +#endif
4180 #ifdef CONFIG_SMP
4181 if (pm_idle == poll_idle && smp_num_siblings > 1) {
4182 printk_once(KERN_WARNING "WARNING: polling idle and HT enabled,"
4183 @@ -632,7 +649,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
4184 if (pm_idle)
4185 return;
4187 - if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
4188 + if (default_to_mwait && cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
4190 * One CPU supports mwait => All CPUs supports mwait
4192 diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
4193 index 8d12878..60a9773 100644
4194 --- a/arch/x86/kernel/process_32.c
4195 +++ b/arch/x86/kernel/process_32.c
4196 @@ -252,10 +252,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
4197 regs->cs = __USER_CS;
4198 regs->ip = new_ip;
4199 regs->sp = new_sp;
4200 +#ifndef CONFIG_IPIPE /* Lazily handled, init_fpu() will reset the state. */
4202 * Free the old FP and other extended state
4204 free_thread_xstate(current);
4205 +#endif
4207 EXPORT_SYMBOL_GPL(start_thread);
4209 @@ -292,7 +294,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
4211 struct thread_struct *prev = &prev_p->thread,
4212 *next = &next_p->thread;
4213 - int cpu = smp_processor_id();
4214 + int cpu = raw_smp_processor_id();
4215 struct tss_struct *tss = &per_cpu(init_tss, cpu);
4216 bool preload_fpu;
4218 diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
4219 index 3c2422a..00aa572 100644
4220 --- a/arch/x86/kernel/process_64.c
4221 +++ b/arch/x86/kernel/process_64.c
4222 @@ -56,6 +56,8 @@ asmlinkage extern void ret_from_fork(void);
4223 DEFINE_PER_CPU(unsigned long, old_rsp);
4224 static DEFINE_PER_CPU(unsigned char, is_idle);
4226 +asmlinkage extern void thread_return(void);
4228 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
4230 void idle_notifier_register(struct notifier_block *n)
4231 @@ -272,6 +274,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
4232 p->thread.sp = (unsigned long) childregs;
4233 p->thread.sp0 = (unsigned long) (childregs+1);
4234 p->thread.usersp = me->thread.usersp;
4235 + p->thread.rip = (unsigned long) thread_return;
4237 set_tsk_thread_flag(p, TIF_FORK);
4239 @@ -338,10 +341,12 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
4240 regs->ss = _ss;
4241 regs->flags = X86_EFLAGS_IF;
4242 set_fs(USER_DS);
4243 +#ifndef CONFIG_IPIPE /* Lazily handled, init_fpu() will reset the state. */
4245 * Free the old FP and other extended state
4247 free_thread_xstate(current);
4248 +#endif
4251 void
4252 @@ -374,7 +379,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
4254 struct thread_struct *prev = &prev_p->thread;
4255 struct thread_struct *next = &next_p->thread;
4256 - int cpu = smp_processor_id();
4257 + int cpu = raw_smp_processor_id();
4258 struct tss_struct *tss = &per_cpu(init_tss, cpu);
4259 unsigned fsindex, gsindex;
4260 bool preload_fpu;
4261 diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
4262 index 70c4872..367f800 100644
4263 --- a/arch/x86/kernel/ptrace.c
4264 +++ b/arch/x86/kernel/ptrace.c
4265 @@ -19,6 +19,7 @@
4266 #include <linux/audit.h>
4267 #include <linux/seccomp.h>
4268 #include <linux/signal.h>
4269 +#include <linux/unistd.h>
4270 #include <linux/perf_event.h>
4271 #include <linux/hw_breakpoint.h>
4273 @@ -1397,6 +1398,10 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
4275 bool step;
4277 +#ifdef CONFIG_IPIPE
4278 + if (syscall_get_nr(current, regs) >= NR_syscalls)
4279 + return;
4280 +#endif
4281 if (unlikely(current->audit_context))
4282 audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
4284 diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
4285 index d801210..88b0758 100644
4286 --- a/arch/x86/kernel/smp.c
4287 +++ b/arch/x86/kernel/smp.c
4288 @@ -185,9 +185,9 @@ static void native_smp_send_stop(void)
4289 udelay(1);
4292 - local_irq_save(flags);
4293 + local_irq_save_hw(flags);
4294 disable_local_APIC();
4295 - local_irq_restore(flags);
4296 + local_irq_restore_hw(flags);
4300 diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
4301 index 0bf2ece..d58fb5e 100644
4302 --- a/arch/x86/kernel/smpboot.c
4303 +++ b/arch/x86/kernel/smpboot.c
4304 @@ -293,7 +293,7 @@ static void __cpuinit smp_callin(void)
4306 * Activate a secondary processor.
4308 -notrace static void __cpuinit start_secondary(void *unused)
4309 +static void __cpuinit start_secondary(void *unused)
4312 * Don't put *anything* before cpu_init(), SMP booting is too
4313 @@ -891,7 +891,7 @@ do_rest:
4314 int __cpuinit native_cpu_up(unsigned int cpu)
4316 int apicid = apic->cpu_present_to_apicid(cpu);
4317 - unsigned long flags;
4318 + unsigned long flags, _flags;
4319 int err;
4321 WARN_ON(irqs_disabled());
4322 @@ -943,9 +943,9 @@ int __cpuinit native_cpu_up(unsigned int cpu)
4323 * Check TSC synchronization with the AP (keep irqs disabled
4324 * while doing so):
4326 - local_irq_save(flags);
4327 + local_irq_save_full(flags, _flags);
4328 check_tsc_sync_source(cpu);
4329 - local_irq_restore(flags);
4330 + local_irq_restore_full(flags, _flags);
4332 while (!cpu_online(cpu)) {
4333 cpu_relax();
4334 diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
4335 index fb5cc5e..6491f29 100644
4336 --- a/arch/x86/kernel/time.c
4337 +++ b/arch/x86/kernel/time.c
4338 @@ -60,6 +60,8 @@ EXPORT_SYMBOL(profile_pc);
4340 static irqreturn_t timer_interrupt(int irq, void *dev_id)
4342 + unsigned long flags;
4344 /* Keep nmi watchdog up to date */
4345 inc_irq_stat(irq0_irqs);
4347 @@ -70,11 +72,11 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
4348 * manually to deassert NMI lines for the watchdog if run
4349 * on an 82489DX-based system.
4351 - raw_spin_lock(&i8259A_lock);
4352 + raw_spin_lock_irqsave(&i8259A_lock, flags);
4353 outb(0x0c, PIC_MASTER_OCW3);
4354 /* Ack the IRQ; AEOI will end it automatically. */
4355 inb(PIC_MASTER_POLL);
4356 - raw_spin_unlock(&i8259A_lock);
4357 + raw_spin_unlock_irqrestore(&i8259A_lock, flags);
4360 global_clock_event->event_handler(global_clock_event);
4361 diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
4362 index 725ef4d..a2fd3e7 100644
4363 --- a/arch/x86/kernel/traps.c
4364 +++ b/arch/x86/kernel/traps.c
4365 @@ -725,6 +725,7 @@ void __math_state_restore(void)
4367 if (unlikely(restore_fpu_checking(tsk))) {
4368 stts();
4369 + local_irq_enable_hw_cond();
4370 force_sig(SIGSEGV, tsk);
4371 return;
4373 @@ -747,6 +748,7 @@ asmlinkage void math_state_restore(void)
4375 struct thread_info *thread = current_thread_info();
4376 struct task_struct *tsk = thread->task;
4377 + unsigned long flags;
4379 if (!tsk_used_math(tsk)) {
4380 local_irq_enable();
4381 @@ -763,9 +765,11 @@ asmlinkage void math_state_restore(void)
4382 local_irq_disable();
4385 + local_irq_save_hw_cond(flags);
4386 clts(); /* Allow maths ops (or we recurse) */
4388 __math_state_restore();
4389 + local_irq_restore_hw_cond(flags);
4391 EXPORT_SYMBOL_GPL(math_state_restore);
4393 diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
4394 index 4094ae0..cf46117 100644
4395 --- a/arch/x86/kernel/tsc.c
4396 +++ b/arch/x86/kernel/tsc.c
4397 @@ -737,7 +737,7 @@ core_initcall(cpufreq_tsc);
4399 /* clocksource code */
4401 -static struct clocksource clocksource_tsc;
4402 +struct clocksource clocksource_tsc;
4405 * We compare the TSC to the cycle_last value in the clocksource
4406 @@ -783,7 +783,7 @@ static void resume_tsc(struct clocksource *cs)
4407 clocksource_tsc.cycle_last = 0;
4410 -static struct clocksource clocksource_tsc = {
4411 +struct clocksource clocksource_tsc = {
4412 .name = "tsc",
4413 .rating = 300,
4414 .read = read_tsc,
4415 diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
4416 index 5ffb562..81fa5f4 100644
4417 --- a/arch/x86/kernel/vm86_32.c
4418 +++ b/arch/x86/kernel/vm86_32.c
4419 @@ -148,12 +148,14 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
4420 do_exit(SIGSEGV);
4423 + local_irq_disable_hw_cond();
4424 tss = &per_cpu(init_tss, get_cpu());
4425 current->thread.sp0 = current->thread.saved_sp0;
4426 current->thread.sysenter_cs = __KERNEL_CS;
4427 load_sp0(tss, &current->thread);
4428 current->thread.saved_sp0 = 0;
4429 put_cpu();
4430 + local_irq_enable_hw_cond();
4432 ret = KVM86->regs32;
4434 @@ -323,12 +325,14 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
4435 tsk->thread.saved_fs = info->regs32->fs;
4436 tsk->thread.saved_gs = get_user_gs(info->regs32);
4438 + local_irq_disable_hw_cond();
4439 tss = &per_cpu(init_tss, get_cpu());
4440 tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
4441 if (cpu_has_sep)
4442 tsk->thread.sysenter_cs = 0;
4443 load_sp0(tss, &tsk->thread);
4444 put_cpu();
4445 + local_irq_enable_hw_cond();
4447 tsk->thread.screen_bitmap = info->screen_bitmap;
4448 if (info->flags & VM86_SCREEN_BITMAP)
4449 diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
4450 index 1c0c6ab..7a1ca74 100644
4451 --- a/arch/x86/kernel/vsyscall_64.c
4452 +++ b/arch/x86/kernel/vsyscall_64.c
4453 @@ -32,6 +32,7 @@
4454 #include <linux/cpu.h>
4455 #include <linux/smp.h>
4456 #include <linux/notifier.h>
4457 +#include <linux/ipipe_tickdev.h>
4459 #include <asm/vsyscall.h>
4460 #include <asm/pgtable.h>
4461 @@ -90,6 +91,9 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
4462 vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
4463 vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
4464 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
4466 + if (clock == &clocksource_tsc)
4467 + ipipe_update_hostrt(wall_time, clock);
4470 /* RED-PEN may want to readd seq locking, but then the variable should be
4471 diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
4472 index c9f2d9b..78d780a 100644
4473 --- a/arch/x86/lib/mmx_32.c
4474 +++ b/arch/x86/lib/mmx_32.c
4475 @@ -30,7 +30,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
4476 void *p;
4477 int i;
4479 - if (unlikely(in_interrupt()))
4480 + if (unlikely(!ipipe_root_domain_p || in_interrupt()))
4481 return __memcpy(to, from, len);
4483 p = to;
4484 diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
4485 index bf9a7d5..98609ae 100644
4486 --- a/arch/x86/lib/thunk_64.S
4487 +++ b/arch/x86/lib/thunk_64.S
4488 @@ -65,6 +65,10 @@
4489 thunk lockdep_sys_exit_thunk,lockdep_sys_exit
4490 #endif
4492 +#ifdef CONFIG_IPIPE
4493 + thunk_retrax __ipipe_syscall_root_thunk,__ipipe_syscall_root
4494 +#endif
4496 /* SAVE_ARGS below is used only for the .cfi directives it contains. */
4497 CFI_STARTPROC
4498 SAVE_ARGS
4499 diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
4500 index 4c4508e..78369cb 100644
4501 --- a/arch/x86/mm/fault.c
4502 +++ b/arch/x86/mm/fault.c
4503 @@ -358,9 +358,9 @@ void vmalloc_sync_all(void)
4505 * This assumes no large pages in there.
4507 -static noinline __kprobes int vmalloc_fault(unsigned long address)
4508 +static inline int vmalloc_sync_one(pgd_t *pgd, unsigned long address)
4510 - pgd_t *pgd, *pgd_ref;
4511 + pgd_t *pgd_ref;
4512 pud_t *pud, *pud_ref;
4513 pmd_t *pmd, *pmd_ref;
4514 pte_t *pte, *pte_ref;
4515 @@ -374,7 +374,6 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
4516 * happen within a race in page table update. In the later
4517 * case just flush:
4519 - pgd = pgd_offset(current->active_mm, address);
4520 pgd_ref = pgd_offset_k(address);
4521 if (pgd_none(*pgd_ref))
4522 return -1;
4523 @@ -422,6 +421,12 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
4524 return 0;
4527 +static noinline __kprobes int vmalloc_fault(unsigned long address)
4529 + pgd_t *pgd = pgd_offset(current->active_mm, address);
4530 + return vmalloc_sync_one(pgd, address);
4533 static const char errata93_warning[] =
4534 KERN_ERR
4535 "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
4536 @@ -961,6 +966,9 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
4537 /* Get the faulting address: */
4538 address = read_cr2();
4540 + if (!__ipipe_pipeline_head_p(ipipe_root_domain))
4541 + local_irq_enable_hw_cond();
4544 * Detect and handle instructions that would cause a page fault for
4545 * both a tracked kernel page and a userspace page.
4546 @@ -1140,3 +1148,43 @@ good_area:
4548 up_read(&mm->mmap_sem);
4551 +#ifdef CONFIG_IPIPE
4552 +void __ipipe_pin_range_globally(unsigned long start, unsigned long end)
4554 +#ifdef CONFIG_X86_32
4555 + unsigned long next, addr = start;
4557 + do {
4558 + unsigned long flags;
4559 + struct page *page;
4561 + next = pgd_addr_end(addr, end);
4562 + spin_lock_irqsave(&pgd_lock, flags);
4563 + list_for_each_entry(page, &pgd_list, lru)
4564 + vmalloc_sync_one(page_address(page), addr);
4565 + spin_unlock_irqrestore(&pgd_lock, flags);
4567 + } while (addr = next, addr != end);
4568 +#else
4569 + unsigned long next, addr = start;
4570 + int ret = 0;
4572 + do {
4573 + struct page *page;
4575 + next = pgd_addr_end(addr, end);
4576 + spin_lock(&pgd_lock);
4577 + list_for_each_entry(page, &pgd_list, lru) {
4578 + pgd_t *pgd;
4579 + pgd = (pgd_t *)page_address(page) + pgd_index(addr);
4580 + ret = vmalloc_sync_one(pgd, addr);
4581 + if (ret)
4582 + break;
4584 + spin_unlock(&pgd_lock);
4585 + addr = next;
4586 + } while (!ret && addr != end);
4587 +#endif
4589 +#endif /* CONFIG_IPIPE */
4590 diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
4591 index 426f3a1..93e359a 100644
4592 --- a/arch/x86/mm/tlb.c
4593 +++ b/arch/x86/mm/tlb.c
4594 @@ -58,11 +58,15 @@ static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
4596 void leave_mm(int cpu)
4598 + unsigned long flags;
4600 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
4601 BUG();
4602 + local_irq_save_hw_cond(flags);
4603 cpumask_clear_cpu(cpu,
4604 mm_cpumask(percpu_read(cpu_tlbstate.active_mm)));
4605 load_cr3(swapper_pg_dir);
4606 + local_irq_restore_hw_cond(flags);
4608 EXPORT_SYMBOL_GPL(leave_mm);
4610 @@ -193,6 +197,9 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
4611 apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
4612 INVALIDATE_TLB_VECTOR_START + sender);
4614 +#ifdef CONFIG_IPIPE
4615 + WARN_ON_ONCE(irqs_disabled_hw());
4616 +#endif
4617 while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
4618 cpu_relax();
4620 diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
4621 index 98abf8b..7cb0a86 100644
4622 --- a/drivers/pci/htirq.c
4623 +++ b/drivers/pci/htirq.c
4624 @@ -20,7 +20,7 @@
4625 * With multiple simultaneous hypertransport irq devices it might pay
4626 * to make this more fine grained. But start with simple, stupid, and correct.
4628 -static DEFINE_SPINLOCK(ht_irq_lock);
4629 +static IPIPE_DEFINE_SPINLOCK(ht_irq_lock);
4631 struct ht_irq_cfg {
4632 struct pci_dev *dev;
4633 diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
4634 index 891e1dd..95f42db 100644
4635 --- a/drivers/serial/8250.c
4636 +++ b/drivers/serial/8250.c
4637 @@ -3051,6 +3051,53 @@ static int serial8250_resume(struct platform_device *dev)
4638 return 0;
4641 +#if defined(CONFIG_IPIPE_DEBUG) && defined(CONFIG_SERIAL_8250_CONSOLE)
4643 +#include <stdarg.h>
4645 +void __weak __ipipe_serial_debug(const char *fmt, ...)
4647 + struct uart_8250_port *up = &serial8250_ports[0];
4648 + unsigned int ier, count;
4649 + unsigned long flags;
4650 + char buf[128];
4651 + va_list ap;
4653 + va_start(ap, fmt);
4654 + vsprintf(buf, fmt, ap);
4655 + va_end(ap);
4656 + count = strlen(buf);
4658 + touch_nmi_watchdog();
4660 + local_irq_save_hw(flags);
4662 + /*
4663 + * First save the IER then disable the interrupts
4664 + */
4665 + ier = serial_in(up, UART_IER);
4667 + if (up->capabilities & UART_CAP_UUE)
4668 + serial_out(up, UART_IER, UART_IER_UUE);
4669 + else
4670 + serial_out(up, UART_IER, 0);
4672 + uart_console_write(&up->port, buf, count, serial8250_console_putchar);
4674 + /*
4675 + * Finally, wait for transmitter to become empty
4676 + * and restore the IER
4677 + */
4678 + wait_for_xmitr(up, BOTH_EMPTY);
4679 + serial_out(up, UART_IER, ier);
4681 + local_irq_restore_hw(flags);
4684 +EXPORT_SYMBOL(__ipipe_serial_debug);
4686 +#endif
4688 static struct platform_driver serial8250_isa_driver = {
4689 .probe = serial8250_probe,
4690 .remove = __devexit_p(serial8250_remove),
4691 diff --git a/fs/exec.c b/fs/exec.c
4692 index e19de6a..b04d039 100644
4693 --- a/fs/exec.c
4694 +++ b/fs/exec.c
4695 @@ -719,6 +719,7 @@ static int exec_mmap(struct mm_struct *mm)
4697 struct task_struct *tsk;
4698 struct mm_struct * old_mm, *active_mm;
4699 + unsigned long flags;
4701 /* Notify parent that we're no longer interested in the old VM */
4702 tsk = current;
4703 @@ -742,8 +743,10 @@ static int exec_mmap(struct mm_struct *mm)
4704 task_lock(tsk);
4705 active_mm = tsk->active_mm;
4706 tsk->mm = mm;
4707 + ipipe_mm_switch_protect(flags);
4708 tsk->active_mm = mm;
4709 activate_mm(active_mm, mm);
4710 + ipipe_mm_switch_unprotect(flags);
4711 task_unlock(tsk);
4712 arch_pick_mmap_layout(mm);
4713 if (old_mm) {
4714 diff --git a/fs/proc/array.c b/fs/proc/array.c
4715 index fff6572..fc6bd22 100644
4716 --- a/fs/proc/array.c
4717 +++ b/fs/proc/array.c
4718 @@ -142,6 +142,10 @@ static const char *task_state_array[] = {
4719 "x (dead)", /* 64 */
4720 "K (wakekill)", /* 128 */
4721 "W (waking)", /* 256 */
4722 +#ifdef CONFIG_IPIPE
4723 + "A (atomic switch)", /* 512 */
4724 + "N (wakeup disabled)", /* 1024 */
4725 +#endif
4728 static inline const char *get_task_state(struct task_struct *tsk)
4729 diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
4730 index 058129e..3fac94d 100644
4731 --- a/include/asm-generic/atomic.h
4732 +++ b/include/asm-generic/atomic.h
4733 @@ -57,11 +57,11 @@ static inline int atomic_add_return(int i, atomic_t *v)
4734 unsigned long flags;
4735 int temp;
4737 - local_irq_save(flags);
4738 + local_irq_save_hw(flags);
4739 temp = v->counter;
4740 temp += i;
4741 v->counter = temp;
4742 - local_irq_restore(flags);
4743 + local_irq_restore_hw(flags);
4745 return temp;
4747 @@ -78,11 +78,11 @@ static inline int atomic_sub_return(int i, atomic_t *v)
4748 unsigned long flags;
4749 int temp;
4751 - local_irq_save(flags);
4752 + local_irq_save_hw(flags);
4753 temp = v->counter;
4754 temp -= i;
4755 v->counter = temp;
4756 - local_irq_restore(flags);
4757 + local_irq_restore_hw(flags);
4759 return temp;
4761 @@ -135,9 +135,9 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
4762 unsigned long flags;
4764 mask = ~mask;
4765 - local_irq_save(flags);
4766 + local_irq_save_hw(flags);
4767 *addr &= mask;
4768 - local_irq_restore(flags);
4769 + local_irq_restore_hw(flags);
4772 #define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v)))
4773 diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
4774 index ecc44a8..5caf6e9 100644
4775 --- a/include/asm-generic/bitops/atomic.h
4776 +++ b/include/asm-generic/bitops/atomic.h
4777 @@ -21,20 +21,20 @@ extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
4778 * this is the substitute */
4779 #define _atomic_spin_lock_irqsave(l,f) do { \
4780 arch_spinlock_t *s = ATOMIC_HASH(l); \
4781 - local_irq_save(f); \
4782 + local_irq_save_hw(f); \
4783 arch_spin_lock(s); \
4784 } while(0)
4786 #define _atomic_spin_unlock_irqrestore(l,f) do { \
4787 arch_spinlock_t *s = ATOMIC_HASH(l); \
4788 arch_spin_unlock(s); \
4789 - local_irq_restore(f); \
4790 + local_irq_restore_hw(f); \
4791 } while(0)
4794 #else
4795 -# define _atomic_spin_lock_irqsave(l,f) do { local_irq_save(f); } while (0)
4796 -# define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore(f); } while (0)
4797 +# define _atomic_spin_lock_irqsave(l,f) do { local_irq_save_hw(f); } while (0)
4798 +# define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore_hw(f); } while (0)
4799 #endif
4802 diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h
4803 index b2ba2fc..ed01ab9 100644
4804 --- a/include/asm-generic/cmpxchg-local.h
4805 +++ b/include/asm-generic/cmpxchg-local.h
4806 @@ -20,7 +20,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
4807 if (size == 8 && sizeof(unsigned long) != 8)
4808 wrong_size_cmpxchg(ptr);
4810 - local_irq_save(flags);
4811 + local_irq_save_hw(flags);
4812 switch (size) {
4813 case 1: prev = *(u8 *)ptr;
4814 if (prev == old)
4815 @@ -41,7 +41,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
4816 default:
4817 wrong_size_cmpxchg(ptr);
4819 - local_irq_restore(flags);
4820 + local_irq_restore_hw(flags);
4821 return prev;
4824 @@ -54,11 +54,11 @@ static inline u64 __cmpxchg64_local_generic(volatile void *ptr,
4825 u64 prev;
4826 unsigned long flags;
4828 - local_irq_save(flags);
4829 + local_irq_save_hw(flags);
4830 prev = *(u64 *)ptr;
4831 if (prev == old)
4832 *(u64 *)ptr = new;
4833 - local_irq_restore(flags);
4834 + local_irq_restore_hw(flags);
4835 return prev;
4838 diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
4839 index b5043a9..52c56ea 100644
4840 --- a/include/asm-generic/percpu.h
4841 +++ b/include/asm-generic/percpu.h
4842 @@ -63,6 +63,20 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
4843 #define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
4844 #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
4846 +#ifdef CONFIG_IPIPE
4847 +#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP)
4848 +extern int __ipipe_check_percpu_access(void);
4849 +#define __ipipe_local_cpu_offset \
4850 + ({ \
4851 + WARN_ON_ONCE(__ipipe_check_percpu_access()); \
4852 + __my_cpu_offset; \
4853 + })
4854 +#else
4855 +#define __ipipe_local_cpu_offset __my_cpu_offset
4856 +#endif
4857 +#define __ipipe_get_cpu_var(var) \
4858 + (*SHIFT_PERCPU_PTR(&(var), __ipipe_local_cpu_offset))
4859 +#endif /* CONFIG_IPIPE */
4861 #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
4862 extern void setup_per_cpu_areas(void);
4863 @@ -73,6 +87,7 @@ extern void setup_per_cpu_areas(void);
4864 #define per_cpu(var, cpu) (*((void)(cpu), &(var)))
4865 #define __get_cpu_var(var) (var)
4866 #define __raw_get_cpu_var(var) (var)
4867 +#define __ipipe_get_cpu_var(var) __raw_get_cpu_var(var)
4868 #define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0)
4869 #define __this_cpu_ptr(ptr) this_cpu_ptr(ptr)
4871 diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
4872 index d5b3876..92b7abe 100644
4873 --- a/include/linux/hardirq.h
4874 +++ b/include/linux/hardirq.h
4875 @@ -209,6 +209,7 @@ extern void irq_exit(void);
4877 #define nmi_enter() \
4878 do { \
4879 + ipipe_nmi_enter(); \
4880 ftrace_nmi_enter(); \
4881 BUG_ON(in_nmi()); \
4882 add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
4883 @@ -225,6 +226,7 @@ extern void irq_exit(void);
4884 BUG_ON(!in_nmi()); \
4885 sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
4886 ftrace_nmi_exit(); \
4887 + ipipe_nmi_exit(); \
4888 } while (0)
4890 #endif /* LINUX_HARDIRQ_H */
4891 diff --git a/include/linux/ipipe.h b/include/linux/ipipe.h
4892 new file mode 100644
4893 index 0000000..4e04013
4894 --- /dev/null
4895 +++ b/include/linux/ipipe.h
4896 @@ -0,0 +1,780 @@
4897 +/* -*- linux-c -*-
4898 + * include/linux/ipipe.h
4900 + * Copyright (C) 2002-2007 Philippe Gerum.
4902 + * This program is free software; you can redistribute it and/or modify
4903 + * it under the terms of the GNU General Public License as published by
4904 + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
4905 + * USA; either version 2 of the License, or (at your option) any later
4906 + * version.
4908 + * This program is distributed in the hope that it will be useful,
4909 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4910 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4911 + * GNU General Public License for more details.
4913 + * You should have received a copy of the GNU General Public License
4914 + * along with this program; if not, write to the Free Software
4915 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
4916 + */
4918 +#ifndef __LINUX_IPIPE_H
4919 +#define __LINUX_IPIPE_H
4921 +#include <linux/spinlock.h>
4922 +#include <linux/cache.h>
4923 +#include <linux/percpu.h>
4924 +#include <linux/mutex.h>
4925 +#include <linux/linkage.h>
4926 +#include <linux/ipipe_base.h>
4927 +#include <asm/ipipe.h>
4928 +#include <asm/bug.h>
4930 +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
4932 +#include <linux/cpumask.h>
4933 +#include <asm/system.h>
4935 +static inline int ipipe_disable_context_check(int cpu)
4937 + return xchg(&per_cpu(ipipe_percpu_context_check, cpu), 0);
4940 +static inline void ipipe_restore_context_check(int cpu, int old_state)
4942 + per_cpu(ipipe_percpu_context_check, cpu) = old_state;
4945 +static inline void ipipe_context_check_off(void)
4947 + int cpu;
4948 + for_each_online_cpu(cpu)
4949 + per_cpu(ipipe_percpu_context_check, cpu) = 0;
4952 +#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */
4954 +static inline int ipipe_disable_context_check(int cpu)
4956 + return 0;
4959 +static inline void ipipe_restore_context_check(int cpu, int old_state) { }
4961 +static inline void ipipe_context_check_off(void) { }
4963 +#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */
4965 +#ifdef CONFIG_IPIPE_DEBUG_INTERNAL
4966 +#define IPIPE_WARN(c) WARN_ON(c)
4967 +#define IPIPE_WARN_ONCE(c) WARN_ON_ONCE(c)
4968 +#else
4969 +#define IPIPE_WARN(c) do { (void)(c); } while (0)
4970 +#define IPIPE_WARN_ONCE(c) do { (void)(c); } while (0)
4971 +#endif
4973 +#ifdef CONFIG_IPIPE
4975 +#define IPIPE_VERSION_STRING IPIPE_ARCH_STRING
4976 +#define IPIPE_RELEASE_NUMBER ((IPIPE_MAJOR_NUMBER << 16) | \
4977 + (IPIPE_MINOR_NUMBER << 8) | \
4978 + (IPIPE_PATCH_NUMBER))
4980 +#ifndef BROKEN_BUILTIN_RETURN_ADDRESS
4981 +#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_address(0))
4982 +#define __BUILTIN_RETURN_ADDRESS1 ((unsigned long)__builtin_return_address(1))
4983 +#endif /* !BUILTIN_RETURN_ADDRESS */
4985 +#define IPIPE_ROOT_PRIO 100
4986 +#define IPIPE_ROOT_ID 0
4987 +#define IPIPE_ROOT_NPTDKEYS 4 /* Must be <= BITS_PER_LONG */
4989 +#define IPIPE_RESET_TIMER 0x1
4990 +#define IPIPE_GRAB_TIMER 0x2
4992 +/* Global domain flags */
4993 +#define IPIPE_SPRINTK_FLAG 0 /* Synchronous printk() allowed */
4994 +#define IPIPE_AHEAD_FLAG 1 /* Domain always heads the pipeline */
4996 +/* Interrupt control bits */
4997 +#define IPIPE_HANDLE_FLAG 0
4998 +#define IPIPE_PASS_FLAG 1
4999 +#define IPIPE_ENABLE_FLAG 2
5000 +#define IPIPE_DYNAMIC_FLAG IPIPE_HANDLE_FLAG
5001 +#define IPIPE_STICKY_FLAG 3
5002 +#define IPIPE_SYSTEM_FLAG 4
5003 +#define IPIPE_LOCK_FLAG 5
5004 +#define IPIPE_WIRED_FLAG 6
5005 +#define IPIPE_EXCLUSIVE_FLAG 7
5007 +#define IPIPE_HANDLE_MASK (1 << IPIPE_HANDLE_FLAG)
5008 +#define IPIPE_PASS_MASK (1 << IPIPE_PASS_FLAG)
5009 +#define IPIPE_ENABLE_MASK (1 << IPIPE_ENABLE_FLAG)
5010 +#define IPIPE_DYNAMIC_MASK IPIPE_HANDLE_MASK
5011 +#define IPIPE_STICKY_MASK (1 << IPIPE_STICKY_FLAG)
5012 +#define IPIPE_SYSTEM_MASK (1 << IPIPE_SYSTEM_FLAG)
5013 +#define IPIPE_LOCK_MASK (1 << IPIPE_LOCK_FLAG)
5014 +#define IPIPE_WIRED_MASK (1 << IPIPE_WIRED_FLAG)
5015 +#define IPIPE_EXCLUSIVE_MASK (1 << IPIPE_EXCLUSIVE_FLAG)
5017 +#define IPIPE_DEFAULT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK)
5018 +#define IPIPE_STDROOT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK|IPIPE_SYSTEM_MASK)
5020 +#define IPIPE_EVENT_SELF 0x80000000
5022 +#define IPIPE_NR_CPUS NR_CPUS
5024 +/* This accessor assumes hw IRQs are off on SMP; allows assignment. */
5025 +#define __ipipe_current_domain __ipipe_get_cpu_var(ipipe_percpu_domain)
5026 +/* This read-only accessor makes sure that hw IRQs are off on SMP. */
5027 +#define ipipe_current_domain \
5028 + ({ \
5029 + struct ipipe_domain *__ipd__; \
5030 + unsigned long __flags__; \
5031 + local_irq_save_hw_smp(__flags__); \
5032 + __ipd__ = __ipipe_current_domain; \
5033 + local_irq_restore_hw_smp(__flags__); \
5034 + __ipd__; \
5035 + })
5037 +#define ipipe_virtual_irq_p(irq) ((irq) >= IPIPE_VIRQ_BASE && \
5038 + (irq) < IPIPE_NR_IRQS)
5040 +#define IPIPE_SAME_HANDLER ((ipipe_irq_handler_t)(-1))
5042 +struct irq_desc;
5044 +typedef void (*ipipe_irq_ackfn_t)(unsigned irq, struct irq_desc *desc);
5046 +typedef int (*ipipe_event_handler_t)(unsigned event,
5047 + struct ipipe_domain *from,
5048 + void *data);
5049 +struct ipipe_domain {
5051 + int slot; /* Slot number in percpu domain data array. */
5052 + struct list_head p_link; /* Link in pipeline */
5053 + ipipe_event_handler_t evhand[IPIPE_NR_EVENTS]; /* Event handlers. */
5054 + unsigned long long evself; /* Self-monitored event bits. */
5056 + struct ipipe_irqdesc {
5057 + unsigned long control;
5058 + ipipe_irq_ackfn_t acknowledge;
5059 + ipipe_irq_handler_t handler;
5060 + void *cookie;
5061 + } ____cacheline_aligned irqs[IPIPE_NR_IRQS];
5063 + int priority;
5064 + void *pdd;
5065 + unsigned long flags;
5066 + unsigned domid;
5067 + const char *name;
5068 + struct mutex mutex;
5071 +#define IPIPE_HEAD_PRIORITY (-1) /* For domains always heading the pipeline */
5073 +struct ipipe_domain_attr {
5075 + unsigned domid; /* Domain identifier -- Magic value set by caller */
5076 + const char *name; /* Domain name -- Warning: won't be dup'ed! */
5077 + int priority; /* Priority in interrupt pipeline */
5078 + void (*entry) (void); /* Domain entry point */
5079 + void *pdd; /* Per-domain (opaque) data pointer */
5082 +#define __ipipe_irq_cookie(ipd, irq) (ipd)->irqs[irq].cookie
5083 +#define __ipipe_irq_handler(ipd, irq) (ipd)->irqs[irq].handler
5084 +#define __ipipe_cpudata_irq_hits(ipd, cpu, irq) ipipe_percpudom(ipd, irqall, cpu)[irq]
5086 +extern unsigned __ipipe_printk_virq;
5088 +extern unsigned long __ipipe_virtual_irq_map;
5090 +extern struct list_head __ipipe_pipeline;
5092 +extern int __ipipe_event_monitors[];
5094 +typedef void (*ipipe_root_preempt_handler_t)(void *cookie);
5096 +DECLARE_PER_CPU(ipipe_root_preempt_handler_t, __ipipe_root_preempt_handler);
5097 +DECLARE_PER_CPU(void *, __ipipe_root_preempt_cookie);
5099 +/* Private interface */
5101 +void ipipe_init_early(void);
5103 +void ipipe_init(void);
5105 +#ifdef CONFIG_PROC_FS
5106 +void ipipe_init_proc(void);
5108 +#ifdef CONFIG_IPIPE_TRACE
5109 +void __ipipe_init_tracer(void);
5110 +#else /* !CONFIG_IPIPE_TRACE */
5111 +#define __ipipe_init_tracer() do { } while(0)
5112 +#endif /* CONFIG_IPIPE_TRACE */
5114 +#else /* !CONFIG_PROC_FS */
5115 +#define ipipe_init_proc() do { } while(0)
5116 +#endif /* CONFIG_PROC_FS */
5118 +void __ipipe_init_stage(struct ipipe_domain *ipd);
5120 +void __ipipe_cleanup_domain(struct ipipe_domain *ipd);
5122 +void __ipipe_add_domain_proc(struct ipipe_domain *ipd);
5124 +void __ipipe_remove_domain_proc(struct ipipe_domain *ipd);
5126 +void __ipipe_flush_printk(unsigned irq, void *cookie);
5128 +void __ipipe_walk_pipeline(struct list_head *pos);
5130 +void __ipipe_pend_irq(unsigned irq, struct list_head *head);
5132 +int __ipipe_dispatch_event(unsigned event, void *data);
5134 +void __ipipe_dispatch_wired_nocheck(struct ipipe_domain *head, unsigned irq);
5136 +void __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq);
5138 +void __ipipe_sync_stage(void);
5140 +void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq);
5142 +void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq);
5144 +void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq);
5146 +void __ipipe_pin_range_globally(unsigned long start, unsigned long end);
5148 +/* Must be called hw IRQs off. */
5149 +static inline void ipipe_irq_lock(unsigned irq)
5151 + __ipipe_lock_irq(__ipipe_current_domain, ipipe_processor_id(), irq);
5154 +/* Must be called hw IRQs off. */
5155 +static inline void ipipe_irq_unlock(unsigned irq)
5157 + __ipipe_unlock_irq(__ipipe_current_domain, irq);
5160 +#ifndef __ipipe_sync_pipeline
5161 +#define __ipipe_sync_pipeline() __ipipe_sync_stage()
5162 +#endif
5164 +#ifndef __ipipe_do_root_xirq
5165 +#define __ipipe_do_root_xirq(ipd, irq) \
5166 + (ipd)->irqs[irq].handler(irq, (ipd)->irqs[irq].cookie)
5167 +#endif
5169 +#ifndef __ipipe_do_root_virq
5170 +#define __ipipe_do_root_virq(ipd, irq) \
5171 + (ipd)->irqs[irq].handler(irq, (ipd)->irqs[irq].cookie)
5172 +#endif
5174 +#ifndef __ipipe_run_irqtail
5175 +#define __ipipe_run_irqtail(irq) do { } while(0)
5176 +#endif
5178 +#define __ipipe_pipeline_head_p(ipd) (&(ipd)->p_link == __ipipe_pipeline.next)
5180 +#define __ipipe_ipending_p(p) ((p)->irqpend_himap != 0)
5183 + * Keep the following as a macro, so that client code could check for
5184 + * the support of the invariant pipeline head optimization.
5185 + */
5186 +#define __ipipe_pipeline_head() \
5187 + list_entry(__ipipe_pipeline.next, struct ipipe_domain, p_link)
5189 +#define local_irq_enable_hw_cond() local_irq_enable_hw()
5190 +#define local_irq_disable_hw_cond() local_irq_disable_hw()
5191 +#define local_irq_save_hw_cond(flags) local_irq_save_hw(flags)
5192 +#define local_irq_restore_hw_cond(flags) local_irq_restore_hw(flags)
5194 +#ifdef CONFIG_SMP
5195 +cpumask_t __ipipe_set_irq_affinity(unsigned irq, cpumask_t cpumask);
5196 +int __ipipe_send_ipi(unsigned ipi, cpumask_t cpumask);
5197 +#define local_irq_save_hw_smp(flags) local_irq_save_hw(flags)
5198 +#define local_irq_restore_hw_smp(flags) local_irq_restore_hw(flags)
5199 +#else /* !CONFIG_SMP */
5200 +#define local_irq_save_hw_smp(flags) do { (void)(flags); } while(0)
5201 +#define local_irq_restore_hw_smp(flags) do { } while(0)
5202 +#endif /* CONFIG_SMP */
5204 +#define local_irq_save_full(vflags, rflags) \
5205 + do { \
5206 + local_irq_save(vflags); \
5207 + local_irq_save_hw(rflags); \
5208 + } while(0)
5210 +#define local_irq_restore_full(vflags, rflags) \
5211 + do { \
5212 + local_irq_restore_hw(rflags); \
5213 + local_irq_restore(vflags); \
5214 + } while(0)
5216 +static inline void __local_irq_restore_nosync(unsigned long x)
5218 + struct ipipe_percpu_domain_data *p = ipipe_root_cpudom_ptr();
5220 + if (raw_irqs_disabled_flags(x)) {
5221 + set_bit(IPIPE_STALL_FLAG, &p->status);
5222 + trace_hardirqs_off();
5223 + } else {
5224 + trace_hardirqs_on();
5225 + clear_bit(IPIPE_STALL_FLAG, &p->status);
5229 +static inline void local_irq_restore_nosync(unsigned long x)
5231 + unsigned long flags;
5232 + local_irq_save_hw_smp(flags);
5233 + __local_irq_restore_nosync(x);
5234 + local_irq_restore_hw_smp(flags);
5237 +#define __ipipe_root_domain_p (__ipipe_current_domain == ipipe_root_domain)
5238 +#define ipipe_root_domain_p (ipipe_current_domain == ipipe_root_domain)
5240 +/* This has to be called with hw IRQs off. */
5241 +#define __ipipe_head_domain_p __ipipe_pipeline_head_p(__ipipe_current_domain)
5243 +static inline int __ipipe_event_monitored_p(int ev)
5245 + if (__ipipe_event_monitors[ev] > 0)
5246 + return 1;
5248 + return (ipipe_current_domain->evself & (1LL << ev)) != 0;
5252 + * <!>: Backward compat is kept for now, with client domains
5253 + * storing the notifier enabled bit in the main flags of a
5254 + * task struct. This is clearly deprecated: at some point,
5255 + * this kludge will vanish. Fix the relevant code using
5256 + * ipipe_enable/disable_notifier() instead. You have been
5257 + * warned.
5258 + */
5259 +#define ipipe_notifier_enabled_p(p) \
5260 + (((p)->flags|(p)->ipipe_flags) & PF_EVNOTIFY)
5262 +#define ipipe_sigwake_notify(p) \
5263 + do { \
5264 + if (ipipe_notifier_enabled_p(p) && \
5265 + __ipipe_event_monitored_p(IPIPE_EVENT_SIGWAKE)) \
5266 + __ipipe_dispatch_event(IPIPE_EVENT_SIGWAKE, p); \
5267 + } while (0)
5269 +#define ipipe_exit_notify(p) \
5270 + do { \
5271 + if (ipipe_notifier_enabled_p(p) && \
5272 + __ipipe_event_monitored_p(IPIPE_EVENT_EXIT)) \
5273 + __ipipe_dispatch_event(IPIPE_EVENT_EXIT, p); \
5274 + } while (0)
5276 +#define ipipe_setsched_notify(p) \
5277 + do { \
5278 + if (ipipe_notifier_enabled_p(p) && \
5279 + __ipipe_event_monitored_p(IPIPE_EVENT_SETSCHED)) \
5280 + __ipipe_dispatch_event(IPIPE_EVENT_SETSCHED, p); \
5281 + } while (0)
5283 +#define ipipe_schedule_notify(prev, next) \
5284 +do { \
5285 + if ((ipipe_notifier_enabled_p(next) || \
5286 + ipipe_notifier_enabled_p(prev)) && \
5287 + __ipipe_event_monitored_p(IPIPE_EVENT_SCHEDULE)) \
5288 + __ipipe_dispatch_event(IPIPE_EVENT_SCHEDULE, next); \
5289 +} while (0)
5291 +#define ipipe_trap_notify(ex, regs) \
5292 + ({ \
5293 + unsigned long __flags__; \
5294 + int __ret__ = 0; \
5295 + local_irq_save_hw_smp(__flags__); \
5296 + if ((test_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpudom_var(status)) || \
5297 + ipipe_notifier_enabled_p(current)) && \
5298 + __ipipe_event_monitored_p(ex)) { \
5299 + local_irq_restore_hw_smp(__flags__); \
5300 + __ret__ = __ipipe_dispatch_event(ex, regs); \
5301 + } else \
5302 + local_irq_restore_hw_smp(__flags__); \
5303 + __ret__; \
5304 + })
5306 +#define ipipe_init_notify(p) \
5307 + do { \
5308 + if (__ipipe_event_monitored_p(IPIPE_EVENT_INIT)) \
5309 + __ipipe_dispatch_event(IPIPE_EVENT_INIT, p); \
5310 + } while (0)
5312 +#define ipipe_cleanup_notify(mm) \
5313 + do { \
5314 + if (__ipipe_event_monitored_p(IPIPE_EVENT_CLEANUP)) \
5315 + __ipipe_dispatch_event(IPIPE_EVENT_CLEANUP, mm); \
5316 + } while (0)
5318 +/* Public interface */
5320 +int ipipe_register_domain(struct ipipe_domain *ipd,
5321 + struct ipipe_domain_attr *attr);
5323 +int ipipe_unregister_domain(struct ipipe_domain *ipd);
5325 +void ipipe_suspend_domain(void);
5327 +int ipipe_virtualize_irq(struct ipipe_domain *ipd,
5328 + unsigned irq,
5329 + ipipe_irq_handler_t handler,
5330 + void *cookie,
5331 + ipipe_irq_ackfn_t acknowledge,
5332 + unsigned modemask);
5334 +int ipipe_control_irq(struct ipipe_domain *ipd,
5335 + unsigned int irq,
5336 + unsigned clrmask,
5337 + unsigned setmask);
5339 +unsigned ipipe_alloc_virq(void);
5341 +int ipipe_free_virq(unsigned virq);
5343 +int ipipe_trigger_irq(unsigned irq);
5345 +static inline void __ipipe_propagate_irq(unsigned irq)
5347 + struct list_head *next = __ipipe_current_domain->p_link.next;
5348 + if (next == &ipipe_root.p_link) {
5349 + /* Fast path: root must handle all interrupts. */
5350 + __ipipe_set_irq_pending(&ipipe_root, irq);
5351 + return;
5353 + __ipipe_pend_irq(irq, next);
5356 +static inline void __ipipe_schedule_irq(unsigned irq)
5358 + __ipipe_pend_irq(irq, &__ipipe_current_domain->p_link);
5361 +static inline void __ipipe_schedule_irq_head(unsigned irq)
5363 + __ipipe_set_irq_pending(__ipipe_pipeline_head(), irq);
5366 +static inline void __ipipe_schedule_irq_root(unsigned irq)
5368 + __ipipe_set_irq_pending(&ipipe_root, irq);
5371 +static inline void ipipe_propagate_irq(unsigned irq)
5373 + unsigned long flags;
5375 + local_irq_save_hw(flags);
5376 + __ipipe_propagate_irq(irq);
5377 + local_irq_restore_hw(flags);
5380 +static inline void ipipe_schedule_irq(unsigned irq)
5382 + unsigned long flags;
5384 + local_irq_save_hw(flags);
5385 + __ipipe_schedule_irq(irq);
5386 + local_irq_restore_hw(flags);
5389 +static inline void ipipe_schedule_irq_head(unsigned irq)
5391 + unsigned long flags;
5393 + local_irq_save_hw(flags);
5394 + __ipipe_schedule_irq_head(irq);
5395 + local_irq_restore_hw(flags);
5398 +static inline void ipipe_schedule_irq_root(unsigned irq)
5400 + unsigned long flags;
5402 + local_irq_save_hw(flags);
5403 + __ipipe_schedule_irq_root(irq);
5404 + local_irq_restore_hw(flags);
5407 +void ipipe_stall_pipeline_from(struct ipipe_domain *ipd);
5409 +unsigned long ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd);
5411 +unsigned long ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd);
5413 +static inline void ipipe_unstall_pipeline_from(struct ipipe_domain *ipd)
5415 + ipipe_test_and_unstall_pipeline_from(ipd);
5418 +void ipipe_restore_pipeline_from(struct ipipe_domain *ipd,
5419 + unsigned long x);
5421 +static inline unsigned long ipipe_test_pipeline_from(struct ipipe_domain *ipd)
5423 + return test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
5426 +static inline void ipipe_stall_pipeline_head(void)
5428 + local_irq_disable_hw();
5429 + __set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status));
5432 +static inline unsigned long ipipe_test_and_stall_pipeline_head(void)
5434 + local_irq_disable_hw();
5435 + return __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status));
5438 +void ipipe_unstall_pipeline_head(void);
5440 +void __ipipe_restore_pipeline_head(unsigned long x);
5442 +static inline void ipipe_restore_pipeline_head(unsigned long x)
5444 +#ifdef CONFIG_IPIPE_DEBUG
5445 + if (WARN_ON_ONCE(!irqs_disabled_hw()))
5446 + local_irq_disable_hw();
5447 +#endif
5448 + if ((x ^ test_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status))) & 1)
5449 + __ipipe_restore_pipeline_head(x);
5452 +#define ipipe_unstall_pipeline() \
5453 + ipipe_unstall_pipeline_from(ipipe_current_domain)
5455 +#define ipipe_test_and_unstall_pipeline() \
5456 + ipipe_test_and_unstall_pipeline_from(ipipe_current_domain)
5458 +#define ipipe_test_pipeline() \
5459 + ipipe_test_pipeline_from(ipipe_current_domain)
5461 +#define ipipe_test_and_stall_pipeline() \
5462 + ipipe_test_and_stall_pipeline_from(ipipe_current_domain)
5464 +#define ipipe_stall_pipeline() \
5465 + ipipe_stall_pipeline_from(ipipe_current_domain)
5467 +#define ipipe_restore_pipeline(x) \
5468 + ipipe_restore_pipeline_from(ipipe_current_domain, (x))
5470 +void ipipe_init_attr(struct ipipe_domain_attr *attr);
5472 +int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo);
5474 +void __ipipe_do_critical_sync(unsigned irq, void *cookie);
5476 +unsigned long ipipe_critical_enter(void (*syncfn) (void));
5478 +void ipipe_critical_exit(unsigned long flags);
5480 +void ipipe_prepare_panic(void);
5482 +static inline void ipipe_set_printk_sync(struct ipipe_domain *ipd)
5484 + set_bit(IPIPE_SPRINTK_FLAG, &ipd->flags);
5487 +static inline void ipipe_set_printk_async(struct ipipe_domain *ipd)
5489 + clear_bit(IPIPE_SPRINTK_FLAG, &ipd->flags);
5492 +static inline void ipipe_set_foreign_stack(struct ipipe_domain *ipd)
5494 + /* Must be called hw interrupts off. */
5495 + __set_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status));
5498 +static inline void ipipe_clear_foreign_stack(struct ipipe_domain *ipd)
5500 + /* Must be called hw interrupts off. */
5501 + __clear_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status));
5504 +static inline int ipipe_test_foreign_stack(void)
5506 + /* Must be called hw interrupts off. */
5507 + return test_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpudom_var(status));
5510 +#ifndef ipipe_safe_current
5511 +#define ipipe_safe_current() \
5512 +({ \
5513 + struct task_struct *p; \
5514 + unsigned long flags; \
5515 + local_irq_save_hw_smp(flags); \
5516 + p = ipipe_test_foreign_stack() ? &init_task : current; \
5517 + local_irq_restore_hw_smp(flags); \
5518 + p; \
5520 +#endif
5522 +ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd,
5523 + unsigned event,
5524 + ipipe_event_handler_t handler);
5526 +cpumask_t ipipe_set_irq_affinity(unsigned irq,
5527 + cpumask_t cpumask);
5529 +int ipipe_send_ipi(unsigned ipi,
5530 + cpumask_t cpumask);
5532 +int ipipe_setscheduler_root(struct task_struct *p,
5533 + int policy,
5534 + int prio);
5536 +int ipipe_reenter_root(struct task_struct *prev,
5537 + int policy,
5538 + int prio);
5540 +int ipipe_alloc_ptdkey(void);
5542 +int ipipe_free_ptdkey(int key);
5544 +int ipipe_set_ptd(int key,
5545 + void *value);
5547 +void *ipipe_get_ptd(int key);
5549 +int ipipe_disable_ondemand_mappings(struct task_struct *tsk);
5551 +static inline void ipipe_nmi_enter(void)
5553 + int cpu = ipipe_processor_id();
5555 + per_cpu(ipipe_nmi_saved_root, cpu) = ipipe_root_cpudom_var(status);
5556 + __set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status));
5558 +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
5559 + per_cpu(ipipe_saved_context_check_state, cpu) =
5560 + ipipe_disable_context_check(cpu);
5561 +#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */
5564 +static inline void ipipe_nmi_exit(void)
5566 + int cpu = ipipe_processor_id();
5568 +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
5569 + ipipe_restore_context_check
5570 + (cpu, per_cpu(ipipe_saved_context_check_state, cpu));
5571 +#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */
5573 + if (!test_bit(IPIPE_STALL_FLAG, &per_cpu(ipipe_nmi_saved_root, cpu)))
5574 + __clear_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status));
5577 +#define ipipe_enable_notifier(p) \
5578 + do { \
5579 + (p)->ipipe_flags |= PF_EVNOTIFY; \
5580 + } while (0)
5582 +#define ipipe_disable_notifier(p) \
5583 + do { \
5584 + (p)->ipipe_flags &= ~(PF_EVNOTIFY|PF_EVTRET); \
5585 + } while (0)
5587 +/* hw IRQs off. */
5588 +#define ipipe_return_notify(p) \
5589 + do { \
5590 + if (ipipe_notifier_enabled_p(p) && \
5591 + __ipipe_event_monitored_p(IPIPE_EVENT_RETURN)) \
5592 + (p)->ipipe_flags |= PF_EVTRET; \
5593 + } while (0)
5595 +static inline void
5596 +ipipe_register_root_preempt_handler(ipipe_root_preempt_handler_t handler,
5597 + void *cookie)
5599 + int cpu = ipipe_processor_id();
5601 + per_cpu(__ipipe_root_preempt_cookie, cpu) = cookie;
5602 + barrier();
5603 + per_cpu(__ipipe_root_preempt_handler, cpu) = handler;
5606 +static inline void ipipe_unregister_root_preempt_handler(void)
5608 + per_cpu(__ipipe_root_preempt_handler, ipipe_processor_id()) = NULL;
5611 +static inline void ipipe_root_preempt_notify(void)
5613 + ipipe_root_preempt_handler_t handler;
5614 + int cpu = ipipe_processor_id();
5616 + handler = per_cpu(__ipipe_root_preempt_handler, cpu);
5617 + if (unlikely(handler))
5618 + handler(per_cpu(__ipipe_root_preempt_cookie, cpu));
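
A sketch of how a co-kernel might hook the root preemption notifier declared above on the current CPU; the function names and the handler body are illustrative only, not part of the patch:

static void example_root_preempted(void *cookie)
{
	/* The root domain is about to be preempted on this CPU;
	 * e.g. snapshot whatever state the co-kernel needs. */
}

static void example_hook_this_cpu(void)
{
	ipipe_register_root_preempt_handler(example_root_preempted, NULL);
	/* ... run with the notifier armed ... */
	ipipe_unregister_root_preempt_handler();
}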
5621 +#else /* !CONFIG_IPIPE */
5623 +#define ipipe_init_early() do { } while(0)
5624 +#define ipipe_init() do { } while(0)
5625 +#define ipipe_suspend_domain() do { } while(0)
5626 +#define ipipe_sigwake_notify(p) do { } while(0)
5627 +#define ipipe_setsched_notify(p) do { } while(0)
5628 +#define ipipe_init_notify(p) do { } while(0)
5629 +#define ipipe_exit_notify(p) do { } while(0)
5630 +#define ipipe_cleanup_notify(mm) do { } while(0)
5631 +#define ipipe_trap_notify(t,r) 0
5632 +#define ipipe_init_proc() do { } while(0)
5634 +#define ipipe_register_root_preempt_handler(h, c) do { } while (0)
5635 +#define ipipe_unregister_root_preempt_handler() do { } while (0)
5636 +#define ipipe_root_preempt_notify() do { } while (0)
5638 +static inline void __ipipe_pin_range_globally(unsigned long start,
5639 + unsigned long end)
5643 +static inline int ipipe_test_foreign_stack(void)
5645 + return 0;
5648 +#define local_irq_enable_hw_cond() do { } while(0)
5649 +#define local_irq_disable_hw_cond() do { } while(0)
5650 +#define local_irq_save_hw_cond(flags) do { (void)(flags); } while(0)
5651 +#define local_irq_restore_hw_cond(flags) do { } while(0)
5652 +#define local_irq_save_hw_smp(flags) do { (void)(flags); } while(0)
5653 +#define local_irq_restore_hw_smp(flags) do { } while(0)
5655 +#define ipipe_irq_lock(irq) do { } while(0)
5656 +#define ipipe_irq_unlock(irq) do { } while(0)
5658 +#define __ipipe_root_domain_p 1
5659 +#define ipipe_root_domain_p 1
5660 +#define ipipe_safe_current() current
5661 +#define ipipe_processor_id() smp_processor_id()
5663 +#define ipipe_nmi_enter() do { } while (0)
5664 +#define ipipe_nmi_exit() do { } while (0)
5666 +#define local_irq_disable_head() local_irq_disable()
5668 +#define local_irq_save_full(vflags, rflags) do { (void)(vflags); local_irq_save(rflags); } while(0)
5669 +#define local_irq_restore_full(vflags, rflags) do { (void)(vflags); local_irq_restore(rflags); } while(0)
5670 +#define local_irq_restore_nosync(vflags) local_irq_restore(vflags)
5672 +#define __ipipe_pipeline_head_p(ipd) 1
5674 +#endif /* CONFIG_IPIPE */
5676 +#endif /* !__LINUX_IPIPE_H */
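
A minimal sketch of the public interface declared in this header: registering a higher-priority domain and taking over one IRQ. The attribute fields, IPIPE_HEAD_PRIORITY and IPIPE_HANDLE_MASK follow the usual I-pipe conventions but are not defined in this hunk, and the IRQ number is a placeholder, so treat all of them as assumptions:

#include <linux/ipipe.h>

static struct ipipe_domain example_domain;

static void example_irq_handler(unsigned int irq, void *cookie)
{
	/* Runs ahead of Linux for this IRQ. */
	ipipe_propagate_irq(irq);	/* optionally hand it down to the root domain */
}

static int example_setup(void)
{
	struct ipipe_domain_attr attr;
	int ret;

	ipipe_init_attr(&attr);
	attr.name = "example";
	attr.priority = IPIPE_HEAD_PRIORITY;	/* assumed constant, see lead-in */

	ret = ipipe_register_domain(&example_domain, &attr);
	if (ret)
		return ret;

	/* IRQ number and mode mask are placeholders. */
	return ipipe_virtualize_irq(&example_domain, 16,
				    example_irq_handler, NULL, NULL,
				    IPIPE_HANDLE_MASK);
}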
5677 diff --git a/include/linux/ipipe_base.h b/include/linux/ipipe_base.h
5678 new file mode 100644
5679 index 0000000..3f43ba5
5680 --- /dev/null
5681 +++ b/include/linux/ipipe_base.h
5682 @@ -0,0 +1,134 @@
5683 +/* -*- linux-c -*-
5684 + * include/linux/ipipe_base.h
5686 + * Copyright (C) 2002-2007 Philippe Gerum.
5687 + * 2007 Jan Kiszka.
5689 + * This program is free software; you can redistribute it and/or modify
5690 + * it under the terms of the GNU General Public License as published by
5691 + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
5692 + * USA; either version 2 of the License, or (at your option) any later
5693 + * version.
5695 + * This program is distributed in the hope that it will be useful,
5696 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
5697 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5698 + * GNU General Public License for more details.
5700 + * You should have received a copy of the GNU General Public License
5701 + * along with this program; if not, write to the Free Software
5702 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
5703 + */
5705 +#ifndef __LINUX_IPIPE_BASE_H
5706 +#define __LINUX_IPIPE_BASE_H
5708 +#ifdef CONFIG_IPIPE
5710 +#include <asm/ipipe_base.h>
5712 +#define __bpl_up(x) (((x)+(BITS_PER_LONG-1)) & ~(BITS_PER_LONG-1))
5713 +/* Number of virtual IRQs (must be a multiple of BITS_PER_LONG) */
5714 +#define IPIPE_NR_VIRQS BITS_PER_LONG
5715 +/* First virtual IRQ # (must be aligned on BITS_PER_LONG) */
5716 +#define IPIPE_VIRQ_BASE __bpl_up(IPIPE_NR_XIRQS)
5717 +/* Total number of IRQ slots */
5718 +#define IPIPE_NR_IRQS (IPIPE_VIRQ_BASE+IPIPE_NR_VIRQS)
5720 +#define IPIPE_IRQ_LOMAPSZ (IPIPE_NR_IRQS / BITS_PER_LONG)
5721 +#if IPIPE_IRQ_LOMAPSZ > BITS_PER_LONG
5723 + * We need a 3-level mapping. This allows us to handle up to 32k IRQ
5724 + * vectors on 32bit machines, 256k on 64bit ones.
5725 + */
5726 +#define __IPIPE_3LEVEL_IRQMAP 1
5727 +#define IPIPE_IRQ_MDMAPSZ (__bpl_up(IPIPE_IRQ_LOMAPSZ) / BITS_PER_LONG)
5728 +#else
5730 + * 2-level mapping is enough. This allows us to handle up to 1024 IRQ
5731 + * vectors on 32bit machines, 4096 on 64bit ones.
5732 + */
5733 +#define __IPIPE_2LEVEL_IRQMAP 1
5734 +#endif
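
To make the sizing above concrete, here is a small worked example of the map-level selection; BITS_PER_LONG is assumed to be 64 and the IPIPE_NR_XIRQS value is hypothetical, so the numbers are for illustration only:

#include <linux/bitops.h>	/* BITS_PER_LONG */

/* Hypothetical arch value, for illustration only. */
#define EXAMPLE_NR_XIRQS	224

/* Mirrors __bpl_up() above: round up to a multiple of BITS_PER_LONG. */
#define EXAMPLE_BPL_UP(x)	(((x) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1))

/* With BITS_PER_LONG == 64:
 *   EXAMPLE_BPL_UP(224) == 256        -> IPIPE_VIRQ_BASE
 *   256 + 64            == 320        -> IPIPE_NR_IRQS
 *   320 / 64            == 5  (<= 64) -> IPIPE_IRQ_LOMAPSZ, so the
 *                                        2-level map is selected.
 * A 3-level map would only be needed once more than
 * BITS_PER_LONG * BITS_PER_LONG slots (4096 here) had to be tracked.
 */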
5736 +/* Per-cpu pipeline status */
5737 +#define IPIPE_STALL_FLAG 0 /* Stalls a pipeline stage -- guaranteed at bit #0 */
5738 +#define IPIPE_NOSTACK_FLAG 1 /* Domain currently runs on a foreign stack */
5740 +#define IPIPE_STALL_MASK (1L << IPIPE_STALL_FLAG)
5741 +#define IPIPE_NOSTACK_MASK (1L << IPIPE_NOSTACK_FLAG)
5743 +typedef void (*ipipe_irq_handler_t)(unsigned int irq,
5744 + void *cookie);
5746 +extern struct ipipe_domain ipipe_root;
5748 +#define ipipe_root_domain (&ipipe_root)
5750 +void __ipipe_unstall_root(void);
5752 +void __ipipe_restore_root(unsigned long x);
5754 +#define ipipe_preempt_disable(flags) \
5755 + do { \
5756 + local_irq_save_hw(flags); \
5757 + if (__ipipe_root_domain_p) \
5758 + preempt_disable(); \
5759 + } while (0)
5761 +#define ipipe_preempt_enable(flags) \
5762 + do { \
5763 + if (__ipipe_root_domain_p) { \
5764 + preempt_enable_no_resched(); \
5765 + local_irq_restore_hw(flags); \
5766 + preempt_check_resched(); \
5767 + } else \
5768 + local_irq_restore_hw(flags); \
5769 + } while (0)
5771 +#define ipipe_get_cpu(flags) ({ ipipe_preempt_disable(flags); ipipe_processor_id(); })
5772 +#define ipipe_put_cpu(flags) ipipe_preempt_enable(flags)
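
A minimal usage sketch of the helpers above; the function name and the per-CPU access it stands for are made up for illustration:

static void example_touch_percpu_state(void)
{
	unsigned long flags;
	int cpu;

	cpu = ipipe_get_cpu(flags);	/* hard IRQs off, preemption off on root */
	/* ... access per-CPU data belonging to 'cpu' safely here ... */
	(void)cpu;
	ipipe_put_cpu(flags);		/* restores IRQ state, re-enables preemption */
}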
5774 +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
5775 +void ipipe_check_context(struct ipipe_domain *border_ipd);
5776 +#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */
5777 +static inline void ipipe_check_context(struct ipipe_domain *border_ipd) { }
5778 +#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */
5780 +/* Generic features */
5782 +#ifdef CONFIG_GENERIC_CLOCKEVENTS
5783 +#define __IPIPE_FEATURE_REQUEST_TICKDEV 1
5784 +#endif
5785 +#define __IPIPE_FEATURE_DELAYED_ATOMICSW 1
5786 +#define __IPIPE_FEATURE_FASTPEND_IRQ 1
5787 +#define __IPIPE_FEATURE_TRACE_EVENT 1
5788 +#define __IPIPE_FEATURE_ENABLE_NOTIFIER 1
5789 +#ifdef CONFIG_HAVE_IPIPE_HOSTRT
5790 +#define __IPIPE_FEATURE_HOSTRT 1
5791 +#endif
5792 +#define __IPIPE_FEATURE_PREPARE_PANIC 1
5793 +#define __IPIPE_FEATURE_ROOT_PREEMPT_NOTIFIER 1
5794 +#define __IPIPE_FEATURE_CONTROL_IRQ 1
5796 +#else /* !CONFIG_IPIPE */
5798 +#define ipipe_preempt_disable(flags) \
5799 + do { \
5800 + preempt_disable(); \
5801 + (void)(flags); \
5802 + } while (0)
5803 +#define ipipe_preempt_enable(flags) preempt_enable()
5805 +#define ipipe_get_cpu(flags) ({ (void)(flags); get_cpu(); })
5806 +#define ipipe_put_cpu(flags) \
5807 + do { \
5808 + (void)(flags); \
5809 + put_cpu(); \
5810 + } while (0)
5812 +#define ipipe_check_context(ipd) do { } while(0)
5814 +#endif /* CONFIG_IPIPE */
5816 +#endif /* !__LINUX_IPIPE_BASE_H */
5817 diff --git a/include/linux/ipipe_lock.h b/include/linux/ipipe_lock.h
5818 new file mode 100644
5819 index 0000000..cf33925
5820 --- /dev/null
5821 +++ b/include/linux/ipipe_lock.h
5822 @@ -0,0 +1,240 @@
5823 +/* -*- linux-c -*-
5824 + * include/linux/ipipe_lock.h
5826 + * Copyright (C) 2009 Philippe Gerum.
5828 + * This program is free software; you can redistribute it and/or modify
5829 + * it under the terms of the GNU General Public License as published by
5830 + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
5831 + * USA; either version 2 of the License, or (at your option) any later
5832 + * version.
5834 + * This program is distributed in the hope that it will be useful,
5835 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
5836 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5837 + * GNU General Public License for more details.
5839 + * You should have received a copy of the GNU General Public License
5840 + * along with this program; if not, write to the Free Software
5841 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
5842 + */
5844 +#ifndef __LINUX_IPIPE_LOCK_H
5845 +#define __LINUX_IPIPE_LOCK_H
5847 +typedef struct {
5848 + arch_spinlock_t arch_lock;
5849 +} __ipipe_spinlock_t;
5851 +#define ipipe_spinlock_p(lock) \
5852 + __builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t *)
5854 +#define std_spinlock_raw_p(lock) \
5855 + __builtin_types_compatible_p(typeof(lock), raw_spinlock_t *)
5857 +#define std_spinlock_p(lock) \
5858 + __builtin_types_compatible_p(typeof(lock), spinlock_t *)
5860 +#define ipipe_spinlock(lock) ((__ipipe_spinlock_t *)(lock))
5861 +#define std_spinlock_raw(lock) ((raw_spinlock_t *)(lock))
5862 +#define std_spinlock(lock) ((spinlock_t *)(lock))
5864 +#define PICK_SPINLOCK_IRQSAVE(lock, flags) \
5865 + do { \
5866 + if (ipipe_spinlock_p(lock)) \
5867 + (flags) = __ipipe_spin_lock_irqsave(ipipe_spinlock(lock)); \
5868 + else if (std_spinlock_raw_p(lock)) \
5869 + __real_raw_spin_lock_irqsave(std_spinlock_raw(lock), flags); \
5870 + else if (std_spinlock_p(lock)) \
5871 + __real_raw_spin_lock_irqsave(&std_spinlock(lock)->rlock, flags); \
5872 + else __bad_lock_type(); \
5873 + } while (0)
5875 +#define PICK_SPINTRYLOCK_IRQSAVE(lock, flags) \
5876 + ({ \
5877 + int __ret__; \
5878 + if (ipipe_spinlock_p(lock)) \
5879 + __ret__ = __ipipe_spin_trylock_irqsave(ipipe_spinlock(lock), &(flags)); \
5880 + else if (std_spinlock_raw_p(lock)) \
5881 + __ret__ = __real_raw_spin_trylock_irqsave(std_spinlock_raw(lock), flags); \
5882 + else if (std_spinlock_p(lock)) \
5883 + __ret__ = __real_raw_spin_trylock_irqsave(&std_spinlock(lock)->rlock, flags); \
5884 + else __bad_lock_type(); \
5885 + __ret__; \
5886 + })
5888 +#define PICK_SPINTRYLOCK_IRQ(lock) \
5889 + ({ \
5890 + int __ret__; \
5891 + if (ipipe_spinlock_p(lock)) \
5892 + __ret__ = __ipipe_spin_trylock_irq(ipipe_spinlock(lock)); \
5893 + else if (std_spinlock_raw_p(lock)) \
5894 + __ret__ = __real_raw_spin_trylock_irq(std_spinlock_raw(lock)); \
5895 + else if (std_spinlock_p(lock)) \
5896 + __ret__ = __real_raw_spin_trylock_irq(&std_spinlock(lock)->rlock); \
5897 + else __bad_lock_type(); \
5898 + __ret__; \
5899 + })
5901 +#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags) \
5902 + do { \
5903 + if (ipipe_spinlock_p(lock)) \
5904 + __ipipe_spin_unlock_irqrestore(ipipe_spinlock(lock), flags); \
5905 + else { \
5906 + __ipipe_spin_unlock_debug(flags); \
5907 + if (std_spinlock_raw_p(lock)) \
5908 + __real_raw_spin_unlock_irqrestore(std_spinlock_raw(lock), flags); \
5909 + else if (std_spinlock_p(lock)) \
5910 + __real_raw_spin_unlock_irqrestore(&std_spinlock(lock)->rlock, flags); \
5911 + } \
5912 + } while (0)
5914 +#define PICK_SPINOP(op, lock) \
5915 + do { \
5916 + if (ipipe_spinlock_p(lock)) \
5917 + arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \
5918 + else if (std_spinlock_raw_p(lock)) \
5919 + __real_raw_spin##op(std_spinlock_raw(lock)); \
5920 + else if (std_spinlock_p(lock)) \
5921 + __real_raw_spin##op(&std_spinlock(lock)->rlock); \
5922 + else __bad_lock_type(); \
5923 + } while (0)
5925 +#define PICK_SPINOP_RET(op, lock, type) \
5926 + ({ \
5927 + type __ret__; \
5928 + if (ipipe_spinlock_p(lock)) \
5929 + __ret__ = arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \
5930 + else if (std_spinlock_raw_p(lock)) \
5931 + __ret__ = __real_raw_spin##op(std_spinlock_raw(lock)); \
5932 + else if (std_spinlock_p(lock)) \
5933 + __ret__ = __real_raw_spin##op(&std_spinlock(lock)->rlock); \
5934 + else { __ret__ = -1; __bad_lock_type(); } \
5935 + __ret__; \
5936 + })
5938 +#define arch_spin_lock_init(lock) \
5939 + do { \
5940 + IPIPE_DEFINE_SPINLOCK(__lock__); \
5941 + *((ipipe_spinlock_t *)lock) = __lock__; \
5942 + } while (0)
5944 +#define arch_spin_lock_irq(lock) \
5945 + do { \
5946 + local_irq_disable_hw(); \
5947 + arch_spin_lock(lock); \
5948 + } while (0)
5950 +#define arch_spin_unlock_irq(lock) \
5951 + do { \
5952 + arch_spin_unlock(lock); \
5953 + local_irq_enable_hw(); \
5954 + } while (0)
5956 +typedef struct {
5957 + arch_rwlock_t arch_lock;
5958 +} __ipipe_rwlock_t;
5960 +#define ipipe_rwlock_p(lock) \
5961 + __builtin_types_compatible_p(typeof(lock), __ipipe_rwlock_t *)
5963 +#define std_rwlock_p(lock) \
5964 + __builtin_types_compatible_p(typeof(lock), rwlock_t *)
5966 +#define ipipe_rwlock(lock) ((__ipipe_rwlock_t *)(lock))
5967 +#define std_rwlock(lock) ((rwlock_t *)(lock))
5969 +#define PICK_RWOP(op, lock) \
5970 + do { \
5971 + if (ipipe_rwlock_p(lock)) \
5972 + arch##op(&ipipe_rwlock(lock)->arch_lock); \
5973 + else if (std_rwlock_p(lock)) \
5974 + _raw##op(std_rwlock(lock)); \
5975 + else __bad_lock_type(); \
5976 + } while (0)
5978 +extern int __bad_lock_type(void);
5980 +#ifdef CONFIG_IPIPE
5982 +#define ipipe_spinlock_t __ipipe_spinlock_t
5983 +#define IPIPE_DEFINE_RAW_SPINLOCK(x) ipipe_spinlock_t x = IPIPE_SPIN_LOCK_UNLOCKED
5984 +#define IPIPE_DECLARE_RAW_SPINLOCK(x) extern ipipe_spinlock_t x
5985 +#define IPIPE_DEFINE_SPINLOCK(x) IPIPE_DEFINE_RAW_SPINLOCK(x)
5986 +#define IPIPE_DECLARE_SPINLOCK(x) IPIPE_DECLARE_RAW_SPINLOCK(x)
5988 +#define IPIPE_SPIN_LOCK_UNLOCKED \
5989 + (__ipipe_spinlock_t) { .arch_lock = __ARCH_SPIN_LOCK_UNLOCKED }
5991 +#define spin_lock_irqsave_cond(lock, flags) \
5992 + spin_lock_irqsave(lock, flags)
5994 +#define spin_unlock_irqrestore_cond(lock, flags) \
5995 + spin_unlock_irqrestore(lock, flags)
5997 +void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock);
5999 +int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock);
6001 +void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock);
6003 +unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock);
6005 +int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock,
6006 + unsigned long *x);
6008 +void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock,
6009 + unsigned long x);
6011 +void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock);
6013 +void __ipipe_spin_unlock_irqcomplete(unsigned long x);
6015 +#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP)
6016 +void __ipipe_spin_unlock_debug(unsigned long flags);
6017 +#else
6018 +#define __ipipe_spin_unlock_debug(flags) do { } while (0)
6019 +#endif
6021 +#define ipipe_rwlock_t __ipipe_rwlock_t
6022 +#define IPIPE_DEFINE_RWLOCK(x) ipipe_rwlock_t x = IPIPE_RW_LOCK_UNLOCKED
6023 +#define IPIPE_DECLARE_RWLOCK(x) extern ipipe_rwlock_t x
6025 +#define IPIPE_RW_LOCK_UNLOCKED \
6026 + (__ipipe_rwlock_t) { .arch_lock = __ARCH_RW_LOCK_UNLOCKED }
6028 +#else /* !CONFIG_IPIPE */
6030 +#define ipipe_spinlock_t spinlock_t
6031 +#define IPIPE_DEFINE_SPINLOCK(x) DEFINE_SPINLOCK(x)
6032 +#define IPIPE_DECLARE_SPINLOCK(x) extern spinlock_t x
6033 +#define IPIPE_SPIN_LOCK_UNLOCKED SPIN_LOCK_UNLOCKED
6034 +#define IPIPE_DEFINE_RAW_SPINLOCK(x) DEFINE_RAW_SPINLOCK(x)
6035 +#define IPIPE_DECLARE_RAW_SPINLOCK(x) extern raw_spinlock_t x
6037 +#define spin_lock_irqsave_cond(lock, flags) \
6038 + do { \
6039 + (void)(flags); \
6040 + spin_lock(lock); \
6041 + } while(0)
6043 +#define spin_unlock_irqrestore_cond(lock, flags) \
6044 + spin_unlock(lock)
6046 +#define __ipipe_spin_lock_irq(lock) do { } while (0)
6047 +#define __ipipe_spin_unlock_irq(lock) do { } while (0)
6048 +#define __ipipe_spin_lock_irqsave(lock) 0
6049 +#define __ipipe_spin_trylock_irqsave(lock, x) ({ (void)(x); 1; })
6050 +#define __ipipe_spin_unlock_irqrestore(lock, x) do { (void)(x); } while (0)
6051 +#define __ipipe_spin_unlock_irqbegin(lock) do { } while (0)
6052 +#define __ipipe_spin_unlock_irqcomplete(x) do { (void)(x); } while (0)
6053 +#define __ipipe_spin_unlock_debug(flags) do { } while (0)
6055 +#define ipipe_rwlock_t rwlock_t
6056 +#define IPIPE_DEFINE_RWLOCK(x) DEFINE_RWLOCK(x)
6057 +#define IPIPE_DECLARE_RWLOCK(x) extern rwlock_t x
6058 +#define IPIPE_RW_LOCK_UNLOCKED RW_LOCK_UNLOCKED
6060 +#endif /* !CONFIG_IPIPE */
6062 +#endif /* !__LINUX_IPIPE_LOCK_H */
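
A sketch of what the type dispatch above buys at the call site, once the spinlock.h changes later in this patch route spin_lock_irqsave() through PICK_SPINLOCK_IRQSAVE(); the lock and function names are illustrative:

static IPIPE_DEFINE_SPINLOCK(example_hard_lock);	/* shared with other domains */
static DEFINE_SPINLOCK(example_soft_lock);		/* root-domain data only */

static void example_locking(void)
{
	unsigned long flags;

	/* ipipe_spinlock_t: really masks hardware IRQs. */
	spin_lock_irqsave(&example_hard_lock, flags);
	/* ... data that higher-priority pipeline domains may also touch ... */
	spin_unlock_irqrestore(&example_hard_lock, flags);

	/* spinlock_t: keeps the usual, root-only semantics. */
	spin_lock_irqsave(&example_soft_lock, flags);
	spin_unlock_irqrestore(&example_soft_lock, flags);
}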
6063 diff --git a/include/linux/ipipe_percpu.h b/include/linux/ipipe_percpu.h
6064 new file mode 100644
6065 index 0000000..0b42e8c
6066 --- /dev/null
6067 +++ b/include/linux/ipipe_percpu.h
6068 @@ -0,0 +1,89 @@
6069 +/* -*- linux-c -*-
6070 + * include/linux/ipipe_percpu.h
6072 + * Copyright (C) 2007 Philippe Gerum.
6074 + * This program is free software; you can redistribute it and/or modify
6075 + * it under the terms of the GNU General Public License as published by
6076 + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
6077 + * USA; either version 2 of the License, or (at your option) any later
6078 + * version.
6080 + * This program is distributed in the hope that it will be useful,
6081 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
6082 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6083 + * GNU General Public License for more details.
6085 + * You should have received a copy of the GNU General Public License
6086 + * along with this program; if not, write to the Free Software
6087 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
6088 + */
6090 +#ifndef __LINUX_IPIPE_PERCPU_H
6091 +#define __LINUX_IPIPE_PERCPU_H
6093 +#include <asm/percpu.h>
6094 +#include <asm/ptrace.h>
6096 +struct ipipe_domain;
6098 +struct ipipe_percpu_domain_data {
6099 + unsigned long status; /* <= Must be first in struct. */
6100 + unsigned long irqpend_himap;
6101 +#ifdef __IPIPE_3LEVEL_IRQMAP
6102 + unsigned long irqpend_mdmap[IPIPE_IRQ_MDMAPSZ];
6103 +#endif
6104 + unsigned long irqpend_lomap[IPIPE_IRQ_LOMAPSZ];
6105 + unsigned long irqheld_map[IPIPE_IRQ_LOMAPSZ];
6106 + unsigned long irqall[IPIPE_NR_IRQS];
6107 + u64 evsync;
6111 + * CAREFUL: all accessors based on __raw_get_cpu_var() you may find in
6112 + * this file should be used only while hw interrupts are off, to
6113 + * prevent CPU migration regardless of the running domain.
6114 + */
6115 +#ifdef CONFIG_SMP
6116 +#define ipipe_percpudom_ptr(ipd, cpu) \
6117 + (&per_cpu(ipipe_percpu_darray, cpu)[(ipd)->slot])
6118 +#define ipipe_cpudom_ptr(ipd) \
6119 + (&__ipipe_get_cpu_var(ipipe_percpu_darray)[(ipd)->slot])
6120 +#else
6121 +DECLARE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]);
6122 +#define ipipe_percpudom_ptr(ipd, cpu) \
6123 + (per_cpu(ipipe_percpu_daddr, cpu)[(ipd)->slot])
6124 +#define ipipe_cpudom_ptr(ipd) \
6125 + (__ipipe_get_cpu_var(ipipe_percpu_daddr)[(ipd)->slot])
6126 +#endif
6127 +#define ipipe_percpudom(ipd, var, cpu) (ipipe_percpudom_ptr(ipd, cpu)->var)
6128 +#define ipipe_cpudom_var(ipd, var) (ipipe_cpudom_ptr(ipd)->var)
6130 +#define IPIPE_ROOT_SLOT 0
6131 +#define IPIPE_HEAD_SLOT (CONFIG_IPIPE_DOMAINS - 1)
6133 +DECLARE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]);
6135 +DECLARE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain);
6137 +DECLARE_PER_CPU(unsigned long, ipipe_nmi_saved_root);
6139 +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
6140 +DECLARE_PER_CPU(int, ipipe_percpu_context_check);
6141 +DECLARE_PER_CPU(int, ipipe_saved_context_check_state);
6142 +#endif
6144 +#define ipipe_root_cpudom_ptr() \
6145 + (&__ipipe_get_cpu_var(ipipe_percpu_darray)[IPIPE_ROOT_SLOT])
6147 +#define ipipe_root_cpudom_var(var) ipipe_root_cpudom_ptr()->var
6149 +#define ipipe_this_cpudom_var(var) \
6150 + ipipe_cpudom_var(__ipipe_current_domain, var)
6152 +#define ipipe_head_cpudom_ptr() \
6153 + (&__ipipe_get_cpu_var(ipipe_percpu_darray)[IPIPE_HEAD_SLOT])
6155 +#define ipipe_head_cpudom_var(var) ipipe_head_cpudom_ptr()->var
6157 +#endif /* !__LINUX_IPIPE_PERCPU_H */
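
As the comment above the accessors stresses, they are only safe with hard interrupts off; a minimal sketch (the function name is illustrative):

static unsigned long example_read_root_status(void)
{
	unsigned long flags, status;

	local_irq_save_hw(flags);		/* rules out CPU migration */
	status = ipipe_root_cpudom_var(status);	/* root domain, this CPU */
	local_irq_restore_hw(flags);

	return status;
}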
6158 diff --git a/include/linux/ipipe_tickdev.h b/include/linux/ipipe_tickdev.h
6159 new file mode 100644
6160 index 0000000..df17444
6161 --- /dev/null
6162 +++ b/include/linux/ipipe_tickdev.h
6163 @@ -0,0 +1,83 @@
6164 +/* -*- linux-c -*-
6165 + * include/linux/ipipe_tickdev.h
6167 + * Copyright (C) 2007 Philippe Gerum.
6169 + * This program is free software; you can redistribute it and/or modify
6170 + * it under the terms of the GNU General Public License as published by
6171 + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
6172 + * USA; either version 2 of the License, or (at your option) any later
6173 + * version.
6175 + * This program is distributed in the hope that it will be useful,
6176 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
6177 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6178 + * GNU General Public License for more details.
6180 + * You should have received a copy of the GNU General Public License
6181 + * along with this program; if not, write to the Free Software
6182 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
6183 + */
6185 +#ifndef __LINUX_IPIPE_TICKDEV_H
6186 +#define __LINUX_IPIPE_TICKDEV_H
6188 +#if defined(CONFIG_IPIPE) && defined(CONFIG_GENERIC_CLOCKEVENTS)
6190 +#include <linux/clockchips.h>
6191 +#include <linux/clocksource.h>
6193 +struct tick_device;
6195 +struct ipipe_tick_device {
6197 + void (*emul_set_mode)(enum clock_event_mode,
6198 + struct clock_event_device *cdev);
6199 + int (*emul_set_tick)(unsigned long delta,
6200 + struct clock_event_device *cdev);
6201 + void (*real_set_mode)(enum clock_event_mode mode,
6202 + struct clock_event_device *cdev);
6203 + int (*real_set_tick)(unsigned long delta,
6204 + struct clock_event_device *cdev);
6205 + struct tick_device *slave;
6206 + unsigned long real_max_delta_ns;
6207 + unsigned long real_mult;
6208 + int real_shift;
6212 + * NOTE: When modifying this structure, make sure to keep the Xenomai
6213 + * definition in include/nucleus/vdso.h in sync.
6214 + */
6215 +struct ipipe_hostrt_data {
6216 + short live;
6217 + seqcount_t seqcount;
6218 + time_t wall_time_sec;
6219 + u32 wall_time_nsec;
6220 + struct timespec wall_to_monotonic;
6221 + cycle_t cycle_last;
6222 + cycle_t mask;
6223 + u32 mult;
6224 + u32 shift;
6227 +int ipipe_request_tickdev(const char *devname,
6228 + void (*emumode)(enum clock_event_mode mode,
6229 + struct clock_event_device *cdev),
6230 + int (*emutick)(unsigned long evt,
6231 + struct clock_event_device *cdev),
6232 + int cpu, unsigned long *tmfreq);
6234 +void ipipe_release_tickdev(int cpu);
6236 +#endif /* CONFIG_IPIPE && CONFIG_GENERIC_CLOCKEVENTS */
6238 +#ifdef CONFIG_HAVE_IPIPE_HOSTRT
6239 +void ipipe_update_hostrt(struct timespec *wall_time,
6240 + struct clocksource *clock);
6241 +#else /* !CONFIG_HAVE_IPIPE_HOSTRT */
6242 +static inline void
6243 +ipipe_update_hostrt(struct timespec *wall_time, struct clocksource *clock) {};
6244 +#endif
6246 +#endif /* !__LINUX_IPIPE_TICKDEV_H */
6247 diff --git a/include/linux/ipipe_trace.h b/include/linux/ipipe_trace.h
6248 new file mode 100644
6249 index 0000000..627b354
6250 --- /dev/null
6251 +++ b/include/linux/ipipe_trace.h
6252 @@ -0,0 +1,72 @@
6253 +/* -*- linux-c -*-
6254 + * include/linux/ipipe_trace.h
6256 + * Copyright (C) 2005 Luotao Fu.
6257 + * 2005-2007 Jan Kiszka.
6259 + * This program is free software; you can redistribute it and/or modify
6260 + * it under the terms of the GNU General Public License as published by
6261 + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
6262 + * USA; either version 2 of the License, or (at your option) any later
6263 + * version.
6265 + * This program is distributed in the hope that it will be useful,
6266 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
6267 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6268 + * GNU General Public License for more details.
6270 + * You should have received a copy of the GNU General Public License
6271 + * along with this program; if not, write to the Free Software
6272 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
6273 + */
6275 +#ifndef _LINUX_IPIPE_TRACE_H
6276 +#define _LINUX_IPIPE_TRACE_H
6278 +#ifdef CONFIG_IPIPE_TRACE
6280 +#include <linux/types.h>
6282 +void ipipe_trace_begin(unsigned long v);
6283 +void ipipe_trace_end(unsigned long v);
6284 +void ipipe_trace_freeze(unsigned long v);
6285 +void ipipe_trace_special(unsigned char special_id, unsigned long v);
6286 +void ipipe_trace_pid(pid_t pid, short prio);
6287 +void ipipe_trace_event(unsigned char id, unsigned long delay_tsc);
6288 +int ipipe_trace_max_reset(void);
6289 +int ipipe_trace_frozen_reset(void);
6291 +#else /* !CONFIG_IPIPE_TRACE */
6293 +#define ipipe_trace_begin(v) do { (void)(v); } while(0)
6294 +#define ipipe_trace_end(v) do { (void)(v); } while(0)
6295 +#define ipipe_trace_freeze(v) do { (void)(v); } while(0)
6296 +#define ipipe_trace_special(id, v) do { (void)(id); (void)(v); } while(0)
6297 +#define ipipe_trace_pid(pid, prio) do { (void)(pid); (void)(prio); } while(0)
6298 +#define ipipe_trace_event(id, delay_tsc) do { (void)(id); (void)(delay_tsc); } while(0)
6299 +#define ipipe_trace_max_reset() do { } while(0)
6300 +#define ipipe_trace_frozen_reset() do { } while(0)
6302 +#endif /* !CONFIG_IPIPE_TRACE */
6304 +#ifdef CONFIG_IPIPE_TRACE_PANIC
6305 +void ipipe_trace_panic_freeze(void);
6306 +void ipipe_trace_panic_dump(void);
6307 +#else
6308 +static inline void ipipe_trace_panic_freeze(void) { }
6309 +static inline void ipipe_trace_panic_dump(void) { }
6310 +#endif
6312 +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
6313 +#define ipipe_trace_irq_entry(irq) ipipe_trace_begin(irq)
6314 +#define ipipe_trace_irq_exit(irq) ipipe_trace_end(irq)
6315 +#define ipipe_trace_irqsoff() ipipe_trace_begin(0x80000000UL)
6316 +#define ipipe_trace_irqson() ipipe_trace_end(0x80000000UL)
6317 +#else
6318 +#define ipipe_trace_irq_entry(irq) do { (void)(irq);} while(0)
6319 +#define ipipe_trace_irq_exit(irq) do { (void)(irq);} while(0)
6320 +#define ipipe_trace_irqsoff() do { } while(0)
6321 +#define ipipe_trace_irqson() do { } while(0)
6322 +#endif
6324 +#endif /* !__LINUX_IPIPE_TRACE_H */
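
A short sketch of how the tracing hooks above are typically used; the marker value and function names are made up, and the snippet assumes a kernel built with CONFIG_IPIPE_TRACE:

#include <linux/ipipe_trace.h>

#define EXAMPLE_MARKER	0x42	/* arbitrary value, shows up in the trace log */

static void example_traced_path(void)
{
	ipipe_trace_begin(EXAMPLE_MARKER);
	/* ... code path whose worst-case duration is being measured ... */
	ipipe_trace_end(EXAMPLE_MARKER);
}

static void example_on_anomaly(void)
{
	/* Freeze the trace buffer so it can be inspected later
	 * under /proc/ipipe/trace/ (see the Kconfig help below). */
	ipipe_trace_freeze(EXAMPLE_MARKER);
}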
6325 diff --git a/include/linux/irq.h b/include/linux/irq.h
6326 index c03243a..630e995 100644
6327 --- a/include/linux/irq.h
6328 +++ b/include/linux/irq.h
6329 @@ -124,6 +124,9 @@ struct irq_chip {
6330 void (*end)(unsigned int irq);
6331 int (*set_affinity)(unsigned int irq,
6332 const struct cpumask *dest);
6333 +#ifdef CONFIG_IPIPE
6334 + void (*move)(unsigned int irq);
6335 +#endif /* CONFIG_IPIPE */
6336 int (*retrigger)(unsigned int irq);
6337 int (*set_type)(unsigned int irq, unsigned int flow_type);
6338 int (*set_wake)(unsigned int irq, unsigned int on);
6339 @@ -173,6 +176,12 @@ struct irq_2_iommu;
6340 * @name: flow handler name for /proc/interrupts output
6342 struct irq_desc {
6343 +#ifdef CONFIG_IPIPE
6344 + void (*ipipe_ack)(unsigned int irq,
6345 + struct irq_desc *desc);
6346 + void (*ipipe_end)(unsigned int irq,
6347 + struct irq_desc *desc);
6348 +#endif /* CONFIG_IPIPE */
6349 unsigned int irq;
6350 struct timer_rand_state *timer_rand_state;
6351 unsigned int *kstat_irqs;
6352 @@ -347,6 +356,10 @@ extern void
6353 set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
6354 irq_flow_handler_t handle, const char *name);
6356 +extern irq_flow_handler_t
6357 +__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle,
6358 + int is_chained);
6360 extern void
6361 __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
6362 const char *name);
6363 @@ -358,6 +371,7 @@ static inline void __set_irq_handler_unlocked(int irq,
6364 struct irq_desc *desc;
6366 desc = irq_to_desc(irq);
6367 + handler = __fixup_irq_handler(desc, handler, 0);
6368 desc->handle_irq = handler;
6371 diff --git a/include/linux/kernel.h b/include/linux/kernel.h
6372 index 8317ec4..2874a21 100644
6373 --- a/include/linux/kernel.h
6374 +++ b/include/linux/kernel.h
6375 @@ -16,6 +16,7 @@
6376 #include <linux/compiler.h>
6377 #include <linux/bitops.h>
6378 #include <linux/log2.h>
6379 +#include <linux/ipipe_base.h>
6380 #include <linux/typecheck.h>
6381 #include <linux/dynamic_debug.h>
6382 #include <asm/byteorder.h>
6383 @@ -130,9 +131,12 @@ struct user;
6385 #ifdef CONFIG_PREEMPT_VOLUNTARY
6386 extern int _cond_resched(void);
6387 -# define might_resched() _cond_resched()
6388 +# define might_resched() do { \
6389 + ipipe_check_context(ipipe_root_domain); \
6390 + _cond_resched(); \
6391 + } while (0)
6392 #else
6393 -# define might_resched() do { } while (0)
6394 +# define might_resched() ipipe_check_context(ipipe_root_domain)
6395 #endif
6397 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
6398 diff --git a/include/linux/preempt.h b/include/linux/preempt.h
6399 index 2e681d9..130b7d5 100644
6400 --- a/include/linux/preempt.h
6401 +++ b/include/linux/preempt.h
6402 @@ -9,13 +9,20 @@
6403 #include <linux/thread_info.h>
6404 #include <linux/linkage.h>
6405 #include <linux/list.h>
6406 +#include <linux/ipipe_base.h>
6408 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
6409 extern void add_preempt_count(int val);
6410 extern void sub_preempt_count(int val);
6411 #else
6412 -# define add_preempt_count(val) do { preempt_count() += (val); } while (0)
6413 -# define sub_preempt_count(val) do { preempt_count() -= (val); } while (0)
6414 +# define add_preempt_count(val) do { \
6415 + ipipe_check_context(ipipe_root_domain); \
6416 + preempt_count() += (val); \
6417 + } while (0)
6418 +# define sub_preempt_count(val) do { \
6419 + ipipe_check_context(ipipe_root_domain); \
6420 + preempt_count() -= (val); \
6421 + } while (0)
6422 #endif
6424 #define inc_preempt_count() add_preempt_count(1)
6425 diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
6426 index bc2994e..5e2da8d 100644
6427 --- a/include/linux/rwlock.h
6428 +++ b/include/linux/rwlock.h
6429 @@ -61,8 +61,8 @@ do { \
6430 #define read_trylock(lock) __cond_lock(lock, _raw_read_trylock(lock))
6431 #define write_trylock(lock) __cond_lock(lock, _raw_write_trylock(lock))
6433 -#define write_lock(lock) _raw_write_lock(lock)
6434 -#define read_lock(lock) _raw_read_lock(lock)
6435 +#define write_lock(lock) PICK_RWOP(_write_lock, lock)
6436 +#define read_lock(lock) PICK_RWOP(_read_lock, lock)
6438 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
6440 @@ -96,8 +96,8 @@ do { \
6441 #define read_lock_bh(lock) _raw_read_lock_bh(lock)
6442 #define write_lock_irq(lock) _raw_write_lock_irq(lock)
6443 #define write_lock_bh(lock) _raw_write_lock_bh(lock)
6444 -#define read_unlock(lock) _raw_read_unlock(lock)
6445 -#define write_unlock(lock) _raw_write_unlock(lock)
6446 +#define read_unlock(lock) PICK_RWOP(_read_unlock, lock)
6447 +#define write_unlock(lock) PICK_RWOP(_write_unlock, lock)
6448 #define read_unlock_irq(lock) _raw_read_unlock_irq(lock)
6449 #define write_unlock_irq(lock) _raw_write_unlock_irq(lock)
6451 diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h
6452 index 9c9f049..62c8941 100644
6453 --- a/include/linux/rwlock_api_smp.h
6454 +++ b/include/linux/rwlock_api_smp.h
6455 @@ -141,7 +141,9 @@ static inline int __raw_write_trylock(rwlock_t *lock)
6456 * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
6457 * not re-enabled during lock-acquire (which the preempt-spin-ops do):
6459 -#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
6460 +#if !defined(CONFIG_GENERIC_LOCKBREAK) || \
6461 + defined(CONFIG_DEBUG_LOCK_ALLOC) || \
6462 + defined(CONFIG_IPIPE)
6464 static inline void __raw_read_lock(rwlock_t *lock)
6466 diff --git a/include/linux/sched.h b/include/linux/sched.h
6467 index 5ee397e..ebcd306 100644
6468 --- a/include/linux/sched.h
6469 +++ b/include/linux/sched.h
6470 @@ -61,6 +61,7 @@ struct sched_param {
6471 #include <linux/errno.h>
6472 #include <linux/nodemask.h>
6473 #include <linux/mm_types.h>
6474 +#include <linux/ipipe.h>
6476 #include <asm/system.h>
6477 #include <asm/page.h>
6478 @@ -191,9 +192,17 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
6479 #define TASK_DEAD 64
6480 #define TASK_WAKEKILL 128
6481 #define TASK_WAKING 256
6482 +#ifdef CONFIG_IPIPE
6483 +#define TASK_ATOMICSWITCH 512
6484 +#define TASK_NOWAKEUP 1024
6485 +#define TASK_STATE_MAX 2048
6486 +#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWAN"
6487 +#else /* !CONFIG_IPIPE */
6488 +#define TASK_ATOMICSWITCH 0
6489 +#define TASK_NOWAKEUP 0
6490 #define TASK_STATE_MAX 512
6492 #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW"
6493 +#endif /* CONFIG_IPIPE */
6495 extern char ___assert_task_state[1 - 2*!!(
6496 sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
6497 @@ -308,6 +317,15 @@ extern void trap_init(void);
6498 extern void update_process_times(int user);
6499 extern void scheduler_tick(void);
6501 +#ifdef CONFIG_IPIPE
6502 +void update_root_process_times(struct pt_regs *regs);
6503 +#else /* !CONFIG_IPIPE */
6504 +static inline void update_root_process_times(struct pt_regs *regs)
6506 + update_process_times(user_mode(regs));
6508 +#endif /* CONFIG_IPIPE */
6510 extern void sched_show_task(struct task_struct *p);
6512 #ifdef CONFIG_DETECT_SOFTLOCKUP
6513 @@ -359,7 +377,7 @@ extern signed long schedule_timeout(signed long timeout);
6514 extern signed long schedule_timeout_interruptible(signed long timeout);
6515 extern signed long schedule_timeout_killable(signed long timeout);
6516 extern signed long schedule_timeout_uninterruptible(signed long timeout);
6517 -asmlinkage void schedule(void);
6518 +asmlinkage int schedule(void);
6519 extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner);
6521 struct nsproxy;
6522 @@ -435,6 +453,9 @@ extern int get_dumpable(struct mm_struct *mm);
6523 #endif
6524 /* leave room for more dump flags */
6525 #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */
6526 +#ifdef CONFIG_IPIPE
6527 +#define MMF_VM_PINNED 31 /* ondemand load up and COW disabled */
6528 +#endif
6530 #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
6532 @@ -1445,6 +1466,10 @@ struct task_struct {
6533 #endif
6534 atomic_t fs_excl; /* holding fs exclusive resources */
6535 struct rcu_head rcu;
6536 +#ifdef CONFIG_IPIPE
6537 + unsigned int ipipe_flags;
6538 + void *ptd[IPIPE_ROOT_NPTDKEYS];
6539 +#endif
6542 * cache last used pipe for splice
6543 @@ -1691,6 +1716,11 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
6544 #define PF_EXITING 0x00000004 /* getting shut down */
6545 #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
6546 #define PF_VCPU 0x00000010 /* I'm a virtual CPU */
6547 +#ifdef CONFIG_IPIPE
6548 +#define PF_EVNOTIFY 0x00000020 /* Notify other domains about internal events */
6549 +#else
6550 +#define PF_EVNOTIFY 0
6551 +#endif /* CONFIG_IPIPE */
6552 #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
6553 #define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
6554 #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
6555 @@ -1719,6 +1749,12 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
6556 #define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */
6559 + * p->ipipe_flags -- beware of conflicts with the legacy PF_EVNOTIFY in
6560 + * the main flags, until it moves there.
6561 + */
6562 +#define PF_EVTRET 0x1
6565 * Only the _current_ task can read/write to tsk->flags, but other
6566 * tasks can access tsk->flags in readonly mode for example
6567 * with tsk_used_math (like during threaded core dumping).
6568 diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
6569 index f885465..6b6d70d 100644
6570 --- a/include/linux/spinlock.h
6571 +++ b/include/linux/spinlock.h
6572 @@ -88,10 +88,12 @@
6573 # include <linux/spinlock_up.h>
6574 #endif
6576 +#include <linux/ipipe_lock.h>
6578 #ifdef CONFIG_DEBUG_SPINLOCK
6579 extern void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
6580 struct lock_class_key *key);
6581 -# define raw_spin_lock_init(lock) \
6582 +# define __real_raw_spin_lock_init(lock) \
6583 do { \
6584 static struct lock_class_key __key; \
6586 @@ -99,9 +101,10 @@ do { \
6587 } while (0)
6589 #else
6590 -# define raw_spin_lock_init(lock) \
6591 +# define __real_raw_spin_lock_init(lock) \
6592 do { *(lock) = __RAW_SPIN_LOCK_UNLOCKED(lock); } while (0)
6593 #endif
6594 +#define raw_spin_lock_init(lock) PICK_SPINOP(_lock_init, lock)
6596 #define raw_spin_is_locked(lock) arch_spin_is_locked(&(lock)->raw_lock)
6598 @@ -164,9 +167,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
6599 * various methods are defined as nops in the case they are not
6600 * required.
6602 -#define raw_spin_trylock(lock) __cond_lock(lock, _raw_spin_trylock(lock))
6603 +#define __real_raw_spin_trylock(lock) __cond_lock(lock, _raw_spin_trylock(lock))
6604 +#define raw_spin_trylock(lock) PICK_SPINOP_RET(_trylock, lock, int)
6606 -#define raw_spin_lock(lock) _raw_spin_lock(lock)
6607 +#define __real_raw_spin_lock(lock) _raw_spin_lock(lock)
6608 +#define raw_spin_lock(lock) PICK_SPINOP(_lock, lock)
6610 #ifdef CONFIG_DEBUG_LOCK_ALLOC
6611 # define raw_spin_lock_nested(lock, subclass) \
6612 @@ -184,7 +189,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
6614 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
6616 -#define raw_spin_lock_irqsave(lock, flags) \
6617 +#define __real_raw_spin_lock_irqsave(lock, flags) \
6618 do { \
6619 typecheck(unsigned long, flags); \
6620 flags = _raw_spin_lock_irqsave(lock); \
6621 @@ -206,7 +211,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
6623 #else
6625 -#define raw_spin_lock_irqsave(lock, flags) \
6626 +#define __real_raw_spin_lock_irqsave(lock, flags) \
6627 do { \
6628 typecheck(unsigned long, flags); \
6629 _raw_spin_lock_irqsave(lock, flags); \
6630 @@ -217,34 +222,46 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
6632 #endif
6634 -#define raw_spin_lock_irq(lock) _raw_spin_lock_irq(lock)
6635 +#define raw_spin_lock_irqsave(lock, flags) \
6636 + PICK_SPINLOCK_IRQSAVE(lock, flags)
6638 +#define __real_raw_spin_lock_irq(lock) _raw_spin_lock_irq(lock)
6639 +#define raw_spin_lock_irq(lock) PICK_SPINOP(_lock_irq, lock)
6640 #define raw_spin_lock_bh(lock) _raw_spin_lock_bh(lock)
6641 -#define raw_spin_unlock(lock) _raw_spin_unlock(lock)
6642 -#define raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock)
6643 +#define __real_raw_spin_unlock(lock) _raw_spin_unlock(lock)
6644 +#define raw_spin_unlock(lock) PICK_SPINOP(_unlock, lock)
6645 +#define __real_raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock)
6646 +#define raw_spin_unlock_irq(lock) PICK_SPINOP(_unlock_irq, lock)
6648 -#define raw_spin_unlock_irqrestore(lock, flags) \
6649 +#define __real_raw_spin_unlock_irqrestore(lock, flags) \
6650 do { \
6651 typecheck(unsigned long, flags); \
6652 _raw_spin_unlock_irqrestore(lock, flags); \
6653 } while (0)
6654 +#define raw_spin_unlock_irqrestore(lock, flags) \
6655 + PICK_SPINUNLOCK_IRQRESTORE(lock, flags)
6657 #define raw_spin_unlock_bh(lock) _raw_spin_unlock_bh(lock)
6659 #define raw_spin_trylock_bh(lock) \
6660 __cond_lock(lock, _raw_spin_trylock_bh(lock))
6662 -#define raw_spin_trylock_irq(lock) \
6663 +#define __real_raw_spin_trylock_irq(lock) \
6664 ({ \
6665 local_irq_disable(); \
6666 - raw_spin_trylock(lock) ? \
6667 + __real_raw_spin_trylock(lock) ? \
6668 1 : ({ local_irq_enable(); 0; }); \
6670 +#define raw_spin_trylock_irq(lock) PICK_SPINTRYLOCK_IRQ(lock)
6672 -#define raw_spin_trylock_irqsave(lock, flags) \
6673 +#define __real_raw_spin_trylock_irqsave(lock, flags) \
6674 ({ \
6675 local_irq_save(flags); \
6676 raw_spin_trylock(lock) ? \
6677 1 : ({ local_irq_restore(flags); 0; }); \
6679 +#define raw_spin_trylock_irqsave(lock, flags) \
6680 + PICK_SPINTRYLOCK_IRQSAVE(lock, flags)
6683 * raw_spin_can_lock - would raw_spin_trylock() succeed?
6684 @@ -275,24 +292,17 @@ static inline raw_spinlock_t *spinlock_check(spinlock_t *lock)
6686 #define spin_lock_init(_lock) \
6687 do { \
6688 - spinlock_check(_lock); \
6689 - raw_spin_lock_init(&(_lock)->rlock); \
6690 + raw_spin_lock_init(_lock); \
6691 } while (0)
6693 -static inline void spin_lock(spinlock_t *lock)
6695 - raw_spin_lock(&lock->rlock);
6697 +#define spin_lock(lock) raw_spin_lock(lock)
6699 static inline void spin_lock_bh(spinlock_t *lock)
6701 raw_spin_lock_bh(&lock->rlock);
6704 -static inline int spin_trylock(spinlock_t *lock)
6706 - return raw_spin_trylock(&lock->rlock);
6708 +#define spin_trylock(lock) raw_spin_trylock(lock)
6710 #define spin_lock_nested(lock, subclass) \
6711 do { \
6712 @@ -304,14 +314,11 @@ do { \
6713 raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \
6714 } while (0)
6716 -static inline void spin_lock_irq(spinlock_t *lock)
6718 - raw_spin_lock_irq(&lock->rlock);
6720 +#define spin_lock_irq(lock) raw_spin_lock_irq(lock)
6722 #define spin_lock_irqsave(lock, flags) \
6723 do { \
6724 - raw_spin_lock_irqsave(spinlock_check(lock), flags); \
6725 + raw_spin_lock_irqsave(lock, flags); \
6726 } while (0)
6728 #define spin_lock_irqsave_nested(lock, flags, subclass) \
6729 @@ -319,39 +326,28 @@ do { \
6730 raw_spin_lock_irqsave_nested(spinlock_check(lock), flags, subclass); \
6731 } while (0)
6733 -static inline void spin_unlock(spinlock_t *lock)
6735 - raw_spin_unlock(&lock->rlock);
6737 +#define spin_unlock(lock) raw_spin_unlock(lock)
6739 static inline void spin_unlock_bh(spinlock_t *lock)
6741 raw_spin_unlock_bh(&lock->rlock);
6744 -static inline void spin_unlock_irq(spinlock_t *lock)
6746 - raw_spin_unlock_irq(&lock->rlock);
6748 +#define spin_unlock_irq(lock) raw_spin_unlock_irq(lock)
6750 -static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
6752 - raw_spin_unlock_irqrestore(&lock->rlock, flags);
6754 +#define spin_unlock_irqrestore(lock, flags) \
6755 + raw_spin_unlock_irqrestore(lock, flags)
6757 static inline int spin_trylock_bh(spinlock_t *lock)
6759 return raw_spin_trylock_bh(&lock->rlock);
6762 -static inline int spin_trylock_irq(spinlock_t *lock)
6764 - return raw_spin_trylock_irq(&lock->rlock);
6766 +#define spin_trylock_irq(lock) raw_spin_trylock_irq(lock)
6768 #define spin_trylock_irqsave(lock, flags) \
6769 ({ \
6770 - raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
6771 + raw_spin_trylock_irqsave(lock, flags); \
6774 static inline void spin_unlock_wait(spinlock_t *lock)
6775 diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
6776 index e253ccd..378e01e 100644
6777 --- a/include/linux/spinlock_api_smp.h
6778 +++ b/include/linux/spinlock_api_smp.h
6779 @@ -99,7 +99,9 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
6780 * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
6781 * not re-enabled during lock-acquire (which the preempt-spin-ops do):
6783 -#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
6784 +#if !defined(CONFIG_GENERIC_LOCKBREAK) || \
6785 + defined(CONFIG_DEBUG_LOCK_ALLOC) || \
6786 + defined(CONFIG_IPIPE)
6788 static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock)
6790 @@ -113,7 +115,7 @@ static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock)
6791 * do_raw_spin_lock_flags() code, because lockdep assumes
6792 * that interrupts are not re-enabled during lock-acquire:
6794 -#ifdef CONFIG_LOCKDEP
6795 +#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE)
6796 LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
6797 #else
6798 do_raw_spin_lock_flags(lock, &flags);
6799 diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
6800 index b14f6a9..e400972 100644
6801 --- a/include/linux/spinlock_up.h
6802 +++ b/include/linux/spinlock_up.h
6803 @@ -49,13 +49,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
6805 * Read-write spinlocks. No debug version.
6807 -#define arch_read_lock(lock) do { (void)(lock); } while (0)
6808 -#define arch_write_lock(lock) do { (void)(lock); } while (0)
6809 -#define arch_read_trylock(lock) ({ (void)(lock); 1; })
6810 -#define arch_write_trylock(lock) ({ (void)(lock); 1; })
6811 -#define arch_read_unlock(lock) do { (void)(lock); } while (0)
6812 -#define arch_write_unlock(lock) do { (void)(lock); } while (0)
6814 #else /* DEBUG_SPINLOCK */
6815 #define arch_spin_is_locked(lock) ((void)(lock), 0)
6816 /* for sched.c and kernel_lock.c: */
6817 @@ -65,6 +58,13 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
6818 # define arch_spin_trylock(lock) ({ (void)(lock); 1; })
6819 #endif /* DEBUG_SPINLOCK */
6821 +#define arch_read_lock(lock) do { (void)(lock); } while (0)
6822 +#define arch_write_lock(lock) do { (void)(lock); } while (0)
6823 +#define arch_read_trylock(lock) ({ (void)(lock); 1; })
6824 +#define arch_write_trylock(lock) ({ (void)(lock); 1; })
6825 +#define arch_read_unlock(lock) do { (void)(lock); } while (0)
6826 +#define arch_write_unlock(lock) do { (void)(lock); } while (0)
6828 #define arch_spin_is_contended(lock) (((void)(lock), 0))
6830 #define arch_read_can_lock(lock) (((void)(lock), 1))
6831 diff --git a/init/Kconfig b/init/Kconfig
6832 index 5cff9a9..6ce2285 100644
6833 --- a/init/Kconfig
6834 +++ b/init/Kconfig
6835 @@ -86,6 +86,7 @@ config CROSS_COMPILE
6837 config LOCALVERSION
6838 string "Local version - append to kernel release"
6839 + default "-ipipe"
6840 help
6841 Append an extra string to the end of your kernel version.
6842 This will show up when you type uname, for example.
6843 diff --git a/init/main.c b/init/main.c
6844 index a42fdf4..149d8ea 100644
6845 --- a/init/main.c
6846 +++ b/init/main.c
6847 @@ -557,7 +557,7 @@ asmlinkage void __init start_kernel(void)
6849 cgroup_init_early();
6851 - local_irq_disable();
6852 + local_irq_disable_hw();
6853 early_boot_irqs_off();
6854 early_init_irq_lock_class();
6856 @@ -592,6 +592,7 @@ asmlinkage void __init start_kernel(void)
6857 pidhash_init();
6858 vfs_caches_init_early();
6859 sort_main_extable();
6860 + ipipe_init_early();
6861 trap_init();
6862 mm_init();
6864 @@ -621,6 +622,11 @@ asmlinkage void __init start_kernel(void)
6865 softirq_init();
6866 timekeeping_init();
6867 time_init();
6868 + /*
6869 + * We need to wait for the interrupt and time subsystems to be
6870 + * initialized before enabling the pipeline.
6871 + */
6872 + ipipe_init();
6873 profile_init();
6874 if (!irqs_disabled())
6875 printk(KERN_CRIT "start_kernel(): bug: interrupts were "
6876 @@ -802,6 +808,7 @@ static void __init do_basic_setup(void)
6877 init_tmpfs();
6878 driver_init();
6879 init_irq_proc();
6880 + ipipe_init_proc();
6881 do_ctors();
6882 do_initcalls();
6884 diff --git a/kernel/Makefile b/kernel/Makefile
6885 index 057472f..c456c33 100644
6886 --- a/kernel/Makefile
6887 +++ b/kernel/Makefile
6888 @@ -86,6 +86,7 @@ obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
6889 obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
6890 obj-$(CONFIG_TINY_RCU) += rcutiny.o
6891 obj-$(CONFIG_RELAY) += relay.o
6892 +obj-$(CONFIG_IPIPE) += ipipe/
6893 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
6894 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
6895 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
6896 diff --git a/kernel/exit.c b/kernel/exit.c
6897 index ac90425..a491bc3 100644
6898 --- a/kernel/exit.c
6899 +++ b/kernel/exit.c
6900 @@ -966,6 +966,7 @@ NORET_TYPE void do_exit(long code)
6901 acct_process();
6902 trace_sched_process_exit(tsk);
6904 + ipipe_exit_notify(tsk);
6905 exit_sem(tsk);
6906 exit_files(tsk);
6907 exit_fs(tsk);
6908 @@ -1773,3 +1774,37 @@ SYSCALL_DEFINE3(waitpid, pid_t, pid, int
6911 #endif
6913 +void rt_daemonize(void)
6915 + sigset_t blocked;
6917 + /*
6918 + * We don't want to have TIF_FREEZE set if the system-wide hibernation
6919 + * or suspend transition begins right now.
6920 + */
6921 + current->flags |= (PF_NOFREEZE | PF_KTHREAD);
6923 + if (current->nsproxy != &init_nsproxy) {
6924 + get_nsproxy(&init_nsproxy);
6925 + switch_task_namespaces(current, &init_nsproxy);
6927 + set_special_pids(&init_struct_pid);
6928 + proc_clear_tty(current);
6930 + /* Block and flush all signals */
6931 + sigfillset(&blocked);
6932 + sigprocmask(SIG_BLOCK, &blocked, NULL);
6933 + flush_signals(current);
6935 + /* Become as one with the init task */
6937 + daemonize_fs_struct();
6938 + exit_files(current);
6939 + current->files = init_task.files;
6940 + atomic_inc(&current->files->count);
6942 + reparent_to_kthreadd();
6945 +EXPORT_SYMBOL(rt_daemonize);
6946 diff --git a/kernel/fork.c b/kernel/fork.c
6947 index e96c0cd..9be0e42 100644
6948 --- a/kernel/fork.c
6949 +++ b/kernel/fork.c
6950 @@ -540,6 +540,7 @@ void mmput(struct mm_struct *mm)
6951 exit_aio(mm);
6952 ksm_exit(mm);
6953 exit_mmap(mm);
6954 + ipipe_cleanup_notify(mm);
6955 set_mm_exe_file(mm, NULL);
6956 if (!list_empty(&mm->mmlist)) {
6957 spin_lock(&mmlist_lock);
6958 @@ -910,7 +911,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
6960 unsigned long new_flags = p->flags;
6962 - new_flags &= ~PF_SUPERPRIV;
6963 + new_flags &= ~(PF_SUPERPRIV | PF_EVNOTIFY);
6964 new_flags |= PF_FORKNOEXEC;
6965 new_flags |= PF_STARTING;
6966 p->flags = new_flags;
6967 @@ -1286,6 +1287,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
6968 write_unlock_irq(&tasklist_lock);
6969 proc_fork_connector(p);
6970 cgroup_post_fork(p);
6971 +#ifdef CONFIG_IPIPE
6972 + p->ipipe_flags = 0;
6973 + memset(p->ptd, 0, sizeof(p->ptd));
6974 +#endif /* CONFIG_IPIPE */
6975 perf_event_fork(p);
6976 return p;
6978 @@ -1688,11 +1693,14 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
6981 if (new_mm) {
6982 + unsigned long flags;
6983 mm = current->mm;
6984 active_mm = current->active_mm;
6985 current->mm = new_mm;
6986 + ipipe_mm_switch_protect(flags);
6987 current->active_mm = new_mm;
6988 activate_mm(active_mm, new_mm);
6989 + ipipe_mm_switch_unprotect(flags);
6990 new_mm = mm;
6993 diff --git a/kernel/ipipe/Kconfig b/kernel/ipipe/Kconfig
6994 new file mode 100644
6995 index 0000000..693a7d2
6996 --- /dev/null
6997 +++ b/kernel/ipipe/Kconfig
6998 @@ -0,0 +1,26 @@
6999 +config IPIPE
7000 + bool "Interrupt pipeline"
7001 + default y
7002 + ---help---
7003 + Activate this option if you want the interrupt pipeline to be
7004 + compiled in.
7006 +config IPIPE_DOMAINS
7007 + int "Max domains"
7008 + depends on IPIPE
7009 + default 4
7010 + ---help---
7011 + The maximum number of I-pipe domains to run concurrently.
7013 +config IPIPE_DELAYED_ATOMICSW
7014 + bool
7015 + depends on IPIPE
7016 + default n
7018 +config IPIPE_UNMASKED_CONTEXT_SWITCH
7019 + bool
7020 + depends on IPIPE
7021 + default n
7023 +config HAVE_IPIPE_HOSTRT
7024 + bool
7025 diff --git a/kernel/ipipe/Kconfig.debug b/kernel/ipipe/Kconfig.debug
7026 new file mode 100644
7027 index 0000000..32160fc
7028 --- /dev/null
7029 +++ b/kernel/ipipe/Kconfig.debug
7030 @@ -0,0 +1,95 @@
7031 +config IPIPE_DEBUG
7032 + bool "I-pipe debugging"
7033 + depends on IPIPE
7035 +config IPIPE_DEBUG_CONTEXT
7036 + bool "Check for illicit cross-domain calls"
7037 + depends on IPIPE_DEBUG
7038 + default y
7039 + ---help---
7040 + Enable this feature to arm checkpoints in the kernel that
7041 + verify the correct invocation context. On entry to critical
7042 + Linux services, a warning is issued if the caller is not
7043 + running over the root domain.
7045 +config IPIPE_DEBUG_INTERNAL
7046 + bool "Enable internal debug checks"
7047 + depends on IPIPE_DEBUG
7048 + default y
7049 + ---help---
7050 + When this feature is enabled, I-pipe will perform internal
7051 + consistency checks of its subsystems, e.g. on per-cpu variable
7052 + access.
7054 +config IPIPE_TRACE
7055 + bool "Latency tracing"
7056 + depends on IPIPE_DEBUG
7057 + select FRAME_POINTER
7058 + select KALLSYMS
7059 + select PROC_FS
7060 + ---help---
7061 + Activate this option if you want to use per-function tracing of
7062 + the kernel. The tracer will collect data via instrumentation
7063 + 	  features like the one below or with the help of explicit calls
7064 + 	  to ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the
7065 + in-kernel tracing API. The collected data and runtime control
7066 + 	  are available via /proc/ipipe/trace/*.
7068 +if IPIPE_TRACE
7070 +config IPIPE_TRACE_ENABLE
7071 + bool "Enable tracing on boot"
7072 + default y
7073 + ---help---
7074 + Disable this option if you want to arm the tracer after booting
7075 + manually ("echo 1 > /proc/ipipe/tracer/enable"). This can reduce
7076 + boot time on slow embedded devices due to the tracer overhead.
7078 +config IPIPE_TRACE_MCOUNT
7079 + bool "Instrument function entries"
7080 + default y
7081 + select FUNCTION_TRACER
7082 + select TRACING
7083 + select CONTEXT_SWITCH_TRACER
7084 + ---help---
7085 + When enabled, records every kernel function entry in the tracer
7086 + log. While this slows down the system noticeably, it provides
7087 + the highest level of information about the flow of events.
7088 + 	  However, it can be switched off in order to record only explicit
7089 + I-pipe trace points.
7091 +config IPIPE_TRACE_IRQSOFF
7092 + bool "Trace IRQs-off times"
7093 + default y
7094 + ---help---
7095 + 	  Activate this option if the I-pipe should trace the longest path
7096 + with hard-IRQs switched off.
7098 +config IPIPE_TRACE_SHIFT
7099 + int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)"
7100 + range 10 18
7101 + default 14
7102 + ---help---
7103 + The number of trace points to hold tracing data for each
7104 + trace path, as a power of 2.
7106 +config IPIPE_TRACE_VMALLOC
7107 + bool "Use vmalloc'ed trace buffer"
7108 + default y if EMBEDDED
7109 + ---help---
7110 + Instead of reserving static kernel data, the required buffer
7111 + is allocated via vmalloc during boot-up when this option is
7112 + enabled. This can help to start systems that are low on memory,
7113 + but it slightly degrades overall performance. Try this option
7114 + when a traced kernel hangs unexpectedly at boot time.
7116 +config IPIPE_TRACE_PANIC
7117 + bool "Enable panic back traces"
7118 + default y
7119 + ---help---
7120 + Provides services to freeze and dump a back trace on panic
7121 + situations. This is used on IPIPE_DEBUG_CONTEXT exceptions
7122 + as well as ordinary kernel oopses. You can control the number
7123 + of printed back trace points via /proc/ipipe/trace.
7125 +endif
7126 diff --git a/kernel/ipipe/Makefile b/kernel/ipipe/Makefile
7127 new file mode 100644
7128 index 0000000..6257dfa
7129 --- /dev/null
7130 +++ b/kernel/ipipe/Makefile
7131 @@ -0,0 +1,3 @@
7133 +obj-$(CONFIG_IPIPE) += core.o
7134 +obj-$(CONFIG_IPIPE_TRACE) += tracer.o
7135 diff --git a/kernel/ipipe/core.c b/kernel/ipipe/core.c
7136 new file mode 100644
7137 index 0000000..7f1df1f
7138 --- /dev/null
7139 +++ b/kernel/ipipe/core.c
7140 @@ -0,0 +1,2150 @@
7141 +/* -*- linux-c -*-
7142 + * linux/kernel/ipipe/core.c
7144 + * Copyright (C) 2002-2005 Philippe Gerum.
7146 + * This program is free software; you can redistribute it and/or modify
7147 + * it under the terms of the GNU General Public License as published by
7148 + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
7149 + * USA; either version 2 of the License, or (at your option) any later
7150 + * version.
7152 + * This program is distributed in the hope that it will be useful,
7153 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
7154 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7155 + * GNU General Public License for more details.
7157 + * You should have received a copy of the GNU General Public License
7158 + * along with this program; if not, write to the Free Software
7159 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
7161 + * Architecture-independent I-PIPE core support.
7162 + */
7164 +#include <linux/version.h>
7165 +#include <linux/module.h>
7166 +#include <linux/init.h>
7167 +#include <linux/kernel.h>
7168 +#include <linux/sched.h>
7169 +#include <linux/sched.h>
7170 +#include <linux/kallsyms.h>
7171 +#include <linux/interrupt.h>
7172 +#include <linux/bitops.h>
7173 +#include <linux/tick.h>
7174 +#ifdef CONFIG_PROC_FS
7175 +#include <linux/proc_fs.h>
7176 +#include <linux/seq_file.h>
7177 +#endif /* CONFIG_PROC_FS */
7178 +#include <linux/ipipe_trace.h>
7179 +#include <linux/ipipe_tickdev.h>
7180 +#include <linux/irq.h>
7182 +static int __ipipe_ptd_key_count;
7184 +static unsigned long __ipipe_ptd_key_map;
7186 +static unsigned long __ipipe_domain_slot_map;
7188 +struct ipipe_domain ipipe_root;
7190 +#ifdef CONFIG_SMP
7192 +#define IPIPE_CRITICAL_TIMEOUT 1000000
7194 +static cpumask_t __ipipe_cpu_sync_map;
7196 +static cpumask_t __ipipe_cpu_lock_map;
7198 +static cpumask_t __ipipe_cpu_pass_map;
7200 +static unsigned long __ipipe_critical_lock;
7202 +static IPIPE_DEFINE_SPINLOCK(__ipipe_cpu_barrier);
7204 +static atomic_t __ipipe_critical_count = ATOMIC_INIT(0);
7206 +static void (*__ipipe_cpu_sync) (void);
7208 +#else /* !CONFIG_SMP */
7211 + * Create an alias to the unique root status, so that arch-dep code
7212 + * may get simple and easy access to this percpu variable. We also
7213 + * create an array of pointers to the percpu domain data; this tends
7214 + * to produce better code when reaching non-root domains. We make
7215 + * sure that the early boot code would be able to dereference the
7216 + * pointer to the root domain data safely by statically initializing
7217 + * its value (local_irq*() routines depend on this).
7218 + */
7219 +#if __GNUC__ >= 4
7220 +extern unsigned long __ipipe_root_status
7221 +__attribute__((alias(__stringify(ipipe_percpu_darray))));
7222 +EXPORT_SYMBOL(__ipipe_root_status);
7223 +#else /* __GNUC__ < 4 */
7225 + * Work around a GCC 3.x issue making alias symbols unusable as
7226 + * constant initializers.
7227 + */
7228 +unsigned long *const __ipipe_root_status_addr =
7229 + &__raw_get_cpu_var(ipipe_percpu_darray)[IPIPE_ROOT_SLOT].status;
7230 +EXPORT_SYMBOL(__ipipe_root_status_addr);
7231 +#endif /* __GNUC__ < 4 */
7233 +DEFINE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]) =
7234 +{ [IPIPE_ROOT_SLOT] = (struct ipipe_percpu_domain_data *)&__raw_get_cpu_var(ipipe_percpu_darray) };
7235 +EXPORT_PER_CPU_SYMBOL(ipipe_percpu_daddr);
7236 +#endif /* !CONFIG_SMP */
7238 +DEFINE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]) =
7239 +{ [IPIPE_ROOT_SLOT] = { .status = IPIPE_STALL_MASK } }; /* Root domain stalled on each CPU at startup. */
7241 +DEFINE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain) = { &ipipe_root };
7243 +DEFINE_PER_CPU(unsigned long, ipipe_nmi_saved_root); /* Copy of root status during NMI */
7245 +static IPIPE_DEFINE_SPINLOCK(__ipipe_pipelock);
7247 +LIST_HEAD(__ipipe_pipeline);
7249 +unsigned long __ipipe_virtual_irq_map;
7251 +#ifdef CONFIG_PRINTK
7252 +unsigned __ipipe_printk_virq;
7253 +#endif /* CONFIG_PRINTK */
7255 +int __ipipe_event_monitors[IPIPE_NR_EVENTS];
7257 +DEFINE_PER_CPU(ipipe_root_preempt_handler_t, __ipipe_root_preempt_handler);
7258 +EXPORT_PER_CPU_SYMBOL_GPL(__ipipe_root_preempt_handler);
7260 +DEFINE_PER_CPU(void *, __ipipe_root_preempt_cookie);
7261 +EXPORT_PER_CPU_SYMBOL_GPL(__ipipe_root_preempt_cookie);
7263 +#ifdef CONFIG_GENERIC_CLOCKEVENTS
7265 +DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
7267 +static DEFINE_PER_CPU(struct ipipe_tick_device, ipipe_tick_cpu_device);
7269 +int ipipe_request_tickdev(const char *devname,
7270 + void (*emumode)(enum clock_event_mode mode,
7271 + struct clock_event_device *cdev),
7272 + int (*emutick)(unsigned long delta,
7273 + struct clock_event_device *cdev),
7274 + int cpu, unsigned long *tmfreq)
7276 + struct ipipe_tick_device *itd;
7277 + struct tick_device *slave;
7278 + struct clock_event_device *evtdev;
7279 + unsigned long long freq;
7280 + unsigned long flags;
7281 + int status;
7283 + flags = ipipe_critical_enter(NULL);
7285 + itd = &per_cpu(ipipe_tick_cpu_device, cpu);
7287 + if (itd->slave != NULL) {
7288 + status = -EBUSY;
7289 + goto out;
7292 + slave = &per_cpu(tick_cpu_device, cpu);
7294 + if (strcmp(slave->evtdev->name, devname)) {
7295 + /*
7296 + * No conflict so far with the current tick device,
7297 + * check whether the requested device is sane and has
7298 + * been blessed by the kernel.
7299 + */
7300 + status = __ipipe_check_tickdev(devname) ?
7301 + CLOCK_EVT_MODE_UNUSED : CLOCK_EVT_MODE_SHUTDOWN;
7302 + goto out;
7305 + /*
7306 +	 * Our caller asks to use the same clock event device for
7307 +	 * ticking as we do; let's create a tick emulation device to
7308 + * interpose on the set_next_event() method, so that we may
7309 + * both manage the device in oneshot mode. Only the tick
7310 + * emulation code will actually program the clockchip hardware
7311 + * for the next shot, though.
7313 + * CAUTION: we still have to grab the tick device even when it
7314 +	 * currently runs in periodic mode, since the kernel may switch
7315 + * to oneshot dynamically (highres/no_hz tick mode).
7316 + */
7318 + evtdev = slave->evtdev;
7319 + status = evtdev->mode;
7321 + if (status == CLOCK_EVT_MODE_SHUTDOWN)
7322 + goto out;
7324 + itd->slave = slave;
7325 + itd->emul_set_mode = emumode;
7326 + itd->emul_set_tick = emutick;
7327 + itd->real_set_mode = evtdev->set_mode;
7328 + itd->real_set_tick = evtdev->set_next_event;
7329 + itd->real_max_delta_ns = evtdev->max_delta_ns;
7330 + itd->real_mult = evtdev->mult;
7331 + itd->real_shift = evtdev->shift;
7332 + freq = (1000000000ULL * evtdev->mult) >> evtdev->shift;
7333 + *tmfreq = (unsigned long)freq;
7334 + evtdev->set_mode = emumode;
7335 + evtdev->set_next_event = emutick;
7336 + evtdev->max_delta_ns = ULONG_MAX;
7337 + evtdev->mult = 1;
7338 + evtdev->shift = 0;
7339 +out:
7340 + ipipe_critical_exit(flags);
7342 + return status;
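The frequency returned to the caller above follows from the clockevent fixed-point relation cycles = (ns * mult) >> shift, hence freq_hz = (1e9 * mult) >> shift. A minimal standalone C sketch of that arithmetic, with a hypothetical 1 MHz device (the mult/shift values are made up for illustration, not taken from any real driver):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* Hypothetical 1 MHz tick device; real drivers register their own pair. */
        uint32_t shift = 32;
        uint32_t mult = (uint32_t)(((uint64_t)1000000 << shift) / 1000000000ULL);

        /* Same recovery as in ipipe_request_tickdev() above. */
        uint64_t freq = (1000000000ULL * mult) >> shift;

        printf("mult=%u shift=%u -> freq=%llu Hz\n",
               mult, shift, (unsigned long long)freq);
        return 0;
    }

The small truncation in the printed result (999999 Hz instead of 1000000 Hz) shows why *tmfreq is only an approximation of the device frequency.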
7345 +void ipipe_release_tickdev(int cpu)
7347 + struct ipipe_tick_device *itd;
7348 + struct tick_device *slave;
7349 + struct clock_event_device *evtdev;
7350 + unsigned long flags;
7352 + flags = ipipe_critical_enter(NULL);
7354 + itd = &per_cpu(ipipe_tick_cpu_device, cpu);
7356 + if (itd->slave != NULL) {
7357 + slave = &per_cpu(tick_cpu_device, cpu);
7358 + evtdev = slave->evtdev;
7359 + evtdev->set_mode = itd->real_set_mode;
7360 + evtdev->set_next_event = itd->real_set_tick;
7361 + evtdev->max_delta_ns = itd->real_max_delta_ns;
7362 + evtdev->mult = itd->real_mult;
7363 + evtdev->shift = itd->real_shift;
7364 + itd->slave = NULL;
7367 + ipipe_critical_exit(flags);
7370 +#endif /* CONFIG_GENERIC_CLOCKEVENTS */
7372 +void __init ipipe_init_early(void)
7374 + struct ipipe_domain *ipd = &ipipe_root;
7376 + /*
7377 + * Do the early init stuff. At this point, the kernel does not
7378 +	 * provide many services yet: be careful.
7379 + */
7380 + __ipipe_check_platform(); /* Do platform dependent checks first. */
7382 + /*
7383 + * A lightweight registration code for the root domain. We are
7384 + * running on the boot CPU, hw interrupts are off, and
7385 + * secondary CPUs are still lost in space.
7386 + */
7388 + /* Reserve percpu data slot #0 for the root domain. */
7389 + ipd->slot = 0;
7390 + set_bit(0, &__ipipe_domain_slot_map);
7392 + ipd->name = "Linux";
7393 + ipd->domid = IPIPE_ROOT_ID;
7394 + ipd->priority = IPIPE_ROOT_PRIO;
7396 + __ipipe_init_stage(ipd);
7398 + list_add_tail(&ipd->p_link, &__ipipe_pipeline);
7400 + __ipipe_init_platform();
7402 +#ifdef CONFIG_PRINTK
7403 + __ipipe_printk_virq = ipipe_alloc_virq(); /* Cannot fail here. */
7404 + ipd->irqs[__ipipe_printk_virq].handler = &__ipipe_flush_printk;
7405 + ipd->irqs[__ipipe_printk_virq].cookie = NULL;
7406 + ipd->irqs[__ipipe_printk_virq].acknowledge = NULL;
7407 + ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK;
7408 +#endif /* CONFIG_PRINTK */
7411 +void __init ipipe_init(void)
7413 + /* Now we may engage the pipeline. */
7414 + __ipipe_enable_pipeline();
7416 + printk(KERN_INFO "I-pipe %s: pipeline enabled.\n",
7417 + IPIPE_VERSION_STRING);
7420 +void __ipipe_init_stage(struct ipipe_domain *ipd)
7422 + struct ipipe_percpu_domain_data *p;
7423 + unsigned long status;
7424 + int cpu, n;
7426 + for_each_online_cpu(cpu) {
7427 + p = ipipe_percpudom_ptr(ipd, cpu);
7428 + status = p->status;
7429 + memset(p, 0, sizeof(*p));
7430 + p->status = status;
7433 + for (n = 0; n < IPIPE_NR_IRQS; n++) {
7434 + ipd->irqs[n].acknowledge = NULL;
7435 + ipd->irqs[n].handler = NULL;
7436 + ipd->irqs[n].control = IPIPE_PASS_MASK; /* Pass but don't handle */
7439 + for (n = 0; n < IPIPE_NR_EVENTS; n++)
7440 + ipd->evhand[n] = NULL;
7442 + ipd->evself = 0LL;
7443 + mutex_init(&ipd->mutex);
7445 + __ipipe_hook_critical_ipi(ipd);
7448 +void __ipipe_cleanup_domain(struct ipipe_domain *ipd)
7450 + ipipe_unstall_pipeline_from(ipd);
7452 +#ifdef CONFIG_SMP
7454 + struct ipipe_percpu_domain_data *p;
7455 + int cpu;
7457 + for_each_online_cpu(cpu) {
7458 + p = ipipe_percpudom_ptr(ipd, cpu);
7459 + while (__ipipe_ipending_p(p))
7460 + cpu_relax();
7463 +#else
7464 + __raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = NULL;
7465 +#endif
7467 + clear_bit(ipd->slot, &__ipipe_domain_slot_map);
7470 +void __ipipe_unstall_root(void)
7472 + struct ipipe_percpu_domain_data *p;
7474 + local_irq_disable_hw();
7476 + /* This helps catching bad usage from assembly call sites. */
7477 + ipipe_check_context(ipipe_root_domain);
7479 + p = ipipe_root_cpudom_ptr();
7481 + __clear_bit(IPIPE_STALL_FLAG, &p->status);
7483 + if (unlikely(__ipipe_ipending_p(p)))
7484 + __ipipe_sync_pipeline();
7486 + local_irq_enable_hw();
7489 +void __ipipe_restore_root(unsigned long x)
7491 + ipipe_check_context(ipipe_root_domain);
7493 + if (x)
7494 + __ipipe_stall_root();
7495 + else
7496 + __ipipe_unstall_root();
7499 +void ipipe_stall_pipeline_from(struct ipipe_domain *ipd)
7501 + unsigned long flags;
7502 + /*
7503 +	 * We have to protect against a race on updating the status
7504 + * variable _and_ CPU migration at the same time, so disable
7505 + * hw IRQs here.
7506 + */
7507 + local_irq_save_hw(flags);
7509 + __set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
7511 + if (!__ipipe_pipeline_head_p(ipd))
7512 + local_irq_restore_hw(flags);
7515 +unsigned long ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd)
7517 + unsigned long flags, x;
7519 + /* See ipipe_stall_pipeline_from() */
7520 + local_irq_save_hw(flags);
7522 + x = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
7524 + if (!__ipipe_pipeline_head_p(ipd))
7525 + local_irq_restore_hw(flags);
7527 + return x;
7530 +unsigned long ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd)
7532 + unsigned long flags, x;
7533 + struct list_head *pos;
7535 + local_irq_save_hw(flags);
7537 + x = __test_and_clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
7539 + if (ipd == __ipipe_current_domain)
7540 + pos = &ipd->p_link;
7541 + else
7542 + pos = __ipipe_pipeline.next;
7544 + __ipipe_walk_pipeline(pos);
7546 + if (likely(__ipipe_pipeline_head_p(ipd)))
7547 + local_irq_enable_hw();
7548 + else
7549 + local_irq_restore_hw(flags);
7551 + return x;
7554 +void ipipe_restore_pipeline_from(struct ipipe_domain *ipd,
7555 + unsigned long x)
7557 + if (x)
7558 + ipipe_stall_pipeline_from(ipd);
7559 + else
7560 + ipipe_unstall_pipeline_from(ipd);
7563 +void ipipe_unstall_pipeline_head(void)
7565 + struct ipipe_percpu_domain_data *p = ipipe_head_cpudom_ptr();
7566 + struct ipipe_domain *head_domain;
7568 + local_irq_disable_hw();
7570 + __clear_bit(IPIPE_STALL_FLAG, &p->status);
7572 + if (unlikely(__ipipe_ipending_p(p))) {
7573 + head_domain = __ipipe_pipeline_head();
7574 + if (likely(head_domain == __ipipe_current_domain))
7575 + __ipipe_sync_pipeline();
7576 + else
7577 + __ipipe_walk_pipeline(&head_domain->p_link);
7580 + local_irq_enable_hw();
7583 +void __ipipe_restore_pipeline_head(unsigned long x) /* hw interrupt off */
7585 + struct ipipe_percpu_domain_data *p = ipipe_head_cpudom_ptr();
7586 + struct ipipe_domain *head_domain;
7588 + if (x) {
7589 +#ifdef CONFIG_DEBUG_KERNEL
7590 + static int warned;
7591 + if (!warned && test_and_set_bit(IPIPE_STALL_FLAG, &p->status)) {
7592 + /*
7593 +			 * Already stalled, although ipipe_restore_pipeline_head()
7594 + * should have detected it? Send a warning once.
7595 + */
7596 + local_irq_enable_hw();
7597 + warned = 1;
7598 + printk(KERN_WARNING
7599 + "I-pipe: ipipe_restore_pipeline_head() optimization failed.\n");
7600 + dump_stack();
7601 + local_irq_disable_hw();
7603 +#else /* !CONFIG_DEBUG_KERNEL */
7604 + set_bit(IPIPE_STALL_FLAG, &p->status);
7605 +#endif /* CONFIG_DEBUG_KERNEL */
7607 + else {
7608 + __clear_bit(IPIPE_STALL_FLAG, &p->status);
7609 + if (unlikely(__ipipe_ipending_p(p))) {
7610 + head_domain = __ipipe_pipeline_head();
7611 + if (likely(head_domain == __ipipe_current_domain))
7612 + __ipipe_sync_pipeline();
7613 + else
7614 + __ipipe_walk_pipeline(&head_domain->p_link);
7616 + local_irq_enable_hw();
7620 +void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock)
7622 + local_irq_disable_hw();
7623 + arch_spin_lock(&lock->arch_lock);
7624 + __set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
7627 +void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock)
7629 + arch_spin_unlock(&lock->arch_lock);
7630 + __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
7631 + local_irq_enable_hw();
7634 +unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock)
7636 + unsigned long flags;
7637 + int s;
7639 + local_irq_save_hw(flags);
7640 + arch_spin_lock(&lock->arch_lock);
7641 + s = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
7643 + return raw_mangle_irq_bits(s, flags);
7646 +int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock,
7647 + unsigned long *x)
7649 + unsigned long flags;
7650 + int s;
7652 + local_irq_save_hw(flags);
7653 + if (!arch_spin_trylock(&lock->arch_lock)) {
7654 + local_irq_restore_hw(flags);
7655 + return 0;
7657 + s = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
7658 + *x = raw_mangle_irq_bits(s, flags);
7660 + return 1;
7663 +void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock,
7664 + unsigned long x)
7666 + arch_spin_unlock(&lock->arch_lock);
7667 + if (!raw_demangle_irq_bits(&x))
7668 + __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
7669 + local_irq_restore_hw(x);
7672 +int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock)
7674 + unsigned long flags;
7676 + local_irq_save_hw(flags);
7677 + if (!arch_spin_trylock(&lock->arch_lock)) {
7678 + local_irq_restore_hw(flags);
7679 + return 0;
7681 + __set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
7683 + return 1;
7686 +void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock)
7688 + arch_spin_unlock(&lock->arch_lock);
7691 +void __ipipe_spin_unlock_irqcomplete(unsigned long x)
7693 + if (!raw_demangle_irq_bits(&x))
7694 + __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
7695 + local_irq_restore_hw(x);
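The mangle/demangle helpers used by the irqsave/irqrestore variants above let one word carry both the saved hw flags and the stall state, so a single value can restore both at unlock time. The actual encoding is architecture-defined; a minimal userspace sketch of one possible scheme (stall bit folded into bit 0, assumed otherwise unused, which is not necessarily what any given port does):

    #include <stdio.h>

    /* Hypothetical encoding only: fold the stall bit into bit 0 of the saved
     * hw flags word. The real raw_mangle_irq_bits()/raw_demangle_irq_bits()
     * are provided per architecture and may encode things differently. */
    static unsigned long mangle(int stalled, unsigned long hwflags)
    {
        return (hwflags & ~1UL) | (stalled ? 1UL : 0UL);
    }

    static int demangle(unsigned long *x)
    {
        int stalled = (int)(*x & 1UL);
        *x &= ~1UL;
        return stalled;
    }

    int main(void)
    {
        unsigned long x = mangle(1, 0x200);   /* was stalled, IF bit saved */
        printf("stalled=%d flags=%#lx\n", demangle(&x), x);
        return 0;
    }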
7698 +#ifdef __IPIPE_3LEVEL_IRQMAP
7700 +/* Must be called hw IRQs off. */
7701 +static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p,
7702 + unsigned int irq)
7704 + __set_bit(irq, p->irqheld_map);
7705 + p->irqall[irq]++;
7708 +/* Must be called hw IRQs off. */
7709 +void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq)
7711 + struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(ipd);
7712 + int l0b, l1b;
7714 + IPIPE_WARN_ONCE(!irqs_disabled_hw());
7716 + l0b = irq / (BITS_PER_LONG * BITS_PER_LONG);
7717 + l1b = irq / BITS_PER_LONG;
7719 + if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) {
7720 + __set_bit(irq, p->irqpend_lomap);
7721 + __set_bit(l1b, p->irqpend_mdmap);
7722 + __set_bit(l0b, &p->irqpend_himap);
7723 + } else
7724 + __set_bit(irq, p->irqheld_map);
7726 + p->irqall[irq]++;
7729 +/* Must be called hw IRQs off. */
7730 +void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned int irq)
7732 + struct ipipe_percpu_domain_data *p;
7733 + int l0b, l1b;
7735 + IPIPE_WARN_ONCE(!irqs_disabled_hw());
7737 + /* Wired interrupts cannot be locked (it is useless). */
7738 + if (test_bit(IPIPE_WIRED_FLAG, &ipd->irqs[irq].control) ||
7739 + test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))
7740 + return;
7742 + l0b = irq / (BITS_PER_LONG * BITS_PER_LONG);
7743 + l1b = irq / BITS_PER_LONG;
7745 + p = ipipe_percpudom_ptr(ipd, cpu);
7746 + if (__test_and_clear_bit(irq, p->irqpend_lomap)) {
7747 + __set_bit(irq, p->irqheld_map);
7748 + if (p->irqpend_lomap[l1b] == 0) {
7749 + __clear_bit(l1b, p->irqpend_mdmap);
7750 + if (p->irqpend_mdmap[l0b] == 0)
7751 + __clear_bit(l0b, &p->irqpend_himap);
7756 +/* Must be called hw IRQs off. */
7757 +void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned int irq)
7759 + struct ipipe_percpu_domain_data *p;
7760 + int l0b, l1b, cpu;
7762 + IPIPE_WARN_ONCE(!irqs_disabled_hw());
7764 + if (unlikely(!test_and_clear_bit(IPIPE_LOCK_FLAG,
7765 + &ipd->irqs[irq].control)))
7766 + return;
7768 + l0b = irq / (BITS_PER_LONG * BITS_PER_LONG);
7769 + l1b = irq / BITS_PER_LONG;
7771 + for_each_online_cpu(cpu) {
7772 + p = ipipe_percpudom_ptr(ipd, cpu);
7773 + if (test_and_clear_bit(irq, p->irqheld_map)) {
7774 + /* We need atomic ops here: */
7775 + set_bit(irq, p->irqpend_lomap);
7776 + set_bit(l1b, p->irqpend_mdmap);
7777 + set_bit(l0b, &p->irqpend_himap);
7782 +static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p)
7784 + int l0b, l1b, l2b;
7785 + unsigned long l0m, l1m, l2m;
7786 + unsigned int irq;
7788 + l0m = p->irqpend_himap;
7789 + if (unlikely(l0m == 0))
7790 + return -1;
7792 + l0b = __ipipe_ffnz(l0m);
7793 + l1m = p->irqpend_mdmap[l0b];
7794 + if (unlikely(l1m == 0))
7795 + return -1;
7797 + l1b = __ipipe_ffnz(l1m) + l0b * BITS_PER_LONG;
7798 + l2m = p->irqpend_lomap[l1b];
7799 + if (unlikely(l2m == 0))
7800 + return -1;
7802 + l2b = __ipipe_ffnz(l2m);
7803 + irq = l1b * BITS_PER_LONG + l2b;
7805 + __clear_bit(irq, p->irqpend_lomap);
7806 + if (p->irqpend_lomap[l1b] == 0) {
7807 + __clear_bit(l1b, p->irqpend_mdmap);
7808 + if (p->irqpend_mdmap[l0b] == 0)
7809 + __clear_bit(l0b, &p->irqpend_himap);
7812 + return irq;
7815 +#else /* __IPIPE_2LEVEL_IRQMAP */
7817 +/* Must be called hw IRQs off. */
7818 +static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p,
7819 + unsigned int irq)
7821 + __set_bit(irq, p->irqheld_map);
7822 + p->irqall[irq]++;
7825 +/* Must be called hw IRQs off. */
7826 +void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq)
7828 + struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(ipd);
7829 + int l0b = irq / BITS_PER_LONG;
7831 + IPIPE_WARN_ONCE(!irqs_disabled_hw());
7833 + if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) {
7834 + __set_bit(irq, p->irqpend_lomap);
7835 + __set_bit(l0b, &p->irqpend_himap);
7836 + } else
7837 + __set_bit(irq, p->irqheld_map);
7839 + p->irqall[irq]++;
7842 +/* Must be called hw IRQs off. */
7843 +void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq)
7845 + struct ipipe_percpu_domain_data *p;
7846 + int l0b = irq / BITS_PER_LONG;
7848 + IPIPE_WARN_ONCE(!irqs_disabled_hw());
7850 + /* Wired interrupts cannot be locked (it is useless). */
7851 + if (test_bit(IPIPE_WIRED_FLAG, &ipd->irqs[irq].control) ||
7852 + test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))
7853 + return;
7855 + p = ipipe_percpudom_ptr(ipd, cpu);
7856 + if (__test_and_clear_bit(irq, p->irqpend_lomap)) {
7857 + __set_bit(irq, p->irqheld_map);
7858 + if (p->irqpend_lomap[l0b] == 0)
7859 + __clear_bit(l0b, &p->irqpend_himap);
7863 +/* Must be called hw IRQs off. */
7864 +void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq)
7866 + struct ipipe_percpu_domain_data *p;
7867 + int l0b = irq / BITS_PER_LONG, cpu;
7869 + IPIPE_WARN_ONCE(!irqs_disabled_hw());
7871 + if (unlikely(!test_and_clear_bit(IPIPE_LOCK_FLAG,
7872 + &ipd->irqs[irq].control)))
7873 + return;
7875 + for_each_online_cpu(cpu) {
7876 + p = ipipe_percpudom_ptr(ipd, cpu);
7877 + if (test_and_clear_bit(irq, p->irqheld_map)) {
7878 + /* We need atomic ops here: */
7879 + set_bit(irq, p->irqpend_lomap);
7880 + set_bit(l0b, &p->irqpend_himap);
7885 +static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p)
7887 + unsigned long l0m, l1m;
7888 + int l0b, l1b;
7890 + l0m = p->irqpend_himap;
7891 + if (unlikely(l0m == 0))
7892 + return -1;
7894 + l0b = __ipipe_ffnz(l0m);
7895 + l1m = p->irqpend_lomap[l0b];
7896 + if (unlikely(l1m == 0))
7897 + return -1;
7899 + l1b = __ipipe_ffnz(l1m);
7900 + __clear_bit(l1b, &p->irqpend_lomap[l0b]);
7901 + if (p->irqpend_lomap[l0b] == 0)
7902 + __clear_bit(l0b, &p->irqpend_himap);
7904 + return l0b * BITS_PER_LONG + l1b;
7907 +#endif /* __IPIPE_2LEVEL_IRQMAP */
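Both map layouts above keep the per-domain interrupt log as a hierarchical bitmap: a summary word records which leaf words hold pending bits, so __ipipe_next_irq() never scans empty words. A minimal userspace sketch of the two-level form, using __builtin_ctzl as a stand-in for __ipipe_ffnz (names and sizes here are illustrative only):

    #include <stdio.h>

    #define NBITS (sizeof(unsigned long) * 8)
    #define NIRQ  256

    /* Two-level pending map: one summary word, NIRQ/NBITS leaf words. */
    static unsigned long himap;
    static unsigned long lomap[NIRQ / NBITS];

    static void set_pending(unsigned irq)
    {
        lomap[irq / NBITS] |= 1UL << (irq % NBITS);
        himap |= 1UL << (irq / NBITS);
    }

    /* Return the lowest pending IRQ and clear it, or -1 if none; this
     * mirrors the 2-level __ipipe_next_irq() logic above. */
    static int next_irq(void)
    {
        int l0b, l1b;

        if (himap == 0)
            return -1;
        l0b = __builtin_ctzl(himap);          /* first non-empty leaf word */
        l1b = __builtin_ctzl(lomap[l0b]);     /* first pending bit in it   */
        lomap[l0b] &= ~(1UL << l1b);
        if (lomap[l0b] == 0)
            himap &= ~(1UL << l0b);
        return l0b * (int)NBITS + l1b;
    }

    int main(void)
    {
        set_pending(3);
        set_pending(130);
        for (int irq = next_irq(); irq >= 0; irq = next_irq())
            printf("pending irq %d\n", irq);
        return 0;
    }

The 3-level variant adds one more summary layer so very large IRQ spaces still resolve in a handful of word scans.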
7910 + * __ipipe_walk_pipeline(): Plays interrupts pending in the log. Must
7911 + * be called with local hw interrupts disabled.
7912 + */
7913 +void __ipipe_walk_pipeline(struct list_head *pos)
7915 + struct ipipe_domain *this_domain = __ipipe_current_domain, *next_domain;
7916 + struct ipipe_percpu_domain_data *p, *np;
7918 + p = ipipe_cpudom_ptr(this_domain);
7920 + while (pos != &__ipipe_pipeline) {
7922 + next_domain = list_entry(pos, struct ipipe_domain, p_link);
7923 + np = ipipe_cpudom_ptr(next_domain);
7925 + if (test_bit(IPIPE_STALL_FLAG, &np->status))
7926 + break; /* Stalled stage -- do not go further. */
7928 + if (__ipipe_ipending_p(np)) {
7929 + if (next_domain == this_domain)
7930 + __ipipe_sync_pipeline();
7931 + else {
7933 + p->evsync = 0;
7934 + __ipipe_current_domain = next_domain;
7935 + ipipe_suspend_domain(); /* Sync stage and propagate interrupts. */
7937 + if (__ipipe_current_domain == next_domain)
7938 + __ipipe_current_domain = this_domain;
7939 + /*
7940 + * Otherwise, something changed the current domain under our
7941 +				 * feet, recycling the register set; do not override the new
7942 + * domain.
7943 + */
7945 + if (__ipipe_ipending_p(p) &&
7946 + !test_bit(IPIPE_STALL_FLAG, &p->status))
7947 + __ipipe_sync_pipeline();
7949 + break;
7950 + } else if (next_domain == this_domain)
7951 + break;
7953 + pos = next_domain->p_link.next;
7958 + * ipipe_suspend_domain() -- Suspend the current domain, switching to
7959 + * the next one which has pending work down the pipeline.
7960 + */
7961 +void ipipe_suspend_domain(void)
7963 + struct ipipe_domain *this_domain, *next_domain;
7964 + struct ipipe_percpu_domain_data *p;
7965 + struct list_head *ln;
7966 + unsigned long flags;
7968 + local_irq_save_hw(flags);
7970 + this_domain = next_domain = __ipipe_current_domain;
7971 + p = ipipe_cpudom_ptr(this_domain);
7972 + p->status &= ~IPIPE_STALL_MASK;
7974 + if (__ipipe_ipending_p(p))
7975 + goto sync_stage;
7977 + for (;;) {
7978 + ln = next_domain->p_link.next;
7980 + if (ln == &__ipipe_pipeline)
7981 + break;
7983 + next_domain = list_entry(ln, struct ipipe_domain, p_link);
7984 + p = ipipe_cpudom_ptr(next_domain);
7986 + if (p->status & IPIPE_STALL_MASK)
7987 + break;
7989 + if (!__ipipe_ipending_p(p))
7990 + continue;
7992 + __ipipe_current_domain = next_domain;
7993 +sync_stage:
7994 + __ipipe_sync_pipeline();
7996 + if (__ipipe_current_domain != next_domain)
7997 + /*
7998 + * Something has changed the current domain under our
7999 + * feet, recycling the register set; take note.
8000 + */
8001 + this_domain = __ipipe_current_domain;
8004 + __ipipe_current_domain = this_domain;
8006 + local_irq_restore_hw(flags);
8010 +/* ipipe_alloc_virq() -- Allocate a pipelined virtual/soft interrupt.
8011 + * Virtual interrupts are handled in exactly the same way as their
8012 + * hw-generated counterparts wrt pipelining.
8013 + */
8014 +unsigned ipipe_alloc_virq(void)
8016 + unsigned long flags, irq = 0;
8017 + int ipos;
8019 + spin_lock_irqsave(&__ipipe_pipelock, flags);
8021 + if (__ipipe_virtual_irq_map != ~0) {
8022 + ipos = ffz(__ipipe_virtual_irq_map);
8023 + set_bit(ipos, &__ipipe_virtual_irq_map);
8024 + irq = ipos + IPIPE_VIRQ_BASE;
8027 + spin_unlock_irqrestore(&__ipipe_pipelock, flags);
8029 + return irq;
8033 + * ipipe_virtualize_irq() -- Set a per-domain pipelined interrupt
8034 + * handler.
8035 + */
8036 +int ipipe_virtualize_irq(struct ipipe_domain *ipd,
8037 + unsigned int irq,
8038 + ipipe_irq_handler_t handler,
8039 + void *cookie,
8040 + ipipe_irq_ackfn_t acknowledge,
8041 + unsigned modemask)
8043 + ipipe_irq_handler_t old_handler;
8044 + struct irq_desc *desc;
8045 + unsigned long flags;
8046 + int ret = 0;
8048 + if (irq >= IPIPE_NR_IRQS)
8049 + return -EINVAL;
8051 + if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK)
8052 + return -EPERM;
8054 + if (!test_bit(IPIPE_AHEAD_FLAG, &ipd->flags))
8055 + /* Silently unwire interrupts for non-heading domains. */
8056 + modemask &= ~IPIPE_WIRED_MASK;
8058 + spin_lock_irqsave(&__ipipe_pipelock, flags);
8060 + old_handler = ipd->irqs[irq].handler;
8062 + if (handler == NULL) {
8063 + modemask &=
8064 + ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK |
8065 + IPIPE_EXCLUSIVE_MASK | IPIPE_WIRED_MASK);
8067 + ipd->irqs[irq].handler = NULL;
8068 + ipd->irqs[irq].cookie = NULL;
8069 + ipd->irqs[irq].acknowledge = NULL;
8070 + ipd->irqs[irq].control = modemask;
8072 + if (irq < NR_IRQS && !ipipe_virtual_irq_p(irq)) {
8073 + desc = irq_to_desc(irq);
8074 + if (old_handler && desc)
8075 + __ipipe_disable_irqdesc(ipd, irq);
8078 + goto unlock_and_exit;
8081 + if (handler == IPIPE_SAME_HANDLER) {
8082 + cookie = ipd->irqs[irq].cookie;
8083 + handler = old_handler;
8084 + if (handler == NULL) {
8085 + ret = -EINVAL;
8086 + goto unlock_and_exit;
8088 + } else if ((modemask & IPIPE_EXCLUSIVE_MASK) != 0 && old_handler) {
8089 + ret = -EBUSY;
8090 + goto unlock_and_exit;
8093 + /*
8094 + * Wired interrupts can only be delivered to domains always
8095 + * heading the pipeline, and using dynamic propagation.
8096 + */
8097 + if ((modemask & IPIPE_WIRED_MASK) != 0) {
8098 + if ((modemask & (IPIPE_PASS_MASK | IPIPE_STICKY_MASK)) != 0) {
8099 + ret = -EINVAL;
8100 + goto unlock_and_exit;
8102 + modemask |= IPIPE_HANDLE_MASK;
8105 + if ((modemask & IPIPE_STICKY_MASK) != 0)
8106 + modemask |= IPIPE_HANDLE_MASK;
8108 + if (acknowledge == NULL)
8109 + /*
8110 + * Acknowledge handler unspecified for a hw interrupt:
8111 + * use the Linux-defined handler instead.
8112 + */
8113 + acknowledge = ipipe_root_domain->irqs[irq].acknowledge;
8115 + ipd->irqs[irq].handler = handler;
8116 + ipd->irqs[irq].cookie = cookie;
8117 + ipd->irqs[irq].acknowledge = acknowledge;
8118 + ipd->irqs[irq].control = modemask;
8120 + desc = irq_to_desc(irq);
8121 + if (desc == NULL)
8122 + goto unlock_and_exit;
8124 + if (irq < NR_IRQS && !ipipe_virtual_irq_p(irq)) {
8125 + __ipipe_enable_irqdesc(ipd, irq);
8126 + /*
8127 + * IRQ enable/disable state is domain-sensitive, so we
8128 + * may not change it for another domain. What is
8129 + * allowed however is forcing some domain to handle an
8130 + * interrupt source, by passing the proper 'ipd'
8131 + * descriptor which thus may be different from
8132 + * __ipipe_current_domain.
8133 + */
8134 + if ((modemask & IPIPE_ENABLE_MASK) != 0) {
8135 + if (ipd != __ipipe_current_domain)
8136 + ret = -EPERM;
8137 + else
8138 + __ipipe_enable_irq(irq);
8142 +unlock_and_exit:
8144 + spin_unlock_irqrestore(&__ipipe_pipelock, flags);
8146 + return ret;
8149 +/* ipipe_control_irq() -- Change control mode of a pipelined interrupt. */
8151 +int ipipe_control_irq(struct ipipe_domain *ipd, unsigned int irq,
8152 + unsigned clrmask, unsigned setmask)
8154 + unsigned long flags;
8155 + int ret = 0;
8157 + if (irq >= IPIPE_NR_IRQS)
8158 + return -EINVAL;
8160 + flags = ipipe_critical_enter(NULL);
8162 + if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK) {
8163 + ret = -EPERM;
8164 + goto out;
8167 + if (ipd->irqs[irq].handler == NULL)
8168 + setmask &= ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK);
8170 + if ((setmask & IPIPE_STICKY_MASK) != 0)
8171 + setmask |= IPIPE_HANDLE_MASK;
8173 + if ((clrmask & (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK)) != 0) /* If one goes, both go. */
8174 + clrmask |= (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK);
8176 + ipd->irqs[irq].control &= ~clrmask;
8177 + ipd->irqs[irq].control |= setmask;
8179 + if ((setmask & IPIPE_ENABLE_MASK) != 0)
8180 + __ipipe_enable_irq(irq);
8181 + else if ((clrmask & IPIPE_ENABLE_MASK) != 0)
8182 + __ipipe_disable_irq(irq);
8184 +out:
8185 + ipipe_critical_exit(flags);
8187 + return ret;
8190 +/* __ipipe_dispatch_event() -- Low-level event dispatcher. */
8192 +int __ipipe_dispatch_event (unsigned event, void *data)
8194 +extern void *ipipe_irq_handler; void *handler; if (ipipe_irq_handler != __ipipe_handle_irq && (handler = ipipe_root_domain->evhand[event])) { return ((int (*)(unsigned long, void *))handler)(event, data); } else {
8195 + struct ipipe_domain *start_domain, *this_domain, *next_domain;
8196 + struct ipipe_percpu_domain_data *np;
8197 + ipipe_event_handler_t evhand;
8198 + struct list_head *pos, *npos;
8199 + unsigned long flags;
8200 + int propagate = 1;
8202 + local_irq_save_hw(flags);
8204 + start_domain = this_domain = __ipipe_current_domain;
8206 + list_for_each_safe(pos, npos, &__ipipe_pipeline) {
8207 + /*
8208 + * Note: Domain migration may occur while running
8209 + * event or interrupt handlers, in which case the
8210 + * current register set is going to be recycled for a
8211 + * different domain than the initiating one. We do
8212 + * care for that, always tracking the current domain
8213 + * descriptor upon return from those handlers.
8214 + */
8215 + next_domain = list_entry(pos, struct ipipe_domain, p_link);
8216 + np = ipipe_cpudom_ptr(next_domain);
8218 + /*
8219 + * Keep a cached copy of the handler's address since
8220 + * ipipe_catch_event() may clear it under our feet.
8221 + */
8222 + evhand = next_domain->evhand[event];
8224 + if (evhand != NULL) {
8225 + __ipipe_current_domain = next_domain;
8226 + np->evsync |= (1LL << event);
8227 + local_irq_restore_hw(flags);
8228 + propagate = !evhand(event, start_domain, data);
8229 + local_irq_save_hw(flags);
8230 + /*
8231 + * We may have a migration issue here, if the
8232 + * current task is migrated to another CPU on
8233 + * behalf of the invoked handler, usually when
8234 + * a syscall event is processed. However,
8235 + * ipipe_catch_event() will make sure that a
8236 + * CPU that clears a handler for any given
8237 + * event will not attempt to wait for itself
8238 + * to clear the evsync bit for that event,
8239 + * which practically plugs the hole, without
8240 + * resorting to a much more complex strategy.
8241 + */
8242 + np->evsync &= ~(1LL << event);
8243 + if (__ipipe_current_domain != next_domain)
8244 + this_domain = __ipipe_current_domain;
8247 + /* NEVER sync the root stage here. */
8248 + if (next_domain != ipipe_root_domain &&
8249 + __ipipe_ipending_p(np) &&
8250 + !test_bit(IPIPE_STALL_FLAG, &np->status)) {
8251 + __ipipe_current_domain = next_domain;
8252 + __ipipe_sync_pipeline();
8253 + if (__ipipe_current_domain != next_domain)
8254 + this_domain = __ipipe_current_domain;
8257 + __ipipe_current_domain = this_domain;
8259 + if (next_domain == this_domain || !propagate)
8260 + break;
8263 + local_irq_restore_hw(flags);
8265 + return !propagate;
8266 +} }
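Stripped of the stall and domain-migration bookkeeping, the dispatch loop above walks the domains in priority order and stops as soon as a handler consumes the event (a nonzero handler return drops 'propagate' to zero). A hypothetical, self-contained sketch of just that propagation rule (handler signature simplified for illustration):

    #include <stdio.h>

    /* Hypothetical handler type: nonzero return consumes the event,
     * zero lets it propagate to lower-priority domains. */
    typedef int (*ev_handler_t)(unsigned event, void *data);

    static int rt_handler(unsigned event, void *data)
    {
        printf("rt domain handles event %u\n", event);
        return 1;                        /* consume: stop propagation */
    }

    static int root_handler(unsigned event, void *data)
    {
        printf("root domain handles event %u\n", event);
        return 0;                        /* pass it on */
    }

    int main(void)
    {
        /* Domains listed from highest to lowest priority. */
        ev_handler_t pipeline[] = { rt_handler, root_handler };
        int propagate = 1;

        for (unsigned i = 0; i < 2 && propagate; i++)
            propagate = !pipeline[i](7, NULL);

        printf("event %s\n", propagate ? "reached the end unhandled"
                                       : "was consumed");
        return 0;
    }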
8269 + * __ipipe_dispatch_wired -- Wired interrupt dispatcher. Wired
8270 + * interrupts are immediately and unconditionally delivered to the
8271 + * domain heading the pipeline upon receipt, and such domain must have
8272 + * been registered as an invariant head for the system (priority ==
8273 + * IPIPE_HEAD_PRIORITY). The motivation for using wired interrupts is
8274 + * to get an extra-fast dispatching path for those IRQs, by relying on
8275 + * straightforward logic based on assumptions that must always be
8276 + * true for invariant head domains. The following assumptions are
8277 + * made when dealing with such interrupts:
8279 + * 1- Wired interrupts are purely dynamic, i.e. the decision to
8280 + * propagate them down the pipeline must be done from the head domain
8281 + * ISR.
8282 + * 2- Wired interrupts cannot be shared or sticky.
8283 + * 3- The root domain cannot be an invariant pipeline head; as a
8284 + * consequence, the root domain cannot handle wired
8285 + * interrupts.
8286 + * 4- Wired interrupts must have a valid acknowledge handler for the
8287 + * head domain (if needed, see __ipipe_handle_irq).
8289 + * Called with hw interrupts off.
8290 + */
8292 +void __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq)
8294 + struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(head);
8296 + if (test_bit(IPIPE_STALL_FLAG, &p->status)) {
8297 + __ipipe_set_irq_pending(head, irq);
8298 + return;
8301 + __ipipe_dispatch_wired_nocheck(head, irq);
8304 +void __ipipe_dispatch_wired_nocheck(struct ipipe_domain *head, unsigned irq) /* hw interrupts off */
8306 + struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(head);
8307 + struct ipipe_domain *old;
8309 + old = __ipipe_current_domain;
8310 + __ipipe_current_domain = head; /* Switch to the head domain. */
8312 + p->irqall[irq]++;
8313 + __set_bit(IPIPE_STALL_FLAG, &p->status);
8314 + barrier();
8315 + head->irqs[irq].handler(irq, head->irqs[irq].cookie); /* Call the ISR. */
8316 + __ipipe_run_irqtail(irq);
8317 + barrier();
8318 + __clear_bit(IPIPE_STALL_FLAG, &p->status);
8320 + if (__ipipe_current_domain == head) {
8321 + __ipipe_current_domain = old;
8322 + if (old == head) {
8323 + if (__ipipe_ipending_p(p))
8324 + __ipipe_sync_pipeline();
8325 + return;
8329 + __ipipe_walk_pipeline(&head->p_link);
8333 + * __ipipe_sync_stage() -- Flush the pending IRQs for the current
8334 + * domain (and processor). This routine flushes the interrupt log
8335 + * (see "Optimistic interrupt protection" from D. Stodolsky et al. for
8336 + * more on the deferred interrupt scheme). Every interrupt that
8337 + * occurred while the pipeline was stalled gets played. WARNING:
8338 + * callers on SMP boxen should always check for CPU migration on
8339 + * return from this routine.
8341 + * This routine must be called with hw interrupts off.
8342 + */
8343 +void __ipipe_sync_stage(void)
8345 + struct ipipe_percpu_domain_data *p;
8346 + struct ipipe_domain *ipd;
8347 + int irq;
8349 + ipd = __ipipe_current_domain;
8350 + p = ipipe_cpudom_ptr(ipd);
8352 + __set_bit(IPIPE_STALL_FLAG, &p->status);
8353 + smp_wmb();
8355 + if (ipd == ipipe_root_domain)
8356 + trace_hardirqs_off();
8358 + for (;;) {
8359 + irq = __ipipe_next_irq(p);
8360 + if (irq < 0)
8361 + break;
8362 + /*
8363 + * Make sure the compiler does not reorder wrongly, so
8364 + * that all updates to maps are done before the
8365 + * handler gets called.
8366 + */
8367 + barrier();
8369 + if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))
8370 + continue;
8372 + if (!__ipipe_pipeline_head_p(ipd))
8373 + local_irq_enable_hw();
8375 + if (likely(ipd != ipipe_root_domain)) {
8376 + ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie);
8377 + __ipipe_run_irqtail(irq);
8378 + } else if (ipipe_virtual_irq_p(irq)) {
8379 + irq_enter();
8380 + __ipipe_do_root_virq(ipd, irq);
8381 + irq_exit();
8382 + } else
8383 + __ipipe_do_root_xirq(ipd, irq);
8385 + local_irq_disable_hw();
8386 + p = ipipe_cpudom_ptr(__ipipe_current_domain);
8389 + if (ipd == ipipe_root_domain)
8390 + trace_hardirqs_on();
8392 + __clear_bit(IPIPE_STALL_FLAG, &p->status);
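The routine above is the replay half of the "optimistic interrupt protection" scheme the comment cites: while a stage is stalled, interrupts are merely logged; flushing the log plays them back in order once the stage is synced. A toy userspace model of that defer/replay idea (all names here are illustrative, not the patch's):

    #include <stdio.h>

    static unsigned long pending;
    static int stalled;

    static void irq_arrives(unsigned irq)
    {
        if (stalled)
            pending |= 1UL << irq;       /* defer: log only */
        else
            printf("handle irq %u immediately\n", irq);
    }

    static void unstall(void)
    {
        stalled = 0;
        while (pending) {                /* replay the log, lowest IRQ first */
            unsigned irq = (unsigned)__builtin_ctzl(pending);
            pending &= ~(1UL << irq);
            printf("replay irq %u\n", irq);
        }
    }

    int main(void)
    {
        stalled = 1;
        irq_arrives(3);
        irq_arrives(5);
        unstall();
        return 0;
    }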
8395 +/* ipipe_register_domain() -- Link a new domain to the pipeline. */
8397 +int ipipe_register_domain(struct ipipe_domain *ipd,
8398 + struct ipipe_domain_attr *attr)
8400 + struct ipipe_percpu_domain_data *p;
8401 + struct list_head *pos = NULL;
8402 + struct ipipe_domain *_ipd;
8403 + unsigned long flags;
8405 + if (!ipipe_root_domain_p) {
8406 + printk(KERN_WARNING
8407 + "I-pipe: Only the root domain may register a new domain.\n");
8408 + return -EPERM;
8411 + flags = ipipe_critical_enter(NULL);
8413 + if (attr->priority == IPIPE_HEAD_PRIORITY) {
8414 + if (test_bit(IPIPE_HEAD_SLOT, &__ipipe_domain_slot_map)) {
8415 + ipipe_critical_exit(flags);
8416 + return -EAGAIN; /* Cannot override current head. */
8418 + ipd->slot = IPIPE_HEAD_SLOT;
8419 + } else
8420 + ipd->slot = ffz(__ipipe_domain_slot_map);
8422 + if (ipd->slot < CONFIG_IPIPE_DOMAINS) {
8423 + set_bit(ipd->slot, &__ipipe_domain_slot_map);
8424 + list_for_each(pos, &__ipipe_pipeline) {
8425 + _ipd = list_entry(pos, struct ipipe_domain, p_link);
8426 + if (_ipd->domid == attr->domid)
8427 + break;
8431 + ipipe_critical_exit(flags);
8433 + if (pos != &__ipipe_pipeline) {
8434 + if (ipd->slot < CONFIG_IPIPE_DOMAINS)
8435 + clear_bit(ipd->slot, &__ipipe_domain_slot_map);
8436 + return -EBUSY;
8439 +#ifndef CONFIG_SMP
8440 + /*
8441 + * Set up the perdomain pointers for direct access to the
8442 + * percpu domain data. This saves a costly multiply each time
8443 + * we need to refer to the contents of the percpu domain data
8444 + * array.
8445 + */
8446 + __raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = &__raw_get_cpu_var(ipipe_percpu_darray)[ipd->slot];
8447 +#endif
8449 + ipd->name = attr->name;
8450 + ipd->domid = attr->domid;
8451 + ipd->pdd = attr->pdd;
8452 + ipd->flags = 0;
8454 + if (attr->priority == IPIPE_HEAD_PRIORITY) {
8455 + ipd->priority = INT_MAX;
8456 + __set_bit(IPIPE_AHEAD_FLAG,&ipd->flags);
8458 + else
8459 + ipd->priority = attr->priority;
8461 + __ipipe_init_stage(ipd);
8463 + INIT_LIST_HEAD(&ipd->p_link);
8465 +#ifdef CONFIG_PROC_FS
8466 + __ipipe_add_domain_proc(ipd);
8467 +#endif /* CONFIG_PROC_FS */
8469 + flags = ipipe_critical_enter(NULL);
8471 + list_for_each(pos, &__ipipe_pipeline) {
8472 + _ipd = list_entry(pos, struct ipipe_domain, p_link);
8473 + if (ipd->priority > _ipd->priority)
8474 + break;
8477 + list_add_tail(&ipd->p_link, pos);
8479 + ipipe_critical_exit(flags);
8481 + printk(KERN_INFO "I-pipe: Domain %s registered.\n", ipd->name);
8483 + if (attr->entry == NULL)
8484 + return 0;
8486 + /*
8487 + * Finally, allow the new domain to perform its initialization
8488 + * duties.
8489 + */
8490 + local_irq_save_hw_smp(flags);
8491 + __ipipe_current_domain = ipd;
8492 + local_irq_restore_hw_smp(flags);
8493 + attr->entry();
8494 + local_irq_save_hw(flags);
8495 + __ipipe_current_domain = ipipe_root_domain;
8496 + p = ipipe_root_cpudom_ptr();
8498 + if (__ipipe_ipending_p(p) &&
8499 + !test_bit(IPIPE_STALL_FLAG, &p->status))
8500 + __ipipe_sync_pipeline();
8502 + local_irq_restore_hw(flags);
8504 + return 0;
8507 +/* ipipe_unregister_domain() -- Remove a domain from the pipeline. */
8509 +int ipipe_unregister_domain(struct ipipe_domain *ipd)
8511 + unsigned long flags;
8513 + if (!ipipe_root_domain_p) {
8514 + printk(KERN_WARNING
8515 + "I-pipe: Only the root domain may unregister a domain.\n");
8516 + return -EPERM;
8519 + if (ipd == ipipe_root_domain) {
8520 + printk(KERN_WARNING
8521 + "I-pipe: Cannot unregister the root domain.\n");
8522 + return -EPERM;
8524 +#ifdef CONFIG_SMP
8526 + struct ipipe_percpu_domain_data *p;
8527 + unsigned int irq;
8528 + int cpu;
8530 + /*
8531 + * In the SMP case, wait for the logged events to drain on
8532 + * other processors before eventually removing the domain
8533 + * from the pipeline.
8534 + */
8536 + ipipe_unstall_pipeline_from(ipd);
8538 + flags = ipipe_critical_enter(NULL);
8540 + for (irq = 0; irq < IPIPE_NR_IRQS; irq++) {
8541 + clear_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control);
8542 + clear_bit(IPIPE_WIRED_FLAG, &ipd->irqs[irq].control);
8543 + clear_bit(IPIPE_STICKY_FLAG, &ipd->irqs[irq].control);
8544 + set_bit(IPIPE_PASS_FLAG, &ipd->irqs[irq].control);
8547 + ipipe_critical_exit(flags);
8549 + for_each_online_cpu(cpu) {
8550 + p = ipipe_percpudom_ptr(ipd, cpu);
8551 + while (__ipipe_ipending_p(p))
8552 + cpu_relax();
8555 +#endif /* CONFIG_SMP */
8557 + mutex_lock(&ipd->mutex);
8559 +#ifdef CONFIG_PROC_FS
8560 + __ipipe_remove_domain_proc(ipd);
8561 +#endif /* CONFIG_PROC_FS */
8563 + /*
8564 + * Simply remove the domain from the pipeline and we are almost done.
8565 + */
8567 + flags = ipipe_critical_enter(NULL);
8568 + list_del_init(&ipd->p_link);
8569 + ipipe_critical_exit(flags);
8571 + __ipipe_cleanup_domain(ipd);
8573 + mutex_unlock(&ipd->mutex);
8575 + printk(KERN_INFO "I-pipe: Domain %s unregistered.\n", ipd->name);
8577 + return 0;
8581 + * ipipe_propagate_irq() -- Force a given IRQ propagation on behalf of
8582 + * a running interrupt handler to the next domain down the pipeline.
8583 + * ipipe_schedule_irq() -- Does almost the same as above, but attempts
8584 + * to pend the interrupt for the current domain first.
8585 + * Must be called hw IRQs off.
8586 + */
8587 +void __ipipe_pend_irq(unsigned irq, struct list_head *head)
8589 + struct ipipe_domain *ipd;
8590 + struct list_head *ln;
8592 +#ifdef CONFIG_IPIPE_DEBUG
8593 + BUG_ON(irq >= IPIPE_NR_IRQS ||
8594 + (ipipe_virtual_irq_p(irq)
8595 + && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)));
8596 +#endif
8597 + for (ln = head; ln != &__ipipe_pipeline; ln = ipd->p_link.next) {
8598 + ipd = list_entry(ln, struct ipipe_domain, p_link);
8599 + if (test_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control)) {
8600 + __ipipe_set_irq_pending(ipd, irq);
8601 + return;
8606 +/* ipipe_free_virq() -- Release a virtual/soft interrupt. */
8608 +int ipipe_free_virq(unsigned virq)
8610 + if (!ipipe_virtual_irq_p(virq))
8611 + return -EINVAL;
8613 + clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map);
8615 + return 0;
8618 +void ipipe_init_attr(struct ipipe_domain_attr *attr)
8620 + attr->name = "anon";
8621 + attr->domid = 1;
8622 + attr->entry = NULL;
8623 + attr->priority = IPIPE_ROOT_PRIO;
8624 + attr->pdd = NULL;
8628 + * ipipe_catch_event() -- Interpose or remove an event handler for a
8629 + * given domain.
8630 + */
8631 +ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd,
8632 + unsigned event,
8633 + ipipe_event_handler_t handler)
8635 + ipipe_event_handler_t old_handler;
8636 + unsigned long flags;
8637 + int self = 0, cpu;
8639 + if (event & IPIPE_EVENT_SELF) {
8640 + event &= ~IPIPE_EVENT_SELF;
8641 + self = 1;
8644 + if (event >= IPIPE_NR_EVENTS)
8645 + return NULL;
8647 + flags = ipipe_critical_enter(NULL);
8649 + if (!(old_handler = xchg(&ipd->evhand[event],handler))) {
8650 + if (handler) {
8651 + if (self)
8652 + ipd->evself |= (1LL << event);
8653 + else
8654 + __ipipe_event_monitors[event]++;
8657 + else if (!handler) {
8658 + if (ipd->evself & (1LL << event))
8659 + ipd->evself &= ~(1LL << event);
8660 + else
8661 + __ipipe_event_monitors[event]--;
8662 + } else if ((ipd->evself & (1LL << event)) && !self) {
8663 + __ipipe_event_monitors[event]++;
8664 + ipd->evself &= ~(1LL << event);
8665 + } else if (!(ipd->evself & (1LL << event)) && self) {
8666 + __ipipe_event_monitors[event]--;
8667 + ipd->evself |= (1LL << event);
8670 + ipipe_critical_exit(flags);
8672 + if (!handler && ipipe_root_domain_p) {
8673 + /*
8674 + * If we cleared a handler on behalf of the root
8675 + * domain, we have to wait for any current invocation
8676 + * to drain, since our caller might subsequently unmap
8677 + * the target domain. To this aim, this code
8678 + * synchronizes with __ipipe_dispatch_event(),
8679 + * guaranteeing that either the dispatcher sees a null
8680 + * handler in which case it discards the invocation
8681 + * (which also prevents from entering a livelock), or
8682 + * finds a valid handler and calls it. Symmetrically,
8683 + * ipipe_catch_event() ensures that the called code
8684 + * won't be unmapped under our feet until the event
8685 + * synchronization flag is cleared for the given event
8686 + * on all CPUs.
8687 + */
8688 + preempt_disable();
8689 + cpu = smp_processor_id();
8690 + /*
8691 + * Hack: this solves the potential migration issue
8692 + * raised in __ipipe_dispatch_event(). This is a
8693 + * work-around which makes the assumption that other
8694 +		 * CPUs will subsequently either process at least one
8695 + * interrupt for the target domain, or call
8696 + * __ipipe_dispatch_event() without going through a
8697 + * migration while running the handler at least once;
8698 + * practically, this is safe on any normally running
8699 + * system.
8700 + */
8701 + ipipe_percpudom(ipd, evsync, cpu) &= ~(1LL << event);
8702 + preempt_enable();
8704 + for_each_online_cpu(cpu) {
8705 + while (ipipe_percpudom(ipd, evsync, cpu) & (1LL << event))
8706 + schedule_timeout_interruptible(HZ / 50);
8710 + return old_handler;
8713 +cpumask_t ipipe_set_irq_affinity (unsigned irq, cpumask_t cpumask)
8715 +#ifdef CONFIG_SMP
8716 + if (irq >= IPIPE_NR_XIRQS)
8717 + /* Allow changing affinity of external IRQs only. */
8718 + return CPU_MASK_NONE;
8720 + if (num_online_cpus() > 1)
8721 + return __ipipe_set_irq_affinity(irq,cpumask);
8722 +#endif /* CONFIG_SMP */
8724 + return CPU_MASK_NONE;
8727 +int ipipe_send_ipi (unsigned ipi, cpumask_t cpumask)
8730 +#ifdef CONFIG_SMP
8731 + if (!ipipe_ipi_p(ipi))
8732 + return -EINVAL;
8733 + return __ipipe_send_ipi(ipi,cpumask);
8734 +#else /* !CONFIG_SMP */
8735 + return -EINVAL;
8736 +#endif /* CONFIG_SMP */
8739 +#ifdef CONFIG_SMP
8741 +/* Always called with hw interrupts off. */
8742 +void __ipipe_do_critical_sync(unsigned irq, void *cookie)
8744 + int cpu = ipipe_processor_id();
8746 + cpu_set(cpu, __ipipe_cpu_sync_map);
8748 + /* Now we are in sync with the lock requestor running on another
8749 +	   CPU. Enter a spinning wait until it releases the global
8750 + lock. */
8751 + spin_lock(&__ipipe_cpu_barrier);
8753 + /* Got it. Now get out. */
8755 + if (__ipipe_cpu_sync)
8756 + /* Call the sync routine if any. */
8757 + __ipipe_cpu_sync();
8759 + cpu_set(cpu, __ipipe_cpu_pass_map);
8761 + spin_unlock(&__ipipe_cpu_barrier);
8763 + cpu_clear(cpu, __ipipe_cpu_sync_map);
8765 +#endif /* CONFIG_SMP */
8768 + * ipipe_critical_enter() -- Grab the superlock excluding all CPUs but
8769 + * the current one from a critical section. This lock is used when we
8770 + * must enforce a global critical section for a single CPU in a
8771 + * possibly SMP system, whatever context the CPUs are running in.
8772 + */
8773 +unsigned long ipipe_critical_enter(void (*syncfn)(void))
8775 + unsigned long flags;
8777 + local_irq_save_hw(flags);
8779 +#ifdef CONFIG_SMP
8780 + if (num_online_cpus() > 1) {
8781 + int cpu = ipipe_processor_id();
8782 + cpumask_t allbutself;
8783 + unsigned long loops;
8785 + if (!cpu_test_and_set(cpu, __ipipe_cpu_lock_map)) {
8786 + while (test_and_set_bit(0, &__ipipe_critical_lock)) {
8787 + int n = 0;
8789 + local_irq_enable_hw();
8791 + do {
8792 + cpu_relax();
8793 + } while (++n < cpu);
8795 + local_irq_disable_hw();
8798 +restart:
8799 + spin_lock(&__ipipe_cpu_barrier);
8801 + __ipipe_cpu_sync = syncfn;
8803 + cpus_clear(__ipipe_cpu_pass_map);
8804 + cpu_set(cpu, __ipipe_cpu_pass_map);
8806 + /*
8807 + * Send the sync IPI to all processors but the current
8808 + * one.
8809 + */
8810 + cpus_andnot(allbutself, cpu_online_map,
8811 + __ipipe_cpu_pass_map);
8812 + __ipipe_send_ipi(IPIPE_CRITICAL_IPI, allbutself);
8814 + loops = IPIPE_CRITICAL_TIMEOUT;
8816 + while (!cpus_equal(__ipipe_cpu_sync_map, allbutself)) {
8817 + cpu_relax();
8819 + if (--loops == 0) {
8820 + /*
8821 + * We ran into a deadlock due to a
8822 + * contended rwlock. Cancel this round
8823 + * and retry.
8824 + */
8825 + __ipipe_cpu_sync = NULL;
8827 + spin_unlock(&__ipipe_cpu_barrier);
8829 + /*
8830 + * Ensure all CPUs consumed the IPI to
8831 + * avoid running __ipipe_cpu_sync
8832 + * prematurely. This usually resolves
8833 + * the deadlock reason too.
8834 + */
8835 + while (!cpus_equal(cpu_online_map,
8836 + __ipipe_cpu_pass_map))
8837 + cpu_relax();
8839 + goto restart;
8844 + atomic_inc(&__ipipe_critical_count);
8846 +#endif /* CONFIG_SMP */
8848 + return flags;
8851 +/* ipipe_critical_exit() -- Release the superlock. */
8853 +void ipipe_critical_exit(unsigned long flags)
8855 +#ifdef CONFIG_SMP
8856 + if (num_online_cpus() > 1 &&
8857 + atomic_dec_and_test(&__ipipe_critical_count)) {
8858 + spin_unlock(&__ipipe_cpu_barrier);
8860 + while (!cpus_empty(__ipipe_cpu_sync_map))
8861 + cpu_relax();
8863 + cpu_clear(ipipe_processor_id(), __ipipe_cpu_lock_map);
8864 + clear_bit(0, &__ipipe_critical_lock);
8865 + smp_mb__after_clear_bit();
8867 +#endif /* CONFIG_SMP */
8869 + local_irq_restore_hw(flags);
8872 +#ifdef CONFIG_HAVE_IPIPE_HOSTRT
8874 + * NOTE: The architecture specific code must only call this function
8875 + * when a clocksource suitable for CLOCK_HOST_REALTIME is enabled.
8876 + */
8877 +void ipipe_update_hostrt(struct timespec *wall_time, struct clocksource *clock)
8879 + struct ipipe_hostrt_data hostrt_data;
8881 + hostrt_data.live = 1;
8882 + hostrt_data.cycle_last = clock->cycle_last;
8883 + hostrt_data.mask = clock->mask;
8884 + hostrt_data.mult = clock->mult;
8885 + hostrt_data.shift = clock->shift;
8886 + hostrt_data.wall_time_sec = wall_time->tv_sec;
8887 + hostrt_data.wall_time_nsec = wall_time->tv_nsec;
8888 + hostrt_data.wall_to_monotonic = wall_to_monotonic;
8890 + /* Note: The event receiver is responsible for providing
8891 + proper locking */
8892 + if (__ipipe_event_monitored_p(IPIPE_EVENT_HOSTRT))
8893 + __ipipe_dispatch_event(IPIPE_EVENT_HOSTRT, &hostrt_data);
8895 +#endif /* CONFIG_HAVE_IPIPE_HOSTRT */
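The snapshot published above gives the event receiver (typically a co-kernel) everything needed to compute CLOCK_HOST_REALTIME on its own: the wall time at the snapshot plus the clocksource delta converted with the usual cyc2ns formula. A rough userspace sketch of that conversion; the struct mirrors the fields of ipipe_hostrt_data, the numeric values are made up, and a real consumer would also apply wall_to_monotonic where required:

    #include <stdio.h>
    #include <stdint.h>

    struct hostrt {
        uint64_t cycle_last, mask;
        uint32_t mult, shift;
        int64_t  wall_time_sec;
        int64_t  wall_time_nsec;
    };

    /* CLOCK_HOST_REALTIME = wall time at snapshot + cyc2ns(delta). */
    static void host_realtime(const struct hostrt *h, uint64_t cycles,
                              int64_t *sec, int64_t *nsec)
    {
        uint64_t delta = (cycles - h->cycle_last) & h->mask;
        uint64_t ns = (delta * h->mult) >> h->shift;

        *sec  = h->wall_time_sec;
        *nsec = h->wall_time_nsec + (int64_t)ns;
        while (*nsec >= 1000000000LL) {
            *nsec -= 1000000000LL;
            (*sec)++;
        }
    }

    int main(void)
    {
        struct hostrt h = { 1000, ~0ULL, 1 << 20, 20, 1325721600, 500 };
        int64_t sec, nsec;

        host_realtime(&h, 2000, &sec, &nsec);   /* 1000 cycles after snapshot */
        printf("%lld.%09lld\n", (long long)sec, (long long)nsec);
        return 0;
    }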
8897 +int ipipe_alloc_ptdkey (void)
8899 + unsigned long flags;
8900 + int key = -1;
8902 + spin_lock_irqsave(&__ipipe_pipelock,flags);
8904 + if (__ipipe_ptd_key_count < IPIPE_ROOT_NPTDKEYS) {
8905 + key = ffz(__ipipe_ptd_key_map);
8906 + set_bit(key,&__ipipe_ptd_key_map);
8907 + __ipipe_ptd_key_count++;
8910 + spin_unlock_irqrestore(&__ipipe_pipelock,flags);
8912 + return key;
8915 +int ipipe_free_ptdkey (int key)
8917 + unsigned long flags;
8919 + if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
8920 + return -EINVAL;
8922 + spin_lock_irqsave(&__ipipe_pipelock,flags);
8924 + if (test_and_clear_bit(key,&__ipipe_ptd_key_map))
8925 + __ipipe_ptd_key_count--;
8927 + spin_unlock_irqrestore(&__ipipe_pipelock,flags);
8929 + return 0;
8932 +int ipipe_set_ptd (int key, void *value)
8935 + if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
8936 + return -EINVAL;
8938 + current->ptd[key] = value;
8940 + return 0;
8943 +void *ipipe_get_ptd (int key)
8946 + if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
8947 + return NULL;
8949 + return current->ptd[key];
8952 +#ifdef CONFIG_PROC_FS
8954 +struct proc_dir_entry *ipipe_proc_root;
8956 +static int __ipipe_version_info_proc(char *page,
8957 + char **start,
8958 + off_t off, int count, int *eof, void *data)
8960 + int len = sprintf(page, "%s\n", IPIPE_VERSION_STRING);
8962 + len -= off;
8964 + if (len <= off + count)
8965 + *eof = 1;
8967 + *start = page + off;
8969 +	if (len > count)
8970 + len = count;
8972 +	if (len < 0)
8973 + len = 0;
8975 + return len;
8978 +static int __ipipe_common_info_show(struct seq_file *p, void *data)
8980 + struct ipipe_domain *ipd = (struct ipipe_domain *)p->private;
8981 + char handling, stickiness, lockbit, exclusive, virtuality;
8983 + unsigned long ctlbits;
8984 + unsigned irq;
8986 + seq_printf(p, " +----- Handling ([A]ccepted, [G]rabbed, [W]ired, [D]iscarded)\n");
8987 + seq_printf(p, " |+---- Sticky\n");
8988 + seq_printf(p, " ||+--- Locked\n");
8989 + seq_printf(p, " |||+-- Exclusive\n");
8990 + seq_printf(p, " ||||+- Virtual\n");
8991 + seq_printf(p, "[IRQ] |||||\n");
8993 + mutex_lock(&ipd->mutex);
8995 + for (irq = 0; irq < IPIPE_NR_IRQS; irq++) {
8996 + /* Remember to protect against
8997 + * ipipe_virtual_irq/ipipe_control_irq if more fields
8998 + * get involved. */
8999 + ctlbits = ipd->irqs[irq].control;
9001 + if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq))
9002 + /*
9003 + * There might be a hole between the last external
9004 + * IRQ and the first virtual one; skip it.
9005 + */
9006 + continue;
9008 + if (ipipe_virtual_irq_p(irq)
9009 + && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))
9010 + /* Non-allocated virtual IRQ; skip it. */
9011 + continue;
9013 + /*
9014 + * Statuses are as follows:
9015 + * o "accepted" means handled _and_ passed down the pipeline.
9016 + * o "grabbed" means handled, but the interrupt might be
9017 + * terminated _or_ passed down the pipeline depending on
9018 +		 *   what the domain handler asks of the I-pipe.
9019 + * o "wired" is basically the same as "grabbed", except that
9020 + * the interrupt is unconditionally delivered to an invariant
9021 + * pipeline head domain.
9022 + * o "passed" means unhandled by the domain but passed
9023 + * down the pipeline.
9024 + * o "discarded" means unhandled and _not_ passed down the
9025 + * pipeline. The interrupt merely disappears from the
9026 + * current domain down to the end of the pipeline.
9027 + */
9028 + if (ctlbits & IPIPE_HANDLE_MASK) {
9029 + if (ctlbits & IPIPE_PASS_MASK)
9030 + handling = 'A';
9031 + else if (ctlbits & IPIPE_WIRED_MASK)
9032 + handling = 'W';
9033 + else
9034 + handling = 'G';
9035 + } else if (ctlbits & IPIPE_PASS_MASK)
9036 + /* Do not output if no major action is taken. */
9037 + continue;
9038 + else
9039 + handling = 'D';
9041 + if (ctlbits & IPIPE_STICKY_MASK)
9042 + stickiness = 'S';
9043 + else
9044 + stickiness = '.';
9046 + if (ctlbits & IPIPE_LOCK_MASK)
9047 + lockbit = 'L';
9048 + else
9049 + lockbit = '.';
9051 + if (ctlbits & IPIPE_EXCLUSIVE_MASK)
9052 + exclusive = 'X';
9053 + else
9054 + exclusive = '.';
9056 + if (ipipe_virtual_irq_p(irq))
9057 + virtuality = 'V';
9058 + else
9059 + virtuality = '.';
9061 + seq_printf(p, " %3u: %c%c%c%c%c\n",
9062 + irq, handling, stickiness, lockbit, exclusive, virtuality);
9065 + seq_printf(p, "[Domain info]\n");
9067 + seq_printf(p, "id=0x%.8x\n", ipd->domid);
9069 + if (test_bit(IPIPE_AHEAD_FLAG,&ipd->flags))
9070 + seq_printf(p, "priority=topmost\n");
9071 + else
9072 + seq_printf(p, "priority=%d\n", ipd->priority);
9074 + mutex_unlock(&ipd->mutex);
9076 + return 0;
9079 +static int __ipipe_common_info_open(struct inode *inode, struct file *file)
9081 + return single_open(file, __ipipe_common_info_show, PROC_I(inode)->pde->data);
9084 +static struct file_operations __ipipe_info_proc_ops = {
9085 + .owner = THIS_MODULE,
9086 + .open = __ipipe_common_info_open,
9087 + .read = seq_read,
9088 + .llseek = seq_lseek,
9089 + .release = single_release,
9092 +void __ipipe_add_domain_proc(struct ipipe_domain *ipd)
9094 + struct proc_dir_entry *e = create_proc_entry(ipd->name, 0444, ipipe_proc_root);
9095 + if (e) {
9096 + e->proc_fops = &__ipipe_info_proc_ops;
9097 + e->data = (void*) ipd;
9101 +void __ipipe_remove_domain_proc(struct ipipe_domain *ipd)
9103 + remove_proc_entry(ipd->name,ipipe_proc_root);
9106 +void __init ipipe_init_proc(void)
9108 + ipipe_proc_root = create_proc_entry("ipipe",S_IFDIR, 0);
9109 + create_proc_read_entry("version",0444,ipipe_proc_root,&__ipipe_version_info_proc,NULL);
9110 + __ipipe_add_domain_proc(ipipe_root_domain);
9112 + __ipipe_init_tracer();
9115 +#endif /* CONFIG_PROC_FS */
9117 +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
9119 +DEFINE_PER_CPU(int, ipipe_percpu_context_check) = { 1 };
9120 +DEFINE_PER_CPU(int, ipipe_saved_context_check_state);
9122 +void ipipe_check_context(struct ipipe_domain *border_domain)
9124 + struct ipipe_percpu_domain_data *p;
9125 + struct ipipe_domain *this_domain;
9126 + unsigned long flags;
9127 + int cpu;
9129 + local_irq_save_hw_smp(flags);
9131 + this_domain = __ipipe_current_domain;
9132 + p = ipipe_head_cpudom_ptr();
9133 + if (likely(this_domain->priority <= border_domain->priority &&
9134 + !test_bit(IPIPE_STALL_FLAG, &p->status))) {
9135 + local_irq_restore_hw_smp(flags);
9136 + return;
9137 + }
9139 + cpu = ipipe_processor_id();
9140 + if (!per_cpu(ipipe_percpu_context_check, cpu)) {
9141 + local_irq_restore_hw_smp(flags);
9142 + return;
9143 + }
9145 + local_irq_restore_hw_smp(flags);
9147 + ipipe_context_check_off();
9148 + ipipe_trace_panic_freeze();
9149 + ipipe_set_printk_sync(__ipipe_current_domain);
9151 + if (this_domain->priority > border_domain->priority)
9152 + printk(KERN_ERR "I-pipe: Detected illicit call from domain "
9153 + "'%s'\n"
9154 + KERN_ERR " into a service reserved for domain "
9155 + "'%s' and below.\n",
9156 + this_domain->name, border_domain->name);
9157 + else
9158 + printk(KERN_ERR "I-pipe: Detected stalled topmost domain, "
9159 + "probably caused by a bug.\n"
9160 + " A critical section may have been "
9161 + "left unterminated.\n");
9162 + dump_stack();
9163 + ipipe_trace_panic_dump();
9166 +EXPORT_SYMBOL(ipipe_check_context);
9168 +#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */
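ipipe_check_context() above is the teeth behind CONFIG_IPIPE_DEBUG_CONTEXT: a service restricted to a given domain (or any lower-priority one) calls it with that border domain and, on violation, gets a synchronous printk plus a frozen tracer dump. A hedged sketch of how a root-only service might guard itself; the guarded function is made up for illustration:

    /* Illustration only: complain if entered from a higher domain
     * or with the topmost domain stalled. */
    void my_root_only_service(void)
    {
    #ifdef CONFIG_IPIPE_DEBUG_CONTEXT
            ipipe_check_context(ipipe_root_domain);
    #endif
            /* ... body that assumes root-domain context ... */
    }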
9170 +#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP)
9172 +int notrace __ipipe_check_percpu_access(void)
9174 + struct ipipe_percpu_domain_data *p;
9175 + struct ipipe_domain *this_domain;
9176 + unsigned long flags;
9177 + int ret = 0;
9179 + local_irq_save_hw_notrace(flags);
9181 + this_domain = __raw_get_cpu_var(ipipe_percpu_domain);
9183 + /*
9184 + * Only the root domain may implement preemptive CPU migration
9185 + * of tasks, so anything above in the pipeline should be fine.
9186 + */
9187 + if (this_domain->priority > IPIPE_ROOT_PRIO)
9188 + goto out;
9190 + if (raw_irqs_disabled_flags(flags))
9191 + goto out;
9193 + /*
9194 + * Last chance: hw interrupts were enabled on entry while
9195 + * running over the root domain, but the root stage might be
9196 + * currently stalled, in which case preemption would be
9197 + * disabled, and no migration could occur.
9198 + */
9199 + if (this_domain == ipipe_root_domain) {
9200 + p = ipipe_root_cpudom_ptr();
9201 + if (test_bit(IPIPE_STALL_FLAG, &p->status))
9202 + goto out;
9204 + /*
9205 + * Our caller may end up accessing the wrong per-cpu variable
9206 + * instance due to CPU migration; tell it to complain about
9207 + * this.
9208 + */
9209 + ret = 1;
9210 +out:
9211 + local_irq_restore_hw_notrace(flags);
9213 + return ret;
9216 +void __ipipe_spin_unlock_debug(unsigned long flags)
9218 + /*
9219 + * We catch a nasty issue where spin_unlock_irqrestore() on a
9220 + * regular kernel spinlock is about to re-enable hw interrupts
9221 + * in a section entered with hw irqs off. This is clearly the
9222 + * sign of a massive breakage coming. Usual suspect is a
9223 + * regular spinlock which was overlooked, used within a
9224 + * section which must run with hw irqs disabled.
9225 + */
9226 + WARN_ON_ONCE(!raw_irqs_disabled_flags(flags) && irqs_disabled_hw());
9228 +EXPORT_SYMBOL(__ipipe_spin_unlock_debug);
9230 +#endif /* CONFIG_IPIPE_DEBUG_INTERNAL && CONFIG_SMP */
9233 +void ipipe_prepare_panic(void)
9235 + ipipe_set_printk_sync(ipipe_current_domain);
9236 + ipipe_context_check_off();
9239 +EXPORT_SYMBOL_GPL(ipipe_prepare_panic);
9241 +EXPORT_SYMBOL(ipipe_virtualize_irq);
9242 +EXPORT_SYMBOL(ipipe_control_irq);
9243 +EXPORT_SYMBOL(ipipe_suspend_domain);
9244 +EXPORT_SYMBOL(ipipe_alloc_virq);
9245 +EXPORT_PER_CPU_SYMBOL(ipipe_percpu_domain);
9246 +EXPORT_PER_CPU_SYMBOL(ipipe_percpu_darray);
9247 +EXPORT_SYMBOL(ipipe_root);
9248 +EXPORT_SYMBOL(ipipe_stall_pipeline_from);
9249 +EXPORT_SYMBOL(ipipe_test_and_stall_pipeline_from);
9250 +EXPORT_SYMBOL(ipipe_test_and_unstall_pipeline_from);
9251 +EXPORT_SYMBOL(ipipe_restore_pipeline_from);
9252 +EXPORT_SYMBOL(ipipe_unstall_pipeline_head);
9253 +EXPORT_SYMBOL(__ipipe_restore_pipeline_head);
9254 +EXPORT_SYMBOL(__ipipe_unstall_root);
9255 +EXPORT_SYMBOL(__ipipe_restore_root);
9256 +EXPORT_SYMBOL(__ipipe_spin_lock_irq);
9257 +EXPORT_SYMBOL(__ipipe_spin_unlock_irq);
9258 +EXPORT_SYMBOL(__ipipe_spin_lock_irqsave);
9259 +EXPORT_SYMBOL(__ipipe_spin_trylock_irq);
9260 +EXPORT_SYMBOL(__ipipe_spin_trylock_irqsave);
9261 +EXPORT_SYMBOL(__ipipe_spin_unlock_irqrestore);
9262 +EXPORT_SYMBOL(__ipipe_pipeline);
9263 +EXPORT_SYMBOL(__ipipe_lock_irq);
9264 +EXPORT_SYMBOL(__ipipe_unlock_irq);
9265 +EXPORT_SYMBOL(ipipe_register_domain);
9266 +EXPORT_SYMBOL(ipipe_unregister_domain);
9267 +EXPORT_SYMBOL(ipipe_free_virq);
9268 +EXPORT_SYMBOL(ipipe_init_attr);
9269 +EXPORT_SYMBOL(ipipe_catch_event);
9270 +EXPORT_SYMBOL(ipipe_alloc_ptdkey);
9271 +EXPORT_SYMBOL(ipipe_free_ptdkey);
9272 +EXPORT_SYMBOL(ipipe_set_ptd);
9273 +EXPORT_SYMBOL(ipipe_get_ptd);
9274 +EXPORT_SYMBOL(ipipe_set_irq_affinity);
9275 +EXPORT_SYMBOL(ipipe_send_ipi);
9276 +EXPORT_SYMBOL(__ipipe_pend_irq);
9277 +EXPORT_SYMBOL(__ipipe_set_irq_pending);
9278 +EXPORT_SYMBOL(__ipipe_event_monitors);
9279 +#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP)
9280 +EXPORT_SYMBOL(__ipipe_check_percpu_access);
9281 +#endif
9282 +#ifdef CONFIG_GENERIC_CLOCKEVENTS
9283 +EXPORT_SYMBOL(ipipe_request_tickdev);
9284 +EXPORT_SYMBOL(ipipe_release_tickdev);
9285 +#endif
9287 +EXPORT_SYMBOL(ipipe_critical_enter);
9288 +EXPORT_SYMBOL(ipipe_critical_exit);
9289 +EXPORT_SYMBOL(ipipe_trigger_irq);
9290 +EXPORT_SYMBOL(ipipe_get_sysinfo);
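The export block above is, in effect, the public I-pipe API consumed by a co-kernel such as RTAI (the package this patch ships with) or Xenomai. The exact prototypes come from include/linux/ipipe.h added earlier in the patch; the fragment below is only a rough sketch of the customary bring-up sequence, with the IRQ number, handler and attribute values invented for illustration and the signatures assumed from the 2.x I-pipe API rather than quoted from this file:

    static struct ipipe_domain my_domain;

    static void my_irq_handler(unsigned irq, void *cookie)
    {
            /* Runs from the head of the pipeline, hardware IRQs off. */
    }

    static int __init my_core_init(void)
    {
            struct ipipe_domain_attr attr;

            ipipe_init_attr(&attr);
            attr.name = "mycore";
            attr.priority = IPIPE_HEAD_PRIORITY;
            if (ipipe_register_domain(&my_domain, &attr))
                    return -EBUSY;

            /* Grab IRQ 42 for the new domain, wired delivery. */
            return ipipe_virtualize_irq(&my_domain, 42,
                                        my_irq_handler, NULL, NULL,
                                        IPIPE_HANDLE_MASK | IPIPE_WIRED_MASK);
    }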
9291 diff --git a/kernel/ipipe/tracer.c b/kernel/ipipe/tracer.c
9292 new file mode 100644
9293 index 0000000..f013ef4
9294 --- /dev/null
9295 +++ b/kernel/ipipe/tracer.c
9296 @@ -0,0 +1,1442 @@
9297 +/* -*- linux-c -*-
9298 + * kernel/ipipe/tracer.c
9300 + * Copyright (C) 2005 Luotao Fu.
9301 + * 2005-2008 Jan Kiszka.
9303 + * This program is free software; you can redistribute it and/or modify
9304 + * it under the terms of the GNU General Public License as published by
9305 + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
9306 + * USA; either version 2 of the License, or (at your option) any later
9307 + * version.
9309 + * This program is distributed in the hope that it will be useful,
9310 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
9311 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9312 + * GNU General Public License for more details.
9314 + * You should have received a copy of the GNU General Public License
9315 + * along with this program; if not, write to the Free Software
9316 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
9317 + */
9319 +#include <linux/kernel.h>
9320 +#include <linux/module.h>
9321 +#include <linux/version.h>
9322 +#include <linux/kallsyms.h>
9323 +#include <linux/seq_file.h>
9324 +#include <linux/proc_fs.h>
9325 +#include <linux/ctype.h>
9326 +#include <linux/vmalloc.h>
9327 +#include <linux/pid.h>
9328 +#include <linux/vermagic.h>
9329 +#include <linux/sched.h>
9330 +#include <linux/ipipe.h>
9331 +#include <linux/ftrace.h>
9332 +#include <asm/uaccess.h>
9334 +#define IPIPE_TRACE_PATHS 4 /* <!> Do not lower below 3 */
9335 +#define IPIPE_DEFAULT_ACTIVE 0
9336 +#define IPIPE_DEFAULT_MAX 1
9337 +#define IPIPE_DEFAULT_FROZEN 2
9339 +#define IPIPE_TRACE_POINTS (1 << CONFIG_IPIPE_TRACE_SHIFT)
9340 +#define WRAP_POINT_NO(point) ((point) & (IPIPE_TRACE_POINTS-1))
9342 +#define IPIPE_DEFAULT_PRE_TRACE 10
9343 +#define IPIPE_DEFAULT_POST_TRACE 10
9344 +#define IPIPE_DEFAULT_BACK_TRACE 100
9346 +#define IPIPE_DELAY_NOTE 1000 /* in nanoseconds */
9347 +#define IPIPE_DELAY_WARN 10000 /* in nanoseconds */
9349 +#define IPIPE_TFLG_NMI_LOCK 0x0001
9350 +#define IPIPE_TFLG_NMI_HIT 0x0002
9351 +#define IPIPE_TFLG_NMI_FREEZE_REQ 0x0004
9353 +#define IPIPE_TFLG_HWIRQ_OFF 0x0100
9354 +#define IPIPE_TFLG_FREEZING 0x0200
9355 +#define IPIPE_TFLG_CURRDOM_SHIFT 10 /* bits 10..11: current domain */
9356 +#define IPIPE_TFLG_CURRDOM_MASK 0x0C00
9357 +#define IPIPE_TFLG_DOMSTATE_SHIFT 12 /* bits 12..15: domain stalled? */
9358 +#define IPIPE_TFLG_DOMSTATE_BITS 3
9360 +#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \
9361 + (point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT)))
9362 +#define IPIPE_TFLG_CURRENT_DOMAIN(point) \
9363 + ((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT)
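As a concrete reading of the flag layout and the WRAP_POINT_NO() helper defined above (the shift is a Kconfig choice; 14 is assumed only for the example): with CONFIG_IPIPE_TRACE_SHIFT = 14 the ring holds 1 << 14 = 16384 points and indices wrap by masking,

    WRAP_POINT_NO(16386) == 16386 & (16384 - 1) == 2

while a flags word of 0x1500 would decode as hardware IRQs off (bit 8), current domain index 1 (bits 10..11), and the stall bit set for domain slot 0 (bit 12).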
9365 +struct ipipe_trace_point {
9366 + short type;
9367 + short flags;
9368 + unsigned long eip;
9369 + unsigned long parent_eip;
9370 + unsigned long v;
9371 + unsigned long long timestamp;
9374 +struct ipipe_trace_path {
9375 + volatile int flags;
9376 + int dump_lock; /* separated from flags due to cross-cpu access */
9377 + int trace_pos; /* next point to fill */
9378 + int begin, end; /* finalised path begin and end */
9379 + int post_trace; /* non-zero when in post-trace phase */
9380 + unsigned long long length; /* max path length in cycles */
9381 + unsigned long nmi_saved_eip; /* for deferred requests from NMIs */
9382 + unsigned long nmi_saved_parent_eip;
9383 + unsigned long nmi_saved_v;
9384 + struct ipipe_trace_point point[IPIPE_TRACE_POINTS];
9385 +} ____cacheline_aligned_in_smp;
9387 +enum ipipe_trace_type
9389 + IPIPE_TRACE_FUNC = 0,
9390 + IPIPE_TRACE_BEGIN,
9391 + IPIPE_TRACE_END,
9392 + IPIPE_TRACE_FREEZE,
9393 + IPIPE_TRACE_SPECIAL,
9394 + IPIPE_TRACE_PID,
9395 + IPIPE_TRACE_EVENT,
9398 +#define IPIPE_TYPE_MASK 0x0007
9399 +#define IPIPE_TYPE_BITS 3
9401 +#ifdef CONFIG_IPIPE_TRACE_VMALLOC
9402 +static DEFINE_PER_CPU(struct ipipe_trace_path *, trace_path);
9403 +#else /* !CONFIG_IPIPE_TRACE_VMALLOC */
9404 +static DEFINE_PER_CPU(struct ipipe_trace_path, trace_path[IPIPE_TRACE_PATHS]) =
9405 + { [0 ... IPIPE_TRACE_PATHS-1] = { .begin = -1, .end = -1 } };
9406 +#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
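A quick sizing note on the storage declared above (the layout is assumed for x86-64 with natural padding, and CONFIG_IPIPE_TRACE_SHIFT = 14 is just an example value): struct ipipe_trace_point packs to roughly 40 bytes (2 + 2 bytes of type/flags, 4 bytes of padding, then four 8-byte fields), so one ipipe_trace_path carries

    16384 points * 40 bytes ≈ 640 KiB

and the four paths kept per CPU amount to about 2.5 MiB per CPU, which is presumably why CONFIG_IPIPE_TRACE_VMALLOC is offered as an alternative to static per-cpu storage.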
9408 +int ipipe_trace_enable = 0;
9410 +static DEFINE_PER_CPU(int, active_path) = { IPIPE_DEFAULT_ACTIVE };
9411 +static DEFINE_PER_CPU(int, max_path) = { IPIPE_DEFAULT_MAX };
9412 +static DEFINE_PER_CPU(int, frozen_path) = { IPIPE_DEFAULT_FROZEN };
9413 +static IPIPE_DEFINE_SPINLOCK(global_path_lock);
9414 +static int pre_trace = IPIPE_DEFAULT_PRE_TRACE;
9415 +static int post_trace = IPIPE_DEFAULT_POST_TRACE;
9416 +static int back_trace = IPIPE_DEFAULT_BACK_TRACE;
9417 +static int verbose_trace = 1;
9418 +static unsigned long trace_overhead;
9420 +static unsigned long trigger_begin;
9421 +static unsigned long trigger_end;
9423 +static DEFINE_MUTEX(out_mutex);
9424 +static struct ipipe_trace_path *print_path;
9425 +#ifdef CONFIG_IPIPE_TRACE_PANIC
9426 +static struct ipipe_trace_path *panic_path;
9427 +#endif /* CONFIG_IPIPE_TRACE_PANIC */
9428 +static int print_pre_trace;
9429 +static int print_post_trace;
9432 +static long __ipipe_signed_tsc2us(long long tsc);
9433 +static void
9434 +__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point);
9435 +static void __ipipe_print_symname(struct seq_file *m, unsigned long eip);
9438 +static notrace void
9439 +__ipipe_store_domain_states(struct ipipe_trace_point *point)
9441 + struct ipipe_domain *ipd;
9442 + struct list_head *pos;
9443 + int i = 0;
9445 + list_for_each_prev(pos, &__ipipe_pipeline) {
9446 + ipd = list_entry(pos, struct ipipe_domain, p_link);
9448 + if (test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)))
9449 + point->flags |= 1 << (i + IPIPE_TFLG_DOMSTATE_SHIFT);
9451 + if (ipd == __ipipe_current_domain)
9452 + point->flags |= i << IPIPE_TFLG_CURRDOM_SHIFT;
9454 + if (++i > IPIPE_TFLG_DOMSTATE_BITS)
9455 + break;
9459 +static notrace int __ipipe_get_free_trace_path(int old, int cpu)
9461 + int new_active = old;
9462 + struct ipipe_trace_path *tp;
9464 + do {
9465 + if (++new_active == IPIPE_TRACE_PATHS)
9466 + new_active = 0;
9467 + tp = &per_cpu(trace_path, cpu)[new_active];
9468 + } while (new_active == per_cpu(max_path, cpu) ||
9469 + new_active == per_cpu(frozen_path, cpu) ||
9470 + tp->dump_lock);
9472 + return new_active;
9475 +static notrace void
9476 +__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp,
9477 + struct ipipe_trace_path *old_tp, int old_pos)
9479 + int i;
9481 + new_tp->trace_pos = pre_trace+1;
9483 + for (i = new_tp->trace_pos; i > 0; i--)
9484 + memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)],
9485 + &old_tp->point[WRAP_POINT_NO(old_pos-i)],
9486 + sizeof(struct ipipe_trace_point));
9488 + /* mark the end (i.e. the point before point[0]) invalid */
9489 + new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0;
9492 +static notrace struct ipipe_trace_path *
9493 +__ipipe_trace_end(int cpu, struct ipipe_trace_path *tp, int pos)
9495 + struct ipipe_trace_path *old_tp = tp;
9496 + long active = per_cpu(active_path, cpu);
9497 + unsigned long long length;
9499 + /* do we have a new worst case? */
9500 + length = tp->point[tp->end].timestamp -
9501 + tp->point[tp->begin].timestamp;
9502 + if (length > per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)].length) {
9503 + /* we need protection here against other cpus trying
9504 + to start a proc dump */
9505 + spin_lock(&global_path_lock);
9507 + /* active path holds new worst case */
9508 + tp->length = length;
9509 + per_cpu(max_path, cpu) = active;
9511 + /* find next unused trace path */
9512 + active = __ipipe_get_free_trace_path(active, cpu);
9514 + spin_unlock(&global_path_lock);
9516 + tp = &per_cpu(trace_path, cpu)[active];
9518 + /* migrate last entries for pre-tracing */
9519 + __ipipe_migrate_pre_trace(tp, old_tp, pos);
9522 + return tp;
9525 +static notrace struct ipipe_trace_path *
9526 +__ipipe_trace_freeze(int cpu, struct ipipe_trace_path *tp, int pos)
9528 + struct ipipe_trace_path *old_tp = tp;
9529 + long active = per_cpu(active_path, cpu);
9530 + int n;
9532 + /* frozen paths have no core (begin=end) */
9533 + tp->begin = tp->end;
9535 + /* we need protection here against other cpus trying
9536 + * to set their frozen path or to start a proc dump */
9537 + spin_lock(&global_path_lock);
9539 + per_cpu(frozen_path, cpu) = active;
9541 + /* find next unused trace path */
9542 + active = __ipipe_get_free_trace_path(active, cpu);
9544 + /* check if this is the first frozen path */
9545 + for_each_possible_cpu(n) {
9546 + if (n != cpu &&
9547 + per_cpu(trace_path, n)[per_cpu(frozen_path, n)].end >= 0)
9548 + tp->end = -1;
9551 + spin_unlock(&global_path_lock);
9553 + tp = &per_cpu(trace_path, cpu)[active];
9555 + /* migrate last entries for pre-tracing */
9556 + __ipipe_migrate_pre_trace(tp, old_tp, pos);
9558 + return tp;
9561 +void notrace
9562 +__ipipe_trace(enum ipipe_trace_type type, unsigned long eip,
9563 + unsigned long parent_eip, unsigned long v)
9565 + struct ipipe_trace_path *tp, *old_tp;
9566 + int pos, next_pos, begin;
9567 + struct ipipe_trace_point *point;
9568 + unsigned long flags;
9569 + int cpu;
9571 + local_irq_save_hw_notrace(flags);
9573 + cpu = ipipe_processor_id();
9574 + restart:
9575 + tp = old_tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
9577 + /* here starts a race window with NMIs - caught below */
9579 + /* check for NMI recursion */
9580 + if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) {
9581 + tp->flags |= IPIPE_TFLG_NMI_HIT;
9583 + /* first freeze request from NMI context? */
9584 + if ((type == IPIPE_TRACE_FREEZE) &&
9585 + !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) {
9586 + /* save arguments and mark deferred freezing */
9587 + tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ;
9588 + tp->nmi_saved_eip = eip;
9589 + tp->nmi_saved_parent_eip = parent_eip;
9590 + tp->nmi_saved_v = v;
9592 + return; /* no need for restoring flags inside IRQ */
9595 + /* clear NMI events and set lock (atomically per cpu) */
9596 + tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT |
9597 + IPIPE_TFLG_NMI_FREEZE_REQ))
9598 + | IPIPE_TFLG_NMI_LOCK;
9600 + /* check active_path again - some nasty NMI may have switched
9601 + * it meanwhile */
9602 + if (unlikely(tp !=
9603 + &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)])) {
9604 + /* release lock on wrong path and restart */
9605 + tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
9607 + /* there is no chance that the NMI got deferred
9608 + * => no need to check for pending freeze requests */
9609 + goto restart;
9612 + /* get the point buffer */
9613 + pos = tp->trace_pos;
9614 + point = &tp->point[pos];
9616 + /* store all trace point data */
9617 + point->type = type;
9618 + point->flags = raw_irqs_disabled_flags(flags) ? IPIPE_TFLG_HWIRQ_OFF : 0;
9619 + point->eip = eip;
9620 + point->parent_eip = parent_eip;
9621 + point->v = v;
9622 + ipipe_read_tsc(point->timestamp);
9624 + __ipipe_store_domain_states(point);
9626 + /* forward to next point buffer */
9627 + next_pos = WRAP_POINT_NO(pos+1);
9628 + tp->trace_pos = next_pos;
9630 + /* only mark beginning if we haven't started yet */
9631 + begin = tp->begin;
9632 + if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0))
9633 + tp->begin = pos;
9635 + /* end of critical path, start post-trace if not already started */
9636 + if (unlikely(type == IPIPE_TRACE_END) &&
9637 + (begin >= 0) && !tp->post_trace)
9638 + tp->post_trace = post_trace + 1;
9640 + /* freeze only if the slot is free and we are not already freezing */
9641 + if ((unlikely(type == IPIPE_TRACE_FREEZE) ||
9642 + (unlikely(eip >= trigger_begin && eip <= trigger_end) &&
9643 + type == IPIPE_TRACE_FUNC)) &&
9644 + per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)].begin < 0 &&
9645 + !(tp->flags & IPIPE_TFLG_FREEZING)) {
9646 + tp->post_trace = post_trace + 1;
9647 + tp->flags |= IPIPE_TFLG_FREEZING;
9650 + /* enforce end of trace in case of overflow */
9651 + if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) {
9652 + tp->end = pos;
9653 + goto enforce_end;
9656 + /* stop tracing this path if we are in post-trace and
9657 + * a) that phase is over now or
9658 + * b) a new TRACE_BEGIN came in but we are not freezing this path */
9659 + if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) ||
9660 + ((type == IPIPE_TRACE_BEGIN) &&
9661 + !(tp->flags & IPIPE_TFLG_FREEZING))))) {
9662 + /* store the path's end (i.e. excluding post-trace) */
9663 + tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace);
9665 + enforce_end:
9666 + if (tp->flags & IPIPE_TFLG_FREEZING)
9667 + tp = __ipipe_trace_freeze(cpu, tp, pos);
9668 + else
9669 + tp = __ipipe_trace_end(cpu, tp, pos);
9671 + /* reset the active path, maybe already start a new one */
9672 + tp->begin = (type == IPIPE_TRACE_BEGIN) ?
9673 + WRAP_POINT_NO(tp->trace_pos - 1) : -1;
9674 + tp->end = -1;
9675 + tp->post_trace = 0;
9676 + tp->flags = 0;
9678 + /* update active_path not earlier to avoid races with NMIs */
9679 + per_cpu(active_path, cpu) = tp - per_cpu(trace_path, cpu);
9682 + /* we still have old_tp and point,
9683 + * let's reset NMI lock and check for catches */
9684 + old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
9685 + if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) {
9686 + /* well, this late tagging may not immediately be visible to
9687 + * other cpus already dumping this path - a minor issue */
9688 + point->flags |= IPIPE_TFLG_NMI_HIT;
9690 + /* handle deferred freezing from NMI context */
9691 + if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
9692 + __ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip,
9693 + old_tp->nmi_saved_parent_eip,
9694 + old_tp->nmi_saved_v);
9697 + local_irq_restore_hw_notrace(flags);
9700 +static unsigned long __ipipe_global_path_lock(void)
9702 + unsigned long flags;
9703 + int cpu;
9704 + struct ipipe_trace_path *tp;
9706 + spin_lock_irqsave(&global_path_lock, flags);
9708 + cpu = ipipe_processor_id();
9709 + restart:
9710 + tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
9712 + /* here is a small race window with NMIs - caught below */
9714 + /* clear NMI events and set lock (atomically per cpu) */
9715 + tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT |
9716 + IPIPE_TFLG_NMI_FREEZE_REQ))
9717 + | IPIPE_TFLG_NMI_LOCK;
9719 + /* check active_path again - some nasty NMI may have switched
9720 + * it meanwhile */
9721 + if (tp != &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]) {
9722 + /* release lock on wrong path and restart */
9723 + tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
9725 + /* there is no chance that the NMI got deferred
9726 + * => no need to check for pending freeze requests */
9727 + goto restart;
9730 + return flags;
9733 +static void __ipipe_global_path_unlock(unsigned long flags)
9735 + int cpu;
9736 + struct ipipe_trace_path *tp;
9738 + /* release spinlock first - it's not involved in the NMI issue */
9739 + __ipipe_spin_unlock_irqbegin(&global_path_lock);
9741 + cpu = ipipe_processor_id();
9742 + tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
9744 + tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
9746 + /* handle deferred freezing from NMI context */
9747 + if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
9748 + __ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip,
9749 + tp->nmi_saved_parent_eip, tp->nmi_saved_v);
9751 + /* See __ipipe_spin_lock_irqsave() and friends. */
9752 + __ipipe_spin_unlock_irqcomplete(flags);
9755 +void notrace ipipe_trace_begin(unsigned long v)
9757 + if (!ipipe_trace_enable)
9758 + return;
9759 + __ipipe_trace(IPIPE_TRACE_BEGIN, __BUILTIN_RETURN_ADDRESS0,
9760 + __BUILTIN_RETURN_ADDRESS1, v);
9762 +EXPORT_SYMBOL(ipipe_trace_begin);
9764 +void notrace ipipe_trace_end(unsigned long v)
9766 + if (!ipipe_trace_enable)
9767 + return;
9768 + __ipipe_trace(IPIPE_TRACE_END, __BUILTIN_RETURN_ADDRESS0,
9769 + __BUILTIN_RETURN_ADDRESS1, v);
9771 +EXPORT_SYMBOL(ipipe_trace_end);
9773 +void notrace ipipe_trace_freeze(unsigned long v)
9775 + if (!ipipe_trace_enable)
9776 + return;
9777 + __ipipe_trace(IPIPE_TRACE_FREEZE, __BUILTIN_RETURN_ADDRESS0,
9778 + __BUILTIN_RETURN_ADDRESS1, v);
9780 +EXPORT_SYMBOL(ipipe_trace_freeze);
9782 +void notrace ipipe_trace_special(unsigned char id, unsigned long v)
9784 + if (!ipipe_trace_enable)
9785 + return;
9786 + __ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS),
9787 + __BUILTIN_RETURN_ADDRESS0,
9788 + __BUILTIN_RETURN_ADDRESS1, v);
9790 +EXPORT_SYMBOL(ipipe_trace_special);
9792 +void notrace ipipe_trace_pid(pid_t pid, short prio)
9794 + if (!ipipe_trace_enable)
9795 + return;
9796 + __ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS),
9797 + __BUILTIN_RETURN_ADDRESS0,
9798 + __BUILTIN_RETURN_ADDRESS1, pid);
9800 +EXPORT_SYMBOL(ipipe_trace_pid);
9802 +void notrace ipipe_trace_event(unsigned char id, unsigned long delay_tsc)
9804 + if (!ipipe_trace_enable)
9805 + return;
9806 + __ipipe_trace(IPIPE_TRACE_EVENT | (id << IPIPE_TYPE_BITS),
9807 + __BUILTIN_RETURN_ADDRESS0,
9808 + __BUILTIN_RETURN_ADDRESS1, delay_tsc);
9810 +EXPORT_SYMBOL(ipipe_trace_event);
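ipipe_trace_begin/end/freeze/special/pid/event above are the instrumentation entry points kernel code can call directly: begin/end bracket a section whose worst case should show up under /proc/ipipe/trace/max, while freeze snapshots a back-trace into the frozen slot. A small illustrative use (the user values and the 100 us threshold are arbitrary, and the surrounding function is made up):

    static void my_latency_sensitive_path(void)
    {
            unsigned long long t0, t1;

            ipipe_trace_begin(0xbeef0001);          /* arbitrary user value */
            ipipe_read_tsc(t0);

            /* ... work whose worst-case duration we want traced ... */

            ipipe_read_tsc(t1);
            ipipe_trace_end(0xbeef0001);

            /* Freeze the back-trace once if this run looks pathological. */
            if (ipipe_tsc2us(t1 - t0) > 100)
                    ipipe_trace_freeze(ipipe_tsc2us(t1 - t0));
    }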
9812 +int ipipe_trace_max_reset(void)
9814 + int cpu;
9815 + unsigned long flags;
9816 + struct ipipe_trace_path *path;
9817 + int ret = 0;
9819 + flags = __ipipe_global_path_lock();
9821 + for_each_possible_cpu(cpu) {
9822 + path = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)];
9824 + if (path->dump_lock) {
9825 + ret = -EBUSY;
9826 + break;
9829 + path->begin = -1;
9830 + path->end = -1;
9831 + path->trace_pos = 0;
9832 + path->length = 0;
9835 + __ipipe_global_path_unlock(flags);
9837 + return ret;
9839 +EXPORT_SYMBOL(ipipe_trace_max_reset);
9841 +int ipipe_trace_frozen_reset(void)
9843 + int cpu;
9844 + unsigned long flags;
9845 + struct ipipe_trace_path *path;
9846 + int ret = 0;
9848 + flags = __ipipe_global_path_lock();
9850 + for_each_online_cpu(cpu) {
9851 + path = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)];
9853 + if (path->dump_lock) {
9854 + ret = -EBUSY;
9855 + break;
9858 + path->begin = -1;
9859 + path->end = -1;
9860 + path->trace_pos = 0;
9861 + path->length = 0;
9864 + __ipipe_global_path_unlock(flags);
9866 + return ret;
9868 +EXPORT_SYMBOL(ipipe_trace_frozen_reset);
9870 +static void
9871 +__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point,
9872 + int trylock)
9874 + struct task_struct *task = NULL;
9875 + char buf[8];
9876 + int i;
9877 + int locked = 1;
9879 + if (trylock) {
9880 + if (!read_trylock(&tasklist_lock))
9881 + locked = 0;
9882 + } else
9883 + read_lock(&tasklist_lock);
9885 + if (locked)
9886 + task = find_task_by_pid_ns((pid_t)point->v, &init_pid_ns);
9888 + if (task)
9889 + strncpy(task_info, task->comm, 11);
9890 + else
9891 + strcpy(task_info, "-<?>-");
9893 + if (locked)
9894 + read_unlock(&tasklist_lock);
9896 + for (i = strlen(task_info); i < 11; i++)
9897 + task_info[i] = ' ';
9899 + sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS);
9900 + strcpy(task_info + (11 - strlen(buf)), buf);
9903 +static void
9904 +__ipipe_get_event_date(char *buf,struct ipipe_trace_path *path,
9905 + struct ipipe_trace_point *point)
9907 + long time;
9908 + int type;
9910 + time = __ipipe_signed_tsc2us(point->timestamp -
9911 + path->point[path->begin].timestamp + point->v);
9912 + type = point->type >> IPIPE_TYPE_BITS;
9914 + if (type == 0)
9915 + /*
9916 + * Event type #0 is predefined, stands for the next
9917 + * timer tick.
9918 + */
9919 + sprintf(buf, "tick@%-6ld", time);
9920 + else
9921 + sprintf(buf, "%3d@%-7ld", type, time);
9924 +#ifdef CONFIG_IPIPE_TRACE_PANIC
9925 +void ipipe_trace_panic_freeze(void)
9927 + unsigned long flags;
9928 + int cpu;
9930 + if (!ipipe_trace_enable)
9931 + return;
9933 + ipipe_trace_enable = 0;
9934 + local_irq_save_hw_notrace(flags);
9936 + cpu = ipipe_processor_id();
9938 + panic_path = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
9940 + local_irq_restore_hw(flags);
9942 +EXPORT_SYMBOL(ipipe_trace_panic_freeze);
9944 +void ipipe_trace_panic_dump(void)
9946 + int cnt = back_trace;
9947 + int start, pos;
9948 + char buf[16];
9950 + if (!panic_path)
9951 + return;
9953 + ipipe_context_check_off();
9955 + printk("I-pipe tracer log (%d points):\n", cnt);
9957 + start = pos = WRAP_POINT_NO(panic_path->trace_pos-1);
9959 + while (cnt-- > 0) {
9960 + struct ipipe_trace_point *point = &panic_path->point[pos];
9961 + long time;
9962 + char info[16];
9963 + int i;
9965 + printk(" %c",
9966 + (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' ');
9968 + for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--)
9969 + printk("%c",
9970 + (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ?
9971 + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
9972 + '#' : '+') :
9973 + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
9974 + '*' : ' '));
9976 + if (!point->eip)
9977 + printk("-<invalid>-\n");
9978 + else {
9979 + __ipipe_trace_point_type(buf, point);
9980 + printk("%s", buf);
9982 + switch (point->type & IPIPE_TYPE_MASK) {
9983 + case IPIPE_TRACE_FUNC:
9984 + printk(" ");
9985 + break;
9987 + case IPIPE_TRACE_PID:
9988 + __ipipe_get_task_info(info,
9989 + point, 1);
9990 + printk("%s", info);
9991 + break;
9993 + case IPIPE_TRACE_EVENT:
9994 + __ipipe_get_event_date(info,
9995 + panic_path, point);
9996 + printk("%s", info);
9997 + break;
9999 + default:
10000 + printk("0x%08lx ", point->v);
10003 + time = __ipipe_signed_tsc2us(point->timestamp -
10004 + panic_path->point[start].timestamp);
10005 + printk(" %5ld ", time);
10007 + __ipipe_print_symname(NULL, point->eip);
10008 + printk(" (");
10009 + __ipipe_print_symname(NULL, point->parent_eip);
10010 + printk(")\n");
10012 + pos = WRAP_POINT_NO(pos - 1);
10015 + panic_path = NULL;
10017 +EXPORT_SYMBOL(ipipe_trace_panic_dump);
10018 +#endif /* CONFIG_IPIPE_TRACE_PANIC */
10021 +/* --- /proc output --- */
10023 +static notrace int __ipipe_in_critical_trpath(long point_no)
10025 + return ((WRAP_POINT_NO(point_no-print_path->begin) <
10026 + WRAP_POINT_NO(print_path->end-print_path->begin)) ||
10027 + ((print_path->end == print_path->begin) &&
10028 + (WRAP_POINT_NO(point_no-print_path->end) >
10029 + print_post_trace)));
10032 +static long __ipipe_signed_tsc2us(long long tsc)
10034 + unsigned long long abs_tsc;
10035 + long us;
10037 + /* ipipe_tsc2us works on unsigned => handle sign separately */
10038 + abs_tsc = (tsc >= 0) ? tsc : -tsc;
10039 + us = ipipe_tsc2us(abs_tsc);
10040 + if (tsc < 0)
10041 + return -us;
10042 + else
10043 + return us;
10046 +static void
10047 +__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point)
10049 + switch (point->type & IPIPE_TYPE_MASK) {
10050 + case IPIPE_TRACE_FUNC:
10051 + strcpy(buf, "func ");
10052 + break;
10054 + case IPIPE_TRACE_BEGIN:
10055 + strcpy(buf, "begin ");
10056 + break;
10058 + case IPIPE_TRACE_END:
10059 + strcpy(buf, "end ");
10060 + break;
10062 + case IPIPE_TRACE_FREEZE:
10063 + strcpy(buf, "freeze ");
10064 + break;
10066 + case IPIPE_TRACE_SPECIAL:
10067 + sprintf(buf, "(0x%02x) ",
10068 + point->type >> IPIPE_TYPE_BITS);
10069 + break;
10071 + case IPIPE_TRACE_PID:
10072 + sprintf(buf, "[%5d] ", (pid_t)point->v);
10073 + break;
10075 + case IPIPE_TRACE_EVENT:
10076 + sprintf(buf, "event ");
10077 + break;
10081 +static void
10082 +__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point)
10084 + char mark = ' ';
10085 + int point_no = point - print_path->point;
10086 + int i;
10088 + if (print_path->end == point_no)
10089 + mark = '<';
10090 + else if (print_path->begin == point_no)
10091 + mark = '>';
10092 + else if (__ipipe_in_critical_trpath(point_no))
10093 + mark = ':';
10094 + seq_printf(m, "%c%c", mark,
10095 + (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' ');
10097 + if (!verbose_trace)
10098 + return;
10100 + for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--)
10101 + seq_printf(m, "%c",
10102 + (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ?
10103 + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
10104 + '#' : '+') :
10105 + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' '));
10108 +static void
10109 +__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point)
10111 + unsigned long delay = 0;
10112 + int next;
10113 + char *mark = " ";
10115 + next = WRAP_POINT_NO(point+1 - print_path->point);
10117 + if (next != print_path->trace_pos)
10118 + delay = ipipe_tsc2ns(print_path->point[next].timestamp -
10119 + point->timestamp);
10121 + if (__ipipe_in_critical_trpath(point - print_path->point)) {
10122 + if (delay > IPIPE_DELAY_WARN)
10123 + mark = "! ";
10124 + else if (delay > IPIPE_DELAY_NOTE)
10125 + mark = "+ ";
10127 + seq_puts(m, mark);
10129 + if (verbose_trace)
10130 + seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000,
10131 + (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' ');
10132 + else
10133 + seq_puts(m, " ");
10136 +static void __ipipe_print_symname(struct seq_file *m, unsigned long eip)
10138 + char namebuf[KSYM_NAME_LEN+1];
10139 + unsigned long size, offset;
10140 + const char *sym_name;
10141 + char *modname;
10143 + sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf);
10145 +#ifdef CONFIG_IPIPE_TRACE_PANIC
10146 + if (!m) {
10147 + /* panic dump */
10148 + if (sym_name) {
10149 + printk("%s+0x%lx", sym_name, offset);
10150 + if (modname)
10151 + printk(" [%s]", modname);
10153 + } else
10154 +#endif /* CONFIG_IPIPE_TRACE_PANIC */
10156 + if (sym_name) {
10157 + if (verbose_trace) {
10158 + seq_printf(m, "%s+0x%lx", sym_name, offset);
10159 + if (modname)
10160 + seq_printf(m, " [%s]", modname);
10161 + } else
10162 + seq_puts(m, sym_name);
10163 + } else
10164 + seq_printf(m, "<%08lx>", eip);
10168 +static void __ipipe_print_headline(struct seq_file *m)
10170 + seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu "
10171 + "us\n\n", trace_overhead/1000, trace_overhead%1000);
10173 + if (verbose_trace) {
10174 + const char *name[4] = { [0 ... 3] = "<unused>" };
10175 + struct list_head *pos;
10176 + int i = 0;
10178 + list_for_each_prev(pos, &__ipipe_pipeline) {
10179 + struct ipipe_domain *ipd =
10180 + list_entry(pos, struct ipipe_domain, p_link);
10182 + name[i] = ipd->name;
10183 + if (++i > 3)
10184 + break;
10187 + seq_printf(m,
10188 + " +----- Hard IRQs ('|': locked)\n"
10189 + " |+---- %s\n"
10190 + " ||+--- %s\n"
10191 + " |||+-- %s\n"
10192 + " ||||+- %s%s\n"
10193 + " ||||| +---------- "
10194 + "Delay flag ('+': > %d us, '!': > %d us)\n"
10195 + " ||||| | +- "
10196 + "NMI noise ('N')\n"
10197 + " ||||| | |\n"
10198 + " Type User Val. Time Delay Function "
10199 + "(Parent)\n",
10200 + name[3], name[2], name[1], name[0],
10201 + name[0] ? " ('*': domain stalled, '+': current, "
10202 + "'#': current+stalled)" : "",
10203 + IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000);
10204 + } else
10205 + seq_printf(m,
10206 + " +--------------- Hard IRQs ('|': locked)\n"
10207 + " | +- Delay flag "
10208 + "('+': > %d us, '!': > %d us)\n"
10209 + " | |\n"
10210 + " Type Time Function (Parent)\n",
10211 + IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000);
10214 +static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos)
10216 + loff_t n = *pos;
10218 + mutex_lock(&out_mutex);
10220 + if (!n) {
10221 + struct ipipe_trace_path *tp;
10222 + unsigned long length_usecs;
10223 + int points, cpu;
10224 + unsigned long flags;
10226 + /* protect against max_path/frozen_path updates while we
10227 + * haven't locked our target path, also avoid recursively
10228 + * taking global_path_lock from NMI context */
10229 + flags = __ipipe_global_path_lock();
10231 + /* find the longest of all per-cpu paths */
10232 + print_path = NULL;
10233 + for_each_online_cpu(cpu) {
10234 + tp = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)];
10235 + if ((print_path == NULL) ||
10236 + (tp->length > print_path->length)) {
10237 + print_path = tp;
10238 + break;
10241 + print_path->dump_lock = 1;
10243 + __ipipe_global_path_unlock(flags);
10245 + /* does this path actually contain data? */
10246 + if (print_path->end == print_path->begin)
10247 + return NULL;
10249 + /* number of points inside the critical path */
10250 + points = WRAP_POINT_NO(print_path->end-print_path->begin+1);
10252 + /* pre- and post-tracing length, post-trace length was frozen
10253 + in __ipipe_trace, pre-trace may have to be reduced due to
10254 + buffer overrun */
10255 + print_pre_trace = pre_trace;
10256 + print_post_trace = WRAP_POINT_NO(print_path->trace_pos -
10257 + print_path->end - 1);
10258 + if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1)
10259 + print_pre_trace = IPIPE_TRACE_POINTS - 1 - points -
10260 + print_post_trace;
10262 + length_usecs = ipipe_tsc2us(print_path->length);
10263 + seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe-%s\n"
10264 + "------------------------------------------------------------\n",
10265 + UTS_RELEASE, IPIPE_ARCH_STRING);
10266 + seq_printf(m, "CPU: %d, Begin: %lld cycles, Trace Points: "
10267 + "%d (-%d/+%d), Length: %lu us\n",
10268 + cpu, print_path->point[print_path->begin].timestamp,
10269 + points, print_pre_trace, print_post_trace, length_usecs);
10270 + __ipipe_print_headline(m);
10273 + /* check if we are inside the trace range */
10274 + if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 +
10275 + print_pre_trace + print_post_trace))
10276 + return NULL;
10278 + /* return the next point to be shown */
10279 + return &print_path->point[WRAP_POINT_NO(print_path->begin -
10280 + print_pre_trace + n)];
10283 +static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos)
10285 + loff_t n = ++*pos;
10287 + /* check if we are inside the trace range with the next entry */
10288 + if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 +
10289 + print_pre_trace + print_post_trace))
10290 + return NULL;
10292 + /* return the next point to be shown */
10293 + return &print_path->point[WRAP_POINT_NO(print_path->begin -
10294 + print_pre_trace + *pos)];
10297 +static void __ipipe_prtrace_stop(struct seq_file *m, void *p)
10299 + if (print_path)
10300 + print_path->dump_lock = 0;
10301 + mutex_unlock(&out_mutex);
10304 +static int __ipipe_prtrace_show(struct seq_file *m, void *p)
10306 + long time;
10307 + struct ipipe_trace_point *point = p;
10308 + char buf[16];
10310 + if (!point->eip) {
10311 + seq_puts(m, "-<invalid>-\n");
10312 + return 0;
10315 + __ipipe_print_pathmark(m, point);
10316 + __ipipe_trace_point_type(buf, point);
10317 + seq_puts(m, buf);
10318 + if (verbose_trace)
10319 + switch (point->type & IPIPE_TYPE_MASK) {
10320 + case IPIPE_TRACE_FUNC:
10321 + seq_puts(m, " ");
10322 + break;
10324 + case IPIPE_TRACE_PID:
10325 + __ipipe_get_task_info(buf, point, 0);
10326 + seq_puts(m, buf);
10327 + break;
10329 + case IPIPE_TRACE_EVENT:
10330 + __ipipe_get_event_date(buf, print_path, point);
10331 + seq_puts(m, buf);
10332 + break;
10334 + default:
10335 + seq_printf(m, "0x%08lx ", point->v);
10338 + time = __ipipe_signed_tsc2us(point->timestamp -
10339 + print_path->point[print_path->begin].timestamp);
10340 + seq_printf(m, "%5ld", time);
10342 + __ipipe_print_delay(m, point);
10343 + __ipipe_print_symname(m, point->eip);
10344 + seq_puts(m, " (");
10345 + __ipipe_print_symname(m, point->parent_eip);
10346 + seq_puts(m, ")\n");
10348 + return 0;
10351 +static struct seq_operations __ipipe_max_ptrace_ops = {
10352 + .start = __ipipe_max_prtrace_start,
10353 + .next = __ipipe_prtrace_next,
10354 + .stop = __ipipe_prtrace_stop,
10355 + .show = __ipipe_prtrace_show
10358 +static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file)
10360 + return seq_open(file, &__ipipe_max_ptrace_ops);
10363 +static ssize_t
10364 +__ipipe_max_reset(struct file *file, const char __user *pbuffer,
10365 + size_t count, loff_t *data)
10367 + mutex_lock(&out_mutex);
10368 + ipipe_trace_max_reset();
10369 + mutex_unlock(&out_mutex);
10371 + return count;
10374 +struct file_operations __ipipe_max_prtrace_fops = {
10375 + .open = __ipipe_max_prtrace_open,
10376 + .read = seq_read,
10377 + .write = __ipipe_max_reset,
10378 + .llseek = seq_lseek,
10379 + .release = seq_release,
10382 +static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos)
10384 + loff_t n = *pos;
10386 + mutex_lock(&out_mutex);
10388 + if (!n) {
10389 + struct ipipe_trace_path *tp;
10390 + int cpu;
10391 + unsigned long flags;
10393 + /* protect against max_path/frozen_path updates while we
10394 + * haven't locked our target path, also avoid recursively
10395 + * taking global_path_lock from NMI context */
10396 + flags = __ipipe_global_path_lock();
10398 + /* find the first of all per-cpu frozen paths */
10399 + print_path = NULL;
10400 + for_each_online_cpu(cpu) {
10401 + tp = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)];
10402 + if (tp->end >= 0) {
10403 + print_path = tp;
10404 + break;
10407 + if (print_path)
10408 + print_path->dump_lock = 1;
10410 + __ipipe_global_path_unlock(flags);
10412 + if (!print_path)
10413 + return NULL;
10415 + /* back- and post-tracing length, post-trace length was frozen
10416 + in __ipipe_trace, back-trace may have to be reduced due to
10417 + buffer overrun */
10418 + print_pre_trace = back_trace-1; /* subtract freeze point */
10419 + print_post_trace = WRAP_POINT_NO(print_path->trace_pos -
10420 + print_path->end - 1);
10421 + if (1+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1)
10422 + print_pre_trace = IPIPE_TRACE_POINTS - 2 -
10423 + print_post_trace;
10425 + seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe-%s\n"
10426 + "------------------------------------------------------"
10427 + "------\n",
10428 + UTS_RELEASE, IPIPE_ARCH_STRING);
10429 + seq_printf(m, "CPU: %d, Freeze: %lld cycles, Trace Points: %d (+%d)\n",
10430 + cpu, print_path->point[print_path->begin].timestamp,
10431 + print_pre_trace+1, print_post_trace);
10432 + __ipipe_print_headline(m);
10435 + /* check if we are inside the trace range */
10436 + if (n >= print_pre_trace + 1 + print_post_trace)
10437 + return NULL;
10439 + /* return the next point to be shown */
10440 + return &print_path->point[WRAP_POINT_NO(print_path->begin-
10441 + print_pre_trace+n)];
10444 +static struct seq_operations __ipipe_frozen_ptrace_ops = {
10445 + .start = __ipipe_frozen_prtrace_start,
10446 + .next = __ipipe_prtrace_next,
10447 + .stop = __ipipe_prtrace_stop,
10448 + .show = __ipipe_prtrace_show
10451 +static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file)
10453 + return seq_open(file, &__ipipe_frozen_ptrace_ops);
10456 +static ssize_t
10457 +__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer,
10458 + size_t count, loff_t *data)
10460 + char *end, buf[16];
10461 + int val;
10462 + int n;
10464 + n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
10466 + if (copy_from_user(buf, pbuffer, n))
10467 + return -EFAULT;
10469 + buf[n] = '\0';
10470 + val = simple_strtol(buf, &end, 0);
10472 + if (((*end != '\0') && !isspace(*end)) || (val < 0))
10473 + return -EINVAL;
10475 + mutex_lock(&out_mutex);
10476 + ipipe_trace_frozen_reset();
10477 + if (val > 0)
10478 + ipipe_trace_freeze(-1);
10479 + mutex_unlock(&out_mutex);
10481 + return count;
10484 +struct file_operations __ipipe_frozen_prtrace_fops = {
10485 + .open = __ipipe_frozen_prtrace_open,
10486 + .read = seq_read,
10487 + .write = __ipipe_frozen_ctrl,
10488 + .llseek = seq_lseek,
10489 + .release = seq_release,
10492 +static int __ipipe_rd_proc_val(char *page, char **start, off_t off,
10493 + int count, int *eof, void *data)
10495 + int len;
10497 + len = sprintf(page, "%u\n", *(int *)data);
10498 + len -= off;
10499 + if (len <= off + count)
10500 + *eof = 1;
10501 + *start = page + off;
10502 + if (len > count)
10503 + len = count;
10504 + if (len < 0)
10505 + len = 0;
10507 + return len;
10510 +static int __ipipe_wr_proc_val(struct file *file, const char __user *buffer,
10511 + unsigned long count, void *data)
10513 + char *end, buf[16];
10514 + int val;
10515 + int n;
10517 + n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
10519 + if (copy_from_user(buf, buffer, n))
10520 + return -EFAULT;
10522 + buf[n] = '\0';
10523 + val = simple_strtol(buf, &end, 0);
10525 + if (((*end != '\0') && !isspace(*end)) || (val < 0))
10526 + return -EINVAL;
10528 + mutex_lock(&out_mutex);
10529 + *(int *)data = val;
10530 + mutex_unlock(&out_mutex);
10532 + return count;
10535 +static int __ipipe_rd_trigger(char *page, char **start, off_t off, int count,
10536 + int *eof, void *data)
10538 + int len;
10540 + if (!trigger_begin)
10541 + return 0;
10543 + len = sprint_symbol(page, trigger_begin);
10544 + page[len++] = '\n';
10546 + len -= off;
10547 + if (len <= off + count)
10548 + *eof = 1;
10549 + *start = page + off;
10550 + if (len > count)
10551 + len = count;
10552 + if (len < 0)
10553 + len = 0;
10555 + return len;
10558 +static int __ipipe_wr_trigger(struct file *file, const char __user *buffer,
10559 + unsigned long count, void *data)
10561 + char buf[KSYM_SYMBOL_LEN];
10562 + unsigned long begin, end;
10564 + if (count > sizeof(buf) - 1)
10565 + count = sizeof(buf) - 1;
10566 + if (copy_from_user(buf, buffer, count))
10567 + return -EFAULT;
10568 + buf[count] = 0;
10569 + if (buf[count-1] == '\n')
10570 + buf[count-1] = 0;
10572 + begin = kallsyms_lookup_name(buf);
10573 + if (!begin || !kallsyms_lookup_size_offset(begin, &end, NULL))
10574 + return -ENOENT;
10575 + end += begin - 1;
10577 + mutex_lock(&out_mutex);
10578 + /* invalidate the current range before setting a new one */
10579 + trigger_end = 0;
10580 + wmb();
10581 + ipipe_trace_frozen_reset();
10583 + /* set new range */
10584 + trigger_begin = begin;
10585 + wmb();
10586 + trigger_end = end;
10587 + mutex_unlock(&out_mutex);
10589 + return count;
10592 +#ifdef CONFIG_IPIPE_TRACE_MCOUNT
10593 +static void notrace
10594 +ipipe_trace_function(unsigned long ip, unsigned long parent_ip)
10596 + if (!ipipe_trace_enable)
10597 + return;
10598 + __ipipe_trace(IPIPE_TRACE_FUNC, ip, parent_ip, 0);
10601 +static struct ftrace_ops ipipe_trace_ops = {
10602 + .func = ipipe_trace_function
10605 +static int __ipipe_wr_enable(struct file *file, const char __user *buffer,
10606 + unsigned long count, void *data)
10608 + char *end, buf[16];
10609 + int val;
10610 + int n;
10612 + n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
10614 + if (copy_from_user(buf, buffer, n))
10615 + return -EFAULT;
10617 + buf[n] = '\0';
10618 + val = simple_strtol(buf, &end, 0);
10620 + if (((*end != '\0') && !isspace(*end)) || (val < 0))
10621 + return -EINVAL;
10623 + mutex_lock(&out_mutex);
10625 + if (ipipe_trace_enable) {
10626 + if (!val)
10627 + unregister_ftrace_function(&ipipe_trace_ops);
10628 + } else if (val)
10629 + register_ftrace_function(&ipipe_trace_ops);
10631 + ipipe_trace_enable = val;
10633 + mutex_unlock(&out_mutex);
10635 + return count;
10637 +#endif /* CONFIG_IPIPE_TRACE_MCOUNT */
10639 +extern struct proc_dir_entry *ipipe_proc_root;
10641 +static struct proc_dir_entry * __init
10642 +__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir,
10643 + const char *name, int *value_ptr)
10645 + struct proc_dir_entry *entry;
10647 + entry = create_proc_entry(name, 0644, trace_dir);
10648 + if (entry) {
10649 + entry->data = value_ptr;
10650 + entry->read_proc = __ipipe_rd_proc_val;
10651 + entry->write_proc = __ipipe_wr_proc_val;
10653 + return entry;
10656 +void __init __ipipe_init_tracer(void)
10658 + struct proc_dir_entry *trace_dir;
10659 + struct proc_dir_entry *entry;
10660 + unsigned long long start, end, min = ULLONG_MAX;
10661 + int i;
10662 +#ifdef CONFIG_IPIPE_TRACE_VMALLOC
10663 + int cpu, path;
10665 + for_each_possible_cpu(cpu) {
10666 + struct ipipe_trace_path *tp_buf;
10668 + tp_buf = vmalloc_node(sizeof(struct ipipe_trace_path) *
10669 + IPIPE_TRACE_PATHS, cpu_to_node(cpu));
10670 + if (!tp_buf) {
10671 + printk(KERN_ERR "I-pipe: "
10672 + "insufficient memory for trace buffer.\n");
10673 + return;
10675 + memset(tp_buf, 0,
10676 + sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS);
10677 + for (path = 0; path < IPIPE_TRACE_PATHS; path++) {
10678 + tp_buf[path].begin = -1;
10679 + tp_buf[path].end = -1;
10681 + per_cpu(trace_path, cpu) = tp_buf;
10683 +#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
10685 + /* Calculate minimum overhead of __ipipe_trace() */
10686 + local_irq_disable_hw();
10687 + for (i = 0; i < 100; i++) {
10688 + ipipe_read_tsc(start);
10689 + __ipipe_trace(IPIPE_TRACE_FUNC, __BUILTIN_RETURN_ADDRESS0,
10690 + __BUILTIN_RETURN_ADDRESS1, 0);
10691 + ipipe_read_tsc(end);
10693 + end -= start;
10694 + if (end < min)
10695 + min = end;
10697 + local_irq_enable_hw();
10698 + trace_overhead = ipipe_tsc2ns(min);
10700 +#ifdef CONFIG_IPIPE_TRACE_ENABLE
10701 + ipipe_trace_enable = 1;
10702 +#ifdef CONFIG_IPIPE_TRACE_MCOUNT
10703 + ftrace_enabled = 1;
10704 + register_ftrace_function(&ipipe_trace_ops);
10705 +#endif /* CONFIG_IPIPE_TRACE_MCOUNT */
10706 +#endif /* CONFIG_IPIPE_TRACE_ENABLE */
10708 + trace_dir = create_proc_entry("trace", S_IFDIR, ipipe_proc_root);
10710 + entry = create_proc_entry("max", 0644, trace_dir);
10711 + if (entry)
10712 + entry->proc_fops = &__ipipe_max_prtrace_fops;
10714 + entry = create_proc_entry("frozen", 0644, trace_dir);
10715 + if (entry)
10716 + entry->proc_fops = &__ipipe_frozen_prtrace_fops;
10718 + entry = create_proc_entry("trigger", 0644, trace_dir);
10719 + if (entry) {
10720 + entry->read_proc = __ipipe_rd_trigger;
10721 + entry->write_proc = __ipipe_wr_trigger;
10724 + __ipipe_create_trace_proc_val(trace_dir, "pre_trace_points",
10725 + &pre_trace);
10726 + __ipipe_create_trace_proc_val(trace_dir, "post_trace_points",
10727 + &post_trace);
10728 + __ipipe_create_trace_proc_val(trace_dir, "back_trace_points",
10729 + &back_trace);
10730 + __ipipe_create_trace_proc_val(trace_dir, "verbose",
10731 + &verbose_trace);
10732 + entry = __ipipe_create_trace_proc_val(trace_dir, "enable",
10733 + &ipipe_trace_enable);
10734 +#ifdef CONFIG_IPIPE_TRACE_MCOUNT
10735 + if (entry)
10736 + entry->write_proc = __ipipe_wr_enable;
10737 +#endif /* CONFIG_IPIPE_TRACE_MCOUNT */
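__ipipe_init_tracer() above finishes by populating /proc/ipipe/trace/ with max, frozen, trigger, pre_trace_points, post_trace_points, back_trace_points, verbose and enable, next to the /proc/ipipe/version and per-domain entries created by ipipe_init_proc() earlier in this patch. A userspace sketch of the intended workflow, arming the freeze trigger on a symbol and dumping the frozen back-trace (the symbol name is only an example and error handling is minimal):

    #include <stdio.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/ipipe/trace/trigger", "w");

            if (!f)
                    return 1;
            fputs("do_IRQ\n", f);          /* freeze when this symbol is hit */
            fclose(f);

            /* ... let the system run, then inspect the result ... */

            f = fopen("/proc/ipipe/trace/frozen", "r");
            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
            return 0;
    }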
10739 diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
10740 index b7091d5..2997ea8 100644
10741 --- a/kernel/irq/chip.c
10742 +++ b/kernel/irq/chip.c
10743 @@ -15,6 +15,7 @@
10744 #include <linux/module.h>
10745 #include <linux/interrupt.h>
10746 #include <linux/kernel_stat.h>
10747 +#include <linux/ipipe.h>
10749 #include "internals.h"
10751 @@ -476,7 +477,9 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
10752 irqreturn_t action_ret;
10754 raw_spin_lock(&desc->lock);
10755 +#ifndef CONFIG_IPIPE
10756 mask_ack_irq(desc, irq);
10757 +#endif
10759 if (unlikely(desc->status & IRQ_INPROGRESS))
10760 goto out_unlock;
10761 @@ -553,8 +556,13 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
10763 raw_spin_lock(&desc->lock);
10764 desc->status &= ~IRQ_INPROGRESS;
10765 +#ifdef CONFIG_IPIPE
10766 + desc->chip->unmask(irq);
10767 +out:
10768 +#else
10769 out:
10770 desc->chip->eoi(irq);
10771 +#endif
10773 raw_spin_unlock(&desc->lock);
10775 @@ -596,8 +604,10 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
10776 kstat_incr_irqs_this_cpu(irq, desc);
10778 /* Start handling the irq */
10779 +#ifndef CONFIG_IPIPE
10780 if (desc->chip->ack)
10781 desc->chip->ack(irq);
10782 +#endif
10784 /* Mark the IRQ currently in progress.*/
10785 desc->status |= IRQ_INPROGRESS;
10786 @@ -650,8 +660,10 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
10788 kstat_incr_irqs_this_cpu(irq, desc);
10790 +#ifndef CONFIG_IPIPE
10791 if (desc->chip->ack)
10792 desc->chip->ack(irq);
10793 +#endif /* CONFIG_IPIPE */
10795 action_ret = handle_IRQ_event(irq, desc->action);
10796 if (!noirqdebug)
10797 @@ -661,6 +673,134 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
10798 desc->chip->eoi(irq);
10801 +#ifdef CONFIG_IPIPE
10803 +void __ipipe_ack_simple_irq(unsigned irq, struct irq_desc *desc)
10807 +void __ipipe_end_simple_irq(unsigned irq, struct irq_desc *desc)
10811 +void __ipipe_ack_level_irq(unsigned irq, struct irq_desc *desc)
10813 + mask_ack_irq(desc, irq);
10816 +void __ipipe_end_level_irq(unsigned irq, struct irq_desc *desc)
10818 + if (desc->chip->unmask)
10819 + desc->chip->unmask(irq);
10822 +void __ipipe_ack_fasteoi_irq(unsigned irq, struct irq_desc *desc)
10824 + desc->chip->eoi(irq);
10827 +void __ipipe_end_fasteoi_irq(unsigned irq, struct irq_desc *desc)
10829 + /*
10830 + * Non-requestable IRQs should not be masked in EOI handler.
10831 + */
10832 + if (!(desc->status & IRQ_NOREQUEST))
10833 + desc->chip->unmask(irq);
10836 +void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc)
10838 + desc->chip->ack(irq);
10841 +void __ipipe_ack_percpu_irq(unsigned irq, struct irq_desc *desc)
10843 + if (desc->chip->ack)
10844 + desc->chip->ack(irq);
10847 +void __ipipe_end_percpu_irq(unsigned irq, struct irq_desc *desc)
10849 + if (desc->chip->eoi)
10850 + desc->chip->eoi(irq);
10853 +void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc)
10857 +void __ipipe_ack_bad_irq(unsigned irq, struct irq_desc *desc)
10859 + static int done;
10861 + handle_bad_irq(irq, desc);
10863 + if (!done) {
10864 + printk(KERN_WARNING "%s: unknown flow handler for IRQ %d\n",
10865 + __FUNCTION__, irq);
10866 + done = 1;
10870 +void __ipipe_noack_irq(unsigned irq, struct irq_desc *desc)
10874 +void __ipipe_noend_irq(unsigned irq, struct irq_desc *desc)
10878 +irq_flow_handler_t
10879 +__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained)
10881 + if (unlikely(handle == NULL)) {
10882 + desc->ipipe_ack = &__ipipe_ack_bad_irq;
10883 + desc->ipipe_end = &__ipipe_noend_irq;
10884 + } else {
10885 + if (is_chained) {
10886 + desc->ipipe_ack = handle;
10887 + desc->ipipe_end = &__ipipe_noend_irq;
10888 + handle = __ipipe_noack_irq;
10889 + } else if (handle == &handle_simple_irq) {
10890 + desc->ipipe_ack = &__ipipe_ack_simple_irq;
10891 + desc->ipipe_end = &__ipipe_end_simple_irq;
10892 + } else if (handle == &handle_level_irq) {
10893 + desc->ipipe_ack = &__ipipe_ack_level_irq;
10894 + desc->ipipe_end = &__ipipe_end_level_irq;
10895 + } else if (handle == &handle_edge_irq) {
10896 + desc->ipipe_ack = &__ipipe_ack_edge_irq;
10897 + desc->ipipe_end = &__ipipe_end_edge_irq;
10898 + } else if (handle == &handle_fasteoi_irq) {
10899 + desc->ipipe_ack = &__ipipe_ack_fasteoi_irq;
10900 + desc->ipipe_end = &__ipipe_end_fasteoi_irq;
10901 + } else if (handle == &handle_percpu_irq) {
10902 + desc->ipipe_ack = &__ipipe_ack_percpu_irq;
10903 + desc->ipipe_end = &__ipipe_end_percpu_irq;
10904 + } else if (desc->chip == &no_irq_chip) {
10905 + desc->ipipe_ack = &__ipipe_noack_irq;
10906 + desc->ipipe_end = &__ipipe_noend_irq;
10907 + } else {
10908 + desc->ipipe_ack = &__ipipe_ack_bad_irq;
10909 + desc->ipipe_end = &__ipipe_noend_irq;
10913 + /* Suppress intermediate trampoline routine. */
10914 + ipipe_root_domain->irqs[desc->irq].acknowledge = desc->ipipe_ack;
10916 + return handle;
10919 +#else /* !CONFIG_IPIPE */
10921 +irq_flow_handler_t
10922 +__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained)
10924 + return handle;
10927 +#endif /* !CONFIG_IPIPE */
10929 void
10930 __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
10931 const char *name)
10932 @@ -692,6 +832,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
10933 chip_bus_lock(irq, desc);
10934 raw_spin_lock_irqsave(&desc->lock, flags);
10936 + handle = __fixup_irq_handler(desc, handle, is_chained);
10938 /* Uninstall? */
10939 if (handle == handle_bad_irq) {
10940 if (desc->chip != &no_irq_chip)
10941 diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
10942 index 27e5c69..a7279a2 100644
10943 --- a/kernel/irq/handle.c
10944 +++ b/kernel/irq/handle.c
10945 @@ -457,8 +457,10 @@ unsigned int __do_IRQ(unsigned int irq)
10947 * No locking required for CPU-local interrupts:
10949 +#ifndef CONFIG_IPIPE
10950 if (desc->chip->ack)
10951 desc->chip->ack(irq);
10952 +#endif
10953 if (likely(!(desc->status & IRQ_DISABLED))) {
10954 action_ret = handle_IRQ_event(irq, desc->action);
10955 if (!noirqdebug)
10956 @@ -469,8 +471,10 @@ unsigned int __do_IRQ(unsigned int irq)
10959 raw_spin_lock(&desc->lock);
10960 +#ifndef CONFIG_IPIPE
10961 if (desc->chip->ack)
10962 desc->chip->ack(irq);
10963 +#endif
10965 * REPLAY is when Linux resends an IRQ that was dropped earlier
10966 * WAITING is used by probe to mark irqs that are being tested
10967 diff --git a/kernel/lockdep.c b/kernel/lockdep.c
10968 index 5428679..8c03b18 100644
10969 --- a/kernel/lockdep.c
10970 +++ b/kernel/lockdep.c
10971 @@ -2322,7 +2322,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
10972 /* we'll do an OFF -> ON transition: */
10973 curr->hardirqs_enabled = 1;
10975 - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
10976 + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw()))
10977 return;
10978 if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
10979 return;
10980 @@ -2365,7 +2365,7 @@ void trace_hardirqs_off_caller(unsigned long ip)
10981 if (unlikely(!debug_locks || current->lockdep_recursion))
10982 return;
10984 - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
10985 + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw()))
10986 return;
10988 if (curr->hardirqs_enabled) {
10989 @@ -2397,7 +2397,7 @@ void trace_softirqs_on(unsigned long ip)
10990 if (unlikely(!debug_locks))
10991 return;
10993 - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
10994 + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw()))
10995 return;
10997 if (curr->softirqs_enabled) {
10998 @@ -2431,7 +2431,7 @@ void trace_softirqs_off(unsigned long ip)
10999 if (unlikely(!debug_locks))
11000 return;
11002 - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
11003 + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw()))
11004 return;
11006 if (curr->softirqs_enabled) {
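The lockdep changes relax the "interrupts must be off" assertions: with a pipeline, the root domain may have interrupts virtually enabled while they are really masked in the CPU, so either form of masking satisfies the invariant. Sketched below with two plain booleans (virtually_off, hw_off) in place of irqs_disabled() / irqs_disabled_hw(); the names are illustrative only.

#include <assert.h>
#include <stdbool.h>

static bool virtually_off;	/* root domain stalled (soft mask) */
static bool hw_off;		/* CPU interrupt flag cleared (hard mask) */

static void trace_hardirqs_on_check(void)
{
	/* The patched check: either masking level is acceptable. */
	assert(virtually_off || hw_off);
	virtually_off = false;
}

int main(void)
{
	hw_off = true;			/* hard-masked only, soft mask open */
	trace_hardirqs_on_check();	/* would have warned before the patch */
	return 0;
}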
11007 diff --git a/kernel/panic.c b/kernel/panic.c
11008 index 3b16cd9..4b1951f 100644
11009 --- a/kernel/panic.c
11010 +++ b/kernel/panic.c
11011 @@ -23,6 +23,7 @@
11012 #include <linux/init.h>
11013 #include <linux/nmi.h>
11014 #include <linux/dmi.h>
11015 +#include <linux/ipipe_trace.h>
11017 int panic_on_oops;
11018 static unsigned long tainted_mask;
11019 @@ -324,6 +325,8 @@ void oops_enter(void)
11021 tracing_off();
11022 /* can't trust the integrity of the kernel anymore: */
11023 + ipipe_trace_panic_freeze();
11024 + ipipe_disable_context_check(ipipe_processor_id());
11025 debug_locks_off();
11026 do_oops_enter_exit();
11028 diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
11029 index aa9e916..2852c65 100644
11030 --- a/kernel/power/hibernate.c
11031 +++ b/kernel/power/hibernate.c
11032 @@ -269,6 +269,7 @@ static int create_image(int platform_mode)
11033 goto Enable_cpus;
11035 local_irq_disable();
11036 + local_irq_disable_hw_cond();
11038 error = sysdev_suspend(PMSG_FREEZE);
11039 if (error) {
11040 @@ -298,6 +299,7 @@ static int create_image(int platform_mode)
11043 Enable_irqs:
11044 + local_irq_enable_hw_cond();
11045 local_irq_enable();
11047 Enable_cpus:
11048 @@ -393,6 +395,7 @@ static int resume_target_kernel(bool platform_mode)
11049 goto Enable_cpus;
11051 local_irq_disable();
11052 + local_irq_disable_hw_cond();
11054 error = sysdev_suspend(PMSG_QUIESCE);
11055 if (error)
11056 @@ -424,6 +427,7 @@ static int resume_target_kernel(bool platform_mode)
11057 sysdev_resume();
11059 Enable_irqs:
11060 + local_irq_enable_hw_cond();
11061 local_irq_enable();
11063 Enable_cpus:
11064 @@ -510,6 +514,7 @@ int hibernation_platform_enter(void)
11065 goto Platform_finish;
11067 local_irq_disable();
11068 + local_irq_disable_hw_cond();
11069 sysdev_suspend(PMSG_HIBERNATE);
11070 hibernation_ops->enter();
11071 /* We should never get here */
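In the hibernation paths, local_irq_disable() only stalls the root domain's virtual mask; the added local_irq_disable_hw_cond() also clears the real CPU interrupt flag (it is a no-op without CONFIG_IPIPE), so a co-kernel cannot slip in between snapshotting and restoring the device state. A two-level mask model with invented names:

#include <stdbool.h>
#include <stdio.h>

static bool virt_masked;	/* what local_irq_disable() toggles */
static bool hw_masked;		/* what the _hw_cond variants toggle */

static void irq_disable(void)    { virt_masked = true; }
static void irq_disable_hw(void) { hw_masked = true; }
static void irq_enable_hw(void)  { hw_masked = false; }
static void irq_enable(void)     { virt_masked = false; }

static void create_image(void)
{
	irq_disable();
	irq_disable_hw();	/* nothing, not even a co-kernel, may interrupt */
	printf("suspend devices, snapshot memory (virt=%d hw=%d)\n",
	       virt_masked, hw_masked);
	irq_enable_hw();
	irq_enable();
}

int main(void)
{
	create_image();
	return 0;
}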
11072 diff --git a/kernel/printk.c b/kernel/printk.c
11073 index 444b770..60e2a08 100644
11074 --- a/kernel/printk.c
11075 +++ b/kernel/printk.c
11076 @@ -576,6 +576,41 @@ static int have_callable_console(void)
11077 return 0;
11080 +#ifdef CONFIG_IPIPE
11082 +static IPIPE_DEFINE_SPINLOCK(__ipipe_printk_lock);
11084 +static int __ipipe_printk_fill;
11086 +static char __ipipe_printk_buf[__LOG_BUF_LEN];
11088 +void __ipipe_flush_printk (unsigned virq, void *cookie)
11090 + char *p = __ipipe_printk_buf;
11091 + int len, lmax, out = 0;
11092 + unsigned long flags;
11094 + goto start;
11096 + do {
11097 + spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
11098 + start:
11099 + lmax = __ipipe_printk_fill;
11100 + while (out < lmax) {
11101 + len = strlen(p) + 1;
11102 + printk("%s",p);
11103 + p += len;
11104 + out += len;
11106 + spin_lock_irqsave(&__ipipe_printk_lock, flags);
11108 + while (__ipipe_printk_fill != lmax);
11110 + __ipipe_printk_fill = 0;
11112 + spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
11116 * printk - print a kernel message
11117 * @fmt: format string
11118 @@ -600,6 +635,65 @@ static int have_callable_console(void)
11120 asmlinkage int printk(const char *fmt, ...)
11122 + int r, fbytes, oldcount;
11123 + unsigned long flags;
11124 + int sprintk = 1;
11125 + int cs = -1;
11126 + va_list args;
11128 + va_start(args, fmt);
11130 + local_irq_save_hw(flags);
11132 + if (test_bit(IPIPE_SPRINTK_FLAG, &__ipipe_current_domain->flags) ||
11133 + oops_in_progress)
11134 + cs = ipipe_disable_context_check(ipipe_processor_id());
11135 + else if (__ipipe_current_domain == ipipe_root_domain) {
11136 + struct ipipe_domain *dom;
11138 + list_for_each_entry(dom, &__ipipe_pipeline, p_link) {
11139 + if (dom == ipipe_root_domain)
11140 + break;
11141 + if (test_bit(IPIPE_STALL_FLAG,
11142 + &ipipe_cpudom_var(dom, status)))
11143 + sprintk = 0;
11145 + } else
11146 + sprintk = 0;
11148 + local_irq_restore_hw(flags);
11150 + if (sprintk) {
11151 + r = vprintk(fmt, args);
11152 + if (cs != -1)
11153 + ipipe_restore_context_check(ipipe_processor_id(), cs);
11154 + goto out;
11157 + spin_lock_irqsave(&__ipipe_printk_lock, flags);
11159 + oldcount = __ipipe_printk_fill;
11160 + fbytes = __LOG_BUF_LEN - oldcount;
11162 + if (fbytes > 1) {
11163 + r = vscnprintf(__ipipe_printk_buf + __ipipe_printk_fill,
11164 + fbytes, fmt, args) + 1; /* account for the null byte */
11165 + __ipipe_printk_fill += r;
11166 + } else
11167 + r = 0;
11169 + spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
11171 + if (oldcount == 0)
11172 + ipipe_trigger_irq(__ipipe_printk_virq);
11173 +out:
11174 + va_end(args);
11176 + return r;
11178 +#else /* !CONFIG_IPIPE */
11179 +asmlinkage int printk(const char *fmt, ...)
11181 va_list args;
11182 int r;
11184 @@ -617,6 +711,7 @@ asmlinkage int printk(const char *fmt, ...)
11186 return r;
11188 +#endif /* CONFIG_IPIPE */
11190 /* cpu currently holding logbuf_lock */
11191 static volatile unsigned int printk_cpu = UINT_MAX;
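The patched printk() keeps a fast path: when called from the root domain with no higher domain stalled, it prints directly; otherwise it formats into a static buffer and triggers a virtual IRQ whose handler, __ipipe_flush_printk(), replays the text later from a safe context. Below is a stripped-down user-space model of that "format now, flush later" buffer; deferred_printf() and flush_deferred() are invented names, not part of the patch.

#include <stdarg.h>
#include <stdio.h>
#include <string.h>

#define LOG_LEN 1024

static char log_buf[LOG_LEN];
static int log_fill;

/* Called from the "unsafe" context: only formats into the buffer. */
static int deferred_printf(const char *fmt, ...)
{
	int free_bytes = LOG_LEN - log_fill, n;
	va_list ap;

	if (free_bytes <= 1)
		return 0;
	va_start(ap, fmt);
	n = vsnprintf(log_buf + log_fill, free_bytes, fmt, ap);
	va_end(ap);
	if (n >= free_bytes)
		n = free_bytes - 1;	/* truncated, as vscnprintf() reports */
	log_fill += n + 1;		/* keep the NUL, like the patch does */
	return n + 1;
}

/* Called later from the safe context, like __ipipe_flush_printk(). */
static void flush_deferred(void)
{
	int out = 0;

	while (out < log_fill) {
		fputs(log_buf + out, stdout);
		out += (int)strlen(log_buf + out) + 1;
	}
	log_fill = 0;
}

int main(void)
{
	deferred_printf("message from a non-root domain\n");
	deferred_printf("another one\n");
	flush_deferred();	/* replayed once the root domain runs */
	return 0;
}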
11192 diff --git a/kernel/sched.c b/kernel/sched.c
11193 index 4fa520b..c5cabba 100644
11194 --- a/kernel/sched.c
11195 +++ b/kernel/sched.c
11196 @@ -2283,7 +2283,8 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
11198 smp_wmb();
11199 rq = task_rq_lock(p, &flags);
11200 - if (!(p->state & state))
11201 + if (!(p->state & state) ||
11202 + (p->state & (TASK_NOWAKEUP|TASK_ATOMICSWITCH)))
11203 goto out;
11205 if (p->se.on_rq)
11206 @@ -2752,22 +2753,29 @@ asmlinkage void schedule_tail(struct tas
11207 #endif
11208 if (current->set_child_tid)
11209 put_user(task_pid_vnr(current), current->set_child_tid);
11211 + ipipe_init_notify(current);
11215 * context_switch - switch to the new MM and the new
11216 * thread's register state.
11218 -static inline void
11219 +int
11220 context_switch(struct rq *rq, struct task_struct *prev,
11221 struct task_struct *next)
11223 struct mm_struct *mm, *oldmm;
11225 - prepare_task_switch(rq, prev, next);
11226 - trace_sched_switch(prev, next);
11227 mm = next->mm;
11228 oldmm = prev->active_mm;
11230 +if (!rq) {
11231 + switch_mm(oldmm, next->active_mm, next);
11232 + if (!mm) enter_lazy_tlb(oldmm, next);
11233 +} else {
11234 + prepare_task_switch(rq, prev, next);
11235 + trace_sched_switch(prev, next);
11237 * For paravirt, this is coupled with an exit in switch_to to
11238 * combine the page table reload and the switch backend into
11239 @@ -2795,11 +2803,24 @@ context_switch(struct rq *rq, struct tas
11240 #ifndef __ARCH_WANT_UNLOCKED_CTXSW
11241 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
11242 #endif
11245 +#ifdef CONFIG_IPIPE
11246 + next->ptd[IPIPE_ROOT_NPTDKEYS - 1] = prev;
11247 +#endif /* CONFIG_IPIPE */
11248 /* Here we just switch the register state and the stack. */
11249 switch_to(prev, next, prev);
11251 barrier();
11253 +if (unlikely(rq)) {
11254 +#if 1 // def CONFIG_IPIPE_DELAYED_ATOMICSW
11255 + current->state &= ~TASK_ATOMICSWITCH;
11256 +#else
11257 + prev->state &= ~TASK_ATOMICSWITCH;
11258 +#endif
11259 + if (task_hijacked(prev))
11260 + return 1; __ipipe_dispatch_event(IPIPE_FIRST_EVENT - 2, 0);
11263 * this_rq must be evaluated again because prev may have moved
11264 * CPUs since it called schedule(), thus the 'rq' on its stack
11265 @@ -2807,6 +2828,10 @@ context_switch(struct rq *rq, struct tas
11267 finish_task_switch(this_rq(), prev);
11269 + return 0;
11272 +EXPORT_SYMBOL(context_switch);
11275 * nr_running, nr_uninterruptible and nr_context_switches:
11276 @@ -3443,6 +3468,7 @@ notrace unsigned long get_parent_ip(unsi
11278 void __kprobes add_preempt_count(int val)
11280 + ipipe_check_context(ipipe_root_domain);
11281 #ifdef CONFIG_DEBUG_PREEMPT
11283 * Underflow?
11284 @@ -3465,6 +3491,7 @@ EXPORT_SYMBOL(add_preempt_count);
11286 void __kprobes sub_preempt_count(int val)
11288 + ipipe_check_context(ipipe_root_domain);
11289 #ifdef CONFIG_DEBUG_PREEMPT
11291 * Underflow?
11292 @@ -3513,6 +3540,7 @@ static noinline void __schedule_bug(stru
11294 static inline void schedule_debug(struct task_struct *prev)
11296 + ipipe_check_context(ipipe_root_domain);
11298 * Test if we are atomic. Since do_exit() needs to call into
11299 * schedule() atomically, we ignore that path for now.
11300 @@ -3575,7 +3603,7 @@ pick_next_task(struct rq *rq)
11302 * schedule() is the main scheduler function.
11304 -asmlinkage void __sched schedule(void)
11305 +asmlinkage int __sched schedule(void)
11307 struct task_struct *prev, *next;
11308 unsigned long *switch_count;
11309 @@ -3589,6 +3617,9 @@ need_resched:
11310 rcu_note_context_switch(cpu);
11311 prev = rq->curr;
11312 switch_count = &prev->nivcsw;
11313 + if (unlikely(prev->state & TASK_ATOMICSWITCH))
11314 + /* Pop one disable level -- one still remains. */
11315 + preempt_enable();
11317 release_kernel_lock(prev);
11318 need_resched_nonpreemptible:
11319 @@ -3625,15 +3656,18 @@ need_resched_nonpreemptible:
11320 rq->curr = next;
11321 ++*switch_count;
11323 - context_switch(rq, prev, next); /* unlocks the rq */
11324 + if (context_switch(rq, prev, next)) /* unlocks the rq */
11325 + return 1; /* task hijacked by higher domain */
11327 * the context switch might have flipped the stack from under
11328 * us, hence refresh the local variables.
11330 cpu = smp_processor_id();
11331 rq = cpu_rq(cpu);
11332 - } else
11333 + } else {
11334 + prev->state &= ~TASK_ATOMICSWITCH;
11335 raw_spin_unlock_irq(&rq->lock);
11338 post_schedule(rq);
11340 @@ -3646,6 +3680,8 @@ need_resched_nonpreemptible:
11341 preempt_enable_no_resched();
11342 if (need_resched())
11343 goto need_resched;
11345 + return 0;
11347 EXPORT_SYMBOL(schedule);
11349 @@ -3737,7 +3773,8 @@ asmlinkage void __sched preempt_schedule
11351 do {
11352 add_preempt_count(PREEMPT_ACTIVE);
11353 - schedule();
11354 + if (schedule())
11355 + return;
11356 sub_preempt_count(PREEMPT_ACTIVE);
11359 @@ -4522,6 +4559,7 @@ recheck:
11360 oldprio = p->prio;
11361 prev_class = p->sched_class;
11362 __setscheduler(rq, p, policy, param->sched_priority);
11363 + ipipe_setsched_notify(p);
11365 if (running)
11366 p->sched_class->set_curr_task(rq);
11367 @@ -5178,6 +5216,7 @@ void __cpuinit init_idle(struct task_str
11368 #else
11369 task_thread_info(idle)->preempt_count = 0;
11370 #endif
11371 + ipipe_check_context(ipipe_root_domain);
11373 * The idle tasks have their own, simple scheduling class:
11375 @@ -8977,3 +9016,65 @@ void synchronize_sched_expedited(void)
11376 EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
11378 #endif /* #else #ifndef CONFIG_SMP */
11380 +#ifdef CONFIG_IPIPE
11382 +int ipipe_setscheduler_root(struct task_struct *p, int policy, int prio)
11384 + const struct sched_class *prev_class;
11385 + int oldprio, on_rq, running;
11386 + unsigned long flags;
11387 + struct rq *rq;
11389 + raw_spin_lock_irqsave(&p->pi_lock, flags);
11390 + rq = __task_rq_lock(p);
11391 + update_rq_clock(rq);
11392 + on_rq = p->se.on_rq;
11393 + running = task_current(rq, p);
11394 + if (on_rq)
11395 + deactivate_task(rq, p, 0);
11396 + if (running)
11397 + p->sched_class->put_prev_task(rq, p);
11399 + p->sched_reset_on_fork = 0;
11401 + oldprio = p->prio;
11402 + prev_class = p->sched_class;
11403 + __setscheduler(rq, p, policy, prio);
11404 + ipipe_setsched_notify(p);
11406 + if (running)
11407 + p->sched_class->set_curr_task(rq);
11408 + if (on_rq) {
11409 + activate_task(rq, p, 0);
11411 + check_class_changed(rq, p, prev_class, oldprio, running);
11413 + __task_rq_unlock(rq);
11414 + raw_spin_unlock_irqrestore(&p->pi_lock, flags);
11416 + rt_mutex_adjust_pi(p);
11418 + return 0;
11420 +EXPORT_SYMBOL_GPL(ipipe_setscheduler_root);
11422 +int ipipe_reenter_root(struct task_struct *prev, int policy, int prio)
11424 + struct rq *rq = this_rq();
11426 + finish_task_switch(rq, prev);
11428 + post_schedule(rq);
11430 + (void)reacquire_kernel_lock(current);
11431 + preempt_enable_no_resched();
11433 + if (current->policy != policy || current->rt_priority != prio)
11434 + return ipipe_setscheduler_root(current, policy, prio);
11436 + return 0;
11438 +EXPORT_SYMBOL_GPL(ipipe_reenter_root);
11440 +#endif /* CONFIG_IPIPE */
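The scheduler changes let a higher-priority domain steal ("hijack") the outgoing task during a context switch: context_switch() and schedule() now return a flag, and callers such as preempt_schedule() bail out instead of finishing the root-domain bookkeeping when it is set. A toy model of that propagated return code, using hypothetical names (do_switch, resched):

#include <stdbool.h>
#include <stdio.h>

static bool hijacked_by_cokernel;	/* stands in for task_hijacked() */

/* Returns nonzero when another domain took over, like context_switch(). */
static int do_switch(void)
{
	printf("switch registers and stack\n");
	return hijacked_by_cokernel ? 1 : 0;
}

/* Mirrors the patched schedule(): propagate the hijack upward. */
static int resched(void)
{
	if (do_switch())
		return 1;	/* skip the post-switch bookkeeping */
	printf("finish_task_switch, post_schedule\n");
	return 0;
}

int main(void)
{
	hijacked_by_cokernel = true;
	if (resched())
		printf("task hijacked by a higher domain\n");
	return 0;
}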
11441 diff --git a/kernel/signal.c b/kernel/signal.c
11442 index bded651..d4495ce 100644
11443 --- a/kernel/signal.c
11444 +++ b/kernel/signal.c
11445 @@ -558,6 +558,7 @@ void signal_wake_up(struct task_struct *t, int resume)
11446 unsigned int mask;
11448 set_tsk_thread_flag(t, TIF_SIGPENDING);
11449 + ipipe_sigwake_notify(t); /* TIF_SIGPENDING must be set first. */
11452 * For SIGKILL, we want to wake it up in the stopped/traced/killable
11453 diff --git a/kernel/spinlock.c b/kernel/spinlock.c
11454 index be6517f..862aed4 100644
11455 --- a/kernel/spinlock.c
11456 +++ b/kernel/spinlock.c
11457 @@ -26,7 +26,9 @@
11458 * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
11459 * not re-enabled during lock-acquire (which the preempt-spin-ops do):
11461 -#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
11462 +#if !defined(CONFIG_GENERIC_LOCKBREAK) || \
11463 + defined(CONFIG_DEBUG_LOCK_ALLOC) || \
11464 + defined(CONFIG_IPIPE)
11466 * The __lock_function inlines are taken from
11467 * include/linux/spinlock_api_smp.h
11468 diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
11469 index b6b898d..75c2031 100644
11470 --- a/kernel/time/tick-common.c
11471 +++ b/kernel/time/tick-common.c
11472 @@ -69,7 +69,7 @@ static void tick_periodic(int cpu)
11473 write_sequnlock(&xtime_lock);
11476 - update_process_times(user_mode(get_irq_regs()));
11477 + update_root_process_times(get_irq_regs());
11478 profile_tick(CPU_PROFILING);
11481 @@ -177,6 +177,10 @@ static void tick_setup_device(struct tick_device *td,
11483 td->evtdev = newdev;
11485 + /* I-pipe: derive global tick IRQ from CPU 0 */
11486 + if (cpu == 0)
11487 + ipipe_update_tick_evtdev(newdev);
11490 * When the device is not per cpu, pin the interrupt to the
11491 * current cpu:
11492 diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
11493 index f898af6..d4c82b5 100644
11494 --- a/kernel/time/tick-sched.c
11495 +++ b/kernel/time/tick-sched.c
11496 @@ -604,7 +604,7 @@ static void tick_nohz_handler(struct clock_event_device *dev)
11497 ts->idle_jiffies++;
11500 - update_process_times(user_mode(regs));
11501 + update_root_process_times(regs);
11502 profile_tick(CPU_PROFILING);
11504 while (tick_nohz_reprogram(ts, now)) {
11505 @@ -764,7 +764,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
11506 touch_softlockup_watchdog();
11507 ts->idle_jiffies++;
11509 - update_process_times(user_mode(regs));
11510 + update_root_process_times(regs);
11511 profile_tick(CPU_PROFILING);
11514 diff --git a/kernel/timer.c b/kernel/timer.c
11515 index ee305c8..f595adb 100644
11516 --- a/kernel/timer.c
11517 +++ b/kernel/timer.c
11518 @@ -1269,6 +1269,25 @@ void update_process_times(int user_tick)
11519 run_posix_cpu_timers(p);
11522 +#ifdef CONFIG_IPIPE
11524 +void update_root_process_times(struct pt_regs *regs)
11526 + int cpu, user_tick = user_mode(regs);
11528 + if (__ipipe_root_tick_p(regs)) {
11529 + update_process_times(user_tick);
11530 + return;
11533 + run_local_timers();
11534 + cpu = smp_processor_id();
11535 + rcu_check_callbacks(cpu, user_tick);
11536 + run_posix_cpu_timers(current);
11539 +#endif
11542 * This function runs timers and the timer-tq in bottom half context.
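update_root_process_times() runs the full per-tick accounting only for ticks that belong to the root domain; for ticks relayed from a higher domain it keeps just the timer wheel, RCU and POSIX CPU timer work. A compact sketch of that split, with a plain tick_is_root flag standing in for __ipipe_root_tick_p(regs):

#include <stdbool.h>
#include <stdio.h>

static void run_local_timers(void)   { printf("timer wheel\n"); }
static void rcu_check(void)          { printf("rcu callbacks\n"); }
static void posix_cpu_timers(void)   { printf("posix cpu timers\n"); }
static void full_process_times(void) { printf("full accounting + scheduler tick\n"); }

static void root_process_times(bool tick_is_root)
{
	if (tick_is_root) {		/* __ipipe_root_tick_p(regs) */
		full_process_times();
		return;
	}
	run_local_timers();
	rcu_check();
	posix_cpu_timers();
}

int main(void)
{
	root_process_times(true);	/* ordinary root-domain tick */
	root_process_times(false);	/* tick relayed from a higher domain */
	return 0;
}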
11544 diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
11545 index 6b316b3..ad342e8 100644
11546 --- a/kernel/trace/ftrace.c
11547 +++ b/kernel/trace/ftrace.c
11548 @@ -29,6 +29,7 @@
11549 #include <linux/list.h>
11550 #include <linux/hash.h>
11551 #include <linux/rcupdate.h>
11552 +#include <linux/ipipe.h>
11554 #include <trace/events/sched.h>
11556 @@ -1157,6 +1158,9 @@ static int __ftrace_modify_code(void *data)
11558 static void ftrace_run_update_code(int command)
11560 +#ifdef CONFIG_IPIPE
11561 + unsigned long flags;
11562 +#endif /* CONFIG_IPIPE */
11563 int ret;
11565 ret = ftrace_arch_code_modify_prepare();
11566 @@ -1164,7 +1168,13 @@ static void ftrace_run_update_code(int command)
11567 if (ret)
11568 return;
11570 +#ifdef CONFIG_IPIPE
11571 + flags = ipipe_critical_enter(NULL);
11572 + __ftrace_modify_code(&command);
11573 + ipipe_critical_exit(flags);
11574 +#else /* !CONFIG_IPIPE */
11575 stop_machine(__ftrace_modify_code, &command, NULL);
11576 +#endif /* !CONFIG_IPIPE */
11578 ret = ftrace_arch_code_modify_post_process();
11579 FTRACE_WARN_ON(ret);
11580 @@ -2684,9 +2694,9 @@ static int ftrace_process_locs(struct module *mod,
11583 /* disable interrupts to prevent kstop machine */
11584 - local_irq_save(flags);
11585 + local_irq_save_hw_notrace(flags);
11586 ftrace_update_code(mod);
11587 - local_irq_restore(flags);
11588 + local_irq_restore_hw_notrace(flags);
11589 mutex_unlock(&ftrace_lock);
11591 return 0;
11592 @@ -2765,9 +2775,9 @@ void __init ftrace_init(void)
11593 /* Keep the ftrace pointer to the stub */
11594 addr = (unsigned long)ftrace_stub;
11596 - local_irq_save(flags);
11597 + local_irq_save_hw_notrace(flags);
11598 ftrace_dyn_arch_init(&addr);
11599 - local_irq_restore(flags);
11600 + local_irq_restore_hw_notrace(flags);
11602 /* ftrace_dyn_arch_init places the return code in addr */
11603 if (addr)
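For live code patching, stop_machine() is not usable once a real-time domain may preempt Linux, so the patch brackets __ftrace_modify_code() with ipipe_critical_enter()/exit(), which holds the other CPUs with hardware interrupts off for the duration; the ftrace_process_locs() and ftrace_init() hunks likewise switch to the hardware, non-traced irq-save variants because the virtual mask alone would not keep a co-kernel out. A rough single-CPU analogy using a plain flag (all names here are invented for the sketch):

#include <stdbool.h>
#include <stdio.h>

static bool hard_irqs_off;	/* stands in for the real CPU interrupt flag */

static bool critical_enter(void)
{
	bool was_off = hard_irqs_off;

	hard_irqs_off = true;	/* in the kernel: also sync the other CPUs */
	return was_off;
}

static void critical_exit(bool was_off)
{
	hard_irqs_off = was_off;
}

static void modify_code(void)
{
	printf("patching text, hard irqs off: %d\n", hard_irqs_off);
}

int main(void)
{
	bool flags = critical_enter();

	modify_code();		/* no CPU can take an interrupt meanwhile */
	critical_exit(flags);
	return 0;
}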
11604 diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
11605 index e722e9d..3b0413c 100644
11606 --- a/lib/Kconfig.debug
11607 +++ b/lib/Kconfig.debug
11608 @@ -137,6 +137,8 @@ config DEBUG_SECTION_MISMATCH
11609 - Enable verbose reporting from modpost to help solving
11610 the section mismatches reported.
11612 +source "kernel/ipipe/Kconfig.debug"
11614 config DEBUG_KERNEL
11615 bool "Kernel debugging"
11616 help
11617 diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c
11618 index 9681d54..2dba50c 100644
11619 --- a/lib/bust_spinlocks.c
11620 +++ b/lib/bust_spinlocks.c
11621 @@ -13,6 +13,7 @@
11622 #include <linux/wait.h>
11623 #include <linux/vt_kern.h>
11624 #include <linux/console.h>
11625 +#include <linux/ipipe_trace.h>
11628 void __attribute__((weak)) bust_spinlocks(int yes)
11629 @@ -24,6 +25,7 @@ void __attribute__((weak)) bust_spinlocks(int yes)
11630 unblank_screen();
11631 #endif
11632 console_unblank();
11633 + ipipe_trace_panic_dump();
11634 if (--oops_in_progress == 0)
11635 wake_up_klogd();
11637 diff --git a/lib/ioremap.c b/lib/ioremap.c
11638 index 14c6078..a275469 100644
11639 --- a/lib/ioremap.c
11640 +++ b/lib/ioremap.c
11641 @@ -85,8 +85,8 @@ int ioremap_page_range(unsigned long addr,
11642 if (err)
11643 break;
11644 } while (pgd++, addr = next, addr != end);
11646 - flush_cache_vmap(start, end);
11647 + __ipipe_pin_range_globally(start, end);
11648 + flush_cache_vmap(start, end);
11650 return err;
11652 diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
11653 index 4689cb0..3d12764 100644
11654 --- a/lib/smp_processor_id.c
11655 +++ b/lib/smp_processor_id.c
11656 @@ -12,10 +12,13 @@ notrace unsigned int debug_smp_processor_id(void)
11657 unsigned long preempt_count = preempt_count();
11658 int this_cpu = raw_smp_processor_id();
11660 + if (!ipipe_root_domain_p)
11661 + goto out;
11663 if (likely(preempt_count))
11664 goto out;
11666 - if (irqs_disabled())
11667 + if (irqs_disabled() || irqs_disabled_hw())
11668 goto out;
11671 diff --git a/mm/memory.c b/mm/memory.c
11672 index 7550758..e96effb 100644
11673 --- a/mm/memory.c
11674 +++ b/mm/memory.c
11675 @@ -642,6 +642,32 @@ out:
11676 return pfn_to_page(pfn);
11679 +static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
11681 + /*
11682 + * If the source page was a PFN mapping, we don't have
11683 + * a "struct page" for it. We do a best-effort copy by
11684 + * just copying from the original user address. If that
11685 + * fails, we just zero-fill it. Live with it.
11686 + */
11687 + if (unlikely(!src)) {
11688 + void *kaddr = kmap_atomic(dst, KM_USER0);
11689 + void __user *uaddr = (void __user *)(va & PAGE_MASK);
11691 + /*
11692 + * This really shouldn't fail, because the page is there
11693 + * in the page tables. But it might just be unreadable,
11694 + * in which case we just give up and fill the result with
11695 + * zeroes.
11696 + */
11697 + if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
11698 + memset(kaddr, 0, PAGE_SIZE);
11699 + kunmap_atomic(kaddr, KM_USER0);
11700 + flush_dcache_page(dst);
11701 + } else
11702 + copy_user_highpage(dst, src, va, vma);
11706 * copy one vm_area from one task to the other. Assumes the page tables
11707 * already present in the new task to be cleared in the whole range
11708 @@ -650,8 +676,8 @@ out:
11710 static inline unsigned long
11711 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
11712 - pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
11713 - unsigned long addr, int *rss)
11714 + pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
11715 + unsigned long addr, int *rss, struct page *uncow_page)
11717 unsigned long vm_flags = vma->vm_flags;
11718 pte_t pte = *src_pte;
11719 @@ -694,6 +720,21 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
11720 * in the parent and the child
11722 if (is_cow_mapping(vm_flags)) {
11723 +#ifdef CONFIG_IPIPE
11724 + if (uncow_page) {
11725 + struct page *old_page = vm_normal_page(vma, addr, pte);
11726 + cow_user_page(uncow_page, old_page, addr, vma);
11727 + pte = mk_pte(uncow_page, vma->vm_page_prot);
11729 + if (vm_flags & VM_SHARED)
11730 + pte = pte_mkclean(pte);
11731 + pte = pte_mkold(pte);
11733 + page_add_new_anon_rmap(uncow_page, vma, addr);
11734 + rss[!!PageAnon(uncow_page)]++;
11735 + goto out_set_pte;
11737 +#endif /* CONFIG_IPIPE */
11738 ptep_set_wrprotect(src_mm, addr, src_pte);
11739 pte = pte_wrprotect(pte);
11741 @@ -731,13 +772,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
11742 int progress = 0;
11743 int rss[NR_MM_COUNTERS];
11744 swp_entry_t entry = (swp_entry_t){0};
11746 + struct page *uncow_page = NULL;
11747 +#ifdef CONFIG_IPIPE
11748 + int do_cow_break = 0;
11749 +again:
11750 + if (do_cow_break) {
11751 + uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
11752 + if (uncow_page == NULL)
11753 + return -ENOMEM;
11754 + do_cow_break = 0;
11756 +#else
11757 again:
11758 +#endif
11759 init_rss_vec(rss);
11761 dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
11762 - if (!dst_pte)
11763 + if (!dst_pte) {
11764 + if (uncow_page)
11765 + page_cache_release(uncow_page);
11766 return -ENOMEM;
11768 src_pte = pte_offset_map_nested(src_pmd, addr);
11769 src_ptl = pte_lockptr(src_mm, src_pmd);
11770 spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
11771 @@ -760,8 +815,25 @@ again:
11772 progress++;
11773 continue;
11775 +#ifdef CONFIG_IPIPE
11776 + if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
11777 + if (is_cow_mapping(vma->vm_flags) &&
11778 + test_bit(MMF_VM_PINNED, &src_mm->flags) &&
11779 + ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) {
11780 + arch_leave_lazy_mmu_mode();
11781 + spin_unlock(src_ptl);
11782 + pte_unmap_nested(src_pte);
11783 + add_mm_rss_vec(dst_mm, rss);
11784 + pte_unmap_unlock(dst_pte, dst_ptl);
11785 + cond_resched();
11786 + do_cow_break = 1;
11787 + goto again;
11790 +#endif
11791 entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
11792 - vma, addr, rss);
11793 + vma, addr, rss, uncow_page);
11794 + uncow_page = NULL;
11795 if (entry.val)
11796 break;
11797 progress += 8;
11798 @@ -2063,32 +2135,6 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
11799 return pte;
11802 -static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
11804 - /*
11805 - * If the source page was a PFN mapping, we don't have
11806 - * a "struct page" for it. We do a best-effort copy by
11807 - * just copying from the original user address. If that
11808 - * fails, we just zero-fill it. Live with it.
11809 - */
11810 - if (unlikely(!src)) {
11811 - void *kaddr = kmap_atomic(dst, KM_USER0);
11812 - void __user *uaddr = (void __user *)(va & PAGE_MASK);
11814 - /*
11815 - * This really shouldn't fail, because the page is there
11816 - * in the page tables. But it might just be unreadable,
11817 - * in which case we just give up and fill the result with
11818 - * zeroes.
11819 - */
11820 - if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
11821 - memset(kaddr, 0, PAGE_SIZE);
11822 - kunmap_atomic(kaddr, KM_USER0);
11823 - flush_dcache_page(dst);
11824 - } else
11825 - copy_user_highpage(dst, src, va, vma);
11829 * This routine handles present pages, when users try to write
11830 * to a shared page. It is done by copying the page to a new address
11831 @@ -3589,3 +3635,111 @@ void might_fault(void)
11833 EXPORT_SYMBOL(might_fault);
11834 #endif
11836 +#ifdef CONFIG_IPIPE
11838 +static inline int ipipe_pin_pte_range(struct mm_struct *mm, pmd_t *pmd,
11839 + struct vm_area_struct *vma,
11840 + unsigned long addr, unsigned long end)
11842 + spinlock_t *ptl;
11843 + pte_t *pte;
11845 + do {
11846 + pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
11847 + if (!pte)
11848 + continue;
11850 + if (!pte_present(*pte) || pte_write(*pte)) {
11851 + pte_unmap_unlock(pte, ptl);
11852 + continue;
11855 + if (do_wp_page(mm, vma, addr, pte, pmd, ptl, *pte) == VM_FAULT_OOM)
11856 + return -ENOMEM;
11857 + } while (addr += PAGE_SIZE, addr != end);
11858 + return 0;
11861 +static inline int ipipe_pin_pmd_range(struct mm_struct *mm, pud_t *pud,
11862 + struct vm_area_struct *vma,
11863 + unsigned long addr, unsigned long end)
11865 + unsigned long next;
11866 + pmd_t *pmd;
11868 + pmd = pmd_offset(pud, addr);
11869 + do {
11870 + next = pmd_addr_end(addr, end);
11871 + if (pmd_none_or_clear_bad(pmd))
11872 + continue;
11873 + if (ipipe_pin_pte_range(mm, pmd, vma, addr, next))
11874 + return -ENOMEM;
11875 + } while (pmd++, addr = next, addr != end);
11876 + return 0;
11879 +static inline int ipipe_pin_pud_range(struct mm_struct *mm, pgd_t *pgd,
11880 + struct vm_area_struct *vma,
11881 + unsigned long addr, unsigned long end)
11883 + unsigned long next;
11884 + pud_t *pud;
11886 + pud = pud_offset(pgd, addr);
11887 + do {
11888 + next = pud_addr_end(addr, end);
11889 + if (pud_none_or_clear_bad(pud))
11890 + continue;
11891 + if (ipipe_pin_pmd_range(mm, pud, vma, addr, next))
11892 + return -ENOMEM;
11893 + } while (pud++, addr = next, addr != end);
11894 + return 0;
11897 +int ipipe_disable_ondemand_mappings(struct task_struct *tsk)
11899 + unsigned long addr, next, end;
11900 + struct vm_area_struct *vma;
11901 + struct mm_struct *mm;
11902 + int result = 0;
11903 + pgd_t *pgd;
11905 + mm = get_task_mm(tsk);
11906 + if (!mm)
11907 + return -EPERM;
11909 + down_write(&mm->mmap_sem);
11910 + if (test_bit(MMF_VM_PINNED, &mm->flags))
11911 + goto done_mm;
11913 + for (vma = mm->mmap; vma; vma = vma->vm_next) {
11914 + if (!is_cow_mapping(vma->vm_flags)
11915 + || !(vma->vm_flags & VM_WRITE))
11916 + continue;
11918 + addr = vma->vm_start;
11919 + end = vma->vm_end;
11921 + pgd = pgd_offset(mm, addr);
11922 + do {
11923 + next = pgd_addr_end(addr, end);
11924 + if (pgd_none_or_clear_bad(pgd))
11925 + continue;
11926 + if (ipipe_pin_pud_range(mm, pgd, vma, addr, next)) {
11927 + result = -ENOMEM;
11928 + goto done_mm;
11930 + } while (pgd++, addr = next, addr != end);
11932 + set_bit(MMF_VM_PINNED, &mm->flags);
11934 + done_mm:
11935 + up_write(&mm->mmap_sem);
11936 + mmput(mm);
11937 + return result;
11940 +EXPORT_SYMBOL(ipipe_disable_ondemand_mappings);
11942 +#endif
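ipipe_disable_ondemand_mappings() walks every writable COW mapping of a task pgd -> pud -> pmd -> pte and write-faults each present, read-only entry, so the task never takes a copy-on-write fault once it runs under a real-time domain; copy_pte_range() cooperates at fork time by pre-copying pages for mms marked MMF_VM_PINNED. The sketch below walks a flat mock "page table" and breaks COW on matching entries; the entry layout and break_cow() are invented for the example.

#include <stdbool.h>
#include <stdio.h>

struct mock_pte {
	bool present;
	bool writable;
};

/* Stand-in for do_wp_page(): give the task its own writable copy now. */
static int break_cow(struct mock_pte *pte, unsigned long idx)
{
	pte->writable = true;
	printf("pre-faulted page %lu\n", idx);
	return 0;	/* -ENOMEM on allocation failure in the real code */
}

/* Walk one range the way ipipe_pin_pte_range() does. */
static int pin_range(struct mock_pte *table, unsigned long n)
{
	unsigned long i;

	for (i = 0; i < n; i++) {
		if (!table[i].present || table[i].writable)
			continue;	/* same tests as the patch */
		if (break_cow(&table[i], i))
			return -1;
	}
	return 0;
}

int main(void)
{
	struct mock_pte vma[4] = {
		{ true, false }, { true, true }, { false, false }, { true, false },
	};

	return pin_range(vma, 4);
}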
11943 diff --git a/mm/mmu_context.c b/mm/mmu_context.c
11944 index 9e82e93..a4bd34d 100644
11945 --- a/mm/mmu_context.c
11946 +++ b/mm/mmu_context.c
11947 @@ -24,15 +24,18 @@ void use_mm(struct mm_struct *mm)
11949 struct mm_struct *active_mm;
11950 struct task_struct *tsk = current;
11951 + unsigned long flags;
11953 task_lock(tsk);
11954 active_mm = tsk->active_mm;
11955 + ipipe_mm_switch_protect(flags);
11956 if (active_mm != mm) {
11957 atomic_inc(&mm->mm_count);
11958 tsk->active_mm = mm;
11960 tsk->mm = mm;
11961 - switch_mm(active_mm, mm, tsk);
11962 + __switch_mm(active_mm, mm, tsk);
11963 + ipipe_mm_switch_unprotect(flags);
11964 task_unlock(tsk);
11966 if (active_mm != mm)
11967 diff --git a/mm/vmalloc.c b/mm/vmalloc.c
11968 index ae00746..ca1597c 100644
11969 --- a/mm/vmalloc.c
11970 +++ b/mm/vmalloc.c
11971 @@ -172,6 +172,8 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end,
11972 return err;
11973 } while (pgd++, addr = next, addr != end);
11975 + __ipipe_pin_range_globally(start, end);
11977 return nr;