/* smp.c: Sparc64 SMP support.
 *
 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/threads.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>

#include <asm/head.h>
#include <asm/ptrace.h>
#include <asm/atomic.h>

#include <asm/irq.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/hardirq.h>
#include <asm/softirq.h>
#include <asm/uaccess.h>
#include <asm/timer.h>

#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
extern int linux_num_cpus;
extern void calibrate_delay(void);
extern unsigned prom_cpu_nodes[];

struct cpuinfo_sparc cpu_data[NR_CPUS] __attribute__ ((aligned (64)));

volatile int cpu_number_map[NR_CPUS] __attribute__ ((aligned (64)));
volatile int __cpu_logical_map[NR_CPUS] __attribute__ ((aligned (64)));

/* Please don't make this stuff initdata!!!  --DaveM */
static unsigned char boot_cpu_id = 0;
static int smp_activated = 0;

/* Kernel spinlock */
spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;

volatile int smp_processors_ready = 0;
unsigned long cpu_present_map = 0;
int smp_num_cpus = 1;
int smp_threads_ready = 0;
void __init smp_setup(char *str, int *ints)
{
	/* XXX implement me XXX */
}
int smp_info(char *buf)
{
	int len = 7, i;

	strcpy(buf, "State:\n");
	for (i = 0; i < NR_CPUS; i++)
		if(cpu_present_map & (1UL << i))
			len += sprintf(buf + len,
				       "CPU%d:\t\tonline\n", i);
	return len;
}
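/* Note: udelay_val is a copy of loops_per_sec (see smp_store_cpu_info
 * below), so udelay_val / 500000 is the whole BogoMIPS value and
 * (udelay_val / 5000) % 100 supplies the two fractional digits, the
 * same scaling calibrate_delay() uses when it prints BogoMIPS.
 */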
int smp_bogo(char *buf)
{
	int len = 0, i;

	for (i = 0; i < NR_CPUS; i++)
		if(cpu_present_map & (1UL << i))
			len += sprintf(buf + len,
				       "Cpu%dBogo\t: %lu.%02lu\n",
				       i, cpu_data[i].udelay_val / 500000,
				       (cpu_data[i].udelay_val / 5000) % 100);
	return len;
}
void __init smp_store_cpu_info(int id)
{
	int i;

	cpu_data[id].irq_count = 0;
	cpu_data[id].bh_count = 0;
	/* multiplier and counter set by
	   smp_setup_percpu_timer()  */
	cpu_data[id].udelay_val = loops_per_sec;

	cpu_data[id].pgcache_size = 0;
	cpu_data[id].pte_cache = NULL;
	cpu_data[id].pgdcache_size = 0;
	cpu_data[id].pgd_cache = NULL;
	cpu_data[id].idle_volume = 1;

	for(i = 0; i < 16; i++)
		cpu_data[id].irq_worklists[i] = 0;
}
void __init smp_commence(void)
{
}
static void smp_setup_percpu_timer(void);
static void smp_tune_scheduling(void);

static volatile unsigned long callin_flag = 0;

extern void inherit_locked_prom_mappings(int save_p);
extern void cpu_probe(void);
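/* Entry point for a secondary cpu once start_secondary() has set up traps
 * and IRQs: pick up the locked PROM mappings, flush caches and TLB, sync
 * the local %tick with the master, start the per-cpu timer, calibrate the
 * delay loop, and then raise callin_flag so smp_boot_cpus() knows we are
 * alive.  Finally we spin until the master sets smp_processors_ready.
 */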
void __init smp_callin(void)
{
	int cpuid = hard_smp_processor_id();

	inherit_locked_prom_mappings(0);

	__flush_cache_all();
	__flush_tlb_all();

	cpu_probe();

	/* Master did this already, now is the time for us to do it:
	 * rewrite %tick (advanced by 6 to cover these instructions)
	 * with bit 63, the privileged-access (NPT) bit, cleared.
	 */
	__asm__ __volatile__("
	sethi	%%hi(0x80000000), %%g1
	sllx	%%g1, 32, %%g1
	rd	%%tick, %%g2
	add	%%g2, 6, %%g2
	andn	%%g2, %%g1, %%g2
	wrpr	%%g2, 0, %%tick
"	: /* no outputs */
	: /* no inputs */
	: "g1", "g2");

	smp_setup_percpu_timer();

	__sti();

	calibrate_delay();
	smp_store_cpu_info(cpuid);
	callin_flag = 1;
	__asm__ __volatile__("membar #Sync\n\t"
			     "flush  %%g6" : : : "memory");

	/* Clear this or we will die instantly when we
	 * schedule back to this idler...
	 */
	current->thread.flags &= ~(SPARC_FLAG_NEWCHILD);

	/* Attach to the address space of init_task. */
	atomic_inc(&init_mm.mm_count);
	current->active_mm = &init_mm;

	while(!smp_processors_ready)
		membar("#LoadLoad");
}
extern int cpu_idle(void);
extern void init_IRQ(void);

void initialize_secondary(void)
{
}
int start_secondary(void *unused)
{
	trap_init();
	init_IRQ();
	smp_callin();
	return cpu_idle();
}
void cpu_panic(void)
{
	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
	panic("SMP bolixed\n");
}
extern struct prom_cpuinfo linux_cpus[64];

extern unsigned long smp_trampoline;

/* The OBP cpu startup callback truncates the 3rd arg cookie to
 * 32-bits (I think) so to be safe we have it read the pointer
 * contained here so we work on >4GB machines. -DaveM
 */
static struct task_struct *cpu_new_task = NULL;
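/* For each present cpu other than the boot cpu: fork an idle thread with
 * kernel_thread(), pull it off the runqueue and out of the pid hash, point
 * cpu_new_task at it, and fire the cpu into the trampoline via
 * prom_startcpu().  We then busy-wait for the new cpu to set callin_flag
 * from smp_callin() before declaring it stuck.
 */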
void __init smp_boot_cpus(void)
{
	int cpucount = 0, i;

	printk("Entering UltraSMPenguin Mode...\n");
	__sti();
	smp_store_cpu_info(boot_cpu_id);
	smp_tune_scheduling();
	init_idle();

	if(linux_num_cpus == 1)
		return;

	for(i = 0; i < NR_CPUS; i++) {
		if(i == boot_cpu_id)
			continue;

		if(cpu_present_map & (1UL << i)) {
			unsigned long entry = (unsigned long)(&smp_trampoline);
			unsigned long cookie = (unsigned long)(&cpu_new_task);
			struct task_struct *p;
			int timeout;
			int no;
			extern unsigned long phys_base;

			entry += phys_base - KERNBASE;
			cookie += phys_base - KERNBASE;
			kernel_thread(start_secondary, NULL, CLONE_PID);
			cpucount++;

			p = init_task.prev_task;
			init_tasks[cpucount] = p;

			p->processor = i;
			p->has_cpu = 1; /* we schedule the first task manually */

			del_from_runqueue(p);
			unhash_process(p);

			callin_flag = 0;
			for (no = 0; no < linux_num_cpus; no++)
				if (linux_cpus[no].mid == i)
					break;
			cpu_new_task = p;
			prom_startcpu(linux_cpus[no].prom_node,
				      entry, cookie);
			for(timeout = 0; timeout < 5000000; timeout++) {
				if(callin_flag)
					break;
				udelay(100);
			}
			if(callin_flag) {
				cpu_number_map[i] = cpucount;
				__cpu_logical_map[cpucount] = i;
				prom_cpu_nodes[i] = linux_cpus[no].prom_node;
			} else {
				cpucount--;
				printk("Processor %d is stuck.\n", i);
			}
		}
		if(!callin_flag) {
			cpu_present_map &= ~(1UL << i);
			cpu_number_map[i] = -1;
		}
	}
	cpu_new_task = NULL;
	if(cpucount == 0) {
		printk("Error: only one processor found.\n");
		cpu_present_map = (1UL << smp_processor_id());
	} else {
		unsigned long bogosum = 0;

		for(i = 0; i < NR_CPUS; i++) {
			if(cpu_present_map & (1UL << i))
				bogosum += cpu_data[i].udelay_val;
		}
		printk("Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
		       cpucount + 1,
		       (bogosum + 2500)/500000,
		       ((bogosum + 2500)/5000)%100);
		smp_activated = 1;
		smp_num_cpus = cpucount + 1;
	}
	smp_processors_ready = 1;
	membar("#StoreStore | #StoreLoad");
}
/* #define XCALL_DEBUG */
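/* Deliver one mondo vector (data0/data1/data2) to a single target cpu
 * through the UltraSPARC interrupt dispatch mechanism: with PSTATE_IE
 * cleared, store the three data words via the UDB interrupt-write ASI,
 * then trigger the dispatch with the store to the target address
 * (cpu << 14 encodes the destination module ID).  Afterwards poll the
 * dispatch status register: a zero result means the vector was accepted,
 * a set busy bit means keep waiting, and a non-busy non-zero result
 * means the target NACK'ed us, so we retry after a short delay.
 */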
static inline void xcall_deliver(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
{
	u64 result, target = (cpu << 14) | 0x70;
	int stuck, tmp;

#ifdef XCALL_DEBUG
	printk("CPU[%d]: xcall(data[%016lx:%016lx:%016lx],tgt[%016lx])\n",
	       smp_processor_id(), data0, data1, data2, target);
#endif
again:
	tmp = 0x40;
	__asm__ __volatile__("
	wrpr	%1, %2, %%pstate
	stxa	%4, [%0] %3
	stxa	%5, [%0+%8] %3
	add	%0, %8, %0
	stxa	%6, [%0+%8] %3
	membar	#Sync
	stxa	%%g0, [%7] %3
	membar	#Sync"
	: "=r" (tmp)
	: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_UDB_INTR_W),
	  "r" (data0), "r" (data1), "r" (data2), "r" (target), "r" (0x10), "0" (tmp));

	/* NOTE: PSTATE_IE is still clear. */
	stuck = 100000;
	do {
		__asm__ __volatile__("ldxa [%%g0] %1, %0"
				     : "=r" (result)
				     : "i" (ASI_INTR_DISPATCH_STAT));
		if(result == 0) {
			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
					     : : "r" (pstate));
			return;
		}
		stuck -= 1;
		if(stuck == 0)
			break;
	} while(result & 0x1);
	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
			     : : "r" (pstate));
	if(stuck == 0) {
#ifdef XCALL_DEBUG
		printk("CPU[%d]: mondo stuckage result[%016lx]\n",
		       smp_processor_id(), result);
#endif
	} else {
#ifdef XCALL_DEBUG
		printk("CPU[%d]: Penguin %d NACK's master.\n", smp_processor_id(), cpu);
#endif
		udelay(2);
		goto again;
	}
}
void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
{
	if(smp_processors_ready) {
		unsigned long mask = (cpu_present_map & ~(1UL<<smp_processor_id()));
		u64 pstate, data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));
		int i, ncpus = smp_num_cpus - 1;

		__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
		for(i = 0; i < NR_CPUS; i++) {
			if(mask & (1UL << i)) {
				xcall_deliver(data0, data1, data2, pstate, i);
				ncpus--;
			}
			if (!ncpus) break;
		}
		/* NOTE: Caller runs local copy on master. */
	}
}
extern unsigned long xcall_flush_tlb_page;
extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_range;
extern unsigned long xcall_flush_tlb_all;
extern unsigned long xcall_tlbcachesync;
extern unsigned long xcall_flush_cache_all;
extern unsigned long xcall_report_regs;
extern unsigned long xcall_receive_signal;
void smp_receive_signal(int cpu)
{
	if(smp_processors_ready &&
	   (cpu_present_map & (1UL<<cpu)) != 0) {
		u64 pstate, data0 = (((u64)&xcall_receive_signal) & 0xffffffff);

		__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
		xcall_deliver(data0, 0, 0, pstate, cpu);
	}
}
void smp_report_regs(void)
{
	smp_cross_call(&xcall_report_regs, 0, 0, 0);
}

void smp_flush_cache_all(void)
{
	smp_cross_call(&xcall_flush_cache_all, 0, 0, 0);
	__flush_cache_all();
}

void smp_flush_tlb_all(void)
{
	smp_cross_call(&xcall_flush_tlb_all, 0, 0, 0);
	__flush_tlb_all();
}
/* We know that the window frames of the user have been flushed
 * to the stack before we get here because all callers of us
 * are flush_tlb_*() routines, and these run after flush_cache_*()
 * which performs the flushw.
 *
 * XXX I diked out the fancy flush avoidance code for the
 * XXX swapping cases for now until the new MM code stabilizes. -DaveM
 *
 * The SMP TLB coherency scheme we use works as follows:
 *
 * 1) mm->cpu_vm_mask is a bit mask of which cpus an address
 *    space has (potentially) executed on, this is the heuristic
 *    we use to avoid doing cross calls.
 *
 * 2) TLB context numbers are shared globally across all processors
 *    in the system, this allows us to play several games to avoid
 *    cross calls.
 *
 *    One invariant is that when a cpu switches to a process, and
 *    that process's tsk->active_mm->cpu_vm_mask does not have the
 *    current cpu's bit set, that tlb context is flushed locally.
 *
 *    If the address space is non-shared (ie. mm->count == 1) we avoid
 *    cross calls when we want to flush the currently running process's
 *    tlb state.  This is done by clearing all cpu bits except the current
 *    processor's in current->active_mm->cpu_vm_mask and performing the
 *    flush locally only.  This will force any subsequent cpus which run
 *    this task to flush the context from the local tlb if the process
 *    migrates to another cpu (again).
 *
 * 3) For shared address spaces (threads) and swapping we bite the
 *    bullet for most cases and perform the cross call.
 *
 *    The performance gain from "optimizing" away the cross call for threads is
 *    questionable (in theory the big win for threads is the massive sharing of
 *    address space state across processors).
 *
 *    For the swapping case the locking is difficult to get right, we'd have to
 *    enforce strict ordered access to mm->cpu_vm_mask via a spinlock for example.
 *    Then again one could argue that when you are swapping, the cost of a cross
 *    call won't even show up on the performance radar.  But in any case we do
 *    get rid of the cross-call when the task has a dead context or the task has
 *    only ever run on the local cpu.
 */
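/* Concretely: each flush routine below takes the local-only path when the
 * mm is current->active_mm, has a single user, and cpu_vm_mask names only
 * this cpu; otherwise it cross-calls the other cpus first and then performs
 * the same flush locally at local_flush_and_out.
 */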
void smp_flush_tlb_mm(struct mm_struct *mm)
{
	u32 ctx = CTX_HWBITS(mm->context);

	if (mm == current->active_mm &&
	    atomic_read(&mm->mm_users) == 1 &&
	    (mm->cpu_vm_mask == (1UL << smp_processor_id())))
		goto local_flush_and_out;

	smp_cross_call(&xcall_flush_tlb_mm, ctx, 0, 0);

local_flush_and_out:
	__flush_tlb_mm(ctx, SECONDARY_CONTEXT);
}
void smp_flush_tlb_range(struct mm_struct *mm, unsigned long start,
			 unsigned long end)
{
	u32 ctx = CTX_HWBITS(mm->context);

	start &= PAGE_MASK;
	end &= PAGE_MASK;
	if(mm == current->active_mm &&
	   atomic_read(&mm->mm_users) == 1 &&
	   (mm->cpu_vm_mask == (1UL << smp_processor_id())))
		goto local_flush_and_out;

	smp_cross_call(&xcall_flush_tlb_range, ctx, start, end);

local_flush_and_out:
	__flush_tlb_range(ctx, start, SECONDARY_CONTEXT, end, PAGE_SIZE, (end-start));
}
void smp_flush_tlb_page(struct mm_struct *mm, unsigned long page)
{
	u32 ctx = CTX_HWBITS(mm->context);

	page &= PAGE_MASK;
	if(mm == current->active_mm &&
	   atomic_read(&mm->mm_users) == 1 &&
	   (mm->cpu_vm_mask == (1UL << smp_processor_id()))) {
		goto local_flush_and_out;
	}

	smp_cross_call(&xcall_flush_tlb_page, ctx, page, 0);

local_flush_and_out:
	__flush_tlb_page(ctx, page, SECONDARY_CONTEXT);
}
/* CPU capture. */
/* #define CAPTURE_DEBUG */
extern unsigned long xcall_capture;

static atomic_t smp_capture_depth = ATOMIC_INIT(0);
static atomic_t smp_capture_registry = ATOMIC_INIT(0);
static unsigned long penguins_are_doing_time = 0;
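/* smp_capture() herds every other cpu into smp_penguin_jailcell() via the
 * xcall_capture cross call; there they bump smp_capture_registry and spin
 * on penguins_are_doing_time.  smp_release() drops the flag once the last
 * nested capture is undone, letting the captured cpus decrement the
 * registry and resume.  smp_capture_depth makes the capture nestable.
 */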
void smp_capture(void)
{
	if (smp_processors_ready) {
		int result = atomic_add_return(1, &smp_capture_depth);

		membar("#StoreStore | #LoadStore");
		if(result == 1) {
			int ncpus = smp_num_cpus;

#ifdef CAPTURE_DEBUG
			printk("CPU[%d]: Sending penguins to jail...",
			       smp_processor_id());
#endif
			penguins_are_doing_time = 1;
			membar("#StoreStore | #LoadStore");
			atomic_inc(&smp_capture_registry);
			smp_cross_call(&xcall_capture, 0, 0, 0);
			while(atomic_read(&smp_capture_registry) != ncpus)
				membar("#LoadLoad");
#ifdef CAPTURE_DEBUG
			printk("done\n");
#endif
		}
	}
}
void smp_release(void)
{
	if(smp_processors_ready) {
		if(atomic_dec_and_test(&smp_capture_depth)) {
#ifdef CAPTURE_DEBUG
			printk("CPU[%d]: Giving pardon to imprisoned penguins\n",
			       smp_processor_id());
#endif
			penguins_are_doing_time = 0;
			membar("#StoreStore | #StoreLoad");
			atomic_dec(&smp_capture_registry);
		}
	}
}
/* Imprisoned penguins run with %pil == 15, but PSTATE_IE set, so they
 * can service tlb flush xcalls...
 */
void smp_penguin_jailcell(void)
{
	flushw_user();
	atomic_inc(&smp_capture_registry);
	membar("#StoreLoad | #StoreStore");
	while(penguins_are_doing_time)
		membar("#LoadLoad");
	atomic_dec(&smp_capture_registry);
}
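/* Kernel profiling hook for the timer tick: fold the trapped PC into
 * prof_buffer.  Hits inside the out-of-line rwlock/atomic stubs are
 * attributed to the address those stubs keep in %g3 (their caller)
 * instead of the stub itself.
 */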
static inline void sparc64_do_profile(unsigned long pc, unsigned long g3)
{
	if (prof_buffer && current->pid) {
		extern int _stext;
		extern int rwlock_impl_begin, rwlock_impl_end;
		extern int atomic_impl_begin, atomic_impl_end;

		if ((pc >= (unsigned long) &rwlock_impl_begin &&
		     pc < (unsigned long) &rwlock_impl_end) ||
		    (pc >= (unsigned long) &atomic_impl_begin &&
		     pc < (unsigned long) &atomic_impl_end))
			pc = g3;

		pc -= (unsigned long) &_stext;
		pc >>= prof_shift;

		if(pc >= prof_len)
			pc = prof_len - 1;
		atomic_inc((atomic_t *)&prof_buffer[pc]);
	}
}
static unsigned long current_tick_offset;

#define prof_multiplier(__cpu)	cpu_data[(__cpu)].multiplier
#define prof_counter(__cpu)	cpu_data[(__cpu)].counter

extern void update_one_process(struct task_struct *p, unsigned long ticks,
			       unsigned long user, unsigned long system,
			       int cpu);
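/* Level-14 timer handler.  If the level-14 softint is not pending this is
 * really a normal level-14 device interrupt, so hand it to handler_irq().
 * Otherwise do one (or more) ticks: profile, do per-process accounting,
 * and on the boot cpu also run the global timer_tick_interrupt().  Each
 * pass re-arms %tick_cmpr by current_tick_offset; the loop catches the
 * case where we fell so far behind that the new compare value is already
 * in the past.
 */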
void smp_percpu_timer_interrupt(struct pt_regs *regs)
{
	unsigned long compare, tick;
	int cpu = smp_processor_id();
	int user = user_mode(regs);

	/*
	 * Check for level 14 softint.
	 */
	if (!(get_softint() & (1UL << 0))) {
		extern void handler_irq(int, struct pt_regs *);

		handler_irq(14, regs);
		return;
	}

	clear_softint((1UL << 0));
	do {
		if(!user)
			sparc64_do_profile(regs->tpc, regs->u_regs[UREG_G3]);
		if(!--prof_counter(cpu)) {
			if (cpu == boot_cpu_id) {
/* XXX Keep this in sync with irq.c --DaveM */
#define irq_enter(cpu, irq)			\
do {	hardirq_enter(cpu);			\
	spin_unlock_wait(&global_irq_lock);	\
} while(0)
#define irq_exit(cpu, irq)	hardirq_exit(cpu)

				irq_enter(cpu, 0);
				kstat.irqs[cpu][0]++;

				timer_tick_interrupt(regs);

				irq_exit(cpu, 0);

#undef irq_enter
#undef irq_exit
			}

			if(current->pid) {
				unsigned int *inc, *inc2;

				update_one_process(current, 1, user, !user, cpu);
				if(--current->counter <= 0) {
					current->counter = 0;
					current->need_resched = 1;
				}

				if(user) {
					if(current->priority < DEF_PRIORITY) {
						inc = &kstat.cpu_nice;
						inc2 = &kstat.per_cpu_nice[cpu];
					} else {
						inc = &kstat.cpu_user;
						inc2 = &kstat.per_cpu_user[cpu];
					}
				} else {
					inc = &kstat.cpu_system;
					inc2 = &kstat.per_cpu_system[cpu];
				}
				atomic_inc((atomic_t *)inc);
				atomic_inc((atomic_t *)inc2);
			}
			prof_counter(cpu) = prof_multiplier(cpu);
		}

		/* Reload %tick_cmpr for the next tick and read the
		 * current %tick to see whether we already passed it.
		 */
		__asm__ __volatile__("rd	%%tick_cmpr, %0\n\t"
				     "add	%0, %2, %0\n\t"
				     "wr	%0, 0x0, %%tick_cmpr\n\t"
				     "rd	%%tick, %1"
				     : "=&r" (compare), "=r" (tick)
				     : "r" (current_tick_offset));
	} while (tick >= compare);
}
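/* Arm the first %tick_cmpr match current_tick_offset cycles from now;
 * every later match is re-armed by smp_percpu_timer_interrupt() above.
 */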
static void __init smp_setup_percpu_timer(void)
{
	int cpu = smp_processor_id();

	prof_counter(cpu) = prof_multiplier(cpu) = 1;

	__asm__ __volatile__("rd	%%tick, %%g1\n\t"
			     "add	%%g1, %0, %%g1\n\t"
			     "wr	%%g1, 0x0, %%tick_cmpr"
			     : /* no outputs */
			     : "r" (current_tick_offset)
			     : "g1");
}
void __init smp_tick_init(void)
{
	int i;

	boot_cpu_id = hard_smp_processor_id();
	current_tick_offset = timer_tick_offset;
	cpu_present_map = 0;
	for(i = 0; i < linux_num_cpus; i++)
		cpu_present_map |= (1UL << linux_cpus[i].mid);
	for(i = 0; i < NR_CPUS; i++) {
		cpu_number_map[i] = -1;
		__cpu_logical_map[i] = -1;
	}
	cpu_number_map[boot_cpu_id] = 0;
	prom_cpu_nodes[boot_cpu_id] = linux_cpus[0].prom_node;
	__cpu_logical_map[0] = boot_cpu_id;
	current->processor = boot_cpu_id;
	prof_counter(boot_cpu_id) = prof_multiplier(boot_cpu_id) = 1;
}
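/* Walk mem_map looking for `size' bytes worth of physically contiguous
 * pages (restarting whenever we hit a PageSkip hole) and return the kernel
 * virtual address of the run, or 0UL if no such run exists.  Used only by
 * the ecache calibration below.
 */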
static inline unsigned long find_flush_base(unsigned long size)
{
	struct page *p = mem_map;
	unsigned long found, base;

	size = PAGE_ALIGN(size);
	found = size;
	base = page_address(p);
	while(found != 0) {
		/* Failure. */
		if(p >= (mem_map + max_mapnr))
			return 0UL;
		if(PageSkip(p)) {
			p = p->next_hash;
			base = page_address(p);
			found = size;
		} else {
			found -= PAGE_SIZE;
			p++;
		}
	}
	return base;
}
cycles_t cacheflush_time;

static void __init smp_tune_scheduling (void)
{
	unsigned long flush_base, flags, *p;
	unsigned int ecache_size;
	cycles_t tick1, tick2, raw;

	/* Approximate heuristic for SMP scheduling.  It is an
	 * estimation of the time it takes to flush the L2 cache
	 * on the local processor.
	 *
	 * The ia32 chooses to use the L1 cache flush time instead,
	 * and I consider this complete nonsense.  The Ultra can service
	 * a miss to the L1 with a hit to the L2 in 7 or 8 cycles, and
	 * L2 misses are what create extra bus traffic (ie. the "cost"
	 * of moving a process from one cpu to another).
	 */
	printk("SMP: Calibrating ecache flush... ");
	ecache_size = prom_getintdefault(linux_cpus[0].prom_node,
					 "ecache-size", (512 * 1024));
	flush_base = find_flush_base(ecache_size << 1);

	if(flush_base != 0UL) {
		__save_and_cli(flags);

		/* Scan twice the size once just to get the TLB entries
		 * loaded and make sure the second scan measures pure misses.
		 */
		for(p = (unsigned long *)flush_base;
		    ((unsigned long)p) < (flush_base + (ecache_size<<1));
		    p += (64 / sizeof(unsigned long)))
			*((volatile unsigned long *)p);

		/* Now the real measurement. */
		__asm__ __volatile__("
		b,pt	%%xcc, 1f
		 rd	%%tick, %0

		.align	64
1:		ldx	[%2 + 0x000], %%g1
		ldx	[%2 + 0x040], %%g2
		ldx	[%2 + 0x080], %%g3
		ldx	[%2 + 0x0c0], %%g5
		add	%2, 0x100, %2
		cmp	%2, %4
		bne,pt	%%xcc, 1b
		 nop

		rd	%%tick, %1"
		: "=&r" (tick1), "=&r" (tick2), "=&r" (flush_base)
		: "2" (flush_base), "r" (flush_base + ecache_size)
		: "g1", "g2", "g3", "g5");

		__restore_flags(flags);

		raw = (tick2 - tick1);

		/* Dampen it a little, considering two processes
		 * sharing the cache and fitting.
		 */
		cacheflush_time = (raw - (raw >> 2));
	} else
		/* No usable flush region was found, so fall back to a
		 * rough guess of six cycles per ecache byte.
		 */
		cacheflush_time = ((ecache_size << 2) +
				   (ecache_size << 1));

	printk("Using heuristic of %d cycles.\n",
	       (int) cacheflush_time);
}
/* /proc/profile writes can call this, don't __init it please. */
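/* Clamp the requested profiling multiplier: the per-cpu tick period becomes
 * timer_tick_offset / multiplier %tick cycles, and anything shorter than
 * 1000 cycles is refused as too fast.
 */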
int setup_profiling_timer(unsigned int multiplier)
{
	unsigned long flags;
	int i;

	if((!multiplier) || (timer_tick_offset / multiplier) < 1000)
		return -EINVAL;

	save_and_cli(flags);
	for(i = 0; i < NR_CPUS; i++) {
		if(cpu_present_map & (1UL << i))
			prof_multiplier(i) = multiplier;
	}
	current_tick_offset = (timer_tick_offset / multiplier);
	restore_flags(flags);

	return 0;
}