/*
 *  linux-rt-nao.git: arch/x86/kernel/vsyscall_64.c
 *
 *  Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
 *  Copyright 2003 Andi Kleen, SuSE Labs.
 *
 *  Thanks to hpa@transmeta.com for some useful hints.
 *  Special thanks to Ingo Molnar for his early experience with
 *  a different vsyscall implementation for Linux/IA32 and for the name.
 *
 *  vsyscall 0 is located at -10Mbyte, vsyscall 1 is located
 *  at virtual address -10Mbyte+1024bytes etc... There are at most 4
 *  vsyscalls. One vsyscall can reserve more than 1 slot to avoid
 *  jumping out of line if necessary. We cannot add more with this
 *  mechanism because older kernels won't return -ENOSYS.
 *  If we want more than four we need a vDSO.
 *
 *  Note: the concept clashes with user mode linux. If you use UML and
 *  want per-guest time just set the kernel.vsyscall64 sysctl to 0.
 */
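/*
 * Address layout sketch (values as defined in <asm/vsyscall.h> in
 * kernels of this vintage; shown here only for orientation):
 *
 *	VSYSCALL_START    = -10UL << 20   = 0xffffffffff600000
 *	VSYSCALL_SIZE     = 1024
 *	VSYSCALL_ADDR(nr) = VSYSCALL_START + VSYSCALL_SIZE * (nr)
 *
 * so vgettimeofday (slot 0) sits at 0xffffffffff600000, vtime (slot 1)
 * at 0xffffffffff600400, and vgetcpu (slot 2) at 0xffffffffff600800.
 */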
/* Disable profiling for userspace code: */
#define DISABLE_BRANCH_PROFILING

#include <linux/time.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/seqlock.h>
#include <linux/jiffies.h>
#include <linux/sysctl.h>
#include <linux/clocksource.h>
#include <linux/getcpu.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/notifier.h>

#include <asm/vsyscall.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/unistd.h>
#include <asm/fixmap.h>
#include <asm/errno.h>
#include <asm/io.h>
#include <asm/segment.h>
#include <asm/desc.h>
#include <asm/topology.h>
#include <asm/vgtod.h>

#define __vsyscall(nr) \
	__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
#define __syscall_clobber "r11","cx","memory"
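/*
 * Illustration (a sketch, not part of this file): because each entry is
 * linked at a fixed address, user space can call it through a plain
 * function pointer, without involving the dynamic linker:
 *
 *	typedef int (*vgtod_fn)(struct timeval *, struct timezone *);
 *	struct timeval tv;
 *	((vgtod_fn)VSYSCALL_ADDR(__NR_vgettimeofday))(&tv, NULL);
 *
 * glibc's gettimeofday() historically used this path on x86-64.
 */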
/*
 * vsyscall_gtod_data contains data that is:
 * - read-only from vsyscalls
 * - written by timer interrupt or sysctl (/proc/sys/kernel/vsyscall64)
 * Try to keep this structure as small as possible to avoid cache-line ping-pongs.
 */
int __vgetcpu_mode __section_vgetcpu_mode;

struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
{
	.lock = __RAW_SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
	.sysctl_enabled = 1,
};
void update_vsyscall_tz(void)
{
	unsigned long flags;

	write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
	/* sys_tz has changed */
	vsyscall_gtod_data.sys_tz = sys_tz;
	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
{
	unsigned long flags;

	write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);

	if (likely(vsyscall_gtod_data.sysctl_enabled == 2)) {
		struct timespec tmp = *(wall_time);
		cycle_t (*vread)(void);
		cycle_t now;

		vread = vsyscall_gtod_data.clock.vread;
		if (likely(vread))
			now = vread();
		else
			now = clock->read();

		/* calculate interval: */
		now = (now - clock->cycle_last) & clock->mask;
		/* convert to nsecs: */
		tmp.tv_nsec += (now * clock->mult) >> clock->shift;

		while (tmp.tv_nsec >= NSEC_PER_SEC) {
			tmp.tv_sec += 1;
			tmp.tv_nsec -= NSEC_PER_SEC;
		}

		vsyscall_gtod_data.wall_time_sec = tmp.tv_sec;
		vsyscall_gtod_data.wall_time_nsec = tmp.tv_nsec;
	} else {
		vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
		vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
	}

	/* copy vsyscall data */
	vsyscall_gtod_data.clock.vread = clock->vread;
	vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
	vsyscall_gtod_data.clock.mask = clock->mask;
	vsyscall_gtod_data.clock.mult = clock->mult;
	vsyscall_gtod_data.clock.shift = clock->shift;
	vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
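/*
 * Worked example for the mult/shift conversion above (illustrative
 * numbers, not from this file): the pair approximates nanoseconds per
 * cycle as a fixed-point fraction, ns = (cycles * mult) >> shift.
 * For a 1 GHz clocksource with shift = 22,
 * mult = (NSEC_PER_SEC << 22) / 1000000000 = 1 << 22, so a delta of
 * 3000000 cycles converts to (3000000 * (1 << 22)) >> 22 = 3000000 ns.
 */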
/* RED-PEN may want to readd seq locking, but then the variable should be
 * write-once.
 */
static __always_inline void do_get_tz(struct timezone *tz)
{
	*tz = __vsyscall_gtod_data.sys_tz;
}
static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
{
	int ret;
	asm volatile("syscall"
		: "=a" (ret)
		: "0" (__NR_gettimeofday), "D" (tv), "S" (tz)
		: __syscall_clobber);
	return ret;
}
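/*
 * Note on the asm constraints above: the x86-64 SYSCALL convention
 * takes the syscall number in %rax ("0"/"=a") and the first two
 * arguments in %rdi ("D") and %rsi ("S"); the CPU itself clobbers
 * %rcx and %r11, which is why __syscall_clobber lists them, plus
 * "memory" as a compiler barrier.
 */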
static __always_inline long time_syscall(long *t)
{
	long secs;
	asm volatile("syscall"
		: "=a" (secs)
		: "0" (__NR_time), "D" (t) : __syscall_clobber);
	return secs;
}
static __always_inline void do_vgettimeofday(struct timeval *tv)
{
	cycle_t now, base, mask, cycle_delta;
	unsigned seq;
	unsigned long mult, shift, nsec;
	cycle_t (*vread)(void);

	if (likely(__vsyscall_gtod_data.sysctl_enabled == 2)) {
		struct timeval tmp;

		do {
			barrier();
			tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
			tv->tv_usec = __vsyscall_gtod_data.wall_time_nsec;
			barrier();
			tmp.tv_sec = __vsyscall_gtod_data.wall_time_sec;
			tmp.tv_usec = __vsyscall_gtod_data.wall_time_nsec;

		} while (tmp.tv_usec != tv->tv_usec ||
			 tmp.tv_sec != tv->tv_sec);

		/* tv_usec still holds nanoseconds here; reduce it to
		   microseconds at millisecond granularity: */
		tv->tv_usec /= NSEC_PER_MSEC;
		tv->tv_usec *= USEC_PER_MSEC;
		return;
	}

	do {
		seq = read_seqbegin(&__vsyscall_gtod_data.lock);

		vread = __vsyscall_gtod_data.clock.vread;
		if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
			gettimeofday(tv, NULL);
			return;
		}

		/*
		 * Surround the RDTSC by barriers, to make sure it's not
		 * speculated to outside the seqlock critical section and
		 * does not cause time warps:
		 */
		rdtsc_barrier();
		now = vread();
		rdtsc_barrier();

		base = __vsyscall_gtod_data.clock.cycle_last;
		mask = __vsyscall_gtod_data.clock.mask;
		mult = __vsyscall_gtod_data.clock.mult;
		shift = __vsyscall_gtod_data.clock.shift;

		tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
		nsec = __vsyscall_gtod_data.wall_time_nsec;
	} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));

	/* calculate interval: */
	cycle_delta = (now - base) & mask;
	/* convert to nsecs: */
	nsec += (cycle_delta * mult) >> shift;

	while (nsec >= NSEC_PER_SEC) {
		tv->tv_sec += 1;
		nsec -= NSEC_PER_SEC;
	}
	tv->tv_usec = nsec / NSEC_PER_USEC;
}
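/*
 * The loop above is the standard seqlock reader protocol, in outline:
 *
 *	do {
 *		seq = read_seqbegin(&lock);
 *		... copy the shared data ...
 *	} while (read_seqretry(&lock, seq));
 *
 * i.e. redo the copy whenever a writer raced with the read. Readers
 * never write the shared cache line, which is what makes this safe to
 * run from unprivileged user context via the vsyscall page.
 */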
int __vsyscall(0) vgettimeofday(struct timeval *tv, struct timezone *tz)
{
	if (tv)
		do_vgettimeofday(tv);
	if (tz)
		do_get_tz(tz);
	return 0;
}
/* This will break when the xtime seconds get inaccurate, but that is
 * unlikely */
time_t __vsyscall(1) vtime(time_t *t)
{
	struct timeval tv;
	time_t result;
	if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
		return time_syscall(t);

	vgettimeofday(&tv, NULL);
	result = tv.tv_sec;
	if (t)
		*t = result;
	return result;
}
/* Fast way to get the current CPU and node.
   This helps to do per-node and per-CPU caches in user space.
   The result is not guaranteed without CPU affinity, but usually
   works out because the scheduler tries to keep a thread on the same
   CPU.

   tcache must point to a two-element-sized long array.
   All arguments can be NULL. */
long __vsyscall(2)
vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
{
	unsigned int p;
	unsigned long j = 0;

	/* Fast cache - only recompute the value once per jiffy and avoid
	   the relatively costly rdtscp/cpuid otherwise.
	   This works because the scheduler usually keeps the process
	   on the same CPU and this syscall doesn't guarantee its
	   results anyway.
	   We do this here because otherwise user space would do it on
	   its own in a likely inferior way (no access to jiffies).
	   If you don't like it, pass NULL. */
	if (tcache && tcache->blob[0] == (j = __jiffies)) {
		p = tcache->blob[1];
	} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
		/* Load per-CPU data from RDTSCP */
		native_read_tscp(&p);
	} else {
		/* Load per-CPU data from the GDT */
		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
	}
	if (tcache) {
		tcache->blob[0] = j;
		tcache->blob[1] = p;
	}
	if (cpu)
		*cpu = p & 0xfff;
	if (node)
		*node = p >> 12;
	return 0;
}
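/*
 * Illustration (a sketch, not part of this file): user space reaches
 * this entry through slot 2 of the vsyscall page, e.g.
 *
 *	typedef long (*vgetcpu_fn)(unsigned *, unsigned *,
 *				   struct getcpu_cache *);
 *	unsigned cpu, node;
 *	((vgetcpu_fn)VSYSCALL_ADDR(__NR_vgetcpu))(&cpu, &node, NULL);
 *
 * With the decoding above, a thread on cpu 5 of node 1 reads
 * p == 0x1005, hence cpu == 5 and node == 1.
 */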
static long __vsyscall(3) venosys_1(void)
{
	return -ENOSYS;
}
#ifdef CONFIG_SYSCTL

static int
vsyscall_sysctl_change(ctl_table *ctl, int write, struct file *filp,
		       void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
}

static ctl_table kernel_table2[] = {
	{ .procname = "vsyscall64",
	  .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
	  .mode = 0644,
	  .proc_handler = vsyscall_sysctl_change },
	{}
};

static ctl_table kernel_root_table2[] = {
	{ .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
	  .child = kernel_table2 },
	{}
};
#endif
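/*
 * Usage note for kernel.vsyscall64, matching the code above:
 *	0 - vsyscalls fall back to real syscalls (safe for UML guests),
 *	1 - read the clocksource from user space (the default),
 *	2 - millisecond-resolution fast path fed by update_vsyscall().
 * For example, "sysctl -w kernel.vsyscall64=2" selects the fast path.
 */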
/* Assume __initcall executes before all user space. Hopefully kmod
   doesn't violate that. We'll find out if it does. */
static void __cpuinit vsyscall_set_cpu(int cpu)
{
	unsigned long d;
	unsigned long node = 0;
#ifdef CONFIG_NUMA
	node = cpu_to_node(cpu);
#endif
	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
		write_rdtscp_aux((node << 12) | cpu);

	/* Store the cpu number in the segment limit so that it can be
	   loaded quickly from user space in vgetcpu.
	   12 bits for the CPU and 8 bits for the node. */
	d = 0x0f40000000000ULL;
	d |= cpu;
	d |= (node & 0xf) << 12;
	d |= (node >> 4) << 48;
	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}
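/*
 * Worked example for the descriptor above: the segment limit field ends
 * up holding (node << 12) | cpu, so for cpu 5 on node 1 the limit is
 * 0x1005. The unprivileged LSL instruction in vgetcpu() reads that
 * limit back and splits it as cpu = p & 0xfff, node = p >> 12.
 */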
static void __cpuinit cpu_vsyscall_init(void *arg)
{
	/* preemption should be already off */
	vsyscall_set_cpu(raw_smp_processor_id());
}
static int __cpuinit
cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
{
	long cpu = (long)arg;
	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
	return NOTIFY_DONE;
}
void __init map_vsyscall(void)
{
	extern char __vsyscall_0;
	unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);

	/* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
	__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
}
static int __init vsyscall_init(void)
{
	BUG_ON(((unsigned long) &vgettimeofday !=
		VSYSCALL_ADDR(__NR_vgettimeofday)));
	BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
	BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
	BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
#ifdef CONFIG_SYSCTL
	register_sysctl_table(kernel_root_table2);
#endif
	on_each_cpu(cpu_vsyscall_init, NULL, 1);
	hotcpu_notifier(cpu_vsyscall_notifier, 0);
	return 0;
}

__initcall(vsyscall_init);