1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
4 * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
5 * Copyright (C) 2012-2014 Cisco Systems
6 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
7 * Copyright (C) 2019 Intel Corporation
10 #include <linux/clockchips.h>
11 #include <linux/init.h>
12 #include <linux/interrupt.h>
13 #include <linux/jiffies.h>
15 #include <linux/sched.h>
16 #include <linux/spinlock.h>
17 #include <linux/threads.h>
19 #include <asm/param.h>
20 #include <kern_util.h>
22 #include <linux/time-internal.h>
23 #include <linux/um_timetravel.h>
24 #include <shared/init.h>
26 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
27 enum time_travel_mode time_travel_mode
;
28 EXPORT_SYMBOL_GPL(time_travel_mode
);
30 static bool time_travel_start_set
;
31 static unsigned long long time_travel_start
;
32 static unsigned long long time_travel_time
;
33 static LIST_HEAD(time_travel_events
);
34 static unsigned long long time_travel_timer_interval
;
35 static unsigned long long time_travel_next_event
;
36 static struct time_travel_event time_travel_timer_event
;
37 static int time_travel_ext_fd
= -1;
38 static unsigned int time_travel_ext_waiting
;
39 static bool time_travel_ext_prev_request_valid
;
40 static unsigned long long time_travel_ext_prev_request
;
41 static bool time_travel_ext_free_until_valid
;
42 static unsigned long long time_travel_ext_free_until
;
44 static void time_travel_set_time(unsigned long long ns
)
46 if (unlikely(ns
< time_travel_time
))
47 panic("time-travel: time goes backwards %lld -> %lld\n",
48 time_travel_time
, ns
);
49 time_travel_time
= ns
;
52 enum time_travel_message_handling
{
58 static void time_travel_handle_message(struct um_timetravel_msg
*msg
,
59 enum time_travel_message_handling mode
)
61 struct um_timetravel_msg resp
= {
62 .op
= UM_TIMETRAVEL_ACK
,
67 * Poll outside the locked section (if we're not called to only read
68 * the response) so we can get interrupts for e.g. virtio while we're
69 * here, but then we need to lock to not get interrupted between the
70 * read of the message and write of the ACK.
72 if (mode
!= TTMH_READ
) {
73 while (os_poll(1, &time_travel_ext_fd
) != 0) {
74 if (mode
== TTMH_IDLE
) {
75 BUG_ON(!irqs_disabled());
82 ret
= os_read_file(time_travel_ext_fd
, msg
, sizeof(*msg
));
85 panic("time-travel external link is broken\n");
86 if (ret
!= sizeof(*msg
))
87 panic("invalid time-travel message - %d bytes\n", ret
);
91 WARN_ONCE(1, "time-travel: unexpected message %lld\n",
92 (unsigned long long)msg
->op
);
94 case UM_TIMETRAVEL_ACK
:
96 case UM_TIMETRAVEL_RUN
:
97 time_travel_set_time(msg
->time
);
99 case UM_TIMETRAVEL_FREE_UNTIL
:
100 time_travel_ext_free_until_valid
= true;
101 time_travel_ext_free_until
= msg
->time
;
105 os_write_file(time_travel_ext_fd
, &resp
, sizeof(resp
));
108 static u64
time_travel_ext_req(u32 op
, u64 time
)
112 struct um_timetravel_msg msg
= {
120 * We need to save interrupts here and only restore when we
121 * got the ACK - otherwise we can get interrupted and send
122 * another request while we're still waiting for an ACK, but
123 * the peer doesn't know we got interrupted and will send
124 * the ACKs in the same order as the message, but we'd need
125 * to see them in the opposite order ...
127 * This wouldn't matter *too* much, but some ACKs carry the
128 * current time (for UM_TIMETRAVEL_GET) and getting another
129 * ACK without a time would confuse us a lot!
131 * The sequence number assignment that happens here lets us
132 * debug such message handling issues more easily.
134 local_irq_save(flags
);
135 os_write_file(time_travel_ext_fd
, &msg
, sizeof(msg
));
137 while (msg
.op
!= UM_TIMETRAVEL_ACK
)
138 time_travel_handle_message(&msg
, TTMH_READ
);
141 panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
142 msg
.op
, msg
.seq
, mseq
, msg
.time
);
144 if (op
== UM_TIMETRAVEL_GET
)
145 time_travel_set_time(msg
.time
);
146 local_irq_restore(flags
);
151 void __time_travel_wait_readable(int fd
)
153 int fds
[2] = { fd
, time_travel_ext_fd
};
156 if (time_travel_mode
!= TT_MODE_EXTERNAL
)
159 while ((ret
= os_poll(2, fds
))) {
160 struct um_timetravel_msg msg
;
163 time_travel_handle_message(&msg
, TTMH_READ
);
166 EXPORT_SYMBOL_GPL(__time_travel_wait_readable
);
168 static void time_travel_ext_update_request(unsigned long long time
)
170 if (time_travel_mode
!= TT_MODE_EXTERNAL
)
173 /* asked for exactly this time previously */
174 if (time_travel_ext_prev_request_valid
&&
175 time
== time_travel_ext_prev_request
)
178 time_travel_ext_prev_request
= time
;
179 time_travel_ext_prev_request_valid
= true;
180 time_travel_ext_req(UM_TIMETRAVEL_REQUEST
, time
);
183 void __time_travel_propagate_time(void)
185 time_travel_ext_req(UM_TIMETRAVEL_UPDATE
, time_travel_time
);
187 EXPORT_SYMBOL_GPL(__time_travel_propagate_time
);
189 /* returns true if we must do a wait to the simtime device */
190 static bool time_travel_ext_request(unsigned long long time
)
193 * If we received an external sync point ("free until") then we
194 * don't have to request/wait for anything until then, unless
195 * we're already waiting.
197 if (!time_travel_ext_waiting
&& time_travel_ext_free_until_valid
&&
198 time
< time_travel_ext_free_until
)
201 time_travel_ext_update_request(time
);
205 static void time_travel_ext_wait(bool idle
)
207 struct um_timetravel_msg msg
= {
208 .op
= UM_TIMETRAVEL_ACK
,
211 time_travel_ext_prev_request_valid
= false;
212 time_travel_ext_waiting
++;
214 time_travel_ext_req(UM_TIMETRAVEL_WAIT
, -1);
217 * Here we are deep in the idle loop, so we have to break out of the
218 * kernel abstraction in a sense and implement this in terms of the
219 * UML system waiting on the VQ interrupt while sleeping, when we get
220 * the signal it'll call time_travel_ext_vq_notify_done() completing the
223 while (msg
.op
!= UM_TIMETRAVEL_RUN
)
224 time_travel_handle_message(&msg
, idle
? TTMH_IDLE
: TTMH_POLL
);
226 time_travel_ext_waiting
--;
228 /* we might request more stuff while polling - reset when we run */
229 time_travel_ext_prev_request_valid
= false;
232 static void time_travel_ext_get_time(void)
234 time_travel_ext_req(UM_TIMETRAVEL_GET
, -1);
237 static void __time_travel_update_time(unsigned long long ns
, bool idle
)
239 if (time_travel_mode
== TT_MODE_EXTERNAL
&& time_travel_ext_request(ns
))
240 time_travel_ext_wait(idle
);
242 time_travel_set_time(ns
);
245 static struct time_travel_event
*time_travel_first_event(void)
247 return list_first_entry_or_null(&time_travel_events
,
248 struct time_travel_event
,
252 static void __time_travel_add_event(struct time_travel_event
*e
,
253 unsigned long long time
)
255 struct time_travel_event
*tmp
;
256 bool inserted
= false;
258 if (WARN(time_travel_mode
== TT_MODE_BASIC
&&
259 e
!= &time_travel_timer_event
,
260 "only timer events can be handled in basic mode"))
269 list_for_each_entry(tmp
, &time_travel_events
, list
) {
271 * Add the new entry before one with higher time,
272 * or if they're equal and both on stack, because
273 * in that case we need to unwind the stack in the
274 * right order, and the later event (timer sleep
275 * or such) must be dequeued first.
277 if ((tmp
->time
> e
->time
) ||
278 (tmp
->time
== e
->time
&& tmp
->onstack
&& e
->onstack
)) {
279 list_add_tail(&e
->list
, &tmp
->list
);
286 list_add_tail(&e
->list
, &time_travel_events
);
288 tmp
= time_travel_first_event();
289 time_travel_ext_update_request(tmp
->time
);
290 time_travel_next_event
= tmp
->time
;
293 static void time_travel_add_event(struct time_travel_event
*e
,
294 unsigned long long time
)
299 __time_travel_add_event(e
, time
);
302 void time_travel_periodic_timer(struct time_travel_event
*e
)
304 time_travel_add_event(&time_travel_timer_event
,
305 time_travel_time
+ time_travel_timer_interval
);
309 static void time_travel_deliver_event(struct time_travel_event
*e
)
311 if (e
== &time_travel_timer_event
) {
313 * deliver_alarm() does the irq_enter/irq_exit
314 * by itself, so must handle it specially here
320 local_irq_save(flags
);
324 local_irq_restore(flags
);
328 static bool time_travel_del_event(struct time_travel_event
*e
)
337 static void time_travel_update_time(unsigned long long next
, bool idle
)
339 struct time_travel_event ne
= {
342 struct time_travel_event
*e
;
343 bool finished
= idle
;
345 /* add it without a handler - we deal with that specifically below */
346 __time_travel_add_event(&ne
, next
);
349 e
= time_travel_first_event();
352 __time_travel_update_time(e
->time
, idle
);
354 /* new events may have been inserted while we were waiting */
355 if (e
== time_travel_first_event()) {
356 BUG_ON(!time_travel_del_event(e
));
357 BUG_ON(time_travel_time
!= e
->time
);
363 panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n",
364 time_travel_time
, e
->time
, e
);
365 time_travel_deliver_event(e
);
369 e
= time_travel_first_event();
371 time_travel_ext_update_request(e
->time
);
372 } while (ne
.pending
&& !finished
);
374 time_travel_del_event(&ne
);
377 void time_travel_ndelay(unsigned long nsec
)
379 time_travel_update_time(time_travel_time
+ nsec
, false);
381 EXPORT_SYMBOL(time_travel_ndelay
);
383 void time_travel_add_irq_event(struct time_travel_event
*e
)
385 BUG_ON(time_travel_mode
!= TT_MODE_EXTERNAL
);
387 time_travel_ext_get_time();
389 * We could model interrupt latency here, for now just
390 * don't have any latency at all and request the exact
391 * same time (again) to run the interrupt...
393 time_travel_add_event(e
, time_travel_time
);
395 EXPORT_SYMBOL_GPL(time_travel_add_irq_event
);
397 static void time_travel_oneshot_timer(struct time_travel_event
*e
)
402 void time_travel_sleep(unsigned long long duration
)
404 unsigned long long next
= time_travel_time
+ duration
;
406 if (time_travel_mode
== TT_MODE_BASIC
)
409 time_travel_update_time(next
, true);
411 if (time_travel_mode
== TT_MODE_BASIC
&&
412 time_travel_timer_event
.pending
) {
413 if (time_travel_timer_event
.fn
== time_travel_periodic_timer
) {
415 * This is somewhat wrong - we should get the first
416 * one sooner like the os_timer_one_shot() below...
418 os_timer_set_interval(time_travel_timer_interval
);
420 os_timer_one_shot(time_travel_timer_event
.time
- next
);
425 static void time_travel_handle_real_alarm(void)
427 time_travel_set_time(time_travel_next_event
);
429 time_travel_del_event(&time_travel_timer_event
);
431 if (time_travel_timer_event
.fn
== time_travel_periodic_timer
)
432 time_travel_add_event(&time_travel_timer_event
,
434 time_travel_timer_interval
);
437 static void time_travel_set_interval(unsigned long long interval
)
439 time_travel_timer_interval
= interval
;
442 static int time_travel_connect_external(const char *socket
)
445 unsigned long long id
= (unsigned long long)-1;
448 if ((sep
= strchr(socket
, ':'))) {
450 if (sep
- socket
> sizeof(buf
) - 1)
453 memcpy(buf
, socket
, sep
- socket
);
454 if (kstrtoull(buf
, 0, &id
)) {
456 panic("time-travel: invalid external ID in string '%s'\n",
464 rc
= os_connect_socket(socket
);
466 panic("time-travel: failed to connect to external socket %s\n",
471 time_travel_ext_fd
= rc
;
473 time_travel_ext_req(UM_TIMETRAVEL_START
, id
);
477 #else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
478 #define time_travel_start_set 0
479 #define time_travel_start 0
480 #define time_travel_time 0
482 static inline void time_travel_update_time(unsigned long long ns
, bool retearly
)
486 static inline void time_travel_handle_real_alarm(void)
490 static void time_travel_set_interval(unsigned long long interval
)
494 /* fail link if this actually gets used */
495 extern u64
time_travel_ext_req(u32 op
, u64 time
);
497 /* these are empty macros so the struct/fn need not exist */
498 #define time_travel_add_event(e, time) do { } while (0)
499 #define time_travel_del_event(e) do { } while (0)
502 void timer_handler(int sig
, struct siginfo
*unused_si
, struct uml_pt_regs
*regs
)
507 * In basic time-travel mode we still get real interrupts
508 * (signals) but since we don't read time from the OS, we
509 * must update the simulated time here to the expiry when
511 * This is not the case in inf-cpu mode, since there we
512 * never get any real signals from the OS.
514 if (time_travel_mode
== TT_MODE_BASIC
)
515 time_travel_handle_real_alarm();
517 local_irq_save(flags
);
518 do_IRQ(TIMER_IRQ
, regs
);
519 local_irq_restore(flags
);
522 static int itimer_shutdown(struct clock_event_device
*evt
)
524 if (time_travel_mode
!= TT_MODE_OFF
)
525 time_travel_del_event(&time_travel_timer_event
);
527 if (time_travel_mode
!= TT_MODE_INFCPU
&&
528 time_travel_mode
!= TT_MODE_EXTERNAL
)
534 static int itimer_set_periodic(struct clock_event_device
*evt
)
536 unsigned long long interval
= NSEC_PER_SEC
/ HZ
;
538 if (time_travel_mode
!= TT_MODE_OFF
) {
539 time_travel_del_event(&time_travel_timer_event
);
540 time_travel_set_event_fn(&time_travel_timer_event
,
541 time_travel_periodic_timer
);
542 time_travel_set_interval(interval
);
543 time_travel_add_event(&time_travel_timer_event
,
544 time_travel_time
+ interval
);
547 if (time_travel_mode
!= TT_MODE_INFCPU
&&
548 time_travel_mode
!= TT_MODE_EXTERNAL
)
549 os_timer_set_interval(interval
);
554 static int itimer_next_event(unsigned long delta
,
555 struct clock_event_device
*evt
)
559 if (time_travel_mode
!= TT_MODE_OFF
) {
560 time_travel_del_event(&time_travel_timer_event
);
561 time_travel_set_event_fn(&time_travel_timer_event
,
562 time_travel_oneshot_timer
);
563 time_travel_add_event(&time_travel_timer_event
,
564 time_travel_time
+ delta
);
567 if (time_travel_mode
!= TT_MODE_INFCPU
&&
568 time_travel_mode
!= TT_MODE_EXTERNAL
)
569 return os_timer_one_shot(delta
);
/* Enter one-shot mode by programming an event with a zero delta. */
static int itimer_one_shot(struct clock_event_device *evt)
{
	const unsigned long no_delay = 0;

	return itimer_next_event(no_delay, evt);
}
579 static struct clock_event_device timer_clockevent
= {
580 .name
= "posix-timer",
582 .cpumask
= cpu_possible_mask
,
583 .features
= CLOCK_EVT_FEAT_PERIODIC
|
584 CLOCK_EVT_FEAT_ONESHOT
,
585 .set_state_shutdown
= itimer_shutdown
,
586 .set_state_periodic
= itimer_set_periodic
,
587 .set_state_oneshot
= itimer_one_shot
,
588 .set_next_event
= itimer_next_event
,
590 .max_delta_ns
= 0xffffffff,
591 .max_delta_ticks
= 0xffffffff,
592 .min_delta_ns
= TIMER_MIN_DELTA
,
593 .min_delta_ticks
= TIMER_MIN_DELTA
, // microsecond resolution should be enough for anyone, same as 640K RAM
598 static irqreturn_t
um_timer(int irq
, void *dev
)
600 if (get_current()->mm
!= NULL
)
602 /* userspace - relay signal, results in correct userspace timers */
603 os_alarm_process(get_current()->mm
->context
.id
.u
.pid
);
606 (*timer_clockevent
.event_handler
)(&timer_clockevent
);
611 static u64
timer_read(struct clocksource
*cs
)
613 if (time_travel_mode
!= TT_MODE_OFF
) {
615 * We make reading the timer cost a bit so that we don't get
616 * stuck in loops that expect time to move more than the
617 * exact requested sleep amount, e.g. python's socket server,
618 * see https://bugs.python.org/issue37026.
620 * However, don't do that when we're in interrupt or such as
621 * then we might recurse into our own processing, and get to
622 * even more waiting, and that's not good - it messes up the
623 * "what do I do next" and onstack event we use to know when
624 * to return from time_travel_update_time().
626 if (!irqs_disabled() && !in_interrupt() && !in_softirq())
627 time_travel_update_time(time_travel_time
+
630 return time_travel_time
/ TIMER_MULTIPLIER
;
633 return os_nsecs() / TIMER_MULTIPLIER
;
636 static struct clocksource timer_clocksource
= {
640 .mask
= CLOCKSOURCE_MASK(64),
641 .flags
= CLOCK_SOURCE_IS_CONTINUOUS
,
644 static void __init
um_timer_setup(void)
648 err
= request_irq(TIMER_IRQ
, um_timer
, IRQF_TIMER
, "hr timer", NULL
);
650 printk(KERN_ERR
"register_timer : request_irq failed - "
651 "errno = %d\n", -err
);
653 err
= os_timer_create();
655 printk(KERN_ERR
"creation of timer failed - errno = %d\n", -err
);
659 err
= clocksource_register_hz(&timer_clocksource
, NSEC_PER_SEC
/TIMER_MULTIPLIER
);
661 printk(KERN_ERR
"clocksource_register_hz returned %d\n", err
);
664 clockevents_register_device(&timer_clockevent
);
667 void read_persistent_clock64(struct timespec64
*ts
)
671 if (time_travel_start_set
)
672 nsecs
= time_travel_start
+ time_travel_time
;
673 else if (time_travel_mode
== TT_MODE_EXTERNAL
)
674 nsecs
= time_travel_ext_req(UM_TIMETRAVEL_GET_TOD
, -1);
676 nsecs
= os_persistent_clock_emulation();
678 set_normalized_timespec64(ts
, nsecs
/ NSEC_PER_SEC
,
679 nsecs
% NSEC_PER_SEC
);
682 void __init
time_init(void)
684 timer_set_signal_handler();
685 late_time_init
= um_timer_setup
;
688 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
689 unsigned long calibrate_delay_is_known(void)
691 if (time_travel_mode
== TT_MODE_INFCPU
||
692 time_travel_mode
== TT_MODE_EXTERNAL
)
697 int setup_time_travel(char *str
)
699 if (strcmp(str
, "=inf-cpu") == 0) {
700 time_travel_mode
= TT_MODE_INFCPU
;
701 timer_clockevent
.name
= "time-travel-timer-infcpu";
702 timer_clocksource
.name
= "time-travel-clock";
706 if (strncmp(str
, "=ext:", 5) == 0) {
707 time_travel_mode
= TT_MODE_EXTERNAL
;
708 timer_clockevent
.name
= "time-travel-timer-external";
709 timer_clocksource
.name
= "time-travel-clock-external";
710 return time_travel_connect_external(str
+ 5);
714 time_travel_mode
= TT_MODE_BASIC
;
715 timer_clockevent
.name
= "time-travel-timer";
716 timer_clocksource
.name
= "time-travel-clock";
723 __setup("time-travel", setup_time_travel
);
724 __uml_help(setup_time_travel
,
726 "This option just enables basic time travel mode, in which the clock/timers\n"
727 "inside the UML instance skip forward when there's nothing to do, rather than\n"
728 "waiting for real time to elapse. However, instance CPU speed is limited by\n"
729 "the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n"
730 "clock (but quicker when there's nothing to do).\n"
732 "time-travel=inf-cpu\n"
733 "This enables time travel mode with infinite processing power, in which there\n"
734 "are no wall clock timers, and any CPU processing happens - as seen from the\n"
735 "guest - instantly. This can be useful for accurate simulation regardless of\n"
736 "debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
737 "easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
739 "time-travel=ext:[ID:]/path/to/socket\n"
740 "This enables time travel mode similar to =inf-cpu, except the system will\n"
741 "use the given socket to coordinate with a central scheduler, in order to\n"
742 "have more than one system simultaneously be on simulated time. The virtio\n"
743 "driver code in UML knows about this so you can also simulate networks and\n"
744 "devices using it, assuming the device has the right capabilities.\n"
745 "The optional ID is a 64-bit integer that's sent to the central scheduler.\n");
747 int setup_time_travel_start(char *str
)
751 err
= kstrtoull(str
, 0, &time_travel_start
);
755 time_travel_start_set
= 1;
759 __setup("time-travel-start", setup_time_travel_start
);
760 __uml_help(setup_time_travel_start
,
761 "time-travel-start=<seconds>\n"
762 "Configure the UML instance's wall clock to start at this value rather than\n"
763 "the host's wall clock at the time of UML boot.\n");