2 * Just as userspace programs request kernel operations through a system
3 * call, the Guest requests Host operations through a "hypercall". You might
4 * notice this nomenclature doesn't really follow any logic, but the name has
5 * been around for long enough that we're stuck with it. As you'd expect, this
6 * code is basically one big switch statement.
9 /* Copyright (C) 2006 Rusty Russell IBM Corporation
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include <linux/uaccess.h>
26 #include <linux/syscalls.h>
28 #include <linux/ktime.h>
30 #include <asm/pgtable.h>
34 * This is the core hypercall routine: where the Guest gets what it wants.
35 * Or gets killed. Or, in the case of LHCALL_SHUTDOWN, both.
37 static void do_hcall(struct lg_cpu
*cpu
, struct hcall_args
*args
)
40 case LHCALL_FLUSH_ASYNC
:
42 * This call does nothing, except that by breaking out of the Guest
43 * it makes us process all the asynchronous hypercalls.
46 case LHCALL_SEND_INTERRUPTS
:
48 * This call does nothing too, but by breaking out of the Guest
49 * it makes us process any pending interrupts.
52 case LHCALL_LGUEST_INIT
:
54 * You can't get here unless you're already initialized. Don't
57 kill_guest(cpu
, "already have lguest_data");
59 case LHCALL_SHUTDOWN
: {
62 * Shutdown is such a trivial hypercall that we do it in five
65 * If the lgread fails, it will call kill_guest() itself; the
66 * kill_guest() with the message will be ignored.
68 __lgread(cpu
, msg
, args
->arg1
, sizeof(msg
));
69 msg
[sizeof(msg
)-1] = '\0';
70 kill_guest(cpu
, "CRASH: %s", msg
);
71 if (args
->arg2
== LGUEST_SHUTDOWN_RESTART
)
72 cpu
->lg
->dead
= ERR_PTR(-ERESTART
);
75 case LHCALL_FLUSH_TLB
:
76 /* FLUSH_TLB comes in two flavors, depending on the argument: */
78 guest_pagetable_clear_all(cpu
);
80 guest_pagetable_flush_user(cpu
);
84 * All these calls simply pass the arguments through to the right
87 case LHCALL_NEW_PGTABLE
:
88 guest_new_pagetable(cpu
, args
->arg1
);
90 case LHCALL_SET_STACK
:
91 guest_set_stack(cpu
, args
->arg1
, args
->arg2
, args
->arg3
);
95 guest_set_pte(cpu
, args
->arg1
, args
->arg2
,
96 __pte(args
->arg3
| (u64
)args
->arg4
<< 32));
98 guest_set_pte(cpu
, args
->arg1
, args
->arg2
, __pte(args
->arg3
));
102 guest_set_pgd(cpu
->lg
, args
->arg1
, args
->arg2
);
104 #ifdef CONFIG_X86_PAE
106 guest_set_pmd(cpu
->lg
, args
->arg1
, args
->arg2
);
109 case LHCALL_SET_CLOCKEVENT
:
110 guest_set_clockevent(cpu
, args
->arg1
);
113 /* Similarly, this sets the halted flag for run_guest(). */
117 /* It should be an architecture-specific hypercall. */
118 if (lguest_arch_do_hcall(cpu
, args
))
119 kill_guest(cpu
, "Bad hypercall %li\n", args
->arg0
);
124 * Asynchronous hypercalls are easy: we just look in the array in the
125 * Guest's "struct lguest_data" to see if any new ones are marked "ready".
127 * We are careful to do these in order: obviously we respect the order the
128 * Guest put them in the ring, but we also promise the Guest that they will
129 * happen before any normal hypercall (which is why we check this before
130 * checking for a normal hcall).
132 static void do_async_hcalls(struct lg_cpu
*cpu
)
135 u8 st
[LHCALL_RING_SIZE
];
137 /* For simplicity, we copy the entire call status array in at once. */
138 if (copy_from_user(&st
, &cpu
->lg
->lguest_data
->hcall_status
, sizeof(st
)))
141 /* We process the hcalls[] ring in "struct lguest_data" once. */
142 for (i
= 0; i
< ARRAY_SIZE(st
); i
++) {
143 struct hcall_args args
;
145 * We remember where we were up to from last time. This makes
146 * sure that the hypercalls are done in the order the Guest
147 * places them in the ring.
149 unsigned int n
= cpu
->next_hcall
;
151 /* 0xFF means there's no call here (yet). */
156 * OK, we have a hypercall. Increment the "next_hcall" cursor,
157 * and wrap back to 0 if we reach the end.
159 if (++cpu
->next_hcall
== LHCALL_RING_SIZE
)
163 * Copy the hypercall arguments into a local copy of the
166 if (copy_from_user(&args
, &cpu
->lg
->lguest_data
->hcalls
[n
],
167 sizeof(struct hcall_args
))) {
168 kill_guest(cpu
, "Fetching async hypercalls");
172 /* Do the hypercall, same as a normal one. */
173 do_hcall(cpu
, &args
);
175 /* Mark the hypercall done. */
176 if (put_user(0xFF, &cpu
->lg
->lguest_data
->hcall_status
[n
])) {
177 kill_guest(cpu
, "Writing result for async hypercall");
182 * Stop doing hypercalls if they want to notify the Launcher:
183 * it needs to service this first.
185 if (cpu
->pending
.trap
)
191 * Last of all, we look at what happens first of all. The very first time the
192 * Guest makes a hypercall, we end up here to set things up:
194 static void initialize(struct lg_cpu
*cpu
)
197 * You can't do anything until you're initialized. The Guest knows the
198 * rules, so we're unforgiving here.
200 if (cpu
->hcall
->arg0
!= LHCALL_LGUEST_INIT
) {
201 kill_guest(cpu
, "hypercall %li before INIT", cpu
->hcall
->arg0
);
205 if (lguest_arch_init_hypercalls(cpu
))
206 kill_guest(cpu
, "bad guest page %p", cpu
->lg
->lguest_data
);
209 * The Guest tells us where we're not to deliver interrupts by putting
210 * the instruction address into "struct lguest_data".
212 if (get_user(cpu
->lg
->noirq_iret
, &cpu
->lg
->lguest_data
->noirq_iret
))
213 kill_guest(cpu
, "bad guest page %p", cpu
->lg
->lguest_data
);
216 * We write the current time into the Guest's data page once so it can
219 write_timestamp(cpu
);
221 /* page_tables.c will also do some setup. */
222 page_table_guest_data_init(cpu
);
225 * This is the one case where the above accesses might have been the
226 * first write to a Guest page. This may have caused a copy-on-write
227 * fault, but the old page might be (read-only) in the Guest
230 guest_pagetable_clear_all(cpu
);
235 * If a Guest reads from a page (so creates a mapping) that it has never
236 * written to, and then the Launcher writes to it (ie. the output of a virtual
237 * device), the Guest will still see the old page. In practice, this never
238 * happens: why would the Guest read a page which it has never written to? But
239 * a similar scenario might one day bite us, so it's worth mentioning.
241 * Note that if we used a shared anonymous mapping in the Launcher instead of
242 * mapping /dev/zero private, we wouldn't worry about copy-on-write. And we
243 * need that to switch the Launcher to processes (away from threads) anyway.
249 * Remember from the Guest, hypercalls come in two flavors: normal and
250 * asynchronous. This file handles both types.
252 void do_hypercalls(struct lg_cpu
*cpu
)
254 /* Not initialized yet? This hypercall must do it. */
255 if (unlikely(!cpu
->lg
->lguest_data
)) {
256 /* Set up the "struct lguest_data" */
264 * The Guest has initialized.
266 * Look in the hypercall ring for the async hypercalls:
268 do_async_hcalls(cpu
);
271 * If we stopped reading the hypercall ring because the Guest did a
272 * NOTIFY to the Launcher, we want to return now. Otherwise we do
275 if (!cpu
->pending
.trap
) {
276 do_hcall(cpu
, cpu
->hcall
);
278 * Tricky point: we reset the hcall pointer to mark the
279 * hypercall as "done". We use the hcall pointer rather than
280 * the trap number to indicate a hypercall is pending.
281 * Normally it doesn't matter: the Guest will run again and
282 * update the trap number before we come back here.
284 * However, if we are signalled or the Guest sends I/O to the
285 * Launcher, the run_guest() loop will exit without running the
286 * Guest. When it comes back it would try to re-run the
287 * hypercall. Finding that bug sucked.
294 * This routine supplies the Guest with time: it's used for wallclock time at
295 * initial boot and as a rough time source if the TSC isn't available.
297 void write_timestamp(struct lg_cpu
*cpu
)
300 ktime_get_real_ts(&now
);
301 if (copy_to_user(&cpu
->lg
->lguest_data
->time
,
302 &now
, sizeof(struct timespec
)))
303 kill_guest(cpu
, "Writing timestamp");