/*
 * s390host.c --  hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
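
/*
 * Statistics exported through debugfs: one per-vcpu counter for each exit
 * reason, delivered interrupt type and handled instruction listed below.
 */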
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "instruction_lctg", VCPU_STAT(instruction_lctg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ NULL }
};

/* Section: not file related */
void kvm_arch_hardware_enable(void *garbage)
{
	/* every s390 is virtualization enabled ;-) */
}

void kvm_arch_hardware_disable(void *garbage)
{
}

void decache_vcpus_on_cpu(int cpu)
{
}

int kvm_arch_hardware_setup(void)
{
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
}

void kvm_arch_check_processor_compat(void *rtn)
{
}

int kvm_arch_init(void *opaque)
{
	return 0;
}

void kvm_arch_exit(void)
{
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_dev_ioctl_check_extension(long ext)
{
	return 0;
}

/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	return 0;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	default:
		r = -EINVAL;
	}

	return r;
}

struct kvm *kvm_arch_create_vm(void)
{
	struct kvm *kvm;
	int rc;
	char debug_name[16];

	rc = s390_enable_sie();
	if (rc)
		goto out_nokvm;

	rc = -ENOMEM;
	kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
	if (!kvm)
		goto out_nokvm;

	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_nosca;

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_nodbf;

	spin_lock_init(&kvm->arch.float_int.lock);
	INIT_LIST_HEAD(&kvm->arch.float_int.list);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "%s", "vm created");

	try_module_get(THIS_MODULE);

	return kvm;
out_nodbf:
	free_page((unsigned long)(kvm->arch.sca));
out_nosca:
	kfree(kvm);
out_nokvm:
	return ERR_PTR(rc);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)(kvm->arch.sca));
	kfree(kvm);
	module_put(THIS_MODULE);
}

/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but doesn't call it */
}
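
/*
 * Lazy register switching: vcpu_load saves the host floating point and
 * access registers and installs the guest copies; vcpu_put does the reverse.
 */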
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	save_fp_regs(&vcpu->arch.host_fpregs);
	save_access_regs(vcpu->arch.host_acrs);
	vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
	restore_fp_regs(&vcpu->arch.guest_fpregs);
	restore_access_regs(vcpu->arch.guest_acrs);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	save_fp_regs(&vcpu->arch.guest_fpregs);
	save_access_regs(vcpu->arch.guest_acrs);
	restore_fp_regs(&vcpu->arch.host_fpregs);
	restore_access_regs(vcpu->arch.host_acrs);
}

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	vcpu->arch.sie_block->prefix    = 0UL;
	vcpu->arch.sie_block->ihcpu     = 0xffff;
	vcpu->arch.sie_block->cputm     = 0UL;
	vcpu->arch.sie_block->ckc       = 0UL;
	vcpu->arch.sie_block->todpr     = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
}
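
/*
 * First-time setup of the SIE control block for this vcpu: z/Architecture
 * guest mode, the guest memory window, the interception controls used by
 * the intercept handlers, and a timer that wakes an idle vcpu when its
 * clock comparator fires.
 */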
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
	vcpu->arch.sie_block->gmslm = 0xffffffffffUL;
	vcpu->arch.sie_block->gmsor = 0x000000000000;
	vcpu->arch.sie_block->ecb = 2;
	vcpu->arch.sie_block->eca = 0xC1002001U;
	setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
		    (unsigned long) vcpu);
	get_cpu_id(&vcpu->arch.cpu_id);
	vcpu->arch.cpu_id.version = 0xfe;
	return 0;
}
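
/*
 * Allocate a vcpu and its SIE control block, register the block in the
 * system control area (SCA) slot for this cpu id, and wire the vcpu's
 * local interrupt state into the VM-wide floating interrupt structure.
 */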
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
	int rc = -ENOMEM;

	if (!vcpu)
		goto out_nomem;

	vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL);

	if (!vcpu->arch.sie_block)
		goto out_free_cpu;

	vcpu->arch.sie_block->icpua = id;
	BUG_ON(!kvm->arch.sca);
	BUG_ON(kvm->arch.sca->cpu[id].sda);
	kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
	vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;

	spin_lock_init(&vcpu->arch.local_int.lock);
	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	spin_lock_bh(&kvm->arch.float_int.lock);
	kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
	init_waitqueue_head(&vcpu->arch.local_int.wq);
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	spin_unlock_bh(&kvm->arch.float_int.lock);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_cpu;
	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);

	try_module_get(THIS_MODULE);

	return vcpu;
out_free_cpu:
	kfree(vcpu);
out_nomem:
	return ERR_PTR(rc);
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
	free_page((unsigned long)(vcpu->arch.sie_block));
	kfree(vcpu);
	module_put(THIS_MODULE);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	return 0;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	vcpu_load(vcpu);
	kvm_s390_vcpu_initial_reset(vcpu);
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);
	memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);
	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
	vcpu_put(vcpu);
	return 0;
}
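
/*
 * The initial PSW may only be set while the vcpu is stopped; a running
 * vcpu rejects the request.
 */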
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	vcpu_load(vcpu);
	if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
		rc = -EBUSY;
	else
		vcpu->arch.sie_block->gpsw = psw;
	vcpu_put(vcpu);
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
				    struct kvm_debug_guest *dbg)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	return -EINVAL; /* not implemented yet */
}
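
/*
 * Run the guest in SIE: general purpose registers 14 and 15 travel through
 * the gg14/gg15 fields of the SIE control block and are copied back after
 * the intercept; the remaining registers are handed to sie64a() directly.
 */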
static void __vcpu_run(struct kvm_vcpu *vcpu)
{
	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);

	vcpu->arch.sie_block->icptcode = 0;

	VCPU_EVENT(vcpu, 6, "entering sie flags %x",
		   atomic_read(&vcpu->arch.sie_block->cpuflags));
	sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs);
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);

	memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	vcpu_load(vcpu);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);

	BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);

	switch (kvm_run->exit_reason) {
	case KVM_EXIT_S390_SIEIC:
		vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
		vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
		break;
	case KVM_EXIT_UNKNOWN:
	case KVM_EXIT_S390_RESET:
		break;
	default:
		BUG();
	}

	do {
		kvm_s390_deliver_pending_interrupts(vcpu);
		__vcpu_run(vcpu);
		rc = kvm_handle_sie_intercept(vcpu);
	} while (!signal_pending(current) && !rc);

	if (signal_pending(current) && !rc)
		rc = -EINTR;

	if (rc == -ENOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		kvm_run->s390_sieic.mask     = vcpu->arch.sie_block->gpsw.mask;
		kvm_run->s390_sieic.addr     = vcpu->arch.sie_block->gpsw.addr;
		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* intercept was handled, but userspace support is needed
		 * kvm_run has been prepared by the handler */
		rc = 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu_put(vcpu);

	vcpu->stat.exit_userspace++;
	return rc;
}
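
/*
 * Copy data into guest storage for the store-status handler below: if
 * prefix is set the destination is interpreted as a guest real address
 * (subject to lowcore prefixing), otherwise as a guest absolute address.
 */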
static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
		       unsigned long n, int prefix)
{
	if (prefix)
		return copy_to_guest(vcpu, guestdest, from, n);
	else
		return copy_to_guest_absolute(vcpu, guestdest, from, n);
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	const unsigned char archmode = 1;
	int prefix;

	if (addr == KVM_S390_STORE_STATUS_NOADDR) {
		if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
			return -EFAULT;
		addr = SAVE_AREA_BASE;
		prefix = 0;
	} else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
		if (copy_to_guest(vcpu, 163ul, &archmode, 1))
			return -EFAULT;
		addr = SAVE_AREA_BASE;
		prefix = 1;
	} else
		prefix = 0;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
			vcpu->arch.guest_fpregs.fprs, 128, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
			vcpu->arch.guest_gprs, 128, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
			&vcpu->arch.sie_block->gpsw, 16, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
			&vcpu->arch.sie_block->prefix, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu,
			addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
			&vcpu->arch.guest_fpregs.fpc, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
			&vcpu->arch.sie_block->todpr, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
			&vcpu->arch.sie_block->cputm, 8, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
			&vcpu->arch.sie_block->ckc, 8, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
			&vcpu->arch.guest_acrs, 64, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu,
			addr + offsetof(struct save_area_s390x, ctrl_regs),
			&vcpu->arch.sie_block->gcr, 128, prefix))
		return -EFAULT;
	return 0;
}

static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	int rc;

	vcpu_load(vcpu);
	rc = __kvm_s390_vcpu_store_status(vcpu, addr);
	vcpu_put(vcpu);
	return rc;
}
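
/*
 * Dispatcher for the s390-specific vcpu ioctls. Userspace invokes these on
 * the vcpu file descriptor, roughly like this (illustrative sketch only,
 * not code from this file):
 *
 *	struct kvm_s390_interrupt irq = { .type = KVM_S390_INT_VIRTIO };
 *	ioctl(vcpu_fd, KVM_S390_INTERRUPT, &irq);
 */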
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390int);
	}
	case KVM_S390_STORE_STATUS:
		return kvm_s390_vcpu_store_status(vcpu, arg);
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		if (copy_from_user(&psw, argp, sizeof(psw)))
			return -EFAULT;
		return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
	}
	case KVM_S390_INITIAL_RESET:
		return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
	default:
		break;
	}
	return -EINVAL;
}

/* Section: memory related */
int kvm_arch_set_memory_region(struct kvm *kvm,
			       struct kvm_userspace_memory_region *mem,
			       struct kvm_memory_slot old,
			       int user_alloc)
{
	/* A few sanity checks. We can have exactly one memory slot which has
	   to start at guest virtual zero and which has to be located at a
	   page boundary in userland and which has to end at a page boundary.
	   The memory in userland is ok to be fragmented into various different
	   vmas. It is okay to mmap() and munmap() stuff in this slot after
	   doing this call at any time */

	if (mem->slot)
		return -EINVAL;

	if (mem->guest_phys_addr)
		return -EINVAL;

	if (mem->userspace_addr & (PAGE_SIZE - 1))
		return -EINVAL;

	if (mem->memory_size & (PAGE_SIZE - 1))
		return -EINVAL;

	kvm->arch.guest_origin = mem->userspace_addr;
	kvm->arch.guest_memsize = mem->memory_size;

	/* FIXME: we do want to interrupt running CPUs and update their memory
	   configuration now to avoid race conditions. But hey, changing the
	   memory layout while virtual CPUs are running is usually bad
	   programming practice. */

	return 0;
}

gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
{
	return gfn;
}
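
/*
 * Register with the generic KVM framework on module load; no opaque arch
 * data is passed and the vcpu allocation size is plain struct kvm_vcpu.
 */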
static int __init kvm_s390_init(void)
{
	return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);