arch/s390/kvm/kvm-s390.c
1 /*
2 * hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008, 2009
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/etr.h>
32 #include <asm/pgtable.h>
33 #include <asm/gmap.h>
34 #include <asm/nmi.h>
35 #include <asm/switch_to.h>
36 #include <asm/isc.h>
37 #include <asm/sclp.h>
38 #include "kvm-s390.h"
39 #include "gaccess.h"
41 #define KMSG_COMPONENT "kvm-s390"
42 #undef pr_fmt
43 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
45 #define CREATE_TRACE_POINTS
46 #include "trace.h"
47 #include "trace-s390.h"
49 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
50 #define LOCAL_IRQS 32
51 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
52 (KVM_MAX_VCPUS + LOCAL_IRQS))
54 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
56 struct kvm_stats_debugfs_item debugfs_entries[] = {
57 { "userspace_handled", VCPU_STAT(exit_userspace) },
58 { "exit_null", VCPU_STAT(exit_null) },
59 { "exit_validity", VCPU_STAT(exit_validity) },
60 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
61 { "exit_external_request", VCPU_STAT(exit_external_request) },
62 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
63 { "exit_instruction", VCPU_STAT(exit_instruction) },
64 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
65 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
66 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
67 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
68 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
69 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
70 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
71 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
72 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
73 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
74 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
75 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
76 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
77 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
78 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
79 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
80 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
81 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
82 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
83 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
84 { "instruction_spx", VCPU_STAT(instruction_spx) },
85 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
86 { "instruction_stap", VCPU_STAT(instruction_stap) },
87 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
88 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
89 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
90 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
91 { "instruction_essa", VCPU_STAT(instruction_essa) },
92 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
93 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
94 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
95 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
96 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
97 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
98 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
99 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
100 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
101 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
102 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
103 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
104 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
105 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
106 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
107 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
108 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
109 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
110 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
111 { "diagnose_10", VCPU_STAT(diagnose_10) },
112 { "diagnose_44", VCPU_STAT(diagnose_44) },
113 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
114 { "diagnose_258", VCPU_STAT(diagnose_258) },
115 { "diagnose_308", VCPU_STAT(diagnose_308) },
116 { "diagnose_500", VCPU_STAT(diagnose_500) },
117 { NULL }
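/*
 * The table above is walked by common KVM code to create one debugfs file
 * per counter.  VCPU_STAT() stores the offset of each counter within
 * struct kvm_vcpu together with KVM_STAT_VCPU, marking them as per-vcpu
 * statistics; the { NULL } entry terminates the list.
 */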
120 /* upper facilities limit for kvm */
121 unsigned long kvm_s390_fac_list_mask[] = {
122 0xffe6fffbfcfdfc40UL,
123 0x005e800000000000UL,
126 unsigned long kvm_s390_fac_list_mask_size(void)
128 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
129 return ARRAY_SIZE(kvm_s390_fac_list_mask);
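/*
 * kvm_s390_fac_list_mask is the upper bound on the STFLE facility bits that
 * KVM will ever report to a guest.  kvm_arch_init_vm() ANDs the host's
 * facility list with this mask, and the BUILD_BUG_ON above guards against
 * the mask outgrowing the space reserved for it.
 */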
132 static struct gmap_notifier gmap_notifier;
133 debug_info_t *kvm_s390_dbf;
135 /* Section: not file related */
136 int kvm_arch_hardware_enable(void)
138 /* every s390 is virtualization enabled ;-) */
139 return 0;
142 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
145 * This callback is executed during stop_machine(). All CPUs are therefore
146 * temporarily stopped. In order not to change guest behavior, we have to
147 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
148 * so a CPU won't be stopped while calculating with the epoch.
150 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
151 void *v)
153 struct kvm *kvm;
154 struct kvm_vcpu *vcpu;
155 int i;
156 unsigned long long *delta = v;
158 list_for_each_entry(kvm, &vm_list, vm_list) {
159 kvm->arch.epoch -= *delta;
160 kvm_for_each_vcpu(i, vcpu, kvm) {
161 vcpu->arch.sie_block->epoch -= *delta;
162 if (vcpu->arch.cputm_enabled)
163 vcpu->arch.cputm_start += *delta;
166 return NOTIFY_OK;
169 static struct notifier_block kvm_clock_notifier = {
170 .notifier_call = kvm_clock_sync,
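/*
 * When the host TOD clock is adjusted, kvm_clock_sync() runs from
 * stop_machine() and applies the same delta to kvm->arch.epoch and to every
 * SIE block epoch, so the guest-visible TOD does not jump.  cputm_start is
 * moved by the delta as well to keep CPU timer accounting consistent.
 */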
173 int kvm_arch_hardware_setup(void)
175 gmap_notifier.notifier_call = kvm_gmap_notifier;
176 gmap_register_ipte_notifier(&gmap_notifier);
177 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
178 &kvm_clock_notifier);
179 return 0;
182 void kvm_arch_hardware_unsetup(void)
184 gmap_unregister_ipte_notifier(&gmap_notifier);
185 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
186 &kvm_clock_notifier);
189 int kvm_arch_init(void *opaque)
191 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
192 if (!kvm_s390_dbf)
193 return -ENOMEM;
195 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
196 debug_unregister(kvm_s390_dbf);
197 return -ENOMEM;
200 /* Register floating interrupt controller interface. */
201 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
204 void kvm_arch_exit(void)
206 debug_unregister(kvm_s390_dbf);
209 /* Section: device related */
210 long kvm_arch_dev_ioctl(struct file *filp,
211 unsigned int ioctl, unsigned long arg)
213 if (ioctl == KVM_S390_ENABLE_SIE)
214 return s390_enable_sie();
215 return -EINVAL;
218 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
220 int r;
222 switch (ext) {
223 case KVM_CAP_S390_PSW:
224 case KVM_CAP_S390_GMAP:
225 case KVM_CAP_SYNC_MMU:
226 #ifdef CONFIG_KVM_S390_UCONTROL
227 case KVM_CAP_S390_UCONTROL:
228 #endif
229 case KVM_CAP_ASYNC_PF:
230 case KVM_CAP_SYNC_REGS:
231 case KVM_CAP_ONE_REG:
232 case KVM_CAP_ENABLE_CAP:
233 case KVM_CAP_S390_CSS_SUPPORT:
234 case KVM_CAP_IOEVENTFD:
235 case KVM_CAP_DEVICE_CTRL:
236 case KVM_CAP_ENABLE_CAP_VM:
237 case KVM_CAP_S390_IRQCHIP:
238 case KVM_CAP_VM_ATTRIBUTES:
239 case KVM_CAP_MP_STATE:
240 case KVM_CAP_S390_INJECT_IRQ:
241 case KVM_CAP_S390_USER_SIGP:
242 case KVM_CAP_S390_USER_STSI:
243 case KVM_CAP_S390_SKEYS:
244 case KVM_CAP_S390_IRQ_STATE:
245 r = 1;
246 break;
247 case KVM_CAP_S390_MEM_OP:
248 r = MEM_OP_MAX_SIZE;
249 break;
250 case KVM_CAP_NR_VCPUS:
251 case KVM_CAP_MAX_VCPUS:
252 r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
253 : KVM_S390_BSCA_CPU_SLOTS;
254 break;
255 case KVM_CAP_NR_MEMSLOTS:
256 r = KVM_USER_MEM_SLOTS;
257 break;
258 case KVM_CAP_S390_COW:
259 r = MACHINE_HAS_ESOP;
260 break;
261 case KVM_CAP_S390_VECTOR_REGISTERS:
262 r = MACHINE_HAS_VX;
263 break;
264 case KVM_CAP_S390_RI:
265 r = test_facility(64);
266 break;
267 default:
268 r = 0;
270 return r;
273 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
274 struct kvm_memory_slot *memslot)
276 gfn_t cur_gfn, last_gfn;
277 unsigned long address;
278 struct gmap *gmap = kvm->arch.gmap;
280 /* Loop over all guest pages */
281 last_gfn = memslot->base_gfn + memslot->npages;
282 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
283 address = gfn_to_hva_memslot(memslot, cur_gfn);
285 if (test_and_clear_guest_dirty(gmap->mm, address))
286 mark_page_dirty(kvm, cur_gfn);
287 if (fatal_signal_pending(current))
288 return;
289 cond_resched();
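/*
 * The loop above walks every page of the memslot: the gfn is translated to
 * its host virtual address, the dirty (change) state is transferred from the
 * host page table into KVM's dirty bitmap via test_and_clear_guest_dirty(),
 * and the loop reschedules periodically and aborts on fatal signals.
 */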
293 /* Section: vm related */
294 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
297 * Get (and clear) the dirty memory log for a memory slot.
299 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
300 struct kvm_dirty_log *log)
302 int r;
303 unsigned long n;
304 struct kvm_memslots *slots;
305 struct kvm_memory_slot *memslot;
306 int is_dirty = 0;
308 mutex_lock(&kvm->slots_lock);
310 r = -EINVAL;
311 if (log->slot >= KVM_USER_MEM_SLOTS)
312 goto out;
314 slots = kvm_memslots(kvm);
315 memslot = id_to_memslot(slots, log->slot);
316 r = -ENOENT;
317 if (!memslot->dirty_bitmap)
318 goto out;
320 kvm_s390_sync_dirty_log(kvm, memslot);
321 r = kvm_get_dirty_log(kvm, log, &is_dirty);
322 if (r)
323 goto out;
325 /* Clear the dirty log */
326 if (is_dirty) {
327 n = kvm_dirty_bitmap_bytes(memslot);
328 memset(memslot->dirty_bitmap, 0, n);
330 r = 0;
331 out:
332 mutex_unlock(&kvm->slots_lock);
333 return r;
336 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
338 int r;
340 if (cap->flags)
341 return -EINVAL;
343 switch (cap->cap) {
344 case KVM_CAP_S390_IRQCHIP:
345 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
346 kvm->arch.use_irqchip = 1;
347 r = 0;
348 break;
349 case KVM_CAP_S390_USER_SIGP:
350 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
351 kvm->arch.user_sigp = 1;
352 r = 0;
353 break;
354 case KVM_CAP_S390_VECTOR_REGISTERS:
355 mutex_lock(&kvm->lock);
356 if (atomic_read(&kvm->online_vcpus)) {
357 r = -EBUSY;
358 } else if (MACHINE_HAS_VX) {
359 set_kvm_facility(kvm->arch.model.fac_mask, 129);
360 set_kvm_facility(kvm->arch.model.fac_list, 129);
361 r = 0;
362 } else
363 r = -EINVAL;
364 mutex_unlock(&kvm->lock);
365 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
366 r ? "(not available)" : "(success)");
367 break;
368 case KVM_CAP_S390_RI:
369 r = -EINVAL;
370 mutex_lock(&kvm->lock);
371 if (atomic_read(&kvm->online_vcpus)) {
372 r = -EBUSY;
373 } else if (test_facility(64)) {
374 set_kvm_facility(kvm->arch.model.fac_mask, 64);
375 set_kvm_facility(kvm->arch.model.fac_list, 64);
376 r = 0;
378 mutex_unlock(&kvm->lock);
379 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
380 r ? "(not available)" : "(success)");
381 break;
382 case KVM_CAP_S390_USER_STSI:
383 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
384 kvm->arch.user_stsi = 1;
385 r = 0;
386 break;
387 default:
388 r = -EINVAL;
389 break;
391 return r;
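/*
 * Capabilities that change the guest CPU model (vector registers, runtime
 * instrumentation) may only be enabled while no vcpus exist, hence the
 * online_vcpus check under kvm->lock and the -EBUSY return; the remaining
 * capabilities simply set flags in kvm->arch.
 */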
394 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
396 int ret;
398 switch (attr->attr) {
399 case KVM_S390_VM_MEM_LIMIT_SIZE:
400 ret = 0;
401 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
402 kvm->arch.mem_limit);
403 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
404 ret = -EFAULT;
405 break;
406 default:
407 ret = -ENXIO;
408 break;
410 return ret;
413 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
415 int ret;
416 unsigned int idx;
417 switch (attr->attr) {
418 case KVM_S390_VM_MEM_ENABLE_CMMA:
419 /* enable CMMA only for z10 and later (EDAT_1) */
420 ret = -EINVAL;
421 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
422 break;
424 ret = -EBUSY;
425 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
426 mutex_lock(&kvm->lock);
427 if (atomic_read(&kvm->online_vcpus) == 0) {
428 kvm->arch.use_cmma = 1;
429 ret = 0;
431 mutex_unlock(&kvm->lock);
432 break;
433 case KVM_S390_VM_MEM_CLR_CMMA:
434 ret = -EINVAL;
435 if (!kvm->arch.use_cmma)
436 break;
438 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
439 mutex_lock(&kvm->lock);
440 idx = srcu_read_lock(&kvm->srcu);
441 s390_reset_cmma(kvm->arch.gmap->mm);
442 srcu_read_unlock(&kvm->srcu, idx);
443 mutex_unlock(&kvm->lock);
444 ret = 0;
445 break;
446 case KVM_S390_VM_MEM_LIMIT_SIZE: {
447 unsigned long new_limit;
449 if (kvm_is_ucontrol(kvm))
450 return -EINVAL;
452 if (get_user(new_limit, (u64 __user *)attr->addr))
453 return -EFAULT;
455 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
456 new_limit > kvm->arch.mem_limit)
457 return -E2BIG;
459 if (!new_limit)
460 return -EINVAL;
462 /* gmap_alloc takes last usable address */
463 if (new_limit != KVM_S390_NO_MEM_LIMIT)
464 new_limit -= 1;
466 ret = -EBUSY;
467 mutex_lock(&kvm->lock);
468 if (atomic_read(&kvm->online_vcpus) == 0) {
469 /* gmap_alloc will round the limit up */
470 struct gmap *new = gmap_alloc(current->mm, new_limit);
472 if (!new) {
473 ret = -ENOMEM;
474 } else {
475 gmap_free(kvm->arch.gmap);
476 new->private = kvm;
477 kvm->arch.gmap = new;
478 ret = 0;
481 mutex_unlock(&kvm->lock);
482 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
483 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
484 (void *) kvm->arch.gmap->asce);
485 break;
487 default:
488 ret = -ENXIO;
489 break;
491 return ret;
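/*
 * Memory attributes: CMMA can only be enabled on LPAR with EDAT1 and before
 * any vcpu is created; KVM_S390_VM_MEM_CLR_CMMA resets all page usage
 * states; KVM_S390_VM_MEM_LIMIT_SIZE replaces the guest address space with a
 * new gmap sized for the limit (gmap_alloc() takes the last usable address,
 * hence the "new_limit -= 1" above).
 */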
494 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
496 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
498 struct kvm_vcpu *vcpu;
499 int i;
501 if (!test_kvm_facility(kvm, 76))
502 return -EINVAL;
504 mutex_lock(&kvm->lock);
505 switch (attr->attr) {
506 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
507 get_random_bytes(
508 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
509 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
510 kvm->arch.crypto.aes_kw = 1;
511 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
512 break;
513 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
514 get_random_bytes(
515 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
516 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
517 kvm->arch.crypto.dea_kw = 1;
518 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
519 break;
520 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
521 kvm->arch.crypto.aes_kw = 0;
522 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
523 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
524 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
525 break;
526 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
527 kvm->arch.crypto.dea_kw = 0;
528 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
529 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
530 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
531 break;
532 default:
533 mutex_unlock(&kvm->lock);
534 return -ENXIO;
537 kvm_for_each_vcpu(i, vcpu, kvm) {
538 kvm_s390_vcpu_crypto_setup(vcpu);
539 exit_sie(vcpu);
541 mutex_unlock(&kvm->lock);
542 return 0;
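/*
 * Enabling AES/DEA key wrapping installs fresh random wrapping key masks in
 * the crypto control block, disabling clears them.  Every vcpu is then
 * kicked out of SIE via exit_sie() and its crypto setup redone so the new
 * CRYCB settings take effect on the next SIE entry.
 */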
545 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
547 u8 gtod_high;
549 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
550 sizeof(gtod_high)))
551 return -EFAULT;
553 if (gtod_high != 0)
554 return -EINVAL;
555 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
557 return 0;
560 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
562 u64 gtod;
564 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
565 return -EFAULT;
567 kvm_s390_set_tod_clock(kvm, gtod);
568 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
569 return 0;
572 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
574 int ret;
576 if (attr->flags)
577 return -EINVAL;
579 switch (attr->attr) {
580 case KVM_S390_VM_TOD_HIGH:
581 ret = kvm_s390_set_tod_high(kvm, attr);
582 break;
583 case KVM_S390_VM_TOD_LOW:
584 ret = kvm_s390_set_tod_low(kvm, attr);
585 break;
586 default:
587 ret = -ENXIO;
588 break;
590 return ret;
593 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
595 u8 gtod_high = 0;
597 if (copy_to_user((void __user *)attr->addr, &gtod_high,
598 sizeof(gtod_high)))
599 return -EFAULT;
600 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
602 return 0;
605 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
607 u64 gtod;
609 gtod = kvm_s390_get_tod_clock_fast(kvm);
610 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
611 return -EFAULT;
612 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
614 return 0;
617 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
619 int ret;
621 if (attr->flags)
622 return -EINVAL;
624 switch (attr->attr) {
625 case KVM_S390_VM_TOD_HIGH:
626 ret = kvm_s390_get_tod_high(kvm, attr);
627 break;
628 case KVM_S390_VM_TOD_LOW:
629 ret = kvm_s390_get_tod_low(kvm, attr);
630 break;
631 default:
632 ret = -ENXIO;
633 break;
635 return ret;
638 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
640 struct kvm_s390_vm_cpu_processor *proc;
641 int ret = 0;
643 mutex_lock(&kvm->lock);
644 if (atomic_read(&kvm->online_vcpus)) {
645 ret = -EBUSY;
646 goto out;
648 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
649 if (!proc) {
650 ret = -ENOMEM;
651 goto out;
653 if (!copy_from_user(proc, (void __user *)attr->addr,
654 sizeof(*proc))) {
655 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
656 sizeof(struct cpuid));
657 kvm->arch.model.ibc = proc->ibc;
658 memcpy(kvm->arch.model.fac_list, proc->fac_list,
659 S390_ARCH_FAC_LIST_SIZE_BYTE);
660 } else
661 ret = -EFAULT;
662 kfree(proc);
663 out:
664 mutex_unlock(&kvm->lock);
665 return ret;
668 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
670 int ret = -ENXIO;
672 switch (attr->attr) {
673 case KVM_S390_VM_CPU_PROCESSOR:
674 ret = kvm_s390_set_processor(kvm, attr);
675 break;
677 return ret;
680 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
682 struct kvm_s390_vm_cpu_processor *proc;
683 int ret = 0;
685 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
686 if (!proc) {
687 ret = -ENOMEM;
688 goto out;
690 memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
691 proc->ibc = kvm->arch.model.ibc;
692 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
693 S390_ARCH_FAC_LIST_SIZE_BYTE);
694 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
695 ret = -EFAULT;
696 kfree(proc);
697 out:
698 return ret;
701 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
703 struct kvm_s390_vm_cpu_machine *mach;
704 int ret = 0;
706 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
707 if (!mach) {
708 ret = -ENOMEM;
709 goto out;
711 get_cpu_id((struct cpuid *) &mach->cpuid);
712 mach->ibc = sclp.ibc;
713 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
714 S390_ARCH_FAC_LIST_SIZE_BYTE);
715 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
716 S390_ARCH_FAC_LIST_SIZE_BYTE);
717 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
718 ret = -EFAULT;
719 kfree(mach);
720 out:
721 return ret;
724 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
726 int ret = -ENXIO;
728 switch (attr->attr) {
729 case KVM_S390_VM_CPU_PROCESSOR:
730 ret = kvm_s390_get_processor(kvm, attr);
731 break;
732 case KVM_S390_VM_CPU_MACHINE:
733 ret = kvm_s390_get_machine(kvm, attr);
734 break;
736 return ret;
739 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
741 int ret;
743 switch (attr->group) {
744 case KVM_S390_VM_MEM_CTRL:
745 ret = kvm_s390_set_mem_control(kvm, attr);
746 break;
747 case KVM_S390_VM_TOD:
748 ret = kvm_s390_set_tod(kvm, attr);
749 break;
750 case KVM_S390_VM_CPU_MODEL:
751 ret = kvm_s390_set_cpu_model(kvm, attr);
752 break;
753 case KVM_S390_VM_CRYPTO:
754 ret = kvm_s390_vm_set_crypto(kvm, attr);
755 break;
756 default:
757 ret = -ENXIO;
758 break;
761 return ret;
764 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
766 int ret;
768 switch (attr->group) {
769 case KVM_S390_VM_MEM_CTRL:
770 ret = kvm_s390_get_mem_control(kvm, attr);
771 break;
772 case KVM_S390_VM_TOD:
773 ret = kvm_s390_get_tod(kvm, attr);
774 break;
775 case KVM_S390_VM_CPU_MODEL:
776 ret = kvm_s390_get_cpu_model(kvm, attr);
777 break;
778 default:
779 ret = -ENXIO;
780 break;
783 return ret;
786 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
788 int ret;
790 switch (attr->group) {
791 case KVM_S390_VM_MEM_CTRL:
792 switch (attr->attr) {
793 case KVM_S390_VM_MEM_ENABLE_CMMA:
794 case KVM_S390_VM_MEM_CLR_CMMA:
795 case KVM_S390_VM_MEM_LIMIT_SIZE:
796 ret = 0;
797 break;
798 default:
799 ret = -ENXIO;
800 break;
802 break;
803 case KVM_S390_VM_TOD:
804 switch (attr->attr) {
805 case KVM_S390_VM_TOD_LOW:
806 case KVM_S390_VM_TOD_HIGH:
807 ret = 0;
808 break;
809 default:
810 ret = -ENXIO;
811 break;
813 break;
814 case KVM_S390_VM_CPU_MODEL:
815 switch (attr->attr) {
816 case KVM_S390_VM_CPU_PROCESSOR:
817 case KVM_S390_VM_CPU_MACHINE:
818 ret = 0;
819 break;
820 default:
821 ret = -ENXIO;
822 break;
824 break;
825 case KVM_S390_VM_CRYPTO:
826 switch (attr->attr) {
827 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
828 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
829 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
830 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
831 ret = 0;
832 break;
833 default:
834 ret = -ENXIO;
835 break;
837 break;
838 default:
839 ret = -ENXIO;
840 break;
843 return ret;
846 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
848 uint8_t *keys;
849 uint64_t hva;
850 unsigned long curkey;
851 int i, r = 0;
853 if (args->flags != 0)
854 return -EINVAL;
856 /* Is this guest using storage keys? */
857 if (!mm_use_skey(current->mm))
858 return KVM_S390_GET_SKEYS_NONE;
860 /* Enforce sane limit on memory allocation */
861 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
862 return -EINVAL;
864 keys = kmalloc_array(args->count, sizeof(uint8_t),
865 GFP_KERNEL | __GFP_NOWARN);
866 if (!keys)
867 keys = vmalloc(sizeof(uint8_t) * args->count);
868 if (!keys)
869 return -ENOMEM;
871 for (i = 0; i < args->count; i++) {
872 hva = gfn_to_hva(kvm, args->start_gfn + i);
873 if (kvm_is_error_hva(hva)) {
874 r = -EFAULT;
875 goto out;
878 curkey = get_guest_storage_key(current->mm, hva);
879 if (IS_ERR_VALUE(curkey)) {
880 r = curkey;
881 goto out;
883 keys[i] = curkey;
886 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
887 sizeof(uint8_t) * args->count);
888 if (r)
889 r = -EFAULT;
890 out:
891 kvfree(keys);
892 return r;
895 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
897 uint8_t *keys;
898 uint64_t hva;
899 int i, r = 0;
901 if (args->flags != 0)
902 return -EINVAL;
904 /* Enforce sane limit on memory allocation */
905 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
906 return -EINVAL;
908 keys = kmalloc_array(args->count, sizeof(uint8_t),
909 GFP_KERNEL | __GFP_NOWARN);
910 if (!keys)
911 keys = vmalloc(sizeof(uint8_t) * args->count);
912 if (!keys)
913 return -ENOMEM;
915 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
916 sizeof(uint8_t) * args->count);
917 if (r) {
918 r = -EFAULT;
919 goto out;
922 /* Enable storage key handling for the guest */
923 r = s390_enable_skey();
924 if (r)
925 goto out;
927 for (i = 0; i < args->count; i++) {
928 hva = gfn_to_hva(kvm, args->start_gfn + i);
929 if (kvm_is_error_hva(hva)) {
930 r = -EFAULT;
931 goto out;
934 /* Lowest order bit is reserved */
935 if (keys[i] & 0x01) {
936 r = -EINVAL;
937 goto out;
940 r = set_guest_storage_key(current->mm, hva,
941 (unsigned long)keys[i], 0);
942 if (r)
943 goto out;
945 out:
946 kvfree(keys);
947 return r;
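/*
 * Storage keys are one byte per 4K frame.  Both ioctls stage the keys in a
 * kernel buffer (kmalloc with a vmalloc fallback, capped at
 * KVM_S390_SKEYS_MAX).  GET short-circuits with KVM_S390_GET_SKEYS_NONE if
 * the guest never used keys; SET first enables key handling in the host mm
 * (s390_enable_skey()) and rejects keys with the reserved low bit set.
 */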
950 long kvm_arch_vm_ioctl(struct file *filp,
951 unsigned int ioctl, unsigned long arg)
953 struct kvm *kvm = filp->private_data;
954 void __user *argp = (void __user *)arg;
955 struct kvm_device_attr attr;
956 int r;
958 switch (ioctl) {
959 case KVM_S390_INTERRUPT: {
960 struct kvm_s390_interrupt s390int;
962 r = -EFAULT;
963 if (copy_from_user(&s390int, argp, sizeof(s390int)))
964 break;
965 r = kvm_s390_inject_vm(kvm, &s390int);
966 break;
968 case KVM_ENABLE_CAP: {
969 struct kvm_enable_cap cap;
970 r = -EFAULT;
971 if (copy_from_user(&cap, argp, sizeof(cap)))
972 break;
973 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
974 break;
976 case KVM_CREATE_IRQCHIP: {
977 struct kvm_irq_routing_entry routing;
979 r = -EINVAL;
980 if (kvm->arch.use_irqchip) {
981 /* Set up dummy routing. */
982 memset(&routing, 0, sizeof(routing));
983 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
985 break;
987 case KVM_SET_DEVICE_ATTR: {
988 r = -EFAULT;
989 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
990 break;
991 r = kvm_s390_vm_set_attr(kvm, &attr);
992 break;
994 case KVM_GET_DEVICE_ATTR: {
995 r = -EFAULT;
996 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
997 break;
998 r = kvm_s390_vm_get_attr(kvm, &attr);
999 break;
1001 case KVM_HAS_DEVICE_ATTR: {
1002 r = -EFAULT;
1003 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1004 break;
1005 r = kvm_s390_vm_has_attr(kvm, &attr);
1006 break;
1008 case KVM_S390_GET_SKEYS: {
1009 struct kvm_s390_skeys args;
1011 r = -EFAULT;
1012 if (copy_from_user(&args, argp,
1013 sizeof(struct kvm_s390_skeys)))
1014 break;
1015 r = kvm_s390_get_skeys(kvm, &args);
1016 break;
1018 case KVM_S390_SET_SKEYS: {
1019 struct kvm_s390_skeys args;
1021 r = -EFAULT;
1022 if (copy_from_user(&args, argp,
1023 sizeof(struct kvm_s390_skeys)))
1024 break;
1025 r = kvm_s390_set_skeys(kvm, &args);
1026 break;
1028 default:
1029 r = -ENOTTY;
1032 return r;
1035 static int kvm_s390_query_ap_config(u8 *config)
1037 u32 fcn_code = 0x04000000UL;
1038 u32 cc = 0;
1040 memset(config, 0, 128);
1041 asm volatile(
1042 "lgr 0,%1\n"
1043 "lgr 2,%2\n"
1044 ".long 0xb2af0000\n" /* PQAP(QCI) */
1045 "0: ipm %0\n"
1046 "srl %0,28\n"
1047 "1:\n"
1048 EX_TABLE(0b, 1b)
1049 : "+r" (cc)
1050 : "r" (fcn_code), "r" (config)
1051 : "cc", "0", "2", "memory"
1054 return cc;
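/*
 * The inline assembly above issues PQAP with the QCI function code
 * (0x04000000 in register 0) and the address of the 128-byte config buffer
 * in register 2, then extracts the condition code with ipm/srl; the
 * EX_TABLE entry supplies an exception fixup.  Callers probe
 * test_facility(12) first, and kvm_s390_apxa_installed() checks the 0x40
 * bit of config[0] to detect APXA.
 */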
1057 static int kvm_s390_apxa_installed(void)
1059 u8 config[128];
1060 int cc;
1062 if (test_facility(12)) {
1063 cc = kvm_s390_query_ap_config(config);
1065 if (cc)
1066 pr_err("PQAP(QCI) failed with cc=%d", cc);
1067 else
1068 return config[0] & 0x40;
1071 return 0;
1074 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1076 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1078 if (kvm_s390_apxa_installed())
1079 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1080 else
1081 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1084 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1086 get_cpu_id(cpu_id);
1087 cpu_id->version = 0xff;
1090 static void kvm_s390_crypto_init(struct kvm *kvm)
1092 if (!test_kvm_facility(kvm, 76))
1093 return;
1095 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1096 kvm_s390_set_crycb_format(kvm);
1098 /* Enable AES/DEA protected key functions by default */
1099 kvm->arch.crypto.aes_kw = 1;
1100 kvm->arch.crypto.dea_kw = 1;
1101 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1102 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1103 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1104 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1107 static void sca_dispose(struct kvm *kvm)
1109 if (kvm->arch.use_esca)
1110 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1111 else
1112 free_page((unsigned long)(kvm->arch.sca));
1113 kvm->arch.sca = NULL;
1116 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1118 int i, rc;
1119 char debug_name[16];
1120 static unsigned long sca_offset;
1122 rc = -EINVAL;
1123 #ifdef CONFIG_KVM_S390_UCONTROL
1124 if (type & ~KVM_VM_S390_UCONTROL)
1125 goto out_err;
1126 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1127 goto out_err;
1128 #else
1129 if (type)
1130 goto out_err;
1131 #endif
1133 rc = s390_enable_sie();
1134 if (rc)
1135 goto out_err;
1137 rc = -ENOMEM;
1139 kvm->arch.use_esca = 0; /* start with basic SCA */
1140 rwlock_init(&kvm->arch.sca_lock);
1141 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
1142 if (!kvm->arch.sca)
1143 goto out_err;
1144 spin_lock(&kvm_lock);
1145 sca_offset += 16;
1146 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1147 sca_offset = 0;
1148 kvm->arch.sca = (struct bsca_block *)
1149 ((char *) kvm->arch.sca + sca_offset);
1150 spin_unlock(&kvm_lock);
1152 sprintf(debug_name, "kvm-%u", current->pid);
1154 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1155 if (!kvm->arch.dbf)
1156 goto out_err;
1158 kvm->arch.sie_page2 =
1159 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1160 if (!kvm->arch.sie_page2)
1161 goto out_err;
1163 /* Populate the facility mask initially. */
1164 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1165 S390_ARCH_FAC_LIST_SIZE_BYTE);
1166 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1167 if (i < kvm_s390_fac_list_mask_size())
1168 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1169 else
1170 kvm->arch.model.fac_mask[i] = 0UL;
1173 /* Populate the facility list initially. */
1174 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1175 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1176 S390_ARCH_FAC_LIST_SIZE_BYTE);
1178 kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1179 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1181 kvm_s390_crypto_init(kvm);
1183 spin_lock_init(&kvm->arch.float_int.lock);
1184 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1185 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1186 init_waitqueue_head(&kvm->arch.ipte_wq);
1187 mutex_init(&kvm->arch.ipte_mutex);
1189 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1190 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1192 if (type & KVM_VM_S390_UCONTROL) {
1193 kvm->arch.gmap = NULL;
1194 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1195 } else {
1196 if (sclp.hamax == U64_MAX)
1197 kvm->arch.mem_limit = TASK_MAX_SIZE;
1198 else
1199 kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1200 sclp.hamax + 1);
1201 kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
1202 if (!kvm->arch.gmap)
1203 goto out_err;
1204 kvm->arch.gmap->private = kvm;
1205 kvm->arch.gmap->pfault_enabled = 0;
1208 kvm->arch.css_support = 0;
1209 kvm->arch.use_irqchip = 0;
1210 kvm->arch.epoch = 0;
1212 spin_lock_init(&kvm->arch.start_stop_lock);
1213 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1215 return 0;
1216 out_err:
1217 free_page((unsigned long)kvm->arch.sie_page2);
1218 debug_unregister(kvm->arch.dbf);
1219 sca_dispose(kvm);
1220 KVM_EVENT(3, "creation of vm failed: %d", rc);
1221 return rc;
1224 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1226 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1227 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1228 kvm_s390_clear_local_irqs(vcpu);
1229 kvm_clear_async_pf_completion_queue(vcpu);
1230 if (!kvm_is_ucontrol(vcpu->kvm))
1231 sca_del_vcpu(vcpu);
1233 if (kvm_is_ucontrol(vcpu->kvm))
1234 gmap_free(vcpu->arch.gmap);
1236 if (vcpu->kvm->arch.use_cmma)
1237 kvm_s390_vcpu_unsetup_cmma(vcpu);
1238 free_page((unsigned long)(vcpu->arch.sie_block));
1240 kvm_vcpu_uninit(vcpu);
1241 kmem_cache_free(kvm_vcpu_cache, vcpu);
1244 static void kvm_free_vcpus(struct kvm *kvm)
1246 unsigned int i;
1247 struct kvm_vcpu *vcpu;
1249 kvm_for_each_vcpu(i, vcpu, kvm)
1250 kvm_arch_vcpu_destroy(vcpu);
1252 mutex_lock(&kvm->lock);
1253 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1254 kvm->vcpus[i] = NULL;
1256 atomic_set(&kvm->online_vcpus, 0);
1257 mutex_unlock(&kvm->lock);
1260 void kvm_arch_destroy_vm(struct kvm *kvm)
1262 kvm_free_vcpus(kvm);
1263 sca_dispose(kvm);
1264 debug_unregister(kvm->arch.dbf);
1265 free_page((unsigned long)kvm->arch.sie_page2);
1266 if (!kvm_is_ucontrol(kvm))
1267 gmap_free(kvm->arch.gmap);
1268 kvm_s390_destroy_adapters(kvm);
1269 kvm_s390_clear_float_irqs(kvm);
1270 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1273 /* Section: vcpu related */
1274 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1276 vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1277 if (!vcpu->arch.gmap)
1278 return -ENOMEM;
1279 vcpu->arch.gmap->private = vcpu->kvm;
1281 return 0;
1284 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1286 read_lock(&vcpu->kvm->arch.sca_lock);
1287 if (vcpu->kvm->arch.use_esca) {
1288 struct esca_block *sca = vcpu->kvm->arch.sca;
1290 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1291 sca->cpu[vcpu->vcpu_id].sda = 0;
1292 } else {
1293 struct bsca_block *sca = vcpu->kvm->arch.sca;
1295 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1296 sca->cpu[vcpu->vcpu_id].sda = 0;
1298 read_unlock(&vcpu->kvm->arch.sca_lock);
1301 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1303 read_lock(&vcpu->kvm->arch.sca_lock);
1304 if (vcpu->kvm->arch.use_esca) {
1305 struct esca_block *sca = vcpu->kvm->arch.sca;
1307 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1308 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1309 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1310 vcpu->arch.sie_block->ecb2 |= 0x04U;
1311 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1312 } else {
1313 struct bsca_block *sca = vcpu->kvm->arch.sca;
1315 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1316 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1317 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1318 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1320 read_unlock(&vcpu->kvm->arch.sca_lock);
1323 /* Basic SCA to Extended SCA data copy routines */
1324 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1326 d->sda = s->sda;
1327 d->sigp_ctrl.c = s->sigp_ctrl.c;
1328 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1331 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1333 int i;
1335 d->ipte_control = s->ipte_control;
1336 d->mcn[0] = s->mcn;
1337 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1338 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1341 static int sca_switch_to_extended(struct kvm *kvm)
1343 struct bsca_block *old_sca = kvm->arch.sca;
1344 struct esca_block *new_sca;
1345 struct kvm_vcpu *vcpu;
1346 unsigned int vcpu_idx;
1347 u32 scaol, scaoh;
1349 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1350 if (!new_sca)
1351 return -ENOMEM;
1353 scaoh = (u32)((u64)(new_sca) >> 32);
1354 scaol = (u32)(u64)(new_sca) & ~0x3fU;
1356 kvm_s390_vcpu_block_all(kvm);
1357 write_lock(&kvm->arch.sca_lock);
1359 sca_copy_b_to_e(new_sca, old_sca);
1361 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1362 vcpu->arch.sie_block->scaoh = scaoh;
1363 vcpu->arch.sie_block->scaol = scaol;
1364 vcpu->arch.sie_block->ecb2 |= 0x04U;
1366 kvm->arch.sca = new_sca;
1367 kvm->arch.use_esca = 1;
1369 write_unlock(&kvm->arch.sca_lock);
1370 kvm_s390_vcpu_unblock_all(kvm);
1372 free_page((unsigned long)old_sca);
1374 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1375 old_sca, kvm->arch.sca);
1376 return 0;
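/*
 * Switching from the basic to the extended SCA: all vcpus are blocked, the
 * BSCA contents are copied into a freshly allocated ESCA, every SIE block is
 * repointed at the new origin (scaoh/scaol) with the ESCA mode bit set in
 * ecb2, and only then are the vcpus unblocked and the old page freed.  This
 * is what allows more vcpus than the basic SCA has slots for.
 */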
1379 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1381 int rc;
1383 if (id < KVM_S390_BSCA_CPU_SLOTS)
1384 return true;
1385 if (!sclp.has_esca)
1386 return false;
1388 mutex_lock(&kvm->lock);
1389 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1390 mutex_unlock(&kvm->lock);
1392 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1395 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1397 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1398 kvm_clear_async_pf_completion_queue(vcpu);
1399 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1400 KVM_SYNC_GPRS |
1401 KVM_SYNC_ACRS |
1402 KVM_SYNC_CRS |
1403 KVM_SYNC_ARCH0 |
1404 KVM_SYNC_PFAULT;
1405 if (test_kvm_facility(vcpu->kvm, 64))
1406 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1407 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1408 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1410 if (MACHINE_HAS_VX)
1411 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1412 else
1413 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1415 if (kvm_is_ucontrol(vcpu->kvm))
1416 return __kvm_ucontrol_vcpu_init(vcpu);
1418 return 0;
1421 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1422 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1424 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1425 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1426 vcpu->arch.cputm_start = get_tod_clock_fast();
1427 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1430 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1431 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1433 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1434 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1435 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1436 vcpu->arch.cputm_start = 0;
1437 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1440 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1441 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1443 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1444 vcpu->arch.cputm_enabled = true;
1445 __start_cpu_timer_accounting(vcpu);
1448 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1449 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1451 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1452 __stop_cpu_timer_accounting(vcpu);
1453 vcpu->arch.cputm_enabled = false;
1456 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1458 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1459 __enable_cpu_timer_accounting(vcpu);
1460 preempt_enable();
1463 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1465 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1466 __disable_cpu_timer_accounting(vcpu);
1467 preempt_enable();
1470 /* set the cpu timer - may only be called from the VCPU thread itself */
1471 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1473 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1474 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1475 if (vcpu->arch.cputm_enabled)
1476 vcpu->arch.cputm_start = get_tod_clock_fast();
1477 vcpu->arch.sie_block->cputm = cputm;
1478 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1479 preempt_enable();
1482 /* update and get the cpu timer - can also be called from other VCPU threads */
1483 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1485 unsigned int seq;
1486 __u64 value;
1488 if (unlikely(!vcpu->arch.cputm_enabled))
1489 return vcpu->arch.sie_block->cputm;
1491 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1492 do {
1493 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1495 * If the writer would ever execute a read in the critical
1496 * section, e.g. in irq context, we have a deadlock.
1498 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1499 value = vcpu->arch.sie_block->cputm;
1500 /* if cputm_start is 0, accounting is being started/stopped */
1501 if (likely(vcpu->arch.cputm_start))
1502 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1503 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1504 preempt_enable();
1505 return value;
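/*
 * The CPU timer is read under a seqcount: the reader samples cputm and, if
 * accounting is running, subtracts the TOD time elapsed since cputm_start,
 * retrying whenever the vcpu thread is concurrently updating the values.
 * The WARN_ON_ONCE catches the degenerate case of the write side reading
 * its own seqcount, which would spin forever.
 */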
1508 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1510 /* Save host register state */
1511 save_fpu_regs();
1512 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1513 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1515 if (MACHINE_HAS_VX)
1516 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1517 else
1518 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1519 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1520 if (test_fp_ctl(current->thread.fpu.fpc))
1521 /* User space provided an invalid FPC, let's clear it */
1522 current->thread.fpu.fpc = 0;
1524 save_access_regs(vcpu->arch.host_acrs);
1525 restore_access_regs(vcpu->run->s.regs.acrs);
1526 gmap_enable(vcpu->arch.gmap);
1527 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1528 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1529 __start_cpu_timer_accounting(vcpu);
1530 vcpu->cpu = cpu;
1533 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1535 vcpu->cpu = -1;
1536 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1537 __stop_cpu_timer_accounting(vcpu);
1538 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1539 gmap_disable(vcpu->arch.gmap);
1541 /* Save guest register state */
1542 save_fpu_regs();
1543 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1545 /* Restore host register state */
1546 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1547 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1549 save_access_regs(vcpu->run->s.regs.acrs);
1550 restore_access_regs(vcpu->arch.host_acrs);
1553 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1555 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1556 vcpu->arch.sie_block->gpsw.mask = 0UL;
1557 vcpu->arch.sie_block->gpsw.addr = 0UL;
1558 kvm_s390_set_prefix(vcpu, 0);
1559 kvm_s390_set_cpu_timer(vcpu, 0);
1560 vcpu->arch.sie_block->ckc = 0UL;
1561 vcpu->arch.sie_block->todpr = 0;
1562 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1563 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1564 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1565 /* make sure the new fpc will be lazily loaded */
1566 save_fpu_regs();
1567 current->thread.fpu.fpc = 0;
1568 vcpu->arch.sie_block->gbea = 1;
1569 vcpu->arch.sie_block->pp = 0;
1570 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1571 kvm_clear_async_pf_completion_queue(vcpu);
1572 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1573 kvm_s390_vcpu_stop(vcpu);
1574 kvm_s390_clear_local_irqs(vcpu);
1577 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1579 mutex_lock(&vcpu->kvm->lock);
1580 preempt_disable();
1581 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1582 preempt_enable();
1583 mutex_unlock(&vcpu->kvm->lock);
1584 if (!kvm_is_ucontrol(vcpu->kvm)) {
1585 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1586 sca_add_vcpu(vcpu);
1591 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1593 if (!test_kvm_facility(vcpu->kvm, 76))
1594 return;
1596 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1598 if (vcpu->kvm->arch.crypto.aes_kw)
1599 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1600 if (vcpu->kvm->arch.crypto.dea_kw)
1601 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1603 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1606 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1608 free_page(vcpu->arch.sie_block->cbrlo);
1609 vcpu->arch.sie_block->cbrlo = 0;
1612 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1614 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1615 if (!vcpu->arch.sie_block->cbrlo)
1616 return -ENOMEM;
1618 vcpu->arch.sie_block->ecb2 |= 0x80;
1619 vcpu->arch.sie_block->ecb2 &= ~0x08;
1620 return 0;
1623 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1625 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1627 vcpu->arch.cpu_id = model->cpu_id;
1628 vcpu->arch.sie_block->ibc = model->ibc;
1629 if (test_kvm_facility(vcpu->kvm, 7))
1630 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1633 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1635 int rc = 0;
1637 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1638 CPUSTAT_SM |
1639 CPUSTAT_STOPPED);
1641 if (test_kvm_facility(vcpu->kvm, 78))
1642 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1643 else if (test_kvm_facility(vcpu->kvm, 8))
1644 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1646 kvm_s390_vcpu_setup_model(vcpu);
1648 vcpu->arch.sie_block->ecb = 6;
1649 if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1650 vcpu->arch.sie_block->ecb |= 0x10;
1652 vcpu->arch.sie_block->ecb2 = 8;
1653 vcpu->arch.sie_block->eca = 0xC1002000U;
1654 if (sclp.has_siif)
1655 vcpu->arch.sie_block->eca |= 1;
1656 if (sclp.has_sigpif)
1657 vcpu->arch.sie_block->eca |= 0x10000000U;
1658 if (test_kvm_facility(vcpu->kvm, 64))
1659 vcpu->arch.sie_block->ecb3 |= 0x01;
1660 if (test_kvm_facility(vcpu->kvm, 129)) {
1661 vcpu->arch.sie_block->eca |= 0x00020000;
1662 vcpu->arch.sie_block->ecd |= 0x20000000;
1664 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1665 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1667 if (vcpu->kvm->arch.use_cmma) {
1668 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1669 if (rc)
1670 return rc;
1672 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1673 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1675 kvm_s390_vcpu_crypto_setup(vcpu);
1677 return rc;
1680 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1681 unsigned int id)
1683 struct kvm_vcpu *vcpu;
1684 struct sie_page *sie_page;
1685 int rc = -EINVAL;
1687 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1688 goto out;
1690 rc = -ENOMEM;
1692 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1693 if (!vcpu)
1694 goto out;
1696 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1697 if (!sie_page)
1698 goto out_free_cpu;
1700 vcpu->arch.sie_block = &sie_page->sie_block;
1701 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1703 vcpu->arch.sie_block->icpua = id;
1704 spin_lock_init(&vcpu->arch.local_int.lock);
1705 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1706 vcpu->arch.local_int.wq = &vcpu->wq;
1707 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1708 seqcount_init(&vcpu->arch.cputm_seqcount);
1710 rc = kvm_vcpu_init(vcpu, kvm, id);
1711 if (rc)
1712 goto out_free_sie_block;
1713 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1714 vcpu->arch.sie_block);
1715 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1717 return vcpu;
1718 out_free_sie_block:
1719 free_page((unsigned long)(vcpu->arch.sie_block));
1720 out_free_cpu:
1721 kmem_cache_free(kvm_vcpu_cache, vcpu);
1722 out:
1723 return ERR_PTR(rc);
1726 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1728 return kvm_s390_vcpu_has_irq(vcpu, 0);
1731 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1733 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1734 exit_sie(vcpu);
1737 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1739 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1742 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1744 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1745 exit_sie(vcpu);
1748 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1750 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1754 * Kick a guest cpu out of SIE and wait until SIE is not running.
1755 * If the CPU is not running (e.g. waiting as idle) the function will
1756 * return immediately. */
1757 void exit_sie(struct kvm_vcpu *vcpu)
1759 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1760 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1761 cpu_relax();
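/*
 * exit_sie() requests a STOP intercept via cpuflags and then busy-waits
 * until PROG_IN_SIE is clear, i.e. until the CPU has really left SIE.  This
 * guarantees that block/request bits set before the call are observed by
 * the vcpu before it can re-enter SIE.
 */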
1764 /* Kick a guest cpu out of SIE to process a request synchronously */
1765 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1767 kvm_make_request(req, vcpu);
1768 kvm_s390_vcpu_request(vcpu);
1771 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1773 int i;
1774 struct kvm *kvm = gmap->private;
1775 struct kvm_vcpu *vcpu;
1777 kvm_for_each_vcpu(i, vcpu, kvm) {
1778 /* match against both prefix pages */
1779 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1780 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1781 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1786 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1788 /* kvm common code refers to this, but never calls it */
1789 BUG();
1790 return 0;
1793 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1794 struct kvm_one_reg *reg)
1796 int r = -EINVAL;
1798 switch (reg->id) {
1799 case KVM_REG_S390_TODPR:
1800 r = put_user(vcpu->arch.sie_block->todpr,
1801 (u32 __user *)reg->addr);
1802 break;
1803 case KVM_REG_S390_EPOCHDIFF:
1804 r = put_user(vcpu->arch.sie_block->epoch,
1805 (u64 __user *)reg->addr);
1806 break;
1807 case KVM_REG_S390_CPU_TIMER:
1808 r = put_user(kvm_s390_get_cpu_timer(vcpu),
1809 (u64 __user *)reg->addr);
1810 break;
1811 case KVM_REG_S390_CLOCK_COMP:
1812 r = put_user(vcpu->arch.sie_block->ckc,
1813 (u64 __user *)reg->addr);
1814 break;
1815 case KVM_REG_S390_PFTOKEN:
1816 r = put_user(vcpu->arch.pfault_token,
1817 (u64 __user *)reg->addr);
1818 break;
1819 case KVM_REG_S390_PFCOMPARE:
1820 r = put_user(vcpu->arch.pfault_compare,
1821 (u64 __user *)reg->addr);
1822 break;
1823 case KVM_REG_S390_PFSELECT:
1824 r = put_user(vcpu->arch.pfault_select,
1825 (u64 __user *)reg->addr);
1826 break;
1827 case KVM_REG_S390_PP:
1828 r = put_user(vcpu->arch.sie_block->pp,
1829 (u64 __user *)reg->addr);
1830 break;
1831 case KVM_REG_S390_GBEA:
1832 r = put_user(vcpu->arch.sie_block->gbea,
1833 (u64 __user *)reg->addr);
1834 break;
1835 default:
1836 break;
1839 return r;
1842 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1843 struct kvm_one_reg *reg)
1845 int r = -EINVAL;
1846 __u64 val;
1848 switch (reg->id) {
1849 case KVM_REG_S390_TODPR:
1850 r = get_user(vcpu->arch.sie_block->todpr,
1851 (u32 __user *)reg->addr);
1852 break;
1853 case KVM_REG_S390_EPOCHDIFF:
1854 r = get_user(vcpu->arch.sie_block->epoch,
1855 (u64 __user *)reg->addr);
1856 break;
1857 case KVM_REG_S390_CPU_TIMER:
1858 r = get_user(val, (u64 __user *)reg->addr);
1859 if (!r)
1860 kvm_s390_set_cpu_timer(vcpu, val);
1861 break;
1862 case KVM_REG_S390_CLOCK_COMP:
1863 r = get_user(vcpu->arch.sie_block->ckc,
1864 (u64 __user *)reg->addr);
1865 break;
1866 case KVM_REG_S390_PFTOKEN:
1867 r = get_user(vcpu->arch.pfault_token,
1868 (u64 __user *)reg->addr);
1869 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1870 kvm_clear_async_pf_completion_queue(vcpu);
1871 break;
1872 case KVM_REG_S390_PFCOMPARE:
1873 r = get_user(vcpu->arch.pfault_compare,
1874 (u64 __user *)reg->addr);
1875 break;
1876 case KVM_REG_S390_PFSELECT:
1877 r = get_user(vcpu->arch.pfault_select,
1878 (u64 __user *)reg->addr);
1879 break;
1880 case KVM_REG_S390_PP:
1881 r = get_user(vcpu->arch.sie_block->pp,
1882 (u64 __user *)reg->addr);
1883 break;
1884 case KVM_REG_S390_GBEA:
1885 r = get_user(vcpu->arch.sie_block->gbea,
1886 (u64 __user *)reg->addr);
1887 break;
1888 default:
1889 break;
1892 return r;
1895 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1897 kvm_s390_vcpu_initial_reset(vcpu);
1898 return 0;
1901 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1903 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1904 return 0;
1907 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1909 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1910 return 0;
1913 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1914 struct kvm_sregs *sregs)
1916 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1917 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1918 restore_access_regs(vcpu->run->s.regs.acrs);
1919 return 0;
1922 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1923 struct kvm_sregs *sregs)
1925 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1926 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1927 return 0;
1930 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1932 /* make sure the new values will be lazily loaded */
1933 save_fpu_regs();
1934 if (test_fp_ctl(fpu->fpc))
1935 return -EINVAL;
1936 current->thread.fpu.fpc = fpu->fpc;
1937 if (MACHINE_HAS_VX)
1938 convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
1939 else
1940 memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
1941 return 0;
1944 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1946 /* make sure we have the latest values */
1947 save_fpu_regs();
1948 if (MACHINE_HAS_VX)
1949 convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
1950 else
1951 memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
1952 fpu->fpc = current->thread.fpu.fpc;
1953 return 0;
1956 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1958 int rc = 0;
1960 if (!is_vcpu_stopped(vcpu))
1961 rc = -EBUSY;
1962 else {
1963 vcpu->run->psw_mask = psw.mask;
1964 vcpu->run->psw_addr = psw.addr;
1966 return rc;
1969 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1970 struct kvm_translation *tr)
1972 return -EINVAL; /* not implemented yet */
1975 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1976 KVM_GUESTDBG_USE_HW_BP | \
1977 KVM_GUESTDBG_ENABLE)
1979 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1980 struct kvm_guest_debug *dbg)
1982 int rc = 0;
1984 vcpu->guest_debug = 0;
1985 kvm_s390_clear_bp_data(vcpu);
1987 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1988 return -EINVAL;
1990 if (dbg->control & KVM_GUESTDBG_ENABLE) {
1991 vcpu->guest_debug = dbg->control;
1992 /* enforce guest PER */
1993 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1995 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1996 rc = kvm_s390_import_bp_data(vcpu, dbg);
1997 } else {
1998 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1999 vcpu->arch.guestdbg.last_bp = 0;
2002 if (rc) {
2003 vcpu->guest_debug = 0;
2004 kvm_s390_clear_bp_data(vcpu);
2005 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2008 return rc;
2011 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2012 struct kvm_mp_state *mp_state)
2014 /* CHECK_STOP and LOAD are not supported yet */
2015 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2016 KVM_MP_STATE_OPERATING;
2019 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2020 struct kvm_mp_state *mp_state)
2022 int rc = 0;
2024 /* user space knows about this interface - let it control the state */
2025 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2027 switch (mp_state->mp_state) {
2028 case KVM_MP_STATE_STOPPED:
2029 kvm_s390_vcpu_stop(vcpu);
2030 break;
2031 case KVM_MP_STATE_OPERATING:
2032 kvm_s390_vcpu_start(vcpu);
2033 break;
2034 case KVM_MP_STATE_LOAD:
2035 case KVM_MP_STATE_CHECK_STOP:
2036 /* fall through - CHECK_STOP and LOAD are not supported yet */
2037 default:
2038 rc = -ENXIO;
2041 return rc;
2044 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2046 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2049 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2051 retry:
2052 kvm_s390_vcpu_request_handled(vcpu);
2053 if (!vcpu->requests)
2054 return 0;
2056 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2057 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
2058 * This ensures that the ipte instruction for this request has
2059 * already finished. We might race against a second unmapper that
2060 * wants to set the blocking bit. Lets just retry the request loop.
2062 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2063 int rc;
2064 rc = gmap_ipte_notify(vcpu->arch.gmap,
2065 kvm_s390_get_prefix(vcpu),
2066 PAGE_SIZE * 2);
2067 if (rc)
2068 return rc;
2069 goto retry;
2072 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2073 vcpu->arch.sie_block->ihcpu = 0xffff;
2074 goto retry;
2077 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2078 if (!ibs_enabled(vcpu)) {
2079 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2080 atomic_or(CPUSTAT_IBS,
2081 &vcpu->arch.sie_block->cpuflags);
2083 goto retry;
2086 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2087 if (ibs_enabled(vcpu)) {
2088 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2089 atomic_andnot(CPUSTAT_IBS,
2090 &vcpu->arch.sie_block->cpuflags);
2092 goto retry;
2095 /* nothing to do, just clear the request */
2096 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2098 return 0;
2101 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2103 struct kvm_vcpu *vcpu;
2104 int i;
2106 mutex_lock(&kvm->lock);
2107 preempt_disable();
2108 kvm->arch.epoch = tod - get_tod_clock();
2109 kvm_s390_vcpu_block_all(kvm);
2110 kvm_for_each_vcpu(i, vcpu, kvm)
2111 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2112 kvm_s390_vcpu_unblock_all(kvm);
2113 preempt_enable();
2114 mutex_unlock(&kvm->lock);
2118 * kvm_arch_fault_in_page - fault-in guest page if necessary
2119 * @vcpu: The corresponding virtual cpu
2120 * @gpa: Guest physical address
2121 * @writable: Whether the page should be writable or not
2123 * Make sure that a guest page has been faulted-in on the host.
2125 * Return: Zero on success, negative error code otherwise.
2127 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2129 return gmap_fault(vcpu->arch.gmap, gpa,
2130 writable ? FAULT_FLAG_WRITE : 0);
2133 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2134 unsigned long token)
2136 struct kvm_s390_interrupt inti;
2137 struct kvm_s390_irq irq;
2139 if (start_token) {
2140 irq.u.ext.ext_params2 = token;
2141 irq.type = KVM_S390_INT_PFAULT_INIT;
2142 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2143 } else {
2144 inti.type = KVM_S390_INT_PFAULT_DONE;
2145 inti.parm64 = token;
2146 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2150 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2151 struct kvm_async_pf *work)
2153 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2154 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2157 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2158 struct kvm_async_pf *work)
2160 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2161 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2164 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2165 struct kvm_async_pf *work)
2167 /* s390 will always inject the page directly */
2170 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2173 * s390 will always inject the page directly,
2174  * but we still want check_async_completion to clean up
2176 return true;
2179 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2181 hva_t hva;
2182 struct kvm_arch_async_pf arch;
2183 int rc;
2185 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2186 return 0;
2187 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2188 vcpu->arch.pfault_compare)
2189 return 0;
2190 if (psw_extint_disabled(vcpu))
2191 return 0;
2192 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2193 return 0;
2194 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2195 return 0;
2196 if (!vcpu->arch.gmap->pfault_enabled)
2197 return 0;
2199 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2200 hva += current->thread.gmap_addr & ~PAGE_MASK;
2201 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2202 return 0;
2204 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2205 return rc;
2208 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2210 int rc, cpuflags;
2213 * On s390 notifications for arriving pages will be delivered directly
2214  * to the guest but the housekeeping for completed pfaults is
2215 * handled outside the worker.
2217 kvm_check_async_pf_completion(vcpu);
2219 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2220 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2222 if (need_resched())
2223 schedule();
2225 if (test_cpu_flag(CIF_MCCK_PENDING))
2226 s390_handle_mcck();
2228 if (!kvm_is_ucontrol(vcpu->kvm)) {
2229 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2230 if (rc)
2231 return rc;
2234 rc = kvm_s390_handle_requests(vcpu);
2235 if (rc)
2236 return rc;
2238 if (guestdbg_enabled(vcpu)) {
2239 kvm_s390_backup_guest_per_regs(vcpu);
2240 kvm_s390_patch_guest_per_regs(vcpu);
2243 vcpu->arch.sie_block->icptcode = 0;
2244 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2245 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2246 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2248 return 0;
2251 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2253 struct kvm_s390_pgm_info pgm_info = {
2254 .code = PGM_ADDRESSING,
2256 u8 opcode, ilen;
2257 int rc;
2259 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2260 trace_kvm_s390_sie_fault(vcpu);
2263 * We want to inject an addressing exception, which is defined as a
2264 * suppressing or terminating exception. However, since we came here
2265 * by a DAT access exception, the PSW still points to the faulting
2266 * instruction since DAT exceptions are nullifying. So we've got
2267 * to look up the current opcode to get the length of the instruction
2268 * to be able to forward the PSW.
2270 rc = read_guest_instr(vcpu, &opcode, 1);
2271 ilen = insn_length(opcode);
2272 if (rc < 0) {
2273 return rc;
2274 } else if (rc) {
2275 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2276 * Forward by arbitrary ilc, injection will take care of
2277 * nullification if necessary.
2279 pgm_info = vcpu->arch.pgm;
2280 ilen = 4;
2282 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2283 kvm_s390_forward_psw(vcpu, ilen);
2284 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2287 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2289 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2290 vcpu->arch.sie_block->icptcode);
2291 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2293 if (guestdbg_enabled(vcpu))
2294 kvm_s390_restore_guest_per_regs(vcpu);
2296 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2297 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2299 if (vcpu->arch.sie_block->icptcode > 0) {
2300 int rc = kvm_handle_sie_intercept(vcpu);
2302 if (rc != -EOPNOTSUPP)
2303 return rc;
2304 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2305 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2306 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2307 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2308 return -EREMOTE;
2309 } else if (exit_reason != -EFAULT) {
2310 vcpu->stat.exit_null++;
2311 return 0;
2312 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2313 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2314 vcpu->run->s390_ucontrol.trans_exc_code =
2315 current->thread.gmap_addr;
2316 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2317 return -EREMOTE;
2318 } else if (current->thread.gmap_pfault) {
2319 trace_kvm_s390_major_guest_pfault(vcpu);
2320 current->thread.gmap_pfault = 0;
2321 if (kvm_arch_setup_async_pf(vcpu))
2322 return 0;
2323 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2325 return vcpu_post_run_fault_in_sie(vcpu);
2328 static int __vcpu_run(struct kvm_vcpu *vcpu)
2330 int rc, exit_reason;
2333  * We try to hold kvm->srcu during most of vcpu_run (except when running
2334  * the guest), so that memslots (and other stuff) are protected
2336 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2338 do {
2339 rc = vcpu_pre_run(vcpu);
2340 if (rc)
2341 break;
2343 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2345  * As PF_VCPU will be used in the fault handler, there must be no
2346  * uaccess between guest_enter and guest_exit.
2348 local_irq_disable();
2349 __kvm_guest_enter();
2350 __disable_cpu_timer_accounting(vcpu);
2351 local_irq_enable();
2352 exit_reason = sie64a(vcpu->arch.sie_block,
2353 vcpu->run->s.regs.gprs);
2354 local_irq_disable();
2355 __enable_cpu_timer_accounting(vcpu);
2356 __kvm_guest_exit();
2357 local_irq_enable();
2358 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2360 rc = vcpu_post_run(vcpu, exit_reason);
2361 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2363 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2364 return rc;
2367 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2369 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2370 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2371 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2372 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2373 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2374 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2375 /* some control register changes require a tlb flush */
2376 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2378 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2379 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2380 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2381 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2382 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2383 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2385 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2386 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2387 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2388 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2389 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2390 kvm_clear_async_pf_completion_queue(vcpu);
2392 kvm_run->kvm_dirty_regs = 0;
2395 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2397 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2398 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2399 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2400 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2401 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2402 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2403 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2404 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2405 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2406 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2407 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2408 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2411 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2413 int rc;
2414 sigset_t sigsaved;
2416 if (guestdbg_exit_pending(vcpu)) {
2417 kvm_s390_prepare_debug_exit(vcpu);
2418 return 0;
2421 if (vcpu->sigset_active)
2422 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2424 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2425 kvm_s390_vcpu_start(vcpu);
2426 } else if (is_vcpu_stopped(vcpu)) {
2427 pr_err_ratelimited("can't run stopped vcpu %d\n",
2428 vcpu->vcpu_id);
2429 return -EINVAL;
2432 sync_regs(vcpu, kvm_run);
2433 enable_cpu_timer_accounting(vcpu);
2435 might_fault();
2436 rc = __vcpu_run(vcpu);
2438 if (signal_pending(current) && !rc) {
2439 kvm_run->exit_reason = KVM_EXIT_INTR;
2440 rc = -EINTR;
2443 if (guestdbg_exit_pending(vcpu) && !rc) {
2444 kvm_s390_prepare_debug_exit(vcpu);
2445 rc = 0;
2448 if (rc == -EREMOTE) {
2449 /* userspace support is needed, kvm_run has been prepared */
2450 rc = 0;
2453 disable_cpu_timer_accounting(vcpu);
2454 store_regs(vcpu, kvm_run);
2456 if (vcpu->sigset_active)
2457 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2459 vcpu->stat.exit_userspace++;
2460 return rc;
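/*
 * A sketch of the userspace run loop this handler expects, with kvm_fd and
 * vcpu_fd assumed to be open descriptors and all error handling omitted.
 * kvm_run is the shared structure that sync_regs()/store_regs() above copy
 * register state in and out of.
 *
 *	int size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
 *	struct kvm_run *run = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *
 *	for (;;) {
 *		ioctl(vcpu_fd, KVM_RUN, 0);
 *		switch (run->exit_reason) {
 *		case KVM_EXIT_S390_SIEIC:
 *			handle_sieic(run);	(hypothetical helper)
 *			break;
 *		case KVM_EXIT_INTR:
 *			break;
 *		}
 *	}
 */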
2464 * store status at address
2465  * we have two special cases:
2466 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2467 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2469 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2471 unsigned char archmode = 1;
2472 freg_t fprs[NUM_FPRS];
2473 unsigned int px;
2474 u64 clkcomp, cputm;
2475 int rc;
2477 px = kvm_s390_get_prefix(vcpu);
2478 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2479 if (write_guest_abs(vcpu, 163, &archmode, 1))
2480 return -EFAULT;
2481 gpa = 0;
2482 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2483 if (write_guest_real(vcpu, 163, &archmode, 1))
2484 return -EFAULT;
2485 gpa = px;
2486 } else
2487 gpa -= __LC_FPREGS_SAVE_AREA;
2489 /* manually convert vector registers if necessary */
2490 if (MACHINE_HAS_VX) {
2491 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2492 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2493 fprs, 128);
2494 } else {
2495 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2496 vcpu->run->s.regs.fprs, 128);
2498 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2499 vcpu->run->s.regs.gprs, 128);
2500 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2501 &vcpu->arch.sie_block->gpsw, 16);
2502 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2503 &px, 4);
2504 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2505 &vcpu->run->s.regs.fpc, 4);
2506 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2507 &vcpu->arch.sie_block->todpr, 4);
2508 cputm = kvm_s390_get_cpu_timer(vcpu);
2509 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2510 &cputm, 8);
2511 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2512 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2513 &clkcomp, 8);
2514 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2515 &vcpu->run->s.regs.acrs, 64);
2516 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2517 &vcpu->arch.sie_block->gcr, 128);
2518 return rc ? -EFAULT : 0;
2521 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2524 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2525  * copying in vcpu load/put. Let's update our copies before we save
2526  * them into the save area
2528 save_fpu_regs();
2529 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2530 save_access_regs(vcpu->run->s.regs.acrs);
2532 return kvm_s390_store_status_unloaded(vcpu, addr);
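/*
 * A sketch, assuming vcpu_fd and that the KVM_S390_STORE_STATUS_* constants
 * are visible to userspace, of requesting a store status into the prefix
 * area of a stopped vcpu. The address is passed as the ioctl argument
 * itself, matching kvm_arch_vcpu_ioctl() further down.
 *
 *	if (ioctl(vcpu_fd, KVM_S390_STORE_STATUS,
 *		  KVM_S390_STORE_STATUS_PREFIXED) < 0)
 *		return -1;
 */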
2536 * store additional status at address
2538 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2539 unsigned long gpa)
2541 /* Only bits 0-53 are used for address formation */
2542 if (!(gpa & ~0x3ff))
2543 return 0;
2545 return write_guest_abs(vcpu, gpa & ~0x3ff,
2546 (void *)&vcpu->run->s.regs.vrs, 512);
2549 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2551 if (!test_kvm_facility(vcpu->kvm, 129))
2552 return 0;
2555  * The guest VXRS are in the host VXRS due to the lazy
2556 * copying in vcpu load/put. We can simply call save_fpu_regs()
2557 * to save the current register state because we are in the
2558 * middle of a load/put cycle.
2560 * Let's update our copies before we save it into the save area.
2562 save_fpu_regs();
2564 return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2567 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2569 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2570 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2573 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2575 unsigned int i;
2576 struct kvm_vcpu *vcpu;
2578 kvm_for_each_vcpu(i, vcpu, kvm) {
2579 __disable_ibs_on_vcpu(vcpu);
2583 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2585 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2586 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2589 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2591 int i, online_vcpus, started_vcpus = 0;
2593 if (!is_vcpu_stopped(vcpu))
2594 return;
2596 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2597 /* Only one cpu at a time may enter/leave the STOPPED state. */
2598 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2599 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2601 for (i = 0; i < online_vcpus; i++) {
2602 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2603 started_vcpus++;
2606 if (started_vcpus == 0) {
2607 /* we're the only active VCPU -> speed it up */
2608 __enable_ibs_on_vcpu(vcpu);
2609 } else if (started_vcpus == 1) {
2611 * As we are starting a second VCPU, we have to disable
2612 * the IBS facility on all VCPUs to remove potentially
2613  * outstanding ENABLE requests.
2615 __disable_ibs_on_all_vcpus(vcpu->kvm);
2618 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2620 * Another VCPU might have used IBS while we were offline.
2621 * Let's play safe and flush the VCPU at startup.
2623 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2624 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2625 return;
2628 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2630 int i, online_vcpus, started_vcpus = 0;
2631 struct kvm_vcpu *started_vcpu = NULL;
2633 if (is_vcpu_stopped(vcpu))
2634 return;
2636 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2637 /* Only one cpu at a time may enter/leave the STOPPED state. */
2638 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2639 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2641 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2642 kvm_s390_clear_stop_irq(vcpu);
2644 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2645 __disable_ibs_on_vcpu(vcpu);
2647 for (i = 0; i < online_vcpus; i++) {
2648 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2649 started_vcpus++;
2650 started_vcpu = vcpu->kvm->vcpus[i];
2654 if (started_vcpus == 1) {
2656 * As we only have one VCPU left, we want to enable the
2657 * IBS facility for that VCPU to speed it up.
2659 __enable_ibs_on_vcpu(started_vcpu);
2662 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2663 return;
2666 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2667 struct kvm_enable_cap *cap)
2669 int r;
2671 if (cap->flags)
2672 return -EINVAL;
2674 switch (cap->cap) {
2675 case KVM_CAP_S390_CSS_SUPPORT:
2676 if (!vcpu->kvm->arch.css_support) {
2677 vcpu->kvm->arch.css_support = 1;
2678 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2679 trace_kvm_s390_enable_css(vcpu->kvm);
2681 r = 0;
2682 break;
2683 default:
2684 r = -EINVAL;
2685 break;
2687 return r;
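/*
 * A minimal sketch of how userspace would turn on the only capability
 * handled above; vcpu_fd is assumed and flags must be zero.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *
 *	if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
 *		return -1;
 */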
2690 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2691 struct kvm_s390_mem_op *mop)
2693 void __user *uaddr = (void __user *)mop->buf;
2694 void *tmpbuf = NULL;
2695 int r, srcu_idx;
2696 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2697 | KVM_S390_MEMOP_F_CHECK_ONLY;
2699 if (mop->flags & ~supported_flags)
2700 return -EINVAL;
2702 if (mop->size > MEM_OP_MAX_SIZE)
2703 return -E2BIG;
2705 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2706 tmpbuf = vmalloc(mop->size);
2707 if (!tmpbuf)
2708 return -ENOMEM;
2711 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2713 switch (mop->op) {
2714 case KVM_S390_MEMOP_LOGICAL_READ:
2715 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2716 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2717 mop->size, GACC_FETCH);
2718 break;
2720 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2721 if (r == 0) {
2722 if (copy_to_user(uaddr, tmpbuf, mop->size))
2723 r = -EFAULT;
2725 break;
2726 case KVM_S390_MEMOP_LOGICAL_WRITE:
2727 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2728 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2729 mop->size, GACC_STORE);
2730 break;
2732 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2733 r = -EFAULT;
2734 break;
2736 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2737 break;
2738 default:
2739 r = -EINVAL;
2742 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2744 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2745 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2747 vfree(tmpbuf);
2748 return r;
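/*
 * A sketch of a userspace KVM_S390_MEM_OP read, assuming vcpu_fd and a
 * caller-provided buffer; guest_addr is a placeholder guest address. With
 * KVM_S390_MEMOP_F_CHECK_ONLY set, only the access check above is performed
 * and no data is copied.
 *
 *	char buf[256];
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr = guest_addr,
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buf,
 *		.ar    = 0,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop) < 0)
 *		return -1;
 */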
2751 long kvm_arch_vcpu_ioctl(struct file *filp,
2752 unsigned int ioctl, unsigned long arg)
2754 struct kvm_vcpu *vcpu = filp->private_data;
2755 void __user *argp = (void __user *)arg;
2756 int idx;
2757 long r;
2759 switch (ioctl) {
2760 case KVM_S390_IRQ: {
2761 struct kvm_s390_irq s390irq;
2763 r = -EFAULT;
2764 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2765 break;
2766 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2767 break;
2769 case KVM_S390_INTERRUPT: {
2770 struct kvm_s390_interrupt s390int;
2771 struct kvm_s390_irq s390irq;
2773 r = -EFAULT;
2774 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2775 break;
2776 if (s390int_to_s390irq(&s390int, &s390irq))
2777 return -EINVAL;
2778 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2779 break;
2781 case KVM_S390_STORE_STATUS:
2782 idx = srcu_read_lock(&vcpu->kvm->srcu);
2783 r = kvm_s390_vcpu_store_status(vcpu, arg);
2784 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2785 break;
2786 case KVM_S390_SET_INITIAL_PSW: {
2787 psw_t psw;
2789 r = -EFAULT;
2790 if (copy_from_user(&psw, argp, sizeof(psw)))
2791 break;
2792 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2793 break;
2795 case KVM_S390_INITIAL_RESET:
2796 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2797 break;
2798 case KVM_SET_ONE_REG:
2799 case KVM_GET_ONE_REG: {
2800 struct kvm_one_reg reg;
2801 r = -EFAULT;
2802 if (copy_from_user(&reg, argp, sizeof(reg)))
2803 break;
2804 if (ioctl == KVM_SET_ONE_REG)
2805 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2806 else
2807 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2808 break;
2810 #ifdef CONFIG_KVM_S390_UCONTROL
2811 case KVM_S390_UCAS_MAP: {
2812 struct kvm_s390_ucas_mapping ucasmap;
2814 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2815 r = -EFAULT;
2816 break;
2819 if (!kvm_is_ucontrol(vcpu->kvm)) {
2820 r = -EINVAL;
2821 break;
2824 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2825 ucasmap.vcpu_addr, ucasmap.length);
2826 break;
2828 case KVM_S390_UCAS_UNMAP: {
2829 struct kvm_s390_ucas_mapping ucasmap;
2831 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2832 r = -EFAULT;
2833 break;
2836 if (!kvm_is_ucontrol(vcpu->kvm)) {
2837 r = -EINVAL;
2838 break;
2841 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2842 ucasmap.length);
2843 break;
2845 #endif
2846 case KVM_S390_VCPU_FAULT: {
2847 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2848 break;
2850 case KVM_ENABLE_CAP:
2852 struct kvm_enable_cap cap;
2853 r = -EFAULT;
2854 if (copy_from_user(&cap, argp, sizeof(cap)))
2855 break;
2856 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2857 break;
2859 case KVM_S390_MEM_OP: {
2860 struct kvm_s390_mem_op mem_op;
2862 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2863 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2864 else
2865 r = -EFAULT;
2866 break;
2868 case KVM_S390_SET_IRQ_STATE: {
2869 struct kvm_s390_irq_state irq_state;
2871 r = -EFAULT;
2872 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2873 break;
2874 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2875 irq_state.len == 0 ||
2876 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2877 r = -EINVAL;
2878 break;
2880 r = kvm_s390_set_irq_state(vcpu,
2881 (void __user *) irq_state.buf,
2882 irq_state.len);
2883 break;
2885 case KVM_S390_GET_IRQ_STATE: {
2886 struct kvm_s390_irq_state irq_state;
2888 r = -EFAULT;
2889 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2890 break;
2891 if (irq_state.len == 0) {
2892 r = -EINVAL;
2893 break;
2895 r = kvm_s390_get_irq_state(vcpu,
2896 (__u8 __user *) irq_state.buf,
2897 irq_state.len);
2898 break;
2900 default:
2901 r = -ENOTTY;
2903 return r;
2906 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2908 #ifdef CONFIG_KVM_S390_UCONTROL
2909 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2910 && (kvm_is_ucontrol(vcpu->kvm))) {
2911 vmf->page = virt_to_page(vcpu->arch.sie_block);
2912 get_page(vmf->page);
2913 return 0;
2915 #endif
2916 return VM_FAULT_SIGBUS;
2919 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2920 unsigned long npages)
2922 return 0;
2925 /* Section: memory related */
2926 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2927 struct kvm_memory_slot *memslot,
2928 const struct kvm_userspace_memory_region *mem,
2929 enum kvm_mr_change change)
2931 /* A few sanity checks. Memory slots have to start and end on a segment
2932    boundary (1 MB). The memory in userland may be fragmented across
2933    several different vmas. It is okay to mmap() and munmap() memory in
2934    this slot at any time after this call */
2936 if (mem->userspace_addr & 0xffffful)
2937 return -EINVAL;
2939 if (mem->memory_size & 0xffffful)
2940 return -EINVAL;
2942 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
2943 return -EINVAL;
2945 return 0;
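/*
 * A sketch of a userspace memslot registration that satisfies the checks
 * above: userspace_addr, guest_phys_addr and memory_size are all multiples
 * of 1 MB. vm_fd and backing (e.g. from a suitably aligned mmap) are
 * assumed.
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot            = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size     = 256UL << 20,
 *		.userspace_addr  = (__u64)(unsigned long)backing,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region) < 0)
 *		return -1;
 */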
2948 void kvm_arch_commit_memory_region(struct kvm *kvm,
2949 const struct kvm_userspace_memory_region *mem,
2950 const struct kvm_memory_slot *old,
2951 const struct kvm_memory_slot *new,
2952 enum kvm_mr_change change)
2954 int rc;
2956 /* If the basics of the memslot do not change, we do not want
2957 * to update the gmap. Every update causes several unnecessary
2958 * segment translation exceptions. This is usually handled just
2959 * fine by the normal fault handler + gmap, but it will also
2960 * cause faults on the prefix page of running guest CPUs.
2962 if (old->userspace_addr == mem->userspace_addr &&
2963 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2964 old->npages * PAGE_SIZE == mem->memory_size)
2965 return;
2967 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2968 mem->guest_phys_addr, mem->memory_size);
2969 if (rc)
2970 pr_warn("failed to commit memory region\n");
2971 return;
2974 static int __init kvm_s390_init(void)
2976 if (!sclp.has_sief2) {
2977 pr_info("SIE not available\n");
2978 return -ENODEV;
2981 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2984 static void __exit kvm_s390_exit(void)
2986 kvm_exit();
2989 module_init(kvm_s390_init);
2990 module_exit(kvm_s390_exit);
2993 * Enable autoloading of the kvm module.
2994 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2995 * since x86 takes a different approach.
2997 #include <linux/miscdevice.h>
2998 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2999 MODULE_ALIAS("devname:kvm");