arch/s390/kvm/kvm-s390.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * hosting IBM Z kernel virtual machines (s390x)
5 * Copyright IBM Corp. 2008, 2020
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Heiko Carstens <heiko.carstens@de.ibm.com>
10 * Christian Ehrhardt <ehrhardt@de.ibm.com>
11 * Jason J. Herne <jjherne@us.ibm.com>
12 */
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include "kvm-s390.h"
49 #include "gaccess.h"
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
55 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
56 #define LOCAL_IRQS 32
57 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
58 (KVM_MAX_VCPUS + LOCAL_IRQS))
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61 VCPU_STAT("userspace_handled", exit_userspace),
62 VCPU_STAT("exit_null", exit_null),
63 VCPU_STAT("pfault_sync", pfault_sync),
64 VCPU_STAT("exit_validity", exit_validity),
65 VCPU_STAT("exit_stop_request", exit_stop_request),
66 VCPU_STAT("exit_external_request", exit_external_request),
67 VCPU_STAT("exit_io_request", exit_io_request),
68 VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
69 VCPU_STAT("exit_instruction", exit_instruction),
70 VCPU_STAT("exit_pei", exit_pei),
71 VCPU_STAT("exit_program_interruption", exit_program_interruption),
72 VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
73 VCPU_STAT("exit_operation_exception", exit_operation_exception),
74 VCPU_STAT("halt_successful_poll", halt_successful_poll),
75 VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
76 VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
77 VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
78 VCPU_STAT("halt_wakeup", halt_wakeup),
79 VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
80 VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
81 VCPU_STAT("instruction_lctlg", instruction_lctlg),
82 VCPU_STAT("instruction_lctl", instruction_lctl),
83 VCPU_STAT("instruction_stctl", instruction_stctl),
84 VCPU_STAT("instruction_stctg", instruction_stctg),
85 VCPU_STAT("deliver_ckc", deliver_ckc),
86 VCPU_STAT("deliver_cputm", deliver_cputm),
87 VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
88 VCPU_STAT("deliver_external_call", deliver_external_call),
89 VCPU_STAT("deliver_service_signal", deliver_service_signal),
90 VCPU_STAT("deliver_virtio", deliver_virtio),
91 VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
92 VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
93 VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
94 VCPU_STAT("deliver_program", deliver_program),
95 VCPU_STAT("deliver_io", deliver_io),
96 VCPU_STAT("deliver_machine_check", deliver_machine_check),
97 VCPU_STAT("exit_wait_state", exit_wait_state),
98 VCPU_STAT("inject_ckc", inject_ckc),
99 VCPU_STAT("inject_cputm", inject_cputm),
100 VCPU_STAT("inject_external_call", inject_external_call),
101 VM_STAT("inject_float_mchk", inject_float_mchk),
102 VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
103 VM_STAT("inject_io", inject_io),
104 VCPU_STAT("inject_mchk", inject_mchk),
105 VM_STAT("inject_pfault_done", inject_pfault_done),
106 VCPU_STAT("inject_program", inject_program),
107 VCPU_STAT("inject_restart", inject_restart),
108 VM_STAT("inject_service_signal", inject_service_signal),
109 VCPU_STAT("inject_set_prefix", inject_set_prefix),
110 VCPU_STAT("inject_stop_signal", inject_stop_signal),
111 VCPU_STAT("inject_pfault_init", inject_pfault_init),
112 VM_STAT("inject_virtio", inject_virtio),
113 VCPU_STAT("instruction_epsw", instruction_epsw),
114 VCPU_STAT("instruction_gs", instruction_gs),
115 VCPU_STAT("instruction_io_other", instruction_io_other),
116 VCPU_STAT("instruction_lpsw", instruction_lpsw),
117 VCPU_STAT("instruction_lpswe", instruction_lpswe),
118 VCPU_STAT("instruction_pfmf", instruction_pfmf),
119 VCPU_STAT("instruction_ptff", instruction_ptff),
120 VCPU_STAT("instruction_stidp", instruction_stidp),
121 VCPU_STAT("instruction_sck", instruction_sck),
122 VCPU_STAT("instruction_sckpf", instruction_sckpf),
123 VCPU_STAT("instruction_spx", instruction_spx),
124 VCPU_STAT("instruction_stpx", instruction_stpx),
125 VCPU_STAT("instruction_stap", instruction_stap),
126 VCPU_STAT("instruction_iske", instruction_iske),
127 VCPU_STAT("instruction_ri", instruction_ri),
128 VCPU_STAT("instruction_rrbe", instruction_rrbe),
129 VCPU_STAT("instruction_sske", instruction_sske),
130 VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
131 VCPU_STAT("instruction_essa", instruction_essa),
132 VCPU_STAT("instruction_stsi", instruction_stsi),
133 VCPU_STAT("instruction_stfl", instruction_stfl),
134 VCPU_STAT("instruction_tb", instruction_tb),
135 VCPU_STAT("instruction_tpi", instruction_tpi),
136 VCPU_STAT("instruction_tprot", instruction_tprot),
137 VCPU_STAT("instruction_tsch", instruction_tsch),
138 VCPU_STAT("instruction_sthyi", instruction_sthyi),
139 VCPU_STAT("instruction_sie", instruction_sie),
140 VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
141 VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
142 VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
143 VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
144 VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
145 VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
146 VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
147 VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
148 VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
149 VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
150 VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
151 VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
152 VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
153 VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
154 VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
155 VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
156 VCPU_STAT("instruction_diag_10", diagnose_10),
157 VCPU_STAT("instruction_diag_44", diagnose_44),
158 VCPU_STAT("instruction_diag_9c", diagnose_9c),
159 VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
160 VCPU_STAT("instruction_diag_258", diagnose_258),
161 VCPU_STAT("instruction_diag_308", diagnose_308),
162 VCPU_STAT("instruction_diag_500", diagnose_500),
163 VCPU_STAT("instruction_diag_other", diagnose_other),
164 { NULL }
167 struct kvm_s390_tod_clock_ext {
168 __u8 epoch_idx;
169 __u64 tod;
170 __u8 reserved[7];
171 } __packed;
173 /* allow nested virtualization in KVM (if enabled by user space) */
174 static int nested;
175 module_param(nested, int, S_IRUGO);
176 MODULE_PARM_DESC(nested, "Nested virtualization support");
178 /* allow 1m huge page guest backing, if !nested */
179 static int hpage;
180 module_param(hpage, int, 0444);
181 MODULE_PARM_DESC(hpage, "1m huge page backing support");
183 /* maximum percentage of steal time for polling. >100 is treated like 100 */
184 static u8 halt_poll_max_steal = 10;
185 module_param(halt_poll_max_steal, byte, 0644);
186 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
188 /* if set to true, the GISA will be initialized and used if available */
189 static bool use_gisa = true;
190 module_param(use_gisa, bool, 0644);
191 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
193 /*
194 * For now we handle at most 16 double words as this is what the s390 base
195 * kernel handles and stores in the prefix page. If we ever need to go beyond
196 * this, this requires changes to code, but the external uapi can stay.
197 */
198 #define SIZE_INTERNAL 16
200 /*
201 * Base feature mask that defines default mask for facilities. Consists of the
202 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
203 */
204 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
205 /*
206 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
207 * and defines the facilities that can be enabled via a cpu model.
208 */
209 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
211 static unsigned long kvm_s390_fac_size(void)
213 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
214 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
215 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
216 sizeof(S390_lowcore.stfle_fac_list));
218 return SIZE_INTERNAL;
221 /* available cpu features supported by kvm */
222 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
223 /* available subfunctions indicated via query / "test bit" */
224 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
226 static struct gmap_notifier gmap_notifier;
227 static struct gmap_notifier vsie_gmap_notifier;
228 debug_info_t *kvm_s390_dbf;
229 debug_info_t *kvm_s390_dbf_uv;
231 /* Section: not file related */
232 int kvm_arch_hardware_enable(void)
234 /* every s390 is virtualization enabled ;-) */
235 return 0;
238 int kvm_arch_check_processor_compat(void *opaque)
240 return 0;
243 /* forward declarations */
244 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
245 unsigned long end);
246 static int sca_switch_to_extended(struct kvm *kvm);
248 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
250 u8 delta_idx = 0;
252 /*
253 * The TOD jumps by delta, we have to compensate this by adding
254 * -delta to the epoch.
255 */
256 delta = -delta;
258 /* sign-extension - we're adding to signed values below */
259 if ((s64)delta < 0)
260 delta_idx = -1;
262 scb->epoch += delta;
263 if (scb->ecd & ECD_MEF) {
264 scb->epdx += delta_idx;
265 if (scb->epoch < delta)
266 scb->epdx += 1;
267 }
268 }
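/*
 * Illustrative note (not part of the original source): with the
 * multiple-epoch facility (ECD_MEF), epdx:epoch form a 128-bit signed
 * epoch. Adding the 64-bit delta means sign-extending it (delta_idx is
 * -1 for a negative delta, 0 otherwise) and propagating the carry:
 * e.g. epoch = 0xffffffffffffffff plus delta = 1 wraps to 0, the
 * "scb->epoch < delta" check detects the wrap-around and epdx is
 * incremented by one.
 */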
270 /*
271 * This callback is executed during stop_machine(). All CPUs are therefore
272 * temporarily stopped. In order not to change guest behavior, we have to
273 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
274 * so a CPU won't be stopped while calculating with the epoch.
275 */
276 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
277 void *v)
279 struct kvm *kvm;
280 struct kvm_vcpu *vcpu;
281 int i;
282 unsigned long long *delta = v;
284 list_for_each_entry(kvm, &vm_list, vm_list) {
285 kvm_for_each_vcpu(i, vcpu, kvm) {
286 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
287 if (i == 0) {
288 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
289 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
291 if (vcpu->arch.cputm_enabled)
292 vcpu->arch.cputm_start += *delta;
293 if (vcpu->arch.vsie_block)
294 kvm_clock_sync_scb(vcpu->arch.vsie_block,
295 *delta);
298 return NOTIFY_OK;
301 static struct notifier_block kvm_clock_notifier = {
302 .notifier_call = kvm_clock_sync,
305 int kvm_arch_hardware_setup(void *opaque)
307 gmap_notifier.notifier_call = kvm_gmap_notifier;
308 gmap_register_pte_notifier(&gmap_notifier);
309 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
310 gmap_register_pte_notifier(&vsie_gmap_notifier);
311 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
312 &kvm_clock_notifier);
313 return 0;
316 void kvm_arch_hardware_unsetup(void)
318 gmap_unregister_pte_notifier(&gmap_notifier);
319 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
320 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
321 &kvm_clock_notifier);
324 static void allow_cpu_feat(unsigned long nr)
326 set_bit_inv(nr, kvm_s390_available_cpu_feat);
329 static inline int plo_test_bit(unsigned char nr)
331 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
332 int cc;
334 asm volatile(
335 /* Parameter registers are ignored for "test bit" */
336 " plo 0,0,0,0(0)\n"
337 " ipm %0\n"
338 " srl %0,28\n"
339 : "=d" (cc)
340 : "d" (r0)
341 : "cc");
342 return cc == 0;
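/*
 * Illustrative note (not part of the original source, assumption about
 * the PLO interface): or'ing 0x100 into GR0 selects the "test bit"
 * form of PERFORM LOCKED OPERATION, so condition code 0 only reports
 * that function code nr is installed; no locked operation is performed.
 */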
345 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
347 register unsigned long r0 asm("0") = 0; /* query function */
348 register unsigned long r1 asm("1") = (unsigned long) query;
350 asm volatile(
351 /* Parameter regs are ignored */
352 " .insn rrf,%[opc] << 16,2,4,6,0\n"
354 : "d" (r0), "a" (r1), [opc] "i" (opcode)
355 : "cc", "memory");
358 #define INSN_SORTL 0xb938
359 #define INSN_DFLTCC 0xb939
361 static void kvm_s390_cpu_feat_init(void)
363 int i;
365 for (i = 0; i < 256; ++i) {
366 if (plo_test_bit(i))
367 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
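/*
 * Illustrative note (not part of the original source): the PLO result
 * bitmap is MSB-first, e.g. function code 0 sets bit 0x80 of byte 0,
 * and function code 9 sets bit 0x40 of byte 1 (9 >> 3 == 1,
 * 0x80 >> (9 & 7) == 0x40).
 */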
370 if (test_facility(28)) /* TOD-clock steering */
371 ptff(kvm_s390_available_subfunc.ptff,
372 sizeof(kvm_s390_available_subfunc.ptff),
373 PTFF_QAF);
375 if (test_facility(17)) { /* MSA */
376 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
377 kvm_s390_available_subfunc.kmac);
378 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
379 kvm_s390_available_subfunc.kmc);
380 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
381 kvm_s390_available_subfunc.km);
382 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
383 kvm_s390_available_subfunc.kimd);
384 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
385 kvm_s390_available_subfunc.klmd);
387 if (test_facility(76)) /* MSA3 */
388 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
389 kvm_s390_available_subfunc.pckmo);
390 if (test_facility(77)) { /* MSA4 */
391 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
392 kvm_s390_available_subfunc.kmctr);
393 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
394 kvm_s390_available_subfunc.kmf);
395 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
396 kvm_s390_available_subfunc.kmo);
397 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
398 kvm_s390_available_subfunc.pcc);
400 if (test_facility(57)) /* MSA5 */
401 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
402 kvm_s390_available_subfunc.ppno);
404 if (test_facility(146)) /* MSA8 */
405 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
406 kvm_s390_available_subfunc.kma);
408 if (test_facility(155)) /* MSA9 */
409 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
410 kvm_s390_available_subfunc.kdsa);
412 if (test_facility(150)) /* SORTL */
413 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
415 if (test_facility(151)) /* DFLTCC */
416 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
418 if (MACHINE_HAS_ESOP)
419 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
420 /*
421 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
422 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
423 */
424 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
425 !test_facility(3) || !nested)
426 return;
427 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
428 if (sclp.has_64bscao)
429 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
430 if (sclp.has_siif)
431 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
432 if (sclp.has_gpere)
433 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
434 if (sclp.has_gsls)
435 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
436 if (sclp.has_ib)
437 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
438 if (sclp.has_cei)
439 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
440 if (sclp.has_ibs)
441 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
442 if (sclp.has_kss)
443 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
444 /*
445 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
446 * all skey handling functions read/set the skey from the PGSTE
447 * instead of the real storage key.
448 *
449 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
450 * pages being detected as preserved although they are resident.
451 *
452 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
453 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
454 *
455 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
456 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
457 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
458 *
459 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
460 * cannot easily shadow the SCA because of the ipte lock.
461 */
462 }
464 int kvm_arch_init(void *opaque)
466 int rc = -ENOMEM;
468 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
469 if (!kvm_s390_dbf)
470 return -ENOMEM;
472 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
473 if (!kvm_s390_dbf_uv)
474 goto out;
476 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
477 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
478 goto out;
480 kvm_s390_cpu_feat_init();
482 /* Register floating interrupt controller interface. */
483 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
484 if (rc) {
485 pr_err("A FLIC registration call failed with rc=%d\n", rc);
486 goto out;
489 rc = kvm_s390_gib_init(GAL_ISC);
490 if (rc)
491 goto out;
493 return 0;
495 out:
496 kvm_arch_exit();
497 return rc;
500 void kvm_arch_exit(void)
502 kvm_s390_gib_destroy();
503 debug_unregister(kvm_s390_dbf);
504 debug_unregister(kvm_s390_dbf_uv);
507 /* Section: device related */
508 long kvm_arch_dev_ioctl(struct file *filp,
509 unsigned int ioctl, unsigned long arg)
511 if (ioctl == KVM_S390_ENABLE_SIE)
512 return s390_enable_sie();
513 return -EINVAL;
516 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
518 int r;
520 switch (ext) {
521 case KVM_CAP_S390_PSW:
522 case KVM_CAP_S390_GMAP:
523 case KVM_CAP_SYNC_MMU:
524 #ifdef CONFIG_KVM_S390_UCONTROL
525 case KVM_CAP_S390_UCONTROL:
526 #endif
527 case KVM_CAP_ASYNC_PF:
528 case KVM_CAP_SYNC_REGS:
529 case KVM_CAP_ONE_REG:
530 case KVM_CAP_ENABLE_CAP:
531 case KVM_CAP_S390_CSS_SUPPORT:
532 case KVM_CAP_IOEVENTFD:
533 case KVM_CAP_DEVICE_CTRL:
534 case KVM_CAP_S390_IRQCHIP:
535 case KVM_CAP_VM_ATTRIBUTES:
536 case KVM_CAP_MP_STATE:
537 case KVM_CAP_IMMEDIATE_EXIT:
538 case KVM_CAP_S390_INJECT_IRQ:
539 case KVM_CAP_S390_USER_SIGP:
540 case KVM_CAP_S390_USER_STSI:
541 case KVM_CAP_S390_SKEYS:
542 case KVM_CAP_S390_IRQ_STATE:
543 case KVM_CAP_S390_USER_INSTR0:
544 case KVM_CAP_S390_CMMA_MIGRATION:
545 case KVM_CAP_S390_AIS:
546 case KVM_CAP_S390_AIS_MIGRATION:
547 case KVM_CAP_S390_VCPU_RESETS:
548 case KVM_CAP_SET_GUEST_DEBUG:
549 case KVM_CAP_S390_DIAG318:
550 r = 1;
551 break;
552 case KVM_CAP_S390_HPAGE_1M:
553 r = 0;
554 if (hpage && !kvm_is_ucontrol(kvm))
555 r = 1;
556 break;
557 case KVM_CAP_S390_MEM_OP:
558 r = MEM_OP_MAX_SIZE;
559 break;
560 case KVM_CAP_NR_VCPUS:
561 case KVM_CAP_MAX_VCPUS:
562 case KVM_CAP_MAX_VCPU_ID:
563 r = KVM_S390_BSCA_CPU_SLOTS;
564 if (!kvm_s390_use_sca_entries())
565 r = KVM_MAX_VCPUS;
566 else if (sclp.has_esca && sclp.has_64bscao)
567 r = KVM_S390_ESCA_CPU_SLOTS;
568 break;
569 case KVM_CAP_S390_COW:
570 r = MACHINE_HAS_ESOP;
571 break;
572 case KVM_CAP_S390_VECTOR_REGISTERS:
573 r = MACHINE_HAS_VX;
574 break;
575 case KVM_CAP_S390_RI:
576 r = test_facility(64);
577 break;
578 case KVM_CAP_S390_GS:
579 r = test_facility(133);
580 break;
581 case KVM_CAP_S390_BPB:
582 r = test_facility(82);
583 break;
584 case KVM_CAP_S390_PROTECTED:
585 r = is_prot_virt_host();
586 break;
587 default:
588 r = 0;
590 return r;
593 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
595 int i;
596 gfn_t cur_gfn, last_gfn;
597 unsigned long gaddr, vmaddr;
598 struct gmap *gmap = kvm->arch.gmap;
599 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
601 /* Loop over all guest segments */
602 cur_gfn = memslot->base_gfn;
603 last_gfn = memslot->base_gfn + memslot->npages;
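/*
 * Illustrative note (not part of the original source): each iteration
 * of the loop below covers one segment of _PAGE_ENTRIES (256) guest
 * pages, i.e. 1 MB with 4 KB pages; gmap_sync_dirty_log_pmd() fills
 * the local bitmap for that segment before it is folded into the
 * memslot's dirty log via mark_page_dirty().
 */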
604 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
605 gaddr = gfn_to_gpa(cur_gfn);
606 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
607 if (kvm_is_error_hva(vmaddr))
608 continue;
610 bitmap_zero(bitmap, _PAGE_ENTRIES);
611 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
612 for (i = 0; i < _PAGE_ENTRIES; i++) {
613 if (test_bit(i, bitmap))
614 mark_page_dirty(kvm, cur_gfn + i);
617 if (fatal_signal_pending(current))
618 return;
619 cond_resched();
623 /* Section: vm related */
624 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
626 /*
627 * Get (and clear) the dirty memory log for a memory slot.
628 */
629 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
630 struct kvm_dirty_log *log)
632 int r;
633 unsigned long n;
634 struct kvm_memory_slot *memslot;
635 int is_dirty;
637 if (kvm_is_ucontrol(kvm))
638 return -EINVAL;
640 mutex_lock(&kvm->slots_lock);
642 r = -EINVAL;
643 if (log->slot >= KVM_USER_MEM_SLOTS)
644 goto out;
646 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
647 if (r)
648 goto out;
650 /* Clear the dirty log */
651 if (is_dirty) {
652 n = kvm_dirty_bitmap_bytes(memslot);
653 memset(memslot->dirty_bitmap, 0, n);
655 r = 0;
656 out:
657 mutex_unlock(&kvm->slots_lock);
658 return r;
661 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
663 unsigned int i;
664 struct kvm_vcpu *vcpu;
666 kvm_for_each_vcpu(i, vcpu, kvm) {
667 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
671 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
673 int r;
675 if (cap->flags)
676 return -EINVAL;
678 switch (cap->cap) {
679 case KVM_CAP_S390_IRQCHIP:
680 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
681 kvm->arch.use_irqchip = 1;
682 r = 0;
683 break;
684 case KVM_CAP_S390_USER_SIGP:
685 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
686 kvm->arch.user_sigp = 1;
687 r = 0;
688 break;
689 case KVM_CAP_S390_VECTOR_REGISTERS:
690 mutex_lock(&kvm->lock);
691 if (kvm->created_vcpus) {
692 r = -EBUSY;
693 } else if (MACHINE_HAS_VX) {
694 set_kvm_facility(kvm->arch.model.fac_mask, 129);
695 set_kvm_facility(kvm->arch.model.fac_list, 129);
696 if (test_facility(134)) {
697 set_kvm_facility(kvm->arch.model.fac_mask, 134);
698 set_kvm_facility(kvm->arch.model.fac_list, 134);
700 if (test_facility(135)) {
701 set_kvm_facility(kvm->arch.model.fac_mask, 135);
702 set_kvm_facility(kvm->arch.model.fac_list, 135);
704 if (test_facility(148)) {
705 set_kvm_facility(kvm->arch.model.fac_mask, 148);
706 set_kvm_facility(kvm->arch.model.fac_list, 148);
708 if (test_facility(152)) {
709 set_kvm_facility(kvm->arch.model.fac_mask, 152);
710 set_kvm_facility(kvm->arch.model.fac_list, 152);
712 r = 0;
713 } else
714 r = -EINVAL;
715 mutex_unlock(&kvm->lock);
716 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
717 r ? "(not available)" : "(success)");
718 break;
719 case KVM_CAP_S390_RI:
720 r = -EINVAL;
721 mutex_lock(&kvm->lock);
722 if (kvm->created_vcpus) {
723 r = -EBUSY;
724 } else if (test_facility(64)) {
725 set_kvm_facility(kvm->arch.model.fac_mask, 64);
726 set_kvm_facility(kvm->arch.model.fac_list, 64);
727 r = 0;
729 mutex_unlock(&kvm->lock);
730 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
731 r ? "(not available)" : "(success)");
732 break;
733 case KVM_CAP_S390_AIS:
734 mutex_lock(&kvm->lock);
735 if (kvm->created_vcpus) {
736 r = -EBUSY;
737 } else {
738 set_kvm_facility(kvm->arch.model.fac_mask, 72);
739 set_kvm_facility(kvm->arch.model.fac_list, 72);
740 r = 0;
742 mutex_unlock(&kvm->lock);
743 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
744 r ? "(not available)" : "(success)");
745 break;
746 case KVM_CAP_S390_GS:
747 r = -EINVAL;
748 mutex_lock(&kvm->lock);
749 if (kvm->created_vcpus) {
750 r = -EBUSY;
751 } else if (test_facility(133)) {
752 set_kvm_facility(kvm->arch.model.fac_mask, 133);
753 set_kvm_facility(kvm->arch.model.fac_list, 133);
754 r = 0;
756 mutex_unlock(&kvm->lock);
757 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
758 r ? "(not available)" : "(success)");
759 break;
760 case KVM_CAP_S390_HPAGE_1M:
761 mutex_lock(&kvm->lock);
762 if (kvm->created_vcpus)
763 r = -EBUSY;
764 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
765 r = -EINVAL;
766 else {
767 r = 0;
768 mmap_write_lock(kvm->mm);
769 kvm->mm->context.allow_gmap_hpage_1m = 1;
770 mmap_write_unlock(kvm->mm);
771 /*
772 * We might have to create fake 4k page
773 * tables. To avoid that the hardware works on
774 * stale PGSTEs, we emulate these instructions.
775 */
776 kvm->arch.use_skf = 0;
777 kvm->arch.use_pfmfi = 0;
779 mutex_unlock(&kvm->lock);
780 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
781 r ? "(not available)" : "(success)");
782 break;
783 case KVM_CAP_S390_USER_STSI:
784 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
785 kvm->arch.user_stsi = 1;
786 r = 0;
787 break;
788 case KVM_CAP_S390_USER_INSTR0:
789 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
790 kvm->arch.user_instr0 = 1;
791 icpt_operexc_on_all_vcpus(kvm);
792 r = 0;
793 break;
794 default:
795 r = -EINVAL;
796 break;
798 return r;
801 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
803 int ret;
805 switch (attr->attr) {
806 case KVM_S390_VM_MEM_LIMIT_SIZE:
807 ret = 0;
808 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
809 kvm->arch.mem_limit);
810 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
811 ret = -EFAULT;
812 break;
813 default:
814 ret = -ENXIO;
815 break;
817 return ret;
820 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
822 int ret;
823 unsigned int idx;
824 switch (attr->attr) {
825 case KVM_S390_VM_MEM_ENABLE_CMMA:
826 ret = -ENXIO;
827 if (!sclp.has_cmma)
828 break;
830 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
831 mutex_lock(&kvm->lock);
832 if (kvm->created_vcpus)
833 ret = -EBUSY;
834 else if (kvm->mm->context.allow_gmap_hpage_1m)
835 ret = -EINVAL;
836 else {
837 kvm->arch.use_cmma = 1;
838 /* Not compatible with cmma. */
839 kvm->arch.use_pfmfi = 0;
840 ret = 0;
842 mutex_unlock(&kvm->lock);
843 break;
844 case KVM_S390_VM_MEM_CLR_CMMA:
845 ret = -ENXIO;
846 if (!sclp.has_cmma)
847 break;
848 ret = -EINVAL;
849 if (!kvm->arch.use_cmma)
850 break;
852 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
853 mutex_lock(&kvm->lock);
854 idx = srcu_read_lock(&kvm->srcu);
855 s390_reset_cmma(kvm->arch.gmap->mm);
856 srcu_read_unlock(&kvm->srcu, idx);
857 mutex_unlock(&kvm->lock);
858 ret = 0;
859 break;
860 case KVM_S390_VM_MEM_LIMIT_SIZE: {
861 unsigned long new_limit;
863 if (kvm_is_ucontrol(kvm))
864 return -EINVAL;
866 if (get_user(new_limit, (u64 __user *)attr->addr))
867 return -EFAULT;
869 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
870 new_limit > kvm->arch.mem_limit)
871 return -E2BIG;
873 if (!new_limit)
874 return -EINVAL;
876 /* gmap_create takes last usable address */
877 if (new_limit != KVM_S390_NO_MEM_LIMIT)
878 new_limit -= 1;
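/*
 * Illustrative note (not part of the original source): gmap_create()
 * expects the last usable guest address rather than the size, so e.g.
 * a requested limit of 1 MB (0x100000) is passed down as 0xfffff.
 */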
880 ret = -EBUSY;
881 mutex_lock(&kvm->lock);
882 if (!kvm->created_vcpus) {
883 /* gmap_create will round the limit up */
884 struct gmap *new = gmap_create(current->mm, new_limit);
886 if (!new) {
887 ret = -ENOMEM;
888 } else {
889 gmap_remove(kvm->arch.gmap);
890 new->private = kvm;
891 kvm->arch.gmap = new;
892 ret = 0;
895 mutex_unlock(&kvm->lock);
896 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
897 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
898 (void *) kvm->arch.gmap->asce);
899 break;
901 default:
902 ret = -ENXIO;
903 break;
905 return ret;
908 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
910 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
912 struct kvm_vcpu *vcpu;
913 int i;
915 kvm_s390_vcpu_block_all(kvm);
917 kvm_for_each_vcpu(i, vcpu, kvm) {
918 kvm_s390_vcpu_crypto_setup(vcpu);
919 /* recreate the shadow crycb by leaving the VSIE handler */
920 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
923 kvm_s390_vcpu_unblock_all(kvm);
926 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
928 mutex_lock(&kvm->lock);
929 switch (attr->attr) {
930 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
931 if (!test_kvm_facility(kvm, 76)) {
932 mutex_unlock(&kvm->lock);
933 return -EINVAL;
935 get_random_bytes(
936 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
937 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
938 kvm->arch.crypto.aes_kw = 1;
939 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
940 break;
941 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
942 if (!test_kvm_facility(kvm, 76)) {
943 mutex_unlock(&kvm->lock);
944 return -EINVAL;
946 get_random_bytes(
947 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
948 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
949 kvm->arch.crypto.dea_kw = 1;
950 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
951 break;
952 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
953 if (!test_kvm_facility(kvm, 76)) {
954 mutex_unlock(&kvm->lock);
955 return -EINVAL;
957 kvm->arch.crypto.aes_kw = 0;
958 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
959 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
960 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
961 break;
962 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
963 if (!test_kvm_facility(kvm, 76)) {
964 mutex_unlock(&kvm->lock);
965 return -EINVAL;
967 kvm->arch.crypto.dea_kw = 0;
968 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
969 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
970 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
971 break;
972 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
973 if (!ap_instructions_available()) {
974 mutex_unlock(&kvm->lock);
975 return -EOPNOTSUPP;
977 kvm->arch.crypto.apie = 1;
978 break;
979 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
980 if (!ap_instructions_available()) {
981 mutex_unlock(&kvm->lock);
982 return -EOPNOTSUPP;
984 kvm->arch.crypto.apie = 0;
985 break;
986 default:
987 mutex_unlock(&kvm->lock);
988 return -ENXIO;
991 kvm_s390_vcpu_crypto_reset_all(kvm);
992 mutex_unlock(&kvm->lock);
993 return 0;
996 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
998 int cx;
999 struct kvm_vcpu *vcpu;
1001 kvm_for_each_vcpu(cx, vcpu, kvm)
1002 kvm_s390_sync_request(req, vcpu);
1005 /*
1006 * Must be called with kvm->srcu held to avoid races on memslots, and with
1007 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1008 */
1009 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1011 struct kvm_memory_slot *ms;
1012 struct kvm_memslots *slots;
1013 unsigned long ram_pages = 0;
1014 int slotnr;
1016 /* migration mode already enabled */
1017 if (kvm->arch.migration_mode)
1018 return 0;
1019 slots = kvm_memslots(kvm);
1020 if (!slots || !slots->used_slots)
1021 return -EINVAL;
1023 if (!kvm->arch.use_cmma) {
1024 kvm->arch.migration_mode = 1;
1025 return 0;
1027 /* mark all the pages in active slots as dirty */
1028 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1029 ms = slots->memslots + slotnr;
1030 if (!ms->dirty_bitmap)
1031 return -EINVAL;
1032 /*
1033 * The second half of the bitmap is only used on x86,
1034 * and would be wasted otherwise, so we put it to good
1035 * use here to keep track of the state of the storage
1036 * attributes.
1037 */
1038 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1039 ram_pages += ms->npages;
1041 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1042 kvm->arch.migration_mode = 1;
1043 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1044 return 0;
1047 /*
1048 * Must be called with kvm->slots_lock to avoid races with ourselves and
1049 * kvm_s390_vm_start_migration.
1050 */
1051 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1053 /* migration mode already disabled */
1054 if (!kvm->arch.migration_mode)
1055 return 0;
1056 kvm->arch.migration_mode = 0;
1057 if (kvm->arch.use_cmma)
1058 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1059 return 0;
1062 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1063 struct kvm_device_attr *attr)
1065 int res = -ENXIO;
1067 mutex_lock(&kvm->slots_lock);
1068 switch (attr->attr) {
1069 case KVM_S390_VM_MIGRATION_START:
1070 res = kvm_s390_vm_start_migration(kvm);
1071 break;
1072 case KVM_S390_VM_MIGRATION_STOP:
1073 res = kvm_s390_vm_stop_migration(kvm);
1074 break;
1075 default:
1076 break;
1078 mutex_unlock(&kvm->slots_lock);
1080 return res;
1083 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1084 struct kvm_device_attr *attr)
1086 u64 mig = kvm->arch.migration_mode;
1088 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1089 return -ENXIO;
1091 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1092 return -EFAULT;
1093 return 0;
1096 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1098 struct kvm_s390_vm_tod_clock gtod;
1100 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1101 return -EFAULT;
1103 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1104 return -EINVAL;
1105 kvm_s390_set_tod_clock(kvm, &gtod);
1107 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1108 gtod.epoch_idx, gtod.tod);
1110 return 0;
1113 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1115 u8 gtod_high;
1117 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1118 sizeof(gtod_high)))
1119 return -EFAULT;
1121 if (gtod_high != 0)
1122 return -EINVAL;
1123 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1125 return 0;
1128 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1130 struct kvm_s390_vm_tod_clock gtod = { 0 };
1132 if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1133 sizeof(gtod.tod)))
1134 return -EFAULT;
1136 kvm_s390_set_tod_clock(kvm, &gtod);
1137 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1138 return 0;
1141 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1143 int ret;
1145 if (attr->flags)
1146 return -EINVAL;
1148 switch (attr->attr) {
1149 case KVM_S390_VM_TOD_EXT:
1150 ret = kvm_s390_set_tod_ext(kvm, attr);
1151 break;
1152 case KVM_S390_VM_TOD_HIGH:
1153 ret = kvm_s390_set_tod_high(kvm, attr);
1154 break;
1155 case KVM_S390_VM_TOD_LOW:
1156 ret = kvm_s390_set_tod_low(kvm, attr);
1157 break;
1158 default:
1159 ret = -ENXIO;
1160 break;
1162 return ret;
1165 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1166 struct kvm_s390_vm_tod_clock *gtod)
1168 struct kvm_s390_tod_clock_ext htod;
1170 preempt_disable();
1172 get_tod_clock_ext((char *)&htod);
1174 gtod->tod = htod.tod + kvm->arch.epoch;
1175 gtod->epoch_idx = 0;
1176 if (test_kvm_facility(kvm, 139)) {
1177 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1178 if (gtod->tod < htod.tod)
1179 gtod->epoch_idx += 1;
1182 preempt_enable();
1185 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1187 struct kvm_s390_vm_tod_clock gtod;
1189 memset(&gtod, 0, sizeof(gtod));
1190 kvm_s390_get_tod_clock(kvm, &gtod);
1191 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1192 return -EFAULT;
1194 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1195 gtod.epoch_idx, gtod.tod);
1196 return 0;
1199 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1201 u8 gtod_high = 0;
1203 if (copy_to_user((void __user *)attr->addr, &gtod_high,
1204 sizeof(gtod_high)))
1205 return -EFAULT;
1206 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1208 return 0;
1211 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1213 u64 gtod;
1215 gtod = kvm_s390_get_tod_clock_fast(kvm);
1216 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1217 return -EFAULT;
1218 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1220 return 0;
1223 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1225 int ret;
1227 if (attr->flags)
1228 return -EINVAL;
1230 switch (attr->attr) {
1231 case KVM_S390_VM_TOD_EXT:
1232 ret = kvm_s390_get_tod_ext(kvm, attr);
1233 break;
1234 case KVM_S390_VM_TOD_HIGH:
1235 ret = kvm_s390_get_tod_high(kvm, attr);
1236 break;
1237 case KVM_S390_VM_TOD_LOW:
1238 ret = kvm_s390_get_tod_low(kvm, attr);
1239 break;
1240 default:
1241 ret = -ENXIO;
1242 break;
1244 return ret;
1247 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1249 struct kvm_s390_vm_cpu_processor *proc;
1250 u16 lowest_ibc, unblocked_ibc;
1251 int ret = 0;
1253 mutex_lock(&kvm->lock);
1254 if (kvm->created_vcpus) {
1255 ret = -EBUSY;
1256 goto out;
1258 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1259 if (!proc) {
1260 ret = -ENOMEM;
1261 goto out;
1263 if (!copy_from_user(proc, (void __user *)attr->addr,
1264 sizeof(*proc))) {
1265 kvm->arch.model.cpuid = proc->cpuid;
1266 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1267 unblocked_ibc = sclp.ibc & 0xfff;
1268 if (lowest_ibc && proc->ibc) {
1269 if (proc->ibc > unblocked_ibc)
1270 kvm->arch.model.ibc = unblocked_ibc;
1271 else if (proc->ibc < lowest_ibc)
1272 kvm->arch.model.ibc = lowest_ibc;
1273 else
1274 kvm->arch.model.ibc = proc->ibc;
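/*
 * Illustrative note (not part of the original source): the requested
 * ibc is clamped to the machine's supported range, e.g. with
 * lowest_ibc = 0x100 and unblocked_ibc = 0x300 a proc->ibc of 0x050
 * is raised to 0x100 and a proc->ibc of 0x400 is capped at 0x300.
 */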
1276 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1277 S390_ARCH_FAC_LIST_SIZE_BYTE);
1278 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1279 kvm->arch.model.ibc,
1280 kvm->arch.model.cpuid);
1281 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1282 kvm->arch.model.fac_list[0],
1283 kvm->arch.model.fac_list[1],
1284 kvm->arch.model.fac_list[2]);
1285 } else
1286 ret = -EFAULT;
1287 kfree(proc);
1288 out:
1289 mutex_unlock(&kvm->lock);
1290 return ret;
1293 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1294 struct kvm_device_attr *attr)
1296 struct kvm_s390_vm_cpu_feat data;
1298 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1299 return -EFAULT;
1300 if (!bitmap_subset((unsigned long *) data.feat,
1301 kvm_s390_available_cpu_feat,
1302 KVM_S390_VM_CPU_FEAT_NR_BITS))
1303 return -EINVAL;
1305 mutex_lock(&kvm->lock);
1306 if (kvm->created_vcpus) {
1307 mutex_unlock(&kvm->lock);
1308 return -EBUSY;
1310 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1311 KVM_S390_VM_CPU_FEAT_NR_BITS);
1312 mutex_unlock(&kvm->lock);
1313 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1314 data.feat[0],
1315 data.feat[1],
1316 data.feat[2]);
1317 return 0;
1320 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1321 struct kvm_device_attr *attr)
1323 mutex_lock(&kvm->lock);
1324 if (kvm->created_vcpus) {
1325 mutex_unlock(&kvm->lock);
1326 return -EBUSY;
1329 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1330 sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1331 mutex_unlock(&kvm->lock);
1332 return -EFAULT;
1334 mutex_unlock(&kvm->lock);
1336 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1337 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1338 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1339 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1340 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1341 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1342 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1343 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1344 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1345 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1346 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1347 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1348 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1349 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1350 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1351 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1352 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1353 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1354 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1355 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1356 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1357 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1358 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1359 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1360 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1361 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1362 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1363 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1364 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1365 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1366 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1367 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1368 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1369 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1370 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1371 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1372 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1373 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1374 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1375 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1376 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1377 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1378 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1379 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1380 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1381 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1382 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1383 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1384 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1385 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1386 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1387 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1388 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1389 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1390 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1391 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1392 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1394 return 0;
1397 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1399 int ret = -ENXIO;
1401 switch (attr->attr) {
1402 case KVM_S390_VM_CPU_PROCESSOR:
1403 ret = kvm_s390_set_processor(kvm, attr);
1404 break;
1405 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1406 ret = kvm_s390_set_processor_feat(kvm, attr);
1407 break;
1408 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1409 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1410 break;
1412 return ret;
1415 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1417 struct kvm_s390_vm_cpu_processor *proc;
1418 int ret = 0;
1420 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1421 if (!proc) {
1422 ret = -ENOMEM;
1423 goto out;
1425 proc->cpuid = kvm->arch.model.cpuid;
1426 proc->ibc = kvm->arch.model.ibc;
1427 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1428 S390_ARCH_FAC_LIST_SIZE_BYTE);
1429 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1430 kvm->arch.model.ibc,
1431 kvm->arch.model.cpuid);
1432 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1433 kvm->arch.model.fac_list[0],
1434 kvm->arch.model.fac_list[1],
1435 kvm->arch.model.fac_list[2]);
1436 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1437 ret = -EFAULT;
1438 kfree(proc);
1439 out:
1440 return ret;
1443 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1445 struct kvm_s390_vm_cpu_machine *mach;
1446 int ret = 0;
1448 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1449 if (!mach) {
1450 ret = -ENOMEM;
1451 goto out;
1453 get_cpu_id((struct cpuid *) &mach->cpuid);
1454 mach->ibc = sclp.ibc;
1455 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1456 S390_ARCH_FAC_LIST_SIZE_BYTE);
1457 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1458 sizeof(S390_lowcore.stfle_fac_list));
1459 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1460 kvm->arch.model.ibc,
1461 kvm->arch.model.cpuid);
1462 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1463 mach->fac_mask[0],
1464 mach->fac_mask[1],
1465 mach->fac_mask[2]);
1466 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1467 mach->fac_list[0],
1468 mach->fac_list[1],
1469 mach->fac_list[2]);
1470 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1471 ret = -EFAULT;
1472 kfree(mach);
1473 out:
1474 return ret;
1477 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1478 struct kvm_device_attr *attr)
1480 struct kvm_s390_vm_cpu_feat data;
1482 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1483 KVM_S390_VM_CPU_FEAT_NR_BITS);
1484 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1485 return -EFAULT;
1486 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1487 data.feat[0],
1488 data.feat[1],
1489 data.feat[2]);
1490 return 0;
1493 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1494 struct kvm_device_attr *attr)
1496 struct kvm_s390_vm_cpu_feat data;
1498 bitmap_copy((unsigned long *) data.feat,
1499 kvm_s390_available_cpu_feat,
1500 KVM_S390_VM_CPU_FEAT_NR_BITS);
1501 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1502 return -EFAULT;
1503 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1504 data.feat[0],
1505 data.feat[1],
1506 data.feat[2]);
1507 return 0;
1510 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1511 struct kvm_device_attr *attr)
1513 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1514 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1515 return -EFAULT;
1517 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1518 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1519 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1520 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1521 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1522 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1523 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1524 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1525 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1526 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1527 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1528 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1529 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1530 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1531 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1532 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1533 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1534 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1535 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1536 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1537 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1538 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1539 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1540 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1541 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1542 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1543 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1544 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1545 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1546 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1547 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1548 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1549 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1550 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1551 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1552 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1553 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1554 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1555 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1556 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1557 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1558 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1559 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1560 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1561 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1562 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1563 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1564 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1565 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1566 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1567 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1568 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1569 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1570 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1571 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1572 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1573 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1575 return 0;
1578 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1579 struct kvm_device_attr *attr)
1581 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1582 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1583 return -EFAULT;
1585 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1586 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1587 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1588 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1589 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1590 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1591 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1592 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1593 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1594 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1595 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1596 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1597 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1598 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1599 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1600 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1601 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1602 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1603 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1604 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1605 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1606 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1607 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1608 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1609 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1610 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1611 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1612 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1613 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1614 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1615 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1616 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1617 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1618 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1619 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1620 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1621 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1622 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1623 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1624 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1625 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1626 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1627 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1628 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1629 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1630 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1631 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1632 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1633 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1634 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1635 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1636 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1637 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1638 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1639 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1640 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1641 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1643 return 0;
1646 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1648 int ret = -ENXIO;
1650 switch (attr->attr) {
1651 case KVM_S390_VM_CPU_PROCESSOR:
1652 ret = kvm_s390_get_processor(kvm, attr);
1653 break;
1654 case KVM_S390_VM_CPU_MACHINE:
1655 ret = kvm_s390_get_machine(kvm, attr);
1656 break;
1657 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1658 ret = kvm_s390_get_processor_feat(kvm, attr);
1659 break;
1660 case KVM_S390_VM_CPU_MACHINE_FEAT:
1661 ret = kvm_s390_get_machine_feat(kvm, attr);
1662 break;
1663 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1664 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1665 break;
1666 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1667 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1668 break;
1670 return ret;
1673 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1675 int ret;
1677 switch (attr->group) {
1678 case KVM_S390_VM_MEM_CTRL:
1679 ret = kvm_s390_set_mem_control(kvm, attr);
1680 break;
1681 case KVM_S390_VM_TOD:
1682 ret = kvm_s390_set_tod(kvm, attr);
1683 break;
1684 case KVM_S390_VM_CPU_MODEL:
1685 ret = kvm_s390_set_cpu_model(kvm, attr);
1686 break;
1687 case KVM_S390_VM_CRYPTO:
1688 ret = kvm_s390_vm_set_crypto(kvm, attr);
1689 break;
1690 case KVM_S390_VM_MIGRATION:
1691 ret = kvm_s390_vm_set_migration(kvm, attr);
1692 break;
1693 default:
1694 ret = -ENXIO;
1695 break;
1698 return ret;
1701 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1703 int ret;
1705 switch (attr->group) {
1706 case KVM_S390_VM_MEM_CTRL:
1707 ret = kvm_s390_get_mem_control(kvm, attr);
1708 break;
1709 case KVM_S390_VM_TOD:
1710 ret = kvm_s390_get_tod(kvm, attr);
1711 break;
1712 case KVM_S390_VM_CPU_MODEL:
1713 ret = kvm_s390_get_cpu_model(kvm, attr);
1714 break;
1715 case KVM_S390_VM_MIGRATION:
1716 ret = kvm_s390_vm_get_migration(kvm, attr);
1717 break;
1718 default:
1719 ret = -ENXIO;
1720 break;
1723 return ret;
1726 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1728 int ret;
1730 switch (attr->group) {
1731 case KVM_S390_VM_MEM_CTRL:
1732 switch (attr->attr) {
1733 case KVM_S390_VM_MEM_ENABLE_CMMA:
1734 case KVM_S390_VM_MEM_CLR_CMMA:
1735 ret = sclp.has_cmma ? 0 : -ENXIO;
1736 break;
1737 case KVM_S390_VM_MEM_LIMIT_SIZE:
1738 ret = 0;
1739 break;
1740 default:
1741 ret = -ENXIO;
1742 break;
1744 break;
1745 case KVM_S390_VM_TOD:
1746 switch (attr->attr) {
1747 case KVM_S390_VM_TOD_LOW:
1748 case KVM_S390_VM_TOD_HIGH:
1749 ret = 0;
1750 break;
1751 default:
1752 ret = -ENXIO;
1753 break;
1755 break;
1756 case KVM_S390_VM_CPU_MODEL:
1757 switch (attr->attr) {
1758 case KVM_S390_VM_CPU_PROCESSOR:
1759 case KVM_S390_VM_CPU_MACHINE:
1760 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1761 case KVM_S390_VM_CPU_MACHINE_FEAT:
1762 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1763 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1764 ret = 0;
1765 break;
1766 default:
1767 ret = -ENXIO;
1768 break;
1770 break;
1771 case KVM_S390_VM_CRYPTO:
1772 switch (attr->attr) {
1773 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1774 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1775 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1776 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1777 ret = 0;
1778 break;
1779 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1780 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1781 ret = ap_instructions_available() ? 0 : -ENXIO;
1782 break;
1783 default:
1784 ret = -ENXIO;
1785 break;
1787 break;
1788 case KVM_S390_VM_MIGRATION:
1789 ret = 0;
1790 break;
1791 default:
1792 ret = -ENXIO;
1793 break;
1796 return ret;
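/*
 * Illustrative sketch (not part of this file): userspace drives the three
 * device-attribute dispatchers above through the generic KVM device-attr
 * ioctls, e.g. probing for CPU-model support, with vm_fd assumed to be a
 * VM file descriptor obtained via KVM_CREATE_VM:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE,
 *	};
 *	int has_cpu_model = ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0;
 *
 * A subsequent KVM_GET_DEVICE_ATTR call would additionally need attr.addr
 * to point at a destination buffer for the attribute payload.
 */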
1799 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1801 uint8_t *keys;
1802 uint64_t hva;
1803 int srcu_idx, i, r = 0;
1805 if (args->flags != 0)
1806 return -EINVAL;
1808 /* Is this guest using storage keys? */
1809 if (!mm_uses_skeys(current->mm))
1810 return KVM_S390_GET_SKEYS_NONE;
1812 /* Enforce sane limit on memory allocation */
1813 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1814 return -EINVAL;
1816 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1817 if (!keys)
1818 return -ENOMEM;
1820 mmap_read_lock(current->mm);
1821 srcu_idx = srcu_read_lock(&kvm->srcu);
1822 for (i = 0; i < args->count; i++) {
1823 hva = gfn_to_hva(kvm, args->start_gfn + i);
1824 if (kvm_is_error_hva(hva)) {
1825 r = -EFAULT;
1826 break;
1829 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1830 if (r)
1831 break;
1833 srcu_read_unlock(&kvm->srcu, srcu_idx);
1834 mmap_read_unlock(current->mm);
1836 if (!r) {
1837 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1838 sizeof(uint8_t) * args->count);
1839 if (r)
1840 r = -EFAULT;
1843 kvfree(keys);
1844 return r;
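/*
 * The "set" counterpart of kvm_s390_get_skeys(): it enables storage-key
 * handling via s390_enable_skey() and then installs the user-supplied key
 * for every frame, retrying through fixup_user_fault() when the backing
 * page is not yet mapped writable. Bit 0 of each key byte is reserved and
 * rejected with -EINVAL.
 */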
1847 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1849 uint8_t *keys;
1850 uint64_t hva;
1851 int srcu_idx, i, r = 0;
1852 bool unlocked;
1854 if (args->flags != 0)
1855 return -EINVAL;
1857 /* Enforce sane limit on memory allocation */
1858 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1859 return -EINVAL;
1861 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1862 if (!keys)
1863 return -ENOMEM;
1865 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1866 sizeof(uint8_t) * args->count);
1867 if (r) {
1868 r = -EFAULT;
1869 goto out;
1872 /* Enable storage key handling for the guest */
1873 r = s390_enable_skey();
1874 if (r)
1875 goto out;
1877 i = 0;
1878 mmap_read_lock(current->mm);
1879 srcu_idx = srcu_read_lock(&kvm->srcu);
1880 while (i < args->count) {
1881 unlocked = false;
1882 hva = gfn_to_hva(kvm, args->start_gfn + i);
1883 if (kvm_is_error_hva(hva)) {
1884 r = -EFAULT;
1885 break;
1888 /* Lowest order bit is reserved */
1889 if (keys[i] & 0x01) {
1890 r = -EINVAL;
1891 break;
1894 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1895 if (r) {
1896 r = fixup_user_fault(current->mm, hva,
1897 FAULT_FLAG_WRITE, &unlocked);
1898 if (r)
1899 break;
1901 if (!r)
1902 i++;
1904 srcu_read_unlock(&kvm->srcu, srcu_idx);
1905 mmap_read_unlock(current->mm);
1906 out:
1907 kvfree(keys);
1908 return r;
1911 /*
1912  * Base address and length must be sent at the start of each block, therefore
1913  * it's cheaper to send some clean data, as long as it's less than the size of
1914  * two longs.
1915  */
1916 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1917 /* for consistency */
1918 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1920 /*
1921  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1922  * address falls in a hole. In that case the index of one of the memslots
1923  * bordering the hole is returned.
1924  */
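/*
 * Note: the binary search below relies on the memslot array being kept
 * sorted by base_gfn in decreasing order by the common memslot code.
 */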
1925 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1927 int start = 0, end = slots->used_slots;
1928 int slot = atomic_read(&slots->lru_slot);
1929 struct kvm_memory_slot *memslots = slots->memslots;
1931 if (gfn >= memslots[slot].base_gfn &&
1932 gfn < memslots[slot].base_gfn + memslots[slot].npages)
1933 return slot;
1935 while (start < end) {
1936 slot = start + (end - start) / 2;
1938 if (gfn >= memslots[slot].base_gfn)
1939 end = slot;
1940 else
1941 start = slot + 1;
1944 if (start >= slots->used_slots)
1945 return slots->used_slots - 1;
1947 if (gfn >= memslots[start].base_gfn &&
1948 gfn < memslots[start].base_gfn + memslots[start].npages) {
1949 atomic_set(&slots->lru_slot, start);
1952 return start;
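/*
 * Peek mode: report the current CMMA usage state (bits taken from the
 * PGSTE) for consecutive guest frames starting at args->start_gfn,
 * without touching the dirty bitmap used by migration mode.
 */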
1955 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1956 u8 *res, unsigned long bufsize)
1958 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1960 args->count = 0;
1961 while (args->count < bufsize) {
1962 hva = gfn_to_hva(kvm, cur_gfn);
1963 /*
1964  * We return an error if the first value was invalid, but we
1965  * return successfully if at least one value was copied.
1966  */
1967 if (kvm_is_error_hva(hva))
1968 return args->count ? 0 : -EFAULT;
1969 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1970 pgstev = 0;
1971 res[args->count++] = (pgstev >> 24) & 0x43;
1972 cur_gfn++;
1975 return 0;
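/*
 * Return the guest frame number of the next page whose CMMA state is
 * flagged in the per-memslot shadow dirty bitmap, starting the search at
 * cur_gfn and moving on to further memslots when the current one has no
 * more bits set.
 */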
1978 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1979 unsigned long cur_gfn)
1981 int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1982 struct kvm_memory_slot *ms = slots->memslots + slotidx;
1983 unsigned long ofs = cur_gfn - ms->base_gfn;
1985 if (ms->base_gfn + ms->npages <= cur_gfn) {
1986 slotidx--;
1987 /* If we are above the highest slot, wrap around */
1988 if (slotidx < 0)
1989 slotidx = slots->used_slots - 1;
1991 ms = slots->memslots + slotidx;
1992 ofs = 0;
1994 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1995 while ((slotidx > 0) && (ofs >= ms->npages)) {
1996 slotidx--;
1997 ms = slots->memslots + slotidx;
1998 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2000 return ms->base_gfn + ofs;
2003 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2004 u8 *res, unsigned long bufsize)
2006 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2007 struct kvm_memslots *slots = kvm_memslots(kvm);
2008 struct kvm_memory_slot *ms;
2010 if (unlikely(!slots->used_slots))
2011 return 0;
2013 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2014 ms = gfn_to_memslot(kvm, cur_gfn);
2015 args->count = 0;
2016 args->start_gfn = cur_gfn;
2017 if (!ms)
2018 return 0;
2019 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2020 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2022 while (args->count < bufsize) {
2023 hva = gfn_to_hva(kvm, cur_gfn);
2024 if (kvm_is_error_hva(hva))
2025 return 0;
2026 /* Decrement only if we actually flipped the bit to 0 */
2027 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2028 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2029 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2030 pgstev = 0;
2031 /* Save the value */
2032 res[args->count++] = (pgstev >> 24) & 0x43;
2033 /* If the next bit is too far away, stop. */
2034 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2035 return 0;
2036 /* If we reached the previous "next", find the next one */
2037 if (cur_gfn == next_gfn)
2038 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2039 /* Reached the end of memory or of the buffer, stop */
2040 if ((next_gfn >= mem_end) ||
2041 (next_gfn - args->start_gfn >= bufsize))
2042 return 0;
2043 cur_gfn++;
2044 /* Reached the end of the current memslot, take the next one. */
2045 if (cur_gfn - ms->base_gfn >= ms->npages) {
2046 ms = gfn_to_memslot(kvm, cur_gfn);
2047 if (!ms)
2048 return 0;
2051 return 0;
2054 /*
2055  * This function searches for the next page with dirty CMMA attributes, and
2056  * saves the attributes in the buffer up to either the end of the buffer or
2057  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2058  * no trailing clean bytes are saved.
2059  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2060  * output buffer will indicate 0 as length.
2061  */
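/*
 * Illustrative sketch (not part of this file): with migration mode active,
 * a VMM could drain the dirty CMMA state roughly as follows; vm_fd, buf
 * and buf_size are assumed to be set up by the caller, and send() stands
 * in for whatever transport the VMM uses:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count	   = buf_size,
 *		.flags	   = 0,
 *		.values	   = (__u64)(unsigned long)buf,
 *	};
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log))
 *			break;
 *		send(buf, log.start_gfn, log.count);
 *		log.start_gfn += log.count;
 *		log.count = buf_size;
 *	} while (log.remaining);
 */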
2062 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2063 struct kvm_s390_cmma_log *args)
2065 unsigned long bufsize;
2066 int srcu_idx, peek, ret;
2067 u8 *values;
2069 if (!kvm->arch.use_cmma)
2070 return -ENXIO;
2071 /* Invalid/unsupported flags were specified */
2072 if (args->flags & ~KVM_S390_CMMA_PEEK)
2073 return -EINVAL;
2074 /* Migration mode query, and we are not doing a migration */
2075 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2076 if (!peek && !kvm->arch.migration_mode)
2077 return -EINVAL;
2078 /* CMMA is disabled or was not used, or the buffer has length zero */
2079 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2080 if (!bufsize || !kvm->mm->context.uses_cmm) {
2081 memset(args, 0, sizeof(*args));
2082 return 0;
2084 /* We are not peeking, and there are no dirty pages */
2085 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2086 memset(args, 0, sizeof(*args));
2087 return 0;
2090 values = vmalloc(bufsize);
2091 if (!values)
2092 return -ENOMEM;
2094 mmap_read_lock(kvm->mm);
2095 srcu_idx = srcu_read_lock(&kvm->srcu);
2096 if (peek)
2097 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2098 else
2099 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2100 srcu_read_unlock(&kvm->srcu, srcu_idx);
2101 mmap_read_unlock(kvm->mm);
2103 if (kvm->arch.migration_mode)
2104 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2105 else
2106 args->remaining = 0;
2108 if (copy_to_user((void __user *)args->values, values, args->count))
2109 ret = -EFAULT;
2111 vfree(values);
2112 return ret;
2115 /*
2116  * This function sets the CMMA attributes for the given pages. If the input
2117  * buffer has zero length, no action is taken, otherwise the attributes are
2118  * set and the mm->context.uses_cmm flag is set.
2119  */
2120 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2121 const struct kvm_s390_cmma_log *args)
2123 unsigned long hva, mask, pgstev, i;
2124 uint8_t *bits;
2125 int srcu_idx, r = 0;
2127 mask = args->mask;
2129 if (!kvm->arch.use_cmma)
2130 return -ENXIO;
2131 /* invalid/unsupported flags */
2132 if (args->flags != 0)
2133 return -EINVAL;
2134 /* Enforce sane limit on memory allocation */
2135 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2136 return -EINVAL;
2137 /* Nothing to do */
2138 if (args->count == 0)
2139 return 0;
2141 bits = vmalloc(array_size(sizeof(*bits), args->count));
2142 if (!bits)
2143 return -ENOMEM;
2145 r = copy_from_user(bits, (void __user *)args->values, args->count);
2146 if (r) {
2147 r = -EFAULT;
2148 goto out;
2151 mmap_read_lock(kvm->mm);
2152 srcu_idx = srcu_read_lock(&kvm->srcu);
2153 for (i = 0; i < args->count; i++) {
2154 hva = gfn_to_hva(kvm, args->start_gfn + i);
2155 if (kvm_is_error_hva(hva)) {
2156 r = -EFAULT;
2157 break;
2160 pgstev = bits[i];
2161 pgstev = pgstev << 24;
2162 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2163 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2165 srcu_read_unlock(&kvm->srcu, srcu_idx);
2166 mmap_read_unlock(kvm->mm);
2168 if (!kvm->mm->context.uses_cmm) {
2169 mmap_write_lock(kvm->mm);
2170 kvm->mm->context.uses_cmm = 1;
2171 mmap_write_unlock(kvm->mm);
2173 out:
2174 vfree(bits);
2175 return r;
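/*
 * Destroy the ultravisor representation of every vcpu; used when a VM
 * leaves protected virtualization mode (see KVM_PV_DISABLE below).
 */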
2178 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2180 struct kvm_vcpu *vcpu;
2181 u16 rc, rrc;
2182 int ret = 0;
2183 int i;
2185 /*
2186  * We ignore failures and try to destroy as many CPUs as possible.
2187  * At the same time we must not free the assigned resources when
2188  * this fails, as the ultravisor still has access to that memory.
2189  * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2190  * behind.
2191  * We want to return the first failure rc and rrc, though.
2192  */
2193 kvm_for_each_vcpu(i, vcpu, kvm) {
2194 mutex_lock(&vcpu->mutex);
2195 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2196 *rcp = rc;
2197 *rrcp = rrc;
2198 ret = -EIO;
2200 mutex_unlock(&vcpu->mutex);
2202 return ret;
2205 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2207 int i, r = 0;
2208 u16 dummy;
2210 struct kvm_vcpu *vcpu;
2212 kvm_for_each_vcpu(i, vcpu, kvm) {
2213 mutex_lock(&vcpu->mutex);
2214 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2215 mutex_unlock(&vcpu->mutex);
2216 if (r)
2217 break;
2219 if (r)
2220 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2221 return r;
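/*
 * Handle the KVM_PV_* subcommands of KVM_S390_PV_COMMAND. The usual life
 * cycle is: KVM_PV_ENABLE converts the VM and its vcpus to protected mode,
 * KVM_PV_SET_SEC_PARMS and KVM_PV_UNPACK feed the encrypted image to the
 * ultravisor, KVM_PV_VERIFY checks it, and KVM_PV_DISABLE converts the VM
 * back to non-protected mode.
 */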
2224 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2226 int r = 0;
2227 u16 dummy;
2228 void __user *argp = (void __user *)cmd->data;
2230 switch (cmd->cmd) {
2231 case KVM_PV_ENABLE: {
2232 r = -EINVAL;
2233 if (kvm_s390_pv_is_protected(kvm))
2234 break;
2236 /*
2237  * FMT 4 SIE needs esca. As we never switch back to bsca from
2238  * esca, we need no cleanup in the error cases below.
2239  */
2240 r = sca_switch_to_extended(kvm);
2241 if (r)
2242 break;
2244 mmap_write_lock(current->mm);
2245 r = gmap_mark_unmergeable();
2246 mmap_write_unlock(current->mm);
2247 if (r)
2248 break;
2250 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2251 if (r)
2252 break;
2254 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2255 if (r)
2256 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2258 /* we need to block service interrupts from now on */
2259 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2260 break;
2262 case KVM_PV_DISABLE: {
2263 r = -EINVAL;
2264 if (!kvm_s390_pv_is_protected(kvm))
2265 break;
2267 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2268 /*
2269  * If a CPU could not be destroyed, destroying the VM will also fail.
2270  * There is no point in trying to do so. Instead return the rc and
2271  * rrc of the first CPU whose destruction failed.
2272  */
2273 if (r)
2274 break;
2275 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2277 /* no need to block service interrupts any more */
2278 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2279 break;
2281 case KVM_PV_SET_SEC_PARMS: {
2282 struct kvm_s390_pv_sec_parm parms = {};
2283 void *hdr;
2285 r = -EINVAL;
2286 if (!kvm_s390_pv_is_protected(kvm))
2287 break;
2289 r = -EFAULT;
2290 if (copy_from_user(&parms, argp, sizeof(parms)))
2291 break;
2293 /* Currently restricted to 8KB */
2294 r = -EINVAL;
2295 if (parms.length > PAGE_SIZE * 2)
2296 break;
2298 r = -ENOMEM;
2299 hdr = vmalloc(parms.length);
2300 if (!hdr)
2301 break;
2303 r = -EFAULT;
2304 if (!copy_from_user(hdr, (void __user *)parms.origin,
2305 parms.length))
2306 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2307 &cmd->rc, &cmd->rrc);
2309 vfree(hdr);
2310 break;
2312 case KVM_PV_UNPACK: {
2313 struct kvm_s390_pv_unp unp = {};
2315 r = -EINVAL;
2316 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2317 break;
2319 r = -EFAULT;
2320 if (copy_from_user(&unp, argp, sizeof(unp)))
2321 break;
2323 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2324 &cmd->rc, &cmd->rrc);
2325 break;
2327 case KVM_PV_VERIFY: {
2328 r = -EINVAL;
2329 if (!kvm_s390_pv_is_protected(kvm))
2330 break;
2332 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2333 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2334 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2335 cmd->rrc);
2336 break;
2338 case KVM_PV_PREP_RESET: {
2339 r = -EINVAL;
2340 if (!kvm_s390_pv_is_protected(kvm))
2341 break;
2343 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2344 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2345 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2346 cmd->rc, cmd->rrc);
2347 break;
2349 case KVM_PV_UNSHARE_ALL: {
2350 r = -EINVAL;
2351 if (!kvm_s390_pv_is_protected(kvm))
2352 break;
2354 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2355 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2356 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2357 cmd->rc, cmd->rrc);
2358 break;
2360 default:
2361 r = -ENOTTY;
2363 return r;
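/*
 * Top-level dispatcher for VM-scoped ioctls. Attribute ioctls are handed
 * to the kvm_s390_vm_*_attr() helpers above; the storage-key, CMMA and
 * protected-virtualization ioctls copy their argument structs from and
 * back to userspace around the respective handlers.
 */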
2366 long kvm_arch_vm_ioctl(struct file *filp,
2367 unsigned int ioctl, unsigned long arg)
2369 struct kvm *kvm = filp->private_data;
2370 void __user *argp = (void __user *)arg;
2371 struct kvm_device_attr attr;
2372 int r;
2374 switch (ioctl) {
2375 case KVM_S390_INTERRUPT: {
2376 struct kvm_s390_interrupt s390int;
2378 r = -EFAULT;
2379 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2380 break;
2381 r = kvm_s390_inject_vm(kvm, &s390int);
2382 break;
2384 case KVM_CREATE_IRQCHIP: {
2385 struct kvm_irq_routing_entry routing;
2387 r = -EINVAL;
2388 if (kvm->arch.use_irqchip) {
2389 /* Set up dummy routing. */
2390 memset(&routing, 0, sizeof(routing));
2391 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2393 break;
2395 case KVM_SET_DEVICE_ATTR: {
2396 r = -EFAULT;
2397 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2398 break;
2399 r = kvm_s390_vm_set_attr(kvm, &attr);
2400 break;
2402 case KVM_GET_DEVICE_ATTR: {
2403 r = -EFAULT;
2404 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2405 break;
2406 r = kvm_s390_vm_get_attr(kvm, &attr);
2407 break;
2409 case KVM_HAS_DEVICE_ATTR: {
2410 r = -EFAULT;
2411 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2412 break;
2413 r = kvm_s390_vm_has_attr(kvm, &attr);
2414 break;
2416 case KVM_S390_GET_SKEYS: {
2417 struct kvm_s390_skeys args;
2419 r = -EFAULT;
2420 if (copy_from_user(&args, argp,
2421 sizeof(struct kvm_s390_skeys)))
2422 break;
2423 r = kvm_s390_get_skeys(kvm, &args);
2424 break;
2426 case KVM_S390_SET_SKEYS: {
2427 struct kvm_s390_skeys args;
2429 r = -EFAULT;
2430 if (copy_from_user(&args, argp,
2431 sizeof(struct kvm_s390_skeys)))
2432 break;
2433 r = kvm_s390_set_skeys(kvm, &args);
2434 break;
2436 case KVM_S390_GET_CMMA_BITS: {
2437 struct kvm_s390_cmma_log args;
2439 r = -EFAULT;
2440 if (copy_from_user(&args, argp, sizeof(args)))
2441 break;
2442 mutex_lock(&kvm->slots_lock);
2443 r = kvm_s390_get_cmma_bits(kvm, &args);
2444 mutex_unlock(&kvm->slots_lock);
2445 if (!r) {
2446 r = copy_to_user(argp, &args, sizeof(args));
2447 if (r)
2448 r = -EFAULT;
2450 break;
2452 case KVM_S390_SET_CMMA_BITS: {
2453 struct kvm_s390_cmma_log args;
2455 r = -EFAULT;
2456 if (copy_from_user(&args, argp, sizeof(args)))
2457 break;
2458 mutex_lock(&kvm->slots_lock);
2459 r = kvm_s390_set_cmma_bits(kvm, &args);
2460 mutex_unlock(&kvm->slots_lock);
2461 break;
2463 case KVM_S390_PV_COMMAND: {
2464 struct kvm_pv_cmd args;
2466 /* protvirt means user sigp */
2467 kvm->arch.user_cpu_state_ctrl = 1;
2468 r = 0;
2469 if (!is_prot_virt_host()) {
2470 r = -EINVAL;
2471 break;
2473 if (copy_from_user(&args, argp, sizeof(args))) {
2474 r = -EFAULT;
2475 break;
2477 if (args.flags) {
2478 r = -EINVAL;
2479 break;
2481 mutex_lock(&kvm->lock);
2482 r = kvm_s390_handle_pv(kvm, &args);
2483 mutex_unlock(&kvm->lock);
2484 if (copy_to_user(argp, &args, sizeof(args))) {
2485 r = -EFAULT;
2486 break;
2488 break;
2490 default:
2491 r = -ENOTTY;
2494 return r;
2497 static int kvm_s390_apxa_installed(void)
2499 struct ap_config_info info;
2501 if (ap_instructions_available()) {
2502 if (ap_qci(&info) == 0)
2503 return info.apxa;
2506 return 0;
2509 /*
2510  * The format of the crypto control block (CRYCB) is specified in the 3 low
2511  * order bits of the CRYCB designation (CRYCBD) field as follows:
2512  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2513  *           AP extended addressing (APXA) facility is installed.
2514  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2515  * Format 2: Both the APXA and MSAX3 facilities are installed.
2516  */
2517 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2519 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2521 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2522 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2524 /* Check whether MSAX3 is installed */
2525 if (!test_kvm_facility(kvm, 76))
2526 return;
2528 if (kvm_s390_apxa_installed())
2529 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2530 else
2531 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2534 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2535 unsigned long *aqm, unsigned long *adm)
2537 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2539 mutex_lock(&kvm->lock);
2540 kvm_s390_vcpu_block_all(kvm);
2542 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2543 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2544 memcpy(crycb->apcb1.apm, apm, 32);
2545 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2546 apm[0], apm[1], apm[2], apm[3]);
2547 memcpy(crycb->apcb1.aqm, aqm, 32);
2548 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2549 aqm[0], aqm[1], aqm[2], aqm[3]);
2550 memcpy(crycb->apcb1.adm, adm, 32);
2551 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2552 adm[0], adm[1], adm[2], adm[3]);
2553 break;
2554 case CRYCB_FORMAT1:
2555 	case CRYCB_FORMAT0: /* fall through; both use APCB0 */
2556 memcpy(crycb->apcb0.apm, apm, 8);
2557 memcpy(crycb->apcb0.aqm, aqm, 2);
2558 memcpy(crycb->apcb0.adm, adm, 2);
2559 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2560 apm[0], *((unsigned short *)aqm),
2561 *((unsigned short *)adm));
2562 break;
2563 	default: /* Cannot happen */
2564 break;
2567 /* recreate the shadow crycb for each vcpu */
2568 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2569 kvm_s390_vcpu_unblock_all(kvm);
2570 mutex_unlock(&kvm->lock);
2572 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2574 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2576 mutex_lock(&kvm->lock);
2577 kvm_s390_vcpu_block_all(kvm);
2579 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2580 sizeof(kvm->arch.crypto.crycb->apcb0));
2581 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2582 sizeof(kvm->arch.crypto.crycb->apcb1));
2584 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2585 /* recreate the shadow crycb for each vcpu */
2586 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2587 kvm_s390_vcpu_unblock_all(kvm);
2588 mutex_unlock(&kvm->lock);
2590 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2592 static u64 kvm_s390_get_initial_cpuid(void)
2594 struct cpuid cpuid;
2596 get_cpu_id(&cpuid);
2597 cpuid.version = 0xff;
2598 return *((u64 *) &cpuid);
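/*
 * Initialize the VM's crypto control block: select the CRYCB format and,
 * if the MSAX3 facility is available, enable AES/DEA protected-key support
 * with freshly generated random wrapping key masks.
 */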
2601 static void kvm_s390_crypto_init(struct kvm *kvm)
2603 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2604 kvm_s390_set_crycb_format(kvm);
2606 if (!test_kvm_facility(kvm, 76))
2607 return;
2609 /* Enable AES/DEA protected key functions by default */
2610 kvm->arch.crypto.aes_kw = 1;
2611 kvm->arch.crypto.dea_kw = 1;
2612 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2613 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2614 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2615 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2618 static void sca_dispose(struct kvm *kvm)
2620 if (kvm->arch.use_esca)
2621 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2622 else
2623 free_page((unsigned long)(kvm->arch.sca));
2624 kvm->arch.sca = NULL;
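/*
 * VM-wide initialization: allocate the SCA and the facility/crypto page,
 * compute the facility masks and lists visible to the guest, set up
 * interrupt bookkeeping and, unless this is a ucontrol VM, create the gmap
 * that backs guest memory.
 */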
2627 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2629 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2630 int i, rc;
2631 char debug_name[16];
2632 static unsigned long sca_offset;
2634 rc = -EINVAL;
2635 #ifdef CONFIG_KVM_S390_UCONTROL
2636 if (type & ~KVM_VM_S390_UCONTROL)
2637 goto out_err;
2638 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2639 goto out_err;
2640 #else
2641 if (type)
2642 goto out_err;
2643 #endif
2645 rc = s390_enable_sie();
2646 if (rc)
2647 goto out_err;
2649 rc = -ENOMEM;
2651 if (!sclp.has_64bscao)
2652 alloc_flags |= GFP_DMA;
2653 rwlock_init(&kvm->arch.sca_lock);
2654 /* start with basic SCA */
2655 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2656 if (!kvm->arch.sca)
2657 goto out_err;
2658 mutex_lock(&kvm_lock);
2659 sca_offset += 16;
2660 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2661 sca_offset = 0;
2662 kvm->arch.sca = (struct bsca_block *)
2663 ((char *) kvm->arch.sca + sca_offset);
2664 mutex_unlock(&kvm_lock);
2666 sprintf(debug_name, "kvm-%u", current->pid);
2668 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2669 if (!kvm->arch.dbf)
2670 goto out_err;
2672 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2673 kvm->arch.sie_page2 =
2674 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2675 if (!kvm->arch.sie_page2)
2676 goto out_err;
2678 kvm->arch.sie_page2->kvm = kvm;
2679 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2681 for (i = 0; i < kvm_s390_fac_size(); i++) {
2682 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2683 (kvm_s390_fac_base[i] |
2684 kvm_s390_fac_ext[i]);
2685 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2686 kvm_s390_fac_base[i];
2688 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2690 /* we are always in czam mode - even on pre z14 machines */
2691 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2692 set_kvm_facility(kvm->arch.model.fac_list, 138);
2693 /* we emulate STHYI in kvm */
2694 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2695 set_kvm_facility(kvm->arch.model.fac_list, 74);
2696 if (MACHINE_HAS_TLB_GUEST) {
2697 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2698 set_kvm_facility(kvm->arch.model.fac_list, 147);
2701 if (css_general_characteristics.aiv && test_facility(65))
2702 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2704 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2705 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2707 kvm_s390_crypto_init(kvm);
2709 mutex_init(&kvm->arch.float_int.ais_lock);
2710 spin_lock_init(&kvm->arch.float_int.lock);
2711 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2712 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2713 init_waitqueue_head(&kvm->arch.ipte_wq);
2714 mutex_init(&kvm->arch.ipte_mutex);
2716 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2717 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2719 if (type & KVM_VM_S390_UCONTROL) {
2720 kvm->arch.gmap = NULL;
2721 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2722 } else {
2723 if (sclp.hamax == U64_MAX)
2724 kvm->arch.mem_limit = TASK_SIZE_MAX;
2725 else
2726 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2727 sclp.hamax + 1);
2728 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2729 if (!kvm->arch.gmap)
2730 goto out_err;
2731 kvm->arch.gmap->private = kvm;
2732 kvm->arch.gmap->pfault_enabled = 0;
2735 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2736 kvm->arch.use_skf = sclp.has_skey;
2737 spin_lock_init(&kvm->arch.start_stop_lock);
2738 kvm_s390_vsie_init(kvm);
2739 if (use_gisa)
2740 kvm_s390_gisa_init(kvm);
2741 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2743 return 0;
2744 out_err:
2745 free_page((unsigned long)kvm->arch.sie_page2);
2746 debug_unregister(kvm->arch.dbf);
2747 sca_dispose(kvm);
2748 KVM_EVENT(3, "creation of vm failed: %d", rc);
2749 return rc;
2752 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2754 u16 rc, rrc;
2756 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2757 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2758 kvm_s390_clear_local_irqs(vcpu);
2759 kvm_clear_async_pf_completion_queue(vcpu);
2760 if (!kvm_is_ucontrol(vcpu->kvm))
2761 sca_del_vcpu(vcpu);
2763 if (kvm_is_ucontrol(vcpu->kvm))
2764 gmap_remove(vcpu->arch.gmap);
2766 if (vcpu->kvm->arch.use_cmma)
2767 kvm_s390_vcpu_unsetup_cmma(vcpu);
2768 /* We can not hold the vcpu mutex here, we are already dying */
2769 if (kvm_s390_pv_cpu_get_handle(vcpu))
2770 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2771 free_page((unsigned long)(vcpu->arch.sie_block));
2774 static void kvm_free_vcpus(struct kvm *kvm)
2776 unsigned int i;
2777 struct kvm_vcpu *vcpu;
2779 kvm_for_each_vcpu(i, vcpu, kvm)
2780 kvm_vcpu_destroy(vcpu);
2782 mutex_lock(&kvm->lock);
2783 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2784 kvm->vcpus[i] = NULL;
2786 atomic_set(&kvm->online_vcpus, 0);
2787 mutex_unlock(&kvm->lock);
2790 void kvm_arch_destroy_vm(struct kvm *kvm)
2792 u16 rc, rrc;
2794 kvm_free_vcpus(kvm);
2795 sca_dispose(kvm);
2796 kvm_s390_gisa_destroy(kvm);
2797 /*
2798  * We are already at the end of life and kvm->lock is not taken.
2799  * This is ok as the file descriptor is closed by now and nobody
2800  * can mess with the pv state. To avoid lockdep_assert_held from
2801  * complaining we do not use kvm_s390_pv_is_protected.
2802  */
2803 if (kvm_s390_pv_get_handle(kvm))
2804 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2805 debug_unregister(kvm->arch.dbf);
2806 free_page((unsigned long)kvm->arch.sie_page2);
2807 if (!kvm_is_ucontrol(kvm))
2808 gmap_remove(kvm->arch.gmap);
2809 kvm_s390_destroy_adapters(kvm);
2810 kvm_s390_clear_float_irqs(kvm);
2811 kvm_s390_vsie_destroy(kvm);
2812 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2815 /* Section: vcpu related */
2816 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2818 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2819 if (!vcpu->arch.gmap)
2820 return -ENOMEM;
2821 vcpu->arch.gmap->private = vcpu->kvm;
2823 return 0;
2826 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2828 if (!kvm_s390_use_sca_entries())
2829 return;
2830 read_lock(&vcpu->kvm->arch.sca_lock);
2831 if (vcpu->kvm->arch.use_esca) {
2832 struct esca_block *sca = vcpu->kvm->arch.sca;
2834 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2835 sca->cpu[vcpu->vcpu_id].sda = 0;
2836 } else {
2837 struct bsca_block *sca = vcpu->kvm->arch.sca;
2839 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2840 sca->cpu[vcpu->vcpu_id].sda = 0;
2842 read_unlock(&vcpu->kvm->arch.sca_lock);
2845 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2847 if (!kvm_s390_use_sca_entries()) {
2848 struct bsca_block *sca = vcpu->kvm->arch.sca;
2850 /* we still need the basic sca for the ipte control */
2851 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2852 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2853 return;
2855 read_lock(&vcpu->kvm->arch.sca_lock);
2856 if (vcpu->kvm->arch.use_esca) {
2857 struct esca_block *sca = vcpu->kvm->arch.sca;
2859 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2860 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2861 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2862 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2863 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2864 } else {
2865 struct bsca_block *sca = vcpu->kvm->arch.sca;
2867 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2868 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2869 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2870 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2872 read_unlock(&vcpu->kvm->arch.sca_lock);
2875 /* Basic SCA to Extended SCA data copy routines */
2876 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2878 d->sda = s->sda;
2879 d->sigp_ctrl.c = s->sigp_ctrl.c;
2880 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2883 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2885 int i;
2887 d->ipte_control = s->ipte_control;
2888 d->mcn[0] = s->mcn;
2889 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2890 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2893 static int sca_switch_to_extended(struct kvm *kvm)
2895 struct bsca_block *old_sca = kvm->arch.sca;
2896 struct esca_block *new_sca;
2897 struct kvm_vcpu *vcpu;
2898 unsigned int vcpu_idx;
2899 u32 scaol, scaoh;
2901 if (kvm->arch.use_esca)
2902 return 0;
2904 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2905 if (!new_sca)
2906 return -ENOMEM;
2908 scaoh = (u32)((u64)(new_sca) >> 32);
2909 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2911 kvm_s390_vcpu_block_all(kvm);
2912 write_lock(&kvm->arch.sca_lock);
2914 sca_copy_b_to_e(new_sca, old_sca);
2916 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2917 vcpu->arch.sie_block->scaoh = scaoh;
2918 vcpu->arch.sie_block->scaol = scaol;
2919 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2921 kvm->arch.sca = new_sca;
2922 kvm->arch.use_esca = 1;
2924 write_unlock(&kvm->arch.sca_lock);
2925 kvm_s390_vcpu_unblock_all(kvm);
2927 free_page((unsigned long)old_sca);
2929 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2930 old_sca, kvm->arch.sca);
2931 return 0;
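/*
 * A vcpu id that does not fit into the basic SCA triggers the switch to the
 * extended SCA, provided the machine offers ESCA and the 64-bit SCA origin.
 */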
2934 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2936 int rc;
2938 if (!kvm_s390_use_sca_entries()) {
2939 if (id < KVM_MAX_VCPUS)
2940 return true;
2941 return false;
2943 if (id < KVM_S390_BSCA_CPU_SLOTS)
2944 return true;
2945 if (!sclp.has_esca || !sclp.has_64bscao)
2946 return false;
2948 mutex_lock(&kvm->lock);
2949 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2950 mutex_unlock(&kvm->lock);
2952 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2955 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2956 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2958 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2959 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2960 vcpu->arch.cputm_start = get_tod_clock_fast();
2961 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2964 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2965 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2967 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2968 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2969 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2970 vcpu->arch.cputm_start = 0;
2971 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2974 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2975 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2977 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2978 vcpu->arch.cputm_enabled = true;
2979 __start_cpu_timer_accounting(vcpu);
2982 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2983 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2985 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2986 __stop_cpu_timer_accounting(vcpu);
2987 vcpu->arch.cputm_enabled = false;
2990 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2992 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2993 __enable_cpu_timer_accounting(vcpu);
2994 preempt_enable();
2997 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2999 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3000 __disable_cpu_timer_accounting(vcpu);
3001 preempt_enable();
3004 /* set the cpu timer - may only be called from the VCPU thread itself */
3005 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3007 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3008 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3009 if (vcpu->arch.cputm_enabled)
3010 vcpu->arch.cputm_start = get_tod_clock_fast();
3011 vcpu->arch.sie_block->cputm = cputm;
3012 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3013 preempt_enable();
3016 /* update and get the cpu timer - can also be called from other VCPU threads */
3017 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3019 unsigned int seq;
3020 __u64 value;
3022 if (unlikely(!vcpu->arch.cputm_enabled))
3023 return vcpu->arch.sie_block->cputm;
3025 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3026 do {
3027 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3028 /*
3029  * If the writer would ever execute a read in the critical
3030  * section, e.g. in irq context, we have a deadlock.
3031  */
3032 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3033 value = vcpu->arch.sie_block->cputm;
3034 /* if cputm_start is 0, accounting is being started/stopped */
3035 if (likely(vcpu->arch.cputm_start))
3036 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3037 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3038 preempt_enable();
3039 return value;
3042 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3045 gmap_enable(vcpu->arch.enabled_gmap);
3046 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3047 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3048 __start_cpu_timer_accounting(vcpu);
3049 vcpu->cpu = cpu;
3052 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3054 vcpu->cpu = -1;
3055 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3056 __stop_cpu_timer_accounting(vcpu);
3057 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3058 vcpu->arch.enabled_gmap = gmap_get_enabled();
3059 gmap_disable(vcpu->arch.enabled_gmap);
3063 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3065 mutex_lock(&vcpu->kvm->lock);
3066 preempt_disable();
3067 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3068 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3069 preempt_enable();
3070 mutex_unlock(&vcpu->kvm->lock);
3071 if (!kvm_is_ucontrol(vcpu->kvm)) {
3072 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3073 sca_add_vcpu(vcpu);
3075 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3076 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3077 /* make vcpu_load load the right gmap on the first trigger */
3078 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3081 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3083 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3084 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3085 return true;
3086 return false;
3089 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3091 /* At least one ECC subfunction must be present */
3092 return kvm_has_pckmo_subfunc(kvm, 32) ||
3093 kvm_has_pckmo_subfunc(kvm, 33) ||
3094 kvm_has_pckmo_subfunc(kvm, 34) ||
3095 kvm_has_pckmo_subfunc(kvm, 40) ||
3096 kvm_has_pckmo_subfunc(kvm, 41);
3100 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3103 * If the AP instructions are not being interpreted and the MSAX3
3104 * facility is not configured for the guest, there is nothing to set up.
3106 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3107 return;
3109 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3110 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3111 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3112 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3114 if (vcpu->kvm->arch.crypto.apie)
3115 vcpu->arch.sie_block->eca |= ECA_APIE;
3117 /* Set up protected key support */
3118 if (vcpu->kvm->arch.crypto.aes_kw) {
3119 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3120 /* ecc is also wrapped with AES key */
3121 if (kvm_has_pckmo_ecc(vcpu->kvm))
3122 vcpu->arch.sie_block->ecd |= ECD_ECC;
3125 if (vcpu->kvm->arch.crypto.dea_kw)
3126 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3129 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3131 free_page(vcpu->arch.sie_block->cbrlo);
3132 vcpu->arch.sie_block->cbrlo = 0;
3135 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3137 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3138 if (!vcpu->arch.sie_block->cbrlo)
3139 return -ENOMEM;
3140 return 0;
3143 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3145 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3147 vcpu->arch.sie_block->ibc = model->ibc;
3148 if (test_kvm_facility(vcpu->kvm, 7))
3149 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3152 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3154 int rc = 0;
3155 u16 uvrc, uvrrc;
3157 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3158 CPUSTAT_SM |
3159 CPUSTAT_STOPPED);
3161 if (test_kvm_facility(vcpu->kvm, 78))
3162 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3163 else if (test_kvm_facility(vcpu->kvm, 8))
3164 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3166 kvm_s390_vcpu_setup_model(vcpu);
3168 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3169 if (MACHINE_HAS_ESOP)
3170 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3171 if (test_kvm_facility(vcpu->kvm, 9))
3172 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3173 if (test_kvm_facility(vcpu->kvm, 73))
3174 vcpu->arch.sie_block->ecb |= ECB_TE;
3176 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3177 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3178 if (test_kvm_facility(vcpu->kvm, 130))
3179 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3180 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3181 if (sclp.has_cei)
3182 vcpu->arch.sie_block->eca |= ECA_CEI;
3183 if (sclp.has_ib)
3184 vcpu->arch.sie_block->eca |= ECA_IB;
3185 if (sclp.has_siif)
3186 vcpu->arch.sie_block->eca |= ECA_SII;
3187 if (sclp.has_sigpif)
3188 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3189 if (test_kvm_facility(vcpu->kvm, 129)) {
3190 vcpu->arch.sie_block->eca |= ECA_VX;
3191 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3193 if (test_kvm_facility(vcpu->kvm, 139))
3194 vcpu->arch.sie_block->ecd |= ECD_MEF;
3195 if (test_kvm_facility(vcpu->kvm, 156))
3196 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3197 if (vcpu->arch.sie_block->gd) {
3198 vcpu->arch.sie_block->eca |= ECA_AIV;
3199 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3200 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3202 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3203 | SDNXC;
3204 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3206 if (sclp.has_kss)
3207 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3208 else
3209 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3211 if (vcpu->kvm->arch.use_cmma) {
3212 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3213 if (rc)
3214 return rc;
3216 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3217 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3219 vcpu->arch.sie_block->hpid = HPID_KVM;
3221 kvm_s390_vcpu_crypto_setup(vcpu);
3223 mutex_lock(&vcpu->kvm->lock);
3224 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3225 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3226 if (rc)
3227 kvm_s390_vcpu_unsetup_cmma(vcpu);
3229 mutex_unlock(&vcpu->kvm->lock);
3231 return rc;
3234 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3236 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3237 return -EINVAL;
3238 return 0;
3241 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3243 struct sie_page *sie_page;
3244 int rc;
3246 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3247 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3248 if (!sie_page)
3249 return -ENOMEM;
3251 vcpu->arch.sie_block = &sie_page->sie_block;
3252 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3254 /* the real guest size will always be smaller than msl */
3255 vcpu->arch.sie_block->mso = 0;
3256 vcpu->arch.sie_block->msl = sclp.hamax;
3258 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3259 spin_lock_init(&vcpu->arch.local_int.lock);
3260 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3261 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3262 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3263 seqcount_init(&vcpu->arch.cputm_seqcount);
3265 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3266 kvm_clear_async_pf_completion_queue(vcpu);
3267 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3268 KVM_SYNC_GPRS |
3269 KVM_SYNC_ACRS |
3270 KVM_SYNC_CRS |
3271 KVM_SYNC_ARCH0 |
3272 KVM_SYNC_PFAULT |
3273 KVM_SYNC_DIAG318;
3274 kvm_s390_set_prefix(vcpu, 0);
3275 if (test_kvm_facility(vcpu->kvm, 64))
3276 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3277 if (test_kvm_facility(vcpu->kvm, 82))
3278 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3279 if (test_kvm_facility(vcpu->kvm, 133))
3280 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3281 if (test_kvm_facility(vcpu->kvm, 156))
3282 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3283 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3284  * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3285  */
3286 if (MACHINE_HAS_VX)
3287 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3288 else
3289 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3291 if (kvm_is_ucontrol(vcpu->kvm)) {
3292 rc = __kvm_ucontrol_vcpu_init(vcpu);
3293 if (rc)
3294 goto out_free_sie_block;
3297 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3298 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3299 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3301 rc = kvm_s390_vcpu_setup(vcpu);
3302 if (rc)
3303 goto out_ucontrol_uninit;
3304 return 0;
3306 out_ucontrol_uninit:
3307 if (kvm_is_ucontrol(vcpu->kvm))
3308 gmap_remove(vcpu->arch.gmap);
3309 out_free_sie_block:
3310 free_page((unsigned long)(vcpu->arch.sie_block));
3311 return rc;
3314 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3316 return kvm_s390_vcpu_has_irq(vcpu, 0);
3319 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3321 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3324 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3326 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3327 exit_sie(vcpu);
3330 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3332 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3335 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3337 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3338 exit_sie(vcpu);
3341 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3343 return atomic_read(&vcpu->arch.sie_block->prog20) &
3344 (PROG_BLOCK_SIE | PROG_REQUEST);
3347 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3349 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3352 /*
3353  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3354 * If the CPU is not running (e.g. waiting as idle) the function will
3355 * return immediately. */
3356 void exit_sie(struct kvm_vcpu *vcpu)
3358 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3359 kvm_s390_vsie_kick(vcpu);
3360 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3361 cpu_relax();
3364 /* Kick a guest cpu out of SIE to process a request synchronously */
3365 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3367 kvm_make_request(req, vcpu);
3368 kvm_s390_vcpu_request(vcpu);
3371 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3372 unsigned long end)
3374 struct kvm *kvm = gmap->private;
3375 struct kvm_vcpu *vcpu;
3376 unsigned long prefix;
3377 int i;
3379 if (gmap_is_shadow(gmap))
3380 return;
3381 if (start >= 1UL << 31)
3382 /* We are only interested in prefix pages */
3383 return;
3384 kvm_for_each_vcpu(i, vcpu, kvm) {
3385 /* match against both prefix pages */
3386 prefix = kvm_s390_get_prefix(vcpu);
3387 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3388 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3389 start, end);
3390 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3395 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3397 /* do not poll with more than halt_poll_max_steal percent of steal time */
3398 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3399 halt_poll_max_steal) {
3400 vcpu->stat.halt_no_poll_steal++;
3401 return true;
3403 return false;
3406 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3408 /* kvm common code refers to this, but never calls it */
3409 BUG();
3410 return 0;
3413 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3414 struct kvm_one_reg *reg)
3416 int r = -EINVAL;
3418 switch (reg->id) {
3419 case KVM_REG_S390_TODPR:
3420 r = put_user(vcpu->arch.sie_block->todpr,
3421 (u32 __user *)reg->addr);
3422 break;
3423 case KVM_REG_S390_EPOCHDIFF:
3424 r = put_user(vcpu->arch.sie_block->epoch,
3425 (u64 __user *)reg->addr);
3426 break;
3427 case KVM_REG_S390_CPU_TIMER:
3428 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3429 (u64 __user *)reg->addr);
3430 break;
3431 case KVM_REG_S390_CLOCK_COMP:
3432 r = put_user(vcpu->arch.sie_block->ckc,
3433 (u64 __user *)reg->addr);
3434 break;
3435 case KVM_REG_S390_PFTOKEN:
3436 r = put_user(vcpu->arch.pfault_token,
3437 (u64 __user *)reg->addr);
3438 break;
3439 case KVM_REG_S390_PFCOMPARE:
3440 r = put_user(vcpu->arch.pfault_compare,
3441 (u64 __user *)reg->addr);
3442 break;
3443 case KVM_REG_S390_PFSELECT:
3444 r = put_user(vcpu->arch.pfault_select,
3445 (u64 __user *)reg->addr);
3446 break;
3447 case KVM_REG_S390_PP:
3448 r = put_user(vcpu->arch.sie_block->pp,
3449 (u64 __user *)reg->addr);
3450 break;
3451 case KVM_REG_S390_GBEA:
3452 r = put_user(vcpu->arch.sie_block->gbea,
3453 (u64 __user *)reg->addr);
3454 break;
3455 default:
3456 break;
3459 return r;
3462 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3463 struct kvm_one_reg *reg)
3465 int r = -EINVAL;
3466 __u64 val;
3468 switch (reg->id) {
3469 case KVM_REG_S390_TODPR:
3470 r = get_user(vcpu->arch.sie_block->todpr,
3471 (u32 __user *)reg->addr);
3472 break;
3473 case KVM_REG_S390_EPOCHDIFF:
3474 r = get_user(vcpu->arch.sie_block->epoch,
3475 (u64 __user *)reg->addr);
3476 break;
3477 case KVM_REG_S390_CPU_TIMER:
3478 r = get_user(val, (u64 __user *)reg->addr);
3479 if (!r)
3480 kvm_s390_set_cpu_timer(vcpu, val);
3481 break;
3482 case KVM_REG_S390_CLOCK_COMP:
3483 r = get_user(vcpu->arch.sie_block->ckc,
3484 (u64 __user *)reg->addr);
3485 break;
3486 case KVM_REG_S390_PFTOKEN:
3487 r = get_user(vcpu->arch.pfault_token,
3488 (u64 __user *)reg->addr);
3489 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3490 kvm_clear_async_pf_completion_queue(vcpu);
3491 break;
3492 case KVM_REG_S390_PFCOMPARE:
3493 r = get_user(vcpu->arch.pfault_compare,
3494 (u64 __user *)reg->addr);
3495 break;
3496 case KVM_REG_S390_PFSELECT:
3497 r = get_user(vcpu->arch.pfault_select,
3498 (u64 __user *)reg->addr);
3499 break;
3500 case KVM_REG_S390_PP:
3501 r = get_user(vcpu->arch.sie_block->pp,
3502 (u64 __user *)reg->addr);
3503 break;
3504 case KVM_REG_S390_GBEA:
3505 r = get_user(vcpu->arch.sie_block->gbea,
3506 (u64 __user *)reg->addr);
3507 break;
3508 default:
3509 break;
3512 return r;
3515 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3517 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3518 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3519 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3521 kvm_clear_async_pf_completion_queue(vcpu);
3522 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3523 kvm_s390_vcpu_stop(vcpu);
3524 kvm_s390_clear_local_irqs(vcpu);
3527 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3529 /* Initial reset is a superset of the normal reset */
3530 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3532 /*
3533  * This equals initial cpu reset in pop, but we don't switch to ESA.
3534  * We reset not only the internal data, but also ...
3535  */
3536 vcpu->arch.sie_block->gpsw.mask = 0;
3537 vcpu->arch.sie_block->gpsw.addr = 0;
3538 kvm_s390_set_prefix(vcpu, 0);
3539 kvm_s390_set_cpu_timer(vcpu, 0);
3540 vcpu->arch.sie_block->ckc = 0;
3541 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3542 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3543 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3545 /* ... the data in sync regs */
3546 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3547 vcpu->run->s.regs.ckc = 0;
3548 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3549 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3550 vcpu->run->psw_addr = 0;
3551 vcpu->run->psw_mask = 0;
3552 vcpu->run->s.regs.todpr = 0;
3553 vcpu->run->s.regs.cputm = 0;
3554 vcpu->run->s.regs.ckc = 0;
3555 vcpu->run->s.regs.pp = 0;
3556 vcpu->run->s.regs.gbea = 1;
3557 vcpu->run->s.regs.fpc = 0;
3558 /*
3559  * Do not reset these registers in the protected case, as some of
3560  * them are overlaid and they are not accessible in this case
3561  * anyway.
3562  */
3563 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3564 vcpu->arch.sie_block->gbea = 1;
3565 vcpu->arch.sie_block->pp = 0;
3566 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3567 vcpu->arch.sie_block->todpr = 0;
3571 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3573 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3575 /* Clear reset is a superset of the initial reset */
3576 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3578 memset(&regs->gprs, 0, sizeof(regs->gprs));
3579 memset(&regs->vrs, 0, sizeof(regs->vrs));
3580 memset(&regs->acrs, 0, sizeof(regs->acrs));
3581 memset(&regs->gscb, 0, sizeof(regs->gscb));
3583 regs->etoken = 0;
3584 regs->etoken_extension = 0;
3587 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3589 vcpu_load(vcpu);
3590 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3591 vcpu_put(vcpu);
3592 return 0;
3595 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3597 vcpu_load(vcpu);
3598 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3599 vcpu_put(vcpu);
3600 return 0;
3603 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3604 struct kvm_sregs *sregs)
3606 vcpu_load(vcpu);
3608 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3609 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3611 vcpu_put(vcpu);
3612 return 0;
3615 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3616 struct kvm_sregs *sregs)
3618 vcpu_load(vcpu);
3620 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3621 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3623 vcpu_put(vcpu);
3624 return 0;
3627 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3629 int ret = 0;
3631 vcpu_load(vcpu);
3633 if (test_fp_ctl(fpu->fpc)) {
3634 ret = -EINVAL;
3635 goto out;
3637 vcpu->run->s.regs.fpc = fpu->fpc;
3638 if (MACHINE_HAS_VX)
3639 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3640 (freg_t *) fpu->fprs);
3641 else
3642 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3644 out:
3645 vcpu_put(vcpu);
3646 return ret;
3649 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3651 vcpu_load(vcpu);
3653 /* make sure we have the latest values */
3654 save_fpu_regs();
3655 if (MACHINE_HAS_VX)
3656 convert_vx_to_fp((freg_t *) fpu->fprs,
3657 (__vector128 *) vcpu->run->s.regs.vrs);
3658 else
3659 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3660 fpu->fpc = vcpu->run->s.regs.fpc;
3662 vcpu_put(vcpu);
3663 return 0;
3666 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3668 int rc = 0;
3670 if (!is_vcpu_stopped(vcpu))
3671 rc = -EBUSY;
3672 else {
3673 vcpu->run->psw_mask = psw.mask;
3674 vcpu->run->psw_addr = psw.addr;
3676 return rc;
3679 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3680 struct kvm_translation *tr)
3682 return -EINVAL; /* not implemented yet */
3685 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3686 KVM_GUESTDBG_USE_HW_BP | \
3687 KVM_GUESTDBG_ENABLE)
3689 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3690 struct kvm_guest_debug *dbg)
3692 int rc = 0;
3694 vcpu_load(vcpu);
3696 vcpu->guest_debug = 0;
3697 kvm_s390_clear_bp_data(vcpu);
3699 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3700 rc = -EINVAL;
3701 goto out;
3703 if (!sclp.has_gpere) {
3704 rc = -EINVAL;
3705 goto out;
3708 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3709 vcpu->guest_debug = dbg->control;
3710 /* enforce guest PER */
3711 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3713 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3714 rc = kvm_s390_import_bp_data(vcpu, dbg);
3715 } else {
3716 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3717 vcpu->arch.guestdbg.last_bp = 0;
3720 if (rc) {
3721 vcpu->guest_debug = 0;
3722 kvm_s390_clear_bp_data(vcpu);
3723 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3726 out:
3727 vcpu_put(vcpu);
3728 return rc;
3731 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3732 struct kvm_mp_state *mp_state)
3734 int ret;
3736 vcpu_load(vcpu);
3738 /* CHECK_STOP and LOAD are not supported yet */
3739 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3740 KVM_MP_STATE_OPERATING;
3742 vcpu_put(vcpu);
3743 return ret;
3746 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3747 struct kvm_mp_state *mp_state)
3749 int rc = 0;
3751 vcpu_load(vcpu);
3753 /* user space knows about this interface - let it control the state */
3754 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3756 switch (mp_state->mp_state) {
3757 case KVM_MP_STATE_STOPPED:
3758 rc = kvm_s390_vcpu_stop(vcpu);
3759 break;
3760 case KVM_MP_STATE_OPERATING:
3761 rc = kvm_s390_vcpu_start(vcpu);
3762 break;
3763 case KVM_MP_STATE_LOAD:
3764 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3765 rc = -ENXIO;
3766 break;
3768 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3769 break;
3770 case KVM_MP_STATE_CHECK_STOP:
3771 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
3772 default:
3773 rc = -ENXIO;
3776 vcpu_put(vcpu);
3777 return rc;
3780 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3782 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3785 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3787 retry:
3788 kvm_s390_vcpu_request_handled(vcpu);
3789 if (!kvm_request_pending(vcpu))
3790 return 0;
3791 /*
3792 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3793 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3794 * This ensures that the ipte instruction for this request has
3795 * already finished. We might race against a second unmapper that
3796 * wants to set the blocking bit. Let's just retry the request loop.
3797 */
3798 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3799 int rc;
3800 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3801 kvm_s390_get_prefix(vcpu),
3802 PAGE_SIZE * 2, PROT_WRITE);
3803 if (rc) {
3804 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3805 return rc;
3807 goto retry;
3810 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3811 vcpu->arch.sie_block->ihcpu = 0xffff;
3812 goto retry;
3815 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3816 if (!ibs_enabled(vcpu)) {
3817 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3818 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3820 goto retry;
3823 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3824 if (ibs_enabled(vcpu)) {
3825 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3826 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3828 goto retry;
3831 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3832 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3833 goto retry;
3836 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3837 /*
3838 * Disable CMM virtualization; we will emulate the ESSA
3839 * instruction manually, in order to provide additional
3840 * functionalities needed for live migration.
3841 */
3842 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3843 goto retry;
3846 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3847 /*
3848 * Re-enable CMM virtualization if CMMA is available and
3849 * CMM has been used.
3850 */
3851 if ((vcpu->kvm->arch.use_cmma) &&
3852 (vcpu->kvm->mm->context.uses_cmm))
3853 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3854 goto retry;
3857 /* nothing to do, just clear the request */
3858 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3859 /* we left the vsie handler, nothing to do, just clear the request */
3860 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3862 return 0;
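/*
 * kvm_s390_set_tod_clock() below converts the TOD value requested by
 * userspace into an epoch delta relative to the host TOD
 * (epoch = guest_tod - host_tod). With facility 139 (multiple-epoch)
 * an epoch-index delta is kept as well, with a borrow when the
 * subtraction wraps. All vcpus are blocked while both fields are
 * updated so they observe a consistent pair.
 */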
3865 void kvm_s390_set_tod_clock(struct kvm *kvm,
3866 const struct kvm_s390_vm_tod_clock *gtod)
3868 struct kvm_vcpu *vcpu;
3869 struct kvm_s390_tod_clock_ext htod;
3870 int i;
3872 mutex_lock(&kvm->lock);
3873 preempt_disable();
3875 get_tod_clock_ext((char *)&htod);
3877 kvm->arch.epoch = gtod->tod - htod.tod;
3878 kvm->arch.epdx = 0;
3879 if (test_kvm_facility(kvm, 139)) {
3880 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3881 if (kvm->arch.epoch > gtod->tod)
3882 kvm->arch.epdx -= 1;
3885 kvm_s390_vcpu_block_all(kvm);
3886 kvm_for_each_vcpu(i, vcpu, kvm) {
3887 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3888 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3891 kvm_s390_vcpu_unblock_all(kvm);
3892 preempt_enable();
3893 mutex_unlock(&kvm->lock);
3894 }
3896 /**
3897 * kvm_arch_fault_in_page - fault-in guest page if necessary
3898 * @vcpu: The corresponding virtual cpu
3899 * @gpa: Guest physical address
3900 * @writable: Whether the page should be writable or not
3901 *
3902 * Make sure that a guest page has been faulted-in on the host.
3903 *
3904 * Return: Zero on success, negative error code otherwise.
3905 */
3906 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3908 return gmap_fault(vcpu->arch.gmap, gpa,
3909 writable ? FAULT_FLAG_WRITE : 0);
3912 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3913 unsigned long token)
3915 struct kvm_s390_interrupt inti;
3916 struct kvm_s390_irq irq;
3918 if (start_token) {
3919 irq.u.ext.ext_params2 = token;
3920 irq.type = KVM_S390_INT_PFAULT_INIT;
3921 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3922 } else {
3923 inti.type = KVM_S390_INT_PFAULT_DONE;
3924 inti.parm64 = token;
3925 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3929 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3930 struct kvm_async_pf *work)
3932 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3933 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3935 return true;
3938 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3939 struct kvm_async_pf *work)
3941 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3942 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3945 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3946 struct kvm_async_pf *work)
3948 /* s390 will always inject the page directly */
3951 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3952 {
3953 /*
3954 * s390 will always inject the page directly,
3955 * but we still want check_async_completion to clean up
3956 */
3957 return true;
3960 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3962 hva_t hva;
3963 struct kvm_arch_async_pf arch;
3965 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3966 return false;
3967 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3968 vcpu->arch.pfault_compare)
3969 return false;
3970 if (psw_extint_disabled(vcpu))
3971 return false;
3972 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3973 return false;
3974 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3975 return false;
3976 if (!vcpu->arch.gmap->pfault_enabled)
3977 return false;
3979 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3980 hva += current->thread.gmap_addr & ~PAGE_MASK;
3981 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3982 return false;
3984 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
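/*
 * vcpu_pre_run() below is the per-iteration setup before entering SIE:
 * it completes async page fault housekeeping, delivers pending
 * interrupts (except for ucontrol VMs), processes KVM_REQ_* requests
 * and, when guest debugging is active, backs up and patches the guest
 * PER control registers.
 */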
3987 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3989 int rc, cpuflags;
3991 /*
3992 * On s390, notifications for arriving pages will be delivered directly
3993 * to the guest but the housekeeping for completed pfaults is
3994 * handled outside the worker.
3995 */
3996 kvm_check_async_pf_completion(vcpu);
3998 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3999 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4001 if (need_resched())
4002 schedule();
4004 if (!kvm_is_ucontrol(vcpu->kvm)) {
4005 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4006 if (rc)
4007 return rc;
4010 rc = kvm_s390_handle_requests(vcpu);
4011 if (rc)
4012 return rc;
4014 if (guestdbg_enabled(vcpu)) {
4015 kvm_s390_backup_guest_per_regs(vcpu);
4016 kvm_s390_patch_guest_per_regs(vcpu);
4019 clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4021 vcpu->arch.sie_block->icptcode = 0;
4022 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4023 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4024 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4026 return 0;
4029 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4031 struct kvm_s390_pgm_info pgm_info = {
4032 .code = PGM_ADDRESSING,
4034 u8 opcode, ilen;
4035 int rc;
4037 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4038 trace_kvm_s390_sie_fault(vcpu);
4040 /*
4041 * We want to inject an addressing exception, which is defined as a
4042 * suppressing or terminating exception. However, since we came here
4043 * by a DAT access exception, the PSW still points to the faulting
4044 * instruction since DAT exceptions are nullifying. So we've got
4045 * to look up the current opcode to get the length of the instruction
4046 * to be able to forward the PSW.
4047 */
4048 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4049 ilen = insn_length(opcode);
4050 if (rc < 0) {
4051 return rc;
4052 } else if (rc) {
4053 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4054 * Forward by arbitrary ilc, injection will take care of
4055 * nullification if necessary.
4056 */
4057 pgm_info = vcpu->arch.pgm;
4058 ilen = 4;
4060 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4061 kvm_s390_forward_psw(vcpu, ilen);
4062 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4065 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4067 struct mcck_volatile_info *mcck_info;
4068 struct sie_page *sie_page;
4070 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4071 vcpu->arch.sie_block->icptcode);
4072 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4074 if (guestdbg_enabled(vcpu))
4075 kvm_s390_restore_guest_per_regs(vcpu);
4077 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4078 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4080 if (exit_reason == -EINTR) {
4081 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4082 sie_page = container_of(vcpu->arch.sie_block,
4083 struct sie_page, sie_block);
4084 mcck_info = &sie_page->mcck_info;
4085 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4086 return 0;
4089 if (vcpu->arch.sie_block->icptcode > 0) {
4090 int rc = kvm_handle_sie_intercept(vcpu);
4092 if (rc != -EOPNOTSUPP)
4093 return rc;
4094 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4095 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4096 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4097 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4098 return -EREMOTE;
4099 } else if (exit_reason != -EFAULT) {
4100 vcpu->stat.exit_null++;
4101 return 0;
4102 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4103 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4104 vcpu->run->s390_ucontrol.trans_exc_code =
4105 current->thread.gmap_addr;
4106 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4107 return -EREMOTE;
4108 } else if (current->thread.gmap_pfault) {
4109 trace_kvm_s390_major_guest_pfault(vcpu);
4110 current->thread.gmap_pfault = 0;
4111 if (kvm_arch_setup_async_pf(vcpu))
4112 return 0;
4113 vcpu->stat.pfault_sync++;
4114 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4116 return vcpu_post_run_fault_in_sie(vcpu);
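/*
 * __vcpu_run() below is the main run loop. kvm->srcu is held whenever
 * we are outside of SIE so that memslot accesses are safe, and it is
 * dropped around the actual sie64a() call. For protected (PV) guests
 * the general purpose registers are bounced through the pv_grregs area
 * of the sie_page, and interrupt bits are fenced in our PSW copy on
 * intercepts that expect a continuation on the next SIE entry.
 */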
4119 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4120 static int __vcpu_run(struct kvm_vcpu *vcpu)
4122 int rc, exit_reason;
4123 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4125 /*
4126 * We try to hold kvm->srcu during most of vcpu_run (except when
4127 * running the guest), so that memslots (and other stuff) are protected.
4128 */
4129 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4131 do {
4132 rc = vcpu_pre_run(vcpu);
4133 if (rc)
4134 break;
4136 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4137 /*
4138 * As PF_VCPU will be used in the fault handler, there must be
4139 * no uaccess between guest_enter and guest_exit.
4140 */
4141 local_irq_disable();
4142 guest_enter_irqoff();
4143 __disable_cpu_timer_accounting(vcpu);
4144 local_irq_enable();
4145 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4146 memcpy(sie_page->pv_grregs,
4147 vcpu->run->s.regs.gprs,
4148 sizeof(sie_page->pv_grregs));
4150 exit_reason = sie64a(vcpu->arch.sie_block,
4151 vcpu->run->s.regs.gprs);
4152 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4153 memcpy(vcpu->run->s.regs.gprs,
4154 sie_page->pv_grregs,
4155 sizeof(sie_page->pv_grregs));
4156 /*
4157 * We're not allowed to inject interrupts on intercepts
4158 * that leave the guest state in an "in-between" state
4159 * where the next SIE entry will do a continuation.
4160 * Fence interrupts in our "internal" PSW.
4161 */
4162 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4163 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4164 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4167 local_irq_disable();
4168 __enable_cpu_timer_accounting(vcpu);
4169 guest_exit_irqoff();
4170 local_irq_enable();
4171 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4173 rc = vcpu_post_run(vcpu, exit_reason);
4174 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4176 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4177 return rc;
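/*
 * sync_regs()/sync_regs_fmt2() below copy the kvm_run synced register
 * fields marked dirty by userspace into the SIE block before each
 * KVM_RUN. Runtime instrumentation (RI) and guarded storage (GS) are
 * enabled eagerly here when userspace provides a valid riccb/gscb,
 * e.g. after migration. For protected guests only the condition code
 * of the PSW mask is taken from userspace.
 */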
4180 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4182 struct kvm_run *kvm_run = vcpu->run;
4183 struct runtime_instr_cb *riccb;
4184 struct gs_cb *gscb;
4186 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4187 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4188 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4189 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4190 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4191 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4192 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4193 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4195 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4196 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4197 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4198 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4199 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4200 kvm_clear_async_pf_completion_queue(vcpu);
4202 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4203 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4204 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4205 }
4206 /*
4207 * If userspace sets the riccb (e.g. after migration) to a valid state,
4208 * we should enable RI here instead of doing the lazy enablement.
4209 */
4210 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4211 test_kvm_facility(vcpu->kvm, 64) &&
4212 riccb->v &&
4213 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4214 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4215 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4216 }
4217 /*
4218 * If userspace sets the gscb (e.g. after migration) to non-zero,
4219 * we should enable GS here instead of doing the lazy enablement.
4220 */
4221 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4222 test_kvm_facility(vcpu->kvm, 133) &&
4223 gscb->gssm &&
4224 !vcpu->arch.gs_enabled) {
4225 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4226 vcpu->arch.sie_block->ecb |= ECB_GS;
4227 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4228 vcpu->arch.gs_enabled = 1;
4230 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4231 test_kvm_facility(vcpu->kvm, 82)) {
4232 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4233 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4235 if (MACHINE_HAS_GS) {
4236 preempt_disable();
4237 __ctl_set_bit(2, 4);
4238 if (current->thread.gs_cb) {
4239 vcpu->arch.host_gscb = current->thread.gs_cb;
4240 save_gs_cb(vcpu->arch.host_gscb);
4242 if (vcpu->arch.gs_enabled) {
4243 current->thread.gs_cb = (struct gs_cb *)
4244 &vcpu->run->s.regs.gscb;
4245 restore_gs_cb(current->thread.gs_cb);
4247 preempt_enable();
4249 /* SIE will load etoken directly from SDNX and therefore kvm_run */
4252 static void sync_regs(struct kvm_vcpu *vcpu)
4254 struct kvm_run *kvm_run = vcpu->run;
4256 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4257 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4258 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4259 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4260 /* some control register changes require a tlb flush */
4261 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4263 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4264 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4265 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4267 save_access_regs(vcpu->arch.host_acrs);
4268 restore_access_regs(vcpu->run->s.regs.acrs);
4269 /* save host (userspace) fprs/vrs */
4270 save_fpu_regs();
4271 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4272 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4273 if (MACHINE_HAS_VX)
4274 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4275 else
4276 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4277 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4278 if (test_fp_ctl(current->thread.fpu.fpc))
4279 /* User space provided an invalid FPC, let's clear it */
4280 current->thread.fpu.fpc = 0;
4282 /* Sync fmt2 only data */
4283 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4284 sync_regs_fmt2(vcpu);
4285 } else {
4286 /*
4287 * In several places we have to modify our internal view to
4288 * not do things that are disallowed by the ultravisor. For
4289 * example we must not inject interrupts after specific exits
4290 * (e.g. 112 prefix page not secure). We do this by turning
4291 * off the machine check, external and I/O interrupt bits
4292 * of our PSW copy. To avoid getting validity intercepts, we
4293 * only accept the condition code from userspace.
4294 */
4295 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4296 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4297 PSW_MASK_CC;
4300 kvm_run->kvm_dirty_regs = 0;
4303 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4305 struct kvm_run *kvm_run = vcpu->run;
4307 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4308 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4309 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4310 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4311 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4312 if (MACHINE_HAS_GS) {
4313 __ctl_set_bit(2, 4);
4314 if (vcpu->arch.gs_enabled)
4315 save_gs_cb(current->thread.gs_cb);
4316 preempt_disable();
4317 current->thread.gs_cb = vcpu->arch.host_gscb;
4318 restore_gs_cb(vcpu->arch.host_gscb);
4319 preempt_enable();
4320 if (!vcpu->arch.host_gscb)
4321 __ctl_clear_bit(2, 4);
4322 vcpu->arch.host_gscb = NULL;
4324 /* SIE will save etoken directly into SDNX and therefore kvm_run */
4327 static void store_regs(struct kvm_vcpu *vcpu)
4329 struct kvm_run *kvm_run = vcpu->run;
4331 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4332 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4333 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4334 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4335 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4336 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4337 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4338 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4339 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4340 save_access_regs(vcpu->run->s.regs.acrs);
4341 restore_access_regs(vcpu->arch.host_acrs);
4342 /* Save guest register state */
4343 save_fpu_regs();
4344 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4345 /* Restore will be done lazily at return */
4346 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4347 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4348 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4349 store_regs_fmt2(vcpu);
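/*
 * kvm_arch_vcpu_ioctl_run() below implements KVM_RUN: registers are
 * synced in, the SIE loop is run, registers are stored back and the
 * exit reason is reported through the shared kvm_run structure.
 * Illustrative userspace sketch (not part of this file; "run" is the
 * mmap()ed vcpu kvm_run area, handle_sie_intercept() is hypothetical):
 *
 *	for (;;) {
 *		if (ioctl(vcpu_fd, KVM_RUN, NULL) < 0 && errno != EINTR)
 *			break;
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			handle_sie_intercept(run);
 *	}
 */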
4352 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4354 struct kvm_run *kvm_run = vcpu->run;
4355 int rc;
4357 if (kvm_run->immediate_exit)
4358 return -EINTR;
4360 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4361 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4362 return -EINVAL;
4364 vcpu_load(vcpu);
4366 if (guestdbg_exit_pending(vcpu)) {
4367 kvm_s390_prepare_debug_exit(vcpu);
4368 rc = 0;
4369 goto out;
4372 kvm_sigset_activate(vcpu);
4374 /*
4375 * No need to check the return value of vcpu_start as it can only have
4376 * an error for protvirt, but protvirt implies user cpu state control.
4377 */
4378 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4379 kvm_s390_vcpu_start(vcpu);
4380 } else if (is_vcpu_stopped(vcpu)) {
4381 pr_err_ratelimited("can't run stopped vcpu %d\n",
4382 vcpu->vcpu_id);
4383 rc = -EINVAL;
4384 goto out;
4387 sync_regs(vcpu);
4388 enable_cpu_timer_accounting(vcpu);
4390 might_fault();
4391 rc = __vcpu_run(vcpu);
4393 if (signal_pending(current) && !rc) {
4394 kvm_run->exit_reason = KVM_EXIT_INTR;
4395 rc = -EINTR;
4398 if (guestdbg_exit_pending(vcpu) && !rc) {
4399 kvm_s390_prepare_debug_exit(vcpu);
4400 rc = 0;
4403 if (rc == -EREMOTE) {
4404 /* userspace support is needed, kvm_run has been prepared */
4405 rc = 0;
4408 disable_cpu_timer_accounting(vcpu);
4409 store_regs(vcpu);
4411 kvm_sigset_deactivate(vcpu);
4413 vcpu->stat.exit_userspace++;
4414 out:
4415 vcpu_put(vcpu);
4416 return rc;
4417 }
4419 /*
4420 * store status at address
4421 * we have two special cases:
4422 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4423 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4424 */
4425 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4427 unsigned char archmode = 1;
4428 freg_t fprs[NUM_FPRS];
4429 unsigned int px;
4430 u64 clkcomp, cputm;
4431 int rc;
4433 px = kvm_s390_get_prefix(vcpu);
4434 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4435 if (write_guest_abs(vcpu, 163, &archmode, 1))
4436 return -EFAULT;
4437 gpa = 0;
4438 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4439 if (write_guest_real(vcpu, 163, &archmode, 1))
4440 return -EFAULT;
4441 gpa = px;
4442 } else
4443 gpa -= __LC_FPREGS_SAVE_AREA;
4445 /* manually convert vector registers if necessary */
4446 if (MACHINE_HAS_VX) {
4447 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4448 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4449 fprs, 128);
4450 } else {
4451 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4452 vcpu->run->s.regs.fprs, 128);
4454 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4455 vcpu->run->s.regs.gprs, 128);
4456 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4457 &vcpu->arch.sie_block->gpsw, 16);
4458 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4459 &px, 4);
4460 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4461 &vcpu->run->s.regs.fpc, 4);
4462 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4463 &vcpu->arch.sie_block->todpr, 4);
4464 cputm = kvm_s390_get_cpu_timer(vcpu);
4465 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4466 &cputm, 8);
4467 clkcomp = vcpu->arch.sie_block->ckc >> 8;
4468 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4469 &clkcomp, 8);
4470 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4471 &vcpu->run->s.regs.acrs, 64);
4472 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4473 &vcpu->arch.sie_block->gcr, 128);
4474 return rc ? -EFAULT : 0;
4477 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4478 {
4479 /*
4480 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4481 * switch in the run ioctl. Let's update our copies before we save
4482 * them into the save area.
4483 */
4484 save_fpu_regs();
4485 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4486 save_access_regs(vcpu->run->s.regs.acrs);
4488 return kvm_s390_store_status_unloaded(vcpu, addr);
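/*
 * The helpers below manage the IBS facility, which is only useful while
 * exactly one vcpu is running: the single running vcpu gets IBS enabled
 * to speed it up, and starting a second vcpu disables it again on all
 * vcpus. Enabling/disabling is requested via KVM_REQ_ENABLE_IBS /
 * KVM_REQ_DISABLE_IBS and applied in kvm_s390_handle_requests().
 */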
4491 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4493 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4494 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4497 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4499 unsigned int i;
4500 struct kvm_vcpu *vcpu;
4502 kvm_for_each_vcpu(i, vcpu, kvm) {
4503 __disable_ibs_on_vcpu(vcpu);
4507 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4509 if (!sclp.has_ibs)
4510 return;
4511 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4512 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4515 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4517 int i, online_vcpus, r = 0, started_vcpus = 0;
4519 if (!is_vcpu_stopped(vcpu))
4520 return 0;
4522 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4523 /* Only one cpu at a time may enter/leave the STOPPED state. */
4524 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4525 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4527 /* Let's tell the UV that we want to change into the operating state */
4528 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4529 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4530 if (r) {
4531 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4532 return r;
4536 for (i = 0; i < online_vcpus; i++) {
4537 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4538 started_vcpus++;
4541 if (started_vcpus == 0) {
4542 /* we're the only active VCPU -> speed it up */
4543 __enable_ibs_on_vcpu(vcpu);
4544 } else if (started_vcpus == 1) {
4545 /*
4546 * As we are starting a second VCPU, we have to disable
4547 * the IBS facility on all VCPUs to remove potentially
4548 * outstanding ENABLE requests.
4549 */
4550 __disable_ibs_on_all_vcpus(vcpu->kvm);
4553 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4554 /*
4555 * The real PSW might have changed due to a RESTART interpreted by the
4556 * ultravisor. We block all interrupts and let the next sie exit
4557 * refresh our view.
4558 */
4559 if (kvm_s390_pv_cpu_is_protected(vcpu))
4560 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4561 /*
4562 * Another VCPU might have used IBS while we were offline.
4563 * Let's play safe and flush the VCPU at startup.
4564 */
4565 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4566 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4567 return 0;
4570 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4572 int i, online_vcpus, r = 0, started_vcpus = 0;
4573 struct kvm_vcpu *started_vcpu = NULL;
4575 if (is_vcpu_stopped(vcpu))
4576 return 0;
4578 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4579 /* Only one cpu at a time may enter/leave the STOPPED state. */
4580 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4581 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4583 /* Let's tell the UV that we want to change into the stopped state */
4584 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4585 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4586 if (r) {
4587 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4588 return r;
4592 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4593 kvm_s390_clear_stop_irq(vcpu);
4595 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4596 __disable_ibs_on_vcpu(vcpu);
4598 for (i = 0; i < online_vcpus; i++) {
4599 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4600 started_vcpus++;
4601 started_vcpu = vcpu->kvm->vcpus[i];
4605 if (started_vcpus == 1) {
4606 /*
4607 * As we only have one VCPU left, we want to enable the
4608 * IBS facility for that VCPU to speed it up.
4609 */
4610 __enable_ibs_on_vcpu(started_vcpu);
4613 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4614 return 0;
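/*
 * Per-vcpu KVM_ENABLE_CAP currently only knows KVM_CAP_S390_CSS_SUPPORT,
 * which hands channel subsystem (I/O) instructions to user space for
 * emulation. Illustrative userspace sketch (not part of this file):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 */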
4617 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4618 struct kvm_enable_cap *cap)
4620 int r;
4622 if (cap->flags)
4623 return -EINVAL;
4625 switch (cap->cap) {
4626 case KVM_CAP_S390_CSS_SUPPORT:
4627 if (!vcpu->kvm->arch.css_support) {
4628 vcpu->kvm->arch.css_support = 1;
4629 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4630 trace_kvm_s390_enable_css(vcpu->kvm);
4632 r = 0;
4633 break;
4634 default:
4635 r = -EINVAL;
4636 break;
4638 return r;
4641 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4642 struct kvm_s390_mem_op *mop)
4644 void __user *uaddr = (void __user *)mop->buf;
4645 int r = 0;
4647 if (mop->flags || !mop->size)
4648 return -EINVAL;
4649 if (mop->size + mop->sida_offset < mop->size)
4650 return -EINVAL;
4651 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4652 return -E2BIG;
4654 switch (mop->op) {
4655 case KVM_S390_MEMOP_SIDA_READ:
4656 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4657 mop->sida_offset), mop->size))
4658 r = -EFAULT;
4660 break;
4661 case KVM_S390_MEMOP_SIDA_WRITE:
4662 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4663 mop->sida_offset), uaddr, mop->size))
4664 r = -EFAULT;
4665 break;
4667 return r;
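/*
 * kvm_s390_guest_mem_op() below backs KVM_S390_MEM_OP for ordinary
 * guests: it reads or writes guest logical addresses through the given
 * access register, optionally only checking accessibility
 * (KVM_S390_MEMOP_F_CHECK_ONLY) or injecting the resulting program
 * exception (KVM_S390_MEMOP_F_INJECT_EXCEPTION). Protected guests must
 * use the SIDA variants above instead. Illustrative userspace sketch
 * (not part of this file; guest_addr, local_buf and len are placeholders):
 *
 *	struct kvm_s390_mem_op mop = {
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.gaddr = guest_addr,
 *		.buf   = (__u64)(unsigned long)local_buf,
 *		.size  = len,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop);
 */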
4669 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4670 struct kvm_s390_mem_op *mop)
4672 void __user *uaddr = (void __user *)mop->buf;
4673 void *tmpbuf = NULL;
4674 int r = 0;
4675 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4676 | KVM_S390_MEMOP_F_CHECK_ONLY;
4678 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4679 return -EINVAL;
4681 if (mop->size > MEM_OP_MAX_SIZE)
4682 return -E2BIG;
4684 if (kvm_s390_pv_cpu_is_protected(vcpu))
4685 return -EINVAL;
4687 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4688 tmpbuf = vmalloc(mop->size);
4689 if (!tmpbuf)
4690 return -ENOMEM;
4693 switch (mop->op) {
4694 case KVM_S390_MEMOP_LOGICAL_READ:
4695 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4696 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4697 mop->size, GACC_FETCH);
4698 break;
4700 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4701 if (r == 0) {
4702 if (copy_to_user(uaddr, tmpbuf, mop->size))
4703 r = -EFAULT;
4705 break;
4706 case KVM_S390_MEMOP_LOGICAL_WRITE:
4707 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4708 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4709 mop->size, GACC_STORE);
4710 break;
4712 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4713 r = -EFAULT;
4714 break;
4716 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4717 break;
4720 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4721 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4723 vfree(tmpbuf);
4724 return r;
4727 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4728 struct kvm_s390_mem_op *mop)
4730 int r, srcu_idx;
4732 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4734 switch (mop->op) {
4735 case KVM_S390_MEMOP_LOGICAL_READ:
4736 case KVM_S390_MEMOP_LOGICAL_WRITE:
4737 r = kvm_s390_guest_mem_op(vcpu, mop);
4738 break;
4739 case KVM_S390_MEMOP_SIDA_READ:
4740 case KVM_S390_MEMOP_SIDA_WRITE:
4741 /* we are locked against sida going away by the vcpu->mutex */
4742 r = kvm_s390_guest_sida_op(vcpu, mop);
4743 break;
4744 default:
4745 r = -EINVAL;
4748 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4749 return r;
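/*
 * kvm_arch_vcpu_async_ioctl() below handles KVM_S390_IRQ and the older
 * KVM_S390_INTERRUPT without taking the vcpu mutex, so userspace can
 * inject interrupts into a vcpu that is currently running.
 * KVM_S390_INTERRUPT payloads are converted to the newer kvm_s390_irq
 * format first.
 */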
4752 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4753 unsigned int ioctl, unsigned long arg)
4755 struct kvm_vcpu *vcpu = filp->private_data;
4756 void __user *argp = (void __user *)arg;
4758 switch (ioctl) {
4759 case KVM_S390_IRQ: {
4760 struct kvm_s390_irq s390irq;
4762 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4763 return -EFAULT;
4764 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4766 case KVM_S390_INTERRUPT: {
4767 struct kvm_s390_interrupt s390int;
4768 struct kvm_s390_irq s390irq = {};
4770 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4771 return -EFAULT;
4772 if (s390int_to_s390irq(&s390int, &s390irq))
4773 return -EINVAL;
4774 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4777 return -ENOIOCTLCMD;
4780 long kvm_arch_vcpu_ioctl(struct file *filp,
4781 unsigned int ioctl, unsigned long arg)
4783 struct kvm_vcpu *vcpu = filp->private_data;
4784 void __user *argp = (void __user *)arg;
4785 int idx;
4786 long r;
4787 u16 rc, rrc;
4789 vcpu_load(vcpu);
4791 switch (ioctl) {
4792 case KVM_S390_STORE_STATUS:
4793 idx = srcu_read_lock(&vcpu->kvm->srcu);
4794 r = kvm_s390_store_status_unloaded(vcpu, arg);
4795 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4796 break;
4797 case KVM_S390_SET_INITIAL_PSW: {
4798 psw_t psw;
4800 r = -EFAULT;
4801 if (copy_from_user(&psw, argp, sizeof(psw)))
4802 break;
4803 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4804 break;
4806 case KVM_S390_CLEAR_RESET:
4807 r = 0;
4808 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4809 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4810 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4811 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4812 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4813 rc, rrc);
4815 break;
4816 case KVM_S390_INITIAL_RESET:
4817 r = 0;
4818 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4819 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4820 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4821 UVC_CMD_CPU_RESET_INITIAL,
4822 &rc, &rrc);
4823 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4824 rc, rrc);
4826 break;
4827 case KVM_S390_NORMAL_RESET:
4828 r = 0;
4829 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4830 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4831 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4832 UVC_CMD_CPU_RESET, &rc, &rrc);
4833 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4834 rc, rrc);
4836 break;
4837 case KVM_SET_ONE_REG:
4838 case KVM_GET_ONE_REG: {
4839 struct kvm_one_reg reg;
4840 r = -EINVAL;
4841 if (kvm_s390_pv_cpu_is_protected(vcpu))
4842 break;
4843 r = -EFAULT;
4844 if (copy_from_user(&reg, argp, sizeof(reg)))
4845 break;
4846 if (ioctl == KVM_SET_ONE_REG)
4847 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4848 else
4849 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4850 break;
4852 #ifdef CONFIG_KVM_S390_UCONTROL
4853 case KVM_S390_UCAS_MAP: {
4854 struct kvm_s390_ucas_mapping ucasmap;
4856 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4857 r = -EFAULT;
4858 break;
4861 if (!kvm_is_ucontrol(vcpu->kvm)) {
4862 r = -EINVAL;
4863 break;
4866 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4867 ucasmap.vcpu_addr, ucasmap.length);
4868 break;
4870 case KVM_S390_UCAS_UNMAP: {
4871 struct kvm_s390_ucas_mapping ucasmap;
4873 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4874 r = -EFAULT;
4875 break;
4878 if (!kvm_is_ucontrol(vcpu->kvm)) {
4879 r = -EINVAL;
4880 break;
4883 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4884 ucasmap.length);
4885 break;
4887 #endif
4888 case KVM_S390_VCPU_FAULT: {
4889 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4890 break;
4892 case KVM_ENABLE_CAP:
4894 struct kvm_enable_cap cap;
4895 r = -EFAULT;
4896 if (copy_from_user(&cap, argp, sizeof(cap)))
4897 break;
4898 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4899 break;
4901 case KVM_S390_MEM_OP: {
4902 struct kvm_s390_mem_op mem_op;
4904 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4905 r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4906 else
4907 r = -EFAULT;
4908 break;
4910 case KVM_S390_SET_IRQ_STATE: {
4911 struct kvm_s390_irq_state irq_state;
4913 r = -EFAULT;
4914 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4915 break;
4916 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4917 irq_state.len == 0 ||
4918 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4919 r = -EINVAL;
4920 break;
4922 /* do not use irq_state.flags, it will break old QEMUs */
4923 r = kvm_s390_set_irq_state(vcpu,
4924 (void __user *) irq_state.buf,
4925 irq_state.len);
4926 break;
4928 case KVM_S390_GET_IRQ_STATE: {
4929 struct kvm_s390_irq_state irq_state;
4931 r = -EFAULT;
4932 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4933 break;
4934 if (irq_state.len == 0) {
4935 r = -EINVAL;
4936 break;
4938 /* do not use irq_state.flags, it will break old QEMUs */
4939 r = kvm_s390_get_irq_state(vcpu,
4940 (__u8 __user *) irq_state.buf,
4941 irq_state.len);
4942 break;
4944 default:
4945 r = -ENOTTY;
4948 vcpu_put(vcpu);
4949 return r;
4952 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4954 #ifdef CONFIG_KVM_S390_UCONTROL
4955 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4956 && (kvm_is_ucontrol(vcpu->kvm))) {
4957 vmf->page = virt_to_page(vcpu->arch.sie_block);
4958 get_page(vmf->page);
4959 return 0;
4961 #endif
4962 return VM_FAULT_SIGBUS;
4965 /* Section: memory related */
4966 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4967 struct kvm_memory_slot *memslot,
4968 const struct kvm_userspace_memory_region *mem,
4969 enum kvm_mr_change change)
4971 /* A few sanity checks. Memory slots have to start and end at a
4972 segment boundary (1MB). The memory in userland is
4973 ok to be fragmented into various different vmas. It is okay to mmap()
4974 and munmap() stuff in this slot after doing this call at any time */
4976 if (mem->userspace_addr & 0xffffful)
4977 return -EINVAL;
4979 if (mem->memory_size & 0xffffful)
4980 return -EINVAL;
4982 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4983 return -EINVAL;
4985 /* When we are protected, we should not change the memory slots */
4986 if (kvm_s390_pv_get_handle(kvm))
4987 return -EINVAL;
4988 return 0;
4991 void kvm_arch_commit_memory_region(struct kvm *kvm,
4992 const struct kvm_userspace_memory_region *mem,
4993 struct kvm_memory_slot *old,
4994 const struct kvm_memory_slot *new,
4995 enum kvm_mr_change change)
4997 int rc = 0;
4999 switch (change) {
5000 case KVM_MR_DELETE:
5001 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5002 old->npages * PAGE_SIZE);
5003 break;
5004 case KVM_MR_MOVE:
5005 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5006 old->npages * PAGE_SIZE);
5007 if (rc)
5008 break;
5009 fallthrough;
5010 case KVM_MR_CREATE:
5011 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5012 mem->guest_phys_addr, mem->memory_size);
5013 break;
5014 case KVM_MR_FLAGS_ONLY:
5015 break;
5016 default:
5017 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5019 if (rc)
5020 pr_warn("failed to commit memory region\n");
5021 return;
5024 static inline unsigned long nonhyp_mask(int i)
5026 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5028 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5031 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5033 vcpu->valid_wakeup = false;
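/*
 * Module init below refuses to load when the SIE interpretation
 * facility (sclp.has_sief2) is missing and rejects the nested=1 +
 * hpage=1 combination. The base facility set offered to guests is the
 * host STFLE list filtered by nonhyp_mask(), which is derived from
 * sclp.hmfai.
 */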
5036 static int __init kvm_s390_init(void)
5038 int i;
5040 if (!sclp.has_sief2) {
5041 pr_info("SIE is not available\n");
5042 return -ENODEV;
5045 if (nested && hpage) {
5046 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5047 return -EINVAL;
5050 for (i = 0; i < 16; i++)
5051 kvm_s390_fac_base[i] |=
5052 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5054 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5057 static void __exit kvm_s390_exit(void)
5059 kvm_exit();
5062 module_init(kvm_s390_init);
5063 module_exit(kvm_s390_exit);
5065 /*
5066 * Enable autoloading of the kvm module.
5067 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5068 * since x86 takes a different approach.
5069 */
5070 #include <linux/miscdevice.h>
5071 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5072 MODULE_ALIAS("devname:kvm");