arch/s390/kvm/kvm-s390.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * hosting IBM Z kernel virtual machines (s390x)
   4  *
   5  * Copyright IBM Corp. 2008, 2018
   6  *
   7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8  *               Christian Borntraeger <borntraeger@de.ibm.com>
   9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  11  *               Jason J. Herne <jjherne@us.ibm.com>
  12  */
  13
  14 #include <linux/compiler.h>
  15 #include <linux/err.h>
  16 #include <linux/fs.h>
  17 #include <linux/hrtimer.h>
  18 #include <linux/init.h>
  19 #include <linux/kvm.h>
  20 #include <linux/kvm_host.h>
  21 #include <linux/mman.h>
  22 #include <linux/module.h>
  23 #include <linux/moduleparam.h>
  24 #include <linux/random.h>
  25 #include <linux/slab.h>
  26 #include <linux/timer.h>
  27 #include <linux/vmalloc.h>
  28 #include <linux/bitmap.h>
  29 #include <linux/sched/signal.h>
  30 #include <linux/string.h>
  31
  32 #include <asm/asm-offsets.h>
  33 #include <asm/lowcore.h>
  34 #include <asm/stp.h>
  35 #include <asm/pgtable.h>
  36 #include <asm/gmap.h>
  37 #include <asm/nmi.h>
  38 #include <asm/switch_to.h>
  39 #include <asm/isc.h>
  40 #include <asm/sclp.h>
  41 #include <asm/cpacf.h>
  42 #include <asm/timex.h>
  43 #include "kvm-s390.h"
  44 #include "gaccess.h"
  45
  46 #define KMSG_COMPONENT "kvm-s390"
  47 #undef pr_fmt
  48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  49
  50 #define CREATE_TRACE_POINTS
  51 #include "trace.h"
  52 #include "trace-s390.h"
  53
#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
/* NOTE(review): presumably the number of per-vcpu (local) interrupt slots - confirm */
#define LOCAL_IRQS 32
/* Size in bytes of (KVM_MAX_VCPUS + LOCAL_IRQS) struct kvm_s390_irq entries */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))
  58
/*
 * Helpers for the statistics table below. Each expands to two initializer
 * values: the offset of counter @x inside struct kvm_vcpu (VCPU_STAT) or
 * struct kvm (VM_STAT), followed by the matching KVM_STAT_* kind.
 */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM

/*
 * Table of statistics counters: a name plus the location and kind of the
 * counter it refers to. The list is terminated by a { NULL } entry.
 * NOTE(review): presumably consumed by the generic KVM code to create the
 * per-VM/per-vcpu debugfs files - confirm against kvm_main.c.
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_io_request", VCPU_STAT(exit_io_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_ckc", VCPU_STAT(deliver_ckc) },
        { "deliver_cputm", VCPU_STAT(deliver_cputm) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio", VCPU_STAT(deliver_virtio) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program", VCPU_STAT(deliver_program) },
        { "deliver_io", VCPU_STAT(deliver_io) },
        { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "inject_ckc", VCPU_STAT(inject_ckc) },
        { "inject_cputm", VCPU_STAT(inject_cputm) },
        { "inject_external_call", VCPU_STAT(inject_external_call) },
        { "inject_float_mchk", VM_STAT(inject_float_mchk) },
        { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
        { "inject_io", VM_STAT(inject_io) },
        { "inject_mchk", VCPU_STAT(inject_mchk) },
        { "inject_pfault_done", VM_STAT(inject_pfault_done) },
        { "inject_program", VCPU_STAT(inject_program) },
        { "inject_restart", VCPU_STAT(inject_restart) },
        { "inject_service_signal", VM_STAT(inject_service_signal) },
        { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
        { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
        { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
        { "inject_virtio", VM_STAT(inject_virtio) },
        { "instruction_epsw", VCPU_STAT(instruction_epsw) },
        { "instruction_gs", VCPU_STAT(instruction_gs) },
        { "instruction_io_other", VCPU_STAT(instruction_io_other) },
        { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
        { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_ptff", VCPU_STAT(instruction_ptff) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_sck", VCPU_STAT(instruction_sck) },
        { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_iske", VCPU_STAT(instruction_iske) },
        { "instruction_ri", VCPU_STAT(instruction_ri) },
        { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
        { "instruction_sske", VCPU_STAT(instruction_sske) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tb", VCPU_STAT(instruction_tb) },
        { "instruction_tpi", VCPU_STAT(instruction_tpi) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_tsch", VCPU_STAT(instruction_tsch) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
        { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "instruction_diag_10", VCPU_STAT(diagnose_10) },
        { "instruction_diag_44", VCPU_STAT(diagnose_44) },
        { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
        { "instruction_diag_258", VCPU_STAT(diagnose_258) },
        { "instruction_diag_308", VCPU_STAT(diagnose_308) },
        { "instruction_diag_500", VCPU_STAT(diagnose_500) },
        { "instruction_diag_other", VCPU_STAT(diagnose_other) },
        { NULL }
};
 163
/*
 * Packed 16-byte layout: one epoch index byte, the 64-bit TOD value and
 * seven reserved bytes.
 * NOTE(review): presumably mirrors the extended TOD clock format stored by
 * the machine - confirm against the users of this struct.
 */
struct kvm_s390_tod_clock_ext {
        __u8 epoch_idx;
        __u64 tod;
        __u8 reserved[7];
} __packed;
 169
 170 /* allow nested virtualization in KVM (if enabled by user space) */
 171 static int nested;
 172 module_param(nested, int, S_IRUGO);
 173 MODULE_PARM_DESC(nested, "Nested virtualization support");
 174
 175
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 * Array elements not covered by the initializer are zero.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 * Array elements not covered by the initializer are zero.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 193
 194 static unsigned long kvm_s390_fac_size(void)
 195 {
 196         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 197         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 198         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 199                 sizeof(S390_lowcore.stfle_fac_list));
 200
 201         return SIZE_INTERNAL;
 202 }
 203
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* gmap pte notifiers; registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* s390 debug feature handle ("kvm-trace"); registered in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
 212
 213 /* Section: not file related */
 214 int kvm_arch_hardware_enable(void)
 215 {
 216         /* every s390 is virtualization enabled ;-) */
 217         return 0;
 218 }
 219
 220 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 221                               unsigned long end);
 222
 223 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 224 {
 225         u8 delta_idx = 0;
 226
 227         /*
 228          * The TOD jumps by delta, we have to compensate this by adding
 229          * -delta to the epoch.
 230          */
 231         delta = -delta;
 232
 233         /* sign-extension - we're adding to signed values below */
 234         if ((s64)delta < 0)
 235                 delta_idx = -1;
 236
 237         scb->epoch += delta;
 238         if (scb->ecd & ECD_MEF) {
 239                 scb->epdx += delta_idx;
 240                 if (scb->epoch < delta)
 241                         scb->epdx += 1;
 242         }
 243 }
 244
 245 /*
 246  * This callback is executed during stop_machine(). All CPUs are therefore
 247  * temporarily stopped. In order not to change guest behavior, we have to
 248  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 249  * so a CPU won't be stopped while calculating with the epoch.
 250  */
 251 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 252                           void *v)
 253 {
 254         struct kvm *kvm;
 255         struct kvm_vcpu *vcpu;
 256         int i;
 257         unsigned long long *delta = v;
 258
 259         list_for_each_entry(kvm, &vm_list, vm_list) {
 260                 kvm_for_each_vcpu(i, vcpu, kvm) {
 261                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 262                         if (i == 0) {
 263                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 264                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 265                         }
 266                         if (vcpu->arch.cputm_enabled)
 267                                 vcpu->arch.cputm_start += *delta;
 268                         if (vcpu->arch.vsie_block)
 269                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
 270                                                    *delta);
 271                 }
 272         }
 273         return NOTIFY_OK;
 274 }
 275
/*
 * Notifier block that routes epoch delta notifications to kvm_clock_sync();
 * added to s390_epoch_delta_notifier in kvm_arch_hardware_setup().
 */
static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};
 279
 280 int kvm_arch_hardware_setup(void)
 281 {
 282         gmap_notifier.notifier_call = kvm_gmap_notifier;
 283         gmap_register_pte_notifier(&gmap_notifier);
 284         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 285         gmap_register_pte_notifier(&vsie_gmap_notifier);
 286         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 287                                        &kvm_clock_notifier);
 288         return 0;
 289 }
 290
 291 void kvm_arch_hardware_unsetup(void)
 292 {
 293         gmap_unregister_pte_notifier(&gmap_notifier);
 294         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 295         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 296                                          &kvm_clock_notifier);
 297 }
 298
 299 static void allow_cpu_feat(unsigned long nr)
 300 {
 301         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 302 }
 303
/*
 * Ask the machine whether PLO function code @nr is available.
 *
 * General register 0 is loaded with @nr | 0x100 before PLO is executed; the
 * resulting condition code is extracted with ipm/srl.
 * NOTE(review): 0x100 is presumably the "test bit" flag of PLO - confirm
 * against the Principles of Operation.
 *
 * Return: 1 if the condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        int cc;

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}
 319
 320 static void kvm_s390_cpu_feat_init(void)
 321 {
 322         int i;
 323
 324         for (i = 0; i < 256; ++i) {
 325                 if (plo_test_bit(i))
 326                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 327         }
 328
 329         if (test_facility(28)) /* TOD-clock steering */
 330                 ptff(kvm_s390_available_subfunc.ptff,
 331                      sizeof(kvm_s390_available_subfunc.ptff),
 332                      PTFF_QAF);
 333
 334         if (test_facility(17)) { /* MSA */
 335                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 336                               kvm_s390_available_subfunc.kmac);
 337                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 338                               kvm_s390_available_subfunc.kmc);
 339                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 340                               kvm_s390_available_subfunc.km);
 341                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 342                               kvm_s390_available_subfunc.kimd);
 343                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 344                               kvm_s390_available_subfunc.klmd);
 345         }
 346         if (test_facility(76)) /* MSA3 */
 347                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 348                               kvm_s390_available_subfunc.pckmo);
 349         if (test_facility(77)) { /* MSA4 */
 350                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 351                               kvm_s390_available_subfunc.kmctr);
 352                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 353                               kvm_s390_available_subfunc.kmf);
 354                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 355                               kvm_s390_available_subfunc.kmo);
 356                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 357                               kvm_s390_available_subfunc.pcc);
 358         }
 359         if (test_facility(57)) /* MSA5 */
 360                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 361                               kvm_s390_available_subfunc.ppno);
 362
 363         if (test_facility(146)) /* MSA8 */
 364                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 365                               kvm_s390_available_subfunc.kma);
 366
 367         if (MACHINE_HAS_ESOP)
 368                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 369         /*
 370          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 371          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 372          */
 373         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 374             !test_facility(3) || !nested)
 375                 return;
 376         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 377         if (sclp.has_64bscao)
 378                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 379         if (sclp.has_siif)
 380                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 381         if (sclp.has_gpere)
 382                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 383         if (sclp.has_gsls)
 384                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 385         if (sclp.has_ib)
 386                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 387         if (sclp.has_cei)
 388                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 389         if (sclp.has_ibs)
 390                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 391         if (sclp.has_kss)
 392                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 393         /*
 394          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 395          * all skey handling functions read/set the skey from the PGSTE
 396          * instead of the real storage key.
 397          *
 398          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 399          * pages being detected as preserved although they are resident.
 400          *
 401          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 402          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 403          *
 404          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 405          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 406          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 407          *
 408          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 409          * cannot easily shadow the SCA because of the ipte lock.
 410          */
 411 }
 412
 413 int kvm_arch_init(void *opaque)
 414 {
 415         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 416         if (!kvm_s390_dbf)
 417                 return -ENOMEM;
 418
 419         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 420                 debug_unregister(kvm_s390_dbf);
 421                 return -ENOMEM;
 422         }
 423
 424         kvm_s390_cpu_feat_init();
 425
 426         /* Register floating interrupt controller interface. */
 427         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 428 }
 429
 430 void kvm_arch_exit(void)
 431 {
 432         debug_unregister(kvm_s390_dbf);
 433 }
 434
 435 /* Section: device related */
 436 long kvm_arch_dev_ioctl(struct file *filp,
 437                         unsigned int ioctl, unsigned long arg)
 438 {
 439         if (ioctl == KVM_S390_ENABLE_SIE)
 440                 return s390_enable_sie();
 441         return -EINVAL;
 442 }
 443
 444 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 445 {
 446         int r;
 447
 448         switch (ext) {
 449         case KVM_CAP_S390_PSW:
 450         case KVM_CAP_S390_GMAP:
 451         case KVM_CAP_SYNC_MMU:
 452 #ifdef CONFIG_KVM_S390_UCONTROL
 453         case KVM_CAP_S390_UCONTROL:
 454 #endif
 455         case KVM_CAP_ASYNC_PF:
 456         case KVM_CAP_SYNC_REGS:
 457         case KVM_CAP_ONE_REG:
 458         case KVM_CAP_ENABLE_CAP:
 459         case KVM_CAP_S390_CSS_SUPPORT:
 460         case KVM_CAP_IOEVENTFD:
 461         case KVM_CAP_DEVICE_CTRL:
 462         case KVM_CAP_ENABLE_CAP_VM:
 463         case KVM_CAP_S390_IRQCHIP:
 464         case KVM_CAP_VM_ATTRIBUTES:
 465         case KVM_CAP_MP_STATE:
 466         case KVM_CAP_IMMEDIATE_EXIT:
 467         case KVM_CAP_S390_INJECT_IRQ:
 468         case KVM_CAP_S390_USER_SIGP:
 469         case KVM_CAP_S390_USER_STSI:
 470         case KVM_CAP_S390_SKEYS:
 471         case KVM_CAP_S390_IRQ_STATE:
 472         case KVM_CAP_S390_USER_INSTR0:
 473         case KVM_CAP_S390_CMMA_MIGRATION:
 474         case KVM_CAP_S390_AIS:
 475         case KVM_CAP_S390_AIS_MIGRATION:
 476                 r = 1;
 477                 break;
 478         case KVM_CAP_S390_MEM_OP:
 479                 r = MEM_OP_MAX_SIZE;
 480                 break;
 481         case KVM_CAP_NR_VCPUS:
 482         case KVM_CAP_MAX_VCPUS:
 483                 r = KVM_S390_BSCA_CPU_SLOTS;
 484                 if (!kvm_s390_use_sca_entries())
 485                         r = KVM_MAX_VCPUS;
 486                 else if (sclp.has_esca && sclp.has_64bscao)
 487                         r = KVM_S390_ESCA_CPU_SLOTS;
 488                 break;
 489         case KVM_CAP_NR_MEMSLOTS:
 490                 r = KVM_USER_MEM_SLOTS;
 491                 break;
 492         case KVM_CAP_S390_COW:
 493                 r = MACHINE_HAS_ESOP;
 494                 break;
 495         case KVM_CAP_S390_VECTOR_REGISTERS:
 496                 r = MACHINE_HAS_VX;
 497                 break;
 498         case KVM_CAP_S390_RI:
 499                 r = test_facility(64);
 500                 break;
 501         case KVM_CAP_S390_GS:
 502                 r = test_facility(133);
 503                 break;
 504         case KVM_CAP_S390_BPB:
 505                 r = test_facility(82);
 506                 break;
 507         default:
 508                 r = 0;
 509         }
 510         return r;
 511 }
 512
 513 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 514                                         struct kvm_memory_slot *memslot)
 515 {
 516         gfn_t cur_gfn, last_gfn;
 517         unsigned long address;
 518         struct gmap *gmap = kvm->arch.gmap;
 519
 520         /* Loop over all guest pages */
 521         last_gfn = memslot->base_gfn + memslot->npages;
 522         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 523                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 524
 525                 if (test_and_clear_guest_dirty(gmap->mm, address))
 526                         mark_page_dirty(kvm, cur_gfn);
 527                 if (fatal_signal_pending(current))
 528                         return;
 529                 cond_resched();
 530         }
 531 }
 532
 533 /* Section: vm related */
 534 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 535
 536 /*
 537  * Get (and clear) the dirty memory log for a memory slot.
 538  */
 539 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 540                                struct kvm_dirty_log *log)
 541 {
 542         int r;
 543         unsigned long n;
 544         struct kvm_memslots *slots;
 545         struct kvm_memory_slot *memslot;
 546         int is_dirty = 0;
 547
 548         if (kvm_is_ucontrol(kvm))
 549                 return -EINVAL;
 550
 551         mutex_lock(&kvm->slots_lock);
 552
 553         r = -EINVAL;
 554         if (log->slot >= KVM_USER_MEM_SLOTS)
 555                 goto out;
 556
 557         slots = kvm_memslots(kvm);
 558         memslot = id_to_memslot(slots, log->slot);
 559         r = -ENOENT;
 560         if (!memslot->dirty_bitmap)
 561                 goto out;
 562
 563         kvm_s390_sync_dirty_log(kvm, memslot);
 564         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 565         if (r)
 566                 goto out;
 567
 568         /* Clear the dirty log */
 569         if (is_dirty) {
 570                 n = kvm_dirty_bitmap_bytes(memslot);
 571                 memset(memslot->dirty_bitmap, 0, n);
 572         }
 573         r = 0;
 574 out:
 575         mutex_unlock(&kvm->slots_lock);
 576         return r;
 577 }
 578
 579 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 580 {
 581         unsigned int i;
 582         struct kvm_vcpu *vcpu;
 583
 584         kvm_for_each_vcpu(i, vcpu, kvm) {
 585                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 586         }
 587 }
 588
 589 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 590 {
 591         int r;
 592
 593         if (cap->flags)
 594                 return -EINVAL;
 595
 596         switch (cap->cap) {
 597         case KVM_CAP_S390_IRQCHIP:
 598                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 599                 kvm->arch.use_irqchip = 1;
 600                 r = 0;
 601                 break;
 602         case KVM_CAP_S390_USER_SIGP:
 603                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 604                 kvm->arch.user_sigp = 1;
 605                 r = 0;
 606                 break;
 607         case KVM_CAP_S390_VECTOR_REGISTERS:
 608                 mutex_lock(&kvm->lock);
 609                 if (kvm->created_vcpus) {
 610                         r = -EBUSY;
 611                 } else if (MACHINE_HAS_VX) {
 612                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 613                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 614                         if (test_facility(134)) {
 615                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
 616                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
 617                         }
 618                         if (test_facility(135)) {
 619                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
 620                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
 621                         }
 622                         r = 0;
 623                 } else
 624                         r = -EINVAL;
 625                 mutex_unlock(&kvm->lock);
 626                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 627                          r ? "(not available)" : "(success)");
 628                 break;
 629         case KVM_CAP_S390_RI:
 630                 r = -EINVAL;
 631                 mutex_lock(&kvm->lock);
 632                 if (kvm->created_vcpus) {
 633                         r = -EBUSY;
 634                 } else if (test_facility(64)) {
 635                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 636                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 637                         r = 0;
 638                 }
 639                 mutex_unlock(&kvm->lock);
 640                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 641                          r ? "(not available)" : "(success)");
 642                 break;
 643         case KVM_CAP_S390_AIS:
 644                 mutex_lock(&kvm->lock);
 645                 if (kvm->created_vcpus) {
 646                         r = -EBUSY;
 647                 } else {
 648                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
 649                         set_kvm_facility(kvm->arch.model.fac_list, 72);
 650                         r = 0;
 651                 }
 652                 mutex_unlock(&kvm->lock);
 653                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 654                          r ? "(not available)" : "(success)");
 655                 break;
 656         case KVM_CAP_S390_GS:
 657                 r = -EINVAL;
 658                 mutex_lock(&kvm->lock);
 659                 if (kvm->created_vcpus) {
 660                         r = -EBUSY;
 661                 } else if (test_facility(133)) {
 662                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
 663                         set_kvm_facility(kvm->arch.model.fac_list, 133);
 664                         r = 0;
 665                 }
 666                 mutex_unlock(&kvm->lock);
 667                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 668                          r ? "(not available)" : "(success)");
 669                 break;
 670         case KVM_CAP_S390_USER_STSI:
 671                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 672                 kvm->arch.user_stsi = 1;
 673                 r = 0;
 674                 break;
 675         case KVM_CAP_S390_USER_INSTR0:
 676                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 677                 kvm->arch.user_instr0 = 1;
 678                 icpt_operexc_on_all_vcpus(kvm);
 679                 r = 0;
 680                 break;
 681         default:
 682                 r = -EINVAL;
 683                 break;
 684         }
 685         return r;
 686 }
 687
 688 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 689 {
 690         int ret;
 691
 692         switch (attr->attr) {
 693         case KVM_S390_VM_MEM_LIMIT_SIZE:
 694                 ret = 0;
 695                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 696                          kvm->arch.mem_limit);
 697                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 698                         ret = -EFAULT;
 699                 break;
 700         default:
 701                 ret = -ENXIO;
 702                 break;
 703         }
 704         return ret;
 705 }
 706
 707 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 708 {
 709         int ret;
 710         unsigned int idx;
 711         switch (attr->attr) {
 712         case KVM_S390_VM_MEM_ENABLE_CMMA:
 713                 ret = -ENXIO;
 714                 if (!sclp.has_cmma)
 715                         break;
 716
 717                 ret = -EBUSY;
 718                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 719                 mutex_lock(&kvm->lock);
 720                 if (!kvm->created_vcpus) {
 721                         kvm->arch.use_cmma = 1;
 722                         /* Not compatible with cmma. */
 723                         kvm->arch.use_pfmfi = 0;
 724                         ret = 0;
 725                 }
 726                 mutex_unlock(&kvm->lock);
 727                 break;
 728         case KVM_S390_VM_MEM_CLR_CMMA:
 729                 ret = -ENXIO;
 730                 if (!sclp.has_cmma)
 731                         break;
 732                 ret = -EINVAL;
 733                 if (!kvm->arch.use_cmma)
 734                         break;
 735
 736                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 737                 mutex_lock(&kvm->lock);
 738                 idx = srcu_read_lock(&kvm->srcu);
 739                 s390_reset_cmma(kvm->arch.gmap->mm);
 740                 srcu_read_unlock(&kvm->srcu, idx);
 741                 mutex_unlock(&kvm->lock);
 742                 ret = 0;
 743                 break;
 744         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 745                 unsigned long new_limit;
 746
 747                 if (kvm_is_ucontrol(kvm))
 748                         return -EINVAL;
 749
 750                 if (get_user(new_limit, (u64 __user *)attr->addr))
 751                         return -EFAULT;
 752
 753                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 754                     new_limit > kvm->arch.mem_limit)
 755                         return -E2BIG;
 756
 757                 if (!new_limit)
 758                         return -EINVAL;
 759
 760                 /* gmap_create takes last usable address */
 761                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 762                         new_limit -= 1;
 763
 764                 ret = -EBUSY;
 765                 mutex_lock(&kvm->lock);
 766                 if (!kvm->created_vcpus) {
 767                         /* gmap_create will round the limit up */
 768                         struct gmap *new = gmap_create(current->mm, new_limit);
 769
 770                         if (!new) {
 771                                 ret = -ENOMEM;
 772                         } else {
 773                                 gmap_remove(kvm->arch.gmap);
 774                                 new->private = kvm;
 775                                 kvm->arch.gmap = new;
 776                                 ret = 0;
 777                         }
 778                 }
 779                 mutex_unlock(&kvm->lock);
 780                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 781                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 782                          (void *) kvm->arch.gmap->asce);
 783                 break;
 784         }
 785         default:
 786                 ret = -ENXIO;
 787                 break;
 788         }
 789         return ret;
 790 }
 791
 792 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 793
 794 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 795 {
 796         struct kvm_vcpu *vcpu;
 797         int i;
 798
 799         if (!test_kvm_facility(kvm, 76))
 800                 return -EINVAL;
 801
 802         mutex_lock(&kvm->lock);
 803         switch (attr->attr) {
 804         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 805                 get_random_bytes(
 806                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 807                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 808                 kvm->arch.crypto.aes_kw = 1;
 809                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 810                 break;
 811         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 812                 get_random_bytes(
 813                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 814                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 815                 kvm->arch.crypto.dea_kw = 1;
 816                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 817                 break;
 818         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 819                 kvm->arch.crypto.aes_kw = 0;
 820                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 821                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 822                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 823                 break;
 824         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 825                 kvm->arch.crypto.dea_kw = 0;
 826                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 827                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 828                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 829                 break;
 830         default:
 831                 mutex_unlock(&kvm->lock);
 832                 return -ENXIO;
 833         }
 834
 835         kvm_for_each_vcpu(i, vcpu, kvm) {
 836                 kvm_s390_vcpu_crypto_setup(vcpu);
 837                 exit_sie(vcpu);
 838         }
 839         mutex_unlock(&kvm->lock);
 840         return 0;
 841 }
 842
 843 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 844 {
 845         int cx;
 846         struct kvm_vcpu *vcpu;
 847
 848         kvm_for_each_vcpu(cx, vcpu, kvm)
 849                 kvm_s390_sync_request(req, vcpu);
 850 }
 851
 852 /*
 853  * Must be called with kvm->srcu held to avoid races on memslots, and with
 854  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 855  */
 856 static int kvm_s390_vm_start_migration(struct kvm *kvm)
 857 {
 858         struct kvm_s390_migration_state *mgs;
 859         struct kvm_memory_slot *ms;
 860         /* should be the only one */
 861         struct kvm_memslots *slots;
 862         unsigned long ram_pages;
 863         int slotnr;
 864
 865         /* migration mode already enabled */
 866         if (kvm->arch.migration_state)
 867                 return 0;
 868
 869         slots = kvm_memslots(kvm);
 870         if (!slots || !slots->used_slots)
 871                 return -EINVAL;
 872
 873         mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
 874         if (!mgs)
 875                 return -ENOMEM;
 876         kvm->arch.migration_state = mgs;
 877
 878         if (kvm->arch.use_cmma) {
 879                 /*
 880                  * Get the first slot. They are reverse sorted by base_gfn, so
 881                  * the first slot is also the one at the end of the address
 882                  * space. We have verified above that at least one slot is
 883                  * present.
 884                  */
 885                 ms = slots->memslots;
 886                 /* round up so we only use full longs */
 887                 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
 888                 /* allocate enough bytes to store all the bits */
 889                 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
 890                 if (!mgs->pgste_bitmap) {
 891                         kfree(mgs);
 892                         kvm->arch.migration_state = NULL;
 893                         return -ENOMEM;
 894                 }
 895
 896                 mgs->bitmap_size = ram_pages;
 897                 atomic64_set(&mgs->dirty_pages, ram_pages);
 898                 /* mark all the pages in active slots as dirty */
 899                 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
 900                         ms = slots->memslots + slotnr;
 901                         bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
 902                 }
 903
 904                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
 905         }
 906         return 0;
 907 }
 908
 909 /*
 910  * Must be called with kvm->slots_lock to avoid races with ourselves and
 911  * kvm_s390_vm_start_migration.
 912  */
 913 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
 914 {
 915         struct kvm_s390_migration_state *mgs;
 916
 917         /* migration mode already disabled */
 918         if (!kvm->arch.migration_state)
 919                 return 0;
 920         mgs = kvm->arch.migration_state;
 921         kvm->arch.migration_state = NULL;
 922
 923         if (kvm->arch.use_cmma) {
 924                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
 925                 /* We have to wait for the essa emulation to finish */
 926                 synchronize_srcu(&kvm->srcu);
 927                 vfree(mgs->pgste_bitmap);
 928         }
 929         kfree(mgs);
 930         return 0;
 931 }
 932
 933 static int kvm_s390_vm_set_migration(struct kvm *kvm,
 934                                      struct kvm_device_attr *attr)
 935 {
 936         int res = -ENXIO;
 937
 938         mutex_lock(&kvm->slots_lock);
 939         switch (attr->attr) {
 940         case KVM_S390_VM_MIGRATION_START:
 941                 res = kvm_s390_vm_start_migration(kvm);
 942                 break;
 943         case KVM_S390_VM_MIGRATION_STOP:
 944                 res = kvm_s390_vm_stop_migration(kvm);
 945                 break;
 946         default:
 947                 break;
 948         }
 949         mutex_unlock(&kvm->slots_lock);
 950
 951         return res;
 952 }
 953
 954 static int kvm_s390_vm_get_migration(struct kvm *kvm,
 955                                      struct kvm_device_attr *attr)
 956 {
 957         u64 mig = (kvm->arch.migration_state != NULL);
 958
 959         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
 960                 return -ENXIO;
 961
 962         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
 963                 return -EFAULT;
 964         return 0;
 965 }
 966
 967 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
 968 {
 969         struct kvm_s390_vm_tod_clock gtod;
 970
 971         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 972                 return -EFAULT;
 973
 974         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
 975                 return -EINVAL;
 976         kvm_s390_set_tod_clock(kvm, &gtod);
 977
 978         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
 979                 gtod.epoch_idx, gtod.tod);
 980
 981         return 0;
 982 }
 983
 984 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 985 {
 986         u8 gtod_high;
 987
 988         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 989                                            sizeof(gtod_high)))
 990                 return -EFAULT;
 991
 992         if (gtod_high != 0)
 993                 return -EINVAL;
 994         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 995
 996         return 0;
 997 }
 998
 999 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1000 {
1001         struct kvm_s390_vm_tod_clock gtod = { 0 };
1002
1003         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1004                            sizeof(gtod.tod)))
1005                 return -EFAULT;
1006
1007         kvm_s390_set_tod_clock(kvm, &gtod);
1008         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1009         return 0;
1010 }
1011
1012 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1013 {
1014         int ret;
1015
1016         if (attr->flags)
1017                 return -EINVAL;
1018
1019         switch (attr->attr) {
1020         case KVM_S390_VM_TOD_EXT:
1021                 ret = kvm_s390_set_tod_ext(kvm, attr);
1022                 break;
1023         case KVM_S390_VM_TOD_HIGH:
1024                 ret = kvm_s390_set_tod_high(kvm, attr);
1025                 break;
1026         case KVM_S390_VM_TOD_LOW:
1027                 ret = kvm_s390_set_tod_low(kvm, attr);
1028                 break;
1029         default:
1030                 ret = -ENXIO;
1031                 break;
1032         }
1033         return ret;
1034 }
1035
1036 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
1037                                         struct kvm_s390_vm_tod_clock *gtod)
1038 {
1039         struct kvm_s390_tod_clock_ext htod;
1040
1041         preempt_disable();
1042
1043         get_tod_clock_ext((char *)&htod);
1044
1045         gtod->tod = htod.tod + kvm->arch.epoch;
1046         gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1047
1048         if (gtod->tod < htod.tod)
1049                 gtod->epoch_idx += 1;
1050
1051         preempt_enable();
1052 }
1053
1054 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1055 {
1056         struct kvm_s390_vm_tod_clock gtod;
1057
1058         memset(&gtod, 0, sizeof(gtod));
1059
1060         if (test_kvm_facility(kvm, 139))
1061                 kvm_s390_get_tod_clock_ext(kvm, &gtod);
1062         else
1063                 gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
1064
1065         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1066                 return -EFAULT;
1067
1068         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1069                 gtod.epoch_idx, gtod.tod);
1070         return 0;
1071 }
1072
1073 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1074 {
1075         u8 gtod_high = 0;
1076
1077         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1078                                          sizeof(gtod_high)))
1079                 return -EFAULT;
1080         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1081
1082         return 0;
1083 }
1084
1085 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1086 {
1087         u64 gtod;
1088
1089         gtod = kvm_s390_get_tod_clock_fast(kvm);
1090         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1091                 return -EFAULT;
1092         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1093
1094         return 0;
1095 }
1096
1097 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1098 {
1099         int ret;
1100
1101         if (attr->flags)
1102                 return -EINVAL;
1103
1104         switch (attr->attr) {
1105         case KVM_S390_VM_TOD_EXT:
1106                 ret = kvm_s390_get_tod_ext(kvm, attr);
1107                 break;
1108         case KVM_S390_VM_TOD_HIGH:
1109                 ret = kvm_s390_get_tod_high(kvm, attr);
1110                 break;
1111         case KVM_S390_VM_TOD_LOW:
1112                 ret = kvm_s390_get_tod_low(kvm, attr);
1113                 break;
1114         default:
1115                 ret = -ENXIO;
1116                 break;
1117         }
1118         return ret;
1119 }
1120
1121 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1122 {
1123         struct kvm_s390_vm_cpu_processor *proc;
1124         u16 lowest_ibc, unblocked_ibc;
1125         int ret = 0;
1126
1127         mutex_lock(&kvm->lock);
1128         if (kvm->created_vcpus) {
1129                 ret = -EBUSY;
1130                 goto out;
1131         }
1132         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1133         if (!proc) {
1134                 ret = -ENOMEM;
1135                 goto out;
1136         }
1137         if (!copy_from_user(proc, (void __user *)attr->addr,
1138                             sizeof(*proc))) {
1139                 kvm->arch.model.cpuid = proc->cpuid;
1140                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1141                 unblocked_ibc = sclp.ibc & 0xfff;
1142                 if (lowest_ibc && proc->ibc) {
1143                         if (proc->ibc > unblocked_ibc)
1144                                 kvm->arch.model.ibc = unblocked_ibc;
1145                         else if (proc->ibc < lowest_ibc)
1146                                 kvm->arch.model.ibc = lowest_ibc;
1147                         else
1148                                 kvm->arch.model.ibc = proc->ibc;
1149                 }
1150                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1151                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1152                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1153                          kvm->arch.model.ibc,
1154                          kvm->arch.model.cpuid);
1155                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1156                          kvm->arch.model.fac_list[0],
1157                          kvm->arch.model.fac_list[1],
1158                          kvm->arch.model.fac_list[2]);
1159         } else
1160                 ret = -EFAULT;
1161         kfree(proc);
1162 out:
1163         mutex_unlock(&kvm->lock);
1164         return ret;
1165 }
1166
1167 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1168                                        struct kvm_device_attr *attr)
1169 {
1170         struct kvm_s390_vm_cpu_feat data;
1171
1172         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1173                 return -EFAULT;
1174         if (!bitmap_subset((unsigned long *) data.feat,
1175                            kvm_s390_available_cpu_feat,
1176                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1177                 return -EINVAL;
1178
1179         mutex_lock(&kvm->lock);
1180         if (kvm->created_vcpus) {
1181                 mutex_unlock(&kvm->lock);
1182                 return -EBUSY;
1183         }
1184         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1185                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1186         mutex_unlock(&kvm->lock);
1187         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1188                          data.feat[0],
1189                          data.feat[1],
1190                          data.feat[2]);
1191         return 0;
1192 }
1193
1194 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1195                                           struct kvm_device_attr *attr)
1196 {
1197         /*
1198          * Once supported by kernel + hw, we have to store the subfunctions
1199          * in kvm->arch and remember that user space configured them.
1200          */
1201         return -ENXIO;
1202 }
1203
1204 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1205 {
1206         int ret = -ENXIO;
1207
1208         switch (attr->attr) {
1209         case KVM_S390_VM_CPU_PROCESSOR:
1210                 ret = kvm_s390_set_processor(kvm, attr);
1211                 break;
1212         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1213                 ret = kvm_s390_set_processor_feat(kvm, attr);
1214                 break;
1215         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1216                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1217                 break;
1218         }
1219         return ret;
1220 }
1221
1222 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1223 {
1224         struct kvm_s390_vm_cpu_processor *proc;
1225         int ret = 0;
1226
1227         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1228         if (!proc) {
1229                 ret = -ENOMEM;
1230                 goto out;
1231         }
1232         proc->cpuid = kvm->arch.model.cpuid;
1233         proc->ibc = kvm->arch.model.ibc;
1234         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1235                S390_ARCH_FAC_LIST_SIZE_BYTE);
1236         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1237                  kvm->arch.model.ibc,
1238                  kvm->arch.model.cpuid);
1239         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1240                  kvm->arch.model.fac_list[0],
1241                  kvm->arch.model.fac_list[1],
1242                  kvm->arch.model.fac_list[2]);
1243         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1244                 ret = -EFAULT;
1245         kfree(proc);
1246 out:
1247         return ret;
1248 }
1249
1250 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1251 {
1252         struct kvm_s390_vm_cpu_machine *mach;
1253         int ret = 0;
1254
1255         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1256         if (!mach) {
1257                 ret = -ENOMEM;
1258                 goto out;
1259         }
1260         get_cpu_id((struct cpuid *) &mach->cpuid);
1261         mach->ibc = sclp.ibc;
1262         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1263                S390_ARCH_FAC_LIST_SIZE_BYTE);
1264         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1265                sizeof(S390_lowcore.stfle_fac_list));
1266         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1267                  kvm->arch.model.ibc,
1268                  kvm->arch.model.cpuid);
1269         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1270                  mach->fac_mask[0],
1271                  mach->fac_mask[1],
1272                  mach->fac_mask[2]);
1273         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1274                  mach->fac_list[0],
1275                  mach->fac_list[1],
1276                  mach->fac_list[2]);
1277         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1278                 ret = -EFAULT;
1279         kfree(mach);
1280 out:
1281         return ret;
1282 }
1283
1284 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1285                                        struct kvm_device_attr *attr)
1286 {
1287         struct kvm_s390_vm_cpu_feat data;
1288
1289         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1290                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1291         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1292                 return -EFAULT;
1293         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1294                          data.feat[0],
1295                          data.feat[1],
1296                          data.feat[2]);
1297         return 0;
1298 }
1299
1300 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1301                                      struct kvm_device_attr *attr)
1302 {
1303         struct kvm_s390_vm_cpu_feat data;
1304
1305         bitmap_copy((unsigned long *) data.feat,
1306                     kvm_s390_available_cpu_feat,
1307                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1308         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1309                 return -EFAULT;
1310         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1311                          data.feat[0],
1312                          data.feat[1],
1313                          data.feat[2]);
1314         return 0;
1315 }
1316
1317 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1318                                           struct kvm_device_attr *attr)
1319 {
1320         /*
1321          * Once we can actually configure subfunctions (kernel + hw support),
1322          * we have to check if they were already set by user space, if so copy
1323          * them from kvm->arch.
1324          */
1325         return -ENXIO;
1326 }
1327
1328 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1329                                         struct kvm_device_attr *attr)
1330 {
1331         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1332             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1333                 return -EFAULT;
1334         return 0;
1335 }
1336 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1337 {
1338         int ret = -ENXIO;
1339
1340         switch (attr->attr) {
1341         case KVM_S390_VM_CPU_PROCESSOR:
1342                 ret = kvm_s390_get_processor(kvm, attr);
1343                 break;
1344         case KVM_S390_VM_CPU_MACHINE:
1345                 ret = kvm_s390_get_machine(kvm, attr);
1346                 break;
1347         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1348                 ret = kvm_s390_get_processor_feat(kvm, attr);
1349                 break;
1350         case KVM_S390_VM_CPU_MACHINE_FEAT:
1351                 ret = kvm_s390_get_machine_feat(kvm, attr);
1352                 break;
1353         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1354                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1355                 break;
1356         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1357                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1358                 break;
1359         }
1360         return ret;
1361 }
1362
1363 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1364 {
1365         int ret;
1366
1367         switch (attr->group) {
1368         case KVM_S390_VM_MEM_CTRL:
1369                 ret = kvm_s390_set_mem_control(kvm, attr);
1370                 break;
1371         case KVM_S390_VM_TOD:
1372                 ret = kvm_s390_set_tod(kvm, attr);
1373                 break;
1374         case KVM_S390_VM_CPU_MODEL:
1375                 ret = kvm_s390_set_cpu_model(kvm, attr);
1376                 break;
1377         case KVM_S390_VM_CRYPTO:
1378                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1379                 break;
1380         case KVM_S390_VM_MIGRATION:
1381                 ret = kvm_s390_vm_set_migration(kvm, attr);
1382                 break;
1383         default:
1384                 ret = -ENXIO;
1385                 break;
1386         }
1387
1388         return ret;
1389 }
1390
1391 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1392 {
1393         int ret;
1394
1395         switch (attr->group) {
1396         case KVM_S390_VM_MEM_CTRL:
1397                 ret = kvm_s390_get_mem_control(kvm, attr);
1398                 break;
1399         case KVM_S390_VM_TOD:
1400                 ret = kvm_s390_get_tod(kvm, attr);
1401                 break;
1402         case KVM_S390_VM_CPU_MODEL:
1403                 ret = kvm_s390_get_cpu_model(kvm, attr);
1404                 break;
1405         case KVM_S390_VM_MIGRATION:
1406                 ret = kvm_s390_vm_get_migration(kvm, attr);
1407                 break;
1408         default:
1409                 ret = -ENXIO;
1410                 break;
1411         }
1412
1413         return ret;
1414 }
1415
1416 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1417 {
1418         int ret;
1419
1420         switch (attr->group) {
1421         case KVM_S390_VM_MEM_CTRL:
1422                 switch (attr->attr) {
1423                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1424                 case KVM_S390_VM_MEM_CLR_CMMA:
1425                         ret = sclp.has_cmma ? 0 : -ENXIO;
1426                         break;
1427                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1428                         ret = 0;
1429                         break;
1430                 default:
1431                         ret = -ENXIO;
1432                         break;
1433                 }
1434                 break;
1435         case KVM_S390_VM_TOD:
1436                 switch (attr->attr) {
1437                 case KVM_S390_VM_TOD_LOW:
1438                 case KVM_S390_VM_TOD_HIGH:
1439                         ret = 0;
1440                         break;
1441                 default:
1442                         ret = -ENXIO;
1443                         break;
1444                 }
1445                 break;
1446         case KVM_S390_VM_CPU_MODEL:
1447                 switch (attr->attr) {
1448                 case KVM_S390_VM_CPU_PROCESSOR:
1449                 case KVM_S390_VM_CPU_MACHINE:
1450                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1451                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1452                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1453                         ret = 0;
1454                         break;
1455                 /* configuring subfunctions is not supported yet */
1456                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1457                 default:
1458                         ret = -ENXIO;
1459                         break;
1460                 }
1461                 break;
1462         case KVM_S390_VM_CRYPTO:
1463                 switch (attr->attr) {
1464                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1465                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1466                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1467                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1468                         ret = 0;
1469                         break;
1470                 default:
1471                         ret = -ENXIO;
1472                         break;
1473                 }
1474                 break;
1475         case KVM_S390_VM_MIGRATION:
1476                 ret = 0;
1477                 break;
1478         default:
1479                 ret = -ENXIO;
1480                 break;
1481         }
1482
1483         return ret;
1484 }
1485
1486 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1487 {
1488         uint8_t *keys;
1489         uint64_t hva;
1490         int srcu_idx, i, r = 0;
1491
1492         if (args->flags != 0)
1493                 return -EINVAL;
1494
1495         /* Is this guest using storage keys? */
1496         if (!mm_use_skey(current->mm))
1497                 return KVM_S390_GET_SKEYS_NONE;
1498
1499         /* Enforce sane limit on memory allocation */
1500         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1501                 return -EINVAL;
1502
1503         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1504         if (!keys)
1505                 return -ENOMEM;
1506
1507         down_read(&current->mm->mmap_sem);
1508         srcu_idx = srcu_read_lock(&kvm->srcu);
1509         for (i = 0; i < args->count; i++) {
1510                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1511                 if (kvm_is_error_hva(hva)) {
1512                         r = -EFAULT;
1513                         break;
1514                 }
1515
1516                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1517                 if (r)
1518                         break;
1519         }
1520         srcu_read_unlock(&kvm->srcu, srcu_idx);
1521         up_read(&current->mm->mmap_sem);
1522
1523         if (!r) {
1524                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1525                                  sizeof(uint8_t) * args->count);
1526                 if (r)
1527                         r = -EFAULT;
1528         }
1529
1530         kvfree(keys);
1531         return r;
1532 }
1533
1534 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1535 {
1536         uint8_t *keys;
1537         uint64_t hva;
1538         int srcu_idx, i, r = 0;
1539
1540         if (args->flags != 0)
1541                 return -EINVAL;
1542
1543         /* Enforce sane limit on memory allocation */
1544         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1545                 return -EINVAL;
1546
1547         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1548         if (!keys)
1549                 return -ENOMEM;
1550
1551         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1552                            sizeof(uint8_t) * args->count);
1553         if (r) {
1554                 r = -EFAULT;
1555                 goto out;
1556         }
1557
1558         /* Enable storage key handling for the guest */
1559         r = s390_enable_skey();
1560         if (r)
1561                 goto out;
1562
1563         down_read(&current->mm->mmap_sem);
1564         srcu_idx = srcu_read_lock(&kvm->srcu);
1565         for (i = 0; i < args->count; i++) {
1566                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1567                 if (kvm_is_error_hva(hva)) {
1568                         r = -EFAULT;
1569                         break;
1570                 }
1571
1572                 /* Lowest order bit is reserved */
1573                 if (keys[i] & 0x01) {
1574                         r = -EINVAL;
1575                         break;
1576                 }
1577
1578                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1579                 if (r)
1580                         break;
1581         }
1582         srcu_read_unlock(&kvm->srcu, srcu_idx);
1583         up_read(&current->mm->mmap_sem);
1584 out:
1585         kvfree(keys);
1586         return r;
1587 }
1588
1589 /*
1590  * Base address and length must be sent at the start of each block, therefore
1591  * it's cheaper to send some clean data, as long as it's less than the size of
1592  * two longs.
      *
      * kvm_s390_get_cmma_bits() stops filling its buffer once the next dirty
      * page is more than KVM_S390_MAX_BIT_DISTANCE pages past the current one.
1593  */
1594 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1595 /* Upper bound for the CMMA value buffers; kept equal to KVM_S390_SKEYS_MAX for consistency */
1596 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1597
1598 /*
1599  * This function searches for the next page with dirty CMMA attributes, and
1600  * saves the attributes in the buffer up to either the end of the buffer or
1601  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1602  * no trailing clean bytes are saved.
1603  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1604  * output buffer will indicate 0 as length.
1605  */
1606 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1607                                   struct kvm_s390_cmma_log *args)
1608 {
1609         struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1610         unsigned long bufsize, hva, pgstev, i, next, cur;
1611         int srcu_idx, peek, r = 0, rr;
1612         u8 *res;
1613
1614         cur = args->start_gfn;
1615         i = next = pgstev = 0;
1616
1617         if (unlikely(!kvm->arch.use_cmma))
1618                 return -ENXIO;
1619         /* Invalid/unsupported flags were specified */
1620         if (args->flags & ~KVM_S390_CMMA_PEEK)
1621                 return -EINVAL;
1622         /* Migration mode query, and we are not doing a migration */
1623         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1624         if (!peek && !s)
1625                 return -EINVAL;
1626         /* CMMA is disabled or was not used, or the buffer has length zero */
1627         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1628         if (!bufsize || !kvm->mm->context.uses_cmm) {
1629                 memset(args, 0, sizeof(*args));
1630                 return 0;
1631         }
1632
1633         if (!peek) {
1634                 /* We are not peeking, and there are no dirty pages */
1635                 if (!atomic64_read(&s->dirty_pages)) {
1636                         memset(args, 0, sizeof(*args));
1637                         return 0;
1638                 }
1639                 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1640                                     args->start_gfn);
1641                 if (cur >= s->bitmap_size)      /* nothing found, loop back */
1642                         cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1643                 if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
1644                         memset(args, 0, sizeof(*args));
1645                         return 0;
1646                 }
1647                 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1648         }
1649
1650         res = vmalloc(bufsize);
1651         if (!res)
1652                 return -ENOMEM;
1653
1654         args->start_gfn = cur;
1655
1656         down_read(&kvm->mm->mmap_sem);
1657         srcu_idx = srcu_read_lock(&kvm->srcu);
1658         while (i < bufsize) {
1659                 hva = gfn_to_hva(kvm, cur);
1660                 if (kvm_is_error_hva(hva)) {
1661                         r = -EFAULT;
1662                         break;
1663                 }
1664                 /* decrement only if we actually flipped the bit to 0 */
1665                 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1666                         atomic64_dec(&s->dirty_pages);
1667                 r = get_pgste(kvm->mm, hva, &pgstev);
1668                 if (r < 0)
1669                         pgstev = 0;
1670                 /* save the value */
1671                 res[i++] = (pgstev >> 24) & 0x43;
1672                 /*
1673                  * if the next bit is too far away, stop.
1674                  * if we reached the previous "next", find the next one
1675                  */
1676                 if (!peek) {
1677                         if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1678                                 break;
1679                         if (cur == next)
1680                                 next = find_next_bit(s->pgste_bitmap,
1681                                                      s->bitmap_size, cur + 1);
1682                 /* reached the end of the bitmap or of the buffer, stop */
1683                         if ((next >= s->bitmap_size) ||
1684                             (next >= args->start_gfn + bufsize))
1685                                 break;
1686                 }
1687                 cur++;
1688         }
1689         srcu_read_unlock(&kvm->srcu, srcu_idx);
1690         up_read(&kvm->mm->mmap_sem);
1691         args->count = i;
1692         args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1693
1694         rr = copy_to_user((void __user *)args->values, res, args->count);
1695         if (rr)
1696                 r = -EFAULT;
1697
1698         vfree(res);
1699         return r;
1700 }
1701
1702 /*
1703  * This function sets the CMMA attributes for the given pages. If the input
1704  * buffer has zero length, no action is taken, otherwise the attributes are
1705  * set and the mm->context.uses_cmm flag is set.
1706  */
1707 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1708                                   const struct kvm_s390_cmma_log *args)
1709 {
1710         unsigned long hva, mask, pgstev, i;
1711         uint8_t *bits;
1712         int srcu_idx, r = 0;
1713
1714         mask = args->mask;
1715
1716         if (!kvm->arch.use_cmma)
1717                 return -ENXIO;
1718         /* invalid/unsupported flags */
1719         if (args->flags != 0)
1720                 return -EINVAL;
1721         /* Enforce sane limit on memory allocation */
1722         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1723                 return -EINVAL;
1724         /* Nothing to do */
1725         if (args->count == 0)
1726                 return 0;
1727
1728         bits = vmalloc(sizeof(*bits) * args->count);
1729         if (!bits)
1730                 return -ENOMEM;
1731
1732         r = copy_from_user(bits, (void __user *)args->values, args->count);
1733         if (r) {
1734                 r = -EFAULT;
1735                 goto out;
1736         }
1737
1738         down_read(&kvm->mm->mmap_sem);
1739         srcu_idx = srcu_read_lock(&kvm->srcu);
1740         for (i = 0; i < args->count; i++) {
1741                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1742                 if (kvm_is_error_hva(hva)) {
1743                         r = -EFAULT;
1744                         break;
1745                 }
1746
1747                 pgstev = bits[i];
1748                 pgstev = pgstev << 24;
1749                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1750                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1751         }
1752         srcu_read_unlock(&kvm->srcu, srcu_idx);
1753         up_read(&kvm->mm->mmap_sem);
1754
1755         if (!kvm->mm->context.uses_cmm) {
1756                 down_write(&kvm->mm->mmap_sem);
1757                 kvm->mm->context.uses_cmm = 1;
1758                 up_write(&kvm->mm->mmap_sem);
1759         }
1760 out:
1761         vfree(bits);
1762         return r;
1763 }
1764
1765 long kvm_arch_vm_ioctl(struct file *filp,
1766                        unsigned int ioctl, unsigned long arg)
1767 {
1768         struct kvm *kvm = filp->private_data;
1769         void __user *argp = (void __user *)arg;
1770         struct kvm_device_attr attr;
1771         int r;
1772
1773         switch (ioctl) {
1774         case KVM_S390_INTERRUPT: {
1775                 struct kvm_s390_interrupt s390int;
1776
1777                 r = -EFAULT;
1778                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1779                         break;
1780                 r = kvm_s390_inject_vm(kvm, &s390int);
1781                 break;
1782         }
1783         case KVM_ENABLE_CAP: {
1784                 struct kvm_enable_cap cap;
1785                 r = -EFAULT;
1786                 if (copy_from_user(&cap, argp, sizeof(cap)))
1787                         break;
1788                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1789                 break;
1790         }
1791         case KVM_CREATE_IRQCHIP: {
1792                 struct kvm_irq_routing_entry routing;
1793
1794                 r = -EINVAL;
1795                 if (kvm->arch.use_irqchip) {
1796                         /* Set up dummy routing. */
1797                         memset(&routing, 0, sizeof(routing));
1798                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1799                 }
1800                 break;
1801         }
1802         case KVM_SET_DEVICE_ATTR: {
1803                 r = -EFAULT;
1804                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1805                         break;
1806                 r = kvm_s390_vm_set_attr(kvm, &attr);
1807                 break;
1808         }
1809         case KVM_GET_DEVICE_ATTR: {
1810                 r = -EFAULT;
1811                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1812                         break;
1813                 r = kvm_s390_vm_get_attr(kvm, &attr);
1814                 break;
1815         }
1816         case KVM_HAS_DEVICE_ATTR: {
1817                 r = -EFAULT;
1818                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1819                         break;
1820                 r = kvm_s390_vm_has_attr(kvm, &attr);
1821                 break;
1822         }
1823         case KVM_S390_GET_SKEYS: {
1824                 struct kvm_s390_skeys args;
1825
1826                 r = -EFAULT;
1827                 if (copy_from_user(&args, argp,
1828                                    sizeof(struct kvm_s390_skeys)))
1829                         break;
1830                 r = kvm_s390_get_skeys(kvm, &args);
1831                 break;
1832         }
1833         case KVM_S390_SET_SKEYS: {
1834                 struct kvm_s390_skeys args;
1835
1836                 r = -EFAULT;
1837                 if (copy_from_user(&args, argp,
1838                                    sizeof(struct kvm_s390_skeys)))
1839                         break;
1840                 r = kvm_s390_set_skeys(kvm, &args);
1841                 break;
1842         }
1843         case KVM_S390_GET_CMMA_BITS: {
1844                 struct kvm_s390_cmma_log args;
1845
1846                 r = -EFAULT;
1847                 if (copy_from_user(&args, argp, sizeof(args)))
1848                         break;
1849                 mutex_lock(&kvm->slots_lock);
1850                 r = kvm_s390_get_cmma_bits(kvm, &args);
1851                 mutex_unlock(&kvm->slots_lock);
1852                 if (!r) {
1853                         r = copy_to_user(argp, &args, sizeof(args));
1854                         if (r)
1855                                 r = -EFAULT;
1856                 }
1857                 break;
1858         }
1859         case KVM_S390_SET_CMMA_BITS: {
1860                 struct kvm_s390_cmma_log args;
1861
1862                 r = -EFAULT;
1863                 if (copy_from_user(&args, argp, sizeof(args)))
1864                         break;
1865                 mutex_lock(&kvm->slots_lock);
1866                 r = kvm_s390_set_cmma_bits(kvm, &args);
1867                 mutex_unlock(&kvm->slots_lock);
1868                 break;
1869         }
1870         default:
1871                 r = -ENOTTY;
1872         }
1873
1874         return r;
1875 }
1876
/*
 * Issue PQAP(QCI) and store its result in @config.
 * @config: output buffer; 128 bytes are cleared here, so it must be at
 *          least that large
 *
 * General register 0 is loaded with the function code 0x04000000 and
 * general register 2 with the address of @config before the instruction is
 * issued; afterwards the condition code is extracted with ipm/srl.
 *
 * Returns the condition code. The EX_TABLE entry continues at label 1 when
 * the code at label 0 faults, which skips the ipm/srl and leaves cc at its
 * initial 0.
 * NOTE(review): presumably @config then simply stays zeroed - confirm which
 * instruction address the fixup is meant to cover.
 */
1877 static int kvm_s390_query_ap_config(u8 *config)
1878 {
1879         u32 fcn_code = 0x04000000UL;
1880         u32 cc = 0;
1881
1882         memset(config, 0, 128);
1883         asm volatile(
1884                 "lgr 0,%1\n"
1885                 "lgr 2,%2\n"
1886                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1887                 "0: ipm %0\n"
1888                 "srl %0,28\n"
1889                 "1:\n"
1890                 EX_TABLE(0b, 1b)
1891                 : "+r" (cc)
1892                 : "r" (fcn_code), "r" (config)
1893                 : "cc", "0", "2", "memory"
1894         );
1895
1896         return cc;
1897 }
1898
1899 static int kvm_s390_apxa_installed(void)
1900 {
1901         u8 config[128];
1902         int cc;
1903
1904         if (test_facility(12)) {
1905                 cc = kvm_s390_query_ap_config(config);
1906
1907                 if (cc)
1908                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1909                 else
1910                         return config[0] & 0x40;
1911         }
1912
1913         return 0;
1914 }
1915
1916 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1917 {
1918         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1919
1920         if (kvm_s390_apxa_installed())
1921                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1922         else
1923                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1924 }
1925
1926 static u64 kvm_s390_get_initial_cpuid(void)
1927 {
1928         struct cpuid cpuid;
1929
1930         get_cpu_id(&cpuid);
1931         cpuid.version = 0xff;
1932         return *((u64 *) &cpuid);
1933 }
1934
1935 static void kvm_s390_crypto_init(struct kvm *kvm)
1936 {
1937         if (!test_kvm_facility(kvm, 76))
1938                 return;
1939
1940         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1941         kvm_s390_set_crycb_format(kvm);
1942
1943         /* Enable AES/DEA protected key functions by default */
1944         kvm->arch.crypto.aes_kw = 1;
1945         kvm->arch.crypto.dea_kw = 1;
1946         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1947                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1948         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1949                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1950 }
1951
1952 static void sca_dispose(struct kvm *kvm)
1953 {
1954         if (kvm->arch.use_esca)
1955                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1956         else
1957                 free_page((unsigned long)(kvm->arch.sca));
1958         kvm->arch.sca = NULL;
1959 }
1960
/*
 * kvm_arch_init_vm - s390 specific part of VM creation
 * @kvm:  the VM being created
 * @type: VM type; only the KVM_VM_S390_UCONTROL bit is accepted, and only
 *        with CONFIG_KVM_S390_UCONTROL and CAP_SYS_ADMIN, otherwise it
 *        must be 0
 *
 * Allocates the basic SCA, the debug feature area, sie_page2 and (for
 * non-ucontrol VMs) the guest address space, and initializes the facility
 * mask/list, cpu model, crypto, floating interrupt, vsie and gisa state.
 *
 * Returns 0 on success, -EINVAL for an unsupported @type, the error from
 * s390_enable_sie(), or -ENOMEM when one of the allocations fails. On
 * failure sie_page2, the debug area and the SCA are released under out_err.
 */
1961 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1962 {
1963         gfp_t alloc_flags = GFP_KERNEL;
1964         int i, rc;
1965         char debug_name[16];
1966         static unsigned long sca_offset;
1967
1968         rc = -EINVAL;
1969 #ifdef CONFIG_KVM_S390_UCONTROL
1970         if (type & ~KVM_VM_S390_UCONTROL)
1971                 goto out_err;
1972         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1973                 goto out_err;
1974 #else
1975         if (type)
1976                 goto out_err;
1977 #endif
1978
1979         rc = s390_enable_sie();
1980         if (rc)
1981                 goto out_err;
1982
             /* every failure from here on is reported as -ENOMEM */
1983         rc = -ENOMEM;
1984
1985         kvm->arch.use_esca = 0; /* start with basic SCA */
1986         if (!sclp.has_64bscao)
1987                 alloc_flags |= GFP_DMA;
1988         rwlock_init(&kvm->arch.sca_lock);
1989         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1990         if (!kvm->arch.sca)
1991                 goto out_err;
             /*
              * Place the SCA at a varying offset inside its page: every new
              * VM advances the shared offset by 16 bytes, wrapping to 0 once
              * a bsca_block would no longer fit.
              */
1992         spin_lock(&kvm_lock);
1993         sca_offset += 16;
1994         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1995                 sca_offset = 0;
1996         kvm->arch.sca = (struct bsca_block *)
1997                         ((char *) kvm->arch.sca + sca_offset);
1998         spin_unlock(&kvm_lock);
1999
2000         sprintf(debug_name, "kvm-%u", current->pid);
2001
2002         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2003         if (!kvm->arch.dbf)
2004                 goto out_err;
2005
2006         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2007         kvm->arch.sie_page2 =
2008              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2009         if (!kvm->arch.sie_page2)
2010                 goto out_err;
2011
2012         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2013
             /*
              * fac_mask: stfle facilities that are in the base or the
              * extended set; fac_list: stfle facilities in the base set only.
              */
2014         for (i = 0; i < kvm_s390_fac_size(); i++) {
2015                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2016                                               (kvm_s390_fac_base[i] |
2017                                                kvm_s390_fac_ext[i]);
2018                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2019                                               kvm_s390_fac_base[i];
2020         }
2021
2022         /* we are always in czam mode - even on pre z14 machines */
2023         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2024         set_kvm_facility(kvm->arch.model.fac_list, 138);
2025         /* we emulate STHYI in kvm */
2026         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2027         set_kvm_facility(kvm->arch.model.fac_list, 74);
2028         if (MACHINE_HAS_TLB_GUEST) {
2029                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2030                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2031         }
2032
2033         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2034         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2035
2036         kvm_s390_crypto_init(kvm);
2037
2038         mutex_init(&kvm->arch.float_int.ais_lock);
2039         kvm->arch.float_int.simm = 0;
2040         kvm->arch.float_int.nimm = 0;
2041         spin_lock_init(&kvm->arch.float_int.lock);
2042         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2043                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2044         init_waitqueue_head(&kvm->arch.ipte_wq);
2045         mutex_init(&kvm->arch.ipte_mutex);
2046
2047         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2048         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2049
             /*
              * ucontrol VMs get no VM-wide gmap and no memory limit; all
              * others get a gmap sized to the smaller of TASK_SIZE_MAX and
              * sclp.hamax + 1.
              */
2050         if (type & KVM_VM_S390_UCONTROL) {
2051                 kvm->arch.gmap = NULL;
2052                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2053         } else {
2054                 if (sclp.hamax == U64_MAX)
2055                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2056                 else
2057                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2058                                                     sclp.hamax + 1);
2059                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2060                 if (!kvm->arch.gmap)
2061                         goto out_err;
2062                 kvm->arch.gmap->private = kvm;
2063                 kvm->arch.gmap->pfault_enabled = 0;
2064         }
2065
2066         kvm->arch.css_support = 0;
2067         kvm->arch.use_irqchip = 0;
2068         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2069         kvm->arch.epoch = 0;
2070
2071         spin_lock_init(&kvm->arch.start_stop_lock);
2072         kvm_s390_vsie_init(kvm);
2073         kvm_s390_gisa_init(kvm);
2074         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2075
2076         return 0;
2077 out_err:
             /*
              * NOTE(review): this path is also taken before sie_page2, dbf
              * and sca are assigned; it presumably relies on struct kvm
              * being zero-initialized by the caller - confirm.
              */
2078         free_page((unsigned long)kvm->arch.sie_page2);
2079         debug_unregister(kvm->arch.dbf);
2080         sca_dispose(kvm);
2081         KVM_EVENT(3, "creation of vm failed: %d", rc);
2082         return rc;
2083 }
2084
/* Always returns false. */
2085 bool kvm_arch_has_vcpu_debugfs(void)
2086 {
2087         return false;
2088 }
2089
/* Creates nothing for @vcpu and always returns 0. */
2090 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2091 {
2092         return 0;
2093 }
2094
2095 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2096 {
2097         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2098         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2099         kvm_s390_clear_local_irqs(vcpu);
2100         kvm_clear_async_pf_completion_queue(vcpu);
2101         if (!kvm_is_ucontrol(vcpu->kvm))
2102                 sca_del_vcpu(vcpu);
2103
2104         if (kvm_is_ucontrol(vcpu->kvm))
2105                 gmap_remove(vcpu->arch.gmap);
2106
2107         if (vcpu->kvm->arch.use_cmma)
2108                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2109         free_page((unsigned long)(vcpu->arch.sie_block));
2110
2111         kvm_vcpu_uninit(vcpu);
2112         kmem_cache_free(kvm_vcpu_cache, vcpu);
2113 }
2114
2115 static void kvm_free_vcpus(struct kvm *kvm)
2116 {
2117         unsigned int i;
2118         struct kvm_vcpu *vcpu;
2119
2120         kvm_for_each_vcpu(i, vcpu, kvm)
2121                 kvm_arch_vcpu_destroy(vcpu);
2122
2123         mutex_lock(&kvm->lock);
2124         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2125                 kvm->vcpus[i] = NULL;
2126
2127         atomic_set(&kvm->online_vcpus, 0);
2128         mutex_unlock(&kvm->lock);
2129 }
2130
2131 void kvm_arch_destroy_vm(struct kvm *kvm)
2132 {
2133         kvm_free_vcpus(kvm);
2134         sca_dispose(kvm);
2135         debug_unregister(kvm->arch.dbf);
2136         kvm_s390_gisa_destroy(kvm);
2137         free_page((unsigned long)kvm->arch.sie_page2);
2138         if (!kvm_is_ucontrol(kvm))
2139                 gmap_remove(kvm->arch.gmap);
2140         kvm_s390_destroy_adapters(kvm);
2141         kvm_s390_clear_float_irqs(kvm);
2142         kvm_s390_vsie_destroy(kvm);
2143         if (kvm->arch.migration_state) {
2144                 vfree(kvm->arch.migration_state->pgste_bitmap);
2145                 kfree(kvm->arch.migration_state);
2146         }
2147         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2148 }
2149
2150 /* Section: vcpu related */
2151 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2152 {
2153         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2154         if (!vcpu->arch.gmap)
2155                 return -ENOMEM;
2156         vcpu->arch.gmap->private = vcpu->kvm;
2157
2158         return 0;
2159 }
2160
2161 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2162 {
2163         if (!kvm_s390_use_sca_entries())
2164                 return;
2165         read_lock(&vcpu->kvm->arch.sca_lock);
2166         if (vcpu->kvm->arch.use_esca) {
2167                 struct esca_block *sca = vcpu->kvm->arch.sca;
2168
2169                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2170                 sca->cpu[vcpu->vcpu_id].sda = 0;
2171         } else {
2172                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2173
2174                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2175                 sca->cpu[vcpu->vcpu_id].sda = 0;
2176         }
2177         read_unlock(&vcpu->kvm->arch.sca_lock);
2178 }
2179
2180 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2181 {
2182         if (!kvm_s390_use_sca_entries()) {
2183                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2184
2185                 /* we still need the basic sca for the ipte control */
2186                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2187                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2188                 return;
2189         }
2190         read_lock(&vcpu->kvm->arch.sca_lock);
2191         if (vcpu->kvm->arch.use_esca) {
2192                 struct esca_block *sca = vcpu->kvm->arch.sca;
2193
2194                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2195                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2196                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2197                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2198                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2199         } else {
2200                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2201
2202                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2203                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2204                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2205                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2206         }
2207         read_unlock(&vcpu->kvm->arch.sca_lock);
2208 }
2209
2210 /* Basic SCA to Extended SCA data copy routines */
/*
 * Copy one CPU slot from a basic SCA entry @s to an extended SCA entry @d:
 * the sda field and the c and scn fields of sigp_ctrl.
 */
2211 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2212 {
2213         d->sda = s->sda;
2214         d->sigp_ctrl.c = s->sigp_ctrl.c;
2215         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2216 }
2217
2218 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2219 {
2220         int i;
2221
2222         d->ipte_control = s->ipte_control;
2223         d->mcn[0] = s->mcn;
2224         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2225                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2226 }
2227
2228 static int sca_switch_to_extended(struct kvm *kvm)
2229 {
2230         struct bsca_block *old_sca = kvm->arch.sca;
2231         struct esca_block *new_sca;
2232         struct kvm_vcpu *vcpu;
2233         unsigned int vcpu_idx;
2234         u32 scaol, scaoh;
2235
2236         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2237         if (!new_sca)
2238                 return -ENOMEM;
2239
2240         scaoh = (u32)((u64)(new_sca) >> 32);
2241         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2242
2243         kvm_s390_vcpu_block_all(kvm);
2244         write_lock(&kvm->arch.sca_lock);
2245
2246         sca_copy_b_to_e(new_sca, old_sca);
2247
2248         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2249                 vcpu->arch.sie_block->scaoh = scaoh;
2250                 vcpu->arch.sie_block->scaol = scaol;
2251                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2252         }
2253         kvm->arch.sca = new_sca;
2254         kvm->arch.use_esca = 1;
2255
2256         write_unlock(&kvm->arch.sca_lock);
2257         kvm_s390_vcpu_unblock_all(kvm);
2258
2259         free_page((unsigned long)old_sca);
2260
2261         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2262                  old_sca, kvm->arch.sca);
2263         return 0;
2264 }
2265
2266 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2267 {
2268         int rc;
2269
2270         if (!kvm_s390_use_sca_entries()) {
2271                 if (id < KVM_MAX_VCPUS)
2272                         return true;
2273                 return false;
2274         }
2275         if (id < KVM_S390_BSCA_CPU_SLOTS)
2276                 return true;
2277         if (!sclp.has_esca || !sclp.has_64bscao)
2278                 return false;
2279
2280         mutex_lock(&kvm->lock);
2281         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2282         mutex_unlock(&kvm->lock);
2283
2284         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2285 }
2286
2287 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2288 {
2289         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2290         kvm_clear_async_pf_completion_queue(vcpu);
2291         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2292                                     KVM_SYNC_GPRS |
2293                                     KVM_SYNC_ACRS |
2294                                     KVM_SYNC_CRS |
2295                                     KVM_SYNC_ARCH0 |
2296                                     KVM_SYNC_PFAULT;
2297         kvm_s390_set_prefix(vcpu, 0);
2298         if (test_kvm_facility(vcpu->kvm, 64))
2299                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2300         if (test_kvm_facility(vcpu->kvm, 82))
2301                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2302         if (test_kvm_facility(vcpu->kvm, 133))
2303                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2304         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2305          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2306          */
2307         if (MACHINE_HAS_VX)
2308                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2309         else
2310                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2311
2312         if (kvm_is_ucontrol(vcpu->kvm))
2313                 return __kvm_ucontrol_vcpu_init(vcpu);
2314
2315         return 0;
2316 }
2317
2318 /*
      * Start a CPU timer accounting interval: record the current TOD clock in
      * vcpu->arch.cputm_start inside a cputm_seqcount write section. Warns once
      * if an interval is already running (cputm_start != 0).
      *
      * needs disabled preemption to protect from TOD sync and vcpu_load/put
      */
2319 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2320 {
2321         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2322         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2323         vcpu->arch.cputm_start = get_tod_clock_fast();
2324         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2325 }
2326
2327 /*
      * End the running CPU timer accounting interval: subtract the TOD time
      * elapsed since cputm_start from sie_block->cputm and reset cputm_start to
      * 0, inside a cputm_seqcount write section. Warns once if no interval is
      * running (cputm_start == 0).
      *
      * needs disabled preemption to protect from TOD sync and vcpu_load/put
      */
2328 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2329 {
2330         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2331         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2332         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2333         vcpu->arch.cputm_start = 0;
2334         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2335 }
2336
2337 /*
      * Mark CPU timer accounting as enabled for the vcpu and start an
      * accounting interval. Warns once if accounting is already enabled.
      *
      * needs disabled preemption to protect from TOD sync and vcpu_load/put
      */
2338 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2339 {
2340         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2341         vcpu->arch.cputm_enabled = true;
2342         __start_cpu_timer_accounting(vcpu);
2343 }
2344
2345 /*
      * Stop the running accounting interval and mark CPU timer accounting as
      * disabled for the vcpu. Warns once if accounting is not enabled.
      *
      * needs disabled preemption to protect from TOD sync and vcpu_load/put
      */
2346 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2347 {
2348         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2349         __stop_cpu_timer_accounting(vcpu);
2350         vcpu->arch.cputm_enabled = false;
2351 }
2352
/* Enable CPU timer accounting for the vcpu with preemption disabled. */
2353 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2354 {
2355         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2356         __enable_cpu_timer_accounting(vcpu);
2357         preempt_enable();
2358 }
2359
/* Disable CPU timer accounting for the vcpu with preemption disabled. */
2360 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2361 {
2362         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2363         __disable_cpu_timer_accounting(vcpu);
2364         preempt_enable();
2365 }
2366
2367 /*
      * set the cpu timer - may only be called from the VCPU thread itself
      *
      * Stores @cputm in sie_block->cputm inside a cputm_seqcount write section.
      * If accounting is enabled, cputm_start is reloaded with the current TOD
      * clock in the same section, so the running interval restarts at the new
      * value.
      */
2368 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2369 {
2370         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2371         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2372         if (vcpu->arch.cputm_enabled)
2373                 vcpu->arch.cputm_start = get_tod_clock_fast();
2374         vcpu->arch.sie_block->cputm = cputm;
2375         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2376         preempt_enable();
2377 }
2378
2379 /* update and get the cpu timer - can also be called from other VCPU threads */
2380 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2381 {
2382         unsigned int seq;
2383         __u64 value;
2384
2385         if (unlikely(!vcpu->arch.cputm_enabled))
2386                 return vcpu->arch.sie_block->cputm;
2387
2388         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2389         do {
2390                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2391                 /*
2392                  * If the writer would ever execute a read in the critical
2393                  * section, e.g. in irq context, we have a deadlock.
2394                  */
2395                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2396                 value = vcpu->arch.sie_block->cputm;
2397                 /* if cputm_start is 0, accounting is being started/stopped */
2398                 if (likely(vcpu->arch.cputm_start))
2399                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2400         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2401         preempt_enable();
2402         return value;
2403 }
2404
2405 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2406 {
2407
2408         gmap_enable(vcpu->arch.enabled_gmap);
2409         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2410         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2411                 __start_cpu_timer_accounting(vcpu);
2412         vcpu->cpu = cpu;
2413 }
2414
2415 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2416 {
2417         vcpu->cpu = -1;
2418         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2419                 __stop_cpu_timer_accounting(vcpu);
2420         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2421         vcpu->arch.enabled_gmap = gmap_get_enabled();
2422         gmap_disable(vcpu->arch.enabled_gmap);
2423
2424 }
2425
2426 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2427 {
2428         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2429         vcpu->arch.sie_block->gpsw.mask = 0UL;
2430         vcpu->arch.sie_block->gpsw.addr = 0UL;
2431         kvm_s390_set_prefix(vcpu, 0);
2432         kvm_s390_set_cpu_timer(vcpu, 0);
2433         vcpu->arch.sie_block->ckc       = 0UL;
2434         vcpu->arch.sie_block->todpr     = 0;
2435         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2436         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2437         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2438         /* make sure the new fpc will be lazily loaded */
2439         save_fpu_regs();
2440         current->thread.fpu.fpc = 0;
2441         vcpu->arch.sie_block->gbea = 1;
2442         vcpu->arch.sie_block->pp = 0;
2443         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2444         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2445         kvm_clear_async_pf_completion_queue(vcpu);
2446         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2447                 kvm_s390_vcpu_stop(vcpu);
2448         kvm_s390_clear_local_irqs(vcpu);
2449 }
2450
2451 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2452 {
2453         mutex_lock(&vcpu->kvm->lock);
2454         preempt_disable();
2455         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2456         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2457         preempt_enable();
2458         mutex_unlock(&vcpu->kvm->lock);
2459         if (!kvm_is_ucontrol(vcpu->kvm)) {
2460                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2461                 sca_add_vcpu(vcpu);
2462         }
2463         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2464                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2465         /* make vcpu_load load the right gmap on the first trigger */
2466         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2467 }
2468
2469 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2470 {
2471         if (!test_kvm_facility(vcpu->kvm, 76))
2472                 return;
2473
2474         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2475
2476         if (vcpu->kvm->arch.crypto.aes_kw)
2477                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2478         if (vcpu->kvm->arch.crypto.dea_kw)
2479                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2480
2481         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2482 }
2483
2484 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2485 {
2486         free_page(vcpu->arch.sie_block->cbrlo);
2487         vcpu->arch.sie_block->cbrlo = 0;
2488 }
2489
2490 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2491 {
2492         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2493         if (!vcpu->arch.sie_block->cbrlo)
2494                 return -ENOMEM;
2495         return 0;
2496 }
2497
2498 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2499 {
2500         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2501
2502         vcpu->arch.sie_block->ibc = model->ibc;
2503         if (test_kvm_facility(vcpu->kvm, 7))
2504                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2505 }
2506
2507 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2508 {
2509         int rc = 0;
2510
2511         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2512                                                     CPUSTAT_SM |
2513                                                     CPUSTAT_STOPPED);
2514
2515         if (test_kvm_facility(vcpu->kvm, 78))
2516                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2517         else if (test_kvm_facility(vcpu->kvm, 8))
2518                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2519
2520         kvm_s390_vcpu_setup_model(vcpu);
2521
2522         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2523         if (MACHINE_HAS_ESOP)
2524                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2525         if (test_kvm_facility(vcpu->kvm, 9))
2526                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2527         if (test_kvm_facility(vcpu->kvm, 73))
2528                 vcpu->arch.sie_block->ecb |= ECB_TE;
2529
2530         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2531                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2532         if (test_kvm_facility(vcpu->kvm, 130))
2533                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2534         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2535         if (sclp.has_cei)
2536                 vcpu->arch.sie_block->eca |= ECA_CEI;
2537         if (sclp.has_ib)
2538                 vcpu->arch.sie_block->eca |= ECA_IB;
2539         if (sclp.has_siif)
2540                 vcpu->arch.sie_block->eca |= ECA_SII;
2541         if (sclp.has_sigpif)
2542                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2543         if (test_kvm_facility(vcpu->kvm, 129)) {
2544                 vcpu->arch.sie_block->eca |= ECA_VX;
2545                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2546         }
2547         if (test_kvm_facility(vcpu->kvm, 139))
2548                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2549
2550         if (vcpu->arch.sie_block->gd) {
2551                 vcpu->arch.sie_block->eca |= ECA_AIV;
2552                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2553                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2554         }
2555         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2556                                         | SDNXC;
2557         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2558
2559         if (sclp.has_kss)
2560                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2561         else
2562                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2563
2564         if (vcpu->kvm->arch.use_cmma) {
2565                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2566                 if (rc)
2567                         return rc;
2568         }
2569         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2570         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2571
2572         kvm_s390_vcpu_crypto_setup(vcpu);
2573
2574         return rc;
2575 }
2576
2577 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2578                                       unsigned int id)
2579 {
2580         struct kvm_vcpu *vcpu;
2581         struct sie_page *sie_page;
2582         int rc = -EINVAL;
2583
2584         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2585                 goto out;
2586
2587         rc = -ENOMEM;
2588
2589         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2590         if (!vcpu)
2591                 goto out;
2592
2593         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2594         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2595         if (!sie_page)
2596                 goto out_free_cpu;
2597
2598         vcpu->arch.sie_block = &sie_page->sie_block;
2599         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2600
2601         /* the real guest size will always be smaller than msl */
2602         vcpu->arch.sie_block->mso = 0;
2603         vcpu->arch.sie_block->msl = sclp.hamax;
2604
2605         vcpu->arch.sie_block->icpua = id;
2606         spin_lock_init(&vcpu->arch.local_int.lock);
2607         vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2608         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2609                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2610         seqcount_init(&vcpu->arch.cputm_seqcount);
2611
2612         rc = kvm_vcpu_init(vcpu, kvm, id);
2613         if (rc)
2614                 goto out_free_sie_block;
2615         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2616                  vcpu->arch.sie_block);
2617         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2618
2619         return vcpu;
2620 out_free_sie_block:
2621         free_page((unsigned long)(vcpu->arch.sie_block));
2622 out_free_cpu:
2623         kmem_cache_free(kvm_vcpu_cache, vcpu);
2624 out:
2625         return ERR_PTR(rc);
2626 }
2627
/* Report the result of kvm_s390_vcpu_has_irq() for @vcpu. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
2632
2633 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2634 {
2635         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2636 }
2637
2638 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2639 {
2640         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2641         exit_sie(vcpu);
2642 }
2643
2644 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2645 {
2646         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2647 }
2648
2649 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2650 {
2651         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2652         exit_sie(vcpu);
2653 }
2654
2655 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2656 {
2657         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2658 }
2659
2660 /*
2661  * Kick a guest cpu out of SIE and wait until SIE is not running.
2662  * If the CPU is not running (e.g. waiting as idle) the function will
2663  * return immediately. */
2664 void exit_sie(struct kvm_vcpu *vcpu)
2665 {
2666         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2667         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2668                 cpu_relax();
2669 }
2670
/*
 * Post request @req on @vcpu, then kick the guest cpu out of SIE so that
 * the request gets processed synchronously.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	/* the request is posted before the kick */
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
2677
2678 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2679                               unsigned long end)
2680 {
2681         struct kvm *kvm = gmap->private;
2682         struct kvm_vcpu *vcpu;
2683         unsigned long prefix;
2684         int i;
2685
2686         if (gmap_is_shadow(gmap))
2687                 return;
2688         if (start >= 1UL << 31)
2689                 /* We are only interested in prefix pages */
2690                 return;
2691         kvm_for_each_vcpu(i, vcpu, kvm) {
2692                 /* match against both prefix pages */
2693                 prefix = kvm_s390_get_prefix(vcpu);
2694                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2695                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2696                                    start, end);
2697                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2698                 }
2699         }
2700 }
2701
/*
 * kvm common code refers to this function but never calls it, so reaching
 * it is treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();

	return 0;
}
2708
2709 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2710                                            struct kvm_one_reg *reg)
2711 {
2712         int r = -EINVAL;
2713
2714         switch (reg->id) {
2715         case KVM_REG_S390_TODPR:
2716                 r = put_user(vcpu->arch.sie_block->todpr,
2717                              (u32 __user *)reg->addr);
2718                 break;
2719         case KVM_REG_S390_EPOCHDIFF:
2720                 r = put_user(vcpu->arch.sie_block->epoch,
2721                              (u64 __user *)reg->addr);
2722                 break;
2723         case KVM_REG_S390_CPU_TIMER:
2724                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2725                              (u64 __user *)reg->addr);
2726                 break;
2727         case KVM_REG_S390_CLOCK_COMP:
2728                 r = put_user(vcpu->arch.sie_block->ckc,
2729                              (u64 __user *)reg->addr);
2730                 break;
2731         case KVM_REG_S390_PFTOKEN:
2732                 r = put_user(vcpu->arch.pfault_token,
2733                              (u64 __user *)reg->addr);
2734                 break;
2735         case KVM_REG_S390_PFCOMPARE:
2736                 r = put_user(vcpu->arch.pfault_compare,
2737                              (u64 __user *)reg->addr);
2738                 break;
2739         case KVM_REG_S390_PFSELECT:
2740                 r = put_user(vcpu->arch.pfault_select,
2741                              (u64 __user *)reg->addr);
2742                 break;
2743         case KVM_REG_S390_PP:
2744                 r = put_user(vcpu->arch.sie_block->pp,
2745                              (u64 __user *)reg->addr);
2746                 break;
2747         case KVM_REG_S390_GBEA:
2748                 r = put_user(vcpu->arch.sie_block->gbea,
2749                              (u64 __user *)reg->addr);
2750                 break;
2751         default:
2752                 break;
2753         }
2754
2755         return r;
2756 }
2757
2758 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2759                                            struct kvm_one_reg *reg)
2760 {
2761         int r = -EINVAL;
2762         __u64 val;
2763
2764         switch (reg->id) {
2765         case KVM_REG_S390_TODPR:
2766                 r = get_user(vcpu->arch.sie_block->todpr,
2767                              (u32 __user *)reg->addr);
2768                 break;
2769         case KVM_REG_S390_EPOCHDIFF:
2770                 r = get_user(vcpu->arch.sie_block->epoch,
2771                              (u64 __user *)reg->addr);
2772                 break;
2773         case KVM_REG_S390_CPU_TIMER:
2774                 r = get_user(val, (u64 __user *)reg->addr);
2775                 if (!r)
2776                         kvm_s390_set_cpu_timer(vcpu, val);
2777                 break;
2778         case KVM_REG_S390_CLOCK_COMP:
2779                 r = get_user(vcpu->arch.sie_block->ckc,
2780                              (u64 __user *)reg->addr);
2781                 break;
2782         case KVM_REG_S390_PFTOKEN:
2783                 r = get_user(vcpu->arch.pfault_token,
2784                              (u64 __user *)reg->addr);
2785                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2786                         kvm_clear_async_pf_completion_queue(vcpu);
2787                 break;
2788         case KVM_REG_S390_PFCOMPARE:
2789                 r = get_user(vcpu->arch.pfault_compare,
2790                              (u64 __user *)reg->addr);
2791                 break;
2792         case KVM_REG_S390_PFSELECT:
2793                 r = get_user(vcpu->arch.pfault_select,
2794                              (u64 __user *)reg->addr);
2795                 break;
2796         case KVM_REG_S390_PP:
2797                 r = get_user(vcpu->arch.sie_block->pp,
2798                              (u64 __user *)reg->addr);
2799                 break;
2800         case KVM_REG_S390_GBEA:
2801                 r = get_user(vcpu->arch.sie_block->gbea,
2802                              (u64 __user *)reg->addr);
2803                 break;
2804         default:
2805                 break;
2806         }
2807
2808         return r;
2809 }
2810
/* ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);

	return 0;
}
2816
2817 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2818 {
2819         vcpu_load(vcpu);
2820         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2821         vcpu_put(vcpu);
2822         return 0;
2823 }
2824
2825 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2826 {
2827         vcpu_load(vcpu);
2828         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2829         vcpu_put(vcpu);
2830         return 0;
2831 }
2832
2833 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2834                                   struct kvm_sregs *sregs)
2835 {
2836         vcpu_load(vcpu);
2837
2838         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2839         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2840
2841         vcpu_put(vcpu);
2842         return 0;
2843 }
2844
2845 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2846                                   struct kvm_sregs *sregs)
2847 {
2848         vcpu_load(vcpu);
2849
2850         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2851         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2852
2853         vcpu_put(vcpu);
2854         return 0;
2855 }
2856
2857 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2858 {
2859         int ret = 0;
2860
2861         vcpu_load(vcpu);
2862
2863         if (test_fp_ctl(fpu->fpc)) {
2864                 ret = -EINVAL;
2865                 goto out;
2866         }
2867         vcpu->run->s.regs.fpc = fpu->fpc;
2868         if (MACHINE_HAS_VX)
2869                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2870                                  (freg_t *) fpu->fprs);
2871         else
2872                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2873
2874 out:
2875         vcpu_put(vcpu);
2876         return ret;
2877 }
2878
2879 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2880 {
2881         vcpu_load(vcpu);
2882
2883         /* make sure we have the latest values */
2884         save_fpu_regs();
2885         if (MACHINE_HAS_VX)
2886                 convert_vx_to_fp((freg_t *) fpu->fprs,
2887                                  (__vector128 *) vcpu->run->s.regs.vrs);
2888         else
2889                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2890         fpu->fpc = vcpu->run->s.regs.fpc;
2891
2892         vcpu_put(vcpu);
2893         return 0;
2894 }
2895
2896 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2897 {
2898         int rc = 0;
2899
2900         if (!is_vcpu_stopped(vcpu))
2901                 rc = -EBUSY;
2902         else {
2903                 vcpu->run->psw_mask = psw.mask;
2904                 vcpu->run->psw_addr = psw.addr;
2905         }
2906         return rc;
2907 }
2908
2909 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2910                                   struct kvm_translation *tr)
2911 {
2912         return -EINVAL; /* not implemented yet */
2913 }
2914
2915 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2916                               KVM_GUESTDBG_USE_HW_BP | \
2917                               KVM_GUESTDBG_ENABLE)
2918
2919 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2920                                         struct kvm_guest_debug *dbg)
2921 {
2922         int rc = 0;
2923
2924         vcpu_load(vcpu);
2925
2926         vcpu->guest_debug = 0;
2927         kvm_s390_clear_bp_data(vcpu);
2928
2929         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
2930                 rc = -EINVAL;
2931                 goto out;
2932         }
2933         if (!sclp.has_gpere) {
2934                 rc = -EINVAL;
2935                 goto out;
2936         }
2937
2938         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2939                 vcpu->guest_debug = dbg->control;
2940                 /* enforce guest PER */
2941                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
2942
2943                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2944                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2945         } else {
2946                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2947                 vcpu->arch.guestdbg.last_bp = 0;
2948         }
2949
2950         if (rc) {
2951                 vcpu->guest_debug = 0;
2952                 kvm_s390_clear_bp_data(vcpu);
2953                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2954         }
2955
2956 out:
2957         vcpu_put(vcpu);
2958         return rc;
2959 }
2960
2961 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2962                                     struct kvm_mp_state *mp_state)
2963 {
2964         int ret;
2965
2966         vcpu_load(vcpu);
2967
2968         /* CHECK_STOP and LOAD are not supported yet */
2969         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2970                                       KVM_MP_STATE_OPERATING;
2971
2972         vcpu_put(vcpu);
2973         return ret;
2974 }
2975
2976 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2977                                     struct kvm_mp_state *mp_state)
2978 {
2979         int rc = 0;
2980
2981         vcpu_load(vcpu);
2982
2983         /* user space knows about this interface - let it control the state */
2984         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2985
2986         switch (mp_state->mp_state) {
2987         case KVM_MP_STATE_STOPPED:
2988                 kvm_s390_vcpu_stop(vcpu);
2989                 break;
2990         case KVM_MP_STATE_OPERATING:
2991                 kvm_s390_vcpu_start(vcpu);
2992                 break;
2993         case KVM_MP_STATE_LOAD:
2994         case KVM_MP_STATE_CHECK_STOP:
2995                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2996         default:
2997                 rc = -ENXIO;
2998         }
2999
3000         vcpu_put(vcpu);
3001         return rc;
3002 }
3003
3004 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3005 {
3006         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3007 }
3008
3009 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3010 {
3011 retry:
3012         kvm_s390_vcpu_request_handled(vcpu);
3013         if (!kvm_request_pending(vcpu))
3014                 return 0;
3015         /*
3016          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3017          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3018          * This ensures that the ipte instruction for this request has
3019          * already finished. We might race against a second unmapper that
3020          * wants to set the blocking bit. Lets just retry the request loop.
3021          */
3022         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3023                 int rc;
3024                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3025                                           kvm_s390_get_prefix(vcpu),
3026                                           PAGE_SIZE * 2, PROT_WRITE);
3027                 if (rc) {
3028                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3029                         return rc;
3030                 }
3031                 goto retry;
3032         }
3033
3034         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3035                 vcpu->arch.sie_block->ihcpu = 0xffff;
3036                 goto retry;
3037         }
3038
3039         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3040                 if (!ibs_enabled(vcpu)) {
3041                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3042                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3043                 }
3044                 goto retry;
3045         }
3046
3047         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3048                 if (ibs_enabled(vcpu)) {
3049                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3050                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3051                 }
3052                 goto retry;
3053         }
3054
3055         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3056                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3057                 goto retry;
3058         }
3059
3060         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3061                 /*
3062                  * Disable CMM virtualization; we will emulate the ESSA
3063                  * instruction manually, in order to provide additional
3064                  * functionalities needed for live migration.
3065                  */
3066                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3067                 goto retry;
3068         }
3069
3070         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3071                 /*
3072                  * Re-enable CMM virtualization if CMMA is available and
3073                  * CMM has been used.
3074                  */
3075                 if ((vcpu->kvm->arch.use_cmma) &&
3076                     (vcpu->kvm->mm->context.uses_cmm))
3077                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3078                 goto retry;
3079         }
3080
3081         /* nothing to do, just clear the request */
3082         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3083
3084         return 0;
3085 }
3086
3087 void kvm_s390_set_tod_clock(struct kvm *kvm,
3088                             const struct kvm_s390_vm_tod_clock *gtod)
3089 {
3090         struct kvm_vcpu *vcpu;
3091         struct kvm_s390_tod_clock_ext htod;
3092         int i;
3093
3094         mutex_lock(&kvm->lock);
3095         preempt_disable();
3096
3097         get_tod_clock_ext((char *)&htod);
3098
3099         kvm->arch.epoch = gtod->tod - htod.tod;
3100         kvm->arch.epdx = 0;
3101         if (test_kvm_facility(kvm, 139)) {
3102                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3103                 if (kvm->arch.epoch > gtod->tod)
3104                         kvm->arch.epdx -= 1;
3105         }
3106
3107         kvm_s390_vcpu_block_all(kvm);
3108         kvm_for_each_vcpu(i, vcpu, kvm) {
3109                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3110                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3111         }
3112
3113         kvm_s390_vcpu_unblock_all(kvm);
3114         preempt_enable();
3115         mutex_unlock(&kvm->lock);
3116 }
3117
3118 /**
3119  * kvm_arch_fault_in_page - fault-in guest page if necessary
3120  * @vcpu: The corresponding virtual cpu
3121  * @gpa: Guest physical address
3122  * @writable: Whether the page should be writable or not
3123  *
3124  * Make sure that a guest page has been faulted-in on the host.
3125  *
3126  * Return: Zero on success, negative error code otherwise.
3127  */
3128 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3129 {
3130         return gmap_fault(vcpu->arch.gmap, gpa,
3131                           writable ? FAULT_FLAG_WRITE : 0);
3132 }
3133
3134 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3135                                       unsigned long token)
3136 {
3137         struct kvm_s390_interrupt inti;
3138         struct kvm_s390_irq irq;
3139
3140         if (start_token) {
3141                 irq.u.ext.ext_params2 = token;
3142                 irq.type = KVM_S390_INT_PFAULT_INIT;
3143                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3144         } else {
3145                 inti.type = KVM_S390_INT_PFAULT_DONE;
3146                 inti.parm64 = token;
3147                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3148         }
3149 }
3150
3151 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3152                                      struct kvm_async_pf *work)
3153 {
3154         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3155         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3156 }
3157
3158 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3159                                  struct kvm_async_pf *work)
3160 {
3161         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3162         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3163 }
3164
/*
 * Async pf "page ready" hook. Intentionally empty: s390 will always inject
 * the page directly.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
3170
3171 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3172 {
3173         /*
3174          * s390 will always inject the page directly,
3175          * but we still want check_async_completion to cleanup
3176          */
3177         return true;
3178 }
3179
3180 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3181 {
3182         hva_t hva;
3183         struct kvm_arch_async_pf arch;
3184         int rc;
3185
3186         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3187                 return 0;
3188         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3189             vcpu->arch.pfault_compare)
3190                 return 0;
3191         if (psw_extint_disabled(vcpu))
3192                 return 0;
3193         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3194                 return 0;
3195         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3196                 return 0;
3197         if (!vcpu->arch.gmap->pfault_enabled)
3198                 return 0;
3199
3200         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3201         hva += current->thread.gmap_addr & ~PAGE_MASK;
3202         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3203                 return 0;
3204
3205         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3206         return rc;
3207 }
3208
3209 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3210 {
3211         int rc, cpuflags;
3212
3213         /*
3214          * On s390 notifications for arriving pages will be delivered directly
3215          * to the guest but the house keeping for completed pfaults is
3216          * handled outside the worker.
3217          */
3218         kvm_check_async_pf_completion(vcpu);
3219
3220         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3221         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3222
3223         if (need_resched())
3224                 schedule();
3225
3226         if (test_cpu_flag(CIF_MCCK_PENDING))
3227                 s390_handle_mcck();
3228
3229         if (!kvm_is_ucontrol(vcpu->kvm)) {
3230                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3231                 if (rc)
3232                         return rc;
3233         }
3234
3235         rc = kvm_s390_handle_requests(vcpu);
3236         if (rc)
3237                 return rc;
3238
3239         if (guestdbg_enabled(vcpu)) {
3240                 kvm_s390_backup_guest_per_regs(vcpu);
3241                 kvm_s390_patch_guest_per_regs(vcpu);
3242         }
3243
3244         vcpu->arch.sie_block->icptcode = 0;
3245         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3246         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3247         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3248
3249         return 0;
3250 }
3251
3252 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3253 {
3254         struct kvm_s390_pgm_info pgm_info = {
3255                 .code = PGM_ADDRESSING,
3256         };
3257         u8 opcode, ilen;
3258         int rc;
3259
3260         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3261         trace_kvm_s390_sie_fault(vcpu);
3262
3263         /*
3264          * We want to inject an addressing exception, which is defined as a
3265          * suppressing or terminating exception. However, since we came here
3266          * by a DAT access exception, the PSW still points to the faulting
3267          * instruction since DAT exceptions are nullifying. So we've got
3268          * to look up the current opcode to get the length of the instruction
3269          * to be able to forward the PSW.
3270          */
3271         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3272         ilen = insn_length(opcode);
3273         if (rc < 0) {
3274                 return rc;
3275         } else if (rc) {
3276                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3277                  * Forward by arbitrary ilc, injection will take care of
3278                  * nullification if necessary.
3279                  */
3280                 pgm_info = vcpu->arch.pgm;
3281                 ilen = 4;
3282         }
3283         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3284         kvm_s390_forward_psw(vcpu, ilen);
3285         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3286 }
3287
3288 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3289 {
3290         struct mcck_volatile_info *mcck_info;
3291         struct sie_page *sie_page;
3292
3293         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3294                    vcpu->arch.sie_block->icptcode);
3295         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3296
3297         if (guestdbg_enabled(vcpu))
3298                 kvm_s390_restore_guest_per_regs(vcpu);
3299
3300         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3301         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3302
3303         if (exit_reason == -EINTR) {
3304                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3305                 sie_page = container_of(vcpu->arch.sie_block,
3306                                         struct sie_page, sie_block);
3307                 mcck_info = &sie_page->mcck_info;
3308                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3309                 return 0;
3310         }
3311
3312         if (vcpu->arch.sie_block->icptcode > 0) {
3313                 int rc = kvm_handle_sie_intercept(vcpu);
3314
3315                 if (rc != -EOPNOTSUPP)
3316                         return rc;
3317                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3318                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3319                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3320                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3321                 return -EREMOTE;
3322         } else if (exit_reason != -EFAULT) {
3323                 vcpu->stat.exit_null++;
3324                 return 0;
3325         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3326                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3327                 vcpu->run->s390_ucontrol.trans_exc_code =
3328                                                 current->thread.gmap_addr;
3329                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3330                 return -EREMOTE;
3331         } else if (current->thread.gmap_pfault) {
3332                 trace_kvm_s390_major_guest_pfault(vcpu);
3333                 current->thread.gmap_pfault = 0;
3334                 if (kvm_arch_setup_async_pf(vcpu))
3335                         return 0;
3336                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3337         }
3338         return vcpu_post_run_fault_in_sie(vcpu);
3339 }
3340
/*
 * The vcpu run loop: prepare, enter SIE, evaluate the exit, repeat.
 *
 * The loop ends when vcpu_pre_run() or vcpu_post_run() return non-zero,
 * when a signal is pending for current, or when a guest debug exit is
 * pending. Returns the last return code of vcpu_pre_run()/vcpu_post_run().
 */
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
        int rc, exit_reason;

        /*
         * We try to hold kvm->srcu during most of vcpu_run (except when run-
         * ning the guest), so that memslots (and other stuff) are protected
         */
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

        do {
                rc = vcpu_pre_run(vcpu);
                if (rc)
                        break;

                /* drop srcu while the guest is running */
                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
                /*
                 * As PF_VCPU will be used in fault handler, between
                 * guest_enter and guest_exit should be no uaccess.
                 */
                local_irq_disable();
                guest_enter_irqoff();
                __disable_cpu_timer_accounting(vcpu);
                local_irq_enable();
                /* run the guest; the return value is decoded in vcpu_post_run() */
                exit_reason = sie64a(vcpu->arch.sie_block,
                                     vcpu->run->s.regs.gprs);
                /* mirror image of the entry sequence above */
                local_irq_disable();
                __enable_cpu_timer_accounting(vcpu);
                guest_exit_irqoff();
                local_irq_enable();
                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

                rc = vcpu_post_run(vcpu, exit_reason);
        } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        return rc;
}
3379
/*
 * Transfer register state from the kvm_run area into the vcpu before the
 * guest is run, and switch the host access/floating point/guarded storage
 * registers over to the guest's.
 *
 * Only the register groups flagged in kvm_run->kvm_dirty_regs are copied
 * (the PSW is always taken over); kvm_dirty_regs is cleared at the end.
 * store_regs() is the counterpart after the run.
 */
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
        struct runtime_instr_cb *riccb;
        struct gs_cb *gscb;

        riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
        gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
        /* the PSW is taken over unconditionally */
        vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
        vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
                kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
                memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
                /* some control register changes require a tlb flush */
                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
        }
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
                kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
                vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
                vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
                vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
                vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
        }
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
                vcpu->arch.pfault_token = kvm_run->s.regs.pft;
                vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
                vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
                /* an invalid token means pfault is off: drop queued completions */
                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
                        kvm_clear_async_pf_completion_queue(vcpu);
        }
        /*
         * If userspace sets the riccb (e.g. after migration) to a valid state,
         * we should enable RI here instead of doing the lazy enablement.
         */
        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
            test_kvm_facility(vcpu->kvm, 64) &&
            riccb->v &&
            !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
                vcpu->arch.sie_block->ecb3 |= ECB3_RI;
        }
        /*
         * If userspace sets the gscb (e.g. after migration) to non-zero,
         * we should enable GS here instead of doing the lazy enablement.
         */
        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
            test_kvm_facility(vcpu->kvm, 133) &&
            gscb->gssm &&
            !vcpu->arch.gs_enabled) {
                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
                vcpu->arch.sie_block->ecb |= ECB_GS;
                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
                vcpu->arch.gs_enabled = 1;
        }
        /* bpbc is a boolean in kvm_run and a single flag bit in the fpf field */
        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
            test_kvm_facility(vcpu->kvm, 82)) {
                vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
                vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
        }
        /* swap access registers: stash the host's, load the guest's */
        save_access_regs(vcpu->arch.host_acrs);
        restore_access_regs(vcpu->run->s.regs.acrs);
        /* save host (userspace) fprs/vrs */
        save_fpu_regs();
        vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
        vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
        /* point the thread's fpu save area at the guest registers in kvm_run */
        if (MACHINE_HAS_VX)
                current->thread.fpu.regs = vcpu->run->s.regs.vrs;
        else
                current->thread.fpu.regs = vcpu->run->s.regs.fprs;
        current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
        if (test_fp_ctl(current->thread.fpu.fpc))
                /* User space provided an invalid FPC, let's clear it */
                current->thread.fpu.fpc = 0;
        if (MACHINE_HAS_GS) {
                /* the guarded storage switch must not be preempted */
                preempt_disable();
                /* NOTE(review): presumably the GS enablement bit in CR2 - confirm */
                __ctl_set_bit(2, 4);
                /* remember and save the host control block, if there is one */
                if (current->thread.gs_cb) {
                        vcpu->arch.host_gscb = current->thread.gs_cb;
                        save_gs_cb(vcpu->arch.host_gscb);
                }
                /* install the guest control block located in kvm_run */
                if (vcpu->arch.gs_enabled) {
                        current->thread.gs_cb = (struct gs_cb *)
                                                &vcpu->run->s.regs.gscb;
                        restore_gs_cb(current->thread.gs_cb);
                }
                preempt_enable();
        }

        /* everything userspace marked dirty has been consumed */
        kvm_run->kvm_dirty_regs = 0;
}
3470
3471 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3472 {
3473         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3474         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3475         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3476         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3477         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3478         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3479         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3480         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3481         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3482         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3483         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3484         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3485         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3486         save_access_regs(vcpu->run->s.regs.acrs);
3487         restore_access_regs(vcpu->arch.host_acrs);
3488         /* Save guest register state */
3489         save_fpu_regs();
3490         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3491         /* Restore will be done lazily at return */
3492         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3493         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3494         if (MACHINE_HAS_GS) {
3495                 __ctl_set_bit(2, 4);
3496                 if (vcpu->arch.gs_enabled)
3497                         save_gs_cb(current->thread.gs_cb);
3498                 preempt_disable();
3499                 current->thread.gs_cb = vcpu->arch.host_gscb;
3500                 restore_gs_cb(vcpu->arch.host_gscb);
3501                 preempt_enable();
3502                 if (!vcpu->arch.host_gscb)
3503                         __ctl_clear_bit(2, 4);
3504                 vcpu->arch.host_gscb = NULL;
3505         }
3506
3507 }
3508
3509 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3510 {
3511         int rc;
3512
3513         if (kvm_run->immediate_exit)
3514                 return -EINTR;
3515
3516         vcpu_load(vcpu);
3517
3518         if (guestdbg_exit_pending(vcpu)) {
3519                 kvm_s390_prepare_debug_exit(vcpu);
3520                 rc = 0;
3521                 goto out;
3522         }
3523
3524         kvm_sigset_activate(vcpu);
3525
3526         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3527                 kvm_s390_vcpu_start(vcpu);
3528         } else if (is_vcpu_stopped(vcpu)) {
3529                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3530                                    vcpu->vcpu_id);
3531                 rc = -EINVAL;
3532                 goto out;
3533         }
3534
3535         sync_regs(vcpu, kvm_run);
3536         enable_cpu_timer_accounting(vcpu);
3537
3538         might_fault();
3539         rc = __vcpu_run(vcpu);
3540
3541         if (signal_pending(current) && !rc) {
3542                 kvm_run->exit_reason = KVM_EXIT_INTR;
3543                 rc = -EINTR;
3544         }
3545
3546         if (guestdbg_exit_pending(vcpu) && !rc)  {
3547                 kvm_s390_prepare_debug_exit(vcpu);
3548                 rc = 0;
3549         }
3550
3551         if (rc == -EREMOTE) {
3552                 /* userspace support is needed, kvm_run has been prepared */
3553                 rc = 0;
3554         }
3555
3556         disable_cpu_timer_accounting(vcpu);
3557         store_regs(vcpu, kvm_run);
3558
3559         kvm_sigset_deactivate(vcpu);
3560
3561         vcpu->stat.exit_userspace++;
3562 out:
3563         vcpu_put(vcpu);
3564         return rc;
3565 }
3566
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
3573 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3574 {
3575         unsigned char archmode = 1;
3576         freg_t fprs[NUM_FPRS];
3577         unsigned int px;
3578         u64 clkcomp, cputm;
3579         int rc;
3580
3581         px = kvm_s390_get_prefix(vcpu);
3582         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3583                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3584                         return -EFAULT;
3585                 gpa = 0;
3586         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3587                 if (write_guest_real(vcpu, 163, &archmode, 1))
3588                         return -EFAULT;
3589                 gpa = px;
3590         } else
3591                 gpa -= __LC_FPREGS_SAVE_AREA;
3592
3593         /* manually convert vector registers if necessary */
3594         if (MACHINE_HAS_VX) {
3595                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3596                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3597                                      fprs, 128);
3598         } else {
3599                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3600                                      vcpu->run->s.regs.fprs, 128);
3601         }
3602         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3603                               vcpu->run->s.regs.gprs, 128);
3604         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3605                               &vcpu->arch.sie_block->gpsw, 16);
3606         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3607                               &px, 4);
3608         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3609                               &vcpu->run->s.regs.fpc, 4);
3610         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3611                               &vcpu->arch.sie_block->todpr, 4);
3612         cputm = kvm_s390_get_cpu_timer(vcpu);
3613         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3614                               &cputm, 8);
3615         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3616         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3617                               &clkcomp, 8);
3618         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3619                               &vcpu->run->s.regs.acrs, 64);
3620         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3621                               &vcpu->arch.sie_block->gcr, 128);
3622         return rc ? -EFAULT : 0;
3623 }
3624
3625 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3626 {
3627         /*
3628          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3629          * switch in the run ioctl. Let's update our copies before we save
3630          * it into the save area
3631          */
3632         save_fpu_regs();
3633         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3634         save_access_regs(vcpu->run->s.regs.acrs);
3635
3636         return kvm_s390_store_status_unloaded(vcpu, addr);
3637 }
3638
/*
 * Request that IBS is switched off for one vcpu.
 *
 * NOTE(review): kvm_check_request() is presumably used here only for its
 * side effect of consuming a still pending ENABLE_IBS request, so that it
 * cannot override the DISABLE_IBS request issued right after - confirm.
 */
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
        kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}
3644
3645 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3646 {
3647         unsigned int i;
3648         struct kvm_vcpu *vcpu;
3649
3650         kvm_for_each_vcpu(i, vcpu, kvm) {
3651                 __disable_ibs_on_vcpu(vcpu);
3652         }
3653 }
3654
3655 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3656 {
3657         if (!sclp.has_ibs)
3658                 return;
3659         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3660         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3661 }
3662
3663 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3664 {
3665         int i, online_vcpus, started_vcpus = 0;
3666
3667         if (!is_vcpu_stopped(vcpu))
3668                 return;
3669
3670         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3671         /* Only one cpu at a time may enter/leave the STOPPED state. */
3672         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3673         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3674
3675         for (i = 0; i < online_vcpus; i++) {
3676                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3677                         started_vcpus++;
3678         }
3679
3680         if (started_vcpus == 0) {
3681                 /* we're the only active VCPU -> speed it up */
3682                 __enable_ibs_on_vcpu(vcpu);
3683         } else if (started_vcpus == 1) {
3684                 /*
3685                  * As we are starting a second VCPU, we have to disable
3686                  * the IBS facility on all VCPUs to remove potentially
3687                  * oustanding ENABLE requests.
3688                  */
3689                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3690         }
3691
3692         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3693         /*
3694          * Another VCPU might have used IBS while we were offline.
3695          * Let's play safe and flush the VCPU at startup.
3696          */
3697         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3698         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3699         return;
3700 }
3701
3702 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3703 {
3704         int i, online_vcpus, started_vcpus = 0;
3705         struct kvm_vcpu *started_vcpu = NULL;
3706
3707         if (is_vcpu_stopped(vcpu))
3708                 return;
3709
3710         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3711         /* Only one cpu at a time may enter/leave the STOPPED state. */
3712         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3713         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3714
3715         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3716         kvm_s390_clear_stop_irq(vcpu);
3717
3718         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3719         __disable_ibs_on_vcpu(vcpu);
3720
3721         for (i = 0; i < online_vcpus; i++) {
3722                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3723                         started_vcpus++;
3724                         started_vcpu = vcpu->kvm->vcpus[i];
3725                 }
3726         }
3727
3728         if (started_vcpus == 1) {
3729                 /*
3730                  * As we only have one VCPU left, we want to enable the
3731                  * IBS facility for that VCPU to speed it up.
3732                  */
3733                 __enable_ibs_on_vcpu(started_vcpu);
3734         }
3735
3736         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3737         return;
3738 }
3739
3740 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3741                                      struct kvm_enable_cap *cap)
3742 {
3743         int r;
3744
3745         if (cap->flags)
3746                 return -EINVAL;
3747
3748         switch (cap->cap) {
3749         case KVM_CAP_S390_CSS_SUPPORT:
3750                 if (!vcpu->kvm->arch.css_support) {
3751                         vcpu->kvm->arch.css_support = 1;
3752                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3753                         trace_kvm_s390_enable_css(vcpu->kvm);
3754                 }
3755                 r = 0;
3756                 break;
3757         default:
3758                 r = -EINVAL;
3759                 break;
3760         }
3761         return r;
3762 }
3763
3764 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3765                                   struct kvm_s390_mem_op *mop)
3766 {
3767         void __user *uaddr = (void __user *)mop->buf;
3768         void *tmpbuf = NULL;
3769         int r, srcu_idx;
3770         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3771                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3772
3773         if (mop->flags & ~supported_flags)
3774                 return -EINVAL;
3775
3776         if (mop->size > MEM_OP_MAX_SIZE)
3777                 return -E2BIG;
3778
3779         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3780                 tmpbuf = vmalloc(mop->size);
3781                 if (!tmpbuf)
3782                         return -ENOMEM;
3783         }
3784
3785         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3786
3787         switch (mop->op) {
3788         case KVM_S390_MEMOP_LOGICAL_READ:
3789                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3790                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3791                                             mop->size, GACC_FETCH);
3792                         break;
3793                 }
3794                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3795                 if (r == 0) {
3796                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3797                                 r = -EFAULT;
3798                 }
3799                 break;
3800         case KVM_S390_MEMOP_LOGICAL_WRITE:
3801                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3802                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3803                                             mop->size, GACC_STORE);
3804                         break;
3805                 }
3806                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3807                         r = -EFAULT;
3808                         break;
3809                 }
3810                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3811                 break;
3812         default:
3813                 r = -EINVAL;
3814         }
3815
3816         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3817
3818         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3819                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3820
3821         vfree(tmpbuf);
3822         return r;
3823 }
3824
3825 long kvm_arch_vcpu_async_ioctl(struct file *filp,
3826                                unsigned int ioctl, unsigned long arg)
3827 {
3828         struct kvm_vcpu *vcpu = filp->private_data;
3829         void __user *argp = (void __user *)arg;
3830
3831         switch (ioctl) {
3832         case KVM_S390_IRQ: {
3833                 struct kvm_s390_irq s390irq;
3834
3835                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3836                         return -EFAULT;
3837                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3838         }
3839         case KVM_S390_INTERRUPT: {
3840                 struct kvm_s390_interrupt s390int;
3841                 struct kvm_s390_irq s390irq;
3842
3843                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3844                         return -EFAULT;
3845                 if (s390int_to_s390irq(&s390int, &s390irq))
3846                         return -EINVAL;
3847                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3848         }
3849         }
3850         return -ENOIOCTLCMD;
3851 }
3852
3853 long kvm_arch_vcpu_ioctl(struct file *filp,
3854                          unsigned int ioctl, unsigned long arg)
3855 {
3856         struct kvm_vcpu *vcpu = filp->private_data;
3857         void __user *argp = (void __user *)arg;
3858         int idx;
3859         long r;
3860
3861         vcpu_load(vcpu);
3862
3863         switch (ioctl) {
3864         case KVM_S390_STORE_STATUS:
3865                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3866                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3867                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3868                 break;
3869         case KVM_S390_SET_INITIAL_PSW: {
3870                 psw_t psw;
3871
3872                 r = -EFAULT;
3873                 if (copy_from_user(&psw, argp, sizeof(psw)))
3874                         break;
3875                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3876                 break;
3877         }
3878         case KVM_S390_INITIAL_RESET:
3879                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3880                 break;
3881         case KVM_SET_ONE_REG:
3882         case KVM_GET_ONE_REG: {
3883                 struct kvm_one_reg reg;
3884                 r = -EFAULT;
3885                 if (copy_from_user(&reg, argp, sizeof(reg)))
3886                         break;
3887                 if (ioctl == KVM_SET_ONE_REG)
3888                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3889                 else
3890                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3891                 break;
3892         }
3893 #ifdef CONFIG_KVM_S390_UCONTROL
3894         case KVM_S390_UCAS_MAP: {
3895                 struct kvm_s390_ucas_mapping ucasmap;
3896
3897                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3898                         r = -EFAULT;
3899                         break;
3900                 }
3901
3902                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3903                         r = -EINVAL;
3904                         break;
3905                 }
3906
3907                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3908                                      ucasmap.vcpu_addr, ucasmap.length);
3909                 break;
3910         }
3911         case KVM_S390_UCAS_UNMAP: {
3912                 struct kvm_s390_ucas_mapping ucasmap;
3913
3914                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3915                         r = -EFAULT;
3916                         break;
3917                 }
3918
3919                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3920                         r = -EINVAL;
3921                         break;
3922                 }
3923
3924                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3925                         ucasmap.length);
3926                 break;
3927         }
3928 #endif
3929         case KVM_S390_VCPU_FAULT: {
3930                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3931                 break;
3932         }
3933         case KVM_ENABLE_CAP:
3934         {
3935                 struct kvm_enable_cap cap;
3936                 r = -EFAULT;
3937                 if (copy_from_user(&cap, argp, sizeof(cap)))
3938                         break;
3939                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3940                 break;
3941         }
3942         case KVM_S390_MEM_OP: {
3943                 struct kvm_s390_mem_op mem_op;
3944
3945                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3946                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3947                 else
3948                         r = -EFAULT;
3949                 break;
3950         }
3951         case KVM_S390_SET_IRQ_STATE: {
3952                 struct kvm_s390_irq_state irq_state;
3953
3954                 r = -EFAULT;
3955                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3956                         break;
3957                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3958                     irq_state.len == 0 ||
3959                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3960                         r = -EINVAL;
3961                         break;
3962                 }
3963                 /* do not use irq_state.flags, it will break old QEMUs */
3964                 r = kvm_s390_set_irq_state(vcpu,
3965                                            (void __user *) irq_state.buf,
3966                                            irq_state.len);
3967                 break;
3968         }
3969         case KVM_S390_GET_IRQ_STATE: {
3970                 struct kvm_s390_irq_state irq_state;
3971
3972                 r = -EFAULT;
3973                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3974                         break;
3975                 if (irq_state.len == 0) {
3976                         r = -EINVAL;
3977                         break;
3978                 }
3979                 /* do not use irq_state.flags, it will break old QEMUs */
3980                 r = kvm_s390_get_irq_state(vcpu,
3981                                            (__u8 __user *)  irq_state.buf,
3982                                            irq_state.len);
3983                 break;
3984         }
3985         default:
3986                 r = -ENOTTY;
3987         }
3988
3989         vcpu_put(vcpu);
3990         return r;
3991 }
3992
3993 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3994 {
3995 #ifdef CONFIG_KVM_S390_UCONTROL
3996         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3997                  && (kvm_is_ucontrol(vcpu->kvm))) {
3998                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3999                 get_page(vmf->page);
4000                 return 0;
4001         }
4002 #endif
4003         return VM_FAULT_SIGBUS;
4004 }
4005
/*
 * Arch hook for memslot creation. Nothing is set up here; always
 * returns 0.
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                            unsigned long npages)
{
        return 0;
}
4011
4012 /* Section: memory related */
4013 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4014                                    struct kvm_memory_slot *memslot,
4015                                    const struct kvm_userspace_memory_region *mem,
4016                                    enum kvm_mr_change change)
4017 {
4018         /* A few sanity checks. We can have memory slots which have to be
4019            located/ended at a segment boundary (1MB). The memory in userland is
4020            ok to be fragmented into various different vmas. It is okay to mmap()
4021            and munmap() stuff in this slot after doing this call at any time */
4022
4023         if (mem->userspace_addr & 0xffffful)
4024                 return -EINVAL;
4025
4026         if (mem->memory_size & 0xffffful)
4027                 return -EINVAL;
4028
4029         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4030                 return -EINVAL;
4031
4032         return 0;
4033 }
4034
4035 void kvm_arch_commit_memory_region(struct kvm *kvm,
4036                                 const struct kvm_userspace_memory_region *mem,
4037                                 const struct kvm_memory_slot *old,
4038                                 const struct kvm_memory_slot *new,
4039                                 enum kvm_mr_change change)
4040 {
4041         int rc;
4042
4043         /* If the basics of the memslot do not change, we do not want
4044          * to update the gmap. Every update causes several unnecessary
4045          * segment translation exceptions. This is usually handled just
4046          * fine by the normal fault handler + gmap, but it will also
4047          * cause faults on the prefix page of running guest CPUs.
4048          */
4049         if (old->userspace_addr == mem->userspace_addr &&
4050             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4051             old->npages * PAGE_SIZE == mem->memory_size)
4052                 return;
4053
4054         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4055                 mem->guest_phys_addr, mem->memory_size);
4056         if (rc)
4057                 pr_warn("failed to commit memory region\n");
4058         return;
4059 }
4060
4061 static inline unsigned long nonhyp_mask(int i)
4062 {
4063         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4064
4065         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4066 }
4067
4068 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4069 {
4070         vcpu->valid_wakeup = false;
4071 }
4072
4073 static int __init kvm_s390_init(void)
4074 {
4075         int i;
4076
4077         if (!sclp.has_sief2) {
4078                 pr_info("SIE not available\n");
4079                 return -ENODEV;
4080         }
4081
4082         for (i = 0; i < 16; i++)
4083                 kvm_s390_fac_base[i] |=
4084                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4085
4086         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4087 }
4088
4089 static void __exit kvm_s390_exit(void)
4090 {
4091         kvm_exit();
4092 }
4093
/* Hook kvm_s390_init()/kvm_s390_exit() up as module load/unload handlers. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
4096
4097 /*
4098  * Enable autoloading of the kvm module.
4099  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4100  * since x86 takes a different approach.
4101  */
4102 #include <linux/miscdevice.h>
4103 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4104 MODULE_ALIAS("devname:kvm");