arch/s390/kvm/kvm-s390.c

   1 /*
   2  * hosting zSeries kernel virtual machines
   3  *
   4  * Copyright IBM Corp. 2008, 2009
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License (version 2 only)
   8  * as published by the Free Software Foundation.
   9  *
  10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11  *               Christian Borntraeger <borntraeger@de.ibm.com>
  12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14  *               Jason J. Herne <jjherne@us.ibm.com>
  15  */
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/random.h>
  27 #include <linux/slab.h>
  28 #include <linux/timer.h>
  29 #include <linux/vmalloc.h>
  30 #include <linux/bitmap.h>
  31 #include <asm/asm-offsets.h>
  32 #include <asm/lowcore.h>
  33 #include <asm/stp.h>
  34 #include <asm/pgtable.h>
  35 #include <asm/gmap.h>
  36 #include <asm/nmi.h>
  37 #include <asm/switch_to.h>
  38 #include <asm/isc.h>
  39 #include <asm/sclp.h>
  40 #include <asm/cpacf.h>
  41 #include <asm/timex.h>
  42 #include "kvm-s390.h"
  43 #include "gaccess.h"
  44
  45 #define KMSG_COMPONENT "kvm-s390"
  46 #undef pr_fmt
  47 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  48
  49 #define CREATE_TRACE_POINTS
  50 #include "trace.h"
  51 #include "trace-s390.h"
  52
#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
/* NOTE(review): presumably the number of per-VCPU local interrupts - confirm */
#define LOCAL_IRQS 32
/* Size in bytes of KVM_MAX_VCPUS + LOCAL_IRQS struct kvm_s390_irq entries */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

/*
 * Expands to two comma-separated initializer values: the offset of
 * stat.x inside struct kvm_vcpu, followed by KVM_STAT_VCPU.
 */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  59
/*
 * Table of per-VCPU statistics counters. Each entry pairs a name with
 * the location of the counter inside struct kvm_vcpu (see VCPU_STAT).
 * The exported name is not always identical to the field name (e.g.
 * "userspace_handled" maps to stat.exit_userspace). The table is
 * terminated by an entry whose name is NULL.
 * NOTE(review): the consumer of this table is not visible in this file;
 * presumably common KVM code exposes the entries via debugfs - confirm.
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
 128
/*
 * allow nested virtualization in KVM (if enabled by user space)
 *
 * Module parameter, exported with read-only permission bits (S_IRUGO).
 * kvm_s390_cpu_feat_init() only offers the SIE-related CPU features
 * when this is non-zero.
 */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/*
 * upper facilities limit for kvm
 *
 * Only the first two words carry an explicit value; the remaining
 * elements of the 16-entry array are zero-initialized.
 */
unsigned long kvm_s390_fac_list_mask[16] = {
	0xffe6000000000000UL,
	0x005e000000000000UL,
};
 139
 140 unsigned long kvm_s390_fac_list_mask_size(void)
 141 {
 142         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 143         return ARRAY_SIZE(kvm_s390_fac_list_mask);
 144 }
 145
/*
 * available cpu features supported by kvm
 * (bits are set through allow_cpu_feat() in kvm_s390_cpu_feat_init())
 */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/*
 * available subfunctions indicated via query / "test bit"
 * (filled in once by kvm_s390_cpu_feat_init())
 */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* pte notifier invoking kvm_gmap_notifier; see kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
/* pte notifier invoking kvm_s390_vsie_gmap_notifier; registered alongside */
static struct gmap_notifier vsie_gmap_notifier;
/* debug area "kvm-trace", registered in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
 154
 155 /* Section: not file related */
 156 int kvm_arch_hardware_enable(void)
 157 {
 158         /* every s390 is virtualization enabled ;-) */
 159         return 0;
 160 }
 161
 162 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 163                               unsigned long end);
 164
 165 /*
 166  * This callback is executed during stop_machine(). All CPUs are therefore
 167  * temporarily stopped. In order not to change guest behavior, we have to
 168  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 169  * so a CPU won't be stopped while calculating with the epoch.
 170  */
 171 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 172                           void *v)
 173 {
 174         struct kvm *kvm;
 175         struct kvm_vcpu *vcpu;
 176         int i;
 177         unsigned long long *delta = v;
 178
 179         list_for_each_entry(kvm, &vm_list, vm_list) {
 180                 kvm->arch.epoch -= *delta;
 181                 kvm_for_each_vcpu(i, vcpu, kvm) {
 182                         vcpu->arch.sie_block->epoch -= *delta;
 183                         if (vcpu->arch.cputm_enabled)
 184                                 vcpu->arch.cputm_start += *delta;
 185                         if (vcpu->arch.vsie_block)
 186                                 vcpu->arch.vsie_block->epoch -= *delta;
 187                 }
 188         }
 189         return NOTIFY_OK;
 190 }
 191
/*
 * Notifier block hooking kvm_clock_sync() into s390_epoch_delta_notifier;
 * (un)registered in kvm_arch_hardware_setup()/kvm_arch_hardware_unsetup().
 */
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
 195
 196 int kvm_arch_hardware_setup(void)
 197 {
 198         gmap_notifier.notifier_call = kvm_gmap_notifier;
 199         gmap_register_pte_notifier(&gmap_notifier);
 200         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 201         gmap_register_pte_notifier(&vsie_gmap_notifier);
 202         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 203                                        &kvm_clock_notifier);
 204         return 0;
 205 }
 206
 207 void kvm_arch_hardware_unsetup(void)
 208 {
 209         gmap_unregister_pte_notifier(&gmap_notifier);
 210         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 211         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 212                                          &kvm_clock_notifier);
 213 }
 214
 215 static void allow_cpu_feat(unsigned long nr)
 216 {
 217         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 218 }
 219
/*
 * Check whether PLO function code @nr is available.
 *
 * Register 0 is loaded with @nr | 0x100 before PLO is executed.
 * NOTE(review): 0x100 presumably selects the "test bit" form of PLO -
 * confirm against the Principles of Operation.
 *
 * Returns 1 if the resulting condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	/* "=d" is output-only, so the asm below always overwrites this */
	int cc = 3; /* subfunction not available */

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"       plo     0,0,0,0(0)\n"
		/* fetch the program mask and shift it right by 28 bits */
		"       ipm     %0\n"
		"       srl     %0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
 235
 236 static void kvm_s390_cpu_feat_init(void)
 237 {
 238         int i;
 239
 240         for (i = 0; i < 256; ++i) {
 241                 if (plo_test_bit(i))
 242                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 243         }
 244
 245         if (test_facility(28)) /* TOD-clock steering */
 246                 ptff(kvm_s390_available_subfunc.ptff,
 247                      sizeof(kvm_s390_available_subfunc.ptff),
 248                      PTFF_QAF);
 249
 250         if (test_facility(17)) { /* MSA */
 251                 __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
 252                 __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
 253                 __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
 254                 __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
 255                 __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
 256         }
 257         if (test_facility(76)) /* MSA3 */
 258                 __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
 259         if (test_facility(77)) { /* MSA4 */
 260                 __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
 261                 __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
 262                 __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
 263                 __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
 264         }
 265         if (test_facility(57)) /* MSA5 */
 266                 __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
 267
 268         if (MACHINE_HAS_ESOP)
 269                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 270         /*
 271          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 272          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 273          */
 274         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 275             !test_facility(3) || !nested)
 276                 return;
 277         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 278         if (sclp.has_64bscao)
 279                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 280         if (sclp.has_siif)
 281                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 282         if (sclp.has_gpere)
 283                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 284         if (sclp.has_gsls)
 285                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 286         if (sclp.has_ib)
 287                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 288         if (sclp.has_cei)
 289                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 290         if (sclp.has_ibs)
 291                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 292         /*
 293          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 294          * all skey handling functions read/set the skey from the PGSTE
 295          * instead of the real storage key.
 296          *
 297          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 298          * pages being detected as preserved although they are resident.
 299          *
 300          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 301          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 302          *
 303          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 304          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 305          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 306          *
 307          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 308          * cannot easily shadow the SCA because of the ipte lock.
 309          */
 310 }
 311
 312 int kvm_arch_init(void *opaque)
 313 {
 314         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 315         if (!kvm_s390_dbf)
 316                 return -ENOMEM;
 317
 318         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 319                 debug_unregister(kvm_s390_dbf);
 320                 return -ENOMEM;
 321         }
 322
 323         kvm_s390_cpu_feat_init();
 324
 325         /* Register floating interrupt controller interface. */
 326         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 327 }
 328
 329 void kvm_arch_exit(void)
 330 {
 331         debug_unregister(kvm_s390_dbf);
 332 }
 333
 334 /* Section: device related */
 335 long kvm_arch_dev_ioctl(struct file *filp,
 336                         unsigned int ioctl, unsigned long arg)
 337 {
 338         if (ioctl == KVM_S390_ENABLE_SIE)
 339                 return s390_enable_sie();
 340         return -EINVAL;
 341 }
 342
 343 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 344 {
 345         int r;
 346
 347         switch (ext) {
 348         case KVM_CAP_S390_PSW:
 349         case KVM_CAP_S390_GMAP:
 350         case KVM_CAP_SYNC_MMU:
 351 #ifdef CONFIG_KVM_S390_UCONTROL
 352         case KVM_CAP_S390_UCONTROL:
 353 #endif
 354         case KVM_CAP_ASYNC_PF:
 355         case KVM_CAP_SYNC_REGS:
 356         case KVM_CAP_ONE_REG:
 357         case KVM_CAP_ENABLE_CAP:
 358         case KVM_CAP_S390_CSS_SUPPORT:
 359         case KVM_CAP_IOEVENTFD:
 360         case KVM_CAP_DEVICE_CTRL:
 361         case KVM_CAP_ENABLE_CAP_VM:
 362         case KVM_CAP_S390_IRQCHIP:
 363         case KVM_CAP_VM_ATTRIBUTES:
 364         case KVM_CAP_MP_STATE:
 365         case KVM_CAP_S390_INJECT_IRQ:
 366         case KVM_CAP_S390_USER_SIGP:
 367         case KVM_CAP_S390_USER_STSI:
 368         case KVM_CAP_S390_SKEYS:
 369         case KVM_CAP_S390_IRQ_STATE:
 370         case KVM_CAP_S390_USER_INSTR0:
 371                 r = 1;
 372                 break;
 373         case KVM_CAP_S390_MEM_OP:
 374                 r = MEM_OP_MAX_SIZE;
 375                 break;
 376         case KVM_CAP_NR_VCPUS:
 377         case KVM_CAP_MAX_VCPUS:
 378                 r = KVM_S390_BSCA_CPU_SLOTS;
 379                 if (sclp.has_esca && sclp.has_64bscao)
 380                         r = KVM_S390_ESCA_CPU_SLOTS;
 381                 break;
 382         case KVM_CAP_NR_MEMSLOTS:
 383                 r = KVM_USER_MEM_SLOTS;
 384                 break;
 385         case KVM_CAP_S390_COW:
 386                 r = MACHINE_HAS_ESOP;
 387                 break;
 388         case KVM_CAP_S390_VECTOR_REGISTERS:
 389                 r = MACHINE_HAS_VX;
 390                 break;
 391         case KVM_CAP_S390_RI:
 392                 r = test_facility(64);
 393                 break;
 394         default:
 395                 r = 0;
 396         }
 397         return r;
 398 }
 399
 400 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 401                                         struct kvm_memory_slot *memslot)
 402 {
 403         gfn_t cur_gfn, last_gfn;
 404         unsigned long address;
 405         struct gmap *gmap = kvm->arch.gmap;
 406
 407         /* Loop over all guest pages */
 408         last_gfn = memslot->base_gfn + memslot->npages;
 409         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 410                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 411
 412                 if (test_and_clear_guest_dirty(gmap->mm, address))
 413                         mark_page_dirty(kvm, cur_gfn);
 414                 if (fatal_signal_pending(current))
 415                         return;
 416                 cond_resched();
 417         }
 418 }
 419
 420 /* Section: vm related */
 421 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 422
 423 /*
 424  * Get (and clear) the dirty memory log for a memory slot.
 425  */
 426 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 427                                struct kvm_dirty_log *log)
 428 {
 429         int r;
 430         unsigned long n;
 431         struct kvm_memslots *slots;
 432         struct kvm_memory_slot *memslot;
 433         int is_dirty = 0;
 434
 435         mutex_lock(&kvm->slots_lock);
 436
 437         r = -EINVAL;
 438         if (log->slot >= KVM_USER_MEM_SLOTS)
 439                 goto out;
 440
 441         slots = kvm_memslots(kvm);
 442         memslot = id_to_memslot(slots, log->slot);
 443         r = -ENOENT;
 444         if (!memslot->dirty_bitmap)
 445                 goto out;
 446
 447         kvm_s390_sync_dirty_log(kvm, memslot);
 448         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 449         if (r)
 450                 goto out;
 451
 452         /* Clear the dirty log */
 453         if (is_dirty) {
 454                 n = kvm_dirty_bitmap_bytes(memslot);
 455                 memset(memslot->dirty_bitmap, 0, n);
 456         }
 457         r = 0;
 458 out:
 459         mutex_unlock(&kvm->slots_lock);
 460         return r;
 461 }
 462
 463 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 464 {
 465         unsigned int i;
 466         struct kvm_vcpu *vcpu;
 467
 468         kvm_for_each_vcpu(i, vcpu, kvm) {
 469                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 470         }
 471 }
 472
 473 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 474 {
 475         int r;
 476
 477         if (cap->flags)
 478                 return -EINVAL;
 479
 480         switch (cap->cap) {
 481         case KVM_CAP_S390_IRQCHIP:
 482                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 483                 kvm->arch.use_irqchip = 1;
 484                 r = 0;
 485                 break;
 486         case KVM_CAP_S390_USER_SIGP:
 487                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 488                 kvm->arch.user_sigp = 1;
 489                 r = 0;
 490                 break;
 491         case KVM_CAP_S390_VECTOR_REGISTERS:
 492                 mutex_lock(&kvm->lock);
 493                 if (kvm->created_vcpus) {
 494                         r = -EBUSY;
 495                 } else if (MACHINE_HAS_VX) {
 496                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 497                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 498                         r = 0;
 499                 } else
 500                         r = -EINVAL;
 501                 mutex_unlock(&kvm->lock);
 502                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 503                          r ? "(not available)" : "(success)");
 504                 break;
 505         case KVM_CAP_S390_RI:
 506                 r = -EINVAL;
 507                 mutex_lock(&kvm->lock);
 508                 if (kvm->created_vcpus) {
 509                         r = -EBUSY;
 510                 } else if (test_facility(64)) {
 511                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 512                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 513                         r = 0;
 514                 }
 515                 mutex_unlock(&kvm->lock);
 516                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 517                          r ? "(not available)" : "(success)");
 518                 break;
 519         case KVM_CAP_S390_USER_STSI:
 520                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 521                 kvm->arch.user_stsi = 1;
 522                 r = 0;
 523                 break;
 524         case KVM_CAP_S390_USER_INSTR0:
 525                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 526                 kvm->arch.user_instr0 = 1;
 527                 icpt_operexc_on_all_vcpus(kvm);
 528                 r = 0;
 529                 break;
 530         default:
 531                 r = -EINVAL;
 532                 break;
 533         }
 534         return r;
 535 }
 536
 537 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 538 {
 539         int ret;
 540
 541         switch (attr->attr) {
 542         case KVM_S390_VM_MEM_LIMIT_SIZE:
 543                 ret = 0;
 544                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 545                          kvm->arch.mem_limit);
 546                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 547                         ret = -EFAULT;
 548                 break;
 549         default:
 550                 ret = -ENXIO;
 551                 break;
 552         }
 553         return ret;
 554 }
 555
 556 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 557 {
 558         int ret;
 559         unsigned int idx;
 560         switch (attr->attr) {
 561         case KVM_S390_VM_MEM_ENABLE_CMMA:
 562                 ret = -ENXIO;
 563                 if (!sclp.has_cmma)
 564                         break;
 565
 566                 ret = -EBUSY;
 567                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 568                 mutex_lock(&kvm->lock);
 569                 if (!kvm->created_vcpus) {
 570                         kvm->arch.use_cmma = 1;
 571                         ret = 0;
 572                 }
 573                 mutex_unlock(&kvm->lock);
 574                 break;
 575         case KVM_S390_VM_MEM_CLR_CMMA:
 576                 ret = -ENXIO;
 577                 if (!sclp.has_cmma)
 578                         break;
 579                 ret = -EINVAL;
 580                 if (!kvm->arch.use_cmma)
 581                         break;
 582
 583                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 584                 mutex_lock(&kvm->lock);
 585                 idx = srcu_read_lock(&kvm->srcu);
 586                 s390_reset_cmma(kvm->arch.gmap->mm);
 587                 srcu_read_unlock(&kvm->srcu, idx);
 588                 mutex_unlock(&kvm->lock);
 589                 ret = 0;
 590                 break;
 591         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 592                 unsigned long new_limit;
 593
 594                 if (kvm_is_ucontrol(kvm))
 595                         return -EINVAL;
 596
 597                 if (get_user(new_limit, (u64 __user *)attr->addr))
 598                         return -EFAULT;
 599
 600                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 601                     new_limit > kvm->arch.mem_limit)
 602                         return -E2BIG;
 603
 604                 if (!new_limit)
 605                         return -EINVAL;
 606
 607                 /* gmap_create takes last usable address */
 608                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 609                         new_limit -= 1;
 610
 611                 ret = -EBUSY;
 612                 mutex_lock(&kvm->lock);
 613                 if (!kvm->created_vcpus) {
 614                         /* gmap_create will round the limit up */
 615                         struct gmap *new = gmap_create(current->mm, new_limit);
 616
 617                         if (!new) {
 618                                 ret = -ENOMEM;
 619                         } else {
 620                                 gmap_remove(kvm->arch.gmap);
 621                                 new->private = kvm;
 622                                 kvm->arch.gmap = new;
 623                                 ret = 0;
 624                         }
 625                 }
 626                 mutex_unlock(&kvm->lock);
 627                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 628                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 629                          (void *) kvm->arch.gmap->asce);
 630                 break;
 631         }
 632         default:
 633                 ret = -ENXIO;
 634                 break;
 635         }
 636         return ret;
 637 }
 638
 639 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 640
 641 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 642 {
 643         struct kvm_vcpu *vcpu;
 644         int i;
 645
 646         if (!test_kvm_facility(kvm, 76))
 647                 return -EINVAL;
 648
 649         mutex_lock(&kvm->lock);
 650         switch (attr->attr) {
 651         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 652                 get_random_bytes(
 653                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 654                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 655                 kvm->arch.crypto.aes_kw = 1;
 656                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 657                 break;
 658         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 659                 get_random_bytes(
 660                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 661                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 662                 kvm->arch.crypto.dea_kw = 1;
 663                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 664                 break;
 665         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 666                 kvm->arch.crypto.aes_kw = 0;
 667                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 668                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 669                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 670                 break;
 671         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 672                 kvm->arch.crypto.dea_kw = 0;
 673                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 674                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 675                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 676                 break;
 677         default:
 678                 mutex_unlock(&kvm->lock);
 679                 return -ENXIO;
 680         }
 681
 682         kvm_for_each_vcpu(i, vcpu, kvm) {
 683                 kvm_s390_vcpu_crypto_setup(vcpu);
 684                 exit_sie(vcpu);
 685         }
 686         mutex_unlock(&kvm->lock);
 687         return 0;
 688 }
 689
 690 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 691 {
 692         u8 gtod_high;
 693
 694         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 695                                            sizeof(gtod_high)))
 696                 return -EFAULT;
 697
 698         if (gtod_high != 0)
 699                 return -EINVAL;
 700         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 701
 702         return 0;
 703 }
 704
 705 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 706 {
 707         u64 gtod;
 708
 709         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 710                 return -EFAULT;
 711
 712         kvm_s390_set_tod_clock(kvm, gtod);
 713         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 714         return 0;
 715 }
 716
 717 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 718 {
 719         int ret;
 720
 721         if (attr->flags)
 722                 return -EINVAL;
 723
 724         switch (attr->attr) {
 725         case KVM_S390_VM_TOD_HIGH:
 726                 ret = kvm_s390_set_tod_high(kvm, attr);
 727                 break;
 728         case KVM_S390_VM_TOD_LOW:
 729                 ret = kvm_s390_set_tod_low(kvm, attr);
 730                 break;
 731         default:
 732                 ret = -ENXIO;
 733                 break;
 734         }
 735         return ret;
 736 }
 737
 738 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 739 {
 740         u8 gtod_high = 0;
 741
 742         if (copy_to_user((void __user *)attr->addr, &gtod_high,
 743                                          sizeof(gtod_high)))
 744                 return -EFAULT;
 745         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 746
 747         return 0;
 748 }
 749
 750 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 751 {
 752         u64 gtod;
 753
 754         gtod = kvm_s390_get_tod_clock_fast(kvm);
 755         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 756                 return -EFAULT;
 757         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
 758
 759         return 0;
 760 }
 761
 762 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 763 {
 764         int ret;
 765
 766         if (attr->flags)
 767                 return -EINVAL;
 768
 769         switch (attr->attr) {
 770         case KVM_S390_VM_TOD_HIGH:
 771                 ret = kvm_s390_get_tod_high(kvm, attr);
 772                 break;
 773         case KVM_S390_VM_TOD_LOW:
 774                 ret = kvm_s390_get_tod_low(kvm, attr);
 775                 break;
 776         default:
 777                 ret = -ENXIO;
 778                 break;
 779         }
 780         return ret;
 781 }
 782
 783 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 784 {
 785         struct kvm_s390_vm_cpu_processor *proc;
 786         u16 lowest_ibc, unblocked_ibc;
 787         int ret = 0;
 788
 789         mutex_lock(&kvm->lock);
 790         if (kvm->created_vcpus) {
 791                 ret = -EBUSY;
 792                 goto out;
 793         }
 794         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 795         if (!proc) {
 796                 ret = -ENOMEM;
 797                 goto out;
 798         }
 799         if (!copy_from_user(proc, (void __user *)attr->addr,
 800                             sizeof(*proc))) {
 801                 kvm->arch.model.cpuid = proc->cpuid;
 802                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
 803                 unblocked_ibc = sclp.ibc & 0xfff;
 804                 if (lowest_ibc && proc->ibc) {
 805                         if (proc->ibc > unblocked_ibc)
 806                                 kvm->arch.model.ibc = unblocked_ibc;
 807                         else if (proc->ibc < lowest_ibc)
 808                                 kvm->arch.model.ibc = lowest_ibc;
 809                         else
 810                                 kvm->arch.model.ibc = proc->ibc;
 811                 }
 812                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
 813                        S390_ARCH_FAC_LIST_SIZE_BYTE);
 814         } else
 815                 ret = -EFAULT;
 816         kfree(proc);
 817 out:
 818         mutex_unlock(&kvm->lock);
 819         return ret;
 820 }
 821
 822 static int kvm_s390_set_processor_feat(struct kvm *kvm,
 823                                        struct kvm_device_attr *attr)
 824 {
 825         struct kvm_s390_vm_cpu_feat data;
 826         int ret = -EBUSY;
 827
 828         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
 829                 return -EFAULT;
 830         if (!bitmap_subset((unsigned long *) data.feat,
 831                            kvm_s390_available_cpu_feat,
 832                            KVM_S390_VM_CPU_FEAT_NR_BITS))
 833                 return -EINVAL;
 834
 835         mutex_lock(&kvm->lock);
 836         if (!atomic_read(&kvm->online_vcpus)) {
 837                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
 838                             KVM_S390_VM_CPU_FEAT_NR_BITS);
 839                 ret = 0;
 840         }
 841         mutex_unlock(&kvm->lock);
 842         return ret;
 843 }
 844
 845 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
 846                                           struct kvm_device_attr *attr)
 847 {
 848         /*
 849          * Once supported by kernel + hw, we have to store the subfunctions
 850          * in kvm->arch and remember that user space configured them.
 851          */
 852         return -ENXIO;
 853 }
 854
 855 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 856 {
 857         int ret = -ENXIO;
 858
 859         switch (attr->attr) {
 860         case KVM_S390_VM_CPU_PROCESSOR:
 861                 ret = kvm_s390_set_processor(kvm, attr);
 862                 break;
 863         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 864                 ret = kvm_s390_set_processor_feat(kvm, attr);
 865                 break;
 866         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 867                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
 868                 break;
 869         }
 870         return ret;
 871 }
 872
 873 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 874 {
 875         struct kvm_s390_vm_cpu_processor *proc;
 876         int ret = 0;
 877
 878         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 879         if (!proc) {
 880                 ret = -ENOMEM;
 881                 goto out;
 882         }
 883         proc->cpuid = kvm->arch.model.cpuid;
 884         proc->ibc = kvm->arch.model.ibc;
 885         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
 886                S390_ARCH_FAC_LIST_SIZE_BYTE);
 887         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
 888                 ret = -EFAULT;
 889         kfree(proc);
 890 out:
 891         return ret;
 892 }
 893
 894 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
 895 {
 896         struct kvm_s390_vm_cpu_machine *mach;
 897         int ret = 0;
 898
 899         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
 900         if (!mach) {
 901                 ret = -ENOMEM;
 902                 goto out;
 903         }
 904         get_cpu_id((struct cpuid *) &mach->cpuid);
 905         mach->ibc = sclp.ibc;
 906         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
 907                S390_ARCH_FAC_LIST_SIZE_BYTE);
 908         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
 909                S390_ARCH_FAC_LIST_SIZE_BYTE);
 910         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
 911                 ret = -EFAULT;
 912         kfree(mach);
 913 out:
 914         return ret;
 915 }
 916
 917 static int kvm_s390_get_processor_feat(struct kvm *kvm,
 918                                        struct kvm_device_attr *attr)
 919 {
 920         struct kvm_s390_vm_cpu_feat data;
 921
 922         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
 923                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 924         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 925                 return -EFAULT;
 926         return 0;
 927 }
 928
 929 static int kvm_s390_get_machine_feat(struct kvm *kvm,
 930                                      struct kvm_device_attr *attr)
 931 {
 932         struct kvm_s390_vm_cpu_feat data;
 933
 934         bitmap_copy((unsigned long *) data.feat,
 935                     kvm_s390_available_cpu_feat,
 936                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 937         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 938                 return -EFAULT;
 939         return 0;
 940 }
 941
 942 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
 943                                           struct kvm_device_attr *attr)
 944 {
 945         /*
 946          * Once we can actually configure subfunctions (kernel + hw support),
 947          * we have to check if they were already set by user space, if so copy
 948          * them from kvm->arch.
 949          */
 950         return -ENXIO;
 951 }
 952
 953 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
 954                                         struct kvm_device_attr *attr)
 955 {
 956         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
 957             sizeof(struct kvm_s390_vm_cpu_subfunc)))
 958                 return -EFAULT;
 959         return 0;
 960 }
 961 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 962 {
 963         int ret = -ENXIO;
 964
 965         switch (attr->attr) {
 966         case KVM_S390_VM_CPU_PROCESSOR:
 967                 ret = kvm_s390_get_processor(kvm, attr);
 968                 break;
 969         case KVM_S390_VM_CPU_MACHINE:
 970                 ret = kvm_s390_get_machine(kvm, attr);
 971                 break;
 972         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 973                 ret = kvm_s390_get_processor_feat(kvm, attr);
 974                 break;
 975         case KVM_S390_VM_CPU_MACHINE_FEAT:
 976                 ret = kvm_s390_get_machine_feat(kvm, attr);
 977                 break;
 978         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 979                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
 980                 break;
 981         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 982                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
 983                 break;
 984         }
 985         return ret;
 986 }
 987
 988 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 989 {
 990         int ret;
 991
 992         switch (attr->group) {
 993         case KVM_S390_VM_MEM_CTRL:
 994                 ret = kvm_s390_set_mem_control(kvm, attr);
 995                 break;
 996         case KVM_S390_VM_TOD:
 997                 ret = kvm_s390_set_tod(kvm, attr);
 998                 break;
 999         case KVM_S390_VM_CPU_MODEL:
1000                 ret = kvm_s390_set_cpu_model(kvm, attr);
1001                 break;
1002         case KVM_S390_VM_CRYPTO:
1003                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1004                 break;
1005         default:
1006                 ret = -ENXIO;
1007                 break;
1008         }
1009
1010         return ret;
1011 }
1012
1013 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1014 {
1015         int ret;
1016
1017         switch (attr->group) {
1018         case KVM_S390_VM_MEM_CTRL:
1019                 ret = kvm_s390_get_mem_control(kvm, attr);
1020                 break;
1021         case KVM_S390_VM_TOD:
1022                 ret = kvm_s390_get_tod(kvm, attr);
1023                 break;
1024         case KVM_S390_VM_CPU_MODEL:
1025                 ret = kvm_s390_get_cpu_model(kvm, attr);
1026                 break;
1027         default:
1028                 ret = -ENXIO;
1029                 break;
1030         }
1031
1032         return ret;
1033 }
1034
1035 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1036 {
1037         int ret;
1038
1039         switch (attr->group) {
1040         case KVM_S390_VM_MEM_CTRL:
1041                 switch (attr->attr) {
1042                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1043                 case KVM_S390_VM_MEM_CLR_CMMA:
1044                         ret = sclp.has_cmma ? 0 : -ENXIO;
1045                         break;
1046                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1047                         ret = 0;
1048                         break;
1049                 default:
1050                         ret = -ENXIO;
1051                         break;
1052                 }
1053                 break;
1054         case KVM_S390_VM_TOD:
1055                 switch (attr->attr) {
1056                 case KVM_S390_VM_TOD_LOW:
1057                 case KVM_S390_VM_TOD_HIGH:
1058                         ret = 0;
1059                         break;
1060                 default:
1061                         ret = -ENXIO;
1062                         break;
1063                 }
1064                 break;
1065         case KVM_S390_VM_CPU_MODEL:
1066                 switch (attr->attr) {
1067                 case KVM_S390_VM_CPU_PROCESSOR:
1068                 case KVM_S390_VM_CPU_MACHINE:
1069                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1070                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1071                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1072                         ret = 0;
1073                         break;
1074                 /* configuring subfunctions is not supported yet */
1075                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1076                 default:
1077                         ret = -ENXIO;
1078                         break;
1079                 }
1080                 break;
1081         case KVM_S390_VM_CRYPTO:
1082                 switch (attr->attr) {
1083                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1084                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1085                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1086                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1087                         ret = 0;
1088                         break;
1089                 default:
1090                         ret = -ENXIO;
1091                         break;
1092                 }
1093                 break;
1094         default:
1095                 ret = -ENXIO;
1096                 break;
1097         }
1098
1099         return ret;
1100 }
1101
1102 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1103 {
1104         uint8_t *keys;
1105         uint64_t hva;
1106         int i, r = 0;
1107
1108         if (args->flags != 0)
1109                 return -EINVAL;
1110
1111         /* Is this guest using storage keys? */
1112         if (!mm_use_skey(current->mm))
1113                 return KVM_S390_GET_SKEYS_NONE;
1114
1115         /* Enforce sane limit on memory allocation */
1116         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1117                 return -EINVAL;
1118
1119         keys = kmalloc_array(args->count, sizeof(uint8_t),
1120                              GFP_KERNEL | __GFP_NOWARN);
1121         if (!keys)
1122                 keys = vmalloc(sizeof(uint8_t) * args->count);
1123         if (!keys)
1124                 return -ENOMEM;
1125
1126         down_read(&current->mm->mmap_sem);
1127         for (i = 0; i < args->count; i++) {
1128                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1129                 if (kvm_is_error_hva(hva)) {
1130                         r = -EFAULT;
1131                         break;
1132                 }
1133
1134                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1135                 if (r)
1136                         break;
1137         }
1138         up_read(&current->mm->mmap_sem);
1139
1140         if (!r) {
1141                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1142                                  sizeof(uint8_t) * args->count);
1143                 if (r)
1144                         r = -EFAULT;
1145         }
1146
1147         kvfree(keys);
1148         return r;
1149 }
1150
1151 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1152 {
1153         uint8_t *keys;
1154         uint64_t hva;
1155         int i, r = 0;
1156
1157         if (args->flags != 0)
1158                 return -EINVAL;
1159
1160         /* Enforce sane limit on memory allocation */
1161         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1162                 return -EINVAL;
1163
1164         keys = kmalloc_array(args->count, sizeof(uint8_t),
1165                              GFP_KERNEL | __GFP_NOWARN);
1166         if (!keys)
1167                 keys = vmalloc(sizeof(uint8_t) * args->count);
1168         if (!keys)
1169                 return -ENOMEM;
1170
1171         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1172                            sizeof(uint8_t) * args->count);
1173         if (r) {
1174                 r = -EFAULT;
1175                 goto out;
1176         }
1177
1178         /* Enable storage key handling for the guest */
1179         r = s390_enable_skey();
1180         if (r)
1181                 goto out;
1182
1183         down_read(&current->mm->mmap_sem);
1184         for (i = 0; i < args->count; i++) {
1185                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1186                 if (kvm_is_error_hva(hva)) {
1187                         r = -EFAULT;
1188                         break;
1189                 }
1190
1191                 /* Lowest order bit is reserved */
1192                 if (keys[i] & 0x01) {
1193                         r = -EINVAL;
1194                         break;
1195                 }
1196
1197                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1198                 if (r)
1199                         break;
1200         }
1201         up_read(&current->mm->mmap_sem);
1202 out:
1203         kvfree(keys);
1204         return r;
1205 }
1206
1207 long kvm_arch_vm_ioctl(struct file *filp,
1208                        unsigned int ioctl, unsigned long arg)
1209 {
1210         struct kvm *kvm = filp->private_data;
1211         void __user *argp = (void __user *)arg;
1212         struct kvm_device_attr attr;
1213         int r;
1214
1215         switch (ioctl) {
1216         case KVM_S390_INTERRUPT: {
1217                 struct kvm_s390_interrupt s390int;
1218
1219                 r = -EFAULT;
1220                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1221                         break;
1222                 r = kvm_s390_inject_vm(kvm, &s390int);
1223                 break;
1224         }
1225         case KVM_ENABLE_CAP: {
1226                 struct kvm_enable_cap cap;
1227                 r = -EFAULT;
1228                 if (copy_from_user(&cap, argp, sizeof(cap)))
1229                         break;
1230                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1231                 break;
1232         }
1233         case KVM_CREATE_IRQCHIP: {
1234                 struct kvm_irq_routing_entry routing;
1235
1236                 r = -EINVAL;
1237                 if (kvm->arch.use_irqchip) {
1238                         /* Set up dummy routing. */
1239                         memset(&routing, 0, sizeof(routing));
1240                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1241                 }
1242                 break;
1243         }
1244         case KVM_SET_DEVICE_ATTR: {
1245                 r = -EFAULT;
1246                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1247                         break;
1248                 r = kvm_s390_vm_set_attr(kvm, &attr);
1249                 break;
1250         }
1251         case KVM_GET_DEVICE_ATTR: {
1252                 r = -EFAULT;
1253                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1254                         break;
1255                 r = kvm_s390_vm_get_attr(kvm, &attr);
1256                 break;
1257         }
1258         case KVM_HAS_DEVICE_ATTR: {
1259                 r = -EFAULT;
1260                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1261                         break;
1262                 r = kvm_s390_vm_has_attr(kvm, &attr);
1263                 break;
1264         }
1265         case KVM_S390_GET_SKEYS: {
1266                 struct kvm_s390_skeys args;
1267
1268                 r = -EFAULT;
1269                 if (copy_from_user(&args, argp,
1270                                    sizeof(struct kvm_s390_skeys)))
1271                         break;
1272                 r = kvm_s390_get_skeys(kvm, &args);
1273                 break;
1274         }
1275         case KVM_S390_SET_SKEYS: {
1276                 struct kvm_s390_skeys args;
1277
1278                 r = -EFAULT;
1279                 if (copy_from_user(&args, argp,
1280                                    sizeof(struct kvm_s390_skeys)))
1281                         break;
1282                 r = kvm_s390_set_skeys(kvm, &args);
1283                 break;
1284         }
1285         default:
1286                 r = -ENOTTY;
1287         }
1288
1289         return r;
1290 }
1291
/*
 * Query the AP configuration information by issuing PQAP(QCI).
 *
 * @config: buffer that receives the configuration information; the first
 *          128 bytes are cleared before the instruction is issued, so the
 *          buffer has to be at least that large
 *
 * Returns the condition code left by the instruction (0 on success).
 * NOTE(review): if the instruction raises an exception, the exception
 * table entry presumably resumes execution behind the condition code
 * extraction, so that the initial value 0 is returned - confirm.
 */
static int kvm_s390_query_ap_config(u8 *config)
{
        u32 fcn_code = 0x04000000UL;
        u32 cc = 0;

        memset(config, 0, 128);
        asm volatile(
                /* general register 0: function code, register 2: buffer */
                "lgr 0,%1\n"
                "lgr 2,%2\n"
                ".long 0xb2af0000\n"            /* PQAP(QCI) */
                /* move the condition code into the low bits of cc */
                "0: ipm %0\n"
                "srl %0,28\n"
                "1:\n"
                EX_TABLE(0b, 1b)
                : "+r" (cc)
                : "r" (fcn_code), "r" (config)
                : "cc", "0", "2", "memory"
        );

        return cc;
}
1313
1314 static int kvm_s390_apxa_installed(void)
1315 {
1316         u8 config[128];
1317         int cc;
1318
1319         if (test_facility(12)) {
1320                 cc = kvm_s390_query_ap_config(config);
1321
1322                 if (cc)
1323                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1324                 else
1325                         return config[0] & 0x40;
1326         }
1327
1328         return 0;
1329 }
1330
1331 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1332 {
1333         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1334
1335         if (kvm_s390_apxa_installed())
1336                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1337         else
1338                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1339 }
1340
1341 static u64 kvm_s390_get_initial_cpuid(void)
1342 {
1343         struct cpuid cpuid;
1344
1345         get_cpu_id(&cpuid);
1346         cpuid.version = 0xff;
1347         return *((u64 *) &cpuid);
1348 }
1349
1350 static void kvm_s390_crypto_init(struct kvm *kvm)
1351 {
1352         if (!test_kvm_facility(kvm, 76))
1353                 return;
1354
1355         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1356         kvm_s390_set_crycb_format(kvm);
1357
1358         /* Enable AES/DEA protected key functions by default */
1359         kvm->arch.crypto.aes_kw = 1;
1360         kvm->arch.crypto.dea_kw = 1;
1361         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1362                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1363         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1364                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1365 }
1366
1367 static void sca_dispose(struct kvm *kvm)
1368 {
1369         if (kvm->arch.use_esca)
1370                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1371         else
1372                 free_page((unsigned long)(kvm->arch.sca));
1373         kvm->arch.sca = NULL;
1374 }
1375
1376 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1377 {
1378         gfp_t alloc_flags = GFP_KERNEL;
1379         int i, rc;
1380         char debug_name[16];
1381         static unsigned long sca_offset;
1382
1383         rc = -EINVAL;
1384 #ifdef CONFIG_KVM_S390_UCONTROL
1385         if (type & ~KVM_VM_S390_UCONTROL)
1386                 goto out_err;
1387         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1388                 goto out_err;
1389 #else
1390         if (type)
1391                 goto out_err;
1392 #endif
1393
1394         rc = s390_enable_sie();
1395         if (rc)
1396                 goto out_err;
1397
1398         rc = -ENOMEM;
1399
1400         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1401
1402         kvm->arch.use_esca = 0; /* start with basic SCA */
1403         if (!sclp.has_64bscao)
1404                 alloc_flags |= GFP_DMA;
1405         rwlock_init(&kvm->arch.sca_lock);
1406         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1407         if (!kvm->arch.sca)
1408                 goto out_err;
1409         spin_lock(&kvm_lock);
1410         sca_offset += 16;
1411         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1412                 sca_offset = 0;
1413         kvm->arch.sca = (struct bsca_block *)
1414                         ((char *) kvm->arch.sca + sca_offset);
1415         spin_unlock(&kvm_lock);
1416
1417         sprintf(debug_name, "kvm-%u", current->pid);
1418
1419         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1420         if (!kvm->arch.dbf)
1421                 goto out_err;
1422
1423         kvm->arch.sie_page2 =
1424              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1425         if (!kvm->arch.sie_page2)
1426                 goto out_err;
1427
1428         /* Populate the facility mask initially. */
1429         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1430                S390_ARCH_FAC_LIST_SIZE_BYTE);
1431         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1432                 if (i < kvm_s390_fac_list_mask_size())
1433                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1434                 else
1435                         kvm->arch.model.fac_mask[i] = 0UL;
1436         }
1437
1438         /* Populate the facility list initially. */
1439         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1440         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1441                S390_ARCH_FAC_LIST_SIZE_BYTE);
1442
1443         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1444         set_kvm_facility(kvm->arch.model.fac_list, 74);
1445
1446         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1447         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1448
1449         kvm_s390_crypto_init(kvm);
1450
1451         spin_lock_init(&kvm->arch.float_int.lock);
1452         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1453                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1454         init_waitqueue_head(&kvm->arch.ipte_wq);
1455         mutex_init(&kvm->arch.ipte_mutex);
1456
1457         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1458         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1459
1460         if (type & KVM_VM_S390_UCONTROL) {
1461                 kvm->arch.gmap = NULL;
1462                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1463         } else {
1464                 if (sclp.hamax == U64_MAX)
1465                         kvm->arch.mem_limit = TASK_MAX_SIZE;
1466                 else
1467                         kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1468                                                     sclp.hamax + 1);
1469                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1470                 if (!kvm->arch.gmap)
1471                         goto out_err;
1472                 kvm->arch.gmap->private = kvm;
1473                 kvm->arch.gmap->pfault_enabled = 0;
1474         }
1475
1476         kvm->arch.css_support = 0;
1477         kvm->arch.use_irqchip = 0;
1478         kvm->arch.epoch = 0;
1479
1480         spin_lock_init(&kvm->arch.start_stop_lock);
1481         kvm_s390_vsie_init(kvm);
1482         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1483
1484         return 0;
1485 out_err:
1486         free_page((unsigned long)kvm->arch.sie_page2);
1487         debug_unregister(kvm->arch.dbf);
1488         sca_dispose(kvm);
1489         KVM_EVENT(3, "creation of vm failed: %d", rc);
1490         return rc;
1491 }
1492
1493 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1494 {
1495         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1496         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1497         kvm_s390_clear_local_irqs(vcpu);
1498         kvm_clear_async_pf_completion_queue(vcpu);
1499         if (!kvm_is_ucontrol(vcpu->kvm))
1500                 sca_del_vcpu(vcpu);
1501
1502         if (kvm_is_ucontrol(vcpu->kvm))
1503                 gmap_remove(vcpu->arch.gmap);
1504
1505         if (vcpu->kvm->arch.use_cmma)
1506                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1507         free_page((unsigned long)(vcpu->arch.sie_block));
1508
1509         kvm_vcpu_uninit(vcpu);
1510         kmem_cache_free(kvm_vcpu_cache, vcpu);
1511 }
1512
1513 static void kvm_free_vcpus(struct kvm *kvm)
1514 {
1515         unsigned int i;
1516         struct kvm_vcpu *vcpu;
1517
1518         kvm_for_each_vcpu(i, vcpu, kvm)
1519                 kvm_arch_vcpu_destroy(vcpu);
1520
1521         mutex_lock(&kvm->lock);
1522         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1523                 kvm->vcpus[i] = NULL;
1524
1525         atomic_set(&kvm->online_vcpus, 0);
1526         mutex_unlock(&kvm->lock);
1527 }
1528
/*
 * Architecture specific part of VM destruction: release everything that
 * kvm_arch_init_vm() and later VM setup allocated.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
        /* the VCPUs go first, they remove themselves from the SCA */
        kvm_free_vcpus(kvm);
        sca_dispose(kvm);
        debug_unregister(kvm->arch.dbf);
        free_page((unsigned long)kvm->arch.sie_page2);
        /* user controlled VMs have no VM wide gmap, only per VCPU ones */
        if (!kvm_is_ucontrol(kvm))
                gmap_remove(kvm->arch.gmap);
        kvm_s390_destroy_adapters(kvm);
        kvm_s390_clear_float_irqs(kvm);
        kvm_s390_vsie_destroy(kvm);
        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
1542
1543 /* Section: vcpu related */
1544 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1545 {
1546         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1547         if (!vcpu->arch.gmap)
1548                 return -ENOMEM;
1549         vcpu->arch.gmap->private = vcpu->kvm;
1550
1551         return 0;
1552 }
1553
1554 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1555 {
1556         read_lock(&vcpu->kvm->arch.sca_lock);
1557         if (vcpu->kvm->arch.use_esca) {
1558                 struct esca_block *sca = vcpu->kvm->arch.sca;
1559
1560                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1561                 sca->cpu[vcpu->vcpu_id].sda = 0;
1562         } else {
1563                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1564
1565                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1566                 sca->cpu[vcpu->vcpu_id].sda = 0;
1567         }
1568         read_unlock(&vcpu->kvm->arch.sca_lock);
1569 }
1570
1571 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1572 {
1573         read_lock(&vcpu->kvm->arch.sca_lock);
1574         if (vcpu->kvm->arch.use_esca) {
1575                 struct esca_block *sca = vcpu->kvm->arch.sca;
1576
1577                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1578                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1579                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1580                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1581                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1582         } else {
1583                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1584
1585                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1586                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1587                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1588                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1589         }
1590         read_unlock(&vcpu->kvm->arch.sca_lock);
1591 }
1592
1593 /* Basic SCA to Extended SCA data copy routines */
1594 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1595 {
1596         d->sda = s->sda;
1597         d->sigp_ctrl.c = s->sigp_ctrl.c;
1598         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1599 }
1600
1601 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1602 {
1603         int i;
1604
1605         d->ipte_control = s->ipte_control;
1606         d->mcn[0] = s->mcn;
1607         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1608                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1609 }
1610
/*
 * Replace the basic SCA of the VM by a newly allocated extended SCA.
 *
 * All VCPUs are blocked and the SCA lock is held for writing while the
 * contents are copied and every existing VCPU is pointed to the new
 * block. The old basic SCA page is freed afterwards.
 *
 * The only caller visible here, sca_can_add_vcpu(), holds kvm->lock.
 *
 * Returns 0 on success or -ENOMEM if the extended SCA cannot be allocated.
 */
static int sca_switch_to_extended(struct kvm *kvm)
{
        struct bsca_block *old_sca = kvm->arch.sca;
        struct esca_block *new_sca;
        struct kvm_vcpu *vcpu;
        unsigned int vcpu_idx;
        u32 scaol, scaoh;

        new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
        if (!new_sca)
                return -ENOMEM;

        /* high and low word of the new origin, low 6 bits masked off */
        scaoh = (u32)((u64)(new_sca) >> 32);
        scaol = (u32)(u64)(new_sca) & ~0x3fU;

        kvm_s390_vcpu_block_all(kvm);
        write_lock(&kvm->arch.sca_lock);

        sca_copy_b_to_e(new_sca, old_sca);

        /* same SIE block setup as sca_add_vcpu() does for an extended SCA */
        kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
                vcpu->arch.sie_block->scaoh = scaoh;
                vcpu->arch.sie_block->scaol = scaol;
                vcpu->arch.sie_block->ecb2 |= 0x04U;
        }
        kvm->arch.sca = new_sca;
        kvm->arch.use_esca = 1;

        write_unlock(&kvm->arch.sca_lock);
        kvm_s390_vcpu_unblock_all(kvm);

        free_page((unsigned long)old_sca);

        VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
                 old_sca, kvm->arch.sca);
        return 0;
}
1648
1649 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1650 {
1651         int rc;
1652
1653         if (id < KVM_S390_BSCA_CPU_SLOTS)
1654                 return true;
1655         if (!sclp.has_esca || !sclp.has_64bscao)
1656                 return false;
1657
1658         mutex_lock(&kvm->lock);
1659         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1660         mutex_unlock(&kvm->lock);
1661
1662         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1663 }
1664
1665 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1666 {
1667         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1668         kvm_clear_async_pf_completion_queue(vcpu);
1669         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1670                                     KVM_SYNC_GPRS |
1671                                     KVM_SYNC_ACRS |
1672                                     KVM_SYNC_CRS |
1673                                     KVM_SYNC_ARCH0 |
1674                                     KVM_SYNC_PFAULT;
1675         kvm_s390_set_prefix(vcpu, 0);
1676         if (test_kvm_facility(vcpu->kvm, 64))
1677                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1678         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1679          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1680          */
1681         if (MACHINE_HAS_VX)
1682                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1683         else
1684                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1685
1686         if (kvm_is_ucontrol(vcpu->kvm))
1687                 return __kvm_ucontrol_vcpu_init(vcpu);
1688
1689         return 0;
1690 }
1691
1692 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1693 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1694 {
1695         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1696         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1697         vcpu->arch.cputm_start = get_tod_clock_fast();
1698         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1699 }
1700
1701 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1702 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1703 {
1704         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1705         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1706         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1707         vcpu->arch.cputm_start = 0;
1708         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1709 }
1710
1711 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1712 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1713 {
1714         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1715         vcpu->arch.cputm_enabled = true;
1716         __start_cpu_timer_accounting(vcpu);
1717 }
1718
1719 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1720 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1721 {
1722         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1723         __stop_cpu_timer_accounting(vcpu);
1724         vcpu->arch.cputm_enabled = false;
1725 }
1726
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        /* protect from TOD sync and vcpu_load/put */
        preempt_disable();
        __enable_cpu_timer_accounting(vcpu);
        preempt_enable();
}
1733
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        /* protect from TOD sync and vcpu_load/put */
        preempt_disable();
        __disable_cpu_timer_accounting(vcpu);
        preempt_enable();
}
1740
1741 /* set the cpu timer - may only be called from the VCPU thread itself */
1742 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1743 {
1744         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1745         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1746         if (vcpu->arch.cputm_enabled)
1747                 vcpu->arch.cputm_start = get_tod_clock_fast();
1748         vcpu->arch.sie_block->cputm = cputm;
1749         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1750         preempt_enable();
1751 }
1752
1753 /* update and get the cpu timer - can also be called from other VCPU threads */
1754 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1755 {
1756         unsigned int seq;
1757         __u64 value;
1758
1759         if (unlikely(!vcpu->arch.cputm_enabled))
1760                 return vcpu->arch.sie_block->cputm;
1761
1762         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1763         do {
1764                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1765                 /*
1766                  * If the writer would ever execute a read in the critical
1767                  * section, e.g. in irq context, we have a deadlock.
1768                  */
1769                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1770                 value = vcpu->arch.sie_block->cputm;
1771                 /* if cputm_start is 0, accounting is being started/stopped */
1772                 if (likely(vcpu->arch.cputm_start))
1773                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1774         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1775         preempt_enable();
1776         return value;
1777 }
1778
1779 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1780 {
1781         /* Save host register state */
1782         save_fpu_regs();
1783         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1784         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1785
1786         if (MACHINE_HAS_VX)
1787                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1788         else
1789                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1790         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1791         if (test_fp_ctl(current->thread.fpu.fpc))
1792                 /* User space provided an invalid FPC, let's clear it */
1793                 current->thread.fpu.fpc = 0;
1794
1795         save_access_regs(vcpu->arch.host_acrs);
1796         restore_access_regs(vcpu->run->s.regs.acrs);
1797         gmap_enable(vcpu->arch.enabled_gmap);
1798         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1799         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1800                 __start_cpu_timer_accounting(vcpu);
1801         vcpu->cpu = cpu;
1802 }
1803
1804 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1805 {
1806         vcpu->cpu = -1;
1807         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1808                 __stop_cpu_timer_accounting(vcpu);
1809         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1810         vcpu->arch.enabled_gmap = gmap_get_enabled();
1811         gmap_disable(vcpu->arch.enabled_gmap);
1812
1813         /* Save guest register state */
1814         save_fpu_regs();
1815         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1816
1817         /* Restore host register state */
1818         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1819         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1820
1821         save_access_regs(vcpu->run->s.regs.acrs);
1822         restore_access_regs(vcpu->arch.host_acrs);
1823 }
1824
1825 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1826 {
1827         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1828         vcpu->arch.sie_block->gpsw.mask = 0UL;
1829         vcpu->arch.sie_block->gpsw.addr = 0UL;
1830         kvm_s390_set_prefix(vcpu, 0);
1831         kvm_s390_set_cpu_timer(vcpu, 0);
1832         vcpu->arch.sie_block->ckc       = 0UL;
1833         vcpu->arch.sie_block->todpr     = 0;
1834         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1835         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1836         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1837         /* make sure the new fpc will be lazily loaded */
1838         save_fpu_regs();
1839         current->thread.fpu.fpc = 0;
1840         vcpu->arch.sie_block->gbea = 1;
1841         vcpu->arch.sie_block->pp = 0;
1842         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1843         kvm_clear_async_pf_completion_queue(vcpu);
1844         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1845                 kvm_s390_vcpu_stop(vcpu);
1846         kvm_s390_clear_local_irqs(vcpu);
1847 }
1848
1849 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1850 {
1851         mutex_lock(&vcpu->kvm->lock);
1852         preempt_disable();
1853         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1854         preempt_enable();
1855         mutex_unlock(&vcpu->kvm->lock);
1856         if (!kvm_is_ucontrol(vcpu->kvm)) {
1857                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1858                 sca_add_vcpu(vcpu);
1859         }
1860         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1861                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1862         /* make vcpu_load load the right gmap on the first trigger */
1863         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1864 }
1865
1866 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1867 {
1868         if (!test_kvm_facility(vcpu->kvm, 76))
1869                 return;
1870
1871         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1872
1873         if (vcpu->kvm->arch.crypto.aes_kw)
1874                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1875         if (vcpu->kvm->arch.crypto.dea_kw)
1876                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1877
1878         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1879 }
1880
1881 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1882 {
1883         free_page(vcpu->arch.sie_block->cbrlo);
1884         vcpu->arch.sie_block->cbrlo = 0;
1885 }
1886
1887 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1888 {
1889         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1890         if (!vcpu->arch.sie_block->cbrlo)
1891                 return -ENOMEM;
1892
1893         vcpu->arch.sie_block->ecb2 |= 0x80;
1894         vcpu->arch.sie_block->ecb2 &= ~0x08;
1895         return 0;
1896 }
1897
1898 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1899 {
1900         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1901
1902         vcpu->arch.sie_block->ibc = model->ibc;
1903         if (test_kvm_facility(vcpu->kvm, 7))
1904                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1905 }
1906
1907 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1908 {
1909         int rc = 0;
1910
1911         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1912                                                     CPUSTAT_SM |
1913                                                     CPUSTAT_STOPPED);
1914
1915         if (test_kvm_facility(vcpu->kvm, 78))
1916                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1917         else if (test_kvm_facility(vcpu->kvm, 8))
1918                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1919
1920         kvm_s390_vcpu_setup_model(vcpu);
1921
1922         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1923         if (MACHINE_HAS_ESOP)
1924                 vcpu->arch.sie_block->ecb |= 0x02;
1925         if (test_kvm_facility(vcpu->kvm, 9))
1926                 vcpu->arch.sie_block->ecb |= 0x04;
1927         if (test_kvm_facility(vcpu->kvm, 73))
1928                 vcpu->arch.sie_block->ecb |= 0x10;
1929
1930         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1931                 vcpu->arch.sie_block->ecb2 |= 0x08;
1932         vcpu->arch.sie_block->eca = 0x1002000U;
1933         if (sclp.has_cei)
1934                 vcpu->arch.sie_block->eca |= 0x80000000U;
1935         if (sclp.has_ib)
1936                 vcpu->arch.sie_block->eca |= 0x40000000U;
1937         if (sclp.has_siif)
1938                 vcpu->arch.sie_block->eca |= 1;
1939         if (sclp.has_sigpif)
1940                 vcpu->arch.sie_block->eca |= 0x10000000U;
1941         if (test_kvm_facility(vcpu->kvm, 64))
1942                 vcpu->arch.sie_block->ecb3 |= 0x01;
1943         if (test_kvm_facility(vcpu->kvm, 129)) {
1944                 vcpu->arch.sie_block->eca |= 0x00020000;
1945                 vcpu->arch.sie_block->ecd |= 0x20000000;
1946         }
1947         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1948         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1949
1950         if (vcpu->kvm->arch.use_cmma) {
1951                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1952                 if (rc)
1953                         return rc;
1954         }
1955         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1956         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1957
1958         kvm_s390_vcpu_crypto_setup(vcpu);
1959
1960         return rc;
1961 }
1962
1963 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1964                                       unsigned int id)
1965 {
1966         struct kvm_vcpu *vcpu;
1967         struct sie_page *sie_page;
1968         int rc = -EINVAL;
1969
1970         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1971                 goto out;
1972
1973         rc = -ENOMEM;
1974
1975         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1976         if (!vcpu)
1977                 goto out;
1978
1979         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1980         if (!sie_page)
1981                 goto out_free_cpu;
1982
1983         vcpu->arch.sie_block = &sie_page->sie_block;
1984         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1985
1986         /* the real guest size will always be smaller than msl */
1987         vcpu->arch.sie_block->mso = 0;
1988         vcpu->arch.sie_block->msl = sclp.hamax;
1989
1990         vcpu->arch.sie_block->icpua = id;
1991         spin_lock_init(&vcpu->arch.local_int.lock);
1992         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1993         vcpu->arch.local_int.wq = &vcpu->wq;
1994         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1995         seqcount_init(&vcpu->arch.cputm_seqcount);
1996
1997         rc = kvm_vcpu_init(vcpu, kvm, id);
1998         if (rc)
1999                 goto out_free_sie_block;
2000         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2001                  vcpu->arch.sie_block);
2002         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2003
2004         return vcpu;
2005 out_free_sie_block:
2006         free_page((unsigned long)(vcpu->arch.sie_block));
2007 out_free_cpu:
2008         kmem_cache_free(kvm_vcpu_cache, vcpu);
2009 out:
2010         return ERR_PTR(rc);
2011 }
2012
/* Report the result of kvm_s390_vcpu_has_irq(vcpu, 0) as "runnable". */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
        int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

        return has_irq;
}
2017
2018 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2019 {
2020         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2021         exit_sie(vcpu);
2022 }
2023
2024 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2025 {
2026         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2027 }
2028
2029 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2030 {
2031         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2032         exit_sie(vcpu);
2033 }
2034
2035 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2036 {
2037         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2038 }
2039
2040 /*
2041  * Kick a guest cpu out of SIE and wait until SIE is not running.
2042  * If the CPU is not running (e.g. waiting as idle) the function will
2043  * return immediately. */
2044 void exit_sie(struct kvm_vcpu *vcpu)
2045 {
2046         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2047         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2048                 cpu_relax();
2049 }
2050
/*
 * Post request @req for @vcpu and kick the guest cpu out of SIE so that
 * the request is processed synchronously.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
        kvm_make_request(req, vcpu);

        kvm_s390_vcpu_request(vcpu);
}
2057
2058 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2059                               unsigned long end)
2060 {
2061         struct kvm *kvm = gmap->private;
2062         struct kvm_vcpu *vcpu;
2063         unsigned long prefix;
2064         int i;
2065
2066         if (gmap_is_shadow(gmap))
2067                 return;
2068         if (start >= 1UL << 31)
2069                 /* We are only interested in prefix pages */
2070                 return;
2071         kvm_for_each_vcpu(i, vcpu, kvm) {
2072                 /* match against both prefix pages */
2073                 prefix = kvm_s390_get_prefix(vcpu);
2074                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2075                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2076                                    start, end);
2077                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2078                 }
2079         }
2080 }
2081
/*
 * Referenced by kvm common code but never expected to be called on s390;
 * a call is treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
        /* kvm common code refers to this, but never calls it */
        BUG();

        return 0;
}
2088
2089 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2090                                            struct kvm_one_reg *reg)
2091 {
2092         int r = -EINVAL;
2093
2094         switch (reg->id) {
2095         case KVM_REG_S390_TODPR:
2096                 r = put_user(vcpu->arch.sie_block->todpr,
2097                              (u32 __user *)reg->addr);
2098                 break;
2099         case KVM_REG_S390_EPOCHDIFF:
2100                 r = put_user(vcpu->arch.sie_block->epoch,
2101                              (u64 __user *)reg->addr);
2102                 break;
2103         case KVM_REG_S390_CPU_TIMER:
2104                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2105                              (u64 __user *)reg->addr);
2106                 break;
2107         case KVM_REG_S390_CLOCK_COMP:
2108                 r = put_user(vcpu->arch.sie_block->ckc,
2109                              (u64 __user *)reg->addr);
2110                 break;
2111         case KVM_REG_S390_PFTOKEN:
2112                 r = put_user(vcpu->arch.pfault_token,
2113                              (u64 __user *)reg->addr);
2114                 break;
2115         case KVM_REG_S390_PFCOMPARE:
2116                 r = put_user(vcpu->arch.pfault_compare,
2117                              (u64 __user *)reg->addr);
2118                 break;
2119         case KVM_REG_S390_PFSELECT:
2120                 r = put_user(vcpu->arch.pfault_select,
2121                              (u64 __user *)reg->addr);
2122                 break;
2123         case KVM_REG_S390_PP:
2124                 r = put_user(vcpu->arch.sie_block->pp,
2125                              (u64 __user *)reg->addr);
2126                 break;
2127         case KVM_REG_S390_GBEA:
2128                 r = put_user(vcpu->arch.sie_block->gbea,
2129                              (u64 __user *)reg->addr);
2130                 break;
2131         default:
2132                 break;
2133         }
2134
2135         return r;
2136 }
2137
2138 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2139                                            struct kvm_one_reg *reg)
2140 {
2141         int r = -EINVAL;
2142         __u64 val;
2143
2144         switch (reg->id) {
2145         case KVM_REG_S390_TODPR:
2146                 r = get_user(vcpu->arch.sie_block->todpr,
2147                              (u32 __user *)reg->addr);
2148                 break;
2149         case KVM_REG_S390_EPOCHDIFF:
2150                 r = get_user(vcpu->arch.sie_block->epoch,
2151                              (u64 __user *)reg->addr);
2152                 break;
2153         case KVM_REG_S390_CPU_TIMER:
2154                 r = get_user(val, (u64 __user *)reg->addr);
2155                 if (!r)
2156                         kvm_s390_set_cpu_timer(vcpu, val);
2157                 break;
2158         case KVM_REG_S390_CLOCK_COMP:
2159                 r = get_user(vcpu->arch.sie_block->ckc,
2160                              (u64 __user *)reg->addr);
2161                 break;
2162         case KVM_REG_S390_PFTOKEN:
2163                 r = get_user(vcpu->arch.pfault_token,
2164                              (u64 __user *)reg->addr);
2165                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2166                         kvm_clear_async_pf_completion_queue(vcpu);
2167                 break;
2168         case KVM_REG_S390_PFCOMPARE:
2169                 r = get_user(vcpu->arch.pfault_compare,
2170                              (u64 __user *)reg->addr);
2171                 break;
2172         case KVM_REG_S390_PFSELECT:
2173                 r = get_user(vcpu->arch.pfault_select,
2174                              (u64 __user *)reg->addr);
2175                 break;
2176         case KVM_REG_S390_PP:
2177                 r = get_user(vcpu->arch.sie_block->pp,
2178                              (u64 __user *)reg->addr);
2179                 break;
2180         case KVM_REG_S390_GBEA:
2181                 r = get_user(vcpu->arch.sie_block->gbea,
2182                              (u64 __user *)reg->addr);
2183                 break;
2184         default:
2185                 break;
2186         }
2187
2188         return r;
2189 }
2190
/* ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
        kvm_s390_vcpu_initial_reset(vcpu);

        return 0;
}
2196
2197 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2198 {
2199         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2200         return 0;
2201 }
2202
2203 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2204 {
2205         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2206         return 0;
2207 }
2208
2209 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2210                                   struct kvm_sregs *sregs)
2211 {
2212         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2213         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2214         restore_access_regs(vcpu->run->s.regs.acrs);
2215         return 0;
2216 }
2217
2218 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2219                                   struct kvm_sregs *sregs)
2220 {
2221         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2222         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2223         return 0;
2224 }
2225
2226 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2227 {
2228         /* make sure the new values will be lazily loaded */
2229         save_fpu_regs();
2230         if (test_fp_ctl(fpu->fpc))
2231                 return -EINVAL;
2232         current->thread.fpu.fpc = fpu->fpc;
2233         if (MACHINE_HAS_VX)
2234                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2235                                  (freg_t *) fpu->fprs);
2236         else
2237                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2238         return 0;
2239 }
2240
2241 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2242 {
2243         /* make sure we have the latest values */
2244         save_fpu_regs();
2245         if (MACHINE_HAS_VX)
2246                 convert_vx_to_fp((freg_t *) fpu->fprs,
2247                                  (__vector128 *) vcpu->run->s.regs.vrs);
2248         else
2249                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2250         fpu->fpc = current->thread.fpu.fpc;
2251         return 0;
2252 }
2253
2254 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2255 {
2256         int rc = 0;
2257
2258         if (!is_vcpu_stopped(vcpu))
2259                 rc = -EBUSY;
2260         else {
2261                 vcpu->run->psw_mask = psw.mask;
2262                 vcpu->run->psw_addr = psw.addr;
2263         }
2264         return rc;
2265 }
2266
2267 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2268                                   struct kvm_translation *tr)
2269 {
2270         return -EINVAL; /* not implemented yet */
2271 }
2272
2273 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2274                               KVM_GUESTDBG_USE_HW_BP | \
2275                               KVM_GUESTDBG_ENABLE)
2276
2277 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2278                                         struct kvm_guest_debug *dbg)
2279 {
2280         int rc = 0;
2281
2282         vcpu->guest_debug = 0;
2283         kvm_s390_clear_bp_data(vcpu);
2284
2285         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2286                 return -EINVAL;
2287         if (!sclp.has_gpere)
2288                 return -EINVAL;
2289
2290         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2291                 vcpu->guest_debug = dbg->control;
2292                 /* enforce guest PER */
2293                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2294
2295                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2296                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2297         } else {
2298                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2299                 vcpu->arch.guestdbg.last_bp = 0;
2300         }
2301
2302         if (rc) {
2303                 vcpu->guest_debug = 0;
2304                 kvm_s390_clear_bp_data(vcpu);
2305                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2306         }
2307
2308         return rc;
2309 }
2310
2311 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2312                                     struct kvm_mp_state *mp_state)
2313 {
2314         /* CHECK_STOP and LOAD are not supported yet */
2315         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2316                                        KVM_MP_STATE_OPERATING;
2317 }
2318
2319 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2320                                     struct kvm_mp_state *mp_state)
2321 {
2322         int rc = 0;
2323
2324         /* user space knows about this interface - let it control the state */
2325         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2326
2327         switch (mp_state->mp_state) {
2328         case KVM_MP_STATE_STOPPED:
2329                 kvm_s390_vcpu_stop(vcpu);
2330                 break;
2331         case KVM_MP_STATE_OPERATING:
2332                 kvm_s390_vcpu_start(vcpu);
2333                 break;
2334         case KVM_MP_STATE_LOAD:
2335         case KVM_MP_STATE_CHECK_STOP:
2336                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2337         default:
2338                 rc = -ENXIO;
2339         }
2340
2341         return rc;
2342 }
2343
2344 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2345 {
2346         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2347 }
2348
2349 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2350 {
2351 retry:
2352         kvm_s390_vcpu_request_handled(vcpu);
2353         if (!vcpu->requests)
2354                 return 0;
2355         /*
2356          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2357          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2358          * This ensures that the ipte instruction for this request has
2359          * already finished. We might race against a second unmapper that
2360          * wants to set the blocking bit. Lets just retry the request loop.
2361          */
2362         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2363                 int rc;
2364                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2365                                           kvm_s390_get_prefix(vcpu),
2366                                           PAGE_SIZE * 2, PROT_WRITE);
2367                 if (rc) {
2368                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2369                         return rc;
2370                 }
2371                 goto retry;
2372         }
2373
2374         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2375                 vcpu->arch.sie_block->ihcpu = 0xffff;
2376                 goto retry;
2377         }
2378
2379         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2380                 if (!ibs_enabled(vcpu)) {
2381                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2382                         atomic_or(CPUSTAT_IBS,
2383                                         &vcpu->arch.sie_block->cpuflags);
2384                 }
2385                 goto retry;
2386         }
2387
2388         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2389                 if (ibs_enabled(vcpu)) {
2390                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2391                         atomic_andnot(CPUSTAT_IBS,
2392                                           &vcpu->arch.sie_block->cpuflags);
2393                 }
2394                 goto retry;
2395         }
2396
2397         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2398                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2399                 goto retry;
2400         }
2401
2402         /* nothing to do, just clear the request */
2403         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2404
2405         return 0;
2406 }
2407
2408 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2409 {
2410         struct kvm_vcpu *vcpu;
2411         int i;
2412
2413         mutex_lock(&kvm->lock);
2414         preempt_disable();
2415         kvm->arch.epoch = tod - get_tod_clock();
2416         kvm_s390_vcpu_block_all(kvm);
2417         kvm_for_each_vcpu(i, vcpu, kvm)
2418                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2419         kvm_s390_vcpu_unblock_all(kvm);
2420         preempt_enable();
2421         mutex_unlock(&kvm->lock);
2422 }
2423
2424 /**
2425  * kvm_arch_fault_in_page - fault-in guest page if necessary
2426  * @vcpu: The corresponding virtual cpu
2427  * @gpa: Guest physical address
2428  * @writable: Whether the page should be writable or not
2429  *
2430  * Make sure that a guest page has been faulted-in on the host.
2431  *
2432  * Return: Zero on success, negative error code otherwise.
2433  */
2434 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2435 {
2436         return gmap_fault(vcpu->arch.gmap, gpa,
2437                           writable ? FAULT_FLAG_WRITE : 0);
2438 }
2439
2440 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2441                                       unsigned long token)
2442 {
2443         struct kvm_s390_interrupt inti;
2444         struct kvm_s390_irq irq;
2445
2446         if (start_token) {
2447                 irq.u.ext.ext_params2 = token;
2448                 irq.type = KVM_S390_INT_PFAULT_INIT;
2449                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2450         } else {
2451                 inti.type = KVM_S390_INT_PFAULT_DONE;
2452                 inti.parm64 = token;
2453                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2454         }
2455 }
2456
2457 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2458                                      struct kvm_async_pf *work)
2459 {
2460         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2461         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2462 }
2463
2464 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2465                                  struct kvm_async_pf *work)
2466 {
2467         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2468         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2469 }
2470
/* Intentionally empty: on s390 the page is always injected directly. */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
2476
/*
 * Always reports true: s390 injects the page directly, but
 * check_async_completion is still wanted for its cleanup work.
 */
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	return true;
}
2485
2486 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2487 {
2488         hva_t hva;
2489         struct kvm_arch_async_pf arch;
2490         int rc;
2491
2492         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2493                 return 0;
2494         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2495             vcpu->arch.pfault_compare)
2496                 return 0;
2497         if (psw_extint_disabled(vcpu))
2498                 return 0;
2499         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2500                 return 0;
2501         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2502                 return 0;
2503         if (!vcpu->arch.gmap->pfault_enabled)
2504                 return 0;
2505
2506         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2507         hva += current->thread.gmap_addr & ~PAGE_MASK;
2508         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2509                 return 0;
2510
2511         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2512         return rc;
2513 }
2514
2515 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2516 {
2517         int rc, cpuflags;
2518
2519         /*
2520          * On s390 notifications for arriving pages will be delivered directly
2521          * to the guest but the house keeping for completed pfaults is
2522          * handled outside the worker.
2523          */
2524         kvm_check_async_pf_completion(vcpu);
2525
2526         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2527         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2528
2529         if (need_resched())
2530                 schedule();
2531
2532         if (test_cpu_flag(CIF_MCCK_PENDING))
2533                 s390_handle_mcck();
2534
2535         if (!kvm_is_ucontrol(vcpu->kvm)) {
2536                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2537                 if (rc)
2538                         return rc;
2539         }
2540
2541         rc = kvm_s390_handle_requests(vcpu);
2542         if (rc)
2543                 return rc;
2544
2545         if (guestdbg_enabled(vcpu)) {
2546                 kvm_s390_backup_guest_per_regs(vcpu);
2547                 kvm_s390_patch_guest_per_regs(vcpu);
2548         }
2549
2550         vcpu->arch.sie_block->icptcode = 0;
2551         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2552         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2553         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2554
2555         return 0;
2556 }
2557
2558 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2559 {
2560         struct kvm_s390_pgm_info pgm_info = {
2561                 .code = PGM_ADDRESSING,
2562         };
2563         u8 opcode, ilen;
2564         int rc;
2565
2566         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2567         trace_kvm_s390_sie_fault(vcpu);
2568
2569         /*
2570          * We want to inject an addressing exception, which is defined as a
2571          * suppressing or terminating exception. However, since we came here
2572          * by a DAT access exception, the PSW still points to the faulting
2573          * instruction since DAT exceptions are nullifying. So we've got
2574          * to look up the current opcode to get the length of the instruction
2575          * to be able to forward the PSW.
2576          */
2577         rc = read_guest_instr(vcpu, &opcode, 1);
2578         ilen = insn_length(opcode);
2579         if (rc < 0) {
2580                 return rc;
2581         } else if (rc) {
2582                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2583                  * Forward by arbitrary ilc, injection will take care of
2584                  * nullification if necessary.
2585                  */
2586                 pgm_info = vcpu->arch.pgm;
2587                 ilen = 4;
2588         }
2589         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2590         kvm_s390_forward_psw(vcpu, ilen);
2591         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2592 }
2593
2594 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2595 {
2596         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2597                    vcpu->arch.sie_block->icptcode);
2598         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2599
2600         if (guestdbg_enabled(vcpu))
2601                 kvm_s390_restore_guest_per_regs(vcpu);
2602
2603         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2604         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2605
2606         if (vcpu->arch.sie_block->icptcode > 0) {
2607                 int rc = kvm_handle_sie_intercept(vcpu);
2608
2609                 if (rc != -EOPNOTSUPP)
2610                         return rc;
2611                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2612                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2613                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2614                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2615                 return -EREMOTE;
2616         } else if (exit_reason != -EFAULT) {
2617                 vcpu->stat.exit_null++;
2618                 return 0;
2619         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2620                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2621                 vcpu->run->s390_ucontrol.trans_exc_code =
2622                                                 current->thread.gmap_addr;
2623                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2624                 return -EREMOTE;
2625         } else if (current->thread.gmap_pfault) {
2626                 trace_kvm_s390_major_guest_pfault(vcpu);
2627                 current->thread.gmap_pfault = 0;
2628                 if (kvm_arch_setup_async_pf(vcpu))
2629                         return 0;
2630                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2631         }
2632         return vcpu_post_run_fault_in_sie(vcpu);
2633 }
2634
2635 static int __vcpu_run(struct kvm_vcpu *vcpu)
2636 {
2637         int rc, exit_reason;
2638
2639         /*
2640          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2641          * ning the guest), so that memslots (and other stuff) are protected
2642          */
2643         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2644
2645         do {
2646                 rc = vcpu_pre_run(vcpu);
2647                 if (rc)
2648                         break;
2649
2650                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2651                 /*
2652                  * As PF_VCPU will be used in fault handler, between
2653                  * guest_enter and guest_exit should be no uaccess.
2654                  */
2655                 local_irq_disable();
2656                 guest_enter_irqoff();
2657                 __disable_cpu_timer_accounting(vcpu);
2658                 local_irq_enable();
2659                 exit_reason = sie64a(vcpu->arch.sie_block,
2660                                      vcpu->run->s.regs.gprs);
2661                 local_irq_disable();
2662                 __enable_cpu_timer_accounting(vcpu);
2663                 guest_exit_irqoff();
2664                 local_irq_enable();
2665                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2666
2667                 rc = vcpu_post_run(vcpu, exit_reason);
2668         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2669
2670         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2671         return rc;
2672 }
2673
2674 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2675 {
2676         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2677         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2678         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2679                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2680         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2681                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2682                 /* some control register changes require a tlb flush */
2683                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2684         }
2685         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2686                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2687                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2688                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2689                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2690                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2691         }
2692         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2693                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2694                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2695                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2696                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2697                         kvm_clear_async_pf_completion_queue(vcpu);
2698         }
2699         kvm_run->kvm_dirty_regs = 0;
2700 }
2701
2702 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2703 {
2704         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2705         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2706         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2707         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2708         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2709         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2710         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2711         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2712         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2713         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2714         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2715         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2716 }
2717
2718 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2719 {
2720         int rc;
2721         sigset_t sigsaved;
2722
2723         if (guestdbg_exit_pending(vcpu)) {
2724                 kvm_s390_prepare_debug_exit(vcpu);
2725                 return 0;
2726         }
2727
2728         if (vcpu->sigset_active)
2729                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2730
2731         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2732                 kvm_s390_vcpu_start(vcpu);
2733         } else if (is_vcpu_stopped(vcpu)) {
2734                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2735                                    vcpu->vcpu_id);
2736                 return -EINVAL;
2737         }
2738
2739         sync_regs(vcpu, kvm_run);
2740         enable_cpu_timer_accounting(vcpu);
2741
2742         might_fault();
2743         rc = __vcpu_run(vcpu);
2744
2745         if (signal_pending(current) && !rc) {
2746                 kvm_run->exit_reason = KVM_EXIT_INTR;
2747                 rc = -EINTR;
2748         }
2749
2750         if (guestdbg_exit_pending(vcpu) && !rc)  {
2751                 kvm_s390_prepare_debug_exit(vcpu);
2752                 rc = 0;
2753         }
2754
2755         if (rc == -EREMOTE) {
2756                 /* userspace support is needed, kvm_run has been prepared */
2757                 rc = 0;
2758         }
2759
2760         disable_cpu_timer_accounting(vcpu);
2761         store_regs(vcpu, kvm_run);
2762
2763         if (vcpu->sigset_active)
2764                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2765
2766         vcpu->stat.exit_userspace++;
2767         return rc;
2768 }
2769
2770 /*
2771  * store status at address
2772  * we use have two special cases:
2773  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2774  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2775  */
2776 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2777 {
2778         unsigned char archmode = 1;
2779         freg_t fprs[NUM_FPRS];
2780         unsigned int px;
2781         u64 clkcomp, cputm;
2782         int rc;
2783
2784         px = kvm_s390_get_prefix(vcpu);
2785         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2786                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2787                         return -EFAULT;
2788                 gpa = 0;
2789         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2790                 if (write_guest_real(vcpu, 163, &archmode, 1))
2791                         return -EFAULT;
2792                 gpa = px;
2793         } else
2794                 gpa -= __LC_FPREGS_SAVE_AREA;
2795
2796         /* manually convert vector registers if necessary */
2797         if (MACHINE_HAS_VX) {
2798                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2799                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2800                                      fprs, 128);
2801         } else {
2802                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2803                                      vcpu->run->s.regs.fprs, 128);
2804         }
2805         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2806                               vcpu->run->s.regs.gprs, 128);
2807         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2808                               &vcpu->arch.sie_block->gpsw, 16);
2809         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2810                               &px, 4);
2811         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2812                               &vcpu->run->s.regs.fpc, 4);
2813         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2814                               &vcpu->arch.sie_block->todpr, 4);
2815         cputm = kvm_s390_get_cpu_timer(vcpu);
2816         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2817                               &cputm, 8);
2818         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2819         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2820                               &clkcomp, 8);
2821         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2822                               &vcpu->run->s.regs.acrs, 64);
2823         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2824                               &vcpu->arch.sie_block->gcr, 128);
2825         return rc ? -EFAULT : 0;
2826 }
2827
2828 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2829 {
2830         /*
2831          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2832          * copying in vcpu load/put. Lets update our copies before we save
2833          * it into the save area
2834          */
2835         save_fpu_regs();
2836         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2837         save_access_regs(vcpu->run->s.regs.acrs);
2838
2839         return kvm_s390_store_status_unloaded(vcpu, addr);
2840 }
2841
2842 /*
2843  * store additional status at address
2844  */
2845 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2846                                         unsigned long gpa)
2847 {
2848         /* Only bits 0-53 are used for address formation */
2849         if (!(gpa & ~0x3ff))
2850                 return 0;
2851
2852         return write_guest_abs(vcpu, gpa & ~0x3ff,
2853                                (void *)&vcpu->run->s.regs.vrs, 512);
2854 }
2855
2856 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2857 {
2858         if (!test_kvm_facility(vcpu->kvm, 129))
2859                 return 0;
2860
2861         /*
2862          * The guest VXRS are in the host VXRs due to the lazy
2863          * copying in vcpu load/put. We can simply call save_fpu_regs()
2864          * to save the current register state because we are in the
2865          * middle of a load/put cycle.
2866          *
2867          * Let's update our copies before we save it into the save area.
2868          */
2869         save_fpu_regs();
2870
2871         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2872 }
2873
2874 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2875 {
2876         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2877         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2878 }
2879
2880 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2881 {
2882         unsigned int i;
2883         struct kvm_vcpu *vcpu;
2884
2885         kvm_for_each_vcpu(i, vcpu, kvm) {
2886                 __disable_ibs_on_vcpu(vcpu);
2887         }
2888 }
2889
2890 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2891 {
2892         if (!sclp.has_ibs)
2893                 return;
2894         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2895         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2896 }
2897
2898 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2899 {
2900         int i, online_vcpus, started_vcpus = 0;
2901
2902         if (!is_vcpu_stopped(vcpu))
2903                 return;
2904
2905         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2906         /* Only one cpu at a time may enter/leave the STOPPED state. */
2907         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2908         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2909
2910         for (i = 0; i < online_vcpus; i++) {
2911                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2912                         started_vcpus++;
2913         }
2914
2915         if (started_vcpus == 0) {
2916                 /* we're the only active VCPU -> speed it up */
2917                 __enable_ibs_on_vcpu(vcpu);
2918         } else if (started_vcpus == 1) {
2919                 /*
2920                  * As we are starting a second VCPU, we have to disable
2921                  * the IBS facility on all VCPUs to remove potentially
2922                  * oustanding ENABLE requests.
2923                  */
2924                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2925         }
2926
2927         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2928         /*
2929          * Another VCPU might have used IBS while we were offline.
2930          * Let's play safe and flush the VCPU at startup.
2931          */
2932         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2933         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2934         return;
2935 }
2936
2937 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2938 {
2939         int i, online_vcpus, started_vcpus = 0;
2940         struct kvm_vcpu *started_vcpu = NULL;
2941
2942         if (is_vcpu_stopped(vcpu))
2943                 return;
2944
2945         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2946         /* Only one cpu at a time may enter/leave the STOPPED state. */
2947         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2948         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2949
2950         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2951         kvm_s390_clear_stop_irq(vcpu);
2952
2953         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2954         __disable_ibs_on_vcpu(vcpu);
2955
2956         for (i = 0; i < online_vcpus; i++) {
2957                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2958                         started_vcpus++;
2959                         started_vcpu = vcpu->kvm->vcpus[i];
2960                 }
2961         }
2962
2963         if (started_vcpus == 1) {
2964                 /*
2965                  * As we only have one VCPU left, we want to enable the
2966                  * IBS facility for that VCPU to speed it up.
2967                  */
2968                 __enable_ibs_on_vcpu(started_vcpu);
2969         }
2970
2971         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2972         return;
2973 }
2974
2975 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2976                                      struct kvm_enable_cap *cap)
2977 {
2978         int r;
2979
2980         if (cap->flags)
2981                 return -EINVAL;
2982
2983         switch (cap->cap) {
2984         case KVM_CAP_S390_CSS_SUPPORT:
2985                 if (!vcpu->kvm->arch.css_support) {
2986                         vcpu->kvm->arch.css_support = 1;
2987                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2988                         trace_kvm_s390_enable_css(vcpu->kvm);
2989                 }
2990                 r = 0;
2991                 break;
2992         default:
2993                 r = -EINVAL;
2994                 break;
2995         }
2996         return r;
2997 }
2998
2999 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3000                                   struct kvm_s390_mem_op *mop)
3001 {
3002         void __user *uaddr = (void __user *)mop->buf;
3003         void *tmpbuf = NULL;
3004         int r, srcu_idx;
3005         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3006                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3007
3008         if (mop->flags & ~supported_flags)
3009                 return -EINVAL;
3010
3011         if (mop->size > MEM_OP_MAX_SIZE)
3012                 return -E2BIG;
3013
3014         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3015                 tmpbuf = vmalloc(mop->size);
3016                 if (!tmpbuf)
3017                         return -ENOMEM;
3018         }
3019
3020         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3021
3022         switch (mop->op) {
3023         case KVM_S390_MEMOP_LOGICAL_READ:
3024                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3025                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3026                                             mop->size, GACC_FETCH);
3027                         break;
3028                 }
3029                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3030                 if (r == 0) {
3031                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3032                                 r = -EFAULT;
3033                 }
3034                 break;
3035         case KVM_S390_MEMOP_LOGICAL_WRITE:
3036                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3037                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3038                                             mop->size, GACC_STORE);
3039                         break;
3040                 }
3041                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3042                         r = -EFAULT;
3043                         break;
3044                 }
3045                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3046                 break;
3047         default:
3048                 r = -EINVAL;
3049         }
3050
3051         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3052
3053         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3054                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3055
3056         vfree(tmpbuf);
3057         return r;
3058 }
3059
3060 long kvm_arch_vcpu_ioctl(struct file *filp,
3061                          unsigned int ioctl, unsigned long arg)
3062 {
3063         struct kvm_vcpu *vcpu = filp->private_data;
3064         void __user *argp = (void __user *)arg;
3065         int idx;
3066         long r;
3067
3068         switch (ioctl) {
3069         case KVM_S390_IRQ: {
3070                 struct kvm_s390_irq s390irq;
3071
3072                 r = -EFAULT;
3073                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3074                         break;
3075                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3076                 break;
3077         }
3078         case KVM_S390_INTERRUPT: {
3079                 struct kvm_s390_interrupt s390int;
3080                 struct kvm_s390_irq s390irq;
3081
3082                 r = -EFAULT;
3083                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3084                         break;
3085                 if (s390int_to_s390irq(&s390int, &s390irq))
3086                         return -EINVAL;
3087                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3088                 break;
3089         }
3090         case KVM_S390_STORE_STATUS:
3091                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3092                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3093                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3094                 break;
3095         case KVM_S390_SET_INITIAL_PSW: {
3096                 psw_t psw;
3097
3098                 r = -EFAULT;
3099                 if (copy_from_user(&psw, argp, sizeof(psw)))
3100                         break;
3101                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3102                 break;
3103         }
3104         case KVM_S390_INITIAL_RESET:
3105                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3106                 break;
3107         case KVM_SET_ONE_REG:
3108         case KVM_GET_ONE_REG: {
3109                 struct kvm_one_reg reg;
3110                 r = -EFAULT;
3111                 if (copy_from_user(&reg, argp, sizeof(reg)))
3112                         break;
3113                 if (ioctl == KVM_SET_ONE_REG)
3114                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3115                 else
3116                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3117                 break;
3118         }
3119 #ifdef CONFIG_KVM_S390_UCONTROL
3120         case KVM_S390_UCAS_MAP: {
3121                 struct kvm_s390_ucas_mapping ucasmap;
3122
3123                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3124                         r = -EFAULT;
3125                         break;
3126                 }
3127
3128                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3129                         r = -EINVAL;
3130                         break;
3131                 }
3132
3133                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3134                                      ucasmap.vcpu_addr, ucasmap.length);
3135                 break;
3136         }
3137         case KVM_S390_UCAS_UNMAP: {
3138                 struct kvm_s390_ucas_mapping ucasmap;
3139
3140                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3141                         r = -EFAULT;
3142                         break;
3143                 }
3144
3145                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3146                         r = -EINVAL;
3147                         break;
3148                 }
3149
3150                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3151                         ucasmap.length);
3152                 break;
3153         }
3154 #endif
3155         case KVM_S390_VCPU_FAULT: {
3156                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3157                 break;
3158         }
3159         case KVM_ENABLE_CAP:
3160         {
3161                 struct kvm_enable_cap cap;
3162                 r = -EFAULT;
3163                 if (copy_from_user(&cap, argp, sizeof(cap)))
3164                         break;
3165                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3166                 break;
3167         }
3168         case KVM_S390_MEM_OP: {
3169                 struct kvm_s390_mem_op mem_op;
3170
3171                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3172                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3173                 else
3174                         r = -EFAULT;
3175                 break;
3176         }
3177         case KVM_S390_SET_IRQ_STATE: {
3178                 struct kvm_s390_irq_state irq_state;
3179
3180                 r = -EFAULT;
3181                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3182                         break;
3183                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3184                     irq_state.len == 0 ||
3185                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3186                         r = -EINVAL;
3187                         break;
3188                 }
3189                 r = kvm_s390_set_irq_state(vcpu,
3190                                            (void __user *) irq_state.buf,
3191                                            irq_state.len);
3192                 break;
3193         }
3194         case KVM_S390_GET_IRQ_STATE: {
3195                 struct kvm_s390_irq_state irq_state;
3196
3197                 r = -EFAULT;
3198                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3199                         break;
3200                 if (irq_state.len == 0) {
3201                         r = -EINVAL;
3202                         break;
3203                 }
3204                 r = kvm_s390_get_irq_state(vcpu,
3205                                            (__u8 __user *)  irq_state.buf,
3206                                            irq_state.len);
3207                 break;
3208         }
3209         default:
3210                 r = -ENOTTY;
3211         }
3212         return r;
3213 }
3214
3215 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3216 {
3217 #ifdef CONFIG_KVM_S390_UCONTROL
3218         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3219                  && (kvm_is_ucontrol(vcpu->kvm))) {
3220                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3221                 get_page(vmf->page);
3222                 return 0;
3223         }
3224 #endif
3225         return VM_FAULT_SIGBUS;
3226 }
3227
/* Nothing to set up per memslot here; always reports success (0). */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
3233
3234 /* Section: memory related */
3235 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3236                                    struct kvm_memory_slot *memslot,
3237                                    const struct kvm_userspace_memory_region *mem,
3238                                    enum kvm_mr_change change)
3239 {
3240         /* A few sanity checks. We can have memory slots which have to be
3241            located/ended at a segment boundary (1MB). The memory in userland is
3242            ok to be fragmented into various different vmas. It is okay to mmap()
3243            and munmap() stuff in this slot after doing this call at any time */
3244
3245         if (mem->userspace_addr & 0xffffful)
3246                 return -EINVAL;
3247
3248         if (mem->memory_size & 0xffffful)
3249                 return -EINVAL;
3250
3251         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3252                 return -EINVAL;
3253
3254         return 0;
3255 }
3256
3257 void kvm_arch_commit_memory_region(struct kvm *kvm,
3258                                 const struct kvm_userspace_memory_region *mem,
3259                                 const struct kvm_memory_slot *old,
3260                                 const struct kvm_memory_slot *new,
3261                                 enum kvm_mr_change change)
3262 {
3263         int rc;
3264
3265         /* If the basics of the memslot do not change, we do not want
3266          * to update the gmap. Every update causes several unnecessary
3267          * segment translation exceptions. This is usually handled just
3268          * fine by the normal fault handler + gmap, but it will also
3269          * cause faults on the prefix page of running guest CPUs.
3270          */
3271         if (old->userspace_addr == mem->userspace_addr &&
3272             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3273             old->npages * PAGE_SIZE == mem->memory_size)
3274                 return;
3275
3276         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3277                 mem->guest_phys_addr, mem->memory_size);
3278         if (rc)
3279                 pr_warn("failed to commit memory region\n");
3280         return;
3281 }
3282
3283 static inline unsigned long nonhyp_mask(int i)
3284 {
3285         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3286
3287         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3288 }
3289
3290 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3291 {
3292         vcpu->valid_wakeup = false;
3293 }
3294
3295 static int __init kvm_s390_init(void)
3296 {
3297         int i;
3298
3299         if (!sclp.has_sief2) {
3300                 pr_info("SIE not available\n");
3301                 return -ENODEV;
3302         }
3303
3304         for (i = 0; i < 16; i++)
3305                 kvm_s390_fac_list_mask[i] |=
3306                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3307
3308         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3309 }
3310
3311 static void __exit kvm_s390_exit(void)
3312 {
3313         kvm_exit();
3314 }
3315
3316 module_init(kvm_s390_init);
3317 module_exit(kvm_s390_exit);
3318
3319 /*
3320  * Enable autoloading of the kvm module.
3321  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3322  * since x86 takes a different approach.
3323  */
3324 #include <linux/miscdevice.h>
3325 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3326 MODULE_ALIAS("devname:kvm");