arch/s390/kvm/kvm-s390.c

   1 /*
   2  * hosting zSeries kernel virtual machines
   3  *
   4  * Copyright IBM Corp. 2008, 2009
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License (version 2 only)
   8  * as published by the Free Software Foundation.
   9  *
  10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11  *               Christian Borntraeger <borntraeger@de.ibm.com>
  12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14  *               Jason J. Herne <jjherne@us.ibm.com>
  15  */
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/random.h>
  27 #include <linux/slab.h>
  28 #include <linux/timer.h>
  29 #include <linux/vmalloc.h>
  30 #include <linux/bitmap.h>
  31 #include <asm/asm-offsets.h>
  32 #include <asm/lowcore.h>
  33 #include <asm/stp.h>
  34 #include <asm/pgtable.h>
  35 #include <asm/gmap.h>
  36 #include <asm/nmi.h>
  37 #include <asm/switch_to.h>
  38 #include <asm/isc.h>
  39 #include <asm/sclp.h>
  40 #include <asm/cpacf.h>
  41 #include <asm/timex.h>
  42 #include "kvm-s390.h"
  43 #include "gaccess.h"
  44
  45 #define KMSG_COMPONENT "kvm-s390"
  46 #undef pr_fmt
  47 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  48
  49 #define CREATE_TRACE_POINTS
  50 #include "trace.h"
  51 #include "trace-s390.h"
  52
  53 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  54 #define LOCAL_IRQS 32
  55 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  56                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  57
  58 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  59
  60 struct kvm_stats_debugfs_item debugfs_entries[] = {
  61         { "userspace_handled", VCPU_STAT(exit_userspace) },
  62         { "exit_null", VCPU_STAT(exit_null) },
  63         { "exit_validity", VCPU_STAT(exit_validity) },
  64         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  65         { "exit_external_request", VCPU_STAT(exit_external_request) },
  66         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  67         { "exit_instruction", VCPU_STAT(exit_instruction) },
  68         { "exit_pei", VCPU_STAT(exit_pei) },
  69         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  70         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  71         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  72         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  73         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  74         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  75         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  76         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  77         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  78         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  79         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  80         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  81         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  82         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  83         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
  84         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  85         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  86         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  87         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
  88         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  89         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
  90         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
  91         { "instruction_spx", VCPU_STAT(instruction_spx) },
  92         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
  93         { "instruction_stap", VCPU_STAT(instruction_stap) },
  94         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
  95         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
  96         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
  97         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
  98         { "instruction_essa", VCPU_STAT(instruction_essa) },
  99         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
 100         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
 101         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 102         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 103         { "instruction_sie", VCPU_STAT(instruction_sie) },
 104         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 105         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 106         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 107         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 108         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 109         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 110         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 111         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 112         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 113         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 114         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 115         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 116         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 117         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 118         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 119         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 120         { "diagnose_10", VCPU_STAT(diagnose_10) },
 121         { "diagnose_44", VCPU_STAT(diagnose_44) },
 122         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
 123         { "diagnose_258", VCPU_STAT(diagnose_258) },
 124         { "diagnose_308", VCPU_STAT(diagnose_308) },
 125         { "diagnose_500", VCPU_STAT(diagnose_500) },
 126         { NULL }
 127 };
 128
 129 /* allow nested virtualization in KVM (if enabled by user space) */
 130 static int nested;
 131 module_param(nested, int, S_IRUGO);
 132 MODULE_PARM_DESC(nested, "Nested virtualization support");
 133
 134 /* upper facilities limit for kvm */
 135 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
 136
 137 unsigned long kvm_s390_fac_list_mask_size(void)
 138 {
 139         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 140         return ARRAY_SIZE(kvm_s390_fac_list_mask);
 141 }
 142
 143 /* available cpu features supported by kvm */
 144 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 145 /* available subfunctions indicated via query / "test bit" */
 146 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 147
 148 static struct gmap_notifier gmap_notifier;
 149 static struct gmap_notifier vsie_gmap_notifier;
 150 debug_info_t *kvm_s390_dbf;
 151
 152 /* Section: not file related */
 153 int kvm_arch_hardware_enable(void)
 154 {
 155         /* every s390 is virtualization enabled ;-) */
 156         return 0;
 157 }
 158
 159 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 160                               unsigned long end);
 161
 162 /*
 163  * This callback is executed during stop_machine(). All CPUs are therefore
 164  * temporarily stopped. In order not to change guest behavior, we have to
 165  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 166  * so a CPU won't be stopped while calculating with the epoch.
 167  */
 168 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 169                           void *v)
 170 {
 171         struct kvm *kvm;
 172         struct kvm_vcpu *vcpu;
 173         int i;
 174         unsigned long long *delta = v;
 175
 176         list_for_each_entry(kvm, &vm_list, vm_list) {
 177                 kvm->arch.epoch -= *delta;
 178                 kvm_for_each_vcpu(i, vcpu, kvm) {
 179                         vcpu->arch.sie_block->epoch -= *delta;
 180                         if (vcpu->arch.cputm_enabled)
 181                                 vcpu->arch.cputm_start += *delta;
 182                         if (vcpu->arch.vsie_block)
 183                                 vcpu->arch.vsie_block->epoch -= *delta;
 184                 }
 185         }
 186         return NOTIFY_OK;
 187 }
 188
 189 static struct notifier_block kvm_clock_notifier = {
 190         .notifier_call = kvm_clock_sync,
 191 };
 192
 193 int kvm_arch_hardware_setup(void)
 194 {
 195         gmap_notifier.notifier_call = kvm_gmap_notifier;
 196         gmap_register_pte_notifier(&gmap_notifier);
 197         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 198         gmap_register_pte_notifier(&vsie_gmap_notifier);
 199         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 200                                        &kvm_clock_notifier);
 201         return 0;
 202 }
 203
 204 void kvm_arch_hardware_unsetup(void)
 205 {
 206         gmap_unregister_pte_notifier(&gmap_notifier);
 207         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 208         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 209                                          &kvm_clock_notifier);
 210 }
 211
 212 static void allow_cpu_feat(unsigned long nr)
 213 {
 214         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 215 }
 216
 217 static inline int plo_test_bit(unsigned char nr)
 218 {
 219         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 220         int cc = 3; /* subfunction not available */
 221
 222         asm volatile(
 223                 /* Parameter registers are ignored for "test bit" */
 224                 "       plo     0,0,0,0(0)\n"
 225                 "       ipm     %0\n"
 226                 "       srl     %0,28\n"
 227                 : "=d" (cc)
 228                 : "d" (r0)
 229                 : "cc");
 230         return cc == 0;
 231 }
 232
 233 static void kvm_s390_cpu_feat_init(void)
 234 {
 235         int i;
 236
 237         for (i = 0; i < 256; ++i) {
 238                 if (plo_test_bit(i))
 239                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 240         }
 241
 242         if (test_facility(28)) /* TOD-clock steering */
 243                 ptff(kvm_s390_available_subfunc.ptff,
 244                      sizeof(kvm_s390_available_subfunc.ptff),
 245                      PTFF_QAF);
 246
 247         if (test_facility(17)) { /* MSA */
 248                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 249                               kvm_s390_available_subfunc.kmac);
 250                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 251                               kvm_s390_available_subfunc.kmc);
 252                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 253                               kvm_s390_available_subfunc.km);
 254                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 255                               kvm_s390_available_subfunc.kimd);
 256                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 257                               kvm_s390_available_subfunc.klmd);
 258         }
 259         if (test_facility(76)) /* MSA3 */
 260                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 261                               kvm_s390_available_subfunc.pckmo);
 262         if (test_facility(77)) { /* MSA4 */
 263                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 264                               kvm_s390_available_subfunc.kmctr);
 265                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 266                               kvm_s390_available_subfunc.kmf);
 267                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 268                               kvm_s390_available_subfunc.kmo);
 269                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 270                               kvm_s390_available_subfunc.pcc);
 271         }
 272         if (test_facility(57)) /* MSA5 */
 273                 __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
 274                               kvm_s390_available_subfunc.ppno);
 275
 276         if (MACHINE_HAS_ESOP)
 277                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 278         /*
 279          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 280          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 281          */
 282         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 283             !test_facility(3) || !nested)
 284                 return;
 285         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 286         if (sclp.has_64bscao)
 287                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 288         if (sclp.has_siif)
 289                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 290         if (sclp.has_gpere)
 291                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 292         if (sclp.has_gsls)
 293                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 294         if (sclp.has_ib)
 295                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 296         if (sclp.has_cei)
 297                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 298         if (sclp.has_ibs)
 299                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 300         /*
 301          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 302          * all skey handling functions read/set the skey from the PGSTE
 303          * instead of the real storage key.
 304          *
 305          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 306          * pages being detected as preserved although they are resident.
 307          *
 308          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 309          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 310          *
 311          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 312          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 313          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 314          *
 315          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 316          * cannot easily shadow the SCA because of the ipte lock.
 317          */
 318 }
 319
 320 int kvm_arch_init(void *opaque)
 321 {
 322         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 323         if (!kvm_s390_dbf)
 324                 return -ENOMEM;
 325
 326         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 327                 debug_unregister(kvm_s390_dbf);
 328                 return -ENOMEM;
 329         }
 330
 331         kvm_s390_cpu_feat_init();
 332
 333         /* Register floating interrupt controller interface. */
 334         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 335 }
 336
 337 void kvm_arch_exit(void)
 338 {
 339         debug_unregister(kvm_s390_dbf);
 340 }
 341
 342 /* Section: device related */
 343 long kvm_arch_dev_ioctl(struct file *filp,
 344                         unsigned int ioctl, unsigned long arg)
 345 {
 346         if (ioctl == KVM_S390_ENABLE_SIE)
 347                 return s390_enable_sie();
 348         return -EINVAL;
 349 }
 350
 351 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 352 {
 353         int r;
 354
 355         switch (ext) {
 356         case KVM_CAP_S390_PSW:
 357         case KVM_CAP_S390_GMAP:
 358         case KVM_CAP_SYNC_MMU:
 359 #ifdef CONFIG_KVM_S390_UCONTROL
 360         case KVM_CAP_S390_UCONTROL:
 361 #endif
 362         case KVM_CAP_ASYNC_PF:
 363         case KVM_CAP_SYNC_REGS:
 364         case KVM_CAP_ONE_REG:
 365         case KVM_CAP_ENABLE_CAP:
 366         case KVM_CAP_S390_CSS_SUPPORT:
 367         case KVM_CAP_IOEVENTFD:
 368         case KVM_CAP_DEVICE_CTRL:
 369         case KVM_CAP_ENABLE_CAP_VM:
 370         case KVM_CAP_S390_IRQCHIP:
 371         case KVM_CAP_VM_ATTRIBUTES:
 372         case KVM_CAP_MP_STATE:
 373         case KVM_CAP_S390_INJECT_IRQ:
 374         case KVM_CAP_S390_USER_SIGP:
 375         case KVM_CAP_S390_USER_STSI:
 376         case KVM_CAP_S390_SKEYS:
 377         case KVM_CAP_S390_IRQ_STATE:
 378         case KVM_CAP_S390_USER_INSTR0:
 379                 r = 1;
 380                 break;
 381         case KVM_CAP_S390_MEM_OP:
 382                 r = MEM_OP_MAX_SIZE;
 383                 break;
 384         case KVM_CAP_NR_VCPUS:
 385         case KVM_CAP_MAX_VCPUS:
 386                 r = KVM_S390_BSCA_CPU_SLOTS;
 387                 if (!kvm_s390_use_sca_entries())
 388                         r = KVM_MAX_VCPUS;
 389                 else if (sclp.has_esca && sclp.has_64bscao)
 390                         r = KVM_S390_ESCA_CPU_SLOTS;
 391                 break;
 392         case KVM_CAP_NR_MEMSLOTS:
 393                 r = KVM_USER_MEM_SLOTS;
 394                 break;
 395         case KVM_CAP_S390_COW:
 396                 r = MACHINE_HAS_ESOP;
 397                 break;
 398         case KVM_CAP_S390_VECTOR_REGISTERS:
 399                 r = MACHINE_HAS_VX;
 400                 break;
 401         case KVM_CAP_S390_RI:
 402                 r = test_facility(64);
 403                 break;
 404         default:
 405                 r = 0;
 406         }
 407         return r;
 408 }
 409
 410 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 411                                         struct kvm_memory_slot *memslot)
 412 {
 413         gfn_t cur_gfn, last_gfn;
 414         unsigned long address;
 415         struct gmap *gmap = kvm->arch.gmap;
 416
 417         /* Loop over all guest pages */
 418         last_gfn = memslot->base_gfn + memslot->npages;
 419         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 420                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 421
 422                 if (test_and_clear_guest_dirty(gmap->mm, address))
 423                         mark_page_dirty(kvm, cur_gfn);
 424                 if (fatal_signal_pending(current))
 425                         return;
 426                 cond_resched();
 427         }
 428 }
 429
 430 /* Section: vm related */
 431 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 432
 433 /*
 434  * Get (and clear) the dirty memory log for a memory slot.
 435  */
 436 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 437                                struct kvm_dirty_log *log)
 438 {
 439         int r;
 440         unsigned long n;
 441         struct kvm_memslots *slots;
 442         struct kvm_memory_slot *memslot;
 443         int is_dirty = 0;
 444
 445         mutex_lock(&kvm->slots_lock);
 446
 447         r = -EINVAL;
 448         if (log->slot >= KVM_USER_MEM_SLOTS)
 449                 goto out;
 450
 451         slots = kvm_memslots(kvm);
 452         memslot = id_to_memslot(slots, log->slot);
 453         r = -ENOENT;
 454         if (!memslot->dirty_bitmap)
 455                 goto out;
 456
 457         kvm_s390_sync_dirty_log(kvm, memslot);
 458         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 459         if (r)
 460                 goto out;
 461
 462         /* Clear the dirty log */
 463         if (is_dirty) {
 464                 n = kvm_dirty_bitmap_bytes(memslot);
 465                 memset(memslot->dirty_bitmap, 0, n);
 466         }
 467         r = 0;
 468 out:
 469         mutex_unlock(&kvm->slots_lock);
 470         return r;
 471 }
 472
 473 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 474 {
 475         unsigned int i;
 476         struct kvm_vcpu *vcpu;
 477
 478         kvm_for_each_vcpu(i, vcpu, kvm) {
 479                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 480         }
 481 }
 482
 483 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 484 {
 485         int r;
 486
 487         if (cap->flags)
 488                 return -EINVAL;
 489
 490         switch (cap->cap) {
 491         case KVM_CAP_S390_IRQCHIP:
 492                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 493                 kvm->arch.use_irqchip = 1;
 494                 r = 0;
 495                 break;
 496         case KVM_CAP_S390_USER_SIGP:
 497                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 498                 kvm->arch.user_sigp = 1;
 499                 r = 0;
 500                 break;
 501         case KVM_CAP_S390_VECTOR_REGISTERS:
 502                 mutex_lock(&kvm->lock);
 503                 if (kvm->created_vcpus) {
 504                         r = -EBUSY;
 505                 } else if (MACHINE_HAS_VX) {
 506                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 507                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 508                         r = 0;
 509                 } else
 510                         r = -EINVAL;
 511                 mutex_unlock(&kvm->lock);
 512                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 513                          r ? "(not available)" : "(success)");
 514                 break;
 515         case KVM_CAP_S390_RI:
 516                 r = -EINVAL;
 517                 mutex_lock(&kvm->lock);
 518                 if (kvm->created_vcpus) {
 519                         r = -EBUSY;
 520                 } else if (test_facility(64)) {
 521                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 522                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 523                         r = 0;
 524                 }
 525                 mutex_unlock(&kvm->lock);
 526                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 527                          r ? "(not available)" : "(success)");
 528                 break;
 529         case KVM_CAP_S390_USER_STSI:
 530                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 531                 kvm->arch.user_stsi = 1;
 532                 r = 0;
 533                 break;
 534         case KVM_CAP_S390_USER_INSTR0:
 535                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 536                 kvm->arch.user_instr0 = 1;
 537                 icpt_operexc_on_all_vcpus(kvm);
 538                 r = 0;
 539                 break;
 540         default:
 541                 r = -EINVAL;
 542                 break;
 543         }
 544         return r;
 545 }
 546
 547 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 548 {
 549         int ret;
 550
 551         switch (attr->attr) {
 552         case KVM_S390_VM_MEM_LIMIT_SIZE:
 553                 ret = 0;
 554                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 555                          kvm->arch.mem_limit);
 556                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 557                         ret = -EFAULT;
 558                 break;
 559         default:
 560                 ret = -ENXIO;
 561                 break;
 562         }
 563         return ret;
 564 }
 565
 566 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 567 {
 568         int ret;
 569         unsigned int idx;
 570         switch (attr->attr) {
 571         case KVM_S390_VM_MEM_ENABLE_CMMA:
 572                 ret = -ENXIO;
 573                 if (!sclp.has_cmma)
 574                         break;
 575
 576                 ret = -EBUSY;
 577                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 578                 mutex_lock(&kvm->lock);
 579                 if (!kvm->created_vcpus) {
 580                         kvm->arch.use_cmma = 1;
 581                         ret = 0;
 582                 }
 583                 mutex_unlock(&kvm->lock);
 584                 break;
 585         case KVM_S390_VM_MEM_CLR_CMMA:
 586                 ret = -ENXIO;
 587                 if (!sclp.has_cmma)
 588                         break;
 589                 ret = -EINVAL;
 590                 if (!kvm->arch.use_cmma)
 591                         break;
 592
 593                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 594                 mutex_lock(&kvm->lock);
 595                 idx = srcu_read_lock(&kvm->srcu);
 596                 s390_reset_cmma(kvm->arch.gmap->mm);
 597                 srcu_read_unlock(&kvm->srcu, idx);
 598                 mutex_unlock(&kvm->lock);
 599                 ret = 0;
 600                 break;
 601         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 602                 unsigned long new_limit;
 603
 604                 if (kvm_is_ucontrol(kvm))
 605                         return -EINVAL;
 606
 607                 if (get_user(new_limit, (u64 __user *)attr->addr))
 608                         return -EFAULT;
 609
 610                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 611                     new_limit > kvm->arch.mem_limit)
 612                         return -E2BIG;
 613
 614                 if (!new_limit)
 615                         return -EINVAL;
 616
 617                 /* gmap_create takes last usable address */
 618                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 619                         new_limit -= 1;
 620
 621                 ret = -EBUSY;
 622                 mutex_lock(&kvm->lock);
 623                 if (!kvm->created_vcpus) {
 624                         /* gmap_create will round the limit up */
 625                         struct gmap *new = gmap_create(current->mm, new_limit);
 626
 627                         if (!new) {
 628                                 ret = -ENOMEM;
 629                         } else {
 630                                 gmap_remove(kvm->arch.gmap);
 631                                 new->private = kvm;
 632                                 kvm->arch.gmap = new;
 633                                 ret = 0;
 634                         }
 635                 }
 636                 mutex_unlock(&kvm->lock);
 637                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 638                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 639                          (void *) kvm->arch.gmap->asce);
 640                 break;
 641         }
 642         default:
 643                 ret = -ENXIO;
 644                 break;
 645         }
 646         return ret;
 647 }
 648
 649 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 650
 651 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 652 {
 653         struct kvm_vcpu *vcpu;
 654         int i;
 655
 656         if (!test_kvm_facility(kvm, 76))
 657                 return -EINVAL;
 658
 659         mutex_lock(&kvm->lock);
 660         switch (attr->attr) {
 661         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 662                 get_random_bytes(
 663                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 664                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 665                 kvm->arch.crypto.aes_kw = 1;
 666                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 667                 break;
 668         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 669                 get_random_bytes(
 670                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 671                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 672                 kvm->arch.crypto.dea_kw = 1;
 673                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 674                 break;
 675         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 676                 kvm->arch.crypto.aes_kw = 0;
 677                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 678                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 679                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 680                 break;
 681         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 682                 kvm->arch.crypto.dea_kw = 0;
 683                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 684                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 685                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 686                 break;
 687         default:
 688                 mutex_unlock(&kvm->lock);
 689                 return -ENXIO;
 690         }
 691
 692         kvm_for_each_vcpu(i, vcpu, kvm) {
 693                 kvm_s390_vcpu_crypto_setup(vcpu);
 694                 exit_sie(vcpu);
 695         }
 696         mutex_unlock(&kvm->lock);
 697         return 0;
 698 }
 699
 700 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 701 {
 702         u8 gtod_high;
 703
 704         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 705                                            sizeof(gtod_high)))
 706                 return -EFAULT;
 707
 708         if (gtod_high != 0)
 709                 return -EINVAL;
 710         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 711
 712         return 0;
 713 }
 714
 715 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 716 {
 717         u64 gtod;
 718
 719         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 720                 return -EFAULT;
 721
 722         kvm_s390_set_tod_clock(kvm, gtod);
 723         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 724         return 0;
 725 }
 726
 727 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 728 {
 729         int ret;
 730
 731         if (attr->flags)
 732                 return -EINVAL;
 733
 734         switch (attr->attr) {
 735         case KVM_S390_VM_TOD_HIGH:
 736                 ret = kvm_s390_set_tod_high(kvm, attr);
 737                 break;
 738         case KVM_S390_VM_TOD_LOW:
 739                 ret = kvm_s390_set_tod_low(kvm, attr);
 740                 break;
 741         default:
 742                 ret = -ENXIO;
 743                 break;
 744         }
 745         return ret;
 746 }
 747
 748 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 749 {
 750         u8 gtod_high = 0;
 751
 752         if (copy_to_user((void __user *)attr->addr, &gtod_high,
 753                                          sizeof(gtod_high)))
 754                 return -EFAULT;
 755         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 756
 757         return 0;
 758 }
 759
 760 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 761 {
 762         u64 gtod;
 763
 764         gtod = kvm_s390_get_tod_clock_fast(kvm);
 765         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 766                 return -EFAULT;
 767         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
 768
 769         return 0;
 770 }
 771
 772 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 773 {
 774         int ret;
 775
 776         if (attr->flags)
 777                 return -EINVAL;
 778
 779         switch (attr->attr) {
 780         case KVM_S390_VM_TOD_HIGH:
 781                 ret = kvm_s390_get_tod_high(kvm, attr);
 782                 break;
 783         case KVM_S390_VM_TOD_LOW:
 784                 ret = kvm_s390_get_tod_low(kvm, attr);
 785                 break;
 786         default:
 787                 ret = -ENXIO;
 788                 break;
 789         }
 790         return ret;
 791 }
 792
 793 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 794 {
 795         struct kvm_s390_vm_cpu_processor *proc;
 796         u16 lowest_ibc, unblocked_ibc;
 797         int ret = 0;
 798
 799         mutex_lock(&kvm->lock);
 800         if (kvm->created_vcpus) {
 801                 ret = -EBUSY;
 802                 goto out;
 803         }
 804         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 805         if (!proc) {
 806                 ret = -ENOMEM;
 807                 goto out;
 808         }
 809         if (!copy_from_user(proc, (void __user *)attr->addr,
 810                             sizeof(*proc))) {
 811                 kvm->arch.model.cpuid = proc->cpuid;
 812                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
 813                 unblocked_ibc = sclp.ibc & 0xfff;
 814                 if (lowest_ibc && proc->ibc) {
 815                         if (proc->ibc > unblocked_ibc)
 816                                 kvm->arch.model.ibc = unblocked_ibc;
 817                         else if (proc->ibc < lowest_ibc)
 818                                 kvm->arch.model.ibc = lowest_ibc;
 819                         else
 820                                 kvm->arch.model.ibc = proc->ibc;
 821                 }
 822                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
 823                        S390_ARCH_FAC_LIST_SIZE_BYTE);
 824         } else
 825                 ret = -EFAULT;
 826         kfree(proc);
 827 out:
 828         mutex_unlock(&kvm->lock);
 829         return ret;
 830 }
 831
 832 static int kvm_s390_set_processor_feat(struct kvm *kvm,
 833                                        struct kvm_device_attr *attr)
 834 {
 835         struct kvm_s390_vm_cpu_feat data;
 836         int ret = -EBUSY;
 837
 838         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
 839                 return -EFAULT;
 840         if (!bitmap_subset((unsigned long *) data.feat,
 841                            kvm_s390_available_cpu_feat,
 842                            KVM_S390_VM_CPU_FEAT_NR_BITS))
 843                 return -EINVAL;
 844
 845         mutex_lock(&kvm->lock);
 846         if (!atomic_read(&kvm->online_vcpus)) {
 847                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
 848                             KVM_S390_VM_CPU_FEAT_NR_BITS);
 849                 ret = 0;
 850         }
 851         mutex_unlock(&kvm->lock);
 852         return ret;
 853 }
 854
/*
 * Placeholder for configuring processor subfunctions; nothing is read from
 * attr and the call always fails with -ENXIO.
 */
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once supported by kernel + hw, we have to store the subfunctions
         * in kvm->arch and remember that user space configured them.
         */
        return -ENXIO;
}
 864
 865 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 866 {
 867         int ret = -ENXIO;
 868
 869         switch (attr->attr) {
 870         case KVM_S390_VM_CPU_PROCESSOR:
 871                 ret = kvm_s390_set_processor(kvm, attr);
 872                 break;
 873         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 874                 ret = kvm_s390_set_processor_feat(kvm, attr);
 875                 break;
 876         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 877                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
 878                 break;
 879         }
 880         return ret;
 881 }
 882
 883 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 884 {
 885         struct kvm_s390_vm_cpu_processor *proc;
 886         int ret = 0;
 887
 888         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 889         if (!proc) {
 890                 ret = -ENOMEM;
 891                 goto out;
 892         }
 893         proc->cpuid = kvm->arch.model.cpuid;
 894         proc->ibc = kvm->arch.model.ibc;
 895         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
 896                S390_ARCH_FAC_LIST_SIZE_BYTE);
 897         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
 898                 ret = -EFAULT;
 899         kfree(proc);
 900 out:
 901         return ret;
 902 }
 903
 904 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
 905 {
 906         struct kvm_s390_vm_cpu_machine *mach;
 907         int ret = 0;
 908
 909         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
 910         if (!mach) {
 911                 ret = -ENOMEM;
 912                 goto out;
 913         }
 914         get_cpu_id((struct cpuid *) &mach->cpuid);
 915         mach->ibc = sclp.ibc;
 916         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
 917                S390_ARCH_FAC_LIST_SIZE_BYTE);
 918         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
 919                S390_ARCH_FAC_LIST_SIZE_BYTE);
 920         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
 921                 ret = -EFAULT;
 922         kfree(mach);
 923 out:
 924         return ret;
 925 }
 926
 927 static int kvm_s390_get_processor_feat(struct kvm *kvm,
 928                                        struct kvm_device_attr *attr)
 929 {
 930         struct kvm_s390_vm_cpu_feat data;
 931
 932         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
 933                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 934         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 935                 return -EFAULT;
 936         return 0;
 937 }
 938
 939 static int kvm_s390_get_machine_feat(struct kvm *kvm,
 940                                      struct kvm_device_attr *attr)
 941 {
 942         struct kvm_s390_vm_cpu_feat data;
 943
 944         bitmap_copy((unsigned long *) data.feat,
 945                     kvm_s390_available_cpu_feat,
 946                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 947         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 948                 return -EFAULT;
 949         return 0;
 950 }
 951
/*
 * Placeholder for querying configured processor subfunctions; nothing is
 * written to user space and the call always fails with -ENXIO.
 */
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once we can actually configure subfunctions (kernel + hw support),
         * we have to check if they were already set by user space, if so copy
         * them from kvm->arch.
         */
        return -ENXIO;
}
 962
 963 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
 964                                         struct kvm_device_attr *attr)
 965 {
 966         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
 967             sizeof(struct kvm_s390_vm_cpu_subfunc)))
 968                 return -EFAULT;
 969         return 0;
 970 }
 971 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 972 {
 973         int ret = -ENXIO;
 974
 975         switch (attr->attr) {
 976         case KVM_S390_VM_CPU_PROCESSOR:
 977                 ret = kvm_s390_get_processor(kvm, attr);
 978                 break;
 979         case KVM_S390_VM_CPU_MACHINE:
 980                 ret = kvm_s390_get_machine(kvm, attr);
 981                 break;
 982         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 983                 ret = kvm_s390_get_processor_feat(kvm, attr);
 984                 break;
 985         case KVM_S390_VM_CPU_MACHINE_FEAT:
 986                 ret = kvm_s390_get_machine_feat(kvm, attr);
 987                 break;
 988         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 989                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
 990                 break;
 991         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 992                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
 993                 break;
 994         }
 995         return ret;
 996 }
 997
 998 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 999 {
1000         int ret;
1001
1002         switch (attr->group) {
1003         case KVM_S390_VM_MEM_CTRL:
1004                 ret = kvm_s390_set_mem_control(kvm, attr);
1005                 break;
1006         case KVM_S390_VM_TOD:
1007                 ret = kvm_s390_set_tod(kvm, attr);
1008                 break;
1009         case KVM_S390_VM_CPU_MODEL:
1010                 ret = kvm_s390_set_cpu_model(kvm, attr);
1011                 break;
1012         case KVM_S390_VM_CRYPTO:
1013                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1014                 break;
1015         default:
1016                 ret = -ENXIO;
1017                 break;
1018         }
1019
1020         return ret;
1021 }
1022
1023 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1024 {
1025         int ret;
1026
1027         switch (attr->group) {
1028         case KVM_S390_VM_MEM_CTRL:
1029                 ret = kvm_s390_get_mem_control(kvm, attr);
1030                 break;
1031         case KVM_S390_VM_TOD:
1032                 ret = kvm_s390_get_tod(kvm, attr);
1033                 break;
1034         case KVM_S390_VM_CPU_MODEL:
1035                 ret = kvm_s390_get_cpu_model(kvm, attr);
1036                 break;
1037         default:
1038                 ret = -ENXIO;
1039                 break;
1040         }
1041
1042         return ret;
1043 }
1044
1045 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1046 {
1047         int ret;
1048
1049         switch (attr->group) {
1050         case KVM_S390_VM_MEM_CTRL:
1051                 switch (attr->attr) {
1052                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1053                 case KVM_S390_VM_MEM_CLR_CMMA:
1054                         ret = sclp.has_cmma ? 0 : -ENXIO;
1055                         break;
1056                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1057                         ret = 0;
1058                         break;
1059                 default:
1060                         ret = -ENXIO;
1061                         break;
1062                 }
1063                 break;
1064         case KVM_S390_VM_TOD:
1065                 switch (attr->attr) {
1066                 case KVM_S390_VM_TOD_LOW:
1067                 case KVM_S390_VM_TOD_HIGH:
1068                         ret = 0;
1069                         break;
1070                 default:
1071                         ret = -ENXIO;
1072                         break;
1073                 }
1074                 break;
1075         case KVM_S390_VM_CPU_MODEL:
1076                 switch (attr->attr) {
1077                 case KVM_S390_VM_CPU_PROCESSOR:
1078                 case KVM_S390_VM_CPU_MACHINE:
1079                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1080                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1081                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1082                         ret = 0;
1083                         break;
1084                 /* configuring subfunctions is not supported yet */
1085                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1086                 default:
1087                         ret = -ENXIO;
1088                         break;
1089                 }
1090                 break;
1091         case KVM_S390_VM_CRYPTO:
1092                 switch (attr->attr) {
1093                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1094                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1095                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1096                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1097                         ret = 0;
1098                         break;
1099                 default:
1100                         ret = -ENXIO;
1101                         break;
1102                 }
1103                 break;
1104         default:
1105                 ret = -ENXIO;
1106                 break;
1107         }
1108
1109         return ret;
1110 }
1111
1112 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1113 {
1114         uint8_t *keys;
1115         uint64_t hva;
1116         int i, r = 0;
1117
1118         if (args->flags != 0)
1119                 return -EINVAL;
1120
1121         /* Is this guest using storage keys? */
1122         if (!mm_use_skey(current->mm))
1123                 return KVM_S390_GET_SKEYS_NONE;
1124
1125         /* Enforce sane limit on memory allocation */
1126         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1127                 return -EINVAL;
1128
1129         keys = kmalloc_array(args->count, sizeof(uint8_t),
1130                              GFP_KERNEL | __GFP_NOWARN);
1131         if (!keys)
1132                 keys = vmalloc(sizeof(uint8_t) * args->count);
1133         if (!keys)
1134                 return -ENOMEM;
1135
1136         down_read(&current->mm->mmap_sem);
1137         for (i = 0; i < args->count; i++) {
1138                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1139                 if (kvm_is_error_hva(hva)) {
1140                         r = -EFAULT;
1141                         break;
1142                 }
1143
1144                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1145                 if (r)
1146                         break;
1147         }
1148         up_read(&current->mm->mmap_sem);
1149
1150         if (!r) {
1151                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1152                                  sizeof(uint8_t) * args->count);
1153                 if (r)
1154                         r = -EFAULT;
1155         }
1156
1157         kvfree(keys);
1158         return r;
1159 }
1160
1161 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1162 {
1163         uint8_t *keys;
1164         uint64_t hva;
1165         int i, r = 0;
1166
1167         if (args->flags != 0)
1168                 return -EINVAL;
1169
1170         /* Enforce sane limit on memory allocation */
1171         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1172                 return -EINVAL;
1173
1174         keys = kmalloc_array(args->count, sizeof(uint8_t),
1175                              GFP_KERNEL | __GFP_NOWARN);
1176         if (!keys)
1177                 keys = vmalloc(sizeof(uint8_t) * args->count);
1178         if (!keys)
1179                 return -ENOMEM;
1180
1181         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1182                            sizeof(uint8_t) * args->count);
1183         if (r) {
1184                 r = -EFAULT;
1185                 goto out;
1186         }
1187
1188         /* Enable storage key handling for the guest */
1189         r = s390_enable_skey();
1190         if (r)
1191                 goto out;
1192
1193         down_read(&current->mm->mmap_sem);
1194         for (i = 0; i < args->count; i++) {
1195                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1196                 if (kvm_is_error_hva(hva)) {
1197                         r = -EFAULT;
1198                         break;
1199                 }
1200
1201                 /* Lowest order bit is reserved */
1202                 if (keys[i] & 0x01) {
1203                         r = -EINVAL;
1204                         break;
1205                 }
1206
1207                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1208                 if (r)
1209                         break;
1210         }
1211         up_read(&current->mm->mmap_sem);
1212 out:
1213         kvfree(keys);
1214         return r;
1215 }
1216
1217 long kvm_arch_vm_ioctl(struct file *filp,
1218                        unsigned int ioctl, unsigned long arg)
1219 {
1220         struct kvm *kvm = filp->private_data;
1221         void __user *argp = (void __user *)arg;
1222         struct kvm_device_attr attr;
1223         int r;
1224
1225         switch (ioctl) {
1226         case KVM_S390_INTERRUPT: {
1227                 struct kvm_s390_interrupt s390int;
1228
1229                 r = -EFAULT;
1230                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1231                         break;
1232                 r = kvm_s390_inject_vm(kvm, &s390int);
1233                 break;
1234         }
1235         case KVM_ENABLE_CAP: {
1236                 struct kvm_enable_cap cap;
1237                 r = -EFAULT;
1238                 if (copy_from_user(&cap, argp, sizeof(cap)))
1239                         break;
1240                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1241                 break;
1242         }
1243         case KVM_CREATE_IRQCHIP: {
1244                 struct kvm_irq_routing_entry routing;
1245
1246                 r = -EINVAL;
1247                 if (kvm->arch.use_irqchip) {
1248                         /* Set up dummy routing. */
1249                         memset(&routing, 0, sizeof(routing));
1250                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1251                 }
1252                 break;
1253         }
1254         case KVM_SET_DEVICE_ATTR: {
1255                 r = -EFAULT;
1256                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1257                         break;
1258                 r = kvm_s390_vm_set_attr(kvm, &attr);
1259                 break;
1260         }
1261         case KVM_GET_DEVICE_ATTR: {
1262                 r = -EFAULT;
1263                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1264                         break;
1265                 r = kvm_s390_vm_get_attr(kvm, &attr);
1266                 break;
1267         }
1268         case KVM_HAS_DEVICE_ATTR: {
1269                 r = -EFAULT;
1270                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1271                         break;
1272                 r = kvm_s390_vm_has_attr(kvm, &attr);
1273                 break;
1274         }
1275         case KVM_S390_GET_SKEYS: {
1276                 struct kvm_s390_skeys args;
1277
1278                 r = -EFAULT;
1279                 if (copy_from_user(&args, argp,
1280                                    sizeof(struct kvm_s390_skeys)))
1281                         break;
1282                 r = kvm_s390_get_skeys(kvm, &args);
1283                 break;
1284         }
1285         case KVM_S390_SET_SKEYS: {
1286                 struct kvm_s390_skeys args;
1287
1288                 r = -EFAULT;
1289                 if (copy_from_user(&args, argp,
1290                                    sizeof(struct kvm_s390_skeys)))
1291                         break;
1292                 r = kvm_s390_set_skeys(kvm, &args);
1293                 break;
1294         }
1295         default:
1296                 r = -ENOTTY;
1297         }
1298
1299         return r;
1300 }
1301
/*
 * Issue PQAP(QCI) and store the result in @config.
 *
 * @config: buffer the instruction fills in; the first 128 bytes are zeroed
 *          here beforehand, so it must be at least that large.
 *
 * Returns the condition code extracted after the instruction. cc starts out
 * as 0 and keeps that value if the exception-table fixup skips the
 * extraction.
 */
static int kvm_s390_query_ap_config(u8 *config)
{
        u32 fcn_code = 0x04000000UL;
        u32 cc = 0;

        memset(config, 0, 128);
        /*
         * Register 0 receives the function code and register 2 the address
         * of the config buffer. ipm/srl 28 move the condition code into the
         * low bits of cc. EX_TABLE(0b, 1b) resumes at label 1 for a fault
         * reported at label 0, i.e. without touching cc.
         */
        asm volatile(
                "lgr 0,%1\n"
                "lgr 2,%2\n"
                ".long 0xb2af0000\n"            /* PQAP(QCI) */
                "0: ipm %0\n"
                "srl %0,28\n"
                "1:\n"
                EX_TABLE(0b, 1b)
                : "+r" (cc)
                : "r" (fcn_code), "r" (config)
                : "cc", "0", "2", "memory"
        );

        return cc;
}
1323
1324 static int kvm_s390_apxa_installed(void)
1325 {
1326         u8 config[128];
1327         int cc;
1328
1329         if (test_facility(12)) {
1330                 cc = kvm_s390_query_ap_config(config);
1331
1332                 if (cc)
1333                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1334                 else
1335                         return config[0] & 0x40;
1336         }
1337
1338         return 0;
1339 }
1340
1341 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1342 {
1343         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1344
1345         if (kvm_s390_apxa_installed())
1346                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1347         else
1348                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1349 }
1350
1351 static u64 kvm_s390_get_initial_cpuid(void)
1352 {
1353         struct cpuid cpuid;
1354
1355         get_cpu_id(&cpuid);
1356         cpuid.version = 0xff;
1357         return *((u64 *) &cpuid);
1358 }
1359
1360 static void kvm_s390_crypto_init(struct kvm *kvm)
1361 {
1362         if (!test_kvm_facility(kvm, 76))
1363                 return;
1364
1365         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1366         kvm_s390_set_crycb_format(kvm);
1367
1368         /* Enable AES/DEA protected key functions by default */
1369         kvm->arch.crypto.aes_kw = 1;
1370         kvm->arch.crypto.dea_kw = 1;
1371         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1372                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1373         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1374                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1375 }
1376
1377 static void sca_dispose(struct kvm *kvm)
1378 {
1379         if (kvm->arch.use_esca)
1380                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1381         else
1382                 free_page((unsigned long)(kvm->arch.sca));
1383         kvm->arch.sca = NULL;
1384 }
1385
/*
 * Architecture specific part of VM creation.
 *
 * @kvm:  the VM being set up
 * @type: VM type requested by user space; only 0 is accepted, plus
 *        KVM_VM_S390_UCONTROL when CONFIG_KVM_S390_UCONTROL is set and the
 *        caller has CAP_SYS_ADMIN
 *
 * Returns 0 on success, -EINVAL for an unsupported or unpermitted type, the
 * error of s390_enable_sie(), or -ENOMEM if an allocation or registration
 * fails. Every failure goes through out_err, which releases whatever was set
 * up so far.
 */
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
        gfp_t alloc_flags = GFP_KERNEL;
        int i, rc;
        char debug_name[16];
        /* shared by all VMs; modified only under kvm_lock below */
        static unsigned long sca_offset;

        rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
        if (type & ~KVM_VM_S390_UCONTROL)
                goto out_err;
        if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
                goto out_err;
#else
        if (type)
                goto out_err;
#endif

        rc = s390_enable_sie();
        if (rc)
                goto out_err;

        /* from here on every failure is reported as -ENOMEM */
        rc = -ENOMEM;

        ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

        kvm->arch.use_esca = 0; /* start with basic SCA */
        /* without sclp.has_64bscao the SCA page is taken from GFP_DMA memory */
        if (!sclp.has_64bscao)
                alloc_flags |= GFP_DMA;
        rwlock_init(&kvm->arch.sca_lock);
        kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
        if (!kvm->arch.sca)
                goto out_err;
        /*
         * Place the basic SCA at a per-VM offset inside its page: the offset
         * advances by 16 for every VM and restarts at 0 once the block would
         * no longer fit into the page.
         */
        spin_lock(&kvm_lock);
        sca_offset += 16;
        if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
                sca_offset = 0;
        kvm->arch.sca = (struct bsca_block *)
                        ((char *) kvm->arch.sca + sca_offset);
        spin_unlock(&kvm_lock);

        /* the debug area is named after the pid of the creating task */
        sprintf(debug_name, "kvm-%u", current->pid);

        kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
        if (!kvm->arch.dbf)
                goto out_err;

        kvm->arch.sie_page2 =
             (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
        if (!kvm->arch.sie_page2)
                goto out_err;

        /* Populate the facility mask initially. */
        memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        /* keep only host facilities that are also in kvm_s390_fac_list_mask */
        for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
                if (i < kvm_s390_fac_list_mask_size())
                        kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
                else
                        kvm->arch.model.fac_mask[i] = 0UL;
        }

        /* Populate the facility list initially. */
        kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
        memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);

        /* facility 74 is set in mask and list regardless of the host list */
        set_kvm_facility(kvm->arch.model.fac_mask, 74);
        set_kvm_facility(kvm->arch.model.fac_list, 74);

        kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
        kvm->arch.model.ibc = sclp.ibc & 0x0fff;

        kvm_s390_crypto_init(kvm);

        spin_lock_init(&kvm->arch.float_int.lock);
        for (i = 0; i < FIRQ_LIST_COUNT; i++)
                INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
        init_waitqueue_head(&kvm->arch.ipte_wq);
        mutex_init(&kvm->arch.ipte_mutex);

        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
        VM_EVENT(kvm, 3, "vm created with type %lu", type);

        if (type & KVM_VM_S390_UCONTROL) {
                /* ucontrol VMs get no VM-wide gmap and no memory limit */
                kvm->arch.gmap = NULL;
                kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
        } else {
                /* limit guest memory to min(TASK_MAX_SIZE, sclp.hamax + 1) */
                if (sclp.hamax == U64_MAX)
                        kvm->arch.mem_limit = TASK_MAX_SIZE;
                else
                        kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
                                                    sclp.hamax + 1);
                kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
                if (!kvm->arch.gmap)
                        goto out_err;
                kvm->arch.gmap->private = kvm;
                kvm->arch.gmap->pfault_enabled = 0;
        }

        kvm->arch.css_support = 0;
        kvm->arch.use_irqchip = 0;
        kvm->arch.epoch = 0;

        spin_lock_init(&kvm->arch.start_stop_lock);
        kvm_s390_vsie_init(kvm);
        KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

        return 0;
out_err:
        /*
         * Shared error path, also reached before anything was allocated.
         * NOTE(review): this relies on the three release calls tolerating
         * not-yet-set (presumably zeroed) fields -- confirm.
         */
        free_page((unsigned long)kvm->arch.sie_page2);
        debug_unregister(kvm->arch.dbf);
        sca_dispose(kvm);
        KVM_EVENT(3, "creation of vm failed: %d", rc);
        return rc;
}
1502
/*
 * Always returns false. Presumably this tells the generic KVM code that no
 * arch specific per-vcpu debugfs entries exist -- confirm against the caller.
 */
bool kvm_arch_has_vcpu_debugfs(void)
{
        return false;
}
1507
/* No-op here: creates nothing for @vcpu and always returns 0. */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
        return 0;
}
1512
1513 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1514 {
1515         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1516         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1517         kvm_s390_clear_local_irqs(vcpu);
1518         kvm_clear_async_pf_completion_queue(vcpu);
1519         if (!kvm_is_ucontrol(vcpu->kvm))
1520                 sca_del_vcpu(vcpu);
1521
1522         if (kvm_is_ucontrol(vcpu->kvm))
1523                 gmap_remove(vcpu->arch.gmap);
1524
1525         if (vcpu->kvm->arch.use_cmma)
1526                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1527         free_page((unsigned long)(vcpu->arch.sie_block));
1528
1529         kvm_vcpu_uninit(vcpu);
1530         kmem_cache_free(kvm_vcpu_cache, vcpu);
1531 }
1532
1533 static void kvm_free_vcpus(struct kvm *kvm)
1534 {
1535         unsigned int i;
1536         struct kvm_vcpu *vcpu;
1537
1538         kvm_for_each_vcpu(i, vcpu, kvm)
1539                 kvm_arch_vcpu_destroy(vcpu);
1540
1541         mutex_lock(&kvm->lock);
1542         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1543                 kvm->vcpus[i] = NULL;
1544
1545         atomic_set(&kvm->online_vcpus, 0);
1546         mutex_unlock(&kvm->lock);
1547 }
1548
/*
 * Architecture specific VM teardown: free all vcpus first, then release the
 * SCA, the debug area, sie_page2, the VM-wide gmap (non-ucontrol VMs only),
 * the adapters, pending floating interrupts and the vsie state.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
        kvm_free_vcpus(kvm);
        sca_dispose(kvm);
        debug_unregister(kvm->arch.dbf);
        free_page((unsigned long)kvm->arch.sie_page2);
        /* only non-ucontrol VMs have their gmap removed here */
        if (!kvm_is_ucontrol(kvm))
                gmap_remove(kvm->arch.gmap);
        kvm_s390_destroy_adapters(kvm);
        kvm_s390_clear_float_irqs(kvm);
        kvm_s390_vsie_destroy(kvm);
        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
1562
1563 /* Section: vcpu related */
1564 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1565 {
1566         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1567         if (!vcpu->arch.gmap)
1568                 return -ENOMEM;
1569         vcpu->arch.gmap->private = vcpu->kvm;
1570
1571         return 0;
1572 }
1573
1574 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1575 {
1576         if (!kvm_s390_use_sca_entries())
1577                 return;
1578         read_lock(&vcpu->kvm->arch.sca_lock);
1579         if (vcpu->kvm->arch.use_esca) {
1580                 struct esca_block *sca = vcpu->kvm->arch.sca;
1581
1582                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1583                 sca->cpu[vcpu->vcpu_id].sda = 0;
1584         } else {
1585                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1586
1587                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1588                 sca->cpu[vcpu->vcpu_id].sda = 0;
1589         }
1590         read_unlock(&vcpu->kvm->arch.sca_lock);
1591 }
1592
1593 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1594 {
1595         if (!kvm_s390_use_sca_entries()) {
1596                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1597
1598                 /* we still need the basic sca for the ipte control */
1599                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1600                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1601         }
1602         read_lock(&vcpu->kvm->arch.sca_lock);
1603         if (vcpu->kvm->arch.use_esca) {
1604                 struct esca_block *sca = vcpu->kvm->arch.sca;
1605
1606                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1607                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1608                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1609                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1610                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1611         } else {
1612                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1613
1614                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1615                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1616                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1617                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1618         }
1619         read_unlock(&vcpu->kvm->arch.sca_lock);
1620 }
1621
1622 /* Basic SCA to Extended SCA data copy routines */
1623 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1624 {
1625         d->sda = s->sda;
1626         d->sigp_ctrl.c = s->sigp_ctrl.c;
1627         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1628 }
1629
1630 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1631 {
1632         int i;
1633
1634         d->ipte_control = s->ipte_control;
1635         d->mcn[0] = s->mcn;
1636         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1637                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1638 }
1639
/*
 * Replace the VM's basic SCA with a newly allocated extended SCA.
 *
 * All vcpus are blocked and the sca_lock write lock is held while the
 * contents are copied and every vcpu's SIE block is pointed at the new
 * block. The old page is freed afterwards.
 *
 * Returns 0 on success or -ENOMEM if the extended SCA cannot be allocated,
 * in which case nothing is changed.
 */
static int sca_switch_to_extended(struct kvm *kvm)
{
        struct bsca_block *old_sca = kvm->arch.sca;
        struct esca_block *new_sca;
        struct kvm_vcpu *vcpu;
        unsigned int vcpu_idx;
        u32 scaol, scaoh;

        new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
        if (!new_sca)
                return -ENOMEM;

        /* split the new origin into high/low words, low six bits masked */
        scaoh = (u32)((u64)(new_sca) >> 32);
        scaol = (u32)(u64)(new_sca) & ~0x3fU;

        kvm_s390_vcpu_block_all(kvm);
        write_lock(&kvm->arch.sca_lock);

        sca_copy_b_to_e(new_sca, old_sca);

        /* repoint every existing vcpu and set the ecb2 bit used with the ESCA */
        kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
                vcpu->arch.sie_block->scaoh = scaoh;
                vcpu->arch.sie_block->scaol = scaol;
                vcpu->arch.sie_block->ecb2 |= 0x04U;
        }
        kvm->arch.sca = new_sca;
        kvm->arch.use_esca = 1;

        write_unlock(&kvm->arch.sca_lock);
        kvm_s390_vcpu_unblock_all(kvm);

        /* the basic SCA is released only after the switch is complete */
        free_page((unsigned long)old_sca);

        VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
                 old_sca, kvm->arch.sca);
        return 0;
}
1677
1678 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1679 {
1680         int rc;
1681
1682         if (!kvm_s390_use_sca_entries()) {
1683                 if (id < KVM_MAX_VCPUS)
1684                         return true;
1685                 return false;
1686         }
1687         if (id < KVM_S390_BSCA_CPU_SLOTS)
1688                 return true;
1689         if (!sclp.has_esca || !sclp.has_64bscao)
1690                 return false;
1691
1692         mutex_lock(&kvm->lock);
1693         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1694         mutex_unlock(&kvm->lock);
1695
1696         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1697 }
1698
1699 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1700 {
1701         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1702         kvm_clear_async_pf_completion_queue(vcpu);
1703         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1704                                     KVM_SYNC_GPRS |
1705                                     KVM_SYNC_ACRS |
1706                                     KVM_SYNC_CRS |
1707                                     KVM_SYNC_ARCH0 |
1708                                     KVM_SYNC_PFAULT;
1709         kvm_s390_set_prefix(vcpu, 0);
1710         if (test_kvm_facility(vcpu->kvm, 64))
1711                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1712         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1713          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1714          */
1715         if (MACHINE_HAS_VX)
1716                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1717         else
1718                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1719
1720         if (kvm_is_ucontrol(vcpu->kvm))
1721                 return __kvm_ucontrol_vcpu_init(vcpu);
1722
1723         return 0;
1724 }
1725
1726 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1727 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1728 {
1729         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1730         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1731         vcpu->arch.cputm_start = get_tod_clock_fast();
1732         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1733 }
1734
1735 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1736 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1737 {
1738         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1739         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1740         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1741         vcpu->arch.cputm_start = 0;
1742         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1743 }
1744
1745 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1746 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1747 {
1748         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1749         vcpu->arch.cputm_enabled = true;
1750         __start_cpu_timer_accounting(vcpu);
1751 }
1752
1753 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1754 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1755 {
1756         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1757         __stop_cpu_timer_accounting(vcpu);
1758         vcpu->arch.cputm_enabled = false;
1759 }
1760
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        /* protect from TOD sync and vcpu_load/put */
        preempt_disable();
        __enable_cpu_timer_accounting(vcpu);
        preempt_enable();
}
1767
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        /* protect from TOD sync and vcpu_load/put */
        preempt_disable();
        __disable_cpu_timer_accounting(vcpu);
        preempt_enable();
}
1774
1775 /* set the cpu timer - may only be called from the VCPU thread itself */
1776 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1777 {
1778         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1779         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1780         if (vcpu->arch.cputm_enabled)
1781                 vcpu->arch.cputm_start = get_tod_clock_fast();
1782         vcpu->arch.sie_block->cputm = cputm;
1783         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1784         preempt_enable();
1785 }
1786
1787 /* update and get the cpu timer - can also be called from other VCPU threads */
1788 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1789 {
1790         unsigned int seq;
1791         __u64 value;
1792
1793         if (unlikely(!vcpu->arch.cputm_enabled))
1794                 return vcpu->arch.sie_block->cputm;
1795
1796         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1797         do {
1798                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1799                 /*
1800                  * If the writer would ever execute a read in the critical
1801                  * section, e.g. in irq context, we have a deadlock.
1802                  */
1803                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1804                 value = vcpu->arch.sie_block->cputm;
1805                 /* if cputm_start is 0, accounting is being started/stopped */
1806                 if (likely(vcpu->arch.cputm_start))
1807                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1808         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1809         preempt_enable();
1810         return value;
1811 }
1812
1813 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1814 {
1815
1816         gmap_enable(vcpu->arch.enabled_gmap);
1817         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1818         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1819                 __start_cpu_timer_accounting(vcpu);
1820         vcpu->cpu = cpu;
1821 }
1822
1823 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1824 {
1825         vcpu->cpu = -1;
1826         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1827                 __stop_cpu_timer_accounting(vcpu);
1828         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1829         vcpu->arch.enabled_gmap = gmap_get_enabled();
1830         gmap_disable(vcpu->arch.enabled_gmap);
1831
1832 }
1833
1834 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1835 {
1836         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1837         vcpu->arch.sie_block->gpsw.mask = 0UL;
1838         vcpu->arch.sie_block->gpsw.addr = 0UL;
1839         kvm_s390_set_prefix(vcpu, 0);
1840         kvm_s390_set_cpu_timer(vcpu, 0);
1841         vcpu->arch.sie_block->ckc       = 0UL;
1842         vcpu->arch.sie_block->todpr     = 0;
1843         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1844         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1845         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1846         /* make sure the new fpc will be lazily loaded */
1847         save_fpu_regs();
1848         current->thread.fpu.fpc = 0;
1849         vcpu->arch.sie_block->gbea = 1;
1850         vcpu->arch.sie_block->pp = 0;
1851         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1852         kvm_clear_async_pf_completion_queue(vcpu);
1853         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1854                 kvm_s390_vcpu_stop(vcpu);
1855         kvm_s390_clear_local_irqs(vcpu);
1856 }
1857
1858 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1859 {
1860         mutex_lock(&vcpu->kvm->lock);
1861         preempt_disable();
1862         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1863         preempt_enable();
1864         mutex_unlock(&vcpu->kvm->lock);
1865         if (!kvm_is_ucontrol(vcpu->kvm)) {
1866                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1867                 sca_add_vcpu(vcpu);
1868         }
1869         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1870                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1871         /* make vcpu_load load the right gmap on the first trigger */
1872         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1873 }
1874
1875 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1876 {
1877         if (!test_kvm_facility(vcpu->kvm, 76))
1878                 return;
1879
1880         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1881
1882         if (vcpu->kvm->arch.crypto.aes_kw)
1883                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1884         if (vcpu->kvm->arch.crypto.dea_kw)
1885                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1886
1887         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1888 }
1889
1890 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1891 {
1892         free_page(vcpu->arch.sie_block->cbrlo);
1893         vcpu->arch.sie_block->cbrlo = 0;
1894 }
1895
1896 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1897 {
1898         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1899         if (!vcpu->arch.sie_block->cbrlo)
1900                 return -ENOMEM;
1901
1902         vcpu->arch.sie_block->ecb2 |= 0x80;
1903         vcpu->arch.sie_block->ecb2 &= ~0x08;
1904         return 0;
1905 }
1906
1907 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1908 {
1909         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1910
1911         vcpu->arch.sie_block->ibc = model->ibc;
1912         if (test_kvm_facility(vcpu->kvm, 7))
1913                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1914 }
1915
1916 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1917 {
1918         int rc = 0;
1919
1920         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1921                                                     CPUSTAT_SM |
1922                                                     CPUSTAT_STOPPED);
1923
1924         if (test_kvm_facility(vcpu->kvm, 78))
1925                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1926         else if (test_kvm_facility(vcpu->kvm, 8))
1927                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1928
1929         kvm_s390_vcpu_setup_model(vcpu);
1930
1931         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1932         if (MACHINE_HAS_ESOP)
1933                 vcpu->arch.sie_block->ecb |= 0x02;
1934         if (test_kvm_facility(vcpu->kvm, 9))
1935                 vcpu->arch.sie_block->ecb |= 0x04;
1936         if (test_kvm_facility(vcpu->kvm, 73))
1937                 vcpu->arch.sie_block->ecb |= 0x10;
1938
1939         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1940                 vcpu->arch.sie_block->ecb2 |= 0x08;
1941         vcpu->arch.sie_block->eca = 0x1002000U;
1942         if (sclp.has_cei)
1943                 vcpu->arch.sie_block->eca |= 0x80000000U;
1944         if (sclp.has_ib)
1945                 vcpu->arch.sie_block->eca |= 0x40000000U;
1946         if (sclp.has_siif)
1947                 vcpu->arch.sie_block->eca |= 1;
1948         if (sclp.has_sigpif)
1949                 vcpu->arch.sie_block->eca |= 0x10000000U;
1950         if (test_kvm_facility(vcpu->kvm, 129)) {
1951                 vcpu->arch.sie_block->eca |= 0x00020000;
1952                 vcpu->arch.sie_block->ecd |= 0x20000000;
1953         }
1954         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1955         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1956
1957         if (vcpu->kvm->arch.use_cmma) {
1958                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1959                 if (rc)
1960                         return rc;
1961         }
1962         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1963         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1964
1965         kvm_s390_vcpu_crypto_setup(vcpu);
1966
1967         return rc;
1968 }
1969
1970 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1971                                       unsigned int id)
1972 {
1973         struct kvm_vcpu *vcpu;
1974         struct sie_page *sie_page;
1975         int rc = -EINVAL;
1976
1977         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1978                 goto out;
1979
1980         rc = -ENOMEM;
1981
1982         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1983         if (!vcpu)
1984                 goto out;
1985
1986         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1987         if (!sie_page)
1988                 goto out_free_cpu;
1989
1990         vcpu->arch.sie_block = &sie_page->sie_block;
1991         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1992
1993         /* the real guest size will always be smaller than msl */
1994         vcpu->arch.sie_block->mso = 0;
1995         vcpu->arch.sie_block->msl = sclp.hamax;
1996
1997         vcpu->arch.sie_block->icpua = id;
1998         spin_lock_init(&vcpu->arch.local_int.lock);
1999         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2000         vcpu->arch.local_int.wq = &vcpu->wq;
2001         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2002         seqcount_init(&vcpu->arch.cputm_seqcount);
2003
2004         rc = kvm_vcpu_init(vcpu, kvm, id);
2005         if (rc)
2006                 goto out_free_sie_block;
2007         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2008                  vcpu->arch.sie_block);
2009         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2010
2011         return vcpu;
2012 out_free_sie_block:
2013         free_page((unsigned long)(vcpu->arch.sie_block));
2014 out_free_cpu:
2015         kmem_cache_free(kvm_vcpu_cache, vcpu);
2016 out:
2017         return ERR_PTR(rc);
2018 }
2019
/* Report the result of kvm_s390_vcpu_has_irq() for @vcpu as runnable state. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
        int pending = kvm_s390_vcpu_has_irq(vcpu, 0);

        return pending;
}
2024
2025 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2026 {
2027         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2028         exit_sie(vcpu);
2029 }
2030
2031 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2032 {
2033         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2034 }
2035
2036 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2037 {
2038         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2039         exit_sie(vcpu);
2040 }
2041
2042 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2043 {
2044         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2045 }
2046
2047 /*
2048  * Kick a guest cpu out of SIE and wait until SIE is not running.
2049  * If the CPU is not running (e.g. waiting as idle) the function will
2050  * return immediately. */
2051 void exit_sie(struct kvm_vcpu *vcpu)
2052 {
2053         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2054         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2055                 cpu_relax();
2056 }
2057
/*
 * Kick a guest cpu out of SIE to process a request synchronously: the
 * request @req is posted first, then the vcpu is forced out of SIE.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
        kvm_make_request(req, vcpu);
        kvm_s390_vcpu_request(vcpu);
}
2064
2065 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2066                               unsigned long end)
2067 {
2068         struct kvm *kvm = gmap->private;
2069         struct kvm_vcpu *vcpu;
2070         unsigned long prefix;
2071         int i;
2072
2073         if (gmap_is_shadow(gmap))
2074                 return;
2075         if (start >= 1UL << 31)
2076                 /* We are only interested in prefix pages */
2077                 return;
2078         kvm_for_each_vcpu(i, vcpu, kvm) {
2079                 /* match against both prefix pages */
2080                 prefix = kvm_s390_get_prefix(vcpu);
2081                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2082                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2083                                    start, end);
2084                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2085                 }
2086         }
2087 }
2088
/*
 * kvm common code refers to this, but never calls it - hence reaching this
 * function is treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
        BUG();

        return 0;
}
2095
2096 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2097                                            struct kvm_one_reg *reg)
2098 {
2099         int r = -EINVAL;
2100
2101         switch (reg->id) {
2102         case KVM_REG_S390_TODPR:
2103                 r = put_user(vcpu->arch.sie_block->todpr,
2104                              (u32 __user *)reg->addr);
2105                 break;
2106         case KVM_REG_S390_EPOCHDIFF:
2107                 r = put_user(vcpu->arch.sie_block->epoch,
2108                              (u64 __user *)reg->addr);
2109                 break;
2110         case KVM_REG_S390_CPU_TIMER:
2111                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2112                              (u64 __user *)reg->addr);
2113                 break;
2114         case KVM_REG_S390_CLOCK_COMP:
2115                 r = put_user(vcpu->arch.sie_block->ckc,
2116                              (u64 __user *)reg->addr);
2117                 break;
2118         case KVM_REG_S390_PFTOKEN:
2119                 r = put_user(vcpu->arch.pfault_token,
2120                              (u64 __user *)reg->addr);
2121                 break;
2122         case KVM_REG_S390_PFCOMPARE:
2123                 r = put_user(vcpu->arch.pfault_compare,
2124                              (u64 __user *)reg->addr);
2125                 break;
2126         case KVM_REG_S390_PFSELECT:
2127                 r = put_user(vcpu->arch.pfault_select,
2128                              (u64 __user *)reg->addr);
2129                 break;
2130         case KVM_REG_S390_PP:
2131                 r = put_user(vcpu->arch.sie_block->pp,
2132                              (u64 __user *)reg->addr);
2133                 break;
2134         case KVM_REG_S390_GBEA:
2135                 r = put_user(vcpu->arch.sie_block->gbea,
2136                              (u64 __user *)reg->addr);
2137                 break;
2138         default:
2139                 break;
2140         }
2141
2142         return r;
2143 }
2144
2145 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2146                                            struct kvm_one_reg *reg)
2147 {
2148         int r = -EINVAL;
2149         __u64 val;
2150
2151         switch (reg->id) {
2152         case KVM_REG_S390_TODPR:
2153                 r = get_user(vcpu->arch.sie_block->todpr,
2154                              (u32 __user *)reg->addr);
2155                 break;
2156         case KVM_REG_S390_EPOCHDIFF:
2157                 r = get_user(vcpu->arch.sie_block->epoch,
2158                              (u64 __user *)reg->addr);
2159                 break;
2160         case KVM_REG_S390_CPU_TIMER:
2161                 r = get_user(val, (u64 __user *)reg->addr);
2162                 if (!r)
2163                         kvm_s390_set_cpu_timer(vcpu, val);
2164                 break;
2165         case KVM_REG_S390_CLOCK_COMP:
2166                 r = get_user(vcpu->arch.sie_block->ckc,
2167                              (u64 __user *)reg->addr);
2168                 break;
2169         case KVM_REG_S390_PFTOKEN:
2170                 r = get_user(vcpu->arch.pfault_token,
2171                              (u64 __user *)reg->addr);
2172                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2173                         kvm_clear_async_pf_completion_queue(vcpu);
2174                 break;
2175         case KVM_REG_S390_PFCOMPARE:
2176                 r = get_user(vcpu->arch.pfault_compare,
2177                              (u64 __user *)reg->addr);
2178                 break;
2179         case KVM_REG_S390_PFSELECT:
2180                 r = get_user(vcpu->arch.pfault_select,
2181                              (u64 __user *)reg->addr);
2182                 break;
2183         case KVM_REG_S390_PP:
2184                 r = get_user(vcpu->arch.sie_block->pp,
2185                              (u64 __user *)reg->addr);
2186                 break;
2187         case KVM_REG_S390_GBEA:
2188                 r = get_user(vcpu->arch.sie_block->gbea,
2189                              (u64 __user *)reg->addr);
2190                 break;
2191         default:
2192                 break;
2193         }
2194
2195         return r;
2196 }
2197
/* ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
        int rc = 0;

        kvm_s390_vcpu_initial_reset(vcpu);
        return rc;
}
2203
2204 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2205 {
2206         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2207         return 0;
2208 }
2209
2210 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2211 {
2212         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2213         return 0;
2214 }
2215
2216 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2217                                   struct kvm_sregs *sregs)
2218 {
2219         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2220         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2221         return 0;
2222 }
2223
2224 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2225                                   struct kvm_sregs *sregs)
2226 {
2227         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2228         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2229         return 0;
2230 }
2231
2232 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2233 {
2234         if (test_fp_ctl(fpu->fpc))
2235                 return -EINVAL;
2236         vcpu->run->s.regs.fpc = fpu->fpc;
2237         if (MACHINE_HAS_VX)
2238                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2239                                  (freg_t *) fpu->fprs);
2240         else
2241                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2242         return 0;
2243 }
2244
2245 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2246 {
2247         /* make sure we have the latest values */
2248         save_fpu_regs();
2249         if (MACHINE_HAS_VX)
2250                 convert_vx_to_fp((freg_t *) fpu->fprs,
2251                                  (__vector128 *) vcpu->run->s.regs.vrs);
2252         else
2253                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2254         fpu->fpc = vcpu->run->s.regs.fpc;
2255         return 0;
2256 }
2257
2258 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2259 {
2260         int rc = 0;
2261
2262         if (!is_vcpu_stopped(vcpu))
2263                 rc = -EBUSY;
2264         else {
2265                 vcpu->run->psw_mask = psw.mask;
2266                 vcpu->run->psw_addr = psw.addr;
2267         }
2268         return rc;
2269 }
2270
2271 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2272                                   struct kvm_translation *tr)
2273 {
2274         return -EINVAL; /* not implemented yet */
2275 }
2276
2277 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2278                               KVM_GUESTDBG_USE_HW_BP | \
2279                               KVM_GUESTDBG_ENABLE)
2280
2281 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2282                                         struct kvm_guest_debug *dbg)
2283 {
2284         int rc = 0;
2285
2286         vcpu->guest_debug = 0;
2287         kvm_s390_clear_bp_data(vcpu);
2288
2289         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2290                 return -EINVAL;
2291         if (!sclp.has_gpere)
2292                 return -EINVAL;
2293
2294         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2295                 vcpu->guest_debug = dbg->control;
2296                 /* enforce guest PER */
2297                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2298
2299                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2300                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2301         } else {
2302                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2303                 vcpu->arch.guestdbg.last_bp = 0;
2304         }
2305
2306         if (rc) {
2307                 vcpu->guest_debug = 0;
2308                 kvm_s390_clear_bp_data(vcpu);
2309                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2310         }
2311
2312         return rc;
2313 }
2314
2315 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2316                                     struct kvm_mp_state *mp_state)
2317 {
2318         /* CHECK_STOP and LOAD are not supported yet */
2319         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2320                                        KVM_MP_STATE_OPERATING;
2321 }
2322
2323 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2324                                     struct kvm_mp_state *mp_state)
2325 {
2326         int rc = 0;
2327
2328         /* user space knows about this interface - let it control the state */
2329         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2330
2331         switch (mp_state->mp_state) {
2332         case KVM_MP_STATE_STOPPED:
2333                 kvm_s390_vcpu_stop(vcpu);
2334                 break;
2335         case KVM_MP_STATE_OPERATING:
2336                 kvm_s390_vcpu_start(vcpu);
2337                 break;
2338         case KVM_MP_STATE_LOAD:
2339         case KVM_MP_STATE_CHECK_STOP:
2340                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2341         default:
2342                 rc = -ENXIO;
2343         }
2344
2345         return rc;
2346 }
2347
2348 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2349 {
2350         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2351 }
2352
2353 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2354 {
2355 retry:
2356         kvm_s390_vcpu_request_handled(vcpu);
2357         if (!vcpu->requests)
2358                 return 0;
2359         /*
2360          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2361          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2362          * This ensures that the ipte instruction for this request has
2363          * already finished. We might race against a second unmapper that
2364          * wants to set the blocking bit. Lets just retry the request loop.
2365          */
2366         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2367                 int rc;
2368                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2369                                           kvm_s390_get_prefix(vcpu),
2370                                           PAGE_SIZE * 2, PROT_WRITE);
2371                 if (rc) {
2372                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2373                         return rc;
2374                 }
2375                 goto retry;
2376         }
2377
2378         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2379                 vcpu->arch.sie_block->ihcpu = 0xffff;
2380                 goto retry;
2381         }
2382
2383         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2384                 if (!ibs_enabled(vcpu)) {
2385                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2386                         atomic_or(CPUSTAT_IBS,
2387                                         &vcpu->arch.sie_block->cpuflags);
2388                 }
2389                 goto retry;
2390         }
2391
2392         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2393                 if (ibs_enabled(vcpu)) {
2394                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2395                         atomic_andnot(CPUSTAT_IBS,
2396                                           &vcpu->arch.sie_block->cpuflags);
2397                 }
2398                 goto retry;
2399         }
2400
2401         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2402                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2403                 goto retry;
2404         }
2405
2406         /* nothing to do, just clear the request */
2407         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2408
2409         return 0;
2410 }
2411
2412 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2413 {
2414         struct kvm_vcpu *vcpu;
2415         int i;
2416
2417         mutex_lock(&kvm->lock);
2418         preempt_disable();
2419         kvm->arch.epoch = tod - get_tod_clock();
2420         kvm_s390_vcpu_block_all(kvm);
2421         kvm_for_each_vcpu(i, vcpu, kvm)
2422                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2423         kvm_s390_vcpu_unblock_all(kvm);
2424         preempt_enable();
2425         mutex_unlock(&kvm->lock);
2426 }
2427
2428 /**
2429  * kvm_arch_fault_in_page - fault-in guest page if necessary
2430  * @vcpu: The corresponding virtual cpu
2431  * @gpa: Guest physical address
2432  * @writable: Whether the page should be writable or not
2433  *
2434  * Make sure that a guest page has been faulted-in on the host.
2435  *
2436  * Return: Zero on success, negative error code otherwise.
2437  */
2438 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2439 {
2440         return gmap_fault(vcpu->arch.gmap, gpa,
2441                           writable ? FAULT_FLAG_WRITE : 0);
2442 }
2443
2444 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2445                                       unsigned long token)
2446 {
2447         struct kvm_s390_interrupt inti;
2448         struct kvm_s390_irq irq;
2449
2450         if (start_token) {
2451                 irq.u.ext.ext_params2 = token;
2452                 irq.type = KVM_S390_INT_PFAULT_INIT;
2453                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2454         } else {
2455                 inti.type = KVM_S390_INT_PFAULT_DONE;
2456                 inti.parm64 = token;
2457                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2458         }
2459 }
2460
2461 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2462                                      struct kvm_async_pf *work)
2463 {
2464         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2465         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2466 }
2467
2468 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2469                                  struct kvm_async_pf *work)
2470 {
2471         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2472         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2473 }
2474
/*
 * Async-pf hook that is intentionally empty on s390: the page is always
 * injected directly, so there is nothing to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
2480
2481 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2482 {
2483         /*
2484          * s390 will always inject the page directly,
2485          * but we still want check_async_completion to cleanup
2486          */
2487         return true;
2488 }
2489
2490 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2491 {
2492         hva_t hva;
2493         struct kvm_arch_async_pf arch;
2494         int rc;
2495
2496         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2497                 return 0;
2498         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2499             vcpu->arch.pfault_compare)
2500                 return 0;
2501         if (psw_extint_disabled(vcpu))
2502                 return 0;
2503         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2504                 return 0;
2505         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2506                 return 0;
2507         if (!vcpu->arch.gmap->pfault_enabled)
2508                 return 0;
2509
2510         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2511         hva += current->thread.gmap_addr & ~PAGE_MASK;
2512         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2513                 return 0;
2514
2515         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2516         return rc;
2517 }
2518
2519 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2520 {
2521         int rc, cpuflags;
2522
2523         /*
2524          * On s390 notifications for arriving pages will be delivered directly
2525          * to the guest but the house keeping for completed pfaults is
2526          * handled outside the worker.
2527          */
2528         kvm_check_async_pf_completion(vcpu);
2529
2530         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2531         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2532
2533         if (need_resched())
2534                 schedule();
2535
2536         if (test_cpu_flag(CIF_MCCK_PENDING))
2537                 s390_handle_mcck();
2538
2539         if (!kvm_is_ucontrol(vcpu->kvm)) {
2540                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2541                 if (rc)
2542                         return rc;
2543         }
2544
2545         rc = kvm_s390_handle_requests(vcpu);
2546         if (rc)
2547                 return rc;
2548
2549         if (guestdbg_enabled(vcpu)) {
2550                 kvm_s390_backup_guest_per_regs(vcpu);
2551                 kvm_s390_patch_guest_per_regs(vcpu);
2552         }
2553
2554         vcpu->arch.sie_block->icptcode = 0;
2555         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2556         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2557         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2558
2559         return 0;
2560 }
2561
2562 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2563 {
2564         struct kvm_s390_pgm_info pgm_info = {
2565                 .code = PGM_ADDRESSING,
2566         };
2567         u8 opcode, ilen;
2568         int rc;
2569
2570         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2571         trace_kvm_s390_sie_fault(vcpu);
2572
2573         /*
2574          * We want to inject an addressing exception, which is defined as a
2575          * suppressing or terminating exception. However, since we came here
2576          * by a DAT access exception, the PSW still points to the faulting
2577          * instruction since DAT exceptions are nullifying. So we've got
2578          * to look up the current opcode to get the length of the instruction
2579          * to be able to forward the PSW.
2580          */
2581         rc = read_guest_instr(vcpu, &opcode, 1);
2582         ilen = insn_length(opcode);
2583         if (rc < 0) {
2584                 return rc;
2585         } else if (rc) {
2586                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2587                  * Forward by arbitrary ilc, injection will take care of
2588                  * nullification if necessary.
2589                  */
2590                 pgm_info = vcpu->arch.pgm;
2591                 ilen = 4;
2592         }
2593         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2594         kvm_s390_forward_psw(vcpu, ilen);
2595         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2596 }
2597
2598 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2599 {
2600         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2601                    vcpu->arch.sie_block->icptcode);
2602         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2603
2604         if (guestdbg_enabled(vcpu))
2605                 kvm_s390_restore_guest_per_regs(vcpu);
2606
2607         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2608         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2609
2610         if (vcpu->arch.sie_block->icptcode > 0) {
2611                 int rc = kvm_handle_sie_intercept(vcpu);
2612
2613                 if (rc != -EOPNOTSUPP)
2614                         return rc;
2615                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2616                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2617                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2618                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2619                 return -EREMOTE;
2620         } else if (exit_reason != -EFAULT) {
2621                 vcpu->stat.exit_null++;
2622                 return 0;
2623         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2624                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2625                 vcpu->run->s390_ucontrol.trans_exc_code =
2626                                                 current->thread.gmap_addr;
2627                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2628                 return -EREMOTE;
2629         } else if (current->thread.gmap_pfault) {
2630                 trace_kvm_s390_major_guest_pfault(vcpu);
2631                 current->thread.gmap_pfault = 0;
2632                 if (kvm_arch_setup_async_pf(vcpu))
2633                         return 0;
2634                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2635         }
2636         return vcpu_post_run_fault_in_sie(vcpu);
2637 }
2638
2639 static int __vcpu_run(struct kvm_vcpu *vcpu)
2640 {
2641         int rc, exit_reason;
2642
2643         /*
2644          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2645          * ning the guest), so that memslots (and other stuff) are protected
2646          */
2647         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2648
2649         do {
2650                 rc = vcpu_pre_run(vcpu);
2651                 if (rc)
2652                         break;
2653
2654                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2655                 /*
2656                  * As PF_VCPU will be used in fault handler, between
2657                  * guest_enter and guest_exit should be no uaccess.
2658                  */
2659                 local_irq_disable();
2660                 guest_enter_irqoff();
2661                 __disable_cpu_timer_accounting(vcpu);
2662                 local_irq_enable();
2663                 exit_reason = sie64a(vcpu->arch.sie_block,
2664                                      vcpu->run->s.regs.gprs);
2665                 local_irq_disable();
2666                 __enable_cpu_timer_accounting(vcpu);
2667                 guest_exit_irqoff();
2668                 local_irq_enable();
2669                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2670
2671                 rc = vcpu_post_run(vcpu, exit_reason);
2672         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2673
2674         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2675         return rc;
2676 }
2677
2678 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2679 {
2680         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2681         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2682         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2683                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2684         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2685                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2686                 /* some control register changes require a tlb flush */
2687                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2688         }
2689         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2690                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2691                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2692                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2693                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2694                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2695         }
2696         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2697                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2698                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2699                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2700                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2701                         kvm_clear_async_pf_completion_queue(vcpu);
2702         }
2703         /*
2704          * If userspace sets the riccb (e.g. after migration) to a valid state,
2705          * we should enable RI here instead of doing the lazy enablement.
2706          */
2707         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2708             test_kvm_facility(vcpu->kvm, 64)) {
2709                 struct runtime_instr_cb *riccb =
2710                         (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2711
2712                 if (riccb->valid)
2713                         vcpu->arch.sie_block->ecb3 |= 0x01;
2714         }
2715         save_access_regs(vcpu->arch.host_acrs);
2716         restore_access_regs(vcpu->run->s.regs.acrs);
2717         /* save host (userspace) fprs/vrs */
2718         save_fpu_regs();
2719         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2720         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2721         if (MACHINE_HAS_VX)
2722                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2723         else
2724                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2725         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2726         if (test_fp_ctl(current->thread.fpu.fpc))
2727                 /* User space provided an invalid FPC, let's clear it */
2728                 current->thread.fpu.fpc = 0;
2729
2730         kvm_run->kvm_dirty_regs = 0;
2731 }
2732
2733 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2734 {
2735         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2736         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2737         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2738         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2739         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2740         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2741         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2742         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2743         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2744         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2745         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2746         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2747         save_access_regs(vcpu->run->s.regs.acrs);
2748         restore_access_regs(vcpu->arch.host_acrs);
2749         /* Save guest register state */
2750         save_fpu_regs();
2751         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2752         /* Restore will be done lazily at return */
2753         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2754         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2755
2756 }
2757
2758 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2759 {
2760         int rc;
2761         sigset_t sigsaved;
2762
2763         if (guestdbg_exit_pending(vcpu)) {
2764                 kvm_s390_prepare_debug_exit(vcpu);
2765                 return 0;
2766         }
2767
2768         if (vcpu->sigset_active)
2769                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2770
2771         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2772                 kvm_s390_vcpu_start(vcpu);
2773         } else if (is_vcpu_stopped(vcpu)) {
2774                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2775                                    vcpu->vcpu_id);
2776                 return -EINVAL;
2777         }
2778
2779         sync_regs(vcpu, kvm_run);
2780         enable_cpu_timer_accounting(vcpu);
2781
2782         might_fault();
2783         rc = __vcpu_run(vcpu);
2784
2785         if (signal_pending(current) && !rc) {
2786                 kvm_run->exit_reason = KVM_EXIT_INTR;
2787                 rc = -EINTR;
2788         }
2789
2790         if (guestdbg_exit_pending(vcpu) && !rc)  {
2791                 kvm_s390_prepare_debug_exit(vcpu);
2792                 rc = 0;
2793         }
2794
2795         if (rc == -EREMOTE) {
2796                 /* userspace support is needed, kvm_run has been prepared */
2797                 rc = 0;
2798         }
2799
2800         disable_cpu_timer_accounting(vcpu);
2801         store_regs(vcpu, kvm_run);
2802
2803         if (vcpu->sigset_active)
2804                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2805
2806         vcpu->stat.exit_userspace++;
2807         return rc;
2808 }
2809
2810 /*
2811  * store status at address
2812  * we use have two special cases:
2813  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2814  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2815  */
2816 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2817 {
2818         unsigned char archmode = 1;
2819         freg_t fprs[NUM_FPRS];
2820         unsigned int px;
2821         u64 clkcomp, cputm;
2822         int rc;
2823
2824         px = kvm_s390_get_prefix(vcpu);
2825         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2826                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2827                         return -EFAULT;
2828                 gpa = 0;
2829         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2830                 if (write_guest_real(vcpu, 163, &archmode, 1))
2831                         return -EFAULT;
2832                 gpa = px;
2833         } else
2834                 gpa -= __LC_FPREGS_SAVE_AREA;
2835
2836         /* manually convert vector registers if necessary */
2837         if (MACHINE_HAS_VX) {
2838                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2839                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2840                                      fprs, 128);
2841         } else {
2842                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2843                                      vcpu->run->s.regs.fprs, 128);
2844         }
2845         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2846                               vcpu->run->s.regs.gprs, 128);
2847         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2848                               &vcpu->arch.sie_block->gpsw, 16);
2849         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2850                               &px, 4);
2851         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2852                               &vcpu->run->s.regs.fpc, 4);
2853         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2854                               &vcpu->arch.sie_block->todpr, 4);
2855         cputm = kvm_s390_get_cpu_timer(vcpu);
2856         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2857                               &cputm, 8);
2858         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2859         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2860                               &clkcomp, 8);
2861         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2862                               &vcpu->run->s.regs.acrs, 64);
2863         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2864                               &vcpu->arch.sie_block->gcr, 128);
2865         return rc ? -EFAULT : 0;
2866 }
2867
2868 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2869 {
2870         /*
2871          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2872          * switch in the run ioctl. Let's update our copies before we save
2873          * it into the save area
2874          */
2875         save_fpu_regs();
2876         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2877         save_access_regs(vcpu->run->s.regs.acrs);
2878
2879         return kvm_s390_store_status_unloaded(vcpu, addr);
2880 }
2881
2882 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2883 {
2884         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2885         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2886 }
2887
2888 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2889 {
2890         unsigned int i;
2891         struct kvm_vcpu *vcpu;
2892
2893         kvm_for_each_vcpu(i, vcpu, kvm) {
2894                 __disable_ibs_on_vcpu(vcpu);
2895         }
2896 }
2897
2898 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2899 {
2900         if (!sclp.has_ibs)
2901                 return;
2902         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2903         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2904 }
2905
2906 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2907 {
2908         int i, online_vcpus, started_vcpus = 0;
2909
2910         if (!is_vcpu_stopped(vcpu))
2911                 return;
2912
2913         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2914         /* Only one cpu at a time may enter/leave the STOPPED state. */
2915         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2916         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2917
2918         for (i = 0; i < online_vcpus; i++) {
2919                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2920                         started_vcpus++;
2921         }
2922
2923         if (started_vcpus == 0) {
2924                 /* we're the only active VCPU -> speed it up */
2925                 __enable_ibs_on_vcpu(vcpu);
2926         } else if (started_vcpus == 1) {
2927                 /*
2928                  * As we are starting a second VCPU, we have to disable
2929                  * the IBS facility on all VCPUs to remove potentially
2930                  * oustanding ENABLE requests.
2931                  */
2932                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2933         }
2934
2935         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2936         /*
2937          * Another VCPU might have used IBS while we were offline.
2938          * Let's play safe and flush the VCPU at startup.
2939          */
2940         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2941         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2942         return;
2943 }
2944
2945 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2946 {
2947         int i, online_vcpus, started_vcpus = 0;
2948         struct kvm_vcpu *started_vcpu = NULL;
2949
2950         if (is_vcpu_stopped(vcpu))
2951                 return;
2952
2953         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2954         /* Only one cpu at a time may enter/leave the STOPPED state. */
2955         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2956         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2957
2958         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2959         kvm_s390_clear_stop_irq(vcpu);
2960
2961         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2962         __disable_ibs_on_vcpu(vcpu);
2963
2964         for (i = 0; i < online_vcpus; i++) {
2965                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2966                         started_vcpus++;
2967                         started_vcpu = vcpu->kvm->vcpus[i];
2968                 }
2969         }
2970
2971         if (started_vcpus == 1) {
2972                 /*
2973                  * As we only have one VCPU left, we want to enable the
2974                  * IBS facility for that VCPU to speed it up.
2975                  */
2976                 __enable_ibs_on_vcpu(started_vcpu);
2977         }
2978
2979         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2980         return;
2981 }
2982
2983 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2984                                      struct kvm_enable_cap *cap)
2985 {
2986         int r;
2987
2988         if (cap->flags)
2989                 return -EINVAL;
2990
2991         switch (cap->cap) {
2992         case KVM_CAP_S390_CSS_SUPPORT:
2993                 if (!vcpu->kvm->arch.css_support) {
2994                         vcpu->kvm->arch.css_support = 1;
2995                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2996                         trace_kvm_s390_enable_css(vcpu->kvm);
2997                 }
2998                 r = 0;
2999                 break;
3000         default:
3001                 r = -EINVAL;
3002                 break;
3003         }
3004         return r;
3005 }
3006
3007 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3008                                   struct kvm_s390_mem_op *mop)
3009 {
3010         void __user *uaddr = (void __user *)mop->buf;
3011         void *tmpbuf = NULL;
3012         int r, srcu_idx;
3013         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3014                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3015
3016         if (mop->flags & ~supported_flags)
3017                 return -EINVAL;
3018
3019         if (mop->size > MEM_OP_MAX_SIZE)
3020                 return -E2BIG;
3021
3022         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3023                 tmpbuf = vmalloc(mop->size);
3024                 if (!tmpbuf)
3025                         return -ENOMEM;
3026         }
3027
3028         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3029
3030         switch (mop->op) {
3031         case KVM_S390_MEMOP_LOGICAL_READ:
3032                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3033                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3034                                             mop->size, GACC_FETCH);
3035                         break;
3036                 }
3037                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3038                 if (r == 0) {
3039                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3040                                 r = -EFAULT;
3041                 }
3042                 break;
3043         case KVM_S390_MEMOP_LOGICAL_WRITE:
3044                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3045                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3046                                             mop->size, GACC_STORE);
3047                         break;
3048                 }
3049                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3050                         r = -EFAULT;
3051                         break;
3052                 }
3053                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3054                 break;
3055         default:
3056                 r = -EINVAL;
3057         }
3058
3059         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3060
3061         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3062                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3063
3064         vfree(tmpbuf);
3065         return r;
3066 }
3067
3068 long kvm_arch_vcpu_ioctl(struct file *filp,
3069                          unsigned int ioctl, unsigned long arg)
3070 {
3071         struct kvm_vcpu *vcpu = filp->private_data;
3072         void __user *argp = (void __user *)arg;
3073         int idx;
3074         long r;
3075
3076         switch (ioctl) {
3077         case KVM_S390_IRQ: {
3078                 struct kvm_s390_irq s390irq;
3079
3080                 r = -EFAULT;
3081                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3082                         break;
3083                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3084                 break;
3085         }
3086         case KVM_S390_INTERRUPT: {
3087                 struct kvm_s390_interrupt s390int;
3088                 struct kvm_s390_irq s390irq;
3089
3090                 r = -EFAULT;
3091                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3092                         break;
3093                 if (s390int_to_s390irq(&s390int, &s390irq))
3094                         return -EINVAL;
3095                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3096                 break;
3097         }
3098         case KVM_S390_STORE_STATUS:
3099                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3100                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3101                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3102                 break;
3103         case KVM_S390_SET_INITIAL_PSW: {
3104                 psw_t psw;
3105
3106                 r = -EFAULT;
3107                 if (copy_from_user(&psw, argp, sizeof(psw)))
3108                         break;
3109                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3110                 break;
3111         }
3112         case KVM_S390_INITIAL_RESET:
3113                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3114                 break;
3115         case KVM_SET_ONE_REG:
3116         case KVM_GET_ONE_REG: {
3117                 struct kvm_one_reg reg;
3118                 r = -EFAULT;
3119                 if (copy_from_user(&reg, argp, sizeof(reg)))
3120                         break;
3121                 if (ioctl == KVM_SET_ONE_REG)
3122                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3123                 else
3124                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3125                 break;
3126         }
3127 #ifdef CONFIG_KVM_S390_UCONTROL
3128         case KVM_S390_UCAS_MAP: {
3129                 struct kvm_s390_ucas_mapping ucasmap;
3130
3131                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3132                         r = -EFAULT;
3133                         break;
3134                 }
3135
3136                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3137                         r = -EINVAL;
3138                         break;
3139                 }
3140
3141                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3142                                      ucasmap.vcpu_addr, ucasmap.length);
3143                 break;
3144         }
3145         case KVM_S390_UCAS_UNMAP: {
3146                 struct kvm_s390_ucas_mapping ucasmap;
3147
3148                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3149                         r = -EFAULT;
3150                         break;
3151                 }
3152
3153                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3154                         r = -EINVAL;
3155                         break;
3156                 }
3157
3158                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3159                         ucasmap.length);
3160                 break;
3161         }
3162 #endif
3163         case KVM_S390_VCPU_FAULT: {
3164                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3165                 break;
3166         }
3167         case KVM_ENABLE_CAP:
3168         {
3169                 struct kvm_enable_cap cap;
3170                 r = -EFAULT;
3171                 if (copy_from_user(&cap, argp, sizeof(cap)))
3172                         break;
3173                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3174                 break;
3175         }
3176         case KVM_S390_MEM_OP: {
3177                 struct kvm_s390_mem_op mem_op;
3178
3179                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3180                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3181                 else
3182                         r = -EFAULT;
3183                 break;
3184         }
3185         case KVM_S390_SET_IRQ_STATE: {
3186                 struct kvm_s390_irq_state irq_state;
3187
3188                 r = -EFAULT;
3189                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3190                         break;
3191                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3192                     irq_state.len == 0 ||
3193                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3194                         r = -EINVAL;
3195                         break;
3196                 }
3197                 r = kvm_s390_set_irq_state(vcpu,
3198                                            (void __user *) irq_state.buf,
3199                                            irq_state.len);
3200                 break;
3201         }
3202         case KVM_S390_GET_IRQ_STATE: {
3203                 struct kvm_s390_irq_state irq_state;
3204
3205                 r = -EFAULT;
3206                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3207                         break;
3208                 if (irq_state.len == 0) {
3209                         r = -EINVAL;
3210                         break;
3211                 }
3212                 r = kvm_s390_get_irq_state(vcpu,
3213                                            (__u8 __user *)  irq_state.buf,
3214                                            irq_state.len);
3215                 break;
3216         }
3217         default:
3218                 r = -ENOTTY;
3219         }
3220         return r;
3221 }
3222
3223 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3224 {
3225 #ifdef CONFIG_KVM_S390_UCONTROL
3226         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3227                  && (kvm_is_ucontrol(vcpu->kvm))) {
3228                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3229                 get_page(vmf->page);
3230                 return 0;
3231         }
3232 #endif
3233         return VM_FAULT_SIGBUS;
3234 }
3235
/*
 * Memslot creation hook. This implementation does no work and always
 * reports success (0).
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
3241
3242 /* Section: memory related */
3243 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3244                                    struct kvm_memory_slot *memslot,
3245                                    const struct kvm_userspace_memory_region *mem,
3246                                    enum kvm_mr_change change)
3247 {
3248         /* A few sanity checks. We can have memory slots which have to be
3249            located/ended at a segment boundary (1MB). The memory in userland is
3250            ok to be fragmented into various different vmas. It is okay to mmap()
3251            and munmap() stuff in this slot after doing this call at any time */
3252
3253         if (mem->userspace_addr & 0xffffful)
3254                 return -EINVAL;
3255
3256         if (mem->memory_size & 0xffffful)
3257                 return -EINVAL;
3258
3259         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3260                 return -EINVAL;
3261
3262         return 0;
3263 }
3264
3265 void kvm_arch_commit_memory_region(struct kvm *kvm,
3266                                 const struct kvm_userspace_memory_region *mem,
3267                                 const struct kvm_memory_slot *old,
3268                                 const struct kvm_memory_slot *new,
3269                                 enum kvm_mr_change change)
3270 {
3271         int rc;
3272
3273         /* If the basics of the memslot do not change, we do not want
3274          * to update the gmap. Every update causes several unnecessary
3275          * segment translation exceptions. This is usually handled just
3276          * fine by the normal fault handler + gmap, but it will also
3277          * cause faults on the prefix page of running guest CPUs.
3278          */
3279         if (old->userspace_addr == mem->userspace_addr &&
3280             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3281             old->npages * PAGE_SIZE == mem->memory_size)
3282                 return;
3283
3284         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3285                 mem->guest_phys_addr, mem->memory_size);
3286         if (rc)
3287                 pr_warn("failed to commit memory region\n");
3288         return;
3289 }
3290
3291 static inline unsigned long nonhyp_mask(int i)
3292 {
3293         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3294
3295         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3296 }
3297
3298 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3299 {
3300         vcpu->valid_wakeup = false;
3301 }
3302
3303 static int __init kvm_s390_init(void)
3304 {
3305         int i;
3306
3307         if (!sclp.has_sief2) {
3308                 pr_info("SIE not available\n");
3309                 return -ENODEV;
3310         }
3311
3312         for (i = 0; i < 16; i++)
3313                 kvm_s390_fac_list_mask[i] |=
3314                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3315
3316         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3317 }
3318
3319 static void __exit kvm_s390_exit(void)
3320 {
3321         kvm_exit();
3322 }
3323
/* Register kvm_s390_init() and kvm_s390_exit() as module init/exit handlers. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");