arch/s390/kvm/kvm-s390.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * hosting IBM Z kernel virtual machines (s390x)
   4  *
   5  * Copyright IBM Corp. 2008, 2018
   6  *
   7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8  *               Christian Borntraeger <borntraeger@de.ibm.com>
   9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  11  *               Jason J. Herne <jjherne@us.ibm.com>
  12  */
  13
  14 #include <linux/compiler.h>
  15 #include <linux/err.h>
  16 #include <linux/fs.h>
  17 #include <linux/hrtimer.h>
  18 #include <linux/init.h>
  19 #include <linux/kvm.h>
  20 #include <linux/kvm_host.h>
  21 #include <linux/mman.h>
  22 #include <linux/module.h>
  23 #include <linux/moduleparam.h>
  24 #include <linux/random.h>
  25 #include <linux/slab.h>
  26 #include <linux/timer.h>
  27 #include <linux/vmalloc.h>
  28 #include <linux/bitmap.h>
  29 #include <linux/sched/signal.h>
  30 #include <linux/string.h>
  31
  32 #include <asm/asm-offsets.h>
  33 #include <asm/lowcore.h>
  34 #include <asm/stp.h>
  35 #include <asm/pgtable.h>
  36 #include <asm/gmap.h>
  37 #include <asm/nmi.h>
  38 #include <asm/switch_to.h>
  39 #include <asm/isc.h>
  40 #include <asm/sclp.h>
  41 #include <asm/cpacf.h>
  42 #include <asm/timex.h>
  43 #include "kvm-s390.h"
  44 #include "gaccess.h"
  45
  46 #define KMSG_COMPONENT "kvm-s390"
  47 #undef pr_fmt
  48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  49
  50 #define CREATE_TRACE_POINTS
  51 #include "trace.h"
  52 #include "trace-s390.h"
  53
  54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  55 #define LOCAL_IRQS 32
  56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  58
  59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  60
  61 struct kvm_stats_debugfs_item debugfs_entries[] = {
  62         { "userspace_handled", VCPU_STAT(exit_userspace) },
  63         { "exit_null", VCPU_STAT(exit_null) },
  64         { "exit_validity", VCPU_STAT(exit_validity) },
  65         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  66         { "exit_external_request", VCPU_STAT(exit_external_request) },
  67         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  68         { "exit_instruction", VCPU_STAT(exit_instruction) },
  69         { "exit_pei", VCPU_STAT(exit_pei) },
  70         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  71         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  72         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  73         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  74         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  75         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  76         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  77         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  78         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  79         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  80         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  81         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  82         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  83         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  84         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
  85         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  86         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  87         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  88         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
  89         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  90         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
  91         { "instruction_gs", VCPU_STAT(instruction_gs) },
  92         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
  93         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
  94         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
  95         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
  96         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
  97         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
  98         { "instruction_sck", VCPU_STAT(instruction_sck) },
  99         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
 100         { "instruction_spx", VCPU_STAT(instruction_spx) },
 101         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
 102         { "instruction_stap", VCPU_STAT(instruction_stap) },
 103         { "instruction_iske", VCPU_STAT(instruction_iske) },
 104         { "instruction_ri", VCPU_STAT(instruction_ri) },
 105         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
 106         { "instruction_sske", VCPU_STAT(instruction_sske) },
 107         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
 108         { "instruction_essa", VCPU_STAT(instruction_essa) },
 109         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
 110         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
 111         { "instruction_tb", VCPU_STAT(instruction_tb) },
 112         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
 113         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 114         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
 115         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 116         { "instruction_sie", VCPU_STAT(instruction_sie) },
 117         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 118         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 119         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 120         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 121         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 122         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 123         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 124         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 125         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 126         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 127         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 128         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 129         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 130         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 131         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 132         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 133         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
 134         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
 135         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
 136         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
 137         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
 138         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
 139         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
 140         { NULL }
 141 };
 142
 143 struct kvm_s390_tod_clock_ext {
 144         __u8 epoch_idx;
 145         __u64 tod;
 146         __u8 reserved[7];
 147 } __packed;
 148
 149 /* allow nested virtualization in KVM (if enabled by user space) */
 150 static int nested;
 151 module_param(nested, int, S_IRUGO);
 152 MODULE_PARM_DESC(nested, "Nested virtualization support");
 153
 154 /* upper facilities limit for kvm */
 155 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
 156
 157 unsigned long kvm_s390_fac_list_mask_size(void)
 158 {
 159         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 160         return ARRAY_SIZE(kvm_s390_fac_list_mask);
 161 }
 162
 163 /* available cpu features supported by kvm */
 164 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 165 /* available subfunctions indicated via query / "test bit" */
 166 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 167
 168 static struct gmap_notifier gmap_notifier;
 169 static struct gmap_notifier vsie_gmap_notifier;
 170 debug_info_t *kvm_s390_dbf;
 171
 172 /* Section: not file related */
 173 int kvm_arch_hardware_enable(void)
 174 {
 175         /* every s390 is virtualization enabled ;-) */
 176         return 0;
 177 }
 178
 179 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 180                               unsigned long end);
 181
 182 /*
 183  * This callback is executed during stop_machine(). All CPUs are therefore
 184  * temporarily stopped. In order not to change guest behavior, we have to
 185  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 186  * so a CPU won't be stopped while calculating with the epoch.
 187  */
 188 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 189                           void *v)
 190 {
 191         struct kvm *kvm;
 192         struct kvm_vcpu *vcpu;
 193         int i;
 194         unsigned long long *delta = v;
 195
 196         list_for_each_entry(kvm, &vm_list, vm_list) {
 197                 kvm->arch.epoch -= *delta;
 198                 kvm_for_each_vcpu(i, vcpu, kvm) {
 199                         vcpu->arch.sie_block->epoch -= *delta;
 200                         if (vcpu->arch.cputm_enabled)
 201                                 vcpu->arch.cputm_start += *delta;
 202                         if (vcpu->arch.vsie_block)
 203                                 vcpu->arch.vsie_block->epoch -= *delta;
 204                 }
 205         }
 206         return NOTIFY_OK;
 207 }
 208
 209 static struct notifier_block kvm_clock_notifier = {
 210         .notifier_call = kvm_clock_sync,
 211 };
 212
 213 int kvm_arch_hardware_setup(void)
 214 {
 215         gmap_notifier.notifier_call = kvm_gmap_notifier;
 216         gmap_register_pte_notifier(&gmap_notifier);
 217         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 218         gmap_register_pte_notifier(&vsie_gmap_notifier);
 219         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 220                                        &kvm_clock_notifier);
 221         return 0;
 222 }
 223
 224 void kvm_arch_hardware_unsetup(void)
 225 {
 226         gmap_unregister_pte_notifier(&gmap_notifier);
 227         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 228         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 229                                          &kvm_clock_notifier);
 230 }
 231
 232 static void allow_cpu_feat(unsigned long nr)
 233 {
 234         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 235 }
 236
 237 static inline int plo_test_bit(unsigned char nr)
 238 {
 239         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 240         int cc;
 241
 242         asm volatile(
 243                 /* Parameter registers are ignored for "test bit" */
 244                 "       plo     0,0,0,0(0)\n"
 245                 "       ipm     %0\n"
 246                 "       srl     %0,28\n"
 247                 : "=d" (cc)
 248                 : "d" (r0)
 249                 : "cc");
 250         return cc == 0;
 251 }
 252
 253 static void kvm_s390_cpu_feat_init(void)
 254 {
 255         int i;
 256
 257         for (i = 0; i < 256; ++i) {
 258                 if (plo_test_bit(i))
 259                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 260         }
 261
 262         if (test_facility(28)) /* TOD-clock steering */
 263                 ptff(kvm_s390_available_subfunc.ptff,
 264                      sizeof(kvm_s390_available_subfunc.ptff),
 265                      PTFF_QAF);
 266
 267         if (test_facility(17)) { /* MSA */
 268                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 269                               kvm_s390_available_subfunc.kmac);
 270                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 271                               kvm_s390_available_subfunc.kmc);
 272                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 273                               kvm_s390_available_subfunc.km);
 274                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 275                               kvm_s390_available_subfunc.kimd);
 276                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 277                               kvm_s390_available_subfunc.klmd);
 278         }
 279         if (test_facility(76)) /* MSA3 */
 280                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 281                               kvm_s390_available_subfunc.pckmo);
 282         if (test_facility(77)) { /* MSA4 */
 283                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 284                               kvm_s390_available_subfunc.kmctr);
 285                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 286                               kvm_s390_available_subfunc.kmf);
 287                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 288                               kvm_s390_available_subfunc.kmo);
 289                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 290                               kvm_s390_available_subfunc.pcc);
 291         }
 292         if (test_facility(57)) /* MSA5 */
 293                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 294                               kvm_s390_available_subfunc.ppno);
 295
 296         if (test_facility(146)) /* MSA8 */
 297                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 298                               kvm_s390_available_subfunc.kma);
 299
 300         if (MACHINE_HAS_ESOP)
 301                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 302         /*
 303          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 304          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 305          */
 306         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 307             !test_facility(3) || !nested)
 308                 return;
 309         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 310         if (sclp.has_64bscao)
 311                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 312         if (sclp.has_siif)
 313                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 314         if (sclp.has_gpere)
 315                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 316         if (sclp.has_gsls)
 317                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 318         if (sclp.has_ib)
 319                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 320         if (sclp.has_cei)
 321                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 322         if (sclp.has_ibs)
 323                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 324         if (sclp.has_kss)
 325                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 326         /*
 327          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 328          * all skey handling functions read/set the skey from the PGSTE
 329          * instead of the real storage key.
 330          *
 331          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 332          * pages being detected as preserved although they are resident.
 333          *
 334          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 335          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 336          *
 337          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 338          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 339          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 340          *
 341          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 342          * cannot easily shadow the SCA because of the ipte lock.
 343          */
 344 }
 345
 346 int kvm_arch_init(void *opaque)
 347 {
 348         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 349         if (!kvm_s390_dbf)
 350                 return -ENOMEM;
 351
 352         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 353                 debug_unregister(kvm_s390_dbf);
 354                 return -ENOMEM;
 355         }
 356
 357         kvm_s390_cpu_feat_init();
 358
 359         /* Register floating interrupt controller interface. */
 360         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 361 }
 362
 363 void kvm_arch_exit(void)
 364 {
 365         debug_unregister(kvm_s390_dbf);
 366 }
 367
 368 /* Section: device related */
 369 long kvm_arch_dev_ioctl(struct file *filp,
 370                         unsigned int ioctl, unsigned long arg)
 371 {
 372         if (ioctl == KVM_S390_ENABLE_SIE)
 373                 return s390_enable_sie();
 374         return -EINVAL;
 375 }
 376
 377 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 378 {
 379         int r;
 380
 381         switch (ext) {
 382         case KVM_CAP_S390_PSW:
 383         case KVM_CAP_S390_GMAP:
 384         case KVM_CAP_SYNC_MMU:
 385 #ifdef CONFIG_KVM_S390_UCONTROL
 386         case KVM_CAP_S390_UCONTROL:
 387 #endif
 388         case KVM_CAP_ASYNC_PF:
 389         case KVM_CAP_SYNC_REGS:
 390         case KVM_CAP_ONE_REG:
 391         case KVM_CAP_ENABLE_CAP:
 392         case KVM_CAP_S390_CSS_SUPPORT:
 393         case KVM_CAP_IOEVENTFD:
 394         case KVM_CAP_DEVICE_CTRL:
 395         case KVM_CAP_ENABLE_CAP_VM:
 396         case KVM_CAP_S390_IRQCHIP:
 397         case KVM_CAP_VM_ATTRIBUTES:
 398         case KVM_CAP_MP_STATE:
 399         case KVM_CAP_IMMEDIATE_EXIT:
 400         case KVM_CAP_S390_INJECT_IRQ:
 401         case KVM_CAP_S390_USER_SIGP:
 402         case KVM_CAP_S390_USER_STSI:
 403         case KVM_CAP_S390_SKEYS:
 404         case KVM_CAP_S390_IRQ_STATE:
 405         case KVM_CAP_S390_USER_INSTR0:
 406         case KVM_CAP_S390_CMMA_MIGRATION:
 407         case KVM_CAP_S390_AIS:
 408         case KVM_CAP_S390_AIS_MIGRATION:
 409                 r = 1;
 410                 break;
 411         case KVM_CAP_S390_MEM_OP:
 412                 r = MEM_OP_MAX_SIZE;
 413                 break;
 414         case KVM_CAP_NR_VCPUS:
 415         case KVM_CAP_MAX_VCPUS:
 416                 r = KVM_S390_BSCA_CPU_SLOTS;
 417                 if (!kvm_s390_use_sca_entries())
 418                         r = KVM_MAX_VCPUS;
 419                 else if (sclp.has_esca && sclp.has_64bscao)
 420                         r = KVM_S390_ESCA_CPU_SLOTS;
 421                 break;
 422         case KVM_CAP_NR_MEMSLOTS:
 423                 r = KVM_USER_MEM_SLOTS;
 424                 break;
 425         case KVM_CAP_S390_COW:
 426                 r = MACHINE_HAS_ESOP;
 427                 break;
 428         case KVM_CAP_S390_VECTOR_REGISTERS:
 429                 r = MACHINE_HAS_VX;
 430                 break;
 431         case KVM_CAP_S390_RI:
 432                 r = test_facility(64);
 433                 break;
 434         case KVM_CAP_S390_GS:
 435                 r = test_facility(133);
 436                 break;
 437         case KVM_CAP_S390_BPB:
 438                 r = test_facility(82);
 439                 break;
 440         default:
 441                 r = 0;
 442         }
 443         return r;
 444 }
 445
 446 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 447                                         struct kvm_memory_slot *memslot)
 448 {
 449         gfn_t cur_gfn, last_gfn;
 450         unsigned long address;
 451         struct gmap *gmap = kvm->arch.gmap;
 452
 453         /* Loop over all guest pages */
 454         last_gfn = memslot->base_gfn + memslot->npages;
 455         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 456                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 457
 458                 if (test_and_clear_guest_dirty(gmap->mm, address))
 459                         mark_page_dirty(kvm, cur_gfn);
 460                 if (fatal_signal_pending(current))
 461                         return;
 462                 cond_resched();
 463         }
 464 }
 465
 466 /* Section: vm related */
 467 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 468
 469 /*
 470  * Get (and clear) the dirty memory log for a memory slot.
 471  */
 472 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 473                                struct kvm_dirty_log *log)
 474 {
 475         int r;
 476         unsigned long n;
 477         struct kvm_memslots *slots;
 478         struct kvm_memory_slot *memslot;
 479         int is_dirty = 0;
 480
 481         if (kvm_is_ucontrol(kvm))
 482                 return -EINVAL;
 483
 484         mutex_lock(&kvm->slots_lock);
 485
 486         r = -EINVAL;
 487         if (log->slot >= KVM_USER_MEM_SLOTS)
 488                 goto out;
 489
 490         slots = kvm_memslots(kvm);
 491         memslot = id_to_memslot(slots, log->slot);
 492         r = -ENOENT;
 493         if (!memslot->dirty_bitmap)
 494                 goto out;
 495
 496         kvm_s390_sync_dirty_log(kvm, memslot);
 497         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 498         if (r)
 499                 goto out;
 500
 501         /* Clear the dirty log */
 502         if (is_dirty) {
 503                 n = kvm_dirty_bitmap_bytes(memslot);
 504                 memset(memslot->dirty_bitmap, 0, n);
 505         }
 506         r = 0;
 507 out:
 508         mutex_unlock(&kvm->slots_lock);
 509         return r;
 510 }
 511
 512 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 513 {
 514         unsigned int i;
 515         struct kvm_vcpu *vcpu;
 516
 517         kvm_for_each_vcpu(i, vcpu, kvm) {
 518                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 519         }
 520 }
 521
 522 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 523 {
 524         int r;
 525
 526         if (cap->flags)
 527                 return -EINVAL;
 528
 529         switch (cap->cap) {
 530         case KVM_CAP_S390_IRQCHIP:
 531                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 532                 kvm->arch.use_irqchip = 1;
 533                 r = 0;
 534                 break;
 535         case KVM_CAP_S390_USER_SIGP:
 536                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 537                 kvm->arch.user_sigp = 1;
 538                 r = 0;
 539                 break;
 540         case KVM_CAP_S390_VECTOR_REGISTERS:
 541                 mutex_lock(&kvm->lock);
 542                 if (kvm->created_vcpus) {
 543                         r = -EBUSY;
 544                 } else if (MACHINE_HAS_VX) {
 545                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 546                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 547                         if (test_facility(134)) {
 548                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
 549                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
 550                         }
 551                         if (test_facility(135)) {
 552                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
 553                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
 554                         }
 555                         r = 0;
 556                 } else
 557                         r = -EINVAL;
 558                 mutex_unlock(&kvm->lock);
 559                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 560                          r ? "(not available)" : "(success)");
 561                 break;
 562         case KVM_CAP_S390_RI:
 563                 r = -EINVAL;
 564                 mutex_lock(&kvm->lock);
 565                 if (kvm->created_vcpus) {
 566                         r = -EBUSY;
 567                 } else if (test_facility(64)) {
 568                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 569                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 570                         r = 0;
 571                 }
 572                 mutex_unlock(&kvm->lock);
 573                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 574                          r ? "(not available)" : "(success)");
 575                 break;
 576         case KVM_CAP_S390_AIS:
 577                 mutex_lock(&kvm->lock);
 578                 if (kvm->created_vcpus) {
 579                         r = -EBUSY;
 580                 } else {
 581                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
 582                         set_kvm_facility(kvm->arch.model.fac_list, 72);
 583                         r = 0;
 584                 }
 585                 mutex_unlock(&kvm->lock);
 586                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 587                          r ? "(not available)" : "(success)");
 588                 break;
 589         case KVM_CAP_S390_GS:
 590                 r = -EINVAL;
 591                 mutex_lock(&kvm->lock);
 592                 if (kvm->created_vcpus) {
 593                         r = -EBUSY;
 594                 } else if (test_facility(133)) {
 595                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
 596                         set_kvm_facility(kvm->arch.model.fac_list, 133);
 597                         r = 0;
 598                 }
 599                 mutex_unlock(&kvm->lock);
 600                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 601                          r ? "(not available)" : "(success)");
 602                 break;
 603         case KVM_CAP_S390_USER_STSI:
 604                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 605                 kvm->arch.user_stsi = 1;
 606                 r = 0;
 607                 break;
 608         case KVM_CAP_S390_USER_INSTR0:
 609                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 610                 kvm->arch.user_instr0 = 1;
 611                 icpt_operexc_on_all_vcpus(kvm);
 612                 r = 0;
 613                 break;
 614         default:
 615                 r = -EINVAL;
 616                 break;
 617         }
 618         return r;
 619 }
 620
 621 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 622 {
 623         int ret;
 624
 625         switch (attr->attr) {
 626         case KVM_S390_VM_MEM_LIMIT_SIZE:
 627                 ret = 0;
 628                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 629                          kvm->arch.mem_limit);
 630                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 631                         ret = -EFAULT;
 632                 break;
 633         default:
 634                 ret = -ENXIO;
 635                 break;
 636         }
 637         return ret;
 638 }
 639
 640 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 641 {
 642         int ret;
 643         unsigned int idx;
 644         switch (attr->attr) {
 645         case KVM_S390_VM_MEM_ENABLE_CMMA:
 646                 ret = -ENXIO;
 647                 if (!sclp.has_cmma)
 648                         break;
 649
 650                 ret = -EBUSY;
 651                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 652                 mutex_lock(&kvm->lock);
 653                 if (!kvm->created_vcpus) {
 654                         kvm->arch.use_cmma = 1;
 655                         ret = 0;
 656                 }
 657                 mutex_unlock(&kvm->lock);
 658                 break;
 659         case KVM_S390_VM_MEM_CLR_CMMA:
 660                 ret = -ENXIO;
 661                 if (!sclp.has_cmma)
 662                         break;
 663                 ret = -EINVAL;
 664                 if (!kvm->arch.use_cmma)
 665                         break;
 666
 667                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 668                 mutex_lock(&kvm->lock);
 669                 idx = srcu_read_lock(&kvm->srcu);
 670                 s390_reset_cmma(kvm->arch.gmap->mm);
 671                 srcu_read_unlock(&kvm->srcu, idx);
 672                 mutex_unlock(&kvm->lock);
 673                 ret = 0;
 674                 break;
 675         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 676                 unsigned long new_limit;
 677
 678                 if (kvm_is_ucontrol(kvm))
 679                         return -EINVAL;
 680
 681                 if (get_user(new_limit, (u64 __user *)attr->addr))
 682                         return -EFAULT;
 683
 684                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 685                     new_limit > kvm->arch.mem_limit)
 686                         return -E2BIG;
 687
 688                 if (!new_limit)
 689                         return -EINVAL;
 690
 691                 /* gmap_create takes last usable address */
 692                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 693                         new_limit -= 1;
 694
 695                 ret = -EBUSY;
 696                 mutex_lock(&kvm->lock);
 697                 if (!kvm->created_vcpus) {
 698                         /* gmap_create will round the limit up */
 699                         struct gmap *new = gmap_create(current->mm, new_limit);
 700
 701                         if (!new) {
 702                                 ret = -ENOMEM;
 703                         } else {
 704                                 gmap_remove(kvm->arch.gmap);
 705                                 new->private = kvm;
 706                                 kvm->arch.gmap = new;
 707                                 ret = 0;
 708                         }
 709                 }
 710                 mutex_unlock(&kvm->lock);
 711                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 712                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 713                          (void *) kvm->arch.gmap->asce);
 714                 break;
 715         }
 716         default:
 717                 ret = -ENXIO;
 718                 break;
 719         }
 720         return ret;
 721 }
 722
 723 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 724
 725 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 726 {
 727         struct kvm_vcpu *vcpu;
 728         int i;
 729
 730         if (!test_kvm_facility(kvm, 76))
 731                 return -EINVAL;
 732
 733         mutex_lock(&kvm->lock);
 734         switch (attr->attr) {
 735         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 736                 get_random_bytes(
 737                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 738                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 739                 kvm->arch.crypto.aes_kw = 1;
 740                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 741                 break;
 742         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 743                 get_random_bytes(
 744                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 745                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 746                 kvm->arch.crypto.dea_kw = 1;
 747                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 748                 break;
 749         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 750                 kvm->arch.crypto.aes_kw = 0;
 751                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 752                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 753                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 754                 break;
 755         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 756                 kvm->arch.crypto.dea_kw = 0;
 757                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 758                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 759                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 760                 break;
 761         default:
 762                 mutex_unlock(&kvm->lock);
 763                 return -ENXIO;
 764         }
 765
 766         kvm_for_each_vcpu(i, vcpu, kvm) {
 767                 kvm_s390_vcpu_crypto_setup(vcpu);
 768                 exit_sie(vcpu);
 769         }
 770         mutex_unlock(&kvm->lock);
 771         return 0;
 772 }
 773
 774 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 775 {
 776         int cx;
 777         struct kvm_vcpu *vcpu;
 778
 779         kvm_for_each_vcpu(cx, vcpu, kvm)
 780                 kvm_s390_sync_request(req, vcpu);
 781 }
 782
 783 /*
 784  * Must be called with kvm->srcu held to avoid races on memslots, and with
 785  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 786  */
 787 static int kvm_s390_vm_start_migration(struct kvm *kvm)
 788 {
 789         struct kvm_s390_migration_state *mgs;
 790         struct kvm_memory_slot *ms;
 791         /* should be the only one */
 792         struct kvm_memslots *slots;
 793         unsigned long ram_pages;
 794         int slotnr;
 795
 796         /* migration mode already enabled */
 797         if (kvm->arch.migration_state)
 798                 return 0;
 799
 800         slots = kvm_memslots(kvm);
 801         if (!slots || !slots->used_slots)
 802                 return -EINVAL;
 803
 804         mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
 805         if (!mgs)
 806                 return -ENOMEM;
 807         kvm->arch.migration_state = mgs;
 808
 809         if (kvm->arch.use_cmma) {
 810                 /*
 811                  * Get the first slot. They are reverse sorted by base_gfn, so
 812                  * the first slot is also the one at the end of the address
 813                  * space. We have verified above that at least one slot is
 814                  * present.
 815                  */
 816                 ms = slots->memslots;
 817                 /* round up so we only use full longs */
 818                 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
 819                 /* allocate enough bytes to store all the bits */
 820                 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
 821                 if (!mgs->pgste_bitmap) {
 822                         kfree(mgs);
 823                         kvm->arch.migration_state = NULL;
 824                         return -ENOMEM;
 825                 }
 826
 827                 mgs->bitmap_size = ram_pages;
 828                 atomic64_set(&mgs->dirty_pages, ram_pages);
 829                 /* mark all the pages in active slots as dirty */
 830                 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
 831                         ms = slots->memslots + slotnr;
 832                         bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
 833                 }
 834
 835                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
 836         }
 837         return 0;
 838 }
 839
 840 /*
 841  * Must be called with kvm->slots_lock to avoid races with ourselves and
 842  * kvm_s390_vm_start_migration.
 843  */
 844 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
 845 {
 846         struct kvm_s390_migration_state *mgs;
 847
 848         /* migration mode already disabled */
 849         if (!kvm->arch.migration_state)
 850                 return 0;
 851         mgs = kvm->arch.migration_state;
 852         kvm->arch.migration_state = NULL;
 853
 854         if (kvm->arch.use_cmma) {
 855                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
 856                 /* We have to wait for the essa emulation to finish */
 857                 synchronize_srcu(&kvm->srcu);
 858                 vfree(mgs->pgste_bitmap);
 859         }
 860         kfree(mgs);
 861         return 0;
 862 }
 863
 864 static int kvm_s390_vm_set_migration(struct kvm *kvm,
 865                                      struct kvm_device_attr *attr)
 866 {
 867         int res = -ENXIO;
 868
 869         mutex_lock(&kvm->slots_lock);
 870         switch (attr->attr) {
 871         case KVM_S390_VM_MIGRATION_START:
 872                 res = kvm_s390_vm_start_migration(kvm);
 873                 break;
 874         case KVM_S390_VM_MIGRATION_STOP:
 875                 res = kvm_s390_vm_stop_migration(kvm);
 876                 break;
 877         default:
 878                 break;
 879         }
 880         mutex_unlock(&kvm->slots_lock);
 881
 882         return res;
 883 }
 884
 885 static int kvm_s390_vm_get_migration(struct kvm *kvm,
 886                                      struct kvm_device_attr *attr)
 887 {
 888         u64 mig = (kvm->arch.migration_state != NULL);
 889
 890         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
 891                 return -ENXIO;
 892
 893         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
 894                 return -EFAULT;
 895         return 0;
 896 }
 897
 898 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
 899 {
 900         struct kvm_s390_vm_tod_clock gtod;
 901
 902         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 903                 return -EFAULT;
 904
 905         if (test_kvm_facility(kvm, 139))
 906                 kvm_s390_set_tod_clock_ext(kvm, &gtod);
 907         else if (gtod.epoch_idx == 0)
 908                 kvm_s390_set_tod_clock(kvm, gtod.tod);
 909         else
 910                 return -EINVAL;
 911
 912         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
 913                 gtod.epoch_idx, gtod.tod);
 914
 915         return 0;
 916 }
 917
 918 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 919 {
 920         u8 gtod_high;
 921
 922         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 923                                            sizeof(gtod_high)))
 924                 return -EFAULT;
 925
 926         if (gtod_high != 0)
 927                 return -EINVAL;
 928         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 929
 930         return 0;
 931 }
 932
 933 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 934 {
 935         u64 gtod;
 936
 937         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 938                 return -EFAULT;
 939
 940         kvm_s390_set_tod_clock(kvm, gtod);
 941         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 942         return 0;
 943 }
 944
 945 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 946 {
 947         int ret;
 948
 949         if (attr->flags)
 950                 return -EINVAL;
 951
 952         switch (attr->attr) {
 953         case KVM_S390_VM_TOD_EXT:
 954                 ret = kvm_s390_set_tod_ext(kvm, attr);
 955                 break;
 956         case KVM_S390_VM_TOD_HIGH:
 957                 ret = kvm_s390_set_tod_high(kvm, attr);
 958                 break;
 959         case KVM_S390_VM_TOD_LOW:
 960                 ret = kvm_s390_set_tod_low(kvm, attr);
 961                 break;
 962         default:
 963                 ret = -ENXIO;
 964                 break;
 965         }
 966         return ret;
 967 }
 968
 969 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
 970                                         struct kvm_s390_vm_tod_clock *gtod)
 971 {
 972         struct kvm_s390_tod_clock_ext htod;
 973
 974         preempt_disable();
 975
 976         get_tod_clock_ext((char *)&htod);
 977
 978         gtod->tod = htod.tod + kvm->arch.epoch;
 979         gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
 980
 981         if (gtod->tod < htod.tod)
 982                 gtod->epoch_idx += 1;
 983
 984         preempt_enable();
 985 }
 986
 987 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
 988 {
 989         struct kvm_s390_vm_tod_clock gtod;
 990
 991         memset(&gtod, 0, sizeof(gtod));
 992
 993         if (test_kvm_facility(kvm, 139))
 994                 kvm_s390_get_tod_clock_ext(kvm, &gtod);
 995         else
 996                 gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
 997
 998         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 999                 return -EFAULT;
1000
1001         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1002                 gtod.epoch_idx, gtod.tod);
1003         return 0;
1004 }
1005
1006 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1007 {
1008         u8 gtod_high = 0;
1009
1010         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1011                                          sizeof(gtod_high)))
1012                 return -EFAULT;
1013         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1014
1015         return 0;
1016 }
1017
1018 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1019 {
1020         u64 gtod;
1021
1022         gtod = kvm_s390_get_tod_clock_fast(kvm);
1023         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1024                 return -EFAULT;
1025         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1026
1027         return 0;
1028 }
1029
1030 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1031 {
1032         int ret;
1033
1034         if (attr->flags)
1035                 return -EINVAL;
1036
1037         switch (attr->attr) {
1038         case KVM_S390_VM_TOD_EXT:
1039                 ret = kvm_s390_get_tod_ext(kvm, attr);
1040                 break;
1041         case KVM_S390_VM_TOD_HIGH:
1042                 ret = kvm_s390_get_tod_high(kvm, attr);
1043                 break;
1044         case KVM_S390_VM_TOD_LOW:
1045                 ret = kvm_s390_get_tod_low(kvm, attr);
1046                 break;
1047         default:
1048                 ret = -ENXIO;
1049                 break;
1050         }
1051         return ret;
1052 }
1053
1054 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1055 {
1056         struct kvm_s390_vm_cpu_processor *proc;
1057         u16 lowest_ibc, unblocked_ibc;
1058         int ret = 0;
1059
1060         mutex_lock(&kvm->lock);
1061         if (kvm->created_vcpus) {
1062                 ret = -EBUSY;
1063                 goto out;
1064         }
1065         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1066         if (!proc) {
1067                 ret = -ENOMEM;
1068                 goto out;
1069         }
1070         if (!copy_from_user(proc, (void __user *)attr->addr,
1071                             sizeof(*proc))) {
1072                 kvm->arch.model.cpuid = proc->cpuid;
1073                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1074                 unblocked_ibc = sclp.ibc & 0xfff;
1075                 if (lowest_ibc && proc->ibc) {
1076                         if (proc->ibc > unblocked_ibc)
1077                                 kvm->arch.model.ibc = unblocked_ibc;
1078                         else if (proc->ibc < lowest_ibc)
1079                                 kvm->arch.model.ibc = lowest_ibc;
1080                         else
1081                                 kvm->arch.model.ibc = proc->ibc;
1082                 }
1083                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1084                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1085                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1086                          kvm->arch.model.ibc,
1087                          kvm->arch.model.cpuid);
1088                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1089                          kvm->arch.model.fac_list[0],
1090                          kvm->arch.model.fac_list[1],
1091                          kvm->arch.model.fac_list[2]);
1092         } else
1093                 ret = -EFAULT;
1094         kfree(proc);
1095 out:
1096         mutex_unlock(&kvm->lock);
1097         return ret;
1098 }
1099
1100 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1101                                        struct kvm_device_attr *attr)
1102 {
1103         struct kvm_s390_vm_cpu_feat data;
1104
1105         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1106                 return -EFAULT;
1107         if (!bitmap_subset((unsigned long *) data.feat,
1108                            kvm_s390_available_cpu_feat,
1109                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1110                 return -EINVAL;
1111
1112         mutex_lock(&kvm->lock);
1113         if (kvm->created_vcpus) {
1114                 mutex_unlock(&kvm->lock);
1115                 return -EBUSY;
1116         }
1117         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1118                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1119         mutex_unlock(&kvm->lock);
1120         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1121                          data.feat[0],
1122                          data.feat[1],
1123                          data.feat[2]);
1124         return 0;
1125 }
1126
1127 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1128                                           struct kvm_device_attr *attr)
1129 {
1130         /*
1131          * Once supported by kernel + hw, we have to store the subfunctions
1132          * in kvm->arch and remember that user space configured them.
1133          */
1134         return -ENXIO;
1135 }
1136
1137 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1138 {
1139         int ret = -ENXIO;
1140
1141         switch (attr->attr) {
1142         case KVM_S390_VM_CPU_PROCESSOR:
1143                 ret = kvm_s390_set_processor(kvm, attr);
1144                 break;
1145         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1146                 ret = kvm_s390_set_processor_feat(kvm, attr);
1147                 break;
1148         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1149                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1150                 break;
1151         }
1152         return ret;
1153 }
1154
1155 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1156 {
1157         struct kvm_s390_vm_cpu_processor *proc;
1158         int ret = 0;
1159
1160         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1161         if (!proc) {
1162                 ret = -ENOMEM;
1163                 goto out;
1164         }
1165         proc->cpuid = kvm->arch.model.cpuid;
1166         proc->ibc = kvm->arch.model.ibc;
1167         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1168                S390_ARCH_FAC_LIST_SIZE_BYTE);
1169         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1170                  kvm->arch.model.ibc,
1171                  kvm->arch.model.cpuid);
1172         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1173                  kvm->arch.model.fac_list[0],
1174                  kvm->arch.model.fac_list[1],
1175                  kvm->arch.model.fac_list[2]);
1176         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1177                 ret = -EFAULT;
1178         kfree(proc);
1179 out:
1180         return ret;
1181 }
1182
1183 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1184 {
1185         struct kvm_s390_vm_cpu_machine *mach;
1186         int ret = 0;
1187
1188         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1189         if (!mach) {
1190                 ret = -ENOMEM;
1191                 goto out;
1192         }
1193         get_cpu_id((struct cpuid *) &mach->cpuid);
1194         mach->ibc = sclp.ibc;
1195         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1196                S390_ARCH_FAC_LIST_SIZE_BYTE);
1197         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1198                sizeof(S390_lowcore.stfle_fac_list));
1199         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1200                  kvm->arch.model.ibc,
1201                  kvm->arch.model.cpuid);
1202         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1203                  mach->fac_mask[0],
1204                  mach->fac_mask[1],
1205                  mach->fac_mask[2]);
1206         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1207                  mach->fac_list[0],
1208                  mach->fac_list[1],
1209                  mach->fac_list[2]);
1210         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1211                 ret = -EFAULT;
1212         kfree(mach);
1213 out:
1214         return ret;
1215 }
1216
1217 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1218                                        struct kvm_device_attr *attr)
1219 {
1220         struct kvm_s390_vm_cpu_feat data;
1221
1222         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1223                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1224         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1225                 return -EFAULT;
1226         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1227                          data.feat[0],
1228                          data.feat[1],
1229                          data.feat[2]);
1230         return 0;
1231 }
1232
1233 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1234                                      struct kvm_device_attr *attr)
1235 {
1236         struct kvm_s390_vm_cpu_feat data;
1237
1238         bitmap_copy((unsigned long *) data.feat,
1239                     kvm_s390_available_cpu_feat,
1240                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1241         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1242                 return -EFAULT;
1243         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1244                          data.feat[0],
1245                          data.feat[1],
1246                          data.feat[2]);
1247         return 0;
1248 }
1249
1250 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1251                                           struct kvm_device_attr *attr)
1252 {
1253         /*
1254          * Once we can actually configure subfunctions (kernel + hw support),
1255          * we have to check if they were already set by user space, if so copy
1256          * them from kvm->arch.
1257          */
1258         return -ENXIO;
1259 }
1260
1261 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1262                                         struct kvm_device_attr *attr)
1263 {
1264         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1265             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1266                 return -EFAULT;
1267         return 0;
1268 }
1269 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1270 {
1271         int ret = -ENXIO;
1272
1273         switch (attr->attr) {
1274         case KVM_S390_VM_CPU_PROCESSOR:
1275                 ret = kvm_s390_get_processor(kvm, attr);
1276                 break;
1277         case KVM_S390_VM_CPU_MACHINE:
1278                 ret = kvm_s390_get_machine(kvm, attr);
1279                 break;
1280         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1281                 ret = kvm_s390_get_processor_feat(kvm, attr);
1282                 break;
1283         case KVM_S390_VM_CPU_MACHINE_FEAT:
1284                 ret = kvm_s390_get_machine_feat(kvm, attr);
1285                 break;
1286         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1287                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1288                 break;
1289         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1290                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1291                 break;
1292         }
1293         return ret;
1294 }
1295
1296 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1297 {
1298         int ret;
1299
1300         switch (attr->group) {
1301         case KVM_S390_VM_MEM_CTRL:
1302                 ret = kvm_s390_set_mem_control(kvm, attr);
1303                 break;
1304         case KVM_S390_VM_TOD:
1305                 ret = kvm_s390_set_tod(kvm, attr);
1306                 break;
1307         case KVM_S390_VM_CPU_MODEL:
1308                 ret = kvm_s390_set_cpu_model(kvm, attr);
1309                 break;
1310         case KVM_S390_VM_CRYPTO:
1311                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1312                 break;
1313         case KVM_S390_VM_MIGRATION:
1314                 ret = kvm_s390_vm_set_migration(kvm, attr);
1315                 break;
1316         default:
1317                 ret = -ENXIO;
1318                 break;
1319         }
1320
1321         return ret;
1322 }
1323
1324 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1325 {
1326         int ret;
1327
1328         switch (attr->group) {
1329         case KVM_S390_VM_MEM_CTRL:
1330                 ret = kvm_s390_get_mem_control(kvm, attr);
1331                 break;
1332         case KVM_S390_VM_TOD:
1333                 ret = kvm_s390_get_tod(kvm, attr);
1334                 break;
1335         case KVM_S390_VM_CPU_MODEL:
1336                 ret = kvm_s390_get_cpu_model(kvm, attr);
1337                 break;
1338         case KVM_S390_VM_MIGRATION:
1339                 ret = kvm_s390_vm_get_migration(kvm, attr);
1340                 break;
1341         default:
1342                 ret = -ENXIO;
1343                 break;
1344         }
1345
1346         return ret;
1347 }
1348
1349 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1350 {
1351         int ret;
1352
1353         switch (attr->group) {
1354         case KVM_S390_VM_MEM_CTRL:
1355                 switch (attr->attr) {
1356                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1357                 case KVM_S390_VM_MEM_CLR_CMMA:
1358                         ret = sclp.has_cmma ? 0 : -ENXIO;
1359                         break;
1360                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1361                         ret = 0;
1362                         break;
1363                 default:
1364                         ret = -ENXIO;
1365                         break;
1366                 }
1367                 break;
1368         case KVM_S390_VM_TOD:
1369                 switch (attr->attr) {
1370                 case KVM_S390_VM_TOD_LOW:
1371                 case KVM_S390_VM_TOD_HIGH:
1372                         ret = 0;
1373                         break;
1374                 default:
1375                         ret = -ENXIO;
1376                         break;
1377                 }
1378                 break;
1379         case KVM_S390_VM_CPU_MODEL:
1380                 switch (attr->attr) {
1381                 case KVM_S390_VM_CPU_PROCESSOR:
1382                 case KVM_S390_VM_CPU_MACHINE:
1383                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1384                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1385                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1386                         ret = 0;
1387                         break;
1388                 /* configuring subfunctions is not supported yet */
1389                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1390                 default:
1391                         ret = -ENXIO;
1392                         break;
1393                 }
1394                 break;
1395         case KVM_S390_VM_CRYPTO:
1396                 switch (attr->attr) {
1397                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1398                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1399                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1400                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1401                         ret = 0;
1402                         break;
1403                 default:
1404                         ret = -ENXIO;
1405                         break;
1406                 }
1407                 break;
1408         case KVM_S390_VM_MIGRATION:
1409                 ret = 0;
1410                 break;
1411         default:
1412                 ret = -ENXIO;
1413                 break;
1414         }
1415
1416         return ret;
1417 }
1418
1419 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1420 {
1421         uint8_t *keys;
1422         uint64_t hva;
1423         int srcu_idx, i, r = 0;
1424
1425         if (args->flags != 0)
1426                 return -EINVAL;
1427
1428         /* Is this guest using storage keys? */
1429         if (!mm_use_skey(current->mm))
1430                 return KVM_S390_GET_SKEYS_NONE;
1431
1432         /* Enforce sane limit on memory allocation */
1433         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1434                 return -EINVAL;
1435
1436         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1437         if (!keys)
1438                 return -ENOMEM;
1439
1440         down_read(&current->mm->mmap_sem);
1441         srcu_idx = srcu_read_lock(&kvm->srcu);
1442         for (i = 0; i < args->count; i++) {
1443                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1444                 if (kvm_is_error_hva(hva)) {
1445                         r = -EFAULT;
1446                         break;
1447                 }
1448
1449                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1450                 if (r)
1451                         break;
1452         }
1453         srcu_read_unlock(&kvm->srcu, srcu_idx);
1454         up_read(&current->mm->mmap_sem);
1455
1456         if (!r) {
1457                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1458                                  sizeof(uint8_t) * args->count);
1459                 if (r)
1460                         r = -EFAULT;
1461         }
1462
1463         kvfree(keys);
1464         return r;
1465 }
1466
1467 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1468 {
1469         uint8_t *keys;
1470         uint64_t hva;
1471         int srcu_idx, i, r = 0;
1472
1473         if (args->flags != 0)
1474                 return -EINVAL;
1475
1476         /* Enforce sane limit on memory allocation */
1477         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1478                 return -EINVAL;
1479
1480         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1481         if (!keys)
1482                 return -ENOMEM;
1483
1484         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1485                            sizeof(uint8_t) * args->count);
1486         if (r) {
1487                 r = -EFAULT;
1488                 goto out;
1489         }
1490
1491         /* Enable storage key handling for the guest */
1492         r = s390_enable_skey();
1493         if (r)
1494                 goto out;
1495
1496         down_read(&current->mm->mmap_sem);
1497         srcu_idx = srcu_read_lock(&kvm->srcu);
1498         for (i = 0; i < args->count; i++) {
1499                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1500                 if (kvm_is_error_hva(hva)) {
1501                         r = -EFAULT;
1502                         break;
1503                 }
1504
1505                 /* Lowest order bit is reserved */
1506                 if (keys[i] & 0x01) {
1507                         r = -EINVAL;
1508                         break;
1509                 }
1510
1511                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1512                 if (r)
1513                         break;
1514         }
1515         srcu_read_unlock(&kvm->srcu, srcu_idx);
1516         up_read(&current->mm->mmap_sem);
1517 out:
1518         kvfree(keys);
1519         return r;
1520 }
1521
1522 /*
1523  * Base address and length must be sent at the start of each block, therefore
1524  * it's cheaper to send some clean data, as long as it's less than the size of
1525  * two longs.
1526  */
1527 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1528 /* for consistency */
1529 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1530
1531 /*
1532  * This function searches for the next page with dirty CMMA attributes, and
1533  * saves the attributes in the buffer up to either the end of the buffer or
1534  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1535  * no trailing clean bytes are saved.
1536  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1537  * output buffer will indicate 0 as length.
1538  */
1539 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1540                                   struct kvm_s390_cmma_log *args)
1541 {
1542         struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1543         unsigned long bufsize, hva, pgstev, i, next, cur;
1544         int srcu_idx, peek, r = 0, rr;
1545         u8 *res;
1546
1547         cur = args->start_gfn;
1548         i = next = pgstev = 0;
1549
1550         if (unlikely(!kvm->arch.use_cmma))
1551                 return -ENXIO;
1552         /* Invalid/unsupported flags were specified */
1553         if (args->flags & ~KVM_S390_CMMA_PEEK)
1554                 return -EINVAL;
1555         /* Migration mode query, and we are not doing a migration */
1556         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1557         if (!peek && !s)
1558                 return -EINVAL;
1559         /* CMMA is disabled or was not used, or the buffer has length zero */
1560         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1561         if (!bufsize || !kvm->mm->context.use_cmma) {
1562                 memset(args, 0, sizeof(*args));
1563                 return 0;
1564         }
1565
1566         if (!peek) {
1567                 /* We are not peeking, and there are no dirty pages */
1568                 if (!atomic64_read(&s->dirty_pages)) {
1569                         memset(args, 0, sizeof(*args));
1570                         return 0;
1571                 }
1572                 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1573                                     args->start_gfn);
1574                 if (cur >= s->bitmap_size)      /* nothing found, loop back */
1575                         cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1576                 if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
1577                         memset(args, 0, sizeof(*args));
1578                         return 0;
1579                 }
1580                 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1581         }
1582
1583         res = vmalloc(bufsize);
1584         if (!res)
1585                 return -ENOMEM;
1586
1587         args->start_gfn = cur;
1588
1589         down_read(&kvm->mm->mmap_sem);
1590         srcu_idx = srcu_read_lock(&kvm->srcu);
1591         while (i < bufsize) {
1592                 hva = gfn_to_hva(kvm, cur);
1593                 if (kvm_is_error_hva(hva)) {
1594                         r = -EFAULT;
1595                         break;
1596                 }
1597                 /* decrement only if we actually flipped the bit to 0 */
1598                 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1599                         atomic64_dec(&s->dirty_pages);
1600                 r = get_pgste(kvm->mm, hva, &pgstev);
1601                 if (r < 0)
1602                         pgstev = 0;
1603                 /* save the value */
1604                 res[i++] = (pgstev >> 24) & 0x43;
1605                 /*
1606                  * if the next bit is too far away, stop.
1607                  * if we reached the previous "next", find the next one
1608                  */
1609                 if (!peek) {
1610                         if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1611                                 break;
1612                         if (cur == next)
1613                                 next = find_next_bit(s->pgste_bitmap,
1614                                                      s->bitmap_size, cur + 1);
1615                 /* reached the end of the bitmap or of the buffer, stop */
1616                         if ((next >= s->bitmap_size) ||
1617                             (next >= args->start_gfn + bufsize))
1618                                 break;
1619                 }
1620                 cur++;
1621         }
1622         srcu_read_unlock(&kvm->srcu, srcu_idx);
1623         up_read(&kvm->mm->mmap_sem);
1624         args->count = i;
1625         args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1626
1627         rr = copy_to_user((void __user *)args->values, res, args->count);
1628         if (rr)
1629                 r = -EFAULT;
1630
1631         vfree(res);
1632         return r;
1633 }
1634
1635 /*
1636  * This function sets the CMMA attributes for the given pages. If the input
1637  * buffer has zero length, no action is taken, otherwise the attributes are
1638  * set and the mm->context.use_cmma flag is set.
1639  */
1640 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1641                                   const struct kvm_s390_cmma_log *args)
1642 {
1643         unsigned long hva, mask, pgstev, i;
1644         uint8_t *bits;
1645         int srcu_idx, r = 0;
1646
1647         mask = args->mask;
1648
1649         if (!kvm->arch.use_cmma)
1650                 return -ENXIO;
1651         /* invalid/unsupported flags */
1652         if (args->flags != 0)
1653                 return -EINVAL;
1654         /* Enforce sane limit on memory allocation */
1655         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1656                 return -EINVAL;
1657         /* Nothing to do */
1658         if (args->count == 0)
1659                 return 0;
1660
1661         bits = vmalloc(sizeof(*bits) * args->count);
1662         if (!bits)
1663                 return -ENOMEM;
1664
1665         r = copy_from_user(bits, (void __user *)args->values, args->count);
1666         if (r) {
1667                 r = -EFAULT;
1668                 goto out;
1669         }
1670
1671         down_read(&kvm->mm->mmap_sem);
1672         srcu_idx = srcu_read_lock(&kvm->srcu);
1673         for (i = 0; i < args->count; i++) {
1674                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1675                 if (kvm_is_error_hva(hva)) {
1676                         r = -EFAULT;
1677                         break;
1678                 }
1679
1680                 pgstev = bits[i];
1681                 pgstev = pgstev << 24;
1682                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1683                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1684         }
1685         srcu_read_unlock(&kvm->srcu, srcu_idx);
1686         up_read(&kvm->mm->mmap_sem);
1687
1688         if (!kvm->mm->context.use_cmma) {
1689                 down_write(&kvm->mm->mmap_sem);
1690                 kvm->mm->context.use_cmma = 1;
1691                 up_write(&kvm->mm->mmap_sem);
1692         }
1693 out:
1694         vfree(bits);
1695         return r;
1696 }
1697
1698 long kvm_arch_vm_ioctl(struct file *filp,
1699                        unsigned int ioctl, unsigned long arg)
1700 {
1701         struct kvm *kvm = filp->private_data;
1702         void __user *argp = (void __user *)arg;
1703         struct kvm_device_attr attr;
1704         int r;
1705
1706         switch (ioctl) {
1707         case KVM_S390_INTERRUPT: {
1708                 struct kvm_s390_interrupt s390int;
1709
1710                 r = -EFAULT;
1711                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1712                         break;
1713                 r = kvm_s390_inject_vm(kvm, &s390int);
1714                 break;
1715         }
1716         case KVM_ENABLE_CAP: {
1717                 struct kvm_enable_cap cap;
1718                 r = -EFAULT;
1719                 if (copy_from_user(&cap, argp, sizeof(cap)))
1720                         break;
1721                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1722                 break;
1723         }
1724         case KVM_CREATE_IRQCHIP: {
1725                 struct kvm_irq_routing_entry routing;
1726
1727                 r = -EINVAL;
1728                 if (kvm->arch.use_irqchip) {
1729                         /* Set up dummy routing. */
1730                         memset(&routing, 0, sizeof(routing));
1731                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1732                 }
1733                 break;
1734         }
1735         case KVM_SET_DEVICE_ATTR: {
1736                 r = -EFAULT;
1737                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1738                         break;
1739                 r = kvm_s390_vm_set_attr(kvm, &attr);
1740                 break;
1741         }
1742         case KVM_GET_DEVICE_ATTR: {
1743                 r = -EFAULT;
1744                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1745                         break;
1746                 r = kvm_s390_vm_get_attr(kvm, &attr);
1747                 break;
1748         }
1749         case KVM_HAS_DEVICE_ATTR: {
1750                 r = -EFAULT;
1751                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1752                         break;
1753                 r = kvm_s390_vm_has_attr(kvm, &attr);
1754                 break;
1755         }
1756         case KVM_S390_GET_SKEYS: {
1757                 struct kvm_s390_skeys args;
1758
1759                 r = -EFAULT;
1760                 if (copy_from_user(&args, argp,
1761                                    sizeof(struct kvm_s390_skeys)))
1762                         break;
1763                 r = kvm_s390_get_skeys(kvm, &args);
1764                 break;
1765         }
1766         case KVM_S390_SET_SKEYS: {
1767                 struct kvm_s390_skeys args;
1768
1769                 r = -EFAULT;
1770                 if (copy_from_user(&args, argp,
1771                                    sizeof(struct kvm_s390_skeys)))
1772                         break;
1773                 r = kvm_s390_set_skeys(kvm, &args);
1774                 break;
1775         }
1776         case KVM_S390_GET_CMMA_BITS: {
1777                 struct kvm_s390_cmma_log args;
1778
1779                 r = -EFAULT;
1780                 if (copy_from_user(&args, argp, sizeof(args)))
1781                         break;
1782                 mutex_lock(&kvm->slots_lock);
1783                 r = kvm_s390_get_cmma_bits(kvm, &args);
1784                 mutex_unlock(&kvm->slots_lock);
1785                 if (!r) {
1786                         r = copy_to_user(argp, &args, sizeof(args));
1787                         if (r)
1788                                 r = -EFAULT;
1789                 }
1790                 break;
1791         }
1792         case KVM_S390_SET_CMMA_BITS: {
1793                 struct kvm_s390_cmma_log args;
1794
1795                 r = -EFAULT;
1796                 if (copy_from_user(&args, argp, sizeof(args)))
1797                         break;
1798                 mutex_lock(&kvm->slots_lock);
1799                 r = kvm_s390_set_cmma_bits(kvm, &args);
1800                 mutex_unlock(&kvm->slots_lock);
1801                 break;
1802         }
1803         default:
1804                 r = -ENOTTY;
1805         }
1806
1807         return r;
1808 }
1809
1810 static int kvm_s390_query_ap_config(u8 *config)
1811 {
1812         u32 fcn_code = 0x04000000UL;
1813         u32 cc = 0;
1814
1815         memset(config, 0, 128);
1816         asm volatile(
1817                 "lgr 0,%1\n"
1818                 "lgr 2,%2\n"
1819                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1820                 "0: ipm %0\n"
1821                 "srl %0,28\n"
1822                 "1:\n"
1823                 EX_TABLE(0b, 1b)
1824                 : "+r" (cc)
1825                 : "r" (fcn_code), "r" (config)
1826                 : "cc", "0", "2", "memory"
1827         );
1828
1829         return cc;
1830 }
1831
1832 static int kvm_s390_apxa_installed(void)
1833 {
1834         u8 config[128];
1835         int cc;
1836
1837         if (test_facility(12)) {
1838                 cc = kvm_s390_query_ap_config(config);
1839
1840                 if (cc)
1841                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1842                 else
1843                         return config[0] & 0x40;
1844         }
1845
1846         return 0;
1847 }
1848
1849 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1850 {
1851         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1852
1853         if (kvm_s390_apxa_installed())
1854                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1855         else
1856                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1857 }
1858
1859 static u64 kvm_s390_get_initial_cpuid(void)
1860 {
1861         struct cpuid cpuid;
1862
1863         get_cpu_id(&cpuid);
1864         cpuid.version = 0xff;
1865         return *((u64 *) &cpuid);
1866 }
1867
1868 static void kvm_s390_crypto_init(struct kvm *kvm)
1869 {
1870         if (!test_kvm_facility(kvm, 76))
1871                 return;
1872
1873         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1874         kvm_s390_set_crycb_format(kvm);
1875
1876         /* Enable AES/DEA protected key functions by default */
1877         kvm->arch.crypto.aes_kw = 1;
1878         kvm->arch.crypto.dea_kw = 1;
1879         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1880                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1881         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1882                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1883 }
1884
1885 static void sca_dispose(struct kvm *kvm)
1886 {
1887         if (kvm->arch.use_esca)
1888                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1889         else
1890                 free_page((unsigned long)(kvm->arch.sca));
1891         kvm->arch.sca = NULL;
1892 }
1893
1894 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1895 {
1896         gfp_t alloc_flags = GFP_KERNEL;
1897         int i, rc;
1898         char debug_name[16];
1899         static unsigned long sca_offset;
1900
1901         rc = -EINVAL;
1902 #ifdef CONFIG_KVM_S390_UCONTROL
1903         if (type & ~KVM_VM_S390_UCONTROL)
1904                 goto out_err;
1905         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1906                 goto out_err;
1907 #else
1908         if (type)
1909                 goto out_err;
1910 #endif
1911
1912         rc = s390_enable_sie();
1913         if (rc)
1914                 goto out_err;
1915
1916         rc = -ENOMEM;
1917
1918         kvm->arch.use_esca = 0; /* start with basic SCA */
1919         if (!sclp.has_64bscao)
1920                 alloc_flags |= GFP_DMA;
1921         rwlock_init(&kvm->arch.sca_lock);
1922         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1923         if (!kvm->arch.sca)
1924                 goto out_err;
1925         spin_lock(&kvm_lock);
1926         sca_offset += 16;
1927         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1928                 sca_offset = 0;
1929         kvm->arch.sca = (struct bsca_block *)
1930                         ((char *) kvm->arch.sca + sca_offset);
1931         spin_unlock(&kvm_lock);
1932
1933         sprintf(debug_name, "kvm-%u", current->pid);
1934
1935         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1936         if (!kvm->arch.dbf)
1937                 goto out_err;
1938
1939         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
1940         kvm->arch.sie_page2 =
1941              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1942         if (!kvm->arch.sie_page2)
1943                 goto out_err;
1944
1945         /* Populate the facility mask initially. */
1946         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1947                sizeof(S390_lowcore.stfle_fac_list));
1948         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1949                 if (i < kvm_s390_fac_list_mask_size())
1950                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1951                 else
1952                         kvm->arch.model.fac_mask[i] = 0UL;
1953         }
1954
1955         /* Populate the facility list initially. */
1956         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1957         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1958                S390_ARCH_FAC_LIST_SIZE_BYTE);
1959
1960         /* we are always in czam mode - even on pre z14 machines */
1961         set_kvm_facility(kvm->arch.model.fac_mask, 138);
1962         set_kvm_facility(kvm->arch.model.fac_list, 138);
1963         /* we emulate STHYI in kvm */
1964         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1965         set_kvm_facility(kvm->arch.model.fac_list, 74);
1966         if (MACHINE_HAS_TLB_GUEST) {
1967                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1968                 set_kvm_facility(kvm->arch.model.fac_list, 147);
1969         }
1970
1971         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1972         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1973
1974         kvm_s390_crypto_init(kvm);
1975
1976         mutex_init(&kvm->arch.float_int.ais_lock);
1977         kvm->arch.float_int.simm = 0;
1978         kvm->arch.float_int.nimm = 0;
1979         spin_lock_init(&kvm->arch.float_int.lock);
1980         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1981                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1982         init_waitqueue_head(&kvm->arch.ipte_wq);
1983         mutex_init(&kvm->arch.ipte_mutex);
1984
1985         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1986         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1987
1988         if (type & KVM_VM_S390_UCONTROL) {
1989                 kvm->arch.gmap = NULL;
1990                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1991         } else {
1992                 if (sclp.hamax == U64_MAX)
1993                         kvm->arch.mem_limit = TASK_SIZE_MAX;
1994                 else
1995                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1996                                                     sclp.hamax + 1);
1997                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1998                 if (!kvm->arch.gmap)
1999                         goto out_err;
2000                 kvm->arch.gmap->private = kvm;
2001                 kvm->arch.gmap->pfault_enabled = 0;
2002         }
2003
2004         kvm->arch.css_support = 0;
2005         kvm->arch.use_irqchip = 0;
2006         kvm->arch.epoch = 0;
2007
2008         spin_lock_init(&kvm->arch.start_stop_lock);
2009         kvm_s390_vsie_init(kvm);
2010         kvm_s390_gisa_init(kvm);
2011         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2012
2013         return 0;
2014 out_err:
2015         free_page((unsigned long)kvm->arch.sie_page2);
2016         debug_unregister(kvm->arch.dbf);
2017         sca_dispose(kvm);
2018         KVM_EVENT(3, "creation of vm failed: %d", rc);
2019         return rc;
2020 }
2021
2022 bool kvm_arch_has_vcpu_debugfs(void)
2023 {
2024         return false;
2025 }
2026
2027 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2028 {
2029         return 0;
2030 }
2031
2032 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2033 {
2034         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2035         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2036         kvm_s390_clear_local_irqs(vcpu);
2037         kvm_clear_async_pf_completion_queue(vcpu);
2038         if (!kvm_is_ucontrol(vcpu->kvm))
2039                 sca_del_vcpu(vcpu);
2040
2041         if (kvm_is_ucontrol(vcpu->kvm))
2042                 gmap_remove(vcpu->arch.gmap);
2043
2044         if (vcpu->kvm->arch.use_cmma)
2045                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2046         free_page((unsigned long)(vcpu->arch.sie_block));
2047
2048         kvm_vcpu_uninit(vcpu);
2049         kmem_cache_free(kvm_vcpu_cache, vcpu);
2050 }
2051
2052 static void kvm_free_vcpus(struct kvm *kvm)
2053 {
2054         unsigned int i;
2055         struct kvm_vcpu *vcpu;
2056
2057         kvm_for_each_vcpu(i, vcpu, kvm)
2058                 kvm_arch_vcpu_destroy(vcpu);
2059
2060         mutex_lock(&kvm->lock);
2061         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2062                 kvm->vcpus[i] = NULL;
2063
2064         atomic_set(&kvm->online_vcpus, 0);
2065         mutex_unlock(&kvm->lock);
2066 }
2067
2068 void kvm_arch_destroy_vm(struct kvm *kvm)
2069 {
2070         kvm_free_vcpus(kvm);
2071         sca_dispose(kvm);
2072         debug_unregister(kvm->arch.dbf);
2073         kvm_s390_gisa_destroy(kvm);
2074         free_page((unsigned long)kvm->arch.sie_page2);
2075         if (!kvm_is_ucontrol(kvm))
2076                 gmap_remove(kvm->arch.gmap);
2077         kvm_s390_destroy_adapters(kvm);
2078         kvm_s390_clear_float_irqs(kvm);
2079         kvm_s390_vsie_destroy(kvm);
2080         if (kvm->arch.migration_state) {
2081                 vfree(kvm->arch.migration_state->pgste_bitmap);
2082                 kfree(kvm->arch.migration_state);
2083         }
2084         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2085 }
2086
2087 /* Section: vcpu related */
2088 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2089 {
2090         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2091         if (!vcpu->arch.gmap)
2092                 return -ENOMEM;
2093         vcpu->arch.gmap->private = vcpu->kvm;
2094
2095         return 0;
2096 }
2097
2098 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2099 {
2100         if (!kvm_s390_use_sca_entries())
2101                 return;
2102         read_lock(&vcpu->kvm->arch.sca_lock);
2103         if (vcpu->kvm->arch.use_esca) {
2104                 struct esca_block *sca = vcpu->kvm->arch.sca;
2105
2106                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2107                 sca->cpu[vcpu->vcpu_id].sda = 0;
2108         } else {
2109                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2110
2111                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2112                 sca->cpu[vcpu->vcpu_id].sda = 0;
2113         }
2114         read_unlock(&vcpu->kvm->arch.sca_lock);
2115 }
2116
2117 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2118 {
2119         if (!kvm_s390_use_sca_entries()) {
2120                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2121
2122                 /* we still need the basic sca for the ipte control */
2123                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2124                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2125         }
2126         read_lock(&vcpu->kvm->arch.sca_lock);
2127         if (vcpu->kvm->arch.use_esca) {
2128                 struct esca_block *sca = vcpu->kvm->arch.sca;
2129
2130                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2131                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2132                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2133                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2134                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2135         } else {
2136                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2137
2138                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2139                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2140                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2141                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2142         }
2143         read_unlock(&vcpu->kvm->arch.sca_lock);
2144 }
2145
2146 /* Basic SCA to Extended SCA data copy routines */
2147 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2148 {
2149         d->sda = s->sda;
2150         d->sigp_ctrl.c = s->sigp_ctrl.c;
2151         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2152 }
2153
2154 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2155 {
2156         int i;
2157
2158         d->ipte_control = s->ipte_control;
2159         d->mcn[0] = s->mcn;
2160         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2161                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2162 }
2163
2164 static int sca_switch_to_extended(struct kvm *kvm)
2165 {
2166         struct bsca_block *old_sca = kvm->arch.sca;
2167         struct esca_block *new_sca;
2168         struct kvm_vcpu *vcpu;
2169         unsigned int vcpu_idx;
2170         u32 scaol, scaoh;
2171
2172         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2173         if (!new_sca)
2174                 return -ENOMEM;
2175
2176         scaoh = (u32)((u64)(new_sca) >> 32);
2177         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2178
2179         kvm_s390_vcpu_block_all(kvm);
2180         write_lock(&kvm->arch.sca_lock);
2181
2182         sca_copy_b_to_e(new_sca, old_sca);
2183
2184         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2185                 vcpu->arch.sie_block->scaoh = scaoh;
2186                 vcpu->arch.sie_block->scaol = scaol;
2187                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2188         }
2189         kvm->arch.sca = new_sca;
2190         kvm->arch.use_esca = 1;
2191
2192         write_unlock(&kvm->arch.sca_lock);
2193         kvm_s390_vcpu_unblock_all(kvm);
2194
2195         free_page((unsigned long)old_sca);
2196
2197         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2198                  old_sca, kvm->arch.sca);
2199         return 0;
2200 }
2201
2202 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2203 {
2204         int rc;
2205
2206         if (!kvm_s390_use_sca_entries()) {
2207                 if (id < KVM_MAX_VCPUS)
2208                         return true;
2209                 return false;
2210         }
2211         if (id < KVM_S390_BSCA_CPU_SLOTS)
2212                 return true;
2213         if (!sclp.has_esca || !sclp.has_64bscao)
2214                 return false;
2215
2216         mutex_lock(&kvm->lock);
2217         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2218         mutex_unlock(&kvm->lock);
2219
2220         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2221 }
2222
2223 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2224 {
2225         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2226         kvm_clear_async_pf_completion_queue(vcpu);
2227         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2228                                     KVM_SYNC_GPRS |
2229                                     KVM_SYNC_ACRS |
2230                                     KVM_SYNC_CRS |
2231                                     KVM_SYNC_ARCH0 |
2232                                     KVM_SYNC_PFAULT;
2233         kvm_s390_set_prefix(vcpu, 0);
2234         if (test_kvm_facility(vcpu->kvm, 64))
2235                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2236         if (test_kvm_facility(vcpu->kvm, 82))
2237                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2238         if (test_kvm_facility(vcpu->kvm, 133))
2239                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2240         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2241          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2242          */
2243         if (MACHINE_HAS_VX)
2244                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2245         else
2246                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2247
2248         if (kvm_is_ucontrol(vcpu->kvm))
2249                 return __kvm_ucontrol_vcpu_init(vcpu);
2250
2251         return 0;
2252 }
2253
2254 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2255 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2256 {
2257         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2258         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2259         vcpu->arch.cputm_start = get_tod_clock_fast();
2260         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2261 }
2262
2263 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2264 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2265 {
2266         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2267         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2268         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2269         vcpu->arch.cputm_start = 0;
2270         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2271 }
2272
2273 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2274 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2275 {
2276         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2277         vcpu->arch.cputm_enabled = true;
2278         __start_cpu_timer_accounting(vcpu);
2279 }
2280
2281 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2282 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2283 {
2284         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2285         __stop_cpu_timer_accounting(vcpu);
2286         vcpu->arch.cputm_enabled = false;
2287 }
2288
2289 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2290 {
2291         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2292         __enable_cpu_timer_accounting(vcpu);
2293         preempt_enable();
2294 }
2295
2296 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2297 {
2298         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2299         __disable_cpu_timer_accounting(vcpu);
2300         preempt_enable();
2301 }
2302
2303 /* set the cpu timer - may only be called from the VCPU thread itself */
2304 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2305 {
2306         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2307         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2308         if (vcpu->arch.cputm_enabled)
2309                 vcpu->arch.cputm_start = get_tod_clock_fast();
2310         vcpu->arch.sie_block->cputm = cputm;
2311         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2312         preempt_enable();
2313 }
2314
2315 /* update and get the cpu timer - can also be called from other VCPU threads */
2316 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2317 {
2318         unsigned int seq;
2319         __u64 value;
2320
2321         if (unlikely(!vcpu->arch.cputm_enabled))
2322                 return vcpu->arch.sie_block->cputm;
2323
2324         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2325         do {
2326                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2327                 /*
2328                  * If the writer would ever execute a read in the critical
2329                  * section, e.g. in irq context, we have a deadlock.
2330                  */
2331                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2332                 value = vcpu->arch.sie_block->cputm;
2333                 /* if cputm_start is 0, accounting is being started/stopped */
2334                 if (likely(vcpu->arch.cputm_start))
2335                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2336         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2337         preempt_enable();
2338         return value;
2339 }
2340
2341 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2342 {
2343
2344         gmap_enable(vcpu->arch.enabled_gmap);
2345         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2346         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2347                 __start_cpu_timer_accounting(vcpu);
2348         vcpu->cpu = cpu;
2349 }
2350
2351 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2352 {
2353         vcpu->cpu = -1;
2354         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2355                 __stop_cpu_timer_accounting(vcpu);
2356         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2357         vcpu->arch.enabled_gmap = gmap_get_enabled();
2358         gmap_disable(vcpu->arch.enabled_gmap);
2359
2360 }
2361
2362 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2363 {
2364         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2365         vcpu->arch.sie_block->gpsw.mask = 0UL;
2366         vcpu->arch.sie_block->gpsw.addr = 0UL;
2367         kvm_s390_set_prefix(vcpu, 0);
2368         kvm_s390_set_cpu_timer(vcpu, 0);
2369         vcpu->arch.sie_block->ckc       = 0UL;
2370         vcpu->arch.sie_block->todpr     = 0;
2371         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2372         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2373         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2374         /* make sure the new fpc will be lazily loaded */
2375         save_fpu_regs();
2376         current->thread.fpu.fpc = 0;
2377         vcpu->arch.sie_block->gbea = 1;
2378         vcpu->arch.sie_block->pp = 0;
2379         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2380         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2381         kvm_clear_async_pf_completion_queue(vcpu);
2382         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2383                 kvm_s390_vcpu_stop(vcpu);
2384         kvm_s390_clear_local_irqs(vcpu);
2385 }
2386
2387 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2388 {
2389         mutex_lock(&vcpu->kvm->lock);
2390         preempt_disable();
2391         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2392         preempt_enable();
2393         mutex_unlock(&vcpu->kvm->lock);
2394         if (!kvm_is_ucontrol(vcpu->kvm)) {
2395                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2396                 sca_add_vcpu(vcpu);
2397         }
2398         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2399                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2400         /* make vcpu_load load the right gmap on the first trigger */
2401         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2402 }
2403
2404 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2405 {
2406         if (!test_kvm_facility(vcpu->kvm, 76))
2407                 return;
2408
2409         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2410
2411         if (vcpu->kvm->arch.crypto.aes_kw)
2412                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2413         if (vcpu->kvm->arch.crypto.dea_kw)
2414                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2415
2416         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2417 }
2418
2419 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2420 {
2421         free_page(vcpu->arch.sie_block->cbrlo);
2422         vcpu->arch.sie_block->cbrlo = 0;
2423 }
2424
2425 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2426 {
2427         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2428         if (!vcpu->arch.sie_block->cbrlo)
2429                 return -ENOMEM;
2430
2431         vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2432         return 0;
2433 }
2434
2435 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2436 {
2437         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2438
2439         vcpu->arch.sie_block->ibc = model->ibc;
2440         if (test_kvm_facility(vcpu->kvm, 7))
2441                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2442 }
2443
2444 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2445 {
2446         int rc = 0;
2447
2448         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2449                                                     CPUSTAT_SM |
2450                                                     CPUSTAT_STOPPED);
2451
2452         if (test_kvm_facility(vcpu->kvm, 78))
2453                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2454         else if (test_kvm_facility(vcpu->kvm, 8))
2455                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2456
2457         kvm_s390_vcpu_setup_model(vcpu);
2458
2459         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2460         if (MACHINE_HAS_ESOP)
2461                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2462         if (test_kvm_facility(vcpu->kvm, 9))
2463                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2464         if (test_kvm_facility(vcpu->kvm, 73))
2465                 vcpu->arch.sie_block->ecb |= ECB_TE;
2466
2467         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2468                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2469         if (test_kvm_facility(vcpu->kvm, 130))
2470                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2471         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2472         if (sclp.has_cei)
2473                 vcpu->arch.sie_block->eca |= ECA_CEI;
2474         if (sclp.has_ib)
2475                 vcpu->arch.sie_block->eca |= ECA_IB;
2476         if (sclp.has_siif)
2477                 vcpu->arch.sie_block->eca |= ECA_SII;
2478         if (sclp.has_sigpif)
2479                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2480         if (test_kvm_facility(vcpu->kvm, 129)) {
2481                 vcpu->arch.sie_block->eca |= ECA_VX;
2482                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2483         }
2484         if (test_kvm_facility(vcpu->kvm, 139))
2485                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2486
2487         if (vcpu->arch.sie_block->gd) {
2488                 vcpu->arch.sie_block->eca |= ECA_AIV;
2489                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2490                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2491         }
2492         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2493                                         | SDNXC;
2494         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2495
2496         if (sclp.has_kss)
2497                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2498         else
2499                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2500
2501         if (vcpu->kvm->arch.use_cmma) {
2502                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2503                 if (rc)
2504                         return rc;
2505         }
2506         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2507         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2508
2509         kvm_s390_vcpu_crypto_setup(vcpu);
2510
2511         return rc;
2512 }
2513
2514 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2515                                       unsigned int id)
2516 {
2517         struct kvm_vcpu *vcpu;
2518         struct sie_page *sie_page;
2519         int rc = -EINVAL;
2520
2521         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2522                 goto out;
2523
2524         rc = -ENOMEM;
2525
2526         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2527         if (!vcpu)
2528                 goto out;
2529
2530         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2531         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2532         if (!sie_page)
2533                 goto out_free_cpu;
2534
2535         vcpu->arch.sie_block = &sie_page->sie_block;
2536         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2537
2538         /* the real guest size will always be smaller than msl */
2539         vcpu->arch.sie_block->mso = 0;
2540         vcpu->arch.sie_block->msl = sclp.hamax;
2541
2542         vcpu->arch.sie_block->icpua = id;
2543         spin_lock_init(&vcpu->arch.local_int.lock);
2544         vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2545         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2546                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2547         seqcount_init(&vcpu->arch.cputm_seqcount);
2548
2549         rc = kvm_vcpu_init(vcpu, kvm, id);
2550         if (rc)
2551                 goto out_free_sie_block;
2552         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2553                  vcpu->arch.sie_block);
2554         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2555
2556         return vcpu;
2557 out_free_sie_block:
2558         free_page((unsigned long)(vcpu->arch.sie_block));
2559 out_free_cpu:
2560         kmem_cache_free(kvm_vcpu_cache, vcpu);
2561 out:
2562         return ERR_PTR(rc);
2563 }
2564
2565 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2566 {
2567         return kvm_s390_vcpu_has_irq(vcpu, 0);
2568 }
2569
2570 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2571 {
2572         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2573 }
2574
2575 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2576 {
2577         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2578         exit_sie(vcpu);
2579 }
2580
2581 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2582 {
2583         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2584 }
2585
2586 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2587 {
2588         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2589         exit_sie(vcpu);
2590 }
2591
2592 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2593 {
2594         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2595 }
2596
2597 /*
2598  * Kick a guest cpu out of SIE and wait until SIE is not running.
2599  * If the CPU is not running (e.g. waiting as idle) the function will
2600  * return immediately. */
2601 void exit_sie(struct kvm_vcpu *vcpu)
2602 {
2603         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2604         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2605                 cpu_relax();
2606 }
2607
2608 /* Kick a guest cpu out of SIE to process a request synchronously */
2609 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2610 {
2611         kvm_make_request(req, vcpu);
2612         kvm_s390_vcpu_request(vcpu);
2613 }
2614
2615 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2616                               unsigned long end)
2617 {
2618         struct kvm *kvm = gmap->private;
2619         struct kvm_vcpu *vcpu;
2620         unsigned long prefix;
2621         int i;
2622
2623         if (gmap_is_shadow(gmap))
2624                 return;
2625         if (start >= 1UL << 31)
2626                 /* We are only interested in prefix pages */
2627                 return;
2628         kvm_for_each_vcpu(i, vcpu, kvm) {
2629                 /* match against both prefix pages */
2630                 prefix = kvm_s390_get_prefix(vcpu);
2631                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2632                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2633                                    start, end);
2634                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2635                 }
2636         }
2637 }
2638
2639 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2640 {
2641         /* kvm common code refers to this, but never calls it */
2642         BUG();
2643         return 0;
2644 }
2645
2646 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2647                                            struct kvm_one_reg *reg)
2648 {
2649         int r = -EINVAL;
2650
2651         switch (reg->id) {
2652         case KVM_REG_S390_TODPR:
2653                 r = put_user(vcpu->arch.sie_block->todpr,
2654                              (u32 __user *)reg->addr);
2655                 break;
2656         case KVM_REG_S390_EPOCHDIFF:
2657                 r = put_user(vcpu->arch.sie_block->epoch,
2658                              (u64 __user *)reg->addr);
2659                 break;
2660         case KVM_REG_S390_CPU_TIMER:
2661                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2662                              (u64 __user *)reg->addr);
2663                 break;
2664         case KVM_REG_S390_CLOCK_COMP:
2665                 r = put_user(vcpu->arch.sie_block->ckc,
2666                              (u64 __user *)reg->addr);
2667                 break;
2668         case KVM_REG_S390_PFTOKEN:
2669                 r = put_user(vcpu->arch.pfault_token,
2670                              (u64 __user *)reg->addr);
2671                 break;
2672         case KVM_REG_S390_PFCOMPARE:
2673                 r = put_user(vcpu->arch.pfault_compare,
2674                              (u64 __user *)reg->addr);
2675                 break;
2676         case KVM_REG_S390_PFSELECT:
2677                 r = put_user(vcpu->arch.pfault_select,
2678                              (u64 __user *)reg->addr);
2679                 break;
2680         case KVM_REG_S390_PP:
2681                 r = put_user(vcpu->arch.sie_block->pp,
2682                              (u64 __user *)reg->addr);
2683                 break;
2684         case KVM_REG_S390_GBEA:
2685                 r = put_user(vcpu->arch.sie_block->gbea,
2686                              (u64 __user *)reg->addr);
2687                 break;
2688         default:
2689                 break;
2690         }
2691
2692         return r;
2693 }
2694
2695 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2696                                            struct kvm_one_reg *reg)
2697 {
2698         int r = -EINVAL;
2699         __u64 val;
2700
2701         switch (reg->id) {
2702         case KVM_REG_S390_TODPR:
2703                 r = get_user(vcpu->arch.sie_block->todpr,
2704                              (u32 __user *)reg->addr);
2705                 break;
2706         case KVM_REG_S390_EPOCHDIFF:
2707                 r = get_user(vcpu->arch.sie_block->epoch,
2708                              (u64 __user *)reg->addr);
2709                 break;
2710         case KVM_REG_S390_CPU_TIMER:
2711                 r = get_user(val, (u64 __user *)reg->addr);
2712                 if (!r)
2713                         kvm_s390_set_cpu_timer(vcpu, val);
2714                 break;
2715         case KVM_REG_S390_CLOCK_COMP:
2716                 r = get_user(vcpu->arch.sie_block->ckc,
2717                              (u64 __user *)reg->addr);
2718                 break;
2719         case KVM_REG_S390_PFTOKEN:
2720                 r = get_user(vcpu->arch.pfault_token,
2721                              (u64 __user *)reg->addr);
2722                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2723                         kvm_clear_async_pf_completion_queue(vcpu);
2724                 break;
2725         case KVM_REG_S390_PFCOMPARE:
2726                 r = get_user(vcpu->arch.pfault_compare,
2727                              (u64 __user *)reg->addr);
2728                 break;
2729         case KVM_REG_S390_PFSELECT:
2730                 r = get_user(vcpu->arch.pfault_select,
2731                              (u64 __user *)reg->addr);
2732                 break;
2733         case KVM_REG_S390_PP:
2734                 r = get_user(vcpu->arch.sie_block->pp,
2735                              (u64 __user *)reg->addr);
2736                 break;
2737         case KVM_REG_S390_GBEA:
2738                 r = get_user(vcpu->arch.sie_block->gbea,
2739                              (u64 __user *)reg->addr);
2740                 break;
2741         default:
2742                 break;
2743         }
2744
2745         return r;
2746 }
2747
2748 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2749 {
2750         kvm_s390_vcpu_initial_reset(vcpu);
2751         return 0;
2752 }
2753
2754 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2755 {
2756         vcpu_load(vcpu);
2757         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2758         vcpu_put(vcpu);
2759         return 0;
2760 }
2761
2762 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2763 {
2764         vcpu_load(vcpu);
2765         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2766         vcpu_put(vcpu);
2767         return 0;
2768 }
2769
2770 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2771                                   struct kvm_sregs *sregs)
2772 {
2773         vcpu_load(vcpu);
2774
2775         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2776         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2777
2778         vcpu_put(vcpu);
2779         return 0;
2780 }
2781
2782 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2783                                   struct kvm_sregs *sregs)
2784 {
2785         vcpu_load(vcpu);
2786
2787         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2788         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2789
2790         vcpu_put(vcpu);
2791         return 0;
2792 }
2793
2794 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2795 {
2796         int ret = 0;
2797
2798         vcpu_load(vcpu);
2799
2800         if (test_fp_ctl(fpu->fpc)) {
2801                 ret = -EINVAL;
2802                 goto out;
2803         }
2804         vcpu->run->s.regs.fpc = fpu->fpc;
2805         if (MACHINE_HAS_VX)
2806                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2807                                  (freg_t *) fpu->fprs);
2808         else
2809                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2810
2811 out:
2812         vcpu_put(vcpu);
2813         return ret;
2814 }
2815
2816 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2817 {
2818         vcpu_load(vcpu);
2819
2820         /* make sure we have the latest values */
2821         save_fpu_regs();
2822         if (MACHINE_HAS_VX)
2823                 convert_vx_to_fp((freg_t *) fpu->fprs,
2824                                  (__vector128 *) vcpu->run->s.regs.vrs);
2825         else
2826                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2827         fpu->fpc = vcpu->run->s.regs.fpc;
2828
2829         vcpu_put(vcpu);
2830         return 0;
2831 }
2832
2833 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2834 {
2835         int rc = 0;
2836
2837         if (!is_vcpu_stopped(vcpu))
2838                 rc = -EBUSY;
2839         else {
2840                 vcpu->run->psw_mask = psw.mask;
2841                 vcpu->run->psw_addr = psw.addr;
2842         }
2843         return rc;
2844 }
2845
2846 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2847                                   struct kvm_translation *tr)
2848 {
2849         return -EINVAL; /* not implemented yet */
2850 }
2851
2852 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2853                               KVM_GUESTDBG_USE_HW_BP | \
2854                               KVM_GUESTDBG_ENABLE)
2855
2856 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2857                                         struct kvm_guest_debug *dbg)
2858 {
2859         int rc = 0;
2860
2861         vcpu_load(vcpu);
2862
2863         vcpu->guest_debug = 0;
2864         kvm_s390_clear_bp_data(vcpu);
2865
2866         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
2867                 rc = -EINVAL;
2868                 goto out;
2869         }
2870         if (!sclp.has_gpere) {
2871                 rc = -EINVAL;
2872                 goto out;
2873         }
2874
2875         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2876                 vcpu->guest_debug = dbg->control;
2877                 /* enforce guest PER */
2878                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
2879
2880                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2881                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2882         } else {
2883                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2884                 vcpu->arch.guestdbg.last_bp = 0;
2885         }
2886
2887         if (rc) {
2888                 vcpu->guest_debug = 0;
2889                 kvm_s390_clear_bp_data(vcpu);
2890                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2891         }
2892
2893 out:
2894         vcpu_put(vcpu);
2895         return rc;
2896 }
2897
2898 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2899                                     struct kvm_mp_state *mp_state)
2900 {
2901         int ret;
2902
2903         vcpu_load(vcpu);
2904
2905         /* CHECK_STOP and LOAD are not supported yet */
2906         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2907                                       KVM_MP_STATE_OPERATING;
2908
2909         vcpu_put(vcpu);
2910         return ret;
2911 }
2912
2913 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2914                                     struct kvm_mp_state *mp_state)
2915 {
2916         int rc = 0;
2917
2918         vcpu_load(vcpu);
2919
2920         /* user space knows about this interface - let it control the state */
2921         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2922
2923         switch (mp_state->mp_state) {
2924         case KVM_MP_STATE_STOPPED:
2925                 kvm_s390_vcpu_stop(vcpu);
2926                 break;
2927         case KVM_MP_STATE_OPERATING:
2928                 kvm_s390_vcpu_start(vcpu);
2929                 break;
2930         case KVM_MP_STATE_LOAD:
2931         case KVM_MP_STATE_CHECK_STOP:
2932                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2933         default:
2934                 rc = -ENXIO;
2935         }
2936
2937         vcpu_put(vcpu);
2938         return rc;
2939 }
2940
2941 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2942 {
2943         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
2944 }
2945
2946 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2947 {
2948 retry:
2949         kvm_s390_vcpu_request_handled(vcpu);
2950         if (!kvm_request_pending(vcpu))
2951                 return 0;
2952         /*
2953          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2954          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2955          * This ensures that the ipte instruction for this request has
2956          * already finished. We might race against a second unmapper that
2957          * wants to set the blocking bit. Lets just retry the request loop.
2958          */
2959         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2960                 int rc;
2961                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2962                                           kvm_s390_get_prefix(vcpu),
2963                                           PAGE_SIZE * 2, PROT_WRITE);
2964                 if (rc) {
2965                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2966                         return rc;
2967                 }
2968                 goto retry;
2969         }
2970
2971         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2972                 vcpu->arch.sie_block->ihcpu = 0xffff;
2973                 goto retry;
2974         }
2975
2976         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2977                 if (!ibs_enabled(vcpu)) {
2978                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2979                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
2980                 }
2981                 goto retry;
2982         }
2983
2984         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2985                 if (ibs_enabled(vcpu)) {
2986                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2987                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
2988                 }
2989                 goto retry;
2990         }
2991
2992         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2993                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2994                 goto retry;
2995         }
2996
2997         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2998                 /*
2999                  * Disable CMMA virtualization; we will emulate the ESSA
3000                  * instruction manually, in order to provide additional
3001                  * functionalities needed for live migration.
3002                  */
3003                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3004                 goto retry;
3005         }
3006
3007         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3008                 /*
3009                  * Re-enable CMMA virtualization if CMMA is available and
3010                  * was used.
3011                  */
3012                 if ((vcpu->kvm->arch.use_cmma) &&
3013                     (vcpu->kvm->mm->context.use_cmma))
3014                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3015                 goto retry;
3016         }
3017
3018         /* nothing to do, just clear the request */
3019         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3020
3021         return 0;
3022 }
3023
3024 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
3025                                  const struct kvm_s390_vm_tod_clock *gtod)
3026 {
3027         struct kvm_vcpu *vcpu;
3028         struct kvm_s390_tod_clock_ext htod;
3029         int i;
3030
3031         mutex_lock(&kvm->lock);
3032         preempt_disable();
3033
3034         get_tod_clock_ext((char *)&htod);
3035
3036         kvm->arch.epoch = gtod->tod - htod.tod;
3037         kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3038
3039         if (kvm->arch.epoch > gtod->tod)
3040                 kvm->arch.epdx -= 1;
3041
3042         kvm_s390_vcpu_block_all(kvm);
3043         kvm_for_each_vcpu(i, vcpu, kvm) {
3044                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3045                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3046         }
3047
3048         kvm_s390_vcpu_unblock_all(kvm);
3049         preempt_enable();
3050         mutex_unlock(&kvm->lock);
3051 }
3052
3053 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
3054 {
3055         struct kvm_vcpu *vcpu;
3056         int i;
3057
3058         mutex_lock(&kvm->lock);
3059         preempt_disable();
3060         kvm->arch.epoch = tod - get_tod_clock();
3061         kvm_s390_vcpu_block_all(kvm);
3062         kvm_for_each_vcpu(i, vcpu, kvm)
3063                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3064         kvm_s390_vcpu_unblock_all(kvm);
3065         preempt_enable();
3066         mutex_unlock(&kvm->lock);
3067 }
3068
3069 /**
3070  * kvm_arch_fault_in_page - fault-in guest page if necessary
3071  * @vcpu: The corresponding virtual cpu
3072  * @gpa: Guest physical address
3073  * @writable: Whether the page should be writable or not
3074  *
3075  * Make sure that a guest page has been faulted-in on the host.
3076  *
3077  * Return: Zero on success, negative error code otherwise.
3078  */
3079 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3080 {
3081         return gmap_fault(vcpu->arch.gmap, gpa,
3082                           writable ? FAULT_FLAG_WRITE : 0);
3083 }
3084
3085 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3086                                       unsigned long token)
3087 {
3088         struct kvm_s390_interrupt inti;
3089         struct kvm_s390_irq irq;
3090
3091         if (start_token) {
3092                 irq.u.ext.ext_params2 = token;
3093                 irq.type = KVM_S390_INT_PFAULT_INIT;
3094                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3095         } else {
3096                 inti.type = KVM_S390_INT_PFAULT_DONE;
3097                 inti.parm64 = token;
3098                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3099         }
3100 }
3101
3102 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3103                                      struct kvm_async_pf *work)
3104 {
3105         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3106         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3107 }
3108
3109 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3110                                  struct kvm_async_pf *work)
3111 {
3112         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3113         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3114 }
3115
3116 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3117                                struct kvm_async_pf *work)
3118 {
3119         /* s390 will always inject the page directly */
3120 }
3121
3122 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3123 {
3124         /*
3125          * s390 will always inject the page directly,
3126          * but we still want check_async_completion to cleanup
3127          */
3128         return true;
3129 }
3130
3131 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3132 {
3133         hva_t hva;
3134         struct kvm_arch_async_pf arch;
3135         int rc;
3136
3137         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3138                 return 0;
3139         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3140             vcpu->arch.pfault_compare)
3141                 return 0;
3142         if (psw_extint_disabled(vcpu))
3143                 return 0;
3144         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3145                 return 0;
3146         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3147                 return 0;
3148         if (!vcpu->arch.gmap->pfault_enabled)
3149                 return 0;
3150
3151         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3152         hva += current->thread.gmap_addr & ~PAGE_MASK;
3153         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3154                 return 0;
3155
3156         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3157         return rc;
3158 }
3159
3160 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3161 {
3162         int rc, cpuflags;
3163
3164         /*
3165          * On s390 notifications for arriving pages will be delivered directly
3166          * to the guest but the house keeping for completed pfaults is
3167          * handled outside the worker.
3168          */
3169         kvm_check_async_pf_completion(vcpu);
3170
3171         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3172         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3173
3174         if (need_resched())
3175                 schedule();
3176
3177         if (test_cpu_flag(CIF_MCCK_PENDING))
3178                 s390_handle_mcck();
3179
3180         if (!kvm_is_ucontrol(vcpu->kvm)) {
3181                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3182                 if (rc)
3183                         return rc;
3184         }
3185
3186         rc = kvm_s390_handle_requests(vcpu);
3187         if (rc)
3188                 return rc;
3189
3190         if (guestdbg_enabled(vcpu)) {
3191                 kvm_s390_backup_guest_per_regs(vcpu);
3192                 kvm_s390_patch_guest_per_regs(vcpu);
3193         }
3194
3195         vcpu->arch.sie_block->icptcode = 0;
3196         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3197         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3198         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3199
3200         return 0;
3201 }
3202
3203 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3204 {
3205         struct kvm_s390_pgm_info pgm_info = {
3206                 .code = PGM_ADDRESSING,
3207         };
3208         u8 opcode, ilen;
3209         int rc;
3210
3211         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3212         trace_kvm_s390_sie_fault(vcpu);
3213
3214         /*
3215          * We want to inject an addressing exception, which is defined as a
3216          * suppressing or terminating exception. However, since we came here
3217          * by a DAT access exception, the PSW still points to the faulting
3218          * instruction since DAT exceptions are nullifying. So we've got
3219          * to look up the current opcode to get the length of the instruction
3220          * to be able to forward the PSW.
3221          */
3222         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3223         ilen = insn_length(opcode);
3224         if (rc < 0) {
3225                 return rc;
3226         } else if (rc) {
3227                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3228                  * Forward by arbitrary ilc, injection will take care of
3229                  * nullification if necessary.
3230                  */
3231                 pgm_info = vcpu->arch.pgm;
3232                 ilen = 4;
3233         }
3234         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3235         kvm_s390_forward_psw(vcpu, ilen);
3236         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3237 }
3238
3239 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3240 {
3241         struct mcck_volatile_info *mcck_info;
3242         struct sie_page *sie_page;
3243
3244         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3245                    vcpu->arch.sie_block->icptcode);
3246         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3247
3248         if (guestdbg_enabled(vcpu))
3249                 kvm_s390_restore_guest_per_regs(vcpu);
3250
3251         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3252         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3253
3254         if (exit_reason == -EINTR) {
3255                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3256                 sie_page = container_of(vcpu->arch.sie_block,
3257                                         struct sie_page, sie_block);
3258                 mcck_info = &sie_page->mcck_info;
3259                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3260                 return 0;
3261         }
3262
3263         if (vcpu->arch.sie_block->icptcode > 0) {
3264                 int rc = kvm_handle_sie_intercept(vcpu);
3265
3266                 if (rc != -EOPNOTSUPP)
3267                         return rc;
3268                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3269                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3270                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3271                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3272                 return -EREMOTE;
3273         } else if (exit_reason != -EFAULT) {
3274                 vcpu->stat.exit_null++;
3275                 return 0;
3276         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3277                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3278                 vcpu->run->s390_ucontrol.trans_exc_code =
3279                                                 current->thread.gmap_addr;
3280                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3281                 return -EREMOTE;
3282         } else if (current->thread.gmap_pfault) {
3283                 trace_kvm_s390_major_guest_pfault(vcpu);
3284                 current->thread.gmap_pfault = 0;
3285                 if (kvm_arch_setup_async_pf(vcpu))
3286                         return 0;
3287                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3288         }
3289         return vcpu_post_run_fault_in_sie(vcpu);
3290 }
3291
3292 static int __vcpu_run(struct kvm_vcpu *vcpu)
3293 {
3294         int rc, exit_reason;
3295
3296         /*
3297          * We try to hold kvm->srcu during most of vcpu_run (except when run-
3298          * ning the guest), so that memslots (and other stuff) are protected
3299          */
3300         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3301
3302         do {
3303                 rc = vcpu_pre_run(vcpu);
3304                 if (rc)
3305                         break;
3306
3307                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3308                 /*
3309                  * As PF_VCPU will be used in fault handler, between
3310                  * guest_enter and guest_exit should be no uaccess.
3311                  */
3312                 local_irq_disable();
3313                 guest_enter_irqoff();
3314                 __disable_cpu_timer_accounting(vcpu);
3315                 local_irq_enable();
3316                 exit_reason = sie64a(vcpu->arch.sie_block,
3317                                      vcpu->run->s.regs.gprs);
3318                 local_irq_disable();
3319                 __enable_cpu_timer_accounting(vcpu);
3320                 guest_exit_irqoff();
3321                 local_irq_enable();
3322                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3323
3324                 rc = vcpu_post_run(vcpu, exit_reason);
3325         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3326
3327         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3328         return rc;
3329 }
3330
3331 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3332 {
3333         struct runtime_instr_cb *riccb;
3334         struct gs_cb *gscb;
3335
3336         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3337         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3338         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3339         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3340         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3341                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3342         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3343                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3344                 /* some control register changes require a tlb flush */
3345                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3346         }
3347         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3348                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3349                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3350                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3351                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3352                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3353         }
3354         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3355                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3356                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3357                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3358                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3359                         kvm_clear_async_pf_completion_queue(vcpu);
3360         }
3361         /*
3362          * If userspace sets the riccb (e.g. after migration) to a valid state,
3363          * we should enable RI here instead of doing the lazy enablement.
3364          */
3365         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3366             test_kvm_facility(vcpu->kvm, 64) &&
3367             riccb->v &&
3368             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3369                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3370                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3371         }
3372         /*
3373          * If userspace sets the gscb (e.g. after migration) to non-zero,
3374          * we should enable GS here instead of doing the lazy enablement.
3375          */
3376         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3377             test_kvm_facility(vcpu->kvm, 133) &&
3378             gscb->gssm &&
3379             !vcpu->arch.gs_enabled) {
3380                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3381                 vcpu->arch.sie_block->ecb |= ECB_GS;
3382                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3383                 vcpu->arch.gs_enabled = 1;
3384         }
3385         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3386             test_kvm_facility(vcpu->kvm, 82)) {
3387                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3388                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3389         }
3390         save_access_regs(vcpu->arch.host_acrs);
3391         restore_access_regs(vcpu->run->s.regs.acrs);
3392         /* save host (userspace) fprs/vrs */
3393         save_fpu_regs();
3394         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3395         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3396         if (MACHINE_HAS_VX)
3397                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3398         else
3399                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3400         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3401         if (test_fp_ctl(current->thread.fpu.fpc))
3402                 /* User space provided an invalid FPC, let's clear it */
3403                 current->thread.fpu.fpc = 0;
3404         if (MACHINE_HAS_GS) {
3405                 preempt_disable();
3406                 __ctl_set_bit(2, 4);
3407                 if (current->thread.gs_cb) {
3408                         vcpu->arch.host_gscb = current->thread.gs_cb;
3409                         save_gs_cb(vcpu->arch.host_gscb);
3410                 }
3411                 if (vcpu->arch.gs_enabled) {
3412                         current->thread.gs_cb = (struct gs_cb *)
3413                                                 &vcpu->run->s.regs.gscb;
3414                         restore_gs_cb(current->thread.gs_cb);
3415                 }
3416                 preempt_enable();
3417         }
3418
3419         kvm_run->kvm_dirty_regs = 0;
3420 }
3421
3422 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3423 {
3424         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3425         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3426         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3427         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3428         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3429         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3430         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3431         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3432         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3433         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3434         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3435         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3436         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3437         save_access_regs(vcpu->run->s.regs.acrs);
3438         restore_access_regs(vcpu->arch.host_acrs);
3439         /* Save guest register state */
3440         save_fpu_regs();
3441         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3442         /* Restore will be done lazily at return */
3443         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3444         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3445         if (MACHINE_HAS_GS) {
3446                 __ctl_set_bit(2, 4);
3447                 if (vcpu->arch.gs_enabled)
3448                         save_gs_cb(current->thread.gs_cb);
3449                 preempt_disable();
3450                 current->thread.gs_cb = vcpu->arch.host_gscb;
3451                 restore_gs_cb(vcpu->arch.host_gscb);
3452                 preempt_enable();
3453                 if (!vcpu->arch.host_gscb)
3454                         __ctl_clear_bit(2, 4);
3455                 vcpu->arch.host_gscb = NULL;
3456         }
3457
3458 }
3459
3460 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3461 {
3462         int rc;
3463
3464         if (kvm_run->immediate_exit)
3465                 return -EINTR;
3466
3467         vcpu_load(vcpu);
3468
3469         if (guestdbg_exit_pending(vcpu)) {
3470                 kvm_s390_prepare_debug_exit(vcpu);
3471                 rc = 0;
3472                 goto out;
3473         }
3474
3475         kvm_sigset_activate(vcpu);
3476
3477         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3478                 kvm_s390_vcpu_start(vcpu);
3479         } else if (is_vcpu_stopped(vcpu)) {
3480                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3481                                    vcpu->vcpu_id);
3482                 rc = -EINVAL;
3483                 goto out;
3484         }
3485
3486         sync_regs(vcpu, kvm_run);
3487         enable_cpu_timer_accounting(vcpu);
3488
3489         might_fault();
3490         rc = __vcpu_run(vcpu);
3491
3492         if (signal_pending(current) && !rc) {
3493                 kvm_run->exit_reason = KVM_EXIT_INTR;
3494                 rc = -EINTR;
3495         }
3496
3497         if (guestdbg_exit_pending(vcpu) && !rc)  {
3498                 kvm_s390_prepare_debug_exit(vcpu);
3499                 rc = 0;
3500         }
3501
3502         if (rc == -EREMOTE) {
3503                 /* userspace support is needed, kvm_run has been prepared */
3504                 rc = 0;
3505         }
3506
3507         disable_cpu_timer_accounting(vcpu);
3508         store_regs(vcpu, kvm_run);
3509
3510         kvm_sigset_deactivate(vcpu);
3511
3512         vcpu->stat.exit_userspace++;
3513 out:
3514         vcpu_put(vcpu);
3515         return rc;
3516 }
3517
3518 /*
3519  * store status at address
3520  * we use have two special cases:
3521  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3522  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3523  */
3524 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3525 {
3526         unsigned char archmode = 1;
3527         freg_t fprs[NUM_FPRS];
3528         unsigned int px;
3529         u64 clkcomp, cputm;
3530         int rc;
3531
3532         px = kvm_s390_get_prefix(vcpu);
3533         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3534                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3535                         return -EFAULT;
3536                 gpa = 0;
3537         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3538                 if (write_guest_real(vcpu, 163, &archmode, 1))
3539                         return -EFAULT;
3540                 gpa = px;
3541         } else
3542                 gpa -= __LC_FPREGS_SAVE_AREA;
3543
3544         /* manually convert vector registers if necessary */
3545         if (MACHINE_HAS_VX) {
3546                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3547                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3548                                      fprs, 128);
3549         } else {
3550                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3551                                      vcpu->run->s.regs.fprs, 128);
3552         }
3553         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3554                               vcpu->run->s.regs.gprs, 128);
3555         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3556                               &vcpu->arch.sie_block->gpsw, 16);
3557         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3558                               &px, 4);
3559         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3560                               &vcpu->run->s.regs.fpc, 4);
3561         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3562                               &vcpu->arch.sie_block->todpr, 4);
3563         cputm = kvm_s390_get_cpu_timer(vcpu);
3564         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3565                               &cputm, 8);
3566         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3567         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3568                               &clkcomp, 8);
3569         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3570                               &vcpu->run->s.regs.acrs, 64);
3571         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3572                               &vcpu->arch.sie_block->gcr, 128);
3573         return rc ? -EFAULT : 0;
3574 }
3575
3576 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3577 {
3578         /*
3579          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3580          * switch in the run ioctl. Let's update our copies before we save
3581          * it into the save area
3582          */
3583         save_fpu_regs();
3584         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3585         save_access_regs(vcpu->run->s.regs.acrs);
3586
3587         return kvm_s390_store_status_unloaded(vcpu, addr);
3588 }
3589
3590 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3591 {
3592         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3593         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3594 }
3595
3596 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3597 {
3598         unsigned int i;
3599         struct kvm_vcpu *vcpu;
3600
3601         kvm_for_each_vcpu(i, vcpu, kvm) {
3602                 __disable_ibs_on_vcpu(vcpu);
3603         }
3604 }
3605
3606 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3607 {
3608         if (!sclp.has_ibs)
3609                 return;
3610         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3611         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3612 }
3613
3614 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3615 {
3616         int i, online_vcpus, started_vcpus = 0;
3617
3618         if (!is_vcpu_stopped(vcpu))
3619                 return;
3620
3621         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3622         /* Only one cpu at a time may enter/leave the STOPPED state. */
3623         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3624         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3625
3626         for (i = 0; i < online_vcpus; i++) {
3627                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3628                         started_vcpus++;
3629         }
3630
3631         if (started_vcpus == 0) {
3632                 /* we're the only active VCPU -> speed it up */
3633                 __enable_ibs_on_vcpu(vcpu);
3634         } else if (started_vcpus == 1) {
3635                 /*
3636                  * As we are starting a second VCPU, we have to disable
3637                  * the IBS facility on all VCPUs to remove potentially
3638                  * oustanding ENABLE requests.
3639                  */
3640                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3641         }
3642
3643         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3644         /*
3645          * Another VCPU might have used IBS while we were offline.
3646          * Let's play safe and flush the VCPU at startup.
3647          */
3648         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3649         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3650         return;
3651 }
3652
3653 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3654 {
3655         int i, online_vcpus, started_vcpus = 0;
3656         struct kvm_vcpu *started_vcpu = NULL;
3657
3658         if (is_vcpu_stopped(vcpu))
3659                 return;
3660
3661         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3662         /* Only one cpu at a time may enter/leave the STOPPED state. */
3663         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3664         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3665
3666         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3667         kvm_s390_clear_stop_irq(vcpu);
3668
3669         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3670         __disable_ibs_on_vcpu(vcpu);
3671
3672         for (i = 0; i < online_vcpus; i++) {
3673                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3674                         started_vcpus++;
3675                         started_vcpu = vcpu->kvm->vcpus[i];
3676                 }
3677         }
3678
3679         if (started_vcpus == 1) {
3680                 /*
3681                  * As we only have one VCPU left, we want to enable the
3682                  * IBS facility for that VCPU to speed it up.
3683                  */
3684                 __enable_ibs_on_vcpu(started_vcpu);
3685         }
3686
3687         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3688         return;
3689 }
3690
3691 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3692                                      struct kvm_enable_cap *cap)
3693 {
3694         int r;
3695
3696         if (cap->flags)
3697                 return -EINVAL;
3698
3699         switch (cap->cap) {
3700         case KVM_CAP_S390_CSS_SUPPORT:
3701                 if (!vcpu->kvm->arch.css_support) {
3702                         vcpu->kvm->arch.css_support = 1;
3703                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3704                         trace_kvm_s390_enable_css(vcpu->kvm);
3705                 }
3706                 r = 0;
3707                 break;
3708         default:
3709                 r = -EINVAL;
3710                 break;
3711         }
3712         return r;
3713 }
3714
3715 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3716                                   struct kvm_s390_mem_op *mop)
3717 {
3718         void __user *uaddr = (void __user *)mop->buf;
3719         void *tmpbuf = NULL;
3720         int r, srcu_idx;
3721         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3722                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3723
3724         if (mop->flags & ~supported_flags)
3725                 return -EINVAL;
3726
3727         if (mop->size > MEM_OP_MAX_SIZE)
3728                 return -E2BIG;
3729
3730         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3731                 tmpbuf = vmalloc(mop->size);
3732                 if (!tmpbuf)
3733                         return -ENOMEM;
3734         }
3735
3736         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3737
3738         switch (mop->op) {
3739         case KVM_S390_MEMOP_LOGICAL_READ:
3740                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3741                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3742                                             mop->size, GACC_FETCH);
3743                         break;
3744                 }
3745                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3746                 if (r == 0) {
3747                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3748                                 r = -EFAULT;
3749                 }
3750                 break;
3751         case KVM_S390_MEMOP_LOGICAL_WRITE:
3752                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3753                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3754                                             mop->size, GACC_STORE);
3755                         break;
3756                 }
3757                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3758                         r = -EFAULT;
3759                         break;
3760                 }
3761                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3762                 break;
3763         default:
3764                 r = -EINVAL;
3765         }
3766
3767         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3768
3769         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3770                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3771
3772         vfree(tmpbuf);
3773         return r;
3774 }
3775
3776 long kvm_arch_vcpu_async_ioctl(struct file *filp,
3777                                unsigned int ioctl, unsigned long arg)
3778 {
3779         struct kvm_vcpu *vcpu = filp->private_data;
3780         void __user *argp = (void __user *)arg;
3781
3782         switch (ioctl) {
3783         case KVM_S390_IRQ: {
3784                 struct kvm_s390_irq s390irq;
3785
3786                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3787                         return -EFAULT;
3788                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3789         }
3790         case KVM_S390_INTERRUPT: {
3791                 struct kvm_s390_interrupt s390int;
3792                 struct kvm_s390_irq s390irq;
3793
3794                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3795                         return -EFAULT;
3796                 if (s390int_to_s390irq(&s390int, &s390irq))
3797                         return -EINVAL;
3798                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3799         }
3800         }
3801         return -ENOIOCTLCMD;
3802 }
3803
3804 long kvm_arch_vcpu_ioctl(struct file *filp,
3805                          unsigned int ioctl, unsigned long arg)
3806 {
3807         struct kvm_vcpu *vcpu = filp->private_data;
3808         void __user *argp = (void __user *)arg;
3809         int idx;
3810         long r;
3811
3812         vcpu_load(vcpu);
3813
3814         switch (ioctl) {
3815         case KVM_S390_STORE_STATUS:
3816                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3817                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3818                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3819                 break;
3820         case KVM_S390_SET_INITIAL_PSW: {
3821                 psw_t psw;
3822
3823                 r = -EFAULT;
3824                 if (copy_from_user(&psw, argp, sizeof(psw)))
3825                         break;
3826                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3827                 break;
3828         }
3829         case KVM_S390_INITIAL_RESET:
3830                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3831                 break;
3832         case KVM_SET_ONE_REG:
3833         case KVM_GET_ONE_REG: {
3834                 struct kvm_one_reg reg;
3835                 r = -EFAULT;
3836                 if (copy_from_user(&reg, argp, sizeof(reg)))
3837                         break;
3838                 if (ioctl == KVM_SET_ONE_REG)
3839                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3840                 else
3841                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3842                 break;
3843         }
3844 #ifdef CONFIG_KVM_S390_UCONTROL
3845         case KVM_S390_UCAS_MAP: {
3846                 struct kvm_s390_ucas_mapping ucasmap;
3847
3848                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3849                         r = -EFAULT;
3850                         break;
3851                 }
3852
3853                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3854                         r = -EINVAL;
3855                         break;
3856                 }
3857
3858                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3859                                      ucasmap.vcpu_addr, ucasmap.length);
3860                 break;
3861         }
3862         case KVM_S390_UCAS_UNMAP: {
3863                 struct kvm_s390_ucas_mapping ucasmap;
3864
3865                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3866                         r = -EFAULT;
3867                         break;
3868                 }
3869
3870                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3871                         r = -EINVAL;
3872                         break;
3873                 }
3874
3875                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3876                         ucasmap.length);
3877                 break;
3878         }
3879 #endif
3880         case KVM_S390_VCPU_FAULT: {
3881                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3882                 break;
3883         }
3884         case KVM_ENABLE_CAP:
3885         {
3886                 struct kvm_enable_cap cap;
3887                 r = -EFAULT;
3888                 if (copy_from_user(&cap, argp, sizeof(cap)))
3889                         break;
3890                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3891                 break;
3892         }
3893         case KVM_S390_MEM_OP: {
3894                 struct kvm_s390_mem_op mem_op;
3895
3896                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3897                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3898                 else
3899                         r = -EFAULT;
3900                 break;
3901         }
3902         case KVM_S390_SET_IRQ_STATE: {
3903                 struct kvm_s390_irq_state irq_state;
3904
3905                 r = -EFAULT;
3906                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3907                         break;
3908                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3909                     irq_state.len == 0 ||
3910                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3911                         r = -EINVAL;
3912                         break;
3913                 }
3914                 /* do not use irq_state.flags, it will break old QEMUs */
3915                 r = kvm_s390_set_irq_state(vcpu,
3916                                            (void __user *) irq_state.buf,
3917                                            irq_state.len);
3918                 break;
3919         }
3920         case KVM_S390_GET_IRQ_STATE: {
3921                 struct kvm_s390_irq_state irq_state;
3922
3923                 r = -EFAULT;
3924                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3925                         break;
3926                 if (irq_state.len == 0) {
3927                         r = -EINVAL;
3928                         break;
3929                 }
3930                 /* do not use irq_state.flags, it will break old QEMUs */
3931                 r = kvm_s390_get_irq_state(vcpu,
3932                                            (__u8 __user *)  irq_state.buf,
3933                                            irq_state.len);
3934                 break;
3935         }
3936         default:
3937                 r = -ENOTTY;
3938         }
3939
3940         vcpu_put(vcpu);
3941         return r;
3942 }
3943
3944 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3945 {
3946 #ifdef CONFIG_KVM_S390_UCONTROL
3947         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3948                  && (kvm_is_ucontrol(vcpu->kvm))) {
3949                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3950                 get_page(vmf->page);
3951                 return 0;
3952         }
3953 #endif
3954         return VM_FAULT_SIGBUS;
3955 }
3956
3957 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3958                             unsigned long npages)
3959 {
3960         return 0;
3961 }
3962
3963 /* Section: memory related */
3964 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3965                                    struct kvm_memory_slot *memslot,
3966                                    const struct kvm_userspace_memory_region *mem,
3967                                    enum kvm_mr_change change)
3968 {
3969         /* A few sanity checks. We can have memory slots which have to be
3970            located/ended at a segment boundary (1MB). The memory in userland is
3971            ok to be fragmented into various different vmas. It is okay to mmap()
3972            and munmap() stuff in this slot after doing this call at any time */
3973
3974         if (mem->userspace_addr & 0xffffful)
3975                 return -EINVAL;
3976
3977         if (mem->memory_size & 0xffffful)
3978                 return -EINVAL;
3979
3980         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3981                 return -EINVAL;
3982
3983         return 0;
3984 }
3985
3986 void kvm_arch_commit_memory_region(struct kvm *kvm,
3987                                 const struct kvm_userspace_memory_region *mem,
3988                                 const struct kvm_memory_slot *old,
3989                                 const struct kvm_memory_slot *new,
3990                                 enum kvm_mr_change change)
3991 {
3992         int rc;
3993
3994         /* If the basics of the memslot do not change, we do not want
3995          * to update the gmap. Every update causes several unnecessary
3996          * segment translation exceptions. This is usually handled just
3997          * fine by the normal fault handler + gmap, but it will also
3998          * cause faults on the prefix page of running guest CPUs.
3999          */
4000         if (old->userspace_addr == mem->userspace_addr &&
4001             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4002             old->npages * PAGE_SIZE == mem->memory_size)
4003                 return;
4004
4005         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4006                 mem->guest_phys_addr, mem->memory_size);
4007         if (rc)
4008                 pr_warn("failed to commit memory region\n");
4009         return;
4010 }
4011
4012 static inline unsigned long nonhyp_mask(int i)
4013 {
4014         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4015
4016         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4017 }
4018
4019 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4020 {
4021         vcpu->valid_wakeup = false;
4022 }
4023
4024 static int __init kvm_s390_init(void)
4025 {
4026         int i;
4027
4028         if (!sclp.has_sief2) {
4029                 pr_info("SIE not available\n");
4030                 return -ENODEV;
4031         }
4032
4033         for (i = 0; i < 16; i++)
4034                 kvm_s390_fac_list_mask[i] |=
4035                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4036
4037         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4038 }
4039
4040 static void __exit kvm_s390_exit(void)
4041 {
4042         kvm_exit();
4043 }
4044
4045 module_init(kvm_s390_init);
4046 module_exit(kvm_s390_exit);
4047
4048 /*
4049  * Enable autoloading of the kvm module.
4050  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4051  * since x86 takes a different approach.
4052  */
4053 #include <linux/miscdevice.h>
4054 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4055 MODULE_ALIAS("devname:kvm");