arch/s390/kvm/kvm-s390.c

   1 /*
   2  * hosting zSeries kernel virtual machines
   3  *
   4  * Copyright IBM Corp. 2008, 2009
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License (version 2 only)
   8  * as published by the Free Software Foundation.
   9  *
  10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11  *               Christian Borntraeger <borntraeger@de.ibm.com>
  12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14  *               Jason J. Herne <jjherne@us.ibm.com>
  15  */
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/module.h>
  25 #include <linux/random.h>
  26 #include <linux/slab.h>
  27 #include <linux/timer.h>
  28 #include <linux/vmalloc.h>
  29 #include <asm/asm-offsets.h>
  30 #include <asm/lowcore.h>
  31 #include <asm/etr.h>
  32 #include <asm/pgtable.h>
  33 #include <asm/nmi.h>
  34 #include <asm/switch_to.h>
  35 #include <asm/isc.h>
  36 #include <asm/sclp.h>
  37 #include "kvm-s390.h"
  38 #include "gaccess.h"
  39
  40 #define KMSG_COMPONENT "kvm-s390"
  41 #undef pr_fmt
  42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  43
  44 #define CREATE_TRACE_POINTS
  45 #include "trace.h"
  46 #include "trace-s390.h"
  47
  48 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  49 #define LOCAL_IRQS 32
  50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  51                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  52
  53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  54
  55 struct kvm_stats_debugfs_item debugfs_entries[] = {
  56         { "userspace_handled", VCPU_STAT(exit_userspace) },
  57         { "exit_null", VCPU_STAT(exit_null) },
  58         { "exit_validity", VCPU_STAT(exit_validity) },
  59         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  60         { "exit_external_request", VCPU_STAT(exit_external_request) },
  61         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  62         { "exit_instruction", VCPU_STAT(exit_instruction) },
  63         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  64         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  65         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  66         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  67         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  68         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  69         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  70         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  71         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  72         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  73         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  74         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
  75         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  76         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  77         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  78         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
  79         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  80         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
  81         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
  82         { "instruction_spx", VCPU_STAT(instruction_spx) },
  83         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
  84         { "instruction_stap", VCPU_STAT(instruction_stap) },
  85         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
  86         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
  87         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
  88         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
  89         { "instruction_essa", VCPU_STAT(instruction_essa) },
  90         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
  91         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
  92         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
  93         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
  94         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
  95         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
  96         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
  97         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
  98         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
  99         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 100         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 101         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 102         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 103         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 104         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 105         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 106         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 107         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 108         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 109         { "diagnose_10", VCPU_STAT(diagnose_10) },
 110         { "diagnose_44", VCPU_STAT(diagnose_44) },
 111         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
 112         { "diagnose_258", VCPU_STAT(diagnose_258) },
 113         { "diagnose_308", VCPU_STAT(diagnose_308) },
 114         { "diagnose_500", VCPU_STAT(diagnose_500) },
 115         { NULL }
 116 };
 117
 118 /* upper facilities limit for kvm */
 119 unsigned long kvm_s390_fac_list_mask[] = {
 120         0xffe6fffbfcfdfc40UL,
 121         0x005e800000000000UL,
 122 };
 123
 124 unsigned long kvm_s390_fac_list_mask_size(void)
 125 {
 126         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 127         return ARRAY_SIZE(kvm_s390_fac_list_mask);
 128 }
 129
 130 static struct gmap_notifier gmap_notifier;
 131 debug_info_t *kvm_s390_dbf;
 132
 133 /* Section: not file related */
 134 int kvm_arch_hardware_enable(void)
 135 {
 136         /* every s390 is virtualization enabled ;-) */
 137         return 0;
 138 }
 139
 140 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
 141
 142 /*
 143  * This callback is executed during stop_machine(). All CPUs are therefore
 144  * temporarily stopped. In order not to change guest behavior, we have to
 145  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 146  * so a CPU won't be stopped while calculating with the epoch.
 147  */
 148 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 149                           void *v)
 150 {
 151         struct kvm *kvm;
 152         struct kvm_vcpu *vcpu;
 153         int i;
 154         unsigned long long *delta = v;
 155
 156         list_for_each_entry(kvm, &vm_list, vm_list) {
 157                 kvm->arch.epoch -= *delta;
 158                 kvm_for_each_vcpu(i, vcpu, kvm) {
 159                         vcpu->arch.sie_block->epoch -= *delta;
 160                 }
 161         }
 162         return NOTIFY_OK;
 163 }
 164
 165 static struct notifier_block kvm_clock_notifier = {
 166         .notifier_call = kvm_clock_sync,
 167 };
 168
 169 int kvm_arch_hardware_setup(void)
 170 {
 171         gmap_notifier.notifier_call = kvm_gmap_notifier;
 172         gmap_register_ipte_notifier(&gmap_notifier);
 173         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 174                                        &kvm_clock_notifier);
 175         return 0;
 176 }
 177
 178 void kvm_arch_hardware_unsetup(void)
 179 {
 180         gmap_unregister_ipte_notifier(&gmap_notifier);
 181         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 182                                          &kvm_clock_notifier);
 183 }
 184
 185 int kvm_arch_init(void *opaque)
 186 {
 187         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 188         if (!kvm_s390_dbf)
 189                 return -ENOMEM;
 190
 191         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 192                 debug_unregister(kvm_s390_dbf);
 193                 return -ENOMEM;
 194         }
 195
 196         /* Register floating interrupt controller interface. */
 197         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 198 }
 199
 200 void kvm_arch_exit(void)
 201 {
 202         debug_unregister(kvm_s390_dbf);
 203 }
 204
 205 /* Section: device related */
 206 long kvm_arch_dev_ioctl(struct file *filp,
 207                         unsigned int ioctl, unsigned long arg)
 208 {
 209         if (ioctl == KVM_S390_ENABLE_SIE)
 210                 return s390_enable_sie();
 211         return -EINVAL;
 212 }
 213
 214 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 215 {
 216         int r;
 217
 218         switch (ext) {
 219         case KVM_CAP_S390_PSW:
 220         case KVM_CAP_S390_GMAP:
 221         case KVM_CAP_SYNC_MMU:
 222 #ifdef CONFIG_KVM_S390_UCONTROL
 223         case KVM_CAP_S390_UCONTROL:
 224 #endif
 225         case KVM_CAP_ASYNC_PF:
 226         case KVM_CAP_SYNC_REGS:
 227         case KVM_CAP_ONE_REG:
 228         case KVM_CAP_ENABLE_CAP:
 229         case KVM_CAP_S390_CSS_SUPPORT:
 230         case KVM_CAP_IOEVENTFD:
 231         case KVM_CAP_DEVICE_CTRL:
 232         case KVM_CAP_ENABLE_CAP_VM:
 233         case KVM_CAP_S390_IRQCHIP:
 234         case KVM_CAP_VM_ATTRIBUTES:
 235         case KVM_CAP_MP_STATE:
 236         case KVM_CAP_S390_INJECT_IRQ:
 237         case KVM_CAP_S390_USER_SIGP:
 238         case KVM_CAP_S390_USER_STSI:
 239         case KVM_CAP_S390_SKEYS:
 240         case KVM_CAP_S390_IRQ_STATE:
 241                 r = 1;
 242                 break;
 243         case KVM_CAP_S390_MEM_OP:
 244                 r = MEM_OP_MAX_SIZE;
 245                 break;
 246         case KVM_CAP_NR_VCPUS:
 247         case KVM_CAP_MAX_VCPUS:
 248                 r = KVM_MAX_VCPUS;
 249                 break;
 250         case KVM_CAP_NR_MEMSLOTS:
 251                 r = KVM_USER_MEM_SLOTS;
 252                 break;
 253         case KVM_CAP_S390_COW:
 254                 r = MACHINE_HAS_ESOP;
 255                 break;
 256         case KVM_CAP_S390_VECTOR_REGISTERS:
 257                 r = MACHINE_HAS_VX;
 258                 break;
 259         default:
 260                 r = 0;
 261         }
 262         return r;
 263 }
 264
 265 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 266                                         struct kvm_memory_slot *memslot)
 267 {
 268         gfn_t cur_gfn, last_gfn;
 269         unsigned long address;
 270         struct gmap *gmap = kvm->arch.gmap;
 271
 272         down_read(&gmap->mm->mmap_sem);
 273         /* Loop over all guest pages */
 274         last_gfn = memslot->base_gfn + memslot->npages;
 275         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 276                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 277
 278                 if (gmap_test_and_clear_dirty(address, gmap))
 279                         mark_page_dirty(kvm, cur_gfn);
 280         }
 281         up_read(&gmap->mm->mmap_sem);
 282 }
 283
 284 /* Section: vm related */
 285 /*
 286  * Get (and clear) the dirty memory log for a memory slot.
 287  */
 288 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 289                                struct kvm_dirty_log *log)
 290 {
 291         int r;
 292         unsigned long n;
 293         struct kvm_memslots *slots;
 294         struct kvm_memory_slot *memslot;
 295         int is_dirty = 0;
 296
 297         mutex_lock(&kvm->slots_lock);
 298
 299         r = -EINVAL;
 300         if (log->slot >= KVM_USER_MEM_SLOTS)
 301                 goto out;
 302
 303         slots = kvm_memslots(kvm);
 304         memslot = id_to_memslot(slots, log->slot);
 305         r = -ENOENT;
 306         if (!memslot->dirty_bitmap)
 307                 goto out;
 308
 309         kvm_s390_sync_dirty_log(kvm, memslot);
 310         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 311         if (r)
 312                 goto out;
 313
 314         /* Clear the dirty log */
 315         if (is_dirty) {
 316                 n = kvm_dirty_bitmap_bytes(memslot);
 317                 memset(memslot->dirty_bitmap, 0, n);
 318         }
 319         r = 0;
 320 out:
 321         mutex_unlock(&kvm->slots_lock);
 322         return r;
 323 }
 324
 325 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 326 {
 327         int r;
 328
 329         if (cap->flags)
 330                 return -EINVAL;
 331
 332         switch (cap->cap) {
 333         case KVM_CAP_S390_IRQCHIP:
 334                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 335                 kvm->arch.use_irqchip = 1;
 336                 r = 0;
 337                 break;
 338         case KVM_CAP_S390_USER_SIGP:
 339                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 340                 kvm->arch.user_sigp = 1;
 341                 r = 0;
 342                 break;
 343         case KVM_CAP_S390_VECTOR_REGISTERS:
 344                 if (MACHINE_HAS_VX) {
 345                         set_kvm_facility(kvm->arch.model.fac->mask, 129);
 346                         set_kvm_facility(kvm->arch.model.fac->list, 129);
 347                         r = 0;
 348                 } else
 349                         r = -EINVAL;
 350                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 351                          r ? "(not available)" : "(success)");
 352                 break;
 353         case KVM_CAP_S390_USER_STSI:
 354                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 355                 kvm->arch.user_stsi = 1;
 356                 r = 0;
 357                 break;
 358         default:
 359                 r = -EINVAL;
 360                 break;
 361         }
 362         return r;
 363 }
 364
 365 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 366 {
 367         int ret;
 368
 369         switch (attr->attr) {
 370         case KVM_S390_VM_MEM_LIMIT_SIZE:
 371                 ret = 0;
 372                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 373                          kvm->arch.gmap->asce_end);
 374                 if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
 375                         ret = -EFAULT;
 376                 break;
 377         default:
 378                 ret = -ENXIO;
 379                 break;
 380         }
 381         return ret;
 382 }
 383
 384 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 385 {
 386         int ret;
 387         unsigned int idx;
 388         switch (attr->attr) {
 389         case KVM_S390_VM_MEM_ENABLE_CMMA:
 390                 /* enable CMMA only for z10 and later (EDAT_1) */
 391                 ret = -EINVAL;
 392                 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
 393                         break;
 394
 395                 ret = -EBUSY;
 396                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 397                 mutex_lock(&kvm->lock);
 398                 if (atomic_read(&kvm->online_vcpus) == 0) {
 399                         kvm->arch.use_cmma = 1;
 400                         ret = 0;
 401                 }
 402                 mutex_unlock(&kvm->lock);
 403                 break;
 404         case KVM_S390_VM_MEM_CLR_CMMA:
 405                 ret = -EINVAL;
 406                 if (!kvm->arch.use_cmma)
 407                         break;
 408
 409                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 410                 mutex_lock(&kvm->lock);
 411                 idx = srcu_read_lock(&kvm->srcu);
 412                 s390_reset_cmma(kvm->arch.gmap->mm);
 413                 srcu_read_unlock(&kvm->srcu, idx);
 414                 mutex_unlock(&kvm->lock);
 415                 ret = 0;
 416                 break;
 417         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 418                 unsigned long new_limit;
 419
 420                 if (kvm_is_ucontrol(kvm))
 421                         return -EINVAL;
 422
 423                 if (get_user(new_limit, (u64 __user *)attr->addr))
 424                         return -EFAULT;
 425
 426                 if (new_limit > kvm->arch.gmap->asce_end)
 427                         return -E2BIG;
 428
 429                 ret = -EBUSY;
 430                 mutex_lock(&kvm->lock);
 431                 if (atomic_read(&kvm->online_vcpus) == 0) {
 432                         /* gmap_alloc will round the limit up */
 433                         struct gmap *new = gmap_alloc(current->mm, new_limit);
 434
 435                         if (!new) {
 436                                 ret = -ENOMEM;
 437                         } else {
 438                                 gmap_free(kvm->arch.gmap);
 439                                 new->private = kvm;
 440                                 kvm->arch.gmap = new;
 441                                 ret = 0;
 442                         }
 443                 }
 444                 mutex_unlock(&kvm->lock);
 445                 VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
 446                 break;
 447         }
 448         default:
 449                 ret = -ENXIO;
 450                 break;
 451         }
 452         return ret;
 453 }
 454
 455 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 456
 457 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 458 {
 459         struct kvm_vcpu *vcpu;
 460         int i;
 461
 462         if (!test_kvm_facility(kvm, 76))
 463                 return -EINVAL;
 464
 465         mutex_lock(&kvm->lock);
 466         switch (attr->attr) {
 467         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 468                 get_random_bytes(
 469                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 470                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 471                 kvm->arch.crypto.aes_kw = 1;
 472                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 473                 break;
 474         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 475                 get_random_bytes(
 476                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 477                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 478                 kvm->arch.crypto.dea_kw = 1;
 479                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 480                 break;
 481         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 482                 kvm->arch.crypto.aes_kw = 0;
 483                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 484                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 485                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 486                 break;
 487         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 488                 kvm->arch.crypto.dea_kw = 0;
 489                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 490                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 491                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 492                 break;
 493         default:
 494                 mutex_unlock(&kvm->lock);
 495                 return -ENXIO;
 496         }
 497
 498         kvm_for_each_vcpu(i, vcpu, kvm) {
 499                 kvm_s390_vcpu_crypto_setup(vcpu);
 500                 exit_sie(vcpu);
 501         }
 502         mutex_unlock(&kvm->lock);
 503         return 0;
 504 }
 505
 506 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 507 {
 508         u8 gtod_high;
 509
 510         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 511                                            sizeof(gtod_high)))
 512                 return -EFAULT;
 513
 514         if (gtod_high != 0)
 515                 return -EINVAL;
 516         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);
 517
 518         return 0;
 519 }
 520
 521 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 522 {
 523         struct kvm_vcpu *cur_vcpu;
 524         unsigned int vcpu_idx;
 525         u64 host_tod, gtod;
 526         int r;
 527
 528         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 529                 return -EFAULT;
 530
 531         r = store_tod_clock(&host_tod);
 532         if (r)
 533                 return r;
 534
 535         mutex_lock(&kvm->lock);
 536         preempt_disable();
 537         kvm->arch.epoch = gtod - host_tod;
 538         kvm_s390_vcpu_block_all(kvm);
 539         kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
 540                 cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
 541         kvm_s390_vcpu_unblock_all(kvm);
 542         preempt_enable();
 543         mutex_unlock(&kvm->lock);
 544         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
 545         return 0;
 546 }
 547
 548 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 549 {
 550         int ret;
 551
 552         if (attr->flags)
 553                 return -EINVAL;
 554
 555         switch (attr->attr) {
 556         case KVM_S390_VM_TOD_HIGH:
 557                 ret = kvm_s390_set_tod_high(kvm, attr);
 558                 break;
 559         case KVM_S390_VM_TOD_LOW:
 560                 ret = kvm_s390_set_tod_low(kvm, attr);
 561                 break;
 562         default:
 563                 ret = -ENXIO;
 564                 break;
 565         }
 566         return ret;
 567 }
 568
 569 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 570 {
 571         u8 gtod_high = 0;
 572
 573         if (copy_to_user((void __user *)attr->addr, &gtod_high,
 574                                          sizeof(gtod_high)))
 575                 return -EFAULT;
 576         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);
 577
 578         return 0;
 579 }
 580
 581 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 582 {
 583         u64 host_tod, gtod;
 584         int r;
 585
 586         r = store_tod_clock(&host_tod);
 587         if (r)
 588                 return r;
 589
 590         preempt_disable();
 591         gtod = host_tod + kvm->arch.epoch;
 592         preempt_enable();
 593         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 594                 return -EFAULT;
 595         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);
 596
 597         return 0;
 598 }
 599
 600 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 601 {
 602         int ret;
 603
 604         if (attr->flags)
 605                 return -EINVAL;
 606
 607         switch (attr->attr) {
 608         case KVM_S390_VM_TOD_HIGH:
 609                 ret = kvm_s390_get_tod_high(kvm, attr);
 610                 break;
 611         case KVM_S390_VM_TOD_LOW:
 612                 ret = kvm_s390_get_tod_low(kvm, attr);
 613                 break;
 614         default:
 615                 ret = -ENXIO;
 616                 break;
 617         }
 618         return ret;
 619 }
 620
 621 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 622 {
 623         struct kvm_s390_vm_cpu_processor *proc;
 624         int ret = 0;
 625
 626         mutex_lock(&kvm->lock);
 627         if (atomic_read(&kvm->online_vcpus)) {
 628                 ret = -EBUSY;
 629                 goto out;
 630         }
 631         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 632         if (!proc) {
 633                 ret = -ENOMEM;
 634                 goto out;
 635         }
 636         if (!copy_from_user(proc, (void __user *)attr->addr,
 637                             sizeof(*proc))) {
 638                 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
 639                        sizeof(struct cpuid));
 640                 kvm->arch.model.ibc = proc->ibc;
 641                 memcpy(kvm->arch.model.fac->list, proc->fac_list,
 642                        S390_ARCH_FAC_LIST_SIZE_BYTE);
 643         } else
 644                 ret = -EFAULT;
 645         kfree(proc);
 646 out:
 647         mutex_unlock(&kvm->lock);
 648         return ret;
 649 }
 650
 651 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 652 {
 653         int ret = -ENXIO;
 654
 655         switch (attr->attr) {
 656         case KVM_S390_VM_CPU_PROCESSOR:
 657                 ret = kvm_s390_set_processor(kvm, attr);
 658                 break;
 659         }
 660         return ret;
 661 }
 662
 663 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 664 {
 665         struct kvm_s390_vm_cpu_processor *proc;
 666         int ret = 0;
 667
 668         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 669         if (!proc) {
 670                 ret = -ENOMEM;
 671                 goto out;
 672         }
 673         memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
 674         proc->ibc = kvm->arch.model.ibc;
 675         memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
 676         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
 677                 ret = -EFAULT;
 678         kfree(proc);
 679 out:
 680         return ret;
 681 }
 682
 683 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
 684 {
 685         struct kvm_s390_vm_cpu_machine *mach;
 686         int ret = 0;
 687
 688         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
 689         if (!mach) {
 690                 ret = -ENOMEM;
 691                 goto out;
 692         }
 693         get_cpu_id((struct cpuid *) &mach->cpuid);
 694         mach->ibc = sclp.ibc;
 695         memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
 696                S390_ARCH_FAC_LIST_SIZE_BYTE);
 697         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
 698                S390_ARCH_FAC_LIST_SIZE_BYTE);
 699         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
 700                 ret = -EFAULT;
 701         kfree(mach);
 702 out:
 703         return ret;
 704 }
 705
 706 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 707 {
 708         int ret = -ENXIO;
 709
 710         switch (attr->attr) {
 711         case KVM_S390_VM_CPU_PROCESSOR:
 712                 ret = kvm_s390_get_processor(kvm, attr);
 713                 break;
 714         case KVM_S390_VM_CPU_MACHINE:
 715                 ret = kvm_s390_get_machine(kvm, attr);
 716                 break;
 717         }
 718         return ret;
 719 }
 720
 721 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 722 {
 723         int ret;
 724
 725         switch (attr->group) {
 726         case KVM_S390_VM_MEM_CTRL:
 727                 ret = kvm_s390_set_mem_control(kvm, attr);
 728                 break;
 729         case KVM_S390_VM_TOD:
 730                 ret = kvm_s390_set_tod(kvm, attr);
 731                 break;
 732         case KVM_S390_VM_CPU_MODEL:
 733                 ret = kvm_s390_set_cpu_model(kvm, attr);
 734                 break;
 735         case KVM_S390_VM_CRYPTO:
 736                 ret = kvm_s390_vm_set_crypto(kvm, attr);
 737                 break;
 738         default:
 739                 ret = -ENXIO;
 740                 break;
 741         }
 742
 743         return ret;
 744 }
 745
 746 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 747 {
 748         int ret;
 749
 750         switch (attr->group) {
 751         case KVM_S390_VM_MEM_CTRL:
 752                 ret = kvm_s390_get_mem_control(kvm, attr);
 753                 break;
 754         case KVM_S390_VM_TOD:
 755                 ret = kvm_s390_get_tod(kvm, attr);
 756                 break;
 757         case KVM_S390_VM_CPU_MODEL:
 758                 ret = kvm_s390_get_cpu_model(kvm, attr);
 759                 break;
 760         default:
 761                 ret = -ENXIO;
 762                 break;
 763         }
 764
 765         return ret;
 766 }
 767
 768 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 769 {
 770         int ret;
 771
 772         switch (attr->group) {
 773         case KVM_S390_VM_MEM_CTRL:
 774                 switch (attr->attr) {
 775                 case KVM_S390_VM_MEM_ENABLE_CMMA:
 776                 case KVM_S390_VM_MEM_CLR_CMMA:
 777                 case KVM_S390_VM_MEM_LIMIT_SIZE:
 778                         ret = 0;
 779                         break;
 780                 default:
 781                         ret = -ENXIO;
 782                         break;
 783                 }
 784                 break;
 785         case KVM_S390_VM_TOD:
 786                 switch (attr->attr) {
 787                 case KVM_S390_VM_TOD_LOW:
 788                 case KVM_S390_VM_TOD_HIGH:
 789                         ret = 0;
 790                         break;
 791                 default:
 792                         ret = -ENXIO;
 793                         break;
 794                 }
 795                 break;
 796         case KVM_S390_VM_CPU_MODEL:
 797                 switch (attr->attr) {
 798                 case KVM_S390_VM_CPU_PROCESSOR:
 799                 case KVM_S390_VM_CPU_MACHINE:
 800                         ret = 0;
 801                         break;
 802                 default:
 803                         ret = -ENXIO;
 804                         break;
 805                 }
 806                 break;
 807         case KVM_S390_VM_CRYPTO:
 808                 switch (attr->attr) {
 809                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 810                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 811                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 812                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 813                         ret = 0;
 814                         break;
 815                 default:
 816                         ret = -ENXIO;
 817                         break;
 818                 }
 819                 break;
 820         default:
 821                 ret = -ENXIO;
 822                 break;
 823         }
 824
 825         return ret;
 826 }
 827
 828 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 829 {
 830         uint8_t *keys;
 831         uint64_t hva;
 832         unsigned long curkey;
 833         int i, r = 0;
 834
 835         if (args->flags != 0)
 836                 return -EINVAL;
 837
 838         /* Is this guest using storage keys? */
 839         if (!mm_use_skey(current->mm))
 840                 return KVM_S390_GET_SKEYS_NONE;
 841
 842         /* Enforce sane limit on memory allocation */
 843         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
 844                 return -EINVAL;
 845
 846         keys = kmalloc_array(args->count, sizeof(uint8_t),
 847                              GFP_KERNEL | __GFP_NOWARN);
 848         if (!keys)
 849                 keys = vmalloc(sizeof(uint8_t) * args->count);
 850         if (!keys)
 851                 return -ENOMEM;
 852
 853         for (i = 0; i < args->count; i++) {
 854                 hva = gfn_to_hva(kvm, args->start_gfn + i);
 855                 if (kvm_is_error_hva(hva)) {
 856                         r = -EFAULT;
 857                         goto out;
 858                 }
 859
 860                 curkey = get_guest_storage_key(current->mm, hva);
 861                 if (IS_ERR_VALUE(curkey)) {
 862                         r = curkey;
 863                         goto out;
 864                 }
 865                 keys[i] = curkey;
 866         }
 867
 868         r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
 869                          sizeof(uint8_t) * args->count);
 870         if (r)
 871                 r = -EFAULT;
 872 out:
 873         kvfree(keys);
 874         return r;
 875 }
 876
 877 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 878 {
 879         uint8_t *keys;
 880         uint64_t hva;
 881         int i, r = 0;
 882
 883         if (args->flags != 0)
 884                 return -EINVAL;
 885
 886         /* Enforce sane limit on memory allocation */
 887         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
 888                 return -EINVAL;
 889
 890         keys = kmalloc_array(args->count, sizeof(uint8_t),
 891                              GFP_KERNEL | __GFP_NOWARN);
 892         if (!keys)
 893                 keys = vmalloc(sizeof(uint8_t) * args->count);
 894         if (!keys)
 895                 return -ENOMEM;
 896
 897         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
 898                            sizeof(uint8_t) * args->count);
 899         if (r) {
 900                 r = -EFAULT;
 901                 goto out;
 902         }
 903
 904         /* Enable storage key handling for the guest */
 905         r = s390_enable_skey();
 906         if (r)
 907                 goto out;
 908
 909         for (i = 0; i < args->count; i++) {
 910                 hva = gfn_to_hva(kvm, args->start_gfn + i);
 911                 if (kvm_is_error_hva(hva)) {
 912                         r = -EFAULT;
 913                         goto out;
 914                 }
 915
 916                 /* Lowest order bit is reserved */
 917                 if (keys[i] & 0x01) {
 918                         r = -EINVAL;
 919                         goto out;
 920                 }
 921
 922                 r = set_guest_storage_key(current->mm, hva,
 923                                           (unsigned long)keys[i], 0);
 924                 if (r)
 925                         goto out;
 926         }
 927 out:
 928         kvfree(keys);
 929         return r;
 930 }
 931
 932 long kvm_arch_vm_ioctl(struct file *filp,
 933                        unsigned int ioctl, unsigned long arg)
 934 {
 935         struct kvm *kvm = filp->private_data;
 936         void __user *argp = (void __user *)arg;
 937         struct kvm_device_attr attr;
 938         int r;
 939
 940         switch (ioctl) {
 941         case KVM_S390_INTERRUPT: {
 942                 struct kvm_s390_interrupt s390int;
 943
 944                 r = -EFAULT;
 945                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
 946                         break;
 947                 r = kvm_s390_inject_vm(kvm, &s390int);
 948                 break;
 949         }
 950         case KVM_ENABLE_CAP: {
 951                 struct kvm_enable_cap cap;
 952                 r = -EFAULT;
 953                 if (copy_from_user(&cap, argp, sizeof(cap)))
 954                         break;
 955                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
 956                 break;
 957         }
 958         case KVM_CREATE_IRQCHIP: {
 959                 struct kvm_irq_routing_entry routing;
 960
 961                 r = -EINVAL;
 962                 if (kvm->arch.use_irqchip) {
 963                         /* Set up dummy routing. */
 964                         memset(&routing, 0, sizeof(routing));
 965                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
 966                 }
 967                 break;
 968         }
 969         case KVM_SET_DEVICE_ATTR: {
 970                 r = -EFAULT;
 971                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
 972                         break;
 973                 r = kvm_s390_vm_set_attr(kvm, &attr);
 974                 break;
 975         }
 976         case KVM_GET_DEVICE_ATTR: {
 977                 r = -EFAULT;
 978                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
 979                         break;
 980                 r = kvm_s390_vm_get_attr(kvm, &attr);
 981                 break;
 982         }
 983         case KVM_HAS_DEVICE_ATTR: {
 984                 r = -EFAULT;
 985                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
 986                         break;
 987                 r = kvm_s390_vm_has_attr(kvm, &attr);
 988                 break;
 989         }
 990         case KVM_S390_GET_SKEYS: {
 991                 struct kvm_s390_skeys args;
 992
 993                 r = -EFAULT;
 994                 if (copy_from_user(&args, argp,
 995                                    sizeof(struct kvm_s390_skeys)))
 996                         break;
 997                 r = kvm_s390_get_skeys(kvm, &args);
 998                 break;
 999         }
1000         case KVM_S390_SET_SKEYS: {
1001                 struct kvm_s390_skeys args;
1002
1003                 r = -EFAULT;
1004                 if (copy_from_user(&args, argp,
1005                                    sizeof(struct kvm_s390_skeys)))
1006                         break;
1007                 r = kvm_s390_set_skeys(kvm, &args);
1008                 break;
1009         }
1010         default:
1011                 r = -ENOTTY;
1012         }
1013
1014         return r;
1015 }
1016
1017 static int kvm_s390_query_ap_config(u8 *config)
1018 {
1019         u32 fcn_code = 0x04000000UL;
1020         u32 cc = 0;
1021
1022         memset(config, 0, 128);
1023         asm volatile(
1024                 "lgr 0,%1\n"
1025                 "lgr 2,%2\n"
1026                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1027                 "0: ipm %0\n"
1028                 "srl %0,28\n"
1029                 "1:\n"
1030                 EX_TABLE(0b, 1b)
1031                 : "+r" (cc)
1032                 : "r" (fcn_code), "r" (config)
1033                 : "cc", "0", "2", "memory"
1034         );
1035
1036         return cc;
1037 }
1038
1039 static int kvm_s390_apxa_installed(void)
1040 {
1041         u8 config[128];
1042         int cc;
1043
1044         if (test_facility(2) && test_facility(12)) {
1045                 cc = kvm_s390_query_ap_config(config);
1046
1047                 if (cc)
1048                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1049                 else
1050                         return config[0] & 0x40;
1051         }
1052
1053         return 0;
1054 }
1055
1056 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1057 {
1058         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1059
1060         if (kvm_s390_apxa_installed())
1061                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1062         else
1063                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1064 }
1065
1066 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1067 {
1068         get_cpu_id(cpu_id);
1069         cpu_id->version = 0xff;
1070 }
1071
1072 static int kvm_s390_crypto_init(struct kvm *kvm)
1073 {
1074         if (!test_kvm_facility(kvm, 76))
1075                 return 0;
1076
1077         kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1078                                          GFP_KERNEL | GFP_DMA);
1079         if (!kvm->arch.crypto.crycb)
1080                 return -ENOMEM;
1081
1082         kvm_s390_set_crycb_format(kvm);
1083
1084         /* Enable AES/DEA protected key functions by default */
1085         kvm->arch.crypto.aes_kw = 1;
1086         kvm->arch.crypto.dea_kw = 1;
1087         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1088                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1089         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1090                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1091
1092         return 0;
1093 }
1094
1095 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1096 {
1097         int i, rc;
1098         char debug_name[16];
1099         static unsigned long sca_offset;
1100
1101         rc = -EINVAL;
1102 #ifdef CONFIG_KVM_S390_UCONTROL
1103         if (type & ~KVM_VM_S390_UCONTROL)
1104                 goto out_err;
1105         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1106                 goto out_err;
1107 #else
1108         if (type)
1109                 goto out_err;
1110 #endif
1111
1112         rc = s390_enable_sie();
1113         if (rc)
1114                 goto out_err;
1115
1116         rc = -ENOMEM;
1117
1118         kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
1119         if (!kvm->arch.sca)
1120                 goto out_err;
1121         spin_lock(&kvm_lock);
1122         sca_offset = (sca_offset + 16) & 0x7f0;
1123         kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
1124         spin_unlock(&kvm_lock);
1125
1126         sprintf(debug_name, "kvm-%u", current->pid);
1127
1128         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1129         if (!kvm->arch.dbf)
1130                 goto out_err;
1131
1132         /*
1133          * The architectural maximum amount of facilities is 16 kbit. To store
1134          * this amount, 2 kbyte of memory is required. Thus we need a full
1135          * page to hold the guest facility list (arch.model.fac->list) and the
1136          * facility mask (arch.model.fac->mask). Its address size has to be
1137          * 31 bits and word aligned.
1138          */
1139         kvm->arch.model.fac =
1140                 (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1141         if (!kvm->arch.model.fac)
1142                 goto out_err;
1143
1144         /* Populate the facility mask initially. */
1145         memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1146                S390_ARCH_FAC_LIST_SIZE_BYTE);
1147         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1148                 if (i < kvm_s390_fac_list_mask_size())
1149                         kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1150                 else
1151                         kvm->arch.model.fac->mask[i] = 0UL;
1152         }
1153
1154         /* Populate the facility list initially. */
1155         memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1156                S390_ARCH_FAC_LIST_SIZE_BYTE);
1157
1158         kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1159         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1160
1161         if (kvm_s390_crypto_init(kvm) < 0)
1162                 goto out_err;
1163
1164         spin_lock_init(&kvm->arch.float_int.lock);
1165         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1166                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1167         init_waitqueue_head(&kvm->arch.ipte_wq);
1168         mutex_init(&kvm->arch.ipte_mutex);
1169
1170         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1171         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1172
1173         if (type & KVM_VM_S390_UCONTROL) {
1174                 kvm->arch.gmap = NULL;
1175         } else {
1176                 kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1177                 if (!kvm->arch.gmap)
1178                         goto out_err;
1179                 kvm->arch.gmap->private = kvm;
1180                 kvm->arch.gmap->pfault_enabled = 0;
1181         }
1182
1183         kvm->arch.css_support = 0;
1184         kvm->arch.use_irqchip = 0;
1185         kvm->arch.epoch = 0;
1186
1187         spin_lock_init(&kvm->arch.start_stop_lock);
1188         KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
1189
1190         return 0;
1191 out_err:
1192         kfree(kvm->arch.crypto.crycb);
1193         free_page((unsigned long)kvm->arch.model.fac);
1194         debug_unregister(kvm->arch.dbf);
1195         free_page((unsigned long)(kvm->arch.sca));
1196         KVM_EVENT(3, "creation of vm failed: %d", rc);
1197         return rc;
1198 }
1199
1200 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1201 {
1202         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1203         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1204         kvm_s390_clear_local_irqs(vcpu);
1205         kvm_clear_async_pf_completion_queue(vcpu);
1206         if (!kvm_is_ucontrol(vcpu->kvm)) {
1207                 clear_bit(63 - vcpu->vcpu_id,
1208                           (unsigned long *) &vcpu->kvm->arch.sca->mcn);
1209                 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
1210                     (__u64) vcpu->arch.sie_block)
1211                         vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
1212         }
1213         smp_mb();
1214
1215         if (kvm_is_ucontrol(vcpu->kvm))
1216                 gmap_free(vcpu->arch.gmap);
1217
1218         if (vcpu->kvm->arch.use_cmma)
1219                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1220         free_page((unsigned long)(vcpu->arch.sie_block));
1221
1222         kvm_vcpu_uninit(vcpu);
1223         kmem_cache_free(kvm_vcpu_cache, vcpu);
1224 }
1225
1226 static void kvm_free_vcpus(struct kvm *kvm)
1227 {
1228         unsigned int i;
1229         struct kvm_vcpu *vcpu;
1230
1231         kvm_for_each_vcpu(i, vcpu, kvm)
1232                 kvm_arch_vcpu_destroy(vcpu);
1233
1234         mutex_lock(&kvm->lock);
1235         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1236                 kvm->vcpus[i] = NULL;
1237
1238         atomic_set(&kvm->online_vcpus, 0);
1239         mutex_unlock(&kvm->lock);
1240 }
1241
1242 void kvm_arch_destroy_vm(struct kvm *kvm)
1243 {
1244         kvm_free_vcpus(kvm);
1245         free_page((unsigned long)kvm->arch.model.fac);
1246         free_page((unsigned long)(kvm->arch.sca));
1247         debug_unregister(kvm->arch.dbf);
1248         kfree(kvm->arch.crypto.crycb);
1249         if (!kvm_is_ucontrol(kvm))
1250                 gmap_free(kvm->arch.gmap);
1251         kvm_s390_destroy_adapters(kvm);
1252         kvm_s390_clear_float_irqs(kvm);
1253         KVM_EVENT(3, "vm 0x%p destroyed", kvm);
1254 }
1255
1256 /* Section: vcpu related */
1257 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1258 {
1259         vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1260         if (!vcpu->arch.gmap)
1261                 return -ENOMEM;
1262         vcpu->arch.gmap->private = vcpu->kvm;
1263
1264         return 0;
1265 }
1266
1267 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1268 {
1269         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1270         kvm_clear_async_pf_completion_queue(vcpu);
1271         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1272                                     KVM_SYNC_GPRS |
1273                                     KVM_SYNC_ACRS |
1274                                     KVM_SYNC_CRS |
1275                                     KVM_SYNC_ARCH0 |
1276                                     KVM_SYNC_PFAULT;
1277         if (test_kvm_facility(vcpu->kvm, 129))
1278                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1279
1280         if (kvm_is_ucontrol(vcpu->kvm))
1281                 return __kvm_ucontrol_vcpu_init(vcpu);
1282
1283         return 0;
1284 }
1285
1286 /*
1287  * Backs up the current FP/VX register save area on a particular
1288  * destination.  Used to switch between different register save
1289  * areas.
1290  */
1291 static inline void save_fpu_to(struct fpu *dst)
1292 {
1293         dst->fpc = current->thread.fpu.fpc;
1294         dst->flags = current->thread.fpu.flags;
1295         dst->regs = current->thread.fpu.regs;
1296 }
1297
1298 /*
1299  * Switches the FP/VX register save area from which to lazy
1300  * restore register contents.
1301  */
1302 static inline void load_fpu_from(struct fpu *from)
1303 {
1304         current->thread.fpu.fpc = from->fpc;
1305         current->thread.fpu.flags = from->flags;
1306         current->thread.fpu.regs = from->regs;
1307 }
1308
1309 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1310 {
1311         /* Save host register state */
1312         save_fpu_regs();
1313         save_fpu_to(&vcpu->arch.host_fpregs);
1314
1315         if (test_kvm_facility(vcpu->kvm, 129)) {
1316                 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1317                 current->thread.fpu.flags = FPU_USE_VX;
1318                 /*
1319                  * Use the register save area in the SIE-control block
1320                  * for register restore and save in kvm_arch_vcpu_put()
1321                  */
1322                 current->thread.fpu.vxrs =
1323                         (__vector128 *)&vcpu->run->s.regs.vrs;
1324                 /* Always enable the vector extension for KVM */
1325                 __ctl_set_vx();
1326         } else
1327                 load_fpu_from(&vcpu->arch.guest_fpregs);
1328
1329         if (test_fp_ctl(current->thread.fpu.fpc))
1330                 /* User space provided an invalid FPC, let's clear it */
1331                 current->thread.fpu.fpc = 0;
1332
1333         save_access_regs(vcpu->arch.host_acrs);
1334         restore_access_regs(vcpu->run->s.regs.acrs);
1335         gmap_enable(vcpu->arch.gmap);
1336         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1337 }
1338
1339 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1340 {
1341         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1342         gmap_disable(vcpu->arch.gmap);
1343
1344         save_fpu_regs();
1345
1346         if (test_kvm_facility(vcpu->kvm, 129))
1347                 /*
1348                  * kvm_arch_vcpu_load() set up the register save area to
1349                  * the &vcpu->run->s.regs.vrs and, thus, the vector registers
1350                  * are already saved.  Only the floating-point control must be
1351                  * copied.
1352                  */
1353                 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1354         else
1355                 save_fpu_to(&vcpu->arch.guest_fpregs);
1356         load_fpu_from(&vcpu->arch.host_fpregs);
1357
1358         save_access_regs(vcpu->run->s.regs.acrs);
1359         restore_access_regs(vcpu->arch.host_acrs);
1360 }
1361
1362 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1363 {
1364         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1365         vcpu->arch.sie_block->gpsw.mask = 0UL;
1366         vcpu->arch.sie_block->gpsw.addr = 0UL;
1367         kvm_s390_set_prefix(vcpu, 0);
1368         vcpu->arch.sie_block->cputm     = 0UL;
1369         vcpu->arch.sie_block->ckc       = 0UL;
1370         vcpu->arch.sie_block->todpr     = 0;
1371         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1372         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1373         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1374         vcpu->arch.guest_fpregs.fpc = 0;
1375         asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1376         vcpu->arch.sie_block->gbea = 1;
1377         vcpu->arch.sie_block->pp = 0;
1378         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1379         kvm_clear_async_pf_completion_queue(vcpu);
1380         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1381                 kvm_s390_vcpu_stop(vcpu);
1382         kvm_s390_clear_local_irqs(vcpu);
1383 }
1384
1385 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1386 {
1387         mutex_lock(&vcpu->kvm->lock);
1388         preempt_disable();
1389         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1390         preempt_enable();
1391         mutex_unlock(&vcpu->kvm->lock);
1392         if (!kvm_is_ucontrol(vcpu->kvm))
1393                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1394 }
1395
1396 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1397 {
1398         if (!test_kvm_facility(vcpu->kvm, 76))
1399                 return;
1400
1401         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1402
1403         if (vcpu->kvm->arch.crypto.aes_kw)
1404                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1405         if (vcpu->kvm->arch.crypto.dea_kw)
1406                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1407
1408         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1409 }
1410
1411 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1412 {
1413         free_page(vcpu->arch.sie_block->cbrlo);
1414         vcpu->arch.sie_block->cbrlo = 0;
1415 }
1416
1417 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1418 {
1419         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1420         if (!vcpu->arch.sie_block->cbrlo)
1421                 return -ENOMEM;
1422
1423         vcpu->arch.sie_block->ecb2 |= 0x80;
1424         vcpu->arch.sie_block->ecb2 &= ~0x08;
1425         return 0;
1426 }
1427
1428 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1429 {
1430         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1431
1432         vcpu->arch.cpu_id = model->cpu_id;
1433         vcpu->arch.sie_block->ibc = model->ibc;
1434         vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1435 }
1436
1437 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1438 {
1439         int rc = 0;
1440
1441         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1442                                                     CPUSTAT_SM |
1443                                                     CPUSTAT_STOPPED);
1444
1445         if (test_kvm_facility(vcpu->kvm, 78))
1446                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1447         else if (test_kvm_facility(vcpu->kvm, 8))
1448                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1449
1450         kvm_s390_vcpu_setup_model(vcpu);
1451
1452         vcpu->arch.sie_block->ecb   = 6;
1453         if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1454                 vcpu->arch.sie_block->ecb |= 0x10;
1455
1456         vcpu->arch.sie_block->ecb2  = 8;
1457         vcpu->arch.sie_block->eca   = 0xC1002000U;
1458         if (sclp.has_siif)
1459                 vcpu->arch.sie_block->eca |= 1;
1460         if (sclp.has_sigpif)
1461                 vcpu->arch.sie_block->eca |= 0x10000000U;
1462         if (test_kvm_facility(vcpu->kvm, 129)) {
1463                 vcpu->arch.sie_block->eca |= 0x00020000;
1464                 vcpu->arch.sie_block->ecd |= 0x20000000;
1465         }
1466         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1467
1468         if (vcpu->kvm->arch.use_cmma) {
1469                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1470                 if (rc)
1471                         return rc;
1472         }
1473         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1474         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1475
1476         kvm_s390_vcpu_crypto_setup(vcpu);
1477
1478         return rc;
1479 }
1480
1481 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1482                                       unsigned int id)
1483 {
1484         struct kvm_vcpu *vcpu;
1485         struct sie_page *sie_page;
1486         int rc = -EINVAL;
1487
1488         if (id >= KVM_MAX_VCPUS)
1489                 goto out;
1490
1491         rc = -ENOMEM;
1492
1493         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1494         if (!vcpu)
1495                 goto out;
1496
1497         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1498         if (!sie_page)
1499                 goto out_free_cpu;
1500
1501         vcpu->arch.sie_block = &sie_page->sie_block;
1502         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1503
1504         vcpu->arch.sie_block->icpua = id;
1505         if (!kvm_is_ucontrol(kvm)) {
1506                 if (!kvm->arch.sca) {
1507                         WARN_ON_ONCE(1);
1508                         goto out_free_cpu;
1509                 }
1510                 if (!kvm->arch.sca->cpu[id].sda)
1511                         kvm->arch.sca->cpu[id].sda =
1512                                 (__u64) vcpu->arch.sie_block;
1513                 vcpu->arch.sie_block->scaoh =
1514                         (__u32)(((__u64)kvm->arch.sca) >> 32);
1515                 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
1516                 set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
1517         }
1518
1519         spin_lock_init(&vcpu->arch.local_int.lock);
1520         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1521         vcpu->arch.local_int.wq = &vcpu->wq;
1522         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1523
1524         /*
1525          * Allocate a save area for floating-point registers.  If the vector
1526          * extension is available, register contents are saved in the SIE
1527          * control block.  The allocated save area is still required in
1528          * particular places, for example, in kvm_s390_vcpu_store_status().
1529          */
1530         vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
1531                                                GFP_KERNEL);
1532         if (!vcpu->arch.guest_fpregs.fprs) {
1533                 rc = -ENOMEM;
1534                 goto out_free_sie_block;
1535         }
1536
1537         rc = kvm_vcpu_init(vcpu, kvm, id);
1538         if (rc)
1539                 goto out_free_sie_block;
1540         VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1541                  vcpu->arch.sie_block);
1542         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1543
1544         return vcpu;
1545 out_free_sie_block:
1546         free_page((unsigned long)(vcpu->arch.sie_block));
1547 out_free_cpu:
1548         kmem_cache_free(kvm_vcpu_cache, vcpu);
1549 out:
1550         return ERR_PTR(rc);
1551 }
1552
1553 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1554 {
1555         return kvm_s390_vcpu_has_irq(vcpu, 0);
1556 }
1557
1558 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1559 {
1560         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1561         exit_sie(vcpu);
1562 }
1563
1564 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1565 {
1566         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1567 }
1568
1569 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1570 {
1571         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1572         exit_sie(vcpu);
1573 }
1574
1575 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1576 {
1577         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1578 }
1579
1580 /*
1581  * Kick a guest cpu out of SIE and wait until SIE is not running.
1582  * If the CPU is not running (e.g. waiting as idle) the function will
1583  * return immediately. */
1584 void exit_sie(struct kvm_vcpu *vcpu)
1585 {
1586         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1587         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1588                 cpu_relax();
1589 }
1590
1591 /* Kick a guest cpu out of SIE to process a request synchronously */
1592 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1593 {
1594         kvm_make_request(req, vcpu);
1595         kvm_s390_vcpu_request(vcpu);
1596 }
1597
1598 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1599 {
1600         int i;
1601         struct kvm *kvm = gmap->private;
1602         struct kvm_vcpu *vcpu;
1603
1604         kvm_for_each_vcpu(i, vcpu, kvm) {
1605                 /* match against both prefix pages */
1606                 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1607                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1608                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1609                 }
1610         }
1611 }
1612
1613 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1614 {
1615         /* kvm common code refers to this, but never calls it */
1616         BUG();
1617         return 0;
1618 }
1619
1620 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1621                                            struct kvm_one_reg *reg)
1622 {
1623         int r = -EINVAL;
1624
1625         switch (reg->id) {
1626         case KVM_REG_S390_TODPR:
1627                 r = put_user(vcpu->arch.sie_block->todpr,
1628                              (u32 __user *)reg->addr);
1629                 break;
1630         case KVM_REG_S390_EPOCHDIFF:
1631                 r = put_user(vcpu->arch.sie_block->epoch,
1632                              (u64 __user *)reg->addr);
1633                 break;
1634         case KVM_REG_S390_CPU_TIMER:
1635                 r = put_user(vcpu->arch.sie_block->cputm,
1636                              (u64 __user *)reg->addr);
1637                 break;
1638         case KVM_REG_S390_CLOCK_COMP:
1639                 r = put_user(vcpu->arch.sie_block->ckc,
1640                              (u64 __user *)reg->addr);
1641                 break;
1642         case KVM_REG_S390_PFTOKEN:
1643                 r = put_user(vcpu->arch.pfault_token,
1644                              (u64 __user *)reg->addr);
1645                 break;
1646         case KVM_REG_S390_PFCOMPARE:
1647                 r = put_user(vcpu->arch.pfault_compare,
1648                              (u64 __user *)reg->addr);
1649                 break;
1650         case KVM_REG_S390_PFSELECT:
1651                 r = put_user(vcpu->arch.pfault_select,
1652                              (u64 __user *)reg->addr);
1653                 break;
1654         case KVM_REG_S390_PP:
1655                 r = put_user(vcpu->arch.sie_block->pp,
1656                              (u64 __user *)reg->addr);
1657                 break;
1658         case KVM_REG_S390_GBEA:
1659                 r = put_user(vcpu->arch.sie_block->gbea,
1660                              (u64 __user *)reg->addr);
1661                 break;
1662         default:
1663                 break;
1664         }
1665
1666         return r;
1667 }
1668
1669 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1670                                            struct kvm_one_reg *reg)
1671 {
1672         int r = -EINVAL;
1673
1674         switch (reg->id) {
1675         case KVM_REG_S390_TODPR:
1676                 r = get_user(vcpu->arch.sie_block->todpr,
1677                              (u32 __user *)reg->addr);
1678                 break;
1679         case KVM_REG_S390_EPOCHDIFF:
1680                 r = get_user(vcpu->arch.sie_block->epoch,
1681                              (u64 __user *)reg->addr);
1682                 break;
1683         case KVM_REG_S390_CPU_TIMER:
1684                 r = get_user(vcpu->arch.sie_block->cputm,
1685                              (u64 __user *)reg->addr);
1686                 break;
1687         case KVM_REG_S390_CLOCK_COMP:
1688                 r = get_user(vcpu->arch.sie_block->ckc,
1689                              (u64 __user *)reg->addr);
1690                 break;
1691         case KVM_REG_S390_PFTOKEN:
1692                 r = get_user(vcpu->arch.pfault_token,
1693                              (u64 __user *)reg->addr);
1694                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1695                         kvm_clear_async_pf_completion_queue(vcpu);
1696                 break;
1697         case KVM_REG_S390_PFCOMPARE:
1698                 r = get_user(vcpu->arch.pfault_compare,
1699                              (u64 __user *)reg->addr);
1700                 break;
1701         case KVM_REG_S390_PFSELECT:
1702                 r = get_user(vcpu->arch.pfault_select,
1703                              (u64 __user *)reg->addr);
1704                 break;
1705         case KVM_REG_S390_PP:
1706                 r = get_user(vcpu->arch.sie_block->pp,
1707                              (u64 __user *)reg->addr);
1708                 break;
1709         case KVM_REG_S390_GBEA:
1710                 r = get_user(vcpu->arch.sie_block->gbea,
1711                              (u64 __user *)reg->addr);
1712                 break;
1713         default:
1714                 break;
1715         }
1716
1717         return r;
1718 }
1719
1720 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1721 {
1722         kvm_s390_vcpu_initial_reset(vcpu);
1723         return 0;
1724 }
1725
1726 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1727 {
1728         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1729         return 0;
1730 }
1731
1732 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1733 {
1734         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1735         return 0;
1736 }
1737
1738 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1739                                   struct kvm_sregs *sregs)
1740 {
1741         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1742         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1743         restore_access_regs(vcpu->run->s.regs.acrs);
1744         return 0;
1745 }
1746
1747 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1748                                   struct kvm_sregs *sregs)
1749 {
1750         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1751         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1752         return 0;
1753 }
1754
1755 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1756 {
1757         if (test_fp_ctl(fpu->fpc))
1758                 return -EINVAL;
1759         memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1760         vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1761         save_fpu_regs();
1762         load_fpu_from(&vcpu->arch.guest_fpregs);
1763         return 0;
1764 }
1765
1766 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1767 {
1768         memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1769         fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1770         return 0;
1771 }
1772
1773 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1774 {
1775         int rc = 0;
1776
1777         if (!is_vcpu_stopped(vcpu))
1778                 rc = -EBUSY;
1779         else {
1780                 vcpu->run->psw_mask = psw.mask;
1781                 vcpu->run->psw_addr = psw.addr;
1782         }
1783         return rc;
1784 }
1785
1786 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1787                                   struct kvm_translation *tr)
1788 {
1789         return -EINVAL; /* not implemented yet */
1790 }
1791
1792 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1793                               KVM_GUESTDBG_USE_HW_BP | \
1794                               KVM_GUESTDBG_ENABLE)
1795
1796 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1797                                         struct kvm_guest_debug *dbg)
1798 {
1799         int rc = 0;
1800
1801         vcpu->guest_debug = 0;
1802         kvm_s390_clear_bp_data(vcpu);
1803
1804         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1805                 return -EINVAL;
1806
1807         if (dbg->control & KVM_GUESTDBG_ENABLE) {
1808                 vcpu->guest_debug = dbg->control;
1809                 /* enforce guest PER */
1810                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1811
1812                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1813                         rc = kvm_s390_import_bp_data(vcpu, dbg);
1814         } else {
1815                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1816                 vcpu->arch.guestdbg.last_bp = 0;
1817         }
1818
1819         if (rc) {
1820                 vcpu->guest_debug = 0;
1821                 kvm_s390_clear_bp_data(vcpu);
1822                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1823         }
1824
1825         return rc;
1826 }
1827
1828 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1829                                     struct kvm_mp_state *mp_state)
1830 {
1831         /* CHECK_STOP and LOAD are not supported yet */
1832         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1833                                        KVM_MP_STATE_OPERATING;
1834 }
1835
1836 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1837                                     struct kvm_mp_state *mp_state)
1838 {
1839         int rc = 0;
1840
1841         /* user space knows about this interface - let it control the state */
1842         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1843
1844         switch (mp_state->mp_state) {
1845         case KVM_MP_STATE_STOPPED:
1846                 kvm_s390_vcpu_stop(vcpu);
1847                 break;
1848         case KVM_MP_STATE_OPERATING:
1849                 kvm_s390_vcpu_start(vcpu);
1850                 break;
1851         case KVM_MP_STATE_LOAD:
1852         case KVM_MP_STATE_CHECK_STOP:
1853                 /* fall through - CHECK_STOP and LOAD are not supported yet */
1854         default:
1855                 rc = -ENXIO;
1856         }
1857
1858         return rc;
1859 }
1860
1861 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1862 {
1863         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1864 }
1865
1866 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1867 {
1868 retry:
1869         kvm_s390_vcpu_request_handled(vcpu);
1870         if (!vcpu->requests)
1871                 return 0;
1872         /*
1873          * We use MMU_RELOAD just to re-arm the ipte notifier for the
1874          * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1875          * This ensures that the ipte instruction for this request has
1876          * already finished. We might race against a second unmapper that
1877          * wants to set the blocking bit. Lets just retry the request loop.
1878          */
1879         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1880                 int rc;
1881                 rc = gmap_ipte_notify(vcpu->arch.gmap,
1882                                       kvm_s390_get_prefix(vcpu),
1883                                       PAGE_SIZE * 2);
1884                 if (rc)
1885                         return rc;
1886                 goto retry;
1887         }
1888
1889         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1890                 vcpu->arch.sie_block->ihcpu = 0xffff;
1891                 goto retry;
1892         }
1893
1894         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1895                 if (!ibs_enabled(vcpu)) {
1896                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1897                         atomic_or(CPUSTAT_IBS,
1898                                         &vcpu->arch.sie_block->cpuflags);
1899                 }
1900                 goto retry;
1901         }
1902
1903         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1904                 if (ibs_enabled(vcpu)) {
1905                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1906                         atomic_andnot(CPUSTAT_IBS,
1907                                           &vcpu->arch.sie_block->cpuflags);
1908                 }
1909                 goto retry;
1910         }
1911
1912         /* nothing to do, just clear the request */
1913         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
1914
1915         return 0;
1916 }
1917
1918 /**
1919  * kvm_arch_fault_in_page - fault-in guest page if necessary
1920  * @vcpu: The corresponding virtual cpu
1921  * @gpa: Guest physical address
1922  * @writable: Whether the page should be writable or not
1923  *
1924  * Make sure that a guest page has been faulted-in on the host.
1925  *
1926  * Return: Zero on success, negative error code otherwise.
1927  */
1928 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1929 {
1930         return gmap_fault(vcpu->arch.gmap, gpa,
1931                           writable ? FAULT_FLAG_WRITE : 0);
1932 }
1933
1934 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1935                                       unsigned long token)
1936 {
1937         struct kvm_s390_interrupt inti;
1938         struct kvm_s390_irq irq;
1939
1940         if (start_token) {
1941                 irq.u.ext.ext_params2 = token;
1942                 irq.type = KVM_S390_INT_PFAULT_INIT;
1943                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1944         } else {
1945                 inti.type = KVM_S390_INT_PFAULT_DONE;
1946                 inti.parm64 = token;
1947                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1948         }
1949 }
1950
1951 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1952                                      struct kvm_async_pf *work)
1953 {
1954         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1955         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1956 }
1957
1958 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1959                                  struct kvm_async_pf *work)
1960 {
1961         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1962         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1963 }
1964
1965 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1966                                struct kvm_async_pf *work)
1967 {
1968         /* s390 will always inject the page directly */
1969 }
1970
1971 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1972 {
1973         /*
1974          * s390 will always inject the page directly,
1975          * but we still want check_async_completion to cleanup
1976          */
1977         return true;
1978 }
1979
1980 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1981 {
1982         hva_t hva;
1983         struct kvm_arch_async_pf arch;
1984         int rc;
1985
1986         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1987                 return 0;
1988         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1989             vcpu->arch.pfault_compare)
1990                 return 0;
1991         if (psw_extint_disabled(vcpu))
1992                 return 0;
1993         if (kvm_s390_vcpu_has_irq(vcpu, 0))
1994                 return 0;
1995         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1996                 return 0;
1997         if (!vcpu->arch.gmap->pfault_enabled)
1998                 return 0;
1999
2000         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2001         hva += current->thread.gmap_addr & ~PAGE_MASK;
2002         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2003                 return 0;
2004
2005         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2006         return rc;
2007 }
2008
2009 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2010 {
2011         int rc, cpuflags;
2012
2013         /*
2014          * On s390 notifications for arriving pages will be delivered directly
2015          * to the guest but the house keeping for completed pfaults is
2016          * handled outside the worker.
2017          */
2018         kvm_check_async_pf_completion(vcpu);
2019
2020         memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
2021
2022         if (need_resched())
2023                 schedule();
2024
2025         if (test_cpu_flag(CIF_MCCK_PENDING))
2026                 s390_handle_mcck();
2027
2028         if (!kvm_is_ucontrol(vcpu->kvm)) {
2029                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2030                 if (rc)
2031                         return rc;
2032         }
2033
2034         rc = kvm_s390_handle_requests(vcpu);
2035         if (rc)
2036                 return rc;
2037
2038         if (guestdbg_enabled(vcpu)) {
2039                 kvm_s390_backup_guest_per_regs(vcpu);
2040                 kvm_s390_patch_guest_per_regs(vcpu);
2041         }
2042
2043         vcpu->arch.sie_block->icptcode = 0;
2044         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2045         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2046         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2047
2048         return 0;
2049 }
2050
2051 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2052 {
2053         psw_t *psw = &vcpu->arch.sie_block->gpsw;
2054         u8 opcode;
2055         int rc;
2056
2057         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2058         trace_kvm_s390_sie_fault(vcpu);
2059
2060         /*
2061          * We want to inject an addressing exception, which is defined as a
2062          * suppressing or terminating exception. However, since we came here
2063          * by a DAT access exception, the PSW still points to the faulting
2064          * instruction since DAT exceptions are nullifying. So we've got
2065          * to look up the current opcode to get the length of the instruction
2066          * to be able to forward the PSW.
2067          */
2068         rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2069         if (rc)
2070                 return kvm_s390_inject_prog_cond(vcpu, rc);
2071         psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2072
2073         return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2074 }
2075
2076 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2077 {
2078         int rc = -1;
2079
2080         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2081                    vcpu->arch.sie_block->icptcode);
2082         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2083
2084         if (guestdbg_enabled(vcpu))
2085                 kvm_s390_restore_guest_per_regs(vcpu);
2086
2087         if (exit_reason >= 0) {
2088                 rc = 0;
2089         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2090                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2091                 vcpu->run->s390_ucontrol.trans_exc_code =
2092                                                 current->thread.gmap_addr;
2093                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2094                 rc = -EREMOTE;
2095
2096         } else if (current->thread.gmap_pfault) {
2097                 trace_kvm_s390_major_guest_pfault(vcpu);
2098                 current->thread.gmap_pfault = 0;
2099                 if (kvm_arch_setup_async_pf(vcpu)) {
2100                         rc = 0;
2101                 } else {
2102                         gpa_t gpa = current->thread.gmap_addr;
2103                         rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
2104                 }
2105         }
2106
2107         if (rc == -1)
2108                 rc = vcpu_post_run_fault_in_sie(vcpu);
2109
2110         memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
2111
2112         if (rc == 0) {
2113                 if (kvm_is_ucontrol(vcpu->kvm))
2114                         /* Don't exit for host interrupts. */
2115                         rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
2116                 else
2117                         rc = kvm_handle_sie_intercept(vcpu);
2118         }
2119
2120         return rc;
2121 }
2122
2123 static int __vcpu_run(struct kvm_vcpu *vcpu)
2124 {
2125         int rc, exit_reason;
2126
2127         /*
2128          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2129          * ning the guest), so that memslots (and other stuff) are protected
2130          */
2131         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2132
2133         do {
2134                 rc = vcpu_pre_run(vcpu);
2135                 if (rc)
2136                         break;
2137
2138                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2139                 /*
2140                  * As PF_VCPU will be used in fault handler, between
2141                  * guest_enter and guest_exit should be no uaccess.
2142                  */
2143                 local_irq_disable();
2144                 __kvm_guest_enter();
2145                 local_irq_enable();
2146                 exit_reason = sie64a(vcpu->arch.sie_block,
2147                                      vcpu->run->s.regs.gprs);
2148                 local_irq_disable();
2149                 __kvm_guest_exit();
2150                 local_irq_enable();
2151                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2152
2153                 rc = vcpu_post_run(vcpu, exit_reason);
2154         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2155
2156         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2157         return rc;
2158 }
2159
2160 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2161 {
2162         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2163         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2164         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2165                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2166         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2167                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2168                 /* some control register changes require a tlb flush */
2169                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2170         }
2171         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2172                 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2173                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2174                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2175                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2176                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2177         }
2178         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2179                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2180                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2181                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2182                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2183                         kvm_clear_async_pf_completion_queue(vcpu);
2184         }
2185         kvm_run->kvm_dirty_regs = 0;
2186 }
2187
2188 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2189 {
2190         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2191         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2192         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2193         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2194         kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2195         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2196         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2197         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2198         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2199         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2200         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2201         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2202 }
2203
2204 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2205 {
2206         int rc;
2207         sigset_t sigsaved;
2208
2209         if (guestdbg_exit_pending(vcpu)) {
2210                 kvm_s390_prepare_debug_exit(vcpu);
2211                 return 0;
2212         }
2213
2214         if (vcpu->sigset_active)
2215                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2216
2217         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2218                 kvm_s390_vcpu_start(vcpu);
2219         } else if (is_vcpu_stopped(vcpu)) {
2220                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2221                                    vcpu->vcpu_id);
2222                 return -EINVAL;
2223         }
2224
2225         sync_regs(vcpu, kvm_run);
2226
2227         might_fault();
2228         rc = __vcpu_run(vcpu);
2229
2230         if (signal_pending(current) && !rc) {
2231                 kvm_run->exit_reason = KVM_EXIT_INTR;
2232                 rc = -EINTR;
2233         }
2234
2235         if (guestdbg_exit_pending(vcpu) && !rc)  {
2236                 kvm_s390_prepare_debug_exit(vcpu);
2237                 rc = 0;
2238         }
2239
2240         if (rc == -EOPNOTSUPP) {
2241                 /* intercept cannot be handled in-kernel, prepare kvm-run */
2242                 kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
2243                 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2244                 kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
2245                 kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
2246                 rc = 0;
2247         }
2248
2249         if (rc == -EREMOTE) {
2250                 /* intercept was handled, but userspace support is needed
2251                  * kvm_run has been prepared by the handler */
2252                 rc = 0;
2253         }
2254
2255         store_regs(vcpu, kvm_run);
2256
2257         if (vcpu->sigset_active)
2258                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2259
2260         vcpu->stat.exit_userspace++;
2261         return rc;
2262 }
2263
2264 /*
2265  * store status at address
2266  * we use have two special cases:
2267  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2268  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2269  */
2270 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2271 {
2272         unsigned char archmode = 1;
2273         unsigned int px;
2274         u64 clkcomp;
2275         int rc;
2276
2277         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2278                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2279                         return -EFAULT;
2280                 gpa = SAVE_AREA_BASE;
2281         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2282                 if (write_guest_real(vcpu, 163, &archmode, 1))
2283                         return -EFAULT;
2284                 gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2285         }
2286         rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2287                              vcpu->arch.guest_fpregs.fprs, 128);
2288         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2289                               vcpu->run->s.regs.gprs, 128);
2290         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2291                               &vcpu->arch.sie_block->gpsw, 16);
2292         px = kvm_s390_get_prefix(vcpu);
2293         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2294                               &px, 4);
2295         rc |= write_guest_abs(vcpu,
2296                               gpa + offsetof(struct save_area, fp_ctrl_reg),
2297                               &vcpu->arch.guest_fpregs.fpc, 4);
2298         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2299                               &vcpu->arch.sie_block->todpr, 4);
2300         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2301                               &vcpu->arch.sie_block->cputm, 8);
2302         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2303         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2304                               &clkcomp, 8);
2305         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2306                               &vcpu->run->s.regs.acrs, 64);
2307         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2308                               &vcpu->arch.sie_block->gcr, 128);
2309         return rc ? -EFAULT : 0;
2310 }
2311
2312 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2313 {
2314         /*
2315          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2316          * copying in vcpu load/put. Lets update our copies before we save
2317          * it into the save area
2318          */
2319         save_fpu_regs();
2320         if (test_kvm_facility(vcpu->kvm, 129)) {
2321                 /*
2322                  * If the vector extension is available, the vector registers
2323                  * which overlaps with floating-point registers are saved in
2324                  * the SIE-control block.  Hence, extract the floating-point
2325                  * registers and the FPC value and store them in the
2326                  * guest_fpregs structure.
2327                  */
2328                 WARN_ON(!is_vx_task(current));    /* XXX remove later */
2329                 vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
2330                 convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
2331                                  current->thread.fpu.vxrs);
2332         } else
2333                 save_fpu_to(&vcpu->arch.guest_fpregs);
2334         save_access_regs(vcpu->run->s.regs.acrs);
2335
2336         return kvm_s390_store_status_unloaded(vcpu, addr);
2337 }
2338
2339 /*
2340  * store additional status at address
2341  */
2342 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2343                                         unsigned long gpa)
2344 {
2345         /* Only bits 0-53 are used for address formation */
2346         if (!(gpa & ~0x3ff))
2347                 return 0;
2348
2349         return write_guest_abs(vcpu, gpa & ~0x3ff,
2350                                (void *)&vcpu->run->s.regs.vrs, 512);
2351 }
2352
2353 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2354 {
2355         if (!test_kvm_facility(vcpu->kvm, 129))
2356                 return 0;
2357
2358         /*
2359          * The guest VXRS are in the host VXRs due to the lazy
2360          * copying in vcpu load/put. We can simply call save_fpu_regs()
2361          * to save the current register state because we are in the
2362          * middle of a load/put cycle.
2363          *
2364          * Let's update our copies before we save it into the save area.
2365          */
2366         save_fpu_regs();
2367
2368         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2369 }
2370
2371 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2372 {
2373         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2374         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2375 }
2376
2377 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2378 {
2379         unsigned int i;
2380         struct kvm_vcpu *vcpu;
2381
2382         kvm_for_each_vcpu(i, vcpu, kvm) {
2383                 __disable_ibs_on_vcpu(vcpu);
2384         }
2385 }
2386
2387 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2388 {
2389         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2390         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2391 }
2392
2393 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2394 {
2395         int i, online_vcpus, started_vcpus = 0;
2396
2397         if (!is_vcpu_stopped(vcpu))
2398                 return;
2399
2400         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2401         /* Only one cpu at a time may enter/leave the STOPPED state. */
2402         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2403         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2404
2405         for (i = 0; i < online_vcpus; i++) {
2406                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2407                         started_vcpus++;
2408         }
2409
2410         if (started_vcpus == 0) {
2411                 /* we're the only active VCPU -> speed it up */
2412                 __enable_ibs_on_vcpu(vcpu);
2413         } else if (started_vcpus == 1) {
2414                 /*
2415                  * As we are starting a second VCPU, we have to disable
2416                  * the IBS facility on all VCPUs to remove potentially
2417                  * oustanding ENABLE requests.
2418                  */
2419                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2420         }
2421
2422         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2423         /*
2424          * Another VCPU might have used IBS while we were offline.
2425          * Let's play safe and flush the VCPU at startup.
2426          */
2427         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2428         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2429         return;
2430 }
2431
2432 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2433 {
2434         int i, online_vcpus, started_vcpus = 0;
2435         struct kvm_vcpu *started_vcpu = NULL;
2436
2437         if (is_vcpu_stopped(vcpu))
2438                 return;
2439
2440         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2441         /* Only one cpu at a time may enter/leave the STOPPED state. */
2442         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2443         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2444
2445         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2446         kvm_s390_clear_stop_irq(vcpu);
2447
2448         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2449         __disable_ibs_on_vcpu(vcpu);
2450
2451         for (i = 0; i < online_vcpus; i++) {
2452                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2453                         started_vcpus++;
2454                         started_vcpu = vcpu->kvm->vcpus[i];
2455                 }
2456         }
2457
2458         if (started_vcpus == 1) {
2459                 /*
2460                  * As we only have one VCPU left, we want to enable the
2461                  * IBS facility for that VCPU to speed it up.
2462                  */
2463                 __enable_ibs_on_vcpu(started_vcpu);
2464         }
2465
2466         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2467         return;
2468 }
2469
2470 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2471                                      struct kvm_enable_cap *cap)
2472 {
2473         int r;
2474
2475         if (cap->flags)
2476                 return -EINVAL;
2477
2478         switch (cap->cap) {
2479         case KVM_CAP_S390_CSS_SUPPORT:
2480                 if (!vcpu->kvm->arch.css_support) {
2481                         vcpu->kvm->arch.css_support = 1;
2482                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2483                         trace_kvm_s390_enable_css(vcpu->kvm);
2484                 }
2485                 r = 0;
2486                 break;
2487         default:
2488                 r = -EINVAL;
2489                 break;
2490         }
2491         return r;
2492 }
2493
2494 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2495                                   struct kvm_s390_mem_op *mop)
2496 {
2497         void __user *uaddr = (void __user *)mop->buf;
2498         void *tmpbuf = NULL;
2499         int r, srcu_idx;
2500         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2501                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
2502
2503         if (mop->flags & ~supported_flags)
2504                 return -EINVAL;
2505
2506         if (mop->size > MEM_OP_MAX_SIZE)
2507                 return -E2BIG;
2508
2509         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2510                 tmpbuf = vmalloc(mop->size);
2511                 if (!tmpbuf)
2512                         return -ENOMEM;
2513         }
2514
2515         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2516
2517         switch (mop->op) {
2518         case KVM_S390_MEMOP_LOGICAL_READ:
2519                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2520                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2521                         break;
2522                 }
2523                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2524                 if (r == 0) {
2525                         if (copy_to_user(uaddr, tmpbuf, mop->size))
2526                                 r = -EFAULT;
2527                 }
2528                 break;
2529         case KVM_S390_MEMOP_LOGICAL_WRITE:
2530                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2531                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2532                         break;
2533                 }
2534                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2535                         r = -EFAULT;
2536                         break;
2537                 }
2538                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2539                 break;
2540         default:
2541                 r = -EINVAL;
2542         }
2543
2544         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2545
2546         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2547                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2548
2549         vfree(tmpbuf);
2550         return r;
2551 }
2552
2553 long kvm_arch_vcpu_ioctl(struct file *filp,
2554                          unsigned int ioctl, unsigned long arg)
2555 {
2556         struct kvm_vcpu *vcpu = filp->private_data;
2557         void __user *argp = (void __user *)arg;
2558         int idx;
2559         long r;
2560
2561         switch (ioctl) {
2562         case KVM_S390_IRQ: {
2563                 struct kvm_s390_irq s390irq;
2564
2565                 r = -EFAULT;
2566                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2567                         break;
2568                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2569                 break;
2570         }
2571         case KVM_S390_INTERRUPT: {
2572                 struct kvm_s390_interrupt s390int;
2573                 struct kvm_s390_irq s390irq;
2574
2575                 r = -EFAULT;
2576                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2577                         break;
2578                 if (s390int_to_s390irq(&s390int, &s390irq))
2579                         return -EINVAL;
2580                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2581                 break;
2582         }
2583         case KVM_S390_STORE_STATUS:
2584                 idx = srcu_read_lock(&vcpu->kvm->srcu);
2585                 r = kvm_s390_vcpu_store_status(vcpu, arg);
2586                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2587                 break;
2588         case KVM_S390_SET_INITIAL_PSW: {
2589                 psw_t psw;
2590
2591                 r = -EFAULT;
2592                 if (copy_from_user(&psw, argp, sizeof(psw)))
2593                         break;
2594                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2595                 break;
2596         }
2597         case KVM_S390_INITIAL_RESET:
2598                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2599                 break;
2600         case KVM_SET_ONE_REG:
2601         case KVM_GET_ONE_REG: {
2602                 struct kvm_one_reg reg;
2603                 r = -EFAULT;
2604                 if (copy_from_user(&reg, argp, sizeof(reg)))
2605                         break;
2606                 if (ioctl == KVM_SET_ONE_REG)
2607                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2608                 else
2609                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2610                 break;
2611         }
2612 #ifdef CONFIG_KVM_S390_UCONTROL
2613         case KVM_S390_UCAS_MAP: {
2614                 struct kvm_s390_ucas_mapping ucasmap;
2615
2616                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2617                         r = -EFAULT;
2618                         break;
2619                 }
2620
2621                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2622                         r = -EINVAL;
2623                         break;
2624                 }
2625
2626                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2627                                      ucasmap.vcpu_addr, ucasmap.length);
2628                 break;
2629         }
2630         case KVM_S390_UCAS_UNMAP: {
2631                 struct kvm_s390_ucas_mapping ucasmap;
2632
2633                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2634                         r = -EFAULT;
2635                         break;
2636                 }
2637
2638                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2639                         r = -EINVAL;
2640                         break;
2641                 }
2642
2643                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2644                         ucasmap.length);
2645                 break;
2646         }
2647 #endif
2648         case KVM_S390_VCPU_FAULT: {
2649                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2650                 break;
2651         }
2652         case KVM_ENABLE_CAP:
2653         {
2654                 struct kvm_enable_cap cap;
2655                 r = -EFAULT;
2656                 if (copy_from_user(&cap, argp, sizeof(cap)))
2657                         break;
2658                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2659                 break;
2660         }
2661         case KVM_S390_MEM_OP: {
2662                 struct kvm_s390_mem_op mem_op;
2663
2664                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2665                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2666                 else
2667                         r = -EFAULT;
2668                 break;
2669         }
2670         case KVM_S390_SET_IRQ_STATE: {
2671                 struct kvm_s390_irq_state irq_state;
2672
2673                 r = -EFAULT;
2674                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2675                         break;
2676                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2677                     irq_state.len == 0 ||
2678                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2679                         r = -EINVAL;
2680                         break;
2681                 }
2682                 r = kvm_s390_set_irq_state(vcpu,
2683                                            (void __user *) irq_state.buf,
2684                                            irq_state.len);
2685                 break;
2686         }
2687         case KVM_S390_GET_IRQ_STATE: {
2688                 struct kvm_s390_irq_state irq_state;
2689
2690                 r = -EFAULT;
2691                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2692                         break;
2693                 if (irq_state.len == 0) {
2694                         r = -EINVAL;
2695                         break;
2696                 }
2697                 r = kvm_s390_get_irq_state(vcpu,
2698                                            (__u8 __user *)  irq_state.buf,
2699                                            irq_state.len);
2700                 break;
2701         }
2702         default:
2703                 r = -ENOTTY;
2704         }
2705         return r;
2706 }
2707
2708 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2709 {
2710 #ifdef CONFIG_KVM_S390_UCONTROL
2711         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2712                  && (kvm_is_ucontrol(vcpu->kvm))) {
2713                 vmf->page = virt_to_page(vcpu->arch.sie_block);
2714                 get_page(vmf->page);
2715                 return 0;
2716         }
2717 #endif
2718         return VM_FAULT_SIGBUS;
2719 }
2720
2721 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2722                             unsigned long npages)
2723 {
2724         return 0;
2725 }
2726
2727 /* Section: memory related */
2728 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2729                                    struct kvm_memory_slot *memslot,
2730                                    const struct kvm_userspace_memory_region *mem,
2731                                    enum kvm_mr_change change)
2732 {
2733         /* A few sanity checks. We can have memory slots which have to be
2734            located/ended at a segment boundary (1MB). The memory in userland is
2735            ok to be fragmented into various different vmas. It is okay to mmap()
2736            and munmap() stuff in this slot after doing this call at any time */
2737
2738         if (mem->userspace_addr & 0xffffful)
2739                 return -EINVAL;
2740
2741         if (mem->memory_size & 0xffffful)
2742                 return -EINVAL;
2743
2744         return 0;
2745 }
2746
2747 void kvm_arch_commit_memory_region(struct kvm *kvm,
2748                                 const struct kvm_userspace_memory_region *mem,
2749                                 const struct kvm_memory_slot *old,
2750                                 const struct kvm_memory_slot *new,
2751                                 enum kvm_mr_change change)
2752 {
2753         int rc;
2754
2755         /* If the basics of the memslot do not change, we do not want
2756          * to update the gmap. Every update causes several unnecessary
2757          * segment translation exceptions. This is usually handled just
2758          * fine by the normal fault handler + gmap, but it will also
2759          * cause faults on the prefix page of running guest CPUs.
2760          */
2761         if (old->userspace_addr == mem->userspace_addr &&
2762             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2763             old->npages * PAGE_SIZE == mem->memory_size)
2764                 return;
2765
2766         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2767                 mem->guest_phys_addr, mem->memory_size);
2768         if (rc)
2769                 pr_warn("failed to commit memory region\n");
2770         return;
2771 }
2772
2773 static int __init kvm_s390_init(void)
2774 {
2775         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2776 }
2777
2778 static void __exit kvm_s390_exit(void)
2779 {
2780         kvm_exit();
2781 }
2782
2783 module_init(kvm_s390_init);
2784 module_exit(kvm_s390_exit);
2785
2786 /*
2787  * Enable autoloading of the kvm module.
2788  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2789  * since x86 takes a different approach.
2790  */
2791 #include <linux/miscdevice.h>
2792 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2793 MODULE_ALIAS("devname:kvm");