arch/s390/kvm/kvm-s390.c

   1 /*
   2  * hosting zSeries kernel virtual machines
   3  *
   4  * Copyright IBM Corp. 2008, 2009
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License (version 2 only)
   8  * as published by the Free Software Foundation.
   9  *
  10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11  *               Christian Borntraeger <borntraeger@de.ibm.com>
  12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14  *               Jason J. Herne <jjherne@us.ibm.com>
  15  */
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/module.h>
  25 #include <linux/random.h>
  26 #include <linux/slab.h>
  27 #include <linux/timer.h>
  28 #include <linux/vmalloc.h>
  29 #include <asm/asm-offsets.h>
  30 #include <asm/lowcore.h>
  31 #include <asm/etr.h>
  32 #include <asm/pgtable.h>
  33 #include <asm/nmi.h>
  34 #include <asm/switch_to.h>
  35 #include <asm/isc.h>
  36 #include <asm/sclp.h>
  37 #include "kvm-s390.h"
  38 #include "gaccess.h"
  39
  40 #define KMSG_COMPONENT "kvm-s390"
  41 #undef pr_fmt
  42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  43
  44 #define CREATE_TRACE_POINTS
  45 #include "trace.h"
  46 #include "trace-s390.h"
  47
  48 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  49 #define LOCAL_IRQS 32
  50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  51                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  52
  53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  54
  55 struct kvm_stats_debugfs_item debugfs_entries[] = {
  56         { "userspace_handled", VCPU_STAT(exit_userspace) },
  57         { "exit_null", VCPU_STAT(exit_null) },
  58         { "exit_validity", VCPU_STAT(exit_validity) },
  59         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  60         { "exit_external_request", VCPU_STAT(exit_external_request) },
  61         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  62         { "exit_instruction", VCPU_STAT(exit_instruction) },
  63         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  64         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  65         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  66         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  67         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  68         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  69         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  70         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  71         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  72         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  73         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  74         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  75         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
  76         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  77         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  78         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  79         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
  80         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  81         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
  82         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
  83         { "instruction_spx", VCPU_STAT(instruction_spx) },
  84         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
  85         { "instruction_stap", VCPU_STAT(instruction_stap) },
  86         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
  87         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
  88         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
  89         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
  90         { "instruction_essa", VCPU_STAT(instruction_essa) },
  91         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
  92         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
  93         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
  94         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
  95         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
  96         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
  97         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
  98         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
  99         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 100         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 101         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 102         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 103         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 104         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 105         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 106         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 107         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 108         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 109         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 110         { "diagnose_10", VCPU_STAT(diagnose_10) },
 111         { "diagnose_44", VCPU_STAT(diagnose_44) },
 112         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
 113         { "diagnose_258", VCPU_STAT(diagnose_258) },
 114         { "diagnose_308", VCPU_STAT(diagnose_308) },
 115         { "diagnose_500", VCPU_STAT(diagnose_500) },
 116         { NULL }
 117 };
 118
 119 /* upper facilities limit for kvm */
 120 unsigned long kvm_s390_fac_list_mask[] = {
 121         0xffe6fffbfcfdfc40UL,
 122         0x005e800000000000UL,
 123 };
 124
 125 unsigned long kvm_s390_fac_list_mask_size(void)
 126 {
 127         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 128         return ARRAY_SIZE(kvm_s390_fac_list_mask);
 129 }
 130
 131 static struct gmap_notifier gmap_notifier;
 132 debug_info_t *kvm_s390_dbf;
 133
 134 /* Section: not file related */
 135 int kvm_arch_hardware_enable(void)
 136 {
 137         /* every s390 is virtualization enabled ;-) */
 138         return 0;
 139 }
 140
 141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
 142
 143 /*
 144  * This callback is executed during stop_machine(). All CPUs are therefore
 145  * temporarily stopped. In order not to change guest behavior, we have to
 146  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 147  * so a CPU won't be stopped while calculating with the epoch.
 148  */
 149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 150                           void *v)
 151 {
 152         struct kvm *kvm;
 153         struct kvm_vcpu *vcpu;
 154         int i;
 155         unsigned long long *delta = v;
 156
 157         list_for_each_entry(kvm, &vm_list, vm_list) {
 158                 kvm->arch.epoch -= *delta;
 159                 kvm_for_each_vcpu(i, vcpu, kvm) {
 160                         vcpu->arch.sie_block->epoch -= *delta;
 161                 }
 162         }
 163         return NOTIFY_OK;
 164 }
 165
 166 static struct notifier_block kvm_clock_notifier = {
 167         .notifier_call = kvm_clock_sync,
 168 };
 169
 170 int kvm_arch_hardware_setup(void)
 171 {
 172         gmap_notifier.notifier_call = kvm_gmap_notifier;
 173         gmap_register_ipte_notifier(&gmap_notifier);
 174         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 175                                        &kvm_clock_notifier);
 176         return 0;
 177 }
 178
 179 void kvm_arch_hardware_unsetup(void)
 180 {
 181         gmap_unregister_ipte_notifier(&gmap_notifier);
 182         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 183                                          &kvm_clock_notifier);
 184 }
 185
 186 int kvm_arch_init(void *opaque)
 187 {
 188         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 189         if (!kvm_s390_dbf)
 190                 return -ENOMEM;
 191
 192         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 193                 debug_unregister(kvm_s390_dbf);
 194                 return -ENOMEM;
 195         }
 196
 197         /* Register floating interrupt controller interface. */
 198         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 199 }
 200
 201 void kvm_arch_exit(void)
 202 {
 203         debug_unregister(kvm_s390_dbf);
 204 }
 205
 206 /* Section: device related */
 207 long kvm_arch_dev_ioctl(struct file *filp,
 208                         unsigned int ioctl, unsigned long arg)
 209 {
 210         if (ioctl == KVM_S390_ENABLE_SIE)
 211                 return s390_enable_sie();
 212         return -EINVAL;
 213 }
 214
 215 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 216 {
 217         int r;
 218
 219         switch (ext) {
 220         case KVM_CAP_S390_PSW:
 221         case KVM_CAP_S390_GMAP:
 222         case KVM_CAP_SYNC_MMU:
 223 #ifdef CONFIG_KVM_S390_UCONTROL
 224         case KVM_CAP_S390_UCONTROL:
 225 #endif
 226         case KVM_CAP_ASYNC_PF:
 227         case KVM_CAP_SYNC_REGS:
 228         case KVM_CAP_ONE_REG:
 229         case KVM_CAP_ENABLE_CAP:
 230         case KVM_CAP_S390_CSS_SUPPORT:
 231         case KVM_CAP_IOEVENTFD:
 232         case KVM_CAP_DEVICE_CTRL:
 233         case KVM_CAP_ENABLE_CAP_VM:
 234         case KVM_CAP_S390_IRQCHIP:
 235         case KVM_CAP_VM_ATTRIBUTES:
 236         case KVM_CAP_MP_STATE:
 237         case KVM_CAP_S390_INJECT_IRQ:
 238         case KVM_CAP_S390_USER_SIGP:
 239         case KVM_CAP_S390_USER_STSI:
 240         case KVM_CAP_S390_SKEYS:
 241         case KVM_CAP_S390_IRQ_STATE:
 242                 r = 1;
 243                 break;
 244         case KVM_CAP_S390_MEM_OP:
 245                 r = MEM_OP_MAX_SIZE;
 246                 break;
 247         case KVM_CAP_NR_VCPUS:
 248         case KVM_CAP_MAX_VCPUS:
 249                 r = KVM_MAX_VCPUS;
 250                 break;
 251         case KVM_CAP_NR_MEMSLOTS:
 252                 r = KVM_USER_MEM_SLOTS;
 253                 break;
 254         case KVM_CAP_S390_COW:
 255                 r = MACHINE_HAS_ESOP;
 256                 break;
 257         case KVM_CAP_S390_VECTOR_REGISTERS:
 258                 r = MACHINE_HAS_VX;
 259                 break;
 260         default:
 261                 r = 0;
 262         }
 263         return r;
 264 }
 265
 266 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 267                                         struct kvm_memory_slot *memslot)
 268 {
 269         gfn_t cur_gfn, last_gfn;
 270         unsigned long address;
 271         struct gmap *gmap = kvm->arch.gmap;
 272
 273         down_read(&gmap->mm->mmap_sem);
 274         /* Loop over all guest pages */
 275         last_gfn = memslot->base_gfn + memslot->npages;
 276         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 277                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 278
 279                 if (gmap_test_and_clear_dirty(address, gmap))
 280                         mark_page_dirty(kvm, cur_gfn);
 281         }
 282         up_read(&gmap->mm->mmap_sem);
 283 }
 284
 285 /* Section: vm related */
 286 /*
 287  * Get (and clear) the dirty memory log for a memory slot.
 288  */
 289 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 290                                struct kvm_dirty_log *log)
 291 {
 292         int r;
 293         unsigned long n;
 294         struct kvm_memslots *slots;
 295         struct kvm_memory_slot *memslot;
 296         int is_dirty = 0;
 297
 298         mutex_lock(&kvm->slots_lock);
 299
 300         r = -EINVAL;
 301         if (log->slot >= KVM_USER_MEM_SLOTS)
 302                 goto out;
 303
 304         slots = kvm_memslots(kvm);
 305         memslot = id_to_memslot(slots, log->slot);
 306         r = -ENOENT;
 307         if (!memslot->dirty_bitmap)
 308                 goto out;
 309
 310         kvm_s390_sync_dirty_log(kvm, memslot);
 311         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 312         if (r)
 313                 goto out;
 314
 315         /* Clear the dirty log */
 316         if (is_dirty) {
 317                 n = kvm_dirty_bitmap_bytes(memslot);
 318                 memset(memslot->dirty_bitmap, 0, n);
 319         }
 320         r = 0;
 321 out:
 322         mutex_unlock(&kvm->slots_lock);
 323         return r;
 324 }
 325
 326 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 327 {
 328         int r;
 329
 330         if (cap->flags)
 331                 return -EINVAL;
 332
 333         switch (cap->cap) {
 334         case KVM_CAP_S390_IRQCHIP:
 335                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 336                 kvm->arch.use_irqchip = 1;
 337                 r = 0;
 338                 break;
 339         case KVM_CAP_S390_USER_SIGP:
 340                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 341                 kvm->arch.user_sigp = 1;
 342                 r = 0;
 343                 break;
 344         case KVM_CAP_S390_VECTOR_REGISTERS:
 345                 mutex_lock(&kvm->lock);
 346                 if (atomic_read(&kvm->online_vcpus)) {
 347                         r = -EBUSY;
 348                 } else if (MACHINE_HAS_VX) {
 349                         set_kvm_facility(kvm->arch.model.fac->mask, 129);
 350                         set_kvm_facility(kvm->arch.model.fac->list, 129);
 351                         r = 0;
 352                 } else
 353                         r = -EINVAL;
 354                 mutex_unlock(&kvm->lock);
 355                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 356                          r ? "(not available)" : "(success)");
 357                 break;
 358         case KVM_CAP_S390_USER_STSI:
 359                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 360                 kvm->arch.user_stsi = 1;
 361                 r = 0;
 362                 break;
 363         default:
 364                 r = -EINVAL;
 365                 break;
 366         }
 367         return r;
 368 }
 369
 370 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 371 {
 372         int ret;
 373
 374         switch (attr->attr) {
 375         case KVM_S390_VM_MEM_LIMIT_SIZE:
 376                 ret = 0;
 377                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 378                          kvm->arch.gmap->asce_end);
 379                 if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
 380                         ret = -EFAULT;
 381                 break;
 382         default:
 383                 ret = -ENXIO;
 384                 break;
 385         }
 386         return ret;
 387 }
 388
 389 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 390 {
 391         int ret;
 392         unsigned int idx;
 393         switch (attr->attr) {
 394         case KVM_S390_VM_MEM_ENABLE_CMMA:
 395                 /* enable CMMA only for z10 and later (EDAT_1) */
 396                 ret = -EINVAL;
 397                 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
 398                         break;
 399
 400                 ret = -EBUSY;
 401                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 402                 mutex_lock(&kvm->lock);
 403                 if (atomic_read(&kvm->online_vcpus) == 0) {
 404                         kvm->arch.use_cmma = 1;
 405                         ret = 0;
 406                 }
 407                 mutex_unlock(&kvm->lock);
 408                 break;
 409         case KVM_S390_VM_MEM_CLR_CMMA:
 410                 ret = -EINVAL;
 411                 if (!kvm->arch.use_cmma)
 412                         break;
 413
 414                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 415                 mutex_lock(&kvm->lock);
 416                 idx = srcu_read_lock(&kvm->srcu);
 417                 s390_reset_cmma(kvm->arch.gmap->mm);
 418                 srcu_read_unlock(&kvm->srcu, idx);
 419                 mutex_unlock(&kvm->lock);
 420                 ret = 0;
 421                 break;
 422         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 423                 unsigned long new_limit;
 424
 425                 if (kvm_is_ucontrol(kvm))
 426                         return -EINVAL;
 427
 428                 if (get_user(new_limit, (u64 __user *)attr->addr))
 429                         return -EFAULT;
 430
 431                 if (new_limit > kvm->arch.gmap->asce_end)
 432                         return -E2BIG;
 433
 434                 ret = -EBUSY;
 435                 mutex_lock(&kvm->lock);
 436                 if (atomic_read(&kvm->online_vcpus) == 0) {
 437                         /* gmap_alloc will round the limit up */
 438                         struct gmap *new = gmap_alloc(current->mm, new_limit);
 439
 440                         if (!new) {
 441                                 ret = -ENOMEM;
 442                         } else {
 443                                 gmap_free(kvm->arch.gmap);
 444                                 new->private = kvm;
 445                                 kvm->arch.gmap = new;
 446                                 ret = 0;
 447                         }
 448                 }
 449                 mutex_unlock(&kvm->lock);
 450                 VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
 451                 break;
 452         }
 453         default:
 454                 ret = -ENXIO;
 455                 break;
 456         }
 457         return ret;
 458 }
 459
 460 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 461
 462 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 463 {
 464         struct kvm_vcpu *vcpu;
 465         int i;
 466
 467         if (!test_kvm_facility(kvm, 76))
 468                 return -EINVAL;
 469
 470         mutex_lock(&kvm->lock);
 471         switch (attr->attr) {
 472         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 473                 get_random_bytes(
 474                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 475                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 476                 kvm->arch.crypto.aes_kw = 1;
 477                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 478                 break;
 479         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 480                 get_random_bytes(
 481                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 482                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 483                 kvm->arch.crypto.dea_kw = 1;
 484                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 485                 break;
 486         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 487                 kvm->arch.crypto.aes_kw = 0;
 488                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 489                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 490                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 491                 break;
 492         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 493                 kvm->arch.crypto.dea_kw = 0;
 494                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 495                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 496                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 497                 break;
 498         default:
 499                 mutex_unlock(&kvm->lock);
 500                 return -ENXIO;
 501         }
 502
 503         kvm_for_each_vcpu(i, vcpu, kvm) {
 504                 kvm_s390_vcpu_crypto_setup(vcpu);
 505                 exit_sie(vcpu);
 506         }
 507         mutex_unlock(&kvm->lock);
 508         return 0;
 509 }
 510
 511 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 512 {
 513         u8 gtod_high;
 514
 515         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 516                                            sizeof(gtod_high)))
 517                 return -EFAULT;
 518
 519         if (gtod_high != 0)
 520                 return -EINVAL;
 521         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 522
 523         return 0;
 524 }
 525
 526 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 527 {
 528         u64 gtod;
 529
 530         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 531                 return -EFAULT;
 532
 533         kvm_s390_set_tod_clock(kvm, gtod);
 534         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 535         return 0;
 536 }
 537
 538 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 539 {
 540         int ret;
 541
 542         if (attr->flags)
 543                 return -EINVAL;
 544
 545         switch (attr->attr) {
 546         case KVM_S390_VM_TOD_HIGH:
 547                 ret = kvm_s390_set_tod_high(kvm, attr);
 548                 break;
 549         case KVM_S390_VM_TOD_LOW:
 550                 ret = kvm_s390_set_tod_low(kvm, attr);
 551                 break;
 552         default:
 553                 ret = -ENXIO;
 554                 break;
 555         }
 556         return ret;
 557 }
 558
 559 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 560 {
 561         u8 gtod_high = 0;
 562
 563         if (copy_to_user((void __user *)attr->addr, &gtod_high,
 564                                          sizeof(gtod_high)))
 565                 return -EFAULT;
 566         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 567
 568         return 0;
 569 }
 570
 571 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 572 {
 573         u64 gtod;
 574
 575         gtod = kvm_s390_get_tod_clock_fast(kvm);
 576         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 577                 return -EFAULT;
 578         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
 579
 580         return 0;
 581 }
 582
 583 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 584 {
 585         int ret;
 586
 587         if (attr->flags)
 588                 return -EINVAL;
 589
 590         switch (attr->attr) {
 591         case KVM_S390_VM_TOD_HIGH:
 592                 ret = kvm_s390_get_tod_high(kvm, attr);
 593                 break;
 594         case KVM_S390_VM_TOD_LOW:
 595                 ret = kvm_s390_get_tod_low(kvm, attr);
 596                 break;
 597         default:
 598                 ret = -ENXIO;
 599                 break;
 600         }
 601         return ret;
 602 }
 603
 604 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 605 {
 606         struct kvm_s390_vm_cpu_processor *proc;
 607         int ret = 0;
 608
 609         mutex_lock(&kvm->lock);
 610         if (atomic_read(&kvm->online_vcpus)) {
 611                 ret = -EBUSY;
 612                 goto out;
 613         }
 614         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 615         if (!proc) {
 616                 ret = -ENOMEM;
 617                 goto out;
 618         }
 619         if (!copy_from_user(proc, (void __user *)attr->addr,
 620                             sizeof(*proc))) {
 621                 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
 622                        sizeof(struct cpuid));
 623                 kvm->arch.model.ibc = proc->ibc;
 624                 memcpy(kvm->arch.model.fac->list, proc->fac_list,
 625                        S390_ARCH_FAC_LIST_SIZE_BYTE);
 626         } else
 627                 ret = -EFAULT;
 628         kfree(proc);
 629 out:
 630         mutex_unlock(&kvm->lock);
 631         return ret;
 632 }
 633
 634 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 635 {
 636         int ret = -ENXIO;
 637
 638         switch (attr->attr) {
 639         case KVM_S390_VM_CPU_PROCESSOR:
 640                 ret = kvm_s390_set_processor(kvm, attr);
 641                 break;
 642         }
 643         return ret;
 644 }
 645
 646 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 647 {
 648         struct kvm_s390_vm_cpu_processor *proc;
 649         int ret = 0;
 650
 651         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 652         if (!proc) {
 653                 ret = -ENOMEM;
 654                 goto out;
 655         }
 656         memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
 657         proc->ibc = kvm->arch.model.ibc;
 658         memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
 659         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
 660                 ret = -EFAULT;
 661         kfree(proc);
 662 out:
 663         return ret;
 664 }
 665
 666 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
 667 {
 668         struct kvm_s390_vm_cpu_machine *mach;
 669         int ret = 0;
 670
 671         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
 672         if (!mach) {
 673                 ret = -ENOMEM;
 674                 goto out;
 675         }
 676         get_cpu_id((struct cpuid *) &mach->cpuid);
 677         mach->ibc = sclp.ibc;
 678         memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
 679                S390_ARCH_FAC_LIST_SIZE_BYTE);
 680         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
 681                S390_ARCH_FAC_LIST_SIZE_BYTE);
 682         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
 683                 ret = -EFAULT;
 684         kfree(mach);
 685 out:
 686         return ret;
 687 }
 688
 689 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 690 {
 691         int ret = -ENXIO;
 692
 693         switch (attr->attr) {
 694         case KVM_S390_VM_CPU_PROCESSOR:
 695                 ret = kvm_s390_get_processor(kvm, attr);
 696                 break;
 697         case KVM_S390_VM_CPU_MACHINE:
 698                 ret = kvm_s390_get_machine(kvm, attr);
 699                 break;
 700         }
 701         return ret;
 702 }
 703
 704 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 705 {
 706         int ret;
 707
 708         switch (attr->group) {
 709         case KVM_S390_VM_MEM_CTRL:
 710                 ret = kvm_s390_set_mem_control(kvm, attr);
 711                 break;
 712         case KVM_S390_VM_TOD:
 713                 ret = kvm_s390_set_tod(kvm, attr);
 714                 break;
 715         case KVM_S390_VM_CPU_MODEL:
 716                 ret = kvm_s390_set_cpu_model(kvm, attr);
 717                 break;
 718         case KVM_S390_VM_CRYPTO:
 719                 ret = kvm_s390_vm_set_crypto(kvm, attr);
 720                 break;
 721         default:
 722                 ret = -ENXIO;
 723                 break;
 724         }
 725
 726         return ret;
 727 }
 728
 729 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 730 {
 731         int ret;
 732
 733         switch (attr->group) {
 734         case KVM_S390_VM_MEM_CTRL:
 735                 ret = kvm_s390_get_mem_control(kvm, attr);
 736                 break;
 737         case KVM_S390_VM_TOD:
 738                 ret = kvm_s390_get_tod(kvm, attr);
 739                 break;
 740         case KVM_S390_VM_CPU_MODEL:
 741                 ret = kvm_s390_get_cpu_model(kvm, attr);
 742                 break;
 743         default:
 744                 ret = -ENXIO;
 745                 break;
 746         }
 747
 748         return ret;
 749 }
 750
 751 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 752 {
 753         int ret;
 754
 755         switch (attr->group) {
 756         case KVM_S390_VM_MEM_CTRL:
 757                 switch (attr->attr) {
 758                 case KVM_S390_VM_MEM_ENABLE_CMMA:
 759                 case KVM_S390_VM_MEM_CLR_CMMA:
 760                 case KVM_S390_VM_MEM_LIMIT_SIZE:
 761                         ret = 0;
 762                         break;
 763                 default:
 764                         ret = -ENXIO;
 765                         break;
 766                 }
 767                 break;
 768         case KVM_S390_VM_TOD:
 769                 switch (attr->attr) {
 770                 case KVM_S390_VM_TOD_LOW:
 771                 case KVM_S390_VM_TOD_HIGH:
 772                         ret = 0;
 773                         break;
 774                 default:
 775                         ret = -ENXIO;
 776                         break;
 777                 }
 778                 break;
 779         case KVM_S390_VM_CPU_MODEL:
 780                 switch (attr->attr) {
 781                 case KVM_S390_VM_CPU_PROCESSOR:
 782                 case KVM_S390_VM_CPU_MACHINE:
 783                         ret = 0;
 784                         break;
 785                 default:
 786                         ret = -ENXIO;
 787                         break;
 788                 }
 789                 break;
 790         case KVM_S390_VM_CRYPTO:
 791                 switch (attr->attr) {
 792                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 793                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 794                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 795                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 796                         ret = 0;
 797                         break;
 798                 default:
 799                         ret = -ENXIO;
 800                         break;
 801                 }
 802                 break;
 803         default:
 804                 ret = -ENXIO;
 805                 break;
 806         }
 807
 808         return ret;
 809 }
 810
 811 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 812 {
 813         uint8_t *keys;
 814         uint64_t hva;
 815         unsigned long curkey;
 816         int i, r = 0;
 817
 818         if (args->flags != 0)
 819                 return -EINVAL;
 820
 821         /* Is this guest using storage keys? */
 822         if (!mm_use_skey(current->mm))
 823                 return KVM_S390_GET_SKEYS_NONE;
 824
 825         /* Enforce sane limit on memory allocation */
 826         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
 827                 return -EINVAL;
 828
 829         keys = kmalloc_array(args->count, sizeof(uint8_t),
 830                              GFP_KERNEL | __GFP_NOWARN);
 831         if (!keys)
 832                 keys = vmalloc(sizeof(uint8_t) * args->count);
 833         if (!keys)
 834                 return -ENOMEM;
 835
 836         for (i = 0; i < args->count; i++) {
 837                 hva = gfn_to_hva(kvm, args->start_gfn + i);
 838                 if (kvm_is_error_hva(hva)) {
 839                         r = -EFAULT;
 840                         goto out;
 841                 }
 842
 843                 curkey = get_guest_storage_key(current->mm, hva);
 844                 if (IS_ERR_VALUE(curkey)) {
 845                         r = curkey;
 846                         goto out;
 847                 }
 848                 keys[i] = curkey;
 849         }
 850
 851         r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
 852                          sizeof(uint8_t) * args->count);
 853         if (r)
 854                 r = -EFAULT;
 855 out:
 856         kvfree(keys);
 857         return r;
 858 }
 859
 860 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 861 {
 862         uint8_t *keys;
 863         uint64_t hva;
 864         int i, r = 0;
 865
 866         if (args->flags != 0)
 867                 return -EINVAL;
 868
 869         /* Enforce sane limit on memory allocation */
 870         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
 871                 return -EINVAL;
 872
 873         keys = kmalloc_array(args->count, sizeof(uint8_t),
 874                              GFP_KERNEL | __GFP_NOWARN);
 875         if (!keys)
 876                 keys = vmalloc(sizeof(uint8_t) * args->count);
 877         if (!keys)
 878                 return -ENOMEM;
 879
 880         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
 881                            sizeof(uint8_t) * args->count);
 882         if (r) {
 883                 r = -EFAULT;
 884                 goto out;
 885         }
 886
 887         /* Enable storage key handling for the guest */
 888         r = s390_enable_skey();
 889         if (r)
 890                 goto out;
 891
 892         for (i = 0; i < args->count; i++) {
 893                 hva = gfn_to_hva(kvm, args->start_gfn + i);
 894                 if (kvm_is_error_hva(hva)) {
 895                         r = -EFAULT;
 896                         goto out;
 897                 }
 898
 899                 /* Lowest order bit is reserved */
 900                 if (keys[i] & 0x01) {
 901                         r = -EINVAL;
 902                         goto out;
 903                 }
 904
 905                 r = set_guest_storage_key(current->mm, hva,
 906                                           (unsigned long)keys[i], 0);
 907                 if (r)
 908                         goto out;
 909         }
 910 out:
 911         kvfree(keys);
 912         return r;
 913 }
 914
 915 long kvm_arch_vm_ioctl(struct file *filp,
 916                        unsigned int ioctl, unsigned long arg)
 917 {
 918         struct kvm *kvm = filp->private_data;
 919         void __user *argp = (void __user *)arg;
 920         struct kvm_device_attr attr;
 921         int r;
 922
 923         switch (ioctl) {
 924         case KVM_S390_INTERRUPT: {
 925                 struct kvm_s390_interrupt s390int;
 926
 927                 r = -EFAULT;
 928                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
 929                         break;
 930                 r = kvm_s390_inject_vm(kvm, &s390int);
 931                 break;
 932         }
 933         case KVM_ENABLE_CAP: {
 934                 struct kvm_enable_cap cap;
 935                 r = -EFAULT;
 936                 if (copy_from_user(&cap, argp, sizeof(cap)))
 937                         break;
 938                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
 939                 break;
 940         }
 941         case KVM_CREATE_IRQCHIP: {
 942                 struct kvm_irq_routing_entry routing;
 943
 944                 r = -EINVAL;
 945                 if (kvm->arch.use_irqchip) {
 946                         /* Set up dummy routing. */
 947                         memset(&routing, 0, sizeof(routing));
 948                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
 949                 }
 950                 break;
 951         }
 952         case KVM_SET_DEVICE_ATTR: {
 953                 r = -EFAULT;
 954                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
 955                         break;
 956                 r = kvm_s390_vm_set_attr(kvm, &attr);
 957                 break;
 958         }
 959         case KVM_GET_DEVICE_ATTR: {
 960                 r = -EFAULT;
 961                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
 962                         break;
 963                 r = kvm_s390_vm_get_attr(kvm, &attr);
 964                 break;
 965         }
 966         case KVM_HAS_DEVICE_ATTR: {
 967                 r = -EFAULT;
 968                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
 969                         break;
 970                 r = kvm_s390_vm_has_attr(kvm, &attr);
 971                 break;
 972         }
 973         case KVM_S390_GET_SKEYS: {
 974                 struct kvm_s390_skeys args;
 975
 976                 r = -EFAULT;
 977                 if (copy_from_user(&args, argp,
 978                                    sizeof(struct kvm_s390_skeys)))
 979                         break;
 980                 r = kvm_s390_get_skeys(kvm, &args);
 981                 break;
 982         }
 983         case KVM_S390_SET_SKEYS: {
 984                 struct kvm_s390_skeys args;
 985
 986                 r = -EFAULT;
 987                 if (copy_from_user(&args, argp,
 988                                    sizeof(struct kvm_s390_skeys)))
 989                         break;
 990                 r = kvm_s390_set_skeys(kvm, &args);
 991                 break;
 992         }
 993         default:
 994                 r = -ENOTTY;
 995         }
 996
 997         return r;
 998 }
 999
1000 static int kvm_s390_query_ap_config(u8 *config)
1001 {
1002         u32 fcn_code = 0x04000000UL;
1003         u32 cc = 0;
1004
1005         memset(config, 0, 128);
1006         asm volatile(
1007                 "lgr 0,%1\n"
1008                 "lgr 2,%2\n"
1009                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1010                 "0: ipm %0\n"
1011                 "srl %0,28\n"
1012                 "1:\n"
1013                 EX_TABLE(0b, 1b)
1014                 : "+r" (cc)
1015                 : "r" (fcn_code), "r" (config)
1016                 : "cc", "0", "2", "memory"
1017         );
1018
1019         return cc;
1020 }
1021
1022 static int kvm_s390_apxa_installed(void)
1023 {
1024         u8 config[128];
1025         int cc;
1026
1027         if (test_facility(2) && test_facility(12)) {
1028                 cc = kvm_s390_query_ap_config(config);
1029
1030                 if (cc)
1031                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1032                 else
1033                         return config[0] & 0x40;
1034         }
1035
1036         return 0;
1037 }
1038
1039 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1040 {
1041         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1042
1043         if (kvm_s390_apxa_installed())
1044                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1045         else
1046                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1047 }
1048
1049 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1050 {
1051         get_cpu_id(cpu_id);
1052         cpu_id->version = 0xff;
1053 }
1054
1055 static int kvm_s390_crypto_init(struct kvm *kvm)
1056 {
1057         if (!test_kvm_facility(kvm, 76))
1058                 return 0;
1059
1060         kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1061                                          GFP_KERNEL | GFP_DMA);
1062         if (!kvm->arch.crypto.crycb)
1063                 return -ENOMEM;
1064
1065         kvm_s390_set_crycb_format(kvm);
1066
1067         /* Enable AES/DEA protected key functions by default */
1068         kvm->arch.crypto.aes_kw = 1;
1069         kvm->arch.crypto.dea_kw = 1;
1070         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1071                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1072         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1073                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1074
1075         return 0;
1076 }
1077
1078 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1079 {
1080         int i, rc;
1081         char debug_name[16];
1082         static unsigned long sca_offset;
1083
1084         rc = -EINVAL;
1085 #ifdef CONFIG_KVM_S390_UCONTROL
1086         if (type & ~KVM_VM_S390_UCONTROL)
1087                 goto out_err;
1088         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1089                 goto out_err;
1090 #else
1091         if (type)
1092                 goto out_err;
1093 #endif
1094
1095         rc = s390_enable_sie();
1096         if (rc)
1097                 goto out_err;
1098
1099         rc = -ENOMEM;
1100
1101         kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
1102         if (!kvm->arch.sca)
1103                 goto out_err;
1104         spin_lock(&kvm_lock);
1105         sca_offset += 16;
1106         if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
1107                 sca_offset = 0;
1108         kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
1109         spin_unlock(&kvm_lock);
1110
1111         sprintf(debug_name, "kvm-%u", current->pid);
1112
1113         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1114         if (!kvm->arch.dbf)
1115                 goto out_err;
1116
1117         /*
1118          * The architectural maximum amount of facilities is 16 kbit. To store
1119          * this amount, 2 kbyte of memory is required. Thus we need a full
1120          * page to hold the guest facility list (arch.model.fac->list) and the
1121          * facility mask (arch.model.fac->mask). Its address size has to be
1122          * 31 bits and word aligned.
1123          */
1124         kvm->arch.model.fac =
1125                 (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1126         if (!kvm->arch.model.fac)
1127                 goto out_err;
1128
1129         /* Populate the facility mask initially. */
1130         memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1131                S390_ARCH_FAC_LIST_SIZE_BYTE);
1132         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1133                 if (i < kvm_s390_fac_list_mask_size())
1134                         kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1135                 else
1136                         kvm->arch.model.fac->mask[i] = 0UL;
1137         }
1138
1139         /* Populate the facility list initially. */
1140         memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1141                S390_ARCH_FAC_LIST_SIZE_BYTE);
1142
1143         kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1144         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1145
1146         if (kvm_s390_crypto_init(kvm) < 0)
1147                 goto out_err;
1148
1149         spin_lock_init(&kvm->arch.float_int.lock);
1150         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1151                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1152         init_waitqueue_head(&kvm->arch.ipte_wq);
1153         mutex_init(&kvm->arch.ipte_mutex);
1154
1155         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1156         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1157
1158         if (type & KVM_VM_S390_UCONTROL) {
1159                 kvm->arch.gmap = NULL;
1160         } else {
1161                 kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1162                 if (!kvm->arch.gmap)
1163                         goto out_err;
1164                 kvm->arch.gmap->private = kvm;
1165                 kvm->arch.gmap->pfault_enabled = 0;
1166         }
1167
1168         kvm->arch.css_support = 0;
1169         kvm->arch.use_irqchip = 0;
1170         kvm->arch.epoch = 0;
1171
1172         spin_lock_init(&kvm->arch.start_stop_lock);
1173         KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
1174
1175         return 0;
1176 out_err:
1177         kfree(kvm->arch.crypto.crycb);
1178         free_page((unsigned long)kvm->arch.model.fac);
1179         debug_unregister(kvm->arch.dbf);
1180         free_page((unsigned long)(kvm->arch.sca));
1181         KVM_EVENT(3, "creation of vm failed: %d", rc);
1182         return rc;
1183 }
1184
1185 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1186 {
1187         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1188         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1189         kvm_s390_clear_local_irqs(vcpu);
1190         kvm_clear_async_pf_completion_queue(vcpu);
1191         if (!kvm_is_ucontrol(vcpu->kvm)) {
1192                 clear_bit(63 - vcpu->vcpu_id,
1193                           (unsigned long *) &vcpu->kvm->arch.sca->mcn);
1194                 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
1195                     (__u64) vcpu->arch.sie_block)
1196                         vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
1197         }
1198         smp_mb();
1199
1200         if (kvm_is_ucontrol(vcpu->kvm))
1201                 gmap_free(vcpu->arch.gmap);
1202
1203         if (vcpu->kvm->arch.use_cmma)
1204                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1205         free_page((unsigned long)(vcpu->arch.sie_block));
1206
1207         kvm_vcpu_uninit(vcpu);
1208         kmem_cache_free(kvm_vcpu_cache, vcpu);
1209 }
1210
1211 static void kvm_free_vcpus(struct kvm *kvm)
1212 {
1213         unsigned int i;
1214         struct kvm_vcpu *vcpu;
1215
1216         kvm_for_each_vcpu(i, vcpu, kvm)
1217                 kvm_arch_vcpu_destroy(vcpu);
1218
1219         mutex_lock(&kvm->lock);
1220         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1221                 kvm->vcpus[i] = NULL;
1222
1223         atomic_set(&kvm->online_vcpus, 0);
1224         mutex_unlock(&kvm->lock);
1225 }
1226
1227 void kvm_arch_destroy_vm(struct kvm *kvm)
1228 {
1229         kvm_free_vcpus(kvm);
1230         free_page((unsigned long)kvm->arch.model.fac);
1231         free_page((unsigned long)(kvm->arch.sca));
1232         debug_unregister(kvm->arch.dbf);
1233         kfree(kvm->arch.crypto.crycb);
1234         if (!kvm_is_ucontrol(kvm))
1235                 gmap_free(kvm->arch.gmap);
1236         kvm_s390_destroy_adapters(kvm);
1237         kvm_s390_clear_float_irqs(kvm);
1238         KVM_EVENT(3, "vm 0x%p destroyed", kvm);
1239 }
1240
1241 /* Section: vcpu related */
1242 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1243 {
1244         vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1245         if (!vcpu->arch.gmap)
1246                 return -ENOMEM;
1247         vcpu->arch.gmap->private = vcpu->kvm;
1248
1249         return 0;
1250 }
1251
1252 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1253 {
1254         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1255         kvm_clear_async_pf_completion_queue(vcpu);
1256         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1257                                     KVM_SYNC_GPRS |
1258                                     KVM_SYNC_ACRS |
1259                                     KVM_SYNC_CRS |
1260                                     KVM_SYNC_ARCH0 |
1261                                     KVM_SYNC_PFAULT;
1262         if (test_kvm_facility(vcpu->kvm, 129))
1263                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1264
1265         if (kvm_is_ucontrol(vcpu->kvm))
1266                 return __kvm_ucontrol_vcpu_init(vcpu);
1267
1268         return 0;
1269 }
1270
1271 /*
1272  * Backs up the current FP/VX register save area on a particular
1273  * destination.  Used to switch between different register save
1274  * areas.
1275  */
1276 static inline void save_fpu_to(struct fpu *dst)
1277 {
1278         dst->fpc = current->thread.fpu.fpc;
1279         dst->regs = current->thread.fpu.regs;
1280 }
1281
1282 /*
1283  * Switches the FP/VX register save area from which to lazy
1284  * restore register contents.
1285  */
1286 static inline void load_fpu_from(struct fpu *from)
1287 {
1288         current->thread.fpu.fpc = from->fpc;
1289         current->thread.fpu.regs = from->regs;
1290 }
1291
1292 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1293 {
1294         /* Save host register state */
1295         save_fpu_regs();
1296         save_fpu_to(&vcpu->arch.host_fpregs);
1297
1298         if (test_kvm_facility(vcpu->kvm, 129)) {
1299                 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1300                 /*
1301                  * Use the register save area in the SIE-control block
1302                  * for register restore and save in kvm_arch_vcpu_put()
1303                  */
1304                 current->thread.fpu.vxrs =
1305                         (__vector128 *)&vcpu->run->s.regs.vrs;
1306         } else
1307                 load_fpu_from(&vcpu->arch.guest_fpregs);
1308
1309         if (test_fp_ctl(current->thread.fpu.fpc))
1310                 /* User space provided an invalid FPC, let's clear it */
1311                 current->thread.fpu.fpc = 0;
1312
1313         save_access_regs(vcpu->arch.host_acrs);
1314         restore_access_regs(vcpu->run->s.regs.acrs);
1315         gmap_enable(vcpu->arch.gmap);
1316         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1317 }
1318
1319 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1320 {
1321         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1322         gmap_disable(vcpu->arch.gmap);
1323
1324         save_fpu_regs();
1325
1326         if (test_kvm_facility(vcpu->kvm, 129))
1327                 /*
1328                  * kvm_arch_vcpu_load() set up the register save area to
1329                  * the &vcpu->run->s.regs.vrs and, thus, the vector registers
1330                  * are already saved.  Only the floating-point control must be
1331                  * copied.
1332                  */
1333                 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1334         else
1335                 save_fpu_to(&vcpu->arch.guest_fpregs);
1336         load_fpu_from(&vcpu->arch.host_fpregs);
1337
1338         save_access_regs(vcpu->run->s.regs.acrs);
1339         restore_access_regs(vcpu->arch.host_acrs);
1340 }
1341
1342 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1343 {
1344         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1345         vcpu->arch.sie_block->gpsw.mask = 0UL;
1346         vcpu->arch.sie_block->gpsw.addr = 0UL;
1347         kvm_s390_set_prefix(vcpu, 0);
1348         vcpu->arch.sie_block->cputm     = 0UL;
1349         vcpu->arch.sie_block->ckc       = 0UL;
1350         vcpu->arch.sie_block->todpr     = 0;
1351         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1352         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1353         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1354         vcpu->arch.guest_fpregs.fpc = 0;
1355         asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1356         vcpu->arch.sie_block->gbea = 1;
1357         vcpu->arch.sie_block->pp = 0;
1358         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1359         kvm_clear_async_pf_completion_queue(vcpu);
1360         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1361                 kvm_s390_vcpu_stop(vcpu);
1362         kvm_s390_clear_local_irqs(vcpu);
1363 }
1364
1365 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1366 {
1367         mutex_lock(&vcpu->kvm->lock);
1368         preempt_disable();
1369         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1370         preempt_enable();
1371         mutex_unlock(&vcpu->kvm->lock);
1372         if (!kvm_is_ucontrol(vcpu->kvm))
1373                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1374 }
1375
1376 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1377 {
1378         if (!test_kvm_facility(vcpu->kvm, 76))
1379                 return;
1380
1381         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1382
1383         if (vcpu->kvm->arch.crypto.aes_kw)
1384                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1385         if (vcpu->kvm->arch.crypto.dea_kw)
1386                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1387
1388         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1389 }
1390
1391 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1392 {
1393         free_page(vcpu->arch.sie_block->cbrlo);
1394         vcpu->arch.sie_block->cbrlo = 0;
1395 }
1396
1397 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1398 {
1399         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1400         if (!vcpu->arch.sie_block->cbrlo)
1401                 return -ENOMEM;
1402
1403         vcpu->arch.sie_block->ecb2 |= 0x80;
1404         vcpu->arch.sie_block->ecb2 &= ~0x08;
1405         return 0;
1406 }
1407
1408 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1409 {
1410         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1411
1412         vcpu->arch.cpu_id = model->cpu_id;
1413         vcpu->arch.sie_block->ibc = model->ibc;
1414         vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1415 }
1416
1417 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1418 {
1419         int rc = 0;
1420
1421         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1422                                                     CPUSTAT_SM |
1423                                                     CPUSTAT_STOPPED);
1424
1425         if (test_kvm_facility(vcpu->kvm, 78))
1426                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1427         else if (test_kvm_facility(vcpu->kvm, 8))
1428                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1429
1430         kvm_s390_vcpu_setup_model(vcpu);
1431
1432         vcpu->arch.sie_block->ecb   = 6;
1433         if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1434                 vcpu->arch.sie_block->ecb |= 0x10;
1435
1436         vcpu->arch.sie_block->ecb2  = 8;
1437         vcpu->arch.sie_block->eca   = 0xC1002000U;
1438         if (sclp.has_siif)
1439                 vcpu->arch.sie_block->eca |= 1;
1440         if (sclp.has_sigpif)
1441                 vcpu->arch.sie_block->eca |= 0x10000000U;
1442         if (test_kvm_facility(vcpu->kvm, 129)) {
1443                 vcpu->arch.sie_block->eca |= 0x00020000;
1444                 vcpu->arch.sie_block->ecd |= 0x20000000;
1445         }
1446         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1447
1448         if (vcpu->kvm->arch.use_cmma) {
1449                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1450                 if (rc)
1451                         return rc;
1452         }
1453         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1454         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1455
1456         kvm_s390_vcpu_crypto_setup(vcpu);
1457
1458         return rc;
1459 }
1460
1461 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1462                                       unsigned int id)
1463 {
1464         struct kvm_vcpu *vcpu;
1465         struct sie_page *sie_page;
1466         int rc = -EINVAL;
1467
1468         if (id >= KVM_MAX_VCPUS)
1469                 goto out;
1470
1471         rc = -ENOMEM;
1472
1473         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1474         if (!vcpu)
1475                 goto out;
1476
1477         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1478         if (!sie_page)
1479                 goto out_free_cpu;
1480
1481         vcpu->arch.sie_block = &sie_page->sie_block;
1482         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1483
1484         vcpu->arch.sie_block->icpua = id;
1485         if (!kvm_is_ucontrol(kvm)) {
1486                 if (!kvm->arch.sca) {
1487                         WARN_ON_ONCE(1);
1488                         goto out_free_cpu;
1489                 }
1490                 if (!kvm->arch.sca->cpu[id].sda)
1491                         kvm->arch.sca->cpu[id].sda =
1492                                 (__u64) vcpu->arch.sie_block;
1493                 vcpu->arch.sie_block->scaoh =
1494                         (__u32)(((__u64)kvm->arch.sca) >> 32);
1495                 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
1496                 set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
1497         }
1498
1499         spin_lock_init(&vcpu->arch.local_int.lock);
1500         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1501         vcpu->arch.local_int.wq = &vcpu->wq;
1502         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1503
1504         /*
1505          * Allocate a save area for floating-point registers.  If the vector
1506          * extension is available, register contents are saved in the SIE
1507          * control block.  The allocated save area is still required in
1508          * particular places, for example, in kvm_s390_vcpu_store_status().
1509          */
1510         vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
1511                                                GFP_KERNEL);
1512         if (!vcpu->arch.guest_fpregs.fprs) {
1513                 rc = -ENOMEM;
1514                 goto out_free_sie_block;
1515         }
1516
1517         rc = kvm_vcpu_init(vcpu, kvm, id);
1518         if (rc)
1519                 goto out_free_sie_block;
1520         VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1521                  vcpu->arch.sie_block);
1522         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1523
1524         return vcpu;
1525 out_free_sie_block:
1526         free_page((unsigned long)(vcpu->arch.sie_block));
1527 out_free_cpu:
1528         kmem_cache_free(kvm_vcpu_cache, vcpu);
1529 out:
1530         return ERR_PTR(rc);
1531 }
1532
1533 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1534 {
1535         return kvm_s390_vcpu_has_irq(vcpu, 0);
1536 }
1537
1538 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1539 {
1540         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1541         exit_sie(vcpu);
1542 }
1543
1544 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1545 {
1546         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1547 }
1548
1549 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1550 {
1551         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1552         exit_sie(vcpu);
1553 }
1554
1555 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1556 {
1557         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1558 }
1559
1560 /*
1561  * Kick a guest cpu out of SIE and wait until SIE is not running.
1562  * If the CPU is not running (e.g. waiting as idle) the function will
1563  * return immediately. */
1564 void exit_sie(struct kvm_vcpu *vcpu)
1565 {
1566         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1567         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1568                 cpu_relax();
1569 }
1570
1571 /* Kick a guest cpu out of SIE to process a request synchronously */
1572 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1573 {
1574         kvm_make_request(req, vcpu);
1575         kvm_s390_vcpu_request(vcpu);
1576 }
1577
1578 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1579 {
1580         int i;
1581         struct kvm *kvm = gmap->private;
1582         struct kvm_vcpu *vcpu;
1583
1584         kvm_for_each_vcpu(i, vcpu, kvm) {
1585                 /* match against both prefix pages */
1586                 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1587                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1588                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1589                 }
1590         }
1591 }
1592
1593 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1594 {
1595         /* kvm common code refers to this, but never calls it */
1596         BUG();
1597         return 0;
1598 }
1599
1600 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1601                                            struct kvm_one_reg *reg)
1602 {
1603         int r = -EINVAL;
1604
1605         switch (reg->id) {
1606         case KVM_REG_S390_TODPR:
1607                 r = put_user(vcpu->arch.sie_block->todpr,
1608                              (u32 __user *)reg->addr);
1609                 break;
1610         case KVM_REG_S390_EPOCHDIFF:
1611                 r = put_user(vcpu->arch.sie_block->epoch,
1612                              (u64 __user *)reg->addr);
1613                 break;
1614         case KVM_REG_S390_CPU_TIMER:
1615                 r = put_user(vcpu->arch.sie_block->cputm,
1616                              (u64 __user *)reg->addr);
1617                 break;
1618         case KVM_REG_S390_CLOCK_COMP:
1619                 r = put_user(vcpu->arch.sie_block->ckc,
1620                              (u64 __user *)reg->addr);
1621                 break;
1622         case KVM_REG_S390_PFTOKEN:
1623                 r = put_user(vcpu->arch.pfault_token,
1624                              (u64 __user *)reg->addr);
1625                 break;
1626         case KVM_REG_S390_PFCOMPARE:
1627                 r = put_user(vcpu->arch.pfault_compare,
1628                              (u64 __user *)reg->addr);
1629                 break;
1630         case KVM_REG_S390_PFSELECT:
1631                 r = put_user(vcpu->arch.pfault_select,
1632                              (u64 __user *)reg->addr);
1633                 break;
1634         case KVM_REG_S390_PP:
1635                 r = put_user(vcpu->arch.sie_block->pp,
1636                              (u64 __user *)reg->addr);
1637                 break;
1638         case KVM_REG_S390_GBEA:
1639                 r = put_user(vcpu->arch.sie_block->gbea,
1640                              (u64 __user *)reg->addr);
1641                 break;
1642         default:
1643                 break;
1644         }
1645
1646         return r;
1647 }
1648
1649 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1650                                            struct kvm_one_reg *reg)
1651 {
1652         int r = -EINVAL;
1653
1654         switch (reg->id) {
1655         case KVM_REG_S390_TODPR:
1656                 r = get_user(vcpu->arch.sie_block->todpr,
1657                              (u32 __user *)reg->addr);
1658                 break;
1659         case KVM_REG_S390_EPOCHDIFF:
1660                 r = get_user(vcpu->arch.sie_block->epoch,
1661                              (u64 __user *)reg->addr);
1662                 break;
1663         case KVM_REG_S390_CPU_TIMER:
1664                 r = get_user(vcpu->arch.sie_block->cputm,
1665                              (u64 __user *)reg->addr);
1666                 break;
1667         case KVM_REG_S390_CLOCK_COMP:
1668                 r = get_user(vcpu->arch.sie_block->ckc,
1669                              (u64 __user *)reg->addr);
1670                 break;
1671         case KVM_REG_S390_PFTOKEN:
1672                 r = get_user(vcpu->arch.pfault_token,
1673                              (u64 __user *)reg->addr);
1674                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1675                         kvm_clear_async_pf_completion_queue(vcpu);
1676                 break;
1677         case KVM_REG_S390_PFCOMPARE:
1678                 r = get_user(vcpu->arch.pfault_compare,
1679                              (u64 __user *)reg->addr);
1680                 break;
1681         case KVM_REG_S390_PFSELECT:
1682                 r = get_user(vcpu->arch.pfault_select,
1683                              (u64 __user *)reg->addr);
1684                 break;
1685         case KVM_REG_S390_PP:
1686                 r = get_user(vcpu->arch.sie_block->pp,
1687                              (u64 __user *)reg->addr);
1688                 break;
1689         case KVM_REG_S390_GBEA:
1690                 r = get_user(vcpu->arch.sie_block->gbea,
1691                              (u64 __user *)reg->addr);
1692                 break;
1693         default:
1694                 break;
1695         }
1696
1697         return r;
1698 }
1699
1700 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1701 {
1702         kvm_s390_vcpu_initial_reset(vcpu);
1703         return 0;
1704 }
1705
1706 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1707 {
1708         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1709         return 0;
1710 }
1711
1712 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1713 {
1714         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1715         return 0;
1716 }
1717
1718 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1719                                   struct kvm_sregs *sregs)
1720 {
1721         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1722         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1723         restore_access_regs(vcpu->run->s.regs.acrs);
1724         return 0;
1725 }
1726
1727 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1728                                   struct kvm_sregs *sregs)
1729 {
1730         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1731         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1732         return 0;
1733 }
1734
1735 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1736 {
1737         if (test_fp_ctl(fpu->fpc))
1738                 return -EINVAL;
1739         memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1740         vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1741         save_fpu_regs();
1742         load_fpu_from(&vcpu->arch.guest_fpregs);
1743         return 0;
1744 }
1745
1746 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1747 {
1748         memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1749         fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1750         return 0;
1751 }
1752
1753 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1754 {
1755         int rc = 0;
1756
1757         if (!is_vcpu_stopped(vcpu))
1758                 rc = -EBUSY;
1759         else {
1760                 vcpu->run->psw_mask = psw.mask;
1761                 vcpu->run->psw_addr = psw.addr;
1762         }
1763         return rc;
1764 }
1765
1766 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1767                                   struct kvm_translation *tr)
1768 {
1769         return -EINVAL; /* not implemented yet */
1770 }
1771
1772 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1773                               KVM_GUESTDBG_USE_HW_BP | \
1774                               KVM_GUESTDBG_ENABLE)
1775
1776 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1777                                         struct kvm_guest_debug *dbg)
1778 {
1779         int rc = 0;
1780
1781         vcpu->guest_debug = 0;
1782         kvm_s390_clear_bp_data(vcpu);
1783
1784         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1785                 return -EINVAL;
1786
1787         if (dbg->control & KVM_GUESTDBG_ENABLE) {
1788                 vcpu->guest_debug = dbg->control;
1789                 /* enforce guest PER */
1790                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1791
1792                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1793                         rc = kvm_s390_import_bp_data(vcpu, dbg);
1794         } else {
1795                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1796                 vcpu->arch.guestdbg.last_bp = 0;
1797         }
1798
1799         if (rc) {
1800                 vcpu->guest_debug = 0;
1801                 kvm_s390_clear_bp_data(vcpu);
1802                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1803         }
1804
1805         return rc;
1806 }
1807
1808 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1809                                     struct kvm_mp_state *mp_state)
1810 {
1811         /* CHECK_STOP and LOAD are not supported yet */
1812         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1813                                        KVM_MP_STATE_OPERATING;
1814 }
1815
1816 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1817                                     struct kvm_mp_state *mp_state)
1818 {
1819         int rc = 0;
1820
1821         /* user space knows about this interface - let it control the state */
1822         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1823
1824         switch (mp_state->mp_state) {
1825         case KVM_MP_STATE_STOPPED:
1826                 kvm_s390_vcpu_stop(vcpu);
1827                 break;
1828         case KVM_MP_STATE_OPERATING:
1829                 kvm_s390_vcpu_start(vcpu);
1830                 break;
1831         case KVM_MP_STATE_LOAD:
1832         case KVM_MP_STATE_CHECK_STOP:
1833                 /* fall through - CHECK_STOP and LOAD are not supported yet */
1834         default:
1835                 rc = -ENXIO;
1836         }
1837
1838         return rc;
1839 }
1840
1841 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1842 {
1843         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1844 }
1845
1846 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1847 {
1848 retry:
1849         kvm_s390_vcpu_request_handled(vcpu);
1850         if (!vcpu->requests)
1851                 return 0;
1852         /*
1853          * We use MMU_RELOAD just to re-arm the ipte notifier for the
1854          * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1855          * This ensures that the ipte instruction for this request has
1856          * already finished. We might race against a second unmapper that
1857          * wants to set the blocking bit. Lets just retry the request loop.
1858          */
1859         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1860                 int rc;
1861                 rc = gmap_ipte_notify(vcpu->arch.gmap,
1862                                       kvm_s390_get_prefix(vcpu),
1863                                       PAGE_SIZE * 2);
1864                 if (rc)
1865                         return rc;
1866                 goto retry;
1867         }
1868
1869         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1870                 vcpu->arch.sie_block->ihcpu = 0xffff;
1871                 goto retry;
1872         }
1873
1874         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1875                 if (!ibs_enabled(vcpu)) {
1876                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1877                         atomic_or(CPUSTAT_IBS,
1878                                         &vcpu->arch.sie_block->cpuflags);
1879                 }
1880                 goto retry;
1881         }
1882
1883         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1884                 if (ibs_enabled(vcpu)) {
1885                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1886                         atomic_andnot(CPUSTAT_IBS,
1887                                           &vcpu->arch.sie_block->cpuflags);
1888                 }
1889                 goto retry;
1890         }
1891
1892         /* nothing to do, just clear the request */
1893         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
1894
1895         return 0;
1896 }
1897
1898 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
1899 {
1900         struct kvm_vcpu *vcpu;
1901         int i;
1902
1903         mutex_lock(&kvm->lock);
1904         preempt_disable();
1905         kvm->arch.epoch = tod - get_tod_clock();
1906         kvm_s390_vcpu_block_all(kvm);
1907         kvm_for_each_vcpu(i, vcpu, kvm)
1908                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
1909         kvm_s390_vcpu_unblock_all(kvm);
1910         preempt_enable();
1911         mutex_unlock(&kvm->lock);
1912 }
1913
1914 /**
1915  * kvm_arch_fault_in_page - fault-in guest page if necessary
1916  * @vcpu: The corresponding virtual cpu
1917  * @gpa: Guest physical address
1918  * @writable: Whether the page should be writable or not
1919  *
1920  * Make sure that a guest page has been faulted-in on the host.
1921  *
1922  * Return: Zero on success, negative error code otherwise.
1923  */
1924 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1925 {
1926         return gmap_fault(vcpu->arch.gmap, gpa,
1927                           writable ? FAULT_FLAG_WRITE : 0);
1928 }
1929
1930 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1931                                       unsigned long token)
1932 {
1933         struct kvm_s390_interrupt inti;
1934         struct kvm_s390_irq irq;
1935
1936         if (start_token) {
1937                 irq.u.ext.ext_params2 = token;
1938                 irq.type = KVM_S390_INT_PFAULT_INIT;
1939                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1940         } else {
1941                 inti.type = KVM_S390_INT_PFAULT_DONE;
1942                 inti.parm64 = token;
1943                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1944         }
1945 }
1946
1947 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1948                                      struct kvm_async_pf *work)
1949 {
1950         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1951         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1952 }
1953
1954 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1955                                  struct kvm_async_pf *work)
1956 {
1957         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1958         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1959 }
1960
1961 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1962                                struct kvm_async_pf *work)
1963 {
1964         /* s390 will always inject the page directly */
1965 }
1966
1967 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1968 {
1969         /*
1970          * s390 will always inject the page directly,
1971          * but we still want check_async_completion to cleanup
1972          */
1973         return true;
1974 }
1975
1976 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1977 {
1978         hva_t hva;
1979         struct kvm_arch_async_pf arch;
1980         int rc;
1981
1982         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1983                 return 0;
1984         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1985             vcpu->arch.pfault_compare)
1986                 return 0;
1987         if (psw_extint_disabled(vcpu))
1988                 return 0;
1989         if (kvm_s390_vcpu_has_irq(vcpu, 0))
1990                 return 0;
1991         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1992                 return 0;
1993         if (!vcpu->arch.gmap->pfault_enabled)
1994                 return 0;
1995
1996         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
1997         hva += current->thread.gmap_addr & ~PAGE_MASK;
1998         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
1999                 return 0;
2000
2001         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2002         return rc;
2003 }
2004
2005 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2006 {
2007         int rc, cpuflags;
2008
2009         /*
2010          * On s390 notifications for arriving pages will be delivered directly
2011          * to the guest but the house keeping for completed pfaults is
2012          * handled outside the worker.
2013          */
2014         kvm_check_async_pf_completion(vcpu);
2015
2016         memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
2017
2018         if (need_resched())
2019                 schedule();
2020
2021         if (test_cpu_flag(CIF_MCCK_PENDING))
2022                 s390_handle_mcck();
2023
2024         if (!kvm_is_ucontrol(vcpu->kvm)) {
2025                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2026                 if (rc)
2027                         return rc;
2028         }
2029
2030         rc = kvm_s390_handle_requests(vcpu);
2031         if (rc)
2032                 return rc;
2033
2034         if (guestdbg_enabled(vcpu)) {
2035                 kvm_s390_backup_guest_per_regs(vcpu);
2036                 kvm_s390_patch_guest_per_regs(vcpu);
2037         }
2038
2039         vcpu->arch.sie_block->icptcode = 0;
2040         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2041         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2042         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2043
2044         return 0;
2045 }
2046
2047 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2048 {
2049         psw_t *psw = &vcpu->arch.sie_block->gpsw;
2050         u8 opcode;
2051         int rc;
2052
2053         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2054         trace_kvm_s390_sie_fault(vcpu);
2055
2056         /*
2057          * We want to inject an addressing exception, which is defined as a
2058          * suppressing or terminating exception. However, since we came here
2059          * by a DAT access exception, the PSW still points to the faulting
2060          * instruction since DAT exceptions are nullifying. So we've got
2061          * to look up the current opcode to get the length of the instruction
2062          * to be able to forward the PSW.
2063          */
2064         rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2065         if (rc)
2066                 return kvm_s390_inject_prog_cond(vcpu, rc);
2067         psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2068
2069         return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2070 }
2071
2072 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2073 {
2074         int rc = -1;
2075
2076         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2077                    vcpu->arch.sie_block->icptcode);
2078         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2079
2080         if (guestdbg_enabled(vcpu))
2081                 kvm_s390_restore_guest_per_regs(vcpu);
2082
2083         if (exit_reason >= 0) {
2084                 rc = 0;
2085         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2086                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2087                 vcpu->run->s390_ucontrol.trans_exc_code =
2088                                                 current->thread.gmap_addr;
2089                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2090                 rc = -EREMOTE;
2091
2092         } else if (current->thread.gmap_pfault) {
2093                 trace_kvm_s390_major_guest_pfault(vcpu);
2094                 current->thread.gmap_pfault = 0;
2095                 if (kvm_arch_setup_async_pf(vcpu)) {
2096                         rc = 0;
2097                 } else {
2098                         gpa_t gpa = current->thread.gmap_addr;
2099                         rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
2100                 }
2101         }
2102
2103         if (rc == -1)
2104                 rc = vcpu_post_run_fault_in_sie(vcpu);
2105
2106         memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
2107
2108         if (rc == 0) {
2109                 if (kvm_is_ucontrol(vcpu->kvm))
2110                         /* Don't exit for host interrupts. */
2111                         rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
2112                 else
2113                         rc = kvm_handle_sie_intercept(vcpu);
2114         }
2115
2116         return rc;
2117 }
2118
2119 static int __vcpu_run(struct kvm_vcpu *vcpu)
2120 {
2121         int rc, exit_reason;
2122
2123         /*
2124          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2125          * ning the guest), so that memslots (and other stuff) are protected
2126          */
2127         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2128
2129         do {
2130                 rc = vcpu_pre_run(vcpu);
2131                 if (rc)
2132                         break;
2133
2134                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2135                 /*
2136                  * As PF_VCPU will be used in fault handler, between
2137                  * guest_enter and guest_exit should be no uaccess.
2138                  */
2139                 local_irq_disable();
2140                 __kvm_guest_enter();
2141                 local_irq_enable();
2142                 exit_reason = sie64a(vcpu->arch.sie_block,
2143                                      vcpu->run->s.regs.gprs);
2144                 local_irq_disable();
2145                 __kvm_guest_exit();
2146                 local_irq_enable();
2147                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2148
2149                 rc = vcpu_post_run(vcpu, exit_reason);
2150         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2151
2152         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2153         return rc;
2154 }
2155
2156 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2157 {
2158         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2159         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2160         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2161                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2162         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2163                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2164                 /* some control register changes require a tlb flush */
2165                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2166         }
2167         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2168                 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2169                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2170                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2171                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2172                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2173         }
2174         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2175                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2176                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2177                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2178                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2179                         kvm_clear_async_pf_completion_queue(vcpu);
2180         }
2181         kvm_run->kvm_dirty_regs = 0;
2182 }
2183
2184 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2185 {
2186         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2187         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2188         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2189         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2190         kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2191         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2192         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2193         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2194         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2195         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2196         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2197         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2198 }
2199
2200 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2201 {
2202         int rc;
2203         sigset_t sigsaved;
2204
2205         if (guestdbg_exit_pending(vcpu)) {
2206                 kvm_s390_prepare_debug_exit(vcpu);
2207                 return 0;
2208         }
2209
2210         if (vcpu->sigset_active)
2211                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2212
2213         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2214                 kvm_s390_vcpu_start(vcpu);
2215         } else if (is_vcpu_stopped(vcpu)) {
2216                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2217                                    vcpu->vcpu_id);
2218                 return -EINVAL;
2219         }
2220
2221         sync_regs(vcpu, kvm_run);
2222
2223         might_fault();
2224         rc = __vcpu_run(vcpu);
2225
2226         if (signal_pending(current) && !rc) {
2227                 kvm_run->exit_reason = KVM_EXIT_INTR;
2228                 rc = -EINTR;
2229         }
2230
2231         if (guestdbg_exit_pending(vcpu) && !rc)  {
2232                 kvm_s390_prepare_debug_exit(vcpu);
2233                 rc = 0;
2234         }
2235
2236         if (rc == -EOPNOTSUPP) {
2237                 /* intercept cannot be handled in-kernel, prepare kvm-run */
2238                 kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
2239                 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2240                 kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
2241                 kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
2242                 rc = 0;
2243         }
2244
2245         if (rc == -EREMOTE) {
2246                 /* intercept was handled, but userspace support is needed
2247                  * kvm_run has been prepared by the handler */
2248                 rc = 0;
2249         }
2250
2251         store_regs(vcpu, kvm_run);
2252
2253         if (vcpu->sigset_active)
2254                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2255
2256         vcpu->stat.exit_userspace++;
2257         return rc;
2258 }
2259
2260 /*
2261  * store status at address
2262  * we use have two special cases:
2263  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2264  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2265  */
2266 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2267 {
2268         unsigned char archmode = 1;
2269         unsigned int px;
2270         u64 clkcomp;
2271         int rc;
2272
2273         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2274                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2275                         return -EFAULT;
2276                 gpa = SAVE_AREA_BASE;
2277         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2278                 if (write_guest_real(vcpu, 163, &archmode, 1))
2279                         return -EFAULT;
2280                 gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2281         }
2282         rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2283                              vcpu->arch.guest_fpregs.fprs, 128);
2284         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2285                               vcpu->run->s.regs.gprs, 128);
2286         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2287                               &vcpu->arch.sie_block->gpsw, 16);
2288         px = kvm_s390_get_prefix(vcpu);
2289         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2290                               &px, 4);
2291         rc |= write_guest_abs(vcpu,
2292                               gpa + offsetof(struct save_area, fp_ctrl_reg),
2293                               &vcpu->arch.guest_fpregs.fpc, 4);
2294         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2295                               &vcpu->arch.sie_block->todpr, 4);
2296         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2297                               &vcpu->arch.sie_block->cputm, 8);
2298         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2299         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2300                               &clkcomp, 8);
2301         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2302                               &vcpu->run->s.regs.acrs, 64);
2303         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2304                               &vcpu->arch.sie_block->gcr, 128);
2305         return rc ? -EFAULT : 0;
2306 }
2307
2308 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2309 {
2310         /*
2311          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2312          * copying in vcpu load/put. Lets update our copies before we save
2313          * it into the save area
2314          */
2315         save_fpu_regs();
2316         if (test_kvm_facility(vcpu->kvm, 129)) {
2317                 /*
2318                  * If the vector extension is available, the vector registers
2319                  * which overlaps with floating-point registers are saved in
2320                  * the SIE-control block.  Hence, extract the floating-point
2321                  * registers and the FPC value and store them in the
2322                  * guest_fpregs structure.
2323                  */
2324                 vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
2325                 convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
2326                                  current->thread.fpu.vxrs);
2327         } else
2328                 save_fpu_to(&vcpu->arch.guest_fpregs);
2329         save_access_regs(vcpu->run->s.regs.acrs);
2330
2331         return kvm_s390_store_status_unloaded(vcpu, addr);
2332 }
2333
2334 /*
2335  * store additional status at address
2336  */
2337 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2338                                         unsigned long gpa)
2339 {
2340         /* Only bits 0-53 are used for address formation */
2341         if (!(gpa & ~0x3ff))
2342                 return 0;
2343
2344         return write_guest_abs(vcpu, gpa & ~0x3ff,
2345                                (void *)&vcpu->run->s.regs.vrs, 512);
2346 }
2347
2348 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2349 {
2350         if (!test_kvm_facility(vcpu->kvm, 129))
2351                 return 0;
2352
2353         /*
2354          * The guest VXRS are in the host VXRs due to the lazy
2355          * copying in vcpu load/put. We can simply call save_fpu_regs()
2356          * to save the current register state because we are in the
2357          * middle of a load/put cycle.
2358          *
2359          * Let's update our copies before we save it into the save area.
2360          */
2361         save_fpu_regs();
2362
2363         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2364 }
2365
2366 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2367 {
2368         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2369         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2370 }
2371
2372 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2373 {
2374         unsigned int i;
2375         struct kvm_vcpu *vcpu;
2376
2377         kvm_for_each_vcpu(i, vcpu, kvm) {
2378                 __disable_ibs_on_vcpu(vcpu);
2379         }
2380 }
2381
2382 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2383 {
2384         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2385         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2386 }
2387
2388 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2389 {
2390         int i, online_vcpus, started_vcpus = 0;
2391
2392         if (!is_vcpu_stopped(vcpu))
2393                 return;
2394
2395         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2396         /* Only one cpu at a time may enter/leave the STOPPED state. */
2397         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2398         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2399
2400         for (i = 0; i < online_vcpus; i++) {
2401                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2402                         started_vcpus++;
2403         }
2404
2405         if (started_vcpus == 0) {
2406                 /* we're the only active VCPU -> speed it up */
2407                 __enable_ibs_on_vcpu(vcpu);
2408         } else if (started_vcpus == 1) {
2409                 /*
2410                  * As we are starting a second VCPU, we have to disable
2411                  * the IBS facility on all VCPUs to remove potentially
2412                  * oustanding ENABLE requests.
2413                  */
2414                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2415         }
2416
2417         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2418         /*
2419          * Another VCPU might have used IBS while we were offline.
2420          * Let's play safe and flush the VCPU at startup.
2421          */
2422         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2423         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2424         return;
2425 }
2426
2427 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2428 {
2429         int i, online_vcpus, started_vcpus = 0;
2430         struct kvm_vcpu *started_vcpu = NULL;
2431
2432         if (is_vcpu_stopped(vcpu))
2433                 return;
2434
2435         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2436         /* Only one cpu at a time may enter/leave the STOPPED state. */
2437         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2438         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2439
2440         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2441         kvm_s390_clear_stop_irq(vcpu);
2442
2443         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2444         __disable_ibs_on_vcpu(vcpu);
2445
2446         for (i = 0; i < online_vcpus; i++) {
2447                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2448                         started_vcpus++;
2449                         started_vcpu = vcpu->kvm->vcpus[i];
2450                 }
2451         }
2452
2453         if (started_vcpus == 1) {
2454                 /*
2455                  * As we only have one VCPU left, we want to enable the
2456                  * IBS facility for that VCPU to speed it up.
2457                  */
2458                 __enable_ibs_on_vcpu(started_vcpu);
2459         }
2460
2461         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2462         return;
2463 }
2464
2465 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2466                                      struct kvm_enable_cap *cap)
2467 {
2468         int r;
2469
2470         if (cap->flags)
2471                 return -EINVAL;
2472
2473         switch (cap->cap) {
2474         case KVM_CAP_S390_CSS_SUPPORT:
2475                 if (!vcpu->kvm->arch.css_support) {
2476                         vcpu->kvm->arch.css_support = 1;
2477                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2478                         trace_kvm_s390_enable_css(vcpu->kvm);
2479                 }
2480                 r = 0;
2481                 break;
2482         default:
2483                 r = -EINVAL;
2484                 break;
2485         }
2486         return r;
2487 }
2488
2489 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2490                                   struct kvm_s390_mem_op *mop)
2491 {
2492         void __user *uaddr = (void __user *)mop->buf;
2493         void *tmpbuf = NULL;
2494         int r, srcu_idx;
2495         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2496                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
2497
2498         if (mop->flags & ~supported_flags)
2499                 return -EINVAL;
2500
2501         if (mop->size > MEM_OP_MAX_SIZE)
2502                 return -E2BIG;
2503
2504         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2505                 tmpbuf = vmalloc(mop->size);
2506                 if (!tmpbuf)
2507                         return -ENOMEM;
2508         }
2509
2510         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2511
2512         switch (mop->op) {
2513         case KVM_S390_MEMOP_LOGICAL_READ:
2514                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2515                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2516                         break;
2517                 }
2518                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2519                 if (r == 0) {
2520                         if (copy_to_user(uaddr, tmpbuf, mop->size))
2521                                 r = -EFAULT;
2522                 }
2523                 break;
2524         case KVM_S390_MEMOP_LOGICAL_WRITE:
2525                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2526                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2527                         break;
2528                 }
2529                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2530                         r = -EFAULT;
2531                         break;
2532                 }
2533                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2534                 break;
2535         default:
2536                 r = -EINVAL;
2537         }
2538
2539         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2540
2541         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2542                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2543
2544         vfree(tmpbuf);
2545         return r;
2546 }
2547
2548 long kvm_arch_vcpu_ioctl(struct file *filp,
2549                          unsigned int ioctl, unsigned long arg)
2550 {
2551         struct kvm_vcpu *vcpu = filp->private_data;
2552         void __user *argp = (void __user *)arg;
2553         int idx;
2554         long r;
2555
2556         switch (ioctl) {
2557         case KVM_S390_IRQ: {
2558                 struct kvm_s390_irq s390irq;
2559
2560                 r = -EFAULT;
2561                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2562                         break;
2563                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2564                 break;
2565         }
2566         case KVM_S390_INTERRUPT: {
2567                 struct kvm_s390_interrupt s390int;
2568                 struct kvm_s390_irq s390irq;
2569
2570                 r = -EFAULT;
2571                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2572                         break;
2573                 if (s390int_to_s390irq(&s390int, &s390irq))
2574                         return -EINVAL;
2575                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2576                 break;
2577         }
2578         case KVM_S390_STORE_STATUS:
2579                 idx = srcu_read_lock(&vcpu->kvm->srcu);
2580                 r = kvm_s390_vcpu_store_status(vcpu, arg);
2581                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2582                 break;
2583         case KVM_S390_SET_INITIAL_PSW: {
2584                 psw_t psw;
2585
2586                 r = -EFAULT;
2587                 if (copy_from_user(&psw, argp, sizeof(psw)))
2588                         break;
2589                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2590                 break;
2591         }
2592         case KVM_S390_INITIAL_RESET:
2593                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2594                 break;
2595         case KVM_SET_ONE_REG:
2596         case KVM_GET_ONE_REG: {
2597                 struct kvm_one_reg reg;
2598                 r = -EFAULT;
2599                 if (copy_from_user(&reg, argp, sizeof(reg)))
2600                         break;
2601                 if (ioctl == KVM_SET_ONE_REG)
2602                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2603                 else
2604                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2605                 break;
2606         }
2607 #ifdef CONFIG_KVM_S390_UCONTROL
2608         case KVM_S390_UCAS_MAP: {
2609                 struct kvm_s390_ucas_mapping ucasmap;
2610
2611                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2612                         r = -EFAULT;
2613                         break;
2614                 }
2615
2616                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2617                         r = -EINVAL;
2618                         break;
2619                 }
2620
2621                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2622                                      ucasmap.vcpu_addr, ucasmap.length);
2623                 break;
2624         }
2625         case KVM_S390_UCAS_UNMAP: {
2626                 struct kvm_s390_ucas_mapping ucasmap;
2627
2628                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2629                         r = -EFAULT;
2630                         break;
2631                 }
2632
2633                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2634                         r = -EINVAL;
2635                         break;
2636                 }
2637
2638                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2639                         ucasmap.length);
2640                 break;
2641         }
2642 #endif
2643         case KVM_S390_VCPU_FAULT: {
2644                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2645                 break;
2646         }
2647         case KVM_ENABLE_CAP:
2648         {
2649                 struct kvm_enable_cap cap;
2650                 r = -EFAULT;
2651                 if (copy_from_user(&cap, argp, sizeof(cap)))
2652                         break;
2653                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2654                 break;
2655         }
2656         case KVM_S390_MEM_OP: {
2657                 struct kvm_s390_mem_op mem_op;
2658
2659                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2660                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2661                 else
2662                         r = -EFAULT;
2663                 break;
2664         }
2665         case KVM_S390_SET_IRQ_STATE: {
2666                 struct kvm_s390_irq_state irq_state;
2667
2668                 r = -EFAULT;
2669                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2670                         break;
2671                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2672                     irq_state.len == 0 ||
2673                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2674                         r = -EINVAL;
2675                         break;
2676                 }
2677                 r = kvm_s390_set_irq_state(vcpu,
2678                                            (void __user *) irq_state.buf,
2679                                            irq_state.len);
2680                 break;
2681         }
2682         case KVM_S390_GET_IRQ_STATE: {
2683                 struct kvm_s390_irq_state irq_state;
2684
2685                 r = -EFAULT;
2686                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2687                         break;
2688                 if (irq_state.len == 0) {
2689                         r = -EINVAL;
2690                         break;
2691                 }
2692                 r = kvm_s390_get_irq_state(vcpu,
2693                                            (__u8 __user *)  irq_state.buf,
2694                                            irq_state.len);
2695                 break;
2696         }
2697         default:
2698                 r = -ENOTTY;
2699         }
2700         return r;
2701 }
2702
2703 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2704 {
2705 #ifdef CONFIG_KVM_S390_UCONTROL
2706         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2707                  && (kvm_is_ucontrol(vcpu->kvm))) {
2708                 vmf->page = virt_to_page(vcpu->arch.sie_block);
2709                 get_page(vmf->page);
2710                 return 0;
2711         }
2712 #endif
2713         return VM_FAULT_SIGBUS;
2714 }
2715
2716 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2717                             unsigned long npages)
2718 {
2719         return 0;
2720 }
2721
2722 /* Section: memory related */
2723 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2724                                    struct kvm_memory_slot *memslot,
2725                                    const struct kvm_userspace_memory_region *mem,
2726                                    enum kvm_mr_change change)
2727 {
2728         /* A few sanity checks. We can have memory slots which have to be
2729            located/ended at a segment boundary (1MB). The memory in userland is
2730            ok to be fragmented into various different vmas. It is okay to mmap()
2731            and munmap() stuff in this slot after doing this call at any time */
2732
2733         if (mem->userspace_addr & 0xffffful)
2734                 return -EINVAL;
2735
2736         if (mem->memory_size & 0xffffful)
2737                 return -EINVAL;
2738
2739         return 0;
2740 }
2741
2742 void kvm_arch_commit_memory_region(struct kvm *kvm,
2743                                 const struct kvm_userspace_memory_region *mem,
2744                                 const struct kvm_memory_slot *old,
2745                                 const struct kvm_memory_slot *new,
2746                                 enum kvm_mr_change change)
2747 {
2748         int rc;
2749
2750         /* If the basics of the memslot do not change, we do not want
2751          * to update the gmap. Every update causes several unnecessary
2752          * segment translation exceptions. This is usually handled just
2753          * fine by the normal fault handler + gmap, but it will also
2754          * cause faults on the prefix page of running guest CPUs.
2755          */
2756         if (old->userspace_addr == mem->userspace_addr &&
2757             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2758             old->npages * PAGE_SIZE == mem->memory_size)
2759                 return;
2760
2761         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2762                 mem->guest_phys_addr, mem->memory_size);
2763         if (rc)
2764                 pr_warn("failed to commit memory region\n");
2765         return;
2766 }
2767
2768 static int __init kvm_s390_init(void)
2769 {
2770         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2771 }
2772
2773 static void __exit kvm_s390_exit(void)
2774 {
2775         kvm_exit();
2776 }
2777
2778 module_init(kvm_s390_init);
2779 module_exit(kvm_s390_exit);
2780
2781 /*
2782  * Enable autoloading of the kvm module.
2783  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2784  * since x86 takes a different approach.
2785  */
2786 #include <linux/miscdevice.h>
2787 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2788 MODULE_ALIAS("devname:kvm");