drivers/hv/hv.c

   1 /*
   2  * Copyright (c) 2009, Microsoft Corporation.
   3  *
   4  * This program is free software; you can redistribute it and/or modify it
   5  * under the terms and conditions of the GNU General Public License,
   6  * version 2, as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope it will be useful, but WITHOUT
   9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11  * more details.
  12  *
  13  * You should have received a copy of the GNU General Public License along with
  14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  15  * Place - Suite 330, Boston, MA 02111-1307 USA.
  16  *
  17  * Authors:
  18  *   Haiyang Zhang <haiyangz@microsoft.com>
  19  *   Hank Janssen  <hjanssen@microsoft.com>
  20  *
  21  */
  22 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  23
  24 #include <linux/kernel.h>
  25 #include <linux/mm.h>
  26 #include <linux/slab.h>
  27 #include <linux/vmalloc.h>
  28 #include <linux/hyperv.h>
  29 #include <linux/version.h>
  30 #include <linux/random.h>
  31 #include <linux/clockchips.h>
  32 #include <asm/mshyperv.h>
  33 #include "hyperv_vmbus.h"
  34
  35 /* The one and only */
  36 struct hv_context hv_context = {
  37         .synic_initialized      = false,
  38 };
  39
  40 /*
  41  * If false, we're using the old mechanism for stimer0 interrupts
  42  * where it sends a VMbus message when it expires. The old
  43  * mechanism is used when running on older versions of Hyper-V
  44  * that don't support Direct Mode. While Hyper-V provides
  45  * four stimer's per CPU, Linux uses only stimer0.
  46  */
  47 static bool direct_mode_enabled;
  48 static int stimer0_irq;
  49 static int stimer0_vector;
  50
  51 #define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
  52 #define HV_MAX_MAX_DELTA_TICKS 0xffffffff
  53 #define HV_MIN_DELTA_TICKS 1
  54
  55 /*
  56  * hv_init - Main initialization routine.
  57  *
  58  * This routine must be called before any other routines in here are called
  59  */
  60 int hv_init(void)
  61 {
  62         hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
  63         if (!hv_context.cpu_context)
  64                 return -ENOMEM;
  65
  66         direct_mode_enabled = ms_hyperv.misc_features &
  67                         HV_X64_STIMER_DIRECT_MODE_AVAILABLE;
  68         return 0;
  69 }
  70
  71 /*
  72  * hv_post_message - Post a message using the hypervisor message IPC.
  73  *
  74  * This involves a hypercall.
  75  */
  76 int hv_post_message(union hv_connection_id connection_id,
  77                   enum hv_message_type message_type,
  78                   void *payload, size_t payload_size)
  79 {
  80         struct hv_input_post_message *aligned_msg;
  81         struct hv_per_cpu_context *hv_cpu;
  82         u64 status;
  83
  84         if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
  85                 return -EMSGSIZE;
  86
  87         hv_cpu = get_cpu_ptr(hv_context.cpu_context);
  88         aligned_msg = hv_cpu->post_msg_page;
  89         aligned_msg->connectionid = connection_id;
  90         aligned_msg->reserved = 0;
  91         aligned_msg->message_type = message_type;
  92         aligned_msg->payload_size = payload_size;
  93         memcpy((void *)aligned_msg->payload, payload, payload_size);
  94
  95         status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL);
  96
  97         /* Preemption must remain disabled until after the hypercall
  98          * so some other thread can't get scheduled onto this cpu and
  99          * corrupt the per-cpu post_msg_page
 100          */
 101         put_cpu_ptr(hv_cpu);
 102
 103         return status & 0xFFFF;
 104 }
 105
 106 /*
 107  * ISR for when stimer0 is operating in Direct Mode.  Direct Mode
 108  * does not use VMbus or any VMbus messages, so process here and not
 109  * in the VMbus driver code.
 110  */
 111
 112 static void hv_stimer0_isr(void)
 113 {
 114         struct hv_per_cpu_context *hv_cpu;
 115
 116         hv_cpu = this_cpu_ptr(hv_context.cpu_context);
 117         hv_cpu->clk_evt->event_handler(hv_cpu->clk_evt);
 118         add_interrupt_randomness(stimer0_vector, 0);
 119 }
 120
 121 static int hv_ce_set_next_event(unsigned long delta,
 122                                 struct clock_event_device *evt)
 123 {
 124         u64 current_tick;
 125
 126         WARN_ON(!clockevent_state_oneshot(evt));
 127
 128         current_tick = hyperv_cs->read(NULL);
 129         current_tick += delta;
 130         hv_init_timer(HV_X64_MSR_STIMER0_COUNT, current_tick);
 131         return 0;
 132 }
 133
 134 static int hv_ce_shutdown(struct clock_event_device *evt)
 135 {
 136         hv_init_timer(HV_X64_MSR_STIMER0_COUNT, 0);
 137         hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, 0);
 138         if (direct_mode_enabled)
 139                 hv_disable_stimer0_percpu_irq(stimer0_irq);
 140
 141         return 0;
 142 }
 143
 144 static int hv_ce_set_oneshot(struct clock_event_device *evt)
 145 {
 146         union hv_timer_config timer_cfg;
 147
 148         timer_cfg.as_uint64 = 0;
 149         timer_cfg.enable = 1;
 150         timer_cfg.auto_enable = 1;
 151         if (direct_mode_enabled) {
 152                 /*
 153                  * When it expires, the timer will directly interrupt
 154                  * on the specified hardware vector/IRQ.
 155                  */
 156                 timer_cfg.direct_mode = 1;
 157                 timer_cfg.apic_vector = stimer0_vector;
 158                 hv_enable_stimer0_percpu_irq(stimer0_irq);
 159         } else {
 160                 /*
 161                  * When it expires, the timer will generate a VMbus message,
 162                  * to be handled by the normal VMbus interrupt handler.
 163                  */
 164                 timer_cfg.direct_mode = 0;
 165                 timer_cfg.sintx = VMBUS_MESSAGE_SINT;
 166         }
 167         hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64);
 168         return 0;
 169 }
 170
 171 static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu)
 172 {
 173         dev->name = "Hyper-V clockevent";
 174         dev->features = CLOCK_EVT_FEAT_ONESHOT;
 175         dev->cpumask = cpumask_of(cpu);
 176         dev->rating = 1000;
 177         /*
 178          * Avoid settint dev->owner = THIS_MODULE deliberately as doing so will
 179          * result in clockevents_config_and_register() taking additional
 180          * references to the hv_vmbus module making it impossible to unload.
 181          */
 182
 183         dev->set_state_shutdown = hv_ce_shutdown;
 184         dev->set_state_oneshot = hv_ce_set_oneshot;
 185         dev->set_next_event = hv_ce_set_next_event;
 186 }
 187
 188
 189 int hv_synic_alloc(void)
 190 {
 191         int cpu;
 192
 193         hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
 194                                          GFP_KERNEL);
 195         if (hv_context.hv_numa_map == NULL) {
 196                 pr_err("Unable to allocate NUMA map\n");
 197                 goto err;
 198         }
 199
 200         for_each_present_cpu(cpu) {
 201                 struct hv_per_cpu_context *hv_cpu
 202                         = per_cpu_ptr(hv_context.cpu_context, cpu);
 203
 204                 memset(hv_cpu, 0, sizeof(*hv_cpu));
 205                 tasklet_init(&hv_cpu->msg_dpc,
 206                              vmbus_on_msg_dpc, (unsigned long) hv_cpu);
 207
 208                 hv_cpu->clk_evt = kzalloc(sizeof(struct clock_event_device),
 209                                           GFP_KERNEL);
 210                 if (hv_cpu->clk_evt == NULL) {
 211                         pr_err("Unable to allocate clock event device\n");
 212                         goto err;
 213                 }
 214                 hv_init_clockevent_device(hv_cpu->clk_evt, cpu);
 215
 216                 hv_cpu->synic_message_page =
 217                         (void *)get_zeroed_page(GFP_ATOMIC);
 218                 if (hv_cpu->synic_message_page == NULL) {
 219                         pr_err("Unable to allocate SYNIC message page\n");
 220                         goto err;
 221                 }
 222
 223                 hv_cpu->synic_event_page = (void *)get_zeroed_page(GFP_ATOMIC);
 224                 if (hv_cpu->synic_event_page == NULL) {
 225                         pr_err("Unable to allocate SYNIC event page\n");
 226                         goto err;
 227                 }
 228
 229                 hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
 230                 if (hv_cpu->post_msg_page == NULL) {
 231                         pr_err("Unable to allocate post msg page\n");
 232                         goto err;
 233                 }
 234
 235                 INIT_LIST_HEAD(&hv_cpu->chan_list);
 236         }
 237
 238         if (direct_mode_enabled &&
 239             hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
 240                                 hv_stimer0_isr))
 241                 goto err;
 242
 243         return 0;
 244 err:
 245         /*
 246          * Any memory allocations that succeeded will be freed when
 247          * the caller cleans up by calling hv_synic_free()
 248          */
 249         return -ENOMEM;
 250 }
 251
 252
 253 void hv_synic_free(void)
 254 {
 255         int cpu;
 256
 257         for_each_present_cpu(cpu) {
 258                 struct hv_per_cpu_context *hv_cpu
 259                         = per_cpu_ptr(hv_context.cpu_context, cpu);
 260
 261                 kfree(hv_cpu->clk_evt);
 262                 free_page((unsigned long)hv_cpu->synic_event_page);
 263                 free_page((unsigned long)hv_cpu->synic_message_page);
 264                 free_page((unsigned long)hv_cpu->post_msg_page);
 265         }
 266
 267         kfree(hv_context.hv_numa_map);
 268 }
 269
 270 /*
 271  * hv_synic_init - Initialize the Synthetic Interrupt Controller.
 272  *
 273  * If it is already initialized by another entity (ie x2v shim), we need to
 274  * retrieve the initialized message and event pages.  Otherwise, we create and
 275  * initialize the message and event pages.
 276  */
 277 int hv_synic_init(unsigned int cpu)
 278 {
 279         struct hv_per_cpu_context *hv_cpu
 280                 = per_cpu_ptr(hv_context.cpu_context, cpu);
 281         union hv_synic_simp simp;
 282         union hv_synic_siefp siefp;
 283         union hv_synic_sint shared_sint;
 284         union hv_synic_scontrol sctrl;
 285
 286         /* Setup the Synic's message page */
 287         hv_get_simp(simp.as_uint64);
 288         simp.simp_enabled = 1;
 289         simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
 290                 >> PAGE_SHIFT;
 291
 292         hv_set_simp(simp.as_uint64);
 293
 294         /* Setup the Synic's event page */
 295         hv_get_siefp(siefp.as_uint64);
 296         siefp.siefp_enabled = 1;
 297         siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
 298                 >> PAGE_SHIFT;
 299
 300         hv_set_siefp(siefp.as_uint64);
 301
 302         /* Setup the shared SINT. */
 303         hv_get_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT,
 304                             shared_sint.as_uint64);
 305
 306         shared_sint.vector = HYPERVISOR_CALLBACK_VECTOR;
 307         shared_sint.masked = false;
 308         if (ms_hyperv.hints & HV_X64_DEPRECATING_AEOI_RECOMMENDED)
 309                 shared_sint.auto_eoi = false;
 310         else
 311                 shared_sint.auto_eoi = true;
 312
 313         hv_set_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT,
 314                             shared_sint.as_uint64);
 315
 316         /* Enable the global synic bit */
 317         hv_get_synic_state(sctrl.as_uint64);
 318         sctrl.enable = 1;
 319
 320         hv_set_synic_state(sctrl.as_uint64);
 321
 322         hv_context.synic_initialized = true;
 323
 324         /*
 325          * Register the per-cpu clockevent source.
 326          */
 327         if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)
 328                 clockevents_config_and_register(hv_cpu->clk_evt,
 329                                                 HV_TIMER_FREQUENCY,
 330                                                 HV_MIN_DELTA_TICKS,
 331                                                 HV_MAX_MAX_DELTA_TICKS);
 332         return 0;
 333 }
 334
 335 /*
 336  * hv_synic_clockevents_cleanup - Cleanup clockevent devices
 337  */
 338 void hv_synic_clockevents_cleanup(void)
 339 {
 340         int cpu;
 341
 342         if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE))
 343                 return;
 344
 345         if (direct_mode_enabled)
 346                 hv_remove_stimer0_irq(stimer0_irq);
 347
 348         for_each_present_cpu(cpu) {
 349                 struct hv_per_cpu_context *hv_cpu
 350                         = per_cpu_ptr(hv_context.cpu_context, cpu);
 351
 352                 clockevents_unbind_device(hv_cpu->clk_evt, cpu);
 353         }
 354 }
 355
 356 /*
 357  * hv_synic_cleanup - Cleanup routine for hv_synic_init().
 358  */
 359 int hv_synic_cleanup(unsigned int cpu)
 360 {
 361         union hv_synic_sint shared_sint;
 362         union hv_synic_simp simp;
 363         union hv_synic_siefp siefp;
 364         union hv_synic_scontrol sctrl;
 365         struct vmbus_channel *channel, *sc;
 366         bool channel_found = false;
 367         unsigned long flags;
 368
 369         if (!hv_context.synic_initialized)
 370                 return -EFAULT;
 371
 372         /*
 373          * Search for channels which are bound to the CPU we're about to
 374          * cleanup. In case we find one and vmbus is still connected we need to
 375          * fail, this will effectively prevent CPU offlining. There is no way
 376          * we can re-bind channels to different CPUs for now.
 377          */
 378         mutex_lock(&vmbus_connection.channel_mutex);
 379         list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
 380                 if (channel->target_cpu == cpu) {
 381                         channel_found = true;
 382                         break;
 383                 }
 384                 spin_lock_irqsave(&channel->lock, flags);
 385                 list_for_each_entry(sc, &channel->sc_list, sc_list) {
 386                         if (sc->target_cpu == cpu) {
 387                                 channel_found = true;
 388                                 break;
 389                         }
 390                 }
 391                 spin_unlock_irqrestore(&channel->lock, flags);
 392                 if (channel_found)
 393                         break;
 394         }
 395         mutex_unlock(&vmbus_connection.channel_mutex);
 396
 397         if (channel_found && vmbus_connection.conn_state == CONNECTED)
 398                 return -EBUSY;
 399
 400         /* Turn off clockevent device */
 401         if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE) {
 402                 struct hv_per_cpu_context *hv_cpu
 403                         = this_cpu_ptr(hv_context.cpu_context);
 404
 405                 clockevents_unbind_device(hv_cpu->clk_evt, cpu);
 406                 hv_ce_shutdown(hv_cpu->clk_evt);
 407                 put_cpu_ptr(hv_cpu);
 408         }
 409
 410         hv_get_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT,
 411                             shared_sint.as_uint64);
 412
 413         shared_sint.masked = 1;
 414
 415         /* Need to correctly cleanup in the case of SMP!!! */
 416         /* Disable the interrupt */
 417         hv_set_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT,
 418                             shared_sint.as_uint64);
 419
 420         hv_get_simp(simp.as_uint64);
 421         simp.simp_enabled = 0;
 422         simp.base_simp_gpa = 0;
 423
 424         hv_set_simp(simp.as_uint64);
 425
 426         hv_get_siefp(siefp.as_uint64);
 427         siefp.siefp_enabled = 0;
 428         siefp.base_siefp_gpa = 0;
 429
 430         hv_set_siefp(siefp.as_uint64);
 431
 432         /* Disable the global synic bit */
 433         hv_get_synic_state(sctrl.as_uint64);
 434         sctrl.enable = 0;
 435         hv_set_synic_state(sctrl.as_uint64);
 436
 437         return 0;
 438 }