/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/hyperv.h>

#include "hyperv_vmbus.h"

static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);

static const struct vmbus_device vmbus_devs[] = {
        /* IDE */
        { .dev_type = HV_IDE,
          HV_IDE_GUID,
          .perf_device = true,
        },

        /* SCSI */
        { .dev_type = HV_SCSI,
          HV_SCSI_GUID,
          .perf_device = true,
        },

        /* Fibre Channel */
        { .dev_type = HV_FC,
          HV_SYNTHFC_GUID,
          .perf_device = true,
        },

        /* Synthetic NIC */
        { .dev_type = HV_NIC,
          HV_NIC_GUID,
          .perf_device = true,
        },

        /* Network Direct */
        { .dev_type = HV_ND,
          HV_ND_GUID,
          .perf_device = true,
        },

        /* PCIE */
        { .dev_type = HV_PCIE,
          HV_PCIE_GUID,
          .perf_device = true,
        },

        /* Synthetic Frame Buffer */
        { .dev_type = HV_FB,
          HV_SYNTHVID_GUID,
          .perf_device = false,
        },

        /* Synthetic Keyboard */
        { .dev_type = HV_KBD,
          HV_KBD_GUID,
          .perf_device = false,
        },

        /* Synthetic MOUSE */
        { .dev_type = HV_MOUSE,
          HV_MOUSE_GUID,
          .perf_device = false,
        },

        /* KVP */
        { .dev_type = HV_KVP,
          HV_KVP_GUID,
          .perf_device = false,
        },

        /* Time Synch */
        { .dev_type = HV_TS,
          HV_TS_GUID,
          .perf_device = false,
        },

        /* Heartbeat */
        { .dev_type = HV_HB,
          HV_HEART_BEAT_GUID,
          .perf_device = false,
        },

        /* Shutdown */
        { .dev_type = HV_SHUTDOWN,
          HV_SHUTDOWN_GUID,
          .perf_device = false,
        },

        /* File copy */
        { .dev_type = HV_FCOPY,
          HV_FCOPY_GUID,
          .perf_device = false,
        },

        /* Backup */
        { .dev_type = HV_BACKUP,
          HV_VSS_GUID,
          .perf_device = false,
        },

        /* Dynamic Memory */
        { .dev_type = HV_DM,
          HV_DM_GUID,
          .perf_device = false,
        },

        /* Unknown GUID (the HV_UNKOWN spelling comes from the enum in hyperv.h) */
        { .dev_type = HV_UNKOWN,
          .perf_device = false,
        },
};

static const struct {
        uuid_le guid;
} vmbus_unsupported_devs[] = {
        { HV_AVMA1_GUID },
        { HV_AVMA2_GUID },
        { HV_RDV_GUID },
};

/*
 * The rescinded channel may be blocked waiting for a response from the host;
 * take care of that.
 */
static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
{
        struct vmbus_channel_msginfo *msginfo;
        unsigned long flags;

        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                            msglistentry) {

                if (msginfo->waiting_channel == channel) {
                        complete(&msginfo->waitevent);
                        break;
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

static bool is_unsupported_vmbus_devs(const uuid_le *guid)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
                if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
                        return true;
        return false;
}

static u16 hv_get_dev_type(const struct vmbus_channel *channel)
{
        const uuid_le *guid = &channel->offermsg.offer.if_type;
        u16 i;

        if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
                return HV_UNKOWN;

        for (i = HV_IDE; i < HV_UNKOWN; i++) {
                if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
                        return i;
        }
        pr_info("Unknown GUID: %pUl\n", guid);
        return i;
}

/**
 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @negop: Pointer to negotiate message structure
 * @buf: Raw buffer channel data
 * @fw_version: The framework version we can support
 * @srv_version: The service version we can support
 *
 * @icmsghdrp is of type &struct icmsg_hdr.
 * @negop is of type &struct icmsg_negotiate.
 * Set up and fill in default negotiate response message.
 *
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
                               struct icmsg_negotiate *negop, u8 *buf,
                               int fw_version, int srv_version)
{
        int icframe_major, icframe_minor;
        int icmsg_major, icmsg_minor;
        int fw_major, fw_minor;
        int srv_major, srv_minor;
        int i;
        bool found_match = false;

        icmsghdrp->icmsgsize = 0x10;
        fw_major = (fw_version >> 16);
        fw_minor = (fw_version & 0xFFFF);

        srv_major = (srv_version >> 16);
        srv_minor = (srv_version & 0xFFFF);
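
        /*
         * Example of the encoding handled above (major << 16 | minor):
         * fw_version 0x00030000 splits into fw_major 3, fw_minor 0, and
         * srv_version 0x00010002 into srv_major 1, srv_minor 2.
         */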

        negop = (struct icmsg_negotiate *)&buf[
                sizeof(struct vmbuspipe_hdr) +
                sizeof(struct icmsg_hdr)];

        icframe_major = negop->icframe_vercnt;
        icframe_minor = 0;

        icmsg_major = negop->icmsg_vercnt;
        icmsg_minor = 0;

        /*
         * Select the framework version number we will
         * support.
         */

        for (i = 0; i < negop->icframe_vercnt; i++) {
                if ((negop->icversion_data[i].major == fw_major) &&
                    (negop->icversion_data[i].minor == fw_minor)) {
                        icframe_major = negop->icversion_data[i].major;
                        icframe_minor = negop->icversion_data[i].minor;
                        found_match = true;
                }
        }

        if (!found_match)
                goto fw_error;

        found_match = false;

        for (i = negop->icframe_vercnt;
             (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
                if ((negop->icversion_data[i].major == srv_major) &&
                    (negop->icversion_data[i].minor == srv_minor)) {
                        icmsg_major = negop->icversion_data[i].major;
                        icmsg_minor = negop->icversion_data[i].minor;
                        found_match = true;
                }
        }

        /*
         * Respond with the framework and service
         * version numbers we can support.
         */

fw_error:
        if (!found_match) {
                negop->icframe_vercnt = 0;
                negop->icmsg_vercnt = 0;
        } else {
                negop->icframe_vercnt = 1;
                negop->icmsg_vercnt = 1;
        }

        negop->icversion_data[0].major = icframe_major;
        negop->icversion_data[0].minor = icframe_minor;
        negop->icversion_data[1].major = icmsg_major;
        negop->icversion_data[1].minor = icmsg_minor;

        return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
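
/*
 * Typical use, sketched from an integration-component driver's channel
 * callback (identifiers below are illustrative, not defined in this file):
 *
 *	if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE)
 *		vmbus_prep_negotiate_resp(icmsghdrp, negop, buf,
 *					  UTIL_FW_VERSION, SRV_VERSION);
 */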

/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
        struct vmbus_channel *channel;

        channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
        if (!channel)
                return NULL;

        channel->acquire_ring_lock = true;
        spin_lock_init(&channel->inbound_lock);
        spin_lock_init(&channel->lock);

        INIT_LIST_HEAD(&channel->sc_list);
        INIT_LIST_HEAD(&channel->percpu_list);

        return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
        kfree(channel);
}

static void percpu_channel_enq(void *arg)
{
        struct vmbus_channel *channel = arg;
        int cpu = smp_processor_id();

        list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
}

static void percpu_channel_deq(void *arg)
{
        struct vmbus_channel *channel = arg;

        list_del(&channel->percpu_list);
}

static void vmbus_release_relid(u32 relid)
{
        struct vmbus_channel_relid_released msg;

        memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
        msg.child_relid = relid;
        msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
        vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
                       true);
}

void hv_event_tasklet_disable(struct vmbus_channel *channel)
{
        struct tasklet_struct *tasklet;

        tasklet = hv_context.event_dpc[channel->target_cpu];
        tasklet_disable(tasklet);
}

void hv_event_tasklet_enable(struct vmbus_channel *channel)
{
        struct tasklet_struct *tasklet;

        tasklet = hv_context.event_dpc[channel->target_cpu];
        tasklet_enable(tasklet);

        /* In case there is any pending event */
        tasklet_schedule(tasklet);
}
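
/*
 * The event tasklet bound to a channel's target CPU walks that CPU's
 * percpu_list; hv_process_channel_removal() and vmbus_process_offer()
 * bracket percpu_channel_enq()/deq() with the disable/enable helpers
 * above so the list is never modified while the tasklet may be running.
 */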

void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
        unsigned long flags;
        struct vmbus_channel *primary_channel;

        BUG_ON(!channel->rescind);
        BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));

        hv_event_tasklet_disable(channel);
        if (channel->target_cpu != get_cpu()) {
                put_cpu();
                smp_call_function_single(channel->target_cpu,
                                         percpu_channel_deq, channel, true);
        } else {
                percpu_channel_deq(channel);
                put_cpu();
        }
        hv_event_tasklet_enable(channel);

        if (channel->primary_channel == NULL) {
                list_del(&channel->listentry);

                primary_channel = channel;
        } else {
                primary_channel = channel->primary_channel;
                spin_lock_irqsave(&primary_channel->lock, flags);
                list_del(&channel->sc_list);
                primary_channel->num_sc--;
                spin_unlock_irqrestore(&primary_channel->lock, flags);
        }

        /*
         * We need to free the bit for init_vp_index() to work in the case
         * of sub-channel, when we reload drivers like hv_netvsc.
         */
        if (channel->affinity_policy == HV_LOCALIZED)
                cpumask_clear_cpu(channel->target_cpu,
                                  &primary_channel->alloced_cpus_in_node);

        vmbus_release_relid(relid);

        free_channel(channel);
}

void vmbus_free_channels(void)
{
        struct vmbus_channel *channel, *tmp;

        mutex_lock(&vmbus_connection.channel_mutex);
        list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
                                 listentry) {
                /* hv_process_channel_removal() needs this */
                channel->rescind = true;

                vmbus_device_unregister(channel->device_obj);
        }
        mutex_unlock(&vmbus_connection.channel_mutex);
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
        struct vmbus_channel *channel;
        bool fnew = true;
        unsigned long flags;
        u16 dev_type;
        int ret;

        /* Make sure this is a new offer */
        mutex_lock(&vmbus_connection.channel_mutex);

        list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
                if (!uuid_le_cmp(channel->offermsg.offer.if_type,
                                 newchannel->offermsg.offer.if_type) &&
                    !uuid_le_cmp(channel->offermsg.offer.if_instance,
                                 newchannel->offermsg.offer.if_instance)) {
                        fnew = false;
                        break;
                }
        }

        if (fnew)
                list_add_tail(&newchannel->listentry,
                              &vmbus_connection.chn_list);

        mutex_unlock(&vmbus_connection.channel_mutex);

        if (!fnew) {
                /*
                 * Check to see if this is a sub-channel.
                 */
                if (newchannel->offermsg.offer.sub_channel_index != 0) {
                        /*
                         * Process the sub-channel.
                         */
                        newchannel->primary_channel = channel;
                        spin_lock_irqsave(&channel->lock, flags);
                        list_add_tail(&newchannel->sc_list, &channel->sc_list);
                        channel->num_sc++;
                        spin_unlock_irqrestore(&channel->lock, flags);
                } else {
                        goto err_free_chan;
                }
        }

        dev_type = hv_get_dev_type(newchannel);

        init_vp_index(newchannel, dev_type);

        hv_event_tasklet_disable(newchannel);
        if (newchannel->target_cpu != get_cpu()) {
                put_cpu();
                smp_call_function_single(newchannel->target_cpu,
                                         percpu_channel_enq,
                                         newchannel, true);
        } else {
                percpu_channel_enq(newchannel);
                put_cpu();
        }
        hv_event_tasklet_enable(newchannel);

        /*
         * This state is used to indicate a successful open
         * so that when we do close the channel normally, we
         * can cleanup properly
         */
        newchannel->state = CHANNEL_OPEN_STATE;

        if (!fnew) {
                if (channel->sc_creation_callback != NULL)
                        channel->sc_creation_callback(newchannel);
                return;
        }

        /*
         * Start the process of binding this offer to the driver
         * We need to set the DeviceObject field before calling
         * vmbus_child_dev_add()
         */
        newchannel->device_obj = vmbus_device_create(
                &newchannel->offermsg.offer.if_type,
                &newchannel->offermsg.offer.if_instance,
                newchannel);
        if (!newchannel->device_obj)
                goto err_deq_chan;

        newchannel->device_obj->device_id = dev_type;
        /*
         * Add the new device to the bus. This will kick off device-driver
         * binding which eventually invokes the device driver's AddDevice()
         * method.
         */
        mutex_lock(&vmbus_connection.channel_mutex);
        ret = vmbus_device_register(newchannel->device_obj);
        mutex_unlock(&vmbus_connection.channel_mutex);

        if (ret != 0) {
                pr_err("unable to add child device object (relid %d)\n",
                       newchannel->offermsg.child_relid);
                kfree(newchannel->device_obj);
                goto err_deq_chan;
        }

        return;

err_deq_chan:
        mutex_lock(&vmbus_connection.channel_mutex);
        list_del(&newchannel->listentry);
        mutex_unlock(&vmbus_connection.channel_mutex);

        hv_event_tasklet_disable(newchannel);
        if (newchannel->target_cpu != get_cpu()) {
                put_cpu();
                smp_call_function_single(newchannel->target_cpu,
                                         percpu_channel_deq, newchannel, true);
        } else {
                percpu_channel_deq(newchannel);
                put_cpu();
        }
        hv_event_tasklet_enable(newchannel);

        vmbus_release_relid(newchannel->offermsg.child_relid);

err_free_chan:
        free_channel(newchannel);
}

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static int next_numa_node_id;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to VCPU.
 * We do this in a hierarchical fashion:
 * First distribute the primary channels across available NUMA nodes
 * and then distribute the subchannels amongst the CPUs in the NUMA
 * node assigned to the primary channel.
 *
 * For pre-win8 hosts or non-performance critical channels we assign the
 * first CPU in the first NUMA node.
 */
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
{
        u32 cur_cpu;
        bool perf_chn = vmbus_devs[dev_type].perf_device;
        struct vmbus_channel *primary = channel->primary_channel;
        int next_node;
        struct cpumask available_mask;
        struct cpumask *alloced_mask;

        if ((vmbus_proto_version == VERSION_WS2008) ||
            (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
                /*
                 * Prior to win8, all channel interrupts are
                 * delivered on cpu 0.
                 * Also if the channel is not a performance critical
                 * channel, bind it to cpu 0.
                 */
                channel->numa_node = 0;
                channel->target_cpu = 0;
                channel->target_vp = hv_context.vp_index[0];
                return;
        }

        /*
         * Based on the channel affinity policy, we will assign the NUMA
         * nodes.
         */

        if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
                while (true) {
                        next_node = next_numa_node_id++;
                        if (next_node == nr_node_ids) {
                                next_node = next_numa_node_id = 0;
                                continue;
                        }
                        if (cpumask_empty(cpumask_of_node(next_node)))
                                continue;
                        break;
                }
                channel->numa_node = next_node;
                primary = channel;
        }
        alloced_mask = &hv_context.hv_numa_map[primary->numa_node];

        if (cpumask_weight(alloced_mask) ==
            cpumask_weight(cpumask_of_node(primary->numa_node))) {
                /*
                 * We have cycled through all the CPUs in the node;
                 * reset the alloced map.
                 */
                cpumask_clear(alloced_mask);
        }

        cpumask_xor(&available_mask, alloced_mask,
                    cpumask_of_node(primary->numa_node));

        cur_cpu = -1;

        if (primary->affinity_policy == HV_LOCALIZED) {
                /*
                 * Normally Hyper-V host doesn't create more subchannels
                 * than there are VCPUs on the node but it is possible when not
                 * all present VCPUs on the node are initialized by guest.
                 * Clear the alloced_cpus_in_node to start over.
                 */
                if (cpumask_equal(&primary->alloced_cpus_in_node,
                                  cpumask_of_node(primary->numa_node)))
                        cpumask_clear(&primary->alloced_cpus_in_node);
        }

        while (true) {
                cur_cpu = cpumask_next(cur_cpu, &available_mask);
                if (cur_cpu >= nr_cpu_ids) {
                        cur_cpu = -1;
                        cpumask_copy(&available_mask,
                                     cpumask_of_node(primary->numa_node));
                        continue;
                }

                if (primary->affinity_policy == HV_LOCALIZED) {
                        /*
                         * NOTE: in the case of sub-channel, we clear the
                         * sub-channel related bit(s) in
                         * primary->alloced_cpus_in_node in
                         * hv_process_channel_removal(), so when we
                         * reload drivers like hv_netvsc in SMP guest, here
                         * we're able to re-allocate
                         * bit from primary->alloced_cpus_in_node.
                         */
                        if (!cpumask_test_cpu(cur_cpu,
                                              &primary->alloced_cpus_in_node)) {
                                cpumask_set_cpu(cur_cpu,
                                                &primary->alloced_cpus_in_node);
                                cpumask_set_cpu(cur_cpu, alloced_mask);
                                break;
                        }
                } else {
                        cpumask_set_cpu(cur_cpu, alloced_mask);
                        break;
                }
        }

        channel->target_cpu = cur_cpu;
        channel->target_vp = hv_context.vp_index[cur_cpu];
}
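
/*
 * Worked example of the policy above: on a guest with two NUMA nodes,
 * successive perf-critical primary channels land on nodes 0, 1, 0, 1, ...
 * while, under HV_LOCALIZED, the sub-channels of a primary cycle through
 * the not-yet-used CPUs of the primary's node before any CPU is reused.
 */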

static void vmbus_wait_for_unload(void)
{
        int cpu;
        void *page_addr;
        struct hv_message *msg;
        struct vmbus_channel_message_header *hdr;
        u32 message_type;

        /*
         * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
         * used for initial contact or to CPU0 depending on host version. When
         * we're crashing on a different CPU let's hope that IRQ handler on
         * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
         * functional and vmbus_unload_response() will complete
         * vmbus_connection.unload_event. If not, the last thing we can do is
         * read message pages for all CPUs directly.
         */
        while (1) {
                if (completion_done(&vmbus_connection.unload_event))
                        break;

                for_each_online_cpu(cpu) {
                        page_addr = hv_context.synic_message_page[cpu];
                        msg = (struct hv_message *)page_addr +
                                VMBUS_MESSAGE_SINT;

                        message_type = READ_ONCE(msg->header.message_type);
                        if (message_type == HVMSG_NONE)
                                continue;

                        hdr = (struct vmbus_channel_message_header *)
                                msg->u.payload;

                        if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
                                complete(&vmbus_connection.unload_event);

                        vmbus_signal_eom(msg, message_type);
                }

                mdelay(10);
        }

        /*
         * We're crashing and already got the UNLOAD_RESPONSE, cleanup all
         * maybe-pending messages on all CPUs to be able to receive new
         * messages after we reconnect.
         */
        for_each_online_cpu(cpu) {
                page_addr = hv_context.synic_message_page[cpu];
                msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
                msg->header.message_type = HVMSG_NONE;
        }
}

/*
 * vmbus_unload_response - Handler for the unload response.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
        /*
         * This is a global event; just wakeup the waiting thread.
         * Once we successfully unload, we can cleanup the monitor state.
         */
        complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(bool crash)
{
        struct vmbus_channel_message_header hdr;

        /* Pre-Win2012R2 hosts don't support reconnect */
        if (vmbus_proto_version < VERSION_WIN8_1)
                return;

        init_completion(&vmbus_connection.unload_event);
        memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
        hdr.msgtype = CHANNELMSG_UNLOAD;
        vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header),
                       !crash);

        /*
         * vmbus_initiate_unload() is also called on crash and the crash can be
         * happening in an interrupt context, where scheduling is impossible.
         */
        if (!crash)
                wait_for_completion(&vmbus_connection.unload_event);
        else
                vmbus_wait_for_unload();
}

/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 *
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_offer_channel *offer;
        struct vmbus_channel *newchannel;

        offer = (struct vmbus_channel_offer_channel *)hdr;

        /* Allocate the channel object and save this offer. */
        newchannel = alloc_channel();
        if (!newchannel) {
                vmbus_release_relid(offer->child_relid);
                pr_err("Unable to allocate channel object\n");
                return;
        }

        /*
         * By default we setup state to enable batched
         * reading. A specific service can choose to
         * disable this prior to opening the channel.
         */
        newchannel->batched_reading = true;

        /*
         * Setup state for signalling the host.
         */
        newchannel->sig_event = (struct hv_input_signal_event *)
                                (ALIGN((unsigned long)
                                &newchannel->sig_buf,
                                HV_HYPERCALL_PARAM_ALIGN));

        newchannel->sig_event->connectionid.asu32 = 0;
        newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
        newchannel->sig_event->flag_number = 0;
        newchannel->sig_event->rsvdz = 0;

        if (vmbus_proto_version != VERSION_WS2008) {
                newchannel->is_dedicated_interrupt =
                                (offer->is_dedicated_interrupt != 0);
                newchannel->sig_event->connectionid.u.id =
                                offer->connection_id;
        }

        memcpy(&newchannel->offermsg, offer,
               sizeof(struct vmbus_channel_offer_channel));
        newchannel->monitor_grp = (u8)offer->monitorid / 32;
        newchannel->monitor_bit = (u8)offer->monitorid % 32;
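
        /*
         * Example of the split above: monitorid 37 maps to monitor
         * group 1, bit 5 (37 / 32 == 1, 37 % 32 == 5).
         */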

        vmbus_process_offer(newchannel);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_rescind_offer *rescind;
        struct vmbus_channel *channel;
        unsigned long flags;
        struct device *dev;

        rescind = (struct vmbus_channel_rescind_offer *)hdr;

        mutex_lock(&vmbus_connection.channel_mutex);
        channel = relid2channel(rescind->child_relid);

        if (channel == NULL) {
                /*
                 * We get here when the offer was never turned into a
                 * channel: vmbus_process_offer() has already invoked
                 * vmbus_release_relid() on its error path, so there is
                 * nothing left to do.
                 */
                goto out;
        }

        spin_lock_irqsave(&channel->lock, flags);
        channel->rescind = true;
        spin_unlock_irqrestore(&channel->lock, flags);

        vmbus_rescind_cleanup(channel);

        if (channel->device_obj) {
                if (channel->chn_rescind_callback) {
                        channel->chn_rescind_callback(channel);
                        goto out;
                }
                /*
                 * We will have to unregister this device from the
                 * driver core.
                 */
                dev = get_device(&channel->device_obj->device);
                if (dev) {
                        vmbus_device_unregister(channel->device_obj);
                        put_device(dev);
                }
        } else {
                hv_process_channel_removal(channel,
                                           channel->offermsg.child_relid);
        }

out:
        mutex_unlock(&vmbus_connection.channel_mutex);
}

void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
{
        mutex_lock(&vmbus_connection.channel_mutex);

        BUG_ON(!is_hvsock_channel(channel));

        channel->rescind = true;
        vmbus_device_unregister(channel->device_obj);

        mutex_unlock(&vmbus_connection.channel_mutex);
}
EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);

/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
                        struct vmbus_channel_message_header *hdr)
{
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we received a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_open_result *result;
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_open_channel *openmsg;
        unsigned long flags;

        result = (struct vmbus_channel_open_result *)hdr;

        /*
         * Find the open msg, copy the result and signal/unblock the wait event
         */
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                            msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
                        openmsg =
                        (struct vmbus_channel_open_channel *)msginfo->msg;
                        if (openmsg->child_relid == result->child_relid &&
                            openmsg->openid == result->openid) {
                                memcpy(&msginfo->response.open_result,
                                       result,
                                       sizeof(
                                        struct vmbus_channel_open_result));
                                complete(&msginfo->waitevent);
                                break;
                        }
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we received a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_gpadl_created *gpadlcreated;
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_gpadl_header *gpadlheader;
        unsigned long flags;

        gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

        /*
         * Find the establish msg, copy the result and signal/unblock the wait
         * event
         */
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                            msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
                        gpadlheader =
                        (struct vmbus_channel_gpadl_header *)requestheader;

                        if ((gpadlcreated->child_relid ==
                             gpadlheader->child_relid) &&
                            (gpadlcreated->gpadl == gpadlheader->gpadl)) {
                                memcpy(&msginfo->response.gpadl_created,
                                       gpadlcreated,
                                       sizeof(
                                        struct vmbus_channel_gpadl_created));
                                complete(&msginfo->waitevent);
                                break;
                        }
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we received a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_torndown(
                        struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_gpadl_torndown *gpadl_torndown;
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_gpadl_teardown *gpadl_teardown;
        unsigned long flags;

        gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

        /*
         * Find the open msg, copy the result and signal/unblock the wait event
         */
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                            msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
                        gpadl_teardown =
                        (struct vmbus_channel_gpadl_teardown *)requestheader;

                        if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
                                memcpy(&msginfo->response.gpadl_torndown,
                                       gpadl_torndown,
                                       sizeof(
                                        struct vmbus_channel_gpadl_torndown));
                                complete(&msginfo->waitevent);
                                break;
                        }
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onversion_response - Version response handler
 *
 * This is invoked when we received a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onversion_response(
                struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_version_response *version_response;
        unsigned long flags;

        version_response = (struct vmbus_channel_version_response *)hdr;
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                            msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype ==
                    CHANNELMSG_INITIATE_CONTACT) {
                        memcpy(&msginfo->response.version_response,
                               version_response,
                               sizeof(struct vmbus_channel_version_response));
                        complete(&msginfo->waitevent);
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}
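
/*
 * The middle field of each entry below is the handler type: 0 marks a
 * handler that may sleep and must run from the work-queue path, 1 marks
 * a handler safe to call directly in the message-interrupt path (these
 * appear to correspond to the VMHT_BLOCKING/VMHT_NON_BLOCKING values in
 * hyperv_vmbus.h).
 */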

/* Channel message dispatch table */
struct vmbus_channel_message_table_entry
        channel_message_table[CHANNELMSG_COUNT] = {
        {CHANNELMSG_INVALID,                    0, NULL},
        {CHANNELMSG_OFFERCHANNEL,               0, vmbus_onoffer},
        {CHANNELMSG_RESCIND_CHANNELOFFER,       0, vmbus_onoffer_rescind},
        {CHANNELMSG_REQUESTOFFERS,              0, NULL},
        {CHANNELMSG_ALLOFFERS_DELIVERED,        1, vmbus_onoffers_delivered},
        {CHANNELMSG_OPENCHANNEL,                0, NULL},
        {CHANNELMSG_OPENCHANNEL_RESULT,         1, vmbus_onopen_result},
        {CHANNELMSG_CLOSECHANNEL,               0, NULL},
        {CHANNELMSG_GPADL_HEADER,               0, NULL},
        {CHANNELMSG_GPADL_BODY,                 0, NULL},
        {CHANNELMSG_GPADL_CREATED,              1, vmbus_ongpadl_created},
        {CHANNELMSG_GPADL_TEARDOWN,             0, NULL},
        {CHANNELMSG_GPADL_TORNDOWN,             1, vmbus_ongpadl_torndown},
        {CHANNELMSG_RELID_RELEASED,             0, NULL},
        {CHANNELMSG_INITIATE_CONTACT,           0, NULL},
        {CHANNELMSG_VERSION_RESPONSE,           1, vmbus_onversion_response},
        {CHANNELMSG_UNLOAD,                     0, NULL},
        {CHANNELMSG_UNLOAD_RESPONSE,            1, vmbus_unload_response},
        {CHANNELMSG_18,                         0, NULL},
        {CHANNELMSG_19,                         0, NULL},
        {CHANNELMSG_20,                         0, NULL},
        {CHANNELMSG_TL_CONNECT_REQUEST,         0, NULL},
};

/*
 * vmbus_onmessage - Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 */
void vmbus_onmessage(void *context)
{
        struct hv_message *msg = context;
        struct vmbus_channel_message_header *hdr;
        int size;

        hdr = (struct vmbus_channel_message_header *)msg->u.payload;
        size = msg->header.payload_size;

        if (hdr->msgtype >= CHANNELMSG_COUNT) {
                pr_err("Received invalid channel message type %d size %d\n",
                       hdr->msgtype, size);
                print_hex_dump_bytes("", DUMP_PREFIX_NONE,
                                     (unsigned char *)msg->u.payload, size);
                return;
        }

        if (channel_message_table[hdr->msgtype].message_handler)
                channel_message_table[hdr->msgtype].message_handler(hdr);
        else
                pr_err("Unhandled channel message type %d\n", hdr->msgtype);
}

/*
 * vmbus_request_offers - Send a request to get all our pending offers.
 */
int vmbus_request_offers(void)
{
        struct vmbus_channel_message_header *msg;
        struct vmbus_channel_msginfo *msginfo;
        int ret;

        msginfo = kmalloc(sizeof(*msginfo) +
                          sizeof(struct vmbus_channel_message_header),
                          GFP_KERNEL);
        if (!msginfo)
                return -ENOMEM;

        msg = (struct vmbus_channel_message_header *)msginfo->msg;

        msg->msgtype = CHANNELMSG_REQUESTOFFERS;

        ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
                             true);
        if (ret != 0) {
                pr_err("Unable to request offers - %d\n", ret);

                goto cleanup;
        }

cleanup:
        kfree(msginfo);

        return ret;
}

/*
 * Retrieve the (sub) channel on which to send an outgoing request.
 * When a primary channel has multiple sub-channels, we try to
 * distribute the load equally amongst all available channels.
 */
struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
{
        struct list_head *cur, *tmp;
        int cur_cpu;
        struct vmbus_channel *cur_channel;
        struct vmbus_channel *outgoing_channel = primary;
        int next_channel;
        int i = 1;

        if (list_empty(&primary->sc_list))
                return outgoing_channel;

        next_channel = primary->next_oc++;

        if (next_channel > (primary->num_sc)) {
                primary->next_oc = 0;
                return outgoing_channel;
        }

        cur_cpu = hv_context.vp_index[get_cpu()];
        put_cpu();
        list_for_each_safe(cur, tmp, &primary->sc_list) {
                cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
                if (cur_channel->state != CHANNEL_OPENED_STATE)
                        continue;

                if (cur_channel->target_vp == cur_cpu)
                        return cur_channel;

                if (i == next_channel)
                        return cur_channel;

                i++;
        }

        return outgoing_channel;
}
EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
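
/*
 * Selection logic of the helper above, in short: a sub-channel whose
 * target_vp matches the calling CPU wins outright; otherwise the next_oc
 * counter round-robins over the opened sub-channels, falling back to the
 * primary channel once the counter wraps past num_sc.
 */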

static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
        struct list_head *cur, *tmp;
        struct vmbus_channel *cur_channel;

        if (primary_channel->sc_creation_callback == NULL)
                return;

        list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
                cur_channel = list_entry(cur, struct vmbus_channel, sc_list);

                primary_channel->sc_creation_callback(cur_channel);
        }
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
                        void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
        primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);

bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
        bool ret;

        ret = !list_empty(&primary->sc_list);

        if (ret) {
                /*
                 * Invoke the callback on sub-channel creation.
                 * This will present a uniform interface to the
                 * clients.
                 */
                invoke_sc_cb(primary);
        }

        return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);

void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
                void (*chn_rescind_cb)(struct vmbus_channel *))
{
        channel->chn_rescind_callback = chn_rescind_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);