/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/callb.h>
#include <sys/mac_provider.h>

#include <sys/ib/clients/eoib/eib_impl.h>

/*
 * Thread to handle EoIB events asynchronously
 */
void
eib_events_handler(eib_t *ss)
{
	eib_event_t *evi;
	eib_event_t *nxt;
	kmutex_t ci_lock;
	callb_cpr_t ci;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_EVENTS_HDLR);

wait_for_event:
	mutex_enter(&ss->ei_ev_lock);
	while ((evi = ss->ei_event) == NULL) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&ss->ei_ev_cv, &ss->ei_ev_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	/*
	 * Are we being asked to die ?
	 */
	if (evi->ev_code == EIB_EV_SHUTDOWN) {
		while (evi) {
			nxt = evi->ev_next;
			kmem_free(evi, sizeof (eib_event_t));
			evi = nxt;
		}
		ss->ei_event = NULL;
		mutex_exit(&ss->ei_ev_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	/*
	 * Otherwise, pull out the first entry from our work queue
	 */
	ss->ei_event = evi->ev_next;
	evi->ev_next = NULL;

	mutex_exit(&ss->ei_ev_lock);

	/*
	 * Process this event
	 *
	 * Note that we don't want to race with plumb/unplumb in this
	 * handler, since we may have to restart vnics or do stuff that
	 * may get re-initialized or released if we allowed plumb/unplumb
	 * to happen in parallel.
	 */
	eib_mac_set_nic_state(ss, EIB_NIC_RESTARTING);

	switch (evi->ev_code) {
	case EIB_EV_PORT_DOWN:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_PORT_DOWN");

		eib_mac_link_down(ss, B_FALSE);

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_PORT_DOWN");
		break;

	case EIB_EV_PORT_UP:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_PORT_UP");

		eib_ibt_link_mod(ss);

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_PORT_UP");
		break;

	case EIB_EV_PKEY_CHANGE:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_PKEY_CHANGE");

		eib_ibt_link_mod(ss);

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_PKEY_CHANGE");
		break;

	case EIB_EV_SGID_CHANGE:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_SGID_CHANGE");

		eib_ibt_link_mod(ss);

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_SGID_CHANGE");
		break;

	case EIB_EV_CLNT_REREG:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_CLNT_REREG");

		eib_ibt_link_mod(ss);

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_CLNT_REREG");
		break;

	case EIB_EV_GW_UP:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_GW_UP");

		/*
		 * EoIB nexus has notified us that our gateway is now
		 * reachable. Unless we already think it is reachable,
		 * mark it so in our records and try to resurrect dead
		 * vnics.
		 */
		mutex_enter(&ss->ei_vnic_lock);
		if (ss->ei_gw_unreachable == B_FALSE) {
			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: gw reachable");
			mutex_exit(&ss->ei_vnic_lock);

			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: End EIB_EV_GW_UP");
			break;
		}
		ss->ei_gw_unreachable = B_FALSE;
		mutex_exit(&ss->ei_vnic_lock);

		/*
		 * If we've not even started yet, we have nothing to do.
		 */
		if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) == 0) {
			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: End EIB_EV_GW_UP");
			break;
		}

		if (eib_mac_hca_portstate(ss, NULL, NULL) != EIB_E_SUCCESS) {
			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: "
			    "HCA portstate failed, marking link down");

			eib_mac_link_down(ss, B_FALSE);
		} else {
			uint8_t vn0_mac[ETHERADDRL];

			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: "
			    "HCA portstate ok, resurrecting zombies");

			bcopy(eib_zero_mac, vn0_mac, ETHERADDRL);
			eib_vnic_resurrect_zombies(ss, vn0_mac);

			/*
			 * If we've resurrected the zombies because the gateway
			 * went down and came back, it is possible our unicast
			 * mac address changed from what it was earlier. If
			 * so, we need to update our unicast address with the
			 * mac layer before marking the link up.
			 */
			if (bcmp(vn0_mac, eib_zero_mac, ETHERADDRL) != 0) {
				EIB_DPRINTF_DEBUG(ss->ei_instance,
				    "eib_events_handler: updating unicast "
				    "addr to %x:%x:%x:%x:%x:%x", vn0_mac[0],
				    vn0_mac[1], vn0_mac[2], vn0_mac[3],
				    vn0_mac[4], vn0_mac[5]);

				mac_unicst_update(ss->ei_mac_hdl, vn0_mac);
			}

			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: eib_mac_link_up(B_FALSE)");

			eib_mac_link_up(ss, B_FALSE);
		}

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_GW_UP");
		break;

	case EIB_EV_GW_INFO_UPDATE:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_GW_INFO_UPDATE");

		if (evi->ev_arg) {
			eib_update_props(ss, (eib_gw_info_t *)(evi->ev_arg));
			kmem_free(evi->ev_arg, sizeof (eib_gw_info_t));
		}

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_GW_INFO_UPDATE");
		break;

	case EIB_EV_MCG_DELETED:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin-End EIB_EV_MCG_DELETED");
		break;

	case EIB_EV_MCG_CREATED:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin-End EIB_EV_MCG_CREATED");
		break;

	case EIB_EV_GW_EPORT_DOWN:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin-End EIB_EV_GW_EPORT_DOWN");
		break;

	case EIB_EV_GW_DOWN:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin-End EIB_EV_GW_DOWN");
		break;
	}

	eib_mac_clr_nic_state(ss, EIB_NIC_RESTARTING);

	kmem_free(evi, sizeof (eib_event_t));
	goto wait_for_event;
}

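/*
 * Enqueue an event for the events handler thread and wake it up
 */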
void
eib_svc_enqueue_event(eib_t *ss, eib_event_t *evi)
{
	eib_event_t *elem = NULL;
	eib_event_t *tail = NULL;

	mutex_enter(&ss->ei_ev_lock);

	/*
	 * Notice to shutdown has a higher priority than the
	 * rest and goes to the head of the list. Everything
	 * else goes at the end.
	 */
	if (evi->ev_code == EIB_EV_SHUTDOWN) {
		evi->ev_next = ss->ei_event;
		ss->ei_event = evi;
	} else {
		for (elem = ss->ei_event; elem; elem = elem->ev_next)
			tail = elem;

		if (tail)
			tail->ev_next = evi;
		else
			ss->ei_event = evi;
	}

	cv_signal(&ss->ei_ev_cv);
	mutex_exit(&ss->ei_ev_lock);
}

/*
 * Thread to refill channels with rwqes whenever they get low.
 */
void
eib_refill_rwqes(eib_t *ss)
{
	eib_chan_t *chan;
	kmutex_t ci_lock;
	callb_cpr_t ci;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_RWQES_REFILLER);

wait_for_refill_work:
	mutex_enter(&ss->ei_rxpost_lock);

	while ((ss->ei_rxpost == NULL) && (ss->ei_rxpost_die == 0)) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&ss->ei_rxpost_cv, &ss->ei_rxpost_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	/*
	 * Discard all requests for refill if we're being asked to die
	 */
	if (ss->ei_rxpost_die) {
		ss->ei_rxpost = NULL;
		mutex_exit(&ss->ei_rxpost_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}
	ASSERT(ss->ei_rxpost != NULL);

	/*
	 * Take the first element out of the queue
	 */
	chan = ss->ei_rxpost;
	ss->ei_rxpost = chan->ch_rxpost_next;
	chan->ch_rxpost_next = NULL;

	mutex_exit(&ss->ei_rxpost_lock);

	/*
	 * Try to post a bunch of recv wqes into this channel. If we
	 * fail, it means that we haven't even been able to post a
	 * single recv wqe. This is alarming, but there's nothing
	 * we can do. We just move on to the next channel needing
	 * our service.
	 */
	if (eib_chan_post_rx(ss, chan, NULL) != EIB_E_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance,
		    "eib_refill_rwqes: eib_chan_post_rx() failed");
	}

	/*
	 * Mark it to indicate that the refilling is done
	 */
	mutex_enter(&chan->ch_rx_lock);
	chan->ch_rx_refilling = B_FALSE;
	mutex_exit(&chan->ch_rx_lock);

	goto wait_for_refill_work;
}

/*
 * Thread to create or restart vnics when required
 */
void
eib_vnic_creator(eib_t *ss)
{
	eib_vnic_req_t *vrq;
	eib_vnic_req_t *elem;
	eib_vnic_req_t *nxt;
	kmutex_t ci_lock;
	callb_cpr_t ci;
	uint_t vr_req;
	uint8_t *vr_mac;
	int ret;
	int err;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_VNIC_CREATOR);

wait_for_vnic_req:
	mutex_enter(&ss->ei_vnic_req_lock);

	while ((vrq = ss->ei_vnic_req) == NULL) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&ss->ei_vnic_req_cv, &ss->ei_vnic_req_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	/*
	 * Pull out the first request
	 */
	ss->ei_vnic_req = vrq->vr_next;
	vrq->vr_next = NULL;

	vr_req = vrq->vr_req;
	vr_mac = vrq->vr_mac;

	switch (vr_req) {
	case EIB_CR_REQ_DIE:
	case EIB_CR_REQ_FLUSH:
		/*
		 * Cleanup all pending reqs and failed reqs
		 */
		for (elem = ss->ei_vnic_req; elem; elem = nxt) {
			nxt = elem->vr_next;
			kmem_free(elem, sizeof (eib_vnic_req_t));
		}
		for (elem = ss->ei_failed_vnic_req; elem; elem = nxt) {
			nxt = elem->vr_next;
			kmem_free(elem, sizeof (eib_vnic_req_t));
		}
		ss->ei_vnic_req = NULL;
		ss->ei_failed_vnic_req = NULL;
		ss->ei_pending_vnic_req = NULL;
		mutex_exit(&ss->ei_vnic_req_lock);

		break;

	case EIB_CR_REQ_NEW_VNIC:
		ss->ei_pending_vnic_req = vrq;
		mutex_exit(&ss->ei_vnic_req_lock);

		EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_vnic_creator: "
		    "new vnic creation request for %x:%x:%x:%x:%x:%x, 0x%x",
		    vr_mac[0], vr_mac[1], vr_mac[2], vr_mac[3], vr_mac[4],
		    vr_mac[5], vrq->vr_vlan);

		/*
		 * Make sure we don't race with the plumb/unplumb code. If
		 * the eoib instance has been unplumbed already, we ignore any
		 * creation requests that may have been pending.
		 */
		eib_mac_set_nic_state(ss, EIB_NIC_STARTING);

		if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) !=
		    EIB_NIC_STARTED) {
			mutex_enter(&ss->ei_vnic_req_lock);
			ss->ei_pending_vnic_req = NULL;
			mutex_exit(&ss->ei_vnic_req_lock);
			eib_mac_clr_nic_state(ss, EIB_NIC_STARTING);

			break;
		}

		/*
		 * Try to create a new vnic with the supplied parameters.
		 */
		err = 0;
		if ((ret = eib_vnic_create(ss, vrq->vr_mac, vrq->vr_vlan,
		    NULL, &err)) != EIB_E_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_creator: "
			    "eib_vnic_create(mac=%x:%x:%x:%x:%x:%x, vlan=0x%x) "
			    "failed, ret=%d", vr_mac[0], vr_mac[1], vr_mac[2],
			    vr_mac[3], vr_mac[4], vr_mac[5], vrq->vr_vlan, err);
		}

		/*
		 * If we failed, add this vnic req to our failed list (unless
		 * it already exists there), so we won't try to create this
		 * vnic again. Whether we fail or succeed, we're done with
		 * processing this req, so clear the pending req.
		 */
		mutex_enter(&ss->ei_vnic_req_lock);
		if ((ret != EIB_E_SUCCESS) && (err != EEXIST)) {
			vrq->vr_next = ss->ei_failed_vnic_req;
			ss->ei_failed_vnic_req = vrq;
			vrq = NULL;
		}
		ss->ei_pending_vnic_req = NULL;
		mutex_exit(&ss->ei_vnic_req_lock);

		/*
		 * Notify the mac layer that it should retry its tx again. If we
		 * had created the vnic successfully, we'll be able to send the
		 * packets; if we had not been successful, we'll drop packets on
		 * the floor.
		 */
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_vnic_creator: calling mac_tx_update()");
		mac_tx_update(ss->ei_mac_hdl);

		eib_mac_clr_nic_state(ss, EIB_NIC_STARTING);
		break;

	default:
		EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_vnic_creator: "
		    "unknown request 0x%lx, ignoring", vrq->vr_req);
		break;
	}

	/*
	 * Free the current req and quit if we have to
	 */
	if (vrq) {
		kmem_free(vrq, sizeof (eib_vnic_req_t));
	}

	if (vr_req == EIB_CR_REQ_DIE) {
		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	goto wait_for_vnic_req;
}

/*
 * Thread to monitor tx wqes and update the mac layer when needed.
 * Note that this thread can only be started after the tx wqe pool
 * has been allocated and initialized.
 */
void
eib_monitor_tx_wqes(eib_t *ss)
{
	eib_wqe_pool_t *wp = ss->ei_tx;
	kmutex_t ci_lock;
	callb_cpr_t ci;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_TXWQES_MONITOR);

monitor_wqe_status:
	mutex_enter(&wp->wp_lock);

	/*
	 * Wait till someone falls short of wqes
	 */
	while (wp->wp_status == 0) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&wp->wp_cv, &wp->wp_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	/*
	 * Have we been asked to die ?
	 */
	if (wp->wp_status & EIB_TXWQE_MONITOR_DIE) {
		mutex_exit(&wp->wp_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	ASSERT((wp->wp_status & EIB_TXWQE_SHORT) != 0);

	/*
	 * Start monitoring free wqes till they cross min threshold
	 */
	while ((wp->wp_nfree < EIB_NFREE_SWQES_HWM) &&
	    ((wp->wp_status & EIB_TXWQE_MONITOR_DIE) == 0)) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&wp->wp_cv, &wp->wp_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	/*
	 * Have we been asked to die ?
	 */
	if (wp->wp_status & EIB_TXWQE_MONITOR_DIE) {
		mutex_exit(&wp->wp_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	ASSERT(wp->wp_nfree >= EIB_NFREE_SWQES_HWM);
	wp->wp_status &= (~EIB_TXWQE_SHORT);

	mutex_exit(&wp->wp_lock);

	/*
	 * Inform the mac layer that tx resources are now available
	 * and go back to monitoring
	 */
	if (ss->ei_mac_hdl) {
		mac_tx_update(ss->ei_mac_hdl);
	}
	goto monitor_wqe_status;
}

/*
 * Thread to monitor lso bufs and update the mac layer as needed.
 * Note that this thread can only be started after the lso buckets
 * have been allocated and initialized.
 */
void
eib_monitor_lso_bufs(eib_t *ss)
{
	eib_lsobkt_t *bkt = ss->ei_lso;
	kmutex_t ci_lock;
	callb_cpr_t ci;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_LSOBUFS_MONITOR);

monitor_lso_status:
	mutex_enter(&bkt->bk_lock);

	/*
	 * Wait till someone falls short of LSO buffers or we're asked
	 * to die
	 */
	while (bkt->bk_status == 0) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&bkt->bk_cv, &bkt->bk_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	if (bkt->bk_status & EIB_LBUF_MONITOR_DIE) {
		mutex_exit(&bkt->bk_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	ASSERT((bkt->bk_status & EIB_LBUF_SHORT) != 0);

	/*
	 * Start monitoring free LSO buffers till there are enough
	 * free buffers available
	 */
	while ((bkt->bk_nfree < EIB_LSO_FREE_BUFS_THRESH) &&
	    ((bkt->bk_status & EIB_LBUF_MONITOR_DIE) == 0)) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&bkt->bk_cv, &bkt->bk_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	if (bkt->bk_status & EIB_LBUF_MONITOR_DIE) {
		mutex_exit(&bkt->bk_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	/*
	 * We have enough lso buffers available now
	 */
	ASSERT(bkt->bk_nfree >= EIB_LSO_FREE_BUFS_THRESH);
	bkt->bk_status &= (~EIB_LBUF_SHORT);

	mutex_exit(&bkt->bk_lock);

	/*
	 * Inform the mac layer that tx lso resources are now available
	 * and go back to monitoring
	 */
	if (ss->ei_mac_hdl) {
		mac_tx_update(ss->ei_mac_hdl);
	}
	goto monitor_lso_status;
}

/*
 * Thread to manage the keepalive requirements for vnics and the gateway.
 */
void
eib_manage_keepalives(eib_t *ss)
{
	eib_ka_vnics_t *elem;
	eib_ka_vnics_t *nxt;
	clock_t deadline;
	int64_t lbolt64;
	int err;
	kmutex_t ci_lock;
	callb_cpr_t ci;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_EVENTS_HDLR);

	mutex_enter(&ss->ei_ka_vnics_lock);

periodic_keepalive:
	deadline = ddi_get_lbolt() + ss->ei_gw_props->pp_vnic_ka_ticks;

	while ((ss->ei_ka_vnics_event &
	    (EIB_KA_VNICS_DIE | EIB_KA_VNICS_TIMED_OUT)) == 0) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		if (cv_timedwait(&ss->ei_ka_vnics_cv, &ss->ei_ka_vnics_lock,
		    deadline) == -1) {
			ss->ei_ka_vnics_event |= EIB_KA_VNICS_TIMED_OUT;
		}

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	if (ss->ei_ka_vnics_event & EIB_KA_VNICS_DIE) {
		for (elem = ss->ei_ka_vnics; elem; elem = nxt) {
			nxt = elem->ka_next;
			kmem_free(elem, sizeof (eib_ka_vnics_t));
		}
		ss->ei_ka_vnics = NULL;
		mutex_exit(&ss->ei_ka_vnics_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	/*
	 * Are there any vnics that need keepalive management ?
	 */
	ss->ei_ka_vnics_event &= ~EIB_KA_VNICS_TIMED_OUT;
	if (ss->ei_ka_vnics == NULL)
		goto periodic_keepalive;

	/*
	 * Ok, we need to send vnic keepalives to our gateway. But first
	 * check if the gateway heartbeat is good as of this moment. Note
	 * that we need to get the lbolt value after acquiring ei_vnic_lock
	 * to ensure that ei_gw_last_heartbeat does not change before the
	 * comparison (to avoid a negative value in the comparison result
	 * causing us to incorrectly assume that the gateway heartbeat has
	 * stopped).
	 */
	mutex_enter(&ss->ei_vnic_lock);

	lbolt64 = ddi_get_lbolt64();

	if (ss->ei_gw_last_heartbeat != 0) {
		if ((lbolt64 - ss->ei_gw_last_heartbeat) >
		    ss->ei_gw_props->pp_gw_ka_ticks) {

			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_manage_keepalives: no keepalives from gateway "
			    "0x%x for hca_guid=0x%llx, port=0x%x, "
			    "last_gw_ka=0x%llx", ss->ei_gw_props->pp_gw_portid,
			    ss->ei_props->ep_hca_guid,
			    ss->ei_props->ep_port_num,
			    ss->ei_gw_last_heartbeat);

			for (elem = ss->ei_ka_vnics; elem; elem = nxt) {
				nxt = elem->ka_next;
				ss->ei_zombie_vnics |=
				    ((uint64_t)1 << elem->ka_vnic->vn_instance);
				kmem_free(elem, sizeof (eib_ka_vnics_t));
			}
			ss->ei_ka_vnics = NULL;
			ss->ei_gw_unreachable = B_TRUE;
			mutex_exit(&ss->ei_vnic_lock);

			eib_mac_link_down(ss, B_FALSE);

			goto periodic_keepalive;
		}
	}
	mutex_exit(&ss->ei_vnic_lock);

	for (elem = ss->ei_ka_vnics; elem; elem = elem->ka_next)
		(void) eib_fip_heartbeat(ss, elem->ka_vnic, &err);

	goto periodic_keepalive;
}

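/*
 * Post a shutdown event to the events handler and wait for the
 * thread to exit
 */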
void
eib_stop_events_handler(eib_t *ss)
{
	eib_event_t *evi;

	evi = kmem_zalloc(sizeof (eib_event_t), KM_SLEEP);
	evi->ev_code = EIB_EV_SHUTDOWN;

	eib_svc_enqueue_event(ss, evi);

	thread_join(ss->ei_events_handler);
}

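/*
 * Ask the rwqes refiller thread to die and wait for it to exit
 */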
void
eib_stop_refill_rwqes(eib_t *ss)
{
	mutex_enter(&ss->ei_rxpost_lock);

	ss->ei_rxpost_die = 1;

	cv_signal(&ss->ei_rxpost_cv);
	mutex_exit(&ss->ei_rxpost_lock);

	thread_join(ss->ei_rwqes_refiller);
}

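/*
 * Post an EIB_CR_REQ_DIE request to the vnic creator and wait for
 * the thread to exit
 */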
void
eib_stop_vnic_creator(eib_t *ss)
{
	eib_vnic_req_t *vrq;

	vrq = kmem_zalloc(sizeof (eib_vnic_req_t), KM_SLEEP);
	vrq->vr_req = EIB_CR_REQ_DIE;

	eib_vnic_enqueue_req(ss, vrq);

	thread_join(ss->ei_vnic_creator);
}

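/*
 * Ask the tx wqes monitor to die and wait for the thread to exit
 */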
void
eib_stop_monitor_tx_wqes(eib_t *ss)
{
	eib_wqe_pool_t *wp = ss->ei_tx;

	mutex_enter(&wp->wp_lock);

	wp->wp_status |= EIB_TXWQE_MONITOR_DIE;

	cv_signal(&wp->wp_cv);
	mutex_exit(&wp->wp_lock);

	thread_join(ss->ei_txwqe_monitor);
}

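/*
 * Ask the lso bufs monitor to die and wait for the thread to exit.
 * Unless the force flag is set, this fails if some buffers are
 * still not reaped.
 */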
int
eib_stop_monitor_lso_bufs(eib_t *ss, boolean_t force)
{
	eib_lsobkt_t *bkt = ss->ei_lso;

	mutex_enter(&bkt->bk_lock);

	/*
	 * If there are some buffers still not reaped and the force
	 * flag is not set, return without doing anything. Otherwise,
	 * stop the lso bufs monitor and wait for it to die.
	 */
	if ((bkt->bk_nelem != bkt->bk_nfree) && (force == B_FALSE)) {
		mutex_exit(&bkt->bk_lock);
		return (EIB_E_FAILURE);
	}

	bkt->bk_status |= EIB_LBUF_MONITOR_DIE;

	cv_signal(&bkt->bk_cv);
	mutex_exit(&bkt->bk_lock);

	thread_join(ss->ei_lsobufs_monitor);
	return (EIB_E_SUCCESS);
}

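/*
 * Ask the keepalives manager to die and wait for the thread to exit
 */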
void
eib_stop_manage_keepalives(eib_t *ss)
{
	mutex_enter(&ss->ei_ka_vnics_lock);

	ss->ei_ka_vnics_event |= EIB_KA_VNICS_DIE;

	cv_signal(&ss->ei_ka_vnics_cv);
	mutex_exit(&ss->ei_ka_vnics_lock);

	thread_join(ss->ei_keepalives_manager);
}

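/*
 * Post an EIB_CR_REQ_FLUSH request to discard all pending and
 * failed vnic creation requests
 */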
void
eib_flush_vnic_reqs(eib_t *ss)
{
	eib_vnic_req_t *vrq;

	vrq = kmem_zalloc(sizeof (eib_vnic_req_t), KM_SLEEP);
	vrq->vr_req = EIB_CR_REQ_FLUSH;

	eib_vnic_enqueue_req(ss, vrq);
}

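/*
 * Callback from the EoIB nexus when a gateway alive message is
 * received; queues an EIB_EV_GW_UP event for the events handler
 */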
/*ARGSUSED*/
void
eib_gw_alive_cb(dev_info_t *dip, ddi_eventcookie_t cookie, void *arg,
    void *impl_data)
{
	eib_t *ss = (eib_t *)arg;
	eib_event_t *evi;

	evi = kmem_zalloc(sizeof (eib_event_t), KM_NOSLEEP);
	if (evi == NULL) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_gw_alive_cb: "
		    "no memory, ignoring this gateway alive event");
	} else {
		evi->ev_code = EIB_EV_GW_UP;

		eib_svc_enqueue_event(ss, evi);
	}
}

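/*
 * Callback from the EoIB nexus when a login ack is received on the
 * solicitation qpn
 */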
/*ARGSUSED*/
void
eib_login_ack_cb(dev_info_t *dip, ddi_eventcookie_t cookie, void *arg,
    void *impl_data)
{
	eib_t *ss = (eib_t *)arg;
	uint8_t *pkt = (uint8_t *)impl_data;
	eib_login_data_t ld;

	/*
	 * We have received a login ack message from the gateway via the EoIB
	 * nexus (solicitation qpn). The packet is passed to us raw (unparsed)
	 * and we have to figure out if this is a vnic login ack.
	 */
	if (eib_fip_parse_login_ack(ss, pkt + EIB_GRH_SZ, &ld) == EIB_E_SUCCESS)
		eib_vnic_login_ack(ss, &ld);
}

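/*
 * Callback from the EoIB nexus when updated gateway properties are
 * received; queues an EIB_EV_GW_INFO_UPDATE event with a copy of
 * the new gateway info
 */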
/*ARGSUSED*/
void
eib_gw_info_cb(dev_info_t *dip, ddi_eventcookie_t cookie, void *arg,
    void *impl_data)
{
	eib_t *ss = (eib_t *)arg;
	eib_event_t *evi;

	evi = kmem_zalloc(sizeof (eib_event_t), KM_NOSLEEP);
	if (evi == NULL) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_gw_info_cb: "
		    "no memory, ignoring this gateway props update event");
		return;
	}
	evi->ev_arg = kmem_zalloc(sizeof (eib_gw_info_t), KM_NOSLEEP);
	if (evi->ev_arg == NULL) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_gw_info_cb: "
		    "no memory, ignoring this gateway props update event");
		kmem_free(evi, sizeof (eib_event_t));
		return;
	}
	bcopy(impl_data, evi->ev_arg, sizeof (eib_gw_info_t));
	evi->ev_code = EIB_EV_GW_INFO_UPDATE;

	eib_svc_enqueue_event(ss, evi);
}