drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c

   1 /*
   2  * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
   3  * driver for Linux.
   4  *
   5  * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
   6  *
   7  * This software is available to you under a choice of one of two
   8  * licenses.  You may choose to be licensed under the terms of the GNU
   9  * General Public License (GPL) Version 2, available from the file
  10  * COPYING in the main directory of this source tree, or the
  11  * OpenIB.org BSD license below:
  12  *
  13  *     Redistribution and use in source and binary forms, with or
  14  *     without modification, are permitted provided that the following
  15  *     conditions are met:
  16  *
  17  *      - Redistributions of source code must retain the above
  18  *        copyright notice, this list of conditions and the following
  19  *        disclaimer.
  20  *
  21  *      - Redistributions in binary form must reproduce the above
  22  *        copyright notice, this list of conditions and the following
  23  *        disclaimer in the documentation and/or other materials
  24  *        provided with the distribution.
  25  *
  26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33  * SOFTWARE.
  34  */
  35
  36 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  37
  38 #include <linux/module.h>
  39 #include <linux/moduleparam.h>
  40 #include <linux/init.h>
  41 #include <linux/pci.h>
  42 #include <linux/dma-mapping.h>
  43 #include <linux/netdevice.h>
  44 #include <linux/etherdevice.h>
  45 #include <linux/debugfs.h>
  46 #include <linux/ethtool.h>
  47 #include <linux/mdio.h>
  48
  49 #include "t4vf_common.h"
  50 #include "t4vf_defs.h"
  51
  52 #include "../cxgb4/t4_regs.h"
  53 #include "../cxgb4/t4_msg.h"
  54
/*
 * Generic information about the driver: its version string and a one-line
 * human-readable description.  (The places that consume these strings are
 * outside this part of the file.)
 */
#define DRV_VERSION "2.0.0-ko"
#define DRV_DESC "Chelsio T4/T5/T6 Virtual Function (VF) Network Driver"
  60
  61 /*
  62  * Module Parameters.
  63  * ==================
  64  */
  65
  66 /*
  67  * Default ethtool "message level" for adapters.
  68  */
  69 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
  70                          NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
  71                          NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
  72
/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X then MSI.  The "msi" module parameter determines which of these
 * schemes the driver may consider as follows:
 *
 *     msi = 2 (MSI_MSIX): choose from among MSI-X and MSI
 *     msi = 1 (MSI_MSI):  only consider MSI interrupts
 *
 * The default is MSI_MSIX.  The parameter is registered with mode 0644.
 *
 * Note that unlike the Physical Function driver, this Virtual Function driver
 * does _not_ support legacy INTx interrupts (this limitation is mandated by
 * the PCI-E SR-IOV standard).
 */
#define MSI_MSIX        2
#define MSI_MSI         1
#define MSI_DEFAULT     MSI_MSIX

static int msi = MSI_DEFAULT;

module_param(msi, int, 0644);
MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
  93
  94 /*
  95  * Fundamental constants.
  96  * ======================
  97  */
  98
  99 enum {
 100         MAX_TXQ_ENTRIES         = 16384,
 101         MAX_RSPQ_ENTRIES        = 16384,
 102         MAX_RX_BUFFERS          = 16384,
 103
 104         MIN_TXQ_ENTRIES         = 32,
 105         MIN_RSPQ_ENTRIES        = 128,
 106         MIN_FL_ENTRIES          = 16,
 107
 108         /*
 109          * For purposes of manipulating the Free List size we need to
 110          * recognize that Free Lists are actually Egress Queues (the host
 111          * produces free buffers which the hardware consumes), Egress Queues
 112          * indices are all in units of Egress Context Units bytes, and free
 113          * list entries are 64-bit PCI DMA addresses.  And since the state of
 114          * the Producer Index == the Consumer Index implies an EMPTY list, we
 115          * always have at least one Egress Unit's worth of Free List entries
 116          * unused.  See sge.c for more details ...
 117          */
 118         EQ_UNIT = SGE_EQ_IDXSIZE,
 119         FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
 120         MIN_FL_RESID = FL_PER_EQ_UNIT,
 121 };
 122
 123 /*
 124  * Global driver state.
 125  * ====================
 126  */
 127
/*
 * Driver-wide debugfs directory entry.
 * NOTE(review): presumably the root directory under which the driver's
 * debugfs nodes are created; the code that sets and uses it is outside this
 * part of the file -- confirm.
 */
static struct dentry *cxgb4vf_debugfs_root;
 129
 130 /*
 131  * OS "Callback" functions.
 132  * ========================
 133  */
 134
 135 /*
 136  * The link status has changed on the indicated "port" (Virtual Interface).
 137  */
 138 void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
 139 {
 140         struct net_device *dev = adapter->port[pidx];
 141
 142         /*
 143          * If the port is disabled or the current recorded "link up"
 144          * status matches the new status, just return.
 145          */
 146         if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
 147                 return;
 148
 149         /*
 150          * Tell the OS that the link status has changed and print a short
 151          * informative message on the console about the event.
 152          */
 153         if (link_ok) {
 154                 const char *s;
 155                 const char *fc;
 156                 const struct port_info *pi = netdev_priv(dev);
 157
 158                 switch (pi->link_cfg.speed) {
 159                 case 100:
 160                         s = "100Mbps";
 161                         break;
 162                 case 1000:
 163                         s = "1Gbps";
 164                         break;
 165                 case 10000:
 166                         s = "10Gbps";
 167                         break;
 168                 case 25000:
 169                         s = "25Gbps";
 170                         break;
 171                 case 40000:
 172                         s = "40Gbps";
 173                         break;
 174                 case 100000:
 175                         s = "100Gbps";
 176                         break;
 177
 178                 default:
 179                         s = "unknown";
 180                         break;
 181                 }
 182
 183                 switch ((int)pi->link_cfg.fc) {
 184                 case PAUSE_RX:
 185                         fc = "RX";
 186                         break;
 187
 188                 case PAUSE_TX:
 189                         fc = "TX";
 190                         break;
 191
 192                 case PAUSE_RX | PAUSE_TX:
 193                         fc = "RX/TX";
 194                         break;
 195
 196                 default:
 197                         fc = "no";
 198                         break;
 199                 }
 200
 201                 netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
 202         } else {
 203                 netdev_info(dev, "link down\n");
 204         }
 205 }
 206
 207 /*
 208  * THe port module type has changed on the indicated "port" (Virtual
 209  * Interface).
 210  */
 211 void t4vf_os_portmod_changed(struct adapter *adapter, int pidx)
 212 {
 213         static const char * const mod_str[] = {
 214                 NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
 215         };
 216         const struct net_device *dev = adapter->port[pidx];
 217         const struct port_info *pi = netdev_priv(dev);
 218
 219         if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
 220                 dev_info(adapter->pdev_dev, "%s: port module unplugged\n",
 221                          dev->name);
 222         else if (pi->mod_type < ARRAY_SIZE(mod_str))
 223                 dev_info(adapter->pdev_dev, "%s: %s port module inserted\n",
 224                          dev->name, mod_str[pi->mod_type]);
 225         else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
 226                 dev_info(adapter->pdev_dev, "%s: unsupported optical port "
 227                          "module inserted\n", dev->name);
 228         else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
 229                 dev_info(adapter->pdev_dev, "%s: unknown port module inserted,"
 230                          "forcing TWINAX\n", dev->name);
 231         else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
 232                 dev_info(adapter->pdev_dev, "%s: transceiver module error\n",
 233                          dev->name);
 234         else
 235                 dev_info(adapter->pdev_dev, "%s: unknown module type %d "
 236                          "inserted\n", dev->name, pi->mod_type);
 237 }
 238
 239 /*
 240  * Net device operations.
 241  * ======================
 242  */
 243
 244
 245
 246
 247 /*
 248  * Perform the MAC and PHY actions needed to enable a "port" (Virtual
 249  * Interface).
 250  */
 251 static int link_start(struct net_device *dev)
 252 {
 253         int ret;
 254         struct port_info *pi = netdev_priv(dev);
 255
 256         /*
 257          * We do not set address filters and promiscuity here, the stack does
 258          * that step explicitly. Enable vlan accel.
 259          */
 260         ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
 261                               true);
 262         if (ret == 0) {
 263                 ret = t4vf_change_mac(pi->adapter, pi->viid,
 264                                       pi->xact_addr_filt, dev->dev_addr, true);
 265                 if (ret >= 0) {
 266                         pi->xact_addr_filt = ret;
 267                         ret = 0;
 268                 }
 269         }
 270
 271         /*
 272          * We don't need to actually "start the link" itself since the
 273          * firmware will do that for us when the first Virtual Interface
 274          * is enabled on a port.
 275          */
 276         if (ret == 0)
 277                 ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
 278
 279         /* The Virtual Interfaces are connected to an internal switch on the
 280          * chip which allows VIs attached to the same port to talk to each
 281          * other even when the port link is down.  As a result, we generally
 282          * want to always report a VI's link as being "up", provided there are
 283          * no errors in enabling vi.
 284          */
 285
 286         if (ret == 0)
 287                 netif_carrier_on(dev);
 288
 289         return ret;
 290 }
 291
 292 /*
 293  * Name the MSI-X interrupts.
 294  */
 295 static void name_msix_vecs(struct adapter *adapter)
 296 {
 297         int namelen = sizeof(adapter->msix_info[0].desc) - 1;
 298         int pidx;
 299
 300         /*
 301          * Firmware events.
 302          */
 303         snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
 304                  "%s-FWeventq", adapter->name);
 305         adapter->msix_info[MSIX_FW].desc[namelen] = 0;
 306
 307         /*
 308          * Ethernet queues.
 309          */
 310         for_each_port(adapter, pidx) {
 311                 struct net_device *dev = adapter->port[pidx];
 312                 const struct port_info *pi = netdev_priv(dev);
 313                 int qs, msi;
 314
 315                 for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
 316                         snprintf(adapter->msix_info[msi].desc, namelen,
 317                                  "%s-%d", dev->name, qs);
 318                         adapter->msix_info[msi].desc[namelen] = 0;
 319                 }
 320         }
 321 }
 322
 323 /*
 324  * Request all of our MSI-X resources.
 325  */
 326 static int request_msix_queue_irqs(struct adapter *adapter)
 327 {
 328         struct sge *s = &adapter->sge;
 329         int rxq, msi, err;
 330
 331         /*
 332          * Firmware events.
 333          */
 334         err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
 335                           0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
 336         if (err)
 337                 return err;
 338
 339         /*
 340          * Ethernet queues.
 341          */
 342         msi = MSIX_IQFLINT;
 343         for_each_ethrxq(s, rxq) {
 344                 err = request_irq(adapter->msix_info[msi].vec,
 345                                   t4vf_sge_intr_msix, 0,
 346                                   adapter->msix_info[msi].desc,
 347                                   &s->ethrxq[rxq].rspq);
 348                 if (err)
 349                         goto err_free_irqs;
 350                 msi++;
 351         }
 352         return 0;
 353
 354 err_free_irqs:
 355         while (--rxq >= 0)
 356                 free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
 357         free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
 358         return err;
 359 }
 360
 361 /*
 362  * Free our MSI-X resources.
 363  */
 364 static void free_msix_queue_irqs(struct adapter *adapter)
 365 {
 366         struct sge *s = &adapter->sge;
 367         int rxq, msi;
 368
 369         free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
 370         msi = MSIX_IQFLINT;
 371         for_each_ethrxq(s, rxq)
 372                 free_irq(adapter->msix_info[msi++].vec,
 373                          &s->ethrxq[rxq].rspq);
 374 }
 375
 376 /*
 377  * Turn on NAPI and start up interrupts on a response queue.
 378  */
 379 static void qenable(struct sge_rspq *rspq)
 380 {
 381         napi_enable(&rspq->napi);
 382
 383         /*
 384          * 0-increment the Going To Sleep register to start the timer and
 385          * enable interrupts.
 386          */
 387         t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
 388                      CIDXINC_V(0) |
 389                      SEINTARM_V(rspq->intr_params) |
 390                      INGRESSQID_V(rspq->cntxt_id));
 391 }
 392
 393 /*
 394  * Enable NAPI scheduling and interrupt generation for all Receive Queues.
 395  */
 396 static void enable_rx(struct adapter *adapter)
 397 {
 398         int rxq;
 399         struct sge *s = &adapter->sge;
 400
 401         for_each_ethrxq(s, rxq)
 402                 qenable(&s->ethrxq[rxq].rspq);
 403         qenable(&s->fw_evtq);
 404
 405         /*
 406          * The interrupt queue doesn't use NAPI so we do the 0-increment of
 407          * its Going To Sleep register here to get it started.
 408          */
 409         if (adapter->flags & USING_MSI)
 410                 t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
 411                              CIDXINC_V(0) |
 412                              SEINTARM_V(s->intrq.intr_params) |
 413                              INGRESSQID_V(s->intrq.cntxt_id));
 414
 415 }
 416
 417 /*
 418  * Wait until all NAPI handlers are descheduled.
 419  */
 420 static void quiesce_rx(struct adapter *adapter)
 421 {
 422         struct sge *s = &adapter->sge;
 423         int rxq;
 424
 425         for_each_ethrxq(s, rxq)
 426                 napi_disable(&s->ethrxq[rxq].rspq.napi);
 427         napi_disable(&s->fw_evtq.napi);
 428 }
 429
 430 /*
 431  * Response queue handler for the firmware event queue.
 432  */
 433 static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
 434                           const struct pkt_gl *gl)
 435 {
 436         /*
 437          * Extract response opcode and get pointer to CPL message body.
 438          */
 439         struct adapter *adapter = rspq->adapter;
 440         u8 opcode = ((const struct rss_header *)rsp)->opcode;
 441         void *cpl = (void *)(rsp + 1);
 442
 443         switch (opcode) {
 444         case CPL_FW6_MSG: {
 445                 /*
 446                  * We've received an asynchronous message from the firmware.
 447                  */
 448                 const struct cpl_fw6_msg *fw_msg = cpl;
 449                 if (fw_msg->type == FW6_TYPE_CMD_RPL)
 450                         t4vf_handle_fw_rpl(adapter, fw_msg->data);
 451                 break;
 452         }
 453
 454         case CPL_FW4_MSG: {
 455                 /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
 456                  */
 457                 const struct cpl_sge_egr_update *p = (void *)(rsp + 3);
 458                 opcode = CPL_OPCODE_G(ntohl(p->opcode_qid));
 459                 if (opcode != CPL_SGE_EGR_UPDATE) {
 460                         dev_err(adapter->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n"
 461                                 , opcode);
 462                         break;
 463                 }
 464                 cpl = (void *)p;
 465                 /*FALLTHROUGH*/
 466         }
 467
 468         case CPL_SGE_EGR_UPDATE: {
 469                 /*
 470                  * We've received an Egress Queue Status Update message.  We
 471                  * get these, if the SGE is configured to send these when the
 472                  * firmware passes certain points in processing our TX
 473                  * Ethernet Queue or if we make an explicit request for one.
 474                  * We use these updates to determine when we may need to
 475                  * restart a TX Ethernet Queue which was stopped for lack of
 476                  * free TX Queue Descriptors ...
 477                  */
 478                 const struct cpl_sge_egr_update *p = cpl;
 479                 unsigned int qid = EGR_QID_G(be32_to_cpu(p->opcode_qid));
 480                 struct sge *s = &adapter->sge;
 481                 struct sge_txq *tq;
 482                 struct sge_eth_txq *txq;
 483                 unsigned int eq_idx;
 484
 485                 /*
 486                  * Perform sanity checking on the Queue ID to make sure it
 487                  * really refers to one of our TX Ethernet Egress Queues which
 488                  * is active and matches the queue's ID.  None of these error
 489                  * conditions should ever happen so we may want to either make
 490                  * them fatal and/or conditionalized under DEBUG.
 491                  */
 492                 eq_idx = EQ_IDX(s, qid);
 493                 if (unlikely(eq_idx >= MAX_EGRQ)) {
 494                         dev_err(adapter->pdev_dev,
 495                                 "Egress Update QID %d out of range\n", qid);
 496                         break;
 497                 }
 498                 tq = s->egr_map[eq_idx];
 499                 if (unlikely(tq == NULL)) {
 500                         dev_err(adapter->pdev_dev,
 501                                 "Egress Update QID %d TXQ=NULL\n", qid);
 502                         break;
 503                 }
 504                 txq = container_of(tq, struct sge_eth_txq, q);
 505                 if (unlikely(tq->abs_id != qid)) {
 506                         dev_err(adapter->pdev_dev,
 507                                 "Egress Update QID %d refers to TXQ %d\n",
 508                                 qid, tq->abs_id);
 509                         break;
 510                 }
 511
 512                 /*
 513                  * Restart a stopped TX Queue which has less than half of its
 514                  * TX ring in use ...
 515                  */
 516                 txq->q.restarts++;
 517                 netif_tx_wake_queue(txq->txq);
 518                 break;
 519         }
 520
 521         default:
 522                 dev_err(adapter->pdev_dev,
 523                         "unexpected CPL %#x on FW event queue\n", opcode);
 524         }
 525
 526         return 0;
 527 }
 528
 529 /*
 530  * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
 531  * to use and initializes them.  We support multiple "Queue Sets" per port if
 532  * we have MSI-X, otherwise just one queue set per port.
 533  */
 534 static int setup_sge_queues(struct adapter *adapter)
 535 {
 536         struct sge *s = &adapter->sge;
 537         int err, pidx, msix;
 538
 539         /*
 540          * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
 541          * state.
 542          */
 543         bitmap_zero(s->starving_fl, MAX_EGRQ);
 544
 545         /*
 546          * If we're using MSI interrupt mode we need to set up a "forwarded
 547          * interrupt" queue which we'll set up with our MSI vector.  The rest
 548          * of the ingress queues will be set up to forward their interrupts to
 549          * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
 550          * the intrq's queue ID as the interrupt forwarding queue for the
 551          * subsequent calls ...
 552          */
 553         if (adapter->flags & USING_MSI) {
 554                 err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
 555                                          adapter->port[0], 0, NULL, NULL);
 556                 if (err)
 557                         goto err_free_queues;
 558         }
 559
 560         /*
 561          * Allocate our ingress queue for asynchronous firmware messages.
 562          */
 563         err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
 564                                  MSIX_FW, NULL, fwevtq_handler);
 565         if (err)
 566                 goto err_free_queues;
 567
 568         /*
 569          * Allocate each "port"'s initial Queue Sets.  These can be changed
 570          * later on ... up to the point where any interface on the adapter is
 571          * brought up at which point lots of things get nailed down
 572          * permanently ...
 573          */
 574         msix = MSIX_IQFLINT;
 575         for_each_port(adapter, pidx) {
 576                 struct net_device *dev = adapter->port[pidx];
 577                 struct port_info *pi = netdev_priv(dev);
 578                 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
 579                 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
 580                 int qs;
 581
 582                 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
 583                         err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
 584                                                  dev, msix++,
 585                                                  &rxq->fl, t4vf_ethrx_handler);
 586                         if (err)
 587                                 goto err_free_queues;
 588
 589                         err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
 590                                              netdev_get_tx_queue(dev, qs),
 591                                              s->fw_evtq.cntxt_id);
 592                         if (err)
 593                                 goto err_free_queues;
 594
 595                         rxq->rspq.idx = qs;
 596                         memset(&rxq->stats, 0, sizeof(rxq->stats));
 597                 }
 598         }
 599
 600         /*
 601          * Create the reverse mappings for the queues.
 602          */
 603         s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
 604         s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
 605         IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
 606         for_each_port(adapter, pidx) {
 607                 struct net_device *dev = adapter->port[pidx];
 608                 struct port_info *pi = netdev_priv(dev);
 609                 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
 610                 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
 611                 int qs;
 612
 613                 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
 614                         IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
 615                         EQ_MAP(s, txq->q.abs_id) = &txq->q;
 616
 617                         /*
 618                          * The FW_IQ_CMD doesn't return the Absolute Queue IDs
 619                          * for Free Lists but since all of the Egress Queues
 620                          * (including Free Lists) have Relative Queue IDs
 621                          * which are computed as Absolute - Base Queue ID, we
 622                          * can synthesize the Absolute Queue IDs for the Free
 623                          * Lists.  This is useful for debugging purposes when
 624                          * we want to dump Queue Contexts via the PF Driver.
 625                          */
 626                         rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
 627                         EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
 628                 }
 629         }
 630         return 0;
 631
 632 err_free_queues:
 633         t4vf_free_sge_resources(adapter);
 634         return err;
 635 }
 636
 637 /*
 638  * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
 639  * queues.  We configure the RSS CPU lookup table to distribute to the number
 640  * of HW receive queues, and the response queue lookup table to narrow that
 641  * down to the response queues actually configured for each "port" (Virtual
 642  * Interface).  We always configure the RSS mapping for all ports since the
 643  * mapping table has plenty of entries.
 644  */
 645 static int setup_rss(struct adapter *adapter)
 646 {
 647         int pidx;
 648
 649         for_each_port(adapter, pidx) {
 650                 struct port_info *pi = adap2pinfo(adapter, pidx);
 651                 struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
 652                 u16 rss[MAX_PORT_QSETS];
 653                 int qs, err;
 654
 655                 for (qs = 0; qs < pi->nqsets; qs++)
 656                         rss[qs] = rxq[qs].rspq.abs_id;
 657
 658                 err = t4vf_config_rss_range(adapter, pi->viid,
 659                                             0, pi->rss_size, rss, pi->nqsets);
 660                 if (err)
 661                         return err;
 662
 663                 /*
 664                  * Perform Global RSS Mode-specific initialization.
 665                  */
 666                 switch (adapter->params.rss.mode) {
 667                 case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
 668                         /*
 669                          * If Tunnel All Lookup isn't specified in the global
 670                          * RSS Configuration, then we need to specify a
 671                          * default Ingress Queue for any ingress packets which
 672                          * aren't hashed.  We'll use our first ingress queue
 673                          * ...
 674                          */
 675                         if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
 676                                 union rss_vi_config config;
 677                                 err = t4vf_read_rss_vi_config(adapter,
 678                                                               pi->viid,
 679                                                               &config);
 680                                 if (err)
 681                                         return err;
 682                                 config.basicvirtual.defaultq =
 683                                         rxq[0].rspq.abs_id;
 684                                 err = t4vf_write_rss_vi_config(adapter,
 685                                                                pi->viid,
 686                                                                &config);
 687                                 if (err)
 688                                         return err;
 689                         }
 690                         break;
 691                 }
 692         }
 693
 694         return 0;
 695 }
 696
 697 /*
 698  * Bring the adapter up.  Called whenever we go from no "ports" open to having
 699  * one open.  This function performs the actions necessary to make an adapter
 700  * operational, such as completing the initialization of HW modules, and
 701  * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
 702  * this is called "cxgb_up" in the PF Driver.)
 703  */
 704 static int adapter_up(struct adapter *adapter)
 705 {
 706         int err;
 707
 708         /*
 709          * If this is the first time we've been called, perform basic
 710          * adapter setup.  Once we've done this, many of our adapter
 711          * parameters can no longer be changed ...
 712          */
 713         if ((adapter->flags & FULL_INIT_DONE) == 0) {
 714                 err = setup_sge_queues(adapter);
 715                 if (err)
 716                         return err;
 717                 err = setup_rss(adapter);
 718                 if (err) {
 719                         t4vf_free_sge_resources(adapter);
 720                         return err;
 721                 }
 722
 723                 if (adapter->flags & USING_MSIX)
 724                         name_msix_vecs(adapter);
 725                 adapter->flags |= FULL_INIT_DONE;
 726         }
 727
 728         /*
 729          * Acquire our interrupt resources.  We only support MSI-X and MSI.
 730          */
 731         BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
 732         if (adapter->flags & USING_MSIX)
 733                 err = request_msix_queue_irqs(adapter);
 734         else
 735                 err = request_irq(adapter->pdev->irq,
 736                                   t4vf_intr_handler(adapter), 0,
 737                                   adapter->name, adapter);
 738         if (err) {
 739                 dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
 740                         err);
 741                 return err;
 742         }
 743
 744         /*
 745          * Enable NAPI ingress processing and return success.
 746          */
 747         enable_rx(adapter);
 748         t4vf_sge_start(adapter);
 749
 750         /* Initialize hash mac addr list*/
 751         INIT_LIST_HEAD(&adapter->mac_hlist);
 752         return 0;
 753 }
 754
 755 /*
 756  * Bring the adapter down.  Called whenever the last "port" (Virtual
 757  * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
 758  * Driver.)
 759  */
 760 static void adapter_down(struct adapter *adapter)
 761 {
 762         /*
 763          * Free interrupt resources.
 764          */
 765         if (adapter->flags & USING_MSIX)
 766                 free_msix_queue_irqs(adapter);
 767         else
 768                 free_irq(adapter->pdev->irq, adapter);
 769
 770         /*
 771          * Wait for NAPI handlers to finish.
 772          */
 773         quiesce_rx(adapter);
 774 }
 775
 776 /*
 777  * Start up a net device.
 778  */
 779 static int cxgb4vf_open(struct net_device *dev)
 780 {
 781         int err;
 782         struct port_info *pi = netdev_priv(dev);
 783         struct adapter *adapter = pi->adapter;
 784
 785         /*
 786          * If this is the first interface that we're opening on the "adapter",
 787          * bring the "adapter" up now.
 788          */
 789         if (adapter->open_device_map == 0) {
 790                 err = adapter_up(adapter);
 791                 if (err)
 792                         return err;
 793         }
 794
 795         /*
 796          * Note that this interface is up and start everything up ...
 797          */
 798         err = link_start(dev);
 799         if (err)
 800                 goto err_unwind;
 801
 802         pi->vlan_id = t4vf_get_vf_vlan_acl(adapter);
 803
 804         netif_tx_start_all_queues(dev);
 805         set_bit(pi->port_id, &adapter->open_device_map);
 806         return 0;
 807
 808 err_unwind:
 809         if (adapter->open_device_map == 0)
 810                 adapter_down(adapter);
 811         return err;
 812 }
 813
 814 /*
 815  * Shut down a net device.  This routine is called "cxgb_close" in the PF
 816  * Driver ...
 817  */
 818 static int cxgb4vf_stop(struct net_device *dev)
 819 {
 820         struct port_info *pi = netdev_priv(dev);
 821         struct adapter *adapter = pi->adapter;
 822
 823         netif_tx_stop_all_queues(dev);
 824         netif_carrier_off(dev);
 825         t4vf_enable_vi(adapter, pi->viid, false, false);
 826         pi->link_cfg.link_ok = 0;
 827
 828         clear_bit(pi->port_id, &adapter->open_device_map);
 829         if (adapter->open_device_map == 0)
 830                 adapter_down(adapter);
 831         return 0;
 832 }
 833
 834 /*
 835  * Translate our basic statistics into the standard "ifconfig" statistics.
 836  */
 837 static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
 838 {
 839         struct t4vf_port_stats stats;
 840         struct port_info *pi = netdev2pinfo(dev);
 841         struct adapter *adapter = pi->adapter;
 842         struct net_device_stats *ns = &dev->stats;
 843         int err;
 844
 845         spin_lock(&adapter->stats_lock);
 846         err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
 847         spin_unlock(&adapter->stats_lock);
 848
 849         memset(ns, 0, sizeof(*ns));
 850         if (err)
 851                 return ns;
 852
 853         ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
 854                         stats.tx_ucast_bytes + stats.tx_offload_bytes);
 855         ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
 856                           stats.tx_ucast_frames + stats.tx_offload_frames);
 857         ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
 858                         stats.rx_ucast_bytes);
 859         ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
 860                           stats.rx_ucast_frames);
 861         ns->multicast = stats.rx_mcast_frames;
 862         ns->tx_errors = stats.tx_drop_frames;
 863         ns->rx_errors = stats.rx_err_frames;
 864
 865         return ns;
 866 }
 867
 868 static inline int cxgb4vf_set_addr_hash(struct port_info *pi)
 869 {
 870         struct adapter *adapter = pi->adapter;
 871         u64 vec = 0;
 872         bool ucast = false;
 873         struct hash_mac_addr *entry;
 874
 875         /* Calculate the hash vector for the updated list and program it */
 876         list_for_each_entry(entry, &adapter->mac_hlist, list) {
 877                 ucast |= is_unicast_ether_addr(entry->addr);
 878                 vec |= (1ULL << hash_mac_addr(entry->addr));
 879         }
 880         return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
 881 }
 882
 883 static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr)
 884 {
 885         struct port_info *pi = netdev_priv(netdev);
 886         struct adapter *adapter = pi->adapter;
 887         int ret;
 888         u64 mhash = 0;
 889         u64 uhash = 0;
 890         bool free = false;
 891         bool ucast = is_unicast_ether_addr(mac_addr);
 892         const u8 *maclist[1] = {mac_addr};
 893         struct hash_mac_addr *new_entry;
 894
 895         ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist,
 896                                   NULL, ucast ? &uhash : &mhash, false);
 897         if (ret < 0)
 898                 goto out;
 899         /* if hash != 0, then add the addr to hash addr list
 900          * so on the end we will calculate the hash for the
 901          * list and program it
 902          */
 903         if (uhash || mhash) {
 904                 new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
 905                 if (!new_entry)
 906                         return -ENOMEM;
 907                 ether_addr_copy(new_entry->addr, mac_addr);
 908                 list_add_tail(&new_entry->list, &adapter->mac_hlist);
 909                 ret = cxgb4vf_set_addr_hash(pi);
 910         }
 911 out:
 912         return ret < 0 ? ret : 0;
 913 }
 914
 915 static int cxgb4vf_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
 916 {
 917         struct port_info *pi = netdev_priv(netdev);
 918         struct adapter *adapter = pi->adapter;
 919         int ret;
 920         const u8 *maclist[1] = {mac_addr};
 921         struct hash_mac_addr *entry, *tmp;
 922
 923         /* If the MAC address to be removed is in the hash addr
 924          * list, delete it from the list and update hash vector
 925          */
 926         list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) {
 927                 if (ether_addr_equal(entry->addr, mac_addr)) {
 928                         list_del(&entry->list);
 929                         kfree(entry);
 930                         return cxgb4vf_set_addr_hash(pi);
 931                 }
 932         }
 933
 934         ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false);
 935         return ret < 0 ? -EINVAL : 0;
 936 }
 937
 938 /*
 939  * Set RX properties of a port, such as promiscruity, address filters, and MTU.
 940  * If @mtu is -1 it is left unchanged.
 941  */
 942 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
 943 {
 944         struct port_info *pi = netdev_priv(dev);
 945
 946         __dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
 947         __dev_mc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
 948         return t4vf_set_rxmode(pi->adapter, pi->viid, -1,
 949                                (dev->flags & IFF_PROMISC) != 0,
 950                                (dev->flags & IFF_ALLMULTI) != 0,
 951                                1, -1, sleep_ok);
 952 }
 953
 954 /*
 955  * Set the current receive modes on the device.
 956  */
 957 static void cxgb4vf_set_rxmode(struct net_device *dev)
 958 {
 959         /* unfortunately we can't return errors to the stack */
 960         set_rxmode(dev, -1, false);
 961 }
 962
 963 /*
 964  * Find the entry in the interrupt holdoff timer value array which comes
 965  * closest to the specified interrupt holdoff value.
 966  */
 967 static int closest_timer(const struct sge *s, int us)
 968 {
 969         int i, timer_idx = 0, min_delta = INT_MAX;
 970
 971         for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
 972                 int delta = us - s->timer_val[i];
 973                 if (delta < 0)
 974                         delta = -delta;
 975                 if (delta < min_delta) {
 976                         min_delta = delta;
 977                         timer_idx = i;
 978                 }
 979         }
 980         return timer_idx;
 981 }
 982
 983 static int closest_thres(const struct sge *s, int thres)
 984 {
 985         int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
 986
 987         for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
 988                 delta = thres - s->counter_val[i];
 989                 if (delta < 0)
 990                         delta = -delta;
 991                 if (delta < min_delta) {
 992                         min_delta = delta;
 993                         pktcnt_idx = i;
 994                 }
 995         }
 996         return pktcnt_idx;
 997 }
 998
 999 /*
1000  * Return a queue's interrupt hold-off time in us.  0 means no timer.
1001  */
1002 static unsigned int qtimer_val(const struct adapter *adapter,
1003                                const struct sge_rspq *rspq)
1004 {
1005         unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params);
1006
1007         return timer_idx < SGE_NTIMERS
1008                 ? adapter->sge.timer_val[timer_idx]
1009                 : 0;
1010 }
1011
1012 /**
1013  *      set_rxq_intr_params - set a queue's interrupt holdoff parameters
1014  *      @adapter: the adapter
1015  *      @rspq: the RX response queue
1016  *      @us: the hold-off time in us, or 0 to disable timer
1017  *      @cnt: the hold-off packet count, or 0 to disable counter
1018  *
1019  *      Sets an RX response queue's interrupt hold-off time and packet count.
1020  *      At least one of the two needs to be enabled for the queue to generate
1021  *      interrupts.
1022  */
1023 static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
1024                                unsigned int us, unsigned int cnt)
1025 {
1026         unsigned int timer_idx;
1027
1028         /*
1029          * If both the interrupt holdoff timer and count are specified as
1030          * zero, default to a holdoff count of 1 ...
1031          */
1032         if ((us | cnt) == 0)
1033                 cnt = 1;
1034
1035         /*
1036          * If an interrupt holdoff count has been specified, then find the
1037          * closest configured holdoff count and use that.  If the response
1038          * queue has already been created, then update its queue context
1039          * parameters ...
1040          */
1041         if (cnt) {
1042                 int err;
1043                 u32 v, pktcnt_idx;
1044
1045                 pktcnt_idx = closest_thres(&adapter->sge, cnt);
1046                 if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1047                         v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
1048                             FW_PARAMS_PARAM_X_V(
1049                                         FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1050                             FW_PARAMS_PARAM_YZ_V(rspq->cntxt_id);
1051                         err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1052                         if (err)
1053                                 return err;
1054                 }
1055                 rspq->pktcnt_idx = pktcnt_idx;
1056         }
1057
1058         /*
1059          * Compute the closest holdoff timer index from the supplied holdoff
1060          * timer value.
1061          */
1062         timer_idx = (us == 0
1063                      ? SGE_TIMER_RSTRT_CNTR
1064                      : closest_timer(&adapter->sge, us));
1065
1066         /*
1067          * Update the response queue's interrupt coalescing parameters and
1068          * return success.
1069          */
1070         rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
1071                              QINTR_CNT_EN_V(cnt > 0));
1072         return 0;
1073 }
1074
1075 /*
1076  * Return a version number to identify the type of adapter.  The scheme is:
1077  * - bits 0..9: chip version
1078  * - bits 10..15: chip revision
1079  */
1080 static inline unsigned int mk_adap_vers(const struct adapter *adapter)
1081 {
1082         /*
1083          * Chip version 4, revision 0x3f (cxgb4vf).
1084          */
1085         return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10);
1086 }
1087
1088 /*
1089  * Execute the specified ioctl command.
1090  */
1091 static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1092 {
1093         int ret = 0;
1094
1095         switch (cmd) {
1096             /*
1097              * The VF Driver doesn't have access to any of the other
1098              * common Ethernet device ioctl()'s (like reading/writing
1099              * PHY registers, etc.
1100              */
1101
1102         default:
1103                 ret = -EOPNOTSUPP;
1104                 break;
1105         }
1106         return ret;
1107 }
1108
1109 /*
1110  * Change the device's MTU.
1111  */
1112 static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1113 {
1114         int ret;
1115         struct port_info *pi = netdev_priv(dev);
1116
1117         ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1118                               -1, -1, -1, -1, true);
1119         if (!ret)
1120                 dev->mtu = new_mtu;
1121         return ret;
1122 }
1123
1124 static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
1125         netdev_features_t features)
1126 {
1127         /*
1128          * Since there is no support for separate rx/tx vlan accel
1129          * enable/disable make sure tx flag is always in same state as rx.
1130          */
1131         if (features & NETIF_F_HW_VLAN_CTAG_RX)
1132                 features |= NETIF_F_HW_VLAN_CTAG_TX;
1133         else
1134                 features &= ~NETIF_F_HW_VLAN_CTAG_TX;
1135
1136         return features;
1137 }
1138
1139 static int cxgb4vf_set_features(struct net_device *dev,
1140         netdev_features_t features)
1141 {
1142         struct port_info *pi = netdev_priv(dev);
1143         netdev_features_t changed = dev->features ^ features;
1144
1145         if (changed & NETIF_F_HW_VLAN_CTAG_RX)
1146                 t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
1147                                 features & NETIF_F_HW_VLAN_CTAG_TX, 0);
1148
1149         return 0;
1150 }
1151
1152 /*
1153  * Change the devices MAC address.
1154  */
1155 static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1156 {
1157         int ret;
1158         struct sockaddr *addr = _addr;
1159         struct port_info *pi = netdev_priv(dev);
1160
1161         if (!is_valid_ether_addr(addr->sa_data))
1162                 return -EADDRNOTAVAIL;
1163
1164         ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
1165                               addr->sa_data, true);
1166         if (ret < 0)
1167                 return ret;
1168
1169         memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1170         pi->xact_addr_filt = ret;
1171         return 0;
1172 }
1173
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Poll every receive queue belonging to this port by invoking the interrupt
 * handlers directly.  This runs outside of normal interrupt context.
 */
static void cxgb4vf_poll_controller(struct net_device *dev)
{
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;
        struct sge_eth_rxq *rxq;
        int left;

        /* Without MSI-X a single handler covers the whole adapter. */
        if (!(adapter->flags & USING_MSIX)) {
                t4vf_intr_handler(adapter)(0, adapter);
                return;
        }

        /* With MSI-X, run the per-queue handler for each Queue Set. */
        rxq = &adapter->sge.ethrxq[pi->first_qset];
        for (left = pi->nqsets; left != 0; left--, rxq++)
                t4vf_sge_intr_msix(0, &rxq->rspq);
}
#endif
1197
1198 /*
1199  * Ethtool operations.
1200  * ===================
1201  *
1202  * Note that we don't support any ethtool operations which change the physical
1203  * state of the port to which we're linked.
1204  */
1205
1206 /**
1207  *      from_fw_port_mod_type - translate Firmware Port/Module type to Ethtool
1208  *      @port_type: Firmware Port Type
1209  *      @mod_type: Firmware Module Type
1210  *
1211  *      Translate Firmware Port/Module type to Ethtool Port Type.
1212  */
1213 static int from_fw_port_mod_type(enum fw_port_type port_type,
1214                                  enum fw_port_module_type mod_type)
1215 {
1216         if (port_type == FW_PORT_TYPE_BT_SGMII ||
1217             port_type == FW_PORT_TYPE_BT_XFI ||
1218             port_type == FW_PORT_TYPE_BT_XAUI) {
1219                 return PORT_TP;
1220         } else if (port_type == FW_PORT_TYPE_FIBER_XFI ||
1221                    port_type == FW_PORT_TYPE_FIBER_XAUI) {
1222                 return PORT_FIBRE;
1223         } else if (port_type == FW_PORT_TYPE_SFP ||
1224                    port_type == FW_PORT_TYPE_QSFP_10G ||
1225                    port_type == FW_PORT_TYPE_QSA ||
1226                    port_type == FW_PORT_TYPE_QSFP ||
1227                    port_type == FW_PORT_TYPE_CR4_QSFP ||
1228                    port_type == FW_PORT_TYPE_CR_QSFP ||
1229                    port_type == FW_PORT_TYPE_CR2_QSFP ||
1230                    port_type == FW_PORT_TYPE_SFP28) {
1231                 if (mod_type == FW_PORT_MOD_TYPE_LR ||
1232                     mod_type == FW_PORT_MOD_TYPE_SR ||
1233                     mod_type == FW_PORT_MOD_TYPE_ER ||
1234                     mod_type == FW_PORT_MOD_TYPE_LRM)
1235                         return PORT_FIBRE;
1236                 else if (mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
1237                          mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
1238                         return PORT_DA;
1239                 else
1240                         return PORT_OTHER;
1241         } else if (port_type == FW_PORT_TYPE_KR4_100G ||
1242                    port_type == FW_PORT_TYPE_KR_SFP28 ||
1243                    port_type == FW_PORT_TYPE_KR_XLAUI) {
1244                 return PORT_NONE;
1245         }
1246
1247         return PORT_OTHER;
1248 }
1249
1250 /**
1251  *      fw_caps_to_lmm - translate Firmware to ethtool Link Mode Mask
1252  *      @port_type: Firmware Port Type
1253  *      @fw_caps: Firmware Port Capabilities
1254  *      @link_mode_mask: ethtool Link Mode Mask
1255  *
1256  *      Translate a Firmware Port Capabilities specification to an ethtool
1257  *      Link Mode Mask.
1258  */
1259 static void fw_caps_to_lmm(enum fw_port_type port_type,
1260                            unsigned int fw_caps,
1261                            unsigned long *link_mode_mask)
1262 {
1263         #define SET_LMM(__lmm_name) \
1264                 __set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \
1265                           link_mode_mask)
1266
1267         #define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \
1268                 do { \
1269                         if (fw_caps & FW_PORT_CAP32_ ## __fw_name) \
1270                                 SET_LMM(__lmm_name); \
1271                 } while (0)
1272
1273         switch (port_type) {
1274         case FW_PORT_TYPE_BT_SGMII:
1275         case FW_PORT_TYPE_BT_XFI:
1276         case FW_PORT_TYPE_BT_XAUI:
1277                 SET_LMM(TP);
1278                 FW_CAPS_TO_LMM(SPEED_100M, 100baseT_Full);
1279                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1280                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1281                 break;
1282
1283         case FW_PORT_TYPE_KX4:
1284         case FW_PORT_TYPE_KX:
1285                 SET_LMM(Backplane);
1286                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1287                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
1288                 break;
1289
1290         case FW_PORT_TYPE_KR:
1291                 SET_LMM(Backplane);
1292                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1293                 break;
1294
1295         case FW_PORT_TYPE_BP_AP:
1296                 SET_LMM(Backplane);
1297                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1298                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
1299                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1300                 break;
1301
1302         case FW_PORT_TYPE_BP4_AP:
1303                 SET_LMM(Backplane);
1304                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1305                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
1306                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1307                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
1308                 break;
1309
1310         case FW_PORT_TYPE_FIBER_XFI:
1311         case FW_PORT_TYPE_FIBER_XAUI:
1312         case FW_PORT_TYPE_SFP:
1313         case FW_PORT_TYPE_QSFP_10G:
1314         case FW_PORT_TYPE_QSA:
1315                 SET_LMM(FIBRE);
1316                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1317                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1318                 break;
1319
1320         case FW_PORT_TYPE_BP40_BA:
1321         case FW_PORT_TYPE_QSFP:
1322                 SET_LMM(FIBRE);
1323                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1324                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1325                 FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
1326                 break;
1327
1328         case FW_PORT_TYPE_CR_QSFP:
1329         case FW_PORT_TYPE_SFP28:
1330                 SET_LMM(FIBRE);
1331                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1332                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1333                 FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
1334                 break;
1335
1336         case FW_PORT_TYPE_KR_SFP28:
1337                 SET_LMM(Backplane);
1338                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1339                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1340                 FW_CAPS_TO_LMM(SPEED_25G, 25000baseKR_Full);
1341                 break;
1342
1343         case FW_PORT_TYPE_KR_XLAUI:
1344                 SET_LMM(Backplane);
1345                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1346                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1347                 FW_CAPS_TO_LMM(SPEED_40G, 40000baseKR4_Full);
1348                 break;
1349
1350         case FW_PORT_TYPE_CR2_QSFP:
1351                 SET_LMM(FIBRE);
1352                 FW_CAPS_TO_LMM(SPEED_50G, 50000baseSR2_Full);
1353                 break;
1354
1355         case FW_PORT_TYPE_KR4_100G:
1356         case FW_PORT_TYPE_CR4_QSFP:
1357                 SET_LMM(FIBRE);
1358                 FW_CAPS_TO_LMM(SPEED_1G,  1000baseT_Full);
1359                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseSR_Full);
1360                 FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
1361                 FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
1362                 FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
1363                 FW_CAPS_TO_LMM(SPEED_100G, 100000baseCR4_Full);
1364                 break;
1365
1366         default:
1367                 break;
1368         }
1369
1370         FW_CAPS_TO_LMM(ANEG, Autoneg);
1371         FW_CAPS_TO_LMM(802_3_PAUSE, Pause);
1372         FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause);
1373
1374         #undef FW_CAPS_TO_LMM
1375         #undef SET_LMM
1376 }
1377
1378 static int cxgb4vf_get_link_ksettings(struct net_device *dev,
1379                                   struct ethtool_link_ksettings *link_ksettings)
1380 {
1381         struct port_info *pi = netdev_priv(dev);
1382         struct ethtool_link_settings *base = &link_ksettings->base;
1383
1384         /* For the nonce, the Firmware doesn't send up Port State changes
1385          * when the Virtual Interface attached to the Port is down.  So
1386          * if it's down, let's grab any changes.
1387          */
1388         if (!netif_running(dev))
1389                 (void)t4vf_update_port_info(pi);
1390
1391         ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
1392         ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
1393         ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising);
1394
1395         base->port = from_fw_port_mod_type(pi->port_type, pi->mod_type);
1396
1397         if (pi->mdio_addr >= 0) {
1398                 base->phy_address = pi->mdio_addr;
1399                 base->mdio_support = (pi->port_type == FW_PORT_TYPE_BT_SGMII
1400                                       ? ETH_MDIO_SUPPORTS_C22
1401                                       : ETH_MDIO_SUPPORTS_C45);
1402         } else {
1403                 base->phy_address = 255;
1404                 base->mdio_support = 0;
1405         }
1406
1407         fw_caps_to_lmm(pi->port_type, pi->link_cfg.pcaps,
1408                        link_ksettings->link_modes.supported);
1409         fw_caps_to_lmm(pi->port_type, pi->link_cfg.acaps,
1410                        link_ksettings->link_modes.advertising);
1411         fw_caps_to_lmm(pi->port_type, pi->link_cfg.lpacaps,
1412                        link_ksettings->link_modes.lp_advertising);
1413
1414         if (netif_carrier_ok(dev)) {
1415                 base->speed = pi->link_cfg.speed;
1416                 base->duplex = DUPLEX_FULL;
1417         } else {
1418                 base->speed = SPEED_UNKNOWN;
1419                 base->duplex = DUPLEX_UNKNOWN;
1420         }
1421
1422         base->autoneg = pi->link_cfg.autoneg;
1423         if (pi->link_cfg.pcaps & FW_PORT_CAP32_ANEG)
1424                 ethtool_link_ksettings_add_link_mode(link_ksettings,
1425                                                      supported, Autoneg);
1426         if (pi->link_cfg.autoneg)
1427                 ethtool_link_ksettings_add_link_mode(link_ksettings,
1428                                                      advertising, Autoneg);
1429
1430         return 0;
1431 }
1432
1433 /* Translate the Firmware FEC value into the ethtool value. */
1434 static inline unsigned int fwcap_to_eth_fec(unsigned int fw_fec)
1435 {
1436         unsigned int eth_fec = 0;
1437
1438         if (fw_fec & FW_PORT_CAP32_FEC_RS)
1439                 eth_fec |= ETHTOOL_FEC_RS;
1440         if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS)
1441                 eth_fec |= ETHTOOL_FEC_BASER;
1442
1443         /* if nothing is set, then FEC is off */
1444         if (!eth_fec)
1445                 eth_fec = ETHTOOL_FEC_OFF;
1446
1447         return eth_fec;
1448 }
1449
1450 /* Translate Common Code FEC value into ethtool value. */
1451 static inline unsigned int cc_to_eth_fec(unsigned int cc_fec)
1452 {
1453         unsigned int eth_fec = 0;
1454
1455         if (cc_fec & FEC_AUTO)
1456                 eth_fec |= ETHTOOL_FEC_AUTO;
1457         if (cc_fec & FEC_RS)
1458                 eth_fec |= ETHTOOL_FEC_RS;
1459         if (cc_fec & FEC_BASER_RS)
1460                 eth_fec |= ETHTOOL_FEC_BASER;
1461
1462         /* if nothing is set, then FEC is off */
1463         if (!eth_fec)
1464                 eth_fec = ETHTOOL_FEC_OFF;
1465
1466         return eth_fec;
1467 }
1468
1469 static int cxgb4vf_get_fecparam(struct net_device *dev,
1470                                 struct ethtool_fecparam *fec)
1471 {
1472         const struct port_info *pi = netdev_priv(dev);
1473         const struct link_config *lc = &pi->link_cfg;
1474
1475         /* Translate the Firmware FEC Support into the ethtool value.  We
1476          * always support IEEE 802.3 "automatic" selection of Link FEC type if
1477          * any FEC is supported.
1478          */
1479         fec->fec = fwcap_to_eth_fec(lc->pcaps);
1480         if (fec->fec != ETHTOOL_FEC_OFF)
1481                 fec->fec |= ETHTOOL_FEC_AUTO;
1482
1483         /* Translate the current internal FEC parameters into the
1484          * ethtool values.
1485          */
1486         fec->active_fec = cc_to_eth_fec(lc->fec);
1487         return 0;
1488 }
1489
1490 /*
1491  * Return our driver information.
1492  */
1493 static void cxgb4vf_get_drvinfo(struct net_device *dev,
1494                                 struct ethtool_drvinfo *drvinfo)
1495 {
1496         struct adapter *adapter = netdev2adap(dev);
1497
1498         strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
1499         strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
1500         strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
1501                 sizeof(drvinfo->bus_info));
1502         snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1503                  "%u.%u.%u.%u, TP %u.%u.%u.%u",
1504                  FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.fwrev),
1505                  FW_HDR_FW_VER_MINOR_G(adapter->params.dev.fwrev),
1506                  FW_HDR_FW_VER_MICRO_G(adapter->params.dev.fwrev),
1507                  FW_HDR_FW_VER_BUILD_G(adapter->params.dev.fwrev),
1508                  FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.tprev),
1509                  FW_HDR_FW_VER_MINOR_G(adapter->params.dev.tprev),
1510                  FW_HDR_FW_VER_MICRO_G(adapter->params.dev.tprev),
1511                  FW_HDR_FW_VER_BUILD_G(adapter->params.dev.tprev));
1512 }
1513
1514 /*
1515  * Return current adapter message level.
1516  */
1517 static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1518 {
1519         return netdev2adap(dev)->msg_enable;
1520 }
1521
1522 /*
1523  * Set current adapter message level.
1524  */
1525 static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1526 {
1527         netdev2adap(dev)->msg_enable = msglevel;
1528 }
1529
1530 /*
1531  * Return the device's current Queue Set ring size parameters along with the
1532  * allowed maximum values.  Since ethtool doesn't understand the concept of
1533  * multi-queue devices, we just return the current values associated with the
1534  * first Queue Set.
1535  */
1536 static void cxgb4vf_get_ringparam(struct net_device *dev,
1537                                   struct ethtool_ringparam *rp)
1538 {
1539         const struct port_info *pi = netdev_priv(dev);
1540         const struct sge *s = &pi->adapter->sge;
1541
1542         rp->rx_max_pending = MAX_RX_BUFFERS;
1543         rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1544         rp->rx_jumbo_max_pending = 0;
1545         rp->tx_max_pending = MAX_TXQ_ENTRIES;
1546
1547         rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1548         rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1549         rp->rx_jumbo_pending = 0;
1550         rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1551 }
1552
1553 /*
1554  * Set the Queue Set ring size parameters for the device.  Again, since
1555  * ethtool doesn't allow for the concept of multiple queues per device, we'll
1556  * apply these new values across all of the Queue Sets associated with the
1557  * device -- after vetting them of course!
1558  */
1559 static int cxgb4vf_set_ringparam(struct net_device *dev,
1560                                  struct ethtool_ringparam *rp)
1561 {
1562         const struct port_info *pi = netdev_priv(dev);
1563         struct adapter *adapter = pi->adapter;
1564         struct sge *s = &adapter->sge;
1565         int qs;
1566
1567         if (rp->rx_pending > MAX_RX_BUFFERS ||
1568             rp->rx_jumbo_pending ||
1569             rp->tx_pending > MAX_TXQ_ENTRIES ||
1570             rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1571             rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1572             rp->rx_pending < MIN_FL_ENTRIES ||
1573             rp->tx_pending < MIN_TXQ_ENTRIES)
1574                 return -EINVAL;
1575
1576         if (adapter->flags & FULL_INIT_DONE)
1577                 return -EBUSY;
1578
1579         for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1580                 s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1581                 s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1582                 s->ethtxq[qs].q.size = rp->tx_pending;
1583         }
1584         return 0;
1585 }
1586
1587 /*
1588  * Return the interrupt holdoff timer and count for the first Queue Set on the
1589  * device.  Our extension ioctl() (the cxgbtool interface) allows the
1590  * interrupt holdoff timer to be read on all of the device's Queue Sets.
1591  */
1592 static int cxgb4vf_get_coalesce(struct net_device *dev,
1593                                 struct ethtool_coalesce *coalesce)
1594 {
1595         const struct port_info *pi = netdev_priv(dev);
1596         const struct adapter *adapter = pi->adapter;
1597         const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1598
1599         coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1600         coalesce->rx_max_coalesced_frames =
1601                 ((rspq->intr_params & QINTR_CNT_EN_F)
1602                  ? adapter->sge.counter_val[rspq->pktcnt_idx]
1603                  : 0);
1604         return 0;
1605 }
1606
1607 /*
1608  * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1609  * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1610  * the interrupt holdoff timer on any of the device's Queue Sets.
1611  */
1612 static int cxgb4vf_set_coalesce(struct net_device *dev,
1613                                 struct ethtool_coalesce *coalesce)
1614 {
1615         const struct port_info *pi = netdev_priv(dev);
1616         struct adapter *adapter = pi->adapter;
1617
1618         return set_rxq_intr_params(adapter,
1619                                    &adapter->sge.ethrxq[pi->first_qset].rspq,
1620                                    coalesce->rx_coalesce_usecs,
1621                                    coalesce->rx_max_coalesced_frames);
1622 }
1623
1624 /*
1625  * Report current port link pause parameter settings.
1626  */
1627 static void cxgb4vf_get_pauseparam(struct net_device *dev,
1628                                    struct ethtool_pauseparam *pauseparam)
1629 {
1630         struct port_info *pi = netdev_priv(dev);
1631
1632         pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1633         pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1634         pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1635 }
1636
1637 /*
1638  * Identify the port by blinking the port's LED.
1639  */
1640 static int cxgb4vf_phys_id(struct net_device *dev,
1641                            enum ethtool_phys_id_state state)
1642 {
1643         unsigned int val;
1644         struct port_info *pi = netdev_priv(dev);
1645
1646         if (state == ETHTOOL_ID_ACTIVE)
1647                 val = 0xffff;
1648         else if (state == ETHTOOL_ID_INACTIVE)
1649                 val = 0;
1650         else
1651                 return -EINVAL;
1652
1653         return t4vf_identify_port(pi->adapter, pi->viid, val);
1654 }
1655
/*
 * Port stats maintained per queue of the port.
 *
 * collect_sge_port_stats() fills this in by summing the per-queue counters
 * over all of a port's Queue Sets.  cxgb4vf_get_ethtool_stats() writes the
 * structure straight into the "ethtool -S" output buffer, so the field order
 * must stay in step with the second half of stats_strings[].
 */
struct queue_port_stats {
        u64 tso;                /* sum of TX queue "tso" counters */
        u64 tx_csum;            /* sum of TX queue "tx_cso" counters */
        u64 rx_csum;            /* sum of RX queue "rx_cso" counters */
        u64 vlan_ex;            /* sum of RX queue "vlan_ex" counters */
        u64 vlan_ins;           /* sum of TX queue "vlan_ins" counters */
        u64 lro_pkts;           /* sum of RX queue "lro_pkts" counters */
        u64 lro_merged;         /* sum of RX queue "lro_merged" counters */
};
1668
/*
 * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
 * these need to match the order of statistics returned by
 * t4vf_get_port_stats().
 *
 * cxgb4vf_get_ethtool_stats() lays the values out as a t4vf_port_stats
 * structure immediately followed by a queue_port_stats structure, so the two
 * groups of names below must appear in that order.  The number of entries
 * here is what cxgb4vf_get_sset_count() reports.
 */
static const char stats_strings[][ETH_GSTRING_LEN] = {
        /*
         * These must match the layout of the t4vf_port_stats structure.
         */
        "TxBroadcastBytes  ",
        "TxBroadcastFrames ",
        "TxMulticastBytes  ",
        "TxMulticastFrames ",
        "TxUnicastBytes    ",
        "TxUnicastFrames   ",
        "TxDroppedFrames   ",
        "TxOffloadBytes    ",
        "TxOffloadFrames   ",
        "RxBroadcastBytes  ",
        "RxBroadcastFrames ",
        "RxMulticastBytes  ",
        "RxMulticastFrames ",
        "RxUnicastBytes    ",
        "RxUnicastFrames   ",
        "RxErrorFrames     ",

        /*
         * These are accumulated per-queue statistics and must match the
         * order of the fields in the queue_port_stats structure.
         */
        "TSO               ",
        "TxCsumOffload     ",
        "RxCsumGood        ",
        "VLANextractions   ",
        "VLANinsertions    ",
        "GROPackets        ",
        "GROMerged         ",
};
1707
1708 /*
1709  * Return the number of statistics in the specified statistics set.
1710  */
1711 static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1712 {
1713         switch (sset) {
1714         case ETH_SS_STATS:
1715                 return ARRAY_SIZE(stats_strings);
1716         default:
1717                 return -EOPNOTSUPP;
1718         }
1719         /*NOTREACHED*/
1720 }
1721
1722 /*
1723  * Return the strings for the specified statistics set.
1724  */
1725 static void cxgb4vf_get_strings(struct net_device *dev,
1726                                 u32 sset,
1727                                 u8 *data)
1728 {
1729         switch (sset) {
1730         case ETH_SS_STATS:
1731                 memcpy(data, stats_strings, sizeof(stats_strings));
1732                 break;
1733         }
1734 }
1735
1736 /*
1737  * Small utility routine to accumulate queue statistics across the queues of
1738  * a "port".
1739  */
1740 static void collect_sge_port_stats(const struct adapter *adapter,
1741                                    const struct port_info *pi,
1742                                    struct queue_port_stats *stats)
1743 {
1744         const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1745         const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1746         int qs;
1747
1748         memset(stats, 0, sizeof(*stats));
1749         for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1750                 stats->tso += txq->tso;
1751                 stats->tx_csum += txq->tx_cso;
1752                 stats->rx_csum += rxq->stats.rx_cso;
1753                 stats->vlan_ex += rxq->stats.vlan_ex;
1754                 stats->vlan_ins += txq->vlan_ins;
1755                 stats->lro_pkts += rxq->stats.lro_pkts;
1756                 stats->lro_merged += rxq->stats.lro_merged;
1757         }
1758 }
1759
1760 /*
1761  * Return the ETH_SS_STATS statistics set.
1762  */
1763 static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1764                                       struct ethtool_stats *stats,
1765                                       u64 *data)
1766 {
1767         struct port_info *pi = netdev2pinfo(dev);
1768         struct adapter *adapter = pi->adapter;
1769         int err = t4vf_get_port_stats(adapter, pi->pidx,
1770                                       (struct t4vf_port_stats *)data);
1771         if (err)
1772                 memset(data, 0, sizeof(struct t4vf_port_stats));
1773
1774         data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1775         collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1776 }
1777
1778 /*
1779  * Return the size of our register map.
1780  */
1781 static int cxgb4vf_get_regs_len(struct net_device *dev)
1782 {
1783         return T4VF_REGMAP_SIZE;
1784 }
1785
1786 /*
1787  * Dump a block of registers, start to end inclusive, into a buffer.
1788  */
1789 static void reg_block_dump(struct adapter *adapter, void *regbuf,
1790                            unsigned int start, unsigned int end)
1791 {
1792         u32 *bp = regbuf + start - T4VF_REGMAP_START;
1793
1794         for ( ; start <= end; start += sizeof(u32)) {
1795                 /*
1796                  * Avoid reading the Mailbox Control register since that
1797                  * can trigger a Mailbox Ownership Arbitration cycle and
1798                  * interfere with communication with the firmware.
1799                  */
1800                 if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1801                         *bp++ = 0xffff;
1802                 else
1803                         *bp++ = t4_read_reg(adapter, start);
1804         }
1805 }
1806
1807 /*
1808  * Copy our entire register map into the provided buffer.
1809  */
1810 static void cxgb4vf_get_regs(struct net_device *dev,
1811                              struct ethtool_regs *regs,
1812                              void *regbuf)
1813 {
1814         struct adapter *adapter = netdev2adap(dev);
1815
1816         regs->version = mk_adap_vers(adapter);
1817
1818         /*
1819          * Fill in register buffer with our register map.
1820          */
1821         memset(regbuf, 0, T4VF_REGMAP_SIZE);
1822
1823         reg_block_dump(adapter, regbuf,
1824                        T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1825                        T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1826         reg_block_dump(adapter, regbuf,
1827                        T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1828                        T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1829
1830         /* T5 adds new registers in the PL Register map.
1831          */
1832         reg_block_dump(adapter, regbuf,
1833                        T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1834                        T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip)
1835                        ? PL_VF_WHOAMI_A : PL_VF_REVISION_A));
1836         reg_block_dump(adapter, regbuf,
1837                        T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1838                        T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1839
1840         reg_block_dump(adapter, regbuf,
1841                        T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1842                        T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1843 }
1844
1845 /*
1846  * Report current Wake On LAN settings.
1847  */
1848 static void cxgb4vf_get_wol(struct net_device *dev,
1849                             struct ethtool_wolinfo *wol)
1850 {
1851         wol->supported = 0;
1852         wol->wolopts = 0;
1853         memset(&wol->sopass, 0, sizeof(wol->sopass));
1854 }
1855
/*
 * TCP Segmentation Offload flags which we support: the combination of the
 * NETIF_F_TSO, NETIF_F_TSO6 and NETIF_F_TSO_ECN netdev feature bits.
 */
#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1860
1861 static const struct ethtool_ops cxgb4vf_ethtool_ops = {
1862         .get_link_ksettings     = cxgb4vf_get_link_ksettings,
1863         .get_fecparam           = cxgb4vf_get_fecparam,
1864         .get_drvinfo            = cxgb4vf_get_drvinfo,
1865         .get_msglevel           = cxgb4vf_get_msglevel,
1866         .set_msglevel           = cxgb4vf_set_msglevel,
1867         .get_ringparam          = cxgb4vf_get_ringparam,
1868         .set_ringparam          = cxgb4vf_set_ringparam,
1869         .get_coalesce           = cxgb4vf_get_coalesce,
1870         .set_coalesce           = cxgb4vf_set_coalesce,
1871         .get_pauseparam         = cxgb4vf_get_pauseparam,
1872         .get_link               = ethtool_op_get_link,
1873         .get_strings            = cxgb4vf_get_strings,
1874         .set_phys_id            = cxgb4vf_phys_id,
1875         .get_sset_count         = cxgb4vf_get_sset_count,
1876         .get_ethtool_stats      = cxgb4vf_get_ethtool_stats,
1877         .get_regs_len           = cxgb4vf_get_regs_len,
1878         .get_regs               = cxgb4vf_get_regs,
1879         .get_wol                = cxgb4vf_get_wol,
1880 };
1881
1882 /*
1883  * /sys/kernel/debug/cxgb4vf support code and data.
1884  * ================================================
1885  */
1886
1887 /*
1888  * Show Firmware Mailbox Command/Reply Log
1889  *
1890  * Note that we don't do any locking when dumping the Firmware Mailbox Log so
1891  * it's possible that we can catch things during a log update and therefore
1892  * see partially corrupted log entries.  But i9t's probably Good Enough(tm).
1893  * If we ever decide that we want to make sure that we're dumping a coherent
1894  * log, we'd need to perform locking in the mailbox logging and in
1895  * mboxlog_open() where we'd need to grab the entire mailbox log in one go
1896  * like we do for the Firmware Device Log.  But as stated above, meh ...
1897  */
1898 static int mboxlog_show(struct seq_file *seq, void *v)
1899 {
1900         struct adapter *adapter = seq->private;
1901         struct mbox_cmd_log *log = adapter->mbox_log;
1902         struct mbox_cmd *entry;
1903         int entry_idx, i;
1904
1905         if (v == SEQ_START_TOKEN) {
1906                 seq_printf(seq,
1907                            "%10s  %15s  %5s  %5s  %s\n",
1908                            "Seq#", "Tstamp", "Atime", "Etime",
1909                            "Command/Reply");
1910                 return 0;
1911         }
1912
1913         entry_idx = log->cursor + ((uintptr_t)v - 2);
1914         if (entry_idx >= log->size)
1915                 entry_idx -= log->size;
1916         entry = mbox_cmd_log_entry(log, entry_idx);
1917
1918         /* skip over unused entries */
1919         if (entry->timestamp == 0)
1920                 return 0;
1921
1922         seq_printf(seq, "%10u  %15llu  %5d  %5d",
1923                    entry->seqno, entry->timestamp,
1924                    entry->access, entry->execute);
1925         for (i = 0; i < MBOX_LEN / 8; i++) {
1926                 u64 flit = entry->cmd[i];
1927                 u32 hi = (u32)(flit >> 32);
1928                 u32 lo = (u32)flit;
1929
1930                 seq_printf(seq, "  %08x %08x", hi, lo);
1931         }
1932         seq_puts(seq, "\n");
1933         return 0;
1934 }
1935
1936 static inline void *mboxlog_get_idx(struct seq_file *seq, loff_t pos)
1937 {
1938         struct adapter *adapter = seq->private;
1939         struct mbox_cmd_log *log = adapter->mbox_log;
1940
1941         return ((pos <= log->size) ? (void *)(uintptr_t)(pos + 1) : NULL);
1942 }
1943
1944 static void *mboxlog_start(struct seq_file *seq, loff_t *pos)
1945 {
1946         return *pos ? mboxlog_get_idx(seq, *pos) : SEQ_START_TOKEN;
1947 }
1948
1949 static void *mboxlog_next(struct seq_file *seq, void *v, loff_t *pos)
1950 {
1951         ++*pos;
1952         return mboxlog_get_idx(seq, *pos);
1953 }
1954
/*
 * seq_file stop: the mailbox log iterator holds no state to release.
 */
static void mboxlog_stop(struct seq_file *seq, void *v)
{
        /* nothing to do */
}
1958
1959 static const struct seq_operations mboxlog_seq_ops = {
1960         .start = mboxlog_start,
1961         .next  = mboxlog_next,
1962         .stop  = mboxlog_stop,
1963         .show  = mboxlog_show
1964 };
1965
1966 static int mboxlog_open(struct inode *inode, struct file *file)
1967 {
1968         int res = seq_open(file, &mboxlog_seq_ops);
1969
1970         if (!res) {
1971                 struct seq_file *seq = file->private_data;
1972
1973                 seq->private = inode->i_private;
1974         }
1975         return res;
1976 }
1977
1978 static const struct file_operations mboxlog_fops = {
1979         .owner   = THIS_MODULE,
1980         .open    = mboxlog_open,
1981         .read    = seq_read,
1982         .llseek  = seq_lseek,
1983         .release = seq_release,
1984 };
1985
1986 /*
1987  * Show SGE Queue Set information.  We display QPL Queues Sets per line.
1988  */
1989 #define QPL     4
1990
1991 static int sge_qinfo_show(struct seq_file *seq, void *v)
1992 {
1993         struct adapter *adapter = seq->private;
1994         int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1995         int qs, r = (uintptr_t)v - 1;
1996
1997         if (r)
1998                 seq_putc(seq, '\n');
1999
2000         #define S3(fmt_spec, s, v) \
2001                 do {\
2002                         seq_printf(seq, "%-12s", s); \
2003                         for (qs = 0; qs < n; ++qs) \
2004                                 seq_printf(seq, " %16" fmt_spec, v); \
2005                         seq_putc(seq, '\n'); \
2006                 } while (0)
2007         #define S(s, v)         S3("s", s, v)
2008         #define T(s, v)         S3("u", s, txq[qs].v)
2009         #define R(s, v)         S3("u", s, rxq[qs].v)
2010
2011         if (r < eth_entries) {
2012                 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
2013                 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
2014                 int n = min(QPL, adapter->sge.ethqsets - QPL * r);
2015
2016                 S("QType:", "Ethernet");
2017                 S("Interface:",
2018                   (rxq[qs].rspq.netdev
2019                    ? rxq[qs].rspq.netdev->name
2020                    : "N/A"));
2021                 S3("d", "Port:",
2022                    (rxq[qs].rspq.netdev
2023                     ? ((struct port_info *)
2024                        netdev_priv(rxq[qs].rspq.netdev))->port_id
2025                     : -1));
2026                 T("TxQ ID:", q.abs_id);
2027                 T("TxQ size:", q.size);
2028                 T("TxQ inuse:", q.in_use);
2029                 T("TxQ PIdx:", q.pidx);
2030                 T("TxQ CIdx:", q.cidx);
2031                 R("RspQ ID:", rspq.abs_id);
2032                 R("RspQ size:", rspq.size);
2033                 R("RspQE size:", rspq.iqe_len);
2034                 S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
2035                 S3("u", "Intr pktcnt:",
2036                    adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
2037                 R("RspQ CIdx:", rspq.cidx);
2038                 R("RspQ Gen:", rspq.gen);
2039                 R("FL ID:", fl.abs_id);
2040                 R("FL size:", fl.size - MIN_FL_RESID);
2041                 R("FL avail:", fl.avail);
2042                 R("FL PIdx:", fl.pidx);
2043                 R("FL CIdx:", fl.cidx);
2044                 return 0;
2045         }
2046
2047         r -= eth_entries;
2048         if (r == 0) {
2049                 const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
2050
2051                 seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
2052                 seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
2053                 seq_printf(seq, "%-12s %16u\n", "Intr delay:",
2054                            qtimer_val(adapter, evtq));
2055                 seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
2056                            adapter->sge.counter_val[evtq->pktcnt_idx]);
2057                 seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
2058                 seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
2059         } else if (r == 1) {
2060                 const struct sge_rspq *intrq = &adapter->sge.intrq;
2061
2062                 seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
2063                 seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
2064                 seq_printf(seq, "%-12s %16u\n", "Intr delay:",
2065                            qtimer_val(adapter, intrq));
2066                 seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
2067                            adapter->sge.counter_val[intrq->pktcnt_idx]);
2068                 seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
2069                 seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
2070         }
2071
2072         #undef R
2073         #undef T
2074         #undef S
2075         #undef S3
2076
2077         return 0;
2078 }
2079
2080 /*
2081  * Return the number of "entries" in our "file".  We group the multi-Queue
2082  * sections with QPL Queue Sets per "entry".  The sections of the output are:
2083  *
2084  *     Ethernet RX/TX Queue Sets
2085  *     Firmware Event Queue
2086  *     Forwarded Interrupt Queue (if in MSI mode)
2087  */
2088 static int sge_queue_entries(const struct adapter *adapter)
2089 {
2090         return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2091                 ((adapter->flags & USING_MSI) != 0);
2092 }
2093
2094 static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
2095 {
2096         int entries = sge_queue_entries(seq->private);
2097
2098         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2099 }
2100
/*
 * seq_file stop: the queue information iterator holds no state to release.
 */
static void sge_queue_stop(struct seq_file *seq, void *v)
{
        /* nothing to do */
}
2104
2105 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
2106 {
2107         int entries = sge_queue_entries(seq->private);
2108
2109         ++*pos;
2110         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2111 }
2112
2113 static const struct seq_operations sge_qinfo_seq_ops = {
2114         .start = sge_queue_start,
2115         .next  = sge_queue_next,
2116         .stop  = sge_queue_stop,
2117         .show  = sge_qinfo_show
2118 };
2119
2120 static int sge_qinfo_open(struct inode *inode, struct file *file)
2121 {
2122         int res = seq_open(file, &sge_qinfo_seq_ops);
2123
2124         if (!res) {
2125                 struct seq_file *seq = file->private_data;
2126                 seq->private = inode->i_private;
2127         }
2128         return res;
2129 }
2130
2131 static const struct file_operations sge_qinfo_debugfs_fops = {
2132         .owner   = THIS_MODULE,
2133         .open    = sge_qinfo_open,
2134         .read    = seq_read,
2135         .llseek  = seq_lseek,
2136         .release = seq_release,
2137 };
2138
2139 /*
2140  * Show SGE Queue Set statistics.  We display QPL Queues Sets per line.
2141  */
2142 #define QPL     4
2143
2144 static int sge_qstats_show(struct seq_file *seq, void *v)
2145 {
2146         struct adapter *adapter = seq->private;
2147         int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
2148         int qs, r = (uintptr_t)v - 1;
2149
2150         if (r)
2151                 seq_putc(seq, '\n');
2152
2153         #define S3(fmt, s, v) \
2154                 do { \
2155                         seq_printf(seq, "%-16s", s); \
2156                         for (qs = 0; qs < n; ++qs) \
2157                                 seq_printf(seq, " %8" fmt, v); \
2158                         seq_putc(seq, '\n'); \
2159                 } while (0)
2160         #define S(s, v)         S3("s", s, v)
2161
2162         #define T3(fmt, s, v)   S3(fmt, s, txq[qs].v)
2163         #define T(s, v)         T3("lu", s, v)
2164
2165         #define R3(fmt, s, v)   S3(fmt, s, rxq[qs].v)
2166         #define R(s, v)         R3("lu", s, v)
2167
2168         if (r < eth_entries) {
2169                 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
2170                 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
2171                 int n = min(QPL, adapter->sge.ethqsets - QPL * r);
2172
2173                 S("QType:", "Ethernet");
2174                 S("Interface:",
2175                   (rxq[qs].rspq.netdev
2176                    ? rxq[qs].rspq.netdev->name
2177                    : "N/A"));
2178                 R3("u", "RspQNullInts:", rspq.unhandled_irqs);
2179                 R("RxPackets:", stats.pkts);
2180                 R("RxCSO:", stats.rx_cso);
2181                 R("VLANxtract:", stats.vlan_ex);
2182                 R("LROmerged:", stats.lro_merged);
2183                 R("LROpackets:", stats.lro_pkts);
2184                 R("RxDrops:", stats.rx_drops);
2185                 T("TSO:", tso);
2186                 T("TxCSO:", tx_cso);
2187                 T("VLANins:", vlan_ins);
2188                 T("TxQFull:", q.stops);
2189                 T("TxQRestarts:", q.restarts);
2190                 T("TxMapErr:", mapping_err);
2191                 R("FLAllocErr:", fl.alloc_failed);
2192                 R("FLLrgAlcErr:", fl.large_alloc_failed);
2193                 R("FLStarving:", fl.starving);
2194                 return 0;
2195         }
2196
2197         r -= eth_entries;
2198         if (r == 0) {
2199                 const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
2200
2201                 seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
2202                 seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
2203                            evtq->unhandled_irqs);
2204                 seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
2205                 seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
2206         } else if (r == 1) {
2207                 const struct sge_rspq *intrq = &adapter->sge.intrq;
2208
2209                 seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
2210                 seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
2211                            intrq->unhandled_irqs);
2212                 seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
2213                 seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
2214         }
2215
2216         #undef R
2217         #undef T
2218         #undef S
2219         #undef R3
2220         #undef T3
2221         #undef S3
2222
2223         return 0;
2224 }
2225
2226 /*
2227  * Return the number of "entries" in our "file".  We group the multi-Queue
2228  * sections with QPL Queue Sets per "entry".  The sections of the output are:
2229  *
2230  *     Ethernet RX/TX Queue Sets
2231  *     Firmware Event Queue
2232  *     Forwarded Interrupt Queue (if in MSI mode)
2233  */
2234 static int sge_qstats_entries(const struct adapter *adapter)
2235 {
2236         return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2237                 ((adapter->flags & USING_MSI) != 0);
2238 }
2239
2240 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
2241 {
2242         int entries = sge_qstats_entries(seq->private);
2243
2244         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2245 }
2246
/*
 * seq_file stop: the queue statistics iterator holds no state to release.
 */
static void sge_qstats_stop(struct seq_file *seq, void *v)
{
        /* nothing to do */
}
2250
2251 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
2252 {
2253         int entries = sge_qstats_entries(seq->private);
2254
2255         (*pos)++;
2256         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2257 }
2258
2259 static const struct seq_operations sge_qstats_seq_ops = {
2260         .start = sge_qstats_start,
2261         .next  = sge_qstats_next,
2262         .stop  = sge_qstats_stop,
2263         .show  = sge_qstats_show
2264 };
2265
2266 static int sge_qstats_open(struct inode *inode, struct file *file)
2267 {
2268         int res = seq_open(file, &sge_qstats_seq_ops);
2269
2270         if (res == 0) {
2271                 struct seq_file *seq = file->private_data;
2272                 seq->private = inode->i_private;
2273         }
2274         return res;
2275 }
2276
2277 static const struct file_operations sge_qstats_proc_fops = {
2278         .owner   = THIS_MODULE,
2279         .open    = sge_qstats_open,
2280         .read    = seq_read,
2281         .llseek  = seq_lseek,
2282         .release = seq_release,
2283 };
2284
2285 /*
2286  * Show PCI-E SR-IOV Virtual Function Resource Limits.
2287  */
2288 static int resources_show(struct seq_file *seq, void *v)
2289 {
2290         struct adapter *adapter = seq->private;
2291         struct vf_resources *vfres = &adapter->params.vfres;
2292
2293         #define S(desc, fmt, var) \
2294                 seq_printf(seq, "%-60s " fmt "\n", \
2295                            desc " (" #var "):", vfres->var)
2296
2297         S("Virtual Interfaces", "%d", nvi);
2298         S("Egress Queues", "%d", neq);
2299         S("Ethernet Control", "%d", nethctrl);
2300         S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
2301         S("Ingress Queues", "%d", niq);
2302         S("Traffic Class", "%d", tc);
2303         S("Port Access Rights Mask", "%#x", pmask);
2304         S("MAC Address Filters", "%d", nexactf);
2305         S("Firmware Command Read Capabilities", "%#x", r_caps);
2306         S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
2307
2308         #undef S
2309
2310         return 0;
2311 }
2312
2313 static int resources_open(struct inode *inode, struct file *file)
2314 {
2315         return single_open(file, resources_show, inode->i_private);
2316 }
2317
2318 static const struct file_operations resources_proc_fops = {
2319         .owner   = THIS_MODULE,
2320         .open    = resources_open,
2321         .read    = seq_read,
2322         .llseek  = seq_lseek,
2323         .release = single_release,
2324 };
2325
2326 /*
2327  * Show Virtual Interfaces.
2328  */
2329 static int interfaces_show(struct seq_file *seq, void *v)
2330 {
2331         if (v == SEQ_START_TOKEN) {
2332                 seq_puts(seq, "Interface  Port   VIID\n");
2333         } else {
2334                 struct adapter *adapter = seq->private;
2335                 int pidx = (uintptr_t)v - 2;
2336                 struct net_device *dev = adapter->port[pidx];
2337                 struct port_info *pi = netdev_priv(dev);
2338
2339                 seq_printf(seq, "%9s  %4d  %#5x\n",
2340                            dev->name, pi->port_id, pi->viid);
2341         }
2342         return 0;
2343 }
2344
2345 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
2346 {
2347         return pos <= adapter->params.nports
2348                 ? (void *)(uintptr_t)(pos + 1)
2349                 : NULL;
2350 }
2351
2352 static void *interfaces_start(struct seq_file *seq, loff_t *pos)
2353 {
2354         return *pos
2355                 ? interfaces_get_idx(seq->private, *pos)
2356                 : SEQ_START_TOKEN;
2357 }
2358
2359 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
2360 {
2361         (*pos)++;
2362         return interfaces_get_idx(seq->private, *pos);
2363 }
2364
/*
 * seq_file stop: the interfaces iterator holds no state to release.
 */
static void interfaces_stop(struct seq_file *seq, void *v)
{
        /* nothing to do */
}
2368
2369 static const struct seq_operations interfaces_seq_ops = {
2370         .start = interfaces_start,
2371         .next  = interfaces_next,
2372         .stop  = interfaces_stop,
2373         .show  = interfaces_show
2374 };
2375
2376 static int interfaces_open(struct inode *inode, struct file *file)
2377 {
2378         int res = seq_open(file, &interfaces_seq_ops);
2379
2380         if (res == 0) {
2381                 struct seq_file *seq = file->private_data;
2382                 seq->private = inode->i_private;
2383         }
2384         return res;
2385 }
2386
2387 static const struct file_operations interfaces_proc_fops = {
2388         .owner   = THIS_MODULE,
2389         .open    = interfaces_open,
2390         .read    = seq_read,
2391         .llseek  = seq_lseek,
2392         .release = seq_release,
2393 };
2394
/*
 * /sys/kernel/debug/cxgb4vf/ files list.
 *
 * Each entry describes one debugfs node which setup_debugfs() creates for
 * an adapter.
 */
struct cxgb4vf_debugfs_entry {
        const char *name;               /* name of debugfs node */
        umode_t mode;                   /* file system mode */
        const struct file_operations *fops;     /* handlers for the node */
};
2403
2404 static struct cxgb4vf_debugfs_entry debugfs_files[] = {
2405         { "mboxlog",    0444, &mboxlog_fops },
2406         { "sge_qinfo",  0444, &sge_qinfo_debugfs_fops },
2407         { "sge_qstats", 0444, &sge_qstats_proc_fops },
2408         { "resources",  0444, &resources_proc_fops },
2409         { "interfaces", 0444, &interfaces_proc_fops },
2410 };
2411
2412 /*
2413  * Module and device initialization and cleanup code.
2414  * ==================================================
2415  */
2416
2417 /*
2418  * Set up out /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2419  * directory (debugfs_root) has already been set up.
2420  */
2421 static int setup_debugfs(struct adapter *adapter)
2422 {
2423         int i;
2424
2425         BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2426
2427         /*
2428          * Debugfs support is best effort.
2429          */
2430         for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2431                 (void)debugfs_create_file(debugfs_files[i].name,
2432                                   debugfs_files[i].mode,
2433                                   adapter->debugfs_root,
2434                                   (void *)adapter,
2435                                   debugfs_files[i].fops);
2436
2437         return 0;
2438 }
2439
2440 /*
2441  * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
2442  * it to our caller to tear down the directory (debugfs_root).
2443  */
2444 static void cleanup_debugfs(struct adapter *adapter)
2445 {
2446         BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2447
2448         /*
2449          * Unlike our sister routine cleanup_proc(), we don't need to remove
2450          * individual entries because a call will be made to
2451          * debugfs_remove_recursive().  We just need to clean up any ancillary
2452          * persistent state.
2453          */
2454         /* nothing to do */
2455 }
2456
2457 /* Figure out how many Ports and Queue Sets we can support.  This depends on
2458  * knowing our Virtual Function Resources and may be called a second time if
2459  * we fall back from MSI-X to MSI Interrupt Mode.
2460  */
2461 static void size_nports_qsets(struct adapter *adapter)
2462 {
2463         struct vf_resources *vfres = &adapter->params.vfres;
2464         unsigned int ethqsets, pmask_nports;
2465
2466         /* The number of "ports" which we support is equal to the number of
2467          * Virtual Interfaces with which we've been provisioned.
2468          */
2469         adapter->params.nports = vfres->nvi;
2470         if (adapter->params.nports > MAX_NPORTS) {
2471                 dev_warn(adapter->pdev_dev, "only using %d of %d maximum"
2472                          " allowed virtual interfaces\n", MAX_NPORTS,
2473                          adapter->params.nports);
2474                 adapter->params.nports = MAX_NPORTS;
2475         }
2476
2477         /* We may have been provisioned with more VIs than the number of
2478          * ports we're allowed to access (our Port Access Rights Mask).
2479          * This is obviously a configuration conflict but we don't want to
2480          * crash the kernel or anything silly just because of that.
2481          */
2482         pmask_nports = hweight32(adapter->params.vfres.pmask);
2483         if (pmask_nports < adapter->params.nports) {
2484                 dev_warn(adapter->pdev_dev, "only using %d of %d provisioned"
2485                          " virtual interfaces; limited by Port Access Rights"
2486                          " mask %#x\n", pmask_nports, adapter->params.nports,
2487                          adapter->params.vfres.pmask);
2488                 adapter->params.nports = pmask_nports;
2489         }
2490
2491         /* We need to reserve an Ingress Queue for the Asynchronous Firmware
2492          * Event Queue.  And if we're using MSI Interrupts, we'll also need to
2493          * reserve an Ingress Queue for a Forwarded Interrupts.
2494          *
2495          * The rest of the FL/Intr-capable ingress queues will be matched up
2496          * one-for-one with Ethernet/Control egress queues in order to form
2497          * "Queue Sets" which will be aportioned between the "ports".  For
2498          * each Queue Set, we'll need the ability to allocate two Egress
2499          * Contexts -- one for the Ingress Queue Free List and one for the TX
2500          * Ethernet Queue.
2501          *
2502          * Note that even if we're currently configured to use MSI-X
2503          * Interrupts (module variable msi == MSI_MSIX) we may get downgraded
2504          * to MSI Interrupts if we can't get enough MSI-X Interrupts.  If that
2505          * happens we'll need to adjust things later.
2506          */
2507         ethqsets = vfres->niqflint - 1 - (msi == MSI_MSI);
2508         if (vfres->nethctrl != ethqsets)
2509                 ethqsets = min(vfres->nethctrl, ethqsets);
2510         if (vfres->neq < ethqsets*2)
2511                 ethqsets = vfres->neq/2;
2512         if (ethqsets > MAX_ETH_QSETS)
2513                 ethqsets = MAX_ETH_QSETS;
2514         adapter->sge.max_ethqsets = ethqsets;
2515
2516         if (adapter->sge.max_ethqsets < adapter->params.nports) {
2517                 dev_warn(adapter->pdev_dev, "only using %d of %d available"
2518                          " virtual interfaces (too few Queue Sets)\n",
2519                          adapter->sge.max_ethqsets, adapter->params.nports);
2520                 adapter->params.nports = adapter->sge.max_ethqsets;
2521         }
2522 }
2523
2524 /*
2525  * Perform early "adapter" initialization.  This is where we discover what
2526  * adapter parameters we're going to be using and initialize basic adapter
2527  * hardware support.
2528  */
2529 static int adap_init0(struct adapter *adapter)
2530 {
2531         struct sge_params *sge_params = &adapter->params.sge;
2532         struct sge *s = &adapter->sge;
2533         int err;
2534         u32 param, val = 0;
2535
2536         /*
2537          * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2538          * 2.6.31 and later we can't call pci_reset_function() in order to
2539          * issue an FLR because of a self- deadlock on the device semaphore.
2540          * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2541          * cases where they're needed -- for instance, some versions of KVM
2542          * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2543          * use the firmware based reset in order to reset any per function
2544          * state.
2545          */
2546         err = t4vf_fw_reset(adapter);
2547         if (err < 0) {
2548                 dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2549                 return err;
2550         }
2551
2552         /*
2553          * Grab basic operational parameters.  These will predominantly have
2554          * been set up by the Physical Function Driver or will be hard coded
2555          * into the adapter.  We just have to live with them ...  Note that
2556          * we _must_ get our VPD parameters before our SGE parameters because
2557          * we need to know the adapter's core clock from the VPD in order to
2558          * properly decode the SGE Timer Values.
2559          */
2560         err = t4vf_get_dev_params(adapter);
2561         if (err) {
2562                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2563                         " device parameters: err=%d\n", err);
2564                 return err;
2565         }
2566         err = t4vf_get_vpd_params(adapter);
2567         if (err) {
2568                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2569                         " VPD parameters: err=%d\n", err);
2570                 return err;
2571         }
2572         err = t4vf_get_sge_params(adapter);
2573         if (err) {
2574                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2575                         " SGE parameters: err=%d\n", err);
2576                 return err;
2577         }
2578         err = t4vf_get_rss_glb_config(adapter);
2579         if (err) {
2580                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2581                         " RSS parameters: err=%d\n", err);
2582                 return err;
2583         }
2584         if (adapter->params.rss.mode !=
2585             FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2586                 dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2587                         " mode %d\n", adapter->params.rss.mode);
2588                 return -EINVAL;
2589         }
2590         err = t4vf_sge_init(adapter);
2591         if (err) {
2592                 dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2593                         " err=%d\n", err);
2594                 return err;
2595         }
2596
2597         /* If we're running on newer firmware, let it know that we're
2598          * prepared to deal with encapsulated CPL messages.  Older
2599          * firmware won't understand this and we'll just get
2600          * unencapsulated messages ...
2601          */
2602         param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
2603                 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP);
2604         val = 1;
2605         (void) t4vf_set_params(adapter, 1, &param, &val);
2606
2607         /*
2608          * Retrieve our RX interrupt holdoff timer values and counter
2609          * threshold values from the SGE parameters.
2610          */
2611         s->timer_val[0] = core_ticks_to_us(adapter,
2612                 TIMERVALUE0_G(sge_params->sge_timer_value_0_and_1));
2613         s->timer_val[1] = core_ticks_to_us(adapter,
2614                 TIMERVALUE1_G(sge_params->sge_timer_value_0_and_1));
2615         s->timer_val[2] = core_ticks_to_us(adapter,
2616                 TIMERVALUE0_G(sge_params->sge_timer_value_2_and_3));
2617         s->timer_val[3] = core_ticks_to_us(adapter,
2618                 TIMERVALUE1_G(sge_params->sge_timer_value_2_and_3));
2619         s->timer_val[4] = core_ticks_to_us(adapter,
2620                 TIMERVALUE0_G(sge_params->sge_timer_value_4_and_5));
2621         s->timer_val[5] = core_ticks_to_us(adapter,
2622                 TIMERVALUE1_G(sge_params->sge_timer_value_4_and_5));
2623
2624         s->counter_val[0] = THRESHOLD_0_G(sge_params->sge_ingress_rx_threshold);
2625         s->counter_val[1] = THRESHOLD_1_G(sge_params->sge_ingress_rx_threshold);
2626         s->counter_val[2] = THRESHOLD_2_G(sge_params->sge_ingress_rx_threshold);
2627         s->counter_val[3] = THRESHOLD_3_G(sge_params->sge_ingress_rx_threshold);
2628
2629         /*
2630          * Grab our Virtual Interface resource allocation, extract the
2631          * features that we're interested in and do a bit of sanity testing on
2632          * what we discover.
2633          */
2634         err = t4vf_get_vfres(adapter);
2635         if (err) {
2636                 dev_err(adapter->pdev_dev, "unable to get virtual interface"
2637                         " resources: err=%d\n", err);
2638                 return err;
2639         }
2640
2641         /* Check for various parameter sanity issues */
2642         if (adapter->params.vfres.pmask == 0) {
2643                 dev_err(adapter->pdev_dev, "no port access configured\n"
2644                         "usable!\n");
2645                 return -EINVAL;
2646         }
2647         if (adapter->params.vfres.nvi == 0) {
2648                 dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2649                         "usable!\n");
2650                 return -EINVAL;
2651         }
2652
2653         /* Initialize nports and max_ethqsets now that we have our Virtual
2654          * Function Resources.
2655          */
2656         size_nports_qsets(adapter);
2657
2658         return 0;
2659 }
2660
2661 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2662                              u8 pkt_cnt_idx, unsigned int size,
2663                              unsigned int iqe_size)
2664 {
2665         rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
2666                              (pkt_cnt_idx < SGE_NCOUNTERS ?
2667                               QINTR_CNT_EN_F : 0));
2668         rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2669                             ? pkt_cnt_idx
2670                             : 0);
2671         rspq->iqe_len = iqe_size;
2672         rspq->size = size;
2673 }
2674
2675 /*
2676  * Perform default configuration of DMA queues depending on the number and
2677  * type of ports we found and the number of available CPUs.  Most settings can
2678  * be modified by the admin via ethtool and cxgbtool prior to the adapter
2679  * being brought up for the first time.
2680  */
2681 static void cfg_queues(struct adapter *adapter)
2682 {
2683         struct sge *s = &adapter->sge;
2684         int q10g, n10g, qidx, pidx, qs;
2685         size_t iqe_size;
2686
2687         /*
2688          * We should not be called till we know how many Queue Sets we can
2689          * support.  In particular, this means that we need to know what kind
2690          * of interrupts we'll be using ...
2691          */
2692         BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2693
2694         /*
2695          * Count the number of 10GbE Virtual Interfaces that we have.
2696          */
2697         n10g = 0;
2698         for_each_port(adapter, pidx)
2699                 n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2700
2701         /*
2702          * We default to 1 queue per non-10G port and up to # of cores queues
2703          * per 10G port.
2704          */
2705         if (n10g == 0)
2706                 q10g = 0;
2707         else {
2708                 int n1g = (adapter->params.nports - n10g);
2709                 q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2710                 if (q10g > num_online_cpus())
2711                         q10g = num_online_cpus();
2712         }
2713
2714         /*
2715          * Allocate the "Queue Sets" to the various Virtual Interfaces.
2716          * The layout will be established in setup_sge_queues() when the
2717          * adapter is brough up for the first time.
2718          */
2719         qidx = 0;
2720         for_each_port(adapter, pidx) {
2721                 struct port_info *pi = adap2pinfo(adapter, pidx);
2722
2723                 pi->first_qset = qidx;
2724                 pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
2725                 qidx += pi->nqsets;
2726         }
2727         s->ethqsets = qidx;
2728
2729         /*
2730          * The Ingress Queue Entry Size for our various Response Queues needs
2731          * to be big enough to accommodate the largest message we can receive
2732          * from the chip/firmware; which is 64 bytes ...
2733          */
2734         iqe_size = 64;
2735
2736         /*
2737          * Set up default Queue Set parameters ...  Start off with the
2738          * shortest interrupt holdoff timer.
2739          */
2740         for (qs = 0; qs < s->max_ethqsets; qs++) {
2741                 struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2742                 struct sge_eth_txq *txq = &s->ethtxq[qs];
2743
2744                 init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2745                 rxq->fl.size = 72;
2746                 txq->q.size = 1024;
2747         }
2748
2749         /*
2750          * The firmware event queue is used for link state changes and
2751          * notifications of TX DMA completions.
2752          */
2753         init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2754
2755         /*
2756          * The forwarded interrupt queue is used when we're in MSI interrupt
2757          * mode.  In this mode all interrupts associated with RX queues will
2758          * be forwarded to a single queue which we'll associate with our MSI
2759          * interrupt vector.  The messages dropped in the forwarded interrupt
2760          * queue will indicate which ingress queue needs servicing ...  This
2761          * queue needs to be large enough to accommodate all of the ingress
2762          * queues which are forwarding their interrupt (+1 to prevent the PIDX
2763          * from equalling the CIDX if every ingress queue has an outstanding
2764          * interrupt).  The queue doesn't need to be any larger because no
2765          * ingress queue will ever have more than one outstanding interrupt at
2766          * any time ...
2767          */
2768         init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2769                   iqe_size);
2770 }
2771
2772 /*
2773  * Reduce the number of Ethernet queues across all ports to at most n.
2774  * n provides at least one queue per port.
2775  */
2776 static void reduce_ethqs(struct adapter *adapter, int n)
2777 {
2778         int i;
2779         struct port_info *pi;
2780
2781         /*
2782          * While we have too many active Ether Queue Sets, interate across the
2783          * "ports" and reduce their individual Queue Set allocations.
2784          */
2785         BUG_ON(n < adapter->params.nports);
2786         while (n < adapter->sge.ethqsets)
2787                 for_each_port(adapter, i) {
2788                         pi = adap2pinfo(adapter, i);
2789                         if (pi->nqsets > 1) {
2790                                 pi->nqsets--;
2791                                 adapter->sge.ethqsets--;
2792                                 if (adapter->sge.ethqsets <= n)
2793                                         break;
2794                         }
2795                 }
2796
2797         /*
2798          * Reassign the starting Queue Sets for each of the "ports" ...
2799          */
2800         n = 0;
2801         for_each_port(adapter, i) {
2802                 pi = adap2pinfo(adapter, i);
2803                 pi->first_qset = n;
2804                 n += pi->nqsets;
2805         }
2806 }
2807
2808 /*
2809  * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2810  * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2811  * need.  Minimally we need one for every Virtual Interface plus those needed
2812  * for our "extras".  Note that this process may lower the maximum number of
2813  * allowed Queue Sets ...
2814  */
2815 static int enable_msix(struct adapter *adapter)
2816 {
2817         int i, want, need, nqsets;
2818         struct msix_entry entries[MSIX_ENTRIES];
2819         struct sge *s = &adapter->sge;
2820
2821         for (i = 0; i < MSIX_ENTRIES; ++i)
2822                 entries[i].entry = i;
2823
2824         /*
2825          * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2826          * plus those needed for our "extras" (for example, the firmware
2827          * message queue).  We _need_ at least one "Queue Set" per Virtual
2828          * Interface plus those needed for our "extras".  So now we get to see
2829          * if the song is right ...
2830          */
2831         want = s->max_ethqsets + MSIX_EXTRAS;
2832         need = adapter->params.nports + MSIX_EXTRAS;
2833
2834         want = pci_enable_msix_range(adapter->pdev, entries, need, want);
2835         if (want < 0)
2836                 return want;
2837
2838         nqsets = want - MSIX_EXTRAS;
2839         if (nqsets < s->max_ethqsets) {
2840                 dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2841                          " for %d Queue Sets\n", nqsets);
2842                 s->max_ethqsets = nqsets;
2843                 if (nqsets < s->ethqsets)
2844                         reduce_ethqs(adapter, nqsets);
2845         }
2846         for (i = 0; i < want; ++i)
2847                 adapter->msix_info[i].vec = entries[i].vector;
2848
2849         return 0;
2850 }
2851
2852 static const struct net_device_ops cxgb4vf_netdev_ops   = {
2853         .ndo_open               = cxgb4vf_open,
2854         .ndo_stop               = cxgb4vf_stop,
2855         .ndo_start_xmit         = t4vf_eth_xmit,
2856         .ndo_get_stats          = cxgb4vf_get_stats,
2857         .ndo_set_rx_mode        = cxgb4vf_set_rxmode,
2858         .ndo_set_mac_address    = cxgb4vf_set_mac_addr,
2859         .ndo_validate_addr      = eth_validate_addr,
2860         .ndo_do_ioctl           = cxgb4vf_do_ioctl,
2861         .ndo_change_mtu         = cxgb4vf_change_mtu,
2862         .ndo_fix_features       = cxgb4vf_fix_features,
2863         .ndo_set_features       = cxgb4vf_set_features,
2864 #ifdef CONFIG_NET_POLL_CONTROLLER
2865         .ndo_poll_controller    = cxgb4vf_poll_controller,
2866 #endif
2867 };
2868
2869 /*
2870  * "Probe" a device: initialize a device and construct all kernel and driver
2871  * state needed to manage the device.  This routine is called "init_one" in
2872  * the PF Driver ...
2873  */
2874 static int cxgb4vf_pci_probe(struct pci_dev *pdev,
2875                              const struct pci_device_id *ent)
2876 {
2877         int pci_using_dac;
2878         int err, pidx;
2879         unsigned int pmask;
2880         struct adapter *adapter;
2881         struct port_info *pi;
2882         struct net_device *netdev;
2883         unsigned int pf;
2884
2885         /*
2886          * Print our driver banner the first time we're called to initialize a
2887          * device.
2888          */
2889         pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION);
2890
2891         /*
2892          * Initialize generic PCI device state.
2893          */
2894         err = pci_enable_device(pdev);
2895         if (err) {
2896                 dev_err(&pdev->dev, "cannot enable PCI device\n");
2897                 return err;
2898         }
2899
2900         /*
2901          * Reserve PCI resources for the device.  If we can't get them some
2902          * other driver may have already claimed the device ...
2903          */
2904         err = pci_request_regions(pdev, KBUILD_MODNAME);
2905         if (err) {
2906                 dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2907                 goto err_disable_device;
2908         }
2909
2910         /*
2911          * Set up our DMA mask: try for 64-bit address masking first and
2912          * fall back to 32-bit if we can't get 64 bits ...
2913          */
2914         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2915         if (err == 0) {
2916                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2917                 if (err) {
2918                         dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2919                                 " coherent allocations\n");
2920                         goto err_release_regions;
2921                 }
2922                 pci_using_dac = 1;
2923         } else {
2924                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2925                 if (err != 0) {
2926                         dev_err(&pdev->dev, "no usable DMA configuration\n");
2927                         goto err_release_regions;
2928                 }
2929                 pci_using_dac = 0;
2930         }
2931
2932         /*
2933          * Enable bus mastering for the device ...
2934          */
2935         pci_set_master(pdev);
2936
2937         /*
2938          * Allocate our adapter data structure and attach it to the device.
2939          */
2940         adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2941         if (!adapter) {
2942                 err = -ENOMEM;
2943                 goto err_release_regions;
2944         }
2945         pci_set_drvdata(pdev, adapter);
2946         adapter->pdev = pdev;
2947         adapter->pdev_dev = &pdev->dev;
2948
2949         adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
2950                                     (sizeof(struct mbox_cmd) *
2951                                      T4VF_OS_LOG_MBOX_CMDS),
2952                                     GFP_KERNEL);
2953         if (!adapter->mbox_log) {
2954                 err = -ENOMEM;
2955                 goto err_free_adapter;
2956         }
2957         adapter->mbox_log->size = T4VF_OS_LOG_MBOX_CMDS;
2958
2959         /*
2960          * Initialize SMP data synchronization resources.
2961          */
2962         spin_lock_init(&adapter->stats_lock);
2963         spin_lock_init(&adapter->mbox_lock);
2964         INIT_LIST_HEAD(&adapter->mlist.list);
2965
2966         /*
2967          * Map our I/O registers in BAR0.
2968          */
2969         adapter->regs = pci_ioremap_bar(pdev, 0);
2970         if (!adapter->regs) {
2971                 dev_err(&pdev->dev, "cannot map device registers\n");
2972                 err = -ENOMEM;
2973                 goto err_free_adapter;
2974         }
2975
2976         /* Wait for the device to become ready before proceeding ...
2977          */
2978         err = t4vf_prep_adapter(adapter);
2979         if (err) {
2980                 dev_err(adapter->pdev_dev, "device didn't become ready:"
2981                         " err=%d\n", err);
2982                 goto err_unmap_bar0;
2983         }
2984
2985         /* For T5 and later we want to use the new BAR-based User Doorbells,
2986          * so we need to map BAR2 here ...
2987          */
2988         if (!is_t4(adapter->params.chip)) {
2989                 adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
2990                                            pci_resource_len(pdev, 2));
2991                 if (!adapter->bar2) {
2992                         dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n");
2993                         err = -ENOMEM;
2994                         goto err_unmap_bar0;
2995                 }
2996         }
2997         /*
2998          * Initialize adapter level features.
2999          */
3000         adapter->name = pci_name(pdev);
3001         adapter->msg_enable = DFLT_MSG_ENABLE;
3002
3003         /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
3004          * Ingress Packet Data to Free List Buffers in order to allow for
3005          * chipset performance optimizations between the Root Complex and
3006          * Memory Controllers.  (Messages to the associated Ingress Queue
3007          * notifying new Packet Placement in the Free Lists Buffers will be
3008          * send without the Relaxed Ordering Attribute thus guaranteeing that
3009          * all preceding PCIe Transaction Layer Packets will be processed
3010          * first.)  But some Root Complexes have various issues with Upstream
3011          * Transaction Layer Packets with the Relaxed Ordering Attribute set.
         * PCIe devices under such Root Complexes will have the Relaxed
         * Ordering bit cleared in their configuration space, so we check our
         * PCIe configuration space to see if it's flagged with advice against
         * using Relaxed Ordering.
3016          */
3017         if (!pcie_relaxed_ordering_enabled(pdev))
3018                 adapter->flags |= ROOT_NO_RELAXED_ORDERING;
3019
3020         err = adap_init0(adapter);
3021         if (err)
3022                 goto err_unmap_bar;
3023
3024         /*
3025          * Allocate our "adapter ports" and stitch everything together.
3026          */
3027         pmask = adapter->params.vfres.pmask;
3028         pf = t4vf_get_pf_from_vf(adapter);
3029         for_each_port(adapter, pidx) {
3030                 int port_id, viid;
3031                 u8 mac[ETH_ALEN];
3032                 unsigned int naddr = 1;
3033
3034                 /*
3035                  * We simplistically allocate our virtual interfaces
3036                  * sequentially across the port numbers to which we have
3037                  * access rights.  This should be configurable in some manner
3038                  * ...
3039                  */
3040                 if (pmask == 0)
3041                         break;
3042                 port_id = ffs(pmask) - 1;
3043                 pmask &= ~(1 << port_id);
3044                 viid = t4vf_alloc_vi(adapter, port_id);
3045                 if (viid < 0) {
3046                         dev_err(&pdev->dev, "cannot allocate VI for port %d:"
3047                                 " err=%d\n", port_id, viid);
3048                         err = viid;
3049                         goto err_free_dev;
3050                 }
3051
3052                 /*
3053                  * Allocate our network device and stitch things together.
3054                  */
3055                 netdev = alloc_etherdev_mq(sizeof(struct port_info),
3056                                            MAX_PORT_QSETS);
3057                 if (netdev == NULL) {
3058                         t4vf_free_vi(adapter, viid);
3059                         err = -ENOMEM;
3060                         goto err_free_dev;
3061                 }
3062                 adapter->port[pidx] = netdev;
3063                 SET_NETDEV_DEV(netdev, &pdev->dev);
3064                 pi = netdev_priv(netdev);
3065                 pi->adapter = adapter;
3066                 pi->pidx = pidx;
3067                 pi->port_id = port_id;
3068                 pi->viid = viid;
3069
3070                 /*
3071                  * Initialize the starting state of our "port" and register
3072                  * it.
3073                  */
3074                 pi->xact_addr_filt = -1;
3075                 netif_carrier_off(netdev);
3076                 netdev->irq = pdev->irq;
3077
3078                 netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
3079                         NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3080                         NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM;
3081                 netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
3082                         NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3083                         NETIF_F_HIGHDMA;
3084                 netdev->features = netdev->hw_features |
3085                                    NETIF_F_HW_VLAN_CTAG_TX;
3086                 if (pci_using_dac)
3087                         netdev->features |= NETIF_F_HIGHDMA;
3088
3089                 netdev->priv_flags |= IFF_UNICAST_FLT;
3090                 netdev->min_mtu = 81;
3091                 netdev->max_mtu = ETH_MAX_MTU;
3092
3093                 netdev->netdev_ops = &cxgb4vf_netdev_ops;
3094                 netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
3095                 netdev->dev_port = pi->port_id;
3096
3097                 /*
3098                  * Initialize the hardware/software state for the port.
3099                  */
3100                 err = t4vf_port_init(adapter, pidx);
3101                 if (err) {
3102                         dev_err(&pdev->dev, "cannot initialize port %d\n",
3103                                 pidx);
3104                         goto err_free_dev;
3105                 }
3106
3107                 err = t4vf_get_vf_mac_acl(adapter, pf, &naddr, mac);
3108                 if (err) {
3109                         dev_err(&pdev->dev,
3110                                 "unable to determine MAC ACL address, "
3111                                 "continuing anyway.. (status %d)\n", err);
3112                 } else if (naddr && adapter->params.vfres.nvi == 1) {
3113                         struct sockaddr addr;
3114
3115                         ether_addr_copy(addr.sa_data, mac);
3116                         err = cxgb4vf_set_mac_addr(netdev, &addr);
3117                         if (err) {
3118                                 dev_err(&pdev->dev,
3119                                         "unable to set MAC address %pM\n",
3120                                         mac);
3121                                 goto err_free_dev;
3122                         }
3123                         dev_info(&pdev->dev,
3124                                  "Using assigned MAC ACL: %pM\n", mac);
3125                 }
3126         }
3127
3128         /* See what interrupts we'll be using.  If we've been configured to
3129          * use MSI-X interrupts, try to enable them but fall back to using
3130          * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
3131          * get MSI interrupts we bail with the error.
3132          */
3133         if (msi == MSI_MSIX && enable_msix(adapter) == 0)
3134                 adapter->flags |= USING_MSIX;
3135         else {
3136                 if (msi == MSI_MSIX) {
3137                         dev_info(adapter->pdev_dev,
3138                                  "Unable to use MSI-X Interrupts; falling "
3139                                  "back to MSI Interrupts\n");
3140
3141                         /* We're going to need a Forwarded Interrupt Queue so
3142                          * that may cut into how many Queue Sets we can
3143                          * support.
3144                          */
3145                         msi = MSI_MSI;
3146                         size_nports_qsets(adapter);
3147                 }
3148                 err = pci_enable_msi(pdev);
3149                 if (err) {
3150                         dev_err(&pdev->dev, "Unable to allocate MSI Interrupts;"
3151                                 " err=%d\n", err);
3152                         goto err_free_dev;
3153                 }
3154                 adapter->flags |= USING_MSI;
3155         }
3156
3157         /* Now that we know how many "ports" we have and what interrupt
3158          * mechanism we're going to use, we can configure our queue resources.
3159          */
3160         cfg_queues(adapter);
3161
3162         /*
3163          * The "card" is now ready to go.  If any errors occur during device
3164          * registration we do not fail the whole "card" but rather proceed
3165          * only with the ports we manage to register successfully.  However we
3166          * must register at least one net device.
3167          */
3168         for_each_port(adapter, pidx) {
3169                 struct port_info *pi = netdev_priv(adapter->port[pidx]);
3170                 netdev = adapter->port[pidx];
3171                 if (netdev == NULL)
3172                         continue;
3173
3174                 netif_set_real_num_tx_queues(netdev, pi->nqsets);
3175                 netif_set_real_num_rx_queues(netdev, pi->nqsets);
3176
3177                 err = register_netdev(netdev);
3178                 if (err) {
3179                         dev_warn(&pdev->dev, "cannot register net device %s,"
3180                                  " skipping\n", netdev->name);
3181                         continue;
3182                 }
3183
3184                 set_bit(pidx, &adapter->registered_device_map);
3185         }
3186         if (adapter->registered_device_map == 0) {
3187                 dev_err(&pdev->dev, "could not register any net devices\n");
3188                 goto err_disable_interrupts;
3189         }
3190
3191         /*
3192          * Set up our debugfs entries.
3193          */
3194         if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
3195                 adapter->debugfs_root =
3196                         debugfs_create_dir(pci_name(pdev),
3197                                            cxgb4vf_debugfs_root);
3198                 if (IS_ERR_OR_NULL(adapter->debugfs_root))
3199                         dev_warn(&pdev->dev, "could not create debugfs"
3200                                  " directory");
3201                 else
3202                         setup_debugfs(adapter);
3203         }
3204
3205         /*
3206          * Print a short notice on the existence and configuration of the new
3207          * VF network device ...
3208          */
3209         for_each_port(adapter, pidx) {
3210                 dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
3211                          adapter->port[pidx]->name,
3212                          (adapter->flags & USING_MSIX) ? "MSI-X" :
3213                          (adapter->flags & USING_MSI)  ? "MSI" : "");
3214         }
3215
3216         /*
3217          * Return success!
3218          */
3219         return 0;
3220
3221         /*
3222          * Error recovery and exit code.  Unwind state that's been created
3223          * so far and return the error.
3224          */
3225 err_disable_interrupts:
3226         if (adapter->flags & USING_MSIX) {
3227                 pci_disable_msix(adapter->pdev);
3228                 adapter->flags &= ~USING_MSIX;
3229         } else if (adapter->flags & USING_MSI) {
3230                 pci_disable_msi(adapter->pdev);
3231                 adapter->flags &= ~USING_MSI;
3232         }
3233
3234 err_free_dev:
3235         for_each_port(adapter, pidx) {
3236                 netdev = adapter->port[pidx];
3237                 if (netdev == NULL)
3238                         continue;
3239                 pi = netdev_priv(netdev);
3240                 t4vf_free_vi(adapter, pi->viid);
3241                 if (test_bit(pidx, &adapter->registered_device_map))
3242                         unregister_netdev(netdev);
3243                 free_netdev(netdev);
3244         }
3245
3246 err_unmap_bar:
3247         if (!is_t4(adapter->params.chip))
3248                 iounmap(adapter->bar2);
3249
3250 err_unmap_bar0:
3251         iounmap(adapter->regs);
3252
3253 err_free_adapter:
3254         kfree(adapter->mbox_log);
3255         kfree(adapter);
3256
3257 err_release_regions:
3258         pci_release_regions(pdev);
3259         pci_clear_master(pdev);
3260
3261 err_disable_device:
3262         pci_disable_device(pdev);
3263
3264         return err;
3265 }
3266
3267 /*
3268  * "Remove" a device: tear down all kernel and driver state created in the
3269  * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
3270  * that this is called "remove_one" in the PF Driver.)
3271  */
3272 static void cxgb4vf_pci_remove(struct pci_dev *pdev)
3273 {
3274         struct adapter *adapter = pci_get_drvdata(pdev);
3275
3276         /*
3277          * Tear down driver state associated with device.
3278          */
3279         if (adapter) {
3280                 int pidx;
3281
3282                 /*
3283                  * Stop all of our activity.  Unregister network port,
3284                  * disable interrupts, etc.
3285                  */
3286                 for_each_port(adapter, pidx)
3287                         if (test_bit(pidx, &adapter->registered_device_map))
3288                                 unregister_netdev(adapter->port[pidx]);
3289                 t4vf_sge_stop(adapter);
3290                 if (adapter->flags & USING_MSIX) {
3291                         pci_disable_msix(adapter->pdev);
3292                         adapter->flags &= ~USING_MSIX;
3293                 } else if (adapter->flags & USING_MSI) {
3294                         pci_disable_msi(adapter->pdev);
3295                         adapter->flags &= ~USING_MSI;
3296                 }
3297
3298                 /*
3299                  * Tear down our debugfs entries.
3300                  */
3301                 if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
3302                         cleanup_debugfs(adapter);
3303                         debugfs_remove_recursive(adapter->debugfs_root);
3304                 }
3305
3306                 /*
3307                  * Free all of the various resources which we've acquired ...
3308                  */
3309                 t4vf_free_sge_resources(adapter);
3310                 for_each_port(adapter, pidx) {
3311                         struct net_device *netdev = adapter->port[pidx];
3312                         struct port_info *pi;
3313
3314                         if (netdev == NULL)
3315                                 continue;
3316
3317                         pi = netdev_priv(netdev);
3318                         t4vf_free_vi(adapter, pi->viid);
3319                         free_netdev(netdev);
3320                 }
3321                 iounmap(adapter->regs);
3322                 if (!is_t4(adapter->params.chip))
3323                         iounmap(adapter->bar2);
3324                 kfree(adapter->mbox_log);
3325                 kfree(adapter);
3326         }
3327
3328         /*
3329          * Disable the device and release its PCI resources.
3330          */
3331         pci_disable_device(pdev);
3332         pci_clear_master(pdev);
3333         pci_release_regions(pdev);
3334 }
3335
3336 /*
3337  * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
3338  * delivery.
3339  */
3340 static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
3341 {
3342         struct adapter *adapter;
3343         int pidx;
3344
3345         adapter = pci_get_drvdata(pdev);
3346         if (!adapter)
3347                 return;
3348
3349         /* Disable all Virtual Interfaces.  This will shut down the
3350          * delivery of all ingress packets into the chip for these
3351          * Virtual Interfaces.
3352          */
3353         for_each_port(adapter, pidx)
3354                 if (test_bit(pidx, &adapter->registered_device_map))
3355                         unregister_netdev(adapter->port[pidx]);
3356
3357         /* Free up all Queues which will prevent further DMA and
3358          * Interrupts allowing various internal pathways to drain.
3359          */
3360         t4vf_sge_stop(adapter);
3361         if (adapter->flags & USING_MSIX) {
3362                 pci_disable_msix(adapter->pdev);
3363                 adapter->flags &= ~USING_MSIX;
3364         } else if (adapter->flags & USING_MSI) {
3365                 pci_disable_msi(adapter->pdev);
3366                 adapter->flags &= ~USING_MSI;
3367         }
3368
3369         /*
3370          * Free up all Queues which will prevent further DMA and
3371          * Interrupts allowing various internal pathways to drain.
3372          */
3373         t4vf_free_sge_resources(adapter);
3374         pci_set_drvdata(pdev, NULL);
3375 }
3376
/* Macros needed to support the PCI Device ID Table ...
 *
 * These are defined immediately before including t4_pci_id_tbl.h, which is
 * shared with the cxgb4 directory.  NOTE(review): the header is not visible
 * here; presumably it expands these macros to emit the complete
 * cxgb4vf_pci_tbl[] array (including the terminating ';') -- confirm.
 */

/* Opens the definition of the driver's PCI device ID array. */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
        static const struct pci_device_id cxgb4vf_pci_tbl[] = {
/* NOTE(review): presumably selects the PCI function whose device IDs are
 * generated for this (VF) driver -- confirm against t4_pci_id_tbl.h.
 */
#define CH_PCI_DEVICE_ID_FUNCTION       0x8

/* One table entry: a Chelsio-vendor device with the given device ID; the
 * trailing member is set to 0.
 */
#define CH_PCI_ID_TABLE_ENTRY(devid) \
                { PCI_VDEVICE(CHELSIO, (devid)), 0 }

/* Zeroed sentinel entry followed by the closing brace of the array. */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } }

#include "../cxgb4/t4_pci_id_tbl.h"
3389
/*
 * Module metadata: description, author, license and version strings, plus
 * the declaration of cxgb4vf_pci_tbl as this module's PCI device table.
 */
MODULE_DESCRIPTION(DRV_DESC);
MODULE_AUTHOR("Chelsio Communications");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
3395
/*
 * PCI driver record handed to pci_register_driver()/pci_unregister_driver()
 * by the module init/exit routines.  It ties the cxgb4vf_pci_tbl device ID
 * table to this driver's probe, remove and shutdown entry points.
 */
static struct pci_driver cxgb4vf_driver = {
        .name           = KBUILD_MODNAME,
        .id_table       = cxgb4vf_pci_tbl,
        .probe          = cxgb4vf_pci_probe,
        .remove         = cxgb4vf_pci_remove,
        .shutdown       = cxgb4vf_pci_shutdown,
};
3403
3404 /*
3405  * Initialize global driver state.
3406  */
3407 static int __init cxgb4vf_module_init(void)
3408 {
3409         int ret;
3410
3411         /*
3412          * Vet our module parameters.
3413          */
3414         if (msi != MSI_MSIX && msi != MSI_MSI) {
3415                 pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n",
3416                         msi, MSI_MSIX, MSI_MSI);
3417                 return -EINVAL;
3418         }
3419
3420         /* Debugfs support is optional, just warn if this fails */
3421         cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
3422         if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3423                 pr_warn("could not create debugfs entry, continuing\n");
3424
3425         ret = pci_register_driver(&cxgb4vf_driver);
3426         if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3427                 debugfs_remove(cxgb4vf_debugfs_root);
3428         return ret;
3429 }
3430
/*
 * Tear down global driver state: unregister the PCI driver, then remove the
 * driver-wide debugfs directory that cxgb4vf_module_init() created.
 */
static void __exit cxgb4vf_module_exit(void)
{
        pci_unregister_driver(&cxgb4vf_driver);
        /* Called unconditionally, unlike the guarded call on the init
         * failure path; cxgb4vf_debugfs_root may be NULL or an error
         * pointer here if creation failed.
         */
        debugfs_remove(cxgb4vf_debugfs_root);
}
3439
/* Register the module's load and unload entry points. */
module_init(cxgb4vf_module_init);
module_exit(cxgb4vf_module_exit);