drivers/net/ethernet/sfc/efx_common.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /****************************************************************************
   3  * Driver for Solarflare network controllers and boards
   4  * Copyright 2018 Solarflare Communications Inc.
   5  *
   6  * This program is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 as published
   8  * by the Free Software Foundation, incorporated herein by reference.
   9  */
  10
  11 #include "net_driver.h"
  12 #include <linux/module.h>
  13 #include <linux/netdevice.h>
  14 #include "efx_common.h"
  15 #include "efx_channels.h"
  16 #include "efx.h"
  17 #include "mcdi.h"
  18 #include "selftest.h"
  19 #include "rx_common.h"
  20 #include "tx_common.h"
  21 #include "nic.h"
  22 #include "io.h"
  23 #include "mcdi_pcol.h"
  24
  25 static unsigned int debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
  26                              NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
  27                              NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
  28                              NETIF_MSG_TX_ERR | NETIF_MSG_HW);
  29 module_param(debug, uint, 0);
  30 MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");
  31
  32 /* This is the time (in jiffies) between invocations of the hardware
  33  * monitor.
  34  * On Falcon-based NICs, this will:
  35  * - Check the on-board hardware monitor;
  36  * - Poll the link state and reconfigure the hardware as necessary.
  37  * On Siena-based NICs for power systems with EEH support, this will give EEH a
  38  * chance to start.
  39  */
  40 static unsigned int efx_monitor_interval = 1 * HZ;
  41
  42 /* How often and how many times to poll for a reset while waiting for a
  43  * BIST that another function started to complete.
  44  */
  45 #define BIST_WAIT_DELAY_MS      100
  46 #define BIST_WAIT_DELAY_COUNT   100
  47
  48 /* Default stats update time */
  49 #define STATS_PERIOD_MS_DEFAULT 1000
  50
  51 const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
  52 const char *const efx_reset_type_names[] = {
  53         [RESET_TYPE_INVISIBLE]          = "INVISIBLE",
  54         [RESET_TYPE_ALL]                = "ALL",
  55         [RESET_TYPE_RECOVER_OR_ALL]     = "RECOVER_OR_ALL",
  56         [RESET_TYPE_WORLD]              = "WORLD",
  57         [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
  58         [RESET_TYPE_DATAPATH]           = "DATAPATH",
  59         [RESET_TYPE_MC_BIST]            = "MC_BIST",
  60         [RESET_TYPE_DISABLE]            = "DISABLE",
  61         [RESET_TYPE_TX_WATCHDOG]        = "TX_WATCHDOG",
  62         [RESET_TYPE_INT_ERROR]          = "INT_ERROR",
  63         [RESET_TYPE_DMA_ERROR]          = "DMA_ERROR",
  64         [RESET_TYPE_TX_SKIP]            = "TX_SKIP",
  65         [RESET_TYPE_MC_FAILURE]         = "MC_FAILURE",
  66         [RESET_TYPE_MCDI_TIMEOUT]       = "MCDI_TIMEOUT (FLR)",
  67 };
  68
  69 #define RESET_TYPE(type) \
  70         STRING_TABLE_LOOKUP(type, efx_reset_type)
  71
  72 /* Loopback mode names (see LOOPBACK_MODE()) */
  73 const unsigned int efx_loopback_mode_max = LOOPBACK_MAX;
  74 const char *const efx_loopback_mode_names[] = {
  75         [LOOPBACK_NONE]         = "NONE",
  76         [LOOPBACK_DATA]         = "DATAPATH",
  77         [LOOPBACK_GMAC]         = "GMAC",
  78         [LOOPBACK_XGMII]        = "XGMII",
  79         [LOOPBACK_XGXS]         = "XGXS",
  80         [LOOPBACK_XAUI]         = "XAUI",
  81         [LOOPBACK_GMII]         = "GMII",
  82         [LOOPBACK_SGMII]        = "SGMII",
  83         [LOOPBACK_XGBR]         = "XGBR",
  84         [LOOPBACK_XFI]          = "XFI",
  85         [LOOPBACK_XAUI_FAR]     = "XAUI_FAR",
  86         [LOOPBACK_GMII_FAR]     = "GMII_FAR",
  87         [LOOPBACK_SGMII_FAR]    = "SGMII_FAR",
  88         [LOOPBACK_XFI_FAR]      = "XFI_FAR",
  89         [LOOPBACK_GPHY]         = "GPHY",
  90         [LOOPBACK_PHYXS]        = "PHYXS",
  91         [LOOPBACK_PCS]          = "PCS",
  92         [LOOPBACK_PMAPMD]       = "PMA/PMD",
  93         [LOOPBACK_XPORT]        = "XPORT",
  94         [LOOPBACK_XGMII_WS]     = "XGMII_WS",
  95         [LOOPBACK_XAUI_WS]      = "XAUI_WS",
  96         [LOOPBACK_XAUI_WS_FAR]  = "XAUI_WS_FAR",
  97         [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR",
  98         [LOOPBACK_GMII_WS]      = "GMII_WS",
  99         [LOOPBACK_XFI_WS]       = "XFI_WS",
 100         [LOOPBACK_XFI_WS_FAR]   = "XFI_WS_FAR",
 101         [LOOPBACK_PHYXS_WS]     = "PHYXS_WS",
 102 };
 103
 104 /* Reset workqueue. If any NIC has a hardware failure then a reset will be
 105  * queued onto this work queue. This is not a per-nic work queue, because
 106  * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
 107  */
 108 static struct workqueue_struct *reset_workqueue;
 109
 110 int efx_create_reset_workqueue(void)
 111 {
 112         reset_workqueue = create_singlethread_workqueue("sfc_reset");
 113         if (!reset_workqueue) {
 114                 printk(KERN_ERR "Failed to create reset workqueue\n");
 115                 return -ENOMEM;
 116         }
 117
 118         return 0;
 119 }
 120
 121 void efx_queue_reset_work(struct efx_nic *efx)
 122 {
 123         queue_work(reset_workqueue, &efx->reset_work);
 124 }
 125
 126 void efx_flush_reset_workqueue(struct efx_nic *efx)
 127 {
 128         cancel_work_sync(&efx->reset_work);
 129 }
 130
 131 void efx_destroy_reset_workqueue(void)
 132 {
 133         if (reset_workqueue) {
 134                 destroy_workqueue(reset_workqueue);
 135                 reset_workqueue = NULL;
 136         }
 137 }
 138
 139 /* We assume that efx->type->reconfigure_mac will always try to sync RX
 140  * filters and therefore needs to read-lock the filter table against freeing
 141  */
 142 void efx_mac_reconfigure(struct efx_nic *efx)
 143 {
 144         if (efx->type->reconfigure_mac) {
 145                 down_read(&efx->filter_sem);
 146                 efx->type->reconfigure_mac(efx);
 147                 up_read(&efx->filter_sem);
 148         }
 149 }
 150
 151 /* Asynchronous work item for changing MAC promiscuity and multicast
 152  * hash.  Avoid a drain/rx_ingress enable by reconfiguring the current
 153  * MAC directly.
 154  */
 155 static void efx_mac_work(struct work_struct *data)
 156 {
 157         struct efx_nic *efx = container_of(data, struct efx_nic, mac_work);
 158
 159         mutex_lock(&efx->mac_lock);
 160         if (efx->port_enabled)
 161                 efx_mac_reconfigure(efx);
 162         mutex_unlock(&efx->mac_lock);
 163 }
 164
 165 /* This ensures that the kernel is kept informed (via
 166  * netif_carrier_on/off) of the link status, and also maintains the
 167  * link status's stop on the port's TX queue.
 168  */
 169 void efx_link_status_changed(struct efx_nic *efx)
 170 {
 171         struct efx_link_state *link_state = &efx->link_state;
 172
 173         /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
 174          * that no events are triggered between unregister_netdev() and the
 175          * driver unloading. A more general condition is that NETDEV_CHANGE
 176          * can only be generated between NETDEV_UP and NETDEV_DOWN
 177          */
 178         if (!netif_running(efx->net_dev))
 179                 return;
 180
 181         if (link_state->up != netif_carrier_ok(efx->net_dev)) {
 182                 efx->n_link_state_changes++;
 183
 184                 if (link_state->up)
 185                         netif_carrier_on(efx->net_dev);
 186                 else
 187                         netif_carrier_off(efx->net_dev);
 188         }
 189
 190         /* Status message for kernel log */
 191         if (link_state->up)
 192                 netif_info(efx, link, efx->net_dev,
 193                            "link up at %uMbps %s-duplex (MTU %d)\n",
 194                            link_state->speed, link_state->fd ? "full" : "half",
 195                            efx->net_dev->mtu);
 196         else
 197                 netif_info(efx, link, efx->net_dev, "link down\n");
 198 }
 199
 200 unsigned int efx_xdp_max_mtu(struct efx_nic *efx)
 201 {
 202         /* The maximum MTU that we can fit in a single page, allowing for
 203          * framing, overhead and XDP headroom + tailroom.
 204          */
 205         int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) +
 206                        efx->rx_prefix_size + efx->type->rx_buffer_padding +
 207                        efx->rx_ip_align + EFX_XDP_HEADROOM + EFX_XDP_TAILROOM;
 208
 209         return PAGE_SIZE - overhead;
 210 }
 211
 212 /* Context: process, rtnl_lock() held. */
 213 int efx_change_mtu(struct net_device *net_dev, int new_mtu)
 214 {
 215         struct efx_nic *efx = netdev_priv(net_dev);
 216         int rc;
 217
 218         rc = efx_check_disabled(efx);
 219         if (rc)
 220                 return rc;
 221
 222         if (rtnl_dereference(efx->xdp_prog) &&
 223             new_mtu > efx_xdp_max_mtu(efx)) {
 224                 netif_err(efx, drv, efx->net_dev,
 225                           "Requested MTU of %d too big for XDP (max: %d)\n",
 226                           new_mtu, efx_xdp_max_mtu(efx));
 227                 return -EINVAL;
 228         }
 229
 230         netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
 231
 232         efx_device_detach_sync(efx);
 233         efx_stop_all(efx);
 234
 235         mutex_lock(&efx->mac_lock);
 236         net_dev->mtu = new_mtu;
 237         efx_mac_reconfigure(efx);
 238         mutex_unlock(&efx->mac_lock);
 239
 240         efx_start_all(efx);
 241         efx_device_attach_if_not_resetting(efx);
 242         return 0;
 243 }
 244
 245 /**************************************************************************
 246  *
 247  * Hardware monitor
 248  *
 249  **************************************************************************/
 250
 251 /* Run periodically off the general workqueue */
 252 static void efx_monitor(struct work_struct *data)
 253 {
 254         struct efx_nic *efx = container_of(data, struct efx_nic,
 255                                            monitor_work.work);
 256
 257         netif_vdbg(efx, timer, efx->net_dev,
 258                    "hardware monitor executing on CPU %d\n",
 259                    raw_smp_processor_id());
 260         BUG_ON(efx->type->monitor == NULL);
 261
 262         /* If the mac_lock is already held then it is likely a port
 263          * reconfiguration is already in place, which will likely do
 264          * most of the work of monitor() anyway.
 265          */
 266         if (mutex_trylock(&efx->mac_lock)) {
 267                 if (efx->port_enabled && efx->type->monitor)
 268                         efx->type->monitor(efx);
 269                 mutex_unlock(&efx->mac_lock);
 270         }
 271
 272         efx_start_monitor(efx);
 273 }
 274
 275 void efx_start_monitor(struct efx_nic *efx)
 276 {
 277         if (efx->type->monitor)
 278                 queue_delayed_work(efx->workqueue, &efx->monitor_work,
 279                                    efx_monitor_interval);
 280 }
 281
 282 /**************************************************************************
 283  *
 284  * Event queue processing
 285  *
 286  *************************************************************************/
 287
 288 /* Channels are shutdown and reinitialised whilst the NIC is running
 289  * to propagate configuration changes (mtu, checksum offload), or
 290  * to clear hardware error conditions
 291  */
 292 static void efx_start_datapath(struct efx_nic *efx)
 293 {
 294         netdev_features_t old_features = efx->net_dev->features;
 295         bool old_rx_scatter = efx->rx_scatter;
 296         size_t rx_buf_len;
 297
 298         /* Calculate the rx buffer allocation parameters required to
 299          * support the current MTU, including padding for header
 300          * alignment and overruns.
 301          */
 302         efx->rx_dma_len = (efx->rx_prefix_size +
 303                            EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
 304                            efx->type->rx_buffer_padding);
 305         rx_buf_len = (sizeof(struct efx_rx_page_state)   + EFX_XDP_HEADROOM +
 306                       efx->rx_ip_align + efx->rx_dma_len + EFX_XDP_TAILROOM);
 307
 308         if (rx_buf_len <= PAGE_SIZE) {
 309                 efx->rx_scatter = efx->type->always_rx_scatter;
 310                 efx->rx_buffer_order = 0;
 311         } else if (efx->type->can_rx_scatter) {
 312                 BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES);
 313                 BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
 314                              2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE,
 315                                        EFX_RX_BUF_ALIGNMENT) >
 316                              PAGE_SIZE);
 317                 efx->rx_scatter = true;
 318                 efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
 319                 efx->rx_buffer_order = 0;
 320         } else {
 321                 efx->rx_scatter = false;
 322                 efx->rx_buffer_order = get_order(rx_buf_len);
 323         }
 324
 325         efx_rx_config_page_split(efx);
 326         if (efx->rx_buffer_order)
 327                 netif_dbg(efx, drv, efx->net_dev,
 328                           "RX buf len=%u; page order=%u batch=%u\n",
 329                           efx->rx_dma_len, efx->rx_buffer_order,
 330                           efx->rx_pages_per_batch);
 331         else
 332                 netif_dbg(efx, drv, efx->net_dev,
 333                           "RX buf len=%u step=%u bpp=%u; page batch=%u\n",
 334                           efx->rx_dma_len, efx->rx_page_buf_step,
 335                           efx->rx_bufs_per_page, efx->rx_pages_per_batch);
 336
 337         /* Restore previously fixed features in hw_features and remove
 338          * features which are fixed now
 339          */
 340         efx->net_dev->hw_features |= efx->net_dev->features;
 341         efx->net_dev->hw_features &= ~efx->fixed_features;
 342         efx->net_dev->features |= efx->fixed_features;
 343         if (efx->net_dev->features != old_features)
 344                 netdev_features_change(efx->net_dev);
 345
 346         /* RX filters may also have scatter-enabled flags */
 347         if ((efx->rx_scatter != old_rx_scatter) &&
 348             efx->type->filter_update_rx_scatter)
 349                 efx->type->filter_update_rx_scatter(efx);
 350
 351         /* We must keep at least one descriptor in a TX ring empty.
 352          * We could avoid this when the queue size does not exactly
 353          * match the hardware ring size, but it's not that important.
 354          * Therefore we stop the queue when one more skb might fill
 355          * the ring completely.  We wake it when half way back to
 356          * empty.
 357          */
 358         efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx);
 359         efx->txq_wake_thresh = efx->txq_stop_thresh / 2;
 360
 361         /* Initialise the channels */
 362         efx_start_channels(efx);
 363
 364         efx_ptp_start_datapath(efx);
 365
 366         if (netif_device_present(efx->net_dev))
 367                 netif_tx_wake_all_queues(efx->net_dev);
 368 }
 369
 370 static void efx_stop_datapath(struct efx_nic *efx)
 371 {
 372         EFX_ASSERT_RESET_SERIALISED(efx);
 373         BUG_ON(efx->port_enabled);
 374
 375         efx_ptp_stop_datapath(efx);
 376
 377         efx_stop_channels(efx);
 378 }
 379
 380 /**************************************************************************
 381  *
 382  * Port handling
 383  *
 384  **************************************************************************/
 385
 386 static void efx_start_port(struct efx_nic *efx)
 387 {
 388         netif_dbg(efx, ifup, efx->net_dev, "start port\n");
 389         BUG_ON(efx->port_enabled);
 390
 391         mutex_lock(&efx->mac_lock);
 392         efx->port_enabled = true;
 393
 394         /* Ensure MAC ingress/egress is enabled */
 395         efx_mac_reconfigure(efx);
 396
 397         mutex_unlock(&efx->mac_lock);
 398 }
 399
 400 /* Cancel work for MAC reconfiguration, periodic hardware monitoring
 401  * and the async self-test, wait for them to finish and prevent them
 402  * being scheduled again.  This doesn't cover online resets, which
 403  * should only be cancelled when removing the device.
 404  */
 405 static void efx_stop_port(struct efx_nic *efx)
 406 {
 407         netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");
 408
 409         EFX_ASSERT_RESET_SERIALISED(efx);
 410
 411         mutex_lock(&efx->mac_lock);
 412         efx->port_enabled = false;
 413         mutex_unlock(&efx->mac_lock);
 414
 415         /* Serialise against efx_set_multicast_list() */
 416         netif_addr_lock_bh(efx->net_dev);
 417         netif_addr_unlock_bh(efx->net_dev);
 418
 419         cancel_delayed_work_sync(&efx->monitor_work);
 420         efx_selftest_async_cancel(efx);
 421         cancel_work_sync(&efx->mac_work);
 422 }
 423
 424 /* If the interface is supposed to be running but is not, start
 425  * the hardware and software data path, regular activity for the port
 426  * (MAC statistics, link polling, etc.) and schedule the port to be
 427  * reconfigured.  Interrupts must already be enabled.  This function
 428  * is safe to call multiple times, so long as the NIC is not disabled.
 429  * Requires the RTNL lock.
 430  */
 431 void efx_start_all(struct efx_nic *efx)
 432 {
 433         EFX_ASSERT_RESET_SERIALISED(efx);
 434         BUG_ON(efx->state == STATE_DISABLED);
 435
 436         /* Check that it is appropriate to restart the interface. All
 437          * of these flags are safe to read under just the rtnl lock
 438          */
 439         if (efx->port_enabled || !netif_running(efx->net_dev) ||
 440             efx->reset_pending)
 441                 return;
 442
 443         efx_start_port(efx);
 444         efx_start_datapath(efx);
 445
 446         /* Start the hardware monitor if there is one */
 447         efx_start_monitor(efx);
 448
 449         /* Link state detection is normally event-driven; we have
 450          * to poll now because we could have missed a change
 451          */
 452         mutex_lock(&efx->mac_lock);
 453         if (efx->phy_op->poll(efx))
 454                 efx_link_status_changed(efx);
 455         mutex_unlock(&efx->mac_lock);
 456
 457         if (efx->type->start_stats) {
 458                 efx->type->start_stats(efx);
 459                 efx->type->pull_stats(efx);
 460                 spin_lock_bh(&efx->stats_lock);
 461                 efx->type->update_stats(efx, NULL, NULL);
 462                 spin_unlock_bh(&efx->stats_lock);
 463         }
 464 }
 465
 466 /* Quiesce the hardware and software data path, and regular activity
 467  * for the port without bringing the link down.  Safe to call multiple
 468  * times with the NIC in almost any state, but interrupts should be
 469  * enabled.  Requires the RTNL lock.
 470  */
 471 void efx_stop_all(struct efx_nic *efx)
 472 {
 473         EFX_ASSERT_RESET_SERIALISED(efx);
 474
 475         /* port_enabled can be read safely under the rtnl lock */
 476         if (!efx->port_enabled)
 477                 return;
 478
 479         if (efx->type->update_stats) {
 480                 /* update stats before we go down so we can accurately count
 481                  * rx_nodesc_drops
 482                  */
 483                 efx->type->pull_stats(efx);
 484                 spin_lock_bh(&efx->stats_lock);
 485                 efx->type->update_stats(efx, NULL, NULL);
 486                 spin_unlock_bh(&efx->stats_lock);
 487                 efx->type->stop_stats(efx);
 488         }
 489
 490         efx_stop_port(efx);
 491
 492         /* Stop the kernel transmit interface.  This is only valid if
 493          * the device is stopped or detached; otherwise the watchdog
 494          * may fire immediately.
 495          */
 496         WARN_ON(netif_running(efx->net_dev) &&
 497                 netif_device_present(efx->net_dev));
 498         netif_tx_disable(efx->net_dev);
 499
 500         efx_stop_datapath(efx);
 501 }
 502
 503 /* Context: process, dev_base_lock or RTNL held, non-blocking. */
 504 void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats)
 505 {
 506         struct efx_nic *efx = netdev_priv(net_dev);
 507
 508         spin_lock_bh(&efx->stats_lock);
 509         efx->type->update_stats(efx, NULL, stats);
 510         spin_unlock_bh(&efx->stats_lock);
 511 }
 512
 513 /* Push loopback/power/transmit disable settings to the PHY, and reconfigure
 514  * the MAC appropriately. All other PHY configuration changes are pushed
 515  * through phy_op->set_settings(), and pushed asynchronously to the MAC
 516  * through efx_monitor().
 517  *
 518  * Callers must hold the mac_lock
 519  */
 520 int __efx_reconfigure_port(struct efx_nic *efx)
 521 {
 522         enum efx_phy_mode phy_mode;
 523         int rc = 0;
 524
 525         WARN_ON(!mutex_is_locked(&efx->mac_lock));
 526
 527         /* Disable PHY transmit in mac level loopbacks */
 528         phy_mode = efx->phy_mode;
 529         if (LOOPBACK_INTERNAL(efx))
 530                 efx->phy_mode |= PHY_MODE_TX_DISABLED;
 531         else
 532                 efx->phy_mode &= ~PHY_MODE_TX_DISABLED;
 533
 534         if (efx->type->reconfigure_port)
 535                 rc = efx->type->reconfigure_port(efx);
 536
 537         if (rc)
 538                 efx->phy_mode = phy_mode;
 539
 540         return rc;
 541 }
 542
 543 /* Reinitialise the MAC to pick up new PHY settings, even if the port is
 544  * disabled.
 545  */
 546 int efx_reconfigure_port(struct efx_nic *efx)
 547 {
 548         int rc;
 549
 550         EFX_ASSERT_RESET_SERIALISED(efx);
 551
 552         mutex_lock(&efx->mac_lock);
 553         rc = __efx_reconfigure_port(efx);
 554         mutex_unlock(&efx->mac_lock);
 555
 556         return rc;
 557 }
 558
 559 /**************************************************************************
 560  *
 561  * Device reset and suspend
 562  *
 563  **************************************************************************/
 564
 565 static void efx_wait_for_bist_end(struct efx_nic *efx)
 566 {
 567         int i;
 568
 569         for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) {
 570                 if (efx_mcdi_poll_reboot(efx))
 571                         goto out;
 572                 msleep(BIST_WAIT_DELAY_MS);
 573         }
 574
 575         netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n");
 576 out:
 577         /* Either way unset the BIST flag. If we found no reboot we probably
 578          * won't recover, but we should try.
 579          */
 580         efx->mc_bist_for_other_fn = false;
 581 }
 582
 583 /* Try recovery mechanisms.
 584  * For now only EEH is supported.
 585  * Returns 0 if the recovery mechanisms are unsuccessful.
 586  * Returns a non-zero value otherwise.
 587  */
 588 int efx_try_recovery(struct efx_nic *efx)
 589 {
 590 #ifdef CONFIG_EEH
 591         /* A PCI error can occur and not be seen by EEH because nothing
 592          * happens on the PCI bus. In this case the driver may fail and
 593          * schedule a 'recover or reset', leading to this recovery handler.
 594          * Manually call the eeh failure check function.
 595          */
 596         struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev);
 597         if (eeh_dev_check_failure(eehdev)) {
 598                 /* The EEH mechanisms will handle the error and reset the
 599                  * device if necessary.
 600                  */
 601                 return 1;
 602         }
 603 #endif
 604         return 0;
 605 }
 606
 607 /* Tears down the entire software state and most of the hardware state
 608  * before reset.
 609  */
 610 void efx_reset_down(struct efx_nic *efx, enum reset_type method)
 611 {
 612         EFX_ASSERT_RESET_SERIALISED(efx);
 613
 614         if (method == RESET_TYPE_MCDI_TIMEOUT)
 615                 efx->type->prepare_flr(efx);
 616
 617         efx_stop_all(efx);
 618         efx_disable_interrupts(efx);
 619
 620         mutex_lock(&efx->mac_lock);
 621         down_write(&efx->filter_sem);
 622         mutex_lock(&efx->rss_lock);
 623         if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
 624             method != RESET_TYPE_DATAPATH)
 625                 efx->phy_op->fini(efx);
 626         efx->type->fini(efx);
 627 }
 628
 629 /* This function will always ensure that the locks acquired in
 630  * efx_reset_down() are released. A failure return code indicates
 631  * that we were unable to reinitialise the hardware, and the
 632  * driver should be disabled. If ok is false, then the rx and tx
 633  * engines are not restarted, pending a RESET_DISABLE.
 634  */
 635 int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
 636 {
 637         int rc;
 638
 639         EFX_ASSERT_RESET_SERIALISED(efx);
 640
 641         if (method == RESET_TYPE_MCDI_TIMEOUT)
 642                 efx->type->finish_flr(efx);
 643
 644         /* Ensure that SRAM is initialised even if we're disabling the device */
 645         rc = efx->type->init(efx);
 646         if (rc) {
 647                 netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
 648                 goto fail;
 649         }
 650
 651         if (!ok)
 652                 goto fail;
 653
 654         if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
 655             method != RESET_TYPE_DATAPATH) {
 656                 rc = efx->phy_op->init(efx);
 657                 if (rc)
 658                         goto fail;
 659                 rc = efx->phy_op->reconfigure(efx);
 660                 if (rc && rc != -EPERM)
 661                         netif_err(efx, drv, efx->net_dev,
 662                                   "could not restore PHY settings\n");
 663         }
 664
 665         rc = efx_enable_interrupts(efx);
 666         if (rc)
 667                 goto fail;
 668
 669 #ifdef CONFIG_SFC_SRIOV
 670         rc = efx->type->vswitching_restore(efx);
 671         if (rc) /* not fatal; the PF will still work fine */
 672                 netif_warn(efx, probe, efx->net_dev,
 673                            "failed to restore vswitching rc=%d;"
 674                            " VFs may not function\n", rc);
 675 #endif
 676
 677         if (efx->type->rx_restore_rss_contexts)
 678                 efx->type->rx_restore_rss_contexts(efx);
 679         mutex_unlock(&efx->rss_lock);
 680         efx->type->filter_table_restore(efx);
 681         up_write(&efx->filter_sem);
 682         if (efx->type->sriov_reset)
 683                 efx->type->sriov_reset(efx);
 684
 685         mutex_unlock(&efx->mac_lock);
 686
 687         efx_start_all(efx);
 688
 689         if (efx->type->udp_tnl_push_ports)
 690                 efx->type->udp_tnl_push_ports(efx);
 691
 692         return 0;
 693
 694 fail:
 695         efx->port_initialized = false;
 696
 697         mutex_unlock(&efx->rss_lock);
 698         up_write(&efx->filter_sem);
 699         mutex_unlock(&efx->mac_lock);
 700
 701         return rc;
 702 }
 703
 704 /* Reset the NIC using the specified method.  Note that the reset may
 705  * fail, in which case the card will be left in an unusable state.
 706  *
 707  * Caller must hold the rtnl_lock.
 708  */
 709 int efx_reset(struct efx_nic *efx, enum reset_type method)
 710 {
 711         bool disabled;
 712         int rc, rc2;
 713
 714         netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
 715                    RESET_TYPE(method));
 716
 717         efx_device_detach_sync(efx);
 718         efx_reset_down(efx, method);
 719
 720         rc = efx->type->reset(efx, method);
 721         if (rc) {
 722                 netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
 723                 goto out;
 724         }
 725
 726         /* Clear flags for the scopes we covered.  We assume the NIC and
 727          * driver are now quiescent so that there is no race here.
 728          */
 729         if (method < RESET_TYPE_MAX_METHOD)
 730                 efx->reset_pending &= -(1 << (method + 1));
 731         else /* it doesn't fit into the well-ordered scope hierarchy */
 732                 __clear_bit(method, &efx->reset_pending);
 733
 734         /* Reinitialise bus-mastering, which may have been turned off before
 735          * the reset was scheduled. This is still appropriate, even in the
 736          * RESET_TYPE_DISABLE since this driver generally assumes the hardware
 737          * can respond to requests.
 738          */
 739         pci_set_master(efx->pci_dev);
 740
 741 out:
 742         /* Leave device stopped if necessary */
 743         disabled = rc ||
 744                 method == RESET_TYPE_DISABLE ||
 745                 method == RESET_TYPE_RECOVER_OR_DISABLE;
 746         rc2 = efx_reset_up(efx, method, !disabled);
 747         if (rc2) {
 748                 disabled = true;
 749                 if (!rc)
 750                         rc = rc2;
 751         }
 752
 753         if (disabled) {
 754                 dev_close(efx->net_dev);
 755                 netif_err(efx, drv, efx->net_dev, "has been disabled\n");
 756                 efx->state = STATE_DISABLED;
 757         } else {
 758                 netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
 759                 efx_device_attach_if_not_resetting(efx);
 760         }
 761         return rc;
 762 }
 763
 764 /* The worker thread exists so that code that cannot sleep can
 765  * schedule a reset for later.
 766  */
 767 static void efx_reset_work(struct work_struct *data)
 768 {
 769         struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
 770         unsigned long pending;
 771         enum reset_type method;
 772
 773         pending = READ_ONCE(efx->reset_pending);
 774         method = fls(pending) - 1;
 775
 776         if (method == RESET_TYPE_MC_BIST)
 777                 efx_wait_for_bist_end(efx);
 778
 779         if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
 780              method == RESET_TYPE_RECOVER_OR_ALL) &&
 781             efx_try_recovery(efx))
 782                 return;
 783
 784         if (!pending)
 785                 return;
 786
 787         rtnl_lock();
 788
 789         /* We checked the state in efx_schedule_reset() but it may
 790          * have changed by now.  Now that we have the RTNL lock,
 791          * it cannot change again.
 792          */
 793         if (efx->state == STATE_READY)
 794                 (void)efx_reset(efx, method);
 795
 796         rtnl_unlock();
 797 }
 798
 799 void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
 800 {
 801         enum reset_type method;
 802
 803         if (efx->state == STATE_RECOVERY) {
 804                 netif_dbg(efx, drv, efx->net_dev,
 805                           "recovering: skip scheduling %s reset\n",
 806                           RESET_TYPE(type));
 807                 return;
 808         }
 809
 810         switch (type) {
 811         case RESET_TYPE_INVISIBLE:
 812         case RESET_TYPE_ALL:
 813         case RESET_TYPE_RECOVER_OR_ALL:
 814         case RESET_TYPE_WORLD:
 815         case RESET_TYPE_DISABLE:
 816         case RESET_TYPE_RECOVER_OR_DISABLE:
 817         case RESET_TYPE_DATAPATH:
 818         case RESET_TYPE_MC_BIST:
 819         case RESET_TYPE_MCDI_TIMEOUT:
 820                 method = type;
 821                 netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
 822                           RESET_TYPE(method));
 823                 break;
 824         default:
 825                 method = efx->type->map_reset_reason(type);
 826                 netif_dbg(efx, drv, efx->net_dev,
 827                           "scheduling %s reset for %s\n",
 828                           RESET_TYPE(method), RESET_TYPE(type));
 829                 break;
 830         }
 831
 832         set_bit(method, &efx->reset_pending);
 833         smp_mb(); /* ensure we change reset_pending before checking state */
 834
 835         /* If we're not READY then just leave the flags set as the cue
 836          * to abort probing or reschedule the reset later.
 837          */
 838         if (READ_ONCE(efx->state) != STATE_READY)
 839                 return;
 840
 841         /* efx_process_channel() will no longer read events once a
 842          * reset is scheduled. So switch back to poll'd MCDI completions.
 843          */
 844         efx_mcdi_mode_poll(efx);
 845
 846         efx_queue_reset_work(efx);
 847 }
 848
 849 /**************************************************************************
 850  *
 851  * Dummy PHY/MAC operations
 852  *
 853  * Can be used for some unimplemented operations
 854  * Needed so all function pointers are valid and do not have to be tested
 855  * before use
 856  *
 857  **************************************************************************/
/* No-op for operations that return int: ignores @efx and returns 0. */
int efx_port_dummy_op_int(struct efx_nic *efx)
{
        return 0;
}
/* No-op for operations that return nothing: ignores @efx. */
void efx_port_dummy_op_void(struct efx_nic *efx) {}
 863
/* No-op poll operation: ignores @efx and always returns false. */
static bool efx_port_dummy_op_poll(struct efx_nic *efx)
{
        return false;
}
 868
/* PHY operations table in which every listed hook is one of the no-op
 * handlers.  efx_init_struct() installs it as efx->phy_op.
 */
static const struct efx_phy_operations efx_dummy_phy_operations = {
        .init            = efx_port_dummy_op_int,
        .reconfigure     = efx_port_dummy_op_int,
        .poll            = efx_port_dummy_op_poll,
        .fini            = efx_port_dummy_op_void,
};
 875
 876 /**************************************************************************
 877  *
 878  * Data housekeeping
 879  *
 880  **************************************************************************/
 881
 882 /* This zeroes out and then fills in the invariants in a struct
 883  * efx_nic (including all sub-structures).
 884  */
 885 int efx_init_struct(struct efx_nic *efx,
 886                     struct pci_dev *pci_dev, struct net_device *net_dev)
 887 {
 888         int rc = -ENOMEM;
 889
 890         /* Initialise common structures */
 891         INIT_LIST_HEAD(&efx->node);
 892         INIT_LIST_HEAD(&efx->secondary_list);
 893         spin_lock_init(&efx->biu_lock);
 894 #ifdef CONFIG_SFC_MTD
 895         INIT_LIST_HEAD(&efx->mtd_list);
 896 #endif
 897         INIT_WORK(&efx->reset_work, efx_reset_work);
 898         INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor);
 899         efx_selftest_async_init(efx);
 900         efx->pci_dev = pci_dev;
 901         efx->msg_enable = debug;
 902         efx->state = STATE_UNINIT;
 903         strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));
 904
 905         efx->net_dev = net_dev;
 906         efx->rx_prefix_size = efx->type->rx_prefix_size;
 907         efx->rx_ip_align =
 908                 NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0;
 909         efx->rx_packet_hash_offset =
 910                 efx->type->rx_hash_offset - efx->type->rx_prefix_size;
 911         efx->rx_packet_ts_offset =
 912                 efx->type->rx_ts_offset - efx->type->rx_prefix_size;
 913         INIT_LIST_HEAD(&efx->rss_context.list);
 914         mutex_init(&efx->rss_lock);
 915         spin_lock_init(&efx->stats_lock);
 916         efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
 917         efx->num_mac_stats = MC_CMD_MAC_NSTATS;
 918         BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END);
 919         mutex_init(&efx->mac_lock);
 920 #ifdef CONFIG_RFS_ACCEL
 921         mutex_init(&efx->rps_mutex);
 922         spin_lock_init(&efx->rps_hash_lock);
 923         /* Failure to allocate is not fatal, but may degrade ARFS performance */
 924         efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE,
 925                                       sizeof(*efx->rps_hash_table), GFP_KERNEL);
 926 #endif
 927         efx->phy_op = &efx_dummy_phy_operations;
 928         efx->mdio.dev = net_dev;
 929         INIT_WORK(&efx->mac_work, efx_mac_work);
 930         init_waitqueue_head(&efx->flush_wq);
 931
 932         rc = efx_init_channels(efx);
 933         if (rc)
 934                 goto fail;
 935
 936         /* Would be good to use the net_dev name, but we're too early */
 937         snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s",
 938                  pci_name(pci_dev));
 939         efx->workqueue = create_singlethread_workqueue(efx->workqueue_name);
 940         if (!efx->workqueue) {
 941                 rc = -ENOMEM;
 942                 goto fail;
 943         }
 944
 945         return 0;
 946
 947 fail:
 948         efx_fini_struct(efx);
 949         return rc;
 950 }
 951
 952 void efx_fini_struct(struct efx_nic *efx)
 953 {
 954 #ifdef CONFIG_RFS_ACCEL
 955         kfree(efx->rps_hash_table);
 956 #endif
 957
 958         efx_fini_channels(efx);
 959
 960         kfree(efx->vpd_sn);
 961
 962         if (efx->workqueue) {
 963                 destroy_workqueue(efx->workqueue);
 964                 efx->workqueue = NULL;
 965         }
 966 }
 967
 968 /* This configures the PCI device to enable I/O and DMA. */
 969 int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
 970                 unsigned int mem_map_size)
 971 {
 972         struct pci_dev *pci_dev = efx->pci_dev;
 973         int rc;
 974
 975         netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");
 976
 977         rc = pci_enable_device(pci_dev);
 978         if (rc) {
 979                 netif_err(efx, probe, efx->net_dev,
 980                           "failed to enable PCI device\n");
 981                 goto fail1;
 982         }
 983
 984         pci_set_master(pci_dev);
 985
 986         /* Set the PCI DMA mask.  Try all possibilities from our
 987          * genuine mask down to 32 bits, because some architectures
 988          * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit
 989          * masks event though they reject 46 bit masks.
 990          */
 991         while (dma_mask > 0x7fffffffUL) {
 992                 rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
 993                 if (rc == 0)
 994                         break;
 995                 dma_mask >>= 1;
 996         }
 997         if (rc) {
 998                 netif_err(efx, probe, efx->net_dev,
 999                           "could not find a suitable DMA mask\n");
1000                 goto fail2;
1001         }
1002         netif_dbg(efx, probe, efx->net_dev,
1003                   "using DMA mask %llx\n", (unsigned long long)dma_mask);
1004
1005         efx->membase_phys = pci_resource_start(efx->pci_dev, bar);
1006         if (!efx->membase_phys) {
1007                 netif_err(efx, probe, efx->net_dev,
1008                           "ERROR: No BAR%d mapping from the BIOS. "
1009                           "Try pci=realloc on the kernel command line\n", bar);
1010                 rc = -ENODEV;
1011                 goto fail3;
1012         }
1013
1014         rc = pci_request_region(pci_dev, bar, "sfc");
1015         if (rc) {
1016                 netif_err(efx, probe, efx->net_dev,
1017                           "request for memory BAR failed\n");
1018                 rc = -EIO;
1019                 goto fail3;
1020         }
1021
1022         efx->membase = ioremap(efx->membase_phys, mem_map_size);
1023         if (!efx->membase) {
1024                 netif_err(efx, probe, efx->net_dev,
1025                           "could not map memory BAR at %llx+%x\n",
1026                           (unsigned long long)efx->membase_phys, mem_map_size);
1027                 rc = -ENOMEM;
1028                 goto fail4;
1029         }
1030         netif_dbg(efx, probe, efx->net_dev,
1031                   "memory BAR at %llx+%x (virtual %p)\n",
1032                   (unsigned long long)efx->membase_phys, mem_map_size,
1033                   efx->membase);
1034
1035         return 0;
1036
1037 fail4:
1038         pci_release_region(efx->pci_dev, bar);
1039 fail3:
1040         efx->membase_phys = 0;
1041 fail2:
1042         pci_disable_device(efx->pci_dev);
1043 fail1:
1044         return rc;
1045 }
1046
1047 void efx_fini_io(struct efx_nic *efx, int bar)
1048 {
1049         netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");
1050
1051         if (efx->membase) {
1052                 iounmap(efx->membase);
1053                 efx->membase = NULL;
1054         }
1055
1056         if (efx->membase_phys) {
1057                 pci_release_region(efx->pci_dev, bar);
1058                 efx->membase_phys = 0;
1059         }
1060
1061         /* Don't disable bus-mastering if VFs are assigned */
1062         if (!pci_vfs_assigned(efx->pci_dev))
1063                 pci_disable_device(efx->pci_dev);
1064 }
1065
1066 #ifdef CONFIG_SFC_MCDI_LOGGING
1067 static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr,
1068                              char *buf)
1069 {
1070         struct efx_nic *efx = dev_get_drvdata(dev);
1071         struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
1072
1073         return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled);
1074 }
1075
1076 static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr,
1077                             const char *buf, size_t count)
1078 {
1079         struct efx_nic *efx = dev_get_drvdata(dev);
1080         struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
1081         bool enable = count > 0 && *buf != '0';
1082
1083         mcdi->logging_enabled = enable;
1084         return count;
1085 }
1086
/* Defines dev_attr_mcdi_logging: the "mcdi_logging" device attribute with
 * mode 0644, read through show_mcdi_log() and written through
 * set_mcdi_log().
 */
static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log);
1088
1089 void efx_init_mcdi_logging(struct efx_nic *efx)
1090 {
1091         int rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
1092
1093         if (rc) {
1094                 netif_warn(efx, drv, efx->net_dev,
1095                            "failed to init net dev attributes\n");
1096         }
1097 }
1098
1099 void efx_fini_mcdi_logging(struct efx_nic *efx)
1100 {
1101         device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
1102 }
1103 #endif