// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2023, Intel Corporation. */

/* Intel(R) Ethernet Connection E800 Series Linux Driver */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <generated/utsrelease.h>
#include <linux/crash_dump.h>
#include "ice_dcb_lib.h"
#include "ice_dcb_nl.h"
#include "devlink/devlink.h"
#include "devlink/devlink_port.h"
#include "ice_sf_eth.h"
#include "ice_hwmon.h"
/* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the
 * ice tracepoint functions. This must be done exactly once across the
 * ice driver.
 */
#define CREATE_TRACE_POINTS
#include "ice_trace.h"
#include "ice_eswitch.h"
#include "ice_tc_lib.h"
#include "ice_vsi_vlan_ops.h"
#include <net/xdp_sock_drv.h>

#define DRV_SUMMARY	"Intel(R) Ethernet Connection E800 Series Linux Driver"
static const char ice_driver_string[] = DRV_SUMMARY;
static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";

/* DDP Package file located in firmware search paths (e.g. /lib/firmware/) */
#define ICE_DDP_PKG_PATH	"intel/ice/ddp/"
#define ICE_DDP_PKG_FILE	ICE_DDP_PKG_PATH "ice.pkg"

MODULE_DESCRIPTION(DRV_SUMMARY);
MODULE_IMPORT_NS("LIBIE");
MODULE_LICENSE("GPL v2");
MODULE_FIRMWARE(ICE_DDP_PKG_FILE);

static int debug = -1;
module_param(debug, int, 0644);
#ifndef CONFIG_DYNAMIC_DEBUG
MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)");
#else
MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
#endif /* !CONFIG_DYNAMIC_DEBUG */
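
/* Usage note (illustrative): "debug" selects the netif message level
 * (0 = none ... 16 = all); on builds without dynamic debug, a value with the
 * top bit set (0x8XXXXXXX) is instead treated as a HW debug_mask, e.g.:
 *
 *	modprobe ice debug=16
 */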

DEFINE_STATIC_KEY_FALSE(ice_xdp_locking_key);
EXPORT_SYMBOL(ice_xdp_locking_key);

/**
 * ice_hw_to_dev - Get device pointer from the hardware structure
 * @hw: pointer to the device HW structure
 *
 * Used to access the device pointer from compilation units which can't easily
 * include the definition of struct ice_pf without leading to circular header
 * dependencies.
 */
struct device *ice_hw_to_dev(struct ice_hw *hw)
{
	struct ice_pf *pf = container_of(hw, struct ice_pf, hw);

	return &pf->pdev->dev;
}

static struct workqueue_struct *ice_wq;
struct workqueue_struct *ice_lag_wq;
static const struct net_device_ops ice_netdev_safe_mode_ops;
static const struct net_device_ops ice_netdev_ops;

static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);

static void ice_vsi_release_all(struct ice_pf *pf);

static int ice_rebuild_channels(struct ice_pf *pf);
static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_adv_fltr);

static int
ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch,
		     void *cb_priv, enum tc_setup_type type, void *type_data,
		     void *data,
		     void (*cleanup)(struct flow_block_cb *block_cb));
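
/**
 * netif_is_ice - Check whether a net_device is owned by the ice driver
 * @dev: net_device to check, may be NULL
 *
 * Returns true when @dev uses one of the ice netdev_ops tables (normal or
 * safe mode), false otherwise.
 */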
bool netif_is_ice(const struct net_device *dev)
{
	return dev && (dev->netdev_ops == &ice_netdev_ops ||
		       dev->netdev_ops == &ice_netdev_safe_mode_ops);
}

/**
 * ice_get_tx_pending - returns number of Tx descriptors not processed
 * @ring: the ring of descriptors
 */
static u16 ice_get_tx_pending(struct ice_tx_ring *ring)
{
	u16 head, tail;

	head = ring->next_to_clean;
	tail = ring->next_to_use;

	if (head != tail)
		return (head < tail) ?
			tail - head : (tail + ring->count - head);
	return 0;
}
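
/* Worked example for the wrap-around branch above: with ring->count = 512,
 * head (next_to_clean) = 500 and tail (next_to_use) = 10, the number of
 * descriptors still pending is tail + count - head = 10 + 512 - 500 = 22.
 */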

/**
 * ice_check_for_hang_subtask - check for and recover hung queues
 * @pf: pointer to PF struct
 */
static void ice_check_for_hang_subtask(struct ice_pf *pf)
{
	struct ice_vsi *vsi = NULL;
	struct ice_hw *hw;
	unsigned int i;
	int packets;
	u32 v;

	ice_for_each_vsi(pf, v)
		if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) {
			vsi = pf->vsi[v];
			break;
		}

	if (!vsi || test_bit(ICE_VSI_DOWN, vsi->state))
		return;

	if (!(vsi->netdev && netif_carrier_ok(vsi->netdev)))
		return;

	hw = &vsi->back->hw;

	ice_for_each_txq(vsi, i) {
		struct ice_tx_ring *tx_ring = vsi->tx_rings[i];
		struct ice_ring_stats *ring_stats;

		if (!tx_ring)
			continue;
		if (ice_ring_ch_enabled(tx_ring))
			continue;

		ring_stats = tx_ring->ring_stats;
		if (!ring_stats)
			continue;

		/* If packet counter has not changed the queue is
		 * likely stalled, so force an interrupt for this
		 * queue.
		 *
		 * prev_pkt would be negative if there was no
		 * pending work.
		 */
		packets = ring_stats->stats.pkts & INT_MAX;
		if (ring_stats->tx_stats.prev_pkt == packets) {
			/* Trigger sw interrupt to revive the queue */
			ice_trigger_sw_intr(hw, tx_ring->q_vector);
			continue;
		}

		/* Memory barrier between read of packet count and call
		 * to ice_get_tx_pending()
		 */
		smp_rmb();
		ring_stats->tx_stats.prev_pkt =
			ice_get_tx_pending(tx_ring) ? packets : -1;
	}
}

/**
 * ice_init_mac_fltr - Set initial MAC filters
 * @pf: board private structure
 *
 * Set initial set of MAC filters for PF VSI; configure filters for permanent
 * address and broadcast address. If an error is encountered, netdevice will be
 * unregistered.
 */
static int ice_init_mac_fltr(struct ice_pf *pf)
{
	struct ice_vsi *vsi;
	u8 *perm_addr;

	vsi = ice_get_main_vsi(pf);
	if (!vsi)
		return -EINVAL;

	perm_addr = vsi->port_info->mac.perm_addr;
	return ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI);
}

/**
 * ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
 * @netdev: the net device on which the sync is happening
 * @addr: MAC address to sync
 *
 * This is a callback function which is called by the in kernel device sync
 * functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only
 * populates the tmp_sync_list, which is later used by ice_add_mac to add the
 * MAC filters to the hardware.
 */
static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = np->vsi;

	if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr,
				     ICE_FWD_TO_VSI))
		return -EINVAL;

	return 0;
}

/**
 * ice_add_mac_to_unsync_list - creates list of MAC addresses to be unsynced
 * @netdev: the net device on which the unsync is happening
 * @addr: MAC address to unsync
 *
 * This is a callback function which is called by the in kernel device unsync
 * functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only
 * populates the tmp_unsync_list, which is later used by ice_remove_mac to
 * delete the MAC filters from the hardware.
 */
static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = np->vsi;

	/* Under some circumstances, we might receive a request to delete our
	 * own device address from our uc list. Because we store the device
	 * address in the VSI's MAC filter list, we need to ignore such
	 * requests and not delete our device address from this list.
	 */
	if (ether_addr_equal(addr, netdev->dev_addr))
		return 0;

	if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr,
				     ICE_FWD_TO_VSI))
		return -EINVAL;

	return 0;
}

/**
 * ice_vsi_fltr_changed - check if filter state changed
 * @vsi: VSI to be checked
 *
 * returns true if filter state has changed, false otherwise.
 */
static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
{
	return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) ||
	       test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
}

/**
 * ice_set_promisc - Enable promiscuous mode for a given PF
 * @vsi: the VSI being configured
 * @promisc_m: mask of promiscuous config bits
 */
static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m)
{
	int status;

	if (vsi->type != ICE_VSI_PF)
		return 0;

	if (ice_vsi_has_non_zero_vlans(vsi)) {
		promisc_m |= (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX);
		status = ice_fltr_set_vlan_vsi_promisc(&vsi->back->hw, vsi,
						       promisc_m);
	} else {
		status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
						  promisc_m, 0);
	}
	if (status && status != -EEXIST)
		return status;

	netdev_dbg(vsi->netdev, "set promisc filter bits for VSI %i: 0x%x\n",
		   vsi->vsi_num, promisc_m);
	return 0;
}

/**
 * ice_clear_promisc - Disable promiscuous mode for a given PF
 * @vsi: the VSI being configured
 * @promisc_m: mask of promiscuous config bits
 */
static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m)
{
	int status;

	if (vsi->type != ICE_VSI_PF)
		return 0;

	if (ice_vsi_has_non_zero_vlans(vsi)) {
		promisc_m |= (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX);
		status = ice_fltr_clear_vlan_vsi_promisc(&vsi->back->hw, vsi,
							 promisc_m);
	} else {
		status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
						    promisc_m, 0);
	}

	netdev_dbg(vsi->netdev, "clear promisc filter bits for VSI %i: 0x%x\n",
		   vsi->vsi_num, promisc_m);
	return status;
}

/**
 * ice_vsi_sync_fltr - Update the VSI filter list to the HW
 * @vsi: ptr to the VSI
 *
 * Push any outstanding VSI filter changes through the AdminQ.
 */
static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
{
	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
	struct device *dev = ice_pf_to_dev(vsi->back);
	struct net_device *netdev = vsi->netdev;
	bool promisc_forced_on = false;
	struct ice_pf *pf = vsi->back;
	struct ice_hw *hw = &pf->hw;
	u32 changed_flags = 0;
	int err;

	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
		usleep_range(1000, 2000);

	changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
	vsi->current_netdev_flags = vsi->netdev->flags;

	INIT_LIST_HEAD(&vsi->tmp_sync_list);
	INIT_LIST_HEAD(&vsi->tmp_unsync_list);

	if (ice_vsi_fltr_changed(vsi)) {
		clear_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
		clear_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);

		/* grab the netdev's addr_list_lock */
		netif_addr_lock_bh(netdev);
		__dev_uc_sync(netdev, ice_add_mac_to_sync_list,
			      ice_add_mac_to_unsync_list);
		__dev_mc_sync(netdev, ice_add_mac_to_sync_list,
			      ice_add_mac_to_unsync_list);
		/* our temp lists are populated. release lock */
		netif_addr_unlock_bh(netdev);
	}

	/* Remove MAC addresses in the unsync list */
	err = ice_fltr_remove_mac_list(vsi, &vsi->tmp_unsync_list);
	ice_fltr_free_list(dev, &vsi->tmp_unsync_list);
	if (err) {
		netdev_err(netdev, "Failed to delete MAC filters\n");
		/* if we failed because of alloc failures, just bail */
		if (err == -ENOMEM)
			goto out;
	}

	/* Add MAC addresses in the sync list */
	err = ice_fltr_add_mac_list(vsi, &vsi->tmp_sync_list);
	ice_fltr_free_list(dev, &vsi->tmp_sync_list);
	/* If filter is added successfully or already exists, do not go into
	 * 'if' condition and report it as error. Instead continue processing
	 * rest of the function.
	 */
	if (err && err != -EEXIST) {
		netdev_err(netdev, "Failed to add MAC filters\n");
		/* If there is no more space for new umac filters, VSI
		 * should go into promiscuous mode. There should be some
		 * space reserved for promiscuous filters.
		 */
		if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC &&
		    !test_and_set_bit(ICE_FLTR_OVERFLOW_PROMISC,
				      vsi->state)) {
			promisc_forced_on = true;
			netdev_warn(netdev, "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
				    vsi->vsi_num);
		} else {
			goto out;
		}
	}
	err = 0;
	/* check for changes in promiscuous modes */
	if (changed_flags & IFF_ALLMULTI) {
		if (vsi->current_netdev_flags & IFF_ALLMULTI) {
			err = ice_set_promisc(vsi, ICE_MCAST_PROMISC_BITS);
			if (err) {
				vsi->current_netdev_flags &= ~IFF_ALLMULTI;
				goto out_promisc;
			}
		} else {
			/* !(vsi->current_netdev_flags & IFF_ALLMULTI) */
			err = ice_clear_promisc(vsi, ICE_MCAST_PROMISC_BITS);
			if (err) {
				vsi->current_netdev_flags |= IFF_ALLMULTI;
				goto out_promisc;
			}
		}
	}

	if (((changed_flags & IFF_PROMISC) || promisc_forced_on) ||
	    test_bit(ICE_VSI_PROMISC_CHANGED, vsi->state)) {
		clear_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
		if (vsi->current_netdev_flags & IFF_PROMISC) {
			/* Apply Rx filter rule to get traffic from wire */
			if (!ice_is_dflt_vsi_in_use(vsi->port_info)) {
				err = ice_set_dflt_vsi(vsi);
				if (err && err != -EEXIST) {
					netdev_err(netdev, "Error %d setting default VSI %i Rx rule\n",
						   err, vsi->vsi_num);
					vsi->current_netdev_flags &=
						~IFF_PROMISC;
					goto out_promisc;
				}
				err = 0;
				vlan_ops->dis_rx_filtering(vsi);

				/* promiscuous mode implies allmulticast so
				 * that VSIs that are in promiscuous mode are
				 * subscribed to multicast packets coming to
				 * the port
				 */
				err = ice_set_promisc(vsi,
						      ICE_MCAST_PROMISC_BITS);
				if (err)
					goto out_promisc;
			}
		} else {
			/* Clear Rx filter to remove traffic from wire */
			if (ice_is_vsi_dflt_vsi(vsi)) {
				err = ice_clear_dflt_vsi(vsi);
				if (err) {
					netdev_err(netdev, "Error %d clearing default VSI %i Rx rule\n",
						   err, vsi->vsi_num);
					vsi->current_netdev_flags |=
						IFF_PROMISC;
					goto out_promisc;
				}
				if (vsi->netdev->features &
				    NETIF_F_HW_VLAN_CTAG_FILTER)
					vlan_ops->ena_rx_filtering(vsi);
			}

			/* disable allmulti here, but only if allmulti is not
			 * still enabled for the netdev
			 */
			if (!(vsi->current_netdev_flags & IFF_ALLMULTI)) {
				err = ice_clear_promisc(vsi,
							ICE_MCAST_PROMISC_BITS);
				if (err)
					netdev_err(netdev, "Error %d clearing multicast promiscuous on VSI %i\n",
						   err, vsi->vsi_num);
			}
		}
	}
	goto exit;

out_promisc:
	set_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
	goto exit;
out:
	/* if something went wrong then set the changed flag so we try again */
	set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
	set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
exit:
	clear_bit(ICE_CFG_BUSY, vsi->state);
	return err;
}

/**
 * ice_sync_fltr_subtask - Sync the VSI filter list with HW
 * @pf: board private structure
 */
static void ice_sync_fltr_subtask(struct ice_pf *pf)
{
	int v;

	if (!pf || !(test_bit(ICE_FLAG_FLTR_SYNC, pf->flags)))
		return;

	clear_bit(ICE_FLAG_FLTR_SYNC, pf->flags);

	ice_for_each_vsi(pf, v)
		if (pf->vsi[v] && ice_vsi_fltr_changed(pf->vsi[v]) &&
		    ice_vsi_sync_fltr(pf->vsi[v])) {
			/* come back and try again later */
			set_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
			break;
		}
}

/**
 * ice_pf_dis_all_vsi - Pause all VSIs on a PF
 * @pf: the PF
 * @locked: is the rtnl_lock already held
 */
static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
{
	int node;
	int v;

	ice_for_each_vsi(pf, v)
		if (pf->vsi[v])
			ice_dis_vsi(pf->vsi[v], locked);

	for (node = 0; node < ICE_MAX_PF_AGG_NODES; node++)
		pf->pf_agg_node[node].num_vsis = 0;

	for (node = 0; node < ICE_MAX_VF_AGG_NODES; node++)
		pf->vf_agg_node[node].num_vsis = 0;
}

/**
 * ice_prepare_for_reset - prep for reset
 * @pf: board private structure
 * @reset_type: reset type requested
 *
 * Inform or close all dependent features in prep for reset.
 */
static void
ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
{
	struct ice_hw *hw = &pf->hw;
	struct ice_vsi *vsi;
	struct ice_vf *vf;
	unsigned int bkt;

	dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type);

	/* already prepared for reset */
	if (test_bit(ICE_PREPARED_FOR_RESET, pf->state))
		return;

	synchronize_irq(pf->oicr_irq.virq);

	ice_unplug_aux_dev(pf);

	/* Notify VFs of impending reset */
	if (ice_check_sq_alive(hw, &hw->mailboxq))
		ice_vc_notify_reset(pf);

	/* Disable VFs until reset is completed */
	mutex_lock(&pf->vfs.table_lock);
	ice_for_each_vf(pf, bkt, vf)
		ice_set_vf_state_dis(vf);
	mutex_unlock(&pf->vfs.table_lock);

	if (ice_is_eswitch_mode_switchdev(pf))
		ice_eswitch_br_fdb_flush(pf->eswitch.br_offloads->bridge);

	/* release ADQ specific HW and SW resources */
	vsi = ice_get_main_vsi(pf);
	if (!vsi)
		goto skip;

	/* to be on safe side, reset orig_rss_size so that normal flow
	 * of deciding rss_size can take precedence
	 */
	vsi->orig_rss_size = 0;

	if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
		if (reset_type == ICE_RESET_PFR) {
			vsi->old_ena_tc = vsi->all_enatc;
			vsi->old_numtc = vsi->all_numtc;
		} else {
			ice_remove_q_channels(vsi, true);

			/* for other reset type, do not support channel rebuild
			 * hence reset needed info
			 */
			clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
			memset(&vsi->mqprio_qopt, 0, sizeof(vsi->mqprio_qopt));
		}
	}

	if (vsi->netdev)
		netif_device_detach(vsi->netdev);
skip:

	/* clear SW filtering DB */
	ice_clear_hw_tbls(hw);
	/* disable the VSIs and their queues that are not already DOWN */
	set_bit(ICE_VSI_REBUILD_PENDING, ice_get_main_vsi(pf)->state);
	ice_pf_dis_all_vsi(pf, false);

	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
		ice_ptp_prepare_for_reset(pf, reset_type);

	if (ice_is_feature_supported(pf, ICE_F_GNSS))
		ice_gnss_exit(pf);

	if (hw->port_info)
		ice_sched_clear_port(hw->port_info);

	ice_shutdown_all_ctrlq(hw, false);

	set_bit(ICE_PREPARED_FOR_RESET, pf->state);
}

/**
 * ice_do_reset - Initiate one of many types of resets
 * @pf: board private structure
 * @reset_type: reset type requested before this function was called.
 */
static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_hw *hw = &pf->hw;

	dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);

	if (pf->lag && pf->lag->bonded && reset_type == ICE_RESET_PFR) {
		dev_dbg(dev, "PFR on a bonded interface, promoting to CORER\n");
		reset_type = ICE_RESET_CORER;
	}

	ice_prepare_for_reset(pf, reset_type);

	/* trigger the reset */
	if (ice_reset(hw, reset_type)) {
		dev_err(dev, "reset %d failed\n", reset_type);
		set_bit(ICE_RESET_FAILED, pf->state);
		clear_bit(ICE_RESET_OICR_RECV, pf->state);
		clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
		clear_bit(ICE_PFR_REQ, pf->state);
		clear_bit(ICE_CORER_REQ, pf->state);
		clear_bit(ICE_GLOBR_REQ, pf->state);
		wake_up(&pf->reset_wait_queue);
		return;
	}

	/* PFR is a bit of a special case because it doesn't result in an OICR
	 * interrupt. So for PFR, rebuild after the reset and clear the reset-
	 * associated state bits.
	 */
	if (reset_type == ICE_RESET_PFR) {
		ice_rebuild(pf, reset_type);
		clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
		clear_bit(ICE_PFR_REQ, pf->state);
		wake_up(&pf->reset_wait_queue);
		ice_reset_all_vfs(pf);
	}
}

/**
 * ice_reset_subtask - Set up for resetting the device and driver
 * @pf: board private structure
 */
static void ice_reset_subtask(struct ice_pf *pf)
{
	enum ice_reset_req reset_type = ICE_RESET_INVAL;

	/* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an
	 * OICR interrupt. The OICR handler (ice_misc_intr) determines what type
	 * of reset is pending and sets bits in pf->state indicating the reset
	 * type and ICE_RESET_OICR_RECV. So, if the latter bit is set
	 * prepare for pending reset if not already (for PF software-initiated
	 * global resets the software should already be prepared for it as
	 * indicated by ICE_PREPARED_FOR_RESET; for global resets initiated
	 * by firmware or software on other PFs, that bit is not set so prepare
	 * for the reset now), poll for reset done, rebuild and return.
	 */
	if (test_bit(ICE_RESET_OICR_RECV, pf->state)) {
		/* Perform the largest reset requested */
		if (test_and_clear_bit(ICE_CORER_RECV, pf->state))
			reset_type = ICE_RESET_CORER;
		if (test_and_clear_bit(ICE_GLOBR_RECV, pf->state))
			reset_type = ICE_RESET_GLOBR;
		if (test_and_clear_bit(ICE_EMPR_RECV, pf->state))
			reset_type = ICE_RESET_EMPR;
		/* return if no valid reset type requested */
		if (reset_type == ICE_RESET_INVAL)
			return;
		ice_prepare_for_reset(pf, reset_type);

		/* make sure we are ready to rebuild */
		if (ice_check_reset(&pf->hw)) {
			set_bit(ICE_RESET_FAILED, pf->state);
		} else {
			/* done with reset. start rebuild */
			pf->hw.reset_ongoing = false;
			ice_rebuild(pf, reset_type);
			/* clear bit to resume normal operations, but
			 * ICE_NEEDS_RESTART bit is set in case rebuild failed
			 */
			clear_bit(ICE_RESET_OICR_RECV, pf->state);
			clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
			clear_bit(ICE_PFR_REQ, pf->state);
			clear_bit(ICE_CORER_REQ, pf->state);
			clear_bit(ICE_GLOBR_REQ, pf->state);
			wake_up(&pf->reset_wait_queue);
			ice_reset_all_vfs(pf);
		}

		return;
	}

	/* No pending resets to finish processing. Check for new resets */
	if (test_bit(ICE_PFR_REQ, pf->state)) {
		reset_type = ICE_RESET_PFR;
		if (pf->lag && pf->lag->bonded) {
			dev_dbg(ice_pf_to_dev(pf), "PFR on a bonded interface, promoting to CORER\n");
			reset_type = ICE_RESET_CORER;
		}
	}
	if (test_bit(ICE_CORER_REQ, pf->state))
		reset_type = ICE_RESET_CORER;
	if (test_bit(ICE_GLOBR_REQ, pf->state))
		reset_type = ICE_RESET_GLOBR;
	/* If no valid reset type requested just return */
	if (reset_type == ICE_RESET_INVAL)
		return;

	/* reset if not already down or busy */
	if (!test_bit(ICE_DOWN, pf->state) &&
	    !test_bit(ICE_CFG_BUSY, pf->state)) {
		ice_do_reset(pf, reset_type);
	}
}

/**
 * ice_print_topo_conflict - print topology conflict message
 * @vsi: the VSI whose topology status is being checked
 */
static void ice_print_topo_conflict(struct ice_vsi *vsi)
{
	switch (vsi->port_info->phy.link_info.topo_media_conflict) {
	case ICE_AQ_LINK_TOPO_CONFLICT:
	case ICE_AQ_LINK_MEDIA_CONFLICT:
	case ICE_AQ_LINK_TOPO_UNREACH_PRT:
	case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT:
	case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA:
		netdev_info(vsi->netdev, "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel(R) Ethernet Port Configuration Tool to address the issue.\n");
		break;
	case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA:
		if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, vsi->back->flags))
			netdev_warn(vsi->netdev, "An unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules\n");
		else
			netdev_err(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
		break;
	default:
		break;
	}
}

/**
 * ice_print_link_msg - print link up or down message
 * @vsi: the VSI whose link status is being queried
 * @isup: boolean for if the link is now up or down
 */
void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
{
	struct ice_aqc_get_phy_caps_data *caps;
	const char *an_advertised;
	const char *fec_req;
	const char *speed;
	const char *an;
	const char *fc;
	const char *fec;
	int status;

	if (vsi->current_isup == isup)
		return;

	vsi->current_isup = isup;

	if (!isup) {
		netdev_info(vsi->netdev, "NIC Link is Down\n");
		return;
	}

	switch (vsi->port_info->phy.link_info.link_speed) {
	case ICE_AQ_LINK_SPEED_200GB:
		speed = "200 G";
		break;
	case ICE_AQ_LINK_SPEED_100GB:
		speed = "100 G";
		break;
	case ICE_AQ_LINK_SPEED_50GB:
		speed = "50 G";
		break;
	case ICE_AQ_LINK_SPEED_40GB:
		speed = "40 G";
		break;
	case ICE_AQ_LINK_SPEED_25GB:
		speed = "25 G";
		break;
	case ICE_AQ_LINK_SPEED_20GB:
		speed = "20 G";
		break;
	case ICE_AQ_LINK_SPEED_10GB:
		speed = "10 G";
		break;
	case ICE_AQ_LINK_SPEED_5GB:
		speed = "5 G";
		break;
	case ICE_AQ_LINK_SPEED_2500MB:
		speed = "2.5 G";
		break;
	case ICE_AQ_LINK_SPEED_1000MB:
		speed = "1 G";
		break;
	case ICE_AQ_LINK_SPEED_100MB:
		speed = "100 M";
		break;
	default:
		speed = "Unknown ";
		break;
	}

	switch (vsi->port_info->fc.current_mode) {
	case ICE_FC_FULL:
		fc = "Rx/Tx";
		break;
	case ICE_FC_TX_PAUSE:
		fc = "Tx";
		break;
	case ICE_FC_RX_PAUSE:
		fc = "Rx";
		break;
	case ICE_FC_NONE:
		fc = "None";
		break;
	default:
		fc = "Unknown";
		break;
	}

	/* Get FEC mode based on negotiated link info */
	switch (vsi->port_info->phy.link_info.fec_info) {
	case ICE_AQ_LINK_25G_RS_528_FEC_EN:
	case ICE_AQ_LINK_25G_RS_544_FEC_EN:
		fec = "RS-FEC";
		break;
	case ICE_AQ_LINK_25G_KR_FEC_EN:
		fec = "FC-FEC/BASE-R";
		break;
	default:
		fec = "NONE";
		break;
	}

	/* check if autoneg completed, might be false due to not supported */
	if (vsi->port_info->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
		an = "True";
	else
		an = "False";

	/* Get FEC mode requested based on PHY caps last SW configuration */
	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
	if (!caps) {
		fec_req = "Unknown";
		an_advertised = "Unknown";
		goto done;
	}

	status = ice_aq_get_phy_caps(vsi->port_info, false,
				     ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
	if (status)
		netdev_info(vsi->netdev, "Get phy capability failed.\n");

	an_advertised = ice_is_phy_caps_an_enabled(caps) ? "On" : "Off";

	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
		fec_req = "RS-FEC";
	else if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
		 caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
		fec_req = "FC-FEC/BASE-R";
	else
		fec_req = "NONE";

	kfree(caps);

done:
	netdev_info(vsi->netdev, "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg Advertised: %s, Autoneg Negotiated: %s, Flow Control: %s\n",
		    speed, fec_req, fec, an_advertised, an, fc);
	ice_print_topo_conflict(vsi);
}

/**
 * ice_vsi_link_event - update the VSI's netdev
 * @vsi: the VSI on which the link event occurred
 * @link_up: whether or not the VSI needs to be set up or down
 */
static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
{
	if (!vsi)
		return;

	if (test_bit(ICE_VSI_DOWN, vsi->state) || !vsi->netdev)
		return;

	if (vsi->type == ICE_VSI_PF) {
		if (link_up == netif_carrier_ok(vsi->netdev))
			return;

		if (link_up) {
			netif_carrier_on(vsi->netdev);
			netif_tx_wake_all_queues(vsi->netdev);
		} else {
			netif_carrier_off(vsi->netdev);
			netif_tx_stop_all_queues(vsi->netdev);
		}
	}
}

/**
 * ice_set_dflt_mib - send a default config MIB to the FW
 * @pf: private PF struct
 *
 * This function sends a default configuration MIB to the FW.
 *
 * If this function errors out at any point, the driver is still able to
 * function. The main impact is that LFC may not operate as expected.
 * Therefore an error state in this function should be treated with a DBG
 * message and continue on with driver rebuild/reenable.
 */
static void ice_set_dflt_mib(struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);
	u8 mib_type, *buf, *lldpmib = NULL;
	u16 len, typelen, offset = 0;
	struct ice_lldp_org_tlv *tlv;
	struct ice_hw *hw = &pf->hw;
	u32 ouisubtype;

	mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
	lldpmib = kzalloc(ICE_LLDPDU_SIZE, GFP_KERNEL);
	if (!lldpmib) {
		dev_dbg(dev, "%s Failed to allocate MIB memory\n",
			__func__);
		return;
	}

	/* Add ETS CFG TLV */
	tlv = (struct ice_lldp_org_tlv *)lldpmib;
	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
		   ICE_IEEE_ETS_TLV_LEN);
	tlv->typelen = htons(typelen);
	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
		      ICE_IEEE_SUBTYPE_ETS_CFG);
	tlv->ouisubtype = htonl(ouisubtype);

	/* ETS CFG all UPs map to TC 0. Next 4 (1 - 4) Octets = 0.
	 * Octets 5 - 12 are BW values, set octet 5 to 100% BW.
	 * Octets 13 - 20 are TSA values - leave as zeros
	 */
	buf = tlv->tlvinfo;
	buf[5] = 0x64;
	len = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen);
	offset += len + 2;
	tlv = (struct ice_lldp_org_tlv *)
		((char *)tlv + sizeof(tlv->typelen) + len);

	/* Add ETS REC TLV */
	buf = tlv->tlvinfo;
	tlv->typelen = htons(typelen);

	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
		      ICE_IEEE_SUBTYPE_ETS_REC);
	tlv->ouisubtype = htonl(ouisubtype);

	/* First octet of buf is reserved
	 * Octets 1 - 4 map UP to TC - all UPs map to zero
	 * Octets 5 - 12 are BW values - set TC 0 to 100%.
	 * Octets 13 - 20 are TSA value - leave as zeros
	 */
	buf[5] = 0x64;
	offset += len + 2;
	tlv = (struct ice_lldp_org_tlv *)
		((char *)tlv + sizeof(tlv->typelen) + len);

	/* Add PFC CFG TLV */
	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
		   ICE_IEEE_PFC_TLV_LEN);
	tlv->typelen = htons(typelen);

	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
		      ICE_IEEE_SUBTYPE_PFC_CFG);
	tlv->ouisubtype = htonl(ouisubtype);

	/* Octet 1 left as all zeros - PFC disabled */
	len = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen);
	offset += len + 2;

	if (ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, offset, NULL))
		dev_dbg(dev, "%s Failed to set default LLDP MIB\n", __func__);

	kfree(lldpmib);
}

/**
 * ice_check_phy_fw_load - check if PHY FW load failed
 * @pf: pointer to PF struct
 * @link_cfg_err: bitmap from the link info structure
 *
 * check if external PHY FW load failed and print an error message if it did
 */
static void ice_check_phy_fw_load(struct ice_pf *pf, u8 link_cfg_err)
{
	if (!(link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE)) {
		clear_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags);
		return;
	}

	if (test_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags))
		return;

	if (link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE) {
		dev_err(ice_pf_to_dev(pf), "Device failed to load the FW for the external PHY. Please download and install the latest NVM for your device and try again\n");
		set_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags);
	}
}

/**
 * ice_check_module_power - check module power level
 * @pf: pointer to PF struct
 * @link_cfg_err: bitmap from the link info structure
 *
 * check module power level returned by a previous call to aq_get_link_info
 * and print error messages if module power level is not supported
 */
static void ice_check_module_power(struct ice_pf *pf, u8 link_cfg_err)
{
	/* if module power level is supported, clear the flag */
	if (!(link_cfg_err & (ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT |
			      ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED))) {
		clear_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
		return;
	}

	/* if ICE_FLAG_MOD_POWER_UNSUPPORTED was previously set and the
	 * above block didn't clear this bit, there's nothing to do
	 */
	if (test_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags))
		return;

	if (link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT) {
		dev_err(ice_pf_to_dev(pf), "The installed module is incompatible with the device's NVM image. Cannot start link\n");
		set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
	} else if (link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED) {
		dev_err(ice_pf_to_dev(pf), "The module's power requirements exceed the device's power supply. Cannot start link\n");
		set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
	}
}

/**
 * ice_check_link_cfg_err - check if link configuration failed
 * @pf: pointer to the PF struct
 * @link_cfg_err: bitmap from the link info structure
 *
 * print if any link configuration failure happens due to the value in the
 * link_cfg_err parameter in the link info structure
 */
static void ice_check_link_cfg_err(struct ice_pf *pf, u8 link_cfg_err)
{
	ice_check_module_power(pf, link_cfg_err);
	ice_check_phy_fw_load(pf, link_cfg_err);
}

/**
 * ice_link_event - process the link event
 * @pf: PF that the link event is associated with
 * @pi: port_info for the port that the link event is associated with
 * @link_up: true if the physical link is up and false if it is down
 * @link_speed: current link speed received from the link event
 *
 * Returns 0 on success and negative on failure
 */
static int
ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
	       u16 link_speed)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_phy_info *phy_info;
	struct ice_vsi *vsi;
	u16 old_link_speed;
	bool old_link;
	int status;

	phy_info = &pi->phy;
	phy_info->link_info_old = phy_info->link_info;

	old_link = !!(phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
	old_link_speed = phy_info->link_info_old.link_speed;

	/* update the link info structures and re-enable link events,
	 * don't bail on failure due to other book keeping needed
	 */
	status = ice_update_link_info(pi);
	if (status)
		dev_dbg(dev, "Failed to update link status on port %d, err %d aq_err %s\n",
			pi->lport, status,
			ice_aq_str(pi->hw->adminq.sq_last_status));

	ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);

	/* Check if the link state is up after updating link info, and treat
	 * this event as an UP event since the link is actually UP now.
	 */
	if (phy_info->link_info.link_info & ICE_AQ_LINK_UP)
		link_up = true;

	vsi = ice_get_main_vsi(pf);
	if (!vsi || !vsi->port_info)
		return -EINVAL;

	/* turn off PHY if media was removed */
	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) &&
	    !(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
		ice_set_link(vsi, false);
	}

	/* if the old link up/down and speed is the same as the new */
	if (link_up == old_link && link_speed == old_link_speed)
		return 0;

	ice_ptp_link_change(pf, pf->hw.pf_id, link_up);

	if (ice_is_dcb_active(pf)) {
		if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
			ice_dcb_rebuild(pf);
	} else {
		if (link_up)
			ice_set_dflt_mib(pf);
	}
	ice_vsi_link_event(vsi, link_up);
	ice_print_link_msg(vsi, link_up);

	ice_vc_notify_link_state(pf);

	return 0;
}

/**
 * ice_watchdog_subtask - periodic tasks not using event driven scheduling
 * @pf: board private structure
 */
static void ice_watchdog_subtask(struct ice_pf *pf)
{
	int i;

	/* if interface is down do nothing */
	if (test_bit(ICE_DOWN, pf->state) ||
	    test_bit(ICE_CFG_BUSY, pf->state))
		return;

	/* make sure we don't do these things too often */
	if (time_before(jiffies,
			pf->serv_tmr_prev + pf->serv_tmr_period))
		return;

	pf->serv_tmr_prev = jiffies;

	/* Update the stats for active netdevs so the network stack
	 * can look at updated numbers whenever it cares to
	 */
	ice_update_pf_stats(pf);
	ice_for_each_vsi(pf, i)
		if (pf->vsi[i] && pf->vsi[i]->netdev)
			ice_update_vsi_stats(pf->vsi[i]);
}

/**
 * ice_init_link_events - enable/initialize link events
 * @pi: pointer to the port_info instance
 *
 * Returns -EIO on failure, 0 on success
 */
static int ice_init_link_events(struct ice_port_info *pi)
{
	u16 mask;

	mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA |
		       ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL |
		       ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL));

	if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) {
		dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n",
			pi->lport);
		return -EIO;
	}

	if (ice_aq_get_link_info(pi, true, NULL, NULL)) {
		dev_dbg(ice_hw_to_dev(pi->hw), "Failed to enable link events for port %d\n",
			pi->lport);
		return -EIO;
	}

	return 0;
}

/**
 * ice_handle_link_event - handle link event via ARQ
 * @pf: PF that the link event is associated with
 * @event: event structure containing link status info
 */
static int
ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
{
	struct ice_aqc_get_link_status_data *link_data;
	struct ice_port_info *port_info;
	int status;

	link_data = (struct ice_aqc_get_link_status_data *)event->msg_buf;
	port_info = pf->hw.port_info;
	if (!port_info)
		return -EINVAL;

	status = ice_link_event(pf, port_info,
				!!(link_data->link_info & ICE_AQ_LINK_UP),
				le16_to_cpu(link_data->link_speed));
	if (status)
		dev_dbg(ice_pf_to_dev(pf), "Could not process link event, error %d\n",
			status);

	return status;
}

/**
 * ice_get_fwlog_data - copy the FW log data from ARQ event
 * @pf: PF that the FW log event is associated with
 * @event: event structure containing FW log data
 */
static void
ice_get_fwlog_data(struct ice_pf *pf, struct ice_rq_event_info *event)
{
	struct ice_fwlog_data *fwlog;
	struct ice_hw *hw = &pf->hw;

	fwlog = &hw->fwlog_ring.rings[hw->fwlog_ring.tail];

	memset(fwlog->data, 0, PAGE_SIZE);
	fwlog->data_size = le16_to_cpu(event->desc.datalen);

	memcpy(fwlog->data, event->msg_buf, fwlog->data_size);
	ice_fwlog_ring_increment(&hw->fwlog_ring.tail, hw->fwlog_ring.size);

	if (ice_fwlog_ring_full(&hw->fwlog_ring)) {
		/* the rings are full so bump the head to create room */
		ice_fwlog_ring_increment(&hw->fwlog_ring.head,
					 hw->fwlog_ring.size);
	}
}

/**
 * ice_aq_prep_for_event - Prepare to wait for an AdminQ event from firmware
 * @pf: pointer to the PF private structure
 * @task: intermediate helper storage and identifier for waiting
 * @opcode: the opcode to wait for
 *
 * Prepares to wait for a specific AdminQ completion event on the ARQ for
 * a given PF. Actual wait would be done by a call to ice_aq_wait_for_event().
 *
 * Calls are separated to allow the caller to register for the event before
 * sending the command, which mitigates a race between registering and the FW
 * responding.
 *
 * To obtain only the descriptor contents, pass a task->event with null
 * msg_buf. If the complete data buffer is desired, allocate the
 * task->event.msg_buf with enough space ahead of time.
 */
void ice_aq_prep_for_event(struct ice_pf *pf, struct ice_aq_task *task,
			   u16 opcode)
{
	INIT_HLIST_NODE(&task->entry);
	task->opcode = opcode;
	task->state = ICE_AQ_TASK_WAITING;

	spin_lock_bh(&pf->aq_wait_lock);
	hlist_add_head(&task->entry, &pf->aq_wait_list);
	spin_unlock_bh(&pf->aq_wait_lock);
}

/**
 * ice_aq_wait_for_event - Wait for an AdminQ event from firmware
 * @pf: pointer to the PF private structure
 * @task: ptr prepared by ice_aq_prep_for_event()
 * @timeout: how long to wait, in jiffies
 *
 * Waits for a specific AdminQ completion event on the ARQ for a given PF. The
 * current thread will be put to sleep until the specified event occurs or
 * until the given timeout is reached.
 *
 * Returns: zero on success, or a negative error code on failure.
 */
int ice_aq_wait_for_event(struct ice_pf *pf, struct ice_aq_task *task,
			  unsigned long timeout)
{
	enum ice_aq_task_state *state = &task->state;
	struct device *dev = ice_pf_to_dev(pf);
	unsigned long start = jiffies;
	long ret;
	int err;

	ret = wait_event_interruptible_timeout(pf->aq_wait_queue,
					       *state != ICE_AQ_TASK_WAITING,
					       timeout);
	switch (*state) {
	case ICE_AQ_TASK_NOT_PREPARED:
		WARN(1, "call to %s without ice_aq_prep_for_event()", __func__);
		err = -EINVAL;
		break;
	case ICE_AQ_TASK_WAITING:
		err = ret < 0 ? ret : -ETIMEDOUT;
		break;
	case ICE_AQ_TASK_CANCELED:
		err = ret < 0 ? ret : -ECANCELED;
		break;
	case ICE_AQ_TASK_COMPLETE:
		err = ret < 0 ? ret : 0;
		break;
	default:
		WARN(1, "Unexpected AdminQ wait task state %u", *state);
		err = -EINVAL;
		break;
	}

	dev_dbg(dev, "Waited %u msecs (max %u msecs) for firmware response to op 0x%04x\n",
		jiffies_to_msecs(jiffies - start),
		jiffies_to_msecs(timeout),
		task->opcode);

	spin_lock_bh(&pf->aq_wait_lock);
	hlist_del(&task->entry);
	spin_unlock_bh(&pf->aq_wait_lock);

	return err;
}
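
/* Typical calling pattern (sketch only; the opcode and the command helper
 * below are placeholders): register for the completion first so the firmware
 * response cannot race the registration, then issue the command and wait.
 *
 *	struct ice_aq_task task = {};
 *	int err;
 *
 *	ice_aq_prep_for_event(pf, &task, ice_aqc_opc_nvm_write);
 *	err = ice_send_hypothetical_cmd(pf);
 *	if (!err)
 *		err = ice_aq_wait_for_event(pf, &task, HZ);
 */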

/**
 * ice_aq_check_events - Check if any thread is waiting for an AdminQ event
 * @pf: pointer to the PF private structure
 * @opcode: the opcode of the event
 * @event: the event to check
 *
 * Loops over the current list of pending threads waiting for an AdminQ event.
 * For each matching task, copy the contents of the event into the task
 * structure and wake up the thread.
 *
 * If multiple threads wait for the same opcode, they will all be woken up.
 *
 * Note that event->msg_buf will only be duplicated if the event has a buffer
 * with enough space already allocated. Otherwise, only the descriptor and
 * message length will be copied.
 */
static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
				struct ice_rq_event_info *event)
{
	struct ice_rq_event_info *task_ev;
	struct ice_aq_task *task;
	bool found = false;

	spin_lock_bh(&pf->aq_wait_lock);
	hlist_for_each_entry(task, &pf->aq_wait_list, entry) {
		if (task->state != ICE_AQ_TASK_WAITING)
			continue;
		if (task->opcode != opcode)
			continue;

		task_ev = &task->event;
		memcpy(&task_ev->desc, &event->desc, sizeof(event->desc));
		task_ev->msg_len = event->msg_len;

		/* Only copy the data buffer if a destination was set */
		if (task_ev->msg_buf && task_ev->buf_len >= event->buf_len) {
			memcpy(task_ev->msg_buf, event->msg_buf,
			       event->buf_len);
			task_ev->buf_len = event->buf_len;
		}

		task->state = ICE_AQ_TASK_COMPLETE;
		found = true;
	}
	spin_unlock_bh(&pf->aq_wait_lock);

	if (found)
		wake_up(&pf->aq_wait_queue);
}

/**
 * ice_aq_cancel_waiting_tasks - Immediately cancel all waiting tasks
 * @pf: the PF private structure
 *
 * Set all waiting tasks to ICE_AQ_TASK_CANCELED, and wake up their threads.
 * This will then cause ice_aq_wait_for_event to exit with -ECANCELED.
 */
static void ice_aq_cancel_waiting_tasks(struct ice_pf *pf)
{
	struct ice_aq_task *task;

	spin_lock_bh(&pf->aq_wait_lock);
	hlist_for_each_entry(task, &pf->aq_wait_list, entry)
		task->state = ICE_AQ_TASK_CANCELED;
	spin_unlock_bh(&pf->aq_wait_lock);

	wake_up(&pf->aq_wait_queue);
}
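
/* Watermark of pending mailbox messages above which the asynchronous VF
 * message handling is told the queue is nearing overflow; passed to
 * ice_vc_process_vf_msg() via struct ice_mbx_data in __ice_clean_ctrlq().
 */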
#define ICE_MBX_OVERFLOW_WATERMARK 64

/**
 * __ice_clean_ctrlq - helper function to clean controlq rings
 * @pf: ptr to struct ice_pf
 * @q_type: specific Control queue type
 */
static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_rq_event_info event;
	struct ice_hw *hw = &pf->hw;
	struct ice_ctl_q_info *cq;
	u16 pending, i = 0;
	const char *qtype;
	u32 val;

	/* Do not clean control queue if/when PF reset fails */
	if (test_bit(ICE_RESET_FAILED, pf->state))
		return 0;

	switch (q_type) {
	case ICE_CTL_Q_ADMIN:
		cq = &hw->adminq;
		qtype = "Admin";
		break;
	case ICE_CTL_Q_MAILBOX:
		cq = &hw->mailboxq;
		qtype = "Mailbox";
		/* we are going to try to detect a malicious VF, so set the
		 * state to begin detection
		 */
		hw->mbx_snapshot.mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
		break;
	default:
		dev_warn(dev, "Unknown control queue type 0x%x\n", q_type);
		return 0;
	}

	/* check for error indications - PF_xx_AxQLEN register layout for
	 * FW/MBX/SB are identical so just use defines for PF_FW_AxQLEN.
	 */
	val = rd32(hw, cq->rq.len);
	if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
		   PF_FW_ARQLEN_ARQCRIT_M)) {
		if (val & PF_FW_ARQLEN_ARQVFE_M)
			dev_dbg(dev, "%s Receive Queue VF Error detected\n",
				qtype);
		if (val & PF_FW_ARQLEN_ARQOVFL_M)
			dev_dbg(dev, "%s Receive Queue Overflow Error detected\n",
				qtype);
		if (val & PF_FW_ARQLEN_ARQCRIT_M)
			dev_dbg(dev, "%s Receive Queue Critical Error detected\n",
				qtype);
		val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
			 PF_FW_ARQLEN_ARQCRIT_M);
		wr32(hw, cq->rq.len, val);
	}

	val = rd32(hw, cq->sq.len);
	if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
		   PF_FW_ATQLEN_ATQCRIT_M)) {
		if (val & PF_FW_ATQLEN_ATQVFE_M)
			dev_dbg(dev, "%s Send Queue VF Error detected\n",
				qtype);
		if (val & PF_FW_ATQLEN_ATQOVFL_M)
			dev_dbg(dev, "%s Send Queue Overflow Error detected\n",
				qtype);
		if (val & PF_FW_ATQLEN_ATQCRIT_M)
			dev_dbg(dev, "%s Send Queue Critical Error detected\n",
				qtype);
		val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
			 PF_FW_ATQLEN_ATQCRIT_M);
		wr32(hw, cq->sq.len, val);
	}

	event.buf_len = cq->rq_buf_size;
	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
	if (!event.msg_buf)
		return 0;

	do {
		struct ice_mbx_data data = {};
		u16 opcode;
		int ret;

		ret = ice_clean_rq_elem(hw, cq, &event, &pending);
		if (ret == -EALREADY)
			break;
		if (ret) {
			dev_err(dev, "%s Receive Queue event error %d\n", qtype,
				ret);
			break;
		}

		opcode = le16_to_cpu(event.desc.opcode);

		/* Notify any thread that might be waiting for this event */
		ice_aq_check_events(pf, opcode, &event);

		switch (opcode) {
		case ice_aqc_opc_get_link_status:
			if (ice_handle_link_event(pf, &event))
				dev_err(dev, "Could not handle link event\n");
			break;
		case ice_aqc_opc_event_lan_overflow:
			ice_vf_lan_overflow_event(pf, &event);
			break;
		case ice_mbx_opc_send_msg_to_pf:
			if (ice_is_feature_supported(pf, ICE_F_MBX_LIMIT)) {
				ice_vc_process_vf_msg(pf, &event, NULL);
				ice_mbx_vf_dec_trig_e830(hw, &event);
			} else {
				u16 val = hw->mailboxq.num_rq_entries;

				data.max_num_msgs_mbx = val;
				val = ICE_MBX_OVERFLOW_WATERMARK;
				data.async_watermark_val = val;
				data.num_msg_proc = i;
				data.num_pending_arq = pending;

				ice_vc_process_vf_msg(pf, &event, &data);
			}
			break;
		case ice_aqc_opc_fw_logs_event:
			ice_get_fwlog_data(pf, &event);
			break;
		case ice_aqc_opc_lldp_set_mib_change:
			ice_dcb_process_lldp_set_mib_change(pf, &event);
			break;
		default:
			dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n",
				qtype, opcode);
			break;
		}
	} while (pending && (i++ < ICE_DFLT_IRQ_WORK));

	kfree(event.msg_buf);

	return pending && (i == ICE_DFLT_IRQ_WORK);
}

/**
 * ice_ctrlq_pending - check if there is a difference between ntc and ntu
 * @hw: pointer to hardware info
 * @cq: control queue information
 *
 * returns true if there are pending messages in a queue, false if there aren't
 */
static bool ice_ctrlq_pending(struct ice_hw *hw, struct ice_ctl_q_info *cq)
{
	u16 ntu;

	ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
	return cq->rq.next_to_clean != ntu;
}

/**
 * ice_clean_adminq_subtask - clean the AdminQ rings
 * @pf: board private structure
 */
static void ice_clean_adminq_subtask(struct ice_pf *pf)
{
	struct ice_hw *hw = &pf->hw;

	if (!test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
		return;

	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN))
		return;

	clear_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);

	/* There might be a situation where new messages arrive to a control
	 * queue between processing the last message and clearing the
	 * EVENT_PENDING bit. So before exiting, check queue head again (using
	 * ice_ctrlq_pending) and process new messages if any.
	 */
	if (ice_ctrlq_pending(hw, &hw->adminq))
		__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN);

	ice_flush(hw);
}

/**
 * ice_clean_mailboxq_subtask - clean the MailboxQ rings
 * @pf: board private structure
 */
static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
{
	struct ice_hw *hw = &pf->hw;

	if (!test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state))
		return;

	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX))
		return;

	clear_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);

	if (ice_ctrlq_pending(hw, &hw->mailboxq))
		__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX);

	ice_flush(hw);
}

/**
 * ice_clean_sbq_subtask - clean the Sideband Queue rings
 * @pf: board private structure
 */
static void ice_clean_sbq_subtask(struct ice_pf *pf)
{
	struct ice_hw *hw = &pf->hw;

	/* if mac_type is not generic, sideband is not supported
	 * and there's nothing to do here
	 */
	if (!ice_is_generic_mac(hw)) {
		clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
		return;
	}

	if (!test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state))
		return;

	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_SB))
		return;

	clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);

	if (ice_ctrlq_pending(hw, &hw->sbq))
		__ice_clean_ctrlq(pf, ICE_CTL_Q_SB);

	ice_flush(hw);
}

/**
 * ice_service_task_schedule - schedule the service task to wake up
 * @pf: board private structure
 *
 * If not already scheduled, this puts the task into the work queue.
 */
void ice_service_task_schedule(struct ice_pf *pf)
{
	if (!test_bit(ICE_SERVICE_DIS, pf->state) &&
	    !test_and_set_bit(ICE_SERVICE_SCHED, pf->state) &&
	    !test_bit(ICE_NEEDS_RESTART, pf->state))
		queue_work(ice_wq, &pf->serv_task);
}

/**
 * ice_service_task_complete - finish up the service task
 * @pf: board private structure
 */
static void ice_service_task_complete(struct ice_pf *pf)
{
	WARN_ON(!test_bit(ICE_SERVICE_SCHED, pf->state));

	/* force memory (pf->state) to sync before next service task */
	smp_mb__before_atomic();
	clear_bit(ICE_SERVICE_SCHED, pf->state);
}

/**
 * ice_service_task_stop - stop service task and cancel works
 * @pf: board private structure
 *
 * Return 0 if the ICE_SERVICE_DIS bit was not already set,
 * 1 otherwise.
 */
static int ice_service_task_stop(struct ice_pf *pf)
{
	int ret;

	ret = test_and_set_bit(ICE_SERVICE_DIS, pf->state);

	if (pf->serv_tmr.function)
		del_timer_sync(&pf->serv_tmr);
	if (pf->serv_task.func)
		cancel_work_sync(&pf->serv_task);

	clear_bit(ICE_SERVICE_SCHED, pf->state);
	return ret;
}

/**
 * ice_service_task_restart - restart service task and schedule works
 * @pf: board private structure
 *
 * This function is needed for suspend and resume works (e.g WoL scenario)
 */
static void ice_service_task_restart(struct ice_pf *pf)
{
	clear_bit(ICE_SERVICE_DIS, pf->state);
	ice_service_task_schedule(pf);
}

/**
 * ice_service_timer - timer callback to schedule service task
 * @t: pointer to timer_list
 */
static void ice_service_timer(struct timer_list *t)
{
	struct ice_pf *pf = from_timer(pf, t, serv_tmr);

	mod_timer(&pf->serv_tmr, round_jiffies(pf->serv_tmr_period + jiffies));
	ice_service_task_schedule(pf);
}

/**
 * ice_mdd_maybe_reset_vf - reset VF after MDD event
 * @pf: pointer to the PF structure
 * @vf: pointer to the VF structure
 * @reset_vf_tx: whether Tx MDD has occurred
 * @reset_vf_rx: whether Rx MDD has occurred
 *
 * Since the queue can get stuck on VF MDD events, the PF can be configured to
 * automatically reset the VF by enabling the private ethtool flag
 * mdd-auto-reset-vf.
 */
static void ice_mdd_maybe_reset_vf(struct ice_pf *pf, struct ice_vf *vf,
				   bool reset_vf_tx, bool reset_vf_rx)
{
	struct device *dev = ice_pf_to_dev(pf);

	if (!test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags))
		return;

	/* VF MDD event counters will be cleared by reset, so print the event
	 * prior to reset.
	 */
	if (reset_vf_tx)
		ice_print_vf_tx_mdd_event(vf);

	if (reset_vf_rx)
		ice_print_vf_rx_mdd_event(vf);

	dev_info(dev, "PF-to-VF reset on PF %d VF %d due to MDD event\n",
		 pf->hw.pf_id, vf->vf_id);
	ice_reset_vf(vf, ICE_VF_RESET_NOTIFY | ICE_VF_RESET_LOCK);
}

/**
 * ice_handle_mdd_event - handle malicious driver detect event
 * @pf: pointer to the PF structure
 *
 * Called from service task. OICR interrupt handler indicates MDD event.
 * VF MDD logging is guarded by net_ratelimit. Additional PF and VF log
 * messages are wrapped by netif_msg_[rx|tx]_err. Since VF Rx MDD events
 * disable the queue, the PF can be configured to reset the VF using ethtool
 * private flag mdd-auto-reset-vf.
 */
static void ice_handle_mdd_event(struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_hw *hw = &pf->hw;
	struct ice_vf *vf;
	unsigned int bkt;
	u32 reg;

	if (!test_and_clear_bit(ICE_MDD_EVENT_PENDING, pf->state)) {
		/* Since the VF MDD event logging is rate limited, check if
		 * there are pending MDD events.
		 */
		ice_print_vfs_mdd_events(pf);
		return;
	}

	/* find what triggered an MDD event */
	reg = rd32(hw, GL_MDET_TX_PQM);
	if (reg & GL_MDET_TX_PQM_VALID_M) {
		u8 pf_num = FIELD_GET(GL_MDET_TX_PQM_PF_NUM_M, reg);
		u16 vf_num = FIELD_GET(GL_MDET_TX_PQM_VF_NUM_M, reg);
		u8 event = FIELD_GET(GL_MDET_TX_PQM_MAL_TYPE_M, reg);
		u16 queue = FIELD_GET(GL_MDET_TX_PQM_QNUM_M, reg);

		if (netif_msg_tx_err(pf))
			dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
				 event, queue, pf_num, vf_num);
		wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
	}

	reg = rd32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw));
	if (reg & GL_MDET_TX_TCLAN_VALID_M) {
		u8 pf_num = FIELD_GET(GL_MDET_TX_TCLAN_PF_NUM_M, reg);
		u16 vf_num = FIELD_GET(GL_MDET_TX_TCLAN_VF_NUM_M, reg);
		u8 event = FIELD_GET(GL_MDET_TX_TCLAN_MAL_TYPE_M, reg);
		u16 queue = FIELD_GET(GL_MDET_TX_TCLAN_QNUM_M, reg);

		if (netif_msg_tx_err(pf))
			dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
				 event, queue, pf_num, vf_num);
		wr32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw), U32_MAX);
	}

	reg = rd32(hw, GL_MDET_RX);
	if (reg & GL_MDET_RX_VALID_M) {
		u8 pf_num = FIELD_GET(GL_MDET_RX_PF_NUM_M, reg);
		u16 vf_num = FIELD_GET(GL_MDET_RX_VF_NUM_M, reg);
		u8 event = FIELD_GET(GL_MDET_RX_MAL_TYPE_M, reg);
		u16 queue = FIELD_GET(GL_MDET_RX_QNUM_M, reg);

		if (netif_msg_rx_err(pf))
			dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
				 event, queue, pf_num, vf_num);
		wr32(hw, GL_MDET_RX, 0xffffffff);
	}

	/* check to see if this PF caused an MDD event */
	reg = rd32(hw, PF_MDET_TX_PQM);
	if (reg & PF_MDET_TX_PQM_VALID_M) {
		wr32(hw, PF_MDET_TX_PQM, 0xFFFF);
		if (netif_msg_tx_err(pf))
			dev_info(dev, "Malicious Driver Detection event TX_PQM detected on PF\n");
	}

	reg = rd32(hw, PF_MDET_TX_TCLAN_BY_MAC(hw));
	if (reg & PF_MDET_TX_TCLAN_VALID_M) {
		wr32(hw, PF_MDET_TX_TCLAN_BY_MAC(hw), 0xffff);
		if (netif_msg_tx_err(pf))
			dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on PF\n");
	}

	reg = rd32(hw, PF_MDET_RX);
	if (reg & PF_MDET_RX_VALID_M) {
		wr32(hw, PF_MDET_RX, 0xFFFF);
		if (netif_msg_rx_err(pf))
			dev_info(dev, "Malicious Driver Detection event RX detected on PF\n");
	}

	/* Check to see if one of the VFs caused an MDD event, and then
	 * increment counters and set print pending
	 */
	mutex_lock(&pf->vfs.table_lock);
	ice_for_each_vf(pf, bkt, vf) {
		bool reset_vf_tx = false, reset_vf_rx = false;

		reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id));
		if (reg & VP_MDET_TX_PQM_VALID_M) {
			wr32(hw, VP_MDET_TX_PQM(vf->vf_id), 0xFFFF);
			vf->mdd_tx_events.count++;
			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
			if (netif_msg_tx_err(pf))
				dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n",
					 vf->vf_id);

			reset_vf_tx = true;
		}

		reg = rd32(hw, VP_MDET_TX_TCLAN(vf->vf_id));
		if (reg & VP_MDET_TX_TCLAN_VALID_M) {
			wr32(hw, VP_MDET_TX_TCLAN(vf->vf_id), 0xFFFF);
			vf->mdd_tx_events.count++;
			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
			if (netif_msg_tx_err(pf))
				dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n",
					 vf->vf_id);

			reset_vf_tx = true;
		}

		reg = rd32(hw, VP_MDET_TX_TDPU(vf->vf_id));
		if (reg & VP_MDET_TX_TDPU_VALID_M) {
			wr32(hw, VP_MDET_TX_TDPU(vf->vf_id), 0xFFFF);
			vf->mdd_tx_events.count++;
			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
			if (netif_msg_tx_err(pf))
				dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n",
					 vf->vf_id);

			reset_vf_tx = true;
		}

		reg = rd32(hw, VP_MDET_RX(vf->vf_id));
		if (reg & VP_MDET_RX_VALID_M) {
			wr32(hw, VP_MDET_RX(vf->vf_id), 0xFFFF);
			vf->mdd_rx_events.count++;
			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
			if (netif_msg_rx_err(pf))
				dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n",
					 vf->vf_id);

			reset_vf_rx = true;
		}

		if (reset_vf_tx || reset_vf_rx)
			ice_mdd_maybe_reset_vf(pf, vf, reset_vf_tx,
					       reset_vf_rx);
	}
	mutex_unlock(&pf->vfs.table_lock);

	ice_print_vfs_mdd_events(pf);
}

/**
 * ice_force_phys_link_state - Force the physical link state
 * @vsi: VSI to force the physical link state to up/down
 * @link_up: true/false indicates to set the physical link to up/down
 *
 * Force the physical link state by getting the current PHY capabilities from
 * hardware and setting the PHY config based on the determined capabilities. If
 * link changes, a link event will be triggered because both the Enable Automatic
 * Link Update and LESM Enable bits are set when setting the PHY capabilities.
 *
 * Returns 0 on success, negative on failure
 */
static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
{
	struct ice_aqc_get_phy_caps_data *pcaps;
	struct ice_aqc_set_phy_cfg_data *cfg;
	struct ice_port_info *pi;
	struct device *dev;
	int retcode;

	if (!vsi || !vsi->port_info || !vsi->back)
		return -EINVAL;
	if (vsi->type != ICE_VSI_PF)
		return 0;

	dev = ice_pf_to_dev(vsi->back);

	pi = vsi->port_info;

	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
	if (!pcaps)
		return -ENOMEM;

	retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
				      NULL);
	if (retcode) {
		dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n",
			vsi->vsi_num, retcode);
		retcode = -EIO;
		goto out;
	}

	/* No change in link */
	if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) &&
	    link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP))
		goto out;

	/* Use the current user PHY configuration. The current user PHY
	 * configuration is initialized during probe from PHY capabilities
	 * software mode, and updated on set PHY configuration.
	 */
	cfg = kmemdup(&pi->phy.curr_user_phy_cfg, sizeof(*cfg), GFP_KERNEL);
	if (!cfg) {
		retcode = -ENOMEM;
		goto out;
	}

	cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
	if (link_up)
		cfg->caps |= ICE_AQ_PHY_ENA_LINK;
	else
		cfg->caps &= ~ICE_AQ_PHY_ENA_LINK;

	retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL);
	if (retcode) {
		dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
			vsi->vsi_num, retcode);
		retcode = -EIO;
	}

	kfree(cfg);
out:
	kfree(pcaps);
	return retcode;
}

/**
 * ice_init_nvm_phy_type - Initialize the NVM PHY type
 * @pi: port info structure
 *
 * Initialize nvm_phy_type_[low|high] for link lenient mode support
 */
static int ice_init_nvm_phy_type(struct ice_port_info *pi)
{
	struct ice_aqc_get_phy_caps_data *pcaps;
	struct ice_pf *pf = pi->hw->back;
	int err;

	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
	if (!pcaps)
		return -ENOMEM;

	err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA,
				  pcaps, NULL);

	if (err) {
		dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
		goto out;
	}

	pf->nvm_phy_type_hi = pcaps->phy_type_high;
	pf->nvm_phy_type_lo = pcaps->phy_type_low;

out:
	kfree(pcaps);
	return err;
}

/**
 * ice_init_link_dflt_override - Initialize link default override
 * @pi: port info structure
 *
 * Initialize link default override and PHY total port shutdown during probe
 */
static void ice_init_link_dflt_override(struct ice_port_info *pi)
{
	struct ice_link_default_override_tlv *ldo;
	struct ice_pf *pf = pi->hw->back;

	ldo = &pf->link_dflt_override;
	if (ice_get_link_default_override(ldo, pi))
		return;

	if (!(ldo->options & ICE_LINK_OVERRIDE_PORT_DIS))
		return;

	/* Enable Total Port Shutdown (override/replace link-down-on-close
	 * ethtool private flag) for ports with Port Disable bit set.
	 */
	set_bit(ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags);
	set_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags);
}

/**
 * ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings
 * @pi: port info structure
 *
 * If default override is enabled, initialize the user PHY cfg speed and FEC
 * settings using the default override mask from the NVM.
 *
 * The PHY should only be configured with the default override settings the
 * first time media is available. The ICE_LINK_DEFAULT_OVERRIDE_PENDING state
 * is used to indicate that the user PHY cfg default override is initialized
 * and the PHY has not been configured with the default override settings. The
 * state is set here, and cleared in ice_configure_phy the first time the PHY is
 * configured.
 *
 * This function should be called only if the FW doesn't support default
 * configuration mode, as reported by ice_fw_supports_report_dflt_cfg.
 */
static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi)
{
	struct ice_link_default_override_tlv *ldo;
	struct ice_aqc_set_phy_cfg_data *cfg;
	struct ice_phy_info *phy = &pi->phy;
	struct ice_pf *pf = pi->hw->back;

	ldo = &pf->link_dflt_override;

	/* If link default override is enabled, use to mask NVM PHY capabilities
	 * for speed and FEC default configuration.
	 */
	cfg = &phy->curr_user_phy_cfg;

	if (ldo->phy_type_low || ldo->phy_type_high) {
		cfg->phy_type_low = pf->nvm_phy_type_lo &
				    cpu_to_le64(ldo->phy_type_low);
		cfg->phy_type_high = pf->nvm_phy_type_hi &
				     cpu_to_le64(ldo->phy_type_high);
	}
	cfg->link_fec_opt = ldo->fec_options;
	phy->curr_user_fec_req = ICE_FEC_AUTO;

	set_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state);
}
/**
 * ice_init_phy_user_cfg - Initialize the PHY user configuration
 * @pi: port info structure
 *
 * Initialize the current user PHY configuration, speed, FEC, and FC requested
 * mode to default. The PHY defaults are from get PHY capabilities topology
 * with media so call when media is first available. An error is returned if
 * called when media is not available. The PHY initialization completed state
 * is set here.
 *
 * These configurations are used when setting PHY
 * configuration. The user PHY configuration is updated on set PHY
 * configuration. Returns 0 on success, negative on failure
 */
static int ice_init_phy_user_cfg(struct ice_port_info *pi)
{
	struct ice_aqc_get_phy_caps_data *pcaps;
	struct ice_phy_info *phy = &pi->phy;
	struct ice_pf *pf = pi->hw->back;
	int err;

	if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
		return -EIO;

	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
	if (!pcaps)
		return -ENOMEM;

	if (ice_fw_supports_report_dflt_cfg(pi->hw))
		err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG, pcaps, NULL);
	else
		err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps, NULL);
	if (err) {
		dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
		goto err_out;
	}

	ice_copy_phy_caps_to_cfg(pi, pcaps, &pi->phy.curr_user_phy_cfg);

	/* check if lenient mode is supported and enabled */
	if (ice_fw_supports_link_override(pi->hw) &&
	    !(pcaps->module_compliance_enforcement &
	      ICE_AQC_MOD_ENFORCE_STRICT_MODE)) {
		set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags);

		/* if the FW supports default PHY configuration mode, then the
		 * driver does not have to apply link override settings. If
		 * not, initialize user PHY configuration with link override
		 * values
		 */
		if (!ice_fw_supports_report_dflt_cfg(pi->hw) &&
		    (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN)) {
			ice_init_phy_cfg_dflt_override(pi);
			goto out;
		}
	}

	/* if link default override is not enabled, set user flow control and
	 * FEC settings based on what get_phy_caps returned
	 */
	phy->curr_user_fec_req = ice_caps_to_fec_mode(pcaps->caps,
						      pcaps->link_fec_options);
	phy->curr_user_fc_req = ice_caps_to_fc_mode(pcaps->caps);

out:
	phy->curr_user_speed_req = ICE_AQ_LINK_SPEED_M;
	set_bit(ICE_PHY_INIT_COMPLETE, pf->state);
err_out:
	kfree(pcaps);
	return err;
}
/**
 * ice_configure_phy - configure PHY
 * @vsi: VSI of PHY
 *
 * Set the PHY configuration. If the current PHY configuration is the same as
 * the curr_user_phy_cfg, then do nothing to avoid link flap. Otherwise
 * configure based on the get PHY capabilities for topology with media.
 */
static int ice_configure_phy(struct ice_vsi *vsi)
{
	struct device *dev = ice_pf_to_dev(vsi->back);
	struct ice_port_info *pi = vsi->port_info;
	struct ice_aqc_get_phy_caps_data *pcaps;
	struct ice_aqc_set_phy_cfg_data *cfg;
	struct ice_phy_info *phy = &pi->phy;
	struct ice_pf *pf = vsi->back;
	int err;

	/* Ensure we have media as we cannot configure a medialess port */
	if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
		return -ENOMEDIUM;

	ice_print_topo_conflict(vsi);

	if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags) &&
	    phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA)
		return -EPERM;

	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags))
		return ice_force_phys_link_state(vsi, true);

	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
	if (!pcaps)
		return -ENOMEM;

	/* Get current PHY config */
	err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL);
	if (err) {
		dev_err(dev, "Failed to get PHY configuration, VSI %d error %d\n",
			vsi->vsi_num, err);
		goto done;
	}

	/* If PHY enable link is configured and configuration has not changed,
	 * there's nothing to do
	 */
	if (pcaps->caps & ICE_AQC_PHY_EN_LINK &&
	    ice_phy_caps_equals_cfg(pcaps, &phy->curr_user_phy_cfg))
		goto done;

	/* Use PHY topology as baseline for configuration */
	memset(pcaps, 0, sizeof(*pcaps));
	if (ice_fw_supports_report_dflt_cfg(pi->hw))
		err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG, pcaps, NULL);
	else
		err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps, NULL);
	if (err) {
		dev_err(dev, "Failed to get PHY caps, VSI %d error %d\n",
			vsi->vsi_num, err);
		goto done;
	}

	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
	if (!cfg) {
		err = -ENOMEM;
		goto done;
	}

	ice_copy_phy_caps_to_cfg(pi, pcaps, cfg);

	/* Speed - If default override pending, use curr_user_phy_cfg set in
	 * ice_init_phy_user_cfg_ldo.
	 */
	if (test_and_clear_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING,
			       vsi->back->state)) {
		cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low;
		cfg->phy_type_high = phy->curr_user_phy_cfg.phy_type_high;
	} else {
		u64 phy_low = 0, phy_high = 0;

		ice_update_phy_type(&phy_low, &phy_high,
				    pi->phy.curr_user_speed_req);
		cfg->phy_type_low = pcaps->phy_type_low & cpu_to_le64(phy_low);
		cfg->phy_type_high = pcaps->phy_type_high &
				     cpu_to_le64(phy_high);
	}

	/* Can't provide what was requested; use PHY capabilities */
	if (!cfg->phy_type_low && !cfg->phy_type_high) {
		cfg->phy_type_low = pcaps->phy_type_low;
		cfg->phy_type_high = pcaps->phy_type_high;
	}

	/* FEC */
	ice_cfg_phy_fec(pi, cfg, phy->curr_user_fec_req);

	/* Can't provide what was requested; use PHY capabilities */
	if (cfg->link_fec_opt !=
	    (cfg->link_fec_opt & pcaps->link_fec_options)) {
		cfg->caps |= pcaps->caps & ICE_AQC_PHY_EN_AUTO_FEC;
		cfg->link_fec_opt = pcaps->link_fec_options;
	}

	/* Flow Control - always supported; no need to check against
	 * capabilities
	 */
	ice_cfg_phy_fc(pi, cfg, phy->curr_user_fc_req);

	/* Enable link and link update */
	cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;

	err = ice_aq_set_phy_cfg(&pf->hw, pi, cfg, NULL);
	if (err)
		dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
			vsi->vsi_num, err);

	kfree(cfg);
done:
	kfree(pcaps);
	return err;
}
/**
 * ice_check_media_subtask - Check for media
 * @pf: pointer to PF struct
 *
 * If media is available, then initialize the PHY user configuration if it has
 * not been done, and configure the PHY if the interface is up.
 */
static void ice_check_media_subtask(struct ice_pf *pf)
{
	struct ice_port_info *pi;
	struct ice_vsi *vsi;
	int err;

	/* No need to check for media if it's already present */
	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags))
		return;

	vsi = ice_get_main_vsi(pf);
	if (!vsi)
		return;

	/* Refresh link info and check if media is present */
	pi = vsi->port_info;
	err = ice_update_link_info(pi);
	if (err)
		return;

	ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);

	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
		if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state))
			ice_init_phy_user_cfg(pi);

		/* PHY settings are reset on media insertion, reconfigure
		 * PHY to preserve settings.
		 */
		if (test_bit(ICE_VSI_DOWN, vsi->state) &&
		    test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags))
			return;

		err = ice_configure_phy(vsi);
		if (!err)
			clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);

		/* A Link Status Event will be generated; the event handler
		 * will complete bringing the interface up
		 */
	}
}
/**
 * ice_service_task - manage and run subtasks
 * @work: pointer to work_struct contained by the PF struct
 */
static void ice_service_task(struct work_struct *work)
{
	struct ice_pf *pf = container_of(work, struct ice_pf, serv_task);
	unsigned long start_time = jiffies;

	/* process reset requests first */
	ice_reset_subtask(pf);

	/* bail if a reset/recovery cycle is pending or rebuild failed */
	if (ice_is_reset_in_progress(pf->state) ||
	    test_bit(ICE_SUSPENDED, pf->state) ||
	    test_bit(ICE_NEEDS_RESTART, pf->state)) {
		ice_service_task_complete(pf);
		return;
	}

	if (test_and_clear_bit(ICE_AUX_ERR_PENDING, pf->state)) {
		struct iidc_event *event;

		event = kzalloc(sizeof(*event), GFP_KERNEL);
		if (event) {
			set_bit(IIDC_EVENT_CRIT_ERR, event->type);
			/* report the entire OICR value to AUX driver */
			swap(event->reg, pf->oicr_err_reg);
			ice_send_event_to_aux(pf, event);
			kfree(event);
		}
	}

	/* unplug aux dev per request, if an unplug request came in
	 * while processing a plug request, this will handle it
	 */
	if (test_and_clear_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags))
		ice_unplug_aux_dev(pf);

	/* Plug aux device per request */
	if (test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags))
		ice_plug_aux_dev(pf);

	if (test_and_clear_bit(ICE_FLAG_MTU_CHANGED, pf->flags)) {
		struct iidc_event *event;

		event = kzalloc(sizeof(*event), GFP_KERNEL);
		if (event) {
			set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type);
			ice_send_event_to_aux(pf, event);
			kfree(event);
		}
	}

	ice_clean_adminq_subtask(pf);
	ice_check_media_subtask(pf);
	ice_check_for_hang_subtask(pf);
	ice_sync_fltr_subtask(pf);
	ice_handle_mdd_event(pf);
	ice_watchdog_subtask(pf);

	if (ice_is_safe_mode(pf)) {
		ice_service_task_complete(pf);
		return;
	}

	ice_process_vflr_event(pf);
	ice_clean_mailboxq_subtask(pf);
	ice_clean_sbq_subtask(pf);
	ice_sync_arfs_fltrs(pf);
	ice_flush_fdir_ctx(pf);

	/* Clear ICE_SERVICE_SCHED flag to allow scheduling next event */
	ice_service_task_complete(pf);

	/* If the tasks have taken longer than one service timer period
	 * or there is more work to be done, reset the service timer to
	 * schedule the service task now.
	 */
	if (time_after(jiffies, (start_time + pf->serv_tmr_period)) ||
	    test_bit(ICE_MDD_EVENT_PENDING, pf->state) ||
	    test_bit(ICE_VFLR_EVENT_PENDING, pf->state) ||
	    test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) ||
	    test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) ||
	    test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state) ||
	    test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
		mod_timer(&pf->serv_tmr, jiffies);
}
/**
 * ice_set_ctrlq_len - helper function to set controlq length
 * @hw: pointer to the HW instance
 */
static void ice_set_ctrlq_len(struct ice_hw *hw)
{
	hw->adminq.num_rq_entries = ICE_AQ_LEN;
	hw->adminq.num_sq_entries = ICE_AQ_LEN;
	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
	hw->mailboxq.num_rq_entries = PF_MBX_ARQLEN_ARQLEN_M;
	hw->mailboxq.num_sq_entries = ICE_MBXSQ_LEN;
	hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
	hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
	hw->sbq.num_rq_entries = ICE_SBQ_LEN;
	hw->sbq.num_sq_entries = ICE_SBQ_LEN;
	hw->sbq.rq_buf_size = ICE_SBQ_MAX_BUF_LEN;
	hw->sbq.sq_buf_size = ICE_SBQ_MAX_BUF_LEN;
}
/**
 * ice_schedule_reset - schedule a reset
 * @pf: board private structure
 * @reset: reset being requested
 */
int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset)
{
	struct device *dev = ice_pf_to_dev(pf);

	/* bail out if earlier reset has failed */
	if (test_bit(ICE_RESET_FAILED, pf->state)) {
		dev_dbg(dev, "earlier reset has failed\n");
		return -EIO;
	}
	/* bail if reset/recovery already in progress */
	if (ice_is_reset_in_progress(pf->state)) {
		dev_dbg(dev, "Reset already in progress\n");
		return -EBUSY;
	}

	switch (reset) {
	case ICE_RESET_PFR:
		set_bit(ICE_PFR_REQ, pf->state);
		break;
	case ICE_RESET_CORER:
		set_bit(ICE_CORER_REQ, pf->state);
		break;
	case ICE_RESET_GLOBR:
		set_bit(ICE_GLOBR_REQ, pf->state);
		break;
	default:
		return -EINVAL;
	}

	ice_service_task_schedule(pf);
	return 0;
}
/**
 * ice_irq_affinity_notify - Callback for affinity changes
 * @notify: context as to what irq was changed
 * @mask: the new affinity mask
 *
 * This is a callback function used by the irq_set_affinity_notifier function
 * so that we may register to receive changes to the irq affinity masks.
 */
static void
ice_irq_affinity_notify(struct irq_affinity_notify *notify,
			const cpumask_t *mask)
{
	struct ice_q_vector *q_vector =
		container_of(notify, struct ice_q_vector, affinity_notify);

	cpumask_copy(&q_vector->affinity_mask, mask);
}

/**
 * ice_irq_affinity_release - Callback for affinity notifier release
 * @ref: internal core kernel usage
 *
 * This is a callback function used by the irq_set_affinity_notifier function
 * to inform the current notification subscriber that they will no longer
 * receive notifications.
 */
static void ice_irq_affinity_release(struct kref __always_unused *ref) {}
/**
 * ice_vsi_ena_irq - Enable IRQ for the given VSI
 * @vsi: the VSI being configured
 */
static int ice_vsi_ena_irq(struct ice_vsi *vsi)
{
	struct ice_hw *hw = &vsi->back->hw;
	int i;

	ice_for_each_q_vector(vsi, i)
		ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]);

	ice_flush(hw);
	return 0;
}
/**
 * ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI
 * @vsi: the VSI being configured
 * @basename: name for the vector
 */
static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
{
	int q_vectors = vsi->num_q_vectors;
	struct ice_pf *pf = vsi->back;
	struct device *dev;
	int rx_int_idx = 0;
	int tx_int_idx = 0;
	int vector, err;
	int irq_num;

	dev = ice_pf_to_dev(pf);
	for (vector = 0; vector < q_vectors; vector++) {
		struct ice_q_vector *q_vector = vsi->q_vectors[vector];

		irq_num = q_vector->irq.virq;

		if (q_vector->tx.tx_ring && q_vector->rx.rx_ring) {
			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
				 "%s-%s-%d", basename, "TxRx", rx_int_idx++);
			tx_int_idx++;
		} else if (q_vector->rx.rx_ring) {
			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
				 "%s-%s-%d", basename, "rx", rx_int_idx++);
		} else if (q_vector->tx.tx_ring) {
			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
				 "%s-%s-%d", basename, "tx", tx_int_idx++);
		} else {
			/* skip this unused q_vector */
			continue;
		}
		if (vsi->type == ICE_VSI_CTRL && vsi->vf)
			err = devm_request_irq(dev, irq_num, vsi->irq_handler,
					       IRQF_SHARED, q_vector->name,
					       q_vector);
		else
			err = devm_request_irq(dev, irq_num, vsi->irq_handler,
					       0, q_vector->name, q_vector);
		if (err) {
			netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n",
				   err);
			goto free_q_irqs;
		}

		/* register for affinity change notifications */
		if (!IS_ENABLED(CONFIG_RFS_ACCEL)) {
			struct irq_affinity_notify *affinity_notify;

			affinity_notify = &q_vector->affinity_notify;
			affinity_notify->notify = ice_irq_affinity_notify;
			affinity_notify->release = ice_irq_affinity_release;
			irq_set_affinity_notifier(irq_num, affinity_notify);
		}

		/* assign the mask for this irq */
		irq_update_affinity_hint(irq_num, &q_vector->affinity_mask);
	}

	err = ice_set_cpu_rx_rmap(vsi);
	if (err) {
		netdev_err(vsi->netdev, "Failed to setup CPU RMAP on VSI %u: %pe\n",
			   vsi->vsi_num, ERR_PTR(err));
		goto free_q_irqs;
	}

	vsi->irqs_ready = true;
	return 0;

free_q_irqs:
	while (vector) {
		vector--;
		irq_num = vsi->q_vectors[vector]->irq.virq;
		if (!IS_ENABLED(CONFIG_RFS_ACCEL))
			irq_set_affinity_notifier(irq_num, NULL);
		irq_update_affinity_hint(irq_num, NULL);
		devm_free_irq(dev, irq_num, &vsi->q_vectors[vector]);
	}
	return err;
}
/**
 * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
 * @vsi: VSI to setup Tx rings used by XDP
 *
 * Return 0 on success and negative value on error
 */
static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
{
	struct device *dev = ice_pf_to_dev(vsi->back);
	struct ice_tx_desc *tx_desc;
	int i, j;

	ice_for_each_xdp_txq(vsi, i) {
		u16 xdp_q_idx = vsi->alloc_txq + i;
		struct ice_ring_stats *ring_stats;
		struct ice_tx_ring *xdp_ring;

		xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL);
		if (!xdp_ring)
			goto free_xdp_rings;

		ring_stats = kzalloc(sizeof(*ring_stats), GFP_KERNEL);
		if (!ring_stats) {
			ice_free_tx_ring(xdp_ring);
			goto free_xdp_rings;
		}

		xdp_ring->ring_stats = ring_stats;
		xdp_ring->q_index = xdp_q_idx;
		xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
		xdp_ring->vsi = vsi;
		xdp_ring->netdev = NULL;
		xdp_ring->dev = dev;
		xdp_ring->count = vsi->num_tx_desc;
		WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
		if (ice_setup_tx_ring(xdp_ring))
			goto free_xdp_rings;
		ice_set_ring_xdp(xdp_ring);
		spin_lock_init(&xdp_ring->tx_lock);
		for (j = 0; j < xdp_ring->count; j++) {
			tx_desc = ICE_TX_DESC(xdp_ring, j);
			tx_desc->cmd_type_offset_bsz = 0;
		}
	}

	return 0;

free_xdp_rings:
	for (; i >= 0; i--) {
		if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) {
			kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu);
			vsi->xdp_rings[i]->ring_stats = NULL;
			ice_free_tx_ring(vsi->xdp_rings[i]);
		}
	}
	return -ENOMEM;
}
/**
 * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
 * @vsi: VSI to set the bpf prog on
 * @prog: the bpf prog pointer
 */
static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
{
	struct bpf_prog *old_prog;
	int i;

	old_prog = xchg(&vsi->xdp_prog, prog);
	ice_for_each_rxq(vsi, i)
		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);

	if (old_prog)
		bpf_prog_put(old_prog);
}
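
/* Illustrative sketch (not part of the driver): the xchg() above swaps the
 * program pointer atomically, so a datapath reader observes either the old or
 * the new program, never a torn pointer. Assuming a hypothetical VSI with two
 * Rx rings, the update is equivalent to:
 *
 *	old_prog = vsi->xdp_prog;
 *	vsi->xdp_prog = prog;			// atomic swap
 *	vsi->rx_rings[0]->xdp_prog = prog;	// published per ring
 *	vsi->rx_rings[1]->xdp_prog = prog;
 *	if (old_prog)
 *		bpf_prog_put(old_prog);		// drop reference to old prog
 */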
static struct ice_tx_ring *
ice_xdp_ring_from_qid(struct ice_vsi *vsi, int qid)
{
	struct ice_q_vector *q_vector;
	struct ice_tx_ring *ring;

	if (static_key_enabled(&ice_xdp_locking_key))
		return vsi->xdp_rings[qid % vsi->num_xdp_txq];

	q_vector = vsi->rx_rings[qid]->q_vector;
	ice_for_each_tx_ring(ring, q_vector->tx)
		if (ice_ring_is_xdp(ring))
			return ring;

	return NULL;
}
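
/* Illustrative sketch (not part of the driver): with the locking static key
 * enabled there are fewer XDP Tx rings than CPUs, so rings are shared by
 * queue ID modulo. Assuming a hypothetical num_xdp_txq of 4:
 *
 *	qid 0 -> xdp_rings[0], qid 1 -> xdp_rings[1],
 *	qid 4 -> xdp_rings[0], qid 5 -> xdp_rings[1], ...
 *
 * Shared rings are the reason each XDP ring takes tx_lock in this mode.
 */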
/**
 * ice_map_xdp_rings - Map XDP rings to interrupt vectors
 * @vsi: the VSI with XDP rings being configured
 *
 * Map XDP rings to interrupt vectors and perform the configuration steps
 * dependent on the mapping.
 */
void ice_map_xdp_rings(struct ice_vsi *vsi)
{
	int xdp_rings_rem = vsi->num_xdp_txq;
	int v_idx, q_idx;

	/* follow the logic from ice_vsi_map_rings_to_vectors */
	ice_for_each_q_vector(vsi, v_idx) {
		struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
		int xdp_rings_per_v, q_id, q_base;

		xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
					       vsi->num_q_vectors - v_idx);
		q_base = vsi->num_xdp_txq - xdp_rings_rem;

		for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
			struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id];

			xdp_ring->q_vector = q_vector;
			xdp_ring->next = q_vector->tx.tx_ring;
			q_vector->tx.tx_ring = xdp_ring;
		}
		xdp_rings_rem -= xdp_rings_per_v;
	}

	ice_for_each_rxq(vsi, q_idx) {
		vsi->rx_rings[q_idx]->xdp_ring = ice_xdp_ring_from_qid(vsi,
								       q_idx);
		ice_tx_xsk_pool(vsi, q_idx);
	}
}
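
/* Illustrative sketch (not part of the driver): the DIV_ROUND_UP() loop above
 * spreads XDP rings as evenly as possible across the vectors. Assuming a
 * hypothetical 5 XDP rings and 3 q_vectors:
 *
 *	v_idx 0: DIV_ROUND_UP(5, 3) = 2 rings (q_id 0..1), 3 remain
 *	v_idx 1: DIV_ROUND_UP(3, 2) = 2 rings (q_id 2..3), 1 remains
 *	v_idx 2: DIV_ROUND_UP(1, 1) = 1 ring  (q_id 4)
 */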
/**
 * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
 * @vsi: VSI to bring up Tx rings used by XDP
 * @prog: bpf program that will be assigned to VSI
 * @cfg_type: create from scratch or restore the existing configuration
 *
 * Return 0 on success and negative value on error
 */
int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
			  enum ice_xdp_cfg cfg_type)
{
	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
	struct ice_pf *pf = vsi->back;
	struct ice_qs_cfg xdp_qs_cfg = {
		.qs_mutex = &pf->avail_q_mutex,
		.pf_map = pf->avail_txqs,
		.pf_map_size = pf->max_pf_txqs,
		.q_count = vsi->num_xdp_txq,
		.scatter_count = ICE_MAX_SCATTER_TXQS,
		.vsi_map = vsi->txq_map,
		.vsi_map_offset = vsi->alloc_txq,
		.mapping_mode = ICE_VSI_MAP_CONTIG
	};
	struct device *dev;
	int status, i;

	dev = ice_pf_to_dev(pf);
	vsi->xdp_rings = devm_kcalloc(dev, vsi->num_xdp_txq,
				      sizeof(*vsi->xdp_rings), GFP_KERNEL);
	if (!vsi->xdp_rings)
		return -ENOMEM;

	vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode;
	if (__ice_vsi_get_qs(&xdp_qs_cfg))
		goto err_map_xdp;

	if (static_key_enabled(&ice_xdp_locking_key))
		netdev_warn(vsi->netdev,
			    "Could not allocate one XDP Tx ring per CPU, XDP_TX/XDP_REDIRECT actions will be slower\n");

	if (ice_xdp_alloc_setup_rings(vsi))
		goto clear_xdp_rings;

	/* omit the scheduler update if in reset path; XDP queues will be
	 * taken into account at the end of ice_vsi_rebuild, where
	 * ice_cfg_vsi_lan is being called
	 */
	if (cfg_type == ICE_XDP_CFG_PART)
		return 0;

	ice_map_xdp_rings(vsi);

	/* tell the Tx scheduler that right now we have
	 * additional queues
	 */
	for (i = 0; i < vsi->tc_cfg.numtc; i++)
		max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq;

	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
				 max_txqs);
	if (status) {
		dev_err(dev, "Failed VSI LAN queue config for XDP, error: %d\n",
			status);
		goto clear_xdp_rings;
	}

	/* assign the prog only when it's not already present on VSI;
	 * this flow is a subject of both ethtool -L and ndo_bpf flows;
	 * VSI rebuild that happens under ethtool -L can expose us to
	 * the bpf_prog refcount issues as we would be swapping same
	 * bpf_prog pointers from vsi->xdp_prog and calling bpf_prog_put
	 * on it as it would be treated as an 'old_prog'; for ndo_bpf
	 * this is not harmful as dev_xdp_install bumps the refcount
	 * before calling the op exposed by the driver;
	 */
	if (!ice_is_xdp_ena_vsi(vsi))
		ice_vsi_assign_bpf_prog(vsi, prog);

	return 0;

clear_xdp_rings:
	ice_for_each_xdp_txq(vsi, i)
		if (vsi->xdp_rings[i]) {
			kfree_rcu(vsi->xdp_rings[i], rcu);
			vsi->xdp_rings[i] = NULL;
		}

err_map_xdp:
	mutex_lock(&pf->avail_q_mutex);
	ice_for_each_xdp_txq(vsi, i) {
		clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
		vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
	}
	mutex_unlock(&pf->avail_q_mutex);

	devm_kfree(dev, vsi->xdp_rings);
	return -ENOMEM;
}
/**
 * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
 * @vsi: VSI to remove XDP rings
 * @cfg_type: disable XDP permanently or allow it to be restored later
 *
 * Detach XDP rings from irq vectors, clean up the PF bitmap and free
 * resources
 */
int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type)
{
	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
	struct ice_pf *pf = vsi->back;
	int i, v_idx;

	/* q_vectors are freed in reset path so there's no point in detaching
	 * rings
	 */
	if (cfg_type == ICE_XDP_CFG_PART)
		goto free_qmap;

	ice_for_each_q_vector(vsi, v_idx) {
		struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
		struct ice_tx_ring *ring;

		ice_for_each_tx_ring(ring, q_vector->tx)
			if (!ring->tx_buf || !ice_ring_is_xdp(ring))
				break;

		/* restore the value of last node prior to XDP setup */
		q_vector->tx.tx_ring = ring;
	}

free_qmap:
	mutex_lock(&pf->avail_q_mutex);
	ice_for_each_xdp_txq(vsi, i) {
		clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
		vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
	}
	mutex_unlock(&pf->avail_q_mutex);

	ice_for_each_xdp_txq(vsi, i)
		if (vsi->xdp_rings[i]) {
			if (vsi->xdp_rings[i]->desc)
				ice_free_tx_ring(vsi->xdp_rings[i]);
			kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu);
			vsi->xdp_rings[i]->ring_stats = NULL;
			kfree_rcu(vsi->xdp_rings[i], rcu);
			vsi->xdp_rings[i] = NULL;
		}

	devm_kfree(ice_pf_to_dev(pf), vsi->xdp_rings);
	vsi->xdp_rings = NULL;

	if (static_key_enabled(&ice_xdp_locking_key))
		static_branch_dec(&ice_xdp_locking_key);

	if (cfg_type == ICE_XDP_CFG_PART)
		return 0;

	ice_vsi_assign_bpf_prog(vsi, NULL);

	/* notify Tx scheduler that we destroyed XDP queues and bring
	 * back the old number of child nodes
	 */
	for (i = 0; i < vsi->tc_cfg.numtc; i++)
		max_txqs[i] = vsi->num_txq;

	/* change number of XDP Tx queues to 0 */
	vsi->num_xdp_txq = 0;

	return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
			       max_txqs);
}
/**
 * ice_vsi_rx_napi_schedule - Schedule napi on RX queues from VSI
 * @vsi: VSI to schedule napi on
 */
static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi)
{
	int i;

	ice_for_each_rxq(vsi, i) {
		struct ice_rx_ring *rx_ring = vsi->rx_rings[i];

		if (READ_ONCE(rx_ring->xsk_pool))
			napi_schedule(&rx_ring->q_vector->napi);
	}
}
/**
 * ice_vsi_determine_xdp_res - figure out how many Tx qs can XDP have
 * @vsi: VSI to determine the count of XDP Tx qs
 *
 * returns 0 if Tx qs count is higher than at least half of CPU count,
 * -ENOMEM otherwise
 */
int ice_vsi_determine_xdp_res(struct ice_vsi *vsi)
{
	u16 avail = ice_get_avail_txq_count(vsi->back);
	u16 cpus = num_possible_cpus();

	if (avail < cpus / 2)
		return -ENOMEM;

	if (vsi->type == ICE_VSI_SF)
		avail = vsi->alloc_txq;

	vsi->num_xdp_txq = min_t(u16, avail, cpus);

	if (vsi->num_xdp_txq < cpus)
		static_branch_inc(&ice_xdp_locking_key);

	return 0;
}
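
/* Illustrative sketch (not part of the driver): assuming a hypothetical host
 * with 16 possible CPUs and 10 free Tx queues, 10 >= 16 / 2 so XDP is allowed,
 * num_xdp_txq becomes min(10, 16) = 10, and because 10 < 16 the locking static
 * key is incremented so XDP Tx rings are shared (and locked) between CPUs.
 */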
/**
 * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
 * @vsi: Pointer to VSI structure
 */
static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
{
	if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
		return ICE_RXBUF_1664;
	else
		return ICE_RXBUF_3072;
}
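
/* Illustrative sketch (not part of the driver): in ice_xdp_setup_prog() below,
 * frame_size is netdev->mtu + ICE_ETH_PKT_HDR_PAD. Assuming a hypothetical MTU
 * large enough that the padded frame exceeds ICE_RXBUF_3072 (roughly MTU 3100
 * with legacy-rx off), a program without frags support is rejected with the
 * extack message below, while a frags-capable program is still accepted.
 */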
/**
 * ice_xdp_setup_prog - Add or remove XDP eBPF program
 * @vsi: VSI to setup XDP for
 * @prog: XDP program
 * @extack: netlink extended ack
 */
static int
ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
		   struct netlink_ext_ack *extack)
{
	unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
	int ret = 0, xdp_ring_err = 0;
	bool if_running;

	if (prog && !prog->aux->xdp_has_frags) {
		if (frame_size > ice_max_xdp_frame_size(vsi)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "MTU is too large for linear frames and XDP prog does not support frags");
			return -EOPNOTSUPP;
		}
	}

	/* hot swap progs and avoid toggling link */
	if (ice_is_xdp_ena_vsi(vsi) == !!prog ||
	    test_bit(ICE_VSI_REBUILD_PENDING, vsi->state)) {
		ice_vsi_assign_bpf_prog(vsi, prog);
		return 0;
	}

	if_running = netif_running(vsi->netdev) &&
		     !test_and_set_bit(ICE_VSI_DOWN, vsi->state);

	/* need to stop netdev while setting up the program for Rx rings */
	if (if_running) {
		ret = ice_down(vsi);
		if (ret) {
			NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed");
			return ret;
		}
	}

	if (!ice_is_xdp_ena_vsi(vsi) && prog) {
		xdp_ring_err = ice_vsi_determine_xdp_res(vsi);
		if (xdp_ring_err) {
			NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP");
		} else {
			xdp_ring_err = ice_prepare_xdp_rings(vsi, prog,
							     ICE_XDP_CFG_FULL);
			if (xdp_ring_err)
				NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
		}
		xdp_features_set_redirect_target(vsi->netdev, true);
		/* reallocate Rx queues that are used for zero-copy */
		xdp_ring_err = ice_realloc_zc_buf(vsi, true);
		if (xdp_ring_err)
			NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed");
	} else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
		xdp_features_clear_redirect_target(vsi->netdev);
		xdp_ring_err = ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_FULL);
		if (xdp_ring_err)
			NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
		/* reallocate Rx queues that were used for zero-copy */
		xdp_ring_err = ice_realloc_zc_buf(vsi, false);
		if (xdp_ring_err)
			NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed");
	}

	if (if_running)
		ret = ice_up(vsi);

	if (!ret && prog)
		ice_vsi_rx_napi_schedule(vsi);

	return (ret || xdp_ring_err) ? -ENOMEM : 0;
}
/**
 * ice_xdp_safe_mode - XDP handler for safe mode
 * @dev: netdevice
 * @xdp: XDP command
 */
static int ice_xdp_safe_mode(struct net_device __always_unused *dev,
			     struct netdev_bpf *xdp)
{
	NL_SET_ERR_MSG_MOD(xdp->extack,
			   "Please provide working DDP firmware package in order to use XDP\n"
			   "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst");
	return -EOPNOTSUPP;
}
/**
 * ice_xdp - implements XDP handler
 * @dev: netdevice
 * @xdp: XDP command
 */
int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	struct ice_netdev_priv *np = netdev_priv(dev);
	struct ice_vsi *vsi = np->vsi;
	int ret;

	if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_SF) {
		NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF or SF VSI");
		return -EINVAL;
	}

	mutex_lock(&vsi->xdp_state_lock);

	switch (xdp->command) {
	case XDP_SETUP_PROG:
		ret = ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
		break;
	case XDP_SETUP_XSK_POOL:
		ret = ice_xsk_pool_setup(vsi, xdp->xsk.pool, xdp->xsk.queue_id);
		break;
	default:
		ret = -EINVAL;
	}

	mutex_unlock(&vsi->xdp_state_lock);
	return ret;
}
/**
 * ice_ena_misc_vector - enable the non-queue interrupts
 * @pf: board private structure
 */
static void ice_ena_misc_vector(struct ice_pf *pf)
{
	struct ice_hw *hw = &pf->hw;
	u32 pf_intr_start_offset;
	u32 val;

	/* Disable anti-spoof detection interrupt to prevent spurious event
	 * interrupts during a function reset. Anti-spoof functionally is
	 * still supported.
	 */
	val = rd32(hw, GL_MDCK_TX_TDPU);
	val |= GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M;
	wr32(hw, GL_MDCK_TX_TDPU, val);

	/* clear things first */
	wr32(hw, PFINT_OICR_ENA, 0);	/* disable all */
	rd32(hw, PFINT_OICR);		/* read to clear */

	val = (PFINT_OICR_ECC_ERR_M |
	       PFINT_OICR_MAL_DETECT_M |
	       PFINT_OICR_GRST_M |
	       PFINT_OICR_PCI_EXCEPTION_M |
	       PFINT_OICR_VFLR_M |
	       PFINT_OICR_HMC_ERR_M |
	       PFINT_OICR_PE_PUSH_M |
	       PFINT_OICR_PE_CRITERR_M);

	wr32(hw, PFINT_OICR_ENA, val);

	/* SW_ITR_IDX = 0, but don't change INTENA */
	wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index),
	     GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M);

	if (!pf->hw.dev_caps.ts_dev_info.ts_ll_int_read)
		return;
	pf_intr_start_offset = rd32(hw, PFINT_ALLOC) & PFINT_ALLOC_FIRST;
	wr32(hw, GLINT_DYN_CTL(pf->ll_ts_irq.index + pf_intr_start_offset),
	     GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M);
}
/**
 * ice_ll_ts_intr - ll_ts interrupt handler
 * @irq: interrupt number
 * @data: pointer to a q_vector
 */
static irqreturn_t ice_ll_ts_intr(int __always_unused irq, void *data)
{
	struct ice_pf *pf = data;
	u32 pf_intr_start_offset;
	struct ice_ptp_tx *tx;
	unsigned long flags;
	struct ice_hw *hw;
	u32 val;
	u8 idx;

	hw = &pf->hw;
	tx = &pf->ptp.port.tx;
	spin_lock_irqsave(&tx->lock, flags);
	ice_ptp_complete_tx_single_tstamp(tx);

	idx = find_next_bit_wrap(tx->in_use, tx->len,
				 tx->last_ll_ts_idx_read + 1);
	if (idx != tx->len)
		ice_ptp_req_tx_single_tstamp(tx, idx);
	spin_unlock_irqrestore(&tx->lock, flags);

	val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
	      (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S);
	pf_intr_start_offset = rd32(hw, PFINT_ALLOC) & PFINT_ALLOC_FIRST;
	wr32(hw, GLINT_DYN_CTL(pf->ll_ts_irq.index + pf_intr_start_offset),
	     val);

	return IRQ_HANDLED;
}
/**
 * ice_misc_intr - misc interrupt handler
 * @irq: interrupt number
 * @data: pointer to a q_vector
 */
static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
{
	struct ice_pf *pf = (struct ice_pf *)data;
	irqreturn_t ret = IRQ_HANDLED;
	struct ice_hw *hw = &pf->hw;
	struct device *dev;
	u32 oicr, ena_mask;

	dev = ice_pf_to_dev(pf);
	set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);
	set_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);
	set_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);

	oicr = rd32(hw, PFINT_OICR);
	ena_mask = rd32(hw, PFINT_OICR_ENA);

	if (oicr & PFINT_OICR_SWINT_M) {
		ena_mask &= ~PFINT_OICR_SWINT_M;
		pf->sw_int_count++;
	}

	if (oicr & PFINT_OICR_MAL_DETECT_M) {
		ena_mask &= ~PFINT_OICR_MAL_DETECT_M;
		set_bit(ICE_MDD_EVENT_PENDING, pf->state);
	}
	if (oicr & PFINT_OICR_VFLR_M) {
		/* disable any further VFLR event notifications */
		if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) {
			u32 reg = rd32(hw, PFINT_OICR_ENA);

			reg &= ~PFINT_OICR_VFLR_M;
			wr32(hw, PFINT_OICR_ENA, reg);
		} else {
			ena_mask &= ~PFINT_OICR_VFLR_M;
			set_bit(ICE_VFLR_EVENT_PENDING, pf->state);
		}
	}

	if (oicr & PFINT_OICR_GRST_M) {
		u32 reset;

		/* we have a reset warning */
		ena_mask &= ~PFINT_OICR_GRST_M;
		reset = FIELD_GET(GLGEN_RSTAT_RESET_TYPE_M,
				  rd32(hw, GLGEN_RSTAT));

		if (reset == ICE_RESET_CORER)
			pf->corer_count++;
		else if (reset == ICE_RESET_GLOBR)
			pf->globr_count++;
		else if (reset == ICE_RESET_EMPR)
			pf->empr_count++;
		else
			dev_dbg(dev, "Invalid reset type %d\n", reset);

		/* If a reset cycle isn't already in progress, we set a bit in
		 * pf->state so that the service task can start a reset/rebuild.
		 */
		if (!test_and_set_bit(ICE_RESET_OICR_RECV, pf->state)) {
			if (reset == ICE_RESET_CORER)
				set_bit(ICE_CORER_RECV, pf->state);
			else if (reset == ICE_RESET_GLOBR)
				set_bit(ICE_GLOBR_RECV, pf->state);
			else
				set_bit(ICE_EMPR_RECV, pf->state);

			/* There are couple of different bits at play here.
			 * hw->reset_ongoing indicates whether the hardware is
			 * in reset. This is set to true when a reset interrupt
			 * is received and set back to false after the driver
			 * has determined that the hardware is out of reset.
			 *
			 * ICE_RESET_OICR_RECV in pf->state indicates
			 * that a post reset rebuild is required before the
			 * driver is operational again. This is set above.
			 *
			 * As this is the start of the reset/rebuild cycle, set
			 * both to indicate that.
			 */
			hw->reset_ongoing = true;
		}
	}

	if (oicr & PFINT_OICR_TSYN_TX_M) {
		ena_mask &= ~PFINT_OICR_TSYN_TX_M;
		if (ice_pf_state_is_nominal(pf) &&
		    pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) {
			struct ice_ptp_tx *tx = &pf->ptp.port.tx;
			unsigned long flags;
			u8 idx;

			spin_lock_irqsave(&tx->lock, flags);
			idx = find_next_bit_wrap(tx->in_use, tx->len,
						 tx->last_ll_ts_idx_read + 1);
			if (idx != tx->len)
				ice_ptp_req_tx_single_tstamp(tx, idx);
			spin_unlock_irqrestore(&tx->lock, flags);
		} else if (ice_ptp_pf_handles_tx_interrupt(pf)) {
			set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread);
			ret = IRQ_WAKE_THREAD;
		}
	}

	if (oicr & PFINT_OICR_TSYN_EVNT_M) {
		u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
		u32 gltsyn_stat = rd32(hw, GLTSYN_STAT(tmr_idx));

		ena_mask &= ~PFINT_OICR_TSYN_EVNT_M;

		if (ice_pf_src_tmr_owned(pf)) {
			/* Save EVENTs from GLTSYN register */
			pf->ptp.ext_ts_irq |= gltsyn_stat &
					      (GLTSYN_STAT_EVENT0_M |
					       GLTSYN_STAT_EVENT1_M |
					       GLTSYN_STAT_EVENT2_M);

			ice_ptp_extts_event(pf);
		}
	}

#define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M)
	if (oicr & ICE_AUX_CRIT_ERR) {
		pf->oicr_err_reg |= oicr;
		set_bit(ICE_AUX_ERR_PENDING, pf->state);
		ena_mask &= ~ICE_AUX_CRIT_ERR;
	}

	/* Report any remaining unexpected interrupts */
	oicr &= ena_mask;
	if (oicr) {
		dev_dbg(dev, "unhandled interrupt oicr=0x%08x\n", oicr);
		/* If a critical error is pending there is no choice but to
		 * reset the device.
		 */
		if (oicr & (PFINT_OICR_PCI_EXCEPTION_M |
			    PFINT_OICR_ECC_ERR_M))
			set_bit(ICE_PFR_REQ, pf->state);
	}
	ice_service_task_schedule(pf);
	if (ret == IRQ_HANDLED)
		ice_irq_dynamic_ena(hw, NULL, NULL);

	return ret;
}
/**
 * ice_misc_intr_thread_fn - misc interrupt thread function
 * @irq: interrupt number
 * @data: pointer to a q_vector
 */
static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data)
{
	struct ice_pf *pf = data;
	struct ice_hw *hw;

	hw = &pf->hw;

	if (ice_is_reset_in_progress(pf->state))
		goto skip_irq;

	if (test_and_clear_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread)) {
		/* Process outstanding Tx timestamps. If there is more work,
		 * re-arm the interrupt to trigger again.
		 */
		if (ice_ptp_process_ts(pf) == ICE_TX_TSTAMP_WORK_PENDING) {
			wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M);
			ice_flush(hw);
		}
	}

skip_irq:
	ice_irq_dynamic_ena(hw, NULL, NULL);

	return IRQ_HANDLED;
}
/**
 * ice_dis_ctrlq_interrupts - disable control queue interrupts
 * @hw: pointer to HW structure
 */
static void ice_dis_ctrlq_interrupts(struct ice_hw *hw)
{
	/* disable Admin queue Interrupt causes */
	wr32(hw, PFINT_FW_CTL,
	     rd32(hw, PFINT_FW_CTL) & ~PFINT_FW_CTL_CAUSE_ENA_M);

	/* disable Mailbox queue Interrupt causes */
	wr32(hw, PFINT_MBX_CTL,
	     rd32(hw, PFINT_MBX_CTL) & ~PFINT_MBX_CTL_CAUSE_ENA_M);

	/* disable Sideband queue Interrupt causes */
	wr32(hw, PFINT_SB_CTL,
	     rd32(hw, PFINT_SB_CTL) & ~PFINT_SB_CTL_CAUSE_ENA_M);

	/* disable Control queue Interrupt causes */
	wr32(hw, PFINT_OICR_CTL,
	     rd32(hw, PFINT_OICR_CTL) & ~PFINT_OICR_CTL_CAUSE_ENA_M);

	ice_flush(hw);
}
/**
 * ice_free_irq_msix_ll_ts - Unroll ll_ts vector setup
 * @pf: board private structure
 */
static void ice_free_irq_msix_ll_ts(struct ice_pf *pf)
{
	int irq_num = pf->ll_ts_irq.virq;

	synchronize_irq(irq_num);
	devm_free_irq(ice_pf_to_dev(pf), irq_num, pf);

	ice_free_irq(pf, pf->ll_ts_irq);
}
/**
 * ice_free_irq_msix_misc - Unroll misc vector setup
 * @pf: board private structure
 */
static void ice_free_irq_msix_misc(struct ice_pf *pf)
{
	int misc_irq_num = pf->oicr_irq.virq;
	struct ice_hw *hw = &pf->hw;

	ice_dis_ctrlq_interrupts(hw);

	/* disable OICR interrupt */
	wr32(hw, PFINT_OICR_ENA, 0);
	ice_flush(hw);

	synchronize_irq(misc_irq_num);
	devm_free_irq(ice_pf_to_dev(pf), misc_irq_num, pf);

	ice_free_irq(pf, pf->oicr_irq);
	if (pf->hw.dev_caps.ts_dev_info.ts_ll_int_read)
		ice_free_irq_msix_ll_ts(pf);
}
/**
 * ice_ena_ctrlq_interrupts - enable control queue interrupts
 * @hw: pointer to HW structure
 * @reg_idx: HW vector index to associate the control queue interrupts with
 */
static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx)
{
	u32 val;

	val = ((reg_idx & PFINT_OICR_CTL_MSIX_INDX_M) |
	       PFINT_OICR_CTL_CAUSE_ENA_M);
	wr32(hw, PFINT_OICR_CTL, val);

	/* enable Admin queue Interrupt causes */
	val = ((reg_idx & PFINT_FW_CTL_MSIX_INDX_M) |
	       PFINT_FW_CTL_CAUSE_ENA_M);
	wr32(hw, PFINT_FW_CTL, val);

	/* enable Mailbox queue Interrupt causes */
	val = ((reg_idx & PFINT_MBX_CTL_MSIX_INDX_M) |
	       PFINT_MBX_CTL_CAUSE_ENA_M);
	wr32(hw, PFINT_MBX_CTL, val);

	if (!hw->dev_caps.ts_dev_info.ts_ll_int_read) {
		/* enable Sideband queue Interrupt causes */
		val = ((reg_idx & PFINT_SB_CTL_MSIX_INDX_M) |
		       PFINT_SB_CTL_CAUSE_ENA_M);
		wr32(hw, PFINT_SB_CTL, val);
	}

	ice_flush(hw);
}
/**
 * ice_req_irq_msix_misc - Setup the misc vector to handle non queue events
 * @pf: board private structure
 *
 * This sets up the handler for MSIX 0, which is used to manage the
 * non-queue interrupts, e.g. AdminQ and errors. This is not used
 * when in MSI or Legacy interrupt mode.
 */
static int ice_req_irq_msix_misc(struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_hw *hw = &pf->hw;
	u32 pf_intr_start_offset;
	struct msi_map irq;
	int err = 0;

	if (!pf->int_name[0])
		snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc",
			 dev_driver_string(dev), dev_name(dev));

	if (!pf->int_name_ll_ts[0])
		snprintf(pf->int_name_ll_ts, sizeof(pf->int_name_ll_ts) - 1,
			 "%s-%s:ll_ts", dev_driver_string(dev), dev_name(dev));
	/* Do not request IRQ but do enable OICR interrupt since settings are
	 * lost during reset. Note that this function is called only during
	 * rebuild path and not while reset is in progress.
	 */
	if (ice_is_reset_in_progress(pf->state))
		goto skip_req_irq;

	/* reserve one vector in irq_tracker for misc interrupts */
	irq = ice_alloc_irq(pf, false);
	if (irq.index < 0)
		return irq.index;

	pf->oicr_irq = irq;
	err = devm_request_threaded_irq(dev, pf->oicr_irq.virq, ice_misc_intr,
					ice_misc_intr_thread_fn, 0,
					pf->int_name, pf);
	if (err) {
		dev_err(dev, "devm_request_threaded_irq for %s failed: %d\n",
			pf->int_name, err);
		ice_free_irq(pf, pf->oicr_irq);
		return err;
	}

	/* reserve one vector in irq_tracker for ll_ts interrupt */
	if (!pf->hw.dev_caps.ts_dev_info.ts_ll_int_read)
		goto skip_req_irq;

	irq = ice_alloc_irq(pf, false);
	if (irq.index < 0)
		return irq.index;

	pf->ll_ts_irq = irq;
	err = devm_request_irq(dev, pf->ll_ts_irq.virq, ice_ll_ts_intr, 0,
			       pf->int_name_ll_ts, pf);
	if (err) {
		dev_err(dev, "devm_request_irq for %s failed: %d\n",
			pf->int_name_ll_ts, err);
		ice_free_irq(pf, pf->ll_ts_irq);
		return err;
	}

skip_req_irq:
	ice_ena_misc_vector(pf);

	ice_ena_ctrlq_interrupts(hw, pf->oicr_irq.index);
	/* This enables LL TS interrupt */
	pf_intr_start_offset = rd32(hw, PFINT_ALLOC) & PFINT_ALLOC_FIRST;
	if (pf->hw.dev_caps.ts_dev_info.ts_ll_int_read)
		wr32(hw, PFINT_SB_CTL,
		     ((pf->ll_ts_irq.index + pf_intr_start_offset) &
		      PFINT_SB_CTL_MSIX_INDX_M) | PFINT_SB_CTL_CAUSE_ENA_M);
	wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_irq.index),
	     ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S);

	ice_flush(hw);
	ice_irq_dynamic_ena(hw, NULL, NULL);

	return 0;
}
/**
 * ice_set_ops - set netdev and ethtool ops for the given netdev
 * @vsi: the VSI associated with the new netdev
 */
static void ice_set_ops(struct ice_vsi *vsi)
{
	struct net_device *netdev = vsi->netdev;
	struct ice_pf *pf = ice_netdev_to_pf(netdev);

	if (ice_is_safe_mode(pf)) {
		netdev->netdev_ops = &ice_netdev_safe_mode_ops;
		ice_set_ethtool_safe_mode_ops(netdev);
		return;
	}

	netdev->netdev_ops = &ice_netdev_ops;
	netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
	netdev->xdp_metadata_ops = &ice_xdp_md_ops;
	ice_set_ethtool_ops(netdev);

	if (vsi->type != ICE_VSI_PF)
		return;

	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
			       NETDEV_XDP_ACT_XSK_ZEROCOPY |
			       NETDEV_XDP_ACT_RX_SG;
	netdev->xdp_zc_max_segs = ICE_MAX_BUF_TXD;
}
/**
 * ice_set_netdev_features - set features for the given netdev
 * @netdev: netdev instance
 */
void ice_set_netdev_features(struct net_device *netdev)
{
	struct ice_pf *pf = ice_netdev_to_pf(netdev);
	bool is_dvm_ena = ice_is_dvm_ena(&pf->hw);
	netdev_features_t csumo_features;
	netdev_features_t vlano_features;
	netdev_features_t dflt_features;
	netdev_features_t tso_features;

	if (ice_is_safe_mode(pf)) {
		/* safe mode */
		netdev->features = NETIF_F_SG | NETIF_F_HIGHDMA;
		netdev->hw_features = netdev->features;
		return;
	}

	dflt_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_NTUPLE |
			NETIF_F_RXHASH;

	csumo_features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SCTP_CRC |
			 NETIF_F_IPV6_CSUM;

	vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER |
			 NETIF_F_HW_VLAN_CTAG_TX     |
			 NETIF_F_HW_VLAN_CTAG_RX;

	/* Enable CTAG/STAG filtering by default in Double VLAN Mode (DVM) */
	if (is_dvm_ena)
		vlano_features |= NETIF_F_HW_VLAN_STAG_FILTER;

	tso_features = NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 |
		       NETIF_F_GSO_GRE |
		       NETIF_F_GSO_UDP_TUNNEL |
		       NETIF_F_GSO_GRE_CSUM |
		       NETIF_F_GSO_UDP_TUNNEL_CSUM |
		       NETIF_F_GSO_PARTIAL |
		       NETIF_F_GSO_IPXIP4 |
		       NETIF_F_GSO_IPXIP6 |
		       NETIF_F_GSO_UDP_L4;

	netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM |
					NETIF_F_GSO_GRE_CSUM;
	/* set features that user can change */
	netdev->hw_features = dflt_features | csumo_features |
			      vlano_features | tso_features;

	/* add support for HW_CSUM on packets with MPLS header */
	netdev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6;

	/* enable features */
	netdev->features |= netdev->hw_features;

	netdev->hw_features |= NETIF_F_HW_TC;
	netdev->hw_features |= NETIF_F_LOOPBACK;

	/* encap and VLAN devices inherit default, csumo and tso features */
	netdev->hw_enc_features |= dflt_features | csumo_features |
				   tso_features;
	netdev->vlan_features |= dflt_features | csumo_features |
				 tso_features;

	/* advertise support but don't enable by default since only one type of
	 * VLAN offload can be enabled at a time (i.e. CTAG or STAG). When one
	 * type turns on the other has to be turned off. This is enforced by the
	 * ice_fix_features() ndo callback.
	 */
	if (is_dvm_ena)
		netdev->hw_features |= NETIF_F_HW_VLAN_STAG_RX |
				       NETIF_F_HW_VLAN_STAG_TX;

	/* Leave CRC / FCS stripping enabled by default, but allow the value to
	 * be changed at runtime
	 */
	netdev->hw_features |= NETIF_F_RXFCS;

	netif_set_tso_max_size(netdev, ICE_MAX_TSO_SIZE);
}
/**
 * ice_fill_rss_lut - Fill the RSS lookup table with default values
 * @lut: Lookup table
 * @rss_table_size: Lookup table size
 * @rss_size: Range of queue number for hashing
 */
void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
{
	u16 i;

	for (i = 0; i < rss_table_size; i++)
		lut[i] = i % rss_size;
}
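
/* Illustrative sketch (not part of the driver): assuming a hypothetical
 * rss_table_size of 8 and rss_size of 3, the default LUT becomes
 * { 0, 1, 2, 0, 1, 2, 0, 1 }, i.e. hash buckets are spread round-robin over
 * the first rss_size queues.
 */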
/**
 * ice_pf_vsi_setup - Set up a PF VSI
 * @pf: board private structure
 * @pi: pointer to the port_info instance
 *
 * Returns pointer to the successfully allocated VSI software struct
 * on success, otherwise returns NULL on failure.
 */
static struct ice_vsi *
ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
{
	struct ice_vsi_cfg_params params = {};

	params.type = ICE_VSI_PF;
	params.port_info = pi;
	params.flags = ICE_VSI_FLAG_INIT;

	return ice_vsi_setup(pf, &params);
}

static struct ice_vsi *
ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
		   struct ice_channel *ch)
{
	struct ice_vsi_cfg_params params = {};

	params.type = ICE_VSI_CHNL;
	params.port_info = pi;
	params.ch = ch;
	params.flags = ICE_VSI_FLAG_INIT;

	return ice_vsi_setup(pf, &params);
}

/**
 * ice_ctrl_vsi_setup - Set up a control VSI
 * @pf: board private structure
 * @pi: pointer to the port_info instance
 *
 * Returns pointer to the successfully allocated VSI software struct
 * on success, otherwise returns NULL on failure.
 */
static struct ice_vsi *
ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
{
	struct ice_vsi_cfg_params params = {};

	params.type = ICE_VSI_CTRL;
	params.port_info = pi;
	params.flags = ICE_VSI_FLAG_INIT;

	return ice_vsi_setup(pf, &params);
}

/**
 * ice_lb_vsi_setup - Set up a loopback VSI
 * @pf: board private structure
 * @pi: pointer to the port_info instance
 *
 * Returns pointer to the successfully allocated VSI software struct
 * on success, otherwise returns NULL on failure.
 */
struct ice_vsi *
ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
{
	struct ice_vsi_cfg_params params = {};

	params.type = ICE_VSI_LB;
	params.port_info = pi;
	params.flags = ICE_VSI_FLAG_INIT;

	return ice_vsi_setup(pf, &params);
}
/**
 * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
 * @netdev: network interface to be adjusted
 * @proto: VLAN TPID
 * @vid: VLAN ID to be added
 *
 * net_device_ops implementation for adding VLAN IDs
 */
int ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi_vlan_ops *vlan_ops;
	struct ice_vsi *vsi = np->vsi;
	struct ice_vlan vlan;
	int ret;

	/* VLAN 0 is added by default during load/reset */
	if (!vid)
		return 0;

	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
		usleep_range(1000, 2000);

	/* Add multicast promisc rule for the VLAN ID to be added if
	 * all-multicast is currently enabled.
	 */
	if (vsi->current_netdev_flags & IFF_ALLMULTI) {
		ret = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
					       ICE_MCAST_VLAN_PROMISC_BITS,
					       vid);
		if (ret)
			goto finish;
	}

	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);

	/* Add a switch rule for this VLAN ID so its corresponding VLAN tagged
	 * packets aren't pruned by the device's internal switch on Rx
	 */
	vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0);
	ret = vlan_ops->add_vlan(vsi, &vlan);
	if (ret)
		goto finish;

	/* If all-multicast is currently enabled and this VLAN ID is only one
	 * besides VLAN-0 we have to update look-up type of multicast promisc
	 * rule for VLAN-0 from ICE_SW_LKUP_PROMISC to ICE_SW_LKUP_PROMISC_VLAN.
	 */
	if ((vsi->current_netdev_flags & IFF_ALLMULTI) &&
	    ice_vsi_num_non_zero_vlans(vsi) == 1) {
		ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
					   ICE_MCAST_PROMISC_BITS, 0);
		ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
					 ICE_MCAST_VLAN_PROMISC_BITS, 0);
	}

finish:
	clear_bit(ICE_CFG_BUSY, vsi->state);

	return ret;
}
/**
 * ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload
 * @netdev: network interface to be adjusted
 * @proto: VLAN TPID
 * @vid: VLAN ID to be removed
 *
 * net_device_ops implementation for removing VLAN IDs
 */
int ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi_vlan_ops *vlan_ops;
	struct ice_vsi *vsi = np->vsi;
	struct ice_vlan vlan;
	int ret;

	/* don't allow removal of VLAN 0 */
	if (!vid)
		return 0;

	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
		usleep_range(1000, 2000);

	ret = ice_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
				    ICE_MCAST_VLAN_PROMISC_BITS, vid);
	if (ret) {
		netdev_err(netdev, "Error clearing multicast promiscuous mode on VSI %i\n",
			   vsi->vsi_num);
		vsi->current_netdev_flags |= IFF_ALLMULTI;
	}

	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);

	/* Make sure VLAN delete is successful before updating VLAN
	 * information
	 */
	vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0);
	ret = vlan_ops->del_vlan(vsi, &vlan);
	if (ret)
		goto finish;

	/* Remove multicast promisc rule for the removed VLAN ID if
	 * all-multicast is enabled.
	 */
	if (vsi->current_netdev_flags & IFF_ALLMULTI)
		ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
					   ICE_MCAST_VLAN_PROMISC_BITS, vid);

	if (!ice_vsi_has_non_zero_vlans(vsi)) {
		/* Update look-up type of multicast promisc rule for VLAN 0
		 * from ICE_SW_LKUP_PROMISC_VLAN to ICE_SW_LKUP_PROMISC when
		 * all-multicast is enabled and VLAN 0 is the only VLAN rule.
		 */
		if (vsi->current_netdev_flags & IFF_ALLMULTI) {
			ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
						   ICE_MCAST_VLAN_PROMISC_BITS,
						   0);
			ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
						 ICE_MCAST_PROMISC_BITS, 0);
		}
	}

finish:
	clear_bit(ICE_CFG_BUSY, vsi->state);

	return ret;
}
/**
 * ice_rep_indr_tc_block_unbind
 * @cb_priv: indirection block private data
 */
static void ice_rep_indr_tc_block_unbind(void *cb_priv)
{
	struct ice_indr_block_priv *indr_priv = cb_priv;

	list_del(&indr_priv->list);
	kfree(indr_priv);
}

/**
 * ice_tc_indir_block_unregister - Unregister TC indirect block notifications
 * @vsi: VSI struct which has the netdev
 */
static void ice_tc_indir_block_unregister(struct ice_vsi *vsi)
{
	struct ice_netdev_priv *np = netdev_priv(vsi->netdev);

	flow_indr_dev_unregister(ice_indr_setup_tc_cb, np,
				 ice_rep_indr_tc_block_unbind);
}

/**
 * ice_tc_indir_block_register - Register TC indirect block notifications
 * @vsi: VSI struct which has the netdev
 *
 * Returns 0 on success, negative value on failure
 */
static int ice_tc_indir_block_register(struct ice_vsi *vsi)
{
	struct ice_netdev_priv *np;

	if (!vsi || !vsi->netdev)
		return -EINVAL;

	np = netdev_priv(vsi->netdev);

	INIT_LIST_HEAD(&np->tc_indr_block_priv_list);
	return flow_indr_dev_register(ice_indr_setup_tc_cb, np);
}
/**
 * ice_get_avail_q_count - Get count of queues not in use
 * @pf_qmap: bitmap to get queue use count from
 * @lock: pointer to a mutex that protects access to pf_qmap
 * @size: size of the bitmap
 */
static u16
ice_get_avail_q_count(unsigned long *pf_qmap, struct mutex *lock, u16 size)
{
	unsigned long bit;
	u16 count = 0;

	mutex_lock(lock);
	for_each_clear_bit(bit, pf_qmap, size)
		count++;
	mutex_unlock(lock);

	return count;
}

/**
 * ice_get_avail_txq_count - Get count of Tx queues not in use
 * @pf: pointer to an ice_pf instance
 */
u16 ice_get_avail_txq_count(struct ice_pf *pf)
{
	return ice_get_avail_q_count(pf->avail_txqs, &pf->avail_q_mutex,
				     pf->max_pf_txqs);
}

/**
 * ice_get_avail_rxq_count - Get count of Rx queues not in use
 * @pf: pointer to an ice_pf instance
 */
u16 ice_get_avail_rxq_count(struct ice_pf *pf)
{
	return ice_get_avail_q_count(pf->avail_rxqs, &pf->avail_q_mutex,
				     pf->max_pf_rxqs);
}
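
/* Illustrative sketch (not part of the driver): assuming a hypothetical 8-bit
 * queue map with bits 0, 1 and 5 set (three queues allocated),
 * for_each_clear_bit() visits bits 2, 3, 4, 6 and 7, so the helper reports 5
 * queues still available for allocation.
 */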
/**
 * ice_deinit_pf - Unrolls initializations done by ice_init_pf
 * @pf: board private structure to initialize
 */
static void ice_deinit_pf(struct ice_pf *pf)
{
	ice_service_task_stop(pf);
	mutex_destroy(&pf->lag_mutex);
	mutex_destroy(&pf->adev_mutex);
	mutex_destroy(&pf->sw_mutex);
	mutex_destroy(&pf->tc_mutex);
	mutex_destroy(&pf->avail_q_mutex);
	mutex_destroy(&pf->vfs.table_lock);

	if (pf->avail_txqs) {
		bitmap_free(pf->avail_txqs);
		pf->avail_txqs = NULL;
	}

	if (pf->avail_rxqs) {
		bitmap_free(pf->avail_rxqs);
		pf->avail_rxqs = NULL;
	}

	if (pf->ptp.clock)
		ptp_clock_unregister(pf->ptp.clock);

	xa_destroy(&pf->dyn_ports);
	xa_destroy(&pf->sf_nums);
}
/**
 * ice_set_pf_caps - set PFs capability flags
 * @pf: pointer to the PF instance
 */
static void ice_set_pf_caps(struct ice_pf *pf)
{
	struct ice_hw_func_caps *func_caps = &pf->hw.func_caps;

	clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
	if (func_caps->common_cap.rdma)
		set_bit(ICE_FLAG_RDMA_ENA, pf->flags);
	clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
	if (func_caps->common_cap.dcb)
		set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
	clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
	if (func_caps->common_cap.sr_iov_1_1) {
		set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
		pf->vfs.num_supported = min_t(int, func_caps->num_allocd_vfs,
					      ICE_MAX_SRIOV_VFS);
	}
	clear_bit(ICE_FLAG_RSS_ENA, pf->flags);
	if (func_caps->common_cap.rss_table_size)
		set_bit(ICE_FLAG_RSS_ENA, pf->flags);

	clear_bit(ICE_FLAG_FD_ENA, pf->flags);
	if (func_caps->fd_fltr_guar > 0 || func_caps->fd_fltr_best_effort > 0) {
		u16 unused;

		/* ctrl_vsi_idx will be set to a valid value when flow director
		 * is setup by ice_init_fdir
		 */
		pf->ctrl_vsi_idx = ICE_NO_VSI;
		set_bit(ICE_FLAG_FD_ENA, pf->flags);
		/* force guaranteed filter pool for PF */
		ice_alloc_fd_guar_item(&pf->hw, &unused,
				       func_caps->fd_fltr_guar);
		/* force shared filter pool for PF */
		ice_alloc_fd_shrd_item(&pf->hw, &unused,
				       func_caps->fd_fltr_best_effort);
	}

	clear_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags);
	if (func_caps->common_cap.ieee_1588 &&
	    !(pf->hw.mac_type == ICE_MAC_E830))
		set_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags);

	pf->max_pf_txqs = func_caps->common_cap.num_txq;
	pf->max_pf_rxqs = func_caps->common_cap.num_rxq;
}
/**
 * ice_init_pf - Initialize general software structures (struct ice_pf)
 * @pf: board private structure to initialize
 */
static int ice_init_pf(struct ice_pf *pf)
{
	ice_set_pf_caps(pf);

	mutex_init(&pf->sw_mutex);
	mutex_init(&pf->tc_mutex);
	mutex_init(&pf->adev_mutex);
	mutex_init(&pf->lag_mutex);

	INIT_HLIST_HEAD(&pf->aq_wait_list);
	spin_lock_init(&pf->aq_wait_lock);
	init_waitqueue_head(&pf->aq_wait_queue);

	init_waitqueue_head(&pf->reset_wait_queue);

	/* setup service timer and periodic service task */
	timer_setup(&pf->serv_tmr, ice_service_timer, 0);
	pf->serv_tmr_period = HZ;
	INIT_WORK(&pf->serv_task, ice_service_task);
	clear_bit(ICE_SERVICE_SCHED, pf->state);

	mutex_init(&pf->avail_q_mutex);
	pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL);
	if (!pf->avail_txqs)
		return -ENOMEM;

	pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL);
	if (!pf->avail_rxqs) {
		bitmap_free(pf->avail_txqs);
		pf->avail_txqs = NULL;
		return -ENOMEM;
	}

	mutex_init(&pf->vfs.table_lock);
	hash_init(pf->vfs.table);
	if (ice_is_feature_supported(pf, ICE_F_MBX_LIMIT))
		wr32(&pf->hw, E830_MBX_PF_IN_FLIGHT_VF_MSGS_THRESH,
		     ICE_MBX_OVERFLOW_WATERMARK);
	else
		ice_mbx_init_snapshot(&pf->hw);

	xa_init(&pf->dyn_ports);
	xa_init(&pf->sf_nums);

	return 0;
}
/**
 * ice_is_wol_supported - check if WoL is supported
 * @hw: pointer to hardware info
 *
 * Check if WoL is supported based on the HW configuration.
 * Returns true if NVM supports and enables WoL for this port, false otherwise
 */
bool ice_is_wol_supported(struct ice_hw *hw)
{
	u16 wol_ctrl;

	/* A bit set to 1 in the NVM Software Reserved Word 2 (WoL control
	 * word) indicates WoL is not supported on the corresponding PF ID.
	 */
	if (ice_read_sr_word(hw, ICE_SR_NVM_WOL_CFG, &wol_ctrl))
		return false;

	return !(BIT(hw->port_info->lport) & wol_ctrl);
}
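
/* Illustrative sketch (not part of the driver): assuming a hypothetical
 * wol_ctrl word of 0x0002 read from the NVM, bit 1 is set, so WoL is reported
 * as unsupported for lport 1 while lport 0 (bit 0 clear) still supports it.
 */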
/**
 * ice_vsi_recfg_qs - Change the number of queues on a VSI
 * @vsi: VSI being changed
 * @new_rx: new number of Rx queues
 * @new_tx: new number of Tx queues
 * @locked: is adev device_lock held
 *
 * Only change the number of queues if new_tx, or new_rx is non-0.
 *
 * Returns 0 on success.
 */
int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked)
{
	struct ice_pf *pf = vsi->back;
	int i, err = 0, timeout = 50;

	if (!new_rx && !new_tx)
		return -EINVAL;

	while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
		timeout--;
		if (!timeout)
			return -EBUSY;
		usleep_range(1000, 2000);
	}

	if (new_tx)
		vsi->req_txq = (u16)new_tx;
	if (new_rx)
		vsi->req_rxq = (u16)new_rx;

	/* set for the next time the netdev is started */
	if (!netif_running(vsi->netdev)) {
		err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
		if (err)
			goto rebuild_err;
		dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n");
		goto done;
	}

	ice_vsi_close(vsi);
	err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
	if (err)
		goto rebuild_err;

	ice_for_each_traffic_class(i) {
		if (vsi->tc_cfg.ena_tc & BIT(i))
			netdev_set_tc_queue(vsi->netdev,
					    vsi->tc_cfg.tc_info[i].netdev_tc,
					    vsi->tc_cfg.tc_info[i].qcount_tx,
					    vsi->tc_cfg.tc_info[i].qoffset);
	}
	ice_pf_dcb_recfg(pf, locked);
	ice_vsi_open(vsi);
	goto done;

rebuild_err:
	dev_err(ice_pf_to_dev(pf), "Error during VSI rebuild: %d. Unload and reload the driver.\n",
		err);
done:
	clear_bit(ICE_CFG_BUSY, pf->state);
	return err;
}
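/* ice_vsi_recfg_qs() is typically reached from the ethtool set_channels path,
 * e.g. "ethtool -L <ifname> combined 16", which requests new Tx/Rx queue
 * counts for the PF VSI while the interface may be up or down.
 */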
/**
 * ice_set_safe_mode_vlan_cfg - configure PF VSI to allow all VLANs in safe mode
 * @pf: PF to configure
 *
 * No VLAN offloads/filtering are advertised in safe mode so make sure the PF
 * VSI can still Tx/Rx VLAN tagged packets.
 */
static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf)
{
	struct ice_vsi *vsi = ice_get_main_vsi(pf);
	struct ice_vsi_ctx *ctxt;
	struct ice_hw *hw;
	int status;

	if (!vsi)
		return;

	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (!ctxt)
		return;

	hw = &pf->hw;
	ctxt->info = vsi->info;

	ctxt->info.valid_sections =
		cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
			    ICE_AQ_VSI_PROP_SECURITY_VALID |
			    ICE_AQ_VSI_PROP_SW_VALID);

	/* disable VLAN anti-spoof */
	ctxt->info.sec_flags &= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
				  ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);

	/* disable VLAN pruning and keep all other settings */
	ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;

	/* allow all VLANs on Tx and don't strip on Rx */
	ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL |
		ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING;

	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
	if (status) {
		dev_err(ice_pf_to_dev(vsi->back), "Failed to update VSI for safe mode VLANs, err %d aq_err %s\n",
			status, ice_aq_str(hw->adminq.sq_last_status));
	} else {
		vsi->info.sec_flags = ctxt->info.sec_flags;
		vsi->info.sw_flags2 = ctxt->info.sw_flags2;
		vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags;
	}

	kfree(ctxt);
}
/**
 * ice_log_pkg_init - log result of DDP package load
 * @hw: pointer to hardware info
 * @state: state of package load
 */
static void ice_log_pkg_init(struct ice_hw *hw, enum ice_ddp_state state)
{
	struct ice_pf *pf = hw->back;
	struct device *dev;

	dev = ice_pf_to_dev(pf);

	switch (state) {
	case ICE_DDP_PKG_SUCCESS:
		dev_info(dev, "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n",
			 hw->active_pkg_name,
			 hw->active_pkg_ver.major,
			 hw->active_pkg_ver.minor,
			 hw->active_pkg_ver.update,
			 hw->active_pkg_ver.draft);
		break;
	case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED:
		dev_info(dev, "DDP package already present on device: %s version %d.%d.%d.%d\n",
			 hw->active_pkg_name,
			 hw->active_pkg_ver.major,
			 hw->active_pkg_ver.minor,
			 hw->active_pkg_ver.update,
			 hw->active_pkg_ver.draft);
		break;
	case ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED:
		dev_err(dev, "The device has a DDP package that is not supported by the driver. The device has package '%s' version %d.%d.x.x. The driver requires version %d.%d.x.x. Entering Safe Mode.\n",
			hw->active_pkg_name,
			hw->active_pkg_ver.major,
			hw->active_pkg_ver.minor,
			ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
		break;
	case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED:
		dev_info(dev, "The driver could not load the DDP package file because a compatible DDP package is already present on the device. The device has package '%s' version %d.%d.%d.%d. The package file found by the driver: '%s' version %d.%d.%d.%d.\n",
			 hw->active_pkg_name,
			 hw->active_pkg_ver.major,
			 hw->active_pkg_ver.minor,
			 hw->active_pkg_ver.update,
			 hw->active_pkg_ver.draft,
			 hw->pkg_name,
			 hw->pkg_ver.major,
			 hw->pkg_ver.minor,
			 hw->pkg_ver.update,
			 hw->pkg_ver.draft);
		break;
	case ICE_DDP_PKG_FW_MISMATCH:
		dev_err(dev, "The firmware loaded on the device is not compatible with the DDP package. Please update the device's NVM. Entering safe mode.\n");
		break;
	case ICE_DDP_PKG_INVALID_FILE:
		dev_err(dev, "The DDP package file is invalid. Entering Safe Mode.\n");
		break;
	case ICE_DDP_PKG_FILE_VERSION_TOO_HIGH:
		dev_err(dev, "The DDP package file version is higher than the driver supports. Please use an updated driver. Entering Safe Mode.\n");
		break;
	case ICE_DDP_PKG_FILE_VERSION_TOO_LOW:
		dev_err(dev, "The DDP package file version is lower than the driver supports. The driver requires version %d.%d.x.x. Please use an updated DDP Package file. Entering Safe Mode.\n",
			ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
		break;
	case ICE_DDP_PKG_FILE_SIGNATURE_INVALID:
		dev_err(dev, "The DDP package could not be loaded because its signature is not valid. Please use a valid DDP Package. Entering Safe Mode.\n");
		break;
	case ICE_DDP_PKG_FILE_REVISION_TOO_LOW:
		dev_err(dev, "The DDP Package could not be loaded because its security revision is too low. Please use an updated DDP Package. Entering Safe Mode.\n");
		break;
	case ICE_DDP_PKG_LOAD_ERROR:
		dev_err(dev, "An error occurred on the device while loading the DDP package. The device will be reset.\n");
		/* poll for reset to complete */
		if (ice_check_reset(hw))
			dev_err(dev, "Error resetting device. Please reload the driver\n");
		break;
	case ICE_DDP_PKG_ERR:
	default:
		dev_err(dev, "An unknown error occurred when loading the DDP package. Entering Safe Mode.\n");
		break;
	}
}
/**
 * ice_load_pkg - load/reload the DDP Package file
 * @firmware: firmware structure when firmware requested or NULL for reload
 * @pf: pointer to the PF instance
 *
 * Called on probe and post CORER/GLOBR rebuild to load DDP Package and
 * initialize HW tables.
 */
static void
ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf)
{
	enum ice_ddp_state state = ICE_DDP_PKG_ERR;
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_hw *hw = &pf->hw;

	/* Load DDP Package */
	if (firmware && !hw->pkg_copy) {
		state = ice_copy_and_init_pkg(hw, firmware->data,
					      firmware->size);
		ice_log_pkg_init(hw, state);
	} else if (!firmware && hw->pkg_copy) {
		/* Reload package during rebuild after CORER/GLOBR reset */
		state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
		ice_log_pkg_init(hw, state);
	} else {
		dev_err(dev, "The DDP package file failed to load. Entering Safe Mode.\n");
	}

	if (!ice_is_init_pkg_successful(state)) {
		/* Safe Mode */
		clear_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
		return;
	}

	/* Successful download package is the precondition for advanced
	 * features, hence setting the ICE_FLAG_ADV_FEATURES flag
	 */
	set_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
}
/**
 * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines
 * @pf: pointer to the PF structure
 *
 * There is no error returned here because the driver should be able to handle
 * 128 Byte cache lines, so we only print a warning in case issues are seen,
 * specifically with Tx.
 */
static void ice_verify_cacheline_size(struct ice_pf *pf)
{
	if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M)
		dev_warn(ice_pf_to_dev(pf), "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
			 ICE_CACHE_LINE_BYTES);
}

/**
 * ice_send_version - update firmware with driver version
 * @pf: PF struct
 *
 * Returns 0 on success, else error code
 */
static int ice_send_version(struct ice_pf *pf)
{
	struct ice_driver_ver dv;

	dv.major_ver = 0xff;
	dv.minor_ver = 0xff;
	dv.build_ver = 0xff;
	dv.subbuild_ver = 0;
	strscpy((char *)dv.driver_string, UTS_RELEASE,
		sizeof(dv.driver_string));
	return ice_aq_send_driver_ver(&pf->hw, &dv, NULL);
}
/**
 * ice_init_fdir - Initialize flow director VSI and configuration
 * @pf: pointer to the PF instance
 *
 * returns 0 on success, negative on error
 */
static int ice_init_fdir(struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_vsi *ctrl_vsi;
	int err;

	/* Side Band Flow Director needs to have a control VSI.
	 * Allocate it and store it in the PF.
	 */
	ctrl_vsi = ice_ctrl_vsi_setup(pf, pf->hw.port_info);
	if (!ctrl_vsi) {
		dev_dbg(dev, "could not create control VSI\n");
		return -ENOMEM;
	}

	err = ice_vsi_open_ctrl(ctrl_vsi);
	if (err) {
		dev_dbg(dev, "could not open control VSI\n");
		goto err_vsi_open;
	}

	mutex_init(&pf->hw.fdir_fltr_lock);

	err = ice_fdir_create_dflt_rules(pf);
	if (err)
		goto err_fdir_rule;

	return 0;

err_fdir_rule:
	ice_fdir_release_flows(&pf->hw);
	ice_vsi_close(ctrl_vsi);
err_vsi_open:
	ice_vsi_release(ctrl_vsi);
	if (pf->ctrl_vsi_idx != ICE_NO_VSI) {
		pf->vsi[pf->ctrl_vsi_idx] = NULL;
		pf->ctrl_vsi_idx = ICE_NO_VSI;
	}
	return err;
}

static void ice_deinit_fdir(struct ice_pf *pf)
{
	struct ice_vsi *vsi = ice_get_ctrl_vsi(pf);

	if (!vsi)
		return;

	ice_vsi_manage_fdir(vsi, false);
	ice_vsi_release(vsi);
	if (pf->ctrl_vsi_idx != ICE_NO_VSI) {
		pf->vsi[pf->ctrl_vsi_idx] = NULL;
		pf->ctrl_vsi_idx = ICE_NO_VSI;
	}

	mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
}
/**
 * ice_get_opt_fw_name - return optional firmware file name or NULL
 * @pf: pointer to the PF instance
 */
static char *ice_get_opt_fw_name(struct ice_pf *pf)
{
	/* Optional firmware name same as default with additional dash
	 * followed by a EUI-64 identifier (PCIe Device Serial Number)
	 */
	struct pci_dev *pdev = pf->pdev;
	char *opt_fw_filename;
	u64 dsn;

	/* Determine the name of the optional file using the DSN (two
	 * dwords following the start of the DSN Capability).
	 */
	dsn = pci_get_dsn(pdev);
	if (!dsn)
		return NULL;

	opt_fw_filename = kzalloc(NAME_MAX, GFP_KERNEL);
	if (!opt_fw_filename)
		return NULL;

	snprintf(opt_fw_filename, NAME_MAX, "%sice-%016llx.pkg",
		 ICE_DDP_PKG_PATH, dsn);

	return opt_fw_filename;
}
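/* Example of what the snprintf() above produces: a device whose DSN reads
 * 0x1234567890abcdef yields the optional package name
 * "intel/ice/ddp/ice-1234567890abcdef.pkg" (the DSN value here is purely
 * illustrative), which is tried before the default package file.
 */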
/**
 * ice_request_fw - Device initialization routine
 * @pf: pointer to the PF instance
 * @firmware: double pointer to firmware struct
 *
 * Return: zero when successful, negative values otherwise.
 */
static int ice_request_fw(struct ice_pf *pf, const struct firmware **firmware)
{
	char *opt_fw_filename = ice_get_opt_fw_name(pf);
	struct device *dev = ice_pf_to_dev(pf);
	int err = 0;

	/* optional device-specific DDP (if present) overrides the default DDP
	 * package file. kernel logs a debug message if the file doesn't exist,
	 * and warning messages for other errors.
	 */
	if (opt_fw_filename) {
		err = firmware_request_nowarn(firmware, opt_fw_filename, dev);
		kfree(opt_fw_filename);
		if (!err)
			return err;
	}
	err = request_firmware(firmware, ICE_DDP_PKG_FILE, dev);
	if (err)
		dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n");

	return err;
}
/**
 * ice_init_tx_topology - performs Tx topology initialization
 * @hw: pointer to the hardware structure
 * @firmware: pointer to firmware structure
 *
 * Return: zero when init was successful, negative values otherwise.
 */
static int
ice_init_tx_topology(struct ice_hw *hw, const struct firmware *firmware)
{
	u8 num_tx_sched_layers = hw->num_tx_sched_layers;
	struct ice_pf *pf = hw->back;
	struct device *dev;
	int err;

	dev = ice_pf_to_dev(pf);
	err = ice_cfg_tx_topo(hw, firmware->data, firmware->size);
	if (!err) {
		if (hw->num_tx_sched_layers > num_tx_sched_layers)
			dev_info(dev, "Tx scheduling layers switching feature disabled\n");
		else
			dev_info(dev, "Tx scheduling layers switching feature enabled\n");
		/* if there was a change in topology ice_cfg_tx_topo triggered
		 * a CORER and we need to re-init hw
		 */
		ice_deinit_hw(hw);
		err = ice_init_hw(hw);

		return err;
	} else if (err == -EIO) {
		dev_info(dev, "DDP package does not support Tx scheduling layers switching feature - please update to the latest DDP package and try again\n");
	}

	return 0;
}
/**
 * ice_init_supported_rxdids - Initialize supported Rx descriptor IDs
 * @hw: pointer to the hardware structure
 * @pf: pointer to pf structure
 *
 * The pf->supported_rxdids bitmap is used to indicate to VFs which descriptor
 * formats the PF hardware supports. The exact list of supported RXDIDs
 * depends on the loaded DDP package. The IDs can be determined by reading the
 * GLFLXP_RXDID_FLAGS register after the DDP package is loaded.
 *
 * Note that the legacy 32-byte RXDID 0 is always supported but is not listed
 * in the DDP package. The 16-byte legacy descriptor is never supported by
 * VFs.
 */
static void ice_init_supported_rxdids(struct ice_hw *hw, struct ice_pf *pf)
{
	pf->supported_rxdids = BIT(ICE_RXDID_LEGACY_1);

	for (int i = ICE_RXDID_FLEX_NIC; i < ICE_FLEX_DESC_RXDID_MAX_NUM; i++) {
		u32 regval;

		regval = rd32(hw, GLFLXP_RXDID_FLAGS(i, 0));
		if ((regval >> GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S)
			& GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M)
			pf->supported_rxdids |= BIT(i);
	}
}
/**
 * ice_init_ddp_config - DDP related configuration
 * @hw: pointer to the hardware structure
 * @pf: pointer to pf structure
 *
 * This function loads DDP file from the disk, then initializes Tx
 * topology. At the end DDP package is loaded on the card.
 *
 * Return: zero when init was successful, negative values otherwise.
 */
static int ice_init_ddp_config(struct ice_hw *hw, struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);
	const struct firmware *firmware = NULL;
	int err;

	err = ice_request_fw(pf, &firmware);
	if (err) {
		dev_err(dev, "Fail during requesting FW: %d\n", err);
		return err;
	}

	err = ice_init_tx_topology(hw, firmware);
	if (err) {
		dev_err(dev, "Fail during initialization of Tx topology: %d\n",
			err);
		release_firmware(firmware);
		return err;
	}

	/* Download firmware to device */
	ice_load_pkg(firmware, pf);
	release_firmware(firmware);

	/* Initialize the supported Rx descriptor IDs after loading DDP */
	ice_init_supported_rxdids(hw, pf);

	return 0;
}
/**
 * ice_print_wake_reason - show the wake up cause in the log
 * @pf: pointer to the PF struct
 */
static void ice_print_wake_reason(struct ice_pf *pf)
{
	u32 wus = pf->wakeup_reason;
	const char *wake_str;

	/* if no wake event, nothing to print */
	if (!wus)
		return;

	if (wus & PFPM_WUS_LNKC_M)
		wake_str = "Link\n";
	else if (wus & PFPM_WUS_MAG_M)
		wake_str = "Magic Packet\n";
	else if (wus & PFPM_WUS_MNG_M)
		wake_str = "Management\n";
	else if (wus & PFPM_WUS_FW_RST_WK_M)
		wake_str = "Firmware Reset\n";
	else
		wake_str = "Unknown\n";

	dev_info(ice_pf_to_dev(pf), "Wake reason: %s", wake_str);
}

/**
 * ice_pf_fwlog_update_module - update 1 module
 * @pf: pointer to the PF struct
 * @log_level: log_level to use for the @module
 * @module: module to update
 */
void ice_pf_fwlog_update_module(struct ice_pf *pf, int log_level, int module)
{
	struct ice_hw *hw = &pf->hw;

	hw->fwlog_cfg.module_entries[module].log_level = log_level;
}
/**
 * ice_register_netdev - register netdev
 * @vsi: pointer to the VSI struct
 */
static int ice_register_netdev(struct ice_vsi *vsi)
{
	int err;

	if (!vsi || !vsi->netdev)
		return -EIO;

	err = register_netdev(vsi->netdev);
	if (err)
		return err;

	set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
	netif_carrier_off(vsi->netdev);
	netif_tx_stop_all_queues(vsi->netdev);

	return 0;
}

static void ice_unregister_netdev(struct ice_vsi *vsi)
{
	if (!vsi || !vsi->netdev)
		return;

	unregister_netdev(vsi->netdev);
	clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
}

/**
 * ice_cfg_netdev - Allocate, configure and register a netdev
 * @vsi: the VSI associated with the new netdev
 *
 * Returns 0 on success, negative value on failure
 */
static int ice_cfg_netdev(struct ice_vsi *vsi)
{
	struct ice_netdev_priv *np;
	struct net_device *netdev;
	u8 mac_addr[ETH_ALEN];

	netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq,
				    vsi->alloc_rxq);
	if (!netdev)
		return -ENOMEM;

	set_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
	vsi->netdev = netdev;
	np = netdev_priv(netdev);
	np->vsi = vsi;

	ice_set_netdev_features(netdev);
	ice_set_ops(vsi);

	if (vsi->type == ICE_VSI_PF) {
		SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back));
		ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
		eth_hw_addr_set(netdev, mac_addr);
	}

	netdev->priv_flags |= IFF_UNICAST_FLT;

	/* Setup netdev TC information */
	ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc);

	netdev->max_mtu = ICE_MAX_MTU;

	return 0;
}

static void ice_decfg_netdev(struct ice_vsi *vsi)
{
	clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
	free_netdev(vsi->netdev);
	vsi->netdev = NULL;
}
/**
 * ice_wait_for_fw - wait for full FW readiness
 * @hw: pointer to the hardware structure
 * @timeout: milliseconds that can elapse before timing out
 */
static int ice_wait_for_fw(struct ice_hw *hw, u32 timeout)
{
	u32 fw_loading;
	u32 elapsed = 0;

	while (elapsed <= timeout) {
		fw_loading = rd32(hw, GL_MNG_FWSM) & GL_MNG_FWSM_FW_LOADING_M;

		/* firmware was not yet loaded, we have to wait more */
		if (fw_loading) {
			elapsed += 100;
			msleep(100);
			continue;
		}
		return 0;
	}

	return -ETIMEDOUT;
}
int ice_init_dev(struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_hw *hw = &pf->hw;
	int err;

	err = ice_init_hw(hw);
	if (err) {
		dev_err(dev, "ice_init_hw failed: %d\n", err);
		return err;
	}

	/* Some cards require longer initialization times
	 * due to necessity of loading FW from an external source.
	 * This can take even half a minute.
	 */
	if (ice_is_pf_c827(hw)) {
		err = ice_wait_for_fw(hw, 30000);
		if (err) {
			dev_err(dev, "ice_wait_for_fw timed out");
			return err;
		}
	}

	ice_init_feature_support(pf);

	err = ice_init_ddp_config(hw, pf);

	/* if ice_init_ddp_config fails, ICE_FLAG_ADV_FEATURES bit won't be
	 * set in pf->state, which will cause ice_is_safe_mode to return
	 * true
	 */
	if (err || ice_is_safe_mode(pf)) {
		/* we already got function/device capabilities but these don't
		 * reflect what the driver needs to do in safe mode. Instead of
		 * adding conditional logic everywhere to ignore these
		 * device/function capabilities, override them.
		 */
		ice_set_safe_mode_caps(hw);
	}

	err = ice_init_pf(pf);
	if (err) {
		dev_err(dev, "ice_init_pf failed: %d\n", err);
		goto err_init_pf;
	}

	pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port;
	pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port;
	pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
	pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared;
	if (pf->hw.tnl.valid_count[TNL_VXLAN]) {
		pf->hw.udp_tunnel_nic.tables[0].n_entries =
			pf->hw.tnl.valid_count[TNL_VXLAN];
		pf->hw.udp_tunnel_nic.tables[0].tunnel_types =
			UDP_TUNNEL_TYPE_VXLAN;
	}
	if (pf->hw.tnl.valid_count[TNL_GENEVE]) {
		pf->hw.udp_tunnel_nic.tables[1].n_entries =
			pf->hw.tnl.valid_count[TNL_GENEVE];
		pf->hw.udp_tunnel_nic.tables[1].tunnel_types =
			UDP_TUNNEL_TYPE_GENEVE;
	}

	err = ice_init_interrupt_scheme(pf);
	if (err) {
		dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err);
		err = -EIO;
		goto err_init_interrupt_scheme;
	}

	/* In case of MSIX we are going to setup the misc vector right here
	 * to handle admin queue events etc. In case of legacy and MSI
	 * the misc functionality and queue processing is combined in
	 * the same vector and that gets setup at open.
	 */
	err = ice_req_irq_msix_misc(pf);
	if (err) {
		dev_err(dev, "setup of misc vector failed: %d\n", err);
		goto err_req_irq_msix_misc;
	}

	return 0;

err_req_irq_msix_misc:
	ice_clear_interrupt_scheme(pf);
err_init_interrupt_scheme:
	ice_deinit_pf(pf);
err_init_pf:
	ice_deinit_hw(hw);
	return err;
}
void ice_deinit_dev(struct ice_pf *pf)
{
	ice_free_irq_msix_misc(pf);
	ice_deinit_pf(pf);
	ice_deinit_hw(&pf->hw);

	/* Service task is already stopped, so call reset directly. */
	ice_reset(&pf->hw, ICE_RESET_PFR);
	pci_wait_for_pending_transaction(pf->pdev);
	ice_clear_interrupt_scheme(pf);
}
static void ice_init_features(struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);

	if (ice_is_safe_mode(pf))
		return;

	/* initialize DDP driven features */
	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
		ice_ptp_init(pf);

	if (ice_is_feature_supported(pf, ICE_F_GNSS))
		ice_gnss_init(pf);

	if (ice_is_feature_supported(pf, ICE_F_CGU) ||
	    ice_is_feature_supported(pf, ICE_F_PHY_RCLK))
		ice_dpll_init(pf);

	/* Note: Flow director init failure is non-fatal to load */
	if (ice_init_fdir(pf))
		dev_err(dev, "could not initialize flow director\n");

	/* Note: DCB init failure is non-fatal to load */
	if (ice_init_pf_dcb(pf, false)) {
		clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
	} else {
		ice_cfg_lldp_mib_change(&pf->hw, true);
	}

	if (ice_init_lag(pf))
		dev_warn(dev, "Failed to init link aggregation support\n");

	ice_hwmon_init(pf);
}

static void ice_deinit_features(struct ice_pf *pf)
{
	if (ice_is_safe_mode(pf))
		return;

	ice_deinit_lag(pf);
	if (test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
		ice_cfg_lldp_mib_change(&pf->hw, false);
	ice_deinit_fdir(pf);
	if (ice_is_feature_supported(pf, ICE_F_GNSS))
		ice_gnss_exit(pf);
	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
		ice_ptp_release(pf);
	if (test_bit(ICE_FLAG_DPLL, pf->flags))
		ice_dpll_deinit(pf);
	if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
		xa_destroy(&pf->eswitch.reprs);
}
static void ice_init_wakeup(struct ice_pf *pf)
{
	/* Save wakeup reason register for later use */
	pf->wakeup_reason = rd32(&pf->hw, PFPM_WUS);

	/* check for a power management event */
	ice_print_wake_reason(pf);

	/* clear wake status, all bits */
	wr32(&pf->hw, PFPM_WUS, U32_MAX);

	/* Disable WoL at init, wait for user to enable */
	device_set_wakeup_enable(ice_pf_to_dev(pf), false);
}
static int ice_init_link(struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);
	int err;

	err = ice_init_link_events(pf->hw.port_info);
	if (err) {
		dev_err(dev, "ice_init_link_events failed: %d\n", err);
		return err;
	}

	/* not a fatal error if this fails */
	err = ice_init_nvm_phy_type(pf->hw.port_info);
	if (err)
		dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err);

	/* not a fatal error if this fails */
	err = ice_update_link_info(pf->hw.port_info);
	if (err)
		dev_err(dev, "ice_update_link_info failed: %d\n", err);

	ice_init_link_dflt_override(pf->hw.port_info);

	ice_check_link_cfg_err(pf,
			       pf->hw.port_info->phy.link_info.link_cfg_err);

	/* if media available, initialize PHY settings */
	if (pf->hw.port_info->phy.link_info.link_info &
	    ICE_AQ_MEDIA_AVAILABLE) {
		/* not a fatal error if this fails */
		err = ice_init_phy_user_cfg(pf->hw.port_info);
		if (err)
			dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err);

		if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) {
			struct ice_vsi *vsi = ice_get_main_vsi(pf);

			if (vsi)
				ice_configure_phy(vsi);
		}
	} else {
		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
	}

	return err;
}
static int ice_init_pf_sw(struct ice_pf *pf)
{
	bool dvm = ice_is_dvm_ena(&pf->hw);
	struct ice_vsi *vsi;
	int err;

	/* create switch struct for the switch element created by FW on boot */
	pf->first_sw = kzalloc(sizeof(*pf->first_sw), GFP_KERNEL);
	if (!pf->first_sw)
		return -ENOMEM;

	if (pf->hw.evb_veb)
		pf->first_sw->bridge_mode = BRIDGE_MODE_VEB;
	else
		pf->first_sw->bridge_mode = BRIDGE_MODE_VEPA;

	pf->first_sw->pf = pf;

	/* record the sw_id available for later use */
	pf->first_sw->sw_id = pf->hw.port_info->sw_id;

	err = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL);
	if (err)
		goto err_aq_set_port_params;

	vsi = ice_pf_vsi_setup(pf, pf->hw.port_info);
	if (!vsi) {
		err = -ENOMEM;
		goto err_pf_vsi_setup;
	}

	return 0;

err_pf_vsi_setup:
err_aq_set_port_params:
	kfree(pf->first_sw);
	return err;
}

static void ice_deinit_pf_sw(struct ice_pf *pf)
{
	struct ice_vsi *vsi = ice_get_main_vsi(pf);

	if (!vsi)
		return;

	ice_vsi_release(vsi);
	kfree(pf->first_sw);
}
static int ice_alloc_vsis(struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);

	pf->num_alloc_vsi = pf->hw.func_caps.guar_num_vsi;
	if (!pf->num_alloc_vsi)
		return -EIO;

	if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
		dev_warn(dev,
			 "limiting the VSI count due to UDP tunnel limitation %d > %d\n",
			 pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
		pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
	}

	pf->vsi = devm_kcalloc(dev, pf->num_alloc_vsi, sizeof(*pf->vsi),
			       GFP_KERNEL);
	if (!pf->vsi)
		return -ENOMEM;

	pf->vsi_stats = devm_kcalloc(dev, pf->num_alloc_vsi,
				     sizeof(*pf->vsi_stats), GFP_KERNEL);
	if (!pf->vsi_stats) {
		devm_kfree(dev, pf->vsi);
		return -ENOMEM;
	}

	return 0;
}

static void ice_dealloc_vsis(struct ice_pf *pf)
{
	devm_kfree(ice_pf_to_dev(pf), pf->vsi_stats);
	pf->vsi_stats = NULL;

	pf->num_alloc_vsi = 0;
	devm_kfree(ice_pf_to_dev(pf), pf->vsi);
	pf->vsi = NULL;
}
static int ice_init_devlink(struct ice_pf *pf)
{
	int err;

	err = ice_devlink_register_params(pf);
	if (err)
		return err;

	ice_devlink_init_regions(pf);
	ice_devlink_register(pf);

	return 0;
}

static void ice_deinit_devlink(struct ice_pf *pf)
{
	ice_devlink_unregister(pf);
	ice_devlink_destroy_regions(pf);
	ice_devlink_unregister_params(pf);
}
static int ice_init(struct ice_pf *pf)
{
	int err;

	err = ice_init_dev(pf);
	if (err)
		return err;

	err = ice_alloc_vsis(pf);
	if (err)
		goto err_alloc_vsis;

	err = ice_init_pf_sw(pf);
	if (err)
		goto err_init_pf_sw;

	ice_init_wakeup(pf);

	err = ice_init_link(pf);
	if (err)
		goto err_init_link;

	err = ice_send_version(pf);
	if (err)
		goto err_init_link;

	ice_verify_cacheline_size(pf);

	if (ice_is_safe_mode(pf))
		ice_set_safe_mode_vlan_cfg(pf);
	else
		/* print PCI link speed and width */
		pcie_print_link_status(pf->pdev);

	/* ready to go, so clear down state bit */
	clear_bit(ICE_DOWN, pf->state);
	clear_bit(ICE_SERVICE_DIS, pf->state);

	/* since everything is good, start the service timer */
	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));

	return 0;

err_init_link:
	ice_deinit_pf_sw(pf);
err_init_pf_sw:
	ice_dealloc_vsis(pf);
err_alloc_vsis:
	ice_deinit_dev(pf);
	return err;
}

static void ice_deinit(struct ice_pf *pf)
{
	set_bit(ICE_SERVICE_DIS, pf->state);
	set_bit(ICE_DOWN, pf->state);

	ice_deinit_pf_sw(pf);
	ice_dealloc_vsis(pf);
	ice_deinit_dev(pf);
}
/**
 * ice_load - load pf by init hw and starting VSI
 * @pf: pointer to the pf instance
 *
 * This function has to be called under devl_lock.
 */
int ice_load(struct ice_pf *pf)
{
	struct ice_vsi *vsi;
	int err;

	devl_assert_locked(priv_to_devlink(pf));

	vsi = ice_get_main_vsi(pf);

	/* init channel list */
	INIT_LIST_HEAD(&vsi->ch_list);

	err = ice_cfg_netdev(vsi);
	if (err)
		return err;

	/* Setup DCB netlink interface */
	ice_dcbnl_setup(vsi);

	err = ice_init_mac_fltr(pf);
	if (err)
		goto err_init_mac_fltr;

	err = ice_devlink_create_pf_port(pf);
	if (err)
		goto err_devlink_create_pf_port;

	SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port);

	err = ice_register_netdev(vsi);
	if (err)
		goto err_register_netdev;

	err = ice_tc_indir_block_register(vsi);
	if (err)
		goto err_tc_indir_block_register;

	ice_napi_add(vsi);

	err = ice_init_rdma(pf);
	if (err)
		goto err_init_rdma;

	ice_init_features(pf);
	ice_service_task_restart(pf);

	clear_bit(ICE_DOWN, pf->state);

	return 0;

err_init_rdma:
	ice_tc_indir_block_unregister(vsi);
err_tc_indir_block_register:
	ice_unregister_netdev(vsi);
err_register_netdev:
	ice_devlink_destroy_pf_port(pf);
err_devlink_create_pf_port:
err_init_mac_fltr:
	ice_decfg_netdev(vsi);
	return err;
}

/**
 * ice_unload - unload pf by stopping VSI and deinit hw
 * @pf: pointer to the pf instance
 *
 * This function has to be called under devl_lock.
 */
void ice_unload(struct ice_pf *pf)
{
	struct ice_vsi *vsi = ice_get_main_vsi(pf);

	devl_assert_locked(priv_to_devlink(pf));

	ice_deinit_features(pf);
	ice_deinit_rdma(pf);
	ice_tc_indir_block_unregister(vsi);
	ice_unregister_netdev(vsi);
	ice_devlink_destroy_pf_port(pf);
	ice_decfg_netdev(vsi);
}
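/* ice_unload()/ice_load() pair up for devlink-driven reinitialization of the
 * PF; for example a reload such as
 * "devlink dev reload pci/0000:17:00.0 action driver_reinit" (bus address
 * shown only as an illustration) tears the PF down and brings it back up
 * under devl_lock, which is why both functions assert that the lock is held.
 */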
/**
 * ice_probe - Device initialization routine
 * @pdev: PCI device information struct
 * @ent: entry in ice_pci_tbl
 *
 * Returns 0 on success, negative on failure
 */
static int
ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
{
	struct device *dev = &pdev->dev;
	struct ice_adapter *adapter;
	struct ice_pf *pf;
	struct ice_hw *hw;
	int err;

	if (pdev->is_virtfn) {
		dev_err(dev, "can't probe a virtual function\n");
		return -EINVAL;
	}

	/* when under a kdump kernel initiate a reset before enabling the
	 * device in order to clear out any pending DMA transactions. These
	 * transactions can cause some systems to machine check when doing
	 * the pcim_enable_device() below.
	 */
	if (is_kdump_kernel()) {
		pci_save_state(pdev);
		pci_clear_master(pdev);
		err = pcie_flr(pdev);
		if (err)
			return err;
		pci_restore_state(pdev);
	}

	/* this driver uses devres, see
	 * Documentation/driver-api/driver-model/devres.rst
	 */
	err = pcim_enable_device(pdev);
	if (err)
		return err;

	err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), dev_driver_string(dev));
	if (err) {
		dev_err(dev, "BAR0 I/O map error %d\n", err);
		return err;
	}

	pf = ice_allocate_pf(dev);
	if (!pf)
		return -ENOMEM;

	/* initialize Auxiliary index to invalid value */
	pf->aux_idx = -1;

	/* set up for high or low DMA */
	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(dev, "DMA configuration failed: 0x%x\n", err);
		return err;
	}

	pci_set_master(pdev);

	adapter = ice_adapter_get(pdev);
	if (IS_ERR(adapter))
		return PTR_ERR(adapter);

	pf->pdev = pdev;
	pf->adapter = adapter;
	pci_set_drvdata(pdev, pf);
	set_bit(ICE_DOWN, pf->state);
	/* Disable service task until DOWN bit is cleared */
	set_bit(ICE_SERVICE_DIS, pf->state);

	hw = &pf->hw;
	hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
	pci_save_state(pdev);

	hw->back = pf;
	hw->port_info = NULL;
	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;
	hw->bus.device = PCI_SLOT(pdev->devfn);
	hw->bus.func = PCI_FUNC(pdev->devfn);
	ice_set_ctrlq_len(hw);

	pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M);

#ifndef CONFIG_DYNAMIC_DEBUG
	if (debug < -1)
		hw->debug_mask = debug;
#endif

	err = ice_init(pf);
	if (err)
		goto err_init;

	devl_lock(priv_to_devlink(pf));
	err = ice_load(pf);
	if (err)
		goto err_load;

	err = ice_init_devlink(pf);
	if (err)
		goto err_init_devlink;
	devl_unlock(priv_to_devlink(pf));

	return 0;

err_init_devlink:
	ice_unload(pf);
err_load:
	devl_unlock(priv_to_devlink(pf));
	ice_deinit(pf);
err_init:
	ice_adapter_put(pdev);
	return err;
}
/**
 * ice_set_wake - enable or disable Wake on LAN
 * @pf: pointer to the PF struct
 *
 * Simple helper for WoL control
 */
static void ice_set_wake(struct ice_pf *pf)
{
	struct ice_hw *hw = &pf->hw;
	bool wol = pf->wol_ena;

	/* clear wake state, otherwise new wake events won't fire */
	wr32(hw, PFPM_WUS, U32_MAX);

	/* enable / disable APM wake up, no RMW needed */
	wr32(hw, PFPM_APM, wol ? PFPM_APM_APME_M : 0);

	/* set magic packet filter enabled */
	wr32(hw, PFPM_WUFC, wol ? PFPM_WUFC_MAG_M : 0);
}

/**
 * ice_setup_mc_magic_wake - setup device to wake on multicast magic packet
 * @pf: pointer to the PF struct
 *
 * Issue firmware command to enable multicast magic wake, making
 * sure that any locally administered address (LAA) is used for
 * wake, and that PF reset doesn't undo the LAA.
 */
static void ice_setup_mc_magic_wake(struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_hw *hw = &pf->hw;
	u8 mac_addr[ETH_ALEN];
	struct ice_vsi *vsi;
	int status;
	u8 flags;

	if (!pf->wol_ena)
		return;

	vsi = ice_get_main_vsi(pf);
	if (!vsi)
		return;

	/* Get current MAC address in case it's an LAA */
	if (vsi->netdev)
		ether_addr_copy(mac_addr, vsi->netdev->dev_addr);
	else
		ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);

	flags = ICE_AQC_MAN_MAC_WR_MC_MAG_EN |
		ICE_AQC_MAN_MAC_UPDATE_LAA_WOL |
		ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP;

	status = ice_aq_manage_mac_write(hw, mac_addr, flags, NULL);
	if (status)
		dev_err(dev, "Failed to enable Multicast Magic Packet wake, err %d aq_err %s\n",
			status, ice_aq_str(hw->adminq.sq_last_status));
}
/**
 * ice_remove - Device removal routine
 * @pdev: PCI device information struct
 */
static void ice_remove(struct pci_dev *pdev)
{
	struct ice_pf *pf = pci_get_drvdata(pdev);
	int i;

	for (i = 0; i < ICE_MAX_RESET_WAIT; i++) {
		if (!ice_is_reset_in_progress(pf->state))
			break;
		msleep(100);
	}

	if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) {
		set_bit(ICE_VF_RESETS_DISABLED, pf->state);
		ice_free_vfs(pf);
	}

	ice_hwmon_exit(pf);

	ice_service_task_stop(pf);
	ice_aq_cancel_waiting_tasks(pf);
	set_bit(ICE_DOWN, pf->state);

	if (!ice_is_safe_mode(pf))
		ice_remove_arfs(pf);

	devl_lock(priv_to_devlink(pf));
	ice_dealloc_all_dynamic_ports(pf);
	ice_deinit_devlink(pf);

	ice_unload(pf);
	devl_unlock(priv_to_devlink(pf));

	ice_deinit(pf);
	ice_vsi_release_all(pf);

	ice_setup_mc_magic_wake(pf);
	ice_set_wake(pf);

	ice_adapter_put(pdev);
}
/**
 * ice_shutdown - PCI callback for shutting down device
 * @pdev: PCI device information struct
 */
static void ice_shutdown(struct pci_dev *pdev)
{
	struct ice_pf *pf = pci_get_drvdata(pdev);

	ice_remove(pdev);

	if (system_state == SYSTEM_POWER_OFF) {
		pci_wake_from_d3(pdev, pf->wol_ena);
		pci_set_power_state(pdev, PCI_D3hot);
	}
}

/**
 * ice_prepare_for_shutdown - prep for PCI shutdown
 * @pf: board private structure
 *
 * Inform or close all dependent features in prep for PCI device shutdown
 */
static void ice_prepare_for_shutdown(struct ice_pf *pf)
{
	struct ice_hw *hw = &pf->hw;
	u32 v;

	/* Notify VFs of impending reset */
	if (ice_check_sq_alive(hw, &hw->mailboxq))
		ice_vc_notify_reset(pf);

	dev_dbg(ice_pf_to_dev(pf), "Tearing down internal switch for shutdown\n");

	/* disable the VSIs and their queues that are not already DOWN */
	ice_pf_dis_all_vsi(pf, false);

	ice_for_each_vsi(pf, v)
		if (pf->vsi[v])
			pf->vsi[v]->vsi_num = 0;

	ice_shutdown_all_ctrlq(hw, true);
}
/**
 * ice_reinit_interrupt_scheme - Reinitialize interrupt scheme
 * @pf: board private structure to reinitialize
 *
 * This routine reinitialize interrupt scheme that was cleared during
 * power management suspend callback.
 *
 * This should be called during resume routine to re-allocate the q_vectors
 * and reacquire interrupts.
 */
static int ice_reinit_interrupt_scheme(struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);
	int ret, v;

	/* Since we clear MSIX flag during suspend, we need to
	 * set it back during resume...
	 */

	ret = ice_init_interrupt_scheme(pf);
	if (ret) {
		dev_err(dev, "Failed to re-initialize interrupt %d\n", ret);
		return ret;
	}

	/* Remap vectors and rings, after successful re-init interrupts */
	ice_for_each_vsi(pf, v) {
		if (!pf->vsi[v])
			continue;

		ret = ice_vsi_alloc_q_vectors(pf->vsi[v]);
		if (ret)
			goto err_reinit;
		ice_vsi_map_rings_to_vectors(pf->vsi[v]);
		rtnl_lock();
		ice_vsi_set_napi_queues(pf->vsi[v]);
		rtnl_unlock();
	}

	ret = ice_req_irq_msix_misc(pf);
	if (ret) {
		dev_err(dev, "Setting up misc vector failed after device suspend %d\n",
			ret);
		goto err_reinit;
	}

	return 0;

err_reinit:
	while (v--)
		if (pf->vsi[v]) {
			rtnl_lock();
			ice_vsi_clear_napi_queues(pf->vsi[v]);
			rtnl_unlock();
			ice_vsi_free_q_vectors(pf->vsi[v]);
		}

	return ret;
}
/**
 * ice_suspend
 * @dev: generic device information structure
 *
 * Power Management callback to quiesce the device and prepare
 * for D3 transition.
 */
static int ice_suspend(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct ice_pf *pf;
	int disabled, v;

	pf = pci_get_drvdata(pdev);

	if (!ice_pf_state_is_nominal(pf)) {
		dev_err(dev, "Device is not ready, no need to suspend it\n");
		return -EBUSY;
	}

	/* Stop watchdog tasks until resume completion.
	 * Even though it is most likely that the service task is
	 * disabled if the device is suspended or down, the service task's
	 * state is controlled by a different state bit, and we should
	 * store and honor whatever state that bit is in at this point.
	 */
	disabled = ice_service_task_stop(pf);

	ice_deinit_rdma(pf);

	/* Already suspended?, then there is nothing to do */
	if (test_and_set_bit(ICE_SUSPENDED, pf->state)) {
		if (!disabled)
			ice_service_task_restart(pf);
		return 0;
	}

	if (test_bit(ICE_DOWN, pf->state) ||
	    ice_is_reset_in_progress(pf->state)) {
		dev_err(dev, "can't suspend device in reset or already down\n");
		if (!disabled)
			ice_service_task_restart(pf);
		return 0;
	}

	ice_setup_mc_magic_wake(pf);

	ice_prepare_for_shutdown(pf);

	ice_set_wake(pf);

	/* Free vectors, clear the interrupt scheme and release IRQs
	 * for proper hibernation, especially with large number of CPUs.
	 * Otherwise hibernation might fail when mapping all the vectors back
	 * to CPU0.
	 */
	ice_free_irq_msix_misc(pf);
	ice_for_each_vsi(pf, v) {
		if (!pf->vsi[v])
			continue;
		rtnl_lock();
		ice_vsi_clear_napi_queues(pf->vsi[v]);
		rtnl_unlock();
		ice_vsi_free_q_vectors(pf->vsi[v]);
	}
	ice_clear_interrupt_scheme(pf);

	pci_save_state(pdev);
	pci_wake_from_d3(pdev, pf->wol_ena);
	pci_set_power_state(pdev, PCI_D3hot);
	return 0;
}
/**
 * ice_resume - PM callback for waking up from D3
 * @dev: generic device information structure
 */
static int ice_resume(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	enum ice_reset_req reset_type;
	struct ice_pf *pf;
	struct ice_hw *hw;
	int ret;

	pci_set_power_state(pdev, PCI_D0);
	pci_restore_state(pdev);
	pci_save_state(pdev);

	if (!pci_device_is_present(pdev))
		return -ENODEV;

	ret = pci_enable_device_mem(pdev);
	if (ret) {
		dev_err(dev, "Cannot enable device after suspend\n");
		return ret;
	}

	pf = pci_get_drvdata(pdev);
	hw = &pf->hw;

	pf->wakeup_reason = rd32(hw, PFPM_WUS);
	ice_print_wake_reason(pf);

	/* We cleared the interrupt scheme when we suspended, so we need to
	 * restore it now to resume device functionality.
	 */
	ret = ice_reinit_interrupt_scheme(pf);
	if (ret)
		dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret);

	ret = ice_init_rdma(pf);
	if (ret)
		dev_err(dev, "Reinitialize RDMA during resume failed: %d\n",
			ret);

	clear_bit(ICE_DOWN, pf->state);
	/* Now perform PF reset and rebuild */
	reset_type = ICE_RESET_PFR;
	/* re-enable service task for reset, but allow reset to schedule it */
	clear_bit(ICE_SERVICE_DIS, pf->state);

	if (ice_schedule_reset(pf, reset_type))
		dev_err(dev, "Reset during resume failed.\n");

	clear_bit(ICE_SUSPENDED, pf->state);
	ice_service_task_restart(pf);

	/* Restart the service task */
	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));

	return 0;
}
5720 * @pdev: PCI device information struct
5721 * @err: the type of PCI error
5723 * Called to warn that something happened on the PCI bus and the error handling
5724 * is in progress. Allows the driver to gracefully prepare/handle PCI errors.
5726 static pci_ers_result_t
5727 ice_pci_err_detected(struct pci_dev
*pdev
, pci_channel_state_t err
)
5729 struct ice_pf
*pf
= pci_get_drvdata(pdev
);
5732 dev_err(&pdev
->dev
, "%s: unrecoverable device error %d\n",
5734 return PCI_ERS_RESULT_DISCONNECT
;
5737 if (!test_bit(ICE_SUSPENDED
, pf
->state
)) {
5738 ice_service_task_stop(pf
);
5740 if (!test_bit(ICE_PREPARED_FOR_RESET
, pf
->state
)) {
5741 set_bit(ICE_PFR_REQ
, pf
->state
);
5742 ice_prepare_for_reset(pf
, ICE_RESET_PFR
);
5746 return PCI_ERS_RESULT_NEED_RESET
;
5750 * ice_pci_err_slot_reset - a PCI slot reset has just happened
5751 * @pdev: PCI device information struct
5753 * Called to determine if the driver can recover from the PCI slot reset by
5754 * using a register read to determine if the device is recoverable.
5756 static pci_ers_result_t
ice_pci_err_slot_reset(struct pci_dev
*pdev
)
5758 struct ice_pf
*pf
= pci_get_drvdata(pdev
);
5759 pci_ers_result_t result
;
5763 err
= pci_enable_device_mem(pdev
);
5765 dev_err(&pdev
->dev
, "Cannot re-enable PCI device after reset, error %d\n",
5767 result
= PCI_ERS_RESULT_DISCONNECT
;
5769 pci_set_master(pdev
);
5770 pci_restore_state(pdev
);
5771 pci_save_state(pdev
);
5772 pci_wake_from_d3(pdev
, false);
5774 /* Check for life */
5775 reg
= rd32(&pf
->hw
, GLGEN_RTRIG
);
5777 result
= PCI_ERS_RESULT_RECOVERED
;
5779 result
= PCI_ERS_RESULT_DISCONNECT
;
5786 * ice_pci_err_resume - restart operations after PCI error recovery
5787 * @pdev: PCI device information struct
5789 * Called to allow the driver to bring things back up after PCI error and/or
5790 * reset recovery have finished
5792 static void ice_pci_err_resume(struct pci_dev
*pdev
)
5794 struct ice_pf
*pf
= pci_get_drvdata(pdev
);
5797 dev_err(&pdev
->dev
, "%s failed, device is unrecoverable\n",
5802 if (test_bit(ICE_SUSPENDED
, pf
->state
)) {
5803 dev_dbg(&pdev
->dev
, "%s failed to resume normal operations!\n",
5808 ice_restore_all_vfs_msi_state(pf
);
5810 ice_do_reset(pf
, ICE_RESET_PFR
);
5811 ice_service_task_restart(pf
);
5812 mod_timer(&pf
->serv_tmr
, round_jiffies(jiffies
+ pf
->serv_tmr_period
));
5816 * ice_pci_err_reset_prepare - prepare device driver for PCI reset
5817 * @pdev: PCI device information struct
5819 static void ice_pci_err_reset_prepare(struct pci_dev
*pdev
)
5821 struct ice_pf
*pf
= pci_get_drvdata(pdev
);
5823 if (!test_bit(ICE_SUSPENDED
, pf
->state
)) {
5824 ice_service_task_stop(pf
);
5826 if (!test_bit(ICE_PREPARED_FOR_RESET
, pf
->state
)) {
5827 set_bit(ICE_PFR_REQ
, pf
->state
);
5828 ice_prepare_for_reset(pf
, ICE_RESET_PFR
);
5834 * ice_pci_err_reset_done - PCI reset done, device driver reset can begin
5835 * @pdev: PCI device information struct
5837 static void ice_pci_err_reset_done(struct pci_dev
*pdev
)
5839 ice_pci_err_resume(pdev
);
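/* These callbacks hook into the kernel's PCIe AER recovery flow, which on
 * capable platforms can also be exercised with error-injection tooling (for
 * example the aer-inject utility); recovery walks the
 * error_detected -> slot_reset -> resume sequence implemented above.
 */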
/* ice_pci_tbl - PCI Device ID Table
 *
 * Wildcard entries (PCI_ANY_ID) should come last
 * Last entry must be all 0s
 *
 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
 *   Class, Class Mask, private data (not used) }
 */
static const struct pci_device_id ice_pci_tbl[] = {
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_BACKPLANE) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_QSFP) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_SFP) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_BACKPLANE) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_QSFP) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SFP) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_10G_BASE_T) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SGMII) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_BACKPLANE) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_QSFP) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SFP) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_10G_BASE_T) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SGMII) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_BACKPLANE) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SFP) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_10G_BASE_T) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SGMII) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_BACKPLANE) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_SFP) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_10G_BASE_T) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822_SI_DFLT) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_BACKPLANE), },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_QSFP), },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_SFP), },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_SGMII), },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830CC_BACKPLANE) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830CC_QSFP56) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830CC_SFP) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830CC_SFP_DD) },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830C_BACKPLANE), },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_XXV_BACKPLANE), },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830C_QSFP), },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_XXV_QSFP), },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830C_SFP), },
	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_XXV_SFP), },
	/* required last entry */
	{}
};
MODULE_DEVICE_TABLE(pci, ice_pci_tbl);
static DEFINE_SIMPLE_DEV_PM_OPS(ice_pm_ops, ice_suspend, ice_resume);

static const struct pci_error_handlers ice_pci_err_handler = {
	.error_detected = ice_pci_err_detected,
	.slot_reset = ice_pci_err_slot_reset,
	.reset_prepare = ice_pci_err_reset_prepare,
	.reset_done = ice_pci_err_reset_done,
	.resume = ice_pci_err_resume
};

static struct pci_driver ice_driver = {
	.name = KBUILD_MODNAME,
	.id_table = ice_pci_tbl,
	.probe = ice_probe,
	.remove = ice_remove,
	.driver.pm = pm_sleep_ptr(&ice_pm_ops),
	.shutdown = ice_shutdown,
	.sriov_configure = ice_sriov_configure,
	.sriov_get_vf_total_msix = ice_sriov_get_vf_total_msix,
	.sriov_set_msix_vec_count = ice_sriov_set_msix_vec_count,
	.err_handler = &ice_pci_err_handler
};
/**
 * ice_module_init - Driver registration routine
 *
 * ice_module_init is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 */
static int __init ice_module_init(void)
{
	int status = -ENOMEM;

	pr_info("%s\n", ice_driver_string);
	pr_info("%s\n", ice_copyright);

	ice_adv_lnk_speed_maps_init();

	ice_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, KBUILD_MODNAME);
	if (!ice_wq) {
		pr_err("Failed to create workqueue\n");
		return status;
	}

	ice_lag_wq = alloc_ordered_workqueue("ice_lag_wq", 0);
	if (!ice_lag_wq) {
		pr_err("Failed to create LAG workqueue\n");
		goto err_dest_wq;
	}

	ice_debugfs_init();

	status = pci_register_driver(&ice_driver);
	if (status) {
		pr_err("failed to register PCI driver, err %d\n", status);
		goto err_dest_lag_wq;
	}

	status = ice_sf_driver_register();
	if (status) {
		pr_err("Failed to register SF driver, err %d\n", status);
		goto err_sf_driver;
	}

	return 0;

err_sf_driver:
	pci_unregister_driver(&ice_driver);
err_dest_lag_wq:
	destroy_workqueue(ice_lag_wq);
	ice_debugfs_exit();
err_dest_wq:
	destroy_workqueue(ice_wq);
	return status;
}
module_init(ice_module_init);

/**
 * ice_module_exit - Driver exit cleanup routine
 *
 * ice_module_exit is called just before the driver is removed
 * from memory.
 */
static void __exit ice_module_exit(void)
{
	ice_sf_driver_unregister();
	pci_unregister_driver(&ice_driver);
	ice_debugfs_exit();
	destroy_workqueue(ice_wq);
	destroy_workqueue(ice_lag_wq);
	pr_info("module unloaded\n");
}
module_exit(ice_module_exit);
/**
 * ice_set_mac_address - NDO callback to set MAC address
 * @netdev: network interface device structure
 * @pi: pointer to an address structure
 *
 * Returns 0 on success, negative on failure
 */
static int ice_set_mac_address(struct net_device *netdev, void *pi)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = np->vsi;
	struct ice_pf *pf = vsi->back;
	struct ice_hw *hw = &pf->hw;
	struct sockaddr *addr = pi;
	u8 old_mac[ETH_ALEN];
	u8 flags = 0;
	u8 *mac;
	int err;

	mac = (u8 *)addr->sa_data;

	if (!is_valid_ether_addr(mac))
		return -EADDRNOTAVAIL;

	if (test_bit(ICE_DOWN, pf->state) ||
	    ice_is_reset_in_progress(pf->state)) {
		netdev_err(netdev, "can't set mac %pM. device not ready\n",
			   mac);
		return -EBUSY;
	}

	if (ice_chnl_dmac_fltr_cnt(pf)) {
		netdev_err(netdev, "can't set mac %pM. Device has tc-flower filters, delete all of them and try again\n",
			   mac);
		return -EAGAIN;
	}

	netif_addr_lock_bh(netdev);
	ether_addr_copy(old_mac, netdev->dev_addr);
	/* change the netdev's MAC address */
	eth_hw_addr_set(netdev, mac);
	netif_addr_unlock_bh(netdev);

	/* Clean up old MAC filter. Not an error if old filter doesn't exist */
	err = ice_fltr_remove_mac(vsi, old_mac, ICE_FWD_TO_VSI);
	if (err && err != -ENOENT) {
		err = -EADDRNOTAVAIL;
		goto err_update_filters;
	}

	/* Add filter for new MAC. If filter exists, return success */
	err = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI);
	if (err == -EEXIST) {
		/* Although this MAC filter is already present in hardware it's
		 * possible in some cases (e.g. bonding) that dev_addr was
		 * modified outside of the driver and needs to be restored back
		 * to this value.
		 */
		netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac);

		return 0;
	} else if (err) {
		/* error if the new filter addition failed */
		err = -EADDRNOTAVAIL;
	}

err_update_filters:
	if (err) {
		netdev_err(netdev, "can't set MAC %pM. filter update failed\n",
			   mac);
		netif_addr_lock_bh(netdev);
		eth_hw_addr_set(netdev, old_mac);
		netif_addr_unlock_bh(netdev);
		return err;
	}

	netdev_dbg(vsi->netdev, "updated MAC address to %pM\n",
		   netdev->dev_addr);

	/* write new MAC address to the firmware */
	flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL;
	err = ice_aq_manage_mac_write(hw, mac, flags, NULL);
	if (err) {
		netdev_err(netdev, "can't set MAC %pM. write to firmware failed error %d\n",
			   mac, err);
	}
	return 0;
}
/**
 * ice_set_rx_mode - NDO callback to set the netdev filters
 * @netdev: network interface device structure
 */
static void ice_set_rx_mode(struct net_device *netdev)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = np->vsi;

	if (!vsi || ice_is_switchdev_running(vsi->back))
		return;

	/* Set the flags to synchronize filters
	 * ndo_set_rx_mode may be triggered even without a change in netdev
	 * flags
	 */
	set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
	set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
	set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags);

	/* schedule our worker thread which will take care of
	 * applying the new filter changes
	 */
	ice_service_task_schedule(vsi->back);
}
/**
 * ice_set_tx_maxrate - NDO callback to set the maximum per-queue bitrate
 * @netdev: network interface device structure
 * @queue_index: Queue ID
 * @maxrate: maximum bandwidth in Mbps
 */
static int
ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = np->vsi;
	u16 q_handle;
	int status;
	u8 tc;

	/* Validate maxrate requested is within permitted range */
	if (maxrate && (maxrate > (ICE_SCHED_MAX_BW / 1000))) {
		netdev_err(netdev, "Invalid max rate %d specified for the queue %d\n",
			   maxrate, queue_index);
		return -EINVAL;
	}

	q_handle = vsi->tx_rings[queue_index]->q_handle;
	tc = ice_dcb_get_tc(vsi, queue_index);

	vsi = ice_locate_vsi_using_queue(vsi, queue_index);
	if (!vsi) {
		netdev_err(netdev, "Invalid VSI for given queue %d\n",
			   queue_index);
		return -EINVAL;
	}

	/* Set BW back to default, when user set maxrate to 0 */
	if (!maxrate)
		status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc,
					       q_handle, ICE_MAX_BW);
	else
		status = ice_cfg_q_bw_lmt(vsi->port_info, vsi->idx, tc,
					  q_handle, ICE_MAX_BW, maxrate * 1000);
	if (status)
		netdev_err(netdev, "Unable to set Tx max rate, error %d\n",
			   status);

	return status;
}
/**
 * ice_fdb_add - add an entry to the hardware database
 * @ndm: the input from the stack
 * @tb: pointer to array of nladdr (unused)
 * @dev: the net device pointer
 * @addr: the MAC address entry being added
 * @vid: VLAN ID
 * @flags: instructions from stack about fdb operation
 * @notified: whether notification was emitted
 * @extack: netlink extended ack
 */
static int
ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[],
	    struct net_device *dev, const unsigned char *addr, u16 vid,
	    u16 flags, bool *notified,
	    struct netlink_ext_ack __always_unused *extack)
{
	int err;

	if (vid) {
		netdev_err(dev, "VLANs aren't supported yet for dev_uc|mc_add()\n");
		return -EINVAL;
	}
	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
		netdev_err(dev, "FDB only supports static addresses\n");
		return -EINVAL;
	}

	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
		err = dev_uc_add_excl(dev, addr);
	else if (is_multicast_ether_addr(addr))
		err = dev_mc_add_excl(dev, addr);
	else
		err = -EINVAL;

	/* Only return duplicate errors if NLM_F_EXCL is set */
	if (err == -EEXIST && !(flags & NLM_F_EXCL))
		err = 0;

	return err;
}

/**
 * ice_fdb_del - delete an entry from the hardware database
 * @ndm: the input from the stack
 * @tb: pointer to array of nladdr (unused)
 * @dev: the net device pointer
 * @addr: the MAC address entry being added
 * @vid: VLAN ID
 * @notified: whether notification was emitted
 * @extack: netlink extended ack
 */
static int
ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[],
	    struct net_device *dev, const unsigned char *addr,
	    __always_unused u16 vid, bool *notified,
	    struct netlink_ext_ack *extack)
{
	int err;

	if (ndm->ndm_state & NUD_PERMANENT) {
		netdev_err(dev, "FDB only supports static addresses\n");
		return -EINVAL;
	}

	if (is_unicast_ether_addr(addr))
		err = dev_uc_del(dev, addr);
	else if (is_multicast_ether_addr(addr))
		err = dev_mc_del(dev, addr);
	else
		err = -EINVAL;

	return err;
}
#define NETIF_VLAN_OFFLOAD_FEATURES	(NETIF_F_HW_VLAN_CTAG_RX | \
					 NETIF_F_HW_VLAN_CTAG_TX | \
					 NETIF_F_HW_VLAN_STAG_RX | \
					 NETIF_F_HW_VLAN_STAG_TX)

#define NETIF_VLAN_STRIPPING_FEATURES	(NETIF_F_HW_VLAN_CTAG_RX | \
					 NETIF_F_HW_VLAN_STAG_RX)

#define NETIF_VLAN_FILTERING_FEATURES	(NETIF_F_HW_VLAN_CTAG_FILTER | \
					 NETIF_F_HW_VLAN_STAG_FILTER)
/**
 * ice_fix_features - fix the netdev features flags based on device limitations
 * @netdev: ptr to the netdev that flags are being fixed on
 * @features: features that need to be checked and possibly fixed
 *
 * Make sure any fixups are made to features in this callback. This enables the
 * driver to not have to check unsupported configurations throughout the driver
 * because that's the responsibility of this callback.
 *
 * Single VLAN Mode (SVM) Supported Features:
 *	NETIF_F_HW_VLAN_CTAG_FILTER
 *	NETIF_F_HW_VLAN_CTAG_RX
 *	NETIF_F_HW_VLAN_CTAG_TX
 *
 * Double VLAN Mode (DVM) Supported Features:
 *	NETIF_F_HW_VLAN_CTAG_FILTER
 *	NETIF_F_HW_VLAN_CTAG_RX
 *	NETIF_F_HW_VLAN_CTAG_TX
 *
 *	NETIF_F_HW_VLAN_STAG_FILTER
 *	NETIF_F_HW_VLAN_STAG_RX
 *	NETIF_F_HW_VLAN_STAG_TX
 *
 * Features that need fixing:
 *	Cannot simultaneously enable CTAG and STAG stripping and/or insertion.
 *	These are mutually exclusive as the VSI context cannot support multiple
 *	VLAN ethertypes simultaneously for stripping and/or insertion. If this
 *	is not done, then default to clearing the requested STAG offload
 *	settings.
 *
 *	All supported filtering has to be enabled or disabled together. For
 *	example, in DVM, CTAG and STAG filtering have to be enabled and disabled
 *	together. If this is not done, then default to VLAN filtering disabled.
 *	These are mutually exclusive as there is currently no way to
 *	enable/disable VLAN filtering based on VLAN ethertype when using VLAN
 *	prune rules.
 */
static netdev_features_t
ice_fix_features(struct net_device *netdev, netdev_features_t features)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	netdev_features_t req_vlan_fltr, cur_vlan_fltr;
	bool cur_ctag, cur_stag, req_ctag, req_stag;

	cur_vlan_fltr = netdev->features & NETIF_VLAN_FILTERING_FEATURES;
	cur_ctag = cur_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
	cur_stag = cur_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;

	req_vlan_fltr = features & NETIF_VLAN_FILTERING_FEATURES;
	req_ctag = req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
	req_stag = req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;

	if (req_vlan_fltr != cur_vlan_fltr) {
		if (ice_is_dvm_ena(&np->vsi->back->hw)) {
			if (req_ctag && req_stag) {
				features |= NETIF_VLAN_FILTERING_FEATURES;
			} else if (!req_ctag && !req_stag) {
				features &= ~NETIF_VLAN_FILTERING_FEATURES;
			} else if ((!cur_ctag && req_ctag && !cur_stag) ||
				   (!cur_stag && req_stag && !cur_ctag)) {
				features |= NETIF_VLAN_FILTERING_FEATURES;
				netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been enabled for both types.\n");
			} else if ((cur_ctag && !req_ctag && cur_stag) ||
				   (cur_stag && !req_stag && cur_ctag)) {
				features &= ~NETIF_VLAN_FILTERING_FEATURES;
				netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been disabled for both types.\n");
			}
		} else {
			if (req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER)
				netdev_warn(netdev, "cannot support requested 802.1ad filtering setting in SVM mode\n");

			if (req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER)
				features |= NETIF_F_HW_VLAN_CTAG_FILTER;
		}
	}

	if ((features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX)) &&
	    (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX))) {
		netdev_warn(netdev, "cannot support CTAG and STAG VLAN stripping and/or insertion simultaneously since CTAG and STAG offloads are mutually exclusive, clearing STAG offload settings\n");
		features &= ~(NETIF_F_HW_VLAN_STAG_RX |
			      NETIF_F_HW_VLAN_STAG_TX);
	}

	if (!(netdev->features & NETIF_F_RXFCS) &&
	    (features & NETIF_F_RXFCS) &&
	    (features & NETIF_VLAN_STRIPPING_FEATURES) &&
	    !ice_vsi_has_non_zero_vlans(np->vsi)) {
		netdev_warn(netdev, "Disabling VLAN stripping as FCS/CRC stripping is also disabled and there is no VLAN configured\n");
		features &= ~NETIF_VLAN_STRIPPING_FEATURES;
	}

	return features;
}
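/* For example, with DVM active and both filters currently off, requesting
 * only one of them via "ethtool -K <ifname> rx-vlan-filter on" ends up with
 * both rx-vlan-filter and rx-vlan-stag-filter enabled, per the
 * "both on or both off" rule enforced above.
 */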
/**
 * ice_set_rx_rings_vlan_proto - update rings with new stripped VLAN proto
 * @vsi: VSI to update
 * @vlan_ethertype: VLAN ethertype (802.1Q or 802.1ad) in network byte order
 *
 * Store current stripped VLAN proto in ring packet context,
 * so it can be accessed more efficiently by packet processing code.
 */
static void
ice_set_rx_rings_vlan_proto(struct ice_vsi *vsi, __be16 vlan_ethertype)
{
	u16 i;

	ice_for_each_alloc_rxq(vsi, i)
		vsi->rx_rings[i]->pkt_ctx.vlan_proto = vlan_ethertype;
}
/**
 * ice_set_vlan_offload_features - set VLAN offload features for the PF VSI
 * @vsi: PF's VSI
 * @features: features used to determine VLAN offload settings
 *
 * First, determine the vlan_ethertype based on the VLAN offload bits in
 * features. Then determine if stripping and insertion should be enabled or
 * disabled. Finally enable or disable VLAN stripping and insertion.
 */
static int
ice_set_vlan_offload_features(struct ice_vsi *vsi, netdev_features_t features)
{
	bool enable_stripping = true, enable_insertion = true;
	struct ice_vsi_vlan_ops *vlan_ops;
	int strip_err = 0, insert_err = 0;
	u16 vlan_ethertype = 0;

	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);

	if (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX))
		vlan_ethertype = ETH_P_8021AD;
	else if (features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX))
		vlan_ethertype = ETH_P_8021Q;

	if (!(features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_CTAG_RX)))
		enable_stripping = false;
	if (!(features & (NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_CTAG_TX)))
		enable_insertion = false;

	if (enable_stripping)
		strip_err = vlan_ops->ena_stripping(vsi, vlan_ethertype);
	else
		strip_err = vlan_ops->dis_stripping(vsi);

	if (enable_insertion)
		insert_err = vlan_ops->ena_insertion(vsi, vlan_ethertype);
	else
		insert_err = vlan_ops->dis_insertion(vsi);

	if (strip_err || insert_err)
		return -EIO;

	ice_set_rx_rings_vlan_proto(vsi, enable_stripping ?
				    htons(vlan_ethertype) : 0);

	return 0;
}
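
/* Summary of the ethertype selection above: STAG offload bits take
 * precedence, so if either NETIF_F_HW_VLAN_STAG_RX or _STAG_TX is requested
 * the stripped/inserted tag is ETH_P_8021AD (0x88A8); otherwise the CTAG
 * bits select ETH_P_8021Q (0x8100). ice_fix_features() already keeps the
 * CTAG and STAG offload families mutually exclusive, so only one family is
 * normally set here.
 */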
/**
 * ice_set_vlan_filtering_features - set VLAN filtering features for the PF VSI
 * @vsi: PF's VSI
 * @features: features used to determine VLAN filtering settings
 *
 * Enable or disable Rx VLAN filtering based on the VLAN filtering bits in the
 * features.
 */
static int
ice_set_vlan_filtering_features(struct ice_vsi *vsi, netdev_features_t features)
{
	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
	int err = 0;

	/* support Single VLAN Mode (SVM) and Double VLAN Mode (DVM) by checking
	 * if either bit is set. In switchdev mode Rx filtering should never be
	 * enabled.
	 */
	if ((features &
	     (NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)) &&
	     !ice_is_eswitch_mode_switchdev(vsi->back))
		err = vlan_ops->ena_rx_filtering(vsi);
	else
		err = vlan_ops->dis_rx_filtering(vsi);

	return err;
}
/**
 * ice_set_vlan_features - set VLAN settings based on suggested feature set
 * @netdev: ptr to the netdev being adjusted
 * @features: the feature set that the stack is suggesting
 *
 * Only update VLAN settings if the requested_vlan_features are different than
 * the current_vlan_features.
 */
static int
ice_set_vlan_features(struct net_device *netdev, netdev_features_t features)
{
	netdev_features_t current_vlan_features, requested_vlan_features;
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = np->vsi;
	int err;

	current_vlan_features = netdev->features & NETIF_VLAN_OFFLOAD_FEATURES;
	requested_vlan_features = features & NETIF_VLAN_OFFLOAD_FEATURES;
	if (current_vlan_features ^ requested_vlan_features) {
		if ((features & NETIF_F_RXFCS) &&
		    (features & NETIF_VLAN_STRIPPING_FEATURES)) {
			dev_err(ice_pf_to_dev(vsi->back),
				"To enable VLAN stripping, you must first enable FCS/CRC stripping\n");
			return -EIO;
		}

		err = ice_set_vlan_offload_features(vsi, features);
		if (err)
			return err;
	}

	current_vlan_features = netdev->features &
		NETIF_VLAN_FILTERING_FEATURES;
	requested_vlan_features = features & NETIF_VLAN_FILTERING_FEATURES;
	if (current_vlan_features ^ requested_vlan_features) {
		err = ice_set_vlan_filtering_features(vsi, features);
		if (err)
			return err;
	}

	return 0;
}
/**
 * ice_set_loopback - turn on/off loopback mode on underlying PF
 * @vsi: ptr to the VSI
 * @ena: flag to indicate the on/off setting
 */
static int ice_set_loopback(struct ice_vsi *vsi, bool ena)
{
	bool if_running = netif_running(vsi->netdev);
	int ret;

	if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
		ret = ice_down(vsi);
		if (ret) {
			netdev_err(vsi->netdev, "Preparing device to toggle loopback failed\n");
			return ret;
		}
	}
	ret = ice_aq_set_mac_loopback(&vsi->back->hw, ena, NULL);
	if (ret)
		netdev_err(vsi->netdev, "Failed to toggle loopback state\n");
	if (if_running)
		ret = ice_up(vsi);

	return ret;
}
/**
 * ice_set_features - set the netdev feature flags
 * @netdev: ptr to the netdev being adjusted
 * @features: the feature set that the stack is suggesting
 */
static int
ice_set_features(struct net_device *netdev, netdev_features_t features)
{
	netdev_features_t changed = netdev->features ^ features;
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = np->vsi;
	struct ice_pf *pf = vsi->back;
	int ret = 0;

	/* Don't set any netdev advanced features with device in Safe Mode */
	if (ice_is_safe_mode(pf)) {
		dev_err(ice_pf_to_dev(pf),
			"Device is in Safe Mode - not enabling advanced netdev features\n");
		return ret;
	}

	/* Do not change setting during reset */
	if (ice_is_reset_in_progress(pf->state)) {
		dev_err(ice_pf_to_dev(pf),
			"Device is resetting, changing advanced netdev features temporarily unavailable.\n");
		return -EBUSY;
	}

	/* Multiple features can be changed in one call so keep features in
	 * separate if/else statements to guarantee each feature is checked
	 */
	if (changed & NETIF_F_RXHASH)
		ice_vsi_manage_rss_lut(vsi, !!(features & NETIF_F_RXHASH));

	ret = ice_set_vlan_features(netdev, features);
	if (ret)
		return ret;

	/* Turn on receive of FCS aka CRC, and after setting this
	 * flag the packet data will have the 4 byte CRC appended
	 */
	if (changed & NETIF_F_RXFCS) {
		if ((features & NETIF_F_RXFCS) &&
		    (features & NETIF_VLAN_STRIPPING_FEATURES)) {
			dev_err(ice_pf_to_dev(vsi->back),
				"To disable FCS/CRC stripping, you must first disable VLAN stripping\n");
			return -EIO;
		}

		ice_vsi_cfg_crc_strip(vsi, !!(features & NETIF_F_RXFCS));
		ret = ice_down_up(vsi);
		if (ret)
			return ret;
	}

	if (changed & NETIF_F_NTUPLE) {
		bool ena = !!(features & NETIF_F_NTUPLE);

		ice_vsi_manage_fdir(vsi, ena);
		ena ? ice_init_arfs(vsi) : ice_clear_arfs(vsi);
	}

	/* don't turn off hw_tc_offload when ADQ is already enabled */
	if (!(features & NETIF_F_HW_TC) && ice_is_adq_active(pf)) {
		dev_err(ice_pf_to_dev(pf), "ADQ is active, can't turn hw_tc_offload off\n");
		return -EACCES;
	}

	if (changed & NETIF_F_HW_TC) {
		bool ena = !!(features & NETIF_F_HW_TC);

		assign_bit(ICE_FLAG_CLS_FLOWER, pf->flags, ena);
	}

	if (changed & NETIF_F_LOOPBACK)
		ret = ice_set_loopback(vsi, !!(features & NETIF_F_LOOPBACK));

	return ret;
}
/**
 * ice_vsi_vlan_setup - Setup VLAN offload properties on a PF VSI
 * @vsi: VSI to setup VLAN properties for
 */
static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
{
	int err;

	err = ice_set_vlan_offload_features(vsi, vsi->netdev->features);
	if (err)
		return err;

	err = ice_set_vlan_filtering_features(vsi, vsi->netdev->features);
	if (err)
		return err;

	return ice_vsi_add_vlan_zero(vsi);
}

/**
 * ice_vsi_cfg_lan - Setup the VSI lan related config
 * @vsi: the VSI being configured
 *
 * Return 0 on success and negative value on error
 */
int ice_vsi_cfg_lan(struct ice_vsi *vsi)
{
	int err;

	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
		ice_set_rx_mode(vsi->netdev);

		err = ice_vsi_vlan_setup(vsi);
		if (err)
			return err;
	}
	ice_vsi_cfg_dcb_rings(vsi);

	err = ice_vsi_cfg_lan_txqs(vsi);
	if (!err && ice_is_xdp_ena_vsi(vsi))
		err = ice_vsi_cfg_xdp_txqs(vsi);
	if (!err)
		err = ice_vsi_cfg_rxqs(vsi);

	return err;
}
/* THEORY OF MODERATION:
 * The ice driver hardware works differently than the hardware that DIMLIB was
 * originally made for. ice hardware doesn't have packet count limits that
 * can trigger an interrupt, but it *does* have interrupt rate limit support,
 * which is hard-coded to a limit of 250,000 ints/second.
 * If not using dynamic moderation, the INTRL value can be modified
 * by ethtool rx-usecs-high.
 */
struct ice_dim {
	/* the throttle rate for interrupts, basically worst case delay before
	 * an initial interrupt fires, value is stored in microseconds.
	 */
	u16 itr;
};

/* Make a different profile for Rx that doesn't allow quite so aggressive
 * moderation at the high end (it maxes out at 126us or about 8k interrupts a
 * second.)
 */
static const struct ice_dim rx_profile[] = {
	{2},    /* 500,000 ints/s, capped at 250K by INTRL */
	{8},    /* 125,000 ints/s */
	{16},   /*  62,500 ints/s */
	{62},   /*  16,129 ints/s */
	{126}   /*   7,936 ints/s */
};

/* The transmit profile, which has the same sorts of values
 * as the previous struct
 */
static const struct ice_dim tx_profile[] = {
	{2},    /* 500,000 ints/s, capped at 250K by INTRL */
	{8},    /* 125,000 ints/s */
	{40},   /*  16,125 ints/s */
	{128},  /*   7,812 ints/s */
	{256}   /*   3,906 ints/s */
};

static void ice_tx_dim_work(struct work_struct *work)
{
	struct ice_ring_container *rc;
	struct dim *dim;
	u16 itr;

	dim = container_of(work, struct dim, work);
	rc = dim->priv;

	WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile));

	/* look up the values in our local table */
	itr = tx_profile[dim->profile_ix].itr;

	ice_trace(tx_dim_work, container_of(rc, struct ice_q_vector, tx), dim);
	ice_write_itr(rc, itr);

	dim->state = DIM_START_MEASURE;
}

static void ice_rx_dim_work(struct work_struct *work)
{
	struct ice_ring_container *rc;
	struct dim *dim;
	u16 itr;

	dim = container_of(work, struct dim, work);
	rc = dim->priv;

	WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile));

	/* look up the values in our local table */
	itr = rx_profile[dim->profile_ix].itr;

	ice_trace(rx_dim_work, container_of(rc, struct ice_q_vector, rx), dim);
	ice_write_itr(rc, itr);

	dim->state = DIM_START_MEASURE;
}
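
/* Rate arithmetic behind the profiles above: the itr value is a delay in
 * microseconds, so the expected ceiling is roughly 1,000,000 / itr
 * interrupts per second (8 us -> ~125,000 ints/s, 126 us -> ~7,936 ints/s).
 * The smallest entry would allow ~500,000 ints/s, but the hard-coded INTRL
 * limit caps the actual rate at 250,000 ints/s.
 */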
#define ICE_DIM_DEFAULT_PROFILE_IX 1

/**
 * ice_init_moderation - set up interrupt moderation
 * @q_vector: the vector containing rings to be configured
 *
 * Set up interrupt moderation registers, with the intent to do the right thing
 * when called from reset or from probe, and whether or not dynamic moderation
 * is enabled or not. Take special care to write all the registers in both
 * dynamic moderation mode or not in order to make sure hardware is in a known
 * state.
 */
static void ice_init_moderation(struct ice_q_vector *q_vector)
{
	struct ice_ring_container *rc;
	bool tx_dynamic, rx_dynamic;

	rc = &q_vector->tx;
	INIT_WORK(&rc->dim.work, ice_tx_dim_work);
	rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
	rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
	rc->dim.priv = rc;
	tx_dynamic = ITR_IS_DYNAMIC(rc);

	/* set the initial TX ITR to match the above */
	ice_write_itr(rc, tx_dynamic ?
		      tx_profile[rc->dim.profile_ix].itr : rc->itr_setting);

	rc = &q_vector->rx;
	INIT_WORK(&rc->dim.work, ice_rx_dim_work);
	rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
	rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
	rc->dim.priv = rc;
	rx_dynamic = ITR_IS_DYNAMIC(rc);

	/* set the initial RX ITR to match the above */
	ice_write_itr(rc, rx_dynamic ? rx_profile[rc->dim.profile_ix].itr :
		      rc->itr_setting);

	ice_set_q_vector_intrl(q_vector);
}
/**
 * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI
 * @vsi: the VSI being configured
 */
static void ice_napi_enable_all(struct ice_vsi *vsi)
{
	int q_idx;

	if (!vsi->netdev)
		return;

	ice_for_each_q_vector(vsi, q_idx) {
		struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];

		ice_init_moderation(q_vector);

		if (q_vector->rx.rx_ring || q_vector->tx.tx_ring)
			napi_enable(&q_vector->napi);
	}
}
/**
 * ice_up_complete - Finish the last steps of bringing up a connection
 * @vsi: The VSI being configured
 *
 * Return 0 on success and negative value on error
 */
static int ice_up_complete(struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	int err;

	ice_vsi_cfg_msix(vsi);

	/* Enable only Rx rings, Tx rings were enabled by the FW when the
	 * Tx queue group list was configured and the context bits were
	 * programmed using ice_vsi_cfg_txqs
	 */
	err = ice_vsi_start_all_rx_rings(vsi);
	if (err)
		return err;

	clear_bit(ICE_VSI_DOWN, vsi->state);
	ice_napi_enable_all(vsi);
	ice_vsi_ena_irq(vsi);

	if (vsi->port_info &&
	    (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) &&
	    ((vsi->netdev && (vsi->type == ICE_VSI_PF ||
			      vsi->type == ICE_VSI_SF)))) {
		ice_print_link_msg(vsi, true);
		netif_tx_start_all_queues(vsi->netdev);
		netif_carrier_on(vsi->netdev);
		ice_ptp_link_change(pf, pf->hw.pf_id, true);
	}

	/* Perform an initial read of the statistics registers now to
	 * set the baseline so counters are ready when interface is up
	 */
	ice_update_eth_stats(vsi);

	if (vsi->type == ICE_VSI_PF)
		ice_service_task_schedule(pf);

	return 0;
}

/**
 * ice_up - Bring the connection back up after being down
 * @vsi: VSI being configured
 */
int ice_up(struct ice_vsi *vsi)
{
	int err;

	err = ice_vsi_cfg_lan(vsi);
	if (!err)
		err = ice_up_complete(vsi);

	return err;
}
/**
 * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring
 * @syncp: pointer to u64_stats_sync
 * @stats: stats that pkts and bytes count will be taken from
 * @pkts: packets stats counter
 * @bytes: bytes stats counter
 *
 * This function fetches stats from the ring considering the atomic operations
 * that needs to be performed to read u64 values in 32 bit machine.
 */
void
ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp,
			     struct ice_q_stats stats, u64 *pkts, u64 *bytes)
{
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(syncp);
		*pkts = stats.pkts;
		*bytes = stats.bytes;
	} while (u64_stats_fetch_retry(syncp, start));
}
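
/* The fetch loop above relies on the u64_stats seqcount protocol: on 32-bit
 * kernels a 64-bit counter cannot be read atomically, so the reads are
 * bracketed by u64_stats_fetch_begin()/u64_stats_fetch_retry() and repeated
 * if a writer updated the ring stats in between; on 64-bit kernels the
 * helpers reduce to plain loads.
 */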
6846 * ice_update_vsi_tx_ring_stats - Update VSI Tx ring stats counters
6847 * @vsi: the VSI to be updated
6848 * @vsi_stats: the stats struct to be updated
6849 * @rings: rings to work on
6850 * @count: number of rings
6853 ice_update_vsi_tx_ring_stats(struct ice_vsi
*vsi
,
6854 struct rtnl_link_stats64
*vsi_stats
,
6855 struct ice_tx_ring
**rings
, u16 count
)
6859 for (i
= 0; i
< count
; i
++) {
6860 struct ice_tx_ring
*ring
;
6861 u64 pkts
= 0, bytes
= 0;
6863 ring
= READ_ONCE(rings
[i
]);
6864 if (!ring
|| !ring
->ring_stats
)
6866 ice_fetch_u64_stats_per_ring(&ring
->ring_stats
->syncp
,
6867 ring
->ring_stats
->stats
, &pkts
,
6869 vsi_stats
->tx_packets
+= pkts
;
6870 vsi_stats
->tx_bytes
+= bytes
;
6871 vsi
->tx_restart
+= ring
->ring_stats
->tx_stats
.restart_q
;
6872 vsi
->tx_busy
+= ring
->ring_stats
->tx_stats
.tx_busy
;
6873 vsi
->tx_linearize
+= ring
->ring_stats
->tx_stats
.tx_linearize
;
6878 * ice_update_vsi_ring_stats - Update VSI stats counters
6879 * @vsi: the VSI to be updated
6881 static void ice_update_vsi_ring_stats(struct ice_vsi
*vsi
)
6883 struct rtnl_link_stats64
*net_stats
, *stats_prev
;
6884 struct rtnl_link_stats64
*vsi_stats
;
6885 struct ice_pf
*pf
= vsi
->back
;
6889 vsi_stats
= kzalloc(sizeof(*vsi_stats
), GFP_ATOMIC
);
6893 /* reset non-netdev (extended) stats */
6894 vsi
->tx_restart
= 0;
6896 vsi
->tx_linearize
= 0;
6897 vsi
->rx_buf_failed
= 0;
6898 vsi
->rx_page_failed
= 0;
6902 /* update Tx rings counters */
6903 ice_update_vsi_tx_ring_stats(vsi
, vsi_stats
, vsi
->tx_rings
,
6906 /* update Rx rings counters */
6907 ice_for_each_rxq(vsi
, i
) {
6908 struct ice_rx_ring
*ring
= READ_ONCE(vsi
->rx_rings
[i
]);
6909 struct ice_ring_stats
*ring_stats
;
6911 ring_stats
= ring
->ring_stats
;
6912 ice_fetch_u64_stats_per_ring(&ring_stats
->syncp
,
6913 ring_stats
->stats
, &pkts
,
6915 vsi_stats
->rx_packets
+= pkts
;
6916 vsi_stats
->rx_bytes
+= bytes
;
6917 vsi
->rx_buf_failed
+= ring_stats
->rx_stats
.alloc_buf_failed
;
6918 vsi
->rx_page_failed
+= ring_stats
->rx_stats
.alloc_page_failed
;
6921 /* update XDP Tx rings counters */
6922 if (ice_is_xdp_ena_vsi(vsi
))
6923 ice_update_vsi_tx_ring_stats(vsi
, vsi_stats
, vsi
->xdp_rings
,
6928 net_stats
= &vsi
->net_stats
;
6929 stats_prev
= &vsi
->net_stats_prev
;
6931 /* Update netdev counters, but keep in mind that values could start at
6932 * random value after PF reset. And as we increase the reported stat by
6933 * diff of Prev-Cur, we need to be sure that Prev is valid. If it's not,
6934 * let's skip this round.
6936 if (likely(pf
->stat_prev_loaded
)) {
6937 net_stats
->tx_packets
+= vsi_stats
->tx_packets
- stats_prev
->tx_packets
;
6938 net_stats
->tx_bytes
+= vsi_stats
->tx_bytes
- stats_prev
->tx_bytes
;
6939 net_stats
->rx_packets
+= vsi_stats
->rx_packets
- stats_prev
->rx_packets
;
6940 net_stats
->rx_bytes
+= vsi_stats
->rx_bytes
- stats_prev
->rx_bytes
;
6943 stats_prev
->tx_packets
= vsi_stats
->tx_packets
;
6944 stats_prev
->tx_bytes
= vsi_stats
->tx_bytes
;
6945 stats_prev
->rx_packets
= vsi_stats
->rx_packets
;
6946 stats_prev
->rx_bytes
= vsi_stats
->rx_bytes
;
6952 * ice_update_vsi_stats - Update VSI stats counters
6953 * @vsi: the VSI to be updated
6955 void ice_update_vsi_stats(struct ice_vsi
*vsi
)
6957 struct rtnl_link_stats64
*cur_ns
= &vsi
->net_stats
;
6958 struct ice_eth_stats
*cur_es
= &vsi
->eth_stats
;
6959 struct ice_pf
*pf
= vsi
->back
;
6961 if (test_bit(ICE_VSI_DOWN
, vsi
->state
) ||
6962 test_bit(ICE_CFG_BUSY
, pf
->state
))
6965 /* get stats as recorded by Tx/Rx rings */
6966 ice_update_vsi_ring_stats(vsi
);
6968 /* get VSI stats as recorded by the hardware */
6969 ice_update_eth_stats(vsi
);
6971 cur_ns
->tx_errors
= cur_es
->tx_errors
;
6972 cur_ns
->rx_dropped
= cur_es
->rx_discards
;
6973 cur_ns
->tx_dropped
= cur_es
->tx_discards
;
6974 cur_ns
->multicast
= cur_es
->rx_multicast
;
6976 /* update some more netdev stats if this is main VSI */
6977 if (vsi
->type
== ICE_VSI_PF
) {
6978 cur_ns
->rx_crc_errors
= pf
->stats
.crc_errors
;
6979 cur_ns
->rx_errors
= pf
->stats
.crc_errors
+
6980 pf
->stats
.illegal_bytes
+
6981 pf
->stats
.rx_undersize
+
6982 pf
->hw_csum_rx_error
+
6983 pf
->stats
.rx_jabber
+
6984 pf
->stats
.rx_fragments
+
6985 pf
->stats
.rx_oversize
;
6986 /* record drops from the port level */
6987 cur_ns
->rx_missed_errors
= pf
->stats
.eth
.rx_discards
;
6992 * ice_update_pf_stats - Update PF port stats counters
6993 * @pf: PF whose stats needs to be updated
6995 void ice_update_pf_stats(struct ice_pf
*pf
)
6997 struct ice_hw_port_stats
*prev_ps
, *cur_ps
;
6998 struct ice_hw
*hw
= &pf
->hw
;
7002 port
= hw
->port_info
->lport
;
7003 prev_ps
= &pf
->stats_prev
;
7004 cur_ps
= &pf
->stats
;
7006 if (ice_is_reset_in_progress(pf
->state
))
7007 pf
->stat_prev_loaded
= false;
7009 ice_stat_update40(hw
, GLPRT_GORCL(port
), pf
->stat_prev_loaded
,
7010 &prev_ps
->eth
.rx_bytes
,
7011 &cur_ps
->eth
.rx_bytes
);
7013 ice_stat_update40(hw
, GLPRT_UPRCL(port
), pf
->stat_prev_loaded
,
7014 &prev_ps
->eth
.rx_unicast
,
7015 &cur_ps
->eth
.rx_unicast
);
7017 ice_stat_update40(hw
, GLPRT_MPRCL(port
), pf
->stat_prev_loaded
,
7018 &prev_ps
->eth
.rx_multicast
,
7019 &cur_ps
->eth
.rx_multicast
);
7021 ice_stat_update40(hw
, GLPRT_BPRCL(port
), pf
->stat_prev_loaded
,
7022 &prev_ps
->eth
.rx_broadcast
,
7023 &cur_ps
->eth
.rx_broadcast
);
7025 ice_stat_update32(hw
, PRTRPB_RDPC
, pf
->stat_prev_loaded
,
7026 &prev_ps
->eth
.rx_discards
,
7027 &cur_ps
->eth
.rx_discards
);
7029 ice_stat_update40(hw
, GLPRT_GOTCL(port
), pf
->stat_prev_loaded
,
7030 &prev_ps
->eth
.tx_bytes
,
7031 &cur_ps
->eth
.tx_bytes
);
7033 ice_stat_update40(hw
, GLPRT_UPTCL(port
), pf
->stat_prev_loaded
,
7034 &prev_ps
->eth
.tx_unicast
,
7035 &cur_ps
->eth
.tx_unicast
);
7037 ice_stat_update40(hw
, GLPRT_MPTCL(port
), pf
->stat_prev_loaded
,
7038 &prev_ps
->eth
.tx_multicast
,
7039 &cur_ps
->eth
.tx_multicast
);
7041 ice_stat_update40(hw
, GLPRT_BPTCL(port
), pf
->stat_prev_loaded
,
7042 &prev_ps
->eth
.tx_broadcast
,
7043 &cur_ps
->eth
.tx_broadcast
);
7045 ice_stat_update32(hw
, GLPRT_TDOLD(port
), pf
->stat_prev_loaded
,
7046 &prev_ps
->tx_dropped_link_down
,
7047 &cur_ps
->tx_dropped_link_down
);
7049 ice_stat_update40(hw
, GLPRT_PRC64L(port
), pf
->stat_prev_loaded
,
7050 &prev_ps
->rx_size_64
, &cur_ps
->rx_size_64
);
7052 ice_stat_update40(hw
, GLPRT_PRC127L(port
), pf
->stat_prev_loaded
,
7053 &prev_ps
->rx_size_127
, &cur_ps
->rx_size_127
);
7055 ice_stat_update40(hw
, GLPRT_PRC255L(port
), pf
->stat_prev_loaded
,
7056 &prev_ps
->rx_size_255
, &cur_ps
->rx_size_255
);
7058 ice_stat_update40(hw
, GLPRT_PRC511L(port
), pf
->stat_prev_loaded
,
7059 &prev_ps
->rx_size_511
, &cur_ps
->rx_size_511
);
7061 ice_stat_update40(hw
, GLPRT_PRC1023L(port
), pf
->stat_prev_loaded
,
7062 &prev_ps
->rx_size_1023
, &cur_ps
->rx_size_1023
);
7064 ice_stat_update40(hw
, GLPRT_PRC1522L(port
), pf
->stat_prev_loaded
,
7065 &prev_ps
->rx_size_1522
, &cur_ps
->rx_size_1522
);
7067 ice_stat_update40(hw
, GLPRT_PRC9522L(port
), pf
->stat_prev_loaded
,
7068 &prev_ps
->rx_size_big
, &cur_ps
->rx_size_big
);
7070 ice_stat_update40(hw
, GLPRT_PTC64L(port
), pf
->stat_prev_loaded
,
7071 &prev_ps
->tx_size_64
, &cur_ps
->tx_size_64
);
7073 ice_stat_update40(hw
, GLPRT_PTC127L(port
), pf
->stat_prev_loaded
,
7074 &prev_ps
->tx_size_127
, &cur_ps
->tx_size_127
);
7076 ice_stat_update40(hw
, GLPRT_PTC255L(port
), pf
->stat_prev_loaded
,
7077 &prev_ps
->tx_size_255
, &cur_ps
->tx_size_255
);
7079 ice_stat_update40(hw
, GLPRT_PTC511L(port
), pf
->stat_prev_loaded
,
7080 &prev_ps
->tx_size_511
, &cur_ps
->tx_size_511
);
7082 ice_stat_update40(hw
, GLPRT_PTC1023L(port
), pf
->stat_prev_loaded
,
7083 &prev_ps
->tx_size_1023
, &cur_ps
->tx_size_1023
);
7085 ice_stat_update40(hw
, GLPRT_PTC1522L(port
), pf
->stat_prev_loaded
,
7086 &prev_ps
->tx_size_1522
, &cur_ps
->tx_size_1522
);
7088 ice_stat_update40(hw
, GLPRT_PTC9522L(port
), pf
->stat_prev_loaded
,
7089 &prev_ps
->tx_size_big
, &cur_ps
->tx_size_big
);
7091 fd_ctr_base
= hw
->fd_ctr_base
;
7093 ice_stat_update40(hw
,
7094 GLSTAT_FD_CNT0L(ICE_FD_SB_STAT_IDX(fd_ctr_base
)),
7095 pf
->stat_prev_loaded
, &prev_ps
->fd_sb_match
,
7096 &cur_ps
->fd_sb_match
);
7097 ice_stat_update32(hw
, GLPRT_LXONRXC(port
), pf
->stat_prev_loaded
,
7098 &prev_ps
->link_xon_rx
, &cur_ps
->link_xon_rx
);
7100 ice_stat_update32(hw
, GLPRT_LXOFFRXC(port
), pf
->stat_prev_loaded
,
7101 &prev_ps
->link_xoff_rx
, &cur_ps
->link_xoff_rx
);
7103 ice_stat_update32(hw
, GLPRT_LXONTXC(port
), pf
->stat_prev_loaded
,
7104 &prev_ps
->link_xon_tx
, &cur_ps
->link_xon_tx
);
7106 ice_stat_update32(hw
, GLPRT_LXOFFTXC(port
), pf
->stat_prev_loaded
,
7107 &prev_ps
->link_xoff_tx
, &cur_ps
->link_xoff_tx
);
7109 ice_update_dcb_stats(pf
);
7111 ice_stat_update32(hw
, GLPRT_CRCERRS(port
), pf
->stat_prev_loaded
,
7112 &prev_ps
->crc_errors
, &cur_ps
->crc_errors
);
7114 ice_stat_update32(hw
, GLPRT_ILLERRC(port
), pf
->stat_prev_loaded
,
7115 &prev_ps
->illegal_bytes
, &cur_ps
->illegal_bytes
);
7117 ice_stat_update32(hw
, GLPRT_MLFC(port
), pf
->stat_prev_loaded
,
7118 &prev_ps
->mac_local_faults
,
7119 &cur_ps
->mac_local_faults
);
7121 ice_stat_update32(hw
, GLPRT_MRFC(port
), pf
->stat_prev_loaded
,
7122 &prev_ps
->mac_remote_faults
,
7123 &cur_ps
->mac_remote_faults
);
7125 ice_stat_update32(hw
, GLPRT_RUC(port
), pf
->stat_prev_loaded
,
7126 &prev_ps
->rx_undersize
, &cur_ps
->rx_undersize
);
7128 ice_stat_update32(hw
, GLPRT_RFC(port
), pf
->stat_prev_loaded
,
7129 &prev_ps
->rx_fragments
, &cur_ps
->rx_fragments
);
7131 ice_stat_update32(hw
, GLPRT_ROC(port
), pf
->stat_prev_loaded
,
7132 &prev_ps
->rx_oversize
, &cur_ps
->rx_oversize
);
7134 ice_stat_update32(hw
, GLPRT_RJC(port
), pf
->stat_prev_loaded
,
7135 &prev_ps
->rx_jabber
, &cur_ps
->rx_jabber
);
7137 cur_ps
->fd_sb_status
= test_bit(ICE_FLAG_FD_ENA
, pf
->flags
) ? 1 : 0;
7139 pf
->stat_prev_loaded
= true;
7143 * ice_get_stats64 - get statistics for network device structure
7144 * @netdev: network interface device structure
7145 * @stats: main device statistics structure
7147 void ice_get_stats64(struct net_device
*netdev
, struct rtnl_link_stats64
*stats
)
7149 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
7150 struct rtnl_link_stats64
*vsi_stats
;
7151 struct ice_vsi
*vsi
= np
->vsi
;
7153 vsi_stats
= &vsi
->net_stats
;
7155 if (!vsi
->num_txq
|| !vsi
->num_rxq
)
7158 /* netdev packet/byte stats come from ring counter. These are obtained
7159 * by summing up ring counters (done by ice_update_vsi_ring_stats).
7160 * But, only call the update routine and read the registers if VSI is
7163 if (!test_bit(ICE_VSI_DOWN
, vsi
->state
))
7164 ice_update_vsi_ring_stats(vsi
);
7165 stats
->tx_packets
= vsi_stats
->tx_packets
;
7166 stats
->tx_bytes
= vsi_stats
->tx_bytes
;
7167 stats
->rx_packets
= vsi_stats
->rx_packets
;
7168 stats
->rx_bytes
= vsi_stats
->rx_bytes
;
7170 /* The rest of the stats can be read from the hardware but instead we
7171 * just return values that the watchdog task has already obtained from
7174 stats
->multicast
= vsi_stats
->multicast
;
7175 stats
->tx_errors
= vsi_stats
->tx_errors
;
7176 stats
->tx_dropped
= vsi_stats
->tx_dropped
;
7177 stats
->rx_errors
= vsi_stats
->rx_errors
;
7178 stats
->rx_dropped
= vsi_stats
->rx_dropped
;
7179 stats
->rx_crc_errors
= vsi_stats
->rx_crc_errors
;
7180 stats
->rx_length_errors
= vsi_stats
->rx_length_errors
;
/**
 * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI
 * @vsi: VSI having NAPI disabled
 */
static void ice_napi_disable_all(struct ice_vsi *vsi)
{
	int q_idx;

	if (!vsi->netdev)
		return;

	ice_for_each_q_vector(vsi, q_idx) {
		struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];

		if (q_vector->rx.rx_ring || q_vector->tx.tx_ring)
			napi_disable(&q_vector->napi);

		cancel_work_sync(&q_vector->tx.dim.work);
		cancel_work_sync(&q_vector->rx.dim.work);
	}
}
7206 * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI
7207 * @vsi: the VSI being un-configured
7209 static void ice_vsi_dis_irq(struct ice_vsi
*vsi
)
7211 struct ice_pf
*pf
= vsi
->back
;
7212 struct ice_hw
*hw
= &pf
->hw
;
7216 /* disable interrupt causation from each Rx queue; Tx queues are
7217 * handled in ice_vsi_stop_tx_ring()
7219 if (vsi
->rx_rings
) {
7220 ice_for_each_rxq(vsi
, i
) {
7221 if (vsi
->rx_rings
[i
]) {
7224 reg
= vsi
->rx_rings
[i
]->reg_idx
;
7225 val
= rd32(hw
, QINT_RQCTL(reg
));
7226 val
&= ~QINT_RQCTL_CAUSE_ENA_M
;
7227 wr32(hw
, QINT_RQCTL(reg
), val
);
7232 /* disable each interrupt */
7233 ice_for_each_q_vector(vsi
, i
) {
7234 if (!vsi
->q_vectors
[i
])
7236 wr32(hw
, GLINT_DYN_CTL(vsi
->q_vectors
[i
]->reg_idx
), 0);
7241 /* don't call synchronize_irq() for VF's from the host */
7242 if (vsi
->type
== ICE_VSI_VF
)
7245 ice_for_each_q_vector(vsi
, i
)
7246 synchronize_irq(vsi
->q_vectors
[i
]->irq
.virq
);
/**
 * ice_down - Shutdown the connection
 * @vsi: The VSI being stopped
 *
 * Caller of this function is expected to set the vsi->state ICE_DOWN bit
 */
int ice_down(struct ice_vsi *vsi)
{
	int i, tx_err, rx_err, vlan_err = 0;

	WARN_ON(!test_bit(ICE_VSI_DOWN, vsi->state));

	if (vsi->netdev) {
		vlan_err = ice_vsi_del_vlan_zero(vsi);
		ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false);
		netif_carrier_off(vsi->netdev);
		netif_tx_disable(vsi->netdev);
	}

	ice_vsi_dis_irq(vsi);

	tx_err = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
	if (tx_err)
		netdev_err(vsi->netdev, "Failed stop Tx rings, VSI %d error %d\n",
			   vsi->vsi_num, tx_err);
	if (!tx_err && vsi->xdp_rings) {
		tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
		if (tx_err)
			netdev_err(vsi->netdev, "Failed stop XDP rings, VSI %d error %d\n",
				   vsi->vsi_num, tx_err);
	}

	rx_err = ice_vsi_stop_all_rx_rings(vsi);
	if (rx_err)
		netdev_err(vsi->netdev, "Failed stop Rx rings, VSI %d error %d\n",
			   vsi->vsi_num, rx_err);

	ice_napi_disable_all(vsi);

	ice_for_each_txq(vsi, i)
		ice_clean_tx_ring(vsi->tx_rings[i]);

	if (vsi->xdp_rings)
		ice_for_each_xdp_txq(vsi, i)
			ice_clean_tx_ring(vsi->xdp_rings[i]);

	ice_for_each_rxq(vsi, i)
		ice_clean_rx_ring(vsi->rx_rings[i]);

	if (tx_err || rx_err || vlan_err) {
		netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n",
			   vsi->vsi_num, vsi->vsw->sw_id);
		return -EIO;
	}

	return 0;
}

/**
 * ice_down_up - shutdown the VSI connection and bring it up
 * @vsi: the VSI to be reconnected
 */
int ice_down_up(struct ice_vsi *vsi)
{
	int ret;

	/* if DOWN already set, nothing to do */
	if (test_and_set_bit(ICE_VSI_DOWN, vsi->state))
		return 0;

	ret = ice_down(vsi);
	if (ret)
		return ret;

	ret = ice_up(vsi);
	if (ret) {
		netdev_err(vsi->netdev, "reallocating resources failed during netdev features change, may need to reload driver\n");
		return ret;
	}

	return 0;
}
7333 * ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources
7334 * @vsi: VSI having resources allocated
7336 * Return 0 on success, negative on failure
7338 int ice_vsi_setup_tx_rings(struct ice_vsi
*vsi
)
7342 if (!vsi
->num_txq
) {
7343 dev_err(ice_pf_to_dev(vsi
->back
), "VSI %d has 0 Tx queues\n",
7348 ice_for_each_txq(vsi
, i
) {
7349 struct ice_tx_ring
*ring
= vsi
->tx_rings
[i
];
7355 ring
->netdev
= vsi
->netdev
;
7356 err
= ice_setup_tx_ring(ring
);
7365 * ice_vsi_setup_rx_rings - Allocate VSI Rx queue resources
7366 * @vsi: VSI having resources allocated
7368 * Return 0 on success, negative on failure
7370 int ice_vsi_setup_rx_rings(struct ice_vsi
*vsi
)
7374 if (!vsi
->num_rxq
) {
7375 dev_err(ice_pf_to_dev(vsi
->back
), "VSI %d has 0 Rx queues\n",
7380 ice_for_each_rxq(vsi
, i
) {
7381 struct ice_rx_ring
*ring
= vsi
->rx_rings
[i
];
7387 ring
->netdev
= vsi
->netdev
;
7388 err
= ice_setup_rx_ring(ring
);
7397 * ice_vsi_open_ctrl - open control VSI for use
7398 * @vsi: the VSI to open
7400 * Initialization of the Control VSI
7402 * Returns 0 on success, negative value on error
7404 int ice_vsi_open_ctrl(struct ice_vsi
*vsi
)
7406 char int_name
[ICE_INT_NAME_STR_LEN
];
7407 struct ice_pf
*pf
= vsi
->back
;
7411 dev
= ice_pf_to_dev(pf
);
7412 /* allocate descriptors */
7413 err
= ice_vsi_setup_tx_rings(vsi
);
7417 err
= ice_vsi_setup_rx_rings(vsi
);
7421 err
= ice_vsi_cfg_lan(vsi
);
7425 snprintf(int_name
, sizeof(int_name
) - 1, "%s-%s:ctrl",
7426 dev_driver_string(dev
), dev_name(dev
));
7427 err
= ice_vsi_req_irq_msix(vsi
, int_name
);
7431 ice_vsi_cfg_msix(vsi
);
7433 err
= ice_vsi_start_all_rx_rings(vsi
);
7435 goto err_up_complete
;
7437 clear_bit(ICE_VSI_DOWN
, vsi
->state
);
7438 ice_vsi_ena_irq(vsi
);
7445 ice_vsi_free_rx_rings(vsi
);
7447 ice_vsi_free_tx_rings(vsi
);
7453 * ice_vsi_open - Called when a network interface is made active
7454 * @vsi: the VSI to open
7456 * Initialization of the VSI
7458 * Returns 0 on success, negative value on error
7460 int ice_vsi_open(struct ice_vsi
*vsi
)
7462 char int_name
[ICE_INT_NAME_STR_LEN
];
7463 struct ice_pf
*pf
= vsi
->back
;
7466 /* allocate descriptors */
7467 err
= ice_vsi_setup_tx_rings(vsi
);
7471 err
= ice_vsi_setup_rx_rings(vsi
);
7475 err
= ice_vsi_cfg_lan(vsi
);
7479 snprintf(int_name
, sizeof(int_name
) - 1, "%s-%s",
7480 dev_driver_string(ice_pf_to_dev(pf
)), vsi
->netdev
->name
);
7481 err
= ice_vsi_req_irq_msix(vsi
, int_name
);
7485 ice_vsi_cfg_netdev_tc(vsi
, vsi
->tc_cfg
.ena_tc
);
7487 if (vsi
->type
== ICE_VSI_PF
|| vsi
->type
== ICE_VSI_SF
) {
7488 /* Notify the stack of the actual queue counts. */
7489 err
= netif_set_real_num_tx_queues(vsi
->netdev
, vsi
->num_txq
);
7493 err
= netif_set_real_num_rx_queues(vsi
->netdev
, vsi
->num_rxq
);
7497 ice_vsi_set_napi_queues(vsi
);
7500 err
= ice_up_complete(vsi
);
7502 goto err_up_complete
;
7509 ice_vsi_free_irq(vsi
);
7511 ice_vsi_free_rx_rings(vsi
);
7513 ice_vsi_free_tx_rings(vsi
);
7519 * ice_vsi_release_all - Delete all VSIs
7520 * @pf: PF from which all VSIs are being removed
7522 static void ice_vsi_release_all(struct ice_pf
*pf
)
7529 ice_for_each_vsi(pf
, i
) {
7533 if (pf
->vsi
[i
]->type
== ICE_VSI_CHNL
)
7536 err
= ice_vsi_release(pf
->vsi
[i
]);
7538 dev_dbg(ice_pf_to_dev(pf
), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
7539 i
, err
, pf
->vsi
[i
]->vsi_num
);
7544 * ice_vsi_rebuild_by_type - Rebuild VSI of a given type
7545 * @pf: pointer to the PF instance
7546 * @type: VSI type to rebuild
7548 * Iterates through the pf->vsi array and rebuilds VSIs of the requested type
7550 static int ice_vsi_rebuild_by_type(struct ice_pf
*pf
, enum ice_vsi_type type
)
7552 struct device
*dev
= ice_pf_to_dev(pf
);
7555 ice_for_each_vsi(pf
, i
) {
7556 struct ice_vsi
*vsi
= pf
->vsi
[i
];
7558 if (!vsi
|| vsi
->type
!= type
)
7561 /* rebuild the VSI */
7562 err
= ice_vsi_rebuild(vsi
, ICE_VSI_FLAG_INIT
);
7564 dev_err(dev
, "rebuild VSI failed, err %d, VSI index %d, type %s\n",
7565 err
, vsi
->idx
, ice_vsi_type_str(type
));
7569 /* replay filters for the VSI */
7570 err
= ice_replay_vsi(&pf
->hw
, vsi
->idx
);
7572 dev_err(dev
, "replay VSI failed, error %d, VSI index %d, type %s\n",
7573 err
, vsi
->idx
, ice_vsi_type_str(type
));
7577 /* Re-map HW VSI number, using VSI handle that has been
7578 * previously validated in ice_replay_vsi() call above
7580 vsi
->vsi_num
= ice_get_hw_vsi_num(&pf
->hw
, vsi
->idx
);
7582 /* enable the VSI */
7583 err
= ice_ena_vsi(vsi
, false);
7585 dev_err(dev
, "enable VSI failed, err %d, VSI index %d, type %s\n",
7586 err
, vsi
->idx
, ice_vsi_type_str(type
));
7590 dev_info(dev
, "VSI rebuilt. VSI index %d, type %s\n", vsi
->idx
,
7591 ice_vsi_type_str(type
));
7598 * ice_update_pf_netdev_link - Update PF netdev link status
7599 * @pf: pointer to the PF instance
7601 static void ice_update_pf_netdev_link(struct ice_pf
*pf
)
7606 ice_for_each_vsi(pf
, i
) {
7607 struct ice_vsi
*vsi
= pf
->vsi
[i
];
7609 if (!vsi
|| vsi
->type
!= ICE_VSI_PF
)
7612 ice_get_link_status(pf
->vsi
[i
]->port_info
, &link_up
);
7614 netif_carrier_on(pf
->vsi
[i
]->netdev
);
7615 netif_tx_wake_all_queues(pf
->vsi
[i
]->netdev
);
7617 netif_carrier_off(pf
->vsi
[i
]->netdev
);
7618 netif_tx_stop_all_queues(pf
->vsi
[i
]->netdev
);
7624 * ice_rebuild - rebuild after reset
7625 * @pf: PF to rebuild
7626 * @reset_type: type of reset
7628 * Do not rebuild VF VSI in this flow because that is already handled via
7629 * ice_reset_all_vfs(). This is because requirements for resetting a VF after a
7630 * PFR/CORER/GLOBER/etc. are different than the normal flow. Also, we don't want
7631 * to reset/rebuild all the VF VSI twice.
7633 static void ice_rebuild(struct ice_pf
*pf
, enum ice_reset_req reset_type
)
7635 struct ice_vsi
*vsi
= ice_get_main_vsi(pf
);
7636 struct device
*dev
= ice_pf_to_dev(pf
);
7637 struct ice_hw
*hw
= &pf
->hw
;
7641 if (test_bit(ICE_DOWN
, pf
->state
))
7642 goto clear_recovery
;
7644 dev_dbg(dev
, "rebuilding PF after reset_type=%d\n", reset_type
);
7646 #define ICE_EMP_RESET_SLEEP_MS 5000
7647 if (reset_type
== ICE_RESET_EMPR
) {
7648 /* If an EMP reset has occurred, any previously pending flash
7649 * update will have completed. We no longer know whether or
7650 * not the NVM update EMP reset is restricted.
7652 pf
->fw_emp_reset_disabled
= false;
7654 msleep(ICE_EMP_RESET_SLEEP_MS
);
7657 err
= ice_init_all_ctrlq(hw
);
7659 dev_err(dev
, "control queues init failed %d\n", err
);
7660 goto err_init_ctrlq
;
7663 /* if DDP was previously loaded successfully */
7664 if (!ice_is_safe_mode(pf
)) {
7665 /* reload the SW DB of filter tables */
7666 if (reset_type
== ICE_RESET_PFR
)
7667 ice_fill_blk_tbls(hw
);
7669 /* Reload DDP Package after CORER/GLOBR reset */
7670 ice_load_pkg(NULL
, pf
);
7673 err
= ice_clear_pf_cfg(hw
);
7675 dev_err(dev
, "clear PF configuration failed %d\n", err
);
7676 goto err_init_ctrlq
;
7679 ice_clear_pxe_mode(hw
);
7681 err
= ice_init_nvm(hw
);
7683 dev_err(dev
, "ice_init_nvm failed %d\n", err
);
7684 goto err_init_ctrlq
;
7687 err
= ice_get_caps(hw
);
7689 dev_err(dev
, "ice_get_caps failed %d\n", err
);
7690 goto err_init_ctrlq
;
7693 err
= ice_aq_set_mac_cfg(hw
, ICE_AQ_SET_MAC_FRAME_SIZE_MAX
, NULL
);
7695 dev_err(dev
, "set_mac_cfg failed %d\n", err
);
7696 goto err_init_ctrlq
;
7699 dvm
= ice_is_dvm_ena(hw
);
7701 err
= ice_aq_set_port_params(pf
->hw
.port_info
, dvm
, NULL
);
7703 goto err_init_ctrlq
;
7705 err
= ice_sched_init_port(hw
->port_info
);
7707 goto err_sched_init_port
;
7709 /* start misc vector */
7710 err
= ice_req_irq_msix_misc(pf
);
7712 dev_err(dev
, "misc vector setup failed: %d\n", err
);
7713 goto err_sched_init_port
;
7716 if (test_bit(ICE_FLAG_FD_ENA
, pf
->flags
)) {
7717 wr32(hw
, PFQF_FD_ENA
, PFQF_FD_ENA_FD_ENA_M
);
7718 if (!rd32(hw
, PFQF_FD_SIZE
)) {
7719 u16 unused
, guar
, b_effort
;
7721 guar
= hw
->func_caps
.fd_fltr_guar
;
7722 b_effort
= hw
->func_caps
.fd_fltr_best_effort
;
7724 /* force guaranteed filter pool for PF */
7725 ice_alloc_fd_guar_item(hw
, &unused
, guar
);
7726 /* force shared filter pool for PF */
7727 ice_alloc_fd_shrd_item(hw
, &unused
, b_effort
);
7731 if (test_bit(ICE_FLAG_DCB_ENA
, pf
->flags
))
7732 ice_dcb_rebuild(pf
);
7734 /* If the PF previously had enabled PTP, PTP init needs to happen before
7735 * the VSI rebuild. If not, this causes the PTP link status events to
7738 if (test_bit(ICE_FLAG_PTP_SUPPORTED
, pf
->flags
))
7739 ice_ptp_rebuild(pf
, reset_type
);
7741 if (ice_is_feature_supported(pf
, ICE_F_GNSS
))
7744 /* rebuild PF VSI */
7745 err
= ice_vsi_rebuild_by_type(pf
, ICE_VSI_PF
);
7747 dev_err(dev
, "PF VSI rebuild failed: %d\n", err
);
7748 goto err_vsi_rebuild
;
7751 if (reset_type
== ICE_RESET_PFR
) {
7752 err
= ice_rebuild_channels(pf
);
7754 dev_err(dev
, "failed to rebuild and replay ADQ VSIs, err %d\n",
7756 goto err_vsi_rebuild
;
7760 /* If Flow Director is active */
7761 if (test_bit(ICE_FLAG_FD_ENA
, pf
->flags
)) {
7762 err
= ice_vsi_rebuild_by_type(pf
, ICE_VSI_CTRL
);
7764 dev_err(dev
, "control VSI rebuild failed: %d\n", err
);
7765 goto err_vsi_rebuild
;
7768 /* replay HW Flow Director recipes */
7770 ice_fdir_replay_flows(hw
);
7772 /* replay Flow Director filters */
7773 ice_fdir_replay_fltrs(pf
);
7775 ice_rebuild_arfs(pf
);
7778 if (vsi
&& vsi
->netdev
)
7779 netif_device_attach(vsi
->netdev
);
7781 ice_update_pf_netdev_link(pf
);
7783 /* tell the firmware we are up */
7784 err
= ice_send_version(pf
);
7786 dev_err(dev
, "Rebuild failed due to error sending driver version: %d\n",
7788 goto err_vsi_rebuild
;
7791 ice_replay_post(hw
);
7793 /* if we get here, reset flow is successful */
7794 clear_bit(ICE_RESET_FAILED
, pf
->state
);
7796 ice_plug_aux_dev(pf
);
7797 if (ice_is_feature_supported(pf
, ICE_F_SRIOV_LAG
))
7798 ice_lag_rebuild(pf
);
7800 /* Restore timestamp mode settings after VSI rebuild */
7801 ice_ptp_restore_timestamp_mode(pf
);
7805 err_sched_init_port
:
7806 ice_sched_cleanup_all(hw
);
7808 ice_shutdown_all_ctrlq(hw
, false);
7809 set_bit(ICE_RESET_FAILED
, pf
->state
);
7811 /* set this bit in PF state to control service task scheduling */
7812 set_bit(ICE_NEEDS_RESTART
, pf
->state
);
7813 dev_err(dev
, "Rebuild failed, unload and reload driver\n");
7817 * ice_change_mtu - NDO callback to change the MTU
7818 * @netdev: network interface device structure
7819 * @new_mtu: new value for maximum frame size
7821 * Returns 0 on success, negative on failure
7823 int ice_change_mtu(struct net_device
*netdev
, int new_mtu
)
7825 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
7826 struct ice_vsi
*vsi
= np
->vsi
;
7827 struct ice_pf
*pf
= vsi
->back
;
7828 struct bpf_prog
*prog
;
7832 if (new_mtu
== (int)netdev
->mtu
) {
7833 netdev_warn(netdev
, "MTU is already %u\n", netdev
->mtu
);
7837 prog
= vsi
->xdp_prog
;
7838 if (prog
&& !prog
->aux
->xdp_has_frags
) {
7839 int frame_size
= ice_max_xdp_frame_size(vsi
);
7841 if (new_mtu
+ ICE_ETH_PKT_HDR_PAD
> frame_size
) {
7842 netdev_err(netdev
, "max MTU for XDP usage is %d\n",
7843 frame_size
- ICE_ETH_PKT_HDR_PAD
);
7846 } else if (test_bit(ICE_FLAG_LEGACY_RX
, pf
->flags
)) {
7847 if (new_mtu
+ ICE_ETH_PKT_HDR_PAD
> ICE_MAX_FRAME_LEGACY_RX
) {
7848 netdev_err(netdev
, "Too big MTU for legacy-rx; Max is %d\n",
7849 ICE_MAX_FRAME_LEGACY_RX
- ICE_ETH_PKT_HDR_PAD
);
7854 /* if a reset is in progress, wait for some time for it to complete */
7856 if (ice_is_reset_in_progress(pf
->state
)) {
7858 usleep_range(1000, 2000);
7863 } while (count
< 100);
7866 netdev_err(netdev
, "can't change MTU. Device is busy\n");
7870 WRITE_ONCE(netdev
->mtu
, (unsigned int)new_mtu
);
7871 err
= ice_down_up(vsi
);
7875 netdev_dbg(netdev
, "changed MTU to %d\n", new_mtu
);
7876 set_bit(ICE_FLAG_MTU_CHANGED
, pf
->flags
);
/**
 * ice_eth_ioctl - Access the hwtstamp interface
 * @netdev: network interface device structure
 * @ifr: interface request data
 * @cmd: ioctl command
 */
static int ice_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_pf *pf = np->vsi->back;

	switch (cmd) {
	case SIOCGHWTSTAMP:
		return ice_ptp_get_ts_config(pf, ifr);
	case SIOCSHWTSTAMP:
		return ice_ptp_set_ts_config(pf, ifr);
	default:
		return -EOPNOTSUPP;
	}
}
/**
 * ice_aq_str - convert AQ err code to a string
 * @aq_err: the AQ error code to convert
 */
const char *ice_aq_str(enum ice_aq_err aq_err)
{
	switch (aq_err) {
	case ICE_AQ_RC_OK:
		return "OK";
	case ICE_AQ_RC_EPERM:
		return "ICE_AQ_RC_EPERM";
	case ICE_AQ_RC_ENOENT:
		return "ICE_AQ_RC_ENOENT";
	case ICE_AQ_RC_ENOMEM:
		return "ICE_AQ_RC_ENOMEM";
	case ICE_AQ_RC_EBUSY:
		return "ICE_AQ_RC_EBUSY";
	case ICE_AQ_RC_EEXIST:
		return "ICE_AQ_RC_EEXIST";
	case ICE_AQ_RC_EINVAL:
		return "ICE_AQ_RC_EINVAL";
	case ICE_AQ_RC_ENOSPC:
		return "ICE_AQ_RC_ENOSPC";
	case ICE_AQ_RC_ENOSYS:
		return "ICE_AQ_RC_ENOSYS";
	case ICE_AQ_RC_EMODE:
		return "ICE_AQ_RC_EMODE";
	case ICE_AQ_RC_ENOSEC:
		return "ICE_AQ_RC_ENOSEC";
	case ICE_AQ_RC_EBADSIG:
		return "ICE_AQ_RC_EBADSIG";
	case ICE_AQ_RC_ESVN:
		return "ICE_AQ_RC_ESVN";
	case ICE_AQ_RC_EBADMAN:
		return "ICE_AQ_RC_EBADMAN";
	case ICE_AQ_RC_EBADBUF:
		return "ICE_AQ_RC_EBADBUF";
	}

	return "ICE_AQ_RC_UNKNOWN";
}
7945 * ice_set_rss_lut - Set RSS LUT
7946 * @vsi: Pointer to VSI structure
7947 * @lut: Lookup table
7948 * @lut_size: Lookup table size
7950 * Returns 0 on success, negative on failure
7952 int ice_set_rss_lut(struct ice_vsi
*vsi
, u8
*lut
, u16 lut_size
)
7954 struct ice_aq_get_set_rss_lut_params params
= {};
7955 struct ice_hw
*hw
= &vsi
->back
->hw
;
7961 params
.vsi_handle
= vsi
->idx
;
7962 params
.lut_size
= lut_size
;
7963 params
.lut_type
= vsi
->rss_lut_type
;
7966 status
= ice_aq_set_rss_lut(hw
, ¶ms
);
7968 dev_err(ice_pf_to_dev(vsi
->back
), "Cannot set RSS lut, err %d aq_err %s\n",
7969 status
, ice_aq_str(hw
->adminq
.sq_last_status
));
/**
 * ice_set_rss_key - Set RSS key
 * @vsi: Pointer to the VSI structure
 * @seed: RSS hash seed
 *
 * Returns 0 on success, negative on failure
 */
int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed)
{
	struct ice_hw *hw = &vsi->back->hw;
	int status;

	if (!seed)
		return -EINVAL;

	status = ice_aq_set_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed);
	if (status)
		dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS key, err %d aq_err %s\n",
			status, ice_aq_str(hw->adminq.sq_last_status));

	return status;
}
7998 * ice_get_rss_lut - Get RSS LUT
7999 * @vsi: Pointer to VSI structure
8000 * @lut: Buffer to store the lookup table entries
8001 * @lut_size: Size of buffer to store the lookup table entries
8003 * Returns 0 on success, negative on failure
8005 int ice_get_rss_lut(struct ice_vsi
*vsi
, u8
*lut
, u16 lut_size
)
8007 struct ice_aq_get_set_rss_lut_params params
= {};
8008 struct ice_hw
*hw
= &vsi
->back
->hw
;
8014 params
.vsi_handle
= vsi
->idx
;
8015 params
.lut_size
= lut_size
;
8016 params
.lut_type
= vsi
->rss_lut_type
;
8019 status
= ice_aq_get_rss_lut(hw
, ¶ms
);
8021 dev_err(ice_pf_to_dev(vsi
->back
), "Cannot get RSS lut, err %d aq_err %s\n",
8022 status
, ice_aq_str(hw
->adminq
.sq_last_status
));
8028 * ice_get_rss_key - Get RSS key
8029 * @vsi: Pointer to VSI structure
8030 * @seed: Buffer to store the key in
8032 * Returns 0 on success, negative on failure
8034 int ice_get_rss_key(struct ice_vsi
*vsi
, u8
*seed
)
8036 struct ice_hw
*hw
= &vsi
->back
->hw
;
8042 status
= ice_aq_get_rss_key(hw
, vsi
->idx
, (struct ice_aqc_get_set_rss_keys
*)seed
);
8044 dev_err(ice_pf_to_dev(vsi
->back
), "Cannot get RSS key, err %d aq_err %s\n",
8045 status
, ice_aq_str(hw
->adminq
.sq_last_status
));
8051 * ice_set_rss_hfunc - Set RSS HASH function
8052 * @vsi: Pointer to VSI structure
8053 * @hfunc: hash function (ICE_AQ_VSI_Q_OPT_RSS_*)
8055 * Returns 0 on success, negative on failure
8057 int ice_set_rss_hfunc(struct ice_vsi
*vsi
, u8 hfunc
)
8059 struct ice_hw
*hw
= &vsi
->back
->hw
;
8060 struct ice_vsi_ctx
*ctx
;
8064 if (hfunc
== vsi
->rss_hfunc
)
8067 if (hfunc
!= ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ
&&
8068 hfunc
!= ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ
)
8071 ctx
= kzalloc(sizeof(*ctx
), GFP_KERNEL
);
8075 ctx
->info
.valid_sections
= cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID
);
8076 ctx
->info
.q_opt_rss
= vsi
->info
.q_opt_rss
;
8077 ctx
->info
.q_opt_rss
&= ~ICE_AQ_VSI_Q_OPT_RSS_HASH_M
;
8078 ctx
->info
.q_opt_rss
|=
8079 FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M
, hfunc
);
8080 ctx
->info
.q_opt_tc
= vsi
->info
.q_opt_tc
;
8081 ctx
->info
.q_opt_flags
= vsi
->info
.q_opt_rss
;
8083 err
= ice_update_vsi(hw
, vsi
->idx
, ctx
, NULL
);
8085 dev_err(ice_pf_to_dev(vsi
->back
), "Failed to configure RSS hash for VSI %d, error %d\n",
8088 vsi
->info
.q_opt_rss
= ctx
->info
.q_opt_rss
;
8089 vsi
->rss_hfunc
= hfunc
;
8090 netdev_info(vsi
->netdev
, "Hash function set to: %sToeplitz\n",
8091 hfunc
== ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ
?
8098 /* Fix the symmetry setting for all existing RSS configurations */
8099 symm
= !!(hfunc
== ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ
);
8100 return ice_set_rss_cfg_symm(hw
, vsi
, symm
);
8104 * ice_bridge_getlink - Get the hardware bridge mode
8107 * @seq: RTNL message seq
8108 * @dev: the netdev being configured
8109 * @filter_mask: filter mask passed in
8110 * @nlflags: netlink flags passed in
8112 * Return the bridge mode (VEB/VEPA)
8115 ice_bridge_getlink(struct sk_buff
*skb
, u32 pid
, u32 seq
,
8116 struct net_device
*dev
, u32 filter_mask
, int nlflags
)
8118 struct ice_netdev_priv
*np
= netdev_priv(dev
);
8119 struct ice_vsi
*vsi
= np
->vsi
;
8120 struct ice_pf
*pf
= vsi
->back
;
8123 bmode
= pf
->first_sw
->bridge_mode
;
8125 return ndo_dflt_bridge_getlink(skb
, pid
, seq
, dev
, bmode
, 0, 0, nlflags
,
8130 * ice_vsi_update_bridge_mode - Update VSI for switching bridge mode (VEB/VEPA)
8131 * @vsi: Pointer to VSI structure
8132 * @bmode: Hardware bridge mode (VEB/VEPA)
8134 * Returns 0 on success, negative on failure
8136 static int ice_vsi_update_bridge_mode(struct ice_vsi
*vsi
, u16 bmode
)
8138 struct ice_aqc_vsi_props
*vsi_props
;
8139 struct ice_hw
*hw
= &vsi
->back
->hw
;
8140 struct ice_vsi_ctx
*ctxt
;
8143 vsi_props
= &vsi
->info
;
8145 ctxt
= kzalloc(sizeof(*ctxt
), GFP_KERNEL
);
8149 ctxt
->info
= vsi
->info
;
8151 if (bmode
== BRIDGE_MODE_VEB
)
8152 /* change from VEPA to VEB mode */
8153 ctxt
->info
.sw_flags
|= ICE_AQ_VSI_SW_FLAG_ALLOW_LB
;
8155 /* change from VEB to VEPA mode */
8156 ctxt
->info
.sw_flags
&= ~ICE_AQ_VSI_SW_FLAG_ALLOW_LB
;
8157 ctxt
->info
.valid_sections
= cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID
);
8159 ret
= ice_update_vsi(hw
, vsi
->idx
, ctxt
, NULL
);
8161 dev_err(ice_pf_to_dev(vsi
->back
), "update VSI for bridge mode failed, bmode = %d err %d aq_err %s\n",
8162 bmode
, ret
, ice_aq_str(hw
->adminq
.sq_last_status
));
8165 /* Update sw flags for book keeping */
8166 vsi_props
->sw_flags
= ctxt
->info
.sw_flags
;
8174 * ice_bridge_setlink - Set the hardware bridge mode
8175 * @dev: the netdev being configured
8176 * @nlh: RTNL message
8177 * @flags: bridge setlink flags
8178 * @extack: netlink extended ack
8180 * Sets the bridge mode (VEB/VEPA) of the switch to which the netdev (VSI) is
8181 * hooked up to. Iterates through the PF VSI list and sets the loopback mode (if
8182 * not already set for all VSIs connected to this switch. And also update the
8183 * unicast switch filter rules for the corresponding switch of the netdev.
8186 ice_bridge_setlink(struct net_device
*dev
, struct nlmsghdr
*nlh
,
8187 u16 __always_unused flags
,
8188 struct netlink_ext_ack __always_unused
*extack
)
8190 struct ice_netdev_priv
*np
= netdev_priv(dev
);
8191 struct ice_pf
*pf
= np
->vsi
->back
;
8192 struct nlattr
*attr
, *br_spec
;
8193 struct ice_hw
*hw
= &pf
->hw
;
8194 struct ice_sw
*pf_sw
;
8195 int rem
, v
, err
= 0;
8197 pf_sw
= pf
->first_sw
;
8198 /* find the attribute in the netlink message */
8199 br_spec
= nlmsg_find_attr(nlh
, sizeof(struct ifinfomsg
), IFLA_AF_SPEC
);
8203 nla_for_each_nested_type(attr
, IFLA_BRIDGE_MODE
, br_spec
, rem
) {
8204 __u16 mode
= nla_get_u16(attr
);
8206 if (mode
!= BRIDGE_MODE_VEPA
&& mode
!= BRIDGE_MODE_VEB
)
8208 /* Continue if bridge mode is not being flipped */
8209 if (mode
== pf_sw
->bridge_mode
)
8211 /* Iterates through the PF VSI list and update the loopback
8214 ice_for_each_vsi(pf
, v
) {
8217 err
= ice_vsi_update_bridge_mode(pf
->vsi
[v
], mode
);
8222 hw
->evb_veb
= (mode
== BRIDGE_MODE_VEB
);
8223 /* Update the unicast switch filter rules for the corresponding
8224 * switch of the netdev
8226 err
= ice_update_sw_rule_bridge_mode(hw
);
8228 netdev_err(dev
, "switch rule update failed, mode = %d err %d aq_err %s\n",
8230 ice_aq_str(hw
->adminq
.sq_last_status
));
8231 /* revert hw->evb_veb */
8232 hw
->evb_veb
= (pf_sw
->bridge_mode
== BRIDGE_MODE_VEB
);
8236 pf_sw
->bridge_mode
= mode
;
8243 * ice_tx_timeout - Respond to a Tx Hang
8244 * @netdev: network interface device structure
8245 * @txqueue: Tx queue
8247 void ice_tx_timeout(struct net_device
*netdev
, unsigned int txqueue
)
8249 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
8250 struct ice_tx_ring
*tx_ring
= NULL
;
8251 struct ice_vsi
*vsi
= np
->vsi
;
8252 struct ice_pf
*pf
= vsi
->back
;
8255 pf
->tx_timeout_count
++;
8257 /* Check if PFC is enabled for the TC to which the queue belongs
8258 * to. If yes then Tx timeout is not caused by a hung queue, no
8259 * need to reset and rebuild
8261 if (ice_is_pfc_causing_hung_q(pf
, txqueue
)) {
8262 dev_info(ice_pf_to_dev(pf
), "Fake Tx hang detected on queue %u, timeout caused by PFC storm\n",
8267 /* now that we have an index, find the tx_ring struct */
8268 ice_for_each_txq(vsi
, i
)
8269 if (vsi
->tx_rings
[i
] && vsi
->tx_rings
[i
]->desc
)
8270 if (txqueue
== vsi
->tx_rings
[i
]->q_index
) {
8271 tx_ring
= vsi
->tx_rings
[i
];
8275 /* Reset recovery level if enough time has elapsed after last timeout.
8276 * Also ensure no new reset action happens before next timeout period.
8278 if (time_after(jiffies
, (pf
->tx_timeout_last_recovery
+ HZ
* 20)))
8279 pf
->tx_timeout_recovery_level
= 1;
8280 else if (time_before(jiffies
, (pf
->tx_timeout_last_recovery
+
8281 netdev
->watchdog_timeo
)))
8285 struct ice_hw
*hw
= &pf
->hw
;
8288 head
= FIELD_GET(QTX_COMM_HEAD_HEAD_M
,
8289 rd32(hw
, QTX_COMM_HEAD(vsi
->txq_map
[txqueue
])));
8290 /* Read interrupt register */
8291 val
= rd32(hw
, GLINT_DYN_CTL(tx_ring
->q_vector
->reg_idx
));
8293 netdev_info(netdev
, "tx_timeout: VSI_num: %d, Q %u, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n",
8294 vsi
->vsi_num
, txqueue
, tx_ring
->next_to_clean
,
8295 head
, tx_ring
->next_to_use
, val
);
8298 pf
->tx_timeout_last_recovery
= jiffies
;
8299 netdev_info(netdev
, "tx_timeout recovery level %d, txqueue %u\n",
8300 pf
->tx_timeout_recovery_level
, txqueue
);
8302 switch (pf
->tx_timeout_recovery_level
) {
8304 set_bit(ICE_PFR_REQ
, pf
->state
);
8307 set_bit(ICE_CORER_REQ
, pf
->state
);
8310 set_bit(ICE_GLOBR_REQ
, pf
->state
);
8313 netdev_err(netdev
, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n");
8314 set_bit(ICE_DOWN
, pf
->state
);
8315 set_bit(ICE_VSI_NEEDS_RESTART
, vsi
->state
);
8316 set_bit(ICE_SERVICE_DIS
, pf
->state
);
8320 ice_service_task_schedule(pf
);
8321 pf
->tx_timeout_recovery_level
++;
/**
 * ice_setup_tc_cls_flower - flower classifier offloads
 * @np: net device to configure
 * @filter_dev: device on which filter is added
 * @cls_flower: offload data
 */
static int
ice_setup_tc_cls_flower(struct ice_netdev_priv *np,
			struct net_device *filter_dev,
			struct flow_cls_offload *cls_flower)
{
	struct ice_vsi *vsi = np->vsi;

	if (cls_flower->common.chain_index)
		return -EOPNOTSUPP;

	switch (cls_flower->command) {
	case FLOW_CLS_REPLACE:
		return ice_add_cls_flower(filter_dev, vsi, cls_flower);
	case FLOW_CLS_DESTROY:
		return ice_del_cls_flower(vsi, cls_flower);
	default:
		return -EINVAL;
	}
}

/**
 * ice_setup_tc_block_cb - callback handler registered for TC block
 * @type: TC SETUP type
 * @type_data: TC flower offload data that contains user input
 * @cb_priv: netdev private data
 */
static int
ice_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
{
	struct ice_netdev_priv *np = cb_priv;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return ice_setup_tc_cls_flower(np, np->vsi->netdev,
					       type_data);
	default:
		return -EOPNOTSUPP;
	}
}
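
/* Usage sketch (assumed, not from the original driver): the block callback
 * above runs when a flower filter is offloaded on the PF netdev, e.g.
 *
 *	tc qdisc add dev <iface> clsact
 *	tc filter add dev <iface> ingress protocol ip flower \
 *		dst_ip 192.168.1.1 skip_sw action drop
 *
 * FLOW_CLS_REPLACE adds the rule via ice_add_cls_flower() and
 * FLOW_CLS_DESTROY removes it via ice_del_cls_flower().
 */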
8371 * ice_validate_mqprio_qopt - Validate TCF input parameters
8372 * @vsi: Pointer to VSI
8373 * @mqprio_qopt: input parameters for mqprio queue configuration
8375 * This function validates MQPRIO params, such as qcount (power of 2 wherever
8376 * needed), and make sure user doesn't specify qcount and BW rate limit
8377 * for TCs, which are more than "num_tc"
8380 ice_validate_mqprio_qopt(struct ice_vsi
*vsi
,
8381 struct tc_mqprio_qopt_offload
*mqprio_qopt
)
8383 int non_power_of_2_qcount
= 0;
8384 struct ice_pf
*pf
= vsi
->back
;
8385 int max_rss_q_cnt
= 0;
8386 u64 sum_min_rate
= 0;
8391 if (vsi
->type
!= ICE_VSI_PF
)
8394 if (mqprio_qopt
->qopt
.offset
[0] != 0 ||
8395 mqprio_qopt
->qopt
.num_tc
< 1 ||
8396 mqprio_qopt
->qopt
.num_tc
> ICE_CHNL_MAX_TC
)
8399 dev
= ice_pf_to_dev(pf
);
8400 vsi
->ch_rss_size
= 0;
8401 num_tc
= mqprio_qopt
->qopt
.num_tc
;
8402 speed
= ice_get_link_speed_kbps(vsi
);
8404 for (i
= 0; num_tc
; i
++) {
8405 int qcount
= mqprio_qopt
->qopt
.count
[i
];
8406 u64 max_rate
, min_rate
, rem
;
8411 if (is_power_of_2(qcount
)) {
8412 if (non_power_of_2_qcount
&&
8413 qcount
> non_power_of_2_qcount
) {
8414 dev_err(dev
, "qcount[%d] cannot be greater than non power of 2 qcount[%d]\n",
8415 qcount
, non_power_of_2_qcount
);
8418 if (qcount
> max_rss_q_cnt
)
8419 max_rss_q_cnt
= qcount
;
8421 if (non_power_of_2_qcount
&&
8422 qcount
!= non_power_of_2_qcount
) {
8423 dev_err(dev
, "Only one non power of 2 qcount allowed[%d,%d]\n",
8424 qcount
, non_power_of_2_qcount
);
8427 if (qcount
< max_rss_q_cnt
) {
8428 dev_err(dev
, "non power of 2 qcount[%d] cannot be less than other qcount[%d]\n",
8429 qcount
, max_rss_q_cnt
);
8432 max_rss_q_cnt
= qcount
;
8433 non_power_of_2_qcount
= qcount
;
8436 /* TC command takes input in K/N/Gbps or K/M/Gbit etc but
8437 * converts the bandwidth rate limit into Bytes/s when
8438 * passing it down to the driver. So convert input bandwidth
8439 * from Bytes/s to Kbps
8441 max_rate
= mqprio_qopt
->max_rate
[i
];
8442 max_rate
= div_u64(max_rate
, ICE_BW_KBPS_DIVISOR
);
8444 /* min_rate is minimum guaranteed rate and it can't be zero */
8445 min_rate
= mqprio_qopt
->min_rate
[i
];
8446 min_rate
= div_u64(min_rate
, ICE_BW_KBPS_DIVISOR
);
8447 sum_min_rate
+= min_rate
;
8449 if (min_rate
&& min_rate
< ICE_MIN_BW_LIMIT
) {
8450 dev_err(dev
, "TC%d: min_rate(%llu Kbps) < %u Kbps\n", i
,
8451 min_rate
, ICE_MIN_BW_LIMIT
);
8455 if (max_rate
&& max_rate
> speed
) {
8456 dev_err(dev
, "TC%d: max_rate(%llu Kbps) > link speed of %u Kbps\n",
8457 i
, max_rate
, speed
);
8461 iter_div_u64_rem(min_rate
, ICE_MIN_BW_LIMIT
, &rem
);
8463 dev_err(dev
, "TC%d: Min Rate not multiple of %u Kbps",
8464 i
, ICE_MIN_BW_LIMIT
);
8468 iter_div_u64_rem(max_rate
, ICE_MIN_BW_LIMIT
, &rem
);
8470 dev_err(dev
, "TC%d: Max Rate not multiple of %u Kbps",
8471 i
, ICE_MIN_BW_LIMIT
);
8475 /* min_rate can't be more than max_rate, except when max_rate
8476 * is zero (implies max_rate sought is max line rate). In such
8477 * a case min_rate can be more than max.
8479 if (max_rate
&& min_rate
> max_rate
) {
8480 dev_err(dev
, "min_rate %llu Kbps can't be more than max_rate %llu Kbps\n",
8481 min_rate
, max_rate
);
8485 if (i
>= mqprio_qopt
->qopt
.num_tc
- 1)
8487 if (mqprio_qopt
->qopt
.offset
[i
+ 1] !=
8488 (mqprio_qopt
->qopt
.offset
[i
] + qcount
))
8492 (mqprio_qopt
->qopt
.offset
[i
] + mqprio_qopt
->qopt
.count
[i
]))
8495 (mqprio_qopt
->qopt
.offset
[i
] + mqprio_qopt
->qopt
.count
[i
]))
8498 if (sum_min_rate
&& sum_min_rate
> (u64
)speed
) {
8499 dev_err(dev
, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n",
8500 sum_min_rate
, speed
);
8504 /* make sure vsi->ch_rss_size is set correctly based on TC's qcount */
8505 vsi
->ch_rss_size
= max_rss_q_cnt
;
/**
 * ice_add_vsi_to_fdir - add a VSI to the flow director group for PF
 * @pf: ptr to PF device
 * @vsi: ptr to VSI
 */
static int ice_add_vsi_to_fdir(struct ice_pf *pf, struct ice_vsi *vsi)
{
	struct device *dev = ice_pf_to_dev(pf);
	bool added = false;
	struct ice_hw *hw;
	int flow;

	if (!(vsi->num_gfltr || vsi->num_bfltr))
		return -EINVAL;

	hw = &pf->hw;
	for (flow = 0; flow < ICE_FLTR_PTYPE_MAX; flow++) {
		struct ice_fd_hw_prof *prof;
		int tun, status;
		u64 entry_h;

		if (!(hw->fdir_prof && hw->fdir_prof[flow] &&
		      hw->fdir_prof[flow]->cnt))
			continue;

		for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
			enum ice_flow_priority prio;

			/* add this VSI to FDir profile for this flow */
			prio = ICE_FLOW_PRIO_NORMAL;
			prof = hw->fdir_prof[flow];
			status = ice_flow_add_entry(hw, ICE_BLK_FD,
						    prof->prof_id[tun],
						    prof->vsi_h[0], vsi->idx,
						    prio, prof->fdir_seg[tun],
						    &entry_h);
			if (status) {
				dev_err(dev, "channel VSI idx %d, not able to add to group %d\n",
					vsi->idx, flow);
				continue;
			}

			prof->entry_h[prof->cnt][tun] = entry_h;
		}

		/* store VSI for filter replay and delete */
		prof->vsi_h[prof->cnt] = vsi->idx;
		prof->cnt++;

		added = true;
		dev_dbg(dev, "VSI idx %d added to fdir group %d\n", vsi->idx,
			flow);
	}

	if (!added)
		dev_dbg(dev, "VSI idx %d not added to fdir groups\n", vsi->idx);

	return 0;
}
/**
 * ice_add_channel - add a channel by adding VSI
 * @pf: ptr to PF device
 * @sw_id: underlying HW switching element ID
 * @ch: ptr to channel structure
 *
 * Add a channel (VSI) using add_vsi and queue_map
 */
static int ice_add_channel(struct ice_pf *pf, u16 sw_id, struct ice_channel *ch)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_vsi *vsi;

	if (ch->type != ICE_VSI_CHNL) {
		dev_err(dev, "add new VSI failed, ch->type %d\n", ch->type);
		return -EINVAL;
	}

	vsi = ice_chnl_vsi_setup(pf, pf->hw.port_info, ch);
	if (!vsi || vsi->type != ICE_VSI_CHNL) {
		dev_err(dev, "create chnl VSI failure\n");
		return -EINVAL;
	}

	ice_add_vsi_to_fdir(pf, vsi);

	ch->sw_id = sw_id;
	ch->vsi_num = vsi->vsi_num;
	ch->info.mapping_flags = vsi->info.mapping_flags;
	ch->ch_vsi = vsi;
	/* set the back pointer of channel for newly created VSI */
	vsi->ch = ch;

	memcpy(&ch->info.q_mapping, &vsi->info.q_mapping,
	       sizeof(vsi->info.q_mapping));
	memcpy(&ch->info.tc_mapping, vsi->info.tc_mapping,
	       sizeof(vsi->info.tc_mapping));

	return 0;
}
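/* Descriptive note added for clarity: each additional traffic class in
 * channel (ADQ) mode is backed by its own ICE_VSI_CHNL VSI created here.
 * The channel only borrows a slice of the main PF VSI's queues, described
 * by ch->base_q and ch->num_rxq/num_txq, while the new VSI supplies the
 * per-TC context (e.g. its own RSS sizing and scheduler node) used for
 * filtering and rate limiting.
 */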
/**
 * ice_chnl_cfg_res - Configure the channel resource
 * @vsi: the VSI being setup
 * @ch: ptr to channel structure
 *
 * Configure channel specific resources such as rings, vector.
 */
static void ice_chnl_cfg_res(struct ice_vsi *vsi, struct ice_channel *ch)
{
	int i;

	for (i = 0; i < ch->num_txq; i++) {
		struct ice_q_vector *tx_q_vector, *rx_q_vector;
		struct ice_ring_container *rc;
		struct ice_tx_ring *tx_ring;
		struct ice_rx_ring *rx_ring;

		tx_ring = vsi->tx_rings[ch->base_q + i];
		rx_ring = vsi->rx_rings[ch->base_q + i];
		if (!tx_ring || !rx_ring)
			continue;

		/* setup ring being channel enabled */
		tx_ring->ch = ch;
		rx_ring->ch = ch;

		/* following code block sets up vector specific attributes */
		tx_q_vector = tx_ring->q_vector;
		rx_q_vector = rx_ring->q_vector;
		if (!tx_q_vector && !rx_q_vector)
			continue;

		if (tx_q_vector) {
			tx_q_vector->ch = ch;
			/* setup Tx and Rx ITR setting if DIM is off */
			rc = &tx_q_vector->tx;
			if (!ITR_IS_DYNAMIC(rc))
				ice_write_itr(rc, rc->itr_setting);
		}
		if (rx_q_vector) {
			rx_q_vector->ch = ch;
			/* setup Tx and Rx ITR setting if DIM is off */
			rc = &rx_q_vector->rx;
			if (!ITR_IS_DYNAMIC(rc))
				ice_write_itr(rc, rc->itr_setting);
		}
	}

	/* it is safe to assume that, if channel has non-zero num_t[r]xq, then
	 * GLINT_ITR register would have written to perform in-context
	 * update, hence perform flush
	 */
	if (ch->num_txq || ch->num_rxq)
		ice_flush(&vsi->back->hw);
}
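/* Added context note: ice_flush() is a register read used to force any
 * posted GLINT_ITR writes from the loop above out to the device, so the
 * new interrupt throttling values take effect before the channel starts
 * carrying traffic. This comment is descriptive only and adds no logic.
 */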
/**
 * ice_cfg_chnl_all_res - configure channel resources
 * @vsi: ptr to main_vsi
 * @ch: ptr to channel structure
 *
 * This function configures channel specific resources such as flow-director
 * counter index, and other resources such as queues, vectors, ITR settings
 */
static void
ice_cfg_chnl_all_res(struct ice_vsi *vsi, struct ice_channel *ch)
{
	/* configure channel (aka ADQ) resources such as queues, vectors,
	 * ITR settings for channel specific vectors and anything else
	 */
	ice_chnl_cfg_res(vsi, ch);
}
/**
 * ice_setup_hw_channel - setup new channel
 * @pf: ptr to PF device
 * @vsi: the VSI being setup
 * @ch: ptr to channel structure
 * @sw_id: underlying HW switching element ID
 * @type: type of channel to be created (VMDq2/VF)
 *
 * Setup new channel (VSI) based on specified type (VMDq2/VF)
 * and configures Tx rings accordingly
 */
static int
ice_setup_hw_channel(struct ice_pf *pf, struct ice_vsi *vsi,
		     struct ice_channel *ch, u16 sw_id, u8 type)
{
	struct device *dev = ice_pf_to_dev(pf);
	int ret;

	ch->base_q = vsi->next_base_q;
	ch->type = type;

	ret = ice_add_channel(pf, sw_id, ch);
	if (ret) {
		dev_err(dev, "failed to add_channel using sw_id %u\n", sw_id);
		return ret;
	}

	/* configure/setup ADQ specific resources */
	ice_cfg_chnl_all_res(vsi, ch);

	/* make sure to update the next_base_q so that subsequent channel's
	 * (aka ADQ) VSI queue map is correct
	 */
	vsi->next_base_q = vsi->next_base_q + ch->num_rxq;
	dev_dbg(dev, "added channel: vsi_num %u, num_rxq %u\n", ch->vsi_num,
		ch->num_rxq);

	return 0;
}
/**
 * ice_setup_channel - setup new channel using uplink element
 * @pf: ptr to PF device
 * @vsi: the VSI being setup
 * @ch: ptr to channel structure
 *
 * Setup new channel (VSI) based on specified type (VMDq2/VF)
 * and uplink switching element
 */
static bool
ice_setup_channel(struct ice_pf *pf, struct ice_vsi *vsi,
		  struct ice_channel *ch)
{
	struct device *dev = ice_pf_to_dev(pf);
	u16 sw_id;
	int ret;

	if (vsi->type != ICE_VSI_PF) {
		dev_err(dev, "unsupported parent VSI type(%d)\n", vsi->type);
		return false;
	}

	sw_id = pf->first_sw->sw_id;

	/* create channel (VSI) */
	ret = ice_setup_hw_channel(pf, vsi, ch, sw_id, ICE_VSI_CHNL);
	if (ret) {
		dev_err(dev, "failed to setup hw_channel\n");
		return false;
	}
	dev_dbg(dev, "successfully created channel()\n");

	return ch->ch_vsi ? true : false;
}
/**
 * ice_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
 * @vsi: VSI to be configured
 * @max_tx_rate: max Tx rate in Kbps to be configured as maximum BW limit
 * @min_tx_rate: min Tx rate in Kbps to be configured as minimum BW limit
 */
static int
ice_set_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate, u64 min_tx_rate)
{
	int err;

	err = ice_set_min_bw_limit(vsi, min_tx_rate);
	if (err)
		return err;

	return ice_set_max_bw_limit(vsi, max_tx_rate);
}
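/* Usage sketch, added for illustration (values are examples): callers pass
 * both rates in Kbps, and a rate of 0 is treated as "no limit" for that
 * bound, e.g.
 *
 *	err = ice_set_bw_limit(ch->ch_vsi, 1000000, 0);
 *
 * caps a channel VSI at roughly 1 Gbps with no guaranteed minimum. The
 * minimum is applied first so a failure there skips touching the maximum.
 */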
/**
 * ice_create_q_channel - function to create channel
 * @vsi: VSI to be configured
 * @ch: ptr to channel (it contains channel specific params)
 *
 * This function creates channel (VSI) using num_queues specified by user,
 * reconfigs RSS if needed.
 */
static int ice_create_q_channel(struct ice_vsi *vsi, struct ice_channel *ch)
{
	struct ice_pf *pf = vsi->back;
	struct device *dev;

	if (!ch)
		return -EINVAL;

	dev = ice_pf_to_dev(pf);
	if (!ch->num_txq || !ch->num_rxq) {
		dev_err(dev, "Invalid num_queues requested: %d\n", ch->num_rxq);
		return -EINVAL;
	}

	if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_txq) {
		dev_err(dev, "cnt_q_avail (%u) less than num_queues %d\n",
			vsi->cnt_q_avail, ch->num_txq);
		return -EINVAL;
	}

	if (!ice_setup_channel(pf, vsi, ch)) {
		dev_info(dev, "Failed to setup channel\n");
		return -EINVAL;
	}
	/* configure BW rate limit */
	if (ch->ch_vsi && (ch->max_tx_rate || ch->min_tx_rate)) {
		int ret;

		ret = ice_set_bw_limit(ch->ch_vsi, ch->max_tx_rate,
				       ch->min_tx_rate);
		if (ret)
			dev_err(dev, "failed to set Tx rate of %llu Kbps for VSI(%u)\n",
				ch->max_tx_rate, ch->ch_vsi->vsi_num);
		else
			dev_dbg(dev, "set Tx rate of %llu Kbps for VSI(%u)\n",
				ch->max_tx_rate, ch->ch_vsi->vsi_num);
	}

	vsi->cnt_q_avail -= ch->num_txq;

	return 0;
}
/**
 * ice_rem_all_chnl_fltrs - removes all channel filters
 * @pf: ptr to PF, TC-flower based filter are tracked at PF level
 *
 * Remove all advanced switch filters only if they are channel specific
 * tc-flower based filter
 */
static void ice_rem_all_chnl_fltrs(struct ice_pf *pf)
{
	struct ice_tc_flower_fltr *fltr;
	struct hlist_node *node;

	/* to remove all channel filters, iterate an ordered list of filters */
	hlist_for_each_entry_safe(fltr, node,
				  &pf->tc_flower_fltr_list,
				  tc_flower_node) {
		struct ice_rule_query_data rule;
		int status;

		/* for now process only channel specific filters */
		if (!ice_is_chnl_fltr(fltr))
			continue;

		rule.rid = fltr->rid;
		rule.rule_id = fltr->rule_id;
		rule.vsi_handle = fltr->dest_vsi_handle;
		status = ice_rem_adv_rule_by_id(&pf->hw, &rule);
		if (status) {
			if (status == -ENOENT)
				dev_dbg(ice_pf_to_dev(pf), "TC flower filter (rule_id %u) does not exist\n",
					rule.rule_id);
			else
				dev_err(ice_pf_to_dev(pf), "failed to delete TC flower filter, status %d\n",
					status);
		} else if (fltr->dest_vsi) {
			/* update advanced switch filter count */
			if (fltr->dest_vsi->type == ICE_VSI_CHNL) {
				u32 flags = fltr->flags;

				fltr->dest_vsi->num_chnl_fltr--;
				if (flags & (ICE_TC_FLWR_FIELD_DST_MAC |
					     ICE_TC_FLWR_FIELD_ENC_DST_MAC))
					pf->num_dmac_chnl_fltrs--;
			}
		}

		hlist_del(&fltr->tc_flower_node);
		kfree(fltr);
	}
}
/**
 * ice_remove_q_channels - Remove queue channels for the TCs
 * @vsi: VSI to be configured
 * @rem_fltr: delete advanced switch filter or not
 *
 * Remove queue channels for the TCs
 */
static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_fltr)
{
	struct ice_channel *ch, *ch_tmp;
	struct ice_pf *pf = vsi->back;
	int i;

	/* remove all tc-flower based filter if they are channel filters only */
	if (rem_fltr)
		ice_rem_all_chnl_fltrs(pf);

	/* remove ntuple filters since queue configuration is being changed */
	if (vsi->netdev->features & NETIF_F_NTUPLE) {
		struct ice_hw *hw = &pf->hw;

		mutex_lock(&hw->fdir_fltr_lock);
		ice_fdir_del_all_fltrs(vsi);
		mutex_unlock(&hw->fdir_fltr_lock);
	}

	/* perform cleanup for channels if they exist */
	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
		struct ice_vsi *ch_vsi;

		list_del(&ch->list);
		ch_vsi = ch->ch_vsi;
		if (!ch_vsi) {
			kfree(ch);
			continue;
		}

		/* Reset queue contexts */
		for (i = 0; i < ch->num_rxq; i++) {
			struct ice_tx_ring *tx_ring;
			struct ice_rx_ring *rx_ring;

			tx_ring = vsi->tx_rings[ch->base_q + i];
			rx_ring = vsi->rx_rings[ch->base_q + i];
			if (tx_ring) {
				tx_ring->ch = NULL;
				if (tx_ring->q_vector)
					tx_ring->q_vector->ch = NULL;
			}
			if (rx_ring) {
				rx_ring->ch = NULL;
				if (rx_ring->q_vector)
					rx_ring->q_vector->ch = NULL;
			}
		}

		/* Release FD resources for the channel VSI */
		ice_fdir_rem_adq_chnl(&pf->hw, ch->ch_vsi->idx);

		/* clear the VSI from scheduler tree */
		ice_rm_vsi_lan_cfg(ch->ch_vsi->port_info, ch->ch_vsi->idx);

		/* Delete VSI from FW, PF and HW VSI arrays */
		ice_vsi_delete(ch->ch_vsi);

		/* free the channel */
		kfree(ch);
	}

	/* clear the channel VSI map which is stored in main VSI */
	ice_for_each_chnl_tc(i)
		vsi->tc_map_vsi[i] = NULL;

	/* reset main VSI's all TC information */
	vsi->all_enatc = 0;
	vsi->all_numtc = 0;
}
/**
 * ice_rebuild_channels - rebuild channel
 * @pf: ptr to PF
 *
 * Recreate channel VSIs and replay filters
 */
static int ice_rebuild_channels(struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_vsi *main_vsi;
	bool rem_adv_fltr = true;
	struct ice_channel *ch;
	struct ice_vsi *vsi;
	int tc_idx = 1;
	int i, err;

	main_vsi = ice_get_main_vsi(pf);
	if (!main_vsi)
		return 0;

	if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) ||
	    main_vsi->old_numtc == 1)
		return 0; /* nothing to be done */

	/* reconfigure main VSI based on old value of TC and cached values
	 * for MQPRIO opts
	 */
	err = ice_vsi_cfg_tc(main_vsi, main_vsi->old_ena_tc);
	if (err) {
		dev_err(dev, "failed configuring TC(ena_tc:0x%02x) for HW VSI=%u\n",
			main_vsi->old_ena_tc, main_vsi->vsi_num);
		return err;
	}

	/* rebuild ADQ VSIs */
	ice_for_each_vsi(pf, i) {
		enum ice_vsi_type type;

		vsi = pf->vsi[i];
		if (!vsi || vsi->type != ICE_VSI_CHNL)
			continue;

		type = vsi->type;

		/* rebuild ADQ VSI */
		err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_INIT);
		if (err) {
			dev_err(dev, "VSI (type:%s) at index %d rebuild failed, err %d\n",
				ice_vsi_type_str(type), vsi->idx, err);
			goto cleanup;
		}

		/* Re-map HW VSI number, using VSI handle that has been
		 * previously validated in ice_replay_vsi() call above
		 */
		vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);

		/* replay filters for the VSI */
		err = ice_replay_vsi(&pf->hw, vsi->idx);
		if (err) {
			dev_err(dev, "VSI (type:%s) replay failed, err %d, VSI index %d\n",
				ice_vsi_type_str(type), err, vsi->idx);
			rem_adv_fltr = false;
			goto cleanup;
		}
		dev_info(dev, "VSI (type:%s) at index %d rebuilt successfully\n",
			 ice_vsi_type_str(type), vsi->idx);

		/* store ADQ VSI at correct TC index in main VSI's
		 * map of TC to channel VSI
		 */
		main_vsi->tc_map_vsi[tc_idx++] = vsi;
	}

	/* ADQ VSI(s) has been rebuilt successfully, so setup
	 * channel for main VSI's Tx and Rx rings
	 */
	list_for_each_entry(ch, &main_vsi->ch_list, list) {
		struct ice_vsi *ch_vsi;

		ch_vsi = ch->ch_vsi;
		if (!ch_vsi)
			continue;

		/* reconfig channel resources */
		ice_cfg_chnl_all_res(main_vsi, ch);

		/* replay BW rate limit if it is non-zero */
		if (!ch->max_tx_rate && !ch->min_tx_rate)
			continue;

		err = ice_set_bw_limit(ch_vsi, ch->max_tx_rate,
				       ch->min_tx_rate);
		if (err)
			dev_err(dev, "failed (err:%d) to rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
				err, ch->max_tx_rate, ch->min_tx_rate,
				ch_vsi->vsi_num);
		else
			dev_dbg(dev, "successfully rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
				ch->max_tx_rate, ch->min_tx_rate,
				ch_vsi->vsi_num);
	}

	/* reconfig RSS for main VSI */
	if (main_vsi->ch_rss_size)
		ice_vsi_cfg_rss_lut_key(main_vsi);

	return 0;

cleanup:
	ice_remove_q_channels(main_vsi, rem_adv_fltr);
	return err;
}
/**
 * ice_create_q_channels - Add queue channel for the given TCs
 * @vsi: VSI to be configured
 *
 * Configures queue channel mapping to the given TCs
 */
static int ice_create_q_channels(struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	struct ice_channel *ch;
	int ret = 0, i;

	ice_for_each_chnl_tc(i) {
		if (!(vsi->all_enatc & BIT(i)))
			continue;

		ch = kzalloc(sizeof(*ch), GFP_KERNEL);
		if (!ch) {
			ret = -ENOMEM;
			goto err_free;
		}
		INIT_LIST_HEAD(&ch->list);
		ch->num_rxq = vsi->mqprio_qopt.qopt.count[i];
		ch->num_txq = vsi->mqprio_qopt.qopt.count[i];
		ch->base_q = vsi->mqprio_qopt.qopt.offset[i];
		ch->max_tx_rate = vsi->mqprio_qopt.max_rate[i];
		ch->min_tx_rate = vsi->mqprio_qopt.min_rate[i];

		/* convert to Kbits/s */
		if (ch->max_tx_rate)
			ch->max_tx_rate = div_u64(ch->max_tx_rate,
						  ICE_BW_KBPS_DIVISOR);
		if (ch->min_tx_rate)
			ch->min_tx_rate = div_u64(ch->min_tx_rate,
						  ICE_BW_KBPS_DIVISOR);

		ret = ice_create_q_channel(vsi, ch);
		if (ret) {
			dev_err(ice_pf_to_dev(pf),
				"failed creating channel TC:%d\n", i);
			kfree(ch);
			goto err_free;
		}
		list_add_tail(&ch->list, &vsi->ch_list);
		vsi->tc_map_vsi[i] = ch->ch_vsi;
		dev_dbg(ice_pf_to_dev(pf),
			"successfully created channel: VSI %pK\n", ch->ch_vsi);
	}
	return 0;

err_free:
	ice_remove_q_channels(vsi, false);

	return ret;
}
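/* Worked example, added for clarity: the stack hands rates down in
 * Bytes/s, and with ICE_BW_KBPS_DIVISOR of 125 the conversion above is
 * Kbps = Bytes/s * 8 / 1000. A user request of "max_rate 100Mbit" arrives
 * as 12,500,000 Bytes/s and becomes 100,000 Kbps here. Numbers are
 * illustrative only.
 */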
/**
 * ice_setup_tc_mqprio_qdisc - configure multiple traffic classes
 * @netdev: net device to configure
 * @type_data: TC offload data
 */
static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data)
{
	struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = np->vsi;
	struct ice_pf *pf = vsi->back;
	u16 mode, ena_tc_qdisc = 0;
	int cur_txq, cur_rxq;
	u8 hw = 0, num_tcf;
	struct device *dev;
	int ret, i;

	dev = ice_pf_to_dev(pf);
	num_tcf = mqprio_qopt->qopt.num_tc;
	hw = mqprio_qopt->qopt.hw;
	mode = mqprio_qopt->mode;
	if (!hw) {
		clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
		vsi->ch_rss_size = 0;
		memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
		goto config_tcf;
	}

	/* Generate queue region map for number of TCF requested */
	for (i = 0; i < num_tcf; i++)
		ena_tc_qdisc |= BIT(i);

	switch (mode) {
	case TC_MQPRIO_MODE_CHANNEL:

		if (pf->hw.port_info->is_custom_tx_enabled) {
			dev_err(dev, "Custom Tx scheduler feature enabled, can't configure ADQ\n");
			return -EBUSY;
		}
		ice_tear_down_devlink_rate_tree(pf);

		ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt);
		if (ret) {
			netdev_err(netdev, "failed to validate_mqprio_qopt(), ret %d\n",
				   ret);
			return ret;
		}
		memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
		set_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
		/* don't assume state of hw_tc_offload during driver load
		 * and set the flag for TC flower filter if hw_tc_offload
		 * already ON
		 */
		if (vsi->netdev->features & NETIF_F_HW_TC)
			set_bit(ICE_FLAG_CLS_FLOWER, pf->flags);
		break;
	default:
		return -EINVAL;
	}

config_tcf:

	/* Requesting same TCF configuration as already enabled */
	if (ena_tc_qdisc == vsi->tc_cfg.ena_tc &&
	    mode != TC_MQPRIO_MODE_CHANNEL)
		return 0;

	/* Pause VSI queues */
	ice_dis_vsi(vsi, true);

	if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
		ice_remove_q_channels(vsi, true);

	if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
		vsi->req_txq = min_t(int, ice_get_avail_txq_count(pf),
				     num_online_cpus());
		vsi->req_rxq = min_t(int, ice_get_avail_rxq_count(pf),
				     num_online_cpus());
	} else {
		/* logic to rebuild VSI, same like ethtool -L */
		u16 offset = 0, qcount_tx = 0, qcount_rx = 0;

		for (i = 0; i < num_tcf; i++) {
			if (!(ena_tc_qdisc & BIT(i)))
				continue;

			offset = vsi->mqprio_qopt.qopt.offset[i];
			qcount_rx = vsi->mqprio_qopt.qopt.count[i];
			qcount_tx = vsi->mqprio_qopt.qopt.count[i];
		}
		vsi->req_txq = offset + qcount_tx;
		vsi->req_rxq = offset + qcount_rx;

		/* store away original rss_size info, so that it gets reused
		 * from ice_vsi_rebuild during tc-qdisc delete stage - to
		 * determine what the rss_size for the main VSI should be
		 */
		vsi->orig_rss_size = vsi->rss_size;
	}

	/* save current values of Tx and Rx queues before calling VSI rebuild
	 * for fallback option
	 */
	cur_txq = vsi->num_txq;
	cur_rxq = vsi->num_rxq;

	/* proceed with rebuild main VSI using correct number of queues */
	ret = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
	if (ret) {
		/* fallback to current number of queues */
		dev_info(dev, "Rebuild failed with new queues, try with current number of queues\n");
		vsi->req_txq = cur_txq;
		vsi->req_rxq = cur_rxq;
		clear_bit(ICE_RESET_FAILED, pf->state);
		if (ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT)) {
			dev_err(dev, "Rebuild of main VSI failed again\n");
			return ret;
		}
	}

	vsi->all_numtc = num_tcf;
	vsi->all_enatc = ena_tc_qdisc;
	ret = ice_vsi_cfg_tc(vsi, ena_tc_qdisc);
	if (ret) {
		netdev_err(netdev, "failed configuring TC for VSI id=%d\n",
			   vsi->vsi_num);
		goto exit;
	}

	if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
		u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
		u64 min_tx_rate = vsi->mqprio_qopt.min_rate[0];

		/* set TC0 rate limit if specified */
		if (max_tx_rate || min_tx_rate) {
			/* convert to Kbits/s */
			if (max_tx_rate)
				max_tx_rate = div_u64(max_tx_rate, ICE_BW_KBPS_DIVISOR);
			if (min_tx_rate)
				min_tx_rate = div_u64(min_tx_rate, ICE_BW_KBPS_DIVISOR);

			ret = ice_set_bw_limit(vsi, max_tx_rate, min_tx_rate);
			if (!ret) {
				dev_dbg(dev, "set Tx rate max %llu min %llu for VSI(%u)\n",
					max_tx_rate, min_tx_rate, vsi->vsi_num);
			} else {
				dev_err(dev, "failed to set Tx rate max %llu min %llu for VSI(%u)\n",
					max_tx_rate, min_tx_rate, vsi->vsi_num);
				goto exit;
			}
		}
		ret = ice_create_q_channels(vsi);
		if (ret) {
			netdev_err(netdev, "failed configuring queue channels\n");
			goto exit;
		} else {
			netdev_dbg(netdev, "successfully configured channels\n");
		}
	}

	if (vsi->ch_rss_size)
		ice_vsi_cfg_rss_lut_key(vsi);

exit:
	/* if error, reset the all_numtc and all_enatc */
	if (ret) {
		vsi->all_numtc = 0;
		vsi->all_enatc = 0;
	}
	ice_ena_vsi(vsi, true);

	return ret;
}
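/* Worked example, added for clarity: with two TCs mapped as 4@0 and 4@4,
 * the queue-count loop above ends on the last enabled TC, so req_txq and
 * req_rxq become offset + count = 4 + 4 = 8 queues for the rebuilt main
 * VSI. Queue counts are example values.
 */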
static LIST_HEAD(ice_block_cb_list);

static int
ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
	     void *type_data)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_pf *pf = np->vsi->back;
	bool locked = false;
	int err;

	switch (type) {
	case TC_SETUP_BLOCK:
		return flow_block_cb_setup_simple(type_data,
						  &ice_block_cb_list,
						  ice_setup_tc_block_cb,
						  np, np, true);
	case TC_SETUP_QDISC_MQPRIO:
		if (ice_is_eswitch_mode_switchdev(pf)) {
			netdev_err(netdev, "TC MQPRIO offload not supported, switchdev is enabled\n");
			return -EOPNOTSUPP;
		}

		if (pf->adev) {
			mutex_lock(&pf->adev_mutex);
			device_lock(&pf->adev->dev);
			locked = true;
			if (pf->adev->dev.driver) {
				netdev_err(netdev, "Cannot change qdisc when RDMA is active\n");
				err = -EBUSY;
				goto adev_unlock;
			}
		}

		/* setup traffic classifier for receive side */
		mutex_lock(&pf->tc_mutex);
		err = ice_setup_tc_mqprio_qdisc(netdev, type_data);
		mutex_unlock(&pf->tc_mutex);

adev_unlock:
		if (locked) {
			device_unlock(&pf->adev->dev);
			mutex_unlock(&pf->adev_mutex);
		}
		return err;
	default:
		return -EOPNOTSUPP;
	}
	return -EOPNOTSUPP;
}
static struct ice_indr_block_priv *
ice_indr_block_priv_lookup(struct ice_netdev_priv *np,
			   struct net_device *netdev)
{
	struct ice_indr_block_priv *cb_priv;

	list_for_each_entry(cb_priv, &np->tc_indr_block_priv_list, list) {
		if (!cb_priv->netdev)
			return NULL;
		if (cb_priv->netdev == netdev)
			return cb_priv;
	}
	return NULL;
}

static int
ice_indr_setup_block_cb(enum tc_setup_type type, void *type_data,
			void *indr_priv)
{
	struct ice_indr_block_priv *priv = indr_priv;
	struct ice_netdev_priv *np = priv->np;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return ice_setup_tc_cls_flower(np, priv->netdev,
					       (struct flow_cls_offload *)
					       type_data);
	default:
		return -EOPNOTSUPP;
	}
}
static int
ice_indr_setup_tc_block(struct net_device *netdev, struct Qdisc *sch,
			struct ice_netdev_priv *np,
			struct flow_block_offload *f, void *data,
			void (*cleanup)(struct flow_block_cb *block_cb))
{
	struct ice_indr_block_priv *indr_priv;
	struct flow_block_cb *block_cb;

	if (!ice_is_tunnel_supported(netdev) &&
	    !(is_vlan_dev(netdev) &&
	      vlan_dev_real_dev(netdev) == np->vsi->netdev))
		return -EOPNOTSUPP;

	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
		return -EOPNOTSUPP;

	switch (f->command) {
	case FLOW_BLOCK_BIND:
		indr_priv = ice_indr_block_priv_lookup(np, netdev);
		if (indr_priv)
			return -EEXIST;

		indr_priv = kzalloc(sizeof(*indr_priv), GFP_KERNEL);
		if (!indr_priv)
			return -ENOMEM;

		indr_priv->netdev = netdev;
		indr_priv->np = np;
		list_add(&indr_priv->list, &np->tc_indr_block_priv_list);

		block_cb =
			flow_indr_block_cb_alloc(ice_indr_setup_block_cb,
						 indr_priv, indr_priv,
						 ice_rep_indr_tc_block_unbind,
						 f, netdev, sch, data, np,
						 cleanup);

		if (IS_ERR(block_cb)) {
			list_del(&indr_priv->list);
			kfree(indr_priv);
			return PTR_ERR(block_cb);
		}
		flow_block_cb_add(block_cb, f);
		list_add_tail(&block_cb->driver_list, &ice_block_cb_list);
		break;
	case FLOW_BLOCK_UNBIND:
		indr_priv = ice_indr_block_priv_lookup(np, netdev);
		if (!indr_priv)
			return -ENOENT;

		block_cb = flow_block_cb_lookup(f->block,
						ice_indr_setup_block_cb,
						indr_priv);
		if (!block_cb)
			return -ENOENT;

		flow_indr_block_cb_remove(block_cb, f);

		list_del(&block_cb->driver_list);
		break;
	default:
		return -EOPNOTSUPP;
	}
	return 0;
}
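/* Added note: this indirect block path is what allows flower filters that
 * userspace attaches to an upper device (a supported tunnel netdev, or a
 * VLAN device stacked on this PF's netdev) to be offloaded by the PF,
 * which is why the function starts by checking for exactly those two
 * cases before binding a block callback.
 */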
static int
ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch,
		     void *cb_priv, enum tc_setup_type type, void *type_data,
		     void *data,
		     void (*cleanup)(struct flow_block_cb *block_cb))
{
	switch (type) {
	case TC_SETUP_BLOCK:
		return ice_indr_setup_tc_block(netdev, sch, cb_priv, type_data,
					       data, cleanup);
	default:
		return -EOPNOTSUPP;
	}
}
/**
 * ice_open - Called when a network interface becomes active
 * @netdev: network interface device structure
 *
 * The open entry point is called when a network interface is made
 * active by the system (IFF_UP). At this point all resources needed
 * for transmit and receive operations are allocated, the interrupt
 * handler is registered with the OS, the netdev watchdog is enabled,
 * and the stack is notified that the interface is ready.
 *
 * Returns 0 on success, negative value on failure
 */
int ice_open(struct net_device *netdev)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_pf *pf = np->vsi->back;

	if (ice_is_reset_in_progress(pf->state)) {
		netdev_err(netdev, "can't open net device while reset is in progress");
		return -EBUSY;
	}

	return ice_open_internal(netdev);
}
/**
 * ice_open_internal - Called when a network interface becomes active
 * @netdev: network interface device structure
 *
 * Internal ice_open implementation. Should not be used directly except for ice_open and reset
 * handling routine
 *
 * Returns 0 on success, negative value on failure
 */
int ice_open_internal(struct net_device *netdev)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = np->vsi;
	struct ice_pf *pf = vsi->back;
	struct ice_port_info *pi;
	int err;

	if (test_bit(ICE_NEEDS_RESTART, pf->state)) {
		netdev_err(netdev, "driver needs to be unloaded and reloaded\n");
		return -EIO;
	}

	netif_carrier_off(netdev);

	pi = vsi->port_info;
	err = ice_update_link_info(pi);
	if (err) {
		netdev_err(netdev, "Failed to get link info, error %d\n", err);
		return err;
	}

	ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);

	/* Set PHY if there is media, otherwise, turn off PHY */
	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
		clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
		if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) {
			err = ice_init_phy_user_cfg(pi);
			if (err) {
				netdev_err(netdev, "Failed to initialize PHY settings, error %d\n",
					   err);
				return err;
			}
		}

		err = ice_configure_phy(vsi);
		if (err) {
			netdev_err(netdev, "Failed to set physical link up, error %d\n",
				   err);
			return err;
		}
	} else {
		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
		ice_set_link(vsi, false);
	}

	err = ice_vsi_open(vsi);
	if (err)
		netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n",
			   vsi->vsi_num, vsi->vsw->sw_id);

	/* Update existing tunnels information */
	udp_tunnel_get_rx_info(netdev);

	return err;
}
/**
 * ice_stop - Disables a network interface
 * @netdev: network interface device structure
 *
 * The stop entry point is called when an interface is de-activated by the OS,
 * and the netdevice enters the DOWN state. The hardware is still under the
 * driver's control, but the netdev interface is disabled.
 *
 * Returns success only - not allowed to fail
 */
int ice_stop(struct net_device *netdev)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = np->vsi;
	struct ice_pf *pf = vsi->back;

	if (ice_is_reset_in_progress(pf->state)) {
		netdev_err(netdev, "can't stop net device while reset is in progress");
		return -EBUSY;
	}

	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) {
		int link_err = ice_force_phys_link_state(vsi, false);

		if (link_err) {
			if (link_err == -ENOMEDIUM)
				netdev_info(vsi->netdev, "Skipping link reconfig - no media attached, VSI %d\n",
					    vsi->vsi_num);
			else
				netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n",
					   vsi->vsi_num, link_err);

			ice_vsi_close(vsi);
			return -EIO;
		}
	}

	ice_vsi_close(vsi);

	return 0;
}
/**
 * ice_features_check - Validate encapsulated packet conforms to limits
 * @skb: skb buffer
 * @netdev: This port's netdev
 * @features: Offload features that the stack believes apply
 */
static netdev_features_t
ice_features_check(struct sk_buff *skb,
		   struct net_device __always_unused *netdev,
		   netdev_features_t features)
{
	bool gso = skb_is_gso(skb);
	size_t len;

	/* No point in doing any of this if neither checksum nor GSO are
	 * being requested for this frame. We can rule out both by just
	 * checking for CHECKSUM_PARTIAL
	 */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return features;

	/* We cannot support GSO if the MSS is going to be less than
	 * 64 bytes. If it is then we need to drop support for GSO.
	 */
	if (gso && (skb_shinfo(skb)->gso_size < ICE_TXD_CTX_MIN_MSS))
		features &= ~NETIF_F_GSO_MASK;

	len = skb_network_offset(skb);
	if (len > ICE_TXD_MACLEN_MAX || len & 0x1)
		goto out_rm_features;

	len = skb_network_header_len(skb);
	if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
		goto out_rm_features;

	if (skb->encapsulation) {
		/* this must work for VXLAN frames AND IPIP/SIT frames, and in
		 * the case of IPIP frames, the transport header pointer is
		 * after the inner header! So check to make sure that this
		 * is a GRE or UDP_TUNNEL frame before doing that math.
		 */
		if (gso && (skb_shinfo(skb)->gso_type &
			    (SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL))) {
			len = skb_inner_network_header(skb) -
			      skb_transport_header(skb);
			if (len > ICE_TXD_L4LEN_MAX || len & 0x1)
				goto out_rm_features;
		}

		len = skb_inner_network_header_len(skb);
		if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
			goto out_rm_features;
	}

	return features;
out_rm_features:
	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
}
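/* Worked example, added for clarity: a TSO skb with gso_size of 32 is
 * below the 64-byte minimum MSS, so it keeps checksum offload but loses
 * the GSO feature bits; an skb whose outer or inner header lengths are odd
 * or exceed the descriptor limits falls through to out_rm_features and the
 * stack performs checksum/GSO in software instead.
 */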
static const struct net_device_ops ice_netdev_safe_mode_ops = {
	.ndo_open = ice_open,
	.ndo_stop = ice_stop,
	.ndo_start_xmit = ice_start_xmit,
	.ndo_set_mac_address = ice_set_mac_address,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_change_mtu = ice_change_mtu,
	.ndo_get_stats64 = ice_get_stats64,
	.ndo_tx_timeout = ice_tx_timeout,
	.ndo_bpf = ice_xdp_safe_mode,
};
static const struct net_device_ops ice_netdev_ops = {
	.ndo_open = ice_open,
	.ndo_stop = ice_stop,
	.ndo_start_xmit = ice_start_xmit,
	.ndo_select_queue = ice_select_queue,
	.ndo_features_check = ice_features_check,
	.ndo_fix_features = ice_fix_features,
	.ndo_set_rx_mode = ice_set_rx_mode,
	.ndo_set_mac_address = ice_set_mac_address,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_change_mtu = ice_change_mtu,
	.ndo_get_stats64 = ice_get_stats64,
	.ndo_set_tx_maxrate = ice_set_tx_maxrate,
	.ndo_eth_ioctl = ice_eth_ioctl,
	.ndo_set_vf_spoofchk = ice_set_vf_spoofchk,
	.ndo_set_vf_mac = ice_set_vf_mac,
	.ndo_get_vf_config = ice_get_vf_cfg,
	.ndo_set_vf_trust = ice_set_vf_trust,
	.ndo_set_vf_vlan = ice_set_vf_port_vlan,
	.ndo_set_vf_link_state = ice_set_vf_link_state,
	.ndo_get_vf_stats = ice_get_vf_stats,
	.ndo_set_vf_rate = ice_set_vf_bw,
	.ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
	.ndo_setup_tc = ice_setup_tc,
	.ndo_set_features = ice_set_features,
	.ndo_bridge_getlink = ice_bridge_getlink,
	.ndo_bridge_setlink = ice_bridge_setlink,
	.ndo_fdb_add = ice_fdb_add,
	.ndo_fdb_del = ice_fdb_del,
#ifdef CONFIG_RFS_ACCEL
	.ndo_rx_flow_steer = ice_rx_flow_steer,
#endif
	.ndo_tx_timeout = ice_tx_timeout,
	.ndo_bpf = ice_xdp,
	.ndo_xdp_xmit = ice_xdp_xmit,
	.ndo_xsk_wakeup = ice_xsk_wakeup,