1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 1999 - 2018 Intel Corporation. */
5 #include "ixgbe_sriov.h"
7 #ifdef CONFIG_IXGBE_DCB
9 * ixgbe_cache_ring_dcb_sriov - Descriptor ring to register mapping for SR-IOV
10 * @adapter: board private structure to initialize
12 * Cache the descriptor ring offsets for SR-IOV to the assigned rings. It
13 * will also try to cache the proper offsets if RSS/FCoE are enabled along
/*
 * Assign hardware register indices to the rings when DCB and SR-IOV are
 * used together.
 *
 * Rx and Tx rings are numbered upward from
 * vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask).  Whenever the index within a
 * pool (reg_idx & ~vmdq->mask) reaches adapter->hw_tcs, reg_idx is rounded
 * up with __ALIGN_MASK() to the start of the next pool.  Rx rings also get
 * ->netdev set: adapter->netdev while "pool" is zero, NULL otherwise.
 *
 * When IXGBE_FLAG_FCOE_ENABLED is set and fcoe->offset is not below tcs, the
 * rings from fcoe->offset onward are re-indexed starting at
 * (vmdq->offset + vmdq->indices) * queues_per_pool, each aligned to a pool
 * boundary plus the FCoE traffic class from ixgbe_fcoe_get_tc().
 *
 * NOTE(review): this listing omits some source lines (braces, short
 * declarations, return statements), so the value returned on each path is
 * not visible here; confirm against the complete file.
 */
17 static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter
*adapter
)
20 struct ixgbe_ring_feature
*fcoe
= &adapter
->ring_feature
[RING_F_FCOE
];
21 #endif /* IXGBE_FCOE */
22 struct ixgbe_ring_feature
*vmdq
= &adapter
->ring_feature
[RING_F_VMDQ
];
25 u8 tcs
= adapter
->hw_tcs
;
27 /* verify we have DCB queueing enabled before proceeding */
31 /* verify we have VMDq enabled before proceeding */
32 if (!(adapter
->flags
& IXGBE_FLAG_SRIOV_ENABLED
))
35 /* start at VMDq register offset for SR-IOV enabled setups */
36 reg_idx
= vmdq
->offset
* __ALIGN_MASK(1, ~vmdq
->mask
);
37 for (i
= 0, pool
= 0; i
< adapter
->num_rx_queues
; i
++, reg_idx
++) {
38 /* If we are greater than indices move to next pool */
39 if ((reg_idx
& ~vmdq
->mask
) >= tcs
) {
41 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
);
43 adapter
->rx_ring
[i
]->reg_idx
= reg_idx
;
44 adapter
->rx_ring
[i
]->netdev
= pool
? NULL
: adapter
->netdev
;
/* Tx rings restart from the same first VMDq register index as Rx */
47 reg_idx
= vmdq
->offset
* __ALIGN_MASK(1, ~vmdq
->mask
);
48 for (i
= 0; i
< adapter
->num_tx_queues
; i
++, reg_idx
++) {
49 /* If we are greater than indices move to next pool */
50 if ((reg_idx
& ~vmdq
->mask
) >= tcs
)
51 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
);
52 adapter
->tx_ring
[i
]->reg_idx
= reg_idx
;
56 /* nothing to do if FCoE is disabled */
57 if (!(adapter
->flags
& IXGBE_FLAG_FCOE_ENABLED
))
60 /* The work is already done if the FCoE ring is shared */
61 if (fcoe
->offset
< tcs
)
64 /* The FCoE rings exist separately, we need to move their reg_idx */
66 u16 queues_per_pool
= __ALIGN_MASK(1, ~vmdq
->mask
);
67 u8 fcoe_tc
= ixgbe_fcoe_get_tc(adapter
);
69 reg_idx
= (vmdq
->offset
+ vmdq
->indices
) * queues_per_pool
;
70 for (i
= fcoe
->offset
; i
< adapter
->num_rx_queues
; i
++) {
71 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
) + fcoe_tc
;
72 adapter
->rx_ring
[i
]->reg_idx
= reg_idx
;
73 adapter
->rx_ring
[i
]->netdev
= adapter
->netdev
;
77 reg_idx
= (vmdq
->offset
+ vmdq
->indices
) * queues_per_pool
;
78 for (i
= fcoe
->offset
; i
< adapter
->num_tx_queues
; i
++) {
79 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
) + fcoe_tc
;
80 adapter
->tx_ring
[i
]->reg_idx
= reg_idx
;
85 #endif /* IXGBE_FCOE */
89 /* ixgbe_get_first_reg_idx - Return first register index associated with ring */
/*
 * Compute the first Tx and Rx register index for traffic class @tc and store
 * them through @tx and @rx.  The layout is selected by hw->mac.type: for
 * ixgbe_mac_82598EB the visible code uses tc << 2 for Tx and tc << 3 for Rx;
 * the 82599 and later cases use several shift formulas for the Tx index.
 *
 * NOTE(review): the conditions that select between the later formulas, the
 * matching Rx assignments and the break/default lines are missing from this
 * listing; confirm against the complete file.
 */
90 static void ixgbe_get_first_reg_idx(struct ixgbe_adapter
*adapter
, u8 tc
,
91 unsigned int *tx
, unsigned int *rx
)
93 struct ixgbe_hw
*hw
= &adapter
->hw
;
94 u8 num_tcs
= adapter
->hw_tcs
;
99 switch (hw
->mac
.type
) {
100 case ixgbe_mac_82598EB
:
101 /* TxQs/TC: 4 RxQs/TC: 8 */
102 *tx
= tc
<< 2; /* 0, 4, 8, 12, 16, 20, 24, 28 */
103 *rx
= tc
<< 3; /* 0, 8, 16, 24, 32, 40, 48, 56 */
105 case ixgbe_mac_82599EB
:
108 case ixgbe_mac_X550EM_x
:
109 case ixgbe_mac_x550em_a
:
112 * TCs : TC0/1 TC2/3 TC4-7
118 *tx
= tc
<< 5; /* 0, 32, 64 */
120 *tx
= (tc
+ 2) << 4; /* 80, 96 */
122 *tx
= (tc
+ 8) << 3; /* 104, 112, 120 */
125 * TCs : TC0 TC1 TC2/3
131 *tx
= tc
<< 6; /* 0, 64 */
133 *tx
= (tc
+ 4) << 4; /* 96, 112 */
142 * ixgbe_cache_ring_dcb - Descriptor ring to register mapping for DCB
143 * @adapter: board private structure to initialize
145 * Cache the descriptor ring offsets for DCB to the assigned rings.
/*
 * Assign register indices to the rings for DCB without SR-IOV.
 *
 * For every traffic class below adapter->hw_tcs, ixgbe_get_first_reg_idx()
 * supplies the first Tx and Rx register index.  The next rss_i rings (rss_i
 * is the RSS feature's indices value) then receive consecutive register
 * indices and dcb_tc = tc; Rx rings also get ->netdev = adapter->netdev.
 * Ring array slots advance by rss_i per traffic class (offset += rss_i).
 *
 * NOTE(review): the guard condition and the return statements are missing
 * from this listing; confirm against the complete file.
 */
148 static bool ixgbe_cache_ring_dcb(struct ixgbe_adapter
*adapter
)
150 u8 num_tcs
= adapter
->hw_tcs
;
151 unsigned int tx_idx
, rx_idx
;
152 int tc
, offset
, rss_i
, i
;
154 /* verify we have DCB queueing enabled before proceeding */
158 rss_i
= adapter
->ring_feature
[RING_F_RSS
].indices
;
160 for (tc
= 0, offset
= 0; tc
< num_tcs
; tc
++, offset
+= rss_i
) {
161 ixgbe_get_first_reg_idx(adapter
, tc
, &tx_idx
, &rx_idx
);
162 for (i
= 0; i
< rss_i
; i
++, tx_idx
++, rx_idx
++) {
163 adapter
->tx_ring
[offset
+ i
]->reg_idx
= tx_idx
;
164 adapter
->rx_ring
[offset
+ i
]->reg_idx
= rx_idx
;
165 adapter
->rx_ring
[offset
+ i
]->netdev
= adapter
->netdev
;
166 adapter
->tx_ring
[offset
+ i
]->dcb_tc
= tc
;
167 adapter
->rx_ring
[offset
+ i
]->dcb_tc
= tc
;
176 * ixgbe_cache_ring_sriov - Descriptor ring to register mapping for sriov
177 * @adapter: board private structure to initialize
179 * SR-IOV doesn't use any descriptor rings but changes the default if
180 * no other mapping is used.
/*
 * Assign register indices to the rings when VMDq is enabled without DCB.
 *
 * Both the Rx and the Tx pass start at
 * vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask) and hand out consecutive
 * register indices, rounding up to the next pool boundary once the in-pool
 * index reaches rss->indices.  Rx rings get ->netdev = adapter->netdev while
 * "pool" is zero and NULL otherwise.  Each pass is followed by a second loop
 * that assigns the remaining rings (the FCoE block) consecutive indices.
 *
 * NOTE(review): the Rx pass tests (reg_idx & ~vmdq->mask) while the Tx pass
 * tests (reg_idx & rss->mask); confirm that this asymmetry is intended.
 * NOTE(review): braces, the statements guarded by the visible conditions and
 * the return statements are missing from this listing.
 */
183 static bool ixgbe_cache_ring_sriov(struct ixgbe_adapter
*adapter
)
186 struct ixgbe_ring_feature
*fcoe
= &adapter
->ring_feature
[RING_F_FCOE
];
187 #endif /* IXGBE_FCOE */
188 struct ixgbe_ring_feature
*vmdq
= &adapter
->ring_feature
[RING_F_VMDQ
];
189 struct ixgbe_ring_feature
*rss
= &adapter
->ring_feature
[RING_F_RSS
];
193 /* only proceed if VMDq is enabled */
194 if (!(adapter
->flags
& IXGBE_FLAG_VMDQ_ENABLED
))
197 /* start at VMDq register offset for SR-IOV enabled setups */
199 reg_idx
= vmdq
->offset
* __ALIGN_MASK(1, ~vmdq
->mask
);
200 for (i
= 0; i
< adapter
->num_rx_queues
; i
++, reg_idx
++) {
202 /* Allow first FCoE queue to be mapped as RSS */
203 if (fcoe
->offset
&& (i
> fcoe
->offset
))
206 /* If we are greater than indices move to next pool */
207 if ((reg_idx
& ~vmdq
->mask
) >= rss
->indices
) {
209 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
);
211 adapter
->rx_ring
[i
]->reg_idx
= reg_idx
;
212 adapter
->rx_ring
[i
]->netdev
= pool
? NULL
: adapter
->netdev
;
216 /* FCoE uses a linear block of queues so just assigning 1:1 */
217 for (; i
< adapter
->num_rx_queues
; i
++, reg_idx
++) {
218 adapter
->rx_ring
[i
]->reg_idx
= reg_idx
;
219 adapter
->rx_ring
[i
]->netdev
= adapter
->netdev
;
223 reg_idx
= vmdq
->offset
* __ALIGN_MASK(1, ~vmdq
->mask
);
224 for (i
= 0; i
< adapter
->num_tx_queues
; i
++, reg_idx
++) {
226 /* Allow first FCoE queue to be mapped as RSS */
227 if (fcoe
->offset
&& (i
> fcoe
->offset
))
230 /* If we are greater than indices move to next pool */
231 if ((reg_idx
& rss
->mask
) >= rss
->indices
)
232 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
);
233 adapter
->tx_ring
[i
]->reg_idx
= reg_idx
;
237 /* FCoE uses a linear block of queues so just assigning 1:1 */
238 for (; i
< adapter
->num_tx_queues
; i
++, reg_idx
++)
239 adapter
->tx_ring
[i
]->reg_idx
= reg_idx
;
247 * ixgbe_cache_ring_rss - Descriptor ring to register mapping for RSS
248 * @adapter: board private structure to initialize
250 * Cache the descriptor ring offsets for RSS to the assigned rings.
/*
 * Assign register indices for the plain RSS layout.
 *
 * Rx ring i gets register index i and ->netdev = adapter->netdev.  Tx rings
 * are numbered from 0, and the XDP rings continue from where the Tx rings
 * stopped because reg_idx is not reset between the two loops.
 *
 * NOTE(review): local declarations and the return statement are missing
 * from this listing.
 */
253 static bool ixgbe_cache_ring_rss(struct ixgbe_adapter
*adapter
)
257 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
258 adapter
->rx_ring
[i
]->reg_idx
= i
;
259 adapter
->rx_ring
[i
]->netdev
= adapter
->netdev
;
261 for (i
= 0, reg_idx
= 0; i
< adapter
->num_tx_queues
; i
++, reg_idx
++)
262 adapter
->tx_ring
[i
]->reg_idx
= reg_idx
;
/* reg_idx carries over, so XDP rings follow the last Tx register index */
263 for (i
= 0; i
< adapter
->num_xdp_queues
; i
++, reg_idx
++)
264 adapter
->xdp_ring
[i
]->reg_idx
= reg_idx
;
270 * ixgbe_cache_ring_register - Descriptor ring to register mapping
271 * @adapter: board private structure to initialize
273 * Once we know the feature-set enabled for the device, we'll cache
274 * the register offset the descriptor ring is assigned to.
276 * Note, the order of the various feature calls is important. It must start with
277 * the "most" features enabled at the same time, then trickle down to the
278 * least amount of features turned on at once.
/*
 * Top-level ring-to-register mapping.  Ring 0 of both the Rx and Tx arrays
 * is first given register index 0 as the default, then the feature-specific
 * helpers are tried in this order: DCB+SR-IOV and DCB (both only under
 * CONFIG_IXGBE_DCB), SR-IOV, and finally RSS.
 *
 * NOTE(review): the statements executed when a helper reports success are
 * missing from this listing; presumably the function stops at the first
 * helper that succeeds -- confirm against the complete file.
 */
280 static void ixgbe_cache_ring_register(struct ixgbe_adapter
*adapter
)
282 /* start with default case */
283 adapter
->rx_ring
[0]->reg_idx
= 0;
284 adapter
->tx_ring
[0]->reg_idx
= 0;
286 #ifdef CONFIG_IXGBE_DCB
287 if (ixgbe_cache_ring_dcb_sriov(adapter
))
290 if (ixgbe_cache_ring_dcb(adapter
))
294 if (ixgbe_cache_ring_sriov(adapter
))
297 ixgbe_cache_ring_rss(adapter
);
/*
 * Return the number of XDP queues to use: the smaller of IXGBE_MAX_XDP_QS
 * and nr_cpu_ids when adapter->xdp_prog is set, otherwise 0.
 */
300 static int ixgbe_xdp_queues(struct ixgbe_adapter
*adapter
)
304 queues
= min_t(int, IXGBE_MAX_XDP_QS
, nr_cpu_ids
);
305 return adapter
->xdp_prog
? queues
: 0;
/*
 * RSS queue masks stored in ring_feature[RING_F_RSS].mask.  Each value is
 * the queue count named in the macro minus one (64 -> 0x3F, 16 -> 0xF,
 * 8 -> 0x7, 4 -> 0x3, 2 -> 0x1); the disabled mask is 0.
 */
308 #define IXGBE_RSS_64Q_MASK 0x3F
309 #define IXGBE_RSS_16Q_MASK 0xF
310 #define IXGBE_RSS_8Q_MASK 0x7
311 #define IXGBE_RSS_4Q_MASK 0x3
312 #define IXGBE_RSS_2Q_MASK 0x1
313 #define IXGBE_RSS_DISABLED_MASK 0x0
315 #ifdef CONFIG_IXGBE_DCB
317 * ixgbe_set_dcb_sriov_queues: Allocate queues for SR-IOV devices w/ DCB
318 * @adapter: board private structure to initialize
320 * When SR-IOV (Single Root IO Virtualization) is enabled, allocate queues
321 * and VM pools where appropriate. Also assign queues based on DCB
322 * priorities and map accordingly.
/*
 * Size the queue layout for SR-IOV combined with DCB.
 *
 * The pool count starts from the VMDq feature limit and is capped by
 * MAX_TX_QUEUES / tcs.  With the VMDq offset temporarily added, it is capped
 * again at 16 (IXGBE_82599_VMDQ_8Q_MASK) or 32 (IXGBE_82599_VMDQ_4Q_MASK).
 * The result is saved in the VMDq feature (indices, mask), RSS is reduced to
 * one index with IXGBE_RSS_DISABLED_MASK, IXGBE_FLAG_FDIR_HASH_CAPABLE is
 * cleared, and the Rx and Tx queue counts become vmdq_i * tcs with no XDP
 * queues.  With IXGBE_FLAG_FCOE_ENABLED set, fcoe_i (capped by fcoe->limit)
 * extra queues are placed after the VMDq queues.  Finally each traffic class
 * i is registered with netdev_set_tc_queue(netdev, i, 1, i).
 *
 * NOTE(review): the condition choosing between the 16- and 32-pool modes,
 * several FCoE branch lines and the return statements are missing from this
 * listing; confirm against the complete file.
 */
325 static bool ixgbe_set_dcb_sriov_queues(struct ixgbe_adapter
*adapter
)
328 u16 vmdq_i
= adapter
->ring_feature
[RING_F_VMDQ
].limit
;
333 u8 tcs
= adapter
->hw_tcs
;
335 /* verify we have DCB queueing enabled before proceeding */
339 /* verify we have VMDq enabled before proceeding */
340 if (!(adapter
->flags
& IXGBE_FLAG_SRIOV_ENABLED
))
343 /* limit VMDq instances on the PF by number of Tx queues */
344 vmdq_i
= min_t(u16
, vmdq_i
, MAX_TX_QUEUES
/ tcs
);
346 /* Add starting offset to total pool count */
347 vmdq_i
+= adapter
->ring_feature
[RING_F_VMDQ
].offset
;
349 /* 16 pools w/ 8 TC per pool */
351 vmdq_i
= min_t(u16
, vmdq_i
, 16);
352 vmdq_m
= IXGBE_82599_VMDQ_8Q_MASK
;
353 /* 32 pools w/ 4 TC per pool */
355 vmdq_i
= min_t(u16
, vmdq_i
, 32);
356 vmdq_m
= IXGBE_82599_VMDQ_4Q_MASK
;
360 /* queues in the remaining pools are available for FCoE */
361 fcoe_i
= (128 / __ALIGN_MASK(1, ~vmdq_m
)) - vmdq_i
;
364 /* remove the starting offset from the pool count */
365 vmdq_i
-= adapter
->ring_feature
[RING_F_VMDQ
].offset
;
367 /* save features for later use */
368 adapter
->ring_feature
[RING_F_VMDQ
].indices
= vmdq_i
;
369 adapter
->ring_feature
[RING_F_VMDQ
].mask
= vmdq_m
;
372 * We do not support DCB, VMDq, and RSS all simultaneously
373 * so we will disable RSS since it is the lowest priority
375 adapter
->ring_feature
[RING_F_RSS
].indices
= 1;
376 adapter
->ring_feature
[RING_F_RSS
].mask
= IXGBE_RSS_DISABLED_MASK
;
378 /* disable ATR as it is not supported when VMDq is enabled */
379 adapter
->flags
&= ~IXGBE_FLAG_FDIR_HASH_CAPABLE
;
381 adapter
->num_rx_pools
= vmdq_i
;
382 adapter
->num_rx_queues_per_pool
= tcs
;
384 adapter
->num_tx_queues
= vmdq_i
* tcs
;
385 adapter
->num_xdp_queues
= 0;
386 adapter
->num_rx_queues
= vmdq_i
* tcs
;
389 if (adapter
->flags
& IXGBE_FLAG_FCOE_ENABLED
) {
390 struct ixgbe_ring_feature
*fcoe
;
392 fcoe
= &adapter
->ring_feature
[RING_F_FCOE
];
394 /* limit ourselves based on feature limits */
395 fcoe_i
= min_t(u16
, fcoe_i
, fcoe
->limit
);
398 /* alloc queues for FCoE separately */
399 fcoe
->indices
= fcoe_i
;
400 fcoe
->offset
= vmdq_i
* tcs
;
402 /* add queues to adapter */
403 adapter
->num_tx_queues
+= fcoe_i
;
404 adapter
->num_rx_queues
+= fcoe_i
;
405 } else if (tcs
> 1) {
406 /* use queue belonging to FCoE TC */
408 fcoe
->offset
= ixgbe_fcoe_get_tc(adapter
);
410 adapter
->flags
&= ~IXGBE_FLAG_FCOE_ENABLED
;
417 #endif /* IXGBE_FCOE */
418 /* configure TC to queue mapping */
419 for (i
= 0; i
< tcs
; i
++)
420 netdev_set_tc_queue(adapter
->netdev
, i
, 1, i
);
/*
 * Size the queue layout for DCB without SR-IOV.
 *
 * The per-TC queue count rss_i starts at dev->num_tx_queues / tcs and is
 * capped at 4 on ixgbe_mac_82598EB, at 8 when tcs > 4, and at 16 otherwise,
 * each with the matching IXGBE_RSS_*Q_MASK, and then by the RSS feature
 * limit.  IXGBE_FLAG_FDIR_HASH_CAPABLE is cleared.  With FCoE enabled the
 * FCoE feature gets indices = min(rss_i, limit) and offset = rss_i * tc for
 * the FCoE traffic class.  Each traffic class i is registered with
 * netdev_set_tc_queue(dev, i, rss_i, rss_i * i), and the Rx and Tx queue
 * counts become rss_i * tcs with no XDP queues.
 *
 * NOTE(review): the guard on tcs, the stores of rss_i/rss_m into the RSS
 * feature and the return statements are missing from this listing.
 */
425 static bool ixgbe_set_dcb_queues(struct ixgbe_adapter
*adapter
)
427 struct net_device
*dev
= adapter
->netdev
;
428 struct ixgbe_ring_feature
*f
;
432 /* Map queue offset and counts onto allocated tx queues */
433 tcs
= adapter
->hw_tcs
;
435 /* verify we have DCB queueing enabled before proceeding */
439 /* determine the upper limit for our current DCB mode */
440 rss_i
= dev
->num_tx_queues
/ tcs
;
441 if (adapter
->hw
.mac
.type
== ixgbe_mac_82598EB
) {
442 /* 8 TC w/ 4 queues per TC */
443 rss_i
= min_t(u16
, rss_i
, 4);
444 rss_m
= IXGBE_RSS_4Q_MASK
;
445 } else if (tcs
> 4) {
446 /* 8 TC w/ 8 queues per TC */
447 rss_i
= min_t(u16
, rss_i
, 8);
448 rss_m
= IXGBE_RSS_8Q_MASK
;
450 /* 4 TC w/ 16 queues per TC */
451 rss_i
= min_t(u16
, rss_i
, 16);
452 rss_m
= IXGBE_RSS_16Q_MASK
;
455 /* set RSS mask and indices */
456 f
= &adapter
->ring_feature
[RING_F_RSS
];
457 rss_i
= min_t(int, rss_i
, f
->limit
);
461 /* disable ATR as it is not supported when multiple TCs are enabled */
462 adapter
->flags
&= ~IXGBE_FLAG_FDIR_HASH_CAPABLE
;
465 /* FCoE enabled queues require special configuration indexed
466 * by feature specific indices and offset. Here we map FCoE
467 * indices onto the DCB queue pairs allowing FCoE to own
468 * configuration later.
470 if (adapter
->flags
& IXGBE_FLAG_FCOE_ENABLED
) {
471 u8 tc
= ixgbe_fcoe_get_tc(adapter
);
473 f
= &adapter
->ring_feature
[RING_F_FCOE
];
474 f
->indices
= min_t(u16
, rss_i
, f
->limit
);
475 f
->offset
= rss_i
* tc
;
478 #endif /* IXGBE_FCOE */
479 for (i
= 0; i
< tcs
; i
++)
480 netdev_set_tc_queue(dev
, i
, rss_i
, rss_i
* i
);
482 adapter
->num_tx_queues
= rss_i
* tcs
;
483 adapter
->num_xdp_queues
= 0;
484 adapter
->num_rx_queues
= rss_i
* tcs
;
491 * ixgbe_set_sriov_queues - Allocate queues for SR-IOV devices
492 * @adapter: board private structure to initialize
494 * When SR-IOV (Single Root IO Virtualization) is enabled, allocate queues
495 * and VM pools where appropriate. If RSS is available, then also try and
496 * enable RSS and map accordingly.
/*
 * Size the queue layout for SR-IOV without DCB.
 *
 * rss_i starts from the RSS feature limit and is capped by
 * MAX_TX_QUEUES / vmdq_i.  The pool count, with the VMDq offset temporarily
 * added, is capped at IXGBE_MAX_VMDQ_INDICES.  Two modes are visible: a
 * 2-queue-per-pool mode (rss_i capped at 2) and a 4-queue-per-pool mode
 * where rss_i is rounded down to 4, 2 or 1.  The VMDq and RSS features are
 * updated, the Rx and Tx queue counts become vmdq_i * rss_i with no XDP
 * queues, and IXGBE_FLAG_FDIR_HASH_CAPABLE is cleared.  With FCoE enabled,
 * FCoE queues are either allocated after the VMDq queues (vmdq_i > 1 and
 * fcoe_i non-zero) or merged with the RSS queues, and fcoe_i is added to the
 * Rx and Tx queue counts.  The netdev is then set to one traffic class whose
 * TC0 covers num_rx_queues_per_pool queues at offset 0.
 *
 * NOTE(review): the condition selecting the pool mode, parts of the FCoE
 * merge branch and the return statements are missing from this listing.
 * NOTE(review): MAX_TX_QUEUES / vmdq_i divides by the VMDq limit; presumably
 * callers guarantee that it is non-zero -- verify.
 */
499 static bool ixgbe_set_sriov_queues(struct ixgbe_adapter
*adapter
)
501 u16 vmdq_i
= adapter
->ring_feature
[RING_F_VMDQ
].limit
;
503 u16 rss_i
= adapter
->ring_feature
[RING_F_RSS
].limit
;
504 u16 rss_m
= IXGBE_RSS_DISABLED_MASK
;
509 /* only proceed if SR-IOV is enabled */
510 if (!(adapter
->flags
& IXGBE_FLAG_SRIOV_ENABLED
))
513 /* limit l2fwd RSS based on total Tx queue limit */
514 rss_i
= min_t(u16
, rss_i
, MAX_TX_QUEUES
/ vmdq_i
);
516 /* Add starting offset to total pool count */
517 vmdq_i
+= adapter
->ring_feature
[RING_F_VMDQ
].offset
;
519 /* double check we are limited to maximum pools */
520 vmdq_i
= min_t(u16
, IXGBE_MAX_VMDQ_INDICES
, vmdq_i
);
522 /* 64 pool mode with 2 queues per pool */
524 vmdq_m
= IXGBE_82599_VMDQ_2Q_MASK
;
525 rss_m
= IXGBE_RSS_2Q_MASK
;
526 rss_i
= min_t(u16
, rss_i
, 2);
527 /* 32 pool mode with up to 4 queues per pool */
529 vmdq_m
= IXGBE_82599_VMDQ_4Q_MASK
;
530 rss_m
= IXGBE_RSS_4Q_MASK
;
531 /* We can support 4, 2, or 1 queues */
532 rss_i
= (rss_i
> 3) ? 4 : (rss_i
> 1) ? 2 : 1;
536 /* queues in the remaining pools are available for FCoE */
537 fcoe_i
= 128 - (vmdq_i
* __ALIGN_MASK(1, ~vmdq_m
));
540 /* remove the starting offset from the pool count */
541 vmdq_i
-= adapter
->ring_feature
[RING_F_VMDQ
].offset
;
543 /* save features for later use */
544 adapter
->ring_feature
[RING_F_VMDQ
].indices
= vmdq_i
;
545 adapter
->ring_feature
[RING_F_VMDQ
].mask
= vmdq_m
;
547 /* limit RSS based on user input and save for later use */
548 adapter
->ring_feature
[RING_F_RSS
].indices
= rss_i
;
549 adapter
->ring_feature
[RING_F_RSS
].mask
= rss_m
;
551 adapter
->num_rx_pools
= vmdq_i
;
552 adapter
->num_rx_queues_per_pool
= rss_i
;
554 adapter
->num_rx_queues
= vmdq_i
* rss_i
;
555 adapter
->num_tx_queues
= vmdq_i
* rss_i
;
556 adapter
->num_xdp_queues
= 0;
558 /* disable ATR as it is not supported when VMDq is enabled */
559 adapter
->flags
&= ~IXGBE_FLAG_FDIR_HASH_CAPABLE
;
563 * FCoE can use rings from adjacent buffers to allow RSS
564 * like behavior. To account for this we need to add the
565 * FCoE indices to the total ring count.
567 if (adapter
->flags
& IXGBE_FLAG_FCOE_ENABLED
) {
568 struct ixgbe_ring_feature
*fcoe
;
570 fcoe
= &adapter
->ring_feature
[RING_F_FCOE
];
572 /* limit ourselves based on feature limits */
573 fcoe_i
= min_t(u16
, fcoe_i
, fcoe
->limit
);
575 if (vmdq_i
> 1 && fcoe_i
) {
576 /* alloc queues for FCoE separately */
577 fcoe
->indices
= fcoe_i
;
578 fcoe
->offset
= vmdq_i
* rss_i
;
580 /* merge FCoE queues with RSS queues */
581 fcoe_i
= min_t(u16
, fcoe_i
+ rss_i
, num_online_cpus());
583 /* limit indices to rss_i if MSI-X is disabled */
584 if (!(adapter
->flags
& IXGBE_FLAG_MSIX_ENABLED
))
587 /* attempt to reserve some queues for just FCoE */
588 fcoe
->indices
= min_t(u16
, fcoe_i
, fcoe
->limit
);
589 fcoe
->offset
= fcoe_i
- fcoe
->indices
;
594 /* add queues to adapter */
595 adapter
->num_tx_queues
+= fcoe_i
;
596 adapter
->num_rx_queues
+= fcoe_i
;
600 /* To support macvlan offload we have to use num_tc to
601 * restrict the queues that can be used by the device.
602 * By doing this we can avoid reporting a false number of
606 netdev_set_num_tc(adapter
->netdev
, 1);
608 /* populate TC0 for use by pool 0 */
609 netdev_set_tc_queue(adapter
->netdev
, 0,
610 adapter
->num_rx_queues_per_pool
, 0);
616 * ixgbe_set_rss_queues - Allocate queues for RSS
617 * @adapter: board private structure to initialize
619 * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try
620 * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
/*
 * Size the queue layout for the base RSS mode.
 *
 * The RSS feature mask is IXGBE_RSS_16Q_MASK for MAC types below
 * ixgbe_mac_X550 and IXGBE_RSS_64Q_MASK otherwise.
 * IXGBE_FLAG_FDIR_HASH_CAPABLE is cleared first; when rss_i > 1 and
 * adapter->atr_sample_rate is non-zero, rss_i is replaced by the Flow
 * Director feature limit and the hash flag is set again unless
 * IXGBE_FLAG_FDIR_PERFECT_CAPABLE is set.  With FCoE enabled, FCoE queues
 * are merged with the RSS queues (bounded by num_online_cpus() and
 * dev->num_tx_queues) and rss_i becomes the larger of fcoe_i and rss_i.
 * The Rx and Tx queue counts are set to rss_i and the XDP queue count to
 * ixgbe_xdp_queues().
 *
 * NOTE(review): the initial assignment of rss_i, the else keywords and the
 * return statement are missing from this listing.
 */
623 static bool ixgbe_set_rss_queues(struct ixgbe_adapter
*adapter
)
625 struct ixgbe_hw
*hw
= &adapter
->hw
;
626 struct ixgbe_ring_feature
*f
;
629 /* set mask for 16 queue limit of RSS */
630 f
= &adapter
->ring_feature
[RING_F_RSS
];
635 if (hw
->mac
.type
< ixgbe_mac_X550
)
636 f
->mask
= IXGBE_RSS_16Q_MASK
;
638 f
->mask
= IXGBE_RSS_64Q_MASK
;
640 /* disable ATR by default, it will be configured below */
641 adapter
->flags
&= ~IXGBE_FLAG_FDIR_HASH_CAPABLE
;
644 * Use Flow Director in addition to RSS to ensure the best
645 * distribution of flows across cores, even when an FDIR flow
648 if (rss_i
> 1 && adapter
->atr_sample_rate
) {
649 f
= &adapter
->ring_feature
[RING_F_FDIR
];
651 rss_i
= f
->indices
= f
->limit
;
653 if (!(adapter
->flags
& IXGBE_FLAG_FDIR_PERFECT_CAPABLE
))
654 adapter
->flags
|= IXGBE_FLAG_FDIR_HASH_CAPABLE
;
659 * FCoE can exist on the same rings as standard network traffic
660 * however it is preferred to avoid that if possible. In order
661 * to get the best performance we allocate as many FCoE queues
662 * as we can and we place them at the end of the ring array to
663 * avoid sharing queues with standard RSS on systems with 24 or
666 if (adapter
->flags
& IXGBE_FLAG_FCOE_ENABLED
) {
667 struct net_device
*dev
= adapter
->netdev
;
670 f
= &adapter
->ring_feature
[RING_F_FCOE
];
672 /* merge FCoE queues with RSS queues */
673 fcoe_i
= min_t(u16
, f
->limit
+ rss_i
, num_online_cpus());
674 fcoe_i
= min_t(u16
, fcoe_i
, dev
->num_tx_queues
);
676 /* limit indices to rss_i if MSI-X is disabled */
677 if (!(adapter
->flags
& IXGBE_FLAG_MSIX_ENABLED
))
680 /* attempt to reserve some queues for just FCoE */
681 f
->indices
= min_t(u16
, fcoe_i
, f
->limit
);
682 f
->offset
= fcoe_i
- f
->indices
;
683 rss_i
= max_t(u16
, fcoe_i
, rss_i
);
686 #endif /* IXGBE_FCOE */
687 adapter
->num_rx_queues
= rss_i
;
688 adapter
->num_tx_queues
= rss_i
;
689 adapter
->num_xdp_queues
= ixgbe_xdp_queues(adapter
);
695 * ixgbe_set_num_queues - Allocate queues for device, feature dependent
696 * @adapter: board private structure to initialize
698 * This is the top level queue allocation routine. The order here is very
699 * important, starting with the "most" number of features turned on at once,
700 * and ending with the smallest set of features. This way large combinations
701 * can be allocated if they're turned on, and smaller combinations are the
702 * fallthrough conditions.
/*
 * Top-level queue-count selection.  The adapter is first reset to the base
 * case (one Rx queue, one Tx queue, no XDP queues, one Rx pool with one
 * queue per pool), then the feature-specific helpers are tried in order:
 * DCB+SR-IOV and DCB (both only under CONFIG_IXGBE_DCB), SR-IOV, and
 * finally RSS.
 *
 * NOTE(review): the statements executed when a helper reports success are
 * missing from this listing; presumably the function stops at the first
 * helper that succeeds -- confirm against the complete file.
 */
705 static void ixgbe_set_num_queues(struct ixgbe_adapter
*adapter
)
707 /* Start with base case */
708 adapter
->num_rx_queues
= 1;
709 adapter
->num_tx_queues
= 1;
710 adapter
->num_xdp_queues
= 0;
711 adapter
->num_rx_pools
= 1;
712 adapter
->num_rx_queues_per_pool
= 1;
714 #ifdef CONFIG_IXGBE_DCB
715 if (ixgbe_set_dcb_sriov_queues(adapter
))
718 if (ixgbe_set_dcb_queues(adapter
))
722 if (ixgbe_set_sriov_queues(adapter
))
725 ixgbe_set_rss_queues(adapter
);
729 * ixgbe_acquire_msix_vectors - acquire MSI-X vectors
730 * @adapter: board private structure
732 * Attempts to acquire a suitable range of MSI-X vector interrupts. Will
733 * return a negative error code if unable to acquire MSI-X vectors for any
/*
 * Request MSI-X vectors for the adapter.
 *
 * The requested count is the largest of the Rx, Tx and XDP queue counts,
 * capped at num_online_cpus(), plus NON_Q_VECTORS, capped at
 * hw->mac.max_msix_vectors.  adapter->msix_entries is allocated with
 * kcalloc() and entry i is numbered i.  pci_enable_msix_range() is then
 * called with MIN_MSIX_COUNT as the minimum.  The visible failure handling
 * warns, clears IXGBE_FLAG_MSIX_ENABLED, frees msix_entries and sets it to
 * NULL; the visible success handling sets IXGBE_FLAG_MSIX_ENABLED and stores
 * min(vectors - NON_Q_VECTORS, adapter->max_q_vectors) in num_q_vectors.
 *
 * NOTE(review): the kcalloc() flags argument, the test on the result of
 * pci_enable_msix_range() and all return statements are missing from this
 * listing; confirm against the complete file.
 */
736 static int ixgbe_acquire_msix_vectors(struct ixgbe_adapter
*adapter
)
738 struct ixgbe_hw
*hw
= &adapter
->hw
;
739 int i
, vectors
, vector_threshold
;
741 /* We start by asking for one vector per queue pair with XDP queues
742 * being stacked with TX queues.
744 vectors
= max(adapter
->num_rx_queues
, adapter
->num_tx_queues
);
745 vectors
= max(vectors
, adapter
->num_xdp_queues
);
747 /* It is easy to be greedy for MSI-X vectors. However, it really
748 * doesn't do much good if we have a lot more vectors than CPUs. We'll
749 * be somewhat conservative and only ask for (roughly) the same number
750 * of vectors as there are CPUs.
752 vectors
= min_t(int, vectors
, num_online_cpus());
754 /* Some vectors are necessary for non-queue interrupts */
755 vectors
+= NON_Q_VECTORS
;
757 /* Hardware can only support a maximum of hw.mac->max_msix_vectors.
758 * With features such as RSS and VMDq, we can easily surpass the
759 * number of Rx and Tx descriptor queues supported by our device.
760 * Thus, we cap the maximum in the rare cases where the CPU count also
761 * exceeds our vector limit
763 vectors
= min_t(int, vectors
, hw
->mac
.max_msix_vectors
);
765 /* We want a minimum of two MSI-X vectors for (1) a TxQ[0] + RxQ[0]
766 * handler, and (2) an Other (Link Status Change, etc.) handler.
768 vector_threshold
= MIN_MSIX_COUNT
;
770 adapter
->msix_entries
= kcalloc(vectors
,
771 sizeof(struct msix_entry
),
773 if (!adapter
->msix_entries
)
776 for (i
= 0; i
< vectors
; i
++)
777 adapter
->msix_entries
[i
].entry
= i
;
779 vectors
= pci_enable_msix_range(adapter
->pdev
, adapter
->msix_entries
,
780 vector_threshold
, vectors
);
783 /* A negative count of allocated vectors indicates an error in
784 * acquiring within the specified range of MSI-X vectors
786 e_dev_warn("Failed to allocate MSI-X interrupts. Err: %d\n",
789 adapter
->flags
&= ~IXGBE_FLAG_MSIX_ENABLED
;
790 kfree(adapter
->msix_entries
);
791 adapter
->msix_entries
= NULL
;
796 /* we successfully allocated some number of vectors within our
799 adapter
->flags
|= IXGBE_FLAG_MSIX_ENABLED
;
801 /* Adjust for only the vectors we'll use, which is minimum
802 * of max_q_vectors, or the number of vectors we were allocated.
804 vectors
-= NON_Q_VECTORS
;
805 adapter
->num_q_vectors
= min_t(int, vectors
, adapter
->max_q_vectors
);
/*
 * Link @ring into the ring container @head: the ring's ->next is pointed at
 * the container's current first ring, and head->next_update is set to
 * jiffies + 1.
 *
 * NOTE(review): two source lines between these statements are missing from
 * this listing; presumably they make @ring the new list head and update the
 * container's ring count -- confirm against the complete file.
 */
810 static void ixgbe_add_ring(struct ixgbe_ring
*ring
,
811 struct ixgbe_ring_container
*head
)
813 ring
->next
= head
->ring
;
816 head
->next_update
= jiffies
+ 1;
820 * ixgbe_alloc_q_vector - Allocate memory for a single interrupt vector
821 * @adapter: board private structure to initialize
822 * @v_count: q_vectors allocated on adapter, used for ring interleaving
823 * @v_idx: index of vector in adapter struct
824 * @txr_count: total number of Tx rings to allocate
825 * @txr_idx: index of first Tx ring to allocate
826 * @xdp_count: total number of XDP rings to allocate
827 * @xdp_idx: index of first XDP ring to allocate
828 * @rxr_count: total number of Rx rings to allocate
829 * @rxr_idx: index of first Rx ring to allocate
831 * We allocate one q_vector. If allocation fails we return -ENOMEM.
/*
 * Allocate and initialise one q_vector together with its Tx, XDP and Rx
 * rings (ring_count = txr_count + rxr_count + xdp_count), which live in the
 * q_vector's trailing ring array.
 *
 * The NUMA node defaults to the PCI device's node.  When tcs <= 1, SR-IOV is
 * off, rss_i > 1 and atr_sample_rate is non-zero, a CPU is picked with
 * cpumask_local_spread(v_idx, node) and its node is used instead.  After
 * allocation the vector gets its affinity mask, NUMA node, NAPI context
 * (ixgbe_poll), back-pointer to the adapter, index, Tx work limit and
 * adaptive ITR defaults.  q_vector->itr comes from tx_itr_setting for a
 * Tx-only vector and from rx_itr_setting otherwise; a setting of 1 maps to
 * IXGBE_12K_ITR and IXGBE_20K_ITR respectively.
 *
 * Each ring is given its device, netdev, q_vector back-link, descriptor
 * count and queue_index, is linked into q_vector->tx or q_vector->rx via
 * ixgbe_add_ring(), and is published in the adapter's ring array with
 * WRITE_ONCE().  XDP rings are linked into the Tx container and also get
 * their tx_lock initialised.  Rx rings on ixgbe_mac_82599EB get
 * __IXGBE_RX_CSUM_UDP_ZERO_ERR, and Rx rings inside the FCoE range get
 * __IXGBE_RX_FCOE when netdev->fcoe_mtu is set.
 *
 * NOTE(review): the loop headers over the three ring kinds, the count/index
 * updates, the allocation-failure check and the return statements are
 * missing from this listing; confirm against the complete file.
 */
833 static int ixgbe_alloc_q_vector(struct ixgbe_adapter
*adapter
,
834 int v_count
, int v_idx
,
835 int txr_count
, int txr_idx
,
836 int xdp_count
, int xdp_idx
,
837 int rxr_count
, int rxr_idx
)
839 int node
= dev_to_node(&adapter
->pdev
->dev
);
840 struct ixgbe_q_vector
*q_vector
;
841 struct ixgbe_ring
*ring
;
844 u8 tcs
= adapter
->hw_tcs
;
846 ring_count
= txr_count
+ rxr_count
+ xdp_count
;
848 /* customize cpu for Flow Director mapping */
849 if ((tcs
<= 1) && !(adapter
->flags
& IXGBE_FLAG_SRIOV_ENABLED
)) {
850 u16 rss_i
= adapter
->ring_feature
[RING_F_RSS
].indices
;
851 if (rss_i
> 1 && adapter
->atr_sample_rate
) {
852 cpu
= cpumask_local_spread(v_idx
, node
);
853 node
= cpu_to_node(cpu
);
857 /* allocate q_vector and rings */
858 q_vector
= kzalloc_node(struct_size(q_vector
, ring
, ring_count
),
861 q_vector
= kzalloc(struct_size(q_vector
, ring
, ring_count
),
866 /* setup affinity mask and node */
868 cpumask_set_cpu(cpu
, &q_vector
->affinity_mask
);
869 q_vector
->numa_node
= node
;
871 #ifdef CONFIG_IXGBE_DCA
872 /* initialize CPU for DCA */
876 /* initialize NAPI */
877 netif_napi_add(adapter
->netdev
, &q_vector
->napi
, ixgbe_poll
);
879 /* tie q_vector and adapter together */
880 adapter
->q_vector
[v_idx
] = q_vector
;
881 q_vector
->adapter
= adapter
;
882 q_vector
->v_idx
= v_idx
;
884 /* initialize work limits */
885 q_vector
->tx
.work_limit
= adapter
->tx_work_limit
;
887 /* Initialize setting for adaptive ITR */
888 q_vector
->tx
.itr
= IXGBE_ITR_ADAPTIVE_MAX_USECS
|
889 IXGBE_ITR_ADAPTIVE_LATENCY
;
890 q_vector
->rx
.itr
= IXGBE_ITR_ADAPTIVE_MAX_USECS
|
891 IXGBE_ITR_ADAPTIVE_LATENCY
;
894 if (txr_count
&& !rxr_count
) {
896 if (adapter
->tx_itr_setting
== 1)
897 q_vector
->itr
= IXGBE_12K_ITR
;
899 q_vector
->itr
= adapter
->tx_itr_setting
;
901 /* rx or rx/tx vector */
902 if (adapter
->rx_itr_setting
== 1)
903 q_vector
->itr
= IXGBE_20K_ITR
;
905 q_vector
->itr
= adapter
->rx_itr_setting
;
908 /* initialize pointer to rings */
909 ring
= q_vector
->ring
;
912 /* assign generic ring traits */
913 ring
->dev
= &adapter
->pdev
->dev
;
914 ring
->netdev
= adapter
->netdev
;
916 /* configure backlink on ring */
917 ring
->q_vector
= q_vector
;
919 /* update q_vector Tx values */
920 ixgbe_add_ring(ring
, &q_vector
->tx
);
922 /* apply Tx specific ring traits */
923 ring
->count
= adapter
->tx_ring_count
;
924 ring
->queue_index
= txr_idx
;
926 /* assign ring to adapter */
927 WRITE_ONCE(adapter
->tx_ring
[txr_idx
], ring
);
929 /* update count and index */
933 /* push pointer to next ring */
938 /* assign generic ring traits */
939 ring
->dev
= &adapter
->pdev
->dev
;
940 ring
->netdev
= adapter
->netdev
;
942 /* configure backlink on ring */
943 ring
->q_vector
= q_vector
;
945 /* update q_vector Tx values */
946 ixgbe_add_ring(ring
, &q_vector
->tx
);
948 /* apply Tx specific ring traits */
949 ring
->count
= adapter
->tx_ring_count
;
950 ring
->queue_index
= xdp_idx
;
952 spin_lock_init(&ring
->tx_lock
);
954 /* assign ring to adapter */
955 WRITE_ONCE(adapter
->xdp_ring
[xdp_idx
], ring
);
957 /* update count and index */
961 /* push pointer to next ring */
966 /* assign generic ring traits */
967 ring
->dev
= &adapter
->pdev
->dev
;
968 ring
->netdev
= adapter
->netdev
;
970 /* configure backlink on ring */
971 ring
->q_vector
= q_vector
;
973 /* update q_vector Rx values */
974 ixgbe_add_ring(ring
, &q_vector
->rx
);
977 * 82599 errata, UDP frames with a 0 checksum
978 * can be marked as checksum errors.
980 if (adapter
->hw
.mac
.type
== ixgbe_mac_82599EB
)
981 set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR
, &ring
->state
);
984 if (adapter
->netdev
->fcoe_mtu
) {
985 struct ixgbe_ring_feature
*f
;
986 f
= &adapter
->ring_feature
[RING_F_FCOE
];
987 if ((rxr_idx
>= f
->offset
) &&
988 (rxr_idx
< f
->offset
+ f
->indices
))
989 set_bit(__IXGBE_RX_FCOE
, &ring
->state
);
992 #endif /* IXGBE_FCOE */
993 /* apply Rx specific ring traits */
994 ring
->count
= adapter
->rx_ring_count
;
995 ring
->queue_index
= rxr_idx
;
997 /* assign ring to adapter */
998 WRITE_ONCE(adapter
->rx_ring
[rxr_idx
], ring
);
1000 /* update count and index */
1004 /* push pointer to next ring */
1012 * ixgbe_free_q_vector - Free memory allocated for specific interrupt vector
1013 * @adapter: board private structure to initialize
1014 * @v_idx: Index of vector to be freed
1016 * This function frees the memory allocated to the q_vector. In addition if
1017 * NAPI is enabled it will delete any references to the NAPI struct prior
1018 * to freeing the q_vector.
/*
 * Tear down the q_vector at index @v_idx.
 *
 * Every ring in the vector's Tx container has its slot in adapter->xdp_ring
 * or adapter->tx_ring (chosen by ring_is_xdp()) cleared with WRITE_ONCE(),
 * and every ring in the Rx container has its adapter->rx_ring slot cleared.
 * The adapter's q_vector slot is then set to NULL, the NAPI context is
 * removed with __netif_napi_del(), and the memory is released with
 * kfree_rcu().
 *
 * NOTE(review): the else keyword between the two Tx-side WRITE_ONCE() calls
 * is missing from this listing.
 */
1020 static void ixgbe_free_q_vector(struct ixgbe_adapter
*adapter
, int v_idx
)
1022 struct ixgbe_q_vector
*q_vector
= adapter
->q_vector
[v_idx
];
1023 struct ixgbe_ring
*ring
;
1025 ixgbe_for_each_ring(ring
, q_vector
->tx
) {
1026 if (ring_is_xdp(ring
))
1027 WRITE_ONCE(adapter
->xdp_ring
[ring
->queue_index
], NULL
);
1029 WRITE_ONCE(adapter
->tx_ring
[ring
->queue_index
], NULL
);
1032 ixgbe_for_each_ring(ring
, q_vector
->rx
)
1033 WRITE_ONCE(adapter
->rx_ring
[ring
->queue_index
], NULL
);
1035 adapter
->q_vector
[v_idx
] = NULL
;
1036 __netif_napi_del(&q_vector
->napi
);
1039 * after a call to __netif_napi_del() napi may still be used and
1040 * ixgbe_get_stats64() might access the rings on this vector,
1041 * we must wait a grace period before freeing it.
1043 kfree_rcu(q_vector
, rcu
);
1047 * ixgbe_alloc_q_vectors - Allocate memory for interrupt vectors
1048 * @adapter: board private structure to initialize
1050 * We allocate one q_vector per queue interrupt. If allocation fails we
/*
 * Allocate all q_vectors and distribute the Rx, Tx and XDP rings over them.
 *
 * If there are at least as many vectors as rings in total, each Rx ring
 * first gets a vector of its own.  The remaining vectors then share the
 * remaining rings, each taking DIV_ROUND_UP(remaining, q_vectors - v_idx)
 * rings of each kind.  Afterwards every populated ring slot has ring_idx set
 * to its array index.  The visible error handling zeroes the queue and
 * vector counts and calls ixgbe_free_q_vector().
 *
 * NOTE(review): the MSI-X-disabled adjustment, the argument list of the
 * second ixgbe_alloc_q_vector() call, the error checks, the index updates,
 * the error label and the return statements are missing from this listing.
 */
1053 static int ixgbe_alloc_q_vectors(struct ixgbe_adapter
*adapter
)
1055 int q_vectors
= adapter
->num_q_vectors
;
1056 int rxr_remaining
= adapter
->num_rx_queues
;
1057 int txr_remaining
= adapter
->num_tx_queues
;
1058 int xdp_remaining
= adapter
->num_xdp_queues
;
1059 int rxr_idx
= 0, txr_idx
= 0, xdp_idx
= 0, v_idx
= 0;
1062 /* only one q_vector if MSI-X is disabled. */
1063 if (!(adapter
->flags
& IXGBE_FLAG_MSIX_ENABLED
))
1066 if (q_vectors
>= (rxr_remaining
+ txr_remaining
+ xdp_remaining
)) {
1067 for (; rxr_remaining
; v_idx
++) {
1068 err
= ixgbe_alloc_q_vector(adapter
, q_vectors
, v_idx
,
1069 0, 0, 0, 0, 1, rxr_idx
);
1074 /* update counts and index */
1080 for (; v_idx
< q_vectors
; v_idx
++) {
1081 int rqpv
= DIV_ROUND_UP(rxr_remaining
, q_vectors
- v_idx
);
1082 int tqpv
= DIV_ROUND_UP(txr_remaining
, q_vectors
- v_idx
);
1083 int xqpv
= DIV_ROUND_UP(xdp_remaining
, q_vectors
- v_idx
);
1085 err
= ixgbe_alloc_q_vector(adapter
, q_vectors
, v_idx
,
1093 /* update counts and index */
1094 rxr_remaining
-= rqpv
;
1095 txr_remaining
-= tqpv
;
1096 xdp_remaining
-= xqpv
;
1102 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
1103 if (adapter
->rx_ring
[i
])
1104 adapter
->rx_ring
[i
]->ring_idx
= i
;
1107 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
1108 if (adapter
->tx_ring
[i
])
1109 adapter
->tx_ring
[i
]->ring_idx
= i
;
1112 for (i
= 0; i
< adapter
->num_xdp_queues
; i
++) {
1113 if (adapter
->xdp_ring
[i
])
1114 adapter
->xdp_ring
[i
]->ring_idx
= i
;
1120 adapter
->num_tx_queues
= 0;
1121 adapter
->num_xdp_queues
= 0;
1122 adapter
->num_rx_queues
= 0;
1123 adapter
->num_q_vectors
= 0;
1126 ixgbe_free_q_vector(adapter
, v_idx
);
1132 * ixgbe_free_q_vectors - Free memory allocated for interrupt vectors
1133 * @adapter: board private structure to initialize
1135 * This function frees the memory allocated to the q_vectors. In addition if
1136 * NAPI is enabled it will delete any references to the NAPI struct prior
1137 * to freeing the q_vector.
/*
 * Free the adapter's q_vectors.  The current vector count is captured in
 * v_idx, the Tx, XDP and Rx queue counts and num_q_vectors are zeroed, and
 * ixgbe_free_q_vector() is called with v_idx.
 *
 * NOTE(review): the loop header that steps v_idx is missing from this
 * listing; presumably it counts v_idx down to zero -- confirm.
 */
1139 static void ixgbe_free_q_vectors(struct ixgbe_adapter
*adapter
)
1141 int v_idx
= adapter
->num_q_vectors
;
1143 adapter
->num_tx_queues
= 0;
1144 adapter
->num_xdp_queues
= 0;
1145 adapter
->num_rx_queues
= 0;
1146 adapter
->num_q_vectors
= 0;
1149 ixgbe_free_q_vector(adapter
, v_idx
);
/*
 * Undo whichever interrupt mode is active.  With IXGBE_FLAG_MSIX_ENABLED
 * set, the flag is cleared, MSI-X is disabled on the PCI device and
 * adapter->msix_entries is freed and set to NULL.  Otherwise, with
 * IXGBE_FLAG_MSI_ENABLED set, that flag is cleared and MSI is disabled.
 */
1152 static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter
*adapter
)
1154 if (adapter
->flags
& IXGBE_FLAG_MSIX_ENABLED
) {
1155 adapter
->flags
&= ~IXGBE_FLAG_MSIX_ENABLED
;
1156 pci_disable_msix(adapter
->pdev
);
1157 kfree(adapter
->msix_entries
);
1158 adapter
->msix_entries
= NULL
;
1159 } else if (adapter
->flags
& IXGBE_FLAG_MSI_ENABLED
) {
1160 adapter
->flags
&= ~IXGBE_FLAG_MSI_ENABLED
;
1161 pci_disable_msi(adapter
->pdev
);
1166 * ixgbe_set_interrupt_capability - set MSI-X or MSI if supported
1167 * @adapter: board private structure to initialize
1169 * Attempt to configure the interrupts using the best available
1170 * capabilities of the hardware and the kernel.
/*
 * Choose the interrupt mode.  MSI-X is tried first through
 * ixgbe_acquire_msix_vectors().  The remaining visible statements form the
 * fallback: with more than one traffic class DCB is disabled (TC mapping
 * reset, IXGBE_FLAG_DCB_ENABLED cleared, PFC turned off, and on
 * ixgbe_mac_82598EB the requested flow-control mode restored from
 * last_lfc_mode); hw_tcs is set to 0 and the DCB TC counts to 1; SR-IOV is
 * disabled; the RSS limit is set to 1; the queue counts are recomputed with
 * ixgbe_set_num_queues(); num_q_vectors is set to 1; and pci_enable_msi() is
 * attempted, with IXGBE_FLAG_MSI_ENABLED set on the visible success path.
 *
 * NOTE(review): the early return after successful MSI-X setup, the test on
 * the pci_enable_msi() result and the else keyword are missing from this
 * listing; confirm against the complete file.
 */
1172 static void ixgbe_set_interrupt_capability(struct ixgbe_adapter
*adapter
)
1176 /* We will try to get MSI-X interrupts first */
1177 if (!ixgbe_acquire_msix_vectors(adapter
))
1180 /* At this point, we do not have MSI-X capabilities. We need to
1181 * reconfigure or disable various features which require MSI-X
1185 /* Disable DCB unless we only have a single traffic class */
1186 if (adapter
->hw_tcs
> 1) {
1187 e_dev_warn("Number of DCB TCs exceeds number of available queues. Disabling DCB support.\n");
1188 netdev_reset_tc(adapter
->netdev
);
1190 if (adapter
->hw
.mac
.type
== ixgbe_mac_82598EB
)
1191 adapter
->hw
.fc
.requested_mode
= adapter
->last_lfc_mode
;
1193 adapter
->flags
&= ~IXGBE_FLAG_DCB_ENABLED
;
1194 adapter
->temp_dcb_cfg
.pfc_mode_enable
= false;
1195 adapter
->dcb_cfg
.pfc_mode_enable
= false;
1198 adapter
->hw_tcs
= 0;
1199 adapter
->dcb_cfg
.num_tcs
.pg_tcs
= 1;
1200 adapter
->dcb_cfg
.num_tcs
.pfc_tcs
= 1;
1202 /* Disable SR-IOV support */
1203 e_dev_warn("Disabling SR-IOV support\n");
1204 ixgbe_disable_sriov(adapter
);
1207 e_dev_warn("Disabling RSS support\n");
1208 adapter
->ring_feature
[RING_F_RSS
].limit
= 1;
1210 /* recalculate number of queues now that many features have been
1211 * changed or disabled.
1213 ixgbe_set_num_queues(adapter
);
1214 adapter
->num_q_vectors
= 1;
1216 err
= pci_enable_msi(adapter
->pdev
);
1218 e_dev_warn("Failed to allocate MSI interrupt, falling back to legacy. Error: %d\n",
1221 adapter
->flags
|= IXGBE_FLAG_MSI_ENABLED
;
1225 * ixgbe_init_interrupt_scheme - Determine proper interrupt scheme
1226 * @adapter: board private structure to initialize
1228 * We determine which interrupt scheme to use based on...
1229 * - Kernel support (MSI, MSI-X)
1230 * - which can be user-defined (via MODULE_PARAM)
1231 * - Hardware queue count (num_*_queues)
1232 * - defined by miscellaneous hardware support/features (RSS, etc.)
/*
 * Set up the interrupt scheme in four steps: compute the queue counts
 * (ixgbe_set_num_queues), pick the interrupt mode
 * (ixgbe_set_interrupt_capability), allocate the q_vectors
 * (ixgbe_alloc_q_vectors) and cache the ring register mapping
 * (ixgbe_cache_ring_register).  The resulting queue counts are logged and
 * __IXGBE_DOWN is set in adapter->state.  If q_vector allocation fails, an
 * error is logged and control jumps to err_alloc_q_vectors, which calls
 * ixgbe_reset_interrupt_capability().
 *
 * NOTE(review): the test on err and the return statements are missing from
 * this listing.
 */
1234 int ixgbe_init_interrupt_scheme(struct ixgbe_adapter
*adapter
)
1238 /* Number of supported queues */
1239 ixgbe_set_num_queues(adapter
);
1241 /* Set interrupt mode */
1242 ixgbe_set_interrupt_capability(adapter
);
1244 err
= ixgbe_alloc_q_vectors(adapter
);
1246 e_dev_err("Unable to allocate memory for queue vectors\n");
1247 goto err_alloc_q_vectors
;
1250 ixgbe_cache_ring_register(adapter
);
1252 e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u XDP Queue count = %u\n",
1253 (adapter
->num_rx_queues
> 1) ? "Enabled" : "Disabled",
1254 adapter
->num_rx_queues
, adapter
->num_tx_queues
,
1255 adapter
->num_xdp_queues
);
1257 set_bit(__IXGBE_DOWN
, &adapter
->state
);
1261 err_alloc_q_vectors
:
1262 ixgbe_reset_interrupt_capability(adapter
);
1267 * ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings
1268 * @adapter: board private structure to clear interrupt scheme on
1270 * We go through and clear interrupt specific resources and reset the structure
1271 * to pre-load conditions
/*
 * Tear down the interrupt scheme: zero the Tx, XDP and Rx queue counts, free
 * all q_vectors with ixgbe_free_q_vectors(), then release the MSI-X or MSI
 * resources with ixgbe_reset_interrupt_capability().
 */
1273 void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter
*adapter
)
1275 adapter
->num_tx_queues
= 0;
1276 adapter
->num_xdp_queues
= 0;
1277 adapter
->num_rx_queues
= 0;
1279 ixgbe_free_q_vectors(adapter
);
1280 ixgbe_reset_interrupt_capability(adapter
);
/*
 * Write an advanced Tx context descriptor at tx_ring->next_to_use.
 *
 * The descriptor slot is looked up with IXGBE_TX_CTXTDESC(), and
 * next_to_use is stored back as i, or as 0 once i is no longer below
 * tx_ring->count.  IXGBE_TXD_CMD_DEXT and IXGBE_ADVTXD_DTYP_CTXT are OR-ed
 * into @type_tucmd, and the four 32-bit fields are stored with
 * cpu_to_le32().
 *
 * NOTE(review): the line between the descriptor lookup and the next_to_use
 * update is missing from this listing; presumably it increments i --
 * confirm against the complete file.
 */
1283 void ixgbe_tx_ctxtdesc(struct ixgbe_ring
*tx_ring
, u32 vlan_macip_lens
,
1284 u32 fceof_saidx
, u32 type_tucmd
, u32 mss_l4len_idx
)
1286 struct ixgbe_adv_tx_context_desc
*context_desc
;
1287 u16 i
= tx_ring
->next_to_use
;
1289 context_desc
= IXGBE_TX_CTXTDESC(tx_ring
, i
);
1292 tx_ring
->next_to_use
= (i
< tx_ring
->count
) ? i
: 0;
1294 /* set bits to identify this as an advanced context descriptor */
1295 type_tucmd
|= IXGBE_TXD_CMD_DEXT
| IXGBE_ADVTXD_DTYP_CTXT
;
1297 context_desc
->vlan_macip_lens
= cpu_to_le32(vlan_macip_lens
);
1298 context_desc
->fceof_saidx
= cpu_to_le32(fceof_saidx
);
1299 context_desc
->type_tucmd_mlhl
= cpu_to_le32(type_tucmd
);
1300 context_desc
->mss_l4len_idx
= cpu_to_le32(mss_l4len_idx
);