1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 1999 - 2018 Intel Corporation. */
5 #include "ixgbe_sriov.h"
7 #ifdef CONFIG_IXGBE_DCB
9 * ixgbe_cache_ring_dcb_sriov - Descriptor ring to register mapping for SR-IOV
10 * @adapter: board private structure to initialize
12 * Cache the descriptor ring offsets for SR-IOV to the assigned rings. It
13 * will also try to cache the proper offsets if RSS/FCoE are enabled along
17 static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter
*adapter
)
20 struct ixgbe_ring_feature
*fcoe
= &adapter
->ring_feature
[RING_F_FCOE
];
21 #endif /* IXGBE_FCOE */
22 struct ixgbe_ring_feature
*vmdq
= &adapter
->ring_feature
[RING_F_VMDQ
];
25 u8 tcs
= adapter
->hw_tcs
;
27 /* verify we have DCB queueing enabled before proceeding */
31 /* verify we have VMDq enabled before proceeding */
32 if (!(adapter
->flags
& IXGBE_FLAG_SRIOV_ENABLED
))
35 /* start at VMDq register offset for SR-IOV enabled setups */
36 reg_idx
= vmdq
->offset
* __ALIGN_MASK(1, ~vmdq
->mask
);
37 for (i
= 0, pool
= 0; i
< adapter
->num_rx_queues
; i
++, reg_idx
++) {
38 /* If we are greater than indices move to next pool */
39 if ((reg_idx
& ~vmdq
->mask
) >= tcs
) {
41 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
);
43 adapter
->rx_ring
[i
]->reg_idx
= reg_idx
;
44 adapter
->rx_ring
[i
]->netdev
= pool
? NULL
: adapter
->netdev
;
47 reg_idx
= vmdq
->offset
* __ALIGN_MASK(1, ~vmdq
->mask
);
48 for (i
= 0; i
< adapter
->num_tx_queues
; i
++, reg_idx
++) {
49 /* If we are greater than indices move to next pool */
50 if ((reg_idx
& ~vmdq
->mask
) >= tcs
)
51 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
);
52 adapter
->tx_ring
[i
]->reg_idx
= reg_idx
;
56 /* nothing to do if FCoE is disabled */
57 if (!(adapter
->flags
& IXGBE_FLAG_FCOE_ENABLED
))
60 /* The work is already done if the FCoE ring is shared */
61 if (fcoe
->offset
< tcs
)
64 /* The FCoE rings exist separately, we need to move their reg_idx */
66 u16 queues_per_pool
= __ALIGN_MASK(1, ~vmdq
->mask
);
67 u8 fcoe_tc
= ixgbe_fcoe_get_tc(adapter
);
69 reg_idx
= (vmdq
->offset
+ vmdq
->indices
) * queues_per_pool
;
70 for (i
= fcoe
->offset
; i
< adapter
->num_rx_queues
; i
++) {
71 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
) + fcoe_tc
;
72 adapter
->rx_ring
[i
]->reg_idx
= reg_idx
;
73 adapter
->rx_ring
[i
]->netdev
= adapter
->netdev
;
77 reg_idx
= (vmdq
->offset
+ vmdq
->indices
) * queues_per_pool
;
78 for (i
= fcoe
->offset
; i
< adapter
->num_tx_queues
; i
++) {
79 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
) + fcoe_tc
;
80 adapter
->tx_ring
[i
]->reg_idx
= reg_idx
;
85 #endif /* IXGBE_FCOE */
89 /* ixgbe_get_first_reg_idx - Return first register index associated with ring */
90 static void ixgbe_get_first_reg_idx(struct ixgbe_adapter
*adapter
, u8 tc
,
91 unsigned int *tx
, unsigned int *rx
)
93 struct ixgbe_hw
*hw
= &adapter
->hw
;
94 u8 num_tcs
= adapter
->hw_tcs
;
99 switch (hw
->mac
.type
) {
100 case ixgbe_mac_82598EB
:
101 /* TxQs/TC: 4 RxQs/TC: 8 */
102 *tx
= tc
<< 2; /* 0, 4, 8, 12, 16, 20, 24, 28 */
103 *rx
= tc
<< 3; /* 0, 8, 16, 24, 32, 40, 48, 56 */
105 case ixgbe_mac_82599EB
:
108 case ixgbe_mac_X550EM_x
:
109 case ixgbe_mac_x550em_a
:
112 * TCs : TC0/1 TC2/3 TC4-7
118 *tx
= tc
<< 5; /* 0, 32, 64 */
120 *tx
= (tc
+ 2) << 4; /* 80, 96 */
122 *tx
= (tc
+ 8) << 3; /* 104, 112, 120 */
125 * TCs : TC0 TC1 TC2/3
131 *tx
= tc
<< 6; /* 0, 64 */
133 *tx
= (tc
+ 4) << 4; /* 96, 112 */
141 * ixgbe_cache_ring_dcb - Descriptor ring to register mapping for DCB
142 * @adapter: board private structure to initialize
144 * Cache the descriptor ring offsets for DCB to the assigned rings.
147 static bool ixgbe_cache_ring_dcb(struct ixgbe_adapter
*adapter
)
149 u8 num_tcs
= adapter
->hw_tcs
;
150 unsigned int tx_idx
, rx_idx
;
151 int tc
, offset
, rss_i
, i
;
153 /* verify we have DCB queueing enabled before proceeding */
157 rss_i
= adapter
->ring_feature
[RING_F_RSS
].indices
;
159 for (tc
= 0, offset
= 0; tc
< num_tcs
; tc
++, offset
+= rss_i
) {
160 ixgbe_get_first_reg_idx(adapter
, tc
, &tx_idx
, &rx_idx
);
161 for (i
= 0; i
< rss_i
; i
++, tx_idx
++, rx_idx
++) {
162 adapter
->tx_ring
[offset
+ i
]->reg_idx
= tx_idx
;
163 adapter
->rx_ring
[offset
+ i
]->reg_idx
= rx_idx
;
164 adapter
->rx_ring
[offset
+ i
]->netdev
= adapter
->netdev
;
165 adapter
->tx_ring
[offset
+ i
]->dcb_tc
= tc
;
166 adapter
->rx_ring
[offset
+ i
]->dcb_tc
= tc
;
175 * ixgbe_cache_ring_sriov - Descriptor ring to register mapping for sriov
176 * @adapter: board private structure to initialize
178 * SR-IOV doesn't use any descriptor rings but changes the default if
179 * no other mapping is used.
182 static bool ixgbe_cache_ring_sriov(struct ixgbe_adapter
*adapter
)
185 struct ixgbe_ring_feature
*fcoe
= &adapter
->ring_feature
[RING_F_FCOE
];
186 #endif /* IXGBE_FCOE */
187 struct ixgbe_ring_feature
*vmdq
= &adapter
->ring_feature
[RING_F_VMDQ
];
188 struct ixgbe_ring_feature
*rss
= &adapter
->ring_feature
[RING_F_RSS
];
192 /* only proceed if VMDq is enabled */
193 if (!(adapter
->flags
& IXGBE_FLAG_VMDQ_ENABLED
))
196 /* start at VMDq register offset for SR-IOV enabled setups */
198 reg_idx
= vmdq
->offset
* __ALIGN_MASK(1, ~vmdq
->mask
);
199 for (i
= 0; i
< adapter
->num_rx_queues
; i
++, reg_idx
++) {
201 /* Allow first FCoE queue to be mapped as RSS */
202 if (fcoe
->offset
&& (i
> fcoe
->offset
))
205 /* If we are greater than indices move to next pool */
206 if ((reg_idx
& ~vmdq
->mask
) >= rss
->indices
) {
208 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
);
210 adapter
->rx_ring
[i
]->reg_idx
= reg_idx
;
211 adapter
->rx_ring
[i
]->netdev
= pool
? NULL
: adapter
->netdev
;
215 /* FCoE uses a linear block of queues so just assigning 1:1 */
216 for (; i
< adapter
->num_rx_queues
; i
++, reg_idx
++) {
217 adapter
->rx_ring
[i
]->reg_idx
= reg_idx
;
218 adapter
->rx_ring
[i
]->netdev
= adapter
->netdev
;
222 reg_idx
= vmdq
->offset
* __ALIGN_MASK(1, ~vmdq
->mask
);
223 for (i
= 0; i
< adapter
->num_tx_queues
; i
++, reg_idx
++) {
225 /* Allow first FCoE queue to be mapped as RSS */
226 if (fcoe
->offset
&& (i
> fcoe
->offset
))
229 /* If we are greater than indices move to next pool */
230 if ((reg_idx
& rss
->mask
) >= rss
->indices
)
231 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
);
232 adapter
->tx_ring
[i
]->reg_idx
= reg_idx
;
236 /* FCoE uses a linear block of queues so just assigning 1:1 */
237 for (; i
< adapter
->num_tx_queues
; i
++, reg_idx
++)
238 adapter
->tx_ring
[i
]->reg_idx
= reg_idx
;
246 * ixgbe_cache_ring_rss - Descriptor ring to register mapping for RSS
247 * @adapter: board private structure to initialize
249 * Cache the descriptor ring offsets for RSS to the assigned rings.
252 static bool ixgbe_cache_ring_rss(struct ixgbe_adapter
*adapter
)
256 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
257 adapter
->rx_ring
[i
]->reg_idx
= i
;
258 adapter
->rx_ring
[i
]->netdev
= adapter
->netdev
;
260 for (i
= 0, reg_idx
= 0; i
< adapter
->num_tx_queues
; i
++, reg_idx
++)
261 adapter
->tx_ring
[i
]->reg_idx
= reg_idx
;
262 for (i
= 0; i
< adapter
->num_xdp_queues
; i
++, reg_idx
++)
263 adapter
->xdp_ring
[i
]->reg_idx
= reg_idx
;
269 * ixgbe_cache_ring_register - Descriptor ring to register mapping
270 * @adapter: board private structure to initialize
272 * Once we know the feature-set enabled for the device, we'll cache
273 * the register offset the descriptor ring is assigned to.
275 * Note, the order of the various feature calls is important. It must start with
276 * the "most" features enabled at the same time, then trickle down to the
277 * least amount of features turned on at once.
279 static void ixgbe_cache_ring_register(struct ixgbe_adapter
*adapter
)
281 /* start with default case */
282 adapter
->rx_ring
[0]->reg_idx
= 0;
283 adapter
->tx_ring
[0]->reg_idx
= 0;
285 #ifdef CONFIG_IXGBE_DCB
286 if (ixgbe_cache_ring_dcb_sriov(adapter
))
289 if (ixgbe_cache_ring_dcb(adapter
))
293 if (ixgbe_cache_ring_sriov(adapter
))
296 ixgbe_cache_ring_rss(adapter
);
299 static int ixgbe_xdp_queues(struct ixgbe_adapter
*adapter
)
301 return adapter
->xdp_prog
? nr_cpu_ids
: 0;
/*
 * RSS queue-index masks. Each value is (queue count - 1) for the 64-, 16-,
 * 8-, 4- and 2-queue layouts; IXGBE_RSS_DISABLED_MASK is the mask stored
 * alongside a single RSS index when RSS is turned off.
 */
#define IXGBE_RSS_64Q_MASK		0x3f
#define IXGBE_RSS_16Q_MASK		0x0f
#define IXGBE_RSS_8Q_MASK		0x07
#define IXGBE_RSS_4Q_MASK		0x03
#define IXGBE_RSS_2Q_MASK		0x01
#define IXGBE_RSS_DISABLED_MASK		0x00
311 #ifdef CONFIG_IXGBE_DCB
313 * ixgbe_set_dcb_sriov_queues - Allocate queues for SR-IOV devices w/ DCB
314 * @adapter: board private structure to initialize
316 * When SR-IOV (Single Root IO Virtualization) is enabled, allocate queues
317 * and VM pools where appropriate. Also assign queues based on DCB
318 * priorities and map accordingly.
321 static bool ixgbe_set_dcb_sriov_queues(struct ixgbe_adapter
*adapter
)
324 u16 vmdq_i
= adapter
->ring_feature
[RING_F_VMDQ
].limit
;
329 u8 tcs
= adapter
->hw_tcs
;
331 /* verify we have DCB queueing enabled before proceeding */
335 /* verify we have VMDq enabled before proceeding */
336 if (!(adapter
->flags
& IXGBE_FLAG_SRIOV_ENABLED
))
339 /* limit VMDq instances on the PF by number of Tx queues */
340 vmdq_i
= min_t(u16
, vmdq_i
, MAX_TX_QUEUES
/ tcs
);
342 /* Add starting offset to total pool count */
343 vmdq_i
+= adapter
->ring_feature
[RING_F_VMDQ
].offset
;
345 /* 16 pools w/ 8 TC per pool */
347 vmdq_i
= min_t(u16
, vmdq_i
, 16);
348 vmdq_m
= IXGBE_82599_VMDQ_8Q_MASK
;
349 /* 32 pools w/ 4 TC per pool */
351 vmdq_i
= min_t(u16
, vmdq_i
, 32);
352 vmdq_m
= IXGBE_82599_VMDQ_4Q_MASK
;
356 /* queues in the remaining pools are available for FCoE */
357 fcoe_i
= (128 / __ALIGN_MASK(1, ~vmdq_m
)) - vmdq_i
;
360 /* remove the starting offset from the pool count */
361 vmdq_i
-= adapter
->ring_feature
[RING_F_VMDQ
].offset
;
363 /* save features for later use */
364 adapter
->ring_feature
[RING_F_VMDQ
].indices
= vmdq_i
;
365 adapter
->ring_feature
[RING_F_VMDQ
].mask
= vmdq_m
;
368 * We do not support DCB, VMDq, and RSS all simultaneously
369 * so we will disable RSS since it is the lowest priority
371 adapter
->ring_feature
[RING_F_RSS
].indices
= 1;
372 adapter
->ring_feature
[RING_F_RSS
].mask
= IXGBE_RSS_DISABLED_MASK
;
374 /* disable ATR as it is not supported when VMDq is enabled */
375 adapter
->flags
&= ~IXGBE_FLAG_FDIR_HASH_CAPABLE
;
377 adapter
->num_rx_pools
= vmdq_i
;
378 adapter
->num_rx_queues_per_pool
= tcs
;
380 adapter
->num_tx_queues
= vmdq_i
* tcs
;
381 adapter
->num_xdp_queues
= 0;
382 adapter
->num_rx_queues
= vmdq_i
* tcs
;
385 if (adapter
->flags
& IXGBE_FLAG_FCOE_ENABLED
) {
386 struct ixgbe_ring_feature
*fcoe
;
388 fcoe
= &adapter
->ring_feature
[RING_F_FCOE
];
390 /* limit ourselves based on feature limits */
391 fcoe_i
= min_t(u16
, fcoe_i
, fcoe
->limit
);
394 /* alloc queues for FCoE separately */
395 fcoe
->indices
= fcoe_i
;
396 fcoe
->offset
= vmdq_i
* tcs
;
398 /* add queues to adapter */
399 adapter
->num_tx_queues
+= fcoe_i
;
400 adapter
->num_rx_queues
+= fcoe_i
;
401 } else if (tcs
> 1) {
402 /* use queue belonging to FCoE TC */
404 fcoe
->offset
= ixgbe_fcoe_get_tc(adapter
);
406 adapter
->flags
&= ~IXGBE_FLAG_FCOE_ENABLED
;
413 #endif /* IXGBE_FCOE */
414 /* configure TC to queue mapping */
415 for (i
= 0; i
< tcs
; i
++)
416 netdev_set_tc_queue(adapter
->netdev
, i
, 1, i
);
421 static bool ixgbe_set_dcb_queues(struct ixgbe_adapter
*adapter
)
423 struct net_device
*dev
= adapter
->netdev
;
424 struct ixgbe_ring_feature
*f
;
428 /* Map queue offset and counts onto allocated tx queues */
429 tcs
= adapter
->hw_tcs
;
431 /* verify we have DCB queueing enabled before proceeding */
435 /* determine the upper limit for our current DCB mode */
436 rss_i
= dev
->num_tx_queues
/ tcs
;
437 if (adapter
->hw
.mac
.type
== ixgbe_mac_82598EB
) {
438 /* 8 TC w/ 4 queues per TC */
439 rss_i
= min_t(u16
, rss_i
, 4);
440 rss_m
= IXGBE_RSS_4Q_MASK
;
441 } else if (tcs
> 4) {
442 /* 8 TC w/ 8 queues per TC */
443 rss_i
= min_t(u16
, rss_i
, 8);
444 rss_m
= IXGBE_RSS_8Q_MASK
;
446 /* 4 TC w/ 16 queues per TC */
447 rss_i
= min_t(u16
, rss_i
, 16);
448 rss_m
= IXGBE_RSS_16Q_MASK
;
451 /* set RSS mask and indices */
452 f
= &adapter
->ring_feature
[RING_F_RSS
];
453 rss_i
= min_t(int, rss_i
, f
->limit
);
457 /* disable ATR as it is not supported when multiple TCs are enabled */
458 adapter
->flags
&= ~IXGBE_FLAG_FDIR_HASH_CAPABLE
;
461 /* FCoE enabled queues require special configuration indexed
462 * by feature specific indices and offset. Here we map FCoE
463 * indices onto the DCB queue pairs allowing FCoE to own
464 * configuration later.
466 if (adapter
->flags
& IXGBE_FLAG_FCOE_ENABLED
) {
467 u8 tc
= ixgbe_fcoe_get_tc(adapter
);
469 f
= &adapter
->ring_feature
[RING_F_FCOE
];
470 f
->indices
= min_t(u16
, rss_i
, f
->limit
);
471 f
->offset
= rss_i
* tc
;
474 #endif /* IXGBE_FCOE */
475 for (i
= 0; i
< tcs
; i
++)
476 netdev_set_tc_queue(dev
, i
, rss_i
, rss_i
* i
);
478 adapter
->num_tx_queues
= rss_i
* tcs
;
479 adapter
->num_xdp_queues
= 0;
480 adapter
->num_rx_queues
= rss_i
* tcs
;
487 * ixgbe_set_sriov_queues - Allocate queues for SR-IOV devices
488 * @adapter: board private structure to initialize
490 * When SR-IOV (Single Root IO Virtualization) is enabled, allocate queues
491 * and VM pools where appropriate. If RSS is available, then also try and
492 * enable RSS and map accordingly.
495 static bool ixgbe_set_sriov_queues(struct ixgbe_adapter
*adapter
)
497 u16 vmdq_i
= adapter
->ring_feature
[RING_F_VMDQ
].limit
;
499 u16 rss_i
= adapter
->ring_feature
[RING_F_RSS
].limit
;
500 u16 rss_m
= IXGBE_RSS_DISABLED_MASK
;
505 /* only proceed if SR-IOV is enabled */
506 if (!(adapter
->flags
& IXGBE_FLAG_SRIOV_ENABLED
))
509 /* limit l2fwd RSS based on total Tx queue limit */
510 rss_i
= min_t(u16
, rss_i
, MAX_TX_QUEUES
/ vmdq_i
);
512 /* Add starting offset to total pool count */
513 vmdq_i
+= adapter
->ring_feature
[RING_F_VMDQ
].offset
;
515 /* double check we are limited to maximum pools */
516 vmdq_i
= min_t(u16
, IXGBE_MAX_VMDQ_INDICES
, vmdq_i
);
518 /* 64 pool mode with 2 queues per pool */
520 vmdq_m
= IXGBE_82599_VMDQ_2Q_MASK
;
521 rss_m
= IXGBE_RSS_2Q_MASK
;
522 rss_i
= min_t(u16
, rss_i
, 2);
523 /* 32 pool mode with up to 4 queues per pool */
525 vmdq_m
= IXGBE_82599_VMDQ_4Q_MASK
;
526 rss_m
= IXGBE_RSS_4Q_MASK
;
527 /* We can support 4, 2, or 1 queues */
528 rss_i
= (rss_i
> 3) ? 4 : (rss_i
> 1) ? 2 : 1;
532 /* queues in the remaining pools are available for FCoE */
533 fcoe_i
= 128 - (vmdq_i
* __ALIGN_MASK(1, ~vmdq_m
));
536 /* remove the starting offset from the pool count */
537 vmdq_i
-= adapter
->ring_feature
[RING_F_VMDQ
].offset
;
539 /* save features for later use */
540 adapter
->ring_feature
[RING_F_VMDQ
].indices
= vmdq_i
;
541 adapter
->ring_feature
[RING_F_VMDQ
].mask
= vmdq_m
;
543 /* limit RSS based on user input and save for later use */
544 adapter
->ring_feature
[RING_F_RSS
].indices
= rss_i
;
545 adapter
->ring_feature
[RING_F_RSS
].mask
= rss_m
;
547 adapter
->num_rx_pools
= vmdq_i
;
548 adapter
->num_rx_queues_per_pool
= rss_i
;
550 adapter
->num_rx_queues
= vmdq_i
* rss_i
;
551 adapter
->num_tx_queues
= vmdq_i
* rss_i
;
552 adapter
->num_xdp_queues
= 0;
554 /* disable ATR as it is not supported when VMDq is enabled */
555 adapter
->flags
&= ~IXGBE_FLAG_FDIR_HASH_CAPABLE
;
559 * FCoE can use rings from adjacent buffers to allow RSS
560 * like behavior. To account for this we need to add the
561 * FCoE indices to the total ring count.
563 if (adapter
->flags
& IXGBE_FLAG_FCOE_ENABLED
) {
564 struct ixgbe_ring_feature
*fcoe
;
566 fcoe
= &adapter
->ring_feature
[RING_F_FCOE
];
568 /* limit ourselves based on feature limits */
569 fcoe_i
= min_t(u16
, fcoe_i
, fcoe
->limit
);
571 if (vmdq_i
> 1 && fcoe_i
) {
572 /* alloc queues for FCoE separately */
573 fcoe
->indices
= fcoe_i
;
574 fcoe
->offset
= vmdq_i
* rss_i
;
576 /* merge FCoE queues with RSS queues */
577 fcoe_i
= min_t(u16
, fcoe_i
+ rss_i
, num_online_cpus());
579 /* limit indices to rss_i if MSI-X is disabled */
580 if (!(adapter
->flags
& IXGBE_FLAG_MSIX_ENABLED
))
583 /* attempt to reserve some queues for just FCoE */
584 fcoe
->indices
= min_t(u16
, fcoe_i
, fcoe
->limit
);
585 fcoe
->offset
= fcoe_i
- fcoe
->indices
;
590 /* add queues to adapter */
591 adapter
->num_tx_queues
+= fcoe_i
;
592 adapter
->num_rx_queues
+= fcoe_i
;
596 /* To support macvlan offload we have to use num_tc to
597 * restrict the queues that can be used by the device.
598 * By doing this we can avoid reporting a false number of
602 netdev_set_num_tc(adapter
->netdev
, 1);
604 /* populate TC0 for use by pool 0 */
605 netdev_set_tc_queue(adapter
->netdev
, 0,
606 adapter
->num_rx_queues_per_pool
, 0);
612 * ixgbe_set_rss_queues - Allocate queues for RSS
613 * @adapter: board private structure to initialize
615 * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try
616 * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
619 static bool ixgbe_set_rss_queues(struct ixgbe_adapter
*adapter
)
621 struct ixgbe_hw
*hw
= &adapter
->hw
;
622 struct ixgbe_ring_feature
*f
;
625 /* set mask for 16 queue limit of RSS */
626 f
= &adapter
->ring_feature
[RING_F_RSS
];
631 if (hw
->mac
.type
< ixgbe_mac_X550
)
632 f
->mask
= IXGBE_RSS_16Q_MASK
;
634 f
->mask
= IXGBE_RSS_64Q_MASK
;
636 /* disable ATR by default, it will be configured below */
637 adapter
->flags
&= ~IXGBE_FLAG_FDIR_HASH_CAPABLE
;
640 * Use Flow Director in addition to RSS to ensure the best
641 * distribution of flows across cores, even when an FDIR flow
644 if (rss_i
> 1 && adapter
->atr_sample_rate
) {
645 f
= &adapter
->ring_feature
[RING_F_FDIR
];
647 rss_i
= f
->indices
= f
->limit
;
649 if (!(adapter
->flags
& IXGBE_FLAG_FDIR_PERFECT_CAPABLE
))
650 adapter
->flags
|= IXGBE_FLAG_FDIR_HASH_CAPABLE
;
655 * FCoE can exist on the same rings as standard network traffic
656 * however it is preferred to avoid that if possible. In order
657 * to get the best performance we allocate as many FCoE queues
658 * as we can and we place them at the end of the ring array to
659 * avoid sharing queues with standard RSS on systems with 24 or
662 if (adapter
->flags
& IXGBE_FLAG_FCOE_ENABLED
) {
663 struct net_device
*dev
= adapter
->netdev
;
666 f
= &adapter
->ring_feature
[RING_F_FCOE
];
668 /* merge FCoE queues with RSS queues */
669 fcoe_i
= min_t(u16
, f
->limit
+ rss_i
, num_online_cpus());
670 fcoe_i
= min_t(u16
, fcoe_i
, dev
->num_tx_queues
);
672 /* limit indices to rss_i if MSI-X is disabled */
673 if (!(adapter
->flags
& IXGBE_FLAG_MSIX_ENABLED
))
676 /* attempt to reserve some queues for just FCoE */
677 f
->indices
= min_t(u16
, fcoe_i
, f
->limit
);
678 f
->offset
= fcoe_i
- f
->indices
;
679 rss_i
= max_t(u16
, fcoe_i
, rss_i
);
682 #endif /* IXGBE_FCOE */
683 adapter
->num_rx_queues
= rss_i
;
684 adapter
->num_tx_queues
= rss_i
;
685 adapter
->num_xdp_queues
= ixgbe_xdp_queues(adapter
);
691 * ixgbe_set_num_queues - Allocate queues for device, feature dependent
692 * @adapter: board private structure to initialize
694 * This is the top level queue allocation routine. The order here is very
695 * important, starting with the "most" number of features turned on at once,
696 * and ending with the smallest set of features. This way large combinations
697 * can be allocated if they're turned on, and smaller combinations are the
698 * fallthrough conditions.
701 static void ixgbe_set_num_queues(struct ixgbe_adapter
*adapter
)
703 /* Start with base case */
704 adapter
->num_rx_queues
= 1;
705 adapter
->num_tx_queues
= 1;
706 adapter
->num_xdp_queues
= 0;
707 adapter
->num_rx_pools
= 1;
708 adapter
->num_rx_queues_per_pool
= 1;
710 #ifdef CONFIG_IXGBE_DCB
711 if (ixgbe_set_dcb_sriov_queues(adapter
))
714 if (ixgbe_set_dcb_queues(adapter
))
718 if (ixgbe_set_sriov_queues(adapter
))
721 ixgbe_set_rss_queues(adapter
);
725 * ixgbe_acquire_msix_vectors - acquire MSI-X vectors
726 * @adapter: board private structure
728 * Attempts to acquire a suitable range of MSI-X vector interrupts. Will
729 * return a negative error code if unable to acquire MSI-X vectors for any
732 static int ixgbe_acquire_msix_vectors(struct ixgbe_adapter
*adapter
)
734 struct ixgbe_hw
*hw
= &adapter
->hw
;
735 int i
, vectors
, vector_threshold
;
737 /* We start by asking for one vector per queue pair with XDP queues
738 * being stacked with TX queues.
740 vectors
= max(adapter
->num_rx_queues
, adapter
->num_tx_queues
);
741 vectors
= max(vectors
, adapter
->num_xdp_queues
);
743 /* It is easy to be greedy for MSI-X vectors. However, it really
744 * doesn't do much good if we have a lot more vectors than CPUs. We'll
745 * be somewhat conservative and only ask for (roughly) the same number
746 * of vectors as there are CPUs.
748 vectors
= min_t(int, vectors
, num_online_cpus());
750 /* Some vectors are necessary for non-queue interrupts */
751 vectors
+= NON_Q_VECTORS
;
753 /* Hardware can only support a maximum of hw->mac.max_msix_vectors.
754 * With features such as RSS and VMDq, we can easily surpass the
755 * number of Rx and Tx descriptor queues supported by our device.
756 * Thus, we cap the maximum in the rare cases where the CPU count also
757 * exceeds our vector limit
759 vectors
= min_t(int, vectors
, hw
->mac
.max_msix_vectors
);
761 /* We want a minimum of two MSI-X vectors for (1) a TxQ[0] + RxQ[0]
762 * handler, and (2) an Other (Link Status Change, etc.) handler.
764 vector_threshold
= MIN_MSIX_COUNT
;
766 adapter
->msix_entries
= kcalloc(vectors
,
767 sizeof(struct msix_entry
),
769 if (!adapter
->msix_entries
)
772 for (i
= 0; i
< vectors
; i
++)
773 adapter
->msix_entries
[i
].entry
= i
;
775 vectors
= pci_enable_msix_range(adapter
->pdev
, adapter
->msix_entries
,
776 vector_threshold
, vectors
);
779 /* A negative count of allocated vectors indicates an error in
780 * acquiring within the specified range of MSI-X vectors
782 e_dev_warn("Failed to allocate MSI-X interrupts. Err: %d\n",
785 adapter
->flags
&= ~IXGBE_FLAG_MSIX_ENABLED
;
786 kfree(adapter
->msix_entries
);
787 adapter
->msix_entries
= NULL
;
792 /* we successfully allocated some number of vectors within our
795 adapter
->flags
|= IXGBE_FLAG_MSIX_ENABLED
;
797 /* Adjust for only the vectors we'll use, which is minimum
798 * of max_q_vectors, or the number of vectors we were allocated.
800 vectors
-= NON_Q_VECTORS
;
801 adapter
->num_q_vectors
= min_t(int, vectors
, adapter
->max_q_vectors
);
806 static void ixgbe_add_ring(struct ixgbe_ring
*ring
,
807 struct ixgbe_ring_container
*head
)
809 ring
->next
= head
->ring
;
812 head
->next_update
= jiffies
+ 1;
816 * ixgbe_alloc_q_vector - Allocate memory for a single interrupt vector
817 * @adapter: board private structure to initialize
818 * @v_count: q_vectors allocated on adapter, used for ring interleaving
819 * @v_idx: index of vector in adapter struct
820 * @txr_count: total number of Tx rings to allocate
821 * @txr_idx: index of first Tx ring to allocate
822 * @xdp_count: total number of XDP rings to allocate
823 * @xdp_idx: index of first XDP ring to allocate
824 * @rxr_count: total number of Rx rings to allocate
825 * @rxr_idx: index of first Rx ring to allocate
827 * We allocate one q_vector. If allocation fails we return -ENOMEM.
829 static int ixgbe_alloc_q_vector(struct ixgbe_adapter
*adapter
,
830 int v_count
, int v_idx
,
831 int txr_count
, int txr_idx
,
832 int xdp_count
, int xdp_idx
,
833 int rxr_count
, int rxr_idx
)
835 int node
= dev_to_node(&adapter
->pdev
->dev
);
836 struct ixgbe_q_vector
*q_vector
;
837 struct ixgbe_ring
*ring
;
840 u8 tcs
= adapter
->hw_tcs
;
842 ring_count
= txr_count
+ rxr_count
+ xdp_count
;
844 /* customize cpu for Flow Director mapping */
845 if ((tcs
<= 1) && !(adapter
->flags
& IXGBE_FLAG_SRIOV_ENABLED
)) {
846 u16 rss_i
= adapter
->ring_feature
[RING_F_RSS
].indices
;
847 if (rss_i
> 1 && adapter
->atr_sample_rate
) {
848 cpu
= cpumask_local_spread(v_idx
, node
);
849 node
= cpu_to_node(cpu
);
853 /* allocate q_vector and rings */
854 q_vector
= kzalloc_node(struct_size(q_vector
, ring
, ring_count
),
857 q_vector
= kzalloc(struct_size(q_vector
, ring
, ring_count
),
862 /* setup affinity mask and node */
864 cpumask_set_cpu(cpu
, &q_vector
->affinity_mask
);
865 q_vector
->numa_node
= node
;
867 #ifdef CONFIG_IXGBE_DCA
868 /* initialize CPU for DCA */
872 /* initialize NAPI */
873 netif_napi_add(adapter
->netdev
, &q_vector
->napi
,
876 /* tie q_vector and adapter together */
877 adapter
->q_vector
[v_idx
] = q_vector
;
878 q_vector
->adapter
= adapter
;
879 q_vector
->v_idx
= v_idx
;
881 /* initialize work limits */
882 q_vector
->tx
.work_limit
= adapter
->tx_work_limit
;
884 /* Initialize setting for adaptive ITR */
885 q_vector
->tx
.itr
= IXGBE_ITR_ADAPTIVE_MAX_USECS
|
886 IXGBE_ITR_ADAPTIVE_LATENCY
;
887 q_vector
->rx
.itr
= IXGBE_ITR_ADAPTIVE_MAX_USECS
|
888 IXGBE_ITR_ADAPTIVE_LATENCY
;
891 if (txr_count
&& !rxr_count
) {
893 if (adapter
->tx_itr_setting
== 1)
894 q_vector
->itr
= IXGBE_12K_ITR
;
896 q_vector
->itr
= adapter
->tx_itr_setting
;
898 /* rx or rx/tx vector */
899 if (adapter
->rx_itr_setting
== 1)
900 q_vector
->itr
= IXGBE_20K_ITR
;
902 q_vector
->itr
= adapter
->rx_itr_setting
;
905 /* initialize pointer to rings */
906 ring
= q_vector
->ring
;
909 /* assign generic ring traits */
910 ring
->dev
= &adapter
->pdev
->dev
;
911 ring
->netdev
= adapter
->netdev
;
913 /* configure backlink on ring */
914 ring
->q_vector
= q_vector
;
916 /* update q_vector Tx values */
917 ixgbe_add_ring(ring
, &q_vector
->tx
);
919 /* apply Tx specific ring traits */
920 ring
->count
= adapter
->tx_ring_count
;
921 ring
->queue_index
= txr_idx
;
923 /* assign ring to adapter */
924 WRITE_ONCE(adapter
->tx_ring
[txr_idx
], ring
);
926 /* update count and index */
930 /* push pointer to next ring */
935 /* assign generic ring traits */
936 ring
->dev
= &adapter
->pdev
->dev
;
937 ring
->netdev
= adapter
->netdev
;
939 /* configure backlink on ring */
940 ring
->q_vector
= q_vector
;
942 /* update q_vector Tx values */
943 ixgbe_add_ring(ring
, &q_vector
->tx
);
945 /* apply Tx specific ring traits */
946 ring
->count
= adapter
->tx_ring_count
;
947 ring
->queue_index
= xdp_idx
;
950 /* assign ring to adapter */
951 WRITE_ONCE(adapter
->xdp_ring
[xdp_idx
], ring
);
953 /* update count and index */
957 /* push pointer to next ring */
962 /* assign generic ring traits */
963 ring
->dev
= &adapter
->pdev
->dev
;
964 ring
->netdev
= adapter
->netdev
;
966 /* configure backlink on ring */
967 ring
->q_vector
= q_vector
;
969 /* update q_vector Rx values */
970 ixgbe_add_ring(ring
, &q_vector
->rx
);
973 * 82599 errata, UDP frames with a 0 checksum
974 * can be marked as checksum errors.
976 if (adapter
->hw
.mac
.type
== ixgbe_mac_82599EB
)
977 set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR
, &ring
->state
);
980 if (adapter
->netdev
->features
& NETIF_F_FCOE_MTU
) {
981 struct ixgbe_ring_feature
*f
;
982 f
= &adapter
->ring_feature
[RING_F_FCOE
];
983 if ((rxr_idx
>= f
->offset
) &&
984 (rxr_idx
< f
->offset
+ f
->indices
))
985 set_bit(__IXGBE_RX_FCOE
, &ring
->state
);
988 #endif /* IXGBE_FCOE */
989 /* apply Rx specific ring traits */
990 ring
->count
= adapter
->rx_ring_count
;
991 ring
->queue_index
= rxr_idx
;
993 /* assign ring to adapter */
994 WRITE_ONCE(adapter
->rx_ring
[rxr_idx
], ring
);
996 /* update count and index */
1000 /* push pointer to next ring */
1008 * ixgbe_free_q_vector - Free memory allocated for specific interrupt vector
1009 * @adapter: board private structure to initialize
1010 * @v_idx: Index of vector to be freed
1012 * This function frees the memory allocated to the q_vector. In addition if
1013 * NAPI is enabled it will delete any references to the NAPI struct prior
1014 * to freeing the q_vector.
1016 static void ixgbe_free_q_vector(struct ixgbe_adapter
*adapter
, int v_idx
)
1018 struct ixgbe_q_vector
*q_vector
= adapter
->q_vector
[v_idx
];
1019 struct ixgbe_ring
*ring
;
1021 ixgbe_for_each_ring(ring
, q_vector
->tx
) {
1022 if (ring_is_xdp(ring
))
1023 WRITE_ONCE(adapter
->xdp_ring
[ring
->queue_index
], NULL
);
1025 WRITE_ONCE(adapter
->tx_ring
[ring
->queue_index
], NULL
);
1028 ixgbe_for_each_ring(ring
, q_vector
->rx
)
1029 WRITE_ONCE(adapter
->rx_ring
[ring
->queue_index
], NULL
);
1031 adapter
->q_vector
[v_idx
] = NULL
;
1032 __netif_napi_del(&q_vector
->napi
);
1035 * after a call to __netif_napi_del() napi may still be used and
1036 * ixgbe_get_stats64() might access the rings on this vector,
1037 * we must wait a grace period before freeing it.
1039 kfree_rcu(q_vector
, rcu
);
1043 * ixgbe_alloc_q_vectors - Allocate memory for interrupt vectors
1044 * @adapter: board private structure to initialize
1046 * We allocate one q_vector per queue interrupt. If allocation fails we
1049 static int ixgbe_alloc_q_vectors(struct ixgbe_adapter
*adapter
)
1051 int q_vectors
= adapter
->num_q_vectors
;
1052 int rxr_remaining
= adapter
->num_rx_queues
;
1053 int txr_remaining
= adapter
->num_tx_queues
;
1054 int xdp_remaining
= adapter
->num_xdp_queues
;
1055 int rxr_idx
= 0, txr_idx
= 0, xdp_idx
= 0, v_idx
= 0;
1058 /* only one q_vector if MSI-X is disabled. */
1059 if (!(adapter
->flags
& IXGBE_FLAG_MSIX_ENABLED
))
1062 if (q_vectors
>= (rxr_remaining
+ txr_remaining
+ xdp_remaining
)) {
1063 for (; rxr_remaining
; v_idx
++) {
1064 err
= ixgbe_alloc_q_vector(adapter
, q_vectors
, v_idx
,
1065 0, 0, 0, 0, 1, rxr_idx
);
1070 /* update counts and index */
1076 for (; v_idx
< q_vectors
; v_idx
++) {
1077 int rqpv
= DIV_ROUND_UP(rxr_remaining
, q_vectors
- v_idx
);
1078 int tqpv
= DIV_ROUND_UP(txr_remaining
, q_vectors
- v_idx
);
1079 int xqpv
= DIV_ROUND_UP(xdp_remaining
, q_vectors
- v_idx
);
1081 err
= ixgbe_alloc_q_vector(adapter
, q_vectors
, v_idx
,
1089 /* update counts and index */
1090 rxr_remaining
-= rqpv
;
1091 txr_remaining
-= tqpv
;
1092 xdp_remaining
-= xqpv
;
1098 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
1099 if (adapter
->rx_ring
[i
])
1100 adapter
->rx_ring
[i
]->ring_idx
= i
;
1103 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
1104 if (adapter
->tx_ring
[i
])
1105 adapter
->tx_ring
[i
]->ring_idx
= i
;
1108 for (i
= 0; i
< adapter
->num_xdp_queues
; i
++) {
1109 if (adapter
->xdp_ring
[i
])
1110 adapter
->xdp_ring
[i
]->ring_idx
= i
;
1116 adapter
->num_tx_queues
= 0;
1117 adapter
->num_xdp_queues
= 0;
1118 adapter
->num_rx_queues
= 0;
1119 adapter
->num_q_vectors
= 0;
1122 ixgbe_free_q_vector(adapter
, v_idx
);
1128 * ixgbe_free_q_vectors - Free memory allocated for interrupt vectors
1129 * @adapter: board private structure to initialize
1131 * This function frees the memory allocated to the q_vectors. In addition if
1132 * NAPI is enabled it will delete any references to the NAPI struct prior
1133 * to freeing the q_vector.
1135 static void ixgbe_free_q_vectors(struct ixgbe_adapter
*adapter
)
1137 int v_idx
= adapter
->num_q_vectors
;
1139 adapter
->num_tx_queues
= 0;
1140 adapter
->num_xdp_queues
= 0;
1141 adapter
->num_rx_queues
= 0;
1142 adapter
->num_q_vectors
= 0;
1145 ixgbe_free_q_vector(adapter
, v_idx
);
1148 static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter
*adapter
)
1150 if (adapter
->flags
& IXGBE_FLAG_MSIX_ENABLED
) {
1151 adapter
->flags
&= ~IXGBE_FLAG_MSIX_ENABLED
;
1152 pci_disable_msix(adapter
->pdev
);
1153 kfree(adapter
->msix_entries
);
1154 adapter
->msix_entries
= NULL
;
1155 } else if (adapter
->flags
& IXGBE_FLAG_MSI_ENABLED
) {
1156 adapter
->flags
&= ~IXGBE_FLAG_MSI_ENABLED
;
1157 pci_disable_msi(adapter
->pdev
);
1162 * ixgbe_set_interrupt_capability - set MSI-X or MSI if supported
1163 * @adapter: board private structure to initialize
1165 * Attempt to configure the interrupts using the best available
1166 * capabilities of the hardware and the kernel.
1168 static void ixgbe_set_interrupt_capability(struct ixgbe_adapter
*adapter
)
1172 /* We will try to get MSI-X interrupts first */
1173 if (!ixgbe_acquire_msix_vectors(adapter
))
1176 /* At this point, we do not have MSI-X capabilities. We need to
1177 * reconfigure or disable various features which require MSI-X
1181 /* Disable DCB unless we only have a single traffic class */
1182 if (adapter
->hw_tcs
> 1) {
1183 e_dev_warn("Number of DCB TCs exceeds number of available queues. Disabling DCB support.\n");
1184 netdev_reset_tc(adapter
->netdev
);
1186 if (adapter
->hw
.mac
.type
== ixgbe_mac_82598EB
)
1187 adapter
->hw
.fc
.requested_mode
= adapter
->last_lfc_mode
;
1189 adapter
->flags
&= ~IXGBE_FLAG_DCB_ENABLED
;
1190 adapter
->temp_dcb_cfg
.pfc_mode_enable
= false;
1191 adapter
->dcb_cfg
.pfc_mode_enable
= false;
1194 adapter
->hw_tcs
= 0;
1195 adapter
->dcb_cfg
.num_tcs
.pg_tcs
= 1;
1196 adapter
->dcb_cfg
.num_tcs
.pfc_tcs
= 1;
1198 /* Disable SR-IOV support */
1199 e_dev_warn("Disabling SR-IOV support\n");
1200 ixgbe_disable_sriov(adapter
);
1203 e_dev_warn("Disabling RSS support\n");
1204 adapter
->ring_feature
[RING_F_RSS
].limit
= 1;
1206 /* recalculate number of queues now that many features have been
1207 * changed or disabled.
1209 ixgbe_set_num_queues(adapter
);
1210 adapter
->num_q_vectors
= 1;
1212 err
= pci_enable_msi(adapter
->pdev
);
1214 e_dev_warn("Failed to allocate MSI interrupt, falling back to legacy. Error: %d\n",
1217 adapter
->flags
|= IXGBE_FLAG_MSI_ENABLED
;
1221 * ixgbe_init_interrupt_scheme - Determine proper interrupt scheme
1222 * @adapter: board private structure to initialize
1224 * We determine which interrupt scheme to use based on...
1225 * - Kernel support (MSI, MSI-X)
1226 * - which can be user-defined (via MODULE_PARAM)
1227 * - Hardware queue count (num_*_queues)
1228 * - defined by miscellaneous hardware support/features (RSS, etc.)
1230 int ixgbe_init_interrupt_scheme(struct ixgbe_adapter
*adapter
)
1234 /* Number of supported queues */
1235 ixgbe_set_num_queues(adapter
);
1237 /* Set interrupt mode */
1238 ixgbe_set_interrupt_capability(adapter
);
1240 err
= ixgbe_alloc_q_vectors(adapter
);
1242 e_dev_err("Unable to allocate memory for queue vectors\n");
1243 goto err_alloc_q_vectors
;
1246 ixgbe_cache_ring_register(adapter
);
1248 e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u XDP Queue count = %u\n",
1249 (adapter
->num_rx_queues
> 1) ? "Enabled" : "Disabled",
1250 adapter
->num_rx_queues
, adapter
->num_tx_queues
,
1251 adapter
->num_xdp_queues
);
1253 set_bit(__IXGBE_DOWN
, &adapter
->state
);
1257 err_alloc_q_vectors
:
1258 ixgbe_reset_interrupt_capability(adapter
);
1263 * ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings
1264 * @adapter: board private structure to clear interrupt scheme on
1266 * We go through and clear interrupt specific resources and reset the structure
1267 * to pre-load conditions
1269 void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter
*adapter
)
1271 adapter
->num_tx_queues
= 0;
1272 adapter
->num_xdp_queues
= 0;
1273 adapter
->num_rx_queues
= 0;
1275 ixgbe_free_q_vectors(adapter
);
1276 ixgbe_reset_interrupt_capability(adapter
);
1279 void ixgbe_tx_ctxtdesc(struct ixgbe_ring
*tx_ring
, u32 vlan_macip_lens
,
1280 u32 fceof_saidx
, u32 type_tucmd
, u32 mss_l4len_idx
)
1282 struct ixgbe_adv_tx_context_desc
*context_desc
;
1283 u16 i
= tx_ring
->next_to_use
;
1285 context_desc
= IXGBE_TX_CTXTDESC(tx_ring
, i
);
1288 tx_ring
->next_to_use
= (i
< tx_ring
->count
) ? i
: 0;
1290 /* set bits to identify this as an advanced context descriptor */
1291 type_tucmd
|= IXGBE_TXD_CMD_DEXT
| IXGBE_ADVTXD_DTYP_CTXT
;
1293 context_desc
->vlan_macip_lens
= cpu_to_le32(vlan_macip_lens
);
1294 context_desc
->fceof_saidx
= cpu_to_le32(fceof_saidx
);
1295 context_desc
->type_tucmd_mlhl
= cpu_to_le32(type_tucmd
);
1296 context_desc
->mss_l4len_idx
= cpu_to_le32(mss_l4len_idx
);