1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 1999 - 2018 Intel Corporation. */
5 #include "ixgbe_sriov.h"
7 #ifdef CONFIG_IXGBE_DCB
9 * ixgbe_cache_ring_dcb_sriov - Descriptor ring to register mapping for SR-IOV
10 * @adapter: board private structure to initialize
12 * Cache the descriptor ring offsets for SR-IOV to the assigned rings. It
13 * will also try to cache the proper offsets if RSS/FCoE are enabled along
17 static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter
*adapter
)
20 struct ixgbe_ring_feature
*fcoe
= &adapter
->ring_feature
[RING_F_FCOE
];
21 #endif /* IXGBE_FCOE */
22 struct ixgbe_ring_feature
*vmdq
= &adapter
->ring_feature
[RING_F_VMDQ
];
25 u8 tcs
= adapter
->hw_tcs
;
27 /* verify we have DCB queueing enabled before proceeding */
31 /* verify we have VMDq enabled before proceeding */
32 if (!(adapter
->flags
& IXGBE_FLAG_SRIOV_ENABLED
))
35 /* start at VMDq register offset for SR-IOV enabled setups */
36 reg_idx
= vmdq
->offset
* __ALIGN_MASK(1, ~vmdq
->mask
);
37 for (i
= 0, pool
= 0; i
< adapter
->num_rx_queues
; i
++, reg_idx
++) {
38 /* If we are greater than indices move to next pool */
39 if ((reg_idx
& ~vmdq
->mask
) >= tcs
) {
41 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
);
43 adapter
->rx_ring
[i
]->reg_idx
= reg_idx
;
44 adapter
->rx_ring
[i
]->netdev
= pool
? NULL
: adapter
->netdev
;
47 reg_idx
= vmdq
->offset
* __ALIGN_MASK(1, ~vmdq
->mask
);
48 for (i
= 0; i
< adapter
->num_tx_queues
; i
++, reg_idx
++) {
49 /* If we are greater than indices move to next pool */
50 if ((reg_idx
& ~vmdq
->mask
) >= tcs
)
51 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
);
52 adapter
->tx_ring
[i
]->reg_idx
= reg_idx
;
56 /* nothing to do if FCoE is disabled */
57 if (!(adapter
->flags
& IXGBE_FLAG_FCOE_ENABLED
))
60 /* The work is already done if the FCoE ring is shared */
61 if (fcoe
->offset
< tcs
)
64 /* The FCoE rings exist separately, we need to move their reg_idx */
66 u16 queues_per_pool
= __ALIGN_MASK(1, ~vmdq
->mask
);
67 u8 fcoe_tc
= ixgbe_fcoe_get_tc(adapter
);
69 reg_idx
= (vmdq
->offset
+ vmdq
->indices
) * queues_per_pool
;
70 for (i
= fcoe
->offset
; i
< adapter
->num_rx_queues
; i
++) {
71 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
) + fcoe_tc
;
72 adapter
->rx_ring
[i
]->reg_idx
= reg_idx
;
73 adapter
->rx_ring
[i
]->netdev
= adapter
->netdev
;
77 reg_idx
= (vmdq
->offset
+ vmdq
->indices
) * queues_per_pool
;
78 for (i
= fcoe
->offset
; i
< adapter
->num_tx_queues
; i
++) {
79 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
) + fcoe_tc
;
80 adapter
->tx_ring
[i
]->reg_idx
= reg_idx
;
85 #endif /* IXGBE_FCOE */
89 /* ixgbe_get_first_reg_idx - Return first register index associated with ring */
90 static void ixgbe_get_first_reg_idx(struct ixgbe_adapter
*adapter
, u8 tc
,
91 unsigned int *tx
, unsigned int *rx
)
93 struct ixgbe_hw
*hw
= &adapter
->hw
;
94 u8 num_tcs
= adapter
->hw_tcs
;
99 switch (hw
->mac
.type
) {
100 case ixgbe_mac_82598EB
:
101 /* TxQs/TC: 4 RxQs/TC: 8 */
102 *tx
= tc
<< 2; /* 0, 4, 8, 12, 16, 20, 24, 28 */
103 *rx
= tc
<< 3; /* 0, 8, 16, 24, 32, 40, 48, 56 */
105 case ixgbe_mac_82599EB
:
108 case ixgbe_mac_X550EM_x
:
109 case ixgbe_mac_x550em_a
:
112 * TCs : TC0/1 TC2/3 TC4-7
118 *tx
= tc
<< 5; /* 0, 32, 64 */
120 *tx
= (tc
+ 2) << 4; /* 80, 96 */
122 *tx
= (tc
+ 8) << 3; /* 104, 112, 120 */
125 * TCs : TC0 TC1 TC2/3
131 *tx
= tc
<< 6; /* 0, 64 */
133 *tx
= (tc
+ 4) << 4; /* 96, 112 */
141 * ixgbe_cache_ring_dcb - Descriptor ring to register mapping for DCB
142 * @adapter: board private structure to initialize
144 * Cache the descriptor ring offsets for DCB to the assigned rings.
147 static bool ixgbe_cache_ring_dcb(struct ixgbe_adapter
*adapter
)
149 u8 num_tcs
= adapter
->hw_tcs
;
150 unsigned int tx_idx
, rx_idx
;
151 int tc
, offset
, rss_i
, i
;
153 /* verify we have DCB queueing enabled before proceeding */
157 rss_i
= adapter
->ring_feature
[RING_F_RSS
].indices
;
159 for (tc
= 0, offset
= 0; tc
< num_tcs
; tc
++, offset
+= rss_i
) {
160 ixgbe_get_first_reg_idx(adapter
, tc
, &tx_idx
, &rx_idx
);
161 for (i
= 0; i
< rss_i
; i
++, tx_idx
++, rx_idx
++) {
162 adapter
->tx_ring
[offset
+ i
]->reg_idx
= tx_idx
;
163 adapter
->rx_ring
[offset
+ i
]->reg_idx
= rx_idx
;
164 adapter
->rx_ring
[offset
+ i
]->netdev
= adapter
->netdev
;
165 adapter
->tx_ring
[offset
+ i
]->dcb_tc
= tc
;
166 adapter
->rx_ring
[offset
+ i
]->dcb_tc
= tc
;
175 * ixgbe_cache_ring_sriov - Descriptor ring to register mapping for sriov
176 * @adapter: board private structure to initialize
178 * SR-IOV doesn't use any descriptor rings but changes the default if
179 * no other mapping is used.
182 static bool ixgbe_cache_ring_sriov(struct ixgbe_adapter
*adapter
)
185 struct ixgbe_ring_feature
*fcoe
= &adapter
->ring_feature
[RING_F_FCOE
];
186 #endif /* IXGBE_FCOE */
187 struct ixgbe_ring_feature
*vmdq
= &adapter
->ring_feature
[RING_F_VMDQ
];
188 struct ixgbe_ring_feature
*rss
= &adapter
->ring_feature
[RING_F_RSS
];
192 /* only proceed if VMDq is enabled */
193 if (!(adapter
->flags
& IXGBE_FLAG_VMDQ_ENABLED
))
196 /* start at VMDq register offset for SR-IOV enabled setups */
198 reg_idx
= vmdq
->offset
* __ALIGN_MASK(1, ~vmdq
->mask
);
199 for (i
= 0; i
< adapter
->num_rx_queues
; i
++, reg_idx
++) {
201 /* Allow first FCoE queue to be mapped as RSS */
202 if (fcoe
->offset
&& (i
> fcoe
->offset
))
205 /* If we are greater than indices move to next pool */
206 if ((reg_idx
& ~vmdq
->mask
) >= rss
->indices
) {
208 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
);
210 adapter
->rx_ring
[i
]->reg_idx
= reg_idx
;
211 adapter
->rx_ring
[i
]->netdev
= pool
? NULL
: adapter
->netdev
;
215 /* FCoE uses a linear block of queues so just assigning 1:1 */
216 for (; i
< adapter
->num_rx_queues
; i
++, reg_idx
++) {
217 adapter
->rx_ring
[i
]->reg_idx
= reg_idx
;
218 adapter
->rx_ring
[i
]->netdev
= adapter
->netdev
;
222 reg_idx
= vmdq
->offset
* __ALIGN_MASK(1, ~vmdq
->mask
);
223 for (i
= 0; i
< adapter
->num_tx_queues
; i
++, reg_idx
++) {
225 /* Allow first FCoE queue to be mapped as RSS */
226 if (fcoe
->offset
&& (i
> fcoe
->offset
))
229 /* If we are greater than indices move to next pool */
230 if ((reg_idx
& rss
->mask
) >= rss
->indices
)
231 reg_idx
= __ALIGN_MASK(reg_idx
, ~vmdq
->mask
);
232 adapter
->tx_ring
[i
]->reg_idx
= reg_idx
;
236 /* FCoE uses a linear block of queues so just assigning 1:1 */
237 for (; i
< adapter
->num_tx_queues
; i
++, reg_idx
++)
238 adapter
->tx_ring
[i
]->reg_idx
= reg_idx
;
246 * ixgbe_cache_ring_rss - Descriptor ring to register mapping for RSS
247 * @adapter: board private structure to initialize
249 * Cache the descriptor ring offsets for RSS to the assigned rings.
252 static bool ixgbe_cache_ring_rss(struct ixgbe_adapter
*adapter
)
256 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
257 adapter
->rx_ring
[i
]->reg_idx
= i
;
258 adapter
->rx_ring
[i
]->netdev
= adapter
->netdev
;
260 for (i
= 0, reg_idx
= 0; i
< adapter
->num_tx_queues
; i
++, reg_idx
++)
261 adapter
->tx_ring
[i
]->reg_idx
= reg_idx
;
262 for (i
= 0; i
< adapter
->num_xdp_queues
; i
++, reg_idx
++)
263 adapter
->xdp_ring
[i
]->reg_idx
= reg_idx
;
269 * ixgbe_cache_ring_register - Descriptor ring to register mapping
270 * @adapter: board private structure to initialize
272 * Once we know the feature-set enabled for the device, we'll cache
273 * the register offset the descriptor ring is assigned to.
275 * Note, the order the various feature calls is important. It must start with
276 * the "most" features enabled at the same time, then trickle down to the
277 * least amount of features turned on at once.
279 static void ixgbe_cache_ring_register(struct ixgbe_adapter
*adapter
)
281 /* start with default case */
282 adapter
->rx_ring
[0]->reg_idx
= 0;
283 adapter
->tx_ring
[0]->reg_idx
= 0;
285 #ifdef CONFIG_IXGBE_DCB
286 if (ixgbe_cache_ring_dcb_sriov(adapter
))
289 if (ixgbe_cache_ring_dcb(adapter
))
293 if (ixgbe_cache_ring_sriov(adapter
))
296 ixgbe_cache_ring_rss(adapter
);
299 static int ixgbe_xdp_queues(struct ixgbe_adapter
*adapter
)
301 return adapter
->xdp_prog
? nr_cpu_ids
: 0;
/* RSS queue-index masks, stored in ring_feature[RING_F_RSS].mask; the name
 * gives the number of queues the mask spans.
 */
#define IXGBE_RSS_64Q_MASK	0x3F
#define IXGBE_RSS_16Q_MASK	0xF
#define IXGBE_RSS_8Q_MASK	0x7
#define IXGBE_RSS_4Q_MASK	0x3
#define IXGBE_RSS_2Q_MASK	0x1
#define IXGBE_RSS_DISABLED_MASK	0x0
311 #ifdef CONFIG_IXGBE_DCB
313 * ixgbe_set_dcb_sriov_queues: Allocate queues for SR-IOV devices w/ DCB
314 * @adapter: board private structure to initialize
316 * When SR-IOV (Single Root IO Virtualiztion) is enabled, allocate queues
317 * and VM pools where appropriate. Also assign queues based on DCB
318 * priorities and map accordingly..
321 static bool ixgbe_set_dcb_sriov_queues(struct ixgbe_adapter
*adapter
)
324 u16 vmdq_i
= adapter
->ring_feature
[RING_F_VMDQ
].limit
;
329 u8 tcs
= adapter
->hw_tcs
;
331 /* verify we have DCB queueing enabled before proceeding */
335 /* verify we have VMDq enabled before proceeding */
336 if (!(adapter
->flags
& IXGBE_FLAG_SRIOV_ENABLED
))
339 /* limit VMDq instances on the PF by number of Tx queues */
340 vmdq_i
= min_t(u16
, vmdq_i
, MAX_TX_QUEUES
/ tcs
);
342 /* Add starting offset to total pool count */
343 vmdq_i
+= adapter
->ring_feature
[RING_F_VMDQ
].offset
;
345 /* 16 pools w/ 8 TC per pool */
347 vmdq_i
= min_t(u16
, vmdq_i
, 16);
348 vmdq_m
= IXGBE_82599_VMDQ_8Q_MASK
;
349 /* 32 pools w/ 4 TC per pool */
351 vmdq_i
= min_t(u16
, vmdq_i
, 32);
352 vmdq_m
= IXGBE_82599_VMDQ_4Q_MASK
;
356 /* queues in the remaining pools are available for FCoE */
357 fcoe_i
= (128 / __ALIGN_MASK(1, ~vmdq_m
)) - vmdq_i
;
360 /* remove the starting offset from the pool count */
361 vmdq_i
-= adapter
->ring_feature
[RING_F_VMDQ
].offset
;
363 /* save features for later use */
364 adapter
->ring_feature
[RING_F_VMDQ
].indices
= vmdq_i
;
365 adapter
->ring_feature
[RING_F_VMDQ
].mask
= vmdq_m
;
368 * We do not support DCB, VMDq, and RSS all simultaneously
369 * so we will disable RSS since it is the lowest priority
371 adapter
->ring_feature
[RING_F_RSS
].indices
= 1;
372 adapter
->ring_feature
[RING_F_RSS
].mask
= IXGBE_RSS_DISABLED_MASK
;
374 /* disable ATR as it is not supported when VMDq is enabled */
375 adapter
->flags
&= ~IXGBE_FLAG_FDIR_HASH_CAPABLE
;
377 adapter
->num_rx_pools
= vmdq_i
;
378 adapter
->num_rx_queues_per_pool
= tcs
;
380 adapter
->num_tx_queues
= vmdq_i
* tcs
;
381 adapter
->num_xdp_queues
= 0;
382 adapter
->num_rx_queues
= vmdq_i
* tcs
;
385 if (adapter
->flags
& IXGBE_FLAG_FCOE_ENABLED
) {
386 struct ixgbe_ring_feature
*fcoe
;
388 fcoe
= &adapter
->ring_feature
[RING_F_FCOE
];
390 /* limit ourselves based on feature limits */
391 fcoe_i
= min_t(u16
, fcoe_i
, fcoe
->limit
);
394 /* alloc queues for FCoE separately */
395 fcoe
->indices
= fcoe_i
;
396 fcoe
->offset
= vmdq_i
* tcs
;
398 /* add queues to adapter */
399 adapter
->num_tx_queues
+= fcoe_i
;
400 adapter
->num_rx_queues
+= fcoe_i
;
401 } else if (tcs
> 1) {
402 /* use queue belonging to FcoE TC */
404 fcoe
->offset
= ixgbe_fcoe_get_tc(adapter
);
406 adapter
->flags
&= ~IXGBE_FLAG_FCOE_ENABLED
;
413 #endif /* IXGBE_FCOE */
414 /* configure TC to queue mapping */
415 for (i
= 0; i
< tcs
; i
++)
416 netdev_set_tc_queue(adapter
->netdev
, i
, 1, i
);
421 static bool ixgbe_set_dcb_queues(struct ixgbe_adapter
*adapter
)
423 struct net_device
*dev
= adapter
->netdev
;
424 struct ixgbe_ring_feature
*f
;
428 /* Map queue offset and counts onto allocated tx queues */
429 tcs
= adapter
->hw_tcs
;
431 /* verify we have DCB queueing enabled before proceeding */
435 /* determine the upper limit for our current DCB mode */
436 rss_i
= dev
->num_tx_queues
/ tcs
;
437 if (adapter
->hw
.mac
.type
== ixgbe_mac_82598EB
) {
438 /* 8 TC w/ 4 queues per TC */
439 rss_i
= min_t(u16
, rss_i
, 4);
440 rss_m
= IXGBE_RSS_4Q_MASK
;
441 } else if (tcs
> 4) {
442 /* 8 TC w/ 8 queues per TC */
443 rss_i
= min_t(u16
, rss_i
, 8);
444 rss_m
= IXGBE_RSS_8Q_MASK
;
446 /* 4 TC w/ 16 queues per TC */
447 rss_i
= min_t(u16
, rss_i
, 16);
448 rss_m
= IXGBE_RSS_16Q_MASK
;
451 /* set RSS mask and indices */
452 f
= &adapter
->ring_feature
[RING_F_RSS
];
453 rss_i
= min_t(int, rss_i
, f
->limit
);
457 /* disable ATR as it is not supported when multiple TCs are enabled */
458 adapter
->flags
&= ~IXGBE_FLAG_FDIR_HASH_CAPABLE
;
461 /* FCoE enabled queues require special configuration indexed
462 * by feature specific indices and offset. Here we map FCoE
463 * indices onto the DCB queue pairs allowing FCoE to own
464 * configuration later.
466 if (adapter
->flags
& IXGBE_FLAG_FCOE_ENABLED
) {
467 u8 tc
= ixgbe_fcoe_get_tc(adapter
);
469 f
= &adapter
->ring_feature
[RING_F_FCOE
];
470 f
->indices
= min_t(u16
, rss_i
, f
->limit
);
471 f
->offset
= rss_i
* tc
;
474 #endif /* IXGBE_FCOE */
475 for (i
= 0; i
< tcs
; i
++)
476 netdev_set_tc_queue(dev
, i
, rss_i
, rss_i
* i
);
478 adapter
->num_tx_queues
= rss_i
* tcs
;
479 adapter
->num_xdp_queues
= 0;
480 adapter
->num_rx_queues
= rss_i
* tcs
;
487 * ixgbe_set_sriov_queues - Allocate queues for SR-IOV devices
488 * @adapter: board private structure to initialize
490 * When SR-IOV (Single Root IO Virtualiztion) is enabled, allocate queues
491 * and VM pools where appropriate. If RSS is available, then also try and
492 * enable RSS and map accordingly.
495 static bool ixgbe_set_sriov_queues(struct ixgbe_adapter
*adapter
)
497 u16 vmdq_i
= adapter
->ring_feature
[RING_F_VMDQ
].limit
;
499 u16 rss_i
= adapter
->ring_feature
[RING_F_RSS
].limit
;
500 u16 rss_m
= IXGBE_RSS_DISABLED_MASK
;
505 /* only proceed if SR-IOV is enabled */
506 if (!(adapter
->flags
& IXGBE_FLAG_SRIOV_ENABLED
))
509 /* limit l2fwd RSS based on total Tx queue limit */
510 rss_i
= min_t(u16
, rss_i
, MAX_TX_QUEUES
/ vmdq_i
);
512 /* Add starting offset to total pool count */
513 vmdq_i
+= adapter
->ring_feature
[RING_F_VMDQ
].offset
;
515 /* double check we are limited to maximum pools */
516 vmdq_i
= min_t(u16
, IXGBE_MAX_VMDQ_INDICES
, vmdq_i
);
518 /* 64 pool mode with 2 queues per pool */
520 vmdq_m
= IXGBE_82599_VMDQ_2Q_MASK
;
521 rss_m
= IXGBE_RSS_2Q_MASK
;
522 rss_i
= min_t(u16
, rss_i
, 2);
523 /* 32 pool mode with up to 4 queues per pool */
525 vmdq_m
= IXGBE_82599_VMDQ_4Q_MASK
;
526 rss_m
= IXGBE_RSS_4Q_MASK
;
527 /* We can support 4, 2, or 1 queues */
528 rss_i
= (rss_i
> 3) ? 4 : (rss_i
> 1) ? 2 : 1;
532 /* queues in the remaining pools are available for FCoE */
533 fcoe_i
= 128 - (vmdq_i
* __ALIGN_MASK(1, ~vmdq_m
));
536 /* remove the starting offset from the pool count */
537 vmdq_i
-= adapter
->ring_feature
[RING_F_VMDQ
].offset
;
539 /* save features for later use */
540 adapter
->ring_feature
[RING_F_VMDQ
].indices
= vmdq_i
;
541 adapter
->ring_feature
[RING_F_VMDQ
].mask
= vmdq_m
;
543 /* limit RSS based on user input and save for later use */
544 adapter
->ring_feature
[RING_F_RSS
].indices
= rss_i
;
545 adapter
->ring_feature
[RING_F_RSS
].mask
= rss_m
;
547 adapter
->num_rx_pools
= vmdq_i
;
548 adapter
->num_rx_queues_per_pool
= rss_i
;
550 adapter
->num_rx_queues
= vmdq_i
* rss_i
;
551 adapter
->num_tx_queues
= vmdq_i
* rss_i
;
552 adapter
->num_xdp_queues
= 0;
554 /* disable ATR as it is not supported when VMDq is enabled */
555 adapter
->flags
&= ~IXGBE_FLAG_FDIR_HASH_CAPABLE
;
559 * FCoE can use rings from adjacent buffers to allow RSS
560 * like behavior. To account for this we need to add the
561 * FCoE indices to the total ring count.
563 if (adapter
->flags
& IXGBE_FLAG_FCOE_ENABLED
) {
564 struct ixgbe_ring_feature
*fcoe
;
566 fcoe
= &adapter
->ring_feature
[RING_F_FCOE
];
568 /* limit ourselves based on feature limits */
569 fcoe_i
= min_t(u16
, fcoe_i
, fcoe
->limit
);
571 if (vmdq_i
> 1 && fcoe_i
) {
572 /* alloc queues for FCoE separately */
573 fcoe
->indices
= fcoe_i
;
574 fcoe
->offset
= vmdq_i
* rss_i
;
576 /* merge FCoE queues with RSS queues */
577 fcoe_i
= min_t(u16
, fcoe_i
+ rss_i
, num_online_cpus());
579 /* limit indices to rss_i if MSI-X is disabled */
580 if (!(adapter
->flags
& IXGBE_FLAG_MSIX_ENABLED
))
583 /* attempt to reserve some queues for just FCoE */
584 fcoe
->indices
= min_t(u16
, fcoe_i
, fcoe
->limit
);
585 fcoe
->offset
= fcoe_i
- fcoe
->indices
;
590 /* add queues to adapter */
591 adapter
->num_tx_queues
+= fcoe_i
;
592 adapter
->num_rx_queues
+= fcoe_i
;
596 /* To support macvlan offload we have to use num_tc to
597 * restrict the queues that can be used by the device.
598 * By doing this we can avoid reporting a false number of
602 netdev_set_num_tc(adapter
->netdev
, 1);
604 /* populate TC0 for use by pool 0 */
605 netdev_set_tc_queue(adapter
->netdev
, 0,
606 adapter
->num_rx_queues_per_pool
, 0);
612 * ixgbe_set_rss_queues - Allocate queues for RSS
613 * @adapter: board private structure to initialize
615 * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try
616 * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
619 static bool ixgbe_set_rss_queues(struct ixgbe_adapter
*adapter
)
621 struct ixgbe_hw
*hw
= &adapter
->hw
;
622 struct ixgbe_ring_feature
*f
;
625 /* set mask for 16 queue limit of RSS */
626 f
= &adapter
->ring_feature
[RING_F_RSS
];
631 if (hw
->mac
.type
< ixgbe_mac_X550
)
632 f
->mask
= IXGBE_RSS_16Q_MASK
;
634 f
->mask
= IXGBE_RSS_64Q_MASK
;
636 /* disable ATR by default, it will be configured below */
637 adapter
->flags
&= ~IXGBE_FLAG_FDIR_HASH_CAPABLE
;
640 * Use Flow Director in addition to RSS to ensure the best
641 * distribution of flows across cores, even when an FDIR flow
644 if (rss_i
> 1 && adapter
->atr_sample_rate
) {
645 f
= &adapter
->ring_feature
[RING_F_FDIR
];
647 rss_i
= f
->indices
= f
->limit
;
649 if (!(adapter
->flags
& IXGBE_FLAG_FDIR_PERFECT_CAPABLE
))
650 adapter
->flags
|= IXGBE_FLAG_FDIR_HASH_CAPABLE
;
655 * FCoE can exist on the same rings as standard network traffic
656 * however it is preferred to avoid that if possible. In order
657 * to get the best performance we allocate as many FCoE queues
658 * as we can and we place them at the end of the ring array to
659 * avoid sharing queues with standard RSS on systems with 24 or
662 if (adapter
->flags
& IXGBE_FLAG_FCOE_ENABLED
) {
663 struct net_device
*dev
= adapter
->netdev
;
666 f
= &adapter
->ring_feature
[RING_F_FCOE
];
668 /* merge FCoE queues with RSS queues */
669 fcoe_i
= min_t(u16
, f
->limit
+ rss_i
, num_online_cpus());
670 fcoe_i
= min_t(u16
, fcoe_i
, dev
->num_tx_queues
);
672 /* limit indices to rss_i if MSI-X is disabled */
673 if (!(adapter
->flags
& IXGBE_FLAG_MSIX_ENABLED
))
676 /* attempt to reserve some queues for just FCoE */
677 f
->indices
= min_t(u16
, fcoe_i
, f
->limit
);
678 f
->offset
= fcoe_i
- f
->indices
;
679 rss_i
= max_t(u16
, fcoe_i
, rss_i
);
682 #endif /* IXGBE_FCOE */
683 adapter
->num_rx_queues
= rss_i
;
684 adapter
->num_tx_queues
= rss_i
;
685 adapter
->num_xdp_queues
= ixgbe_xdp_queues(adapter
);
691 * ixgbe_set_num_queues - Allocate queues for device, feature dependent
692 * @adapter: board private structure to initialize
694 * This is the top level queue allocation routine. The order here is very
695 * important, starting with the "most" number of features turned on at once,
696 * and ending with the smallest set of features. This way large combinations
697 * can be allocated if they're turned on, and smaller combinations are the
698 * fallthrough conditions.
701 static void ixgbe_set_num_queues(struct ixgbe_adapter
*adapter
)
703 /* Start with base case */
704 adapter
->num_rx_queues
= 1;
705 adapter
->num_tx_queues
= 1;
706 adapter
->num_xdp_queues
= 0;
707 adapter
->num_rx_pools
= 1;
708 adapter
->num_rx_queues_per_pool
= 1;
710 #ifdef CONFIG_IXGBE_DCB
711 if (ixgbe_set_dcb_sriov_queues(adapter
))
714 if (ixgbe_set_dcb_queues(adapter
))
718 if (ixgbe_set_sriov_queues(adapter
))
721 ixgbe_set_rss_queues(adapter
);
725 * ixgbe_acquire_msix_vectors - acquire MSI-X vectors
726 * @adapter: board private structure
728 * Attempts to acquire a suitable range of MSI-X vector interrupts. Will
729 * return a negative error code if unable to acquire MSI-X vectors for any
732 static int ixgbe_acquire_msix_vectors(struct ixgbe_adapter
*adapter
)
734 struct ixgbe_hw
*hw
= &adapter
->hw
;
735 int i
, vectors
, vector_threshold
;
737 /* We start by asking for one vector per queue pair with XDP queues
738 * being stacked with TX queues.
740 vectors
= max(adapter
->num_rx_queues
, adapter
->num_tx_queues
);
741 vectors
= max(vectors
, adapter
->num_xdp_queues
);
743 /* It is easy to be greedy for MSI-X vectors. However, it really
744 * doesn't do much good if we have a lot more vectors than CPUs. We'll
745 * be somewhat conservative and only ask for (roughly) the same number
746 * of vectors as there are CPUs.
748 vectors
= min_t(int, vectors
, num_online_cpus());
750 /* Some vectors are necessary for non-queue interrupts */
751 vectors
+= NON_Q_VECTORS
;
753 /* Hardware can only support a maximum of hw.mac->max_msix_vectors.
754 * With features such as RSS and VMDq, we can easily surpass the
755 * number of Rx and Tx descriptor queues supported by our device.
756 * Thus, we cap the maximum in the rare cases where the CPU count also
757 * exceeds our vector limit
759 vectors
= min_t(int, vectors
, hw
->mac
.max_msix_vectors
);
761 /* We want a minimum of two MSI-X vectors for (1) a TxQ[0] + RxQ[0]
762 * handler, and (2) an Other (Link Status Change, etc.) handler.
764 vector_threshold
= MIN_MSIX_COUNT
;
766 adapter
->msix_entries
= kcalloc(vectors
,
767 sizeof(struct msix_entry
),
769 if (!adapter
->msix_entries
)
772 for (i
= 0; i
< vectors
; i
++)
773 adapter
->msix_entries
[i
].entry
= i
;
775 vectors
= pci_enable_msix_range(adapter
->pdev
, adapter
->msix_entries
,
776 vector_threshold
, vectors
);
779 /* A negative count of allocated vectors indicates an error in
780 * acquiring within the specified range of MSI-X vectors
782 e_dev_warn("Failed to allocate MSI-X interrupts. Err: %d\n",
785 adapter
->flags
&= ~IXGBE_FLAG_MSIX_ENABLED
;
786 kfree(adapter
->msix_entries
);
787 adapter
->msix_entries
= NULL
;
792 /* we successfully allocated some number of vectors within our
795 adapter
->flags
|= IXGBE_FLAG_MSIX_ENABLED
;
797 /* Adjust for only the vectors we'll use, which is minimum
798 * of max_q_vectors, or the number of vectors we were allocated.
800 vectors
-= NON_Q_VECTORS
;
801 adapter
->num_q_vectors
= min_t(int, vectors
, adapter
->max_q_vectors
);
806 static void ixgbe_add_ring(struct ixgbe_ring
*ring
,
807 struct ixgbe_ring_container
*head
)
809 ring
->next
= head
->ring
;
812 head
->next_update
= jiffies
+ 1;
816 * ixgbe_alloc_q_vector - Allocate memory for a single interrupt vector
817 * @adapter: board private structure to initialize
818 * @v_count: q_vectors allocated on adapter, used for ring interleaving
819 * @v_idx: index of vector in adapter struct
820 * @txr_count: total number of Tx rings to allocate
821 * @txr_idx: index of first Tx ring to allocate
822 * @xdp_count: total number of XDP rings to allocate
823 * @xdp_idx: index of first XDP ring to allocate
824 * @rxr_count: total number of Rx rings to allocate
825 * @rxr_idx: index of first Rx ring to allocate
827 * We allocate one q_vector. If allocation fails we return -ENOMEM.
829 static int ixgbe_alloc_q_vector(struct ixgbe_adapter
*adapter
,
830 int v_count
, int v_idx
,
831 int txr_count
, int txr_idx
,
832 int xdp_count
, int xdp_idx
,
833 int rxr_count
, int rxr_idx
)
835 struct ixgbe_q_vector
*q_vector
;
836 struct ixgbe_ring
*ring
;
837 int node
= NUMA_NO_NODE
;
839 int ring_count
, size
;
840 u8 tcs
= adapter
->hw_tcs
;
842 ring_count
= txr_count
+ rxr_count
+ xdp_count
;
843 size
= sizeof(struct ixgbe_q_vector
) +
844 (sizeof(struct ixgbe_ring
) * ring_count
);
846 /* customize cpu for Flow Director mapping */
847 if ((tcs
<= 1) && !(adapter
->flags
& IXGBE_FLAG_SRIOV_ENABLED
)) {
848 u16 rss_i
= adapter
->ring_feature
[RING_F_RSS
].indices
;
849 if (rss_i
> 1 && adapter
->atr_sample_rate
) {
850 if (cpu_online(v_idx
)) {
852 node
= cpu_to_node(cpu
);
857 /* allocate q_vector and rings */
858 q_vector
= kzalloc_node(size
, GFP_KERNEL
, node
);
860 q_vector
= kzalloc(size
, GFP_KERNEL
);
864 /* setup affinity mask and node */
866 cpumask_set_cpu(cpu
, &q_vector
->affinity_mask
);
867 q_vector
->numa_node
= node
;
869 #ifdef CONFIG_IXGBE_DCA
870 /* initialize CPU for DCA */
874 /* initialize NAPI */
875 netif_napi_add(adapter
->netdev
, &q_vector
->napi
,
878 /* tie q_vector and adapter together */
879 adapter
->q_vector
[v_idx
] = q_vector
;
880 q_vector
->adapter
= adapter
;
881 q_vector
->v_idx
= v_idx
;
883 /* initialize work limits */
884 q_vector
->tx
.work_limit
= adapter
->tx_work_limit
;
886 /* Initialize setting for adaptive ITR */
887 q_vector
->tx
.itr
= IXGBE_ITR_ADAPTIVE_MAX_USECS
|
888 IXGBE_ITR_ADAPTIVE_LATENCY
;
889 q_vector
->rx
.itr
= IXGBE_ITR_ADAPTIVE_MAX_USECS
|
890 IXGBE_ITR_ADAPTIVE_LATENCY
;
893 if (txr_count
&& !rxr_count
) {
895 if (adapter
->tx_itr_setting
== 1)
896 q_vector
->itr
= IXGBE_12K_ITR
;
898 q_vector
->itr
= adapter
->tx_itr_setting
;
900 /* rx or rx/tx vector */
901 if (adapter
->rx_itr_setting
== 1)
902 q_vector
->itr
= IXGBE_20K_ITR
;
904 q_vector
->itr
= adapter
->rx_itr_setting
;
907 /* initialize pointer to rings */
908 ring
= q_vector
->ring
;
911 /* assign generic ring traits */
912 ring
->dev
= &adapter
->pdev
->dev
;
913 ring
->netdev
= adapter
->netdev
;
915 /* configure backlink on ring */
916 ring
->q_vector
= q_vector
;
918 /* update q_vector Tx values */
919 ixgbe_add_ring(ring
, &q_vector
->tx
);
921 /* apply Tx specific ring traits */
922 ring
->count
= adapter
->tx_ring_count
;
923 ring
->queue_index
= txr_idx
;
925 /* assign ring to adapter */
926 adapter
->tx_ring
[txr_idx
] = ring
;
928 /* update count and index */
932 /* push pointer to next ring */
937 /* assign generic ring traits */
938 ring
->dev
= &adapter
->pdev
->dev
;
939 ring
->netdev
= adapter
->netdev
;
941 /* configure backlink on ring */
942 ring
->q_vector
= q_vector
;
944 /* update q_vector Tx values */
945 ixgbe_add_ring(ring
, &q_vector
->tx
);
947 /* apply Tx specific ring traits */
948 ring
->count
= adapter
->tx_ring_count
;
949 ring
->queue_index
= xdp_idx
;
952 /* assign ring to adapter */
953 adapter
->xdp_ring
[xdp_idx
] = ring
;
955 /* update count and index */
959 /* push pointer to next ring */
964 /* assign generic ring traits */
965 ring
->dev
= &adapter
->pdev
->dev
;
966 ring
->netdev
= adapter
->netdev
;
968 /* configure backlink on ring */
969 ring
->q_vector
= q_vector
;
971 /* update q_vector Rx values */
972 ixgbe_add_ring(ring
, &q_vector
->rx
);
975 * 82599 errata, UDP frames with a 0 checksum
976 * can be marked as checksum errors.
978 if (adapter
->hw
.mac
.type
== ixgbe_mac_82599EB
)
979 set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR
, &ring
->state
);
982 if (adapter
->netdev
->features
& NETIF_F_FCOE_MTU
) {
983 struct ixgbe_ring_feature
*f
;
984 f
= &adapter
->ring_feature
[RING_F_FCOE
];
985 if ((rxr_idx
>= f
->offset
) &&
986 (rxr_idx
< f
->offset
+ f
->indices
))
987 set_bit(__IXGBE_RX_FCOE
, &ring
->state
);
990 #endif /* IXGBE_FCOE */
991 /* apply Rx specific ring traits */
992 ring
->count
= adapter
->rx_ring_count
;
993 ring
->queue_index
= rxr_idx
;
995 /* assign ring to adapter */
996 adapter
->rx_ring
[rxr_idx
] = ring
;
998 /* update count and index */
1002 /* push pointer to next ring */
1010 * ixgbe_free_q_vector - Free memory allocated for specific interrupt vector
1011 * @adapter: board private structure to initialize
1012 * @v_idx: Index of vector to be freed
1014 * This function frees the memory allocated to the q_vector. In addition if
1015 * NAPI is enabled it will delete any references to the NAPI struct prior
1016 * to freeing the q_vector.
1018 static void ixgbe_free_q_vector(struct ixgbe_adapter
*adapter
, int v_idx
)
1020 struct ixgbe_q_vector
*q_vector
= adapter
->q_vector
[v_idx
];
1021 struct ixgbe_ring
*ring
;
1023 ixgbe_for_each_ring(ring
, q_vector
->tx
) {
1024 if (ring_is_xdp(ring
))
1025 adapter
->xdp_ring
[ring
->queue_index
] = NULL
;
1027 adapter
->tx_ring
[ring
->queue_index
] = NULL
;
1030 ixgbe_for_each_ring(ring
, q_vector
->rx
)
1031 adapter
->rx_ring
[ring
->queue_index
] = NULL
;
1033 adapter
->q_vector
[v_idx
] = NULL
;
1034 napi_hash_del(&q_vector
->napi
);
1035 netif_napi_del(&q_vector
->napi
);
1038 * ixgbe_get_stats64() might access the rings on this vector,
1039 * we must wait a grace period before freeing it.
1041 kfree_rcu(q_vector
, rcu
);
1045 * ixgbe_alloc_q_vectors - Allocate memory for interrupt vectors
1046 * @adapter: board private structure to initialize
1048 * We allocate one q_vector per queue interrupt. If allocation fails we
1051 static int ixgbe_alloc_q_vectors(struct ixgbe_adapter
*adapter
)
1053 int q_vectors
= adapter
->num_q_vectors
;
1054 int rxr_remaining
= adapter
->num_rx_queues
;
1055 int txr_remaining
= adapter
->num_tx_queues
;
1056 int xdp_remaining
= adapter
->num_xdp_queues
;
1057 int rxr_idx
= 0, txr_idx
= 0, xdp_idx
= 0, v_idx
= 0;
1060 /* only one q_vector if MSI-X is disabled. */
1061 if (!(adapter
->flags
& IXGBE_FLAG_MSIX_ENABLED
))
1064 if (q_vectors
>= (rxr_remaining
+ txr_remaining
+ xdp_remaining
)) {
1065 for (; rxr_remaining
; v_idx
++) {
1066 err
= ixgbe_alloc_q_vector(adapter
, q_vectors
, v_idx
,
1067 0, 0, 0, 0, 1, rxr_idx
);
1072 /* update counts and index */
1078 for (; v_idx
< q_vectors
; v_idx
++) {
1079 int rqpv
= DIV_ROUND_UP(rxr_remaining
, q_vectors
- v_idx
);
1080 int tqpv
= DIV_ROUND_UP(txr_remaining
, q_vectors
- v_idx
);
1081 int xqpv
= DIV_ROUND_UP(xdp_remaining
, q_vectors
- v_idx
);
1083 err
= ixgbe_alloc_q_vector(adapter
, q_vectors
, v_idx
,
1091 /* update counts and index */
1092 rxr_remaining
-= rqpv
;
1093 txr_remaining
-= tqpv
;
1094 xdp_remaining
-= xqpv
;
1103 adapter
->num_tx_queues
= 0;
1104 adapter
->num_xdp_queues
= 0;
1105 adapter
->num_rx_queues
= 0;
1106 adapter
->num_q_vectors
= 0;
1109 ixgbe_free_q_vector(adapter
, v_idx
);
1115 * ixgbe_free_q_vectors - Free memory allocated for interrupt vectors
1116 * @adapter: board private structure to initialize
1118 * This function frees the memory allocated to the q_vectors. In addition if
1119 * NAPI is enabled it will delete any references to the NAPI struct prior
1120 * to freeing the q_vector.
1122 static void ixgbe_free_q_vectors(struct ixgbe_adapter
*adapter
)
1124 int v_idx
= adapter
->num_q_vectors
;
1126 adapter
->num_tx_queues
= 0;
1127 adapter
->num_xdp_queues
= 0;
1128 adapter
->num_rx_queues
= 0;
1129 adapter
->num_q_vectors
= 0;
1132 ixgbe_free_q_vector(adapter
, v_idx
);
1135 static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter
*adapter
)
1137 if (adapter
->flags
& IXGBE_FLAG_MSIX_ENABLED
) {
1138 adapter
->flags
&= ~IXGBE_FLAG_MSIX_ENABLED
;
1139 pci_disable_msix(adapter
->pdev
);
1140 kfree(adapter
->msix_entries
);
1141 adapter
->msix_entries
= NULL
;
1142 } else if (adapter
->flags
& IXGBE_FLAG_MSI_ENABLED
) {
1143 adapter
->flags
&= ~IXGBE_FLAG_MSI_ENABLED
;
1144 pci_disable_msi(adapter
->pdev
);
1149 * ixgbe_set_interrupt_capability - set MSI-X or MSI if supported
1150 * @adapter: board private structure to initialize
1152 * Attempt to configure the interrupts using the best available
1153 * capabilities of the hardware and the kernel.
1155 static void ixgbe_set_interrupt_capability(struct ixgbe_adapter
*adapter
)
1159 /* We will try to get MSI-X interrupts first */
1160 if (!ixgbe_acquire_msix_vectors(adapter
))
1163 /* At this point, we do not have MSI-X capabilities. We need to
1164 * reconfigure or disable various features which require MSI-X
1168 /* Disable DCB unless we only have a single traffic class */
1169 if (adapter
->hw_tcs
> 1) {
1170 e_dev_warn("Number of DCB TCs exceeds number of available queues. Disabling DCB support.\n");
1171 netdev_reset_tc(adapter
->netdev
);
1173 if (adapter
->hw
.mac
.type
== ixgbe_mac_82598EB
)
1174 adapter
->hw
.fc
.requested_mode
= adapter
->last_lfc_mode
;
1176 adapter
->flags
&= ~IXGBE_FLAG_DCB_ENABLED
;
1177 adapter
->temp_dcb_cfg
.pfc_mode_enable
= false;
1178 adapter
->dcb_cfg
.pfc_mode_enable
= false;
1181 adapter
->hw_tcs
= 0;
1182 adapter
->dcb_cfg
.num_tcs
.pg_tcs
= 1;
1183 adapter
->dcb_cfg
.num_tcs
.pfc_tcs
= 1;
1185 /* Disable SR-IOV support */
1186 e_dev_warn("Disabling SR-IOV support\n");
1187 ixgbe_disable_sriov(adapter
);
1190 e_dev_warn("Disabling RSS support\n");
1191 adapter
->ring_feature
[RING_F_RSS
].limit
= 1;
1193 /* recalculate number of queues now that many features have been
1194 * changed or disabled.
1196 ixgbe_set_num_queues(adapter
);
1197 adapter
->num_q_vectors
= 1;
1199 err
= pci_enable_msi(adapter
->pdev
);
1201 e_dev_warn("Failed to allocate MSI interrupt, falling back to legacy. Error: %d\n",
1204 adapter
->flags
|= IXGBE_FLAG_MSI_ENABLED
;
1208 * ixgbe_init_interrupt_scheme - Determine proper interrupt scheme
1209 * @adapter: board private structure to initialize
1211 * We determine which interrupt scheme to use based on...
1212 * - Kernel support (MSI, MSI-X)
1213 * - which can be user-defined (via MODULE_PARAM)
1214 * - Hardware queue count (num_*_queues)
1215 * - defined by miscellaneous hardware support/features (RSS, etc.)
1217 int ixgbe_init_interrupt_scheme(struct ixgbe_adapter
*adapter
)
1221 /* Number of supported queues */
1222 ixgbe_set_num_queues(adapter
);
1224 /* Set interrupt mode */
1225 ixgbe_set_interrupt_capability(adapter
);
1227 err
= ixgbe_alloc_q_vectors(adapter
);
1229 e_dev_err("Unable to allocate memory for queue vectors\n");
1230 goto err_alloc_q_vectors
;
1233 ixgbe_cache_ring_register(adapter
);
1235 e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u XDP Queue count = %u\n",
1236 (adapter
->num_rx_queues
> 1) ? "Enabled" : "Disabled",
1237 adapter
->num_rx_queues
, adapter
->num_tx_queues
,
1238 adapter
->num_xdp_queues
);
1240 set_bit(__IXGBE_DOWN
, &adapter
->state
);
1244 err_alloc_q_vectors
:
1245 ixgbe_reset_interrupt_capability(adapter
);
1250 * ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings
1251 * @adapter: board private structure to clear interrupt scheme on
1253 * We go through and clear interrupt specific resources and reset the structure
1254 * to pre-load conditions
1256 void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter
*adapter
)
1258 adapter
->num_tx_queues
= 0;
1259 adapter
->num_xdp_queues
= 0;
1260 adapter
->num_rx_queues
= 0;
1262 ixgbe_free_q_vectors(adapter
);
1263 ixgbe_reset_interrupt_capability(adapter
);
1266 void ixgbe_tx_ctxtdesc(struct ixgbe_ring
*tx_ring
, u32 vlan_macip_lens
,
1267 u32 fceof_saidx
, u32 type_tucmd
, u32 mss_l4len_idx
)
1269 struct ixgbe_adv_tx_context_desc
*context_desc
;
1270 u16 i
= tx_ring
->next_to_use
;
1272 context_desc
= IXGBE_TX_CTXTDESC(tx_ring
, i
);
1275 tx_ring
->next_to_use
= (i
< tx_ring
->count
) ? i
: 0;
1277 /* set bits to identify this as an advanced context descriptor */
1278 type_tucmd
|= IXGBE_TXD_CMD_DEXT
| IXGBE_ADVTXD_DTYP_CTXT
;
1280 context_desc
->vlan_macip_lens
= cpu_to_le32(vlan_macip_lens
);
1281 context_desc
->fceof_saidx
= cpu_to_le32(fceof_saidx
);
1282 context_desc
->type_tucmd_mlhl
= cpu_to_le32(type_tucmd
);
1283 context_desc
->mss_l4len_idx
= cpu_to_le32(mss_l4len_idx
);