/*******************************************************************************

  Intel 10 Gigabit PCI Express Linux driver
  Copyright(c) 1999 - 2016 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  Linux NICS <linux.nics@intel.com>
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/

#include "ixgbe.h"
#include "ixgbe_sriov.h"

#ifdef CONFIG_IXGBE_DCB
/**
 * ixgbe_cache_ring_dcb_sriov - Descriptor ring to register mapping for SR-IOV
 * @adapter: board private structure to initialize
 *
 * Cache the descriptor ring offsets for SR-IOV to the assigned rings.  It
 * will also try to cache the proper offsets if RSS/FCoE are enabled along
 * with VMDq.
 *
 **/
static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter)
{
#ifdef IXGBE_FCOE
	struct ixgbe_ring_feature *fcoe = &adapter->ring_feature[RING_F_FCOE];
#endif /* IXGBE_FCOE */
	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
	int i, pool;
	u16 reg_idx;
	u8 tcs = adapter->hw_tcs;

	/* verify we have DCB queueing enabled before proceeding */
	if (tcs <= 1)
		return false;

	/* verify we have VMDq enabled before proceeding */
	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
		return false;

	/* start at VMDq register offset for SR-IOV enabled setups */
	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
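	/*
	 * Illustrative note (added for clarity, not in the original source):
	 * ~vmdq->mask covers the "queue within pool" bits, so
	 * __ALIGN_MASK(1, ~vmdq->mask) is simply the number of hardware
	 * queues per VMDq pool, and __ALIGN_MASK(reg_idx, ~vmdq->mask) below
	 * rounds reg_idx up to the start of the next pool.  For example, with
	 * an 8-queue-per-pool mask this evaluates to 8, so a vmdq->offset of
	 * 2 would start the mapping at register index 2 * 8 = 16.
	 */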
	for (i = 0, pool = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
		/* If we are greater than indices move to next pool */
		if ((reg_idx & ~vmdq->mask) >= tcs) {
			pool++;
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
		}
		adapter->rx_ring[i]->reg_idx = reg_idx;
		adapter->rx_ring[i]->netdev = pool ? NULL : adapter->netdev;
	}

	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
	for (i = 0; i < adapter->num_tx_queues; i++, reg_idx++) {
		/* If we are greater than indices move to next pool */
		if ((reg_idx & ~vmdq->mask) >= tcs)
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
		adapter->tx_ring[i]->reg_idx = reg_idx;
	}

#ifdef IXGBE_FCOE
	/* nothing to do if FCoE is disabled */
	if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
		return true;

	/* The work is already done if the FCoE ring is shared */
	if (fcoe->offset < tcs)
		return true;

	/* The FCoE rings exist separately, we need to move their reg_idx */
	if (fcoe->indices) {
		u16 queues_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
		u8 fcoe_tc = ixgbe_fcoe_get_tc(adapter);

		reg_idx = (vmdq->offset + vmdq->indices) * queues_per_pool;
		for (i = fcoe->offset; i < adapter->num_rx_queues; i++) {
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask) + fcoe_tc;
			adapter->rx_ring[i]->reg_idx = reg_idx;
			adapter->rx_ring[i]->netdev = adapter->netdev;
			reg_idx++;
		}

		reg_idx = (vmdq->offset + vmdq->indices) * queues_per_pool;
		for (i = fcoe->offset; i < adapter->num_tx_queues; i++) {
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask) + fcoe_tc;
			adapter->tx_ring[i]->reg_idx = reg_idx;
			reg_idx++;
		}
	}

#endif /* IXGBE_FCOE */
	return true;
}

/* ixgbe_get_first_reg_idx - Return first register index associated with ring */
static void ixgbe_get_first_reg_idx(struct ixgbe_adapter *adapter, u8 tc,
				    unsigned int *tx, unsigned int *rx)
{
	struct ixgbe_hw *hw = &adapter->hw;
	u8 num_tcs = adapter->hw_tcs;

	*tx = 0;
	*rx = 0;

	switch (hw->mac.type) {
	case ixgbe_mac_82598EB:
		/* TxQs/TC: 4	RxQs/TC: 8 */
		*tx = tc << 2; /* 0, 4,  8, 12, 16, 20, 24, 28 */
		*rx = tc << 3; /* 0, 8, 16, 24, 32, 40, 48, 56 */
		break;
	case ixgbe_mac_82599EB:
	case ixgbe_mac_X540:
	case ixgbe_mac_X550:
	case ixgbe_mac_X550EM_x:
	case ixgbe_mac_x550em_a:
		if (num_tcs > 4) {
			/*
			 * TCs    : TC0/1 TC2/3 TC4-7
			 * TxQs/TC:    32    16     8
			 * RxQs/TC:    16    16    16
			 */
			*rx = tc << 4;
			if (tc < 3)
				*tx = tc << 5;		/*   0,  32,  64 */
			else if (tc < 5)
				*tx = (tc + 2) << 4;	/*  80,  96 */
			else
				*tx = (tc + 8) << 3;	/* 104, 112, 120 */
		} else {
			/*
			 * TCs    : TC0 TC1 TC2/3
			 * TxQs/TC:  64  32    16
			 * RxQs/TC:  32  32    32
			 */
			*rx = tc << 5;
			if (tc < 2)
				*tx = tc << 6;		/*  0,  64 */
			else
				*tx = (tc + 4) << 4;	/* 96, 112 */
		}
	default:
		break;
	}
}

/**
 * ixgbe_cache_ring_dcb - Descriptor ring to register mapping for DCB
 * @adapter: board private structure to initialize
 *
 * Cache the descriptor ring offsets for DCB to the assigned rings.
 *
 **/
static bool ixgbe_cache_ring_dcb(struct ixgbe_adapter *adapter)
{
	u8 num_tcs = adapter->hw_tcs;
	unsigned int tx_idx, rx_idx;
	int tc, offset, rss_i, i;

	/* verify we have DCB queueing enabled before proceeding */
	if (num_tcs <= 1)
		return false;

	rss_i = adapter->ring_feature[RING_F_RSS].indices;

	for (tc = 0, offset = 0; tc < num_tcs; tc++, offset += rss_i) {
		ixgbe_get_first_reg_idx(adapter, tc, &tx_idx, &rx_idx);
		for (i = 0; i < rss_i; i++, tx_idx++, rx_idx++) {
			adapter->tx_ring[offset + i]->reg_idx = tx_idx;
			adapter->rx_ring[offset + i]->reg_idx = rx_idx;
			adapter->rx_ring[offset + i]->netdev = adapter->netdev;
			adapter->tx_ring[offset + i]->dcb_tc = tc;
			adapter->rx_ring[offset + i]->dcb_tc = tc;
		}
	}

	return true;
}

#endif
/**
 * ixgbe_cache_ring_sriov - Descriptor ring to register mapping for sriov
 * @adapter: board private structure to initialize
 *
 * SR-IOV doesn't use any descriptor rings but changes the default if
 * no other mapping is used.
 *
 */
static bool ixgbe_cache_ring_sriov(struct ixgbe_adapter *adapter)
{
#ifdef IXGBE_FCOE
	struct ixgbe_ring_feature *fcoe = &adapter->ring_feature[RING_F_FCOE];
#endif /* IXGBE_FCOE */
	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
	struct ixgbe_ring_feature *rss = &adapter->ring_feature[RING_F_RSS];
	u16 reg_idx, pool;
	int i;

	/* only proceed if VMDq is enabled */
	if (!(adapter->flags & IXGBE_FLAG_VMDQ_ENABLED))
		return false;

	/* start at VMDq register offset for SR-IOV enabled setups */
	pool = 0;
	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
	for (i = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
#ifdef IXGBE_FCOE
		/* Allow first FCoE queue to be mapped as RSS */
		if (fcoe->offset && (i > fcoe->offset))
			break;

#endif
		/* If we are greater than indices move to next pool */
		if ((reg_idx & ~vmdq->mask) >= rss->indices) {
			pool++;
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
		}
		adapter->rx_ring[i]->reg_idx = reg_idx;
		adapter->rx_ring[i]->netdev = pool ? NULL : adapter->netdev;
	}

#ifdef IXGBE_FCOE
	/* FCoE uses a linear block of queues so just assigning 1:1 */
	for (; i < adapter->num_rx_queues; i++, reg_idx++) {
		adapter->rx_ring[i]->reg_idx = reg_idx;
		adapter->rx_ring[i]->netdev = adapter->netdev;
	}

#endif
	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
	for (i = 0; i < adapter->num_tx_queues; i++, reg_idx++) {
#ifdef IXGBE_FCOE
		/* Allow first FCoE queue to be mapped as RSS */
		if (fcoe->offset && (i > fcoe->offset))
			break;

#endif
		/* If we are greater than indices move to next pool */
		if ((reg_idx & rss->mask) >= rss->indices)
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
		adapter->tx_ring[i]->reg_idx = reg_idx;
	}

#ifdef IXGBE_FCOE
	/* FCoE uses a linear block of queues so just assigning 1:1 */
	for (; i < adapter->num_tx_queues; i++, reg_idx++)
		adapter->tx_ring[i]->reg_idx = reg_idx;

#endif

	return true;
}

/**
 * ixgbe_cache_ring_rss - Descriptor ring to register mapping for RSS
 * @adapter: board private structure to initialize
 *
 * Cache the descriptor ring offsets for RSS to the assigned rings.
 *
 **/
static bool ixgbe_cache_ring_rss(struct ixgbe_adapter *adapter)
{
	int i, reg_idx;

	for (i = 0; i < adapter->num_rx_queues; i++) {
		adapter->rx_ring[i]->reg_idx = i;
		adapter->rx_ring[i]->netdev = adapter->netdev;
	}
	for (i = 0, reg_idx = 0; i < adapter->num_tx_queues; i++, reg_idx++)
		adapter->tx_ring[i]->reg_idx = reg_idx;
	for (i = 0; i < adapter->num_xdp_queues; i++, reg_idx++)
		adapter->xdp_ring[i]->reg_idx = reg_idx;

	return true;
}

/**
 * ixgbe_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 *
 * Note, the order of the various feature calls is important.  It must start
 * with the "most" features enabled at the same time, then trickle down to the
 * least amount of features turned on at once.
 **/
static void ixgbe_cache_ring_register(struct ixgbe_adapter *adapter)
{
	/* start with default case */
	adapter->rx_ring[0]->reg_idx = 0;
	adapter->tx_ring[0]->reg_idx = 0;

#ifdef CONFIG_IXGBE_DCB
	if (ixgbe_cache_ring_dcb_sriov(adapter))
		return;

	if (ixgbe_cache_ring_dcb(adapter))
		return;

#endif
	if (ixgbe_cache_ring_sriov(adapter))
		return;

	ixgbe_cache_ring_rss(adapter);
}

static int ixgbe_xdp_queues(struct ixgbe_adapter *adapter)
{
	return adapter->xdp_prog ? nr_cpu_ids : 0;
}

#define IXGBE_RSS_64Q_MASK	0x3F
#define IXGBE_RSS_16Q_MASK	0xF
#define IXGBE_RSS_8Q_MASK	0x7
#define IXGBE_RSS_4Q_MASK	0x3
#define IXGBE_RSS_2Q_MASK	0x1
#define IXGBE_RSS_DISABLED_MASK	0x0

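/*
 * Note (added for clarity, not part of the original file): each mask above is
 * one less than the power-of-two queue count in its name (0x3F = 64 - 1,
 * 0xF = 16 - 1, and so on), so ANDing a queue register index with it yields
 * the index within that RSS set, as done in ixgbe_cache_ring_sriov() above.
 */
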
#ifdef CONFIG_IXGBE_DCB
/**
 * ixgbe_set_dcb_sriov_queues: Allocate queues for SR-IOV devices w/ DCB
 * @adapter: board private structure to initialize
 *
 * When SR-IOV (Single Root IO Virtualization) is enabled, allocate queues
 * and VM pools where appropriate.  Also assign queues based on DCB
 * priorities and map accordingly.
 *
 **/
static bool ixgbe_set_dcb_sriov_queues(struct ixgbe_adapter *adapter)
{
	int i;
	u16 vmdq_i = adapter->ring_feature[RING_F_VMDQ].limit;
	u16 vmdq_m = 0;
#ifdef IXGBE_FCOE
	u16 fcoe_i = 0;
#endif
	u8 tcs = adapter->hw_tcs;

	/* verify we have DCB queueing enabled before proceeding */
	if (tcs <= 1)
		return false;

	/* verify we have VMDq enabled before proceeding */
	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
		return false;

	/* limit VMDq instances on the PF by number of Tx queues */
	vmdq_i = min_t(u16, vmdq_i, MAX_TX_QUEUES / tcs);

	/* Add starting offset to total pool count */
	vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset;

	/* 16 pools w/ 8 TC per pool */
	if (tcs > 4) {
		vmdq_i = min_t(u16, vmdq_i, 16);
		vmdq_m = IXGBE_82599_VMDQ_8Q_MASK;
	/* 32 pools w/ 4 TC per pool */
	} else {
		vmdq_i = min_t(u16, vmdq_i, 32);
		vmdq_m = IXGBE_82599_VMDQ_4Q_MASK;
	}

#ifdef IXGBE_FCOE
	/* queues in the remaining pools are available for FCoE */
	fcoe_i = (128 / __ALIGN_MASK(1, ~vmdq_m)) - vmdq_i;
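	/*
	 * Worked example with assumed figures (not from the original code):
	 * in 4-TC mode each pool spans __ALIGN_MASK(1, ~vmdq_m) = 4 queues,
	 * so the 128 hardware queues form 128 / 4 = 32 pools; if vmdq_i = 20
	 * pools are reserved for VMDq/SR-IOV, the remaining 32 - 20 = 12
	 * pools are what fcoe_i can draw FCoE queues from.
	 */
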
#endif
	/* remove the starting offset from the pool count */
	vmdq_i -= adapter->ring_feature[RING_F_VMDQ].offset;

	/* save features for later use */
	adapter->ring_feature[RING_F_VMDQ].indices = vmdq_i;
	adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m;

	/*
	 * We do not support DCB, VMDq, and RSS all simultaneously
	 * so we will disable RSS since it is the lowest priority
	 */
	adapter->ring_feature[RING_F_RSS].indices = 1;
	adapter->ring_feature[RING_F_RSS].mask = IXGBE_RSS_DISABLED_MASK;

	/* disable ATR as it is not supported when VMDq is enabled */
	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

	adapter->num_rx_pools = vmdq_i;
	adapter->num_rx_queues_per_pool = tcs;

	adapter->num_tx_queues = vmdq_i * tcs;
	adapter->num_xdp_queues = 0;
	adapter->num_rx_queues = vmdq_i * tcs;

#ifdef IXGBE_FCOE
	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
		struct ixgbe_ring_feature *fcoe;

		fcoe = &adapter->ring_feature[RING_F_FCOE];

		/* limit ourselves based on feature limits */
		fcoe_i = min_t(u16, fcoe_i, fcoe->limit);

		if (fcoe_i) {
			/* alloc queues for FCoE separately */
			fcoe->indices = fcoe_i;
			fcoe->offset = vmdq_i * tcs;

			/* add queues to adapter */
			adapter->num_tx_queues += fcoe_i;
			adapter->num_rx_queues += fcoe_i;
		} else if (tcs > 1) {
			/* use queue belonging to FcoE TC */
			fcoe->indices = 1;
			fcoe->offset = ixgbe_fcoe_get_tc(adapter);
		} else {
			adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;

			fcoe->indices = 0;
			fcoe->offset = 0;
		}
	}

#endif /* IXGBE_FCOE */
	/* configure TC to queue mapping */
	for (i = 0; i < tcs; i++)
		netdev_set_tc_queue(adapter->netdev, i, 1, i);

	return true;
}

static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter)
{
	struct net_device *dev = adapter->netdev;
	struct ixgbe_ring_feature *f;
	int rss_i, rss_m, i;
	int tcs;

	/* Map queue offset and counts onto allocated tx queues */
	tcs = adapter->hw_tcs;

	/* verify we have DCB queueing enabled before proceeding */
	if (tcs <= 1)
		return false;

	/* determine the upper limit for our current DCB mode */
	rss_i = dev->num_tx_queues / tcs;
	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
		/* 8 TC w/ 4 queues per TC */
		rss_i = min_t(u16, rss_i, 4);
		rss_m = IXGBE_RSS_4Q_MASK;
	} else if (tcs > 4) {
		/* 8 TC w/ 8 queues per TC */
		rss_i = min_t(u16, rss_i, 8);
		rss_m = IXGBE_RSS_8Q_MASK;
	} else {
		/* 4 TC w/ 16 queues per TC */
		rss_i = min_t(u16, rss_i, 16);
		rss_m = IXGBE_RSS_16Q_MASK;
	}
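	/*
	 * Illustrative example (assumed figures, not from the original code):
	 * a device exposing 64 Tx queues with 4 TCs gives
	 * rss_i = 64 / 4 = 16, which the final branch above leaves capped at
	 * 16 queues per TC (IXGBE_RSS_16Q_MASK); with 8 TCs the same device
	 * would instead end up with 64 / 8 = 8 queues per TC.
	 */
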
	/* set RSS mask and indices */
	f = &adapter->ring_feature[RING_F_RSS];
	rss_i = min_t(int, rss_i, f->limit);
	f->indices = rss_i;
	f->mask = rss_m;

	/* disable ATR as it is not supported when multiple TCs are enabled */
	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

#ifdef IXGBE_FCOE
	/* FCoE enabled queues require special configuration indexed
	 * by feature specific indices and offset. Here we map FCoE
	 * indices onto the DCB queue pairs allowing FCoE to own
	 * configuration later.
	 */
	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
		u8 tc = ixgbe_fcoe_get_tc(adapter);

		f = &adapter->ring_feature[RING_F_FCOE];
		f->indices = min_t(u16, rss_i, f->limit);
		f->offset = rss_i * tc;
	}

#endif /* IXGBE_FCOE */
	for (i = 0; i < tcs; i++)
		netdev_set_tc_queue(dev, i, rss_i, rss_i * i);

	adapter->num_tx_queues = rss_i * tcs;
	adapter->num_xdp_queues = 0;
	adapter->num_rx_queues = rss_i * tcs;

	return true;
}

#endif
/**
 * ixgbe_set_sriov_queues - Allocate queues for SR-IOV devices
 * @adapter: board private structure to initialize
 *
 * When SR-IOV (Single Root IO Virtualization) is enabled, allocate queues
 * and VM pools where appropriate.  If RSS is available, then also try and
 * enable RSS and map accordingly.
 *
 **/
static bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter)
{
	u16 vmdq_i = adapter->ring_feature[RING_F_VMDQ].limit;
	u16 vmdq_m = 0;
	u16 rss_i = adapter->ring_feature[RING_F_RSS].limit;
	u16 rss_m = IXGBE_RSS_DISABLED_MASK;
#ifdef IXGBE_FCOE
	u16 fcoe_i = 0;
#endif

	/* only proceed if SR-IOV is enabled */
	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
		return false;

	/* limit l2fwd RSS based on total Tx queue limit */
	rss_i = min_t(u16, rss_i, MAX_TX_QUEUES / vmdq_i);

	/* Add starting offset to total pool count */
	vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset;

	/* double check we are limited to maximum pools */
	vmdq_i = min_t(u16, IXGBE_MAX_VMDQ_INDICES, vmdq_i);

	/* 64 pool mode with 2 queues per pool */
	if (vmdq_i > 32) {
		vmdq_m = IXGBE_82599_VMDQ_2Q_MASK;
		rss_m = IXGBE_RSS_2Q_MASK;
		rss_i = min_t(u16, rss_i, 2);
	/* 32 pool mode with up to 4 queues per pool */
	} else {
		vmdq_m = IXGBE_82599_VMDQ_4Q_MASK;
		rss_m = IXGBE_RSS_4Q_MASK;
		/* We can support 4, 2, or 1 queues */
		rss_i = (rss_i > 3) ? 4 : (rss_i > 1) ? 2 : 1;
	}

#ifdef IXGBE_FCOE
	/* queues in the remaining pools are available for FCoE */
	fcoe_i = 128 - (vmdq_i * __ALIGN_MASK(1, ~vmdq_m));

#endif
	/* remove the starting offset from the pool count */
	vmdq_i -= adapter->ring_feature[RING_F_VMDQ].offset;

	/* save features for later use */
	adapter->ring_feature[RING_F_VMDQ].indices = vmdq_i;
	adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m;

	/* limit RSS based on user input and save for later use */
	adapter->ring_feature[RING_F_RSS].indices = rss_i;
	adapter->ring_feature[RING_F_RSS].mask = rss_m;

	adapter->num_rx_pools = vmdq_i;
	adapter->num_rx_queues_per_pool = rss_i;

	adapter->num_rx_queues = vmdq_i * rss_i;
	adapter->num_tx_queues = vmdq_i * rss_i;
	adapter->num_xdp_queues = 0;

	/* disable ATR as it is not supported when VMDq is enabled */
	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

#ifdef IXGBE_FCOE
	/*
	 * FCoE can use rings from adjacent buffers to allow RSS
	 * like behavior.  To account for this we need to add the
	 * FCoE indices to the total ring count.
	 */
	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
		struct ixgbe_ring_feature *fcoe;

		fcoe = &adapter->ring_feature[RING_F_FCOE];

		/* limit ourselves based on feature limits */
		fcoe_i = min_t(u16, fcoe_i, fcoe->limit);

		if (vmdq_i > 1 && fcoe_i) {
			/* alloc queues for FCoE separately */
			fcoe->indices = fcoe_i;
			fcoe->offset = vmdq_i * rss_i;
		} else {
			/* merge FCoE queues with RSS queues */
			fcoe_i = min_t(u16, fcoe_i + rss_i, num_online_cpus());

			/* limit indices to rss_i if MSI-X is disabled */
			if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
				fcoe_i = rss_i;

			/* attempt to reserve some queues for just FCoE */
			fcoe->indices = min_t(u16, fcoe_i, fcoe->limit);
			fcoe->offset = fcoe_i - fcoe->indices;

			fcoe_i -= rss_i;
		}

		/* add queues to adapter */
		adapter->num_tx_queues += fcoe_i;
		adapter->num_rx_queues += fcoe_i;
	}

#endif
	/* populate TC0 for use by pool 0 */
	netdev_set_tc_queue(adapter->netdev, 0,
			    adapter->num_rx_queues_per_pool, 0);

	return true;
}

/**
 * ixgbe_set_rss_queues - Allocate queues for RSS
 * @adapter: board private structure to initialize
 *
 * This is our "base" multiqueue mode.  RSS (Receive Side Scaling) will try
 * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
 *
 **/
static bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;
	struct ixgbe_ring_feature *f;
	u16 rss_i;

	/* set mask for 16 queue limit of RSS */
	f = &adapter->ring_feature[RING_F_RSS];
	rss_i = f->limit;

	f->indices = rss_i;

	if (hw->mac.type < ixgbe_mac_X550)
		f->mask = IXGBE_RSS_16Q_MASK;
	else
		f->mask = IXGBE_RSS_64Q_MASK;

	/* disable ATR by default, it will be configured below */
	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

	/*
	 * Use Flow Director in addition to RSS to ensure the best
	 * distribution of flows across cores, even when an FDIR flow
	 * isn't matched.
	 */
	if (rss_i > 1 && adapter->atr_sample_rate) {
		f = &adapter->ring_feature[RING_F_FDIR];

		rss_i = f->indices = f->limit;

		if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
			adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
	}

#ifdef IXGBE_FCOE
	/*
	 * FCoE can exist on the same rings as standard network traffic
	 * however it is preferred to avoid that if possible.  In order
	 * to get the best performance we allocate as many FCoE queues
	 * as we can and we place them at the end of the ring array to
	 * avoid sharing queues with standard RSS on systems with 24 or
	 * more CPUs.
	 */
	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
		struct net_device *dev = adapter->netdev;
		u16 fcoe_i;

		f = &adapter->ring_feature[RING_F_FCOE];

		/* merge FCoE queues with RSS queues */
		fcoe_i = min_t(u16, f->limit + rss_i, num_online_cpus());
		fcoe_i = min_t(u16, fcoe_i, dev->num_tx_queues);

		/* limit indices to rss_i if MSI-X is disabled */
		if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
			fcoe_i = rss_i;

		/* attempt to reserve some queues for just FCoE */
		f->indices = min_t(u16, fcoe_i, f->limit);
		f->offset = fcoe_i - f->indices;
		rss_i = max_t(u16, fcoe_i, rss_i);
	}

#endif /* IXGBE_FCOE */
	adapter->num_rx_queues = rss_i;
	adapter->num_tx_queues = rss_i;
	adapter->num_xdp_queues = ixgbe_xdp_queues(adapter);

	return true;
}

/**
 * ixgbe_set_num_queues - Allocate queues for device, feature dependent
 * @adapter: board private structure to initialize
 *
 * This is the top level queue allocation routine.  The order here is very
 * important, starting with the "most" number of features turned on at once,
 * and ending with the smallest set of features.  This way large combinations
 * can be allocated if they're turned on, and smaller combinations are the
 * fallthrough conditions.
 *
 **/
static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
{
	/* Start with base case */
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_xdp_queues = 0;
	adapter->num_rx_pools = 1;
	adapter->num_rx_queues_per_pool = 1;

#ifdef CONFIG_IXGBE_DCB
	if (ixgbe_set_dcb_sriov_queues(adapter))
		return;

	if (ixgbe_set_dcb_queues(adapter))
		return;

#endif
	if (ixgbe_set_sriov_queues(adapter))
		return;

	ixgbe_set_rss_queues(adapter);
}

/**
 * ixgbe_acquire_msix_vectors - acquire MSI-X vectors
 * @adapter: board private structure
 *
 * Attempts to acquire a suitable range of MSI-X vector interrupts. Will
 * return a negative error code if unable to acquire MSI-X vectors for any
 * reason.
 */
static int ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;
	int i, vectors, vector_threshold;

	/* We start by asking for one vector per queue pair with XDP queues
	 * being stacked with TX queues.
	 */
	vectors = max(adapter->num_rx_queues, adapter->num_tx_queues);
	vectors = max(vectors, adapter->num_xdp_queues);

	/* It is easy to be greedy for MSI-X vectors. However, it really
	 * doesn't do much good if we have a lot more vectors than CPUs. We'll
	 * be somewhat conservative and only ask for (roughly) the same number
	 * of vectors as there are CPUs.
	 */
	vectors = min_t(int, vectors, num_online_cpus());

	/* Some vectors are necessary for non-queue interrupts */
	vectors += NON_Q_VECTORS;

	/* Hardware can only support a maximum of hw.mac->max_msix_vectors.
	 * With features such as RSS and VMDq, we can easily surpass the
	 * number of Rx and Tx descriptor queues supported by our device.
	 * Thus, we cap the maximum in the rare cases where the CPU count also
	 * exceeds our vector limit.
	 */
	vectors = min_t(int, vectors, hw->mac.max_msix_vectors);
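	/*
	 * Worked example with assumed figures (not from the original code):
	 * 16 Rx and 16 Tx queues on an 8-CPU system start as
	 * vectors = max(16, 16) = 16, are trimmed to the 8 online CPUs, and
	 * then grow by NON_Q_VECTORS for the non-queue (Other) interrupt
	 * before being capped at hw->mac.max_msix_vectors.
	 */
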
	/* We want a minimum of two MSI-X vectors for (1) a TxQ[0] + RxQ[0]
	 * handler, and (2) an Other (Link Status Change, etc.) handler.
	 */
	vector_threshold = MIN_MSIX_COUNT;

	adapter->msix_entries = kcalloc(vectors,
					sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		return -ENOMEM;

	for (i = 0; i < vectors; i++)
		adapter->msix_entries[i].entry = i;

	vectors = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
					vector_threshold, vectors);

	if (vectors < 0) {
		/* A negative count of allocated vectors indicates an error in
		 * acquiring within the specified range of MSI-X vectors.
		 */
		e_dev_warn("Failed to allocate MSI-X interrupts. Err: %d\n",
			   vectors);

		adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;

		return vectors;
	}

	/* we successfully allocated some number of vectors within our
	 * requested range.
	 */
	adapter->flags |= IXGBE_FLAG_MSIX_ENABLED;

	/* Adjust for only the vectors we'll use, which is minimum
	 * of max_q_vectors, or the number of vectors we were allocated.
	 */
	vectors -= NON_Q_VECTORS;
	adapter->num_q_vectors = min_t(int, vectors, adapter->max_q_vectors);

	return 0;
}

static void ixgbe_add_ring(struct ixgbe_ring *ring,
			   struct ixgbe_ring_container *head)
{
	/* push the ring onto the head of the container's list */
	ring->next = head->ring;
	head->ring = ring;
	head->count++;
	head->next_update = jiffies + 1;
}

/**
 * ixgbe_alloc_q_vector - Allocate memory for a single interrupt vector
 * @adapter: board private structure to initialize
 * @v_count: q_vectors allocated on adapter, used for ring interleaving
 * @v_idx: index of vector in adapter struct
 * @txr_count: total number of Tx rings to allocate
 * @txr_idx: index of first Tx ring to allocate
 * @xdp_count: total number of XDP rings to allocate
 * @xdp_idx: index of first XDP ring to allocate
 * @rxr_count: total number of Rx rings to allocate
 * @rxr_idx: index of first Rx ring to allocate
 *
 * We allocate one q_vector.  If allocation fails we return -ENOMEM.
 **/
static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
				int v_count, int v_idx,
				int txr_count, int txr_idx,
				int xdp_count, int xdp_idx,
				int rxr_count, int rxr_idx)
{
	struct ixgbe_q_vector *q_vector;
	struct ixgbe_ring *ring;
	int node = NUMA_NO_NODE;
	int cpu = -1;
	int ring_count, size;
	u8 tcs = adapter->hw_tcs;

	ring_count = txr_count + rxr_count + xdp_count;
	size = sizeof(struct ixgbe_q_vector) +
	       (sizeof(struct ixgbe_ring) * ring_count);
	/* customize cpu for Flow Director mapping */
	if ((tcs <= 1) && !(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) {
		u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;

		if (rss_i > 1 && adapter->atr_sample_rate) {
			if (cpu_online(v_idx)) {
				cpu = v_idx;
				node = cpu_to_node(cpu);
			}
		}
	}

	/* allocate q_vector and rings */
	q_vector = kzalloc_node(size, GFP_KERNEL, node);
	if (!q_vector)
		q_vector = kzalloc(size, GFP_KERNEL);
	if (!q_vector)
		return -ENOMEM;

	/* setup affinity mask and node */
	if (cpu != -1)
		cpumask_set_cpu(cpu, &q_vector->affinity_mask);
	q_vector->numa_node = node;

#ifdef CONFIG_IXGBE_DCA
	/* initialize CPU for DCA */
	q_vector->cpu = -1;

#endif
	/* initialize NAPI */
	netif_napi_add(adapter->netdev, &q_vector->napi,
		       ixgbe_poll, 64);

	/* tie q_vector and adapter together */
	adapter->q_vector[v_idx] = q_vector;
	q_vector->adapter = adapter;
	q_vector->v_idx = v_idx;

	/* initialize work limits */
	q_vector->tx.work_limit = adapter->tx_work_limit;

	/* Initialize setting for adaptive ITR */
	q_vector->tx.itr = IXGBE_ITR_ADAPTIVE_MAX_USECS |
			   IXGBE_ITR_ADAPTIVE_LATENCY;
	q_vector->rx.itr = IXGBE_ITR_ADAPTIVE_MAX_USECS |
			   IXGBE_ITR_ADAPTIVE_LATENCY;

	/* initialize ITR */
	if (txr_count && !rxr_count) {
		/* tx only vector */
		if (adapter->tx_itr_setting == 1)
			q_vector->itr = IXGBE_12K_ITR;
		else
			q_vector->itr = adapter->tx_itr_setting;
	} else {
		/* rx or rx/tx vector */
		if (adapter->rx_itr_setting == 1)
			q_vector->itr = IXGBE_20K_ITR;
		else
			q_vector->itr = adapter->rx_itr_setting;
	}

	/* initialize pointer to rings */
	ring = q_vector->ring;

	while (txr_count) {
		/* assign generic ring traits */
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* update q_vector Tx values */
		ixgbe_add_ring(ring, &q_vector->tx);

		/* apply Tx specific ring traits */
		ring->count = adapter->tx_ring_count;
		ring->queue_index = txr_idx;

		/* assign ring to adapter */
		adapter->tx_ring[txr_idx] = ring;

		/* update count and index */
		txr_count--;
		txr_idx += v_count;

		/* push pointer to next ring */
		ring++;
	}

	while (xdp_count) {
		/* assign generic ring traits */
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* update q_vector Tx values */
		ixgbe_add_ring(ring, &q_vector->tx);

		/* apply Tx specific ring traits */
		ring->count = adapter->tx_ring_count;
		ring->queue_index = xdp_idx;
		set_ring_xdp(ring);

		/* assign ring to adapter */
		adapter->xdp_ring[xdp_idx] = ring;

		/* update count and index */
		xdp_count--;
		xdp_idx++;

		/* push pointer to next ring */
		ring++;
	}

	while (rxr_count) {
		/* assign generic ring traits */
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* update q_vector Rx values */
		ixgbe_add_ring(ring, &q_vector->rx);

		/*
		 * 82599 errata, UDP frames with a 0 checksum
		 * can be marked as checksum errors.
		 */
		if (adapter->hw.mac.type == ixgbe_mac_82599EB)
			set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state);

#ifdef IXGBE_FCOE
		if (adapter->netdev->features & NETIF_F_FCOE_MTU) {
			struct ixgbe_ring_feature *f;

			f = &adapter->ring_feature[RING_F_FCOE];
			if ((rxr_idx >= f->offset) &&
			    (rxr_idx < f->offset + f->indices))
				set_bit(__IXGBE_RX_FCOE, &ring->state);
		}

#endif /* IXGBE_FCOE */
		/* apply Rx specific ring traits */
		ring->count = adapter->rx_ring_count;
		ring->queue_index = rxr_idx;

		/* assign ring to adapter */
		adapter->rx_ring[rxr_idx] = ring;

		/* update count and index */
		rxr_count--;
		rxr_idx += v_count;

		/* push pointer to next ring */
		ring++;
	}

	return 0;
}

/**
 * ixgbe_free_q_vector - Free memory allocated for specific interrupt vector
 * @adapter: board private structure to initialize
 * @v_idx: Index of vector to be freed
 *
 * This function frees the memory allocated to the q_vector.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx)
{
	struct ixgbe_q_vector *q_vector = adapter->q_vector[v_idx];
	struct ixgbe_ring *ring;

	ixgbe_for_each_ring(ring, q_vector->tx) {
		if (ring_is_xdp(ring))
			adapter->xdp_ring[ring->queue_index] = NULL;
		else
			adapter->tx_ring[ring->queue_index] = NULL;
	}

	ixgbe_for_each_ring(ring, q_vector->rx)
		adapter->rx_ring[ring->queue_index] = NULL;

	adapter->q_vector[v_idx] = NULL;
	napi_hash_del(&q_vector->napi);
	netif_napi_del(&q_vector->napi);

	/*
	 * ixgbe_get_stats64() might access the rings on this vector,
	 * we must wait a grace period before freeing it.
	 */
	kfree_rcu(q_vector, rcu);
}

/**
 * ixgbe_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
{
	int q_vectors = adapter->num_q_vectors;
	int rxr_remaining = adapter->num_rx_queues;
	int txr_remaining = adapter->num_tx_queues;
	int xdp_remaining = adapter->num_xdp_queues;
	int rxr_idx = 0, txr_idx = 0, xdp_idx = 0, v_idx = 0;
	int err;

	/* only one q_vector if MSI-X is disabled. */
	if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
		q_vectors = 1;

	if (q_vectors >= (rxr_remaining + txr_remaining + xdp_remaining)) {
		for (; rxr_remaining; v_idx++) {
			err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
						   0, 0, 0, 0, 1, rxr_idx);

			if (err)
				goto err_out;

			/* update counts and index */
			rxr_remaining--;
			rxr_idx++;
		}
	}

	for (; v_idx < q_vectors; v_idx++) {
		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
		int xqpv = DIV_ROUND_UP(xdp_remaining, q_vectors - v_idx);
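		/*
		 * Illustrative example (assumed figures, not from the
		 * original code): with 10 Rx rings left and 4 vectors still
		 * to fill, DIV_ROUND_UP(10, 4) assigns 3 rings to this
		 * vector; the next iterations then see 7/3 -> 3, 4/2 -> 2
		 * and 2/1 -> 2, so the rings spread as evenly as possible.
		 */
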
		err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
					   tqpv, txr_idx,
					   xqpv, xdp_idx,
					   rqpv, rxr_idx);

		if (err)
			goto err_out;

		/* update counts and index */
		rxr_remaining -= rqpv;
		txr_remaining -= tqpv;
		xdp_remaining -= xqpv;
		rxr_idx++;
		txr_idx++;
		xdp_idx += xqpv;
	}

	return 0;

err_out:
	adapter->num_tx_queues = 0;
	adapter->num_xdp_queues = 0;
	adapter->num_rx_queues = 0;
	adapter->num_q_vectors = 0;

	while (v_idx--)
		ixgbe_free_q_vector(adapter, v_idx);

	return -ENOMEM;
}

/**
 * ixgbe_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void ixgbe_free_q_vectors(struct ixgbe_adapter *adapter)
{
	int v_idx = adapter->num_q_vectors;

	adapter->num_tx_queues = 0;
	adapter->num_xdp_queues = 0;
	adapter->num_rx_queues = 0;
	adapter->num_q_vectors = 0;

	while (v_idx--)
		ixgbe_free_q_vector(adapter, v_idx);
}

static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter)
{
	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
		adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) {
		adapter->flags &= ~IXGBE_FLAG_MSI_ENABLED;
		pci_disable_msi(adapter->pdev);
	}
}

/**
 * ixgbe_set_interrupt_capability - set MSI-X or MSI if supported
 * @adapter: board private structure to initialize
 *
 * Attempt to configure the interrupts using the best available
 * capabilities of the hardware and the kernel.
 **/
static void ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
{
	int err;

	/* We will try to get MSI-X interrupts first */
	if (!ixgbe_acquire_msix_vectors(adapter))
		return;

	/* At this point, we do not have MSI-X capabilities. We need to
	 * reconfigure or disable various features which require MSI-X
	 * capability.
	 */

	/* Disable DCB unless we only have a single traffic class */
	if (adapter->hw_tcs > 1) {
		e_dev_warn("Number of DCB TCs exceeds number of available queues. Disabling DCB support.\n");
		netdev_reset_tc(adapter->netdev);

		if (adapter->hw.mac.type == ixgbe_mac_82598EB)
			adapter->hw.fc.requested_mode = adapter->last_lfc_mode;

		adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
		adapter->temp_dcb_cfg.pfc_mode_enable = false;
		adapter->dcb_cfg.pfc_mode_enable = false;
	}

	adapter->hw_tcs = 0;
	adapter->dcb_cfg.num_tcs.pg_tcs = 1;
	adapter->dcb_cfg.num_tcs.pfc_tcs = 1;

	/* Disable SR-IOV support */
	e_dev_warn("Disabling SR-IOV support\n");
	ixgbe_disable_sriov(adapter);

	/* Disable RSS */
	e_dev_warn("Disabling RSS support\n");
	adapter->ring_feature[RING_F_RSS].limit = 1;

	/* recalculate number of queues now that many features have been
	 * changed or disabled.
	 */
	ixgbe_set_num_queues(adapter);
	adapter->num_q_vectors = 1;

	err = pci_enable_msi(adapter->pdev);
	if (err)
		e_dev_warn("Failed to allocate MSI interrupt, falling back to legacy. Error: %d\n",
			   err);
	else
		adapter->flags |= IXGBE_FLAG_MSI_ENABLED;
}

/**
 * ixgbe_init_interrupt_scheme - Determine proper interrupt scheme
 * @adapter: board private structure to initialize
 *
 * We determine which interrupt scheme to use based on...
 * - Kernel support (MSI, MSI-X)
 *   - which can be user-defined (via MODULE_PARAM)
 * - Hardware queue count (num_*_queues)
 *   - defined by miscellaneous hardware support/features (RSS, etc.)
 **/
int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
{
	int err;

	/* Number of supported queues */
	ixgbe_set_num_queues(adapter);

	/* Set interrupt mode */
	ixgbe_set_interrupt_capability(adapter);

	err = ixgbe_alloc_q_vectors(adapter);
	if (err) {
		e_dev_err("Unable to allocate memory for queue vectors\n");
		goto err_alloc_q_vectors;
	}

	ixgbe_cache_ring_register(adapter);

	e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u XDP Queue count = %u\n",
		   (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled",
		   adapter->num_rx_queues, adapter->num_tx_queues,
		   adapter->num_xdp_queues);

	set_bit(__IXGBE_DOWN, &adapter->state);

	return 0;

err_alloc_q_vectors:
	ixgbe_reset_interrupt_capability(adapter);
	return err;
}

/**
 * ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings
 * @adapter: board private structure to clear interrupt scheme on
 *
 * We go through and clear interrupt specific resources and reset the structure
 * to pre-load conditions
 **/
void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter)
{
	adapter->num_tx_queues = 0;
	adapter->num_xdp_queues = 0;
	adapter->num_rx_queues = 0;

	ixgbe_free_q_vectors(adapter);
	ixgbe_reset_interrupt_capability(adapter);
}

void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens,
		       u32 fceof_saidx, u32 type_tucmd, u32 mss_l4len_idx)
{
	struct ixgbe_adv_tx_context_desc *context_desc;
	u16 i = tx_ring->next_to_use;

	context_desc = IXGBE_TX_CTXTDESC(tx_ring, i);

	/* advance next_to_use, wrapping back to 0 at the end of the ring */
	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	/* set bits to identify this as an advanced context descriptor */
	type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
	context_desc->fceof_saidx	= cpu_to_le32(fceof_saidx);
	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
}