#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# A test for switch behavior under MC overload. An issue in Spectrum chips
# causes throughput of UC traffic to drop severely when a switch is under heavy
# MC load. This issue can be overcome by putting the switch into MC-aware mode.
# This test verifies that UC performance stays intact even as the switch is
# under MC flood, and therefore that the MC-aware mode is enabled and correctly
# deployed.
#
# Because mlxsw throttles the CPU port, the traffic can't actually reach
# userspace at full speed. That makes it impossible to use iperf3 to simply
# measure the throughput, because many packets (that reach $h3) don't get to
# the kernel at all even in UDP mode (the situation is even worse in TCP mode,
# where one can't hope to see more than a couple Mbps).
#
# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
# Multicast traffic is untagged, unicast traffic is tagged with PCP 1.
# Therefore each gets a different priority, and we can use per-prio ethtool
# counters to measure the throughput. In order to avoid prioritizing unicast
# traffic, a prio qdisc is installed on $swp3 and maps all priorities to the
# same band #7 (and thus TC 7).
#
# Mausezahn can't actually saturate the links unless it's using large frames.
# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
# multicast traffic use 8K frames.
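#
# As a sketch, measuring a stream's rate then boils down to sampling the
# relevant per-prio counter twice (using the ethtool_stats_get helper from
# lib.sh, sourced below) and dividing the delta by the interval, e.g. for the
# UC stream arriving at $h3:
#
#   t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
#   sleep 10
#   t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
#   echo $(( (t1 - t0) * 8 / 10 ))   # bits per second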
# +---------------------------+            +----------------------------------+
# | H1                        |            |                               H2 |
# |                           |            |  unicast --> + $h2.111           |
# |                 multicast |            |  traffic     | 192.0.2.129/28    |
# |                 traffic   |            |              | e-qos-map 0:1     |
# |           $h1 + <-----    |            |              |                   |
# | 192.0.2.65/28 |           |            |              + $h2               |
# +---------------|-----------+            +--------------|-------------------+
#                 |                                        |
# +---------------|----------------------------------------|-------------------+
# |         $swp1 +                                        + $swp2             |
# |        >1Gbps |                                        | >1Gbps            |
# | +-------------|------+                     +----------|----------------+  |
# | |     $swp1.1 +      |                     |          + $swp2.111      |  |
# | |        BR1         |         SW          |          BR111             |  |
# | |     $swp3.1 +      |                     |          + $swp3.111      |  |
# | +-------------|------+                     +----------|----------------+  |
# |               \_______________________________________/                   |
# |                                    |                                       |
# |                                    + $swp3                                 |
# |                                    | 1Gbps bottleneck                      |
# |                                    | prio qdisc: {0..7} -> 7               |
# +------------------------------------|--------------------------------------+
#                                      |
#                                   +--|-----------------+
#                                   |  + $h3          H3 |
#                                   |  | 192.0.2.66/28   |
#                                   |  |                 |
#                                   |  + $h3.111         |
#                                   |    192.0.2.130/28  |
#                                   +--------------------+
lib_dir=$(dirname $0)/../../../net/forwarding

source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
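
# $h1 is the source of the MC flood: its traffic is untagged and therefore
# counted as priority 0.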
simple_if_init $h1 192.0.2.65/28

simple_if_fini $h1 192.0.2.65/28
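
# $h2 is the source of the UC stream: $h2.111 carries 192.0.2.129/28, and its
# egress-qos-map tags the traffic with PCP 1, so it is counted as priority 1.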
vlan_create $h2 111 v$h2 192.0.2.129/28
ip link set dev $h2.111 type vlan egress-qos-map 0:1
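
# $h3 receives both streams behind the 1Gbps bottleneck: the MC stream on
# 192.0.2.66/28 and the UC stream on VLAN 111 (192.0.2.130/28).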
simple_if_init $h3 192.0.2.66/28

vlan_create $h3 111 v$h3 192.0.2.130/28

simple_if_fini $h3 192.0.2.66/28
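
# Switch setup: bring up the ports and create the VLAN-111 uppers that carry
# the UC stream.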
ip link set dev $swp1 up
ip link set dev $swp2 up
ip link set dev $swp3 up

vlan_create $swp2 111
vlan_create $swp3 111
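
# Create the 1Gbps bottleneck on $swp3 and map all priorities to one band so
# that the qdisc itself does not favor UC over MC.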
ethtool -s $swp3 speed 1000 autoneg off
tc qdisc replace dev $swp3 root handle 3: \
   prio bands 8 priomap 7 7 7 7 7 7 7 7
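
# br1 bridges $swp1 and $swp3 (the MC path); br111 bridges the VLAN-111
# uppers of $swp2 and $swp3 (the UC path).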
ip link add name br1 type bridge vlan_filtering 0
ip link set dev br1 up
ip link set dev $swp1 master br1
ip link set dev $swp3 master br1

ip link add name br111 type bridge vlan_filtering 0
ip link set dev br111 up
ip link set dev $swp2.111 master br111
ip link set dev $swp3.111 master br111

# Make sure that ingress quotas are smaller than egress so that there is
# room for both streams of traffic to be admitted to shared buffer.

devlink_port_pool_th_set $swp1 0 5
devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5

devlink_port_pool_th_set $swp2 0 5
devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5

devlink_port_pool_th_set $swp3 4 12
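
# Cleanup restores the shared-buffer thresholds and tears the configuration
# down in reverse order.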
devlink_port_pool_th_restore $swp3 4

devlink_tc_bind_pool_th_restore $swp2 1 ingress
devlink_port_pool_th_restore $swp2 0

devlink_tc_bind_pool_th_restore $swp1 0 ingress
devlink_port_pool_th_restore $swp1 0

ip link del dev br111

tc qdisc del dev $swp3 root handle 3:
ethtool -s $swp3 autoneg on

vlan_destroy $swp3 111
vlan_destroy $swp2 111

ip link set dev $swp3 down
ip link set dev $swp2 down
ip link set dev $swp1 down
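
# Sanity check: $h2 can reach $h3.111 across the switch.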
ping_test $h2 192.0.2.130
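
# MC-awareness test: measure UC throughput alone, then again while $h1 floods
# the switch with broadcast, and verify the degradation stays within 25%.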
start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC-only"))
check_err $? "Could not get high enough UC-only ingress rate"

local ucth1=${uc_rate[1]}
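
# Start the MC flood: mausezahn on $h1 sends to the broadcast address, which
# br1 floods towards the $swp3 bottleneck.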
start_traffic $h1 own bc bc
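
# Snapshot the prio-0 (MC) octet counters so that the MC rate over the UC+MC
# measurement can be computed afterwards.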
local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)

start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
uc_rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC+MC"))
check_err $? "Could not get high enough UC+MC ingress rate"

local ucth2=${uc_rate_2[1]}

local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)
local deg=$(bc <<< "
		scale=2
		ret = 100 * ($ucth1 - $ucth2) / $ucth1
		if (ret > 0) { ret } else { 0 }
	    ")
check_err $(bc <<< "$deg > 25")

local interval=$((d1 - d0))
local mc_ir=$(rate $u0 $u1 $interval)
local mc_er=$(rate $t0 $t1 $interval)

log_test "UC performance under MC overload"

echo "UC-only throughput  $(humanize $ucth1)"
echo "UC+MC throughput    $(humanize $ucth2)"
echo "Degradation         $deg %"
echo
echo "  UC only:"
echo "    ingress UC throughput $(humanize ${uc_rate[0]})"
echo "    egress UC throughput  $(humanize ${uc_rate[1]})"
echo "  UC+MC:"
echo "    ingress UC throughput $(humanize ${uc_rate_2[0]})"
echo "    egress UC throughput  $(humanize ${uc_rate_2[1]})"
echo "    ingress MC throughput $(humanize $mc_ir)"
echo "    egress MC throughput  $(humanize $mc_er)"
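
# UC-overload test: with the UC stream saturating the bottleneck, broadcast
# ARPs from $h1 must still be answered, i.e. MC must not be starved by UC.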
start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac

local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
for ((i = 0; i < attempts; ++i)); do
	if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then
		((passes++))
	fi
done

local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)

local interval=$((d1 - d0))
local uc_ir=$(rate $u0 $u1 $interval)
local uc_er=$(rate $t0 $t1 $interval)

((attempts == passes))
check_err $?

log_test "MC performance under UC overload"
echo "    ingress UC throughput $(humanize ${uc_ir})"
echo "    egress UC throughput  $(humanize ${uc_er})"
echo "    sent $attempts BC ARPs, got $passes responses"