4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * IEEE 802.3ad Link Aggregation - Send code.
29 * Implements the Distributor function.
33 #include <sys/modctl.h>
34 #include <sys/sunddi.h>
35 #include <sys/callb.h>
37 #include <sys/strsun.h>
38 #include <sys/strsubr.h>
41 #include <inet/common.h>
46 #include <netinet/udp.h>
49 #include <sys/aggr_impl.h>
52 * Update the TX load balancing policy of the specified group.
55 aggr_send_update_policy(aggr_grp_t
*grp
, uint32_t policy
)
57 uint8_t mac_policy
= 0;
59 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
61 if ((policy
& AGGR_POLICY_L2
) != 0)
62 mac_policy
|= MAC_PKT_HASH_L2
;
63 if ((policy
& AGGR_POLICY_L3
) != 0)
64 mac_policy
|= MAC_PKT_HASH_L3
;
65 if ((policy
& AGGR_POLICY_L4
) != 0)
66 mac_policy
|= MAC_PKT_HASH_L4
;
68 grp
->lg_tx_policy
= policy
;
69 grp
->lg_mac_tx_policy
= mac_policy
;
/*
 * Fold the upper bytes of a hint down onto its low-order bits by XOR-ing
 * the value with itself shifted right by 24, 16 and 8 bits.  The argument
 * is evaluated four times, so it must be free of side effects.
 */
#define	HASH_HINT(hint)	\
	((hint) ^ ((hint) >> 24) ^ ((hint) >> 16) ^ ((hint) >> 8))
76 * Function invoked by mac layer to find a specific TX ring on a port
80 aggr_find_tx_ring(void *arg
, mblk_t
*mp
, uintptr_t hint
, mac_ring_handle_t
*rh
)
82 aggr_grp_t
*grp
= arg
;
86 rw_enter(&grp
->lg_tx_lock
, RW_READER
);
87 if (grp
->lg_ntx_ports
== 0) {
89 * We could have returned from aggr_m_start() before
90 * the ports were actually attached. Drop the chain.
92 rw_exit(&grp
->lg_tx_lock
);
96 hash
= mac_pkt_hash(DL_ETHER
, mp
, grp
->lg_mac_tx_policy
, B_TRUE
);
97 port
= grp
->lg_tx_ports
[hash
% grp
->lg_ntx_ports
];
100 * Use hash as the hint so to direct traffic to
101 * different TX rings. Note below bit operation
102 * is needed in case hint is 0 to get the most
103 * benefit from HASH_HINT() algorithm.
105 if (port
->lp_tx_ring_cnt
> 1) {
107 hash
= (hash
<< 24 | hash
<< 16 | hash
);
108 hash
= (hash
<< 32 | hash
);
112 hash
= HASH_HINT(hash
);
113 *rh
= port
->lp_pseudo_tx_rings
[hash
% port
->lp_tx_ring_cnt
];
115 *rh
= port
->lp_pseudo_tx_rings
[0];
117 rw_exit(&grp
->lg_tx_lock
);
123 * aggr_tx_notify_thread:
125 * aggr_tx_ring_update() callback function wakes up this thread when
126 * it gets called. This thread will call mac_tx_ring_update() to
127 * notify upper mac of flow control getting relieved. Note that
128 * aggr_tx_ring_update() cannot call mac_tx_ring_update() directly
129 * because aggr_tx_ring_update() is called from lower mac with
133 aggr_tx_notify_thread(void *arg
)
136 aggr_grp_t
*grp
= (aggr_grp_t
*)arg
;
137 mac_ring_handle_t pseudo_mrh
;
139 CALLB_CPR_INIT(&cprinfo
, &grp
->lg_tx_flowctl_lock
, callb_generic_cpr
,
140 "aggr_tx_notify_thread");
142 mutex_enter(&grp
->lg_tx_flowctl_lock
);
143 while (!grp
->lg_tx_notify_done
) {
144 if ((grp
->lg_tx_blocked_cnt
) == 0) {
145 CALLB_CPR_SAFE_BEGIN(&cprinfo
);
146 cv_wait(&grp
->lg_tx_flowctl_cv
,
147 &grp
->lg_tx_flowctl_lock
);
148 CALLB_CPR_SAFE_END(&cprinfo
, &grp
->lg_tx_flowctl_lock
);
151 while (grp
->lg_tx_blocked_cnt
!= 0) {
152 grp
->lg_tx_blocked_cnt
--;
154 grp
->lg_tx_blocked_rings
[grp
->lg_tx_blocked_cnt
];
155 mutex_exit(&grp
->lg_tx_flowctl_lock
);
156 mac_tx_ring_update(grp
->lg_mh
, pseudo_mrh
);
157 mutex_enter(&grp
->lg_tx_flowctl_lock
);
161 * The grp is being destroyed, exit the thread.
163 grp
->lg_tx_notify_thread
= NULL
;
164 CALLB_CPR_EXIT(&cprinfo
);
169 * Callback function registered with lower mac to receive wakeups from
170 * drivers when flow control is relieved (i.e. Tx descriptors are
174 aggr_tx_ring_update(void *arg1
, uintptr_t arg2
)
176 aggr_port_t
*port
= (aggr_port_t
*)arg1
;
177 mac_ring_handle_t mrh
= (mac_ring_handle_t
)arg2
;
178 mac_ring_handle_t pseudo_mrh
;
179 aggr_grp_t
*grp
= port
->lp_grp
;
184 * If the underlying NIC does not expose TX rings,
185 * still as pseudo TX ring is presented to the
188 pseudo_mrh
= port
->lp_pseudo_tx_rings
[0];
190 for (i
= 0; i
< port
->lp_tx_ring_cnt
; i
++) {
191 if (port
->lp_tx_rings
[i
] == mrh
)
194 ASSERT(i
< port
->lp_tx_ring_cnt
);
195 pseudo_mrh
= port
->lp_pseudo_tx_rings
[i
];
197 mutex_enter(&grp
->lg_tx_flowctl_lock
);
199 * It could be possible that some (broken?) device driver
200 * could send more than one wakeup on the same ring. In
201 * such a case, multiple instances of the same pseudo TX
202 * ring should not be saved in lg_tx_blocked_rings[]
203 * array. So first check if woken up ring (pseudo_mrh) is
204 * already in the lg_tx_blocked_rings[] array.
206 for (i
= 0; i
< grp
->lg_tx_blocked_cnt
; i
++) {
207 if (grp
->lg_tx_blocked_rings
[i
] == pseudo_mrh
) {
208 mutex_exit(&grp
->lg_tx_flowctl_lock
);
212 /* A distinct mac_ring_handle. Save and increment count */
213 grp
->lg_tx_blocked_rings
[grp
->lg_tx_blocked_cnt
] = pseudo_mrh
;
214 grp
->lg_tx_blocked_cnt
++;
215 cv_signal(&grp
->lg_tx_flowctl_cv
);
216 mutex_exit(&grp
->lg_tx_flowctl_lock
);
220 * Send function invoked by the MAC service module.
223 aggr_ring_tx(void *arg
, mblk_t
*mp
)
225 aggr_pseudo_tx_ring_t
*pseudo_ring
= (aggr_pseudo_tx_ring_t
*)arg
;
226 aggr_port_t
*port
= pseudo_ring
->atr_port
;
228 return (mac_hwring_send_priv(port
->lp_mch
, pseudo_ring
->atr_hw_rh
, mp
));
232 * Enable sending on the specified port.
235 aggr_send_port_enable(aggr_port_t
*port
)
237 aggr_grp_t
*grp
= port
->lp_grp
;
239 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
241 if (port
->lp_tx_enabled
|| (port
->lp_state
!=
242 AGGR_PORT_STATE_ATTACHED
)) {
243 /* already enabled or port not yet attached */
248 * Add to group's array of tx ports.
250 rw_enter(&grp
->lg_tx_lock
, RW_WRITER
);
251 if (grp
->lg_tx_ports_size
< grp
->lg_ntx_ports
+1) {
252 /* current array too small */
253 aggr_port_t
**new_ports
;
256 new_size
= grp
->lg_ntx_ports
+1;
257 new_ports
= kmem_zalloc(new_size
* sizeof (aggr_port_t
*),
260 if (grp
->lg_tx_ports_size
> 0) {
261 ASSERT(grp
->lg_tx_ports
!= NULL
);
262 bcopy(grp
->lg_tx_ports
, new_ports
,
263 grp
->lg_ntx_ports
* sizeof (aggr_port_t
*));
264 kmem_free(grp
->lg_tx_ports
,
265 grp
->lg_tx_ports_size
* sizeof (aggr_port_t
*));
268 grp
->lg_tx_ports
= new_ports
;
269 grp
->lg_tx_ports_size
= new_size
;
272 grp
->lg_tx_ports
[grp
->lg_ntx_ports
++] = port
;
273 port
->lp_tx_idx
= grp
->lg_ntx_ports
-1;
274 rw_exit(&grp
->lg_tx_lock
);
276 port
->lp_tx_enabled
= B_TRUE
;
278 aggr_grp_update_default(grp
);
282 * Disable sending from the specified port.
285 aggr_send_port_disable(aggr_port_t
*port
)
288 aggr_grp_t
*grp
= port
->lp_grp
;
290 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
291 ASSERT(MAC_PERIM_HELD(port
->lp_mh
));
293 if (!port
->lp_tx_enabled
) {
294 /* not yet enabled */
298 rw_enter(&grp
->lg_tx_lock
, RW_WRITER
);
299 idx
= port
->lp_tx_idx
;
300 ntx
= grp
->lg_ntx_ports
;
303 /* remove from array of attached ports */
304 if (idx
== (ntx
- 1)) {
305 grp
->lg_tx_ports
[idx
] = NULL
;
307 /* not the last entry, replace with last one */
310 victim
= grp
->lg_tx_ports
[ntx
- 1];
311 grp
->lg_tx_ports
[ntx
- 1] = NULL
;
312 victim
->lp_tx_idx
= idx
;
313 grp
->lg_tx_ports
[idx
] = victim
;
318 rw_exit(&grp
->lg_tx_lock
);
320 port
->lp_tx_enabled
= B_FALSE
;
322 aggr_grp_update_default(grp
);