Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / uts / common / io / aggr / aggr_send.c
blob4095e8fda5fd6f5b2e37e69a6295ded74f04d851
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * IEEE 802.3ad Link Aggregation - Send code.
29 * Implements the Distributor function.
32 #include <sys/conf.h>
33 #include <sys/modctl.h>
34 #include <sys/sunddi.h>
35 #include <sys/callb.h>
36 #include <sys/vlan.h>
37 #include <sys/strsun.h>
38 #include <sys/strsubr.h>
39 #include <sys/dlpi.h>
41 #include <inet/common.h>
42 #include <inet/led.h>
43 #include <inet/ip.h>
44 #include <inet/ip6.h>
45 #include <inet/tcp.h>
46 #include <netinet/udp.h>
48 #include <sys/aggr.h>
49 #include <sys/aggr_impl.h>
52 * Update the TX load balancing policy of the specified group.
54 void
55 aggr_send_update_policy(aggr_grp_t *grp, uint32_t policy)
57 uint8_t mac_policy = 0;
59 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
61 if ((policy & AGGR_POLICY_L2) != 0)
62 mac_policy |= MAC_PKT_HASH_L2;
63 if ((policy & AGGR_POLICY_L3) != 0)
64 mac_policy |= MAC_PKT_HASH_L3;
65 if ((policy & AGGR_POLICY_L4) != 0)
66 mac_policy |= MAC_PKT_HASH_L4;
68 grp->lg_tx_policy = policy;
69 grp->lg_mac_tx_policy = mac_policy;
/*
 * Mix the upper bytes of a hint into its low-order bits by XOR-ing the
 * value with itself shifted right by 24, 16 and 8 bits. The argument is
 * expanded four times, so it must not have side effects.
 */
#define	HASH_HINT(hint)	\
	((hint) ^ ((hint) >> 24) ^ ((hint) >> 16) ^ ((hint) >> 8))
76 * Function invoked by mac layer to find a specific TX ring on a port
77 * to send data.
79 mblk_t *
80 aggr_find_tx_ring(void *arg, mblk_t *mp, uintptr_t hint, mac_ring_handle_t *rh)
82 aggr_grp_t *grp = arg;
83 aggr_port_t *port;
84 uint64_t hash;
86 rw_enter(&grp->lg_tx_lock, RW_READER);
87 if (grp->lg_ntx_ports == 0) {
89 * We could have returned from aggr_m_start() before
90 * the ports were actually attached. Drop the chain.
92 rw_exit(&grp->lg_tx_lock);
93 freemsgchain(mp);
94 return (NULL);
96 hash = mac_pkt_hash(DL_ETHER, mp, grp->lg_mac_tx_policy, B_TRUE);
97 port = grp->lg_tx_ports[hash % grp->lg_ntx_ports];
100 * Use hash as the hint so to direct traffic to
101 * different TX rings. Note below bit operation
102 * is needed in case hint is 0 to get the most
103 * benefit from HASH_HINT() algorithm.
105 if (port->lp_tx_ring_cnt > 1) {
106 if (hint == 0) {
107 hash = (hash << 24 | hash << 16 | hash);
108 hash = (hash << 32 | hash);
109 } else {
110 hash = hint;
112 hash = HASH_HINT(hash);
113 *rh = port->lp_pseudo_tx_rings[hash % port->lp_tx_ring_cnt];
114 } else {
115 *rh = port->lp_pseudo_tx_rings[0];
117 rw_exit(&grp->lg_tx_lock);
119 return (mp);
123 * aggr_tx_notify_thread:
125 * aggr_tx_ring_update() callback function wakes up this thread when
126 * it gets called. This thread will call mac_tx_ring_update() to
127 * notify upper mac of flow control getting relieved. Note that
128 * aggr_tx_ring_update() cannot call mac_tx_ring_update() directly
129 * because aggr_tx_ring_update() is called from lower mac with
130 * mi_rw_lock held.
132 void
133 aggr_tx_notify_thread(void *arg)
135 callb_cpr_t cprinfo;
136 aggr_grp_t *grp = (aggr_grp_t *)arg;
137 mac_ring_handle_t pseudo_mrh;
139 CALLB_CPR_INIT(&cprinfo, &grp->lg_tx_flowctl_lock, callb_generic_cpr,
140 "aggr_tx_notify_thread");
142 mutex_enter(&grp->lg_tx_flowctl_lock);
143 while (!grp->lg_tx_notify_done) {
144 if ((grp->lg_tx_blocked_cnt) == 0) {
145 CALLB_CPR_SAFE_BEGIN(&cprinfo);
146 cv_wait(&grp->lg_tx_flowctl_cv,
147 &grp->lg_tx_flowctl_lock);
148 CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_tx_flowctl_lock);
149 continue;
151 while (grp->lg_tx_blocked_cnt != 0) {
152 grp->lg_tx_blocked_cnt--;
153 pseudo_mrh =
154 grp->lg_tx_blocked_rings[grp->lg_tx_blocked_cnt];
155 mutex_exit(&grp->lg_tx_flowctl_lock);
156 mac_tx_ring_update(grp->lg_mh, pseudo_mrh);
157 mutex_enter(&grp->lg_tx_flowctl_lock);
161 * The grp is being destroyed, exit the thread.
163 grp->lg_tx_notify_thread = NULL;
164 CALLB_CPR_EXIT(&cprinfo);
165 thread_exit();
169 * Callback function registered with lower mac to receive wakeups from
170 * drivers when flow control is relieved (i.e. Tx descriptors are
171 * available).
173 void
174 aggr_tx_ring_update(void *arg1, uintptr_t arg2)
176 aggr_port_t *port = (aggr_port_t *)arg1;
177 mac_ring_handle_t mrh = (mac_ring_handle_t)arg2;
178 mac_ring_handle_t pseudo_mrh;
179 aggr_grp_t *grp = port->lp_grp;
180 int i = 0;
182 if (mrh == NULL) {
184 * If the underlying NIC does not expose TX rings,
185 * still as pseudo TX ring is presented to the
186 * aggr mac.
188 pseudo_mrh = port->lp_pseudo_tx_rings[0];
189 } else {
190 for (i = 0; i < port->lp_tx_ring_cnt; i++) {
191 if (port->lp_tx_rings[i] == mrh)
192 break;
194 ASSERT(i < port->lp_tx_ring_cnt);
195 pseudo_mrh = port->lp_pseudo_tx_rings[i];
197 mutex_enter(&grp->lg_tx_flowctl_lock);
199 * It could be possible that some (broken?) device driver
200 * could send more than one wakeup on the same ring. In
201 * such a case, multiple instances of the same pseudo TX
202 * ring should not be saved in lg_tx_blocked_rings[]
203 * array. So first check if woken up ring (pseudo_mrh) is
204 * already in the lg_tx_blocked_rings[] array.
206 for (i = 0; i < grp->lg_tx_blocked_cnt; i++) {
207 if (grp->lg_tx_blocked_rings[i] == pseudo_mrh) {
208 mutex_exit(&grp->lg_tx_flowctl_lock);
209 return;
212 /* A distinct mac_ring_handle. Save and increment count */
213 grp->lg_tx_blocked_rings[grp->lg_tx_blocked_cnt] = pseudo_mrh;
214 grp->lg_tx_blocked_cnt++;
215 cv_signal(&grp->lg_tx_flowctl_cv);
216 mutex_exit(&grp->lg_tx_flowctl_lock);
220 * Send function invoked by the MAC service module.
222 mblk_t *
223 aggr_ring_tx(void *arg, mblk_t *mp)
225 aggr_pseudo_tx_ring_t *pseudo_ring = (aggr_pseudo_tx_ring_t *)arg;
226 aggr_port_t *port = pseudo_ring->atr_port;
228 return (mac_hwring_send_priv(port->lp_mch, pseudo_ring->atr_hw_rh, mp));
232 * Enable sending on the specified port.
234 void
235 aggr_send_port_enable(aggr_port_t *port)
237 aggr_grp_t *grp = port->lp_grp;
239 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
241 if (port->lp_tx_enabled || (port->lp_state !=
242 AGGR_PORT_STATE_ATTACHED)) {
243 /* already enabled or port not yet attached */
244 return;
248 * Add to group's array of tx ports.
250 rw_enter(&grp->lg_tx_lock, RW_WRITER);
251 if (grp->lg_tx_ports_size < grp->lg_ntx_ports+1) {
252 /* current array too small */
253 aggr_port_t **new_ports;
254 uint_t new_size;
256 new_size = grp->lg_ntx_ports+1;
257 new_ports = kmem_zalloc(new_size * sizeof (aggr_port_t *),
258 KM_SLEEP);
260 if (grp->lg_tx_ports_size > 0) {
261 ASSERT(grp->lg_tx_ports != NULL);
262 bcopy(grp->lg_tx_ports, new_ports,
263 grp->lg_ntx_ports * sizeof (aggr_port_t *));
264 kmem_free(grp->lg_tx_ports,
265 grp->lg_tx_ports_size * sizeof (aggr_port_t *));
268 grp->lg_tx_ports = new_ports;
269 grp->lg_tx_ports_size = new_size;
272 grp->lg_tx_ports[grp->lg_ntx_ports++] = port;
273 port->lp_tx_idx = grp->lg_ntx_ports-1;
274 rw_exit(&grp->lg_tx_lock);
276 port->lp_tx_enabled = B_TRUE;
278 aggr_grp_update_default(grp);
282 * Disable sending from the specified port.
284 void
285 aggr_send_port_disable(aggr_port_t *port)
287 uint_t idx, ntx;
288 aggr_grp_t *grp = port->lp_grp;
290 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
291 ASSERT(MAC_PERIM_HELD(port->lp_mh));
293 if (!port->lp_tx_enabled) {
294 /* not yet enabled */
295 return;
298 rw_enter(&grp->lg_tx_lock, RW_WRITER);
299 idx = port->lp_tx_idx;
300 ntx = grp->lg_ntx_ports;
301 ASSERT(idx < ntx);
303 /* remove from array of attached ports */
304 if (idx == (ntx - 1)) {
305 grp->lg_tx_ports[idx] = NULL;
306 } else {
307 /* not the last entry, replace with last one */
308 aggr_port_t *victim;
310 victim = grp->lg_tx_ports[ntx - 1];
311 grp->lg_tx_ports[ntx - 1] = NULL;
312 victim->lp_tx_idx = idx;
313 grp->lg_tx_ports[idx] = victim;
316 port->lp_tx_idx = 0;
317 grp->lg_ntx_ports--;
318 rw_exit(&grp->lg_tx_lock);
320 port->lp_tx_enabled = B_FALSE;
322 aggr_grp_update_default(grp);