8354 sync regcomp(3C) with upstream (fix make catalog)
[unleashed/tickless.git] / usr / src / uts / intel / io / vmxnet3s / vmxnet3_tx.c
blob4789bf55ef9d9d2f632d1c2fe28fda6abd7176f7
1 /*
2 * Copyright (C) 2007 VMware, Inc. All rights reserved.
4 * The contents of this file are subject to the terms of the Common
5 * Development and Distribution License (the "License") version 1.0
6 * and no later version. You may not use this file except in
7 * compliance with the License.
9 * You can obtain a copy of the License at
10 * http://www.opensource.org/licenses/cddl1.php
12 * See the License for the specific language governing permissions
13 * and limitations under the License.
17 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
20 #include <vmxnet3.h>
/*
 * Result of an attempt to place one message on the Tx command ring
 * (see vmxnet3_tx_one()).
 */
22 typedef enum vmxnet3_txstatus {
23 VMXNET3_TX_OK,		/* msg mapped onto the ring successfully */
24 VMXNET3_TX_FAILURE,	/* fatal error (DMA bind or offload); drop msg */
25 VMXNET3_TX_PULLUP,	/* msg overfragmented; retry after msgpullup() */
26 VMXNET3_TX_RINGFULL	/* ring (nearly) full; requeue and retry later */
27 } vmxnet3_txstatus;
/*
 * Hardware offload context for one message, built by
 * vmxnet3_tx_prepare_offload() and written into the SOP Tx descriptor
 * by vmxnet3_tx_one().
 */
29 typedef struct vmxnet3_offload_t {
30 uint16_t om;		/* offload mode (VMXNET3_OM_NONE/CSUM/TSO) */
31 uint16_t hlen;		/* eth+ip+tcp hdr len for TSO; csum start for CSUM */
32 uint16_t msscof;	/* MSS for TSO; checksum stuff offset for CSUM */
33 } vmxnet3_offload_t;
36 * Initialize a TxQueue. Currently nothing needs to be done.
38 /* ARGSUSED */
39 int
40 vmxnet3_txqueue_init(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
/* No per-queue Tx state needs setup today; kept for init/fini symmetry. */
42 return (0);
46 * Finish a TxQueue by freeing all pending Tx.
48 void
49 vmxnet3_txqueue_fini(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
51 unsigned int i;
/* Device must be quiesced: no new Tx may race with this teardown walk. */
53 ASSERT(!dp->devEnabled);
/*
 * Free every mblk the driver still owns.  metaRing[i].mp is only set
 * for the SOP slot of an in-flight message (see vmxnet3_tx_one()), so
 * scanning all cmdRing slots releases each pending message exactly once.
 */
55 for (i = 0; i < txq->cmdRing.size; i++) {
56 mblk_t *mp = txq->metaRing[i].mp;
57 if (mp) {
58 freemsg(mp);
64 * Build the offload context of a msg.
66 * Returns:
67 * 0 if everything went well.
68 * +n if n bytes need to be pulled up.
69 * -1 in case of error (not used).
71 static int
72 vmxnet3_tx_prepare_offload(vmxnet3_softc_t *dp, vmxnet3_offload_t *ol,
73 mblk_t *mp)
75 int ret = 0;
76 uint32_t start, stuff, value, flags, lso_flag, mss;
/* Default: no hardware offload requested for this message. */
78 ol->om = VMXNET3_OM_NONE;
79 ol->hlen = 0;
80 ol->msscof = 0;
/* Pull the checksum and LSO metadata the stack attached to the mblk. */
82 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, &value, &flags);
84 mac_lso_get(mp, &mss, &lso_flag);
86 if (flags || lso_flag) {
87 struct ether_vlan_header *eth = (void *) mp->b_rptr;
88 uint8_t ethLen;
/* Account for an optional 802.1Q VLAN tag in the L2 header length. */
90 if (eth->ether_tpid == htons(ETHERTYPE_VLAN)) {
91 ethLen = sizeof (struct ether_vlan_header);
92 } else {
93 ethLen = sizeof (struct ether_header);
96 VMXNET3_DEBUG(dp, 4, "flags=0x%x, ethLen=%u, start=%u, "
97 "stuff=%u, value=%u\n", flags, ethLen, start, stuff, value);
99 if (lso_flag & HW_LSO) {
100 mblk_t *mblk = mp;
101 uint8_t *ip, *tcp;
102 uint8_t ipLen, tcpLen;
105 * Copy e1000g's behavior:
106 * - Do not assume all the headers are in the same mblk.
107 * - Assume each header is always within one mblk.
108 * - Assume the ethernet header is in the first mblk.
/*
 * NOTE(review): IPH_HDR_LENGTH on an ipha_t assumes an IPv4
 * header here -- presumably HW_LSO is only set for IPv4/TCP;
 * confirm against the MAC layer's LSO capability negotiation.
 */
110 ip = mblk->b_rptr + ethLen;
111 if (ip >= mblk->b_wptr) {
112 mblk = mblk->b_cont;
113 ip = mblk->b_rptr;
115 ipLen = IPH_HDR_LENGTH((ipha_t *)ip);
116 tcp = ip + ipLen;
117 if (tcp >= mblk->b_wptr) {
118 mblk = mblk->b_cont;
119 tcp = mblk->b_rptr;
121 tcpLen = TCP_HDR_LENGTH((tcph_t *)tcp);
122 /* Careful, '>' instead of '>=' here */
123 if (tcp + tcpLen > mblk->b_wptr) {
124 mblk = mblk->b_cont;
127 ol->om = VMXNET3_OM_TSO;
128 ol->hlen = ethLen + ipLen + tcpLen;
129 ol->msscof = mss;
/*
 * If the header walk crossed into a later mblk, the headers are
 * not contiguous in the first mblk; ask the caller to pull up
 * hlen bytes (the positive return value) and retry.
 */
131 if (mblk != mp) {
132 ret = ol->hlen;
134 } else if (flags & HCK_PARTIALCKSUM) {
135 ol->om = VMXNET3_OM_CSUM;
/* start/stuff are relative to the IP payload; add the L2 length. */
136 ol->hlen = start + ethLen;
137 ol->msscof = stuff + ethLen;
141 return (ret);
145 * Map a msg into the Tx command ring of a vmxnet3 device.
147 * Returns:
148 * VMXNET3_TX_OK if everything went well.
149 * VMXNET3_TX_RINGFULL if the ring is nearly full.
150 * VMXNET3_TX_PULLUP if the msg is overfragmented.
151 * VMXNET3_TX_FAILURE if there was a DMA or offload error.
153 * Side effects:
154 * The ring is filled if VMXNET3_TX_OK is returned.
156 static vmxnet3_txstatus
157 vmxnet3_tx_one(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq,
158 vmxnet3_offload_t *ol, mblk_t *mp)
160 int ret = VMXNET3_TX_OK;
161 unsigned int frags = 0, totLen = 0;
162 vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
163 Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
164 Vmxnet3_GenericDesc *txDesc;
165 uint16_t sopIdx, eopIdx;
166 uint8_t sopGen, curGen;
167 mblk_t *mblk;
/* txLock serializes all cmdRing producers and the completion path. */
169 mutex_enter(&dp->txLock);
/*
 * Remember the start-of-packet slot and its generation bit.  All
 * non-SOP descriptors are written with the inverted generation
 * (curGen) first; the SOP gen is flipped last (below) so the device
 * cannot see a partially built chain.
 */
171 sopIdx = eopIdx = cmdRing->next2fill;
172 sopGen = cmdRing->gen;
173 curGen = !cmdRing->gen;
175 for (mblk = mp; mblk != NULL; mblk = mblk->b_cont) {
176 unsigned int len = MBLKL(mblk);
177 ddi_dma_cookie_t cookie;
178 uint_t cookieCount;
/* Skip zero-length fragments; totLen feeds the TSO deferral math. */
180 if (len) {
181 totLen += len;
182 } else {
183 continue;
/* Bind this fragment for DMA; a bind failure is fatal for the msg. */
186 if (ddi_dma_addr_bind_handle(dp->txDmaHandle, NULL,
187 (caddr_t)mblk->b_rptr, len,
188 DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL,
189 &cookie, &cookieCount) != DDI_DMA_MAPPED) {
190 VMXNET3_WARN(dp, "ddi_dma_addr_bind_handle() failed\n");
191 ret = VMXNET3_TX_FAILURE;
192 goto error;
195 ASSERT(cookieCount);
/* One descriptor per DMA cookie chunk (outer: cookies, inner: chunks). */
197 do {
198 uint64_t addr = cookie.dmac_laddress;
199 size_t len = cookie.dmac_size;
201 do {
202 uint32_t dw2, dw3;
203 size_t chunkLen;
205 ASSERT(!txq->metaRing[eopIdx].mp);
206 ASSERT(cmdRing->avail - frags);
/*
 * Non-TSO packets may use at most VMXNET3_MAX_TXD_PER_PKT
 * descriptors; ask the caller to flatten the msg instead.
 */
208 if (frags >= cmdRing->size - 1 ||
209 (ol->om != VMXNET3_OM_TSO &&
210 frags >= VMXNET3_MAX_TXD_PER_PKT)) {
211 VMXNET3_DEBUG(dp, 2,
212 "overfragmented mp (%u)\n", frags);
213 (void) ddi_dma_unbind_handle(
214 dp->txDmaHandle);
215 ret = VMXNET3_TX_PULLUP;
216 goto error;
/* Keep one slot free so next2fill never catches next2comp. */
218 if (cmdRing->avail - frags <= 1) {
219 dp->txMustResched = B_TRUE;
220 (void) ddi_dma_unbind_handle(
221 dp->txDmaHandle);
222 ret = VMXNET3_TX_RINGFULL;
223 goto error;
/* A single descriptor covers at most VMXNET3_MAX_TX_BUF_SIZE bytes. */
226 if (len > VMXNET3_MAX_TX_BUF_SIZE) {
227 chunkLen = VMXNET3_MAX_TX_BUF_SIZE;
228 } else {
229 chunkLen = len;
232 frags++;
233 eopIdx = cmdRing->next2fill;
235 txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
236 ASSERT(txDesc->txd.gen != cmdRing->gen);
238 /* txd.addr */
239 txDesc->txd.addr = addr;
/* txd.dw2: len field of 0 encodes the max buffer size. */
240 /* txd.dw2 */
241 dw2 = chunkLen == VMXNET3_MAX_TX_BUF_SIZE ?
242 0 : chunkLen;
243 dw2 |= curGen << VMXNET3_TXD_GEN_SHIFT;
244 txDesc->dword[2] = dw2;
245 ASSERT(txDesc->txd.len == len ||
246 txDesc->txd.len == 0);
247 /* txd.dw3 */
248 dw3 = 0;
249 txDesc->dword[3] = dw3;
/* After the SOP slot, descriptors use the ring's real gen bit. */
251 VMXNET3_INC_RING_IDX(cmdRing,
252 cmdRing->next2fill);
253 curGen = cmdRing->gen;
255 addr += chunkLen;
256 len -= chunkLen;
257 } while (len);
259 if (--cookieCount) {
260 ddi_dma_nextcookie(dp->txDmaHandle, &cookie);
262 } while (cookieCount);
264 (void) ddi_dma_unbind_handle(dp->txDmaHandle);
267 /* Update the EOP descriptor */
268 txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
269 txDesc->dword[3] |= VMXNET3_TXD_CQ | VMXNET3_TXD_EOP;
271 /* Update the SOP descriptor. Must be done last */
272 txDesc = VMXNET3_GET_DESC(cmdRing, sopIdx);
/* TSO headers must fit entirely in the first descriptor's buffer. */
273 if (ol->om == VMXNET3_OM_TSO && txDesc->txd.len != 0 &&
274 txDesc->txd.len < ol->hlen) {
275 ret = VMXNET3_TX_FAILURE;
276 goto error;
278 txDesc->txd.om = ol->om;
279 txDesc->txd.hlen = ol->hlen;
280 txDesc->txd.msscof = ol->msscof;
/* Publish: flip the SOP gen only after all other fields are visible. */
281 membar_producer();
282 txDesc->txd.gen = sopGen;
284 /* Update the meta ring & metadata */
285 txq->metaRing[sopIdx].mp = mp;
286 txq->metaRing[eopIdx].sopIdx = sopIdx;
287 txq->metaRing[eopIdx].frags = frags;
288 cmdRing->avail -= frags;
/* TSO counts one deferred doorbell credit per resulting segment. */
289 if (ol->om == VMXNET3_OM_TSO) {
290 txqCtrl->txNumDeferred +=
291 (totLen - ol->hlen + ol->msscof - 1) / ol->msscof;
292 } else {
293 txqCtrl->txNumDeferred++;
296 VMXNET3_DEBUG(dp, 3, "tx 0x%p on [%u;%u]\n", mp, sopIdx, eopIdx);
298 goto done;
300 error:
301 /* Reverse the generation bits */
/* Walk next2fill back to the SOP slot, un-publishing each descriptor. */
302 while (sopIdx != cmdRing->next2fill) {
303 VMXNET3_DEC_RING_IDX(cmdRing, cmdRing->next2fill);
304 txDesc = VMXNET3_GET_DESC(cmdRing, cmdRing->next2fill);
305 txDesc->txd.gen = !cmdRing->gen;
308 done:
309 mutex_exit(&dp->txLock);
311 return (ret);
315 * Send packets on a vmxnet3 device.
317 * Returns:
318 * NULL in case of success or failure.
319 * The mps to be retransmitted later if the ring is full.
321 mblk_t *
322 vmxnet3_tx(void *data, mblk_t *mps)
324 vmxnet3_softc_t *dp = data;
325 vmxnet3_txqueue_t *txq = &dp->txQueue;
326 vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
327 Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
328 vmxnet3_txstatus status = VMXNET3_TX_OK;
329 mblk_t *mp;
331 ASSERT(mps != NULL);
/* Walk the b_next chain, stopping early only if the ring fills up. */
333 do {
334 vmxnet3_offload_t ol;
335 int pullup;
337 mp = mps;
338 mps = mp->b_next;
339 mp->b_next = NULL;
341 if (DB_TYPE(mp) != M_DATA) {
343 * PR #315560: M_PROTO mblks could be passed for
344 * some reason. Drop them because we don't understand
345 * them and because their contents are not Ethernet
346 * frames anyway.
348 ASSERT(B_FALSE);
349 freemsg(mp);
350 continue;
354 * Prepare the offload while we're still handling the original
355 * message -- msgpullup() discards the metadata afterwards.
/* A positive return means that many header bytes must be contiguous. */
357 pullup = vmxnet3_tx_prepare_offload(dp, &ol, mp);
358 if (pullup) {
359 mblk_t *new_mp = msgpullup(mp, pullup);
360 atomic_inc_32(&dp->tx_pullup_needed);
361 freemsg(mp);
362 if (new_mp) {
363 mp = new_mp;
364 } else {
365 atomic_inc_32(&dp->tx_pullup_failed);
366 continue;
371 * Try to map the message in the Tx ring.
372 * This call might fail for non-fatal reasons.
374 status = vmxnet3_tx_one(dp, txq, &ol, mp);
375 if (status == VMXNET3_TX_PULLUP) {
377 * Try one more time after flattening
378 * the message with msgpullup().
380 if (mp->b_cont != NULL) {
381 mblk_t *new_mp = msgpullup(mp, -1);
382 atomic_inc_32(&dp->tx_pullup_needed);
383 freemsg(mp);
384 if (new_mp) {
385 mp = new_mp;
386 status = vmxnet3_tx_one(dp, txq, &ol,
387 mp);
388 } else {
389 atomic_inc_32(&dp->tx_pullup_failed);
390 continue;
/* RINGFULL is handled below by requeueing; anything else is a drop. */
394 if (status != VMXNET3_TX_OK && status != VMXNET3_TX_RINGFULL) {
395 /* Fatal failure, drop it */
396 atomic_inc_32(&dp->tx_error);
397 freemsg(mp);
399 } while (mps && status != VMXNET3_TX_RINGFULL);
/* On ring-full, hand the unsent chain (current mp first) back to MAC. */
401 if (status == VMXNET3_TX_RINGFULL) {
402 atomic_inc_32(&dp->tx_ring_full);
403 mp->b_next = mps;
404 mps = mp;
405 } else {
406 ASSERT(!mps);
409 /* Notify the device */
/* Ring the TXPROD doorbell only once enough work has been deferred. */
410 mutex_enter(&dp->txLock);
411 if (txqCtrl->txNumDeferred >= txqCtrl->txThreshold) {
412 txqCtrl->txNumDeferred = 0;
413 VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_TXPROD, cmdRing->next2fill);
415 mutex_exit(&dp->txLock);
417 return (mps);
421 * Parse a transmit queue and complete packets.
423 * Returns:
424 * B_TRUE if Tx must be updated or B_FALSE if no action is required.
426 boolean_t
427 vmxnet3_tx_complete(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
429 vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
430 vmxnet3_compring_t *compRing = &txq->compRing;
431 Vmxnet3_GenericDesc *compDesc;
432 boolean_t completedTx = B_FALSE;
433 boolean_t ret = B_FALSE;
435 mutex_enter(&dp->txLock);
/*
 * Consume completion descriptors while their generation bit matches
 * ours; a mismatch means the device has not written that slot yet.
 */
437 compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
438 while (compDesc->tcd.gen == compRing->gen) {
439 vmxnet3_metatx_t *sopMetaDesc, *eopMetaDesc;
440 uint16_t sopIdx, eopIdx;
441 mblk_t *mp;
/* The completion names the EOP slot; its metadata points back to SOP. */
443 eopIdx = compDesc->tcd.txdIdx;
444 eopMetaDesc = &txq->metaRing[eopIdx];
445 sopIdx = eopMetaDesc->sopIdx;
446 sopMetaDesc = &txq->metaRing[sopIdx];
448 ASSERT(eopMetaDesc->frags);
/* Return the whole descriptor chain's slots to the command ring. */
449 cmdRing->avail += eopMetaDesc->frags;
451 ASSERT(sopMetaDesc->mp);
452 mp = sopMetaDesc->mp;
453 freemsg(mp);
455 eopMetaDesc->sopIdx = 0;
456 eopMetaDesc->frags = 0;
457 sopMetaDesc->mp = NULL;
459 completedTx = B_TRUE;
461 VMXNET3_DEBUG(dp, 3, "cp 0x%p on [%u;%u]\n", mp, sopIdx,
462 eopIdx);
464 VMXNET3_INC_RING_IDX(compRing, compRing->next2comp);
465 compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
/* Tell the caller to restart Tx if we freed space after a stall. */
468 if (dp->txMustResched && completedTx) {
469 dp->txMustResched = B_FALSE;
470 ret = B_TRUE;
473 mutex_exit(&dp->txLock);
475 return (ret);