/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
#include "rge.h"

#define U32TOPTR(x)	((void *)(uintptr_t)(uint32_t)(x))
#define PTRTOU32(x)	((uint32_t)(uintptr_t)(void *)(x))
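/*
 * Note: the two macros above just convert between 32-bit values and
 * native pointers (by way of uintptr_t), truncating or zero-extending
 * as appropriate.
 */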
/*
 * ========== RX side routines ==========
 */

#define RGE_DBG		RGE_DBG_RECV	/* debug flag for this code	*/
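/*
 * Atomically decrement a counter, in order to reserve <n> units of a
 * shared resource (e.g. free receive buffers or free send descriptors).
 * Returns the new count on success, or 0 (leaving the count unchanged)
 * if the request can't be satisfied.
 */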
static uint32_t rge_atomic_reserve(uint32_t *count_p, uint32_t n);
#pragma inline(rge_atomic_reserve)

static uint32_t
rge_atomic_reserve(uint32_t *count_p, uint32_t n)
{
        uint32_t oldval;
        uint32_t newval;

        /* ATOMICALLY */
        do {
                oldval = *count_p;
                newval = oldval - n;
                if (oldval <= n)
                        return (0);		/* no resources left	*/
        } while (atomic_cas_32(count_p, oldval, newval) != oldval);

        return (newval);
}
/*
 * Atomically increment a counter
 */
static void rge_atomic_renounce(uint32_t *count_p, uint32_t n);
#pragma inline(rge_atomic_renounce)

static void
rge_atomic_renounce(uint32_t *count_p, uint32_t n)
{
        uint32_t oldval;
        uint32_t newval;

        /* ATOMICALLY */
        do {
                oldval = *count_p;
                newval = oldval + n;
        } while (atomic_cas_32(count_p, oldval, newval) != oldval);
}
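/*
 * rge_atomic_reserve() and rge_atomic_renounce() are used as a
 * reserve/release pair on the driver's free-resource counters
 * (rx_free, tx_free).  Illustrative usage, simplified from the code
 * later in this file:
 *
 *	if (!rge_atomic_reserve(&rgep->rx_free, 1))
 *		(fall back to the bcopy receive path)
 *	...
 *	rge_atomic_renounce(&rgep->rx_free, 1);	(buffer handed back)
 */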
/*
 * Callback code invoked from STREAMS when the recv data buffer is free
 * for recycling.
 */
void
rge_rx_recycle(caddr_t arg)
{
        rge_t *rgep;
        dma_buf_t *rx_buf;
        sw_rbd_t *free_srbdp;
        uint32_t slot_recy;

        rx_buf = (dma_buf_t *)arg;
        rgep = (rge_t *)rx_buf->private;

        /*
         * In rge_unattach() and rge_attach(), this callback function will
         * also be called to free mp in rge_fini_rings() and rge_init_rings().
         * In such a situation, we shouldn't do the desballoc() below;
         * otherwise there'll be a memory leak.
         */
        if (rgep->rge_mac_state == RGE_MAC_UNATTACH ||
            rgep->rge_mac_state == RGE_MAC_ATTACH)
                return;

        /*
         * Recycle the data buffer again
         * and put it back on the free ring.
         */
        rx_buf->mp = desballoc(DMA_VPTR(rx_buf->pbuf),
            rgep->rxbuf_size, 0, &rx_buf->rx_recycle);
        if (rx_buf->mp == NULL) {
                rge_problem(rgep, "rge_rx_recycle: desballoc() failed");
                return;
        }
        mutex_enter(rgep->rc_lock);
        slot_recy = rgep->rc_next;
        free_srbdp = &rgep->free_srbds[slot_recy];

        ASSERT(free_srbdp->rx_buf == NULL);
        free_srbdp->rx_buf = rx_buf;
        rgep->rc_next = NEXT(slot_recy, RGE_BUF_SLOTS);
        rge_atomic_renounce(&rgep->rx_free, 1);
        if (rgep->rx_bcopy && rgep->rx_free == RGE_BUF_SLOTS)
                rgep->rx_bcopy = B_FALSE;
        ASSERT(rgep->rx_free <= RGE_BUF_SLOTS);

        mutex_exit(rgep->rc_lock);
}
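/*
 * rge_rx_refill() -- replace the receive buffer at <slot> with one
 * taken from the free-buffer ring and point the hardware descriptor
 * at the new buffer's DMA address.  Returns nonzero on success; on
 * failure (no free buffer, which shouldn't happen) it switches the
 * driver into bcopy receive mode and returns 0.
 */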
static int rge_rx_refill(rge_t *rgep, uint32_t slot);
#pragma inline(rge_rx_refill)

static int
rge_rx_refill(rge_t *rgep, uint32_t slot)
{
        dma_buf_t *free_buf;
        rge_bd_t *hw_rbd_p;
        sw_rbd_t *srbdp;
        uint32_t free_slot;

        srbdp = &rgep->sw_rbds[slot];
        hw_rbd_p = &rgep->rx_ring[slot];
        free_slot = rgep->rf_next;
        free_buf = rgep->free_srbds[free_slot].rx_buf;
        if (free_buf != NULL) {
                srbdp->rx_buf = free_buf;
                rgep->free_srbds[free_slot].rx_buf = NULL;
                hw_rbd_p->host_buf_addr = RGE_BSWAP_32(rgep->head_room +
                    free_buf->pbuf.cookie.dmac_laddress);
                hw_rbd_p->host_buf_addr_hi =
                    RGE_BSWAP_32(free_buf->pbuf.cookie.dmac_laddress >> 32);
                rgep->rf_next = NEXT(free_slot, RGE_BUF_SLOTS);
                return (1);
        } else {
                /*
                 * This situation shouldn't happen
                 */
                rge_problem(rgep, "rge_rx_refill: free buffer %d is NULL",
                    free_slot);
                rgep->rx_bcopy = B_TRUE;
                return (0);
        }
}
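/*
 * rge_receive_packet() -- examine the receive descriptor at <slot> and,
 * if it holds a complete, error-free packet of valid length, return it
 * as an mblk: either copied into a freshly allocated block (bcopy mode,
 * small packets, or no spare buffer available) or by loaning the DMA
 * buffer upstream and refilling the slot from the free ring.  Returns
 * NULL when there is nothing to pass up for this slot.
 */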
static mblk_t *rge_receive_packet(rge_t *rgep, uint32_t slot);
#pragma inline(rge_receive_packet)

static mblk_t *
rge_receive_packet(rge_t *rgep, uint32_t slot)
{
        rge_bd_t *hw_rbd_p;
        sw_rbd_t *srbdp;
        uchar_t *dp;
        mblk_t *mp;
        uint8_t *rx_ptr;
        uint32_t rx_status;
        uint32_t packet_len;
        uint32_t minsize;
        uint32_t maxsize;
        uint32_t proto;
        uint32_t pflags;
        struct ether_vlan_header *ehp;
        uint16_t vtag = 0;

        hw_rbd_p = &rgep->rx_ring[slot];
        srbdp = &rgep->sw_rbds[slot];

        /*
         * Read receive status
         */
        rx_status = RGE_BSWAP_32(hw_rbd_p->flags_len) & RBD_FLAGS_MASK;

        /*
         * Handle error packet
         */
        if (!(rx_status & BD_FLAG_PKT_END)) {
                RGE_DEBUG(("rge_receive_packet: not a complete packet"));
                return (NULL);
        }
        if (rx_status & RBD_FLAG_ERROR) {
                if (rx_status & RBD_FLAG_CRC_ERR)
                        rgep->stats.crc_err++;
                if (rx_status & RBD_FLAG_RUNT)
                        rgep->stats.in_short++;
                /*
                 * Set chip_error flag to reset chip:
                 * (suggested in Realtek programming guide.)
                 */
                RGE_DEBUG(("rge_receive_packet: error packet, status = %x",
                    rx_status));
                mutex_enter(rgep->genlock);
                rgep->rge_chip_state = RGE_CHIP_ERROR;
                mutex_exit(rgep->genlock);
                return (NULL);
        }
        /*
         * Handle size error packet
         */
        packet_len = RGE_BSWAP_32(hw_rbd_p->flags_len) & RBD_LEN_MASK;
        packet_len -= ETHERFCSL;
        minsize = ETHERMIN;
        pflags = RGE_BSWAP_32(hw_rbd_p->vlan_tag);
        if (pflags & RBD_VLAN_PKT)
                minsize -= VLAN_TAGSZ;
        maxsize = rgep->ethmax_size;
        if (packet_len < minsize || packet_len > maxsize) {
                RGE_DEBUG(("rge_receive_packet: len err = %d", packet_len));
                return (NULL);
        }
        DMA_SYNC(srbdp->rx_buf->pbuf, DDI_DMA_SYNC_FORKERNEL);
        if (rgep->rx_bcopy || packet_len <= RGE_RECV_COPY_SIZE ||
            !rge_atomic_reserve(&rgep->rx_free, 1)) {
                /*
                 * Allocate buffer to receive this good packet
                 */
                mp = allocb(packet_len + RGE_HEADROOM, 0);
                if (mp == NULL) {
                        RGE_DEBUG(("rge_receive_packet: allocate buffer fail"));
                        rgep->stats.no_rcvbuf++;
                        return (NULL);
                }

                /*
                 * Copy the data found into the new cluster
                 */
                rx_ptr = DMA_VPTR(srbdp->rx_buf->pbuf);
                mp->b_rptr = dp = mp->b_rptr + RGE_HEADROOM;
                bcopy(rx_ptr + rgep->head_room, dp, packet_len);
                mp->b_wptr = dp + packet_len;
        } else {
                mp = srbdp->rx_buf->mp;
                mp->b_rptr += rgep->head_room;
                mp->b_wptr = mp->b_rptr + packet_len;
                mp->b_next = mp->b_cont = NULL;
                /*
                 * Refill the current receive bd buffer;
                 * if this fails, just keep the mp.
                 */
                if (!rge_rx_refill(rgep, slot))
                        return (NULL);
        }
        rgep->stats.rbytes += packet_len;
        rgep->stats.rpackets++;
        /*
         * VLAN packet ?
         */
        if (pflags & RBD_VLAN_PKT)
                vtag = pflags & RBD_VLAN_TAG;
        if (vtag) {
                vtag = TCI_CHIP2OS(vtag);
                /*
                 * As h/w strips the VLAN tag from the incoming packet, we
                 * need to insert the VLAN tag into this packet before
                 * sending it up.
                 */
                (void) memmove(mp->b_rptr - VLAN_TAGSZ, mp->b_rptr,
                    2 * ETHERADDRL);
                mp->b_rptr -= VLAN_TAGSZ;
                ehp = (struct ether_vlan_header *)mp->b_rptr;
                ehp->ether_tpid = htons(ETHERTYPE_VLAN);
                ehp->ether_tci = htons(vtag);
                rgep->stats.rbytes += VLAN_TAGSZ;
        }
        /*
         * Check h/w checksum offload status
         */
        pflags = 0;
        proto = rx_status & RBD_FLAG_PROTOCOL;
        if ((proto == RBD_FLAG_TCP && !(rx_status & RBD_TCP_CKSUM_ERR)) ||
            (proto == RBD_FLAG_UDP && !(rx_status & RBD_UDP_CKSUM_ERR)))
                pflags |= HCK_FULLCKSUM_OK;
        if (proto != RBD_FLAG_NONE_IP && !(rx_status & RBD_IP_CKSUM_ERR))
                pflags |= HCK_IPV4_HDRCKSUM_OK;
        mac_hcksum_set(mp, 0, 0, 0, 0, pflags);

        return (mp);
}
/*
 * Accept the packets received in rx ring.
 *
 * Returns a chain of mblks containing the received data, to be
 * passed up to mac_rx().
 * The routine returns only when a complete scan has been performed
 * without finding any packets to receive.
 * This function must SET the OWN bit of BD to indicate the packets
 * it has accepted from the ring.
 */
static mblk_t *rge_receive_ring(rge_t *rgep);
#pragma inline(rge_receive_ring)

static mblk_t *
rge_receive_ring(rge_t *rgep)
{
        rge_bd_t *hw_rbd_p;
        mblk_t *head;
        mblk_t **tail;
        mblk_t *mp;
        uint32_t slot;

        ASSERT(mutex_owned(rgep->rx_lock));

        /*
         * Sync (all) the receive ring descriptors
         * before accepting the packets they describe
         */
        DMA_SYNC(rgep->rx_desc, DDI_DMA_SYNC_FORKERNEL);
        slot = rgep->rx_next;
        hw_rbd_p = &rgep->rx_ring[slot];
        head = NULL;
        tail = &head;

        while (!(hw_rbd_p->flags_len & RGE_BSWAP_32(BD_FLAG_HW_OWN))) {
                if ((mp = rge_receive_packet(rgep, slot)) != NULL) {
                        *tail = mp;
                        tail = &mp->b_next;
                }

                hw_rbd_p->flags_len =
                    RGE_BSWAP_32(rgep->rxbuf_size - rgep->head_room);
                HW_RBD_INIT(hw_rbd_p, slot);
                slot = NEXT(slot, RGE_RECV_SLOTS);
                hw_rbd_p = &rgep->rx_ring[slot];
        }

        rgep->rx_next = slot;
        return (head);
}
/*
 * Receive all ready packets.
 */
void rge_receive(rge_t *rgep);
#pragma no_inline(rge_receive)

void
rge_receive(rge_t *rgep)
{
        mblk_t *mp;

        mutex_enter(rgep->rx_lock);
        mp = rge_receive_ring(rgep);
        mutex_exit(rgep->rx_lock);

        if (mp != NULL)
                mac_rx(rgep->mh, NULL, mp);
}
#undef	RGE_DBG
#define RGE_DBG		RGE_DBG_SEND	/* debug flag for this code	*/

/*
 * ========== Send-side recycle routines ==========
 */
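/*
 * rge_send_claim() -- claim the next slot in the send ring.  The caller
 * must already have reserved a place with
 * rge_atomic_reserve(&rgep->tx_free, 1), so this step cannot fail; it
 * simply advances tx_next (and the tx_flow count) under tx_lock and
 * returns the claimed slot index.
 */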
static uint32_t rge_send_claim(rge_t *rgep);
#pragma inline(rge_send_claim)

static uint32_t
rge_send_claim(rge_t *rgep)
{
        uint32_t slot;
        uint32_t next;

        mutex_enter(rgep->tx_lock);
        slot = rgep->tx_next;
        next = NEXT(slot, RGE_SEND_SLOTS);
        rgep->tx_next = next;
        rgep->tx_flow++;
        mutex_exit(rgep->tx_lock);

        /*
         * We check that our invariants still hold:
         * +	the slot and next indexes are in range
         * +	the slot must not be the last one (i.e. the *next*
         *	index must not match the next-recycle index), 'cos
         *	there must always be at least one free slot in a ring
         */
        ASSERT(slot < RGE_SEND_SLOTS);
        ASSERT(next < RGE_SEND_SLOTS);
        ASSERT(next != rgep->tc_next);

        return (slot);
}
/*
 * We don't want to call this function after every successful h/w
 * transmit completion in the ISR.  Instead, we call it from rge_send()
 * when there are few or no free tx BDs remaining.
 */
void rge_send_recycle(rge_t *rgep);
#pragma inline(rge_send_recycle)

void
rge_send_recycle(rge_t *rgep)
{
        rge_bd_t *hw_sbd_p;
        uint32_t tc_tail;
        uint32_t tc_head;
        uint32_t n;

        mutex_enter(rgep->tc_lock);
        tc_head = rgep->tc_next;
        tc_tail = rgep->tc_tail;
        if (tc_head == tc_tail)
                goto resched;

        do {
                tc_tail = LAST(tc_tail, RGE_SEND_SLOTS);
                hw_sbd_p = &rgep->tx_ring[tc_tail];
                if (tc_tail == tc_head) {
                        if (hw_sbd_p->flags_len &
                            RGE_BSWAP_32(BD_FLAG_HW_OWN)) {
                                /*
                                 * Recycled nothing: bump the watchdog counter,
                                 * thus guaranteeing that it's nonzero
                                 * (watchdog activated).
                                 */
                                if (rgep->watchdog == 0)
                                        rgep->watchdog = 1;
                                mutex_exit(rgep->tc_lock);
                                return;
                        }
                        break;
                }
        } while (hw_sbd_p->flags_len & RGE_BSWAP_32(BD_FLAG_HW_OWN));

        /*
         * Recycled something :-)
         */
        rgep->tc_next = NEXT(tc_tail, RGE_SEND_SLOTS);
        n = rgep->tc_next - tc_head;
        if (rgep->tc_next < tc_head)
                n += RGE_SEND_SLOTS;
        rge_atomic_renounce(&rgep->tx_free, n);
        ASSERT(rgep->tx_free <= RGE_SEND_SLOTS);

resched:
        mutex_exit(rgep->tc_lock);
        if (rgep->resched_needed &&
            rgep->rge_mac_state == RGE_MAC_STARTED) {
                rgep->resched_needed = B_FALSE;
                mac_tx_update(rgep->mh);
        }
}
/*
 * Send a message by copying it into a preallocated (and premapped) buffer
 */
static void rge_send_copy(rge_t *rgep, mblk_t *mp, uint16_t tci);
#pragma inline(rge_send_copy)

static void
rge_send_copy(rge_t *rgep, mblk_t *mp, uint16_t tci)
{
        rge_bd_t *hw_sbd_p;
        sw_sbd_t *ssbdp;
        mblk_t *bp;
        char *txb;
        uint32_t slot;
        size_t totlen;
        size_t mblen;
        uint32_t pflags;
        struct ether_header *ethhdr;
        struct ip *ip_hdr;

        /*
         * Up to the point where it claims a place, a send_msg()
         * routine can indicate failure by returning B_FALSE.  Once it's
         * claimed a place, it mustn't fail.
         *
         * In this version, there's no setup to be done here, and there's
         * nothing that can fail, so we can go straight to claiming our
         * already-reserved place on the train.
         *
         * This is the point of no return!
         */
        slot = rge_send_claim(rgep);
        ssbdp = &rgep->sw_sbds[slot];

        /*
         * Copy the data into a pre-mapped buffer, which avoids the
         * overhead (and complication) of mapping/unmapping STREAMS
         * buffers and keeping hold of them until the DMA has completed.
         *
         * Because all buffers are the same size, and larger than the
         * longest single valid message, we don't have to bother about
         * splitting the message across multiple buffers either.
         */
        txb = DMA_VPTR(ssbdp->pbuf);
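        /*
         * If the packet carries a VLAN tag, only the two MAC addresses are
         * copied from the first fragment; the 4-byte tag itself is skipped,
         * since the chip re-inserts it from the descriptor's vlan_tag field
         * (set further down from the <tci> argument).
         */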
        totlen = 0;
        bp = mp;
        if (tci != 0) {
                /*
                 * Do not copy the vlan tag
                 */
                bcopy(bp->b_rptr, txb, 2 * ETHERADDRL);
                txb += 2 * ETHERADDRL;
                totlen += 2 * ETHERADDRL;
                mblen = MBLKL(bp);
                ASSERT(mblen >= 2 * ETHERADDRL + VLAN_TAGSZ);
                mblen -= 2 * ETHERADDRL + VLAN_TAGSZ;
                if ((totlen += mblen) <= rgep->ethmax_size) {
                        bcopy(bp->b_rptr + 2 * ETHERADDRL + VLAN_TAGSZ,
                            txb, mblen);
                        txb += mblen;
                }
                bp = bp->b_cont;
                rgep->stats.obytes += VLAN_TAGSZ;
        }
        for (; bp != NULL; bp = bp->b_cont) {
                mblen = MBLKL(bp);
                if ((totlen += mblen) <= rgep->ethmax_size) {
                        bcopy(bp->b_rptr, txb, mblen);
                        txb += mblen;
                }
        }
        rgep->stats.obytes += totlen;
        rgep->stats.tx_pre_ismax = rgep->stats.tx_cur_ismax;
        if (totlen == rgep->ethmax_size)
                rgep->stats.tx_cur_ismax = B_TRUE;
        else
                rgep->stats.tx_cur_ismax = B_FALSE;
        /*
         * We've reached the end of the chain; and we should have
         * collected no more than ETHERMAX bytes into our buffer.
         */
        ASSERT(totlen <= rgep->ethmax_size);
        DMA_SYNC(ssbdp->pbuf, DDI_DMA_SYNC_FORDEV);
        /*
         * Update the hardware send buffer descriptor flags
         */
        hw_sbd_p = &rgep->tx_ring[slot];
        ASSERT(hw_sbd_p == ssbdp->desc.mem_va);
        hw_sbd_p->flags_len = RGE_BSWAP_32(totlen & SBD_LEN_MASK);
        if (tci != 0) {
                tci = TCI_OS2CHIP(tci);
                hw_sbd_p->vlan_tag = RGE_BSWAP_32(tci);
                hw_sbd_p->vlan_tag |= RGE_BSWAP_32(SBD_VLAN_PKT);
        } else {
                hw_sbd_p->vlan_tag = 0;
        }
        /*
         * h/w checksum offload flags
         */
        mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags);
        if (pflags & HCK_FULLCKSUM) {
                ASSERT(totlen >= sizeof (struct ether_header) +
                    sizeof (struct ip));
                ethhdr = (struct ether_header *)(DMA_VPTR(ssbdp->pbuf));
                /*
                 * Is the packet an IP(v4) packet?
                 */
                if (ntohs(ethhdr->ether_type) == ETHERTYPE_IP) {
                        ip_hdr = (struct ip *)
                            ((uint8_t *)DMA_VPTR(ssbdp->pbuf) +
                            sizeof (struct ether_header));
                        if (ip_hdr->ip_p == IPPROTO_TCP)
                                hw_sbd_p->flags_len |=
                                    RGE_BSWAP_32(SBD_FLAG_TCP_CKSUM);
                        else if (ip_hdr->ip_p == IPPROTO_UDP)
                                hw_sbd_p->flags_len |=
                                    RGE_BSWAP_32(SBD_FLAG_UDP_CKSUM);
                }
        }
        if (pflags & HCK_IPV4_HDRCKSUM)
                hw_sbd_p->flags_len |= RGE_BSWAP_32(SBD_FLAG_IP_CKSUM);

        HW_SBD_SET(hw_sbd_p, slot);
        /*
         * The message can be freed right away, as we've already
         * copied the contents ...
         */
        freemsg(mp);
}
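/*
 * rge_send() -- send a single message: reserve a send slot (returning
 * B_FALSE if none is free, so the caller can retry later), pick up the
 * VLAN TCI if the packet is tagged, copy the message into the slot via
 * rge_send_copy(), and update the flow control and statistics counts.
 */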
static boolean_t
rge_send(rge_t *rgep, mblk_t *mp)
{
        struct ether_vlan_header *ehp;
        uint16_t tci;

        ASSERT(mp->b_next == NULL);

        /*
         * Try to reserve a place in the transmit ring.
         */
        if (!rge_atomic_reserve(&rgep->tx_free, 1)) {
                RGE_DEBUG(("rge_send: no free slots"));
                rgep->resched_needed = B_TRUE;
                return (B_FALSE);
        }

        /*
         * Determine if the packet is VLAN tagged.
         */
        ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
        tci = 0;
        ehp = (struct ether_vlan_header *)mp->b_rptr;
        if (ehp->ether_tpid == htons(ETHERTYPE_VLAN))
                tci = ntohs(ehp->ether_tci);

        /*
         * We've reserved a place :-)
         * These ASSERTions check that our invariants still hold:
         *	there must still be at least one free place
         *	there must be at least one place NOT free (ours!)
         */
        ASSERT(rgep->tx_free < RGE_SEND_SLOTS);
        rge_send_copy(rgep, mp, tci);

        /*
         * Trigger chip h/w transmit ...
         */
        mutex_enter(rgep->tx_lock);
        if (--rgep->tx_flow == 0) {
                DMA_SYNC(rgep->tx_desc, DDI_DMA_SYNC_FORDEV);
                rgep->tc_tail = rgep->tx_next;
        }
        rgep->stats.opackets++;
        mutex_exit(rgep->tx_lock);

        return (B_TRUE);
}
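/*
 * rge_reschedule() -- handler for the driver's "reschedule" soft
 * interrupt: recycle completed send descriptors and, on PCI-E chips
 * that still have untransmitted packets queued, re-trigger the tx
 * engine (see the comment below).
 */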
uint_t
rge_reschedule(caddr_t arg1, caddr_t arg2)
{
        rge_t *rgep;

        rgep = (rge_t *)arg1;
        _NOTE(ARGUNUSED(arg2))

        rge_send_recycle(rgep);

        if (rgep->chipid.is_pcie && rgep->tx_free != RGE_SEND_SLOTS) {
                /*
                 * It's observed that in current Realtek PCI-E chips, the tx
                 * request for the second fragment of upper-layer packets
                 * will be ignored if the hardware transmission is in
                 * progress, and will not be processed when the tx engine
                 * is idle.  So one solution is to re-issue the requests
                 * if there are untransmitted packets after tx interrupts
                 * occur.
                 */
                rge_tx_trigger(rgep);
        }

        return (DDI_INTR_CLAIMED);
}
/*
 * rge_m_tx() - send a chain of packets
 */
mblk_t *
rge_m_tx(void *arg, mblk_t *mp)
{
        rge_t *rgep = arg;		/* private device info	*/
        mblk_t *next;
        mblk_t *mp_org = mp;

        ASSERT(mp != NULL);

        rw_enter(rgep->errlock, RW_READER);
        if ((rgep->rge_mac_state != RGE_MAC_STARTED) ||
            (rgep->rge_chip_state != RGE_CHIP_RUNNING) ||
            (rgep->param_link_up != LINK_STATE_UP)) {
                rw_exit(rgep->errlock);
                RGE_DEBUG(("rge_m_tx: tx doesn't work"));
                freemsgchain(mp);
                return (NULL);
        }

        while (mp != NULL) {
                next = mp->b_next;
                mp->b_next = NULL;

                if (!rge_send(rgep, mp)) {
                        mp->b_next = next;
                        break;
                }

                mp = next;
        }
        if (mp != mp_org)
                rge_tx_trigger(rgep);
        rw_exit(rgep->errlock);

        return (mp);
}