1 /******************************************************************************
3 Copyright (c) 2007, Myricom Inc.
4 Copyright (c) 2008, Intel Corporation.
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
13 2. Neither the name of the Myricom Inc, nor the names of its
14 contributors may be used to endorse or promote products derived from
15 this software without specific prior written permission.
17 3. Neither the name of the Intel Corporation, nor the names of its
18 contributors may be used to endorse or promote products derived from
19 this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 POSSIBILITY OF SUCH DAMAGE.
34 ***************************************************************************/
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/endian.h>
40 #include <sys/kernel.h>
41 #include <sys/socket.h>
44 #include <net/ethernet.h>
45 #include <net/if_media.h>
47 #include <netinet/in_systm.h>
48 #include <netinet/in.h>
49 #include <netinet/ip.h>
50 #include <netinet/tcp.h>
51 #include <netinet/tcp_lro.h>
53 #include <machine/bus.h>
54 #include <machine/in_cksum.h>
57 static uint16_t do_csum_data(uint16_t *raw
, int len
)
68 csum
= (csum
>> 16) + (csum
& 0xffff);
69 csum
= (csum
>> 16) + (csum
& 0xffff);
70 return (uint16_t)csum
;
74 * Allocate and init the LRO data structures
77 tcp_lro_init(struct lro_ctrl
*cntl
)
79 struct lro_entry
*lro
;
82 SLIST_INIT(&cntl
->lro_free
);
83 SLIST_INIT(&cntl
->lro_active
);
85 cntl
->lro_bad_csum
= 0;
87 cntl
->lro_flushed
= 0;
89 for (i
= 0; i
< LRO_ENTRIES
; i
++) {
90 lro
= (struct lro_entry
*) malloc(sizeof (struct lro_entry
),
91 M_DEVBUF
, M_NOWAIT
| M_ZERO
);
98 SLIST_INSERT_HEAD(&cntl
->lro_free
, lro
, next
);
105 tcp_lro_free(struct lro_ctrl
*cntl
)
107 struct lro_entry
*entry
;
109 while (!SLIST_EMPTY(&cntl
->lro_free
)) {
110 entry
= SLIST_FIRST(&cntl
->lro_free
);
111 SLIST_REMOVE_HEAD(&cntl
->lro_free
, next
);
112 free(entry
, M_DEVBUF
);
117 tcp_lro_flush(struct lro_ctrl
*cntl
, struct lro_entry
*lro
)
123 uint32_t tcplen
, tcp_csum
;
126 if (lro
->append_cnt
) {
127 /* incorporate the new len into the ip header and
128 * re-calculate the checksum */
130 ip
->ip_len
= htons(lro
->len
- ETHER_HDR_LEN
);
132 ip
->ip_sum
= 0xffff ^
133 do_csum_data((uint16_t*)ip
,
136 lro
->m_head
->m_pkthdr
.csum_flags
= CSUM_IP_CHECKED
|
137 CSUM_IP_VALID
| CSUM_DATA_VALID
| CSUM_PSEUDO_HDR
;
138 lro
->m_head
->m_pkthdr
.csum_data
= 0xffff;
139 lro
->m_head
->m_pkthdr
.len
= lro
->len
;
141 /* incorporate the latest ack into the tcp header */
142 tcp
= (struct tcphdr
*) (ip
+ 1);
143 tcp
->th_ack
= lro
->ack_seq
;
144 tcp
->th_win
= lro
->window
;
145 /* incorporate latest timestamp into the tcp header */
146 if (lro
->timestamp
) {
147 ts_ptr
= (uint32_t *)(tcp
+ 1);
148 ts_ptr
[1] = htonl(lro
->tsval
);
149 ts_ptr
[2] = lro
->tsecr
;
152 * update checksum in tcp header by re-calculating the
153 * tcp pseudoheader checksum, and adding it to the checksum
154 * of the tcp payload data
157 tcplen
= lro
->len
- sizeof(*ip
) - ETHER_HDR_LEN
;
158 tcp_csum
= lro
->data_csum
;
159 tcp_csum
+= in_pseudo(ip
->ip_src
.s_addr
, ip
->ip_dst
.s_addr
,
160 htons(tcplen
+ IPPROTO_TCP
));
161 tcp_csum
+= do_csum_data((uint16_t*)tcp
,
163 tcp_csum
= (tcp_csum
& 0xffff) + (tcp_csum
>> 16);
164 tcp_csum
= (tcp_csum
& 0xffff) + (tcp_csum
>> 16);
165 tcp
->th_sum
= 0xffff ^ tcp_csum
;
168 (*ifp
->if_input
)(cntl
->ifp
, lro
->m_head
);
169 cntl
->lro_queued
+= lro
->append_cnt
+ 1;
174 SLIST_INSERT_HEAD(&cntl
->lro_free
, lro
, next
);
178 tcp_lro_rx(struct lro_ctrl
*cntl
, struct mbuf
*m_head
, uint32_t csum
)
180 struct ether_header
*eh
;
184 struct mbuf
*m_nxt
, *m_tail
;
185 struct lro_entry
*lro
;
186 int hlen
, ip_len
, tcp_hdr_len
, tcp_data_len
, tot_len
;
187 int opt_bytes
, trim
, csum_flags
;
188 uint32_t seq
, tmp_csum
, device_mtu
;
191 eh
= mtod(m_head
, struct ether_header
*);
192 if (eh
->ether_type
!= htons(ETHERTYPE_IP
))
194 ip
= (struct ip
*) (eh
+ 1);
195 if (ip
->ip_p
!= IPPROTO_TCP
)
198 /* ensure there are no options */
199 if ((ip
->ip_hl
<< 2) != sizeof (*ip
))
202 /* .. and the packet is not fragmented */
203 if (ip
->ip_off
& htons(IP_MF
|IP_OFFMASK
))
206 /* verify that the IP header checksum is correct */
207 csum_flags
= m_head
->m_pkthdr
.csum_flags
;
208 if (csum_flags
& CSUM_IP_CHECKED
) {
209 if (__predict_false((csum_flags
& CSUM_IP_VALID
) == 0)) {
210 cntl
->lro_bad_csum
++;
214 tmp_csum
= do_csum_data((uint16_t *)ip
, sizeof (*ip
));
215 if (__predict_false((tmp_csum
^ 0xffff) != 0)) {
216 cntl
->lro_bad_csum
++;
221 /* find the TCP header */
222 tcp
= (struct tcphdr
*) (ip
+ 1);
224 /* Get the TCP checksum if we dont have it */
228 /* ensure no bits set besides ack or psh */
229 if ((tcp
->th_flags
& ~(TH_ACK
| TH_PUSH
)) != 0)
232 /* check for timestamps. Since the only option we handle are
233 timestamps, we only have to handle the simple case of
234 aligned timestamps */
236 opt_bytes
= (tcp
->th_off
<< 2) - sizeof (*tcp
);
237 tcp_hdr_len
= sizeof (*tcp
) + opt_bytes
;
238 ts_ptr
= (uint32_t *)(tcp
+ 1);
239 if (opt_bytes
!= 0) {
240 if (__predict_false(opt_bytes
!= TCPOLEN_TSTAMP_APPA
) ||
241 (*ts_ptr
!= ntohl(TCPOPT_NOP
<<24|TCPOPT_NOP
<<16|
242 TCPOPT_TIMESTAMP
<<8|TCPOLEN_TIMESTAMP
)))
246 ip_len
= ntohs(ip
->ip_len
);
247 tcp_data_len
= ip_len
- (tcp
->th_off
<< 2) - sizeof (*ip
);
251 * If frame is padded beyond the end of the IP packet,
252 * then we must trim the extra bytes off the end.
254 tot_len
= m_head
->m_pkthdr
.len
;
255 trim
= tot_len
- (ip_len
+ ETHER_HDR_LEN
);
258 /* truncated packet */
261 m_adj(m_head
, -trim
);
262 tot_len
= m_head
->m_pkthdr
.len
;
266 m_tail
= NULL
; /* -Wuninitialized */
267 while (m_nxt
!= NULL
) {
269 m_nxt
= m_tail
->m_next
;
272 hlen
= ip_len
+ ETHER_HDR_LEN
- tcp_data_len
;
273 seq
= ntohl(tcp
->th_seq
);
275 SLIST_FOREACH(lro
, &cntl
->lro_active
, next
) {
276 if (lro
->source_port
== tcp
->th_sport
&&
277 lro
->dest_port
== tcp
->th_dport
&&
278 lro
->source_ip
== ip
->ip_src
.s_addr
&&
279 lro
->dest_ip
== ip
->ip_dst
.s_addr
) {
280 /* Try to append it */
282 if (__predict_false(seq
!= lro
->next_seq
)) {
283 /* out of order packet */
284 SLIST_REMOVE(&cntl
->lro_active
, lro
,
286 tcp_lro_flush(cntl
, lro
);
291 uint32_t tsval
= ntohl(*(ts_ptr
+ 1));
292 /* make sure timestamp values are increasing */
293 if (__predict_false(lro
->tsval
> tsval
||
294 *(ts_ptr
+ 2) == 0)) {
298 lro
->tsecr
= *(ts_ptr
+ 2);
301 lro
->next_seq
+= tcp_data_len
;
302 lro
->ack_seq
= tcp
->th_ack
;
303 lro
->window
= tcp
->th_win
;
305 if (tcp_data_len
== 0) {
309 /* subtract off the checksum of the tcp header
310 * from the hardware checksum, and add it to the
311 * stored tcp data checksum. Byteswap the checksum
312 * if the total length so far is odd
314 tmp_csum
= do_csum_data((uint16_t*)tcp
,
316 csum
= csum
+ (tmp_csum
^ 0xffff);
317 csum
= (csum
& 0xffff) + (csum
>> 16);
318 csum
= (csum
& 0xffff) + (csum
>> 16);
319 if (lro
->len
& 0x1) {
320 /* Odd number of bytes so far, flip bytes */
321 csum
= ((csum
<< 8) | (csum
>> 8)) & 0xffff;
323 csum
= csum
+ lro
->data_csum
;
324 csum
= (csum
& 0xffff) + (csum
>> 16);
325 csum
= (csum
& 0xffff) + (csum
>> 16);
326 lro
->data_csum
= csum
;
328 lro
->len
+= tcp_data_len
;
330 /* adjust mbuf so that m->m_data points to
331 the first byte of the payload */
333 /* append mbuf chain */
334 lro
->m_tail
->m_next
= m_head
;
335 /* advance the last pointer */
336 lro
->m_tail
= m_tail
;
337 /* flush packet if required */
338 device_mtu
= cntl
->ifp
->if_mtu
;
339 if (lro
->len
> (65535 - device_mtu
)) {
340 SLIST_REMOVE(&cntl
->lro_active
, lro
,
342 tcp_lro_flush(cntl
, lro
);
348 if (SLIST_EMPTY(&cntl
->lro_free
))
351 /* start a new chain */
352 lro
= SLIST_FIRST(&cntl
->lro_free
);
353 SLIST_REMOVE_HEAD(&cntl
->lro_free
, next
);
354 SLIST_INSERT_HEAD(&cntl
->lro_active
, lro
, next
);
355 lro
->source_port
= tcp
->th_sport
;
356 lro
->dest_port
= tcp
->th_dport
;
357 lro
->source_ip
= ip
->ip_src
.s_addr
;
358 lro
->dest_ip
= ip
->ip_dst
.s_addr
;
359 lro
->next_seq
= seq
+ tcp_data_len
;
360 lro
->mss
= tcp_data_len
;
361 lro
->ack_seq
= tcp
->th_ack
;
362 lro
->window
= tcp
->th_win
;
364 /* save the checksum of just the TCP payload by
365 * subtracting off the checksum of the TCP header from
366 * the entire hardware checksum
367 * Since IP header checksum is correct, checksum over
368 * the IP header is -0. Substracting -0 is unnecessary.
370 tmp_csum
= do_csum_data((uint16_t*)tcp
, tcp_hdr_len
);
371 csum
= csum
+ (tmp_csum
^ 0xffff);
372 csum
= (csum
& 0xffff) + (csum
>> 16);
373 csum
= (csum
& 0xffff) + (csum
>> 16);
374 lro
->data_csum
= csum
;
377 /* record timestamp if it is present */
380 lro
->tsval
= ntohl(*(ts_ptr
+ 1));
381 lro
->tsecr
= *(ts_ptr
+ 2);
384 lro
->m_head
= m_head
;
385 lro
->m_tail
= m_tail
;