1 /**************************************************************************
3 Copyright (c) 2007, Chelsio Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
32 __KERNEL_RCSID(0, "$NetBSD: cxgb_lro.c,v 1.5 2007/12/15 00:39:29 perry Exp $");
35 __FBSDID("$FreeBSD: src/sys/dev/cxgb/cxgb_lro.c,v 1.8 2007/08/25 21:07:36 kmacy Exp $");
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
43 #include <sys/module.h>
47 #include <machine/bus.h>
49 #include <machine/resource.h>
50 #include <sys/bus_dma.h>
53 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
58 #include <netinet/in_systm.h>
59 #include <netinet/in.h>
60 #include <netinet/ip.h>
61 #include <netinet/tcp.h>
66 #include <cxgb_include.h>
68 #include <dev/cxgb/cxgb_include.h>
71 #include <machine/in_cksum.h>
75 #include "cxgb_include.h"
/*
 * MBUF_HEADER_CHECK(m): sanity check applied to the first mbuf of a packet.
 *
 * The first definition panics when m_len or m_pkthdr.len is zero, when
 * M_PKTHDR is not set in m_flags, or when either length is smaller than
 * ETHER_HDR_LEN.  The separate M_PKTHDR test repeats a condition that the
 * first test already covers.  The second definition expands to nothing.
 *
 * NOTE(review): the preprocessor conditional that selects between the two
 * definitions, the argument list of the second panic call and the end of
 * the do-block are not visible in this listing.  The leading numbers look
 * like line numbers carried over from the original file -- confirm against
 * a pristine copy before building.
 */
83 #define MBUF_HEADER_CHECK(m) do { \
84 if ((m->m_len == 0) || (m->m_pkthdr.len == 0) \
85 || ((m->m_flags & M_PKTHDR) == 0)) \
86 panic("lro_flush_session - mbuf len=%d pktlen=%d flags=0x%x\n", \
87 m->m_len, m->m_pkthdr.len, m->m_flags); \
88 if ((m->m_flags & M_PKTHDR) == 0) \
89 panic("first mbuf is not packet header - flags=0x%x\n", \
91 if ((m->m_len < ETHER_HDR_LEN) || (m->m_pkthdr.len < ETHER_HDR_LEN)) \
92 panic("packet too small len=%d pktlen=%d\n", \
93 m->m_len, m->m_pkthdr.len);\
96 #define MBUF_HEADER_CHECK(m)
/*
 * Byte offset of the IP header from the start of the mbuf data: the same
 * 2-byte lead-in used elsewhere in this file to locate the CPL header,
 * followed by the CPL RX packet header and the Ethernet header.
 */
#define IPH_OFFSET (2 + sizeof (struct cpl_rx_pkt) + ETHER_HDR_LEN)

/*
 * Reduce a hash value to a session slot index by masking with
 * MAX_LRO_SES - 1 (presumably MAX_LRO_SES is a power of two -- confirm).
 * The argument is parenthesized so that expressions such as (a | b) are
 * masked as a whole.
 */
#define LRO_SESSION_IDX_HINT_HASH(hash) ((hash) & (MAX_LRO_SES - 1))

/*
 * Advance idx to the next session slot, wrapping through the same mask.
 * idx must be a modifiable lvalue; it is evaluated more than once.
 */
#define LRO_IDX_INC(idx) ((idx) = ((idx) + 1) & (MAX_LRO_SES - 1))
104 lro_match(struct mbuf
*m
, struct ip
*ih
, struct tcphdr
*th
)
106 struct ip
*sih
= (struct ip
*)(mtod(m
, uint8_t *) + IPH_OFFSET
);
107 struct tcphdr
*sth
= (struct tcphdr
*) (sih
+ 1);
109 return (th
->th_sport
== sth
->th_sport
&&
110 th
->th_dport
== sth
->th_dport
&&
111 ih
->ip_src
.s_addr
== sih
->ip_src
.s_addr
&&
112 ih
->ip_dst
.s_addr
== sih
->ip_dst
.s_addr
);
/*
 * lro_lookup: search the LRO state l for a session whose head mbuf belongs
 * to the same flow as the given IP and TCP headers.
 *
 * l   - per-queue LRO state; its nactive count is read into active
 * idx - slot index hint (the caller in this file passes
 *       LRO_SESSION_IDX_HINT_HASH(rss_hash))
 * ih  - IP header of the incoming packet
 * th  - TCP header of the incoming packet
 *
 * Returns a pointer to a struct t3_lro_session; the caller in this file
 * treats a NULL result as no existing session.
 *
 * NOTE(review): the loop that walks the slots and the return statements are
 * not visible in this listing; only the declarations and the lro_match()
 * test on s->head remain.
 */
115 static __inline
struct t3_lro_session
*
116 lro_lookup(struct lro_state
*l
, int idx
, struct ip
*ih
, struct tcphdr
*th
)
118 struct t3_lro_session
*s
= NULL
;
119 int active
= l
->nactive
;
/* Compare the candidate session head against the incoming headers. */
124 if (lro_match(s
->head
, ih
, th
))
/*
 * can_lro_packet: packet-level eligibility test for LRO.
 *
 * cpl    - CPL RX packet header; the Ethernet header follows it directly
 *          and the IP header follows the Ethernet header
 * rss_hi - RSS word, converted with ntohl() before the hash type is
 *          extracted with G_HASHTYPE()
 *
 * The visible condition takes the unlikely branch when any of these holds:
 *   - the hash type is not RSS_HASH_4_TUPLE
 *   - the second byte of the CPL header, masked with 0x90, is not 0x10
 *   - cpl->csum is not 0xffff
 *   - the Ethernet type is not ETHERTYPE_IP
 *   - ip_hl differs from sizeof(struct ip) / 4
 *
 * NOTE(review): the return type, the body of the branch and the return
 * statements are not visible in this listing; the caller tests the result
 * with a logical not, so nonzero presumably means eligible -- confirm.
 */
135 can_lro_packet(struct cpl_rx_pkt
*cpl
, unsigned int rss_hi
)
137 struct ether_header
*eh
= (struct ether_header
*)(cpl
+ 1);
138 struct ip
*ih
= (struct ip
*)(eh
+ 1);
143 if (__predict_false(G_HASHTYPE(ntohl(rss_hi
)) != RSS_HASH_4_TUPLE
||
144 (*((uint8_t *)cpl
+ 1) & 0x90) != 0x10 ||
145 cpl
->csum
!= 0xffff || eh
->ether_type
!= ntohs(ETHERTYPE_IP
) ||
146 ih
->ip_hl
!= (sizeof (*ih
) >> 2))) {
/*
 * can_lro_tcpsegment: TCP-level eligibility test for LRO.
 *
 * th - TCP header of the incoming segment
 *
 * olen is the number of TCP option bytes (header length from th_off minus
 * the fixed header).  control_bits is byte 13 of the TCP header, which in
 * the standard TCP layout holds the flag bits.  The first test takes the
 * unlikely branch unless, under mask 0xB7, exactly bit 0x10 is set; bits
 * 0x08 and 0x40 are outside the mask and therefore ignored.
 *
 * The second test looks at the options: it takes the unlikely branch when
 * olen is not TCPOLEN_TSTAMP_APPA or when the first option word does not
 * match a value built from TCPOPT_NOP and TCPOPT_TIMESTAMP.
 *
 * NOTE(review): the return type, the condition guarding the option check,
 * the rest of the option-word expression and all return statements are not
 * visible in this listing.
 */
154 can_lro_tcpsegment(struct tcphdr
*th
)
156 int olen
= (th
->th_off
<< 2) - sizeof (*th
);
157 u8 control_bits
= *((u8
*)th
+ 13);
159 if (__predict_false((control_bits
& 0xB7) != 0x10))
/* The TCP options start right after the fixed header. */
163 uint32_t *ptr
= (u32
*)(th
+ 1);
164 if (__predict_false(olen
!= TCPOLEN_TSTAMP_APPA
||
165 *ptr
!= ntohl((TCPOPT_NOP
<< 24) |
167 (TCPOPT_TIMESTAMP
<< 8) |
/*
 * lro_new_session_init: prime session s from the packet held in mbuf m.
 *
 * s - session to initialize
 * m - mbuf whose IP header sits IPH_OFFSET bytes into its data, with the
 *     TCP header immediately after a struct ip
 *
 * The visible code reads the IP total length, runs MBUF_HEADER_CHECK on m
 * and stores in s->seq the sequence number of this segment plus its TCP
 * payload length (IP length minus IP header minus TCP header).
 * lro_update_session() compares s->seq with the sequence number of the
 * next segment.
 *
 * NOTE(review): the return type and several statements between the visible
 * lines are missing from this listing (the embedded numbers skip 186-188,
 * 190 and 192); whatever other session fields are set there is not shown.
 */
179 lro_new_session_init(struct t3_lro_session
*s
, struct mbuf
*m
)
181 struct ip
*ih
= (struct ip
*)(mtod(m
, uint8_t *) + IPH_OFFSET
);
182 struct tcphdr
*th
= (struct tcphdr
*) (ih
+ 1);
183 int ip_len
= ntohs(ih
->ip_len
);
185 DPRINTF("%s(s=%p, m=%p)\n", __func__
, s
, m
);
189 MBUF_HEADER_CHECK(m
);
/* Sequence number expected on the next in-order segment. */
191 s
->seq
= ntohl(th
->th_seq
) + ip_len
- sizeof(*ih
) - (th
->th_off
<< 2);
/*
 * lro_flush_session: hand the packet accumulated in session s to the
 * regular receive path.
 *
 * qs - queue set owning the session; supplies the LRO state, the adapter,
 *      the response queue and the port statistics
 * s  - session to flush; its head mbuf is the packet delivered
 * m  - mbuf passed on to lro_new_session_init(); the debug output has a
 *      separate m=NULL case and callers in this file pass NULL
 *
 * The visible code writes s->ip_len into the IP header of the head mbuf in
 * network byte order, recomputes the IP header checksum with
 * in_cksum_hdr(), runs MBUF_HEADER_CHECK, sets M_LRO in the mbuf flags and
 * calls t3_rx_eth() with a pad argument of 2.  It then calls
 * lro_new_session_init(s, m) and increments the SGE_PSTATS_LRO_FLUSHED
 * counter.
 *
 * NOTE(review): the return type, the conditionals around the debug output
 * and around lro_new_session_init(), and any bookkeeping on the LRO state
 * are not visible in this listing.
 */
196 lro_flush_session(struct sge_qset
*qs
, struct t3_lro_session
*s
, struct mbuf
*m
)
198 struct lro_state
*l
= &qs
->lro
;
199 struct mbuf
*sm
= s
->head
;
200 struct ip
*ih
= (struct ip
*)(mtod(sm
, uint8_t *) + IPH_OFFSET
);
203 DPRINTF("%s(qs=%p, s=%p, ", __func__
,
207 DPRINTF("m=%p)\n", m
);
209 DPRINTF("m=NULL)\n");
/* Finalize the IP header of the merged packet: length, then checksum. */
211 ih
->ip_len
= htons(s
->ip_len
);
213 ih
->ip_sum
= in_cksum_hdr(ih
);
215 MBUF_HEADER_CHECK(sm
);
/* Mark the mbuf as an LRO result and pass it up the stack. */
217 sm
->m_flags
|= M_LRO
;
218 t3_rx_eth(qs
->port
->adapter
, &qs
->rspq
, sm
, 2);
222 lro_new_session_init(s
, m
);
228 qs
->port_stats
[SGE_PSTATS_LRO_FLUSHED
]++;
/*
 * lro_new_session: obtain a session slot for the flow that mbuf m starts.
 *
 * qs       - queue set owning the LRO state
 * m        - first mbuf of the new flow
 * rss_hash - hash value; LRO_SESSION_IDX_HINT_HASH() turns it into the
 *            index of the first slot considered
 *
 * Returns a pointer to a struct t3_lro_session.
 *
 * The visible code tests whether the hinted slot has no head mbuf, panics
 * when nactive exceeds MAX_LRO_SES, and when nactive equals MAX_LRO_SES
 * flushes session s with m as the replacement packet and increments the
 * SGE_PSTATS_LRO_X_STREAMS counter.  lro_new_session_init(s, m) is called
 * further down.
 *
 * NOTE(review): the branch bodies, any search for another free slot, the
 * updates to the LRO state and the return statements are not visible in
 * this listing.
 */
231 static __inline
struct t3_lro_session
*
232 lro_new_session(struct sge_qset
*qs
, struct mbuf
*m
, uint32_t rss_hash
)
234 struct lro_state
*l
= &qs
->lro
;
235 int idx
= LRO_SESSION_IDX_HINT_HASH(rss_hash
);
236 struct t3_lro_session
*s
= &l
->sess
[idx
];
238 DPRINTF("%s(qs=%p, m=%p, rss_hash=0x%x)\n", __func__
,
/* Expected case: the hinted slot holds no packet. */
241 if (__predict_true(!s
->head
))
244 if (l
->nactive
> MAX_LRO_SES
)
245 panic("MAX_LRO_PER_QSET exceeded");
/* Every slot is in use: flush the session in the hinted slot. */
247 if (l
->nactive
== MAX_LRO_SES
) {
248 lro_flush_session(qs
, s
, m
);
249 qs
->port_stats
[SGE_PSTATS_LRO_X_STREAMS
]++;
260 lro_new_session_init(s
, m
);
/*
 * lro_update_session: try to merge the segment in mbuf m into session s.
 *
 * s - session whose head mbuf accumulates the merged packet
 * m - mbuf holding the new segment; its CPL header sits 2 bytes into the
 *     data and its IP header IPH_OFFSET bytes in
 *
 * Visible steps, in order:
 *   - a test on the CPL headers: the head has vlan_valid set and its vlan
 *     differs from the vlan of the new packet
 *   - a test that the new sequence number equals s->seq
 *   - MBUF_HEADER_CHECK on the head mbuf
 *   - a comparison of the option word at index 1 of the head against the
 *     one of the new segment, after which option words 1 and 2 are copied
 *     from the new segment into the head
 *   - th_ack and th_win are copied from the new segment into the head
 *   - plen, the TCP payload length of the new segment, is added to the
 *     m_pkthdr.len of the head
 *   - tso_segsz of the head is raised to plen when plen is larger
 *   - m_adj() strips IPH_OFFSET plus the TCP/IP header length from the
 *     front of m
 *
 * The caller in this file flushes the session when the result is nonzero.
 *
 * NOTE(review): the return type, the bodies of the tests, all return
 * statements and the code that links m into the head are not visible in
 * this listing.  The skb_shinfo lines refer to Linux skb fields and are
 * presumably inside a disabled region whose markers are missing; the
 * conditional opened for __FreeBSD_version has no visible end.  Confirm
 * against a pristine copy.
 */
267 lro_update_session(struct t3_lro_session
*s
, struct mbuf
*m
)
269 struct mbuf
*sm
= s
->head
;
270 struct cpl_rx_pkt
*cpl
= (struct cpl_rx_pkt
*)(mtod(sm
, uint8_t *) + 2);
271 struct cpl_rx_pkt
*ncpl
= (struct cpl_rx_pkt
*)(mtod(m
, uint8_t *) + 2);
272 struct ip
*nih
= (struct ip
*)(mtod(m
, uint8_t *) + IPH_OFFSET
);
273 struct tcphdr
*th
, *nth
= (struct tcphdr
*)(nih
+ 1);
274 uint32_t seq
= ntohl(nth
->th_seq
);
275 int plen
, tcpiphlen
, olen
= (nth
->th_off
<< 2) - sizeof (*nth
);
278 DPRINTF("%s(s=%p, m=%p)\n", __func__
, s
, m
);
279 if (cpl
->vlan_valid
&& cpl
->vlan
!= ncpl
->vlan
) {
/* The new segment must start at the sequence number the session expects. */
282 if (__predict_false(seq
!= s
->seq
)) {
283 DPRINTF("sequence mismatch\n");
287 MBUF_HEADER_CHECK(sm
);
288 th
= (struct tcphdr
*)(mtod(sm
, uint8_t *) + IPH_OFFSET
+ sizeof (struct ip
));
/* Option words of the head (ptr) and of the new segment (nptr). */
291 uint32_t *ptr
= (uint32_t *)(th
+ 1);
292 uint32_t *nptr
= (uint32_t *)(nth
+ 1);
294 if (__predict_false(ntohl(*(ptr
+ 1)) > ntohl(*(nptr
+ 1)) ||
298 *(ptr
+ 1) = *(nptr
+ 1);
299 *(ptr
+ 2) = *(nptr
+ 2);
/* Carry the newest acknowledgment number and window into the head. */
301 th
->th_ack
= nth
->th_ack
;
302 th
->th_win
= nth
->th_win
;
/* plen is the TCP payload length of the new segment. */
304 tcpiphlen
= (nth
->th_off
<< 2) + sizeof (*nih
);
305 plen
= ntohs(nih
->ip_len
) - tcpiphlen
;
308 sm
->m_pkthdr
.len
+= plen
;
317 /* XXX this I *do not* understand */
318 if (plen
> skb_shinfo(s
->skb
)->gso_size
)
319 skb_shinfo(s
->skb
)->gso_size
= plen
;
321 #if __FreeBSD_version > 700000
322 if (plen
> sm
->m_pkthdr
.tso_segsz
)
323 sm
->m_pkthdr
.tso_segsz
= plen
;
/* Drop the headers of the new segment so that only payload remains in m. */
325 DPRINTF("m_adj(%d)\n", (int)(IPH_OFFSET
+ tcpiphlen
));
326 m_adj(m
, IPH_OFFSET
+ tcpiphlen
);
328 if (__predict_false(!skb_shinfo(s
->skb
)->frag_list
))
329 skb_shinfo(s
->skb
)->frag_list
= skb
;
336 * XXX we really need to be able to
337 * support vectors of buffers in FreeBSD
339 int nr
= skb_shinfo(s
->skb
)->nr_frags
;
340 skb_shinfo(s
->skb
)->frags
[nr
].page
= frag
->page
;
341 skb_shinfo(s
->skb
)->frags
[nr
].page_offset
=
342 frag
->page_offset
+ IPH_OFFSET
+ tcpiphlen
;
343 skb_shinfo(s
->skb
)->frags
[nr
].size
= plen
;
344 skb_shinfo(s
->skb
)->nr_frags
= ++nr
;
/*
 * t3_rx_eth_lro: receive entry point that attempts LRO on an incoming
 * packet before falling back to t3_rx_eth().
 *
 * adap     - adapter, passed through to t3_rx_eth()
 * rq       - response queue; rspq_to_qset() maps it to the owning queue set
 * m        - mbuf holding the packet; the CPL header starts ethpad bytes
 *            into its data
 * ethpad   - byte offset of the CPL header within the mbuf data
 * rss_hash - hash value used as the session slot hint
 * rss_csum - word handed to can_lro_packet() as its rss_hi argument
 * lro      - not referenced in the visible lines
 *
 * Visible flow: the packet is tested with can_lro_packet(), a session is
 * looked up with lro_lookup(), and the segment is tested with
 * can_lro_tcpsegment().  When no session exists one is obtained from
 * lro_new_session().  When lro_update_session() returns nonzero the session
 * is flushed with m as the replacement packet.  A session whose head length
 * plus the interface MTU exceeds 65535 is flushed with no replacement.
 * SGE_PSTATS_LRO_QUEUED counts queued packets.  Further down there is
 * another flush with no replacement, a debug message for an mbuf with zero
 * length or without M_PKTHDR, and a call to t3_rx_eth().
 *
 * NOTE(review): the return type, the declarations of ih, th and pi, the
 * branch bodies, labels and return statements are not visible in this
 * listing, so which paths reach t3_rx_eth() cannot be confirmed from here.
 */
351 t3_rx_eth_lro(adapter_t
*adap
, struct sge_rspq
*rq
, struct mbuf
*m
,
352 int ethpad
, uint32_t rss_hash
, uint32_t rss_csum
, int lro
)
354 struct sge_qset
*qs
= rspq_to_qset(rq
);
355 struct cpl_rx_pkt
*cpl
= (struct cpl_rx_pkt
*)(mtod(m
, uint8_t *) + ethpad
);
356 struct ether_header
*eh
= (struct ether_header
*)(cpl
+ 1);
359 struct t3_lro_session
*s
= NULL
;
364 if (!can_lro_packet(cpl
, rss_csum
))
/* The IP header follows the Ethernet header, the TCP header follows it. */
367 ih
= (struct ip
*)(eh
+ 1);
368 th
= (struct tcphdr
*)(ih
+ 1);
370 s
= lro_lookup(&qs
->lro
,
371 LRO_SESSION_IDX_HINT_HASH(rss_hash
), ih
, th
);
373 if (__predict_false(!can_lro_tcpsegment(th
))) {
375 } else if (__predict_false(!s
)) {
376 s
= lro_new_session(qs
, m
, rss_hash
);
/* A nonzero result means the segment was not merged: flush and restart. */
378 if (lro_update_session(s
, m
)) {
379 lro_flush_session(qs
, s
, m
);
/* Flush when one more MTU-sized segment could push the total past 65535. */
382 if (__predict_false(s
->head
->m_pkthdr
.len
+ pi
->ifp
->if_mtu
> 65535)) {
383 lro_flush_session(qs
, s
, NULL
);
388 qs
->port_stats
[SGE_PSTATS_LRO_QUEUED
]++;
392 lro_flush_session(qs
, s
, NULL
);
394 if (m
->m_len
== 0 || m
->m_pkthdr
.len
== 0 || (m
->m_flags
& M_PKTHDR
) == 0)
395 DPRINTF("rx_eth_lro mbuf len=%d pktlen=%d flags=0x%x\n",
396 m
->m_len
, m
->m_pkthdr
.len
, m
->m_flags
);
/* Deliver the packet through the regular receive path. */
398 t3_rx_eth(adap
, rq
, m
, ethpad
);
/*
 * t3_lro_flush: flush the sessions of an LRO state.
 *
 * adap  - adapter; not referenced in the visible lines
 * qs    - queue set handed to lro_flush_session()
 * state - LRO state whose sessions are flushed
 *
 * Starting from state->active_idx, the loop runs while state->nactive is
 * nonzero and calls lro_flush_session() with no replacement packet on the
 * session at the current index.
 *
 * NOTE(review): the return type, any test on the session before the flush,
 * the index advance and the end of the loop are not visible in this
 * listing; the function runs past the end of the visible text.
 */
402 t3_lro_flush(adapter_t
*adap
, struct sge_qset
*qs
, struct lro_state
*state
)
404 unsigned int idx
= state
->active_idx
;
406 while (state
->nactive
) {
407 struct t3_lro_session
*s
= &state
->sess
[idx
];
410 lro_flush_session(qs
, s
, NULL
);