4 * Transmission Control Protocol, incoming traffic
6 * The input processing functions of TCP.
8 * These functions are generally called in the order (ip_input() ->) tcp_input() ->
9 * tcp_process() -> tcp_receive() (-> application).
14 * Copyright (c) 2001-2004 Swedish Institute of Computer Science.
15 * All rights reserved.
17 * Redistribution and use in source and binary forms, with or without modification,
18 * are permitted provided that the following conditions are met:
20 * 1. Redistributions of source code must retain the above copyright notice,
21 * this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright notice,
23 * this list of conditions and the following disclaimer in the documentation
24 * and/or other materials provided with the distribution.
25 * 3. The name of the author may not be used to endorse or promote products
26 * derived from this software without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
29 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
30 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
31 * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
33 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
36 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 * This file is part of the lwIP TCP/IP stack.
41 * Author: Adam Dunkels <adam@sics.se>
48 #include "lwip/ip_addr.h"
49 #include "lwip/netif.h"
51 #include "lwip/memp.h"
53 #include "lwip/inet.h"
56 #include "lwip/stats.h"
58 #include "arch/perf.h"
60 /* These variables are global to all functions involved in the input
61 processing of TCP segments. They are set by the tcp_input() function. */
/* Descriptor for the segment currently being processed; tcp_input() fills in
   its len, dataptr and tcphdr fields and frees inseg.p afterwards if it is
   still non-NULL. */
63 static struct tcp_seg inseg
;
/* TCP header of the current segment, located in the pbuf payload just past
   the IP header. */
64 static struct tcp_hdr
*tcphdr
;
/* IP header of the current segment; supplies the header length and the
   source/destination addresses used for PCB matching and for resets. */
65 static struct ip_hdr
*iphdr
;
/* Sequence and acknowledgement numbers of the current segment, converted to
   host byte order by tcp_input(). */
66 static u32_t seqno
, ackno
;
/* Result flags (TF_RESET, TF_CLOSED, TF_GOT_FIN) raised while the segment is
   processed and examined by tcp_input() afterwards. */
70 static u8_t recv_flags
;
/* Received data destined for the application; tcp_input() passes it to
   TCP_EVENT_RECV when it is not NULL. */
71 static struct pbuf
*recv_data
;
/* Not static, so it has external linkage. tcp_input() resets it to NULL once
   tcp_process() has returned. */
73 struct tcp_pcb
*tcp_input_pcb
;
75 /* Forward declarations. */
/* TCP state machine for a segment matched to an active PCB; called from
   tcp_input(). */
76 static err_t
tcp_process(struct tcp_pcb
*pcb
);
/* ACK, window and data handling for the current segment; called from
   tcp_process(). */
77 static void tcp_receive(struct tcp_pcb
*pcb
);
/* Parses the TCP options of the current segment (the MSS option). */
78 static void tcp_parseopt(struct tcp_pcb
*pcb
);
/* Handles a segment that matched a listening PCB; called from tcp_input(). */
80 static err_t
tcp_listen_input(struct tcp_pcb_listen
*pcb
);
/* Handles a segment that matched a PCB on the TIME-WAIT list; called from
   tcp_input(). */
81 static err_t
tcp_timewait_input(struct tcp_pcb
*pcb
);
86 /* The initial input processing of TCP. It verifies the TCP header, demultiplexes
87 * the segment between the PCBs and passes it on to tcp_process(), which implements
88 * the TCP finite state machine. This function is called by the IP layer (in ip_input()). */
93 tcp_input(struct pbuf
*p
, struct netif
*inp
)
95 struct tcp_pcb
*pcb
, *prev
;
96 struct tcp_pcb_listen
*lpcb
;
102 TCP_STATS_INC(tcp
.recv
);
105 tcphdr
= (struct tcp_hdr
*)((u8_t
*)p
->payload
+ IPH_HL(iphdr
) * 4);
108 tcp_debug_print(tcphdr
);
111 /* remove header from payload */
112 if (pbuf_header(p
, -((s16_t
)(IPH_HL(iphdr
) * 4))) || (p
->tot_len
< sizeof(struct tcp_hdr
))) {
113 /* drop short packets */
114 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_input: short packet (%"U16_F
" bytes) discarded\n", p
->tot_len
));
115 TCP_STATS_INC(tcp
.lenerr
);
116 TCP_STATS_INC(tcp
.drop
);
121 /* Don't even process incoming broadcasts/multicasts. */
122 if (ip_addr_isbroadcast(&(iphdr
->dest
), inp
) ||
123 ip_addr_ismulticast(&(iphdr
->dest
))) {
128 #if CHECKSUM_CHECK_TCP
129 /* Verify TCP checksum. */
130 if (inet_chksum_pseudo(p
, (struct ip_addr
*)&(iphdr
->src
),
131 (struct ip_addr
*)&(iphdr
->dest
),
132 IP_PROTO_TCP
, p
->tot_len
) != 0) {
133 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_input: packet discarded due to failing checksum 0x%04"X16_F
"\n",
134 inet_chksum_pseudo(p
, (struct ip_addr
*)&(iphdr
->src
), (struct ip_addr
*)&(iphdr
->dest
),
135 IP_PROTO_TCP
, p
->tot_len
)));
137 tcp_debug_print(tcphdr
);
138 #endif /* TCP_DEBUG */
139 TCP_STATS_INC(tcp
.chkerr
);
140 TCP_STATS_INC(tcp
.drop
);
147 /* Move the payload pointer in the pbuf so that it points to the
148 TCP data instead of the TCP header. */
149 hdrlen
= TCPH_HDRLEN(tcphdr
);
150 pbuf_header(p
, -(hdrlen
* 4));
152 /* Convert fields in TCP header to host byte order. */
153 tcphdr
->src
= ntohs(tcphdr
->src
);
154 tcphdr
->dest
= ntohs(tcphdr
->dest
);
155 seqno
= tcphdr
->seqno
= ntohl(tcphdr
->seqno
);
156 ackno
= tcphdr
->ackno
= ntohl(tcphdr
->ackno
);
157 tcphdr
->wnd
= ntohs(tcphdr
->wnd
);
159 flags
= TCPH_FLAGS(tcphdr
) & TCP_FLAGS
;
160 tcplen
= p
->tot_len
+ ((flags
& TCP_FIN
|| flags
& TCP_SYN
)? 1: 0);
162 /* Demultiplex an incoming segment. First, we check if it is destined
163 for an active connection. */
167 for(pcb
= tcp_active_pcbs
; pcb
!= NULL
; pcb
= pcb
->next
) {
168 LWIP_ASSERT("tcp_input: active pcb->state != CLOSED", pcb
->state
!= CLOSED
);
169 LWIP_ASSERT("tcp_input: active pcb->state != TIME-WAIT", pcb
->state
!= TIME_WAIT
);
170 LWIP_ASSERT("tcp_input: active pcb->state != LISTEN", pcb
->state
!= LISTEN
);
171 if (pcb
->remote_port
== tcphdr
->src
&&
172 pcb
->local_port
== tcphdr
->dest
&&
173 ip_addr_cmp(&(pcb
->remote_ip
), &(iphdr
->src
)) &&
174 ip_addr_cmp(&(pcb
->local_ip
), &(iphdr
->dest
))) {
176 /* Move this PCB to the front of the list so that subsequent
177 lookups will be faster (we exploit locality in TCP segment arrivals). */
179 LWIP_ASSERT("tcp_input: pcb->next != pcb (before cache)", pcb
->next
!= pcb
);
181 prev
->next
= pcb
->next
;
182 pcb
->next
= tcp_active_pcbs
;
183 tcp_active_pcbs
= pcb
;
185 LWIP_ASSERT("tcp_input: pcb->next != pcb (after cache)", pcb
->next
!= pcb
);
192 /* If it did not go to an active connection, we check the connections
193 in the TIME-WAIT state. */
195 for(pcb
= tcp_tw_pcbs
; pcb
!= NULL
; pcb
= pcb
->next
) {
196 LWIP_ASSERT("tcp_input: TIME-WAIT pcb->state == TIME-WAIT", pcb
->state
== TIME_WAIT
);
197 if (pcb
->remote_port
== tcphdr
->src
&&
198 pcb
->local_port
== tcphdr
->dest
&&
199 ip_addr_cmp(&(pcb
->remote_ip
), &(iphdr
->src
)) &&
200 ip_addr_cmp(&(pcb
->local_ip
), &(iphdr
->dest
))) {
201 /* We don't really care enough to move this PCB to the front
202 of the list since we are not very likely to receive that
203 many segments for connections in TIME-WAIT. */
204 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_input: packed for TIME_WAITing connection.\n"));
205 tcp_timewait_input(pcb
);
211 /* Finally, if we still did not get a match, we check all PCBs that
212 are LISTENing for incoming connections. */
214 for(lpcb
= tcp_listen_pcbs
.listen_pcbs
; lpcb
!= NULL
; lpcb
= lpcb
->next
) {
215 if ((ip_addr_isany(&(lpcb
->local_ip
)) ||
216 ip_addr_cmp(&(lpcb
->local_ip
), &(iphdr
->dest
))) &&
217 lpcb
->local_port
== tcphdr
->dest
) {
218 /* Move this PCB to the front of the list so that subsequent
219 lookups will be faster (we exploit locality in TCP segment arrivals). */
222 ((struct tcp_pcb_listen
*)prev
)->next
= lpcb
->next
;
223 /* our successor is the remainder of the listening list */
224 lpcb
->next
= tcp_listen_pcbs
.listen_pcbs
;
225 /* put this listening pcb at the head of the listening list */
226 tcp_listen_pcbs
.listen_pcbs
= lpcb
;
229 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_input: packed for LISTENing connection.\n"));
230 tcp_listen_input(lpcb
);
234 prev
= (struct tcp_pcb
*)lpcb
;
239 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("+-+-+-+-+-+-+-+-+-+-+-+-+-+- tcp_input: flags "));
240 tcp_debug_print_flags(TCPH_FLAGS(tcphdr
));
241 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("-+-+-+-+-+-+-+-+-+-+-+-+-+-+\n"));
242 #endif /* TCP_INPUT_DEBUG */
246 /* The incoming segment belongs to a connection. */
249 tcp_debug_print_state(pcb
->state
);
250 #endif /* TCP_DEBUG */
251 #endif /* TCP_INPUT_DEBUG */
253 /* Set up a tcp_seg structure. */
255 inseg
.len
= p
->tot_len
;
256 inseg
.dataptr
= p
->payload
;
258 inseg
.tcphdr
= tcphdr
;
264 err
= tcp_process(pcb
);
265 tcp_input_pcb
= NULL
;
266 /* A return value of ERR_ABRT means that tcp_abort() was called
267 and that the pcb has been freed. If so, we don't do anything. */
268 if (err
!= ERR_ABRT
) {
269 if (recv_flags
& TF_RESET
) {
270 /* TF_RESET means that the connection was reset by the other
271 end. We then call the error callback to inform the
272 application that the connection is dead before we
273 deallocate the PCB. */
274 TCP_EVENT_ERR(pcb
->errf
, pcb
->callback_arg
, ERR_RST
);
275 tcp_pcb_remove(&tcp_active_pcbs
, pcb
);
276 memp_free(MEMP_TCP_PCB
, pcb
);
277 } else if (recv_flags
& TF_CLOSED
) {
278 /* The connection has been closed and we will deallocate the PCB. */
280 tcp_pcb_remove(&tcp_active_pcbs
, pcb
);
281 memp_free(MEMP_TCP_PCB
, pcb
);
284 /* If the application has registered a "sent" function to be
285 called when new send buffer space is available, we call it now. */
287 if (pcb
->acked
> 0) {
288 TCP_EVENT_SENT(pcb
, pcb
->acked
, err
);
291 if (recv_data
!= NULL
) {
292 /* Notify application that data has been received. */
293 TCP_EVENT_RECV(pcb
, recv_data
, ERR_OK
, err
);
296 /* If a FIN segment was received, we call the callback
297 function with a NULL buffer to indicate EOF. */
298 if (recv_flags
& TF_GOT_FIN
) {
299 TCP_EVENT_RECV(pcb
, NULL
, ERR_OK
, err
);
301 /* If there were no errors, we try to send something out. */
309 /* We deallocate the incoming pbuf. If it was buffered by the
310 application, the application should have called pbuf_ref() to
311 increase the reference counter in the pbuf. If so, the buffer
312 isn't actually deallocated by the call to pbuf_free(), only the
313 reference count is decreased. */
314 if (inseg
.p
!= NULL
) pbuf_free(inseg
.p
);
317 tcp_debug_print_state(pcb
->state
);
318 #endif /* TCP_DEBUG */
319 #endif /* TCP_INPUT_DEBUG */
323 /* If no matching PCB was found, send a TCP RST (reset) to the sender. */
325 LWIP_DEBUGF(TCP_RST_DEBUG
, ("tcp_input: no PCB match found, resetting.\n"));
326 if (!(TCPH_FLAGS(tcphdr
) & TCP_RST
)) {
327 TCP_STATS_INC(tcp
.proterr
);
328 TCP_STATS_INC(tcp
.drop
);
329 tcp_rst(ackno
, seqno
+ tcplen
,
330 &(iphdr
->dest
), &(iphdr
->src
),
331 tcphdr
->dest
, tcphdr
->src
);
336 LWIP_ASSERT("tcp_input: tcp_pcbs_sane()", tcp_pcbs_sane());
337 PERF_STOP("tcp_input");
340 /* tcp_listen_input():
342 * Called by tcp_input() when a segment arrives for a listening connection. */
347 tcp_listen_input(struct tcp_pcb_listen
*pcb
)
349 struct tcp_pcb
*npcb
;
352 /* In the LISTEN state, we check for incoming SYN segments,
353 create a new PCB, and respond with a SYN|ACK. */
354 if (flags
& TCP_ACK
) {
355 /* For incoming segments with the ACK flag set, respond with a RST. */
357 LWIP_DEBUGF(TCP_RST_DEBUG
, ("tcp_listen_input: ACK in LISTEN, sending reset\n"));
358 tcp_rst(ackno
+ 1, seqno
+ tcplen
,
359 &(iphdr
->dest
), &(iphdr
->src
),
360 tcphdr
->dest
, tcphdr
->src
);
361 } else if (flags
& TCP_SYN
) {
362 LWIP_DEBUGF(TCP_DEBUG
, ("TCP connection request %"U16_F
" -> %"U16_F
".\n", tcphdr
->src
, tcphdr
->dest
));
363 npcb
= tcp_alloc(pcb
->prio
);
364 /* If a new PCB could not be created (probably due to lack of memory),
365 we don't do anything, but rely on the sender to retransmit the
366 SYN at a time when we have more memory available. */
368 LWIP_DEBUGF(TCP_DEBUG
, ("tcp_listen_input: could not allocate PCB\n"));
369 TCP_STATS_INC(tcp
.memerr
);
372 /* Set up the new PCB. */
373 ip_addr_set(&(npcb
->local_ip
), &(iphdr
->dest
));
374 npcb
->local_port
= pcb
->local_port
;
375 ip_addr_set(&(npcb
->remote_ip
), &(iphdr
->src
));
376 npcb
->remote_port
= tcphdr
->src
;
377 npcb
->state
= SYN_RCVD
;
378 npcb
->rcv_nxt
= seqno
+ 1;
379 npcb
->snd_wnd
= tcphdr
->wnd
;
380 npcb
->ssthresh
= npcb
->snd_wnd
;
381 npcb
->snd_wl1
= seqno
- 1;/* initialise to seqno-1 to force window update */
382 npcb
->callback_arg
= pcb
->callback_arg
;
383 #if LWIP_CALLBACK_API
384 npcb
->accept
= pcb
->accept
;
385 #endif /* LWIP_CALLBACK_API */
386 /* inherit socket options */
387 npcb
->so_options
= pcb
->so_options
& (SOF_DEBUG
|SOF_DONTROUTE
|SOF_KEEPALIVE
|SOF_OOBINLINE
|SOF_LINGER
);
388 /* Register the new PCB so that we can begin receiving segments for it. */
390 TCP_REG(&tcp_active_pcbs
, npcb
);
392 /* Parse any options in the SYN. */
395 /* Build an MSS option. */
396 optdata
= htonl(((u32_t
)2 << 24) |
398 (((u32_t
)npcb
->mss
/ 256) << 8) |
400 /* Send a SYN|ACK together with the MSS option. */
401 tcp_enqueue(npcb
, NULL
, 0, TCP_SYN
| TCP_ACK
, 0, (u8_t
*)&optdata
, 4);
402 return tcp_output(npcb
);
407 /* tcp_timewait_input():
409 * Called by tcp_input() when a segment arrives for a connection in TIME_WAIT. */
/* Handles a segment addressed to a PCB in the TIME-WAIT state.
 *
 * pcb: the matching PCB from the TIME-WAIT list.
 * Returns the result of tcp_output(pcb).
 *
 * NOTE(review): the embedded numbering jumps (414, 416, 417, 422) and the
 * braces do not balance, so statements are missing from this listing; only
 * the visible ones are described here. */
414 tcp_timewait_input(struct tcp_pcb
*pcb
)
/* If the segment ends beyond what has been received so far, move rcv_nxt up
   to the end of the segment. */
416 if (TCP_SEQ_GT(seqno
+ tcplen
, pcb
->rcv_nxt
)) {
417 pcb
->rcv_nxt
= seqno
+ tcplen
;
/* Hand the PCB to the output routine and propagate its result. */
422 return tcp_output(pcb
);
427 /* Implements the TCP state machine. Called by tcp_input. In some
428 * states tcp_receive() is called to receive data. The tcp_seg
429 * argument will be freed by the caller (tcp_input()) unless the
430 * recv_data pointer in the pcb is set. */
434 tcp_process(struct tcp_pcb
*pcb
)
436 struct tcp_seg
*rseg
;
443 /* Process incoming RST segments. */
444 if (flags
& TCP_RST
) {
445 /* First, determine if the reset is acceptable. */
446 if (pcb
->state
== SYN_SENT
) {
447 if (ackno
== pcb
->snd_nxt
) {
451 /*if (TCP_SEQ_GEQ(seqno, pcb->rcv_nxt) &&
452 TCP_SEQ_LEQ(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) { */
454 if (TCP_SEQ_BETWEEN(seqno
, pcb
->rcv_nxt
, pcb
->rcv_nxt
+pcb
->rcv_wnd
)) {
460 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_process: Connection RESET\n"));
461 LWIP_ASSERT("tcp_input: pcb->state != CLOSED", pcb
->state
!= CLOSED
);
462 recv_flags
= TF_RESET
;
463 pcb
->flags
&= ~TF_ACK_DELAY
;
466 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_process: unacceptable reset seqno %"U32_F
" rcv_nxt %"U32_F
"\n",
467 seqno
, pcb
->rcv_nxt
));
468 LWIP_DEBUGF(TCP_DEBUG
, ("tcp_process: unacceptable reset seqno %"U32_F
" rcv_nxt %"U32_F
"\n",
469 seqno
, pcb
->rcv_nxt
));
474 /* Update the PCB (in)activity timer. */
475 pcb
->tmr
= tcp_ticks
;
478 /* Do different things depending on the TCP state. */
479 switch (pcb
->state
) {
481 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("SYN-SENT: ackno %"U32_F
" pcb->snd_nxt %"U32_F
" unacked %"U32_F
"\n", ackno
,
482 pcb
->snd_nxt
, ntohl(pcb
->unacked
->tcphdr
->seqno
)));
483 /* received SYN ACK with expected sequence number? */
484 if ((flags
& TCP_ACK
) && (flags
& TCP_SYN
)
485 && ackno
== ntohl(pcb
->unacked
->tcphdr
->seqno
) + 1) {
487 pcb
->rcv_nxt
= seqno
+ 1;
488 pcb
->lastack
= ackno
;
489 pcb
->snd_wnd
= tcphdr
->wnd
;
490 pcb
->snd_wl1
= seqno
- 1; /* initialise to seqno - 1 to force window update */
491 pcb
->state
= ESTABLISHED
;
492 pcb
->cwnd
= pcb
->mss
;
494 LWIP_DEBUGF(TCP_QLEN_DEBUG
, ("tcp_process: SYN-SENT --queuelen %"U16_F
"\n", (u16_t
)pcb
->snd_queuelen
));
496 pcb
->unacked
= rseg
->next
;
499 /* Parse any options in the SYNACK. */
502 /* Call the user specified function to call when successfully connected. */
504 TCP_EVENT_CONNECTED(pcb
, ERR_OK
, err
);
507 /* received ACK? possibly a half-open connection */
508 else if (flags
& TCP_ACK
) {
509 /* send a RST to bring the other side in a non-synchronized state. */
510 tcp_rst(ackno
, seqno
+ tcplen
, &(iphdr
->dest
), &(iphdr
->src
),
511 tcphdr
->dest
, tcphdr
->src
);
515 if (flags
& TCP_ACK
&&
516 !(flags
& TCP_RST
)) {
517 /* expected ACK number? */
518 if (TCP_SEQ_BETWEEN(ackno
, pcb
->lastack
+1, pcb
->snd_nxt
)) {
519 pcb
->state
= ESTABLISHED
;
520 LWIP_DEBUGF(TCP_DEBUG
, ("TCP connection established %"U16_F
" -> %"U16_F
".\n", inseg
.tcphdr
->src
, inseg
.tcphdr
->dest
));
521 #if LWIP_CALLBACK_API
522 LWIP_ASSERT("pcb->accept != NULL", pcb
->accept
!= NULL
);
524 /* Call the accept function. */
525 TCP_EVENT_ACCEPT(pcb
, ERR_OK
, err
);
527 /* If the accept function returns with an error, we abort the connection. */
532 /* If there was any data contained within this ACK,
533 * we'd better pass it on to the application as well. */
535 pcb
->cwnd
= pcb
->mss
;
537 /* incorrect ACK number */
540 tcp_rst(ackno
, seqno
+ tcplen
, &(iphdr
->dest
), &(iphdr
->src
),
541 tcphdr
->dest
, tcphdr
->src
);
549 if (flags
& TCP_FIN
) {
551 pcb
->state
= CLOSE_WAIT
;
556 if (flags
& TCP_FIN
) {
557 if (flags
& TCP_ACK
&& ackno
== pcb
->snd_nxt
) {
558 LWIP_DEBUGF(TCP_DEBUG
,
559 ("TCP connection closed %"U16_F
" -> %"U16_F
".\n", inseg
.tcphdr
->src
, inseg
.tcphdr
->dest
));
562 TCP_RMV(&tcp_active_pcbs
, pcb
);
563 pcb
->state
= TIME_WAIT
;
564 TCP_REG(&tcp_tw_pcbs
, pcb
);
567 pcb
->state
= CLOSING
;
569 } else if (flags
& TCP_ACK
&& ackno
== pcb
->snd_nxt
) {
570 pcb
->state
= FIN_WAIT_2
;
575 if (flags
& TCP_FIN
) {
576 LWIP_DEBUGF(TCP_DEBUG
, ("TCP connection closed %"U16_F
" -> %"U16_F
".\n", inseg
.tcphdr
->src
, inseg
.tcphdr
->dest
));
579 TCP_RMV(&tcp_active_pcbs
, pcb
);
580 pcb
->state
= TIME_WAIT
;
581 TCP_REG(&tcp_tw_pcbs
, pcb
);
586 if (flags
& TCP_ACK
&& ackno
== pcb
->snd_nxt
) {
587 LWIP_DEBUGF(TCP_DEBUG
, ("TCP connection closed %"U16_F
" -> %"U16_F
".\n", inseg
.tcphdr
->src
, inseg
.tcphdr
->dest
));
590 TCP_RMV(&tcp_active_pcbs
, pcb
);
591 pcb
->state
= TIME_WAIT
;
592 TCP_REG(&tcp_tw_pcbs
, pcb
);
597 if (flags
& TCP_ACK
&& ackno
== pcb
->snd_nxt
) {
598 LWIP_DEBUGF(TCP_DEBUG
, ("TCP connection closed %"U16_F
" -> %"U16_F
".\n", inseg
.tcphdr
->src
, inseg
.tcphdr
->dest
));
600 recv_flags
= TF_CLOSED
;
611 /* Called by tcp_process. Checks if the given segment is an ACK for outstanding
612 * data, and if so frees the memory of the buffered data. Next, it places the
613 * segment on any of the receive queues (pcb->recved or pcb->ooseq). If the segment
614 * is buffered, the pbuf is referenced by pbuf_ref so that it will not be freed until
615 * it has been removed from the buffer.
617 * If the incoming segment constitutes an ACK for a segment that was used for RTT
618 * estimation, the RTT is estimated here as well. */
622 tcp_receive(struct tcp_pcb
*pcb
)
624 struct tcp_seg
*next
;
626 struct tcp_seg
*prev
, *cseg
;
631 u32_t right_wnd_edge
;
635 if (flags
& TCP_ACK
) {
636 right_wnd_edge
= pcb
->snd_wnd
+ pcb
->snd_wl1
;
639 if (TCP_SEQ_LT(pcb
->snd_wl1
, seqno
) ||
640 (pcb
->snd_wl1
== seqno
&& TCP_SEQ_LT(pcb
->snd_wl2
, ackno
)) ||
641 (pcb
->snd_wl2
== ackno
&& tcphdr
->wnd
> pcb
->snd_wnd
)) {
642 pcb
->snd_wnd
= tcphdr
->wnd
;
643 pcb
->snd_wl1
= seqno
;
644 pcb
->snd_wl2
= ackno
;
645 LWIP_DEBUGF(TCP_WND_DEBUG
, ("tcp_receive: window update %"U32_F
"\n", pcb
->snd_wnd
));
648 if (pcb
->snd_wnd
!= tcphdr
->wnd
) {
649 LWIP_DEBUGF(TCP_WND_DEBUG
, ("tcp_receive: no window update lastack %"U32_F
" snd_max %"U32_F
" ackno %"U32_F
" wl1 %"U32_F
" seqno %"U32_F
" wl2 %"U32_F
"\n",
650 pcb
->lastack
, pcb
->snd_max
, ackno
, pcb
->snd_wl1
, seqno
, pcb
->snd_wl2
));
652 #endif /* TCP_WND_DEBUG */
656 if (pcb
->lastack
== ackno
) {
659 if (pcb
->snd_wl1
+ pcb
->snd_wnd
== right_wnd_edge
){
661 if (pcb
->dupacks
>= 3 && pcb
->unacked
!= NULL
) {
662 if (!(pcb
->flags
& TF_INFR
)) {
663 /* This is fast retransmit. Retransmit the first unacked segment. */
664 LWIP_DEBUGF(TCP_FR_DEBUG
, ("tcp_receive: dupacks %"U16_F
" (%"U32_F
"), fast retransmit %"U32_F
"\n",
665 (u16_t
)pcb
->dupacks
, pcb
->lastack
,
666 ntohl(pcb
->unacked
->tcphdr
->seqno
)));
668 /* Set ssthresh to max (FlightSize / 2, 2*SMSS) */
669 /*pcb->ssthresh = LWIP_MAX((pcb->snd_max -
672 /* Set ssthresh to half of the minimum of the current cwnd and the advertised window */
673 if(pcb
->cwnd
> pcb
->snd_wnd
)
674 pcb
->ssthresh
= pcb
->snd_wnd
/ 2;
676 pcb
->ssthresh
= pcb
->cwnd
/ 2;
678 pcb
->cwnd
= pcb
->ssthresh
+ 3 * pcb
->mss
;
679 pcb
->flags
|= TF_INFR
;
681 /* Inflate the congestion window, but not if it means that
682 the value overflows. */
683 if ((u16_t
)(pcb
->cwnd
+ pcb
->mss
) > pcb
->cwnd
) {
684 pcb
->cwnd
+= pcb
->mss
;
689 LWIP_DEBUGF(TCP_FR_DEBUG
, ("tcp_receive: dupack averted %"U32_F
" %"U32_F
"\n",
690 pcb
->snd_wl1
+ pcb
->snd_wnd
, right_wnd_edge
));
693 /*if (TCP_SEQ_LT(pcb->lastack, ackno) &&
694 TCP_SEQ_LEQ(ackno, pcb->snd_max)) { */
695 if(TCP_SEQ_BETWEEN(ackno
, pcb
->lastack
+1, pcb
->snd_max
)){
696 /* We come here when the ACK acknowledges new data. */
698 /* Reset the "IN Fast Retransmit" flag, since we are no longer
699 in fast retransmit. Also reset the congestion window to the
700 slow start threshold. */
701 if (pcb
->flags
& TF_INFR
) {
702 pcb
->flags
&= ~TF_INFR
;
703 pcb
->cwnd
= pcb
->ssthresh
;
706 /* Reset the number of retransmissions. */
709 /* Reset the retransmission time-out. */
710 pcb
->rto
= (pcb
->sa
>> 3) + pcb
->sv
;
712 /* Update the send buffer space. */
713 pcb
->acked
= ackno
- pcb
->lastack
;
715 pcb
->snd_buf
+= pcb
->acked
;
717 /* Reset the fast retransmit variables. */
719 pcb
->lastack
= ackno
;
721 /* Update the congestion control variables (cwnd and ssthresh). */
723 if (pcb
->state
>= ESTABLISHED
) {
724 if (pcb
->cwnd
< pcb
->ssthresh
) {
725 if ((u16_t
)(pcb
->cwnd
+ pcb
->mss
) > pcb
->cwnd
) {
726 pcb
->cwnd
+= pcb
->mss
;
728 LWIP_DEBUGF(TCP_CWND_DEBUG
, ("tcp_receive: slow start cwnd %"U16_F
"\n", pcb
->cwnd
));
730 u16_t new_cwnd
= (pcb
->cwnd
+ pcb
->mss
* pcb
->mss
/ pcb
->cwnd
);
731 if (new_cwnd
> pcb
->cwnd
) {
732 pcb
->cwnd
= new_cwnd
;
734 LWIP_DEBUGF(TCP_CWND_DEBUG
, ("tcp_receive: congestion avoidance cwnd %"U16_F
"\n", pcb
->cwnd
));
737 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_receive: ACK for %"U32_F
", unacked->seqno %"U32_F
":%"U32_F
"\n",
739 pcb
->unacked
!= NULL
?
740 ntohl(pcb
->unacked
->tcphdr
->seqno
): 0,
741 pcb
->unacked
!= NULL
?
742 ntohl(pcb
->unacked
->tcphdr
->seqno
) + TCP_TCPLEN(pcb
->unacked
): 0));
744 /* Remove segment from the unacknowledged list if the incoming
745 ACK acknowledges them. */
746 while (pcb
->unacked
!= NULL
&&
747 TCP_SEQ_LEQ(ntohl(pcb
->unacked
->tcphdr
->seqno
) +
748 TCP_TCPLEN(pcb
->unacked
), ackno
)) {
749 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_receive: removing %"U32_F
":%"U32_F
" from pcb->unacked\n",
750 ntohl(pcb
->unacked
->tcphdr
->seqno
),
751 ntohl(pcb
->unacked
->tcphdr
->seqno
) +
752 TCP_TCPLEN(pcb
->unacked
)));
755 pcb
->unacked
= pcb
->unacked
->next
;
757 LWIP_DEBUGF(TCP_QLEN_DEBUG
, ("tcp_receive: queuelen %"U16_F
" ... ", (u16_t
)pcb
->snd_queuelen
));
758 pcb
->snd_queuelen
-= pbuf_clen(next
->p
);
761 LWIP_DEBUGF(TCP_QLEN_DEBUG
, ("%"U16_F
" (after freeing unacked)\n", (u16_t
)pcb
->snd_queuelen
));
762 if (pcb
->snd_queuelen
!= 0) {
763 LWIP_ASSERT("tcp_receive: valid queue length", pcb
->unacked
!= NULL
||
764 pcb
->unsent
!= NULL
);
770 /* We go through the ->unsent list to see if any of the segments
771 on the list are acknowledged by the ACK. This may seem
772 strange since an "unsent" segment shouldn't be acked. The
773 rationale is that lwIP puts all outstanding segments on the
774 ->unsent list after a retransmission, so these segments may
775 in fact have been sent once. */
776 while (pcb
->unsent
!= NULL
&&
777 /*TCP_SEQ_LEQ(ntohl(pcb->unsent->tcphdr->seqno) + TCP_TCPLEN(pcb->unsent), ackno) &&
778 TCP_SEQ_LEQ(ackno, pcb->snd_max)*/
779 TCP_SEQ_BETWEEN(ackno
, ntohl(pcb
->unsent
->tcphdr
->seqno
) + TCP_TCPLEN(pcb
->unsent
), pcb
->snd_max
)
781 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_receive: removing %"U32_F
":%"U32_F
" from pcb->unsent\n",
782 ntohl(pcb
->unsent
->tcphdr
->seqno
), ntohl(pcb
->unsent
->tcphdr
->seqno
) +
783 TCP_TCPLEN(pcb
->unsent
)));
786 pcb
->unsent
= pcb
->unsent
->next
;
787 LWIP_DEBUGF(TCP_QLEN_DEBUG
, ("tcp_receive: queuelen %"U16_F
" ... ", (u16_t
)pcb
->snd_queuelen
));
788 pcb
->snd_queuelen
-= pbuf_clen(next
->p
);
790 LWIP_DEBUGF(TCP_QLEN_DEBUG
, ("%"U16_F
" (after freeing unsent)\n", (u16_t
)pcb
->snd_queuelen
));
791 if (pcb
->snd_queuelen
!= 0) {
792 LWIP_ASSERT("tcp_receive: valid queue length",
793 pcb
->unacked
!= NULL
|| pcb
->unsent
!= NULL
);
796 if (pcb
->unsent
!= NULL
) {
797 pcb
->snd_nxt
= htonl(pcb
->unsent
->tcphdr
->seqno
);
800 /* End of ACK for new data processing. */
802 LWIP_DEBUGF(TCP_RTO_DEBUG
, ("tcp_receive: pcb->rttest %"U32_F
" rtseq %"U32_F
" ackno %"U32_F
"\n",
803 pcb
->rttest
, pcb
->rtseq
, ackno
));
805 /* RTT estimation calculations. This is done by checking if the
806 incoming segment acknowledges the segment we use to take a
807 round-trip time measurement. */
808 if (pcb
->rttest
&& TCP_SEQ_LT(pcb
->rtseq
, ackno
)) {
809 m
= tcp_ticks
- pcb
->rttest
;
811 LWIP_DEBUGF(TCP_RTO_DEBUG
, ("tcp_receive: experienced rtt %"U16_F
" ticks (%"U16_F
" msec).\n",
812 m
, m
* TCP_SLOW_INTERVAL
));
814 /* This is taken directly from VJs original code in his paper */
815 m
= m
- (pcb
->sa
>> 3);
820 m
= m
- (pcb
->sv
>> 2);
822 pcb
->rto
= (pcb
->sa
>> 3) + pcb
->sv
;
824 LWIP_DEBUGF(TCP_RTO_DEBUG
, ("tcp_receive: RTO %"U16_F
" (%"U16_F
" miliseconds)\n",
825 pcb
->rto
, pcb
->rto
* TCP_SLOW_INTERVAL
));
831 /* If the incoming segment contains data, we must process it further. */
834 /* This code basically does three things:
836 +) If the incoming segment contains data that is the next
837 in-sequence data, this data is passed to the application. This
838 might involve trimming the first edge of the data. The rcv_nxt
839 variable and the advertised window are adjusted.
841 +) If the incoming segment has data that is above the next
842 sequence number expected (->rcv_nxt), the segment is placed on
843 the ->ooseq queue. This is done by finding the appropriate
844 place in the ->ooseq queue (which is ordered by sequence
845 number) and trim the segment in both ends if needed. An
846 immediate ACK is sent to indicate that we received an
847 out-of-sequence segment.
849 +) Finally, we check if the first segment on the ->ooseq queue
850 now is in sequence (i.e., if rcv_nxt >= ooseq->seqno). If
851 rcv_nxt > ooseq->seqno, we must trim the first edge of the
852 segment on ->ooseq before we adjust rcv_nxt. The data in the
853 segments that are now on sequence are chained onto the
854 incoming segment so that we only need to call the application once. */
858 /* First, we check if we must trim the first edge. We have to do
859 this if the sequence number of the incoming segment is less
860 than rcv_nxt, and the sequence number plus the length of the
861 segment is larger than rcv_nxt. */
862 /* if (TCP_SEQ_LT(seqno, pcb->rcv_nxt)){
863 if (TCP_SEQ_LT(pcb->rcv_nxt, seqno + tcplen)) {*/
864 if(TCP_SEQ_BETWEEN(pcb
->rcv_nxt
, seqno
+1, seqno
+tcplen
-1)){
865 /* Trimming the first edge is done by pushing the payload
866 pointer in the pbuf downwards. This is somewhat tricky since
867 we do not want to discard the full contents of the pbuf up to
868 the new starting point of the data since we have to keep the
869 TCP header which is present in the first pbuf in the chain.
871 What is done is really quite a nasty hack: the first pbuf in
872 the pbuf chain is pointed to by inseg.p. Since we need to be
873 able to deallocate the whole pbuf, we cannot change this
874 inseg.p pointer to point to any of the later pbufs in the
875 chain. Instead, we point the ->payload pointer in the first
876 pbuf to data in one of the later pbufs. We also set the
877 inseg.data pointer to point to the right place. This way, the
878 ->p pointer will still point to the first pbuf, but the
879 ->p->payload pointer will point to data in another pbuf.
881 After we are done with adjusting the pbuf pointers we must
882 adjust the ->data pointer in the seg and the segment length. */
885 off
= pcb
->rcv_nxt
- seqno
;
887 if (inseg
.p
->len
< off
) {
888 new_tot_len
= inseg
.p
->tot_len
- off
;
889 while (p
->len
< off
) {
891 /* KJM following line changed (with addition of new_tot_len var)
893 inseg.p->tot_len -= p->len; */
894 p
->tot_len
= new_tot_len
;
898 pbuf_header(p
, -off
);
900 pbuf_header(inseg
.p
, -off
);
902 /* KJM following line changed to use p->payload rather than inseg->p->payload */
904 inseg
.dataptr
= p
->payload
;
905 inseg
.len
-= pcb
->rcv_nxt
- seqno
;
906 inseg
.tcphdr
->seqno
= seqno
= pcb
->rcv_nxt
;
909 if(TCP_SEQ_LT(seqno
, pcb
->rcv_nxt
)){
910 /* the whole segment is < rcv_nxt */
911 /* must be a duplicate of a packet that has already been correctly handled */
913 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_receive: duplicate seqno %"U32_F
"\n", seqno
));
918 /* The sequence number must be within the window (above rcv_nxt
919 and below rcv_nxt + rcv_wnd) in order to be further processed. */
921 /*if (TCP_SEQ_GEQ(seqno, pcb->rcv_nxt) &&
922 TCP_SEQ_LT(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {*/
923 if(TCP_SEQ_BETWEEN(seqno
, pcb
->rcv_nxt
, pcb
->rcv_nxt
+ pcb
->rcv_wnd
- 1)){
924 if (pcb
->rcv_nxt
== seqno
) {
925 /* The incoming segment is the next in sequence. We check if
926 we have to trim the end of the segment and update rcv_nxt
927 and pass the data to the application. */
929 if (pcb
->ooseq
!= NULL
&&
930 TCP_SEQ_LEQ(pcb
->ooseq
->tcphdr
->seqno
, seqno
+ inseg
.len
)) {
931 /* We have to trim the second edge of the incoming segment. */
933 inseg
.len
= pcb
->ooseq
->tcphdr
->seqno
- seqno
;
934 pbuf_realloc(inseg
.p
, inseg
.len
);
936 #endif /* TCP_QUEUE_OOSEQ */
938 tcplen
= TCP_TCPLEN(&inseg
);
940 /* First received FIN will be ACKed +1, on any successive (duplicate)
941 * FINs we are already in CLOSE_WAIT and have already done +1. */
943 if (pcb
->state
!= CLOSE_WAIT
) {
944 pcb
->rcv_nxt
+= tcplen
;
947 /* Update the receiver's (our) window. */
948 if (pcb
->rcv_wnd
< tcplen
) {
951 pcb
->rcv_wnd
-= tcplen
;
954 /* If there is data in the segment, we make preparations to
955 pass this up to the application. The ->recv_data variable
956 is used for holding the pbuf that goes to the
957 application. The code for reassembling out-of-sequence data
958 chains its data on this pbuf as well.
960 If the segment was a FIN, we set the TF_GOT_FIN flag that will
961 be used to indicate to the application that the remote side has
962 closed its end of the connection. */
963 if (inseg
.p
->tot_len
> 0) {
965 /* Since this pbuf now is the responsibility of the
966 application, we delete our reference to it so that we won't
967 (mistakenly) deallocate it. */
970 if (TCPH_FLAGS(inseg
.tcphdr
) & TCP_FIN
) {
971 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_receive: received FIN.\n"));
972 recv_flags
= TF_GOT_FIN
;
976 /* We now check if we have segments on the ->ooseq queue that
977 is now in sequence. */
978 while (pcb
->ooseq
!= NULL
&&
979 pcb
->ooseq
->tcphdr
->seqno
== pcb
->rcv_nxt
) {
982 seqno
= pcb
->ooseq
->tcphdr
->seqno
;
984 pcb
->rcv_nxt
+= TCP_TCPLEN(cseg
);
985 if (pcb
->rcv_wnd
< TCP_TCPLEN(cseg
)) {
988 pcb
->rcv_wnd
-= TCP_TCPLEN(cseg
);
990 if (cseg
->p
->tot_len
> 0) {
991 /* Chain this pbuf onto the pbuf that we will pass to the application. */
994 pbuf_cat(recv_data
, cseg
->p
);
1000 if (TCPH_FLAGS(cseg
->tcphdr
) & TCP_FIN
) {
1001 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_receive: dequeued FIN.\n"));
1002 recv_flags
= TF_GOT_FIN
;
1006 pcb
->ooseq
= cseg
->next
;
1009 #endif /* TCP_QUEUE_OOSEQ */
1012 /* Acknowledge the segment(s). */
1016 /* We get here if the incoming segment is out-of-sequence. */
1019 /* We queue the segment on the ->ooseq queue. */
1020 if (pcb
->ooseq
== NULL
) {
1021 pcb
->ooseq
= tcp_seg_copy(&inseg
);
1023 /* If the queue is not empty, we walk through the queue and
1024 try to find a place where the sequence number of the
1025 incoming segment is between the sequence numbers of the
1026 previous and the next segment on the ->ooseq queue. That is
1027 the place where we put the incoming segment. If needed, we
1028 trim the second edges of the previous and the incoming
1029 segment so that it will fit into the sequence.
1031 If the incoming segment has the same sequence number as a
1032 segment on the ->ooseq queue, we discard the segment that
1033 contains less data. */
1036 for(next
= pcb
->ooseq
; next
!= NULL
; next
= next
->next
) {
1037 if (seqno
== next
->tcphdr
->seqno
) {
1038 /* The sequence number of the incoming segment is the
1039 same as the sequence number of the segment on
1040 ->ooseq. We check the lengths to see which one to discard. */
1042 if (inseg
.len
> next
->len
) {
1043 /* The incoming segment is larger than the old
1044 segment. We replace the old segment with the new one. */
1046 cseg
= tcp_seg_copy(&inseg
);
1048 cseg
->next
= next
->next
;
1057 /* Either the lengths are the same or the incoming
1058 segment was smaller than the old one; in either
1059 case, we ditch the incoming segment. */
1064 if (TCP_SEQ_LT(seqno
, next
->tcphdr
->seqno
)) {
1065 /* The sequence number of the incoming segment is lower
1066 than the sequence number of the first segment on the
1067 queue. We put the incoming segment first on the queue. */
1070 if (TCP_SEQ_GT(seqno
+ inseg
.len
, next
->tcphdr
->seqno
)) {
1071 /* We need to trim the incoming segment. */
1072 inseg
.len
= next
->tcphdr
->seqno
- seqno
;
1073 pbuf_realloc(inseg
.p
, inseg
.len
);
1075 cseg
= tcp_seg_copy(&inseg
);
1083 /*if (TCP_SEQ_LT(prev->tcphdr->seqno, seqno) &&
1084 TCP_SEQ_LT(seqno, next->tcphdr->seqno)) {*/
1085 if(TCP_SEQ_BETWEEN(seqno
, prev
->tcphdr
->seqno
+1, next
->tcphdr
->seqno
-1)){
1086 /* The sequence number of the incoming segment is in
1087 between the sequence numbers of the previous and
1088 the next segment on ->ooseq. We trim and insert the
1089 incoming segment and trim the previous segment, if needed. */
1091 if (TCP_SEQ_GT(seqno
+ inseg
.len
, next
->tcphdr
->seqno
)) {
1092 /* We need to trim the incoming segment. */
1093 inseg
.len
= next
->tcphdr
->seqno
- seqno
;
1094 pbuf_realloc(inseg
.p
, inseg
.len
);
1097 cseg
= tcp_seg_copy(&inseg
);
1101 if (TCP_SEQ_GT(prev
->tcphdr
->seqno
+ prev
->len
, seqno
)) {
1102 /* We need to trim the prev segment. */
1103 prev
->len
= seqno
- prev
->tcphdr
->seqno
;
1104 pbuf_realloc(prev
->p
, prev
->len
);
1109 /* If the "next" segment is the last segment on the
1110 ooseq queue, we add the incoming segment to the end of the list. */
1112 if (next
->next
== NULL
&&
1113 TCP_SEQ_GT(seqno
, next
->tcphdr
->seqno
)) {
1114 next
->next
= tcp_seg_copy(&inseg
);
1115 if (next
->next
!= NULL
) {
1116 if (TCP_SEQ_GT(next
->tcphdr
->seqno
+ next
->len
, seqno
)) {
1117 /* We need to trim the last segment. */
1118 next
->len
= seqno
- next
->tcphdr
->seqno
;
1119 pbuf_realloc(next
->p
, next
->len
);
1128 #endif /* TCP_QUEUE_OOSEQ */
1132 /*if (TCP_SEQ_GT(pcb->rcv_nxt, seqno) ||
1133 TCP_SEQ_GEQ(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {*/
1134 if(!TCP_SEQ_BETWEEN(seqno
, pcb
->rcv_nxt
, pcb
->rcv_nxt
+ pcb
->rcv_wnd
-1)){
1139 /* Segments with length 0 are taken care of here. Segments that
1140 fall out of the window are ACKed. */
1141 /*if (TCP_SEQ_GT(pcb->rcv_nxt, seqno) ||
1142 TCP_SEQ_GEQ(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {*/
1143 if(!TCP_SEQ_BETWEEN(seqno
, pcb
->rcv_nxt
, pcb
->rcv_nxt
+ pcb
->rcv_wnd
-1)){
1152 /* Parses the options contained in the incoming segment. (Code taken
1153 * from uIP with only small changes.) */
/* Scans the option bytes of the current segment's TCP header and, when an MSS
 * option is found, stores the (clamped) value in pcb->mss.
 *
 * pcb: connection whose mss field is updated.
 *
 * NOTE(review): the embedded numbering has gaps (1159-1163, 1169-1170, 1172,
 * 1174-1175, ...) and the braces do not balance, so the local declarations,
 * the loading of `opt`, the loop-index updates and the closing braces are
 * missing from this listing; only the visible statements are described. */
1158 tcp_parseopt(struct tcp_pcb
*pcb
)
/* The option bytes start TCP_HLEN bytes past the start of the TCP header. */
1164 opts
= (u8_t
*)tcphdr
+ TCP_HLEN
;
1166 /* Parse the TCP MSS option, if present. */
/* A header length above 5 (32-bit words) means option bytes follow. */
1167 if(TCPH_HDRLEN(tcphdr
) > 0x5) {
/* Loop bound: number of option bytes, i.e. (header words - 5) * 4. */
1168 for(c
= 0; c
< (TCPH_HDRLEN(tcphdr
) - 5) << 2 ;) {
1171 /* End of options. */
/* Option kind 0x01 gets its own branch (body not visible here). */
1173 } else if (opt
== 0x01) {
/* Option kind 0x02 with a length byte of 0x04. */
1176 } else if (opt
== 0x02 &&
1177 opts
[c
+ 1] == 0x04) {
1178 /* An MSS option with the right option length. */
/* Assemble the 16-bit value, most significant byte first. */
1179 mss
= (opts
[c
+ 2] << 8) | opts
[c
+ 3];
/* Never adopt an MSS larger than our own TCP_MSS. */
1180 pcb
->mss
= mss
> TCP_MSS
? TCP_MSS
: mss
;
1182 /* And we are done processing options. */
1185 if (opts
[c
+ 1] == 0) {
1186 /* If the length field is zero, the options are malformed
1187 and we don't process them further. */
1190 /* All other options have a length field, so that we easily
1191 can skip past them. */
1197 #endif /* LWIP_TCP */