2 * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3 * Distributed under the terms of the MIT License.
6 * Andrew Galante, haiku.galante@gmail.com
7 * Axel Dörfler, axeld@pinc-software.de
8 * Hugo Santos, hugosantos@gmail.com
12 #include "TCPEndpoint.h"
14 #include <netinet/in.h>
15 #include <netinet/ip.h>
16 #include <netinet/tcp.h>
23 #include <KernelExport.h>
26 #include <net_buffer.h>
27 #include <net_datalink.h>
29 #include <NetBufferUtilities.h>
30 #include <NetUtilities.h>
34 #include <util/AutoLock.h>
35 #include <util/list.h>
37 #include "EndpointManager.h"
41 // - RFC 793 - Transmission Control Protocol
42 // - RFC 813 - Window and Acknowledgement Strategy in TCP
43 // - RFC 1337 - TIME_WAIT Assassination Hazards in TCP
45 // Things this implementation currently doesn't implement:
46 // - TCP Slow Start, Congestion Avoidance, Fast Retransmit, and Fast Recovery,
47 // RFC 2001, RFC 2581, RFC 3042
48 // - NewReno Modification to TCP's Fast Recovery, RFC 2582
49 // - Explicit Congestion Notification (ECN), RFC 3168
51 // - SACK, Selective Acknowledgment - RFC 2018, RFC 2883, RFC 3517
52 // - Forward RTO-Recovery, RFC 4138
53 // - Time-Wait hash instead of keeping sockets alive
55 // Things incomplete in this implementation:
56 // - TCP Extensions for High Performance, RFC 1323 - RTTM, PAWS
58 #define PrintAddress(address) \
59 AddressString(Domain(), address, true).Data()
65 // the space before ', ##args' is important in order for this to work with cpp 2.95
66 # define TRACE(format, args...) dprintf("%" B_PRId32 ": TCP [%" \
67 B_PRIdBIGTIME "] %p (%12s) " format "\n", find_thread(NULL), \
68 system_time(), this, name_for_state(fState) , ##args)
70 # define TRACE(args...) do { } while (0)
74 # define PROBE(buffer, window) \
75 dprintf("TCP PROBE %" B_PRIdBIGTIME " %s %s %" B_PRIu32 " snxt %" B_PRIu32 \
76 " suna %" B_PRIu32 " cw %" B_PRIu32 " sst %" B_PRIu32 " win %" \
77 B_PRIu32 " swin %" B_PRIu32 " smax-suna %" B_PRIu32 " savail %" \
78 B_PRIuSIZE " sqused %" B_PRIuSIZE " rto %" B_PRIdBIGTIME "\n", \
79 system_time(), PrintAddress(buffer->source), \
80 PrintAddress(buffer->destination), buffer->size, fSendNext.Number(), \
81 fSendUnacknowledged.Number(), fCongestionWindow, fSlowStartThreshold, \
82 window, fSendWindow, (fSendMax - fSendUnacknowledged).Number(), \
83 fSendQueue.Available(fSendNext), fSendQueue.Used(), fRetransmitTimeout)
85 # define PROBE(buffer, window) do { } while (0)
89 namespace TCPTracing
{
91 class Receive
: public AbstractTraceEntry
{
93 Receive(TCPEndpoint
* endpoint
, tcp_segment_header
& segment
, uint32 window
,
98 fBufferSize(buffer
->size
),
99 fSequence(segment
.sequence
),
100 fAcknowledge(segment
.acknowledge
),
102 fState(endpoint
->State()),
103 fFlags(segment
.flags
)
108 virtual void AddDump(TraceOutput
& out
)
110 out
.Print("tcp:%p (%12s) receive buffer %p (%" B_PRIu32
" bytes), "
111 "flags %#" B_PRIx8
", seq %" B_PRIu32
", ack %" B_PRIu32
112 ", wnd %" B_PRIu32
, fEndpoint
, name_for_state(fState
), fBuffer
,
113 fBufferSize
, fFlags
, fSequence
, fAcknowledge
, fWindow
);
117 TCPEndpoint
* fEndpoint
;
127 class Send
: public AbstractTraceEntry
{
129 Send(TCPEndpoint
* endpoint
, tcp_segment_header
& segment
, net_buffer
* buffer
,
130 tcp_sequence firstSequence
, tcp_sequence lastSequence
)
134 fBufferSize(buffer
->size
),
135 fSequence(segment
.sequence
),
136 fAcknowledge(segment
.acknowledge
),
137 fFirstSequence(firstSequence
.Number()),
138 fLastSequence(lastSequence
.Number()),
139 fState(endpoint
->State()),
140 fFlags(segment
.flags
)
145 virtual void AddDump(TraceOutput
& out
)
147 out
.Print("tcp:%p (%12s) send buffer %p (%" B_PRIu32
" bytes), "
148 "flags %#" B_PRIx8
", seq %" B_PRIu32
", ack %" B_PRIu32
149 ", first %" B_PRIu32
", last %" B_PRIu32
, fEndpoint
,
150 name_for_state(fState
), fBuffer
, fBufferSize
, fFlags
, fSequence
,
151 fAcknowledge
, fFirstSequence
, fLastSequence
);
155 TCPEndpoint
* fEndpoint
;
160 uint32 fFirstSequence
;
161 uint32 fLastSequence
;
166 class State
: public AbstractTraceEntry
{
168 State(TCPEndpoint
* endpoint
)
171 fState(endpoint
->State())
176 virtual void AddDump(TraceOutput
& out
)
178 out
.Print("tcp:%p (%12s) state change", fEndpoint
,
179 name_for_state(fState
));
183 TCPEndpoint
* fEndpoint
;
187 class Spawn
: public AbstractTraceEntry
{
189 Spawn(TCPEndpoint
* listeningEndpoint
, TCPEndpoint
* spawnedEndpoint
)
191 fListeningEndpoint(listeningEndpoint
),
192 fSpawnedEndpoint(spawnedEndpoint
)
197 virtual void AddDump(TraceOutput
& out
)
199 out
.Print("tcp:%p spawns %p", fListeningEndpoint
, fSpawnedEndpoint
);
203 TCPEndpoint
* fListeningEndpoint
;
204 TCPEndpoint
* fSpawnedEndpoint
;
207 class Error
: public AbstractTraceEntry
{
209 Error(TCPEndpoint
* endpoint
, const char* error
, int32 line
)
214 fState(endpoint
->State())
219 virtual void AddDump(TraceOutput
& out
)
221 out
.Print("tcp:%p (%12s) error at line %" B_PRId32
": %s", fEndpoint
,
222 name_for_state(fState
), fLine
, fError
);
226 TCPEndpoint
* fEndpoint
;
232 class TimerSet
: public AbstractTraceEntry
{
234 TimerSet(TCPEndpoint
* endpoint
, const char* which
, bigtime_t timeout
)
239 fState(endpoint
->State())
244 virtual void AddDump(TraceOutput
& out
)
246 out
.Print("tcp:%p (%12s) %s timer set to %" B_PRIdBIGTIME
, fEndpoint
,
247 name_for_state(fState
), fWhich
, fTimeout
);
251 TCPEndpoint
* fEndpoint
;
257 class TimerTriggered
: public AbstractTraceEntry
{
259 TimerTriggered(TCPEndpoint
* endpoint
, const char* which
)
263 fState(endpoint
->State())
268 virtual void AddDump(TraceOutput
& out
)
270 out
.Print("tcp:%p (%12s) %s timer triggered", fEndpoint
,
271 name_for_state(fState
), fWhich
);
275 TCPEndpoint
* fEndpoint
;
280 class APICall
: public AbstractTraceEntry
{
282 APICall(TCPEndpoint
* endpoint
, const char* which
)
286 fState(endpoint
->State())
291 virtual void AddDump(TraceOutput
& out
)
293 out
.Print("tcp:%p (%12s) api call: %s", fEndpoint
,
294 name_for_state(fState
), fWhich
);
298 TCPEndpoint
* fEndpoint
;
303 } // namespace TCPTracing
305 # define T(x) new(std::nothrow) TCPTracing::x
308 #endif // TCP_TRACING
311 // constants for the fFlags field
313 FLAG_OPTION_WINDOW_SCALE
= 0x01,
314 FLAG_OPTION_TIMESTAMP
= 0x02,
315 // TODO: Should FLAG_NO_RECEIVE apply as well to received connections?
316 // That is, what is expected from accept() after a shutdown()
317 // is performed on a listen()ing socket.
318 FLAG_NO_RECEIVE
= 0x04,
320 FLAG_DELETE_ON_CLOSE
= 0x10,
326 static const int kTimestampFactor
= 1000;
327 // conversion factor between usec system time and msec tcp time
330 static inline bigtime_t
331 absolute_timeout(bigtime_t timeout
)
333 if (timeout
== 0 || timeout
== B_INFINITE_TIMEOUT
)
336 return timeout
+ system_time();
340 static inline status_t
341 posix_error(status_t error
)
343 if (error
== B_TIMED_OUT
)
344 return B_WOULD_BLOCK
;
351 in_window(const tcp_sequence
& sequence
, const tcp_sequence
& receiveNext
,
352 uint32 receiveWindow
)
354 return sequence
>= receiveNext
&& sequence
< (receiveNext
+ receiveWindow
);
359 segment_in_sequence(const tcp_segment_header
& segment
, int size
,
360 const tcp_sequence
& receiveNext
, uint32 receiveWindow
)
362 tcp_sequence
sequence(segment
.sequence
);
364 if (receiveWindow
== 0)
365 return sequence
== receiveNext
;
366 return in_window(sequence
, receiveNext
, receiveWindow
);
368 if (receiveWindow
== 0)
370 return in_window(sequence
, receiveNext
, receiveWindow
)
371 || in_window(sequence
+ size
- 1, receiveNext
, receiveWindow
);
377 is_writable(tcp_state state
)
379 return state
== ESTABLISHED
|| state
== FINISH_RECEIVED
;
384 is_establishing(tcp_state state
)
386 return state
== SYNCHRONIZE_SENT
|| state
== SYNCHRONIZE_RECEIVED
;
390 static inline uint32
tcp_now()
392 return system_time() / kTimestampFactor
;
396 static inline uint32
tcp_diff_timestamp(uint32 base
)
398 uint32 now
= tcp_now();
403 return now
+ UINT_MAX
- base
;
408 state_needs_finish(int32 state
)
410 return state
== WAIT_FOR_FINISH_ACKNOWLEDGE
411 || state
== FINISH_SENT
|| state
== CLOSING
;
418 TCPEndpoint::TCPEndpoint(net_socket
* socket
)
420 ProtocolSocket(socket
),
424 fReceiveWindowShift(0),
425 fSendUnacknowledged(0),
428 fSendUrgentOffset(0),
431 fSendMaxSegmentSize(TCP_DEFAULT_MAX_SEGMENT_SIZE
),
433 fSendQueue(socket
->send
.buffer_size
),
434 fInitialSendSequence(0),
435 fPreviousHighestAcknowledge(0),
436 fDuplicateAcknowledgeCount(0),
437 fPreviousFlightSize(0),
441 fReceiveMaxAdvertised(0),
442 fReceiveWindow(socket
->receive
.buffer_size
),
443 fReceiveMaxSegmentSize(TCP_DEFAULT_MAX_SEGMENT_SIZE
),
444 fReceiveQueue(socket
->receive
.buffer_size
),
445 fSmoothedRoundTripTime(0),
446 fRoundTripVariation(0),
448 fRoundTripStartSequence(0),
449 fRetransmitTimeout(TCP_INITIAL_RTT
),
450 fReceivedTimestamp(0),
451 fCongestionWindow(0),
452 fSlowStartThreshold(0),
454 fFlags(FLAG_OPTION_WINDOW_SCALE
| FLAG_OPTION_TIMESTAMP
)
456 // TODO: to be replaced with a real read/write locking strategy!
457 mutex_init(&fLock
, "tcp lock");
459 fReceiveCondition
.Init(this, "tcp receive");
460 fSendCondition
.Init(this, "tcp send");
462 gStackModule
->init_timer(&fPersistTimer
, TCPEndpoint::_PersistTimer
, this);
463 gStackModule
->init_timer(&fRetransmitTimer
, TCPEndpoint::_RetransmitTimer
,
465 gStackModule
->init_timer(&fDelayedAcknowledgeTimer
,
466 TCPEndpoint::_DelayedAcknowledgeTimer
, this);
467 gStackModule
->init_timer(&fTimeWaitTimer
, TCPEndpoint::_TimeWaitTimer
,
470 T(APICall(this, "constructor"));
474 TCPEndpoint::~TCPEndpoint()
478 T(APICall(this, "destructor"));
480 _CancelConnectionTimers();
481 gStackModule
->cancel_timer(&fTimeWaitTimer
);
482 T(TimerSet(this, "time-wait", -1));
484 if (fManager
!= NULL
) {
485 fManager
->Unbind(this);
486 put_endpoint_manager(fManager
);
489 mutex_destroy(&fLock
);
491 // we need to wait for all timers to return
492 gStackModule
->wait_for_timer(&fRetransmitTimer
);
493 gStackModule
->wait_for_timer(&fPersistTimer
);
494 gStackModule
->wait_for_timer(&fDelayedAcknowledgeTimer
);
495 gStackModule
->wait_for_timer(&fTimeWaitTimer
);
497 gDatalinkModule
->put_route(Domain(), fRoute
);
502 TCPEndpoint::InitCheck() const
508 // #pragma mark - protocol API
515 T(APICall(this, "open"));
517 status_t status
= ProtocolSocket::Open();
521 fManager
= get_endpoint_manager(Domain());
522 if (fManager
== NULL
)
532 MutexLocker
locker(fLock
);
535 T(APICall(this, "close"));
537 if (fState
== LISTEN
)
538 delete_sem(fAcceptSemaphore
);
540 if (fState
== SYNCHRONIZE_SENT
|| fState
== LISTEN
) {
541 // TODO: what about linger in case of SYNCHRONIZE_SENT?
547 status_t status
= _Disconnect(true);
551 if (socket
->options
& SO_LINGER
) {
552 TRACE("Close(): Lingering for %i secs", socket
->linger
);
554 bigtime_t maximum
= absolute_timeout(socket
->linger
* 1000000LL);
556 while (fSendQueue
.Used() > 0) {
557 status
= _WaitForCondition(fSendCondition
, locker
, maximum
);
558 if (status
== B_TIMED_OUT
|| status
== B_WOULD_BLOCK
)
560 else if (status
< B_OK
)
564 TRACE("Close(): after waiting, the SendQ was left with %" B_PRIuSIZE
565 " bytes.", fSendQueue
.Used());
574 MutexLocker
_(fLock
);
577 T(APICall(this, "free"));
579 if (fState
<= SYNCHRONIZE_SENT
)
582 // we are only interested in the timer, not in changing state
585 fFlags
|= FLAG_CLOSED
;
586 if ((fFlags
& FLAG_DELETE_ON_CLOSE
) == 0) {
587 // we'll be freed later when the 2MSL timer expires
588 gSocketModule
->acquire_socket(socket
);
593 /*! Creates and sends a synchronize packet to \a address, and then waits
594 until the connection has been established or refused.
597 TCPEndpoint::Connect(const sockaddr
* address
)
599 if (!AddressModule()->is_same_family(address
))
602 MutexLocker
locker(fLock
);
604 TRACE("Connect() on address %s", PrintAddress(address
));
605 T(APICall(this, "connect"));
607 if (gStackModule
->is_restarted_syscall()) {
608 bigtime_t timeout
= gStackModule
->restore_syscall_restart_timeout();
609 status_t status
= _WaitForEstablished(locker
, timeout
);
610 TRACE(" Connect(): Connection complete: %s (timeout was %"
611 B_PRIdBIGTIME
")", strerror(status
), timeout
);
612 return posix_error(status
);
615 // Can only call connect() from CLOSED or LISTEN states
616 // otherwise endpoint is considered already connected
617 if (fState
== LISTEN
) {
618 // this socket is about to connect; remove pending connections in the backlog
619 gSocketModule
->set_max_backlog(socket
, 0);
620 } else if (fState
== ESTABLISHED
) {
622 } else if (fState
!= CLOSED
)
625 // consider destination address INADDR_ANY as INADDR_LOOPBACK
626 sockaddr_storage _address
;
627 if (AddressModule()->is_empty_address(address
, false)) {
628 AddressModule()->get_loopback_address((sockaddr
*)&_address
);
629 // for IPv4 and IPv6 the port is at the same offset
630 ((sockaddr_in
&)_address
).sin_port
= ((sockaddr_in
*)address
)->sin_port
;
631 address
= (sockaddr
*)&_address
;
634 status_t status
= _PrepareSendPath(address
);
638 TRACE(" Connect(): starting 3-way handshake...");
640 fState
= SYNCHRONIZE_SENT
;
644 status
= _SendQueued();
645 if (status
!= B_OK
) {
650 // If we are running over Loopback, after _SendQueued() returns we
651 // may be in ESTABLISHED already.
652 if (fState
== ESTABLISHED
) {
653 TRACE(" Connect() completed after _SendQueued()");
657 // wait until 3-way handshake is complete (if needed)
658 bigtime_t timeout
= min_c(socket
->send
.timeout
, TCP_CONNECTION_TIMEOUT
);
660 // we're a non-blocking socket
661 TRACE(" Connect() delayed, return EINPROGRESS");
665 bigtime_t absoluteTimeout
= absolute_timeout(timeout
);
666 gStackModule
->store_syscall_restart_timeout(absoluteTimeout
);
668 status
= _WaitForEstablished(locker
, absoluteTimeout
);
669 TRACE(" Connect(): Connection complete: %s (timeout was %" B_PRIdBIGTIME
670 ")", strerror(status
), timeout
);
671 return posix_error(status
);
676 TCPEndpoint::Accept(struct net_socket
** _acceptedSocket
)
678 MutexLocker
locker(fLock
);
681 T(APICall(this, "accept"));
684 bigtime_t timeout
= absolute_timeout(socket
->receive
.timeout
);
685 if (gStackModule
->is_restarted_syscall())
686 timeout
= gStackModule
->restore_syscall_restart_timeout();
688 gStackModule
->store_syscall_restart_timeout(timeout
);
693 status
= acquire_sem_etc(fAcceptSemaphore
, 1, B_ABSOLUTE_TIMEOUT
694 | B_CAN_INTERRUPT
, timeout
);
695 if (status
!= B_OK
) {
696 if (status
== B_TIMED_OUT
&& socket
->receive
.timeout
== 0)
697 return B_WOULD_BLOCK
;
703 status
= gSocketModule
->dequeue_connected(socket
, _acceptedSocket
);
706 TRACE(" Accept() returning %p", (*_acceptedSocket
)->first_protocol
);
708 } while (status
!= B_OK
);
715 TCPEndpoint::Bind(const sockaddr
*address
)
720 MutexLocker
lock(fLock
);
722 TRACE("Bind() on address %s", PrintAddress(address
));
723 T(APICall(this, "bind"));
725 if (fState
!= CLOSED
)
728 return fManager
->Bind(this, address
);
733 TCPEndpoint::Unbind(struct sockaddr
*address
)
735 MutexLocker
_(fLock
);
738 T(APICall(this, "unbind"));
740 return fManager
->Unbind(this);
745 TCPEndpoint::Listen(int count
)
747 MutexLocker
_(fLock
);
750 T(APICall(this, "listen"));
752 if (fState
!= CLOSED
&& fState
!= LISTEN
)
755 if (fState
== CLOSED
) {
756 fAcceptSemaphore
= create_sem(0, "tcp accept");
757 if (fAcceptSemaphore
< B_OK
)
760 status_t status
= fManager
->SetPassive(this);
761 if (status
!= B_OK
) {
762 delete_sem(fAcceptSemaphore
);
763 fAcceptSemaphore
= -1;
768 gSocketModule
->set_max_backlog(socket
, count
);
777 TCPEndpoint::Shutdown(int direction
)
779 MutexLocker
lock(fLock
);
781 TRACE("Shutdown(%i)", direction
);
782 T(APICall(this, "shutdown"));
784 if (direction
== SHUT_RD
|| direction
== SHUT_RDWR
)
785 fFlags
|= FLAG_NO_RECEIVE
;
787 if (direction
== SHUT_WR
|| direction
== SHUT_RDWR
) {
788 // TODO: That's not correct. After read/write shutting down the socket
789 // one should still be able to read previously arrived data.
797 /*! Puts data contained in \a buffer into send buffer */
799 TCPEndpoint::SendData(net_buffer
*buffer
)
801 MutexLocker
lock(fLock
);
803 TRACE("SendData(buffer %p, size %" B_PRIu32
", flags %#" B_PRIx32
804 ") [total %" B_PRIuSIZE
" bytes, has %" B_PRIuSIZE
"]", buffer
,
805 buffer
->size
, buffer
->flags
, fSendQueue
.Size(), fSendQueue
.Free());
806 T(APICall(this, "senddata"));
808 uint32 flags
= buffer
->flags
;
810 if (fState
== CLOSED
)
812 if (fState
== LISTEN
)
814 if (!is_writable(fState
) && !is_establishing(fState
)) {
815 // we only send signals when called from userland
816 if (gStackModule
->is_syscall() && (flags
& MSG_NOSIGNAL
) == 0)
817 send_signal(find_thread(NULL
), SIGPIPE
);
821 size_t left
= buffer
->size
;
823 bigtime_t timeout
= absolute_timeout(socket
->send
.timeout
);
824 if (gStackModule
->is_restarted_syscall())
825 timeout
= gStackModule
->restore_syscall_restart_timeout();
827 gStackModule
->store_syscall_restart_timeout(timeout
);
830 while (fSendQueue
.Free() < socket
->send
.low_water_mark
) {
831 // wait until enough space is available
832 status_t status
= _WaitForCondition(fSendCondition
, lock
, timeout
);
834 TRACE(" SendData() returning %s (%d)",
835 strerror(posix_error(status
)), (int)posix_error(status
));
836 return posix_error(status
);
839 if (!is_writable(fState
) && !is_establishing(fState
)) {
840 // we only send signals when called from userland
841 if (gStackModule
->is_syscall())
842 send_signal(find_thread(NULL
), SIGPIPE
);
847 size_t size
= fSendQueue
.Free();
849 // we need to split the original buffer
850 net_buffer
* clone
= gBufferModule
->clone(buffer
, false);
851 // TODO: add offset/size parameter to net_buffer::clone() or
852 // even a move_data() function, as this is a bit inefficient
856 status_t status
= gBufferModule
->trim(clone
, size
);
857 if (status
!= B_OK
) {
858 gBufferModule
->free(clone
);
862 gBufferModule
->remove_header(buffer
, size
);
864 fSendQueue
.Add(clone
);
866 left
-= buffer
->size
;
867 fSendQueue
.Add(buffer
);
871 TRACE(" SendData(): %" B_PRIuSIZE
" bytes used.", fSendQueue
.Used());
874 if ((flags
& MSG_OOB
) != 0) {
875 fSendUrgentOffset
= fSendQueue
.LastSequence();
876 // RFC 961 specifies that the urgent offset points to the last
877 // byte of urgent data. However, this is commonly implemented as
878 // here, ie. it points to the first byte after the urgent data.
881 if ((flags
& MSG_EOF
) != 0)
884 if (fState
== ESTABLISHED
|| fState
== FINISH_RECEIVED
)
892 TCPEndpoint::SendAvailable()
894 MutexLocker
locker(fLock
);
898 if (is_writable(fState
))
899 available
= fSendQueue
.Free();
900 else if (is_establishing(fState
))
905 TRACE("SendAvailable(): %" B_PRIdSSIZE
, available
);
906 T(APICall(this, "sendavailable"));
912 TCPEndpoint::FillStat(net_stat
*stat
)
914 MutexLocker
_(fLock
);
916 strlcpy(stat
->state
, name_for_state(fState
), sizeof(stat
->state
));
917 stat
->receive_queue_size
= fReceiveQueue
.Available();
918 stat
->send_queue_size
= fSendQueue
.Used();
925 TCPEndpoint::ReadData(size_t numBytes
, uint32 flags
, net_buffer
** _buffer
)
927 MutexLocker
locker(fLock
);
929 TRACE("ReadData(%" B_PRIuSIZE
" bytes, flags %#" B_PRIx32
")", numBytes
,
931 T(APICall(this, "readdata"));
935 if (fState
== CLOSED
)
938 bigtime_t timeout
= absolute_timeout(socket
->receive
.timeout
);
939 if (gStackModule
->is_restarted_syscall())
940 timeout
= gStackModule
->restore_syscall_restart_timeout();
942 gStackModule
->store_syscall_restart_timeout(timeout
);
944 if (fState
== SYNCHRONIZE_SENT
|| fState
== SYNCHRONIZE_RECEIVED
) {
945 if (flags
& MSG_DONTWAIT
)
946 return B_WOULD_BLOCK
;
948 status_t status
= _WaitForEstablished(locker
, timeout
);
950 return posix_error(status
);
953 size_t dataNeeded
= socket
->receive
.low_water_mark
;
955 // When MSG_WAITALL is set then the function should block
956 // until the full amount of data can be returned.
957 if (flags
& MSG_WAITALL
)
958 dataNeeded
= numBytes
;
960 // TODO: add support for urgent data (MSG_OOB)
963 if (fState
== CLOSING
|| fState
== WAIT_FOR_FINISH_ACKNOWLEDGE
964 || fState
== TIME_WAIT
) {
965 // ``Connection closing''.
969 if (fReceiveQueue
.Available() > 0) {
970 if (fReceiveQueue
.Available() >= dataNeeded
971 || (fReceiveQueue
.PushedData() > 0
972 && fReceiveQueue
.PushedData() >= fReceiveQueue
.Available()))
974 } else if (fState
== FINISH_RECEIVED
) {
975 // ``If no text is awaiting delivery, the RECEIVE will
976 // get a Connection closing''.
980 if ((flags
& MSG_DONTWAIT
) != 0 || socket
->receive
.timeout
== 0)
981 return B_WOULD_BLOCK
;
983 if ((fFlags
& FLAG_NO_RECEIVE
) != 0)
986 status_t status
= _WaitForCondition(fReceiveCondition
, locker
, timeout
);
988 // The Open Group base specification mentions that EINTR should be
989 // returned if the recv() is interrupted before _any data_ is
990 // available. So we actually check if there is data, and if so,
991 // push it to the user.
992 if ((status
== B_TIMED_OUT
|| status
== B_INTERRUPTED
)
993 && fReceiveQueue
.Available() > 0)
996 return posix_error(status
);
1000 TRACE(" ReadData(): %" B_PRIuSIZE
" are available.",
1001 fReceiveQueue
.Available());
1003 if (numBytes
< fReceiveQueue
.Available())
1004 fReceiveCondition
.NotifyAll();
1006 bool clone
= (flags
& MSG_PEEK
) != 0;
1008 ssize_t receivedBytes
= fReceiveQueue
.Get(numBytes
, !clone
, _buffer
);
1010 TRACE(" ReadData(): %" B_PRIuSIZE
" bytes kept.",
1011 fReceiveQueue
.Available());
1013 // if we are opening the window, check if we should send an ACK
1015 SendAcknowledge(false);
1017 return receivedBytes
;
1022 TCPEndpoint::ReadAvailable()
1024 MutexLocker
locker(fLock
);
1026 TRACE("ReadAvailable(): %" B_PRIdSSIZE
, _AvailableData());
1027 T(APICall(this, "readavailable"));
1029 return _AvailableData();
1034 TCPEndpoint::SetSendBufferSize(size_t length
)
1036 MutexLocker
_(fLock
);
1037 fSendQueue
.SetMaxBytes(length
);
1043 TCPEndpoint::SetReceiveBufferSize(size_t length
)
1045 MutexLocker
_(fLock
);
1046 fReceiveQueue
.SetMaxBytes(length
);
1052 TCPEndpoint::GetOption(int option
, void* _value
, int* _length
)
1054 if (*_length
!= sizeof(int))
1057 int* value
= (int*)_value
;
1061 if ((fOptions
& TCP_NODELAY
) != 0)
1068 *value
= fReceiveMaxSegmentSize
;
1078 TCPEndpoint::SetOption(int option
, const void* _value
, int length
)
1080 if (option
!= TCP_NODELAY
)
1083 if (length
!= sizeof(int))
1086 const int* value
= (const int*)_value
;
1088 MutexLocker
_(fLock
);
1090 fOptions
|= TCP_NODELAY
;
1092 fOptions
&= ~TCP_NODELAY
;
1098 // #pragma mark - misc
1102 TCPEndpoint::IsBound() const
1104 return !LocalAddress().IsEmpty(true);
1109 TCPEndpoint::IsLocal() const
1111 return (fFlags
& FLAG_LOCAL
) != 0;
1116 TCPEndpoint::DelayedAcknowledge()
1118 if (gStackModule
->cancel_timer(&fDelayedAcknowledgeTimer
)) {
1119 // timer was active, send an ACK now (with the exception above,
1120 // we send every other ACK)
1121 T(TimerSet(this, "delayed ack", -1));
1122 return SendAcknowledge(true);
1125 gStackModule
->set_timer(&fDelayedAcknowledgeTimer
,
1126 TCP_DELAYED_ACKNOWLEDGE_TIMEOUT
);
1127 T(TimerSet(this, "delayed ack", TCP_DELAYED_ACKNOWLEDGE_TIMEOUT
));
1133 TCPEndpoint::SendAcknowledge(bool force
)
1135 return _SendQueued(force
, 0);
1140 TCPEndpoint::_StartPersistTimer()
1142 gStackModule
->set_timer(&fPersistTimer
, TCP_PERSIST_TIMEOUT
);
1143 T(TimerSet(this, "persist", TCP_PERSIST_TIMEOUT
));
1148 TCPEndpoint::_EnterTimeWait()
1150 TRACE("_EnterTimeWait()");
1152 if (fState
== TIME_WAIT
) {
1153 _CancelConnectionTimers();
1156 // we do not use TIME_WAIT state for local connections
1157 fFlags
|= FLAG_DELETE_ON_CLOSE
;
1167 TCPEndpoint::_UpdateTimeWait()
1169 gStackModule
->set_timer(&fTimeWaitTimer
, TCP_MAX_SEGMENT_LIFETIME
<< 1);
1170 T(TimerSet(this, "time-wait", TCP_MAX_SEGMENT_LIFETIME
<< 1));
1175 TCPEndpoint::_CancelConnectionTimers()
1177 gStackModule
->cancel_timer(&fRetransmitTimer
);
1178 T(TimerSet(this, "retransmit", -1));
1179 gStackModule
->cancel_timer(&fPersistTimer
);
1180 T(TimerSet(this, "persist", -1));
1181 gStackModule
->cancel_timer(&fDelayedAcknowledgeTimer
);
1182 T(TimerSet(this, "delayed ack", -1));
1186 /*! Sends the FIN flag to the peer when the connection is still open.
1187 Moves the endpoint to the next state depending on where it was.
1190 TCPEndpoint::_Disconnect(bool closing
)
1192 tcp_state previousState
= fState
;
1194 if (fState
== SYNCHRONIZE_RECEIVED
|| fState
== ESTABLISHED
)
1195 fState
= FINISH_SENT
;
1196 else if (fState
== FINISH_RECEIVED
)
1197 fState
= WAIT_FOR_FINISH_ACKNOWLEDGE
;
1203 status_t status
= _SendQueued();
1204 if (status
!= B_OK
) {
1205 fState
= previousState
;
1215 TCPEndpoint::_MarkEstablished()
1217 fState
= ESTABLISHED
;
1220 if (gSocketModule
->has_parent(socket
)) {
1221 gSocketModule
->set_connected(socket
);
1222 release_sem_etc(fAcceptSemaphore
, 1, B_DO_NOT_RESCHEDULE
);
1225 fSendCondition
.NotifyAll();
1226 gSocketModule
->notify(socket
, B_SELECT_WRITE
, fSendQueue
.Free());
1231 TCPEndpoint::_WaitForEstablished(MutexLocker
&locker
, bigtime_t timeout
)
1233 // TODO: Checking for CLOSED seems correct, but breaks several neon tests.
1234 // When investigating this, also have a look at _Close() and _HandleReset().
1235 while (fState
< ESTABLISHED
/* && fState != CLOSED*/) {
1236 if (socket
->error
!= B_OK
)
1237 return socket
->error
;
1239 status_t status
= _WaitForCondition(fSendCondition
, locker
, timeout
);
1248 // #pragma mark - receive
1252 TCPEndpoint::_Close()
1254 _CancelConnectionTimers();
1258 fFlags
|= FLAG_DELETE_ON_CLOSE
;
1260 fSendCondition
.NotifyAll();
1263 if (gSocketModule
->has_parent(socket
)) {
1264 // We still have a parent - obviously, we haven't been accepted yet,
1265 // so no one could ever close us.
1266 _CancelConnectionTimers();
1267 gSocketModule
->set_aborted(socket
);
1273 TCPEndpoint::_HandleReset(status_t error
)
1275 socket
->error
= error
;
1278 gSocketModule
->notify(socket
, B_SELECT_WRITE
, error
);
1279 gSocketModule
->notify(socket
, B_SELECT_ERROR
, error
);
1284 TCPEndpoint::_DuplicateAcknowledge(tcp_segment_header
&segment
)
1286 if (fDuplicateAcknowledgeCount
== 0)
1287 fPreviousFlightSize
= (fSendMax
- fSendUnacknowledged
).Number();
1289 if (++fDuplicateAcknowledgeCount
< 3) {
1290 if (fSendQueue
.Available(fSendMax
) != 0 && fSendWindow
!= 0) {
1291 fSendNext
= fSendMax
;
1292 fCongestionWindow
+= fDuplicateAcknowledgeCount
* fSendMaxSegmentSize
;
1294 TRACE("_DuplicateAcknowledge(): packet sent under limited transmit on receipt of dup ack");
1295 fCongestionWindow
-= fDuplicateAcknowledgeCount
* fSendMaxSegmentSize
;
1299 if (fDuplicateAcknowledgeCount
== 3) {
1300 if ((segment
.acknowledge
- 1) > fRecover
|| (fCongestionWindow
> fSendMaxSegmentSize
&&
1301 (fSendUnacknowledged
- fPreviousHighestAcknowledge
) <= 4 * fSendMaxSegmentSize
)) {
1302 fFlags
|= FLAG_RECOVERY
;
1303 fRecover
= fSendMax
.Number() - 1;
1304 fSlowStartThreshold
= max_c(fPreviousFlightSize
/ 2, 2 * fSendMaxSegmentSize
);
1305 fCongestionWindow
= fSlowStartThreshold
+ 3 * fSendMaxSegmentSize
;
1306 fSendNext
= segment
.acknowledge
;
1308 TRACE("_DuplicateAcknowledge(): packet sent under fast restransmit on the receipt of 3rd dup ack");
1310 } else if (fDuplicateAcknowledgeCount
> 3) {
1311 uint32 flightSize
= (fSendMax
- fSendUnacknowledged
).Number();
1312 if ((fDuplicateAcknowledgeCount
- 3) * fSendMaxSegmentSize
<= flightSize
)
1313 fCongestionWindow
+= fSendMaxSegmentSize
;
1314 if (fSendQueue
.Available(fSendMax
) != 0) {
1315 fSendNext
= fSendMax
;
1323 TCPEndpoint::_UpdateTimestamps(tcp_segment_header
& segment
,
1324 size_t segmentLength
)
1326 if (fFlags
& FLAG_OPTION_TIMESTAMP
) {
1327 tcp_sequence
sequence(segment
.sequence
);
1329 if (fLastAcknowledgeSent
>= sequence
1330 && fLastAcknowledgeSent
< (sequence
+ segmentLength
))
1331 fReceivedTimestamp
= segment
.timestamp_value
;
1337 TCPEndpoint::_AvailableData() const
1339 // TODO: Refer to the FLAG_NO_RECEIVE comment above regarding
1340 // the application of FLAG_NO_RECEIVE in listen()ing
1342 if (fState
== LISTEN
)
1343 return gSocketModule
->count_connected(socket
);
1344 if (fState
== SYNCHRONIZE_SENT
)
1347 ssize_t availableData
= fReceiveQueue
.Available();
1349 if (availableData
== 0 && !_ShouldReceive())
1352 return availableData
;
1357 TCPEndpoint::_NotifyReader()
1359 fReceiveCondition
.NotifyAll();
1360 gSocketModule
->notify(socket
, B_SELECT_READ
, _AvailableData());
1365 TCPEndpoint::_AddData(tcp_segment_header
& segment
, net_buffer
* buffer
)
1367 if ((segment
.flags
& TCP_FLAG_FINISH
) != 0) {
1368 // Remember the position of the finish received flag
1369 fFinishReceived
= true;
1370 fFinishReceivedAt
= segment
.sequence
+ buffer
->size
;
1373 fReceiveQueue
.Add(buffer
, segment
.sequence
);
1374 fReceiveNext
= fReceiveQueue
.NextSequence();
1376 if (fFinishReceived
) {
1377 // Set or reset the finish flag on the current segment
1378 if (fReceiveNext
< fFinishReceivedAt
)
1379 segment
.flags
&= ~TCP_FLAG_FINISH
;
1381 segment
.flags
|= TCP_FLAG_FINISH
;
1384 TRACE(" _AddData(): adding data, receive next = %" B_PRIu32
". Now have %"
1385 B_PRIuSIZE
" bytes.", fReceiveNext
.Number(), fReceiveQueue
.Available());
1387 if ((segment
.flags
& TCP_FLAG_PUSH
) != 0)
1388 fReceiveQueue
.SetPushPointer();
1390 return fReceiveQueue
.Available() > 0;
1395 TCPEndpoint::_PrepareReceivePath(tcp_segment_header
& segment
)
1397 fInitialReceiveSequence
= segment
.sequence
;
1398 fFinishReceived
= false;
1400 // count the received SYN
1403 fReceiveNext
= segment
.sequence
;
1404 fReceiveQueue
.SetInitialSequence(segment
.sequence
);
1406 if ((fOptions
& TCP_NOOPT
) == 0) {
1407 if (segment
.max_segment_size
> 0)
1408 fSendMaxSegmentSize
= segment
.max_segment_size
;
1410 if (segment
.options
& TCP_HAS_WINDOW_SCALE
) {
1411 fFlags
|= FLAG_OPTION_WINDOW_SCALE
;
1412 fSendWindowShift
= segment
.window_shift
;
1414 fFlags
&= ~FLAG_OPTION_WINDOW_SCALE
;
1415 fReceiveWindowShift
= 0;
1418 if (segment
.options
& TCP_HAS_TIMESTAMPS
) {
1419 fFlags
|= FLAG_OPTION_TIMESTAMP
;
1420 fReceivedTimestamp
= segment
.timestamp_value
;
1422 fFlags
&= ~FLAG_OPTION_TIMESTAMP
;
1425 if (fSendMaxSegmentSize
> 2190)
1426 fCongestionWindow
= 2 * fSendMaxSegmentSize
;
1427 else if (fSendMaxSegmentSize
> 1095)
1428 fCongestionWindow
= 3 * fSendMaxSegmentSize
;
1430 fCongestionWindow
= 4 * fSendMaxSegmentSize
;
1432 fSendMaxSegments
= fCongestionWindow
/ fSendMaxSegmentSize
;
1433 fSlowStartThreshold
= (uint32
)segment
.advertised_window
<< fSendWindowShift
;
1438 TCPEndpoint::_ShouldReceive() const
1440 if ((fFlags
& FLAG_NO_RECEIVE
) != 0)
1443 return fState
== ESTABLISHED
|| fState
== FINISH_SENT
1444 || fState
== FINISH_ACKNOWLEDGED
;
1449 TCPEndpoint::_Spawn(TCPEndpoint
* parent
, tcp_segment_header
& segment
,
1452 MutexLocker
_(fLock
);
1454 // TODO error checking
1455 ProtocolSocket::Open();
1457 fState
= SYNCHRONIZE_RECEIVED
;
1458 T(Spawn(parent
, this));
1460 fManager
= parent
->fManager
;
1462 LocalAddress().SetTo(buffer
->destination
);
1463 PeerAddress().SetTo(buffer
->source
);
1467 // TODO: proper error handling!
1468 if (fManager
->BindChild(this) != B_OK
) {
1469 T(Error(this, "binding failed", __LINE__
));
1472 if (_PrepareSendPath(*PeerAddress()) != B_OK
) {
1473 T(Error(this, "prepare send faild", __LINE__
));
1477 fOptions
= parent
->fOptions
;
1478 fAcceptSemaphore
= parent
->fAcceptSemaphore
;
1480 _PrepareReceivePath(segment
);
1483 if (_SendQueued() != B_OK
) {
1484 T(Error(this, "sending failed", __LINE__
));
1488 segment
.flags
&= ~TCP_FLAG_SYNCHRONIZE
;
1489 // we handled this flag now, it must not be set for further processing
1491 return _Receive(segment
, buffer
);
// Handles a segment that arrives while the endpoint is listening
// (SegmentReceived() dispatches here). Visible checks, in order: RST flag,
// ACK flag (answered with DROP | RESET), missing SYN flag. A segment passing
// them gets a new pending socket from the socket module, and the segment is
// handed to that socket's TCP endpoint through _Spawn().
// NOTE(review): the actions taken for RST and for non-SYN segments are not
// visible in this excerpt.
1496 TCPEndpoint::_ListenReceive(tcp_segment_header
& segment
, net_buffer
* buffer
)
1498 TRACE("ListenReceive()");
1500 // Essentially, we accept only TCP_FLAG_SYNCHRONIZE in this state,
1501 // but the error behaviour differs
1502 if (segment
.flags
& TCP_FLAG_RESET
)
1504 if (segment
.flags
& TCP_FLAG_ACKNOWLEDGE
)
1505 return DROP
| RESET
;
1506 if ((segment
.flags
& TCP_FLAG_SYNCHRONIZE
) == 0)
1509 // TODO: drop broadcast/multicast
1511 // spawn new endpoint for accept()
1512 net_socket
* newSocket
;
1513 if (gSocketModule
->spawn_pending_socket(socket
, &newSocket
) < B_OK
) {
1514 T(Error(this, "spawning failed", __LINE__
));
// The new socket's first protocol is treated as the TCPEndpoint that takes
// over the connection.
1518 return ((TCPEndpoint
*)newSocket
->first_protocol
)->_Spawn(this,
// Handles a segment that arrives in the SYNCHRONIZE_SENT state
// (SegmentReceived() dispatches here). Visible steps: validate the
// acknowledgement number, handle RST via _HandleReset(ECONNREFUSED), check
// for the SYN flag, record the acknowledgement, prepare the receive path,
// and pass the segment on to _Receive() with the SYN flag cleared. The
// result of _Receive() is returned with IMMEDIATE_ACKNOWLEDGE added.
1524 TCPEndpoint::_SynchronizeSentReceive(tcp_segment_header
&segment
,
1527 TRACE("_SynchronizeSentReceive()");
// An ACK must lie above our initial sequence number and not beyond fSendMax;
// anything else is answered with a reset.
1529 if ((segment
.flags
& TCP_FLAG_ACKNOWLEDGE
) != 0
1530 && (fInitialSendSequence
>= segment
.acknowledge
1531 || fSendMax
< segment
.acknowledge
))
1532 return DROP
| RESET
;
1534 if (segment
.flags
& TCP_FLAG_RESET
) {
1535 _HandleReset(ECONNREFUSED
);
// NOTE(review): the action for a segment without SYN is not visible in this
// excerpt.
1539 if ((segment
.flags
& TCP_FLAG_SYNCHRONIZE
) == 0)
1542 fSendUnacknowledged
= segment
.acknowledge
;
1543 _PrepareReceivePath(segment
);
// NOTE(review): the body of the ACK branch is not visible here; the visible
// alternative (SYN without ACK) moves to SYNCHRONIZE_RECEIVED.
1545 if (segment
.flags
& TCP_FLAG_ACKNOWLEDGE
) {
1548 // simultaneous open
1549 fState
= SYNCHRONIZE_RECEIVED
;
1553 segment
.flags
&= ~TCP_FLAG_SYNCHRONIZE
;
1554 // we handled this flag now, it must not be set for further processing
1556 return _Receive(segment
, buffer
) | IMMEDIATE_ACKNOWLEDGE
;
// General processing of an incoming segment for every state that
// SegmentReceived() routes here. Visible order of processing:
//  1. PAWS timestamp check
//  2. header prediction fast path (ESTABLISHED only)
//  3. sequence number, RST and SYN validation
//  4. trimming the buffer to the receive window
//  5. send window update
//  6. acknowledgement processing (duplicate ACKs, recovery, FIN acknowledged)
//  7. urgent flag, queuing of data, FIN processing
//  8. timestamp update
// Early exits return combinations of DROP, KEEP, RESET, ACKNOWLEDGE and
// IMMEDIATE_ACKNOWLEDGE; the remaining path accumulates them in `action`.
1561 TCPEndpoint::_Receive(tcp_segment_header
& segment
, net_buffer
* buffer
)
1563 // PAWS processing takes precedence over regular TCP acceptability check
1564 if ((fFlags
& FLAG_OPTION_TIMESTAMP
) != 0 && (segment
.flags
& TCP_FLAG_RESET
) == 0) {
// NOTE(review): the action for a segment lacking the timestamp option is not
// visible in this excerpt.
1565 if ((segment
.options
& TCP_HAS_TIMESTAMPS
) == 0)
// A timestamp older than the last one recorded (compared as a signed 32 bit
// difference) gets the segment dropped and acknowledged immediately.
1567 if ((int32
)(fReceivedTimestamp
- segment
.timestamp_value
) > 0
1568 && (fReceivedTimestamp
- segment
.timestamp_value
) <= INT32_MAX
)
1569 return DROP
| IMMEDIATE_ACKNOWLEDGE
;
// The peer's window field is scaled by the send window shift.
1572 uint32 advertisedWindow
= (uint32
)segment
.advertised_window
1573 << fSendWindowShift
;
1574 size_t segmentLength
= buffer
->size
;
1576 // First, handle the most common case for uni-directional data transfer
1577 // (known as header prediction - the segment must not change the window,
1578 // and must be the expected sequence, and contain no control flags)
1580 if (fState
== ESTABLISHED
1581 && segment
.AcknowledgeOnly()
1582 && fReceiveNext
== segment
.sequence
1583 && advertisedWindow
> 0 && advertisedWindow
== fSendWindow
1584 && fSendNext
== fSendMax
) {
1585 _UpdateTimestamps(segment
, segmentLength
);
1587 if (segmentLength
== 0) {
1588 // this is a pure acknowledge segment - we're on the sending end
1589 if (fSendUnacknowledged
< segment
.acknowledge
1590 && fSendMax
>= segment
.acknowledge
) {
1591 _Acknowledged(segment
);
// Otherwise: in-order data that acknowledges nothing new, arriving while the
// receive queue is contiguous and has room - we're on the receiving end.
1594 } else if (segment
.acknowledge
== fSendUnacknowledged
1595 && fReceiveQueue
.IsContiguous()
1596 && fReceiveQueue
.Free() >= segmentLength
1597 && (fFlags
& FLAG_NO_RECEIVE
) == 0) {
1598 if (_AddData(segment
, buffer
))
// A pushed segment is acknowledged immediately, others may be delayed.
1601 return KEEP
| ((segment
.flags
& TCP_FLAG_PUSH
) != 0
1602 ? IMMEDIATE_ACKNOWLEDGE
: ACKNOWLEDGE
);
1606 // The fast path was not applicable, so we continue with the standard
1607 // processing of the incoming segment
1609 ASSERT(fState
!= SYNCHRONIZE_SENT
&& fState
!= LISTEN
);
1611 if (fState
!= CLOSED
&& fState
!= TIME_WAIT
) {
1612 // Check sequence number
1613 if (!segment_in_sequence(segment
, segmentLength
, fReceiveNext
,
1615 TRACE(" Receive(): segment out of window, next: %" B_PRIu32
1616 " wnd: %" B_PRIu32
, fReceiveNext
.Number(), fReceiveWindow
);
1617 if ((segment
.flags
& TCP_FLAG_RESET
) != 0) {
1618 // TODO: this doesn't look right - review!
1621 return DROP
| IMMEDIATE_ACKNOWLEDGE
;
1625 if ((segment
.flags
& TCP_FLAG_RESET
) != 0) {
1626 // Is this a valid reset?
1627 // We generally ignore resets in time wait state (see RFC 1337)
1628 if (fLastAcknowledgeSent
<= segment
.sequence
1629 && tcp_sequence(segment
.sequence
) < (fLastAcknowledgeSent
1631 && fState
!= TIME_WAIT
) {
// The error reported through _HandleReset() depends on the state in which
// the reset arrived.
1633 if (fState
== SYNCHRONIZE_RECEIVED
)
1634 error
= ECONNREFUSED
;
1635 else if (fState
== CLOSING
|| fState
== WAIT_FOR_FINISH_ACKNOWLEDGE
)
1640 _HandleReset(error
);
1646 if ((segment
.flags
& TCP_FLAG_SYNCHRONIZE
) != 0
1647 || (fState
== SYNCHRONIZE_RECEIVED
1648 && (fInitialReceiveSequence
> segment
.sequence
1649 || ((segment
.flags
& TCP_FLAG_ACKNOWLEDGE
) != 0
1650 && (fSendUnacknowledged
> segment
.acknowledge
1651 || fSendMax
< segment
.acknowledge
))))) {
1652 // reset the connection - either the initial SYN was faulty, or we
1653 // received a SYN within the data stream
1654 return DROP
| RESET
;
1657 // TODO: Check this! Why do we advertise a window outside of what we should
1659 fReceiveWindow
= max_c(fReceiveQueue
.Free(), fReceiveWindow
);
1660 // the window must not shrink
1662 // trim buffer to be within the receive window
// `drop` is the number of bytes at the start of the segment that lie before
// fReceiveNext.
1663 int32 drop
= (int32
)(fReceiveNext
- segment
.sequence
).Number();
1665 if ((uint32
)drop
> buffer
->size
1666 || ((uint32
)drop
== buffer
->size
1667 && (segment
.flags
& TCP_FLAG_FINISH
) == 0)) {
1668 // don't accidentally remove a FIN we shouldn't remove
1669 segment
.flags
&= ~TCP_FLAG_FINISH
;
1670 drop
= buffer
->size
;
1673 // remove duplicate data at the start
1674 TRACE("* remove %" B_PRId32
" bytes from the start", drop
);
1675 gBufferModule
->remove_header(buffer
, drop
);
1676 segment
.sequence
+= drop
;
1679 int32 action
= KEEP
;
1681 // immediately acknowledge out-of-order segment to trigger fast-retransmit at the sender
1683 action
|= IMMEDIATE_ACKNOWLEDGE
;
// `drop` now becomes the number of bytes that reach beyond the end of the
// receive window (fReceiveNext + fReceiveWindow).
1685 drop
= (int32
)(segment
.sequence
+ buffer
->size
1686 - (fReceiveNext
+ fReceiveWindow
)).Number();
1688 // remove data exceeding our window
1689 if ((uint32
)drop
>= buffer
->size
) {
1690 // if we can accept data, or the segment is not what we'd expect,
1691 // drop the segment (an immediate acknowledge is always triggered)
1692 if (fReceiveWindow
!= 0 || segment
.sequence
!= fReceiveNext
)
1693 return DROP
| IMMEDIATE_ACKNOWLEDGE
;
1695 action
|= IMMEDIATE_ACKNOWLEDGE
;
1698 if ((segment
.flags
& TCP_FLAG_FINISH
) != 0) {
1699 // we need to remove the finish, too, as part of the data
1703 segment
.flags
&= ~(TCP_FLAG_FINISH
| TCP_FLAG_PUSH
);
1704 TRACE("* remove %" B_PRId32
" bytes from the end", drop
);
1705 gBufferModule
->remove_trailer(buffer
, drop
);
1709 if (advertisedWindow
> fSendWindow
) {
1710 TRACE(" Receive(): Window update %" B_PRIu32
" -> %" B_PRIu32
,
1711 fSendWindow
, advertisedWindow
);
// A growing peer window makes us answer right away.
1715 if (advertisedWindow
> fSendWindow
)
1716 action
|= IMMEDIATE_ACKNOWLEDGE
;
1718 fSendWindow
= advertisedWindow
;
1719 if (advertisedWindow
> fSendMaxWindow
)
1720 fSendMaxWindow
= advertisedWindow
;
1722 // Then look at the acknowledgement for any updates
1724 if ((segment
.flags
& TCP_FLAG_ACKNOWLEDGE
) != 0) {
1725 // process acknowledged data
1726 if (fState
== SYNCHRONIZE_RECEIVED
)
// An acknowledgement for data we have not sent yet is dropped.
1729 if (fSendMax
< segment
.acknowledge
)
1730 return DROP
| IMMEDIATE_ACKNOWLEDGE
;
1732 if (segment
.acknowledge
== fSendUnacknowledged
) {
// A duplicate ACK carries no data, no window change and no FIN while data
// is still in flight.
1733 if (buffer
->size
== 0 && advertisedWindow
== fSendWindow
1734 && (segment
.flags
& TCP_FLAG_FINISH
) == 0 && fSendUnacknowledged
!= fSendMax
) {
1735 TRACE("Receive(): duplicate ack!");
1736 _DuplicateAcknowledge(segment
);
1738 } else if (segment
.acknowledge
< fSendUnacknowledged
) {
1741 // this segment acknowledges in flight data
1743 if (fDuplicateAcknowledgeCount
>= 3) {
1744 // deflate the window.
1745 if (segment
.acknowledge
> fRecover
) {
1746 uint32 flightSize
= (fSendMax
- fSendUnacknowledged
).Number();
1747 fCongestionWindow
= min_c(fSlowStartThreshold
,
1748 max_c(flightSize
, fSendMaxSegmentSize
) + fSendMaxSegmentSize
);
1749 fFlags
&= ~FLAG_RECOVERY
;
1753 if (fSendMax
== segment
.acknowledge
)
1754 TRACE("Receive(): all inflight data ack'd!");
1756 if (segment
.acknowledge
> fSendQueue
.LastSequence()
1757 && fState
> ESTABLISHED
) {
1758 TRACE("Receive(): FIN has been acknowledged!");
1762 fState
= FINISH_ACKNOWLEDGED
;
1770 case WAIT_FOR_FINISH_ACKNOWLEDGE
:
1779 if (fState
!= CLOSED
)
1780 _Acknowledged(segment
);
1784 if (segment
.flags
& TCP_FLAG_URGENT
) {
1785 if (fState
== ESTABLISHED
|| fState
== FINISH_SENT
1786 || fState
== FINISH_ACKNOWLEDGED
) {
1787 // TODO: Handle urgent data:
1788 // - RCV.UP <- max(RCV.UP, SEG.UP)
1789 // - signal the user that urgent data is available (SIGURG)
1793 bool notify
= false;
1795 // The buffer may be freed if its data is added to the queue, so cache
1796 // the size as we still need it later.
1797 uint32 bufferSize
= buffer
->size
;
1799 if ((bufferSize
> 0 || (segment
.flags
& TCP_FLAG_FINISH
) != 0)
1800 && _ShouldReceive())
1801 notify
= _AddData(segment
, buffer
);
// With FLAG_NO_RECEIVE set the data is skipped: fReceiveNext advances past
// it and the action is turned from KEEP into DROP.
1803 if ((fFlags
& FLAG_NO_RECEIVE
) != 0)
1804 fReceiveNext
+= buffer
->size
;
1806 action
= (action
& ~KEEP
) | DROP
;
1809 if ((segment
.flags
& TCP_FLAG_FINISH
) != 0) {
1811 if (fState
!= CLOSED
&& fState
!= LISTEN
&& fState
!= SYNCHRONIZE_SENT
) {
1812 TRACE("Receive(): peer is finishing connection!");
1817 fReceiveQueue
.SetPushPointer();
1819 // we'll reply immediately to the FIN if we are not
1820 // transitioning to TIME WAIT so we immediately ACK it.
1821 action
|= IMMEDIATE_ACKNOWLEDGE
;
1823 // other side is closing connection; change states
1826 case SYNCHRONIZE_RECEIVED
:
1827 fState
= FINISH_RECEIVED
;
1831 // simultaneous close
1835 case FINISH_ACKNOWLEDGED
:
// Data or a SYN asks for an acknowledgement (which may be delayed).
1853 if (bufferSize
> 0 || (segment
.flags
& TCP_FLAG_SYNCHRONIZE
) != 0)
1854 action
|= ACKNOWLEDGE
;
1856 _UpdateTimestamps(segment
, segmentLength
);
1858 TRACE("Receive() Action %" B_PRId32
, action
);
// Entry point for a received TCP segment. Takes the endpoint lock, traces
// the segment, lets the state specific handler (_ListenReceive(),
// _SynchronizeSentReceive() or _Receive()) compute the action for it, sends
// the acknowledgement that action asks for, and returns the action.
// The socket is released here once both FLAG_CLOSED and
// FLAG_DELETE_ON_CLOSE are set.
1865 TCPEndpoint::SegmentReceived(tcp_segment_header
& segment
, net_buffer
* buffer
)
1867 MutexLocker
locker(fLock
);
1869 TRACE("SegmentReceived(): buffer %p (%" B_PRIu32
" bytes) address %s "
1870 "to %s flags %#" B_PRIx8
", seq %" B_PRIu32
", ack %" B_PRIu32
1871 ", wnd %" B_PRIu32
, buffer
, buffer
->size
, PrintAddress(buffer
->source
),
1872 PrintAddress(buffer
->destination
), segment
.flags
, segment
.sequence
,
1873 segment
.acknowledge
,
1874 (uint32
)segment
.advertised_window
<< fSendWindowShift
);
1875 T(Receive(this, segment
,
1876 (uint32
)segment
.advertised_window
<< fSendWindowShift
, buffer
));
// DROP is the result unless one of the handlers below decides otherwise.
1877 int32 segmentAction
= DROP
;
1881 segmentAction
= _ListenReceive(segment
, buffer
);
1884 case SYNCHRONIZE_SENT
:
1885 segmentAction
= _SynchronizeSentReceive(segment
, buffer
);
1888 case SYNCHRONIZE_RECEIVED
:
1890 case FINISH_RECEIVED
:
1891 case WAIT_FOR_FINISH_ACKNOWLEDGE
:
1893 case FINISH_ACKNOWLEDGED
:
1897 segmentAction
= _Receive(segment
, buffer
);
1901 // process acknowledge action as asked for by the *Receive() method
// IMMEDIATE_ACKNOWLEDGE takes precedence over ACKNOWLEDGE.
1902 if (segmentAction
& IMMEDIATE_ACKNOWLEDGE
)
1903 SendAcknowledge(true);
1904 else if (segmentAction
& ACKNOWLEDGE
)
1905 DelayedAcknowledge();
1907 if ((fFlags
& (FLAG_CLOSED
| FLAG_DELETE_ON_CLOSE
))
1908 == (FLAG_CLOSED
| FLAG_DELETE_ON_CLOSE
)) {
1910 gSocketModule
->release_socket(socket
);
1913 return segmentAction
;
1917 // #pragma mark - send
// Returns the TCP header flags an outgoing segment carries in the current
// state; _SendQueued() constructs its segment header from this value.
// Visible results: RST | ACK, SYN (SYNCHRONIZE_SENT), SYN | ACK
// (SYNCHRONIZE_RECEIVED) and plain ACK for the group of states ending the
// list.
// NOTE(review): some case labels, including the one(s) for the RST | ACK
// result, are not visible in this excerpt.
1921 TCPEndpoint::_CurrentFlags()
1923 // we don't set FLAG_FINISH here, instead we do it
1924 // conditionally below depending if we are sending
1925 // the last bytes of the send queue.
1929 return TCP_FLAG_RESET
| TCP_FLAG_ACKNOWLEDGE
;
1931 case SYNCHRONIZE_SENT
:
1932 return TCP_FLAG_SYNCHRONIZE
;
1933 case SYNCHRONIZE_RECEIVED
:
1934 return TCP_FLAG_SYNCHRONIZE
| TCP_FLAG_ACKNOWLEDGE
;
1937 case FINISH_RECEIVED
:
1938 case FINISH_ACKNOWLEDGED
:
1940 case WAIT_FOR_FINISH_ACKNOWLEDGE
:
1943 return TCP_FLAG_ACKNOWLEDGE
;
// Decides whether a segment with `length` bytes of payload is worth sending
// right now; _SendQueued() consults it for segments that are neither forced
// nor retransmissions.
// Visible criteria: the segment budget (fSendMaxSegments) in ESTABLISHED
// state, silly window syndrome avoidance, a pending window update for the
// peer, SYN/FIN/RST flags, and pending urgent data.
// NOTE(review): the result of each individual test is not visible in this
// excerpt; `flightSize` is not referenced by any visible line.
1952 TCPEndpoint::_ShouldSendSegment(tcp_segment_header
& segment
, uint32 length
,
1953 uint32 segmentMaxSize
, uint32 flightSize
)
1955 if (fState
== ESTABLISHED
&& fSendMaxSegments
== 0)
1959 // Avoid the silly window syndrome - we only send a segment in case:
1960 // - we have a full segment to send, or
1961 // - we're at the end of our buffer queue, or
1962 // - the buffer is at least larger than half of the maximum send window,
1964 // - we're retransmitting data
1965 if (length
== segmentMaxSize
1966 || (fOptions
& TCP_NODELAY
) != 0
1967 || tcp_sequence(fSendNext
+ length
) == fSendQueue
.LastSequence()
1968 || (fSendMaxWindow
> 0 && length
>= fSendMaxWindow
/ 2))
1972 // check if we need to send a window update to the peer
1973 if (segment
.advertised_window
> 0) {
1974 // correct the window to take into account what already has been advertised
1975 uint32 window
= (segment
.advertised_window
<< fReceiveWindowShift
)
1976 - (fReceiveMaxAdvertised
- fReceiveNext
).Number();
1978 // if we can advertise a window larger than twice the maximum segment
1979 // size, or half the maximum buffer size we send a window update
1980 if (window
>= (fReceiveMaxSegmentSize
<< 1)
1981 || window
>= (socket
->receive
.buffer_size
>> 1))
// Control flags are tested next.
1985 if ((segment
.flags
& (TCP_FLAG_SYNCHRONIZE
| TCP_FLAG_FINISH
1986 | TCP_FLAG_RESET
)) != 0)
1989 // We do have urgent data pending
1990 if (fSendUrgentOffset
> fSendNext
)
1993 // there is no reason to send a segment just now
// Convenience overload: sends queued data using the peer's current send
// window (fSendWindow) and returns the result of the two-argument overload.
1999 TCPEndpoint::_SendQueued(bool force
)
2001 return _SendQueued(force
, fSendWindow
);
// Review summary of the function below. Visible steps: build the segment
// header from _CurrentFlags() and the negotiated options, compute the window
// to advertise and the acknowledgement number, handle the urgent offset,
// limit `sendWindow` by the congestion window and by what is already
// consumed, and then loop while payload is left: decide whether to send,
// fetch the data from fSendQueue, add the TCP header, update the send state
// and hand the buffer to the next protocol's send_routed_data().
// `force` bypasses the _ShouldSendSegment() check.
2005 /*! Sends one or more TCP segments with the data waiting in the queue, or some
2006 specific flags that need to be sent.
2009 TCPEndpoint::_SendQueued(bool force
, uint32 sendWindow
)
2014 // in passive state?
2015 if (fState
== LISTEN
)
2018 tcp_segment_header
segment(_CurrentFlags());
2020 if ((fOptions
& TCP_NOOPT
) == 0) {
2021 if ((fFlags
& FLAG_OPTION_TIMESTAMP
) != 0) {
2022 segment
.options
|= TCP_HAS_TIMESTAMPS
;
2023 segment
.timestamp_reply
= fReceivedTimestamp
;
2024 segment
.timestamp_value
= tcp_now();
// Only the very first transmission of a SYN carries the establishment
// options.
2027 if ((segment
.flags
& TCP_FLAG_SYNCHRONIZE
) != 0
2028 && fSendNext
== fInitialSendSequence
) {
2029 // add connection establishment options
2030 segment
.max_segment_size
= fReceiveMaxSegmentSize
;
2031 if (fFlags
& FLAG_OPTION_WINDOW_SCALE
) {
2032 segment
.options
|= TCP_HAS_WINDOW_SCALE
;
2033 segment
.window_shift
= fReceiveWindowShift
;
2038 size_t availableBytes
= fReceiveQueue
.Free();
2039 // window size must remain same for duplicate acknowledgements
2040 if (!fReceiveQueue
.IsContiguous())
2041 availableBytes
= (fReceiveMaxAdvertised
- fReceiveNext
).Number();
// With window scaling the advertised value is shifted down, otherwise it is
// capped at TCP_MAX_WINDOW.
2043 if (fFlags
& FLAG_OPTION_WINDOW_SCALE
)
2044 segment
.advertised_window
= availableBytes
>> fReceiveWindowShift
;
2046 segment
.advertised_window
= min_c(TCP_MAX_WINDOW
, availableBytes
);
2048 segment
.acknowledge
= fReceiveNext
.Number();
2050 // Process urgent data
2051 if (fSendUrgentOffset
> fSendNext
) {
2052 segment
.flags
|= TCP_FLAG_URGENT
;
2053 segment
.urgent_offset
= (fSendUrgentOffset
- fSendNext
).Number();
2055 fSendUrgentOffset
= fSendUnacknowledged
.Number();
2056 // Keep urgent offset updated, so that it doesn't reach into our
2057 // send window on overlap
2058 segment
.urgent_offset
= 0;
// The congestion window limits the window we may use, unless it is 0.
2061 if (fCongestionWindow
> 0 && fCongestionWindow
< sendWindow
)
2062 sendWindow
= fCongestionWindow
;
2064 // fSendUnacknowledged
2065 // | fSendNext fSendMax
2068 // -----------------------------------
2069 // | effective window |
2070 // -----------------------------------
2072 // Flight size represents the window of data which is currently in the
2073 // ether. We should never send data such as the flight size becomes larger
2074 // than the effective window. Note however that the effective window may be
2075 // reduced (by congestion for instance), so at some point in time flight
2076 // size may be larger than the currently calculated window.
2078 uint32 flightSize
= (fSendMax
- fSendUnacknowledged
).Number();
2079 uint32 consumedWindow
= (fSendNext
- fSendUnacknowledged
).Number();
2081 if (consumedWindow
> sendWindow
) {
2083 // TODO: enter persist state? try to get a window update.
2085 sendWindow
-= consumedWindow
;
2087 if (force
&& sendWindow
== 0 && fSendNext
<= fSendQueue
.LastSequence()) {
2088 // send one byte of data to ask for a window update
2089 // (triggered by the persist timer)
2093 uint32 length
= min_c(fSendQueue
.Available(fSendNext
), sendWindow
);
2094 bool shouldStartRetransmitTimer
= fSendNext
== fSendUnacknowledged
;
2095 bool retransmit
= fSendNext
< fSendMax
;
2097 if (fDuplicateAcknowledgeCount
!= 0) {
2098 // send at most 1 SMSS of data when under limited transmit, fast transmit/recovery
2099 length
= min_c(length
, fSendMaxSegmentSize
);
// The TCP options take away from the payload of each segment.
2103 uint32 segmentMaxSize
= fSendMaxSegmentSize
2104 - tcp_options_length(segment
);
2105 uint32 segmentLength
= min_c(length
, segmentMaxSize
);
// This segment reaches the end of the send queue.
2107 if (fSendNext
+ segmentLength
== fSendQueue
.LastSequence()) {
2108 if (state_needs_finish(fState
))
2109 segment
.flags
|= TCP_FLAG_FINISH
;
2111 segment
.flags
|= TCP_FLAG_PUSH
;
2114 // Determine if we should really send this segment
2115 if (!force
&& !retransmit
&& !_ShouldSendSegment(segment
, segmentLength
,
2116 segmentMaxSize
, flightSize
)) {
// Data stays queued: start the persist timer unless it or the retransmit
// timer is already active.
2117 if (fSendQueue
.Available()
2118 && !gStackModule
->is_timer_active(&fPersistTimer
)
2119 && !gStackModule
->is_timer_active(&fRetransmitTimer
))
2120 _StartPersistTimer();
2124 net_buffer
*buffer
= gBufferModule
->create(256);
2128 status_t status
= B_OK
;
2129 if (segmentLength
> 0)
2130 status
= fSendQueue
.Get(buffer
, fSendNext
, segmentLength
);
2131 if (status
< B_OK
) {
2132 gBufferModule
->free(buffer
);
2136 LocalAddress().CopyTo(buffer
->source
);
2137 PeerAddress().CopyTo(buffer
->destination
);
2139 uint32 size
= buffer
->size
;
2140 segment
.sequence
= fSendNext
.Number();
2142 TRACE("SendQueued(): buffer %p (%" B_PRIu32
" bytes) address %s to "
2143 "%s flags %#" B_PRIx8
", seq %" B_PRIu32
", ack %" B_PRIu32
2144 ", rwnd %" B_PRIu16
", cwnd %" B_PRIu32
", ssthresh %" B_PRIu32
2145 ", len %" B_PRIu32
", first %" B_PRIu32
", last %" B_PRIu32
,
2146 buffer
, buffer
->size
, PrintAddress(buffer
->source
),
2147 PrintAddress(buffer
->destination
), segment
.flags
, segment
.sequence
,
2148 segment
.acknowledge
, segment
.advertised_window
,
2149 fCongestionWindow
, fSlowStartThreshold
, segmentLength
,
2150 fSendQueue
.FirstSequence().Number(),
2151 fSendQueue
.LastSequence().Number());
2152 T(Send(this, segment
, buffer
, fSendQueue
.FirstSequence(),
2153 fSendQueue
.LastSequence()));
2155 PROBE(buffer
, sendWindow
);
2156 sendWindow
-= buffer
->size
;
2158 status
= add_tcp_header(AddressModule(), segment
, buffer
);
2159 if (status
!= B_OK
) {
2160 gBufferModule
->free(buffer
);
2164 // Update send status - we need to do this before we send the data
2165 // for local connections as the answer is directly handled
2167 if (segment
.flags
& TCP_FLAG_SYNCHRONIZE
) {
2168 segment
.options
&= ~TCP_HAS_WINDOW_SCALE
;
2169 segment
.max_segment_size
= 0;
2173 if (segment
.flags
& TCP_FLAG_FINISH
)
// Remember the previous fSendMax before it is moved forward.
2176 uint32 sendMax
= fSendMax
.Number();
2178 if (fSendMax
< fSendNext
)
2179 fSendMax
= fSendNext
;
2181 fReceiveMaxAdvertised
= fReceiveNext
2182 + ((uint32
)segment
.advertised_window
<< fReceiveWindowShift
);
2184 if (segmentLength
!= 0 && fState
== ESTABLISHED
)
2187 status
= next
->module
->send_routed_data(next
, fRoute
, buffer
);
2188 if (status
< B_OK
) {
2189 gBufferModule
->free(buffer
);
2191 fSendNext
= segment
.sequence
;
2193 // restore send status
// Start a round trip time measurement with the first new (not retransmitted)
// data or SYN segment, if none is in progress.
2197 if (fSendTime
== 0 && !retransmit
2198 && (segmentLength
!= 0 || (segment
.flags
& TCP_FLAG_SYNCHRONIZE
) !=0)) {
2199 fSendTime
= tcp_now();
2200 fRoundTripStartSequence
= segment
.sequence
;
2203 if (shouldStartRetransmitTimer
&& size
> 0) {
2204 TRACE("starting initial retransmit timer of: %" B_PRIdBIGTIME
,
2205 fRetransmitTimeout
);
2206 gStackModule
->set_timer(&fRetransmitTimer
, fRetransmitTimeout
);
2207 T(TimerSet(this, "retransmit", fRetransmitTimeout
));
2208 shouldStartRetransmitTimer
= false;
2211 if (segment
.flags
& TCP_FLAG_ACKNOWLEDGE
)
2212 fLastAcknowledgeSent
= segment
.acknowledge
;
2214 length
-= segmentLength
;
2215 segment
.flags
&= ~(TCP_FLAG_SYNCHRONIZE
| TCP_FLAG_RESET
2221 } while (length
> 0);
// Returns the largest TCP payload for `address`: the MTU reported by the
// next protocol's module minus the size of the fixed TCP header.
2228 TCPEndpoint::_MaxSegmentSize(const sockaddr
* address
) const
2230 return next
->module
->get_mtu(next
, address
) - sizeof(tcp_header
);
// Prepares everything needed to send to `peer`. Visible steps: look up a
// route if none is set yet (marking local routes with FLAG_LOCAL), register
// the connection with the endpoint manager, choose the initial send sequence
// number and reset all send sequence variables to it, determine the maximum
// segment size we can receive, and compute the receive window shift to
// advertise.
// NOTE(review): the handling of a failed route lookup or of a failed
// SetConnection() is not visible in this excerpt.
2235 TCPEndpoint::_PrepareSendPath(const sockaddr
* peer
)
2237 if (fRoute
== NULL
) {
2238 fRoute
= gDatalinkModule
->get_route(Domain(), peer
);
2242 if ((fRoute
->flags
& RTF_LOCAL
) != 0)
2243 fFlags
|= FLAG_LOCAL
;
2246 // make sure connection does not already exist
2247 status_t status
= fManager
->SetConnection(this, *LocalAddress(), peer
,
2248 fRoute
->interface_address
->local
);
// The initial sequence number is derived from the system time.
2252 fInitialSendSequence
= system_time() >> 4;
2253 fSendNext
= fInitialSendSequence
;
2254 fSendUnacknowledged
= fInitialSendSequence
;
2255 fSendMax
= fInitialSendSequence
;
2256 fSendUrgentOffset
= fInitialSendSequence
;
2257 fRecover
= fInitialSendSequence
.Number();
2259 // we are counting the SYN here
2260 fSendQueue
.SetInitialSequence(fSendNext
+ 1);
2262 fReceiveMaxSegmentSize
= _MaxSegmentSize(peer
);
2264 // Compute the window shift we advertise to our peer - if it doesn't support
2265 // this option, this will be reset to 0 (when its SYN is received)
// The shift grows until a 16 bit window scaled by it covers the socket's
// receive buffer size, or TCP_MAX_WINDOW_SHIFT is reached.
2266 fReceiveWindowShift
= 0;
2267 while (fReceiveWindowShift
< TCP_MAX_WINDOW_SHIFT
2268 && (0xffffUL
<< fReceiveWindowShift
) < socket
->receive
.buffer_size
) {
2269 fReceiveWindowShift
++;
// Processes the acknowledgement number of `segment`. When it advances
// fSendUnacknowledged, the visible steps are: remove the acknowledged data
// from the send queue, adjust the congestion window, reposition fSendNext,
// update the round trip time estimate, and cancel or re-arm the retransmit
// timer. Afterwards writers waiting on the socket are notified if the state
// is writable, and the send queue is checked for remaining data.
// The acknowledgement must not be below fSendUnacknowledged (asserted).
2277 TCPEndpoint::_Acknowledged(tcp_segment_header
& segment
)
2279 TRACE("_Acknowledged(): ack %" B_PRIu32
"; uack %" B_PRIu32
"; next %"
2280 B_PRIu32
"; max %" B_PRIu32
, segment
.acknowledge
,
2281 fSendUnacknowledged
.Number(), fSendNext
.Number(), fSendMax
.Number());
2283 ASSERT(fSendUnacknowledged
<= segment
.acknowledge
);
2285 if (fSendUnacknowledged
< segment
.acknowledge
) {
2286 fSendQueue
.RemoveUntil(segment
.acknowledge
);
2288 uint32 bytesAcknowledged
= segment
.acknowledge
- fSendUnacknowledged
.Number();
2289 fPreviousHighestAcknowledge
= fSendUnacknowledged
;
2290 fSendUnacknowledged
= segment
.acknowledge
;
2291 uint32 flightSize
= (fSendMax
- fSendUnacknowledged
).Number();
// Number of round trip samples expected per window when timestamps are in
// use: one for every two full segments in flight.
2292 int32 expectedSamples
= flightSize
/ (fSendMaxSegmentSize
<< 1);
2294 if (fPreviousHighestAcknowledge
> fSendUnacknowledged
) {
2295 // need to update the recover variable upon a sequence wraparound
2296 fRecover
= segment
.acknowledge
- 1;
2299 // the acknowledgment of the SYN/ACK MUST NOT increase the size of the congestion window
2300 if (fSendUnacknowledged
!= fInitialSendSequence
) {
// Below the slow start threshold the window grows by up to one segment per
// acknowledgement; the other visible branch computes a smaller increment
// from SMSS * SMSS / cwnd.
2301 if (fCongestionWindow
< fSlowStartThreshold
)
2302 fCongestionWindow
+= min_c(bytesAcknowledged
, fSendMaxSegmentSize
);
2304 uint32 increment
= fSendMaxSegmentSize
* fSendMaxSegmentSize
;
2306 if (increment
< fCongestionWindow
)
2309 increment
/= fCongestionWindow
;
2311 fCongestionWindow
+= increment
;
// Lifts the segment budget that _ShouldSendSegment() checks.
2314 fSendMaxSegments
= UINT32_MAX
;
2317 if ((fFlags
& FLAG_RECOVERY
) != 0) {
// In recovery, sending restarts at the first unacknowledged byte and the
// congestion window is deflated by the amount acknowledged.
2318 fSendNext
= fSendUnacknowledged
;
2320 fCongestionWindow
-= bytesAcknowledged
;
2322 if (bytesAcknowledged
> fSendMaxSegmentSize
)
2323 fCongestionWindow
+= fSendMaxSegmentSize
;
2325 fSendNext
= fSendMax
;
2327 fDuplicateAcknowledgeCount
= 0;
2329 if (fSendNext
< fSendUnacknowledged
)
2330 fSendNext
= fSendUnacknowledged
;
// With timestamps the sample comes from the echoed timestamp, otherwise from
// the segment timed via fSendTime.
2332 if (fFlags
& FLAG_OPTION_TIMESTAMP
) {
2333 _UpdateRoundTripTime(tcp_diff_timestamp(segment
.timestamp_reply
),
2334 expectedSamples
> 0 ? expectedSamples
: 1);
2335 } else if (fSendTime
!= 0 && fRoundTripStartSequence
< segment
.acknowledge
) {
2336 _UpdateRoundTripTime(tcp_diff_timestamp(fSendTime
), 1);
2340 if (fSendUnacknowledged
== fSendMax
) {
2341 TRACE("all acknowledged, cancelling retransmission timer.");
2342 gStackModule
->cancel_timer(&fRetransmitTimer
);
2343 T(TimerSet(this, "retransmit", -1));
2345 TRACE("data acknowledged, resetting retransmission timer to: %"
2346 B_PRIdBIGTIME
, fRetransmitTimeout
);
2347 gStackModule
->set_timer(&fRetransmitTimer
, fRetransmitTimeout
);
2348 T(TimerSet(this, "retransmit", fRetransmitTimeout
));
2351 if (is_writable(fState
)) {
2352 // notify threads waiting on the socket to become writable again
2353 fSendCondition
.NotifyAll();
2354 gSocketModule
->notify(socket
, B_SELECT_WRITE
, fSendQueue
.Free());
2358 // if there is data left to be sent, send it now
2359 if (fSendQueue
.Used() > 0)
// Reacts to an expired retransmit timer (called from _RetransmitTimer()).
// Visible steps: before ESTABLISHED the timeout is set to
// TCP_SYN_RETRANSMIT_TIMEOUT and the congestion window to one segment; the
// duplicate ACK count is cleared, the timeout doubled (capped at
// TCP_MAX_RETRANSMIT_TIMEOUT), fSendNext moved back to the first
// unacknowledged byte, fRecover set just below it, and FLAG_RECOVERY
// cleared.
// NOTE(review): how these steps are divided between the two branches, and
// the call that resends the data, are not visible in this excerpt.
2365 TCPEndpoint::_Retransmit()
2367 TRACE("Retransmit()");
2369 if (fState
< ESTABLISHED
) {
2370 fRetransmitTimeout
= TCP_SYN_RETRANSMIT_TIMEOUT
;
2371 fCongestionWindow
= fSendMaxSegmentSize
;
2374 fDuplicateAcknowledgeCount
= 0;
2375 // Do exponential back off of the retransmit timeout
2376 fRetransmitTimeout
*= 2;
2377 if (fRetransmitTimeout
> TCP_MAX_RETRANSMIT_TIMEOUT
)
2378 fRetransmitTimeout
= TCP_MAX_RETRANSMIT_TIMEOUT
;
// Go back to the oldest unacknowledged byte.
2381 fSendNext
= fSendUnacknowledged
;
2384 fRecover
= fSendNext
.Number() - 1;
2385 if ((fFlags
& FLAG_RECOVERY
) != 0)
2386 fFlags
&= ~FLAG_RECOVERY
;
// Feeds a new round trip time sample into the smoothed round trip time and
// its variation, and derives the retransmit timeout from them.
// `roundTripTime` is the sample (traced as milliseconds below);
// `expectedSamples` scales down the weight of each sample (the gains are
// 1 / (expectedSamples * 4) for the variation and 1 / (expectedSamples * 8)
// for the smoothed value). Callers in this file pass at least 1.
// The resulting timeout is clamped to
// [TCP_MIN_RETRANSMIT_TIMEOUT, TCP_MAX_RETRANSMIT_TIMEOUT].
2391 TCPEndpoint::_UpdateRoundTripTime(int32 roundTripTime
, int32 expectedSamples
)
// First sample: initialize the estimators directly from it.
2393 if(fSmoothedRoundTripTime
== 0) {
2394 fSmoothedRoundTripTime
= roundTripTime
;
2395 fRoundTripVariation
= roundTripTime
/ 2;
// NOTE(review): the remainder of this expression is not visible in this
// excerpt.
2396 fRetransmitTimeout
= (fSmoothedRoundTripTime
+ max_c(100, fRoundTripVariation
* 4))
2399 int32 delta
= fSmoothedRoundTripTime
- roundTripTime
;
2402 fRoundTripVariation
+= (delta
- fRoundTripVariation
) / (expectedSamples
* 4);
2403 fSmoothedRoundTripTime
+= (roundTripTime
- fSmoothedRoundTripTime
) / (expectedSamples
* 8);
2404 fRetransmitTimeout
= (fSmoothedRoundTripTime
+ max_c(100, fRoundTripVariation
* 4))
2408 if (fRetransmitTimeout
> TCP_MAX_RETRANSMIT_TIMEOUT
)
2409 fRetransmitTimeout
= TCP_MAX_RETRANSMIT_TIMEOUT
;
2411 if (fRetransmitTimeout
< TCP_MIN_RETRANSMIT_TIMEOUT
)
2412 fRetransmitTimeout
= TCP_MIN_RETRANSMIT_TIMEOUT
;
2414 TRACE(" RTO is now %" B_PRIdBIGTIME
" (after rtt %" B_PRId32
"ms)",
2415 fRetransmitTimeout
, roundTripTime
);
// Restarts slow start: the slow start threshold becomes half of the data
// currently in flight (fSendMax - fSendUnacknowledged), but at least two
// segments, and the congestion window is reduced to a single segment.
2420 TCPEndpoint::_ResetSlowStart()
2422 fSlowStartThreshold
= max_c((fSendMax
- fSendUnacknowledged
).Number() / 2,
2423 2 * fSendMaxSegmentSize
);
2424 fCongestionWindow
= fSendMaxSegmentSize
;
2428 // #pragma mark - timer
// Timer hook for the retransmit timer; `_endpoint` is the TCPEndpoint the
// timer belongs to. Locks the endpoint and calls its _Retransmit().
// NOTE(review): the statement guarded by the check below is not visible in
// this excerpt; presumably the hook bails out when the lock could not be
// acquired or the timer has been re-armed in the meantime - confirm.
2432 TCPEndpoint::_RetransmitTimer(net_timer
* timer
, void* _endpoint
)
2434 TCPEndpoint
* endpoint
= (TCPEndpoint
*)_endpoint
;
2435 T(TimerTriggered(endpoint
, "retransmit"));
2437 MutexLocker
locker(endpoint
->fLock
);
2438 if (!locker
.IsLocked() || gStackModule
->is_timer_active(timer
))
2441 endpoint
->_Retransmit();
// Timer hook for the persist timer; `_endpoint` is the TCPEndpoint the timer
// belongs to. Locks the endpoint and forces a send via _SendQueued(true),
// which lets _SendQueued() probe a closed window.
// NOTE(review): the statements guarded by the two checks below are not
// visible in this excerpt; presumably they leave the hook early - confirm.
2446 TCPEndpoint::_PersistTimer(net_timer
* timer
, void* _endpoint
)
2448 TCPEndpoint
* endpoint
= (TCPEndpoint
*)_endpoint
;
2449 T(TimerTriggered(endpoint
, "persist"));
2451 MutexLocker
locker(endpoint
->fLock
);
2452 if (!locker
.IsLocked())
2455 // the timer might not have been canceled early enough
2456 if (endpoint
->State() == CLOSED
)
2459 endpoint
->_SendQueued(true);
// Timer hook for the delayed acknowledge timer; `_endpoint` is the
// TCPEndpoint the timer belongs to. Locks the endpoint and sends the
// postponed acknowledgement via SendAcknowledge(true).
// NOTE(review): the statements guarded by the two checks below are not
// visible in this excerpt; presumably they leave the hook early - confirm.
2464 TCPEndpoint::_DelayedAcknowledgeTimer(net_timer
* timer
, void* _endpoint
)
2466 TCPEndpoint
* endpoint
= (TCPEndpoint
*)_endpoint
;
2467 T(TimerTriggered(endpoint
, "delayed ack"));
2469 MutexLocker
locker(endpoint
->fLock
);
2470 if (!locker
.IsLocked())
2473 // the timer might not have been canceled early enough
2474 if (endpoint
->State() == CLOSED
)
2477 endpoint
->SendAcknowledge(true);
// Timer hook for the time-wait timer; `_endpoint` is the TCPEndpoint the
// timer belongs to. If the endpoint has not been closed yet
// (FLAG_CLOSED unset), it is only marked with FLAG_DELETE_ON_CLOSE;
// the visible alternative releases the socket through the socket module.
// NOTE(review): the statements between these two steps are not visible in
// this excerpt.
2482 TCPEndpoint::_TimeWaitTimer(net_timer
* timer
, void* _endpoint
)
2484 TCPEndpoint
* endpoint
= (TCPEndpoint
*)_endpoint
;
2485 T(TimerTriggered(endpoint
, "time-wait"));
2487 MutexLocker
locker(endpoint
->fLock
);
2488 if (!locker
.IsLocked())
2491 if ((endpoint
->fFlags
& FLAG_CLOSED
) == 0) {
2492 endpoint
->fFlags
|= FLAG_DELETE_ON_CLOSE
;
2498 gSocketModule
->release_socket(endpoint
->socket
);
// Waits on `condition` until it is notified, the wait is interrupted, or the
// absolute time `timeout` is reached (B_ABSOLUTE_TIMEOUT | B_CAN_INTERRUPT).
// NOTE(review): how `locker` is used around the wait and what is returned
// are not visible in this excerpt.
2503 TCPEndpoint::_WaitForCondition(ConditionVariable
& condition
,
2504 MutexLocker
& locker
, bigtime_t timeout
)
// The entry is added to the condition variable before the wait starts.
2506 ConditionVariableEntry entry
;
2507 condition
.Add(&entry
);
2510 status_t result
= entry
.Wait(B_ABSOLUTE_TIMEOUT
| B_CAN_INTERRUPT
, timeout
);
// Prints the endpoint's internal state with kprintf(): connection state and
// flags, lock holder, accept semaphore, options, the send side sequence
// variables and windows, the receive side sequence variables and windows,
// and the round trip / congestion control values.
2521 TCPEndpoint::Dump() const
2523 kprintf("TCP endpoint %p\n", this);
2524 kprintf(" state: %s\n", name_for_state(fState
));
2525 kprintf(" flags: 0x%" B_PRIx32
"\n", fFlags
);
2527 kprintf(" lock: { %p, holder: %" B_PRId32
" }\n", &fLock
, fLock
.holder
);
2529 kprintf(" accept sem: %" B_PRId32
"\n", fAcceptSemaphore
);
2530 kprintf(" options: 0x%" B_PRIx32
"\n", (uint32
)fOptions
);
// Send side.
2532 kprintf(" window shift: %" B_PRIu8
"\n", fSendWindowShift
);
2533 kprintf(" unacknowledged: %" B_PRIu32
"\n",
2534 fSendUnacknowledged
.Number());
2535 kprintf(" next: %" B_PRIu32
"\n", fSendNext
.Number());
2536 kprintf(" max: %" B_PRIu32
"\n", fSendMax
.Number());
2537 kprintf(" urgent offset: %" B_PRIu32
"\n", fSendUrgentOffset
.Number());
2538 kprintf(" window: %" B_PRIu32
"\n", fSendWindow
);
2539 kprintf(" max window: %" B_PRIu32
"\n", fSendMaxWindow
);
2540 kprintf(" max segment size: %" B_PRIu32
"\n", fSendMaxSegmentSize
);
2541 kprintf(" queue: %" B_PRIuSIZE
" / %" B_PRIuSIZE
"\n", fSendQueue
.Used(),
2543 #if DEBUG_BUFFER_QUEUE
2546 kprintf(" last acknowledge sent: %" B_PRIu32
"\n",
2547 fLastAcknowledgeSent
.Number());
2548 kprintf(" initial sequence: %" B_PRIu32
"\n",
2549 fInitialSendSequence
.Number());
// Receive side.
2550 kprintf(" receive\n");
2551 kprintf(" window shift: %" B_PRIu8
"\n", fReceiveWindowShift
);
2552 kprintf(" next: %" B_PRIu32
"\n", fReceiveNext
.Number());
2553 kprintf(" max advertised: %" B_PRIu32
"\n",
2554 fReceiveMaxAdvertised
.Number());
2555 kprintf(" window: %" B_PRIu32
"\n", fReceiveWindow
);
2556 kprintf(" max segment size: %" B_PRIu32
"\n", fReceiveMaxSegmentSize
);
2557 kprintf(" queue: %" B_PRIuSIZE
" / %" B_PRIuSIZE
"\n",
2558 fReceiveQueue
.Available(), fReceiveQueue
.Size());
2559 #if DEBUG_BUFFER_QUEUE
2560 fReceiveQueue
.Dump();
2562 kprintf(" initial sequence: %" B_PRIu32
"\n",
2563 fInitialReceiveSequence
.Number());
// Round trip time estimation and congestion control.
2564 kprintf(" duplicate acknowledge count: %" B_PRIu32
"\n",
2565 fDuplicateAcknowledgeCount
);
2566 kprintf(" smoothed round trip time: %" B_PRId32
" (deviation %" B_PRId32
")\n",
2567 fSmoothedRoundTripTime
, fRoundTripVariation
);
2568 kprintf(" retransmit timeout: %" B_PRId64
"\n", fRetransmitTimeout
);
2569 kprintf(" congestion window: %" B_PRIu32
"\n", fCongestionWindow
);
2570 kprintf(" slow start threshold: %" B_PRIu32
"\n", fSlowStartThreshold
);