2 * Copyright 2010 Andreas Färber <andreas.faerber@web.de>
3 * All rights reserved. Distributed under the terms of the MIT License.
8 * NOTE This is a cleanroom TCP implementation with some known issues.
9 * Protection Against Wrapping Sequence (PAWS) needs to be added.
10 * Congestion control needs to be implemented (slow start, recv. window size).
11 * The use of *Packets needs to be re-evaluated in the context of TCP;
12 * probably a singly-linked list of received data chunks is more efficient.
13 * Debug output should be tuned for better aspect-oriented tracing.
14 * While Little Endian systems have been considered, this still needs testing.
18 #include <boot/net/TCP.h>
22 #include <KernelExport.h>
24 #include <boot/net/ChainBuffer.h>
25 #include <boot/net/NetStack.h>
27 #include "real_time_clock.h"
// Optional trace categories; all three switches are commented out here.
31 //#define TRACE_TCP_RANDOMNESS
32 //#define TRACE_TCP_CHECKSUM
33 //#define TRACE_TCP_QUEUE
// TRACE either forwards its arguments to dprintf() or expands to an empty
// statement. The preprocessor condition that selects between the two
// definitions is not visible in this excerpt.
37 # define TRACE(x, ...) dprintf(x, ## __VA_ARGS__)
39 # define TRACE(x, ...) ;
// TRACE_PORT: same two forms, selected by TRACE_TCP_RANDOMNESS.
41 #ifdef TRACE_TCP_RANDOMNESS
42 # define TRACE_PORT(x, ...) dprintf(x, ## __VA_ARGS__)
44 # define TRACE_PORT(x, ...) ;
// TRACE_CHECKSUM: same two forms, selected by TRACE_TCP_CHECKSUM.
46 #if defined(TRACE_TCP_CHECKSUM)
47 # define TRACE_CHECKSUM(x, ...) dprintf(x, ## __VA_ARGS__)
49 # define TRACE_CHECKSUM(x, ...) ;
// TRACE_QUEUE: same two forms, selected by TRACE_TCP_QUEUE.
51 #if defined(TRACE_TCP_QUEUE)
52 # define TRACE_QUEUE(x, ...) dprintf(x, ## __VA_ARGS__)
54 # define TRACE_QUEUE(x, ...) ;
// State and update step of a 32-bit pseudo-random number generator.
// NOTE(review): the enclosing function header is not visible in this excerpt;
// presumably this is the body of _rand32(), which other code in this file
// calls.
61 static unsigned int next
= 0;
// Seed from real_time_clock_usecs() divided by 1000000 (presumably
// microseconds to seconds). NOTE(review): any guard around this assignment is
// on a line not visible here.
63 next
= real_time_clock_usecs() / 1000000;
// Shift-register step: shift right by one bit and XOR in the mask 0xd0000001
// whenever the bit shifted out was set.
65 next
= (next
>> 1) ^ (unsigned int)((0 - (next
& 1U)) & 0xd0000001U
);
66 // characteristic polynomial: x^32 + x^31 + x^29 + x + 1
// Returns the low 14 bits (mask 0x3fff) of the next _rand32() value.
// NOTE(review): the enclosing function header is not visible in this excerpt;
// presumably this is _rand14(), which TCPSocket::Connect() calls.
74 // TODO: Find suitable generator polynomial.
75 return _rand32() & 0x3fff;
// Default constructor. Its initializer list and body are not visible in this
// excerpt.
79 TCPPacket::TCPPacket()
// Destructor. Its body is not visible in this excerpt.
// NOTE(review): SetTo() malloc()s fData; confirm it is released here.
87 TCPPacket::~TCPPacket()
// Fills in the packet: copies `size` bytes from `data` into a buffer obtained
// from malloc() and records the addresses, ports and sequence/acknowledgment
// numbers. Callers in this file store the result in a status_t.
// NOTE(review): the handling of `flags`, the allocation-failure check and the
// return statements are on lines not visible in this excerpt.
94 TCPPacket::SetTo(const void* data
, size_t size
, ip_addr_t sourceAddress
,
95 uint16 sourcePort
, ip_addr_t destinationAddress
, uint16 destinationPort
,
96 uint32 sequenceNumber
, uint32 acknowledgmentNumber
, uint8 flags
)
// A NULL data pointer combined with a non-zero size is singled out here; the
// statement executed in that case is not visible (presumably an error return).
98 if (data
== NULL
&& size
> 0)
// Allocate the packet-owned payload buffer.
102 fData
= malloc(size
);
// Copy the caller's bytes so the packet does not reference the caller's memory.
105 memcpy(fData
, data
, size
);
// Record the endpoints and the sequence/acknowledgment numbers.
110 fSourceAddress
= sourceAddress
;
111 fSourcePort
= sourcePort
;
112 fDestinationAddress
= destinationAddress
;
113 fDestinationPort
= destinationPort
;
114 fSequenceNumber
= sequenceNumber
;
115 fAcknowledgmentNumber
= acknowledgmentNumber
;
// Returns the source address stored by SetTo().
123 TCPPacket::SourceAddress() const
125 return fSourceAddress
;
// Returns the destination address stored by SetTo().
130 TCPPacket::DestinationAddress() const
132 return fDestinationAddress
;
// Source port accessor. Its body is not visible in this excerpt; presumably it
// returns fSourcePort, which SetTo() stores.
137 TCPPacket::SourcePort() const
// Returns the destination port stored by SetTo().
144 TCPPacket::DestinationPort() const
146 return fDestinationPort
;
// Returns the sequence number stored by SetTo().
151 TCPPacket::SequenceNumber() const
153 return fSequenceNumber
;
// Returns the acknowledgment number stored by SetTo().
158 TCPPacket::AcknowledgmentNumber() const
160 return fAcknowledgmentNumber
;
// Returns whether `sequenceNumber` lies inside this packet's payload range,
// i.e. fSequenceNumber <= sequenceNumber < fSequenceNumber + fSize.
// NOTE(review): these are plain magnitude comparisons, so a payload range that
// crosses the 32-bit sequence wrap is not matched for the wrapped part. The
// file header lists wraparound protection as a known gap.
165 TCPPacket::ProvidesSequenceNumber(uint32 sequenceNumber
) const
168 return fSequenceNumber
<= sequenceNumber
169 && fSequenceNumber
+ fSize
> sequenceNumber
;
// Accessor for the next packet in a queue; callers in this file use it to walk
// the received and sent packet lists. Its body is not visible in this excerpt.
174 TCPPacket::Next() const
// Sets the packet that follows this one in a queue. Its body is not visible in
// this excerpt.
181 TCPPacket::SetNext(TCPPacket
* packet
)
// Constructs an unconnected socket: looks up the TCP service of the default
// NetStack, starts with the local address INADDR_ANY, an empty sent-packet
// queue, and both the local and the remote state set to INITIAL.
// NOTE(review): some member initializers are on lines not visible in this
// excerpt.
189 TCPSocket::TCPSocket()
191 fTCPService(NetStack::Default()->GetTCPService()),
192 fAddress(INADDR_ANY
),
197 fFirstSentPacket(NULL
),
198 fLastSentPacket(NULL
),
199 fState(TCP_SOCKET_STATE_INITIAL
),
200 fRemoteState(TCP_SOCKET_STATE_INITIAL
)
// Destructor: unregisters the socket from the TCP service, but only if a
// service exists and a local port has been assigned (fPort != 0).
// NOTE(review): any release of still-queued packets would be on lines not
// visible in this excerpt.
205 TCPSocket::~TCPSocket()
207 if (fTCPService
!= NULL
&& fPort
!= 0)
208 fTCPService
->UnbindSocket(this);
// Computes a receive window: starts from 0xffff and subtracts the payload size
// of each packet in the receive queue, stopping at the first queued packet
// whose payload is not smaller than the remaining window.
// The return statement is not visible in this excerpt.
213 TCPSocket::WindowSize() const
215 // TODO A large window size leads to read timeouts
216 // due to resends occurring too late.
218 size_t windowSize
= 0xffff;
// The loop condition keeps windowSize from underflowing.
219 for (TCPPacket
* packet
= fFirstPacket
;
220 packet
!= NULL
&& windowSize
> packet
->DataSize();
221 packet
= packet
->Next())
222 windowSize
-= packet
->DataSize();
// Opens a connection to address:port: picks a random initial sequence number
// and local port, binds the socket, sends a SYN and waits for the socket to
// reach the OPEN state.
// NOTE(review): the error checks between the steps, the store of the remote
// port and the return statements are on lines not visible in this excerpt.
231 TCPSocket::Connect(ip_addr_t address
, uint16 port
)
233 fRemoteAddress
= address
;
// Random initial sequence number.
235 fSequenceNumber
= _rand32();
// Local port is 0xC000 plus a random offset; the mask limits the offset to the
// low 14 bits, giving a port in 0xC000..0xFFFF.
236 fPort
= 0xC000 + (_rand14() & ~0xc000);
237 TRACE_PORT("TCPSocket::Connect(): connecting from port %u\n", fPort
);
238 fAcknowledgeNumber
= 0;
// Register with the TCP service so incoming packets can be routed here.
241 status_t error
= fTCPService
->BindSocket(this);
// Build the SYN: empty payload, current sequence number.
246 TCPPacket
* packet
= new(nothrow
) TCPPacket();
249 error
= packet
->SetTo(NULL
, 0, fAddress
, fPort
, address
, port
,
250 fSequenceNumber
, fAcknowledgeNumber
, TCP_SYN
);
255 error
= _Send(packet
);
258 fState
= TCP_SOCKET_STATE_SYN_SENT
;
// Poll for the handshake to finish; the timeout is 1000000 in the units of
// system_time().
263 error
= _WaitForState(TCP_SOCKET_STATE_OPEN
, 1000000LL);
265 TRACE("no SYN-ACK received\n");
268 TRACE("SYN-ACK received\n");
// Connection shutdown: sends an empty FIN|ACK packet, marks the local state
// FIN_SENT and waits for the CLOSED state, with a timeout of 1000000 in the
// units of system_time().
// NOTE(review): the enclosing function header (presumably TCPSocket::Close()),
// the error checks and the return statements are not visible in this excerpt.
278 TCPPacket
* packet
= new(nothrow
) TCPPacket();
281 status_t error
= packet
->SetTo(NULL
, 0, fAddress
, fPort
, fRemoteAddress
,
282 fRemotePort
, fSequenceNumber
, fAcknowledgeNumber
, TCP_FIN
| TCP_ACK
);
287 error
= _Send(packet
);
290 fState
= TCP_SOCKET_STATE_FIN_SENT
;
293 error
= _WaitForState(TCP_SOCKET_STATE_CLOSED
, 1000000LL);
// Copies received payload, starting at sequence number fNextSequence, into
// `buffer` (at most bufferSize bytes) and reports the byte count through
// *bytesRead. It polls the network stack until data arrives or `timeout`
// elapses.
// NOTE(review): the rest of the parameter list (including the `timeout` used
// below), several branch bodies and the final return are on lines not visible
// in this excerpt.
302 TCPSocket::Read(void* buffer
, size_t bufferSize
, size_t* bytesRead
,
305 TRACE("TCPSocket::Read(): size = %lu\n", bufferSize
);
// bytesRead is checked for NULL; the statement executed in that case is not
// visible.
306 if (bytesRead
== NULL
)
310 TCPPacket
* packet
= NULL
;
// Reference point for the timeout checks below.
312 bigtime_t startTime
= system_time();
// Phase 1: poll until a packet covering fNextSequence is queued.
314 fTCPService
->ProcessIncomingPackets();
316 packet
= _PeekPacket();
// No data and the remote side is no longer OPEN (action not visible).
317 if (packet
== NULL
&& fRemoteState
!= TCP_SOCKET_STATE_OPEN
)
// No data yet, but the caller allows waiting (action not visible).
319 if (packet
== NULL
&& timeout
> 0LL)
321 } while (packet
== NULL
&& system_time() - startTime
< timeout
);
// Nothing arrived: a zero timeout reports B_WOULD_BLOCK, otherwise B_TIMED_OUT.
322 if (packet
== NULL
) {
323 #ifdef TRACE_TCP_QUEUE
326 return (timeout
== 0) ? B_WOULD_BLOCK
: B_TIMED_OUT
;
// Offset of the first unread byte within the packet, and the number of bytes
// left in it, clamped to the caller's buffer.
328 uint32 packetOffset
= fNextSequence
- packet
->SequenceNumber();
329 size_t readBytes
= packet
->DataSize() - packetOffset
;
330 if (readBytes
> bufferSize
)
331 readBytes
= bufferSize
;
333 memcpy(buffer
, (uint8
*)packet
->Data() + packetOffset
, readBytes
);
334 *bytesRead
= readBytes
;
// True when this read consumed the packet to its end (branch body not visible).
335 if (!packet
->ProvidesSequenceNumber(fNextSequence
+ readBytes
)) {
340 fNextSequence
+= readBytes
;
// Phase 2: if there is still room in the buffer, keep draining packets.
// NOTE(review): this relies on `packet` having been reset to NULL on a line
// not visible here.
342 if (packet
== NULL
&& *bytesRead
< bufferSize
) {
// Advance the destination past the bytes just copied.
345 buffer
= (uint8
*)buffer
+ readBytes
;
346 bufferSize
-= readBytes
;
347 fTCPService
->ProcessIncomingPackets();
348 packet
= _PeekPacket();
349 if (packet
== NULL
&& fRemoteState
!= TCP_SOCKET_STATE_OPEN
)
352 if (packet
== NULL
) {
356 readBytes
= packet
->DataSize();
357 if (readBytes
> bufferSize
)
358 readBytes
= bufferSize
;
// NOTE(review): this copies from offset 0 of the packet, which assumes the
// peeked packet starts exactly at fNextSequence - confirm.
360 memcpy(buffer
, packet
->Data(), readBytes
);
361 *bytesRead
+= readBytes
;
// Whole packet consumed (branch body not visible).
362 if (readBytes
== packet
->DataSize()) {
366 fNextSequence
+= readBytes
;
// Repeat while the last chunk did not fill the remaining buffer and the
// timeout has not elapsed.
367 } while (readBytes
< bufferSize
&&
368 system_time() - startTime
< timeout
);
369 #ifdef TRACE_TCP_QUEUE
370 if (readBytes
< bufferSize
) {
371 TRACE_QUEUE("TCP: Unable to deliver more data!\n");
// Sends `bufferSize` bytes from `buffer` to the remote endpoint as a single
// TCP packet, built with SetTo() and handed to _Send().
// NOTE(review): the flags argument, the error checks and the statement run for
// a NULL or empty buffer are on lines not visible in this excerpt.
382 TCPSocket::Write(const void* buffer
, size_t bufferSize
)
// NULL or empty input is special-cased (action not visible).
384 if (buffer
== NULL
|| bufferSize
== 0)
387 // TODO: Check for MTU and create multiple packets if necessary.
389 TCPPacket
* packet
= new(nothrow
) TCPPacket();
392 status_t error
= packet
->SetTo(buffer
, bufferSize
, fAddress
, fPort
,
393 fRemoteAddress
, fRemotePort
, fSequenceNumber
, fAcknowledgeNumber
,
399 return _Send(packet
);
// Handles an acknowledgment from the peer: walks the sent-packet queue from
// the front and unlinks packets whose sequence number is below `number`.
// NOTE(review): the statement taken when a packet is not yet acknowledged, the
// disposal of unlinked packets and the condition guarding the fLastSentPacket
// reset are on lines not visible in this excerpt.
// NOTE(review): the `>=` test is a plain magnitude comparison and is not
// wraparound-aware.
404 TCPSocket::Acknowledge(uint32 number
)
406 TRACE("TCPSocket::Acknowledge(): %lu\n", number
);
// Always look at the current head; it changes as packets are unlinked.
408 for (TCPPacket
* packet
= fFirstSentPacket
; packet
!= NULL
;
409 packet
= fFirstSentPacket
) {
// First packet the peer has not acknowledged yet.
410 if (packet
->SequenceNumber() >= number
)
412 fFirstSentPacket
= packet
->Next();
415 fLastSentPacket
= NULL
;
// Processes one incoming packet for this socket: updates the local and remote
// connection state from the SYN/ACK/FIN flags, then inserts packets that carry
// payload into the receive queue (kept ordered by sequence number) and
// advances fAcknowledgeNumber over contiguous data.
// NOTE(review): several statements (packet disposal, returns, sending of
// acknowledgments) are on lines not visible in this excerpt.
420 TCPSocket::ProcessPacket(TCPPacket
* packet
)
422 TRACE("TCPSocket::ProcessPacket()\n");
// The peer announced it is done sending.
424 if ((packet
->Flags() & TCP_FIN
) != 0) {
425 fRemoteState
= TCP_SOCKET_STATE_FIN_SENT
;
426 TRACE("FIN received\n");
// Local state machine.
430 if (fState
== TCP_SOCKET_STATE_SYN_SENT
) {
// SYN+ACK answers our SYN: the next expected sequence number is the peer's
// initial sequence number plus one.
431 if ((packet
->Flags() & TCP_SYN
) != 0
432 && (packet
->Flags() & TCP_ACK
) != 0) {
433 fNextSequence
= fAcknowledgeNumber
= packet
->SequenceNumber() + 1;
434 fRemoteState
= TCP_SOCKET_STATE_SYN_SENT
;
437 fState
= fRemoteState
= TCP_SOCKET_STATE_OPEN
;
440 } else if (fState
== TCP_SOCKET_STATE_OPEN
) {
441 } else if (fState
== TCP_SOCKET_STATE_FIN_SENT
) {
// Our FIN was acknowledged; the connection is CLOSED once the peer has sent
// its FIN as well.
442 if ((packet
->Flags() & TCP_ACK
) != 0) {
443 TRACE("FIN-ACK received\n");
444 if (fRemoteState
== TCP_SOCKET_STATE_FIN_SENT
)
445 fState
= TCP_SOCKET_STATE_CLOSED
;
// Packets without payload are not queued.
449 if (packet
->DataSize() == 0) {
450 TRACE("TCPSocket::ProcessPacket(): not queuing due to lack of data\n");
455 // For now rather protect us against being flooded with packets already
456 // acknowledged. "If it's important, they'll send it again."
// NOTE(review): plain magnitude comparison, not wraparound-aware.
458 if (packet
->SequenceNumber() < fAcknowledgeNumber
) {
459 TRACE_QUEUE("TCPSocket::ProcessPacket(): not queuing due to wraparound\n");
// Insert into the receive queue.
464 if (fLastPacket
== NULL
) {
465 // no packets enqueued
466 TRACE("TCPSocket::ProcessPacket(): first in queue\n");
467 packet
->SetNext(NULL
);
468 fFirstPacket
= fLastPacket
= packet
;
// Newer than the current tail: append.
469 } else if (fLastPacket
->SequenceNumber() < packet
->SequenceNumber()) {
471 TRACE("TCPSocket::ProcessPacket(): enqueue in back\n");
472 packet
->SetNext(NULL
);
473 fLastPacket
->SetNext(packet
);
474 fLastPacket
= packet
;
// Older than the current head: prepend.
475 } else if (fFirstPacket
->SequenceNumber() > packet
->SequenceNumber()) {
477 TRACE("TCPSocket::ProcessPacket(): enqueue in front\n");
478 TRACE_QUEUE("TCP: Enqueuing %lx - %lx in front! (next is %lx)\n",
479 packet
->SequenceNumber(),
480 packet
->SequenceNumber() + packet
->DataSize() - 1,
482 packet
->SetNext(fFirstPacket
);
483 fFirstPacket
= packet
;
// Same sequence number as the head: duplicate.
484 } else if (fFirstPacket
->SequenceNumber() == packet
->SequenceNumber()) {
485 TRACE_QUEUE("%s(): dropping due to identical first packet\n", __func__
);
// Otherwise the packet belongs somewhere between head and tail.
490 TRACE("TCPSocket::ProcessPacket(): enqueue in middle\n");
491 for (TCPPacket
* queuedPacket
= fFirstPacket
; queuedPacket
!= NULL
;
492 queuedPacket
= queuedPacket
->Next()) {
// Duplicate of an already queued packet.
493 if (queuedPacket
->SequenceNumber() == packet
->SequenceNumber()) {
494 TRACE_QUEUE("TCPSocket::EnqueuePacket(): TCP packet dropped\n");
495 // we may be waiting for a previous packet
// NOTE(review): queuedPacket->Next() is dereferenced without a NULL check.
// This appears to rely on the earlier branches guaranteeing that the tail's
// sequence number is >= the new packet's - confirm.
499 if (queuedPacket
->Next()->SequenceNumber()
500 > packet
->SequenceNumber()) {
// Link the new packet between queuedPacket and its successor.
501 packet
->SetNext(queuedPacket
->Next());
502 queuedPacket
->SetNext(packet
);
// Advance the acknowledgment number across every packet that directly
// continues the acknowledged data, starting at the packet just inserted.
507 while (packet
!= NULL
&& packet
->SequenceNumber() == fAcknowledgeNumber
) {
508 fAcknowledgeNumber
= packet
->SequenceNumber() + packet
->DataSize();
509 packet
= packet
->Next();
// Searches the receive queue for a packet whose payload range contains
// fNextSequence, without removing it from the queue.
// The return statements are not visible in this excerpt.
515 TCPSocket::_PeekPacket()
517 TRACE("TCPSocket::_PeekPacket(): fNextSequence = %lu\n", fNextSequence
);
518 for (TCPPacket
* packet
= fFirstPacket
; packet
!= NULL
;
519 packet
= packet
->Next()) {
520 if (packet
->ProvidesSequenceNumber(fNextSequence
))
// Unlinks from the receive queue the packet whose payload range contains
// fNextSequence. The head is checked first, then the rest of the queue is
// scanned through each packet's successor.
// NOTE(review): the return statements and the statement run when the queue
// becomes empty are on lines not visible in this excerpt.
528 TCPSocket::_DequeuePacket()
530 //TRACE("TCPSocket::DequeuePacket()\n");
// Empty queue (action not visible).
531 if (fFirstPacket
== NULL
)
// Common case: the head of the queue is the wanted packet.
534 if (fFirstPacket
->ProvidesSequenceNumber(fNextSequence
)) {
535 TCPPacket
* packet
= fFirstPacket
;
536 fFirstPacket
= packet
->Next();
537 if (fFirstPacket
== NULL
)
// Clear the unlinked packet's queue link.
539 packet
->SetNext(NULL
);
540 TRACE("TCP: Dequeuing %lx - %lx from front.\n",
541 packet
->SequenceNumber(),
542 packet
->SequenceNumber() + packet
->DataSize() - 1);
// Otherwise look one packet ahead so the predecessor can be relinked.
546 for (TCPPacket
* packet
= fFirstPacket
;
547 packet
!= NULL
&& packet
->Next() != NULL
;
548 packet
= packet
->Next()) {
549 if (packet
->Next()->ProvidesSequenceNumber(fNextSequence
)) {
550 TCPPacket
* nextPacket
= packet
->Next();
551 packet
->SetNext(nextPacket
->Next());
// Removing the tail makes the predecessor the new tail.
552 if (fLastPacket
== nextPacket
)
553 fLastPacket
= packet
;
554 TRACE("TCP: Dequeuing %lx - %lx.\n",
555 nextPacket
->SequenceNumber(),
556 nextPacket
->SequenceNumber() + nextPacket
->DataSize() - 1);
560 TRACE_QUEUE("dequeue failed!\n");
// Transmits `packet` through the TCP service, using the socket's current
// acknowledgment number and window size, and queues it on the sent-packet list.
// NOTE(review): the error check after Send(), the test of `enqueue` and the
// return statements are on lines not visible in this excerpt.
566 TCPSocket::_Send(TCPPacket
* packet
, bool enqueue
)
// Wrap the payload without copying it.
568 ChainBuffer
buffer((void*)packet
->Data(), packet
->DataSize());
569 status_t error
= fTCPService
->Send(fPort
, fRemoteAddress
, fRemotePort
,
570 packet
->SequenceNumber(), fAcknowledgeNumber
, packet
->Flags(),
571 WindowSize(), &buffer
);
// Only a packet sent at the current sequence number advances it, by the
// payload size.
574 if (packet
->SequenceNumber() == fSequenceNumber
)
575 fSequenceNumber
+= packet
->DataSize();
578 _EnqueueOutgoingPacket(packet
);
// Retransmits every packet still in the sent-packet queue, using the current
// acknowledgment number and window size.
// NOTE(review): the handling of `error` and the return statement are on lines
// not visible in this excerpt.
585 TCPSocket::_ResendQueue()
587 TRACE("resending queue\n");
588 for (TCPPacket
* packet
= fFirstSentPacket
; packet
!= NULL
;
589 packet
= packet
->Next()) {
590 ChainBuffer
buffer((void*)packet
->Data(), packet
->DataSize());
591 status_t error
= fTCPService
->Send(fPort
, fRemoteAddress
, fRemotePort
,
592 packet
->SequenceNumber(), fAcknowledgeNumber
, packet
->Flags(),
593 WindowSize(), &buffer
);
// Appends `packet` to the tail of the sent-packet queue, or makes it the only
// element when the queue is empty.
602 TCPSocket::_EnqueueOutgoingPacket(TCPPacket
* packet
)
// Non-empty queue: link behind the current tail.
604 if (fLastSentPacket
!= NULL
) {
605 fLastSentPacket
->SetNext(packet
);
606 fLastSentPacket
= packet
;
// Empty queue: the packet is both head and tail.
608 fFirstSentPacket
= fLastSentPacket
= packet
;
// Debug helper, compiled only when TRACE_TCP_QUEUE is defined: traces the
// next expected and the acknowledged sequence numbers, the sequence number of
// every packet in the receive queue, and whether the send queue is empty.
613 #ifdef TRACE_TCP_QUEUE
616 TCPSocket::_DumpQueue()
618 TRACE_QUEUE("TCP: waiting for %lx (ack'ed %lx)\n", fNextSequence
, fAcknowledgeNumber
);
619 if (fFirstPacket
== NULL
)
620 TRACE_QUEUE("TCP: Queue is empty.\n");
622 for (TCPPacket
* packet
= fFirstPacket
; packet
!= NULL
;
623 packet
= packet
->Next()) {
624 TRACE_QUEUE("TCP: Queue: %lx\n", packet
->SequenceNumber());
627 if (fFirstSentPacket
!= NULL
)
628 TRACE_QUEUE("TCP: Send queue is non-empty.\n");
630 TRACE_QUEUE("TCP: Send queue is empty.\n");
// Sends an empty packet carrying only the ACK flag. It is passed to _Send()
// with enqueue = false.
// NOTE(review): the enclosing function header, the error checks, the disposal
// of the packet and the return statements are not visible in this excerpt.
639 TCPPacket
* packet
= new(nothrow
) TCPPacket();
642 status_t error
= packet
->SetTo(NULL
, 0, fAddress
, fPort
, fRemoteAddress
,
643 fRemotePort
, fSequenceNumber
, fAcknowledgeNumber
, TCP_ACK
);
648 error
= _Send(packet
, false);
// Polls the network stack until `timeout` (in the units of system_time())
// has elapsed. If the loop ends by timeout, returns B_WOULD_BLOCK for a zero
// timeout and B_TIMED_OUT otherwise.
// NOTE(review): the check of fState against `state` inside the loop and the
// statement run when no TCP service exists are on lines not visible in this
// excerpt.
657 TCPSocket::_WaitForState(TCPSocketState state
, bigtime_t timeout
)
659 if (fTCPService
== NULL
)
662 bigtime_t startTime
= system_time();
// Let the stack deliver pending packets, which may change the socket state.
664 fTCPService
->ProcessIncomingPackets();
667 } while (system_time() - startTime
< timeout
);
668 return timeout
== 0 ? B_WOULD_BLOCK
: B_TIMED_OUT
;
// Constructs the TCP service as an IP sub-service named kTCPServiceName and
// remembers the IP service it sits on.
674 TCPService::TCPService(IPService
* ipService
)
676 IPSubService(kTCPServiceName
),
677 fIPService(ipService
)
// Destructor: unregisters this sub-service from the IP service, if one was
// given.
682 TCPService::~TCPService()
684 if (fIPService
!= NULL
)
685 fIPService
->UnregisterIPSubService(this);
// Registers this object as a sub-service of the IP service.
// NOTE(review): the enclosing function header (presumably TCPService::Init())
// and the return statements for the missing-service and failed-registration
// cases are not visible in this excerpt.
692 if (fIPService
== NULL
)
695 if (!fIPService
->RegisterIPSubService(this))
// Protocol accessor. Its body is not visible in this excerpt; presumably it
// returns IPPROTO_TCP, the protocol value Send() passes to the IP service.
703 TCPService::IPProtocol() const
// Entry point for TCP segments delivered by the IP layer: computes the
// checksum, decodes the header fields, walks the TCP options for tracing,
// looks up the socket bound to the destination address/port, forwards an
// acknowledgment to it, and hands it a TCPPacket holding a copy of the
// payload.
// NOTE(review): the early returns, the checksum comparison and the error
// handling after allocation/SetTo() are on lines not visible in this excerpt.
710 TCPService::HandleIPPacket(IPService
* ipService
, ip_addr_t sourceIP
,
711 ip_addr_t destinationIP
, const void* data
, size_t size
)
713 TRACE("TCPService::HandleIPPacket(): source = %08lx, "
714 "destination = %08lx, %lu - %lu bytes\n", sourceIP
, destinationIP
,
715 size
, sizeof(tcp_header
));
// A segment must at least contain a complete fixed header.
717 if (data
== NULL
|| size
< sizeof(tcp_header
))
720 const tcp_header
* header
= (const tcp_header
*)data
;
// Checksum over the whole segment plus the pseudo header built from the two
// IP addresses.
722 uint16 chksum
= _ChecksumData(data
, size
, sourceIP
, destinationIP
);
724 TRACE_CHECKSUM("TCPService::HandleIPPacket(): invalid checksum "
725 "(%04x vs. %04x), padding %lu\n",
726 header
->checksum
, chksum
, size
% 2);
// Header fields arrive in network byte order.
730 uint16 source
= ntohs(header
->source
);
731 uint16 destination
= ntohs(header
->destination
);
732 uint32 sequenceNumber
= ntohl(header
->seqNumber
);
733 uint32 ackedNumber
= ntohl(header
->ackNumber
);
734 TRACE("\tsource = %u, dest = %u, seq = %lu, ack = %lu, dataOffset = %u, "
735 "flags %s %s %s %s\n", source
, destination
, sequenceNumber
,
736 ackedNumber
, header
->dataOffset
,
737 (header
->flags
& TCP_ACK
) != 0 ? "ACK" : "",
738 (header
->flags
& TCP_SYN
) != 0 ? "SYN" : "",
739 (header
->flags
& TCP_FIN
) != 0 ? "FIN" : "",
740 (header
->flags
& TCP_RST
) != 0 ? "RST" : "");
// dataOffset counts 32-bit words (see the uint32* arithmetic below); more
// than 5 words means options follow the fixed header.
// NOTE(review): no visible line checks that dataOffset * 4 <= size. For a
// malformed header the option walk could read past the segment, and the
// payload length `size - header->dataOffset * 4` below could underflow -
// confirm.
741 if (header
->dataOffset
> 5) {
742 uint8
* option
= (uint8
*)data
+ sizeof(tcp_header
);
// Walk the options up to the end of the header.
743 while ((uint32
*)option
< (uint32
*)data
+ header
->dataOffset
) {
744 uint8 optionKind
= option
[0];
// Kinds 0 and 1 are treated as single-byte options; every other kind takes
// its length from the following byte.
// NOTE(review): a length byte of 0 would keep `option` from advancing. No
// guard is visible here - confirm.
747 uint8 optionLength
= 1;
748 if (optionKind
> 1) {
749 optionLength
= option
[1];
750 TRACE("\tTCP option kind %u, length %u\n",
751 optionKind
, optionLength
);
// Trace only: the 16-bit value is read without byte-order conversion.
753 TRACE("\tTCP MSS = %04hu\n", *(uint16_t*)&option
[2]);
755 option
+= optionLength
;
// Route the segment to the socket bound to the destination.
759 TCPSocket
* socket
= _FindSocket(destinationIP
, destination
);
760 if (socket
== NULL
) {
761 // TODO If SYN, answer with RST?
762 TRACE("TCPService::HandleIPPacket(): no socket\n");
// Let the socket handle the acknowledgment first.
766 if ((header
->flags
& TCP_ACK
) != 0) {
767 socket
->Acknowledge(ackedNumber
);
// Copy the payload (everything after the header and its options) into a new
// packet and pass it to the socket.
770 TCPPacket
* packet
= new(nothrow
) TCPPacket();
773 status_t error
= packet
->SetTo((uint32
*)data
+ header
->dataOffset
,
774 size
- header
->dataOffset
* 4, sourceIP
, source
, destinationIP
,
775 destination
, sequenceNumber
, ackedNumber
, header
->flags
);
777 socket
->ProcessPacket(packet
);
// Builds a TCP header for the given ports, sequence/acknowledgment numbers,
// flags and window, chains it in front of the payload `buffer`, fills in the
// checksum and passes the result to the IP service as protocol IPPROTO_TCP.
// NOTE(review): the last parameter, the declaration of `header` and the return
// taken when no IP service exists are on lines not visible in this excerpt.
784 TCPService::Send(uint16 sourcePort
, ip_addr_t destinationAddress
,
785 uint16 destinationPort
, uint32 sequenceNumber
,
786 uint32 acknowledgmentNumber
, uint8 flags
, uint16 windowSize
,
789 TRACE("TCPService::Send(): seq = %lu, ack = %lu\n",
790 sequenceNumber
, acknowledgmentNumber
);
791 if (fIPService
== NULL
)
// Header buffer chained in front of the payload buffer.
797 ChainBuffer
headerBuffer(&header
, sizeof(header
), buffer
);
// Start from an all-zero header; the checksum field is zero while the checksum
// is computed below.
798 memset(&header
, 0, sizeof(header
));
// Multi-byte fields go out in network byte order.
799 header
.source
= htons(sourcePort
);
800 header
.destination
= htons(destinationPort
);
801 header
.seqNumber
= htonl(sequenceNumber
);
802 header
.ackNumber
= htonl(acknowledgmentNumber
);
// dataOffset is fixed at 5; this function writes no options.
803 header
.dataOffset
= 5;
804 header
.flags
= flags
;
805 header
.window
= htons(windowSize
);
// Checksum over pseudo header, TCP header and payload, using the local IP
// address as the source.
808 header
.checksum
= htons(_ChecksumBuffer(&headerBuffer
,
809 fIPService
->IPAddress(), destinationAddress
,
810 headerBuffer
.TotalSize()));
812 return fIPService
->Send(destinationAddress
, IPPROTO_TCP
, &headerBuffer
);
// Forwards the request to process pending incoming packets to the IP service,
// if one is set.
817 TCPService::ProcessIncomingPackets()
819 if (fIPService
!= NULL
)
820 fIPService
->ProcessIncomingPackets();
// Registers `socket` with the service by adding it to fSockets and returns the
// result of that Add(). The address/port pair is first looked up with
// _FindSocket().
// NOTE(review): the statement run when the pair is already in use, and any
// argument checks, are on lines not visible in this excerpt.
825 TCPService::BindSocket(TCPSocket
* socket
)
830 if (_FindSocket(socket
->Address(), socket
->Port()) != NULL
)
833 return fSockets
.Add(socket
);
// Removes `socket` from the service's socket list.
838 TCPService::UnbindSocket(TCPSocket
* socket
)
840 fSockets
.Remove(socket
);
// Computes the checksum over a pseudo header followed by the contents of
// `buffer`, by chaining a temporary pseudo-header buffer in front of `buffer`
// and running ip_checksum() over the chain.
// NOTE(review): most pseudo_header fields, their initializer values and the
// return statement are on lines not visible in this excerpt.
845 TCPService::_ChecksumBuffer(ChainBuffer
* buffer
, ip_addr_t source
,
846 ip_addr_t destination
, uint16 length
)
// Packed so the in-memory layout has no padding.
848 struct pseudo_header
{
850 ip_addr_t destination
;
854 } __attribute__ ((__packed__
));
855 pseudo_header header
= {
863 ChainBuffer
headerBuffer(&header
, sizeof(header
), buffer
);
864 uint16 checksum
= ip_checksum(&headerBuffer
);
// Unlink the caller's buffer from the temporary chain again; presumably so
// that destroying headerBuffer leaves `buffer` untouched - confirm.
865 headerBuffer
.DetachNext();
// Convenience wrapper: wraps the flat memory block `data` of `length` bytes in
// a ChainBuffer and returns _ChecksumBuffer() for it.
// NOTE(review): `length` is a uint16 while HandleIPPacket() passes a size_t,
// so larger values would be truncated - confirm callers never exceed 65535.
871 TCPService::_ChecksumData(const void* data
, uint16 length
, ip_addr_t source
,
872 ip_addr_t destination
)
874 ChainBuffer
buffer((void*)data
, length
);
875 return _ChecksumBuffer(&buffer
, source
, destination
, length
);
880 TCPService::_FindSocket(ip_addr_t address
, uint16 port
)
882 for (int i
= 0; i
< fSockets
.Count(); i
++) {
883 TCPSocket
* socket
= fSockets
.ElementAt(i
);
884 // TODO Remove socket->Address() INADDR_ANY check once the socket is
885 // aware of both its IP addresses (local one is INADDR_ANY for now).
886 if ((address
== INADDR_ANY
|| socket
->Address() == INADDR_ANY
887 || socket
->Address() == address
)
888 && socket
->Port() == port
) {