2 * This file is part of the Nice GLib ICE library.
4 * (C) 2010 Collabora Ltd.
5 * Contact: Youness Alaoui
8 * The contents of this file are subject to the Mozilla Public License Version
9 * 1.1 (the "License"); you may not use this file except in compliance with
10 * the License. You may obtain a copy of the License at
11 * http://www.mozilla.org/MPL/
13 * Software distributed under the License is distributed on an "AS IS" basis,
14 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
15 * for the specific language governing rights and limitations under the
18 * The Original Code is the Nice GLib ICE library.
20 * The Initial Developers of the Original Code are Collabora Ltd and Nokia
21 * Corporation. All Rights Reserved.
24 * Youness Alaoui, Collabora Ltd.
26 * Alternatively, the contents of this file may be used under the terms of the
27 * the GNU Lesser General Public License Version 2.1 (the "LGPL"), in which
28 * case the provisions of LGPL are applicable instead of those above. If you
29 * wish to allow use of your version of this file only under the terms of the
30 * LGPL and not to allow others to use your version of this file under the
31 * MPL, indicate your decision by deleting the provisions above and replace
32 * them with the notice and other provisions required by the LGPL. If you do
33 * not delete the provisions above, a recipient may use your version of this
34 * file under either the MPL or the LGPL.
37 /* Reproducing license from libjingle for copied code */
41 * Copyright 2004--2005, Google Inc.
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions are met:
46 * 1. Redistributions of source code must retain the above copyright notice,
47 * this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright notice,
49 * this list of conditions and the following disclaimer in the documentation
50 * and/or other materials provided with the distribution.
51 * 3. The name of the author may not be used to endorse or promote products
52 * derived from this software without specific prior written permission.
54 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
55 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
56 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
57 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
58 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
60 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
61 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
62 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
63 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
67 #include <arpa/inet.h>
73 #include "pseudotcp.h"
75 G_DEFINE_TYPE (PseudoTcpSocket
, pseudo_tcp_socket
, G_TYPE_OBJECT
);
78 //////////////////////////////////////////////////////////////////////
80 //////////////////////////////////////////////////////////////////////
83 const guint16 PACKET_MAXIMUMS
[] = {
84 65535, // Theoretical maximum, Hyperchannel
86 17914, // 16Mb IBM Token Ring
88 //4464, // IEEE 802.5 (4Mb max)
90 //2048, // Wideband Network
91 2002, // IEEE 802.5 (4Mb recommended)
92 //1536, // Experimental Ethernet Networks
93 //1500, // Ethernet, Point-to-Point (default)
95 1006, // SLIP, ARPANET
96 //576, // X.25 Networks
97 //544, // DEC IP Portal
99 508, // IEEE 802/Source-Rt Bridge, ARCNET
100 296, // Point-to-Point (low delay)
101 //68, // Official minimum
102 0, // End of list marker
105 #define MAX_PACKET 65535
106 // Note: we removed lowest level because packet overhead was larger!
107 #define MIN_PACKET 296
109 // (+ up to 40 bytes of options?)
110 #define IP_HEADER_SIZE 20
111 #define ICMP_HEADER_SIZE 8
112 #define UDP_HEADER_SIZE 8
113 // TODO: Make JINGLE_HEADER_SIZE transparent to this code?
114 // when relay framing is in use
115 #define JINGLE_HEADER_SIZE 64
117 //////////////////////////////////////////////////////////////////////
118 // Global Constants and Functions
119 //////////////////////////////////////////////////////////////////////
122 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
123 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
124 // 0 | Conversation Number |
125 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
126 // 4 | Sequence Number |
127 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
128 // 8 | Acknowledgment Number |
129 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
130 // | | |U|A|P|R|S|F| |
131 // 12 | Control | |R|C|S|S|Y|I| Window |
132 // | | |G|K|H|T|N|N| |
133 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
134 // 16 | Timestamp sending |
135 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
136 // 20 | Timestamp receiving |
137 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
139 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
141 //////////////////////////////////////////////////////////////////////
143 #define MAX_SEQ 0xFFFFFFFF
144 #define HEADER_SIZE 24
146 #define PACKET_OVERHEAD (HEADER_SIZE + UDP_HEADER_SIZE + \
147 IP_HEADER_SIZE + JINGLE_HEADER_SIZE)
149 // MIN_RTO = 250 ms (RFC1122, Sec 4.2.3.1 "fractions of a second")
151 #define DEF_RTO 3000 /* 3 seconds (RFC1122, Sec 4.2.3.1) */
152 #define MAX_RTO 60000 /* 60 seconds */
153 #define ACK_DELAY 100 /* 100 milliseconds */
156 #define FLAG_FIN 0x01
157 #define FLAG_SYN 0x02
158 #define FLAG_ACK 0x10
161 #define FLAG_CTL 0x02
162 #define FLAG_RST 0x04
164 #define CTL_CONNECT 0
165 //#define CTL_REDIRECT 1
166 #define CTL_EXTRA 255
169 #define CTRL_BOUND 0x80000000
171 // If there are no pending clocks, wake up every 4 seconds
172 #define DEFAULT_TIMEOUT 4000
173 // If the connection is closed, once per minute
174 #define CLOSED_TIMEOUT (60 * 1000)
176 //////////////////////////////////////////////////////////////////////
178 //////////////////////////////////////////////////////////////////////
181 min (guint32 first
, guint32 second
)
183 return (first
< second
? first
:second
);
186 max (guint32 first
, guint32 second
)
188 return (first
> second
? first
:second
);
192 bound(guint32 lower
, guint32 middle
, guint32 upper
)
194 return min (max (lower
, middle
), upper
);
198 get_current_time(void)
201 g_get_current_time (&tv
);
202 return tv
.tv_sec
* 1000 + tv
.tv_usec
/ 1000;
206 time_is_between(guint32 later
, guint32 middle
, guint32 earlier
)
208 if (earlier
<= later
) {
209 return ((earlier
<= middle
) && (middle
<= later
));
211 return !((later
< middle
) && (middle
< earlier
));
216 time_diff(guint32 later
, guint32 earlier
)
218 guint32 LAST
= 0xFFFFFFFF;
219 guint32 HALF
= 0x80000000;
220 if (time_is_between(earlier
+ HALF
, later
, earlier
)) {
221 if (earlier
<= later
) {
222 return (long)(later
- earlier
);
224 return (long)(later
+ (LAST
- earlier
) + 1);
227 if (later
<= earlier
) {
228 return -(long) (earlier
- later
);
230 return -(long)(earlier
+ (LAST
- later
) + 1);
235 //////////////////////////////////////////////////////////////////////
237 //////////////////////////////////////////////////////////////////////
252 // Note: can't go as high as 1024 * 64, because of uint16 precision
253 kRcvBufSize
= 1024 * 60,
254 // Note: send buffer should be larger to make sure we can always fill the
256 kSndBufSize
= 1024 * 90
260 guint32 conv
, seq
, ack
;
265 guint32 tsval
, tsecr
;
279 struct _PseudoTcpSocketPrivate
{
280 PseudoTcpCallbacks callbacks
;
286 PseudoTcpState state
;
288 gboolean bReadEnable
, bWriteEnable
, bOutgoing
;
289 guint32 last_traffic
;
293 gchar rbuf
[kRcvBufSize
];
294 guint32 rcv_nxt
, rcv_wnd
, rlen
, lastrecv
;
298 gchar sbuf
[kSndBufSize
];
299 guint32 snd_nxt
, snd_wnd
, slen
, lastsend
, snd_una
;
300 // Maximum segment size, estimated protocol level, largest segment sent
301 guint32 mss
, msslevel
, largest
, mtu_advise
;
305 // Timestamp tracking
306 guint32 ts_recent
, ts_lastack
;
308 // Round-trip calculation
309 guint32 rx_rttvar
, rx_srtt
, rx_rto
;
311 // Congestion avoidance, Fast retransmit/recovery, Delayed ACKs
312 guint32 ssthresh
, cwnd
;
323 PROP_CONVERSATION
= 1,
330 static void pseudo_tcp_socket_get_property (GObject
*object
, guint property_id
,
331 GValue
*value
, GParamSpec
*pspec
);
332 static void pseudo_tcp_socket_set_property (GObject
*object
, guint property_id
,
333 const GValue
*value
, GParamSpec
*pspec
);
334 static void pseudo_tcp_socket_finalize (GObject
*object
);
337 static guint32
queue(PseudoTcpSocket
*self
, const gchar
* data
,
338 guint32 len
, gboolean bCtrl
);
339 static PseudoTcpWriteResult
packet(PseudoTcpSocket
*self
, guint32 seq
,
340 guint8 flags
, const gchar
* data
, guint32 len
);
341 static gboolean
parse(PseudoTcpSocket
*self
,
342 const guint8
* buffer
, guint32 size
);
343 static gboolean
process(PseudoTcpSocket
*self
, Segment
*seg
);
344 static gboolean
transmit(PseudoTcpSocket
*self
, const GList
*seg
, guint32 now
);
345 static void attempt_send(PseudoTcpSocket
*self
, SendFlags sflags
);
346 static void closedown(PseudoTcpSocket
*self
, guint32 err
);
347 static void adjustMTU(PseudoTcpSocket
*self
);
350 // The following logging is for detailed (packet-level) pseudotcp analysis only.
351 static PseudoTcpDebugLevel debug_level
= PSEUDO_TCP_DEBUG_NONE
;
/*
 * Emit a g_debug() message, prefixed with the socket pointer, when the
 * file-level `debug_level` is at least `level`.
 *
 * The call site must have a variable named `self` in scope.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to
 * exactly one statement: a bare `if` here would silently capture an `else`
 * that follows the macro at the call site.  `level` is parenthesised so that
 * an expression argument is compared as a whole.
 */
#define DEBUG(level, fmt, ...)                                        \
  do {                                                                \
    if (debug_level >= (level))                                       \
      g_debug ("PseudoTcpSocket %p: " fmt, self, ## __VA_ARGS__);     \
  } while (0)
358 pseudo_tcp_set_debug_level (PseudoTcpDebugLevel level
)
364 pseudo_tcp_socket_class_init (PseudoTcpSocketClass
*cls
)
366 GObjectClass
*object_class
= G_OBJECT_CLASS (cls
);
368 object_class
->get_property
= pseudo_tcp_socket_get_property
;
369 object_class
->set_property
= pseudo_tcp_socket_set_property
;
370 object_class
->finalize
= pseudo_tcp_socket_finalize
;
372 g_object_class_install_property (object_class
, PROP_CONVERSATION
,
373 g_param_spec_uint ("conversation", "TCP Conversation ID",
374 "The TCP Conversation ID",
376 G_PARAM_CONSTRUCT_ONLY
| G_PARAM_READWRITE
| G_PARAM_STATIC_STRINGS
));
378 g_object_class_install_property (object_class
, PROP_CALLBACKS
,
379 g_param_spec_pointer ("callbacks", "PseudoTcp socket callbacks",
380 "Structure with the callbacks to call when PseudoTcp events happen",
381 G_PARAM_READWRITE
| G_PARAM_STATIC_STRINGS
));
383 g_object_class_install_property (object_class
, PROP_STATE
,
384 g_param_spec_uint ("state", "PseudoTcp State",
385 "The current state (enum PseudoTcpState) of the PseudoTcp socket",
386 TCP_LISTEN
, TCP_CLOSED
, TCP_LISTEN
,
387 G_PARAM_READABLE
| G_PARAM_STATIC_STRINGS
));
393 pseudo_tcp_socket_get_property (GObject
*object
,
398 PseudoTcpSocket
*self
= PSEUDO_TCP_SOCKET (object
);
400 switch (property_id
) {
401 case PROP_CONVERSATION
:
402 g_value_set_uint (value
, self
->priv
->conv
);
405 g_value_set_pointer (value
, (gpointer
) &self
->priv
->callbacks
);
408 g_value_set_uint (value
, self
->priv
->state
);
411 G_OBJECT_WARN_INVALID_PROPERTY_ID (object
, property_id
, pspec
);
417 pseudo_tcp_socket_set_property (GObject
*object
,
422 PseudoTcpSocket
*self
= PSEUDO_TCP_SOCKET (object
);
424 switch (property_id
) {
425 case PROP_CONVERSATION
:
426 self
->priv
->conv
= g_value_get_uint (value
);
430 PseudoTcpCallbacks
*c
= g_value_get_pointer (value
);
431 self
->priv
->callbacks
= *c
;
435 G_OBJECT_WARN_INVALID_PROPERTY_ID (object
, property_id
, pspec
);
441 pseudo_tcp_socket_finalize (GObject
*object
)
443 PseudoTcpSocket
*self
= PSEUDO_TCP_SOCKET (object
);
444 PseudoTcpSocketPrivate
*priv
= self
->priv
;
450 for (i
= priv
->slist
; i
; i
= i
->next
) {
451 SSegment
*sseg
= i
->data
;
452 g_slice_free (SSegment
, sseg
);
454 for (i
= priv
->rlist
; i
; i
= i
->next
) {
455 RSegment
*rseg
= i
->data
;
456 g_slice_free (RSegment
, rseg
);
458 g_list_free (priv
->slist
);
460 g_list_free (priv
->rlist
);
466 if (G_OBJECT_CLASS (pseudo_tcp_socket_parent_class
)->finalize
)
467 G_OBJECT_CLASS (pseudo_tcp_socket_parent_class
)->finalize (object
);
472 pseudo_tcp_socket_init (PseudoTcpSocket
*obj
)
474 /* Use g_new0, and do not use g_object_set_private because the size of
475 * our private data is too big (150KB+) and the g_slice_alloc cannot allocate
476 * it. So we handle the private ourselves */
477 PseudoTcpSocketPrivate
*priv
= g_new0 (PseudoTcpSocketPrivate
, 1);
478 guint32 now
= get_current_time();
482 priv
->shutdown
= SD_NONE
;
485 priv
->state
= TCP_LISTEN
;
487 priv
->rcv_wnd
= sizeof(priv
->rbuf
);
488 priv
->snd_nxt
= priv
->slen
= 0;
490 priv
->snd_una
= priv
->rcv_nxt
= priv
->rlen
= 0;
491 priv
->bReadEnable
= TRUE
;
492 priv
->bWriteEnable
= FALSE
;
497 priv
->mss
= MIN_PACKET
- PACKET_OVERHEAD
;
498 priv
->mtu_advise
= MAX_PACKET
;
502 priv
->cwnd
= 2 * priv
->mss
;
503 priv
->ssthresh
= sizeof(priv
->rbuf
);
504 priv
->lastrecv
= priv
->lastsend
= priv
->last_traffic
= now
;
505 priv
->bOutgoing
= FALSE
;
510 priv
->ts_recent
= priv
->ts_lastack
= 0;
512 priv
->rx_rto
= DEF_RTO
;
513 priv
->rx_srtt
= priv
->rx_rttvar
= 0;
516 PseudoTcpSocket
*pseudo_tcp_socket_new (guint32 conversation
,
517 PseudoTcpCallbacks
*callbacks
)
520 return g_object_new (PSEUDO_TCP_SOCKET_TYPE
,
521 "conversation", conversation
,
522 "callbacks", callbacks
,
527 pseudo_tcp_socket_connect(PseudoTcpSocket
*self
)
529 PseudoTcpSocketPrivate
*priv
= self
->priv
;
532 if (priv
->state
!= TCP_LISTEN
) {
533 priv
->error
= EINVAL
;
537 priv
->state
= TCP_SYN_SENT
;
538 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "State: TCP_SYN_SENT");
540 buffer
[0] = CTL_CONNECT
;
541 queue(self
, buffer
, 1, TRUE
);
542 attempt_send(self
, sfNone
);
548 pseudo_tcp_socket_notify_mtu(PseudoTcpSocket
*self
, guint16 mtu
)
550 PseudoTcpSocketPrivate
*priv
= self
->priv
;
551 priv
->mtu_advise
= mtu
;
552 if (priv
->state
== TCP_ESTABLISHED
) {
558 pseudo_tcp_socket_notify_clock(PseudoTcpSocket
*self
)
560 PseudoTcpSocketPrivate
*priv
= self
->priv
;
561 guint32 now
= get_current_time ();
563 if (priv
->state
== TCP_CLOSED
)
566 // Check if it's time to retransmit a segment
567 if (priv
->rto_base
&&
568 (time_diff(priv
->rto_base
+ priv
->rx_rto
, now
) <= 0)) {
569 if (g_list_length (priv
->slist
) == 0) {
570 g_assert_not_reached ();
572 // Note: (priv->slist.front().xmit == 0)) {
573 // retransmit segments
577 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "timeout retransmit (rto: %d) "
578 "(rto_base: %d) (now: %d) (dup_acks: %d)",
579 priv
->rx_rto
, priv
->rto_base
, now
, (guint
) priv
->dup_acks
);
581 if (!transmit(self
, priv
->slist
, now
)) {
582 closedown(self
, ECONNABORTED
);
586 nInFlight
= priv
->snd_nxt
- priv
->snd_una
;
587 priv
->ssthresh
= max(nInFlight
/ 2, 2 * priv
->mss
);
588 //LOG(LS_INFO) << "priv->ssthresh: " << priv->ssthresh << " nInFlight: " << nInFlight << " priv->mss: " << priv->mss;
589 priv
->cwnd
= priv
->mss
;
591 // Back off retransmit timer. Note: the limit is lower when connecting.
592 rto_limit
= (priv
->state
< TCP_ESTABLISHED
) ? DEF_RTO
: MAX_RTO
;
593 priv
->rx_rto
= min(rto_limit
, priv
->rx_rto
* 2);
594 priv
->rto_base
= now
;
598 // Check if it's time to probe closed windows
599 if ((priv
->snd_wnd
== 0)
600 && (time_diff(priv
->lastsend
+ priv
->rx_rto
, now
) <= 0)) {
601 if (time_diff(now
, priv
->lastrecv
) >= 15000) {
602 closedown(self
, ECONNABORTED
);
607 packet(self
, priv
->snd_nxt
- 1, 0, 0, 0);
608 priv
->lastsend
= now
;
610 // back off retransmit timer
611 priv
->rx_rto
= min(MAX_RTO
, priv
->rx_rto
* 2);
614 // Check if it's time to send delayed acks
615 if (priv
->t_ack
&& (time_diff(priv
->t_ack
+ ACK_DELAY
, now
) <= 0)) {
616 packet(self
, priv
->snd_nxt
, 0, 0, 0);
622 pseudo_tcp_socket_notify_packet(PseudoTcpSocket
*self
,
623 const gchar
* buffer
, guint32 len
)
625 if (len
> MAX_PACKET
) {
626 //LOG_F(WARNING) << "packet too large";
629 return parse(self
, (guint8
*) buffer
, len
);
633 pseudo_tcp_socket_get_next_clock(PseudoTcpSocket
*self
, long *timeout
)
635 PseudoTcpSocketPrivate
*priv
= self
->priv
;
636 guint32 now
= get_current_time ();
638 if (priv
->shutdown
== SD_FORCEFUL
)
641 if ((priv
->shutdown
== SD_GRACEFUL
)
642 && ((priv
->state
!= TCP_ESTABLISHED
)
643 || ((priv
->slen
== 0) && (priv
->t_ack
== 0)))) {
647 if (priv
->state
== TCP_CLOSED
) {
648 *timeout
= CLOSED_TIMEOUT
;
652 *timeout
= DEFAULT_TIMEOUT
;
655 *timeout
= min(*timeout
, time_diff(priv
->t_ack
+ ACK_DELAY
, now
));
657 if (priv
->rto_base
) {
658 *timeout
= min(*timeout
, time_diff(priv
->rto_base
+ priv
->rx_rto
, now
));
660 if (priv
->snd_wnd
== 0) {
661 *timeout
= min(*timeout
, time_diff(priv
->lastsend
+ priv
->rx_rto
, now
));
669 pseudo_tcp_socket_recv(PseudoTcpSocket
*self
, char * buffer
, size_t len
)
671 PseudoTcpSocketPrivate
*priv
= self
->priv
;
674 if (priv
->state
!= TCP_ESTABLISHED
) {
675 priv
->error
= ENOTCONN
;
679 if (priv
->rlen
== 0) {
680 priv
->bReadEnable
= TRUE
;
681 priv
->error
= EWOULDBLOCK
;
685 read
= min((guint32
) len
, priv
->rlen
);
686 memcpy(buffer
, priv
->rbuf
, read
);
689 /* !?! until we create a circular buffer, we need to move all of the rest
691 memmove(priv
->rbuf
, priv
->rbuf
+ read
, sizeof(priv
->rbuf
) - read
);
693 if ((sizeof(priv
->rbuf
) - priv
->rlen
- priv
->rcv_wnd
)
694 >= min(sizeof(priv
->rbuf
) / 2, priv
->mss
)) {
695 // !?! Not sure about this was closed business
696 gboolean bWasClosed
= (priv
->rcv_wnd
== 0);
698 priv
->rcv_wnd
= sizeof(priv
->rbuf
) - priv
->rlen
;
701 attempt_send(self
, sfImmediateAck
);
709 pseudo_tcp_socket_send(PseudoTcpSocket
*self
, const char * buffer
, guint32 len
)
711 PseudoTcpSocketPrivate
*priv
= self
->priv
;
714 if (priv
->state
!= TCP_ESTABLISHED
) {
715 priv
->error
= ENOTCONN
;
719 if (priv
->slen
== sizeof(priv
->sbuf
)) {
720 priv
->bWriteEnable
= TRUE
;
721 priv
->error
= EWOULDBLOCK
;
725 written
= queue(self
, buffer
, len
, FALSE
);
726 attempt_send(self
, sfNone
);
728 if (written
> 0 && (guint32
)written
< len
) {
729 priv
->bWriteEnable
= TRUE
;
736 pseudo_tcp_socket_close(PseudoTcpSocket
*self
, gboolean force
)
738 PseudoTcpSocketPrivate
*priv
= self
->priv
;
739 //nice_agent ("Closing socket %p : %d", sock, force?"true":"false");
740 priv
->shutdown
= force
? SD_FORCEFUL
: SD_GRACEFUL
;
744 pseudo_tcp_socket_get_error(PseudoTcpSocket
*self
)
746 PseudoTcpSocketPrivate
*priv
= self
->priv
;
751 // Internal Implementation
755 queue(PseudoTcpSocket
*self
, const gchar
* data
, guint32 len
, gboolean bCtrl
)
757 PseudoTcpSocketPrivate
*priv
= self
->priv
;
759 if (len
> sizeof(priv
->sbuf
) - priv
->slen
) {
761 len
= sizeof(priv
->sbuf
) - priv
->slen
;
764 // We can concatenate data if the last segment is the same type
765 // (control v. regular data), and has not been transmitted yet
766 if (g_list_length (priv
->slist
) > 0 &&
767 (((SSegment
*)g_list_last (priv
->slist
)->data
)->bCtrl
== bCtrl
) &&
768 (((SSegment
*)g_list_last (priv
->slist
)->data
)->xmit
== 0)) {
769 ((SSegment
*)g_list_last (priv
->slist
)->data
)->len
+= len
;
771 SSegment
*sseg
= g_slice_new0 (SSegment
);
772 sseg
->seq
= priv
->snd_una
+ priv
->slen
;
775 priv
->slist
= g_list_append (priv
->slist
, sseg
);
778 memcpy(priv
->sbuf
+ priv
->slen
, data
, len
);
780 //LOG(LS_INFO) << "PseudoTcp::queue - priv->slen = " << priv->slen;
784 static PseudoTcpWriteResult
785 packet(PseudoTcpSocket
*self
, guint32 seq
, guint8 flags
,
786 const gchar
* data
, guint32 len
)
788 PseudoTcpSocketPrivate
*priv
= self
->priv
;
789 guint32 now
= get_current_time();
790 guint8 buffer
[MAX_PACKET
];
791 PseudoTcpWriteResult wres
= WR_SUCCESS
;
793 g_assert(HEADER_SIZE
+ len
<= MAX_PACKET
);
795 *((uint32_t *) buffer
) = htonl(priv
->conv
);
796 *((uint32_t *) (buffer
+ 4)) = htonl(seq
);
797 *((uint32_t *) (buffer
+ 8)) = htonl(priv
->rcv_nxt
);
800 *((uint16_t *) (buffer
+ 14)) = htons((uint16_t)priv
->rcv_wnd
);
802 // Timestamp computations
803 *((uint32_t *) (buffer
+ 16)) = htonl(now
);
804 *((uint32_t *) (buffer
+ 20)) = htonl(priv
->ts_recent
);
805 priv
->ts_lastack
= priv
->rcv_nxt
;
808 memcpy(buffer
+ HEADER_SIZE
, data
, len
);
810 DEBUG (PSEUDO_TCP_DEBUG_VERBOSE
, "<-- <CONV=%d><FLG=%d><SEQ=%d:%d><ACK=%d>"
811 "<WND=%d><TS=%d><TSR=%d><LEN=%d>",
812 priv
->conv
, (unsigned)flags
, seq
, seq
+ len
, priv
->rcv_nxt
, priv
->rcv_wnd
,
813 now
% 10000, priv
->ts_recent
% 10000, len
);
815 wres
= priv
->callbacks
.WritePacket(self
, (gchar
*) buffer
, len
+ HEADER_SIZE
,
816 priv
->callbacks
.user_data
);
817 /* Note: When data is NULL, this is an ACK packet. We don't read the
818 return value for those, and thus we won't retry. So go ahead and treat
819 the packet as a success (basically simulate as if it were dropped),
820 which will prevent our timers from being messed up. */
821 if ((wres
!= WR_SUCCESS
) && (NULL
!= data
))
826 priv
->lastsend
= now
;
828 priv
->last_traffic
= now
;
829 priv
->bOutgoing
= TRUE
;
835 parse(PseudoTcpSocket
*self
, const guint8
* buffer
, guint32 size
)
842 seg
.conv
= ntohl(*(uint32_t *)buffer
);
843 seg
.seq
= ntohl(*(uint32_t *)(buffer
+ 4));
844 seg
.ack
= ntohl(*(uint32_t *)(buffer
+ 8));
845 seg
.flags
= buffer
[13];
846 seg
.wnd
= ntohs(*(uint16_t *)(buffer
+ 14));
848 seg
.tsval
= ntohl(*(uint32_t *)(buffer
+ 16));
849 seg
.tsecr
= ntohl(*(uint32_t *)(buffer
+ 20));
851 seg
.data
= ((gchar
*)buffer
) + HEADER_SIZE
;
852 seg
.len
= size
- HEADER_SIZE
;
854 DEBUG (PSEUDO_TCP_DEBUG_VERBOSE
, "--> <CONV=%d><FLG=%d><SEQ=%d:%d><ACK=%d>"
855 "<WND=%d><TS=%d><TSR=%d><LEN=%d>",
856 seg
.conv
, (unsigned)seg
.flags
, seg
.seq
, seg
.seq
+ seg
.len
, seg
.ack
,
857 seg
.wnd
, seg
.tsval
% 10000, seg
.tsecr
% 10000, seg
.len
);
859 return process(self
, &seg
);
864 process(PseudoTcpSocket
*self
, Segment
*seg
)
866 PseudoTcpSocketPrivate
*priv
= self
->priv
;
868 SendFlags sflags
= sfNone
;
869 gboolean bIgnoreData
;
871 gboolean bConnect
= FALSE
;
873 /* If this is the wrong conversation, send a reset!?!
874 (with the correct conversation?) */
875 if (seg
->conv
!= priv
->conv
) {
876 //if ((seg->flags & FLAG_RST) == 0) {
877 // packet(sock, tcb, seg->ack, 0, FLAG_RST, 0, 0);
879 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "wrong conversation");
883 now
= get_current_time();
884 priv
->last_traffic
= priv
->lastrecv
= now
;
885 priv
->bOutgoing
= FALSE
;
887 if (priv
->state
== TCP_CLOSED
) {
889 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "closed");
893 // Check if this is a reset segment
894 if (seg
->flags
& FLAG_RST
) {
895 closedown(self
, ECONNRESET
);
899 // Check for control data
901 if (seg
->flags
& FLAG_CTL
) {
903 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "Missing control code");
905 } else if (seg
->data
[0] == CTL_CONNECT
) {
907 if (priv
->state
== TCP_LISTEN
) {
909 priv
->state
= TCP_SYN_RECEIVED
;
910 buffer
[0] = CTL_CONNECT
;
911 queue(self
, buffer
, 1, TRUE
);
912 } else if (priv
->state
== TCP_SYN_SENT
) {
913 priv
->state
= TCP_ESTABLISHED
;
914 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "State: TCP_ESTABLISHED");
916 if (priv
->callbacks
.PseudoTcpOpened
)
917 priv
->callbacks
.PseudoTcpOpened(self
, priv
->callbacks
.user_data
);
921 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "Unknown control code: %d", seg
->data
[0]);
927 if ((seg
->seq
<= priv
->ts_lastack
) &&
928 (priv
->ts_lastack
< seg
->seq
+ seg
->len
)) {
929 priv
->ts_recent
= seg
->tsval
;
932 // Check if this is a valuable ack
933 if ((seg
->ack
> priv
->snd_una
) && (seg
->ack
<= priv
->snd_nxt
)) {
936 guint32 kIdealRefillSize
;
938 // Calculate round-trip time
940 long rtt
= time_diff(now
, seg
->tsecr
);
942 if (priv
->rx_srtt
== 0) {
944 priv
->rx_rttvar
= rtt
/ 2;
946 priv
->rx_rttvar
= (3 * priv
->rx_rttvar
+
947 abs((long)(rtt
- priv
->rx_srtt
))) / 4;
948 priv
->rx_srtt
= (7 * priv
->rx_srtt
+ rtt
) / 8;
950 priv
->rx_rto
= bound(MIN_RTO
,
951 priv
->rx_srtt
+ max(1LU, 4 * priv
->rx_rttvar
), MAX_RTO
);
953 DEBUG (PSEUDO_TCP_DEBUG_VERBOSE
, "rtt: %ld srtt: %d rto: %d",
954 rtt
, priv
->rx_srtt
, priv
->rx_rto
);
956 g_assert_not_reached ();
960 priv
->snd_wnd
= seg
->wnd
;
962 nAcked
= seg
->ack
- priv
->snd_una
;
963 priv
->snd_una
= seg
->ack
;
965 priv
->rto_base
= (priv
->snd_una
== priv
->snd_nxt
) ? 0 : now
;
967 priv
->slen
-= nAcked
;
968 memmove(priv
->sbuf
, priv
->sbuf
+ nAcked
, priv
->slen
);
969 //LOG(LS_INFO) << "PseudoTcp::process - priv->slen = " << priv->slen;
971 for (nFree
= nAcked
; nFree
> 0; ) {
972 SSegment
*data
= (SSegment
*) (g_list_first (priv
->slist
)->data
);
973 g_assert(g_list_length (priv
->slist
) > 0);
974 if (nFree
< data
->len
) {
978 if (data
->len
> priv
->largest
) {
979 priv
->largest
= data
->len
;
982 g_slice_free (SSegment
, priv
->slist
->data
);
983 priv
->slist
= g_list_delete_link (priv
->slist
, priv
->slist
);
987 if (priv
->dup_acks
>= 3) {
988 if (priv
->snd_una
>= priv
->recover
) { // NewReno
989 guint32 nInFlight
= priv
->snd_nxt
- priv
->snd_una
;
991 priv
->cwnd
= min(priv
->ssthresh
, nInFlight
+ priv
->mss
);
992 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "exit recovery");
995 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "recovery retransmit");
996 if (!transmit(self
, priv
->slist
, now
)) {
997 closedown(self
, ECONNABORTED
);
1000 priv
->cwnd
+= priv
->mss
- min(nAcked
, priv
->cwnd
);
1004 // Slow start, congestion avoidance
1005 if (priv
->cwnd
< priv
->ssthresh
) {
1006 priv
->cwnd
+= priv
->mss
;
1008 priv
->cwnd
+= max(1LU, priv
->mss
* priv
->mss
/ priv
->cwnd
);
1013 if ((priv
->state
== TCP_SYN_RECEIVED
) && !bConnect
) {
1014 priv
->state
= TCP_ESTABLISHED
;
1015 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "State: TCP_ESTABLISHED");
1017 if (priv
->callbacks
.PseudoTcpOpened
)
1018 priv
->callbacks
.PseudoTcpOpened(self
, priv
->callbacks
.user_data
);
1021 // If we make room in the send queue, notify the user
1022 // The goal is to make sure we always have at least enough data to fill the
1023 // window. We'd like to notify the app when we are halfway to that point.
1024 kIdealRefillSize
= (sizeof(priv
->sbuf
) + sizeof(priv
->rbuf
)) / 2;
1025 if (priv
->bWriteEnable
&& (priv
->slen
< kIdealRefillSize
)) {
1026 priv
->bWriteEnable
= FALSE
;
1027 if (priv
->callbacks
.PseudoTcpWritable
)
1028 priv
->callbacks
.PseudoTcpWritable(self
, priv
->callbacks
.user_data
);
1030 } else if (seg
->ack
== priv
->snd_una
) {
1031 /* !?! Note, tcp says don't do this... but otherwise how does a
1032 closed window become open? */
1033 priv
->snd_wnd
= seg
->wnd
;
1035 // Check duplicate acks
1037 // it's a dup ack, but with a data payload, so don't modify priv->dup_acks
1038 } else if (priv
->snd_una
!= priv
->snd_nxt
) {
1041 priv
->dup_acks
+= 1;
1042 if (priv
->dup_acks
== 3) { // (Fast Retransmit)
1043 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "enter recovery");
1044 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "recovery retransmit");
1045 if (!transmit(self
, priv
->slist
, now
)) {
1046 closedown(self
, ECONNABORTED
);
1049 priv
->recover
= priv
->snd_nxt
;
1050 nInFlight
= priv
->snd_nxt
- priv
->snd_una
;
1051 priv
->ssthresh
= max(nInFlight
/ 2, 2 * priv
->mss
);
1052 //LOG(LS_INFO) << "priv->ssthresh: " << priv->ssthresh << " nInFlight: " << nInFlight << " priv->mss: " << priv->mss;
1053 priv
->cwnd
= priv
->ssthresh
+ 3 * priv
->mss
;
1054 } else if (priv
->dup_acks
> 3) {
1055 priv
->cwnd
+= priv
->mss
;
1062 /* Conditions where acks must be sent:
1063 * 1) Segment is too old (they missed an ACK) (immediately)
1064 * 2) Segment is too new (we missed a segment) (immediately)
1065 * 3) Segment has data (so we need to ACK!) (delayed)
1066 * ... so the only time we don't need to ACK, is an empty segment
1067 * that points to rcv_nxt!
1070 if (seg
->seq
!= priv
->rcv_nxt
) {
1071 sflags
= sfImmediateAck
; // (Fast Recovery)
1072 } else if (seg
->len
!= 0) {
1073 sflags
= sfDelayedAck
;
1075 if (sflags
== sfImmediateAck
) {
1076 if (seg
->seq
> priv
->rcv_nxt
) {
1077 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "too new");
1078 } else if (seg
->seq
+ seg
->len
<= priv
->rcv_nxt
) {
1079 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "too old");
1083 // Adjust the incoming segment to fit our receive buffer
1084 if (seg
->seq
< priv
->rcv_nxt
) {
1085 guint32 nAdjust
= priv
->rcv_nxt
- seg
->seq
;
1086 if (nAdjust
< seg
->len
) {
1087 seg
->seq
+= nAdjust
;
1088 seg
->data
+= nAdjust
;
1089 seg
->len
-= nAdjust
;
1094 if ((seg
->seq
+ seg
->len
- priv
->rcv_nxt
) >
1095 (sizeof(priv
->rbuf
) - priv
->rlen
)) {
1096 guint32 nAdjust
= seg
->seq
+ seg
->len
- priv
->rcv_nxt
-
1097 (sizeof(priv
->rbuf
) - priv
->rlen
);
1098 if (nAdjust
< seg
->len
) {
1099 seg
->len
-= nAdjust
;
1105 bIgnoreData
= (seg
->flags
& FLAG_CTL
) || (priv
->shutdown
!= SD_NONE
);
1110 if (seg
->seq
== priv
->rcv_nxt
) {
1111 priv
->rcv_nxt
+= seg
->len
;
1114 guint32 nOffset
= seg
->seq
- priv
->rcv_nxt
;
1115 memcpy(priv
->rbuf
+ priv
->rlen
+ nOffset
, seg
->data
, seg
->len
);
1116 if (seg
->seq
== priv
->rcv_nxt
) {
1119 priv
->rlen
+= seg
->len
;
1120 priv
->rcv_nxt
+= seg
->len
;
1121 priv
->rcv_wnd
-= seg
->len
;
1125 while (iter
&& (((RSegment
*)iter
->data
)->seq
<= priv
->rcv_nxt
)) {
1126 RSegment
*data
= (RSegment
*)(iter
->data
);
1127 if (data
->seq
+ data
->len
> priv
->rcv_nxt
) {
1128 guint32 nAdjust
= (data
->seq
+ data
->len
) - priv
->rcv_nxt
;
1129 sflags
= sfImmediateAck
; // (Fast Recovery)
1130 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "Recovered %d bytes (%d -> %d)",
1131 nAdjust
, priv
->rcv_nxt
, priv
->rcv_nxt
+ nAdjust
);
1132 priv
->rlen
+= nAdjust
;
1133 priv
->rcv_nxt
+= nAdjust
;
1134 priv
->rcv_wnd
-= nAdjust
;
1136 g_slice_free (RSegment
, priv
->rlist
->data
);
1137 priv
->rlist
= g_list_delete_link (priv
->rlist
, priv
->rlist
);
1141 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "Saving %d bytes (%d -> %d)",
1142 seg
->len
, seg
->seq
, seg
->seq
+ seg
->len
);
1144 RSegment
*rseg
= g_slice_new0 (RSegment
);
1145 rseg
->seq
= seg
->seq
;
1146 rseg
->len
= seg
->len
;
1148 while (iter
&& (((RSegment
*)iter
->data
)->seq
< rseg
->seq
)) {
1149 iter
= g_list_next (iter
);
1151 priv
->rlist
= g_list_insert_before(priv
->rlist
, iter
, rseg
);
1156 attempt_send(self
, sflags
);
1158 // If we have new data, notify the user
1159 if (bNewData
&& priv
->bReadEnable
) {
1160 priv
->bReadEnable
= FALSE
;
1161 if (priv
->callbacks
.PseudoTcpReadable
)
1162 priv
->callbacks
.PseudoTcpReadable(self
, priv
->callbacks
.user_data
);
1169 transmit(PseudoTcpSocket
*self
, const GList
*seg
, guint32 now
)
1171 PseudoTcpSocketPrivate
*priv
= self
->priv
;
1172 SSegment
*segment
= (SSegment
*)(seg
->data
);
1173 guint32 nTransmit
= min(segment
->len
, priv
->mss
);
1175 if (segment
->xmit
>= ((priv
->state
== TCP_ESTABLISHED
) ? 15 : 30)) {
1176 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "too many retransmits");
1181 guint32 seq
= segment
->seq
;
1182 guint8 flags
= (segment
->bCtrl
? FLAG_CTL
: 0);
1183 const gchar
* buffer
= priv
->sbuf
+ (segment
->seq
- priv
->snd_una
);
1184 PseudoTcpWriteResult wres
= packet(self
, seq
, flags
, buffer
, nTransmit
);
1186 if (wres
== WR_SUCCESS
)
1189 if (wres
== WR_FAIL
) {
1190 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "packet failed");
1194 g_assert(wres
== WR_TOO_LARGE
);
1197 if (PACKET_MAXIMUMS
[priv
->msslevel
+ 1] == 0) {
1198 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "MTU too small");
1201 /* !?! We need to break up all outstanding and pending packets
1202 and then retransmit!?! */
1204 priv
->mss
= PACKET_MAXIMUMS
[++priv
->msslevel
] - PACKET_OVERHEAD
;
1205 // I added this... haven't researched actual formula
1206 priv
->cwnd
= 2 * priv
->mss
;
1208 if (priv
->mss
< nTransmit
) {
1209 nTransmit
= priv
->mss
;
1213 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "Adjusting mss to %d bytes ", priv
->mss
);
1216 if (nTransmit
< segment
->len
) {
1217 SSegment
*subseg
= g_slice_new0 (SSegment
);
1218 subseg
->seq
= segment
->seq
+ nTransmit
;
1219 subseg
->len
= segment
->len
- nTransmit
;
1220 subseg
->bCtrl
= segment
->bCtrl
;
1221 subseg
->xmit
= segment
->xmit
;
1223 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "mss reduced to %d", priv
->mss
);
1225 segment
->len
= nTransmit
;
1226 priv
->slist
= g_list_insert_before(priv
->slist
, seg
->next
, subseg
);
1229 if (segment
->xmit
== 0) {
1230 priv
->snd_nxt
+= segment
->len
;
1234 if (priv
->rto_base
== 0) {
1235 priv
->rto_base
= now
;
1242 attempt_send(PseudoTcpSocket
*self
, SendFlags sflags
)
1244 PseudoTcpSocketPrivate
*priv
= self
->priv
;
1245 guint32 now
= get_current_time();
1246 gboolean bFirst
= TRUE
;
1248 if (time_diff(now
, priv
->lastsend
) > (long) priv
->rx_rto
) {
1249 priv
->cwnd
= priv
->mss
;
1262 if ((priv
->dup_acks
== 1) || (priv
->dup_acks
== 2)) { // Limited Transmit
1263 cwnd
+= priv
->dup_acks
* priv
->mss
;
1265 nWindow
= min(priv
->snd_wnd
, cwnd
);
1266 nInFlight
= priv
->snd_nxt
- priv
->snd_una
;
1267 nUseable
= (nInFlight
< nWindow
) ? (nWindow
- nInFlight
) : 0;
1268 nAvailable
= min(priv
->slen
- nInFlight
, priv
->mss
);
1270 if (nAvailable
> nUseable
) {
1271 if (nUseable
* 4 < nWindow
) {
1272 // RFC 813 - avoid SWS
1275 nAvailable
= nUseable
;
1281 DEBUG (PSEUDO_TCP_DEBUG_VERBOSE
, "[cwnd: %d nWindow: %d nInFlight: %d "
1282 "nAvailable: %d nQueued: %d nEmpty: %d ssthresh: %d]",
1283 priv
->cwnd
, nWindow
, nInFlight
, nAvailable
, priv
->slen
- nInFlight
,
1284 sizeof(priv
->sbuf
) - priv
->slen
, priv
->ssthresh
);
1287 if (nAvailable
== 0) {
1288 if (sflags
== sfNone
)
1291 // If this is an immediate ack, or the second delayed ack
1292 if ((sflags
== sfImmediateAck
) || priv
->t_ack
) {
1293 packet(self
, priv
->snd_nxt
, 0, 0, 0);
1295 priv
->t_ack
= get_current_time();
1301 if ((priv
->snd_nxt
> priv
->snd_una
) && (nAvailable
< priv
->mss
)) {
1305 // Find the next segment to transmit
1307 while (((SSegment
*)iter
->data
)->xmit
> 0) {
1308 iter
= g_list_next (iter
);
1312 // If the segment is too large, break it into two
1313 if (((SSegment
*)iter
->data
)->len
> nAvailable
) {
1314 SSegment
*subseg
= g_slice_new0 (SSegment
);
1315 subseg
->seq
= ((SSegment
*)iter
->data
)->seq
+ nAvailable
;
1316 subseg
->len
= ((SSegment
*)iter
->data
)->len
- nAvailable
;
1317 subseg
->bCtrl
= ((SSegment
*)iter
->data
)->bCtrl
;
1319 ((SSegment
*)iter
->data
)->len
= nAvailable
;
1320 priv
->slist
= g_list_insert_before(priv
->slist
, iter
->next
, subseg
);
1323 if (!transmit(self
, iter
, now
)) {
1324 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "transmit failed");
1325 // TODO: consider closing socket
1334 closedown(PseudoTcpSocket
*self
, guint32 err
)
1336 PseudoTcpSocketPrivate
*priv
= self
->priv
;
1339 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "State: TCP_CLOSED");
1340 priv
->state
= TCP_CLOSED
;
1341 if (priv
->callbacks
.PseudoTcpClosed
)
1342 priv
->callbacks
.PseudoTcpClosed(self
, err
, priv
->callbacks
.user_data
);
1346 adjustMTU(PseudoTcpSocket
*self
)
1348 PseudoTcpSocketPrivate
*priv
= self
->priv
;
1350 // Determine our current mss level, so that we can adjust appropriately later
1351 for (priv
->msslevel
= 0;
1352 PACKET_MAXIMUMS
[priv
->msslevel
+ 1] > 0;
1354 if (((guint16
)PACKET_MAXIMUMS
[priv
->msslevel
]) <= priv
->mtu_advise
) {
1358 priv
->mss
= priv
->mtu_advise
- PACKET_OVERHEAD
;
1359 // !?! Should we reset priv->largest here?
1360 DEBUG (PSEUDO_TCP_DEBUG_NORMAL
, "Adjusting mss to %d bytes", priv
->mss
);
1361 // Enforce minimums on ssthresh and cwnd
1362 priv
->ssthresh
= max(priv
->ssthresh
, 2 * priv
->mss
);
1363 priv
->cwnd
= max(priv
->cwnd
, priv
->mss
);