1 /* $NetBSD: tp_subr.c,v 1.21 2007/03/04 06:03:33 christos Exp $ */
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * @(#)tp_subr.c 8.1 (Berkeley) 6/10/93
34 /***********************************************************
35 Copyright IBM Corporation 1987
39 Permission to use, copy, modify, and distribute this software and its
40 documentation for any purpose and without fee is hereby granted,
41 provided that the above copyright notice appear in all copies and that
42 both that copyright notice and this permission notice appear in
43 supporting documentation, and that the name of IBM not be
44 used in advertising or publicity pertaining to distribution of the
45 software without specific, written prior permission.
47 IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
48 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
49 IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
50 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
51 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
52 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
55 ******************************************************************/
58 * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
61 * The main work of data transfer is done here. These routines are called
62 * from tp.trans. They include the routines that check the validity of acks
63 * and Xacks, (tp_goodack() and tp_goodXack() ) take packets from socket
64 * buffers and send them (tp_send()), drop the data from the socket buffers
65 * (tp_sbdrop()), and put incoming packet data into socket buffers
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: tp_subr.c,v 1.21 2007/03/04 06:03:33 christos Exp $");
72 #include <sys/param.h>
73 #include <sys/systm.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/protosw.h>
78 #include <sys/errno.h>
80 #include <sys/kernel.h>
82 #include <netiso/tp_ip.h>
83 #include <netiso/iso.h>
84 #include <netiso/argo_debug.h>
85 #include <netiso/tp_timer.h>
86 #include <netiso/tp_param.h>
87 #include <netiso/tp_stat.h>
88 #include <netiso/tp_pcb.h>
89 #include <netiso/tp_tpdu.h>
90 #include <netiso/tp_trace.h>
91 #include <netiso/tp_meas.h>
92 #include <netiso/tp_seq.h>
93 #include <netiso/tp_var.h>
/*
 * Number of consecutive duplicate acks (same sequence number and credit,
 * new subsequence) that tp_goodack() counts in tp_dupacks before it
 * rewinds tp_sndnxt to tp_snduna and shrinks the congestion window.
 */
int tprexmtthresh = 3;
99 * tp.trans, when an XAK arrives
100 * FUNCTION and ARGUMENTS:
101 * Determines if the sequence number (seq) from the XAK
102 * acks anything new. If so, drop the appropriate tpdu
103 * from the XPD send queue.
105 * Returns 1 if it did this, 0 if the ack caused no action.
108 tp_goodXack(struct tp_pcb
*tpcb
, SeqNum seq
)
112 if (tp_traceflags
[D_XPD
]) {
113 tptraceTPCB(TPPTgotXack
,
114 seq
, tpcb
->tp_Xuna
, tpcb
->tp_Xsndnxt
, tpcb
->tp_sndnew
,
119 if (seq
== tpcb
->tp_Xuna
) {
120 tpcb
->tp_Xuna
= tpcb
->tp_Xsndnxt
;
123 * DROP 1 packet from the Xsnd socket buf - just so happens
124 * that only one packet can be there at any time so drop the
125 * whole thing. If you allow > 1 packet the socket buffer,
126 * then you'll have to keep track of how many characters went
127 * w/ each XPD tpdu, so this will get messier
130 if (argo_debug
[D_XPD
]) {
131 dump_mbuf(tpcb
->tp_Xsnd
.sb_mb
,
132 "tp_goodXack Xsnd before sbdrop");
137 if (tp_traceflags
[D_XPD
]) {
138 tptraceTPCB(TPPTmisc
,
139 "goodXack: dropping cc ",
140 (int) (tpcb
->tp_Xsnd
.sb_cc
),
144 sbdroprecord(&tpcb
->tp_Xsnd
);
153 * FUNCTION and ARGUMENTS:
155 * smoothed average round trip time (*rtt)
156 * roundtrip time variance (*rtv) - actually deviation, not variance
157 * given the new value (diff)
163 tp_rtt_rtv(struct tp_pcb
*tpcb
)
165 int old
= tpcb
->tp_rtt
;
166 int elapsed
, delta
= 0;
168 elapsed
= hardclock_ticks
- tpcb
->tp_rttemit
;
170 if (tpcb
->tp_rtt
!= 0) {
172 * rtt is the smoothed round trip time in machine clock
173 * ticks (hz). It is stored as a fixed point number,
174 * unscaled (unlike the tcp srtt). The rationale here
175 * is that it is only significant to the nearest unit of
176 * slowtimo, which is at least 8 machine clock ticks
177 * so there is no need to scale. The smoothing is done
178 * according to the same formula as TCP (rtt = rtt*7/8
181 delta
= elapsed
- tpcb
->tp_rtt
;
182 if ((tpcb
->tp_rtt
+= (delta
>> TP_RTT_ALPHA
)) <= 0)
185 * rtv is a smoothed accumulated mean difference, unscaled
186 * for reasons expressed above.
187 * It is smoothed with an alpha of .75, and the round trip timer
188 * will be set to rtt + 4*rtv, also as TCP does.
192 if ((tpcb
->tp_rtv
+= ((delta
- tpcb
->tp_rtv
) >> TP_RTV_ALPHA
)) <= 0)
196 * No rtt measurement yet - use the unsmoothed rtt. Set the
197 * variance to half the rtt (so our first retransmit happens
200 tpcb
->tp_rtt
= elapsed
;
201 tpcb
->tp_rtv
= elapsed
>> 1;
203 tpcb
->tp_rttemit
= 0;
204 tpcb
->tp_rxtshift
= 0;
206 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
207 * Because of the way we do the smoothing, srtt and rttvar
208 * will each average +1/2 tick of bias. When we compute
209 * the retransmit timer, we want 1/2 tick of rounding and
210 * 1 extra tick because of +-1/2 tick uncertainty in the
211 * firing of the timer. The bias will give us exactly the
212 * 1.5 tick we need. But, because the bias is
213 * statistical, we have to test that we don't drop below
214 * the minimum feasible timer (which is 2 ticks)."
216 TP_RANGESET(tpcb
->tp_dt_ticks
, TP_REXMTVAL(tpcb
),
217 tpcb
->tp_peer_acktime
, 128 /* XXX */ );
219 if (argo_debug
[D_RTT
]) {
220 printf("%s tpcb %p, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
221 "tp_rtt_rtv:", tpcb
, elapsed
, delta
, tpcb
->tp_rtt
, tpcb
->tp_rtv
, old
);
224 tpcb
->tp_rxtcur
= tpcb
->tp_dt_ticks
;
229 * tp.trans when an AK arrives
230 * FUNCTION and ARGUMENTS:
231 * Given (cdt), the credit from the AK tpdu, and
232 * (seq), the sequence number from the AK tpdu,
233 * tp_goodack() determines if the AK acknowledges something in the send
234 * window, and if so, drops the appropriate packets from the retransmission
235 * list, computes the round trip time, and updates the retransmission timer
236 * based on the new smoothed round trip time.
239 * EITHER it actually acked something heretofore unacknowledged
240 * OR no news but the credit should be processed.
241 * If something heretofore unacked was acked with this sequence number,
242 * the appropriate tpdus are dropped from the retransmission control list,
243 * by calling tp_sbdrop().
244 * No need to see the tpdu itself.
247 tp_goodack(struct tp_pcb
*tpcb
, u_int cdt
, SeqNum seq
, u_int subseq
)
250 int bang
= 0; /* bang --> ack for something
251 * heretofore unacked */
255 if (argo_debug
[D_ACKRECV
]) {
256 printf("goodack tpcb %p seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
257 tpcb
, seq
, cdt
, tpcb
->tp_snduna
, tpcb
->tp_sndnew
, tpcb
->tp_sndnxt
);
262 if (tp_traceflags
[D_ACKRECV
]) {
263 tptraceTPCB(TPPTgotack
,
264 seq
, cdt
, tpcb
->tp_snduna
, tpcb
->tp_sndnew
, subseq
);
270 tpmeas(tpcb
->tp_lref
, TPtime_ack_rcvd
, (struct timeval
*) 0, seq
, 0, 0);
274 if (seq
== tpcb
->tp_snduna
) {
275 if (subseq
< tpcb
->tp_r_subseq
||
276 (subseq
== tpcb
->tp_r_subseq
&& cdt
<= tpcb
->tp_fcredit
)) {
279 if (argo_debug
[D_ACKRECV
]) {
280 printf("goodack discard : tpcb %p subseq %d r_subseq %d\n",
281 tpcb
, subseq
, tpcb
->tp_r_subseq
);
286 if (cdt
== tpcb
->tp_fcredit
/* && thus subseq >
287 tpcb->tp_r_subseq */ ) {
288 tpcb
->tp_r_subseq
= subseq
;
289 if (tpcb
->tp_timer
[TM_data_retrans
] == 0)
290 tpcb
->tp_dupacks
= 0;
291 else if (++tpcb
->tp_dupacks
== tprexmtthresh
) {
293 * partner went out of his way to signal with
294 * different subsequences that he has the
295 * same lack of an expected packet. This may
296 * be an early indiciation of a loss
299 SeqNum onxt
= tpcb
->tp_sndnxt
;
300 struct mbuf
*onxt_m
= tpcb
->tp_sndnxt_m
;
301 u_int win
= min(tpcb
->tp_fcredit
,
302 tpcb
->tp_cong_win
/ tpcb
->tp_l_tpdusize
) / 2;
304 if (argo_debug
[D_ACKRECV
]) {
305 printf("%s tpcb %p seq 0x%x rttseq 0x%x onxt 0x%x\n",
306 "goodack dupacks:", tpcb
, seq
, tpcb
->tp_rttseq
, onxt
);
311 tpcb
->tp_ssthresh
= win
* tpcb
->tp_l_tpdusize
;
312 tpcb
->tp_timer
[TM_data_retrans
] = 0;
313 tpcb
->tp_rttemit
= 0;
314 tpcb
->tp_sndnxt
= tpcb
->tp_snduna
;
315 tpcb
->tp_sndnxt_m
= 0;
316 tpcb
->tp_cong_win
= tpcb
->tp_l_tpdusize
;
318 tpcb
->tp_cong_win
= tpcb
->tp_ssthresh
+
319 tpcb
->tp_dupacks
* tpcb
->tp_l_tpdusize
;
320 if (SEQ_GT(tpcb
, onxt
, tpcb
->tp_sndnxt
)) {
321 tpcb
->tp_sndnxt
= onxt
;
322 tpcb
->tp_sndnxt_m
= onxt_m
;
324 } else if (tpcb
->tp_dupacks
> tprexmtthresh
) {
325 tpcb
->tp_cong_win
+= tpcb
->tp_l_tpdusize
;
329 } else if (SEQ_LT(tpcb
, seq
, tpcb
->tp_snduna
))
330 goto discard_the_ack
;
332 * If the congestion window was inflated to account
333 * for the other side's cached packets, retract it.
335 if (tpcb
->tp_dupacks
> tprexmtthresh
&&
336 tpcb
->tp_cong_win
> tpcb
->tp_ssthresh
)
337 tpcb
->tp_cong_win
= tpcb
->tp_ssthresh
;
338 tpcb
->tp_r_subseq
= subseq
;
339 old_fcredit
= tpcb
->tp_fcredit
;
340 tpcb
->tp_fcredit
= cdt
;
341 if (cdt
> tpcb
->tp_maxfcredit
)
342 tpcb
->tp_maxfcredit
= cdt
;
343 tpcb
->tp_dupacks
= 0;
345 if (IN_SWINDOW(tpcb
, seq
, tpcb
->tp_snduna
, tpcb
->tp_sndnew
)) {
348 bytes_acked
= tp_sbdrop(tpcb
, seq
);
351 * If transmit timer is running and timed sequence
352 * number was acked, update smoothed round trip time.
353 * Since we now have an rtt measurement, cancel the
354 * timer backoff (cf., Phil Karn's retransmit alg.).
355 * Recompute the initial retransmit timer.
357 if (tpcb
->tp_rttemit
&& SEQ_GT(tpcb
, seq
, tpcb
->tp_rttseq
))
360 * If all outstanding data is acked, stop retransmit timer.
361 * If there is more data to be acked, restart retransmit
362 * timer, using current (possibly backed-off) value.
363 * OSI combines the keepalive and persistance functions.
364 * So, there is no persistance timer per se, to restart.
366 if (tpcb
->tp_class
!= TP_CLASS_0
)
367 tpcb
->tp_timer
[TM_data_retrans
] =
368 (seq
== tpcb
->tp_sndnew
) ? 0 : tpcb
->tp_rxtcur
;
370 * When new data is acked, open the congestion window.
371 * If the window gives us less than ssthresh packets
372 * in flight, open exponentially (maxseg per packet).
373 * Otherwise open linearly: maxseg per window
374 * (maxseg^2 / cwnd per packet), plus a constant
375 * fraction of a packet (maxseg/8) to help larger windows
376 * open quickly enough.
379 u_int cw
= tpcb
->tp_cong_win
, incr
= tpcb
->tp_l_tpdusize
;
381 incr
= min(incr
, bytes_acked
);
382 if (cw
> tpcb
->tp_ssthresh
)
383 incr
= incr
* incr
/ cw
+ incr
/ 8;
385 min(cw
+ incr
, tpcb
->tp_sock
->so_snd
.sb_hiwat
);
387 tpcb
->tp_snduna
= seq
;
388 if (SEQ_LT(tpcb
, tpcb
->tp_sndnxt
, seq
)) {
389 tpcb
->tp_sndnxt
= seq
;
390 tpcb
->tp_sndnxt_m
= 0;
394 if (cdt
!= 0 && old_fcredit
== 0) {
395 tpcb
->tp_sendfcc
= 1;
398 if (old_fcredit
!= 0)
400 /* The following might mean that the window shrunk */
401 if (tpcb
->tp_timer
[TM_data_retrans
]) {
402 tpcb
->tp_timer
[TM_data_retrans
] = 0;
403 tpcb
->tp_timer
[TM_sendack
] = tpcb
->tp_dt_ticks
;
404 if (tpcb
->tp_sndnxt
!= tpcb
->tp_snduna
) {
405 tpcb
->tp_sndnxt
= tpcb
->tp_snduna
;
406 tpcb
->tp_sndnxt_m
= 0;
410 tpcb
->tp_fcredit
= cdt
;
411 bang
|= (old_fcredit
< cdt
);
415 if (argo_debug
[D_ACKRECV
]) {
416 printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%lx\n",
417 bang
, cdt
, old_fcredit
, tpcb
->tp_cong_win
);
421 * if (bang) XXXXX Very bad to remove this test, but somethings
431 * FUNCTION and ARGUMENTS:
432 * drops everything up TO but not INCLUDING seq # (seq)
433 * from the retransmission queue.
436 tp_sbdrop(struct tp_pcb
*tpcb
, SeqNum seq
)
438 struct sockbuf
*sb
= &tpcb
->tp_sock
->so_snd
;
439 int i
= SEQ_SUB(tpcb
, seq
, tpcb
->tp_snduna
);
440 int oldcc
= sb
->sb_cc
, oldi
= i
;
442 if (i
>= tpcb
->tp_seqhalf
)
443 printf("tp_spdropping too much -- should panic");
447 if (argo_debug
[D_ACKRECV
]) {
448 printf("tp_sbdroping %d pkts %ld bytes on %p at 0x%x\n",
449 oldi
, oldcc
- sb
->sb_cc
, tpcb
, seq
);
453 sowwakeup(tpcb
->tp_sock
);
454 return (oldcc
- sb
->sb_cc
);
459 * tp.trans on user send request, arrival of AK and arrival of XAK
460 * FUNCTION and ARGUMENTS:
461 * Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
462 * Emits until a) runs out of data, or b) runs into an XPD mark, or
463 * c) it hits seq number (highseq) limited by cong or credit.
465 * If you want XPD to buffer > 1 du per socket buffer, you can
466 * modifiy this to issue XPD tpdus also, but then it'll have
467 * to take some argument(s) to distinguish between the type of DU to
470 * When something is sent for the first time, its time-of-send
471 * is stashed (in system clock ticks rather than pf_slowtimo ticks).
472 * When the ack arrives, the smoothed round-trip time is figured
476 tp_send(struct tp_pcb
*tpcb
)
481 struct sockbuf
*sb
= &tpcb
->tp_sock
->so_snd
;
482 unsigned int eotsdu
= 0;
483 SeqNum highseq
, checkseq
;
484 int idle
, idleticks
, off
, cong_win
;
486 int send_start_time
= hardclock_ticks
;
487 SeqNum oldnxt
= tpcb
->tp_sndnxt
;
488 #endif /* TP_PERF_MEAS */
490 idle
= (tpcb
->tp_snduna
== tpcb
->tp_sndnew
);
492 idleticks
= tpcb
->tp_inact_ticks
- tpcb
->tp_timer
[TM_inact
];
493 if (idleticks
> tpcb
->tp_dt_ticks
)
495 * We have been idle for "a while" and no acks are
496 * expected to clock out any data we send --
497 * slow start to get ack "clock" running again.
499 tpcb
->tp_cong_win
= tpcb
->tp_l_tpdusize
;
501 cong_win
= tpcb
->tp_cong_win
;
502 highseq
= SEQ(tpcb
, tpcb
->tp_fcredit
+ tpcb
->tp_snduna
);
503 if (tpcb
->tp_Xsnd
.sb_mb
)
504 highseq
= SEQ_MIN(tpcb
, highseq
, tpcb
->tp_sndnew
);
507 if (argo_debug
[D_DATA
]) {
508 printf("tp_send enter tpcb %p nxt 0x%x win %d high 0x%x\n",
509 tpcb
, tpcb
->tp_sndnxt
, cong_win
, highseq
);
513 if (tp_traceflags
[D_DATA
]) {
514 tptraceTPCB(TPPTmisc
, "tp_send sndnew snduna",
515 tpcb
->tp_sndnew
, tpcb
->tp_snduna
, 0, 0);
516 tptraceTPCB(TPPTmisc
, "tp_send tpcb->tp_sndnxt win fcredit congwin",
517 tpcb
->tp_sndnxt
, cong_win
, tpcb
->tp_fcredit
, tpcb
->tp_cong_win
);
521 if (tp_traceflags
[D_DATA
]) {
522 tptraceTPCB(TPPTmisc
, "tp_send 2 nxt high fcredit congwin",
523 tpcb
->tp_sndnxt
, highseq
, tpcb
->tp_fcredit
, cong_win
);
527 if (tpcb
->tp_sndnxt_m
)
528 m
= tpcb
->tp_sndnxt_m
;
530 off
= SEQ_SUB(tpcb
, tpcb
->tp_sndnxt
, tpcb
->tp_snduna
);
531 for (m
= sb
->sb_mb
; m
&& off
> 0; m
= m
->m_next
)
535 * Avoid silly window syndrome here . . . figure out how!
537 checkseq
= tpcb
->tp_sndnum
;
538 if (idle
&& SEQ_LT(tpcb
, tpcb
->tp_sndnum
, highseq
))
539 checkseq
= highseq
; /* i.e. DON'T retain highest assigned
542 while ((SEQ_LT(tpcb
, tpcb
->tp_sndnxt
, highseq
)) && m
&& cong_win
> 0) {
544 eotsdu
= (m
->m_flags
& M_EOR
) != 0;
545 len
= m
->m_pkthdr
.len
;
546 if (tpcb
->tp_sndnxt
== checkseq
&& eotsdu
== 0 &&
547 len
< (tpcb
->tp_l_tpdusize
/ 2))
548 break; /* Nagle . . . . . */
551 * make a copy - mb goes into the retransmission list while m
552 * gets emitted. m_copy won't copy a zero-length mbuf.
555 m
= m_copy(mb
, 0, M_COPYALL
);
559 if (tp_traceflags
[D_STASH
]) {
560 tptraceTPCB(TPPTmisc
,
561 "tp_send mcopy nxt high eotsdu len",
562 tpcb
->tp_sndnxt
, highseq
, eotsdu
, len
);
567 if (argo_debug
[D_DATA
]) {
568 printf("tp_sending tpcb %p nxt 0x%x\n",
569 tpcb
, tpcb
->tp_sndnxt
);
573 * when headers are precomputed, may need to fill in checksum
576 tpcb
->tp_sock
->so_error
=
577 tp_emit(DT_TPDU_type
, tpcb
, tpcb
->tp_sndnxt
, eotsdu
, m
);
578 if (tpcb
->tp_sock
->so_error
!= 0)
582 tpcb
->tp_sndnxt_m
= m
;
583 if (tpcb
->tp_sndnxt
== tpcb
->tp_sndnew
) {
584 SEQ_INC(tpcb
, tpcb
->tp_sndnew
);
586 * Time this transmission if not a retransmission and
587 * not currently timing anything.
589 if (tpcb
->tp_rttemit
== 0) {
590 tpcb
->tp_rttemit
= hardclock_ticks
;
591 tpcb
->tp_rttseq
= tpcb
->tp_sndnxt
;
593 tpcb
->tp_sndnxt
= tpcb
->tp_sndnew
;
595 SEQ_INC(tpcb
, tpcb
->tp_sndnxt
);
597 * Set retransmit timer if not currently set.
598 * Initial value for retransmit timer is smoothed
599 * round-trip time + 2 * round-trip time variance.
600 * Initialize shift counter which is used for backoff
601 * of retransmit time.
603 if (tpcb
->tp_timer
[TM_data_retrans
] == 0 &&
604 tpcb
->tp_class
!= TP_CLASS_0
) {
605 tpcb
->tp_timer
[TM_data_retrans
] = tpcb
->tp_dt_ticks
;
606 tpcb
->tp_timer
[TM_sendack
] = tpcb
->tp_keepalive_ticks
;
607 tpcb
->tp_rxtshift
= 0;
610 if (SEQ_GT(tpcb
, tpcb
->tp_sndnew
, tpcb
->tp_sndnum
))
611 tpcb
->tp_oktonagle
= 0;
618 elapsed
= hardclock_ticks
- send_start_time
;
620 npkts
= SEQ_SUB(tpcb
, tpcb
->tp_sndnxt
, oldnxt
);
625 if (npkts
> TP_PM_MAX
)
628 t
= &(tpcb
->tp_p_meas
->tps_sendtime
[npkts
]);
629 *t
+= (t
- elapsed
) >> TP_RTT_ALPHA
;
632 IncPStat(tpcb
, tps_win_lim_by_data
[npkts
]);
634 IncPStat(tpcb
, tps_win_lim_by_cdt
[npkts
]);
635 /* not true with congestion-window being used */
637 now
.tv_sec
= elapsed
/ hz
;
638 now
.tv_usec
= (elapsed
- (hz
* now
.tv_sec
)) * 1000000 / hz
;
639 tpmeas(tpcb
->tp_lref
,
640 TPsbsend
, &elapsed
, newseq
, tpcb
->tp_Nwindow
, npkts
);
642 #endif /* TP_PERF_MEAS */
646 if (tp_traceflags
[D_DATA
]) {
647 tptraceTPCB(TPPTmisc
,
648 "tp_send at end: new nxt eotsdu error",
649 tpcb
->tp_sndnew
, tpcb
->tp_sndnxt
, eotsdu
,
650 tpcb
->tp_sock
->so_error
);
660 tp_packetize(struct tp_pcb
*tpcb
, struct mbuf
*m
, int eotsdu
)
662 struct mbuf
*n
= NULL
;
663 struct sockbuf
*sb
= &tpcb
->tp_sock
->so_snd
;
664 int maxsize
= tpcb
->tp_l_tpdusize
665 - tp_headersize(DT_TPDU_type
, tpcb
)
666 - (tpcb
->tp_use_checksum
? 4 : 0);
667 int totlen
= m
->m_pkthdr
.len
;
670 * Pre-packetize the data in the sockbuf
671 * according to negotiated mtu. Do it here
672 * where we can safely wait for mbufs.
674 * This presumes knowledge of sockbuf conventions.
675 * TODO: allocate space for header and fill it in (once!).
678 if (argo_debug
[D_DATA
]) {
679 printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
680 maxsize
, totlen
, eotsdu
, tpcb
->tp_sndnum
);
683 if (tpcb
->tp_oktonagle
) {
684 if ((n
= sb
->sb_mb
) == 0)
685 panic("tp_packetize");
688 if (n
->m_flags
& M_EOR
)
689 panic("tp_packetize 2");
690 SEQ_INC(tpcb
, tpcb
->tp_sndnum
);
691 if (totlen
+ n
->m_pkthdr
.len
< maxsize
) {
693 * There is an unsent packet with space,
696 struct mbuf
*old_n
= n
;
698 n
->m_pkthdr
.len
+= totlen
;
701 sbcompress(sb
, m
, n
);
711 if (totlen
> maxsize
) {
712 if ((m
= m_split(n
, maxsize
, M_WAIT
)) == 0)
713 panic("tp_packetize");
718 sbappendrecord(sb
, n
);
720 SEQ_INC(tpcb
, tpcb
->tp_sndnum
);
724 n
->m_flags
|= M_EOR
; /* XXX belongs at end */
725 tpcb
->tp_oktonagle
= 0;
727 SEQ_DEC(tpcb
, tpcb
->tp_sndnum
);
728 tpcb
->tp_oktonagle
= 1;
733 if (argo_debug
[D_DATA
]) {
734 printf("SEND out: oktonagle %d sndnum 0x%x\n",
735 tpcb
->tp_oktonagle
, tpcb
->tp_sndnum
);
745 * tp.trans on arrival of a DT tpdu
746 * FUNCTION, ARGUMENTS, and RETURN VALUE:
748 * a) something new arrived and it's got eotsdu_reached bit on,
749 * b) this arrival was caused other out-of-sequence things to be
751 * c) this arrival is the highest seq # for which we last gave credit
752 * (sender just sent a whole window)
753 * In other words, returns 1 if tp should send an ack immediately, 0 if
754 * the ack can wait a while.
756 * Note: this implementation no longer renegs on credit, (except
757 * when debugging option D_RENEG is on, for the purpose of testing
758 * ack subsequencing), so we don't need to check for incoming tpdus
759 * being in a reneged portion of the window.
763 tp_stash(struct tp_pcb
*tpcb
, struct tp_event
*e
)
765 int ack_reason
= tpcb
->tp_ack_strat
& ACK_STRAT_EACH
;
766 /* 0--> delay acks until full window */
767 /* 1--> ack each tpdu */
768 #define E e->TPDU_ATTR(DT)
771 struct mbuf
*n
= E
.e_data
;
776 if (argo_debug
[D_STASH
]) {
777 dump_mbuf(tpcb
->tp_sock
->so_rcv
.sb_mb
,
778 "stash: so_rcv before appending");
780 "stash: e_data before appending");
786 PStat(tpcb
, Nb_from_ll
) += E
.e_datalen
;
787 tpmeas(tpcb
->tp_lref
, TPtime_from_ll
,
789 (u_int
) PStat(tpcb
, Nb_from_ll
),
790 (u_int
) E
.e_datalen
);
794 if (E
.e_seq
== tpcb
->tp_rcvnxt
) {
797 if (argo_debug
[D_STASH
]) {
798 printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
799 E
.e_seq
, E
.e_datalen
, E
.e_eot
);
804 if (tp_traceflags
[D_STASH
]) {
805 tptraceTPCB(TPPTmisc
, "stash EQ: seq len eot",
806 E
.e_seq
, E
.e_datalen
, E
.e_eot
, 0);
812 sbappend(&tpcb
->tp_sock
->so_rcv
, E
.e_data
);
814 SEQ_INC(tpcb
, tpcb
->tp_rcvnxt
);
816 * move chains from the reassembly queue to the socket buffer
818 if (tpcb
->tp_rsycnt
) {
822 mp
= tpcb
->tp_rsyq
+ (tpcb
->tp_rcvnxt
%
823 tpcb
->tp_maxlcredit
);
824 mplim
= tpcb
->tp_rsyq
+ tpcb
->tp_maxlcredit
;
826 while (tpcb
->tp_rsycnt
&& *mp
) {
827 sbappend(&tpcb
->tp_sock
->so_rcv
, *mp
);
830 SEQ_INC(tpcb
, tpcb
->tp_rcvnxt
);
831 ack_reason
|= ACK_REORDER
;
837 if (argo_debug
[D_STASH
]) {
838 dump_mbuf(tpcb
->tp_sock
->so_rcv
.sb_mb
,
839 "stash: so_rcv after appending");
848 if (tp_traceflags
[D_STASH
]) {
849 tptraceTPCB(TPPTmisc
, "stash Reseq: seq rcvnxt lcdt",
850 E
.e_seq
, tpcb
->tp_rcvnxt
,
851 tpcb
->tp_lcredit
, 0);
855 if (tpcb
->tp_rsyq
== 0)
857 uwe
= SEQ(tpcb
, tpcb
->tp_rcvnxt
+ tpcb
->tp_maxlcredit
);
858 if (tpcb
->tp_rsyq
== 0 ||
859 !IN_RWINDOW(tpcb
, E
.e_seq
, tpcb
->tp_rcvnxt
, uwe
)) {
860 ack_reason
= ACK_DONT
;
862 } else if (*(mp
= tpcb
->tp_rsyq
+
863 (E
.e_seq
% tpcb
->tp_maxlcredit
)) != NULL
) {
865 if (argo_debug
[D_STASH
]) {
866 printf("tp_stash - drop & ack\n");
871 * retransmission - drop it and force
877 IncPStat(tpcb
, tps_n_ack_cuz_dup
);
882 ack_reason
|= ACK_DUP
;
886 ack_reason
= ACK_DONT
;
890 * there were some comments of historical interest
896 if (E
.e_seq
== tpcb
->tp_sent_uwe
)
897 ack_reason
|= ACK_STRAT_FULLWIN
;
900 if (tp_traceflags
[D_STASH
]) {
901 tptraceTPCB(TPPTmisc
,
902 "end of stash, eot, ack_reason, sent_uwe ",
903 E
.e_eot
, ack_reason
, tpcb
->tp_sent_uwe
, 0);
907 if (ack_reason
== ACK_DONT
) {
908 IncStat(ts_ackreason
[ACK_DONT
]);
913 if (ack_reason
& ACK_STRAT_EACH
) {
914 IncPStat(tpcb
, tps_n_ack_cuz_strat
);
915 } else if (ack_reason
& ACK_STRAT_FULLWIN
) {
916 IncPStat(tpcb
, tps_n_ack_cuz_fullwin
);
917 } else if (ack_reason
& ACK_REORDER
) {
918 IncPStat(tpcb
, tps_n_ack_cuz_reorder
);
920 tpmeas(tpcb
->tp_lref
, TPtime_ack_sent
, 0,
921 SEQ_ADD(tpcb
, E
.e_seq
, 1), 0, 0);
928 * keep track of all reasons
931 for (i
= 1; i
< _ACK_NUM_REASONS_
; i
++) {
932 if (ack_reason
& (1 << i
))
933 IncStat(ts_ackreason
[i
]);
942 * tp_rsyflush - drop all the packets on the reassembly queue.
943 * Do this when closing the socket, or when somebody has changed
944 * the space avaible in the receive socket (XXX).
947 tp_rsyflush(struct tp_pcb
*tpcb
)
950 if (tpcb
->tp_rsycnt
) {
951 for (mp
= tpcb
->tp_rsyq
+ tpcb
->tp_maxlcredit
;
952 --mp
>= tpcb
->tp_rsyq
;)
957 if (tpcb
->tp_rsycnt
) {
958 printf("tp_rsyflush %p\n", tpcb
);
962 free((void *) tpcb
->tp_rsyq
, M_PCB
);
967 tp_rsyset(struct tp_pcb
*tpcb
)
969 struct socket
*so
= tpcb
->tp_sock
;
970 int maxcredit
= tpcb
->tp_xtd_format
? 0xffff : 0xf;
971 int old_credit
= tpcb
->tp_maxlcredit
;
974 tpcb
->tp_maxlcredit
= maxcredit
= min(maxcredit
,
975 (so
->so_rcv
.sb_hiwat
+ tpcb
->tp_l_tpdusize
) / tpcb
->tp_l_tpdusize
);
977 if (old_credit
== tpcb
->tp_maxlcredit
&& tpcb
->tp_rsyq
!= 0)
979 maxcredit
*= sizeof(struct mbuf
*);
982 rsyq
= malloc(maxcredit
, M_PCB
, M_NOWAIT
|M_ZERO
);
983 tpcb
->tp_rsyq
= (struct mbuf
**) rsyq
;
988 tpsbcheck(struct tp_pcb
*tpcb
, int i
)
991 int len
= 0, mbcnt
= 0, pktlen
;
992 struct sockbuf
*sb
= &tpcb
->tp_sock
->so_snd
;
994 for (n
= sb
->sb_mb
; n
; n
= n
->m_nextpkt
) {
995 if ((n
->m_flags
& M_PKTHDR
) == 0)
996 panic("tpsbcheck nohdr");
997 pktlen
= len
+ n
->m_pkthdr
.len
;
998 for (m
= n
; m
; m
= m
->m_next
) {
1001 if (m
->m_flags
& M_EXT
)
1002 mbcnt
+= m
->m_ext
.ext_size
;
1004 if (len
!= pktlen
) {
1005 printf("test %d; len %d != pktlen %d on mbuf %p\n",
1007 panic("tpsbcheck short");
1010 if (len
!= sb
->sb_cc
|| mbcnt
!= sb
->sb_mbcnt
) {
1011 printf("test %d: cc %d != %ld || mbcnt %d != %ld\n", i
, len
, sb
->sb_cc
,
1012 mbcnt
, sb
->sb_mbcnt
);