No empty .Rs/.Re
[netbsd-mini2440.git] / sys / netiso / tp_subr.c
blobac8d44f2b8f16520f966490beb760b673c515230
1 /* $NetBSD: tp_subr.c,v 1.21 2007/03/04 06:03:33 christos Exp $ */
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
31 * @(#)tp_subr.c 8.1 (Berkeley) 6/10/93
34 /***********************************************************
35 Copyright IBM Corporation 1987
37 All Rights Reserved
39 Permission to use, copy, modify, and distribute this software and its
40 documentation for any purpose and without fee is hereby granted,
41 provided that the above copyright notice appear in all copies and that
42 both that copyright notice and this permission notice appear in
43 supporting documentation, and that the name of IBM not be
44 used in advertising or publicity pertaining to distribution of the
45 software without specific, written prior permission.
47 IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
48 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
49 IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
50 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
51 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
52 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
53 SOFTWARE.
55 ******************************************************************/
58 * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
61 * The main work of data transfer is done here. These routines are called
62 * from tp.trans. They include the routines that check the validity of acks
63 * and Xacks, (tp_goodack() and tp_goodXack() ) take packets from socket
64 * buffers and send them (tp_send()), drop the data from the socket buffers
65 * (tp_sbdrop()), and put incoming packet data into socket buffers
66 * (tp_stash()).
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: tp_subr.c,v 1.21 2007/03/04 06:03:33 christos Exp $");
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/mbuf.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/protosw.h>
78 #include <sys/errno.h>
79 #include <sys/time.h>
80 #include <sys/kernel.h>
82 #include <netiso/tp_ip.h>
83 #include <netiso/iso.h>
84 #include <netiso/argo_debug.h>
85 #include <netiso/tp_timer.h>
86 #include <netiso/tp_param.h>
87 #include <netiso/tp_stat.h>
88 #include <netiso/tp_pcb.h>
89 #include <netiso/tp_tpdu.h>
90 #include <netiso/tp_trace.h>
91 #include <netiso/tp_meas.h>
92 #include <netiso/tp_seq.h>
93 #include <netiso/tp_var.h>
/*
 * Duplicate-ack threshold used by tp_goodack(): when tp_dupacks reaches
 * this value the first unacknowledged tpdu is resent via tp_send(); each
 * further duplicate beyond it grows tp_cong_win by one tpdu size.
 */
95 int tprexmtthresh = 3;
98 * CALLED FROM:
99 * tp.trans, when an XAK arrives
100 * FUNCTION and ARGUMENTS:
101 * Determines if the sequence number (seq) from the XAK
102 * acks anything new. If so, drop the appropriate tpdu
103 * from the XPD send queue.
104 * RETURN VALUE:
105 * Returns 1 if it did this, 0 if the ack caused no action.
108 tp_goodXack(struct tp_pcb *tpcb, SeqNum seq)
111 #ifdef TPPT
112 if (tp_traceflags[D_XPD]) {
113 tptraceTPCB(TPPTgotXack,
114 seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew,
115 tpcb->tp_snduna);
117 #endif
119 if (seq == tpcb->tp_Xuna) {
120 tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
123 * DROP 1 packet from the Xsnd socket buf - just so happens
124 * that only one packet can be there at any time so drop the
125 * whole thing. If you allow > 1 packet the socket buffer,
126 * then you'll have to keep track of how many characters went
127 * w/ each XPD tpdu, so this will get messier
129 #ifdef ARGO_DEBUG
130 if (argo_debug[D_XPD]) {
131 dump_mbuf(tpcb->tp_Xsnd.sb_mb,
132 "tp_goodXack Xsnd before sbdrop");
134 #endif
136 #ifdef TPPT
137 if (tp_traceflags[D_XPD]) {
138 tptraceTPCB(TPPTmisc,
139 "goodXack: dropping cc ",
140 (int) (tpcb->tp_Xsnd.sb_cc),
141 0, 0, 0);
143 #endif
144 sbdroprecord(&tpcb->tp_Xsnd);
145 return 1;
147 return 0;
151 * CALLED FROM:
152 * tp_good_ack()
153 * FUNCTION and ARGUMENTS:
154 * updates
155 * smoothed average round trip time (*rtt)
156 * roundtrip time variance (*rtv) - actually deviation, not variance
157 * given the new value (diff)
158 * RETURN VALUE:
159 * void
162 void
163 tp_rtt_rtv(struct tp_pcb *tpcb)
165 int old = tpcb->tp_rtt;
166 int elapsed, delta = 0;
168 elapsed = hardclock_ticks - tpcb->tp_rttemit;
170 if (tpcb->tp_rtt != 0) {
172 * rtt is the smoothed round trip time in machine clock
173 * ticks (hz). It is stored as a fixed point number,
174 * unscaled (unlike the tcp srtt). The rationale here
175 * is that it is only significant to the nearest unit of
176 * slowtimo, which is at least 8 machine clock ticks
177 * so there is no need to scale. The smoothing is done
178 * according to the same formula as TCP (rtt = rtt*7/8
179 * + measured_rtt/8).
181 delta = elapsed - tpcb->tp_rtt;
182 if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
183 tpcb->tp_rtt = 1;
185 * rtv is a smoothed accumulated mean difference, unscaled
186 * for reasons expressed above.
187 * It is smoothed with an alpha of .75, and the round trip timer
188 * will be set to rtt + 4*rtv, also as TCP does.
190 if (delta < 0)
191 delta = -delta;
192 if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
193 tpcb->tp_rtv = 1;
194 } else {
196 * No rtt measurement yet - use the unsmoothed rtt. Set the
197 * variance to half the rtt (so our first retransmit happens
198 * at 3*rtt)
200 tpcb->tp_rtt = elapsed;
201 tpcb->tp_rtv = elapsed >> 1;
203 tpcb->tp_rttemit = 0;
204 tpcb->tp_rxtshift = 0;
206 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
207 * Because of the way we do the smoothing, srtt and rttvar
208 * will each average +1/2 tick of bias. When we compute
209 * the retransmit timer, we want 1/2 tick of rounding and
210 * 1 extra tick because of +-1/2 tick uncertainty in the
211 * firing of the timer. The bias will give us exactly the
212 * 1.5 tick we need. But, because the bias is
213 * statistical, we have to test that we don't drop below
214 * the minimum feasible timer (which is 2 ticks)."
216 TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
217 tpcb->tp_peer_acktime, 128 /* XXX */ );
218 #ifdef ARGO_DEBUG
219 if (argo_debug[D_RTT]) {
220 printf("%s tpcb %p, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
221 "tp_rtt_rtv:", tpcb, elapsed, delta, tpcb->tp_rtt, tpcb->tp_rtv, old);
223 #endif
224 tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
228 * CALLED FROM:
229 * tp.trans when an AK arrives
230 * FUNCTION and ARGUMENTS:
231 * Given (cdt), the credit from the AK tpdu, and
232 * (seq), the sequence number from the AK tpdu,
233 * tp_goodack() determines if the AK acknowledges something in the send
234 * window, and if so, drops the appropriate packets from the retransmission
235 * list, computes the round trip time, and updates the retransmission timer
236 * based on the new smoothed round trip time.
237 * RETURN VALUE:
238 * Returns 1 if
239 * EITHER it actually acked something heretofore unacknowledged
240 * OR no news but the credit should be processed.
241 * If something heretofore unacked was acked with this sequence number,
242 * the appropriate tpdus are dropped from the retransmission control list,
243 * by calling tp_sbdrop().
244 * No need to see the tpdu itself.
247 tp_goodack(struct tp_pcb *tpcb, u_int cdt, SeqNum seq, u_int subseq)
249 int old_fcredit = 0;
250 int bang = 0; /* bang --> ack for something
251 * heretofore unacked */
252 u_int bytes_acked;
254 #ifdef ARGO_DEBUG
255 if (argo_debug[D_ACKRECV]) {
256 printf("goodack tpcb %p seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
257 tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
259 #endif
261 #ifdef TPPT
262 if (tp_traceflags[D_ACKRECV]) {
263 tptraceTPCB(TPPTgotack,
264 seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, subseq);
266 #endif
268 #ifdef TP_PERF_MEAS
269 if (DOPERF(tpcb)) {
270 tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *) 0, seq, 0, 0);
272 #endif
274 if (seq == tpcb->tp_snduna) {
275 if (subseq < tpcb->tp_r_subseq ||
276 (subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
277 discard_the_ack:
278 #ifdef ARGO_DEBUG
279 if (argo_debug[D_ACKRECV]) {
280 printf("goodack discard : tpcb %p subseq %d r_subseq %d\n",
281 tpcb, subseq, tpcb->tp_r_subseq);
283 #endif
284 goto done;
286 if (cdt == tpcb->tp_fcredit /* && thus subseq >
287 tpcb->tp_r_subseq */ ) {
288 tpcb->tp_r_subseq = subseq;
289 if (tpcb->tp_timer[TM_data_retrans] == 0)
290 tpcb->tp_dupacks = 0;
291 else if (++tpcb->tp_dupacks == tprexmtthresh) {
293 * partner went out of his way to signal with
294 * different subsequences that he has the
295 * same lack of an expected packet. This may
296 * be an early indiciation of a loss
299 SeqNum onxt = tpcb->tp_sndnxt;
300 struct mbuf *onxt_m = tpcb->tp_sndnxt_m;
301 u_int win = min(tpcb->tp_fcredit,
302 tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
303 #ifdef ARGO_DEBUG
304 if (argo_debug[D_ACKRECV]) {
305 printf("%s tpcb %p seq 0x%x rttseq 0x%x onxt 0x%x\n",
306 "goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt);
308 #endif
309 if (win < 2)
310 win = 2;
311 tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
312 tpcb->tp_timer[TM_data_retrans] = 0;
313 tpcb->tp_rttemit = 0;
314 tpcb->tp_sndnxt = tpcb->tp_snduna;
315 tpcb->tp_sndnxt_m = 0;
316 tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
317 tp_send(tpcb);
318 tpcb->tp_cong_win = tpcb->tp_ssthresh +
319 tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
320 if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
321 tpcb->tp_sndnxt = onxt;
322 tpcb->tp_sndnxt_m = onxt_m;
324 } else if (tpcb->tp_dupacks > tprexmtthresh) {
325 tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
327 goto done;
329 } else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
330 goto discard_the_ack;
332 * If the congestion window was inflated to account
333 * for the other side's cached packets, retract it.
335 if (tpcb->tp_dupacks > tprexmtthresh &&
336 tpcb->tp_cong_win > tpcb->tp_ssthresh)
337 tpcb->tp_cong_win = tpcb->tp_ssthresh;
338 tpcb->tp_r_subseq = subseq;
339 old_fcredit = tpcb->tp_fcredit;
340 tpcb->tp_fcredit = cdt;
341 if (cdt > tpcb->tp_maxfcredit)
342 tpcb->tp_maxfcredit = cdt;
343 tpcb->tp_dupacks = 0;
345 if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {
347 tpsbcheck(tpcb, 0);
348 bytes_acked = tp_sbdrop(tpcb, seq);
349 tpsbcheck(tpcb, 1);
351 * If transmit timer is running and timed sequence
352 * number was acked, update smoothed round trip time.
353 * Since we now have an rtt measurement, cancel the
354 * timer backoff (cf., Phil Karn's retransmit alg.).
355 * Recompute the initial retransmit timer.
357 if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
358 tp_rtt_rtv(tpcb);
360 * If all outstanding data is acked, stop retransmit timer.
361 * If there is more data to be acked, restart retransmit
362 * timer, using current (possibly backed-off) value.
363 * OSI combines the keepalive and persistance functions.
364 * So, there is no persistance timer per se, to restart.
366 if (tpcb->tp_class != TP_CLASS_0)
367 tpcb->tp_timer[TM_data_retrans] =
368 (seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
370 * When new data is acked, open the congestion window.
371 * If the window gives us less than ssthresh packets
372 * in flight, open exponentially (maxseg per packet).
373 * Otherwise open linearly: maxseg per window
374 * (maxseg^2 / cwnd per packet), plus a constant
375 * fraction of a packet (maxseg/8) to help larger windows
376 * open quickly enough.
379 u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;
381 incr = min(incr, bytes_acked);
382 if (cw > tpcb->tp_ssthresh)
383 incr = incr * incr / cw + incr / 8;
384 tpcb->tp_cong_win =
385 min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
387 tpcb->tp_snduna = seq;
388 if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
389 tpcb->tp_sndnxt = seq;
390 tpcb->tp_sndnxt_m = 0;
392 bang++;
394 if (cdt != 0 && old_fcredit == 0) {
395 tpcb->tp_sendfcc = 1;
397 if (cdt == 0) {
398 if (old_fcredit != 0)
399 IncStat(ts_zfcdt);
400 /* The following might mean that the window shrunk */
401 if (tpcb->tp_timer[TM_data_retrans]) {
402 tpcb->tp_timer[TM_data_retrans] = 0;
403 tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
404 if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
405 tpcb->tp_sndnxt = tpcb->tp_snduna;
406 tpcb->tp_sndnxt_m = 0;
410 tpcb->tp_fcredit = cdt;
411 bang |= (old_fcredit < cdt);
413 done:
414 #ifdef ARGO_DEBUG
415 if (argo_debug[D_ACKRECV]) {
416 printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%lx\n",
417 bang, cdt, old_fcredit, tpcb->tp_cong_win);
419 #endif
421 * if (bang) XXXXX Very bad to remove this test, but somethings
422 * broken
424 tp_send(tpcb);
425 return (bang);
429 * CALLED FROM:
430 * tp_goodack()
431 * FUNCTION and ARGUMENTS:
432 * drops everything up TO but not INCLUDING seq # (seq)
433 * from the retransmission queue.
436 tp_sbdrop(struct tp_pcb *tpcb, SeqNum seq)
438 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
439 int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
440 int oldcc = sb->sb_cc, oldi = i;
442 if (i >= tpcb->tp_seqhalf)
443 printf("tp_spdropping too much -- should panic");
444 while (i-- > 0)
445 sbdroprecord(sb);
446 #ifdef ARGO_DEBUG
447 if (argo_debug[D_ACKRECV]) {
448 printf("tp_sbdroping %d pkts %ld bytes on %p at 0x%x\n",
449 oldi, oldcc - sb->sb_cc, tpcb, seq);
451 #endif
452 if (sb_notify(sb))
453 sowwakeup(tpcb->tp_sock);
454 return (oldcc - sb->sb_cc);
458 * CALLED FROM:
459 * tp.trans on user send request, arrival of AK and arrival of XAK
460 * FUNCTION and ARGUMENTS:
461 * Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
462 * Emits until a) runs out of data, or b) runs into an XPD mark, or
463 * c) it hits seq number (highseq) limited by cong or credit.
465 * If you want XPD to buffer > 1 du per socket buffer, you can
466 * modifiy this to issue XPD tpdus also, but then it'll have
467 * to take some argument(s) to distinguish between the type of DU to
468 * hand tp_emit.
470 * When something is sent for the first time, its time-of-send
471 * is stashed (in system clock ticks rather than pf_slowtimo ticks).
472 * When the ack arrives, the smoothed round-trip time is figured
473 * using this value.
475 void
476 tp_send(struct tp_pcb *tpcb)
478 int len;
479 struct mbuf *m;
480 struct mbuf *mb = 0;
481 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
482 unsigned int eotsdu = 0;
483 SeqNum highseq, checkseq;
484 int idle, idleticks, off, cong_win;
485 #ifdef TP_PERF_MEAS
486 int send_start_time = hardclock_ticks;
487 SeqNum oldnxt = tpcb->tp_sndnxt;
488 #endif /* TP_PERF_MEAS */
490 idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
491 if (idle) {
492 idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
493 if (idleticks > tpcb->tp_dt_ticks)
495 * We have been idle for "a while" and no acks are
496 * expected to clock out any data we send --
497 * slow start to get ack "clock" running again.
499 tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
501 cong_win = tpcb->tp_cong_win;
502 highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
503 if (tpcb->tp_Xsnd.sb_mb)
504 highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
506 #ifdef ARGO_DEBUG
507 if (argo_debug[D_DATA]) {
508 printf("tp_send enter tpcb %p nxt 0x%x win %d high 0x%x\n",
509 tpcb, tpcb->tp_sndnxt, cong_win, highseq);
511 #endif
512 #ifdef TPPT
513 if (tp_traceflags[D_DATA]) {
514 tptraceTPCB(TPPTmisc, "tp_send sndnew snduna",
515 tpcb->tp_sndnew, tpcb->tp_snduna, 0, 0);
516 tptraceTPCB(TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
517 tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
519 #endif
520 #ifdef TPPT
521 if (tp_traceflags[D_DATA]) {
522 tptraceTPCB(TPPTmisc, "tp_send 2 nxt high fcredit congwin",
523 tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
525 #endif
527 if (tpcb->tp_sndnxt_m)
528 m = tpcb->tp_sndnxt_m;
529 else {
530 off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
531 for (m = sb->sb_mb; m && off > 0; m = m->m_next)
532 off--;
535 * Avoid silly window syndrome here . . . figure out how!
537 checkseq = tpcb->tp_sndnum;
538 if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
539 checkseq = highseq; /* i.e. DON'T retain highest assigned
540 * packet */
542 while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
544 eotsdu = (m->m_flags & M_EOR) != 0;
545 len = m->m_pkthdr.len;
546 if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
547 len < (tpcb->tp_l_tpdusize / 2))
548 break; /* Nagle . . . . . */
549 cong_win -= len;
551 * make a copy - mb goes into the retransmission list while m
552 * gets emitted. m_copy won't copy a zero-length mbuf.
554 mb = m;
555 m = m_copy(mb, 0, M_COPYALL);
556 if (m == NULL)
557 break;
558 #ifdef TPPT
559 if (tp_traceflags[D_STASH]) {
560 tptraceTPCB(TPPTmisc,
561 "tp_send mcopy nxt high eotsdu len",
562 tpcb->tp_sndnxt, highseq, eotsdu, len);
564 #endif
566 #ifdef ARGO_DEBUG
567 if (argo_debug[D_DATA]) {
568 printf("tp_sending tpcb %p nxt 0x%x\n",
569 tpcb, tpcb->tp_sndnxt);
571 #endif
573 * when headers are precomputed, may need to fill in checksum
574 * here
576 tpcb->tp_sock->so_error =
577 tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m);
578 if (tpcb->tp_sock->so_error != 0)
579 /* error */
580 break;
581 m = mb->m_nextpkt;
582 tpcb->tp_sndnxt_m = m;
583 if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
584 SEQ_INC(tpcb, tpcb->tp_sndnew);
586 * Time this transmission if not a retransmission and
587 * not currently timing anything.
589 if (tpcb->tp_rttemit == 0) {
590 tpcb->tp_rttemit = hardclock_ticks;
591 tpcb->tp_rttseq = tpcb->tp_sndnxt;
593 tpcb->tp_sndnxt = tpcb->tp_sndnew;
594 } else
595 SEQ_INC(tpcb, tpcb->tp_sndnxt);
597 * Set retransmit timer if not currently set.
598 * Initial value for retransmit timer is smoothed
599 * round-trip time + 2 * round-trip time variance.
600 * Initialize shift counter which is used for backoff
601 * of retransmit time.
603 if (tpcb->tp_timer[TM_data_retrans] == 0 &&
604 tpcb->tp_class != TP_CLASS_0) {
605 tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
606 tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
607 tpcb->tp_rxtshift = 0;
610 if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
611 tpcb->tp_oktonagle = 0;
612 #ifdef TP_PERF_MEAS
613 if (DOPERF(tpcb)) {
614 int npkts;
615 int s, elapsed, *t;
616 struct timeval now;
618 elapsed = hardclock_ticks - send_start_time;
620 npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
622 if (npkts > 0)
623 tpcb->tp_Nwindow++;
625 if (npkts > TP_PM_MAX)
626 npkts = TP_PM_MAX;
628 t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
629 *t += (t - elapsed) >> TP_RTT_ALPHA;
631 if (mb == 0) {
632 IncPStat(tpcb, tps_win_lim_by_data[npkts]);
633 } else {
634 IncPStat(tpcb, tps_win_lim_by_cdt[npkts]);
635 /* not true with congestion-window being used */
637 now.tv_sec = elapsed / hz;
638 now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
639 tpmeas(tpcb->tp_lref,
640 TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts);
642 #endif /* TP_PERF_MEAS */
645 #ifdef TPPT
646 if (tp_traceflags[D_DATA]) {
647 tptraceTPCB(TPPTmisc,
648 "tp_send at end: new nxt eotsdu error",
649 tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu,
650 tpcb->tp_sock->so_error);
653 #endif
/*
 * Counters maintained by tp_packetize():
 *	TPNagleok - incremented each time a send without end-of-TSDU leaves
 *		    tp_oktonagle set;
 *	TPNagled  - incremented each time new data was merged into the last
 *		    queued record instead of being appended as a new one.
 */
656 int TPNagleok;
657 int TPNagled;
660 tp_packetize(struct tp_pcb *tpcb, struct mbuf *m, int eotsdu)
662 struct mbuf *n = NULL;
663 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
664 int maxsize = tpcb->tp_l_tpdusize
665 - tp_headersize(DT_TPDU_type, tpcb)
666 - (tpcb->tp_use_checksum ? 4 : 0);
667 int totlen = m->m_pkthdr.len;
670 * Pre-packetize the data in the sockbuf
671 * according to negotiated mtu. Do it here
672 * where we can safely wait for mbufs.
674 * This presumes knowledge of sockbuf conventions.
675 * TODO: allocate space for header and fill it in (once!).
677 #ifdef ARGO_DEBUG
678 if (argo_debug[D_DATA]) {
679 printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
680 maxsize, totlen, eotsdu, tpcb->tp_sndnum);
682 #endif
683 if (tpcb->tp_oktonagle) {
684 if ((n = sb->sb_mb) == 0)
685 panic("tp_packetize");
686 while (n->m_nextpkt)
687 n = n->m_nextpkt;
688 if (n->m_flags & M_EOR)
689 panic("tp_packetize 2");
690 SEQ_INC(tpcb, tpcb->tp_sndnum);
691 if (totlen + n->m_pkthdr.len < maxsize) {
693 * There is an unsent packet with space,
694 * combine data
696 struct mbuf *old_n = n;
697 tpsbcheck(tpcb, 3);
698 n->m_pkthdr.len += totlen;
699 while (n->m_next)
700 n = n->m_next;
701 sbcompress(sb, m, n);
702 tpsbcheck(tpcb, 4);
703 n = old_n;
704 TPNagled++;
705 goto out;
709 while (m) {
710 n = m;
711 if (totlen > maxsize) {
712 if ((m = m_split(n, maxsize, M_WAIT)) == 0)
713 panic("tp_packetize");
714 } else
715 m = 0;
716 totlen -= maxsize;
717 tpsbcheck(tpcb, 5);
718 sbappendrecord(sb, n);
719 tpsbcheck(tpcb, 6);
720 SEQ_INC(tpcb, tpcb->tp_sndnum);
722 out:
723 if (eotsdu) {
724 n->m_flags |= M_EOR; /* XXX belongs at end */
725 tpcb->tp_oktonagle = 0;
726 } else {
727 SEQ_DEC(tpcb, tpcb->tp_sndnum);
728 tpcb->tp_oktonagle = 1;
729 TPNagleok++;
732 #ifdef ARGO_DEBUG
733 if (argo_debug[D_DATA]) {
734 printf("SEND out: oktonagle %d sndnum 0x%x\n",
735 tpcb->tp_oktonagle, tpcb->tp_sndnum);
737 #endif
738 return 0;
743 * NAME: tp_stash()
744 * CALLED FROM:
745 * tp.trans on arrival of a DT tpdu
746 * FUNCTION, ARGUMENTS, and RETURN VALUE:
747 * Returns 1 if
748 * a) something new arrived and it's got eotsdu_reached bit on,
749 * b) this arrival was caused other out-of-sequence things to be
750 * accepted, or
751 * c) this arrival is the highest seq # for which we last gave credit
752 * (sender just sent a whole window)
753 * In other words, returns 1 if tp should send an ack immediately, 0 if
754 * the ack can wait a while.
756 * Note: this implementation no longer renegs on credit, (except
757 * when debugging option D_RENEG is on, for the purpose of testing
758 * ack subsequencing), so we don't need to check for incoming tpdus
759 * being in a reneged portion of the window.
763 tp_stash(struct tp_pcb *tpcb, struct tp_event *e)
765 int ack_reason = tpcb->tp_ack_strat & ACK_STRAT_EACH;
766 /* 0--> delay acks until full window */
767 /* 1--> ack each tpdu */
768 #define E e->TPDU_ATTR(DT)
770 if (E.e_eot) {
771 struct mbuf *n = E.e_data;
772 n->m_flags |= M_EOR;
773 n->m_nextpkt = 0;
775 #ifdef ARGO_DEBUG
776 if (argo_debug[D_STASH]) {
777 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
778 "stash: so_rcv before appending");
779 dump_mbuf(E.e_data,
780 "stash: e_data before appending");
782 #endif
784 #ifdef TP_PERF_MEAS
785 if (DOPERF(tpcb)) {
786 PStat(tpcb, Nb_from_ll) += E.e_datalen;
787 tpmeas(tpcb->tp_lref, TPtime_from_ll,
788 &e->e_time, E.e_seq,
789 (u_int) PStat(tpcb, Nb_from_ll),
790 (u_int) E.e_datalen);
792 #endif
794 if (E.e_seq == tpcb->tp_rcvnxt) {
796 #ifdef ARGO_DEBUG
797 if (argo_debug[D_STASH]) {
798 printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
799 E.e_seq, E.e_datalen, E.e_eot);
801 #endif
803 #ifdef TPPT
804 if (tp_traceflags[D_STASH]) {
805 tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
806 E.e_seq, E.e_datalen, E.e_eot, 0);
808 #endif
810 SET_DELACK(tpcb);
812 sbappend(&tpcb->tp_sock->so_rcv, E.e_data);
814 SEQ_INC(tpcb, tpcb->tp_rcvnxt);
816 * move chains from the reassembly queue to the socket buffer
818 if (tpcb->tp_rsycnt) {
819 struct mbuf **mp;
820 struct mbuf **mplim;
822 mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt %
823 tpcb->tp_maxlcredit);
824 mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
826 while (tpcb->tp_rsycnt && *mp) {
827 sbappend(&tpcb->tp_sock->so_rcv, *mp);
828 tpcb->tp_rsycnt--;
829 *mp = 0;
830 SEQ_INC(tpcb, tpcb->tp_rcvnxt);
831 ack_reason |= ACK_REORDER;
832 if (++mp == mplim)
833 mp = tpcb->tp_rsyq;
836 #ifdef ARGO_DEBUG
837 if (argo_debug[D_STASH]) {
838 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
839 "stash: so_rcv after appending");
841 #endif
843 } else {
844 struct mbuf **mp;
845 SeqNum uwe;
847 #ifdef TPPT
848 if (tp_traceflags[D_STASH]) {
849 tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
850 E.e_seq, tpcb->tp_rcvnxt,
851 tpcb->tp_lcredit, 0);
853 #endif
855 if (tpcb->tp_rsyq == 0)
856 tp_rsyset(tpcb);
857 uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
858 if (tpcb->tp_rsyq == 0 ||
859 !IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
860 ack_reason = ACK_DONT;
861 m_freem(E.e_data);
862 } else if (*(mp = tpcb->tp_rsyq +
863 (E.e_seq % tpcb->tp_maxlcredit)) != NULL ) {
864 #ifdef ARGO_DEBUG
865 if (argo_debug[D_STASH]) {
866 printf("tp_stash - drop & ack\n");
868 #endif
871 * retransmission - drop it and force
872 * an ack
874 IncStat(ts_dt_dup);
875 #ifdef TP_PERF_MEAS
876 if (DOPERF(tpcb)) {
877 IncPStat(tpcb, tps_n_ack_cuz_dup);
879 #endif
881 m_freem(E.e_data);
882 ack_reason |= ACK_DUP;
883 } else {
884 *mp = E.e_data;
885 tpcb->tp_rsycnt++;
886 ack_reason = ACK_DONT;
890 * there were some comments of historical interest
891 * here.
894 LOCAL_CREDIT(tpcb);
896 if (E.e_seq == tpcb->tp_sent_uwe)
897 ack_reason |= ACK_STRAT_FULLWIN;
899 #ifdef TPPT
900 if (tp_traceflags[D_STASH]) {
901 tptraceTPCB(TPPTmisc,
902 "end of stash, eot, ack_reason, sent_uwe ",
903 E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
905 #endif
907 if (ack_reason == ACK_DONT) {
908 IncStat(ts_ackreason[ACK_DONT]);
909 return 0;
910 } else {
911 #ifdef TP_PERF_MEAS
912 if (DOPERF(tpcb)) {
913 if (ack_reason & ACK_STRAT_EACH) {
914 IncPStat(tpcb, tps_n_ack_cuz_strat);
915 } else if (ack_reason & ACK_STRAT_FULLWIN) {
916 IncPStat(tpcb, tps_n_ack_cuz_fullwin);
917 } else if (ack_reason & ACK_REORDER) {
918 IncPStat(tpcb, tps_n_ack_cuz_reorder);
920 tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
921 SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
923 #endif
925 int i;
928 * keep track of all reasons
929 * that apply
931 for (i = 1; i < _ACK_NUM_REASONS_; i++) {
932 if (ack_reason & (1 << i))
933 IncStat(ts_ackreason[i]);
936 return 1;
942 * tp_rsyflush - drop all the packets on the reassembly queue.
943 * Do this when closing the socket, or when somebody has changed
944 * the space avaible in the receive socket (XXX).
946 void
947 tp_rsyflush(struct tp_pcb *tpcb)
949 struct mbuf **mp;
950 if (tpcb->tp_rsycnt) {
951 for (mp = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
952 --mp >= tpcb->tp_rsyq;)
953 if (*mp) {
954 tpcb->tp_rsycnt--;
955 m_freem(*mp);
957 if (tpcb->tp_rsycnt) {
958 printf("tp_rsyflush %p\n", tpcb);
959 tpcb->tp_rsycnt = 0;
962 free((void *) tpcb->tp_rsyq, M_PCB);
963 tpcb->tp_rsyq = 0;
966 void
967 tp_rsyset(struct tp_pcb *tpcb)
969 struct socket *so = tpcb->tp_sock;
970 int maxcredit = tpcb->tp_xtd_format ? 0xffff : 0xf;
971 int old_credit = tpcb->tp_maxlcredit;
972 void * rsyq;
974 tpcb->tp_maxlcredit = maxcredit = min(maxcredit,
975 (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize) / tpcb->tp_l_tpdusize);
977 if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0)
978 return;
979 maxcredit *= sizeof(struct mbuf *);
980 if (tpcb->tp_rsyq)
981 tp_rsyflush(tpcb);
982 rsyq = malloc(maxcredit, M_PCB, M_NOWAIT|M_ZERO);
983 tpcb->tp_rsyq = (struct mbuf **) rsyq;
987 void
988 tpsbcheck(struct tp_pcb *tpcb, int i)
990 struct mbuf *n, *m;
991 int len = 0, mbcnt = 0, pktlen;
992 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
994 for (n = sb->sb_mb; n; n = n->m_nextpkt) {
995 if ((n->m_flags & M_PKTHDR) == 0)
996 panic("tpsbcheck nohdr");
997 pktlen = len + n->m_pkthdr.len;
998 for (m = n; m; m = m->m_next) {
999 len += m->m_len;
1000 mbcnt += MSIZE;
1001 if (m->m_flags & M_EXT)
1002 mbcnt += m->m_ext.ext_size;
1004 if (len != pktlen) {
1005 printf("test %d; len %d != pktlen %d on mbuf %p\n",
1006 i, len, pktlen, n);
1007 panic("tpsbcheck short");
1010 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
1011 printf("test %d: cc %d != %ld || mbcnt %d != %ld\n", i, len, sb->sb_cc,
1012 mbcnt, sb->sb_mbcnt);
1013 panic("tpsbcheck");