]> git.saurik.com Git - apple/xnu.git/blob - bsd/netiso/tp_subr.c
xnu-123.5.tar.gz
[apple/xnu.git] / bsd / netiso / tp_subr.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*-
23 * Copyright (c) 1991, 1993
24 * The Regents of the University of California. All rights reserved.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions
28 * are met:
29 * 1. Redistributions of source code must retain the above copyright
30 * notice, this list of conditions and the following disclaimer.
31 * 2. Redistributions in binary form must reproduce the above copyright
32 * notice, this list of conditions and the following disclaimer in the
33 * documentation and/or other materials provided with the distribution.
34 * 3. All advertising materials mentioning features or use of this software
35 * must display the following acknowledgement:
36 * This product includes software developed by the University of
37 * California, Berkeley and its contributors.
38 * 4. Neither the name of the University nor the names of its contributors
39 * may be used to endorse or promote products derived from this software
40 * without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 *
54 * @(#)tp_subr.c 8.1 (Berkeley) 6/10/93
55 */
56
57 /***********************************************************
58 Copyright IBM Corporation 1987
59
60 All Rights Reserved
61
62 Permission to use, copy, modify, and distribute this software and its
63 documentation for any purpose and without fee is hereby granted,
64 provided that the above copyright notice appear in all copies and that
65 both that copyright notice and this permission notice appear in
66 supporting documentation, and that the name of IBM not be
67 used in advertising or publicity pertaining to distribution of the
68 software without specific, written prior permission.
69
70 IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
71 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
72 IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
73 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
74 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
75 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
76 SOFTWARE.
77
78 ******************************************************************/
79
80 /*
81 * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
82 */
83 /*
84 * ARGO TP
85 *
86 * The main work of data transfer is done here.
87 * These routines are called from tp.trans.
88 * They include the routines that check the validity of acks and Xacks,
89 * (tp_goodack() and tp_goodXack() )
90 * take packets from socket buffers and send them (tp_send()),
91 * drop the data from the socket buffers (tp_sbdrop()),
92 * and put incoming packet data into socket buffers (tp_stash()).
93 */
94
95 #include <sys/param.h>
96 #include <sys/systm.h>
97 #include <sys/mbuf.h>
98 #include <sys/socket.h>
99 #include <sys/socketvar.h>
100 #include <sys/protosw.h>
101 #include <sys/errno.h>
102 #include <sys/time.h>
103 #include <sys/kernel.h>
104
105 #include <netiso/tp_ip.h>
106 #include <netiso/iso.h>
107 #include <netiso/argo_debug.h>
108 #include <netiso/tp_timer.h>
109 #include <netiso/tp_param.h>
110 #include <netiso/tp_stat.h>
111 #include <netiso/tp_pcb.h>
112 #include <netiso/tp_tpdu.h>
113 #include <netiso/tp_trace.h>
114 #include <netiso/tp_meas.h>
115 #include <netiso/tp_seq.h>
116
/* Routines used in this file before (or without) a visible definition. */
int	tp_emit();
int	tp_sbdrop();
void	tp_send();

/*
 * Number of consecutive duplicate acks at which tp_goodack() rewinds
 * tp_sndnxt to tp_snduna and calls tp_send().
 */
int	tprexmtthresh = 3;

/* Clock tick counter; defined outside this file. */
extern int ticks;
121
122 /*
123 * CALLED FROM:
124 * tp.trans, when an XAK arrives
125 * FUNCTION and ARGUMENTS:
126 * Determines if the sequence number (seq) from the XAK
127 * acks anything new. If so, drop the appropriate tpdu
128 * from the XPD send queue.
129 * RETURN VALUE:
130 * Returns 1 if it did this, 0 if the ack caused no action.
131 */
132 int
133 tp_goodXack(tpcb, seq)
134 struct tp_pcb *tpcb;
135 SeqNum seq;
136 {
137
138 IFTRACE(D_XPD)
139 tptraceTPCB(TPPTgotXack,
140 seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew,
141 tpcb->tp_snduna);
142 ENDTRACE
143
144 if ( seq == tpcb->tp_Xuna ) {
145 tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
146
147 /* DROP 1 packet from the Xsnd socket buf - just so happens
148 * that only one packet can be there at any time
149 * so drop the whole thing. If you allow > 1 packet
150 * the socket buffer, then you'll have to keep
151 * track of how many characters went w/ each XPD tpdu, so this
152 * will get messier
153 */
154 IFDEBUG(D_XPD)
155 dump_mbuf(tpcb->tp_Xsnd.sb_mb,
156 "tp_goodXack Xsnd before sbdrop");
157 ENDDEBUG
158
159 IFTRACE(D_XPD)
160 tptraceTPCB(TPPTmisc,
161 "goodXack: dropping cc ",
162 (int)(tpcb->tp_Xsnd.sb_cc),
163 0,0,0);
164 ENDTRACE
165 sbdroprecord(&tpcb->tp_Xsnd);
166 return 1;
167 }
168 return 0;
169 }
170
171 /*
172 * CALLED FROM:
173 * tp_good_ack()
174 * FUNCTION and ARGUMENTS:
175 * updates
176 * smoothed average round trip time (*rtt)
177 * roundtrip time variance (*rtv) - actually deviation, not variance
178 * given the new value (diff)
179 * RETURN VALUE:
180 * void
181 */
182
183 void
184 tp_rtt_rtv(tpcb)
185 register struct tp_pcb *tpcb;
186 {
187 int old = tpcb->tp_rtt;
188 int delta, elapsed = ticks - tpcb->tp_rttemit;
189
190 if (tpcb->tp_rtt != 0) {
191 /*
192 * rtt is the smoothed round trip time in machine clock ticks (hz).
193 * It is stored as a fixed point number, unscaled (unlike the tcp
194 * srtt). The rationale here is that it is only significant to the
195 * nearest unit of slowtimo, which is at least 8 machine clock ticks
196 * so there is no need to scale. The smoothing is done according
197 * to the same formula as TCP (rtt = rtt*7/8 + measured_rtt/8).
198 */
199 delta = elapsed - tpcb->tp_rtt;
200 if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
201 tpcb->tp_rtt = 1;
202 /*
203 * rtv is a smoothed accumulated mean difference, unscaled
204 * for reasons expressed above.
205 * It is smoothed with an alpha of .75, and the round trip timer
206 * will be set to rtt + 4*rtv, also as TCP does.
207 */
208 if (delta < 0)
209 delta = -delta;
210 if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
211 tpcb->tp_rtv = 1;
212 } else {
213 /*
214 * No rtt measurement yet - use the unsmoothed rtt.
215 * Set the variance to half the rtt (so our first
216 * retransmit happens at 3*rtt)
217 */
218 tpcb->tp_rtt = elapsed;
219 tpcb->tp_rtv = elapsed >> 1;
220 }
221 tpcb->tp_rttemit = 0;
222 tpcb->tp_rxtshift = 0;
223 /*
224 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
225 * Because of the way we do the smoothing, srtt and rttvar
226 * will each average +1/2 tick of bias. When we compute
227 * the retransmit timer, we want 1/2 tick of rounding and
228 * 1 extra tick because of +-1/2 tick uncertainty in the
229 * firing of the timer. The bias will give us exactly the
230 * 1.5 tick we need. But, because the bias is
231 * statistical, we have to test that we don't drop below
232 * the minimum feasible timer (which is 2 ticks)."
233 */
234 TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
235 tpcb->tp_peer_acktime, 128 /* XXX */);
236 IFDEBUG(D_RTT)
237 printf("%s tpcb 0x%x, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
238 "tp_rtt_rtv:",tpcb,elapsed,delta,tpcb->tp_rtt,tpcb->tp_rtv,old);
239 ENDDEBUG
240 tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
241 }
242
243 /*
244 * CALLED FROM:
245 * tp.trans when an AK arrives
246 * FUNCTION and ARGUMENTS:
247 * Given (cdt), the credit from the AK tpdu, and
248 * (seq), the sequence number from the AK tpdu,
249 * tp_goodack() determines if the AK acknowledges something in the send
250 * window, and if so, drops the appropriate packets from the retransmission
251 * list, computes the round trip time, and updates the retransmission timer
252 * based on the new smoothed round trip time.
253 * RETURN VALUE:
254 * Returns 1 if
255 * EITHER it actually acked something heretofore unacknowledged
256 * OR no news but the credit should be processed.
257 * If something heretofore unacked was acked with this sequence number,
258 * the appropriate tpdus are dropped from the retransmission control list,
259 * by calling tp_sbdrop().
260 * No need to see the tpdu itself.
261 */
262 int
263 tp_goodack(tpcb, cdt, seq, subseq)
264 register struct tp_pcb *tpcb;
265 u_int cdt;
266 register SeqNum seq;
267 u_int subseq;
268 {
269 int old_fcredit;
270 int bang = 0; /* bang --> ack for something heretofore unacked */
271 u_int bytes_acked;
272
273 IFDEBUG(D_ACKRECV)
274 printf("goodack tpcb 0x%x seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
275 tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
276 ENDDEBUG
277 IFTRACE(D_ACKRECV)
278 tptraceTPCB(TPPTgotack,
279 seq,cdt, tpcb->tp_snduna,tpcb->tp_sndnew,subseq);
280 ENDTRACE
281
282 IFPERF(tpcb)
283 tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *)0, seq, 0, 0);
284 ENDPERF
285
286 if (seq == tpcb->tp_snduna) {
287 if (subseq < tpcb->tp_r_subseq ||
288 (subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
289 discard_the_ack:
290 IFDEBUG(D_ACKRECV)
291 printf("goodack discard : tpcb 0x%x subseq %d r_subseq %d\n",
292 tpcb, subseq, tpcb->tp_r_subseq);
293 ENDDEBUG
294 goto done;
295 }
296 if (cdt == tpcb->tp_fcredit /*&& thus subseq > tpcb->tp_r_subseq */) {
297 tpcb->tp_r_subseq = subseq;
298 if (tpcb->tp_timer[TM_data_retrans] == 0)
299 tpcb->tp_dupacks = 0;
300 else if (++tpcb->tp_dupacks == tprexmtthresh) {
301 /* partner went out of his way to signal with different
302 subsequences that he has the same lack of an expected
303 packet. This may be an early indiciation of a loss */
304
305 SeqNum onxt = tpcb->tp_sndnxt;
306 struct mbuf *onxt_m = tpcb->tp_sndnxt_m;
307 u_int win = min(tpcb->tp_fcredit,
308 tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
309 IFDEBUG(D_ACKRECV)
310 printf("%s tpcb 0x%x seq 0x%x rttseq 0x%x onxt 0x%x\n",
311 "goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt);
312 ENDDEBUG
313 if (win < 2)
314 win = 2;
315 tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
316 tpcb->tp_timer[TM_data_retrans] = 0;
317 tpcb->tp_rttemit = 0;
318 tpcb->tp_sndnxt = tpcb->tp_snduna;
319 tpcb->tp_sndnxt_m = 0;
320 tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
321 tp_send(tpcb);
322 tpcb->tp_cong_win = tpcb->tp_ssthresh +
323 tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
324 if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
325 tpcb->tp_sndnxt = onxt;
326 tpcb->tp_sndnxt_m = onxt_m;
327 }
328
329 } else if (tpcb->tp_dupacks > tprexmtthresh) {
330 tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
331 }
332 goto done;
333 }
334 } else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
335 goto discard_the_ack;
336 /*
337 * If the congestion window was inflated to account
338 * for the other side's cached packets, retract it.
339 */
340 if (tpcb->tp_dupacks > tprexmtthresh &&
341 tpcb->tp_cong_win > tpcb->tp_ssthresh)
342 tpcb->tp_cong_win = tpcb->tp_ssthresh;
343 tpcb->tp_r_subseq = subseq;
344 old_fcredit = tpcb->tp_fcredit;
345 tpcb->tp_fcredit = cdt;
346 if (cdt > tpcb->tp_maxfcredit)
347 tpcb->tp_maxfcredit = cdt;
348 tpcb->tp_dupacks = 0;
349
350 if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {
351
352 tpsbcheck(tpcb, 0);
353 bytes_acked = tp_sbdrop(tpcb, seq);
354 tpsbcheck(tpcb, 1);
355 /*
356 * If transmit timer is running and timed sequence
357 * number was acked, update smoothed round trip time.
358 * Since we now have an rtt measurement, cancel the
359 * timer backoff (cf., Phil Karn's retransmit alg.).
360 * Recompute the initial retransmit timer.
361 */
362 if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
363 tp_rtt_rtv(tpcb);
364 /*
365 * If all outstanding data is acked, stop retransmit timer.
366 * If there is more data to be acked, restart retransmit
367 * timer, using current (possibly backed-off) value.
368 * OSI combines the keepalive and persistance functions.
369 * So, there is no persistance timer per se, to restart.
370 */
371 if (tpcb->tp_class != TP_CLASS_0)
372 tpcb->tp_timer[TM_data_retrans] =
373 (seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
374 /*
375 * When new data is acked, open the congestion window.
376 * If the window gives us less than ssthresh packets
377 * in flight, open exponentially (maxseg per packet).
378 * Otherwise open linearly: maxseg per window
379 * (maxseg^2 / cwnd per packet), plus a constant
380 * fraction of a packet (maxseg/8) to help larger windows
381 * open quickly enough.
382 */
383 {
384 u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;
385
386 incr = min(incr, bytes_acked);
387 if (cw > tpcb->tp_ssthresh)
388 incr = incr * incr / cw + incr / 8;
389 tpcb->tp_cong_win =
390 min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
391 }
392 tpcb->tp_snduna = seq;
393 if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
394 tpcb->tp_sndnxt = seq;
395 tpcb->tp_sndnxt_m = 0;
396 }
397 bang++;
398 }
399
400 if( cdt != 0 && old_fcredit == 0 ) {
401 tpcb->tp_sendfcc = 1;
402 }
403 if (cdt == 0) {
404 if (old_fcredit != 0)
405 IncStat(ts_zfcdt);
406 /* The following might mean that the window shrunk */
407 if (tpcb->tp_timer[TM_data_retrans]) {
408 tpcb->tp_timer[TM_data_retrans] = 0;
409 tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
410 if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
411 tpcb->tp_sndnxt = tpcb->tp_snduna;
412 tpcb->tp_sndnxt_m = 0;
413 }
414 }
415 }
416 tpcb->tp_fcredit = cdt;
417 bang |= (old_fcredit < cdt);
418
419 done:
420 IFDEBUG(D_ACKRECV)
421 printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%x\n",
422 bang, cdt, old_fcredit, tpcb->tp_cong_win);
423 ENDDEBUG
424 /* if (bang) XXXXX Very bad to remove this test, but somethings broken */
425 tp_send(tpcb);
426 return (bang);
427 }
428
429 /*
430 * CALLED FROM:
431 * tp_goodack()
432 * FUNCTION and ARGUMENTS:
433 * drops everything up TO but not INCLUDING seq # (seq)
434 * from the retransmission queue.
435 */
436 tp_sbdrop(tpcb, seq)
437 register struct tp_pcb *tpcb;
438 SeqNum seq;
439 {
440 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
441 register int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
442 int oldcc = sb->sb_cc, oldi = i;
443
444 if (i >= tpcb->tp_seqhalf)
445 printf("tp_spdropping too much -- should panic");
446 while (i-- > 0)
447 sbdroprecord(sb);
448 IFDEBUG(D_ACKRECV)
449 printf("tp_sbdroping %d pkts %d bytes on %x at 0x%x\n",
450 oldi, oldcc - sb->sb_cc, tpcb, seq);
451 ENDDEBUG
452 if (sb->sb_flags & SB_NOTIFY)
453 sowwakeup(tpcb->tp_sock);
454 return (oldcc - sb->sb_cc);
455 }
456
457 /*
458 * CALLED FROM:
459 * tp.trans on user send request, arrival of AK and arrival of XAK
460 * FUNCTION and ARGUMENTS:
461 * Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
462 * Emits until a) runs out of data, or b) runs into an XPD mark, or
463 * c) it hits seq number (highseq) limited by cong or credit.
464 *
465 * If you want XPD to buffer > 1 du per socket buffer, you can
466 * modifiy this to issue XPD tpdus also, but then it'll have
467 * to take some argument(s) to distinguish between the type of DU to
468 * hand tp_emit.
469 *
470 * When something is sent for the first time, its time-of-send
471 * is stashed (in system clock ticks rather than pf_slowtimo ticks).
472 * When the ack arrives, the smoothed round-trip time is figured
473 * using this value.
474 */
475 void
476 tp_send(tpcb)
477 register struct tp_pcb *tpcb;
478 {
479 register int len;
480 register struct mbuf *m;
481 struct mbuf *mb = 0;
482 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
483 unsigned int eotsdu = 0;
484 SeqNum highseq, checkseq;
485 int idle, idleticks, off, cong_win;
486 #ifdef TP_PERF_MEAS
487 int send_start_time = ticks;
488 SeqNum oldnxt = tpcb->tp_sndnxt;
489 #endif /* TP_PERF_MEAS */
490
491 idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
492 if (idle) {
493 idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
494 if (idleticks > tpcb->tp_dt_ticks)
495 /*
496 * We have been idle for "a while" and no acks are
497 * expected to clock out any data we send --
498 * slow start to get ack "clock" running again.
499 */
500 tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
501 }
502
503 cong_win = tpcb->tp_cong_win;
504 highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
505 if (tpcb->tp_Xsnd.sb_mb)
506 highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
507
508 IFDEBUG(D_DATA)
509 printf("tp_send enter tpcb 0x%x nxt 0x%x win %d high 0x%x\n",
510 tpcb, tpcb->tp_sndnxt, cong_win, highseq);
511 ENDDEBUG
512 IFTRACE(D_DATA)
513 tptraceTPCB( TPPTmisc, "tp_send sndnew snduna",
514 tpcb->tp_sndnew, tpcb->tp_snduna, 0, 0);
515 tptraceTPCB( TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
516 tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
517 ENDTRACE
518 IFTRACE(D_DATA)
519 tptraceTPCB( TPPTmisc, "tp_send 2 nxt high fcredit congwin",
520 tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
521 ENDTRACE
522
523 if (tpcb->tp_sndnxt_m)
524 m = tpcb->tp_sndnxt_m;
525 else {
526 off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
527 for (m = sb->sb_mb; m && off > 0; m = m->m_next)
528 off--;
529 }
530 send:
531 /*
532 * Avoid silly window syndrome here . . . figure out how!
533 */
534 checkseq = tpcb->tp_sndnum;
535 if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
536 checkseq = highseq; /* i.e. DON'T retain highest assigned packet */
537
538 while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
539
540 eotsdu = (m->m_flags & M_EOR) != 0;
541 len = m->m_pkthdr.len;
542 if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
543 len < (tpcb->tp_l_tpdusize / 2))
544 break; /* Nagle . . . . . */
545 cong_win -= len;
546 /* make a copy - mb goes into the retransmission list
547 * while m gets emitted. m_copy won't copy a zero-length mbuf.
548 */
549 mb = m;
550 m = m_copy(mb, 0, M_COPYALL);
551 if (m == MNULL)
552 break;
553 IFTRACE(D_STASH)
554 tptraceTPCB( TPPTmisc,
555 "tp_send mcopy nxt high eotsdu len",
556 tpcb->tp_sndnxt, highseq, eotsdu, len);
557 ENDTRACE
558
559 IFDEBUG(D_DATA)
560 printf("tp_sending tpcb 0x%x nxt 0x%x\n",
561 tpcb, tpcb->tp_sndnxt);
562 ENDDEBUG
563 /* when headers are precomputed, may need to fill
564 in checksum here */
565 if (tpcb->tp_sock->so_error =
566 tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m)) {
567 /* error */
568 break;
569 }
570 m = mb->m_nextpkt;
571 tpcb->tp_sndnxt_m = m;
572 if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
573 SEQ_INC(tpcb, tpcb->tp_sndnew);
574 /*
575 * Time this transmission if not a retransmission and
576 * not currently timing anything.
577 */
578 if (tpcb->tp_rttemit == 0) {
579 tpcb->tp_rttemit = ticks;
580 tpcb->tp_rttseq = tpcb->tp_sndnxt;
581 }
582 tpcb->tp_sndnxt = tpcb->tp_sndnew;
583 } else
584 SEQ_INC(tpcb, tpcb->tp_sndnxt);
585 /*
586 * Set retransmit timer if not currently set.
587 * Initial value for retransmit timer is smoothed
588 * round-trip time + 2 * round-trip time variance.
589 * Initialize shift counter which is used for backoff
590 * of retransmit time.
591 */
592 if (tpcb->tp_timer[TM_data_retrans] == 0 &&
593 tpcb->tp_class != TP_CLASS_0) {
594 tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
595 tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
596 tpcb->tp_rxtshift = 0;
597 }
598 }
599 if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
600 tpcb->tp_oktonagle = 0;
601 #ifdef TP_PERF_MEAS
602 IFPERF(tpcb)
603 {
604 register int npkts;
605 int elapsed = ticks - send_start_time, *t;
606 struct timeval now;
607
608 npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
609
610 if (npkts > 0)
611 tpcb->tp_Nwindow++;
612
613 if (npkts > TP_PM_MAX)
614 npkts = TP_PM_MAX;
615
616 t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
617 *t += (t - elapsed) >> TP_RTT_ALPHA;
618
619 if (mb == 0) {
620 IncPStat(tpcb, tps_win_lim_by_data[npkts] );
621 } else {
622 IncPStat(tpcb, tps_win_lim_by_cdt[npkts] );
623 /* not true with congestion-window being used */
624 }
625 now.tv_sec = elapsed / hz;
626 now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
627 tpmeas( tpcb->tp_lref,
628 TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts);
629 }
630 ENDPERF
631 #endif /* TP_PERF_MEAS */
632
633
634 IFTRACE(D_DATA)
635 tptraceTPCB( TPPTmisc,
636 "tp_send at end: new nxt eotsdu error",
637 tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu, tpcb->tp_sock->so_error);
638
639 ENDTRACE
640 }
641
642 int TPNagleok;
643 int TPNagled;
644
645 tp_packetize(tpcb, m, eotsdu)
646 register struct tp_pcb *tpcb;
647 register struct mbuf *m;
648 int eotsdu;
649 {
650 register struct mbuf *n;
651 register struct sockbuf *sb = &tpcb->tp_sock->so_snd;
652 int maxsize = tpcb->tp_l_tpdusize
653 - tp_headersize(DT_TPDU_type, tpcb)
654 - (tpcb->tp_use_checksum?4:0) ;
655 int totlen = m->m_pkthdr.len;
656 struct mbuf *m_split();
657 /*
658 * Pre-packetize the data in the sockbuf
659 * according to negotiated mtu. Do it here
660 * where we can safely wait for mbufs.
661 *
662 * This presumes knowledge of sockbuf conventions.
663 * TODO: allocate space for header and fill it in (once!).
664 */
665 IFDEBUG(D_DATA)
666 printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
667 maxsize, totlen, eotsdu, tpcb->tp_sndnum);
668 ENDTRACE
669 if (tpcb->tp_oktonagle) {
670 if ((n = sb->sb_mb) == 0)
671 panic("tp_packetize");
672 while (n->m_act)
673 n = n->m_act;
674 if (n->m_flags & M_EOR)
675 panic("tp_packetize 2");
676 SEQ_INC(tpcb, tpcb->tp_sndnum);
677 if (totlen + n->m_pkthdr.len < maxsize) {
678 /* There is an unsent packet with space, combine data */
679 struct mbuf *old_n = n;
680 tpsbcheck(tpcb,3);
681 n->m_pkthdr.len += totlen;
682 while (n->m_next)
683 n = n->m_next;
684 sbcompress(sb, m, n);
685 tpsbcheck(tpcb,4);
686 n = old_n;
687 TPNagled++;
688 goto out;
689 }
690 }
691 while (m) {
692 n = m;
693 if (totlen > maxsize) {
694 if ((m = m_split(n, maxsize, M_WAIT)) == 0)
695 panic("tp_packetize");
696 } else
697 m = 0;
698 totlen -= maxsize;
699 tpsbcheck(tpcb, 5);
700 sbappendrecord(sb, n);
701 tpsbcheck(tpcb, 6);
702 SEQ_INC(tpcb, tpcb->tp_sndnum);
703 }
704 out:
705 if (eotsdu) {
706 n->m_flags |= M_EOR; /* XXX belongs at end */
707 tpcb->tp_oktonagle = 0;
708 } else {
709 SEQ_DEC(tpcb, tpcb->tp_sndnum);
710 tpcb->tp_oktonagle = 1;
711 TPNagleok++;
712 }
713 IFDEBUG(D_DATA)
714 printf("SEND out: oktonagle %d sndnum 0x%x\n",
715 tpcb->tp_oktonagle, tpcb->tp_sndnum);
716 ENDTRACE
717 return 0;
718 }
719
720
721 /*
722 * NAME: tp_stash()
723 * CALLED FROM:
724 * tp.trans on arrival of a DT tpdu
725 * FUNCTION, ARGUMENTS, and RETURN VALUE:
726 * Returns 1 if
727 * a) something new arrived and it's got eotsdu_reached bit on,
728 * b) this arrival was caused other out-of-sequence things to be
729 * accepted, or
730 * c) this arrival is the highest seq # for which we last gave credit
731 * (sender just sent a whole window)
732 * In other words, returns 1 if tp should send an ack immediately, 0 if
733 * the ack can wait a while.
734 *
735 * Note: this implementation no longer renegs on credit, (except
736 * when debugging option D_RENEG is on, for the purpose of testing
737 * ack subsequencing), so we don't need to check for incoming tpdus
738 * being in a reneged portion of the window.
739 */
740
741 tp_stash(tpcb, e)
742 register struct tp_pcb *tpcb;
743 register struct tp_event *e;
744 {
745 register int ack_reason= tpcb->tp_ack_strat & ACK_STRAT_EACH;
746 /* 0--> delay acks until full window */
747 /* 1--> ack each tpdu */
748 #ifndef lint
749 #define E e->ATTR(DT_TPDU)
750 #else /* lint */
751 #define E e->ev_union.EV_DT_TPDU
752 #endif /* lint */
753
754 if ( E.e_eot ) {
755 register struct mbuf *n = E.e_data;
756 n->m_flags |= M_EOR;
757 n->m_act = 0;
758 }
759 IFDEBUG(D_STASH)
760 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
761 "stash: so_rcv before appending");
762 dump_mbuf(E.e_data,
763 "stash: e_data before appending");
764 ENDDEBUG
765
766 IFPERF(tpcb)
767 PStat(tpcb, Nb_from_ll) += E.e_datalen;
768 tpmeas(tpcb->tp_lref, TPtime_from_ll, &e->e_time,
769 E.e_seq, (u_int)PStat(tpcb, Nb_from_ll), (u_int)E.e_datalen);
770 ENDPERF
771
772 if (E.e_seq == tpcb->tp_rcvnxt) {
773
774 IFDEBUG(D_STASH)
775 printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
776 E.e_seq, E.e_datalen, E.e_eot);
777 ENDDEBUG
778
779 IFTRACE(D_STASH)
780 tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
781 E.e_seq, E.e_datalen, E.e_eot, 0);
782 ENDTRACE
783
784 SET_DELACK(tpcb);
785
786 sbappend(&tpcb->tp_sock->so_rcv, E.e_data);
787
788 SEQ_INC( tpcb, tpcb->tp_rcvnxt );
789 /*
790 * move chains from the reassembly queue to the socket buffer
791 */
792 if (tpcb->tp_rsycnt) {
793 register struct mbuf **mp;
794 struct mbuf **mplim;
795
796 mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt % tpcb->tp_maxlcredit);
797 mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
798
799 while (tpcb->tp_rsycnt && *mp) {
800 sbappend(&tpcb->tp_sock->so_rcv, *mp);
801 tpcb->tp_rsycnt--;
802 *mp = 0;
803 SEQ_INC(tpcb, tpcb->tp_rcvnxt);
804 ack_reason |= ACK_REORDER;
805 if (++mp == mplim)
806 mp = tpcb->tp_rsyq;
807 }
808 }
809 IFDEBUG(D_STASH)
810 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
811 "stash: so_rcv after appending");
812 ENDDEBUG
813
814 } else {
815 register struct mbuf **mp;
816 SeqNum uwe;
817
818 IFTRACE(D_STASH)
819 tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
820 E.e_seq, tpcb->tp_rcvnxt, tpcb->tp_lcredit, 0);
821 ENDTRACE
822
823 if (tpcb->tp_rsyq == 0)
824 tp_rsyset(tpcb);
825 uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
826 if (tpcb->tp_rsyq == 0 ||
827 !IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
828 ack_reason = ACK_DONT;
829 m_freem(E.e_data);
830 } else if (*(mp = tpcb->tp_rsyq + (E.e_seq % tpcb->tp_maxlcredit))) {
831 IFDEBUG(D_STASH)
832 printf("tp_stash - drop & ack\n");
833 ENDDEBUG
834
835 /* retransmission - drop it and force an ack */
836 IncStat(ts_dt_dup);
837 IFPERF(tpcb)
838 IncPStat(tpcb, tps_n_ack_cuz_dup);
839 ENDPERF
840
841 m_freem(E.e_data);
842 ack_reason |= ACK_DUP;
843 } else {
844 *mp = E.e_data;
845 tpcb->tp_rsycnt++;
846 ack_reason = ACK_DONT;
847 }
848 }
849 /* there were some comments of historical interest here. */
850 {
851 LOCAL_CREDIT(tpcb);
852
853 if ( E.e_seq == tpcb->tp_sent_uwe )
854 ack_reason |= ACK_STRAT_FULLWIN;
855
856 IFTRACE(D_STASH)
857 tptraceTPCB(TPPTmisc,
858 "end of stash, eot, ack_reason, sent_uwe ",
859 E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
860 ENDTRACE
861
862 if ( ack_reason == ACK_DONT ) {
863 IncStat( ts_ackreason[ACK_DONT] );
864 return 0;
865 } else {
866 IFPERF(tpcb)
867 if(ack_reason & ACK_STRAT_EACH) {
868 IncPStat(tpcb, tps_n_ack_cuz_strat);
869 } else if(ack_reason & ACK_STRAT_FULLWIN) {
870 IncPStat(tpcb, tps_n_ack_cuz_fullwin);
871 } else if(ack_reason & ACK_REORDER) {
872 IncPStat(tpcb, tps_n_ack_cuz_reorder);
873 }
874 tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
875 SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
876 ENDPERF
877 {
878 register int i;
879
880 /* keep track of all reasons that apply */
881 for( i=1; i<_ACK_NUM_REASONS_ ;i++) {
882 if( ack_reason & (1<<i) )
883 IncStat( ts_ackreason[i] );
884 }
885 }
886 return 1;
887 }
888 }
889 }
890
891 /*
892 * tp_rsyflush - drop all the packets on the reassembly queue.
893 * Do this when closing the socket, or when somebody has changed
894 * the space avaible in the receive socket (XXX).
895 */
896 tp_rsyflush(tpcb)
897 register struct tp_pcb *tpcb;
898 {
899 register struct mbuf *m, **mp;
900 if (tpcb->tp_rsycnt) {
901 for (mp == tpcb->tp_rsyq + tpcb->tp_maxlcredit;
902 --mp >= tpcb->tp_rsyq; )
903 if (*mp) {
904 tpcb->tp_rsycnt--;
905 m_freem(*mp);
906 }
907 if (tpcb->tp_rsycnt) {
908 printf("tp_rsyflush %x\n", tpcb);
909 tpcb->tp_rsycnt = 0;
910 }
911 }
912 FREE((caddr_t)tpcb->tp_rsyq, M_PCB);
913 tpcb->tp_rsyq = 0;
914 }
915
916 tp_rsyset(tpcb)
917 register struct tp_pcb *tpcb;
918 {
919 register struct socket *so = tpcb->tp_sock;
920 int maxcredit = tpcb->tp_xtd_format ? 0xffff : 0xf;
921 int old_credit = tpcb->tp_maxlcredit;
922 caddr_t rsyq;
923
924 tpcb->tp_maxlcredit = maxcredit = min(maxcredit,
925 (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize)/ tpcb->tp_l_tpdusize);
926
927 if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0)
928 return;
929 maxcredit *= sizeof(struct mbuf *);
930 if (tpcb->tp_rsyq)
931 tp_rsyflush(tpcb);
932 // if (rsyq = (caddr_t)malloc(maxcredit, M_PCB, M_NOWAIT))
933 MALLOC(rsyq, caddr_t, maxcredit, M_PCB, M_NOWAIT);
934 if (rsyq)
935 bzero(rsyq, maxcredit);
936 tpcb->tp_rsyq = (struct mbuf **)rsyq;
937 }
938
939 tpsbcheck(tpcb, i)
940 struct tp_pcb *tpcb;
941 {
942 register struct mbuf *n, *m;
943 register int len = 0, mbcnt = 0, pktlen;
944 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
945
946 for (n = sb->sb_mb; n; n = n->m_nextpkt) {
947 if ((n->m_flags & M_PKTHDR) == 0)
948 panic("tpsbcheck nohdr");
949 pktlen = len + n->m_pkthdr.len;
950 for (m = n; m; m = m->m_next) {
951 len += m->m_len;
952 mbcnt += MSIZE;
953 if (m->m_flags & M_EXT)
954 mbcnt += m->m_ext.ext_size;
955 }
956 if (len != pktlen) {
957 printf("test %d; len %d != pktlen %d on mbuf 0x%x\n",
958 i, len, pktlen, n);
959 panic("tpsbcheck short");
960 }
961 }
962 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
963 printf("test %d: cc %d != %d || mbcnt %d != %d\n", i, len, sb->sb_cc,
964 mbcnt, sb->sb_mbcnt);
965 panic("tpsbcheck");
966 }
967 }