]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
c910b4d9 | 2 | * Copyright (c) 2000-2008 Apple Inc. All rights reserved. |
5d5c5d0d | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* | |
29 | * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 | |
30 | * The Regents of the University of California. All rights reserved. | |
31 | * | |
32 | * Redistribution and use in source and binary forms, with or without | |
33 | * modification, are permitted provided that the following conditions | |
34 | * are met: | |
35 | * 1. Redistributions of source code must retain the above copyright | |
36 | * notice, this list of conditions and the following disclaimer. | |
37 | * 2. Redistributions in binary form must reproduce the above copyright | |
38 | * notice, this list of conditions and the following disclaimer in the | |
39 | * documentation and/or other materials provided with the distribution. | |
40 | * 3. All advertising materials mentioning features or use of this software | |
41 | * must display the following acknowledgement: | |
42 | * This product includes software developed by the University of | |
43 | * California, Berkeley and its contributors. | |
44 | * 4. Neither the name of the University nor the names of its contributors | |
45 | * may be used to endorse or promote products derived from this software | |
46 | * without specific prior written permission. | |
47 | * | |
48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
58 | * SUCH DAMAGE. | |
59 | * | |
60 | * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 | |
9bccf70c | 61 | * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.34.2.11 2001/08/22 00:59:12 silby Exp $ |
1c79356b A |
62 | */ |
63 | ||
1c79356b A |
64 | |
65 | #include <sys/param.h> | |
66 | #include <sys/systm.h> | |
67 | #include <sys/kernel.h> | |
9bccf70c | 68 | #include <sys/mbuf.h> |
1c79356b A |
69 | #include <sys/sysctl.h> |
70 | #include <sys/socket.h> | |
71 | #include <sys/socketvar.h> | |
72 | #include <sys/protosw.h> | |
91447636 | 73 | #include <kern/locks.h> |
1c79356b A |
74 | |
75 | #include <kern/cpu_number.h> /* before tcp_seq.h, for tcp_random18() */ | |
76 | ||
77 | #include <net/route.h> | |
78 | ||
79 | #include <netinet/in.h> | |
80 | #include <netinet/in_systm.h> | |
1c79356b | 81 | #include <netinet/in_pcb.h> |
9bccf70c A |
82 | #if INET6 |
83 | #include <netinet6/in6_pcb.h> | |
84 | #endif | |
1c79356b A |
85 | #include <netinet/ip_var.h> |
86 | #include <netinet/tcp.h> | |
87 | #include <netinet/tcp_fsm.h> | |
88 | #include <netinet/tcp_seq.h> | |
89 | #include <netinet/tcp_timer.h> | |
90 | #include <netinet/tcp_var.h> | |
91 | #include <netinet/tcpip.h> | |
92 | #if TCPDEBUG | |
93 | #include <netinet/tcp_debug.h> | |
94 | #endif | |
95 | #include <sys/kdebug.h> | |
96 | ||
2d21ac55 A |
97 | extern void postevent(struct socket *, struct sockbuf *, |
98 | int); | |
1c79356b A |
99 | #define DBG_FNC_TCP_FAST NETDBG_CODE(DBG_NETTCP, (5 << 8)) |
100 | #define DBG_FNC_TCP_SLOW NETDBG_CODE(DBG_NETTCP, (5 << 8) | 1) | |
101 | ||
2d21ac55 A |
102 | static int background_io_trigger = 5; |
103 | SYSCTL_INT(_net_inet_tcp, OID_AUTO, background_io_trigger, CTLFLAG_RW, | |
104 | &background_io_trigger, 0, "Background IO Trigger Setting"); | |
105 | ||
55e303ae A |
106 | /* |
107 | * NOTE - WARNING | |
108 | * | |
109 | * | |
110 | * | |
111 | * | |
112 | */ | |
9bccf70c A |
113 | static int |
114 | sysctl_msec_to_ticks SYSCTL_HANDLER_ARGS | |
115 | { | |
2d21ac55 | 116 | #pragma unused(arg1, arg2) |
9bccf70c | 117 | int error, s, tt; |
1c79356b | 118 | |
9bccf70c | 119 | tt = *(int *)oidp->oid_arg1; |
2d21ac55 | 120 | s = tt * 1000 / TCP_RETRANSHZ;; |
1c79356b | 121 | |
9bccf70c A |
122 | error = sysctl_handle_int(oidp, &s, 0, req); |
123 | if (error || !req->newptr) | |
124 | return (error); | |
125 | ||
2d21ac55 | 126 | tt = s * TCP_RETRANSHZ / 1000; |
9bccf70c A |
127 | if (tt < 1) |
128 | return (EINVAL); | |
129 | ||
130 | *(int *)oidp->oid_arg1 = tt; | |
131 | return (0); | |
132 | } | |
1c79356b | 133 | |
9bccf70c A |
134 | int tcp_keepinit; |
135 | SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, | |
136 | &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", ""); | |
137 | ||
138 | int tcp_keepidle; | |
139 | SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, | |
140 | &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", ""); | |
141 | ||
142 | int tcp_keepintvl; | |
143 | SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, | |
144 | &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", ""); | |
145 | ||
9bccf70c A |
146 | int tcp_msl; |
147 | SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, | |
148 | &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); | |
1c79356b A |
149 | |
150 | static int always_keepalive = 0; | |
9bccf70c A |
151 | SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, |
152 | &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); | |
1c79356b A |
153 | |
154 | static int tcp_keepcnt = TCPTV_KEEPCNT; | |
2d21ac55 | 155 | static int tcp_gc_done = FALSE; /* perfromed garbage collection of "used" sockets */ |
1c79356b | 156 | /* max idle probes */ |
9bccf70c | 157 | int tcp_maxpersistidle; |
1c79356b A |
158 | /* max idle time in persist */ |
159 | int tcp_maxidle; | |
160 | ||
1c79356b A |
161 | struct inpcbhead time_wait_slots[N_TIME_WAIT_SLOTS]; |
162 | int cur_tw_slot = 0; | |
163 | ||
164 | u_long *delack_bitmask; | |
1c79356b | 165 | |
2d21ac55 A |
166 | void add_to_time_wait_locked(struct tcpcb *tp); |
167 | void add_to_time_wait(struct tcpcb *tp) ; | |
168 | ||
1c79356b | 169 | |
2d21ac55 | 170 | void add_to_time_wait_locked(struct tcpcb *tp) |
1c79356b A |
171 | { |
172 | int tw_slot; | |
2d21ac55 | 173 | struct inpcbinfo *pcbinfo = &tcbinfo; |
1c79356b | 174 | |
91447636 | 175 | /* pcb list should be locked when we get here */ |
2d21ac55 | 176 | lck_rw_assert(pcbinfo->mtx, LCK_RW_ASSERT_EXCLUSIVE); |
91447636 | 177 | |
1c79356b A |
178 | LIST_REMOVE(tp->t_inpcb, inp_list); |
179 | ||
2d21ac55 | 180 | if (tp->t_timer[TCPT_2MSL] <= 0) |
1c79356b A |
181 | tp->t_timer[TCPT_2MSL] = 1; |
182 | ||
2d21ac55 A |
183 | /* |
184 | * Because we're pulling this pcb out of the main TCP pcb list, | |
185 | * we need to recalculate the TCPT_2MSL timer value for tcp_slowtimo | |
186 | * higher timer granularity. | |
187 | */ | |
188 | ||
189 | tp->t_timer[TCPT_2MSL] = (tp->t_timer[TCPT_2MSL] / TCP_RETRANSHZ) * PR_SLOWHZ; | |
190 | tp->t_rcvtime = (tp->t_rcvtime / TCP_RETRANSHZ) * PR_SLOWHZ; | |
191 | ||
192 | tp->t_rcvtime += tp->t_timer[TCPT_2MSL] & (N_TIME_WAIT_SLOTS - 1); | |
193 | ||
1c79356b A |
194 | tw_slot = (tp->t_timer[TCPT_2MSL] & (N_TIME_WAIT_SLOTS - 1)) + cur_tw_slot; |
195 | if (tw_slot >= N_TIME_WAIT_SLOTS) | |
196 | tw_slot -= N_TIME_WAIT_SLOTS; | |
197 | ||
198 | LIST_INSERT_HEAD(&time_wait_slots[tw_slot], tp->t_inpcb, inp_list); | |
199 | } | |
200 | ||
2d21ac55 | 201 | void add_to_time_wait(struct tcpcb *tp) |
91447636 A |
202 | { |
203 | struct inpcbinfo *pcbinfo = &tcbinfo; | |
204 | ||
205 | if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) { | |
206 | tcp_unlock(tp->t_inpcb->inp_socket, 0, 0); | |
207 | lck_rw_lock_exclusive(pcbinfo->mtx); | |
208 | tcp_lock(tp->t_inpcb->inp_socket, 0, 0); | |
209 | } | |
210 | add_to_time_wait_locked(tp); | |
211 | lck_rw_done(pcbinfo->mtx); | |
212 | } | |
1c79356b A |
213 | |
214 | ||
215 | ||
216 | ||
217 | /* | |
218 | * Fast timeout routine for processing delayed acks | |
219 | */ | |
220 | void | |
221 | tcp_fasttimo() | |
222 | { | |
2d21ac55 | 223 | struct inpcb *inp; |
1c79356b | 224 | register struct tcpcb *tp; |
2d21ac55 A |
225 | struct socket *so; |
226 | #if TCPDEBUG | |
227 | int ostate; | |
228 | #endif | |
1c79356b A |
229 | |
230 | ||
91447636 | 231 | struct inpcbinfo *pcbinfo = &tcbinfo; |
1c79356b | 232 | |
2d21ac55 | 233 | int delack_done = 0; |
1c79356b A |
234 | |
235 | KERNEL_DEBUG(DBG_FNC_TCP_FAST | DBG_FUNC_START, 0,0,0,0,0); | |
236 | ||
1c79356b | 237 | |
91447636 A |
238 | lck_rw_lock_shared(pcbinfo->mtx); |
239 | ||
240 | /* Walk the list of valid tcpcbs and send ACKS on the ones with DELACK bit set */ | |
241 | ||
2d21ac55 A |
242 | LIST_FOREACH(inp, &tcb, inp_list) { |
243 | ||
244 | so = inp->inp_socket; | |
245 | ||
246 | if (so == &tcbinfo.nat_dummy_socket) | |
247 | continue; | |
248 | ||
249 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) | |
250 | continue; | |
251 | ||
252 | tcp_lock(so, 1, 0); | |
253 | ||
254 | if ((in_pcb_checkstate(inp, WNT_RELEASE,1) == WNT_STOPUSING) && so->so_usecount == 1) { | |
255 | tcp_unlock(so, 1, 0); | |
256 | continue; | |
257 | } | |
258 | ||
259 | tp = intotcpcb(inp); | |
260 | ||
261 | if (tp == 0 || tp->t_state == TCPS_LISTEN) { | |
262 | tcp_unlock(so, 1, 0); | |
263 | continue; | |
264 | } | |
265 | ||
266 | ||
267 | /* Only run the retransmit timer in that case */ | |
268 | if (tp->t_timer[0] && --tp->t_timer[0] == 0) { | |
269 | tp = tcp_timers(tp, 0); | |
270 | if (tp == NULL) | |
271 | goto tpgone; | |
272 | } | |
273 | ||
274 | /* TCP pcb timers following the tcp_now clock rate */ | |
275 | ||
276 | tp->t_rcvtime++; | |
277 | tp->t_starttime++; | |
278 | if (tp->t_rtttime) | |
279 | tp->t_rtttime++; | |
280 | ||
281 | /* | |
282 | * Process delayed acks (if enabled) according to PR_FASTHZ, not the retrans timer | |
283 | */ | |
284 | ||
285 | if (tcp_delack_enabled && (tcp_now % (TCP_RETRANSHZ/PR_FASTHZ)) && tp->t_flags & TF_DELACK) { | |
286 | delack_done++; | |
287 | tp->t_flags &= ~TF_DELACK; | |
288 | tp->t_flags |= TF_ACKNOW; | |
289 | tcpstat.tcps_delack++; | |
290 | tp->t_unacksegs = 0; | |
291 | (void) tcp_output(tp); | |
292 | } | |
293 | tpgone: | |
294 | tcp_unlock(so, 1, 0); | |
1c79356b | 295 | } |
2d21ac55 | 296 | KERNEL_DEBUG(DBG_FNC_TCP_FAST | DBG_FUNC_END, delack_done, 0, tcpstat.tcps_delack,0,0); |
91447636 | 297 | lck_rw_done(pcbinfo->mtx); |
2d21ac55 A |
298 | |
299 | tcp_now++; | |
300 | timeout(tcp_fasttimo, 0, hz/TCP_RETRANSHZ); | |
1c79356b A |
301 | } |
302 | ||
2d21ac55 A |
303 | void |
304 | tcp_garbage_collect(inp, istimewait) | |
305 | struct inpcb *inp; | |
306 | int istimewait; | |
307 | { | |
308 | struct socket *so; | |
309 | struct tcpcb *tp; | |
310 | ||
311 | ||
312 | if (inp->inp_socket == &tcbinfo.nat_dummy_socket) | |
313 | return; | |
314 | ||
315 | ||
316 | if (!lck_mtx_try_lock(inp->inpcb_mtx)) /* skip if still in use */ | |
317 | return; | |
318 | ||
319 | so = inp->inp_socket; | |
320 | tp = intotcpcb(inp); | |
321 | ||
322 | if ((so->so_usecount == 1) && | |
323 | (so->so_flags & SOF_OVERFLOW)) { | |
324 | in_pcbdetach(inp); | |
325 | so->so_usecount--; | |
326 | lck_mtx_unlock(inp->inpcb_mtx); | |
327 | return; | |
328 | } | |
329 | else { | |
330 | if (inp->inp_wantcnt != WNT_STOPUSING) { | |
331 | lck_mtx_unlock(inp->inpcb_mtx); | |
332 | return; | |
333 | } | |
334 | } | |
335 | ||
336 | ||
337 | if (so->so_usecount == 0) | |
338 | in_pcbdispose(inp); | |
339 | else { | |
340 | /* Special case: | |
341 | * - Check for embryonic socket stuck on listener queue (4023660) | |
342 | * - overflowed socket dropped from the listening queue | |
343 | * and dispose of remaining reference | |
344 | */ | |
345 | if ((so->so_usecount == 1) && | |
346 | (((tp->t_state == TCPS_CLOSED) && (so->so_head != NULL) && (so->so_state & SS_INCOMP)) || | |
347 | (istimewait && (so->so_flags & SOF_OVERFLOW)))) { | |
348 | so->so_usecount--; | |
349 | in_pcbdispose(inp); | |
350 | } else | |
351 | lck_mtx_unlock(inp->inpcb_mtx); | |
352 | } | |
353 | } | |
354 | ||
355 | static int bg_cnt = 0; | |
356 | #define BG_COUNTER_MAX 3 | |
357 | ||
1c79356b A |
358 | void |
359 | tcp_slowtimo() | |
360 | { | |
4a3eedf9 | 361 | struct inpcb *inp, *nxt; |
91447636 A |
362 | struct tcpcb *tp; |
363 | struct socket *so; | |
364 | int i; | |
1c79356b A |
365 | #if TCPDEBUG |
366 | int ostate; | |
367 | #endif | |
2d21ac55 A |
368 | |
369 | static int tws_checked = 0; | |
370 | ||
91447636 | 371 | struct inpcbinfo *pcbinfo = &tcbinfo; |
1c79356b A |
372 | |
373 | KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_START, 0,0,0,0,0); | |
1c79356b A |
374 | |
375 | tcp_maxidle = tcp_keepcnt * tcp_keepintvl; | |
376 | ||
91447636 A |
377 | lck_rw_lock_shared(pcbinfo->mtx); |
378 | ||
2d21ac55 | 379 | bg_cnt++; |
91447636 | 380 | |
2d21ac55 | 381 | LIST_FOREACH(inp, &tcb, inp_list) { |
8ad349bb | 382 | |
2d21ac55 A |
383 | so = inp->inp_socket; |
384 | ||
8ad349bb | 385 | if (so == &tcbinfo.nat_dummy_socket) |
2d21ac55 | 386 | continue; |
8ad349bb | 387 | |
2d21ac55 | 388 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) |
1c79356b | 389 | continue; |
91447636 | 390 | |
91447636 A |
391 | tcp_lock(so, 1, 0); |
392 | ||
393 | if ((in_pcb_checkstate(inp, WNT_RELEASE,1) == WNT_STOPUSING) && so->so_usecount == 1) { | |
394 | tcp_unlock(so, 1, 0); | |
395 | continue; | |
396 | } | |
397 | tp = intotcpcb(inp); | |
398 | if (tp == 0 || tp->t_state == TCPS_LISTEN) { | |
399 | tcp_unlock(so, 1, 0); | |
400 | continue; | |
401 | } | |
9bccf70c | 402 | |
2d21ac55 A |
403 | tp = intotcpcb(inp); |
404 | ||
405 | if (tp == 0 || tp->t_state == TCPS_LISTEN) | |
406 | goto tpgone; | |
407 | ||
408 | #if TRAFFIC_MGT | |
409 | if (so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND && | |
410 | bg_cnt > BG_COUNTER_MAX) { | |
411 | u_int32_t curr_recvtotal = tcpstat.tcps_rcvtotal; | |
412 | u_int32_t curr_bg_recvtotal = tcpstat.tcps_bg_rcvtotal; | |
413 | u_int32_t bg_recvdiff = curr_bg_recvtotal - tp->bg_recv_snapshot; | |
414 | u_int32_t tot_recvdiff = curr_recvtotal - tp->tot_recv_snapshot; | |
415 | u_int32_t fg_recv_change = tot_recvdiff - bg_recvdiff; | |
416 | u_int32_t recv_change; | |
417 | ||
418 | if (!(so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BG_SUPPRESSED)) { | |
419 | if (tot_recvdiff) | |
420 | recv_change = (fg_recv_change * 100) / tot_recvdiff; | |
421 | else | |
422 | recv_change = 0; | |
423 | ||
424 | if (recv_change > background_io_trigger) { | |
425 | so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BG_SUPPRESSED; | |
426 | } | |
427 | ||
428 | tp->tot_recv_snapshot = curr_recvtotal; | |
429 | tp->bg_recv_snapshot = curr_bg_recvtotal; | |
430 | } | |
431 | else { // SUPPRESSED | |
432 | // this allows for bg traffic to subside before we start measuring total traffic change | |
433 | if (tot_recvdiff) | |
434 | recv_change = (bg_recvdiff * 100) / tot_recvdiff; | |
435 | else | |
436 | recv_change = 0; | |
437 | ||
438 | if (recv_change < background_io_trigger) { | |
439 | // Draconian for now: if there is any change at all, keep suppressed | |
440 | if (!tot_recvdiff) { | |
441 | so->so_traffic_mgt_flags &= ~TRAFFIC_MGT_SO_BG_SUPPRESSED; | |
442 | tp->t_unacksegs = 0; | |
443 | (void) tcp_output(tp); // open window | |
444 | } | |
445 | } | |
446 | ||
447 | tp->tot_recv_snapshot = curr_recvtotal; | |
448 | tp->bg_recv_snapshot = curr_bg_recvtotal; | |
449 | } | |
450 | } | |
451 | #endif /* TRAFFIC_MGT */ | |
452 | ||
453 | for (i = 1; i < TCPT_NTIMERS; i++) { | |
454 | if (tp->t_timer[i] != 0) { | |
455 | tp->t_timer[i] -= TCP_RETRANSHZ/PR_SLOWHZ; | |
456 | if (tp->t_timer[i] <= 0) { | |
1c79356b | 457 | #if TCPDEBUG |
2d21ac55 | 458 | ostate = tp->t_state; |
1c79356b | 459 | #endif |
2d21ac55 A |
460 | |
461 | tp->t_timer[i] = 0; /* account for granularity change between tcp_now and slowtimo */ | |
462 | tp = tcp_timers(tp, i); | |
463 | if (tp == NULL) | |
464 | goto tpgone; | |
1c79356b | 465 | #if TCPDEBUG |
2d21ac55 A |
466 | if (tp->t_inpcb->inp_socket->so_options |
467 | & SO_DEBUG) | |
468 | tcp_trace(TA_USER, ostate, tp, | |
469 | (void *)0, | |
470 | (struct tcphdr *)0, | |
471 | PRU_SLOWTIMO); | |
1c79356b | 472 | #endif |
2d21ac55 | 473 | } |
1c79356b A |
474 | } |
475 | } | |
1c79356b | 476 | tpgone: |
91447636 | 477 | tcp_unlock(so, 1, 0); |
1c79356b | 478 | } |
2d21ac55 A |
479 | |
480 | if (bg_cnt > 3) | |
481 | bg_cnt = 0; | |
1c79356b | 482 | |
2d21ac55 A |
483 | /* Second part of tcp_slowtimo: garbage collect socket/tcpcb |
484 | * We need to acquire the list lock exclusively to do this | |
485 | */ | |
486 | ||
487 | if (lck_rw_lock_shared_to_exclusive(pcbinfo->mtx) == FALSE) { | |
488 | if (tcp_gc_done == TRUE) { /* don't sweat it this time. cleanup was done last time */ | |
489 | tcp_gc_done = FALSE; | |
490 | KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_END, tws_checked, cur_tw_slot,0,0,0); | |
491 | return; /* Upgrade failed and lost lock - give up this time. */ | |
492 | } | |
493 | lck_rw_lock_exclusive(pcbinfo->mtx); /* Upgrade failed, lost lock now take it again exclusive */ | |
494 | } | |
495 | tcp_gc_done = TRUE; | |
1c79356b A |
496 | |
497 | /* | |
498 | * Process the items in the current time-wait slot | |
499 | */ | |
2d21ac55 A |
500 | #if KDEBUG |
501 | tws_checked = 0; | |
502 | #endif | |
503 | KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_NONE, tws_checked,0,0,0,0); | |
1c79356b | 504 | |
2d21ac55 | 505 | LIST_FOREACH(inp, &time_wait_slots[cur_tw_slot], inp_list) { |
1c79356b A |
506 | #if KDEBUG |
507 | tws_checked++; | |
508 | #endif | |
91447636 A |
509 | |
510 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) | |
511 | continue; | |
512 | ||
513 | tcp_lock(inp->inp_socket, 1, 0); | |
514 | ||
515 | if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) | |
516 | goto twunlock; | |
517 | ||
518 | tp = intotcpcb(inp); | |
2d21ac55 | 519 | if (tp == NULL) /* tp already closed, remove from list */ |
91447636 | 520 | goto twunlock; |
2d21ac55 | 521 | |
1c79356b A |
522 | if (tp->t_timer[TCPT_2MSL] >= N_TIME_WAIT_SLOTS) { |
523 | tp->t_timer[TCPT_2MSL] -= N_TIME_WAIT_SLOTS; | |
9bccf70c | 524 | tp->t_rcvtime += N_TIME_WAIT_SLOTS; |
1c79356b A |
525 | } |
526 | else | |
527 | tp->t_timer[TCPT_2MSL] = 0; | |
528 | ||
2d21ac55 A |
529 | if (tp->t_timer[TCPT_2MSL] == 0) { |
530 | ||
531 | /* That pcb is ready for a close */ | |
532 | tcp_free_sackholes(tp); | |
533 | tp = tcp_close(tp); | |
534 | } | |
91447636 A |
535 | twunlock: |
536 | tcp_unlock(inp->inp_socket, 1, 0); | |
537 | } | |
538 | ||
91447636 | 539 | |
4a3eedf9 | 540 | LIST_FOREACH_SAFE(inp, &tcb, inp_list, nxt) { |
2d21ac55 | 541 | tcp_garbage_collect(inp, 0); |
1c79356b A |
542 | } |
543 | ||
91447636 | 544 | /* Now cleanup the time wait ones */ |
4a3eedf9 | 545 | LIST_FOREACH_SAFE(inp, &time_wait_slots[cur_tw_slot], inp_list, nxt) { |
2d21ac55 | 546 | tcp_garbage_collect(inp, 1); |
91447636 A |
547 | } |
548 | ||
1c79356b A |
549 | if (++cur_tw_slot >= N_TIME_WAIT_SLOTS) |
550 | cur_tw_slot = 0; | |
91447636 A |
551 | |
552 | lck_rw_done(pcbinfo->mtx); | |
1c79356b A |
553 | KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_END, tws_checked, cur_tw_slot,0,0,0); |
554 | } | |
555 | ||
556 | /* | |
557 | * Cancel all timers for TCP tp. | |
558 | */ | |
559 | void | |
560 | tcp_canceltimers(tp) | |
561 | struct tcpcb *tp; | |
562 | { | |
563 | register int i; | |
564 | ||
565 | for (i = 0; i < TCPT_NTIMERS; i++) | |
566 | tp->t_timer[i] = 0; | |
567 | } | |
568 | ||
9bccf70c A |
569 | int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = |
570 | { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; | |
571 | ||
1c79356b A |
572 | int tcp_backoff[TCP_MAXRXTSHIFT + 1] = |
573 | { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; | |
574 | ||
575 | static int tcp_totbackoff = 511; /* sum of tcp_backoff[] */ | |
576 | ||
577 | /* | |
578 | * TCP timer processing. | |
579 | */ | |
580 | struct tcpcb * | |
581 | tcp_timers(tp, timer) | |
582 | register struct tcpcb *tp; | |
583 | int timer; | |
584 | { | |
585 | register int rexmt; | |
586 | struct socket *so_tmp; | |
2d21ac55 | 587 | struct inpcbinfo *pcbinfo = &tcbinfo; |
9bccf70c A |
588 | struct tcptemp *t_template; |
589 | ||
55e303ae A |
590 | #if TCPDEBUG |
591 | int ostate; | |
592 | #endif | |
593 | ||
1c79356b A |
594 | #if INET6 |
595 | int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV4) == 0; | |
596 | #endif /* INET6 */ | |
597 | ||
91447636 | 598 | so_tmp = tp->t_inpcb->inp_socket; |
9bccf70c | 599 | |
1c79356b A |
600 | switch (timer) { |
601 | ||
602 | /* | |
603 | * 2 MSL timeout in shutdown went off. If we're closed but | |
604 | * still waiting for peer to close and connection has been idle | |
2d21ac55 A |
605 | * too long, or if 2MSL time is up from TIME_WAIT or FIN_WAIT_2, |
606 | * delete connection control block. | |
607 | * Otherwise, (this case shouldn't happen) check again in a bit | |
608 | * we keep the socket in the main list in that case. | |
1c79356b A |
609 | */ |
610 | case TCPT_2MSL: | |
8ad349bb | 611 | tcp_free_sackholes(tp); |
1c79356b | 612 | if (tp->t_state != TCPS_TIME_WAIT && |
2d21ac55 A |
613 | tp->t_state != TCPS_FIN_WAIT_2 && |
614 | tp->t_rcvtime < tcp_maxidle) { | |
91447636 | 615 | tp->t_timer[TCPT_2MSL] = (unsigned long)tcp_keepintvl; |
1c79356b | 616 | } |
91447636 | 617 | else { |
1c79356b | 618 | tp = tcp_close(tp); |
91447636 A |
619 | return(tp); |
620 | } | |
1c79356b A |
621 | break; |
622 | ||
623 | /* | |
624 | * Retransmission timer went off. Message has not | |
625 | * been acked within retransmit interval. Back off | |
626 | * to a longer retransmit interval and retransmit one segment. | |
627 | */ | |
628 | case TCPT_REXMT: | |
8ad349bb | 629 | tcp_free_sackholes(tp); |
1c79356b A |
630 | if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { |
631 | tp->t_rxtshift = TCP_MAXRXTSHIFT; | |
632 | tcpstat.tcps_timeoutdrop++; | |
1c79356b A |
633 | tp = tcp_drop(tp, tp->t_softerror ? |
634 | tp->t_softerror : ETIMEDOUT); | |
635 | postevent(so_tmp, 0, EV_TIMEOUT); | |
636 | break; | |
637 | } | |
9bccf70c A |
638 | |
639 | if (tp->t_rxtshift == 1) { | |
640 | /* | |
641 | * first retransmit; record ssthresh and cwnd so they can | |
642 | * be recovered if this turns out to be a "bad" retransmit. | |
643 | * A retransmit is considered "bad" if an ACK for this | |
644 | * segment is received within RTT/2 interval; the assumption | |
645 | * here is that the ACK was already in flight. See | |
646 | * "On Estimating End-to-End Network Path Properties" by | |
647 | * Allman and Paxson for more details. | |
648 | */ | |
649 | tp->snd_cwnd_prev = tp->snd_cwnd; | |
650 | tp->snd_ssthresh_prev = tp->snd_ssthresh; | |
8ad349bb A |
651 | tp->snd_recover_prev = tp->snd_recover; |
652 | if (IN_FASTRECOVERY(tp)) | |
653 | tp->t_flags |= TF_WASFRECOVERY; | |
654 | else | |
655 | tp->t_flags &= ~TF_WASFRECOVERY; | |
2d21ac55 | 656 | tp->t_badrxtwin = tcp_now + (tp->t_srtt >> (TCP_RTT_SHIFT)); |
9bccf70c | 657 | } |
1c79356b | 658 | tcpstat.tcps_rexmttimeo++; |
9bccf70c A |
659 | if (tp->t_state == TCPS_SYN_SENT) |
660 | rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift]; | |
661 | else | |
662 | rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; | |
1c79356b | 663 | TCPT_RANGESET(tp->t_rxtcur, rexmt, |
9bccf70c | 664 | tp->t_rttmin, TCPTV_REXMTMAX); |
1c79356b | 665 | tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; |
9bccf70c A |
666 | |
667 | /* | |
668 | * Disable rfc1323 and rfc1644 if we havn't got any response to | |
669 | * our third SYN to work-around some broken terminal servers | |
670 | * (most of which have hopefully been retired) that have bad VJ | |
671 | * header compression code which trashes TCP segments containing | |
672 | * unknown-to-them TCP options. | |
673 | */ | |
674 | if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3)) | |
675 | tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_REQ_CC); | |
1c79356b A |
676 | /* |
677 | * If losing, let the lower level know and try for | |
678 | * a better route. Also, if we backed off this far, | |
679 | * our srtt estimate is probably bogus. Clobber it | |
680 | * so we'll take the next rtt measurement as our srtt; | |
681 | * move the current srtt into rttvar to keep the current | |
682 | * retransmit times until then. | |
683 | */ | |
684 | if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { | |
685 | #if INET6 | |
686 | if (isipv6) | |
687 | in6_losing(tp->t_inpcb); | |
688 | else | |
689 | #endif /* INET6 */ | |
690 | in_losing(tp->t_inpcb); | |
691 | tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); | |
692 | tp->t_srtt = 0; | |
693 | } | |
694 | tp->snd_nxt = tp->snd_una; | |
9bccf70c A |
695 | /* |
696 | * Note: We overload snd_recover to function also as the | |
697 | * snd_last variable described in RFC 2582 | |
698 | */ | |
699 | tp->snd_recover = tp->snd_max; | |
1c79356b A |
700 | /* |
701 | * Force a segment to be sent. | |
702 | */ | |
703 | tp->t_flags |= TF_ACKNOW; | |
704 | /* | |
705 | * If timing a segment in this window, stop the timer. | |
706 | */ | |
9bccf70c | 707 | tp->t_rtttime = 0; |
1c79356b A |
708 | /* |
709 | * Close the congestion window down to one segment | |
710 | * (we'll open it by one segment for each ack we get). | |
711 | * Since we probably have a window's worth of unacked | |
712 | * data accumulated, this "slow start" keeps us from | |
713 | * dumping all that data as back-to-back packets (which | |
714 | * might overwhelm an intermediate gateway). | |
715 | * | |
716 | * There are two phases to the opening: Initially we | |
717 | * open by one mss on each ack. This makes the window | |
718 | * size increase exponentially with time. If the | |
719 | * window is larger than the path can handle, this | |
720 | * exponential growth results in dropped packet(s) | |
721 | * almost immediately. To get more time between | |
722 | * drops but still "push" the network to take advantage | |
723 | * of improving conditions, we switch from exponential | |
724 | * to linear window opening at some threshhold size. | |
725 | * For a threshhold, we use half the current window | |
726 | * size, truncated to a multiple of the mss. | |
727 | * | |
728 | * (the minimum cwnd that will give us exponential | |
729 | * growth is 2 mss. We don't allow the threshhold | |
730 | * to go below this.) | |
731 | */ | |
cf7d32b8 A |
732 | if (tp->t_state >= TCPS_ESTABLISHED) { |
733 | u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; | |
734 | if (win < 2) | |
735 | win = 2; | |
736 | tp->snd_cwnd = tp->t_maxseg; | |
737 | tp->snd_ssthresh = win * tp->t_maxseg; | |
738 | tp->t_bytes_acked = 0; | |
739 | tp->t_dupacks = 0; | |
740 | tp->t_unacksegs = 0; | |
1c79356b | 741 | } |
8ad349bb | 742 | EXIT_FASTRECOVERY(tp); |
1c79356b A |
743 | (void) tcp_output(tp); |
744 | break; | |
745 | ||
746 | /* | |
747 | * Persistance timer into zero window. | |
748 | * Force a byte to be output, if possible. | |
749 | */ | |
750 | case TCPT_PERSIST: | |
751 | tcpstat.tcps_persisttimeo++; | |
752 | /* | |
753 | * Hack: if the peer is dead/unreachable, we do not | |
754 | * time out if the window is closed. After a full | |
755 | * backoff, drop the connection if the idle time | |
756 | * (no responses to probes) reaches the maximum | |
757 | * backoff that we would use if retransmitting. | |
758 | */ | |
759 | if (tp->t_rxtshift == TCP_MAXRXTSHIFT && | |
9bccf70c A |
760 | (tp->t_rcvtime >= tcp_maxpersistidle || |
761 | tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { | |
1c79356b A |
762 | tcpstat.tcps_persistdrop++; |
763 | so_tmp = tp->t_inpcb->inp_socket; | |
764 | tp = tcp_drop(tp, ETIMEDOUT); | |
765 | postevent(so_tmp, 0, EV_TIMEOUT); | |
766 | break; | |
767 | } | |
768 | tcp_setpersist(tp); | |
769 | tp->t_force = 1; | |
2d21ac55 | 770 | tp->t_unacksegs = 0; |
1c79356b A |
771 | (void) tcp_output(tp); |
772 | tp->t_force = 0; | |
773 | break; | |
774 | ||
775 | /* | |
776 | * Keep-alive timer went off; send something | |
777 | * or drop connection if idle for too long. | |
778 | */ | |
779 | case TCPT_KEEP: | |
780 | tcpstat.tcps_keeptimeo++; | |
781 | if (tp->t_state < TCPS_ESTABLISHED) | |
782 | goto dropit; | |
783 | if ((always_keepalive || | |
784 | tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) && | |
2d21ac55 | 785 | (tp->t_state <= TCPS_CLOSING || tp->t_state == TCPS_FIN_WAIT_2)) { |
91447636 | 786 | if (tp->t_rcvtime >= TCP_KEEPIDLE(tp) + (unsigned long)tcp_maxidle) |
1c79356b A |
787 | goto dropit; |
788 | /* | |
789 | * Send a packet designed to force a response | |
790 | * if the peer is up and reachable: | |
791 | * either an ACK if the connection is still alive, | |
792 | * or an RST if the peer has closed the connection | |
793 | * due to timeout or reboot. | |
794 | * Using sequence number tp->snd_una-1 | |
795 | * causes the transmitted zero-length segment | |
796 | * to lie outside the receive window; | |
797 | * by the protocol spec, this requires the | |
798 | * correspondent TCP to respond. | |
799 | */ | |
800 | tcpstat.tcps_keepprobe++; | |
9bccf70c A |
801 | t_template = tcp_maketemplate(tp); |
802 | if (t_template) { | |
c910b4d9 A |
803 | unsigned int ifscope; |
804 | ||
805 | if (tp->t_inpcb->inp_flags & INP_BOUND_IF) | |
806 | ifscope = tp->t_inpcb->inp_boundif; | |
807 | else | |
808 | ifscope = IFSCOPE_NONE; | |
809 | ||
9bccf70c A |
810 | tcp_respond(tp, t_template->tt_ipgen, |
811 | &t_template->tt_t, (struct mbuf *)NULL, | |
c910b4d9 | 812 | tp->rcv_nxt, tp->snd_una - 1, 0, ifscope); |
9bccf70c A |
813 | (void) m_free(dtom(t_template)); |
814 | } | |
1c79356b A |
815 | tp->t_timer[TCPT_KEEP] = tcp_keepintvl; |
816 | } else | |
55e303ae | 817 | tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp); |
1c79356b | 818 | break; |
9bccf70c A |
819 | |
820 | #if TCPDEBUG | |
821 | if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) | |
822 | tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, | |
823 | PRU_SLOWTIMO); | |
824 | #endif | |
1c79356b A |
825 | dropit: |
826 | tcpstat.tcps_keepdrops++; | |
1c79356b A |
827 | tp = tcp_drop(tp, ETIMEDOUT); |
828 | postevent(so_tmp, 0, EV_TIMEOUT); | |
829 | break; | |
830 | } | |
831 | return (tp); | |
832 | } |