]>
Commit | Line | Data |
---|---|---|
fe8ab488 | 1 | /* |
cb323159 | 2 | * Copyright (c) 2013-2018 Apple Inc. All rights reserved. |
fe8ab488 A |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <sys/param.h> | |
30 | #include <sys/systm.h> | |
31 | #include <sys/kernel.h> | |
32 | #include <sys/syslog.h> | |
33 | #include <sys/protosw.h> | |
34 | #include <sys/socketvar.h> | |
35 | #include <sys/kern_control.h> | |
36 | #include <sys/domain.h> | |
37 | ||
38 | #include <netinet/in.h> | |
39 | #include <netinet/tcp.h> | |
40 | #include <netinet/tcp_var.h> | |
41 | #include <netinet/tcp_cc.h> | |
42 | #include <mach/sdt.h> | |
43 | #include <libkern/OSAtomic.h> | |
44 | ||
f427ee49 A |
45 | static int tcp_cc_debug; |
46 | SYSCTL_INT(_net_inet_tcp, OID_AUTO, cc_debug, CTLFLAG_RW | CTLFLAG_LOCKED, | |
47 | &tcp_cc_debug, 0, "Enable debug data collection"); | |
fe8ab488 A |
48 | |
49 | extern struct tcp_cc_algo tcp_cc_newreno; | |
50 | SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno_sockets, | |
0a7de745 A |
51 | CTLFLAG_RD | CTLFLAG_LOCKED, &tcp_cc_newreno.num_sockets, |
52 | 0, "Number of sockets using newreno"); | |
fe8ab488 A |
53 | |
54 | extern struct tcp_cc_algo tcp_cc_ledbat; | |
55 | SYSCTL_INT(_net_inet_tcp, OID_AUTO, background_sockets, | |
0a7de745 A |
56 | CTLFLAG_RD | CTLFLAG_LOCKED, &tcp_cc_ledbat.num_sockets, |
57 | 0, "Number of sockets using background transport"); | |
fe8ab488 A |
58 | |
59 | extern struct tcp_cc_algo tcp_cc_cubic; | |
60 | SYSCTL_INT(_net_inet_tcp, OID_AUTO, cubic_sockets, | |
0a7de745 A |
61 | CTLFLAG_RD | CTLFLAG_LOCKED, &tcp_cc_cubic.num_sockets, |
62 | 0, "Number of sockets using cubic"); | |
fe8ab488 | 63 | |
5ba3f43e | 64 | SYSCTL_SKMEM_TCP_INT(OID_AUTO, use_newreno, |
0a7de745 A |
65 | CTLFLAG_RW | CTLFLAG_LOCKED, int, tcp_use_newreno, 0, |
66 | "Use TCP NewReno by default"); | |
fe8ab488 | 67 | |
3e170ce0 A |
68 | static int tcp_check_cwnd_nonvalidated = 1; |
69 | #if (DEBUG || DEVELOPMENT) | |
70 | SYSCTL_INT(_net_inet_tcp, OID_AUTO, cwnd_nonvalidated, | |
71 | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_check_cwnd_nonvalidated, 0, | |
72 | "Check if congestion window is non-validated"); | |
73 | #endif /* (DEBUG || DEVELOPMENT) */ | |
74 | ||
fe8ab488 A |
/*
 * Set the ideal size of the send socket buffer `sndsb` from `size`:
 * the value is raised to at least tcp_sendspace and capped at
 * tcp_autosndbuf_max.
 *
 * The macro uses its `size` argument (it no longer reaches into the
 * caller's scope for a variable named `tp`), parenthesizes both
 * arguments, and expands to a single statement so it is safe in
 * unbraced if/else bodies. Invoke it with a trailing semicolon.
 */
#define SET_SNDSB_IDEAL_SIZE(sndsb, size) do {                          \
	(sndsb)->sb_idealsize = min(max(tcp_sendspace, (size)),         \
	    tcp_autosndbuf_max);                                        \
} while (0)
78 | ||
79 | /* Array containing pointers to currently implemented TCP CC algorithms */ | |
80 | struct tcp_cc_algo* tcp_cc_algo_list[TCP_CC_ALGO_COUNT]; | |
81 | struct zone *tcp_cc_zone; | |
82 | ||
fe8ab488 A |
83 | #define TCP_CCDBG_NOUNIT 0xffffffff |
84 | static kern_ctl_ref tcp_ccdbg_ctlref = NULL; | |
85 | volatile UInt32 tcp_ccdbg_unit = TCP_CCDBG_NOUNIT; | |
86 | ||
87 | void tcp_cc_init(void); | |
88 | static void tcp_cc_control_register(void); | |
89 | static errno_t tcp_ccdbg_control_connect(kern_ctl_ref kctl, | |
0a7de745 | 90 | struct sockaddr_ctl *sac, void **uinfo); |
fe8ab488 | 91 | static errno_t tcp_ccdbg_control_disconnect(kern_ctl_ref kctl, |
0a7de745 | 92 | u_int32_t unit, void *uinfo); |
fe8ab488 A |
93 | static struct tcp_cc_algo tcp_cc_algo_none; |
94 | /* | |
95 | * Initialize TCP congestion control algorithms. | |
96 | */ | |
0a7de745 | 97 | |
fe8ab488 A |
98 | void |
99 | tcp_cc_init(void) | |
100 | { | |
101 | bzero(&tcp_cc_algo_list, sizeof(tcp_cc_algo_list)); | |
102 | bzero(&tcp_cc_algo_none, sizeof(tcp_cc_algo_none)); | |
103 | ||
104 | tcp_cc_algo_list[TCP_CC_ALGO_NONE] = &tcp_cc_algo_none; | |
105 | tcp_cc_algo_list[TCP_CC_ALGO_NEWRENO_INDEX] = &tcp_cc_newreno; | |
106 | tcp_cc_algo_list[TCP_CC_ALGO_BACKGROUND_INDEX] = &tcp_cc_ledbat; | |
107 | tcp_cc_algo_list[TCP_CC_ALGO_CUBIC_INDEX] = &tcp_cc_cubic; | |
108 | ||
109 | tcp_cc_control_register(); | |
110 | } | |
111 | ||
112 | static void | |
113 | tcp_cc_control_register(void) | |
114 | { | |
115 | struct kern_ctl_reg ccdbg_control; | |
116 | errno_t err; | |
117 | ||
118 | bzero(&ccdbg_control, sizeof(ccdbg_control)); | |
cb323159 | 119 | strlcpy(ccdbg_control.ctl_name, TCP_CC_CONTROL_NAME, |
fe8ab488 A |
120 | sizeof(ccdbg_control.ctl_name)); |
121 | ccdbg_control.ctl_connect = tcp_ccdbg_control_connect; | |
122 | ccdbg_control.ctl_disconnect = tcp_ccdbg_control_disconnect; | |
123 | ccdbg_control.ctl_flags |= CTL_FLAG_PRIVILEGED; | |
124 | ccdbg_control.ctl_flags |= CTL_FLAG_REG_SOCK_STREAM; | |
cb323159 | 125 | ccdbg_control.ctl_sendsize = 32 * 1024; |
fe8ab488 A |
126 | |
127 | err = ctl_register(&ccdbg_control, &tcp_ccdbg_ctlref); | |
128 | if (err != 0) { | |
129 | log(LOG_ERR, "failed to register tcp_cc debug control"); | |
130 | } | |
131 | } | |
132 | ||
133 | /* Allow only one socket to connect at any time for debugging */ | |
134 | static errno_t | |
135 | tcp_ccdbg_control_connect(kern_ctl_ref kctl, struct sockaddr_ctl *sac, | |
0a7de745 | 136 | void **uinfo) |
fe8ab488 A |
137 | { |
138 | #pragma unused(kctl) | |
139 | #pragma unused(uinfo) | |
140 | ||
141 | UInt32 old_value = TCP_CCDBG_NOUNIT; | |
142 | UInt32 new_value = sac->sc_unit; | |
143 | ||
0a7de745 A |
144 | if (tcp_ccdbg_unit != old_value) { |
145 | return EALREADY; | |
146 | } | |
fe8ab488 | 147 | |
0a7de745 A |
148 | if (OSCompareAndSwap(old_value, new_value, &tcp_ccdbg_unit)) { |
149 | return 0; | |
150 | } else { | |
151 | return EALREADY; | |
152 | } | |
fe8ab488 A |
153 | } |
154 | ||
155 | static errno_t | |
156 | tcp_ccdbg_control_disconnect(kern_ctl_ref kctl, u_int32_t unit, void *uinfo) | |
157 | { | |
158 | #pragma unused(kctl, unit, uinfo) | |
159 | ||
160 | if (unit == tcp_ccdbg_unit) { | |
161 | UInt32 old_value = tcp_ccdbg_unit; | |
162 | UInt32 new_value = TCP_CCDBG_NOUNIT; | |
0a7de745 A |
163 | if (tcp_ccdbg_unit == new_value) { |
164 | return 0; | |
165 | } | |
fe8ab488 A |
166 | |
167 | if (!OSCompareAndSwap(old_value, new_value, | |
0a7de745 A |
168 | &tcp_ccdbg_unit)) { |
169 | log(LOG_DEBUG, | |
fe8ab488 | 170 | "failed to disconnect tcp_cc debug control"); |
0a7de745 | 171 | } |
fe8ab488 | 172 | } |
0a7de745 | 173 | return 0; |
fe8ab488 A |
174 | } |
175 | ||
176 | inline void | |
177 | tcp_ccdbg_trace(struct tcpcb *tp, struct tcphdr *th, int32_t event) | |
178 | { | |
179 | #if !CONFIG_DTRACE | |
180 | #pragma unused(th) | |
181 | #endif /* !CONFIG_DTRACE */ | |
182 | struct inpcb *inp = tp->t_inpcb; | |
183 | ||
184 | if (tcp_cc_debug && tcp_ccdbg_unit > 0) { | |
185 | struct tcp_cc_debug_state dbg_state; | |
186 | struct timespec tv; | |
187 | ||
188 | bzero(&dbg_state, sizeof(dbg_state)); | |
0a7de745 | 189 | |
fe8ab488 A |
190 | nanotime(&tv); |
191 | /* Take time in seconds */ | |
192 | dbg_state.ccd_tsns = (tv.tv_sec * 1000000000) + tv.tv_nsec; | |
0a7de745 | 193 | inet_ntop(SOCK_DOM(inp->inp_socket), |
fe8ab488 A |
194 | ((SOCK_DOM(inp->inp_socket) == PF_INET) ? |
195 | (void *)&inp->inp_laddr.s_addr : | |
196 | (void *)&inp->in6p_laddr), dbg_state.ccd_srcaddr, | |
197 | sizeof(dbg_state.ccd_srcaddr)); | |
198 | dbg_state.ccd_srcport = ntohs(inp->inp_lport); | |
199 | inet_ntop(SOCK_DOM(inp->inp_socket), | |
200 | ((SOCK_DOM(inp->inp_socket) == PF_INET) ? | |
201 | (void *)&inp->inp_faddr.s_addr : | |
202 | (void *)&inp->in6p_faddr), dbg_state.ccd_destaddr, | |
203 | sizeof(dbg_state.ccd_destaddr)); | |
204 | dbg_state.ccd_destport = ntohs(inp->inp_fport); | |
205 | ||
206 | dbg_state.ccd_snd_cwnd = tp->snd_cwnd; | |
207 | dbg_state.ccd_snd_wnd = tp->snd_wnd; | |
208 | dbg_state.ccd_snd_ssthresh = tp->snd_ssthresh; | |
3e170ce0 | 209 | dbg_state.ccd_pipeack = tp->t_pipeack; |
fe8ab488 A |
210 | dbg_state.ccd_rttcur = tp->t_rttcur; |
211 | dbg_state.ccd_rxtcur = tp->t_rxtcur; | |
212 | dbg_state.ccd_srtt = tp->t_srtt >> TCP_RTT_SHIFT; | |
213 | dbg_state.ccd_event = event; | |
214 | dbg_state.ccd_sndcc = inp->inp_socket->so_snd.sb_cc; | |
215 | dbg_state.ccd_sndhiwat = inp->inp_socket->so_snd.sb_hiwat; | |
216 | dbg_state.ccd_bytes_acked = tp->t_bytes_acked; | |
ecc0ceb4 | 217 | dbg_state.ccd_cc_index = tp->tcp_cc_index; |
fe8ab488 | 218 | switch (tp->tcp_cc_index) { |
0a7de745 | 219 | case TCP_CC_ALGO_CUBIC_INDEX: |
fe8ab488 A |
220 | dbg_state.u.cubic_state.ccd_last_max = |
221 | tp->t_ccstate->cub_last_max; | |
222 | dbg_state.u.cubic_state.ccd_tcp_win = | |
223 | tp->t_ccstate->cub_tcp_win; | |
fe8ab488 A |
224 | dbg_state.u.cubic_state.ccd_avg_lastmax = |
225 | tp->t_ccstate->cub_avg_lastmax; | |
226 | dbg_state.u.cubic_state.ccd_mean_deviation = | |
227 | tp->t_ccstate->cub_mean_dev; | |
228 | break; | |
0a7de745 | 229 | case TCP_CC_ALGO_BACKGROUND_INDEX: |
ecc0ceb4 A |
230 | dbg_state.u.ledbat_state.led_base_rtt = |
231 | get_base_rtt(tp); | |
232 | break; | |
0a7de745 | 233 | default: |
fe8ab488 A |
234 | break; |
235 | } | |
236 | ||
237 | ctl_enqueuedata(tcp_ccdbg_ctlref, tcp_ccdbg_unit, | |
0a7de745 | 238 | &dbg_state, sizeof(dbg_state), 0); |
fe8ab488 A |
239 | } |
240 | DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, | |
0a7de745 | 241 | struct tcpcb *, tp, struct tcphdr *, th, int32_t, event); |
fe8ab488 A |
242 | } |
243 | ||
0a7de745 A |
244 | void |
245 | tcp_cc_resize_sndbuf(struct tcpcb *tp) | |
fe8ab488 A |
246 | { |
247 | struct sockbuf *sb; | |
248 | /* | |
249 | * If the send socket buffer size is bigger than ssthresh, | |
250 | * it is time to trim it because we do not want to hold | |
251 | * too many mbufs in the socket buffer | |
252 | */ | |
253 | sb = &tp->t_inpcb->inp_socket->so_snd; | |
254 | if (sb->sb_hiwat > tp->snd_ssthresh && | |
0a7de745 | 255 | (sb->sb_flags & SB_AUTOSIZE)) { |
fe8ab488 A |
256 | if (sb->sb_idealsize > tp->snd_ssthresh) { |
257 | SET_SNDSB_IDEAL_SIZE(sb, tp->snd_ssthresh); | |
258 | } | |
259 | sb->sb_flags |= SB_TRIM; | |
260 | } | |
261 | } | |
262 | ||
0a7de745 A |
263 | void |
264 | tcp_bad_rexmt_fix_sndbuf(struct tcpcb *tp) | |
fe8ab488 A |
265 | { |
266 | struct sockbuf *sb; | |
267 | sb = &tp->t_inpcb->inp_socket->so_snd; | |
0a7de745 | 268 | if ((sb->sb_flags & (SB_TRIM | SB_AUTOSIZE)) == (SB_TRIM | SB_AUTOSIZE)) { |
fe8ab488 | 269 | /* |
5ba3f43e | 270 | * If there was a retransmission that was not necessary |
fe8ab488 A |
271 | * then the size of socket buffer can be restored to |
272 | * what it was before | |
273 | */ | |
274 | SET_SNDSB_IDEAL_SIZE(sb, tp->snd_ssthresh); | |
275 | if (sb->sb_hiwat <= sb->sb_idealsize) { | |
276 | sbreserve(sb, sb->sb_idealsize); | |
277 | sb->sb_flags &= ~SB_TRIM; | |
278 | } | |
279 | } | |
280 | } | |
281 | ||
282 | /* | |
283 | * Calculate initial cwnd according to RFC3390. | |
fe8ab488 A |
284 | */ |
285 | void | |
286 | tcp_cc_cwnd_init_or_reset(struct tcpcb *tp) | |
287 | { | |
288 | if (tp->t_flags & TF_LOCAL) { | |
289 | tp->snd_cwnd = tp->t_maxseg * ss_fltsz_local; | |
290 | } else { | |
f427ee49 A |
291 | if (tcp_cubic_minor_fixes) { |
292 | tp->snd_cwnd = tcp_initial_cwnd(tp); | |
293 | } else { | |
294 | /* initial congestion window according to RFC 3390 */ | |
fe8ab488 | 295 | tp->snd_cwnd = min(4 * tp->t_maxseg, |
0a7de745 | 296 | max(2 * tp->t_maxseg, TCP_CC_CWND_INIT_BYTES)); |
0a7de745 | 297 | } |
fe8ab488 A |
298 | } |
299 | } | |
300 | ||
301 | /* | |
302 | * Indicate whether this ack should be delayed. | |
303 | * Here is the explanation for different settings of tcp_delack_enabled: | |
cb323159 | 304 | * - when set to 1, the behavior is same as when set to 2. We kept this |
fe8ab488 A |
305 | * for binary compatibility. |
306 | * - when set to 2, will "ack every other packet" | |
307 | * - if our last ack wasn't a 0-sized window. | |
0a7de745 A |
308 | * - if the peer hasn't sent us a TH_PUSH data packet (radar 3649245). |
309 | * If TH_PUSH is set, take this as a clue that we need to ACK | |
310 | * with no delay. This helps higher level protocols who | |
311 | * won't send us more data even if the window is open | |
fe8ab488 | 312 | * because their last "segment" hasn't been ACKed |
0a7de745 A |
313 | * - when set to 3, will do "streaming detection" |
314 | * - if we receive more than "maxseg_unacked" full packets | |
fe8ab488 | 315 | * in the last 100ms |
0a7de745 | 316 | * - if the connection is not in slow-start or idle or |
fe8ab488 A |
317 | * loss/recovery states |
318 | * - if those criteria aren't met, it will ack every other packet. | |
319 | */ | |
320 | int | |
321 | tcp_cc_delay_ack(struct tcpcb *tp, struct tcphdr *th) | |
322 | { | |
fe8ab488 | 323 | switch (tcp_delack_enabled) { |
0a7de745 A |
324 | case 1: |
325 | case 2: | |
fe8ab488 A |
326 | if ((tp->t_flags & TF_RXWIN0SENT) == 0 && |
327 | (th->th_flags & TH_PUSH) == 0 && | |
0a7de745 A |
328 | (tp->t_unacksegs == 1)) { |
329 | return 1; | |
330 | } | |
331 | break; | |
332 | case 3: | |
f427ee49 A |
333 | if (tcp_ack_strategy == TCP_ACK_STRATEGY_LEGACY) { |
334 | if ((tp->t_flags & TF_RXWIN0SENT) == 0 && | |
335 | (th->th_flags & TH_PUSH) == 0 && | |
336 | ((tp->t_unacksegs == 1) || | |
337 | ((tp->t_flags & TF_STRETCHACK) && | |
338 | tp->t_unacksegs < maxseg_unacked))) { | |
339 | return 1; | |
340 | } | |
341 | } else { | |
342 | uint32_t recwin; | |
343 | ||
344 | /* Get the receive-window we would announce */ | |
345 | recwin = tcp_sbspace(tp); | |
346 | if (recwin > (uint32_t)(TCP_MAXWIN << tp->rcv_scale)) { | |
347 | recwin = (uint32_t)(TCP_MAXWIN << tp->rcv_scale); | |
348 | } | |
349 | ||
350 | /* Delay ACK, if: | |
351 | * | |
352 | * 1. We are not sending a zero-window | |
353 | * 2. We are not forcing fast ACKs | |
354 | * 3. We have more than the low-water mark in receive-buffer | |
355 | * 4. The receive-window is not increasing | |
356 | * 5. We have less than or equal of an MSS unacked or | |
357 | * Window actually has been growing larger than the initial value by half of it. | |
358 | * (this makes sure that during ramp-up we ACK every second MSS | |
359 | * until we pass the tcp_recvspace * 1.5-threshold) | |
360 | * 6. We haven't waited for half a BDP | |
361 | * | |
362 | * (a note on 6: The receive-window is | |
363 | * roughly 2 BDP. Thus, recwin / 4 means half a BDP and | |
364 | * thus we enforce an ACK roughly twice per RTT - even | |
365 | * if the app does not read) | |
366 | */ | |
367 | if ((tp->t_flags & TF_RXWIN0SENT) == 0 && | |
368 | tp->t_forced_acks == 0 && | |
369 | tp->t_inpcb->inp_socket->so_rcv.sb_cc > tp->t_inpcb->inp_socket->so_rcv.sb_lowat && | |
370 | recwin <= tp->t_last_recwin && | |
371 | (tp->rcv_nxt - tp->last_ack_sent <= tp->t_maxseg || | |
372 | recwin > (uint32_t)(tcp_recvspace + (tcp_recvspace >> 1))) && | |
373 | (tp->rcv_nxt - tp->last_ack_sent) < (recwin >> 2)) { | |
374 | tp->t_stat.acks_delayed++; | |
375 | return 1; | |
376 | } | |
0a7de745 | 377 | } |
fe8ab488 A |
378 | break; |
379 | } | |
0a7de745 | 380 | return 0; |
fe8ab488 A |
381 | } |
382 | ||
383 | void | |
384 | tcp_cc_allocate_state(struct tcpcb *tp) | |
385 | { | |
386 | if (tp->tcp_cc_index == TCP_CC_ALGO_CUBIC_INDEX && | |
0a7de745 | 387 | tp->t_ccstate == NULL) { |
fe8ab488 A |
388 | tp->t_ccstate = (struct tcp_ccstate *)zalloc(tcp_cc_zone); |
389 | ||
390 | /* | |
391 | * If we could not allocate memory for congestion control | |
392 | * state, revert to using TCP NewReno as it does not | |
393 | * require any state | |
394 | */ | |
0a7de745 | 395 | if (tp->t_ccstate == NULL) { |
fe8ab488 | 396 | tp->tcp_cc_index = TCP_CC_ALGO_NEWRENO_INDEX; |
0a7de745 | 397 | } else { |
fe8ab488 | 398 | bzero(tp->t_ccstate, sizeof(*tp->t_ccstate)); |
0a7de745 | 399 | } |
fe8ab488 A |
400 | } |
401 | } | |
402 | ||
403 | /* | |
0a7de745 | 404 | * If stretch ack was disabled automatically on long standing connections, |
fe8ab488 A |
405 | * re-evaluate the situation after 15 minutes to enable it. |
406 | */ | |
0a7de745 | 407 | #define TCP_STRETCHACK_DISABLE_WIN (15 * 60 * TCP_RETRANSHZ) |
fe8ab488 A |
408 | void |
409 | tcp_cc_after_idle_stretchack(struct tcpcb *tp) | |
410 | { | |
411 | int32_t tdiff; | |
412 | ||
0a7de745 | 413 | if (!(tp->t_flagsext & TF_DISABLE_STRETCHACK)) { |
fe8ab488 | 414 | return; |
0a7de745 | 415 | } |
fe8ab488 A |
416 | |
417 | tdiff = timer_diff(tcp_now, 0, tp->rcv_nostrack_ts, 0); | |
0a7de745 | 418 | if (tdiff < 0) { |
fe8ab488 | 419 | tdiff = -tdiff; |
0a7de745 | 420 | } |
fe8ab488 A |
421 | |
422 | if (tdiff > TCP_STRETCHACK_DISABLE_WIN) { | |
423 | tp->t_flagsext &= ~TF_DISABLE_STRETCHACK; | |
424 | tp->t_stretchack_delayed = 0; | |
425 | ||
426 | tcp_reset_stretch_ack(tp); | |
427 | } | |
428 | } | |
3e170ce0 A |
429 | |
430 | /* | |
431 | * Detect if the congestion window is non-vlidated according to | |
432 | * draft-ietf-tcpm-newcwv-07 | |
433 | */ | |
434 | ||
435 | inline uint32_t | |
436 | tcp_cc_is_cwnd_nonvalidated(struct tcpcb *tp) | |
437 | { | |
5ba3f43e | 438 | struct socket *so = tp->t_inpcb->inp_socket; |
3e170ce0 A |
439 | if (tp->t_pipeack == 0 || tcp_check_cwnd_nonvalidated == 0) { |
440 | tp->t_flagsext &= ~TF_CWND_NONVALIDATED; | |
0a7de745 | 441 | return 0; |
3e170ce0 | 442 | } |
5ba3f43e A |
443 | |
444 | /* | |
445 | * The congestion window is validated if the number of bytes acked | |
446 | * is more than half of the current window or if there is more | |
447 | * data to send in the send socket buffer | |
448 | */ | |
449 | if (tp->t_pipeack >= (tp->snd_cwnd >> 1) || | |
0a7de745 | 450 | (so != NULL && so->so_snd.sb_cc > tp->snd_cwnd)) { |
3e170ce0 | 451 | tp->t_flagsext &= ~TF_CWND_NONVALIDATED; |
0a7de745 | 452 | } else { |
3e170ce0 | 453 | tp->t_flagsext |= TF_CWND_NONVALIDATED; |
0a7de745 A |
454 | } |
455 | return tp->t_flagsext & TF_CWND_NONVALIDATED; | |
3e170ce0 A |
456 | } |
457 | ||
458 | /* | |
459 | * Adjust congestion window in response to congestion in non-validated | |
460 | * phase. | |
461 | */ | |
462 | inline void | |
463 | tcp_cc_adjust_nonvalidated_cwnd(struct tcpcb *tp) | |
464 | { | |
465 | tp->t_pipeack = tcp_get_max_pipeack(tp); | |
466 | tcp_clear_pipeack_state(tp); | |
467 | tp->snd_cwnd = (max(tp->t_pipeack, tp->t_lossflightsize) >> 1); | |
f427ee49 A |
468 | if (tcp_cubic_minor_fixes) { |
469 | tp->snd_cwnd = max(tp->snd_cwnd, tp->t_maxseg); | |
470 | } else { | |
471 | tp->snd_cwnd = max(tp->snd_cwnd, TCP_CC_CWND_INIT_BYTES); | |
472 | } | |
3e170ce0 A |
473 | tp->snd_cwnd += tp->t_maxseg * tcprexmtthresh; |
474 | tp->t_flagsext &= ~TF_CWND_NONVALIDATED; | |
475 | } | |
476 | ||
477 | /* | |
478 | * Return maximum of all the pipeack samples. Since the number of samples | |
479 | * TCP_PIPEACK_SAMPLE_COUNT is 3 at this time, it will be simpler to do | |
480 | * a comparision. We should change ths if the number of samples increases. | |
481 | */ | |
482 | inline u_int32_t | |
483 | tcp_get_max_pipeack(struct tcpcb *tp) | |
484 | { | |
485 | u_int32_t max_pipeack = 0; | |
486 | max_pipeack = (tp->t_pipeack_sample[0] > tp->t_pipeack_sample[1]) ? | |
487 | tp->t_pipeack_sample[0] : tp->t_pipeack_sample[1]; | |
488 | max_pipeack = (tp->t_pipeack_sample[2] > max_pipeack) ? | |
489 | tp->t_pipeack_sample[2] : max_pipeack; | |
490 | ||
0a7de745 | 491 | return max_pipeack; |
3e170ce0 A |
492 | } |
493 | ||
494 | inline void | |
495 | tcp_clear_pipeack_state(struct tcpcb *tp) | |
496 | { | |
497 | bzero(tp->t_pipeack_sample, sizeof(tp->t_pipeack_sample)); | |
498 | tp->t_pipeack_ind = 0; | |
499 | tp->t_lossflightsize = 0; | |
500 | } |