]>
Commit | Line | Data |
---|---|---|
fe8ab488 A |
1 | /* |
2 | * Copyright (c) 2013-2014 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | #include <sys/param.h> | |
29 | #include <sys/systm.h> | |
30 | #include <sys/kernel.h> | |
31 | #include <sys/protosw.h> | |
32 | #include <sys/socketvar.h> | |
33 | #include <sys/syslog.h> | |
34 | ||
35 | #include <net/route.h> | |
36 | #include <netinet/in.h> | |
37 | #include <netinet/in_systm.h> | |
38 | #include <netinet/ip.h> | |
39 | ||
40 | #if INET6 | |
41 | #include <netinet/ip6.h> | |
42 | #endif /* INET6 */ | |
43 | ||
44 | #include <netinet/ip_var.h> | |
45 | #include <netinet/tcp.h> | |
46 | #include <netinet/tcp_timer.h> | |
47 | #include <netinet/tcp_var.h> | |
48 | #include <netinet/tcp_fsm.h> | |
49 | #include <netinet/tcp_var.h> | |
50 | #include <netinet/tcp_cc.h> | |
51 | #include <netinet/tcpip.h> | |
52 | #include <netinet/tcp_seq.h> | |
53 | #include <kern/task.h> | |
54 | #include <libkern/OSAtomic.h> | |
55 | ||
/*
 * Forward declarations for the CUBIC callbacks installed in tcp_cc_cubic
 * and for the file-local helpers they share.  Parameter names and type
 * spellings match the definitions further down in this file.
 */
static int tcp_cubic_init(struct tcpcb *tp);
static int tcp_cubic_cleanup(struct tcpcb *tp);
static void tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp);
static void tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_pre_fr(struct tcpcb *tp);
static void tcp_cubic_post_fr(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_after_timeout(struct tcpcb *tp);
static int tcp_cubic_delay_ack(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_switch_cc(struct tcpcb *tp, uint16_t old_cc_index);
static uint32_t tcp_cubic_update(struct tcpcb *tp, u_int32_t rtt);
static uint32_t tcp_cubic_tcpwin(struct tcpcb *tp, struct tcphdr *th);
static inline void tcp_cubic_clear_state(struct tcpcb *tp);

/* Single-precision cube root, used to derive the cubic epoch period K. */
extern float cbrtf(float x);
72 | ||
73 | struct tcp_cc_algo tcp_cc_cubic = { | |
74 | .name = "cubic", | |
75 | .init = tcp_cubic_init, | |
76 | .cleanup = tcp_cubic_cleanup, | |
77 | .cwnd_init = tcp_cubic_cwnd_init_or_reset, | |
78 | .congestion_avd = tcp_cubic_congestion_avd, | |
79 | .ack_rcvd = tcp_cubic_ack_rcvd, | |
80 | .pre_fr = tcp_cubic_pre_fr, | |
81 | .post_fr = tcp_cubic_post_fr, | |
82 | .after_idle = tcp_cubic_cwnd_init_or_reset, | |
83 | .after_timeout = tcp_cubic_after_timeout, | |
84 | .delay_ack = tcp_cubic_delay_ack, | |
85 | .switch_to = tcp_cubic_switch_cc | |
86 | }; | |
87 | ||
/*
 * Multiplicative decrease factor: tcp_cubic_pre_fr() removes this fraction
 * of the window when it computes the new slow-start threshold.
 */
const float tcp_cubic_backoff = 0.2f;

/* Coefficient C of the cubic function W(t) = C(t-K)^3 + W(last_max). */
const float tcp_cubic_coeff = 0.4f;

/*
 * Factor applied to the recorded last-max window in tcp_cubic_pre_fr()
 * when the loss occurred below the previous last-max and fast convergence
 * is enabled.
 */
const float tcp_cubic_fast_convergence_factor = 0.875f;
fe8ab488 | 91 | |
5ba3f43e | 92 | SYSCTL_SKMEM_TCP_INT(OID_AUTO, cubic_tcp_friendliness, CTLFLAG_RW | CTLFLAG_LOCKED, |
0a7de745 | 93 | static int, tcp_cubic_tcp_friendliness, 0, "Enable TCP friendliness"); |
fe8ab488 | 94 | |
5ba3f43e | 95 | SYSCTL_SKMEM_TCP_INT(OID_AUTO, cubic_fast_convergence, CTLFLAG_RW | CTLFLAG_LOCKED, |
0a7de745 | 96 | static int, tcp_cubic_fast_convergence, 0, "Enable fast convergence"); |
fe8ab488 | 97 | |
5ba3f43e | 98 | SYSCTL_SKMEM_TCP_INT(OID_AUTO, cubic_use_minrtt, CTLFLAG_RW | CTLFLAG_LOCKED, |
0a7de745 | 99 | static int, tcp_cubic_use_minrtt, 0, "use a min of 5 sec rtt"); |
fe8ab488 | 100 | |
0a7de745 A |
101 | static int |
102 | tcp_cubic_init(struct tcpcb *tp) | |
fe8ab488 A |
103 | { |
104 | OSIncrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets); | |
105 | ||
106 | VERIFY(tp->t_ccstate != NULL); | |
107 | tcp_cubic_clear_state(tp); | |
0a7de745 | 108 | return 0; |
fe8ab488 A |
109 | } |
110 | ||
0a7de745 A |
111 | static int |
112 | tcp_cubic_cleanup(struct tcpcb *tp) | |
fe8ab488 A |
113 | { |
114 | #pragma unused(tp) | |
115 | OSDecrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets); | |
0a7de745 | 116 | return 0; |
fe8ab488 A |
117 | } |
118 | ||
119 | /* | |
0a7de745 | 120 | * Initialize the congestion window at the beginning of a connection or |
fe8ab488 A |
121 | * after idle time |
122 | */ | |
0a7de745 A |
123 | static void |
124 | tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp) | |
fe8ab488 | 125 | { |
0a7de745 | 126 | VERIFY(tp->t_ccstate != NULL); |
fe8ab488 A |
127 | |
128 | tcp_cubic_clear_state(tp); | |
129 | tcp_cc_cwnd_init_or_reset(tp); | |
3e170ce0 A |
130 | tp->t_pipeack = 0; |
131 | tcp_clear_pipeack_state(tp); | |
132 | ||
133 | /* Start counting bytes for RFC 3465 again */ | |
134 | tp->t_bytes_acked = 0; | |
fe8ab488 A |
135 | |
136 | /* | |
137 | * slow start threshold could get initialized to a lower value | |
138 | * when there is a cached value in the route metrics. In this case, | |
139 | * the connection can enter congestion avoidance without any packet | |
140 | * loss and Cubic will enter steady-state too early. It is better | |
141 | * to always probe to find the initial slow-start threshold. | |
142 | */ | |
143 | if (tp->t_inpcb->inp_stat->txbytes <= TCP_CC_CWND_INIT_BYTES | |
0a7de745 | 144 | && tp->snd_ssthresh < (TCP_MAXWIN << TCP_MAX_WINSHIFT)) { |
fe8ab488 | 145 | tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; |
0a7de745 | 146 | } |
fe8ab488 A |
147 | |
148 | /* Initialize cubic last max to be same as ssthresh */ | |
149 | tp->t_ccstate->cub_last_max = tp->snd_ssthresh; | |
fe8ab488 A |
150 | } |
151 | ||
152 | /* | |
0a7de745 | 153 | * Compute the target congestion window for the next RTT according to |
fe8ab488 A |
154 | * cubic equation when an ack is received. |
155 | * | |
156 | * W(t) = C(t-K)^3 + W(last_max) | |
157 | */ | |
158 | static uint32_t | |
159 | tcp_cubic_update(struct tcpcb *tp, u_int32_t rtt) | |
160 | { | |
161 | float K, var; | |
162 | u_int32_t elapsed_time, win; | |
163 | ||
fe8ab488 | 164 | win = min(tp->snd_cwnd, tp->snd_wnd); |
0a7de745 | 165 | if (tp->t_ccstate->cub_last_max == 0) { |
04b8595b | 166 | tp->t_ccstate->cub_last_max = tp->snd_ssthresh; |
0a7de745 | 167 | } |
04b8595b | 168 | |
fe8ab488 A |
169 | if (tp->t_ccstate->cub_epoch_start == 0) { |
170 | /* | |
171 | * This is the beginning of a new epoch, initialize some of | |
0a7de745 | 172 | * the variables that we need to use for computing the |
fe8ab488 A |
173 | * congestion window later. |
174 | */ | |
175 | tp->t_ccstate->cub_epoch_start = tcp_now; | |
0a7de745 | 176 | if (tp->t_ccstate->cub_epoch_start == 0) { |
fe8ab488 | 177 | tp->t_ccstate->cub_epoch_start = 1; |
0a7de745 | 178 | } |
fe8ab488 | 179 | if (win < tp->t_ccstate->cub_last_max) { |
fe8ab488 A |
180 | VERIFY(current_task() == kernel_task); |
181 | ||
182 | /* | |
183 | * Compute cubic epoch period, this is the time | |
184 | * period that the window will take to increase to | |
185 | * last_max again after backoff due to loss. | |
186 | */ | |
187 | K = (tp->t_ccstate->cub_last_max - win) | |
188 | / tp->t_maxseg / tcp_cubic_coeff; | |
189 | K = cbrtf(K); | |
190 | tp->t_ccstate->cub_epoch_period = K * TCP_RETRANSHZ; | |
191 | /* Origin point */ | |
0a7de745 A |
192 | tp->t_ccstate->cub_origin_point = |
193 | tp->t_ccstate->cub_last_max; | |
fe8ab488 A |
194 | } else { |
195 | tp->t_ccstate->cub_epoch_period = 0; | |
196 | tp->t_ccstate->cub_origin_point = win; | |
197 | } | |
198 | tp->t_ccstate->cub_target_win = 0; | |
199 | } | |
0a7de745 A |
200 | |
201 | VERIFY(tp->t_ccstate->cub_origin_point > 0); | |
fe8ab488 A |
202 | /* |
203 | * Compute the target window for the next RTT using smoothed RTT | |
204 | * as an estimate for next RTT. | |
205 | */ | |
0a7de745 A |
206 | elapsed_time = timer_diff(tcp_now, 0, |
207 | tp->t_ccstate->cub_epoch_start, 0); | |
fe8ab488 | 208 | |
0a7de745 | 209 | if (tcp_cubic_use_minrtt) { |
fe8ab488 | 210 | elapsed_time += max(tcp_cubic_use_minrtt, rtt); |
0a7de745 | 211 | } else { |
fe8ab488 | 212 | elapsed_time += rtt; |
0a7de745 | 213 | } |
fe8ab488 A |
214 | var = (elapsed_time - tp->t_ccstate->cub_epoch_period) / TCP_RETRANSHZ; |
215 | var = var * var * var * (tcp_cubic_coeff * tp->t_maxseg); | |
216 | ||
5ba3f43e | 217 | tp->t_ccstate->cub_target_win = (u_int32_t)(tp->t_ccstate->cub_origin_point + var); |
0a7de745 | 218 | return tp->t_ccstate->cub_target_win; |
fe8ab488 A |
219 | } |
220 | ||
/*
 * Standard TCP utilizes bandwidth well in low RTT and low BDP connections
 * even when there is some packet loss. Enabling TCP mode will help Cubic
 * to achieve this kind of utilization.
 *
 * But if there is a bottleneck link in the path with a fixed size queue
 * and fixed bandwidth, TCP Cubic will help to reduce packet loss at this
 * link because of the steady-state behavior. Using average and mean
 * absolute deviation of W(lastmax), we try to detect if the congestion
 * window is close to the bottleneck bandwidth. In that case, disabling
 * TCP mode will help to minimize packet loss at this link.
 *
 * The macro evaluates to 1 (TCP mode enabled) when the socket is not a
 * realtime source and the mean deviation of W(lastmax) exceeds two
 * segments, and to 0 otherwise.
 *
 * Every field is reached through the macro argument, so the macro does not
 * depend on the caller having a variable named "tp" in scope.
 */
#define TCP_CUBIC_ENABLE_TCPMODE(_tp_) \
	((!soissrcrealtime((_tp_)->t_inpcb->inp_socket) && \
	(_tp_)->t_ccstate->cub_mean_dev > ((_tp_)->t_maxseg << 1)) ? 1 : 0)
240 | ||
241 | /* | |
0a7de745 | 242 | * Compute the window growth if standard TCP (AIMD) was used with |
fe8ab488 | 243 | * a backoff of 0.5 and additive increase of 1 packet per RTT. |
0a7de745 | 244 | * |
fe8ab488 | 245 | * TCP window at time t can be calculated using the following equation |
0a7de745 | 246 | * with beta as 0.8 |
fe8ab488 A |
247 | * |
248 | * W(t) <- Wmax * beta + 3 * ((1 - beta)/(1 + beta)) * t/RTT | |
249 | * | |
250 | */ | |
251 | static uint32_t | |
252 | tcp_cubic_tcpwin(struct tcpcb *tp, struct tcphdr *th) | |
253 | { | |
254 | if (tp->t_ccstate->cub_tcp_win == 0) { | |
255 | tp->t_ccstate->cub_tcp_win = min(tp->snd_cwnd, tp->snd_wnd); | |
256 | tp->t_ccstate->cub_tcp_bytes_acked = 0; | |
257 | } else { | |
258 | tp->t_ccstate->cub_tcp_bytes_acked += | |
259 | BYTES_ACKED(th, tp); | |
260 | if (tp->t_ccstate->cub_tcp_bytes_acked >= | |
261 | tp->t_ccstate->cub_tcp_win) { | |
262 | tp->t_ccstate->cub_tcp_bytes_acked -= | |
263 | tp->t_ccstate->cub_tcp_win; | |
264 | tp->t_ccstate->cub_tcp_win += tp->t_maxseg; | |
265 | } | |
0a7de745 A |
266 | } |
267 | return tp->t_ccstate->cub_tcp_win; | |
fe8ab488 A |
268 | } |
269 | ||
270 | /* | |
271 | * Handle an in-sequence ack during congestion avoidance phase. | |
272 | */ | |
273 | static void | |
274 | tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th) | |
275 | { | |
276 | u_int32_t cubic_target_win, tcp_win, rtt; | |
277 | ||
3e170ce0 | 278 | /* Do not increase congestion window in non-validated phase */ |
0a7de745 | 279 | if (tcp_cc_is_cwnd_nonvalidated(tp) != 0) { |
3e170ce0 | 280 | return; |
0a7de745 | 281 | } |
3e170ce0 | 282 | |
fe8ab488 A |
283 | tp->t_bytes_acked += BYTES_ACKED(th, tp); |
284 | ||
285 | rtt = get_base_rtt(tp); | |
286 | /* | |
287 | * First compute cubic window. If cubic variables are not | |
288 | * initialized (after coming out of recovery), this call will | |
289 | * initialize them. | |
290 | */ | |
291 | cubic_target_win = tcp_cubic_update(tp, rtt); | |
292 | ||
293 | /* Compute TCP window if a multiplicative decrease of 0.2 is used */ | |
294 | tcp_win = tcp_cubic_tcpwin(tp, th); | |
295 | ||
296 | if (tp->snd_cwnd < tcp_win && | |
297 | (tcp_cubic_tcp_friendliness == 1 || | |
298 | TCP_CUBIC_ENABLE_TCPMODE(tp))) { | |
299 | /* this connection is in TCP-friendly region */ | |
300 | if (tp->t_bytes_acked >= tp->snd_cwnd) { | |
301 | tp->t_bytes_acked -= tp->snd_cwnd; | |
302 | tp->snd_cwnd = min(tcp_win, TCP_MAXWIN << tp->snd_scale); | |
303 | } | |
304 | } else { | |
305 | if (cubic_target_win > tp->snd_cwnd) { | |
306 | /* | |
307 | * The target win is computed for the next RTT. | |
308 | * To reach this value, cwnd will have to be updated | |
0a7de745 A |
309 | * one segment at a time. Compute how many bytes |
310 | * need to be acknowledged before we can increase | |
fe8ab488 A |
311 | * the cwnd by one segment. |
312 | */ | |
313 | u_int64_t incr_win; | |
314 | incr_win = tp->snd_cwnd * tp->t_maxseg; | |
315 | incr_win /= (cubic_target_win - tp->snd_cwnd); | |
316 | if (incr_win > 0 && | |
317 | tp->t_bytes_acked >= incr_win) { | |
318 | tp->t_bytes_acked -= incr_win; | |
0a7de745 | 319 | tp->snd_cwnd = |
fe8ab488 A |
320 | min((tp->snd_cwnd + tp->t_maxseg), |
321 | TCP_MAXWIN << tp->snd_scale); | |
322 | } | |
323 | } | |
324 | } | |
325 | } | |
326 | ||
327 | static void | |
328 | tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) | |
329 | { | |
3e170ce0 | 330 | /* Do not increase the congestion window in non-validated phase */ |
0a7de745 | 331 | if (tcp_cc_is_cwnd_nonvalidated(tp) != 0) { |
3e170ce0 | 332 | return; |
0a7de745 | 333 | } |
3e170ce0 | 334 | |
fe8ab488 A |
335 | if (tp->snd_cwnd >= tp->snd_ssthresh) { |
336 | /* Congestion avoidance phase */ | |
337 | tcp_cubic_congestion_avd(tp, th); | |
338 | } else { | |
339 | /* | |
340 | * Use 2*SMSS as limit on increment as suggested | |
341 | * by RFC 3465 section 2.3 | |
342 | */ | |
343 | uint32_t acked, abc_lim, incr; | |
3e170ce0 | 344 | |
fe8ab488 | 345 | acked = BYTES_ACKED(th, tp); |
0a7de745 A |
346 | abc_lim = (tcp_do_rfc3465_lim2 && |
347 | tp->snd_nxt == tp->snd_max) ? | |
348 | 2 * tp->t_maxseg : tp->t_maxseg; | |
fe8ab488 A |
349 | incr = min(acked, abc_lim); |
350 | ||
351 | tp->snd_cwnd += incr; | |
0a7de745 A |
352 | tp->snd_cwnd = min(tp->snd_cwnd, |
353 | TCP_MAXWIN << tp->snd_scale); | |
fe8ab488 A |
354 | } |
355 | } | |
356 | ||
357 | static void | |
358 | tcp_cubic_pre_fr(struct tcpcb *tp) | |
359 | { | |
5ba3f43e | 360 | u_int32_t win, avg; |
fe8ab488 A |
361 | int32_t dev; |
362 | tp->t_ccstate->cub_epoch_start = 0; | |
363 | tp->t_ccstate->cub_tcp_win = 0; | |
364 | tp->t_ccstate->cub_target_win = 0; | |
365 | tp->t_ccstate->cub_tcp_bytes_acked = 0; | |
366 | ||
367 | win = min(tp->snd_cwnd, tp->snd_wnd); | |
3e170ce0 A |
368 | if (tp->t_flagsext & TF_CWND_NONVALIDATED) { |
369 | tp->t_lossflightsize = tp->snd_max - tp->snd_una; | |
370 | win = (max(tp->t_pipeack, tp->t_lossflightsize)) >> 1; | |
371 | } else { | |
372 | tp->t_lossflightsize = 0; | |
373 | } | |
fe8ab488 A |
374 | /* |
375 | * Note the congestion window at which packet loss occurred as | |
376 | * cub_last_max. | |
377 | * | |
378 | * If the congestion window is less than the last max window when | |
0a7de745 | 379 | * loss occurred, it indicates that capacity available in the |
fe8ab488 A |
380 | * network has gone down. This can happen if a new flow has started |
381 | * and it is capturing some of the bandwidth. To reach convergence | |
0a7de745 | 382 | * quickly, backoff a little more. Disable fast convergence to |
fe8ab488 A |
383 | * disable this behavior. |
384 | */ | |
385 | if (win < tp->t_ccstate->cub_last_max && | |
0a7de745 | 386 | tcp_cubic_fast_convergence == 1) { |
5ba3f43e | 387 | tp->t_ccstate->cub_last_max = (u_int32_t)(win * |
0a7de745 A |
388 | tcp_cubic_fast_convergence_factor); |
389 | } else { | |
fe8ab488 | 390 | tp->t_ccstate->cub_last_max = win; |
0a7de745 | 391 | } |
fe8ab488 A |
392 | |
393 | if (tp->t_ccstate->cub_last_max == 0) { | |
394 | /* | |
395 | * If last_max is zero because snd_wnd is zero or for | |
396 | * any other reason, initialize it to the amount of data | |
397 | * in flight | |
398 | */ | |
399 | tp->t_ccstate->cub_last_max = tp->snd_max - tp->snd_una; | |
400 | } | |
401 | ||
402 | /* | |
403 | * Compute average and mean absolute deviation of the | |
404 | * window at which packet loss occurred. | |
405 | */ | |
406 | if (tp->t_ccstate->cub_avg_lastmax == 0) { | |
407 | tp->t_ccstate->cub_avg_lastmax = tp->t_ccstate->cub_last_max; | |
408 | } else { | |
409 | /* | |
410 | * Average is computed by taking 63 parts of | |
411 | * history and one part of the most recent value | |
412 | */ | |
413 | avg = tp->t_ccstate->cub_avg_lastmax; | |
414 | avg = (avg << 6) - avg; | |
415 | tp->t_ccstate->cub_avg_lastmax = | |
0a7de745 | 416 | (avg + tp->t_ccstate->cub_last_max) >> 6; |
fe8ab488 A |
417 | } |
418 | ||
419 | /* caluclate deviation from average */ | |
420 | dev = tp->t_ccstate->cub_avg_lastmax - tp->t_ccstate->cub_last_max; | |
421 | ||
422 | /* Take the absolute value */ | |
0a7de745 | 423 | if (dev < 0) { |
fe8ab488 | 424 | dev = -dev; |
0a7de745 | 425 | } |
fe8ab488 A |
426 | |
427 | if (tp->t_ccstate->cub_mean_dev == 0) { | |
428 | tp->t_ccstate->cub_mean_dev = dev; | |
429 | } else { | |
430 | dev = dev + ((tp->t_ccstate->cub_mean_dev << 4) | |
431 | - tp->t_ccstate->cub_mean_dev); | |
432 | tp->t_ccstate->cub_mean_dev = dev >> 4; | |
433 | } | |
434 | ||
435 | /* Backoff congestion window by tcp_cubic_backoff factor */ | |
5ba3f43e | 436 | win = (u_int32_t)(win - (win * tcp_cubic_backoff)); |
fe8ab488 | 437 | win = (win / tp->t_maxseg); |
0a7de745 | 438 | if (win < 2) { |
fe8ab488 | 439 | win = 2; |
0a7de745 | 440 | } |
fe8ab488 A |
441 | tp->snd_ssthresh = win * tp->t_maxseg; |
442 | tcp_cc_resize_sndbuf(tp); | |
443 | } | |
444 | ||
445 | static void | |
446 | tcp_cubic_post_fr(struct tcpcb *tp, struct tcphdr *th) | |
447 | { | |
448 | uint32_t flight_size = 0; | |
449 | ||
0a7de745 | 450 | if (SEQ_LEQ(th->th_ack, tp->snd_max)) { |
fe8ab488 | 451 | flight_size = tp->snd_max - th->th_ack; |
0a7de745 | 452 | } |
3e170ce0 A |
453 | |
454 | if (SACK_ENABLED(tp) && tp->t_lossflightsize > 0) { | |
455 | u_int32_t total_rxt_size = 0, ncwnd; | |
456 | /* | |
457 | * When SACK is enabled, the number of retransmitted bytes | |
458 | * can be counted more accurately. | |
459 | */ | |
460 | total_rxt_size = tcp_rxtseg_total_size(tp); | |
461 | ncwnd = max(tp->t_pipeack, tp->t_lossflightsize); | |
462 | if (total_rxt_size <= ncwnd) { | |
463 | ncwnd = ncwnd - total_rxt_size; | |
464 | } | |
465 | ||
466 | /* | |
467 | * To avoid sending a large burst at the end of recovery | |
468 | * set a max limit on ncwnd | |
469 | */ | |
470 | ncwnd = min(ncwnd, (tp->t_maxseg << 6)); | |
471 | ncwnd = ncwnd >> 1; | |
472 | flight_size = max(ncwnd, flight_size); | |
473 | } | |
fe8ab488 A |
474 | /* |
475 | * Complete ack. The current window was inflated for fast recovery. | |
476 | * It has to be deflated post recovery. | |
477 | * | |
0a7de745 | 478 | * Window inflation should have left us with approx snd_ssthresh |
fe8ab488 A |
479 | * outstanding data. If the flight size is zero or one segment, |
480 | * make congestion window to be at least as big as 2 segments to | |
481 | * avoid delayed acknowledgements. This is according to RFC 6582. | |
482 | */ | |
0a7de745 A |
483 | if (flight_size < tp->snd_ssthresh) { |
484 | tp->snd_cwnd = max(flight_size, tp->t_maxseg) | |
485 | + tp->t_maxseg; | |
486 | } else { | |
fe8ab488 | 487 | tp->snd_cwnd = tp->snd_ssthresh; |
0a7de745 | 488 | } |
fe8ab488 A |
489 | tp->t_ccstate->cub_tcp_win = 0; |
490 | tp->t_ccstate->cub_target_win = 0; | |
491 | tp->t_ccstate->cub_tcp_bytes_acked = 0; | |
492 | } | |
493 | ||
0a7de745 | 494 | static void |
fe8ab488 A |
495 | tcp_cubic_after_timeout(struct tcpcb *tp) |
496 | { | |
497 | VERIFY(tp->t_ccstate != NULL); | |
3e170ce0 A |
498 | |
499 | /* | |
500 | * Avoid adjusting congestion window due to SYN retransmissions. | |
501 | * If more than one byte (SYN) is outstanding then it is still | |
502 | * needed to adjust the window. | |
503 | */ | |
504 | if (tp->t_state < TCPS_ESTABLISHED && | |
0a7de745 | 505 | ((int)(tp->snd_max - tp->snd_una) <= 1)) { |
3e170ce0 | 506 | return; |
0a7de745 | 507 | } |
3e170ce0 | 508 | |
fe8ab488 A |
509 | if (!IN_FASTRECOVERY(tp)) { |
510 | tcp_cubic_clear_state(tp); | |
511 | tcp_cubic_pre_fr(tp); | |
512 | } | |
513 | ||
514 | /* | |
515 | * Close the congestion window down to one segment as a retransmit | |
516 | * timeout might indicate severe congestion. | |
517 | */ | |
518 | tp->snd_cwnd = tp->t_maxseg; | |
519 | } | |
520 | ||
/*
 * Delayed-ack decision for CUBIC: defers entirely to the common
 * tcp_cc_delay_ack() helper and returns its result unchanged.
 */
static int
tcp_cubic_delay_ack(struct tcpcb *tp, struct tcphdr *th)
{
	int should_delay = tcp_cc_delay_ack(tp, th);

	return should_delay;
}
526 | ||
527 | /* | |
0a7de745 | 528 | * When switching from a different CC it is better for Cubic to start |
fe8ab488 A |
529 | * fresh. The state required for Cubic calculation might be stale and it |
530 | * might not represent the current state of the network. If it starts as | |
531 | * a new connection it will probe and learn the existing network conditions. | |
532 | */ | |
533 | static void | |
534 | tcp_cubic_switch_cc(struct tcpcb *tp, uint16_t old_cc_index) | |
535 | { | |
536 | #pragma unused(old_cc_index) | |
537 | tcp_cubic_cwnd_init_or_reset(tp); | |
fe8ab488 A |
538 | |
539 | OSIncrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets); | |
540 | } | |
541 | ||
0a7de745 A |
542 | static inline void |
543 | tcp_cubic_clear_state(struct tcpcb *tp) | |
fe8ab488 A |
544 | { |
545 | tp->t_ccstate->cub_last_max = 0; | |
546 | tp->t_ccstate->cub_epoch_start = 0; | |
547 | tp->t_ccstate->cub_origin_point = 0; | |
548 | tp->t_ccstate->cub_tcp_win = 0; | |
549 | tp->t_ccstate->cub_tcp_bytes_acked = 0; | |
550 | tp->t_ccstate->cub_epoch_period = 0; | |
551 | tp->t_ccstate->cub_target_win = 0; | |
552 | } |