/*
 * Copyright (c) 2013-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>

#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>

#if INET6
#include <netinet/ip6.h>
#endif /* INET6 */

#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_cc.h>
#include <netinet/tcpip.h>
#include <netinet/tcp_seq.h>
#include <kern/task.h>
#include <libkern/OSAtomic.h>

static int tcp_cubic_init(struct tcpcb *tp);
static int tcp_cubic_cleanup(struct tcpcb *tp);
static void tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp);
static void tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_pre_fr(struct tcpcb *tp);
static void tcp_cubic_post_fr(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_after_timeout(struct tcpcb *tp);
static int tcp_cubic_delay_ack(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_switch_cc(struct tcpcb *tp, uint16_t old_cc_index);
static uint32_t tcp_cubic_update(struct tcpcb *tp, u_int32_t rtt);
static uint32_t tcp_cubic_tcpwin(struct tcpcb *tp, struct tcphdr *th);
static inline void tcp_cubic_clear_state(struct tcpcb *tp);


extern float cbrtf(float x);

struct tcp_cc_algo tcp_cc_cubic = {
	.name = "cubic",
	.init = tcp_cubic_init,
	.cleanup = tcp_cubic_cleanup,
	.cwnd_init = tcp_cubic_cwnd_init_or_reset,
	.congestion_avd = tcp_cubic_congestion_avd,
	.ack_rcvd = tcp_cubic_ack_rcvd,
	.pre_fr = tcp_cubic_pre_fr,
	.post_fr = tcp_cubic_post_fr,
	.after_idle = tcp_cubic_cwnd_init_or_reset,
	.after_timeout = tcp_cubic_after_timeout,
	.delay_ack = tcp_cubic_delay_ack,
	.switch_to = tcp_cubic_switch_cc
};

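/*
 * A multiplicative decrease of 0.2 leaves beta = 0.8 of the congestion
 * window after a loss, and tcp_cubic_coeff is the scaling constant C in
 * W(t) = C(t-K)^3 + W(last_max); both values correspond to the defaults
 * proposed in the CUBIC paper. The fast-convergence factor (7/8) is the
 * extra backoff applied to the recorded last_max when a loss occurs
 * below the previous last_max (see tcp_cubic_pre_fr() below).
 */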
const float tcp_cubic_backoff = 0.2;	/* multiplicative decrease factor */
const float tcp_cubic_coeff = 0.4;
const float tcp_cubic_fast_convergence_factor = 0.875;

static int tcp_cubic_tcp_friendliness = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, cubic_tcp_friendliness,
	CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_cubic_tcp_friendliness, 0,
	"Enable TCP friendliness");

static int tcp_cubic_fast_convergence = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, cubic_fast_convergence,
	CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_cubic_fast_convergence, 0,
	"Enable fast convergence");

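/*
 * When set to a non-zero value, tcp_cubic_use_minrtt acts as a floor on
 * the RTT term that tcp_cubic_update() adds to the elapsed epoch time
 * (see max(tcp_cubic_use_minrtt, rtt) there). The sysctl description
 * suggests it was intended to be set to about 5 seconds.
 */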
static int tcp_cubic_use_minrtt = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, cubic_use_minrtt,
	CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_cubic_use_minrtt, 0,
	"use a min of 5 sec rtt");

static int tcp_cubic_init(struct tcpcb *tp)
{
	OSIncrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets);

	VERIFY(tp->t_ccstate != NULL);
	tcp_cubic_clear_state(tp);
	return (0);
}

static int tcp_cubic_cleanup(struct tcpcb *tp)
{
#pragma unused(tp)
	OSDecrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets);
	return (0);
}

/*
 * Initialize the congestion window at the beginning of a connection or
 * after idle time.
 */
static void tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp)
{
	VERIFY(tp->t_ccstate != NULL);

	tcp_cubic_clear_state(tp);
	tcp_cc_cwnd_init_or_reset(tp);
	tp->t_pipeack = 0;
	tcp_clear_pipeack_state(tp);

	/* Start counting bytes for RFC 3465 again */
	tp->t_bytes_acked = 0;

	/*
	 * The slow-start threshold could be initialized to a lower value
	 * when there is a cached value in the route metrics. In that case,
	 * the connection can enter congestion avoidance without any packet
	 * loss and Cubic will enter steady state too early. It is better
	 * to always probe to find the initial slow-start threshold.
	 */
	if (tp->t_inpcb->inp_stat->txbytes <= TCP_CC_CWND_INIT_BYTES
	    && tp->snd_ssthresh < (TCP_MAXWIN << TCP_MAX_WINSHIFT))
		tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;

	/* Initialize the CUBIC last max to the same value as ssthresh */
	tp->t_ccstate->cub_last_max = tp->snd_ssthresh;
}

/*
 * Compute the target congestion window for the next RTT according to the
 * cubic equation when an ack is received.
 *
 * W(t) = C(t-K)^3 + W(last_max)
 */
static uint32_t
tcp_cubic_update(struct tcpcb *tp, u_int32_t rtt)
{
	float K, var;
	u_int32_t elapsed_time, win;

	win = min(tp->snd_cwnd, tp->snd_wnd);
	if (tp->t_ccstate->cub_last_max == 0)
		tp->t_ccstate->cub_last_max = tp->snd_ssthresh;

	if (tp->t_ccstate->cub_epoch_start == 0) {
		/*
		 * This is the beginning of a new epoch. Initialize some of
		 * the variables that we need to use for computing the
		 * congestion window later.
		 */
		tp->t_ccstate->cub_epoch_start = tcp_now;
		if (tp->t_ccstate->cub_epoch_start == 0)
			tp->t_ccstate->cub_epoch_start = 1;
		if (win < tp->t_ccstate->cub_last_max) {

			VERIFY(current_task() == kernel_task);

			/*
			 * Compute the cubic epoch period K. This is the
			 * time the window will take to grow back to
			 * last_max after the backoff due to loss.
			 */
			K = (tp->t_ccstate->cub_last_max - win)
			    / tp->t_maxseg / tcp_cubic_coeff;
			K = cbrtf(K);
			tp->t_ccstate->cub_epoch_period = K * TCP_RETRANSHZ;
			/* Origin point */
			tp->t_ccstate->cub_origin_point =
			    tp->t_ccstate->cub_last_max;
		} else {
			tp->t_ccstate->cub_epoch_period = 0;
			tp->t_ccstate->cub_origin_point = win;
		}
		tp->t_ccstate->cub_target_win = 0;
	}

	VERIFY(tp->t_ccstate->cub_origin_point > 0);
	/*
	 * Compute the target window for the next RTT, using the smoothed
	 * RTT as an estimate of the next RTT.
	 */
	elapsed_time = timer_diff(tcp_now, 0,
	    tp->t_ccstate->cub_epoch_start, 0);

	if (tcp_cubic_use_minrtt)
		elapsed_time += max(tcp_cubic_use_minrtt, rtt);
	else
		elapsed_time += rtt;
	var = (elapsed_time - tp->t_ccstate->cub_epoch_period) / TCP_RETRANSHZ;
	var = var * var * var * (tcp_cubic_coeff * tp->t_maxseg);

	tp->t_ccstate->cub_target_win = tp->t_ccstate->cub_origin_point + var;
	return (tp->t_ccstate->cub_target_win);
}
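
/*
 * A rough worked example of the epoch computation above (numbers are
 * illustrative only): if last_max is 100 segments and the window after
 * a loss is 80 segments, K = cbrt((100 - 80) / 0.4) = cbrt(50), or
 * roughly 3.7 seconds. The target window then follows the cubic curve
 * C(t - K)^3 + last_max: concave (decelerating) growth until t = K,
 * a plateau around last_max, then convex (probing) growth beyond it.
 */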

/*
 * Standard TCP utilizes bandwidth well in low-RTT and low-BDP
 * connections even when there is some packet loss. Enabling TCP mode
 * helps Cubic achieve that kind of utilization.
 *
 * But if there is a bottleneck link in the path with a fixed-size queue
 * and fixed bandwidth, TCP Cubic's steady-state behavior will reduce
 * packet loss at that link. Using the average and mean absolute
 * deviation of W(lastmax), we try to detect whether the congestion
 * window is close to the bottleneck bandwidth. In that case, disabling
 * TCP mode helps to minimize packet loss at that link.
 *
 * Disable TCP mode if W(lastmax) (the window at which the previous
 * packet loss happened) is within a small range of the computed
 * average last max.
 */
#define TCP_CUBIC_ENABLE_TCPMODE(_tp_) \
	((!soissrcrealtime((_tp_)->t_inpcb->inp_socket) && \
	(_tp_)->t_ccstate->cub_mean_dev > ((_tp_)->t_maxseg << 1)) ? 1 : 0)

/*
 * Compute the window growth if a standard TCP flow (AIMD with an
 * additive increase of 1 packet per RTT) had seen the same history.
 * This is the TCP-friendly region estimate from the CUBIC paper: with
 * Cubic's multiplicative decrease of 0.2, a flow retains beta = 0.8 of
 * its window after a loss, and the average TCP window at time t is
 *
 *	W(t) <- Wmax * beta + 3 * ((1 - beta)/(1 + beta)) * t/RTT
 *
 */
static uint32_t
tcp_cubic_tcpwin(struct tcpcb *tp, struct tcphdr *th)
{
	if (tp->t_ccstate->cub_tcp_win == 0) {
		tp->t_ccstate->cub_tcp_win = min(tp->snd_cwnd, tp->snd_wnd);
		tp->t_ccstate->cub_tcp_bytes_acked = 0;
	} else {
		tp->t_ccstate->cub_tcp_bytes_acked +=
		    BYTES_ACKED(th, tp);
		if (tp->t_ccstate->cub_tcp_bytes_acked >=
		    tp->t_ccstate->cub_tcp_win) {
			tp->t_ccstate->cub_tcp_bytes_acked -=
			    tp->t_ccstate->cub_tcp_win;
			tp->t_ccstate->cub_tcp_win += tp->t_maxseg;
		}
	}
	return (tp->t_ccstate->cub_tcp_win);
}

/*
 * Handle an in-sequence ack during the congestion avoidance phase.
 */
static void
tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th)
{
	u_int32_t cubic_target_win, tcp_win, rtt;

	/* Do not increase the congestion window in the non-validated phase */
	if (tcp_cc_is_cwnd_nonvalidated(tp) != 0)
		return;

	tp->t_bytes_acked += BYTES_ACKED(th, tp);

	rtt = get_base_rtt(tp);
	/*
	 * First compute the cubic window. If the cubic variables are not
	 * initialized (after coming out of recovery), this call will
	 * initialize them.
	 */
	cubic_target_win = tcp_cubic_update(tp, rtt);

	/* Compute the TCP window if a multiplicative decrease of 0.2 is used */
	tcp_win = tcp_cubic_tcpwin(tp, th);

	if (tp->snd_cwnd < tcp_win &&
	    (tcp_cubic_tcp_friendliness == 1 ||
	    TCP_CUBIC_ENABLE_TCPMODE(tp))) {
		/* This connection is in the TCP-friendly region */
		if (tp->t_bytes_acked >= tp->snd_cwnd) {
			tp->t_bytes_acked -= tp->snd_cwnd;
			tp->snd_cwnd = min(tcp_win, TCP_MAXWIN << tp->snd_scale);
		}
	} else {
		if (cubic_target_win > tp->snd_cwnd) {
			/*
			 * The target window is computed for the next RTT.
			 * To reach this value, cwnd will have to be updated
			 * one segment at a time. Compute how many bytes
			 * need to be acknowledged before we can increase
			 * the cwnd by one segment.
			 */
			u_int64_t incr_win;
			incr_win = tp->snd_cwnd * tp->t_maxseg;
			incr_win /= (cubic_target_win - tp->snd_cwnd);
			if (incr_win > 0 &&
			    tp->t_bytes_acked >= incr_win) {
				tp->t_bytes_acked -= incr_win;
				tp->snd_cwnd =
				    min((tp->snd_cwnd + tp->t_maxseg),
				    TCP_MAXWIN << tp->snd_scale);
			}
		}
	}
}
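
/*
 * Illustrative arithmetic for the incr_win computation above: with
 * cwnd = 100 kB, cubic_target_win = 110 kB and maxseg = 1 kB, each
 * 1 kB of window growth requires incr_win = 100 * 1 / 10 = 10 kB of
 * acknowledged data. A full window of acks (100 kB) therefore grows
 * cwnd by about 10 segments, reaching the target in roughly one RTT.
 */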

static void
tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th)
{
	/* Do not increase the congestion window in the non-validated phase */
	if (tcp_cc_is_cwnd_nonvalidated(tp) != 0)
		return;

	if (tp->snd_cwnd >= tp->snd_ssthresh) {
		/* Congestion avoidance phase */
		tcp_cubic_congestion_avd(tp, th);
	} else {
		/*
		 * Use 2*SMSS as a limit on the increment, as suggested
		 * by RFC 3465 section 2.3
		 */
		uint32_t acked, abc_lim, incr;

		acked = BYTES_ACKED(th, tp);
		abc_lim = (tcp_do_rfc3465_lim2 &&
		    tp->snd_nxt == tp->snd_max) ?
		    2 * tp->t_maxseg : tp->t_maxseg;
		incr = min(acked, abc_lim);

		tp->snd_cwnd += incr;
		tp->snd_cwnd = min(tp->snd_cwnd,
		    TCP_MAXWIN << tp->snd_scale);
	}
}
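
/*
 * Note on the 2*SMSS limit: with appropriate byte counting and L = 2
 * (RFC 3465), each ack may grow cwnd by up to two segments, which
 * preserves slow start's doubling per RTT even when the receiver
 * delays acks and acknowledges two segments at a time.
 */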

static void
tcp_cubic_pre_fr(struct tcpcb *tp)
{
	uint32_t win, avg;
	int32_t dev;
	tp->t_ccstate->cub_epoch_start = 0;
	tp->t_ccstate->cub_tcp_win = 0;
	tp->t_ccstate->cub_target_win = 0;
	tp->t_ccstate->cub_tcp_bytes_acked = 0;

	win = min(tp->snd_cwnd, tp->snd_wnd);
	if (tp->t_flagsext & TF_CWND_NONVALIDATED) {
		tp->t_lossflightsize = tp->snd_max - tp->snd_una;
		win = (max(tp->t_pipeack, tp->t_lossflightsize)) >> 1;
	} else {
		tp->t_lossflightsize = 0;
	}
	/*
	 * Record the congestion window at which packet loss occurred as
	 * cub_last_max.
	 *
	 * If the congestion window is less than the last max window when
	 * loss occurred, it indicates that the capacity available in the
	 * network has gone down. This can happen if a new flow has started
	 * and is capturing some of the bandwidth. To reach convergence
	 * quickly, back off a little more. Disable fast convergence to
	 * turn off this behavior.
	 */
	if (win < tp->t_ccstate->cub_last_max &&
	    tcp_cubic_fast_convergence == 1)
		tp->t_ccstate->cub_last_max = win *
		    tcp_cubic_fast_convergence_factor;
	else
		tp->t_ccstate->cub_last_max = win;

	if (tp->t_ccstate->cub_last_max == 0) {
		/*
		 * If last_max is zero because snd_wnd is zero or for
		 * any other reason, initialize it to the amount of data
		 * in flight.
		 */
		tp->t_ccstate->cub_last_max = tp->snd_max - tp->snd_una;
	}

	/*
	 * Compute the average and mean absolute deviation of the
	 * window at which packet loss occurred.
	 */
	if (tp->t_ccstate->cub_avg_lastmax == 0) {
		tp->t_ccstate->cub_avg_lastmax = tp->t_ccstate->cub_last_max;
	} else {
		/*
		 * The average is computed by taking 63 parts of
		 * history and one part of the most recent value.
		 */
		avg = tp->t_ccstate->cub_avg_lastmax;
		avg = (avg << 6) - avg;
		tp->t_ccstate->cub_avg_lastmax =
		    (avg + tp->t_ccstate->cub_last_max) >> 6;
	}

	/* Calculate the deviation from the average */
	dev = tp->t_ccstate->cub_avg_lastmax - tp->t_ccstate->cub_last_max;

	/* Take the absolute value */
	if (dev < 0)
		dev = -dev;

	if (tp->t_ccstate->cub_mean_dev == 0) {
		tp->t_ccstate->cub_mean_dev = dev;
	} else {
		/* Exponential average: 15 parts history, 1 part new value */
		dev = dev + ((tp->t_ccstate->cub_mean_dev << 4)
		    - tp->t_ccstate->cub_mean_dev);
		tp->t_ccstate->cub_mean_dev = dev >> 4;
	}

	/* Back off the congestion window by the tcp_cubic_backoff factor */
	win = win - (win * tcp_cubic_backoff);
	win = (win / tp->t_maxseg);
	if (win < 2)
		win = 2;
	tp->snd_ssthresh = win * tp->t_maxseg;
	tcp_cc_resize_sndbuf(tp);
}
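
/*
 * Example of the backoff above: with win = 100 segments, ssthresh
 * becomes 100 - 100 * 0.2 = 80 segments (rounded down to a whole number
 * of segments, with a floor of 2 segments), so the flow resumes
 * congestion avoidance from 80% of the window at which loss occurred.
 */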

static void
tcp_cubic_post_fr(struct tcpcb *tp, struct tcphdr *th)
{
	uint32_t flight_size = 0;

	if (SEQ_LEQ(th->th_ack, tp->snd_max))
		flight_size = tp->snd_max - th->th_ack;

	if (SACK_ENABLED(tp) && tp->t_lossflightsize > 0) {
		u_int32_t total_rxt_size = 0, ncwnd;
		/*
		 * When SACK is enabled, the number of retransmitted bytes
		 * can be counted more accurately.
		 */
		total_rxt_size = tcp_rxtseg_total_size(tp);
		ncwnd = max(tp->t_pipeack, tp->t_lossflightsize);
		if (total_rxt_size <= ncwnd) {
			ncwnd = ncwnd - total_rxt_size;
		}

		/*
		 * To avoid sending a large burst at the end of recovery,
		 * set a max limit on ncwnd.
		 */
		ncwnd = min(ncwnd, (tp->t_maxseg << 6));
		ncwnd = ncwnd >> 1;
		flight_size = max(ncwnd, flight_size);
	}
	/*
	 * Complete ack. The current window was inflated for fast recovery.
	 * It has to be deflated post recovery.
	 *
	 * Window inflation should have left us with approximately
	 * snd_ssthresh outstanding data. If the flight size is zero or one
	 * segment, make the congestion window at least as big as two
	 * segments to avoid delayed acknowledgements. This is according
	 * to RFC 6582.
	 */
	if (flight_size < tp->snd_ssthresh)
		tp->snd_cwnd = max(flight_size, tp->t_maxseg)
		    + tp->t_maxseg;
	else
		tp->snd_cwnd = tp->snd_ssthresh;
	tp->t_ccstate->cub_tcp_win = 0;
	tp->t_ccstate->cub_target_win = 0;
	tp->t_ccstate->cub_tcp_bytes_acked = 0;
}
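
/*
 * The SACK path above caps the adjusted estimate at 64 segments before
 * halving it, so it can contribute at most 32 segments to flight_size;
 * together with the RFC 6582 deflation, this bounds the burst a sender
 * can emit immediately after leaving recovery.
 */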

static void
tcp_cubic_after_timeout(struct tcpcb *tp)
{
	VERIFY(tp->t_ccstate != NULL);

	/*
	 * Avoid adjusting the congestion window due to SYN retransmissions.
	 * If more than one byte (the SYN) is outstanding, the window still
	 * needs to be adjusted.
	 */
	if (tp->t_state < TCPS_ESTABLISHED &&
	    ((int)(tp->snd_max - tp->snd_una) <= 1))
		return;

	if (!IN_FASTRECOVERY(tp)) {
		tcp_cubic_clear_state(tp);
		tcp_cubic_pre_fr(tp);
	}

	/*
	 * Close the congestion window down to one segment as a retransmit
	 * timeout might indicate severe congestion.
	 */
	tp->snd_cwnd = tp->t_maxseg;
}

static int
tcp_cubic_delay_ack(struct tcpcb *tp, struct tcphdr *th)
{
	return (tcp_cc_delay_ack(tp, th));
}

/*
 * When switching from a different congestion-control algorithm, it is
 * better for Cubic to start fresh. The state required for the Cubic
 * calculation might be stale and might not represent the current state
 * of the network. Starting as a new connection lets it probe and learn
 * the existing network conditions.
 */
static void
tcp_cubic_switch_cc(struct tcpcb *tp, uint16_t old_cc_index)
{
#pragma unused(old_cc_index)
	tcp_cubic_cwnd_init_or_reset(tp);

	OSIncrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets);
}

static inline void tcp_cubic_clear_state(struct tcpcb *tp)
{
	tp->t_ccstate->cub_last_max = 0;
	tp->t_ccstate->cub_epoch_start = 0;
	tp->t_ccstate->cub_origin_point = 0;
	tp->t_ccstate->cub_tcp_win = 0;
	tp->t_ccstate->cub_tcp_bytes_acked = 0;
	tp->t_ccstate->cub_epoch_period = 0;
	tp->t_ccstate->cub_target_win = 0;
}
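
/*
 * Note that cub_avg_lastmax and cub_mean_dev are not cleared here, so
 * the loss-window history that feeds TCP_CUBIC_ENABLE_TCPMODE() (via
 * cub_mean_dev) survives a state reset.
 */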