]>
Commit | Line | Data |
---|---|---|
39236c6e | 1 | /* |
3e170ce0 | 2 | * Copyright (c) 2012-2015 Apple Inc. All rights reserved. |
39236c6e A |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <sys/param.h> | |
30 | #include <sys/systm.h> | |
31 | #include <sys/kernel.h> | |
32 | #include <sys/mbuf.h> | |
33 | #include <sys/mcache.h> | |
34 | #include <sys/socket.h> | |
35 | #include <sys/socketvar.h> | |
36 | #include <sys/syslog.h> | |
37 | #include <sys/protosw.h> | |
38 | ||
39 | #include <kern/zalloc.h> | |
40 | #include <kern/locks.h> | |
41 | ||
42 | #include <mach/thread_act.h> | |
43 | #include <mach/sdt.h> | |
44 | ||
45 | #include <dev/random/randomdev.h> | |
46 | ||
47 | #include <net/if.h> | |
48 | #include <netinet/in.h> | |
49 | #include <netinet/in_var.h> | |
50 | #include <netinet/tcp.h> | |
51 | #include <netinet/tcp_fsm.h> | |
52 | #include <netinet/tcp_seq.h> | |
53 | #include <netinet/tcp_var.h> | |
54 | #include <netinet/mptcp_var.h> | |
55 | #include <netinet/mptcp.h> | |
56 | #include <netinet/mptcp_seq.h> | |
57 | #include <netinet/mptcp_opt.h> | |
58 | #include <netinet/mptcp_timer.h> | |
59 | ||
60 | int mptcp_enable = 1; /* defaults to on; exported read-write as sysctl leaf "enable" under _net_inet_mptcp */ | |
61 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED, | |
62 | &mptcp_enable, 0, "Enable Multipath TCP Support"); | |
63 | ||
39236c6e A |
64 | /* Number of times to try negotiating MPTCP on SYN retransmissions */ |
65 | int mptcp_mpcap_retries = MPTCP_CAPABLE_RETRIES; | |
66 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr, | |
67 | CTLFLAG_RW | CTLFLAG_LOCKED, | |
68 | &mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries"); | |
69 | ||
70 | /* | |
71 | * By default, DSS checksum is turned off, revisit if we ever do | |
72 | * MPTCP for non SSL Traffic. | |
73 | */ | |
74 | int mptcp_dss_csum = 0; | |
75 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dss_csum, CTLFLAG_RW | CTLFLAG_LOCKED, | |
76 | &mptcp_dss_csum, 0, "Enable DSS checksum"); | |
77 | ||
78 | /* | |
79 | * When mptcp_fail_thresh number of retransmissions are sent, subflow failover | |
80 | * is attempted on a different path. | |
81 | */ | |
82 | int mptcp_fail_thresh = 1; | |
83 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fail, CTLFLAG_RW | CTLFLAG_LOCKED, | |
84 | &mptcp_fail_thresh, 0, "Failover threshold"); | |
85 | ||
86 | ||
87 | /* | |
fe8ab488 A |
88 | * MPTCP subflows have TCP keepalives set to ON. Set a conservative keeptime |
89 | * as carrier networks mostly have a 30 minute to 60 minute NAT Timeout. | |
90 | * Some carrier networks have a timeout of 10 or 15 minutes. | |
39236c6e | 91 | */ |
fe8ab488 | 92 | int mptcp_subflow_keeptime = 60*14; /* 14 minutes, expressed in seconds */ |
39236c6e A |
93 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, keepalive, CTLFLAG_RW | CTLFLAG_LOCKED, |
94 | &mptcp_subflow_keeptime, 0, "Keepalive in seconds"); | |
95 | ||
96 | /* | |
97 | * MP_PRIO option. | |
| * Defaults to enabled (1); exported as sysctl leaf "mpprio". | |
98 | */ | |
99 | int mptcp_mpprio_enable = 1; | |
100 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mpprio, CTLFLAG_RW | CTLFLAG_LOCKED, | |
101 | &mptcp_mpprio_enable, 0, "Enable MP_PRIO option"); | |
102 | ||
103 | /* | |
104 | * REMOVE_ADDR option. | |
| * Defaults to enabled (1); exported as sysctl leaf "remaddr". | |
105 | */ | |
106 | int mptcp_remaddr_enable = 1; | |
107 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, remaddr, CTLFLAG_RW | CTLFLAG_LOCKED, | |
108 | &mptcp_remaddr_enable, 0, "Enable REMOVE_ADDR option"); | |
109 | ||
fe8ab488 A |
110 | /* |
111 | * FastJoin Option | |
| * mptcp_fastjoin defaults to enabled (1); the zero-RTT variant declared | |
| * right after it defaults to disabled (0). | |
112 | */ | |
113 | int mptcp_fastjoin = 1; | |
114 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fastjoin, CTLFLAG_RW | CTLFLAG_LOCKED, | |
115 | &mptcp_fastjoin, 0, "Enable FastJoin Option"); | |
116 | ||
117 | int mptcp_zerortt_fastjoin = 0; | |
118 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, zerortt_fastjoin, CTLFLAG_RW | | |
119 | CTLFLAG_LOCKED, &mptcp_zerortt_fastjoin, 0, | |
120 | "Enable Zero RTT Fast Join"); | |
121 | ||
122 | /* | |
123 | * R/W Notification on resume | |
| * Defaults to disabled (0). | |
124 | */ | |
125 | int mptcp_rwnotify = 0; | |
126 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rwnotify, CTLFLAG_RW | CTLFLAG_LOCKED, | |
127 | &mptcp_rwnotify, 0, "Enable RW notify on resume"); | |
128 | ||
3e170ce0 A |
129 | /* |
130 | * Using RTT history for sending new data | |
| * | |
| * NOTE(review): the knob defaults to 1 and a non-zero value is what turns | |
| * the RTT comparison in mptcp_get_subflow() on, so the sysctl text | |
| * "Disable RTT History" describes the effect of writing 0, not of the | |
| * default value. | |
131 | */ | |
132 | int mptcp_use_rtthist = 1; | |
133 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rtthist, CTLFLAG_RW | CTLFLAG_LOCKED, | |
134 | &mptcp_use_rtthist, 0, "Disable RTT History"); | |
135 | ||
136 | #define MPTCP_RTTHIST_MINTHRESH 500 /* floor for rtthist_thresh: mptcp_get_subflow() takes MAX() of the two */ | |
137 | int mptcp_rtthist_rtthresh = 600; | |
138 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rtthist_thresh, CTLFLAG_RW | CTLFLAG_LOCKED, | |
139 | &mptcp_rtthist_rtthresh, 0, "Rtt threshold"); | |
140 | ||
141 | /* | |
142 | * Use RTO history for sending new data | |
| * | |
| * NOTE(review): as with rtthist, the knob defaults to 1 (RTO comparison | |
| * active in mptcp_get_subflow()); the sysctl text describes writing 0. | |
143 | */ | |
144 | int mptcp_use_rto = 1; | |
145 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, userto, CTLFLAG_RW | CTLFLAG_LOCKED, | |
146 | &mptcp_use_rto, 0, "Disable RTO for subflow selection"); | |
147 | ||
148 | #define MPTCP_RTO_MINTHRESH 1000 /* floor for rto_thresh: mptcp_get_subflow() takes MAX() of the two */ | |
149 | int mptcp_rtothresh = 1500; | |
150 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rto_thresh, CTLFLAG_RW | CTLFLAG_LOCKED, | |
151 | &mptcp_rtothresh, 0, "RTO threshold"); | |
152 | ||
153 | /* | |
154 | * Use server's chosen path for sending new data | |
| * Defaults to enabled (1). | |
155 | */ | |
156 | int mptcp_peerswitch = 1; | |
157 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, use_peer, CTLFLAG_RW | CTLFLAG_LOCKED, | |
158 | &mptcp_peerswitch, 0, "Use peer"); | |
159 | ||
160 | #define MPTCP_PEERSWITCH_CNTMIN 3 /* floor for peerswitchno: mptcp_get_subflow() takes MAX() of the two */ | |
161 | uint32_t mptcp_peerswitch_cnt = 3; | |
162 | SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, peerswitchno, CTLFLAG_RW | CTLFLAG_LOCKED, | |
163 | &mptcp_peerswitch_cnt, 0, "Set threshold based on peer's data arrival"); | |
164 | ||
165 | /* | |
166 | * Probe the preferred path, when it is not in use | |
| * | |
| * mptcp_output() only probes when mptcp_probeto is non-zero and at least | |
| * MPTCP_PROBETO_MIN; the elapsed time it compares against mptcp_probeto is | |
| * measured with tcp_now. The number of probe writes is bounded by | |
| * MIN(mptcp_probecnt, MPTCP_PROBE_MX). | |
167 | */ | |
168 | #define MPTCP_PROBETO_MIN 500 | |
169 | uint32_t mptcp_probeto = 1000; | |
170 | SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probeto, CTLFLAG_RW | CTLFLAG_LOCKED, | |
171 | &mptcp_probeto, 0, "Disable probing by setting to 0"); | |
172 | ||
173 | #define MPTCP_PROBE_MX 15 | |
174 | uint32_t mptcp_probecnt = 5; | |
175 | SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probecnt, CTLFLAG_RW | CTLFLAG_LOCKED, | |
176 | &mptcp_probecnt, 0, "Number of probe writes"); | |
177 | ||
178 | /* | |
179 | * Static declarations | |
| * Forward declarations of file-local (static) helpers. | |
180 | */ | |
181 | static int mptcp_validate_csum(struct tcpcb *, struct mbuf *, int); | |
182 | static uint16_t mptcp_input_csum(struct tcpcb *, struct mbuf *, int); | |
183 | ||
39236c6e A |
184 | /* |
185 | * MPTCP input, called when data has been read from a subflow socket. | |
186 | */ | |
187 | void | |
188 | mptcp_input(struct mptses *mpte, struct mbuf *m) | |
189 | { | |
190 | struct socket *mp_so; | |
191 | struct mptcb *mp_tp = NULL; | |
192 | u_int64_t mb_dsn; | |
193 | u_int32_t mb_datalen; | |
194 | int count = 0; | |
fe8ab488 | 195 | struct mbuf *save = NULL, *prev = NULL; |
39236c6e | 196 | struct mbuf *freelist = NULL, *tail = NULL; |
fe8ab488 | 197 | boolean_t in_fallback = FALSE; |
39236c6e A |
198 | |
199 | VERIFY(m->m_flags & M_PKTHDR); | |
200 | ||
201 | MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ | |
202 | mp_so = mpte->mpte_mppcb->mpp_socket; | |
203 | ||
204 | DTRACE_MPTCP(input); | |
205 | ||
206 | /* | |
207 | * Each mbuf contains MPTCP Data Sequence Map | |
208 | * Process the data for reassembly, delivery to MPTCP socket | |
209 | * client, etc. | |
210 | * | |
211 | */ | |
212 | count = mp_so->so_rcv.sb_cc; | |
213 | ||
214 | VERIFY(m != NULL); | |
fe8ab488 A |
215 | mp_tp = mpte->mpte_mptcb; |
216 | VERIFY(mp_tp != NULL); | |
217 | ||
218 | /* Ok to check for this flag without lock as its set in this thread */ | |
219 | in_fallback = (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP); | |
220 | ||
39236c6e A |
221 | /* |
222 | * In the degraded fallback case, data is accepted without DSS map | |
223 | */ | |
fe8ab488 | 224 | if (in_fallback) { |
39037602 A |
225 | fallback: |
226 | /* | |
227 | * assume degraded flow as this may be the first packet | |
228 | * without DSS, and the subflow state is not updated yet. | |
fe8ab488 | 229 | */ |
39236c6e A |
230 | if (sbappendstream(&mp_so->so_rcv, m)) |
231 | sorwakeup(mp_so); | |
232 | DTRACE_MPTCP5(receive__degraded, struct mbuf *, m, | |
233 | struct socket *, mp_so, | |
234 | struct sockbuf *, &mp_so->so_rcv, | |
235 | struct sockbuf *, &mp_so->so_snd, | |
236 | struct mptses *, mpte); | |
237 | count = mp_so->so_rcv.sb_cc - count; | |
3e170ce0 A |
238 | mptcplog((LOG_DEBUG, "MPTCP Receiver: Fallback read %d bytes\n", |
239 | count), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE); | |
39236c6e A |
240 | return; |
241 | } | |
242 | ||
39236c6e A |
243 | MPT_LOCK(mp_tp); |
244 | do { | |
fe8ab488 A |
245 | /* If fallback occurs, mbufs will not have PKTF_MPTCP set */ |
246 | if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) { | |
247 | MPT_UNLOCK(mp_tp); | |
248 | goto fallback; | |
249 | } | |
250 | ||
39236c6e | 251 | save = m->m_next; |
fe8ab488 A |
252 | /* |
253 | * A single TCP packet formed of multiple mbufs | |
254 | * holds DSS mapping in the first mbuf of the chain. | |
255 | * Other mbufs in the chain may have M_PKTHDR set | |
256 | * even though they belong to the same TCP packet | |
257 | * and therefore use the DSS mapping stored in the | |
258 | * first mbuf of the mbuf chain. mptcp_input() can | |
259 | * get an mbuf chain with multiple TCP packets. | |
260 | */ | |
261 | while (save && (!(save->m_flags & M_PKTHDR) || | |
262 | !(save->m_pkthdr.pkt_flags & PKTF_MPTCP))) { | |
263 | prev = save; | |
264 | save = save->m_next; | |
265 | } | |
266 | if (prev) | |
267 | prev->m_next = NULL; | |
268 | else | |
269 | m->m_next = NULL; | |
39236c6e A |
270 | |
271 | mb_dsn = m->m_pkthdr.mp_dsn; | |
272 | mb_datalen = m->m_pkthdr.mp_rlen; | |
273 | ||
274 | if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvatmark)) { | |
275 | tcpstat.tcps_mp_oodata++; | |
276 | MPT_UNLOCK(mp_tp); | |
277 | m_freem(m); | |
278 | return; | |
279 | /* | |
280 | * Reassembly queue support here in future. Per spec, | |
281 | * senders must implement retransmission timer to | |
282 | * retransmit unacked data. Dropping out of order | |
283 | * gives a slight hit on performance but allows us to | |
284 | * deploy MPTCP and protects us against in-window DoS | |
285 | * attacks that attempt to use up memory by sending | |
286 | * out of order data. When doing load sharing across | |
287 | * subflows, out of order support is a must. | |
288 | */ | |
289 | } | |
290 | ||
291 | if (MPTCP_SEQ_LT(mb_dsn, mp_tp->mpt_rcvatmark)) { | |
39236c6e A |
292 | if (MPTCP_SEQ_LEQ((mb_dsn + mb_datalen), |
293 | mp_tp->mpt_rcvatmark)) { | |
294 | if (freelist == NULL) | |
fe8ab488 A |
295 | freelist = m; |
296 | else | |
39236c6e | 297 | tail->m_next = m; |
fe8ab488 A |
298 | |
299 | if (prev != NULL) | |
300 | tail = prev; | |
301 | else | |
39236c6e | 302 | tail = m; |
fe8ab488 | 303 | |
39236c6e | 304 | m = save; |
fe8ab488 | 305 | prev = save = NULL; |
39236c6e A |
306 | continue; |
307 | } else { | |
308 | m_adj(m, (mp_tp->mpt_rcvatmark - mb_dsn)); | |
309 | } | |
3e170ce0 A |
310 | mptcplog((LOG_INFO, "MPTCP Receiver: Left Edge %llu\n", |
311 | mp_tp->mpt_rcvatmark), | |
312 | MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE); | |
39236c6e A |
313 | } |
314 | ||
315 | MPT_UNLOCK(mp_tp); | |
316 | if (sbappendstream(&mp_so->so_rcv, m)) { | |
317 | sorwakeup(mp_so); | |
318 | } | |
319 | DTRACE_MPTCP6(receive, struct mbuf *, m, struct socket *, mp_so, | |
320 | struct sockbuf *, &mp_so->so_rcv, | |
321 | struct sockbuf *, &mp_so->so_snd, | |
322 | struct mptses *, mpte, | |
323 | struct mptcb *, mp_tp); | |
324 | MPT_LOCK(mp_tp); | |
325 | count = mp_so->so_rcv.sb_cc - count; | |
326 | tcpstat.tcps_mp_rcvtotal++; | |
327 | tcpstat.tcps_mp_rcvbytes += count; | |
3e170ce0 A |
328 | mptcplog((LOG_DEBUG, "MPTCP Receiver: Read %d bytes\n", count), |
329 | MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE); | |
330 | ||
39236c6e A |
331 | /* |
332 | * The data received at the MPTCP layer will never exceed the | |
333 | * receive window because anything to the right of the | |
334 | * receive window will be trimmed at the subflow level. | |
335 | */ | |
336 | mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp); | |
337 | mp_tp->mpt_rcvatmark += count; | |
338 | m = save; | |
fe8ab488 | 339 | prev = save = NULL; |
39236c6e A |
340 | count = mp_so->so_rcv.sb_cc; |
341 | } while (m); | |
342 | MPT_UNLOCK(mp_tp); | |
343 | ||
344 | if (freelist) | |
345 | m_freem(freelist); | |
346 | } | |
347 | ||
348 | /* | |
349 | * MPTCP output. | |
| * | |
| * Selects a subflow with mptcp_get_subflow() and pushes pending data | |
| * through it with mptcp_subflow_output(). The caller must hold the MPTE | |
| * lock. | |
| * | |
| * If the subflow write fails with anything other than EWOULDBLOCK, the | |
| * subflow is marked MPTSF_FAILINGOVER (and loses MPTSF_ACTIVE) and the | |
| * selection is retried. When the retry yields the same subflow, or one | |
| * that is itself failing over, the previously tried subflow is restored | |
| * (FAILINGOVER cleared, ACTIVE set), the MPTT_REXMT timer is started and | |
| * the attempt is abandoned until later. | |
| * | |
| * After a successful write, the preferred subflow reported by | |
| * mptcp_get_subflow() may additionally be probed (see the mptcp_probeto / | |
| * mptcp_probecnt tunables), and mpte_active_sub is updated if the subflow | |
| * used differs from the recorded one. | |
| * | |
| * Returns EPIPE when the MPTCP socket has SS_CANTSENDMORE set; returns 0 | |
| * in every other case, including when no subflow was available or the | |
| * subflow write failed. | |
350 | */ | |
351 | int | |
352 | mptcp_output(struct mptses *mpte) | |
353 | { | |
354 | struct mptsub *mpts; | |
355 | struct mptsub *mpts_tried = NULL; /* subflow whose write failed on an earlier pass, if any */ | |
356 | struct socket *mp_so; | |
3e170ce0 | 357 | struct mptsub *preferred_mpts = NULL; /* filled in by mptcp_get_subflow() when it has both a best and a second-best candidate */ |
39236c6e A |
358 | int error = 0; /* only used to decide on failover; never returned */ |
359 | ||
360 | MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ | |
361 | mp_so = mpte->mpte_mppcb->mpp_socket; | |
362 | if (mp_so->so_state & SS_CANTSENDMORE) { | |
3e170ce0 A |
363 | mptcplog((LOG_DEBUG, "MPTCP Sender: cantsendmore\n"), |
364 | MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE); | |
39236c6e A |
365 | return (EPIPE); |
366 | } | |
367 | ||
368 | try_again: | |
369 | /* get the "best" subflow to be used for transmission */ | |
3e170ce0 | 370 | mpts = mptcp_get_subflow(mpte, NULL, &preferred_mpts); |
39236c6e | 371 | if (mpts == NULL) { |
3e170ce0 A |
372 | mptcplog((LOG_ERR, "MPTCP Sender: mp_so 0x%llx no subflow\n", |
373 | (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)), | |
374 | MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); | |
39236c6e A |
375 | goto out; |
376 | } | |
377 | ||
3e170ce0 A |
378 | mptcplog((LOG_DEBUG, "MPTCP Sender: mp_so 0x%llx using cid %d \n", |
379 | (uint64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid), | |
380 | MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE); | |
39236c6e A |
381 | |
382 | /* In case there's just one flow, we reattempt later */ | |
383 | MPTS_LOCK(mpts); /* stays held across mptcp_subflow_output() below */ | |
384 | if ((mpts_tried != NULL) && ((mpts == mpts_tried) || | |
385 | (mpts->mpts_flags & MPTSF_FAILINGOVER))) { | |
386 | MPTS_UNLOCK(mpts); | |
387 | MPTS_LOCK(mpts_tried); | |
388 | mpts_tried->mpts_flags &= ~MPTSF_FAILINGOVER; /* undo the failover marking applied after the failed write */ | |
389 | mpts_tried->mpts_flags |= MPTSF_ACTIVE; | |
390 | MPTS_UNLOCK(mpts_tried); | |
3e170ce0 A |
391 | mptcp_start_timer(mpte, MPTT_REXMT); |
392 | mptcplog((LOG_DEBUG, "MPTCP Sender: mp_so 0x%llx retry later\n", | |
393 | (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)), | |
394 | MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE); | |
39236c6e A |
395 | goto out; |
396 | } | |
397 | ||
398 | DTRACE_MPTCP3(output, struct mptses *, mpte, struct mptsub *, mpts, | |
399 | struct socket *, mp_so); | |
400 | error = mptcp_subflow_output(mpte, mpts); | |
490019cf | 401 | if (error && error != EWOULDBLOCK) { /* EWOULDBLOCK does not trigger failover */ |
39236c6e A |
402 | /* can be a temporary loss of source address or other error */ |
403 | mpts->mpts_flags |= MPTSF_FAILINGOVER; | |
404 | mpts->mpts_flags &= ~MPTSF_ACTIVE; | |
405 | mpts_tried = mpts; /* remembered so the next pass can detect that no alternative exists */ | |
406 | MPTS_UNLOCK(mpts); | |
490019cf A |
407 | mptcplog((LOG_INFO, "MPTCP Sender: %s Error = %d \n", |
408 | __func__, error), | |
3e170ce0 | 409 | MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); |
39236c6e A |
410 | goto try_again; |
411 | } | |
412 | /* The model is to have only one active flow at a time */ | |
413 | mpts->mpts_flags |= MPTSF_ACTIVE; | |
3e170ce0 | 414 | mpts->mpts_probesoon = mpts->mpts_probecnt = 0; /* the subflow just carried data, so reset its probe state */ |
39236c6e | 415 | MPTS_UNLOCK(mpts); |
3e170ce0 A |
416 | |
417 | /* Allows us to update the smoothed rtt */ | |
418 | if ((mptcp_probeto) && (mptcp_probeto >= MPTCP_PROBETO_MIN) && | |
419 | (mpts != preferred_mpts) && (preferred_mpts != NULL)) { | |
420 | MPTS_LOCK(preferred_mpts); | |
421 | if (preferred_mpts->mpts_probesoon) { /* non-zero: a probe interval is already running */ | |
422 | if ((tcp_now - preferred_mpts->mpts_probesoon) > | |
423 | mptcp_probeto) { | |
424 | (void) mptcp_subflow_output(mpte, preferred_mpts); /* probe write; result deliberately ignored */ | |
425 | if (preferred_mpts->mpts_probecnt >= | |
426 | MIN(mptcp_probecnt, MPTCP_PROBE_MX)) { | |
427 | preferred_mpts->mpts_probesoon = 0; | |
428 | preferred_mpts->mpts_probecnt = 0; | |
429 | } | |
430 | } | |
431 | } else { | |
432 | preferred_mpts->mpts_probesoon = tcp_now; /* start timing the interval until the first probe */ | |
433 | preferred_mpts->mpts_probecnt = 0; | |
434 | } | |
435 | MPTS_UNLOCK(preferred_mpts); | |
436 | } | |
437 | ||
39236c6e A |
438 | if (mpte->mpte_active_sub == NULL) { |
439 | mpte->mpte_active_sub = mpts; | |
440 | } else if (mpte->mpte_active_sub != mpts) { | |
3e170ce0 A |
441 | mptcplog((LOG_DEBUG, "MPTCP Sender: switch [cid %d, srtt %d]" |
442 | "to [cid %d, srtt %d]\n", /* NOTE(review): the two literals concatenate to "...]to [..." with no separating space */ | |
443 | mpte->mpte_active_sub->mpts_connid, | |
444 | mpte->mpte_active_sub->mpts_srtt >> 5, | |
445 | mpts->mpts_connid, | |
446 | mpts->mpts_srtt >> 5), | |
447 | MPTCP_SENDER_DBG | MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); | |
448 | ||
39236c6e A |
449 | MPTS_LOCK(mpte->mpte_active_sub); |
450 | mpte->mpte_active_sub->mpts_flags &= ~MPTSF_ACTIVE; | |
3e170ce0 | 451 | mpts->mpts_peerswitch = 0; /* NOTE(review): written under the old active subflow's lock, not mpts's own - confirm this is intended */ |
39236c6e A |
452 | MPTS_UNLOCK(mpte->mpte_active_sub); |
453 | mpte->mpte_active_sub = mpts; | |
3e170ce0 | 454 | tcpstat.tcps_mp_switches++; |
39236c6e A |
455 | } |
456 | out: | |
457 | /* subflow errors should not be percolated back up */ | |
458 | return (0); | |
459 | } | |
460 | ||
461 | /* | |
462 | * Return the most eligible subflow to be used for sending data. | |
463 | * This function also serves to check if any alternate subflow is available | |
3e170ce0 A |
464 | * or not. best and second_best flows are chosen by their priority. third_best |
465 | * could be best or second_best but is under loss at the time of evaluation. | |
| * | |
| * mpte		MPTCP session whose subflow list is walked; its lock must be | |
| *		held by the caller. | |
| * ignore	optional subflow to leave out of the selection (may be NULL). | |
| * preferred	optional out-parameter (may be NULL). It is written only | |
| *		when both a best and a second_best candidate were found, | |
| *		and then receives best; on every earlier return path it is | |
| *		left untouched. | |
| * | |
| * Candidate collection, per subflow in list order: | |
| *   - a subflow in MPTSF_MP_DEGRADED state ends the walk and becomes best; | |
| *   - subflows that are neither MP-capable nor flagged for FastJoin/TFO, | |
| *     or that are suspended, disconnected or disconnecting, are skipped; | |
| *   - a subflow marked MPTSF_FAILINGOVER is reconsidered only if its | |
| *     socket exists, is not being torn down, has an empty send buffer and | |
| *     mptcp_no_rto_spike() agrees; otherwise it is remembered as | |
| *     third_best (when it has a usable socket) and skipped; | |
| *   - second_best is the first remaining subflow without MPTSF_PREFERRED; | |
| *   - best is the last remaining subflow with MPTSF_PREFERRED. | |
| * | |
| * Returns second_best (or third_best, possibly NULL) when there is no | |
| * best; returns best when there is no second_best; otherwise returns | |
| * whichever of best / second_best wins the symptoms-hint, RTT, RTO and | |
| * peer-switch checks, in that order, defaulting to best. | |
39236c6e A |
466 | */ |
467 | struct mptsub * | |
3e170ce0 | 468 | mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **preferred) |
39236c6e A |
469 | { |
470 | struct mptsub *mpts; | |
3e170ce0 A |
471 | struct mptsub *best = NULL; |
472 | struct mptsub *second_best = NULL; | |
473 | struct mptsub *third_best = NULL; | |
474 | struct mptsub *symptoms_best = NULL; | |
39236c6e A |
475 | struct socket *so = NULL; |
476 | ||
477 | MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ | |
478 | ||
479 | TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { | |
fe8ab488 | 480 | MPTS_LOCK(mpts); /* every exit from this iteration must drop it again */ |
39236c6e A |
481 | |
482 | if ((ignore) && (mpts == ignore)) { | |
483 | MPTS_UNLOCK(mpts); | |
484 | continue; | |
485 | } | |
486 | ||
487 | /* There can only be one subflow in degraded state */ | |
488 | if (mpts->mpts_flags & MPTSF_MP_DEGRADED) { | |
489 | MPTS_UNLOCK(mpts); | |
3e170ce0 | 490 | best = mpts; |
39236c6e A |
491 | break; |
492 | } | |
493 | ||
fe8ab488 | 494 | /* |
490019cf | 495 | * Subflows with TFO or Fastjoin allow data to be written before |
fe8ab488 A |
496 | * the subflow is mp capable. |
497 | */ | |
498 | if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE) && | |
490019cf A |
499 | !(mpts->mpts_flags & MPTSF_FASTJ_REQD) && |
500 | !(mpts->mpts_flags & MPTSF_TFO_REQD)) { | |
39236c6e A |
501 | MPTS_UNLOCK(mpts); |
502 | continue; | |
503 | } | |
504 | ||
505 | if (mpts->mpts_flags & MPTSF_SUSPENDED) { | |
506 | MPTS_UNLOCK(mpts); | |
507 | continue; | |
508 | } | |
509 | ||
fe8ab488 A |
510 | if ((mpts->mpts_flags & MPTSF_DISCONNECTED) || |
511 | (mpts->mpts_flags & MPTSF_DISCONNECTING)) { | |
512 | MPTS_UNLOCK(mpts); | |
513 | continue; | |
514 | } | |
515 | ||
39236c6e A |
516 | if (mpts->mpts_flags & MPTSF_FAILINGOVER) { |
517 | so = mpts->mpts_socket; | |
518 | if ((so) && (!(so->so_flags & SOF_PCBCLEARING))) { | |
519 | socket_lock(so, 1); /* taken while the subflow lock is held */ | |
fe8ab488 A |
520 | if ((so->so_snd.sb_cc == 0) && |
521 | (mptcp_no_rto_spike(so))) { | |
39236c6e A |
522 | mpts->mpts_flags &= ~MPTSF_FAILINGOVER; /* recovered: falls through to normal candidate handling below */ |
523 | so->so_flags &= ~SOF_MP_TRYFAILOVER; | |
39236c6e A |
524 | socket_unlock(so, 1); |
525 | } else { | |
3e170ce0 A |
526 | third_best = mpts; /* still failing over: kept only as a last resort */ |
527 | mptcplog((LOG_DEBUG, "MPTCP Sender: " | |
528 | "%s cid %d in failover\n", | |
529 | __func__, third_best->mpts_connid), | |
530 | MPTCP_SENDER_DBG, | |
531 | MPTCP_LOGLVL_VERBOSE); | |
39236c6e A |
532 | socket_unlock(so, 1); |
533 | MPTS_UNLOCK(mpts); | |
534 | continue; | |
535 | } | |
536 | } else { | |
537 | MPTS_UNLOCK(mpts); /* no socket, or socket being torn down: not even a third_best */ | |
538 | continue; | |
539 | } | |
540 | } | |
541 | ||
3e170ce0 A |
542 | /* When there are no preferred flows, use first one in list */ |
543 | if ((!second_best) && !(mpts->mpts_flags & MPTSF_PREFERRED)) | |
544 | second_best = mpts; | |
545 | ||
39236c6e | 546 | if (mpts->mpts_flags & MPTSF_PREFERRED) { |
3e170ce0 | 547 | best = mpts; /* overwritten on each match, so the last preferred subflow in the list wins */ |
39236c6e A |
548 | } |
549 | ||
39236c6e A |
550 | MPTS_UNLOCK(mpts); |
551 | } | |
3e170ce0 | 552 | |
39236c6e A |
553 | /* |
554 | * If there is no preferred or backup subflow, and there is no active | |
555 | * subflow use the last usable subflow. | |
556 | */ | |
3e170ce0 A |
557 | if (best == NULL) { |
558 | return (second_best ? second_best : third_best); /* may be NULL; *preferred is not written */ | |
39236c6e A |
559 | } |
560 | ||
3e170ce0 A |
561 | if (second_best == NULL) { |
562 | return (best ? best : third_best); /* best is known non-NULL here, so the third_best arm cannot be taken; *preferred is not written */ | |
563 | } | |
564 | ||
565 | if (preferred != NULL) | |
566 | *preferred = best; | |
567 | ||
568 | /* Use a hint from symptomsd if it exists */ | |
569 | symptoms_best = mptcp_use_symptoms_hints(best, second_best); | |
570 | if (symptoms_best != NULL) | |
571 | return (symptoms_best); | |
572 | ||
573 | /* Compare RTTs, select second_best if best's rtt exceeds rttthresh */ | |
574 | if ((mptcp_use_rtthist) && | |
575 | (best->mpts_srtt) && (second_best->mpts_srtt) && | |
576 | (best->mpts_srtt > second_best->mpts_srtt) && | |
577 | (best->mpts_srtt >= MAX((MPTCP_RTTHIST_MINTHRESH << 5), | |
578 | (mptcp_rtthist_rtthresh << 5)))) { /* thresholds are shifted left by 5 before comparing; the log below shifts mpts_srtt right by 5 */ | |
579 | tcpstat.tcps_mp_sel_rtt++; | |
580 | mptcplog((LOG_DEBUG, "MPTCP Sender: %s best cid %d" | |
581 | " at rtt %d, second cid %d at rtt %d\n", __func__, | |
582 | best->mpts_connid, best->mpts_srtt >> 5, | |
583 | second_best->mpts_connid, | |
584 | second_best->mpts_srtt >> 5), | |
585 | MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); | |
586 | return (second_best); | |
587 | } | |
588 | ||
589 | /* Compare RTOs, select second_best if best's rto exceeds rtothresh */ | |
590 | if ((mptcp_use_rto) && | |
591 | (best->mpts_rxtcur) && (second_best->mpts_rxtcur) && | |
592 | (best->mpts_rxtcur > second_best->mpts_rxtcur) && | |
593 | (best->mpts_rxtcur >= | |
594 | MAX(MPTCP_RTO_MINTHRESH, mptcp_rtothresh))) { | |
595 | tcpstat.tcps_mp_sel_rto++; | |
596 | mptcplog((LOG_DEBUG, "MPTCP Sender: %s best cid %d" | |
597 | " at rto %d, second cid %d at rto %d\n", __func__, | |
598 | best->mpts_connid, best->mpts_rxtcur, | |
599 | second_best->mpts_connid, second_best->mpts_rxtcur), | |
600 | MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); | |
601 | ||
602 | return (second_best); | |
603 | } | |
604 | ||
605 | /* If second_best received data, use second_best */ | |
606 | if (mptcp_peerswitch && | |
607 | (second_best->mpts_peerswitch > | |
608 | MAX(MPTCP_PEERSWITCH_CNTMIN, mptcp_peerswitch_cnt))) { | |
609 | tcpstat.tcps_mp_sel_peer++; | |
610 | mptcplog((LOG_DEBUG, "MPTCP Sender: %s: best cid %d" | |
611 | " but using cid %d after receiving %d segments\n", | |
612 | __func__, best->mpts_connid, second_best->mpts_connid, | |
613 | second_best->mpts_peerswitch), MPTCP_SENDER_DBG, | |
614 | MPTCP_LOGLVL_LOG); | |
615 | return (second_best); | |
616 | } | |
617 | return (best); | |
39236c6e A |
618 | } |
619 | ||
fe8ab488 A |
620 | struct mptsub * |
621 | mptcp_get_pending_subflow(struct mptses *mpte, struct mptsub *ignore) | |
622 | { | |
623 | struct mptsub *mpts = NULL; | |
39037602 | 624 | |
fe8ab488 A |
625 | MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ |
626 | ||
627 | TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { | |
628 | MPTS_LOCK(mpts); | |
629 | ||
630 | if ((ignore) && (mpts == ignore)) { | |
631 | MPTS_UNLOCK(mpts); | |
632 | continue; | |
633 | } | |
634 | ||
635 | if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) { | |
636 | MPTS_UNLOCK(mpts); | |
637 | break; | |
638 | } | |
639 | ||
640 | MPTS_UNLOCK(mpts); | |
641 | } | |
642 | return (mpts); | |
643 | } | |
644 | ||
3e170ce0 A |
645 | static const char * |
646 | mptcp_event_to_str(uint32_t event) | |
647 | { | |
648 | const char *c = "UNDEFINED"; | |
649 | switch (event) { | |
650 | case MPCE_CLOSE: | |
651 | c = "MPCE_CLOSE"; | |
652 | break; | |
653 | case MPCE_RECV_DATA_ACK: | |
654 | c = "MPCE_RECV_DATA_ACK"; | |
655 | break; | |
656 | case MPCE_RECV_DATA_FIN: | |
657 | c = "MPCE_RECV_DATA_FIN"; | |
658 | break; | |
659 | } | |
660 | return (c); | |
661 | } | |
662 | ||
663 | static const char * | |
664 | mptcp_state_to_str(mptcp_state_t state) | |
665 | { | |
666 | const char *c = "UNDEFINED"; | |
667 | switch (state) { | |
668 | case MPTCPS_CLOSED: | |
669 | c = "MPTCPS_CLOSED"; | |
670 | break; | |
671 | case MPTCPS_LISTEN: | |
672 | c = "MPTCPS_LISTEN"; | |
673 | break; | |
674 | case MPTCPS_ESTABLISHED: | |
675 | c = "MPTCPS_ESTABLISHED"; | |
676 | break; | |
677 | case MPTCPS_CLOSE_WAIT: | |
678 | c = "MPTCPS_CLOSE_WAIT"; | |
679 | break; | |
680 | case MPTCPS_FIN_WAIT_1: | |
681 | c = "MPTCPS_FIN_WAIT_1"; | |
682 | break; | |
683 | case MPTCPS_CLOSING: | |
684 | c = "MPTCPS_CLOSING"; | |
685 | break; | |
686 | case MPTCPS_LAST_ACK: | |
687 | c = "MPTCPS_LAST_ACK"; | |
688 | break; | |
689 | case MPTCPS_FIN_WAIT_2: | |
690 | c = "MPTCPS_FIN_WAIT_2"; | |
691 | break; | |
692 | case MPTCPS_TIME_WAIT: | |
693 | c = "MPTCPS_TIME_WAIT"; | |
694 | break; | |
3e170ce0 A |
695 | case MPTCPS_TERMINATE: |
696 | c = "MPTCPS_TERMINATE"; | |
697 | break; | |
698 | } | |
699 | return (c); | |
700 | } | |
701 | ||
39236c6e A |
702 | void |
703 | mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event) | |
704 | { | |
705 | MPT_LOCK_ASSERT_HELD(mp_tp); | |
3e170ce0 | 706 | mptcp_state_t old_state = mp_tp->mpt_state; |
39236c6e | 707 | |
39037602 | 708 | DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, |
39236c6e A |
709 | uint32_t, event); |
710 | ||
711 | switch (mp_tp->mpt_state) { | |
712 | case MPTCPS_CLOSED: | |
713 | case MPTCPS_LISTEN: | |
714 | mp_tp->mpt_state = MPTCPS_CLOSED; | |
715 | break; | |
716 | ||
717 | case MPTCPS_ESTABLISHED: | |
fe8ab488 | 718 | if (event == MPCE_CLOSE) { |
39236c6e | 719 | mp_tp->mpt_state = MPTCPS_FIN_WAIT_1; |
fe8ab488 | 720 | mp_tp->mpt_sndmax += 1; /* adjust for Data FIN */ |
39037602 | 721 | } else if (event == MPCE_RECV_DATA_FIN) { |
fe8ab488 | 722 | mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */ |
39236c6e | 723 | mp_tp->mpt_state = MPTCPS_CLOSE_WAIT; |
39037602 | 724 | } |
39236c6e A |
725 | break; |
726 | ||
727 | case MPTCPS_CLOSE_WAIT: | |
fe8ab488 | 728 | if (event == MPCE_CLOSE) { |
39236c6e | 729 | mp_tp->mpt_state = MPTCPS_LAST_ACK; |
fe8ab488 | 730 | mp_tp->mpt_sndmax += 1; /* adjust for Data FIN */ |
39037602 | 731 | } |
39236c6e A |
732 | break; |
733 | ||
734 | case MPTCPS_FIN_WAIT_1: | |
39037602 | 735 | if (event == MPCE_RECV_DATA_ACK) { |
39236c6e | 736 | mp_tp->mpt_state = MPTCPS_FIN_WAIT_2; |
39037602 | 737 | } else if (event == MPCE_RECV_DATA_FIN) { |
fe8ab488 | 738 | mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */ |
39236c6e | 739 | mp_tp->mpt_state = MPTCPS_CLOSING; |
39037602 | 740 | } |
39236c6e A |
741 | break; |
742 | ||
743 | case MPTCPS_CLOSING: | |
744 | if (event == MPCE_RECV_DATA_ACK) | |
745 | mp_tp->mpt_state = MPTCPS_TIME_WAIT; | |
746 | break; | |
747 | ||
748 | case MPTCPS_LAST_ACK: | |
749 | if (event == MPCE_RECV_DATA_ACK) | |
fe8ab488 | 750 | mp_tp->mpt_state = MPTCPS_TERMINATE; |
39236c6e A |
751 | break; |
752 | ||
753 | case MPTCPS_FIN_WAIT_2: | |
fe8ab488 A |
754 | if (event == MPCE_RECV_DATA_FIN) { |
755 | mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */ | |
39236c6e | 756 | mp_tp->mpt_state = MPTCPS_TIME_WAIT; |
39037602 | 757 | } |
39236c6e A |
758 | break; |
759 | ||
760 | case MPTCPS_TIME_WAIT: | |
761 | break; | |
762 | ||
fe8ab488 | 763 | case MPTCPS_TERMINATE: |
39236c6e | 764 | break; |
39236c6e A |
765 | default: |
766 | VERIFY(0); | |
767 | /* NOTREACHED */ | |
768 | } | |
39037602 | 769 | DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, |
39236c6e | 770 | uint32_t, event); |
3e170ce0 A |
771 | mptcplog((LOG_INFO, "MPTCP State: %s to %s on event %s\n", |
772 | mptcp_state_to_str(old_state), | |
773 | mptcp_state_to_str(mp_tp->mpt_state), | |
774 | mptcp_event_to_str(event)), | |
775 | MPTCP_STATE_DBG, MPTCP_LOGLVL_LOG); | |
39236c6e A |
776 | } |
777 | ||
778 | /* | |
779 | * Update the mptcb send state variables, but the actual sbdrop occurs | |
780 | * in MPTCP layer | |
781 | */ | |
782 | void | |
783 | mptcp_data_ack_rcvd(struct mptcb *mp_tp, struct tcpcb *tp, u_int64_t full_dack) | |
784 | { | |
785 | u_int64_t acked = 0; | |
786 | ||
787 | acked = full_dack - mp_tp->mpt_snduna; | |
788 | ||
789 | if (acked) { | |
790 | mp_tp->mpt_snduna += acked; | |
fe8ab488 A |
791 | /* In degraded mode, we may get some Data ACKs */ |
792 | if ((tp->t_mpflags & TMPF_TCP_FALLBACK) && | |
793 | !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) && | |
794 | MPTCP_SEQ_GT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) { | |
795 | /* bring back sndnxt to retransmit MPTCP data */ | |
796 | mp_tp->mpt_sndnxt = mp_tp->mpt_dsn_at_csum_fail; | |
797 | mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC; | |
39037602 | 798 | tp->t_inpcb->inp_socket->so_flags1 |= |
fe8ab488 A |
799 | SOF1_POST_FALLBACK_SYNC; |
800 | } | |
39236c6e A |
801 | } |
802 | if ((full_dack == mp_tp->mpt_sndmax) && | |
803 | (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1)) { | |
804 | mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_ACK); | |
805 | tp->t_mpflags &= ~TMPF_SEND_DFIN; | |
806 | } | |
807 | } | |
808 | ||
809 | /* If you change this function, match up mptcp_update_rcv_state_f */ | |
810 | void | |
811 | mptcp_update_dss_rcv_state(struct mptcp_dsn_opt *dss_info, struct tcpcb *tp, | |
812 | uint16_t csum) | |
813 | { | |
814 | struct mptcb *mp_tp = tptomptp(tp); | |
815 | u_int64_t full_dsn = 0; | |
816 | ||
817 | NTOHL(dss_info->mdss_dsn); | |
818 | NTOHL(dss_info->mdss_subflow_seqn); | |
819 | NTOHS(dss_info->mdss_data_len); | |
820 | ||
821 | /* XXX for autosndbuf grow sb here */ | |
822 | MPT_LOCK(mp_tp); | |
823 | MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_info->mdss_dsn, full_dsn); | |
824 | MPT_UNLOCK(mp_tp); | |
825 | mptcp_update_rcv_state_meat(mp_tp, tp, | |
826 | full_dsn, dss_info->mdss_subflow_seqn, dss_info->mdss_data_len, | |
827 | csum); | |
828 | ||
829 | } | |
830 | ||
831 | void | |
832 | mptcp_update_rcv_state_meat(struct mptcb *mp_tp, struct tcpcb *tp, | |
833 | u_int64_t full_dsn, u_int32_t seqn, u_int16_t mdss_data_len, | |
834 | uint16_t csum) | |
835 | { | |
836 | if (mdss_data_len == 0) { | |
3e170ce0 A |
837 | mptcplog((LOG_INFO, "MPTCP Receiver: Infinite Mapping.\n"), |
838 | MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG); | |
839 | ||
39236c6e | 840 | if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && (csum != 0)) { |
3e170ce0 A |
841 | mptcplog((LOG_ERR, "MPTCP Receiver: Bad checksum %x \n", |
842 | csum), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR); | |
39236c6e A |
843 | } |
844 | mptcp_notify_mpfail(tp->t_inpcb->inp_socket); | |
845 | return; | |
846 | } | |
847 | MPT_LOCK(mp_tp); | |
3e170ce0 A |
848 | mptcplog((LOG_DEBUG, |
849 | "MPTCP Receiver: seqn = %x len = %x full = %llx " | |
850 | "rcvnxt = %llu \n", | |
851 | seqn, mdss_data_len, full_dsn, mp_tp->mpt_rcvnxt), | |
852 | MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE); | |
39236c6e A |
853 | |
854 | /* Process a Data FIN packet , handled in mptcp_do_fin_opt */ | |
855 | if ((seqn == 0) && (mdss_data_len == 1)) { | |
3e170ce0 A |
856 | mptcplog((LOG_INFO, "MPTCP Receiver: Data FIN in %s state \n", |
857 | mptcp_state_to_str(mp_tp->mpt_state)), | |
858 | MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG); | |
39236c6e A |
859 | MPT_UNLOCK(mp_tp); |
860 | return; | |
861 | } | |
862 | MPT_UNLOCK(mp_tp); | |
863 | mptcp_notify_mpready(tp->t_inpcb->inp_socket); | |
864 | tp->t_rcv_map.mpt_dsn = full_dsn; | |
865 | tp->t_rcv_map.mpt_sseq = seqn; | |
866 | tp->t_rcv_map.mpt_len = mdss_data_len; | |
867 | tp->t_rcv_map.mpt_csum = csum; | |
868 | tp->t_mpflags |= TMPF_EMBED_DSN; | |
869 | } | |
870 | ||
871 | ||
872 | void | |
873 | mptcp_update_rcv_state_f(struct mptcp_dss_ack_opt *dss_info, struct tcpcb *tp, | |
874 | uint16_t csum) | |
875 | { | |
876 | u_int64_t full_dsn = 0; | |
877 | struct mptcb *mp_tp = tptomptp(tp); | |
878 | ||
490019cf A |
879 | /* |
880 | * May happen, because the caller of this function does an soevent. | |
881 | * Review after rdar://problem/24083886 | |
882 | */ | |
883 | if (!mp_tp) | |
884 | return; | |
885 | ||
39236c6e A |
886 | NTOHL(dss_info->mdss_dsn); |
887 | NTOHL(dss_info->mdss_subflow_seqn); | |
888 | NTOHS(dss_info->mdss_data_len); | |
889 | MPT_LOCK(mp_tp); | |
890 | MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_info->mdss_dsn, full_dsn); | |
891 | MPT_UNLOCK(mp_tp); | |
892 | mptcp_update_rcv_state_meat(mp_tp, tp, | |
893 | full_dsn, | |
894 | dss_info->mdss_subflow_seqn, | |
895 | dss_info->mdss_data_len, | |
896 | csum); | |
897 | } | |
898 | ||
899 | void | |
900 | mptcp_update_rcv_state_g(struct mptcp_dss64_ack32_opt *dss_info, | |
901 | struct tcpcb *tp, uint16_t csum) | |
902 | { | |
903 | u_int64_t dsn = mptcp_ntoh64(dss_info->mdss_dsn); | |
904 | struct mptcb *mp_tp = tptomptp(tp); | |
905 | ||
490019cf A |
906 | /* |
907 | * May happen, because the caller of this function does an soevent. | |
908 | * Review after rdar://problem/24083886 | |
909 | */ | |
910 | if (!mp_tp) | |
911 | return; | |
912 | ||
39236c6e A |
913 | NTOHL(dss_info->mdss_subflow_seqn); |
914 | NTOHS(dss_info->mdss_data_len); | |
915 | mptcp_update_rcv_state_meat(mp_tp, tp, | |
916 | dsn, | |
917 | dss_info->mdss_subflow_seqn, | |
918 | dss_info->mdss_data_len, | |
919 | csum); | |
920 | } | |
921 | ||
3e170ce0 A |
922 | static int |
923 | mptcp_validate_dss_map(struct socket *so, struct tcpcb *tp, struct mbuf *m, | |
924 | int hdrlen) | |
925 | { | |
926 | u_int32_t sseq, datalen; | |
927 | ||
928 | if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) | |
929 | return 0; | |
930 | ||
931 | sseq = m->m_pkthdr.mp_rseq + tp->irs; | |
932 | datalen = m->m_pkthdr.mp_rlen; | |
933 | ||
934 | #if 0 | |
935 | /* enable this to test TCP fallback post connection establishment */ | |
936 | if (SEQ_GT(sseq, (tp->irs+1))) | |
937 | datalen = m->m_pkthdr.len - hdrlen - 1; | |
938 | #endif | |
939 | ||
940 | /* unacceptable DSS option, fallback to TCP */ | |
941 | if (m->m_pkthdr.len > ((int) datalen + hdrlen)) { | |
942 | mptcplog((LOG_ERR, "MPTCP Receiver: " | |
943 | "%s: mbuf len %d, MPTCP expected %d", | |
944 | __func__, m->m_pkthdr.len, datalen), | |
945 | MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG); | |
946 | } else { | |
947 | return 0; | |
948 | } | |
949 | tp->t_mpflags |= TMPF_SND_MPFAIL; | |
950 | mptcp_notify_mpfail(so); | |
951 | m_freem(m); | |
952 | return -1; | |
953 | } | |
954 | ||
955 | int | |
956 | mptcp_input_preproc(struct tcpcb *tp, struct mbuf *m, int drop_hdrlen) | |
957 | { | |
958 | if (mptcp_validate_csum(tp, m, drop_hdrlen) != 0) | |
959 | return -1; | |
960 | ||
961 | mptcp_insert_rmap(tp, m); | |
962 | if (mptcp_validate_dss_map(tp->t_inpcb->inp_socket, tp, m, | |
963 | drop_hdrlen) != 0) | |
964 | return -1; | |
965 | return 0; | |
966 | } | |
967 | ||
39236c6e A |
968 | /* |
969 | * MPTCP Checksum support | |
970 | * The checksum is calculated whenever the MPTCP DSS option is included | |
971 | * in the TCP packet. The checksum includes the sum of the MPTCP pseudo | |
972 | * header and the actual data indicated by the length specified in the | |
973 | * DSS option. | |
974 | */ | |
975 | ||
3e170ce0 A |
976 | static int |
977 | mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, int drop_hdrlen) | |
978 | { | |
979 | uint16_t mptcp_csum = 0; | |
980 | mptcp_csum = mptcp_input_csum(tp, m, drop_hdrlen); | |
981 | if (mptcp_csum) { | |
982 | tp->t_mpflags |= TMPF_SND_MPFAIL; | |
983 | tp->t_mpflags &= ~TMPF_EMBED_DSN; | |
984 | mptcp_notify_mpfail(tp->t_inpcb->inp_socket); | |
985 | m_freem(m); | |
986 | tcpstat.tcps_mp_badcsum++; | |
987 | return -1; | |
988 | } | |
989 | return 0; | |
990 | } | |
991 | ||
992 | static uint16_t | |
39236c6e A |
993 | mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, int off) |
994 | { | |
995 | struct mptcb *mp_tp = tptomptp(tp); | |
996 | uint32_t sum = 0; | |
997 | uint64_t dsn; | |
998 | uint32_t sseq; | |
999 | uint16_t len; | |
1000 | uint16_t csum; | |
1001 | ||
1002 | if (mp_tp == NULL) | |
1003 | return (0); | |
1004 | ||
1005 | if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM)) | |
1006 | return (0); | |
1007 | ||
1008 | if (!(tp->t_mpflags & TMPF_EMBED_DSN)) | |
1009 | return (0); | |
1010 | ||
1011 | if (tp->t_mpflags & TMPF_TCP_FALLBACK) | |
1012 | return (0); | |
1013 | ||
39037602 | 1014 | /* |
39236c6e A |
1015 | * The remote side may send a packet with fewer bytes than the |
1016 | * claimed DSS checksum length. | |
1017 | */ | |
1018 | if ((int)m_length2(m, NULL) < (off + tp->t_rcv_map.mpt_len)) | |
1019 | return (0xffff); | |
1020 | ||
1021 | if (tp->t_rcv_map.mpt_len != 0) | |
1022 | sum = m_sum16(m, off, tp->t_rcv_map.mpt_len); | |
1023 | ||
1024 | dsn = mptcp_hton64(tp->t_rcv_map.mpt_dsn); | |
1025 | sseq = htonl(tp->t_rcv_map.mpt_sseq); | |
1026 | len = htons(tp->t_rcv_map.mpt_len); | |
1027 | csum = tp->t_rcv_map.mpt_csum; | |
1028 | sum += in_pseudo64(dsn, sseq, (len + csum)); | |
1029 | ADDCARRY(sum); | |
1030 | DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m, | |
1031 | uint32_t, sum); | |
3e170ce0 A |
1032 | mptcplog((LOG_DEBUG, "MPTCP Receiver: sum = %x \n", sum), |
1033 | MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE); | |
39236c6e A |
1034 | return (~sum & 0xffff); |
1035 | } | |
1036 | ||
1037 | void | |
1038 | mptcp_output_csum(struct tcpcb *tp, struct mbuf *m, int32_t len, | |
1039 | unsigned hdrlen, u_int64_t dss_val, u_int32_t *sseqp) | |
1040 | { | |
1041 | struct mptcb *mp_tp = tptomptp(tp); | |
1042 | u_int32_t sum = 0; | |
1043 | uint32_t sseq; | |
1044 | uint16_t dss_len; | |
1045 | uint16_t csum = 0; | |
1046 | uint16_t *csump = NULL; | |
1047 | ||
1048 | if (mp_tp == NULL) | |
1049 | return; | |
1050 | ||
1051 | if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM)) | |
1052 | return; | |
1053 | ||
1054 | if (sseqp == NULL) | |
1055 | return; | |
1056 | ||
1057 | if (len) | |
1058 | sum = m_sum16(m, hdrlen, len); | |
1059 | ||
1060 | dss_val = mptcp_hton64(dss_val); | |
1061 | sseq = *sseqp; | |
1062 | dss_len = *(uint16_t *)(void *)((u_char*)sseqp + sizeof (u_int32_t)); | |
1063 | sum += in_pseudo64(dss_val, sseq, (dss_len + csum)); | |
1064 | ||
1065 | ADDCARRY(sum); | |
1066 | sum = ~sum & 0xffff; | |
1067 | csump = (uint16_t *)(void *)((u_char*)sseqp + sizeof (u_int32_t) + | |
1068 | sizeof (uint16_t)); | |
1069 | DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m, | |
1070 | uint32_t, sum); | |
1071 | *csump = sum; | |
3e170ce0 A |
1072 | mptcplog((LOG_DEBUG, "MPTCP Sender: sum = %x \n", sum), |
1073 | MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE); | |
1074 | } | |
1075 | ||
1076 | /* | |
1077 | * When WiFi signal starts fading, there's more loss and RTT spikes. | |
1078 | * Check if there has been a large spike by comparing against | |
1079 | * a tolerable RTT spike threshold. | |
1080 | */ | |
1081 | boolean_t | |
1082 | mptcp_no_rto_spike(struct socket *so) | |
1083 | { | |
1084 | struct tcpcb *tp = intotcpcb(sotoinpcb(so)); | |
1085 | int32_t spike = 0; | |
1086 | ||
1087 | if (tp->t_rxtcur > MAX(mptcp_rtothresh, MPTCP_RTO_MINTHRESH)) { | |
1088 | spike = tp->t_rxtcur - mptcp_rtothresh; | |
1089 | ||
1090 | mptcplog((LOG_DEBUG, "MPTCP Socket: %s: spike = %d rto = %d" | |
1091 | "best = %d cur = %d\n", __func__, spike, | |
1092 | tp->t_rxtcur, tp->t_rttbest >> TCP_RTT_SHIFT, | |
1093 | tp->t_rttcur), | |
1094 | (MPTCP_SOCKET_DBG|MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG); | |
1095 | ||
1096 | } | |
1097 | ||
1098 | if (spike > 0 ) { | |
1099 | return (FALSE); | |
1100 | } else { | |
1101 | return (TRUE); | |
1102 | } | |
39236c6e | 1103 | } |