/*
 * Copyright (c) 2012-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/mcache.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/protosw.h>

#include <kern/zalloc.h>
#include <kern/locks.h>

#include <mach/thread_act.h>
#include <mach/sdt.h>

#include <dev/random/randomdev.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/mptcp_var.h>
#include <netinet/mptcp.h>
#include <netinet/mptcp_seq.h>
#include <netinet/mptcp_opt.h>
#include <netinet/mptcp_timer.h>

int mptcp_enable = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
	&mptcp_enable, 0, "Enable Multipath TCP Support");

int mptcp_dbg = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
	&mptcp_dbg, 0, "Enable Multipath TCP Debugging");

/* Number of times to try negotiating MPTCP on SYN retransmissions */
int mptcp_mpcap_retries = MPTCP_CAPABLE_RETRIES;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr,
	CTLFLAG_RW | CTLFLAG_LOCKED,
	&mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries");

/*
 * By default, DSS checksum is turned off; revisit if we ever do
 * MPTCP for non-SSL traffic.
 */
int mptcp_dss_csum = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dss_csum, CTLFLAG_RW | CTLFLAG_LOCKED,
	&mptcp_dss_csum, 0, "Enable DSS checksum");

/*
 * Once mptcp_fail_thresh retransmissions have been sent on a subflow,
 * failover to a different path is attempted.
 */
int mptcp_fail_thresh = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fail, CTLFLAG_RW | CTLFLAG_LOCKED,
	&mptcp_fail_thresh, 0, "Failover threshold");


/*
 * MPTCP subflows have TCP keepalives set to ON. Set a conservative keeptime,
 * as carrier networks mostly have a 30-minute to 60-minute NAT timeout.
 * Some carrier networks have a timeout of 10 or 15 minutes.
 */
int mptcp_subflow_keeptime = 60*14;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, keepalive, CTLFLAG_RW | CTLFLAG_LOCKED,
	&mptcp_subflow_keeptime, 0, "Keepalive in seconds");

/*
 * MP_PRIO option.
 */
int mptcp_mpprio_enable = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mpprio, CTLFLAG_RW | CTLFLAG_LOCKED,
	&mptcp_mpprio_enable, 0, "Enable MP_PRIO option");

/*
 * REMOVE_ADDR option.
 */
int mptcp_remaddr_enable = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, remaddr, CTLFLAG_RW | CTLFLAG_LOCKED,
	&mptcp_remaddr_enable, 0, "Enable REMOVE_ADDR option");

/*
 * FastJoin Option
 */
int mptcp_fastjoin = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fastjoin, CTLFLAG_RW | CTLFLAG_LOCKED,
	&mptcp_fastjoin, 0, "Enable FastJoin Option");

int mptcp_zerortt_fastjoin = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, zerortt_fastjoin, CTLFLAG_RW |
	CTLFLAG_LOCKED, &mptcp_zerortt_fastjoin, 0,
	"Enable Zero RTT Fast Join");

/*
 * R/W Notification on resume
 */
int mptcp_rwnotify = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rwnotify, CTLFLAG_RW | CTLFLAG_LOCKED,
	&mptcp_rwnotify, 0, "Enable RW notify on resume");

/*
 * MPTCP input, called when data has been read from a subflow socket.
 */
void
mptcp_input(struct mptses *mpte, struct mbuf *m)
{
	struct socket *mp_so;
	struct mptcb *mp_tp = NULL;
	u_int64_t mb_dsn;
	u_int32_t mb_datalen;
	int count = 0;
	struct mbuf *save = NULL, *prev = NULL;
	struct mbuf *freelist = NULL, *tail = NULL;
	boolean_t in_fallback = FALSE;

	VERIFY(m->m_flags & M_PKTHDR);

	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */
	mp_so = mpte->mpte_mppcb->mpp_socket;

	DTRACE_MPTCP(input);

	/*
	 * Each mbuf contains an MPTCP Data Sequence Mapping. Process the
	 * data for reassembly, delivery to the MPTCP socket client, etc.
	 */
	count = mp_so->so_rcv.sb_cc;

	VERIFY(m != NULL);
	mp_tp = mpte->mpte_mptcb;
	VERIFY(mp_tp != NULL);

	/* OK to check this flag without the lock, as it's set in this thread */
	in_fallback = (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP);

	/*
	 * In the degraded fallback case, data is accepted without a DSS map.
	 */
	if (in_fallback) {
fallback:
		/*
		 * Assume a degraded flow, as this may be the first packet
		 * without a DSS map and the subflow state is not updated yet.
		 */
		if (sbappendstream(&mp_so->so_rcv, m))
			sorwakeup(mp_so);
		DTRACE_MPTCP5(receive__degraded, struct mbuf *, m,
		    struct socket *, mp_so,
		    struct sockbuf *, &mp_so->so_rcv,
		    struct sockbuf *, &mp_so->so_snd,
		    struct mptses *, mpte);
		count = mp_so->so_rcv.sb_cc - count;
		mptcplog3((LOG_DEBUG, "%s: fread %d bytes\n", __func__, count));
		return;
	}

	MPT_LOCK(mp_tp);
	do {
		/* If fallback occurs, mbufs will not have PKTF_MPTCP set */
		if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
			MPT_UNLOCK(mp_tp);
			goto fallback;
		}

		save = m->m_next;
		/*
		 * A single TCP packet formed of multiple mbufs
		 * holds the DSS mapping in the first mbuf of the chain.
		 * Other mbufs in the chain may have M_PKTHDR set
		 * even though they belong to the same TCP packet
		 * and therefore use the DSS mapping stored in the
		 * first mbuf of the chain. mptcp_input() can
		 * get an mbuf chain with multiple TCP packets.
		 */
		while (save && (!(save->m_flags & M_PKTHDR) ||
		    !(save->m_pkthdr.pkt_flags & PKTF_MPTCP))) {
			prev = save;
			save = save->m_next;
		}
		if (prev)
			prev->m_next = NULL;
		else
			m->m_next = NULL;

		mb_dsn = m->m_pkthdr.mp_dsn;
		mb_datalen = m->m_pkthdr.mp_rlen;

		if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvatmark)) {
			tcpstat.tcps_mp_oodata++;
			MPT_UNLOCK(mp_tp);
			m_freem(m);
			return;
			/*
			 * Reassembly queue support may be added here in the
			 * future. Per spec, senders must implement a
			 * retransmission timer to retransmit unacked data.
			 * Dropping out-of-order data gives a slight hit on
			 * performance but allows us to deploy MPTCP and
			 * protects us against in-window DoS attacks that
			 * attempt to use up memory by sending out-of-order
			 * data. When doing load sharing across subflows,
			 * out-of-order support is a must.
			 */
		}

		if (MPTCP_SEQ_LT(mb_dsn, mp_tp->mpt_rcvatmark)) {
			if (MPTCP_SEQ_LEQ((mb_dsn + mb_datalen),
			    mp_tp->mpt_rcvatmark)) {
				if (freelist == NULL)
					freelist = m;
				else
					tail->m_next = m;

				if (prev != NULL)
					tail = prev;
				else
					tail = m;

				m = save;
				prev = save = NULL;
				continue;
			} else {
				m_adj(m, (mp_tp->mpt_rcvatmark - mb_dsn));
			}
			mptcplog((LOG_INFO, "%s: %llu %d 2 \n", __func__,
			    mp_tp->mpt_rcvatmark, m->m_pkthdr.len));
		}

		MPT_UNLOCK(mp_tp);
		if (sbappendstream(&mp_so->so_rcv, m)) {
			sorwakeup(mp_so);
		}
		DTRACE_MPTCP6(receive, struct mbuf *, m, struct socket *, mp_so,
		    struct sockbuf *, &mp_so->so_rcv,
		    struct sockbuf *, &mp_so->so_snd,
		    struct mptses *, mpte,
		    struct mptcb *, mp_tp);
		MPT_LOCK(mp_tp);
		count = mp_so->so_rcv.sb_cc - count;
		tcpstat.tcps_mp_rcvtotal++;
		tcpstat.tcps_mp_rcvbytes += count;
		mptcplog3((LOG_DEBUG, "%s: read %d bytes\n", __func__, count));
		/*
		 * The data received at the MPTCP layer will never exceed the
		 * receive window because anything to the right of the
		 * receive window will be trimmed at the subflow level.
		 */
		mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);
		mp_tp->mpt_rcvatmark += count;
		m = save;
		prev = save = NULL;
		count = mp_so->so_rcv.sb_cc;
	} while (m);
	MPT_UNLOCK(mp_tp);

	if (freelist)
		m_freem(freelist);
}

/*
 * MPTCP output. Pick an eligible subflow and push pending data from the
 * MP socket onto it.
 */
int
mptcp_output(struct mptses *mpte)
{
	struct mptsub *mpts;
	struct mptsub *mpts_tried = NULL;
	struct socket *mp_so;
	int error = 0;

	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */
	mp_so = mpte->mpte_mppcb->mpp_socket;
	if (mp_so->so_state & SS_CANTSENDMORE) {
		return (EPIPE);
	}

try_again:
	/* get the "best" subflow to be used for transmission */
	mpts = mptcp_get_subflow(mpte, NULL);
	if (mpts == NULL) {
		mptcplog((LOG_ERR, "%s: mp_so 0x%llx has no usable subflow\n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)));
		goto out;
	}

	mptcplog3((LOG_INFO, "%s: mp_so 0x%llx cid %d \n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid));

	/* In case there's just one flow, we reattempt later */
	MPTS_LOCK(mpts);
	if ((mpts_tried != NULL) && ((mpts == mpts_tried) ||
	    (mpts->mpts_flags & MPTSF_FAILINGOVER))) {
		MPTS_UNLOCK(mpts);
		MPTS_LOCK(mpts_tried);
		mpts_tried->mpts_flags &= ~MPTSF_FAILINGOVER;
		mpts_tried->mpts_flags |= MPTSF_ACTIVE;
		MPTS_UNLOCK(mpts_tried);
		MPT_LOCK(mpte->mpte_mptcb);
		mptcp_start_timer(mpte->mpte_mptcb, MPTT_REXMT);
		MPT_UNLOCK(mpte->mpte_mptcb);
		mptcplog((LOG_INFO, "%s: mp_so 0x%llx retry later\n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)));
		goto out;
	}

	DTRACE_MPTCP3(output, struct mptses *, mpte, struct mptsub *, mpts,
	    struct socket *, mp_so);
	error = mptcp_subflow_output(mpte, mpts);
	if (error) {
		/* can be a temporary loss of source address or other error */
		mpts->mpts_flags |= MPTSF_FAILINGOVER;
		mpts->mpts_flags &= ~MPTSF_ACTIVE;
		mpts_tried = mpts;
		MPTS_UNLOCK(mpts);
		mptcplog((LOG_INFO, "%s: error = %d \n", __func__, error));
		goto try_again;
	}
	/* The model is to have only one active flow at a time */
	mpts->mpts_flags |= MPTSF_ACTIVE;
	MPTS_UNLOCK(mpts);
	if (mpte->mpte_active_sub == NULL) {
		mpte->mpte_active_sub = mpts;
	} else if (mpte->mpte_active_sub != mpts) {
		MPTS_LOCK(mpte->mpte_active_sub);
		mpte->mpte_active_sub->mpts_flags &= ~MPTSF_ACTIVE;
		MPTS_UNLOCK(mpte->mpte_active_sub);
		mpte->mpte_active_sub = mpts;
	}
out:
	/* subflow errors should not be percolated back up */
	return (0);
}

/*
 * Return the most eligible subflow to be used for sending data.
 * This function also serves to check whether any alternate subflow is
 * available.
 */
struct mptsub *
mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore)
{
	struct mptsub *mpts;
	struct mptsub *fallback = NULL;
	struct socket *so = NULL;

	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		MPTS_LOCK(mpts);

		if ((ignore) && (mpts == ignore)) {
			MPTS_UNLOCK(mpts);
			continue;
		}

		/* There can only be one subflow in degraded state */
		if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
			MPTS_UNLOCK(mpts);
			break;
		}

		/*
		 * Subflows with FastJoin allow data to be written before
		 * the subflow is MP capable.
		 */
		if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE) &&
		    !(mpts->mpts_flags & MPTSF_FASTJ_REQD)) {
			MPTS_UNLOCK(mpts);
			continue;
		}

		if (mpts->mpts_flags & MPTSF_SUSPENDED) {
			MPTS_UNLOCK(mpts);
			continue;
		}

		if ((mpts->mpts_flags & MPTSF_DISCONNECTED) ||
		    (mpts->mpts_flags & MPTSF_DISCONNECTING)) {
			MPTS_UNLOCK(mpts);
			continue;
		}

		if (mpts->mpts_flags & MPTSF_FAILINGOVER) {
			so = mpts->mpts_socket;
			if ((so) && (!(so->so_flags & SOF_PCBCLEARING))) {
				socket_lock(so, 1);
				if ((so->so_snd.sb_cc == 0) &&
				    (mptcp_no_rto_spike(so))) {
					mpts->mpts_flags &= ~MPTSF_FAILINGOVER;
					so->so_flags &= ~SOF_MP_TRYFAILOVER;
					fallback = mpts;
					socket_unlock(so, 1);
				} else {
					fallback = mpts;
					socket_unlock(so, 1);
					MPTS_UNLOCK(mpts);
					continue;
				}
			} else {
				MPTS_UNLOCK(mpts);
				continue;
			}
		}

		if (mpts->mpts_flags & MPTSF_PREFERRED) {
			MPTS_UNLOCK(mpts);
			break;
		}

		/* When there are no preferred flows, use first one in list */
		fallback = mpts;

		MPTS_UNLOCK(mpts);
	}
	/*
	 * If there is no preferred or backup subflow, and there is no active
	 * subflow, use the last usable subflow.
	 */
	if (mpts == NULL) {
		return (fallback);
	}

	return (mpts);
}

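/*
 * Return the first subflow that is still pending connection
 * (MPTSF_CONNECT_PENDING), skipping `ignore'; NULL if none is pending.
 */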
struct mptsub *
mptcp_get_pending_subflow(struct mptses *mpte, struct mptsub *ignore)
{
	struct mptsub *mpts = NULL;

	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		MPTS_LOCK(mpts);

		if ((ignore) && (mpts == ignore)) {
			MPTS_UNLOCK(mpts);
			continue;
		}

		if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) {
			MPTS_UNLOCK(mpts);
			break;
		}

		MPTS_UNLOCK(mpts);
	}
	return (mpts);
}

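/*
 * MPTCP connection-level close state machine; advance mpt_state in response
 * to a close event (MPCE_CLOSE, MPCE_RECV_DATA_FIN or MPCE_RECV_DATA_ACK),
 * mirroring the TCP FSM at the data level.
 */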
void
mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event)
{
	MPT_LOCK_ASSERT_HELD(mp_tp);

	DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
	    uint32_t, event);

	switch (mp_tp->mpt_state) {
	case MPTCPS_CLOSED:
	case MPTCPS_LISTEN:
		mp_tp->mpt_state = MPTCPS_CLOSED;
		break;

	case MPTCPS_ESTABLISHED:
		if (event == MPCE_CLOSE) {
			mp_tp->mpt_state = MPTCPS_FIN_WAIT_1;
			mp_tp->mpt_sndmax += 1;	/* adjust for Data FIN */
		} else if (event == MPCE_RECV_DATA_FIN) {
			mp_tp->mpt_rcvnxt += 1;	/* adj remote data FIN */
			mp_tp->mpt_state = MPTCPS_CLOSE_WAIT;
		}
		break;

	case MPTCPS_CLOSE_WAIT:
		if (event == MPCE_CLOSE) {
			mp_tp->mpt_state = MPTCPS_LAST_ACK;
			mp_tp->mpt_sndmax += 1;	/* adjust for Data FIN */
		}
		break;

	case MPTCPS_FIN_WAIT_1:
		if (event == MPCE_RECV_DATA_ACK)
			mp_tp->mpt_state = MPTCPS_FIN_WAIT_2;
		else if (event == MPCE_RECV_DATA_FIN) {
			mp_tp->mpt_rcvnxt += 1;	/* adj remote data FIN */
			mp_tp->mpt_state = MPTCPS_CLOSING;
		}
		break;

	case MPTCPS_CLOSING:
		if (event == MPCE_RECV_DATA_ACK)
			mp_tp->mpt_state = MPTCPS_TIME_WAIT;
		break;

	case MPTCPS_LAST_ACK:
		if (event == MPCE_RECV_DATA_ACK)
			mp_tp->mpt_state = MPTCPS_TERMINATE;
		break;

	case MPTCPS_FIN_WAIT_2:
		if (event == MPCE_RECV_DATA_FIN) {
			mp_tp->mpt_rcvnxt += 1;	/* adj remote data FIN */
			mp_tp->mpt_state = MPTCPS_TIME_WAIT;
		}
		break;

	case MPTCPS_TIME_WAIT:
		break;

	case MPTCPS_FASTCLOSE_WAIT:
		if (event == MPCE_CLOSE) {
			/* no need to adjust for data FIN */
			mp_tp->mpt_state = MPTCPS_TERMINATE;
		}
		break;
	case MPTCPS_TERMINATE:
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
	}
	DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
	    uint32_t, event);
	mptcplog((LOG_INFO, "%s: state = %d\n",
	    __func__, mp_tp->mpt_state));
}

/*
 * Update the mptcb send state variables; the actual sbdrop occurs
 * in the MPTCP layer.
 */
void
mptcp_data_ack_rcvd(struct mptcb *mp_tp, struct tcpcb *tp, u_int64_t full_dack)
{
	u_int64_t acked = 0;

	acked = full_dack - mp_tp->mpt_snduna;

	if (acked) {
		mp_tp->mpt_snduna += acked;
		/* In degraded mode, we may get some Data ACKs */
		if ((tp->t_mpflags & TMPF_TCP_FALLBACK) &&
		    !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) &&
		    MPTCP_SEQ_GT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
			/* bring back sndnxt to retransmit MPTCP data */
			mp_tp->mpt_sndnxt = mp_tp->mpt_dsn_at_csum_fail;
			mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC;
			tp->t_inpcb->inp_socket->so_flags1 |=
			    SOF1_POST_FALLBACK_SYNC;
		}
	}
	if ((full_dack == mp_tp->mpt_sndmax) &&
	    (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1)) {
		mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_ACK);
		tp->t_mpflags &= ~TMPF_SEND_DFIN;
	}
}

/* If you change this function, match up mptcp_update_rcv_state_f */
void
mptcp_update_dss_rcv_state(struct mptcp_dsn_opt *dss_info, struct tcpcb *tp,
    uint16_t csum)
{
	struct mptcb *mp_tp = tptomptp(tp);
	u_int64_t full_dsn = 0;

	NTOHL(dss_info->mdss_dsn);
	NTOHL(dss_info->mdss_subflow_seqn);
	NTOHS(dss_info->mdss_data_len);

	/* XXX for autosndbuf grow sb here */
	MPT_LOCK(mp_tp);
	MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_info->mdss_dsn, full_dsn);
	MPT_UNLOCK(mp_tp);
	mptcp_update_rcv_state_meat(mp_tp, tp,
	    full_dsn, dss_info->mdss_subflow_seqn, dss_info->mdss_data_len,
	    csum);
}

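/*
 * Record the receive-side DSS mapping (data-level sequence number, subflow
 * sequence number, mapping length and DSS checksum) in the subflow's tcpcb
 * and mark TMPF_EMBED_DSN. A zero-length mapping is an infinite mapping and
 * triggers fallback notification; a mapping with subflow sequence number 0
 * and length 1 is a Data FIN, handled in mptcp_do_fin_opt().
 */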
void
mptcp_update_rcv_state_meat(struct mptcb *mp_tp, struct tcpcb *tp,
    u_int64_t full_dsn, u_int32_t seqn, u_int16_t mdss_data_len,
    uint16_t csum)
{
	if (mdss_data_len == 0) {
		mptcplog((LOG_INFO, "%s: Received infinite mapping.",
		    __func__));
		if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && (csum != 0)) {
			mptcplog((LOG_ERR, "%s: Bad checksum value %x \n",
			    __func__, csum));
		}
		mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
		return;
	}
	MPT_LOCK(mp_tp);
	if (mptcp_dbg >= MP_VERBOSE_DEBUG_1)
		printf("%s: seqn = %x len = %x full = %llx rcvnxt = %llu \n",
		    __func__, seqn, mdss_data_len, full_dsn,
		    mp_tp->mpt_rcvnxt);

	/* Process a Data FIN packet, handled in mptcp_do_fin_opt */
	if ((seqn == 0) && (mdss_data_len == 1)) {
		mptcplog((LOG_INFO, "%s: Data FIN DSS opt state = %d \n",
		    __func__, mp_tp->mpt_state));
		MPT_UNLOCK(mp_tp);
		return;
	}
	MPT_UNLOCK(mp_tp);
	mptcp_notify_mpready(tp->t_inpcb->inp_socket);
	tp->t_rcv_map.mpt_dsn = full_dsn;
	tp->t_rcv_map.mpt_sseq = seqn;
	tp->t_rcv_map.mpt_len = mdss_data_len;
	tp->t_rcv_map.mpt_csum = csum;
	tp->t_mpflags |= TMPF_EMBED_DSN;
}


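/*
 * DSS option variants: mptcp_update_rcv_state_f handles the format with a
 * 32-bit DSN (struct mptcp_dss_ack_opt) and mptcp_update_rcv_state_g the
 * format with a 64-bit DSN and 32-bit data ACK (struct mptcp_dss64_ack32_opt).
 * Both convert the option fields to host byte order and hand off to
 * mptcp_update_rcv_state_meat().
 */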
void
mptcp_update_rcv_state_f(struct mptcp_dss_ack_opt *dss_info, struct tcpcb *tp,
    uint16_t csum)
{
	u_int64_t full_dsn = 0;
	struct mptcb *mp_tp = tptomptp(tp);

	NTOHL(dss_info->mdss_dsn);
	NTOHL(dss_info->mdss_subflow_seqn);
	NTOHS(dss_info->mdss_data_len);
	MPT_LOCK(mp_tp);
	MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_info->mdss_dsn, full_dsn);
	MPT_UNLOCK(mp_tp);
	mptcp_update_rcv_state_meat(mp_tp, tp,
	    full_dsn,
	    dss_info->mdss_subflow_seqn,
	    dss_info->mdss_data_len,
	    csum);
}

void
mptcp_update_rcv_state_g(struct mptcp_dss64_ack32_opt *dss_info,
    struct tcpcb *tp, uint16_t csum)
{
	u_int64_t dsn = mptcp_ntoh64(dss_info->mdss_dsn);
	struct mptcb *mp_tp = tptomptp(tp);

	NTOHL(dss_info->mdss_subflow_seqn);
	NTOHS(dss_info->mdss_data_len);
	mptcp_update_rcv_state_meat(mp_tp, tp,
	    dsn,
	    dss_info->mdss_subflow_seqn,
	    dss_info->mdss_data_len,
	    csum);
}

/*
 * MPTCP Checksum support
 * The checksum is calculated whenever the MPTCP DSS option is included
 * in the TCP packet. The checksum includes the sum of the MPTCP pseudo
 * header and the actual data indicated by the length specified in the
 * DSS option.
 */

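/*
 * Verify the DSS checksum of an inbound mapping: sum the mapped payload and
 * the DSS pseudo header (64-bit DSN, subflow sequence number, data-level
 * length and the received checksum). Returns 0 when the checksum verifies or
 * when no check is needed, and a non-zero value (0xffff if the segment is
 * shorter than the mapped length) on failure.
 */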
uint16_t
mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, int off)
{
	struct mptcb *mp_tp = tptomptp(tp);
	uint32_t sum = 0;
	uint64_t dsn;
	uint32_t sseq;
	uint16_t len;
	uint16_t csum;

	if (mp_tp == NULL)
		return (0);

	if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM))
		return (0);

	if (!(tp->t_mpflags & TMPF_EMBED_DSN))
		return (0);

	if (tp->t_mpflags & TMPF_TCP_FALLBACK)
		return (0);

	/*
	 * The remote side may send a packet with fewer bytes than the
	 * claimed DSS checksum length.
	 */
	if ((int)m_length2(m, NULL) < (off + tp->t_rcv_map.mpt_len))
		return (0xffff);

	if (tp->t_rcv_map.mpt_len != 0)
		sum = m_sum16(m, off, tp->t_rcv_map.mpt_len);

	dsn = mptcp_hton64(tp->t_rcv_map.mpt_dsn);
	sseq = htonl(tp->t_rcv_map.mpt_sseq);
	len = htons(tp->t_rcv_map.mpt_len);
	csum = tp->t_rcv_map.mpt_csum;
	sum += in_pseudo64(dsn, sseq, (len + csum));
	ADDCARRY(sum);
	DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m,
	    uint32_t, sum);
	mptcplog((LOG_INFO, "%s: sum = %x \n", __func__, sum));
	return (~sum & 0xffff);
}

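/*
 * Compute the DSS checksum for an outgoing mapping: sum the payload that
 * starts at hdrlen, add the DSS pseudo header (DSN, subflow sequence number
 * and data-level length taken from the option being built), and store the
 * folded one's-complement result in the checksum field that follows the
 * subflow sequence number and length fields in the DSS option.
 */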
void
mptcp_output_csum(struct tcpcb *tp, struct mbuf *m, int32_t len,
    unsigned hdrlen, u_int64_t dss_val, u_int32_t *sseqp)
{
	struct mptcb *mp_tp = tptomptp(tp);
	u_int32_t sum = 0;
	uint32_t sseq;
	uint16_t dss_len;
	uint16_t csum = 0;
	uint16_t *csump = NULL;

	if (mp_tp == NULL)
		return;

	if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM))
		return;

	if (sseqp == NULL)
		return;

	if (len)
		sum = m_sum16(m, hdrlen, len);

	dss_val = mptcp_hton64(dss_val);
	sseq = *sseqp;
	dss_len = *(uint16_t *)(void *)((u_char *)sseqp + sizeof (u_int32_t));
	sum += in_pseudo64(dss_val, sseq, (dss_len + csum));

	ADDCARRY(sum);
	sum = ~sum & 0xffff;
	csump = (uint16_t *)(void *)((u_char *)sseqp + sizeof (u_int32_t) +
	    sizeof (uint16_t));
	DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m,
	    uint32_t, sum);
	*csump = sum;
	mptcplog3((LOG_INFO, "%s: sum = %x \n", __func__, sum));
}