/*
 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <netinet/in_systm.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <net/route.h>
#include <netinet/in.h>

#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_cache.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcpip.h>
#include <netinet/tcp_fsm.h>
#include <netinet/mptcp_var.h>
#include <netinet/mptcp.h>
#include <netinet/mptcp_opt.h>
#include <netinet/mptcp_seq.h>

#include <libkern/crypto/sha1.h>
#include <netinet/mptcp_timer.h>
static int mptcp_validate_join_hmac(struct tcpcb *, u_char *, int);
static int mptcp_snd_mpprio(struct tcpcb *tp, u_char *cp, int optlen);
static void mptcp_send_remaddr_opt(struct tcpcb *, struct mptcp_remaddr_opt *);
/*
 * MPTCP Options Output Processing
 */
static unsigned
mptcp_setup_first_subflow_syn_opts(struct socket *so, u_char *opt, unsigned optlen)
{
	struct mptcp_mpcapable_opt_common mptcp_opt;
	struct tcpcb *tp = sototcpcb(so);
	struct mptcb *mp_tp = tptomptp(tp);
	int ret;

	ret = tcp_heuristic_do_mptcp(tp);
	if (!ret) {
		os_log_info(mptcp_log_handle, "%s - %lx: Not doing MPTCP due to heuristics",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte));
		mp_tp->mpt_flags |= MPTCPF_FALLBACK_HEURISTIC;
		return optlen;
	}

	/*
	 * Avoid retransmitting the MP_CAPABLE option.
	 */
	if (tp->t_rxtshift > mptcp_mpcap_retries &&
	    !(tptomptp(tp)->mpt_mpte->mpte_flags & MPTE_FORCE_ENABLE)) {
		if (!(mp_tp->mpt_flags & (MPTCPF_FALLBACK_HEURISTIC | MPTCPF_HEURISTIC_TRAC))) {
			mp_tp->mpt_flags |= MPTCPF_HEURISTIC_TRAC;
			tcp_heuristic_mptcp_loss(tp);
		}
		return optlen;
	}

	bzero(&mptcp_opt, sizeof(struct mptcp_mpcapable_opt_common));

	mptcp_opt.mmco_kind = TCPOPT_MULTIPATH;
	mptcp_opt.mmco_len =
	    sizeof(struct mptcp_mpcapable_opt_common) +
	    sizeof(mptcp_key_t);
	mptcp_opt.mmco_subtype = MPO_CAPABLE;
	mptcp_opt.mmco_version = mp_tp->mpt_version;
	mptcp_opt.mmco_flags |= MPCAP_PROPOSAL_SBIT;
	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
		mptcp_opt.mmco_flags |= MPCAP_CHECKSUM_CBIT;
	}
	memcpy(opt + optlen, &mptcp_opt, sizeof(struct mptcp_mpcapable_opt_common));
	optlen += sizeof(struct mptcp_mpcapable_opt_common);
	memcpy(opt + optlen, &mp_tp->mpt_localkey, sizeof(mptcp_key_t));
	optlen += sizeof(mptcp_key_t);

	return optlen;
}
static unsigned
mptcp_setup_join_subflow_syn_opts(struct socket *so, u_char *opt, unsigned optlen)
{
	struct mptcp_mpjoin_opt_req mpjoin_req;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = NULL;
	struct mptsub *mpts;

	if (!inp) {
		return optlen;
	}

	tp = intotcpcb(inp);
	if (!tp) {
		return optlen;
	}

	mpts = tp->t_mpsub;

	bzero(&mpjoin_req, sizeof(mpjoin_req));
	mpjoin_req.mmjo_kind = TCPOPT_MULTIPATH;
	mpjoin_req.mmjo_len = sizeof(mpjoin_req);
	mpjoin_req.mmjo_subtype_bkp = MPO_JOIN << 4;

	if (tp->t_mpflags & TMPF_BACKUP_PATH) {
		mpjoin_req.mmjo_subtype_bkp |= MPTCP_BACKUP;
	} else if (inp->inp_boundifp && IFNET_IS_CELLULAR(inp->inp_boundifp) &&
	    mpts->mpts_mpte->mpte_svctype < MPTCP_SVCTYPE_AGGREGATE) {
		mpjoin_req.mmjo_subtype_bkp |= MPTCP_BACKUP;
		tp->t_mpflags |= TMPF_BACKUP_PATH;
	} else {
		mpts->mpts_flags |= MPTSF_PREFERRED;
	}

	mpjoin_req.mmjo_addr_id = tp->t_local_aid;
	mpjoin_req.mmjo_peer_token = tptomptp(tp)->mpt_remotetoken;
	if (mpjoin_req.mmjo_peer_token == 0) {
		mptcplog((LOG_DEBUG, "%s: peer token 0", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
	}

	mptcp_get_rands(tp->t_local_aid, tptomptp(tp),
	    &mpjoin_req.mmjo_rand, NULL);
	memcpy(opt + optlen, &mpjoin_req, mpjoin_req.mmjo_len);
	optlen += mpjoin_req.mmjo_len;

	return optlen;
}
unsigned
mptcp_setup_join_ack_opts(struct tcpcb *tp, u_char *opt, unsigned optlen)
{
	unsigned new_optlen;
	struct mptcp_mpjoin_opt_rsp2 join_rsp2;

	if ((MAX_TCPOPTLEN - optlen) < sizeof(struct mptcp_mpjoin_opt_rsp2)) {
		printf("%s: no space left %d \n", __func__, optlen);
		return optlen;
	}

	bzero(&join_rsp2, sizeof(struct mptcp_mpjoin_opt_rsp2));
	join_rsp2.mmjo_kind = TCPOPT_MULTIPATH;
	join_rsp2.mmjo_len = sizeof(struct mptcp_mpjoin_opt_rsp2);
	join_rsp2.mmjo_subtype = MPO_JOIN;
	mptcp_get_hmac(tp->t_local_aid, tptomptp(tp),
	    (u_char *)&join_rsp2.mmjo_mac);
	memcpy(opt + optlen, &join_rsp2, join_rsp2.mmjo_len);
	new_optlen = optlen + join_rsp2.mmjo_len;

	return new_optlen;
}
unsigned
mptcp_setup_syn_opts(struct socket *so, u_char *opt, unsigned optlen)
{
	unsigned new_optlen;

	if (!(so->so_flags & SOF_MP_SEC_SUBFLOW)) {
		new_optlen = mptcp_setup_first_subflow_syn_opts(so, opt, optlen);
	} else {
		new_optlen = mptcp_setup_join_subflow_syn_opts(so, opt, optlen);
	}

	return new_optlen;
}
static unsigned int
mptcp_send_mpfail(struct tcpcb *tp, u_char *opt, unsigned int optlen)
{
#pragma unused(tp, opt, optlen)

	struct mptcb *mp_tp = NULL;
	struct mptcp_mpfail_opt fail_opt;
	uint64_t dsn;
	int len = sizeof(struct mptcp_mpfail_opt);

	mp_tp = tptomptp(tp);
	if (mp_tp == NULL) {
		tp->t_mpflags &= ~TMPF_SND_MPFAIL;
		return optlen;
	}

	/* if option space low give up */
	if ((MAX_TCPOPTLEN - optlen) < sizeof(struct mptcp_mpfail_opt)) {
		tp->t_mpflags &= ~TMPF_SND_MPFAIL;
		return optlen;
	}

	dsn = mp_tp->mpt_rcvnxt;

	bzero(&fail_opt, sizeof(fail_opt));
	fail_opt.mfail_kind = TCPOPT_MULTIPATH;
	fail_opt.mfail_len = len;
	fail_opt.mfail_subtype = MPO_FAIL;
	fail_opt.mfail_dsn = mptcp_hton64(dsn);
	memcpy(opt + optlen, &fail_opt, len);
	optlen += len;
	tp->t_mpflags &= ~TMPF_SND_MPFAIL;
	mptcplog((LOG_DEBUG, "%s: %d \n", __func__,
	    tp->t_local_aid), (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG),
	    MPTCP_LOGLVL_LOG);
	return optlen;
}
static unsigned int
mptcp_send_infinite_mapping(struct tcpcb *tp, u_char *opt, unsigned int optlen)
{
	struct mptcp_dsn_opt infin_opt;
	struct mptcb *mp_tp = NULL;
	size_t len = sizeof(struct mptcp_dsn_opt);
	struct socket *so = tp->t_inpcb->inp_socket;
	int csum_len = 0;
	uint16_t csum = 0;

	if (!so) {
		return optlen;
	}

	mp_tp = tptomptp(tp);
	if (mp_tp == NULL) {
		return optlen;
	}

	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
		csum_len = 2;
	}

	/* try later if there is not enough option space left */
	if ((MAX_TCPOPTLEN - optlen) < (len + csum_len)) {
		return optlen;
	}

	bzero(&infin_opt, sizeof(infin_opt));
	infin_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
	infin_opt.mdss_copt.mdss_len = len + csum_len;
	infin_opt.mdss_copt.mdss_subtype = MPO_DSS;
	infin_opt.mdss_copt.mdss_flags |= MDSS_M;
	if (mp_tp->mpt_flags & MPTCPF_RECVD_MPFAIL) {
		infin_opt.mdss_dsn = (u_int32_t)
		    MPTCP_DATASEQ_LOW32(mp_tp->mpt_dsn_at_csum_fail);
		infin_opt.mdss_subflow_seqn = mp_tp->mpt_ssn_at_csum_fail;
	} else {
		/*
		 * If MPTCP fallback happens, but TFO succeeds, the data on the
		 * SYN does not belong to the MPTCP data sequence space.
		 */
		if ((tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED) &&
		    ((mp_tp->mpt_local_idsn + 1) == mp_tp->mpt_snduna)) {
			infin_opt.mdss_subflow_seqn = 1;

			mptcplog((LOG_DEBUG, "%s: idsn %llu snduna %llu \n",
			    __func__, mp_tp->mpt_local_idsn,
			    mp_tp->mpt_snduna),
			    (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG),
			    MPTCP_LOGLVL_LOG);
		} else {
			infin_opt.mdss_subflow_seqn = tp->snd_una - tp->t_mpsub->mpts_iss;
		}
		infin_opt.mdss_dsn = (u_int32_t)
		    MPTCP_DATASEQ_LOW32(mp_tp->mpt_snduna);
	}

	if ((infin_opt.mdss_dsn == 0) || (infin_opt.mdss_subflow_seqn == 0)) {
		return optlen;
	}
	infin_opt.mdss_dsn = htonl(infin_opt.mdss_dsn);
	infin_opt.mdss_subflow_seqn = htonl(infin_opt.mdss_subflow_seqn);
	infin_opt.mdss_data_len = 0;

	memcpy(opt + optlen, &infin_opt, len);
	optlen += len;
	if (csum_len != 0) {
		/* The checksum field is set to 0 for infinite mapping */
		memcpy(opt + optlen, &csum, csum_len);
		optlen += csum_len;
	}

	mptcplog((LOG_DEBUG, "%s: dsn = %x, seq = %x len = %x\n", __func__,
	    ntohl(infin_opt.mdss_dsn),
	    ntohl(infin_opt.mdss_subflow_seqn),
	    ntohs(infin_opt.mdss_data_len)),
	    (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG),
	    MPTCP_LOGLVL_LOG);

	tp->t_mpflags |= TMPF_INFIN_SENT;
	tcpstat.tcps_estab_fallback++;
	return optlen;
}
static int
mptcp_ok_to_fin(struct tcpcb *tp, u_int64_t dsn, u_int32_t datalen)
{
	struct mptcb *mp_tp = tptomptp(tp);

	dsn = (mp_tp->mpt_sndmax & MPTCP_DATASEQ_LOW32_MASK) | dsn;
	if ((dsn + datalen) == mp_tp->mpt_sndmax) {
		return 1;
	}

	return 0;
}
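/*
 * mptcp_ok_to_fin() widens the 32-bit DSN from the option with the upper
 * bits of mpt_sndmax and reports whether the mapping ends exactly at the
 * MPTCP send limit; only then may a DATA_FIN be piggy-backed onto the
 * mapping (the DATA_FIN itself consumes one extra data-sequence number,
 * see the DO_FIN macro below).
 */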
unsigned int
mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt,
    unsigned int optlen, int flags, int len,
    boolean_t *p_mptcp_acknow)
{
	struct inpcb *inp = (struct inpcb *)tp->t_inpcb;
	struct socket *so = inp->inp_socket;
	struct mptcb *mp_tp = tptomptp(tp);
	boolean_t do_csum = FALSE;
	boolean_t send_64bit_dsn = FALSE;
	boolean_t send_64bit_ack = FALSE;
	u_int32_t old_mpt_flags = tp->t_mpflags & TMPF_MPTCP_SIGNALS;

	if (mptcp_enable == 0 || mp_tp == NULL || tp->t_state == TCPS_CLOSED) {
		goto ret_optlen;
	}

	socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));

	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
		do_csum = TRUE;
	}

	/* tcp_output handles the SYN path separately */
	if (flags & TH_SYN) {
		goto ret_optlen;
	}

	if ((MAX_TCPOPTLEN - optlen) <
	    sizeof(struct mptcp_mpcapable_opt_common)) {
		mptcplog((LOG_ERR, "%s: no space left %d flags %x tp->t_mpflags %x len %d\n",
		    __func__, optlen, flags, tp->t_mpflags, len),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
		goto ret_optlen;
	}

	if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
		if (tp->t_mpflags & TMPF_SND_MPFAIL) {
			optlen = mptcp_send_mpfail(tp, opt, optlen);
		} else if (!(tp->t_mpflags & TMPF_INFIN_SENT)) {
			optlen = mptcp_send_infinite_mapping(tp, opt, optlen);
		}
		goto ret_optlen;
	}
	if (tp->t_mpflags & TMPF_SND_KEYS) {
		struct mptcp_mpcapable_opt_rsp1 mptcp_opt;

		if ((MAX_TCPOPTLEN - optlen) <
		    sizeof(struct mptcp_mpcapable_opt_rsp1)) {
			goto ret_optlen;
		}
		bzero(&mptcp_opt, sizeof(struct mptcp_mpcapable_opt_rsp1));
		mptcp_opt.mmc_common.mmco_kind = TCPOPT_MULTIPATH;
		mptcp_opt.mmc_common.mmco_len =
		    sizeof(struct mptcp_mpcapable_opt_rsp1);
		mptcp_opt.mmc_common.mmco_subtype = MPO_CAPABLE;
		mptcp_opt.mmc_common.mmco_version = mp_tp->mpt_version;
		/* HMAC-SHA1 is the proposal */
		mptcp_opt.mmc_common.mmco_flags |= MPCAP_PROPOSAL_SBIT;
		if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
			mptcp_opt.mmc_common.mmco_flags |= MPCAP_CHECKSUM_CBIT;
		}
		mptcp_opt.mmc_localkey = mp_tp->mpt_localkey;
		mptcp_opt.mmc_remotekey = mp_tp->mpt_remotekey;
		memcpy(opt + optlen, &mptcp_opt, mptcp_opt.mmc_common.mmco_len);
		optlen += mptcp_opt.mmc_common.mmco_len;
		tp->t_mpflags &= ~TMPF_SND_KEYS;

		if (!tp->t_mpuna) {
			tp->t_mpuna = tp->snd_una;
		} else {
			/* its a retransmission of the MP_CAPABLE ACK */
		}

		goto ret_optlen;
	}

	if (tp->t_mpflags & TMPF_SND_JACK) {
		/* Do the ACK part */
		optlen = mptcp_setup_join_ack_opts(tp, opt, optlen);
		if (!tp->t_mpuna) {
			tp->t_mpuna = tp->snd_una;
		}
		/* Start a timer to retransmit the ACK */
		tp->t_timer[TCPT_JACK_RXMT] =
		    OFFSET_FROM_START(tp, tcp_jack_rxmt);

		tp->t_mpflags &= ~TMPF_SND_JACK;
		goto ret_optlen;
	}
	if (!(tp->t_mpflags & TMPF_MPTCP_TRUE)) {
		goto ret_optlen;
	}
	/*
	 * From here on, all options are sent only if MPTCP_TRUE
	 * or when data is sent early on as in Fast Join
	 */

	if ((tp->t_mpflags & TMPF_MPTCP_TRUE) &&
	    (tp->t_mpflags & TMPF_SND_REM_ADDR)) {
		int rem_opt_len = sizeof(struct mptcp_remaddr_opt);
		if ((optlen + rem_opt_len) <= MAX_TCPOPTLEN) {
			mptcp_send_remaddr_opt(tp,
			    (struct mptcp_remaddr_opt *)(opt + optlen));
			optlen += rem_opt_len;
		} else {
			tp->t_mpflags &= ~TMPF_SND_REM_ADDR;
		}
	}

	if (tp->t_mpflags & TMPF_SND_MPPRIO) {
		optlen = mptcp_snd_mpprio(tp, opt, optlen);
	}

	if (mp_tp->mpt_flags & MPTCPF_SND_64BITDSN) {
		send_64bit_dsn = TRUE;
	}
	if (mp_tp->mpt_flags & MPTCPF_SND_64BITACK) {
		send_64bit_ack = TRUE;
	}
#define CHECK_OPTLEN {                                                  \
	if ((MAX_TCPOPTLEN - optlen) < dssoptlen) {                     \
	        mptcplog((LOG_ERR, "%s: dssoptlen %d optlen %d \n", __func__,   \
	            dssoptlen, optlen),                                 \
	            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);                \
	        goto ret_optlen;                                        \
	}                                                               \
}

#define DO_FIN(dsn_opt) {                                               \
	int sndfin = 0;                                                 \
	sndfin = mptcp_ok_to_fin(tp, dsn_opt.mdss_dsn, len);            \
	if (sndfin) {                                                   \
	        dsn_opt.mdss_copt.mdss_flags |= MDSS_F;                 \
	        dsn_opt.mdss_data_len += 1;                             \
	        if (do_csum)                                            \
	                dss_csum = in_addword(dss_csum, 1);             \
	}                                                               \
}

#define CHECK_DATALEN {                                                 \
	/* MPTCP socket does not support IP options */                  \
	if ((len + optlen + dssoptlen) > tp->t_maxopd) {                \
	        mptcplog((LOG_ERR, "%s: nosp %d len %d opt %d %d %d\n", \
	            __func__, len, dssoptlen, optlen,                   \
	            tp->t_maxseg, tp->t_maxopd),                        \
	            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);                \
	        /* remove option length from payload len */             \
	        len = tp->t_maxopd - optlen - dssoptlen;                \
	}                                                               \
}
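	/*
	 * The three macros above are expanded in each of the DSS branches
	 * that follow: CHECK_OPTLEN bails out when the DSS option would not
	 * fit into the remaining TCP option space, DO_FIN piggy-backs a
	 * DATA_FIN onto the mapping when the mapping ends at mpt_sndmax
	 * (adjusting the data-level length and, with checksums enabled, the
	 * DSS checksum), and CHECK_DATALEN trims the payload so that payload
	 * plus options never exceeds t_maxopd.
	 */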
	if ((tp->t_mpflags & TMPF_SEND_DSN) &&
	    (send_64bit_dsn)) {
		/*
		 * If there was the need to send 64-bit Data ACK along
		 * with 64-bit DSN, then 26 or 28 bytes would be used.
		 * With timestamps and NOOP padding that will cause
		 * overflow. Hence, in the rare event that both 64-bit
		 * DSN and 64-bit ACK have to be sent, delay the send of
		 * 64-bit ACK until our 64-bit DSN is acked with a 64-bit ack.
		 * XXX If this delay causes issue, remove the 2-byte padding.
		 */
		struct mptcp_dss64_ack32_opt dsn_ack_opt;
		unsigned int dssoptlen = sizeof(dsn_ack_opt);
		uint16_t dss_csum;

		if (do_csum) {
			dssoptlen += 2;
		}

		CHECK_OPTLEN;

		bzero(&dsn_ack_opt, sizeof(dsn_ack_opt));
		dsn_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
		dsn_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
		dsn_ack_opt.mdss_copt.mdss_len = dssoptlen;
		dsn_ack_opt.mdss_copt.mdss_flags |=
		    MDSS_M | MDSS_m | MDSS_A;

		CHECK_DATALEN;

		mptcp_output_getm_dsnmap64(so, off,
		    &dsn_ack_opt.mdss_dsn,
		    &dsn_ack_opt.mdss_subflow_seqn,
		    &dsn_ack_opt.mdss_data_len,
		    &dss_csum);

		if ((dsn_ack_opt.mdss_data_len == 0) ||
		    (dsn_ack_opt.mdss_dsn == 0)) {
			goto ret_optlen;
		}

		if (tp->t_mpflags & TMPF_SEND_DFIN) {
			DO_FIN(dsn_ack_opt);
		}

		dsn_ack_opt.mdss_ack =
		    htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));

		dsn_ack_opt.mdss_dsn = mptcp_hton64(dsn_ack_opt.mdss_dsn);
		dsn_ack_opt.mdss_subflow_seqn = htonl(
			dsn_ack_opt.mdss_subflow_seqn);
		dsn_ack_opt.mdss_data_len = htons(
			dsn_ack_opt.mdss_data_len);

		memcpy(opt + optlen, &dsn_ack_opt, sizeof(dsn_ack_opt));
		if (do_csum) {
			*((uint16_t *)(void *)(opt + optlen + sizeof(dsn_ack_opt))) = dss_csum;
		}

		optlen += dssoptlen;

		mptcplog((LOG_DEBUG, "%s: long DSS = %llx ACK = %llx \n", __func__,
		    mptcp_ntoh64(dsn_ack_opt.mdss_dsn),
		    mptcp_ntoh64(dsn_ack_opt.mdss_ack)),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);

		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
		goto ret_optlen;
	}
	if ((tp->t_mpflags & TMPF_SEND_DSN) &&
	    (!send_64bit_dsn) &&
	    !(tp->t_mpflags & TMPF_MPTCP_ACKNOW)) {
		struct mptcp_dsn_opt dsn_opt;
		unsigned int dssoptlen = sizeof(struct mptcp_dsn_opt);
		uint16_t dss_csum;

		if (do_csum) {
			dssoptlen += 2;
		}

		CHECK_OPTLEN;

		bzero(&dsn_opt, sizeof(dsn_opt));
		dsn_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
		dsn_opt.mdss_copt.mdss_subtype = MPO_DSS;
		dsn_opt.mdss_copt.mdss_len = dssoptlen;
		dsn_opt.mdss_copt.mdss_flags |= MDSS_M;

		CHECK_DATALEN;

		mptcp_output_getm_dsnmap32(so, off, &dsn_opt.mdss_dsn,
		    &dsn_opt.mdss_subflow_seqn,
		    &dsn_opt.mdss_data_len,
		    &dss_csum);

		if ((dsn_opt.mdss_data_len == 0) ||
		    (dsn_opt.mdss_dsn == 0)) {
			goto ret_optlen;
		}

		if (tp->t_mpflags & TMPF_SEND_DFIN) {
			DO_FIN(dsn_opt);
		}

		dsn_opt.mdss_dsn = htonl(dsn_opt.mdss_dsn);
		dsn_opt.mdss_subflow_seqn = htonl(dsn_opt.mdss_subflow_seqn);
		dsn_opt.mdss_data_len = htons(dsn_opt.mdss_data_len);
		memcpy(opt + optlen, &dsn_opt, sizeof(dsn_opt));
		if (do_csum) {
			*((uint16_t *)(void *)(opt + optlen + sizeof(dsn_opt))) = dss_csum;
		}

		optlen += dssoptlen;
		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
		goto ret_optlen;
	}
	/* 32-bit Data ACK option */
	if ((tp->t_mpflags & TMPF_MPTCP_ACKNOW) &&
	    (!send_64bit_ack) &&
	    !(tp->t_mpflags & TMPF_SEND_DSN) &&
	    !(tp->t_mpflags & TMPF_SEND_DFIN)) {
		struct mptcp_data_ack_opt dack_opt;
		unsigned int dssoptlen = 0;

		dssoptlen = sizeof(dack_opt);

		CHECK_OPTLEN;

		bzero(&dack_opt, dssoptlen);
		dack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
		dack_opt.mdss_copt.mdss_len = dssoptlen;
		dack_opt.mdss_copt.mdss_subtype = MPO_DSS;
		dack_opt.mdss_copt.mdss_flags |= MDSS_A;
		dack_opt.mdss_ack =
		    htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
		memcpy(opt + optlen, &dack_opt, dssoptlen);
		optlen += dssoptlen;
		VERIFY(optlen <= MAX_TCPOPTLEN);
		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
		goto ret_optlen;
	}
	/* 64-bit Data ACK option */
	if ((tp->t_mpflags & TMPF_MPTCP_ACKNOW) &&
	    (send_64bit_ack) &&
	    !(tp->t_mpflags & TMPF_SEND_DSN) &&
	    !(tp->t_mpflags & TMPF_SEND_DFIN)) {
		struct mptcp_data_ack64_opt dack_opt;
		unsigned int dssoptlen = 0;

		dssoptlen = sizeof(dack_opt);

		CHECK_OPTLEN;

		bzero(&dack_opt, dssoptlen);
		dack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
		dack_opt.mdss_copt.mdss_len = dssoptlen;
		dack_opt.mdss_copt.mdss_subtype = MPO_DSS;
		dack_opt.mdss_copt.mdss_flags |= (MDSS_A | MDSS_a);
		dack_opt.mdss_ack = mptcp_hton64(mp_tp->mpt_rcvnxt);
		/*
		 * The other end should retransmit 64-bit DSN until it
		 * receives a 64-bit ACK.
		 */
		mp_tp->mpt_flags &= ~MPTCPF_SND_64BITACK;
		memcpy(opt + optlen, &dack_opt, dssoptlen);
		optlen += dssoptlen;
		VERIFY(optlen <= MAX_TCPOPTLEN);
		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
		goto ret_optlen;
	}
	/* 32-bit DSS+Data ACK option */
	if ((tp->t_mpflags & TMPF_SEND_DSN) &&
	    (!send_64bit_dsn) &&
	    (!send_64bit_ack) &&
	    (tp->t_mpflags & TMPF_MPTCP_ACKNOW)) {
		struct mptcp_dss_ack_opt dss_ack_opt;
		unsigned int dssoptlen = sizeof(dss_ack_opt);
		uint16_t dss_csum;

		if (do_csum) {
			dssoptlen += 2;
		}

		CHECK_OPTLEN;

		bzero(&dss_ack_opt, sizeof(dss_ack_opt));
		dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
		dss_ack_opt.mdss_copt.mdss_len = dssoptlen;
		dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
		dss_ack_opt.mdss_copt.mdss_flags |= MDSS_A | MDSS_M;
		dss_ack_opt.mdss_ack =
		    htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));

		CHECK_DATALEN;

		mptcp_output_getm_dsnmap32(so, off, &dss_ack_opt.mdss_dsn,
		    &dss_ack_opt.mdss_subflow_seqn,
		    &dss_ack_opt.mdss_data_len,
		    &dss_csum);

		if ((dss_ack_opt.mdss_data_len == 0) ||
		    (dss_ack_opt.mdss_dsn == 0)) {
			goto ret_optlen;
		}

		if (tp->t_mpflags & TMPF_SEND_DFIN) {
			DO_FIN(dss_ack_opt);
		}

		dss_ack_opt.mdss_dsn = htonl(dss_ack_opt.mdss_dsn);
		dss_ack_opt.mdss_subflow_seqn =
		    htonl(dss_ack_opt.mdss_subflow_seqn);
		dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len);
		memcpy(opt + optlen, &dss_ack_opt, sizeof(dss_ack_opt));
		if (do_csum) {
			*((uint16_t *)(void *)(opt + optlen + sizeof(dss_ack_opt))) = dss_csum;
		}

		optlen += dssoptlen;

		if (optlen > MAX_TCPOPTLEN) {
			panic("optlen too large");
		}
		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
		goto ret_optlen;
	}
	/* 32-bit DSS + 64-bit DACK option */
	if ((tp->t_mpflags & TMPF_SEND_DSN) &&
	    (!send_64bit_dsn) &&
	    (send_64bit_ack) &&
	    (tp->t_mpflags & TMPF_MPTCP_ACKNOW)) {
		struct mptcp_dss32_ack64_opt dss_ack_opt;
		unsigned int dssoptlen = sizeof(dss_ack_opt);
		uint16_t dss_csum;

		if (do_csum) {
			dssoptlen += 2;
		}

		CHECK_OPTLEN;

		bzero(&dss_ack_opt, sizeof(dss_ack_opt));
		dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
		dss_ack_opt.mdss_copt.mdss_len = dssoptlen;
		dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
		dss_ack_opt.mdss_copt.mdss_flags |= MDSS_M | MDSS_A | MDSS_a;
		dss_ack_opt.mdss_ack =
		    mptcp_hton64(mp_tp->mpt_rcvnxt);

		CHECK_DATALEN;

		mptcp_output_getm_dsnmap32(so, off, &dss_ack_opt.mdss_dsn,
		    &dss_ack_opt.mdss_subflow_seqn,
		    &dss_ack_opt.mdss_data_len,
		    &dss_csum);

		if ((dss_ack_opt.mdss_data_len == 0) ||
		    (dss_ack_opt.mdss_dsn == 0)) {
			goto ret_optlen;
		}

		if (tp->t_mpflags & TMPF_SEND_DFIN) {
			DO_FIN(dss_ack_opt);
		}

		dss_ack_opt.mdss_dsn = htonl(dss_ack_opt.mdss_dsn);
		dss_ack_opt.mdss_subflow_seqn =
		    htonl(dss_ack_opt.mdss_subflow_seqn);
		dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len);
		memcpy(opt + optlen, &dss_ack_opt, sizeof(dss_ack_opt));
		if (do_csum) {
			*((uint16_t *)(void *)(opt + optlen + sizeof(dss_ack_opt))) = dss_csum;
		}

		optlen += dssoptlen;

		if (optlen > MAX_TCPOPTLEN) {
			panic("optlen too large");
		}
		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
		goto ret_optlen;
	}
	if (tp->t_mpflags & TMPF_SEND_DFIN) {
		unsigned int dssoptlen = sizeof(struct mptcp_dss_ack_opt);
		struct mptcp_dss_ack_opt dss_ack_opt;
		uint16_t dss_csum;

		if (do_csum) {
			uint64_t dss_val = mptcp_hton64(mp_tp->mpt_sndmax - 1);
			uint16_t dlen = htons(1);
			uint32_t sseq = 0;
			uint32_t sum;

			dssoptlen += 2;

			sum = in_pseudo64(dss_val, sseq, dlen);
			ADDCARRY(sum);
			dss_csum = ~sum & 0xffff;
		}

		CHECK_OPTLEN;

		bzero(&dss_ack_opt, sizeof(dss_ack_opt));

		/*
		 * Data FIN occupies one sequence space.
		 * Don't send it if it has been Acked.
		 */
		if ((mp_tp->mpt_sndnxt + 1 != mp_tp->mpt_sndmax) ||
		    (mp_tp->mpt_snduna == mp_tp->mpt_sndmax)) {
			goto ret_optlen;
		}

		dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
		dss_ack_opt.mdss_copt.mdss_len = dssoptlen;
		dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
		dss_ack_opt.mdss_copt.mdss_flags |= MDSS_A | MDSS_M | MDSS_F;
		dss_ack_opt.mdss_ack =
		    htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
		dss_ack_opt.mdss_dsn =
		    htonl(MPTCP_DATASEQ_LOW32(mp_tp->mpt_sndmax - 1));
		dss_ack_opt.mdss_subflow_seqn = 0;
		dss_ack_opt.mdss_data_len = 1;
		dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len);
		memcpy(opt + optlen, &dss_ack_opt, sizeof(dss_ack_opt));
		if (do_csum) {
			*((uint16_t *)(void *)(opt + optlen + sizeof(dss_ack_opt))) = dss_csum;
		}

		optlen += dssoptlen;
	}
ret_optlen:
	if (TRUE == *p_mptcp_acknow) {
		VERIFY(old_mpt_flags != 0);

		u_int32_t new_mpt_flags = tp->t_mpflags & TMPF_MPTCP_SIGNALS;

		/*
		 * If none of the above mpflags were acted on by
		 * this routine, reset these flags and set p_mptcp_acknow
		 * to false.
		 *
		 * XXX The reset value of p_mptcp_acknow can be used
		 * to communicate tcp_output to NOT send a pure ack without any
		 * MPTCP options as it will be treated as a dup ack.
		 * Since the instances of mptcp_setup_opts not acting on
		 * these options are mostly corner cases and sending a dup
		 * ack here would only have an impact if the system
		 * has sent consecutive dup acks before this false one,
		 * we haven't modified the logic in tcp_output to avoid
		 * that.
		 */
		if (old_mpt_flags == new_mpt_flags) {
			tp->t_mpflags &= ~TMPF_MPTCP_SIGNALS;
			*p_mptcp_acknow = FALSE;
			mptcplog((LOG_DEBUG, "%s: no action \n", __func__),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
		} else {
			mptcplog((LOG_DEBUG, "%s: acknow set, old flags %x new flags %x \n",
			    __func__, old_mpt_flags, new_mpt_flags),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
		}
	}

	return optlen;
}
/*
 * MPTCP Options Input Processing
 */
static int
mptcp_sanitize_option(struct tcpcb *tp, int mptcp_subtype)
{
	struct mptcb *mp_tp = tptomptp(tp);
	int ret = 1;

	switch (mptcp_subtype) {
	case MPO_CAPABLE:
		break;
	case MPO_JOIN:                  /* fall through */
	case MPO_DSS:                   /* fall through */
	case MPO_FASTCLOSE:             /* fall through */
	case MPO_FAIL:                  /* fall through */
	case MPO_REMOVE_ADDR:           /* fall through */
	case MPO_ADD_ADDR:              /* fall through */
	case MPO_PRIO:                  /* fall through */
		if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
			ret = 0;
		}
		break;
	default:
		ret = 0;
		os_log_error(mptcp_log_handle, "%s - %lx: type = %d \n", __func__,
		    (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte), mptcp_subtype);
		break;
	}

	return ret;
}
static int
mptcp_valid_mpcapable_common_opt(u_char *cp)
{
	struct mptcp_mpcapable_opt_common *rsp =
	    (struct mptcp_mpcapable_opt_common *)cp;

	/* mmco_kind, mmco_len and mmco_subtype are validated before */

	if (!(rsp->mmco_flags & MPCAP_PROPOSAL_SBIT)) {
		return 0;
	}

	if (rsp->mmco_flags & (MPCAP_BBIT | MPCAP_DBIT |
	    MPCAP_EBIT | MPCAP_FBIT | MPCAP_GBIT)) {
		return 0;
	}

	return 1;
}
static void
mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
    int optlen)
{
	struct mptcp_mpcapable_opt_rsp *rsp = NULL;
	struct mptcb *mp_tp = tptomptp(tp);
	struct mptses *mpte = mp_tp->mpt_mpte;

	/* Only valid on SYN/ACK */
	if ((th->th_flags & (TH_SYN | TH_ACK)) != (TH_SYN | TH_ACK)) {
		return;
	}

	/* Validate the kind, len, flags */
	if (mptcp_valid_mpcapable_common_opt(cp) != 1) {
		tcpstat.tcps_invalid_mpcap++;
		return;
	}

	/* handle SYN/ACK retransmission by acknowledging with ACK */
	if (mp_tp->mpt_state >= MPTCPS_ESTABLISHED) {
		return;
	}

	/* A SYN/ACK contains peer's key and flags */
	if (optlen != sizeof(struct mptcp_mpcapable_opt_rsp)) {
		os_log_error(mptcp_log_handle, "%s - %lx: SYN_ACK optlen = %d, sizeof mp opt = %lu \n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), optlen,
		    sizeof(struct mptcp_mpcapable_opt_rsp));
		tcpstat.tcps_invalid_mpcap++;
		return;
	}

	/*
	 * If checksum flag is set, enable MPTCP checksum, even if
	 * it was not negotiated on the first SYN.
	 */
	if (((struct mptcp_mpcapable_opt_common *)cp)->mmco_flags &
	    MPCAP_CHECKSUM_CBIT) {
		mp_tp->mpt_flags |= MPTCPF_CHECKSUM;
	}

	if (((struct mptcp_mpcapable_opt_common *)cp)->mmco_flags &
	    MPCAP_UNICAST_IPBIT) {
		mpte->mpte_flags |= MPTE_UNICAST_IP;
	}

	rsp = (struct mptcp_mpcapable_opt_rsp *)cp;
	mp_tp->mpt_remotekey = rsp->mmc_localkey;
	/* For now just downgrade to the peer's version */
	mp_tp->mpt_peer_version = rsp->mmc_common.mmco_version;
	if (rsp->mmc_common.mmco_version < mp_tp->mpt_version) {
		mp_tp->mpt_version = rsp->mmc_common.mmco_version;
		tcpstat.tcps_mp_verdowngrade++;
	}
	if (mptcp_init_remote_parms(mp_tp) != 0) {
		tcpstat.tcps_invalid_mpcap++;
		return;
	}
	tcp_heuristic_mptcp_success(tp);
	tp->t_mpflags |= (TMPF_SND_KEYS | TMPF_MPTCP_TRUE);
}
static void
mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen)
{
#define MPTCP_JOPT_ERROR_PATH(tp) {                                     \
	tcpstat.tcps_invalid_joins++;                                   \
	if (tp->t_inpcb->inp_socket != NULL) {                          \
	        soevent(tp->t_inpcb->inp_socket,                        \
	            SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);        \
	}                                                               \
}
	int error = 0;
	struct mptcp_mpjoin_opt_rsp *join_rsp =
	    (struct mptcp_mpjoin_opt_rsp *)cp;

	/* Only valid on SYN/ACK */
	if ((th->th_flags & (TH_SYN | TH_ACK)) != (TH_SYN | TH_ACK)) {
		return;
	}

	if (optlen != sizeof(struct mptcp_mpjoin_opt_rsp)) {
		os_log_error(mptcp_log_handle, "%s - %lx: SYN_ACK: unexpected optlen = %d mp option = %lu\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(tptomptp(tp)->mpt_mpte),
		    optlen, sizeof(struct mptcp_mpjoin_opt_rsp));
		tp->t_mpflags &= ~TMPF_PREESTABLISHED;
		/* send RST and close */
		MPTCP_JOPT_ERROR_PATH(tp);
		return;
	}

	mptcp_set_raddr_rand(tp->t_local_aid, tptomptp(tp),
	    join_rsp->mmjo_addr_id, join_rsp->mmjo_rand);
	error = mptcp_validate_join_hmac(tp,
	    (u_char *)&join_rsp->mmjo_mac, SHA1_TRUNCATED);
	if (error) {
		os_log_error(mptcp_log_handle, "%s - %lx: SYN_ACK error = %d \n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(tptomptp(tp)->mpt_mpte),
		    error);
		tp->t_mpflags &= ~TMPF_PREESTABLISHED;
		/* send RST and close */
		MPTCP_JOPT_ERROR_PATH(tp);
		return;
	}
	tp->t_mpflags |= (TMPF_SENT_JOIN | TMPF_SND_JACK);
}
static int
mptcp_validate_join_hmac(struct tcpcb *tp, u_char *hmac, int mac_len)
{
	u_char digest[SHA1_RESULTLEN] = {0};
	struct mptcb *mp_tp = tptomptp(tp);
	u_int32_t rem_rand, loc_rand;

	rem_rand = loc_rand = 0;

	mptcp_get_rands(tp->t_local_aid, mp_tp, &loc_rand, &rem_rand);
	if ((rem_rand == 0) || (loc_rand == 0)) {
		return -1;
	}

	mptcp_hmac_sha1(mp_tp->mpt_remotekey, mp_tp->mpt_localkey, rem_rand, loc_rand,
	    digest);

	if (bcmp(digest, hmac, mac_len) == 0) {
		return 0; /* matches */
	} else {
		printf("%s: remote key %llx local key %llx remote rand %x "
		    "local rand %x \n", __func__, mp_tp->mpt_remotekey, mp_tp->mpt_localkey,
		    rem_rand, loc_rand);
		return -1;
	}
}
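/*
 * Note on mptcp_validate_join_hmac(): the SYN/ACK of an MP_JOIN only
 * carries the leftmost 64 bits of the peer's HMAC-SHA1 (hence the
 * SHA1_TRUNCATED length passed in by mptcp_do_mpjoin_opt()), computed over
 * the two random nonces with the remote and local keys; the full 160-bit
 * HMAC only travels on the third ACK, built in mptcp_setup_join_ack_opts()
 * above.
 */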
/*
 * Update the mptcb send state variables, but the actual sbdrop occurs
 * in MPTCP layer
 */
void
mptcp_data_ack_rcvd(struct mptcb *mp_tp, struct tcpcb *tp, u_int64_t full_dack)
{
	uint64_t acked = full_dack - mp_tp->mpt_snduna;

	if (acked) {
		struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);

		if (acked > mp_so->so_snd.sb_cc) {
			if (acked > mp_so->so_snd.sb_cc + 1 ||
			    mp_tp->mpt_state < MPTCPS_FIN_WAIT_1) {
				os_log_error(mptcp_log_handle, "%s - %lx: acked %u, sb_cc %u full %u suna %u state %u\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
				    (uint32_t)acked, mp_so->so_snd.sb_cc,
				    (uint32_t)full_dack, (uint32_t)mp_tp->mpt_snduna,
				    mp_tp->mpt_state);
			}

			sbdrop(&mp_so->so_snd, (int)mp_so->so_snd.sb_cc);
		} else {
			sbdrop(&mp_so->so_snd, acked);
		}

		mp_tp->mpt_snduna += acked;
		/* In degraded mode, we may get some Data ACKs */
		if ((tp->t_mpflags & TMPF_TCP_FALLBACK) &&
		    !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) &&
		    MPTCP_SEQ_GT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
			/* bring back sndnxt to retransmit MPTCP data */
			mp_tp->mpt_sndnxt = mp_tp->mpt_dsn_at_csum_fail;
			mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC;
			tp->t_inpcb->inp_socket->so_flags1 |=
			    SOF1_POST_FALLBACK_SYNC;
		}

		mptcp_clean_reinjectq(mp_tp->mpt_mpte);
	}

	if (full_dack == mp_tp->mpt_sndmax &&
	    mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) {
		mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_ACK);
		tp->t_mpflags &= ~TMPF_SEND_DFIN;
	}
}
void
mptcp_update_window_wakeup(struct tcpcb *tp)
{
	struct mptcb *mp_tp = tptomptp(tp);

	socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));

	if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
		mp_tp->mpt_sndwnd = tp->snd_wnd;
		mp_tp->mpt_sndwl1 = mp_tp->mpt_rcvnxt;
		mp_tp->mpt_sndwl2 = mp_tp->mpt_snduna;
	}

	sowwakeup(tp->t_inpcb->inp_socket);
}
static void
mptcp_update_window(struct mptcb *mp_tp, u_int64_t ack, u_int64_t seq, u_int32_t tiwin)
{
	if (MPTCP_SEQ_LT(mp_tp->mpt_sndwl1, seq) ||
	    (mp_tp->mpt_sndwl1 == seq &&
	    (MPTCP_SEQ_LT(mp_tp->mpt_sndwl2, ack) ||
	    (mp_tp->mpt_sndwl2 == ack && tiwin > mp_tp->mpt_sndwnd)))) {
		mp_tp->mpt_sndwnd = tiwin;
		mp_tp->mpt_sndwl1 = seq;
		mp_tp->mpt_sndwl2 = ack;
	}
}
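/*
 * The test above mirrors the classic TCP window-update rule (RFC 793's
 * SND.WL1/SND.WL2 bookkeeping), applied at the data level: the advertised
 * window is taken over when it arrives with a newer data sequence number,
 * or the same sequence number but a newer data ACK, or both equal and a
 * larger window.
 */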
static void
mptcp_do_dss_opt_ack_meat(u_int64_t full_dack, u_int64_t full_dsn,
    struct tcpcb *tp, u_int32_t tiwin)
{
	struct mptcb *mp_tp = tptomptp(tp);
	int close_notify = 0;

	tp->t_mpflags |= TMPF_RCVD_DACK;

	if (MPTCP_SEQ_LEQ(full_dack, mp_tp->mpt_sndmax) &&
	    MPTCP_SEQ_GEQ(full_dack, mp_tp->mpt_snduna)) {
		mptcp_data_ack_rcvd(mp_tp, tp, full_dack);
		if (mp_tp->mpt_state > MPTCPS_FIN_WAIT_2) {
			close_notify = 1;
		}
		if (mp_tp->mpt_flags & MPTCPF_RCVD_64BITACK) {
			mp_tp->mpt_flags &= ~MPTCPF_RCVD_64BITACK;
			mp_tp->mpt_flags &= ~MPTCPF_SND_64BITDSN;
		}
		mptcp_notify_mpready(tp->t_inpcb->inp_socket);
		if (close_notify) {
			mptcp_notify_close(tp->t_inpcb->inp_socket);
		}
	}

	mptcp_update_window(mp_tp, full_dack, full_dsn, tiwin);
}
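/*
 * mptcp_do_dss_opt_meat() below dispatches on the four DSS flag bits from
 * RFC 6824: MDSS_A (Data ACK present), MDSS_a (Data ACK is 8 octets),
 * MDSS_M (DSN, subflow sequence number and data-level length present) and
 * MDSS_m (DSN is 8 octets).  Each valid combination maps to one of the
 * fixed-size option structs handled in the switch statement.
 */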
static void
mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp, struct tcphdr *th)
{
	struct mptcp_dss_copt *dss_rsp = (struct mptcp_dss_copt *)cp;
	u_int64_t full_dack = 0;
	u_int32_t tiwin = th->th_win << tp->snd_scale;
	struct mptcb *mp_tp = tptomptp(tp);
	u_int64_t full_dsn = 0;
	uint16_t csum = 0;
	int csum_len = 0;

#define MPTCP_DSS_OPT_SZ_CHK(len, expected_len) {                       \
	if (len != expected_len) {                                      \
	        mptcplog((LOG_ERR, "%s: bad len = %d dss: %x \n", __func__,     \
	            len, dss_rsp->mdss_flags),                          \
	            (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG),              \
	            MPTCP_LOGLVL_LOG);                                  \
	        return;                                                 \
	}                                                               \
}

	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
		csum_len = 2;
	}

	dss_rsp->mdss_flags &= (MDSS_A | MDSS_a | MDSS_M | MDSS_m);
	switch (dss_rsp->mdss_flags) {
	case (MDSS_M):
	{
		/* 32-bit DSS, No Data ACK */
		struct mptcp_dsn_opt *dss_rsp1;
		dss_rsp1 = (struct mptcp_dsn_opt *)cp;

		MPTCP_DSS_OPT_SZ_CHK(dss_rsp1->mdss_copt.mdss_len,
		    sizeof(struct mptcp_dsn_opt) + csum_len);
		if (csum_len == 0) {
			mptcp_update_dss_rcv_state(dss_rsp1, tp, 0);
		} else {
			mptcp_update_dss_rcv_state(dss_rsp1, tp,
			    *(uint16_t *)(void *)(cp +
			    (dss_rsp1->mdss_copt.mdss_len - csum_len)));
		}
		break;
	}
	case (MDSS_A):
	{
		/* 32-bit Data ACK, no DSS */
		struct mptcp_data_ack_opt *dack_opt;
		dack_opt = (struct mptcp_data_ack_opt *)cp;

		MPTCP_DSS_OPT_SZ_CHK(dack_opt->mdss_copt.mdss_len,
		    sizeof(struct mptcp_data_ack_opt));

		u_int32_t dack = dack_opt->mdss_ack;
		NTOHL(dack);
		MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack);
		mptcp_do_dss_opt_ack_meat(full_dack, mp_tp->mpt_sndwl1, tp, tiwin);
		break;
	}
	case (MDSS_M | MDSS_A):
	{
		/* 32-bit Data ACK + 32-bit DSS */
		struct mptcp_dss_ack_opt *dss_ack_rsp;
		dss_ack_rsp = (struct mptcp_dss_ack_opt *)cp;

		MPTCP_DSS_OPT_SZ_CHK(dss_ack_rsp->mdss_copt.mdss_len,
		    sizeof(struct mptcp_dss_ack_opt) + csum_len);

		u_int32_t dack = dss_ack_rsp->mdss_ack;
		NTOHL(dack);
		MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack);

		NTOHL(dss_ack_rsp->mdss_dsn);
		NTOHL(dss_ack_rsp->mdss_subflow_seqn);
		NTOHS(dss_ack_rsp->mdss_data_len);
		MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_ack_rsp->mdss_dsn, full_dsn);

		mptcp_do_dss_opt_ack_meat(full_dack, full_dsn, tp, tiwin);

		if (csum_len != 0) {
			csum = *(uint16_t *)(void *)(cp + (dss_ack_rsp->mdss_copt.mdss_len - csum_len));
		}

		mptcp_update_rcv_state_meat(mp_tp, tp,
		    full_dsn,
		    dss_ack_rsp->mdss_subflow_seqn,
		    dss_ack_rsp->mdss_data_len,
		    csum);
		break;
	}
	case (MDSS_M | MDSS_m):
	{
		/* 64-bit DSS , No Data ACK */
		struct mptcp_dsn64_opt *dsn64;
		dsn64 = (struct mptcp_dsn64_opt *)cp;

		MPTCP_DSS_OPT_SZ_CHK(dsn64->mdss_copt.mdss_len,
		    sizeof(struct mptcp_dsn64_opt) + csum_len);

		mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;

		full_dsn = mptcp_ntoh64(dsn64->mdss_dsn);
		NTOHL(dsn64->mdss_subflow_seqn);
		NTOHS(dsn64->mdss_data_len);

		if (csum_len != 0) {
			csum = *(uint16_t *)(void *)(cp + dsn64->mdss_copt.mdss_len - csum_len);
		}

		mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
		    dsn64->mdss_subflow_seqn,
		    dsn64->mdss_data_len,
		    csum);
		break;
	}
	case (MDSS_A | MDSS_a):
	{
		/* 64-bit Data ACK, no DSS */
		struct mptcp_data_ack64_opt *dack64;
		dack64 = (struct mptcp_data_ack64_opt *)cp;

		MPTCP_DSS_OPT_SZ_CHK(dack64->mdss_copt.mdss_len,
		    sizeof(struct mptcp_data_ack64_opt));

		mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;

		full_dack = mptcp_ntoh64(dack64->mdss_ack);
		mptcp_do_dss_opt_ack_meat(full_dack, mp_tp->mpt_sndwl1, tp, tiwin);
		break;
	}
	case (MDSS_M | MDSS_m | MDSS_A):
	{
		/* 64-bit DSS + 32-bit Data ACK */
		struct mptcp_dss64_ack32_opt *dss_ack_rsp;
		dss_ack_rsp = (struct mptcp_dss64_ack32_opt *)cp;

		MPTCP_DSS_OPT_SZ_CHK(dss_ack_rsp->mdss_copt.mdss_len,
		    sizeof(struct mptcp_dss64_ack32_opt) + csum_len);

		u_int32_t dack = dss_ack_rsp->mdss_ack;
		NTOHL(dack);
		mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
		MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack);

		full_dsn = mptcp_ntoh64(dss_ack_rsp->mdss_dsn);
		NTOHL(dss_ack_rsp->mdss_subflow_seqn);
		NTOHS(dss_ack_rsp->mdss_data_len);

		mptcp_do_dss_opt_ack_meat(full_dack, full_dsn, tp, tiwin);

		if (csum_len != 0) {
			csum = *(uint16_t *)(void *)(cp + dss_ack_rsp->mdss_copt.mdss_len - csum_len);
		}

		mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
		    dss_ack_rsp->mdss_subflow_seqn,
		    dss_ack_rsp->mdss_data_len,
		    csum);
		break;
	}
	case (MDSS_M | MDSS_A | MDSS_a):
	{
		/* 32-bit DSS + 64-bit Data ACK */
		struct mptcp_dss32_ack64_opt *dss32_ack64_opt;
		dss32_ack64_opt = (struct mptcp_dss32_ack64_opt *)cp;

		MPTCP_DSS_OPT_SZ_CHK(
			dss32_ack64_opt->mdss_copt.mdss_len,
			sizeof(struct mptcp_dss32_ack64_opt) + csum_len);

		full_dack = mptcp_ntoh64(dss32_ack64_opt->mdss_ack);
		NTOHL(dss32_ack64_opt->mdss_dsn);
		mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
		MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt,
		    dss32_ack64_opt->mdss_dsn, full_dsn);
		NTOHL(dss32_ack64_opt->mdss_subflow_seqn);
		NTOHS(dss32_ack64_opt->mdss_data_len);

		mptcp_do_dss_opt_ack_meat(full_dack, full_dsn, tp, tiwin);
		if (csum_len == 0) {
			mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
			    dss32_ack64_opt->mdss_subflow_seqn,
			    dss32_ack64_opt->mdss_data_len, 0);
		} else {
			mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
			    dss32_ack64_opt->mdss_subflow_seqn,
			    dss32_ack64_opt->mdss_data_len,
			    *(uint16_t *)(void *)(cp +
			    dss32_ack64_opt->mdss_copt.mdss_len -
			    csum_len));
		}
		break;
	}
	case (MDSS_M | MDSS_m | MDSS_A | MDSS_a):
	{
		/* 64-bit DSS + 64-bit Data ACK */
		struct mptcp_dss64_ack64_opt *dss64_ack64;
		dss64_ack64 = (struct mptcp_dss64_ack64_opt *)cp;

		MPTCP_DSS_OPT_SZ_CHK(dss64_ack64->mdss_copt.mdss_len,
		    sizeof(struct mptcp_dss64_ack64_opt) + csum_len);

		mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
		mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
		full_dsn = mptcp_ntoh64(dss64_ack64->mdss_dsn);
		full_dack = mptcp_ntoh64(dss64_ack64->mdss_dsn);
		mptcp_do_dss_opt_ack_meat(full_dack, full_dsn, tp, tiwin);
		NTOHL(dss64_ack64->mdss_subflow_seqn);
		NTOHS(dss64_ack64->mdss_data_len);
		if (csum_len == 0) {
			mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
			    dss64_ack64->mdss_subflow_seqn,
			    dss64_ack64->mdss_data_len, 0);
		} else {
			mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
			    dss64_ack64->mdss_subflow_seqn,
			    dss64_ack64->mdss_data_len,
			    *(uint16_t *)(void *)(cp +
			    dss64_ack64->mdss_copt.mdss_len -
			    csum_len));
		}
		break;
	}
	default:
		mptcplog((LOG_DEBUG, "%s: File bug, DSS flags = %x\n",
		    __func__, dss_rsp->mdss_flags),
		    (MPTCP_SOCKET_DBG | MPTCP_RECEIVER_DBG),
		    MPTCP_LOGLVL_LOG);
		break;
	}
}
static void
mptcp_do_dss_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th)
{
	struct mptcp_dss_copt *dss_rsp = (struct mptcp_dss_copt *)cp;
	struct mptcb *mp_tp = tptomptp(tp);

	if (!mp_tp) {
		return;
	}

	if (dss_rsp->mdss_subtype == MPO_DSS) {
		if (dss_rsp->mdss_flags & MDSS_F) {
			tp->t_rcv_map.mpt_dfin = 1;
		}

		mptcp_do_dss_opt_meat(cp, tp, th);
	}
}
static void
mptcp_do_fastclose_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th)
{
	struct mptcb *mp_tp = NULL;
	struct mptcp_fastclose_opt *fc_opt = (struct mptcp_fastclose_opt *)cp;

	if (th->th_flags != TH_ACK) {
		return;
	}

	if (fc_opt->mfast_len != sizeof(struct mptcp_fastclose_opt)) {
		tcpstat.tcps_invalid_opt++;
		return;
	}

	mp_tp = tptomptp(tp);
	if (mp_tp == NULL) {
		return;
	}

	if (fc_opt->mfast_key != mp_tp->mpt_localkey) {
		tcpstat.tcps_invalid_opt++;
		return;
	}

	/*
	 * fastclose could make us more vulnerable to attacks, hence
	 * accept only those that are at the next expected sequence number.
	 */
	if (th->th_seq != tp->rcv_nxt) {
		tcpstat.tcps_invalid_opt++;
		return;
	}

	/* Reset this flow */
	tp->t_mpflags |= TMPF_FASTCLOSERCV;

	if (tp->t_inpcb->inp_socket != NULL) {
		soevent(tp->t_inpcb->inp_socket,
		    SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
	}
}
static void
mptcp_do_mpfail_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th)
{
	struct mptcp_mpfail_opt *fail_opt = (struct mptcp_mpfail_opt *)cp;
	u_int32_t mdss_subflow_seqn = 0;
	struct mptcb *mp_tp;
	int error = 0;

	/*
	 * mpfail could make us more vulnerable to attacks. Hence accept
	 * only those that are the next expected sequence number.
	 */
	if (th->th_seq != tp->rcv_nxt) {
		tcpstat.tcps_invalid_opt++;
		return;
	}

	/* A packet without RST, must atleast have the ACK bit set */
	if ((th->th_flags != TH_ACK) && (th->th_flags != TH_RST)) {
		return;
	}

	if (fail_opt->mfail_len != sizeof(struct mptcp_mpfail_opt)) {
		return;
	}

	mp_tp = tptomptp(tp);

	mp_tp->mpt_flags |= MPTCPF_RECVD_MPFAIL;
	mp_tp->mpt_dsn_at_csum_fail = mptcp_hton64(fail_opt->mfail_dsn);
	error = mptcp_get_map_for_dsn(tp->t_inpcb->inp_socket,
	    mp_tp->mpt_dsn_at_csum_fail, &mdss_subflow_seqn);
	if (error == 0) {
		mp_tp->mpt_ssn_at_csum_fail = mdss_subflow_seqn;
	}

	mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
}
static void
mptcp_do_add_addr_opt(struct mptses *mpte, u_char *cp)
{
	struct mptcp_add_addr_opt *addr_opt = (struct mptcp_add_addr_opt *)cp;

	if (addr_opt->maddr_len != MPTCP_ADD_ADDR_OPT_LEN_V4 &&
	    addr_opt->maddr_len != MPTCP_ADD_ADDR_OPT_LEN_V6) {
		os_log_info(mptcp_log_handle, "%s - %lx: Wrong ADD_ADDR length %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
		    addr_opt->maddr_len);

		return;
	}

	if (addr_opt->maddr_len == MPTCP_ADD_ADDR_OPT_LEN_V4 &&
	    addr_opt->maddr_ipversion != 4) {
		os_log_info(mptcp_log_handle, "%s - %lx: ADD_ADDR length for v4 but version is %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
		    addr_opt->maddr_ipversion);

		return;
	}

	if (addr_opt->maddr_len == MPTCP_ADD_ADDR_OPT_LEN_V6 &&
	    addr_opt->maddr_ipversion != 6) {
		os_log_info(mptcp_log_handle, "%s - %lx: ADD_ADDR length for v6 but version is %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
		    addr_opt->maddr_ipversion);

		return;
	}

	if (addr_opt->maddr_len == MPTCP_ADD_ADDR_OPT_LEN_V4) {
		struct sockaddr_in *dst = &mpte->mpte_dst_unicast_v4;
		struct in_addr *addr = &addr_opt->maddr_u.maddr_addrv4;
		in_addr_t haddr = ntohl(addr->s_addr);

		if (IN_ZERONET(haddr) ||
		    IN_LOOPBACK(haddr) ||
		    IN_LINKLOCAL(haddr) ||
		    IN_DS_LITE(haddr) ||
		    IN_6TO4_RELAY_ANYCAST(haddr) ||
		    IN_MULTICAST(haddr) ||
		    INADDR_BROADCAST == haddr ||
		    IN_PRIVATE(haddr) ||
		    IN_SHARED_ADDRESS_SPACE(haddr)) {
			os_log_info(mptcp_log_handle, "%s - %lx: ADD_ADDR invalid addr: %x\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    haddr);

			return;
		}

		dst->sin_len = sizeof(*dst);
		dst->sin_family = AF_INET;
		dst->sin_port = mpte->__mpte_dst_v4.sin_port;
		dst->sin_addr.s_addr = addr->s_addr;
	} else {
		struct sockaddr_in6 *dst = &mpte->mpte_dst_unicast_v6;
		struct in6_addr *addr = &addr_opt->maddr_u.maddr_addrv6;

		if (IN6_IS_ADDR_LINKLOCAL(addr) ||
		    IN6_IS_ADDR_MULTICAST(addr) ||
		    IN6_IS_ADDR_UNSPECIFIED(addr) ||
		    IN6_IS_ADDR_LOOPBACK(addr) ||
		    IN6_IS_ADDR_V4COMPAT(addr) ||
		    IN6_IS_ADDR_V4MAPPED(addr)) {
			char dbuf[MAX_IPv6_STR_LEN];

			inet_ntop(AF_INET6, &dst->sin6_addr, dbuf, sizeof(dbuf));
			os_log_info(mptcp_log_handle, "%s - %lx: ADD_ADDRv6 invalid addr: %s\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    dbuf);

			return;
		}

		dst->sin6_len = sizeof(*dst);
		dst->sin6_family = AF_INET6;
		dst->sin6_port = mpte->__mpte_dst_v6.sin6_port;
		memcpy(&dst->sin6_addr, addr, sizeof(*addr));
	}

	os_log_info(mptcp_log_handle, "%s - %lx: Received ADD_ADDRv%u\n",
	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
	    addr_opt->maddr_ipversion);

	mptcp_sched_create_subflows(mpte);
}
void
tcp_do_mptcp_options(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
    struct tcpopt *to, int optlen)
{
	int mptcp_subtype;
	struct mptcb *mp_tp = tptomptp(tp);

	if (mp_tp == NULL) {
		return;
	}

	socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));

	/* All MPTCP options have atleast 4 bytes */
	if (optlen < 4) {
		return;
	}

	mptcp_subtype = (cp[2] >> 4);

	if (mptcp_sanitize_option(tp, mptcp_subtype) == 0) {
		return;
	}

	switch (mptcp_subtype) {
	case MPO_CAPABLE:
		mptcp_do_mpcapable_opt(tp, cp, th, optlen);
		break;
	case MPO_JOIN:
		mptcp_do_mpjoin_opt(tp, cp, th, optlen);
		break;
	case MPO_DSS:
		mptcp_do_dss_opt(tp, cp, th);
		break;
	case MPO_FASTCLOSE:
		mptcp_do_fastclose_opt(tp, cp, th);
		break;
	case MPO_FAIL:
		mptcp_do_mpfail_opt(tp, cp, th);
		break;
	case MPO_ADD_ADDR:
		mptcp_do_add_addr_opt(mp_tp->mpt_mpte, cp);
		break;
	case MPO_REMOVE_ADDR:           /* fall through */
	case MPO_PRIO:
		to->to_flags |= TOF_MPTCP;
		break;
	default:
		break;
	}
	return;
}
/* REMOVE_ADDR option is sent when a source address goes away */
static void
mptcp_send_remaddr_opt(struct tcpcb *tp, struct mptcp_remaddr_opt *opt)
{
	mptcplog((LOG_DEBUG, "%s: local id %d remove id %d \n",
	    __func__, tp->t_local_aid, tp->t_rem_aid),
	    (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);

	bzero(opt, sizeof(*opt));
	opt->mr_kind = TCPOPT_MULTIPATH;
	opt->mr_len = sizeof(*opt);
	opt->mr_subtype = MPO_REMOVE_ADDR;
	opt->mr_addr_id = tp->t_rem_aid;
	tp->t_mpflags &= ~TMPF_SND_REM_ADDR;
}
/* We send MP_PRIO option based on the values set by the SIOCSCONNORDER ioctl */
static int
mptcp_snd_mpprio(struct tcpcb *tp, u_char *cp, int optlen)
{
	struct mptcp_mpprio_addr_opt mpprio;

	if (tp->t_state != TCPS_ESTABLISHED) {
		tp->t_mpflags &= ~TMPF_SND_MPPRIO;
		return optlen;
	}

	if ((MAX_TCPOPTLEN - optlen) <
	    (int)sizeof(mpprio)) {
		return optlen;
	}

	bzero(&mpprio, sizeof(mpprio));
	mpprio.mpprio_kind = TCPOPT_MULTIPATH;
	mpprio.mpprio_len = sizeof(mpprio);
	mpprio.mpprio_subtype = MPO_PRIO;
	if (tp->t_mpflags & TMPF_BACKUP_PATH) {
		mpprio.mpprio_flags |= MPTCP_MPPRIO_BKP;
	}
	mpprio.mpprio_addrid = tp->t_local_aid;
	memcpy(cp + optlen, &mpprio, sizeof(mpprio));
	optlen += sizeof(mpprio);
	tp->t_mpflags &= ~TMPF_SND_MPPRIO;
	mptcplog((LOG_DEBUG, "%s: aid = %d \n", __func__,
	    tp->t_local_aid),
	    (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
	return optlen;
}