2 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <netinet/in_systm.h>
31 #include <sys/socket.h>
32 #include <sys/socketvar.h>
33 #include <sys/syslog.h>
34 #include <net/route.h>
35 #include <netinet/in.h>
38 #include <netinet/ip.h>
39 #include <netinet/ip_var.h>
40 #include <netinet/in_var.h>
41 #include <netinet/tcp.h>
42 #include <netinet/tcp_cache.h>
43 #include <netinet/tcp_seq.h>
44 #include <netinet/tcpip.h>
45 #include <netinet/tcp_fsm.h>
46 #include <netinet/mptcp_var.h>
47 #include <netinet/mptcp.h>
48 #include <netinet/mptcp_opt.h>
49 #include <netinet/mptcp_seq.h>
51 #include <libkern/crypto/sha1.h>
52 #include <netinet/mptcp_timer.h>
56 static int mptcp_validate_join_hmac(struct tcpcb
*, u_char
*, int);
57 static int mptcp_snd_mpprio(struct tcpcb
*tp
, u_char
*cp
, int optlen
);
58 static void mptcp_send_remaddr_opt(struct tcpcb
*, struct mptcp_remaddr_opt
*);
/*
 * MPTCP Options Output Processing
 */
65 mptcp_setup_first_subflow_syn_opts(struct socket
*so
, u_char
*opt
, unsigned optlen
)
67 struct mptcp_mpcapable_opt_common mptcp_opt
;
68 struct tcpcb
*tp
= sototcpcb(so
);
69 struct mptcb
*mp_tp
= tptomptp(tp
);
72 ret
= tcp_heuristic_do_mptcp(tp
);
74 os_log_info(mptcp_log_handle
, "%s - %lx: Not doing MPTCP due to heuristics",
75 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp
->mpt_mpte
));
76 mp_tp
->mpt_flags
|= MPTCPF_FALLBACK_HEURISTIC
;
81 * Avoid retransmitting the MP_CAPABLE option.
84 tp
->t_rxtshift
> mptcp_mpcap_retries
&&
85 !(tptomptp(tp
)->mpt_mpte
->mpte_flags
& MPTE_FORCE_ENABLE
)) {
86 if (!(mp_tp
->mpt_flags
& (MPTCPF_FALLBACK_HEURISTIC
| MPTCPF_HEURISTIC_TRAC
))) {
87 mp_tp
->mpt_flags
|= MPTCPF_HEURISTIC_TRAC
;
88 tcp_heuristic_mptcp_loss(tp
);
93 bzero(&mptcp_opt
, sizeof(struct mptcp_mpcapable_opt_common
));
95 mptcp_opt
.mmco_kind
= TCPOPT_MULTIPATH
;
97 sizeof(struct mptcp_mpcapable_opt_common
) +
99 mptcp_opt
.mmco_subtype
= MPO_CAPABLE
;
100 mptcp_opt
.mmco_version
= mp_tp
->mpt_version
;
101 mptcp_opt
.mmco_flags
|= MPCAP_PROPOSAL_SBIT
;
102 if (mp_tp
->mpt_flags
& MPTCPF_CHECKSUM
) {
103 mptcp_opt
.mmco_flags
|= MPCAP_CHECKSUM_CBIT
;
105 memcpy(opt
+ optlen
, &mptcp_opt
, sizeof(struct mptcp_mpcapable_opt_common
));
106 optlen
+= sizeof(struct mptcp_mpcapable_opt_common
);
107 memcpy(opt
+ optlen
, &mp_tp
->mpt_localkey
, sizeof(mptcp_key_t
));
108 optlen
+= sizeof(mptcp_key_t
);
114 mptcp_setup_join_subflow_syn_opts(struct socket
*so
, u_char
*opt
, unsigned optlen
)
116 struct mptcp_mpjoin_opt_req mpjoin_req
;
117 struct inpcb
*inp
= sotoinpcb(so
);
118 struct tcpcb
*tp
= NULL
;
132 bzero(&mpjoin_req
, sizeof(mpjoin_req
));
133 mpjoin_req
.mmjo_kind
= TCPOPT_MULTIPATH
;
134 mpjoin_req
.mmjo_len
= sizeof(mpjoin_req
);
135 mpjoin_req
.mmjo_subtype_bkp
= MPO_JOIN
<< 4;
137 if (tp
->t_mpflags
& TMPF_BACKUP_PATH
) {
138 mpjoin_req
.mmjo_subtype_bkp
|= MPTCP_BACKUP
;
139 } else if (inp
->inp_boundifp
&& IFNET_IS_CELLULAR(inp
->inp_boundifp
) &&
140 mptcp_subflows_need_backup_flag(mpts
->mpts_mpte
)) {
141 mpjoin_req
.mmjo_subtype_bkp
|= MPTCP_BACKUP
;
142 tp
->t_mpflags
|= TMPF_BACKUP_PATH
;
144 mpts
->mpts_flags
|= MPTSF_PREFERRED
;
147 mpjoin_req
.mmjo_addr_id
= tp
->t_local_aid
;
148 mpjoin_req
.mmjo_peer_token
= tptomptp(tp
)->mpt_remotetoken
;
149 if (mpjoin_req
.mmjo_peer_token
== 0) {
150 mptcplog((LOG_DEBUG
, "%s: peer token 0", __func__
),
151 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
153 mptcp_get_rands(tp
->t_local_aid
, tptomptp(tp
),
154 &mpjoin_req
.mmjo_rand
, NULL
);
155 memcpy(opt
+ optlen
, &mpjoin_req
, mpjoin_req
.mmjo_len
);
156 optlen
+= mpjoin_req
.mmjo_len
;
162 mptcp_setup_join_ack_opts(struct tcpcb
*tp
, u_char
*opt
, unsigned optlen
)
165 struct mptcp_mpjoin_opt_rsp2 join_rsp2
;
167 if ((MAX_TCPOPTLEN
- optlen
) < sizeof(struct mptcp_mpjoin_opt_rsp2
)) {
168 printf("%s: no space left %d \n", __func__
, optlen
);
172 bzero(&join_rsp2
, sizeof(struct mptcp_mpjoin_opt_rsp2
));
173 join_rsp2
.mmjo_kind
= TCPOPT_MULTIPATH
;
174 join_rsp2
.mmjo_len
= sizeof(struct mptcp_mpjoin_opt_rsp2
);
175 join_rsp2
.mmjo_subtype
= MPO_JOIN
;
176 mptcp_get_hmac(tp
->t_local_aid
, tptomptp(tp
),
177 (u_char
*)&join_rsp2
.mmjo_mac
);
178 memcpy(opt
+ optlen
, &join_rsp2
, join_rsp2
.mmjo_len
);
179 new_optlen
= optlen
+ join_rsp2
.mmjo_len
;
184 mptcp_setup_syn_opts(struct socket
*so
, u_char
*opt
, unsigned optlen
)
188 if (!(so
->so_flags
& SOF_MP_SEC_SUBFLOW
)) {
189 new_optlen
= mptcp_setup_first_subflow_syn_opts(so
, opt
, optlen
);
191 new_optlen
= mptcp_setup_join_subflow_syn_opts(so
, opt
, optlen
);
198 mptcp_send_mpfail(struct tcpcb
*tp
, u_char
*opt
, unsigned int optlen
)
200 #pragma unused(tp, opt, optlen)
202 struct mptcb
*mp_tp
= NULL
;
203 struct mptcp_mpfail_opt fail_opt
;
205 uint8_t len
= sizeof(struct mptcp_mpfail_opt
);
207 mp_tp
= tptomptp(tp
);
209 tp
->t_mpflags
&= ~TMPF_SND_MPFAIL
;
213 /* if option space low give up */
214 if ((MAX_TCPOPTLEN
- optlen
) < sizeof(struct mptcp_mpfail_opt
)) {
215 tp
->t_mpflags
&= ~TMPF_SND_MPFAIL
;
219 dsn
= mp_tp
->mpt_rcvnxt
;
221 bzero(&fail_opt
, sizeof(fail_opt
));
222 fail_opt
.mfail_kind
= TCPOPT_MULTIPATH
;
223 fail_opt
.mfail_len
= len
;
224 fail_opt
.mfail_subtype
= MPO_FAIL
;
225 fail_opt
.mfail_dsn
= mptcp_hton64(dsn
);
226 memcpy(opt
+ optlen
, &fail_opt
, len
);
228 tp
->t_mpflags
&= ~TMPF_SND_MPFAIL
;
229 mptcplog((LOG_DEBUG
, "%s: %d \n", __func__
,
230 tp
->t_local_aid
), (MPTCP_SOCKET_DBG
| MPTCP_SENDER_DBG
),
236 mptcp_send_infinite_mapping(struct tcpcb
*tp
, u_char
*opt
, unsigned int optlen
)
238 struct socket
*so
= tp
->t_inpcb
->inp_socket
;
239 uint8_t len
= sizeof(struct mptcp_dsn_opt
);
240 struct mptcp_dsn_opt infin_opt
;
241 struct mptcb
*mp_tp
= NULL
;
242 uint8_t csum_len
= 0;
248 mp_tp
= tptomptp(tp
);
253 if (mp_tp
->mpt_flags
& MPTCPF_CHECKSUM
) {
258 if ((MAX_TCPOPTLEN
- optlen
) < (len
+ csum_len
)) {
262 bzero(&infin_opt
, sizeof(infin_opt
));
263 infin_opt
.mdss_copt
.mdss_kind
= TCPOPT_MULTIPATH
;
264 infin_opt
.mdss_copt
.mdss_len
= len
+ csum_len
;
265 infin_opt
.mdss_copt
.mdss_subtype
= MPO_DSS
;
266 infin_opt
.mdss_copt
.mdss_flags
|= MDSS_M
;
267 if (mp_tp
->mpt_flags
& MPTCPF_RECVD_MPFAIL
) {
268 infin_opt
.mdss_dsn
= (u_int32_t
)
269 MPTCP_DATASEQ_LOW32(mp_tp
->mpt_dsn_at_csum_fail
);
270 infin_opt
.mdss_subflow_seqn
= mp_tp
->mpt_ssn_at_csum_fail
;
273 * If MPTCP fallback happens, but TFO succeeds, the data on the
274 * SYN does not belong to the MPTCP data sequence space.
276 if ((tp
->t_tfo_stats
& TFO_S_SYN_DATA_ACKED
) &&
277 ((mp_tp
->mpt_local_idsn
+ 1) == mp_tp
->mpt_snduna
)) {
278 infin_opt
.mdss_subflow_seqn
= 1;
280 mptcplog((LOG_DEBUG
, "%s: idsn %llu snduna %llu \n",
281 __func__
, mp_tp
->mpt_local_idsn
,
283 (MPTCP_SOCKET_DBG
| MPTCP_SENDER_DBG
),
286 infin_opt
.mdss_subflow_seqn
= tp
->snd_una
- tp
->t_mpsub
->mpts_iss
;
288 infin_opt
.mdss_dsn
= (u_int32_t
)
289 MPTCP_DATASEQ_LOW32(mp_tp
->mpt_snduna
);
292 if ((infin_opt
.mdss_dsn
== 0) || (infin_opt
.mdss_subflow_seqn
== 0)) {
295 infin_opt
.mdss_dsn
= htonl(infin_opt
.mdss_dsn
);
296 infin_opt
.mdss_subflow_seqn
= htonl(infin_opt
.mdss_subflow_seqn
);
297 infin_opt
.mdss_data_len
= 0;
299 memcpy(opt
+ optlen
, &infin_opt
, len
);
302 /* The checksum field is set to 0 for infinite mapping */
304 memcpy(opt
+ optlen
, &csum
, csum_len
);
308 mptcplog((LOG_DEBUG
, "%s: dsn = %x, seq = %x len = %x\n", __func__
,
309 ntohl(infin_opt
.mdss_dsn
),
310 ntohl(infin_opt
.mdss_subflow_seqn
),
311 ntohs(infin_opt
.mdss_data_len
)),
312 (MPTCP_SOCKET_DBG
| MPTCP_SENDER_DBG
),
315 tp
->t_mpflags
|= TMPF_INFIN_SENT
;
316 tcpstat
.tcps_estab_fallback
++;
322 mptcp_ok_to_fin(struct tcpcb
*tp
, u_int64_t dsn
, u_int32_t datalen
)
324 struct mptcb
*mp_tp
= tptomptp(tp
);
326 dsn
= (mp_tp
->mpt_sndmax
& MPTCP_DATASEQ_LOW32_MASK
) | dsn
;
327 if ((dsn
+ datalen
) == mp_tp
->mpt_sndmax
) {
335 mptcp_setup_opts(struct tcpcb
*tp
, int32_t off
, u_char
*opt
,
336 unsigned int optlen
, int flags
, int len
,
337 boolean_t
*p_mptcp_acknow
, boolean_t
*do_not_compress
)
339 struct inpcb
*inp
= (struct inpcb
*)tp
->t_inpcb
;
340 struct socket
*so
= inp
->inp_socket
;
341 struct mptcb
*mp_tp
= tptomptp(tp
);
342 boolean_t do_csum
= FALSE
;
343 boolean_t send_64bit_dsn
= FALSE
;
344 boolean_t send_64bit_ack
= FALSE
;
345 u_int32_t old_mpt_flags
= tp
->t_mpflags
& TMPF_MPTCP_SIGNALS
;
347 if (mptcp_enable
== 0 || mp_tp
== NULL
|| tp
->t_state
== TCPS_CLOSED
) {
352 socket_lock_assert_owned(mptetoso(mp_tp
->mpt_mpte
));
354 if (mp_tp
->mpt_flags
& MPTCPF_CHECKSUM
) {
358 /* tcp_output handles the SYN path separately */
359 if (flags
& TH_SYN
) {
363 if ((MAX_TCPOPTLEN
- optlen
) <
364 sizeof(struct mptcp_mpcapable_opt_common
)) {
365 mptcplog((LOG_ERR
, "%s: no space left %d flags %x tp->t_mpflags %x len %d\n",
366 __func__
, optlen
, flags
, tp
->t_mpflags
, len
),
367 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
371 if (tp
->t_mpflags
& TMPF_TCP_FALLBACK
) {
372 if (tp
->t_mpflags
& TMPF_SND_MPFAIL
) {
373 optlen
= mptcp_send_mpfail(tp
, opt
, optlen
);
374 } else if (!(tp
->t_mpflags
& TMPF_INFIN_SENT
)) {
375 optlen
= mptcp_send_infinite_mapping(tp
, opt
, optlen
);
378 *do_not_compress
= TRUE
;
383 if (tp
->t_mpflags
& TMPF_SND_KEYS
) {
384 struct mptcp_mpcapable_opt_rsp1 mptcp_opt
;
385 if ((MAX_TCPOPTLEN
- optlen
) <
386 sizeof(struct mptcp_mpcapable_opt_rsp1
)) {
389 bzero(&mptcp_opt
, sizeof(struct mptcp_mpcapable_opt_rsp1
));
390 mptcp_opt
.mmc_common
.mmco_kind
= TCPOPT_MULTIPATH
;
391 mptcp_opt
.mmc_common
.mmco_len
=
392 sizeof(struct mptcp_mpcapable_opt_rsp1
);
393 mptcp_opt
.mmc_common
.mmco_subtype
= MPO_CAPABLE
;
394 mptcp_opt
.mmc_common
.mmco_version
= mp_tp
->mpt_version
;
395 /* HMAC-SHA1 is the proposal */
396 mptcp_opt
.mmc_common
.mmco_flags
|= MPCAP_PROPOSAL_SBIT
;
397 if (mp_tp
->mpt_flags
& MPTCPF_CHECKSUM
) {
398 mptcp_opt
.mmc_common
.mmco_flags
|= MPCAP_CHECKSUM_CBIT
;
400 mptcp_opt
.mmc_localkey
= mp_tp
->mpt_localkey
;
401 mptcp_opt
.mmc_remotekey
= mp_tp
->mpt_remotekey
;
402 memcpy(opt
+ optlen
, &mptcp_opt
, mptcp_opt
.mmc_common
.mmco_len
);
403 optlen
+= mptcp_opt
.mmc_common
.mmco_len
;
404 tp
->t_mpflags
&= ~TMPF_SND_KEYS
;
407 tp
->t_mpuna
= tp
->snd_una
;
409 /* its a retransmission of the MP_CAPABLE ACK */
412 *do_not_compress
= TRUE
;
417 if (tp
->t_mpflags
& TMPF_SND_JACK
) {
418 *do_not_compress
= TRUE
;
419 optlen
= mptcp_setup_join_ack_opts(tp
, opt
, optlen
);
421 tp
->t_mpuna
= tp
->snd_una
;
423 /* Start a timer to retransmit the ACK */
424 tp
->t_timer
[TCPT_JACK_RXMT
] =
425 OFFSET_FROM_START(tp
, tcp_jack_rxmt
);
427 tp
->t_mpflags
&= ~TMPF_SND_JACK
;
431 if (!(tp
->t_mpflags
& (TMPF_MPTCP_TRUE
| TMPF_PREESTABLISHED
))) {
435 * From here on, all options are sent only if MPTCP_TRUE
436 * or when data is sent early on as in Fast Join
439 if ((tp
->t_mpflags
& TMPF_MPTCP_TRUE
) &&
440 (tp
->t_mpflags
& TMPF_SND_REM_ADDR
)) {
441 int rem_opt_len
= sizeof(struct mptcp_remaddr_opt
);
442 if ((optlen
+ rem_opt_len
) <= MAX_TCPOPTLEN
) {
443 mptcp_send_remaddr_opt(tp
,
444 (struct mptcp_remaddr_opt
*)(opt
+ optlen
));
445 optlen
+= rem_opt_len
;
447 tp
->t_mpflags
&= ~TMPF_SND_REM_ADDR
;
450 *do_not_compress
= TRUE
;
453 if (tp
->t_mpflags
& TMPF_SND_MPPRIO
) {
454 optlen
= mptcp_snd_mpprio(tp
, opt
, optlen
);
456 *do_not_compress
= TRUE
;
459 if (mp_tp
->mpt_flags
& MPTCPF_SND_64BITDSN
) {
460 send_64bit_dsn
= TRUE
;
462 if (mp_tp
->mpt_flags
& MPTCPF_SND_64BITACK
) {
463 send_64bit_ack
= TRUE
;
466 #define CHECK_OPTLEN { \
467 if ((MAX_TCPOPTLEN - optlen) < dssoptlen) { \
468 mptcplog((LOG_ERR, "%s: dssoptlen %d optlen %d \n", __func__, \
469 dssoptlen, optlen), \
470 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); \
475 #define DO_FIN(dsn_opt) { \
477 sndfin = mptcp_ok_to_fin(tp, dsn_opt.mdss_dsn, len); \
479 dsn_opt.mdss_copt.mdss_flags |= MDSS_F; \
480 dsn_opt.mdss_data_len += 1; \
482 dss_csum = in_addword(dss_csum, 1); \
486 #define CHECK_DATALEN { \
487 /* MPTCP socket does not support IP options */ \
488 if ((len + optlen + dssoptlen) > tp->t_maxopd) { \
489 mptcplog((LOG_ERR, "%s: nosp %d len %d opt %d %d %d\n", \
490 __func__, len, dssoptlen, optlen, \
491 tp->t_maxseg, tp->t_maxopd), \
492 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); \
493 /* remove option length from payload len */ \
494 len = tp->t_maxopd - optlen - dssoptlen; \
498 if ((tp
->t_mpflags
& TMPF_SEND_DSN
) &&
501 * If there was the need to send 64-bit Data ACK along
502 * with 64-bit DSN, then 26 or 28 bytes would be used.
503 * With timestamps and NOOP padding that will cause
504 * overflow. Hence, in the rare event that both 64-bit
505 * DSN and 64-bit ACK have to be sent, delay the send of
506 * 64-bit ACK until our 64-bit DSN is acked with a 64-bit ack.
507 * XXX If this delay causes issue, remove the 2-byte padding.
509 struct mptcp_dss64_ack32_opt dsn_ack_opt
;
510 uint8_t dssoptlen
= sizeof(dsn_ack_opt
);
519 bzero(&dsn_ack_opt
, sizeof(dsn_ack_opt
));
520 dsn_ack_opt
.mdss_copt
.mdss_kind
= TCPOPT_MULTIPATH
;
521 dsn_ack_opt
.mdss_copt
.mdss_subtype
= MPO_DSS
;
522 dsn_ack_opt
.mdss_copt
.mdss_len
= dssoptlen
;
523 dsn_ack_opt
.mdss_copt
.mdss_flags
|=
524 MDSS_M
| MDSS_m
| MDSS_A
;
528 mptcp_output_getm_dsnmap64(so
, off
,
529 &dsn_ack_opt
.mdss_dsn
,
530 &dsn_ack_opt
.mdss_subflow_seqn
,
531 &dsn_ack_opt
.mdss_data_len
,
534 if ((dsn_ack_opt
.mdss_data_len
== 0) ||
535 (dsn_ack_opt
.mdss_dsn
== 0)) {
539 if (tp
->t_mpflags
& TMPF_SEND_DFIN
) {
543 dsn_ack_opt
.mdss_ack
=
544 htonl(MPTCP_DATAACK_LOW32(mp_tp
->mpt_rcvnxt
));
546 dsn_ack_opt
.mdss_dsn
= mptcp_hton64(dsn_ack_opt
.mdss_dsn
);
547 dsn_ack_opt
.mdss_subflow_seqn
= htonl(
548 dsn_ack_opt
.mdss_subflow_seqn
);
549 dsn_ack_opt
.mdss_data_len
= htons(
550 dsn_ack_opt
.mdss_data_len
);
552 memcpy(opt
+ optlen
, &dsn_ack_opt
, sizeof(dsn_ack_opt
));
554 *((uint16_t *)(void *)(opt
+ optlen
+ sizeof(dsn_ack_opt
))) = dss_csum
;
559 tp
->t_mpflags
&= ~TMPF_MPTCP_ACKNOW
;
561 *do_not_compress
= TRUE
;
566 if ((tp
->t_mpflags
& TMPF_SEND_DSN
) &&
568 !(tp
->t_mpflags
& TMPF_MPTCP_ACKNOW
)) {
569 struct mptcp_dsn_opt dsn_opt
;
570 uint8_t dssoptlen
= sizeof(struct mptcp_dsn_opt
);
579 bzero(&dsn_opt
, sizeof(dsn_opt
));
580 dsn_opt
.mdss_copt
.mdss_kind
= TCPOPT_MULTIPATH
;
581 dsn_opt
.mdss_copt
.mdss_subtype
= MPO_DSS
;
582 dsn_opt
.mdss_copt
.mdss_len
= dssoptlen
;
583 dsn_opt
.mdss_copt
.mdss_flags
|= MDSS_M
;
587 mptcp_output_getm_dsnmap32(so
, off
, &dsn_opt
.mdss_dsn
,
588 &dsn_opt
.mdss_subflow_seqn
,
589 &dsn_opt
.mdss_data_len
,
592 if ((dsn_opt
.mdss_data_len
== 0) ||
593 (dsn_opt
.mdss_dsn
== 0)) {
597 if (tp
->t_mpflags
& TMPF_SEND_DFIN
) {
601 dsn_opt
.mdss_dsn
= htonl(dsn_opt
.mdss_dsn
);
602 dsn_opt
.mdss_subflow_seqn
= htonl(dsn_opt
.mdss_subflow_seqn
);
603 dsn_opt
.mdss_data_len
= htons(dsn_opt
.mdss_data_len
);
604 memcpy(opt
+ optlen
, &dsn_opt
, sizeof(dsn_opt
));
606 *((uint16_t *)(void *)(opt
+ optlen
+ sizeof(dsn_opt
))) = dss_csum
;
610 tp
->t_mpflags
&= ~TMPF_MPTCP_ACKNOW
;
612 *do_not_compress
= TRUE
;
617 /* 32-bit Data ACK option */
618 if ((tp
->t_mpflags
& TMPF_MPTCP_ACKNOW
) &&
620 !(tp
->t_mpflags
& TMPF_SEND_DSN
) &&
621 !(tp
->t_mpflags
& TMPF_SEND_DFIN
)) {
622 struct mptcp_data_ack_opt dack_opt
;
623 uint8_t dssoptlen
= 0;
625 dssoptlen
= sizeof(dack_opt
);
629 bzero(&dack_opt
, dssoptlen
);
630 dack_opt
.mdss_copt
.mdss_kind
= TCPOPT_MULTIPATH
;
631 dack_opt
.mdss_copt
.mdss_len
= dssoptlen
;
632 dack_opt
.mdss_copt
.mdss_subtype
= MPO_DSS
;
633 dack_opt
.mdss_copt
.mdss_flags
|= MDSS_A
;
635 htonl(MPTCP_DATAACK_LOW32(mp_tp
->mpt_rcvnxt
));
636 memcpy(opt
+ optlen
, &dack_opt
, dssoptlen
);
638 VERIFY(optlen
<= MAX_TCPOPTLEN
);
639 tp
->t_mpflags
&= ~TMPF_MPTCP_ACKNOW
;
643 /* 64-bit Data ACK option */
644 if ((tp
->t_mpflags
& TMPF_MPTCP_ACKNOW
) &&
646 !(tp
->t_mpflags
& TMPF_SEND_DSN
) &&
647 !(tp
->t_mpflags
& TMPF_SEND_DFIN
)) {
648 struct mptcp_data_ack64_opt dack_opt
;
649 uint8_t dssoptlen
= 0;
651 dssoptlen
= sizeof(dack_opt
);
655 bzero(&dack_opt
, dssoptlen
);
656 dack_opt
.mdss_copt
.mdss_kind
= TCPOPT_MULTIPATH
;
657 dack_opt
.mdss_copt
.mdss_len
= dssoptlen
;
658 dack_opt
.mdss_copt
.mdss_subtype
= MPO_DSS
;
659 dack_opt
.mdss_copt
.mdss_flags
|= (MDSS_A
| MDSS_a
);
660 dack_opt
.mdss_ack
= mptcp_hton64(mp_tp
->mpt_rcvnxt
);
662 * The other end should retransmit 64-bit DSN until it
663 * receives a 64-bit ACK.
665 mp_tp
->mpt_flags
&= ~MPTCPF_SND_64BITACK
;
666 memcpy(opt
+ optlen
, &dack_opt
, dssoptlen
);
668 VERIFY(optlen
<= MAX_TCPOPTLEN
);
669 tp
->t_mpflags
&= ~TMPF_MPTCP_ACKNOW
;
673 /* 32-bit DSS+Data ACK option */
674 if ((tp
->t_mpflags
& TMPF_SEND_DSN
) &&
677 (tp
->t_mpflags
& TMPF_MPTCP_ACKNOW
)) {
678 struct mptcp_dss_ack_opt dss_ack_opt
;
679 uint8_t dssoptlen
= sizeof(dss_ack_opt
);
688 bzero(&dss_ack_opt
, sizeof(dss_ack_opt
));
689 dss_ack_opt
.mdss_copt
.mdss_kind
= TCPOPT_MULTIPATH
;
690 dss_ack_opt
.mdss_copt
.mdss_len
= dssoptlen
;
691 dss_ack_opt
.mdss_copt
.mdss_subtype
= MPO_DSS
;
692 dss_ack_opt
.mdss_copt
.mdss_flags
|= MDSS_A
| MDSS_M
;
693 dss_ack_opt
.mdss_ack
=
694 htonl(MPTCP_DATAACK_LOW32(mp_tp
->mpt_rcvnxt
));
698 mptcp_output_getm_dsnmap32(so
, off
, &dss_ack_opt
.mdss_dsn
,
699 &dss_ack_opt
.mdss_subflow_seqn
,
700 &dss_ack_opt
.mdss_data_len
,
703 if ((dss_ack_opt
.mdss_data_len
== 0) ||
704 (dss_ack_opt
.mdss_dsn
== 0)) {
708 if (tp
->t_mpflags
& TMPF_SEND_DFIN
) {
712 dss_ack_opt
.mdss_dsn
= htonl(dss_ack_opt
.mdss_dsn
);
713 dss_ack_opt
.mdss_subflow_seqn
=
714 htonl(dss_ack_opt
.mdss_subflow_seqn
);
715 dss_ack_opt
.mdss_data_len
= htons(dss_ack_opt
.mdss_data_len
);
716 memcpy(opt
+ optlen
, &dss_ack_opt
, sizeof(dss_ack_opt
));
718 *((uint16_t *)(void *)(opt
+ optlen
+ sizeof(dss_ack_opt
))) = dss_csum
;
723 if (optlen
> MAX_TCPOPTLEN
) {
724 panic("optlen too large");
726 tp
->t_mpflags
&= ~TMPF_MPTCP_ACKNOW
;
730 /* 32-bit DSS + 64-bit DACK option */
731 if ((tp
->t_mpflags
& TMPF_SEND_DSN
) &&
734 (tp
->t_mpflags
& TMPF_MPTCP_ACKNOW
)) {
735 struct mptcp_dss32_ack64_opt dss_ack_opt
;
736 uint8_t dssoptlen
= sizeof(dss_ack_opt
);
745 bzero(&dss_ack_opt
, sizeof(dss_ack_opt
));
746 dss_ack_opt
.mdss_copt
.mdss_kind
= TCPOPT_MULTIPATH
;
747 dss_ack_opt
.mdss_copt
.mdss_len
= dssoptlen
;
748 dss_ack_opt
.mdss_copt
.mdss_subtype
= MPO_DSS
;
749 dss_ack_opt
.mdss_copt
.mdss_flags
|= MDSS_M
| MDSS_A
| MDSS_a
;
750 dss_ack_opt
.mdss_ack
=
751 mptcp_hton64(mp_tp
->mpt_rcvnxt
);
755 mptcp_output_getm_dsnmap32(so
, off
, &dss_ack_opt
.mdss_dsn
,
756 &dss_ack_opt
.mdss_subflow_seqn
,
757 &dss_ack_opt
.mdss_data_len
,
760 if ((dss_ack_opt
.mdss_data_len
== 0) ||
761 (dss_ack_opt
.mdss_dsn
== 0)) {
765 if (tp
->t_mpflags
& TMPF_SEND_DFIN
) {
769 dss_ack_opt
.mdss_dsn
= htonl(dss_ack_opt
.mdss_dsn
);
770 dss_ack_opt
.mdss_subflow_seqn
=
771 htonl(dss_ack_opt
.mdss_subflow_seqn
);
772 dss_ack_opt
.mdss_data_len
= htons(dss_ack_opt
.mdss_data_len
);
773 memcpy(opt
+ optlen
, &dss_ack_opt
, sizeof(dss_ack_opt
));
775 *((uint16_t *)(void *)(opt
+ optlen
+ sizeof(dss_ack_opt
))) = dss_csum
;
780 if (optlen
> MAX_TCPOPTLEN
) {
781 panic("optlen too large");
783 tp
->t_mpflags
&= ~TMPF_MPTCP_ACKNOW
;
785 *do_not_compress
= TRUE
;
790 if (tp
->t_mpflags
& TMPF_SEND_DFIN
) {
791 uint8_t dssoptlen
= sizeof(struct mptcp_dss_ack_opt
);
792 struct mptcp_dss_ack_opt dss_ack_opt
;
796 uint64_t dss_val
= mptcp_hton64(mp_tp
->mpt_sndmax
- 1);
797 uint16_t dlen
= htons(1);
804 sum
= in_pseudo64(dss_val
, sseq
, dlen
);
806 dss_csum
= ~sum
& 0xffff;
811 bzero(&dss_ack_opt
, sizeof(dss_ack_opt
));
814 * Data FIN occupies one sequence space.
815 * Don't send it if it has been Acked.
817 if ((mp_tp
->mpt_sndnxt
+ 1 != mp_tp
->mpt_sndmax
) ||
818 (mp_tp
->mpt_snduna
== mp_tp
->mpt_sndmax
)) {
822 dss_ack_opt
.mdss_copt
.mdss_kind
= TCPOPT_MULTIPATH
;
823 dss_ack_opt
.mdss_copt
.mdss_len
= dssoptlen
;
824 dss_ack_opt
.mdss_copt
.mdss_subtype
= MPO_DSS
;
825 dss_ack_opt
.mdss_copt
.mdss_flags
|= MDSS_A
| MDSS_M
| MDSS_F
;
826 dss_ack_opt
.mdss_ack
=
827 htonl(MPTCP_DATAACK_LOW32(mp_tp
->mpt_rcvnxt
));
828 dss_ack_opt
.mdss_dsn
=
829 htonl(MPTCP_DATASEQ_LOW32(mp_tp
->mpt_sndmax
- 1));
830 dss_ack_opt
.mdss_subflow_seqn
= 0;
831 dss_ack_opt
.mdss_data_len
= 1;
832 dss_ack_opt
.mdss_data_len
= htons(dss_ack_opt
.mdss_data_len
);
833 memcpy(opt
+ optlen
, &dss_ack_opt
, sizeof(dss_ack_opt
));
835 *((uint16_t *)(void *)(opt
+ optlen
+ sizeof(dss_ack_opt
))) = dss_csum
;
840 *do_not_compress
= TRUE
;
844 if (TRUE
== *p_mptcp_acknow
) {
845 u_int32_t new_mpt_flags
= tp
->t_mpflags
& TMPF_MPTCP_SIGNALS
;
848 * If none of the above mpflags were acted on by
849 * this routine, reset these flags and set p_mptcp_acknow
852 * XXX The reset value of p_mptcp_acknow can be used
853 * to communicate tcp_output to NOT send a pure ack without any
854 * MPTCP options as it will be treated as a dup ack.
855 * Since the instances of mptcp_setup_opts not acting on
856 * these options are mostly corner cases and sending a dup
857 * ack here would only have an impact if the system
858 * has sent consecutive dup acks before this false one,
859 * we haven't modified the logic in tcp_output to avoid
862 if (old_mpt_flags
== new_mpt_flags
) {
863 tp
->t_mpflags
&= ~TMPF_MPTCP_SIGNALS
;
864 *p_mptcp_acknow
= FALSE
;
865 mptcplog((LOG_DEBUG
, "%s: no action \n", __func__
),
866 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
868 mptcplog((LOG_DEBUG
, "%s: acknow set, old flags %x new flags %x \n",
869 __func__
, old_mpt_flags
, new_mpt_flags
),
870 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
/*
 * MPTCP Options Input Processing
 */
882 mptcp_sanitize_option(struct tcpcb
*tp
, int mptcp_subtype
)
884 struct mptcb
*mp_tp
= tptomptp(tp
);
887 switch (mptcp_subtype
) {
890 case MPO_JOIN
: /* fall through */
891 case MPO_DSS
: /* fall through */
892 case MPO_FASTCLOSE
: /* fall through */
893 case MPO_FAIL
: /* fall through */
894 case MPO_REMOVE_ADDR
: /* fall through */
895 case MPO_ADD_ADDR
: /* fall through */
896 case MPO_PRIO
: /* fall through */
897 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
903 os_log_error(mptcp_log_handle
, "%s - %lx: type = %d \n", __func__
,
904 (unsigned long)VM_KERNEL_ADDRPERM(mp_tp
->mpt_mpte
), mptcp_subtype
);
911 mptcp_valid_mpcapable_common_opt(u_char
*cp
)
913 struct mptcp_mpcapable_opt_common
*rsp
=
914 (struct mptcp_mpcapable_opt_common
*)cp
;
916 /* mmco_kind, mmco_len and mmco_subtype are validated before */
918 if (!(rsp
->mmco_flags
& MPCAP_PROPOSAL_SBIT
)) {
922 if (rsp
->mmco_flags
& (MPCAP_BBIT
| MPCAP_DBIT
|
923 MPCAP_EBIT
| MPCAP_FBIT
| MPCAP_GBIT
)) {
932 mptcp_do_mpcapable_opt(struct tcpcb
*tp
, u_char
*cp
, struct tcphdr
*th
,
935 struct mptcp_mpcapable_opt_rsp
*rsp
= NULL
;
936 struct mptcb
*mp_tp
= tptomptp(tp
);
937 struct mptses
*mpte
= mp_tp
->mpt_mpte
;
939 /* Only valid on SYN/ACK */
940 if ((th
->th_flags
& (TH_SYN
| TH_ACK
)) != (TH_SYN
| TH_ACK
)) {
944 /* Validate the kind, len, flags */
945 if (mptcp_valid_mpcapable_common_opt(cp
) != 1) {
946 tcpstat
.tcps_invalid_mpcap
++;
950 /* handle SYN/ACK retransmission by acknowledging with ACK */
951 if (mp_tp
->mpt_state
>= MPTCPS_ESTABLISHED
) {
955 /* A SYN/ACK contains peer's key and flags */
956 if (optlen
!= sizeof(struct mptcp_mpcapable_opt_rsp
)) {
958 os_log_error(mptcp_log_handle
, "%s - %lx: SYN_ACK optlen = %d, sizeof mp opt = %lu \n",
959 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), optlen
,
960 sizeof(struct mptcp_mpcapable_opt_rsp
));
961 tcpstat
.tcps_invalid_mpcap
++;
966 * If checksum flag is set, enable MPTCP checksum, even if
967 * it was not negotiated on the first SYN.
969 if (((struct mptcp_mpcapable_opt_common
*)cp
)->mmco_flags
&
970 MPCAP_CHECKSUM_CBIT
) {
971 mp_tp
->mpt_flags
|= MPTCPF_CHECKSUM
;
974 if (((struct mptcp_mpcapable_opt_common
*)cp
)->mmco_flags
&
975 MPCAP_UNICAST_IPBIT
) {
976 mpte
->mpte_flags
|= MPTE_UNICAST_IP
;
978 /* We need an explicit signal for the addresses - zero the existing ones */
979 memset(&mpte
->mpte_sub_dst_v4
, 0, sizeof(mpte
->mpte_sub_dst_v4
));
980 memset(&mpte
->mpte_sub_dst_v6
, 0, sizeof(mpte
->mpte_sub_dst_v6
));
983 rsp
= (struct mptcp_mpcapable_opt_rsp
*)cp
;
984 mp_tp
->mpt_remotekey
= rsp
->mmc_localkey
;
985 /* For now just downgrade to the peer's version */
986 mp_tp
->mpt_peer_version
= rsp
->mmc_common
.mmco_version
;
987 if (rsp
->mmc_common
.mmco_version
< mp_tp
->mpt_version
) {
988 mp_tp
->mpt_version
= rsp
->mmc_common
.mmco_version
;
989 tcpstat
.tcps_mp_verdowngrade
++;
991 if (mptcp_init_remote_parms(mp_tp
) != 0) {
992 tcpstat
.tcps_invalid_mpcap
++;
995 tcp_heuristic_mptcp_success(tp
);
996 tp
->t_mpflags
|= (TMPF_SND_KEYS
| TMPF_MPTCP_TRUE
);
1001 mptcp_do_mpjoin_opt(struct tcpcb
*tp
, u_char
*cp
, struct tcphdr
*th
, int optlen
)
1003 #define MPTCP_JOPT_ERROR_PATH(tp) { \
1004 tcpstat.tcps_invalid_joins++; \
1005 if (tp->t_inpcb->inp_socket != NULL) { \
1006 soevent(tp->t_inpcb->inp_socket, \
1007 SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST); \
1011 struct mptcp_mpjoin_opt_rsp
*join_rsp
=
1012 (struct mptcp_mpjoin_opt_rsp
*)cp
;
1014 /* Only valid on SYN/ACK */
1015 if ((th
->th_flags
& (TH_SYN
| TH_ACK
)) != (TH_SYN
| TH_ACK
)) {
1019 if (optlen
!= sizeof(struct mptcp_mpjoin_opt_rsp
)) {
1020 os_log_error(mptcp_log_handle
, "%s - %lx: SYN_ACK: unexpected optlen = %d mp option = %lu\n",
1021 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(tptomptp(tp
)->mpt_mpte
),
1022 optlen
, sizeof(struct mptcp_mpjoin_opt_rsp
));
1023 tp
->t_mpflags
&= ~TMPF_PREESTABLISHED
;
1024 /* send RST and close */
1025 MPTCP_JOPT_ERROR_PATH(tp
);
1029 mptcp_set_raddr_rand(tp
->t_local_aid
, tptomptp(tp
),
1030 join_rsp
->mmjo_addr_id
, join_rsp
->mmjo_rand
);
1031 error
= mptcp_validate_join_hmac(tp
,
1032 (u_char
*)&join_rsp
->mmjo_mac
, SHA1_TRUNCATED
);
1034 os_log_error(mptcp_log_handle
, "%s - %lx: SYN_ACK error = %d \n",
1035 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(tptomptp(tp
)->mpt_mpte
),
1037 tp
->t_mpflags
&= ~TMPF_PREESTABLISHED
;
1038 /* send RST and close */
1039 MPTCP_JOPT_ERROR_PATH(tp
);
1042 tp
->t_mpflags
|= (TMPF_SENT_JOIN
| TMPF_SND_JACK
);
1046 mptcp_validate_join_hmac(struct tcpcb
*tp
, u_char
* hmac
, int mac_len
)
1048 u_char digest
[SHA1_RESULTLEN
] = {0};
1049 struct mptcb
*mp_tp
= tptomptp(tp
);
1050 u_int32_t rem_rand
, loc_rand
;
1052 rem_rand
= loc_rand
= 0;
1054 mptcp_get_rands(tp
->t_local_aid
, mp_tp
, &loc_rand
, &rem_rand
);
1055 if ((rem_rand
== 0) || (loc_rand
== 0)) {
1059 mptcp_hmac_sha1(mp_tp
->mpt_remotekey
, mp_tp
->mpt_localkey
, rem_rand
, loc_rand
,
1062 if (bcmp(digest
, hmac
, mac_len
) == 0) {
1063 return 0; /* matches */
1065 printf("%s: remote key %llx local key %llx remote rand %x "
1066 "local rand %x \n", __func__
, mp_tp
->mpt_remotekey
, mp_tp
->mpt_localkey
,
1067 rem_rand
, loc_rand
);
1073 * Update the mptcb send state variables, but the actual sbdrop occurs
1077 mptcp_data_ack_rcvd(struct mptcb
*mp_tp
, struct tcpcb
*tp
, u_int64_t full_dack
)
1079 uint64_t acked
= full_dack
- mp_tp
->mpt_snduna
;
1081 VERIFY(acked
<= INT_MAX
);
1084 struct socket
*mp_so
= mptetoso(mp_tp
->mpt_mpte
);
1086 if (acked
> mp_so
->so_snd
.sb_cc
) {
1087 if (acked
> mp_so
->so_snd
.sb_cc
+ 1 ||
1088 mp_tp
->mpt_state
< MPTCPS_FIN_WAIT_1
) {
1089 os_log_error(mptcp_log_handle
, "%s - %lx: acked %u, sb_cc %u full %u suna %u state %u\n",
1090 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp
->mpt_mpte
),
1091 (uint32_t)acked
, mp_so
->so_snd
.sb_cc
,
1092 (uint32_t)full_dack
, (uint32_t)mp_tp
->mpt_snduna
,
1096 sbdrop(&mp_so
->so_snd
, (int)mp_so
->so_snd
.sb_cc
);
1098 sbdrop(&mp_so
->so_snd
, (int)acked
);
1101 mp_tp
->mpt_snduna
+= acked
;
1102 /* In degraded mode, we may get some Data ACKs */
1103 if ((tp
->t_mpflags
& TMPF_TCP_FALLBACK
) &&
1104 !(mp_tp
->mpt_flags
& MPTCPF_POST_FALLBACK_SYNC
) &&
1105 MPTCP_SEQ_GT(mp_tp
->mpt_sndnxt
, mp_tp
->mpt_snduna
)) {
1106 /* bring back sndnxt to retransmit MPTCP data */
1107 mp_tp
->mpt_sndnxt
= mp_tp
->mpt_dsn_at_csum_fail
;
1108 mp_tp
->mpt_flags
|= MPTCPF_POST_FALLBACK_SYNC
;
1109 tp
->t_inpcb
->inp_socket
->so_flags1
|=
1110 SOF1_POST_FALLBACK_SYNC
;
1113 mptcp_clean_reinjectq(mp_tp
->mpt_mpte
);
1117 if (full_dack
== mp_tp
->mpt_sndmax
&&
1118 mp_tp
->mpt_state
>= MPTCPS_FIN_WAIT_1
) {
1119 mptcp_close_fsm(mp_tp
, MPCE_RECV_DATA_ACK
);
1120 tp
->t_mpflags
&= ~TMPF_SEND_DFIN
;
1125 mptcp_update_window_wakeup(struct tcpcb
*tp
)
1127 struct mptcb
*mp_tp
= tptomptp(tp
);
1129 socket_lock_assert_owned(mptetoso(mp_tp
->mpt_mpte
));
1131 if (mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) {
1132 mp_tp
->mpt_sndwnd
= tp
->snd_wnd
;
1133 mp_tp
->mpt_sndwl1
= mp_tp
->mpt_rcvnxt
;
1134 mp_tp
->mpt_sndwl2
= mp_tp
->mpt_snduna
;
1137 sowwakeup(tp
->t_inpcb
->inp_socket
);
1141 mptcp_update_window(struct mptcb
*mp_tp
, u_int64_t ack
, u_int64_t seq
, u_int32_t tiwin
)
1143 if (MPTCP_SEQ_LT(mp_tp
->mpt_sndwl1
, seq
) ||
1144 (mp_tp
->mpt_sndwl1
== seq
&&
1145 (MPTCP_SEQ_LT(mp_tp
->mpt_sndwl2
, ack
) ||
1146 (mp_tp
->mpt_sndwl2
== ack
&& tiwin
> mp_tp
->mpt_sndwnd
)))) {
1147 mp_tp
->mpt_sndwnd
= tiwin
;
1148 mp_tp
->mpt_sndwl1
= seq
;
1149 mp_tp
->mpt_sndwl2
= ack
;
1154 mptcp_do_dss_opt_ack_meat(u_int64_t full_dack
, u_int64_t full_dsn
,
1155 struct tcpcb
*tp
, u_int32_t tiwin
)
1157 struct mptcb
*mp_tp
= tptomptp(tp
);
1158 int close_notify
= 0;
1160 tp
->t_mpflags
|= TMPF_RCVD_DACK
;
1162 if (MPTCP_SEQ_LEQ(full_dack
, mp_tp
->mpt_sndmax
) &&
1163 MPTCP_SEQ_GEQ(full_dack
, mp_tp
->mpt_snduna
)) {
1164 mptcp_data_ack_rcvd(mp_tp
, tp
, full_dack
);
1165 if (mp_tp
->mpt_state
> MPTCPS_FIN_WAIT_2
) {
1168 if (mp_tp
->mpt_flags
& MPTCPF_RCVD_64BITACK
) {
1169 mp_tp
->mpt_flags
&= ~MPTCPF_RCVD_64BITACK
;
1170 mp_tp
->mpt_flags
&= ~MPTCPF_SND_64BITDSN
;
1172 mptcp_notify_mpready(tp
->t_inpcb
->inp_socket
);
1174 mptcp_notify_close(tp
->t_inpcb
->inp_socket
);
1178 mptcp_update_window(mp_tp
, full_dack
, full_dsn
, tiwin
);
1182 mptcp_do_dss_opt_meat(u_char
*cp
, struct tcpcb
*tp
, struct tcphdr
*th
)
1184 struct mptcp_dss_copt
*dss_rsp
= (struct mptcp_dss_copt
*)cp
;
1185 u_int64_t full_dack
= 0;
1186 u_int32_t tiwin
= th
->th_win
<< tp
->snd_scale
;
1187 struct mptcb
*mp_tp
= tptomptp(tp
);
1190 #define MPTCP_DSS_OPT_SZ_CHK(len, expected_len) { \
1191 if (len != expected_len) { \
1192 mptcplog((LOG_ERR, "%s: bad len = %d dss: %x \n", __func__, \
1193 len, dss_rsp->mdss_flags), \
1194 (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG), \
1195 MPTCP_LOGLVL_LOG); \
1200 if (mp_tp
->mpt_flags
& MPTCPF_CHECKSUM
) {
1204 dss_rsp
->mdss_flags
&= (MDSS_A
| MDSS_a
| MDSS_M
| MDSS_m
);
1205 switch (dss_rsp
->mdss_flags
) {
1208 /* 32-bit DSS, No Data ACK */
1209 struct mptcp_dsn_opt
*dss_rsp1
;
1210 dss_rsp1
= (struct mptcp_dsn_opt
*)cp
;
1212 MPTCP_DSS_OPT_SZ_CHK(dss_rsp1
->mdss_copt
.mdss_len
,
1213 sizeof(struct mptcp_dsn_opt
) + csum_len
);
1214 if (csum_len
== 0) {
1215 mptcp_update_dss_rcv_state(dss_rsp1
, tp
, 0);
1217 mptcp_update_dss_rcv_state(dss_rsp1
, tp
,
1218 *(uint16_t *)(void *)(cp
+
1219 (dss_rsp1
->mdss_copt
.mdss_len
- csum_len
)));
1225 /* 32-bit Data ACK, no DSS */
1226 struct mptcp_data_ack_opt
*dack_opt
;
1227 dack_opt
= (struct mptcp_data_ack_opt
*)cp
;
1229 MPTCP_DSS_OPT_SZ_CHK(dack_opt
->mdss_copt
.mdss_len
,
1230 sizeof(struct mptcp_data_ack_opt
));
1232 u_int32_t dack
= dack_opt
->mdss_ack
;
1234 MPTCP_EXTEND_DSN(mp_tp
->mpt_snduna
, dack
, full_dack
);
1235 mptcp_do_dss_opt_ack_meat(full_dack
, mp_tp
->mpt_sndwl1
, tp
, tiwin
);
1238 case (MDSS_M
| MDSS_A
):
1240 /* 32-bit Data ACK + 32-bit DSS */
1241 struct mptcp_dss_ack_opt
*dss_ack_rsp
;
1242 dss_ack_rsp
= (struct mptcp_dss_ack_opt
*)cp
;
1246 MPTCP_DSS_OPT_SZ_CHK(dss_ack_rsp
->mdss_copt
.mdss_len
,
1247 sizeof(struct mptcp_dss_ack_opt
) + csum_len
);
1249 u_int32_t dack
= dss_ack_rsp
->mdss_ack
;
1251 MPTCP_EXTEND_DSN(mp_tp
->mpt_snduna
, dack
, full_dack
);
1253 NTOHL(dss_ack_rsp
->mdss_dsn
);
1254 NTOHL(dss_ack_rsp
->mdss_subflow_seqn
);
1255 NTOHS(dss_ack_rsp
->mdss_data_len
);
1256 MPTCP_EXTEND_DSN(mp_tp
->mpt_rcvnxt
, dss_ack_rsp
->mdss_dsn
, full_dsn
);
1258 mptcp_do_dss_opt_ack_meat(full_dack
, full_dsn
, tp
, tiwin
);
1260 if (csum_len
!= 0) {
1261 csum
= *(uint16_t *)(void *)(cp
+ (dss_ack_rsp
->mdss_copt
.mdss_len
- csum_len
));
1264 mptcp_update_rcv_state_meat(mp_tp
, tp
,
1266 dss_ack_rsp
->mdss_subflow_seqn
,
1267 dss_ack_rsp
->mdss_data_len
,
1271 case (MDSS_M
| MDSS_m
):
1273 /* 64-bit DSS , No Data ACK */
1274 struct mptcp_dsn64_opt
*dsn64
;
1275 dsn64
= (struct mptcp_dsn64_opt
*)cp
;
1279 MPTCP_DSS_OPT_SZ_CHK(dsn64
->mdss_copt
.mdss_len
,
1280 sizeof(struct mptcp_dsn64_opt
) + csum_len
);
1282 mp_tp
->mpt_flags
|= MPTCPF_SND_64BITACK
;
1284 full_dsn
= mptcp_ntoh64(dsn64
->mdss_dsn
);
1285 NTOHL(dsn64
->mdss_subflow_seqn
);
1286 NTOHS(dsn64
->mdss_data_len
);
1288 if (csum_len
!= 0) {
1289 csum
= *(uint16_t *)(void *)(cp
+ dsn64
->mdss_copt
.mdss_len
- csum_len
);
1292 mptcp_update_rcv_state_meat(mp_tp
, tp
, full_dsn
,
1293 dsn64
->mdss_subflow_seqn
,
1294 dsn64
->mdss_data_len
,
1298 case (MDSS_A
| MDSS_a
):
1300 /* 64-bit Data ACK, no DSS */
1301 struct mptcp_data_ack64_opt
*dack64
;
1302 dack64
= (struct mptcp_data_ack64_opt
*)cp
;
1304 MPTCP_DSS_OPT_SZ_CHK(dack64
->mdss_copt
.mdss_len
,
1305 sizeof(struct mptcp_data_ack64_opt
));
1307 mp_tp
->mpt_flags
|= MPTCPF_RCVD_64BITACK
;
1309 full_dack
= mptcp_ntoh64(dack64
->mdss_ack
);
1310 mptcp_do_dss_opt_ack_meat(full_dack
, mp_tp
->mpt_sndwl1
, tp
, tiwin
);
1313 case (MDSS_M
| MDSS_m
| MDSS_A
):
1315 /* 64-bit DSS + 32-bit Data ACK */
1316 struct mptcp_dss64_ack32_opt
*dss_ack_rsp
;
1317 dss_ack_rsp
= (struct mptcp_dss64_ack32_opt
*)cp
;
1321 MPTCP_DSS_OPT_SZ_CHK(dss_ack_rsp
->mdss_copt
.mdss_len
,
1322 sizeof(struct mptcp_dss64_ack32_opt
) + csum_len
);
1324 u_int32_t dack
= dss_ack_rsp
->mdss_ack
;
1326 mp_tp
->mpt_flags
|= MPTCPF_SND_64BITACK
;
1327 MPTCP_EXTEND_DSN(mp_tp
->mpt_snduna
, dack
, full_dack
);
1329 full_dsn
= mptcp_ntoh64(dss_ack_rsp
->mdss_dsn
);
1330 NTOHL(dss_ack_rsp
->mdss_subflow_seqn
);
1331 NTOHS(dss_ack_rsp
->mdss_data_len
);
1333 mptcp_do_dss_opt_ack_meat(full_dack
, full_dsn
, tp
, tiwin
);
1335 if (csum_len
!= 0) {
1336 csum
= *(uint16_t *)(void *)(cp
+ dss_ack_rsp
->mdss_copt
.mdss_len
- csum_len
);
1339 mptcp_update_rcv_state_meat(mp_tp
, tp
, full_dsn
,
1340 dss_ack_rsp
->mdss_subflow_seqn
,
1341 dss_ack_rsp
->mdss_data_len
,
1346 case (MDSS_M
| MDSS_A
| MDSS_a
):
1348 /* 32-bit DSS + 64-bit Data ACK */
1349 struct mptcp_dss32_ack64_opt
*dss32_ack64_opt
;
1350 dss32_ack64_opt
= (struct mptcp_dss32_ack64_opt
*)cp
;
1353 MPTCP_DSS_OPT_SZ_CHK(
1354 dss32_ack64_opt
->mdss_copt
.mdss_len
,
1355 sizeof(struct mptcp_dss32_ack64_opt
) + csum_len
);
1357 full_dack
= mptcp_ntoh64(dss32_ack64_opt
->mdss_ack
);
1358 NTOHL(dss32_ack64_opt
->mdss_dsn
);
1359 mp_tp
->mpt_flags
|= MPTCPF_RCVD_64BITACK
;
1360 MPTCP_EXTEND_DSN(mp_tp
->mpt_rcvnxt
,
1361 dss32_ack64_opt
->mdss_dsn
, full_dsn
);
1362 NTOHL(dss32_ack64_opt
->mdss_subflow_seqn
);
1363 NTOHS(dss32_ack64_opt
->mdss_data_len
);
1365 mptcp_do_dss_opt_ack_meat(full_dack
, full_dsn
, tp
, tiwin
);
1366 if (csum_len
== 0) {
1367 mptcp_update_rcv_state_meat(mp_tp
, tp
, full_dsn
,
1368 dss32_ack64_opt
->mdss_subflow_seqn
,
1369 dss32_ack64_opt
->mdss_data_len
, 0);
1371 mptcp_update_rcv_state_meat(mp_tp
, tp
, full_dsn
,
1372 dss32_ack64_opt
->mdss_subflow_seqn
,
1373 dss32_ack64_opt
->mdss_data_len
,
1374 *(uint16_t *)(void *)(cp
+
1375 dss32_ack64_opt
->mdss_copt
.mdss_len
-
1380 case (MDSS_M
| MDSS_m
| MDSS_A
| MDSS_a
):
1382 /* 64-bit DSS + 64-bit Data ACK */
1383 struct mptcp_dss64_ack64_opt
*dss64_ack64
;
1384 dss64_ack64
= (struct mptcp_dss64_ack64_opt
*)cp
;
1387 MPTCP_DSS_OPT_SZ_CHK(dss64_ack64
->mdss_copt
.mdss_len
,
1388 sizeof(struct mptcp_dss64_ack64_opt
) + csum_len
);
1390 mp_tp
->mpt_flags
|= MPTCPF_RCVD_64BITACK
;
1391 mp_tp
->mpt_flags
|= MPTCPF_SND_64BITACK
;
1392 full_dsn
= mptcp_ntoh64(dss64_ack64
->mdss_dsn
);
1393 full_dack
= mptcp_ntoh64(dss64_ack64
->mdss_dsn
);
1394 mptcp_do_dss_opt_ack_meat(full_dack
, full_dsn
, tp
, tiwin
);
1395 NTOHL(dss64_ack64
->mdss_subflow_seqn
);
1396 NTOHS(dss64_ack64
->mdss_data_len
);
1397 if (csum_len
== 0) {
1398 mptcp_update_rcv_state_meat(mp_tp
, tp
, full_dsn
,
1399 dss64_ack64
->mdss_subflow_seqn
,
1400 dss64_ack64
->mdss_data_len
, 0);
1402 mptcp_update_rcv_state_meat(mp_tp
, tp
, full_dsn
,
1403 dss64_ack64
->mdss_subflow_seqn
,
1404 dss64_ack64
->mdss_data_len
,
1405 *(uint16_t *)(void *)(cp
+
1406 dss64_ack64
->mdss_copt
.mdss_len
-
1412 mptcplog((LOG_DEBUG
, "%s: File bug, DSS flags = %x\n",
1413 __func__
, dss_rsp
->mdss_flags
),
1414 (MPTCP_SOCKET_DBG
| MPTCP_RECEIVER_DBG
),
1421 mptcp_do_dss_opt(struct tcpcb
*tp
, u_char
*cp
, struct tcphdr
*th
)
1423 struct mptcp_dss_copt
*dss_rsp
= (struct mptcp_dss_copt
*)cp
;
1424 struct mptcb
*mp_tp
= tptomptp(tp
);
1430 if (dss_rsp
->mdss_subtype
== MPO_DSS
) {
1431 if (dss_rsp
->mdss_flags
& MDSS_F
) {
1432 tp
->t_rcv_map
.mpt_dfin
= 1;
1434 tp
->t_rcv_map
.mpt_dfin
= 0;
1437 mptcp_do_dss_opt_meat(cp
, tp
, th
);
1442 mptcp_do_fastclose_opt(struct tcpcb
*tp
, u_char
*cp
, struct tcphdr
*th
)
1444 struct mptcb
*mp_tp
= NULL
;
1445 struct mptcp_fastclose_opt
*fc_opt
= (struct mptcp_fastclose_opt
*)cp
;
1447 if (th
->th_flags
!= TH_ACK
) {
1451 if (fc_opt
->mfast_len
!= sizeof(struct mptcp_fastclose_opt
)) {
1452 tcpstat
.tcps_invalid_opt
++;
1456 mp_tp
= tptomptp(tp
);
1461 if (fc_opt
->mfast_key
!= mp_tp
->mpt_localkey
) {
1462 tcpstat
.tcps_invalid_opt
++;
1467 * fastclose could make us more vulnerable to attacks, hence
1468 * accept only those that are at the next expected sequence number.
1470 if (th
->th_seq
!= tp
->rcv_nxt
) {
1471 tcpstat
.tcps_invalid_opt
++;
1475 /* Reset this flow */
1476 tp
->t_mpflags
|= TMPF_FASTCLOSERCV
;
1478 if (tp
->t_inpcb
->inp_socket
!= NULL
) {
1479 soevent(tp
->t_inpcb
->inp_socket
,
1480 SO_FILT_HINT_LOCKED
| SO_FILT_HINT_MUSTRST
);
1486 mptcp_do_mpfail_opt(struct tcpcb
*tp
, u_char
*cp
, struct tcphdr
*th
)
1488 struct mptcp_mpfail_opt
*fail_opt
= (struct mptcp_mpfail_opt
*)cp
;
1489 u_int32_t mdss_subflow_seqn
= 0;
1490 struct mptcb
*mp_tp
;
1494 * mpfail could make us more vulnerable to attacks. Hence accept
1495 * only those that are the next expected sequence number.
1497 if (th
->th_seq
!= tp
->rcv_nxt
) {
1498 tcpstat
.tcps_invalid_opt
++;
1502 /* A packet without RST, must atleast have the ACK bit set */
1503 if ((th
->th_flags
!= TH_ACK
) && (th
->th_flags
!= TH_RST
)) {
1507 if (fail_opt
->mfail_len
!= sizeof(struct mptcp_mpfail_opt
)) {
1511 mp_tp
= tptomptp(tp
);
1513 mp_tp
->mpt_flags
|= MPTCPF_RECVD_MPFAIL
;
1514 mp_tp
->mpt_dsn_at_csum_fail
= mptcp_hton64(fail_opt
->mfail_dsn
);
1515 error
= mptcp_get_map_for_dsn(tp
->t_inpcb
->inp_socket
,
1516 mp_tp
->mpt_dsn_at_csum_fail
, &mdss_subflow_seqn
);
1518 mp_tp
->mpt_ssn_at_csum_fail
= mdss_subflow_seqn
;
1521 mptcp_notify_mpfail(tp
->t_inpcb
->inp_socket
);
1525 mptcp_do_add_addr_opt(struct mptses
*mpte
, u_char
*cp
)
1527 struct mptcp_add_addr_opt
*addr_opt
= (struct mptcp_add_addr_opt
*)cp
;
1529 if (addr_opt
->maddr_len
!= MPTCP_ADD_ADDR_OPT_LEN_V4
&&
1530 addr_opt
->maddr_len
!= MPTCP_ADD_ADDR_OPT_LEN_V6
) {
1531 os_log_info(mptcp_log_handle
, "%s - %lx: Wrong ADD_ADDR length %u\n",
1532 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
),
1533 addr_opt
->maddr_len
);
1538 if (addr_opt
->maddr_len
== MPTCP_ADD_ADDR_OPT_LEN_V4
&&
1539 addr_opt
->maddr_ipversion
!= 4) {
1540 os_log_info(mptcp_log_handle
, "%s - %lx: ADD_ADDR length for v4 but version is %u\n",
1541 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
),
1542 addr_opt
->maddr_ipversion
);
1547 if (addr_opt
->maddr_len
== MPTCP_ADD_ADDR_OPT_LEN_V6
&&
1548 addr_opt
->maddr_ipversion
!= 6) {
1549 os_log_info(mptcp_log_handle
, "%s - %lx: ADD_ADDR length for v6 but version is %u\n",
1550 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
),
1551 addr_opt
->maddr_ipversion
);
1556 if (addr_opt
->maddr_len
== MPTCP_ADD_ADDR_OPT_LEN_V4
) {
1557 struct sockaddr_in
*dst
= &mpte
->mpte_sub_dst_v4
;
1558 struct in_addr
*addr
= &addr_opt
->maddr_u
.maddr_addrv4
;
1559 in_addr_t haddr
= ntohl(addr
->s_addr
);
1561 if (IN_ZERONET(haddr
) ||
1562 IN_LOOPBACK(haddr
) ||
1563 IN_LINKLOCAL(haddr
) ||
1564 IN_DS_LITE(haddr
) ||
1565 IN_6TO4_RELAY_ANYCAST(haddr
) ||
1566 IN_MULTICAST(haddr
) ||
1567 INADDR_BROADCAST
== haddr
||
1568 IN_PRIVATE(haddr
) ||
1569 IN_SHARED_ADDRESS_SPACE(haddr
)) {
1570 os_log_info(mptcp_log_handle
, "%s - %lx: ADD_ADDR invalid addr: %x\n",
1571 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
),
1577 dst
->sin_len
= sizeof(*dst
);
1578 dst
->sin_family
= AF_INET
;
1579 dst
->sin_port
= mpte
->__mpte_dst_v4
.sin_port
;
1580 dst
->sin_addr
.s_addr
= addr
->s_addr
;
1582 struct sockaddr_in6
*dst
= &mpte
->mpte_sub_dst_v6
;
1583 struct in6_addr
*addr
= &addr_opt
->maddr_u
.maddr_addrv6
;
1585 if (IN6_IS_ADDR_LINKLOCAL(addr
) ||
1586 IN6_IS_ADDR_MULTICAST(addr
) ||
1587 IN6_IS_ADDR_UNSPECIFIED(addr
) ||
1588 IN6_IS_ADDR_LOOPBACK(addr
) ||
1589 IN6_IS_ADDR_V4COMPAT(addr
) ||
1590 IN6_IS_ADDR_V4MAPPED(addr
)) {
1591 char dbuf
[MAX_IPv6_STR_LEN
];
1593 inet_ntop(AF_INET6
, &dst
->sin6_addr
, dbuf
, sizeof(dbuf
));
1594 os_log_info(mptcp_log_handle
, "%s - %lx: ADD_ADDRv6 invalid addr: %s\n",
1595 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
),
1601 dst
->sin6_len
= sizeof(*dst
);
1602 dst
->sin6_family
= AF_INET6
;
1603 dst
->sin6_port
= mpte
->__mpte_dst_v6
.sin6_port
;
1604 memcpy(&dst
->sin6_addr
, addr
, sizeof(*addr
));
1607 os_log_info(mptcp_log_handle
, "%s - %lx: Received ADD_ADDRv%u\n",
1608 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
),
1609 addr_opt
->maddr_ipversion
);
1611 mptcp_sched_create_subflows(mpte
);
1615 tcp_do_mptcp_options(struct tcpcb
*tp
, u_char
*cp
, struct tcphdr
*th
,
1616 struct tcpopt
*to
, int optlen
)
1619 struct mptcb
*mp_tp
= tptomptp(tp
);
1621 if (mp_tp
== NULL
) {
1625 socket_lock_assert_owned(mptetoso(mp_tp
->mpt_mpte
));
1627 /* All MPTCP options have atleast 4 bytes */
1632 mptcp_subtype
= (cp
[2] >> 4);
1634 if (mptcp_sanitize_option(tp
, mptcp_subtype
) == 0) {
1638 switch (mptcp_subtype
) {
1640 mptcp_do_mpcapable_opt(tp
, cp
, th
, optlen
);
1643 mptcp_do_mpjoin_opt(tp
, cp
, th
, optlen
);
1646 mptcp_do_dss_opt(tp
, cp
, th
);
1649 mptcp_do_fastclose_opt(tp
, cp
, th
);
1652 mptcp_do_mpfail_opt(tp
, cp
, th
);
1655 mptcp_do_add_addr_opt(mp_tp
->mpt_mpte
, cp
);
1657 case MPO_REMOVE_ADDR
: /* fall through */
1659 to
->to_flags
|= TOF_MPTCP
;
1667 /* REMOVE_ADDR option is sent when a source address goes away */
1669 mptcp_send_remaddr_opt(struct tcpcb
*tp
, struct mptcp_remaddr_opt
*opt
)
1671 mptcplog((LOG_DEBUG
, "%s: local id %d remove id %d \n",
1672 __func__
, tp
->t_local_aid
, tp
->t_rem_aid
),
1673 (MPTCP_SOCKET_DBG
| MPTCP_SENDER_DBG
), MPTCP_LOGLVL_LOG
);
1675 bzero(opt
, sizeof(*opt
));
1676 opt
->mr_kind
= TCPOPT_MULTIPATH
;
1677 opt
->mr_len
= sizeof(*opt
);
1678 opt
->mr_subtype
= MPO_REMOVE_ADDR
;
1679 opt
->mr_addr_id
= tp
->t_rem_aid
;
1680 tp
->t_mpflags
&= ~TMPF_SND_REM_ADDR
;
1683 /* We send MP_PRIO option based on the values set by the SIOCSCONNORDER ioctl */
1685 mptcp_snd_mpprio(struct tcpcb
*tp
, u_char
*cp
, int optlen
)
1687 struct mptcp_mpprio_addr_opt mpprio
;
1689 if (tp
->t_state
!= TCPS_ESTABLISHED
) {
1690 tp
->t_mpflags
&= ~TMPF_SND_MPPRIO
;
1694 if ((MAX_TCPOPTLEN
- optlen
) <
1695 (int)sizeof(mpprio
)) {
1699 bzero(&mpprio
, sizeof(mpprio
));
1700 mpprio
.mpprio_kind
= TCPOPT_MULTIPATH
;
1701 mpprio
.mpprio_len
= sizeof(mpprio
);
1702 mpprio
.mpprio_subtype
= MPO_PRIO
;
1703 if (tp
->t_mpflags
& TMPF_BACKUP_PATH
) {
1704 mpprio
.mpprio_flags
|= MPTCP_MPPRIO_BKP
;
1706 mpprio
.mpprio_addrid
= tp
->t_local_aid
;
1707 memcpy(cp
+ optlen
, &mpprio
, sizeof(mpprio
));
1708 optlen
+= sizeof(mpprio
);
1709 tp
->t_mpflags
&= ~TMPF_SND_MPPRIO
;
1710 mptcplog((LOG_DEBUG
, "%s: aid = %d \n", __func__
,
1712 (MPTCP_SOCKET_DBG
| MPTCP_SENDER_DBG
), MPTCP_LOGLVL_LOG
);