]>
Commit | Line | Data |
---|---|---|
39236c6e A |
1 | /* |
2 | * Copyright (c) 2012-2013 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <sys/param.h> | |
30 | #include <sys/systm.h> | |
31 | #include <sys/kernel.h> | |
32 | #include <sys/mbuf.h> | |
33 | #include <sys/mcache.h> | |
34 | #include <sys/socket.h> | |
35 | #include <sys/socketvar.h> | |
36 | #include <sys/syslog.h> | |
37 | #include <sys/protosw.h> | |
38 | ||
39 | #include <kern/zalloc.h> | |
40 | #include <kern/locks.h> | |
41 | ||
42 | #include <mach/thread_act.h> | |
43 | #include <mach/sdt.h> | |
44 | ||
45 | #include <dev/random/randomdev.h> | |
46 | ||
47 | #include <net/if.h> | |
48 | #include <netinet/in.h> | |
49 | #include <netinet/in_var.h> | |
50 | #include <netinet/tcp.h> | |
51 | #include <netinet/tcp_fsm.h> | |
52 | #include <netinet/tcp_seq.h> | |
53 | #include <netinet/tcp_var.h> | |
54 | #include <netinet/mptcp_var.h> | |
55 | #include <netinet/mptcp.h> | |
56 | #include <netinet/mptcp_seq.h> | |
57 | #include <netinet/mptcp_opt.h> | |
58 | #include <netinet/mptcp_timer.h> | |
59 | ||
60 | int mptcp_enable = 1; | |
61 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED, | |
62 | &mptcp_enable, 0, "Enable Multipath TCP Support"); | |
63 | ||
64 | int mptcp_dbg = 0; | |
65 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED, | |
66 | &mptcp_dbg, 0, "Enable Multipath TCP Debugging"); | |
67 | ||
68 | /* Number of times to try negotiating MPTCP on SYN retransmissions */ | |
69 | int mptcp_mpcap_retries = MPTCP_CAPABLE_RETRIES; | |
70 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr, | |
71 | CTLFLAG_RW | CTLFLAG_LOCKED, | |
72 | &mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries"); | |
73 | ||
74 | /* | |
75 | * By default, DSS checksum is turned off, revisit if we ever do | |
76 | * MPTCP for non SSL Traffic. | |
77 | */ | |
78 | int mptcp_dss_csum = 0; | |
79 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dss_csum, CTLFLAG_RW | CTLFLAG_LOCKED, | |
80 | &mptcp_dss_csum, 0, "Enable DSS checksum"); | |
81 | ||
82 | /* | |
83 | * When mptcp_fail_thresh number of retransmissions are sent, subflow failover | |
84 | * is attempted on a different path. | |
85 | */ | |
86 | int mptcp_fail_thresh = 1; | |
87 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fail, CTLFLAG_RW | CTLFLAG_LOCKED, | |
88 | &mptcp_fail_thresh, 0, "Failover threshold"); | |
89 | ||
90 | ||
91 | /* | |
92 | * MPTCP subflows have TCP keepalives set to ON | |
93 | */ | |
94 | int mptcp_subflow_keeptime = 60; | |
95 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, keepalive, CTLFLAG_RW | CTLFLAG_LOCKED, | |
96 | &mptcp_subflow_keeptime, 0, "Keepalive in seconds"); | |
97 | ||
98 | /* | |
99 | * MP_PRIO option. | |
100 | */ | |
101 | int mptcp_mpprio_enable = 1; | |
102 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mpprio, CTLFLAG_RW | CTLFLAG_LOCKED, | |
103 | &mptcp_mpprio_enable, 0, "Enable MP_PRIO option"); | |
104 | ||
105 | /* | |
106 | * REMOVE_ADDR option. | |
107 | */ | |
108 | int mptcp_remaddr_enable = 1; | |
109 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, remaddr, CTLFLAG_RW | CTLFLAG_LOCKED, | |
110 | &mptcp_remaddr_enable, 0, "Enable REMOVE_ADDR option"); | |
111 | ||
112 | /* | |
113 | * MPTCP input, called when data has been read from a subflow socket. | |
114 | */ | |
115 | void | |
116 | mptcp_input(struct mptses *mpte, struct mbuf *m) | |
117 | { | |
118 | struct socket *mp_so; | |
119 | struct mptcb *mp_tp = NULL; | |
120 | u_int64_t mb_dsn; | |
121 | u_int32_t mb_datalen; | |
122 | int count = 0; | |
123 | struct mbuf *save = NULL; | |
124 | struct mbuf *freelist = NULL, *tail = NULL; | |
125 | ||
126 | VERIFY(m->m_flags & M_PKTHDR); | |
127 | ||
128 | MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ | |
129 | mp_so = mpte->mpte_mppcb->mpp_socket; | |
130 | ||
131 | DTRACE_MPTCP(input); | |
132 | ||
133 | /* | |
134 | * Each mbuf contains MPTCP Data Sequence Map | |
135 | * Process the data for reassembly, delivery to MPTCP socket | |
136 | * client, etc. | |
137 | * | |
138 | */ | |
139 | count = mp_so->so_rcv.sb_cc; | |
140 | ||
141 | VERIFY(m != NULL); | |
142 | /* | |
143 | * In the degraded fallback case, data is accepted without DSS map | |
144 | */ | |
145 | if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) { | |
146 | /* XXX need a check that this is indeed degraded */ | |
147 | if (sbappendstream(&mp_so->so_rcv, m)) | |
148 | sorwakeup(mp_so); | |
149 | DTRACE_MPTCP5(receive__degraded, struct mbuf *, m, | |
150 | struct socket *, mp_so, | |
151 | struct sockbuf *, &mp_so->so_rcv, | |
152 | struct sockbuf *, &mp_so->so_snd, | |
153 | struct mptses *, mpte); | |
154 | count = mp_so->so_rcv.sb_cc - count; | |
155 | mptcplog3((LOG_DEBUG, "%s: fread %d bytes\n", __func__, count)); | |
156 | return; | |
157 | } | |
158 | ||
159 | mp_tp = mpte->mpte_mptcb; | |
160 | VERIFY(mp_tp != NULL); | |
161 | ||
162 | MPT_LOCK(mp_tp); | |
163 | do { | |
164 | save = m->m_next; | |
165 | m->m_next = NULL; | |
166 | ||
167 | mb_dsn = m->m_pkthdr.mp_dsn; | |
168 | mb_datalen = m->m_pkthdr.mp_rlen; | |
169 | ||
170 | if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvatmark)) { | |
171 | tcpstat.tcps_mp_oodata++; | |
172 | MPT_UNLOCK(mp_tp); | |
173 | m_freem(m); | |
174 | return; | |
175 | /* | |
176 | * Reassembly queue support here in future. Per spec, | |
177 | * senders must implement retransmission timer to | |
178 | * retransmit unacked data. Dropping out of order | |
179 | * gives a slight hit on performance but allows us to | |
180 | * deploy MPTCP and protects us against in-window DoS | |
181 | * attacks that attempt to use up memory by sending | |
182 | * out of order data. When doing load sharing across | |
183 | * subflows, out of order support is a must. | |
184 | */ | |
185 | } | |
186 | ||
187 | if (MPTCP_SEQ_LT(mb_dsn, mp_tp->mpt_rcvatmark)) { | |
188 | VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP); | |
189 | VERIFY(m->m_flags & M_PKTHDR); | |
190 | VERIFY(m->m_len >= (int)mb_datalen); | |
191 | VERIFY(m->m_pkthdr.len >= (int)mb_datalen); | |
192 | if (MPTCP_SEQ_LEQ((mb_dsn + mb_datalen), | |
193 | mp_tp->mpt_rcvatmark)) { | |
194 | if (freelist == NULL) | |
195 | freelist = tail = m; | |
196 | else { | |
197 | tail->m_next = m; | |
198 | tail = m; | |
199 | } | |
200 | m = save; | |
201 | continue; | |
202 | } else { | |
203 | m_adj(m, (mp_tp->mpt_rcvatmark - mb_dsn)); | |
204 | } | |
205 | mptcplog((LOG_INFO, "%s: %llu %d 2 \n", __func__, | |
206 | mp_tp->mpt_rcvatmark, m->m_pkthdr.len)); | |
207 | } | |
208 | ||
209 | MPT_UNLOCK(mp_tp); | |
210 | if (sbappendstream(&mp_so->so_rcv, m)) { | |
211 | sorwakeup(mp_so); | |
212 | } | |
213 | DTRACE_MPTCP6(receive, struct mbuf *, m, struct socket *, mp_so, | |
214 | struct sockbuf *, &mp_so->so_rcv, | |
215 | struct sockbuf *, &mp_so->so_snd, | |
216 | struct mptses *, mpte, | |
217 | struct mptcb *, mp_tp); | |
218 | MPT_LOCK(mp_tp); | |
219 | count = mp_so->so_rcv.sb_cc - count; | |
220 | tcpstat.tcps_mp_rcvtotal++; | |
221 | tcpstat.tcps_mp_rcvbytes += count; | |
222 | mptcplog3((LOG_DEBUG, "%s: read %d bytes\n", __func__, count)); | |
223 | /* | |
224 | * The data received at the MPTCP layer will never exceed the | |
225 | * receive window because anything to the right of the | |
226 | * receive window will be trimmed at the subflow level. | |
227 | */ | |
228 | mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp); | |
229 | mp_tp->mpt_rcvatmark += count; | |
230 | m = save; | |
231 | count = mp_so->so_rcv.sb_cc; | |
232 | } while (m); | |
233 | MPT_UNLOCK(mp_tp); | |
234 | ||
235 | if (freelist) | |
236 | m_freem(freelist); | |
237 | } | |
238 | ||
239 | /* | |
240 | * MPTCP output. | |
241 | */ | |
242 | int | |
243 | mptcp_output(struct mptses *mpte) | |
244 | { | |
245 | struct mptsub *mpts; | |
246 | struct mptsub *mpts_tried = NULL; | |
247 | struct socket *mp_so; | |
248 | int error = 0; | |
249 | ||
250 | MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ | |
251 | mp_so = mpte->mpte_mppcb->mpp_socket; | |
252 | if (mp_so->so_state & SS_CANTSENDMORE) { | |
253 | return (EPIPE); | |
254 | } | |
255 | ||
256 | try_again: | |
257 | /* get the "best" subflow to be used for transmission */ | |
258 | mpts = mptcp_get_subflow(mpte, NULL); | |
259 | if (mpts == NULL) { | |
260 | mptcplog((LOG_ERR, "%s: mp_so 0x%llx has no usable subflow\n", | |
261 | __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so))); | |
262 | goto out; | |
263 | } | |
264 | ||
265 | mptcplog3((LOG_INFO, "%s: mp_so 0x%llx cid %d \n", __func__, | |
266 | (uint64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid)); | |
267 | ||
268 | /* In case there's just one flow, we reattempt later */ | |
269 | MPTS_LOCK(mpts); | |
270 | if ((mpts_tried != NULL) && ((mpts == mpts_tried) || | |
271 | (mpts->mpts_flags & MPTSF_FAILINGOVER))) { | |
272 | MPTS_UNLOCK(mpts); | |
273 | MPTS_LOCK(mpts_tried); | |
274 | mpts_tried->mpts_flags &= ~MPTSF_FAILINGOVER; | |
275 | mpts_tried->mpts_flags |= MPTSF_ACTIVE; | |
276 | MPTS_UNLOCK(mpts_tried); | |
277 | MPT_LOCK(mpte->mpte_mptcb); | |
278 | mptcp_start_timer(mpte->mpte_mptcb, MPTT_REXMT); | |
279 | MPT_UNLOCK(mpte->mpte_mptcb); | |
280 | mptcplog((LOG_INFO, "%s: mp_so 0x%llx retry later\n", | |
281 | __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so))); | |
282 | goto out; | |
283 | } | |
284 | ||
285 | DTRACE_MPTCP3(output, struct mptses *, mpte, struct mptsub *, mpts, | |
286 | struct socket *, mp_so); | |
287 | error = mptcp_subflow_output(mpte, mpts); | |
288 | if (error) { | |
289 | /* can be a temporary loss of source address or other error */ | |
290 | mpts->mpts_flags |= MPTSF_FAILINGOVER; | |
291 | mpts->mpts_flags &= ~MPTSF_ACTIVE; | |
292 | mpts_tried = mpts; | |
293 | MPTS_UNLOCK(mpts); | |
294 | mptcplog((LOG_INFO, "%s: error = %d \n", __func__, error)); | |
295 | goto try_again; | |
296 | } | |
297 | /* The model is to have only one active flow at a time */ | |
298 | mpts->mpts_flags |= MPTSF_ACTIVE; | |
299 | MPTS_UNLOCK(mpts); | |
300 | if (mpte->mpte_active_sub == NULL) { | |
301 | mpte->mpte_active_sub = mpts; | |
302 | } else if (mpte->mpte_active_sub != mpts) { | |
303 | MPTS_LOCK(mpte->mpte_active_sub); | |
304 | mpte->mpte_active_sub->mpts_flags &= ~MPTSF_ACTIVE; | |
305 | MPTS_UNLOCK(mpte->mpte_active_sub); | |
306 | mpte->mpte_active_sub = mpts; | |
307 | } | |
308 | out: | |
309 | /* subflow errors should not be percolated back up */ | |
310 | return (0); | |
311 | } | |
312 | ||
313 | /* | |
314 | * Return the most eligible subflow to be used for sending data. | |
315 | * This function also serves to check if any alternate subflow is available | |
316 | * or not. | |
317 | */ | |
318 | struct mptsub * | |
319 | mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore) | |
320 | { | |
321 | struct mptsub *mpts; | |
322 | struct mptsub *fallback = NULL; | |
323 | struct socket *so = NULL; | |
324 | ||
325 | MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ | |
326 | ||
327 | TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { | |
328 | MPTS_LOCK_SPIN(mpts); | |
329 | ||
330 | if ((ignore) && (mpts == ignore)) { | |
331 | MPTS_UNLOCK(mpts); | |
332 | continue; | |
333 | } | |
334 | ||
335 | /* There can only be one subflow in degraded state */ | |
336 | if (mpts->mpts_flags & MPTSF_MP_DEGRADED) { | |
337 | MPTS_UNLOCK(mpts); | |
338 | break; | |
339 | } | |
340 | ||
341 | if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE)) { | |
342 | MPTS_UNLOCK(mpts); | |
343 | continue; | |
344 | } | |
345 | ||
346 | if (mpts->mpts_flags & MPTSF_SUSPENDED) { | |
347 | MPTS_UNLOCK(mpts); | |
348 | continue; | |
349 | } | |
350 | ||
351 | if (mpts->mpts_flags & MPTSF_FAILINGOVER) { | |
352 | so = mpts->mpts_socket; | |
353 | if ((so) && (!(so->so_flags & SOF_PCBCLEARING))) { | |
354 | socket_lock(so, 1); | |
355 | if (so->so_snd.sb_cc == 0) { | |
356 | mpts->mpts_flags &= ~MPTSF_FAILINGOVER; | |
357 | so->so_flags &= ~SOF_MP_TRYFAILOVER; | |
358 | fallback = mpts; | |
359 | socket_unlock(so, 1); | |
360 | } else { | |
361 | fallback = mpts; | |
362 | socket_unlock(so, 1); | |
363 | MPTS_UNLOCK(mpts); | |
364 | continue; | |
365 | } | |
366 | } else { | |
367 | MPTS_UNLOCK(mpts); | |
368 | continue; | |
369 | } | |
370 | } | |
371 | ||
372 | if (mpts->mpts_flags & MPTSF_PREFERRED) { | |
373 | MPTS_UNLOCK(mpts); | |
374 | break; | |
375 | } | |
376 | ||
377 | /* When there are no preferred flows, use first one in list */ | |
378 | if (fallback == NULL) | |
379 | fallback = mpts; | |
380 | ||
381 | MPTS_UNLOCK(mpts); | |
382 | } | |
383 | /* | |
384 | * If there is no preferred or backup subflow, and there is no active | |
385 | * subflow use the last usable subflow. | |
386 | */ | |
387 | if (mpts == NULL) { | |
388 | return (fallback); | |
389 | } | |
390 | ||
391 | return (mpts); | |
392 | } | |
393 | ||
394 | void | |
395 | mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event) | |
396 | { | |
397 | MPT_LOCK_ASSERT_HELD(mp_tp); | |
398 | ||
399 | DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, | |
400 | uint32_t, event); | |
401 | ||
402 | switch (mp_tp->mpt_state) { | |
403 | case MPTCPS_CLOSED: | |
404 | case MPTCPS_LISTEN: | |
405 | mp_tp->mpt_state = MPTCPS_CLOSED; | |
406 | break; | |
407 | ||
408 | case MPTCPS_ESTABLISHED: | |
409 | if (event == MPCE_CLOSE) | |
410 | mp_tp->mpt_state = MPTCPS_FIN_WAIT_1; | |
411 | else if (event == MPCE_RECV_DATA_FIN) | |
412 | mp_tp->mpt_state = MPTCPS_CLOSE_WAIT; | |
413 | break; | |
414 | ||
415 | case MPTCPS_CLOSE_WAIT: | |
416 | if (event == MPCE_CLOSE) | |
417 | mp_tp->mpt_state = MPTCPS_LAST_ACK; | |
418 | break; | |
419 | ||
420 | case MPTCPS_FIN_WAIT_1: | |
421 | if (event == MPCE_RECV_DATA_ACK) | |
422 | mp_tp->mpt_state = MPTCPS_FIN_WAIT_2; | |
423 | else if (event == MPCE_RECV_DATA_FIN) | |
424 | mp_tp->mpt_state = MPTCPS_CLOSING; | |
425 | break; | |
426 | ||
427 | case MPTCPS_CLOSING: | |
428 | if (event == MPCE_RECV_DATA_ACK) | |
429 | mp_tp->mpt_state = MPTCPS_TIME_WAIT; | |
430 | break; | |
431 | ||
432 | case MPTCPS_LAST_ACK: | |
433 | if (event == MPCE_RECV_DATA_ACK) | |
434 | mp_tp->mpt_state = MPTCPS_CLOSED; | |
435 | break; | |
436 | ||
437 | case MPTCPS_FIN_WAIT_2: | |
438 | if (event == MPCE_RECV_DATA_FIN) | |
439 | mp_tp->mpt_state = MPTCPS_TIME_WAIT; | |
440 | break; | |
441 | ||
442 | case MPTCPS_TIME_WAIT: | |
443 | break; | |
444 | ||
445 | case MPTCPS_FASTCLOSE_WAIT: | |
446 | if (event == MPCE_CLOSE) | |
447 | mp_tp->mpt_state = MPTCPS_CLOSED; | |
448 | break; | |
449 | ||
450 | default: | |
451 | VERIFY(0); | |
452 | /* NOTREACHED */ | |
453 | } | |
454 | DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, | |
455 | uint32_t, event); | |
456 | mptcplog((LOG_INFO, "%s: state = %d\n", | |
457 | __func__, mp_tp->mpt_state)); | |
458 | } | |
459 | ||
460 | /* | |
461 | * Update the mptcb send state variables, but the actual sbdrop occurs | |
462 | * in MPTCP layer | |
463 | */ | |
464 | void | |
465 | mptcp_data_ack_rcvd(struct mptcb *mp_tp, struct tcpcb *tp, u_int64_t full_dack) | |
466 | { | |
467 | u_int64_t acked = 0; | |
468 | ||
469 | acked = full_dack - mp_tp->mpt_snduna; | |
470 | ||
471 | if (acked) { | |
472 | mp_tp->mpt_snduna += acked; | |
473 | } | |
474 | if ((full_dack == mp_tp->mpt_sndmax) && | |
475 | (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1)) { | |
476 | mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_ACK); | |
477 | tp->t_mpflags &= ~TMPF_SEND_DFIN; | |
478 | } | |
479 | } | |
480 | ||
481 | /* If you change this function, match up mptcp_update_rcv_state_f */ | |
482 | void | |
483 | mptcp_update_dss_rcv_state(struct mptcp_dsn_opt *dss_info, struct tcpcb *tp, | |
484 | uint16_t csum) | |
485 | { | |
486 | struct mptcb *mp_tp = tptomptp(tp); | |
487 | u_int64_t full_dsn = 0; | |
488 | ||
489 | NTOHL(dss_info->mdss_dsn); | |
490 | NTOHL(dss_info->mdss_subflow_seqn); | |
491 | NTOHS(dss_info->mdss_data_len); | |
492 | ||
493 | /* XXX for autosndbuf grow sb here */ | |
494 | MPT_LOCK(mp_tp); | |
495 | MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_info->mdss_dsn, full_dsn); | |
496 | MPT_UNLOCK(mp_tp); | |
497 | mptcp_update_rcv_state_meat(mp_tp, tp, | |
498 | full_dsn, dss_info->mdss_subflow_seqn, dss_info->mdss_data_len, | |
499 | csum); | |
500 | ||
501 | } | |
502 | ||
503 | void | |
504 | mptcp_update_rcv_state_meat(struct mptcb *mp_tp, struct tcpcb *tp, | |
505 | u_int64_t full_dsn, u_int32_t seqn, u_int16_t mdss_data_len, | |
506 | uint16_t csum) | |
507 | { | |
508 | if (mdss_data_len == 0) { | |
509 | mptcplog((LOG_INFO, "%s: Received infinite mapping.", | |
510 | __func__)); | |
511 | if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && (csum != 0)) { | |
512 | mptcplog((LOG_ERR, "%s: Bad checksum value %x \n", | |
513 | __func__, csum)); | |
514 | } | |
515 | mptcp_notify_mpfail(tp->t_inpcb->inp_socket); | |
516 | return; | |
517 | } | |
518 | MPT_LOCK(mp_tp); | |
519 | if (mptcp_dbg >= MP_VERBOSE_DEBUG_1) | |
520 | printf("%s: seqn = %x len = %x full = %llx rcvnxt = %llu \n", | |
521 | __func__, seqn, mdss_data_len, full_dsn, | |
522 | mp_tp->mpt_rcvnxt); | |
523 | ||
524 | /* Process a Data FIN packet , handled in mptcp_do_fin_opt */ | |
525 | if ((seqn == 0) && (mdss_data_len == 1)) { | |
526 | mptcplog((LOG_INFO, "%s: Data FIN DSS opt state = %d \n", | |
527 | __func__, mp_tp->mpt_state)); | |
528 | MPT_UNLOCK(mp_tp); | |
529 | return; | |
530 | } | |
531 | MPT_UNLOCK(mp_tp); | |
532 | mptcp_notify_mpready(tp->t_inpcb->inp_socket); | |
533 | tp->t_rcv_map.mpt_dsn = full_dsn; | |
534 | tp->t_rcv_map.mpt_sseq = seqn; | |
535 | tp->t_rcv_map.mpt_len = mdss_data_len; | |
536 | tp->t_rcv_map.mpt_csum = csum; | |
537 | tp->t_mpflags |= TMPF_EMBED_DSN; | |
538 | } | |
539 | ||
540 | ||
541 | void | |
542 | mptcp_update_rcv_state_f(struct mptcp_dss_ack_opt *dss_info, struct tcpcb *tp, | |
543 | uint16_t csum) | |
544 | { | |
545 | u_int64_t full_dsn = 0; | |
546 | struct mptcb *mp_tp = tptomptp(tp); | |
547 | ||
548 | NTOHL(dss_info->mdss_dsn); | |
549 | NTOHL(dss_info->mdss_subflow_seqn); | |
550 | NTOHS(dss_info->mdss_data_len); | |
551 | MPT_LOCK(mp_tp); | |
552 | MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_info->mdss_dsn, full_dsn); | |
553 | MPT_UNLOCK(mp_tp); | |
554 | mptcp_update_rcv_state_meat(mp_tp, tp, | |
555 | full_dsn, | |
556 | dss_info->mdss_subflow_seqn, | |
557 | dss_info->mdss_data_len, | |
558 | csum); | |
559 | } | |
560 | ||
561 | void | |
562 | mptcp_update_rcv_state_g(struct mptcp_dss64_ack32_opt *dss_info, | |
563 | struct tcpcb *tp, uint16_t csum) | |
564 | { | |
565 | u_int64_t dsn = mptcp_ntoh64(dss_info->mdss_dsn); | |
566 | struct mptcb *mp_tp = tptomptp(tp); | |
567 | ||
568 | NTOHL(dss_info->mdss_subflow_seqn); | |
569 | NTOHS(dss_info->mdss_data_len); | |
570 | mptcp_update_rcv_state_meat(mp_tp, tp, | |
571 | dsn, | |
572 | dss_info->mdss_subflow_seqn, | |
573 | dss_info->mdss_data_len, | |
574 | csum); | |
575 | } | |
576 | ||
577 | /* | |
578 | * MPTCP Checksum support | |
579 | * The checksum is calculated whenever the MPTCP DSS option is included | |
580 | * in the TCP packet. The checksum includes the sum of the MPTCP psuedo | |
581 | * header and the actual data indicated by the length specified in the | |
582 | * DSS option. | |
583 | */ | |
584 | ||
585 | uint16_t | |
586 | mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, int off) | |
587 | { | |
588 | struct mptcb *mp_tp = tptomptp(tp); | |
589 | uint32_t sum = 0; | |
590 | uint64_t dsn; | |
591 | uint32_t sseq; | |
592 | uint16_t len; | |
593 | uint16_t csum; | |
594 | ||
595 | if (mp_tp == NULL) | |
596 | return (0); | |
597 | ||
598 | if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM)) | |
599 | return (0); | |
600 | ||
601 | if (!(tp->t_mpflags & TMPF_EMBED_DSN)) | |
602 | return (0); | |
603 | ||
604 | if (tp->t_mpflags & TMPF_TCP_FALLBACK) | |
605 | return (0); | |
606 | ||
607 | /* | |
608 | * The remote side may send a packet with fewer bytes than the | |
609 | * claimed DSS checksum length. | |
610 | */ | |
611 | if ((int)m_length2(m, NULL) < (off + tp->t_rcv_map.mpt_len)) | |
612 | return (0xffff); | |
613 | ||
614 | if (tp->t_rcv_map.mpt_len != 0) | |
615 | sum = m_sum16(m, off, tp->t_rcv_map.mpt_len); | |
616 | ||
617 | dsn = mptcp_hton64(tp->t_rcv_map.mpt_dsn); | |
618 | sseq = htonl(tp->t_rcv_map.mpt_sseq); | |
619 | len = htons(tp->t_rcv_map.mpt_len); | |
620 | csum = tp->t_rcv_map.mpt_csum; | |
621 | sum += in_pseudo64(dsn, sseq, (len + csum)); | |
622 | ADDCARRY(sum); | |
623 | DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m, | |
624 | uint32_t, sum); | |
625 | mptcplog((LOG_INFO, "%s: sum = %x \n", __func__, sum)); | |
626 | return (~sum & 0xffff); | |
627 | } | |
628 | ||
629 | void | |
630 | mptcp_output_csum(struct tcpcb *tp, struct mbuf *m, int32_t len, | |
631 | unsigned hdrlen, u_int64_t dss_val, u_int32_t *sseqp) | |
632 | { | |
633 | struct mptcb *mp_tp = tptomptp(tp); | |
634 | u_int32_t sum = 0; | |
635 | uint32_t sseq; | |
636 | uint16_t dss_len; | |
637 | uint16_t csum = 0; | |
638 | uint16_t *csump = NULL; | |
639 | ||
640 | if (mp_tp == NULL) | |
641 | return; | |
642 | ||
643 | if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM)) | |
644 | return; | |
645 | ||
646 | if (sseqp == NULL) | |
647 | return; | |
648 | ||
649 | if (len) | |
650 | sum = m_sum16(m, hdrlen, len); | |
651 | ||
652 | dss_val = mptcp_hton64(dss_val); | |
653 | sseq = *sseqp; | |
654 | dss_len = *(uint16_t *)(void *)((u_char*)sseqp + sizeof (u_int32_t)); | |
655 | sum += in_pseudo64(dss_val, sseq, (dss_len + csum)); | |
656 | ||
657 | ADDCARRY(sum); | |
658 | sum = ~sum & 0xffff; | |
659 | csump = (uint16_t *)(void *)((u_char*)sseqp + sizeof (u_int32_t) + | |
660 | sizeof (uint16_t)); | |
661 | DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m, | |
662 | uint32_t, sum); | |
663 | *csump = sum; | |
664 | mptcplog3((LOG_INFO, "%s: sum = %x \n", __func__, sum)); | |
665 | } |