git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/mp_pcb.c
xnu-6153.61.1.tar.gz
[apple/xnu.git] / bsd / netinet / mp_pcb.c
1 /*
2 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/mbuf.h>
33 #include <sys/mcache.h>
34 #include <sys/syslog.h>
35 #include <sys/socket.h>
36 #include <sys/socketvar.h>
37 #include <sys/protosw.h>
38 #include <sys/proc_internal.h>
39
40 #include <mach/boolean.h>
41 #include <kern/zalloc.h>
42 #include <kern/locks.h>
43
44 #include <netinet/mp_pcb.h>
45 #include <netinet/mptcp_var.h>
46 #include <netinet6/in6_pcb.h>
47
48 static lck_grp_t *mp_lock_grp;
49 static lck_attr_t *mp_lock_attr;
50 static lck_grp_attr_t *mp_lock_grp_attr;
51 decl_lck_mtx_data(static, mp_lock); /* global MULTIPATH lock */
52 decl_lck_mtx_data(static, mp_timeout_lock);
53
54 static TAILQ_HEAD(, mppcbinfo) mppi_head = TAILQ_HEAD_INITIALIZER(mppi_head);
55
56 static boolean_t mp_timeout_run; /* MP timer is scheduled to run */
57 static boolean_t mp_garbage_collecting;
58 static boolean_t mp_ticking;
59 static void mp_sched_timeout(void);
60 static void mp_timeout(void *);
61
62 static void
63 mpp_lock_assert_held(struct mppcb *mp)
64 {
65 #if !MACH_ASSERT
66 #pragma unused(mp)
67 #endif
68 LCK_MTX_ASSERT(&mp->mpp_lock, LCK_MTX_ASSERT_OWNED);
69 }
70
71 void
72 mp_pcbinit(void)
73 {
74 static int mp_initialized = 0;
75
76 VERIFY(!mp_initialized);
77 mp_initialized = 1;
78
79 mp_lock_grp_attr = lck_grp_attr_alloc_init();
80 mp_lock_grp = lck_grp_alloc_init("multipath", mp_lock_grp_attr);
81 mp_lock_attr = lck_attr_alloc_init();
82 lck_mtx_init(&mp_lock, mp_lock_grp, mp_lock_attr);
83 lck_mtx_init(&mp_timeout_lock, mp_lock_grp, mp_lock_attr);
84 }
85
86 static void
87 mp_timeout(void *arg)
88 {
89 #pragma unused(arg)
90 struct mppcbinfo *mppi;
91 boolean_t t, gc;
92 uint32_t t_act = 0;
93 uint32_t gc_act = 0;
94
95 /*
96 * Update coarse-grained networking timestamp (in sec.); the idea
97 * is to piggy-back on the timeout callout to update the counter
98 * returnable via net_uptime().
99 */
100 net_update_uptime();
101
102 lck_mtx_lock_spin(&mp_timeout_lock);
103 gc = mp_garbage_collecting;
104 mp_garbage_collecting = FALSE;
105
106 t = mp_ticking;
107 mp_ticking = FALSE;
108
109 if (gc || t) {
110 lck_mtx_unlock(&mp_timeout_lock);
111
112 lck_mtx_lock(&mp_lock);
113 TAILQ_FOREACH(mppi, &mppi_head, mppi_entry) {
114 if ((gc && mppi->mppi_gc != NULL) ||
115 (t && mppi->mppi_timer != NULL)) {
116 lck_mtx_lock(&mppi->mppi_lock);
117 if (gc && mppi->mppi_gc != NULL) {
118 gc_act += mppi->mppi_gc(mppi);
119 }
120 if (t && mppi->mppi_timer != NULL) {
121 t_act += mppi->mppi_timer(mppi);
122 }
123 lck_mtx_unlock(&mppi->mppi_lock);
124 }
125 }
126 lck_mtx_unlock(&mp_lock);
127
128 lck_mtx_lock_spin(&mp_timeout_lock);
129 }
130
131 /* lock was dropped above, so check first before overriding */
132 if (!mp_garbage_collecting) {
133 mp_garbage_collecting = (gc_act != 0);
134 }
135 if (!mp_ticking) {
136 mp_ticking = (t_act != 0);
137 }
138
139 /* re-arm the timer if there's work to do */
140 mp_timeout_run = FALSE;
141 mp_sched_timeout();
142 lck_mtx_unlock(&mp_timeout_lock);
143 }
144
145 static void
146 mp_sched_timeout(void)
147 {
148 LCK_MTX_ASSERT(&mp_timeout_lock, LCK_MTX_ASSERT_OWNED);
149
150 if (!mp_timeout_run && (mp_garbage_collecting || mp_ticking)) {
151 lck_mtx_convert_spin(&mp_timeout_lock);
152 mp_timeout_run = TRUE;
153 timeout(mp_timeout, NULL, hz);
154 }
155 }
156
157 void
158 mp_gc_sched(void)
159 {
160 lck_mtx_lock_spin(&mp_timeout_lock);
161 mp_garbage_collecting = TRUE;
162 mp_sched_timeout();
163 lck_mtx_unlock(&mp_timeout_lock);
164 }
165
166 void
167 mptcp_timer_sched(void)
168 {
169 lck_mtx_lock_spin(&mp_timeout_lock);
170 mp_ticking = TRUE;
171 mp_sched_timeout();
172 lck_mtx_unlock(&mp_timeout_lock);
173 }
174
175 void
176 mp_pcbinfo_attach(struct mppcbinfo *mppi)
177 {
178 struct mppcbinfo *mppi0;
179
180 lck_mtx_lock(&mp_lock);
181 TAILQ_FOREACH(mppi0, &mppi_head, mppi_entry) {
182 if (mppi0 == mppi) {
183 panic("%s: mppi %p already in the list\n",
184 __func__, mppi);
185 /* NOTREACHED */
186 }
187 }
188 TAILQ_INSERT_TAIL(&mppi_head, mppi, mppi_entry);
189 lck_mtx_unlock(&mp_lock);
190 }
191
192 int
193 mp_pcbinfo_detach(struct mppcbinfo *mppi)
194 {
195 struct mppcbinfo *mppi0;
196 int error = 0;
197
198 lck_mtx_lock(&mp_lock);
199 TAILQ_FOREACH(mppi0, &mppi_head, mppi_entry) {
200 if (mppi0 == mppi) {
201 break;
202 }
203 }
204 if (mppi0 != NULL) {
205 TAILQ_REMOVE(&mppi_head, mppi0, mppi_entry);
206 } else {
207 error = ENXIO;
208 }
209 lck_mtx_unlock(&mp_lock);
210
211 return error;
212 }
213
214 int
215 mp_pcballoc(struct socket *so, struct mppcbinfo *mppi)
216 {
217 struct mppcb *mpp = NULL;
218 int error;
219
220 VERIFY(mpsotomppcb(so) == NULL);
221
222 mpp = zalloc(mppi->mppi_zone);
223 if (mpp == NULL) {
224 return ENOBUFS;
225 }
226
227 bzero(mpp, mppi->mppi_size);
228 lck_mtx_init(&mpp->mpp_lock, mppi->mppi_lock_grp, mppi->mppi_lock_attr);
229 mpp->mpp_pcbinfo = mppi;
230 mpp->mpp_state = MPPCB_STATE_INUSE;
231 mpp->mpp_socket = so;
232 so->so_pcb = mpp;
233
234 error = mptcp_session_create(mpp);
235 if (error) {
236 lck_mtx_destroy(&mpp->mpp_lock, mppi->mppi_lock_grp);
237 zfree(mppi->mppi_zone, mpp);
238 return error;
239 }
240
241 lck_mtx_lock(&mppi->mppi_lock);
242 mpp->mpp_flags |= MPP_ATTACHED;
243 TAILQ_INSERT_TAIL(&mppi->mppi_pcbs, mpp, mpp_entry);
244 mppi->mppi_count++;
245
246 lck_mtx_unlock(&mppi->mppi_lock);
247
248 return 0;
249 }
250
251 void
252 mp_pcbdetach(struct socket *mp_so)
253 {
254 struct mppcb *mpp = mpsotomppcb(mp_so);
255
256 mpp->mpp_state = MPPCB_STATE_DEAD;
257
258 mp_gc_sched();
259 }
260
261 void
262 mp_pcbdispose(struct mppcb *mpp)
263 {
264 struct mppcbinfo *mppi = mpp->mpp_pcbinfo;
265
266 VERIFY(mppi != NULL);
267
268 LCK_MTX_ASSERT(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED);
269 mpp_lock_assert_held(mpp);
270
271 VERIFY(mpp->mpp_state == MPPCB_STATE_DEAD);
272 VERIFY(mpp->mpp_flags & MPP_ATTACHED);
273
274 mpp->mpp_flags &= ~MPP_ATTACHED;
275 TAILQ_REMOVE(&mppi->mppi_pcbs, mpp, mpp_entry);
276 VERIFY(mppi->mppi_count != 0);
277 mppi->mppi_count--;
278
279 if (mppi->mppi_count == 0) {
280 if (mptcp_cellicon_refcount) {
281 os_log_error(mptcp_log_handle, "%s: No more MPTCP-flows, but cell icon counter is %u\n",
282 __func__, mptcp_cellicon_refcount);
283 mptcp_clear_cellicon();
284 mptcp_cellicon_refcount = 0;
285 }
286 }
287
288 VERIFY(mpp->mpp_inside == 0);
289 mpp_unlock(mpp);
290
291 #if NECP
292 necp_mppcb_dispose(mpp);
293 #endif /* NECP */
294
295 lck_mtx_destroy(&mpp->mpp_lock, mppi->mppi_lock_grp);
296
297 VERIFY(mpp->mpp_socket != NULL);
298 VERIFY(mpp->mpp_socket->so_usecount == 0);
299 mpp->mpp_socket->so_pcb = NULL;
300 mpp->mpp_socket = NULL;
301
302 zfree(mppi->mppi_zone, mpp);
303 }
304
305 static int
306 mp_getaddr_v4(struct socket *mp_so, struct sockaddr **nam, boolean_t peer)
307 {
308 struct mptses *mpte = mpsotompte(mp_so);
309 struct sockaddr_in *sin;
310
311 /*
312 * Do the malloc first in case it blocks.
313 */
314 MALLOC(sin, struct sockaddr_in *, sizeof(*sin), M_SONAME, M_WAITOK);
315 if (sin == NULL) {
316 return ENOBUFS;
317 }
318 bzero(sin, sizeof(*sin));
319 sin->sin_family = AF_INET;
320 sin->sin_len = sizeof(*sin);
321
322 if (!peer) {
323 sin->sin_port = mpte->__mpte_src_v4.sin_port;
324 sin->sin_addr = mpte->__mpte_src_v4.sin_addr;
325 } else {
326 sin->sin_port = mpte->__mpte_dst_v4.sin_port;
327 sin->sin_addr = mpte->__mpte_dst_v4.sin_addr;
328 }
329
330 *nam = (struct sockaddr *)sin;
331 return 0;
332 }
333
334 static int
335 mp_getaddr_v6(struct socket *mp_so, struct sockaddr **nam, boolean_t peer)
336 {
337 struct mptses *mpte = mpsotompte(mp_so);
338 struct in6_addr addr;
339 in_port_t port;
340
341 if (!peer) {
342 port = mpte->__mpte_src_v6.sin6_port;
343 addr = mpte->__mpte_src_v6.sin6_addr;
344 } else {
345 port = mpte->__mpte_dst_v6.sin6_port;
346 addr = mpte->__mpte_dst_v6.sin6_addr;
347 }
348
349 *nam = in6_sockaddr(port, &addr);
350 if (*nam == NULL) {
351 return ENOBUFS;
352 }
353
354 return 0;
355 }
356
357 int
358 mp_getsockaddr(struct socket *mp_so, struct sockaddr **nam)
359 {
360 struct mptses *mpte = mpsotompte(mp_so);
361
362 if (mpte->mpte_src.sa_family == AF_INET || mpte->mpte_src.sa_family == 0) {
363 return mp_getaddr_v4(mp_so, nam, false);
364 } else if (mpte->mpte_src.sa_family == AF_INET6) {
365 return mp_getaddr_v6(mp_so, nam, false);
366 } else {
367 return EINVAL;
368 }
369 }
370
371 int
372 mp_getpeeraddr(struct socket *mp_so, struct sockaddr **nam)
373 {
374 struct mptses *mpte = mpsotompte(mp_so);
375
376 if (mpte->mpte_src.sa_family == AF_INET || mpte->mpte_src.sa_family == 0) {
377 return mp_getaddr_v4(mp_so, nam, true);
378 } else if (mpte->mpte_src.sa_family == AF_INET6) {
379 return mp_getaddr_v6(mp_so, nam, true);
380 } else {
381 return EINVAL;
382 }
383 }