]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/in_pcb.c
xnu-4903.241.1.tar.gz
[apple/xnu.git] / bsd / netinet / in_pcb.c
1 /*
2 * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
62 */
63
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/domain.h>
69 #include <sys/protosw.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/proc.h>
73 #include <sys/kernel.h>
74 #include <sys/sysctl.h>
75 #include <sys/mcache.h>
76 #include <sys/kauth.h>
77 #include <sys/priv.h>
78 #include <sys/proc_uuid_policy.h>
79 #include <sys/syslog.h>
80 #include <sys/priv.h>
81 #include <net/dlil.h>
82
83 #include <libkern/OSAtomic.h>
84 #include <kern/locks.h>
85
86 #include <machine/limits.h>
87
88 #include <kern/zalloc.h>
89
90 #include <net/if.h>
91 #include <net/if_types.h>
92 #include <net/route.h>
93 #include <net/flowhash.h>
94 #include <net/flowadv.h>
95 #include <net/nat464_utils.h>
96 #include <net/ntstat.h>
97
98 #include <netinet/in.h>
99 #include <netinet/in_pcb.h>
100 #include <netinet/in_var.h>
101 #include <netinet/ip_var.h>
102 #if INET6
103 #include <netinet/ip6.h>
104 #include <netinet6/ip6_var.h>
105 #endif /* INET6 */
106
107 #include <sys/kdebug.h>
108 #include <sys/random.h>
109
110 #include <dev/random/randomdev.h>
111 #include <mach/boolean.h>
112
113 #include <pexpert/pexpert.h>
114
115 #if NECP
116 #include <net/necp.h>
117 #endif
118
119 #include <sys/stat.h>
120 #include <sys/ubc.h>
121 #include <sys/vnode.h>
122
/*
 * Lock group and attributes used for every mutex initialized in
 * in_pcbinit().
 */
static lck_grp_t *inpcb_lock_grp;
static lck_attr_t *inpcb_lock_attr;
static lck_grp_attr_t *inpcb_lock_grp_attr;
decl_lck_mtx_data(static, inpcb_lock); /* global INPCB lock */
/* Protects the timer scheduling state below (inpcb_timeout_run & flags) */
decl_lck_mtx_data(static, inpcb_timeout_lock);

/* List of attached inpcbinfo structures; walked under inpcb_lock */
static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);

static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */
static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */
/* TRUE while the most recently scheduled thread call is the no-leeway one */
static boolean_t inpcb_fast_timer_on = FALSE;

/*
 * Once the sum of pending nodelay and fast gc requests of a pcbinfo
 * exceeds this value, inpcb_gc_sched() forces new requests to be "fast".
 */
#define INPCB_GCREQ_THRESHOLD 50000

/* Regular thread call, plus a second one used for an extra fast timer */
static thread_call_t inpcb_thread_call, inpcb_fast_thread_call;
static void inpcb_sched_timeout(void);
static void inpcb_sched_lazy_timeout(void);
static void _inpcb_sched_timeout(unsigned int);
static void inpcb_timeout(void *, void *);
const int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */
extern int tvtohz(struct timeval *);

#if CONFIG_PROC_UUID_POLICY
static void inp_update_cellular_policy(struct inpcb *, boolean_t);
#if NECP
static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
#endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */

/* kdebug codes for the PCB lookup routines */
#define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
#define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))

/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 *
 * A range whose "first" value is larger than its "last" value (as is the
 * case for the low range) is searched downwards by in_pcbbind().
 */
int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
167 #define RANGECHK(var, min, max) \
168 if ((var) < (min)) { (var) = (min); } \
169 else if ((var) > (max)) { (var) = (max); }
170
171 static int
172 sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
173 {
174 #pragma unused(arg1, arg2)
175 int error;
176
177 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
178 if (!error) {
179 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
180 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
181 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
182 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
183 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
184 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
185 }
186 return (error);
187 }
188
189 #undef RANGECHK
190
/*
 * net.inet.ip.portrange.*: read/write access to the port range variables.
 * Every knob goes through sysctl_net_ipport_check.
 */
SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
	CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports");

SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
212
/* Non-zero enables the apn_fallbk_log() messages */
static uint32_t apn_fallbk_debug = 0;
/*
 * Conditional logging helper; "x" is a complete, parenthesized argument
 * list for log(), e.g. apn_fallbk_log((LOG_INFO, "...")).
 */
#define apn_fallbk_log(x) do { if (apn_fallbk_debug >= 1) log x; } while (0)

/*
 * APN fallback is on by default, and tunable through sysctl, only on
 * CONFIG_EMBEDDED builds; elsewhere it is compiled in but disabled.
 */
#if CONFIG_EMBEDDED
static boolean_t apn_fallbk_enabled = TRUE;

SYSCTL_DECL(_net_inet);
SYSCTL_NODE(_net_inet, OID_AUTO, apn_fallback, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "APN Fallback");
SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
	&apn_fallbk_enabled, 0, "APN fallback enable");
SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
	&apn_fallbk_debug, 0, "APN fallback debug enable");
#else
static boolean_t apn_fallbk_enabled = FALSE;
#endif

/* Per-protocol "pick a random anonymous port" switches, defined elsewhere */
extern int udp_use_randomport;
extern int tcp_use_randomport;

/* Structs used for flowhash computation */
/* One address, viewable as IPv4, IPv6 or as raw 8/16/32-bit words */
struct inp_flowhash_key_addr {
	union {
		struct in_addr v4;
		struct in6_addr v6;
		u_int8_t addr8[16];
		u_int16_t addr16[8];
		u_int32_t addr32[4];
	} infha;
};

/* Flow hash key: local/foreign address and port, family, protocol, randoms */
struct inp_flowhash_key {
	struct inp_flowhash_key_addr infh_laddr;
	struct inp_flowhash_key_addr infh_faddr;
	u_int32_t infh_lport;
	u_int32_t infh_fport;
	u_int32_t infh_af;
	u_int32_t infh_proto;
	u_int32_t infh_rand1;
	u_int32_t infh_rand2;
};

static u_int32_t inp_hash_seed = 0;

/* Comparator for the flow-control red-black tree declared below */
static int infc_cmp(const struct inpcb *, const struct inpcb *);

/* Flags used by inp_fc_getinp */
#define INPFC_SOLOCKED 0x1
#define INPFC_REMOVE 0x2
static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);

static void inp_fc_feedback(struct inpcb *);
extern void tcp_remove_from_time_wait(struct inpcb *inp);

/* Protects inp_fc_tree and key_inp */
decl_lck_mtx_data(static, inp_fc_lck);

/* Red-black tree of inpcbs, linked through infc_link, ordered by infc_cmp */
RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);

/*
 * Use this inp as a key to find an inp in the flowhash tree.
 * Accesses to it are protected by inp_fc_lck.
 */
struct inpcb key_inp;
278 /*
279 * in_pcb.c: manage the Protocol Control Blocks.
280 */
281
282 void
283 in_pcbinit(void)
284 {
285 static int inpcb_initialized = 0;
286
287 VERIFY(!inpcb_initialized);
288 inpcb_initialized = 1;
289
290 inpcb_lock_grp_attr = lck_grp_attr_alloc_init();
291 inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr);
292 inpcb_lock_attr = lck_attr_alloc_init();
293 lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr);
294 lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr);
295 inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout,
296 NULL, THREAD_CALL_PRIORITY_KERNEL);
297 inpcb_fast_thread_call = thread_call_allocate_with_priority(
298 inpcb_timeout, NULL, THREAD_CALL_PRIORITY_KERNEL);
299 if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL)
300 panic("unable to alloc the inpcb thread call");
301
302 /*
303 * Initialize data structures required to deliver
304 * flow advisories.
305 */
306 lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr);
307 lck_mtx_lock(&inp_fc_lck);
308 RB_INIT(&inp_fc_tree);
309 bzero(&key_inp, sizeof(key_inp));
310 lck_mtx_unlock(&inp_fc_lck);
311 }
312
313 #define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \
314 ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
315 static void
316 inpcb_timeout(void *arg0, void *arg1)
317 {
318 #pragma unused(arg0, arg1)
319 struct inpcbinfo *ipi;
320 boolean_t t, gc;
321 struct intimercount gccnt, tmcnt;
322
323 /*
324 * Update coarse-grained networking timestamp (in sec.); the idea
325 * is to piggy-back on the timeout callout to update the counter
326 * returnable via net_uptime().
327 */
328 net_update_uptime();
329
330 bzero(&gccnt, sizeof(gccnt));
331 bzero(&tmcnt, sizeof(tmcnt));
332
333 lck_mtx_lock_spin(&inpcb_timeout_lock);
334 gc = inpcb_garbage_collecting;
335 inpcb_garbage_collecting = FALSE;
336
337 t = inpcb_ticking;
338 inpcb_ticking = FALSE;
339
340 if (gc || t) {
341 lck_mtx_unlock(&inpcb_timeout_lock);
342
343 lck_mtx_lock(&inpcb_lock);
344 TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
345 if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
346 bzero(&ipi->ipi_gc_req,
347 sizeof(ipi->ipi_gc_req));
348 if (gc && ipi->ipi_gc != NULL) {
349 ipi->ipi_gc(ipi);
350 gccnt.intimer_lazy +=
351 ipi->ipi_gc_req.intimer_lazy;
352 gccnt.intimer_fast +=
353 ipi->ipi_gc_req.intimer_fast;
354 gccnt.intimer_nodelay +=
355 ipi->ipi_gc_req.intimer_nodelay;
356 }
357 }
358 if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
359 bzero(&ipi->ipi_timer_req,
360 sizeof(ipi->ipi_timer_req));
361 if (t && ipi->ipi_timer != NULL) {
362 ipi->ipi_timer(ipi);
363 tmcnt.intimer_lazy +=
364 ipi->ipi_timer_req.intimer_lazy;
365 tmcnt.intimer_fast +=
366 ipi->ipi_timer_req.intimer_fast;
367 tmcnt.intimer_nodelay +=
368 ipi->ipi_timer_req.intimer_nodelay;
369 }
370 }
371 }
372 lck_mtx_unlock(&inpcb_lock);
373 lck_mtx_lock_spin(&inpcb_timeout_lock);
374 }
375
376 /* lock was dropped above, so check first before overriding */
377 if (!inpcb_garbage_collecting)
378 inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
379 if (!inpcb_ticking)
380 inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
381
382 /* re-arm the timer if there's work to do */
383 inpcb_timeout_run--;
384 VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);
385
386 if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0)
387 inpcb_sched_timeout();
388 else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5)
389 /* be lazy when idle with little activity */
390 inpcb_sched_lazy_timeout();
391 else
392 inpcb_sched_timeout();
393
394 lck_mtx_unlock(&inpcb_timeout_lock);
395 }
396
/*
 * Request the INPCB timer without leeway (an offset of 0 selects the
 * "fast" path of _inpcb_sched_timeout).
 */
static void
inpcb_sched_timeout(void)
{
	_inpcb_sched_timeout(0);
}

/*
 * Request the INPCB timer with inpcb_timeout_lazy seconds of leeway.
 */
static void
inpcb_sched_lazy_timeout(void)
{
	_inpcb_sched_timeout(inpcb_timeout_lazy);
}
408
409 static void
410 _inpcb_sched_timeout(unsigned int offset)
411 {
412 uint64_t deadline, leeway;
413
414 clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline);
415 LCK_MTX_ASSERT(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
416 if (inpcb_timeout_run == 0 &&
417 (inpcb_garbage_collecting || inpcb_ticking)) {
418 lck_mtx_convert_spin(&inpcb_timeout_lock);
419 inpcb_timeout_run++;
420 if (offset == 0) {
421 inpcb_fast_timer_on = TRUE;
422 thread_call_enter_delayed(inpcb_thread_call,
423 deadline);
424 } else {
425 inpcb_fast_timer_on = FALSE;
426 clock_interval_to_absolutetime_interval(offset,
427 NSEC_PER_SEC, &leeway);
428 thread_call_enter_delayed_with_leeway(
429 inpcb_thread_call, NULL, deadline, leeway,
430 THREAD_CALL_DELAY_LEEWAY);
431 }
432 } else if (inpcb_timeout_run == 1 &&
433 offset == 0 && !inpcb_fast_timer_on) {
434 /*
435 * Since the request was for a fast timer but the
436 * scheduled timer is a lazy timer, try to schedule
437 * another instance of fast timer also.
438 */
439 lck_mtx_convert_spin(&inpcb_timeout_lock);
440 inpcb_timeout_run++;
441 inpcb_fast_timer_on = TRUE;
442 thread_call_enter_delayed(inpcb_fast_thread_call, deadline);
443 }
444 }
445
446 void
447 inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
448 {
449 u_int32_t gccnt;
450
451 lck_mtx_lock_spin(&inpcb_timeout_lock);
452 inpcb_garbage_collecting = TRUE;
453 gccnt = ipi->ipi_gc_req.intimer_nodelay +
454 ipi->ipi_gc_req.intimer_fast;
455
456 if (gccnt > INPCB_GCREQ_THRESHOLD) {
457 type = INPCB_TIMER_FAST;
458 }
459
460 switch (type) {
461 case INPCB_TIMER_NODELAY:
462 atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1);
463 inpcb_sched_timeout();
464 break;
465 case INPCB_TIMER_FAST:
466 atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
467 inpcb_sched_timeout();
468 break;
469 default:
470 atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);
471 inpcb_sched_lazy_timeout();
472 break;
473 }
474 lck_mtx_unlock(&inpcb_timeout_lock);
475 }
476
477 void
478 inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
479 {
480
481 lck_mtx_lock_spin(&inpcb_timeout_lock);
482 inpcb_ticking = TRUE;
483 switch (type) {
484 case INPCB_TIMER_NODELAY:
485 atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1);
486 inpcb_sched_timeout();
487 break;
488 case INPCB_TIMER_FAST:
489 atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
490 inpcb_sched_timeout();
491 break;
492 default:
493 atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1);
494 inpcb_sched_lazy_timeout();
495 break;
496 }
497 lck_mtx_unlock(&inpcb_timeout_lock);
498 }
499
500 void
501 in_pcbinfo_attach(struct inpcbinfo *ipi)
502 {
503 struct inpcbinfo *ipi0;
504
505 lck_mtx_lock(&inpcb_lock);
506 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
507 if (ipi0 == ipi) {
508 panic("%s: ipi %p already in the list\n",
509 __func__, ipi);
510 /* NOTREACHED */
511 }
512 }
513 TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
514 lck_mtx_unlock(&inpcb_lock);
515 }
516
517 int
518 in_pcbinfo_detach(struct inpcbinfo *ipi)
519 {
520 struct inpcbinfo *ipi0;
521 int error = 0;
522
523 lck_mtx_lock(&inpcb_lock);
524 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
525 if (ipi0 == ipi)
526 break;
527 }
528 if (ipi0 != NULL)
529 TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
530 else
531 error = ENXIO;
532 lck_mtx_unlock(&inpcb_lock);
533
534 return (error);
535 }
536
537 /*
538 * Allocate a PCB and associate it with the socket.
539 *
540 * Returns: 0 Success
541 * ENOBUFS
542 * ENOMEM
543 */
544 int
545 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
546 {
547 #pragma unused(p)
548 struct inpcb *inp;
549 caddr_t temp;
550 #if CONFIG_MACF_NET
551 int mac_error;
552 #endif /* CONFIG_MACF_NET */
553
554 if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
555 inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone);
556 if (inp == NULL)
557 return (ENOBUFS);
558 bzero((caddr_t)inp, sizeof (*inp));
559 } else {
560 inp = (struct inpcb *)(void *)so->so_saved_pcb;
561 temp = inp->inp_saved_ppcb;
562 bzero((caddr_t)inp, sizeof (*inp));
563 inp->inp_saved_ppcb = temp;
564 }
565
566 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
567 inp->inp_pcbinfo = pcbinfo;
568 inp->inp_socket = so;
569 #if CONFIG_MACF_NET
570 mac_error = mac_inpcb_label_init(inp, M_WAITOK);
571 if (mac_error != 0) {
572 if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0)
573 zfree(pcbinfo->ipi_zone, inp);
574 return (mac_error);
575 }
576 mac_inpcb_label_associate(so, inp);
577 #endif /* CONFIG_MACF_NET */
578 /* make sure inp_stat is always 64-bit aligned */
579 inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
580 sizeof (u_int64_t));
581 if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
582 sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) {
583 panic("%s: insufficient space to align inp_stat", __func__);
584 /* NOTREACHED */
585 }
586
587 /* make sure inp_cstat is always 64-bit aligned */
588 inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
589 sizeof (u_int64_t));
590 if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
591 sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) {
592 panic("%s: insufficient space to align inp_cstat", __func__);
593 /* NOTREACHED */
594 }
595
596 /* make sure inp_wstat is always 64-bit aligned */
597 inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
598 sizeof (u_int64_t));
599 if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
600 sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) {
601 panic("%s: insufficient space to align inp_wstat", __func__);
602 /* NOTREACHED */
603 }
604
605 /* make sure inp_Wstat is always 64-bit aligned */
606 inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
607 sizeof (u_int64_t));
608 if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
609 sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) {
610 panic("%s: insufficient space to align inp_Wstat", __func__);
611 /* NOTREACHED */
612 }
613
614 so->so_pcb = (caddr_t)inp;
615
616 if (so->so_proto->pr_flags & PR_PCBLOCK) {
617 lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
618 pcbinfo->ipi_lock_attr);
619 }
620
621 #if INET6
622 if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on)
623 inp->inp_flags |= IN6P_IPV6_V6ONLY;
624
625 if (ip6_auto_flowlabel)
626 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
627 #endif /* INET6 */
628 if (intcoproc_unrestricted)
629 inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
630
631 (void) inp_update_policy(inp);
632
633 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
634 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
635 LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
636 pcbinfo->ipi_count++;
637 lck_rw_done(pcbinfo->ipi_lock);
638 return (0);
639 }
640
641 /*
642 * in_pcblookup_local_and_cleanup does everything
643 * in_pcblookup_local does but it checks for a socket
644 * that's going away. Since we know that the lock is
645 * held read+write when this funciton is called, we
646 * can safely dispose of this socket like the slow
647 * timer would usually do and return NULL. This is
648 * great for bind.
649 */
650 struct inpcb *
651 in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
652 u_int lport_arg, int wild_okay)
653 {
654 struct inpcb *inp;
655
656 /* Perform normal lookup */
657 inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
658
659 /* Check if we found a match but it's waiting to be disposed */
660 if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
661 struct socket *so = inp->inp_socket;
662
663 socket_lock(so, 0);
664
665 if (so->so_usecount == 0) {
666 if (inp->inp_state != INPCB_STATE_DEAD)
667 in_pcbdetach(inp);
668 in_pcbdispose(inp); /* will unlock & destroy */
669 inp = NULL;
670 } else {
671 socket_unlock(so, 0);
672 }
673 }
674
675 return (inp);
676 }
677
678 static void
679 in_pcb_conflict_post_msg(u_int16_t port)
680 {
681 /*
682 * Radar 5523020 send a kernel event notification if a
683 * non-participating socket tries to bind the port a socket
684 * who has set SOF_NOTIFYCONFLICT owns.
685 */
686 struct kev_msg ev_msg;
687 struct kev_in_portinuse in_portinuse;
688
689 bzero(&in_portinuse, sizeof (struct kev_in_portinuse));
690 bzero(&ev_msg, sizeof (struct kev_msg));
691 in_portinuse.port = ntohs(port); /* port in host order */
692 in_portinuse.req_pid = proc_selfpid();
693 ev_msg.vendor_code = KEV_VENDOR_APPLE;
694 ev_msg.kev_class = KEV_NETWORK_CLASS;
695 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
696 ev_msg.event_code = KEV_INET_PORTINUSE;
697 ev_msg.dv[0].data_ptr = &in_portinuse;
698 ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse);
699 ev_msg.dv[1].data_length = 0;
700 dlil_post_complete_msg(NULL, &ev_msg);
701 }
702
703 /*
704 * Bind an INPCB to an address and/or port. This routine should not alter
705 * the caller-supplied local address "nam".
706 *
707 * Returns: 0 Success
708 * EADDRNOTAVAIL Address not available.
709 * EINVAL Invalid argument
710 * EAFNOSUPPORT Address family not supported [notdef]
711 * EACCES Permission denied
712 * EADDRINUSE Address in use
713 * EAGAIN Resource unavailable, try again
714 * priv_check_cred:EPERM Operation not permitted
715 */
716 int
717 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
718 {
719 struct socket *so = inp->inp_socket;
720 unsigned short *lastport;
721 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
722 u_short lport = 0, rand_port = 0;
723 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
724 int error, randomport, conflict = 0;
725 boolean_t anonport = FALSE;
726 kauth_cred_t cred;
727 struct in_addr laddr;
728 struct ifnet *outif = NULL;
729
730 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
731 return (EADDRNOTAVAIL);
732 if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
733 wild = 1;
734
735 bzero(&laddr, sizeof(laddr));
736
737 socket_unlock(so, 0); /* keep reference on socket */
738 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
739 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
740 /* another thread completed the bind */
741 lck_rw_done(pcbinfo->ipi_lock);
742 socket_lock(so, 0);
743 return (EINVAL);
744 }
745
746 if (nam != NULL) {
747 if (nam->sa_len != sizeof (struct sockaddr_in)) {
748 lck_rw_done(pcbinfo->ipi_lock);
749 socket_lock(so, 0);
750 return (EINVAL);
751 }
752 #if 0
753 /*
754 * We should check the family, but old programs
755 * incorrectly fail to initialize it.
756 */
757 if (nam->sa_family != AF_INET) {
758 lck_rw_done(pcbinfo->ipi_lock);
759 socket_lock(so, 0);
760 return (EAFNOSUPPORT);
761 }
762 #endif /* 0 */
763 lport = SIN(nam)->sin_port;
764
765 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
766 /*
767 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
768 * allow complete duplication of binding if
769 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
770 * and a multicast address is bound on both
771 * new and duplicated sockets.
772 */
773 if (so->so_options & SO_REUSEADDR)
774 reuseport = SO_REUSEADDR|SO_REUSEPORT;
775 } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
776 struct sockaddr_in sin;
777 struct ifaddr *ifa;
778
779 /* Sanitized for interface address searches */
780 bzero(&sin, sizeof (sin));
781 sin.sin_family = AF_INET;
782 sin.sin_len = sizeof (struct sockaddr_in);
783 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
784
785 ifa = ifa_ifwithaddr(SA(&sin));
786 if (ifa == NULL) {
787 lck_rw_done(pcbinfo->ipi_lock);
788 socket_lock(so, 0);
789 return (EADDRNOTAVAIL);
790 } else {
791 /*
792 * Opportunistically determine the outbound
793 * interface that may be used; this may not
794 * hold true if we end up using a route
795 * going over a different interface, e.g.
796 * when sending to a local address. This
797 * will get updated again after sending.
798 */
799 IFA_LOCK(ifa);
800 outif = ifa->ifa_ifp;
801 IFA_UNLOCK(ifa);
802 IFA_REMREF(ifa);
803 }
804 }
805 if (lport != 0) {
806 struct inpcb *t;
807 uid_t u;
808
809 #if !CONFIG_EMBEDDED
810 if (ntohs(lport) < IPPORT_RESERVED &&
811 SIN(nam)->sin_addr.s_addr != 0) {
812 cred = kauth_cred_proc_ref(p);
813 error = priv_check_cred(cred,
814 PRIV_NETINET_RESERVEDPORT, 0);
815 kauth_cred_unref(&cred);
816 if (error != 0) {
817 lck_rw_done(pcbinfo->ipi_lock);
818 socket_lock(so, 0);
819 return (EACCES);
820 }
821 }
822 #endif /* !CONFIG_EMBEDDED */
823 if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
824 (u = kauth_cred_getuid(so->so_cred)) != 0 &&
825 (t = in_pcblookup_local_and_cleanup(
826 inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
827 INPLOOKUP_WILDCARD)) != NULL &&
828 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
829 t->inp_laddr.s_addr != INADDR_ANY ||
830 !(t->inp_socket->so_options & SO_REUSEPORT)) &&
831 (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
832 !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
833 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
834 t->inp_laddr.s_addr != INADDR_ANY)) {
835 if ((t->inp_socket->so_flags &
836 SOF_NOTIFYCONFLICT) &&
837 !(so->so_flags & SOF_NOTIFYCONFLICT))
838 conflict = 1;
839
840 lck_rw_done(pcbinfo->ipi_lock);
841
842 if (conflict)
843 in_pcb_conflict_post_msg(lport);
844
845 socket_lock(so, 0);
846 return (EADDRINUSE);
847 }
848 t = in_pcblookup_local_and_cleanup(pcbinfo,
849 SIN(nam)->sin_addr, lport, wild);
850 if (t != NULL &&
851 (reuseport & t->inp_socket->so_options) == 0) {
852 #if INET6
853 if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
854 t->inp_laddr.s_addr != INADDR_ANY ||
855 SOCK_DOM(so) != PF_INET6 ||
856 SOCK_DOM(t->inp_socket) != PF_INET6)
857 #endif /* INET6 */
858 {
859
860 if ((t->inp_socket->so_flags &
861 SOF_NOTIFYCONFLICT) &&
862 !(so->so_flags & SOF_NOTIFYCONFLICT))
863 conflict = 1;
864
865 lck_rw_done(pcbinfo->ipi_lock);
866
867 if (conflict)
868 in_pcb_conflict_post_msg(lport);
869 socket_lock(so, 0);
870 return (EADDRINUSE);
871 }
872 }
873 }
874 laddr = SIN(nam)->sin_addr;
875 }
876 if (lport == 0) {
877 u_short first, last;
878 int count;
879 bool found;
880
881 randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
882 (so->so_type == SOCK_STREAM ? tcp_use_randomport :
883 udp_use_randomport);
884
885 /*
886 * Even though this looks similar to the code in
887 * in6_pcbsetport, the v6 vs v4 checks are different.
888 */
889 anonport = TRUE;
890 if (inp->inp_flags & INP_HIGHPORT) {
891 first = ipport_hifirstauto; /* sysctl */
892 last = ipport_hilastauto;
893 lastport = &pcbinfo->ipi_lasthi;
894 } else if (inp->inp_flags & INP_LOWPORT) {
895 cred = kauth_cred_proc_ref(p);
896 error = priv_check_cred(cred,
897 PRIV_NETINET_RESERVEDPORT, 0);
898 kauth_cred_unref(&cred);
899 if (error != 0) {
900 lck_rw_done(pcbinfo->ipi_lock);
901 socket_lock(so, 0);
902 return (error);
903 }
904 first = ipport_lowfirstauto; /* 1023 */
905 last = ipport_lowlastauto; /* 600 */
906 lastport = &pcbinfo->ipi_lastlow;
907 } else {
908 first = ipport_firstauto; /* sysctl */
909 last = ipport_lastauto;
910 lastport = &pcbinfo->ipi_lastport;
911 }
912 /* No point in randomizing if only one port is available */
913
914 if (first == last)
915 randomport = 0;
916 /*
917 * Simple check to ensure all ports are not used up causing
918 * a deadlock here.
919 *
920 * We split the two cases (up and down) so that the direction
921 * is not being tested on each round of the loop.
922 */
923 if (first > last) {
924 struct in_addr lookup_addr;
925
926 /*
927 * counting down
928 */
929 if (randomport) {
930 read_frandom(&rand_port, sizeof (rand_port));
931 *lastport =
932 first - (rand_port % (first - last));
933 }
934 count = first - last;
935
936 lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
937 inp->inp_laddr;
938
939 found = false;
940 do {
941 if (count-- < 0) { /* completely used? */
942 lck_rw_done(pcbinfo->ipi_lock);
943 socket_lock(so, 0);
944 return (EADDRNOTAVAIL);
945 }
946 --*lastport;
947 if (*lastport > first || *lastport < last)
948 *lastport = first;
949 lport = htons(*lastport);
950
951 found = in_pcblookup_local_and_cleanup(pcbinfo,
952 lookup_addr, lport, wild) == NULL;
953 } while (!found);
954 } else {
955 struct in_addr lookup_addr;
956
957 /*
958 * counting up
959 */
960 if (randomport) {
961 read_frandom(&rand_port, sizeof (rand_port));
962 *lastport =
963 first + (rand_port % (first - last));
964 }
965 count = last - first;
966
967 lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
968 inp->inp_laddr;
969
970 found = false;
971 do {
972 if (count-- < 0) { /* completely used? */
973 lck_rw_done(pcbinfo->ipi_lock);
974 socket_lock(so, 0);
975 return (EADDRNOTAVAIL);
976 }
977 ++*lastport;
978 if (*lastport < first || *lastport > last)
979 *lastport = first;
980 lport = htons(*lastport);
981
982 found = in_pcblookup_local_and_cleanup(pcbinfo,
983 lookup_addr, lport, wild) == NULL;
984 } while (!found);
985 }
986 }
987 socket_lock(so, 0);
988
989 /*
990 * We unlocked socket's protocol lock for a long time.
991 * The socket might have been dropped/defuncted.
992 * Checking if world has changed since.
993 */
994 if (inp->inp_state == INPCB_STATE_DEAD) {
995 lck_rw_done(pcbinfo->ipi_lock);
996 return (ECONNABORTED);
997 }
998
999 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
1000 lck_rw_done(pcbinfo->ipi_lock);
1001 return (EINVAL);
1002 }
1003
1004 if (laddr.s_addr != INADDR_ANY) {
1005 inp->inp_laddr = laddr;
1006 inp->inp_last_outifp = outif;
1007 }
1008 inp->inp_lport = lport;
1009 if (anonport)
1010 inp->inp_flags |= INP_ANONPORT;
1011
1012 if (in_pcbinshash(inp, 1) != 0) {
1013 inp->inp_laddr.s_addr = INADDR_ANY;
1014 inp->inp_last_outifp = NULL;
1015
1016 inp->inp_lport = 0;
1017 if (anonport)
1018 inp->inp_flags &= ~INP_ANONPORT;
1019 lck_rw_done(pcbinfo->ipi_lock);
1020 return (EAGAIN);
1021 }
1022 lck_rw_done(pcbinfo->ipi_lock);
1023 sflt_notify(so, sock_evt_bound, NULL);
1024 return (0);
1025 }
1026
/*
 * Destination filter used by apn_fallback_required(): evaluates to non-zero
 * when the IPv4 address stored in the sockaddr_in pointed to by (a) is
 * classified as link-local, loopback, zeronet, multicast or private by the
 * corresponding IN_* macros.  The address is converted to host byte order
 * before each test.  Note that (a) is evaluated several times, so it must
 * not have side effects.
 */
#define	APN_FALLBACK_IP_FILTER(a)	\
	(IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) ||	\
	IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) ||	\
	IN_ZERONET(ntohl((a)->sin_addr.s_addr)) ||	\
	IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) ||	\
	IN_PRIVATE(ntohl((a)->sin_addr.s_addr)))

/*
 * Minimum distance, in net_uptime() units (presumably seconds -- confirm),
 * between two APN fallback notifications; requests arriving sooner are
 * throttled by apn_fallback_required().
 */
#define	APN_FALLBACK_NOTIF_INTERVAL	2 /* Magic Number */
/*
 * net_uptime() value recorded by apn_fallback_trigger() when the most
 * recent notification was posted.  It is read and written without any
 * locking in this file.
 */
static uint64_t last_apn_fallback = 0;
1036
1037 static boolean_t
1038 apn_fallback_required (proc_t proc, struct socket *so, struct sockaddr_in *p_dstv4)
1039 {
1040 uint64_t timenow;
1041 struct sockaddr_storage lookup_default_addr;
1042 struct rtentry *rt = NULL;
1043
1044 VERIFY(proc != NULL);
1045
1046 if (apn_fallbk_enabled == FALSE)
1047 return FALSE;
1048
1049 if (proc == kernproc)
1050 return FALSE;
1051
1052 if (so && (so->so_options & SO_NOAPNFALLBK))
1053 return FALSE;
1054
1055 timenow = net_uptime();
1056 if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) {
1057 apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n"));
1058 return FALSE;
1059 }
1060
1061 if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4))
1062 return FALSE;
1063
1064 /* Check if we have unscoped IPv6 default route through cellular */
1065 bzero(&lookup_default_addr, sizeof(lookup_default_addr));
1066 lookup_default_addr.ss_family = AF_INET6;
1067 lookup_default_addr.ss_len = sizeof(struct sockaddr_in6);
1068
1069 rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
1070 if (NULL == rt) {
1071 apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
1072 "unscoped default IPv6 route.\n"));
1073 return FALSE;
1074 }
1075
1076 if (!IFNET_IS_CELLULAR(rt->rt_ifp)) {
1077 rtfree(rt);
1078 apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
1079 "unscoped default IPv6 route through cellular interface.\n"));
1080 return FALSE;
1081 }
1082
1083 /*
1084 * We have a default IPv6 route, ensure that
1085 * we do not have IPv4 default route before triggering
1086 * the event
1087 */
1088 rtfree(rt);
1089 rt = NULL;
1090
1091 bzero(&lookup_default_addr, sizeof(lookup_default_addr));
1092 lookup_default_addr.ss_family = AF_INET;
1093 lookup_default_addr.ss_len = sizeof(struct sockaddr_in);
1094
1095 rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
1096
1097 if (rt) {
1098 rtfree(rt);
1099 rt = NULL;
1100 apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped "
1101 "IPv4 default route!\n"));
1102 return FALSE;
1103 }
1104
1105 {
1106 /*
1107 * We disable APN fallback if the binary is not a third-party app.
1108 * Note that platform daemons use their process name as a
1109 * bundle ID so we filter out bundle IDs without dots.
1110 */
1111 const char *bundle_id = cs_identity_get(proc);
1112 if (bundle_id == NULL ||
1113 bundle_id[0] == '\0' ||
1114 strchr(bundle_id, '.') == NULL ||
1115 strncmp(bundle_id, "com.apple.", sizeof("com.apple.") - 1) == 0) {
1116 apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-"
1117 "party bundle ID \"%s\"!\n", (bundle_id ? bundle_id : "NULL")));
1118 return FALSE;
1119 }
1120 }
1121
1122 {
1123 /*
1124 * The Apple App Store IPv6 requirement started on
1125 * June 1st, 2016 at 12:00:00 AM PDT.
1126 * We disable APN fallback if the binary is more recent than that.
1127 * We check both atime and birthtime since birthtime is not always supported.
1128 */
1129 static const long ipv6_start_date = 1464764400L;
1130 vfs_context_t context;
1131 struct stat64 sb;
1132 int vn_stat_error;
1133
1134 bzero(&sb, sizeof(struct stat64));
1135 context = vfs_context_create(NULL);
1136 vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, context);
1137 (void)vfs_context_rele(context);
1138
1139 if (vn_stat_error != 0 ||
1140 sb.st_atimespec.tv_sec >= ipv6_start_date ||
1141 sb.st_birthtimespec.tv_sec >= ipv6_start_date) {
1142 apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary "
1143 "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n",
1144 vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec,
1145 sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec));
1146 return FALSE;
1147 }
1148 }
1149 return TRUE;
1150 }
1151
1152 static void
1153 apn_fallback_trigger(proc_t proc, struct socket *so)
1154 {
1155 pid_t pid = 0;
1156 struct kev_msg ev_msg;
1157 struct kev_netevent_apnfallbk_data apnfallbk_data;
1158
1159 last_apn_fallback = net_uptime();
1160 pid = proc_pid(proc);
1161 uuid_t application_uuid;
1162 uuid_clear(application_uuid);
1163 proc_getexecutableuuid(proc, application_uuid,
1164 sizeof(application_uuid));
1165
1166 bzero(&ev_msg, sizeof (struct kev_msg));
1167 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1168 ev_msg.kev_class = KEV_NETWORK_CLASS;
1169 ev_msg.kev_subclass = KEV_NETEVENT_SUBCLASS;
1170 ev_msg.event_code = KEV_NETEVENT_APNFALLBACK;
1171
1172 bzero(&apnfallbk_data, sizeof(apnfallbk_data));
1173
1174 if (so->so_flags & SOF_DELEGATED) {
1175 apnfallbk_data.epid = so->e_pid;
1176 uuid_copy(apnfallbk_data.euuid, so->e_uuid);
1177 } else {
1178 apnfallbk_data.epid = so->last_pid;
1179 uuid_copy(apnfallbk_data.euuid, so->last_uuid);
1180 }
1181
1182 ev_msg.dv[0].data_ptr = &apnfallbk_data;
1183 ev_msg.dv[0].data_length = sizeof(apnfallbk_data);
1184 kev_post_msg(&ev_msg);
1185 apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n"));
1186 }
1187
/*
 * Transform old in_pcbconnect() into an inner subroutine for new
 * in_pcbconnect(); do some validity-checking on the remote address
 * (in "nam") and then determine local host address (i.e., which
 * interface) to use to access that remote host.
 *
 * This routine may alter the caller-supplied remote address "nam".
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 *
 * This routine might return an ifp with a reference held if the caller
 * provides a non-NULL outif, even in the error case.  The caller is
 * responsible for releasing its reference.
 *
 * Parameters:	inp		PCB whose cached route (inp_route) is used
 *				and possibly replaced
 *		nam		Remote address; must be a sockaddr_in
 *		laddr		Out: chosen local address (written only on
 *				success)
 *		ifscope		Interface scope, or IFSCOPE_NONE to fall back
 *				to the socket's bound interface, if any
 *		outif		Optional out: outgoing interface; reset to
 *				NULL on entry
 *		raw		Non-zero skips the port check and the
 *				INADDR_ANY/INADDR_BROADCAST substitution
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported
 *		EADDRNOTAVAIL		Address not available
 *		ENETUNREACH		No interface address could be found
 *		EHOSTUNREACH		Interface use is restricted for "inp"
 */
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
    unsigned int ifscope, struct ifnet **outif, int raw)
{
	struct route *ro = &inp->inp_route;
	struct in_ifaddr *ia = NULL;
	struct sockaddr_in sin;
	int error = 0;
	boolean_t restricted = FALSE;

	if (outif != NULL)
		*outif = NULL;
	if (nam->sa_len != sizeof (struct sockaddr_in))
		return (EINVAL);
	if (SIN(nam)->sin_family != AF_INET)
		return (EAFNOSUPPORT);
	if (raw == 0 && SIN(nam)->sin_port == 0)
		return (EADDRNOTAVAIL);

	/*
	 * If the destination address is INADDR_ANY,
	 * use the primary local address.
	 * If the supplied address is INADDR_BROADCAST,
	 * and the primary interface supports broadcast,
	 * choose the broadcast address for that interface.
	 */
	if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
	    SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) {
		lck_rw_lock_shared(in_ifaddr_rwlock);
		if (!TAILQ_EMPTY(&in_ifaddrhead)) {
			/* "primary" here is the first entry of in_ifaddrhead */
			ia = TAILQ_FIRST(&in_ifaddrhead);
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
				SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
			} else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
				SIN(nam)->sin_addr =
				    SIN(&ia->ia_broadaddr)->sin_addr;
			}
			IFA_UNLOCK(&ia->ia_ifa);
			/* no reference was taken; forget the pointer */
			ia = NULL;
		}
		lck_rw_done(in_ifaddr_rwlock);
	}
	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp->inp_laddr.s_addr != INADDR_ANY) {
		VERIFY(ia == NULL);
		*laddr = inp->inp_laddr;
		return (0);
	}

	/*
	 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
	 * then it overrides the sticky ifscope set for the socket.
	 */
	if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF))
		ifscope = inp->inp_boundifp->if_index;

	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 * Note that we should check the address family of the cached
	 * destination, in case of sharing the cache with IPv6.
	 */
	if (ro->ro_rt != NULL)
		RT_LOCK_SPIN(ro->ro_rt);
	/* Drop a cached route that is stale or for another destination */
	if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
	    SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
	    (inp->inp_socket->so_options & SO_DONTROUTE)) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);
	}
	if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
	    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);
		/* No route yet, so try to acquire one */
		bzero(&ro->ro_dst, sizeof (struct sockaddr_in));
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof (struct sockaddr_in);
		SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
		rtalloc_scoped(ro, ifscope);
		if (ro->ro_rt != NULL)
			RT_LOCK_SPIN(ro->ro_rt);
	}
	/* Sanitized local copy for interface address searches */
	bzero(&sin, sizeof (sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof (struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
	/*
	 * If we did not find (or use) a route, assume dest is reachable
	 * on a directly connected network and try to find a corresponding
	 * interface to take the source address from.
	 */
	if (ro->ro_rt == NULL) {
		proc_t proc = current_proc();

		VERIFY(ia == NULL);
		ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
		if (ia == NULL)
			ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
		error = ((ia == NULL) ? ENETUNREACH : 0);

		/*
		 * No route to the destination: this is the only place
		 * where an APN fallback notification may be raised.
		 */
		if (apn_fallback_required(proc, inp->inp_socket,
		    (void *)nam))
			apn_fallback_trigger(proc, inp->inp_socket);

		goto done;
	}
	RT_LOCK_ASSERT_HELD(ro->ro_rt);
	/*
	 * If the outgoing interface on the route found is not
	 * a loopback interface, use the address from that interface.
	 */
	if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
		VERIFY(ia == NULL);
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 * Apply the same logic for expensive interfaces.
		 */
		if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
			RT_UNLOCK(ro->ro_rt);
			ROUTE_RELEASE(ro);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			ia = ifatoia(ro->ro_rt->rt_ifa);
			/* reference dropped at "done" via IFA_REMREF() */
			IFA_ADDREF(&ia->ia_ifa);

			/*
			 * Mark the control block for notification of
			 * a possible flow that might undergo clat46
			 * translation.
			 *
			 * We defer the decision to a later point when
			 * inpcb is being disposed off.
			 * The reason is that we only want to send notification
			 * if the flow was ever used to send data.
			 */
			if (IS_INTF_CLAT46(ro->ro_rt->rt_ifp))
				inp->inp_flags2 |= INP2_CLAT46_FLOW;

			RT_UNLOCK(ro->ro_rt);
			error = 0;
		}
		goto done;
	}
	VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
	RT_UNLOCK(ro->ro_rt);
	/*
	 * The outgoing interface is marked with 'loopback net', so a route
	 * to ourselves is here.
	 * Try to find the interface of the destination address and then
	 * take the address from there. That interface is not necessarily
	 * a loopback interface.
	 */
	VERIFY(ia == NULL);
	ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
	if (ia == NULL)
		ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
	if (ia == NULL)
		ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
	if (ia == NULL) {
		/* Last resort: the address attached to the route itself */
		RT_LOCK(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL)
			IFA_ADDREF(&ia->ia_ifa);
		RT_UNLOCK(ro->ro_rt);
	}
	error = ((ia == NULL) ? ENETUNREACH : 0);

done:
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
	    inp->inp_moptions != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp;

		imo = inp->inp_moptions;
		IMO_LOCK(imo);
		if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
		    ia->ia_ifp != imo->imo_multicast_ifp)) {
			ifp = imo->imo_multicast_ifp;
			if (ia != NULL)
				IFA_REMREF(&ia->ia_ifa);
			/* first address in in_ifaddrhead on that interface */
			lck_rw_lock_shared(in_ifaddr_rwlock);
			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
				if (ia->ia_ifp == ifp)
					break;
			}
			if (ia != NULL)
				IFA_ADDREF(&ia->ia_ifa);
			lck_rw_done(in_ifaddr_rwlock);
			/* this overrides whatever error was set above */
			if (ia == NULL)
				error = EADDRNOTAVAIL;
			else
				error = 0;
		}
		IMO_UNLOCK(imo);
	}
	/*
	 * Don't do pcblookup call here; return interface in laddr
	 * and exit to caller, that will do the lookup.
	 */
	if (ia != NULL) {
		/*
		 * If the source address belongs to a cellular interface
		 * and the socket forbids our using interfaces of such
		 * type, pretend that there is no source address.
		 * Apply the same logic for expensive interfaces.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
			IFA_UNLOCK(&ia->ia_ifa);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else if (error == 0) {
			*laddr = ia->ia_addr.sin_addr;
			if (outif != NULL) {
				struct ifnet *ifp;

				/* prefer the route's interface when we have one */
				if (ro->ro_rt != NULL)
					ifp = ro->ro_rt->rt_ifp;
				else
					ifp = ia->ia_ifp;

				VERIFY(ifp != NULL);
				IFA_CONVERT_LOCK(&ia->ia_ifa);
				ifnet_reference(ifp);	/* for caller */
				if (*outif != NULL)
					ifnet_release(*outif);
				*outif = ifp;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		} else {
			IFA_UNLOCK(&ia->ia_ifa);
		}
		IFA_REMREF(&ia->ia_ifa);
		ia = NULL;
	}

	/* Let socket filters/observers know the interface was denied */
	if (restricted && error == EHOSTUNREACH) {
		soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_IFDENIED));
	}

	return (error);
}
1468
/*
 * Outer subroutine:
 * Connect from a socket to a specified address.
 * Both address and port must be specified in argument sin.
 * If don't have a local address for this socket yet,
 * then pick one.
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 *
 * Parameters:	inp		PCB to connect
 *		nam		Remote address (sockaddr_in); may be altered
 *				by in_pcbladdr()
 *		p		Process passed to in_pcbbind() when an
 *				implicit bind is needed
 *		ifscope		Interface scope handed to in_pcbladdr()
 *		outif		Optional out: outgoing interface as returned
 *				by in_pcbladdr()
 *
 * Returns:	0		Success
 *		ECONNREFUSED	Socket was aborted while it was unlocked
 *		EADDRINUSE	The 4-tuple is already in use
 *		EINVAL		Local address set but no local port
 *		in_pcbladdr:*	Errors from local address selection
 *		in_pcbbind:*	Errors from the implicit bind
 *
 * The socket lock is dropped and re-taken around the PCB lookup and,
 * when the try-lock fails, around acquiring the pcbinfo lock.
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
	struct inpcb *pcb;
	int error;
	struct socket *so = inp->inp_socket;

#if CONTENT_FILTER
	if (so)
		so->so_state_change_cnt++;
#endif

	/*
	 *   Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0)
		return (error);

	socket_unlock(so, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state. When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0) {
		/*
		 * A PCB returned by the lookup above carries a want-count
		 * reference (it is dropped with WNT_RELEASE on the
		 * EADDRINUSE path below); drop it here as well so that the
		 * matching PCB is not pinned forever.
		 */
		if (pcb != NULL) {
			in_pcb_checkstate(pcb, WNT_RELEASE,
			    pcb == inp ? 1 : 0);
		}
		return (ECONNREFUSED);
	}

	if (pcb != NULL) {
		/* our own socket is already locked; others are not */
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return (EADDRINUSE);
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			/* implicit bind to pick a local port */
			error = in_pcbbind(inp, NULL, p);
			if (error)
				return (error);
		}
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
		/* remember that the local address was chosen by us */
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		/*
		 * Usage of IP_PKTINFO, without local port already
		 * specified will cause kernel to panic,
		 * see rdar://problem/18508185.
		 * For now returning error to avoid a kernel panic
		 * This routines can be refactored and handle this better
		 * in future.
		 */
		if (inp->inp_lport == 0)
			return (EINVAL);
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
	}
	/* pcbinfo lock is held exclusively from here on */
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
		nstat_pcb_invalidate_cache(inp);
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	return (0);
}
1566
1567 void
1568 in_pcbdisconnect(struct inpcb *inp)
1569 {
1570 struct socket *so = inp->inp_socket;
1571
1572 if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
1573 nstat_pcb_cache(inp);
1574
1575 inp->inp_faddr.s_addr = INADDR_ANY;
1576 inp->inp_fport = 0;
1577
1578 #if CONTENT_FILTER
1579 if (so)
1580 so->so_state_change_cnt++;
1581 #endif
1582
1583 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1584 /* lock inversion issue, mostly with udp multicast packets */
1585 socket_unlock(so, 0);
1586 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1587 socket_lock(so, 0);
1588 }
1589
1590 in_pcbrehash(inp);
1591 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1592 /*
1593 * A multipath subflow socket would have its SS_NOFDREF set by default,
1594 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
1595 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
1596 */
1597 if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF))
1598 in_pcbdetach(inp);
1599 }
1600
/*
 * Detach a PCB from active use: release per-PCB resources (IPsec policy,
 * keep-alive buffer, IP options, cached route, multicast options), mark
 * the PCB WNT_STOPUSING / INPCB_STATE_DEAD and schedule the PCB garbage
 * collector.  The PCB memory itself is not freed here.
 *
 * Panics if the socket's so_pcb is already NULL or if the PCB cannot be
 * moved to WNT_STOPUSING.  The teardown section runs at most once per
 * socket, guarded by SOF_PCBCLEARING.  The socket lock is dropped and
 * re-taken while the multicast options reference is released.
 */
void
in_pcbdetach(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (so->so_pcb == NULL) {
		/* PCB has been disposed */
		panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__,
		    inp, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if IPSEC
	if (inp->inp_sp != NULL) {
		(void) ipsec4_delete_pcbpolicy(inp);
	}
#endif /* IPSEC */

	/* Account for UDP sockets that never sent or received a packet */
	if (inp->inp_stat != NULL && SOCK_PROTO(so) == IPPROTO_UDP) {
		if (inp->inp_stat->rxpackets == 0 && inp->inp_stat->txpackets == 0) {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_no_data);
		}
	}

	/*
	 * Let NetworkStatistics know this PCB is going away
	 * before we detach it.
	 */
	if (nstat_collect &&
	    (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP))
		nstat_pcb_detach(inp);

	/* Free memory buffer held for generating keep alives */
	if (inp->inp_keepalive_data != NULL) {
		FREE(inp->inp_keepalive_data, M_TEMP);
		inp->inp_keepalive_data = NULL;
	}

	/* mark socket state as dead */
	if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
		panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
		    __func__, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

	if (!(so->so_flags & SOF_PCBCLEARING)) {
		struct ip_moptions *imo;

		inp->inp_vflag = 0;
		if (inp->inp_options != NULL) {
			(void) m_free(inp->inp_options);
			inp->inp_options = NULL;
		}
		ROUTE_RELEASE(&inp->inp_route);
		/* detach the multicast options now; released further below */
		imo = inp->inp_moptions;
		inp->inp_moptions = NULL;
		sofreelastref(so, 0);
		inp->inp_state = INPCB_STATE_DEAD;

		/*
		 * Enqueue an event to send kernel event notification
		 * if the flow has to CLAT46 for data packets
		 */
		if (inp->inp_flags2 & INP2_CLAT46_FLOW) {
			/*
			 * If there has been any exchange of data bytes
			 * over this flow.
			 * Schedule a notification to report that flow is
			 * using client side translation.
			 */
			if (inp->inp_stat != NULL &&
			    (inp->inp_stat->txbytes != 0 ||
			    inp->inp_stat->rxbytes !=0)) {
				/* report the effective owner when delegated */
				if (so->so_flags & SOF_DELEGATED) {
					in6_clat46_event_enqueue_nwk_wq_entry(
					    IN6_CLAT46_EVENT_V4_FLOW,
					    so->e_pid,
					    so->e_uuid);
				} else {
					in6_clat46_event_enqueue_nwk_wq_entry(
					    IN6_CLAT46_EVENT_V4_FLOW,
					    so->last_pid,
					    so->last_uuid);
				}
			}
		}

		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;

		inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);

		/*
		 * See inp_join_group() for why we need to unlock
		 */
		if (imo != NULL) {
			socket_unlock(so, 0);
			IMO_REMREF(imo);
			socket_lock(so, 0);
		}
	}
}
1703
1704
/*
 * Final disposal of a PCB: remove it from the pcbinfo lists, sever the
 * link with its socket, release remaining resources and free both the
 * PCB (unless it is cached in the socket layer) and the socket.
 *
 * Preconditions enforced here: the socket, if any, must have a zero use
 * count and the PCB must be in the WNT_STOPUSING state, otherwise the
 * function panics; the pcbinfo lock must be held exclusively.
 *
 * Note that when the PCB has no socket (so == NULL) only the list
 * removal is performed.
 */
void
in_pcbdispose(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	if (so != NULL && so->so_usecount != 0) {
		panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n",
		    __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
		    solockhistory_nr(so));
		/* NOTREACHED */
	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
		if (so != NULL) {
			panic_plain("%s: inp %p invalid wantcnt %d, so %p "
			    "[%d,%d] usecount %d retaincnt %d state 0x%x "
			    "flags 0x%x lockhistory %s\n", __func__, inp,
			    inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
			    so->so_usecount, so->so_retaincnt, so->so_state,
			    so->so_flags, solockhistory_nr(so));
			/* NOTREACHED */
		} else {
			panic("%s: inp %p invalid wantcnt %d no socket\n",
			    __func__, inp, inp->inp_wantcnt);
			/* NOTREACHED */
		}
	}

	LCK_RW_ASSERT(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);

	/* stamp the PCB with a fresh generation count before unlinking it */
	inp->inp_gencnt = ++ipi->ipi_gencnt;
	/* access ipi in in_pcbremlists */
	in_pcbremlists(inp);

	if (so != NULL) {
		/* protocols that use a per-PCB lock (inpcb_mtx) */
		if (so->so_proto->pr_flags & PR_PCBLOCK) {
			sofreelastref(so, 0);
			if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
				/*
				 * selthreadclear() already called
				 * during sofreelastref() above.
				 */
				sbrelease(&so->so_rcv);
				sbrelease(&so->so_snd);
			}
			if (so->so_head != NULL) {
				panic("%s: so=%p head still exist\n",
				    __func__, so);
				/* NOTREACHED */
			}
			lck_mtx_unlock(&inp->inpcb_mtx);

#if NECP
			necp_inpcb_remove_cb(inp);
#endif /* NECP */

			lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
		}
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;
		/* keep the old PCB pointer around in so_saved_pcb */
		so->so_saved_pcb = (caddr_t)inp;
		so->so_pcb = NULL;
		inp->inp_socket = NULL;
#if CONFIG_MACF_NET
		mac_inpcb_label_destroy(inp);
#endif /* CONFIG_MACF_NET */
#if NECP
		necp_inpcb_dispose(inp);
#endif /* NECP */
		/*
		 * In case there a route cached after a detach (possible
		 * in the tcp case), make sure that it is freed before
		 * we deallocate the structure.
		 */
		ROUTE_RELEASE(&inp->inp_route);
		/* a PCB cached in the socket layer is not zone-freed here */
		if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
			zfree(ipi->ipi_zone, inp);
		}
		sodealloc(so);
	}
}
1785
1786 /*
1787 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
1788 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
 * in struct pr_usrreqs, so that protocols can just reference them directly
1790 * without the need for a wrapper function.
1791 */
1792 int
1793 in_getsockaddr(struct socket *so, struct sockaddr **nam)
1794 {
1795 struct inpcb *inp;
1796 struct sockaddr_in *sin;
1797
1798 /*
1799 * Do the malloc first in case it blocks.
1800 */
1801 MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
1802 if (sin == NULL)
1803 return (ENOBUFS);
1804 bzero(sin, sizeof (*sin));
1805 sin->sin_family = AF_INET;
1806 sin->sin_len = sizeof (*sin);
1807
1808 if ((inp = sotoinpcb(so)) == NULL) {
1809 FREE(sin, M_SONAME);
1810 return (EINVAL);
1811 }
1812 sin->sin_port = inp->inp_lport;
1813 sin->sin_addr = inp->inp_laddr;
1814
1815 *nam = (struct sockaddr *)sin;
1816 return (0);
1817 }
1818
1819 int
1820 in_getsockaddr_s(struct socket *so, struct sockaddr_in *ss)
1821 {
1822 struct sockaddr_in *sin = ss;
1823 struct inpcb *inp;
1824
1825 VERIFY(ss != NULL);
1826 bzero(ss, sizeof (*ss));
1827
1828 sin->sin_family = AF_INET;
1829 sin->sin_len = sizeof (*sin);
1830
1831 if ((inp = sotoinpcb(so)) == NULL)
1832 return (EINVAL);
1833
1834 sin->sin_port = inp->inp_lport;
1835 sin->sin_addr = inp->inp_laddr;
1836 return (0);
1837 }
1838
1839 int
1840 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
1841 {
1842 struct inpcb *inp;
1843 struct sockaddr_in *sin;
1844
1845 /*
1846 * Do the malloc first in case it blocks.
1847 */
1848 MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
1849 if (sin == NULL)
1850 return (ENOBUFS);
1851 bzero((caddr_t)sin, sizeof (*sin));
1852 sin->sin_family = AF_INET;
1853 sin->sin_len = sizeof (*sin);
1854
1855 if ((inp = sotoinpcb(so)) == NULL) {
1856 FREE(sin, M_SONAME);
1857 return (EINVAL);
1858 }
1859 sin->sin_port = inp->inp_fport;
1860 sin->sin_addr = inp->inp_faddr;
1861
1862 *nam = (struct sockaddr *)sin;
1863 return (0);
1864 }
1865
1866 void
1867 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1868 int errno, void (*notify)(struct inpcb *, int))
1869 {
1870 struct inpcb *inp;
1871
1872 lck_rw_lock_shared(pcbinfo->ipi_lock);
1873
1874 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1875 #if INET6
1876 if (!(inp->inp_vflag & INP_IPV4))
1877 continue;
1878 #endif /* INET6 */
1879 if (inp->inp_faddr.s_addr != faddr.s_addr ||
1880 inp->inp_socket == NULL)
1881 continue;
1882 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
1883 continue;
1884 socket_lock(inp->inp_socket, 1);
1885 (*notify)(inp, errno);
1886 (void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
1887 socket_unlock(inp->inp_socket, 1);
1888 }
1889 lck_rw_done(pcbinfo->ipi_lock);
1890 }
1891
1892 /*
1893 * Check for alternatives when higher level complains
1894 * about service problems. For now, invalidate cached
1895 * routing information. If the route was created dynamically
1896 * (by a redirect), time to try a default gateway again.
1897 */
1898 void
1899 in_losing(struct inpcb *inp)
1900 {
1901 boolean_t release = FALSE;
1902 struct rtentry *rt;
1903
1904 if ((rt = inp->inp_route.ro_rt) != NULL) {
1905 struct in_ifaddr *ia = NULL;
1906
1907 RT_LOCK(rt);
1908 if (rt->rt_flags & RTF_DYNAMIC) {
1909 /*
1910 * Prevent another thread from modifying rt_key,
1911 * rt_gateway via rt_setgate() after rt_lock is
1912 * dropped by marking the route as defunct.
1913 */
1914 rt->rt_flags |= RTF_CONDEMNED;
1915 RT_UNLOCK(rt);
1916 (void) rtrequest(RTM_DELETE, rt_key(rt),
1917 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
1918 } else {
1919 RT_UNLOCK(rt);
1920 }
1921 /* if the address is gone keep the old route in the pcb */
1922 if (inp->inp_laddr.s_addr != INADDR_ANY &&
1923 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1924 /*
1925 * Address is around; ditch the route. A new route
1926 * can be allocated the next time output is attempted.
1927 */
1928 release = TRUE;
1929 }
1930 if (ia != NULL)
1931 IFA_REMREF(&ia->ia_ifa);
1932 }
1933 if (rt == NULL || release)
1934 ROUTE_RELEASE(&inp->inp_route);
1935 }
1936
1937 /*
1938 * After a routing change, flush old routing
1939 * and allocate a (hopefully) better one.
1940 */
1941 void
1942 in_rtchange(struct inpcb *inp, int errno)
1943 {
1944 #pragma unused(errno)
1945 boolean_t release = FALSE;
1946 struct rtentry *rt;
1947
1948 if ((rt = inp->inp_route.ro_rt) != NULL) {
1949 struct in_ifaddr *ia = NULL;
1950
1951 /* if address is gone, keep the old route */
1952 if (inp->inp_laddr.s_addr != INADDR_ANY &&
1953 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1954 /*
1955 * Address is around; ditch the route. A new route
1956 * can be allocated the next time output is attempted.
1957 */
1958 release = TRUE;
1959 }
1960 if (ia != NULL)
1961 IFA_REMREF(&ia->ia_ifa);
1962 }
1963 if (rt == NULL || release)
1964 ROUTE_RELEASE(&inp->inp_route);
1965 }
1966
1967 /*
1968 * Lookup a PCB based on the local address and port.
1969 */
1970 struct inpcb *
1971 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
1972 unsigned int lport_arg, int wild_okay)
1973 {
1974 struct inpcb *inp;
1975 int matchwild = 3, wildcard;
1976 u_short lport = lport_arg;
1977
1978 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);
1979
1980 if (!wild_okay) {
1981 struct inpcbhead *head;
1982 /*
1983 * Look for an unconnected (wildcard foreign addr) PCB that
1984 * matches the local address and port we're looking for.
1985 */
1986 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
1987 pcbinfo->ipi_hashmask)];
1988 LIST_FOREACH(inp, head, inp_hash) {
1989 #if INET6
1990 if (!(inp->inp_vflag & INP_IPV4))
1991 continue;
1992 #endif /* INET6 */
1993 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1994 inp->inp_laddr.s_addr == laddr.s_addr &&
1995 inp->inp_lport == lport) {
1996 /*
1997 * Found.
1998 */
1999 return (inp);
2000 }
2001 }
2002 /*
2003 * Not found.
2004 */
2005 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
2006 return (NULL);
2007 } else {
2008 struct inpcbporthead *porthash;
2009 struct inpcbport *phd;
2010 struct inpcb *match = NULL;
2011 /*
2012 * Best fit PCB lookup.
2013 *
2014 * First see if this local port is in use by looking on the
2015 * port hash list.
2016 */
2017 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
2018 pcbinfo->ipi_porthashmask)];
2019 LIST_FOREACH(phd, porthash, phd_hash) {
2020 if (phd->phd_port == lport)
2021 break;
2022 }
2023 if (phd != NULL) {
2024 /*
2025 * Port is in use by one or more PCBs. Look for best
2026 * fit.
2027 */
2028 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
2029 wildcard = 0;
2030 #if INET6
2031 if (!(inp->inp_vflag & INP_IPV4))
2032 continue;
2033 #endif /* INET6 */
2034 if (inp->inp_faddr.s_addr != INADDR_ANY)
2035 wildcard++;
2036 if (inp->inp_laddr.s_addr != INADDR_ANY) {
2037 if (laddr.s_addr == INADDR_ANY)
2038 wildcard++;
2039 else if (inp->inp_laddr.s_addr !=
2040 laddr.s_addr)
2041 continue;
2042 } else {
2043 if (laddr.s_addr != INADDR_ANY)
2044 wildcard++;
2045 }
2046 if (wildcard < matchwild) {
2047 match = inp;
2048 matchwild = wildcard;
2049 if (matchwild == 0) {
2050 break;
2051 }
2052 }
2053 }
2054 }
2055 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
2056 0, 0, 0, 0);
2057 return (match);
2058 }
2059 }
2060
2061 /*
2062 * Check if PCB exists in hash list.
2063 */
2064 int
2065 in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2066 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2067 uid_t *uid, gid_t *gid, struct ifnet *ifp)
2068 {
2069 struct inpcbhead *head;
2070 struct inpcb *inp;
2071 u_short fport = fport_arg, lport = lport_arg;
2072 int found = 0;
2073 struct inpcb *local_wild = NULL;
2074 #if INET6
2075 struct inpcb *local_wild_mapped = NULL;
2076 #endif /* INET6 */
2077
2078 *uid = UID_MAX;
2079 *gid = GID_MAX;
2080
2081 /*
2082 * We may have found the pcb in the last lookup - check this first.
2083 */
2084
2085 lck_rw_lock_shared(pcbinfo->ipi_lock);
2086
2087 /*
2088 * First look for an exact match.
2089 */
2090 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2091 pcbinfo->ipi_hashmask)];
2092 LIST_FOREACH(inp, head, inp_hash) {
2093 #if INET6
2094 if (!(inp->inp_vflag & INP_IPV4))
2095 continue;
2096 #endif /* INET6 */
2097 if (inp_restricted_recv(inp, ifp))
2098 continue;
2099
2100 if (inp->inp_faddr.s_addr == faddr.s_addr &&
2101 inp->inp_laddr.s_addr == laddr.s_addr &&
2102 inp->inp_fport == fport &&
2103 inp->inp_lport == lport) {
2104 if ((found = (inp->inp_socket != NULL))) {
2105 /*
2106 * Found.
2107 */
2108 *uid = kauth_cred_getuid(
2109 inp->inp_socket->so_cred);
2110 *gid = kauth_cred_getgid(
2111 inp->inp_socket->so_cred);
2112 }
2113 lck_rw_done(pcbinfo->ipi_lock);
2114 return (found);
2115 }
2116 }
2117
2118 if (!wildcard) {
2119 /*
2120 * Not found.
2121 */
2122 lck_rw_done(pcbinfo->ipi_lock);
2123 return (0);
2124 }
2125
2126 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2127 pcbinfo->ipi_hashmask)];
2128 LIST_FOREACH(inp, head, inp_hash) {
2129 #if INET6
2130 if (!(inp->inp_vflag & INP_IPV4))
2131 continue;
2132 #endif /* INET6 */
2133 if (inp_restricted_recv(inp, ifp))
2134 continue;
2135
2136 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2137 inp->inp_lport == lport) {
2138 if (inp->inp_laddr.s_addr == laddr.s_addr) {
2139 if ((found = (inp->inp_socket != NULL))) {
2140 *uid = kauth_cred_getuid(
2141 inp->inp_socket->so_cred);
2142 *gid = kauth_cred_getgid(
2143 inp->inp_socket->so_cred);
2144 }
2145 lck_rw_done(pcbinfo->ipi_lock);
2146 return (found);
2147 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2148 #if INET6
2149 if (inp->inp_socket &&
2150 SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
2151 local_wild_mapped = inp;
2152 else
2153 #endif /* INET6 */
2154 local_wild = inp;
2155 }
2156 }
2157 }
2158 if (local_wild == NULL) {
2159 #if INET6
2160 if (local_wild_mapped != NULL) {
2161 if ((found = (local_wild_mapped->inp_socket != NULL))) {
2162 *uid = kauth_cred_getuid(
2163 local_wild_mapped->inp_socket->so_cred);
2164 *gid = kauth_cred_getgid(
2165 local_wild_mapped->inp_socket->so_cred);
2166 }
2167 lck_rw_done(pcbinfo->ipi_lock);
2168 return (found);
2169 }
2170 #endif /* INET6 */
2171 lck_rw_done(pcbinfo->ipi_lock);
2172 return (0);
2173 }
2174 if ((found = (local_wild->inp_socket != NULL))) {
2175 *uid = kauth_cred_getuid(
2176 local_wild->inp_socket->so_cred);
2177 *gid = kauth_cred_getgid(
2178 local_wild->inp_socket->so_cred);
2179 }
2180 lck_rw_done(pcbinfo->ipi_lock);
2181 return (found);
2182 }
2183
2184 /*
2185 * Lookup PCB in hash list.
2186 */
2187 struct inpcb *
2188 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2189 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2190 struct ifnet *ifp)
2191 {
2192 struct inpcbhead *head;
2193 struct inpcb *inp;
2194 u_short fport = fport_arg, lport = lport_arg;
2195 struct inpcb *local_wild = NULL;
2196 #if INET6
2197 struct inpcb *local_wild_mapped = NULL;
2198 #endif /* INET6 */
2199
2200 /*
2201 * We may have found the pcb in the last lookup - check this first.
2202 */
2203
2204 lck_rw_lock_shared(pcbinfo->ipi_lock);
2205
2206 /*
2207 * First look for an exact match.
2208 */
2209 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2210 pcbinfo->ipi_hashmask)];
2211 LIST_FOREACH(inp, head, inp_hash) {
2212 #if INET6
2213 if (!(inp->inp_vflag & INP_IPV4))
2214 continue;
2215 #endif /* INET6 */
2216 if (inp_restricted_recv(inp, ifp))
2217 continue;
2218
2219 if (inp->inp_faddr.s_addr == faddr.s_addr &&
2220 inp->inp_laddr.s_addr == laddr.s_addr &&
2221 inp->inp_fport == fport &&
2222 inp->inp_lport == lport) {
2223 /*
2224 * Found.
2225 */
2226 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2227 WNT_STOPUSING) {
2228 lck_rw_done(pcbinfo->ipi_lock);
2229 return (inp);
2230 } else {
2231 /* it's there but dead, say it isn't found */
2232 lck_rw_done(pcbinfo->ipi_lock);
2233 return (NULL);
2234 }
2235 }
2236 }
2237
2238 if (!wildcard) {
2239 /*
2240 * Not found.
2241 */
2242 lck_rw_done(pcbinfo->ipi_lock);
2243 return (NULL);
2244 }
2245
2246 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2247 pcbinfo->ipi_hashmask)];
2248 LIST_FOREACH(inp, head, inp_hash) {
2249 #if INET6
2250 if (!(inp->inp_vflag & INP_IPV4))
2251 continue;
2252 #endif /* INET6 */
2253 if (inp_restricted_recv(inp, ifp))
2254 continue;
2255
2256 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2257 inp->inp_lport == lport) {
2258 if (inp->inp_laddr.s_addr == laddr.s_addr) {
2259 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2260 WNT_STOPUSING) {
2261 lck_rw_done(pcbinfo->ipi_lock);
2262 return (inp);
2263 } else {
2264 /* it's dead; say it isn't found */
2265 lck_rw_done(pcbinfo->ipi_lock);
2266 return (NULL);
2267 }
2268 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2269 #if INET6
2270 if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
2271 local_wild_mapped = inp;
2272 else
2273 #endif /* INET6 */
2274 local_wild = inp;
2275 }
2276 }
2277 }
2278 if (local_wild == NULL) {
2279 #if INET6
2280 if (local_wild_mapped != NULL) {
2281 if (in_pcb_checkstate(local_wild_mapped,
2282 WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2283 lck_rw_done(pcbinfo->ipi_lock);
2284 return (local_wild_mapped);
2285 } else {
2286 /* it's dead; say it isn't found */
2287 lck_rw_done(pcbinfo->ipi_lock);
2288 return (NULL);
2289 }
2290 }
2291 #endif /* INET6 */
2292 lck_rw_done(pcbinfo->ipi_lock);
2293 return (NULL);
2294 }
2295 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2296 lck_rw_done(pcbinfo->ipi_lock);
2297 return (local_wild);
2298 }
2299 /*
2300 * It's either not found or is already dead.
2301 */
2302 lck_rw_done(pcbinfo->ipi_lock);
2303 return (NULL);
2304 }
2305
2306 /*
2307 * @brief Insert PCB onto various hash lists.
2308 *
2309 * @param inp Pointer to internet protocol control block
2310 * @param locked Implies if ipi_lock (protecting pcb list)
2311 * is already locked or not.
2312 *
2313 * @return int error on failure and 0 on success
2314 */
2315 int
2316 in_pcbinshash(struct inpcb *inp, int locked)
2317 {
2318 struct inpcbhead *pcbhash;
2319 struct inpcbporthead *pcbporthash;
2320 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
2321 struct inpcbport *phd;
2322 u_int32_t hashkey_faddr;
2323
2324 if (!locked) {
2325 if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
2326 /*
2327 * Lock inversion issue, mostly with udp
2328 * multicast packets
2329 */
2330 socket_unlock(inp->inp_socket, 0);
2331 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
2332 socket_lock(inp->inp_socket, 0);
2333 }
2334 }
2335
2336 /*
2337 * This routine or its caller may have given up
2338 * socket's protocol lock briefly.
2339 * During that time the socket may have been dropped.
2340 * Safe-guarding against that.
2341 */
2342 if (inp->inp_state == INPCB_STATE_DEAD) {
2343 if (!locked) {
2344 lck_rw_done(pcbinfo->ipi_lock);
2345 }
2346 return (ECONNABORTED);
2347 }
2348
2349
2350 #if INET6
2351 if (inp->inp_vflag & INP_IPV6)
2352 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2353 else
2354 #endif /* INET6 */
2355 hashkey_faddr = inp->inp_faddr.s_addr;
2356
2357 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2358 inp->inp_fport, pcbinfo->ipi_hashmask);
2359
2360 pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];
2361
2362 pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
2363 pcbinfo->ipi_porthashmask)];
2364
2365 /*
2366 * Go through port list and look for a head for this lport.
2367 */
2368 LIST_FOREACH(phd, pcbporthash, phd_hash) {
2369 if (phd->phd_port == inp->inp_lport)
2370 break;
2371 }
2372
2373 /*
2374 * If none exists, malloc one and tack it on.
2375 */
2376 if (phd == NULL) {
2377 MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport),
2378 M_PCB, M_WAITOK);
2379 if (phd == NULL) {
2380 if (!locked)
2381 lck_rw_done(pcbinfo->ipi_lock);
2382 return (ENOBUFS); /* XXX */
2383 }
2384 phd->phd_port = inp->inp_lport;
2385 LIST_INIT(&phd->phd_pcblist);
2386 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
2387 }
2388
2389 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2390
2391
2392 inp->inp_phd = phd;
2393 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
2394 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
2395 inp->inp_flags2 |= INP2_INHASHLIST;
2396
2397 if (!locked)
2398 lck_rw_done(pcbinfo->ipi_lock);
2399
2400 #if NECP
2401 // This call catches the original setting of the local address
2402 inp_update_necp_policy(inp, NULL, NULL, 0);
2403 #endif /* NECP */
2404
2405 return (0);
2406 }
2407
2408 /*
2409 * Move PCB to the proper hash bucket when { faddr, fport } have been
2410 * changed. NOTE: This does not handle the case of the lport changing (the
2411 * hashed port list would have to be updated as well), so the lport must
2412 * not change after in_pcbinshash() has been called.
2413 */
2414 void
2415 in_pcbrehash(struct inpcb *inp)
2416 {
2417 struct inpcbhead *head;
2418 u_int32_t hashkey_faddr;
2419
2420 #if INET6
2421 if (inp->inp_vflag & INP_IPV6)
2422 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2423 else
2424 #endif /* INET6 */
2425 hashkey_faddr = inp->inp_faddr.s_addr;
2426
2427 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2428 inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
2429 head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];
2430
2431 if (inp->inp_flags2 & INP2_INHASHLIST) {
2432 LIST_REMOVE(inp, inp_hash);
2433 inp->inp_flags2 &= ~INP2_INHASHLIST;
2434 }
2435
2436 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2437 LIST_INSERT_HEAD(head, inp, inp_hash);
2438 inp->inp_flags2 |= INP2_INHASHLIST;
2439
2440 #if NECP
2441 // This call catches updates to the remote addresses
2442 inp_update_necp_policy(inp, NULL, NULL, 0);
2443 #endif /* NECP */
2444 }
2445
2446 /*
2447 * Remove PCB from various lists.
2448 * Must be called pcbinfo lock is held in exclusive mode.
2449 */
2450 void
2451 in_pcbremlists(struct inpcb *inp)
2452 {
2453 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
2454
2455 /*
2456 * Check if it's in hashlist -- an inp is placed in hashlist when
2457 * it's local port gets assigned. So it should also be present
2458 * in the port list.
2459 */
2460 if (inp->inp_flags2 & INP2_INHASHLIST) {
2461 struct inpcbport *phd = inp->inp_phd;
2462
2463 VERIFY(phd != NULL && inp->inp_lport > 0);
2464
2465 LIST_REMOVE(inp, inp_hash);
2466 inp->inp_hash.le_next = NULL;
2467 inp->inp_hash.le_prev = NULL;
2468
2469 LIST_REMOVE(inp, inp_portlist);
2470 inp->inp_portlist.le_next = NULL;
2471 inp->inp_portlist.le_prev = NULL;
2472 if (LIST_EMPTY(&phd->phd_pcblist)) {
2473 LIST_REMOVE(phd, phd_hash);
2474 FREE(phd, M_PCB);
2475 }
2476 inp->inp_phd = NULL;
2477 inp->inp_flags2 &= ~INP2_INHASHLIST;
2478 }
2479 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2480
2481 if (inp->inp_flags2 & INP2_TIMEWAIT) {
2482 /* Remove from time-wait queue */
2483 tcp_remove_from_time_wait(inp);
2484 inp->inp_flags2 &= ~INP2_TIMEWAIT;
2485 VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
2486 inp->inp_pcbinfo->ipi_twcount--;
2487 } else {
2488 /* Remove from global inp list if it is not time-wait */
2489 LIST_REMOVE(inp, inp_list);
2490 }
2491
2492 if (inp->inp_flags2 & INP2_IN_FCTREE) {
2493 inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE));
2494 VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
2495 }
2496
2497 inp->inp_pcbinfo->ipi_count--;
2498 }
2499
2500 /*
2501 * Mechanism used to defer the memory release of PCBs
2502 * The pcb list will contain the pcb until the reaper can clean it up if
2503 * the following conditions are met:
2504 * 1) state "DEAD",
2505 * 2) wantcnt is STOPUSING
2506 * 3) usecount is 0
2507 * This function will be called to either mark the pcb as
2508 */
2509 int
2510 in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
2511 {
2512 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
2513 UInt32 origwant;
2514 UInt32 newwant;
2515
2516 switch (mode) {
2517 case WNT_STOPUSING:
2518 /*
2519 * Try to mark the pcb as ready for recycling. CAS with
2520 * STOPUSING, if success we're good, if it's in use, will
2521 * be marked later
2522 */
2523 if (locked == 0)
2524 socket_lock(pcb->inp_socket, 1);
2525 pcb->inp_state = INPCB_STATE_DEAD;
2526
2527 stopusing:
2528 if (pcb->inp_socket->so_usecount < 0) {
2529 panic("%s: pcb=%p so=%p usecount is negative\n",
2530 __func__, pcb, pcb->inp_socket);
2531 /* NOTREACHED */
2532 }
2533 if (locked == 0)
2534 socket_unlock(pcb->inp_socket, 1);
2535
2536 inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);
2537
2538 origwant = *wantcnt;
2539 if ((UInt16) origwant == 0xffff) /* should stop using */
2540 return (WNT_STOPUSING);
2541 newwant = 0xffff;
2542 if ((UInt16) origwant == 0) {
2543 /* try to mark it as unsuable now */
2544 OSCompareAndSwap(origwant, newwant, wantcnt);
2545 }
2546 return (WNT_STOPUSING);
2547
2548 case WNT_ACQUIRE:
2549 /*
2550 * Try to increase reference to pcb. If WNT_STOPUSING
2551 * should bail out. If socket state DEAD, try to set count
2552 * to STOPUSING, return failed otherwise increase cnt.
2553 */
2554 do {
2555 origwant = *wantcnt;
2556 if ((UInt16) origwant == 0xffff) {
2557 /* should stop using */
2558 return (WNT_STOPUSING);
2559 }
2560 newwant = origwant + 1;
2561 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2562 return (WNT_ACQUIRE);
2563
2564 case WNT_RELEASE:
2565 /*
2566 * Release reference. If result is null and pcb state
2567 * is DEAD, set wanted bit to STOPUSING
2568 */
2569 if (locked == 0)
2570 socket_lock(pcb->inp_socket, 1);
2571
2572 do {
2573 origwant = *wantcnt;
2574 if ((UInt16) origwant == 0x0) {
2575 panic("%s: pcb=%p release with zero count",
2576 __func__, pcb);
2577 /* NOTREACHED */
2578 }
2579 if ((UInt16) origwant == 0xffff) {
2580 /* should stop using */
2581 if (locked == 0)
2582 socket_unlock(pcb->inp_socket, 1);
2583 return (WNT_STOPUSING);
2584 }
2585 newwant = origwant - 1;
2586 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2587
2588 if (pcb->inp_state == INPCB_STATE_DEAD)
2589 goto stopusing;
2590 if (pcb->inp_socket->so_usecount < 0) {
2591 panic("%s: RELEASE pcb=%p so=%p usecount is negative\n",
2592 __func__, pcb, pcb->inp_socket);
2593 /* NOTREACHED */
2594 }
2595
2596 if (locked == 0)
2597 socket_unlock(pcb->inp_socket, 1);
2598 return (WNT_RELEASE);
2599
2600 default:
2601 panic("%s: so=%p not a valid state =%x\n", __func__,
2602 pcb->inp_socket, mode);
2603 /* NOTREACHED */
2604 }
2605
2606 /* NOTREACHED */
2607 return (mode);
2608 }
2609
2610 /*
2611 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
2612 * The inpcb_compat data structure is passed to user space and must
2613 * not change. We intentionally avoid copying pointers.
2614 */
2615 void
2616 inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
2617 {
2618 bzero(inp_compat, sizeof (*inp_compat));
2619 inp_compat->inp_fport = inp->inp_fport;
2620 inp_compat->inp_lport = inp->inp_lport;
2621 inp_compat->nat_owner = 0;
2622 inp_compat->nat_cookie = 0;
2623 inp_compat->inp_gencnt = inp->inp_gencnt;
2624 inp_compat->inp_flags = inp->inp_flags;
2625 inp_compat->inp_flow = inp->inp_flow;
2626 inp_compat->inp_vflag = inp->inp_vflag;
2627 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
2628 inp_compat->inp_ip_p = inp->inp_ip_p;
2629 inp_compat->inp_dependfaddr.inp6_foreign =
2630 inp->inp_dependfaddr.inp6_foreign;
2631 inp_compat->inp_dependladdr.inp6_local =
2632 inp->inp_dependladdr.inp6_local;
2633 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2634 inp_compat->inp_depend6.inp6_hlim = 0;
2635 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2636 inp_compat->inp_depend6.inp6_ifindex = 0;
2637 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2638 }
2639
2640 #if !CONFIG_EMBEDDED
2641 void
2642 inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
2643 {
2644 xinp->inp_fport = inp->inp_fport;
2645 xinp->inp_lport = inp->inp_lport;
2646 xinp->inp_gencnt = inp->inp_gencnt;
2647 xinp->inp_flags = inp->inp_flags;
2648 xinp->inp_flow = inp->inp_flow;
2649 xinp->inp_vflag = inp->inp_vflag;
2650 xinp->inp_ip_ttl = inp->inp_ip_ttl;
2651 xinp->inp_ip_p = inp->inp_ip_p;
2652 xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
2653 xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
2654 xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2655 xinp->inp_depend6.inp6_hlim = 0;
2656 xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2657 xinp->inp_depend6.inp6_ifindex = 0;
2658 xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2659 }
2660 #endif /* !CONFIG_EMBEDDED */
2661
2662 /*
2663 * The following routines implement this scheme:
2664 *
2665 * Callers of ip_output() that intend to cache the route in the inpcb pass
2666 * a local copy of the struct route to ip_output(). Using a local copy of
2667 * the cached route significantly simplifies things as IP no longer has to
2668 * worry about having exclusive access to the passed in struct route, since
2669 * it's defined in the caller's stack; in essence, this allows for a lock-
2670 * less operation when updating the struct route at the IP level and below,
2671 * whenever necessary. The scheme works as follows:
2672 *
2673 * Prior to dropping the socket's lock and calling ip_output(), the caller
2674 * copies the struct route from the inpcb into its stack, and adds a reference
2675 * to the cached route entry, if there was any. The socket's lock is then
2676 * dropped and ip_output() is called with a pointer to the copy of struct
2677 * route defined on the stack (not to the one in the inpcb.)
2678 *
2679 * Upon returning from ip_output(), the caller then acquires the socket's
2680 * lock and synchronizes the cache; if there is no route cached in the inpcb,
2681 * it copies the local copy of struct route (which may or may not contain any
2682 * route) back into the cache; otherwise, if the inpcb has a route cached in
2683 * it, the one in the local copy will be freed, if there's any. Trashing the
2684 * cached route in the inpcb can be avoided because ip_output() is single-
2685 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
2686 * by the socket/transport layer.)
2687 */
2688 void
2689 inp_route_copyout(struct inpcb *inp, struct route *dst)
2690 {
2691 struct route *src = &inp->inp_route;
2692
2693 socket_lock_assert_owned(inp->inp_socket);
2694
2695 /*
2696 * If the route in the PCB is stale or not for IPv4, blow it away;
2697 * this is possible in the case of IPv4-mapped address case.
2698 */
2699 if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET)
2700 ROUTE_RELEASE(src);
2701
2702 route_copyout(dst, src, sizeof (*dst));
2703 }
2704
2705 void
2706 inp_route_copyin(struct inpcb *inp, struct route *src)
2707 {
2708 struct route *dst = &inp->inp_route;
2709
2710 socket_lock_assert_owned(inp->inp_socket);
2711
2712 /* Minor sanity check */
2713 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
2714 panic("%s: wrong or corrupted route: %p", __func__, src);
2715
2716 route_copyin(src, dst, sizeof (*src));
2717 }
2718
2719 /*
2720 * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option.
2721 */
2722 int
2723 inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
2724 {
2725 struct ifnet *ifp = NULL;
2726
2727 ifnet_head_lock_shared();
2728 if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
2729 (ifp = ifindex2ifnet[ifscope]) == NULL)) {
2730 ifnet_head_done();
2731 return (ENXIO);
2732 }
2733 ifnet_head_done();
2734
2735 VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);
2736
2737 /*
2738 * A zero interface scope value indicates an "unbind".
2739 * Otherwise, take in whatever value the app desires;
2740 * the app may already know the scope (or force itself
2741 * to such a scope) ahead of time before the interface
2742 * gets attached. It doesn't matter either way; any
2743 * route lookup from this point on will require an
2744 * exact match for the embedded interface scope.
2745 */
2746 inp->inp_boundifp = ifp;
2747 if (inp->inp_boundifp == NULL)
2748 inp->inp_flags &= ~INP_BOUND_IF;
2749 else
2750 inp->inp_flags |= INP_BOUND_IF;
2751
2752 /* Blow away any cached route in the PCB */
2753 ROUTE_RELEASE(&inp->inp_route);
2754
2755 if (pifp != NULL)
2756 *pifp = ifp;
2757
2758 return (0);
2759 }
2760
2761 /*
2762 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2763 * as well as for setting PROC_UUID_NO_CELLULAR policy.
2764 */
2765 void
2766 inp_set_nocellular(struct inpcb *inp)
2767 {
2768 inp->inp_flags |= INP_NO_IFT_CELLULAR;
2769
2770 /* Blow away any cached route in the PCB */
2771 ROUTE_RELEASE(&inp->inp_route);
2772 }
2773
2774 /*
2775 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2776 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
2777 */
2778 void
2779 inp_clear_nocellular(struct inpcb *inp)
2780 {
2781 struct socket *so = inp->inp_socket;
2782
2783 /*
2784 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
2785 * has a higher precendence than INP_NO_IFT_CELLULAR. Clear the flag
2786 * if and only if the socket is unrestricted.
2787 */
2788 if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
2789 inp->inp_flags &= ~INP_NO_IFT_CELLULAR;
2790
2791 /* Blow away any cached route in the PCB */
2792 ROUTE_RELEASE(&inp->inp_route);
2793 }
2794 }
2795
2796 void
2797 inp_set_noexpensive(struct inpcb *inp)
2798 {
2799 inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;
2800
2801 /* Blow away any cached route in the PCB */
2802 ROUTE_RELEASE(&inp->inp_route);
2803 }
2804
2805 void
2806 inp_set_awdl_unrestricted(struct inpcb *inp)
2807 {
2808 inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;
2809
2810 /* Blow away any cached route in the PCB */
2811 ROUTE_RELEASE(&inp->inp_route);
2812 }
2813
2814 boolean_t
2815 inp_get_awdl_unrestricted(struct inpcb *inp)
2816 {
2817 return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
2818 }
2819
2820 void
2821 inp_clear_awdl_unrestricted(struct inpcb *inp)
2822 {
2823 inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;
2824
2825 /* Blow away any cached route in the PCB */
2826 ROUTE_RELEASE(&inp->inp_route);
2827 }
2828
2829 void
2830 inp_set_intcoproc_allowed(struct inpcb *inp)
2831 {
2832 inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
2833
2834 /* Blow away any cached route in the PCB */
2835 ROUTE_RELEASE(&inp->inp_route);
2836 }
2837
2838 boolean_t
2839 inp_get_intcoproc_allowed(struct inpcb *inp)
2840 {
2841 return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE;
2842 }
2843
2844 void
2845 inp_clear_intcoproc_allowed(struct inpcb *inp)
2846 {
2847 inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED;
2848
2849 /* Blow away any cached route in the PCB */
2850 ROUTE_RELEASE(&inp->inp_route);
2851 }
2852
2853 #if NECP
2854 /*
2855 * Called when PROC_UUID_NECP_APP_POLICY is set.
2856 */
2857 void
2858 inp_set_want_app_policy(struct inpcb *inp)
2859 {
2860 inp->inp_flags2 |= INP2_WANT_APP_POLICY;
2861 }
2862
2863 /*
2864 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
2865 */
2866 void
2867 inp_clear_want_app_policy(struct inpcb *inp)
2868 {
2869 inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
2870 }
2871 #endif /* NECP */
2872
2873 /*
2874 * Calculate flow hash for an inp, used by an interface to identify a
2875 * flow. When an interface provides flow control advisory, this flow
2876 * hash is used as an identifier.
2877 */
2878 u_int32_t
2879 inp_calc_flowhash(struct inpcb *inp)
2880 {
2881 struct inp_flowhash_key fh __attribute__((aligned(8)));
2882 u_int32_t flowhash = 0;
2883 struct inpcb *tmp_inp = NULL;
2884
2885 if (inp_hash_seed == 0)
2886 inp_hash_seed = RandomULong();
2887
2888 bzero(&fh, sizeof (fh));
2889
2890 bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr));
2891 bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr));
2892
2893 fh.infh_lport = inp->inp_lport;
2894 fh.infh_fport = inp->inp_fport;
2895 fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
2896 fh.infh_proto = inp->inp_ip_p;
2897 fh.infh_rand1 = RandomULong();
2898 fh.infh_rand2 = RandomULong();
2899
2900 try_again:
2901 flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed);
2902 if (flowhash == 0) {
2903 /* try to get a non-zero flowhash */
2904 inp_hash_seed = RandomULong();
2905 goto try_again;
2906 }
2907
2908 inp->inp_flowhash = flowhash;
2909
2910 /* Insert the inp into inp_fc_tree */
2911 lck_mtx_lock_spin(&inp_fc_lck);
2912 tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
2913 if (tmp_inp != NULL) {
2914 /*
2915 * There is a different inp with the same flowhash.
2916 * There can be a collision on flow hash but the
2917 * probability is low. Let's recompute the
2918 * flowhash.
2919 */
2920 lck_mtx_unlock(&inp_fc_lck);
2921 /* recompute hash seed */
2922 inp_hash_seed = RandomULong();
2923 goto try_again;
2924 }
2925
2926 RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
2927 inp->inp_flags2 |= INP2_IN_FCTREE;
2928 lck_mtx_unlock(&inp_fc_lck);
2929
2930 return (flowhash);
2931 }
2932
2933 void
2934 inp_flowadv(uint32_t flowhash)
2935 {
2936 struct inpcb *inp;
2937
2938 inp = inp_fc_getinp(flowhash, 0);
2939
2940 if (inp == NULL)
2941 return;
2942 inp_fc_feedback(inp);
2943 }
2944
2945 /*
2946 * Function to compare inp_fc_entries in inp flow control tree
2947 */
2948 static inline int
2949 infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
2950 {
2951 return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
2952 sizeof(inp1->inp_flowhash)));
2953 }
2954
2955 static struct inpcb *
2956 inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
2957 {
2958 struct inpcb *inp = NULL;
2959 int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;
2960
2961 lck_mtx_lock_spin(&inp_fc_lck);
2962 key_inp.inp_flowhash = flowhash;
2963 inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
2964 if (inp == NULL) {
2965 /* inp is not present, return */
2966 lck_mtx_unlock(&inp_fc_lck);
2967 return (NULL);
2968 }
2969
2970 if (flags & INPFC_REMOVE) {
2971 RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
2972 lck_mtx_unlock(&inp_fc_lck);
2973
2974 bzero(&(inp->infc_link), sizeof (inp->infc_link));
2975 inp->inp_flags2 &= ~INP2_IN_FCTREE;
2976 return (NULL);
2977 }
2978
2979 if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING)
2980 inp = NULL;
2981 lck_mtx_unlock(&inp_fc_lck);
2982
2983 return (inp);
2984 }
2985
2986 static void
2987 inp_fc_feedback(struct inpcb *inp)
2988 {
2989 struct socket *so = inp->inp_socket;
2990
2991 /* we already hold a want_cnt on this inp, socket can't be null */
2992 VERIFY(so != NULL);
2993 socket_lock(so, 1);
2994
2995 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
2996 socket_unlock(so, 1);
2997 return;
2998 }
2999
3000 if (inp->inp_sndinprog_cnt > 0)
3001 inp->inp_flags |= INP_FC_FEEDBACK;
3002
3003 /*
3004 * Return if the connection is not in flow-controlled state.
3005 * This can happen if the connection experienced
3006 * loss while it was in flow controlled state
3007 */
3008 if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
3009 socket_unlock(so, 1);
3010 return;
3011 }
3012 inp_reset_fc_state(inp);
3013
3014 if (SOCK_TYPE(so) == SOCK_STREAM)
3015 inp_fc_unthrottle_tcp(inp);
3016
3017 socket_unlock(so, 1);
3018 }
3019
3020 void
3021 inp_reset_fc_state(struct inpcb *inp)
3022 {
3023 struct socket *so = inp->inp_socket;
3024 int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
3025 int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;
3026
3027 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
3028
3029 if (suspended) {
3030 so->so_flags &= ~(SOF_SUSPENDED);
3031 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
3032 }
3033
3034 /* Give a write wakeup to unblock the socket */
3035 if (needwakeup)
3036 sowwakeup(so);
3037 }
3038
3039 int
3040 inp_set_fc_state(struct inpcb *inp, int advcode)
3041 {
3042 struct inpcb *tmp_inp = NULL;
3043 /*
3044 * If there was a feedback from the interface when
3045 * send operation was in progress, we should ignore
3046 * this flow advisory to avoid a race between setting
3047 * flow controlled state and receiving feedback from
3048 * the interface
3049 */
3050 if (inp->inp_flags & INP_FC_FEEDBACK)
3051 return (0);
3052
3053 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
3054 if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
3055 INPFC_SOLOCKED)) != NULL) {
3056 if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING)
3057 return (0);
3058 VERIFY(tmp_inp == inp);
3059 switch (advcode) {
3060 case FADV_FLOW_CONTROLLED:
3061 inp->inp_flags |= INP_FLOW_CONTROLLED;
3062 break;
3063 case FADV_SUSPENDED:
3064 inp->inp_flags |= INP_FLOW_SUSPENDED;
3065 soevent(inp->inp_socket,
3066 (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));
3067
3068 /* Record the fact that suspend event was sent */
3069 inp->inp_socket->so_flags |= SOF_SUSPENDED;
3070 break;
3071 }
3072 return (1);
3073 }
3074 return (0);
3075 }
3076
3077 /*
3078 * Handler for SO_FLUSH socket option.
3079 */
3080 int
3081 inp_flush(struct inpcb *inp, int optval)
3082 {
3083 u_int32_t flowhash = inp->inp_flowhash;
3084 struct ifnet *rtifp, *oifp;
3085
3086 /* Either all classes or one of the valid ones */
3087 if (optval != SO_TC_ALL && !SO_VALID_TC(optval))
3088 return (EINVAL);
3089
3090 /* We need a flow hash for identification */
3091 if (flowhash == 0)
3092 return (0);
3093
3094 /* Grab the interfaces from the route and pcb */
3095 rtifp = ((inp->inp_route.ro_rt != NULL) ?
3096 inp->inp_route.ro_rt->rt_ifp : NULL);
3097 oifp = inp->inp_last_outifp;
3098
3099 if (rtifp != NULL)
3100 if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3101 if (oifp != NULL && oifp != rtifp)
3102 if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3103
3104 return (0);
3105 }
3106
3107 /*
3108 * Clear the INP_INADDR_ANY flag (special case for PPP only)
3109 */
3110 void
3111 inp_clear_INP_INADDR_ANY(struct socket *so)
3112 {
3113 struct inpcb *inp = NULL;
3114
3115 socket_lock(so, 1);
3116 inp = sotoinpcb(so);
3117 if (inp) {
3118 inp->inp_flags &= ~INP_INADDR_ANY;
3119 }
3120 socket_unlock(so, 1);
3121 }
3122
3123 void
3124 inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
3125 {
3126 struct socket *so = inp->inp_socket;
3127
3128 soprocinfo->spi_pid = so->last_pid;
3129 if (so->last_pid != 0)
3130 uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
3131 /*
3132 * When not delegated, the effective pid is the same as the real pid
3133 */
3134 if (so->so_flags & SOF_DELEGATED) {
3135 soprocinfo->spi_delegated = 1;
3136 soprocinfo->spi_epid = so->e_pid;
3137 uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
3138 } else {
3139 soprocinfo->spi_delegated = 0;
3140 soprocinfo->spi_epid = so->last_pid;
3141 }
3142 }
3143
3144 int
3145 inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
3146 struct so_procinfo *soprocinfo)
3147 {
3148 struct inpcb *inp = NULL;
3149 int found = 0;
3150
3151 bzero(soprocinfo, sizeof (struct so_procinfo));
3152
3153 if (!flowhash)
3154 return (-1);
3155
3156 lck_rw_lock_shared(pcbinfo->ipi_lock);
3157 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
3158 if (inp->inp_state != INPCB_STATE_DEAD &&
3159 inp->inp_socket != NULL &&
3160 inp->inp_flowhash == flowhash) {
3161 found = 1;
3162 inp_get_soprocinfo(inp, soprocinfo);
3163 break;
3164 }
3165 }
3166 lck_rw_done(pcbinfo->ipi_lock);
3167
3168 return (found);
3169 }
3170
3171 #if CONFIG_PROC_UUID_POLICY
3172 static void
3173 inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
3174 {
3175 struct socket *so = inp->inp_socket;
3176 int before, after;
3177
3178 VERIFY(so != NULL);
3179 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
3180
3181 before = INP_NO_CELLULAR(inp);
3182 if (set) {
3183 inp_set_nocellular(inp);
3184 } else {
3185 inp_clear_nocellular(inp);
3186 }
3187 after = INP_NO_CELLULAR(inp);
3188 if (net_io_policy_log && (before != after)) {
3189 static const char *ok = "OK";
3190 static const char *nok = "NOACCESS";
3191 uuid_string_t euuid_buf;
3192 pid_t epid;
3193
3194 if (so->so_flags & SOF_DELEGATED) {
3195 uuid_unparse(so->e_uuid, euuid_buf);
3196 epid = so->e_pid;
3197 } else {
3198 uuid_unparse(so->last_uuid, euuid_buf);
3199 epid = so->last_pid;
3200 }
3201
3202 /* allow this socket to generate another notification event */
3203 so->so_ifdenied_notifies = 0;
3204
3205 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
3206 "euuid %s%s %s->%s\n", __func__,
3207 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
3208 SOCK_TYPE(so), epid, euuid_buf,
3209 (so->so_flags & SOF_DELEGATED) ?
3210 " [delegated]" : "",
3211 ((before < after) ? ok : nok),
3212 ((before < after) ? nok : ok));
3213 }
3214 }
3215
3216 #if NECP
3217 static void
3218 inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
3219 {
3220 struct socket *so = inp->inp_socket;
3221 int before, after;
3222
3223 VERIFY(so != NULL);
3224 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
3225
3226 before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
3227 if (set) {
3228 inp_set_want_app_policy(inp);
3229 } else {
3230 inp_clear_want_app_policy(inp);
3231 }
3232 after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
3233 if (net_io_policy_log && (before != after)) {
3234 static const char *wanted = "WANTED";
3235 static const char *unwanted = "UNWANTED";
3236 uuid_string_t euuid_buf;
3237 pid_t epid;
3238
3239 if (so->so_flags & SOF_DELEGATED) {
3240 uuid_unparse(so->e_uuid, euuid_buf);
3241 epid = so->e_pid;
3242 } else {
3243 uuid_unparse(so->last_uuid, euuid_buf);
3244 epid = so->last_pid;
3245 }
3246
3247 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
3248 "euuid %s%s %s->%s\n", __func__,
3249 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
3250 SOCK_TYPE(so), epid, euuid_buf,
3251 (so->so_flags & SOF_DELEGATED) ?
3252 " [delegated]" : "",
3253 ((before < after) ? unwanted : wanted),
3254 ((before < after) ? wanted : unwanted));
3255 }
3256 }
3257 #endif /* NECP */
3258 #endif /* !CONFIG_PROC_UUID_POLICY */
3259
3260 #if NECP
3261 void
3262 inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
3263 {
3264 necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
3265 if (necp_socket_should_rescope(inp) &&
3266 inp->inp_lport == 0 &&
3267 inp->inp_laddr.s_addr == INADDR_ANY &&
3268 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
3269 // If we should rescope, and the socket is not yet bound
3270 inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
3271 }
3272 }
3273 #endif /* NECP */
3274
/*
 * Refresh the per-process UUID policy state cached on `inp'.
 *
 * With CONFIG_PROC_UUID_POLICY the policy table is consulted using the
 * socket's effective UUID (e_uuid when delegated, last_uuid otherwise).
 * If the policy generation count changed, the no-cellular and (with
 * NECP) want-app-policy settings are updated to match.
 *
 * Returns 0 on success, when policy checks are disabled or do not
 * apply, and when the UUID has no entry (ENOENT); any other lookup
 * error is returned unchanged.  Without CONFIG_PROC_UUID_POLICY this
 * is a no-op returning 0.
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *sock = inp->inp_socket;
	uint32_t policy_flags = 0;
	int32_t prev_gencnt;
	int error = 0;

	if (!net_io_policy_uuid)
		return (0);
	if (sock == NULL || inp->inp_state == INPCB_STATE_DEAD)
		return (0);

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (sock->last_pid == 0 && !(sock->so_flags & SOF_DELEGATED))
		return (0);

	prev_gencnt = sock->so_policy_gencnt;
	error = proc_uuid_policy_lookup(((sock->so_flags & SOF_DELEGATED) ?
	    sock->e_uuid : sock->last_uuid), &policy_flags,
	    &sock->so_policy_gencnt);

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go thru the checks below.
	 */
	if (error == ENOENT && prev_gencnt != 0)
		sock->so_policy_gencnt = 0;

	/* Any failure other than a missing entry is reported as-is. */
	if (error != 0 && error != ENOENT)
		return (error);

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly.  If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if (prev_gencnt != sock->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (error == 0 && (policy_flags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, TRUE);
		} else if (!(policy_flags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (error == 0 && (policy_flags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, TRUE);
		} else if (!(policy_flags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, FALSE);
		}
#endif /* NECP */
	}

	/* Only 0 and ENOENT reach this point; both map to success. */
	return (0);
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return (0);
#endif /* !CONFIG_PROC_UUID_POLICY */
}
3335
3336 static unsigned int log_restricted;
3337 SYSCTL_DECL(_net_inet);
3338 SYSCTL_INT(_net_inet, OID_AUTO, log_restricted,
3339 CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0,
3340 "Log network restrictions");
3341 /*
3342 * Called when we need to enforce policy restrictions in the input path.
3343 *
3344 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
3345 */
3346 static boolean_t
3347 _inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
3348 {
3349 VERIFY(inp != NULL);
3350
3351 /*
3352 * Inbound restrictions.
3353 */
3354 if (!sorestrictrecv)
3355 return (FALSE);
3356
3357 if (ifp == NULL)
3358 return (FALSE);
3359
3360 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
3361 return (TRUE);
3362
3363 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
3364 return (TRUE);
3365
3366 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
3367 return (TRUE);
3368
3369 if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV))
3370 return (FALSE);
3371
3372 if (inp->inp_flags & INP_RECV_ANYIF)
3373 return (FALSE);
3374
3375 if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp)
3376 return (FALSE);
3377
3378 if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp))
3379 return (TRUE);
3380
3381 return (TRUE);
3382 }
3383
3384 boolean_t
3385 inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
3386 {
3387 boolean_t ret;
3388
3389 ret = _inp_restricted_recv(inp, ifp);
3390 if (ret == TRUE && log_restricted) {
3391 printf("pid %d (%s) is unable to receive packets on %s\n",
3392 current_proc()->p_pid, proc_best_name(current_proc()),
3393 ifp->if_xname);
3394 }
3395 return (ret);
3396 }
3397
3398 /*
3399 * Called when we need to enforce policy restrictions in the output path.
3400 *
3401 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
3402 */
3403 static boolean_t
3404 _inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3405 {
3406 VERIFY(inp != NULL);
3407
3408 /*
3409 * Outbound restrictions.
3410 */
3411 if (!sorestrictsend)
3412 return (FALSE);
3413
3414 if (ifp == NULL)
3415 return (FALSE);
3416
3417 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
3418 return (TRUE);
3419
3420 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
3421 return (TRUE);
3422
3423 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
3424 return (TRUE);
3425
3426 if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp))
3427 return (TRUE);
3428
3429 return (FALSE);
3430 }
3431
3432 boolean_t
3433 inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3434 {
3435 boolean_t ret;
3436
3437 ret = _inp_restricted_send(inp, ifp);
3438 if (ret == TRUE && log_restricted) {
3439 printf("pid %d (%s) is unable to transmit packets on %s\n",
3440 current_proc()->p_pid, proc_best_name(current_proc()),
3441 ifp->if_xname);
3442 }
3443 return (ret);
3444 }
3445
3446 inline void
3447 inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack)
3448 {
3449 struct ifnet *ifp = inp->inp_last_outifp;
3450 struct socket *so = inp->inp_socket;
3451 if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
3452 (ifp->if_type == IFT_CELLULAR ||
3453 ifp->if_subfamily == IFNET_SUBFAMILY_WIFI)) {
3454 int32_t unsent;
3455
3456 so->so_snd.sb_flags |= SB_SNDBYTE_CNT;
3457
3458 /*
3459 * There can be data outstanding before the connection
3460 * becomes established -- TFO case
3461 */
3462 if (so->so_snd.sb_cc > 0)
3463 inp_incr_sndbytes_total(so, so->so_snd.sb_cc);
3464
3465 unsent = inp_get_sndbytes_allunsent(so, th_ack);
3466 if (unsent > 0)
3467 inp_incr_sndbytes_unsent(so, unsent);
3468 }
3469 }
3470
3471 inline void
3472 inp_incr_sndbytes_total(struct socket *so, int32_t len)
3473 {
3474 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3475 struct ifnet *ifp = inp->inp_last_outifp;
3476
3477 if (ifp != NULL) {
3478 VERIFY(ifp->if_sndbyte_total >= 0);
3479 OSAddAtomic64(len, &ifp->if_sndbyte_total);
3480 }
3481 }
3482
3483 inline void
3484 inp_decr_sndbytes_total(struct socket *so, int32_t len)
3485 {
3486 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3487 struct ifnet *ifp = inp->inp_last_outifp;
3488
3489 if (ifp != NULL) {
3490 VERIFY(ifp->if_sndbyte_total >= len);
3491 OSAddAtomic64(-len, &ifp->if_sndbyte_total);
3492 }
3493 }
3494
3495 inline void
3496 inp_incr_sndbytes_unsent(struct socket *so, int32_t len)
3497 {
3498 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3499 struct ifnet *ifp = inp->inp_last_outifp;
3500
3501 if (ifp != NULL) {
3502 VERIFY(ifp->if_sndbyte_unsent >= 0);
3503 OSAddAtomic64(len, &ifp->if_sndbyte_unsent);
3504 }
3505 }
3506
3507 inline void
3508 inp_decr_sndbytes_unsent(struct socket *so, int32_t len)
3509 {
3510 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3511 struct ifnet *ifp = inp->inp_last_outifp;
3512
3513 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT))
3514 return;
3515
3516 if (ifp != NULL) {
3517 if (ifp->if_sndbyte_unsent >= len)
3518 OSAddAtomic64(-len, &ifp->if_sndbyte_unsent);
3519 else
3520 ifp->if_sndbyte_unsent = 0;
3521 }
3522 }
3523
3524 inline void
3525 inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
3526 {
3527 int32_t len;
3528
3529 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT))
3530 return;
3531
3532 len = inp_get_sndbytes_allunsent(so, th_ack);
3533 inp_decr_sndbytes_unsent(so, len);
3534 }
3535
3536
3537 inline void
3538 inp_set_activity_bitmap(struct inpcb *inp)
3539 {
3540 in_stat_set_activity_bitmap(&inp->inp_nw_activity, net_uptime());
3541 }
3542
3543 inline void
3544 inp_get_activity_bitmap(struct inpcb *inp, activity_bitmap_t *ab)
3545 {
3546 bcopy(&inp->inp_nw_activity, ab, sizeof (*ab));
3547 }