/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1991, 1993, 1995
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mcache.h>
#include <sys/kauth.h>
#include <sys/priv.h>
#include <sys/proc_uuid_policy.h>
#include <sys/syslog.h>

#include <libkern/OSAtomic.h>
#include <kern/locks.h>

#include <machine/limits.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/flowhash.h>
#include <net/flowadv.h>
#include <net/ntstat.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#if INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif /* INET6 */

#include <sys/kdebug.h>
#include <sys/random.h>

#include <dev/random/randomdev.h>
#include <mach/boolean.h>

#if NECP
#include <net/necp.h>
#endif

static lck_grp_t *inpcb_lock_grp;
static lck_attr_t *inpcb_lock_attr;
static lck_grp_attr_t *inpcb_lock_grp_attr;
decl_lck_mtx_data(static, inpcb_lock);		/* global INPCB lock */
decl_lck_mtx_data(static, inpcb_timeout_lock);

static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);

static u_int16_t inpcb_timeout_run = 0;	/* INPCB timer is scheduled to run */
static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
static boolean_t inpcb_ticking = FALSE;		/* "slow" timer is scheduled */
static boolean_t inpcb_fast_timer_on = FALSE;

/*
 * If the total number of gc reqs is above a threshold, schedule
 * garbage collect timer sooner
 */
static boolean_t inpcb_toomany_gcreq = FALSE;

#define	INPCB_GCREQ_THRESHOLD	50000
#define	INPCB_TOOMANY_GCREQ_TIMER	(hz/10)	/* 10 times a second */

static void inpcb_sched_timeout(struct timeval *);
static void inpcb_timeout(void *);
int inpcb_timeout_lazy = 10;	/* 10 seconds leeway for lazy timers */
extern int tvtohz(struct timeval *);

#if CONFIG_PROC_UUID_POLICY
static void inp_update_cellular_policy(struct inpcb *, boolean_t);
#if NECP
static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
#endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */

#define	DBG_FNC_PCB_LOOKUP	NETDBG_CODE(DBG_NETTCP, (6 << 8))
#define	DBG_FNC_PCB_HLOOKUP	NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))

/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 */
int ipport_lowfirstauto = IPPORT_RESERVED - 1;	/* 1023 */
int ipport_lowlastauto = IPPORT_RESERVEDSTART;	/* 600 */
int ipport_firstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
int ipport_lastauto = IPPORT_HILASTAUTO;	/* 65535 */
int ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
int ipport_hilastauto = IPPORT_HILASTAUTO;	/* 65535 */

#define	RANGECHK(var, min, max) \
	if ((var) < (min)) { (var) = (min); } \
	else if ((var) > (max)) { (var) = (max); }

static int
sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error;

	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
	if (!error) {
		RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
		RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
		RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
	}
	return (error);
}

#undef RANGECHK

SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
	CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports");

SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
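
/*
 * Usage sketch (illustrative note added for clarity; not part of the
 * original source): the ranges above surface as sysctls under
 * net.inet.ip.portrange, so the ephemeral port window can be inspected
 * or tuned from user space, e.g.:
 *
 *	sysctl net.inet.ip.portrange.first	# default 49152
 *	sysctl -w net.inet.ip.portrange.last=60000
 *
 * Every write goes through sysctl_net_ipport_check(), which clamps the
 * low range into [1, IPPORT_RESERVED) and the default/high ranges into
 * [IPPORT_RESERVED, USHRT_MAX].
 */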

extern int udp_use_randomport;
extern int tcp_use_randomport;

/* Structs used for flowhash computation */
struct inp_flowhash_key_addr {
	union {
		struct in_addr	v4;
		struct in6_addr	v6;
		u_int8_t	addr8[16];
		u_int16_t	addr16[8];
		u_int32_t	addr32[4];
	} infha;
};

struct inp_flowhash_key {
	struct inp_flowhash_key_addr	infh_laddr;
	struct inp_flowhash_key_addr	infh_faddr;
	u_int32_t			infh_lport;
	u_int32_t			infh_fport;
	u_int32_t			infh_af;
	u_int32_t			infh_proto;
	u_int32_t			infh_rand1;
	u_int32_t			infh_rand2;
};
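
/*
 * Sketch of how the key above is consumed (explanatory note based on the
 * flowhash code elsewhere in xnu, not a verbatim excerpt): the local and
 * foreign address/port pair, address family, protocol and two random
 * words are packed into a struct inp_flowhash_key, and the whole struct
 * is run through net_flowhash() seeded with inp_hash_seed, yielding the
 * per-flow identifier kept in inp->inp_flowhash for flow advisories.
 */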

static u_int32_t inp_hash_seed = 0;

static int infc_cmp(const struct inpcb *, const struct inpcb *);

/* Flags used by inp_fc_getinp */
#define	INPFC_SOLOCKED	0x1
#define	INPFC_REMOVE	0x2
static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);

static void inp_fc_feedback(struct inpcb *);
extern void tcp_remove_from_time_wait(struct inpcb *inp);

decl_lck_mtx_data(static, inp_fc_lck);

RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);

/*
 * Use this inp as a key to find an inp in the flowhash tree.
 * Accesses to it are protected by inp_fc_lck.
 */
struct inpcb key_inp;

/*
 * in_pcb.c: manage the Protocol Control Blocks.
 */

void
in_pcbinit(void)
{
	static int inpcb_initialized = 0;

	VERIFY(!inpcb_initialized);
	inpcb_initialized = 1;

	inpcb_lock_grp_attr = lck_grp_attr_alloc_init();
	inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr);
	inpcb_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr);
	lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr);

	/*
	 * Initialize data structures required to deliver
	 * flow advisories.
	 */
	lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr);
	lck_mtx_lock(&inp_fc_lck);
	RB_INIT(&inp_fc_tree);
	bzero(&key_inp, sizeof(key_inp));
	lck_mtx_unlock(&inp_fc_lck);
}

#define	INPCB_HAVE_TIMER_REQ(req)	(((req).intimer_lazy > 0) || \
	((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
static void
inpcb_timeout(void *arg)
{
	struct inpcbinfo *ipi;
	boolean_t t, gc;
	struct intimercount gccnt, tmcnt;
	struct timeval leeway;
	boolean_t toomany_gc = FALSE;

	if (arg != NULL) {
		VERIFY(arg == &inpcb_toomany_gcreq);
		toomany_gc = *(boolean_t *)arg;
	}

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	bzero(&gccnt, sizeof(gccnt));
	bzero(&tmcnt, sizeof(tmcnt));

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	gc = inpcb_garbage_collecting;
	inpcb_garbage_collecting = FALSE;

	t = inpcb_ticking;
	inpcb_ticking = FALSE;

	if (gc || t) {
		lck_mtx_unlock(&inpcb_timeout_lock);

		lck_mtx_lock(&inpcb_lock);
		TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
				bzero(&ipi->ipi_gc_req,
				    sizeof(ipi->ipi_gc_req));
				if (gc && ipi->ipi_gc != NULL) {
					ipi->ipi_gc(ipi);
					gccnt.intimer_lazy +=
					    ipi->ipi_gc_req.intimer_lazy;
					gccnt.intimer_fast +=
					    ipi->ipi_gc_req.intimer_fast;
					gccnt.intimer_nodelay +=
					    ipi->ipi_gc_req.intimer_nodelay;
				}
			}
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
				bzero(&ipi->ipi_timer_req,
				    sizeof(ipi->ipi_timer_req));
				if (t && ipi->ipi_timer != NULL) {
					ipi->ipi_timer(ipi);
					tmcnt.intimer_lazy +=
					    ipi->ipi_timer_req.intimer_lazy;
					tmcnt.intimer_fast +=
					    ipi->ipi_timer_req.intimer_fast;
					tmcnt.intimer_nodelay +=
					    ipi->ipi_timer_req.intimer_nodelay;
				}
			}
		}
		lck_mtx_unlock(&inpcb_lock);
		lck_mtx_lock_spin(&inpcb_timeout_lock);
	}

	/* lock was dropped above, so check first before overriding */
	if (!inpcb_garbage_collecting)
		inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
	if (!inpcb_ticking)
		inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);

	/* re-arm the timer if there's work to do */
	if (toomany_gc) {
		inpcb_toomany_gcreq = FALSE;
	} else {
		inpcb_timeout_run--;
		VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);
	}

	bzero(&leeway, sizeof(leeway));
	leeway.tv_sec = inpcb_timeout_lazy;
	if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0)
		inpcb_sched_timeout(NULL);
	else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5)
		/* be lazy when idle with little activity */
		inpcb_sched_timeout(&leeway);
	else
		inpcb_sched_timeout(NULL);

	lck_mtx_unlock(&inpcb_timeout_lock);
}

static void
inpcb_sched_timeout(struct timeval *leeway)
{
	lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);

	if (inpcb_timeout_run == 0 &&
	    (inpcb_garbage_collecting || inpcb_ticking)) {
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		if (leeway == NULL) {
			inpcb_fast_timer_on = TRUE;
			timeout(inpcb_timeout, NULL, hz);
		} else {
			inpcb_fast_timer_on = FALSE;
			timeout_with_leeway(inpcb_timeout, NULL, hz,
			    tvtohz(leeway));
		}
	} else if (inpcb_timeout_run == 1 &&
	    leeway == NULL && !inpcb_fast_timer_on) {
		/*
		 * Since the request was for a fast timer but the
		 * scheduled timer is a lazy timer, try to schedule
		 * another instance of fast timer also
		 */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		inpcb_fast_timer_on = TRUE;
		timeout(inpcb_timeout, NULL, hz);
	}
}

void
inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
{
	struct timeval leeway;
	u_int32_t gccnt;

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	inpcb_garbage_collecting = TRUE;

	gccnt = ipi->ipi_gc_req.intimer_nodelay +
	    ipi->ipi_gc_req.intimer_fast;

	if (gccnt > INPCB_GCREQ_THRESHOLD && !inpcb_toomany_gcreq) {
		inpcb_toomany_gcreq = TRUE;

		/*
		 * There are too many PCBs waiting to be garbage collected;
		 * schedule a much faster timeout in addition to
		 * the caller's request.
		 */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		timeout(inpcb_timeout, (void *)&inpcb_toomany_gcreq,
		    INPCB_TOOMANY_GCREQ_TIMER);
	}

	switch (type) {
	case INPCB_TIMER_NODELAY:
		atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1);
		inpcb_sched_timeout(NULL);
		break;
	case INPCB_TIMER_FAST:
		atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
		inpcb_sched_timeout(NULL);
		break;
	default:
		atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);
		leeway.tv_sec = inpcb_timeout_lazy;
		leeway.tv_usec = 0;
		inpcb_sched_timeout(&leeway);
		break;
	}
	lck_mtx_unlock(&inpcb_timeout_lock);
}
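
/*
 * Usage sketch (illustrative note, not part of the original source): a
 * protocol requests a garbage-collection pass on its own pcbinfo, e.g.
 * TCP's timewait handling might ask for an expedited pass with:
 *
 *	inpcb_gc_sched(&tcbinfo, INPCB_TIMER_FAST);
 *
 * The INPCB_TIMER_* constants only pick which request counter is bumped
 * and how soon the callout fires; the actual reaping happens later in
 * inpcb_timeout() via the registered ipi->ipi_gc() callback.
 */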

void
inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
{
	struct timeval leeway;

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	inpcb_ticking = TRUE;
	switch (type) {
	case INPCB_TIMER_NODELAY:
		atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1);
		inpcb_sched_timeout(NULL);
		break;
	case INPCB_TIMER_FAST:
		atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
		inpcb_sched_timeout(NULL);
		break;
	default:
		atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1);
		leeway.tv_sec = inpcb_timeout_lazy;
		leeway.tv_usec = 0;
		inpcb_sched_timeout(&leeway);
		break;
	}
	lck_mtx_unlock(&inpcb_timeout_lock);
}

void
in_pcbinfo_attach(struct inpcbinfo *ipi)
{
	struct inpcbinfo *ipi0;

	lck_mtx_lock(&inpcb_lock);
	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
		if (ipi0 == ipi) {
			panic("%s: ipi %p already in the list\n",
			    __func__, ipi);
			/* NOTREACHED */
		}
	}
	TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
	lck_mtx_unlock(&inpcb_lock);
}

int
in_pcbinfo_detach(struct inpcbinfo *ipi)
{
	struct inpcbinfo *ipi0;
	int error = 0;

	lck_mtx_lock(&inpcb_lock);
	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
		if (ipi0 == ipi)
			break;
	}
	if (ipi0 != NULL)
		TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
	else
		error = ENXIO;
	lck_mtx_unlock(&inpcb_lock);

	return (error);
}

/*
 * Allocate a PCB and associate it with the socket.
 *
 * Returns:	0			Success
 *		ENOBUFS
 *		ENOMEM
 */
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
{
#pragma unused(p)
	struct inpcb *inp;
	caddr_t temp;
#if CONFIG_MACF_NET
	int mac_error;
#endif /* CONFIG_MACF_NET */

	if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
		inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone);
		if (inp == NULL)
			return (ENOBUFS);
		bzero((caddr_t)inp, sizeof (*inp));
	} else {
		inp = (struct inpcb *)(void *)so->so_saved_pcb;
		temp = inp->inp_saved_ppcb;
		bzero((caddr_t)inp, sizeof (*inp));
		inp->inp_saved_ppcb = temp;
	}

	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	inp->inp_pcbinfo = pcbinfo;
	inp->inp_socket = so;
#if CONFIG_MACF_NET
	mac_error = mac_inpcb_label_init(inp, M_WAITOK);
	if (mac_error != 0) {
		if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0)
			zfree(pcbinfo->ipi_zone, inp);
		return (mac_error);
	}
	mac_inpcb_label_associate(so, inp);
#endif /* CONFIG_MACF_NET */
	/* make sure inp_stat is always 64-bit aligned */
	inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
	    sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) {
		panic("%s: insufficient space to align inp_stat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_cstat is always 64-bit aligned */
	inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
	    sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) {
		panic("%s: insufficient space to align inp_cstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_wstat is always 64-bit aligned */
	inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
	    sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) {
		panic("%s: insufficient space to align inp_wstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_Wstat is always 64-bit aligned */
	inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
	    sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) {
		panic("%s: insufficient space to align inp_Wstat", __func__);
		/* NOTREACHED */
	}
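
	/*
	 * Note on the four blocks above (explanatory sketch, assuming the
	 * usual power-of-two P2ROUNDUP definition): each *_store buffer is
	 * padded by sizeof (u_int64_t) - 1 bytes, and
	 *
	 *	P2ROUNDUP(p, 8) == (((uintptr_t)(p) + 7) & ~(uintptr_t)7)
	 *
	 * rounds the buffer start up to the next 8-byte boundary, so the
	 * stats can be updated with 64-bit wide operations no matter how
	 * struct inpcb itself is packed.  The panic merely makes a sizing
	 * mistake in the struct definition loud at run time.
	 */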

	so->so_pcb = (caddr_t)inp;

	if (so->so_proto->pr_flags & PR_PCBLOCK) {
		lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
		    pcbinfo->ipi_lock_attr);
	}

#if INET6
	if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on)
		inp->inp_flags |= IN6P_IPV6_V6ONLY;

	if (ip6_auto_flowlabel)
		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
#endif /* INET6 */

	(void) inp_update_policy(inp);

	lck_rw_lock_exclusive(pcbinfo->ipi_lock);
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
	pcbinfo->ipi_count++;
	lck_rw_done(pcbinfo->ipi_lock);
	return (0);
}

/*
 * in_pcblookup_local_and_cleanup does everything
 * in_pcblookup_local does but it checks for a socket
 * that's going away.  Since we know that the lock is
 * held read+write when this function is called, we
 * can safely dispose of this socket like the slow
 * timer would usually do and return NULL.  This is
 * great for bind.
 */
struct inpcb *
in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    u_int lport_arg, int wild_okay)
{
	struct inpcb *inp;

	/* Perform normal lookup */
	inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);

	/* Check if we found a match but it's waiting to be disposed */
	if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
		struct socket *so = inp->inp_socket;

		lck_mtx_lock(&inp->inpcb_mtx);

		if (so->so_usecount == 0) {
			if (inp->inp_state != INPCB_STATE_DEAD)
				in_pcbdetach(inp);
			in_pcbdispose(inp);	/* will unlock & destroy */
			inp = NULL;
		} else {
			lck_mtx_unlock(&inp->inpcb_mtx);
		}
	}

	return (inp);
}

static void
in_pcb_conflict_post_msg(u_int16_t port)
{
	/*
	 * Radar 5523020: send a kernel event notification if a
	 * non-participating socket tries to bind a port owned by a
	 * socket that has set SOF_NOTIFYCONFLICT.
	 */
	struct kev_msg ev_msg;
	struct kev_in_portinuse in_portinuse;

	bzero(&in_portinuse, sizeof (struct kev_in_portinuse));
	bzero(&ev_msg, sizeof (struct kev_msg));
	in_portinuse.port = ntohs(port);	/* port in host order */
	in_portinuse.req_pid = proc_selfpid();
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_INET_SUBCLASS;
	ev_msg.event_code = KEV_INET_PORTINUSE;
	ev_msg.dv[0].data_ptr = &in_portinuse;
	ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse);
	ev_msg.dv[1].data_length = 0;
	kev_post_msg(&ev_msg);
}

/*
 * Bind an INPCB to an address and/or port.  This routine should not alter
 * the caller-supplied local address "nam".
 *
 * Returns:	0			Success
 *		EADDRNOTAVAIL		Address not available.
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported [notdef]
 *		EACCES			Permission denied
 *		EADDRINUSE		Address in use
 *		EAGAIN			Resource unavailable, try again
 *	priv_check_cred:EPERM		Operation not permitted
 */
int
in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
{
	struct socket *so = inp->inp_socket;
	unsigned short *lastport;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	u_short lport = 0, rand_port = 0;
	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
	int error, randomport, conflict = 0;
	boolean_t anonport = FALSE;
	kauth_cred_t cred;
	struct in_addr laddr;
	struct ifnet *outif = NULL;

	if (TAILQ_EMPTY(&in_ifaddrhead))	/* XXX broken! */
		return (EADDRNOTAVAIL);
	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
		return (EINVAL);
	if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
		wild = 1;
	socket_unlock(so, 0);	/* keep reference on socket */
	lck_rw_lock_exclusive(pcbinfo->ipi_lock);

	bzero(&laddr, sizeof(laddr));

	if (nam != NULL) {
		if (nam->sa_len != sizeof (struct sockaddr_in)) {
			lck_rw_done(pcbinfo->ipi_lock);
			socket_lock(so, 0);
			return (EINVAL);
		}
#if 0
		/*
		 * We should check the family, but old programs
		 * incorrectly fail to initialize it.
		 */
		if (nam->sa_family != AF_INET) {
			lck_rw_done(pcbinfo->ipi_lock);
			socket_lock(so, 0);
			return (EAFNOSUPPORT);
		}
#endif /* 0 */
		lport = SIN(nam)->sin_port;

		if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
			/*
			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
			 * allow complete duplication of binding if
			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
			 * and a multicast address is bound on both
			 * new and duplicated sockets.
			 */
			if (so->so_options & SO_REUSEADDR)
				reuseport = SO_REUSEADDR|SO_REUSEPORT;
		} else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
			struct sockaddr_in sin;
			struct ifaddr *ifa;

			/* Sanitized for interface address searches */
			bzero(&sin, sizeof (sin));
			sin.sin_family = AF_INET;
			sin.sin_len = sizeof (struct sockaddr_in);
			sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;

			ifa = ifa_ifwithaddr(SA(&sin));
			if (ifa == NULL) {
				lck_rw_done(pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return (EADDRNOTAVAIL);
			} else {
				/*
				 * Opportunistically determine the outbound
				 * interface that may be used; this may not
				 * hold true if we end up using a route
				 * going over a different interface, e.g.
				 * when sending to a local address.  This
				 * will get updated again after sending.
				 */
				IFA_LOCK(ifa);
				outif = ifa->ifa_ifp;
				IFA_UNLOCK(ifa);
				IFA_REMREF(ifa);
			}
		}
		if (lport != 0) {
			struct inpcb *t;
			uid_t u;

			if (ntohs(lport) < IPPORT_RESERVED) {
				cred = kauth_cred_proc_ref(p);
				error = priv_check_cred(cred,
				    PRIV_NETINET_RESERVEDPORT, 0);
				kauth_cred_unref(&cred);
				if (error != 0) {
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return (EACCES);
				}
			}
			if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
			    (u = kauth_cred_getuid(so->so_cred)) != 0 &&
			    (t = in_pcblookup_local_and_cleanup(
			    inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
			    INPLOOKUP_WILDCARD)) != NULL &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY ||
			    !(t->inp_socket->so_options & SO_REUSEPORT)) &&
			    (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
			    !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY)) {
				if ((t->inp_socket->so_flags &
				    SOF_NOTIFYCONFLICT) &&
				    !(so->so_flags & SOF_NOTIFYCONFLICT))
					conflict = 1;

				lck_rw_done(pcbinfo->ipi_lock);

				if (conflict)
					in_pcb_conflict_post_msg(lport);

				socket_lock(so, 0);
				return (EADDRINUSE);
			}
			t = in_pcblookup_local_and_cleanup(pcbinfo,
			    SIN(nam)->sin_addr, lport, wild);
			if (t != NULL &&
			    (reuseport & t->inp_socket->so_options) == 0) {
#if INET6
				if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
				    t->inp_laddr.s_addr != INADDR_ANY ||
				    SOCK_DOM(so) != PF_INET6 ||
				    SOCK_DOM(t->inp_socket) != PF_INET6)
#endif /* INET6 */
				{
					if ((t->inp_socket->so_flags &
					    SOF_NOTIFYCONFLICT) &&
					    !(so->so_flags & SOF_NOTIFYCONFLICT))
						conflict = 1;

					lck_rw_done(pcbinfo->ipi_lock);

					if (conflict)
						in_pcb_conflict_post_msg(lport);
					socket_lock(so, 0);
					return (EADDRINUSE);
				}
			}
		}
		laddr = SIN(nam)->sin_addr;
	}
	if (lport == 0) {
		u_short first, last;
		int count;

		randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
		    (so->so_type == SOCK_STREAM ? tcp_use_randomport :
		    udp_use_randomport);

		/*
		 * Even though this looks similar to the code in
		 * in6_pcbsetport, the v6 vs v4 checks are different.
		 */
		anonport = TRUE;
		if (inp->inp_flags & INP_HIGHPORT) {
			first = ipport_hifirstauto;	/* sysctl */
			last = ipport_hilastauto;
			lastport = &pcbinfo->ipi_lasthi;
		} else if (inp->inp_flags & INP_LOWPORT) {
			cred = kauth_cred_proc_ref(p);
			error = priv_check_cred(cred,
			    PRIV_NETINET_RESERVEDPORT, 0);
			kauth_cred_unref(&cred);
			if (error != 0) {
				lck_rw_done(pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return (error);
			}
			first = ipport_lowfirstauto;	/* 1023 */
			last = ipport_lowlastauto;	/* 600 */
			lastport = &pcbinfo->ipi_lastlow;
		} else {
			first = ipport_firstauto;	/* sysctl */
			last = ipport_lastauto;
			lastport = &pcbinfo->ipi_lastport;
		}
		/* No point in randomizing if only one port is available */
		if (first == last)
			randomport = 0;
		/*
		 * Simple check to ensure that not all ports are used up,
		 * which would otherwise deadlock the loops below.
		 *
		 * We split the two cases (up and down) so that the direction
		 * is not being tested on each round of the loop.
		 */
		if (first > last) {
			/*
			 * counting down
			 */
			if (randomport) {
				read_random(&rand_port, sizeof (rand_port));
				*lastport =
				    first - (rand_port % (first - last));
			}
			count = first - last;

			do {
				if (count-- < 0) {	/* completely used? */
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return (EADDRNOTAVAIL);
				}
				--*lastport;
				if (*lastport > first || *lastport < last)
					*lastport = first;
				lport = htons(*lastport);
			} while (in_pcblookup_local_and_cleanup(pcbinfo,
			    ((laddr.s_addr != INADDR_ANY) ? laddr :
			    inp->inp_laddr), lport, wild));
		} else {
			/*
			 * counting up
			 */
			if (randomport) {
				read_random(&rand_port, sizeof (rand_port));
				*lastport =
				    first + (rand_port % (first - last));
			}
			count = last - first;

			do {
				if (count-- < 0) {	/* completely used? */
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return (EADDRNOTAVAIL);
				}
				++*lastport;
				if (*lastport < first || *lastport > last)
					*lastport = first;
				lport = htons(*lastport);
			} while (in_pcblookup_local_and_cleanup(pcbinfo,
			    ((laddr.s_addr != INADDR_ANY) ? laddr :
			    inp->inp_laddr), lport, wild));
		}
	}
	socket_lock(so, 0);
	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
		lck_rw_done(pcbinfo->ipi_lock);
		return (EINVAL);
	}

	if (laddr.s_addr != INADDR_ANY) {
		inp->inp_laddr = laddr;
		inp->inp_last_outifp = outif;
	}
	inp->inp_lport = lport;
	if (anonport)
		inp->inp_flags |= INP_ANONPORT;

	if (in_pcbinshash(inp, 1) != 0) {
		inp->inp_laddr.s_addr = INADDR_ANY;
		inp->inp_last_outifp = NULL;

		inp->inp_lport = 0;
		if (anonport)
			inp->inp_flags &= ~INP_ANONPORT;
		lck_rw_done(pcbinfo->ipi_lock);
		return (EAGAIN);
	}
	lck_rw_done(pcbinfo->ipi_lock);
	sflt_notify(so, sock_evt_bound, NULL);
	return (0);
}
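
/*
 * Usage sketch (illustrative, not a verbatim excerpt of the original
 * source): a protocol's pru_bind entry point typically forwards straight
 * here, so UDP's bind handler might reduce to something like:
 *
 *	static int
 *	udp_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
 *	{
 *		struct inpcb *inp = sotoinpcb(so);
 *
 *		if (inp == NULL)
 *			return (EINVAL);
 *		return (in_pcbbind(inp, nam, p));
 *	}
 *
 * Passing nam == NULL (or a zero sin_port) requests an anonymous port
 * from the ranges configured near the top of this file.
 */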

/*
 * Transform old in_pcbconnect() into an inner subroutine for new
 * in_pcbconnect(); do some validity-checking on the remote address
 * (in "nam") and then determine local host address (i.e., which
 * interface) to use to access that remote host.
 *
 * This routine may alter the caller-supplied remote address "nam".
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 *
 * This routine might return an ifp with a reference held if the caller
 * provides a non-NULL outif, even in the error case.  The caller is
 * responsible for releasing its reference.
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported
 *		EADDRNOTAVAIL		Address not available
 */
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
    unsigned int ifscope, struct ifnet **outif)
{
	struct route *ro = &inp->inp_route;
	struct in_ifaddr *ia = NULL;
	struct sockaddr_in sin;
	int error = 0;
	boolean_t restricted = FALSE;

	if (outif != NULL)
		*outif = NULL;
	if (nam->sa_len != sizeof (struct sockaddr_in))
		return (EINVAL);
	if (SIN(nam)->sin_family != AF_INET)
		return (EAFNOSUPPORT);
	if (SIN(nam)->sin_port == 0)
		return (EADDRNOTAVAIL);

	/*
	 * If the destination address is INADDR_ANY,
	 * use the primary local address.
	 * If the supplied address is INADDR_BROADCAST,
	 * and the primary interface supports broadcast,
	 * choose the broadcast address for that interface.
	 */
	if (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
	    SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST) {
		lck_rw_lock_shared(in_ifaddr_rwlock);
		if (!TAILQ_EMPTY(&in_ifaddrhead)) {
			ia = TAILQ_FIRST(&in_ifaddrhead);
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
				SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
			} else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
				SIN(nam)->sin_addr =
				    SIN(&ia->ia_broadaddr)->sin_addr;
			}
			IFA_UNLOCK(&ia->ia_ifa);
			ia = NULL;
		}
		lck_rw_done(in_ifaddr_rwlock);
	}
	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp->inp_laddr.s_addr != INADDR_ANY) {
		VERIFY(ia == NULL);
		*laddr = inp->inp_laddr;
		return (0);
	}

	/*
	 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
	 * then it overrides the sticky ifscope set for the socket.
	 */
	if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF))
		ifscope = inp->inp_boundifp->if_index;

	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 * Note that we should check the address family of the cached
	 * destination, in case of sharing the cache with IPv6.
	 */
	if (ro->ro_rt != NULL)
		RT_LOCK_SPIN(ro->ro_rt);
	if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
	    SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
	    (inp->inp_socket->so_options & SO_DONTROUTE)) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);
	}
	if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
	    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);
		/* No route yet, so try to acquire one */
		bzero(&ro->ro_dst, sizeof (struct sockaddr_in));
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof (struct sockaddr_in);
		SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
		rtalloc_scoped(ro, ifscope);
		if (ro->ro_rt != NULL)
			RT_LOCK_SPIN(ro->ro_rt);
	}
	/* Sanitized local copy for interface address searches */
	bzero(&sin, sizeof (sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof (struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
	/*
	 * If we did not find (or use) a route, assume dest is reachable
	 * on a directly connected network and try to find a corresponding
	 * interface to take the source address from.
	 */
	if (ro->ro_rt == NULL) {
		VERIFY(ia == NULL);
		ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
		if (ia == NULL)
			ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
		error = ((ia == NULL) ? ENETUNREACH : 0);
		goto done;
	}
	RT_LOCK_ASSERT_HELD(ro->ro_rt);
	/*
	 * If the outgoing interface on the route found is not
	 * a loopback interface, use the address from that interface.
	 */
	if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
		VERIFY(ia == NULL);
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 * Apply the same logic for expensive interfaces.
		 */
		if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
			RT_UNLOCK(ro->ro_rt);
			ROUTE_RELEASE(ro);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			ia = ifatoia(ro->ro_rt->rt_ifa);
			IFA_ADDREF(&ia->ia_ifa);
			RT_UNLOCK(ro->ro_rt);
			error = 0;
		}
		goto done;
	}
	VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
	RT_UNLOCK(ro->ro_rt);
	/*
	 * The outgoing interface is marked with the loopback flag, so the
	 * route found points back to ourselves.
	 * Try to find the interface of the destination address and then
	 * take the address from there.  That interface is not necessarily
	 * a loopback interface.
	 */
	VERIFY(ia == NULL);
	ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
	if (ia == NULL)
		ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
	if (ia == NULL)
		ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
	if (ia == NULL) {
		RT_LOCK(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL)
			IFA_ADDREF(&ia->ia_ifa);
		RT_UNLOCK(ro->ro_rt);
	}
	error = ((ia == NULL) ? ENETUNREACH : 0);

done:
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
	    inp->inp_moptions != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp;

		imo = inp->inp_moptions;
		IMO_LOCK(imo);
		if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
		    ia->ia_ifp != imo->imo_multicast_ifp)) {
			ifp = imo->imo_multicast_ifp;
			if (ia != NULL)
				IFA_REMREF(&ia->ia_ifa);
			lck_rw_lock_shared(in_ifaddr_rwlock);
			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
				if (ia->ia_ifp == ifp)
					break;
			}
			if (ia != NULL)
				IFA_ADDREF(&ia->ia_ifa);
			lck_rw_done(in_ifaddr_rwlock);
			if (ia == NULL)
				error = EADDRNOTAVAIL;
			else
				error = 0;
		}
		IMO_UNLOCK(imo);
	}
	/*
	 * Don't do pcblookup call here; return interface in laddr
	 * and exit to caller, that will do the lookup.
	 */
	if (ia != NULL) {
		/*
		 * If the source address belongs to a cellular interface
		 * and the socket forbids our using interfaces of such
		 * type, pretend that there is no source address.
		 * Apply the same logic for expensive interfaces.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
			IFA_UNLOCK(&ia->ia_ifa);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else if (error == 0) {
			*laddr = ia->ia_addr.sin_addr;
			if (outif != NULL) {
				struct ifnet *ifp;

				if (ro->ro_rt != NULL)
					ifp = ro->ro_rt->rt_ifp;
				else
					ifp = ia->ia_ifp;

				VERIFY(ifp != NULL);
				IFA_CONVERT_LOCK(&ia->ia_ifa);
				ifnet_reference(ifp);	/* for caller */
				if (*outif != NULL)
					ifnet_release(*outif);
				*outif = ifp;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		} else {
			IFA_UNLOCK(&ia->ia_ifa);
		}
		IFA_REMREF(&ia->ia_ifa);
		ia = NULL;
	}

	if (restricted && error == EHOSTUNREACH) {
		soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_IFDENIED));
	}

	return (error);
}

/*
 * Outer subroutine:
 * Connect from a socket to a specified address.
 * Both address and port must be specified in argument sin.
 * If we don't have a local address for this socket yet,
 * then pick one.
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
	struct inpcb *pcb;
	int error;
	struct socket *so = inp->inp_socket;

	/*
	 * Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif)) != 0)
		return (error);

	socket_unlock(so, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state.  When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0)
		return (ECONNREFUSED);

	if (pcb != NULL) {
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return (EADDRINUSE);
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			error = in_pcbbind(inp, NULL, p);
			if (error)
				return (error);
		}
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		/*
		 * Using IP_PKTINFO without a local port already specified
		 * would cause the kernel to panic; see
		 * rdar://problem/18508185.  For now, return an error to
		 * avoid the panic.  This routine can be refactored to
		 * handle this better in the future.
		 */
		if (inp->inp_lport == 0)
			return (EINVAL);
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
	}
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
		nstat_pcb_invalidate_cache(inp);
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	return (0);
}
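
/*
 * Usage sketch (illustrative, not part of the original source): a
 * datagram connect path might invoke this as
 *
 *	error = in_pcbconnect(inp, nam, p, IFSCOPE_NONE, NULL);
 *
 * after which inp_faddr/inp_fport are set and the PCB has been rehashed
 * into its connected-lookup position.
 */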

void
in_pcbdisconnect(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
		nstat_pcb_cache(inp);

	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;

	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
		/* lock inversion issue, mostly with udp multicast packets */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	/*
	 * A multipath subflow socket would have its SS_NOFDREF set by default,
	 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
	 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF))
		in_pcbdetach(inp);
}

void
in_pcbdetach(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (so->so_pcb == NULL) {
		/* PCB has been disposed */
		panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__,
		    inp, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if IPSEC
	if (inp->inp_sp != NULL) {
		(void) ipsec4_delete_pcbpolicy(inp);
	}
#endif /* IPSEC */

	/*
	 * Let NetworkStatistics know this PCB is going away
	 * before we detach it.
	 */
	if (nstat_collect &&
	    (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP))
		nstat_pcb_detach(inp);

	/* Free memory buffer held for generating keep alives */
	if (inp->inp_keepalive_data != NULL) {
		FREE(inp->inp_keepalive_data, M_TEMP);
		inp->inp_keepalive_data = NULL;
	}

	/* mark socket state as dead */
	if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
		panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
		    __func__, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

	if (!(so->so_flags & SOF_PCBCLEARING)) {
		struct ip_moptions *imo;

		inp->inp_vflag = 0;
		if (inp->inp_options != NULL) {
			(void) m_free(inp->inp_options);
			inp->inp_options = NULL;
		}
		ROUTE_RELEASE(&inp->inp_route);
		imo = inp->inp_moptions;
		inp->inp_moptions = NULL;
		if (imo != NULL)
			IMO_REMREF(imo);
		sofreelastref(so, 0);
		inp->inp_state = INPCB_STATE_DEAD;
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;

		inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
	}
}

void
in_pcbdispose(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	if (so != NULL && so->so_usecount != 0) {
		panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n",
		    __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
		    solockhistory_nr(so));
		/* NOTREACHED */
	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
		if (so != NULL) {
			panic_plain("%s: inp %p invalid wantcnt %d, so %p "
			    "[%d,%d] usecount %d retaincnt %d state 0x%x "
			    "flags 0x%x lockhistory %s\n", __func__, inp,
			    inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
			    so->so_usecount, so->so_retaincnt, so->so_state,
			    so->so_flags, solockhistory_nr(so));
			/* NOTREACHED */
		} else {
			panic("%s: inp %p invalid wantcnt %d no socket\n",
			    __func__, inp, inp->inp_wantcnt);
			/* NOTREACHED */
		}
	}

	lck_rw_assert(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);

	inp->inp_gencnt = ++ipi->ipi_gencnt;
	/* access ipi in in_pcbremlists */
	in_pcbremlists(inp);

	if (so != NULL) {
		if (so->so_proto->pr_flags & PR_PCBLOCK) {
			sofreelastref(so, 0);
			if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
				/*
				 * selthreadclear() already called
				 * during sofreelastref() above.
				 */
				sbrelease(&so->so_rcv);
				sbrelease(&so->so_snd);
			}
			if (so->so_head != NULL) {
				panic("%s: so=%p head still exist\n",
				    __func__, so);
				/* NOTREACHED */
			}
			lck_mtx_unlock(&inp->inpcb_mtx);
			lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
		}
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;
		so->so_saved_pcb = (caddr_t)inp;
		so->so_pcb = NULL;
		inp->inp_socket = NULL;
#if CONFIG_MACF_NET
		mac_inpcb_label_destroy(inp);
#endif /* CONFIG_MACF_NET */
		/*
		 * In case there is a route cached after a detach (possible
		 * in the TCP case), make sure that it is freed before
		 * we deallocate the structure.
		 */
		ROUTE_RELEASE(&inp->inp_route);
		if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
			zfree(ipi->ipi_zone, inp);
		}
		sodealloc(so);
	}
}

/*
 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
 * in struct pr_usrreqs, so that protocols can just reference them directly
 * without the need for a wrapper function.
 */
int
in_getsockaddr(struct socket *so, struct sockaddr **nam)
{
	struct inpcb *inp;
	struct sockaddr_in *sin;

	/*
	 * Do the malloc first in case it blocks.
	 */
	MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
	if (sin == NULL)
		return (ENOBUFS);
	bzero(sin, sizeof (*sin));
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);

	if ((inp = sotoinpcb(so)) == NULL) {
		FREE(sin, M_SONAME);
		return (EINVAL);
	}
	sin->sin_port = inp->inp_lport;
	sin->sin_addr = inp->inp_laddr;

	*nam = (struct sockaddr *)sin;
	return (0);
}

int
in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss)
{
	struct sockaddr_in *sin = SIN(ss);
	struct inpcb *inp;

	VERIFY(ss != NULL);
	bzero(ss, sizeof (*ss));

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);

	if ((inp = sotoinpcb(so)) == NULL
#if NECP
	    || (necp_socket_should_use_flow_divert(inp))
#endif /* NECP */
	    )
		return (inp == NULL ? EINVAL : EPROTOTYPE);

	sin->sin_port = inp->inp_lport;
	sin->sin_addr = inp->inp_laddr;
	return (0);
}

int
in_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
	struct inpcb *inp;
	struct sockaddr_in *sin;

	/*
	 * Do the malloc first in case it blocks.
	 */
	MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
	if (sin == NULL)
		return (ENOBUFS);
	bzero((caddr_t)sin, sizeof (*sin));
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);

	if ((inp = sotoinpcb(so)) == NULL) {
		FREE(sin, M_SONAME);
		return (EINVAL);
	}
	sin->sin_port = inp->inp_fport;
	sin->sin_addr = inp->inp_faddr;

	*nam = (struct sockaddr *)sin;
	return (0);
}

int
in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss)
{
	struct sockaddr_in *sin = SIN(ss);
	struct inpcb *inp;

	VERIFY(ss != NULL);
	bzero(ss, sizeof (*ss));

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);

	if ((inp = sotoinpcb(so)) == NULL
#if NECP
	    || (necp_socket_should_use_flow_divert(inp))
#endif /* NECP */
	    ) {
		return (inp == NULL ? EINVAL : EPROTOTYPE);
	}

	sin->sin_port = inp->inp_fport;
	sin->sin_addr = inp->inp_faddr;
	return (0);
}

void
in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    int errno, void (*notify)(struct inpcb *, int))
{
	struct inpcb *inp;

	lck_rw_lock_shared(pcbinfo->ipi_lock);

	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp->inp_faddr.s_addr != faddr.s_addr ||
		    inp->inp_socket == NULL)
			continue;
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
			continue;
		socket_lock(inp->inp_socket, 1);
		(*notify)(inp, errno);
		(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
		socket_unlock(inp->inp_socket, 1);
	}
	lck_rw_done(pcbinfo->ipi_lock);
}

/*
 * Check for alternatives when higher level complains
 * about service problems.  For now, invalidate cached
 * routing information.  If the route was created dynamically
 * (by a redirect), time to try a default gateway again.
 */
void
in_losing(struct inpcb *inp)
{
	boolean_t release = FALSE;
	struct rtentry *rt;

	if ((rt = inp->inp_route.ro_rt) != NULL) {
		struct in_ifaddr *ia = NULL;

		RT_LOCK(rt);
		if (rt->rt_flags & RTF_DYNAMIC) {
			/*
			 * Prevent another thread from modifying rt_key,
			 * rt_gateway via rt_setgate() after rt_lock is
			 * dropped by marking the route as defunct.
			 */
			rt->rt_flags |= RTF_CONDEMNED;
			RT_UNLOCK(rt);
			(void) rtrequest(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
		} else {
			RT_UNLOCK(rt);
		}
		/* if the address is gone, keep the old route in the pcb */
		if (inp->inp_laddr.s_addr != INADDR_ANY &&
		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
			/*
			 * Address is around; ditch the route.  A new route
			 * can be allocated the next time output is attempted.
			 */
			release = TRUE;
		}
		if (ia != NULL)
			IFA_REMREF(&ia->ia_ifa);
	}
	if (rt == NULL || release)
		ROUTE_RELEASE(&inp->inp_route);
}

/*
 * After a routing change, flush old routing
 * and allocate a (hopefully) better one.
 */
void
in_rtchange(struct inpcb *inp, int errno)
{
#pragma unused(errno)
	boolean_t release = FALSE;
	struct rtentry *rt;

	if ((rt = inp->inp_route.ro_rt) != NULL) {
		struct in_ifaddr *ia = NULL;

		/* if address is gone, keep the old route */
		if (inp->inp_laddr.s_addr != INADDR_ANY &&
		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
			/*
			 * Address is around; ditch the route.  A new route
			 * can be allocated the next time output is attempted.
			 */
			release = TRUE;
		}
		if (ia != NULL)
			IFA_REMREF(&ia->ia_ifa);
	}
	if (rt == NULL || release)
		ROUTE_RELEASE(&inp->inp_route);
}

/*
 * Lookup a PCB based on the local address and port.
 */
struct inpcb *
in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    unsigned int lport_arg, int wild_okay)
{
	struct inpcb *inp;
	int matchwild = 3, wildcard;
	u_short lport = lport_arg;

	KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (!wild_okay) {
		struct inpcbhead *head;
		/*
		 * Look for an unconnected (wildcard foreign addr) PCB that
		 * matches the local address and port we're looking for.
		 */
		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
		    pcbinfo->ipi_hashmask)];
		LIST_FOREACH(inp, head, inp_hash) {
#if INET6
			if (!(inp->inp_vflag & INP_IPV4))
				continue;
#endif /* INET6 */
			if (inp->inp_faddr.s_addr == INADDR_ANY &&
			    inp->inp_laddr.s_addr == laddr.s_addr &&
			    inp->inp_lport == lport) {
				/*
				 * Found.
				 */
				return (inp);
			}
		}
		/*
		 * Not found.
		 */
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
		return (NULL);
	} else {
		struct inpcbporthead *porthash;
		struct inpcbport *phd;
		struct inpcb *match = NULL;
		/*
		 * Best fit PCB lookup.
		 *
		 * First see if this local port is in use by looking on the
		 * port hash list.
		 */
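		/*
		 * Scoring note (added for clarity; not in the original
		 * source): "wildcard" below counts the inexact aspects of a
		 * candidate PCB: one if its foreign address is set (i.e. it
		 * is connected), and one if exactly one of the PCB's local
		 * address and the requested laddr is INADDR_ANY.  A hard
		 * local-address mismatch skips the PCB entirely.  matchwild
		 * starts at 3, worse than any achievable score, the lowest
		 * score seen so far wins, and a perfect score of 0 stops
		 * the scan early.
		 */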
		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
		    pcbinfo->ipi_porthashmask)];
		LIST_FOREACH(phd, porthash, phd_hash) {
			if (phd->phd_port == lport)
				break;
		}
		if (phd != NULL) {
			/*
			 * Port is in use by one or more PCBs. Look for best
			 * fit.
			 */
			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
				wildcard = 0;
#if INET6
				if (!(inp->inp_vflag & INP_IPV4))
					continue;
#endif /* INET6 */
				if (inp->inp_faddr.s_addr != INADDR_ANY)
					wildcard++;
				if (inp->inp_laddr.s_addr != INADDR_ANY) {
					if (laddr.s_addr == INADDR_ANY)
						wildcard++;
					else if (inp->inp_laddr.s_addr !=
					    laddr.s_addr)
						continue;
				} else {
					if (laddr.s_addr != INADDR_ANY)
						wildcard++;
				}
				if (wildcard < matchwild) {
					match = inp;
					matchwild = wildcard;
					if (matchwild == 0) {
						break;
					}
				}
			}
		}
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
		    0, 0, 0, 0);
		return (match);
	}
}

/*
 * Check if PCB exists in hash list.
 */
int
in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    uid_t *uid, gid_t *gid, struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = fport_arg, lport = lport_arg;
	int found = 0;
	struct inpcb *local_wild = NULL;
#if INET6
	struct inpcb *local_wild_mapped = NULL;
#endif /* INET6 */

	*uid = UID_MAX;
	*gid = GID_MAX;

	/*
	 * We may have found the pcb in the last lookup - check this first.
	 */
	lck_rw_lock_shared(pcbinfo->ipi_lock);

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp_restricted_recv(inp, ifp))
			continue;

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			if ((found = (inp->inp_socket != NULL))) {
				/*
				 * Found.
				 */
				*uid = kauth_cred_getuid(
				    inp->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
				    inp->inp_socket->so_cred);
			}
			lck_rw_done(pcbinfo->ipi_lock);
			return (found);
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		lck_rw_done(pcbinfo->ipi_lock);
		return (0);
	}

	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp_restricted_recv(inp, ifp))
			continue;

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				if ((found = (inp->inp_socket != NULL))) {
					*uid = kauth_cred_getuid(
					    inp->inp_socket->so_cred);
					*gid = kauth_cred_getgid(
					    inp->inp_socket->so_cred);
				}
				lck_rw_done(pcbinfo->ipi_lock);
				return (found);
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
#if INET6
				if (inp->inp_socket &&
				    SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
					local_wild_mapped = inp;
				else
#endif /* INET6 */
					local_wild = inp;
			}
		}
	}
	if (local_wild == NULL) {
#if INET6
		if (local_wild_mapped != NULL) {
			if ((found = (local_wild_mapped->inp_socket != NULL))) {
				*uid = kauth_cred_getuid(
				    local_wild_mapped->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
				    local_wild_mapped->inp_socket->so_cred);
			}
			lck_rw_done(pcbinfo->ipi_lock);
			return (found);
		}
#endif /* INET6 */
		lck_rw_done(pcbinfo->ipi_lock);
		return (0);
	}
	if ((found = (local_wild->inp_socket != NULL))) {
		*uid = kauth_cred_getuid(
		    local_wild->inp_socket->so_cred);
		*gid = kauth_cred_getgid(
		    local_wild->inp_socket->so_cred);
	}
	lck_rw_done(pcbinfo->ipi_lock);
	return (found);
}
1918
1919 /*
1920 * Lookup PCB in hash list.
1921 */
1922 struct inpcb *
1923 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1924 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
1925 struct ifnet *ifp)
1926 {
1927 struct inpcbhead *head;
1928 struct inpcb *inp;
1929 u_short fport = fport_arg, lport = lport_arg;
1930 struct inpcb *local_wild = NULL;
1931 #if INET6
1932 struct inpcb *local_wild_mapped = NULL;
1933 #endif /* INET6 */
1934
1935 /*
1936 * We may have found the pcb in the last lookup - check this first.
1937 */
1938
1939 lck_rw_lock_shared(pcbinfo->ipi_lock);
1940
1941 /*
1942 * First look for an exact match.
1943 */
1944 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
1945 pcbinfo->ipi_hashmask)];
1946 LIST_FOREACH(inp, head, inp_hash) {
1947 #if INET6
1948 if (!(inp->inp_vflag & INP_IPV4))
1949 continue;
1950 #endif /* INET6 */
1951 if (inp_restricted_recv(inp, ifp))
1952 continue;
1953
1954 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1955 inp->inp_laddr.s_addr == laddr.s_addr &&
1956 inp->inp_fport == fport &&
1957 inp->inp_lport == lport) {
1958 /*
1959 * Found.
1960 */
1961 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
1962 WNT_STOPUSING) {
1963 lck_rw_done(pcbinfo->ipi_lock);
1964 return (inp);
1965 } else {
1966 /* it's there but dead, say it isn't found */
1967 lck_rw_done(pcbinfo->ipi_lock);
1968 return (NULL);
1969 }
1970 }
1971 }
1972
1973 if (!wildcard) {
1974 /*
1975 * Not found.
1976 */
1977 lck_rw_done(pcbinfo->ipi_lock);
1978 return (NULL);
1979 }
1980
1981 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
1982 pcbinfo->ipi_hashmask)];
1983 LIST_FOREACH(inp, head, inp_hash) {
1984 #if INET6
1985 if (!(inp->inp_vflag & INP_IPV4))
1986 continue;
1987 #endif /* INET6 */
1988 if (inp_restricted_recv(inp, ifp))
1989 continue;
1990
1991 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1992 inp->inp_lport == lport) {
1993 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1994 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
1995 WNT_STOPUSING) {
1996 lck_rw_done(pcbinfo->ipi_lock);
1997 return (inp);
1998 } else {
1999 /* it's dead; say it isn't found */
2000 lck_rw_done(pcbinfo->ipi_lock);
2001 return (NULL);
2002 }
2003 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2004 #if INET6
2005 if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
2006 local_wild_mapped = inp;
2007 else
2008 #endif /* INET6 */
2009 local_wild = inp;
2010 }
2011 }
2012 }
2013 if (local_wild == NULL) {
2014 #if INET6
2015 if (local_wild_mapped != NULL) {
2016 if (in_pcb_checkstate(local_wild_mapped,
2017 WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2018 lck_rw_done(pcbinfo->ipi_lock);
2019 return (local_wild_mapped);
2020 } else {
2021 /* it's dead; say it isn't found */
2022 lck_rw_done(pcbinfo->ipi_lock);
2023 return (NULL);
2024 }
2025 }
2026 #endif /* INET6 */
2027 lck_rw_done(pcbinfo->ipi_lock);
2028 return (NULL);
2029 }
2030 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2031 lck_rw_done(pcbinfo->ipi_lock);
2032 return (local_wild);
2033 }
2034 /*
2035 * It's either not found or is already dead.
2036 */
2037 lck_rw_done(pcbinfo->ipi_lock);
2038 return (NULL);
2039 }
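
/*
 * Editor's sketch -- not part of the original file.  The same two-pass
 * discipline as above in self-contained userspace C, with a toy hash
 * standing in for INP_PCBHASH: pass one probes the bucket keyed by the
 * full 4-tuple for an exact match; pass two probes the bucket keyed by
 * (INADDR_ANY, lport) and prefers a listener bound to the specific local
 * address over an INADDR_ANY listener.
 */
#include <netinet/in.h>
#include <stddef.h>

#define NBUCKETS 128			/* must be a power of two */

struct tpcb {				/* hypothetical trimmed-down pcb */
	struct tpcb *next;
	in_addr_t laddr, faddr;
	unsigned short lport, fport;
};

static unsigned int
toy_hash(in_addr_t faddr, unsigned short lport, unsigned short fport)
{
	return (((unsigned int)faddr ^ lport ^ fport) & (NBUCKETS - 1));
}

static struct tpcb *
toy_lookup(struct tpcb *tab[NBUCKETS], in_addr_t faddr, unsigned short fport,
    in_addr_t laddr, unsigned short lport)
{
	struct tpcb *p, *local_wild = NULL;

	/* Pass 1: exact 4-tuple match. */
	for (p = tab[toy_hash(faddr, lport, fport)]; p != NULL; p = p->next)
		if (p->faddr == faddr && p->laddr == laddr &&
		    p->fport == fport && p->lport == lport)
			return (p);

	/* Pass 2: unconnected (wildcard foreign address) listeners. */
	for (p = tab[toy_hash(INADDR_ANY, lport, 0)]; p != NULL; p = p->next) {
		if (p->faddr != INADDR_ANY || p->lport != lport)
			continue;
		if (p->laddr == laddr)
			return (p);	/* bound to the queried address */
		if (p->laddr == INADDR_ANY)
			local_wild = p;	/* remember, but keep looking */
	}
	return (local_wild);
}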
2040
2041 /*
2042 * Insert PCB onto various hash lists.
2043 */
2044 int
2045 in_pcbinshash(struct inpcb *inp, int locked)
2046 {
2047 struct inpcbhead *pcbhash;
2048 struct inpcbporthead *pcbporthash;
2049 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
2050 struct inpcbport *phd;
2051 u_int32_t hashkey_faddr;
2052
2053 if (!locked) {
2054 if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
2055 /*
2056 * Lock inversion issue, mostly with UDP
2057 * multicast packets.
2058 */
2059 socket_unlock(inp->inp_socket, 0);
2060 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
2061 socket_lock(inp->inp_socket, 0);
2062 if (inp->inp_state == INPCB_STATE_DEAD) {
2063 /*
2064 * The socket got dropped when
2065 * it was unlocked
2066 */
2067 lck_rw_done(pcbinfo->ipi_lock);
2068 return (ECONNABORTED);
2069 }
2070 }
2071 }
2072
2073 #if INET6
2074 if (inp->inp_vflag & INP_IPV6)
2075 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2076 else
2077 #endif /* INET6 */
2078 hashkey_faddr = inp->inp_faddr.s_addr;
2079
2080 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2081 inp->inp_fport, pcbinfo->ipi_hashmask);
2082
2083 pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];
2084
2085 pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
2086 pcbinfo->ipi_porthashmask)];
2087
2088 /*
2089 * Go through port list and look for a head for this lport.
2090 */
2091 LIST_FOREACH(phd, pcbporthash, phd_hash) {
2092 if (phd->phd_port == inp->inp_lport)
2093 break;
2094 }
2095
2096 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
2097
2098 /*
2099 * If none exists, malloc one and tack it on.
2100 */
2101 if (phd == NULL) {
2102 MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport),
2103 M_PCB, M_WAITOK);
2104 if (phd == NULL) {
2105 if (!locked)
2106 lck_rw_done(pcbinfo->ipi_lock);
2107 return (ENOBUFS); /* XXX */
2108 }
2109 phd->phd_port = inp->inp_lport;
2110 LIST_INIT(&phd->phd_pcblist);
2111 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
2112 }
2113
2114 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2115 inp->inp_phd = phd;
2116 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
2117 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
2118 inp->inp_flags2 |= INP2_INHASHLIST;
2119
2120 if (!locked)
2121 lck_rw_done(pcbinfo->ipi_lock);
2122
2123 #if NECP
2124 // This call catches the original setting of the local address
2125 inp_update_necp_policy(inp, NULL, NULL, 0);
2126 #endif /* NECP */
2127
2128 return (0);
2129 }
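
/*
 * Editor's sketch -- not part of the original file.  The find-or-allocate
 * pattern above, reduced to self-contained userspace C with the same
 * <sys/queue.h> LIST macros.  All pcbs sharing a local port hang off one
 * inpcbport-style head, so the port bucket chain stays short even when
 * many pcbs share a port.
 */
#include <sys/queue.h>
#include <stdlib.h>

struct my_pcb {
	LIST_ENTRY(my_pcb) portlink;
	unsigned short lport;
};

struct port_head {
	LIST_ENTRY(port_head) link;
	LIST_HEAD(, my_pcb) pcbs;
	unsigned short port;
};

LIST_HEAD(port_bucket, port_head);

static int
insert_by_port(struct port_bucket *bucket, struct my_pcb *pcb)
{
	struct port_head *phd;

	/* Go through the bucket and look for a head for this port. */
	LIST_FOREACH(phd, bucket, link) {
		if (phd->port == pcb->lport)
			break;
	}
	/* If none exists, allocate one and tack it on. */
	if (phd == NULL) {
		phd = malloc(sizeof (*phd));
		if (phd == NULL)
			return (-1);	/* ENOBUFS analogue */
		phd->port = pcb->lport;
		LIST_INIT(&phd->pcbs);
		LIST_INSERT_HEAD(bucket, phd, link);
	}
	LIST_INSERT_HEAD(&phd->pcbs, pcb, portlink);
	return (0);
}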
2130
2131 /*
2132 * Move PCB to the proper hash bucket when { faddr, fport } have been
2133 * changed. NOTE: This does not handle the case of the lport changing (the
2134 * hashed port list would have to be updated as well), so the lport must
2135 * not change after in_pcbinshash() has been called.
2136 */
2137 void
2138 in_pcbrehash(struct inpcb *inp)
2139 {
2140 struct inpcbhead *head;
2141 u_int32_t hashkey_faddr;
2142
2143 #if INET6
2144 if (inp->inp_vflag & INP_IPV6)
2145 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2146 else
2147 #endif /* INET6 */
2148 hashkey_faddr = inp->inp_faddr.s_addr;
2149
2150 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2151 inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
2152 head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];
2153
2154 if (inp->inp_flags2 & INP2_INHASHLIST) {
2155 LIST_REMOVE(inp, inp_hash);
2156 inp->inp_flags2 &= ~INP2_INHASHLIST;
2157 }
2158
2159 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2160 LIST_INSERT_HEAD(head, inp, inp_hash);
2161 inp->inp_flags2 |= INP2_INHASHLIST;
2162
2163 #if NECP
2164 // This call catches updates to the remote addresses
2165 inp_update_necp_policy(inp, NULL, NULL, 0);
2166 #endif /* NECP */
2167 }
2168
2169 /*
2170 * Remove PCB from various lists.
2171 * Must be called while the pcbinfo lock is held in exclusive mode.
2172 */
2173 void
2174 in_pcbremlists(struct inpcb *inp)
2175 {
2176 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
2177
2178 /*
2179 * Check if it's in the hash list -- an inp is placed in the hash list
2180 * when its local port gets assigned, so it should also be present
2181 * in the port list.
2182 */
2183 if (inp->inp_flags2 & INP2_INHASHLIST) {
2184 struct inpcbport *phd = inp->inp_phd;
2185
2186 VERIFY(phd != NULL && inp->inp_lport > 0);
2187
2188 LIST_REMOVE(inp, inp_hash);
2189 inp->inp_hash.le_next = NULL;
2190 inp->inp_hash.le_prev = NULL;
2191
2192 LIST_REMOVE(inp, inp_portlist);
2193 inp->inp_portlist.le_next = NULL;
2194 inp->inp_portlist.le_prev = NULL;
2195 if (LIST_EMPTY(&phd->phd_pcblist)) {
2196 LIST_REMOVE(phd, phd_hash);
2197 FREE(phd, M_PCB);
2198 }
2199 inp->inp_phd = NULL;
2200 inp->inp_flags2 &= ~INP2_INHASHLIST;
2201 }
2202 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2203
2204 if (inp->inp_flags2 & INP2_TIMEWAIT) {
2205 /* Remove from time-wait queue */
2206 tcp_remove_from_time_wait(inp);
2207 inp->inp_flags2 &= ~INP2_TIMEWAIT;
2208 VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
2209 inp->inp_pcbinfo->ipi_twcount--;
2210 } else {
2211 /* Remove from global inp list if it is not time-wait */
2212 LIST_REMOVE(inp, inp_list);
2213 }
2214
2215 if (inp->inp_flags2 & INP2_IN_FCTREE) {
2216 inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE));
2217 VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
2218 }
2219
2220 inp->inp_pcbinfo->ipi_count--;
2221 }
2222
2223 /*
2224 * Mechanism used to defer the memory release of PCBs.
2225 * The pcb list will contain the pcb until the reaper can clean it up if
2226 * the following conditions are met: 1) the state is "DEAD",
2227 * 2) wantcnt is STOPUSING, and 3) usecount is 0.
2228 * This function will be called to either mark the pcb as ready for
2229 * recycling (WNT_STOPUSING), or to acquire (WNT_ACQUIRE) or release
2230 * (WNT_RELEASE) a want reference on it.
2231 */
2232 int
2233 in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
2234 {
2235 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
2236 UInt32 origwant;
2237 UInt32 newwant;
2238
2239 switch (mode) {
2240 case WNT_STOPUSING:
2241 /*
2242 * Try to mark the pcb as ready for recycling. CAS with
2243 * STOPUSING, if success we're good, if it's in use, will
2244 * be marked later
2245 */
2246 if (locked == 0)
2247 socket_lock(pcb->inp_socket, 1);
2248 pcb->inp_state = INPCB_STATE_DEAD;
2249
2250 stopusing:
2251 if (pcb->inp_socket->so_usecount < 0) {
2252 panic("%s: pcb=%p so=%p usecount is negative\n",
2253 __func__, pcb, pcb->inp_socket);
2254 /* NOTREACHED */
2255 }
2256 if (locked == 0)
2257 socket_unlock(pcb->inp_socket, 1);
2258
2259 inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);
2260
2261 origwant = *wantcnt;
2262 if ((UInt16) origwant == 0xffff) /* should stop using */
2263 return (WNT_STOPUSING);
2264 newwant = 0xffff;
2265 if ((UInt16) origwant == 0) {
2266 /* try to mark it as unusable now */
2267 OSCompareAndSwap(origwant, newwant, wantcnt);
2268 }
2269 return (WNT_STOPUSING);
2270 break;
2271
2272 case WNT_ACQUIRE:
2273 /*
2274 * Try to add a reference to the pcb. If the want count is
2275 * already marked WNT_STOPUSING, bail out; otherwise
2276 * increment the count atomically.
2277 */
2278 do {
2279 origwant = *wantcnt;
2280 if ((UInt16) origwant == 0xffff) {
2281 /* should stop using */
2282 return (WNT_STOPUSING);
2283 }
2284 newwant = origwant + 1;
2285 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2286 return (WNT_ACQUIRE);
2287 break;
2288
2289 case WNT_RELEASE:
2290 /*
2291 * Release a reference. If the pcb state is DEAD, try to set
2292 * the want count to STOPUSING (only done once the count is zero).
2293 */
2294 if (locked == 0)
2295 socket_lock(pcb->inp_socket, 1);
2296
2297 do {
2298 origwant = *wantcnt;
2299 if ((UInt16) origwant == 0x0) {
2300 panic("%s: pcb=%p release with zero count",
2301 __func__, pcb);
2302 /* NOTREACHED */
2303 }
2304 if ((UInt16) origwant == 0xffff) {
2305 /* should stop using */
2306 if (locked == 0)
2307 socket_unlock(pcb->inp_socket, 1);
2308 return (WNT_STOPUSING);
2309 }
2310 newwant = origwant - 1;
2311 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2312
2313 if (pcb->inp_state == INPCB_STATE_DEAD)
2314 goto stopusing;
2315 if (pcb->inp_socket->so_usecount < 0) {
2316 panic("%s: RELEASE pcb=%p so=%p usecount is negative\n",
2317 __func__, pcb, pcb->inp_socket);
2318 /* NOTREACHED */
2319 }
2320
2321 if (locked == 0)
2322 socket_unlock(pcb->inp_socket, 1);
2323 return (WNT_RELEASE);
2324 break;
2325
2326 default:
2327 panic("%s: so=%p not a valid state =%x\n", __func__,
2328 pcb->inp_socket, mode);
2329 /* NOTREACHED */
2330 }
2331
2332 /* NOTREACHED */
2333 return (mode);
2334 }
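
/*
 * Editor's sketch -- not part of the original file.  The want-count
 * protocol above rewritten with C11 <stdatomic.h> in place of
 * OSCompareAndSwap, under the same rules: 0xffff is the STOPUSING
 * sentinel, acquire fails once it is set, and the sentinel can only be
 * installed while the count is zero.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define WANT_STOPUSING	0xffffu

static bool
want_acquire(_Atomic uint32_t *wantcnt)
{
	uint32_t old = atomic_load(wantcnt);

	do {
		if (old == WANT_STOPUSING)
			return (false);	/* pcb is being recycled */
	} while (!atomic_compare_exchange_weak(wantcnt, &old, old + 1));
	return (true);
}

static void
want_release(_Atomic uint32_t *wantcnt, bool dead)
{
	uint32_t old = atomic_load(wantcnt);
	uint32_t zero = 0;

	do {
		if (old == 0 || old == WANT_STOPUSING)
			return;		/* nothing held (kernel panics here) */
	} while (!atomic_compare_exchange_weak(wantcnt, &old, old - 1));

	/* Once the pcb is dead and idle, try to latch the sentinel. */
	if (dead)
		atomic_compare_exchange_strong(wantcnt, &zero, WANT_STOPUSING);
}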
2335
2336 /*
2337 * inpcb_to_compat copies specific bits of an inpcb to an inpcb_compat.
2338 * The inpcb_compat data structure is passed to user space and must
2339 * not change. We intentionally avoid copying pointers.
2340 */
2341 void
2342 inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
2343 {
2344 bzero(inp_compat, sizeof (*inp_compat));
2345 inp_compat->inp_fport = inp->inp_fport;
2346 inp_compat->inp_lport = inp->inp_lport;
2347 inp_compat->nat_owner = 0;
2348 inp_compat->nat_cookie = 0;
2349 inp_compat->inp_gencnt = inp->inp_gencnt;
2350 inp_compat->inp_flags = inp->inp_flags;
2351 inp_compat->inp_flow = inp->inp_flow;
2352 inp_compat->inp_vflag = inp->inp_vflag;
2353 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
2354 inp_compat->inp_ip_p = inp->inp_ip_p;
2355 inp_compat->inp_dependfaddr.inp6_foreign =
2356 inp->inp_dependfaddr.inp6_foreign;
2357 inp_compat->inp_dependladdr.inp6_local =
2358 inp->inp_dependladdr.inp6_local;
2359 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2360 inp_compat->inp_depend6.inp6_hlim = 0;
2361 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2362 inp_compat->inp_depend6.inp6_ifindex = 0;
2363 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2364 }
2365
2366 void
2367 inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
2368 {
2369 xinp->inp_fport = inp->inp_fport;
2370 xinp->inp_lport = inp->inp_lport;
2371 xinp->inp_gencnt = inp->inp_gencnt;
2372 xinp->inp_flags = inp->inp_flags;
2373 xinp->inp_flow = inp->inp_flow;
2374 xinp->inp_vflag = inp->inp_vflag;
2375 xinp->inp_ip_ttl = inp->inp_ip_ttl;
2376 xinp->inp_ip_p = inp->inp_ip_p;
2377 xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
2378 xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
2379 xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2380 xinp->inp_depend6.inp6_hlim = 0;
2381 xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2382 xinp->inp_depend6.inp6_ifindex = 0;
2383 xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2384 }
2385
2386 /*
2387 * The following routines implement this scheme:
2388 *
2389 * Callers of ip_output() that intend to cache the route in the inpcb pass
2390 * a local copy of the struct route to ip_output(). Using a local copy of
2391 * the cached route significantly simplifies things as IP no longer has to
2392 * worry about having exclusive access to the passed in struct route, since
2393 * it's defined in the caller's stack; in essence, this allows for a lock-
2394 * less operation when updating the struct route at the IP level and below,
2395 * whenever necessary. The scheme works as follows:
2396 *
2397 * Prior to dropping the socket's lock and calling ip_output(), the caller
2398 * copies the struct route from the inpcb into its stack, and adds a reference
2399 * to the cached route entry, if there was any. The socket's lock is then
2400 * dropped and ip_output() is called with a pointer to the copy of struct
2401 * route defined on the stack (not to the one in the inpcb.)
2402 *
2403 * Upon returning from ip_output(), the caller then acquires the socket's
2404 * lock and synchronizes the cache; if there is no route cached in the inpcb,
2405 * it copies the local copy of struct route (which may or may not contain any
2406 * route) back into the cache; otherwise, if the inpcb has a route cached in
2407 * it, the one in the local copy will be freed, if there's any. Trashing the
2408 * cached route in the inpcb can be avoided because ip_output() is single-
2409 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
2410 * by the socket/transport layer.)
2411 */
2412 void
2413 inp_route_copyout(struct inpcb *inp, struct route *dst)
2414 {
2415 struct route *src = &inp->inp_route;
2416
2417 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
2418
2419 /*
2420 * If the route in the PCB is stale or not for IPv4, blow it away;
2421 * this is possible in the IPv4-mapped address case.
2422 */
2423 if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET)
2424 ROUTE_RELEASE(src);
2425
2426 route_copyout(dst, src, sizeof (*dst));
2427 }
2428
2429 void
2430 inp_route_copyin(struct inpcb *inp, struct route *src)
2431 {
2432 struct route *dst = &inp->inp_route;
2433
2434 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
2435
2436 /* Minor sanity check */
2437 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
2438 panic("%s: wrong or corrupted route: %p", __func__, src);
2439
2440 route_copyin(src, dst, sizeof (*src));
2441 }
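
/*
 * Editor's sketch -- not part of the original file.  The copyout/copyin
 * pattern above reduced to self-contained userspace C: a pthread mutex
 * stands in for the socket lock and an int for the cached route.  The
 * cache is copied to the stack under the lock, used unlocked, and then
 * reconciled under the lock -- the stack copy is written back only if
 * nothing was cached in the meantime.
 */
#include <pthread.h>

struct conn {
	pthread_mutex_t lock;	/* socket-lock stand-in */
	int cached_route;	/* 0 means "no route cached" */
};

static void
transmit(struct conn *c)
{
	int route;

	pthread_mutex_lock(&c->lock);
	route = c->cached_route;	/* copyout onto the stack */
	pthread_mutex_unlock(&c->lock);

	/* Unlocked work: ip_output() stand-in may fill in a route. */
	if (route == 0)
		route = 42;		/* pretend a lookup happened */

	pthread_mutex_lock(&c->lock);
	if (c->cached_route == 0)	/* copyin: sync the cache */
		c->cached_route = route;
	/* else keep the existing cache and drop the stack copy */
	pthread_mutex_unlock(&c->lock);
}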
2442
2443 /*
2444 * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option.
2445 */
2446 int
2447 inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
2448 {
2449 struct ifnet *ifp = NULL;
2450
2451 ifnet_head_lock_shared();
2452 if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
2453 (ifp = ifindex2ifnet[ifscope]) == NULL)) {
2454 ifnet_head_done();
2455 return (ENXIO);
2456 }
2457 ifnet_head_done();
2458
2459 VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);
2460
2461 /*
2462 * A zero interface scope value indicates an "unbind".
2463 * Otherwise, take in whatever value the app desires;
2464 * the app may already know the scope (or force itself
2465 * to such a scope) ahead of time before the interface
2466 * gets attached. It doesn't matter either way; any
2467 * route lookup from this point on will require an
2468 * exact match for the embedded interface scope.
2469 */
2470 inp->inp_boundifp = ifp;
2471 if (inp->inp_boundifp == NULL)
2472 inp->inp_flags &= ~INP_BOUND_IF;
2473 else
2474 inp->inp_flags |= INP_BOUND_IF;
2475
2476 /* Blow away any cached route in the PCB */
2477 ROUTE_RELEASE(&inp->inp_route);
2478
2479 if (pifp != NULL)
2480 *pifp = ifp;
2481
2482 return (0);
2483 }
2484
2485 /*
2486 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2487 * as well as for setting PROC_UUID_NO_CELLULAR policy.
2488 */
2489 void
2490 inp_set_nocellular(struct inpcb *inp)
2491 {
2492 inp->inp_flags |= INP_NO_IFT_CELLULAR;
2493
2494 /* Blow away any cached route in the PCB */
2495 ROUTE_RELEASE(&inp->inp_route);
2496 }
2497
2498 /*
2499 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2500 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
2501 */
2502 void
2503 inp_clear_nocellular(struct inpcb *inp)
2504 {
2505 struct socket *so = inp->inp_socket;
2506
2507 /*
2508 * A SO_RESTRICT_DENY_CELLULAR restriction issued on the socket
2509 * has a higher precedence than INP_NO_IFT_CELLULAR. Clear the flag
2510 * if and only if the socket is unrestricted.
2511 */
2512 if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
2513 inp->inp_flags &= ~INP_NO_IFT_CELLULAR;
2514
2515 /* Blow away any cached route in the PCB */
2516 ROUTE_RELEASE(&inp->inp_route);
2517 }
2518 }
2519
2520 void
2521 inp_set_noexpensive(struct inpcb *inp)
2522 {
2523 inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;
2524
2525 /* Blow away any cached route in the PCB */
2526 ROUTE_RELEASE(&inp->inp_route);
2527 }
2528
2529 void
2530 inp_set_awdl_unrestricted(struct inpcb *inp)
2531 {
2532 inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;
2533
2534 /* Blow away any cached route in the PCB */
2535 ROUTE_RELEASE(&inp->inp_route);
2536 }
2537
2538 boolean_t
2539 inp_get_awdl_unrestricted(struct inpcb *inp)
2540 {
2541 return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
2542 }
2543
2544 void
2545 inp_clear_awdl_unrestricted(struct inpcb *inp)
2546 {
2547 inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;
2548
2549 /* Blow away any cached route in the PCB */
2550 ROUTE_RELEASE(&inp->inp_route);
2551 }
2552
2553 #if NECP
2554 /*
2555 * Called when PROC_UUID_NECP_APP_POLICY is set.
2556 */
2557 void
2558 inp_set_want_app_policy(struct inpcb *inp)
2559 {
2560 inp->inp_flags2 |= INP2_WANT_APP_POLICY;
2561 }
2562
2563 /*
2564 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
2565 */
2566 void
2567 inp_clear_want_app_policy(struct inpcb *inp)
2568 {
2569 inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
2570 }
2571 #endif /* NECP */
2572
2573 /*
2574 * Calculate flow hash for an inp, used by an interface to identify a
2575 * flow. When an interface provides a flow control advisory, this flow
2576 * hash is used as an identifier.
2577 */
2578 u_int32_t
2579 inp_calc_flowhash(struct inpcb *inp)
2580 {
2581 struct inp_flowhash_key fh __attribute__((aligned(8)));
2582 u_int32_t flowhash = 0;
2583 struct inpcb *tmp_inp = NULL;
2584
2585 if (inp_hash_seed == 0)
2586 inp_hash_seed = RandomULong();
2587
2588 bzero(&fh, sizeof (fh));
2589
2590 bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr));
2591 bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr));
2592
2593 fh.infh_lport = inp->inp_lport;
2594 fh.infh_fport = inp->inp_fport;
2595 fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
2596 fh.infh_proto = inp->inp_ip_p;
2597 fh.infh_rand1 = RandomULong();
2598 fh.infh_rand2 = RandomULong();
2599
2600 try_again:
2601 flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed);
2602 if (flowhash == 0) {
2603 /* try to get a non-zero flowhash */
2604 inp_hash_seed = RandomULong();
2605 goto try_again;
2606 }
2607
2608 inp->inp_flowhash = flowhash;
2609
2610 /* Insert the inp into inp_fc_tree */
2611 lck_mtx_lock_spin(&inp_fc_lck);
2612 tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
2613 if (tmp_inp != NULL) {
2614 /*
2615 * A different inp already has the same flowhash.
2616 * Such a collision is possible but the
2617 * probability is low, so recompute the
2618 * flowhash with a fresh seed.
2619 */
2620 lck_mtx_unlock(&inp_fc_lck);
2621 /* recompute hash seed */
2622 inp_hash_seed = RandomULong();
2623 goto try_again;
2624 }
2625
2626 RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
2627 inp->inp_flags2 |= INP2_IN_FCTREE;
2628 lck_mtx_unlock(&inp_fc_lck);
2629
2630 return (flowhash);
2631 }
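
/*
 * Editor's sketch -- not part of the original file.  The retry discipline
 * above in self-contained userspace C: recompute with a fresh seed until
 * the hash is non-zero and not already claimed by another flow.  A
 * trivial FNV-1a-style mix stands in for net_flowhash(), and the
 * claimed() callback stands in for the RB_FIND check on inp_fc_tree.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

static uint32_t
toy_flowhash(const void *key, size_t len, uint32_t seed)
{
	const unsigned char *p = key;
	uint32_t h = seed;

	while (len-- != 0)
		h = (h ^ *p++) * 16777619u;	/* FNV-1a style mix */
	return (h);
}

static uint32_t
assign_flowhash(const void *key, size_t len, bool (*claimed)(uint32_t))
{
	static uint32_t seed;
	uint32_t h;

	if (seed == 0)
		seed = (uint32_t)random();	/* lazy seeding, as above */
	for (;;) {
		h = toy_flowhash(key, len, seed);
		if (h != 0 && !claimed(h))
			return (h);		/* usable, collision-free */
		seed = (uint32_t)random();	/* reseed and retry */
	}
}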
2632
2633 void
2634 inp_flowadv(uint32_t flowhash)
2635 {
2636 struct inpcb *inp;
2637
2638 inp = inp_fc_getinp(flowhash, 0);
2639
2640 if (inp == NULL)
2641 return;
2642 inp_fc_feedback(inp);
2643 }
2644
2645 /*
2646 * Function to compare inp_fc_entries in inp flow control tree
2647 */
2648 static inline int
2649 infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
2650 {
2651 return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
2652 sizeof(inp1->inp_flowhash)));
2653 }
2654
2655 static struct inpcb *
2656 inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
2657 {
2658 struct inpcb *inp = NULL;
2659 int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;
2660
2661 lck_mtx_lock_spin(&inp_fc_lck);
2662 key_inp.inp_flowhash = flowhash;
2663 inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
2664 if (inp == NULL) {
2665 /* inp is not present, return */
2666 lck_mtx_unlock(&inp_fc_lck);
2667 return (NULL);
2668 }
2669
2670 if (flags & INPFC_REMOVE) {
2671 RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
2672 lck_mtx_unlock(&inp_fc_lck);
2673
2674 bzero(&(inp->infc_link), sizeof (inp->infc_link));
2675 inp->inp_flags2 &= ~INP2_IN_FCTREE;
2676 return (NULL);
2677 }
2678
2679 if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING)
2680 inp = NULL;
2681 lck_mtx_unlock(&inp_fc_lck);
2682
2683 return (inp);
2684 }
2685
2686 static void
2687 inp_fc_feedback(struct inpcb *inp)
2688 {
2689 struct socket *so = inp->inp_socket;
2690
2691 /* we already hold a want_cnt on this inp, socket can't be null */
2692 VERIFY(so != NULL);
2693 socket_lock(so, 1);
2694
2695 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
2696 socket_unlock(so, 1);
2697 return;
2698 }
2699
2700 if (inp->inp_sndinprog_cnt > 0)
2701 inp->inp_flags |= INP_FC_FEEDBACK;
2702
2703 /*
2704 * Return if the connection is not in a flow-controlled state.
2705 * This can happen if the connection experienced
2706 * a loss while it was in the flow-controlled state.
2707 */
2708 if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
2709 socket_unlock(so, 1);
2710 return;
2711 }
2712 inp_reset_fc_state(inp);
2713
2714 if (SOCK_TYPE(so) == SOCK_STREAM)
2715 inp_fc_unthrottle_tcp(inp);
2716
2717 socket_unlock(so, 1);
2718 }
2719
2720 void
2721 inp_reset_fc_state(struct inpcb *inp)
2722 {
2723 struct socket *so = inp->inp_socket;
2724 int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
2725 int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;
2726
2727 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
2728
2729 if (suspended) {
2730 so->so_flags &= ~(SOF_SUSPENDED);
2731 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
2732 }
2733
2734 /* Give a write wakeup to unblock the socket */
2735 if (needwakeup)
2736 sowwakeup(so);
2737 }
2738
2739 int
2740 inp_set_fc_state(struct inpcb *inp, int advcode)
2741 {
2742 struct inpcb *tmp_inp = NULL;
2743 /*
2744 * If there was feedback from the interface while a
2745 * send operation was in progress, ignore this flow
2746 * advisory to avoid a race between setting the
2747 * flow-controlled state and receiving feedback from
2748 * the interface.
2749 */
2750 if (inp->inp_flags & INP_FC_FEEDBACK)
2751 return (0);
2752
2753 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
2754 if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
2755 INPFC_SOLOCKED)) != NULL) {
2756 if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING)
2757 return (0);
2758 VERIFY(tmp_inp == inp);
2759 switch (advcode) {
2760 case FADV_FLOW_CONTROLLED:
2761 inp->inp_flags |= INP_FLOW_CONTROLLED;
2762 break;
2763 case FADV_SUSPENDED:
2764 inp->inp_flags |= INP_FLOW_SUSPENDED;
2765 soevent(inp->inp_socket,
2766 (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));
2767
2768 /* Record the fact that suspend event was sent */
2769 inp->inp_socket->so_flags |= SOF_SUSPENDED;
2770 break;
2771 }
2772 return (1);
2773 }
2774 return (0);
2775 }
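
/*
 * Editor's sketch -- not part of the original file.  The feedback race
 * guard above, reduced to two userspace helpers: while a send is in
 * flight, interface feedback records a flag, and a flow-control advisory
 * that arrives afterwards loses against that flag, so a stale advisory
 * cannot re-suspend a flow the interface has already resumed.
 */
#include <stdbool.h>

enum { FC_FEEDBACK = 0x1, FC_CONTROLLED = 0x2 };

struct flow {
	unsigned int flags;
	int sendinprog;		/* sends currently in progress */
};

/* Interface feedback path (cf. inp_fc_feedback). */
static void
flow_feedback(struct flow *f)
{
	if (f->sendinprog > 0)
		f->flags |= FC_FEEDBACK;
	f->flags &= ~FC_CONTROLLED;	/* flow is runnable again */
}

/* Advisory path (cf. inp_set_fc_state); returns whether it took effect. */
static bool
flow_advise(struct flow *f)
{
	if (f->flags & FC_FEEDBACK)
		return (false);		/* resumed during send; ignore */
	f->flags |= FC_CONTROLLED;
	return (true);
}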
2776
2777 /*
2778 * Handler for SO_FLUSH socket option.
2779 */
2780 int
2781 inp_flush(struct inpcb *inp, int optval)
2782 {
2783 u_int32_t flowhash = inp->inp_flowhash;
2784 struct ifnet *rtifp, *oifp;
2785
2786 /* Either all classes or one of the valid ones */
2787 if (optval != SO_TC_ALL && !SO_VALID_TC(optval))
2788 return (EINVAL);
2789
2790 /* We need a flow hash for identification */
2791 if (flowhash == 0)
2792 return (0);
2793
2794 /* Grab the interfaces from the route and pcb */
2795 rtifp = ((inp->inp_route.ro_rt != NULL) ?
2796 inp->inp_route.ro_rt->rt_ifp : NULL);
2797 oifp = inp->inp_last_outifp;
2798
2799 if (rtifp != NULL)
2800 if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
2801 if (oifp != NULL && oifp != rtifp)
2802 if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
2803
2804 return (0);
2805 }
2806
2807 /*
2808 * Clear the INP_INADDR_ANY flag (special case for PPP only)
2809 */
2810 void
2811 inp_clear_INP_INADDR_ANY(struct socket *so)
2812 {
2813 struct inpcb *inp = NULL;
2814
2815 socket_lock(so, 1);
2816 inp = sotoinpcb(so);
2817 if (inp) {
2818 inp->inp_flags &= ~INP_INADDR_ANY;
2819 }
2820 socket_unlock(so, 1);
2821 }
2822
2823 void
2824 inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
2825 {
2826 struct socket *so = inp->inp_socket;
2827
2828 soprocinfo->spi_pid = so->last_pid;
2829 if (so->last_pid != 0)
2830 uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
2831 /*
2832 * When not delegated, the effective pid is the same as the real pid
2833 */
2834 if (so->so_flags & SOF_DELEGATED) {
2835 soprocinfo->spi_delegated = 1;
2836 soprocinfo->spi_epid = so->e_pid;
2837 uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
2838 } else {
2839 soprocinfo->spi_delegated = 0;
2840 soprocinfo->spi_epid = so->last_pid;
2841 }
2842 }
2843
2844 int
2845 inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
2846 struct so_procinfo *soprocinfo)
2847 {
2848 struct inpcb *inp = NULL;
2849 int found = 0;
2850
2851 bzero(soprocinfo, sizeof (struct so_procinfo));
2852
2853 if (!flowhash)
2854 return (-1);
2855
2856 lck_rw_lock_shared(pcbinfo->ipi_lock);
2857 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
2858 if (inp->inp_state != INPCB_STATE_DEAD &&
2859 inp->inp_socket != NULL &&
2860 inp->inp_flowhash == flowhash) {
2861 found = 1;
2862 inp_get_soprocinfo(inp, soprocinfo);
2863 break;
2864 }
2865 }
2866 lck_rw_done(pcbinfo->ipi_lock);
2867
2868 return (found);
2869 }
2870
2871 #if CONFIG_PROC_UUID_POLICY
2872 static void
2873 inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
2874 {
2875 struct socket *so = inp->inp_socket;
2876 int before, after;
2877
2878 VERIFY(so != NULL);
2879 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
2880
2881 before = INP_NO_CELLULAR(inp);
2882 if (set) {
2883 inp_set_nocellular(inp);
2884 } else {
2885 inp_clear_nocellular(inp);
2886 }
2887 after = INP_NO_CELLULAR(inp);
2888 if (net_io_policy_log && (before != after)) {
2889 static const char *ok = "OK";
2890 static const char *nok = "NOACCESS";
2891 uuid_string_t euuid_buf;
2892 pid_t epid;
2893
2894 if (so->so_flags & SOF_DELEGATED) {
2895 uuid_unparse(so->e_uuid, euuid_buf);
2896 epid = so->e_pid;
2897 } else {
2898 uuid_unparse(so->last_uuid, euuid_buf);
2899 epid = so->last_pid;
2900 }
2901
2902 /* allow this socket to generate another notification event */
2903 so->so_ifdenied_notifies = 0;
2904
2905 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
2906 "euuid %s%s %s->%s\n", __func__,
2907 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
2908 SOCK_TYPE(so), epid, euuid_buf,
2909 (so->so_flags & SOF_DELEGATED) ?
2910 " [delegated]" : "",
2911 ((before < after) ? ok : nok),
2912 ((before < after) ? nok : ok));
2913 }
2914 }
2915
2916 #if NECP
2917 static void
2918 inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
2919 {
2920 struct socket *so = inp->inp_socket;
2921 int before, after;
2922
2923 VERIFY(so != NULL);
2924 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
2925
2926 before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
2927 if (set) {
2928 inp_set_want_app_policy(inp);
2929 } else {
2930 inp_clear_want_app_policy(inp);
2931 }
2932 after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
2933 if (net_io_policy_log && (before != after)) {
2934 static const char *wanted = "WANTED";
2935 static const char *unwanted = "UNWANTED";
2936 uuid_string_t euuid_buf;
2937 pid_t epid;
2938
2939 if (so->so_flags & SOF_DELEGATED) {
2940 uuid_unparse(so->e_uuid, euuid_buf);
2941 epid = so->e_pid;
2942 } else {
2943 uuid_unparse(so->last_uuid, euuid_buf);
2944 epid = so->last_pid;
2945 }
2946
2947 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
2948 "euuid %s%s %s->%s\n", __func__,
2949 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
2950 SOCK_TYPE(so), epid, euuid_buf,
2951 (so->so_flags & SOF_DELEGATED) ?
2952 " [delegated]" : "",
2953 ((before < after) ? unwanted : wanted),
2954 ((before < after) ? wanted : unwanted));
2955 }
2956 }
2957 #endif /* NECP */
2958 #endif /* CONFIG_PROC_UUID_POLICY */
2959
2960 #if NECP
2961 void
2962 inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
2963 {
2964 necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
2965 if (necp_socket_should_rescope(inp) &&
2966 inp->inp_lport == 0 &&
2967 inp->inp_laddr.s_addr == INADDR_ANY &&
2968 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
2969 // If we should rescope, and the socket is not yet bound
2970 inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
2971 }
2972 }
2973 #endif /* NECP */
2974
2975 int
2976 inp_update_policy(struct inpcb *inp)
2977 {
2978 #if CONFIG_PROC_UUID_POLICY
2979 struct socket *so = inp->inp_socket;
2980 uint32_t pflags = 0;
2981 int32_t ogencnt;
2982 int err = 0;
2983
2984 if (!net_io_policy_uuid ||
2985 so == NULL || inp->inp_state == INPCB_STATE_DEAD)
2986 return (0);
2987
2988 /*
2989 * Kernel-created sockets that aren't delegating other sockets
2990 * are currently exempted from UUID policy checks.
2991 */
2992 if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED))
2993 return (0);
2994
2995 ogencnt = so->so_policy_gencnt;
2996 err = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
2997 so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt);
2998
2999 /*
3000 * Discard cached generation count if the entry is gone (ENOENT),
3001 * so that we go thru the checks below.
3002 */
3003 if (err == ENOENT && ogencnt != 0)
3004 so->so_policy_gencnt = 0;
3005
3006 /*
3007 * If the generation count has changed, inspect the policy flags
3008 * and act accordingly. If a policy flag was previously set and
3009 * the UUID is no longer present in the table (ENOENT), treat it
3010 * as if the flag has been cleared.
3011 */
3012 if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
3013 /* update cellular policy for this socket */
3014 if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
3015 inp_update_cellular_policy(inp, TRUE);
3016 } else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
3017 inp_update_cellular_policy(inp, FALSE);
3018 }
3019 #if NECP
3020 /* update necp want app policy for this socket */
3021 if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
3022 inp_update_necp_want_app_policy(inp, TRUE);
3023 } else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
3024 inp_update_necp_want_app_policy(inp, FALSE);
3025 }
3026 #endif /* NECP */
3027 }
3028
3029 return ((err == ENOENT) ? 0 : err);
3030 #else /* !CONFIG_PROC_UUID_POLICY */
3031 #pragma unused(inp)
3032 return (0);
3033 #endif /* !CONFIG_PROC_UUID_POLICY */
3034 }
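
/*
 * Editor's sketch -- not part of the original file.  The generation-count
 * discipline above in self-contained userspace C: the policy table bumps
 * a global generation on every change, each socket caches the generation
 * it last synchronized with, and an unchanged generation lets the update
 * be skipped entirely.  policy_lookup() is a stand-in for
 * proc_uuid_policy_lookup().
 */
#include <stdint.h>

static uint32_t policy_gencnt = 1;	/* bumped on every table change */
static uint32_t policy_flags;		/* e.g. a NO_CELLULAR bit */

static uint32_t
policy_lookup(uint32_t *gencnt)
{
	*gencnt = policy_gencnt;
	return (policy_flags);
}

struct sock_policy {
	uint32_t cached_gencnt;		/* last generation applied */
	uint32_t flags;
};

static void
update_policy(struct sock_policy *sp)
{
	uint32_t gen, flags;

	flags = policy_lookup(&gen);
	if (gen == sp->cached_gencnt)
		return;			/* table unchanged; nothing to do */
	sp->flags = flags;		/* inspect and apply the new flags */
	sp->cached_gencnt = gen;
}
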
3035 /*
3036 * Called when we need to enforce policy restrictions in the input path.
3037 *
3038 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
3039 */
3040 boolean_t
3041 inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
3042 {
3043 VERIFY(inp != NULL);
3044
3045 /*
3046 * Inbound restrictions.
3047 */
3048 if (!sorestrictrecv)
3049 return (FALSE);
3050
3051 if (ifp == NULL)
3052 return (FALSE);
3053
3054 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
3055 return (TRUE);
3056
3057 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
3058 return (TRUE);
3059
3060 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
3061 return (TRUE);
3062
3063 if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV))
3064 return (FALSE);
3065
3066 if (inp->inp_flags & INP_RECV_ANYIF)
3067 return (FALSE);
3068
3069 if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp)
3070 return (FALSE);
3071
3072 return (TRUE);
3073 }
3074
3075 /*
3076 * Called when we need to enforce policy restrictions in the output path.
3077 *
3078 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
3079 */
3080 boolean_t
3081 inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3082 {
3083 VERIFY(inp != NULL);
3084
3085 /*
3086 * Outbound restrictions.
3087 */
3088 if (!sorestrictsend)
3089 return (FALSE);
3090
3091 if (ifp == NULL)
3092 return (FALSE);
3093
3094 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
3095 return (TRUE);
3096
3097 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
3098 return (TRUE);
3099
3100 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
3101 return (TRUE);
3102
3103 return (FALSE);
3104 }