1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
62 */
63
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/domain.h>
69 #include <sys/protosw.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/proc.h>
73 #include <sys/kernel.h>
74 #include <sys/sysctl.h>
75 #include <sys/mcache.h>
76 #include <sys/kauth.h>
77 #include <sys/priv.h>
78 #include <sys/proc_uuid_policy.h>
79 #include <sys/syslog.h>
81
82 #include <libkern/OSAtomic.h>
83 #include <kern/locks.h>
84
85 #include <machine/limits.h>
86
87 #include <kern/zalloc.h>
88
89 #include <net/if.h>
90 #include <net/if_types.h>
91 #include <net/route.h>
92 #include <net/flowhash.h>
93 #include <net/flowadv.h>
94 #include <net/ntstat.h>
95
96 #include <netinet/in.h>
97 #include <netinet/in_pcb.h>
98 #include <netinet/in_var.h>
99 #include <netinet/ip_var.h>
100 #if INET6
101 #include <netinet/ip6.h>
102 #include <netinet6/ip6_var.h>
103 #endif /* INET6 */
104
105 #include <sys/kdebug.h>
106 #include <sys/random.h>
107
108 #include <dev/random/randomdev.h>
109 #include <mach/boolean.h>
110
111 #if NECP
112 #include <net/necp.h>
113 #endif
114
115 static lck_grp_t *inpcb_lock_grp;
116 static lck_attr_t *inpcb_lock_attr;
117 static lck_grp_attr_t *inpcb_lock_grp_attr;
118 decl_lck_mtx_data(static, inpcb_lock); /* global INPCB lock */
119 decl_lck_mtx_data(static, inpcb_timeout_lock);
120
121 static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);
122
123 static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */
124 static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
125 static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */
126 static boolean_t inpcb_fast_timer_on = FALSE;
127
128 /*
129 * If the total number of gc reqs is above a threshold, schedule
    130  * the garbage-collection timer sooner
131 */
132 static boolean_t inpcb_toomany_gcreq = FALSE;
133
134 #define INPCB_GCREQ_THRESHOLD 50000
    135 #define	INPCB_TOOMANY_GCREQ_TIMER	(hz/10) /* 1/10th of a second */
136
137 static void inpcb_sched_timeout(struct timeval *);
138 static void inpcb_timeout(void *);
139 int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */
140 extern int tvtohz(struct timeval *);
141
142 #if CONFIG_PROC_UUID_POLICY
143 static void inp_update_cellular_policy(struct inpcb *, boolean_t);
144 #if NECP
145 static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
146 #endif /* NECP */
    147 #endif /* CONFIG_PROC_UUID_POLICY */
148
149 #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
150 #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
151
152 /*
153 * These configure the range of local port addresses assigned to
154 * "unspecified" outgoing connections/packets/whatever.
155 */
156 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
157 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
158 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
159 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
160 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
161 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
162
163 #define RANGECHK(var, min, max) \
164 if ((var) < (min)) { (var) = (min); } \
165 else if ((var) > (max)) { (var) = (max); }
166
167 static int
168 sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
169 {
170 #pragma unused(arg1, arg2)
171 int error;
172
173 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
174 if (!error) {
175 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
176 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
177 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
178 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
179 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
180 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
181 }
182 return (error);
183 }
184
185 #undef RANGECHK
186
187 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
188 CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports");
189
190 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
191 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
192 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
193 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
194 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
195 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
196 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
197 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
198 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
199 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
200 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
201 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
202 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
203 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
204 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
205 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
206 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
207 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
208
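/*
 * Editorial example (not part of this file): the port-range knobs
 * above are ordinary sysctls, so they can be read or tuned from
 * userspace with sysctlbyname(3); whatever is written gets clamped
 * by sysctl_net_ipport_check() through RANGECHK. A minimal sketch:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int first, last, newfirst = 50000;
 *		size_t len = sizeof (first);
 *
 *		sysctlbyname("net.inet.ip.portrange.first", &first,
 *		    &len, NULL, 0);
 *		len = sizeof (last);
 *		sysctlbyname("net.inet.ip.portrange.last", &last,
 *		    &len, NULL, 0);
 *		printf("ephemeral ports: %d-%d\n", first, last);
 *		(void) sysctlbyname("net.inet.ip.portrange.first",
 *		    NULL, NULL, &newfirst, sizeof (newfirst));
 *		return (0);
 *	}
 *
 * Writing requires root; out-of-range values are silently clamped.
 */
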
209 extern int udp_use_randomport;
210 extern int tcp_use_randomport;
211
212 /* Structs used for flowhash computation */
213 struct inp_flowhash_key_addr {
214 union {
215 struct in_addr v4;
216 struct in6_addr v6;
217 u_int8_t addr8[16];
218 u_int16_t addr16[8];
219 u_int32_t addr32[4];
220 } infha;
221 };
222
223 struct inp_flowhash_key {
224 struct inp_flowhash_key_addr infh_laddr;
225 struct inp_flowhash_key_addr infh_faddr;
226 u_int32_t infh_lport;
227 u_int32_t infh_fport;
228 u_int32_t infh_af;
229 u_int32_t infh_proto;
230 u_int32_t infh_rand1;
231 u_int32_t infh_rand2;
232 };
233
234 static u_int32_t inp_hash_seed = 0;
235
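/*
 * Editorial sketch of how this key is consumed; it mirrors what
 * inp_calc_flowhash() does further down in this file (net_flowhash
 * comes from <net/flowhash.h>, and inp_hash_seed is lazily seeded
 * from RandomULong()):
 *
 *	struct inp_flowhash_key fh;
 *	u_int32_t flowhash;
 *
 *	bzero(&fh, sizeof (fh));
 *	fh.infh_laddr.infha.v4 = inp->inp_laddr;
 *	fh.infh_faddr.infha.v4 = inp->inp_faddr;
 *	fh.infh_lport = inp->inp_lport;
 *	fh.infh_fport = inp->inp_fport;
 *	fh.infh_af = AF_INET;
 *	fh.infh_proto = inp->inp_ip_p;
 *	fh.infh_rand1 = RandomULong();
 *	fh.infh_rand2 = RandomULong();
 *	flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed);
 *
 * The two random words salt each pcb's hash so that flow hashes are
 * not predictable across sockets.
 */
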
236 static int infc_cmp(const struct inpcb *, const struct inpcb *);
237
238 /* Flags used by inp_fc_getinp */
239 #define INPFC_SOLOCKED 0x1
240 #define INPFC_REMOVE 0x2
241 static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);
242
243 static void inp_fc_feedback(struct inpcb *);
244 extern void tcp_remove_from_time_wait(struct inpcb *inp);
245
246 decl_lck_mtx_data(static, inp_fc_lck);
247
248 RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
249 RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
250 RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);
251
252 /*
253 * Use this inp as a key to find an inp in the flowhash tree.
254 * Accesses to it are protected by inp_fc_lck.
255 */
256 struct inpcb key_inp;
257
258 /*
259 * in_pcb.c: manage the Protocol Control Blocks.
260 */
261
262 void
263 in_pcbinit(void)
264 {
265 static int inpcb_initialized = 0;
266
267 VERIFY(!inpcb_initialized);
268 inpcb_initialized = 1;
269
270 inpcb_lock_grp_attr = lck_grp_attr_alloc_init();
271 inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr);
272 inpcb_lock_attr = lck_attr_alloc_init();
273 lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr);
274 lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr);
275
276 /*
277 * Initialize data structures required to deliver
278 * flow advisories.
279 */
280 lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr);
281 lck_mtx_lock(&inp_fc_lck);
282 RB_INIT(&inp_fc_tree);
283 bzero(&key_inp, sizeof(key_inp));
284 lck_mtx_unlock(&inp_fc_lck);
285 }
286
287 #define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \
288 ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
289 static void
290 inpcb_timeout(void *arg)
291 {
293 struct inpcbinfo *ipi;
294 boolean_t t, gc;
295 struct intimercount gccnt, tmcnt;
296 struct timeval leeway;
297 boolean_t toomany_gc = FALSE;
298
299 if (arg != NULL) {
300 VERIFY(arg == &inpcb_toomany_gcreq);
301 toomany_gc = *(boolean_t *)arg;
302 }
303
304 /*
305 * Update coarse-grained networking timestamp (in sec.); the idea
306 * is to piggy-back on the timeout callout to update the counter
307 * returnable via net_uptime().
308 */
309 net_update_uptime();
310
311 bzero(&gccnt, sizeof(gccnt));
312 bzero(&tmcnt, sizeof(tmcnt));
313
314 lck_mtx_lock_spin(&inpcb_timeout_lock);
315 gc = inpcb_garbage_collecting;
316 inpcb_garbage_collecting = FALSE;
317
318 t = inpcb_ticking;
319 inpcb_ticking = FALSE;
320
321 if (gc || t) {
322 lck_mtx_unlock(&inpcb_timeout_lock);
323
324 lck_mtx_lock(&inpcb_lock);
325 TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
326 if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
327 bzero(&ipi->ipi_gc_req,
328 sizeof(ipi->ipi_gc_req));
329 if (gc && ipi->ipi_gc != NULL) {
330 ipi->ipi_gc(ipi);
331 gccnt.intimer_lazy +=
332 ipi->ipi_gc_req.intimer_lazy;
333 gccnt.intimer_fast +=
334 ipi->ipi_gc_req.intimer_fast;
335 gccnt.intimer_nodelay +=
336 ipi->ipi_gc_req.intimer_nodelay;
337 }
338 }
339 if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
340 bzero(&ipi->ipi_timer_req,
341 sizeof(ipi->ipi_timer_req));
342 if (t && ipi->ipi_timer != NULL) {
343 ipi->ipi_timer(ipi);
344 tmcnt.intimer_lazy +=
345 ipi->ipi_timer_req.intimer_lazy;
    346                                         tmcnt.intimer_fast +=
    347                                             ipi->ipi_timer_req.intimer_fast;
348 tmcnt.intimer_nodelay +=
349 ipi->ipi_timer_req.intimer_nodelay;
350 }
351 }
352 }
353 lck_mtx_unlock(&inpcb_lock);
354 lck_mtx_lock_spin(&inpcb_timeout_lock);
355 }
356
357 /* lock was dropped above, so check first before overriding */
358 if (!inpcb_garbage_collecting)
359 inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
360 if (!inpcb_ticking)
361 inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
362
363 /* re-arm the timer if there's work to do */
364 if (toomany_gc) {
365 inpcb_toomany_gcreq = FALSE;
366 } else {
367 inpcb_timeout_run--;
368 VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);
369 }
370
371 bzero(&leeway, sizeof(leeway));
372 leeway.tv_sec = inpcb_timeout_lazy;
373 if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0)
374 inpcb_sched_timeout(NULL);
375 else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5)
376 /* be lazy when idle with little activity */
377 inpcb_sched_timeout(&leeway);
378 else
379 inpcb_sched_timeout(NULL);
380
381 lck_mtx_unlock(&inpcb_timeout_lock);
382 }
383
384 static void
385 inpcb_sched_timeout(struct timeval *leeway)
386 {
387 lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
388
389 if (inpcb_timeout_run == 0 &&
390 (inpcb_garbage_collecting || inpcb_ticking)) {
391 lck_mtx_convert_spin(&inpcb_timeout_lock);
392 inpcb_timeout_run++;
393 if (leeway == NULL) {
394 inpcb_fast_timer_on = TRUE;
395 timeout(inpcb_timeout, NULL, hz);
396 } else {
397 inpcb_fast_timer_on = FALSE;
398 timeout_with_leeway(inpcb_timeout, NULL, hz,
399 tvtohz(leeway));
400 }
401 } else if (inpcb_timeout_run == 1 &&
402 leeway == NULL && !inpcb_fast_timer_on) {
403 /*
404 * Since the request was for a fast timer but the
405 * scheduled timer is a lazy timer, try to schedule
406 * another instance of fast timer also
407 */
408 lck_mtx_convert_spin(&inpcb_timeout_lock);
409 inpcb_timeout_run++;
410 inpcb_fast_timer_on = TRUE;
411 timeout(inpcb_timeout, NULL, hz);
412 }
413 }
414
415 void
416 inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
417 {
418 struct timeval leeway;
419 u_int32_t gccnt;
420 lck_mtx_lock_spin(&inpcb_timeout_lock);
421 inpcb_garbage_collecting = TRUE;
422
423 gccnt = ipi->ipi_gc_req.intimer_nodelay +
424 ipi->ipi_gc_req.intimer_fast;
425
426 if (gccnt > INPCB_GCREQ_THRESHOLD && !inpcb_toomany_gcreq) {
427 inpcb_toomany_gcreq = TRUE;
428
429 /*
    430                  * There are too many pcbs waiting to be garbage collected;
    431                  * schedule a much faster timeout in addition to
    432                  * the caller's request.
433 */
434 lck_mtx_convert_spin(&inpcb_timeout_lock);
435 timeout(inpcb_timeout, (void *)&inpcb_toomany_gcreq,
436 INPCB_TOOMANY_GCREQ_TIMER);
437 }
438
439 switch (type) {
440 case INPCB_TIMER_NODELAY:
441 atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1);
442 inpcb_sched_timeout(NULL);
443 break;
444 case INPCB_TIMER_FAST:
445 atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
446 inpcb_sched_timeout(NULL);
447 break;
448 default:
449 atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);
450 leeway.tv_sec = inpcb_timeout_lazy;
451 leeway.tv_usec = 0;
452 inpcb_sched_timeout(&leeway);
453 break;
454 }
455 lck_mtx_unlock(&inpcb_timeout_lock);
456 }
457
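/*
 * Editorial note: this is how a protocol asks for pcb garbage
 * collection; e.g. in_pcbdetach() below does, in effect:
 *
 *	inp->inp_state = INPCB_STATE_DEAD;
 *	inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
 *
 * INPCB_TIMER_NODELAY and INPCB_TIMER_FAST both arm the one-second
 * timer with no leeway; any other type arms it with
 * inpcb_timeout_lazy seconds of leeway.
 */
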
458 void
459 inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
460 {
461 struct timeval leeway;
462 lck_mtx_lock_spin(&inpcb_timeout_lock);
463 inpcb_ticking = TRUE;
464 switch (type) {
465 case INPCB_TIMER_NODELAY:
466 atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1);
467 inpcb_sched_timeout(NULL);
468 break;
469 case INPCB_TIMER_FAST:
470 atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
471 inpcb_sched_timeout(NULL);
472 break;
473 default:
474 atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1);
475 leeway.tv_sec = inpcb_timeout_lazy;
476 leeway.tv_usec = 0;
477 inpcb_sched_timeout(&leeway);
478 break;
479 }
480 lck_mtx_unlock(&inpcb_timeout_lock);
481 }
482
483 void
484 in_pcbinfo_attach(struct inpcbinfo *ipi)
485 {
486 struct inpcbinfo *ipi0;
487
488 lck_mtx_lock(&inpcb_lock);
489 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
490 if (ipi0 == ipi) {
491 panic("%s: ipi %p already in the list\n",
492 __func__, ipi);
493 /* NOTREACHED */
494 }
495 }
496 TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
497 lck_mtx_unlock(&inpcb_lock);
498 }
499
500 int
501 in_pcbinfo_detach(struct inpcbinfo *ipi)
502 {
503 struct inpcbinfo *ipi0;
504 int error = 0;
505
506 lck_mtx_lock(&inpcb_lock);
507 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
508 if (ipi0 == ipi)
509 break;
510 }
511 if (ipi0 != NULL)
512 TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
513 else
514 error = ENXIO;
515 lck_mtx_unlock(&inpcb_lock);
516
517 return (error);
518 }
519
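/*
 * Editorial note: each protocol registers its inpcbinfo once at
 * initialization so that inpcb_timeout() can walk it; assuming the
 * usual tcbinfo/udbinfo globals, e.g.:
 *
 *	in_pcbinfo_attach(&tcbinfo);	from tcp_init()
 *	in_pcbinfo_attach(&udbinfo);	from udp_init()
 */
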
520 /*
521 * Allocate a PCB and associate it with the socket.
522 *
523 * Returns: 0 Success
524 * ENOBUFS
525 * ENOMEM
526 */
527 int
528 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
529 {
530 #pragma unused(p)
531 struct inpcb *inp;
532 caddr_t temp;
533 #if CONFIG_MACF_NET
534 int mac_error;
535 #endif /* CONFIG_MACF_NET */
536
537 if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
538 inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone);
539 if (inp == NULL)
540 return (ENOBUFS);
541 bzero((caddr_t)inp, sizeof (*inp));
542 } else {
543 inp = (struct inpcb *)(void *)so->so_saved_pcb;
544 temp = inp->inp_saved_ppcb;
545 bzero((caddr_t)inp, sizeof (*inp));
546 inp->inp_saved_ppcb = temp;
547 }
548
549 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
550 inp->inp_pcbinfo = pcbinfo;
551 inp->inp_socket = so;
552 #if CONFIG_MACF_NET
553 mac_error = mac_inpcb_label_init(inp, M_WAITOK);
554 if (mac_error != 0) {
555 if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0)
556 zfree(pcbinfo->ipi_zone, inp);
557 return (mac_error);
558 }
559 mac_inpcb_label_associate(so, inp);
560 #endif /* CONFIG_MACF_NET */
561 /* make sure inp_stat is always 64-bit aligned */
562 inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
563 sizeof (u_int64_t));
564 if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
565 sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) {
566 panic("%s: insufficient space to align inp_stat", __func__);
567 /* NOTREACHED */
568 }
569
570 /* make sure inp_cstat is always 64-bit aligned */
571 inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
572 sizeof (u_int64_t));
573 if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
574 sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) {
575 panic("%s: insufficient space to align inp_cstat", __func__);
576 /* NOTREACHED */
577 }
578
579 /* make sure inp_wstat is always 64-bit aligned */
580 inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
581 sizeof (u_int64_t));
582 if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
583 sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) {
584 panic("%s: insufficient space to align inp_wstat", __func__);
585 /* NOTREACHED */
586 }
587
588 /* make sure inp_Wstat is always 64-bit aligned */
589 inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
590 sizeof (u_int64_t));
591 if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
592 sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) {
593 panic("%s: insufficient space to align inp_Wstat", __func__);
594 /* NOTREACHED */
595 }
596
597 so->so_pcb = (caddr_t)inp;
598
599 if (so->so_proto->pr_flags & PR_PCBLOCK) {
600 lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
601 pcbinfo->ipi_lock_attr);
602 }
603
604 #if INET6
605 if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on)
606 inp->inp_flags |= IN6P_IPV6_V6ONLY;
607
608 if (ip6_auto_flowlabel)
609 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
610 #endif /* INET6 */
611
612 (void) inp_update_policy(inp);
613
614 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
615 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
616 LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
617 pcbinfo->ipi_count++;
618 lck_rw_done(pcbinfo->ipi_lock);
619 return (0);
620 }
621
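/*
 * Editorial notes: a protocol's pru_attach typically calls this as
 * "error = in_pcballoc(so, &udbinfo, p);" (udbinfo assumed). The
 * alignment dance above relies on P2ROUNDUP(x, align) rounding x up
 * to the next multiple of a power-of-two alignment; e.g. for a
 * store at address 0x100c, P2ROUNDUP(0x100c, 8) yields 0x1010, and
 * inp_stat_store is sized with enough slack that the rounded
 * pointer plus sizeof (struct inp_stat) still fits, which is
 * exactly what the panic checks verify.
 */
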
622 /*
623 * in_pcblookup_local_and_cleanup does everything
    624  * in_pcblookup_local does, but it also checks for a socket
    625  * that's going away. Since we know that the lock is
    626  * held exclusively (read+write) when this function is
    627  * called, we can safely dispose of such a socket the way
    628  * the slow timer usually would, and return NULL. This is
    629  * useful for bind.
630 */
631 struct inpcb *
632 in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
633 u_int lport_arg, int wild_okay)
634 {
635 struct inpcb *inp;
636
637 /* Perform normal lookup */
638 inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
639
640 /* Check if we found a match but it's waiting to be disposed */
641 if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
642 struct socket *so = inp->inp_socket;
643
644 lck_mtx_lock(&inp->inpcb_mtx);
645
646 if (so->so_usecount == 0) {
647 if (inp->inp_state != INPCB_STATE_DEAD)
648 in_pcbdetach(inp);
649 in_pcbdispose(inp); /* will unlock & destroy */
650 inp = NULL;
651 } else {
652 lck_mtx_unlock(&inp->inpcb_mtx);
653 }
654 }
655
656 return (inp);
657 }
658
659 static void
660 in_pcb_conflict_post_msg(u_int16_t port)
661 {
662 /*
    663          * Radar 5523020: send a kernel event notification if a
    664          * non-participating socket tries to bind the port owned by
    665          * a socket that has set SOF_NOTIFYCONFLICT.
666 */
667 struct kev_msg ev_msg;
668 struct kev_in_portinuse in_portinuse;
669
670 bzero(&in_portinuse, sizeof (struct kev_in_portinuse));
671 bzero(&ev_msg, sizeof (struct kev_msg));
672 in_portinuse.port = ntohs(port); /* port in host order */
673 in_portinuse.req_pid = proc_selfpid();
674 ev_msg.vendor_code = KEV_VENDOR_APPLE;
675 ev_msg.kev_class = KEV_NETWORK_CLASS;
676 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
677 ev_msg.event_code = KEV_INET_PORTINUSE;
678 ev_msg.dv[0].data_ptr = &in_portinuse;
679 ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse);
680 ev_msg.dv[1].data_length = 0;
681 kev_post_msg(&ev_msg);
682 }
683
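/*
 * Editorial sketch (userspace side, relying on the private
 * kern-event headers): a process can watch for these port-conflict
 * notifications with a kernel event socket:
 *
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <sys/kern_event.h>
 *
 *	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *	struct kev_request req = {
 *		.vendor_code = KEV_VENDOR_APPLE,
 *		.kev_class = KEV_NETWORK_CLASS,
 *		.kev_subclass = KEV_INET_SUBCLASS,
 *	};
 *
 *	ioctl(fd, SIOCSKEVFILTER, &req);
 *
 * and then read struct kern_event_msg records from fd, looking for
 * event_code == KEV_INET_PORTINUSE carrying a struct
 * kev_in_portinuse payload.
 */
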
684 /*
685 * Bind an INPCB to an address and/or port. This routine should not alter
686 * the caller-supplied local address "nam".
687 *
688 * Returns: 0 Success
689 * EADDRNOTAVAIL Address not available.
690 * EINVAL Invalid argument
691 * EAFNOSUPPORT Address family not supported [notdef]
692 * EACCES Permission denied
693 * EADDRINUSE Address in use
694 * EAGAIN Resource unavailable, try again
695 * priv_check_cred:EPERM Operation not permitted
696 */
697 int
698 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
699 {
700 struct socket *so = inp->inp_socket;
701 unsigned short *lastport;
702 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
703 u_short lport = 0, rand_port = 0;
704 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
705 int error, randomport, conflict = 0;
706 boolean_t anonport = FALSE;
707 kauth_cred_t cred;
708 struct in_addr laddr;
709 struct ifnet *outif = NULL;
710
711 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
712 return (EADDRNOTAVAIL);
713 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
714 return (EINVAL);
715 if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
716 wild = 1;
717
718 bzero(&laddr, sizeof(laddr));
719
720 socket_unlock(so, 0); /* keep reference on socket */
721 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
722
723 if (nam != NULL) {
724
725 if (nam->sa_len != sizeof (struct sockaddr_in)) {
726 lck_rw_done(pcbinfo->ipi_lock);
727 socket_lock(so, 0);
728 return (EINVAL);
729 }
730 #if 0
731 /*
732 * We should check the family, but old programs
733 * incorrectly fail to initialize it.
734 */
735 if (nam->sa_family != AF_INET) {
736 lck_rw_done(pcbinfo->ipi_lock);
737 socket_lock(so, 0);
738 return (EAFNOSUPPORT);
739 }
740 #endif /* 0 */
741 lport = SIN(nam)->sin_port;
742
743 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
744 /*
745 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
746 * allow complete duplication of binding if
747 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
748 * and a multicast address is bound on both
749 * new and duplicated sockets.
750 */
751 if (so->so_options & SO_REUSEADDR)
752 reuseport = SO_REUSEADDR|SO_REUSEPORT;
753 } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
754 struct sockaddr_in sin;
755 struct ifaddr *ifa;
756
757 /* Sanitized for interface address searches */
758 bzero(&sin, sizeof (sin));
759 sin.sin_family = AF_INET;
760 sin.sin_len = sizeof (struct sockaddr_in);
761 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
762
763 ifa = ifa_ifwithaddr(SA(&sin));
764 if (ifa == NULL) {
765 lck_rw_done(pcbinfo->ipi_lock);
766 socket_lock(so, 0);
767 return (EADDRNOTAVAIL);
768 } else {
769 /*
770 * Opportunistically determine the outbound
771 * interface that may be used; this may not
772 * hold true if we end up using a route
773 * going over a different interface, e.g.
774 * when sending to a local address. This
775 * will get updated again after sending.
776 */
777 IFA_LOCK(ifa);
778 outif = ifa->ifa_ifp;
779 IFA_UNLOCK(ifa);
780 IFA_REMREF(ifa);
781 }
782 }
783 if (lport != 0) {
784 struct inpcb *t;
785 uid_t u;
786
787 if (ntohs(lport) < IPPORT_RESERVED) {
788 cred = kauth_cred_proc_ref(p);
789 error = priv_check_cred(cred,
790 PRIV_NETINET_RESERVEDPORT, 0);
791 kauth_cred_unref(&cred);
792 if (error != 0) {
793 lck_rw_done(pcbinfo->ipi_lock);
794 socket_lock(so, 0);
795 return (EACCES);
796 }
797 }
798 if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
799 (u = kauth_cred_getuid(so->so_cred)) != 0 &&
800 (t = in_pcblookup_local_and_cleanup(
801 inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
802 INPLOOKUP_WILDCARD)) != NULL &&
803 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
804 t->inp_laddr.s_addr != INADDR_ANY ||
805 !(t->inp_socket->so_options & SO_REUSEPORT)) &&
806 (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
807 !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
808 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
809 t->inp_laddr.s_addr != INADDR_ANY)) {
810 if ((t->inp_socket->so_flags &
811 SOF_NOTIFYCONFLICT) &&
812 !(so->so_flags & SOF_NOTIFYCONFLICT))
813 conflict = 1;
814
815 lck_rw_done(pcbinfo->ipi_lock);
816
817 if (conflict)
818 in_pcb_conflict_post_msg(lport);
819
820 socket_lock(so, 0);
821 return (EADDRINUSE);
822 }
823 t = in_pcblookup_local_and_cleanup(pcbinfo,
824 SIN(nam)->sin_addr, lport, wild);
825 if (t != NULL &&
826 (reuseport & t->inp_socket->so_options) == 0) {
827 #if INET6
828 if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
829 t->inp_laddr.s_addr != INADDR_ANY ||
830 SOCK_DOM(so) != PF_INET6 ||
831 SOCK_DOM(t->inp_socket) != PF_INET6)
832 #endif /* INET6 */
833 {
834
835 if ((t->inp_socket->so_flags &
836 SOF_NOTIFYCONFLICT) &&
837 !(so->so_flags & SOF_NOTIFYCONFLICT))
838 conflict = 1;
839
840 lck_rw_done(pcbinfo->ipi_lock);
841
842 if (conflict)
843 in_pcb_conflict_post_msg(lport);
844 socket_lock(so, 0);
845 return (EADDRINUSE);
846 }
847 }
848 }
849 laddr = SIN(nam)->sin_addr;
850 }
851 if (lport == 0) {
852 u_short first, last;
853 int count;
854
855 randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
856 (so->so_type == SOCK_STREAM ? tcp_use_randomport :
857 udp_use_randomport);
858
859 /*
860 * Even though this looks similar to the code in
861 * in6_pcbsetport, the v6 vs v4 checks are different.
862 */
863 anonport = TRUE;
864 if (inp->inp_flags & INP_HIGHPORT) {
865 first = ipport_hifirstauto; /* sysctl */
866 last = ipport_hilastauto;
867 lastport = &pcbinfo->ipi_lasthi;
868 } else if (inp->inp_flags & INP_LOWPORT) {
869 cred = kauth_cred_proc_ref(p);
870 error = priv_check_cred(cred,
871 PRIV_NETINET_RESERVEDPORT, 0);
872 kauth_cred_unref(&cred);
873 if (error != 0) {
874 lck_rw_done(pcbinfo->ipi_lock);
875 socket_lock(so, 0);
876 return (error);
877 }
878 first = ipport_lowfirstauto; /* 1023 */
879 last = ipport_lowlastauto; /* 600 */
880 lastport = &pcbinfo->ipi_lastlow;
881 } else {
882 first = ipport_firstauto; /* sysctl */
883 last = ipport_lastauto;
884 lastport = &pcbinfo->ipi_lastport;
885 }
886 /* No point in randomizing if only one port is available */
887
888 if (first == last)
889 randomport = 0;
890 /*
    891          * Simple check to ensure that all ports have not been used
    892          * up, which would otherwise cause an endless loop here.
893 *
894 * We split the two cases (up and down) so that the direction
895 * is not being tested on each round of the loop.
896 */
897 if (first > last) {
898 /*
899 * counting down
900 */
901 if (randomport) {
902 read_random(&rand_port, sizeof (rand_port));
903 *lastport =
904 first - (rand_port % (first - last));
905 }
906 count = first - last;
907
908 do {
909 if (count-- < 0) { /* completely used? */
910 lck_rw_done(pcbinfo->ipi_lock);
911 socket_lock(so, 0);
912 return (EADDRNOTAVAIL);
913 }
914 --*lastport;
915 if (*lastport > first || *lastport < last)
916 *lastport = first;
917 lport = htons(*lastport);
918 } while (in_pcblookup_local_and_cleanup(pcbinfo,
919 ((laddr.s_addr != INADDR_ANY) ? laddr :
920 inp->inp_laddr), lport, wild));
921 } else {
922 /*
923 * counting up
924 */
925 if (randomport) {
926 read_random(&rand_port, sizeof (rand_port));
927 *lastport =
    928                                     first + (rand_port % (last - first));
929 }
930 count = last - first;
931
932 do {
933 if (count-- < 0) { /* completely used? */
934 lck_rw_done(pcbinfo->ipi_lock);
935 socket_lock(so, 0);
936 return (EADDRNOTAVAIL);
937 }
938 ++*lastport;
939 if (*lastport < first || *lastport > last)
940 *lastport = first;
941 lport = htons(*lastport);
942 } while (in_pcblookup_local_and_cleanup(pcbinfo,
943 ((laddr.s_addr != INADDR_ANY) ? laddr :
944 inp->inp_laddr), lport, wild));
945 }
946 }
947 socket_lock(so, 0);
948
949 /*
    950          * We unlocked the socket's protocol lock for a long time.
    951          * The socket might have been dropped/defuncted.
    952          * Check whether the world has changed since.
953 */
954 if (inp->inp_state == INPCB_STATE_DEAD) {
955 lck_rw_done(pcbinfo->ipi_lock);
956 return (ECONNABORTED);
957 }
958
959 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
960 lck_rw_done(pcbinfo->ipi_lock);
961 return (EINVAL);
962 }
963
964 if (laddr.s_addr != INADDR_ANY) {
965 inp->inp_laddr = laddr;
966 inp->inp_last_outifp = outif;
967 }
968 inp->inp_lport = lport;
969 if (anonport)
970 inp->inp_flags |= INP_ANONPORT;
971
972 if (in_pcbinshash(inp, 1) != 0) {
973 inp->inp_laddr.s_addr = INADDR_ANY;
974 inp->inp_last_outifp = NULL;
975
976 inp->inp_lport = 0;
977 if (anonport)
978 inp->inp_flags &= ~INP_ANONPORT;
979 lck_rw_done(pcbinfo->ipi_lock);
980 return (EAGAIN);
981 }
982 lck_rw_done(pcbinfo->ipi_lock);
983 sflt_notify(so, sock_evt_bound, NULL);
984 return (0);
985 }
986
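/*
 * Editorial example (the userspace view of the above): binding a
 * reserved port (below IPPORT_RESERVED, i.e. below 1024) trips the
 * PRIV_NETINET_RESERVEDPORT check and fails with EACCES for
 * unprivileged callers, while a zero port takes the anonymous
 * ephemeral-port path:
 *
 *	struct sockaddr_in sin;
 *	int s = socket(AF_INET, SOCK_STREAM, 0);
 *
 *	bzero(&sin, sizeof (sin));
 *	sin.sin_family = AF_INET;
 *	sin.sin_len = sizeof (sin);
 *	sin.sin_addr.s_addr = INADDR_ANY;
 *	sin.sin_port = htons(80);
 *	if (bind(s, (struct sockaddr *)&sin, sizeof (sin)) == -1)
 *		perror("bind");
 *
 * Retrying with sin.sin_port = 0 succeeds (barring port exhaustion)
 * and lets the loops above pick the port.
 */
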
987 /*
988 * Transform old in_pcbconnect() into an inner subroutine for new
989 * in_pcbconnect(); do some validity-checking on the remote address
990 * (in "nam") and then determine local host address (i.e., which
991 * interface) to use to access that remote host.
992 *
993 * This routine may alter the caller-supplied remote address "nam".
994 *
995 * The caller may override the bound-to-interface setting of the socket
996 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
997 *
998 * This routine might return an ifp with a reference held if the caller
999 * provides a non-NULL outif, even in the error case. The caller is
1000 * responsible for releasing its reference.
1001 *
1002 * Returns: 0 Success
1003 * EINVAL Invalid argument
1004 * EAFNOSUPPORT Address family not supported
1005 * EADDRNOTAVAIL Address not available
1006 */
1007 int
1008 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
1009 unsigned int ifscope, struct ifnet **outif)
1010 {
1011 struct route *ro = &inp->inp_route;
1012 struct in_ifaddr *ia = NULL;
1013 struct sockaddr_in sin;
1014 int error = 0;
1015 boolean_t restricted = FALSE;
1016
1017 if (outif != NULL)
1018 *outif = NULL;
1019 if (nam->sa_len != sizeof (struct sockaddr_in))
1020 return (EINVAL);
1021 if (SIN(nam)->sin_family != AF_INET)
1022 return (EAFNOSUPPORT);
1023 if (SIN(nam)->sin_port == 0)
1024 return (EADDRNOTAVAIL);
1025
1026 /*
1027 * If the destination address is INADDR_ANY,
1028 * use the primary local address.
1029 * If the supplied address is INADDR_BROADCAST,
1030 * and the primary interface supports broadcast,
1031 * choose the broadcast address for that interface.
1032 */
1033 if (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
1034 SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST) {
1035 lck_rw_lock_shared(in_ifaddr_rwlock);
1036 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
1037 ia = TAILQ_FIRST(&in_ifaddrhead);
1038 IFA_LOCK_SPIN(&ia->ia_ifa);
1039 if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
1040 SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
1041 } else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
1042 SIN(nam)->sin_addr =
1043 SIN(&ia->ia_broadaddr)->sin_addr;
1044 }
1045 IFA_UNLOCK(&ia->ia_ifa);
1046 ia = NULL;
1047 }
1048 lck_rw_done(in_ifaddr_rwlock);
1049 }
1050 /*
1051 * Otherwise, if the socket has already bound the source, just use it.
1052 */
1053 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1054 VERIFY(ia == NULL);
1055 *laddr = inp->inp_laddr;
1056 return (0);
1057 }
1058
1059 /*
1060 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
1061 * then it overrides the sticky ifscope set for the socket.
1062 */
1063 if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF))
1064 ifscope = inp->inp_boundifp->if_index;
1065
1066 /*
1067 * If route is known or can be allocated now,
1068 * our src addr is taken from the i/f, else punt.
1069 * Note that we should check the address family of the cached
1070 * destination, in case of sharing the cache with IPv6.
1071 */
1072 if (ro->ro_rt != NULL)
1073 RT_LOCK_SPIN(ro->ro_rt);
1074 if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
1075 SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
1076 (inp->inp_socket->so_options & SO_DONTROUTE)) {
1077 if (ro->ro_rt != NULL)
1078 RT_UNLOCK(ro->ro_rt);
1079 ROUTE_RELEASE(ro);
1080 }
1081 if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
1082 (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
1083 if (ro->ro_rt != NULL)
1084 RT_UNLOCK(ro->ro_rt);
1085 ROUTE_RELEASE(ro);
1086 /* No route yet, so try to acquire one */
1087 bzero(&ro->ro_dst, sizeof (struct sockaddr_in));
1088 ro->ro_dst.sa_family = AF_INET;
1089 ro->ro_dst.sa_len = sizeof (struct sockaddr_in);
1090 SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
1091 rtalloc_scoped(ro, ifscope);
1092 if (ro->ro_rt != NULL)
1093 RT_LOCK_SPIN(ro->ro_rt);
1094 }
1095 /* Sanitized local copy for interface address searches */
1096 bzero(&sin, sizeof (sin));
1097 sin.sin_family = AF_INET;
1098 sin.sin_len = sizeof (struct sockaddr_in);
1099 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
1100 /*
1101 * If we did not find (or use) a route, assume dest is reachable
1102 * on a directly connected network and try to find a corresponding
1103 * interface to take the source address from.
1104 */
1105 if (ro->ro_rt == NULL) {
1106 VERIFY(ia == NULL);
1107 ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
1108 if (ia == NULL)
1109 ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
1110 error = ((ia == NULL) ? ENETUNREACH : 0);
1111 goto done;
1112 }
1113 RT_LOCK_ASSERT_HELD(ro->ro_rt);
1114 /*
1115 * If the outgoing interface on the route found is not
1116 * a loopback interface, use the address from that interface.
1117 */
1118 if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
1119 VERIFY(ia == NULL);
1120 /*
1121 * If the route points to a cellular interface and the
1122 * caller forbids our using interfaces of such type,
1123 * pretend that there is no route.
1124 * Apply the same logic for expensive interfaces.
1125 */
1126 if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
1127 RT_UNLOCK(ro->ro_rt);
1128 ROUTE_RELEASE(ro);
1129 error = EHOSTUNREACH;
1130 restricted = TRUE;
1131 } else {
1132 /* Become a regular mutex */
1133 RT_CONVERT_LOCK(ro->ro_rt);
1134 ia = ifatoia(ro->ro_rt->rt_ifa);
1135 IFA_ADDREF(&ia->ia_ifa);
1136 RT_UNLOCK(ro->ro_rt);
1137 error = 0;
1138 }
1139 goto done;
1140 }
1141 VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
1142 RT_UNLOCK(ro->ro_rt);
1143 /*
   1144  * The outgoing interface is marked with 'loopback net', so the
   1145  * route found points back to ourselves.
1146 * Try to find the interface of the destination address and then
1147 * take the address from there. That interface is not necessarily
1148 * a loopback interface.
1149 */
1150 VERIFY(ia == NULL);
1151 ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
1152 if (ia == NULL)
1153 ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
1154 if (ia == NULL)
1155 ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
1156 if (ia == NULL) {
1157 RT_LOCK(ro->ro_rt);
1158 ia = ifatoia(ro->ro_rt->rt_ifa);
1159 if (ia != NULL)
1160 IFA_ADDREF(&ia->ia_ifa);
1161 RT_UNLOCK(ro->ro_rt);
1162 }
1163 error = ((ia == NULL) ? ENETUNREACH : 0);
1164
1165 done:
1166 /*
1167 * If the destination address is multicast and an outgoing
1168 * interface has been set as a multicast option, use the
1169 * address of that interface as our source address.
1170 */
1171 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
1172 inp->inp_moptions != NULL) {
1173 struct ip_moptions *imo;
1174 struct ifnet *ifp;
1175
1176 imo = inp->inp_moptions;
1177 IMO_LOCK(imo);
1178 if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
1179 ia->ia_ifp != imo->imo_multicast_ifp)) {
1180 ifp = imo->imo_multicast_ifp;
1181 if (ia != NULL)
1182 IFA_REMREF(&ia->ia_ifa);
1183 lck_rw_lock_shared(in_ifaddr_rwlock);
1184 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
1185 if (ia->ia_ifp == ifp)
1186 break;
1187 }
1188 if (ia != NULL)
1189 IFA_ADDREF(&ia->ia_ifa);
1190 lck_rw_done(in_ifaddr_rwlock);
1191 if (ia == NULL)
1192 error = EADDRNOTAVAIL;
1193 else
1194 error = 0;
1195 }
1196 IMO_UNLOCK(imo);
1197 }
1198 /*
1199 * Don't do pcblookup call here; return interface in laddr
   1200  * and exit to the caller, which will do the lookup.
1201 */
1202 if (ia != NULL) {
1203 /*
1204 * If the source address belongs to a cellular interface
1205 * and the socket forbids our using interfaces of such
1206 * type, pretend that there is no source address.
1207 * Apply the same logic for expensive interfaces.
1208 */
1209 IFA_LOCK_SPIN(&ia->ia_ifa);
1210 if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
1211 IFA_UNLOCK(&ia->ia_ifa);
1212 error = EHOSTUNREACH;
1213 restricted = TRUE;
1214 } else if (error == 0) {
1215 *laddr = ia->ia_addr.sin_addr;
1216 if (outif != NULL) {
1217 struct ifnet *ifp;
1218
1219 if (ro->ro_rt != NULL)
1220 ifp = ro->ro_rt->rt_ifp;
1221 else
1222 ifp = ia->ia_ifp;
1223
1224 VERIFY(ifp != NULL);
1225 IFA_CONVERT_LOCK(&ia->ia_ifa);
1226 ifnet_reference(ifp); /* for caller */
1227 if (*outif != NULL)
1228 ifnet_release(*outif);
1229 *outif = ifp;
1230 }
1231 IFA_UNLOCK(&ia->ia_ifa);
1232 } else {
1233 IFA_UNLOCK(&ia->ia_ifa);
1234 }
1235 IFA_REMREF(&ia->ia_ifa);
1236 ia = NULL;
1237 }
1238
1239 if (restricted && error == EHOSTUNREACH) {
1240 soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
1241 SO_FILT_HINT_IFDENIED));
1242 }
1243
1244 return (error);
1245 }
1246
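/*
 * Editorial sketch of the IP_PKTINFO override mentioned above
 * (assumes a UDP socket s, a filled-in destination dst, and a
 * buffer buf/buflen; "en0" is only an example interface):
 *
 *	char cbuf[CMSG_SPACE(sizeof (struct in_pktinfo))];
 *	struct in_pktinfo pi;
 *	struct iovec iov = { buf, buflen };
 *	struct msghdr msg;
 *	struct cmsghdr *cmsg;
 *
 *	bzero(&msg, sizeof (msg));
 *	bzero(cbuf, sizeof (cbuf));
 *	msg.msg_name = &dst;
 *	msg.msg_namelen = sizeof (dst);
 *	msg.msg_iov = &iov;
 *	msg.msg_iovlen = 1;
 *	msg.msg_control = cbuf;
 *	msg.msg_controllen = sizeof (cbuf);
 *	cmsg = CMSG_FIRSTHDR(&msg);
 *	cmsg->cmsg_level = IPPROTO_IP;
 *	cmsg->cmsg_type = IP_PKTINFO;
 *	cmsg->cmsg_len = CMSG_LEN(sizeof (pi));
 *	bzero(&pi, sizeof (pi));
 *	pi.ipi_ifindex = if_nametoindex("en0");
 *	bcopy(&pi, CMSG_DATA(cmsg), sizeof (pi));
 *	sendmsg(s, &msg, 0);
 *
 * The chosen ifindex reaches this function as the ifscope argument
 * and overrides any sticky INP_BOUND_IF binding for that one send.
 */
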
1247 /*
1248 * Outer subroutine:
1249 * Connect from a socket to a specified address.
1250 * Both address and port must be specified in argument sin.
1251 * If don't have a local address for this socket yet,
1252 * then pick one.
1253 *
1254 * The caller may override the bound-to-interface setting of the socket
1255 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1256 */
1257 int
1258 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
1259 unsigned int ifscope, struct ifnet **outif)
1260 {
1261 struct in_addr laddr;
1262 struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
1263 struct inpcb *pcb;
1264 int error;
1265 struct socket *so = inp->inp_socket;
1266
1267 /*
1268 * Call inner routine, to assign local interface address.
1269 */
1270 if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif)) != 0)
1271 return (error);
1272
1273 socket_unlock(so, 0);
1274 pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
1275 inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
1276 inp->inp_lport, 0, NULL);
1277 socket_lock(so, 0);
1278
1279 /*
1280 * Check if the socket is still in a valid state. When we unlock this
1281 * embryonic socket, it can get aborted if another thread is closing
1282 * the listener (radar 7947600).
1283 */
1284 if ((so->so_flags & SOF_ABORTED) != 0)
1285 return (ECONNREFUSED);
1286
1287 if (pcb != NULL) {
1288 in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
1289 return (EADDRINUSE);
1290 }
1291 if (inp->inp_laddr.s_addr == INADDR_ANY) {
1292 if (inp->inp_lport == 0) {
1293 error = in_pcbbind(inp, NULL, p);
1294 if (error)
1295 return (error);
1296 }
1297 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1298 /*
1299 * Lock inversion issue, mostly with udp
1300 * multicast packets.
1301 */
1302 socket_unlock(so, 0);
1303 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1304 socket_lock(so, 0);
1305 }
1306 inp->inp_laddr = laddr;
1307 /* no reference needed */
1308 inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
1309 inp->inp_flags |= INP_INADDR_ANY;
1310 } else {
1311 /*
   1312                  * Use of IP_PKTINFO without a local port already
   1313                  * specified will cause the kernel to panic;
   1314                  * see rdar://problem/18508185.
   1315                  * For now, return an error to avoid the panic.
   1316                  * This routine can be refactored to handle this
   1317                  * better in the future.
1318 */
1319 if (inp->inp_lport == 0)
1320 return (EINVAL);
1321 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1322 /*
1323 * Lock inversion issue, mostly with udp
1324 * multicast packets.
1325 */
1326 socket_unlock(so, 0);
1327 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1328 socket_lock(so, 0);
1329 }
1330 }
1331 inp->inp_faddr = sin->sin_addr;
1332 inp->inp_fport = sin->sin_port;
1333 if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
1334 nstat_pcb_invalidate_cache(inp);
1335 in_pcbrehash(inp);
1336 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1337 return (0);
1338 }
1339
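/*
 * Editorial example: the implicit-bind path above is what gives an
 * unbound socket its ephemeral port during connect(2); userspace
 * can observe the kernel's choice afterwards (s and dst assumed):
 *
 *	struct sockaddr_in local;
 *	socklen_t len = sizeof (local);
 *
 *	connect(s, (struct sockaddr *)&dst, sizeof (dst));
 *	getsockname(s, (struct sockaddr *)&local, &len);
 *	printf("bound to port %u\n", ntohs(local.sin_port));
 */
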
1340 void
1341 in_pcbdisconnect(struct inpcb *inp)
1342 {
1343 struct socket *so = inp->inp_socket;
1344
1345 if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
1346 nstat_pcb_cache(inp);
1347
1348 inp->inp_faddr.s_addr = INADDR_ANY;
1349 inp->inp_fport = 0;
1350
1351 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1352 /* lock inversion issue, mostly with udp multicast packets */
1353 socket_unlock(so, 0);
1354 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1355 socket_lock(so, 0);
1356 }
1357
1358 in_pcbrehash(inp);
1359 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1360 /*
1361 * A multipath subflow socket would have its SS_NOFDREF set by default,
1362 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
1363 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
1364 */
1365 if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF))
1366 in_pcbdetach(inp);
1367 }
1368
1369 void
1370 in_pcbdetach(struct inpcb *inp)
1371 {
1372 struct socket *so = inp->inp_socket;
1373
1374 if (so->so_pcb == NULL) {
1375 /* PCB has been disposed */
1376 panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__,
1377 inp, so, SOCK_PROTO(so));
1378 /* NOTREACHED */
1379 }
1380
1381 #if IPSEC
1382 if (inp->inp_sp != NULL) {
1383 (void) ipsec4_delete_pcbpolicy(inp);
1384 }
1385 #endif /* IPSEC */
1386
1387 /*
1388 * Let NetworkStatistics know this PCB is going away
1389 * before we detach it.
1390 */
1391 if (nstat_collect &&
1392 (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP))
1393 nstat_pcb_detach(inp);
1394
1395 /* Free memory buffer held for generating keep alives */
1396 if (inp->inp_keepalive_data != NULL) {
1397 FREE(inp->inp_keepalive_data, M_TEMP);
1398 inp->inp_keepalive_data = NULL;
1399 }
1400
1401 /* mark socket state as dead */
1402 if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
1403 panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
1404 __func__, so, SOCK_PROTO(so));
1405 /* NOTREACHED */
1406 }
1407
1408 if (!(so->so_flags & SOF_PCBCLEARING)) {
1409 struct ip_moptions *imo;
1410
1411 inp->inp_vflag = 0;
1412 if (inp->inp_options != NULL) {
1413 (void) m_free(inp->inp_options);
1414 inp->inp_options = NULL;
1415 }
1416 ROUTE_RELEASE(&inp->inp_route);
1417 imo = inp->inp_moptions;
1418 inp->inp_moptions = NULL;
1419 if (imo != NULL)
1420 IMO_REMREF(imo);
1421 sofreelastref(so, 0);
1422 inp->inp_state = INPCB_STATE_DEAD;
1423 /* makes sure we're not called twice from so_close */
1424 so->so_flags |= SOF_PCBCLEARING;
1425
1426 inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
1427 }
1428 }
1429
1430
1431 void
1432 in_pcbdispose(struct inpcb *inp)
1433 {
1434 struct socket *so = inp->inp_socket;
1435 struct inpcbinfo *ipi = inp->inp_pcbinfo;
1436
1437 if (so != NULL && so->so_usecount != 0) {
1438 panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n",
1439 __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
1440 solockhistory_nr(so));
1441 /* NOTREACHED */
1442 } else if (inp->inp_wantcnt != WNT_STOPUSING) {
1443 if (so != NULL) {
1444 panic_plain("%s: inp %p invalid wantcnt %d, so %p "
1445 "[%d,%d] usecount %d retaincnt %d state 0x%x "
1446 "flags 0x%x lockhistory %s\n", __func__, inp,
1447 inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
1448 so->so_usecount, so->so_retaincnt, so->so_state,
1449 so->so_flags, solockhistory_nr(so));
1450 /* NOTREACHED */
1451 } else {
1452 panic("%s: inp %p invalid wantcnt %d no socket\n",
1453 __func__, inp, inp->inp_wantcnt);
1454 /* NOTREACHED */
1455 }
1456 }
1457
1458 lck_rw_assert(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);
1459
1460 inp->inp_gencnt = ++ipi->ipi_gencnt;
1461 /* access ipi in in_pcbremlists */
1462 in_pcbremlists(inp);
1463
1464 if (so != NULL) {
1465 if (so->so_proto->pr_flags & PR_PCBLOCK) {
1466 sofreelastref(so, 0);
1467 if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
1468 /*
1469 * selthreadclear() already called
1470 * during sofreelastref() above.
1471 */
1472 sbrelease(&so->so_rcv);
1473 sbrelease(&so->so_snd);
1474 }
1475 if (so->so_head != NULL) {
1476 panic("%s: so=%p head still exist\n",
1477 __func__, so);
1478 /* NOTREACHED */
1479 }
1480 lck_mtx_unlock(&inp->inpcb_mtx);
1481 lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
1482 }
1483 /* makes sure we're not called twice from so_close */
1484 so->so_flags |= SOF_PCBCLEARING;
1485 so->so_saved_pcb = (caddr_t)inp;
1486 so->so_pcb = NULL;
1487 inp->inp_socket = NULL;
1488 #if CONFIG_MACF_NET
1489 mac_inpcb_label_destroy(inp);
1490 #endif /* CONFIG_MACF_NET */
1491 /*
   1492                  * In case there is a route cached after a detach (possible
   1493                  * in the TCP case), make sure that it is freed before
1494 * we deallocate the structure.
1495 */
1496 ROUTE_RELEASE(&inp->inp_route);
1497 if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
1498 zfree(ipi->ipi_zone, inp);
1499 }
1500 sodealloc(so);
1501 }
1502 }
1503
1504 /*
1505 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
1506 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
   1507  * in struct pr_usrreqs, so that protocols can just reference them directly
1508 * without the need for a wrapper function.
1509 */
1510 int
1511 in_getsockaddr(struct socket *so, struct sockaddr **nam)
1512 {
1513 struct inpcb *inp;
1514 struct sockaddr_in *sin;
1515
1516 /*
1517 * Do the malloc first in case it blocks.
1518 */
1519 MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
1520 if (sin == NULL)
1521 return (ENOBUFS);
1522 bzero(sin, sizeof (*sin));
1523 sin->sin_family = AF_INET;
1524 sin->sin_len = sizeof (*sin);
1525
1526 if ((inp = sotoinpcb(so)) == NULL) {
1527 FREE(sin, M_SONAME);
1528 return (EINVAL);
1529 }
1530 sin->sin_port = inp->inp_lport;
1531 sin->sin_addr = inp->inp_laddr;
1532
1533 *nam = (struct sockaddr *)sin;
1534 return (0);
1535 }
1536
1537 int
1538 in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss)
1539 {
1540 struct sockaddr_in *sin = SIN(ss);
1541 struct inpcb *inp;
1542
1543 VERIFY(ss != NULL);
1544 bzero(ss, sizeof (*ss));
1545
1546 sin->sin_family = AF_INET;
1547 sin->sin_len = sizeof (*sin);
1548
1549 if ((inp = sotoinpcb(so)) == NULL
1550 #if NECP
1551 || (necp_socket_should_use_flow_divert(inp))
1552 #endif /* NECP */
1553 )
1554 return (inp == NULL ? EINVAL : EPROTOTYPE);
1555
1556 sin->sin_port = inp->inp_lport;
1557 sin->sin_addr = inp->inp_laddr;
1558 return (0);
1559 }
1560
1561 int
1562 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
1563 {
1564 struct inpcb *inp;
1565 struct sockaddr_in *sin;
1566
1567 /*
1568 * Do the malloc first in case it blocks.
1569 */
1570 MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
1571 if (sin == NULL)
1572 return (ENOBUFS);
1573 bzero((caddr_t)sin, sizeof (*sin));
1574 sin->sin_family = AF_INET;
1575 sin->sin_len = sizeof (*sin);
1576
1577 if ((inp = sotoinpcb(so)) == NULL) {
1578 FREE(sin, M_SONAME);
1579 return (EINVAL);
1580 }
1581 sin->sin_port = inp->inp_fport;
1582 sin->sin_addr = inp->inp_faddr;
1583
1584 *nam = (struct sockaddr *)sin;
1585 return (0);
1586 }
1587
1588 int
1589 in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss)
1590 {
1591 struct sockaddr_in *sin = SIN(ss);
1592 struct inpcb *inp;
1593
1594 VERIFY(ss != NULL);
1595 bzero(ss, sizeof (*ss));
1596
1597 sin->sin_family = AF_INET;
1598 sin->sin_len = sizeof (*sin);
1599
1600 if ((inp = sotoinpcb(so)) == NULL
1601 #if NECP
1602 || (necp_socket_should_use_flow_divert(inp))
1603 #endif /* NECP */
1604 ) {
1605 return (inp == NULL ? EINVAL : EPROTOTYPE);
1606 }
1607
1608 sin->sin_port = inp->inp_fport;
1609 sin->sin_addr = inp->inp_faddr;
1610 return (0);
1611 }
1612
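/*
 * Editorial note: the four routines above back getsockname(2) and
 * getpeername(2); a protocol wires them in through its struct
 * pr_usrreqs, conceptually (a sketch, not the literal initializer
 * style used elsewhere in xnu):
 *
 *	.pru_sockaddr = in_getsockaddr,
 *	.pru_peeraddr = in_getpeeraddr,
 */
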
1613 void
1614 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1615 int errno, void (*notify)(struct inpcb *, int))
1616 {
1617 struct inpcb *inp;
1618
1619 lck_rw_lock_shared(pcbinfo->ipi_lock);
1620
1621 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1622 #if INET6
1623 if (!(inp->inp_vflag & INP_IPV4))
1624 continue;
1625 #endif /* INET6 */
1626 if (inp->inp_faddr.s_addr != faddr.s_addr ||
1627 inp->inp_socket == NULL)
1628 continue;
1629 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
1630 continue;
1631 socket_lock(inp->inp_socket, 1);
1632 (*notify)(inp, errno);
1633 (void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
1634 socket_unlock(inp->inp_socket, 1);
1635 }
1636 lck_rw_done(pcbinfo->ipi_lock);
1637 }
1638
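/*
 * Editorial sketch: this fan-out is used by the ctlinput paths when
 * ICMP reports a problem with a foreign address; e.g. udp_ctlinput()
 * does, in effect (names assumed from udp_usrreq.c):
 *
 *	in_pcbnotifyall(&udbinfo, faddr, inetctlerrmap[cmd],
 *	    udp_notify);
 */
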
1639 /*
1640 * Check for alternatives when higher level complains
1641 * about service problems. For now, invalidate cached
1642 * routing information. If the route was created dynamically
1643 * (by a redirect), time to try a default gateway again.
1644 */
1645 void
1646 in_losing(struct inpcb *inp)
1647 {
1648 boolean_t release = FALSE;
1649 struct rtentry *rt;
1650
1651 if ((rt = inp->inp_route.ro_rt) != NULL) {
1652 struct in_ifaddr *ia = NULL;
1653
1654 RT_LOCK(rt);
1655 if (rt->rt_flags & RTF_DYNAMIC) {
1656 /*
1657 * Prevent another thread from modifying rt_key,
1658 * rt_gateway via rt_setgate() after rt_lock is
1659 * dropped by marking the route as defunct.
1660 */
1661 rt->rt_flags |= RTF_CONDEMNED;
1662 RT_UNLOCK(rt);
1663 (void) rtrequest(RTM_DELETE, rt_key(rt),
1664 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
1665 } else {
1666 RT_UNLOCK(rt);
1667 }
1668 /* if the address is gone keep the old route in the pcb */
1669 if (inp->inp_laddr.s_addr != INADDR_ANY &&
1670 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1671 /*
1672 * Address is around; ditch the route. A new route
1673 * can be allocated the next time output is attempted.
1674 */
1675 release = TRUE;
1676 }
1677 if (ia != NULL)
1678 IFA_REMREF(&ia->ia_ifa);
1679 }
1680 if (rt == NULL || release)
1681 ROUTE_RELEASE(&inp->inp_route);
1682 }
1683
1684 /*
1685 * After a routing change, flush old routing
1686 * and allocate a (hopefully) better one.
1687 */
1688 void
1689 in_rtchange(struct inpcb *inp, int errno)
1690 {
1691 #pragma unused(errno)
1692 boolean_t release = FALSE;
1693 struct rtentry *rt;
1694
1695 if ((rt = inp->inp_route.ro_rt) != NULL) {
1696 struct in_ifaddr *ia = NULL;
1697
1698 /* if address is gone, keep the old route */
1699 if (inp->inp_laddr.s_addr != INADDR_ANY &&
1700 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1701 /*
1702 * Address is around; ditch the route. A new route
1703 * can be allocated the next time output is attempted.
1704 */
1705 release = TRUE;
1706 }
1707 if (ia != NULL)
1708 IFA_REMREF(&ia->ia_ifa);
1709 }
1710 if (rt == NULL || release)
1711 ROUTE_RELEASE(&inp->inp_route);
1712 }
1713
1714 /*
1715 * Lookup a PCB based on the local address and port.
1716 */
1717 struct inpcb *
1718 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
1719 unsigned int lport_arg, int wild_okay)
1720 {
1721 struct inpcb *inp;
1722 int matchwild = 3, wildcard;
1723 u_short lport = lport_arg;
1724
1725 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);
1726
1727 if (!wild_okay) {
1728 struct inpcbhead *head;
1729 /*
1730 * Look for an unconnected (wildcard foreign addr) PCB that
1731 * matches the local address and port we're looking for.
1732 */
1733 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
1734 pcbinfo->ipi_hashmask)];
1735 LIST_FOREACH(inp, head, inp_hash) {
1736 #if INET6
1737 if (!(inp->inp_vflag & INP_IPV4))
1738 continue;
1739 #endif /* INET6 */
1740 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1741 inp->inp_laddr.s_addr == laddr.s_addr &&
1742 inp->inp_lport == lport) {
1743 /*
1744 * Found.
1745 */
1746 return (inp);
1747 }
1748 }
1749 /*
1750 * Not found.
1751 */
1752 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
1753 return (NULL);
1754 } else {
1755 struct inpcbporthead *porthash;
1756 struct inpcbport *phd;
1757 struct inpcb *match = NULL;
1758 /*
1759 * Best fit PCB lookup.
1760 *
1761 * First see if this local port is in use by looking on the
1762 * port hash list.
1763 */
1764 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
1765 pcbinfo->ipi_porthashmask)];
1766 LIST_FOREACH(phd, porthash, phd_hash) {
1767 if (phd->phd_port == lport)
1768 break;
1769 }
1770 if (phd != NULL) {
1771 /*
1772 * Port is in use by one or more PCBs. Look for best
1773 * fit.
1774 */
1775 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
1776 wildcard = 0;
1777 #if INET6
1778 if (!(inp->inp_vflag & INP_IPV4))
1779 continue;
1780 #endif /* INET6 */
1781 if (inp->inp_faddr.s_addr != INADDR_ANY)
1782 wildcard++;
1783 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1784 if (laddr.s_addr == INADDR_ANY)
1785 wildcard++;
1786 else if (inp->inp_laddr.s_addr !=
1787 laddr.s_addr)
1788 continue;
1789 } else {
1790 if (laddr.s_addr != INADDR_ANY)
1791 wildcard++;
1792 }
1793 if (wildcard < matchwild) {
1794 match = inp;
1795 matchwild = wildcard;
1796 if (matchwild == 0) {
1797 break;
1798 }
1799 }
1800 }
1801 }
1802 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
1803 0, 0, 0, 0);
1804 return (match);
1805 }
1806 }
1807
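/*
 * Editorial illustration of the best-fit scoring above: a pcb earns
 * one wildcard point per unspecified half of the match, the lowest
 * score wins, and a hard local-address mismatch disqualifies the
 * pcb entirely. For a query with laddr 10.0.0.1:
 *
 *	pcb laddr	pcb faddr	score
 *	10.0.0.1	INADDR_ANY	0 (best possible)
 *	10.0.0.1	set		1
 *	INADDR_ANY	INADDR_ANY	1
 *	INADDR_ANY	set		2
 *	10.0.0.2	any		skipped
 */
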
1808 /*
1809 * Check if PCB exists in hash list.
1810 */
1811 int
1812 in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1813 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
1814 uid_t *uid, gid_t *gid, struct ifnet *ifp)
1815 {
1816 struct inpcbhead *head;
1817 struct inpcb *inp;
1818 u_short fport = fport_arg, lport = lport_arg;
1819 int found = 0;
1820 struct inpcb *local_wild = NULL;
1821 #if INET6
1822 struct inpcb *local_wild_mapped = NULL;
1823 #endif /* INET6 */
1824
1825 *uid = UID_MAX;
1826 *gid = GID_MAX;
1827
1828 /*
1829 * We may have found the pcb in the last lookup - check this first.
1830 */
1831
1832 lck_rw_lock_shared(pcbinfo->ipi_lock);
1833
1834 /*
1835 * First look for an exact match.
1836 */
1837 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
1838 pcbinfo->ipi_hashmask)];
1839 LIST_FOREACH(inp, head, inp_hash) {
1840 #if INET6
1841 if (!(inp->inp_vflag & INP_IPV4))
1842 continue;
1843 #endif /* INET6 */
1844 if (inp_restricted_recv(inp, ifp))
1845 continue;
1846
1847 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1848 inp->inp_laddr.s_addr == laddr.s_addr &&
1849 inp->inp_fport == fport &&
1850 inp->inp_lport == lport) {
1851 if ((found = (inp->inp_socket != NULL))) {
1852 /*
1853 * Found.
1854 */
1855 *uid = kauth_cred_getuid(
1856 inp->inp_socket->so_cred);
1857 *gid = kauth_cred_getgid(
1858 inp->inp_socket->so_cred);
1859 }
1860 lck_rw_done(pcbinfo->ipi_lock);
1861 return (found);
1862 }
1863 }
1864
1865 if (!wildcard) {
1866 /*
1867 * Not found.
1868 */
1869 lck_rw_done(pcbinfo->ipi_lock);
1870 return (0);
1871 }
1872
1873 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
1874 pcbinfo->ipi_hashmask)];
1875 LIST_FOREACH(inp, head, inp_hash) {
1876 #if INET6
1877 if (!(inp->inp_vflag & INP_IPV4))
1878 continue;
1879 #endif /* INET6 */
1880 if (inp_restricted_recv(inp, ifp))
1881 continue;
1882
1883 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1884 inp->inp_lport == lport) {
1885 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1886 if ((found = (inp->inp_socket != NULL))) {
1887 *uid = kauth_cred_getuid(
1888 inp->inp_socket->so_cred);
1889 *gid = kauth_cred_getgid(
1890 inp->inp_socket->so_cred);
1891 }
1892 lck_rw_done(pcbinfo->ipi_lock);
1893 return (found);
1894 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
1895 #if INET6
1896 if (inp->inp_socket &&
1897 SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
1898 local_wild_mapped = inp;
1899 else
1900 #endif /* INET6 */
1901 local_wild = inp;
1902 }
1903 }
1904 }
1905 if (local_wild == NULL) {
1906 #if INET6
1907 if (local_wild_mapped != NULL) {
1908 if ((found = (local_wild_mapped->inp_socket != NULL))) {
1909 *uid = kauth_cred_getuid(
1910 local_wild_mapped->inp_socket->so_cred);
1911 *gid = kauth_cred_getgid(
1912 local_wild_mapped->inp_socket->so_cred);
1913 }
1914 lck_rw_done(pcbinfo->ipi_lock);
1915 return (found);
1916 }
1917 #endif /* INET6 */
1918 lck_rw_done(pcbinfo->ipi_lock);
1919 return (0);
1920 }
1921 if ((found = (local_wild->inp_socket != NULL))) {
1922 *uid = kauth_cred_getuid(
1923 local_wild->inp_socket->so_cred);
1924 *gid = kauth_cred_getgid(
1925 local_wild->inp_socket->so_cred);
1926 }
1927 lck_rw_done(pcbinfo->ipi_lock);
1928 return (found);
1929 }
1930
1931 /*
1932 * Lookup PCB in hash list.
1933 */
1934 struct inpcb *
1935 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1936 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
1937 struct ifnet *ifp)
1938 {
1939 struct inpcbhead *head;
1940 struct inpcb *inp;
1941 u_short fport = fport_arg, lport = lport_arg;
1942 struct inpcb *local_wild = NULL;
1943 #if INET6
1944 struct inpcb *local_wild_mapped = NULL;
1945 #endif /* INET6 */
1946
1947 /*
1948 * We may have found the pcb in the last lookup - check this first.
1949 */
1950
1951 lck_rw_lock_shared(pcbinfo->ipi_lock);
1952
1953 /*
1954 * First look for an exact match.
1955 */
1956 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
1957 pcbinfo->ipi_hashmask)];
1958 LIST_FOREACH(inp, head, inp_hash) {
1959 #if INET6
1960 if (!(inp->inp_vflag & INP_IPV4))
1961 continue;
1962 #endif /* INET6 */
1963 if (inp_restricted_recv(inp, ifp))
1964 continue;
1965
1966 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1967 inp->inp_laddr.s_addr == laddr.s_addr &&
1968 inp->inp_fport == fport &&
1969 inp->inp_lport == lport) {
1970 /*
1971 * Found.
1972 */
1973 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
1974 WNT_STOPUSING) {
1975 lck_rw_done(pcbinfo->ipi_lock);
1976 return (inp);
1977 } else {
1978 /* it's there but dead, say it isn't found */
1979 lck_rw_done(pcbinfo->ipi_lock);
1980 return (NULL);
1981 }
1982 }
1983 }
1984
1985 if (!wildcard) {
1986 /*
1987 * Not found.
1988 */
1989 lck_rw_done(pcbinfo->ipi_lock);
1990 return (NULL);
1991 }
1992
1993 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
1994 pcbinfo->ipi_hashmask)];
1995 LIST_FOREACH(inp, head, inp_hash) {
1996 #if INET6
1997 if (!(inp->inp_vflag & INP_IPV4))
1998 continue;
1999 #endif /* INET6 */
2000 if (inp_restricted_recv(inp, ifp))
2001 continue;
2002
2003 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2004 inp->inp_lport == lport) {
2005 if (inp->inp_laddr.s_addr == laddr.s_addr) {
2006 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2007 WNT_STOPUSING) {
2008 lck_rw_done(pcbinfo->ipi_lock);
2009 return (inp);
2010 } else {
2011 /* it's dead; say it isn't found */
2012 lck_rw_done(pcbinfo->ipi_lock);
2013 return (NULL);
2014 }
2015 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2016 #if INET6
2017 if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
2018 local_wild_mapped = inp;
2019 else
2020 #endif /* INET6 */
2021 local_wild = inp;
2022 }
2023 }
2024 }
2025 if (local_wild == NULL) {
2026 #if INET6
2027 if (local_wild_mapped != NULL) {
2028 if (in_pcb_checkstate(local_wild_mapped,
2029 WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2030 lck_rw_done(pcbinfo->ipi_lock);
2031 return (local_wild_mapped);
2032 } else {
2033 /* it's dead; say it isn't found */
2034 lck_rw_done(pcbinfo->ipi_lock);
2035 return (NULL);
2036 }
2037 }
2038 #endif /* INET6 */
2039 lck_rw_done(pcbinfo->ipi_lock);
2040 return (NULL);
2041 }
2042 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2043 lck_rw_done(pcbinfo->ipi_lock);
2044 return (local_wild);
2045 }
2046 /*
2047 * It's either not found or is already dead.
2048 */
2049 lck_rw_done(pcbinfo->ipi_lock);
2050 return (NULL);
2051 }
2052
2053 /*
2054 * @brief Insert PCB onto various hash lists.
2055 *
2056 * @param inp Pointer to internet protocol control block
2057 * @param locked Indicates whether ipi_lock (protecting the pcb
2058 * list) is already held by the caller.
2059 *
2060 * @return 0 on success, or an error code on failure
2061 */
2062 int
2063 in_pcbinshash(struct inpcb *inp, int locked)
2064 {
2065 struct inpcbhead *pcbhash;
2066 struct inpcbporthead *pcbporthash;
2067 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
2068 struct inpcbport *phd;
2069 u_int32_t hashkey_faddr;
2070
2071 if (!locked) {
2072 if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
2073 /*
2074 * Lock inversion issue, mostly with udp
2075 * multicast packets
2076 */
2077 socket_unlock(inp->inp_socket, 0);
2078 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
2079 socket_lock(inp->inp_socket, 0);
2080 }
2081 }
2082
2083 /*
2084 * This routine or its caller may have given up the
2085 * socket's protocol lock briefly.
2086 * During that time the socket may have been dropped;
2087 * guard against that case.
2088 */
2089 if (inp->inp_state == INPCB_STATE_DEAD) {
2090 if (!locked) {
2091 lck_rw_done(pcbinfo->ipi_lock);
2092 }
2093 return (ECONNABORTED);
2094 }
2095
2096
2097 #if INET6
2098 if (inp->inp_vflag & INP_IPV6)
2099 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2100 else
2101 #endif /* INET6 */
2102 hashkey_faddr = inp->inp_faddr.s_addr;
2103
2104 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2105 inp->inp_fport, pcbinfo->ipi_hashmask);
2106
2107 pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];
2108
2109 pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
2110 pcbinfo->ipi_porthashmask)];
2111
2112 /*
2113 * Go through port list and look for a head for this lport.
2114 */
2115 LIST_FOREACH(phd, pcbporthash, phd_hash) {
2116 if (phd->phd_port == inp->inp_lport)
2117 break;
2118 }
2119
2120 /*
2121 * If none exists, malloc one and tack it on.
2122 */
2123 if (phd == NULL) {
2124 MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport),
2125 M_PCB, M_WAITOK);
2126 if (phd == NULL) {
2127 if (!locked)
2128 lck_rw_done(pcbinfo->ipi_lock);
2129 return (ENOBUFS); /* XXX */
2130 }
2131 phd->phd_port = inp->inp_lport;
2132 LIST_INIT(&phd->phd_pcblist);
2133 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
2134 }
2135
2136 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2137 inp->inp_phd = phd;
2138 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
2139 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
2140 inp->inp_flags2 |= INP2_INHASHLIST;
2141
2142 if (!locked)
2143 lck_rw_done(pcbinfo->ipi_lock);
2144
2145 #if NECP
2146 // This call catches the original setting of the local address
2147 inp_update_necp_policy(inp, NULL, NULL, 0);
2148 #endif /* NECP */
2149
2150 return (0);
2151 }
2152
2153 /*
2154 * Move PCB to the proper hash bucket when { faddr, fport } have been
2155 * changed. NOTE: This does not handle the case of the lport changing (the
2156 * hashed port list would have to be updated as well), so the lport must
2157 * not change after in_pcbinshash() has been called.
2158 */
2159 void
2160 in_pcbrehash(struct inpcb *inp)
2161 {
2162 struct inpcbhead *head;
2163 u_int32_t hashkey_faddr;
2164
2165 #if INET6
2166 if (inp->inp_vflag & INP_IPV6)
2167 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2168 else
2169 #endif /* INET6 */
2170 hashkey_faddr = inp->inp_faddr.s_addr;
2171
2172 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2173 inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
2174 head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];
2175
2176 if (inp->inp_flags2 & INP2_INHASHLIST) {
2177 LIST_REMOVE(inp, inp_hash);
2178 inp->inp_flags2 &= ~INP2_INHASHLIST;
2179 }
2180
2181 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2182 LIST_INSERT_HEAD(head, inp, inp_hash);
2183 inp->inp_flags2 |= INP2_INHASHLIST;
2184
2185 #if NECP
2186 // This call catches updates to the remote addresses
2187 inp_update_necp_policy(inp, NULL, NULL, 0);
2188 #endif /* NECP */
2189 }
2190
2191 /*
2192 * Remove PCB from various lists.
2193 * Must be called with the pcbinfo lock held in exclusive mode.
2194 */
2195 void
2196 in_pcbremlists(struct inpcb *inp)
2197 {
2198 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
2199
2200 /*
2201 * Check if it's in hashlist -- an inp is placed in hashlist when
2202 * its local port gets assigned. So it should also be present
2203 * in the port list.
2204 */
2205 if (inp->inp_flags2 & INP2_INHASHLIST) {
2206 struct inpcbport *phd = inp->inp_phd;
2207
2208 VERIFY(phd != NULL && inp->inp_lport > 0);
2209
2210 LIST_REMOVE(inp, inp_hash);
2211 inp->inp_hash.le_next = NULL;
2212 inp->inp_hash.le_prev = NULL;
2213
2214 LIST_REMOVE(inp, inp_portlist);
2215 inp->inp_portlist.le_next = NULL;
2216 inp->inp_portlist.le_prev = NULL;
2217 if (LIST_EMPTY(&phd->phd_pcblist)) {
2218 LIST_REMOVE(phd, phd_hash);
2219 FREE(phd, M_PCB);
2220 }
2221 inp->inp_phd = NULL;
2222 inp->inp_flags2 &= ~INP2_INHASHLIST;
2223 }
2224 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2225
2226 if (inp->inp_flags2 & INP2_TIMEWAIT) {
2227 /* Remove from time-wait queue */
2228 tcp_remove_from_time_wait(inp);
2229 inp->inp_flags2 &= ~INP2_TIMEWAIT;
2230 VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
2231 inp->inp_pcbinfo->ipi_twcount--;
2232 } else {
2233 /* Remove from global inp list if it is not time-wait */
2234 LIST_REMOVE(inp, inp_list);
2235 }
2236
2237 if (inp->inp_flags2 & INP2_IN_FCTREE) {
2238 inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE));
2239 VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
2240 }
2241
2242 inp->inp_pcbinfo->ipi_count--;
2243 }
2244
2245 /*
2246 * Mechanism used to defer the memory release of PCBs.
2247 * The pcb list will contain the pcb until the reaper can clean it up if
2248 * the following conditions are met:
2249 * 1) state is "DEAD",
2250 * 2) wantcnt is STOPUSING, and
2251 * 3) usecount is 0.
2252 * This function handles the WNT_STOPUSING, WNT_ACQUIRE and WNT_RELEASE modes.
2253 */
2254 int
2255 in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
2256 {
2257 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
2258 UInt32 origwant;
2259 UInt32 newwant;
2260
2261 switch (mode) {
2262 case WNT_STOPUSING:
2263 /*
2264 * Try to mark the pcb as ready for recycling. CAS with
2265 * STOPUSING, if success we're good, if it's in use, will
2266 * be marked later
2267 */
2268 if (locked == 0)
2269 socket_lock(pcb->inp_socket, 1);
2270 pcb->inp_state = INPCB_STATE_DEAD;
2271
2272 stopusing:
2273 if (pcb->inp_socket->so_usecount < 0) {
2274 panic("%s: pcb=%p so=%p usecount is negative\n",
2275 __func__, pcb, pcb->inp_socket);
2276 /* NOTREACHED */
2277 }
2278 if (locked == 0)
2279 socket_unlock(pcb->inp_socket, 1);
2280
2281 inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);
2282
2283 origwant = *wantcnt;
2284 if ((UInt16) origwant == 0xffff) /* should stop using */
2285 return (WNT_STOPUSING);
2286 newwant = 0xffff;
2287 if ((UInt16) origwant == 0) {
2288 /* try to mark it as unusable now */
2289 OSCompareAndSwap(origwant, newwant, wantcnt);
2290 }
2291 return (WNT_STOPUSING);
2292 break;
2293
2294 case WNT_ACQUIRE:
2295 /*
2296 * Try to take a reference on the pcb. If it is marked
2297 * WNT_STOPUSING, bail out; otherwise increase the want
2298 * count and return WNT_ACQUIRE.
2299 */
2300 do {
2301 origwant = *wantcnt;
2302 if ((UInt16) origwant == 0xffff) {
2303 /* should stop using */
2304 return (WNT_STOPUSING);
2305 }
2306 newwant = origwant + 1;
2307 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2308 return (WNT_ACQUIRE);
2309 break;
2310
2311 case WNT_RELEASE:
2312 /*
2313 * Release a reference. If the pcb state is DEAD, go on to
2314 * mark the want count STOPUSING once it drops to zero.
2315 */
2316 if (locked == 0)
2317 socket_lock(pcb->inp_socket, 1);
2318
2319 do {
2320 origwant = *wantcnt;
2321 if ((UInt16) origwant == 0x0) {
2322 panic("%s: pcb=%p release with zero count",
2323 __func__, pcb);
2324 /* NOTREACHED */
2325 }
2326 if ((UInt16) origwant == 0xffff) {
2327 /* should stop using */
2328 if (locked == 0)
2329 socket_unlock(pcb->inp_socket, 1);
2330 return (WNT_STOPUSING);
2331 }
2332 newwant = origwant - 1;
2333 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2334
2335 if (pcb->inp_state == INPCB_STATE_DEAD)
2336 goto stopusing;
2337 if (pcb->inp_socket->so_usecount < 0) {
2338 panic("%s: RELEASE pcb=%p so=%p usecount is negative\n",
2339 __func__, pcb, pcb->inp_socket);
2340 /* NOTREACHED */
2341 }
2342
2343 if (locked == 0)
2344 socket_unlock(pcb->inp_socket, 1);
2345 return (WNT_RELEASE);
2346 break;
2347
2348 default:
2349 panic("%s: so=%p not a valid state =%x\n", __func__,
2350 pcb->inp_socket, mode);
2351 /* NOTREACHED */
2352 }
2353
2354 /* NOTREACHED */
2355 return (mode);
2356 }
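
/*
 * The want count above is a 16-bit-sentinel compare-and-swap reference
 * count.  A minimal user-space analogue -- editor's sketch, with C11
 * atomics standing in for OSCompareAndSwap:
 *
 *	#include <stdatomic.h>
 *	#include <stdint.h>
 *
 *	#define STOPUSING	0xffffu
 *
 *	static _Atomic uint32_t wantcnt;
 *
 *	static int
 *	acquire(void)			// WNT_ACQUIRE
 *	{
 *		uint32_t o;
 *		do {
 *			o = atomic_load(&wantcnt);
 *			if ((uint16_t)o == STOPUSING)
 *				return (-1);	// marked for recycling
 *		} while (!atomic_compare_exchange_weak(&wantcnt, &o, o + 1));
 *		return (0);
 *	}
 *
 *	static void
 *	stopusing(void)			// WNT_STOPUSING, once unused
 *	{
 *		uint32_t o = atomic_load(&wantcnt);
 *		if ((uint16_t)o == 0)
 *			atomic_compare_exchange_strong(&wantcnt, &o, STOPUSING);
 *	}
 */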
2357
2358 /*
2359 * inpcb_to_compat copies specific bits of an inpcb to an inpcb_compat.
2360 * The inpcb_compat data structure is passed to user space and must
2361 * not change. We intentionally avoid copying pointers.
2362 */
2363 void
2364 inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
2365 {
2366 bzero(inp_compat, sizeof (*inp_compat));
2367 inp_compat->inp_fport = inp->inp_fport;
2368 inp_compat->inp_lport = inp->inp_lport;
2369 inp_compat->nat_owner = 0;
2370 inp_compat->nat_cookie = 0;
2371 inp_compat->inp_gencnt = inp->inp_gencnt;
2372 inp_compat->inp_flags = inp->inp_flags;
2373 inp_compat->inp_flow = inp->inp_flow;
2374 inp_compat->inp_vflag = inp->inp_vflag;
2375 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
2376 inp_compat->inp_ip_p = inp->inp_ip_p;
2377 inp_compat->inp_dependfaddr.inp6_foreign =
2378 inp->inp_dependfaddr.inp6_foreign;
2379 inp_compat->inp_dependladdr.inp6_local =
2380 inp->inp_dependladdr.inp6_local;
2381 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2382 inp_compat->inp_depend6.inp6_hlim = 0;
2383 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2384 inp_compat->inp_depend6.inp6_ifindex = 0;
2385 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2386 }
2387
2388 void
2389 inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
2390 {
2391 xinp->inp_fport = inp->inp_fport;
2392 xinp->inp_lport = inp->inp_lport;
2393 xinp->inp_gencnt = inp->inp_gencnt;
2394 xinp->inp_flags = inp->inp_flags;
2395 xinp->inp_flow = inp->inp_flow;
2396 xinp->inp_vflag = inp->inp_vflag;
2397 xinp->inp_ip_ttl = inp->inp_ip_ttl;
2398 xinp->inp_ip_p = inp->inp_ip_p;
2399 xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
2400 xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
2401 xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2402 xinp->inp_depend6.inp6_hlim = 0;
2403 xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2404 xinp->inp_depend6.inp6_ifindex = 0;
2405 xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2406 }
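
/*
 * User space consumes these records through the net.inet.*.pcblist64
 * sysctls; this is how netstat walks the PCB lists.  A condensed reader
 * -- editor's sketch, assuming the xinpgen/xtcpcb64 definitions from
 * Apple's netinet headers are visible to the build:
 *
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <sys/sysctl.h>
 *	#include <netinet/in.h>
 *	#include <netinet/in_pcb.h>
 *	#include <netinet/tcp_var.h>
 *
 *	#define ROUNDUP64(x)	(((x) + 7) & ~7)
 *
 *	static void
 *	dump_tcp_ports(void)
 *	{
 *		size_t len = 0;
 *		char *buf, *next;
 *		struct xinpgen *xig;
 *		struct xtcpcb64 *xt;
 *
 *		if (sysctlbyname("net.inet.tcp.pcblist64", NULL, &len,
 *		    NULL, 0) < 0 || (buf = malloc(len)) == NULL)
 *			return;
 *		if (sysctlbyname("net.inet.tcp.pcblist64", buf, &len,
 *		    NULL, 0) == 0) {
 *			xig = (struct xinpgen *)buf;
 *			for (next = buf + ROUNDUP64(xig->xig_len);
 *			    next < buf + len;
 *			    next += ROUNDUP64(xt->xt_len)) {
 *				xt = (struct xtcpcb64 *)next;
 *				if (xt->xt_len <= sizeof (*xig))
 *					break;	// trailing xinpgen marker
 *				printf("lport %u fport %u\n",
 *				    ntohs(xt->xt_inpcb.inp_lport),
 *				    ntohs(xt->xt_inpcb.inp_fport));
 *			}
 *		}
 *		free(buf);
 *	}
 */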
2407
2408 /*
2409 * The following routines implement this scheme:
2410 *
2411 * Callers of ip_output() that intend to cache the route in the inpcb pass
2412 * a local copy of the struct route to ip_output(). Using a local copy of
2413 * the cached route significantly simplifies things as IP no longer has to
2414 * worry about having exclusive access to the passed in struct route, since
2415 * it's defined in the caller's stack; in essence, this allows for a lock-
2416 * less operation when updating the struct route at the IP level and below,
2417 * whenever necessary. The scheme works as follows:
2418 *
2419 * Prior to dropping the socket's lock and calling ip_output(), the caller
2420 * copies the struct route from the inpcb into its stack, and adds a reference
2421 * to the cached route entry, if there was any. The socket's lock is then
2422 * dropped and ip_output() is called with a pointer to the copy of struct
2423 * route defined on the stack (not to the one in the inpcb.)
2424 *
2425 * Upon returning from ip_output(), the caller then acquires the socket's
2426 * lock and synchronizes the cache; if there is no route cached in the inpcb,
2427 * it copies the local copy of struct route (which may or may not contain any
2428 * route) back into the cache; otherwise, if the inpcb has a route cached in
2429 * it, the one in the local copy will be freed, if there's any. Trashing the
2430 * cached route in the inpcb can be avoided because ip_output() is single-
2431 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
2432 * by the socket/transport layer.)
2433 */
2434 void
2435 inp_route_copyout(struct inpcb *inp, struct route *dst)
2436 {
2437 struct route *src = &inp->inp_route;
2438
2439 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
2440
2441 /*
2442 * If the route in the PCB is stale or not for IPv4, blow it away;
2443 * this is possible in the case of IPv4-mapped addresses.
2444 */
2445 if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET)
2446 ROUTE_RELEASE(src);
2447
2448 route_copyout(dst, src, sizeof (*dst));
2449 }
2450
2451 void
2452 inp_route_copyin(struct inpcb *inp, struct route *src)
2453 {
2454 struct route *dst = &inp->inp_route;
2455
2456 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
2457
2458 /* Minor sanity check */
2459 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
2460 panic("%s: wrong or corrupted route: %p", __func__, src);
2461
2462 route_copyin(src, dst, sizeof (*src));
2463 }
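
/*
 * A condensed sketch of the calling pattern described above (editor's
 * illustration; the real callers are the TCP/UDP output paths, and the
 * mbuf/options variables here are placeholders):
 *
 *	struct route ro;
 *	int error;
 *
 *	socket_lock(so, 0);
 *	inp_route_copyout(inp, &ro);	// stack copy + ref on cached route
 *	socket_unlock(so, 0);		// IP works only on our local copy
 *
 *	error = ip_output(m, opt, &ro, flags, mopts, &ipoa);
 *
 *	socket_lock(so, 0);
 *	inp_route_copyin(inp, &ro);	// synchronize the pcb's route cache
 *	socket_unlock(so, 0);
 */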
2464
2465 /*
2466 * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option.
2467 */
2468 int
2469 inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
2470 {
2471 struct ifnet *ifp = NULL;
2472
2473 ifnet_head_lock_shared();
2474 if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
2475 (ifp = ifindex2ifnet[ifscope]) == NULL)) {
2476 ifnet_head_done();
2477 return (ENXIO);
2478 }
2479 ifnet_head_done();
2480
2481 VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);
2482
2483 /*
2484 * A zero interface scope value indicates an "unbind".
2485 * Otherwise, take in whatever value the app desires;
2486 * the app may already know the scope (or force itself
2487 * to such a scope) ahead of time before the interface
2488 * gets attached. It doesn't matter either way; any
2489 * route lookup from this point on will require an
2490 * exact match for the embedded interface scope.
2491 */
2492 inp->inp_boundifp = ifp;
2493 if (inp->inp_boundifp == NULL)
2494 inp->inp_flags &= ~INP_BOUND_IF;
2495 else
2496 inp->inp_flags |= INP_BOUND_IF;
2497
2498 /* Blow away any cached route in the PCB */
2499 ROUTE_RELEASE(&inp->inp_route);
2500
2501 if (pifp != NULL)
2502 *pifp = ifp;
2503
2504 return (0);
2505 }
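
/*
 * User space reaches this handler through the IP_BOUND_IF (and, for
 * IPv6, IPV6_BOUND_IF) socket options.  Editor's sketch:
 *
 *	#include <net/if.h>
 *	#include <netinet/in.h>
 *	#include <sys/socket.h>
 *
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *	unsigned int idx = if_nametoindex("en0"); // 0 (IFSCOPE_NONE) unbinds
 *
 *	if (setsockopt(fd, IPPROTO_IP, IP_BOUND_IF,
 *	    &idx, sizeof (idx)) == -1)
 *		perror("setsockopt(IP_BOUND_IF)");
 */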
2506
2507 /*
2508 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2509 * as well as for setting PROC_UUID_NO_CELLULAR policy.
2510 */
2511 void
2512 inp_set_nocellular(struct inpcb *inp)
2513 {
2514 inp->inp_flags |= INP_NO_IFT_CELLULAR;
2515
2516 /* Blow away any cached route in the PCB */
2517 ROUTE_RELEASE(&inp->inp_route);
2518 }
2519
2520 /*
2521 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2522 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
2523 */
2524 void
2525 inp_clear_nocellular(struct inpcb *inp)
2526 {
2527 struct socket *so = inp->inp_socket;
2528
2529 /*
2530 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
2531 * has a higher precedence than INP_NO_IFT_CELLULAR. Clear the flag
2532 * if and only if the socket is unrestricted.
2533 */
2534 if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
2535 inp->inp_flags &= ~INP_NO_IFT_CELLULAR;
2536
2537 /* Blow away any cached route in the PCB */
2538 ROUTE_RELEASE(&inp->inp_route);
2539 }
2540 }
2541
2542 void
2543 inp_set_noexpensive(struct inpcb *inp)
2544 {
2545 inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;
2546
2547 /* Blow away any cached route in the PCB */
2548 ROUTE_RELEASE(&inp->inp_route);
2549 }
2550
2551 void
2552 inp_set_awdl_unrestricted(struct inpcb *inp)
2553 {
2554 inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;
2555
2556 /* Blow away any cached route in the PCB */
2557 ROUTE_RELEASE(&inp->inp_route);
2558 }
2559
2560 boolean_t
2561 inp_get_awdl_unrestricted(struct inpcb *inp)
2562 {
2563 return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
2564 }
2565
2566 void
2567 inp_clear_awdl_unrestricted(struct inpcb *inp)
2568 {
2569 inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;
2570
2571 /* Blow away any cached route in the PCB */
2572 ROUTE_RELEASE(&inp->inp_route);
2573 }
2574
2575 #if NECP
2576 /*
2577 * Called when PROC_UUID_NECP_APP_POLICY is set.
2578 */
2579 void
2580 inp_set_want_app_policy(struct inpcb *inp)
2581 {
2582 inp->inp_flags2 |= INP2_WANT_APP_POLICY;
2583 }
2584
2585 /*
2586 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
2587 */
2588 void
2589 inp_clear_want_app_policy(struct inpcb *inp)
2590 {
2591 inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
2592 }
2593 #endif /* NECP */
2594
2595 /*
2596 * Calculate flow hash for an inp, used by an interface to identify a
2597 * flow. When an interface provides flow control advisory, this flow
2598 * hash is used as an identifier.
2599 */
2600 u_int32_t
2601 inp_calc_flowhash(struct inpcb *inp)
2602 {
2603 struct inp_flowhash_key fh __attribute__((aligned(8)));
2604 u_int32_t flowhash = 0;
2605 struct inpcb *tmp_inp = NULL;
2606
2607 if (inp_hash_seed == 0)
2608 inp_hash_seed = RandomULong();
2609
2610 bzero(&fh, sizeof (fh));
2611
2612 bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr));
2613 bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr));
2614
2615 fh.infh_lport = inp->inp_lport;
2616 fh.infh_fport = inp->inp_fport;
2617 fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
2618 fh.infh_proto = inp->inp_ip_p;
2619 fh.infh_rand1 = RandomULong();
2620 fh.infh_rand2 = RandomULong();
2621
2622 try_again:
2623 flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed);
2624 if (flowhash == 0) {
2625 /* try to get a non-zero flowhash */
2626 inp_hash_seed = RandomULong();
2627 goto try_again;
2628 }
2629
2630 inp->inp_flowhash = flowhash;
2631
2632 /* Insert the inp into inp_fc_tree */
2633 lck_mtx_lock_spin(&inp_fc_lck);
2634 tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
2635 if (tmp_inp != NULL) {
2636 /*
2637 * There is a different inp with the same flowhash.
2638 * There can be a collision on flow hash but the
2639 * probability is low. Let's recompute the
2640 * flowhash.
2641 */
2642 lck_mtx_unlock(&inp_fc_lck);
2643 /* recompute hash seed */
2644 inp_hash_seed = RandomULong();
2645 goto try_again;
2646 }
2647
2648 RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
2649 inp->inp_flags2 |= INP2_IN_FCTREE;
2650 lck_mtx_unlock(&inp_fc_lck);
2651
2652 return (flowhash);
2653 }
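
/*
 * The retry discipline above -- re-seed and recompute until the hash is
 * both non-zero and unique among live flows -- shown in isolation.
 * Editor's sketch: FNV-1a stands in for net_flowhash, and the extern
 * hooks stand in for RB_FIND and RandomULong():
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	extern int table_contains(uint32_t h);	// stand-in for RB_FIND
 *	extern uint32_t new_seed(void);		// stand-in for RandomULong
 *
 *	static uint32_t
 *	fnv1a(const void *key, size_t len, uint32_t seed)
 *	{
 *		const uint8_t *p = key;
 *		uint32_t h = 2166136261u ^ seed;
 *		while (len-- != 0)
 *			h = (h ^ *p++) * 16777619u;
 *		return (h);
 *	}
 *
 *	uint32_t
 *	assign_flowhash(const void *key, size_t len, uint32_t *seed)
 *	{
 *		uint32_t h;
 *
 *		for (;;) {
 *			h = fnv1a(key, len, *seed);
 *			if (h != 0 && !table_contains(h))
 *				return (h);
 *			*seed = new_seed();	// zero or collision: retry
 *		}
 *	}
 */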
2654
2655 void
2656 inp_flowadv(uint32_t flowhash)
2657 {
2658 struct inpcb *inp;
2659
2660 inp = inp_fc_getinp(flowhash, 0);
2661
2662 if (inp == NULL)
2663 return;
2664 inp_fc_feedback(inp);
2665 }
2666
2667 /*
2668 * Function to compare inp_fc_entries in inp flow control tree
2669 */
2670 static inline int
2671 infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
2672 {
2673 return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
2674 sizeof(inp1->inp_flowhash)));
2675 }
2676
2677 static struct inpcb *
2678 inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
2679 {
2680 struct inpcb *inp = NULL;
2681 int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;
2682
2683 lck_mtx_lock_spin(&inp_fc_lck);
2684 key_inp.inp_flowhash = flowhash;
2685 inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
2686 if (inp == NULL) {
2687 /* inp is not present, return */
2688 lck_mtx_unlock(&inp_fc_lck);
2689 return (NULL);
2690 }
2691
2692 if (flags & INPFC_REMOVE) {
2693 RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
2694 lck_mtx_unlock(&inp_fc_lck);
2695
2696 bzero(&(inp->infc_link), sizeof (inp->infc_link));
2697 inp->inp_flags2 &= ~INP2_IN_FCTREE;
2698 return (NULL);
2699 }
2700
2701 if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING)
2702 inp = NULL;
2703 lck_mtx_unlock(&inp_fc_lck);
2704
2705 return (inp);
2706 }
2707
2708 static void
2709 inp_fc_feedback(struct inpcb *inp)
2710 {
2711 struct socket *so = inp->inp_socket;
2712
2713 /* we already hold a want_cnt on this inp, socket can't be null */
2714 VERIFY(so != NULL);
2715 socket_lock(so, 1);
2716
2717 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
2718 socket_unlock(so, 1);
2719 return;
2720 }
2721
2722 if (inp->inp_sndinprog_cnt > 0)
2723 inp->inp_flags |= INP_FC_FEEDBACK;
2724
2725 /*
2726 * Return if the connection is not in flow-controlled state.
2727 * This can happen if the connection experienced
2728 * loss while it was in flow-controlled state.
2729 */
2730 if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
2731 socket_unlock(so, 1);
2732 return;
2733 }
2734 inp_reset_fc_state(inp);
2735
2736 if (SOCK_TYPE(so) == SOCK_STREAM)
2737 inp_fc_unthrottle_tcp(inp);
2738
2739 socket_unlock(so, 1);
2740 }
2741
2742 void
2743 inp_reset_fc_state(struct inpcb *inp)
2744 {
2745 struct socket *so = inp->inp_socket;
2746 int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
2747 int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;
2748
2749 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
2750
2751 if (suspended) {
2752 so->so_flags &= ~(SOF_SUSPENDED);
2753 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
2754 }
2755
2756 /* Give a write wakeup to unblock the socket */
2757 if (needwakeup)
2758 sowwakeup(so);
2759 }
2760
2761 int
2762 inp_set_fc_state(struct inpcb *inp, int advcode)
2763 {
2764 struct inpcb *tmp_inp = NULL;
2765 /*
2766 * If there was feedback from the interface while a send
2767 * operation was in progress, ignore this flow advisory
2768 * to avoid a race between setting the flow-controlled
2769 * state and receiving feedback from
2770 * the interface.
2771 */
2772 if (inp->inp_flags & INP_FC_FEEDBACK)
2773 return (0);
2774
2775 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
2776 if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
2777 INPFC_SOLOCKED)) != NULL) {
2778 if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING)
2779 return (0);
2780 VERIFY(tmp_inp == inp);
2781 switch (advcode) {
2782 case FADV_FLOW_CONTROLLED:
2783 inp->inp_flags |= INP_FLOW_CONTROLLED;
2784 break;
2785 case FADV_SUSPENDED:
2786 inp->inp_flags |= INP_FLOW_SUSPENDED;
2787 soevent(inp->inp_socket,
2788 (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));
2789
2790 /* Record the fact that suspend event was sent */
2791 inp->inp_socket->so_flags |= SOF_SUSPENDED;
2792 break;
2793 }
2794 return (1);
2795 }
2796 return (0);
2797 }
2798
2799 /*
2800 * Handler for SO_FLUSH socket option.
2801 */
2802 int
2803 inp_flush(struct inpcb *inp, int optval)
2804 {
2805 u_int32_t flowhash = inp->inp_flowhash;
2806 struct ifnet *rtifp, *oifp;
2807
2808 /* Either all classes or one of the valid ones */
2809 if (optval != SO_TC_ALL && !SO_VALID_TC(optval))
2810 return (EINVAL);
2811
2812 /* We need a flow hash for identification */
2813 if (flowhash == 0)
2814 return (0);
2815
2816 /* Grab the interfaces from the route and pcb */
2817 rtifp = ((inp->inp_route.ro_rt != NULL) ?
2818 inp->inp_route.ro_rt->rt_ifp : NULL);
2819 oifp = inp->inp_last_outifp;
2820
2821 if (rtifp != NULL)
2822 if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
2823 if (oifp != NULL && oifp != rtifp)
2824 if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
2825
2826 return (0);
2827 }
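
/*
 * SO_FLUSH arrives here from user space.  Editor's sketch (SO_FLUSH and
 * the SO_TC_* traffic-class constants live in Apple's private
 * sys/socket.h, so this builds only against kernel/private headers):
 *
 *	#include <sys/socket.h>
 *
 *	int tc = SO_TC_ALL;	// flush every service class for this flow
 *
 *	if (setsockopt(fd, SOL_SOCKET, SO_FLUSH, &tc, sizeof (tc)) == -1)
 *		perror("setsockopt(SO_FLUSH)");
 */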
2828
2829 /*
2830 * Clear the INP_INADDR_ANY flag (special case for PPP only)
2831 */
2832 void
2833 inp_clear_INP_INADDR_ANY(struct socket *so)
2834 {
2835 struct inpcb *inp = NULL;
2836
2837 socket_lock(so, 1);
2838 inp = sotoinpcb(so);
2839 if (inp) {
2840 inp->inp_flags &= ~INP_INADDR_ANY;
2841 }
2842 socket_unlock(so, 1);
2843 }
2844
2845 void
2846 inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
2847 {
2848 struct socket *so = inp->inp_socket;
2849
2850 soprocinfo->spi_pid = so->last_pid;
2851 if (so->last_pid != 0)
2852 uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
2853 /*
2854 * When not delegated, the effective pid is the same as the real pid
2855 */
2856 if (so->so_flags & SOF_DELEGATED) {
2857 soprocinfo->spi_delegated = 1;
2858 soprocinfo->spi_epid = so->e_pid;
2859 uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
2860 } else {
2861 soprocinfo->spi_delegated = 0;
2862 soprocinfo->spi_epid = so->last_pid;
2863 }
2864 }
2865
2866 int
2867 inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
2868 struct so_procinfo *soprocinfo)
2869 {
2870 struct inpcb *inp = NULL;
2871 int found = 0;
2872
2873 bzero(soprocinfo, sizeof (struct so_procinfo));
2874
2875 if (!flowhash)
2876 return (-1);
2877
2878 lck_rw_lock_shared(pcbinfo->ipi_lock);
2879 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
2880 if (inp->inp_state != INPCB_STATE_DEAD &&
2881 inp->inp_socket != NULL &&
2882 inp->inp_flowhash == flowhash) {
2883 found = 1;
2884 inp_get_soprocinfo(inp, soprocinfo);
2885 break;
2886 }
2887 }
2888 lck_rw_done(pcbinfo->ipi_lock);
2889
2890 return (found);
2891 }
2892
2893 #if CONFIG_PROC_UUID_POLICY
2894 static void
2895 inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
2896 {
2897 struct socket *so = inp->inp_socket;
2898 int before, after;
2899
2900 VERIFY(so != NULL);
2901 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
2902
2903 before = INP_NO_CELLULAR(inp);
2904 if (set) {
2905 inp_set_nocellular(inp);
2906 } else {
2907 inp_clear_nocellular(inp);
2908 }
2909 after = INP_NO_CELLULAR(inp);
2910 if (net_io_policy_log && (before != after)) {
2911 static const char *ok = "OK";
2912 static const char *nok = "NOACCESS";
2913 uuid_string_t euuid_buf;
2914 pid_t epid;
2915
2916 if (so->so_flags & SOF_DELEGATED) {
2917 uuid_unparse(so->e_uuid, euuid_buf);
2918 epid = so->e_pid;
2919 } else {
2920 uuid_unparse(so->last_uuid, euuid_buf);
2921 epid = so->last_pid;
2922 }
2923
2924 /* allow this socket to generate another notification event */
2925 so->so_ifdenied_notifies = 0;
2926
2927 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
2928 "euuid %s%s %s->%s\n", __func__,
2929 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
2930 SOCK_TYPE(so), epid, euuid_buf,
2931 (so->so_flags & SOF_DELEGATED) ?
2932 " [delegated]" : "",
2933 ((before < after) ? ok : nok),
2934 ((before < after) ? nok : ok));
2935 }
2936 }
2937
2938 #if NECP
2939 static void
2940 inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
2941 {
2942 struct socket *so = inp->inp_socket;
2943 int before, after;
2944
2945 VERIFY(so != NULL);
2946 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
2947
2948 before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
2949 if (set) {
2950 inp_set_want_app_policy(inp);
2951 } else {
2952 inp_clear_want_app_policy(inp);
2953 }
2954 after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
2955 if (net_io_policy_log && (before != after)) {
2956 static const char *wanted = "WANTED";
2957 static const char *unwanted = "UNWANTED";
2958 uuid_string_t euuid_buf;
2959 pid_t epid;
2960
2961 if (so->so_flags & SOF_DELEGATED) {
2962 uuid_unparse(so->e_uuid, euuid_buf);
2963 epid = so->e_pid;
2964 } else {
2965 uuid_unparse(so->last_uuid, euuid_buf);
2966 epid = so->last_pid;
2967 }
2968
2969 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
2970 "euuid %s%s %s->%s\n", __func__,
2971 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
2972 SOCK_TYPE(so), epid, euuid_buf,
2973 (so->so_flags & SOF_DELEGATED) ?
2974 " [delegated]" : "",
2975 ((before < after) ? unwanted : wanted),
2976 ((before < after) ? wanted : unwanted));
2977 }
2978 }
2979 #endif /* NECP */
2980 #endif /* CONFIG_PROC_UUID_POLICY */
2981
2982 #if NECP
2983 void
2984 inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
2985 {
2986 necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
2987 if (necp_socket_should_rescope(inp) &&
2988 inp->inp_lport == 0 &&
2989 inp->inp_laddr.s_addr == INADDR_ANY &&
2990 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
2991 // If we should rescope, and the socket is not yet bound
2992 inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
2993 }
2994 }
2995 #endif /* NECP */
2996
2997 int
2998 inp_update_policy(struct inpcb *inp)
2999 {
3000 #if CONFIG_PROC_UUID_POLICY
3001 struct socket *so = inp->inp_socket;
3002 uint32_t pflags = 0;
3003 int32_t ogencnt;
3004 int err = 0;
3005
3006 if (!net_io_policy_uuid ||
3007 so == NULL || inp->inp_state == INPCB_STATE_DEAD)
3008 return (0);
3009
3010 /*
3011 * Kernel-created sockets that aren't delegating other sockets
3012 * are currently exempted from UUID policy checks.
3013 */
3014 if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED))
3015 return (0);
3016
3017 ogencnt = so->so_policy_gencnt;
3018 err = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
3019 so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt);
3020
3021 /*
3022 * Discard cached generation count if the entry is gone (ENOENT),
3023 * so that we go through the checks below.
3024 */
3025 if (err == ENOENT && ogencnt != 0)
3026 so->so_policy_gencnt = 0;
3027
3028 /*
3029 * If the generation count has changed, inspect the policy flags
3030 * and act accordingly. If a policy flag was previously set and
3031 * the UUID is no longer present in the table (ENOENT), treat it
3032 * as if the flag has been cleared.
3033 */
3034 if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
3035 /* update cellular policy for this socket */
3036 if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
3037 inp_update_cellular_policy(inp, TRUE);
3038 } else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
3039 inp_update_cellular_policy(inp, FALSE);
3040 }
3041 #if NECP
3042 /* update necp want app policy for this socket */
3043 if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
3044 inp_update_necp_want_app_policy(inp, TRUE);
3045 } else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
3046 inp_update_necp_want_app_policy(inp, FALSE);
3047 }
3048 #endif /* NECP */
3049 }
3050
3051 return ((err == ENOENT) ? 0 : err);
3052 #else /* !CONFIG_PROC_UUID_POLICY */
3053 #pragma unused(inp)
3054 return (0);
3055 #endif /* !CONFIG_PROC_UUID_POLICY */
3056 }
3057 /*
3058 * Called when we need to enforce policy restrictions in the input path.
3059 *
3060 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
3061 */
3062 boolean_t
3063 inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
3064 {
3065 VERIFY(inp != NULL);
3066
3067 /*
3068 * Inbound restrictions.
3069 */
3070 if (!sorestrictrecv)
3071 return (FALSE);
3072
3073 if (ifp == NULL)
3074 return (FALSE);
3075
3076 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
3077 return (TRUE);
3078
3079 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
3080 return (TRUE);
3081
3082 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
3083 return (TRUE);
3084
3085 if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV))
3086 return (FALSE);
3087
3088 if (inp->inp_flags & INP_RECV_ANYIF)
3089 return (FALSE);
3090
3091 if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp)
3092 return (FALSE);
3093
3094 return (TRUE);
3095 }
3096
3097 /*
3098 * Called when we need to enforce policy restrictions in the output path.
3099 *
3100 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
3101 */
3102 boolean_t
3103 inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3104 {
3105 VERIFY(inp != NULL);
3106
3107 /*
3108 * Outbound restrictions.
3109 */
3110 if (!sorestrictsend)
3111 return (FALSE);
3112
3113 if (ifp == NULL)
3114 return (FALSE);
3115
3116 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
3117 return (TRUE);
3118
3119 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
3120 return (TRUE);
3121
3122 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
3123 return (TRUE);
3124
3125 return (FALSE);
3126 }