apple/xnu.git (xnu-3789.1.32) / bsd / netinet / in_pcb.c
/*
 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mcache.h>
#include <sys/kauth.h>
#include <sys/priv.h>
#include <sys/proc_uuid_policy.h>
#include <sys/syslog.h>
#include <net/dlil.h>

#include <libkern/OSAtomic.h>
#include <kern/locks.h>

#include <machine/limits.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/flowhash.h>
#include <net/flowadv.h>
#include <net/ntstat.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#if INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif /* INET6 */

#include <sys/kdebug.h>
#include <sys/random.h>

#include <dev/random/randomdev.h>
#include <mach/boolean.h>

#include <pexpert/pexpert.h>

#if NECP
#include <net/necp.h>
#endif

#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/vnode.h>

static lck_grp_t *inpcb_lock_grp;
static lck_attr_t *inpcb_lock_attr;
static lck_grp_attr_t *inpcb_lock_grp_attr;
decl_lck_mtx_data(static, inpcb_lock);		/* global INPCB lock */
decl_lck_mtx_data(static, inpcb_timeout_lock);

static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);

static u_int16_t inpcb_timeout_run = 0;	/* INPCB timer is scheduled to run */
static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
static boolean_t inpcb_ticking = FALSE;		/* "slow" timer is scheduled */
static boolean_t inpcb_fast_timer_on = FALSE;
static boolean_t intcoproc_unrestricted = FALSE;

/*
 * If the total number of gc reqs is above a threshold, schedule the
 * garbage collect timer sooner.
 */
static boolean_t inpcb_toomany_gcreq = FALSE;

#define	INPCB_GCREQ_THRESHOLD	50000

static thread_call_t inpcb_thread_call, inpcb_fast_thread_call;
static void inpcb_sched_timeout(void);
static void inpcb_sched_lazy_timeout(void);
static void _inpcb_sched_timeout(unsigned int);
static void inpcb_timeout(void *, void *);
const int inpcb_timeout_lazy = 10;	/* 10 seconds leeway for lazy timers */
extern int tvtohz(struct timeval *);

#if CONFIG_PROC_UUID_POLICY
static void inp_update_cellular_policy(struct inpcb *, boolean_t);
#if NECP
static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
#endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */

#define	DBG_FNC_PCB_LOOKUP	NETDBG_CODE(DBG_NETTCP, (6 << 8))
#define	DBG_FNC_PCB_HLOOKUP	NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))

/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 */
int ipport_lowfirstauto = IPPORT_RESERVED - 1;	/* 1023 */
int ipport_lowlastauto = IPPORT_RESERVEDSTART;	/* 600 */
int ipport_firstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
int ipport_lastauto = IPPORT_HILASTAUTO;	/* 65535 */
int ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
int ipport_hilastauto = IPPORT_HILASTAUTO;	/* 65535 */

#define	RANGECHK(var, min, max)			\
	if ((var) < (min)) { (var) = (min); }	\
	else if ((var) > (max)) { (var) = (max); }
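
/*
 * For example, RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX)
 * clamps a value of 70000 written via sysctl down to 65535, and a
 * value of 100 up to 1024 (IPPORT_RESERVED).
 */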

static int
sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error;

	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
	if (!error) {
		RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
		RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
		RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
	}
	return (error);
}

#undef RANGECHK

SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
	CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IP Ports");

SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
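
/*
 * These appear in user space as net.inet.ip.portrange.*; an
 * illustrative (not prescriptive) invocation from a privileged shell:
 *
 *	sysctl -w net.inet.ip.portrange.first=10000
 *
 * sysctl_net_ipport_check above then clamps the written value into
 * its legal range.
 */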

static uint32_t apn_fallbk_debug = 0;
#define	apn_fallbk_log(x)	do { if (apn_fallbk_debug >= 1) log x; } while (0)

static boolean_t apn_fallbk_enabled = FALSE;

extern int udp_use_randomport;
extern int tcp_use_randomport;

/* Structs used for flowhash computation */
struct inp_flowhash_key_addr {
	union {
		struct in_addr	v4;
		struct in6_addr v6;
		u_int8_t	addr8[16];
		u_int16_t	addr16[8];
		u_int32_t	addr32[4];
	} infha;
};

struct inp_flowhash_key {
	struct inp_flowhash_key_addr	infh_laddr;
	struct inp_flowhash_key_addr	infh_faddr;
	u_int32_t			infh_lport;
	u_int32_t			infh_fport;
	u_int32_t			infh_af;
	u_int32_t			infh_proto;
	u_int32_t			infh_rand1;
	u_int32_t			infh_rand2;
};
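
/*
 * Local/foreign addresses and ports, address family, protocol and the
 * two random values above are hashed together (via the net/flowhash.h
 * primitives) to derive a PCB's flowhash; infh_rand1/infh_rand2, seeded
 * from inp_hash_seed below, keep the hash unpredictable from outside.
 */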

static u_int32_t inp_hash_seed = 0;

static int infc_cmp(const struct inpcb *, const struct inpcb *);

/* Flags used by inp_fc_getinp */
#define	INPFC_SOLOCKED	0x1
#define	INPFC_REMOVE	0x2
static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);

static void inp_fc_feedback(struct inpcb *);
extern void tcp_remove_from_time_wait(struct inpcb *inp);

decl_lck_mtx_data(static, inp_fc_lck);

RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);

/*
 * Use this inp as a key to find an inp in the flowhash tree.
 * Accesses to it are protected by inp_fc_lck.
 */
struct inpcb key_inp;

/*
 * in_pcb.c: manage the Protocol Control Blocks.
 */

void
in_pcbinit(void)
{
	static int inpcb_initialized = 0;

	VERIFY(!inpcb_initialized);
	inpcb_initialized = 1;

	inpcb_lock_grp_attr = lck_grp_attr_alloc_init();
	inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr);
	inpcb_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr);
	lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr);
	inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout,
	    NULL, THREAD_CALL_PRIORITY_KERNEL);
	inpcb_fast_thread_call = thread_call_allocate_with_priority(
	    inpcb_timeout, NULL, THREAD_CALL_PRIORITY_KERNEL);
	if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL)
		panic("unable to alloc the inpcb thread call");

	/*
	 * Initialize data structures required to deliver
	 * flow advisories.
	 */
	lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr);
	lck_mtx_lock(&inp_fc_lck);
	RB_INIT(&inp_fc_tree);
	bzero(&key_inp, sizeof(key_inp));
	lck_mtx_unlock(&inp_fc_lck);

	PE_parse_boot_argn("intcoproc_unrestricted", &intcoproc_unrestricted,
	    sizeof (intcoproc_unrestricted));
}

#define	INPCB_HAVE_TIMER_REQ(req)	(((req).intimer_lazy > 0) || \
	((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
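
/*
 * inpcb_timeout() is the single handler behind both thread calls set
 * up in in_pcbinit().  It drains the per-pcbinfo garbage-collection
 * and "slow" timer requests, then re-arms itself as either a fast
 * (1 sec) or a lazy (1 sec plus leeway) timer depending on how much
 * work is left over.
 */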
static void
inpcb_timeout(void *arg0, void *arg1)
{
#pragma unused(arg0)
	struct inpcbinfo *ipi;
	boolean_t t, gc;
	struct intimercount gccnt, tmcnt;
	boolean_t toomany_gc = FALSE;

	if (arg1 != NULL) {
		VERIFY(arg1 == &inpcb_toomany_gcreq);
		toomany_gc = *(boolean_t *)arg1;
	}

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	bzero(&gccnt, sizeof(gccnt));
	bzero(&tmcnt, sizeof(tmcnt));

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	gc = inpcb_garbage_collecting;
	inpcb_garbage_collecting = FALSE;

	t = inpcb_ticking;
	inpcb_ticking = FALSE;

	if (gc || t) {
		lck_mtx_unlock(&inpcb_timeout_lock);

		lck_mtx_lock(&inpcb_lock);
		TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
				bzero(&ipi->ipi_gc_req,
				    sizeof(ipi->ipi_gc_req));
				if (gc && ipi->ipi_gc != NULL) {
					ipi->ipi_gc(ipi);
					gccnt.intimer_lazy +=
					    ipi->ipi_gc_req.intimer_lazy;
					gccnt.intimer_fast +=
					    ipi->ipi_gc_req.intimer_fast;
					gccnt.intimer_nodelay +=
					    ipi->ipi_gc_req.intimer_nodelay;
				}
			}
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
				bzero(&ipi->ipi_timer_req,
				    sizeof(ipi->ipi_timer_req));
				if (t && ipi->ipi_timer != NULL) {
					ipi->ipi_timer(ipi);
					tmcnt.intimer_lazy +=
					    ipi->ipi_timer_req.intimer_lazy;
					tmcnt.intimer_fast +=
					    ipi->ipi_timer_req.intimer_fast;
					tmcnt.intimer_nodelay +=
					    ipi->ipi_timer_req.intimer_nodelay;
				}
			}
		}
		lck_mtx_unlock(&inpcb_lock);
		lck_mtx_lock_spin(&inpcb_timeout_lock);
	}

	/* lock was dropped above, so check first before overriding */
	if (!inpcb_garbage_collecting)
		inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
	if (!inpcb_ticking)
		inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);

	/* re-arm the timer if there's work to do */
	if (toomany_gc) {
		inpcb_toomany_gcreq = FALSE;
	} else {
		inpcb_timeout_run--;
		VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);
	}

	if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0)
		inpcb_sched_timeout();
	else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5)
		/* be lazy when idle with little activity */
		inpcb_sched_lazy_timeout();
	else
		inpcb_sched_timeout();

	lck_mtx_unlock(&inpcb_timeout_lock);
}

static void
inpcb_sched_timeout(void)
{
	_inpcb_sched_timeout(0);
}

static void
inpcb_sched_lazy_timeout(void)
{
	_inpcb_sched_timeout(inpcb_timeout_lazy);
}

static void
_inpcb_sched_timeout(unsigned int offset)
{
	uint64_t deadline, leeway;

	clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline);
	lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
	if (inpcb_timeout_run == 0 &&
	    (inpcb_garbage_collecting || inpcb_ticking)) {
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		if (offset == 0) {
			inpcb_fast_timer_on = TRUE;
			thread_call_enter_delayed(inpcb_thread_call,
			    deadline);
		} else {
			inpcb_fast_timer_on = FALSE;
			clock_interval_to_absolutetime_interval(offset,
			    NSEC_PER_SEC, &leeway);
			thread_call_enter_delayed_with_leeway(
			    inpcb_thread_call, NULL, deadline, leeway,
			    THREAD_CALL_DELAY_LEEWAY);
		}
	} else if (inpcb_timeout_run == 1 &&
	    offset == 0 && !inpcb_fast_timer_on) {
		/*
		 * Since the request was for a fast timer but the
		 * scheduled timer is a lazy timer, try to schedule
		 * another instance of fast timer also.
		 */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		inpcb_fast_timer_on = TRUE;
		thread_call_enter_delayed(inpcb_fast_thread_call, deadline);
	}
}

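/*
 * Record a garbage-collection request of the given urgency against ipi
 * and make sure a timer is armed to service it: INPCB_TIMER_NODELAY
 * and INPCB_TIMER_FAST both arm the 1-second timer, everything else
 * settles for the lazy, leeway-coalesced timer.
 */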
void
inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
{
	u_int32_t gccnt;
	uint64_t deadline;

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	inpcb_garbage_collecting = TRUE;
	gccnt = ipi->ipi_gc_req.intimer_nodelay +
	    ipi->ipi_gc_req.intimer_fast;

	if (gccnt > INPCB_GCREQ_THRESHOLD && !inpcb_toomany_gcreq) {
		inpcb_toomany_gcreq = TRUE;

		/*
		 * There are too many PCBs waiting to be garbage
		 * collected; schedule a much faster timeout in
		 * addition to the caller's request.
		 */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);
		thread_call_enter1_delayed(inpcb_thread_call,
		    &inpcb_toomany_gcreq, deadline);
	}

	switch (type) {
	case INPCB_TIMER_NODELAY:
		atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1);
		inpcb_sched_timeout();
		break;
	case INPCB_TIMER_FAST:
		atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
		inpcb_sched_timeout();
		break;
	default:
		atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);
		inpcb_sched_lazy_timeout();
		break;
	}
	lck_mtx_unlock(&inpcb_timeout_lock);
}

void
inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
{

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	inpcb_ticking = TRUE;
	switch (type) {
	case INPCB_TIMER_NODELAY:
		atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1);
		inpcb_sched_timeout();
		break;
	case INPCB_TIMER_FAST:
		atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
		inpcb_sched_timeout();
		break;
	default:
		atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1);
		inpcb_sched_lazy_timeout();
		break;
	}
	lck_mtx_unlock(&inpcb_timeout_lock);
}

void
in_pcbinfo_attach(struct inpcbinfo *ipi)
{
	struct inpcbinfo *ipi0;

	lck_mtx_lock(&inpcb_lock);
	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
		if (ipi0 == ipi) {
			panic("%s: ipi %p already in the list\n",
			    __func__, ipi);
			/* NOTREACHED */
		}
	}
	TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
	lck_mtx_unlock(&inpcb_lock);
}

int
in_pcbinfo_detach(struct inpcbinfo *ipi)
{
	struct inpcbinfo *ipi0;
	int error = 0;

	lck_mtx_lock(&inpcb_lock);
	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
		if (ipi0 == ipi)
			break;
	}
	if (ipi0 != NULL)
		TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
	else
		error = ENXIO;
	lck_mtx_unlock(&inpcb_lock);

	return (error);
}

/*
 * Allocate a PCB and associate it with the socket.
 *
 * Returns:	0			Success
 *		ENOBUFS
 *		ENOMEM
 */
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
{
#pragma unused(p)
	struct inpcb *inp;
	caddr_t temp;
#if CONFIG_MACF_NET
	int mac_error;
#endif /* CONFIG_MACF_NET */

	if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
		inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone);
		if (inp == NULL)
			return (ENOBUFS);
		bzero((caddr_t)inp, sizeof (*inp));
	} else {
		inp = (struct inpcb *)(void *)so->so_saved_pcb;
		temp = inp->inp_saved_ppcb;
		bzero((caddr_t)inp, sizeof (*inp));
		inp->inp_saved_ppcb = temp;
	}

	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	inp->inp_pcbinfo = pcbinfo;
	inp->inp_socket = so;
#if CONFIG_MACF_NET
	mac_error = mac_inpcb_label_init(inp, M_WAITOK);
	if (mac_error != 0) {
		if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0)
			zfree(pcbinfo->ipi_zone, inp);
		return (mac_error);
	}
	mac_inpcb_label_associate(so, inp);
#endif /* CONFIG_MACF_NET */
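	/*
	 * Each of the four inp_*stat pointers below must be 64-bit
	 * aligned, yet their backing stores sit at arbitrary offsets
	 * inside the inpcb.  The stores are declared with extra slack
	 * (see in_pcb.h) so that rounding the pointer up to the next
	 * 8-byte boundary with P2ROUNDUP still leaves room for the
	 * whole structure; the panic checks that follow enforce this
	 * invariant.
	 */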
	/* make sure inp_stat is always 64-bit aligned */
	inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
	    sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) {
		panic("%s: insufficient space to align inp_stat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_cstat is always 64-bit aligned */
	inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
	    sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) {
		panic("%s: insufficient space to align inp_cstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_wstat is always 64-bit aligned */
	inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
	    sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) {
		panic("%s: insufficient space to align inp_wstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_Wstat is always 64-bit aligned */
	inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
	    sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) {
		panic("%s: insufficient space to align inp_Wstat", __func__);
		/* NOTREACHED */
	}

	so->so_pcb = (caddr_t)inp;

	if (so->so_proto->pr_flags & PR_PCBLOCK) {
		lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
		    pcbinfo->ipi_lock_attr);
	}

#if INET6
	if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on)
		inp->inp_flags |= IN6P_IPV6_V6ONLY;

	if (ip6_auto_flowlabel)
		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
#endif /* INET6 */
	if (intcoproc_unrestricted)
		inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;

	(void) inp_update_policy(inp);

	lck_rw_lock_exclusive(pcbinfo->ipi_lock);
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
	pcbinfo->ipi_count++;
	lck_rw_done(pcbinfo->ipi_lock);
	return (0);
}

/*
 * in_pcblookup_local_and_cleanup does everything
 * in_pcblookup_local does but it checks for a socket
 * that's going away.  Since we know that the lock is
 * held read+write when this function is called, we
 * can safely dispose of this socket like the slow
 * timer would usually do and return NULL.  This is
 * great for bind.
 */
struct inpcb *
in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    u_int lport_arg, int wild_okay)
{
	struct inpcb *inp;

	/* Perform normal lookup */
	inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);

	/* Check if we found a match but it's waiting to be disposed */
	if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
		struct socket *so = inp->inp_socket;

		lck_mtx_lock(&inp->inpcb_mtx);

		if (so->so_usecount == 0) {
			if (inp->inp_state != INPCB_STATE_DEAD)
				in_pcbdetach(inp);
			in_pcbdispose(inp);	/* will unlock & destroy */
			inp = NULL;
		} else {
			lck_mtx_unlock(&inp->inpcb_mtx);
		}
	}

	return (inp);
}

static void
in_pcb_conflict_post_msg(u_int16_t port)
{
	/*
	 * Radar 5523020: send a kernel event notification if a
	 * non-participating socket tries to bind the port a socket
	 * that has set SOF_NOTIFYCONFLICT owns.
	 */
	struct kev_msg ev_msg;
	struct kev_in_portinuse in_portinuse;

	bzero(&in_portinuse, sizeof (struct kev_in_portinuse));
	bzero(&ev_msg, sizeof (struct kev_msg));
	in_portinuse.port = ntohs(port);	/* port in host order */
	in_portinuse.req_pid = proc_selfpid();
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_INET_SUBCLASS;
	ev_msg.event_code = KEV_INET_PORTINUSE;
	ev_msg.dv[0].data_ptr = &in_portinuse;
	ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse);
	ev_msg.dv[1].data_length = 0;
	dlil_post_complete_msg(NULL, &ev_msg);
}

/*
 * Bind an INPCB to an address and/or port.  This routine should not alter
 * the caller-supplied local address "nam".
 *
 * Returns:	0			Success
 *		EADDRNOTAVAIL		Address not available.
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported [notdef]
 *		EACCES			Permission denied
 *		EADDRINUSE		Address in use
 *		EAGAIN			Resource unavailable, try again
 *		priv_check_cred:EPERM	Operation not permitted
 */
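/*
 * Sketch of a typical call site, a protocol's pru_bind entry point;
 * udp_usrreq.c and tcp_usrreq.c follow this shape, and the names here
 * are illustrative only:
 *
 *	static int
 *	xxx_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
 *	{
 *		struct inpcb *inp = sotoinpcb(so);
 *
 *		if (inp == NULL)
 *			return (EINVAL);
 *		return (in_pcbbind(inp, nam, p));
 *	}
 */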
int
in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
{
	struct socket *so = inp->inp_socket;
	unsigned short *lastport;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	u_short lport = 0, rand_port = 0;
	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
	int error, randomport, conflict = 0;
	boolean_t anonport = FALSE;
	kauth_cred_t cred;
	struct in_addr laddr;
	struct ifnet *outif = NULL;

	if (TAILQ_EMPTY(&in_ifaddrhead))	/* XXX broken! */
		return (EADDRNOTAVAIL);
	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
		return (EINVAL);
	if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
		wild = 1;

	bzero(&laddr, sizeof(laddr));

	socket_unlock(so, 0);	/* keep reference on socket */
	lck_rw_lock_exclusive(pcbinfo->ipi_lock);

	if (nam != NULL) {

		if (nam->sa_len != sizeof (struct sockaddr_in)) {
			lck_rw_done(pcbinfo->ipi_lock);
			socket_lock(so, 0);
			return (EINVAL);
		}
#if 0
		/*
		 * We should check the family, but old programs
		 * incorrectly fail to initialize it.
		 */
		if (nam->sa_family != AF_INET) {
			lck_rw_done(pcbinfo->ipi_lock);
			socket_lock(so, 0);
			return (EAFNOSUPPORT);
		}
#endif /* 0 */
		lport = SIN(nam)->sin_port;

		if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
			/*
			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
			 * allow complete duplication of binding if
			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
			 * and a multicast address is bound on both
			 * new and duplicated sockets.
			 */
			if (so->so_options & SO_REUSEADDR)
				reuseport = SO_REUSEADDR|SO_REUSEPORT;
		} else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
			struct sockaddr_in sin;
			struct ifaddr *ifa;

			/* Sanitized for interface address searches */
			bzero(&sin, sizeof (sin));
			sin.sin_family = AF_INET;
			sin.sin_len = sizeof (struct sockaddr_in);
			sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;

			ifa = ifa_ifwithaddr(SA(&sin));
			if (ifa == NULL) {
				lck_rw_done(pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return (EADDRNOTAVAIL);
			} else {
				/*
				 * Opportunistically determine the outbound
				 * interface that may be used; this may not
				 * hold true if we end up using a route
				 * going over a different interface, e.g.
				 * when sending to a local address.  This
				 * will get updated again after sending.
				 */
				IFA_LOCK(ifa);
				outif = ifa->ifa_ifp;
				IFA_UNLOCK(ifa);
				IFA_REMREF(ifa);
			}
		}
		if (lport != 0) {
			struct inpcb *t;
			uid_t u;

			if (ntohs(lport) < IPPORT_RESERVED) {
				cred = kauth_cred_proc_ref(p);
				error = priv_check_cred(cred,
				    PRIV_NETINET_RESERVEDPORT, 0);
				kauth_cred_unref(&cred);
				if (error != 0) {
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return (EACCES);
				}
			}
			if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
			    (u = kauth_cred_getuid(so->so_cred)) != 0 &&
			    (t = in_pcblookup_local_and_cleanup(
			    inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
			    INPLOOKUP_WILDCARD)) != NULL &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY ||
			    !(t->inp_socket->so_options & SO_REUSEPORT)) &&
			    (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
			    !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY)) {
				if ((t->inp_socket->so_flags &
				    SOF_NOTIFYCONFLICT) &&
				    !(so->so_flags & SOF_NOTIFYCONFLICT))
					conflict = 1;

				lck_rw_done(pcbinfo->ipi_lock);

				if (conflict)
					in_pcb_conflict_post_msg(lport);

				socket_lock(so, 0);
				return (EADDRINUSE);
			}
			t = in_pcblookup_local_and_cleanup(pcbinfo,
			    SIN(nam)->sin_addr, lport, wild);
			if (t != NULL &&
			    (reuseport & t->inp_socket->so_options) == 0) {
#if INET6
				if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
				    t->inp_laddr.s_addr != INADDR_ANY ||
				    SOCK_DOM(so) != PF_INET6 ||
				    SOCK_DOM(t->inp_socket) != PF_INET6)
#endif /* INET6 */
				{

					if ((t->inp_socket->so_flags &
					    SOF_NOTIFYCONFLICT) &&
					    !(so->so_flags & SOF_NOTIFYCONFLICT))
						conflict = 1;

					lck_rw_done(pcbinfo->ipi_lock);

					if (conflict)
						in_pcb_conflict_post_msg(lport);
					socket_lock(so, 0);
					return (EADDRINUSE);
				}
			}
		}
		laddr = SIN(nam)->sin_addr;
	}
	if (lport == 0) {
		u_short first, last;
		int count;

		randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
		    (so->so_type == SOCK_STREAM ? tcp_use_randomport :
		    udp_use_randomport);

		/*
		 * Even though this looks similar to the code in
		 * in6_pcbsetport, the v6 vs v4 checks are different.
		 */
		anonport = TRUE;
		if (inp->inp_flags & INP_HIGHPORT) {
			first = ipport_hifirstauto;	/* sysctl */
			last = ipport_hilastauto;
			lastport = &pcbinfo->ipi_lasthi;
		} else if (inp->inp_flags & INP_LOWPORT) {
			cred = kauth_cred_proc_ref(p);
			error = priv_check_cred(cred,
			    PRIV_NETINET_RESERVEDPORT, 0);
			kauth_cred_unref(&cred);
			if (error != 0) {
				lck_rw_done(pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return (error);
			}
			first = ipport_lowfirstauto;	/* 1023 */
			last = ipport_lowlastauto;	/* 600 */
			lastport = &pcbinfo->ipi_lastlow;
		} else {
			first = ipport_firstauto;	/* sysctl */
			last = ipport_lastauto;
			lastport = &pcbinfo->ipi_lastport;
		}
		/* No point in randomizing if only one port is available */

		if (first == last)
			randomport = 0;
		/*
		 * Simple check to ensure that all ports are not used up,
		 * which would make the loop below spin forever while
		 * holding the PCB lock.
		 *
		 * We split the two cases (up and down) so that the direction
		 * is not being tested on each round of the loop.
		 */
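		/*
		 * Example, counting down (INP_LOWPORT): first = 1023 and
		 * last = 600, so count = 423.  With randomization on,
		 * *lastport starts at 1023 - (rand_port % 423); every
		 * failed in_pcblookup_local_and_cleanup() probe then
		 * decrements it, wrapping back to 1023, until a free port
		 * is found or the candidates are exhausted and
		 * EADDRNOTAVAIL is returned.
		 */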
		if (first > last) {
			/*
			 * counting down
			 */
			if (randomport) {
				read_random(&rand_port, sizeof (rand_port));
				*lastport =
				    first - (rand_port % (first - last));
			}
			count = first - last;

			do {
				if (count-- < 0) {	/* completely used? */
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return (EADDRNOTAVAIL);
				}
				--*lastport;
				if (*lastport > first || *lastport < last)
					*lastport = first;
				lport = htons(*lastport);
			} while (in_pcblookup_local_and_cleanup(pcbinfo,
			    ((laddr.s_addr != INADDR_ANY) ? laddr :
			    inp->inp_laddr), lport, wild));
		} else {
			/*
			 * counting up
			 */
			if (randomport) {
				read_random(&rand_port, sizeof (rand_port));
				*lastport =
				    first + (rand_port % (first - last));
			}
			count = last - first;

			do {
				if (count-- < 0) {	/* completely used? */
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return (EADDRNOTAVAIL);
				}
				++*lastport;
				if (*lastport < first || *lastport > last)
					*lastport = first;
				lport = htons(*lastport);
			} while (in_pcblookup_local_and_cleanup(pcbinfo,
			    ((laddr.s_addr != INADDR_ANY) ? laddr :
			    inp->inp_laddr), lport, wild));
		}
	}
	socket_lock(so, 0);

	/*
	 * We unlocked socket's protocol lock for a long time.
	 * The socket might have been dropped/defuncted.
	 * Check whether the world has changed since.
	 */
	if (inp->inp_state == INPCB_STATE_DEAD) {
		lck_rw_done(pcbinfo->ipi_lock);
		return (ECONNABORTED);
	}

	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
		lck_rw_done(pcbinfo->ipi_lock);
		return (EINVAL);
	}

	if (laddr.s_addr != INADDR_ANY) {
		inp->inp_laddr = laddr;
		inp->inp_last_outifp = outif;
	}
	inp->inp_lport = lport;
	if (anonport)
		inp->inp_flags |= INP_ANONPORT;

	if (in_pcbinshash(inp, 1) != 0) {
		inp->inp_laddr.s_addr = INADDR_ANY;
		inp->inp_last_outifp = NULL;

		inp->inp_lport = 0;
		if (anonport)
			inp->inp_flags &= ~INP_ANONPORT;
		lck_rw_done(pcbinfo->ipi_lock);
		return (EAGAIN);
	}
	lck_rw_done(pcbinfo->ipi_lock);
	sflt_notify(so, sock_evt_bound, NULL);
	return (0);
}

#define	APN_FALLBACK_IP_FILTER(a)	\
	(IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \
	 IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \
	 IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \
	 IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \
	 IN_PRIVATE(ntohl((a)->sin_addr.s_addr)))
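
/*
 * In other words, destinations that are link-local, loopback, zeronet,
 * multicast or RFC 1918 private are never treated as candidates for
 * the APN fallback notification below.
 */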

#define	APN_FALLBACK_NOTIF_INTERVAL	2 /* Magic Number */
static uint64_t last_apn_fallback = 0;

static boolean_t
apn_fallback_required(proc_t proc, struct socket *so,
    struct sockaddr_in *p_dstv4)
{
	uint64_t timenow;
	struct sockaddr_storage lookup_default_addr;
	struct rtentry *rt = NULL;

	VERIFY(proc != NULL);

	if (apn_fallbk_enabled == FALSE)
		return FALSE;

	if (proc == kernproc)
		return FALSE;

	if (so && (so->so_options & SO_NOAPNFALLBK))
		return FALSE;

	timenow = net_uptime();
	if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) {
		apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n"));
		return FALSE;
	}

	if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4))
		return FALSE;

	/* Check if we have unscoped IPv6 default route through cellular */
	bzero(&lookup_default_addr, sizeof(lookup_default_addr));
	lookup_default_addr.ss_family = AF_INET6;
	lookup_default_addr.ss_len = sizeof(struct sockaddr_in6);

	rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
	if (NULL == rt) {
		apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
		    "unscoped default IPv6 route.\n"));
		return FALSE;
	}

	if (!IFNET_IS_CELLULAR(rt->rt_ifp)) {
		rtfree(rt);
		apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
		    "unscoped default IPv6 route through cellular interface.\n"));
		return FALSE;
	}

	/*
	 * We have a default IPv6 route, ensure that
	 * we do not have IPv4 default route before triggering
	 * the event
	 */
	rtfree(rt);
	rt = NULL;

	bzero(&lookup_default_addr, sizeof(lookup_default_addr));
	lookup_default_addr.ss_family = AF_INET;
	lookup_default_addr.ss_len = sizeof(struct sockaddr_in);

	rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);

	if (rt) {
		rtfree(rt);
		rt = NULL;
		apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped "
		    "IPv4 default route!\n"));
		return FALSE;
	}

	{
		/*
		 * We disable APN fallback if the binary is not a third-party app.
		 * Note that platform daemons use their process name as a
		 * bundle ID so we filter out bundle IDs without dots.
		 */
		const char *bundle_id = cs_identity_get(proc);
		if (bundle_id == NULL ||
		    bundle_id[0] == '\0' ||
		    strchr(bundle_id, '.') == NULL ||
		    strncmp(bundle_id, "com.apple.", sizeof("com.apple.") - 1) == 0) {
			apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-"
			    "party bundle ID \"%s\"!\n", (bundle_id ? bundle_id : "NULL")));
			return FALSE;
		}
	}

	{
		/*
		 * The Apple App Store IPv6 requirement started on
		 * June 1st, 2016 at 12:00:00 AM PDT.
		 * We disable APN fallback if the binary is more recent than that.
		 * We check both atime and birthtime since birthtime is not always supported.
		 */
		static const long ipv6_start_date = 1464764400L;
		vfs_context_t context;
		struct stat64 sb;
		int vn_stat_error;

		bzero(&sb, sizeof(struct stat64));
		context = vfs_context_create(NULL);
		vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, context);
		(void)vfs_context_rele(context);

		if (vn_stat_error != 0 ||
		    sb.st_atimespec.tv_sec >= ipv6_start_date ||
		    sb.st_birthtimespec.tv_sec >= ipv6_start_date) {
			apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary "
			    "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n",
			    vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec,
			    sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec));
			return FALSE;
		}
	}
	return TRUE;
}

static void
apn_fallback_trigger(proc_t proc)
{
	pid_t pid = 0;
	struct kev_msg ev_msg;
	struct kev_netevent_apnfallbk_data apnfallbk_data;

	last_apn_fallback = net_uptime();
	pid = proc_pid(proc);
	uuid_t application_uuid;
	uuid_clear(application_uuid);
	proc_getexecutableuuid(proc, application_uuid,
	    sizeof(application_uuid));

	bzero(&ev_msg, sizeof (struct kev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_NETEVENT_SUBCLASS;
	ev_msg.event_code = KEV_NETEVENT_APNFALLBACK;

	bzero(&apnfallbk_data, sizeof(apnfallbk_data));
	apnfallbk_data.epid = pid;
	uuid_copy(apnfallbk_data.euuid, application_uuid);

	ev_msg.dv[0].data_ptr = &apnfallbk_data;
	ev_msg.dv[0].data_length = sizeof(apnfallbk_data);
	kev_post_msg(&ev_msg);
	apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n"));
}

/*
 * Transform old in_pcbconnect() into an inner subroutine for new
 * in_pcbconnect(); do some validity-checking on the remote address
 * (in "nam") and then determine local host address (i.e., which
 * interface) to use to access that remote host.
 *
 * This routine may alter the caller-supplied remote address "nam".
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 *
 * This routine might return an ifp with a reference held if the caller
 * provides a non-NULL outif, even in the error case.  The caller is
 * responsible for releasing its reference.
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported
 *		EADDRNOTAVAIL		Address not available
 */
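/*
 * Source address selection below, roughly in order of precedence:
 *
 *   1. An address the socket is already bound to (inp_laddr).
 *   2. The interface address of a usable (possibly freshly allocated,
 *	scoped) route to the destination.
 *   3. If that route is through a loopback interface, the interface
 *	that actually owns the destination address.
 *   4. With no route at all, an interface directly connected to the
 *	destination network.
 *   5. For multicast destinations with an outgoing interface set via
 *	IP_MULTICAST_IF, that interface's address overrides the above.
 */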
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
    unsigned int ifscope, struct ifnet **outif, int raw)
{
	struct route *ro = &inp->inp_route;
	struct in_ifaddr *ia = NULL;
	struct sockaddr_in sin;
	int error = 0;
	boolean_t restricted = FALSE;

	if (outif != NULL)
		*outif = NULL;
	if (nam->sa_len != sizeof (struct sockaddr_in))
		return (EINVAL);
	if (SIN(nam)->sin_family != AF_INET)
		return (EAFNOSUPPORT);
	if (raw == 0 && SIN(nam)->sin_port == 0)
		return (EADDRNOTAVAIL);

	/*
	 * If the destination address is INADDR_ANY,
	 * use the primary local address.
	 * If the supplied address is INADDR_BROADCAST,
	 * and the primary interface supports broadcast,
	 * choose the broadcast address for that interface.
	 */
	if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
	    SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) {
		lck_rw_lock_shared(in_ifaddr_rwlock);
		if (!TAILQ_EMPTY(&in_ifaddrhead)) {
			ia = TAILQ_FIRST(&in_ifaddrhead);
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
				SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
			} else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
				SIN(nam)->sin_addr =
				    SIN(&ia->ia_broadaddr)->sin_addr;
			}
			IFA_UNLOCK(&ia->ia_ifa);
			ia = NULL;
		}
		lck_rw_done(in_ifaddr_rwlock);
	}
	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp->inp_laddr.s_addr != INADDR_ANY) {
		VERIFY(ia == NULL);
		*laddr = inp->inp_laddr;
		return (0);
	}

	/*
	 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
	 * then it overrides the sticky ifscope set for the socket.
	 */
	if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF))
		ifscope = inp->inp_boundifp->if_index;

	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 * Note that we should check the address family of the cached
	 * destination, in case of sharing the cache with IPv6.
	 */
	if (ro->ro_rt != NULL)
		RT_LOCK_SPIN(ro->ro_rt);
	if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
	    SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
	    (inp->inp_socket->so_options & SO_DONTROUTE)) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);
	}
	if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
	    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);
		/* No route yet, so try to acquire one */
		bzero(&ro->ro_dst, sizeof (struct sockaddr_in));
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof (struct sockaddr_in);
		SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
		rtalloc_scoped(ro, ifscope);
		if (ro->ro_rt != NULL)
			RT_LOCK_SPIN(ro->ro_rt);
	}
	/* Sanitized local copy for interface address searches */
	bzero(&sin, sizeof (sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof (struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
	/*
	 * If we did not find (or use) a route, assume dest is reachable
	 * on a directly connected network and try to find a corresponding
	 * interface to take the source address from.
	 */
	if (ro->ro_rt == NULL) {
		proc_t proc = current_proc();

		VERIFY(ia == NULL);
		ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
		if (ia == NULL)
			ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
		error = ((ia == NULL) ? ENETUNREACH : 0);

		if (apn_fallback_required(proc, inp->inp_socket,
		    (void *)nam))
			apn_fallback_trigger(proc);

		goto done;
	}
	RT_LOCK_ASSERT_HELD(ro->ro_rt);
	/*
	 * If the outgoing interface on the route found is not
	 * a loopback interface, use the address from that interface.
	 */
	if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
		VERIFY(ia == NULL);
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 * Apply the same logic for expensive interfaces.
		 */
		if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
			RT_UNLOCK(ro->ro_rt);
			ROUTE_RELEASE(ro);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			ia = ifatoia(ro->ro_rt->rt_ifa);
			IFA_ADDREF(&ia->ia_ifa);
			RT_UNLOCK(ro->ro_rt);
			error = 0;
		}
		goto done;
	}
	VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
	RT_UNLOCK(ro->ro_rt);
	/*
	 * The outgoing interface is marked with 'loopback net', so a route
	 * to ourselves is here.
	 * Try to find the interface of the destination address and then
	 * take the address from there.  That interface is not necessarily
	 * a loopback interface.
	 */
	VERIFY(ia == NULL);
	ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
	if (ia == NULL)
		ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
	if (ia == NULL)
		ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
	if (ia == NULL) {
		RT_LOCK(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL)
			IFA_ADDREF(&ia->ia_ifa);
		RT_UNLOCK(ro->ro_rt);
	}
	error = ((ia == NULL) ? ENETUNREACH : 0);

done:
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
	    inp->inp_moptions != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp;

		imo = inp->inp_moptions;
		IMO_LOCK(imo);
		if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
		    ia->ia_ifp != imo->imo_multicast_ifp)) {
			ifp = imo->imo_multicast_ifp;
			if (ia != NULL)
				IFA_REMREF(&ia->ia_ifa);
			lck_rw_lock_shared(in_ifaddr_rwlock);
			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
				if (ia->ia_ifp == ifp)
					break;
			}
			if (ia != NULL)
				IFA_ADDREF(&ia->ia_ifa);
			lck_rw_done(in_ifaddr_rwlock);
			if (ia == NULL)
				error = EADDRNOTAVAIL;
			else
				error = 0;
		}
		IMO_UNLOCK(imo);
	}
	/*
	 * Don't do pcblookup call here; return interface in laddr
	 * and exit to caller, that will do the lookup.
	 */
	if (ia != NULL) {
		/*
		 * If the source address belongs to a cellular interface
		 * and the socket forbids our using interfaces of such
		 * type, pretend that there is no source address.
		 * Apply the same logic for expensive interfaces.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
			IFA_UNLOCK(&ia->ia_ifa);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else if (error == 0) {
			*laddr = ia->ia_addr.sin_addr;
			if (outif != NULL) {
				struct ifnet *ifp;

				if (ro->ro_rt != NULL)
					ifp = ro->ro_rt->rt_ifp;
				else
					ifp = ia->ia_ifp;

				VERIFY(ifp != NULL);
				IFA_CONVERT_LOCK(&ia->ia_ifa);
				ifnet_reference(ifp);	/* for caller */
				if (*outif != NULL)
					ifnet_release(*outif);
				*outif = ifp;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		} else {
			IFA_UNLOCK(&ia->ia_ifa);
		}
		IFA_REMREF(&ia->ia_ifa);
		ia = NULL;
	}

	if (restricted && error == EHOSTUNREACH) {
		soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_IFDENIED));
	}

	return (error);
}

/*
 * Outer subroutine:
 * Connect from a socket to a specified address.
 * Both address and port must be specified in argument sin.
 * If don't have a local address for this socket yet,
 * then pick one.
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
	struct inpcb *pcb;
	int error;
	struct socket *so = inp->inp_socket;

	/*
	 * Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0)
		return (error);

	socket_unlock(so, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state.  When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0)
		return (ECONNREFUSED);

	if (pcb != NULL) {
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return (EADDRINUSE);
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			error = in_pcbbind(inp, NULL, p);
			if (error)
				return (error);
		}
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		/*
		 * Usage of IP_PKTINFO without a local port already
		 * specified will cause the kernel to panic,
		 * see rdar://problem/18508185.
		 * For now return an error to avoid a kernel panic.
		 * This routine can be refactored to handle this better
		 * in the future.
		 */
		if (inp->inp_lport == 0)
			return (EINVAL);
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
	}
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
		nstat_pcb_invalidate_cache(inp);
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	return (0);
}

void
in_pcbdisconnect(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
		nstat_pcb_cache(inp);

	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;

	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
		/* lock inversion issue, mostly with udp multicast packets */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	/*
	 * A multipath subflow socket would have its SS_NOFDREF set by default,
	 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
	 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF))
		in_pcbdetach(inp);
}

void
in_pcbdetach(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (so->so_pcb == NULL) {
		/* PCB has been disposed */
		panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__,
		    inp, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if IPSEC
	if (inp->inp_sp != NULL) {
		(void) ipsec4_delete_pcbpolicy(inp);
	}
#endif /* IPSEC */

	/*
	 * Let NetworkStatistics know this PCB is going away
	 * before we detach it.
	 */
	if (nstat_collect &&
	    (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP))
		nstat_pcb_detach(inp);

	/* Free memory buffer held for generating keep alives */
	if (inp->inp_keepalive_data != NULL) {
		FREE(inp->inp_keepalive_data, M_TEMP);
		inp->inp_keepalive_data = NULL;
	}

	/* mark socket state as dead */
	if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
		panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
		    __func__, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

	if (!(so->so_flags & SOF_PCBCLEARING)) {
		struct ip_moptions *imo;

		inp->inp_vflag = 0;
		if (inp->inp_options != NULL) {
			(void) m_free(inp->inp_options);
			inp->inp_options = NULL;
		}
		ROUTE_RELEASE(&inp->inp_route);
		imo = inp->inp_moptions;
		inp->inp_moptions = NULL;
		sofreelastref(so, 0);
		inp->inp_state = INPCB_STATE_DEAD;
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;

		inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);

		/*
		 * See inp_join_group() for why we need to unlock
		 */
		if (imo != NULL) {
			socket_unlock(so, 0);
			IMO_REMREF(imo);
			socket_lock(so, 0);
		}
	}
}


void
in_pcbdispose(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	if (so != NULL && so->so_usecount != 0) {
		panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n",
		    __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
		    solockhistory_nr(so));
		/* NOTREACHED */
	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
		if (so != NULL) {
			panic_plain("%s: inp %p invalid wantcnt %d, so %p "
			    "[%d,%d] usecount %d retaincnt %d state 0x%x "
			    "flags 0x%x lockhistory %s\n", __func__, inp,
			    inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
			    so->so_usecount, so->so_retaincnt, so->so_state,
			    so->so_flags, solockhistory_nr(so));
			/* NOTREACHED */
		} else {
			panic("%s: inp %p invalid wantcnt %d no socket\n",
			    __func__, inp, inp->inp_wantcnt);
			/* NOTREACHED */
		}
	}

	lck_rw_assert(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);

	inp->inp_gencnt = ++ipi->ipi_gencnt;
	/* access ipi in in_pcbremlists */
	in_pcbremlists(inp);

	if (so != NULL) {
		if (so->so_proto->pr_flags & PR_PCBLOCK) {
			sofreelastref(so, 0);
			if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
				/*
				 * selthreadclear() already called
				 * during sofreelastref() above.
				 */
				sbrelease(&so->so_rcv);
				sbrelease(&so->so_snd);
			}
			if (so->so_head != NULL) {
				panic("%s: so=%p head still exist\n",
				    __func__, so);
				/* NOTREACHED */
			}
			lck_mtx_unlock(&inp->inpcb_mtx);
			lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
		}
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;
		so->so_saved_pcb = (caddr_t)inp;
		so->so_pcb = NULL;
		inp->inp_socket = NULL;
#if CONFIG_MACF_NET
		mac_inpcb_label_destroy(inp);
#endif /* CONFIG_MACF_NET */
#if NECP
		necp_inpcb_dispose(inp);
#endif /* NECP */
		/*
		 * In case there is a route cached after a detach (possible
		 * in the tcp case), make sure that it is freed before
		 * we deallocate the structure.
		 */
		ROUTE_RELEASE(&inp->inp_route);
		if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
			zfree(ipi->ipi_zone, inp);
		}
		sodealloc(so);
	}
}

/*
 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
 * in struct pr_usrreqs, so that protocols can just reference them directly
 * without the need for a wrapper function.
 */
1720 int
1721 in_getsockaddr(struct socket *so, struct sockaddr **nam)
1722 {
1723 struct inpcb *inp;
1724 struct sockaddr_in *sin;
1725
1726 /*
1727 * Do the malloc first in case it blocks.
1728 */
1729 MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
1730 if (sin == NULL)
1731 return (ENOBUFS);
1732 bzero(sin, sizeof (*sin));
1733 sin->sin_family = AF_INET;
1734 sin->sin_len = sizeof (*sin);
1735
1736 if ((inp = sotoinpcb(so)) == NULL) {
1737 FREE(sin, M_SONAME);
1738 return (EINVAL);
1739 }
1740 sin->sin_port = inp->inp_lport;
1741 sin->sin_addr = inp->inp_laddr;
1742
1743 *nam = (struct sockaddr *)sin;
1744 return (0);
1745 }
1746
1747 int
1748 in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss)
1749 {
1750 struct sockaddr_in *sin = SIN(ss);
1751 struct inpcb *inp;
1752
1753 VERIFY(ss != NULL);
1754 bzero(ss, sizeof (*ss));
1755
1756 sin->sin_family = AF_INET;
1757 sin->sin_len = sizeof (*sin);
1758
1759 if ((inp = sotoinpcb(so)) == NULL
1760 #if NECP
1761 || (necp_socket_should_use_flow_divert(inp))
1762 #endif /* NECP */
1763 )
1764 return (inp == NULL ? EINVAL : EPROTOTYPE);
1765
1766 sin->sin_port = inp->inp_lport;
1767 sin->sin_addr = inp->inp_laddr;
1768 return (0);
1769 }
1770
1771 int
1772 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
1773 {
1774 struct inpcb *inp;
1775 struct sockaddr_in *sin;
1776
1777 /*
1778 * Do the malloc first in case it blocks.
1779 */
1780 MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
1781 if (sin == NULL)
1782 return (ENOBUFS);
1783 bzero((caddr_t)sin, sizeof (*sin));
1784 sin->sin_family = AF_INET;
1785 sin->sin_len = sizeof (*sin);
1786
1787 if ((inp = sotoinpcb(so)) == NULL) {
1788 FREE(sin, M_SONAME);
1789 return (EINVAL);
1790 }
1791 sin->sin_port = inp->inp_fport;
1792 sin->sin_addr = inp->inp_faddr;
1793
1794 *nam = (struct sockaddr *)sin;
1795 return (0);
1796 }
1797
1798 int
1799 in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss)
1800 {
1801 struct sockaddr_in *sin = SIN(ss);
1802 struct inpcb *inp;
1803
1804 VERIFY(ss != NULL);
1805 bzero(ss, sizeof (*ss));
1806
1807 sin->sin_family = AF_INET;
1808 sin->sin_len = sizeof (*sin);
1809
1810 if ((inp = sotoinpcb(so)) == NULL
1811 #if NECP
1812 || (necp_socket_should_use_flow_divert(inp))
1813 #endif /* NECP */
1814 ) {
1815 return (inp == NULL ? EINVAL : EPROTOTYPE);
1816 }
1817
1818 sin->sin_port = inp->inp_fport;
1819 sin->sin_addr = inp->inp_faddr;
1820 return (0);
1821 }
1822
1823 void
1824 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1825 int errno, void (*notify)(struct inpcb *, int))
1826 {
1827 struct inpcb *inp;
1828
1829 lck_rw_lock_shared(pcbinfo->ipi_lock);
1830
1831 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1832 #if INET6
1833 if (!(inp->inp_vflag & INP_IPV4))
1834 continue;
1835 #endif /* INET6 */
1836 if (inp->inp_faddr.s_addr != faddr.s_addr ||
1837 inp->inp_socket == NULL)
1838 continue;
1839 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
1840 continue;
1841 socket_lock(inp->inp_socket, 1);
1842 (*notify)(inp, errno);
1843 (void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
1844 socket_unlock(inp->inp_socket, 1);
1845 }
1846 lck_rw_done(pcbinfo->ipi_lock);
1847 }
1848
1849 /*
1850 * Check for alternatives when the higher level complains
1851 * about service problems. For now, invalidate cached
1852 * routing information. If the route was created dynamically
1853 * (by a redirect), it is time to try a default gateway again.
1854 */
1855 void
1856 in_losing(struct inpcb *inp)
1857 {
1858 boolean_t release = FALSE;
1859 struct rtentry *rt;
1860
1861 if ((rt = inp->inp_route.ro_rt) != NULL) {
1862 struct in_ifaddr *ia = NULL;
1863
1864 RT_LOCK(rt);
1865 if (rt->rt_flags & RTF_DYNAMIC) {
1866 /*
1867 * Prevent another thread from modifying rt_key,
1868 * rt_gateway via rt_setgate() after rt_lock is
1869 * dropped by marking the route as defunct.
1870 */
1871 rt->rt_flags |= RTF_CONDEMNED;
1872 RT_UNLOCK(rt);
1873 (void) rtrequest(RTM_DELETE, rt_key(rt),
1874 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
1875 } else {
1876 RT_UNLOCK(rt);
1877 }
1878 /* if the address is gone, keep the old route in the pcb */
1879 if (inp->inp_laddr.s_addr != INADDR_ANY &&
1880 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1881 /*
1882 * Address is around; ditch the route. A new route
1883 * can be allocated the next time output is attempted.
1884 */
1885 release = TRUE;
1886 }
1887 if (ia != NULL)
1888 IFA_REMREF(&ia->ia_ifa);
1889 }
1890 if (rt == NULL || release)
1891 ROUTE_RELEASE(&inp->inp_route);
1892 }
1893
1894 /*
1895 * After a routing change, flush the old route
1896 * and allocate a (hopefully) better one.
1897 */
1898 void
1899 in_rtchange(struct inpcb *inp, int errno)
1900 {
1901 #pragma unused(errno)
1902 boolean_t release = FALSE;
1903 struct rtentry *rt;
1904
1905 if ((rt = inp->inp_route.ro_rt) != NULL) {
1906 struct in_ifaddr *ia = NULL;
1907
1908 /* if address is gone, keep the old route */
1909 if (inp->inp_laddr.s_addr != INADDR_ANY &&
1910 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1911 /*
1912 * Address is around; ditch the route. A new route
1913 * can be allocated the next time output is attempted.
1914 */
1915 release = TRUE;
1916 }
1917 if (ia != NULL)
1918 IFA_REMREF(&ia->ia_ifa);
1919 }
1920 if (rt == NULL || release)
1921 ROUTE_RELEASE(&inp->inp_route);
1922 }
1923
1924 /*
1925 * Lookup a PCB based on the local address and port.
1926 */
1927 struct inpcb *
1928 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
1929 unsigned int lport_arg, int wild_okay)
1930 {
1931 struct inpcb *inp;
1932 int matchwild = 3, wildcard;
1933 u_short lport = lport_arg;
1934
1935 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);
1936
1937 if (!wild_okay) {
1938 struct inpcbhead *head;
1939 /*
1940 * Look for an unconnected (wildcard foreign addr) PCB that
1941 * matches the local address and port we're looking for.
1942 */
1943 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
1944 pcbinfo->ipi_hashmask)];
1945 LIST_FOREACH(inp, head, inp_hash) {
1946 #if INET6
1947 if (!(inp->inp_vflag & INP_IPV4))
1948 continue;
1949 #endif /* INET6 */
1950 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1951 inp->inp_laddr.s_addr == laddr.s_addr &&
1952 inp->inp_lport == lport) {
1953 /*
1954 * Found.
1955 */
1956 return (inp);
1957 }
1958 }
1959 /*
1960 * Not found.
1961 */
1962 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
1963 return (NULL);
1964 } else {
1965 struct inpcbporthead *porthash;
1966 struct inpcbport *phd;
1967 struct inpcb *match = NULL;
1968 /*
1969 * Best fit PCB lookup.
1970 *
1971 * First see if this local port is in use by looking on the
1972 * port hash list.
1973 */
1974 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
1975 pcbinfo->ipi_porthashmask)];
1976 LIST_FOREACH(phd, porthash, phd_hash) {
1977 if (phd->phd_port == lport)
1978 break;
1979 }
1980 if (phd != NULL) {
1981 /*
1982 * Port is in use by one or more PCBs. Look for best
1983 * fit.
1984 */
1985 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
1986 wildcard = 0;
1987 #if INET6
1988 if (!(inp->inp_vflag & INP_IPV4))
1989 continue;
1990 #endif /* INET6 */
1991 if (inp->inp_faddr.s_addr != INADDR_ANY)
1992 wildcard++;
1993 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1994 if (laddr.s_addr == INADDR_ANY)
1995 wildcard++;
1996 else if (inp->inp_laddr.s_addr !=
1997 laddr.s_addr)
1998 continue;
1999 } else {
2000 if (laddr.s_addr != INADDR_ANY)
2001 wildcard++;
2002 }
2003 if (wildcard < matchwild) {
2004 match = inp;
2005 matchwild = wildcard;
2006 if (matchwild == 0) {
2007 break;
2008 }
2009 }
2010 }
2011 }
2012 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
2013 0, 0, 0, 0);
2014 return (match);
2015 }
2016 }
2017
2018 /*
2019 * Check if PCB exists in hash list.
2020 */
2021 int
2022 in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2023 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2024 uid_t *uid, gid_t *gid, struct ifnet *ifp)
2025 {
2026 struct inpcbhead *head;
2027 struct inpcb *inp;
2028 u_short fport = fport_arg, lport = lport_arg;
2029 int found = 0;
2030 struct inpcb *local_wild = NULL;
2031 #if INET6
2032 struct inpcb *local_wild_mapped = NULL;
2033 #endif /* INET6 */
2034
2035 *uid = UID_MAX;
2036 *gid = GID_MAX;
2037
2038 /*
2039 * We may have found the pcb in the last lookup - check this first.
2040 */
2041
2042 lck_rw_lock_shared(pcbinfo->ipi_lock);
2043
2044 /*
2045 * First look for an exact match.
2046 */
2047 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2048 pcbinfo->ipi_hashmask)];
2049 LIST_FOREACH(inp, head, inp_hash) {
2050 #if INET6
2051 if (!(inp->inp_vflag & INP_IPV4))
2052 continue;
2053 #endif /* INET6 */
2054 if (inp_restricted_recv(inp, ifp))
2055 continue;
2056
2057 if (inp->inp_faddr.s_addr == faddr.s_addr &&
2058 inp->inp_laddr.s_addr == laddr.s_addr &&
2059 inp->inp_fport == fport &&
2060 inp->inp_lport == lport) {
2061 if ((found = (inp->inp_socket != NULL))) {
2062 /*
2063 * Found.
2064 */
2065 *uid = kauth_cred_getuid(
2066 inp->inp_socket->so_cred);
2067 *gid = kauth_cred_getgid(
2068 inp->inp_socket->so_cred);
2069 }
2070 lck_rw_done(pcbinfo->ipi_lock);
2071 return (found);
2072 }
2073 }
2074
2075 if (!wildcard) {
2076 /*
2077 * Not found.
2078 */
2079 lck_rw_done(pcbinfo->ipi_lock);
2080 return (0);
2081 }
2082
2083 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2084 pcbinfo->ipi_hashmask)];
2085 LIST_FOREACH(inp, head, inp_hash) {
2086 #if INET6
2087 if (!(inp->inp_vflag & INP_IPV4))
2088 continue;
2089 #endif /* INET6 */
2090 if (inp_restricted_recv(inp, ifp))
2091 continue;
2092
2093 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2094 inp->inp_lport == lport) {
2095 if (inp->inp_laddr.s_addr == laddr.s_addr) {
2096 if ((found = (inp->inp_socket != NULL))) {
2097 *uid = kauth_cred_getuid(
2098 inp->inp_socket->so_cred);
2099 *gid = kauth_cred_getgid(
2100 inp->inp_socket->so_cred);
2101 }
2102 lck_rw_done(pcbinfo->ipi_lock);
2103 return (found);
2104 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2105 #if INET6
2106 if (inp->inp_socket &&
2107 SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
2108 local_wild_mapped = inp;
2109 else
2110 #endif /* INET6 */
2111 local_wild = inp;
2112 }
2113 }
2114 }
2115 if (local_wild == NULL) {
2116 #if INET6
2117 if (local_wild_mapped != NULL) {
2118 if ((found = (local_wild_mapped->inp_socket != NULL))) {
2119 *uid = kauth_cred_getuid(
2120 local_wild_mapped->inp_socket->so_cred);
2121 *gid = kauth_cred_getgid(
2122 local_wild_mapped->inp_socket->so_cred);
2123 }
2124 lck_rw_done(pcbinfo->ipi_lock);
2125 return (found);
2126 }
2127 #endif /* INET6 */
2128 lck_rw_done(pcbinfo->ipi_lock);
2129 return (0);
2130 }
2131 if ((found = (local_wild->inp_socket != NULL))) {
2132 *uid = kauth_cred_getuid(
2133 local_wild->inp_socket->so_cred);
2134 *gid = kauth_cred_getgid(
2135 local_wild->inp_socket->so_cred);
2136 }
2137 lck_rw_done(pcbinfo->ipi_lock);
2138 return (found);
2139 }
2140
2141 /*
2142 * Lookup PCB in hash list.
2143 */
2144 struct inpcb *
2145 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2146 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2147 struct ifnet *ifp)
2148 {
2149 struct inpcbhead *head;
2150 struct inpcb *inp;
2151 u_short fport = fport_arg, lport = lport_arg;
2152 struct inpcb *local_wild = NULL;
2153 #if INET6
2154 struct inpcb *local_wild_mapped = NULL;
2155 #endif /* INET6 */
2156
2157 /*
2158 * We may have found the pcb in the last lookup - check this first.
2159 */
2160
2161 lck_rw_lock_shared(pcbinfo->ipi_lock);
2162
2163 /*
2164 * First look for an exact match.
2165 */
2166 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2167 pcbinfo->ipi_hashmask)];
2168 LIST_FOREACH(inp, head, inp_hash) {
2169 #if INET6
2170 if (!(inp->inp_vflag & INP_IPV4))
2171 continue;
2172 #endif /* INET6 */
2173 if (inp_restricted_recv(inp, ifp))
2174 continue;
2175
2176 if (inp->inp_faddr.s_addr == faddr.s_addr &&
2177 inp->inp_laddr.s_addr == laddr.s_addr &&
2178 inp->inp_fport == fport &&
2179 inp->inp_lport == lport) {
2180 /*
2181 * Found.
2182 */
2183 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2184 WNT_STOPUSING) {
2185 lck_rw_done(pcbinfo->ipi_lock);
2186 return (inp);
2187 } else {
2188 /* it's there but dead; say it isn't found */
2189 lck_rw_done(pcbinfo->ipi_lock);
2190 return (NULL);
2191 }
2192 }
2193 }
2194
2195 if (!wildcard) {
2196 /*
2197 * Not found.
2198 */
2199 lck_rw_done(pcbinfo->ipi_lock);
2200 return (NULL);
2201 }
2202
2203 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2204 pcbinfo->ipi_hashmask)];
2205 LIST_FOREACH(inp, head, inp_hash) {
2206 #if INET6
2207 if (!(inp->inp_vflag & INP_IPV4))
2208 continue;
2209 #endif /* INET6 */
2210 if (inp_restricted_recv(inp, ifp))
2211 continue;
2212
2213 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2214 inp->inp_lport == lport) {
2215 if (inp->inp_laddr.s_addr == laddr.s_addr) {
2216 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2217 WNT_STOPUSING) {
2218 lck_rw_done(pcbinfo->ipi_lock);
2219 return (inp);
2220 } else {
2221 /* it's dead; say it isn't found */
2222 lck_rw_done(pcbinfo->ipi_lock);
2223 return (NULL);
2224 }
2225 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2226 #if INET6
2227 if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
2228 local_wild_mapped = inp;
2229 else
2230 #endif /* INET6 */
2231 local_wild = inp;
2232 }
2233 }
2234 }
2235 if (local_wild == NULL) {
2236 #if INET6
2237 if (local_wild_mapped != NULL) {
2238 if (in_pcb_checkstate(local_wild_mapped,
2239 WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2240 lck_rw_done(pcbinfo->ipi_lock);
2241 return (local_wild_mapped);
2242 } else {
2243 /* it's dead; say it isn't found */
2244 lck_rw_done(pcbinfo->ipi_lock);
2245 return (NULL);
2246 }
2247 }
2248 #endif /* INET6 */
2249 lck_rw_done(pcbinfo->ipi_lock);
2250 return (NULL);
2251 }
2252 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2253 lck_rw_done(pcbinfo->ipi_lock);
2254 return (local_wild);
2255 }
2256 /*
2257 * It's either not found or is already dead.
2258 */
2259 lck_rw_done(pcbinfo->ipi_lock);
2260 return (NULL);
2261 }
2262
2263 /*
2264 * @brief Insert PCB onto various hash lists.
2265 *
2266 * @param inp Pointer to internet protocol control block
2267 * @param locked Indicates whether ipi_lock (protecting the
2268 * pcb list) is already held by the caller.
2269 *
2270 * @return 0 on success, an error code on failure
2271 */
2272 int
2273 in_pcbinshash(struct inpcb *inp, int locked)
2274 {
2275 struct inpcbhead *pcbhash;
2276 struct inpcbporthead *pcbporthash;
2277 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
2278 struct inpcbport *phd;
2279 u_int32_t hashkey_faddr;
2280
2281 if (!locked) {
2282 if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
2283 /*
2284 * Avoid a lock inversion issue, seen mostly
2285 * with UDP multicast packets.
2286 */
2287 socket_unlock(inp->inp_socket, 0);
2288 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
2289 socket_lock(inp->inp_socket, 0);
2290 }
2291 }
2292
2293 /*
2294 * This routine or its caller may have briefly given
2295 * up the socket's protocol lock.
2296 * During that time the socket may have been dropped.
2297 * Guard against that here.
2298 */
2299 if (inp->inp_state == INPCB_STATE_DEAD) {
2300 if (!locked) {
2301 lck_rw_done(pcbinfo->ipi_lock);
2302 }
2303 return (ECONNABORTED);
2304 }
2305
2306
2307 #if INET6
2308 if (inp->inp_vflag & INP_IPV6)
2309 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2310 else
2311 #endif /* INET6 */
2312 hashkey_faddr = inp->inp_faddr.s_addr;
2313
2314 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2315 inp->inp_fport, pcbinfo->ipi_hashmask);
2316
2317 pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];
2318
2319 pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
2320 pcbinfo->ipi_porthashmask)];
2321
2322 /*
2323 * Go through port list and look for a head for this lport.
2324 */
2325 LIST_FOREACH(phd, pcbporthash, phd_hash) {
2326 if (phd->phd_port == inp->inp_lport)
2327 break;
2328 }
2329
2330 /*
2331 * If none exists, malloc one and tack it on.
2332 */
2333 if (phd == NULL) {
2334 MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport),
2335 M_PCB, M_WAITOK);
2336 if (phd == NULL) {
2337 if (!locked)
2338 lck_rw_done(pcbinfo->ipi_lock);
2339 return (ENOBUFS); /* XXX */
2340 }
2341 phd->phd_port = inp->inp_lport;
2342 LIST_INIT(&phd->phd_pcblist);
2343 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
2344 }
2345
2346 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2347 inp->inp_phd = phd;
2348 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
2349 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
2350 inp->inp_flags2 |= INP2_INHASHLIST;
2351
2352 if (!locked)
2353 lck_rw_done(pcbinfo->ipi_lock);
2354
2355 #if NECP
2356 // This call catches the original setting of the local address
2357 inp_update_necp_policy(inp, NULL, NULL, 0);
2358 #endif /* NECP */
2359
2360 return (0);
2361 }
2362
2363 /*
2364 * Move PCB to the proper hash bucket when { faddr, fport } have been
2365 * changed. NOTE: This does not handle the case of the lport changing (the
2366 * hashed port list would have to be updated as well), so the lport must
2367 * not change after in_pcbinshash() has been called.
2368 */
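/*
 * Sketch of the intended call order (connect-time code elsewhere in
 * the stack follows this shape; surrounding locking and error
 * handling are omitted):
 *
 *	inp->inp_faddr = sin->sin_addr;
 *	inp->inp_fport = sin->sin_port;
 *	in_pcbrehash(inp);	// inp_lport must already be final
 */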
2369 void
2370 in_pcbrehash(struct inpcb *inp)
2371 {
2372 struct inpcbhead *head;
2373 u_int32_t hashkey_faddr;
2374
2375 #if INET6
2376 if (inp->inp_vflag & INP_IPV6)
2377 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2378 else
2379 #endif /* INET6 */
2380 hashkey_faddr = inp->inp_faddr.s_addr;
2381
2382 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2383 inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
2384 head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];
2385
2386 if (inp->inp_flags2 & INP2_INHASHLIST) {
2387 LIST_REMOVE(inp, inp_hash);
2388 inp->inp_flags2 &= ~INP2_INHASHLIST;
2389 }
2390
2391 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2392 LIST_INSERT_HEAD(head, inp, inp_hash);
2393 inp->inp_flags2 |= INP2_INHASHLIST;
2394
2395 #if NECP
2396 // This call catches updates to the remote addresses
2397 inp_update_necp_policy(inp, NULL, NULL, 0);
2398 #endif /* NECP */
2399 }
2400
2401 /*
2402 * Remove PCB from various lists.
2403 * Must be called with the pcbinfo lock held in exclusive mode.
2404 */
2405 void
2406 in_pcbremlists(struct inpcb *inp)
2407 {
2408 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
2409
2410 /*
2411 * Check if it's in hashlist -- an inp is placed in hashlist when
2412 * its local port gets assigned. So it should also be present
2413 * in the port list.
2414 */
2415 if (inp->inp_flags2 & INP2_INHASHLIST) {
2416 struct inpcbport *phd = inp->inp_phd;
2417
2418 VERIFY(phd != NULL && inp->inp_lport > 0);
2419
2420 LIST_REMOVE(inp, inp_hash);
2421 inp->inp_hash.le_next = NULL;
2422 inp->inp_hash.le_prev = NULL;
2423
2424 LIST_REMOVE(inp, inp_portlist);
2425 inp->inp_portlist.le_next = NULL;
2426 inp->inp_portlist.le_prev = NULL;
2427 if (LIST_EMPTY(&phd->phd_pcblist)) {
2428 LIST_REMOVE(phd, phd_hash);
2429 FREE(phd, M_PCB);
2430 }
2431 inp->inp_phd = NULL;
2432 inp->inp_flags2 &= ~INP2_INHASHLIST;
2433 }
2434 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2435
2436 if (inp->inp_flags2 & INP2_TIMEWAIT) {
2437 /* Remove from time-wait queue */
2438 tcp_remove_from_time_wait(inp);
2439 inp->inp_flags2 &= ~INP2_TIMEWAIT;
2440 VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
2441 inp->inp_pcbinfo->ipi_twcount--;
2442 } else {
2443 /* Remove from global inp list if it is not time-wait */
2444 LIST_REMOVE(inp, inp_list);
2445 }
2446
2447 if (inp->inp_flags2 & INP2_IN_FCTREE) {
2448 inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE));
2449 VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
2450 }
2451
2452 inp->inp_pcbinfo->ipi_count--;
2453 }
2454
2455 /*
2456 * Mechanism used to defer the memory release of PCBs.
2457 * The pcb list will contain the pcb until the reaper can clean it up if
2458 * the following conditions are met:
2459 * 1) state is "DEAD",
2460 * 2) wantcnt is STOPUSING, and
2461 * 3) usecount is 0.
2462 * This function is called to mark the pcb as unusable (WNT_STOPUSING), or to acquire or release a want reference on it.
2463 */
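/*
 * Illustrative usage sketch, mirroring the pattern used by the
 * notify and lookup routines in this file: take a want reference
 * before using a pcb found on a global list, drop it when done.
 *
 *	if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
 *		continue;	// dead or being recycled; skip it
 *	socket_lock(inp->inp_socket, 1);
 *	// ... use the pcb ...
 *	(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
 *	socket_unlock(inp->inp_socket, 1);
 */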
2464 int
2465 in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
2466 {
2467 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
2468 UInt32 origwant;
2469 UInt32 newwant;
2470
2471 switch (mode) {
2472 case WNT_STOPUSING:
2473 /*
2474 * Try to mark the pcb as ready for recycling. CAS with
2475 * STOPUSING; on success we're done, and if it's still in
2476 * use it will be marked later.
2477 */
2478 if (locked == 0)
2479 socket_lock(pcb->inp_socket, 1);
2480 pcb->inp_state = INPCB_STATE_DEAD;
2481
2482 stopusing:
2483 if (pcb->inp_socket->so_usecount < 0) {
2484 panic("%s: pcb=%p so=%p usecount is negative\n",
2485 __func__, pcb, pcb->inp_socket);
2486 /* NOTREACHED */
2487 }
2488 if (locked == 0)
2489 socket_unlock(pcb->inp_socket, 1);
2490
2491 inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);
2492
2493 origwant = *wantcnt;
2494 if ((UInt16) origwant == 0xffff) /* should stop using */
2495 return (WNT_STOPUSING);
2496 newwant = 0xffff;
2497 if ((UInt16) origwant == 0) {
2498 /* try to mark it as unusable now */
2499 OSCompareAndSwap(origwant, newwant, wantcnt);
2500 }
2501 return (WNT_STOPUSING);
2502
2503 case WNT_ACQUIRE:
2504 /*
2505 * Try to increase the reference on the pcb. If the count
2506 * is already at WNT_STOPUSING, bail out since the pcb is
2507 * being recycled; otherwise increment the count.
2508 */
2509 do {
2510 origwant = *wantcnt;
2511 if ((UInt16) origwant == 0xffff) {
2512 /* should stop using */
2513 return (WNT_STOPUSING);
2514 }
2515 newwant = origwant + 1;
2516 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2517 return (WNT_ACQUIRE);
2518
2519 case WNT_RELEASE:
2520 /*
2521 * Release a reference. If the count drops to zero and the
2522 * pcb state is DEAD, set the want count to STOPUSING.
2523 */
2524 if (locked == 0)
2525 socket_lock(pcb->inp_socket, 1);
2526
2527 do {
2528 origwant = *wantcnt;
2529 if ((UInt16) origwant == 0x0) {
2530 panic("%s: pcb=%p release with zero count",
2531 __func__, pcb);
2532 /* NOTREACHED */
2533 }
2534 if ((UInt16) origwant == 0xffff) {
2535 /* should stop using */
2536 if (locked == 0)
2537 socket_unlock(pcb->inp_socket, 1);
2538 return (WNT_STOPUSING);
2539 }
2540 newwant = origwant - 1;
2541 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2542
2543 if (pcb->inp_state == INPCB_STATE_DEAD)
2544 goto stopusing;
2545 if (pcb->inp_socket->so_usecount < 0) {
2546 panic("%s: RELEASE pcb=%p so=%p usecount is negative\n",
2547 __func__, pcb, pcb->inp_socket);
2548 /* NOTREACHED */
2549 }
2550
2551 if (locked == 0)
2552 socket_unlock(pcb->inp_socket, 1);
2553 return (WNT_RELEASE);
2554
2555 default:
2556 panic("%s: so=%p not a valid state =%x\n", __func__,
2557 pcb->inp_socket, mode);
2558 /* NOTREACHED */
2559 }
2560
2561 /* NOTREACHED */
2562 return (mode);
2563 }
2564
2565 /*
2566 * inpcb_to_compat copies specific bits of an inpcb to an inpcb_compat.
2567 * The inpcb_compat data structure is passed to user space and must
2568 * not change. We intentionally avoid copying pointers.
2569 */
2570 void
2571 inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
2572 {
2573 bzero(inp_compat, sizeof (*inp_compat));
2574 inp_compat->inp_fport = inp->inp_fport;
2575 inp_compat->inp_lport = inp->inp_lport;
2576 inp_compat->nat_owner = 0;
2577 inp_compat->nat_cookie = 0;
2578 inp_compat->inp_gencnt = inp->inp_gencnt;
2579 inp_compat->inp_flags = inp->inp_flags;
2580 inp_compat->inp_flow = inp->inp_flow;
2581 inp_compat->inp_vflag = inp->inp_vflag;
2582 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
2583 inp_compat->inp_ip_p = inp->inp_ip_p;
2584 inp_compat->inp_dependfaddr.inp6_foreign =
2585 inp->inp_dependfaddr.inp6_foreign;
2586 inp_compat->inp_dependladdr.inp6_local =
2587 inp->inp_dependladdr.inp6_local;
2588 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2589 inp_compat->inp_depend6.inp6_hlim = 0;
2590 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2591 inp_compat->inp_depend6.inp6_ifindex = 0;
2592 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2593 }
2594
2595 void
2596 inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
2597 {
2598 xinp->inp_fport = inp->inp_fport;
2599 xinp->inp_lport = inp->inp_lport;
2600 xinp->inp_gencnt = inp->inp_gencnt;
2601 xinp->inp_flags = inp->inp_flags;
2602 xinp->inp_flow = inp->inp_flow;
2603 xinp->inp_vflag = inp->inp_vflag;
2604 xinp->inp_ip_ttl = inp->inp_ip_ttl;
2605 xinp->inp_ip_p = inp->inp_ip_p;
2606 xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
2607 xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
2608 xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2609 xinp->inp_depend6.inp6_hlim = 0;
2610 xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2611 xinp->inp_depend6.inp6_ifindex = 0;
2612 xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2613 }
2614
2615 /*
2616 * The following routines implement this scheme:
2617 *
2618 * Callers of ip_output() that intend to cache the route in the inpcb pass
2619 * a local copy of the struct route to ip_output(). Using a local copy of
2620 * the cached route significantly simplifies things as IP no longer has to
2621 * worry about having exclusive access to the passed in struct route, since
2622 * it's defined in the caller's stack; in essence, this allows for a lock-
2623 * less operation when updating the struct route at the IP level and below,
2624 * whenever necessary. The scheme works as follows:
2625 *
2626 * Prior to dropping the socket's lock and calling ip_output(), the caller
2627 * copies the struct route from the inpcb into its stack, and adds a reference
2628 * to the cached route entry, if there was any. The socket's lock is then
2629 * dropped and ip_output() is called with a pointer to the copy of struct
2630 * route defined on the stack (not to the one in the inpcb.)
2631 *
2632 * Upon returning from ip_output(), the caller then acquires the socket's
2633 * lock and synchronizes the cache; if there is no route cached in the inpcb,
2634 * it copies the local copy of struct route (which may or may not contain any
2635 * route) back into the cache; otherwise, if the inpcb has a route cached in
2636 * it, the one in the local copy will be freed, if there's any. Trashing the
2637 * cached route in the inpcb can be avoided because ip_output() is single-
2638 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
2639 * by the socket/transport layer.)
2640 */
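/*
 * Sketch of the caller pattern described above (the ip_output()
 * arguments are simplified; "so"/"inp" are a locked socket and
 * its pcb):
 *
 *	struct route ro;
 *
 *	inp_route_copyout(inp, &ro);	// copy cache, add rt reference
 *	socket_unlock(so, 0);
 *	error = ip_output(m, opt, &ro, flags, imo, &ipoa);
 *	socket_lock(so, 0);
 *	inp_route_copyin(inp, &ro);	// synchronize the pcb cache
 */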
2641 void
2642 inp_route_copyout(struct inpcb *inp, struct route *dst)
2643 {
2644 struct route *src = &inp->inp_route;
2645
2646 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
2647
2648 /*
2649 * If the route in the PCB is stale or not for IPv4, blow it away;
2650 * this is possible in the IPv4-mapped address case.
2651 */
2652 if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET)
2653 ROUTE_RELEASE(src);
2654
2655 route_copyout(dst, src, sizeof (*dst));
2656 }
2657
2658 void
2659 inp_route_copyin(struct inpcb *inp, struct route *src)
2660 {
2661 struct route *dst = &inp->inp_route;
2662
2663 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
2664
2665 /* Minor sanity check */
2666 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
2667 panic("%s: wrong or corrupted route: %p", __func__, src);
2668
2669 route_copyin(src, dst, sizeof (*src));
2670 }
2671
2672 /*
2673 * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option.
2674 */
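/*
 * For reference, a minimal user-space sketch of how this handler is
 * reached ("en0" is just an example interface name):
 *
 *	int idx = if_nametoindex("en0");
 *	setsockopt(s, IPPROTO_IP, IP_BOUND_IF, &idx, sizeof (idx));
 *
 * Passing a zero index clears the binding, as described below.
 */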
2675 int
2676 inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
2677 {
2678 struct ifnet *ifp = NULL;
2679
2680 ifnet_head_lock_shared();
2681 if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
2682 (ifp = ifindex2ifnet[ifscope]) == NULL)) {
2683 ifnet_head_done();
2684 return (ENXIO);
2685 }
2686 ifnet_head_done();
2687
2688 VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);
2689
2690 /*
2691 * A zero interface scope value indicates an "unbind".
2692 * Otherwise, take in whatever value the app desires;
2693 * the app may already know the scope (or force itself
2694 * to such a scope) ahead of time before the interface
2695 * gets attached. It doesn't matter either way; any
2696 * route lookup from this point on will require an
2697 * exact match for the embedded interface scope.
2698 */
2699 inp->inp_boundifp = ifp;
2700 if (inp->inp_boundifp == NULL)
2701 inp->inp_flags &= ~INP_BOUND_IF;
2702 else
2703 inp->inp_flags |= INP_BOUND_IF;
2704
2705 /* Blow away any cached route in the PCB */
2706 ROUTE_RELEASE(&inp->inp_route);
2707
2708 if (pifp != NULL)
2709 *pifp = ifp;
2710
2711 return (0);
2712 }
2713
2714 /*
2715 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2716 * as well as for setting PROC_UUID_NO_CELLULAR policy.
2717 */
2718 void
2719 inp_set_nocellular(struct inpcb *inp)
2720 {
2721 inp->inp_flags |= INP_NO_IFT_CELLULAR;
2722
2723 /* Blow away any cached route in the PCB */
2724 ROUTE_RELEASE(&inp->inp_route);
2725 }
2726
2727 /*
2728 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2729 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
2730 */
2731 void
2732 inp_clear_nocellular(struct inpcb *inp)
2733 {
2734 struct socket *so = inp->inp_socket;
2735
2736 /*
2737 * An SO_RESTRICT_DENY_CELLULAR restriction issued on the socket
2738 * has a higher precedence than INP_NO_IFT_CELLULAR. Clear the flag
2739 * if and only if the socket is unrestricted.
2740 */
2741 if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
2742 inp->inp_flags &= ~INP_NO_IFT_CELLULAR;
2743
2744 /* Blow away any cached route in the PCB */
2745 ROUTE_RELEASE(&inp->inp_route);
2746 }
2747 }
2748
2749 void
2750 inp_set_noexpensive(struct inpcb *inp)
2751 {
2752 inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;
2753
2754 /* Blow away any cached route in the PCB */
2755 ROUTE_RELEASE(&inp->inp_route);
2756 }
2757
2758 void
2759 inp_set_awdl_unrestricted(struct inpcb *inp)
2760 {
2761 inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;
2762
2763 /* Blow away any cached route in the PCB */
2764 ROUTE_RELEASE(&inp->inp_route);
2765 }
2766
2767 boolean_t
2768 inp_get_awdl_unrestricted(struct inpcb *inp)
2769 {
2770 return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
2771 }
2772
2773 void
2774 inp_clear_awdl_unrestricted(struct inpcb *inp)
2775 {
2776 inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;
2777
2778 /* Blow away any cached route in the PCB */
2779 ROUTE_RELEASE(&inp->inp_route);
2780 }
2781
2782 void
2783 inp_set_intcoproc_allowed(struct inpcb *inp)
2784 {
2785 inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
2786
2787 /* Blow away any cached route in the PCB */
2788 ROUTE_RELEASE(&inp->inp_route);
2789 }
2790
2791 boolean_t
2792 inp_get_intcoproc_allowed(struct inpcb *inp)
2793 {
2794 return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE;
2795 }
2796
2797 void
2798 inp_clear_intcoproc_allowed(struct inpcb *inp)
2799 {
2800 inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED;
2801
2802 /* Blow away any cached route in the PCB */
2803 ROUTE_RELEASE(&inp->inp_route);
2804 }
2805
2806 #if NECP
2807 /*
2808 * Called when PROC_UUID_NECP_APP_POLICY is set.
2809 */
2810 void
2811 inp_set_want_app_policy(struct inpcb *inp)
2812 {
2813 inp->inp_flags2 |= INP2_WANT_APP_POLICY;
2814 }
2815
2816 /*
2817 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
2818 */
2819 void
2820 inp_clear_want_app_policy(struct inpcb *inp)
2821 {
2822 inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
2823 }
2824 #endif /* NECP */
2825
2826 /*
2827 * Calculate flow hash for an inp, used by an interface to identify a
2828 * flow. When an interface provides a flow control advisory, this
2829 * flow hash is used as an identifier.
2830 */
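/*
 * Rough sketch of the flow-advisory round trip (driver specifics
 * omitted): the hash computed here is kept in inp_flowhash and
 * carried with outgoing packets; when the interface later signals
 * that the flow may resume, the same hash is used to find the pcb:
 *
 *	inp->inp_flowhash = inp_calc_flowhash(inp);
 *	...
 *	// on feedback from the interface layer:
 *	inp_flowadv(flowhash);	// inp_fc_getinp() + inp_fc_feedback()
 */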
2831 u_int32_t
2832 inp_calc_flowhash(struct inpcb *inp)
2833 {
2834 struct inp_flowhash_key fh __attribute__((aligned(8)));
2835 u_int32_t flowhash = 0;
2836 struct inpcb *tmp_inp = NULL;
2837
2838 if (inp_hash_seed == 0)
2839 inp_hash_seed = RandomULong();
2840
2841 bzero(&fh, sizeof (fh));
2842
2843 bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr));
2844 bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr));
2845
2846 fh.infh_lport = inp->inp_lport;
2847 fh.infh_fport = inp->inp_fport;
2848 fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
2849 fh.infh_proto = inp->inp_ip_p;
2850 fh.infh_rand1 = RandomULong();
2851 fh.infh_rand2 = RandomULong();
2852
2853 try_again:
2854 flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed);
2855 if (flowhash == 0) {
2856 /* try to get a non-zero flowhash */
2857 inp_hash_seed = RandomULong();
2858 goto try_again;
2859 }
2860
2861 inp->inp_flowhash = flowhash;
2862
2863 /* Insert the inp into inp_fc_tree */
2864 lck_mtx_lock_spin(&inp_fc_lck);
2865 tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
2866 if (tmp_inp != NULL) {
2867 /*
2868 * There is a different inp with the same flowhash.
2869 * There can be a collision on flow hash but the
2870 * probability is low. Let's recompute the
2871 * flowhash.
2872 */
2873 lck_mtx_unlock(&inp_fc_lck);
2874 /* recompute hash seed */
2875 inp_hash_seed = RandomULong();
2876 goto try_again;
2877 }
2878
2879 RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
2880 inp->inp_flags2 |= INP2_IN_FCTREE;
2881 lck_mtx_unlock(&inp_fc_lck);
2882
2883 return (flowhash);
2884 }
2885
2886 void
2887 inp_flowadv(uint32_t flowhash)
2888 {
2889 struct inpcb *inp;
2890
2891 inp = inp_fc_getinp(flowhash, 0);
2892
2893 if (inp == NULL)
2894 return;
2895 inp_fc_feedback(inp);
2896 }
2897
2898 /*
2899 * Function to compare inp_fc_entries in inp flow control tree
2900 */
2901 static inline int
2902 infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
2903 {
2904 return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
2905 sizeof(inp1->inp_flowhash)));
2906 }
2907
2908 static struct inpcb *
2909 inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
2910 {
2911 struct inpcb *inp = NULL;
2912 int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;
2913
2914 lck_mtx_lock_spin(&inp_fc_lck);
2915 key_inp.inp_flowhash = flowhash;
2916 inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
2917 if (inp == NULL) {
2918 /* inp is not present, return */
2919 lck_mtx_unlock(&inp_fc_lck);
2920 return (NULL);
2921 }
2922
2923 if (flags & INPFC_REMOVE) {
2924 RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
2925 lck_mtx_unlock(&inp_fc_lck);
2926
2927 bzero(&(inp->infc_link), sizeof (inp->infc_link));
2928 inp->inp_flags2 &= ~INP2_IN_FCTREE;
2929 return (NULL);
2930 }
2931
2932 if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING)
2933 inp = NULL;
2934 lck_mtx_unlock(&inp_fc_lck);
2935
2936 return (inp);
2937 }
2938
2939 static void
2940 inp_fc_feedback(struct inpcb *inp)
2941 {
2942 struct socket *so = inp->inp_socket;
2943
2944 /* we already hold a want_cnt on this inp, socket can't be null */
2945 VERIFY(so != NULL);
2946 socket_lock(so, 1);
2947
2948 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
2949 socket_unlock(so, 1);
2950 return;
2951 }
2952
2953 if (inp->inp_sndinprog_cnt > 0)
2954 inp->inp_flags |= INP_FC_FEEDBACK;
2955
2956 /*
2957 * Return if the connection is not in flow-controlled state.
2958 * This can happen if the connection experienced
2959 * loss while it was in the flow-controlled state.
2960 */
2961 if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
2962 socket_unlock(so, 1);
2963 return;
2964 }
2965 inp_reset_fc_state(inp);
2966
2967 if (SOCK_TYPE(so) == SOCK_STREAM)
2968 inp_fc_unthrottle_tcp(inp);
2969
2970 socket_unlock(so, 1);
2971 }
2972
2973 void
2974 inp_reset_fc_state(struct inpcb *inp)
2975 {
2976 struct socket *so = inp->inp_socket;
2977 int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
2978 int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;
2979
2980 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
2981
2982 if (suspended) {
2983 so->so_flags &= ~(SOF_SUSPENDED);
2984 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
2985 }
2986
2987 /* Give a write wakeup to unblock the socket */
2988 if (needwakeup)
2989 sowwakeup(so);
2990 }
2991
2992 int
2993 inp_set_fc_state(struct inpcb *inp, int advcode)
2994 {
2995 struct inpcb *tmp_inp = NULL;
2996 /*
2997 * If there was feedback from the interface while a
2998 * send operation was in progress, ignore this flow
2999 * advisory to avoid a race between setting the
3000 * flow-controlled state and receiving feedback from
3001 * the interface.
3002 */
3003 if (inp->inp_flags & INP_FC_FEEDBACK)
3004 return (0);
3005
3006 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
3007 if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
3008 INPFC_SOLOCKED)) != NULL) {
3009 if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING)
3010 return (0);
3011 VERIFY(tmp_inp == inp);
3012 switch (advcode) {
3013 case FADV_FLOW_CONTROLLED:
3014 inp->inp_flags |= INP_FLOW_CONTROLLED;
3015 break;
3016 case FADV_SUSPENDED:
3017 inp->inp_flags |= INP_FLOW_SUSPENDED;
3018 soevent(inp->inp_socket,
3019 (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));
3020
3021 /* Record the fact that suspend event was sent */
3022 inp->inp_socket->so_flags |= SOF_SUSPENDED;
3023 break;
3024 }
3025 return (1);
3026 }
3027 return (0);
3028 }
3029
3030 /*
3031 * Handler for SO_FLUSH socket option.
3032 */
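/*
 * Minimal user-space sketch, assuming the private SO_FLUSH definition
 * from the socket headers (optval is one traffic class, or SO_TC_ALL):
 *
 *	int tc = SO_TC_ALL;
 *	setsockopt(s, SOL_SOCKET, SO_FLUSH, &tc, sizeof (tc));
 */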
3033 int
3034 inp_flush(struct inpcb *inp, int optval)
3035 {
3036 u_int32_t flowhash = inp->inp_flowhash;
3037 struct ifnet *rtifp, *oifp;
3038
3039 /* Either all classes or one of the valid ones */
3040 if (optval != SO_TC_ALL && !SO_VALID_TC(optval))
3041 return (EINVAL);
3042
3043 /* We need a flow hash for identification */
3044 if (flowhash == 0)
3045 return (0);
3046
3047 /* Grab the interfaces from the route and pcb */
3048 rtifp = ((inp->inp_route.ro_rt != NULL) ?
3049 inp->inp_route.ro_rt->rt_ifp : NULL);
3050 oifp = inp->inp_last_outifp;
3051
3052 if (rtifp != NULL)
3053 if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3054 if (oifp != NULL && oifp != rtifp)
3055 if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3056
3057 return (0);
3058 }
3059
3060 /*
3061 * Clear the INP_INADDR_ANY flag (special case for PPP only)
3062 */
3063 void
3064 inp_clear_INP_INADDR_ANY(struct socket *so)
3065 {
3066 struct inpcb *inp = NULL;
3067
3068 socket_lock(so, 1);
3069 inp = sotoinpcb(so);
3070 if (inp) {
3071 inp->inp_flags &= ~INP_INADDR_ANY;
3072 }
3073 socket_unlock(so, 1);
3074 }
3075
3076 void
3077 inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
3078 {
3079 struct socket *so = inp->inp_socket;
3080
3081 soprocinfo->spi_pid = so->last_pid;
3082 if (so->last_pid != 0)
3083 uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
3084 /*
3085 * When not delegated, the effective pid is the same as the real pid
3086 */
3087 if (so->so_flags & SOF_DELEGATED) {
3088 soprocinfo->spi_delegated = 1;
3089 soprocinfo->spi_epid = so->e_pid;
3090 uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
3091 } else {
3092 soprocinfo->spi_delegated = 0;
3093 soprocinfo->spi_epid = so->last_pid;
3094 }
3095 }
3096
3097 int
3098 inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
3099 struct so_procinfo *soprocinfo)
3100 {
3101 struct inpcb *inp = NULL;
3102 int found = 0;
3103
3104 bzero(soprocinfo, sizeof (struct so_procinfo));
3105
3106 if (!flowhash)
3107 return (-1);
3108
3109 lck_rw_lock_shared(pcbinfo->ipi_lock);
3110 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
3111 if (inp->inp_state != INPCB_STATE_DEAD &&
3112 inp->inp_socket != NULL &&
3113 inp->inp_flowhash == flowhash) {
3114 found = 1;
3115 inp_get_soprocinfo(inp, soprocinfo);
3116 break;
3117 }
3118 }
3119 lck_rw_done(pcbinfo->ipi_lock);
3120
3121 return (found);
3122 }
3123
3124 #if CONFIG_PROC_UUID_POLICY
3125 static void
3126 inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
3127 {
3128 struct socket *so = inp->inp_socket;
3129 int before, after;
3130
3131 VERIFY(so != NULL);
3132 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
3133
3134 before = INP_NO_CELLULAR(inp);
3135 if (set) {
3136 inp_set_nocellular(inp);
3137 } else {
3138 inp_clear_nocellular(inp);
3139 }
3140 after = INP_NO_CELLULAR(inp);
3141 if (net_io_policy_log && (before != after)) {
3142 static const char *ok = "OK";
3143 static const char *nok = "NOACCESS";
3144 uuid_string_t euuid_buf;
3145 pid_t epid;
3146
3147 if (so->so_flags & SOF_DELEGATED) {
3148 uuid_unparse(so->e_uuid, euuid_buf);
3149 epid = so->e_pid;
3150 } else {
3151 uuid_unparse(so->last_uuid, euuid_buf);
3152 epid = so->last_pid;
3153 }
3154
3155 /* allow this socket to generate another notification event */
3156 so->so_ifdenied_notifies = 0;
3157
3158 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
3159 "euuid %s%s %s->%s\n", __func__,
3160 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
3161 SOCK_TYPE(so), epid, euuid_buf,
3162 (so->so_flags & SOF_DELEGATED) ?
3163 " [delegated]" : "",
3164 ((before < after) ? ok : nok),
3165 ((before < after) ? nok : ok));
3166 }
3167 }
3168
3169 #if NECP
3170 static void
3171 inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
3172 {
3173 struct socket *so = inp->inp_socket;
3174 int before, after;
3175
3176 VERIFY(so != NULL);
3177 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
3178
3179 before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
3180 if (set) {
3181 inp_set_want_app_policy(inp);
3182 } else {
3183 inp_clear_want_app_policy(inp);
3184 }
3185 after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
3186 if (net_io_policy_log && (before != after)) {
3187 static const char *wanted = "WANTED";
3188 static const char *unwanted = "UNWANTED";
3189 uuid_string_t euuid_buf;
3190 pid_t epid;
3191
3192 if (so->so_flags & SOF_DELEGATED) {
3193 uuid_unparse(so->e_uuid, euuid_buf);
3194 epid = so->e_pid;
3195 } else {
3196 uuid_unparse(so->last_uuid, euuid_buf);
3197 epid = so->last_pid;
3198 }
3199
3200 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
3201 "euuid %s%s %s->%s\n", __func__,
3202 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
3203 SOCK_TYPE(so), epid, euuid_buf,
3204 (so->so_flags & SOF_DELEGATED) ?
3205 " [delegated]" : "",
3206 ((before < after) ? unwanted : wanted),
3207 ((before < after) ? wanted : unwanted));
3208 }
3209 }
3210 #endif /* NECP */
3211 #endif /* CONFIG_PROC_UUID_POLICY */
3212
3213 #if NECP
3214 void
3215 inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
3216 {
3217 necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
3218 if (necp_socket_should_rescope(inp) &&
3219 inp->inp_lport == 0 &&
3220 inp->inp_laddr.s_addr == INADDR_ANY &&
3221 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
3222 // If we should rescope, and the socket is not yet bound
3223 inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
3224 }
3225 }
3226 #endif /* NECP */
3227
3228 int
3229 inp_update_policy(struct inpcb *inp)
3230 {
3231 #if CONFIG_PROC_UUID_POLICY
3232 struct socket *so = inp->inp_socket;
3233 uint32_t pflags = 0;
3234 int32_t ogencnt;
3235 int err = 0;
3236
3237 if (!net_io_policy_uuid ||
3238 so == NULL || inp->inp_state == INPCB_STATE_DEAD)
3239 return (0);
3240
3241 /*
3242 * Kernel-created sockets that aren't delegated on behalf of
3243 * another process are currently exempted from UUID policy checks.
3244 */
3245 if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED))
3246 return (0);
3247
3248 ogencnt = so->so_policy_gencnt;
3249 err = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
3250 so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt);
3251
3252 /*
3253 * Discard cached generation count if the entry is gone (ENOENT),
3254 * so that we go through the checks below.
3255 */
3256 if (err == ENOENT && ogencnt != 0)
3257 so->so_policy_gencnt = 0;
3258
3259 /*
3260 * If the generation count has changed, inspect the policy flags
3261 * and act accordingly. If a policy flag was previously set and
3262 * the UUID is no longer present in the table (ENOENT), treat it
3263 * as if the flag has been cleared.
3264 */
3265 if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
3266 /* update cellular policy for this socket */
3267 if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
3268 inp_update_cellular_policy(inp, TRUE);
3269 } else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
3270 inp_update_cellular_policy(inp, FALSE);
3271 }
3272 #if NECP
3273 /* update necp want app policy for this socket */
3274 if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
3275 inp_update_necp_want_app_policy(inp, TRUE);
3276 } else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
3277 inp_update_necp_want_app_policy(inp, FALSE);
3278 }
3279 #endif /* NECP */
3280 }
3281
3282 return ((err == ENOENT) ? 0 : err);
3283 #else /* !CONFIG_PROC_UUID_POLICY */
3284 #pragma unused(inp)
3285 return (0);
3286 #endif /* !CONFIG_PROC_UUID_POLICY */
3287 }
3288
3289 static unsigned int log_restricted;
3290 SYSCTL_DECL(_net_inet);
3291 SYSCTL_INT(_net_inet, OID_AUTO, log_restricted,
3292 CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0,
3293 "Log network restrictions");
3294 /*
3295 * Called when we need to enforce policy restrictions in the input path.
3296 *
3297 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
3298 */
3299 static boolean_t
3300 _inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
3301 {
3302 VERIFY(inp != NULL);
3303
3304 /*
3305 * Inbound restrictions.
3306 */
3307 if (!sorestrictrecv)
3308 return (FALSE);
3309
3310 if (ifp == NULL)
3311 return (FALSE);
3312
3313 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
3314 return (TRUE);
3315
3316 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
3317 return (TRUE);
3318
3319 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
3320 return (TRUE);
3321
3322 if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV))
3323 return (FALSE);
3324
3325 if (inp->inp_flags & INP_RECV_ANYIF)
3326 return (FALSE);
3327
3328 if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp)
3329 return (FALSE);
3330
3331 if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp))
3332 return (TRUE);
3333
3334 return (TRUE);
3335 }
3336
3337 boolean_t
3338 inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
3339 {
3340 boolean_t ret;
3341
3342 ret = _inp_restricted_recv(inp, ifp);
3343 if (ret == TRUE && log_restricted) {
3344 printf("pid %d is unable to receive packets on %s\n",
3345 current_proc()->p_pid, ifp->if_xname);
3346 }
3347 return (ret);
3348 }
3349
3350 /*
3351 * Called when we need to enforce policy restrictions in the output path.
3352 *
3353 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
3354 */
3355 static boolean_t
3356 _inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3357 {
3358 VERIFY(inp != NULL);
3359
3360 /*
3361 * Outbound restrictions.
3362 */
3363 if (!sorestrictsend)
3364 return (FALSE);
3365
3366 if (ifp == NULL)
3367 return (FALSE);
3368
3369 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
3370 return (TRUE);
3371
3372 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
3373 return (TRUE);
3374
3375 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
3376 return (TRUE);
3377
3378 if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp))
3379 return (TRUE);
3380
3381 return (FALSE);
3382 }
3383
3384 boolean_t
3385 inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3386 {
3387 boolean_t ret;
3388
3389 ret = _inp_restricted_send(inp, ifp);
3390 if (ret == TRUE && log_restricted) {
3391 printf("pid %d is unable to transmit packets on %s\n",
3392 current_proc()->p_pid, ifp->if_xname);
3393 }
3394 return (ret);
3395 }
3396
3397 inline void
3398 inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack)
3399 {
3400 struct ifnet *ifp = inp->inp_last_outifp;
3401 struct socket *so = inp->inp_socket;
3402 if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
3403 (ifp->if_type == IFT_CELLULAR ||
3404 ifp->if_subfamily == IFNET_SUBFAMILY_WIFI)) {
3405 int32_t unsent;
3406
3407 so->so_snd.sb_flags |= SB_SNDBYTE_CNT;
3408
3409 /*
3410 * There can be data outstanding before the connection
3411 * becomes established -- TFO case
3412 */
3413 if (so->so_snd.sb_cc > 0)
3414 inp_incr_sndbytes_total(so, so->so_snd.sb_cc);
3415
3416 unsent = inp_get_sndbytes_allunsent(so, th_ack);
3417 if (unsent > 0)
3418 inp_incr_sndbytes_unsent(so, unsent);
3419 }
3420 }
3421
3422 inline void
3423 inp_incr_sndbytes_total(struct socket *so, int32_t len)
3424 {
3425 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3426 struct ifnet *ifp = inp->inp_last_outifp;
3427
3428 if (ifp != NULL) {
3429 VERIFY(ifp->if_sndbyte_total >= 0);
3430 OSAddAtomic64(len, &ifp->if_sndbyte_total);
3431 }
3432 }
3433
3434 inline void
3435 inp_decr_sndbytes_total(struct socket *so, int32_t len)
3436 {
3437 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3438 struct ifnet *ifp = inp->inp_last_outifp;
3439
3440 if (ifp != NULL) {
3441 VERIFY(ifp->if_sndbyte_total >= len);
3442 OSAddAtomic64(-len, &ifp->if_sndbyte_total);
3443 }
3444 }
3445
3446 inline void
3447 inp_incr_sndbytes_unsent(struct socket *so, int32_t len)
3448 {
3449 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3450 struct ifnet *ifp = inp->inp_last_outifp;
3451
3452 if (ifp != NULL) {
3453 VERIFY(ifp->if_sndbyte_unsent >= 0);
3454 OSAddAtomic64(len, &ifp->if_sndbyte_unsent);
3455 }
3456 }
3457
3458 inline void
3459 inp_decr_sndbytes_unsent(struct socket *so, int32_t len)
3460 {
3461 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3462 struct ifnet *ifp = inp->inp_last_outifp;
3463
3464 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT))
3465 return;
3466
3467 if (ifp != NULL) {
3468 if (ifp->if_sndbyte_unsent >= len)
3469 OSAddAtomic64(-len, &ifp->if_sndbyte_unsent);
3470 else
3471 ifp->if_sndbyte_unsent = 0;
3472 }
3473 }
3474
3475 inline void
3476 inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
3477 {
3478 int32_t len;
3479
3480 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT))
3481 return;
3482
3483 len = inp_get_sndbytes_allunsent(so, th_ack);
3484 inp_decr_sndbytes_unsent(so, len);
3485 }