/*
 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mcache.h>
#include <sys/kauth.h>
#include <sys/priv.h>
#include <sys/proc_uuid_policy.h>
#include <sys/syslog.h>
#include <net/dlil.h>

#include <libkern/OSAtomic.h>
#include <kern/locks.h>

#include <machine/limits.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/flowhash.h>
#include <net/flowadv.h>
#include <net/ntstat.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#if INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif /* INET6 */

#include <sys/kdebug.h>
#include <sys/random.h>

#include <dev/random/randomdev.h>
#include <mach/boolean.h>

#include <pexpert/pexpert.h>

#if NECP
#include <net/necp.h>
#endif

#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/vnode.h>

static lck_grp_t *inpcb_lock_grp;
static lck_attr_t *inpcb_lock_attr;
static lck_grp_attr_t *inpcb_lock_grp_attr;
decl_lck_mtx_data(static, inpcb_lock);		/* global INPCB lock */
decl_lck_mtx_data(static, inpcb_timeout_lock);

static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);

static u_int16_t inpcb_timeout_run = 0;	/* INPCB timer is scheduled to run */
static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
static boolean_t inpcb_ticking = FALSE;		/* "slow" timer is scheduled */
static boolean_t inpcb_fast_timer_on = FALSE;
static boolean_t intcoproc_unrestricted = FALSE;

extern char *proc_best_name(proc_t);

#define	INPCB_GCREQ_THRESHOLD	50000

static thread_call_t inpcb_thread_call, inpcb_fast_thread_call;
static void inpcb_sched_timeout(void);
static void inpcb_sched_lazy_timeout(void);
static void _inpcb_sched_timeout(unsigned int);
static void inpcb_timeout(void *, void *);
const int inpcb_timeout_lazy = 10;	/* 10 seconds leeway for lazy timers */
extern int tvtohz(struct timeval *);

#if CONFIG_PROC_UUID_POLICY
static void inp_update_cellular_policy(struct inpcb *, boolean_t);
#if NECP
static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
#endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */

#define	DBG_FNC_PCB_LOOKUP	NETDBG_CODE(DBG_NETTCP, (6 << 8))
#define	DBG_FNC_PCB_HLOOKUP	NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))

/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 */
int ipport_lowfirstauto = IPPORT_RESERVED - 1;	/* 1023 */
int ipport_lowlastauto = IPPORT_RESERVEDSTART;	/* 600 */
int ipport_firstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
int ipport_lastauto = IPPORT_HILASTAUTO;	/* 65535 */
int ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
int ipport_hilastauto = IPPORT_HILASTAUTO;	/* 65535 */

#define	RANGECHK(var, min, max) \
	if ((var) < (min)) { (var) = (min); } \
	else if ((var) > (max)) { (var) = (max); }
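
/*
 * For example, RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX)
 * clamps ipport_firstauto into [1024, 65535]; out-of-range writes via
 * the sysctls below are silently pulled back into bounds.
 */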

static int
sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error;

	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
	if (!error) {
		RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
		RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
		RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
	}
	return (error);
}

#undef RANGECHK

SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
	CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IP Ports");

SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
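
/*
 * Editorial illustration (not part of the original source): the knobs
 * above surface in user space as net.inet.ip.portrange.*, e.g.
 *
 *	sysctl net.inet.ip.portrange.first		# read, default 49152
 *	sysctl -w net.inet.ip.portrange.first=10000	# write
 *
 * with every write funneled through sysctl_net_ipport_check(), which
 * clamps the new value via the RANGECHK bounds shown earlier.
 */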

static uint32_t apn_fallbk_debug = 0;
#define	apn_fallbk_log(x)	do { if (apn_fallbk_debug >= 1) log x; } while (0)

#if CONFIG_EMBEDDED
static boolean_t apn_fallbk_enabled = TRUE;

SYSCTL_DECL(_net_inet);
SYSCTL_NODE(_net_inet, OID_AUTO, apn_fallback, CTLFLAG_RW | CTLFLAG_LOCKED,
    0, "APN Fallback");
SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &apn_fallbk_debug, 0, "APN fallback debug enable");
#else
static boolean_t apn_fallbk_enabled = FALSE;
#endif

extern int udp_use_randomport;
extern int tcp_use_randomport;

/* Structs used for flowhash computation */
struct inp_flowhash_key_addr {
	union {
		struct in_addr	v4;
		struct in6_addr v6;
		u_int8_t	addr8[16];
		u_int16_t	addr16[8];
		u_int32_t	addr32[4];
	} infha;
};

struct inp_flowhash_key {
	struct inp_flowhash_key_addr	infh_laddr;
	struct inp_flowhash_key_addr	infh_faddr;
	u_int32_t			infh_lport;
	u_int32_t			infh_fport;
	u_int32_t			infh_af;
	u_int32_t			infh_proto;
	u_int32_t			infh_rand1;
	u_int32_t			infh_rand2;
};
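
/*
 * Sketch of intended use (editorial; inp_calc_flowhash itself lives
 * further down in this file): the local/foreign address and port pair,
 * address family, protocol and two random values are packed into a
 * struct inp_flowhash_key and hashed with the lazily seeded
 * inp_hash_seed, roughly:
 *
 *	struct inp_flowhash_key fh;
 *	bzero(&fh, sizeof (fh));
 *	fh.infh_laddr.infha.v4 = inp->inp_laddr;	(and so on)
 *	inp->inp_flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed);
 */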

static u_int32_t inp_hash_seed = 0;

static int infc_cmp(const struct inpcb *, const struct inpcb *);

/* Flags used by inp_fc_getinp */
#define	INPFC_SOLOCKED	0x1
#define	INPFC_REMOVE	0x2
static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);

static void inp_fc_feedback(struct inpcb *);
extern void tcp_remove_from_time_wait(struct inpcb *inp);

decl_lck_mtx_data(static, inp_fc_lck);

RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);

/*
 * Use this inp as a key to find an inp in the flowhash tree.
 * Accesses to it are protected by inp_fc_lck.
 */
struct inpcb key_inp;

/*
 * in_pcb.c: manage the Protocol Control Blocks.
 */

void
in_pcbinit(void)
{
	static int inpcb_initialized = 0;

	VERIFY(!inpcb_initialized);
	inpcb_initialized = 1;

	inpcb_lock_grp_attr = lck_grp_attr_alloc_init();
	inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr);
	inpcb_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr);
	lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr);
	inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout,
	    NULL, THREAD_CALL_PRIORITY_KERNEL);
	inpcb_fast_thread_call = thread_call_allocate_with_priority(
	    inpcb_timeout, NULL, THREAD_CALL_PRIORITY_KERNEL);
	if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL)
		panic("unable to alloc the inpcb thread call");

	/*
	 * Initialize data structures required to deliver
	 * flow advisories.
	 */
	lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr);
	lck_mtx_lock(&inp_fc_lck);
	RB_INIT(&inp_fc_tree);
	bzero(&key_inp, sizeof(key_inp));
	lck_mtx_unlock(&inp_fc_lck);

	PE_parse_boot_argn("intcoproc_unrestricted", &intcoproc_unrestricted,
	    sizeof (intcoproc_unrestricted));
}
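
/*
 * Editorial note: intcoproc_unrestricted is a boot-arg; booting with
 * intcoproc_unrestricted=1 causes in_pcballoc() below to mark every
 * new PCB with INP2_INTCOPROC_ALLOWED.
 */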

#define	INPCB_HAVE_TIMER_REQ(req)	(((req).intimer_lazy > 0) || \
	((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
static void
inpcb_timeout(void *arg0, void *arg1)
{
#pragma unused(arg0, arg1)
	struct inpcbinfo *ipi;
	boolean_t t, gc;
	struct intimercount gccnt, tmcnt;

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	bzero(&gccnt, sizeof(gccnt));
	bzero(&tmcnt, sizeof(tmcnt));

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	gc = inpcb_garbage_collecting;
	inpcb_garbage_collecting = FALSE;

	t = inpcb_ticking;
	inpcb_ticking = FALSE;

	if (gc || t) {
		lck_mtx_unlock(&inpcb_timeout_lock);

		lck_mtx_lock(&inpcb_lock);
		TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
				bzero(&ipi->ipi_gc_req,
				    sizeof(ipi->ipi_gc_req));
				if (gc && ipi->ipi_gc != NULL) {
					ipi->ipi_gc(ipi);
					gccnt.intimer_lazy +=
					    ipi->ipi_gc_req.intimer_lazy;
					gccnt.intimer_fast +=
					    ipi->ipi_gc_req.intimer_fast;
					gccnt.intimer_nodelay +=
					    ipi->ipi_gc_req.intimer_nodelay;
				}
			}
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
				bzero(&ipi->ipi_timer_req,
				    sizeof(ipi->ipi_timer_req));
				if (t && ipi->ipi_timer != NULL) {
					ipi->ipi_timer(ipi);
					tmcnt.intimer_lazy +=
					    ipi->ipi_timer_req.intimer_lazy;
					tmcnt.intimer_fast +=
					    ipi->ipi_timer_req.intimer_fast;
					tmcnt.intimer_nodelay +=
					    ipi->ipi_timer_req.intimer_nodelay;
				}
			}
		}
		lck_mtx_unlock(&inpcb_lock);
		lck_mtx_lock_spin(&inpcb_timeout_lock);
	}

	/* lock was dropped above, so check first before overriding */
	if (!inpcb_garbage_collecting)
		inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
	if (!inpcb_ticking)
		inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);

	/* re-arm the timer if there's work to do */
	inpcb_timeout_run--;
	VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);

	if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0)
		inpcb_sched_timeout();
	else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5)
		/* be lazy when idle with little activity */
		inpcb_sched_lazy_timeout();
	else
		inpcb_sched_timeout();

	lck_mtx_unlock(&inpcb_timeout_lock);
}

static void
inpcb_sched_timeout(void)
{
	_inpcb_sched_timeout(0);
}

static void
inpcb_sched_lazy_timeout(void)
{
	_inpcb_sched_timeout(inpcb_timeout_lazy);
}

static void
_inpcb_sched_timeout(unsigned int offset)
{
	uint64_t deadline, leeway;

	clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline);
	LCK_MTX_ASSERT(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
	if (inpcb_timeout_run == 0 &&
	    (inpcb_garbage_collecting || inpcb_ticking)) {
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		if (offset == 0) {
			inpcb_fast_timer_on = TRUE;
			thread_call_enter_delayed(inpcb_thread_call,
			    deadline);
		} else {
			inpcb_fast_timer_on = FALSE;
			clock_interval_to_absolutetime_interval(offset,
			    NSEC_PER_SEC, &leeway);
			thread_call_enter_delayed_with_leeway(
			    inpcb_thread_call, NULL, deadline, leeway,
			    THREAD_CALL_DELAY_LEEWAY);
		}
	} else if (inpcb_timeout_run == 1 &&
	    offset == 0 && !inpcb_fast_timer_on) {
		/*
		 * Since the request was for a fast timer but the
		 * scheduled timer is a lazy timer, try to schedule
		 * another instance of fast timer also.
		 */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		inpcb_fast_timer_on = TRUE;
		thread_call_enter_delayed(inpcb_fast_thread_call, deadline);
	}
}

void
inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
{
	u_int32_t gccnt;

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	inpcb_garbage_collecting = TRUE;
	gccnt = ipi->ipi_gc_req.intimer_nodelay +
	    ipi->ipi_gc_req.intimer_fast;

	if (gccnt > INPCB_GCREQ_THRESHOLD) {
		type = INPCB_TIMER_FAST;
	}

	switch (type) {
	case INPCB_TIMER_NODELAY:
		atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1);
		inpcb_sched_timeout();
		break;
	case INPCB_TIMER_FAST:
		atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
		inpcb_sched_timeout();
		break;
	default:
		atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);
		inpcb_sched_lazy_timeout();
		break;
	}
	lck_mtx_unlock(&inpcb_timeout_lock);
}

void
inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
{
	lck_mtx_lock_spin(&inpcb_timeout_lock);
	inpcb_ticking = TRUE;
	switch (type) {
	case INPCB_TIMER_NODELAY:
		atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1);
		inpcb_sched_timeout();
		break;
	case INPCB_TIMER_FAST:
		atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
		inpcb_sched_timeout();
		break;
	default:
		atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1);
		inpcb_sched_lazy_timeout();
		break;
	}
	lck_mtx_unlock(&inpcb_timeout_lock);
}

void
in_pcbinfo_attach(struct inpcbinfo *ipi)
{
	struct inpcbinfo *ipi0;

	lck_mtx_lock(&inpcb_lock);
	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
		if (ipi0 == ipi) {
			panic("%s: ipi %p already in the list\n",
			    __func__, ipi);
			/* NOTREACHED */
		}
	}
	TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
	lck_mtx_unlock(&inpcb_lock);
}

int
in_pcbinfo_detach(struct inpcbinfo *ipi)
{
	struct inpcbinfo *ipi0;
	int error = 0;

	lck_mtx_lock(&inpcb_lock);
	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
		if (ipi0 == ipi)
			break;
	}
	if (ipi0 != NULL)
		TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
	else
		error = ENXIO;
	lck_mtx_unlock(&inpcb_lock);

	return (error);
}

/*
 * Allocate a PCB and associate it with the socket.
 *
 * Returns:	0			Success
 *		ENOBUFS
 *		ENOMEM
 */
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
{
#pragma unused(p)
	struct inpcb *inp;
	caddr_t temp;
#if CONFIG_MACF_NET
	int mac_error;
#endif /* CONFIG_MACF_NET */

	if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
		inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone);
		if (inp == NULL)
			return (ENOBUFS);
		bzero((caddr_t)inp, sizeof (*inp));
	} else {
		inp = (struct inpcb *)(void *)so->so_saved_pcb;
		temp = inp->inp_saved_ppcb;
		bzero((caddr_t)inp, sizeof (*inp));
		inp->inp_saved_ppcb = temp;
	}

	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	inp->inp_pcbinfo = pcbinfo;
	inp->inp_socket = so;
#if CONFIG_MACF_NET
	mac_error = mac_inpcb_label_init(inp, M_WAITOK);
	if (mac_error != 0) {
		if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0)
			zfree(pcbinfo->ipi_zone, inp);
		return (mac_error);
	}
	mac_inpcb_label_associate(so, inp);
#endif /* CONFIG_MACF_NET */
	/* make sure inp_stat is always 64-bit aligned */
	inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
	    sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) {
		panic("%s: insufficient space to align inp_stat", __func__);
		/* NOTREACHED */
	}
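
	/*
	 * Editorial note: P2ROUNDUP(x, align) rounds x up to the next
	 * multiple of the power-of-2 "align"; e.g. P2ROUNDUP(0x1004, 8)
	 * yields 0x1008, and P2ROUNDUP(0x1008, 8) leaves it at 0x1008.
	 * The *_store buffers carry enough slack for this adjustment,
	 * which is what the panic checks here verify.
	 */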

	/* make sure inp_cstat is always 64-bit aligned */
	inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
	    sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) {
		panic("%s: insufficient space to align inp_cstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_wstat is always 64-bit aligned */
	inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
	    sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) {
		panic("%s: insufficient space to align inp_wstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_Wstat is always 64-bit aligned */
	inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
	    sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) {
		panic("%s: insufficient space to align inp_Wstat", __func__);
		/* NOTREACHED */
	}

	so->so_pcb = (caddr_t)inp;

	if (so->so_proto->pr_flags & PR_PCBLOCK) {
		lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
		    pcbinfo->ipi_lock_attr);
	}

#if INET6
	if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on)
		inp->inp_flags |= IN6P_IPV6_V6ONLY;

	if (ip6_auto_flowlabel)
		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
#endif /* INET6 */
	if (intcoproc_unrestricted)
		inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;

	(void) inp_update_policy(inp);

	lck_rw_lock_exclusive(pcbinfo->ipi_lock);
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
	pcbinfo->ipi_count++;
	lck_rw_done(pcbinfo->ipi_lock);
	return (0);
}

/*
 * in_pcblookup_local_and_cleanup does everything
 * in_pcblookup_local does but it checks for a socket
 * that's going away. Since we know that the lock is
 * held read+write when this function is called, we
 * can safely dispose of this socket like the slow
 * timer would usually do and return NULL. This is
 * great for bind.
 */
struct inpcb *
in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    u_int lport_arg, int wild_okay)
{
	struct inpcb *inp;

	/* Perform normal lookup */
	inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);

	/* Check if we found a match but it's waiting to be disposed */
	if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
		struct socket *so = inp->inp_socket;

		socket_lock(so, 0);

		if (so->so_usecount == 0) {
			if (inp->inp_state != INPCB_STATE_DEAD)
				in_pcbdetach(inp);
			in_pcbdispose(inp);	/* will unlock & destroy */
			inp = NULL;
		} else {
			socket_unlock(so, 0);
		}
	}

	return (inp);
}

static void
in_pcb_conflict_post_msg(u_int16_t port)
{
	/*
	 * Radar 5523020: send a kernel event notification if a
	 * non-participating socket tries to bind a port owned by
	 * a socket that has set SOF_NOTIFYCONFLICT.
	 */
	struct kev_msg ev_msg;
	struct kev_in_portinuse in_portinuse;

	bzero(&in_portinuse, sizeof (struct kev_in_portinuse));
	bzero(&ev_msg, sizeof (struct kev_msg));
	in_portinuse.port = ntohs(port);	/* port in host order */
	in_portinuse.req_pid = proc_selfpid();
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_INET_SUBCLASS;
	ev_msg.event_code = KEV_INET_PORTINUSE;
	ev_msg.dv[0].data_ptr = &in_portinuse;
	ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse);
	ev_msg.dv[1].data_length = 0;
	dlil_post_complete_msg(NULL, &ev_msg);
}
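
/*
 * Illustrative consumer sketch (editorial, not part of the original
 * source): a user-space process can observe the KEV_INET_PORTINUSE
 * event posted above through a kernel event socket, along these lines:
 *
 *	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *	struct kev_request req = {
 *		.vendor_code  = KEV_VENDOR_APPLE,
 *		.kev_class    = KEV_NETWORK_CLASS,
 *		.kev_subclass = KEV_INET_SUBCLASS,
 *	};
 *	ioctl(fd, SIOCSKEVFILT, &req);
 *
 * after which recv() yields struct kern_event_msg records whose
 * event_code can be matched against KEV_INET_PORTINUSE.
 */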

/*
 * Bind an INPCB to an address and/or port.  This routine should not alter
 * the caller-supplied local address "nam".
 *
 * Returns:	0			Success
 *		EADDRNOTAVAIL		Address not available.
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported [notdef]
 *		EACCES			Permission denied
 *		EADDRINUSE		Address in use
 *		EAGAIN			Resource unavailable, try again
 *		priv_check_cred:EPERM	Operation not permitted
 */
int
in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
{
	struct socket *so = inp->inp_socket;
	unsigned short *lastport;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	u_short lport = 0, rand_port = 0;
	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
	int error, randomport, conflict = 0;
	boolean_t anonport = FALSE;
	kauth_cred_t cred;
	struct in_addr laddr;
	struct ifnet *outif = NULL;

	if (TAILQ_EMPTY(&in_ifaddrhead))	/* XXX broken! */
		return (EADDRNOTAVAIL);
	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
		return (EINVAL);
	if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
		wild = 1;

	bzero(&laddr, sizeof(laddr));

	socket_unlock(so, 0);	/* keep reference on socket */
	lck_rw_lock_exclusive(pcbinfo->ipi_lock);

	if (nam != NULL) {
		if (nam->sa_len != sizeof (struct sockaddr_in)) {
			lck_rw_done(pcbinfo->ipi_lock);
			socket_lock(so, 0);
			return (EINVAL);
		}
#if 0
		/*
		 * We should check the family, but old programs
		 * incorrectly fail to initialize it.
		 */
		if (nam->sa_family != AF_INET) {
			lck_rw_done(pcbinfo->ipi_lock);
			socket_lock(so, 0);
			return (EAFNOSUPPORT);
		}
#endif /* 0 */
		lport = SIN(nam)->sin_port;

		if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
			/*
			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
			 * allow complete duplication of binding if
			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
			 * and a multicast address is bound on both
			 * new and duplicated sockets.
			 */
			if (so->so_options & SO_REUSEADDR)
				reuseport = SO_REUSEADDR|SO_REUSEPORT;
		} else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
			struct sockaddr_in sin;
			struct ifaddr *ifa;

			/* Sanitized for interface address searches */
			bzero(&sin, sizeof (sin));
			sin.sin_family = AF_INET;
			sin.sin_len = sizeof (struct sockaddr_in);
			sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;

			ifa = ifa_ifwithaddr(SA(&sin));
			if (ifa == NULL) {
				lck_rw_done(pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return (EADDRNOTAVAIL);
			} else {
				/*
				 * Opportunistically determine the outbound
				 * interface that may be used; this may not
				 * hold true if we end up using a route
				 * going over a different interface, e.g.
				 * when sending to a local address.  This
				 * will get updated again after sending.
				 */
				IFA_LOCK(ifa);
				outif = ifa->ifa_ifp;
				IFA_UNLOCK(ifa);
				IFA_REMREF(ifa);
			}
		}
		if (lport != 0) {
			struct inpcb *t;
			uid_t u;

#if !CONFIG_EMBEDDED
			if (ntohs(lport) < IPPORT_RESERVED) {
				cred = kauth_cred_proc_ref(p);
				error = priv_check_cred(cred,
				    PRIV_NETINET_RESERVEDPORT, 0);
				kauth_cred_unref(&cred);
				if (error != 0) {
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return (EACCES);
				}
			}
#endif /* !CONFIG_EMBEDDED */
			if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
			    (u = kauth_cred_getuid(so->so_cred)) != 0 &&
			    (t = in_pcblookup_local_and_cleanup(
			    inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
			    INPLOOKUP_WILDCARD)) != NULL &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY ||
			    !(t->inp_socket->so_options & SO_REUSEPORT)) &&
			    (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
			    !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY)) {
				if ((t->inp_socket->so_flags &
				    SOF_NOTIFYCONFLICT) &&
				    !(so->so_flags & SOF_NOTIFYCONFLICT))
					conflict = 1;

				lck_rw_done(pcbinfo->ipi_lock);

				if (conflict)
					in_pcb_conflict_post_msg(lport);

				socket_lock(so, 0);
				return (EADDRINUSE);
			}
			t = in_pcblookup_local_and_cleanup(pcbinfo,
			    SIN(nam)->sin_addr, lport, wild);
			if (t != NULL &&
			    (reuseport & t->inp_socket->so_options) == 0) {
#if INET6
				if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
				    t->inp_laddr.s_addr != INADDR_ANY ||
				    SOCK_DOM(so) != PF_INET6 ||
				    SOCK_DOM(t->inp_socket) != PF_INET6)
#endif /* INET6 */
				{
					if ((t->inp_socket->so_flags &
					    SOF_NOTIFYCONFLICT) &&
					    !(so->so_flags & SOF_NOTIFYCONFLICT))
						conflict = 1;

					lck_rw_done(pcbinfo->ipi_lock);

					if (conflict)
						in_pcb_conflict_post_msg(lport);
					socket_lock(so, 0);
					return (EADDRINUSE);
				}
			}
		}
		laddr = SIN(nam)->sin_addr;
	}
	if (lport == 0) {
		u_short first, last;
		int count;
		bool found;

		randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
		    (so->so_type == SOCK_STREAM ? tcp_use_randomport :
		    udp_use_randomport);

		/*
		 * Even though this looks similar to the code in
		 * in6_pcbsetport, the v6 vs v4 checks are different.
		 */
		anonport = TRUE;
		if (inp->inp_flags & INP_HIGHPORT) {
			first = ipport_hifirstauto;	/* sysctl */
			last = ipport_hilastauto;
			lastport = &pcbinfo->ipi_lasthi;
		} else if (inp->inp_flags & INP_LOWPORT) {
			cred = kauth_cred_proc_ref(p);
			error = priv_check_cred(cred,
			    PRIV_NETINET_RESERVEDPORT, 0);
			kauth_cred_unref(&cred);
			if (error != 0) {
				lck_rw_done(pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return (error);
			}
			first = ipport_lowfirstauto;	/* 1023 */
			last = ipport_lowlastauto;	/* 600 */
			lastport = &pcbinfo->ipi_lastlow;
		} else {
			first = ipport_firstauto;	/* sysctl */
			last = ipport_lastauto;
			lastport = &pcbinfo->ipi_lastport;
		}
		/* No point in randomizing if only one port is available */
		if (first == last)
			randomport = 0;
		/*
		 * Simple check to ensure all ports are not used up causing
		 * a deadlock here.
		 *
		 * We split the two cases (up and down) so that the direction
		 * is not being tested on each round of the loop.
		 */
		if (first > last) {
			struct in_addr lookup_addr;

			/*
			 * counting down
			 */
			if (randomport) {
				read_frandom(&rand_port, sizeof (rand_port));
				*lastport =
				    first - (rand_port % (first - last));
			}
			count = first - last;

			lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
			    inp->inp_laddr;

			found = false;
			do {
				if (count-- < 0) {	/* completely used? */
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return (EADDRNOTAVAIL);
				}
				--*lastport;
				if (*lastport > first || *lastport < last)
					*lastport = first;
				lport = htons(*lastport);

				found = in_pcblookup_local_and_cleanup(pcbinfo,
				    lookup_addr, lport, wild) == NULL;
			} while (!found);
		} else {
			struct in_addr lookup_addr;

			/*
			 * counting up
			 */
			if (randomport) {
				read_frandom(&rand_port, sizeof (rand_port));
				*lastport =
				    first + (rand_port % (last - first));
			}
			count = last - first;

			lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
			    inp->inp_laddr;

			found = false;
			do {
				if (count-- < 0) {	/* completely used? */
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return (EADDRNOTAVAIL);
				}
				++*lastport;
				if (*lastport < first || *lastport > last)
					*lastport = first;
				lport = htons(*lastport);

				found = in_pcblookup_local_and_cleanup(pcbinfo,
				    lookup_addr, lport, wild) == NULL;
			} while (!found);
		}
	}
	socket_lock(so, 0);

	/*
	 * We unlocked the socket's protocol lock for a long time.
	 * The socket might have been dropped/defuncted.
	 * Check whether the world has changed since.
	 */
	if (inp->inp_state == INPCB_STATE_DEAD) {
		lck_rw_done(pcbinfo->ipi_lock);
		return (ECONNABORTED);
	}

	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
		lck_rw_done(pcbinfo->ipi_lock);
		return (EINVAL);
	}

	if (laddr.s_addr != INADDR_ANY) {
		inp->inp_laddr = laddr;
		inp->inp_last_outifp = outif;
	}
	inp->inp_lport = lport;
	if (anonport)
		inp->inp_flags |= INP_ANONPORT;

	if (in_pcbinshash(inp, 1) != 0) {
		inp->inp_laddr.s_addr = INADDR_ANY;
		inp->inp_last_outifp = NULL;

		inp->inp_lport = 0;
		if (anonport)
			inp->inp_flags &= ~INP_ANONPORT;
		lck_rw_done(pcbinfo->ipi_lock);
		return (EAGAIN);
	}
	lck_rw_done(pcbinfo->ipi_lock);
	sflt_notify(so, sock_evt_bound, NULL);
	return (0);
}

#define	APN_FALLBACK_IP_FILTER(a)	\
	(IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \
	 IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \
	 IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \
	 IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \
	 IN_PRIVATE(ntohl((a)->sin_addr.s_addr)))
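
/*
 * i.e. destinations in 0.0.0.0/8 (zeronet), 127.0.0.0/8 (loopback),
 * 169.254.0.0/16 (link-local), 224.0.0.0/4 (multicast) and the
 * RFC 1918 private ranges never trigger the APN fallback path.
 */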

#define	APN_FALLBACK_NOTIF_INTERVAL	2 /* Magic Number */
static uint64_t last_apn_fallback = 0;

static boolean_t
apn_fallback_required(proc_t proc, struct socket *so,
    struct sockaddr_in *p_dstv4)
{
	uint64_t timenow;
	struct sockaddr_storage lookup_default_addr;
	struct rtentry *rt = NULL;

	VERIFY(proc != NULL);

	if (apn_fallbk_enabled == FALSE)
		return FALSE;

	if (proc == kernproc)
		return FALSE;

	if (so && (so->so_options & SO_NOAPNFALLBK))
		return FALSE;

	timenow = net_uptime();
	if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) {
		apn_fallbk_log((LOG_INFO,
		    "APN fallback notification throttled.\n"));
		return FALSE;
	}

	if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4))
		return FALSE;

	/* Check if we have unscoped IPv6 default route through cellular */
	bzero(&lookup_default_addr, sizeof(lookup_default_addr));
	lookup_default_addr.ss_family = AF_INET6;
	lookup_default_addr.ss_len = sizeof(struct sockaddr_in6);

	rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
	if (NULL == rt) {
		apn_fallbk_log((LOG_INFO,
		    "APN fallback notification could not find "
		    "unscoped default IPv6 route.\n"));
		return FALSE;
	}

	if (!IFNET_IS_CELLULAR(rt->rt_ifp)) {
		rtfree(rt);
		apn_fallbk_log((LOG_INFO,
		    "APN fallback notification could not find "
		    "unscoped default IPv6 route through cellular "
		    "interface.\n"));
		return FALSE;
	}

	/*
	 * We have a default IPv6 route, ensure that
	 * we do not have IPv4 default route before triggering
	 * the event
	 */
	rtfree(rt);
	rt = NULL;

	bzero(&lookup_default_addr, sizeof(lookup_default_addr));
	lookup_default_addr.ss_family = AF_INET;
	lookup_default_addr.ss_len = sizeof(struct sockaddr_in);

	rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);

	if (rt) {
		rtfree(rt);
		rt = NULL;
		apn_fallbk_log((LOG_INFO,
		    "APN fallback notification found unscoped "
		    "IPv4 default route!\n"));
		return FALSE;
	}

	{
		/*
		 * We disable APN fallback if the binary is not a third-party
		 * app.  Note that platform daemons use their process name as
		 * a bundle ID so we filter out bundle IDs without dots.
		 */
		const char *bundle_id = cs_identity_get(proc);
		if (bundle_id == NULL ||
		    bundle_id[0] == '\0' ||
		    strchr(bundle_id, '.') == NULL ||
		    strncmp(bundle_id, "com.apple.",
		    sizeof("com.apple.") - 1) == 0) {
			apn_fallbk_log((LOG_INFO,
			    "Abort: APN fallback notification found first-"
			    "party bundle ID \"%s\"!\n",
			    (bundle_id ? bundle_id : "NULL")));
			return FALSE;
		}
	}

	{
		/*
		 * The Apple App Store IPv6 requirement started on
		 * June 1st, 2016 at 12:00:00 AM PDT.
		 * We disable APN fallback if the binary is more recent than
		 * that.  We check both atime and birthtime since birthtime
		 * is not always supported.
		 */
		static const long ipv6_start_date = 1464764400L;
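		/* 1464764400 == Wed Jun  1 07:00:00 UTC 2016 (00:00 PDT, UTC-7) */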
		vfs_context_t context;
		struct stat64 sb;
		int vn_stat_error;

		bzero(&sb, sizeof(struct stat64));
		context = vfs_context_create(NULL);
		vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, context);
		(void)vfs_context_rele(context);

		if (vn_stat_error != 0 ||
		    sb.st_atimespec.tv_sec >= ipv6_start_date ||
		    sb.st_birthtimespec.tv_sec >= ipv6_start_date) {
			apn_fallbk_log((LOG_INFO,
			    "Abort: APN fallback notification found binary "
			    "too recent! (err %d atime %ld mtime %ld "
			    "ctime %ld birthtime %ld)\n",
			    vn_stat_error, sb.st_atimespec.tv_sec,
			    sb.st_mtimespec.tv_sec, sb.st_ctimespec.tv_sec,
			    sb.st_birthtimespec.tv_sec));
			return FALSE;
		}
	}
	return TRUE;
}

static void
apn_fallback_trigger(proc_t proc)
{
	pid_t pid = 0;
	struct kev_msg ev_msg;
	struct kev_netevent_apnfallbk_data apnfallbk_data;

	last_apn_fallback = net_uptime();
	pid = proc_pid(proc);
	uuid_t application_uuid;
	uuid_clear(application_uuid);
	proc_getexecutableuuid(proc, application_uuid,
	    sizeof(application_uuid));

	bzero(&ev_msg, sizeof (struct kev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_NETEVENT_SUBCLASS;
	ev_msg.event_code = KEV_NETEVENT_APNFALLBACK;

	bzero(&apnfallbk_data, sizeof(apnfallbk_data));
	apnfallbk_data.epid = pid;
	uuid_copy(apnfallbk_data.euuid, application_uuid);

	ev_msg.dv[0].data_ptr = &apnfallbk_data;
	ev_msg.dv[0].data_length = sizeof(apnfallbk_data);
	kev_post_msg(&ev_msg);
	apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n"));
}

/*
 * Transform old in_pcbconnect() into an inner subroutine for new
 * in_pcbconnect(); do some validity-checking on the remote address
 * (in "nam") and then determine local host address (i.e., which
 * interface) to use to access that remote host.
 *
 * This routine may alter the caller-supplied remote address "nam".
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 *
 * This routine might return an ifp with a reference held if the caller
 * provides a non-NULL outif, even in the error case.  The caller is
 * responsible for releasing its reference.
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported
 *		EADDRNOTAVAIL		Address not available
 */
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
    unsigned int ifscope, struct ifnet **outif, int raw)
{
	struct route *ro = &inp->inp_route;
	struct in_ifaddr *ia = NULL;
	struct sockaddr_in sin;
	int error = 0;
	boolean_t restricted = FALSE;

	if (outif != NULL)
		*outif = NULL;
	if (nam->sa_len != sizeof (struct sockaddr_in))
		return (EINVAL);
	if (SIN(nam)->sin_family != AF_INET)
		return (EAFNOSUPPORT);
	if (raw == 0 && SIN(nam)->sin_port == 0)
		return (EADDRNOTAVAIL);

	/*
	 * If the destination address is INADDR_ANY,
	 * use the primary local address.
	 * If the supplied address is INADDR_BROADCAST,
	 * and the primary interface supports broadcast,
	 * choose the broadcast address for that interface.
	 */
	if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
	    SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) {
		lck_rw_lock_shared(in_ifaddr_rwlock);
		if (!TAILQ_EMPTY(&in_ifaddrhead)) {
			ia = TAILQ_FIRST(&in_ifaddrhead);
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
				SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
			} else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
				SIN(nam)->sin_addr =
				    SIN(&ia->ia_broadaddr)->sin_addr;
			}
			IFA_UNLOCK(&ia->ia_ifa);
			ia = NULL;
		}
		lck_rw_done(in_ifaddr_rwlock);
	}
	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp->inp_laddr.s_addr != INADDR_ANY) {
		VERIFY(ia == NULL);
		*laddr = inp->inp_laddr;
		return (0);
	}

	/*
	 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
	 * then it overrides the sticky ifscope set for the socket.
	 */
	if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF))
		ifscope = inp->inp_boundifp->if_index;

	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 * Note that we should check the address family of the cached
	 * destination, in case of sharing the cache with IPv6.
	 */
	if (ro->ro_rt != NULL)
		RT_LOCK_SPIN(ro->ro_rt);
	if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
	    SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
	    (inp->inp_socket->so_options & SO_DONTROUTE)) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);
	}
	if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
	    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);
		/* No route yet, so try to acquire one */
		bzero(&ro->ro_dst, sizeof (struct sockaddr_in));
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof (struct sockaddr_in);
		SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
		rtalloc_scoped(ro, ifscope);
		if (ro->ro_rt != NULL)
			RT_LOCK_SPIN(ro->ro_rt);
	}
	/* Sanitized local copy for interface address searches */
	bzero(&sin, sizeof (sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof (struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
	/*
	 * If we did not find (or use) a route, assume dest is reachable
	 * on a directly connected network and try to find a corresponding
	 * interface to take the source address from.
	 */
	if (ro->ro_rt == NULL) {
		proc_t proc = current_proc();

		VERIFY(ia == NULL);
		ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
		if (ia == NULL)
			ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
		error = ((ia == NULL) ? ENETUNREACH : 0);

		if (apn_fallback_required(proc, inp->inp_socket,
		    (void *)nam))
			apn_fallback_trigger(proc);

		goto done;
	}
	RT_LOCK_ASSERT_HELD(ro->ro_rt);
	/*
	 * If the outgoing interface on the route found is not
	 * a loopback interface, use the address from that interface.
	 */
	if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
		VERIFY(ia == NULL);
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 * Apply the same logic for expensive interfaces.
		 */
		if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
			RT_UNLOCK(ro->ro_rt);
			ROUTE_RELEASE(ro);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			ia = ifatoia(ro->ro_rt->rt_ifa);
			IFA_ADDREF(&ia->ia_ifa);
			RT_UNLOCK(ro->ro_rt);
			error = 0;
		}
		goto done;
	}
	VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
	RT_UNLOCK(ro->ro_rt);
	/*
	 * The outgoing interface is marked with 'loopback net', so a route
	 * to ourselves is here.
	 * Try to find the interface of the destination address and then
	 * take the address from there.  That interface is not necessarily
	 * a loopback interface.
	 */
	VERIFY(ia == NULL);
	ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
	if (ia == NULL)
		ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
	if (ia == NULL)
		ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
	if (ia == NULL) {
		RT_LOCK(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL)
			IFA_ADDREF(&ia->ia_ifa);
		RT_UNLOCK(ro->ro_rt);
	}
	error = ((ia == NULL) ? ENETUNREACH : 0);

done:
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
	    inp->inp_moptions != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp;

		imo = inp->inp_moptions;
		IMO_LOCK(imo);
		if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
		    ia->ia_ifp != imo->imo_multicast_ifp)) {
			ifp = imo->imo_multicast_ifp;
			if (ia != NULL)
				IFA_REMREF(&ia->ia_ifa);
			lck_rw_lock_shared(in_ifaddr_rwlock);
			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
				if (ia->ia_ifp == ifp)
					break;
			}
			if (ia != NULL)
				IFA_ADDREF(&ia->ia_ifa);
			lck_rw_done(in_ifaddr_rwlock);
			if (ia == NULL)
				error = EADDRNOTAVAIL;
			else
				error = 0;
		}
		IMO_UNLOCK(imo);
	}
	/*
	 * Don't do pcblookup call here; return interface in laddr
	 * and exit to caller, that will do the lookup.
	 */
	if (ia != NULL) {
		/*
		 * If the source address belongs to a cellular interface
		 * and the socket forbids our using interfaces of such
		 * type, pretend that there is no source address.
		 * Apply the same logic for expensive interfaces.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
			IFA_UNLOCK(&ia->ia_ifa);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else if (error == 0) {
			*laddr = ia->ia_addr.sin_addr;
			if (outif != NULL) {
				struct ifnet *ifp;

				if (ro->ro_rt != NULL)
					ifp = ro->ro_rt->rt_ifp;
				else
					ifp = ia->ia_ifp;

				VERIFY(ifp != NULL);
				IFA_CONVERT_LOCK(&ia->ia_ifa);
				ifnet_reference(ifp);	/* for caller */
				if (*outif != NULL)
					ifnet_release(*outif);
				*outif = ifp;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		} else {
			IFA_UNLOCK(&ia->ia_ifa);
		}
		IFA_REMREF(&ia->ia_ifa);
		ia = NULL;
	}

	if (restricted && error == EHOSTUNREACH) {
		soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_IFDENIED));
	}

	return (error);
}

/*
 * Outer subroutine:
 * Connect from a socket to a specified address.
 * Both address and port must be specified in argument sin.
 * If we don't have a local address for this socket yet,
 * then pick one.
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
	struct inpcb *pcb;
	int error;
	struct socket *so = inp->inp_socket;

	/*
	 * Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0)
		return (error);

	socket_unlock(so, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state.  When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0)
		return (ECONNREFUSED);

	if (pcb != NULL) {
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return (EADDRINUSE);
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			error = in_pcbbind(inp, NULL, p);
			if (error)
				return (error);
		}
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		/*
		 * Using IP_PKTINFO without a local port already
		 * specified will cause the kernel to panic;
		 * see rdar://problem/18508185.
		 * For now return an error to avoid a kernel panic.
		 * This routine can be refactored to handle this
		 * better in the future.
		 */
		if (inp->inp_lport == 0)
			return (EINVAL);
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
	}
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
		nstat_pcb_invalidate_cache(inp);
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	return (0);
}

void
in_pcbdisconnect(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
		nstat_pcb_cache(inp);

	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;

	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
		/* lock inversion issue, mostly with udp multicast packets */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	/*
	 * A multipath subflow socket would have its SS_NOFDREF set by default,
	 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
	 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF))
		in_pcbdetach(inp);
}

void
in_pcbdetach(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (so->so_pcb == NULL) {
		/* PCB has been disposed */
		panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__,
		    inp, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if IPSEC
	if (inp->inp_sp != NULL) {
		(void) ipsec4_delete_pcbpolicy(inp);
	}
#endif /* IPSEC */

	if (inp->inp_stat != NULL && SOCK_PROTO(so) == IPPROTO_UDP) {
		if (inp->inp_stat->rxpackets == 0 &&
		    inp->inp_stat->txpackets == 0) {
			INC_ATOMIC_INT64_LIM(
			    net_api_stats.nas_socket_inet_dgram_no_data);
		}
	}

	/*
	 * Let NetworkStatistics know this PCB is going away
	 * before we detach it.
	 */
	if (nstat_collect &&
	    (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP))
		nstat_pcb_detach(inp);

	/* Free memory buffer held for generating keep alives */
	if (inp->inp_keepalive_data != NULL) {
		FREE(inp->inp_keepalive_data, M_TEMP);
		inp->inp_keepalive_data = NULL;
	}

	/* mark socket state as dead */
	if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
		panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
		    __func__, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

	if (!(so->so_flags & SOF_PCBCLEARING)) {
		struct ip_moptions *imo;

		inp->inp_vflag = 0;
		if (inp->inp_options != NULL) {
			(void) m_free(inp->inp_options);
			inp->inp_options = NULL;
		}
		ROUTE_RELEASE(&inp->inp_route);
		imo = inp->inp_moptions;
		inp->inp_moptions = NULL;
		sofreelastref(so, 0);
		inp->inp_state = INPCB_STATE_DEAD;
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;

		inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);

		/*
		 * See inp_join_group() for why we need to unlock
		 */
		if (imo != NULL) {
			socket_unlock(so, 0);
			IMO_REMREF(imo);
			socket_lock(so, 0);
		}
	}
}

void
in_pcbdispose(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	if (so != NULL && so->so_usecount != 0) {
		panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n",
		    __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
		    solockhistory_nr(so));
		/* NOTREACHED */
	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
		if (so != NULL) {
			panic_plain("%s: inp %p invalid wantcnt %d, so %p "
			    "[%d,%d] usecount %d retaincnt %d state 0x%x "
			    "flags 0x%x lockhistory %s\n", __func__, inp,
			    inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
			    so->so_usecount, so->so_retaincnt, so->so_state,
			    so->so_flags, solockhistory_nr(so));
			/* NOTREACHED */
		} else {
			panic("%s: inp %p invalid wantcnt %d no socket\n",
			    __func__, inp, inp->inp_wantcnt);
			/* NOTREACHED */
		}
	}

	LCK_RW_ASSERT(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);

	inp->inp_gencnt = ++ipi->ipi_gencnt;
	/* access ipi in in_pcbremlists */
	in_pcbremlists(inp);

	if (so != NULL) {
		if (so->so_proto->pr_flags & PR_PCBLOCK) {
			sofreelastref(so, 0);
			if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
				/*
				 * selthreadclear() already called
				 * during sofreelastref() above.
				 */
				sbrelease(&so->so_rcv);
				sbrelease(&so->so_snd);
			}
			if (so->so_head != NULL) {
				panic("%s: so=%p head still exist\n",
				    __func__, so);
				/* NOTREACHED */
			}
			lck_mtx_unlock(&inp->inpcb_mtx);

#if NECP
			necp_inpcb_remove_cb(inp);
#endif /* NECP */

			lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
		}
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;
		so->so_saved_pcb = (caddr_t)inp;
		so->so_pcb = NULL;
		inp->inp_socket = NULL;
#if CONFIG_MACF_NET
		mac_inpcb_label_destroy(inp);
#endif /* CONFIG_MACF_NET */
#if NECP
		necp_inpcb_dispose(inp);
#endif /* NECP */
		/*
		 * In case there is a route cached after a detach (possible
		 * in the tcp case), make sure that it is freed before
		 * we deallocate the structure.
		 */
		ROUTE_RELEASE(&inp->inp_route);
		if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
			zfree(ipi->ipi_zone, inp);
		}
		sodealloc(so);
	}
}

/*
 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
 * in struct pr_usrreqs, so that protocols can just reference them directly
 * without the need for a wrapper function.
 */
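
/*
 * Editorial sketch: the UDP usrreqs table, for instance, wires these
 * entry points in directly, roughly as
 *
 *	struct pr_usrreqs udp_usrreqs = {
 *		...
 *		.pru_peeraddr =	in_getpeeraddr,
 *		.pru_sockaddr =	in_getsockaddr,
 *		...
 *	};
 */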
1732 int
1733 in_getsockaddr(struct socket *so, struct sockaddr **nam)
1734 {
1735 struct inpcb *inp;
1736 struct sockaddr_in *sin;
1737
1738 /*
1739 * Do the malloc first in case it blocks.
1740 */
1741 MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
1742 if (sin == NULL)
1743 return (ENOBUFS);
1744 bzero(sin, sizeof (*sin));
1745 sin->sin_family = AF_INET;
1746 sin->sin_len = sizeof (*sin);
1747
1748 if ((inp = sotoinpcb(so)) == NULL) {
1749 FREE(sin, M_SONAME);
1750 return (EINVAL);
1751 }
1752 sin->sin_port = inp->inp_lport;
1753 sin->sin_addr = inp->inp_laddr;
1754
1755 *nam = (struct sockaddr *)sin;
1756 return (0);
1757 }
1758
1759 int
1760 in_getsockaddr_s(struct socket *so, struct sockaddr_in *ss)
1761 {
1762 struct sockaddr_in *sin = ss;
1763 struct inpcb *inp;
1764
1765 VERIFY(ss != NULL);
1766 bzero(ss, sizeof (*ss));
1767
1768 sin->sin_family = AF_INET;
1769 sin->sin_len = sizeof (*sin);
1770
1771 if ((inp = sotoinpcb(so)) == NULL)
1772 return (EINVAL);
1773
1774 sin->sin_port = inp->inp_lport;
1775 sin->sin_addr = inp->inp_laddr;
1776 return (0);
1777 }
1778
1779 int
1780 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
1781 {
1782 struct inpcb *inp;
1783 struct sockaddr_in *sin;
1784
1785 /*
1786 * Do the malloc first in case it blocks.
1787 */
1788 MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
1789 if (sin == NULL)
1790 return (ENOBUFS);
1791 bzero((caddr_t)sin, sizeof (*sin));
1792 sin->sin_family = AF_INET;
1793 sin->sin_len = sizeof (*sin);
1794
1795 if ((inp = sotoinpcb(so)) == NULL) {
1796 FREE(sin, M_SONAME);
1797 return (EINVAL);
1798 }
1799 sin->sin_port = inp->inp_fport;
1800 sin->sin_addr = inp->inp_faddr;
1801
1802 *nam = (struct sockaddr *)sin;
1803 return (0);
1804 }
1805
1806 void
1807 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1808 int errno, void (*notify)(struct inpcb *, int))
1809 {
1810 struct inpcb *inp;
1811
1812 lck_rw_lock_shared(pcbinfo->ipi_lock);
1813
1814 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1815 #if INET6
1816 if (!(inp->inp_vflag & INP_IPV4))
1817 continue;
1818 #endif /* INET6 */
1819 if (inp->inp_faddr.s_addr != faddr.s_addr ||
1820 inp->inp_socket == NULL)
1821 continue;
1822 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
1823 continue;
1824 socket_lock(inp->inp_socket, 1);
1825 (*notify)(inp, errno);
1826 (void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
1827 socket_unlock(inp->inp_socket, 1);
1828 }
1829 lck_rw_done(pcbinfo->ipi_lock);
1830 }
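
/*
 * Sketch of a typical caller: a transport's ctlinput handler reacting
 * to an ICMP error by notifying every PCB connected to the affected
 * foreign address (names are illustrative):
 *
 *	in_pcbnotifyall(&tcbinfo, faddr, EHOSTUNREACH, tcp_notify);
 *
 * where faddr is the foreign address recovered from the ICMP payload.
 */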
1831
1832 /*
1833 * Check for alternatives when a higher level complains
1834 * about service problems. For now, invalidate cached
1835 * routing information. If the route was created dynamically
1836 * (by a redirect), it's time to try a default gateway again.
1837 */
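/*
 * Sketch of the classic use: a transport that has retransmitted
 * several times in a row asks for a fresh route, e.g.
 *
 *	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4)
 *		in_losing(tp->t_inpcb);
 *
 * (the traditional BSD TCP retransmit-timer pattern, shown here only
 * for illustration).
 */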
1838 void
1839 in_losing(struct inpcb *inp)
1840 {
1841 boolean_t release = FALSE;
1842 struct rtentry *rt;
1843
1844 if ((rt = inp->inp_route.ro_rt) != NULL) {
1845 struct in_ifaddr *ia = NULL;
1846
1847 RT_LOCK(rt);
1848 if (rt->rt_flags & RTF_DYNAMIC) {
1849 /*
1850 * Prevent another thread from modifying rt_key,
1851 * rt_gateway via rt_setgate() after rt_lock is
1852 * dropped by marking the route as defunct.
1853 */
1854 rt->rt_flags |= RTF_CONDEMNED;
1855 RT_UNLOCK(rt);
1856 (void) rtrequest(RTM_DELETE, rt_key(rt),
1857 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
1858 } else {
1859 RT_UNLOCK(rt);
1860 }
1861 /* if the address is gone, keep the old route in the pcb */
1862 if (inp->inp_laddr.s_addr != INADDR_ANY &&
1863 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1864 /*
1865 * Address is around; ditch the route. A new route
1866 * can be allocated the next time output is attempted.
1867 */
1868 release = TRUE;
1869 }
1870 if (ia != NULL)
1871 IFA_REMREF(&ia->ia_ifa);
1872 }
1873 if (rt == NULL || release)
1874 ROUTE_RELEASE(&inp->inp_route);
1875 }
1876
1877 /*
1878 * After a routing change, flush old routing
1879 * and allocate a (hopefully) better one.
1880 */
1881 void
1882 in_rtchange(struct inpcb *inp, int errno)
1883 {
1884 #pragma unused(errno)
1885 boolean_t release = FALSE;
1886 struct rtentry *rt;
1887
1888 if ((rt = inp->inp_route.ro_rt) != NULL) {
1889 struct in_ifaddr *ia = NULL;
1890
1891 /* if address is gone, keep the old route */
1892 if (inp->inp_laddr.s_addr != INADDR_ANY &&
1893 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1894 /*
1895 * Address is around; ditch the route. A new route
1896 * can be allocated the next time output is attempted.
1897 */
1898 release = TRUE;
1899 }
1900 if (ia != NULL)
1901 IFA_REMREF(&ia->ia_ifa);
1902 }
1903 if (rt == NULL || release)
1904 ROUTE_RELEASE(&inp->inp_route);
1905 }
1906
1907 /*
1908 * Lookup a PCB based on the local address and port.
1909 */
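/*
 * In the wildcard case below, each candidate is scored by its number
 * of wildcard mismatches (0 through 2) and the lowest score wins.
 * For example, searching for { 10.0.0.1, 80 }: a PCB bound to
 * { 10.0.0.1, 80 } scores 0 and beats one bound to { INADDR_ANY, 80 },
 * which scores 1.
 */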
1910 struct inpcb *
1911 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
1912 unsigned int lport_arg, int wild_okay)
1913 {
1914 struct inpcb *inp;
1915 int matchwild = 3, wildcard;
1916 u_short lport = lport_arg;
1917
1918 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);
1919
1920 if (!wild_okay) {
1921 struct inpcbhead *head;
1922 /*
1923 * Look for an unconnected (wildcard foreign addr) PCB that
1924 * matches the local address and port we're looking for.
1925 */
1926 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
1927 pcbinfo->ipi_hashmask)];
1928 LIST_FOREACH(inp, head, inp_hash) {
1929 #if INET6
1930 if (!(inp->inp_vflag & INP_IPV4))
1931 continue;
1932 #endif /* INET6 */
1933 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1934 inp->inp_laddr.s_addr == laddr.s_addr &&
1935 inp->inp_lport == lport) {
1936 /*
1937 * Found.
1938 */
1939 return (inp);
1940 }
1941 }
1942 /*
1943 * Not found.
1944 */
1945 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
1946 return (NULL);
1947 } else {
1948 struct inpcbporthead *porthash;
1949 struct inpcbport *phd;
1950 struct inpcb *match = NULL;
1951 /*
1952 * Best fit PCB lookup.
1953 *
1954 * First see if this local port is in use by looking on the
1955 * port hash list.
1956 */
1957 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
1958 pcbinfo->ipi_porthashmask)];
1959 LIST_FOREACH(phd, porthash, phd_hash) {
1960 if (phd->phd_port == lport)
1961 break;
1962 }
1963 if (phd != NULL) {
1964 /*
1965 * Port is in use by one or more PCBs. Look for best
1966 * fit.
1967 */
1968 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
1969 wildcard = 0;
1970 #if INET6
1971 if (!(inp->inp_vflag & INP_IPV4))
1972 continue;
1973 #endif /* INET6 */
1974 if (inp->inp_faddr.s_addr != INADDR_ANY)
1975 wildcard++;
1976 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1977 if (laddr.s_addr == INADDR_ANY)
1978 wildcard++;
1979 else if (inp->inp_laddr.s_addr !=
1980 laddr.s_addr)
1981 continue;
1982 } else {
1983 if (laddr.s_addr != INADDR_ANY)
1984 wildcard++;
1985 }
1986 if (wildcard < matchwild) {
1987 match = inp;
1988 matchwild = wildcard;
1989 if (matchwild == 0) {
1990 break;
1991 }
1992 }
1993 }
1994 }
1995 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
1996 0, 0, 0, 0);
1997 return (match);
1998 }
1999 }
2000
2001 /*
2002 * Check if PCB exists in hash list.
2003 */
2004 int
2005 in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2006 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2007 uid_t *uid, gid_t *gid, struct ifnet *ifp)
2008 {
2009 struct inpcbhead *head;
2010 struct inpcb *inp;
2011 u_short fport = fport_arg, lport = lport_arg;
2012 int found = 0;
2013 struct inpcb *local_wild = NULL;
2014 #if INET6
2015 struct inpcb *local_wild_mapped = NULL;
2016 #endif /* INET6 */
2017
2018 *uid = UID_MAX;
2019 *gid = GID_MAX;
2020
2021 /*
2022 * We may have found the pcb in the last lookup - check this first.
2023 */
2024
2025 lck_rw_lock_shared(pcbinfo->ipi_lock);
2026
2027 /*
2028 * First look for an exact match.
2029 */
2030 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2031 pcbinfo->ipi_hashmask)];
2032 LIST_FOREACH(inp, head, inp_hash) {
2033 #if INET6
2034 if (!(inp->inp_vflag & INP_IPV4))
2035 continue;
2036 #endif /* INET6 */
2037 if (inp_restricted_recv(inp, ifp))
2038 continue;
2039
2040 if (inp->inp_faddr.s_addr == faddr.s_addr &&
2041 inp->inp_laddr.s_addr == laddr.s_addr &&
2042 inp->inp_fport == fport &&
2043 inp->inp_lport == lport) {
2044 if ((found = (inp->inp_socket != NULL))) {
2045 /*
2046 * Found.
2047 */
2048 *uid = kauth_cred_getuid(
2049 inp->inp_socket->so_cred);
2050 *gid = kauth_cred_getgid(
2051 inp->inp_socket->so_cred);
2052 }
2053 lck_rw_done(pcbinfo->ipi_lock);
2054 return (found);
2055 }
2056 }
2057
2058 if (!wildcard) {
2059 /*
2060 * Not found.
2061 */
2062 lck_rw_done(pcbinfo->ipi_lock);
2063 return (0);
2064 }
2065
2066 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2067 pcbinfo->ipi_hashmask)];
2068 LIST_FOREACH(inp, head, inp_hash) {
2069 #if INET6
2070 if (!(inp->inp_vflag & INP_IPV4))
2071 continue;
2072 #endif /* INET6 */
2073 if (inp_restricted_recv(inp, ifp))
2074 continue;
2075
2076 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2077 inp->inp_lport == lport) {
2078 if (inp->inp_laddr.s_addr == laddr.s_addr) {
2079 if ((found = (inp->inp_socket != NULL))) {
2080 *uid = kauth_cred_getuid(
2081 inp->inp_socket->so_cred);
2082 *gid = kauth_cred_getgid(
2083 inp->inp_socket->so_cred);
2084 }
2085 lck_rw_done(pcbinfo->ipi_lock);
2086 return (found);
2087 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2088 #if INET6
2089 if (inp->inp_socket &&
2090 SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
2091 local_wild_mapped = inp;
2092 else
2093 #endif /* INET6 */
2094 local_wild = inp;
2095 }
2096 }
2097 }
2098 if (local_wild == NULL) {
2099 #if INET6
2100 if (local_wild_mapped != NULL) {
2101 if ((found = (local_wild_mapped->inp_socket != NULL))) {
2102 *uid = kauth_cred_getuid(
2103 local_wild_mapped->inp_socket->so_cred);
2104 *gid = kauth_cred_getgid(
2105 local_wild_mapped->inp_socket->so_cred);
2106 }
2107 lck_rw_done(pcbinfo->ipi_lock);
2108 return (found);
2109 }
2110 #endif /* INET6 */
2111 lck_rw_done(pcbinfo->ipi_lock);
2112 return (0);
2113 }
2114 if ((found = (local_wild->inp_socket != NULL))) {
2115 *uid = kauth_cred_getuid(
2116 local_wild->inp_socket->so_cred);
2117 *gid = kauth_cred_getgid(
2118 local_wild->inp_socket->so_cred);
2119 }
2120 lck_rw_done(pcbinfo->ipi_lock);
2121 return (found);
2122 }
2123
2124 /*
2125 * Lookup PCB in hash list.
2126 */
2127 struct inpcb *
2128 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2129 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2130 struct ifnet *ifp)
2131 {
2132 struct inpcbhead *head;
2133 struct inpcb *inp;
2134 u_short fport = fport_arg, lport = lport_arg;
2135 struct inpcb *local_wild = NULL;
2136 #if INET6
2137 struct inpcb *local_wild_mapped = NULL;
2138 #endif /* INET6 */
2139
2140 /*
2141 * We may have found the pcb in the last lookup - check this first.
2142 */
2143
2144 lck_rw_lock_shared(pcbinfo->ipi_lock);
2145
2146 /*
2147 * First look for an exact match.
2148 */
2149 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2150 pcbinfo->ipi_hashmask)];
2151 LIST_FOREACH(inp, head, inp_hash) {
2152 #if INET6
2153 if (!(inp->inp_vflag & INP_IPV4))
2154 continue;
2155 #endif /* INET6 */
2156 if (inp_restricted_recv(inp, ifp))
2157 continue;
2158
2159 if (inp->inp_faddr.s_addr == faddr.s_addr &&
2160 inp->inp_laddr.s_addr == laddr.s_addr &&
2161 inp->inp_fport == fport &&
2162 inp->inp_lport == lport) {
2163 /*
2164 * Found.
2165 */
2166 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2167 WNT_STOPUSING) {
2168 lck_rw_done(pcbinfo->ipi_lock);
2169 return (inp);
2170 } else {
2171 /* it's there but dead, say it isn't found */
2172 lck_rw_done(pcbinfo->ipi_lock);
2173 return (NULL);
2174 }
2175 }
2176 }
2177
2178 if (!wildcard) {
2179 /*
2180 * Not found.
2181 */
2182 lck_rw_done(pcbinfo->ipi_lock);
2183 return (NULL);
2184 }
2185
2186 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2187 pcbinfo->ipi_hashmask)];
2188 LIST_FOREACH(inp, head, inp_hash) {
2189 #if INET6
2190 if (!(inp->inp_vflag & INP_IPV4))
2191 continue;
2192 #endif /* INET6 */
2193 if (inp_restricted_recv(inp, ifp))
2194 continue;
2195
2196 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2197 inp->inp_lport == lport) {
2198 if (inp->inp_laddr.s_addr == laddr.s_addr) {
2199 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2200 WNT_STOPUSING) {
2201 lck_rw_done(pcbinfo->ipi_lock);
2202 return (inp);
2203 } else {
2204 /* it's dead; say it isn't found */
2205 lck_rw_done(pcbinfo->ipi_lock);
2206 return (NULL);
2207 }
2208 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2209 #if INET6
2210 if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
2211 local_wild_mapped = inp;
2212 else
2213 #endif /* INET6 */
2214 local_wild = inp;
2215 }
2216 }
2217 }
2218 if (local_wild == NULL) {
2219 #if INET6
2220 if (local_wild_mapped != NULL) {
2221 if (in_pcb_checkstate(local_wild_mapped,
2222 WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2223 lck_rw_done(pcbinfo->ipi_lock);
2224 return (local_wild_mapped);
2225 } else {
2226 /* it's dead; say it isn't found */
2227 lck_rw_done(pcbinfo->ipi_lock);
2228 return (NULL);
2229 }
2230 }
2231 #endif /* INET6 */
2232 lck_rw_done(pcbinfo->ipi_lock);
2233 return (NULL);
2234 }
2235 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2236 lck_rw_done(pcbinfo->ipi_lock);
2237 return (local_wild);
2238 }
2239 /*
2240 * It's either not found or is already dead.
2241 */
2242 lck_rw_done(pcbinfo->ipi_lock);
2243 return (NULL);
2244 }
2245
2246 /*
2247 * @brief Insert PCB onto various hash lists.
2248 *
2249 * @param inp Pointer to internet protocol control block
2250 * @param locked Indicates whether ipi_lock (protecting the pcb
2251 * list) is already held.
2252 *
2253 * @return 0 on success; an errno value on failure
2254 */
2255 int
2256 in_pcbinshash(struct inpcb *inp, int locked)
2257 {
2258 struct inpcbhead *pcbhash;
2259 struct inpcbporthead *pcbporthash;
2260 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
2261 struct inpcbport *phd;
2262 u_int32_t hashkey_faddr;
2263
2264 if (!locked) {
2265 if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
2266 /*
2267 * Lock inversion issue, seen mostly with UDP
2268 * multicast packets.
2269 */
2270 socket_unlock(inp->inp_socket, 0);
2271 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
2272 socket_lock(inp->inp_socket, 0);
2273 }
2274 }
2275
2276 /*
2277 * This routine or its caller may have given up the
2278 * socket's protocol lock briefly.
2279 * During that time the socket may have been dropped.
2280 * Guard against that here.
2281 */
2282 if (inp->inp_state == INPCB_STATE_DEAD) {
2283 if (!locked) {
2284 lck_rw_done(pcbinfo->ipi_lock);
2285 }
2286 return (ECONNABORTED);
2287 }
2288
2289
2290 #if INET6
2291 if (inp->inp_vflag & INP_IPV6)
2292 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2293 else
2294 #endif /* INET6 */
2295 hashkey_faddr = inp->inp_faddr.s_addr;
2296
2297 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2298 inp->inp_fport, pcbinfo->ipi_hashmask);
2299
2300 pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];
2301
2302 pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
2303 pcbinfo->ipi_porthashmask)];
2304
2305 /*
2306 * Go through port list and look for a head for this lport.
2307 */
2308 LIST_FOREACH(phd, pcbporthash, phd_hash) {
2309 if (phd->phd_port == inp->inp_lport)
2310 break;
2311 }
2312
2313 /*
2314 * If none exists, malloc one and tack it on.
2315 */
2316 if (phd == NULL) {
2317 MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport),
2318 M_PCB, M_WAITOK);
2319 if (phd == NULL) {
2320 if (!locked)
2321 lck_rw_done(pcbinfo->ipi_lock);
2322 return (ENOBUFS); /* XXX */
2323 }
2324 phd->phd_port = inp->inp_lport;
2325 LIST_INIT(&phd->phd_pcblist);
2326 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
2327 }
2328
2329 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2330
2331
2332 inp->inp_phd = phd;
2333 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
2334 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
2335 inp->inp_flags2 |= INP2_INHASHLIST;
2336
2337 if (!locked)
2338 lck_rw_done(pcbinfo->ipi_lock);
2339
2340 #if NECP
2341 // This call catches the original setting of the local address
2342 inp_update_necp_policy(inp, NULL, NULL, 0);
2343 #endif /* NECP */
2344
2345 return (0);
2346 }
2347
2348 /*
2349 * Move PCB to the proper hash bucket when { faddr, fport } have been
2350 * changed. NOTE: This does not handle the case of the lport changing (the
2351 * hashed port list would have to be updated as well), so the lport must
2352 * not change after in_pcbinshash() has been called.
2353 */
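/*
 * Illustrative call sequence on an outbound connect (a sketch; the
 * actual call sites live in the protocol code):
 *
 *	inp->inp_faddr = sin->sin_addr;
 *	inp->inp_fport = sin->sin_port;
 *	in_pcbrehash(inp);
 */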
2354 void
2355 in_pcbrehash(struct inpcb *inp)
2356 {
2357 struct inpcbhead *head;
2358 u_int32_t hashkey_faddr;
2359
2360 #if INET6
2361 if (inp->inp_vflag & INP_IPV6)
2362 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2363 else
2364 #endif /* INET6 */
2365 hashkey_faddr = inp->inp_faddr.s_addr;
2366
2367 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2368 inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
2369 head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];
2370
2371 if (inp->inp_flags2 & INP2_INHASHLIST) {
2372 LIST_REMOVE(inp, inp_hash);
2373 inp->inp_flags2 &= ~INP2_INHASHLIST;
2374 }
2375
2376 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2377 LIST_INSERT_HEAD(head, inp, inp_hash);
2378 inp->inp_flags2 |= INP2_INHASHLIST;
2379
2380 #if NECP
2381 // This call catches updates to the remote addresses
2382 inp_update_necp_policy(inp, NULL, NULL, 0);
2383 #endif /* NECP */
2384 }
2385
2386 /*
2387 * Remove PCB from various lists.
2388 * Must be called with the pcbinfo lock held in exclusive mode.
2389 */
2390 void
2391 in_pcbremlists(struct inpcb *inp)
2392 {
2393 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
2394
2395 /*
2396 * Check if it's in hashlist -- an inp is placed in hashlist when
2397 * its local port gets assigned, so it should also be present
2398 * in the port list.
2399 */
2400 if (inp->inp_flags2 & INP2_INHASHLIST) {
2401 struct inpcbport *phd = inp->inp_phd;
2402
2403 VERIFY(phd != NULL && inp->inp_lport > 0);
2404
2405 LIST_REMOVE(inp, inp_hash);
2406 inp->inp_hash.le_next = NULL;
2407 inp->inp_hash.le_prev = NULL;
2408
2409 LIST_REMOVE(inp, inp_portlist);
2410 inp->inp_portlist.le_next = NULL;
2411 inp->inp_portlist.le_prev = NULL;
2412 if (LIST_EMPTY(&phd->phd_pcblist)) {
2413 LIST_REMOVE(phd, phd_hash);
2414 FREE(phd, M_PCB);
2415 }
2416 inp->inp_phd = NULL;
2417 inp->inp_flags2 &= ~INP2_INHASHLIST;
2418 }
2419 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2420
2421 if (inp->inp_flags2 & INP2_TIMEWAIT) {
2422 /* Remove from time-wait queue */
2423 tcp_remove_from_time_wait(inp);
2424 inp->inp_flags2 &= ~INP2_TIMEWAIT;
2425 VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
2426 inp->inp_pcbinfo->ipi_twcount--;
2427 } else {
2428 /* Remove from global inp list if it is not time-wait */
2429 LIST_REMOVE(inp, inp_list);
2430 }
2431
2432 if (inp->inp_flags2 & INP2_IN_FCTREE) {
2433 inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE));
2434 VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
2435 }
2436
2437 inp->inp_pcbinfo->ipi_count--;
2438 }
2439
2440 /*
2441 * Mechanism used to defer the memory release of PCBs.
2442 * The pcb list will contain the pcb until the reaper can clean it up if
2443 * the following conditions are met: 1) state is "DEAD", 2) wantcnt is
2444 * STOPUSING, and 3) usecount is 0.
2445 * Depending on the mode, this function marks the pcb as unusable
2446 * (WNT_STOPUSING), acquires a reference on it (WNT_ACQUIRE), or
2447 * releases a reference (WNT_RELEASE).
2448 */
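/*
 * Typical usage while walking the pcb list, as in in_pcbnotifyall()
 * above:
 *
 *	if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
 *		continue;
 *	socket_lock(inp->inp_socket, 1);
 *	... use the pcb ...
 *	(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
 *	socket_unlock(inp->inp_socket, 1);
 */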
2449 int
2450 in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
2451 {
2452 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
2453 UInt32 origwant;
2454 UInt32 newwant;
2455
2456 switch (mode) {
2457 case WNT_STOPUSING:
2458 /*
2459 * Try to mark the pcb as ready for recycling. CAS the
2460 * want count with STOPUSING; on success we're done, and
2461 * if the pcb is still in use it will be marked later.
2462 */
2463 if (locked == 0)
2464 socket_lock(pcb->inp_socket, 1);
2465 pcb->inp_state = INPCB_STATE_DEAD;
2466
2467 stopusing:
2468 if (pcb->inp_socket->so_usecount < 0) {
2469 panic("%s: pcb=%p so=%p usecount is negative\n",
2470 __func__, pcb, pcb->inp_socket);
2471 /* NOTREACHED */
2472 }
2473 if (locked == 0)
2474 socket_unlock(pcb->inp_socket, 1);
2475
2476 inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);
2477
2478 origwant = *wantcnt;
2479 if ((UInt16) origwant == 0xffff) /* should stop using */
2480 return (WNT_STOPUSING);
2481 newwant = 0xffff;
2482 if ((UInt16) origwant == 0) {
2483 /* try to mark it as unusable now */
2484 OSCompareAndSwap(origwant, newwant, wantcnt);
2485 }
2486 return (WNT_STOPUSING);
2487
2488 case WNT_ACQUIRE:
2489 /*
2490 * Try to take a reference on the pcb. If the want count
2491 * is already at STOPUSING, bail out; otherwise increment
2492 * the count atomically.
2493 */
2494 do {
2495 origwant = *wantcnt;
2496 if ((UInt16) origwant == 0xffff) {
2497 /* should stop using */
2498 return (WNT_STOPUSING);
2499 }
2500 newwant = origwant + 1;
2501 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2502 return (WNT_ACQUIRE);
2503
2504 case WNT_RELEASE:
2505 /*
2506 * Release a reference. If the pcb state is DEAD once the
2507 * count has been dropped, set the want count to STOPUSING.
2508 */
2509 if (locked == 0)
2510 socket_lock(pcb->inp_socket, 1);
2511
2512 do {
2513 origwant = *wantcnt;
2514 if ((UInt16) origwant == 0x0) {
2515 panic("%s: pcb=%p release with zero count",
2516 __func__, pcb);
2517 /* NOTREACHED */
2518 }
2519 if ((UInt16) origwant == 0xffff) {
2520 /* should stop using */
2521 if (locked == 0)
2522 socket_unlock(pcb->inp_socket, 1);
2523 return (WNT_STOPUSING);
2524 }
2525 newwant = origwant - 1;
2526 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2527
2528 if (pcb->inp_state == INPCB_STATE_DEAD)
2529 goto stopusing;
2530 if (pcb->inp_socket->so_usecount < 0) {
2531 panic("%s: RELEASE pcb=%p so=%p usecount is negative\n",
2532 __func__, pcb, pcb->inp_socket);
2533 /* NOTREACHED */
2534 }
2535
2536 if (locked == 0)
2537 socket_unlock(pcb->inp_socket, 1);
2538 return (WNT_RELEASE);
2539
2540 default:
2541 panic("%s: so=%p not a valid mode=%x\n", __func__,
2542 pcb->inp_socket, mode);
2543 /* NOTREACHED */
2544 }
2545
2546 /* NOTREACHED */
2547 return (mode);
2548 }
2549
2550 /*
2551 * inpcb_to_compat copies specific bits of an inpcb to an inpcb_compat.
2552 * The inpcb_compat data structure is passed to user space and must
2553 * not change. We intentionally avoid copying pointers.
2554 */
2555 void
2556 inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
2557 {
2558 bzero(inp_compat, sizeof (*inp_compat));
2559 inp_compat->inp_fport = inp->inp_fport;
2560 inp_compat->inp_lport = inp->inp_lport;
2561 inp_compat->nat_owner = 0;
2562 inp_compat->nat_cookie = 0;
2563 inp_compat->inp_gencnt = inp->inp_gencnt;
2564 inp_compat->inp_flags = inp->inp_flags;
2565 inp_compat->inp_flow = inp->inp_flow;
2566 inp_compat->inp_vflag = inp->inp_vflag;
2567 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
2568 inp_compat->inp_ip_p = inp->inp_ip_p;
2569 inp_compat->inp_dependfaddr.inp6_foreign =
2570 inp->inp_dependfaddr.inp6_foreign;
2571 inp_compat->inp_dependladdr.inp6_local =
2572 inp->inp_dependladdr.inp6_local;
2573 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2574 inp_compat->inp_depend6.inp6_hlim = 0;
2575 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2576 inp_compat->inp_depend6.inp6_ifindex = 0;
2577 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2578 }
2579
2580 #if !CONFIG_EMBEDDED
2581 void
2582 inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
2583 {
2584 xinp->inp_fport = inp->inp_fport;
2585 xinp->inp_lport = inp->inp_lport;
2586 xinp->inp_gencnt = inp->inp_gencnt;
2587 xinp->inp_flags = inp->inp_flags;
2588 xinp->inp_flow = inp->inp_flow;
2589 xinp->inp_vflag = inp->inp_vflag;
2590 xinp->inp_ip_ttl = inp->inp_ip_ttl;
2591 xinp->inp_ip_p = inp->inp_ip_p;
2592 xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
2593 xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
2594 xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2595 xinp->inp_depend6.inp6_hlim = 0;
2596 xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2597 xinp->inp_depend6.inp6_ifindex = 0;
2598 xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2599 }
2600 #endif /* !CONFIG_EMBEDDED */
2601
2602 /*
2603 * The following routines implement this scheme:
2604 *
2605 * Callers of ip_output() that intend to cache the route in the inpcb pass
2606 * a local copy of the struct route to ip_output(). Using a local copy of
2607 * the cached route significantly simplifies things as IP no longer has to
2608 * worry about having exclusive access to the passed in struct route, since
2609 * it's defined in the caller's stack; in essence, this allows for a lock-
2610 * less operation when updating the struct route at the IP level and below,
2611 * whenever necessary. The scheme works as follows:
2612 *
2613 * Prior to dropping the socket's lock and calling ip_output(), the caller
2614 * copies the struct route from the inpcb into its stack, and adds a reference
2615 * to the cached route entry, if there was any. The socket's lock is then
2616 * dropped and ip_output() is called with a pointer to the copy of struct
2617 * route defined on the stack (not to the one in the inpcb.)
2618 *
2619 * Upon returning from ip_output(), the caller then acquires the socket's
2620 * lock and synchronizes the cache; if there is no route cached in the inpcb,
2621 * it copies the local copy of struct route (which may or may not contain any
2622 * route) back into the cache; otherwise, if the inpcb has a route cached in
2623 * it, the one in the local copy will be freed, if there's any. Trashing the
2624 * cached route in the inpcb can be avoided because ip_output() is single-
2625 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
2626 * by the socket/transport layer.)
2627 */
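/*
 * A minimal sketch of that pattern (illustrative, not a verbatim
 * caller):
 *
 *	struct route ro;
 *
 *	socket_lock(so, 0);
 *	inp_route_copyout(inp, &ro);
 *	socket_unlock(so, 0);
 *	error = ip_output(m, opt, &ro, flags, mopt, &ipoa);
 *	socket_lock(so, 0);
 *	inp_route_copyin(inp, &ro);
 *	socket_unlock(so, 0);
 */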
2628 void
2629 inp_route_copyout(struct inpcb *inp, struct route *dst)
2630 {
2631 struct route *src = &inp->inp_route;
2632
2633 socket_lock_assert_owned(inp->inp_socket);
2634
2635 /*
2636 * If the route in the PCB is stale or not for IPv4, blow it away;
2637 * this is possible in the IPv4-mapped address case.
2638 */
2639 if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET)
2640 ROUTE_RELEASE(src);
2641
2642 route_copyout(dst, src, sizeof (*dst));
2643 }
2644
2645 void
2646 inp_route_copyin(struct inpcb *inp, struct route *src)
2647 {
2648 struct route *dst = &inp->inp_route;
2649
2650 socket_lock_assert_owned(inp->inp_socket);
2651
2652 /* Minor sanity check */
2653 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
2654 panic("%s: wrong or corrupted route: %p", __func__, src);
2655
2656 route_copyin(src, dst, sizeof (*src));
2657 }
2658
2659 /*
2660 * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option.
2661 */
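/*
 * From user space this is typically reached via, e.g. (sketch):
 *
 *	u_int idx = if_nametoindex("en0");
 *	setsockopt(s, IPPROTO_IP, IP_BOUND_IF, &idx, sizeof (idx));
 *
 * where an index of 0 (IFSCOPE_NONE) unbinds the socket.
 */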
2662 int
2663 inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
2664 {
2665 struct ifnet *ifp = NULL;
2666
2667 ifnet_head_lock_shared();
2668 if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
2669 (ifp = ifindex2ifnet[ifscope]) == NULL)) {
2670 ifnet_head_done();
2671 return (ENXIO);
2672 }
2673 ifnet_head_done();
2674
2675 VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);
2676
2677 /*
2678 * A zero interface scope value indicates an "unbind".
2679 * Otherwise, take in whatever value the app desires;
2680 * the app may already know the scope (or force itself
2681 * to such a scope) ahead of time before the interface
2682 * gets attached. It doesn't matter either way; any
2683 * route lookup from this point on will require an
2684 * exact match for the embedded interface scope.
2685 */
2686 inp->inp_boundifp = ifp;
2687 if (inp->inp_boundifp == NULL)
2688 inp->inp_flags &= ~INP_BOUND_IF;
2689 else
2690 inp->inp_flags |= INP_BOUND_IF;
2691
2692 /* Blow away any cached route in the PCB */
2693 ROUTE_RELEASE(&inp->inp_route);
2694
2695 if (pifp != NULL)
2696 *pifp = ifp;
2697
2698 return (0);
2699 }
2700
2701 /*
2702 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2703 * as well as for setting PROC_UUID_NO_CELLULAR policy.
2704 */
2705 void
2706 inp_set_nocellular(struct inpcb *inp)
2707 {
2708 inp->inp_flags |= INP_NO_IFT_CELLULAR;
2709
2710 /* Blow away any cached route in the PCB */
2711 ROUTE_RELEASE(&inp->inp_route);
2712 }
2713
2714 /*
2715 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2716 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
2717 */
2718 void
2719 inp_clear_nocellular(struct inpcb *inp)
2720 {
2721 struct socket *so = inp->inp_socket;
2722
2723 /*
2724 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
2725 * has a higher precedence than INP_NO_IFT_CELLULAR. Clear the flag
2726 * if and only if the socket is unrestricted.
2727 */
2728 if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
2729 inp->inp_flags &= ~INP_NO_IFT_CELLULAR;
2730
2731 /* Blow away any cached route in the PCB */
2732 ROUTE_RELEASE(&inp->inp_route);
2733 }
2734 }
2735
2736 void
2737 inp_set_noexpensive(struct inpcb *inp)
2738 {
2739 inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;
2740
2741 /* Blow away any cached route in the PCB */
2742 ROUTE_RELEASE(&inp->inp_route);
2743 }
2744
2745 void
2746 inp_set_awdl_unrestricted(struct inpcb *inp)
2747 {
2748 inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;
2749
2750 /* Blow away any cached route in the PCB */
2751 ROUTE_RELEASE(&inp->inp_route);
2752 }
2753
2754 boolean_t
2755 inp_get_awdl_unrestricted(struct inpcb *inp)
2756 {
2757 return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
2758 }
2759
2760 void
2761 inp_clear_awdl_unrestricted(struct inpcb *inp)
2762 {
2763 inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;
2764
2765 /* Blow away any cached route in the PCB */
2766 ROUTE_RELEASE(&inp->inp_route);
2767 }
2768
2769 void
2770 inp_set_intcoproc_allowed(struct inpcb *inp)
2771 {
2772 inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
2773
2774 /* Blow away any cached route in the PCB */
2775 ROUTE_RELEASE(&inp->inp_route);
2776 }
2777
2778 boolean_t
2779 inp_get_intcoproc_allowed(struct inpcb *inp)
2780 {
2781 return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE;
2782 }
2783
2784 void
2785 inp_clear_intcoproc_allowed(struct inpcb *inp)
2786 {
2787 inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED;
2788
2789 /* Blow away any cached route in the PCB */
2790 ROUTE_RELEASE(&inp->inp_route);
2791 }
2792
2793 #if NECP
2794 /*
2795 * Called when PROC_UUID_NECP_APP_POLICY is set.
2796 */
2797 void
2798 inp_set_want_app_policy(struct inpcb *inp)
2799 {
2800 inp->inp_flags2 |= INP2_WANT_APP_POLICY;
2801 }
2802
2803 /*
2804 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
2805 */
2806 void
2807 inp_clear_want_app_policy(struct inpcb *inp)
2808 {
2809 inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
2810 }
2811 #endif /* NECP */
2812
2813 /*
2814 * Calculate flow hash for an inp, used by an interface to identify a
2815 * flow. When an interface provides flow control advisory, this flow
2816 * hash is used as an identifier.
2817 */
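/*
 * Sketch of how the hash is consumed: the interface layer identifies
 * the flow purely by this value when delivering an advisory, e.g.
 *
 *	inp_flowadv(flowhash);
 *
 * (see inp_flowadv() below, which resolves the hash back to an inp).
 */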
2818 u_int32_t
2819 inp_calc_flowhash(struct inpcb *inp)
2820 {
2821 struct inp_flowhash_key fh __attribute__((aligned(8)));
2822 u_int32_t flowhash = 0;
2823 struct inpcb *tmp_inp = NULL;
2824
2825 if (inp_hash_seed == 0)
2826 inp_hash_seed = RandomULong();
2827
2828 bzero(&fh, sizeof (fh));
2829
2830 bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr));
2831 bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr));
2832
2833 fh.infh_lport = inp->inp_lport;
2834 fh.infh_fport = inp->inp_fport;
2835 fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
2836 fh.infh_proto = inp->inp_ip_p;
2837 fh.infh_rand1 = RandomULong();
2838 fh.infh_rand2 = RandomULong();
2839
2840 try_again:
2841 flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed);
2842 if (flowhash == 0) {
2843 /* try to get a non-zero flowhash */
2844 inp_hash_seed = RandomULong();
2845 goto try_again;
2846 }
2847
2848 inp->inp_flowhash = flowhash;
2849
2850 /* Insert the inp into inp_fc_tree */
2851 lck_mtx_lock_spin(&inp_fc_lck);
2852 tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
2853 if (tmp_inp != NULL) {
2854 /*
2855 * There is a different inp with the same flowhash.
2856 * There can be a collision on flow hash but the
2857 * probability is low. Let's recompute the
2858 * flowhash.
2859 */
2860 lck_mtx_unlock(&inp_fc_lck);
2861 /* recompute hash seed */
2862 inp_hash_seed = RandomULong();
2863 goto try_again;
2864 }
2865
2866 RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
2867 inp->inp_flags2 |= INP2_IN_FCTREE;
2868 lck_mtx_unlock(&inp_fc_lck);
2869
2870 return (flowhash);
2871 }
2872
2873 void
2874 inp_flowadv(uint32_t flowhash)
2875 {
2876 struct inpcb *inp;
2877
2878 inp = inp_fc_getinp(flowhash, 0);
2879
2880 if (inp == NULL)
2881 return;
2882 inp_fc_feedback(inp);
2883 }
2884
2885 /*
2886 * Function to compare inp_fc_entries in inp flow control tree
2887 */
2888 static inline int
2889 infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
2890 {
2891 return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
2892 sizeof(inp1->inp_flowhash)));
2893 }
2894
2895 static struct inpcb *
2896 inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
2897 {
2898 struct inpcb *inp = NULL;
2899 int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;
2900
2901 lck_mtx_lock_spin(&inp_fc_lck);
2902 key_inp.inp_flowhash = flowhash;
2903 inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
2904 if (inp == NULL) {
2905 /* inp is not present, return */
2906 lck_mtx_unlock(&inp_fc_lck);
2907 return (NULL);
2908 }
2909
2910 if (flags & INPFC_REMOVE) {
2911 RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
2912 lck_mtx_unlock(&inp_fc_lck);
2913
2914 bzero(&(inp->infc_link), sizeof (inp->infc_link));
2915 inp->inp_flags2 &= ~INP2_IN_FCTREE;
2916 return (NULL);
2917 }
2918
2919 if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING)
2920 inp = NULL;
2921 lck_mtx_unlock(&inp_fc_lck);
2922
2923 return (inp);
2924 }
2925
2926 static void
2927 inp_fc_feedback(struct inpcb *inp)
2928 {
2929 struct socket *so = inp->inp_socket;
2930
2931 /* we already hold a want_cnt on this inp, socket can't be null */
2932 VERIFY(so != NULL);
2933 socket_lock(so, 1);
2934
2935 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
2936 socket_unlock(so, 1);
2937 return;
2938 }
2939
2940 if (inp->inp_sndinprog_cnt > 0)
2941 inp->inp_flags |= INP_FC_FEEDBACK;
2942
2943 /*
2944 * Return if the connection is not in flow-controlled state.
2945 * This can happen if the connection experienced
2946 * loss while it was in flow controlled state
2947 */
2948 if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
2949 socket_unlock(so, 1);
2950 return;
2951 }
2952 inp_reset_fc_state(inp);
2953
2954 if (SOCK_TYPE(so) == SOCK_STREAM)
2955 inp_fc_unthrottle_tcp(inp);
2956
2957 socket_unlock(so, 1);
2958 }
2959
2960 void
2961 inp_reset_fc_state(struct inpcb *inp)
2962 {
2963 struct socket *so = inp->inp_socket;
2964 int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
2965 int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;
2966
2967 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
2968
2969 if (suspended) {
2970 so->so_flags &= ~(SOF_SUSPENDED);
2971 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
2972 }
2973
2974 /* Give a write wakeup to unblock the socket */
2975 if (needwakeup)
2976 sowwakeup(so);
2977 }
2978
2979 int
2980 inp_set_fc_state(struct inpcb *inp, int advcode)
2981 {
2982 struct inpcb *tmp_inp = NULL;
2983 /*
2984 * If there was feedback from the interface while a send
2985 * operation was in progress, we should ignore this flow
2986 * advisory to avoid a race between setting the flow-
2987 * controlled state and receiving feedback from the
2988 * interface.
2989 */
2990 if (inp->inp_flags & INP_FC_FEEDBACK)
2991 return (0);
2992
2993 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
2994 if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
2995 INPFC_SOLOCKED)) != NULL) {
2996 if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING)
2997 return (0);
2998 VERIFY(tmp_inp == inp);
2999 switch (advcode) {
3000 case FADV_FLOW_CONTROLLED:
3001 inp->inp_flags |= INP_FLOW_CONTROLLED;
3002 break;
3003 case FADV_SUSPENDED:
3004 inp->inp_flags |= INP_FLOW_SUSPENDED;
3005 soevent(inp->inp_socket,
3006 (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));
3007
3008 /* Record the fact that suspend event was sent */
3009 inp->inp_socket->so_flags |= SOF_SUSPENDED;
3010 break;
3011 }
3012 return (1);
3013 }
3014 return (0);
3015 }
3016
3017 /*
3018 * Handler for SO_FLUSH socket option.
3019 */
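/*
 * Sketch of the user-space trigger (illustrative):
 *
 *	int tc = SO_TC_ALL;
 *	setsockopt(s, SOL_SOCKET, SO_FLUSH, &tc, sizeof (tc));
 *
 * which flushes any queued packets belonging to this flow on the
 * interfaces computed below.
 */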
3020 int
3021 inp_flush(struct inpcb *inp, int optval)
3022 {
3023 u_int32_t flowhash = inp->inp_flowhash;
3024 struct ifnet *rtifp, *oifp;
3025
3026 /* Either all classes or one of the valid ones */
3027 if (optval != SO_TC_ALL && !SO_VALID_TC(optval))
3028 return (EINVAL);
3029
3030 /* We need a flow hash for identification */
3031 if (flowhash == 0)
3032 return (0);
3033
3034 /* Grab the interfaces from the route and pcb */
3035 rtifp = ((inp->inp_route.ro_rt != NULL) ?
3036 inp->inp_route.ro_rt->rt_ifp : NULL);
3037 oifp = inp->inp_last_outifp;
3038
3039 if (rtifp != NULL)
3040 if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3041 if (oifp != NULL && oifp != rtifp)
3042 if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3043
3044 return (0);
3045 }
3046
3047 /*
3048 * Clear the INP_INADDR_ANY flag (special case for PPP only)
3049 */
3050 void
3051 inp_clear_INP_INADDR_ANY(struct socket *so)
3052 {
3053 struct inpcb *inp = NULL;
3054
3055 socket_lock(so, 1);
3056 inp = sotoinpcb(so);
3057 if (inp) {
3058 inp->inp_flags &= ~INP_INADDR_ANY;
3059 }
3060 socket_unlock(so, 1);
3061 }
3062
3063 void
3064 inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
3065 {
3066 struct socket *so = inp->inp_socket;
3067
3068 soprocinfo->spi_pid = so->last_pid;
3069 if (so->last_pid != 0)
3070 uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
3071 /*
3072 * When not delegated, the effective pid is the same as the real pid
3073 */
3074 if (so->so_flags & SOF_DELEGATED) {
3075 soprocinfo->spi_delegated = 1;
3076 soprocinfo->spi_epid = so->e_pid;
3077 uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
3078 } else {
3079 soprocinfo->spi_delegated = 0;
3080 soprocinfo->spi_epid = so->last_pid;
3081 }
3082 }
3083
3084 int
3085 inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
3086 struct so_procinfo *soprocinfo)
3087 {
3088 struct inpcb *inp = NULL;
3089 int found = 0;
3090
3091 bzero(soprocinfo, sizeof (struct so_procinfo));
3092
3093 if (!flowhash)
3094 return (-1);
3095
3096 lck_rw_lock_shared(pcbinfo->ipi_lock);
3097 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
3098 if (inp->inp_state != INPCB_STATE_DEAD &&
3099 inp->inp_socket != NULL &&
3100 inp->inp_flowhash == flowhash) {
3101 found = 1;
3102 inp_get_soprocinfo(inp, soprocinfo);
3103 break;
3104 }
3105 }
3106 lck_rw_done(pcbinfo->ipi_lock);
3107
3108 return (found);
3109 }
3110
3111 #if CONFIG_PROC_UUID_POLICY
3112 static void
3113 inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
3114 {
3115 struct socket *so = inp->inp_socket;
3116 int before, after;
3117
3118 VERIFY(so != NULL);
3119 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
3120
3121 before = INP_NO_CELLULAR(inp);
3122 if (set) {
3123 inp_set_nocellular(inp);
3124 } else {
3125 inp_clear_nocellular(inp);
3126 }
3127 after = INP_NO_CELLULAR(inp);
3128 if (net_io_policy_log && (before != after)) {
3129 static const char *ok = "OK";
3130 static const char *nok = "NOACCESS";
3131 uuid_string_t euuid_buf;
3132 pid_t epid;
3133
3134 if (so->so_flags & SOF_DELEGATED) {
3135 uuid_unparse(so->e_uuid, euuid_buf);
3136 epid = so->e_pid;
3137 } else {
3138 uuid_unparse(so->last_uuid, euuid_buf);
3139 epid = so->last_pid;
3140 }
3141
3142 /* allow this socket to generate another notification event */
3143 so->so_ifdenied_notifies = 0;
3144
3145 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
3146 "euuid %s%s %s->%s\n", __func__,
3147 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
3148 SOCK_TYPE(so), epid, euuid_buf,
3149 (so->so_flags & SOF_DELEGATED) ?
3150 " [delegated]" : "",
3151 ((before < after) ? ok : nok),
3152 ((before < after) ? nok : ok));
3153 }
3154 }
3155
3156 #if NECP
3157 static void
3158 inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
3159 {
3160 struct socket *so = inp->inp_socket;
3161 int before, after;
3162
3163 VERIFY(so != NULL);
3164 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
3165
3166 before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
3167 if (set) {
3168 inp_set_want_app_policy(inp);
3169 } else {
3170 inp_clear_want_app_policy(inp);
3171 }
3172 after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
3173 if (net_io_policy_log && (before != after)) {
3174 static const char *wanted = "WANTED";
3175 static const char *unwanted = "UNWANTED";
3176 uuid_string_t euuid_buf;
3177 pid_t epid;
3178
3179 if (so->so_flags & SOF_DELEGATED) {
3180 uuid_unparse(so->e_uuid, euuid_buf);
3181 epid = so->e_pid;
3182 } else {
3183 uuid_unparse(so->last_uuid, euuid_buf);
3184 epid = so->last_pid;
3185 }
3186
3187 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
3188 "euuid %s%s %s->%s\n", __func__,
3189 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
3190 SOCK_TYPE(so), epid, euuid_buf,
3191 (so->so_flags & SOF_DELEGATED) ?
3192 " [delegated]" : "",
3193 ((before < after) ? unwanted : wanted),
3194 ((before < after) ? wanted : unwanted));
3195 }
3196 }
3197 #endif /* NECP */
3198 #endif /* !CONFIG_PROC_UUID_POLICY */
3199
3200 #if NECP
3201 void
3202 inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
3203 {
3204 necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
3205 if (necp_socket_should_rescope(inp) &&
3206 inp->inp_lport == 0 &&
3207 inp->inp_laddr.s_addr == INADDR_ANY &&
3208 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
3209 // If we should rescope, and the socket is not yet bound
3210 inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
3211 }
3212 }
3213 #endif /* NECP */
3214
3215 int
3216 inp_update_policy(struct inpcb *inp)
3217 {
3218 #if CONFIG_PROC_UUID_POLICY
3219 struct socket *so = inp->inp_socket;
3220 uint32_t pflags = 0;
3221 int32_t ogencnt;
3222 int err = 0;
3223
3224 if (!net_io_policy_uuid ||
3225 so == NULL || inp->inp_state == INPCB_STATE_DEAD)
3226 return (0);
3227
3228 /*
3229 * Kernel-created sockets that aren't delegated on behalf of
3230 * another process are currently exempt from UUID policy checks.
3231 */
3232 if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED))
3233 return (0);
3234
3235 ogencnt = so->so_policy_gencnt;
3236 err = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
3237 so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt);
3238
3239 /*
3240 * Discard cached generation count if the entry is gone (ENOENT),
3241 * so that we go through the checks below.
3242 */
3243 if (err == ENOENT && ogencnt != 0)
3244 so->so_policy_gencnt = 0;
3245
3246 /*
3247 * If the generation count has changed, inspect the policy flags
3248 * and act accordingly. If a policy flag was previously set and
3249 * the UUID is no longer present in the table (ENOENT), treat it
3250 * as if the flag has been cleared.
3251 */
3252 if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
3253 /* update cellular policy for this socket */
3254 if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
3255 inp_update_cellular_policy(inp, TRUE);
3256 } else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
3257 inp_update_cellular_policy(inp, FALSE);
3258 }
3259 #if NECP
3260 /* update necp want app policy for this socket */
3261 if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
3262 inp_update_necp_want_app_policy(inp, TRUE);
3263 } else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
3264 inp_update_necp_want_app_policy(inp, FALSE);
3265 }
3266 #endif /* NECP */
3267 }
3268
3269 return ((err == ENOENT) ? 0 : err);
3270 #else /* !CONFIG_PROC_UUID_POLICY */
3271 #pragma unused(inp)
3272 return (0);
3273 #endif /* !CONFIG_PROC_UUID_POLICY */
3274 }
3275
3276 static unsigned int log_restricted;
3277 SYSCTL_DECL(_net_inet);
3278 SYSCTL_INT(_net_inet, OID_AUTO, log_restricted,
3279 CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0,
3280 "Log network restrictions");
3281 /*
3282 * Called when we need to enforce policy restrictions in the input path.
3283 *
3284 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
3285 */
3286 static boolean_t
3287 _inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
3288 {
3289 VERIFY(inp != NULL);
3290
3291 /*
3292 * Inbound restrictions.
3293 */
3294 if (!sorestrictrecv)
3295 return (FALSE);
3296
3297 if (ifp == NULL)
3298 return (FALSE);
3299
3300 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
3301 return (TRUE);
3302
3303 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
3304 return (TRUE);
3305
3306 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
3307 return (TRUE);
3308
3309 if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV))
3310 return (FALSE);
3311
3312 if (inp->inp_flags & INP_RECV_ANYIF)
3313 return (FALSE);
3314
3315 if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp)
3316 return (FALSE);
3317
3318 if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp))
3319 return (TRUE);
3320
3321 return (TRUE);
3322 }
3323
3324 boolean_t
3325 inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
3326 {
3327 boolean_t ret;
3328
3329 ret = _inp_restricted_recv(inp, ifp);
3330 if (ret == TRUE && log_restricted) {
3331 printf("pid %d (%s) is unable to receive packets on %s\n",
3332 current_proc()->p_pid, proc_best_name(current_proc()),
3333 ifp->if_xname);
3334 }
3335 return (ret);
3336 }
3337
3338 /*
3339 * Called when we need to enforce policy restrictions in the output path.
3340 *
3341 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
3342 */
3343 static boolean_t
3344 _inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3345 {
3346 VERIFY(inp != NULL);
3347
3348 /*
3349 * Outbound restrictions.
3350 */
3351 if (!sorestrictsend)
3352 return (FALSE);
3353
3354 if (ifp == NULL)
3355 return (FALSE);
3356
3357 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
3358 return (TRUE);
3359
3360 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
3361 return (TRUE);
3362
3363 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
3364 return (TRUE);
3365
3366 if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp))
3367 return (TRUE);
3368
3369 return (FALSE);
3370 }
3371
3372 boolean_t
3373 inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3374 {
3375 boolean_t ret;
3376
3377 ret = _inp_restricted_send(inp, ifp);
3378 if (ret == TRUE && log_restricted) {
3379 printf("pid %d (%s) is unable to transmit packets on %s\n",
3380 current_proc()->p_pid, proc_best_name(current_proc()),
3381 ifp->if_xname);
3382 }
3383 return (ret);
3384 }
3385
3386 inline void
3387 inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack)
3388 {
3389 struct ifnet *ifp = inp->inp_last_outifp;
3390 struct socket *so = inp->inp_socket;
3391 if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
3392 (ifp->if_type == IFT_CELLULAR ||
3393 ifp->if_subfamily == IFNET_SUBFAMILY_WIFI)) {
3394 int32_t unsent;
3395
3396 so->so_snd.sb_flags |= SB_SNDBYTE_CNT;
3397
3398 /*
3399 * There can be data outstanding before the connection
3400 * becomes established -- TFO case
3401 */
3402 if (so->so_snd.sb_cc > 0)
3403 inp_incr_sndbytes_total(so, so->so_snd.sb_cc);
3404
3405 unsent = inp_get_sndbytes_allunsent(so, th_ack);
3406 if (unsent > 0)
3407 inp_incr_sndbytes_unsent(so, unsent);
3408 }
3409 }
3410
3411 inline void
3412 inp_incr_sndbytes_total(struct socket *so, int32_t len)
3413 {
3414 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3415 struct ifnet *ifp = inp->inp_last_outifp;
3416
3417 if (ifp != NULL) {
3418 VERIFY(ifp->if_sndbyte_total >= 0);
3419 OSAddAtomic64(len, &ifp->if_sndbyte_total);
3420 }
3421 }
3422
3423 inline void
3424 inp_decr_sndbytes_total(struct socket *so, int32_t len)
3425 {
3426 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3427 struct ifnet *ifp = inp->inp_last_outifp;
3428
3429 if (ifp != NULL) {
3430 VERIFY(ifp->if_sndbyte_total >= len);
3431 OSAddAtomic64(-len, &ifp->if_sndbyte_total);
3432 }
3433 }
3434
3435 inline void
3436 inp_incr_sndbytes_unsent(struct socket *so, int32_t len)
3437 {
3438 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3439 struct ifnet *ifp = inp->inp_last_outifp;
3440
3441 if (ifp != NULL) {
3442 VERIFY(ifp->if_sndbyte_unsent >= 0);
3443 OSAddAtomic64(len, &ifp->if_sndbyte_unsent);
3444 }
3445 }
3446
3447 inline void
3448 inp_decr_sndbytes_unsent(struct socket *so, int32_t len)
3449 {
3450 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3451 struct ifnet *ifp = inp->inp_last_outifp;
3452
3453 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT))
3454 return;
3455
3456 if (ifp != NULL) {
3457 if (ifp->if_sndbyte_unsent >= len)
3458 OSAddAtomic64(-len, &ifp->if_sndbyte_unsent);
3459 else
3460 ifp->if_sndbyte_unsent = 0;
3461 }
3462 }
3463
3464 inline void
3465 inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
3466 {
3467 int32_t len;
3468
3469 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT))
3470 return;
3471
3472 len = inp_get_sndbytes_allunsent(so, th_ack);
3473 inp_decr_sndbytes_unsent(so, len);
3474 }
3475
3476
3477 inline void
3478 inp_set_activity_bitmap(struct inpcb *inp)
3479 {
3480 in_stat_set_activity_bitmap(&inp->inp_nw_activity, net_uptime());
3481 }
3482
3483 inline void
3484 inp_get_activity_bitmap(struct inpcb *inp, activity_bitmap_t *ab)
3485 {
3486 bcopy(&inp->inp_nw_activity, ab, sizeof (*ab));
3487 }