apple/xnu.git: bsd/netinet/in_pcb.c (blob ef0731a8291254338bd83162ce91ff61f9022a38)
1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
62 */
63
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/domain.h>
69 #include <sys/protosw.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/proc.h>
73 #include <sys/kernel.h>
74 #include <sys/sysctl.h>
75 #include <sys/mcache.h>
76 #include <sys/kauth.h>
77 #include <sys/priv.h>
78 #include <sys/proc_uuid_policy.h>
79 #include <sys/syslog.h>
80 #include <sys/priv.h>
81 #include <net/dlil.h>
82
83 #include <libkern/OSAtomic.h>
84 #include <kern/locks.h>
85
86 #include <machine/limits.h>
87
88 #include <kern/zalloc.h>
89
90 #include <net/if.h>
91 #include <net/if_types.h>
92 #include <net/route.h>
93 #include <net/flowhash.h>
94 #include <net/flowadv.h>
95 #include <net/nat464_utils.h>
96 #include <net/ntstat.h>
97 #include <net/restricted_in_port.h>
98
99 #include <netinet/in.h>
100 #include <netinet/in_pcb.h>
101 #include <netinet/in_var.h>
102 #include <netinet/ip_var.h>
103
104 #include <netinet/ip6.h>
105 #include <netinet6/ip6_var.h>
106
107 #include <sys/kdebug.h>
108 #include <sys/random.h>
109
110 #include <dev/random/randomdev.h>
111 #include <mach/boolean.h>
112
113 #include <pexpert/pexpert.h>
114
115 #if NECP
116 #include <net/necp.h>
117 #endif
118
119 #include <sys/stat.h>
120 #include <sys/ubc.h>
121 #include <sys/vnode.h>
122
123 #include <os/log.h>
124
125 extern const char *proc_name_address(struct proc *);
126
127 static lck_grp_t *inpcb_lock_grp;
128 static lck_attr_t *inpcb_lock_attr;
129 static lck_grp_attr_t *inpcb_lock_grp_attr;
130 decl_lck_mtx_data(static, inpcb_lock); /* global INPCB lock */
131 decl_lck_mtx_data(static, inpcb_timeout_lock);
132
133 static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);
134
135 static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */
136 static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
137 static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */
138 static boolean_t inpcb_fast_timer_on = FALSE;
139
140 #define INPCB_GCREQ_THRESHOLD 50000
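/*
 * Once the number of pending fast + nodelay GC requests on an inpcbinfo
 * exceeds INPCB_GCREQ_THRESHOLD, inpcb_gc_sched() below treats any new
 * request as INPCB_TIMER_FAST so that garbage collection keeps up.
 */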
141
142 static thread_call_t inpcb_thread_call, inpcb_fast_thread_call;
143 static void inpcb_sched_timeout(void);
144 static void inpcb_sched_lazy_timeout(void);
145 static void _inpcb_sched_timeout(unsigned int);
146 static void inpcb_timeout(void *, void *);
147 const int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */
148 extern int tvtohz(struct timeval *);
149
150 #if CONFIG_PROC_UUID_POLICY
151 static void inp_update_cellular_policy(struct inpcb *, boolean_t);
152 #if NECP
153 static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
154 #endif /* NECP */
155 #endif /* CONFIG_PROC_UUID_POLICY */
156
157 #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
158 #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
159
160 int allow_udp_port_exhaustion = 0;
161
162 /*
163 * These configure the range of local port addresses assigned to
164 * "unspecified" outgoing connections/packets/whatever.
165 */
166 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
167 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
168 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
169 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
170 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
171 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
172
173 #define RANGECHK(var, min, max) \
174 if ((var) < (min)) { (var) = (min); } \
175 else if ((var) > (max)) { (var) = (max); }
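/*
 * RANGECHK() clamps "var" into [min, max]; it is used by
 * sysctl_net_ipport_check() below to keep the port-range sysctls within
 * their legal bounds.
 */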
176
177 static int
178 sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
179 {
180 #pragma unused(arg1, arg2)
181 int error;
182 int new_value = *(int *)oidp->oid_arg1;
183 #if (DEBUG | DEVELOPMENT)
184 int old_value = *(int *)oidp->oid_arg1;
185 /*
186 * For unit testing allow a non-superuser process with the
187 * proper entitlement to modify the variables
188 */
189 if (req->newptr) {
190 if (proc_suser(current_proc()) != 0 &&
191 (error = priv_check_cred(kauth_cred_get(),
192 PRIV_NETINET_RESERVEDPORT, 0))) {
193 return EPERM;
194 }
195 }
196 #endif /* (DEBUG | DEVELOPMENT) */
197
198 error = sysctl_handle_int(oidp, &new_value, 0, req);
199 if (!error) {
200 if (oidp->oid_arg1 == &ipport_lowfirstauto || oidp->oid_arg1 == &ipport_lowlastauto) {
201 RANGECHK(new_value, 1, IPPORT_RESERVED - 1);
202 } else {
203 RANGECHK(new_value, IPPORT_RESERVED, USHRT_MAX);
204 }
205 *(int *)oidp->oid_arg1 = new_value;
206 }
207
208 #if (DEBUG | DEVELOPMENT)
209 os_log(OS_LOG_DEFAULT,
210 "%s:%u sysctl net.inet.ip.portrange: %d -> %d",
211 proc_best_name(current_proc()), proc_selfpid(),
212 old_value, *(int *)oidp->oid_arg1);
213 #endif /* (DEBUG | DEVELOPMENT) */
214
215 return error;
216 }
217
218 #undef RANGECHK
219
220 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
221 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IP Ports");
222
223 #if (DEBUG | DEVELOPMENT)
224 #define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY)
225 #else
226 #define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED)
227 #endif /* (DEBUG | DEVELOPMENT) */
228
229 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
230 CTLFAGS_IP_PORTRANGE,
231 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
232 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
233 CTLFAGS_IP_PORTRANGE,
234 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
235 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
236 CTLFAGS_IP_PORTRANGE,
237 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
238 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
239 CTLFAGS_IP_PORTRANGE,
240 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
241 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
242 CTLFAGS_IP_PORTRANGE,
243 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
244 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
245 CTLFAGS_IP_PORTRANGE,
246 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
247 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, ipport_allow_udp_port_exhaustion,
248 CTLFLAG_LOCKED | CTLFLAG_RW, &allow_udp_port_exhaustion, 0, "");
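/*
 * Illustrative sketch (user space, not part of this file): the port ranges
 * exported above can be read with sysctlbyname(3) from <sys/sysctl.h>,
 * for example:
 *
 *	int first = 0, last = 0;
 *	size_t len = sizeof(first);
 *	(void) sysctlbyname("net.inet.ip.portrange.first", &first, &len, NULL, 0);
 *	len = sizeof(last);
 *	(void) sysctlbyname("net.inet.ip.portrange.last", &last, &len, NULL, 0);
 */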
249
250 static uint32_t apn_fallbk_debug = 0;
251 #define apn_fallbk_log(x) do { if (apn_fallbk_debug >= 1) log x; } while (0)
252
253 #if !XNU_TARGET_OS_OSX
254 static boolean_t apn_fallbk_enabled = TRUE;
255
256 SYSCTL_DECL(_net_inet);
257 SYSCTL_NODE(_net_inet, OID_AUTO, apn_fallback, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "APN Fallback");
258 SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
259 &apn_fallbk_enabled, 0, "APN fallback enable");
260 SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
261 &apn_fallbk_debug, 0, "APN fallback debug enable");
262 #else /* XNU_TARGET_OS_OSX */
263 static boolean_t apn_fallbk_enabled = FALSE;
264 #endif /* XNU_TARGET_OS_OSX */
265
266 extern int udp_use_randomport;
267 extern int tcp_use_randomport;
268
269 /* Structs used for flowhash computation */
270 struct inp_flowhash_key_addr {
271 union {
272 struct in_addr v4;
273 struct in6_addr v6;
274 u_int8_t addr8[16];
275 u_int16_t addr16[8];
276 u_int32_t addr32[4];
277 } infha;
278 };
279
280 struct inp_flowhash_key {
281 struct inp_flowhash_key_addr infh_laddr;
282 struct inp_flowhash_key_addr infh_faddr;
283 u_int32_t infh_lport;
284 u_int32_t infh_fport;
285 u_int32_t infh_af;
286 u_int32_t infh_proto;
287 u_int32_t infh_rand1;
288 u_int32_t infh_rand2;
289 };
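/*
 * A PCB's flow hash is derived by filling an inp_flowhash_key with the
 * local/foreign addresses and ports, address family, protocol and two
 * random values, then hashing the key (seeded by inp_hash_seed, which is
 * initialized on first use) with the flow hash routine from <net/flowhash.h>.
 */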
290
291 static u_int32_t inp_hash_seed = 0;
292
293 static int infc_cmp(const struct inpcb *, const struct inpcb *);
294
295 /* Flags used by inp_fc_getinp */
296 #define INPFC_SOLOCKED 0x1
297 #define INPFC_REMOVE 0x2
298 static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);
299
300 static void inp_fc_feedback(struct inpcb *);
301 extern void tcp_remove_from_time_wait(struct inpcb *inp);
302
303 decl_lck_mtx_data(static, inp_fc_lck);
304
305 RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
306 RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
307 RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);
308
309 /*
310 * Use this inp as a key to find an inp in the flowhash tree.
311 * Accesses to it are protected by inp_fc_lck.
312 */
313 struct inpcb key_inp;
314
315 /*
316 * in_pcb.c: manage the Protocol Control Blocks.
317 */
318
319 void
320 in_pcbinit(void)
321 {
322 static int inpcb_initialized = 0;
323
324 VERIFY(!inpcb_initialized);
325 inpcb_initialized = 1;
326
327 inpcb_lock_grp_attr = lck_grp_attr_alloc_init();
328 inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr);
329 inpcb_lock_attr = lck_attr_alloc_init();
330 lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr);
331 lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr);
332 inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout,
333 NULL, THREAD_CALL_PRIORITY_KERNEL);
334 inpcb_fast_thread_call = thread_call_allocate_with_priority(
335 inpcb_timeout, NULL, THREAD_CALL_PRIORITY_KERNEL);
336 if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL) {
337 panic("unable to alloc the inpcb thread call");
338 }
339
340 /*
341 * Initialize data structures required to deliver
342 * flow advisories.
343 */
344 lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr);
345 lck_mtx_lock(&inp_fc_lck);
346 RB_INIT(&inp_fc_tree);
347 bzero(&key_inp, sizeof(key_inp));
348 lck_mtx_unlock(&inp_fc_lck);
349 }
350
351 #define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \
352 ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
353 static void
354 inpcb_timeout(void *arg0, void *arg1)
355 {
356 #pragma unused(arg0, arg1)
357 struct inpcbinfo *ipi;
358 boolean_t t, gc;
359 struct intimercount gccnt, tmcnt;
360
361 /*
362 * Update coarse-grained networking timestamp (in sec.); the idea
363 * is to piggy-back on the timeout callout to update the counter
364 * returnable via net_uptime().
365 */
366 net_update_uptime();
367
368 bzero(&gccnt, sizeof(gccnt));
369 bzero(&tmcnt, sizeof(tmcnt));
370
371 lck_mtx_lock_spin(&inpcb_timeout_lock);
372 gc = inpcb_garbage_collecting;
373 inpcb_garbage_collecting = FALSE;
374
375 t = inpcb_ticking;
376 inpcb_ticking = FALSE;
377
378 if (gc || t) {
379 lck_mtx_unlock(&inpcb_timeout_lock);
380
381 lck_mtx_lock(&inpcb_lock);
382 TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
383 if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
384 bzero(&ipi->ipi_gc_req,
385 sizeof(ipi->ipi_gc_req));
386 if (gc && ipi->ipi_gc != NULL) {
387 ipi->ipi_gc(ipi);
388 gccnt.intimer_lazy +=
389 ipi->ipi_gc_req.intimer_lazy;
390 gccnt.intimer_fast +=
391 ipi->ipi_gc_req.intimer_fast;
392 gccnt.intimer_nodelay +=
393 ipi->ipi_gc_req.intimer_nodelay;
394 }
395 }
396 if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
397 bzero(&ipi->ipi_timer_req,
398 sizeof(ipi->ipi_timer_req));
399 if (t && ipi->ipi_timer != NULL) {
400 ipi->ipi_timer(ipi);
401 tmcnt.intimer_lazy +=
402 ipi->ipi_timer_req.intimer_lazy;
403 tmcnt.intimer_fast +=
404 ipi->ipi_timer_req.intimer_fast;
405 tmcnt.intimer_nodelay +=
406 ipi->ipi_timer_req.intimer_nodelay;
407 }
408 }
409 }
410 lck_mtx_unlock(&inpcb_lock);
411 lck_mtx_lock_spin(&inpcb_timeout_lock);
412 }
413
414 /* lock was dropped above, so check first before overriding */
415 if (!inpcb_garbage_collecting) {
416 inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
417 }
418 if (!inpcb_ticking) {
419 inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
420 }
421
422 /* re-arm the timer if there's work to do */
423 inpcb_timeout_run--;
424 VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);
425
426 if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) {
427 inpcb_sched_timeout();
428 } else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) {
429 /* be lazy when idle with little activity */
430 inpcb_sched_lazy_timeout();
431 } else {
432 inpcb_sched_timeout();
433 }
434
435 lck_mtx_unlock(&inpcb_timeout_lock);
436 }
437
438 static void
439 inpcb_sched_timeout(void)
440 {
441 _inpcb_sched_timeout(0);
442 }
443
444 static void
445 inpcb_sched_lazy_timeout(void)
446 {
447 _inpcb_sched_timeout(inpcb_timeout_lazy);
448 }
449
450 static void
451 _inpcb_sched_timeout(unsigned int offset)
452 {
453 uint64_t deadline, leeway;
454
455 clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline);
456 LCK_MTX_ASSERT(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
457 if (inpcb_timeout_run == 0 &&
458 (inpcb_garbage_collecting || inpcb_ticking)) {
459 lck_mtx_convert_spin(&inpcb_timeout_lock);
460 inpcb_timeout_run++;
461 if (offset == 0) {
462 inpcb_fast_timer_on = TRUE;
463 thread_call_enter_delayed(inpcb_thread_call,
464 deadline);
465 } else {
466 inpcb_fast_timer_on = FALSE;
467 clock_interval_to_absolutetime_interval(offset,
468 NSEC_PER_SEC, &leeway);
469 thread_call_enter_delayed_with_leeway(
470 inpcb_thread_call, NULL, deadline, leeway,
471 THREAD_CALL_DELAY_LEEWAY);
472 }
473 } else if (inpcb_timeout_run == 1 &&
474 offset == 0 && !inpcb_fast_timer_on) {
475 /*
476 * Since the request was for a fast timer but the
477 * scheduled timer is a lazy timer, try to schedule
478 * another instance of fast timer also.
479 */
480 lck_mtx_convert_spin(&inpcb_timeout_lock);
481 inpcb_timeout_run++;
482 inpcb_fast_timer_on = TRUE;
483 thread_call_enter_delayed(inpcb_fast_thread_call, deadline);
484 }
485 }
486
487 void
488 inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
489 {
490 u_int32_t gccnt;
491
492 lck_mtx_lock_spin(&inpcb_timeout_lock);
493 inpcb_garbage_collecting = TRUE;
494 gccnt = ipi->ipi_gc_req.intimer_nodelay +
495 ipi->ipi_gc_req.intimer_fast;
496
497 if (gccnt > INPCB_GCREQ_THRESHOLD) {
498 type = INPCB_TIMER_FAST;
499 }
500
501 switch (type) {
502 case INPCB_TIMER_NODELAY:
503 atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1);
504 inpcb_sched_timeout();
505 break;
506 case INPCB_TIMER_FAST:
507 atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
508 inpcb_sched_timeout();
509 break;
510 default:
511 atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);
512 inpcb_sched_lazy_timeout();
513 break;
514 }
515 lck_mtx_unlock(&inpcb_timeout_lock);
516 }
517
518 void
519 inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
520 {
521 lck_mtx_lock_spin(&inpcb_timeout_lock);
522 inpcb_ticking = TRUE;
523 switch (type) {
524 case INPCB_TIMER_NODELAY:
525 atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1);
526 inpcb_sched_timeout();
527 break;
528 case INPCB_TIMER_FAST:
529 atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
530 inpcb_sched_timeout();
531 break;
532 default:
533 atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1);
534 inpcb_sched_lazy_timeout();
535 break;
536 }
537 lck_mtx_unlock(&inpcb_timeout_lock);
538 }
539
540 void
541 in_pcbinfo_attach(struct inpcbinfo *ipi)
542 {
543 struct inpcbinfo *ipi0;
544
545 lck_mtx_lock(&inpcb_lock);
546 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
547 if (ipi0 == ipi) {
548 panic("%s: ipi %p already in the list\n",
549 __func__, ipi);
550 /* NOTREACHED */
551 }
552 }
553 TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
554 lck_mtx_unlock(&inpcb_lock);
555 }
556
557 int
558 in_pcbinfo_detach(struct inpcbinfo *ipi)
559 {
560 struct inpcbinfo *ipi0;
561 int error = 0;
562
563 lck_mtx_lock(&inpcb_lock);
564 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
565 if (ipi0 == ipi) {
566 break;
567 }
568 }
569 if (ipi0 != NULL) {
570 TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
571 } else {
572 error = ENXIO;
573 }
574 lck_mtx_unlock(&inpcb_lock);
575
576 return error;
577 }
578
579 /*
580 * Allocate a PCB and associate it with the socket.
581 *
582 * Returns: 0 Success
583 * ENOBUFS
584 * ENOMEM
585 */
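/*
 * The PCB storage either comes from the pcbinfo's zone or, when the socket
 * has SOF1_CACHED_IN_SOCK_LAYER set, from a PCB previously cached in the
 * socket layer (so->so_saved_pcb) that is simply reinitialized here.
 */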
586 int
587 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
588 {
589 #pragma unused(p)
590 struct inpcb *inp;
591 caddr_t temp;
592
593 if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
594 inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone);
595 if (inp == NULL) {
596 return ENOBUFS;
597 }
598 bzero((caddr_t)inp, sizeof(*inp));
599 } else {
600 inp = (struct inpcb *)(void *)so->so_saved_pcb;
601 temp = inp->inp_saved_ppcb;
602 bzero((caddr_t)inp, sizeof(*inp));
603 inp->inp_saved_ppcb = temp;
604 }
605
606 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
607 inp->inp_pcbinfo = pcbinfo;
608 inp->inp_socket = so;
609 /* make sure inp_stat is always 64-bit aligned */
610 inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
611 sizeof(u_int64_t));
612 if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
613 sizeof(*inp->inp_stat) > sizeof(inp->inp_stat_store)) {
614 panic("%s: insufficient space to align inp_stat", __func__);
615 /* NOTREACHED */
616 }
617
618 /* make sure inp_cstat is always 64-bit aligned */
619 inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
620 sizeof(u_int64_t));
621 if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
622 sizeof(*inp->inp_cstat) > sizeof(inp->inp_cstat_store)) {
623 panic("%s: insufficient space to align inp_cstat", __func__);
624 /* NOTREACHED */
625 }
626
627 /* make sure inp_wstat is always 64-bit aligned */
628 inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
629 sizeof(u_int64_t));
630 if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
631 sizeof(*inp->inp_wstat) > sizeof(inp->inp_wstat_store)) {
632 panic("%s: insufficient space to align inp_wstat", __func__);
633 /* NOTREACHED */
634 }
635
636 /* make sure inp_Wstat is always 64-bit aligned */
637 inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
638 sizeof(u_int64_t));
639 if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
640 sizeof(*inp->inp_Wstat) > sizeof(inp->inp_Wstat_store)) {
641 panic("%s: insufficient space to align inp_Wstat", __func__);
642 /* NOTREACHED */
643 }
644
645 so->so_pcb = (caddr_t)inp;
646
647 if (so->so_proto->pr_flags & PR_PCBLOCK) {
648 lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
649 pcbinfo->ipi_lock_attr);
650 }
651
652 if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) {
653 inp->inp_flags |= IN6P_IPV6_V6ONLY;
654 }
655
656 if (ip6_auto_flowlabel) {
657 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
658 }
659 if (intcoproc_unrestricted) {
660 inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
661 }
662
663 (void) inp_update_policy(inp);
664
665 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
666 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
667 LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
668 pcbinfo->ipi_count++;
669 lck_rw_done(pcbinfo->ipi_lock);
670 return 0;
671 }
672
673 /*
674 * in_pcblookup_local_and_cleanup does everything
675 * in_pcblookup_local does but it checks for a socket
676 * that's going away. Since we know that the lock is
677 * held read+write when this function is called, we
678 * can safely dispose of this socket like the slow
679 * timer would usually do and return NULL. This is
680 * great for bind.
681 */
682 struct inpcb *
683 in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
684 u_int lport_arg, int wild_okay)
685 {
686 struct inpcb *inp;
687
688 /* Perform normal lookup */
689 inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
690
691 /* Check if we found a match but it's waiting to be disposed */
692 if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
693 struct socket *so = inp->inp_socket;
694
695 socket_lock(so, 0);
696
697 if (so->so_usecount == 0) {
698 if (inp->inp_state != INPCB_STATE_DEAD) {
699 in_pcbdetach(inp);
700 }
701 in_pcbdispose(inp); /* will unlock & destroy */
702 inp = NULL;
703 } else {
704 socket_unlock(so, 0);
705 }
706 }
707
708 return inp;
709 }
710
711 static void
712 in_pcb_conflict_post_msg(u_int16_t port)
713 {
714 /*
715 * Radar 5523020: send a kernel event notification if a
716 * non-participating socket tries to bind a port that is owned
717 * by a socket which has set SOF_NOTIFYCONFLICT.
718 */
719 struct kev_msg ev_msg;
720 struct kev_in_portinuse in_portinuse;
721
722 bzero(&in_portinuse, sizeof(struct kev_in_portinuse));
723 bzero(&ev_msg, sizeof(struct kev_msg));
724 in_portinuse.port = ntohs(port); /* port in host order */
725 in_portinuse.req_pid = proc_selfpid();
726 ev_msg.vendor_code = KEV_VENDOR_APPLE;
727 ev_msg.kev_class = KEV_NETWORK_CLASS;
728 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
729 ev_msg.event_code = KEV_INET_PORTINUSE;
730 ev_msg.dv[0].data_ptr = &in_portinuse;
731 ev_msg.dv[0].data_length = sizeof(struct kev_in_portinuse);
732 ev_msg.dv[1].data_length = 0;
733 dlil_post_complete_msg(NULL, &ev_msg);
734 }
735
736 /*
737 * Bind an INPCB to an address and/or port. This routine should not alter
738 * the caller-supplied local address "nam".
739 *
740 * Returns: 0 Success
741 * EADDRNOTAVAIL Address not available.
742 * EINVAL Invalid argument
743 * EAFNOSUPPORT Address family not supported [notdef]
744 * EACCES Permission denied
745 * EADDRINUSE Address in use
746 * EAGAIN Resource unavailable, try again
747 * priv_check_cred:EPERM Operation not permitted
748 */
749 int
750 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
751 {
752 struct socket *so = inp->inp_socket;
753 unsigned short *lastport;
754 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
755 u_short lport = 0, rand_port = 0;
756 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
757 int error, randomport, conflict = 0;
758 boolean_t anonport = FALSE;
759 kauth_cred_t cred;
760 struct in_addr laddr;
761 struct ifnet *outif = NULL;
762
763 if (TAILQ_EMPTY(&in_ifaddrhead)) { /* XXX broken! */
764 return EADDRNOTAVAIL;
765 }
766 if (!(so->so_options & (SO_REUSEADDR | SO_REUSEPORT))) {
767 wild = 1;
768 }
769
770 bzero(&laddr, sizeof(laddr));
771
772 socket_unlock(so, 0); /* keep reference on socket */
773 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
774 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
775 /* another thread completed the bind */
776 lck_rw_done(pcbinfo->ipi_lock);
777 socket_lock(so, 0);
778 return EINVAL;
779 }
780
781 if (nam != NULL) {
782 if (nam->sa_len != sizeof(struct sockaddr_in)) {
783 lck_rw_done(pcbinfo->ipi_lock);
784 socket_lock(so, 0);
785 return EINVAL;
786 }
787 #if 0
788 /*
789 * We should check the family, but old programs
790 * incorrectly fail to initialize it.
791 */
792 if (nam->sa_family != AF_INET) {
793 lck_rw_done(pcbinfo->ipi_lock);
794 socket_lock(so, 0);
795 return EAFNOSUPPORT;
796 }
797 #endif /* 0 */
798 lport = SIN(nam)->sin_port;
799
800 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
801 /*
802 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
803 * allow complete duplication of binding if
804 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
805 * and a multicast address is bound on both
806 * new and duplicated sockets.
807 */
808 if (so->so_options & SO_REUSEADDR) {
809 reuseport = SO_REUSEADDR | SO_REUSEPORT;
810 }
811 } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
812 struct sockaddr_in sin;
813 struct ifaddr *ifa;
814
815 /* Sanitized for interface address searches */
816 bzero(&sin, sizeof(sin));
817 sin.sin_family = AF_INET;
818 sin.sin_len = sizeof(struct sockaddr_in);
819 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
820
821 ifa = ifa_ifwithaddr(SA(&sin));
822 if (ifa == NULL) {
823 lck_rw_done(pcbinfo->ipi_lock);
824 socket_lock(so, 0);
825 return EADDRNOTAVAIL;
826 } else {
827 /*
828 * Opportunistically determine the outbound
829 * interface that may be used; this may not
830 * hold true if we end up using a route
831 * going over a different interface, e.g.
832 * when sending to a local address. This
833 * will get updated again after sending.
834 */
835 IFA_LOCK(ifa);
836 outif = ifa->ifa_ifp;
837 IFA_UNLOCK(ifa);
838 IFA_REMREF(ifa);
839 }
840 }
841
842
843 if (lport != 0) {
844 struct inpcb *t;
845 uid_t u;
846
847 #if XNU_TARGET_OS_OSX
848 if (ntohs(lport) < IPPORT_RESERVED &&
849 SIN(nam)->sin_addr.s_addr != 0 &&
850 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
851 cred = kauth_cred_proc_ref(p);
852 error = priv_check_cred(cred,
853 PRIV_NETINET_RESERVEDPORT, 0);
854 kauth_cred_unref(&cred);
855 if (error != 0) {
856 lck_rw_done(pcbinfo->ipi_lock);
857 socket_lock(so, 0);
858 return EACCES;
859 }
860 }
861 #endif /* XNU_TARGET_OS_OSX */
862 /*
863 * Check whether the process is allowed to bind to a restricted port
864 */
865 if (!current_task_can_use_restricted_in_port(lport,
866 (uint8_t)so->so_proto->pr_protocol, PORT_FLAGS_BSD)) {
867 lck_rw_done(pcbinfo->ipi_lock);
868 socket_lock(so, 0);
869 return EADDRINUSE;
870 }
871
872 if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
873 (u = kauth_cred_getuid(so->so_cred)) != 0 &&
874 (t = in_pcblookup_local_and_cleanup(
875 inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
876 INPLOOKUP_WILDCARD)) != NULL &&
877 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
878 t->inp_laddr.s_addr != INADDR_ANY ||
879 !(t->inp_socket->so_options & SO_REUSEPORT)) &&
880 (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
881 !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
882 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
883 t->inp_laddr.s_addr != INADDR_ANY) &&
884 (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
885 !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
886 uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
887 if ((t->inp_socket->so_flags &
888 SOF_NOTIFYCONFLICT) &&
889 !(so->so_flags & SOF_NOTIFYCONFLICT)) {
890 conflict = 1;
891 }
892
893 lck_rw_done(pcbinfo->ipi_lock);
894
895 if (conflict) {
896 in_pcb_conflict_post_msg(lport);
897 }
898
899 socket_lock(so, 0);
900 return EADDRINUSE;
901 }
902 t = in_pcblookup_local_and_cleanup(pcbinfo,
903 SIN(nam)->sin_addr, lport, wild);
904 if (t != NULL &&
905 (reuseport & t->inp_socket->so_options) == 0 &&
906 (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
907 !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
908 uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
909 if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
910 t->inp_laddr.s_addr != INADDR_ANY ||
911 SOCK_DOM(so) != PF_INET6 ||
912 SOCK_DOM(t->inp_socket) != PF_INET6) {
913 if ((t->inp_socket->so_flags &
914 SOF_NOTIFYCONFLICT) &&
915 !(so->so_flags & SOF_NOTIFYCONFLICT)) {
916 conflict = 1;
917 }
918
919 lck_rw_done(pcbinfo->ipi_lock);
920
921 if (conflict) {
922 in_pcb_conflict_post_msg(lport);
923 }
924 socket_lock(so, 0);
925 return EADDRINUSE;
926 }
927 }
928 }
929 laddr = SIN(nam)->sin_addr;
930 }
931 if (lport == 0) {
932 u_short first, last;
933 int count;
934 bool found;
935
936 /*
937 * Force wild = 1 for an implicit bind (mainly used by connect).
938 * For an implicit bind (lport == 0) we always pick an unused port,
939 * so SO_REUSEADDR|SO_REUSEPORT do not apply.
940 */
941 wild = 1;
942
943 randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
944 (so->so_type == SOCK_STREAM ? tcp_use_randomport :
945 udp_use_randomport);
946
947 /*
948 * Even though this looks similar to the code in
949 * in6_pcbsetport, the v6 vs v4 checks are different.
950 */
951 anonport = TRUE;
952 if (inp->inp_flags & INP_HIGHPORT) {
953 first = (u_short)ipport_hifirstauto; /* sysctl */
954 last = (u_short)ipport_hilastauto;
955 lastport = &pcbinfo->ipi_lasthi;
956 } else if (inp->inp_flags & INP_LOWPORT) {
957 cred = kauth_cred_proc_ref(p);
958 error = priv_check_cred(cred,
959 PRIV_NETINET_RESERVEDPORT, 0);
960 kauth_cred_unref(&cred);
961 if (error != 0) {
962 lck_rw_done(pcbinfo->ipi_lock);
963 socket_lock(so, 0);
964 return error;
965 }
966 first = (u_short)ipport_lowfirstauto; /* 1023 */
967 last = (u_short)ipport_lowlastauto; /* 600 */
968 lastport = &pcbinfo->ipi_lastlow;
969 } else {
970 first = (u_short)ipport_firstauto; /* sysctl */
971 last = (u_short)ipport_lastauto;
972 lastport = &pcbinfo->ipi_lastport;
973 }
974 /* No point in randomizing if only one port is available */
975
976 if (first == last) {
977 randomport = 0;
978 }
979 /*
980 * Simple check to ensure we do not spin forever here once all
981 * ports have been used up.
982 *
983 * We split the two cases (counting up and counting down) so that
984 * the direction is not tested on each round of the loop.
985 */
986 if (first > last) {
987 struct in_addr lookup_addr;
988
989 /*
990 * counting down
991 */
992 if (randomport) {
993 read_frandom(&rand_port, sizeof(rand_port));
994 *lastport =
995 first - (rand_port % (first - last));
996 }
997 count = first - last;
998
999 lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
1000 inp->inp_laddr;
1001
1002 found = false;
1003 do {
1004 if (count-- < 0) { /* completely used? */
1005 lck_rw_done(pcbinfo->ipi_lock);
1006 socket_lock(so, 0);
1007 return EADDRNOTAVAIL;
1008 }
1009 --*lastport;
1010 if (*lastport > first || *lastport < last) {
1011 *lastport = first;
1012 }
1013 lport = htons(*lastport);
1014
1015 /*
1016 * Skip if this is a restricted port, as we do not want to
1017 * use restricted ports as ephemeral ports
1018 */
1019 if (IS_RESTRICTED_IN_PORT(lport)) {
1020 continue;
1021 }
1022
1023 found = in_pcblookup_local_and_cleanup(pcbinfo,
1024 lookup_addr, lport, wild) == NULL;
1025 } while (!found);
1026 } else {
1027 struct in_addr lookup_addr;
1028
1029 /*
1030 * counting up
1031 */
1032 if (randomport) {
1033 read_frandom(&rand_port, sizeof(rand_port));
1034 *lastport =
1035 first + (rand_port % (first - last));
1036 }
1037 count = last - first;
1038
1039 lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
1040 inp->inp_laddr;
1041
1042 found = false;
1043 do {
1044 if (count-- < 0) { /* completely used? */
1045 lck_rw_done(pcbinfo->ipi_lock);
1046 socket_lock(so, 0);
1047 return EADDRNOTAVAIL;
1048 }
1049 ++*lastport;
1050 if (*lastport < first || *lastport > last) {
1051 *lastport = first;
1052 }
1053 lport = htons(*lastport);
1054
1055 /*
1056 * Skip if this is a restricted port, as we do not want to
1057 * use restricted ports as ephemeral ports
1058 */
1059 if (IS_RESTRICTED_IN_PORT(lport)) {
1060 continue;
1061 }
1062
1063 found = in_pcblookup_local_and_cleanup(pcbinfo,
1064 lookup_addr, lport, wild) == NULL;
1065 } while (!found);
1066 }
1067 }
1068 socket_lock(so, 0);
1069
1070 /*
1071 * We unlocked the socket's protocol lock for a long time.
1072 * The socket might have been dropped/defuncted.
1073 * Check whether the world has changed since.
1074 */
1075 if (inp->inp_state == INPCB_STATE_DEAD) {
1076 lck_rw_done(pcbinfo->ipi_lock);
1077 return ECONNABORTED;
1078 }
1079
1080 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
1081 lck_rw_done(pcbinfo->ipi_lock);
1082 return EINVAL;
1083 }
1084
1085 if (laddr.s_addr != INADDR_ANY) {
1086 inp->inp_laddr = laddr;
1087 inp->inp_last_outifp = outif;
1088 }
1089 inp->inp_lport = lport;
1090 if (anonport) {
1091 inp->inp_flags |= INP_ANONPORT;
1092 }
1093
1094 if (in_pcbinshash(inp, 1) != 0) {
1095 inp->inp_laddr.s_addr = INADDR_ANY;
1096 inp->inp_last_outifp = NULL;
1097
1098 inp->inp_lport = 0;
1099 if (anonport) {
1100 inp->inp_flags &= ~INP_ANONPORT;
1101 }
1102 lck_rw_done(pcbinfo->ipi_lock);
1103 return EAGAIN;
1104 }
1105 lck_rw_done(pcbinfo->ipi_lock);
1106 sflt_notify(so, sock_evt_bound, NULL);
1107 return 0;
1108 }
1109
1110 #define APN_FALLBACK_IP_FILTER(a) \
1111 (IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \
1112 IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \
1113 IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \
1114 IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \
1115 IN_PRIVATE(ntohl((a)->sin_addr.s_addr)))
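/*
 * APN_FALLBACK_IP_FILTER() matches destinations that should never trigger
 * an APN fallback notification: link-local, loopback, zero-net, multicast
 * and private (RFC 1918) addresses.
 */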
1116
1117 #define APN_FALLBACK_NOTIF_INTERVAL 2 /* Magic Number */
1118 static uint64_t last_apn_fallback = 0;
1119
1120 static boolean_t
1121 apn_fallback_required(proc_t proc, struct socket *so, struct sockaddr_in *p_dstv4)
1122 {
1123 uint64_t timenow;
1124 struct sockaddr_storage lookup_default_addr;
1125 struct rtentry *rt = NULL;
1126
1127 VERIFY(proc != NULL);
1128
1129 if (apn_fallbk_enabled == FALSE) {
1130 return FALSE;
1131 }
1132
1133 if (proc == kernproc) {
1134 return FALSE;
1135 }
1136
1137 if (so && (so->so_options & SO_NOAPNFALLBK)) {
1138 return FALSE;
1139 }
1140
1141 timenow = net_uptime();
1142 if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) {
1143 apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n"));
1144 return FALSE;
1145 }
1146
1147 if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4)) {
1148 return FALSE;
1149 }
1150
1151 /* Check if we have unscoped IPv6 default route through cellular */
1152 bzero(&lookup_default_addr, sizeof(lookup_default_addr));
1153 lookup_default_addr.ss_family = AF_INET6;
1154 lookup_default_addr.ss_len = sizeof(struct sockaddr_in6);
1155
1156 rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
1157 if (NULL == rt) {
1158 apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
1159 "unscoped default IPv6 route.\n"));
1160 return FALSE;
1161 }
1162
1163 if (!IFNET_IS_CELLULAR(rt->rt_ifp)) {
1164 rtfree(rt);
1165 apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
1166 "unscoped default IPv6 route through cellular interface.\n"));
1167 return FALSE;
1168 }
1169
1170 /*
1171 * We have a default IPv6 route; ensure that we do not
1172 * also have an IPv4 default route before triggering
1173 * the event
1174 */
1175 rtfree(rt);
1176 rt = NULL;
1177
1178 bzero(&lookup_default_addr, sizeof(lookup_default_addr));
1179 lookup_default_addr.ss_family = AF_INET;
1180 lookup_default_addr.ss_len = sizeof(struct sockaddr_in);
1181
1182 rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
1183
1184 if (rt) {
1185 rtfree(rt);
1186 rt = NULL;
1187 apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped "
1188 "IPv4 default route!\n"));
1189 return FALSE;
1190 }
1191
1192 {
1193 /*
1194 * We disable APN fallback if the binary is not a third-party app.
1195 * Note that platform daemons use their process name as a
1196 * bundle ID so we filter out bundle IDs without dots.
1197 */
1198 const char *bundle_id = cs_identity_get(proc);
1199 if (bundle_id == NULL ||
1200 bundle_id[0] == '\0' ||
1201 strchr(bundle_id, '.') == NULL ||
1202 strncmp(bundle_id, "com.apple.", sizeof("com.apple.") - 1) == 0) {
1203 apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-"
1204 "party bundle ID \"%s\"!\n", (bundle_id ? bundle_id : "NULL")));
1205 return FALSE;
1206 }
1207 }
1208
1209 {
1210 /*
1211 * The Apple App Store IPv6 requirement started on
1212 * June 1st, 2016 at 12:00:00 AM PDT.
1213 * We disable APN fallback if the binary is more recent than that.
1214 * We check both atime and birthtime since birthtime is not always supported.
1215 */
1216 static const long ipv6_start_date = 1464764400L;
1217 vfs_context_t context;
1218 struct stat64 sb;
1219 int vn_stat_error;
1220
1221 bzero(&sb, sizeof(struct stat64));
1222 context = vfs_context_create(NULL);
1223 vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, 0, context);
1224 (void)vfs_context_rele(context);
1225
1226 if (vn_stat_error != 0 ||
1227 sb.st_atimespec.tv_sec >= ipv6_start_date ||
1228 sb.st_birthtimespec.tv_sec >= ipv6_start_date) {
1229 apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary "
1230 "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n",
1231 vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec,
1232 sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec));
1233 return FALSE;
1234 }
1235 }
1236 return TRUE;
1237 }
1238
1239 static void
1240 apn_fallback_trigger(proc_t proc, struct socket *so)
1241 {
1242 pid_t pid = 0;
1243 struct kev_msg ev_msg;
1244 struct kev_netevent_apnfallbk_data apnfallbk_data;
1245
1246 last_apn_fallback = net_uptime();
1247 pid = proc_pid(proc);
1248 uuid_t application_uuid;
1249 uuid_clear(application_uuid);
1250 proc_getexecutableuuid(proc, application_uuid,
1251 sizeof(application_uuid));
1252
1253 bzero(&ev_msg, sizeof(struct kev_msg));
1254 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1255 ev_msg.kev_class = KEV_NETWORK_CLASS;
1256 ev_msg.kev_subclass = KEV_NETEVENT_SUBCLASS;
1257 ev_msg.event_code = KEV_NETEVENT_APNFALLBACK;
1258
1259 bzero(&apnfallbk_data, sizeof(apnfallbk_data));
1260
1261 if (so->so_flags & SOF_DELEGATED) {
1262 apnfallbk_data.epid = so->e_pid;
1263 uuid_copy(apnfallbk_data.euuid, so->e_uuid);
1264 } else {
1265 apnfallbk_data.epid = so->last_pid;
1266 uuid_copy(apnfallbk_data.euuid, so->last_uuid);
1267 }
1268
1269 ev_msg.dv[0].data_ptr = &apnfallbk_data;
1270 ev_msg.dv[0].data_length = sizeof(apnfallbk_data);
1271 kev_post_msg(&ev_msg);
1272 apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n"));
1273 }
1274
1275 /*
1276 * Transform old in_pcbconnect() into an inner subroutine for new
1277 * in_pcbconnect(); do some validity-checking on the remote address
1278 * (in "nam") and then determine local host address (i.e., which
1279 * interface) to use to access that remote host.
1280 *
1281 * This routine may alter the caller-supplied remote address "nam".
1282 *
1283 * The caller may override the bound-to-interface setting of the socket
1284 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1285 *
1286 * This routine might return an ifp with a reference held if the caller
1287 * provides a non-NULL outif, even in the error case. The caller is
1288 * responsible for releasing its reference.
1289 *
1290 * Returns: 0 Success
1291 * EINVAL Invalid argument
1292 * EAFNOSUPPORT Address family not supported
1293 * EADDRNOTAVAIL Address not available
1294 */
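/*
 * Source selection below, in order: a destination of INADDR_ANY or
 * INADDR_BROADCAST is first rewritten using the primary interface; an
 * already bound local address is used as-is; otherwise the (possibly
 * scoped) route to the destination supplies the interface, falling back
 * to a directly connected interface search when no route can be allocated.
 */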
1295 int
1296 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
1297 unsigned int ifscope, struct ifnet **outif, int raw)
1298 {
1299 struct route *ro = &inp->inp_route;
1300 struct in_ifaddr *ia = NULL;
1301 struct sockaddr_in sin;
1302 int error = 0;
1303 boolean_t restricted = FALSE;
1304
1305 if (outif != NULL) {
1306 *outif = NULL;
1307 }
1308 if (nam->sa_len != sizeof(struct sockaddr_in)) {
1309 return EINVAL;
1310 }
1311 if (SIN(nam)->sin_family != AF_INET) {
1312 return EAFNOSUPPORT;
1313 }
1314 if (raw == 0 && SIN(nam)->sin_port == 0) {
1315 return EADDRNOTAVAIL;
1316 }
1317
1318 /*
1319 * If the destination address is INADDR_ANY,
1320 * use the primary local address.
1321 * If the supplied address is INADDR_BROADCAST,
1322 * and the primary interface supports broadcast,
1323 * choose the broadcast address for that interface.
1324 */
1325 if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
1326 SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) {
1327 lck_rw_lock_shared(in_ifaddr_rwlock);
1328 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
1329 ia = TAILQ_FIRST(&in_ifaddrhead);
1330 IFA_LOCK_SPIN(&ia->ia_ifa);
1331 if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
1332 SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
1333 } else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
1334 SIN(nam)->sin_addr =
1335 SIN(&ia->ia_broadaddr)->sin_addr;
1336 }
1337 IFA_UNLOCK(&ia->ia_ifa);
1338 ia = NULL;
1339 }
1340 lck_rw_done(in_ifaddr_rwlock);
1341 }
1342 /*
1343 * Otherwise, if the socket has already bound the source, just use it.
1344 */
1345 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1346 VERIFY(ia == NULL);
1347 *laddr = inp->inp_laddr;
1348 return 0;
1349 }
1350
1351 /*
1352 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
1353 * then it overrides the sticky ifscope set for the socket.
1354 */
1355 if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF)) {
1356 ifscope = inp->inp_boundifp->if_index;
1357 }
1358
1359 /*
1360 * If route is known or can be allocated now,
1361 * our src addr is taken from the i/f, else punt.
1362 * Note that we should check the address family of the cached
1363 * destination, in case of sharing the cache with IPv6.
1364 */
1365 if (ro->ro_rt != NULL) {
1366 RT_LOCK_SPIN(ro->ro_rt);
1367 }
1368 if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
1369 SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
1370 (inp->inp_socket->so_options & SO_DONTROUTE)) {
1371 if (ro->ro_rt != NULL) {
1372 RT_UNLOCK(ro->ro_rt);
1373 }
1374 ROUTE_RELEASE(ro);
1375 }
1376 if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
1377 (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
1378 if (ro->ro_rt != NULL) {
1379 RT_UNLOCK(ro->ro_rt);
1380 }
1381 ROUTE_RELEASE(ro);
1382 /* No route yet, so try to acquire one */
1383 bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
1384 ro->ro_dst.sa_family = AF_INET;
1385 ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
1386 SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
1387 rtalloc_scoped(ro, ifscope);
1388 if (ro->ro_rt != NULL) {
1389 RT_LOCK_SPIN(ro->ro_rt);
1390 }
1391 }
1392 /* Sanitized local copy for interface address searches */
1393 bzero(&sin, sizeof(sin));
1394 sin.sin_family = AF_INET;
1395 sin.sin_len = sizeof(struct sockaddr_in);
1396 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
1397 /*
1398 * If we did not find (or use) a route, assume dest is reachable
1399 * on a directly connected network and try to find a corresponding
1400 * interface to take the source address from.
1401 */
1402 if (ro->ro_rt == NULL) {
1403 proc_t proc = current_proc();
1404
1405 VERIFY(ia == NULL);
1406 ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
1407 if (ia == NULL) {
1408 ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
1409 }
1410 error = ((ia == NULL) ? ENETUNREACH : 0);
1411
1412 if (apn_fallback_required(proc, inp->inp_socket,
1413 (void *)nam)) {
1414 apn_fallback_trigger(proc, inp->inp_socket);
1415 }
1416
1417 goto done;
1418 }
1419 RT_LOCK_ASSERT_HELD(ro->ro_rt);
1420 /*
1421 * If the outgoing interface on the route found is not
1422 * a loopback interface, use the address from that interface.
1423 */
1424 if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
1425 VERIFY(ia == NULL);
1426 /*
1427 * If the route points to a cellular interface and the
1428 * caller forbids our using interfaces of such type,
1429 * pretend that there is no route.
1430 * Apply the same logic for expensive interfaces.
1431 */
1432 if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
1433 RT_UNLOCK(ro->ro_rt);
1434 ROUTE_RELEASE(ro);
1435 error = EHOSTUNREACH;
1436 restricted = TRUE;
1437 } else {
1438 /* Become a regular mutex */
1439 RT_CONVERT_LOCK(ro->ro_rt);
1440 ia = ifatoia(ro->ro_rt->rt_ifa);
1441 IFA_ADDREF(&ia->ia_ifa);
1442
1443 /*
1444 * Mark the control block for notification of
1445 * a possible flow that might undergo clat46
1446 * translation.
1447 *
1448 * We defer the decision to a later point when the
1449 * inpcb is being disposed of.
1450 * The reason is that we only want to send a notification
1451 * if the flow was ever used to send data.
1452 */
1453 if (IS_INTF_CLAT46(ro->ro_rt->rt_ifp)) {
1454 inp->inp_flags2 |= INP2_CLAT46_FLOW;
1455 }
1456
1457 RT_UNLOCK(ro->ro_rt);
1458 error = 0;
1459 }
1460 goto done;
1461 }
1462 VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
1463 RT_UNLOCK(ro->ro_rt);
1464 /*
1465 * The outgoing interface is marked with 'loopback net', so a route
1466 * to ourselves is here.
1467 * Try to find the interface of the destination address and then
1468 * take the address from there. That interface is not necessarily
1469 * a loopback interface.
1470 */
1471 VERIFY(ia == NULL);
1472 ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
1473 if (ia == NULL) {
1474 ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
1475 }
1476 if (ia == NULL) {
1477 ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
1478 }
1479 if (ia == NULL) {
1480 RT_LOCK(ro->ro_rt);
1481 ia = ifatoia(ro->ro_rt->rt_ifa);
1482 if (ia != NULL) {
1483 IFA_ADDREF(&ia->ia_ifa);
1484 }
1485 RT_UNLOCK(ro->ro_rt);
1486 }
1487 error = ((ia == NULL) ? ENETUNREACH : 0);
1488
1489 done:
1490 /*
1491 * If the destination address is multicast and an outgoing
1492 * interface has been set as a multicast option, use the
1493 * address of that interface as our source address.
1494 */
1495 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
1496 inp->inp_moptions != NULL) {
1497 struct ip_moptions *imo;
1498 struct ifnet *ifp;
1499
1500 imo = inp->inp_moptions;
1501 IMO_LOCK(imo);
1502 if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
1503 ia->ia_ifp != imo->imo_multicast_ifp)) {
1504 ifp = imo->imo_multicast_ifp;
1505 if (ia != NULL) {
1506 IFA_REMREF(&ia->ia_ifa);
1507 }
1508 lck_rw_lock_shared(in_ifaddr_rwlock);
1509 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
1510 if (ia->ia_ifp == ifp) {
1511 break;
1512 }
1513 }
1514 if (ia != NULL) {
1515 IFA_ADDREF(&ia->ia_ifa);
1516 }
1517 lck_rw_done(in_ifaddr_rwlock);
1518 if (ia == NULL) {
1519 error = EADDRNOTAVAIL;
1520 } else {
1521 error = 0;
1522 }
1523 }
1524 IMO_UNLOCK(imo);
1525 }
1526 /*
1527 * Don't do a pcblookup call here; return the interface in laddr
1528 * and exit to the caller, which will do the lookup.
1529 */
1530 if (ia != NULL) {
1531 /*
1532 * If the source address belongs to a cellular interface
1533 * and the socket forbids our using interfaces of such
1534 * type, pretend that there is no source address.
1535 * Apply the same logic for expensive interfaces.
1536 */
1537 IFA_LOCK_SPIN(&ia->ia_ifa);
1538 if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
1539 IFA_UNLOCK(&ia->ia_ifa);
1540 error = EHOSTUNREACH;
1541 restricted = TRUE;
1542 } else if (error == 0) {
1543 *laddr = ia->ia_addr.sin_addr;
1544 if (outif != NULL) {
1545 struct ifnet *ifp;
1546
1547 if (ro->ro_rt != NULL) {
1548 ifp = ro->ro_rt->rt_ifp;
1549 } else {
1550 ifp = ia->ia_ifp;
1551 }
1552
1553 VERIFY(ifp != NULL);
1554 IFA_CONVERT_LOCK(&ia->ia_ifa);
1555 ifnet_reference(ifp); /* for caller */
1556 if (*outif != NULL) {
1557 ifnet_release(*outif);
1558 }
1559 *outif = ifp;
1560 }
1561 IFA_UNLOCK(&ia->ia_ifa);
1562 } else {
1563 IFA_UNLOCK(&ia->ia_ifa);
1564 }
1565 IFA_REMREF(&ia->ia_ifa);
1566 ia = NULL;
1567 }
1568
1569 if (restricted && error == EHOSTUNREACH) {
1570 soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
1571 SO_FILT_HINT_IFDENIED));
1572 }
1573
1574 return error;
1575 }
1576
1577 /*
1578 * Outer subroutine:
1579 * Connect from a socket to a specified address.
1580 * Both address and port must be specified in argument sin.
1581 * If we don't have a local address for this socket yet,
1582 * then pick one.
1583 *
1584 * The caller may override the bound-to-interface setting of the socket
1585 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1586 */
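/*
 * The sequence below: pick a local address via in_pcbladdr(), verify with
 * in_pcblookup_hash() that the resulting 4-tuple is not already in use,
 * bind an ephemeral port if needed, then commit faddr/fport and rehash
 * the PCB.
 */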
1587 int
1588 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
1589 unsigned int ifscope, struct ifnet **outif)
1590 {
1591 struct in_addr laddr;
1592 struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
1593 struct inpcb *pcb;
1594 int error;
1595 struct socket *so = inp->inp_socket;
1596
1597 #if CONTENT_FILTER
1598 if (so) {
1599 so->so_state_change_cnt++;
1600 }
1601 #endif
1602
1603 /*
1604 * Call inner routine, to assign local interface address.
1605 */
1606 if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0) {
1607 return error;
1608 }
1609
1610 socket_unlock(so, 0);
1611 pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
1612 inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
1613 inp->inp_lport, 0, NULL);
1614 socket_lock(so, 0);
1615
1616 /*
1617 * Check if the socket is still in a valid state. When we unlock this
1618 * embryonic socket, it can get aborted if another thread is closing
1619 * the listener (radar 7947600).
1620 */
1621 if ((so->so_flags & SOF_ABORTED) != 0) {
1622 return ECONNREFUSED;
1623 }
1624
1625 if (pcb != NULL) {
1626 in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
1627 return EADDRINUSE;
1628 }
1629 if (inp->inp_laddr.s_addr == INADDR_ANY) {
1630 if (inp->inp_lport == 0) {
1631 error = in_pcbbind(inp, NULL, p);
1632 if (error) {
1633 return error;
1634 }
1635 }
1636 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1637 /*
1638 * Lock inversion issue, mostly with udp
1639 * multicast packets.
1640 */
1641 socket_unlock(so, 0);
1642 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1643 socket_lock(so, 0);
1644 }
1645 inp->inp_laddr = laddr;
1646 /* no reference needed */
1647 inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
1648 inp->inp_flags |= INP_INADDR_ANY;
1649 } else {
1650 /*
1651 * Using IP_PKTINFO without a local port already
1652 * specified will cause the kernel to panic;
1653 * see rdar://problem/18508185.
1654 * For now, return an error to avoid a kernel panic.
1655 * This routine can be refactored to handle this better
1656 * in the future.
1657 */
1658 if (inp->inp_lport == 0) {
1659 return EINVAL;
1660 }
1661 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1662 /*
1663 * Lock inversion issue, mostly with udp
1664 * multicast packets.
1665 */
1666 socket_unlock(so, 0);
1667 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1668 socket_lock(so, 0);
1669 }
1670 }
1671 inp->inp_faddr = sin->sin_addr;
1672 inp->inp_fport = sin->sin_port;
1673 if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
1674 nstat_pcb_invalidate_cache(inp);
1675 }
1676 in_pcbrehash(inp);
1677 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1678 return 0;
1679 }
1680
1681 void
1682 in_pcbdisconnect(struct inpcb *inp)
1683 {
1684 struct socket *so = inp->inp_socket;
1685
1686 if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
1687 nstat_pcb_cache(inp);
1688 }
1689
1690 inp->inp_faddr.s_addr = INADDR_ANY;
1691 inp->inp_fport = 0;
1692
1693 #if CONTENT_FILTER
1694 if (so) {
1695 so->so_state_change_cnt++;
1696 }
1697 #endif
1698
1699 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1700 /* lock inversion issue, mostly with udp multicast packets */
1701 socket_unlock(so, 0);
1702 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1703 socket_lock(so, 0);
1704 }
1705
1706 in_pcbrehash(inp);
1707 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1708 /*
1709 * A multipath subflow socket would have its SS_NOFDREF set by default,
1710 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
1711 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
1712 */
1713 if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) {
1714 in_pcbdetach(inp);
1715 }
1716 }
1717
1718 void
1719 in_pcbdetach(struct inpcb *inp)
1720 {
1721 struct socket *so = inp->inp_socket;
1722
1723 if (so->so_pcb == NULL) {
1724 /* PCB has been disposed */
1725 panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__,
1726 inp, so, SOCK_PROTO(so));
1727 /* NOTREACHED */
1728 }
1729
1730 #if IPSEC
1731 if (inp->inp_sp != NULL) {
1732 (void) ipsec4_delete_pcbpolicy(inp);
1733 }
1734 #endif /* IPSEC */
1735
1736 if (inp->inp_stat != NULL && SOCK_PROTO(so) == IPPROTO_UDP) {
1737 if (inp->inp_stat->rxpackets == 0 && inp->inp_stat->txpackets == 0) {
1738 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_no_data);
1739 }
1740 }
1741
1742 /*
1743 * Let NetworkStatistics know this PCB is going away
1744 * before we detach it.
1745 */
1746 if (nstat_collect &&
1747 (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
1748 nstat_pcb_detach(inp);
1749 }
1750
1751 /* Free memory buffer held for generating keep alives */
1752 if (inp->inp_keepalive_data != NULL) {
1753 FREE(inp->inp_keepalive_data, M_TEMP);
1754 inp->inp_keepalive_data = NULL;
1755 }
1756
1757 /* mark socket state as dead */
1758 if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
1759 panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
1760 __func__, so, SOCK_PROTO(so));
1761 /* NOTREACHED */
1762 }
1763
1764 if (!(so->so_flags & SOF_PCBCLEARING)) {
1765 struct ip_moptions *imo;
1766
1767 inp->inp_vflag = 0;
1768 if (inp->inp_options != NULL) {
1769 (void) m_free(inp->inp_options);
1770 inp->inp_options = NULL;
1771 }
1772 ROUTE_RELEASE(&inp->inp_route);
1773 imo = inp->inp_moptions;
1774 inp->inp_moptions = NULL;
1775 sofreelastref(so, 0);
1776 inp->inp_state = INPCB_STATE_DEAD;
1777
1778 /*
1779 * Enqueue an event to send a kernel event notification
1780 * if the flow had to use CLAT46 for its data packets
1781 */
1782 if (inp->inp_flags2 & INP2_CLAT46_FLOW) {
1783 /*
1784 * If there has been any exchange of data bytes
1785 * over this flow, schedule a notification to
1786 * report that the flow is using client-side
1787 * translation.
1788 */
1789 if (inp->inp_stat != NULL &&
1790 (inp->inp_stat->txbytes != 0 ||
1791 inp->inp_stat->rxbytes != 0)) {
1792 if (so->so_flags & SOF_DELEGATED) {
1793 in6_clat46_event_enqueue_nwk_wq_entry(
1794 IN6_CLAT46_EVENT_V4_FLOW,
1795 so->e_pid,
1796 so->e_uuid);
1797 } else {
1798 in6_clat46_event_enqueue_nwk_wq_entry(
1799 IN6_CLAT46_EVENT_V4_FLOW,
1800 so->last_pid,
1801 so->last_uuid);
1802 }
1803 }
1804 }
1805
1806 /* makes sure we're not called twice from so_close */
1807 so->so_flags |= SOF_PCBCLEARING;
1808
1809 inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
1810
1811 /*
1812 * See inp_join_group() for why we need to unlock
1813 */
1814 if (imo != NULL) {
1815 socket_unlock(so, 0);
1816 IMO_REMREF(imo);
1817 socket_lock(so, 0);
1818 }
1819 }
1820 }
1821
1822
1823 void
1824 in_pcbdispose(struct inpcb *inp)
1825 {
1826 struct socket *so = inp->inp_socket;
1827 struct inpcbinfo *ipi = inp->inp_pcbinfo;
1828
1829 if (so != NULL && so->so_usecount != 0) {
1830 panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n",
1831 __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
1832 solockhistory_nr(so));
1833 /* NOTREACHED */
1834 } else if (inp->inp_wantcnt != WNT_STOPUSING) {
1835 if (so != NULL) {
1836 panic_plain("%s: inp %p invalid wantcnt %d, so %p "
1837 "[%d,%d] usecount %d retaincnt %d state 0x%x "
1838 "flags 0x%x lockhistory %s\n", __func__, inp,
1839 inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
1840 so->so_usecount, so->so_retaincnt, so->so_state,
1841 so->so_flags, solockhistory_nr(so));
1842 /* NOTREACHED */
1843 } else {
1844 panic("%s: inp %p invalid wantcnt %d no socket\n",
1845 __func__, inp, inp->inp_wantcnt);
1846 /* NOTREACHED */
1847 }
1848 }
1849
1850 LCK_RW_ASSERT(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);
1851
1852 inp->inp_gencnt = ++ipi->ipi_gencnt;
1853 /* access ipi in in_pcbremlists */
1854 in_pcbremlists(inp);
1855
1856 if (so != NULL) {
1857 if (so->so_proto->pr_flags & PR_PCBLOCK) {
1858 sofreelastref(so, 0);
1859 if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
1860 /*
1861 * selthreadclear() already called
1862 * during sofreelastref() above.
1863 */
1864 sbrelease(&so->so_rcv);
1865 sbrelease(&so->so_snd);
1866 }
1867 if (so->so_head != NULL) {
1868 panic("%s: so=%p head still exist\n",
1869 __func__, so);
1870 /* NOTREACHED */
1871 }
1872 lck_mtx_unlock(&inp->inpcb_mtx);
1873
1874 #if NECP
1875 necp_inpcb_remove_cb(inp);
1876 #endif /* NECP */
1877
1878 lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
1879 }
1880 /* makes sure we're not called twice from so_close */
1881 so->so_flags |= SOF_PCBCLEARING;
1882 so->so_saved_pcb = (caddr_t)inp;
1883 so->so_pcb = NULL;
1884 inp->inp_socket = NULL;
1885 #if NECP
1886 necp_inpcb_dispose(inp);
1887 #endif /* NECP */
1888 /*
1889 		 * In case there is a route cached after a detach (possible
1890 		 * in the TCP case), make sure that it is freed before
1891 		 * we deallocate the structure.
1892 */
1893 ROUTE_RELEASE(&inp->inp_route);
1894 if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
1895 zfree(ipi->ipi_zone, inp);
1896 }
1897 sodealloc(so);
1898 }
1899 }
1900
1901 /*
1902 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
1903 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
1904  * in struct pr_usrreqs, so that protocols can just reference them directly
1905 * without the need for a wrapper function.
1906 */
1907 int
1908 in_getsockaddr(struct socket *so, struct sockaddr **nam)
1909 {
1910 struct inpcb *inp;
1911 struct sockaddr_in *sin;
1912
1913 /*
1914 * Do the malloc first in case it blocks.
1915 */
1916 MALLOC(sin, struct sockaddr_in *, sizeof(*sin), M_SONAME, M_WAITOK);
1917 if (sin == NULL) {
1918 return ENOBUFS;
1919 }
1920 bzero(sin, sizeof(*sin));
1921 sin->sin_family = AF_INET;
1922 sin->sin_len = sizeof(*sin);
1923
1924 if ((inp = sotoinpcb(so)) == NULL) {
1925 FREE(sin, M_SONAME);
1926 return EINVAL;
1927 }
1928 sin->sin_port = inp->inp_lport;
1929 sin->sin_addr = inp->inp_laddr;
1930
1931 *nam = (struct sockaddr *)sin;
1932 return 0;
1933 }
1934
1935 int
1936 in_getsockaddr_s(struct socket *so, struct sockaddr_in *ss)
1937 {
1938 struct sockaddr_in *sin = ss;
1939 struct inpcb *inp;
1940
1941 VERIFY(ss != NULL);
1942 bzero(ss, sizeof(*ss));
1943
1944 sin->sin_family = AF_INET;
1945 sin->sin_len = sizeof(*sin);
1946
1947 if ((inp = sotoinpcb(so)) == NULL) {
1948 return EINVAL;
1949 }
1950
1951 sin->sin_port = inp->inp_lport;
1952 sin->sin_addr = inp->inp_laddr;
1953 return 0;
1954 }
1955
1956 int
1957 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
1958 {
1959 struct inpcb *inp;
1960 struct sockaddr_in *sin;
1961
1962 /*
1963 * Do the malloc first in case it blocks.
1964 */
1965 MALLOC(sin, struct sockaddr_in *, sizeof(*sin), M_SONAME, M_WAITOK);
1966 if (sin == NULL) {
1967 return ENOBUFS;
1968 }
1969 bzero((caddr_t)sin, sizeof(*sin));
1970 sin->sin_family = AF_INET;
1971 sin->sin_len = sizeof(*sin);
1972
1973 if ((inp = sotoinpcb(so)) == NULL) {
1974 FREE(sin, M_SONAME);
1975 return EINVAL;
1976 }
1977 sin->sin_port = inp->inp_fport;
1978 sin->sin_addr = inp->inp_faddr;
1979
1980 *nam = (struct sockaddr *)sin;
1981 return 0;
1982 }
1983
1984 void
1985 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1986 int errno, void (*notify)(struct inpcb *, int))
1987 {
1988 struct inpcb *inp;
1989
1990 lck_rw_lock_shared(pcbinfo->ipi_lock);
1991
1992 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1993 if (!(inp->inp_vflag & INP_IPV4)) {
1994 continue;
1995 }
1996 if (inp->inp_faddr.s_addr != faddr.s_addr ||
1997 inp->inp_socket == NULL) {
1998 continue;
1999 }
2000 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
2001 continue;
2002 }
2003 socket_lock(inp->inp_socket, 1);
2004 (*notify)(inp, errno);
2005 (void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
2006 socket_unlock(inp->inp_socket, 1);
2007 }
2008 lck_rw_done(pcbinfo->ipi_lock);
2009 }
2010
2011 /*
2012 * Check for alternatives when higher level complains
2013 * about service problems. For now, invalidate cached
2014 * routing information. If the route was created dynamically
2015 * (by a redirect), time to try a default gateway again.
2016 */
2017 void
2018 in_losing(struct inpcb *inp)
2019 {
2020 boolean_t release = FALSE;
2021 struct rtentry *rt;
2022
2023 if ((rt = inp->inp_route.ro_rt) != NULL) {
2024 struct in_ifaddr *ia = NULL;
2025
2026 RT_LOCK(rt);
2027 if (rt->rt_flags & RTF_DYNAMIC) {
2028 /*
2029 * Prevent another thread from modifying rt_key,
2030 * rt_gateway via rt_setgate() after rt_lock is
2031 * dropped by marking the route as defunct.
2032 */
2033 rt->rt_flags |= RTF_CONDEMNED;
2034 RT_UNLOCK(rt);
2035 (void) rtrequest(RTM_DELETE, rt_key(rt),
2036 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
2037 } else {
2038 RT_UNLOCK(rt);
2039 }
2040 /* if the address is gone keep the old route in the pcb */
2041 if (inp->inp_laddr.s_addr != INADDR_ANY &&
2042 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
2043 /*
2044 * Address is around; ditch the route. A new route
2045 * can be allocated the next time output is attempted.
2046 */
2047 release = TRUE;
2048 }
2049 if (ia != NULL) {
2050 IFA_REMREF(&ia->ia_ifa);
2051 }
2052 }
2053 if (rt == NULL || release) {
2054 ROUTE_RELEASE(&inp->inp_route);
2055 }
2056 }
2057
2058 /*
2059 * After a routing change, flush old routing
2060 * and allocate a (hopefully) better one.
2061 */
2062 void
2063 in_rtchange(struct inpcb *inp, int errno)
2064 {
2065 #pragma unused(errno)
2066 boolean_t release = FALSE;
2067 struct rtentry *rt;
2068
2069 if ((rt = inp->inp_route.ro_rt) != NULL) {
2070 struct in_ifaddr *ia = NULL;
2071
2072 /* if address is gone, keep the old route */
2073 if (inp->inp_laddr.s_addr != INADDR_ANY &&
2074 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
2075 /*
2076 * Address is around; ditch the route. A new route
2077 * can be allocated the next time output is attempted.
2078 */
2079 release = TRUE;
2080 }
2081 if (ia != NULL) {
2082 IFA_REMREF(&ia->ia_ifa);
2083 }
2084 }
2085 if (rt == NULL || release) {
2086 ROUTE_RELEASE(&inp->inp_route);
2087 }
2088 }
2089
2090 /*
2091 * Lookup a PCB based on the local address and port.
2092 */
2093 struct inpcb *
2094 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
2095 unsigned int lport_arg, int wild_okay)
2096 {
2097 struct inpcb *inp;
2098 int matchwild = 3, wildcard;
2099 u_short lport = (u_short)lport_arg;
2100
2101 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);
2102
2103 if (!wild_okay) {
2104 struct inpcbhead *head;
2105 /*
2106 * Look for an unconnected (wildcard foreign addr) PCB that
2107 * matches the local address and port we're looking for.
2108 */
2109 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2110 pcbinfo->ipi_hashmask)];
2111 LIST_FOREACH(inp, head, inp_hash) {
2112 if (!(inp->inp_vflag & INP_IPV4)) {
2113 continue;
2114 }
2115 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2116 inp->inp_laddr.s_addr == laddr.s_addr &&
2117 inp->inp_lport == lport) {
2118 /*
2119 * Found.
2120 */
2121 return inp;
2122 }
2123 }
2124 /*
2125 * Not found.
2126 */
2127 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
2128 return NULL;
2129 } else {
2130 struct inpcbporthead *porthash;
2131 struct inpcbport *phd;
2132 struct inpcb *match = NULL;
2133 /*
2134 * Best fit PCB lookup.
2135 *
2136 * First see if this local port is in use by looking on the
2137 * port hash list.
2138 */
2139 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
2140 pcbinfo->ipi_porthashmask)];
2141 LIST_FOREACH(phd, porthash, phd_hash) {
2142 if (phd->phd_port == lport) {
2143 break;
2144 }
2145 }
2146 if (phd != NULL) {
2147 /*
2148 * Port is in use by one or more PCBs. Look for best
2149 * fit.
2150 */
2151 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
2152 wildcard = 0;
2153 if (!(inp->inp_vflag & INP_IPV4)) {
2154 continue;
2155 }
2156 if (inp->inp_faddr.s_addr != INADDR_ANY) {
2157 wildcard++;
2158 }
2159 if (inp->inp_laddr.s_addr != INADDR_ANY) {
2160 if (laddr.s_addr == INADDR_ANY) {
2161 wildcard++;
2162 } else if (inp->inp_laddr.s_addr !=
2163 laddr.s_addr) {
2164 continue;
2165 }
2166 } else {
2167 if (laddr.s_addr != INADDR_ANY) {
2168 wildcard++;
2169 }
2170 }
2171 if (wildcard < matchwild) {
2172 match = inp;
2173 matchwild = wildcard;
2174 if (matchwild == 0) {
2175 break;
2176 }
2177 }
2178 }
2179 }
2180 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
2181 0, 0, 0, 0);
2182 return match;
2183 }
2184 }
2185
2186 /*
2187 * Check if PCB exists in hash list.
2188 */
2189 int
2190 in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2191 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2192 uid_t *uid, gid_t *gid, struct ifnet *ifp)
2193 {
2194 struct inpcbhead *head;
2195 struct inpcb *inp;
2196 u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
2197 int found = 0;
2198 struct inpcb *local_wild = NULL;
2199 struct inpcb *local_wild_mapped = NULL;
2200
2201 *uid = UID_MAX;
2202 *gid = GID_MAX;
2203
2204 /*
2205 * We may have found the pcb in the last lookup - check this first.
2206 */
2207
2208 lck_rw_lock_shared(pcbinfo->ipi_lock);
2209
2210 /*
2211 * First look for an exact match.
2212 */
2213 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2214 pcbinfo->ipi_hashmask)];
2215 LIST_FOREACH(inp, head, inp_hash) {
2216 if (!(inp->inp_vflag & INP_IPV4)) {
2217 continue;
2218 }
2219 if (inp_restricted_recv(inp, ifp)) {
2220 continue;
2221 }
2222
2223 #if NECP
2224 if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2225 continue;
2226 }
2227 #endif /* NECP */
2228
2229 if (inp->inp_faddr.s_addr == faddr.s_addr &&
2230 inp->inp_laddr.s_addr == laddr.s_addr &&
2231 inp->inp_fport == fport &&
2232 inp->inp_lport == lport) {
2233 if ((found = (inp->inp_socket != NULL))) {
2234 /*
2235 * Found.
2236 */
2237 *uid = kauth_cred_getuid(
2238 inp->inp_socket->so_cred);
2239 *gid = kauth_cred_getgid(
2240 inp->inp_socket->so_cred);
2241 }
2242 lck_rw_done(pcbinfo->ipi_lock);
2243 return found;
2244 }
2245 }
2246
2247 if (!wildcard) {
2248 /*
2249 * Not found.
2250 */
2251 lck_rw_done(pcbinfo->ipi_lock);
2252 return 0;
2253 }
2254
2255 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2256 pcbinfo->ipi_hashmask)];
2257 LIST_FOREACH(inp, head, inp_hash) {
2258 if (!(inp->inp_vflag & INP_IPV4)) {
2259 continue;
2260 }
2261 if (inp_restricted_recv(inp, ifp)) {
2262 continue;
2263 }
2264
2265 #if NECP
2266 if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2267 continue;
2268 }
2269 #endif /* NECP */
2270
2271 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2272 inp->inp_lport == lport) {
2273 if (inp->inp_laddr.s_addr == laddr.s_addr) {
2274 if ((found = (inp->inp_socket != NULL))) {
2275 *uid = kauth_cred_getuid(
2276 inp->inp_socket->so_cred);
2277 *gid = kauth_cred_getgid(
2278 inp->inp_socket->so_cred);
2279 }
2280 lck_rw_done(pcbinfo->ipi_lock);
2281 return found;
2282 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2283 if (inp->inp_socket &&
2284 SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
2285 local_wild_mapped = inp;
2286 } else {
2287 local_wild = inp;
2288 }
2289 }
2290 }
2291 }
2292 if (local_wild == NULL) {
2293 if (local_wild_mapped != NULL) {
2294 if ((found = (local_wild_mapped->inp_socket != NULL))) {
2295 *uid = kauth_cred_getuid(
2296 local_wild_mapped->inp_socket->so_cred);
2297 *gid = kauth_cred_getgid(
2298 local_wild_mapped->inp_socket->so_cred);
2299 }
2300 lck_rw_done(pcbinfo->ipi_lock);
2301 return found;
2302 }
2303 lck_rw_done(pcbinfo->ipi_lock);
2304 return 0;
2305 }
2306 if ((found = (local_wild->inp_socket != NULL))) {
2307 *uid = kauth_cred_getuid(
2308 local_wild->inp_socket->so_cred);
2309 *gid = kauth_cred_getgid(
2310 local_wild->inp_socket->so_cred);
2311 }
2312 lck_rw_done(pcbinfo->ipi_lock);
2313 return found;
2314 }
2315
2316 /*
2317 * Lookup PCB in hash list.
2318 */
2319 struct inpcb *
2320 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2321 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2322 struct ifnet *ifp)
2323 {
2324 struct inpcbhead *head;
2325 struct inpcb *inp;
2326 u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
2327 struct inpcb *local_wild = NULL;
2328 struct inpcb *local_wild_mapped = NULL;
2329
2330 /*
2331 * We may have found the pcb in the last lookup - check this first.
2332 */
2333
2334 lck_rw_lock_shared(pcbinfo->ipi_lock);
2335
2336 /*
2337 * First look for an exact match.
2338 */
2339 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2340 pcbinfo->ipi_hashmask)];
2341 LIST_FOREACH(inp, head, inp_hash) {
2342 if (!(inp->inp_vflag & INP_IPV4)) {
2343 continue;
2344 }
2345 if (inp_restricted_recv(inp, ifp)) {
2346 continue;
2347 }
2348
2349 #if NECP
2350 if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2351 continue;
2352 }
2353 #endif /* NECP */
2354
2355 if (inp->inp_faddr.s_addr == faddr.s_addr &&
2356 inp->inp_laddr.s_addr == laddr.s_addr &&
2357 inp->inp_fport == fport &&
2358 inp->inp_lport == lport) {
2359 /*
2360 * Found.
2361 */
2362 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2363 WNT_STOPUSING) {
2364 lck_rw_done(pcbinfo->ipi_lock);
2365 return inp;
2366 } else {
2367 /* it's there but dead, say it isn't found */
2368 lck_rw_done(pcbinfo->ipi_lock);
2369 return NULL;
2370 }
2371 }
2372 }
2373
2374 if (!wildcard) {
2375 /*
2376 * Not found.
2377 */
2378 lck_rw_done(pcbinfo->ipi_lock);
2379 return NULL;
2380 }
2381
2382 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2383 pcbinfo->ipi_hashmask)];
2384 LIST_FOREACH(inp, head, inp_hash) {
2385 if (!(inp->inp_vflag & INP_IPV4)) {
2386 continue;
2387 }
2388 if (inp_restricted_recv(inp, ifp)) {
2389 continue;
2390 }
2391
2392 #if NECP
2393 if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2394 continue;
2395 }
2396 #endif /* NECP */
2397
2398 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2399 inp->inp_lport == lport) {
2400 if (inp->inp_laddr.s_addr == laddr.s_addr) {
2401 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2402 WNT_STOPUSING) {
2403 lck_rw_done(pcbinfo->ipi_lock);
2404 return inp;
2405 } else {
2406 /* it's dead; say it isn't found */
2407 lck_rw_done(pcbinfo->ipi_lock);
2408 return NULL;
2409 }
2410 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2411 if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
2412 local_wild_mapped = inp;
2413 } else {
2414 local_wild = inp;
2415 }
2416 }
2417 }
2418 }
2419 if (local_wild == NULL) {
2420 if (local_wild_mapped != NULL) {
2421 if (in_pcb_checkstate(local_wild_mapped,
2422 WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2423 lck_rw_done(pcbinfo->ipi_lock);
2424 return local_wild_mapped;
2425 } else {
2426 /* it's dead; say it isn't found */
2427 lck_rw_done(pcbinfo->ipi_lock);
2428 return NULL;
2429 }
2430 }
2431 lck_rw_done(pcbinfo->ipi_lock);
2432 return NULL;
2433 }
2434 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2435 lck_rw_done(pcbinfo->ipi_lock);
2436 return local_wild;
2437 }
2438 /*
2439 * It's either not found or is already dead.
2440 */
2441 lck_rw_done(pcbinfo->ipi_lock);
2442 return NULL;
2443 }
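
/*
 * Illustrative sketch (not part of the original source): how a protocol
 * input path might demux a received IPv4 datagram to a PCB with
 * in_pcblookup_hash().  example_demux() and its arguments are hypothetical,
 * and the block is compiled out, so it only documents the calling pattern.
 */
#if 0
static struct inpcb *
example_demux(struct inpcbinfo *pcbinfo, struct ip *ip,
    u_short sport, u_short dport, struct ifnet *rcvifp)
{
	/*
	 * The packet's source becomes the foreign address/port and its
	 * destination the local address/port.  Wildcard matching is
	 * enabled so that listening/unconnected sockets can be found.
	 * A non-NULL result carries a WNT_ACQUIRE reference that the
	 * caller must hand back via in_pcb_checkstate(WNT_RELEASE).
	 */
	return in_pcblookup_hash(pcbinfo, ip->ip_src, sport,
	    ip->ip_dst, dport, 1, rcvifp);
}
#endif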
2444
2445 /*
2446 * @brief Insert PCB onto various hash lists.
2447 *
2448 * @param inp Pointer to internet protocol control block
2449  * @param locked Indicates whether ipi_lock (protecting the pcb
2450  * list) is already held by the caller.
2451  *
2452  * @return 0 on success, or an errno value on failure.
2453 */
2454 int
2455 in_pcbinshash(struct inpcb *inp, int locked)
2456 {
2457 struct inpcbhead *pcbhash;
2458 struct inpcbporthead *pcbporthash;
2459 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
2460 struct inpcbport *phd;
2461 u_int32_t hashkey_faddr;
2462
2463 if (!locked) {
2464 if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
2465 /*
2466 * Lock inversion issue, mostly with udp
2467 * multicast packets
2468 */
2469 socket_unlock(inp->inp_socket, 0);
2470 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
2471 socket_lock(inp->inp_socket, 0);
2472 }
2473 }
2474
2475 /*
2476 	 * This routine or its caller may have briefly given up the
2477 	 * socket's protocol lock.
2478 	 * During that time the socket may have been dropped.
2479 	 * Guard against that here.
2480 */
2481 if (inp->inp_state == INPCB_STATE_DEAD) {
2482 if (!locked) {
2483 lck_rw_done(pcbinfo->ipi_lock);
2484 }
2485 return ECONNABORTED;
2486 }
2487
2488
2489 if (inp->inp_vflag & INP_IPV6) {
2490 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2491 } else {
2492 hashkey_faddr = inp->inp_faddr.s_addr;
2493 }
2494
2495 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2496 inp->inp_fport, pcbinfo->ipi_hashmask);
2497
2498 pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];
2499
2500 pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
2501 pcbinfo->ipi_porthashmask)];
2502
2503 /*
2504 * Go through port list and look for a head for this lport.
2505 */
2506 LIST_FOREACH(phd, pcbporthash, phd_hash) {
2507 if (phd->phd_port == inp->inp_lport) {
2508 break;
2509 }
2510 }
2511
2512 /*
2513 * If none exists, malloc one and tack it on.
2514 */
2515 if (phd == NULL) {
2516 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport),
2517 M_PCB, M_WAITOK);
2518 if (phd == NULL) {
2519 if (!locked) {
2520 lck_rw_done(pcbinfo->ipi_lock);
2521 }
2522 return ENOBUFS; /* XXX */
2523 }
2524 phd->phd_port = inp->inp_lport;
2525 LIST_INIT(&phd->phd_pcblist);
2526 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
2527 }
2528
2529 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2530
2531
2532 inp->inp_phd = phd;
2533 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
2534 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
2535 inp->inp_flags2 |= INP2_INHASHLIST;
2536
2537 if (!locked) {
2538 lck_rw_done(pcbinfo->ipi_lock);
2539 }
2540
2541 #if NECP
2542 // This call catches the original setting of the local address
2543 inp_update_necp_policy(inp, NULL, NULL, 0);
2544 #endif /* NECP */
2545
2546 return 0;
2547 }
2548
2549 /*
2550 * Move PCB to the proper hash bucket when { faddr, fport } have been
2551 * changed. NOTE: This does not handle the case of the lport changing (the
2552 * hashed port list would have to be updated as well), so the lport must
2553 * not change after in_pcbinshash() has been called.
2554 */
2555 void
2556 in_pcbrehash(struct inpcb *inp)
2557 {
2558 struct inpcbhead *head;
2559 u_int32_t hashkey_faddr;
2560
2561 if (inp->inp_vflag & INP_IPV6) {
2562 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2563 } else {
2564 hashkey_faddr = inp->inp_faddr.s_addr;
2565 }
2566
2567 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2568 inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
2569 head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];
2570
2571 if (inp->inp_flags2 & INP2_INHASHLIST) {
2572 LIST_REMOVE(inp, inp_hash);
2573 inp->inp_flags2 &= ~INP2_INHASHLIST;
2574 }
2575
2576 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2577 LIST_INSERT_HEAD(head, inp, inp_hash);
2578 inp->inp_flags2 |= INP2_INHASHLIST;
2579
2580 #if NECP
2581 // This call catches updates to the remote addresses
2582 inp_update_necp_policy(inp, NULL, NULL, 0);
2583 #endif /* NECP */
2584 }
2585
2586 /*
2587 * Remove PCB from various lists.
2588  * Must be called with the pcbinfo lock held in exclusive mode.
2589 */
2590 void
2591 in_pcbremlists(struct inpcb *inp)
2592 {
2593 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
2594
2595 /*
2596 * Check if it's in hashlist -- an inp is placed in hashlist when
2597 	 * its local port gets assigned. So it should also be present
2598 * in the port list.
2599 */
2600 if (inp->inp_flags2 & INP2_INHASHLIST) {
2601 struct inpcbport *phd = inp->inp_phd;
2602
2603 VERIFY(phd != NULL && inp->inp_lport > 0);
2604
2605 LIST_REMOVE(inp, inp_hash);
2606 inp->inp_hash.le_next = NULL;
2607 inp->inp_hash.le_prev = NULL;
2608
2609 LIST_REMOVE(inp, inp_portlist);
2610 inp->inp_portlist.le_next = NULL;
2611 inp->inp_portlist.le_prev = NULL;
2612 if (LIST_EMPTY(&phd->phd_pcblist)) {
2613 LIST_REMOVE(phd, phd_hash);
2614 FREE(phd, M_PCB);
2615 }
2616 inp->inp_phd = NULL;
2617 inp->inp_flags2 &= ~INP2_INHASHLIST;
2618 }
2619 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2620
2621 if (inp->inp_flags2 & INP2_TIMEWAIT) {
2622 /* Remove from time-wait queue */
2623 tcp_remove_from_time_wait(inp);
2624 inp->inp_flags2 &= ~INP2_TIMEWAIT;
2625 VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
2626 inp->inp_pcbinfo->ipi_twcount--;
2627 } else {
2628 /* Remove from global inp list if it is not time-wait */
2629 LIST_REMOVE(inp, inp_list);
2630 }
2631
2632 if (inp->inp_flags2 & INP2_IN_FCTREE) {
2633 inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED | INPFC_REMOVE));
2634 VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
2635 }
2636
2637 inp->inp_pcbinfo->ipi_count--;
2638 }
2639
2640 /*
2641  * Mechanism used to defer the memory release of PCBs.
2642  * The pcb list will contain the pcb until the reaper can clean it up if
2643  * the following conditions are met:
2644  * 1) state is "DEAD",
2645  * 2) wantcnt is STOPUSING, and
2646  * 3) usecount is 0.
2647  * This function is called either to mark the pcb as ready for recycling (WNT_STOPUSING) or to acquire/release a want-count reference on it.
2648 */
2649 int
2650 in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
2651 {
2652 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
2653 UInt32 origwant;
2654 UInt32 newwant;
2655
2656 switch (mode) {
2657 case WNT_STOPUSING:
2658 /*
2659 		 * Try to mark the pcb as ready for recycling: CAS the want
2660 		 * count to STOPUSING. If that succeeds we're done; if the pcb
2661 		 * is still in use, it will be marked later.
2662 */
2663 if (locked == 0) {
2664 socket_lock(pcb->inp_socket, 1);
2665 }
2666 pcb->inp_state = INPCB_STATE_DEAD;
2667
2668 stopusing:
2669 if (pcb->inp_socket->so_usecount < 0) {
2670 panic("%s: pcb=%p so=%p usecount is negative\n",
2671 __func__, pcb, pcb->inp_socket);
2672 /* NOTREACHED */
2673 }
2674 if (locked == 0) {
2675 socket_unlock(pcb->inp_socket, 1);
2676 }
2677
2678 inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);
2679
2680 origwant = *wantcnt;
2681 if ((UInt16) origwant == 0xffff) { /* should stop using */
2682 return WNT_STOPUSING;
2683 }
2684 newwant = 0xffff;
2685 if ((UInt16) origwant == 0) {
2686 			/* try to mark it as unusable now */
2687 OSCompareAndSwap(origwant, newwant, wantcnt);
2688 }
2689 return WNT_STOPUSING;
2690
2691 case WNT_ACQUIRE:
2692 /*
2693 		 * Try to take a reference on the pcb. If the want count is
2694 		 * already at WNT_STOPUSING, bail out; otherwise increment
2695 		 * the count and return WNT_ACQUIRE.
2696 */
2697 do {
2698 origwant = *wantcnt;
2699 if ((UInt16) origwant == 0xffff) {
2700 /* should stop using */
2701 return WNT_STOPUSING;
2702 }
2703 newwant = origwant + 1;
2704 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2705 return WNT_ACQUIRE;
2706
2707 case WNT_RELEASE:
2708 /*
2709 		 * Release a reference. If the pcb state is DEAD afterwards,
2710 		 * set the want count to STOPUSING.
2711 */
2712 if (locked == 0) {
2713 socket_lock(pcb->inp_socket, 1);
2714 }
2715
2716 do {
2717 origwant = *wantcnt;
2718 if ((UInt16) origwant == 0x0) {
2719 panic("%s: pcb=%p release with zero count",
2720 __func__, pcb);
2721 /* NOTREACHED */
2722 }
2723 if ((UInt16) origwant == 0xffff) {
2724 /* should stop using */
2725 if (locked == 0) {
2726 socket_unlock(pcb->inp_socket, 1);
2727 }
2728 return WNT_STOPUSING;
2729 }
2730 newwant = origwant - 1;
2731 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2732
2733 if (pcb->inp_state == INPCB_STATE_DEAD) {
2734 goto stopusing;
2735 }
2736 if (pcb->inp_socket->so_usecount < 0) {
2737 panic("%s: RELEASE pcb=%p so=%p usecount is negative\n",
2738 __func__, pcb, pcb->inp_socket);
2739 /* NOTREACHED */
2740 }
2741
2742 if (locked == 0) {
2743 socket_unlock(pcb->inp_socket, 1);
2744 }
2745 return WNT_RELEASE;
2746
2747 default:
2748 panic("%s: so=%p not a valid state =%x\n", __func__,
2749 pcb->inp_socket, mode);
2750 /* NOTREACHED */
2751 }
2752
2753 /* NOTREACHED */
2754 return mode;
2755 }
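
/*
 * Illustrative sketch (not part of the original source): the typical
 * acquire/use/release pattern built on in_pcb_checkstate(), mirroring the
 * pcbinfo walkers above.  example_visit_inp() is hypothetical and the block
 * is compiled out.
 */
#if 0
static void
example_visit_inp(struct inpcb *inp)
{
	/* Refuse to touch a pcb that is already marked for recycling. */
	if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
		return;
	}
	socket_lock(inp->inp_socket, 1);
	/* ... operate on the pcb while the want count is held ... */
	(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
	socket_unlock(inp->inp_socket, 1);
}
#endif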
2756
2757 /*
2758  * inpcb_to_compat copies specific bits of an inpcb to an inpcb_compat.
2759 * The inpcb_compat data structure is passed to user space and must
2760 * not change. We intentionally avoid copying pointers.
2761 */
2762 void
2763 inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
2764 {
2765 bzero(inp_compat, sizeof(*inp_compat));
2766 inp_compat->inp_fport = inp->inp_fport;
2767 inp_compat->inp_lport = inp->inp_lport;
2768 inp_compat->nat_owner = 0;
2769 inp_compat->nat_cookie = 0;
2770 inp_compat->inp_gencnt = inp->inp_gencnt;
2771 inp_compat->inp_flags = inp->inp_flags;
2772 inp_compat->inp_flow = inp->inp_flow;
2773 inp_compat->inp_vflag = inp->inp_vflag;
2774 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
2775 inp_compat->inp_ip_p = inp->inp_ip_p;
2776 inp_compat->inp_dependfaddr.inp6_foreign =
2777 inp->inp_dependfaddr.inp6_foreign;
2778 inp_compat->inp_dependladdr.inp6_local =
2779 inp->inp_dependladdr.inp6_local;
2780 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2781 inp_compat->inp_depend6.inp6_hlim = 0;
2782 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2783 inp_compat->inp_depend6.inp6_ifindex = 0;
2784 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2785 }
2786
2787 #if XNU_TARGET_OS_OSX
2788 void
2789 inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
2790 {
2791 xinp->inp_fport = inp->inp_fport;
2792 xinp->inp_lport = inp->inp_lport;
2793 xinp->inp_gencnt = inp->inp_gencnt;
2794 xinp->inp_flags = inp->inp_flags;
2795 xinp->inp_flow = inp->inp_flow;
2796 xinp->inp_vflag = inp->inp_vflag;
2797 xinp->inp_ip_ttl = inp->inp_ip_ttl;
2798 xinp->inp_ip_p = inp->inp_ip_p;
2799 xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
2800 xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
2801 xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2802 xinp->inp_depend6.inp6_hlim = 0;
2803 xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2804 xinp->inp_depend6.inp6_ifindex = 0;
2805 xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2806 }
2807 #endif /* XNU_TARGET_OS_OSX */
2808
2809 /*
2810 * The following routines implement this scheme:
2811 *
2812 * Callers of ip_output() that intend to cache the route in the inpcb pass
2813 * a local copy of the struct route to ip_output(). Using a local copy of
2814 * the cached route significantly simplifies things as IP no longer has to
2815 * worry about having exclusive access to the passed in struct route, since
2816 * it's defined in the caller's stack; in essence, this allows for a lock-
2817 * less operation when updating the struct route at the IP level and below,
2818 * whenever necessary. The scheme works as follows:
2819 *
2820 * Prior to dropping the socket's lock and calling ip_output(), the caller
2821 * copies the struct route from the inpcb into its stack, and adds a reference
2822 * to the cached route entry, if there was any. The socket's lock is then
2823 * dropped and ip_output() is called with a pointer to the copy of struct
2824 * route defined on the stack (not to the one in the inpcb.)
2825 *
2826 * Upon returning from ip_output(), the caller then acquires the socket's
2827 * lock and synchronizes the cache; if there is no route cached in the inpcb,
2828 * it copies the local copy of struct route (which may or may not contain any
2829 * route) back into the cache; otherwise, if the inpcb has a route cached in
2830 * it, the one in the local copy will be freed, if there's any. Trashing the
2831 * cached route in the inpcb can be avoided because ip_output() is single-
2832 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
2833 * by the socket/transport layer.)
2834 */
2835 void
2836 inp_route_copyout(struct inpcb *inp, struct route *dst)
2837 {
2838 struct route *src = &inp->inp_route;
2839
2840 socket_lock_assert_owned(inp->inp_socket);
2841
2842 /*
2843 * If the route in the PCB is stale or not for IPv4, blow it away;
2844 	 * this is possible in the IPv4-mapped address case.
2845 */
2846 if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) {
2847 ROUTE_RELEASE(src);
2848 }
2849
2850 route_copyout(dst, src, sizeof(*dst));
2851 }
2852
2853 void
2854 inp_route_copyin(struct inpcb *inp, struct route *src)
2855 {
2856 struct route *dst = &inp->inp_route;
2857
2858 socket_lock_assert_owned(inp->inp_socket);
2859
2860 /* Minor sanity check */
2861 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) {
2862 panic("%s: wrong or corrupted route: %p", __func__, src);
2863 }
2864
2865 route_copyin(src, dst, sizeof(*src));
2866 }
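
/*
 * Illustrative sketch (not part of the original source): how a transport
 * output routine is expected to use inp_route_copyout()/inp_route_copyin()
 * around ip_output(), per the scheme described above.  example_ip_send()
 * is hypothetical and the ip_output() argument list is assumed from its
 * usual prototype, so treat this as documentation only; the block is
 * compiled out.
 */
#if 0
static int
example_ip_send(struct inpcb *inp, struct mbuf *m)
{
	struct route ro;
	int error;

	/* Copy the cached route onto the stack, taking a reference if any. */
	inp_route_copyout(inp, &ro);

	/* ip_output() operates on the stack copy, not on inp->inp_route. */
	socket_unlock(inp->inp_socket, 0);
	error = ip_output(m, inp->inp_options, &ro, 0,
	    inp->inp_moptions, NULL);
	socket_lock(inp->inp_socket, 0);

	/* Synchronize the PCB's cached route with the stack copy. */
	inp_route_copyin(inp, &ro);

	return error;
}
#endif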
2867
2868 /*
2869 * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option.
2870 */
2871 int
2872 inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
2873 {
2874 struct ifnet *ifp = NULL;
2875
2876 ifnet_head_lock_shared();
2877 if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
2878 (ifp = ifindex2ifnet[ifscope]) == NULL)) {
2879 ifnet_head_done();
2880 return ENXIO;
2881 }
2882 ifnet_head_done();
2883
2884 VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);
2885
2886 /*
2887 * A zero interface scope value indicates an "unbind".
2888 * Otherwise, take in whatever value the app desires;
2889 * the app may already know the scope (or force itself
2890 * to such a scope) ahead of time before the interface
2891 * gets attached. It doesn't matter either way; any
2892 * route lookup from this point on will require an
2893 * exact match for the embedded interface scope.
2894 */
2895 inp->inp_boundifp = ifp;
2896 if (inp->inp_boundifp == NULL) {
2897 inp->inp_flags &= ~INP_BOUND_IF;
2898 } else {
2899 inp->inp_flags |= INP_BOUND_IF;
2900 }
2901
2902 /* Blow away any cached route in the PCB */
2903 ROUTE_RELEASE(&inp->inp_route);
2904
2905 if (pifp != NULL) {
2906 *pifp = ifp;
2907 }
2908
2909 return 0;
2910 }
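
/*
 * Illustrative sketch (not part of the original source): how an
 * IP_BOUND_IF setsockopt handler might apply the option value with
 * inp_bindif().  example_set_bound_if() is hypothetical and the block is
 * compiled out.
 */
#if 0
static int
example_set_bound_if(struct inpcb *inp, unsigned int ifscope)
{
	struct ifnet *ifp = NULL;
	int error;

	/* An ifscope of IFSCOPE_NONE (0) unbinds the socket. */
	error = inp_bindif(inp, ifscope, &ifp);
	if (error == 0 && ifp != NULL) {
		/* The pcb is now scoped to ifp for all route lookups. */
	}
	return error;
}
#endif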
2911
2912 /*
2913 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2914 * as well as for setting PROC_UUID_NO_CELLULAR policy.
2915 */
2916 void
2917 inp_set_nocellular(struct inpcb *inp)
2918 {
2919 inp->inp_flags |= INP_NO_IFT_CELLULAR;
2920
2921 /* Blow away any cached route in the PCB */
2922 ROUTE_RELEASE(&inp->inp_route);
2923 }
2924
2925 /*
2926 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2927 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
2928 */
2929 void
2930 inp_clear_nocellular(struct inpcb *inp)
2931 {
2932 struct socket *so = inp->inp_socket;
2933
2934 /*
2935 	 * The SO_RESTRICT_DENY_CELLULAR restriction issued on the socket
2936 	 * has a higher precedence than INP_NO_IFT_CELLULAR. Clear the flag
2937 * if and only if the socket is unrestricted.
2938 */
2939 if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
2940 inp->inp_flags &= ~INP_NO_IFT_CELLULAR;
2941
2942 /* Blow away any cached route in the PCB */
2943 ROUTE_RELEASE(&inp->inp_route);
2944 }
2945 }
2946
2947 void
2948 inp_set_noexpensive(struct inpcb *inp)
2949 {
2950 inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;
2951
2952 /* Blow away any cached route in the PCB */
2953 ROUTE_RELEASE(&inp->inp_route);
2954 }
2955
2956 void
2957 inp_set_noconstrained(struct inpcb *inp)
2958 {
2959 inp->inp_flags2 |= INP2_NO_IFF_CONSTRAINED;
2960
2961 /* Blow away any cached route in the PCB */
2962 ROUTE_RELEASE(&inp->inp_route);
2963 }
2964
2965 void
2966 inp_set_awdl_unrestricted(struct inpcb *inp)
2967 {
2968 inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;
2969
2970 /* Blow away any cached route in the PCB */
2971 ROUTE_RELEASE(&inp->inp_route);
2972 }
2973
2974 boolean_t
2975 inp_get_awdl_unrestricted(struct inpcb *inp)
2976 {
2977 return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
2978 }
2979
2980 void
2981 inp_clear_awdl_unrestricted(struct inpcb *inp)
2982 {
2983 inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;
2984
2985 /* Blow away any cached route in the PCB */
2986 ROUTE_RELEASE(&inp->inp_route);
2987 }
2988
2989 void
2990 inp_set_intcoproc_allowed(struct inpcb *inp)
2991 {
2992 inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
2993
2994 /* Blow away any cached route in the PCB */
2995 ROUTE_RELEASE(&inp->inp_route);
2996 }
2997
2998 boolean_t
2999 inp_get_intcoproc_allowed(struct inpcb *inp)
3000 {
3001 return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE;
3002 }
3003
3004 void
3005 inp_clear_intcoproc_allowed(struct inpcb *inp)
3006 {
3007 inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED;
3008
3009 /* Blow away any cached route in the PCB */
3010 ROUTE_RELEASE(&inp->inp_route);
3011 }
3012
3013 #if NECP
3014 /*
3015 * Called when PROC_UUID_NECP_APP_POLICY is set.
3016 */
3017 void
3018 inp_set_want_app_policy(struct inpcb *inp)
3019 {
3020 inp->inp_flags2 |= INP2_WANT_APP_POLICY;
3021 }
3022
3023 /*
3024 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
3025 */
3026 void
3027 inp_clear_want_app_policy(struct inpcb *inp)
3028 {
3029 inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
3030 }
3031 #endif /* NECP */
3032
3033 /*
3034 * Calculate flow hash for an inp, used by an interface to identify a
3035  * flow. When an interface provides a flow control advisory, this flow
3036 * hash is used as an identifier.
3037 */
3038 u_int32_t
3039 inp_calc_flowhash(struct inpcb *inp)
3040 {
3041 struct inp_flowhash_key fh __attribute__((aligned(8)));
3042 u_int32_t flowhash = 0;
3043 struct inpcb *tmp_inp = NULL;
3044
3045 if (inp_hash_seed == 0) {
3046 inp_hash_seed = RandomULong();
3047 }
3048
3049 bzero(&fh, sizeof(fh));
3050
3051 bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof(fh.infh_laddr));
3052 bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof(fh.infh_faddr));
3053
3054 fh.infh_lport = inp->inp_lport;
3055 fh.infh_fport = inp->inp_fport;
3056 fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
3057 fh.infh_proto = inp->inp_ip_p;
3058 fh.infh_rand1 = RandomULong();
3059 fh.infh_rand2 = RandomULong();
3060
3061 try_again:
3062 flowhash = net_flowhash(&fh, sizeof(fh), inp_hash_seed);
3063 if (flowhash == 0) {
3064 /* try to get a non-zero flowhash */
3065 inp_hash_seed = RandomULong();
3066 goto try_again;
3067 }
3068
3069 inp->inp_flowhash = flowhash;
3070
3071 /* Insert the inp into inp_fc_tree */
3072 lck_mtx_lock_spin(&inp_fc_lck);
3073 tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
3074 if (tmp_inp != NULL) {
3075 /*
3076 * There is a different inp with the same flowhash.
3077 * There can be a collision on flow hash but the
3078 * probability is low. Let's recompute the
3079 * flowhash.
3080 */
3081 lck_mtx_unlock(&inp_fc_lck);
3082 /* recompute hash seed */
3083 inp_hash_seed = RandomULong();
3084 goto try_again;
3085 }
3086
3087 RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
3088 inp->inp_flags2 |= INP2_IN_FCTREE;
3089 lck_mtx_unlock(&inp_fc_lck);
3090
3091 return flowhash;
3092 }
3093
3094 void
3095 inp_flowadv(uint32_t flowhash)
3096 {
3097 struct inpcb *inp;
3098
3099 inp = inp_fc_getinp(flowhash, 0);
3100
3101 if (inp == NULL) {
3102 return;
3103 }
3104 inp_fc_feedback(inp);
3105 }
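
/*
 * Illustrative sketch (not part of the original source): both directions of
 * the flow-advisory mechanism keyed by inp_flowhash.  example_flow_advisory()
 * is hypothetical and ignores the locking requirements of the real callers;
 * the block is compiled out.
 */
#if 0
static void
example_flow_advisory(struct inpcb *inp, boolean_t congested)
{
	if (congested) {
		/* Interface queue filled up: throttle the flow. */
		(void) inp_set_fc_state(inp, FADV_FLOW_CONTROLLED);
	} else {
		/* Queue drained: feed the flow hash back to unthrottle. */
		inp_flowadv(inp->inp_flowhash);
	}
}
#endif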
3106
3107 /*
3108 * Function to compare inp_fc_entries in inp flow control tree
3109 */
3110 static inline int
3111 infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
3112 {
3113 return memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
3114 sizeof(inp1->inp_flowhash));
3115 }
3116
3117 static struct inpcb *
3118 inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
3119 {
3120 struct inpcb *inp = NULL;
3121 int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;
3122
3123 lck_mtx_lock_spin(&inp_fc_lck);
3124 key_inp.inp_flowhash = flowhash;
3125 inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
3126 if (inp == NULL) {
3127 /* inp is not present, return */
3128 lck_mtx_unlock(&inp_fc_lck);
3129 return NULL;
3130 }
3131
3132 if (flags & INPFC_REMOVE) {
3133 RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
3134 lck_mtx_unlock(&inp_fc_lck);
3135
3136 bzero(&(inp->infc_link), sizeof(inp->infc_link));
3137 inp->inp_flags2 &= ~INP2_IN_FCTREE;
3138 return NULL;
3139 }
3140
3141 if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) {
3142 inp = NULL;
3143 }
3144 lck_mtx_unlock(&inp_fc_lck);
3145
3146 return inp;
3147 }
3148
3149 static void
3150 inp_fc_feedback(struct inpcb *inp)
3151 {
3152 struct socket *so = inp->inp_socket;
3153
3154 /* we already hold a want_cnt on this inp, socket can't be null */
3155 VERIFY(so != NULL);
3156 socket_lock(so, 1);
3157
3158 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
3159 socket_unlock(so, 1);
3160 return;
3161 }
3162
3163 if (inp->inp_sndinprog_cnt > 0) {
3164 inp->inp_flags |= INP_FC_FEEDBACK;
3165 }
3166
3167 /*
3168 * Return if the connection is not in flow-controlled state.
3169 * This can happen if the connection experienced
3170 * loss while it was in flow controlled state
3171 */
3172 if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
3173 socket_unlock(so, 1);
3174 return;
3175 }
3176 inp_reset_fc_state(inp);
3177
3178 if (SOCK_TYPE(so) == SOCK_STREAM) {
3179 inp_fc_unthrottle_tcp(inp);
3180 }
3181
3182 socket_unlock(so, 1);
3183 }
3184
3185 void
3186 inp_reset_fc_state(struct inpcb *inp)
3187 {
3188 struct socket *so = inp->inp_socket;
3189 int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
3190 int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;
3191
3192 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
3193
3194 if (suspended) {
3195 so->so_flags &= ~(SOF_SUSPENDED);
3196 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
3197 }
3198
3199 /* Give a write wakeup to unblock the socket */
3200 if (needwakeup) {
3201 sowwakeup(so);
3202 }
3203 }
3204
3205 int
3206 inp_set_fc_state(struct inpcb *inp, int advcode)
3207 {
3208 boolean_t is_flow_controlled = INP_WAIT_FOR_IF_FEEDBACK(inp);
3209 struct inpcb *tmp_inp = NULL;
3210 /*
3211 	 * If there was feedback from the interface while a
3212 	 * send operation was in progress, ignore this flow
3213 	 * advisory to avoid a race between setting the
3214 	 * flow-controlled state and receiving feedback from
3215 	 * the interface.
3216 */
3217 if (inp->inp_flags & INP_FC_FEEDBACK) {
3218 return 0;
3219 }
3220
3221 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
3222 if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
3223 INPFC_SOLOCKED)) != NULL) {
3224 if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
3225 return 0;
3226 }
3227 VERIFY(tmp_inp == inp);
3228 switch (advcode) {
3229 case FADV_FLOW_CONTROLLED:
3230 inp->inp_flags |= INP_FLOW_CONTROLLED;
3231 break;
3232 case FADV_SUSPENDED:
3233 inp->inp_flags |= INP_FLOW_SUSPENDED;
3234 soevent(inp->inp_socket,
3235 (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));
3236
3237 /* Record the fact that suspend event was sent */
3238 inp->inp_socket->so_flags |= SOF_SUSPENDED;
3239 break;
3240 }
3241
3242 if (!is_flow_controlled && SOCK_TYPE(inp->inp_socket) == SOCK_STREAM) {
3243 inp_fc_throttle_tcp(inp);
3244 }
3245 return 1;
3246 }
3247 return 0;
3248 }
3249
3250 /*
3251 * Handler for SO_FLUSH socket option.
3252 */
3253 int
3254 inp_flush(struct inpcb *inp, int optval)
3255 {
3256 u_int32_t flowhash = inp->inp_flowhash;
3257 struct ifnet *rtifp, *oifp;
3258
3259 /* Either all classes or one of the valid ones */
3260 if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) {
3261 return EINVAL;
3262 }
3263
3264 /* We need a flow hash for identification */
3265 if (flowhash == 0) {
3266 return 0;
3267 }
3268
3269 /* Grab the interfaces from the route and pcb */
3270 rtifp = ((inp->inp_route.ro_rt != NULL) ?
3271 inp->inp_route.ro_rt->rt_ifp : NULL);
3272 oifp = inp->inp_last_outifp;
3273
3274 if (rtifp != NULL) {
3275 if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3276 }
3277 if (oifp != NULL && oifp != rtifp) {
3278 if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3279 }
3280
3281 return 0;
3282 }
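
/*
 * Illustrative sketch (not part of the original source): user-space usage of
 * the SO_FLUSH option that this handler backs.  SO_FLUSH and SO_TC_ALL are
 * assumed to come from Apple's private socket headers, so this is only a
 * sketch of the calling convention; the block is compiled out.
 */
#if 0
static int
example_flush_socket(int sock)
{
	int tc = SO_TC_ALL;	/* flush queued packets of every traffic class */

	return setsockopt(sock, SOL_SOCKET, SO_FLUSH, &tc, sizeof(tc));
}
#endif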
3283
3284 /*
3285 * Clear the INP_INADDR_ANY flag (special case for PPP only)
3286 */
3287 void
3288 inp_clear_INP_INADDR_ANY(struct socket *so)
3289 {
3290 struct inpcb *inp = NULL;
3291
3292 socket_lock(so, 1);
3293 inp = sotoinpcb(so);
3294 if (inp) {
3295 inp->inp_flags &= ~INP_INADDR_ANY;
3296 }
3297 socket_unlock(so, 1);
3298 }
3299
3300 void
3301 inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
3302 {
3303 struct socket *so = inp->inp_socket;
3304
3305 soprocinfo->spi_pid = so->last_pid;
3306 strlcpy(&soprocinfo->spi_proc_name[0], &inp->inp_last_proc_name[0],
3307 sizeof(soprocinfo->spi_proc_name));
3308 if (so->last_pid != 0) {
3309 uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
3310 }
3311 /*
3312 * When not delegated, the effective pid is the same as the real pid
3313 */
3314 if (so->so_flags & SOF_DELEGATED) {
3315 soprocinfo->spi_delegated = 1;
3316 soprocinfo->spi_epid = so->e_pid;
3317 uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
3318 } else {
3319 soprocinfo->spi_delegated = 0;
3320 soprocinfo->spi_epid = so->last_pid;
3321 }
3322 strlcpy(&soprocinfo->spi_e_proc_name[0], &inp->inp_e_proc_name[0],
3323 sizeof(soprocinfo->spi_e_proc_name));
3324 }
3325
3326 int
3327 inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
3328 struct so_procinfo *soprocinfo)
3329 {
3330 struct inpcb *inp = NULL;
3331 int found = 0;
3332
3333 bzero(soprocinfo, sizeof(struct so_procinfo));
3334
3335 if (!flowhash) {
3336 return -1;
3337 }
3338
3339 lck_rw_lock_shared(pcbinfo->ipi_lock);
3340 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
3341 if (inp->inp_state != INPCB_STATE_DEAD &&
3342 inp->inp_socket != NULL &&
3343 inp->inp_flowhash == flowhash) {
3344 found = 1;
3345 inp_get_soprocinfo(inp, soprocinfo);
3346 break;
3347 }
3348 }
3349 lck_rw_done(pcbinfo->ipi_lock);
3350
3351 return found;
3352 }
3353
3354 #if CONFIG_PROC_UUID_POLICY
3355 static void
3356 inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
3357 {
3358 struct socket *so = inp->inp_socket;
3359 int before, after;
3360
3361 VERIFY(so != NULL);
3362 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
3363
3364 before = INP_NO_CELLULAR(inp);
3365 if (set) {
3366 inp_set_nocellular(inp);
3367 } else {
3368 inp_clear_nocellular(inp);
3369 }
3370 after = INP_NO_CELLULAR(inp);
3371 if (net_io_policy_log && (before != after)) {
3372 static const char *ok = "OK";
3373 static const char *nok = "NOACCESS";
3374 uuid_string_t euuid_buf;
3375 pid_t epid;
3376
3377 if (so->so_flags & SOF_DELEGATED) {
3378 uuid_unparse(so->e_uuid, euuid_buf);
3379 epid = so->e_pid;
3380 } else {
3381 uuid_unparse(so->last_uuid, euuid_buf);
3382 epid = so->last_pid;
3383 }
3384
3385 /* allow this socket to generate another notification event */
3386 so->so_ifdenied_notifies = 0;
3387
3388 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
3389 "euuid %s%s %s->%s\n", __func__,
3390 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
3391 SOCK_TYPE(so), epid, euuid_buf,
3392 (so->so_flags & SOF_DELEGATED) ?
3393 " [delegated]" : "",
3394 ((before < after) ? ok : nok),
3395 ((before < after) ? nok : ok));
3396 }
3397 }
3398
3399 #if NECP
3400 static void
3401 inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
3402 {
3403 struct socket *so = inp->inp_socket;
3404 int before, after;
3405
3406 VERIFY(so != NULL);
3407 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
3408
3409 before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
3410 if (set) {
3411 inp_set_want_app_policy(inp);
3412 } else {
3413 inp_clear_want_app_policy(inp);
3414 }
3415 after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
3416 if (net_io_policy_log && (before != after)) {
3417 static const char *wanted = "WANTED";
3418 static const char *unwanted = "UNWANTED";
3419 uuid_string_t euuid_buf;
3420 pid_t epid;
3421
3422 if (so->so_flags & SOF_DELEGATED) {
3423 uuid_unparse(so->e_uuid, euuid_buf);
3424 epid = so->e_pid;
3425 } else {
3426 uuid_unparse(so->last_uuid, euuid_buf);
3427 epid = so->last_pid;
3428 }
3429
3430 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
3431 "euuid %s%s %s->%s\n", __func__,
3432 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
3433 SOCK_TYPE(so), epid, euuid_buf,
3434 (so->so_flags & SOF_DELEGATED) ?
3435 " [delegated]" : "",
3436 ((before < after) ? unwanted : wanted),
3437 ((before < after) ? wanted : unwanted));
3438 }
3439 }
3440 #endif /* NECP */
3441 #endif /* CONFIG_PROC_UUID_POLICY */
3442
3443 #if NECP
3444 void
3445 inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
3446 {
3447 necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
3448 if (necp_socket_should_rescope(inp) &&
3449 inp->inp_lport == 0 &&
3450 inp->inp_laddr.s_addr == INADDR_ANY &&
3451 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
3452 // If we should rescope, and the socket is not yet bound
3453 inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
3454 }
3455 }
3456 #endif /* NECP */
3457
3458 int
3459 inp_update_policy(struct inpcb *inp)
3460 {
3461 #if CONFIG_PROC_UUID_POLICY
3462 struct socket *so = inp->inp_socket;
3463 uint32_t pflags = 0;
3464 int32_t ogencnt;
3465 int err = 0;
3466 uint8_t *lookup_uuid = NULL;
3467
3468 if (!net_io_policy_uuid ||
3469 so == NULL || inp->inp_state == INPCB_STATE_DEAD) {
3470 return 0;
3471 }
3472
3473 /*
3474 * Kernel-created sockets that aren't delegating other sockets
3475 * are currently exempted from UUID policy checks.
3476 */
3477 if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED)) {
3478 return 0;
3479 }
3480
3481 #if defined(XNU_TARGET_OS_OSX)
3482 if (so->so_rpid > 0) {
3483 lookup_uuid = so->so_ruuid;
3484 ogencnt = so->so_policy_gencnt;
3485 err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
3486 }
3487 #endif
3488 if (lookup_uuid == NULL || err == ENOENT) {
3489 lookup_uuid = ((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid);
3490 ogencnt = so->so_policy_gencnt;
3491 err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
3492 }
3493
3494 /*
3495 * Discard cached generation count if the entry is gone (ENOENT),
3496 	 * so that we go through the checks below.
3497 */
3498 if (err == ENOENT && ogencnt != 0) {
3499 so->so_policy_gencnt = 0;
3500 }
3501
3502 /*
3503 * If the generation count has changed, inspect the policy flags
3504 * and act accordingly. If a policy flag was previously set and
3505 * the UUID is no longer present in the table (ENOENT), treat it
3506 * as if the flag has been cleared.
3507 */
3508 if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
3509 /* update cellular policy for this socket */
3510 if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
3511 inp_update_cellular_policy(inp, TRUE);
3512 } else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
3513 inp_update_cellular_policy(inp, FALSE);
3514 }
3515 #if NECP
3516 /* update necp want app policy for this socket */
3517 if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
3518 inp_update_necp_want_app_policy(inp, TRUE);
3519 } else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
3520 inp_update_necp_want_app_policy(inp, FALSE);
3521 }
3522 #endif /* NECP */
3523 }
3524
3525 return (err == ENOENT) ? 0 : err;
3526 #else /* !CONFIG_PROC_UUID_POLICY */
3527 #pragma unused(inp)
3528 return 0;
3529 #endif /* !CONFIG_PROC_UUID_POLICY */
3530 }
3531
3532 static unsigned int log_restricted;
3533 SYSCTL_DECL(_net_inet);
3534 SYSCTL_INT(_net_inet, OID_AUTO, log_restricted,
3535 CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0,
3536 "Log network restrictions");
3537 /*
3538 * Called when we need to enforce policy restrictions in the input path.
3539 *
3540 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
3541 */
3542 static boolean_t
3543 _inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
3544 {
3545 VERIFY(inp != NULL);
3546
3547 /*
3548 * Inbound restrictions.
3549 */
3550 if (!sorestrictrecv) {
3551 return FALSE;
3552 }
3553
3554 if (ifp == NULL) {
3555 return FALSE;
3556 }
3557
3558 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
3559 return TRUE;
3560 }
3561
3562 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
3563 return TRUE;
3564 }
3565
3566 if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
3567 return TRUE;
3568 }
3569
3570 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
3571 return TRUE;
3572 }
3573
3574 if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) {
3575 return FALSE;
3576 }
3577
3578 if (inp->inp_flags & INP_RECV_ANYIF) {
3579 return FALSE;
3580 }
3581
3582 if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) {
3583 return FALSE;
3584 }
3585
3586 if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
3587 return TRUE;
3588 }
3589
3590 return TRUE;
3591 }
3592
3593 boolean_t
3594 inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
3595 {
3596 boolean_t ret;
3597
3598 ret = _inp_restricted_recv(inp, ifp);
3599 if (ret == TRUE && log_restricted) {
3600 printf("pid %d (%s) is unable to receive packets on %s\n",
3601 current_proc()->p_pid, proc_best_name(current_proc()),
3602 ifp->if_xname);
3603 }
3604 return ret;
3605 }
3606
3607 /*
3608 * Called when we need to enforce policy restrictions in the output path.
3609 *
3610 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
3611 */
3612 static boolean_t
3613 _inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3614 {
3615 VERIFY(inp != NULL);
3616
3617 /*
3618 * Outbound restrictions.
3619 */
3620 if (!sorestrictsend) {
3621 return FALSE;
3622 }
3623
3624 if (ifp == NULL) {
3625 return FALSE;
3626 }
3627
3628 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
3629 return TRUE;
3630 }
3631
3632 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
3633 return TRUE;
3634 }
3635
3636 if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
3637 return TRUE;
3638 }
3639
3640 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
3641 return TRUE;
3642 }
3643
3644 if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
3645 return TRUE;
3646 }
3647
3648 return FALSE;
3649 }
3650
3651 boolean_t
3652 inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3653 {
3654 boolean_t ret;
3655
3656 ret = _inp_restricted_send(inp, ifp);
3657 if (ret == TRUE && log_restricted) {
3658 printf("pid %d (%s) is unable to transmit packets on %s\n",
3659 current_proc()->p_pid, proc_best_name(current_proc()),
3660 ifp->if_xname);
3661 }
3662 return ret;
3663 }
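
/*
 * Illustrative sketch (not part of the original source): how an output path
 * might consult inp_restricted_send() before handing a packet to an
 * interface.  example_pre_output_check() and the EHOSTUNREACH choice are
 * hypothetical; the block is compiled out.
 */
#if 0
static int
example_pre_output_check(struct inpcb *inp, struct ifnet *outifp)
{
	/* Refuse to transmit if policy forbids sending on this interface. */
	if (inp_restricted_send(inp, outifp)) {
		return EHOSTUNREACH;
	}
	return 0;
}
#endif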
3664
3665 inline void
3666 inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack)
3667 {
3668 struct ifnet *ifp = inp->inp_last_outifp;
3669 struct socket *so = inp->inp_socket;
3670 if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
3671 (ifp->if_type == IFT_CELLULAR || IFNET_IS_WIFI(ifp))) {
3672 int32_t unsent;
3673
3674 so->so_snd.sb_flags |= SB_SNDBYTE_CNT;
3675
3676 /*
3677 * There can be data outstanding before the connection
3678 * becomes established -- TFO case
3679 */
3680 if (so->so_snd.sb_cc > 0) {
3681 inp_incr_sndbytes_total(so, so->so_snd.sb_cc);
3682 }
3683
3684 unsent = inp_get_sndbytes_allunsent(so, th_ack);
3685 if (unsent > 0) {
3686 inp_incr_sndbytes_unsent(so, unsent);
3687 }
3688 }
3689 }
3690
3691 inline void
3692 inp_incr_sndbytes_total(struct socket *so, int32_t len)
3693 {
3694 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3695 struct ifnet *ifp = inp->inp_last_outifp;
3696
3697 if (ifp != NULL) {
3698 VERIFY(ifp->if_sndbyte_total >= 0);
3699 OSAddAtomic64(len, &ifp->if_sndbyte_total);
3700 }
3701 }
3702
3703 inline void
3704 inp_decr_sndbytes_total(struct socket *so, int32_t len)
3705 {
3706 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3707 struct ifnet *ifp = inp->inp_last_outifp;
3708
3709 if (ifp != NULL) {
3710 VERIFY(ifp->if_sndbyte_total >= len);
3711 OSAddAtomic64(-len, &ifp->if_sndbyte_total);
3712 }
3713 }
3714
3715 inline void
3716 inp_incr_sndbytes_unsent(struct socket *so, int32_t len)
3717 {
3718 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3719 struct ifnet *ifp = inp->inp_last_outifp;
3720
3721 if (ifp != NULL) {
3722 VERIFY(ifp->if_sndbyte_unsent >= 0);
3723 OSAddAtomic64(len, &ifp->if_sndbyte_unsent);
3724 }
3725 }
3726
3727 inline void
3728 inp_decr_sndbytes_unsent(struct socket *so, int32_t len)
3729 {
3730 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
3731 return;
3732 }
3733
3734 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3735 struct ifnet *ifp = inp->inp_last_outifp;
3736
3737 if (ifp != NULL) {
3738 if (ifp->if_sndbyte_unsent >= len) {
3739 OSAddAtomic64(-len, &ifp->if_sndbyte_unsent);
3740 } else {
3741 ifp->if_sndbyte_unsent = 0;
3742 }
3743 }
3744 }
3745
3746 inline void
3747 inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
3748 {
3749 int32_t len;
3750
3751 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
3752 return;
3753 }
3754
3755 len = inp_get_sndbytes_allunsent(so, th_ack);
3756 inp_decr_sndbytes_unsent(so, len);
3757 }
3758
3759
3760 inline void
3761 inp_set_activity_bitmap(struct inpcb *inp)
3762 {
3763 in_stat_set_activity_bitmap(&inp->inp_nw_activity, net_uptime());
3764 }
3765
3766 inline void
3767 inp_get_activity_bitmap(struct inpcb *inp, activity_bitmap_t *ab)
3768 {
3769 bcopy(&inp->inp_nw_activity, ab, sizeof(*ab));
3770 }
3771
3772 void
3773 inp_update_last_owner(struct socket *so, struct proc *p, struct proc *ep)
3774 {
3775 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3776
3777 if (inp == NULL) {
3778 return;
3779 }
3780
3781 if (p != NULL) {
3782 strlcpy(&inp->inp_last_proc_name[0], proc_name_address(p), sizeof(inp->inp_last_proc_name));
3783 }
3784 if (so->so_flags & SOF_DELEGATED) {
3785 if (ep != NULL) {
3786 strlcpy(&inp->inp_e_proc_name[0], proc_name_address(ep), sizeof(inp->inp_e_proc_name));
3787 } else {
3788 inp->inp_e_proc_name[0] = 0;
3789 }
3790 } else {
3791 inp->inp_e_proc_name[0] = 0;
3792 }
3793 }
3794
3795 void
3796 inp_copy_last_owner(struct socket *so, struct socket *head)
3797 {
3798 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3799 struct inpcb *head_inp = (struct inpcb *)head->so_pcb;
3800
3801 if (inp == NULL || head_inp == NULL) {
3802 return;
3803 }
3804
3805 strlcpy(&inp->inp_last_proc_name[0], &head_inp->inp_last_proc_name[0], sizeof(inp->inp_last_proc_name));
3806 strlcpy(&inp->inp_e_proc_name[0], &head_inp->inp_e_proc_name[0], sizeof(inp->inp_e_proc_name));
3807 }