1/*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
62 */
63
64#include <sys/param.h>
65#include <sys/systm.h>
66#include <sys/malloc.h>
67#include <sys/mbuf.h>
68#include <sys/domain.h>
69#include <sys/protosw.h>
70#include <sys/socket.h>
71#include <sys/socketvar.h>
72#include <sys/proc.h>
73#include <sys/kernel.h>
74#include <sys/sysctl.h>
75#include <sys/mcache.h>
76#include <sys/kauth.h>
77#include <sys/priv.h>
78#include <sys/proc_uuid_policy.h>
79#include <sys/syslog.h>
80#include <sys/priv.h>
81#include <net/dlil.h>
82
83#include <libkern/OSAtomic.h>
84#include <kern/locks.h>
85
86#include <machine/limits.h>
87
88#include <kern/zalloc.h>
89
90#include <net/if.h>
91#include <net/if_types.h>
92#include <net/route.h>
93#include <net/flowhash.h>
94#include <net/flowadv.h>
95#include <net/nat464_utils.h>
96#include <net/ntstat.h>
97#include <net/restricted_in_port.h>
98
99#include <netinet/in.h>
100#include <netinet/in_pcb.h>
101#include <netinet/in_var.h>
102#include <netinet/ip_var.h>
103
104#include <netinet/ip6.h>
105#include <netinet6/ip6_var.h>
106
107#include <sys/kdebug.h>
108#include <sys/random.h>
109
110#include <dev/random/randomdev.h>
111#include <mach/boolean.h>
112
113#include <pexpert/pexpert.h>
114
115#if NECP
116#include <net/necp.h>
117#endif
118
119#include <sys/stat.h>
120#include <sys/ubc.h>
121#include <sys/vnode.h>
122
123#include <os/log.h>
124
125extern const char *proc_name_address(struct proc *);
126
127static lck_grp_t *inpcb_lock_grp;
128static lck_attr_t *inpcb_lock_attr;
129static lck_grp_attr_t *inpcb_lock_grp_attr;
130decl_lck_mtx_data(static, inpcb_lock); /* global INPCB lock */
131decl_lck_mtx_data(static, inpcb_timeout_lock);
132
133static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);
134
135static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */
136static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
137static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */
138static boolean_t inpcb_fast_timer_on = FALSE;
139
140#define INPCB_GCREQ_THRESHOLD 50000
141
142static thread_call_t inpcb_thread_call, inpcb_fast_thread_call;
143static void inpcb_sched_timeout(void);
144static void inpcb_sched_lazy_timeout(void);
145static void _inpcb_sched_timeout(unsigned int);
146static void inpcb_timeout(void *, void *);
147const int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */
148extern int tvtohz(struct timeval *);
149
150#if CONFIG_PROC_UUID_POLICY
151static void inp_update_cellular_policy(struct inpcb *, boolean_t);
152#if NECP
153static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
154#endif /* NECP */
155#endif /* CONFIG_PROC_UUID_POLICY */
156
157#define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
158#define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
159
160int allow_udp_port_exhaustion = 0;
161
162/*
163 * These configure the range of local port addresses assigned to
164 * "unspecified" outgoing connections/packets/whatever.
165 */
166int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
167int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
168int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
169int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
170int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
171int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
172
173#define RANGECHK(var, min, max) \
174 if ((var) < (min)) { (var) = (min); } \
175 else if ((var) > (max)) { (var) = (max); }
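/*
 * Illustrative example (added annotation, not original source): RANGECHK
 * clamps a value into [min, max].  For instance,
 * RANGECHK(v, 1, IPPORT_RESERVED - 1) leaves v alone when it is already
 * within 1..1023, raises 0 up to 1, and lowers 70000 down to 1023.
 */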
176
177static int
178sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
179{
180#pragma unused(arg1, arg2)
181 int error;
182 int new_value = *(int *)oidp->oid_arg1;
183#if (DEBUG | DEVELOPMENT)
184 int old_value = *(int *)oidp->oid_arg1;
185 /*
186 * For unit testing allow a non-superuser process with the
187 * proper entitlement to modify the variables
188 */
189 if (req->newptr) {
190 if (proc_suser(current_proc()) != 0 &&
191 (error = priv_check_cred(kauth_cred_get(),
192 PRIV_NETINET_RESERVEDPORT, 0))) {
193 return EPERM;
194 }
195 }
196#endif /* (DEBUG | DEVELOPMENT) */
197
198 error = sysctl_handle_int(oidp, &new_value, 0, req);
199 if (!error) {
200 if (oidp->oid_arg1 == &ipport_lowfirstauto || oidp->oid_arg1 == &ipport_lowlastauto) {
201 RANGECHK(new_value, 1, IPPORT_RESERVED - 1);
202 } else {
203 RANGECHK(new_value, IPPORT_RESERVED, USHRT_MAX);
204 }
205 *(int *)oidp->oid_arg1 = new_value;
206 }
207
208#if (DEBUG | DEVELOPMENT)
209 os_log(OS_LOG_DEFAULT,
210 "%s:%u sysctl net.restricted_port.verbose: %d -> %d)",
211 proc_best_name(current_proc()), proc_selfpid(),
212 old_value, *(int *)oidp->oid_arg1);
213#endif /* (DEBUG | DEVELOPMENT) */
214
215 return error;
216}
217
218#undef RANGECHK
219
220SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
221 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IP Ports");
222
223#if (DEBUG | DEVELOPMENT)
224#define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY)
225#else
226#define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED)
227#endif /* (DEBUG | DEVELOPMENT) */
228
229SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
230 CTLFAGS_IP_PORTRANGE,
231 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
232SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
233 CTLFAGS_IP_PORTRANGE,
234 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
235SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
236 CTLFAGS_IP_PORTRANGE,
237 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
238SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
239 CTLFAGS_IP_PORTRANGE,
240 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
241SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
242 CTLFAGS_IP_PORTRANGE,
243 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
244SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
245 CTLFAGS_IP_PORTRANGE,
246 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
247SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, ipport_allow_udp_port_exhaustion,
248 CTLFLAG_LOCKED | CTLFLAG_RW, &allow_udp_port_exhaustion, 0, "");
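/*
 * Illustrative user-space usage (added annotation; the sysctl(8)
 * invocations below are an example, not part of this file; the values
 * shown are the defaults documented above):
 *
 *   sysctl net.inet.ip.portrange.first            # 49152
 *   sysctl net.inet.ip.portrange.hilast           # 65535
 *   sudo sysctl -w net.inet.ip.portrange.first=50000
 *
 * Writes normally require superuser; on DEBUG/DEVELOPMENT kernels the
 * handler above also accepts a non-superuser process that passes the
 * PRIV_NETINET_RESERVEDPORT check, for unit testing.
 */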
249
250static uint32_t apn_fallbk_debug = 0;
251#define apn_fallbk_log(x) do { if (apn_fallbk_debug >= 1) log x; } while (0)
252
253#if !XNU_TARGET_OS_OSX
254static boolean_t apn_fallbk_enabled = TRUE;
255
256SYSCTL_DECL(_net_inet);
257SYSCTL_NODE(_net_inet, OID_AUTO, apn_fallback, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "APN Fallback");
258SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
259 &apn_fallbk_enabled, 0, "APN fallback enable");
260SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
261 &apn_fallbk_debug, 0, "APN fallback debug enable");
262#else /* XNU_TARGET_OS_OSX */
263static boolean_t apn_fallbk_enabled = FALSE;
264#endif /* XNU_TARGET_OS_OSX */
265
266extern int udp_use_randomport;
267extern int tcp_use_randomport;
268
269/* Structs used for flowhash computation */
270struct inp_flowhash_key_addr {
271 union {
272 struct in_addr v4;
273 struct in6_addr v6;
274 u_int8_t addr8[16];
275 u_int16_t addr16[8];
276 u_int32_t addr32[4];
277 } infha;
278};
279
280struct inp_flowhash_key {
281 struct inp_flowhash_key_addr infh_laddr;
282 struct inp_flowhash_key_addr infh_faddr;
283 u_int32_t infh_lport;
284 u_int32_t infh_fport;
285 u_int32_t infh_af;
286 u_int32_t infh_proto;
287 u_int32_t infh_rand1;
288 u_int32_t infh_rand2;
289};
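/*
 * Added sketch of how this key is used (the actual helper lives later
 * in this file; treat the exact calls below as an assumption of this
 * note rather than something defined above):
 *
 *   struct inp_flowhash_key fh;
 *   bzero(&fh, sizeof(fh));
 *   fh.infh_laddr.infha.v4 = inp->inp_laddr;
 *   fh.infh_faddr.infha.v4 = inp->inp_faddr;
 *   fh.infh_lport = inp->inp_lport;
 *   fh.infh_fport = inp->inp_fport;
 *   fh.infh_af = AF_INET;
 *   fh.infh_proto = (u_int32_t)SOCK_PROTO(inp->inp_socket);
 *   inp->inp_flowhash = net_flowhash(&fh, sizeof(fh), inp_hash_seed);
 *
 * so two PCBs for the same 5-tuple normally map to the same flow hash.
 */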
290
291static u_int32_t inp_hash_seed = 0;
292
293static int infc_cmp(const struct inpcb *, const struct inpcb *);
294
295/* Flags used by inp_fc_getinp */
296#define INPFC_SOLOCKED 0x1
297#define INPFC_REMOVE 0x2
298static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);
299
300static void inp_fc_feedback(struct inpcb *);
301extern void tcp_remove_from_time_wait(struct inpcb *inp);
302
303decl_lck_mtx_data(static, inp_fc_lck);
304
305RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
306RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
307RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);
308
309/*
310 * Use this inp as a key to find an inp in the flowhash tree.
311 * Accesses to it are protected by inp_fc_lck.
312 */
313struct inpcb key_inp;
314
315/*
316 * in_pcb.c: manage the Protocol Control Blocks.
317 */
318
319void
320in_pcbinit(void)
321{
322 static int inpcb_initialized = 0;
323
324 VERIFY(!inpcb_initialized);
325 inpcb_initialized = 1;
326
327 inpcb_lock_grp_attr = lck_grp_attr_alloc_init();
328 inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr);
329 inpcb_lock_attr = lck_attr_alloc_init();
330 lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr);
331 lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr);
332 inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout,
333 NULL, THREAD_CALL_PRIORITY_KERNEL);
334 /* Give it an arg so that we know that this is the fast timer */
335 inpcb_fast_thread_call = thread_call_allocate_with_priority(
336 inpcb_timeout, &inpcb_timeout, THREAD_CALL_PRIORITY_KERNEL);
337 if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL) {
338 panic("unable to alloc the inpcb thread call");
339 }
340
341 /*
342 * Initialize data structures required to deliver
343 * flow advisories.
344 */
345 lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr);
346 lck_mtx_lock(&inp_fc_lck);
347 RB_INIT(&inp_fc_tree);
348 bzero(&key_inp, sizeof(key_inp));
349 lck_mtx_unlock(&inp_fc_lck);
350}
351
352#define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \
353 ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
354static void
355inpcb_timeout(void *arg0, void *arg1)
356{
357#pragma unused(arg1)
358 struct inpcbinfo *ipi;
359 boolean_t t, gc;
360 struct intimercount gccnt, tmcnt;
361
362 /*
363 * Update coarse-grained networking timestamp (in sec.); the idea
364 * is to piggy-back on the timeout callout to update the counter
365 * returnable via net_uptime().
366 */
367 net_update_uptime();
368
369 bzero(&gccnt, sizeof(gccnt));
370 bzero(&tmcnt, sizeof(tmcnt));
371
372 lck_mtx_lock_spin(&inpcb_timeout_lock);
373 gc = inpcb_garbage_collecting;
374 inpcb_garbage_collecting = FALSE;
375
376 t = inpcb_ticking;
377 inpcb_ticking = FALSE;
378
379 if (gc || t) {
380 lck_mtx_unlock(&inpcb_timeout_lock);
381
382 lck_mtx_lock(&inpcb_lock);
383 TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
384 if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
385 bzero(&ipi->ipi_gc_req,
386 sizeof(ipi->ipi_gc_req));
387 if (gc && ipi->ipi_gc != NULL) {
388 ipi->ipi_gc(ipi);
389 gccnt.intimer_lazy +=
390 ipi->ipi_gc_req.intimer_lazy;
391 gccnt.intimer_fast +=
392 ipi->ipi_gc_req.intimer_fast;
393 gccnt.intimer_nodelay +=
394 ipi->ipi_gc_req.intimer_nodelay;
395 }
396 }
397 if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
398 bzero(&ipi->ipi_timer_req,
399 sizeof(ipi->ipi_timer_req));
400 if (t && ipi->ipi_timer != NULL) {
401 ipi->ipi_timer(ipi);
402 tmcnt.intimer_lazy +=
403 ipi->ipi_timer_req.intimer_lazy;
404 tmcnt.intimer_fast +=
405 ipi->ipi_timer_req.intimer_fast;
406 tmcnt.intimer_nodelay +=
407 ipi->ipi_timer_req.intimer_nodelay;
408 }
409 }
410 }
411 lck_mtx_unlock(&inpcb_lock);
412 lck_mtx_lock_spin(&inpcb_timeout_lock);
413 }
414
415 /* lock was dropped above, so check first before overriding */
416 if (!inpcb_garbage_collecting) {
417 inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
418 }
419 if (!inpcb_ticking) {
420 inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
421 }
422
423 /* arg0 will be set if we are the fast timer */
424 if (arg0 != NULL) {
425 inpcb_fast_timer_on = FALSE;
426 }
427 inpcb_timeout_run--;
428 VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);
429
430 /* re-arm the timer if there's work to do */
431 if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) {
432 inpcb_sched_timeout();
433 } else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) {
434 /* be lazy when idle with little activity */
435 inpcb_sched_lazy_timeout();
436 } else {
437 inpcb_sched_timeout();
438 }
439
440 lck_mtx_unlock(&inpcb_timeout_lock);
441}
442
443static void
444inpcb_sched_timeout(void)
445{
446 _inpcb_sched_timeout(0);
447}
448
449static void
450inpcb_sched_lazy_timeout(void)
451{
452 _inpcb_sched_timeout(inpcb_timeout_lazy);
453}
454
455static void
456_inpcb_sched_timeout(unsigned int offset)
457{
458 uint64_t deadline, leeway;
459
460 clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline);
461 LCK_MTX_ASSERT(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
462 if (inpcb_timeout_run == 0 &&
463 (inpcb_garbage_collecting || inpcb_ticking)) {
464 lck_mtx_convert_spin(&inpcb_timeout_lock);
465 inpcb_timeout_run++;
466 if (offset == 0) {
467 inpcb_fast_timer_on = TRUE;
468 thread_call_enter_delayed(inpcb_fast_thread_call,
469 deadline);
470 } else {
471 inpcb_fast_timer_on = FALSE;
472 clock_interval_to_absolutetime_interval(offset,
473 NSEC_PER_SEC, &leeway);
474 thread_call_enter_delayed_with_leeway(
475 inpcb_thread_call, NULL, deadline, leeway,
476 THREAD_CALL_DELAY_LEEWAY);
477 }
478 } else if (inpcb_timeout_run == 1 &&
479 offset == 0 && !inpcb_fast_timer_on) {
480 /*
481 * Since the request was for a fast timer but the
482 * scheduled timer is a lazy timer, try to schedule
483 * another instance of fast timer also.
484 */
485 lck_mtx_convert_spin(&inpcb_timeout_lock);
486 inpcb_timeout_run++;
487 inpcb_fast_timer_on = TRUE;
488 thread_call_enter_delayed(inpcb_fast_thread_call, deadline);
489 }
490}
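/*
 * Added note on the timing above: a fast request arms
 * inpcb_fast_thread_call with a deadline one second out and no leeway,
 * while a lazy request arms inpcb_thread_call with the same one-second
 * deadline but inpcb_timeout_lazy (10) seconds of leeway, letting the
 * thread-call subsystem coalesce it with other deferred work.
 */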
491
492void
493inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
494{
495 u_int32_t gccnt;
496
497 lck_mtx_lock_spin(&inpcb_timeout_lock);
498 inpcb_garbage_collecting = TRUE;
499 gccnt = ipi->ipi_gc_req.intimer_nodelay +
500 ipi->ipi_gc_req.intimer_fast;
501
502 if (gccnt > INPCB_GCREQ_THRESHOLD) {
503 type = INPCB_TIMER_FAST;
504 }
505
506 switch (type) {
507 case INPCB_TIMER_NODELAY:
508 atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1);
509 inpcb_sched_timeout();
510 break;
511 case INPCB_TIMER_FAST:
512 atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
513 inpcb_sched_timeout();
514 break;
515 default:
516 atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);
517 inpcb_sched_lazy_timeout();
518 break;
519 }
520 lck_mtx_unlock(&inpcb_timeout_lock);
521}
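/*
 * Added usage note: protocols request a garbage-collection pass with a
 * call such as the one made by in_pcbdetach() below:
 *
 *   inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
 *
 * Once more than INPCB_GCREQ_THRESHOLD (50000) nodelay/fast requests
 * are already pending, any new request is promoted to INPCB_TIMER_FAST.
 */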
522
523void
524inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
525{
526 lck_mtx_lock_spin(&inpcb_timeout_lock);
527 inpcb_ticking = TRUE;
528 switch (type) {
529 case INPCB_TIMER_NODELAY:
530 atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1);
531 inpcb_sched_timeout();
532 break;
533 case INPCB_TIMER_FAST:
534 atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
535 inpcb_sched_timeout();
536 break;
537 default:
538 atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1);
539 inpcb_sched_lazy_timeout();
540 break;
541 }
542 lck_mtx_unlock(&inpcb_timeout_lock);
543}
544
545void
546in_pcbinfo_attach(struct inpcbinfo *ipi)
547{
548 struct inpcbinfo *ipi0;
549
550 lck_mtx_lock(&inpcb_lock);
551 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
552 if (ipi0 == ipi) {
553 panic("%s: ipi %p already in the list\n",
554 __func__, ipi);
555 /* NOTREACHED */
556 }
557 }
558 TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
559 lck_mtx_unlock(&inpcb_lock);
560}
561
562int
563in_pcbinfo_detach(struct inpcbinfo *ipi)
564{
565 struct inpcbinfo *ipi0;
566 int error = 0;
567
568 lck_mtx_lock(&inpcb_lock);
569 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
570 if (ipi0 == ipi) {
571 break;
572 }
573 }
574 if (ipi0 != NULL) {
575 TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
576 } else {
577 error = ENXIO;
578 }
579 lck_mtx_unlock(&inpcb_lock);
580
581 return error;
582}
583
584/*
585 * Allocate a PCB and associate it with the socket.
586 *
587 * Returns: 0 Success
588 * ENOBUFS
589 * ENOMEM
590 */
591int
592in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
593{
594#pragma unused(p)
595 struct inpcb *inp;
596 caddr_t temp;
597
598 if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
599 inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone);
600 if (inp == NULL) {
601 return ENOBUFS;
602 }
603 bzero((caddr_t)inp, sizeof(*inp));
604 } else {
605 inp = (struct inpcb *)(void *)so->so_saved_pcb;
606 temp = inp->inp_saved_ppcb;
607 bzero((caddr_t)inp, sizeof(*inp));
608 inp->inp_saved_ppcb = temp;
609 }
610
611 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
612 inp->inp_pcbinfo = pcbinfo;
613 inp->inp_socket = so;
614 /* make sure inp_stat is always 64-bit aligned */
615 inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
616 sizeof(u_int64_t));
617 if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
618 sizeof(*inp->inp_stat) > sizeof(inp->inp_stat_store)) {
619 panic("%s: insufficient space to align inp_stat", __func__);
620 /* NOTREACHED */
621 }
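	/*
	 * Added worked example for the alignment above: if inp_stat_store
	 * happens to start at an address ending in 0x4, P2ROUNDUP(addr,
	 * sizeof(u_int64_t)) moves inp_stat up to the next address ending
	 * in 0x8, so it sits on an 8-byte boundary; the panic only fires
	 * if the padded store cannot hold a whole struct inp_stat after
	 * that adjustment.
	 */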
622
623 /* make sure inp_cstat is always 64-bit aligned */
624 inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
625 sizeof(u_int64_t));
626 if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
627 sizeof(*inp->inp_cstat) > sizeof(inp->inp_cstat_store)) {
628 panic("%s: insufficient space to align inp_cstat", __func__);
629 /* NOTREACHED */
630 }
631
632 /* make sure inp_wstat is always 64-bit aligned */
633 inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
634 sizeof(u_int64_t));
635 if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
636 sizeof(*inp->inp_wstat) > sizeof(inp->inp_wstat_store)) {
637 panic("%s: insufficient space to align inp_wstat", __func__);
638 /* NOTREACHED */
639 }
640
641 /* make sure inp_Wstat is always 64-bit aligned */
642 inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
643 sizeof(u_int64_t));
644 if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
645 sizeof(*inp->inp_Wstat) > sizeof(inp->inp_Wstat_store)) {
646 panic("%s: insufficient space to align inp_Wstat", __func__);
647 /* NOTREACHED */
648 }
649
650 so->so_pcb = (caddr_t)inp;
651
652 if (so->so_proto->pr_flags & PR_PCBLOCK) {
653 lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
654 pcbinfo->ipi_lock_attr);
655 }
656
657 if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) {
658 inp->inp_flags |= IN6P_IPV6_V6ONLY;
659 }
660
661 if (ip6_auto_flowlabel) {
662 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
663 }
664 if (intcoproc_unrestricted) {
665 inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
666 }
667
668 (void) inp_update_policy(inp);
669
670 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
671 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
672 LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
673 pcbinfo->ipi_count++;
674 lck_rw_done(pcbinfo->ipi_lock);
675 return 0;
676}
677
678/*
679 * in_pcblookup_local_and_cleanup does everything
680 * in_pcblookup_local does but it checks for a socket
681 * that's going away. Since we know that the lock is
682 * held read+write when this function is called, we
683 * can safely dispose of this socket like the slow
684 * timer would usually do and return NULL. This is
685 * great for bind.
686 */
687struct inpcb *
688in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
689 u_int lport_arg, int wild_okay)
690{
691 struct inpcb *inp;
692
693 /* Perform normal lookup */
694 inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
695
696 /* Check if we found a match but it's waiting to be disposed */
697 if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
698 struct socket *so = inp->inp_socket;
699
700 socket_lock(so, 0);
701
702 if (so->so_usecount == 0) {
703 if (inp->inp_state != INPCB_STATE_DEAD) {
704 in_pcbdetach(inp);
705 }
706 in_pcbdispose(inp); /* will unlock & destroy */
707 inp = NULL;
708 } else {
709 socket_unlock(so, 0);
710 }
711 }
712
713 return inp;
714}
715
716static void
717in_pcb_conflict_post_msg(u_int16_t port)
718{
719 /*
 720 * Radar 5523020: send a kernel event notification if a
 721 * non-participating socket tries to bind the port that a socket
 722 * which has set SOF_NOTIFYCONFLICT owns.
723 */
724 struct kev_msg ev_msg;
725 struct kev_in_portinuse in_portinuse;
726
727 bzero(&in_portinuse, sizeof(struct kev_in_portinuse));
728 bzero(&ev_msg, sizeof(struct kev_msg));
729 in_portinuse.port = ntohs(port); /* port in host order */
730 in_portinuse.req_pid = proc_selfpid();
731 ev_msg.vendor_code = KEV_VENDOR_APPLE;
732 ev_msg.kev_class = KEV_NETWORK_CLASS;
733 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
734 ev_msg.event_code = KEV_INET_PORTINUSE;
735 ev_msg.dv[0].data_ptr = &in_portinuse;
736 ev_msg.dv[0].data_length = sizeof(struct kev_in_portinuse);
737 ev_msg.dv[1].data_length = 0;
738 dlil_post_complete_msg(NULL, &ev_msg);
739}
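/*
 * Added sketch (hypothetical user-space listener, not part of this
 * file) of how a process with a kernel-event socket could observe the
 * KEV_INET_PORTINUSE notification posted above:
 *
 *   int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *   struct kev_request req = {
 *           .vendor_code  = KEV_VENDOR_APPLE,
 *           .kev_class    = KEV_NETWORK_CLASS,
 *           .kev_subclass = KEV_INET_SUBCLASS,
 *   };
 *   ioctl(fd, SIOCSKEVFILT, &req);
 *   // read() struct kern_event_msg records and look for
 *   // event_code == KEV_INET_PORTINUSE; the payload is a
 *   // struct kev_in_portinuse carrying the port and requesting pid.
 */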
740
741/*
742 * Bind an INPCB to an address and/or port. This routine should not alter
743 * the caller-supplied local address "nam".
744 *
745 * Returns: 0 Success
746 * EADDRNOTAVAIL Address not available.
747 * EINVAL Invalid argument
748 * EAFNOSUPPORT Address family not supported [notdef]
749 * EACCES Permission denied
750 * EADDRINUSE Address in use
751 * EAGAIN Resource unavailable, try again
752 * priv_check_cred:EPERM Operation not permitted
753 */
754int
755in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
756{
757 struct socket *so = inp->inp_socket;
758 unsigned short *lastport;
759 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
760 u_short lport = 0, rand_port = 0;
761 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
762 int error, randomport, conflict = 0;
763 boolean_t anonport = FALSE;
764 kauth_cred_t cred;
765 struct in_addr laddr;
766 struct ifnet *outif = NULL;
767
768 if (TAILQ_EMPTY(&in_ifaddrhead)) { /* XXX broken! */
769 return EADDRNOTAVAIL;
770 }
771 if (!(so->so_options & (SO_REUSEADDR | SO_REUSEPORT))) {
772 wild = 1;
773 }
774
775 bzero(&laddr, sizeof(laddr));
776
777 socket_unlock(so, 0); /* keep reference on socket */
778 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
779 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
780 /* another thread completed the bind */
781 lck_rw_done(pcbinfo->ipi_lock);
782 socket_lock(so, 0);
783 return EINVAL;
784 }
785
786 if (nam != NULL) {
787 if (nam->sa_len != sizeof(struct sockaddr_in)) {
788 lck_rw_done(pcbinfo->ipi_lock);
789 socket_lock(so, 0);
790 return EINVAL;
791 }
792#if 0
793 /*
794 * We should check the family, but old programs
795 * incorrectly fail to initialize it.
796 */
797 if (nam->sa_family != AF_INET) {
798 lck_rw_done(pcbinfo->ipi_lock);
799 socket_lock(so, 0);
800 return EAFNOSUPPORT;
801 }
802#endif /* 0 */
803 lport = SIN(nam)->sin_port;
804
805 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
806 /*
807 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
808 * allow complete duplication of binding if
809 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
810 * and a multicast address is bound on both
811 * new and duplicated sockets.
812 */
813 if (so->so_options & SO_REUSEADDR) {
814 reuseport = SO_REUSEADDR | SO_REUSEPORT;
815 }
816 } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
817 struct sockaddr_in sin;
818 struct ifaddr *ifa;
819
820 /* Sanitized for interface address searches */
821 bzero(&sin, sizeof(sin));
822 sin.sin_family = AF_INET;
823 sin.sin_len = sizeof(struct sockaddr_in);
824 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
825
826 ifa = ifa_ifwithaddr(SA(&sin));
827 if (ifa == NULL) {
828 lck_rw_done(pcbinfo->ipi_lock);
829 socket_lock(so, 0);
830 return EADDRNOTAVAIL;
831 } else {
832 /*
833 * Opportunistically determine the outbound
834 * interface that may be used; this may not
835 * hold true if we end up using a route
836 * going over a different interface, e.g.
837 * when sending to a local address. This
838 * will get updated again after sending.
839 */
840 IFA_LOCK(ifa);
841 outif = ifa->ifa_ifp;
842 IFA_UNLOCK(ifa);
843 IFA_REMREF(ifa);
844 }
845 }
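		/*
		 * Added user-space sketch for the multicast
		 * REUSEADDR/REUSEPORT rule above (illustrative only):
		 * two processes can bind the same group/port when both
		 * opt in, e.g.
		 *
		 *   int on = 1;
		 *   setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on));
		 *   struct sockaddr_in grp = { .sin_len = sizeof(grp),
		 *       .sin_family = AF_INET, .sin_port = htons(12345) };
		 *   inet_pton(AF_INET, "239.1.1.1", &grp.sin_addr);
		 *   bind(s, (struct sockaddr *)&grp, sizeof(grp));
		 *   // plus an IP_ADD_MEMBERSHIP join to actually receive
		 */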
846
847
848 if (lport != 0) {
849 struct inpcb *t;
850 uid_t u;
851
852#if XNU_TARGET_OS_OSX
853 if (ntohs(lport) < IPPORT_RESERVED &&
854 SIN(nam)->sin_addr.s_addr != 0 &&
855 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
856 cred = kauth_cred_proc_ref(p);
857 error = priv_check_cred(cred,
858 PRIV_NETINET_RESERVEDPORT, 0);
859 kauth_cred_unref(&cred);
860 if (error != 0) {
861 lck_rw_done(pcbinfo->ipi_lock);
862 socket_lock(so, 0);
863 return EACCES;
864 }
865 }
866#endif /* XNU_TARGET_OS_OSX */
867 /*
 868 * Check whether the process is allowed to bind to a restricted port
869 */
870 if (!current_task_can_use_restricted_in_port(lport,
871 (uint8_t)so->so_proto->pr_protocol, PORT_FLAGS_BSD)) {
872 lck_rw_done(pcbinfo->ipi_lock);
873 socket_lock(so, 0);
874 return EADDRINUSE;
875 }
876
877 if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
878 (u = kauth_cred_getuid(so->so_cred)) != 0 &&
879 (t = in_pcblookup_local_and_cleanup(
880 inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
881 INPLOOKUP_WILDCARD)) != NULL &&
882 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
883 t->inp_laddr.s_addr != INADDR_ANY ||
884 !(t->inp_socket->so_options & SO_REUSEPORT)) &&
885 (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
886 !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
887 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
888 t->inp_laddr.s_addr != INADDR_ANY) &&
889 (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
890 !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
891 uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
892 if ((t->inp_socket->so_flags &
893 SOF_NOTIFYCONFLICT) &&
894 !(so->so_flags & SOF_NOTIFYCONFLICT)) {
895 conflict = 1;
896 }
897
898 lck_rw_done(pcbinfo->ipi_lock);
899
900 if (conflict) {
901 in_pcb_conflict_post_msg(lport);
902 }
903
904 socket_lock(so, 0);
905 return EADDRINUSE;
906 }
907 t = in_pcblookup_local_and_cleanup(pcbinfo,
908 SIN(nam)->sin_addr, lport, wild);
909 if (t != NULL &&
910 (reuseport & t->inp_socket->so_options) == 0 &&
911 (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
912 !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
913 uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
914 if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
915 t->inp_laddr.s_addr != INADDR_ANY ||
916 SOCK_DOM(so) != PF_INET6 ||
917 SOCK_DOM(t->inp_socket) != PF_INET6) {
918 if ((t->inp_socket->so_flags &
919 SOF_NOTIFYCONFLICT) &&
920 !(so->so_flags & SOF_NOTIFYCONFLICT)) {
921 conflict = 1;
922 }
923
924 lck_rw_done(pcbinfo->ipi_lock);
925
926 if (conflict) {
927 in_pcb_conflict_post_msg(lport);
928 }
929 socket_lock(so, 0);
930 return EADDRINUSE;
931 }
932 }
933 }
934 laddr = SIN(nam)->sin_addr;
935 }
936 if (lport == 0) {
937 u_short first, last;
938 int count;
939 bool found;
940
941 /*
 942 * Override wild = 1 for implicit bind (mainly used by connect).
 943 * For implicit bind (lport == 0), we always use an unused port,
 944 * so REUSEADDR|REUSEPORT don't apply.
945 */
946 wild = 1;
947
948 randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
949 (so->so_type == SOCK_STREAM ? tcp_use_randomport :
950 udp_use_randomport);
951
952 /*
953 * Even though this looks similar to the code in
954 * in6_pcbsetport, the v6 vs v4 checks are different.
955 */
956 anonport = TRUE;
957 if (inp->inp_flags & INP_HIGHPORT) {
958 first = (u_short)ipport_hifirstauto; /* sysctl */
959 last = (u_short)ipport_hilastauto;
960 lastport = &pcbinfo->ipi_lasthi;
961 } else if (inp->inp_flags & INP_LOWPORT) {
962 cred = kauth_cred_proc_ref(p);
963 error = priv_check_cred(cred,
964 PRIV_NETINET_RESERVEDPORT, 0);
965 kauth_cred_unref(&cred);
966 if (error != 0) {
967 lck_rw_done(pcbinfo->ipi_lock);
968 socket_lock(so, 0);
969 return error;
970 }
971 first = (u_short)ipport_lowfirstauto; /* 1023 */
972 last = (u_short)ipport_lowlastauto; /* 600 */
973 lastport = &pcbinfo->ipi_lastlow;
974 } else {
975 first = (u_short)ipport_firstauto; /* sysctl */
976 last = (u_short)ipport_lastauto;
977 lastport = &pcbinfo->ipi_lastport;
978 }
979 /* No point in randomizing if only one port is available */
980
981 if (first == last) {
982 randomport = 0;
983 }
984 /*
 985 * Simple check to ensure that not all ports are used up,
 986 * which would cause a deadlock here.
987 *
988 * We split the two cases (up and down) so that the direction
989 * is not being tested on each round of the loop.
990 */
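		/*
		 * Added worked example: with first = 49152 and
		 * last = 65535 (counting up), a random starting offset
		 * within that window is chosen when port randomization is
		 * enabled, *lastport wraps back to first once it walks
		 * past last, restricted ports are skipped, and the search
		 * gives up with EADDRNOTAVAIL after last - first
		 * unsuccessful probes.
		 */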
991 if (first > last) {
992 struct in_addr lookup_addr;
993
994 /*
995 * counting down
996 */
997 if (randomport) {
998 read_frandom(&rand_port, sizeof(rand_port));
999 *lastport =
1000 first - (rand_port % (first - last));
1001 }
1002 count = first - last;
1003
1004 lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
1005 inp->inp_laddr;
1006
1007 found = false;
1008 do {
1009 if (count-- < 0) { /* completely used? */
1010 lck_rw_done(pcbinfo->ipi_lock);
1011 socket_lock(so, 0);
1012 return EADDRNOTAVAIL;
1013 }
1014 --*lastport;
1015 if (*lastport > first || *lastport < last) {
1016 *lastport = first;
1017 }
1018 lport = htons(*lastport);
1019
1020 /*
 1021 * Skip if this is a restricted port, as we do not want to
 1022 * use restricted ports as ephemeral ports
1023 */
1024 if (IS_RESTRICTED_IN_PORT(lport)) {
1025 continue;
1026 }
1027
1028 found = in_pcblookup_local_and_cleanup(pcbinfo,
1029 lookup_addr, lport, wild) == NULL;
1030 } while (!found);
1031 } else {
1032 struct in_addr lookup_addr;
1033
1034 /*
1035 * counting up
1036 */
1037 if (randomport) {
1038 read_frandom(&rand_port, sizeof(rand_port));
1039 *lastport =
1040 first + (rand_port % (first - last));
1041 }
1042 count = last - first;
1043
1044 lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
1045 inp->inp_laddr;
1046
1047 found = false;
1048 do {
1049 if (count-- < 0) { /* completely used? */
1050 lck_rw_done(pcbinfo->ipi_lock);
1051 socket_lock(so, 0);
1052 return EADDRNOTAVAIL;
1053 }
1054 ++*lastport;
1055 if (*lastport < first || *lastport > last) {
1056 *lastport = first;
1057 }
1058 lport = htons(*lastport);
1059
1060 /*
 1061 * Skip if this is a restricted port, as we do not want to
 1062 * use restricted ports as ephemeral ports
1063 */
1064 if (IS_RESTRICTED_IN_PORT(lport)) {
1065 continue;
1066 }
1067
1068 found = in_pcblookup_local_and_cleanup(pcbinfo,
1069 lookup_addr, lport, wild) == NULL;
1070 } while (!found);
1071 }
1072 }
1073 socket_lock(so, 0);
1074
1075 /*
 1076 * We unlocked the socket's protocol lock for a long time.
 1077 * The socket might have been dropped/defuncted.
 1078 * Check whether the world has changed since.
1079 */
1080 if (inp->inp_state == INPCB_STATE_DEAD) {
1081 lck_rw_done(pcbinfo->ipi_lock);
1082 return ECONNABORTED;
1083 }
1084
1085 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
1086 lck_rw_done(pcbinfo->ipi_lock);
1087 return EINVAL;
1088 }
1089
1090 if (laddr.s_addr != INADDR_ANY) {
1091 inp->inp_laddr = laddr;
1092 inp->inp_last_outifp = outif;
1093 }
1094 inp->inp_lport = lport;
1095 if (anonport) {
1096 inp->inp_flags |= INP_ANONPORT;
1097 }
1098
1099 if (in_pcbinshash(inp, 1) != 0) {
1100 inp->inp_laddr.s_addr = INADDR_ANY;
1101 inp->inp_last_outifp = NULL;
1102
1103 inp->inp_lport = 0;
1104 if (anonport) {
1105 inp->inp_flags &= ~INP_ANONPORT;
1106 }
1107 lck_rw_done(pcbinfo->ipi_lock);
1108 return EAGAIN;
1109 }
1110 lck_rw_done(pcbinfo->ipi_lock);
1111 sflt_notify(so, sock_evt_bound, NULL);
1112 return 0;
1113}
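/*
 * Added user-space sketch of the implicit/anonymous port path above
 * (illustrative only): binding with sin_port == 0 makes this routine
 * pick an ephemeral port and mark the PCB INP_ANONPORT.
 *
 *   struct sockaddr_in sin = { .sin_len = sizeof(sin),
 *       .sin_family = AF_INET };          // sin_port left at 0
 *   bind(s, (struct sockaddr *)&sin, sizeof(sin));
 *   socklen_t len = sizeof(sin);
 *   getsockname(s, (struct sockaddr *)&sin, &len);
 *   // sin.sin_port now holds the kernel-chosen ephemeral port
 */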
1114
1115#define APN_FALLBACK_IP_FILTER(a) \
1116 (IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \
1117 IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \
1118 IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \
1119 IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \
1120 IN_PRIVATE(ntohl((a)->sin_addr.s_addr)))
1121
1122#define APN_FALLBACK_NOTIF_INTERVAL 2 /* Magic Number */
1123static uint64_t last_apn_fallback = 0;
1124
1125static boolean_t
1126apn_fallback_required(proc_t proc, struct socket *so, struct sockaddr_in *p_dstv4)
1127{
1128 uint64_t timenow;
1129 struct sockaddr_storage lookup_default_addr;
1130 struct rtentry *rt = NULL;
1131
1132 VERIFY(proc != NULL);
1133
1134 if (apn_fallbk_enabled == FALSE) {
1135 return FALSE;
1136 }
1137
1138 if (proc == kernproc) {
1139 return FALSE;
1140 }
1141
1142 if (so && (so->so_options & SO_NOAPNFALLBK)) {
1143 return FALSE;
1144 }
1145
1146 timenow = net_uptime();
1147 if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) {
1148 apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n"));
1149 return FALSE;
1150 }
1151
1152 if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4)) {
1153 return FALSE;
1154 }
1155
1156 /* Check if we have unscoped IPv6 default route through cellular */
1157 bzero(&lookup_default_addr, sizeof(lookup_default_addr));
1158 lookup_default_addr.ss_family = AF_INET6;
1159 lookup_default_addr.ss_len = sizeof(struct sockaddr_in6);
1160
1161 rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
1162 if (NULL == rt) {
1163 apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
1164 "unscoped default IPv6 route.\n"));
1165 return FALSE;
1166 }
1167
1168 if (!IFNET_IS_CELLULAR(rt->rt_ifp)) {
1169 rtfree(rt);
1170 apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
1171 "unscoped default IPv6 route through cellular interface.\n"));
1172 return FALSE;
1173 }
1174
1175 /*
1176 * We have a default IPv6 route, ensure that
1177 * we do not have IPv4 default route before triggering
1178 * the event
1179 */
1180 rtfree(rt);
1181 rt = NULL;
1182
1183 bzero(&lookup_default_addr, sizeof(lookup_default_addr));
1184 lookup_default_addr.ss_family = AF_INET;
1185 lookup_default_addr.ss_len = sizeof(struct sockaddr_in);
1186
1187 rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
1188
1189 if (rt) {
1190 rtfree(rt);
1191 rt = NULL;
1192 apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped "
1193 "IPv4 default route!\n"));
1194 return FALSE;
1195 }
1196
1197 {
1198 /*
1199 * We disable APN fallback if the binary is not a third-party app.
1200 * Note that platform daemons use their process name as a
1201 * bundle ID so we filter out bundle IDs without dots.
1202 */
1203 const char *bundle_id = cs_identity_get(proc);
1204 if (bundle_id == NULL ||
1205 bundle_id[0] == '\0' ||
1206 strchr(bundle_id, '.') == NULL ||
1207 strncmp(bundle_id, "com.apple.", sizeof("com.apple.") - 1) == 0) {
1208 apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-"
1209 "party bundle ID \"%s\"!\n", (bundle_id ? bundle_id : "NULL")));
1210 return FALSE;
1211 }
1212 }
1213
1214 {
1215 /*
1216 * The Apple App Store IPv6 requirement started on
1217 * June 1st, 2016 at 12:00:00 AM PDT.
1218 * We disable APN fallback if the binary is more recent than that.
1219 * We check both atime and birthtime since birthtime is not always supported.
1220 */
1221 static const long ipv6_start_date = 1464764400L;
1222 vfs_context_t context;
1223 struct stat64 sb;
1224 int vn_stat_error;
1225
1226 bzero(&sb, sizeof(struct stat64));
1227 context = vfs_context_create(NULL);
1228 vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, 0, context);
1229 (void)vfs_context_rele(context);
1230
1231 if (vn_stat_error != 0 ||
1232 sb.st_atimespec.tv_sec >= ipv6_start_date ||
1233 sb.st_birthtimespec.tv_sec >= ipv6_start_date) {
1234 apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary "
1235 "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n",
1236 vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec,
1237 sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec));
1238 return FALSE;
1239 }
1240 }
1241 return TRUE;
1242}
1243
1244static void
1245apn_fallback_trigger(proc_t proc, struct socket *so)
1246{
1247 pid_t pid = 0;
1248 struct kev_msg ev_msg;
1249 struct kev_netevent_apnfallbk_data apnfallbk_data;
1250
1251 last_apn_fallback = net_uptime();
1252 pid = proc_pid(proc);
1253 uuid_t application_uuid;
1254 uuid_clear(application_uuid);
1255 proc_getexecutableuuid(proc, application_uuid,
1256 sizeof(application_uuid));
1257
1258 bzero(&ev_msg, sizeof(struct kev_msg));
1259 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1260 ev_msg.kev_class = KEV_NETWORK_CLASS;
1261 ev_msg.kev_subclass = KEV_NETEVENT_SUBCLASS;
1262 ev_msg.event_code = KEV_NETEVENT_APNFALLBACK;
1263
1264 bzero(&apnfallbk_data, sizeof(apnfallbk_data));
1265
1266 if (so->so_flags & SOF_DELEGATED) {
1267 apnfallbk_data.epid = so->e_pid;
1268 uuid_copy(apnfallbk_data.euuid, so->e_uuid);
1269 } else {
1270 apnfallbk_data.epid = so->last_pid;
1271 uuid_copy(apnfallbk_data.euuid, so->last_uuid);
1272 }
1273
1274 ev_msg.dv[0].data_ptr = &apnfallbk_data;
1275 ev_msg.dv[0].data_length = sizeof(apnfallbk_data);
1276 kev_post_msg(&ev_msg);
1277 apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n"));
1278}
1279
1280/*
1281 * Transform old in_pcbconnect() into an inner subroutine for new
1282 * in_pcbconnect(); do some validity-checking on the remote address
1283 * (in "nam") and then determine local host address (i.e., which
1284 * interface) to use to access that remote host.
1285 *
1286 * This routine may alter the caller-supplied remote address "nam".
1287 *
1288 * The caller may override the bound-to-interface setting of the socket
1289 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1290 *
1291 * This routine might return an ifp with a reference held if the caller
1292 * provides a non-NULL outif, even in the error case. The caller is
1293 * responsible for releasing its reference.
1294 *
1295 * Returns: 0 Success
1296 * EINVAL Invalid argument
1297 * EAFNOSUPPORT Address family not supported
1298 * EADDRNOTAVAIL Address not available
1299 */
1300int
1301in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
1302 unsigned int ifscope, struct ifnet **outif, int raw)
1303{
1304 struct route *ro = &inp->inp_route;
1305 struct in_ifaddr *ia = NULL;
1306 struct sockaddr_in sin;
1307 int error = 0;
1308 boolean_t restricted = FALSE;
1309
1310 if (outif != NULL) {
1311 *outif = NULL;
1312 }
1313 if (nam->sa_len != sizeof(struct sockaddr_in)) {
1314 return EINVAL;
1315 }
1316 if (SIN(nam)->sin_family != AF_INET) {
1317 return EAFNOSUPPORT;
1318 }
1319 if (raw == 0 && SIN(nam)->sin_port == 0) {
1320 return EADDRNOTAVAIL;
1321 }
1322
1323 /*
1324 * If the destination address is INADDR_ANY,
1325 * use the primary local address.
1326 * If the supplied address is INADDR_BROADCAST,
1327 * and the primary interface supports broadcast,
1328 * choose the broadcast address for that interface.
1329 */
1330 if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
1331 SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) {
1332 lck_rw_lock_shared(in_ifaddr_rwlock);
1333 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
1334 ia = TAILQ_FIRST(&in_ifaddrhead);
1335 IFA_LOCK_SPIN(&ia->ia_ifa);
1336 if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
1337 SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
1338 } else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
1339 SIN(nam)->sin_addr =
1340 SIN(&ia->ia_broadaddr)->sin_addr;
1341 }
1342 IFA_UNLOCK(&ia->ia_ifa);
1343 ia = NULL;
1344 }
1345 lck_rw_done(in_ifaddr_rwlock);
1346 }
1347 /*
1348 * Otherwise, if the socket has already bound the source, just use it.
1349 */
1350 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1351 VERIFY(ia == NULL);
1352 *laddr = inp->inp_laddr;
1353 return 0;
1354 }
1355
1356 /*
1357 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
1358 * then it overrides the sticky ifscope set for the socket.
1359 */
1360 if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF)) {
1361 ifscope = inp->inp_boundifp->if_index;
1362 }
1363
1364 /*
1365 * If route is known or can be allocated now,
1366 * our src addr is taken from the i/f, else punt.
1367 * Note that we should check the address family of the cached
1368 * destination, in case of sharing the cache with IPv6.
1369 */
1370 if (ro->ro_rt != NULL) {
1371 RT_LOCK_SPIN(ro->ro_rt);
1372 }
1373 if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
1374 SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
1375 (inp->inp_socket->so_options & SO_DONTROUTE)) {
1376 if (ro->ro_rt != NULL) {
1377 RT_UNLOCK(ro->ro_rt);
1378 }
1379 ROUTE_RELEASE(ro);
1380 }
1381 if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
1382 (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
1383 if (ro->ro_rt != NULL) {
1384 RT_UNLOCK(ro->ro_rt);
1385 }
1386 ROUTE_RELEASE(ro);
1387 /* No route yet, so try to acquire one */
1388 bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
1389 ro->ro_dst.sa_family = AF_INET;
1390 ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
1391 SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
1392 rtalloc_scoped(ro, ifscope);
1393 if (ro->ro_rt != NULL) {
1394 RT_LOCK_SPIN(ro->ro_rt);
1395 }
1396 }
1397 /* Sanitized local copy for interface address searches */
1398 bzero(&sin, sizeof(sin));
1399 sin.sin_family = AF_INET;
1400 sin.sin_len = sizeof(struct sockaddr_in);
1401 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
1402 /*
1403 * If we did not find (or use) a route, assume dest is reachable
1404 * on a directly connected network and try to find a corresponding
1405 * interface to take the source address from.
1406 */
1407 if (ro->ro_rt == NULL) {
1408 proc_t proc = current_proc();
1409
1410 VERIFY(ia == NULL);
1411 ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
1412 if (ia == NULL) {
1413 ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
1414 }
1415 error = ((ia == NULL) ? ENETUNREACH : 0);
1416
1417 if (apn_fallback_required(proc, inp->inp_socket,
1418 (void *)nam)) {
1419 apn_fallback_trigger(proc, inp->inp_socket);
1420 }
1421
1422 goto done;
1423 }
1424 RT_LOCK_ASSERT_HELD(ro->ro_rt);
1425 /*
1426 * If the outgoing interface on the route found is not
1427 * a loopback interface, use the address from that interface.
1428 */
1429 if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
1430 VERIFY(ia == NULL);
1431 /*
1432 * If the route points to a cellular interface and the
1433 * caller forbids our using interfaces of such type,
1434 * pretend that there is no route.
1435 * Apply the same logic for expensive interfaces.
1436 */
1437 if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
1438 RT_UNLOCK(ro->ro_rt);
1439 ROUTE_RELEASE(ro);
1440 error = EHOSTUNREACH;
1441 restricted = TRUE;
1442 } else {
1443 /* Become a regular mutex */
1444 RT_CONVERT_LOCK(ro->ro_rt);
1445 ia = ifatoia(ro->ro_rt->rt_ifa);
1446 IFA_ADDREF(&ia->ia_ifa);
1447
1448 /*
1449 * Mark the control block for notification of
1450 * a possible flow that might undergo clat46
1451 * translation.
1452 *
1453 * We defer the decision to a later point when
1454 * inpcb is being disposed off.
1455 * The reason is that we only want to send notification
1456 * if the flow was ever used to send data.
1457 */
1458 if (IS_INTF_CLAT46(ro->ro_rt->rt_ifp)) {
1459 inp->inp_flags2 |= INP2_CLAT46_FLOW;
1460 }
1461
1462 RT_UNLOCK(ro->ro_rt);
1463 error = 0;
1464 }
1465 goto done;
1466 }
1467 VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
1468 RT_UNLOCK(ro->ro_rt);
1469 /*
 1470 * The outgoing interface is marked with 'loopback net', so the
 1471 * route we found points back to ourselves.
1472 * Try to find the interface of the destination address and then
1473 * take the address from there. That interface is not necessarily
1474 * a loopback interface.
1475 */
1476 VERIFY(ia == NULL);
1477 ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
1478 if (ia == NULL) {
1479 ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
1480 }
1481 if (ia == NULL) {
1482 ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
1483 }
1484 if (ia == NULL) {
1485 RT_LOCK(ro->ro_rt);
1486 ia = ifatoia(ro->ro_rt->rt_ifa);
1487 if (ia != NULL) {
1488 IFA_ADDREF(&ia->ia_ifa);
1489 }
1490 RT_UNLOCK(ro->ro_rt);
1491 }
1492 error = ((ia == NULL) ? ENETUNREACH : 0);
1493
1494done:
1495 /*
1496 * If the destination address is multicast and an outgoing
1497 * interface has been set as a multicast option, use the
1498 * address of that interface as our source address.
1499 */
1500 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
1501 inp->inp_moptions != NULL) {
1502 struct ip_moptions *imo;
1503 struct ifnet *ifp;
1504
1505 imo = inp->inp_moptions;
1506 IMO_LOCK(imo);
1507 if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
1508 ia->ia_ifp != imo->imo_multicast_ifp)) {
1509 ifp = imo->imo_multicast_ifp;
1510 if (ia != NULL) {
1511 IFA_REMREF(&ia->ia_ifa);
1512 }
1513 lck_rw_lock_shared(in_ifaddr_rwlock);
1514 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
1515 if (ia->ia_ifp == ifp) {
1516 break;
1517 }
1518 }
1519 if (ia != NULL) {
1520 IFA_ADDREF(&ia->ia_ifa);
1521 }
1522 lck_rw_done(in_ifaddr_rwlock);
1523 if (ia == NULL) {
1524 error = EADDRNOTAVAIL;
1525 } else {
1526 error = 0;
1527 }
1528 }
1529 IMO_UNLOCK(imo);
1530 }
1531 /*
 1532 * Don't do the pcblookup call here; return the interface in laddr
 1533 * and exit to the caller, which will do the lookup.
1534 */
1535 if (ia != NULL) {
1536 /*
1537 * If the source address belongs to a cellular interface
1538 * and the socket forbids our using interfaces of such
1539 * type, pretend that there is no source address.
1540 * Apply the same logic for expensive interfaces.
1541 */
1542 IFA_LOCK_SPIN(&ia->ia_ifa);
1543 if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
1544 IFA_UNLOCK(&ia->ia_ifa);
1545 error = EHOSTUNREACH;
1546 restricted = TRUE;
1547 } else if (error == 0) {
1548 *laddr = ia->ia_addr.sin_addr;
1549 if (outif != NULL) {
1550 struct ifnet *ifp;
1551
1552 if (ro->ro_rt != NULL) {
1553 ifp = ro->ro_rt->rt_ifp;
1554 } else {
1555 ifp = ia->ia_ifp;
1556 }
1557
1558 VERIFY(ifp != NULL);
1559 IFA_CONVERT_LOCK(&ia->ia_ifa);
1560 ifnet_reference(ifp); /* for caller */
1561 if (*outif != NULL) {
1562 ifnet_release(*outif);
1563 }
1564 *outif = ifp;
1565 }
1566 IFA_UNLOCK(&ia->ia_ifa);
1567 } else {
1568 IFA_UNLOCK(&ia->ia_ifa);
1569 }
1570 IFA_REMREF(&ia->ia_ifa);
1571 ia = NULL;
1572 }
1573
1574 if (restricted && error == EHOSTUNREACH) {
1575 soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
1576 SO_FILT_HINT_IFDENIED));
1577 }
1578
1579 return error;
1580}
1581
1582/*
1583 * Outer subroutine:
1584 * Connect from a socket to a specified address.
1585 * Both address and port must be specified in argument sin.
 1586 * If we don't have a local address for this socket yet,
1587 * then pick one.
1588 *
1589 * The caller may override the bound-to-interface setting of the socket
1590 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1591 */
1592int
1593in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
1594 unsigned int ifscope, struct ifnet **outif)
1595{
1596 struct in_addr laddr;
1597 struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
1598 struct inpcb *pcb;
1599 int error;
1600 struct socket *so = inp->inp_socket;
1601
1602#if CONTENT_FILTER
1603 if (so) {
1604 so->so_state_change_cnt++;
1605 }
1606#endif
1607
1608 /*
1609 * Call inner routine, to assign local interface address.
1610 */
1611 if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0) {
1612 return error;
1613 }
1614
1615 socket_unlock(so, 0);
1616 pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
1617 inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
1618 inp->inp_lport, 0, NULL);
1619 socket_lock(so, 0);
1620
1621 /*
1622 * Check if the socket is still in a valid state. When we unlock this
1623 * embryonic socket, it can get aborted if another thread is closing
1624 * the listener (radar 7947600).
1625 */
1626 if ((so->so_flags & SOF_ABORTED) != 0) {
1627 return ECONNREFUSED;
1628 }
1629
1630 if (pcb != NULL) {
1631 in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
1632 return EADDRINUSE;
1633 }
1634 if (inp->inp_laddr.s_addr == INADDR_ANY) {
1635 if (inp->inp_lport == 0) {
1636 error = in_pcbbind(inp, NULL, p);
1637 if (error) {
1638 return error;
1639 }
1640 }
1641 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1642 /*
1643 * Lock inversion issue, mostly with udp
1644 * multicast packets.
1645 */
1646 socket_unlock(so, 0);
1647 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1648 socket_lock(so, 0);
1649 }
1650 inp->inp_laddr = laddr;
1651 /* no reference needed */
1652 inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
1653 inp->inp_flags |= INP_INADDR_ANY;
1654 } else {
1655 /*
 1656 * Usage of IP_PKTINFO without a local port already
 1657 * specified will cause the kernel to panic,
 1658 * see rdar://problem/18508185.
 1659 * For now return an error to avoid a kernel panic.
 1660 * This routine can be refactored to handle this better
 1661 * in the future.
1662 */
1663 if (inp->inp_lport == 0) {
1664 return EINVAL;
1665 }
1666 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1667 /*
1668 * Lock inversion issue, mostly with udp
1669 * multicast packets.
1670 */
1671 socket_unlock(so, 0);
1672 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1673 socket_lock(so, 0);
1674 }
1675 }
1676 inp->inp_faddr = sin->sin_addr;
1677 inp->inp_fport = sin->sin_port;
1678 if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
1679 nstat_pcb_invalidate_cache(inp);
1680 }
1681 in_pcbrehash(inp);
1682 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1683 return 0;
1684}
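/*
 * Added user-space sketch (illustrative only): connect(2) on an unbound
 * UDP socket exercises the implicit-bind branch above, since
 * in_pcbladdr() picks the source address and in_pcbbind(inp, NULL, p)
 * then assigns an ephemeral local port.
 *
 *   int s = socket(AF_INET, SOCK_DGRAM, 0);
 *   struct sockaddr_in dst = { .sin_len = sizeof(dst),
 *       .sin_family = AF_INET, .sin_port = htons(53) };
 *   inet_pton(AF_INET, "203.0.113.1", &dst.sin_addr);
 *   connect(s, (struct sockaddr *)&dst, sizeof(dst));
 *   // getsockname() would now report the chosen local address/port
 */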
1685
1686void
1687in_pcbdisconnect(struct inpcb *inp)
1688{
1689 struct socket *so = inp->inp_socket;
1690
1691 if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
1692 nstat_pcb_cache(inp);
1693 }
1694
1695 inp->inp_faddr.s_addr = INADDR_ANY;
1696 inp->inp_fport = 0;
1697
1698#if CONTENT_FILTER
1699 if (so) {
1700 so->so_state_change_cnt++;
1701 }
1702#endif
1703
1704 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1705 /* lock inversion issue, mostly with udp multicast packets */
1706 socket_unlock(so, 0);
1707 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1708 socket_lock(so, 0);
1709 }
1710
1711 in_pcbrehash(inp);
1712 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1713 /*
1714 * A multipath subflow socket would have its SS_NOFDREF set by default,
1715 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
1716 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
1717 */
1718 if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) {
1719 in_pcbdetach(inp);
1720 }
1721}
1722
1723void
1724in_pcbdetach(struct inpcb *inp)
1725{
1726 struct socket *so = inp->inp_socket;
1727
1728 if (so->so_pcb == NULL) {
1729 /* PCB has been disposed */
1730 panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__,
1731 inp, so, SOCK_PROTO(so));
1732 /* NOTREACHED */
1733 }
1734
1735#if IPSEC
1736 if (inp->inp_sp != NULL) {
1737 (void) ipsec4_delete_pcbpolicy(inp);
1738 }
1739#endif /* IPSEC */
1740
1741 if (inp->inp_stat != NULL && SOCK_PROTO(so) == IPPROTO_UDP) {
1742 if (inp->inp_stat->rxpackets == 0 && inp->inp_stat->txpackets == 0) {
1743 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_no_data);
1744 }
1745 }
1746
1747 /*
1748 * Let NetworkStatistics know this PCB is going away
1749 * before we detach it.
1750 */
1751 if (nstat_collect &&
1752 (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
1753 nstat_pcb_detach(inp);
1754 }
1755
1756 /* Free memory buffer held for generating keep alives */
1757 if (inp->inp_keepalive_data != NULL) {
1758 FREE(inp->inp_keepalive_data, M_TEMP);
1759 inp->inp_keepalive_data = NULL;
1760 }
1761
1762 /* mark socket state as dead */
1763 if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
1764 panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
1765 __func__, so, SOCK_PROTO(so));
1766 /* NOTREACHED */
1767 }
1768
1769 if (!(so->so_flags & SOF_PCBCLEARING)) {
1770 struct ip_moptions *imo;
1771
1772 inp->inp_vflag = 0;
1773 if (inp->inp_options != NULL) {
1774 (void) m_free(inp->inp_options);
1775 inp->inp_options = NULL;
1776 }
1777 ROUTE_RELEASE(&inp->inp_route);
1778 imo = inp->inp_moptions;
1779 inp->inp_moptions = NULL;
1780 sofreelastref(so, 0);
1781 inp->inp_state = INPCB_STATE_DEAD;
1782
1783 /*
 1784 * Enqueue an event to send a kernel event notification
 1785 * if the flow had to use CLAT46 for data packets
1786 */
1787 if (inp->inp_flags2 & INP2_CLAT46_FLOW) {
1788 /*
 1789 * If there has been any exchange of data bytes
 1790 * over this flow, schedule a notification to
 1791 * report that the flow is using client-side
 1792 * translation.
1793 */
1794 if (inp->inp_stat != NULL &&
1795 (inp->inp_stat->txbytes != 0 ||
1796 inp->inp_stat->rxbytes != 0)) {
1797 if (so->so_flags & SOF_DELEGATED) {
1798 in6_clat46_event_enqueue_nwk_wq_entry(
1799 IN6_CLAT46_EVENT_V4_FLOW,
1800 so->e_pid,
1801 so->e_uuid);
1802 } else {
1803 in6_clat46_event_enqueue_nwk_wq_entry(
1804 IN6_CLAT46_EVENT_V4_FLOW,
1805 so->last_pid,
1806 so->last_uuid);
1807 }
1808 }
1809 }
1810
1811 /* makes sure we're not called twice from so_close */
1812 so->so_flags |= SOF_PCBCLEARING;
1813
1814 inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
1815
1816 /*
1817 * See inp_join_group() for why we need to unlock
1818 */
1819 if (imo != NULL) {
1820 socket_unlock(so, 0);
1821 IMO_REMREF(imo);
1822 socket_lock(so, 0);
1823 }
1824 }
1825}
1826
1827
1828void
1829in_pcbdispose(struct inpcb *inp)
1830{
1831 struct socket *so = inp->inp_socket;
1832 struct inpcbinfo *ipi = inp->inp_pcbinfo;
1833
1834 if (so != NULL && so->so_usecount != 0) {
1835 panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n",
1836 __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
1837 solockhistory_nr(so));
1838 /* NOTREACHED */
1839 } else if (inp->inp_wantcnt != WNT_STOPUSING) {
1840 if (so != NULL) {
1841 panic_plain("%s: inp %p invalid wantcnt %d, so %p "
1842 "[%d,%d] usecount %d retaincnt %d state 0x%x "
1843 "flags 0x%x lockhistory %s\n", __func__, inp,
1844 inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
1845 so->so_usecount, so->so_retaincnt, so->so_state,
1846 so->so_flags, solockhistory_nr(so));
1847 /* NOTREACHED */
1848 } else {
1849 panic("%s: inp %p invalid wantcnt %d no socket\n",
1850 __func__, inp, inp->inp_wantcnt);
1851 /* NOTREACHED */
1852 }
1853 }
1854
1855 LCK_RW_ASSERT(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);
1856
1857 inp->inp_gencnt = ++ipi->ipi_gencnt;
1858 /* access ipi in in_pcbremlists */
1859 in_pcbremlists(inp);
1860
1861 if (so != NULL) {
1862 if (so->so_proto->pr_flags & PR_PCBLOCK) {
1863 sofreelastref(so, 0);
1864 if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
1865 /*
1866 * selthreadclear() already called
1867 * during sofreelastref() above.
1868 */
1869 sbrelease(&so->so_rcv);
1870 sbrelease(&so->so_snd);
1871 }
1872 if (so->so_head != NULL) {
1873 panic("%s: so=%p head still exist\n",
1874 __func__, so);
1875 /* NOTREACHED */
1876 }
1877 lck_mtx_unlock(&inp->inpcb_mtx);
1878
1879#if NECP
1880 necp_inpcb_remove_cb(inp);
1881#endif /* NECP */
1882
1883 lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
1884 }
1885 /* makes sure we're not called twice from so_close */
1886 so->so_flags |= SOF_PCBCLEARING;
1887 so->so_saved_pcb = (caddr_t)inp;
1888 so->so_pcb = NULL;
1889 inp->inp_socket = NULL;
1890#if NECP
1891 necp_inpcb_dispose(inp);
1892#endif /* NECP */
1893 /*
1894	 * In case there is a route cached after a detach (possible
1895 * in the tcp case), make sure that it is freed before
1896 * we deallocate the structure.
1897 */
1898 ROUTE_RELEASE(&inp->inp_route);
1899 if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
1900 zfree(ipi->ipi_zone, inp);
1901 }
1902 sodealloc(so);
1903 }
1904}
1905
1906/*
1907 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
1908 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
1909 * in struct pr_usrreqs, so that protocols can just reference them directly
1910 * without the need for a wrapper function.
1911 */
1912int
1913in_getsockaddr(struct socket *so, struct sockaddr **nam)
1914{
1915 struct inpcb *inp;
1916 struct sockaddr_in *sin;
1917
1918 /*
1919 * Do the malloc first in case it blocks.
1920 */
1921 MALLOC(sin, struct sockaddr_in *, sizeof(*sin), M_SONAME, M_WAITOK);
1922 if (sin == NULL) {
1923 return ENOBUFS;
1924 }
1925 bzero(sin, sizeof(*sin));
1926 sin->sin_family = AF_INET;
1927 sin->sin_len = sizeof(*sin);
1928
1929 if ((inp = sotoinpcb(so)) == NULL) {
1930 FREE(sin, M_SONAME);
1931 return EINVAL;
1932 }
1933 sin->sin_port = inp->inp_lport;
1934 sin->sin_addr = inp->inp_laddr;
1935
1936 *nam = (struct sockaddr *)sin;
1937 return 0;
1938}
1939
1940int
1941in_getsockaddr_s(struct socket *so, struct sockaddr_in *ss)
1942{
1943 struct sockaddr_in *sin = ss;
1944 struct inpcb *inp;
1945
1946 VERIFY(ss != NULL);
1947 bzero(ss, sizeof(*ss));
1948
1949 sin->sin_family = AF_INET;
1950 sin->sin_len = sizeof(*sin);
1951
1952 if ((inp = sotoinpcb(so)) == NULL) {
1953 return EINVAL;
1954 }
1955
1956 sin->sin_port = inp->inp_lport;
1957 sin->sin_addr = inp->inp_laddr;
1958 return 0;
1959}
1960
1961int
1962in_getpeeraddr(struct socket *so, struct sockaddr **nam)
1963{
1964 struct inpcb *inp;
1965 struct sockaddr_in *sin;
1966
1967 /*
1968 * Do the malloc first in case it blocks.
1969 */
1970 MALLOC(sin, struct sockaddr_in *, sizeof(*sin), M_SONAME, M_WAITOK);
1971 if (sin == NULL) {
1972 return ENOBUFS;
1973 }
1974 bzero((caddr_t)sin, sizeof(*sin));
1975 sin->sin_family = AF_INET;
1976 sin->sin_len = sizeof(*sin);
1977
1978 if ((inp = sotoinpcb(so)) == NULL) {
1979 FREE(sin, M_SONAME);
1980 return EINVAL;
1981 }
1982 sin->sin_port = inp->inp_fport;
1983 sin->sin_addr = inp->inp_faddr;
1984
1985 *nam = (struct sockaddr *)sin;
1986 return 0;
1987}
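
/*
 * Illustrative sketch (not part of the build): how an in-kernel caller of
 * the pru_peeraddr()-style entry point above might consume the result.
 * The surrounding error handling is an assumption for illustration only.
 *
 *	struct sockaddr *sa = NULL;
 *
 *	if (in_getpeeraddr(so, &sa) == 0) {
 *		// sa was allocated from M_SONAME; the caller owns it
 *		// ... inspect ((struct sockaddr_in *)(void *)sa)->sin_addr ...
 *		FREE(sa, M_SONAME);
 *	}
 */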
1988
1989void
1990in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1991 int errno, void (*notify)(struct inpcb *, int))
1992{
1993 struct inpcb *inp;
1994
1995 lck_rw_lock_shared(pcbinfo->ipi_lock);
1996
1997 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1998 if (!(inp->inp_vflag & INP_IPV4)) {
1999 continue;
2000 }
2001 if (inp->inp_faddr.s_addr != faddr.s_addr ||
2002 inp->inp_socket == NULL) {
2003 continue;
2004 }
2005 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
2006 continue;
2007 }
2008 socket_lock(inp->inp_socket, 1);
2009 (*notify)(inp, errno);
2010 (void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
2011 socket_unlock(inp->inp_socket, 1);
2012 }
2013 lck_rw_done(pcbinfo->ipi_lock);
2014}
2015
2016/*
2017 * Check for alternatives when higher level complains
2018 * about service problems. For now, invalidate cached
2019 * routing information. If the route was created dynamically
2020 * (by a redirect), time to try a default gateway again.
2021 */
2022void
2023in_losing(struct inpcb *inp)
2024{
2025 boolean_t release = FALSE;
2026 struct rtentry *rt;
2027
2028 if ((rt = inp->inp_route.ro_rt) != NULL) {
2029 struct in_ifaddr *ia = NULL;
2030
2031 RT_LOCK(rt);
2032 if (rt->rt_flags & RTF_DYNAMIC) {
2033 /*
2034 * Prevent another thread from modifying rt_key,
2035 * rt_gateway via rt_setgate() after rt_lock is
2036 * dropped by marking the route as defunct.
2037 */
2038 rt->rt_flags |= RTF_CONDEMNED;
2039 RT_UNLOCK(rt);
2040 (void) rtrequest(RTM_DELETE, rt_key(rt),
2041 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
2042 } else {
2043 RT_UNLOCK(rt);
2044 }
2045 /* if the address is gone keep the old route in the pcb */
2046 if (inp->inp_laddr.s_addr != INADDR_ANY &&
2047 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
2048 /*
2049 * Address is around; ditch the route. A new route
2050 * can be allocated the next time output is attempted.
2051 */
2052 release = TRUE;
2053 }
2054 if (ia != NULL) {
2055 IFA_REMREF(&ia->ia_ifa);
2056 }
2057 }
2058 if (rt == NULL || release) {
2059 ROUTE_RELEASE(&inp->inp_route);
2060 }
2061}
2062
2063/*
2064 * After a routing change, flush old routing
2065 * and allocate a (hopefully) better one.
2066 */
2067void
2068in_rtchange(struct inpcb *inp, int errno)
2069{
2070#pragma unused(errno)
2071 boolean_t release = FALSE;
2072 struct rtentry *rt;
2073
2074 if ((rt = inp->inp_route.ro_rt) != NULL) {
2075 struct in_ifaddr *ia = NULL;
2076
2077 /* if address is gone, keep the old route */
2078 if (inp->inp_laddr.s_addr != INADDR_ANY &&
2079 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
2080 /*
2081 * Address is around; ditch the route. A new route
2082 * can be allocated the next time output is attempted.
2083 */
2084 release = TRUE;
2085 }
2086 if (ia != NULL) {
2087 IFA_REMREF(&ia->ia_ifa);
2088 }
2089 }
2090 if (rt == NULL || release) {
2091 ROUTE_RELEASE(&inp->inp_route);
2092 }
2093}
2094
2095/*
2096 * Lookup a PCB based on the local address and port.
2097 */
2098struct inpcb *
2099in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
2100 unsigned int lport_arg, int wild_okay)
2101{
2102 struct inpcb *inp;
2103 int matchwild = 3, wildcard;
2104 u_short lport = (u_short)lport_arg;
2105
2106 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);
2107
2108 if (!wild_okay) {
2109 struct inpcbhead *head;
2110 /*
2111 * Look for an unconnected (wildcard foreign addr) PCB that
2112 * matches the local address and port we're looking for.
2113 */
2114 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2115 pcbinfo->ipi_hashmask)];
2116 LIST_FOREACH(inp, head, inp_hash) {
2117 if (!(inp->inp_vflag & INP_IPV4)) {
2118 continue;
2119 }
2120 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2121 inp->inp_laddr.s_addr == laddr.s_addr &&
2122 inp->inp_lport == lport) {
2123 /*
2124 * Found.
2125 */
2126 return inp;
2127 }
2128 }
2129 /*
2130 * Not found.
2131 */
2132 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
2133 return NULL;
2134 } else {
2135 struct inpcbporthead *porthash;
2136 struct inpcbport *phd;
2137 struct inpcb *match = NULL;
2138 /*
2139 * Best fit PCB lookup.
2140 *
2141 * First see if this local port is in use by looking on the
2142 * port hash list.
2143 */
2144 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
2145 pcbinfo->ipi_porthashmask)];
2146 LIST_FOREACH(phd, porthash, phd_hash) {
2147 if (phd->phd_port == lport) {
2148 break;
2149 }
2150 }
2151 if (phd != NULL) {
2152 /*
2153 * Port is in use by one or more PCBs. Look for best
2154 * fit.
2155 */
2156 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
2157 wildcard = 0;
2158 if (!(inp->inp_vflag & INP_IPV4)) {
2159 continue;
2160 }
2161 if (inp->inp_faddr.s_addr != INADDR_ANY) {
2162 wildcard++;
2163 }
2164 if (inp->inp_laddr.s_addr != INADDR_ANY) {
2165 if (laddr.s_addr == INADDR_ANY) {
2166 wildcard++;
2167 } else if (inp->inp_laddr.s_addr !=
2168 laddr.s_addr) {
2169 continue;
2170 }
2171 } else {
2172 if (laddr.s_addr != INADDR_ANY) {
2173 wildcard++;
2174 }
2175 }
2176 if (wildcard < matchwild) {
2177 match = inp;
2178 matchwild = wildcard;
2179 if (matchwild == 0) {
2180 break;
2181 }
2182 }
2183 }
2184 }
2185 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
2186 0, 0, 0, 0);
2187 return match;
2188 }
2189}
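
/*
 * Worked example of the best-fit scoring above (illustrative only).
 * For a lookup with laddr = 10.0.0.1 and wild_okay set, candidate PCBs
 * bound to the same lport would score as follows:
 *
 *	candidate (laddr / faddr)		wildcard score
 *	10.0.0.1   / INADDR_ANY			0  (exact; scan stops)
 *	10.0.0.1   / 192.0.2.1			1
 *	INADDR_ANY / INADDR_ANY			1
 *	INADDR_ANY / 192.0.2.1			2
 *
 * The candidate with the smallest score wins; only scores below the
 * initial matchwild of 3 are eligible.
 */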
2190
2191/*
2192 * Check if PCB exists in hash list.
2193 */
2194int
2195in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2196 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2197 uid_t *uid, gid_t *gid, struct ifnet *ifp)
2198{
2199 struct inpcbhead *head;
2200 struct inpcb *inp;
2201 u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
2202 int found = 0;
2203 struct inpcb *local_wild = NULL;
2204 struct inpcb *local_wild_mapped = NULL;
2205
2206 *uid = UID_MAX;
2207 *gid = GID_MAX;
2208
2209 /*
2210 * We may have found the pcb in the last lookup - check this first.
2211 */
2212
2213 lck_rw_lock_shared(pcbinfo->ipi_lock);
2214
2215 /*
2216 * First look for an exact match.
2217 */
2218 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2219 pcbinfo->ipi_hashmask)];
2220 LIST_FOREACH(inp, head, inp_hash) {
2221 if (!(inp->inp_vflag & INP_IPV4)) {
2222 continue;
2223 }
2224 if (inp_restricted_recv(inp, ifp)) {
2225 continue;
2226 }
2227
2228#if NECP
2229 if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2230 continue;
2231 }
2232#endif /* NECP */
2233
2234 if (inp->inp_faddr.s_addr == faddr.s_addr &&
2235 inp->inp_laddr.s_addr == laddr.s_addr &&
2236 inp->inp_fport == fport &&
2237 inp->inp_lport == lport) {
2238 if ((found = (inp->inp_socket != NULL))) {
2239 /*
2240 * Found.
2241 */
2242 *uid = kauth_cred_getuid(
2243 inp->inp_socket->so_cred);
2244 *gid = kauth_cred_getgid(
2245 inp->inp_socket->so_cred);
2246 }
2247 lck_rw_done(pcbinfo->ipi_lock);
2248 return found;
2249 }
2250 }
2251
2252 if (!wildcard) {
2253 /*
2254 * Not found.
2255 */
2256 lck_rw_done(pcbinfo->ipi_lock);
2257 return 0;
2258 }
2259
2260 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2261 pcbinfo->ipi_hashmask)];
2262 LIST_FOREACH(inp, head, inp_hash) {
2263 if (!(inp->inp_vflag & INP_IPV4)) {
2264 continue;
2265 }
2266 if (inp_restricted_recv(inp, ifp)) {
2267 continue;
2268 }
2269
2270#if NECP
2271 if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2272 continue;
2273 }
2274#endif /* NECP */
2275
2276 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2277 inp->inp_lport == lport) {
2278 if (inp->inp_laddr.s_addr == laddr.s_addr) {
2279 if ((found = (inp->inp_socket != NULL))) {
2280 *uid = kauth_cred_getuid(
2281 inp->inp_socket->so_cred);
2282 *gid = kauth_cred_getgid(
2283 inp->inp_socket->so_cred);
2284 }
2285 lck_rw_done(pcbinfo->ipi_lock);
2286 return found;
2287 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2288 if (inp->inp_socket &&
2289 SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
2290 local_wild_mapped = inp;
2291 } else {
2292 local_wild = inp;
2293 }
2294 }
2295 }
2296 }
2297 if (local_wild == NULL) {
2298 if (local_wild_mapped != NULL) {
2299 if ((found = (local_wild_mapped->inp_socket != NULL))) {
2300 *uid = kauth_cred_getuid(
2301 local_wild_mapped->inp_socket->so_cred);
2302 *gid = kauth_cred_getgid(
2303 local_wild_mapped->inp_socket->so_cred);
2304 }
2305 lck_rw_done(pcbinfo->ipi_lock);
2306 return found;
2307 }
2308 lck_rw_done(pcbinfo->ipi_lock);
2309 return 0;
2310 }
2311 if ((found = (local_wild->inp_socket != NULL))) {
2312 *uid = kauth_cred_getuid(
2313 local_wild->inp_socket->so_cred);
2314 *gid = kauth_cred_getgid(
2315 local_wild->inp_socket->so_cred);
2316 }
2317 lck_rw_done(pcbinfo->ipi_lock);
2318 return found;
2319}
2320
2321/*
2322 * Lookup PCB in hash list.
2323 */
2324struct inpcb *
2325in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2326 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2327 struct ifnet *ifp)
2328{
2329 struct inpcbhead *head;
2330 struct inpcb *inp;
2331 u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
2332 struct inpcb *local_wild = NULL;
2333 struct inpcb *local_wild_mapped = NULL;
2334
2335 /*
2336 * We may have found the pcb in the last lookup - check this first.
2337 */
2338
2339 lck_rw_lock_shared(pcbinfo->ipi_lock);
2340
2341 /*
2342 * First look for an exact match.
2343 */
2344 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2345 pcbinfo->ipi_hashmask)];
2346 LIST_FOREACH(inp, head, inp_hash) {
2347 if (!(inp->inp_vflag & INP_IPV4)) {
2348 continue;
2349 }
2350 if (inp_restricted_recv(inp, ifp)) {
2351 continue;
2352 }
2353
2354#if NECP
2355 if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2356 continue;
2357 }
2358#endif /* NECP */
2359
2360 if (inp->inp_faddr.s_addr == faddr.s_addr &&
2361 inp->inp_laddr.s_addr == laddr.s_addr &&
2362 inp->inp_fport == fport &&
2363 inp->inp_lport == lport) {
2364 /*
2365 * Found.
2366 */
2367 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2368 WNT_STOPUSING) {
2369 lck_rw_done(pcbinfo->ipi_lock);
2370 return inp;
2371 } else {
2372 /* it's there but dead, say it isn't found */
2373 lck_rw_done(pcbinfo->ipi_lock);
2374 return NULL;
2375 }
2376 }
2377 }
2378
2379 if (!wildcard) {
2380 /*
2381 * Not found.
2382 */
2383 lck_rw_done(pcbinfo->ipi_lock);
2384 return NULL;
2385 }
2386
2387 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2388 pcbinfo->ipi_hashmask)];
2389 LIST_FOREACH(inp, head, inp_hash) {
2390 if (!(inp->inp_vflag & INP_IPV4)) {
2391 continue;
2392 }
2393 if (inp_restricted_recv(inp, ifp)) {
2394 continue;
2395 }
2396
2397#if NECP
2398 if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2399 continue;
2400 }
2401#endif /* NECP */
2402
2403 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2404 inp->inp_lport == lport) {
2405 if (inp->inp_laddr.s_addr == laddr.s_addr) {
2406 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2407 WNT_STOPUSING) {
2408 lck_rw_done(pcbinfo->ipi_lock);
2409 return inp;
2410 } else {
2411 /* it's dead; say it isn't found */
2412 lck_rw_done(pcbinfo->ipi_lock);
2413 return NULL;
2414 }
2415 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2416 if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
2417 local_wild_mapped = inp;
2418 } else {
2419 local_wild = inp;
2420 }
2421 }
2422 }
2423 }
2424 if (local_wild == NULL) {
2425 if (local_wild_mapped != NULL) {
2426 if (in_pcb_checkstate(local_wild_mapped,
2427 WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2428 lck_rw_done(pcbinfo->ipi_lock);
2429 return local_wild_mapped;
2430 } else {
2431 /* it's dead; say it isn't found */
2432 lck_rw_done(pcbinfo->ipi_lock);
2433 return NULL;
2434 }
2435 }
2436 lck_rw_done(pcbinfo->ipi_lock);
2437 return NULL;
2438 }
2439 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2440 lck_rw_done(pcbinfo->ipi_lock);
2441 return local_wild;
2442 }
2443 /*
2444 * It's either not found or is already dead.
2445 */
2446 lck_rw_done(pcbinfo->ipi_lock);
2447 return NULL;
2448}
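
/*
 * Illustrative sketch (not compiled): a datagram input path looking up the
 * receiving PCB with wildcards allowed, roughly as udp_input()-style code
 * does.  The variable names (ip, uh) are assumptions for illustration.
 *
 *	inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport,
 *	    ip->ip_dst, uh->uh_dport, 1, ifp);
 *	if (inp == NULL) {
 *		// no matching socket; the caller may drop the packet or
 *		// generate an ICMP port unreachable
 *	}
 *	// on success a WNT_ACQUIRE reference is held on the PCB; drop it
 *	// with in_pcb_checkstate(inp, WNT_RELEASE, ...) when done
 */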
2449
2450/*
2451 * @brief Insert PCB onto various hash lists.
2452 *
2453 * @param inp Pointer to internet protocol control block
2454 * @param locked Indicates whether ipi_lock (protecting the pcb list)
2455 *               is already held by the caller.
2456 *
2457 * @return int error on failure and 0 on success
2458 */
2459int
2460in_pcbinshash(struct inpcb *inp, int locked)
2461{
2462 struct inpcbhead *pcbhash;
2463 struct inpcbporthead *pcbporthash;
2464 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
2465 struct inpcbport *phd;
2466 u_int32_t hashkey_faddr;
2467
2468 if (!locked) {
2469 if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
2470 /*
2471 * Lock inversion issue, mostly with udp
2472 * multicast packets
2473 */
2474 socket_unlock(inp->inp_socket, 0);
2475 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
2476 socket_lock(inp->inp_socket, 0);
2477 }
2478 }
2479
2480 /*
2481	 * This routine or its caller may have given up the
2482	 * socket's protocol lock briefly.
2483	 * During that time the socket may have been dropped;
2484	 * guard against that here.
2485 */
2486 if (inp->inp_state == INPCB_STATE_DEAD) {
2487 if (!locked) {
2488 lck_rw_done(pcbinfo->ipi_lock);
2489 }
2490 return ECONNABORTED;
2491 }
2492
2493
2494 if (inp->inp_vflag & INP_IPV6) {
2495 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2496 } else {
2497 hashkey_faddr = inp->inp_faddr.s_addr;
2498 }
2499
2500 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2501 inp->inp_fport, pcbinfo->ipi_hashmask);
2502
2503 pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];
2504
2505 pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
2506 pcbinfo->ipi_porthashmask)];
2507
2508 /*
2509 * Go through port list and look for a head for this lport.
2510 */
2511 LIST_FOREACH(phd, pcbporthash, phd_hash) {
2512 if (phd->phd_port == inp->inp_lport) {
2513 break;
2514 }
2515 }
2516
2517 /*
2518 * If none exists, malloc one and tack it on.
2519 */
2520 if (phd == NULL) {
2521 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport),
2522 M_PCB, M_WAITOK);
2523 if (phd == NULL) {
2524 if (!locked) {
2525 lck_rw_done(pcbinfo->ipi_lock);
2526 }
2527 return ENOBUFS; /* XXX */
2528 }
2529 phd->phd_port = inp->inp_lport;
2530 LIST_INIT(&phd->phd_pcblist);
2531 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
2532 }
2533
2534 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2535
2536
2537 inp->inp_phd = phd;
2538 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
2539 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
2540 inp->inp_flags2 |= INP2_INHASHLIST;
2541
2542 if (!locked) {
2543 lck_rw_done(pcbinfo->ipi_lock);
2544 }
2545
2546#if NECP
2547 // This call catches the original setting of the local address
2548 inp_update_necp_policy(inp, NULL, NULL, 0);
2549#endif /* NECP */
2550
2551 return 0;
2552}
2553
2554/*
2555 * Move PCB to the proper hash bucket when { faddr, fport } have been
2556 * changed. NOTE: This does not handle the case of the lport changing (the
2557 * hashed port list would have to be updated as well), so the lport must
2558 * not change after in_pcbinshash() has been called.
2559 */
2560void
2561in_pcbrehash(struct inpcb *inp)
2562{
2563 struct inpcbhead *head;
2564 u_int32_t hashkey_faddr;
2565
2566 if (inp->inp_vflag & INP_IPV6) {
2567 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2568 } else {
2569 hashkey_faddr = inp->inp_faddr.s_addr;
2570 }
2571
2572 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2573 inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
2574 head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];
2575
2576 if (inp->inp_flags2 & INP2_INHASHLIST) {
2577 LIST_REMOVE(inp, inp_hash);
2578 inp->inp_flags2 &= ~INP2_INHASHLIST;
2579 }
2580
2581 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2582 LIST_INSERT_HEAD(head, inp, inp_hash);
2583 inp->inp_flags2 |= INP2_INHASHLIST;
2584
2585#if NECP
2586 // This call catches updates to the remote addresses
2587 inp_update_necp_policy(inp, NULL, NULL, 0);
2588#endif /* NECP */
2589}
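
/*
 * Minimal sketch (not compiled) of the rehash pattern described above: a
 * connect-time caller fills in the foreign address/port and then moves the
 * PCB to its new hash bucket.  The assignments shown are an assumption for
 * illustration; the local port must already have been installed by
 * in_pcbinshash().
 *
 *	inp->inp_faddr = sin->sin_addr;
 *	inp->inp_fport = sin->sin_port;
 *	in_pcbrehash(inp);
 */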
2590
2591/*
2592 * Remove PCB from various lists.
2593 * Must be called while the pcbinfo lock is held in exclusive mode.
2594 */
2595void
2596in_pcbremlists(struct inpcb *inp)
2597{
2598 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
2599
2600 /*
2601 * Check if it's in hashlist -- an inp is placed in hashlist when
2602	 * its local port gets assigned. So it should also be present
2603 * in the port list.
2604 */
2605 if (inp->inp_flags2 & INP2_INHASHLIST) {
2606 struct inpcbport *phd = inp->inp_phd;
2607
2608 VERIFY(phd != NULL && inp->inp_lport > 0);
2609
2610 LIST_REMOVE(inp, inp_hash);
2611 inp->inp_hash.le_next = NULL;
2612 inp->inp_hash.le_prev = NULL;
2613
2614 LIST_REMOVE(inp, inp_portlist);
2615 inp->inp_portlist.le_next = NULL;
2616 inp->inp_portlist.le_prev = NULL;
2617 if (LIST_EMPTY(&phd->phd_pcblist)) {
2618 LIST_REMOVE(phd, phd_hash);
2619 FREE(phd, M_PCB);
2620 }
2621 inp->inp_phd = NULL;
2622 inp->inp_flags2 &= ~INP2_INHASHLIST;
2623 }
2624 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2625
2626 if (inp->inp_flags2 & INP2_TIMEWAIT) {
2627 /* Remove from time-wait queue */
2628 tcp_remove_from_time_wait(inp);
2629 inp->inp_flags2 &= ~INP2_TIMEWAIT;
2630 VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
2631 inp->inp_pcbinfo->ipi_twcount--;
2632 } else {
2633 /* Remove from global inp list if it is not time-wait */
2634 LIST_REMOVE(inp, inp_list);
2635 }
2636
2637 if (inp->inp_flags2 & INP2_IN_FCTREE) {
2638 inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED | INPFC_REMOVE));
2639 VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
2640 }
2641
2642 inp->inp_pcbinfo->ipi_count--;
2643}
2644
2645/*
2646 * Mechanism used to defer the memory release of PCBs.
2647 * The pcb list will contain the pcb until the reaper can clean it up if
2648 * the following conditions are met:
2649 * 1) state is "DEAD", 2) wantcnt is STOPUSING, and 3) usecount is 0.
2650 * This function is called either to mark the pcb as ready for reclamation
2651 * (WNT_STOPUSING), or to acquire (WNT_ACQUIRE) and release (WNT_RELEASE)
2652 * references on it.
2653 */
2654int
2655in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
2656{
2657 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
2658 UInt32 origwant;
2659 UInt32 newwant;
2660
2661 switch (mode) {
2662 case WNT_STOPUSING:
2663 /*
2664		 * Try to mark the pcb as ready for recycling. CAS with
2665		 * STOPUSING; if that succeeds we're done, if the pcb is
2666		 * still in use it will be marked later.
2667 */
2668 if (locked == 0) {
2669 socket_lock(pcb->inp_socket, 1);
2670 }
2671 pcb->inp_state = INPCB_STATE_DEAD;
2672
2673stopusing:
2674 if (pcb->inp_socket->so_usecount < 0) {
2675 panic("%s: pcb=%p so=%p usecount is negative\n",
2676 __func__, pcb, pcb->inp_socket);
2677 /* NOTREACHED */
2678 }
2679 if (locked == 0) {
2680 socket_unlock(pcb->inp_socket, 1);
2681 }
2682
2683 inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);
2684
2685 origwant = *wantcnt;
2686 if ((UInt16) origwant == 0xffff) { /* should stop using */
2687 return WNT_STOPUSING;
2688 }
2689 newwant = 0xffff;
2690 if ((UInt16) origwant == 0) {
2691			/* try to mark it as unusable now */
2692 OSCompareAndSwap(origwant, newwant, wantcnt);
2693 }
2694 return WNT_STOPUSING;
2695
2696 case WNT_ACQUIRE:
2697 /*
2698		 * Try to increase the reference on the pcb. If it is marked
2699		 * WNT_STOPUSING, bail out. If the socket state is DEAD, try to
2700		 * set the count to STOPUSING and return failure; otherwise
2701		 * increase the count.
2701 */
2702 do {
2703 origwant = *wantcnt;
2704 if ((UInt16) origwant == 0xffff) {
2705 /* should stop using */
2706 return WNT_STOPUSING;
2707 }
2708 newwant = origwant + 1;
2709 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2710 return WNT_ACQUIRE;
2711
2712 case WNT_RELEASE:
2713 /*
2714		 * Release a reference. If the pcb state is DEAD after the
2715		 * release, set the want count to STOPUSING.
2716 */
2717 if (locked == 0) {
2718 socket_lock(pcb->inp_socket, 1);
2719 }
2720
2721 do {
2722 origwant = *wantcnt;
2723 if ((UInt16) origwant == 0x0) {
2724 panic("%s: pcb=%p release with zero count",
2725 __func__, pcb);
2726 /* NOTREACHED */
2727 }
2728 if ((UInt16) origwant == 0xffff) {
2729 /* should stop using */
2730 if (locked == 0) {
2731 socket_unlock(pcb->inp_socket, 1);
2732 }
2733 return WNT_STOPUSING;
2734 }
2735 newwant = origwant - 1;
2736 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2737
2738 if (pcb->inp_state == INPCB_STATE_DEAD) {
2739 goto stopusing;
2740 }
2741 if (pcb->inp_socket->so_usecount < 0) {
2742 panic("%s: RELEASE pcb=%p so=%p usecount is negative\n",
2743 __func__, pcb, pcb->inp_socket);
2744 /* NOTREACHED */
2745 }
2746
2747 if (locked == 0) {
2748 socket_unlock(pcb->inp_socket, 1);
2749 }
2750 return WNT_RELEASE;
2751
2752 default:
2753 panic("%s: so=%p not a valid state =%x\n", __func__,
2754 pcb->inp_socket, mode);
2755 /* NOTREACHED */
2756 }
2757
2758 /* NOTREACHED */
2759 return mode;
2760}
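
/*
 * Illustrative sketch (not compiled): the acquire/release pattern used by
 * PCB-list walkers such as in_pcbnotifyall() above.
 *
 *	if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
 *		continue;			// being reclaimed; skip it
 *	socket_lock(inp->inp_socket, 1);
 *	// ... operate on the pcb ...
 *	(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
 *	socket_unlock(inp->inp_socket, 1);
 */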
2761
2762/*
2763 * inpcb_to_compat copies specific bits of an inpcb to an inpcb_compat.
2764 * The inpcb_compat data structure is passed to user space and must
2765 * not change. We intentionally avoid copying pointers.
2766 */
2767void
2768inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
2769{
2770 bzero(inp_compat, sizeof(*inp_compat));
2771 inp_compat->inp_fport = inp->inp_fport;
2772 inp_compat->inp_lport = inp->inp_lport;
2773 inp_compat->nat_owner = 0;
2774 inp_compat->nat_cookie = 0;
2775 inp_compat->inp_gencnt = inp->inp_gencnt;
2776 inp_compat->inp_flags = inp->inp_flags;
2777 inp_compat->inp_flow = inp->inp_flow;
2778 inp_compat->inp_vflag = inp->inp_vflag;
2779 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
2780 inp_compat->inp_ip_p = inp->inp_ip_p;
2781 inp_compat->inp_dependfaddr.inp6_foreign =
2782 inp->inp_dependfaddr.inp6_foreign;
2783 inp_compat->inp_dependladdr.inp6_local =
2784 inp->inp_dependladdr.inp6_local;
2785 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2786 inp_compat->inp_depend6.inp6_hlim = 0;
2787 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2788 inp_compat->inp_depend6.inp6_ifindex = 0;
2789 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2790}
2791
2792#if XNU_TARGET_OS_OSX
2793void
2794inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
2795{
2796 xinp->inp_fport = inp->inp_fport;
2797 xinp->inp_lport = inp->inp_lport;
2798 xinp->inp_gencnt = inp->inp_gencnt;
2799 xinp->inp_flags = inp->inp_flags;
2800 xinp->inp_flow = inp->inp_flow;
2801 xinp->inp_vflag = inp->inp_vflag;
2802 xinp->inp_ip_ttl = inp->inp_ip_ttl;
2803 xinp->inp_ip_p = inp->inp_ip_p;
2804 xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
2805 xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
2806 xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2807 xinp->inp_depend6.inp6_hlim = 0;
2808 xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2809 xinp->inp_depend6.inp6_ifindex = 0;
2810 xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2811}
2812#endif /* XNU_TARGET_OS_OSX */
2813
2814/*
2815 * The following routines implement this scheme:
2816 *
2817 * Callers of ip_output() that intend to cache the route in the inpcb pass
2818 * a local copy of the struct route to ip_output(). Using a local copy of
2819 * the cached route significantly simplifies things as IP no longer has to
2820 * worry about having exclusive access to the passed in struct route, since
2821 * it's defined in the caller's stack; in essence, this allows for a lock-
2822 * less operation when updating the struct route at the IP level and below,
2823 * whenever necessary. The scheme works as follows:
2824 *
2825 * Prior to dropping the socket's lock and calling ip_output(), the caller
2826 * copies the struct route from the inpcb into its stack, and adds a reference
2827 * to the cached route entry, if there was any. The socket's lock is then
2828 * dropped and ip_output() is called with a pointer to the copy of struct
2829 * route defined on the stack (not to the one in the inpcb.)
2830 *
2831 * Upon returning from ip_output(), the caller then acquires the socket's
2832 * lock and synchronizes the cache; if there is no route cached in the inpcb,
2833 * it copies the local copy of struct route (which may or may not contain any
2834 * route) back into the cache; otherwise, if the inpcb has a route cached in
2835 * it, the one in the local copy will be freed, if there's any. Trashing the
2836 * cached route in the inpcb can be avoided because ip_output() is single-
2837 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
2838 * by the socket/transport layer.)
2839 */
2840void
2841inp_route_copyout(struct inpcb *inp, struct route *dst)
2842{
2843 struct route *src = &inp->inp_route;
2844
2845 socket_lock_assert_owned(inp->inp_socket);
2846
2847 /*
2848 * If the route in the PCB is stale or not for IPv4, blow it away;
2849 * this is possible in the case of IPv4-mapped address case.
2850 */
2851 if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) {
2852 ROUTE_RELEASE(src);
2853 }
2854
2855 route_copyout(dst, src, sizeof(*dst));
2856}
2857
2858void
2859inp_route_copyin(struct inpcb *inp, struct route *src)
2860{
2861 struct route *dst = &inp->inp_route;
2862
2863 socket_lock_assert_owned(inp->inp_socket);
2864
2865 /* Minor sanity check */
2866 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) {
2867 panic("%s: wrong or corrupted route: %p", __func__, src);
2868 }
2869
2870 route_copyin(src, dst, sizeof(*src));
2871}
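
/*
 * Minimal sketch (not compiled) of the caller pattern described above; the
 * actual users are the transport output paths.  The ip_output() argument
 * list is elided/assumed here for illustration.
 *
 *	struct route ro;
 *
 *	inp_route_copyout(inp, &ro);	// copy cached route, take a reference
 *	socket_unlock(so, 0);
 *	error = ip_output(m, opt, &ro, flags, mopt, &ipoa);
 *	socket_lock(so, 0);
 *	inp_route_copyin(inp, &ro);	// synchronize the cache, drop our ref
 */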
2872
2873/*
2874 * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option.
2875 */
2876int
2877inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
2878{
2879 struct ifnet *ifp = NULL;
2880
2881 ifnet_head_lock_shared();
2882 if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
2883 (ifp = ifindex2ifnet[ifscope]) == NULL)) {
2884 ifnet_head_done();
2885 return ENXIO;
2886 }
2887 ifnet_head_done();
2888
2889 VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);
2890
2891 /*
2892 * A zero interface scope value indicates an "unbind".
2893 * Otherwise, take in whatever value the app desires;
2894 * the app may already know the scope (or force itself
2895 * to such a scope) ahead of time before the interface
2896 * gets attached. It doesn't matter either way; any
2897 * route lookup from this point on will require an
2898 * exact match for the embedded interface scope.
2899 */
2900 inp->inp_boundifp = ifp;
2901 if (inp->inp_boundifp == NULL) {
2902 inp->inp_flags &= ~INP_BOUND_IF;
2903 } else {
2904 inp->inp_flags |= INP_BOUND_IF;
2905 }
2906
2907 /* Blow away any cached route in the PCB */
2908 ROUTE_RELEASE(&inp->inp_route);
2909
2910 if (pifp != NULL) {
2911 *pifp = ifp;
2912 }
2913
2914 return 0;
2915}
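
/*
 * Illustrative sketch (not compiled): how an IP_BOUND_IF setsockopt handler
 * might feed the user-supplied interface index into inp_bindif().  The
 * sooptcopyin() usage shown is an assumption for illustration.
 *
 *	unsigned int ifscope = IFSCOPE_NONE;
 *	struct ifnet *ifp = NULL;
 *
 *	error = sooptcopyin(sopt, &ifscope, sizeof(ifscope), sizeof(ifscope));
 *	if (error == 0)
 *		error = inp_bindif(inp, ifscope, &ifp);
 */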
2916
2917/*
2918 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2919 * as well as for setting PROC_UUID_NO_CELLULAR policy.
2920 */
2921void
2922inp_set_nocellular(struct inpcb *inp)
2923{
2924 inp->inp_flags |= INP_NO_IFT_CELLULAR;
2925
2926 /* Blow away any cached route in the PCB */
2927 ROUTE_RELEASE(&inp->inp_route);
2928}
2929
2930/*
2931 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2932 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
2933 */
2934void
2935inp_clear_nocellular(struct inpcb *inp)
2936{
2937 struct socket *so = inp->inp_socket;
2938
2939 /*
2940 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
2941	 * has a higher precedence than INP_NO_IFT_CELLULAR. Clear the flag
2942 * if and only if the socket is unrestricted.
2943 */
2944 if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
2945 inp->inp_flags &= ~INP_NO_IFT_CELLULAR;
2946
2947 /* Blow away any cached route in the PCB */
2948 ROUTE_RELEASE(&inp->inp_route);
2949 }
2950}
2951
2952void
2953inp_set_noexpensive(struct inpcb *inp)
2954{
2955 inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;
2956
2957 /* Blow away any cached route in the PCB */
2958 ROUTE_RELEASE(&inp->inp_route);
2959}
2960
2961void
2962inp_set_noconstrained(struct inpcb *inp)
2963{
2964 inp->inp_flags2 |= INP2_NO_IFF_CONSTRAINED;
2965
2966 /* Blow away any cached route in the PCB */
2967 ROUTE_RELEASE(&inp->inp_route);
2968}
2969
2970void
2971inp_set_awdl_unrestricted(struct inpcb *inp)
2972{
2973 inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;
2974
2975 /* Blow away any cached route in the PCB */
2976 ROUTE_RELEASE(&inp->inp_route);
2977}
2978
2979boolean_t
2980inp_get_awdl_unrestricted(struct inpcb *inp)
2981{
2982 return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
2983}
2984
2985void
2986inp_clear_awdl_unrestricted(struct inpcb *inp)
2987{
2988 inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;
2989
2990 /* Blow away any cached route in the PCB */
2991 ROUTE_RELEASE(&inp->inp_route);
2992}
2993
2994void
2995inp_set_intcoproc_allowed(struct inpcb *inp)
2996{
2997 inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
2998
2999 /* Blow away any cached route in the PCB */
3000 ROUTE_RELEASE(&inp->inp_route);
3001}
3002
3003boolean_t
3004inp_get_intcoproc_allowed(struct inpcb *inp)
3005{
3006 return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE;
3007}
3008
3009void
3010inp_clear_intcoproc_allowed(struct inpcb *inp)
3011{
3012 inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED;
3013
3014 /* Blow away any cached route in the PCB */
3015 ROUTE_RELEASE(&inp->inp_route);
3016}
3017
3018#if NECP
3019/*
3020 * Called when PROC_UUID_NECP_APP_POLICY is set.
3021 */
3022void
3023inp_set_want_app_policy(struct inpcb *inp)
3024{
3025 inp->inp_flags2 |= INP2_WANT_APP_POLICY;
3026}
3027
3028/*
3029 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
3030 */
3031void
3032inp_clear_want_app_policy(struct inpcb *inp)
3033{
3034 inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
3035}
3036#endif /* NECP */
3037
3038/*
3039 * Calculate flow hash for an inp, used by an interface to identify a
3040 * flow. When an interface provides a flow control advisory, this flow
3041 * hash is used as an identifier.
3042 */
3043u_int32_t
3044inp_calc_flowhash(struct inpcb *inp)
3045{
3046 struct inp_flowhash_key fh __attribute__((aligned(8)));
3047 u_int32_t flowhash = 0;
3048 struct inpcb *tmp_inp = NULL;
3049
3050 if (inp_hash_seed == 0) {
3051 inp_hash_seed = RandomULong();
3052 }
3053
3054 bzero(&fh, sizeof(fh));
3055
3056 bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof(fh.infh_laddr));
3057 bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof(fh.infh_faddr));
3058
3059 fh.infh_lport = inp->inp_lport;
3060 fh.infh_fport = inp->inp_fport;
3061 fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
3062 fh.infh_proto = inp->inp_ip_p;
3063 fh.infh_rand1 = RandomULong();
3064 fh.infh_rand2 = RandomULong();
3065
3066try_again:
3067 flowhash = net_flowhash(&fh, sizeof(fh), inp_hash_seed);
3068 if (flowhash == 0) {
3069 /* try to get a non-zero flowhash */
3070 inp_hash_seed = RandomULong();
3071 goto try_again;
3072 }
3073
3074 inp->inp_flowhash = flowhash;
3075
3076 /* Insert the inp into inp_fc_tree */
3077 lck_mtx_lock_spin(&inp_fc_lck);
3078 tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
3079 if (tmp_inp != NULL) {
3080 /*
3081 * There is a different inp with the same flowhash.
3082 * There can be a collision on flow hash but the
3083 * probability is low. Let's recompute the
3084 * flowhash.
3085 */
3086 lck_mtx_unlock(&inp_fc_lck);
3087 /* recompute hash seed */
3088 inp_hash_seed = RandomULong();
3089 goto try_again;
3090 }
3091
3092 RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
3093 inp->inp_flags2 |= INP2_IN_FCTREE;
3094 lck_mtx_unlock(&inp_fc_lck);
3095
3096 return flowhash;
3097}
3098
3099void
3100inp_flowadv(uint32_t flowhash)
3101{
3102 struct inpcb *inp;
3103
3104 inp = inp_fc_getinp(flowhash, 0);
3105
3106 if (inp == NULL) {
3107 return;
3108 }
3109 inp_fc_feedback(inp);
3110}
3111
3112/*
3113 * Function to compare inp_fc_entries in inp flow control tree
3114 */
3115static inline int
3116infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
3117{
3118 return memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
3119 sizeof(inp1->inp_flowhash));
3120}
3121
3122static struct inpcb *
3123inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
3124{
3125 struct inpcb *inp = NULL;
3126 int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;
3127
3128 lck_mtx_lock_spin(&inp_fc_lck);
3129 key_inp.inp_flowhash = flowhash;
3130 inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
3131 if (inp == NULL) {
3132 /* inp is not present, return */
3133 lck_mtx_unlock(&inp_fc_lck);
3134 return NULL;
3135 }
3136
3137 if (flags & INPFC_REMOVE) {
3138 RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
3139 lck_mtx_unlock(&inp_fc_lck);
3140
3141 bzero(&(inp->infc_link), sizeof(inp->infc_link));
3142 inp->inp_flags2 &= ~INP2_IN_FCTREE;
3143 return NULL;
3144 }
3145
3146 if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) {
3147 inp = NULL;
3148 }
3149 lck_mtx_unlock(&inp_fc_lck);
3150
3151 return inp;
3152}
3153
3154static void
3155inp_fc_feedback(struct inpcb *inp)
3156{
3157 struct socket *so = inp->inp_socket;
3158
3159 /* we already hold a want_cnt on this inp, socket can't be null */
3160 VERIFY(so != NULL);
3161 socket_lock(so, 1);
3162
3163 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
3164 socket_unlock(so, 1);
3165 return;
3166 }
3167
3168 if (inp->inp_sndinprog_cnt > 0) {
3169 inp->inp_flags |= INP_FC_FEEDBACK;
3170 }
3171
3172 /*
3173 * Return if the connection is not in flow-controlled state.
3174	 * This can happen if the connection experienced
3175	 * loss while it was in the flow-controlled state.
3176 */
3177 if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
3178 socket_unlock(so, 1);
3179 return;
3180 }
3181 inp_reset_fc_state(inp);
3182
3183 if (SOCK_TYPE(so) == SOCK_STREAM) {
3184 inp_fc_unthrottle_tcp(inp);
3185 }
3186
3187 socket_unlock(so, 1);
3188}
3189
3190void
3191inp_reset_fc_state(struct inpcb *inp)
3192{
3193 struct socket *so = inp->inp_socket;
3194 int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
3195 int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;
3196
3197 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
3198
3199 if (suspended) {
3200 so->so_flags &= ~(SOF_SUSPENDED);
3201 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
3202 }
3203
3204 /* Give a write wakeup to unblock the socket */
3205 if (needwakeup) {
3206 sowwakeup(so);
3207 }
3208}
3209
3210int
3211inp_set_fc_state(struct inpcb *inp, int advcode)
3212{
3213 boolean_t is_flow_controlled = INP_WAIT_FOR_IF_FEEDBACK(inp);
3214 struct inpcb *tmp_inp = NULL;
3215 /*
3216 * If there was a feedback from the interface when
3217 * send operation was in progress, we should ignore
3218 * this flow advisory to avoid a race between setting
3219 * flow controlled state and receiving feedback from
3220 * the interface
3221 */
3222 if (inp->inp_flags & INP_FC_FEEDBACK) {
3223 return 0;
3224 }
3225
3226 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
3227 if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
3228 INPFC_SOLOCKED)) != NULL) {
3229 if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
3230 return 0;
3231 }
3232 VERIFY(tmp_inp == inp);
3233 switch (advcode) {
3234 case FADV_FLOW_CONTROLLED:
3235 inp->inp_flags |= INP_FLOW_CONTROLLED;
3236 break;
3237 case FADV_SUSPENDED:
3238 inp->inp_flags |= INP_FLOW_SUSPENDED;
3239 soevent(inp->inp_socket,
3240 (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));
3241
3242 /* Record the fact that suspend event was sent */
3243 inp->inp_socket->so_flags |= SOF_SUSPENDED;
3244 break;
3245 }
3246
3247 if (!is_flow_controlled && SOCK_TYPE(inp->inp_socket) == SOCK_STREAM) {
3248 inp_fc_throttle_tcp(inp);
3249 }
3250 return 1;
3251 }
3252 return 0;
3253}
3254
3255/*
3256 * Handler for SO_FLUSH socket option.
3257 */
3258int
3259inp_flush(struct inpcb *inp, int optval)
3260{
3261 u_int32_t flowhash = inp->inp_flowhash;
3262 struct ifnet *rtifp, *oifp;
3263
3264 /* Either all classes or one of the valid ones */
3265 if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) {
3266 return EINVAL;
3267 }
3268
3269 /* We need a flow hash for identification */
3270 if (flowhash == 0) {
3271 return 0;
3272 }
3273
3274 /* Grab the interfaces from the route and pcb */
3275 rtifp = ((inp->inp_route.ro_rt != NULL) ?
3276 inp->inp_route.ro_rt->rt_ifp : NULL);
3277 oifp = inp->inp_last_outifp;
3278
3279 if (rtifp != NULL) {
3280 if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3281 }
3282 if (oifp != NULL && oifp != rtifp) {
3283 if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3284 }
3285
3286 return 0;
3287}
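
/*
 * Illustrative sketch (not compiled): a process asking for its queued
 * packets in all service classes to be flushed via the SO_FLUSH option,
 * which lands in inp_flush() above.  The userland usage is shown only as
 * an assumption.
 *
 *	int tc = SO_TC_ALL;
 *
 *	(void) setsockopt(s, SOL_SOCKET, SO_FLUSH, &tc, sizeof(tc));
 */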
3288
3289/*
3290 * Clear the INP_INADDR_ANY flag (special case for PPP only)
3291 */
3292void
3293inp_clear_INP_INADDR_ANY(struct socket *so)
3294{
3295 struct inpcb *inp = NULL;
3296
3297 socket_lock(so, 1);
3298 inp = sotoinpcb(so);
3299 if (inp) {
3300 inp->inp_flags &= ~INP_INADDR_ANY;
3301 }
3302 socket_unlock(so, 1);
3303}
3304
3305void
3306inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
3307{
3308 struct socket *so = inp->inp_socket;
3309
3310 soprocinfo->spi_pid = so->last_pid;
3311 strlcpy(&soprocinfo->spi_proc_name[0], &inp->inp_last_proc_name[0],
3312 sizeof(soprocinfo->spi_proc_name));
3313 if (so->last_pid != 0) {
3314 uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
3315 }
3316 /*
3317 * When not delegated, the effective pid is the same as the real pid
3318 */
3319 if (so->so_flags & SOF_DELEGATED) {
3320 soprocinfo->spi_delegated = 1;
3321 soprocinfo->spi_epid = so->e_pid;
3322 uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
3323 } else {
3324 soprocinfo->spi_delegated = 0;
3325 soprocinfo->spi_epid = so->last_pid;
3326 }
3327 strlcpy(&soprocinfo->spi_e_proc_name[0], &inp->inp_e_proc_name[0],
3328 sizeof(soprocinfo->spi_e_proc_name));
3329}
3330
3331int
3332inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
3333 struct so_procinfo *soprocinfo)
3334{
3335 struct inpcb *inp = NULL;
3336 int found = 0;
3337
3338 bzero(soprocinfo, sizeof(struct so_procinfo));
3339
3340 if (!flowhash) {
3341 return -1;
3342 }
3343
3344 lck_rw_lock_shared(pcbinfo->ipi_lock);
3345 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
3346 if (inp->inp_state != INPCB_STATE_DEAD &&
3347 inp->inp_socket != NULL &&
3348 inp->inp_flowhash == flowhash) {
3349 found = 1;
3350 inp_get_soprocinfo(inp, soprocinfo);
3351 break;
3352 }
3353 }
3354 lck_rw_done(pcbinfo->ipi_lock);
3355
3356 return found;
3357}
3358
3359#if CONFIG_PROC_UUID_POLICY
3360static void
3361inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
3362{
3363 struct socket *so = inp->inp_socket;
3364 int before, after;
3365
3366 VERIFY(so != NULL);
3367 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
3368
3369 before = INP_NO_CELLULAR(inp);
3370 if (set) {
3371 inp_set_nocellular(inp);
3372 } else {
3373 inp_clear_nocellular(inp);
3374 }
3375 after = INP_NO_CELLULAR(inp);
3376 if (net_io_policy_log && (before != after)) {
3377 static const char *ok = "OK";
3378 static const char *nok = "NOACCESS";
3379 uuid_string_t euuid_buf;
3380 pid_t epid;
3381
3382 if (so->so_flags & SOF_DELEGATED) {
3383 uuid_unparse(so->e_uuid, euuid_buf);
3384 epid = so->e_pid;
3385 } else {
3386 uuid_unparse(so->last_uuid, euuid_buf);
3387 epid = so->last_pid;
3388 }
3389
3390 /* allow this socket to generate another notification event */
3391 so->so_ifdenied_notifies = 0;
3392
3393 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
3394 "euuid %s%s %s->%s\n", __func__,
3395 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
3396 SOCK_TYPE(so), epid, euuid_buf,
3397 (so->so_flags & SOF_DELEGATED) ?
3398 " [delegated]" : "",
3399 ((before < after) ? ok : nok),
3400 ((before < after) ? nok : ok));
3401 }
3402}
3403
3404#if NECP
3405static void
3406inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
3407{
3408 struct socket *so = inp->inp_socket;
3409 int before, after;
3410
3411 VERIFY(so != NULL);
3412 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
3413
3414 before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
3415 if (set) {
3416 inp_set_want_app_policy(inp);
3417 } else {
3418 inp_clear_want_app_policy(inp);
3419 }
3420 after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
3421 if (net_io_policy_log && (before != after)) {
3422 static const char *wanted = "WANTED";
3423 static const char *unwanted = "UNWANTED";
3424 uuid_string_t euuid_buf;
3425 pid_t epid;
3426
3427 if (so->so_flags & SOF_DELEGATED) {
3428 uuid_unparse(so->e_uuid, euuid_buf);
3429 epid = so->e_pid;
3430 } else {
3431 uuid_unparse(so->last_uuid, euuid_buf);
3432 epid = so->last_pid;
3433 }
3434
3435 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
3436 "euuid %s%s %s->%s\n", __func__,
3437 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
3438 SOCK_TYPE(so), epid, euuid_buf,
3439 (so->so_flags & SOF_DELEGATED) ?
3440 " [delegated]" : "",
3441 ((before < after) ? unwanted : wanted),
3442 ((before < after) ? wanted : unwanted));
3443 }
3444}
3445#endif /* NECP */
3446#endif /* !CONFIG_PROC_UUID_POLICY */
3447
3448#if NECP
3449void
3450inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
3451{
3452 necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
3453 if (necp_socket_should_rescope(inp) &&
3454 inp->inp_lport == 0 &&
3455 inp->inp_laddr.s_addr == INADDR_ANY &&
3456 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
3457 // If we should rescope, and the socket is not yet bound
3458 inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
3459 }
3460}
3461#endif /* NECP */
3462
3463int
3464inp_update_policy(struct inpcb *inp)
3465{
3466#if CONFIG_PROC_UUID_POLICY
3467 struct socket *so = inp->inp_socket;
3468 uint32_t pflags = 0;
3469 int32_t ogencnt;
3470 int err = 0;
3471 uint8_t *lookup_uuid = NULL;
3472
3473 if (!net_io_policy_uuid ||
3474 so == NULL || inp->inp_state == INPCB_STATE_DEAD) {
3475 return 0;
3476 }
3477
3478 /*
3479 * Kernel-created sockets that aren't delegating other sockets
3480 * are currently exempted from UUID policy checks.
3481 */
3482 if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED)) {
3483 return 0;
3484 }
3485
3486#if defined(XNU_TARGET_OS_OSX)
3487 if (so->so_rpid > 0) {
3488 lookup_uuid = so->so_ruuid;
3489 ogencnt = so->so_policy_gencnt;
3490 err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
3491 }
3492#endif
3493 if (lookup_uuid == NULL || err == ENOENT) {
3494 lookup_uuid = ((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid);
3495 ogencnt = so->so_policy_gencnt;
3496 err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
3497 }
3498
3499 /*
3500 * Discard cached generation count if the entry is gone (ENOENT),
3501 * so that we go thru the checks below.
3502 */
3503 if (err == ENOENT && ogencnt != 0) {
3504 so->so_policy_gencnt = 0;
3505 }
3506
3507 /*
3508 * If the generation count has changed, inspect the policy flags
3509 * and act accordingly. If a policy flag was previously set and
3510 * the UUID is no longer present in the table (ENOENT), treat it
3511 * as if the flag has been cleared.
3512 */
3513 if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
3514 /* update cellular policy for this socket */
3515 if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
3516 inp_update_cellular_policy(inp, TRUE);
3517 } else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
3518 inp_update_cellular_policy(inp, FALSE);
3519 }
3520#if NECP
3521 /* update necp want app policy for this socket */
3522 if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
3523 inp_update_necp_want_app_policy(inp, TRUE);
3524 } else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
3525 inp_update_necp_want_app_policy(inp, FALSE);
3526 }
3527#endif /* NECP */
3528 }
3529
3530 return (err == ENOENT) ? 0 : err;
3531#else /* !CONFIG_PROC_UUID_POLICY */
3532#pragma unused(inp)
3533 return 0;
3534#endif /* !CONFIG_PROC_UUID_POLICY */
3535}
3536
3537static unsigned int log_restricted;
3538SYSCTL_DECL(_net_inet);
3539SYSCTL_INT(_net_inet, OID_AUTO, log_restricted,
3540 CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0,
3541 "Log network restrictions");
3542/*
3543 * Called when we need to enforce policy restrictions in the input path.
3544 *
3545 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
3546 */
3547static boolean_t
3548_inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
3549{
3550 VERIFY(inp != NULL);
3551
3552 /*
3553 * Inbound restrictions.
3554 */
3555 if (!sorestrictrecv) {
3556 return FALSE;
3557 }
3558
3559 if (ifp == NULL) {
3560 return FALSE;
3561 }
3562
3563 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
3564 return TRUE;
3565 }
3566
3567 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
3568 return TRUE;
3569 }
3570
3571 if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
3572 return TRUE;
3573 }
3574
3575 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
3576 return TRUE;
3577 }
3578
3579 if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) {
3580 return FALSE;
3581 }
3582
3583 if (inp->inp_flags & INP_RECV_ANYIF) {
3584 return FALSE;
3585 }
3586
3587 if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) {
3588 return FALSE;
3589 }
3590
3591 if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
3592 return TRUE;
3593 }
3594
3595 return TRUE;
3596}
3597
3598boolean_t
3599inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
3600{
3601 boolean_t ret;
3602
3603 ret = _inp_restricted_recv(inp, ifp);
3604 if (ret == TRUE && log_restricted) {
3605 printf("pid %d (%s) is unable to receive packets on %s\n",
3606 current_proc()->p_pid, proc_best_name(current_proc()),
3607 ifp->if_xname);
3608 }
3609 return ret;
3610}
3611
3612/*
3613 * Called when we need to enforce policy restrictions in the output path.
3614 *
3615 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
3616 */
3617static boolean_t
3618_inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3619{
3620 VERIFY(inp != NULL);
3621
3622 /*
3623 * Outbound restrictions.
3624 */
3625 if (!sorestrictsend) {
3626 return FALSE;
3627 }
3628
3629 if (ifp == NULL) {
3630 return FALSE;
3631 }
3632
3633 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
3634 return TRUE;
3635 }
3636
3637 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
3638 return TRUE;
3639 }
3640
3641 if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
3642 return TRUE;
3643 }
3644
3645 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
3646 return TRUE;
3647 }
3648
3649 if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
3650 return TRUE;
3651 }
3652
3653 return FALSE;
3654}
3655
3656boolean_t
3657inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3658{
3659 boolean_t ret;
3660
3661 ret = _inp_restricted_send(inp, ifp);
3662 if (ret == TRUE && log_restricted) {
3663 printf("pid %d (%s) is unable to transmit packets on %s\n",
3664 current_proc()->p_pid, proc_best_name(current_proc()),
3665 ifp->if_xname);
3666 }
3667 return ret;
3668}
3669
3670inline void
3671inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack)
3672{
3673 struct ifnet *ifp = inp->inp_last_outifp;
3674 struct socket *so = inp->inp_socket;
3675 if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
3676 (ifp->if_type == IFT_CELLULAR || IFNET_IS_WIFI(ifp))) {
3677 int32_t unsent;
3678
3679 so->so_snd.sb_flags |= SB_SNDBYTE_CNT;
3680
3681 /*
3682 * There can be data outstanding before the connection
3683 * becomes established -- TFO case
3684 */
3685 if (so->so_snd.sb_cc > 0) {
3686 inp_incr_sndbytes_total(so, so->so_snd.sb_cc);
3687 }
3688
3689 unsent = inp_get_sndbytes_allunsent(so, th_ack);
3690 if (unsent > 0) {
3691 inp_incr_sndbytes_unsent(so, unsent);
3692 }
3693 }
3694}
3695
3696inline void
3697inp_incr_sndbytes_total(struct socket *so, int32_t len)
3698{
3699 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3700 struct ifnet *ifp = inp->inp_last_outifp;
3701
3702 if (ifp != NULL) {
3703 VERIFY(ifp->if_sndbyte_total >= 0);
3704 OSAddAtomic64(len, &ifp->if_sndbyte_total);
3705 }
3706}
3707
3708inline void
3709inp_decr_sndbytes_total(struct socket *so, int32_t len)
3710{
3711 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3712 struct ifnet *ifp = inp->inp_last_outifp;
3713
3714 if (ifp != NULL) {
3715 VERIFY(ifp->if_sndbyte_total >= len);
3716 OSAddAtomic64(-len, &ifp->if_sndbyte_total);
3717 }
3718}
3719
3720inline void
3721inp_incr_sndbytes_unsent(struct socket *so, int32_t len)
3722{
3723 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3724 struct ifnet *ifp = inp->inp_last_outifp;
3725
3726 if (ifp != NULL) {
3727 VERIFY(ifp->if_sndbyte_unsent >= 0);
3728 OSAddAtomic64(len, &ifp->if_sndbyte_unsent);
3729 }
3730}
3731
3732inline void
3733inp_decr_sndbytes_unsent(struct socket *so, int32_t len)
3734{
3735 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
3736 return;
3737 }
3738
3739 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3740 struct ifnet *ifp = inp->inp_last_outifp;
3741
3742 if (ifp != NULL) {
3743 if (ifp->if_sndbyte_unsent >= len) {
3744 OSAddAtomic64(-len, &ifp->if_sndbyte_unsent);
3745 } else {
3746 ifp->if_sndbyte_unsent = 0;
3747 }
3748 }
3749}
3750
3751inline void
3752inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
3753{
3754 int32_t len;
3755
3756 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
3757 return;
3758 }
3759
3760 len = inp_get_sndbytes_allunsent(so, th_ack);
3761 inp_decr_sndbytes_unsent(so, len);
3762}
3763
3764
3765inline void
3766inp_set_activity_bitmap(struct inpcb *inp)
3767{
3768 in_stat_set_activity_bitmap(&inp->inp_nw_activity, net_uptime());
3769}
3770
3771inline void
3772inp_get_activity_bitmap(struct inpcb *inp, activity_bitmap_t *ab)
3773{
3774 bcopy(&inp->inp_nw_activity, ab, sizeof(*ab));
3775}
3776
3777void
3778inp_update_last_owner(struct socket *so, struct proc *p, struct proc *ep)
3779{
3780 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3781
3782 if (inp == NULL) {
3783 return;
3784 }
3785
3786 if (p != NULL) {
3787 strlcpy(&inp->inp_last_proc_name[0], proc_name_address(p), sizeof(inp->inp_last_proc_name));
3788 }
3789 if (so->so_flags & SOF_DELEGATED) {
3790 if (ep != NULL) {
3791 strlcpy(&inp->inp_e_proc_name[0], proc_name_address(ep), sizeof(inp->inp_e_proc_name));
3792 } else {
3793 inp->inp_e_proc_name[0] = 0;
3794 }
3795 } else {
3796 inp->inp_e_proc_name[0] = 0;
3797 }
3798}
3799
3800void
3801inp_copy_last_owner(struct socket *so, struct socket *head)
3802{
3803 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3804 struct inpcb *head_inp = (struct inpcb *)head->so_pcb;
3805
3806 if (inp == NULL || head_inp == NULL) {
3807 return;
3808 }
3809
3810 strlcpy(&inp->inp_last_proc_name[0], &head_inp->inp_last_proc_name[0], sizeof(inp->inp_last_proc_name));
3811 strlcpy(&inp->inp_e_proc_name[0], &head_inp->inp_e_proc_name[0], sizeof(inp->inp_e_proc_name));
3812}