/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>

#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mcache.h>
#include <sys/kauth.h>

#include <sys/proc_uuid_policy.h>
#include <sys/syslog.h>

#include <libkern/OSAtomic.h>
#include <kern/locks.h>

#include <machine/limits.h>

#include <kern/zalloc.h>

#include <net/if_types.h>
#include <net/route.h>
#include <net/flowhash.h>
#include <net/flowadv.h>
#include <net/nat464_utils.h>
#include <net/ntstat.h>
#include <net/restricted_in_port.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>

#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>

#include <sys/kdebug.h>
#include <sys/random.h>

#include <dev/random/randomdev.h>
#include <mach/boolean.h>

#include <pexpert/pexpert.h>

#include <net/necp.h>

#include <sys/stat.h>

#include <sys/vnode.h>
extern const char *proc_name_address(struct proc *);

static lck_grp_t *inpcb_lock_grp;
static lck_attr_t *inpcb_lock_attr;
static lck_grp_attr_t *inpcb_lock_grp_attr;
decl_lck_mtx_data(static, inpcb_lock);          /* global INPCB lock */
decl_lck_mtx_data(static, inpcb_timeout_lock);

static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);

static u_int16_t inpcb_timeout_run = 0;         /* INPCB timer is scheduled to run */
static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
static boolean_t inpcb_ticking = FALSE;         /* "slow" timer is scheduled */
static boolean_t inpcb_fast_timer_on = FALSE;

#define INPCB_GCREQ_THRESHOLD   50000

static thread_call_t inpcb_thread_call, inpcb_fast_thread_call;
static void inpcb_sched_timeout(void);
static void inpcb_sched_lazy_timeout(void);
static void _inpcb_sched_timeout(unsigned int);
static void inpcb_timeout(void *, void *);
const int inpcb_timeout_lazy = 10;      /* 10 seconds leeway for lazy timers */
extern int tvtohz(struct timeval *);

#if CONFIG_PROC_UUID_POLICY
static void inp_update_cellular_policy(struct inpcb *, boolean_t);
#if NECP
static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
#endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */

#define DBG_FNC_PCB_LOOKUP      NETDBG_CODE(DBG_NETTCP, (6 << 8))
#define DBG_FNC_PCB_HLOOKUP     NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))

int allow_udp_port_exhaustion = 0;
/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 */
int ipport_lowfirstauto = IPPORT_RESERVED - 1;  /* 1023 */
int ipport_lowlastauto = IPPORT_RESERVEDSTART;  /* 600 */
int ipport_firstauto = IPPORT_HIFIRSTAUTO;      /* 49152 */
int ipport_lastauto = IPPORT_HILASTAUTO;        /* 65535 */
int ipport_hifirstauto = IPPORT_HIFIRSTAUTO;    /* 49152 */
int ipport_hilastauto = IPPORT_HILASTAUTO;      /* 65535 */

#define RANGECHK(var, min, max) \
	if ((var) < (min)) { (var) = (min); } \
	else if ((var) > (max)) { (var) = (max); }
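/*
 * Example of the clamping behavior (a minimal sketch; the handler below
 * uses the macro exactly this way on the candidate sysctl value):
 *
 *	int v = 70000;
 *	RANGECHK(v, 1, IPPORT_RESERVED - 1);	// v becomes 1023
 *	v = 0;
 *	RANGECHK(v, 1, IPPORT_RESERVED - 1);	// v becomes 1
 *
 * Note the macro expands to a bare if/else, so it must be used where a
 * statement is expected.
 */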
static int
sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error;
	int new_value = *(int *)oidp->oid_arg1;
#if (DEBUG | DEVELOPMENT)
	int old_value = *(int *)oidp->oid_arg1;
	/*
	 * For unit testing allow a non-superuser process with the
	 * proper entitlement to modify the variables
	 */
	if (req->newptr) {
		if (proc_suser(current_proc()) != 0 &&
		    (error = priv_check_cred(kauth_cred_get(),
		    PRIV_NETINET_RESERVEDPORT, 0))) {
			return EPERM;
		}
	}
#endif /* (DEBUG | DEVELOPMENT) */

	error = sysctl_handle_int(oidp, &new_value, 0, req);
	if (!error) {
		if (oidp->oid_arg1 == &ipport_lowfirstauto ||
		    oidp->oid_arg1 == &ipport_lowlastauto) {
			RANGECHK(new_value, 1, IPPORT_RESERVED - 1);
		} else {
			RANGECHK(new_value, IPPORT_RESERVED, USHRT_MAX);
		}
		*(int *)oidp->oid_arg1 = new_value;
	}

#if (DEBUG | DEVELOPMENT)
	if (req->newptr) {
		os_log(OS_LOG_DEFAULT,
		    "%s:%u sysctl net.inet.ip.portrange: %d -> %d",
		    proc_best_name(current_proc()), proc_selfpid(),
		    old_value, *(int *)oidp->oid_arg1);
	}
#endif /* (DEBUG | DEVELOPMENT) */

	return error;
}
SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IP Ports");

#if (DEBUG | DEVELOPMENT)
#define CTLFLAGS_IP_PORTRANGE \
	(CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY)
#else
#define CTLFLAGS_IP_PORTRANGE \
	(CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED)
#endif /* (DEBUG | DEVELOPMENT) */

SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
    CTLFLAGS_IP_PORTRANGE,
    &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
    CTLFLAGS_IP_PORTRANGE,
    &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
    CTLFLAGS_IP_PORTRANGE,
    &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
    CTLFLAGS_IP_PORTRANGE,
    &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
    CTLFLAGS_IP_PORTRANGE,
    &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
    CTLFLAGS_IP_PORTRANGE,
    &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, ipport_allow_udp_port_exhaustion,
    CTLFLAG_LOCKED | CTLFLAG_RW, &allow_udp_port_exhaustion, 0, "");
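/*
 * These knobs surface as net.inet.ip.portrange.*.  A minimal userland
 * sketch of reading and setting one of them (hypothetical values; any
 * write goes through sysctl_net_ipport_check() above, so it is clamped
 * and, outside DEBUG/DEVELOPMENT builds, requires superuser):
 */
#if 0
#include <sys/sysctl.h>
#include <stdio.h>

static void
portrange_first_example(void)
{
	int first = 0;
	size_t len = sizeof(first);

	if (sysctlbyname("net.inet.ip.portrange.first", &first, &len,
	    NULL, 0) == 0) {
		printf("ephemeral range starts at %d\n", first);
	}

	first = 50000;          /* hypothetical new lower bound */
	(void) sysctlbyname("net.inet.ip.portrange.first", NULL, NULL,
	    &first, sizeof(first));
}
#endif /* 0 */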
static uint32_t apn_fallbk_debug = 0;
#define apn_fallbk_log(x) do { if (apn_fallbk_debug >= 1) log x; } while (0)

#if !XNU_TARGET_OS_OSX
static boolean_t apn_fallbk_enabled = TRUE;

SYSCTL_DECL(_net_inet);
SYSCTL_NODE(_net_inet, OID_AUTO, apn_fallback, CTLFLAG_RW | CTLFLAG_LOCKED,
    0, "APN Fallback");
SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
    &apn_fallbk_enabled, 0, "APN fallback enable");
SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &apn_fallbk_debug, 0, "APN fallback debug enable");
#else /* XNU_TARGET_OS_OSX */
static boolean_t apn_fallbk_enabled = FALSE;
#endif /* XNU_TARGET_OS_OSX */

extern int udp_use_randomport;
extern int tcp_use_randomport;
/* Structs used for flowhash computation */
struct inp_flowhash_key_addr {
	union {
		struct in_addr v4;
		struct in6_addr v6;
		u_int8_t addr8[16];
		u_int16_t addr16[8];
		u_int32_t addr32[4];
	} infha;
};

struct inp_flowhash_key {
	struct inp_flowhash_key_addr    infh_laddr;
	struct inp_flowhash_key_addr    infh_faddr;
	u_int32_t                       infh_lport;
	u_int32_t                       infh_fport;
	u_int32_t                       infh_af;
	u_int32_t                       infh_proto;
	u_int32_t                       infh_rand1;
	u_int32_t                       infh_rand2;
};

static u_int32_t inp_hash_seed = 0;
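/*
 * The flow hash for an inpcb is computed over the key laid out above:
 * both endpoints, both ports, the address family and protocol, plus
 * random words, all hashed with the per-boot seed.  A minimal sketch
 * of the idea for a v4 flow (field names from the structs above;
 * net_flowhash is the hash routine xnu uses for this purpose):
 */
#if 0
static u_int32_t
example_flowhash(struct inpcb *inp)
{
	struct inp_flowhash_key fh;

	if (inp_hash_seed == 0) {
		inp_hash_seed = RandomULong();
	}
	bzero(&fh, sizeof(fh));
	fh.infh_laddr.infha.v4 = inp->inp_laddr;
	fh.infh_faddr.infha.v4 = inp->inp_faddr;
	fh.infh_lport = inp->inp_lport;
	fh.infh_fport = inp->inp_fport;
	fh.infh_af = AF_INET;
	fh.infh_proto = inp->inp_ip_p;
	fh.infh_rand1 = RandomULong();
	fh.infh_rand2 = RandomULong();

	return net_flowhash(&fh, sizeof(fh), inp_hash_seed);
}
#endif /* 0 */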
static int infc_cmp(const struct inpcb *, const struct inpcb *);

/* Flags used by inp_fc_getinp */
#define INPFC_SOLOCKED  0x1
#define INPFC_REMOVE    0x2
static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);

static void inp_fc_feedback(struct inpcb *);
extern void tcp_remove_from_time_wait(struct inpcb *inp);

decl_lck_mtx_data(static, inp_fc_lck);

RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);

/*
 * Use this inp as a key to find an inp in the flowhash tree.
 * Accesses to it are protected by inp_fc_lck.
 */
struct inpcb key_inp;
/*
 * in_pcb.c: manage the Protocol Control Blocks.
 */

void
in_pcbinit(void)
{
	static int inpcb_initialized = 0;

	VERIFY(!inpcb_initialized);
	inpcb_initialized = 1;

	inpcb_lock_grp_attr = lck_grp_attr_alloc_init();
	inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr);
	inpcb_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr);
	lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr);
	inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout,
	    NULL, THREAD_CALL_PRIORITY_KERNEL);
	/* Give it an arg so that we know that this is the fast timer */
	inpcb_fast_thread_call = thread_call_allocate_with_priority(
		inpcb_timeout, &inpcb_timeout, THREAD_CALL_PRIORITY_KERNEL);
	if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL) {
		panic("unable to alloc the inpcb thread call");
	}

	/*
	 * Initialize data structures required to deliver
	 * flow advisories.
	 */
	lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr);
	lck_mtx_lock(&inp_fc_lck);
	RB_INIT(&inp_fc_tree);
	bzero(&key_inp, sizeof(key_inp));
	lck_mtx_unlock(&inp_fc_lck);
}
#define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \
	((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
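/*
 * INPCB_HAVE_TIMER_REQ answers "is any request pending?" for one
 * intimercount, e.g.:
 *
 *	struct intimercount req = { .intimer_fast = 1 };
 *	INPCB_HAVE_TIMER_REQ(req);	// true: a fast request is queued
 *
 * inpcb_timeout() below uses it both on the per-ipi request blocks and
 * on the aggregated gccnt/tmcnt totals.
 */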
static void
inpcb_timeout(void *arg0, void *arg1)
{
#pragma unused(arg1)
	struct inpcbinfo *ipi;
	boolean_t t, gc;
	struct intimercount gccnt, tmcnt;

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	bzero(&gccnt, sizeof(gccnt));
	bzero(&tmcnt, sizeof(tmcnt));

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	gc = inpcb_garbage_collecting;
	inpcb_garbage_collecting = FALSE;

	t = inpcb_ticking;
	inpcb_ticking = FALSE;

	if (gc || t) {
		lck_mtx_unlock(&inpcb_timeout_lock);

		lck_mtx_lock(&inpcb_lock);
		TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
				bzero(&ipi->ipi_gc_req,
				    sizeof(ipi->ipi_gc_req));
				if (gc && ipi->ipi_gc != NULL) {
					ipi->ipi_gc(ipi);
					gccnt.intimer_lazy +=
					    ipi->ipi_gc_req.intimer_lazy;
					gccnt.intimer_fast +=
					    ipi->ipi_gc_req.intimer_fast;
					gccnt.intimer_nodelay +=
					    ipi->ipi_gc_req.intimer_nodelay;
				}
			}
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
				bzero(&ipi->ipi_timer_req,
				    sizeof(ipi->ipi_timer_req));
				if (t && ipi->ipi_timer != NULL) {
					ipi->ipi_timer(ipi);
					tmcnt.intimer_lazy +=
					    ipi->ipi_timer_req.intimer_lazy;
					tmcnt.intimer_fast +=
					    ipi->ipi_timer_req.intimer_fast;
					tmcnt.intimer_nodelay +=
					    ipi->ipi_timer_req.intimer_nodelay;
				}
			}
		}
		lck_mtx_unlock(&inpcb_lock);
		lck_mtx_lock_spin(&inpcb_timeout_lock);
	}

	/* lock was dropped above, so check first before overriding */
	if (!inpcb_garbage_collecting) {
		inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
	}
	if (!inpcb_ticking) {
		inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
	}

	/* arg0 will be set if we are the fast timer */
	if (arg0 != NULL) {
		inpcb_fast_timer_on = FALSE;
	}
	inpcb_timeout_run--;
	VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);

	/* re-arm the timer if there's work to do */
	if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) {
		inpcb_sched_timeout();
	} else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) {
		/* be lazy when idle with little activity */
		inpcb_sched_lazy_timeout();
	} else {
		inpcb_sched_timeout();
	}

	lck_mtx_unlock(&inpcb_timeout_lock);
}
static void
inpcb_sched_timeout(void)
{
	_inpcb_sched_timeout(0);
}

static void
inpcb_sched_lazy_timeout(void)
{
	_inpcb_sched_timeout(inpcb_timeout_lazy);
}

static void
_inpcb_sched_timeout(unsigned int offset)
{
	uint64_t deadline, leeway;

	clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline);
	LCK_MTX_ASSERT(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
	if (inpcb_timeout_run == 0 &&
	    (inpcb_garbage_collecting || inpcb_ticking)) {
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		if (offset == 0) {
			inpcb_fast_timer_on = TRUE;
			thread_call_enter_delayed(inpcb_fast_thread_call,
			    deadline);
		} else {
			inpcb_fast_timer_on = FALSE;
			clock_interval_to_absolutetime_interval(offset,
			    NSEC_PER_SEC, &leeway);
			thread_call_enter_delayed_with_leeway(
				inpcb_thread_call, NULL, deadline, leeway,
				THREAD_CALL_DELAY_LEEWAY);
		}
	} else if (inpcb_timeout_run == 1 &&
	    offset == 0 && !inpcb_fast_timer_on) {
		/*
		 * Since the request was for a fast timer but the
		 * scheduled timer is a lazy timer, try to schedule
		 * another instance of fast timer also.
		 */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		inpcb_fast_timer_on = TRUE;
		thread_call_enter_delayed(inpcb_fast_thread_call, deadline);
	}
}
void
inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
{
	u_int32_t gccnt;

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	inpcb_garbage_collecting = TRUE;
	gccnt = ipi->ipi_gc_req.intimer_nodelay +
	    ipi->ipi_gc_req.intimer_fast;

	if (gccnt > INPCB_GCREQ_THRESHOLD) {
		type = INPCB_TIMER_FAST;
	}

	switch (type) {
	case INPCB_TIMER_NODELAY:
		atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1);
		inpcb_sched_timeout();
		break;
	case INPCB_TIMER_FAST:
		atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
		inpcb_sched_timeout();
		break;
	default:
		atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);
		inpcb_sched_lazy_timeout();
		break;
	}
	lck_mtx_unlock(&inpcb_timeout_lock);
}
void
inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
{
	lck_mtx_lock_spin(&inpcb_timeout_lock);
	inpcb_ticking = TRUE;
	switch (type) {
	case INPCB_TIMER_NODELAY:
		atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1);
		inpcb_sched_timeout();
		break;
	case INPCB_TIMER_FAST:
		atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
		inpcb_sched_timeout();
		break;
	default:
		atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1);
		inpcb_sched_lazy_timeout();
		break;
	}
	lck_mtx_unlock(&inpcb_timeout_lock);
}
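/*
 * A protocol wires itself into these timers through its inpcbinfo: it
 * attaches the ipi and then requests GC or slow-timer runs as needed.
 * A minimal sketch, assuming a protocol-private inpcbinfo with its
 * ipi_gc/ipi_timer callbacks already filled in (hypothetical names):
 */
#if 0
static struct inpcbinfo example_pcbinfo;        /* hypothetical */

static void
example_proto_init(void)
{
	/* make the ipi visible to inpcb_timeout() */
	in_pcbinfo_attach(&example_pcbinfo);

	/* ask for a lazy garbage-collection pass */
	inpcb_gc_sched(&example_pcbinfo, INPCB_TIMER_LAZY);

	/* ask for a slow-timer run on the next fast tick */
	inpcb_timer_sched(&example_pcbinfo, INPCB_TIMER_FAST);
}
#endif /* 0 */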
void
in_pcbinfo_attach(struct inpcbinfo *ipi)
{
	struct inpcbinfo *ipi0;

	lck_mtx_lock(&inpcb_lock);
	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
		if (ipi0 == ipi) {
			panic("%s: ipi %p already in the list\n",
			    __func__, ipi);
			/* NOTREACHED */
		}
	}
	TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
	lck_mtx_unlock(&inpcb_lock);
}

int
in_pcbinfo_detach(struct inpcbinfo *ipi)
{
	struct inpcbinfo *ipi0;
	int error = 0;

	lck_mtx_lock(&inpcb_lock);
	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
		if (ipi0 == ipi) {
			break;
		}
	}
	if (ipi0 != NULL) {
		TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
	} else {
		error = ENXIO;
	}
	lck_mtx_unlock(&inpcb_lock);

	return error;
}
/*
 * Allocate a PCB and associate it with the socket.
 *
 * Returns:	0			Success
 *		ENOBUFS
 */
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
{
#pragma unused(p)
	struct inpcb *inp;
	caddr_t temp;

	if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
		inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone);
		if (inp == NULL) {
			return ENOBUFS;
		}
		bzero((caddr_t)inp, sizeof(*inp));
	} else {
		inp = (struct inpcb *)(void *)so->so_saved_pcb;
		temp = inp->inp_saved_ppcb;
		bzero((caddr_t)inp, sizeof(*inp));
		inp->inp_saved_ppcb = temp;
	}

	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	inp->inp_pcbinfo = pcbinfo;
	inp->inp_socket = so;
	/* make sure inp_stat is always 64-bit aligned */
	inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
	    sizeof(u_int64_t));
	if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
	    sizeof(*inp->inp_stat) > sizeof(inp->inp_stat_store)) {
		panic("%s: insufficient space to align inp_stat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_cstat is always 64-bit aligned */
	inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
	    sizeof(u_int64_t));
	if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
	    sizeof(*inp->inp_cstat) > sizeof(inp->inp_cstat_store)) {
		panic("%s: insufficient space to align inp_cstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_wstat is always 64-bit aligned */
	inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
	    sizeof(u_int64_t));
	if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
	    sizeof(*inp->inp_wstat) > sizeof(inp->inp_wstat_store)) {
		panic("%s: insufficient space to align inp_wstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_Wstat is always 64-bit aligned */
	inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
	    sizeof(u_int64_t));
	if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
	    sizeof(*inp->inp_Wstat) > sizeof(inp->inp_Wstat_store)) {
		panic("%s: insufficient space to align inp_Wstat", __func__);
		/* NOTREACHED */
	}

	so->so_pcb = (caddr_t)inp;

	if (so->so_proto->pr_flags & PR_PCBLOCK) {
		lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
		    pcbinfo->ipi_lock_attr);
	}

	if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) {
		inp->inp_flags |= IN6P_IPV6_V6ONLY;
	}

	if (ip6_auto_flowlabel) {
		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
	}
	if (intcoproc_unrestricted) {
		inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
	}

#if NECP
	(void) inp_update_policy(inp);
#endif /* NECP */

	lck_rw_lock_exclusive(pcbinfo->ipi_lock);
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
	pcbinfo->ipi_count++;
	lck_rw_done(pcbinfo->ipi_lock);
	return 0;
}
/*
 * in_pcblookup_local_and_cleanup does everything
 * in_pcblookup_local does but it checks for a socket
 * that's going away. Since we know that the lock is
 * held read+write when this function is called, we
 * can safely dispose of this socket like the slow
 * timer would usually do and return NULL. This is
 * great for bind.
 */
static struct inpcb *
in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    u_int lport_arg, int wild_okay)
{
	struct inpcb *inp;

	/* Perform normal lookup */
	inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);

	/* Check if we found a match but it's waiting to be disposed */
	if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
		struct socket *so = inp->inp_socket;

		socket_lock(so, 0);

		if (so->so_usecount == 0) {
			if (inp->inp_state != INPCB_STATE_DEAD) {
				in_pcbdetach(inp);
			}
			in_pcbdispose(inp);     /* will unlock & destroy */
			inp = NULL;
		} else {
			socket_unlock(so, 0);
		}
	}

	return inp;
}
static void
in_pcb_conflict_post_msg(u_int16_t port)
{
	/*
	 * Radar 5523020 send a kernel event notification if a
	 * non-participating socket tries to bind to a port owned
	 * by a socket that has set SOF_NOTIFYCONFLICT.
	 */
	struct kev_msg ev_msg;
	struct kev_in_portinuse in_portinuse;

	bzero(&in_portinuse, sizeof(struct kev_in_portinuse));
	bzero(&ev_msg, sizeof(struct kev_msg));
	in_portinuse.port = ntohs(port);        /* port in host order */
	in_portinuse.req_pid = proc_selfpid();
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_INET_SUBCLASS;
	ev_msg.event_code = KEV_INET_PORTINUSE;
	ev_msg.dv[0].data_ptr = &in_portinuse;
	ev_msg.dv[0].data_length = sizeof(struct kev_in_portinuse);
	ev_msg.dv[1].data_length = 0;
	dlil_post_complete_msg(NULL, &ev_msg);
}
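/*
 * Userspace can observe KEV_INET_PORTINUSE on a PF_SYSTEM kernel-event
 * socket.  A minimal sketch of a listener (error handling elided):
 */
#if 0
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/kern_event.h>
#include <netinet/in_var.h>

static void
portinuse_listener(void)
{
	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
	struct kev_request req = {
		.vendor_code = KEV_VENDOR_APPLE,
		.kev_class = KEV_NETWORK_CLASS,
		.kev_subclass = KEV_INET_SUBCLASS,
	};
	char buf[1024];

	ioctl(fd, SIOCSKEVFILT, &req);
	for (;;) {
		ssize_t n = recv(fd, buf, sizeof(buf), 0);
		struct kern_event_msg *msg = (struct kern_event_msg *)buf;

		if (n > 0 && msg->event_code == KEV_INET_PORTINUSE) {
			struct kev_in_portinuse *piu =
			    (struct kev_in_portinuse *)msg->event_data;
			/* piu->port, piu->req_pid describe the conflict */
		}
	}
}
#endif /* 0 */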
/*
 * Bind an INPCB to an address and/or port.  This routine should not alter
 * the caller-supplied local address "nam".
 *
 * Returns:	0			Success
 *		EADDRNOTAVAIL		Address not available.
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported [notdef]
 *		EACCES			Permission denied
 *		EADDRINUSE		Address in use
 *		EAGAIN			Resource unavailable, try again
 *		priv_check_cred:EPERM	Operation not permitted
 */
int
in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
{
	struct socket *so = inp->inp_socket;
	unsigned short *lastport;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	u_short lport = 0, rand_port = 0;
	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
	int error, randomport, conflict = 0;
	boolean_t anonport = FALSE;
	kauth_cred_t cred;
	struct in_addr laddr;
	struct ifnet *outif = NULL;

	if (TAILQ_EMPTY(&in_ifaddrhead)) {      /* XXX broken! */
		return EADDRNOTAVAIL;
	}
	if (!(so->so_options & (SO_REUSEADDR | SO_REUSEPORT))) {
		wild = 1;
	}

	bzero(&laddr, sizeof(laddr));

	socket_unlock(so, 0);   /* keep reference on socket */
	lck_rw_lock_exclusive(pcbinfo->ipi_lock);
	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
		/* another thread completed the bind */
		lck_rw_done(pcbinfo->ipi_lock);
		socket_lock(so, 0);
		return EINVAL;
	}

	if (nam != NULL) {
		if (nam->sa_len != sizeof(struct sockaddr_in)) {
			lck_rw_done(pcbinfo->ipi_lock);
			socket_lock(so, 0);
			return EINVAL;
		}
		/*
		 * We should check the family, but old programs
		 * incorrectly fail to initialize it.
		 */
		if (nam->sa_family != AF_INET) {
			lck_rw_done(pcbinfo->ipi_lock);
			socket_lock(so, 0);
			return EAFNOSUPPORT;
		}
		lport = SIN(nam)->sin_port;

		if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
			/*
			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
			 * allow complete duplication of binding if
			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
			 * and a multicast address is bound on both
			 * new and duplicated sockets.
			 */
			if (so->so_options & SO_REUSEADDR) {
				reuseport = SO_REUSEADDR | SO_REUSEPORT;
			}
		} else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
			struct sockaddr_in sin;
			struct ifaddr *ifa;

			/* Sanitized for interface address searches */
			bzero(&sin, sizeof(sin));
			sin.sin_family = AF_INET;
			sin.sin_len = sizeof(struct sockaddr_in);
			sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;

			ifa = ifa_ifwithaddr(SA(&sin));
			if (ifa == NULL) {
				lck_rw_done(pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return EADDRNOTAVAIL;
			} else {
				/*
				 * Opportunistically determine the outbound
				 * interface that may be used; this may not
				 * hold true if we end up using a route
				 * going over a different interface, e.g.
				 * when sending to a local address.  This
				 * will get updated again after sending.
				 */
				IFA_LOCK_SPIN(ifa);
				outif = ifa->ifa_ifp;
				IFA_UNLOCK(ifa);
				IFA_REMREF(ifa);
			}
		}

		if (lport != 0) {
#if XNU_TARGET_OS_OSX
			if (ntohs(lport) < IPPORT_RESERVED &&
			    SIN(nam)->sin_addr.s_addr != 0 &&
			    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
				cred = kauth_cred_proc_ref(p);
				error = priv_check_cred(cred,
				    PRIV_NETINET_RESERVEDPORT, 0);
				kauth_cred_unref(&cred);
				if (error != 0) {
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return EACCES;
				}
			}
#endif /* XNU_TARGET_OS_OSX */
			/*
			 * Check whether the process is allowed to bind
			 * to a restricted port
			 */
			if (!current_task_can_use_restricted_in_port(lport,
			    (uint8_t)so->so_proto->pr_protocol,
			    PORT_FLAGS_BSD)) {
				lck_rw_done(pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return EADDRINUSE;
			}

			uid_t u;
			struct inpcb *t;

			if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
			    (u = kauth_cred_getuid(so->so_cred)) != 0 &&
			    (t = in_pcblookup_local_and_cleanup(
				    inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
				    INPLOOKUP_WILDCARD)) != NULL &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY ||
			    !(t->inp_socket->so_options & SO_REUSEPORT)) &&
			    (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
			    !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY) &&
			    (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
			    !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
			    uuid_compare(t->necp_client_uuid,
			    inp->necp_client_uuid) != 0)) {
				if ((t->inp_socket->so_flags &
				    SOF_NOTIFYCONFLICT) &&
				    !(so->so_flags & SOF_NOTIFYCONFLICT)) {
					conflict = 1;
				}

				lck_rw_done(pcbinfo->ipi_lock);

				if (conflict) {
					in_pcb_conflict_post_msg(lport);
				}
				socket_lock(so, 0);
				return EADDRINUSE;
			}
			t = in_pcblookup_local_and_cleanup(pcbinfo,
			    SIN(nam)->sin_addr, lport, wild);
			if (t != NULL &&
			    (reuseport & t->inp_socket->so_options) == 0 &&
			    (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
			    !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
			    uuid_compare(t->necp_client_uuid,
			    inp->necp_client_uuid) != 0)) {
				if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
				    t->inp_laddr.s_addr != INADDR_ANY ||
				    SOCK_DOM(so) != PF_INET6 ||
				    SOCK_DOM(t->inp_socket) != PF_INET6) {
					if ((t->inp_socket->so_flags &
					    SOF_NOTIFYCONFLICT) &&
					    !(so->so_flags & SOF_NOTIFYCONFLICT)) {
						conflict = 1;
					}

					lck_rw_done(pcbinfo->ipi_lock);

					if (conflict) {
						in_pcb_conflict_post_msg(lport);
					}
					socket_lock(so, 0);
					return EADDRINUSE;
				}
			}
		}
		laddr = SIN(nam)->sin_addr;
	}

	if (lport == 0) {
		u_short first, last;
		int count;
		bool found;

		/*
		 * Override wild = 1 for implicit bind (mainly used by connect)
		 * For implicit bind (lport == 0), we always use an unused port,
		 * so REUSEADDR|REUSEPORT don't apply
		 */
		wild = 1;

		randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
		    (so->so_type == SOCK_STREAM ? tcp_use_randomport :
		    udp_use_randomport);

		/*
		 * Even though this looks similar to the code in
		 * in6_pcbsetport, the v6 vs v4 checks are different.
		 */
		anonport = TRUE;
		if (inp->inp_flags & INP_HIGHPORT) {
			first = (u_short)ipport_hifirstauto;    /* sysctl */
			last = (u_short)ipport_hilastauto;
			lastport = &pcbinfo->ipi_lasthi;
		} else if (inp->inp_flags & INP_LOWPORT) {
			cred = kauth_cred_proc_ref(p);
			error = priv_check_cred(cred,
			    PRIV_NETINET_RESERVEDPORT, 0);
			kauth_cred_unref(&cred);
			if (error != 0) {
				lck_rw_done(pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return error;
			}
			first = (u_short)ipport_lowfirstauto;   /* 1023 */
			last = (u_short)ipport_lowlastauto;     /* 600 */
			lastport = &pcbinfo->ipi_lastlow;
		} else {
			first = (u_short)ipport_firstauto;      /* sysctl */
			last = (u_short)ipport_lastauto;
			lastport = &pcbinfo->ipi_lastport;
		}
		/* No point in randomizing if only one port is available */
		if (first == last) {
			randomport = 0;
		}
		/*
		 * Simple check to ensure all ports are not used up causing
		 * a deadlock here.
		 *
		 * We split the two cases (up and down) so that the direction
		 * is not being tested on each round of the loop.
		 */
		if (first > last) {
			struct in_addr lookup_addr;

			/*
			 * counting down
			 */
			if (randomport) {
				read_frandom(&rand_port, sizeof(rand_port));
				*lastport =
				    first - (rand_port % (first - last));
			}
			count = first - last;

			lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
			    inp->inp_laddr;

			found = false;
			do {
				if (count-- < 0) {      /* completely used? */
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return EADDRNOTAVAIL;
				}
				--*lastport;
				if (*lastport > first || *lastport < last) {
					*lastport = first;
				}
				lport = htons(*lastport);

				/*
				 * Skip if this is a restricted port as we do
				 * not want to use restricted ports as
				 * ephemeral
				 */
				if (IS_RESTRICTED_IN_PORT(lport)) {
					continue;
				}

				found = in_pcblookup_local_and_cleanup(pcbinfo,
				    lookup_addr, lport, wild) == NULL;
			} while (!found);
		} else {
			struct in_addr lookup_addr;

			/*
			 * counting up
			 */
			if (randomport) {
				read_frandom(&rand_port, sizeof(rand_port));
				*lastport =
				    first + (rand_port % (first - last));
			}
			count = last - first;

			lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
			    inp->inp_laddr;

			found = false;
			do {
				if (count-- < 0) {      /* completely used? */
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return EADDRNOTAVAIL;
				}
				++*lastport;
				if (*lastport < first || *lastport > last) {
					*lastport = first;
				}
				lport = htons(*lastport);

				/*
				 * Skip if this is a restricted port as we do
				 * not want to use restricted ports as
				 * ephemeral
				 */
				if (IS_RESTRICTED_IN_PORT(lport)) {
					continue;
				}

				found = in_pcblookup_local_and_cleanup(pcbinfo,
				    lookup_addr, lport, wild) == NULL;
			} while (!found);
		}
	}
	socket_lock(so, 0);

	/*
	 * We unlocked socket's protocol lock for a long time.
	 * The socket might have been dropped/defuncted.
	 * Checking if world has changed since.
	 */
	if (inp->inp_state == INPCB_STATE_DEAD) {
		lck_rw_done(pcbinfo->ipi_lock);
		return ECONNABORTED;
	}

	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
		lck_rw_done(pcbinfo->ipi_lock);
		return EINVAL;
	}

	if (laddr.s_addr != INADDR_ANY) {
		inp->inp_laddr = laddr;
		inp->inp_last_outifp = outif;
	}
	inp->inp_lport = lport;
	if (anonport) {
		inp->inp_flags |= INP_ANONPORT;
	}

	if (in_pcbinshash(inp, 1) != 0) {
		inp->inp_laddr.s_addr = INADDR_ANY;
		inp->inp_last_outifp = NULL;

		inp->inp_lport = 0;
		if (anonport) {
			inp->inp_flags &= ~INP_ANONPORT;
		}
		lck_rw_done(pcbinfo->ipi_lock);
		return EAGAIN;
	}
	lck_rw_done(pcbinfo->ipi_lock);
	sflt_notify(so, sock_evt_bound, NULL);
	return 0;
}
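/*
 * In-kernel callers bind either explicitly or implicitly.  A minimal
 * sketch of an explicit bind to 127.0.0.1:8080 (hypothetical values;
 * in_pcbbind() is normally reached via the protocol's pru_bind entry):
 */
#if 0
static int
example_bind(struct inpcb *inp, struct proc *p)
{
	struct sockaddr_in sin;

	bzero(&sin, sizeof(sin));
	sin.sin_len = sizeof(sin);
	sin.sin_family = AF_INET;
	sin.sin_port = htons(8080);                     /* hypothetical */
	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

	/* passing nam == NULL instead would pick an ephemeral port */
	return in_pcbbind(inp, (struct sockaddr *)&sin, p);
}
#endif /* 0 */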
#define APN_FALLBACK_IP_FILTER(a)	\
	(IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \
	 IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \
	 IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \
	 IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \
	 IN_PRIVATE(ntohl((a)->sin_addr.s_addr)))

#define APN_FALLBACK_NOTIF_INTERVAL     2 /* Magic Number */
static uint64_t last_apn_fallback = 0;
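/*
 * APN_FALLBACK_IP_FILTER() weeds out destinations that can never need
 * the fallback: e.g. for a sockaddr_in holding 10.0.0.1, IN_PRIVATE()
 * makes the macro true and apn_fallback_required() below bails early.
 * Only globally-routable unicast destinations pass the filter.
 */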
static boolean_t
apn_fallback_required(proc_t proc, struct socket *so,
    struct sockaddr_in *p_dstv4)
{
	uint64_t timenow;
	struct sockaddr_storage lookup_default_addr;
	struct rtentry *rt = NULL;

	VERIFY(proc != NULL);

	if (apn_fallbk_enabled == FALSE) {
		return FALSE;
	}

	if (proc == kernproc) {
		return FALSE;
	}

	if (so && (so->so_options & SO_NOAPNFALLBK)) {
		return FALSE;
	}

	timenow = net_uptime();
	if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) {
		apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n"));
		return FALSE;
	}

	if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4)) {
		return FALSE;
	}

	/* Check if we have unscoped IPv6 default route through cellular */
	bzero(&lookup_default_addr, sizeof(lookup_default_addr));
	lookup_default_addr.ss_family = AF_INET6;
	lookup_default_addr.ss_len = sizeof(struct sockaddr_in6);

	rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
	if (NULL == rt) {
		apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
		    "unscoped default IPv6 route.\n"));
		return FALSE;
	}

	if (!IFNET_IS_CELLULAR(rt->rt_ifp)) {
		rtfree(rt);
		apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
		    "unscoped default IPv6 route through cellular interface.\n"));
		return FALSE;
	}

	/*
	 * We have a default IPv6 route, ensure that
	 * we do not have IPv4 default route before triggering
	 * the event
	 */
	rtfree(rt);
	rt = NULL;

	bzero(&lookup_default_addr, sizeof(lookup_default_addr));
	lookup_default_addr.ss_family = AF_INET;
	lookup_default_addr.ss_len = sizeof(struct sockaddr_in);

	rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
	if (rt) {
		rtfree(rt);
		rt = NULL;
		apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped "
		    "IPv4 default route!\n"));
		return FALSE;
	}

	{
		/*
		 * We disable APN fallback if the binary is not a third-party app.
		 * Note that platform daemons use their process name as a
		 * bundle ID so we filter out bundle IDs without dots.
		 */
		const char *bundle_id = cs_identity_get(proc);
		if (bundle_id == NULL ||
		    bundle_id[0] == '\0' ||
		    strchr(bundle_id, '.') == NULL ||
		    strncmp(bundle_id, "com.apple.", sizeof("com.apple.") - 1) == 0) {
			apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-"
			    "party bundle ID \"%s\"!\n", (bundle_id ? bundle_id : "NULL")));
			return FALSE;
		}
	}

	{
		/*
		 * The Apple App Store IPv6 requirement started on
		 * June 1st, 2016 at 12:00:00 AM PDT.
		 * We disable APN fallback if the binary is more recent than that.
		 * We check both atime and birthtime since birthtime is not always supported.
		 */
		static const long ipv6_start_date = 1464764400L;
		vfs_context_t context;
		struct stat64 sb;
		int vn_stat_error;

		bzero(&sb, sizeof(struct stat64));
		context = vfs_context_create(NULL);
		vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, 0, context);
		(void)vfs_context_rele(context);

		if (vn_stat_error != 0 ||
		    sb.st_atimespec.tv_sec >= ipv6_start_date ||
		    sb.st_birthtimespec.tv_sec >= ipv6_start_date) {
			apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary "
			    "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n",
			    vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec,
			    sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec));
			return FALSE;
		}
	}
	return TRUE;
}
static void
apn_fallback_trigger(proc_t proc, struct socket *so)
{
	pid_t pid = 0;
	struct kev_msg ev_msg;
	struct kev_netevent_apnfallbk_data apnfallbk_data;

	last_apn_fallback = net_uptime();
	pid = proc_pid(proc);
	uuid_t application_uuid;
	uuid_clear(application_uuid);
	proc_getexecutableuuid(proc, application_uuid,
	    sizeof(application_uuid));

	bzero(&ev_msg, sizeof(struct kev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_NETEVENT_SUBCLASS;
	ev_msg.event_code = KEV_NETEVENT_APNFALLBACK;

	bzero(&apnfallbk_data, sizeof(apnfallbk_data));

	if (so->so_flags & SOF_DELEGATED) {
		apnfallbk_data.epid = so->e_pid;
		uuid_copy(apnfallbk_data.euuid, so->e_uuid);
	} else {
		apnfallbk_data.epid = so->last_pid;
		uuid_copy(apnfallbk_data.euuid, so->last_uuid);
	}

	ev_msg.dv[0].data_ptr = &apnfallbk_data;
	ev_msg.dv[0].data_length = sizeof(apnfallbk_data);
	kev_post_msg(&ev_msg);
	apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n"));
}
/*
 * Transform old in_pcbconnect() into an inner subroutine for new
 * in_pcbconnect(); do some validity-checking on the remote address
 * (in "nam") and then determine local host address (i.e., which
 * interface) to use to access that remote host.
 *
 * This routine may alter the caller-supplied remote address "nam".
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 *
 * This routine might return an ifp with a reference held if the caller
 * provides a non-NULL outif, even in the error case.  The caller is
 * responsible for releasing its reference.
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported
 *		EADDRNOTAVAIL		Address not available
 */
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
    unsigned int ifscope, struct ifnet **outif, int raw)
{
	struct route *ro = &inp->inp_route;
	struct in_ifaddr *ia = NULL;
	struct sockaddr_in sin;
	int error = 0;
	boolean_t restricted = FALSE;

	if (outif != NULL) {
		*outif = NULL;
	}
	if (nam->sa_len != sizeof(struct sockaddr_in)) {
		return EINVAL;
	}
	if (SIN(nam)->sin_family != AF_INET) {
		return EAFNOSUPPORT;
	}
	if (raw == 0 && SIN(nam)->sin_port == 0) {
		return EADDRNOTAVAIL;
	}

	/*
	 * If the destination address is INADDR_ANY,
	 * use the primary local address.
	 * If the supplied address is INADDR_BROADCAST,
	 * and the primary interface supports broadcast,
	 * choose the broadcast address for that interface.
	 */
	if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
	    SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) {
		lck_rw_lock_shared(in_ifaddr_rwlock);
		if (!TAILQ_EMPTY(&in_ifaddrhead)) {
			ia = TAILQ_FIRST(&in_ifaddrhead);
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
				SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
			} else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
				SIN(nam)->sin_addr =
				    SIN(&ia->ia_broadaddr)->sin_addr;
			}
			IFA_UNLOCK(&ia->ia_ifa);
			ia = NULL;
		}
		lck_rw_done(in_ifaddr_rwlock);
	}
	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp->inp_laddr.s_addr != INADDR_ANY) {
		VERIFY(ia == NULL);
		*laddr = inp->inp_laddr;
		return 0;
	}

	/*
	 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
	 * then it overrides the sticky ifscope set for the socket.
	 */
	if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF)) {
		ifscope = inp->inp_boundifp->if_index;
	}

	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 * Note that we should check the address family of the cached
	 * destination, in case of sharing the cache with IPv6.
	 */
	if (ro->ro_rt != NULL) {
		RT_LOCK_SPIN(ro->ro_rt);
	}
	if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
	    SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
	    (inp->inp_socket->so_options & SO_DONTROUTE)) {
		if (ro->ro_rt != NULL) {
			RT_UNLOCK(ro->ro_rt);
		}
		ROUTE_RELEASE(ro);
	}
	if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
	    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
		if (ro->ro_rt != NULL) {
			RT_UNLOCK(ro->ro_rt);
		}
		ROUTE_RELEASE(ro);
		/* No route yet, so try to acquire one */
		bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
		SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
		rtalloc_scoped(ro, ifscope);
		if (ro->ro_rt != NULL) {
			RT_LOCK_SPIN(ro->ro_rt);
		}
	}
	/* Sanitized local copy for interface address searches */
	bzero(&sin, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
	/*
	 * If we did not find (or use) a route, assume dest is reachable
	 * on a directly connected network and try to find a corresponding
	 * interface to take the source address from.
	 */
	if (ro->ro_rt == NULL) {
		proc_t proc = current_proc();

		VERIFY(ia == NULL);
		ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
		if (ia == NULL) {
			ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
		}
		error = ((ia == NULL) ? ENETUNREACH : 0);

		if (apn_fallback_required(proc, inp->inp_socket,
		    SIN(nam))) {
			apn_fallback_trigger(proc, inp->inp_socket);
		}
		goto done;
	}
	RT_LOCK_ASSERT_HELD(ro->ro_rt);
	/*
	 * If the outgoing interface on the route found is not
	 * a loopback interface, use the address from that interface.
	 */
	if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
		VERIFY(ia == NULL);
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 * Apply the same logic for expensive interfaces.
		 */
		if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
			RT_UNLOCK(ro->ro_rt);
			ROUTE_RELEASE(ro);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			ia = ifatoia(ro->ro_rt->rt_ifa);
			IFA_ADDREF(&ia->ia_ifa);

			/*
			 * Mark the control block for notification of
			 * a possible flow that might undergo clat46
			 * translation.
			 *
			 * We defer the decision to a later point when
			 * the inpcb is being disposed of.
			 * The reason is that we only want to send notification
			 * if the flow was ever used to send data.
			 */
			if (IS_INTF_CLAT46(ro->ro_rt->rt_ifp)) {
				inp->inp_flags2 |= INP2_CLAT46_FLOW;
			}

			RT_UNLOCK(ro->ro_rt);
			error = 0;
		}
		goto done;
	}
	VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
	RT_UNLOCK(ro->ro_rt);
	/*
	 * The outgoing interface is marked with 'loopback net', so a route
	 * to ourselves is here.
	 * Try to find the interface of the destination address and then
	 * take the address from there.  That interface is not necessarily
	 * a loopback interface.
	 */
	VERIFY(ia == NULL);
	ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
	if (ia == NULL) {
		ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
	}
	if (ia == NULL) {
		ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
	}
	if (ia == NULL) {
		RT_LOCK(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL) {
			IFA_ADDREF(&ia->ia_ifa);
		}
		RT_UNLOCK(ro->ro_rt);
	}
	error = ((ia == NULL) ? ENETUNREACH : 0);

done:
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
	    inp->inp_moptions != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp;

		imo = inp->inp_moptions;
		IMO_LOCK(imo);
		if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
		    ia->ia_ifp != imo->imo_multicast_ifp)) {
			ifp = imo->imo_multicast_ifp;
			if (ia != NULL) {
				IFA_REMREF(&ia->ia_ifa);
			}
			lck_rw_lock_shared(in_ifaddr_rwlock);
			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
				if (ia->ia_ifp == ifp) {
					break;
				}
			}
			if (ia != NULL) {
				IFA_ADDREF(&ia->ia_ifa);
			}
			lck_rw_done(in_ifaddr_rwlock);
			if (ia == NULL) {
				error = EADDRNOTAVAIL;
			} else {
				error = 0;
			}
		}
		IMO_UNLOCK(imo);
	}
	/*
	 * Don't do pcblookup call here; return interface in laddr
	 * and exit to caller, that will do the lookup.
	 */
	if (ia != NULL) {
		/*
		 * If the source address belongs to a cellular interface
		 * and the socket forbids our using interfaces of such
		 * type, pretend that there is no source address.
		 * Apply the same logic for expensive interfaces.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
			IFA_UNLOCK(&ia->ia_ifa);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else if (error == 0) {
			*laddr = ia->ia_addr.sin_addr;
			if (outif != NULL) {
				struct ifnet *ifp;

				if (ro->ro_rt != NULL) {
					ifp = ro->ro_rt->rt_ifp;
				} else {
					ifp = ia->ia_ifp;
				}

				VERIFY(ifp != NULL);
				IFA_CONVERT_LOCK(&ia->ia_ifa);
				ifnet_reference(ifp);   /* for caller */
				if (*outif != NULL) {
					ifnet_release(*outif);
				}
				*outif = ifp;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		} else {
			IFA_UNLOCK(&ia->ia_ifa);
		}
		IFA_REMREF(&ia->ia_ifa);
		ia = NULL;
	}

	if (restricted && error == EHOSTUNREACH) {
		soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_IFDENIED));
	}

	return error;
}
/*
 * Outer subroutine:
 * Connect from a socket to a specified address.
 * Both address and port must be specified in argument sin.
 * If we don't have a local address for this socket yet,
 * then pick one.
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
	struct inpcb *pcb;
	int error;
	struct socket *so = inp->inp_socket;

#if CONTENT_FILTER
	if (so) {
		so->so_state_change_cnt++;
	}
#endif

	/*
	 * Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0) {
		return error;
	}

	socket_unlock(so, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state.  When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0) {
		return ECONNREFUSED;
	}

	if (pcb != NULL) {
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return EADDRINUSE;
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			error = in_pcbbind(inp, NULL, p);
			if (error) {
				return error;
			}
		}
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		/*
		 * Usage of IP_PKTINFO, without a local port already
		 * specified, will cause the kernel to panic,
		 * see rdar://problem/18508185.
		 * For now return an error to avoid a kernel panic.
		 * This routine can be refactored to handle this better
		 * in the future.
		 */
		if (inp->inp_lport == 0) {
			return EINVAL;
		}
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
	}
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_pcb_invalidate_cache(inp);
	}
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	return 0;
}
void
in_pcbdisconnect(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_pcb_cache(inp);
	}

	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;

#if CONTENT_FILTER
	if (so) {
		so->so_state_change_cnt++;
	}
#endif

	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
		/* lock inversion issue, mostly with udp multicast packets */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	/*
	 * A multipath subflow socket would have its SS_NOFDREF set by default,
	 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
	 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) {
		in_pcbdetach(inp);
	}
}
void
in_pcbdetach(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (so->so_pcb == NULL) {
		/* PCB has been disposed */
		panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__,
		    inp, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if IPSEC
	if (inp->inp_sp != NULL) {
		(void) ipsec4_delete_pcbpolicy(inp);
	}
#endif /* IPSEC */

	if (inp->inp_stat != NULL && SOCK_PROTO(so) == IPPROTO_UDP) {
		if (inp->inp_stat->rxpackets == 0 && inp->inp_stat->txpackets == 0) {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_no_data);
		}
	}

	/*
	 * Let NetworkStatistics know this PCB is going away
	 * before we detach it.
	 */
	if (nstat_collect &&
	    (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
		nstat_pcb_detach(inp);
	}

	/* Free memory buffer held for generating keep alives */
	if (inp->inp_keepalive_data != NULL) {
		FREE(inp->inp_keepalive_data, M_TEMP);
		inp->inp_keepalive_data = NULL;
	}

	/* mark socket state as dead */
	if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
		panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
		    __func__, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

	if (!(so->so_flags & SOF_PCBCLEARING)) {
		struct ip_moptions *imo;

		inp->inp_vflag = 0;
		if (inp->inp_options != NULL) {
			(void) m_free(inp->inp_options);
			inp->inp_options = NULL;
		}
		ROUTE_RELEASE(&inp->inp_route);
		imo = inp->inp_moptions;
		inp->inp_moptions = NULL;
		sofreelastref(so, 0);
		inp->inp_state = INPCB_STATE_DEAD;

		/*
		 * Enqueue an event to send kernel event notification
		 * if the flow has to CLAT46 for data packets
		 */
		if (inp->inp_flags2 & INP2_CLAT46_FLOW) {
			/*
			 * If there has been any exchange of data bytes
			 * over this flow.
			 * Schedule a notification to report that flow is
			 * using client side translation.
			 */
			if (inp->inp_stat != NULL &&
			    (inp->inp_stat->txbytes != 0 ||
			    inp->inp_stat->rxbytes != 0)) {
				if (so->so_flags & SOF_DELEGATED) {
					in6_clat46_event_enqueue_nwk_wq_entry(
						IN6_CLAT46_EVENT_V4_FLOW,
						so->e_pid,
						so->e_uuid);
				} else {
					in6_clat46_event_enqueue_nwk_wq_entry(
						IN6_CLAT46_EVENT_V4_FLOW,
						so->last_pid,
						so->last_uuid);
				}
			}
		}

		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;

		inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);

		/*
		 * See inp_join_group() for why we need to unlock
		 * here.
		 */
		if (imo != NULL) {
			socket_unlock(so, 0);
			IMO_REMREF(imo);
			socket_lock(so, 0);
		}
	}
}
void
in_pcbdispose(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	if (so != NULL && so->so_usecount != 0) {
		panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n",
		    __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
		    solockhistory_nr(so));
		/* NOTREACHED */
	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
		if (so != NULL) {
			panic_plain("%s: inp %p invalid wantcnt %d, so %p "
			    "[%d,%d] usecount %d retaincnt %d state 0x%x "
			    "flags 0x%x lockhistory %s\n", __func__, inp,
			    inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
			    so->so_usecount, so->so_retaincnt, so->so_state,
			    so->so_flags, solockhistory_nr(so));
			/* NOTREACHED */
		} else {
			panic("%s: inp %p invalid wantcnt %d no socket\n",
			    __func__, inp, inp->inp_wantcnt);
			/* NOTREACHED */
		}
	}

	LCK_RW_ASSERT(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);

	inp->inp_gencnt = ++ipi->ipi_gencnt;
	/* access ipi in in_pcbremlists */
	in_pcbremlists(inp);

	if (so != NULL) {
		if (so->so_proto->pr_flags & PR_PCBLOCK) {
			sofreelastref(so, 0);
			if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
				/*
				 * selthreadclear() already called
				 * during sofreelastref() above.
				 */
				sbrelease(&so->so_rcv);
				sbrelease(&so->so_snd);
			}
			if (so->so_head != NULL) {
				panic("%s: so=%p head still exist\n",
				    __func__, so);
				/* NOTREACHED */
			}
			lck_mtx_unlock(&inp->inpcb_mtx);

#if NECP
			necp_inpcb_remove_cb(inp);
#endif /* NECP */

			lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
		}
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;
		so->so_saved_pcb = (caddr_t)inp;
		so->so_pcb = NULL;
		inp->inp_socket = NULL;
#if NECP
		necp_inpcb_dispose(inp);
#endif /* NECP */
		/*
		 * In case there is a route cached after a detach (possible
		 * in the tcp case), make sure that it is freed before
		 * we deallocate the structure.
		 */
		ROUTE_RELEASE(&inp->inp_route);
		if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
			zfree(ipi->ipi_zone, inp);
		}
		sodealloc(so);
	}
}
/*
 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
 * in struct pr_usrreqs, so that protocols can just reference them directly
 * without the need for a wrapper function.
 */
int
in_getsockaddr(struct socket *so, struct sockaddr **nam)
{
	struct inpcb *inp;
	struct sockaddr_in *sin;

	/*
	 * Do the malloc first in case it blocks.
	 */
	MALLOC(sin, struct sockaddr_in *, sizeof(*sin), M_SONAME, M_WAITOK);
	if (sin == NULL) {
		return ENOBUFS;
	}
	bzero(sin, sizeof(*sin));
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);

	if ((inp = sotoinpcb(so)) == NULL) {
		FREE(sin, M_SONAME);
		return EINVAL;
	}
	sin->sin_port = inp->inp_lport;
	sin->sin_addr = inp->inp_laddr;

	*nam = (struct sockaddr *)sin;
	return 0;
}

int
in_getsockaddr_s(struct socket *so, struct sockaddr_in *ss)
{
	struct sockaddr_in *sin = ss;
	struct inpcb *inp;

	VERIFY(ss != NULL);
	bzero(ss, sizeof(*ss));

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);

	if ((inp = sotoinpcb(so)) == NULL) {
		return EINVAL;
	}

	sin->sin_port = inp->inp_lport;
	sin->sin_addr = inp->inp_laddr;
	return 0;
}

int
in_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
	struct inpcb *inp;
	struct sockaddr_in *sin;

	/*
	 * Do the malloc first in case it blocks.
	 */
	MALLOC(sin, struct sockaddr_in *, sizeof(*sin), M_SONAME, M_WAITOK);
	if (sin == NULL) {
		return ENOBUFS;
	}
	bzero((caddr_t)sin, sizeof(*sin));
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);

	if ((inp = sotoinpcb(so)) == NULL) {
		FREE(sin, M_SONAME);
		return EINVAL;
	}
	sin->sin_port = inp->inp_fport;
	sin->sin_addr = inp->inp_faddr;

	*nam = (struct sockaddr *)sin;
	return 0;
}
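/*
 * The entry points above slot straight into pr_usrreqs.  A minimal
 * sketch of how a protocol switch references them (cf. the real tables
 * in udp_usrreq.c/tcp_usrreq.c):
 */
#if 0
static struct pr_usrreqs example_usrreqs = {    /* hypothetical, partial */
	.pru_sockaddr = in_getsockaddr,
	.pru_peeraddr = in_getpeeraddr,
};
#endif /* 0 */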
void
in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    int errno, void (*notify)(struct inpcb *, int))
{
	struct inpcb *inp;

	lck_rw_lock_shared(pcbinfo->ipi_lock);

	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp->inp_faddr.s_addr != faddr.s_addr ||
		    inp->inp_socket == NULL) {
			continue;
		}
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
			continue;
		}
		socket_lock(inp->inp_socket, 1);
		(*notify)(inp, errno);
		(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
		socket_unlock(inp->inp_socket, 1);
	}
	lck_rw_done(pcbinfo->ipi_lock);
}
/*
 * Check for alternatives when higher level complains
 * about service problems.  For now, invalidate cached
 * routing information.  If the route was created dynamically
 * (by a redirect), time to try a default gateway again.
 */
void
in_losing(struct inpcb *inp)
{
	boolean_t release = FALSE;
	struct rtentry *rt;

	if ((rt = inp->inp_route.ro_rt) != NULL) {
		struct in_ifaddr *ia = NULL;

		RT_LOCK(rt);
		if (rt->rt_flags & RTF_DYNAMIC) {
			/*
			 * Prevent another thread from modifying rt_key,
			 * rt_gateway via rt_setgate() after rt_lock is
			 * dropped by marking the route as defunct.
			 */
			rt->rt_flags |= RTF_CONDEMNED;
			RT_UNLOCK(rt);
			(void) rtrequest(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
		} else {
			RT_UNLOCK(rt);
		}
		/* if the address is gone keep the old route in the pcb */
		if (inp->inp_laddr.s_addr != INADDR_ANY &&
		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
			/*
			 * Address is around; ditch the route.  A new route
			 * can be allocated the next time output is attempted.
			 */
			release = TRUE;
		}
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
	}
	if (rt == NULL || release) {
		ROUTE_RELEASE(&inp->inp_route);
	}
}
/*
 * After a routing change, flush old routing
 * and allocate a (hopefully) better one.
 */
void
in_rtchange(struct inpcb *inp, int errno)
{
#pragma unused(errno)
	boolean_t release = FALSE;
	struct rtentry *rt;

	if ((rt = inp->inp_route.ro_rt) != NULL) {
		struct in_ifaddr *ia = NULL;

		/* if address is gone, keep the old route */
		if (inp->inp_laddr.s_addr != INADDR_ANY &&
		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
			/*
			 * Address is around; ditch the route.  A new route
			 * can be allocated the next time output is attempted.
			 */
			release = TRUE;
		}
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
	}
	if (rt == NULL || release) {
		ROUTE_RELEASE(&inp->inp_route);
	}
}
/*
 * Lookup a PCB based on the local address and port.
 */
struct inpcb *
in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    unsigned int lport_arg, int wild_okay)
{
	struct inpcb *inp;
	int matchwild = 3, wildcard;
	u_short lport = (u_short)lport_arg;

	KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (!wild_okay) {
		struct inpcbhead *head;
		/*
		 * Look for an unconnected (wildcard foreign addr) PCB that
		 * matches the local address and port we're looking for.
		 */
		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
		    pcbinfo->ipi_hashmask)];
		LIST_FOREACH(inp, head, inp_hash) {
			if (!(inp->inp_vflag & INP_IPV4)) {
				continue;
			}
			if (inp->inp_faddr.s_addr == INADDR_ANY &&
			    inp->inp_laddr.s_addr == laddr.s_addr &&
			    inp->inp_lport == lport) {
				/*
				 * Found.
				 */
				return inp;
			}
		}
		/*
		 * Not found.
		 */
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
		return NULL;
	} else {
		struct inpcbporthead *porthash;
		struct inpcbport *phd;
		struct inpcb *match = NULL;
		/*
		 * Best fit PCB lookup.
		 *
		 * First see if this local port is in use by looking on the
		 * port hash list.
		 */
		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
		    pcbinfo->ipi_porthashmask)];
		LIST_FOREACH(phd, porthash, phd_hash) {
			if (phd->phd_port == lport) {
				break;
			}
		}
		if (phd != NULL) {
			/*
			 * Port is in use by one or more PCBs.  Look for best
			 * fit.
			 */
			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
				wildcard = 0;
				if (!(inp->inp_vflag & INP_IPV4)) {
					continue;
				}
				if (inp->inp_faddr.s_addr != INADDR_ANY) {
					wildcard++;
				}
				if (inp->inp_laddr.s_addr != INADDR_ANY) {
					if (laddr.s_addr == INADDR_ANY) {
						wildcard++;
					} else if (inp->inp_laddr.s_addr !=
					    laddr.s_addr) {
						continue;
					}
				} else {
					if (laddr.s_addr != INADDR_ANY) {
						wildcard++;
					}
				}
				if (wildcard < matchwild) {
					match = inp;
					matchwild = wildcard;
					if (matchwild == 0) {
						break;
					}
				}
			}
		}
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
		    0, 0, 0, 0);
		return match;
	}
}
/*
 * Check if PCB exists in hash list.
 */
int
in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    uid_t *uid, gid_t *gid, struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
	int found = 0;
	struct inpcb *local_wild = NULL;
	struct inpcb *local_wild_mapped = NULL;

	*uid = UID_MAX;
	*gid = GID_MAX;

	/*
	 * We may have found the pcb in the last lookup - check this first.
	 */

	lck_rw_lock_shared(pcbinfo->ipi_lock);

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			if ((found = (inp->inp_socket != NULL))) {
				/*
				 * Found.
				 */
				*uid = kauth_cred_getuid(
					inp->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
					inp->inp_socket->so_cred);
			}
			lck_rw_done(pcbinfo->ipi_lock);
			return found;
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		lck_rw_done(pcbinfo->ipi_lock);
		return 0;
	}

	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				if ((found = (inp->inp_socket != NULL))) {
					*uid = kauth_cred_getuid(
						inp->inp_socket->so_cred);
					*gid = kauth_cred_getgid(
						inp->inp_socket->so_cred);
				}
				lck_rw_done(pcbinfo->ipi_lock);
				return found;
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
				if (inp->inp_socket &&
				    SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
					local_wild_mapped = inp;
				} else {
					local_wild = inp;
				}
			}
		}
	}
	if (local_wild == NULL) {
		if (local_wild_mapped != NULL) {
			if ((found = (local_wild_mapped->inp_socket != NULL))) {
				*uid = kauth_cred_getuid(
					local_wild_mapped->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
					local_wild_mapped->inp_socket->so_cred);
			}
			lck_rw_done(pcbinfo->ipi_lock);
			return found;
		}
		lck_rw_done(pcbinfo->ipi_lock);
		return 0;
	}
	if ((found = (local_wild->inp_socket != NULL))) {
		*uid = kauth_cred_getuid(
			local_wild->inp_socket->so_cred);
		*gid = kauth_cred_getgid(
			local_wild->inp_socket->so_cred);
	}
	lck_rw_done(pcbinfo->ipi_lock);
	return found;
}
/*
 * Lookup PCB in hash list.
 */
struct inpcb *
in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
	struct inpcb *local_wild = NULL;
	struct inpcb *local_wild_mapped = NULL;

	/*
	 * We may have found the pcb in the last lookup - check this first.
	 */

	lck_rw_lock_shared(pcbinfo->ipi_lock);

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			/*
			 * Found. Check if pcb is still valid.
			 */
			if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
			    WNT_STOPUSING) {
				lck_rw_done(pcbinfo->ipi_lock);
				return inp;
			} else {
				/* it's there but dead, say it isn't found */
				lck_rw_done(pcbinfo->ipi_lock);
				return NULL;
			}
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		lck_rw_done(pcbinfo->ipi_lock);
		return NULL;
	}

	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
				    WNT_STOPUSING) {
					lck_rw_done(pcbinfo->ipi_lock);
					return inp;
				} else {
					/* it's dead; say it isn't found */
					lck_rw_done(pcbinfo->ipi_lock);
					return NULL;
				}
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
				if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
					local_wild_mapped = inp;
				} else {
					local_wild = inp;
				}
			}
		}
	}
	if (local_wild == NULL) {
		if (local_wild_mapped != NULL) {
			if (in_pcb_checkstate(local_wild_mapped,
			    WNT_ACQUIRE, 0) != WNT_STOPUSING) {
				lck_rw_done(pcbinfo->ipi_lock);
				return local_wild_mapped;
			} else {
				/* it's dead; say it isn't found */
				lck_rw_done(pcbinfo->ipi_lock);
				return NULL;
			}
		}
		lck_rw_done(pcbinfo->ipi_lock);
		return NULL;
	}
	if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
		lck_rw_done(pcbinfo->ipi_lock);
		return local_wild;
	}
	/*
	 * It's either not found or is already dead.
	 */
	lck_rw_done(pcbinfo->ipi_lock);
	return NULL;
}
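/*
 * Usage sketch (illustrative assumption, not code from this file): an
 * input path maps a received segment to a socket by trying the exact
 * four-tuple first and falling back to a wildcard (listener) match:
 *
 *	inp = in_pcblookup_hash(&tcbinfo, ip->ip_src, th->th_sport,
 *	    ip->ip_dst, th->th_dport, 1, m->m_pkthdr.rcvif);
 *
 * A non-NULL result carries the WNT_ACQUIRE reference taken inside the
 * lookup; the caller must drop it via in_pcb_checkstate(WNT_RELEASE).
 */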
/*
 * @brief	Insert PCB onto various hash lists.
 *
 * @param	inp	Pointer to internet protocol control block
 * @param	locked	Indicates whether ipi_lock (protecting the pcb
 *		list) is already held.
 *
 * @return	int error on failure and 0 on success
 */
int
in_pcbinshash(struct inpcb *inp, int locked)
{
	struct inpcbhead *pcbhash;
	struct inpcbporthead *pcbporthash;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	struct inpcbport *phd;
	u_int32_t hashkey_faddr;

	if (!locked) {
		if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets
			 */
			socket_unlock(inp->inp_socket, 0);
			lck_rw_lock_exclusive(pcbinfo->ipi_lock);
			socket_lock(inp->inp_socket, 0);
		}
	}

	/*
	 * This routine or its caller may have given up
	 * socket's protocol lock briefly.
	 * During that time the socket may have been dropped.
	 * Safe-guarding against that.
	 */
	if (inp->inp_state == INPCB_STATE_DEAD) {
		if (!locked) {
			lck_rw_done(pcbinfo->ipi_lock);
		}
		return ECONNABORTED;
	}

	if (inp->inp_vflag & INP_IPV6) {
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	} else {
		hashkey_faddr = inp->inp_faddr.s_addr;
	}

	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, pcbinfo->ipi_hashmask);

	pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];

	pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
	    pcbinfo->ipi_porthashmask)];

	/*
	 * Go through port list and look for a head for this lport.
	 */
	LIST_FOREACH(phd, pcbporthash, phd_hash) {
		if (phd->phd_port == inp->inp_lport) {
			break;
		}
	}

	/*
	 * If none exists, malloc one and tack it on.
	 */
	if (phd == NULL) {
		MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport),
		    M_PCB, M_WAITOK);
		if (phd == NULL) {
			if (!locked) {
				lck_rw_done(pcbinfo->ipi_lock);
			}
			return ENOBUFS; /* XXX */
		}
		phd->phd_port = inp->inp_lport;
		LIST_INIT(&phd->phd_pcblist);
		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
	}

	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));

	inp->inp_phd = phd;
	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
	inp->inp_flags2 |= INP2_INHASHLIST;

	if (!locked) {
		lck_rw_done(pcbinfo->ipi_lock);
	}

#if NECP
	// This call catches the original setting of the local address
	inp_update_necp_policy(inp, NULL, NULL, 0);
#endif /* NECP */

	return 0;
}
/*
 * Move PCB to the proper hash bucket when { faddr, fport } have been
 * changed. NOTE: This does not handle the case of the lport changing (the
 * hashed port list would have to be updated as well), so the lport must
 * not change after in_pcbinshash() has been called.
 */
void
in_pcbrehash(struct inpcb *inp)
{
	struct inpcbhead *head;
	u_int32_t hashkey_faddr;

	if (inp->inp_vflag & INP_IPV6) {
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	} else {
		hashkey_faddr = inp->inp_faddr.s_addr;
	}

	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
	head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];

	if (inp->inp_flags2 & INP2_INHASHLIST) {
		LIST_REMOVE(inp, inp_hash);
		inp->inp_flags2 &= ~INP2_INHASHLIST;
	}

	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
	LIST_INSERT_HEAD(head, inp, inp_hash);
	inp->inp_flags2 |= INP2_INHASHLIST;

#if NECP
	// This call catches updates to the remote addresses
	inp_update_necp_policy(inp, NULL, NULL, 0);
#endif /* NECP */
}
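/*
 * Illustrative note (not from this file): the typical caller is the
 * connect path, which fills in { faddr, fport } on a pcb that was hashed
 * with a wildcard foreign address at bind time and then moves it to its
 * new bucket; the locals below are hypothetical:
 *
 *	inp->inp_faddr = sin->sin_addr;
 *	inp->inp_fport = sin->sin_port;
 *	in_pcbrehash(inp);	// lport must already be final
 */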
/*
 * Remove PCB from various lists.
 * Must be called with the pcbinfo lock held in exclusive mode.
 */
void
in_pcbremlists(struct inpcb *inp)
{
	inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;

	/*
	 * Check if it's in hashlist -- an inp is placed in hashlist when
	 * its local port gets assigned. So it should also be present
	 * in the port list.
	 */
	if (inp->inp_flags2 & INP2_INHASHLIST) {
		struct inpcbport *phd = inp->inp_phd;

		VERIFY(phd != NULL && inp->inp_lport > 0);

		LIST_REMOVE(inp, inp_hash);
		inp->inp_hash.le_next = NULL;
		inp->inp_hash.le_prev = NULL;

		LIST_REMOVE(inp, inp_portlist);
		inp->inp_portlist.le_next = NULL;
		inp->inp_portlist.le_prev = NULL;
		if (LIST_EMPTY(&phd->phd_pcblist)) {
			LIST_REMOVE(phd, phd_hash);
			FREE(phd, M_PCB);
		}
		inp->inp_phd = NULL;
		inp->inp_flags2 &= ~INP2_INHASHLIST;
	}
	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));

	if (inp->inp_flags2 & INP2_TIMEWAIT) {
		/* Remove from time-wait queue */
		tcp_remove_from_time_wait(inp);
		inp->inp_flags2 &= ~INP2_TIMEWAIT;
		VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
		inp->inp_pcbinfo->ipi_twcount--;
	} else {
		/* Remove from global inp list if it is not time-wait */
		LIST_REMOVE(inp, inp_list);
	}

	if (inp->inp_flags2 & INP2_IN_FCTREE) {
		inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED | INPFC_REMOVE));
		VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
	}

	inp->inp_pcbinfo->ipi_count--;
}
/*
 * Mechanism used to defer the memory release of PCBs
 * The pcb list will contain the pcb until the reaper can clean it up if
 * the following conditions are met:
 *	1) state "DEAD",
 *	2) wantcnt is STOPUSING
 *	3) usecount is 0
 * This function will be called to either mark the pcb as
 */
int
in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
{
	volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
	UInt32 origwant;
	UInt32 newwant;

	switch (mode) {
	case WNT_STOPUSING:
		/*
		 * Try to mark the pcb as ready for recycling. CAS with
		 * STOPUSING, if success we're good, if it's in use, will
		 * be marked later
		 */
		if (locked == 0) {
			socket_lock(pcb->inp_socket, 1);
		}
		pcb->inp_state = INPCB_STATE_DEAD;

stopusing:
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: pcb=%p so=%p usecount is negative\n",
			    __func__, pcb, pcb->inp_socket);
		}
		if (locked == 0) {
			socket_unlock(pcb->inp_socket, 1);
		}

		inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);

		origwant = *wantcnt;
		if ((UInt16) origwant == 0xffff) { /* should stop using */
			return WNT_STOPUSING;
		}
		newwant = 0xffff;
		if ((UInt16) origwant == 0) {
			/* try to mark it as unusable now */
			OSCompareAndSwap(origwant, newwant, wantcnt);
		}
		return WNT_STOPUSING;

	case WNT_ACQUIRE:
		/*
		 * Try to increase the reference to the pcb. If it is
		 * WNT_STOPUSING, we should bail out. If the socket state
		 * is DEAD, try to set the count to STOPUSING and return
		 * failed; otherwise increase the count.
		 */
		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				return WNT_STOPUSING;
			}
			newwant = origwant + 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));
		return WNT_ACQUIRE;

	case WNT_RELEASE:
		/*
		 * Release the reference. If the result is zero and the pcb
		 * state is DEAD, set the wanted bit to STOPUSING.
		 */
		if (locked == 0) {
			socket_lock(pcb->inp_socket, 1);
		}

		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0x0) {
				panic("%s: pcb=%p release with zero count",
				    __func__, pcb);
			}
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				if (locked == 0) {
					socket_unlock(pcb->inp_socket, 1);
				}
				return WNT_STOPUSING;
			}
			newwant = origwant - 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));

		if (pcb->inp_state == INPCB_STATE_DEAD) {
			goto stopusing;
		}
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: RELEASE pcb=%p so=%p usecount is negative\n",
			    __func__, pcb, pcb->inp_socket);
		}

		if (locked == 0) {
			socket_unlock(pcb->inp_socket, 1);
		}
		return WNT_RELEASE;

	default:
		panic("%s: so=%p not a valid state =%x\n", __func__,
		    pcb->inp_socket, mode);
	}

	/* NOTREACHED */
	return mode;
}
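/*
 * Usage sketch (illustrative): a lookup/use/release cycle around a pcb
 * found on a global list.  WNT_ACQUIRE pins the pcb against reclamation
 * while it is examined; WNT_RELEASE drops the reference and, if the pcb
 * went DEAD in the meantime, flags it STOPUSING for the reaper:
 *
 *	if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
 *		return;		// pcb is being torn down, don't touch
 *	...examine or use inp...
 *	(void) in_pcb_checkstate(inp, WNT_RELEASE, 0);
 */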
/*
 * inpcb_to_compat copies specific bits of an inpcb to an inpcb_compat.
 * The inpcb_compat data structure is passed to user space and must
 * not change. We intentionally avoid copying pointers.
 */
void
inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
{
	bzero(inp_compat, sizeof(*inp_compat));
	inp_compat->inp_fport = inp->inp_fport;
	inp_compat->inp_lport = inp->inp_lport;
	inp_compat->nat_owner = 0;
	inp_compat->nat_cookie = 0;
	inp_compat->inp_gencnt = inp->inp_gencnt;
	inp_compat->inp_flags = inp->inp_flags;
	inp_compat->inp_flow = inp->inp_flow;
	inp_compat->inp_vflag = inp->inp_vflag;
	inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
	inp_compat->inp_ip_p = inp->inp_ip_p;
	inp_compat->inp_dependfaddr.inp6_foreign =
	    inp->inp_dependfaddr.inp6_foreign;
	inp_compat->inp_dependladdr.inp6_local =
	    inp->inp_dependladdr.inp6_local;
	inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
	inp_compat->inp_depend6.inp6_hlim = 0;
	inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
	inp_compat->inp_depend6.inp6_ifindex = 0;
	inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
}
#if XNU_TARGET_OS_OSX
void
inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
{
	xinp->inp_fport = inp->inp_fport;
	xinp->inp_lport = inp->inp_lport;
	xinp->inp_gencnt = inp->inp_gencnt;
	xinp->inp_flags = inp->inp_flags;
	xinp->inp_flow = inp->inp_flow;
	xinp->inp_vflag = inp->inp_vflag;
	xinp->inp_ip_ttl = inp->inp_ip_ttl;
	xinp->inp_ip_p = inp->inp_ip_p;
	xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
	xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
	xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
	xinp->inp_depend6.inp6_hlim = 0;
	xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
	xinp->inp_depend6.inp6_ifindex = 0;
	xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
}
#endif /* XNU_TARGET_OS_OSX */
/*
 * The following routines implement this scheme:
 *
 * Callers of ip_output() that intend to cache the route in the inpcb pass
 * a local copy of the struct route to ip_output(). Using a local copy of
 * the cached route significantly simplifies things as IP no longer has to
 * worry about having exclusive access to the passed in struct route, since
 * it's defined in the caller's stack; in essence, this allows for a lock-
 * less operation when updating the struct route at the IP level and below,
 * whenever necessary. The scheme works as follows:
 *
 * Prior to dropping the socket's lock and calling ip_output(), the caller
 * copies the struct route from the inpcb into its stack, and adds a reference
 * to the cached route entry, if there was any. The socket's lock is then
 * dropped and ip_output() is called with a pointer to the copy of struct
 * route defined on the stack (not to the one in the inpcb.)
 *
 * Upon returning from ip_output(), the caller then acquires the socket's
 * lock and synchronizes the cache; if there is no route cached in the inpcb,
 * it copies the local copy of struct route (which may or may not contain any
 * route) back into the cache; otherwise, if the inpcb has a route cached in
 * it, the one in the local copy will be freed, if there's any. Trashing the
 * cached route in the inpcb can be avoided because ip_output() is single-
 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
 * by the socket/transport layer.)
 */
void
inp_route_copyout(struct inpcb *inp, struct route *dst)
{
	struct route *src = &inp->inp_route;

	socket_lock_assert_owned(inp->inp_socket);

	/*
	 * If the route in the PCB is stale or not for IPv4, blow it away;
	 * this is possible in the IPv4-mapped address case.
	 */
	if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) {
		ROUTE_RELEASE(src);
	}

	route_copyout(dst, src, sizeof(*dst));
}

void
inp_route_copyin(struct inpcb *inp, struct route *src)
{
	struct route *dst = &inp->inp_route;

	socket_lock_assert_owned(inp->inp_socket);

	/* Minor sanity check */
	if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) {
		panic("%s: wrong or corrupted route: %p", __func__, src);
	}

	route_copyin(src, dst, sizeof(*src));
}
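/*
 * Usage sketch of the scheme described above (illustrative; the locals
 * and the ip_output() argument list are simplified assumptions, not code
 * from this file):
 *
 *	struct route ro;
 *
 *	socket_lock(so, 0);
 *	inp_route_copyout(inp, &ro);	// stack copy + ref on cached rt
 *	socket_unlock(so, 0);
 *	error = ip_output(m, opt, &ro, flags, NULL, NULL);
 *	socket_lock(so, 0);
 *	inp_route_copyin(inp, &ro);	// sync pcb cache, drop extra ref
 *	socket_unlock(so, 0);
 */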
/*
 * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option.
 */
int
inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
{
	struct ifnet *ifp = NULL;

	ifnet_head_lock_shared();
	if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
	    (ifp = ifindex2ifnet[ifscope]) == NULL)) {
		ifnet_head_done();
		return ENXIO;
	}
	ifnet_head_done();

	VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);

	/*
	 * A zero interface scope value indicates an "unbind".
	 * Otherwise, take in whatever value the app desires;
	 * the app may already know the scope (or force itself
	 * to such a scope) ahead of time before the interface
	 * gets attached. It doesn't matter either way; any
	 * route lookup from this point on will require an
	 * exact match for the embedded interface scope.
	 */
	inp->inp_boundifp = ifp;
	if (inp->inp_boundifp == NULL) {
		inp->inp_flags &= ~INP_BOUND_IF;
	} else {
		inp->inp_flags |= INP_BOUND_IF;
	}

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);

	if (pifp != NULL) {
		*pifp = ifp;
	}

	return 0;
}
/*
 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
 * as well as for setting PROC_UUID_NO_CELLULAR policy.
 */
void
inp_set_nocellular(struct inpcb *inp)
{
	inp->inp_flags |= INP_NO_IFT_CELLULAR;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}

/*
 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
 */
void
inp_clear_nocellular(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/*
	 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
	 * has a higher precedence than INP_NO_IFT_CELLULAR. Clear the flag
	 * if and only if the socket is unrestricted.
	 */
	if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
		inp->inp_flags &= ~INP_NO_IFT_CELLULAR;

		/* Blow away any cached route in the PCB */
		ROUTE_RELEASE(&inp->inp_route);
	}
}

void
inp_set_noexpensive(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}

void
inp_set_noconstrained(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_NO_IFF_CONSTRAINED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}

void
inp_set_awdl_unrestricted(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}

boolean_t
inp_get_awdl_unrestricted(struct inpcb *inp)
{
	return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
}

void
inp_clear_awdl_unrestricted(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}

void
inp_set_intcoproc_allowed(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}

boolean_t
inp_get_intcoproc_allowed(struct inpcb *inp)
{
	return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE;
}

void
inp_clear_intcoproc_allowed(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}

#if NECP
/*
 * Called when PROC_UUID_NECP_APP_POLICY is set.
 */
void
inp_set_want_app_policy(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_WANT_APP_POLICY;
}

/*
 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
 */
void
inp_clear_want_app_policy(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
}
#endif /* NECP */
/*
 * Calculate flow hash for an inp, used by an interface to identify a
 * flow. When an interface provides flow control advisory, this flow
 * hash is used as an identifier.
 */
u_int32_t
inp_calc_flowhash(struct inpcb *inp)
{
	struct inp_flowhash_key fh __attribute__((aligned(8)));
	u_int32_t flowhash = 0;
	struct inpcb *tmp_inp = NULL;

	if (inp_hash_seed == 0) {
		inp_hash_seed = RandomULong();
	}

	bzero(&fh, sizeof(fh));

	bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof(fh.infh_laddr));
	bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof(fh.infh_faddr));

	fh.infh_lport = inp->inp_lport;
	fh.infh_fport = inp->inp_fport;
	fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
	fh.infh_proto = inp->inp_ip_p;
	fh.infh_rand1 = RandomULong();
	fh.infh_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), inp_hash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	inp->inp_flowhash = flowhash;

	/* Insert the inp into inp_fc_tree */
	lck_mtx_lock_spin(&inp_fc_lck);
	tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
	if (tmp_inp != NULL) {
		/*
		 * There is a different inp with the same flowhash.
		 * There can be a collision on flow hash but the
		 * probability is low. Let's recompute the
		 * flowhash.
		 */
		lck_mtx_unlock(&inp_fc_lck);
		/* recompute hash seed */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
	inp->inp_flags2 |= INP2_IN_FCTREE;
	lck_mtx_unlock(&inp_fc_lck);

	return flowhash;
}
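/*
 * Illustrative note (an assumption drawn from the code, not an
 * authoritative statement): the returned hash is the identifier an
 * interface later hands back in a flow advisory; inp_flowadv() below
 * resolves it back to this pcb through inp_fc_tree:
 *
 *	u_int32_t fh = inp_calc_flowhash(inp);
 *	...interface reports feedback for fh...
 *	inp_flowadv(fh);	// finds the pcb and delivers feedback
 */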
void
inp_flowadv(uint32_t flowhash)
{
	struct inpcb *inp;

	inp = inp_fc_getinp(flowhash, 0);

	if (inp == NULL) {
		return;
	}
	inp_fc_feedback(inp);
}

/*
 * Function to compare inp_fc_entries in inp flow control tree
 */
static inline int
infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
{
	return memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
	           sizeof(inp1->inp_flowhash));
}
static struct inpcb *
inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
{
	struct inpcb *inp = NULL;
	int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;

	lck_mtx_lock_spin(&inp_fc_lck);
	key_inp.inp_flowhash = flowhash;
	inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
	if (inp == NULL) {
		/* inp is not present, return */
		lck_mtx_unlock(&inp_fc_lck);
		return NULL;
	}

	if (flags & INPFC_REMOVE) {
		RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
		lck_mtx_unlock(&inp_fc_lck);

		bzero(&(inp->infc_link), sizeof(inp->infc_link));
		inp->inp_flags2 &= ~INP2_IN_FCTREE;
		return NULL;
	}

	if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) {
		inp = NULL;
	}
	lck_mtx_unlock(&inp_fc_lck);

	return inp;
}
void
inp_fc_feedback(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/* we already hold a want_cnt on this inp, socket can't be null */
	VERIFY(so != NULL);
	socket_lock(so, 1);

	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
		socket_unlock(so, 1);
		return;
	}

	if (inp->inp_sndinprog_cnt > 0) {
		inp->inp_flags |= INP_FC_FEEDBACK;
	}

	/*
	 * Return if the connection is not in flow-controlled state.
	 * This can happen if the connection experienced
	 * loss while it was in flow controlled state
	 */
	if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
		socket_unlock(so, 1);
		return;
	}
	inp_reset_fc_state(inp);

	if (SOCK_TYPE(so) == SOCK_STREAM) {
		inp_fc_unthrottle_tcp(inp);
	}

	socket_unlock(so, 1);
}
void
inp_reset_fc_state(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
	int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);

	if (suspended) {
		so->so_flags &= ~(SOF_SUSPENDED);
		soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
	}

	/* Give a write wakeup to unblock the socket */
	if (needwakeup) {
		sowwakeup(so);
	}
}
int
inp_set_fc_state(struct inpcb *inp, int advcode)
{
	boolean_t is_flow_controlled = INP_WAIT_FOR_IF_FEEDBACK(inp);
	struct inpcb *tmp_inp = NULL;

	/*
	 * If there was a feedback from the interface when
	 * send operation was in progress, we should ignore
	 * this flow advisory to avoid a race between setting
	 * flow controlled state and receiving feedback from
	 * the interface
	 */
	if (inp->inp_flags & INP_FC_FEEDBACK) {
		return 0;
	}

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
	if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
	    INPFC_SOLOCKED)) != NULL) {
		if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
			return 0;
		}
		VERIFY(tmp_inp == inp);
		switch (advcode) {
		case FADV_FLOW_CONTROLLED:
			inp->inp_flags |= INP_FLOW_CONTROLLED;
			break;
		case FADV_SUSPENDED:
			inp->inp_flags |= INP_FLOW_SUSPENDED;
			soevent(inp->inp_socket,
			    (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));

			/* Record the fact that suspend event was sent */
			inp->inp_socket->so_flags |= SOF_SUSPENDED;
			break;
		}

		if (!is_flow_controlled && SOCK_TYPE(inp->inp_socket) == SOCK_STREAM) {
			inp_fc_throttle_tcp(inp);
		}
		return 1;
	}
	return 0;
}
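/*
 * Illustrative summary of the advisory codes handled above (drawn from
 * the code, not an authoritative table):
 *
 *	FADV_FLOW_CONTROLLED -> INP_FLOW_CONTROLLED set
 *	FADV_SUSPENDED       -> INP_FLOW_SUSPENDED set, SO_FILT_HINT_SUSPEND
 *	                        delivered, SOF_SUSPENDED recorded
 *
 * Either flag makes INP_WAIT_FOR_IF_FEEDBACK() true; subsequent interface
 * feedback clears them via inp_fc_feedback()/inp_reset_fc_state().
 */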
/*
 * Handler for SO_FLUSH socket option.
 */
int
inp_flush(struct inpcb *inp, int optval)
{
	u_int32_t flowhash = inp->inp_flowhash;
	struct ifnet *rtifp, *oifp;

	/* Either all classes or one of the valid ones */
	if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) {
		return EINVAL;
	}

	/* We need a flow hash for identification */
	if (flowhash == 0) {
		return 0;
	}

	/* Grab the interfaces from the route and pcb */
	rtifp = ((inp->inp_route.ro_rt != NULL) ?
	    inp->inp_route.ro_rt->rt_ifp : NULL);
	oifp = inp->inp_last_outifp;

	if (rtifp != NULL) {
		if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
	}
	if (oifp != NULL && oifp != rtifp) {
		if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
	}

	return 0;
}
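/*
 * Usage sketch (illustrative; SO_FLUSH is a private option and this
 * user-space fragment is an assumption, not code from this file): a
 * process flushes its pending packets of one service class from the
 * interface queues:
 *
 *	int tc = SO_TC_BK;
 *	setsockopt(s, SOL_SOCKET, SO_FLUSH, &tc, sizeof(tc));
 *
 * which arrives here as inp_flush(inp, SO_TC_BK).
 */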
/*
 * Clear the INP_INADDR_ANY flag (special case for PPP only)
 */
void
inp_clear_INP_INADDR_ANY(struct socket *so)
{
	struct inpcb *inp = NULL;

	socket_lock(so, 1);
	inp = sotoinpcb(so);
	if (inp) {
		inp->inp_flags &= ~INP_INADDR_ANY;
	}
	socket_unlock(so, 1);
}
void
inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
{
	struct socket *so = inp->inp_socket;

	soprocinfo->spi_pid = so->last_pid;
	strlcpy(&soprocinfo->spi_proc_name[0], &inp->inp_last_proc_name[0],
	    sizeof(soprocinfo->spi_proc_name));
	if (so->last_pid != 0) {
		uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
	}
	/*
	 * When not delegated, the effective pid is the same as the real pid
	 */
	if (so->so_flags & SOF_DELEGATED) {
		soprocinfo->spi_delegated = 1;
		soprocinfo->spi_epid = so->e_pid;
		uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
	} else {
		soprocinfo->spi_delegated = 0;
		soprocinfo->spi_epid = so->last_pid;
	}
	strlcpy(&soprocinfo->spi_e_proc_name[0], &inp->inp_e_proc_name[0],
	    sizeof(soprocinfo->spi_e_proc_name));
}
int
inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
    struct so_procinfo *soprocinfo)
{
	struct inpcb *inp = NULL;
	int found = 0;

	bzero(soprocinfo, sizeof(struct so_procinfo));

	if (!flowhash) {
		return -1;
	}

	lck_rw_lock_shared(pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
		    inp->inp_socket != NULL &&
		    inp->inp_flowhash == flowhash) {
			found = 1;
			inp_get_soprocinfo(inp, soprocinfo);
			break;
		}
	}
	lck_rw_done(pcbinfo->ipi_lock);

	return found;
}
#if CONFIG_PROC_UUID_POLICY
static void
inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = INP_NO_CELLULAR(inp);
	if (set) {
		inp_set_nocellular(inp);
	} else {
		inp_clear_nocellular(inp);
	}
	after = INP_NO_CELLULAR(inp);
	if (net_io_policy_log && (before != after)) {
		static const char *ok = "OK";
		static const char *nok = "NOACCESS";
		uuid_string_t euuid_buf;
		pid_t epid;

		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		/* allow this socket to generate another notification event */
		so->so_ifdenied_notifies = 0;

		log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? ok : nok),
		    ((before < after) ? nok : ok));
	}
}
#if NECP
static void
inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	if (set) {
		inp_set_want_app_policy(inp);
	} else {
		inp_clear_want_app_policy(inp);
	}
	after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	if (net_io_policy_log && (before != after)) {
		static const char *wanted = "WANTED";
		static const char *unwanted = "UNWANTED";
		uuid_string_t euuid_buf;
		pid_t epid;

		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? unwanted : wanted),
		    ((before < after) ? wanted : unwanted));
	}
}
#endif /* NECP */
#endif /* !CONFIG_PROC_UUID_POLICY */
#if NECP
void
inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr,
    struct sockaddr *override_remote_addr, u_int override_bound_interface)
{
	necp_socket_find_policy_match(inp, override_local_addr,
	    override_remote_addr, override_bound_interface);
	if (necp_socket_should_rescope(inp) &&
	    inp->inp_lport == 0 &&
	    inp->inp_laddr.s_addr == INADDR_ANY &&
	    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
		// If we should rescope, and the socket is not yet bound
		inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
	}
}
#endif /* NECP */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t ogencnt;
	int err = 0;
	uint8_t *lookup_uuid = NULL;

	if (!net_io_policy_uuid ||
	    so == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		return 0;
	}

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED)) {
		return 0;
	}

#if defined(XNU_TARGET_OS_OSX)
	if (so->so_rpid > 0) {
		lookup_uuid = so->so_ruuid;
		ogencnt = so->so_policy_gencnt;
		err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
	}
#endif
	if (lookup_uuid == NULL || err == ENOENT) {
		lookup_uuid = ((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid);
		ogencnt = so->so_policy_gencnt;
		err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
	}

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go thru the checks below.
	 */
	if (err == ENOENT && ogencnt != 0) {
		so->so_policy_gencnt = 0;
	}

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly. If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, FALSE);
		}
#endif /* NECP */
	}

	return (err == ENOENT) ? 0 : err;
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return 0;
#endif /* !CONFIG_PROC_UUID_POLICY */
}
static unsigned int log_restricted;
SYSCTL_DECL(_net_inet);
SYSCTL_INT(_net_inet, OID_AUTO, log_restricted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0,
    "Log network restrictions");
/*
 * Called when we need to enforce policy restrictions in the input path.
 *
 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
 */
static boolean_t
_inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	/*
	 * Inbound restrictions.
	 */
	if (!sorestrictrecv) {
		return FALSE;
	}

	if (ifp == NULL) {
		return FALSE;
	}

	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
		return TRUE;
	}

	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
		return TRUE;
	}

	if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
		return TRUE;
	}

	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
		return TRUE;
	}

	if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) {
		return FALSE;
	}

	if (inp->inp_flags & INP_RECV_ANYIF) {
		return FALSE;
	}

	if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) {
		return FALSE;
	}

	if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
		return TRUE;
	}

	return TRUE;
}
boolean_t
inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
{
	boolean_t ret;

	ret = _inp_restricted_recv(inp, ifp);
	if (ret == TRUE && log_restricted) {
		printf("pid %d (%s) is unable to receive packets on %s\n",
		    current_proc()->p_pid, proc_best_name(current_proc()),
		    ifp->if_xname);
	}
	return ret;
}
/*
 * Called when we need to enforce policy restrictions in the output path.
 *
 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
 */
static boolean_t
_inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	/*
	 * Outbound restrictions.
	 */
	if (!sorestrictsend) {
		return FALSE;
	}

	if (ifp == NULL) {
		return FALSE;
	}

	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
		return TRUE;
	}

	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
		return TRUE;
	}

	if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
		return TRUE;
	}

	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
		return TRUE;
	}

	if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
		return TRUE;
	}

	return FALSE;
}
boolean_t
inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
{
	boolean_t ret;

	ret = _inp_restricted_send(inp, ifp);
	if (ret == TRUE && log_restricted) {
		printf("pid %d (%s) is unable to transmit packets on %s\n",
		    current_proc()->p_pid, proc_best_name(current_proc()),
		    ifp->if_xname);
	}
	return ret;
}
inline void
inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack)
{
	struct ifnet *ifp = inp->inp_last_outifp;
	struct socket *so = inp->inp_socket;

	if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
	    (ifp->if_type == IFT_CELLULAR || IFNET_IS_WIFI(ifp))) {
		int32_t unsent;

		so->so_snd.sb_flags |= SB_SNDBYTE_CNT;

		/*
		 * There can be data outstanding before the connection
		 * becomes established -- TFO case
		 */
		if (so->so_snd.sb_cc > 0) {
			inp_incr_sndbytes_total(so, so->so_snd.sb_cc);
		}

		unsent = inp_get_sndbytes_allunsent(so, th_ack);
		if (unsent > 0) {
			inp_incr_sndbytes_unsent(so, unsent);
		}
	}
}
inline void
inp_incr_sndbytes_total(struct socket *so, int32_t len)
{
	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct ifnet *ifp = inp->inp_last_outifp;

	if (ifp != NULL) {
		VERIFY(ifp->if_sndbyte_total >= 0);
		OSAddAtomic64(len, &ifp->if_sndbyte_total);
	}
}

inline void
inp_decr_sndbytes_total(struct socket *so, int32_t len)
{
	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct ifnet *ifp = inp->inp_last_outifp;

	if (ifp != NULL) {
		VERIFY(ifp->if_sndbyte_total >= len);
		OSAddAtomic64(-len, &ifp->if_sndbyte_total);
	}
}
inline void
inp_incr_sndbytes_unsent(struct socket *so, int32_t len)
{
	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct ifnet *ifp = inp->inp_last_outifp;

	if (ifp != NULL) {
		VERIFY(ifp->if_sndbyte_unsent >= 0);
		OSAddAtomic64(len, &ifp->if_sndbyte_unsent);
	}
}

inline void
inp_decr_sndbytes_unsent(struct socket *so, int32_t len)
{
	if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
		return;
	}

	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct ifnet *ifp = inp->inp_last_outifp;

	if (ifp != NULL) {
		if (ifp->if_sndbyte_unsent >= len) {
			OSAddAtomic64(-len, &ifp->if_sndbyte_unsent);
		} else {
			ifp->if_sndbyte_unsent = 0;
		}
	}
}

inline void
inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
{
	int32_t len;

	if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
		return;
	}

	len = inp_get_sndbytes_allunsent(so, th_ack);
	inp_decr_sndbytes_unsent(so, len);
}
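/*
 * Accounting lifecycle sketch (illustrative ordering, inferred from the
 * helpers above rather than stated in this file): on cellular/Wi-Fi links
 * the socket opts into byte counting and the interface counters track the
 * send buffer:
 *
 *	inp_count_sndbytes(inp, th_ack);	   // opt in, seed counters
 *	inp_incr_sndbytes_total(so, len);	   // data queued to sb
 *	inp_incr_sndbytes_unsent(so, len);	   // not yet sent on wire
 *	inp_decr_sndbytes_allunsent(so, th_ack);   // drained/acknowledged
 */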
inline void
inp_set_activity_bitmap(struct inpcb *inp)
{
	in_stat_set_activity_bitmap(&inp->inp_nw_activity, net_uptime());
}

inline void
inp_get_activity_bitmap(struct inpcb *inp, activity_bitmap_t *ab)
{
	bcopy(&inp->inp_nw_activity, ab, sizeof(*ab));
}
void
inp_update_last_owner(struct socket *so, struct proc *p, struct proc *ep)
{
	struct inpcb *inp = (struct inpcb *)so->so_pcb;

	if (inp == NULL) {
		return;
	}

	if (p != NULL) {
		strlcpy(&inp->inp_last_proc_name[0], proc_name_address(p), sizeof(inp->inp_last_proc_name));
	}
	if (so->so_flags & SOF_DELEGATED) {
		if (ep != NULL) {
			strlcpy(&inp->inp_e_proc_name[0], proc_name_address(ep), sizeof(inp->inp_e_proc_name));
		} else {
			inp->inp_e_proc_name[0] = 0;
		}
	} else {
		inp->inp_e_proc_name[0] = 0;
	}
}
void
inp_copy_last_owner(struct socket *so, struct socket *head)
{
	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct inpcb *head_inp = (struct inpcb *)head->so_pcb;

	if (inp == NULL || head_inp == NULL) {
		return;
	}

	strlcpy(&inp->inp_last_proc_name[0], &head_inp->inp_last_proc_name[0], sizeof(inp->inp_last_proc_name));
	strlcpy(&inp->inp_e_proc_name[0], &head_inp->inp_e_proc_name[0], sizeof(inp->inp_e_proc_name));
}