/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mcache.h>
#include <sys/kauth.h>
#include <sys/proc_uuid_policy.h>
#include <sys/syslog.h>

#include <libkern/OSAtomic.h>
#include <kern/locks.h>

#include <machine/limits.h>

#include <kern/zalloc.h>

#include <net/if_types.h>
#include <net/route.h>
#include <net/flowhash.h>
#include <net/flowadv.h>
#include <net/ntstat.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>

#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>

#include <sys/kdebug.h>
#include <sys/random.h>

#include <dev/random/randomdev.h>
#include <mach/boolean.h>

#include <net/necp.h>
static lck_grp_t	*inpcb_lock_grp;
static lck_attr_t	*inpcb_lock_attr;
static lck_grp_attr_t	*inpcb_lock_grp_attr;
decl_lck_mtx_data(static, inpcb_lock);		/* global INPCB lock */
decl_lck_mtx_data(static, inpcb_timeout_lock);

static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);

static u_int16_t inpcb_timeout_run = 0;	/* INPCB timer is scheduled to run */
static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
static boolean_t inpcb_ticking = FALSE;		/* "slow" timer is scheduled */
static boolean_t inpcb_fast_timer_on = FALSE;

/*
 * If the total number of gc reqs is above a threshold, schedule
 * garbage collect timer sooner
 */
static boolean_t inpcb_toomany_gcreq = FALSE;

#define	INPCB_GCREQ_THRESHOLD	50000
#define	INPCB_TOOMANY_GCREQ_TIMER	(hz/10)	/* 10 times a second */
static void inpcb_sched_timeout(struct timeval *);
static void inpcb_timeout(void *);
int inpcb_timeout_lazy = 10;	/* 10 seconds leeway for lazy timers */
extern int tvtohz(struct timeval *);

#if CONFIG_PROC_UUID_POLICY
static void inp_update_cellular_policy(struct inpcb *, boolean_t);
#if NECP
static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
#endif /* NECP */
#endif /* !CONFIG_PROC_UUID_POLICY */

#define	DBG_FNC_PCB_LOOKUP	NETDBG_CODE(DBG_NETTCP, (6 << 8))
#define	DBG_FNC_PCB_HLOOKUP	NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 */
int ipport_lowfirstauto = IPPORT_RESERVED - 1;	/* 1023 */
int ipport_lowlastauto = IPPORT_RESERVEDSTART;	/* 600 */
int ipport_firstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
int ipport_lastauto = IPPORT_HILASTAUTO;	/* 65535 */
int ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
int ipport_hilastauto = IPPORT_HILASTAUTO;	/* 65535 */

#define	RANGECHK(var, min, max) \
	if ((var) < (min)) { (var) = (min); } \
	else if ((var) > (max)) { (var) = (max); }
static int
sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error;

	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
	if (!error) {
		RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
		RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
		RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
	}
	return (error);
}
SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
    CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports");

SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
    CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
    &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
    CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
    &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
    CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
    &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
    CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
    &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
    CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
    &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
    CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
    &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
extern int udp_use_randomport;
extern int tcp_use_randomport;

/* Structs used for flowhash computation */
struct inp_flowhash_key_addr {
	union {
		struct in_addr v4;
		struct in6_addr v6;
		u_int8_t addr8[16];
		u_int16_t addr16[8];
		u_int32_t addr32[4];
	} infha;
};

struct inp_flowhash_key {
	struct inp_flowhash_key_addr	infh_laddr;
	struct inp_flowhash_key_addr	infh_faddr;
	u_int32_t			infh_lport;
	u_int32_t			infh_fport;
	u_int32_t			infh_af;
	u_int32_t			infh_proto;
	u_int32_t			infh_rand1;
	u_int32_t			infh_rand2;
};
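/*
 * Sketch of how this key is meant to be consumed, assuming the
 * net_flowhash() entry point from <net/flowhash.h> and the canonical
 * population code in inp_calc_flowhash(): the tuple fields are copied
 * from the PCB, the random words are salted from inp_hash_seed below,
 * and the whole struct is hashed as one opaque blob:
 *
 *	struct inp_flowhash_key fh;
 *	bzero(&fh, sizeof (fh));
 *	fh.infh_lport = inp->inp_lport;
 *	fh.infh_fport = inp->inp_fport;
 *	fh.infh_rand1 = inp_hash_seed;
 *	inp->inp_flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed);
 *
 * Illustrative only; see inp_calc_flowhash() for the real field setup.
 */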
static u_int32_t inp_hash_seed = 0;

static int infc_cmp(const struct inpcb *, const struct inpcb *);

/* Flags used by inp_fc_getinp */
#define	INPFC_SOLOCKED	0x1
#define	INPFC_REMOVE	0x2
static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);

static void inp_fc_feedback(struct inpcb *);
extern void tcp_remove_from_time_wait(struct inpcb *inp);

decl_lck_mtx_data(static, inp_fc_lck);

RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);

/*
 * Use this inp as a key to find an inp in the flowhash tree.
 * Accesses to it are protected by inp_fc_lck.
 */
struct inpcb key_inp;
/*
 * in_pcb.c: manage the Protocol Control Blocks.
 */
void
in_pcbinit(void)
{
	static int inpcb_initialized = 0;

	VERIFY(!inpcb_initialized);
	inpcb_initialized = 1;

	inpcb_lock_grp_attr = lck_grp_attr_alloc_init();
	inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr);
	inpcb_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr);
	lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr);

	/*
	 * Initialize data structures required to deliver
	 * flow advisories.
	 */
	lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr);
	lck_mtx_lock(&inp_fc_lck);
	RB_INIT(&inp_fc_tree);
	bzero(&key_inp, sizeof(key_inp));
	lck_mtx_unlock(&inp_fc_lck);
}
#define	INPCB_HAVE_TIMER_REQ(req)	(((req).intimer_lazy > 0) || \
	((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
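/*
 * Illustrative reading of the macro above: a pcbinfo whose request block
 * looks like { .intimer_lazy = 0, .intimer_fast = 2, .intimer_nodelay = 0 }
 * still has pending work, so INPCB_HAVE_TIMER_REQ() evaluates true and the
 * timeout below will visit that pcbinfo.  All three counters must drain to
 * zero before the timer is allowed to go idle.
 */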
static void
inpcb_timeout(void *arg)
{
	struct inpcbinfo *ipi;
	boolean_t t, gc;
	struct intimercount gccnt, tmcnt;
	struct timeval leeway;
	boolean_t toomany_gc = FALSE;

	if (arg != NULL) {
		VERIFY(arg == &inpcb_toomany_gcreq);
		toomany_gc = *(boolean_t *)arg;
	}

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	bzero(&gccnt, sizeof(gccnt));
	bzero(&tmcnt, sizeof(tmcnt));

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	gc = inpcb_garbage_collecting;
	inpcb_garbage_collecting = FALSE;

	t = inpcb_ticking;
	inpcb_ticking = FALSE;

	if (gc || t) {
		lck_mtx_unlock(&inpcb_timeout_lock);

		lck_mtx_lock(&inpcb_lock);
		TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
				bzero(&ipi->ipi_gc_req,
				    sizeof(ipi->ipi_gc_req));
				if (gc && ipi->ipi_gc != NULL) {
					ipi->ipi_gc(ipi);
					gccnt.intimer_lazy +=
					    ipi->ipi_gc_req.intimer_lazy;
					gccnt.intimer_fast +=
					    ipi->ipi_gc_req.intimer_fast;
					gccnt.intimer_nodelay +=
					    ipi->ipi_gc_req.intimer_nodelay;
				}
			}
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
				bzero(&ipi->ipi_timer_req,
				    sizeof(ipi->ipi_timer_req));
				if (t && ipi->ipi_timer != NULL) {
					ipi->ipi_timer(ipi);
					tmcnt.intimer_lazy +=
					    ipi->ipi_timer_req.intimer_lazy;
					tmcnt.intimer_fast +=
					    ipi->ipi_timer_req.intimer_fast;
					tmcnt.intimer_nodelay +=
					    ipi->ipi_timer_req.intimer_nodelay;
				}
			}
		}
		lck_mtx_unlock(&inpcb_lock);
		lck_mtx_lock_spin(&inpcb_timeout_lock);
	}

	/* lock was dropped above, so check first before overriding */
	if (!inpcb_garbage_collecting)
		inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
	if (!inpcb_ticking)
		inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);

	/* re-arm the timer if there's work to do */
	if (toomany_gc) {
		inpcb_toomany_gcreq = FALSE;
	} else {
		inpcb_timeout_run--;
		VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);
	}

	bzero(&leeway, sizeof(leeway));
	leeway.tv_sec = inpcb_timeout_lazy;
	if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0)
		inpcb_sched_timeout(NULL);
	else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5)
		/* be lazy when idle with little activity */
		inpcb_sched_timeout(&leeway);
	else
		inpcb_sched_timeout(NULL);

	lck_mtx_unlock(&inpcb_timeout_lock);
}
static void
inpcb_sched_timeout(struct timeval *leeway)
{
	lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);

	if (inpcb_timeout_run == 0 &&
	    (inpcb_garbage_collecting || inpcb_ticking)) {
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		if (leeway == NULL) {
			inpcb_fast_timer_on = TRUE;
			timeout(inpcb_timeout, NULL, hz);
		} else {
			inpcb_fast_timer_on = FALSE;
			timeout_with_leeway(inpcb_timeout, NULL, hz,
			    tvtohz(leeway));
		}
	} else if (inpcb_timeout_run == 1 &&
	    leeway == NULL && !inpcb_fast_timer_on) {
		/*
		 * Since the request was for a fast timer but the
		 * scheduled timer is a lazy timer, try to schedule
		 * another instance of fast timer also
		 */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		inpcb_fast_timer_on = TRUE;
		timeout(inpcb_timeout, NULL, hz);
	}
}
void
inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
{
	struct timeval leeway;
	u_int32_t gccnt;

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	inpcb_garbage_collecting = TRUE;

	gccnt = ipi->ipi_gc_req.intimer_nodelay +
	    ipi->ipi_gc_req.intimer_fast;

	if (gccnt > INPCB_GCREQ_THRESHOLD && !inpcb_toomany_gcreq) {
		inpcb_toomany_gcreq = TRUE;

		/*
		 * There are too many pcbs waiting to be garbage collected,
		 * schedule a much faster timeout in addition to
		 * the caller's request
		 */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		timeout(inpcb_timeout, (void *)&inpcb_toomany_gcreq,
		    INPCB_TOOMANY_GCREQ_TIMER);
	}

	switch (type) {
	case INPCB_TIMER_NODELAY:
		atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1);
		inpcb_sched_timeout(NULL);
		break;
	case INPCB_TIMER_FAST:
		atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
		inpcb_sched_timeout(NULL);
		break;
	default:
		atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);
		leeway.tv_sec = inpcb_timeout_lazy;
		inpcb_sched_timeout(&leeway);
		break;
	}
	lck_mtx_unlock(&inpcb_timeout_lock);
}
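/*
 * Usage note: callers pick the priority to match how soon dead pcbs must
 * be reaped; e.g. the detach path later in this file requests
 *
 *	inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
 *
 * while anything else falls into the default (lazy) case above and accepts
 * the inpcb_timeout_lazy leeway.  The INPCB_GCREQ_THRESHOLD escape hatch
 * only fires once per overload episode, because inpcb_toomany_gcreq stays
 * latched until the extra callout runs.
 */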
void
inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
{
	struct timeval leeway;

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	inpcb_ticking = TRUE;
	switch (type) {
	case INPCB_TIMER_NODELAY:
		atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1);
		inpcb_sched_timeout(NULL);
		break;
	case INPCB_TIMER_FAST:
		atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
		inpcb_sched_timeout(NULL);
		break;
	default:
		atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1);
		leeway.tv_sec = inpcb_timeout_lazy;
		inpcb_sched_timeout(&leeway);
		break;
	}
	lck_mtx_unlock(&inpcb_timeout_lock);
}
void
in_pcbinfo_attach(struct inpcbinfo *ipi)
{
	struct inpcbinfo *ipi0;

	lck_mtx_lock(&inpcb_lock);
	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
		if (ipi0 == ipi) {
			panic("%s: ipi %p already in the list\n",
			    __func__, ipi);
			/* NOTREACHED */
		}
	}
	TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
	lck_mtx_unlock(&inpcb_lock);
}
int
in_pcbinfo_detach(struct inpcbinfo *ipi)
{
	struct inpcbinfo *ipi0;
	int error = 0;

	lck_mtx_lock(&inpcb_lock);
	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
		if (ipi0 == ipi)
			break;
	}
	if (ipi0 != NULL)
		TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
	else
		error = ENXIO;
	lck_mtx_unlock(&inpcb_lock);

	return (error);
}
/*
 * Allocate a PCB and associate it with the socket.
 *
 * Returns:	0			Success
 *		ENOBUFS
 *		ENOMEM
 */
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
{
#pragma unused(p)
	struct inpcb *inp;
	caddr_t temp;
#if CONFIG_MACF_NET
	int mac_error;
#endif /* CONFIG_MACF_NET */

	if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
		inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone);
		if (inp == NULL)
			return (ENOBUFS);
		bzero((caddr_t)inp, sizeof (*inp));
	} else {
		inp = (struct inpcb *)(void *)so->so_saved_pcb;
		temp = inp->inp_saved_ppcb;
		bzero((caddr_t)inp, sizeof (*inp));
		inp->inp_saved_ppcb = temp;
	}

	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	inp->inp_pcbinfo = pcbinfo;
	inp->inp_socket = so;
#if CONFIG_MACF_NET
	mac_error = mac_inpcb_label_init(inp, M_WAITOK);
	if (mac_error != 0) {
		if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0)
			zfree(pcbinfo->ipi_zone, inp);
		return (mac_error);
	}
	mac_inpcb_label_associate(so, inp);
#endif /* CONFIG_MACF_NET */
	/* make sure inp_stat is always 64-bit aligned */
	inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
	    sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) {
		panic("%s: insufficient space to align inp_stat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_cstat is always 64-bit aligned */
	inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
	    sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) {
		panic("%s: insufficient space to align inp_cstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_wstat is always 64-bit aligned */
	inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
	    sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) {
		panic("%s: insufficient space to align inp_wstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_Wstat is always 64-bit aligned */
	inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
	    sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) {
		panic("%s: insufficient space to align inp_Wstat", __func__);
		/* NOTREACHED */
	}

	so->so_pcb = (caddr_t)inp;

	if (so->so_proto->pr_flags & PR_PCBLOCK) {
		lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
		    pcbinfo->ipi_lock_attr);
	}

#if INET6
	if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on)
		inp->inp_flags |= IN6P_IPV6_V6ONLY;

	if (ip6_auto_flowlabel)
		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
#endif /* INET6 */

	(void) inp_update_policy(inp);

	lck_rw_lock_exclusive(pcbinfo->ipi_lock);
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
	pcbinfo->ipi_count++;
	lck_rw_done(pcbinfo->ipi_lock);
	return (0);
}
/*
 * in_pcblookup_local_and_cleanup does everything
 * in_pcblookup_local does but it checks for a socket
 * that's going away.  Since we know that the lock is
 * held read+write when this function is called, we
 * can safely dispose of this socket like the slow
 * timer would usually do and return NULL.  This is
 * great for bind.
 */
static struct inpcb *
in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    u_int lport_arg, int wild_okay)
{
	struct inpcb *inp;

	/* Perform normal lookup */
	inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);

	/* Check if we found a match but it's waiting to be disposed */
	if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
		struct socket *so = inp->inp_socket;

		lck_mtx_lock(&inp->inpcb_mtx);

		if (so->so_usecount == 0) {
			if (inp->inp_state != INPCB_STATE_DEAD)
				in_pcbdetach(inp);
			in_pcbdispose(inp);	/* will unlock & destroy */
			inp = NULL;
		} else {
			lck_mtx_unlock(&inp->inpcb_mtx);
		}
	}

	return (inp);
}
static void
in_pcb_conflict_post_msg(u_int16_t port)
{
	/*
	 * Radar 5523020: send a kernel event notification if a
	 * non-participating socket tries to bind the port a socket
	 * that has set SOF_NOTIFYCONFLICT owns.
	 */
	struct kev_msg ev_msg;
	struct kev_in_portinuse in_portinuse;

	bzero(&in_portinuse, sizeof (struct kev_in_portinuse));
	bzero(&ev_msg, sizeof (struct kev_msg));
	in_portinuse.port = ntohs(port);	/* port in host order */
	in_portinuse.req_pid = proc_selfpid();
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_INET_SUBCLASS;
	ev_msg.event_code = KEV_INET_PORTINUSE;
	ev_msg.dv[0].data_ptr = &in_portinuse;
	ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse);
	ev_msg.dv[1].data_length = 0;
	kev_post_msg(&ev_msg);
}
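/*
 * For context, a userland observer would pick this event up over a
 * kernel-event socket.  A minimal sketch, illustrative only and with
 * error handling omitted, using the standard <sys/kern_event.h>
 * interfaces:
 *
 *	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *	struct kev_request req = {
 *		.vendor_code = KEV_VENDOR_APPLE,
 *		.kev_class = KEV_NETWORK_CLASS,
 *		.kev_subclass = KEV_INET_SUBCLASS,
 *	};
 *	ioctl(fd, SIOCSKEVFILT, &req);
 *	char buf[1024];
 *	recv(fd, buf, sizeof (buf), 0);
 *	struct kern_event_msg *msg = (struct kern_event_msg *)buf;
 *	if (msg->event_code == KEV_INET_PORTINUSE) {
 *		struct kev_in_portinuse *piu =
 *		    (struct kev_in_portinuse *)&msg->event_data[0];
 *		printf("port %u in use by pid %u\n", piu->port, piu->req_pid);
 *	}
 */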
/*
 * Bind an INPCB to an address and/or port.  This routine should not alter
 * the caller-supplied local address "nam".
 *
 * Returns:	0			Success
 *		EADDRNOTAVAIL		Address not available.
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported [notdef]
 *		EACCES			Permission denied
 *		EADDRINUSE		Address in use
 *		EAGAIN			Resource unavailable, try again
 *		priv_check_cred:EPERM	Operation not permitted
 */
int
in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
{
	struct socket *so = inp->inp_socket;
	unsigned short *lastport;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	u_short lport = 0, rand_port = 0;
	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
	int error, randomport, conflict = 0;
	boolean_t anonport = FALSE;
	kauth_cred_t cred;
	struct in_addr laddr;
	struct ifnet *outif = NULL;

	if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
		return (EADDRNOTAVAIL);
	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
		return (EINVAL);
	if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
		wild = 1;

	bzero(&laddr, sizeof(laddr));

	socket_unlock(so, 0); /* keep reference on socket */
	lck_rw_lock_exclusive(pcbinfo->ipi_lock);

	if (nam != NULL) {
		if (nam->sa_len != sizeof (struct sockaddr_in)) {
			lck_rw_done(pcbinfo->ipi_lock);
			socket_lock(so, 0);
			return (EINVAL);
		}
		/*
		 * We should check the family, but old programs
		 * incorrectly fail to initialize it.
		 */
		if (nam->sa_family != AF_INET) {
			lck_rw_done(pcbinfo->ipi_lock);
			socket_lock(so, 0);
			return (EAFNOSUPPORT);
		}
		lport = SIN(nam)->sin_port;

		if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
			/*
			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
			 * allow complete duplication of binding if
			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
			 * and a multicast address is bound on both
			 * new and duplicated sockets.
			 */
			if (so->so_options & SO_REUSEADDR)
				reuseport = SO_REUSEADDR|SO_REUSEPORT;
		} else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
			struct sockaddr_in sin;
			struct ifaddr *ifa;

			/* Sanitized for interface address searches */
			bzero(&sin, sizeof (sin));
			sin.sin_family = AF_INET;
			sin.sin_len = sizeof (struct sockaddr_in);
			sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;

			ifa = ifa_ifwithaddr(SA(&sin));
			if (ifa == NULL) {
				lck_rw_done(pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return (EADDRNOTAVAIL);
			} else {
				/*
				 * Opportunistically determine the outbound
				 * interface that may be used; this may not
				 * hold true if we end up using a route
				 * going over a different interface, e.g.
				 * when sending to a local address.  This
				 * will get updated again after sending.
				 */
				IFA_LOCK(ifa);
				outif = ifa->ifa_ifp;
				IFA_UNLOCK(ifa);
				IFA_REMREF(ifa);
			}
		}
		if (lport != 0) {
			struct inpcb *t;
			uid_t u;

			/* GROSS */
			if (ntohs(lport) < IPPORT_RESERVED) {
				cred = kauth_cred_proc_ref(p);
				error = priv_check_cred(cred,
				    PRIV_NETINET_RESERVEDPORT, 0);
				kauth_cred_unref(&cred);
				if (error != 0) {
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return (EACCES);
				}
			}
			if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
			    (u = kauth_cred_getuid(so->so_cred)) != 0 &&
			    (t = in_pcblookup_local_and_cleanup(
			    inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
			    INPLOOKUP_WILDCARD)) != NULL &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY ||
			    !(t->inp_socket->so_options & SO_REUSEPORT)) &&
			    (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
			    !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY)) {
				if ((t->inp_socket->so_flags &
				    SOF_NOTIFYCONFLICT) &&
				    !(so->so_flags & SOF_NOTIFYCONFLICT))
					conflict = 1;

				lck_rw_done(pcbinfo->ipi_lock);

				if (conflict)
					in_pcb_conflict_post_msg(lport);

				socket_lock(so, 0);
				return (EADDRINUSE);
			}
			t = in_pcblookup_local_and_cleanup(pcbinfo,
			    SIN(nam)->sin_addr, lport, wild);
			if (t != NULL &&
			    (reuseport & t->inp_socket->so_options) == 0) {
				if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
				    t->inp_laddr.s_addr != INADDR_ANY ||
				    SOCK_DOM(so) != PF_INET6 ||
				    SOCK_DOM(t->inp_socket) != PF_INET6) {
					if ((t->inp_socket->so_flags &
					    SOF_NOTIFYCONFLICT) &&
					    !(so->so_flags &
					    SOF_NOTIFYCONFLICT))
						conflict = 1;

					lck_rw_done(pcbinfo->ipi_lock);

					if (conflict)
						in_pcb_conflict_post_msg(lport);
					socket_lock(so, 0);
					return (EADDRINUSE);
				}
			}
		}
		laddr = SIN(nam)->sin_addr;
	}
	if (lport == 0) {
		u_short first, last;
		int count;

		randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
		    (so->so_type == SOCK_STREAM ? tcp_use_randomport :
		    udp_use_randomport);

		/*
		 * Even though this looks similar to the code in
		 * in6_pcbsetport, the v6 vs v4 checks are different.
		 */
		anonport = TRUE;
		if (inp->inp_flags & INP_HIGHPORT) {
			first = ipport_hifirstauto;	/* sysctl */
			last = ipport_hilastauto;
			lastport = &pcbinfo->ipi_lasthi;
		} else if (inp->inp_flags & INP_LOWPORT) {
			cred = kauth_cred_proc_ref(p);
			error = priv_check_cred(cred,
			    PRIV_NETINET_RESERVEDPORT, 0);
			kauth_cred_unref(&cred);
			if (error != 0) {
				lck_rw_done(pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return (error);
			}
			first = ipport_lowfirstauto;	/* 1023 */
			last = ipport_lowlastauto;	/* 600 */
			lastport = &pcbinfo->ipi_lastlow;
		} else {
			first = ipport_firstauto;	/* sysctl */
			last = ipport_lastauto;
			lastport = &pcbinfo->ipi_lastport;
		}
		/* No point in randomizing if only one port is available */
		if (first == last)
			randomport = 0;
		/*
		 * Simple check to ensure all ports are not used up causing
		 * a deadlock here.
		 *
		 * We split the two cases (up and down) so that the direction
		 * is not being tested on each round of the loop.
		 */
		if (first > last) {
			/* counting down */
			if (randomport) {
				read_random(&rand_port, sizeof (rand_port));
				*lastport =
				    first - (rand_port % (first - last));
			}
			count = first - last;

			do {
				if (count-- < 0) {	/* completely used? */
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return (EADDRNOTAVAIL);
				}
				--*lastport;
				if (*lastport > first || *lastport < last)
					*lastport = first;
				lport = htons(*lastport);
			} while (in_pcblookup_local_and_cleanup(pcbinfo,
			    ((laddr.s_addr != INADDR_ANY) ? laddr :
			    inp->inp_laddr), lport, wild));
		} else {
			/* counting up */
			if (randomport) {
				read_random(&rand_port, sizeof (rand_port));
				*lastport =
				    first + (rand_port % (first - last));
			}
			count = last - first;

			do {
				if (count-- < 0) {	/* completely used? */
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return (EADDRNOTAVAIL);
				}
				++*lastport;
				if (*lastport < first || *lastport > last)
					*lastport = first;
				lport = htons(*lastport);
			} while (in_pcblookup_local_and_cleanup(pcbinfo,
			    ((laddr.s_addr != INADDR_ANY) ? laddr :
			    inp->inp_laddr), lport, wild));
		}
	}
	socket_lock(so, 0);

	/*
	 * We unlocked socket's protocol lock for a long time.
	 * The socket might have been dropped/defuncted.
	 * Check if the world has changed since.
	 */
	if (inp->inp_state == INPCB_STATE_DEAD) {
		lck_rw_done(pcbinfo->ipi_lock);
		return (ECONNABORTED);
	}

	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
		lck_rw_done(pcbinfo->ipi_lock);
		return (EINVAL);
	}

	if (laddr.s_addr != INADDR_ANY) {
		inp->inp_laddr = laddr;
		inp->inp_last_outifp = outif;
	}
	inp->inp_lport = lport;
	if (anonport)
		inp->inp_flags |= INP_ANONPORT;

	if (in_pcbinshash(inp, 1) != 0) {
		inp->inp_laddr.s_addr = INADDR_ANY;
		inp->inp_last_outifp = NULL;

		inp->inp_lport = 0;
		if (anonport)
			inp->inp_flags &= ~INP_ANONPORT;
		lck_rw_done(pcbinfo->ipi_lock);
		return (EAGAIN);
	}
	lck_rw_done(pcbinfo->ipi_lock);
	sflt_notify(so, sock_evt_bound, NULL);
	return (0);
}
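/*
 * Usage sketch (userland view of the anonymous-port path above,
 * illustrative only): binding with sin_port = 0 walks the
 * net.inet.ip.portrange window selected by INP_HIGHPORT/INP_LOWPORT and
 * hands back an ephemeral port:
 *
 *	struct sockaddr_in sin = {
 *		.sin_len = sizeof (sin),
 *		.sin_family = AF_INET,
 *		.sin_port = 0,
 *		.sin_addr.s_addr = INADDR_ANY,
 *	};
 *	bind(s, (struct sockaddr *)&sin, sizeof (sin));
 *	socklen_t len = sizeof (sin);
 *	getsockname(s, (struct sockaddr *)&sin, &len);
 *	printf("bound to %u\n", ntohs(sin.sin_port));
 */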
/*
 * Transform old in_pcbconnect() into an inner subroutine for new
 * in_pcbconnect(); do some validity-checking on the remote address
 * (in "nam") and then determine local host address (i.e., which
 * interface) to use to access that remote host.
 *
 * This routine may alter the caller-supplied remote address "nam".
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 *
 * This routine might return an ifp with a reference held if the caller
 * provides a non-NULL outif, even in the error case.  The caller is
 * responsible for releasing its reference.
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported
 *		EADDRNOTAVAIL		Address not available
 */
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
    unsigned int ifscope, struct ifnet **outif)
{
	struct route *ro = &inp->inp_route;
	struct in_ifaddr *ia = NULL;
	struct sockaddr_in sin;
	int error = 0;
	boolean_t restricted = FALSE;

	if (outif != NULL)
		*outif = NULL;
	if (nam->sa_len != sizeof (struct sockaddr_in))
		return (EINVAL);
	if (SIN(nam)->sin_family != AF_INET)
		return (EAFNOSUPPORT);
	if (SIN(nam)->sin_port == 0)
		return (EADDRNOTAVAIL);

	/*
	 * If the destination address is INADDR_ANY,
	 * use the primary local address.
	 * If the supplied address is INADDR_BROADCAST,
	 * and the primary interface supports broadcast,
	 * choose the broadcast address for that interface.
	 */
	if (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
	    SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST) {
		lck_rw_lock_shared(in_ifaddr_rwlock);
		if (!TAILQ_EMPTY(&in_ifaddrhead)) {
			ia = TAILQ_FIRST(&in_ifaddrhead);
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
				SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
			} else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
				SIN(nam)->sin_addr =
				    SIN(&ia->ia_broadaddr)->sin_addr;
			}
			IFA_UNLOCK(&ia->ia_ifa);
			ia = NULL;
		}
		lck_rw_done(in_ifaddr_rwlock);
	}
	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp->inp_laddr.s_addr != INADDR_ANY) {
		VERIFY(ia == NULL);
		*laddr = inp->inp_laddr;
		return (0);
	}

	/*
	 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
	 * then it overrides the sticky ifscope set for the socket.
	 */
	if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF))
		ifscope = inp->inp_boundifp->if_index;

	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 * Note that we should check the address family of the cached
	 * destination, in case of sharing the cache with IPv6.
	 */
	if (ro->ro_rt != NULL)
		RT_LOCK_SPIN(ro->ro_rt);
	if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
	    SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
	    (inp->inp_socket->so_options & SO_DONTROUTE)) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);
	}
	if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
	    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);
		/* No route yet, so try to acquire one */
		bzero(&ro->ro_dst, sizeof (struct sockaddr_in));
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof (struct sockaddr_in);
		SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
		rtalloc_scoped(ro, ifscope);
		if (ro->ro_rt != NULL)
			RT_LOCK_SPIN(ro->ro_rt);
	}
	/* Sanitized local copy for interface address searches */
	bzero(&sin, sizeof (sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof (struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
	/*
	 * If we did not find (or use) a route, assume dest is reachable
	 * on a directly connected network and try to find a corresponding
	 * interface to take the source address from.
	 */
	if (ro->ro_rt == NULL) {
		ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
		if (ia == NULL)
			ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
		error = ((ia == NULL) ? ENETUNREACH : 0);
		goto done;
	}
	RT_LOCK_ASSERT_HELD(ro->ro_rt);
	/*
	 * If the outgoing interface on the route found is not
	 * a loopback interface, use the address from that interface.
	 */
	if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
		VERIFY(ia == NULL);
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 * Apply the same logic for expensive interfaces.
		 */
		if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
			RT_UNLOCK(ro->ro_rt);
			ROUTE_RELEASE(ro);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			ia = ifatoia(ro->ro_rt->rt_ifa);
			IFA_ADDREF(&ia->ia_ifa);
			RT_UNLOCK(ro->ro_rt);
		}
		goto done;
	}

	VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
	RT_UNLOCK(ro->ro_rt);
	/*
	 * The outgoing interface is marked with 'loopback net', so a route
	 * to ourselves is here.
	 * Try to find the interface of the destination address and then
	 * take the address from there.  That interface is not necessarily
	 * a loopback interface.
	 */
	VERIFY(ia == NULL);
	ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
	if (ia == NULL)
		ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
	if (ia == NULL)
		ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
	if (ia == NULL) {
		RT_LOCK(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL)
			IFA_ADDREF(&ia->ia_ifa);
		RT_UNLOCK(ro->ro_rt);
	}
	error = ((ia == NULL) ? ENETUNREACH : 0);

done:
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
	    inp->inp_moptions != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp = NULL;

		imo = inp->inp_moptions;
		IMO_LOCK(imo);
		if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
		    ia->ia_ifp != imo->imo_multicast_ifp)) {
			ifp = imo->imo_multicast_ifp;
			if (ia != NULL)
				IFA_REMREF(&ia->ia_ifa);
			lck_rw_lock_shared(in_ifaddr_rwlock);
			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
				if (ia->ia_ifp == ifp)
					break;
			}
			if (ia != NULL)
				IFA_ADDREF(&ia->ia_ifa);
			lck_rw_done(in_ifaddr_rwlock);
			if (ia == NULL)
				error = EADDRNOTAVAIL;
			else
				error = 0;
		}
		IMO_UNLOCK(imo);
	}
	/*
	 * Don't do pcblookup call here; return interface in laddr
	 * and exit to caller, that will do the lookup.
	 */
	if (ia != NULL) {
		/*
		 * If the source address belongs to a cellular interface
		 * and the socket forbids our using interfaces of such
		 * type, pretend that there is no source address.
		 * Apply the same logic for expensive interfaces.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
			IFA_UNLOCK(&ia->ia_ifa);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else if (error == 0) {
			*laddr = ia->ia_addr.sin_addr;
			if (outif != NULL) {
				struct ifnet *ifp;

				if (ro->ro_rt != NULL)
					ifp = ro->ro_rt->rt_ifp;
				else
					ifp = ia->ia_ifp;

				VERIFY(ifp != NULL);
				IFA_CONVERT_LOCK(&ia->ia_ifa);
				ifnet_reference(ifp);	/* for caller */
				if (*outif != NULL)
					ifnet_release(*outif);
				*outif = ifp;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		} else {
			IFA_UNLOCK(&ia->ia_ifa);
		}
		IFA_REMREF(&ia->ia_ifa);
		ia = NULL;
	}

	if (restricted && error == EHOSTUNREACH) {
		soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_IFDENIED));
	}

	return (error);
}
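/*
 * Source-selection example for the routine above (illustrative, with
 * assumed addresses): with no bound laddr, no cached route and a
 * destination of 10.0.1.5, the scoped rtalloc_scoped() lookup picks the
 * route, and unless that route's interface is loopback or restricted
 * (cellular/expensive with the socket opted out), *laddr becomes the
 * address of rt_ifa, e.g. en0's 10.0.1.2.  A caller passing a non-NULL
 * outif also receives a referenced ifp that it must ifnet_release()
 * when done.
 */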
/*
 * Connect from a socket to a specified address.
 * Both address and port must be specified in argument sin.
 * If we don't have a local address for this socket yet,
 * then pick one.
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
	struct inpcb *pcb;
	int error;
	struct socket *so = inp->inp_socket;

	/*
	 * Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif)) != 0)
		return (error);

	socket_unlock(so, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state.  When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0)
		return (ECONNREFUSED);

	if (pcb != NULL) {
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return (EADDRINUSE);
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			error = in_pcbbind(inp, NULL, p);
			if (error)
				return (error);
		}
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		/*
		 * Usage of IP_PKTINFO, without local port already
		 * specified will cause kernel to panic,
		 * see rdar://problem/18508185.
		 * For now returning error to avoid a kernel panic.
		 * This routine can be refactored to handle this better
		 * in future.
		 */
		if (inp->inp_lport == 0)
			return (EINVAL);
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
	}
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
		nstat_pcb_invalidate_cache(inp);
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	return (0);
}
void
in_pcbdisconnect(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
		nstat_pcb_cache(inp);

	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;

	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
		/* lock inversion issue, mostly with udp multicast packets */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	/*
	 * A multipath subflow socket would have its SS_NOFDREF set by default,
	 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
	 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF))
		in_pcbdetach(inp);
}
void
in_pcbdetach(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (so->so_pcb == NULL) {
		/* PCB has been disposed */
		panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__,
		    inp, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if IPSEC
	if (inp->inp_sp != NULL) {
		(void) ipsec4_delete_pcbpolicy(inp);
	}
#endif /* IPSEC */

	/*
	 * Let NetworkStatistics know this PCB is going away
	 * before we detach it.
	 */
	if (nstat_collect &&
	    (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP))
		nstat_pcb_detach(inp);

	/* Free memory buffer held for generating keep alives */
	if (inp->inp_keepalive_data != NULL) {
		FREE(inp->inp_keepalive_data, M_TEMP);
		inp->inp_keepalive_data = NULL;
	}

	/* mark socket state as dead */
	if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
		panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
		    __func__, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

	if (!(so->so_flags & SOF_PCBCLEARING)) {
		struct ip_moptions *imo;

		inp->inp_vflag = 0;
		if (inp->inp_options != NULL) {
			(void) m_free(inp->inp_options);
			inp->inp_options = NULL;
		}
		ROUTE_RELEASE(&inp->inp_route);
		imo = inp->inp_moptions;
		inp->inp_moptions = NULL;
		if (imo != NULL)
			IMO_REMREF(imo);
		sofreelastref(so, 0);
		inp->inp_state = INPCB_STATE_DEAD;
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;

		inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
	}
}
void
in_pcbdispose(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	if (so != NULL && so->so_usecount != 0) {
		panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n",
		    __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
		    solockhistory_nr(so));
		/* NOTREACHED */
	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
		if (so != NULL) {
			panic_plain("%s: inp %p invalid wantcnt %d, so %p "
			    "[%d,%d] usecount %d retaincnt %d state 0x%x "
			    "flags 0x%x lockhistory %s\n", __func__, inp,
			    inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
			    so->so_usecount, so->so_retaincnt, so->so_state,
			    so->so_flags, solockhistory_nr(so));
			/* NOTREACHED */
		} else {
			panic("%s: inp %p invalid wantcnt %d no socket\n",
			    __func__, inp, inp->inp_wantcnt);
			/* NOTREACHED */
		}
	}

	lck_rw_assert(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);

	inp->inp_gencnt = ++ipi->ipi_gencnt;
	/* access ipi in in_pcbremlists */
	in_pcbremlists(inp);

	if (so != NULL) {
		if (so->so_proto->pr_flags & PR_PCBLOCK) {
			sofreelastref(so, 0);
			if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
				/*
				 * selthreadclear() already called
				 * during sofreelastref() above.
				 */
				sbrelease(&so->so_rcv);
				sbrelease(&so->so_snd);
			}
			if (so->so_head != NULL) {
				panic("%s: so=%p head still exist\n",
				    __func__, so);
				/* NOTREACHED */
			}
			lck_mtx_unlock(&inp->inpcb_mtx);
			lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
		}
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;
		so->so_saved_pcb = (caddr_t)inp;
		so->so_pcb = NULL;
		inp->inp_socket = NULL;
#if CONFIG_MACF_NET
		mac_inpcb_label_destroy(inp);
#endif /* CONFIG_MACF_NET */
		/*
		 * In case there is a route cached after a detach (possible
		 * in the tcp case), make sure that it is freed before
		 * we deallocate the structure.
		 */
		ROUTE_RELEASE(&inp->inp_route);
		if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
			zfree(ipi->ipi_zone, inp);
		}
		sodealloc(so);
	}
}
/*
 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
 * in struct pr_usrreqs, so that protocols can just reference them directly
 * without the need for a wrapper function.
 */
int
in_getsockaddr(struct socket *so, struct sockaddr **nam)
{
	struct inpcb *inp;
	struct sockaddr_in *sin;

	/*
	 * Do the malloc first in case it blocks.
	 */
	MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
	if (sin == NULL)
		return (ENOBUFS);
	bzero(sin, sizeof (*sin));
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);

	if ((inp = sotoinpcb(so)) == NULL) {
		FREE(sin, M_SONAME);
		return (EINVAL);
	}
	sin->sin_port = inp->inp_lport;
	sin->sin_addr = inp->inp_laddr;

	*nam = (struct sockaddr *)sin;
	return (0);
}
int
in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss)
{
	struct sockaddr_in *sin = SIN(ss);
	struct inpcb *inp;

	VERIFY(ss != NULL);
	bzero(ss, sizeof (*ss));

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);

	if ((inp = sotoinpcb(so)) == NULL
#if NECP
	    || (necp_socket_should_use_flow_divert(inp))
#endif /* NECP */
	    )
		return (inp == NULL ? EINVAL : EPROTOTYPE);

	sin->sin_port = inp->inp_lport;
	sin->sin_addr = inp->inp_laddr;
	return (0);
}
int
in_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
	struct inpcb *inp;
	struct sockaddr_in *sin;

	/*
	 * Do the malloc first in case it blocks.
	 */
	MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
	if (sin == NULL)
		return (ENOBUFS);
	bzero((caddr_t)sin, sizeof (*sin));
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);

	if ((inp = sotoinpcb(so)) == NULL) {
		FREE(sin, M_SONAME);
		return (EINVAL);
	}
	sin->sin_port = inp->inp_fport;
	sin->sin_addr = inp->inp_faddr;

	*nam = (struct sockaddr *)sin;
	return (0);
}
int
in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss)
{
	struct sockaddr_in *sin = SIN(ss);
	struct inpcb *inp;

	VERIFY(ss != NULL);
	bzero(ss, sizeof (*ss));

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);

	if ((inp = sotoinpcb(so)) == NULL
#if NECP
	    || (necp_socket_should_use_flow_divert(inp))
#endif /* NECP */
	    )
		return (inp == NULL ? EINVAL : EPROTOTYPE);

	sin->sin_port = inp->inp_fport;
	sin->sin_addr = inp->inp_faddr;
	return (0);
}
void
in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    int errno, void (*notify)(struct inpcb *, int))
{
	struct inpcb *inp;

	lck_rw_lock_shared(pcbinfo->ipi_lock);

	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp->inp_faddr.s_addr != faddr.s_addr ||
		    inp->inp_socket == NULL)
			continue;
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
			continue;
		socket_lock(inp->inp_socket, 1);
		(*notify)(inp, errno);
		(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
		socket_unlock(inp->inp_socket, 1);
	}
	lck_rw_done(pcbinfo->ipi_lock);
}
/*
 * Check for alternatives when higher level complains
 * about service problems.  For now, invalidate cached
 * routing information.  If the route was created dynamically
 * (by a redirect), time to try a default gateway again.
 */
void
in_losing(struct inpcb *inp)
{
	boolean_t release = FALSE;
	struct rtentry *rt;

	if ((rt = inp->inp_route.ro_rt) != NULL) {
		struct in_ifaddr *ia = NULL;

		RT_LOCK(rt);
		if (rt->rt_flags & RTF_DYNAMIC) {
			/*
			 * Prevent another thread from modifying rt_key,
			 * rt_gateway via rt_setgate() after rt_lock is
			 * dropped by marking the route as defunct.
			 */
			rt->rt_flags |= RTF_CONDEMNED;
			RT_UNLOCK(rt);
			(void) rtrequest(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
		} else {
			RT_UNLOCK(rt);
		}
		/* if the address is gone keep the old route in the pcb */
		if (inp->inp_laddr.s_addr != INADDR_ANY &&
		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
			/*
			 * Address is around; ditch the route.  A new route
			 * can be allocated the next time output is attempted.
			 */
			release = TRUE;
		}
		if (ia != NULL)
			IFA_REMREF(&ia->ia_ifa);
	}
	if (rt == NULL || release)
		ROUTE_RELEASE(&inp->inp_route);
}
/*
 * After a routing change, flush old routing
 * and allocate a (hopefully) better one.
 */
void
in_rtchange(struct inpcb *inp, int errno)
{
#pragma unused(errno)
	boolean_t release = FALSE;
	struct rtentry *rt;

	if ((rt = inp->inp_route.ro_rt) != NULL) {
		struct in_ifaddr *ia = NULL;

		/* if address is gone, keep the old route */
		if (inp->inp_laddr.s_addr != INADDR_ANY &&
		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
			/*
			 * Address is around; ditch the route.  A new route
			 * can be allocated the next time output is attempted.
			 */
			release = TRUE;
		}
		if (ia != NULL)
			IFA_REMREF(&ia->ia_ifa);
	}
	if (rt == NULL || release)
		ROUTE_RELEASE(&inp->inp_route);
}
/*
 * Lookup a PCB based on the local address and port.
 */
struct inpcb *
in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    unsigned int lport_arg, int wild_okay)
{
	struct inpcb *inp;
	int matchwild = 3, wildcard;
	u_short lport = lport_arg;

	KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (!wild_okay) {
		struct inpcbhead *head;
		/*
		 * Look for an unconnected (wildcard foreign addr) PCB that
		 * matches the local address and port we're looking for.
		 */
		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
		    pcbinfo->ipi_hashmask)];
		LIST_FOREACH(inp, head, inp_hash) {
#if INET6
			if (!(inp->inp_vflag & INP_IPV4))
				continue;
#endif /* INET6 */
			if (inp->inp_faddr.s_addr == INADDR_ANY &&
			    inp->inp_laddr.s_addr == laddr.s_addr &&
			    inp->inp_lport == lport) {
				/*
				 * Found.
				 */
				KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP |
				    DBG_FUNC_END, 0, 0, 0, 0, 0);
				return (inp);
			}
		}
		/*
		 * Not found.
		 */
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
		return (NULL);
	} else {
		struct inpcbporthead *porthash;
		struct inpcbport *phd;
		struct inpcb *match = NULL;
		/*
		 * Best fit PCB lookup.
		 *
		 * First see if this local port is in use by looking on the
		 * port hash list.
		 */
		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
		    pcbinfo->ipi_porthashmask)];
		LIST_FOREACH(phd, porthash, phd_hash) {
			if (phd->phd_port == lport)
				break;
		}
		if (phd != NULL) {
			/*
			 * Port is in use by one or more PCBs.  Look for best
			 * fit.
			 */
			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
				wildcard = 0;
#if INET6
				if (!(inp->inp_vflag & INP_IPV4))
					continue;
#endif /* INET6 */
				if (inp->inp_faddr.s_addr != INADDR_ANY)
					wildcard++;
				if (inp->inp_laddr.s_addr != INADDR_ANY) {
					if (laddr.s_addr == INADDR_ANY)
						wildcard++;
					else if (inp->inp_laddr.s_addr !=
					    laddr.s_addr)
						continue;
				} else {
					if (laddr.s_addr != INADDR_ANY)
						wildcard++;
				}
				if (wildcard < matchwild) {
					match = inp;
					matchwild = wildcard;
					if (matchwild == 0) {
						break;
					}
				}
			}
		}
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
		    0, 0, 0, 0);
		return (match);
	}
}
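/*
 * Worked example of the wildcard scoring above (illustrative): with two
 * sockets on port 80, one bound to 10.0.1.2 and one to INADDR_ANY, a
 * lookup for laddr 10.0.1.2 scores the specific bind at wildcard 0 and
 * the INADDR_ANY bind at wildcard 1, so the specific bind wins.
 * matchwild starts at 3, above any achievable score, so the first
 * candidate examined always becomes the initial best match.
 */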
/*
 * Check if PCB exists in hash list.
 */
int
in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    uid_t *uid, gid_t *gid, struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = fport_arg, lport = lport_arg;
	int found = 0;
	struct inpcb *local_wild = NULL;
#if INET6
	struct inpcb *local_wild_mapped = NULL;
#endif /* INET6 */

	*uid = UID_MAX;
	*gid = GID_MAX;

	/*
	 * We may have found the pcb in the last lookup - check this first.
	 */

	lck_rw_lock_shared(pcbinfo->ipi_lock);

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp_restricted_recv(inp, ifp))
			continue;

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			if ((found = (inp->inp_socket != NULL))) {
				/*
				 * Found.
				 */
				*uid = kauth_cred_getuid(
				    inp->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
				    inp->inp_socket->so_cred);
			}
			lck_rw_done(pcbinfo->ipi_lock);
			return (found);
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		lck_rw_done(pcbinfo->ipi_lock);
		return (0);
	}

	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp_restricted_recv(inp, ifp))
			continue;

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				if ((found = (inp->inp_socket != NULL))) {
					*uid = kauth_cred_getuid(
					    inp->inp_socket->so_cred);
					*gid = kauth_cred_getgid(
					    inp->inp_socket->so_cred);
				}
				lck_rw_done(pcbinfo->ipi_lock);
				return (found);
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
#if INET6
				if (inp->inp_socket &&
				    SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
					local_wild_mapped = inp;
				else
#endif /* INET6 */
					local_wild = inp;
			}
		}
	}
	if (local_wild == NULL) {
#if INET6
		if (local_wild_mapped != NULL) {
			if ((found = (local_wild_mapped->inp_socket !=
			    NULL))) {
				*uid = kauth_cred_getuid(
				    local_wild_mapped->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
				    local_wild_mapped->inp_socket->so_cred);
			}
			lck_rw_done(pcbinfo->ipi_lock);
			return (found);
		}
#endif /* INET6 */
		lck_rw_done(pcbinfo->ipi_lock);
		return (0);
	}
	if ((found = (local_wild->inp_socket != NULL))) {
		*uid = kauth_cred_getuid(
		    local_wild->inp_socket->so_cred);
		*gid = kauth_cred_getgid(
		    local_wild->inp_socket->so_cred);
	}
	lck_rw_done(pcbinfo->ipi_lock);
	return (found);
}
/*
 * Lookup PCB in hash list.
 */
struct inpcb *
in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = fport_arg, lport = lport_arg;
	struct inpcb *local_wild = NULL;
#if INET6
	struct inpcb *local_wild_mapped = NULL;
#endif /* INET6 */

	/*
	 * We may have found the pcb in the last lookup - check this first.
	 */

	lck_rw_lock_shared(pcbinfo->ipi_lock);

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp_restricted_recv(inp, ifp))
			continue;

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			/*
			 * Found.
			 */
			if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
			    WNT_STOPUSING) {
				lck_rw_done(pcbinfo->ipi_lock);
				return (inp);
			} else {
				/* it's there but dead, say it isn't found */
				lck_rw_done(pcbinfo->ipi_lock);
				return (NULL);
			}
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		lck_rw_done(pcbinfo->ipi_lock);
		return (NULL);
	}

	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp_restricted_recv(inp, ifp))
			continue;

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
				    WNT_STOPUSING) {
					lck_rw_done(pcbinfo->ipi_lock);
					return (inp);
				} else {
					/* it's dead; say it isn't found */
					lck_rw_done(pcbinfo->ipi_lock);
					return (NULL);
				}
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
#if INET6
				if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
					local_wild_mapped = inp;
				else
#endif /* INET6 */
					local_wild = inp;
			}
		}
	}
	if (local_wild == NULL) {
#if INET6
		if (local_wild_mapped != NULL) {
			if (in_pcb_checkstate(local_wild_mapped,
			    WNT_ACQUIRE, 0) != WNT_STOPUSING) {
				lck_rw_done(pcbinfo->ipi_lock);
				return (local_wild_mapped);
			} else {
				/* it's dead; say it isn't found */
				lck_rw_done(pcbinfo->ipi_lock);
				return (NULL);
			}
		}
#endif /* INET6 */
		/*
		 * It's either not found or is already dead.
		 */
		lck_rw_done(pcbinfo->ipi_lock);
		return (NULL);
	}
	if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
		lck_rw_done(pcbinfo->ipi_lock);
		return (local_wild);
	}
	/*
	 * It's either not found or is already dead.
	 */
	lck_rw_done(pcbinfo->ipi_lock);
	return (NULL);
}
/*
 * @brief	Insert PCB onto various hash lists.
 *
 * @param	inp Pointer to internet protocol control block
 * @param	locked Implies if ipi_lock (protecting pcb list)
 *		is already locked or not.
 *
 * @return	int error on failure and 0 on success
 */
int
in_pcbinshash(struct inpcb *inp, int locked)
{
	struct inpcbhead *pcbhash;
	struct inpcbporthead *pcbporthash;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	struct inpcbport *phd;
	u_int32_t hashkey_faddr;

	if (!locked) {
		if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(inp->inp_socket, 0);
			lck_rw_lock_exclusive(pcbinfo->ipi_lock);
			socket_lock(inp->inp_socket, 0);
		}
	}

	/*
	 * This routine or its caller may have given up
	 * socket's protocol lock briefly.
	 * During that time the socket may have been dropped.
	 * Safe-guarding against that.
	 */
	if (inp->inp_state == INPCB_STATE_DEAD) {
		if (!locked)
			lck_rw_done(pcbinfo->ipi_lock);
		return (ECONNABORTED);
	}

#if INET6
	if (inp->inp_vflag & INP_IPV6)
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	else
#endif /* INET6 */
		hashkey_faddr = inp->inp_faddr.s_addr;

	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, pcbinfo->ipi_hashmask);

	pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];

	pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
	    pcbinfo->ipi_porthashmask)];

	/*
	 * Go through port list and look for a head for this lport.
	 */
	LIST_FOREACH(phd, pcbporthash, phd_hash) {
		if (phd->phd_port == inp->inp_lport)
			break;
	}

	/*
	 * If none exists, malloc one and tack it on.
	 */
	if (phd == NULL) {
		MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport),
		    M_PCB, M_WAITOK);
		if (phd == NULL) {
			if (!locked)
				lck_rw_done(pcbinfo->ipi_lock);
			return (ENOBUFS); /* XXX */
		}
		phd->phd_port = inp->inp_lport;
		LIST_INIT(&phd->phd_pcblist);
		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
	}

	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
	inp->inp_phd = phd;
	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
	inp->inp_flags2 |= INP2_INHASHLIST;

	if (!locked)
		lck_rw_done(pcbinfo->ipi_lock);

#if NECP
	// This call catches the original setting of the local address
	inp_update_necp_policy(inp, NULL, NULL, 0);
#endif /* NECP */

	return (0);
}
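/*
 * Note on the try-lock dance above: this path holds the socket lock and
 * then wants ipi_lock, while the input path (mostly UDP multicast) takes
 * ipi_lock before the socket lock, so a plain lck_rw_lock_exclusive()
 * here could deadlock.  The pattern used throughout this file is:
 *
 *	if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
 *		socket_unlock(inp->inp_socket, 0);
 *		lck_rw_lock_exclusive(pcbinfo->ipi_lock);
 *		socket_lock(inp->inp_socket, 0);
 *	}
 *
 * Callers must therefore re-validate state (e.g. INPCB_STATE_DEAD, as
 * done above) after the window in which both locks were dropped.
 */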
/*
 * Move PCB to the proper hash bucket when { faddr, fport } have been
 * changed.  NOTE: This does not handle the case of the lport changing (the
 * hashed port list would have to be updated as well), so the lport must
 * not change after in_pcbinshash() has been called.
 */
void
in_pcbrehash(struct inpcb *inp)
{
	struct inpcbhead *head;
	u_int32_t hashkey_faddr;

#if INET6
	if (inp->inp_vflag & INP_IPV6)
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	else
#endif /* INET6 */
		hashkey_faddr = inp->inp_faddr.s_addr;

	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
	head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];

	if (inp->inp_flags2 & INP2_INHASHLIST) {
		LIST_REMOVE(inp, inp_hash);
		inp->inp_flags2 &= ~INP2_INHASHLIST;
	}

	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
	LIST_INSERT_HEAD(head, inp, inp_hash);
	inp->inp_flags2 |= INP2_INHASHLIST;

#if NECP
	// This call catches updates to the remote addresses
	inp_update_necp_policy(inp, NULL, NULL, 0);
#endif /* NECP */
}
/*
 * Remove PCB from various lists.
 * Must be called with the pcbinfo lock held in exclusive mode.
 */
void
in_pcbremlists(struct inpcb *inp)
{
	inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;

	/*
	 * Check if it's in hashlist -- an inp is placed in hashlist when
	 * its local port gets assigned.  So it should also be present
	 * in the port list.
	 */
	if (inp->inp_flags2 & INP2_INHASHLIST) {
		struct inpcbport *phd = inp->inp_phd;

		VERIFY(phd != NULL && inp->inp_lport > 0);

		LIST_REMOVE(inp, inp_hash);
		inp->inp_hash.le_next = NULL;
		inp->inp_hash.le_prev = NULL;

		LIST_REMOVE(inp, inp_portlist);
		inp->inp_portlist.le_next = NULL;
		inp->inp_portlist.le_prev = NULL;
		if (LIST_EMPTY(&phd->phd_pcblist)) {
			LIST_REMOVE(phd, phd_hash);
			FREE(phd, M_PCB);
		}
		inp->inp_phd = NULL;
		inp->inp_flags2 &= ~INP2_INHASHLIST;
	}
	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));

	if (inp->inp_flags2 & INP2_TIMEWAIT) {
		/* Remove from time-wait queue */
		tcp_remove_from_time_wait(inp);
		inp->inp_flags2 &= ~INP2_TIMEWAIT;
		VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
		inp->inp_pcbinfo->ipi_twcount--;
	} else {
		/* Remove from global inp list if it is not time-wait */
		LIST_REMOVE(inp, inp_list);
	}

	if (inp->inp_flags2 & INP2_IN_FCTREE) {
		inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE));
		VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
	}

	inp->inp_pcbinfo->ipi_count--;
}

/*
 * Mechanism used to defer the memory release of PCBs
 * The pcb list will contain the pcb until the reaper can clean it up if
 * the following conditions are met:
 *	1) state "DEAD",
 *	2) wantcnt is STOPUSING
 *	3) usecount is 0
 * This function will be called to either mark the pcb as ready for
 * recycling (WNT_STOPUSING), or to acquire (WNT_ACQUIRE) or release
 * (WNT_RELEASE) a want reference on it.
 */
int
in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
{
	volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
	UInt32 origwant;
	UInt32 newwant;

	switch (mode) {
	case WNT_STOPUSING:
		/*
		 * Try to mark the pcb as ready for recycling. CAS with
		 * STOPUSING, if success we're good, if it's in use, will
		 * be marked later
		 */
		if (locked == 0)
			socket_lock(pcb->inp_socket, 1);
		pcb->inp_state = INPCB_STATE_DEAD;

stopusing:
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: pcb=%p so=%p usecount is negative\n",
			    __func__, pcb, pcb->inp_socket);
			/* NOTREACHED */
		}
		if (locked == 0)
			socket_unlock(pcb->inp_socket, 1);

		inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);

		origwant = *wantcnt;
		if ((UInt16) origwant == 0xffff) /* should stop using */
			return (WNT_STOPUSING);
		newwant = 0xffff;
		if ((UInt16) origwant == 0) {
			/* try to mark it as unusable now */
			OSCompareAndSwap(origwant, newwant, wantcnt);
		}
		return (WNT_STOPUSING);

	case WNT_ACQUIRE:
		/*
		 * Try to increase reference to pcb. If WNT_STOPUSING
		 * should bail out. If socket state DEAD, try to set count
		 * to STOPUSING, return failed otherwise increase cnt.
		 */
		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				return (WNT_STOPUSING);
			}
			newwant = origwant + 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));
		return (WNT_ACQUIRE);

	case WNT_RELEASE:
		/*
		 * Release reference. If result is null and pcb state
		 * is DEAD, set wanted bit to STOPUSING
		 */
		if (locked == 0)
			socket_lock(pcb->inp_socket, 1);

		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0x0) {
				panic("%s: pcb=%p release with zero count",
				    __func__, pcb);
				/* NOTREACHED */
			}
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				if (locked == 0)
					socket_unlock(pcb->inp_socket, 1);
				return (WNT_STOPUSING);
			}
			newwant = origwant - 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));

		if (pcb->inp_state == INPCB_STATE_DEAD)
			goto stopusing;
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: RELEASE pcb=%p so=%p usecount is negative\n",
			    __func__, pcb, pcb->inp_socket);
			/* NOTREACHED */
		}

		if (locked == 0)
			socket_unlock(pcb->inp_socket, 1);
		return (WNT_RELEASE);

	default:
		panic("%s: so=%p not a valid state =%x\n", __func__,
		    pcb->inp_socket, mode);
		/* NOTREACHED */
	}

	/* NOTREACHED */
	return (mode);
}
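
/*
 * Illustrative calling pattern, not part of the original file: a caller
 * that has looked up an inp under the pcbinfo lock takes a want reference
 * before dereferencing it, and drops the reference when done.  The
 * surrounding lookup code is hypothetical.
 *
 *	if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
 *		return;		// PCB is being reclaimed; don't touch it
 *	// ... safe to use inp here ...
 *	(void) in_pcb_checkstate(inp, WNT_RELEASE, 0);
 */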

/*
 * inpcb_to_compat copies specific bits of an inpcb to an inpcb_compat.
 * The inpcb_compat data structure is passed to user space and must
 * not change. We intentionally avoid copying pointers.
 */
void
inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
{
	bzero(inp_compat, sizeof (*inp_compat));
	inp_compat->inp_fport = inp->inp_fport;
	inp_compat->inp_lport = inp->inp_lport;
	inp_compat->nat_owner = 0;
	inp_compat->nat_cookie = 0;
	inp_compat->inp_gencnt = inp->inp_gencnt;
	inp_compat->inp_flags = inp->inp_flags;
	inp_compat->inp_flow = inp->inp_flow;
	inp_compat->inp_vflag = inp->inp_vflag;
	inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
	inp_compat->inp_ip_p = inp->inp_ip_p;
	inp_compat->inp_dependfaddr.inp6_foreign =
	    inp->inp_dependfaddr.inp6_foreign;
	inp_compat->inp_dependladdr.inp6_local =
	    inp->inp_dependladdr.inp6_local;
	inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
	inp_compat->inp_depend6.inp6_hlim = 0;
	inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
	inp_compat->inp_depend6.inp6_ifindex = 0;
	inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
}

void
inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
{
	xinp->inp_fport = inp->inp_fport;
	xinp->inp_lport = inp->inp_lport;
	xinp->inp_gencnt = inp->inp_gencnt;
	xinp->inp_flags = inp->inp_flags;
	xinp->inp_flow = inp->inp_flow;
	xinp->inp_vflag = inp->inp_vflag;
	xinp->inp_ip_ttl = inp->inp_ip_ttl;
	xinp->inp_ip_p = inp->inp_ip_p;
	xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
	xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
	xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
	xinp->inp_depend6.inp6_hlim = 0;
	xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
	xinp->inp_depend6.inp6_ifindex = 0;
	xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
}

/*
 * The following routines implement this scheme:
 *
 * Callers of ip_output() that intend to cache the route in the inpcb pass
 * a local copy of the struct route to ip_output(). Using a local copy of
 * the cached route significantly simplifies things as IP no longer has to
 * worry about having exclusive access to the passed in struct route, since
 * it's defined in the caller's stack; in essence, this allows for a lock-
 * less operation when updating the struct route at the IP level and below,
 * whenever necessary. The scheme works as follows:
 *
 * Prior to dropping the socket's lock and calling ip_output(), the caller
 * copies the struct route from the inpcb into its stack, and adds a reference
 * to the cached route entry, if there was any. The socket's lock is then
 * dropped and ip_output() is called with a pointer to the copy of struct
 * route defined on the stack (not to the one in the inpcb.)
 *
 * Upon returning from ip_output(), the caller then acquires the socket's
 * lock and synchronizes the cache; if there is no route cached in the inpcb,
 * it copies the local copy of struct route (which may or may not contain any
 * route) back into the cache; otherwise, if the inpcb has a route cached in
 * it, the one in the local copy will be freed, if there's any. Trashing the
 * cached route in the inpcb can be avoided because ip_output() is single-
 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
 * by the socket/transport layer.)
 *
 * A sketch of this calling pattern follows inp_route_copyin() below.
 */
void
inp_route_copyout(struct inpcb *inp, struct route *dst)
{
	struct route *src = &inp->inp_route;

	lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);

	/*
	 * If the route in the PCB is stale or not for IPv4, blow it away;
	 * this is possible in the IPv4-mapped address case.
	 */
	if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET)
		ROUTE_RELEASE(src);

	route_copyout(dst, src, sizeof (*dst));
}

void
inp_route_copyin(struct inpcb *inp, struct route *src)
{
	struct route *dst = &inp->inp_route;

	lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);

	/* Minor sanity check */
	if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
		panic("%s: wrong or corrupted route: %p", __func__, src);

	route_copyin(src, dst, sizeof (*src));
}
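
/*
 * Illustrative sketch of the scheme described above, not part of the
 * original file; the locals and the ip_output() argument list are
 * abbreviated/hypothetical:
 *
 *	struct route ro;
 *
 *	// stack copy plus an extra ref on the cached rtentry, socket locked
 *	inp_route_copyout(inp, &ro);
 *	socket_unlock(so, 0);
 *	error = ip_output(m, opt, &ro, flags, mopt, &ipoa);
 *	socket_lock(so, 0);
 *	// reconcile the stack copy with the PCB's route cache
 *	inp_route_copyin(inp, &ro);
 */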

/*
 * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option.
 */
int
inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
{
	struct ifnet *ifp = NULL;

	ifnet_head_lock_shared();
	if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
	    (ifp = ifindex2ifnet[ifscope]) == NULL)) {
		ifnet_head_done();
		return (ENXIO);
	}
	ifnet_head_done();

	VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);

	/*
	 * A zero interface scope value indicates an "unbind".
	 * Otherwise, take in whatever value the app desires;
	 * the app may already know the scope (or force itself
	 * to such a scope) ahead of time before the interface
	 * gets attached. It doesn't matter either way; any
	 * route lookup from this point on will require an
	 * exact match for the embedded interface scope.
	 */
	inp->inp_boundifp = ifp;
	if (inp->inp_boundifp == NULL)
		inp->inp_flags &= ~INP_BOUND_IF;
	else
		inp->inp_flags |= INP_BOUND_IF;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);

	if (pifp != NULL)
		*pifp = ifp;

	return (0);
}
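
/*
 * Illustrative user-space counterpart, not part of the original file:
 * an application that binds its traffic to en0 with IP_BOUND_IF ends up
 * in inp_bindif() above with ifscope == if_nametoindex("en0").
 *
 *	unsigned int idx = if_nametoindex("en0");
 *	setsockopt(s, IPPROTO_IP, IP_BOUND_IF, &idx, sizeof (idx));
 */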

/*
 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
 * as well as for setting PROC_UUID_NO_CELLULAR policy.
 */
void
inp_set_nocellular(struct inpcb *inp)
{
	inp->inp_flags |= INP_NO_IFT_CELLULAR;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}

/*
 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
 */
void
inp_clear_nocellular(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/*
	 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
	 * has a higher precedence than INP_NO_IFT_CELLULAR. Clear the flag
	 * if and only if the socket is unrestricted.
	 */
	if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
		inp->inp_flags &= ~INP_NO_IFT_CELLULAR;

		/* Blow away any cached route in the PCB */
		ROUTE_RELEASE(&inp->inp_route);
	}
}

void
inp_set_noexpensive(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}

void
inp_set_awdl_unrestricted(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}

boolean_t
inp_get_awdl_unrestricted(struct inpcb *inp)
{
	return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
}

void
inp_clear_awdl_unrestricted(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}

#if NECP
/*
 * Called when PROC_UUID_NECP_APP_POLICY is set.
 */
void
inp_set_want_app_policy(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_WANT_APP_POLICY;
}

/*
 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
 */
void
inp_clear_want_app_policy(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
}
#endif /* NECP */

/*
 * Calculate the flow hash for an inp; an interface uses it to identify
 * a flow. When an interface provides a flow control advisory, this flow
 * hash is used as the identifier.
 */
u_int32_t
inp_calc_flowhash(struct inpcb *inp)
{
	struct inp_flowhash_key fh __attribute__((aligned(8)));
	u_int32_t flowhash = 0;
	struct inpcb *tmp_inp = NULL;

	if (inp_hash_seed == 0)
		inp_hash_seed = RandomULong();

	bzero(&fh, sizeof (fh));

	bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr));
	bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr));

	fh.infh_lport = inp->inp_lport;
	fh.infh_fport = inp->inp_fport;
	fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
	fh.infh_proto = inp->inp_ip_p;
	fh.infh_rand1 = RandomULong();
	fh.infh_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	inp->inp_flowhash = flowhash;

	/* Insert the inp into inp_fc_tree */
	lck_mtx_lock_spin(&inp_fc_lck);
	tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
	if (tmp_inp != NULL) {
		/*
		 * There is a different inp with the same flowhash.
		 * There can be a collision on flow hash but the
		 * probability is low. Let's recompute the
		 * flowhash.
		 */
		lck_mtx_unlock(&inp_fc_lck);
		/* recompute hash seed */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
	inp->inp_flags2 |= INP2_IN_FCTREE;
	lck_mtx_unlock(&inp_fc_lck);

	return (flowhash);
}

void
inp_flowadv(uint32_t flowhash)
{
	struct inpcb *inp;

	inp = inp_fc_getinp(flowhash, 0);

	if (inp == NULL)
		return;
	inp_fc_feedback(inp);
}
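
/*
 * Illustrative only, not part of the original file: when a queue that
 * issued a flow-control advisory drains, the interface layer feeds the
 * event back per flow using the hash computed by inp_calc_flowhash();
 * a hypothetical caller would look like:
 *
 *	// flowhash was recorded when the flow was first advised
 *	inp_flowadv(flowhash);	// unthrottles the matching PCB, if any
 */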

/*
 * Function to compare inp_fc_entries in inp flow control tree
 */
static inline int
infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
{
	return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
	    sizeof(inp1->inp_flowhash)));
}
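
/*
 * Note (illustrative, not part of the original file): RB_FIND()/RB_INSERT()
 * only require a consistent total order, so comparing the raw bytes of the
 * 32-bit flow hash via memcmp() is sufficient; a numeric comparison such as
 * the sketch below would be an equally valid ordering:
 *
 *	return ((inp1->inp_flowhash < inp2->inp_flowhash) ? -1 :
 *	    (inp1->inp_flowhash > inp2->inp_flowhash) ? 1 : 0);
 */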

static struct inpcb *
inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
{
	struct inpcb *inp = NULL;
	int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;

	lck_mtx_lock_spin(&inp_fc_lck);
	key_inp.inp_flowhash = flowhash;
	inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
	if (inp == NULL) {
		/* inp is not present, return */
		lck_mtx_unlock(&inp_fc_lck);
		return (NULL);
	}

	if (flags & INPFC_REMOVE) {
		RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
		lck_mtx_unlock(&inp_fc_lck);

		bzero(&(inp->infc_link), sizeof (inp->infc_link));
		inp->inp_flags2 &= ~INP2_IN_FCTREE;
		return (NULL);
	}

	if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING)
		inp = NULL;
	lck_mtx_unlock(&inp_fc_lck);

	return (inp);
}

void
inp_fc_feedback(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/* we already hold a want_cnt on this inp, socket can't be null */
	VERIFY(so != NULL);
	socket_lock(so, 1);

	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
		socket_unlock(so, 1);
		return;
	}

	if (inp->inp_sndinprog_cnt > 0)
		inp->inp_flags |= INP_FC_FEEDBACK;

	/*
	 * Return if the connection is not in flow-controlled state.
	 * This can happen if the connection experienced
	 * loss while it was in flow controlled state
	 */
	if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
		socket_unlock(so, 1);
		return;
	}
	inp_reset_fc_state(inp);

	if (SOCK_TYPE(so) == SOCK_STREAM)
		inp_fc_unthrottle_tcp(inp);

	socket_unlock(so, 1);
}

void
inp_reset_fc_state(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
	int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);

	if (suspended) {
		so->so_flags &= ~(SOF_SUSPENDED);
		soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
	}

	/* Give a write wakeup to unblock the socket */
	if (needwakeup)
		sowwakeup(so);
}

int
inp_set_fc_state(struct inpcb *inp, int advcode)
{
	struct inpcb *tmp_inp = NULL;
	/*
	 * If there was a feedback from the interface when
	 * send operation was in progress, we should ignore
	 * this flow advisory to avoid a race between setting
	 * flow controlled state and receiving feedback from
	 * the interface
	 */
	if (inp->inp_flags & INP_FC_FEEDBACK)
		return (0);

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
	if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
	    INPFC_SOLOCKED)) != NULL) {
		if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING)
			return (0);
		VERIFY(tmp_inp == inp);
		switch (advcode) {
		case FADV_FLOW_CONTROLLED:
			inp->inp_flags |= INP_FLOW_CONTROLLED;
			break;
		case FADV_SUSPENDED:
			inp->inp_flags |= INP_FLOW_SUSPENDED;
			soevent(inp->inp_socket,
			    (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));

			/* Record the fact that suspend event was sent */
			inp->inp_socket->so_flags |= SOF_SUSPENDED;
			break;
		}
		return (1);
	}
	return (0);
}

/*
 * Handler for SO_FLUSH socket option.
 */
int
inp_flush(struct inpcb *inp, int optval)
{
	u_int32_t flowhash = inp->inp_flowhash;
	struct ifnet *rtifp, *oifp;

	/* Either all classes or one of the valid ones */
	if (optval != SO_TC_ALL && !SO_VALID_TC(optval))
		return (EINVAL);

	/* We need a flow hash for identification */
	if (flowhash == 0)
		return (0);

	/* Grab the interfaces from the route and pcb */
	rtifp = ((inp->inp_route.ro_rt != NULL) ?
	    inp->inp_route.ro_rt->rt_ifp : NULL);
	oifp = inp->inp_last_outifp;

	if (rtifp != NULL)
		if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
	if (oifp != NULL && oifp != rtifp)
		if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);

	return (0);
}
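
/*
 * Illustrative user-space counterpart, not part of the original file:
 * SO_FLUSH (a private option on this platform) is serviced by inp_flush()
 * above; flushing every traffic class for the socket's flow would look
 * like:
 *
 *	int tc = SO_TC_ALL;
 *	setsockopt(s, SOL_SOCKET, SO_FLUSH, &tc, sizeof (tc));
 */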

/*
 * Clear the INP_INADDR_ANY flag (special case for PPP only)
 */
void
inp_clear_INP_INADDR_ANY(struct socket *so)
{
	struct inpcb *inp = NULL;

	socket_lock(so, 1);
	inp = sotoinpcb(so);
	if (inp) {
		inp->inp_flags &= ~INP_INADDR_ANY;
	}
	socket_unlock(so, 1);
}

void
inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
{
	struct socket *so = inp->inp_socket;

	soprocinfo->spi_pid = so->last_pid;

	if (so->last_pid != 0)
		uuid_copy(soprocinfo->spi_uuid, so->last_uuid);

	/*
	 * When not delegated, the effective pid is the same as the real pid
	 */
	if (so->so_flags & SOF_DELEGATED) {
		soprocinfo->spi_delegated = 1;
		soprocinfo->spi_epid = so->e_pid;
		uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
	} else {
		soprocinfo->spi_delegated = 0;
		soprocinfo->spi_epid = so->last_pid;
	}
}

int
inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
    struct so_procinfo *soprocinfo)
{
	struct inpcb *inp = NULL;
	int found = 0;

	bzero(soprocinfo, sizeof (struct so_procinfo));

	if (!flowhash)
		return (-1);

	lck_rw_lock_shared(pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
		    inp->inp_socket != NULL &&
		    inp->inp_flowhash == flowhash) {
			found = 1;
			inp_get_soprocinfo(inp, soprocinfo);
			break;
		}
	}
	lck_rw_done(pcbinfo->ipi_lock);

	return (found);
}

#if CONFIG_PROC_UUID_POLICY
static void
inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = INP_NO_CELLULAR(inp);
	if (set) {
		inp_set_nocellular(inp);
	} else {
		inp_clear_nocellular(inp);
	}
	after = INP_NO_CELLULAR(inp);
	if (net_io_policy_log && (before != after)) {
		static const char *ok = "OK";
		static const char *nok = "NOACCESS";
		uuid_string_t euuid_buf;
		pid_t epid;

		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		/* allow this socket to generate another notification event */
		so->so_ifdenied_notifies = 0;

		log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? ok : nok),
		    ((before < after) ? nok : ok));
	}
}

#if NECP
static void
inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	if (set) {
		inp_set_want_app_policy(inp);
	} else {
		inp_clear_want_app_policy(inp);
	}
	after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	if (net_io_policy_log && (before != after)) {
		static const char *wanted = "WANTED";
		static const char *unwanted = "UNWANTED";
		uuid_string_t euuid_buf;
		pid_t epid;

		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? unwanted : wanted),
		    ((before < after) ? wanted : unwanted));
	}
}
#endif /* NECP */
#endif /* !CONFIG_PROC_UUID_POLICY */

#if NECP
void
inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr,
    struct sockaddr *override_remote_addr, u_int override_bound_interface)
{
	necp_socket_find_policy_match(inp, override_local_addr,
	    override_remote_addr, override_bound_interface);
	if (necp_socket_should_rescope(inp) &&
	    inp->inp_lport == 0 &&
	    inp->inp_laddr.s_addr == INADDR_ANY &&
	    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
		// If we should rescope, and the socket is not yet bound
		inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
	}
}
#endif /* NECP */

int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t ogencnt;
	int err = 0;

	if (!net_io_policy_uuid ||
	    so == NULL || inp->inp_state == INPCB_STATE_DEAD)
		return (0);

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED))
		return (0);

	ogencnt = so->so_policy_gencnt;
	err = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
	    so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt);

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go through the checks below.
	 */
	if (err == ENOENT && ogencnt != 0)
		so->so_policy_gencnt = 0;

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly. If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, FALSE);
		}
#endif /* NECP */
	}

	return ((err == ENOENT) ? 0 : err);
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return (0);
#endif /* !CONFIG_PROC_UUID_POLICY */
}

/*
 * Called when we need to enforce policy restrictions in the input path.
 *
 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
 */
boolean_t
inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	/*
	 * Inbound restrictions.
	 */
	if (!sorestrictrecv)
		return (FALSE);

	if (ifp == NULL)
		return (FALSE);

	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
		return (TRUE);

	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
		return (TRUE);

	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
		return (TRUE);

	if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV))
		return (FALSE);

	if (inp->inp_flags & INP_RECV_ANYIF)
		return (FALSE);

	if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp)
		return (FALSE);

	return (TRUE);
}

/*
 * Called when we need to enforce policy restrictions in the output path.
 *
 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
 */
boolean_t
inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	/*
	 * Outbound restrictions.
	 */
	if (!sorestrictsend)
		return (FALSE);

	if (ifp == NULL)
		return (FALSE);

	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
		return (TRUE);

	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
		return (TRUE);

	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
		return (TRUE);