2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/malloc.h>
68 #include <sys/domain.h>
69 #include <sys/protosw.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
76 #include <sys/kernel.h>
77 #include <sys/sysctl.h>
78 #include <sys/mcache.h>
79 #include <sys/kauth.h>
81 #include <libkern/OSAtomic.h>
82 #include <kern/locks.h>
84 #include <machine/limits.h>
87 #include <kern/zalloc.h>
91 #include <net/if_types.h>
92 #include <net/route.h>
93 #include <net/flowhash.h>
94 #include <net/flowadv.h>
96 #include <netinet/in.h>
97 #include <netinet/in_pcb.h>
98 #include <netinet/in_var.h>
99 #include <netinet/ip_var.h>
101 #include <netinet/ip6.h>
102 #include <netinet6/ip6_var.h>
106 #include <netinet6/ipsec.h>
107 #include <netkey/key.h>
110 #include <sys/kdebug.h>
111 #include <sys/random.h>
112 #include <dev/random/randomdev.h>
115 extern int ipsec_bypass
;
118 #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
119 #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
121 struct in_addr zeroin_addr
;
124 * These configure the range of local port addresses assigned to
125 * "unspecified" outgoing connections/packets/whatever.
127 int ipport_lowfirstauto
= IPPORT_RESERVED
- 1; /* 1023 */
128 int ipport_lowlastauto
= IPPORT_RESERVEDSTART
; /* 600 */
130 int ipport_firstauto
= IPPORT_RESERVED
; /* 1024 */
131 int ipport_lastauto
= IPPORT_USERRESERVED
; /* 5000 */
133 int ipport_firstauto
= IPPORT_HIFIRSTAUTO
; /* 49152 */
134 int ipport_lastauto
= IPPORT_HILASTAUTO
; /* 65535 */
136 int ipport_hifirstauto
= IPPORT_HIFIRSTAUTO
; /* 49152 */
137 int ipport_hilastauto
= IPPORT_HILASTAUTO
; /* 65535 */
/*
 * Clamp "var" into the inclusive range [min, max].
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one
 * statement: the bare if/else-if form mis-binds (or fails to compile)
 * when a caller writes "if (cond) RANGECHK(...); else ...".
 * Arguments may be evaluated more than once — do not pass expressions
 * with side effects.
 */
#define RANGECHK(var, min, max) do {					\
	if ((var) < (min))						\
		(var) = (min);						\
	else if ((var) > (max))						\
		(var) = (max);						\
} while (0)
/*
 * sysctl handler shared by every net.inet.ip.portrange.* knob: let
 * sysctl_handle_int() read or update the backing integer (passed via
 * oid_arg1/oid_arg2), then clamp ALL of the port-range globals back
 * into their legal windows with RANGECHK.
 *
 * NOTE(review): the "if (!error)" guard around the clamps and the
 * final "return error;" were stripped by extraction; only the visible
 * tokens are annotated here.
 */
144 sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
146 #pragma unused(arg1, arg2)
/* Read/write the int this OID is bound to. */
147 int error
= sysctl_handle_int(oidp
,
148 oidp
->oid_arg1
, oidp
->oid_arg2
, req
);
/* Privileged ("low") range must stay within 1..IPPORT_RESERVED-1. */
150 RANGECHK(ipport_lowfirstauto
, 1, IPPORT_RESERVED
- 1);
151 RANGECHK(ipport_lowlastauto
, 1, IPPORT_RESERVED
- 1);
/* Default and high ranges must stay within IPPORT_RESERVED..USHRT_MAX. */
152 RANGECHK(ipport_firstauto
, IPPORT_RESERVED
, USHRT_MAX
);
153 RANGECHK(ipport_lastauto
, IPPORT_RESERVED
, USHRT_MAX
);
154 RANGECHK(ipport_hifirstauto
, IPPORT_RESERVED
, USHRT_MAX
);
155 RANGECHK(ipport_hilastauto
, IPPORT_RESERVED
, USHRT_MAX
);
162 SYSCTL_NODE(_net_inet_ip
, IPPROTO_IP
, portrange
, CTLFLAG_RW
|CTLFLAG_LOCKED
, 0, "IP Ports");
164 SYSCTL_PROC(_net_inet_ip_portrange
, OID_AUTO
, lowfirst
, CTLTYPE_INT
|CTLFLAG_RW
| CTLFLAG_LOCKED
,
165 &ipport_lowfirstauto
, 0, &sysctl_net_ipport_check
, "I", "");
166 SYSCTL_PROC(_net_inet_ip_portrange
, OID_AUTO
, lowlast
, CTLTYPE_INT
|CTLFLAG_RW
| CTLFLAG_LOCKED
,
167 &ipport_lowlastauto
, 0, &sysctl_net_ipport_check
, "I", "");
168 SYSCTL_PROC(_net_inet_ip_portrange
, OID_AUTO
, first
, CTLTYPE_INT
|CTLFLAG_RW
| CTLFLAG_LOCKED
,
169 &ipport_firstauto
, 0, &sysctl_net_ipport_check
, "I", "");
170 SYSCTL_PROC(_net_inet_ip_portrange
, OID_AUTO
, last
, CTLTYPE_INT
|CTLFLAG_RW
| CTLFLAG_LOCKED
,
171 &ipport_lastauto
, 0, &sysctl_net_ipport_check
, "I", "");
172 SYSCTL_PROC(_net_inet_ip_portrange
, OID_AUTO
, hifirst
, CTLTYPE_INT
|CTLFLAG_RW
| CTLFLAG_LOCKED
,
173 &ipport_hifirstauto
, 0, &sysctl_net_ipport_check
, "I", "");
174 SYSCTL_PROC(_net_inet_ip_portrange
, OID_AUTO
, hilast
, CTLTYPE_INT
|CTLFLAG_RW
| CTLFLAG_LOCKED
,
175 &ipport_hilastauto
, 0, &sysctl_net_ipport_check
, "I", "");
177 extern int udp_use_randomport
;
178 extern int tcp_use_randomport
;
180 /* Structs used for flowhash computation */
181 struct inp_flowhash_key_addr
{
191 struct inp_flowhash_key
{
192 struct inp_flowhash_key_addr infh_laddr
;
193 struct inp_flowhash_key_addr infh_faddr
;
194 u_int32_t infh_lport
;
195 u_int32_t infh_fport
;
197 u_int32_t infh_proto
;
198 u_int32_t infh_rand1
;
199 u_int32_t infh_rand2
;
202 u_int32_t inp_hash_seed
= 0;
204 static __inline
int infc_cmp(const struct inpcb
*,
205 const struct inpcb
*);
206 lck_grp_t
*inp_lck_grp
;
207 lck_grp_attr_t
*inp_lck_grp_attr
;
208 lck_attr_t
*inp_lck_attr
;
209 decl_lck_mtx_data(, inp_fc_lck
);
211 RB_HEAD(inp_fc_tree
, inpcb
) inp_fc_tree
;
212 RB_PROTOTYPE(inp_fc_tree
, inpcb
, infc_link
, infc_cmp
);
213 RB_GENERATE(inp_fc_tree
, inpcb
, infc_link
, infc_cmp
);
216 * Use this inp as a key to find an inp in the flowhash tree.
217 * Accesses to it are protected by inp_fc_lck.
219 struct inpcb key_inp
;
222 * in_pcb.c: manage the Protocol Control Blocks.
226 * Initialize data structures required to deliver
/*
 * One-time setup for the flow-advisory machinery: allocate the inp
 * lock group/attributes, initialize the inp_fc_lck mutex, and (under
 * that mutex) initialize the flow-control red-black tree and zero the
 * lookup key pcb.
 */
230 socket_flowadv_init(void)
/* Allocate lock group + attributes used by all inpcb flow-ctl locking. */
232 inp_lck_grp_attr
= lck_grp_attr_alloc_init();
233 inp_lck_grp
= lck_grp_alloc_init("inp_lck_grp", inp_lck_grp_attr
);
235 inp_lck_attr
= lck_attr_alloc_init();
236 lck_mtx_init(&inp_fc_lck
, inp_lck_grp
, inp_lck_attr
);
/* Tree and key_inp are protected by inp_fc_lck; initialize under it. */
238 lck_mtx_lock(&inp_fc_lck
);
239 RB_INIT(&inp_fc_tree
);
240 bzero(&key_inp
, sizeof(key_inp
));
241 lck_mtx_unlock(&inp_fc_lck
);
245 * Allocate a PCB and associate it with the socket.
250 * ipsec_init_policy:??? [IPSEC]
253 in_pcballoc(struct socket
*so
, struct inpcbinfo
*pcbinfo
, __unused
struct proc
*p
)
266 if (so
->cached_in_sock_layer
== 0) {
268 printf("PCBALLOC calling zalloc for socket %x\n", so
);
270 inp
= (struct inpcb
*) zalloc(pcbinfo
->ipi_zone
);
273 bzero((caddr_t
)inp
, sizeof(*inp
));
277 printf("PCBALLOC reusing PCB for socket %x\n", so
);
279 inp
= (struct inpcb
*)(void *)so
->so_saved_pcb
;
280 temp
= inp
->inp_saved_ppcb
;
281 bzero((caddr_t
) inp
, sizeof(*inp
));
282 inp
->inp_saved_ppcb
= temp
;
285 inp
->inp_gencnt
= ++pcbinfo
->ipi_gencnt
;
286 inp
->inp_pcbinfo
= pcbinfo
;
287 inp
->inp_socket
= so
;
289 mac_error
= mac_inpcb_label_init(inp
, M_WAITOK
);
290 if (mac_error
!= 0) {
291 if (so
->cached_in_sock_layer
== 0)
292 zfree(pcbinfo
->ipi_zone
, inp
);
295 mac_inpcb_label_associate(so
, inp
);
297 // make sure inp_stat is always 64bit aligned
298 inp
->inp_stat
= (struct inp_stat
*)P2ROUNDUP(inp
->inp_stat_store
, sizeof(u_int64_t
));
299 if (((uintptr_t)inp
->inp_stat
- (uintptr_t)inp
->inp_stat_store
)
300 + sizeof(*inp
->inp_stat
) > sizeof(inp
->inp_stat_store
)) {
301 panic("insufficient space to align inp_stat");
304 so
->so_pcb
= (caddr_t
)inp
;
306 if (so
->so_proto
->pr_flags
& PR_PCBLOCK
) {
307 lck_mtx_init(&inp
->inpcb_mtx
, pcbinfo
->mtx_grp
, pcbinfo
->mtx_attr
);
312 if (ipsec_bypass
== 0) {
313 error
= ipsec_init_policy(so
, &inp
->inp_sp
);
315 zfree(pcbinfo
->ipi_zone
, inp
);
322 if (INP_SOCKAF(so
) == AF_INET6
&& !ip6_mapped_addr_on
)
323 inp
->inp_flags
|= IN6P_IPV6_V6ONLY
;
327 if (ip6_auto_flowlabel
)
328 inp
->inp_flags
|= IN6P_AUTOFLOWLABEL
;
330 lck_rw_lock_exclusive(pcbinfo
->mtx
);
331 inp
->inp_gencnt
= ++pcbinfo
->ipi_gencnt
;
332 LIST_INSERT_HEAD(pcbinfo
->listhead
, inp
, inp_list
);
333 pcbinfo
->ipi_count
++;
334 lck_rw_done(pcbinfo
->mtx
);
340 in_pcblookup_local_and_cleanup does everything
341 in_pcblookup_local does but it checks for a socket
342 that's going away. Since we know that the lock is
343 held read+write when this function is called, we
344 can safely dispose of this socket like the slow
345 timer would usually do and return NULL. This is
349 in_pcblookup_local_and_cleanup(
350 struct inpcbinfo
*pcbinfo
,
351 struct in_addr laddr
,
357 /* Perform normal lookup */
358 inp
= in_pcblookup_local(pcbinfo
, laddr
, lport_arg
, wild_okay
);
360 /* Check if we found a match but it's waiting to be disposed */
361 if (inp
&& inp
->inp_wantcnt
== WNT_STOPUSING
) {
362 struct socket
*so
= inp
->inp_socket
;
364 lck_mtx_lock(&inp
->inpcb_mtx
);
366 if (so
->so_usecount
== 0) {
367 if (inp
->inp_state
!= INPCB_STATE_DEAD
)
373 lck_mtx_unlock(&inp
->inpcb_mtx
);
380 #ifdef __APPLE_API_PRIVATE
/*
 * Post a KEV_INET_PORTINUSE kernel event naming the conflicting port
 * (host order) and the current process.  Fired when a
 * non-participating socket tries to bind a port owned by a socket
 * that set SOF_NOTIFYCONFLICT (see the Radar note below).
 */
382 in_pcb_conflict_post_msg(u_int16_t port
)
385 * Radar 5523020 send a kernel event notification if a non-participating socket tries to bind
386 * the port a socket who has set SOF_NOTIFYCONFLICT owns.
388 struct kev_msg ev_msg
;
389 struct kev_in_portinuse in_portinuse
;
/* Zero both structures before filling them in. */
391 bzero(&in_portinuse
, sizeof(struct kev_in_portinuse
));
392 bzero(&ev_msg
, sizeof(struct kev_msg
));
393 in_portinuse
.port
= ntohs(port
); /* port in host order */
394 in_portinuse
.req_pid
= proc_selfpid();
/* Route the event: Apple vendor / network class / inet subclass. */
395 ev_msg
.vendor_code
= KEV_VENDOR_APPLE
;
396 ev_msg
.kev_class
= KEV_NETWORK_CLASS
;
397 ev_msg
.kev_subclass
= KEV_INET_SUBCLASS
;
398 ev_msg
.event_code
= KEV_INET_PORTINUSE
;
/* Single data vector: the portinuse payload, terminated by length 0. */
399 ev_msg
.dv
[0].data_ptr
= &in_portinuse
;
400 ev_msg
.dv
[0].data_length
= sizeof(struct kev_in_portinuse
);
401 ev_msg
.dv
[1].data_length
= 0;
402 kev_post_msg(&ev_msg
);
407 * EADDRNOTAVAIL Address not available.
408 * EINVAL Invalid argument
409 * EAFNOSUPPORT Address family not supported [notdef]
410 * EACCES Permission denied
411 * EADDRINUSE Address in use
412 * EAGAIN Resource unavailable, try again
413 * priv_check_cred:EPERM Operation not permitted
416 in_pcbbind(struct inpcb
*inp
, struct sockaddr
*nam
, struct proc
*p
)
418 struct socket
*so
= inp
->inp_socket
;
419 unsigned short *lastport
;
420 struct sockaddr_in
*sin
;
421 struct inpcbinfo
*pcbinfo
= inp
->inp_pcbinfo
;
422 u_short lport
= 0, rand_port
= 0;
423 int wild
= 0, reuseport
= (so
->so_options
& SO_REUSEPORT
);
424 int error
, randomport
, conflict
= 0;
427 if (TAILQ_EMPTY(&in_ifaddrhead
)) /* XXX broken! */
428 return (EADDRNOTAVAIL
);
429 if (inp
->inp_lport
|| inp
->inp_laddr
.s_addr
!= INADDR_ANY
)
431 if ((so
->so_options
& (SO_REUSEADDR
|SO_REUSEPORT
)) == 0)
433 socket_unlock(so
, 0); /* keep reference on socket */
434 lck_rw_lock_exclusive(pcbinfo
->mtx
);
436 struct ifnet
*outif
= NULL
;
438 sin
= (struct sockaddr_in
*)(void *)nam
;
439 if (nam
->sa_len
!= sizeof (*sin
)) {
440 lck_rw_done(pcbinfo
->mtx
);
446 * We should check the family, but old programs
447 * incorrectly fail to initialize it.
449 if (sin
->sin_family
!= AF_INET
) {
450 lck_rw_done(pcbinfo
->mtx
);
452 return (EAFNOSUPPORT
);
455 lport
= sin
->sin_port
;
456 if (IN_MULTICAST(ntohl(sin
->sin_addr
.s_addr
))) {
458 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
459 * allow complete duplication of binding if
460 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
461 * and a multicast address is bound on both
462 * new and duplicated sockets.
464 if (so
->so_options
& SO_REUSEADDR
)
465 reuseport
= SO_REUSEADDR
|SO_REUSEPORT
;
466 } else if (sin
->sin_addr
.s_addr
!= INADDR_ANY
) {
468 sin
->sin_port
= 0; /* yech... */
469 if ((ifa
= ifa_ifwithaddr((struct sockaddr
*)sin
)) == 0) {
470 lck_rw_done(pcbinfo
->mtx
);
472 return (EADDRNOTAVAIL
);
476 outif
= ifa
->ifa_ifp
;
486 if (ntohs(lport
) < IPPORT_RESERVED
) {
487 cred
= kauth_cred_proc_ref(p
);
488 error
= priv_check_cred(cred
, PRIV_NETINET_RESERVEDPORT
, 0);
489 kauth_cred_unref(&cred
);
491 lck_rw_done(pcbinfo
->mtx
);
497 if (kauth_cred_getuid(so
->so_cred
) &&
498 !IN_MULTICAST(ntohl(sin
->sin_addr
.s_addr
))) {
499 t
= in_pcblookup_local_and_cleanup(inp
->inp_pcbinfo
,
500 sin
->sin_addr
, lport
, INPLOOKUP_WILDCARD
);
502 (ntohl(sin
->sin_addr
.s_addr
) != INADDR_ANY
||
503 ntohl(t
->inp_laddr
.s_addr
) != INADDR_ANY
||
504 (t
->inp_socket
->so_options
&
505 SO_REUSEPORT
) == 0) &&
506 (kauth_cred_getuid(so
->so_cred
) !=
507 kauth_cred_getuid(t
->inp_socket
->so_cred
)) &&
508 ((t
->inp_socket
->so_flags
& SOF_REUSESHAREUID
) == 0) &&
509 (ntohl(sin
->sin_addr
.s_addr
) != INADDR_ANY
||
510 ntohl(t
->inp_laddr
.s_addr
) != INADDR_ANY
))
512 #ifdef __APPLE_API_PRIVATE
514 if ((t
->inp_socket
->so_flags
& SOF_NOTIFYCONFLICT
) && ((so
->so_flags
& SOF_NOTIFYCONFLICT
) == 0))
517 lck_rw_done(pcbinfo
->mtx
);
520 in_pcb_conflict_post_msg(lport
);
522 lck_rw_done(pcbinfo
->mtx
);
523 #endif /* __APPLE_API_PRIVATE */
529 t
= in_pcblookup_local_and_cleanup(pcbinfo
, sin
->sin_addr
,
532 (reuseport
& t
->inp_socket
->so_options
) == 0) {
534 if (ntohl(sin
->sin_addr
.s_addr
) !=
536 ntohl(t
->inp_laddr
.s_addr
) !=
538 INP_SOCKAF(so
) != AF_INET6
||
539 INP_SOCKAF(t
->inp_socket
) != AF_INET6
)
542 #ifdef __APPLE_API_PRIVATE
544 if ((t
->inp_socket
->so_flags
& SOF_NOTIFYCONFLICT
) && ((so
->so_flags
& SOF_NOTIFYCONFLICT
) == 0))
547 lck_rw_done(pcbinfo
->mtx
);
550 in_pcb_conflict_post_msg(lport
);
552 lck_rw_done(pcbinfo
->mtx
);
553 #endif /* __APPLE_API_PRIVATE */
559 inp
->inp_laddr
= sin
->sin_addr
;
560 inp
->inp_last_outifp
= outif
;
566 randomport
= (so
->so_flags
& SOF_BINDRANDOMPORT
) ||
567 (so
->so_type
== SOCK_STREAM
? tcp_use_randomport
: udp_use_randomport
);
569 inp
->inp_flags
|= INP_ANONPORT
;
571 if (inp
->inp_flags
& INP_HIGHPORT
) {
572 first
= ipport_hifirstauto
; /* sysctl */
573 last
= ipport_hilastauto
;
574 lastport
= &pcbinfo
->lasthi
;
575 } else if (inp
->inp_flags
& INP_LOWPORT
) {
576 cred
= kauth_cred_proc_ref(p
);
577 error
= priv_check_cred(cred
, PRIV_NETINET_RESERVEDPORT
, 0);
578 kauth_cred_unref(&cred
);
580 lck_rw_done(pcbinfo
->mtx
);
584 first
= ipport_lowfirstauto
; /* 1023 */
585 last
= ipport_lowlastauto
; /* 600 */
586 lastport
= &pcbinfo
->lastlow
;
588 first
= ipport_firstauto
; /* sysctl */
589 last
= ipport_lastauto
;
590 lastport
= &pcbinfo
->lastport
;
592 /* No point in randomizing if only one port is available */
597 * Simple check to ensure all ports are not used up causing
600 * We split the two cases (up and down) so that the direction
601 * is not being tested on each round of the loop.
608 read_random(&rand_port
, sizeof(rand_port
));
609 *lastport
= first
- (rand_port
% (first
- last
));
611 count
= first
- last
;
614 if (count
-- < 0) { /* completely used? */
615 lck_rw_done(pcbinfo
->mtx
);
617 inp
->inp_laddr
.s_addr
= INADDR_ANY
;
618 inp
->inp_last_outifp
= NULL
;
619 return (EADDRNOTAVAIL
);
622 if (*lastport
> first
|| *lastport
< last
)
624 lport
= htons(*lastport
);
625 } while (in_pcblookup_local_and_cleanup(pcbinfo
,
626 inp
->inp_laddr
, lport
, wild
));
632 read_random(&rand_port
, sizeof(rand_port
));
633 *lastport
= first
+ (rand_port
% (first
- last
));
635 count
= last
- first
;
638 if (count
-- < 0) { /* completely used? */
639 lck_rw_done(pcbinfo
->mtx
);
641 inp
->inp_laddr
.s_addr
= INADDR_ANY
;
642 inp
->inp_last_outifp
= NULL
;
643 return (EADDRNOTAVAIL
);
646 if (*lastport
< first
|| *lastport
> last
)
648 lport
= htons(*lastport
);
649 } while (in_pcblookup_local_and_cleanup(pcbinfo
,
650 inp
->inp_laddr
, lport
, wild
));
654 inp
->inp_lport
= lport
;
655 if (in_pcbinshash(inp
, 1) != 0) {
656 inp
->inp_laddr
.s_addr
= INADDR_ANY
;
658 inp
->inp_last_outifp
= NULL
;
659 lck_rw_done(pcbinfo
->mtx
);
662 lck_rw_done(pcbinfo
->mtx
);
663 sflt_notify(so
, sock_evt_bound
, NULL
);
668 * Transform old in_pcbconnect() into an inner subroutine for new
669 * in_pcbconnect(): Do some validity-checking on the remote
670 * address (in mbuf 'nam') and then determine local host address
671 * (i.e., which interface) to use to access that remote host.
673 * This preserves definition of in_pcbconnect(), while supporting a
674 * slightly different version for T/TCP. (This is more than
675 * a bit of a kludge, but cleaning up the internal interfaces would
676 * have forced minor changes in every protocol).
679 * EINVAL Invalid argument
680 * EAFNOSUPPORT Address family not supported
681 * EADDRNOTAVAIL Address not available
684 in_pcbladdr(struct inpcb
*inp
, struct sockaddr
*nam
,
685 struct sockaddr_in
*plocal_sin
, struct ifnet
**outif
)
687 struct in_ifaddr
*ia
;
688 struct sockaddr_in
*sin
= (struct sockaddr_in
*)(void *)nam
;
690 if (nam
->sa_len
!= sizeof (*sin
))
692 if (sin
->sin_family
!= AF_INET
)
693 return (EAFNOSUPPORT
);
694 if (sin
->sin_port
== 0)
695 return (EADDRNOTAVAIL
);
697 lck_rw_lock_shared(in_ifaddr_rwlock
);
698 if (!TAILQ_EMPTY(&in_ifaddrhead
)) {
699 ia
= TAILQ_FIRST(&in_ifaddrhead
);
701 * If the destination address is INADDR_ANY,
702 * use the primary local address.
703 * If the supplied address is INADDR_BROADCAST,
704 * and the primary interface supports broadcast,
705 * choose the broadcast address for that interface.
707 IFA_LOCK_SPIN(&ia
->ia_ifa
);
708 if (sin
->sin_addr
.s_addr
== INADDR_ANY
)
709 sin
->sin_addr
= IA_SIN(ia
)->sin_addr
;
710 else if (sin
->sin_addr
.s_addr
== (u_int32_t
)INADDR_BROADCAST
&&
711 (ia
->ia_ifp
->if_flags
& IFF_BROADCAST
))
712 sin
->sin_addr
= satosin(&ia
->ia_broadaddr
)->sin_addr
;
713 IFA_UNLOCK(&ia
->ia_ifa
);
716 lck_rw_done(in_ifaddr_rwlock
);
718 if (inp
->inp_laddr
.s_addr
== INADDR_ANY
) {
720 unsigned int ifscope
= IFSCOPE_NONE
;
723 * If the socket is bound to a specifc interface, the
724 * optional scoped takes precedence over that if it
725 * is set by the caller.
727 ia
= (struct in_ifaddr
*)0;
729 if (outif
!= NULL
&& *outif
!= NULL
)
730 ifscope
= (*outif
)->if_index
;
731 else if (inp
->inp_flags
& INP_BOUND_IF
)
732 ifscope
= inp
->inp_boundifp
->if_index
;
734 nocell
= (inp
->inp_flags
& INP_NO_IFT_CELLULAR
) ? 1 : 0;
736 * If route is known or can be allocated now,
737 * our src addr is taken from the i/f, else punt.
738 * Note that we should check the address family of the cached
739 * destination, in case of sharing the cache with IPv6.
741 ro
= &inp
->inp_route
;
742 if (ro
->ro_rt
!= NULL
)
743 RT_LOCK_SPIN(ro
->ro_rt
);
744 if (ro
->ro_rt
&& (ro
->ro_dst
.sa_family
!= AF_INET
||
745 satosin(&ro
->ro_dst
)->sin_addr
.s_addr
!=
746 sin
->sin_addr
.s_addr
||
747 inp
->inp_socket
->so_options
& SO_DONTROUTE
||
748 ro
->ro_rt
->generation_id
!= route_generation
)) {
749 RT_UNLOCK(ro
->ro_rt
);
753 if ((inp
->inp_socket
->so_options
& SO_DONTROUTE
) == 0 && /*XXX*/
754 (ro
->ro_rt
== NULL
|| ro
->ro_rt
->rt_ifp
== NULL
)) {
755 if (ro
->ro_rt
!= NULL
)
756 RT_UNLOCK(ro
->ro_rt
);
757 /* No route yet, so try to acquire one */
758 bzero(&ro
->ro_dst
, sizeof(struct sockaddr_in
));
759 ro
->ro_dst
.sa_family
= AF_INET
;
760 ro
->ro_dst
.sa_len
= sizeof(struct sockaddr_in
);
761 ((struct sockaddr_in
*)(void *)&ro
->ro_dst
)->sin_addr
=
763 rtalloc_scoped(ro
, ifscope
);
764 if (ro
->ro_rt
!= NULL
)
765 RT_LOCK_SPIN(ro
->ro_rt
);
768 * If the route points to a cellular interface and the
769 * caller forbids our using interfaces of such type,
770 * pretend that there is no route.
772 if (nocell
&& ro
->ro_rt
!= NULL
) {
773 RT_LOCK_ASSERT_HELD(ro
->ro_rt
);
774 if (ro
->ro_rt
->rt_ifp
->if_type
== IFT_CELLULAR
) {
775 RT_UNLOCK(ro
->ro_rt
);
778 soevent(inp
->inp_socket
,
779 (SO_FILT_HINT_LOCKED
|
780 SO_FILT_HINT_IFDENIED
));
784 * If we found a route, use the address
785 * corresponding to the outgoing interface
786 * unless it is the loopback (in case a route
787 * to our address on another net goes to loopback).
789 if (ro
->ro_rt
!= NULL
) {
790 /* Become a regular mutex */
791 RT_CONVERT_LOCK(ro
->ro_rt
);
792 if (!(ro
->ro_rt
->rt_ifp
->if_flags
& IFF_LOOPBACK
)) {
793 ia
= ifatoia(ro
->ro_rt
->rt_ifa
);
795 IFA_ADDREF(&ia
->ia_ifa
);
798 RT_UNLOCK(ro
->ro_rt
);
801 u_short fport
= sin
->sin_port
;
804 ia
= ifatoia(ifa_ifwithdstaddr(sintosa(sin
)));
806 ia
= ifatoia(ifa_ifwithnet_scoped(sintosa(sin
),
809 sin
->sin_port
= fport
;
811 lck_rw_lock_shared(in_ifaddr_rwlock
);
812 ia
= TAILQ_FIRST(&in_ifaddrhead
);
814 IFA_ADDREF(&ia
->ia_ifa
);
815 lck_rw_done(in_ifaddr_rwlock
);
818 * If the source address belongs to a cellular interface
819 * and the socket forbids our using interfaces of such
820 * type, pretend that there is no source address.
822 if (nocell
&& ia
!= NULL
&&
823 ia
->ia_ifa
.ifa_ifp
->if_type
== IFT_CELLULAR
) {
824 IFA_REMREF(&ia
->ia_ifa
);
826 soevent(inp
->inp_socket
,
827 (SO_FILT_HINT_LOCKED
|
828 SO_FILT_HINT_IFDENIED
));
831 return (EADDRNOTAVAIL
);
834 * If the destination address is multicast and an outgoing
835 * interface has been set as a multicast option, use the
836 * address of that interface as our source address.
838 if (IN_MULTICAST(ntohl(sin
->sin_addr
.s_addr
)) &&
839 inp
->inp_moptions
!= NULL
) {
840 struct ip_moptions
*imo
;
843 imo
= inp
->inp_moptions
;
845 if (imo
->imo_multicast_ifp
!= NULL
&& (ia
== NULL
||
846 ia
->ia_ifp
!= imo
->imo_multicast_ifp
)) {
847 ifp
= imo
->imo_multicast_ifp
;
849 IFA_REMREF(&ia
->ia_ifa
);
850 lck_rw_lock_shared(in_ifaddr_rwlock
);
851 TAILQ_FOREACH(ia
, &in_ifaddrhead
, ia_link
) {
852 if (ia
->ia_ifp
== ifp
)
856 IFA_ADDREF(&ia
->ia_ifa
);
857 lck_rw_done(in_ifaddr_rwlock
);
860 return (EADDRNOTAVAIL
);
866 * Don't do pcblookup call here; return interface in plocal_sin
867 * and exit to caller, that will do the lookup.
869 IFA_LOCK_SPIN(&ia
->ia_ifa
);
870 *plocal_sin
= ia
->ia_addr
;
873 IFA_UNLOCK(&ia
->ia_ifa
);
874 IFA_REMREF(&ia
->ia_ifa
);
881 * Connect from a socket to a specified address.
882 * Both address and port must be specified in argument sin.
883 * If don't have a local address for this socket yet,
887 in_pcbconnect(struct inpcb
*inp
, struct sockaddr
*nam
, struct proc
*p
,
888 struct ifnet
**outif
)
890 struct sockaddr_in ifaddr
;
891 struct sockaddr_in
*sin
= (struct sockaddr_in
*)(void *)nam
;
896 * Call inner routine, to assign local interface address.
898 if ((error
= in_pcbladdr(inp
, nam
, &ifaddr
, outif
)) != 0)
901 socket_unlock(inp
->inp_socket
, 0);
902 pcb
= in_pcblookup_hash(inp
->inp_pcbinfo
, sin
->sin_addr
, sin
->sin_port
,
903 inp
->inp_laddr
.s_addr
? inp
->inp_laddr
: ifaddr
.sin_addr
,
904 inp
->inp_lport
, 0, NULL
);
905 socket_lock(inp
->inp_socket
, 0);
907 /* Check if the socket is still in a valid state. When we unlock this
908 * embryonic socket, it can get aborted if another thread is closing
909 * the listener (radar 7947600).
911 if ((inp
->inp_socket
->so_flags
& SOF_ABORTED
) != 0) {
916 in_pcb_checkstate(pcb
, WNT_RELEASE
, pcb
== inp
? 1 : 0);
919 if (inp
->inp_laddr
.s_addr
== INADDR_ANY
) {
920 if (inp
->inp_lport
== 0) {
921 error
= in_pcbbind(inp
, (struct sockaddr
*)0, p
);
925 if (!lck_rw_try_lock_exclusive(inp
->inp_pcbinfo
->mtx
)) {
926 /*lock inversion issue, mostly with udp multicast packets */
927 socket_unlock(inp
->inp_socket
, 0);
928 lck_rw_lock_exclusive(inp
->inp_pcbinfo
->mtx
);
929 socket_lock(inp
->inp_socket
, 0);
931 inp
->inp_laddr
= ifaddr
.sin_addr
;
932 inp
->inp_last_outifp
= (outif
!= NULL
) ? *outif
: NULL
;
933 inp
->inp_flags
|= INP_INADDR_ANY
;
936 if (!lck_rw_try_lock_exclusive(inp
->inp_pcbinfo
->mtx
)) {
937 /*lock inversion issue, mostly with udp multicast packets */
938 socket_unlock(inp
->inp_socket
, 0);
939 lck_rw_lock_exclusive(inp
->inp_pcbinfo
->mtx
);
940 socket_lock(inp
->inp_socket
, 0);
943 inp
->inp_faddr
= sin
->sin_addr
;
944 inp
->inp_fport
= sin
->sin_port
;
946 lck_rw_done(inp
->inp_pcbinfo
->mtx
);
951 in_pcbdisconnect(struct inpcb
*inp
)
954 inp
->inp_faddr
.s_addr
= INADDR_ANY
;
957 if (!lck_rw_try_lock_exclusive(inp
->inp_pcbinfo
->mtx
)) {
958 /*lock inversion issue, mostly with udp multicast packets */
959 socket_unlock(inp
->inp_socket
, 0);
960 lck_rw_lock_exclusive(inp
->inp_pcbinfo
->mtx
);
961 socket_lock(inp
->inp_socket
, 0);
965 lck_rw_done(inp
->inp_pcbinfo
->mtx
);
967 if (inp
->inp_socket
->so_state
& SS_NOFDREF
)
972 in_pcbdetach(struct inpcb
*inp
)
974 struct socket
*so
= inp
->inp_socket
;
976 if (so
->so_pcb
== 0) { /* we've been called twice */
977 panic("in_pcbdetach: inp=%p so=%p proto=%d so_pcb is null!\n",
978 inp
, so
, so
->so_proto
->pr_protocol
);
982 if (ipsec_bypass
== 0) {
983 ipsec4_delete_pcbpolicy(inp
);
987 /* mark socket state as dead */
988 if (in_pcb_checkstate(inp
, WNT_STOPUSING
, 1) != WNT_STOPUSING
)
989 panic("in_pcbdetach so=%p prot=%x couldn't set to STOPUSING\n", so
, so
->so_proto
->pr_protocol
);
992 if (so
->cached_in_sock_layer
)
993 printf("in_pcbdetach for cached socket %x flags=%x\n", so
, so
->so_flags
);
995 printf("in_pcbdetach for allocated socket %x flags=%x\n", so
, so
->so_flags
);
997 if ((so
->so_flags
& SOF_PCBCLEARING
) == 0) {
999 struct ip_moptions
*imo
;
1002 if (inp
->inp_options
)
1003 (void)m_free(inp
->inp_options
);
1004 if ((rt
= inp
->inp_route
.ro_rt
) != NULL
) {
1005 inp
->inp_route
.ro_rt
= NULL
;
1008 imo
= inp
->inp_moptions
;
1009 inp
->inp_moptions
= NULL
;
1012 sofreelastref(so
, 0);
1013 inp
->inp_state
= INPCB_STATE_DEAD
;
1014 so
->so_flags
|= SOF_PCBCLEARING
; /* makes sure we're not called twice from so_close */
1020 in_pcbdispose(struct inpcb
*inp
)
1022 struct socket
*so
= inp
->inp_socket
;
1023 struct inpcbinfo
*ipi
= inp
->inp_pcbinfo
;
1026 if (inp
->inp_state
!= INPCB_STATE_DEAD
) {
1027 printf("in_pcbdispose: not dead yet? so=%p\n", so
);
1030 if (so
&& so
->so_usecount
!= 0)
1031 panic("%s: so %p so_usecount %d so_lockhistory %s\n",
1032 __func__
, so
, so
->so_usecount
,
1033 (so
!= NULL
) ? solockhistory_nr(so
) : "--");
1035 lck_rw_assert(ipi
->mtx
, LCK_RW_ASSERT_EXCLUSIVE
);
1037 inp
->inp_gencnt
= ++ipi
->ipi_gencnt
;
1038 /* access ipi in in_pcbremlists */
1039 in_pcbremlists(inp
);
1042 if (so
->so_proto
->pr_flags
& PR_PCBLOCK
) {
1043 sofreelastref(so
, 0);
1044 if (so
->so_rcv
.sb_cc
|| so
->so_snd
.sb_cc
) {
1046 printf("in_pcbdispose sb not cleaned up so=%p rc_cci=%x snd_cc=%x\n",
1047 so
, so
->so_rcv
.sb_cc
, so
->so_snd
.sb_cc
);
1049 sbrelease(&so
->so_rcv
);
1050 sbrelease(&so
->so_snd
);
1052 if (so
->so_head
!= NULL
)
1053 panic("in_pcbdispose, so=%p head still exist\n", so
);
1054 lck_mtx_unlock(&inp
->inpcb_mtx
);
1055 lck_mtx_destroy(&inp
->inpcb_mtx
, ipi
->mtx_grp
);
1057 so
->so_flags
|= SOF_PCBCLEARING
; /* makes sure we're not called twice from so_close */
1058 so
->so_saved_pcb
= (caddr_t
) inp
;
1060 inp
->inp_socket
= 0;
1062 mac_inpcb_label_destroy(inp
);
1065 * In case there a route cached after a detach (possible
1066 * in the tcp case), make sure that it is freed before
1067 * we deallocate the structure.
1069 if (inp
->inp_route
.ro_rt
!= NULL
) {
1070 rtfree(inp
->inp_route
.ro_rt
);
1071 inp
->inp_route
.ro_rt
= NULL
;
1073 if (so
->cached_in_sock_layer
== 0) {
1074 zfree(ipi
->ipi_zone
, inp
);
1080 printf("in_pcbdispose: no socket for inp=%p\n", inp
);
1085 * The calling convention of in_setsockaddr() and in_setpeeraddr() was
1086 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
1087 * in struct pr_usrreqs, so that protocols can just reference then directly
1088 * without the need for a wrapper function. The socket must have a valid
1089 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
1090 * except through a kernel programming error, so it is acceptable to panic
1091 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap
1092 * because there actually /is/ a programming error somewhere... XXX)
1094 * Returns: 0 Success
1095 * ENOBUFS No buffer space available
1096 * ECONNRESET Connection reset
/*
 * pru_sockaddr entry point: return the socket's LOCAL address/port in
 * a freshly MALLOC'd sockaddr_in handed back through *nam (caller
 * frees).  Error/return lines were stripped by extraction; per the
 * header comment above, ENOBUFS / ECONNRESET are the failure codes.
 */
1099 in_setsockaddr(struct socket
*so
, struct sockaddr
**nam
)
1102 struct sockaddr_in
*sin
;
1105 * Do the malloc first in case it blocks.
1107 MALLOC(sin
, struct sockaddr_in
*, sizeof *sin
, M_SONAME
, M_WAITOK
);
/* Fill in the fixed AF_INET header of the result. */
1110 bzero(sin
, sizeof *sin
);
1111 sin
->sin_family
= AF_INET
;
1112 sin
->sin_len
= sizeof(*sin
);
1114 inp
= sotoinpcb(so
);
/* No pcb (socket torn down): release the buffer — ECONNRESET path. */
1116 FREE(sin
, M_SONAME
);
/* Copy the bound local endpoint out of the pcb. */
1119 sin
->sin_port
= inp
->inp_lport
;
1120 sin
->sin_addr
= inp
->inp_laddr
;
1122 *nam
= (struct sockaddr
*)sin
;
/*
 * pru_peeraddr entry point: identical shape to in_setsockaddr() above
 * but returns the FOREIGN (peer) address/port from the pcb in a
 * freshly MALLOC'd sockaddr_in via *nam (caller frees).
 */
1127 in_setpeeraddr(struct socket
*so
, struct sockaddr
**nam
)
1130 struct sockaddr_in
*sin
;
1133 * Do the malloc first in case it blocks.
1135 MALLOC(sin
, struct sockaddr_in
*, sizeof *sin
, M_SONAME
, M_WAITOK
);
/* Fill in the fixed AF_INET header of the result. */
1138 bzero((caddr_t
)sin
, sizeof (*sin
));
1139 sin
->sin_family
= AF_INET
;
1140 sin
->sin_len
= sizeof(*sin
);
1142 inp
= sotoinpcb(so
);
/* No pcb (socket torn down): release the buffer — error path. */
1144 FREE(sin
, M_SONAME
);
/* Copy the connected foreign endpoint out of the pcb. */
1147 sin
->sin_port
= inp
->inp_fport
;
1148 sin
->sin_addr
= inp
->inp_faddr
;
1150 *nam
= (struct sockaddr
*)sin
;
/*
 * Walk every pcb in pcbinfo's list (under the shared pcbinfo lock)
 * and invoke "notify" with "errno" on each IPv4 pcb whose foreign
 * address matches faddr.  Pcbs already marked WNT_STOPUSING are
 * skipped; a WNT_ACQUIRE/WNT_RELEASE pair plus the per-socket lock
 * keeps each pcb alive across the callback.
 */
1155 in_pcbnotifyall(struct inpcbinfo
*pcbinfo
, struct in_addr faddr
,
1156 int errno
, void (*notify
)(struct inpcb
*, int))
1160 lck_rw_lock_shared(pcbinfo
->mtx
);
1162 LIST_FOREACH(inp
, pcbinfo
->listhead
, inp_list
) {
/* Only IPv4 pcbs are of interest here. */
1164 if ((inp
->inp_vflag
& INP_IPV4
) == 0)
/* Skip pcbs connected to some other peer, or with no socket. */
1167 if (inp
->inp_faddr
.s_addr
!= faddr
.s_addr
||
1168 inp
->inp_socket
== NULL
)
/* Pcb is being torn down — do not touch it. */
1170 if (in_pcb_checkstate(inp
, WNT_ACQUIRE
, 0) == WNT_STOPUSING
)
/* Deliver the notification with the socket locked. */
1172 socket_lock(inp
->inp_socket
, 1);
1173 (*notify
)(inp
, errno
);
1174 (void)in_pcb_checkstate(inp
, WNT_RELEASE
, 1);
1175 socket_unlock(inp
->inp_socket
, 1);
1177 lck_rw_done(pcbinfo
->mtx
);
1181 * Check for alternatives when higher level complains
1182 * about service problems. For now, invalidate cached
1183 * routing information. If the route was created dynamically
1184 * (by a redirect), time to try a default gateway again.
1187 in_losing(struct inpcb
*inp
)
1190 struct rt_addrinfo info
;
1192 if ((rt
= inp
->inp_route
.ro_rt
) != NULL
) {
1193 struct in_ifaddr
*ia
;
1195 bzero((caddr_t
)&info
, sizeof(info
));
1197 info
.rti_info
[RTAX_DST
] =
1198 (struct sockaddr
*)&inp
->inp_route
.ro_dst
;
1199 info
.rti_info
[RTAX_GATEWAY
] = rt
->rt_gateway
;
1200 info
.rti_info
[RTAX_NETMASK
] = rt_mask(rt
);
1201 rt_missmsg(RTM_LOSING
, &info
, rt
->rt_flags
, 0);
1202 if (rt
->rt_flags
& RTF_DYNAMIC
) {
1204 * Prevent another thread from modifying rt_key,
1205 * rt_gateway via rt_setgate() after rt_lock is
1206 * dropped by marking the route as defunct.
1208 rt
->rt_flags
|= RTF_CONDEMNED
;
1210 (void) rtrequest(RTM_DELETE
, rt_key(rt
),
1211 rt
->rt_gateway
, rt_mask(rt
), rt
->rt_flags
,
1212 (struct rtentry
**)0);
1216 /* if the address is gone keep the old route in the pcb */
1217 if ((ia
= ifa_foraddr(inp
->inp_laddr
.s_addr
)) != NULL
) {
1218 inp
->inp_route
.ro_rt
= NULL
;
1220 IFA_REMREF(&ia
->ia_ifa
);
1223 * A new route can be allocated
1224 * the next time output is attempted.
1230 * After a routing change, flush old routing
1231 * and allocate a (hopefully) better one.
/*
 * Routing-change hook (see comment above): drop the pcb's cached
 * route so a (hopefully better) one is allocated on the next output
 * attempt.  The cached route is kept if the pcb's local address is no
 * longer configured on any interface.
 */
1234 in_rtchange(struct inpcb
*inp
, __unused
int errno
)
1238 if ((rt
= inp
->inp_route
.ro_rt
) != NULL
) {
1239 struct in_ifaddr
*ia
;
/* Local address gone from all interfaces: keep the old route. */
1241 if ((ia
= ifa_foraddr(inp
->inp_laddr
.s_addr
)) == NULL
) {
1242 return; /* we can't remove the route now. not sure if still ok to use src */
/* ifa_foraddr() returned a referenced ifaddr; drop that reference. */
1244 IFA_REMREF(&ia
->ia_ifa
);
/* Detach the cached route from the pcb. */
1246 inp
->inp_route
.ro_rt
= NULL
;
1248 * A new route can be allocated the next time
1249 * output is attempted.
1255 * Lookup a PCB based on the local address and port.
1258 in_pcblookup_local(struct inpcbinfo
*pcbinfo
, struct in_addr laddr
,
1259 unsigned int lport_arg
, int wild_okay
)
1262 int matchwild
= 3, wildcard
;
1263 u_short lport
= lport_arg
;
1265 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP
| DBG_FUNC_START
, 0,0,0,0,0);
1268 struct inpcbhead
*head
;
1270 * Look for an unconnected (wildcard foreign addr) PCB that
1271 * matches the local address and port we're looking for.
1273 head
= &pcbinfo
->hashbase
[INP_PCBHASH(INADDR_ANY
, lport
, 0, pcbinfo
->hashmask
)];
1274 LIST_FOREACH(inp
, head
, inp_hash
) {
1276 if ((inp
->inp_vflag
& INP_IPV4
) == 0)
1279 if (inp
->inp_faddr
.s_addr
== INADDR_ANY
&&
1280 inp
->inp_laddr
.s_addr
== laddr
.s_addr
&&
1281 inp
->inp_lport
== lport
) {
1291 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP
| DBG_FUNC_END
, 0,0,0,0,0);
1294 struct inpcbporthead
*porthash
;
1295 struct inpcbport
*phd
;
1296 struct inpcb
*match
= NULL
;
1298 * Best fit PCB lookup.
1300 * First see if this local port is in use by looking on the
1303 porthash
= &pcbinfo
->porthashbase
[INP_PCBPORTHASH(lport
,
1304 pcbinfo
->porthashmask
)];
1305 LIST_FOREACH(phd
, porthash
, phd_hash
) {
1306 if (phd
->phd_port
== lport
)
1311 * Port is in use by one or more PCBs. Look for best
1314 LIST_FOREACH(inp
, &phd
->phd_pcblist
, inp_portlist
) {
1317 if ((inp
->inp_vflag
& INP_IPV4
) == 0)
1320 if (inp
->inp_faddr
.s_addr
!= INADDR_ANY
)
1322 if (inp
->inp_laddr
.s_addr
!= INADDR_ANY
) {
1323 if (laddr
.s_addr
== INADDR_ANY
)
1325 else if (inp
->inp_laddr
.s_addr
!= laddr
.s_addr
)
1328 if (laddr
.s_addr
!= INADDR_ANY
)
1331 if (wildcard
< matchwild
) {
1333 matchwild
= wildcard
;
1334 if (matchwild
== 0) {
1340 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP
| DBG_FUNC_END
, match
,0,0,0,0);
1346 * Check if PCB exists in hash list.
1349 in_pcblookup_hash_exists(
1350 struct inpcbinfo
*pcbinfo
,
1351 struct in_addr faddr
,
1353 struct in_addr laddr
,
1360 struct inpcbhead
*head
;
1362 u_short fport
= fport_arg
, lport
= lport_arg
;
1369 * We may have found the pcb in the last lookup - check this first.
1372 lck_rw_lock_shared(pcbinfo
->mtx
);
1375 * First look for an exact match.
1377 head
= &pcbinfo
->hashbase
[INP_PCBHASH(faddr
.s_addr
, lport
, fport
,
1378 pcbinfo
->hashmask
)];
1379 LIST_FOREACH(inp
, head
, inp_hash
) {
1381 if ((inp
->inp_vflag
& INP_IPV4
) == 0)
1384 if (ip_restrictrecvif
&& ifp
!= NULL
&&
1385 (ifp
->if_eflags
& IFEF_RESTRICTED_RECV
) &&
1386 !(inp
->inp_flags
& INP_RECV_ANYIF
))
1389 if (inp
->inp_faddr
.s_addr
== faddr
.s_addr
&&
1390 inp
->inp_laddr
.s_addr
== laddr
.s_addr
&&
1391 inp
->inp_fport
== fport
&&
1392 inp
->inp_lport
== lport
) {
1393 if ((found
= (inp
->inp_socket
!= NULL
))) {
1397 *uid
= kauth_cred_getuid(
1398 inp
->inp_socket
->so_cred
);
1399 *gid
= kauth_cred_getgid(
1400 inp
->inp_socket
->so_cred
);
1402 lck_rw_done(pcbinfo
->mtx
);
1407 struct inpcb
*local_wild
= NULL
;
1409 struct inpcb
*local_wild_mapped
= NULL
;
1412 head
= &pcbinfo
->hashbase
[INP_PCBHASH(INADDR_ANY
, lport
, 0,
1413 pcbinfo
->hashmask
)];
1414 LIST_FOREACH(inp
, head
, inp_hash
) {
1416 if ((inp
->inp_vflag
& INP_IPV4
) == 0)
1419 if (ip_restrictrecvif
&& ifp
!= NULL
&&
1420 (ifp
->if_eflags
& IFEF_RESTRICTED_RECV
) &&
1421 !(inp
->inp_flags
& INP_RECV_ANYIF
))
1424 if (inp
->inp_faddr
.s_addr
== INADDR_ANY
&&
1425 inp
->inp_lport
== lport
) {
1426 if (inp
->inp_laddr
.s_addr
== laddr
.s_addr
) {
1427 if ((found
= (inp
->inp_socket
!= NULL
))) {
1428 *uid
= kauth_cred_getuid(
1429 inp
->inp_socket
->so_cred
);
1430 *gid
= kauth_cred_getgid(
1431 inp
->inp_socket
->so_cred
);
1433 lck_rw_done(pcbinfo
->mtx
);
1436 else if (inp
->inp_laddr
.s_addr
== INADDR_ANY
) {
1438 if (inp
->inp_socket
&&
1439 INP_CHECK_SOCKAF(inp
->inp_socket
,
1441 local_wild_mapped
= inp
;
1448 if (local_wild
== NULL
) {
1450 if (local_wild_mapped
!= NULL
) {
1451 if ((found
= (local_wild_mapped
->inp_socket
!= NULL
))) {
1452 *uid
= kauth_cred_getuid(
1453 local_wild_mapped
->inp_socket
->so_cred
);
1454 *gid
= kauth_cred_getgid(
1455 local_wild_mapped
->inp_socket
->so_cred
);
1457 lck_rw_done(pcbinfo
->mtx
);
1461 lck_rw_done(pcbinfo
->mtx
);
1464 if (local_wild
!= NULL
) {
1465 if ((found
= (local_wild
->inp_socket
!= NULL
))) {
1466 *uid
= kauth_cred_getuid(
1467 local_wild
->inp_socket
->so_cred
);
1468 *gid
= kauth_cred_getgid(
1469 local_wild
->inp_socket
->so_cred
);
1471 lck_rw_done(pcbinfo
->mtx
);
1479 lck_rw_done(pcbinfo
->mtx
);
1484 * Lookup PCB in hash list.
1488 struct inpcbinfo
*pcbinfo
,
1489 struct in_addr faddr
,
1491 struct in_addr laddr
,
1496 struct inpcbhead
*head
;
1498 u_short fport
= fport_arg
, lport
= lport_arg
;
1501 * We may have found the pcb in the last lookup - check this first.
1504 lck_rw_lock_shared(pcbinfo
->mtx
);
1507 * First look for an exact match.
1509 head
= &pcbinfo
->hashbase
[INP_PCBHASH(faddr
.s_addr
, lport
, fport
, pcbinfo
->hashmask
)];
1510 LIST_FOREACH(inp
, head
, inp_hash
) {
1512 if ((inp
->inp_vflag
& INP_IPV4
) == 0)
1515 if (ip_restrictrecvif
&& ifp
!= NULL
&&
1516 (ifp
->if_eflags
& IFEF_RESTRICTED_RECV
) &&
1517 !(inp
->inp_flags
& INP_RECV_ANYIF
))
1520 if (inp
->inp_faddr
.s_addr
== faddr
.s_addr
&&
1521 inp
->inp_laddr
.s_addr
== laddr
.s_addr
&&
1522 inp
->inp_fport
== fport
&&
1523 inp
->inp_lport
== lport
) {
1527 if (in_pcb_checkstate(inp
, WNT_ACQUIRE
, 0) != WNT_STOPUSING
) {
1528 lck_rw_done(pcbinfo
->mtx
);
1531 else { /* it's there but dead, say it isn't found */
1532 lck_rw_done(pcbinfo
->mtx
);
1538 struct inpcb
*local_wild
= NULL
;
1540 struct inpcb
*local_wild_mapped
= NULL
;
1543 head
= &pcbinfo
->hashbase
[INP_PCBHASH(INADDR_ANY
, lport
, 0, pcbinfo
->hashmask
)];
1544 LIST_FOREACH(inp
, head
, inp_hash
) {
1546 if ((inp
->inp_vflag
& INP_IPV4
) == 0)
1549 if (ip_restrictrecvif
&& ifp
!= NULL
&&
1550 (ifp
->if_eflags
& IFEF_RESTRICTED_RECV
) &&
1551 !(inp
->inp_flags
& INP_RECV_ANYIF
))
1554 if (inp
->inp_faddr
.s_addr
== INADDR_ANY
&&
1555 inp
->inp_lport
== lport
) {
1556 if (inp
->inp_laddr
.s_addr
== laddr
.s_addr
) {
1557 if (in_pcb_checkstate(inp
, WNT_ACQUIRE
, 0) != WNT_STOPUSING
) {
1558 lck_rw_done(pcbinfo
->mtx
);
1561 else { /* it's there but dead, say it isn't found */
1562 lck_rw_done(pcbinfo
->mtx
);
1566 else if (inp
->inp_laddr
.s_addr
== INADDR_ANY
) {
1568 if (INP_CHECK_SOCKAF(inp
->inp_socket
,
1570 local_wild_mapped
= inp
;
1577 if (local_wild
== NULL
) {
1579 if (local_wild_mapped
!= NULL
) {
1580 if (in_pcb_checkstate(local_wild_mapped
, WNT_ACQUIRE
, 0) != WNT_STOPUSING
) {
1581 lck_rw_done(pcbinfo
->mtx
);
1582 return (local_wild_mapped
);
1584 else { /* it's there but dead, say it isn't found */
1585 lck_rw_done(pcbinfo
->mtx
);
1590 lck_rw_done(pcbinfo
->mtx
);
1593 if (in_pcb_checkstate(local_wild
, WNT_ACQUIRE
, 0) != WNT_STOPUSING
) {
1594 lck_rw_done(pcbinfo
->mtx
);
1595 return (local_wild
);
1597 else { /* it's there but dead, say it isn't found */
1598 lck_rw_done(pcbinfo
->mtx
);
1606 lck_rw_done(pcbinfo
->mtx
);
1611 * Insert PCB onto various hash lists.
1614 in_pcbinshash(struct inpcb
*inp
, int locked
)
1616 struct inpcbhead
*pcbhash
;
1617 struct inpcbporthead
*pcbporthash
;
1618 struct inpcbinfo
*pcbinfo
= inp
->inp_pcbinfo
;
1619 struct inpcbport
*phd
;
1620 u_int32_t hashkey_faddr
;
1623 if (!lck_rw_try_lock_exclusive(pcbinfo
->mtx
)) {
1624 /*lock inversion issue, mostly with udp multicast packets */
1625 socket_unlock(inp
->inp_socket
, 0);
1626 lck_rw_lock_exclusive(pcbinfo
->mtx
);
1627 socket_lock(inp
->inp_socket
, 0);
1628 if (inp
->inp_state
== INPCB_STATE_DEAD
) {
1629 /* The socket got dropped when it was unlocked */
1630 lck_rw_done(pcbinfo
->mtx
);
1631 return(ECONNABORTED
);
1637 if (inp
->inp_vflag
& INP_IPV6
)
1638 hashkey_faddr
= inp
->in6p_faddr
.s6_addr32
[3] /* XXX */;
1641 hashkey_faddr
= inp
->inp_faddr
.s_addr
;
1643 inp
->hash_element
= INP_PCBHASH(hashkey_faddr
, inp
->inp_lport
, inp
->inp_fport
, pcbinfo
->hashmask
);
1645 pcbhash
= &pcbinfo
->hashbase
[inp
->hash_element
];
1647 pcbporthash
= &pcbinfo
->porthashbase
[INP_PCBPORTHASH(inp
->inp_lport
,
1648 pcbinfo
->porthashmask
)];
1651 * Go through port list and look for a head for this lport.
1653 LIST_FOREACH(phd
, pcbporthash
, phd_hash
) {
1654 if (phd
->phd_port
== inp
->inp_lport
)
1658 VERIFY(inp
->inp_state
!= INPCB_STATE_DEAD
);
1661 * If none exists, malloc one and tack it on.
1664 MALLOC(phd
, struct inpcbport
*, sizeof(struct inpcbport
), M_PCB
, M_WAITOK
);
1667 lck_rw_done(pcbinfo
->mtx
);
1668 return (ENOBUFS
); /* XXX */
1670 phd
->phd_port
= inp
->inp_lport
;
1671 LIST_INIT(&phd
->phd_pcblist
);
1672 LIST_INSERT_HEAD(pcbporthash
, phd
, phd_hash
);
1675 LIST_INSERT_HEAD(&phd
->phd_pcblist
, inp
, inp_portlist
);
1676 LIST_INSERT_HEAD(pcbhash
, inp
, inp_hash
);
1678 lck_rw_done(pcbinfo
->mtx
);
1683 * Move PCB to the proper hash bucket when { faddr, fport } have been
1684 * changed. NOTE: This does not handle the case of the lport changing (the
1685 * hashed port list would have to be updated as well), so the lport must
1686 * not change after in_pcbinshash() has been called.
1689 in_pcbrehash(struct inpcb
*inp
)
1691 struct inpcbhead
*head
;
1692 u_int32_t hashkey_faddr
;
1695 if (inp
->inp_vflag
& INP_IPV6
)
1696 hashkey_faddr
= inp
->in6p_faddr
.s6_addr32
[3] /* XXX */;
1699 hashkey_faddr
= inp
->inp_faddr
.s_addr
;
1700 inp
->hash_element
= INP_PCBHASH(hashkey_faddr
, inp
->inp_lport
,
1701 inp
->inp_fport
, inp
->inp_pcbinfo
->hashmask
);
1702 head
= &inp
->inp_pcbinfo
->hashbase
[inp
->hash_element
];
1704 LIST_REMOVE(inp
, inp_hash
);
1705 LIST_INSERT_HEAD(head
, inp
, inp_hash
);
1709 * Remove PCB from various lists.
1710 * Must be called pcbinfo lock is held in exclusive mode.
1713 in_pcbremlists(struct inpcb
*inp
)
1715 inp
->inp_gencnt
= ++inp
->inp_pcbinfo
->ipi_gencnt
;
1717 if (inp
->inp_lport
) {
1718 struct inpcbport
*phd
= inp
->inp_phd
;
1720 LIST_REMOVE(inp
, inp_hash
);
1721 LIST_REMOVE(inp
, inp_portlist
);
1722 if (phd
!= NULL
&& (LIST_FIRST(&phd
->phd_pcblist
) == NULL
)) {
1723 LIST_REMOVE(phd
, phd_hash
);
1727 LIST_REMOVE(inp
, inp_list
);
1729 if (inp
->inp_flags2
& INP2_IN_FCTREE
) {
1730 inp_fc_getinp(inp
->inp_flowhash
,
1731 (INPFC_SOLOCKED
|INPFC_REMOVE
));
1732 VERIFY(!(inp
->inp_flags2
& INP2_IN_FCTREE
));
1734 inp
->inp_pcbinfo
->ipi_count
--;
1737 /* Mechanism used to defer the memory release of PCBs
1738 * The pcb list will contain the pcb until the ripper can clean it up if
1739 * the following conditions are met: 1) state "DEAD", 2) wantcnt is STOPUSING
1740 * 3) usecount is null
1741 * This function will be called to either mark the pcb as
1744 in_pcb_checkstate(struct inpcb
*pcb
, int mode
, int locked
)
1747 volatile UInt32
*wantcnt
= (volatile UInt32
*)&pcb
->inp_wantcnt
;
1753 case WNT_STOPUSING
: /* try to mark the pcb as ready for recycling */
1755 /* compareswap with STOPUSING, if success we're good, if it's in use, will be marked later */
1758 socket_lock(pcb
->inp_socket
, 1);
1759 pcb
->inp_state
= INPCB_STATE_DEAD
;
1762 if (pcb
->inp_socket
->so_usecount
< 0)
1763 panic("in_pcb_checkstate STOP pcb=%p so=%p usecount is negative\n", pcb
, pcb
->inp_socket
);
1765 socket_unlock(pcb
->inp_socket
, 1);
1767 origwant
= *wantcnt
;
1768 if ((UInt16
) origwant
== 0xffff ) /* should stop using */
1769 return (WNT_STOPUSING
);
1771 if ((UInt16
) origwant
== 0) {/* try to mark it as unsuable now */
1772 OSCompareAndSwap(origwant
, newwant
, wantcnt
) ;
1774 return (WNT_STOPUSING
);
1777 case WNT_ACQUIRE
: /* try to increase reference to pcb */
1778 /* if WNT_STOPUSING should bail out */
1780 * if socket state DEAD, try to set count to STOPUSING, return failed
1781 * otherwise increase cnt
1784 origwant
= *wantcnt
;
1785 if ((UInt16
) origwant
== 0xffff ) {/* should stop using */
1786 // printf("in_pcb_checkstate: ACQ PCB was STOPUSING while release. odd pcb=%p\n", pcb);
1787 return (WNT_STOPUSING
);
1789 newwant
= origwant
+ 1;
1790 } while (!OSCompareAndSwap(origwant
, newwant
, wantcnt
));
1791 return (WNT_ACQUIRE
);
1794 case WNT_RELEASE
: /* release reference. if result is null and pcb state is DEAD,
1795 set wanted bit to STOPUSING
1799 socket_lock(pcb
->inp_socket
, 1);
1802 origwant
= *wantcnt
;
1803 if ((UInt16
) origwant
== 0x0 )
1804 panic("in_pcb_checkstate pcb=%p release with zero count", pcb
);
1805 if ((UInt16
) origwant
== 0xffff ) {/* should stop using */
1807 printf("in_pcb_checkstate: REL PCB was STOPUSING while release. odd pcb=%p\n", pcb
);
1810 socket_unlock(pcb
->inp_socket
, 1);
1811 return (WNT_STOPUSING
);
1813 newwant
= origwant
- 1;
1814 } while (!OSCompareAndSwap(origwant
, newwant
, wantcnt
));
1816 if (pcb
->inp_state
== INPCB_STATE_DEAD
)
1818 if (pcb
->inp_socket
->so_usecount
< 0)
1819 panic("in_pcb_checkstate RELEASE pcb=%p so=%p usecount is negative\n", pcb
, pcb
->inp_socket
);
1822 socket_unlock(pcb
->inp_socket
, 1);
1823 return (WNT_RELEASE
);
1828 panic("in_pcb_checkstate: so=%p not a valid state =%x\n", pcb
->inp_socket
, mode
);
1836 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
1837 * The inpcb_compat data structure is passed to user space and must
1838 * not change. We intentionally avoid copying pointers.
1843 struct inpcb_compat
*inp_compat
)
1845 bzero(inp_compat
, sizeof(*inp_compat
));
1846 inp_compat
->inp_fport
= inp
->inp_fport
;
1847 inp_compat
->inp_lport
= inp
->inp_lport
;
1848 inp_compat
->nat_owner
= 0;
1849 inp_compat
->nat_cookie
= inp
->nat_cookie
;
1850 inp_compat
->inp_gencnt
= inp
->inp_gencnt
;
1851 inp_compat
->inp_flags
= inp
->inp_flags
;
1852 inp_compat
->inp_flow
= inp
->inp_flow
;
1853 inp_compat
->inp_vflag
= inp
->inp_vflag
;
1854 inp_compat
->inp_ip_ttl
= inp
->inp_ip_ttl
;
1855 inp_compat
->inp_ip_p
= inp
->inp_ip_p
;
1856 inp_compat
->inp_dependfaddr
.inp6_foreign
= inp
->inp_dependfaddr
.inp6_foreign
;
1857 inp_compat
->inp_dependladdr
.inp6_local
= inp
->inp_dependladdr
.inp6_local
;
1858 inp_compat
->inp_depend4
.inp4_ip_tos
= inp
->inp_depend4
.inp4_ip_tos
;
1859 inp_compat
->inp_depend6
.inp6_hlim
= inp
->inp_depend6
.inp6_hlim
;
1860 inp_compat
->inp_depend6
.inp6_cksum
= inp
->inp_depend6
.inp6_cksum
;
1861 inp_compat
->inp_depend6
.inp6_ifindex
= inp
->inp_depend6
.inp6_ifindex
;
1862 inp_compat
->inp_depend6
.inp6_hops
= inp
->inp_depend6
.inp6_hops
;
1865 #if !CONFIG_EMBEDDED
1870 struct xinpcb64
*xinp
)
1872 xinp
->inp_fport
= inp
->inp_fport
;
1873 xinp
->inp_lport
= inp
->inp_lport
;
1874 xinp
->inp_gencnt
= inp
->inp_gencnt
;
1875 xinp
->inp_flags
= inp
->inp_flags
;
1876 xinp
->inp_flow
= inp
->inp_flow
;
1877 xinp
->inp_vflag
= inp
->inp_vflag
;
1878 xinp
->inp_ip_ttl
= inp
->inp_ip_ttl
;
1879 xinp
->inp_ip_p
= inp
->inp_ip_p
;
1880 xinp
->inp_dependfaddr
.inp6_foreign
= inp
->inp_dependfaddr
.inp6_foreign
;
1881 xinp
->inp_dependladdr
.inp6_local
= inp
->inp_dependladdr
.inp6_local
;
1882 xinp
->inp_depend4
.inp4_ip_tos
= inp
->inp_depend4
.inp4_ip_tos
;
1883 xinp
->inp_depend6
.inp6_hlim
= inp
->inp_depend6
.inp6_hlim
;
1884 xinp
->inp_depend6
.inp6_cksum
= inp
->inp_depend6
.inp6_cksum
;
1885 xinp
->inp_depend6
.inp6_ifindex
= inp
->inp_depend6
.inp6_ifindex
;
1886 xinp
->inp_depend6
.inp6_hops
= inp
->inp_depend6
.inp6_hops
;
1889 #endif /* !CONFIG_EMBEDDED */
1893 * The following routines implement this scheme:
1895 * Callers of ip_output() that intend to cache the route in the inpcb pass
1896 * a local copy of the struct route to ip_output(). Using a local copy of
1897 * the cached route significantly simplifies things as IP no longer has to
1898 * worry about having exclusive access to the passed in struct route, since
1899 * it's defined in the caller's stack; in essence, this allows for a lock-
1900 * less operation when updating the struct route at the IP level and below,
1901 * whenever necessary. The scheme works as follows:
1903 * Prior to dropping the socket's lock and calling ip_output(), the caller
1904 * copies the struct route from the inpcb into its stack, and adds a reference
1905 * to the cached route entry, if there was any. The socket's lock is then
1906 * dropped and ip_output() is called with a pointer to the copy of struct
1907 * route defined on the stack (not to the one in the inpcb.)
1909 * Upon returning from ip_output(), the caller then acquires the socket's
1910 * lock and synchronizes the cache; if there is no route cached in the inpcb,
1911 * it copies the local copy of struct route (which may or may not contain any
1912 * route) back into the cache; otherwise, if the inpcb has a route cached in
1913 * it, the one in the local copy will be freed, if there's any. Trashing the
1914 * cached route in the inpcb can be avoided because ip_output() is single-
1915 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
1916 * by the socket/transport layer.)
1919 inp_route_copyout(struct inpcb
*inp
, struct route
*dst
)
1921 struct route
*src
= &inp
->inp_route
;
1923 lck_mtx_assert(&inp
->inpcb_mtx
, LCK_MTX_ASSERT_OWNED
);
1926 * If the route in the PCB is not for IPv4, blow it away;
1927 * this is possible in the case of IPv4-mapped address case.
1929 if (src
->ro_rt
!= NULL
&& rt_key(src
->ro_rt
)->sa_family
!= AF_INET
) {
1934 route_copyout(dst
, src
, sizeof(*dst
));
1938 inp_route_copyin(struct inpcb
*inp
, struct route
*src
)
1940 struct route
*dst
= &inp
->inp_route
;
1942 lck_mtx_assert(&inp
->inpcb_mtx
, LCK_MTX_ASSERT_OWNED
);
1944 /* Minor sanity check */
1945 if (src
->ro_rt
!= NULL
&& rt_key(src
->ro_rt
)->sa_family
!= AF_INET
)
1946 panic("%s: wrong or corrupted route: %p", __func__
, src
);
1948 route_copyin(src
, dst
, sizeof(*src
));
1952 * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option.
1955 inp_bindif(struct inpcb
*inp
, unsigned int ifscope
)
1957 struct ifnet
*ifp
= NULL
;
1959 ifnet_head_lock_shared();
1960 if ((ifscope
> (unsigned)if_index
) || (ifscope
!= IFSCOPE_NONE
&&
1961 (ifp
= ifindex2ifnet
[ifscope
]) == NULL
)) {
1967 VERIFY(ifp
!= NULL
|| ifscope
== IFSCOPE_NONE
);
1970 * A zero interface scope value indicates an "unbind".
1971 * Otherwise, take in whatever value the app desires;
1972 * the app may already know the scope (or force itself
1973 * to such a scope) ahead of time before the interface
1974 * gets attached. It doesn't matter either way; any
1975 * route lookup from this point on will require an
1976 * exact match for the embedded interface scope.
1978 inp
->inp_boundifp
= ifp
;
1979 if (inp
->inp_boundifp
== NULL
)
1980 inp
->inp_flags
&= ~INP_BOUND_IF
;
1982 inp
->inp_flags
|= INP_BOUND_IF
;
1984 /* Blow away any cached route in the PCB */
1985 if (inp
->inp_route
.ro_rt
!= NULL
) {
1986 rtfree(inp
->inp_route
.ro_rt
);
1987 inp
->inp_route
.ro_rt
= NULL
;
1994 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option.
1997 inp_nocellular(struct inpcb
*inp
, unsigned int val
)
2000 inp
->inp_flags
|= INP_NO_IFT_CELLULAR
;
2001 } else if (inp
->inp_flags
& INP_NO_IFT_CELLULAR
) {
2002 /* once set, it cannot be unset */
2006 /* Blow away any cached route in the PCB */
2007 if (inp
->inp_route
.ro_rt
!= NULL
) {
2008 rtfree(inp
->inp_route
.ro_rt
);
2009 inp
->inp_route
.ro_rt
= NULL
;
2016 * Calculate flow hash for an inp, used by an interface to identify a
2017 * flow. When an interface provides flow control advisory, this flow
2018 * hash is used as an identifier.
2021 inp_calc_flowhash(struct inpcb
*inp
)
2023 struct inp_flowhash_key fh
__attribute__((aligned(8)));
2024 u_int32_t flowhash
= 0;
2025 struct inpcb
*tmp_inp
= NULL
;
2027 if (inp_hash_seed
== 0)
2028 inp_hash_seed
= RandomULong();
2030 bzero(&fh
, sizeof (fh
));
2032 bcopy(&inp
->inp_dependladdr
, &fh
.infh_laddr
, sizeof (fh
.infh_laddr
));
2033 bcopy(&inp
->inp_dependfaddr
, &fh
.infh_faddr
, sizeof (fh
.infh_faddr
));
2035 fh
.infh_lport
= inp
->inp_lport
;
2036 fh
.infh_fport
= inp
->inp_fport
;
2037 fh
.infh_af
= (inp
->inp_vflag
& INP_IPV6
) ? AF_INET6
: AF_INET
;
2038 fh
.infh_proto
= inp
->inp_ip_p
;
2039 fh
.infh_rand1
= RandomULong();
2040 fh
.infh_rand2
= RandomULong();
2043 flowhash
= net_flowhash(&fh
, sizeof (fh
), inp_hash_seed
);
2044 if (flowhash
== 0) {
2045 /* try to get a non-zero flowhash */
2046 inp_hash_seed
= RandomULong();
2050 inp
->inp_flowhash
= flowhash
;
2052 /* Insert the inp into inp_fc_tree */
2054 lck_mtx_lock(&inp_fc_lck
);
2055 tmp_inp
= RB_FIND(inp_fc_tree
, &inp_fc_tree
, inp
);
2056 if (tmp_inp
!= NULL
) {
2058 * There is a different inp with the same flowhash.
2059 * There can be a collision on flow hash but the
2060 * probability is low. Let's recompute the
2063 lck_mtx_unlock(&inp_fc_lck
);
2064 /* recompute hash seed */
2065 inp_hash_seed
= RandomULong();
2068 RB_INSERT(inp_fc_tree
, &inp_fc_tree
, inp
);
2069 inp
->inp_flags2
|= INP2_IN_FCTREE
;
2070 lck_mtx_unlock(&inp_fc_lck
);
2076 * Function to compare inp_fc_entries in inp flow control tree
2079 infc_cmp(const struct inpcb
*inp1
, const struct inpcb
*inp2
)
2081 return (memcmp(&(inp1
->inp_flowhash
), &(inp2
->inp_flowhash
),
2082 sizeof(inp1
->inp_flowhash
)));
2086 inp_fc_getinp(u_int32_t flowhash
, u_int32_t flags
)
2088 struct inpcb
*inp
= NULL
;
2089 int locked
= (flags
& INPFC_SOLOCKED
) ? 1 : 0;
2091 lck_mtx_lock_spin(&inp_fc_lck
);
2092 key_inp
.inp_flowhash
= flowhash
;
2093 inp
= RB_FIND(inp_fc_tree
, &inp_fc_tree
, &key_inp
);
2095 /* inp is not present, return */
2096 lck_mtx_unlock(&inp_fc_lck
);
2100 if (flags
& INPFC_REMOVE
) {
2101 RB_REMOVE(inp_fc_tree
, &inp_fc_tree
, inp
);
2102 lck_mtx_unlock(&inp_fc_lck
);
2104 bzero(&(inp
->infc_link
), sizeof (inp
->infc_link
));
2105 inp
->inp_flags2
&= ~INP2_IN_FCTREE
;
2108 if (in_pcb_checkstate(inp
, WNT_ACQUIRE
, locked
) == WNT_STOPUSING
)
2110 lck_mtx_unlock(&inp_fc_lck
);
2116 inp_fc_feedback(struct inpcb
*inp
)
2118 struct socket
*so
= inp
->inp_socket
;
2120 /* we already hold a want_cnt on this inp, socket can't be null */
2121 VERIFY (so
!= NULL
);
2124 if (in_pcb_checkstate(inp
, WNT_RELEASE
, 1) == WNT_STOPUSING
) {
2125 socket_unlock(so
, 1);
2130 * Return if the connection is not in flow-controlled state.
2131 * This can happen if the connection experienced
2132 * loss while it was in flow controlled state
2134 if (!INP_WAIT_FOR_IF_FEEDBACK(inp
)) {
2135 socket_unlock(so
, 1);
2138 inp_reset_fc_state(inp
);
2140 if (so
->so_proto
->pr_type
== SOCK_STREAM
)
2141 inp_fc_unthrottle_tcp(inp
);
2143 socket_unlock(so
, 1);
2147 inp_reset_fc_state(struct inpcb
*inp
)
2149 struct socket
*so
= inp
->inp_socket
;
2150 int suspended
= (INP_IS_FLOW_SUSPENDED(inp
)) ? 1 : 0;
2151 int needwakeup
= (INP_WAIT_FOR_IF_FEEDBACK(inp
)) ? 1 : 0;
2153 inp
->inp_flags
&= ~(INP_FLOW_CONTROLLED
| INP_FLOW_SUSPENDED
);
2156 so
->so_flags
&= ~(SOF_SUSPENDED
);
2157 soevent(so
, (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_RESUME
));
2160 if (inp
->inp_sndinprog_cnt
> 0)
2161 inp
->inp_flags
|= INP_FC_FEEDBACK
;
2163 /* Give a write wakeup to unblock the socket */
2169 inp_set_fc_state(struct inpcb
*inp
, int advcode
)
2171 struct inpcb
*tmp_inp
= NULL
;
2173 * If there was a feedback from the interface when
2174 * send operation was in progress, we should ignore
2175 * this flow advisory to avoid a race between setting
2176 * flow controlled state and receiving feedback from
2179 if (inp
->inp_flags
& INP_FC_FEEDBACK
)
2182 inp
->inp_flags
&= ~(INP_FLOW_CONTROLLED
| INP_FLOW_SUSPENDED
);
2183 if ((tmp_inp
= inp_fc_getinp(inp
->inp_flowhash
, INPFC_SOLOCKED
))
2185 if (in_pcb_checkstate(tmp_inp
, WNT_RELEASE
, 1)
2188 VERIFY(tmp_inp
== inp
);
2190 case FADV_FLOW_CONTROLLED
:
2191 inp
->inp_flags
|= INP_FLOW_CONTROLLED
;
2193 case FADV_SUSPENDED
:
2194 inp
->inp_flags
|= INP_FLOW_SUSPENDED
;
2195 soevent(inp
->inp_socket
,
2196 (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_SUSPEND
));
2198 /* Record the fact that suspend event was sent */
2199 inp
->inp_socket
->so_flags
|= SOF_SUSPENDED
;
2208 * Handler for SO_FLUSH socket option.
2211 inp_flush(struct inpcb
*inp
, int optval
)
2213 u_int32_t flowhash
= inp
->inp_flowhash
;
2216 /* Either all classes or one of the valid ones */
2217 if (optval
!= SO_TC_ALL
&& !SO_VALID_TC(optval
))
2220 /* We need a flow hash for identification */
2224 /* We need a cached route for the interface */
2225 if ((rt
= inp
->inp_route
.ro_rt
) != NULL
) {
2226 struct ifnet
*ifp
= rt
->rt_ifp
;
2227 if_qflush_sc(ifp
, so_tc2msc(optval
), flowhash
, NULL
, NULL
, 0);
2234 * Clear the INP_INADDR_ANY flag (special case for PPP only)
2236 void inp_clear_INP_INADDR_ANY(struct socket
*so
)
2238 struct inpcb
*inp
= NULL
;
2241 inp
= sotoinpcb(so
);
2243 inp
->inp_flags
&= ~INP_INADDR_ANY
;
2245 socket_unlock(so
, 1);