]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/in_pcb.c
xnu-1699.22.81.tar.gz
[apple/xnu.git] / bsd / netinet / in_pcb.c
CommitLineData
1c79356b 1/*
6d2010ae 2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
9bccf70c 61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
1c79356b
A
62 */
63
64#include <sys/param.h>
65#include <sys/systm.h>
66#include <sys/malloc.h>
67#include <sys/mbuf.h>
1c79356b 68#include <sys/domain.h>
1c79356b
A
69#include <sys/protosw.h>
70#include <sys/socket.h>
71#include <sys/socketvar.h>
72#include <sys/proc.h>
9bccf70c
A
73#ifndef __APPLE__
74#include <sys/jail.h>
75#endif
1c79356b
A
76#include <sys/kernel.h>
77#include <sys/sysctl.h>
6d2010ae
A
78#include <sys/mcache.h>
79#include <sys/kauth.h>
80#include <sys/priv.h>
91447636 81#include <libkern/OSAtomic.h>
1c79356b
A
82
83#include <machine/limits.h>
84
9bccf70c 85#ifdef __APPLE__
1c79356b
A
86#include <kern/zalloc.h>
87#endif
88
89#include <net/if.h>
1c79356b 90#include <net/if_types.h>
9bccf70c 91#include <net/route.h>
1c79356b
A
92
93#include <netinet/in.h>
94#include <netinet/in_pcb.h>
95#include <netinet/in_var.h>
96#include <netinet/ip_var.h>
97#if INET6
98#include <netinet/ip6.h>
99#include <netinet6/ip6_var.h>
100#endif /* INET6 */
101
102#include "faith.h"
103
104#if IPSEC
105#include <netinet6/ipsec.h>
106#include <netkey/key.h>
1c79356b
A
107#endif /* IPSEC */
108
109#include <sys/kdebug.h>
b0d623f7 110#include <sys/random.h>
1c79356b 111
9bccf70c
A
112#if IPSEC
113extern int ipsec_bypass;
114#endif
1c79356b
A
115
/*
 * Debug codes for the PCB lookup routines, composed with NETDBG_CODE.
 * They are not referenced in this part of the file.
 */
#define DBG_FNC_PCB_LOOKUP	NETDBG_CODE(DBG_NETTCP, (6 << 8))
#define DBG_FNC_PCB_HLOOKUP	NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))

/* File-scope object with no initializer, hence all-zero. */
struct in_addr zeroin_addr;

/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 *
 * in_pcbbind() picks one (first, last) pair depending on the PCB flags:
 * the "hi" pair for INP_HIGHPORT, the "low" pair for INP_LOWPORT and the
 * plain pair otherwise.  When first > last (as with the low pair below)
 * in_pcbbind() searches the range counting down.
 */
int	ipport_lowfirstauto  = IPPORT_RESERVED - 1;	/* 1023 */
int	ipport_lowlastauto = IPPORT_RESERVEDSTART;	/* 600 */
#ifndef __APPLE__
int	ipport_firstauto = IPPORT_RESERVED;		/* 1024 */
int	ipport_lastauto  = IPPORT_USERRESERVED;		/* 5000 */
#else
/* On Apple builds the default range is the same as the "hi" range. */
int	ipport_firstauto = IPPORT_HIFIRSTAUTO;		/* 49152 */
int	ipport_lastauto  = IPPORT_HILASTAUTO;		/* 65535 */
#endif
int	ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
int	ipport_hilastauto  = IPPORT_HILASTAUTO;		/* 65535 */
1c79356b
A
136
137#define RANGECHK(var, min, max) \
138 if ((var) < (min)) { (var) = (min); } \
139 else if ((var) > (max)) { (var) = (max); }
140
1c79356b
A
141static int
142sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
143{
2d21ac55 144#pragma unused(arg1, arg2)
1c79356b
A
145 int error = sysctl_handle_int(oidp,
146 oidp->oid_arg1, oidp->oid_arg2, req);
147 if (!error) {
148 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
149 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
150 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
151 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
152 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
153 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
154 }
155 return error;
156}
157
158#undef RANGECHK
159
/*
 * "portrange" sysctl node under _net_inet_ip, with one read/write integer
 * entry per port-range variable.  Every entry is routed through
 * sysctl_net_ipport_check(), which clamps all six variables after a
 * successful access.
 */
SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports");

SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	   &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	   &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	   &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	   &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	   &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	   &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");

/*
 * Defined elsewhere.  in_pcbbind() consults these (TCP for SOCK_STREAM
 * sockets, UDP otherwise) to decide whether the anonymous-port search
 * starts from a random position.
 */
extern int	udp_use_randomport;
extern int	tcp_use_randomport;
177
1c79356b
A
178/*
179 * in_pcb.c: manage the Protocol Control Blocks.
180 *
181 * NOTE: It is assumed that most of these functions will be called at
182 * splnet(). XXX - There are, unfortunately, a few exceptions to this
183 * rule that should be fixed.
184 */
185
186/*
187 * Allocate a PCB and associate it with the socket.
2d21ac55
A
188 *
189 * Returns: 0 Success
190 * ENOBUFS
191 * ENOMEM
192 * ipsec_init_policy:??? [IPSEC]
1c79356b
A
193 */
194int
2d21ac55 195in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, __unused struct proc *p)
1c79356b 196{
2d21ac55 197 struct inpcb *inp;
1c79356b 198 caddr_t temp;
9bccf70c 199#if IPSEC
91447636 200#ifndef __APPLE__
9bccf70c 201 int error;
91447636 202#endif
2d21ac55
A
203#endif
204#if CONFIG_MACF_NET
205 int mac_error;
9bccf70c 206#endif
1c79356b
A
207
208 if (so->cached_in_sock_layer == 0) {
209#if TEMPDEBUG
210 printf("PCBALLOC calling zalloc for socket %x\n", so);
211#endif
212 inp = (struct inpcb *) zalloc(pcbinfo->ipi_zone);
213 if (inp == NULL)
214 return (ENOBUFS);
215 bzero((caddr_t)inp, sizeof(*inp));
216 }
217 else {
218#if TEMPDEBUG
219 printf("PCBALLOC reusing PCB for socket %x\n", so);
220#endif
221 inp = (struct inpcb *) so->so_saved_pcb;
222 temp = inp->inp_saved_ppcb;
223 bzero((caddr_t) inp, sizeof(*inp));
224 inp->inp_saved_ppcb = temp;
225 }
226
227 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
228 inp->inp_pcbinfo = pcbinfo;
229 inp->inp_socket = so;
2d21ac55
A
230#if CONFIG_MACF_NET
231 mac_error = mac_inpcb_label_init(inp, M_WAITOK);
232 if (mac_error != 0) {
233 if (so->cached_in_sock_layer == 0)
234 zfree(pcbinfo->ipi_zone, inp);
235 return (mac_error);
236 }
237 mac_inpcb_label_associate(so, inp);
238#endif
6d2010ae
A
239 // make sure inp_stat is always 64bit aligned
240 inp->inp_stat = (struct inp_stat*)P2ROUNDUP(inp->inp_stat_store, sizeof(u_int64_t));
241 if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store)
242 + sizeof(*inp->inp_stat) > sizeof(inp->inp_stat_store)) {
243 panic("insufficient space to align inp_stat");
244 }
245
91447636
A
246 so->so_pcb = (caddr_t)inp;
247
248 if (so->so_proto->pr_flags & PR_PCBLOCK) {
6d2010ae 249 lck_mtx_init(&inp->inpcb_mtx, pcbinfo->mtx_grp, pcbinfo->mtx_attr);
91447636
A
250 }
251
9bccf70c
A
252#if IPSEC
253#ifndef __APPLE__
254 if (ipsec_bypass == 0) {
255 error = ipsec_init_policy(so, &inp->inp_sp);
256 if (error != 0) {
91447636 257 zfree(pcbinfo->ipi_zone, inp);
9bccf70c
A
258 return error;
259 }
260 }
261#endif
262#endif /*IPSEC*/
2d21ac55 263#if INET6
9bccf70c
A
264 if (INP_SOCKAF(so) == AF_INET6 && !ip6_mapped_addr_on)
265 inp->inp_flags |= IN6P_IPV6_V6ONLY;
266#endif
91447636 267
9bccf70c
A
268#if INET6
269 if (ip6_auto_flowlabel)
270 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
271#endif
91447636
A
272 lck_rw_lock_exclusive(pcbinfo->mtx);
273 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
274 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
275 pcbinfo->ipi_count++;
276 lck_rw_done(pcbinfo->mtx);
1c79356b
A
277 return (0);
278}
279
2d21ac55
A
280
281/*
282 in_pcblookup_local_and_cleanup does everything
283 in_pcblookup_local does but it checks for a socket
284 that's going away. Since we know that the lock is
285 held read+write when this funciton is called, we
286 can safely dispose of this socket like the slow
287 timer would usually do and return NULL. This is
288 great for bind.
289*/
290struct inpcb*
291in_pcblookup_local_and_cleanup(
292 struct inpcbinfo *pcbinfo,
293 struct in_addr laddr,
294 u_int lport_arg,
295 int wild_okay)
296{
297 struct inpcb *inp;
298
299 /* Perform normal lookup */
300 inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
301
302 /* Check if we found a match but it's waiting to be disposed */
303 if (inp && inp->inp_wantcnt == WNT_STOPUSING) {
304 struct socket *so = inp->inp_socket;
305
6d2010ae 306 lck_mtx_lock(&inp->inpcb_mtx);
2d21ac55
A
307
308 if (so->so_usecount == 0) {
b0d623f7
A
309 if (inp->inp_state != INPCB_STATE_DEAD)
310 in_pcbdetach(inp);
2d21ac55
A
311 in_pcbdispose(inp);
312 inp = NULL;
313 }
314 else {
6d2010ae 315 lck_mtx_unlock(&inp->inpcb_mtx);
2d21ac55
A
316 }
317 }
318
319 return inp;
320}
321
#ifdef __APPLE_API_PRIVATE
/*
 * Radar 5523020: send a kernel event notification if a non-participating
 * socket tries to bind the port owned by a socket that has set
 * SOF_NOTIFYCONFLICT.
 *
 * `port' is in network byte order.  The event carries the port converted
 * to host order together with the pid of the calling process.
 */
static void
in_pcb_conflict_post_msg(u_int16_t port)
{
	struct kev_in_portinuse payload;
	struct kev_msg msg;

	/* Event payload */
	bzero(&payload, sizeof(struct kev_in_portinuse));
	payload.port = ntohs(port);	/* port in host order */
	payload.req_pid = proc_selfpid();

	/* Event header, with a single data vector pointing at the payload */
	bzero(&msg, sizeof(struct kev_msg));
	msg.vendor_code = KEV_VENDOR_APPLE;
	msg.kev_class = KEV_NETWORK_CLASS;
	msg.kev_subclass = KEV_INET_SUBCLASS;
	msg.event_code = KEV_INET_PORTINUSE;
	msg.dv[0].data_ptr = &payload;
	msg.dv[0].data_length = sizeof(struct kev_in_portinuse);
	msg.dv[1].data_length = 0;

	kev_post_msg(&msg);
}
#endif
347/*
348 * Returns: 0 Success
349 * EADDRNOTAVAIL Address not available.
350 * EINVAL Invalid argument
351 * EAFNOSUPPORT Address family not supported [notdef]
352 * EACCES Permission denied
353 * EADDRINUSE Address in use
354 * EAGAIN Resource unavailable, try again
6d2010ae 355 * priv_check_cred:EPERM Operation not permitted
2d21ac55 356 */
1c79356b 357int
2d21ac55 358in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
1c79356b 359{
2d21ac55 360 struct socket *so = inp->inp_socket;
9bccf70c 361 unsigned short *lastport;
1c79356b
A
362 struct sockaddr_in *sin;
363 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
b0d623f7 364 u_short lport = 0, rand_port = 0;
1c79356b 365 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
b0d623f7 366 int error, randomport, conflict = 0;
6d2010ae 367 kauth_cred_t cred;
1c79356b
A
368
369 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
370 return (EADDRNOTAVAIL);
371 if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
372 return (EINVAL);
373 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
374 wild = 1;
91447636
A
375 socket_unlock(so, 0); /* keep reference on socket */
376 lck_rw_lock_exclusive(pcbinfo->mtx);
1c79356b 377 if (nam) {
6d2010ae
A
378 unsigned int outif = 0;
379
1c79356b 380 sin = (struct sockaddr_in *)nam;
91447636
A
381 if (nam->sa_len != sizeof (*sin)) {
382 lck_rw_done(pcbinfo->mtx);
383 socket_lock(so, 0);
1c79356b 384 return (EINVAL);
91447636 385 }
1c79356b
A
386#ifdef notdef
387 /*
388 * We should check the family, but old programs
389 * incorrectly fail to initialize it.
390 */
91447636
A
391 if (sin->sin_family != AF_INET) {
392 lck_rw_done(pcbinfo->mtx);
393 socket_lock(so, 0);
1c79356b 394 return (EAFNOSUPPORT);
91447636 395 }
1c79356b
A
396#endif
397 lport = sin->sin_port;
398 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
399 /*
400 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
401 * allow complete duplication of binding if
402 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
403 * and a multicast address is bound on both
404 * new and duplicated sockets.
405 */
406 if (so->so_options & SO_REUSEADDR)
407 reuseport = SO_REUSEADDR|SO_REUSEPORT;
408 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
91447636 409 struct ifaddr *ifa;
1c79356b 410 sin->sin_port = 0; /* yech... */
91447636
A
411 if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin)) == 0) {
412 lck_rw_done(pcbinfo->mtx);
413 socket_lock(so, 0);
1c79356b 414 return (EADDRNOTAVAIL);
91447636
A
415 }
416 else {
6d2010ae
A
417 IFA_LOCK(ifa);
418 outif = ifa->ifa_ifp->if_index;
419 IFA_UNLOCK(ifa);
420 IFA_REMREF(ifa);
91447636 421 }
1c79356b
A
422 }
423 if (lport) {
424 struct inpcb *t;
425
426 /* GROSS */
b0d623f7 427#if !CONFIG_EMBEDDED
6d2010ae
A
428 if (ntohs(lport) < IPPORT_RESERVED) {
429 cred = kauth_cred_proc_ref(p);
430 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
431 kauth_cred_unref(&cred);
432 if (error != 0) {
433 lck_rw_done(pcbinfo->mtx);
434 socket_lock(so, 0);
435 return (EACCES);
436 }
91447636 437 }
b0d623f7 438#endif
1c79356b
A
439 if (so->so_uid &&
440 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
2d21ac55 441 t = in_pcblookup_local_and_cleanup(inp->inp_pcbinfo,
1c79356b
A
442 sin->sin_addr, lport, INPLOOKUP_WILDCARD);
443 if (t &&
444 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
445 ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
446 (t->inp_socket->so_options &
447 SO_REUSEPORT) == 0) &&
2d21ac55
A
448 (so->so_uid != t->inp_socket->so_uid) &&
449 ((t->inp_socket->so_flags & SOF_REUSESHAREUID) == 0)) {
1c79356b 450#if INET6
9bccf70c 451 if (ntohl(sin->sin_addr.s_addr) !=
1c79356b
A
452 INADDR_ANY ||
453 ntohl(t->inp_laddr.s_addr) !=
454 INADDR_ANY ||
455 INP_SOCKAF(so) ==
2d21ac55
A
456 INP_SOCKAF(t->inp_socket))
457#endif /* INET6 */
458 {
459#ifdef __APPLE_API_PRIVATE
460
461 if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0))
462 conflict = 1;
463
464 lck_rw_done(pcbinfo->mtx);
465
466 if (conflict)
467 in_pcb_conflict_post_msg(lport);
468#else
91447636 469 lck_rw_done(pcbinfo->mtx);
2d21ac55
A
470#endif /* __APPLE_API_PRIVATE */
471
91447636
A
472 socket_lock(so, 0);
473 return (EADDRINUSE);
474 }
1c79356b
A
475 }
476 }
2d21ac55 477 t = in_pcblookup_local_and_cleanup(pcbinfo, sin->sin_addr,
1c79356b
A
478 lport, wild);
479 if (t &&
480 (reuseport & t->inp_socket->so_options) == 0) {
481#if INET6
482 if (ip6_mapped_addr_on == 0 ||
483 ntohl(sin->sin_addr.s_addr) !=
484 INADDR_ANY ||
485 ntohl(t->inp_laddr.s_addr) !=
486 INADDR_ANY ||
487 INP_SOCKAF(so) ==
2d21ac55
A
488 INP_SOCKAF(t->inp_socket))
489#endif /* INET6 */
490 {
491#ifdef __APPLE_API_PRIVATE
492
493 if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0))
494 conflict = 1;
495
91447636 496 lck_rw_done(pcbinfo->mtx);
2d21ac55
A
497
498 if (conflict)
499 in_pcb_conflict_post_msg(lport);
500#else
501 lck_rw_done(pcbinfo->mtx);
502#endif /* __APPLE_API_PRIVATE */
91447636
A
503 socket_lock(so, 0);
504 return (EADDRINUSE);
505 }
1c79356b
A
506 }
507 }
508 inp->inp_laddr = sin->sin_addr;
6d2010ae 509 inp->inp_last_outif = outif;
1c79356b
A
510 }
511 if (lport == 0) {
512 u_short first, last;
513 int count;
514
b0d623f7
A
515 randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
516 (so->so_type == SOCK_STREAM ? tcp_use_randomport : udp_use_randomport);
517
1c79356b
A
518 inp->inp_flags |= INP_ANONPORT;
519
520 if (inp->inp_flags & INP_HIGHPORT) {
521 first = ipport_hifirstauto; /* sysctl */
522 last = ipport_hilastauto;
523 lastport = &pcbinfo->lasthi;
524 } else if (inp->inp_flags & INP_LOWPORT) {
6d2010ae
A
525 cred = kauth_cred_proc_ref(p);
526 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
527 kauth_cred_unref(&cred);
528 if (error != 0) {
91447636
A
529 lck_rw_done(pcbinfo->mtx);
530 socket_lock(so, 0);
1c79356b 531 return error;
91447636 532 }
1c79356b
A
533 first = ipport_lowfirstauto; /* 1023 */
534 last = ipport_lowlastauto; /* 600 */
535 lastport = &pcbinfo->lastlow;
536 } else {
537 first = ipport_firstauto; /* sysctl */
538 last = ipport_lastauto;
539 lastport = &pcbinfo->lastport;
540 }
b0d623f7
A
541 /* No point in randomizing if only one port is available */
542
543 if (first == last)
544 randomport = 0;
1c79356b
A
545 /*
546 * Simple check to ensure all ports are not used up causing
547 * a deadlock here.
548 *
549 * We split the two cases (up and down) so that the direction
550 * is not being tested on each round of the loop.
551 */
552 if (first > last) {
553 /*
554 * counting down
555 */
b0d623f7
A
556 if (randomport) {
557 read_random(&rand_port, sizeof(rand_port));
558 *lastport = first - (rand_port % (first - last));
559 }
1c79356b
A
560 count = first - last;
561
562 do {
563 if (count-- < 0) { /* completely used? */
91447636
A
564 lck_rw_done(pcbinfo->mtx);
565 socket_lock(so, 0);
1c79356b 566 inp->inp_laddr.s_addr = INADDR_ANY;
6d2010ae 567 inp->inp_last_outif = 0;
9bccf70c 568 return (EADDRNOTAVAIL);
1c79356b
A
569 }
570 --*lastport;
571 if (*lastport > first || *lastport < last)
572 *lastport = first;
573 lport = htons(*lastport);
2d21ac55 574 } while (in_pcblookup_local_and_cleanup(pcbinfo,
1c79356b
A
575 inp->inp_laddr, lport, wild));
576 } else {
577 /*
578 * counting up
579 */
b0d623f7
A
580 if (randomport) {
581 read_random(&rand_port, sizeof(rand_port));
582 *lastport = first + (rand_port % (first - last));
583 }
1c79356b
A
584 count = last - first;
585
586 do {
587 if (count-- < 0) { /* completely used? */
91447636
A
588 lck_rw_done(pcbinfo->mtx);
589 socket_lock(so, 0);
1c79356b 590 inp->inp_laddr.s_addr = INADDR_ANY;
6d2010ae 591 inp->inp_last_outif = 0;
9bccf70c 592 return (EADDRNOTAVAIL);
1c79356b
A
593 }
594 ++*lastport;
595 if (*lastport < first || *lastport > last)
596 *lastport = first;
597 lport = htons(*lastport);
2d21ac55 598 } while (in_pcblookup_local_and_cleanup(pcbinfo,
1c79356b
A
599 inp->inp_laddr, lport, wild));
600 }
601 }
91447636 602 socket_lock(so, 0);
1c79356b 603 inp->inp_lport = lport;
91447636 604 if (in_pcbinshash(inp, 1) != 0) {
1c79356b
A
605 inp->inp_laddr.s_addr = INADDR_ANY;
606 inp->inp_lport = 0;
6d2010ae 607 inp->inp_last_outif = 0;
91447636 608 lck_rw_done(pcbinfo->mtx);
1c79356b
A
609 return (EAGAIN);
610 }
91447636 611 lck_rw_done(pcbinfo->mtx);
2d21ac55 612 sflt_notify(so, sock_evt_bound, NULL);
1c79356b
A
613 return (0);
614}
615
/*
 *   Transform old in_pcbconnect() into an inner subroutine for new
 *   in_pcbconnect(): Do some validity-checking on the remote
 *   address (the sockaddr 'nam') and then determine local host address
 *   (i.e., which interface) to use to access that remote host.
 *
 *   This preserves definition of in_pcbconnect(), while supporting a
 *   slightly different version for T/TCP.  (This is more than
 *   a bit of a kludge, but cleaning up the internal interfaces would
 *   have forced minor changes in every protocol).
 *
 *   Side effects visible to the caller:
 *	- if the destination in 'nam' is INADDR_ANY (or INADDR_BROADCAST on
 *	  a broadcast-capable primary interface) it is rewritten in place;
 *	- inp->inp_route may be released and/or re-allocated;
 *	- *plocal_sin and *out_ifscope are written ONLY when the PCB has no
 *	  local address yet (inp_laddr == INADDR_ANY); otherwise the function
 *	  returns 0 without touching them.
 *
 * Parameters:	inp		PCB being connected
 *		nam		Destination (struct sockaddr_in)
 *		plocal_sin	Out: chosen local address
 *		out_ifscope	In: optional interface scope to route through;
 *				Out: index of the chosen interface.  May be
 *				NULL.
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported
 *		EADDRNOTAVAIL		Address not available
 */
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
    struct sockaddr_in *plocal_sin, unsigned int *out_ifscope)
{
	struct in_ifaddr *ia;
	struct sockaddr_in *sin = (struct sockaddr_in *)nam;

	if (nam->sa_len != sizeof (*sin))
		return (EINVAL);
	if (sin->sin_family != AF_INET)
		return (EAFNOSUPPORT);
	if (sin->sin_port == 0)
		return (EADDRNOTAVAIL);

	lck_rw_lock_shared(in_ifaddr_rwlock);
	if (!TAILQ_EMPTY(&in_ifaddrhead)) {
		ia = TAILQ_FIRST(&in_ifaddrhead);
		/*
		 * If the destination address is INADDR_ANY,
		 * use the primary local address.
		 * If the supplied address is INADDR_BROADCAST,
		 * and the primary interface supports broadcast,
		 * choose the broadcast address for that interface.
		 */
#define	satosin(sa)	((struct sockaddr_in *)(sa))
#define sintosa(sin)	((struct sockaddr *)(sin))
#define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (sin->sin_addr.s_addr == INADDR_ANY)
			sin->sin_addr = IA_SIN(ia)->sin_addr;
		else if (sin->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST &&
		    (ia->ia_ifp->if_flags & IFF_BROADCAST))
			sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr;
		IFA_UNLOCK(&ia->ia_ifa);
		/* no reference was taken on the list head entry */
		ia = NULL;
	}
	lck_rw_done(in_ifaddr_rwlock);

	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		struct route *ro;
		unsigned int ifscope = IFSCOPE_NONE;
		unsigned int nocell;
		/*
		 * If the caller supplied an interface scope, it takes
		 * precedence over the interface the socket is bound to
		 * (INP_BOUND_IF).
		 */
		ia = (struct in_ifaddr *)0;

		if (out_ifscope != NULL && *out_ifscope != IFSCOPE_NONE)
			ifscope = *out_ifscope;
		else if (inp->inp_flags & INP_BOUND_IF)
			ifscope = inp->inp_boundif;

		nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
		/*
		 * If route is known or can be allocated now,
		 * our src addr is taken from the i/f, else punt.
		 * Note that we should check the address family of the cached
		 * destination, in case of sharing the cache with IPv6.
		 */
		ro = &inp->inp_route;
		if (ro->ro_rt != NULL)
			RT_LOCK_SPIN(ro->ro_rt);
		/* Drop a cached route that no longer matches this request */
		if (ro->ro_rt && (ro->ro_dst.sa_family != AF_INET ||
		    satosin(&ro->ro_dst)->sin_addr.s_addr !=
		    sin->sin_addr.s_addr ||
		    inp->inp_socket->so_options & SO_DONTROUTE ||
		    ro->ro_rt->generation_id != route_generation)) {
			RT_UNLOCK(ro->ro_rt);
			rtfree(ro->ro_rt);
			ro->ro_rt = NULL;
		}
		if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
		    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
			if (ro->ro_rt != NULL)
				RT_UNLOCK(ro->ro_rt);
			/* No route yet, so try to acquire one */
			bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
			ro->ro_dst.sa_family = AF_INET;
			ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
			((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
				sin->sin_addr;
			rtalloc_scoped(ro, ifscope);
			if (ro->ro_rt != NULL)
				RT_LOCK_SPIN(ro->ro_rt);
		}
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 */
		if (nocell && ro->ro_rt != NULL) {
			RT_LOCK_ASSERT_HELD(ro->ro_rt);
			if (ro->ro_rt->rt_ifp->if_type == IFT_CELLULAR) {
				RT_UNLOCK(ro->ro_rt);
				rtfree(ro->ro_rt);
				ro->ro_rt = NULL;
			}
		}
		/*
		 * If we found a route, use the address
		 * corresponding to the outgoing interface
		 * unless it is the loopback (in case a route
		 * to our address on another net goes to loopback).
		 */
		if (ro->ro_rt != NULL) {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
				ia = ifatoia(ro->ro_rt->rt_ifa);
				if (ia) {
					/* released at the end of this block */
					IFA_ADDREF(&ia->ia_ifa);
				}
			}
			RT_UNLOCK(ro->ro_rt);
		}
		if (ia == 0) {
			/*
			 * No usable route.  Fall back to an interface whose
			 * destination or network matches, and as a last
			 * resort to the first configured address.  The port
			 * is zeroed only for the duration of the lookups.
			 */
			u_short fport = sin->sin_port;

			sin->sin_port = 0;
			ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
			if (ia == 0) {
				ia = ifatoia(ifa_ifwithnet_scoped(sintosa(sin),
				    ifscope));
			}
			sin->sin_port = fport;
			if (ia == 0) {
				lck_rw_lock_shared(in_ifaddr_rwlock);
				ia = TAILQ_FIRST(&in_ifaddrhead);
				if (ia)
					IFA_ADDREF(&ia->ia_ifa);
				lck_rw_done(in_ifaddr_rwlock);
			}
			/*
			 * If the source address belongs to a cellular interface
			 * and the socket forbids our using interfaces of such
			 * type, pretend that there is no source address.
			 */
			if (nocell && ia != NULL &&
			    ia->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR) {
				IFA_REMREF(&ia->ia_ifa);
				ia = NULL;
			}
			if (ia == 0)
				return (EADDRNOTAVAIL);
		}
		/*
		 * If the destination address is multicast and an outgoing
		 * interface has been set as a multicast option, use the
		 * address of that interface as our source address.
		 */
		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
		    inp->inp_moptions != NULL) {
			struct ip_moptions *imo;
			struct ifnet *ifp;

			imo = inp->inp_moptions;
			IMO_LOCK(imo);
			if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
				ia->ia_ifp != imo->imo_multicast_ifp)) {
				ifp = imo->imo_multicast_ifp;
				/* swap our reference over to the new address */
				if (ia)
					IFA_REMREF(&ia->ia_ifa);
				lck_rw_lock_shared(in_ifaddr_rwlock);
				TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
					if (ia->ia_ifp == ifp)
						break;
				}
				if (ia)
					IFA_ADDREF(&ia->ia_ifa);
				lck_rw_done(in_ifaddr_rwlock);
				if (ia == 0) {
					IMO_UNLOCK(imo);
					return (EADDRNOTAVAIL);
				}
			}
			IMO_UNLOCK(imo);
		}
		/*
		 * Don't do pcblookup call here; return interface in plocal_sin
		 * and exit to caller, that will do the lookup.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		*plocal_sin = ia->ia_addr;
		if (out_ifscope != NULL)
			*out_ifscope = ia->ia_ifp->if_index;
		IFA_UNLOCK(&ia->ia_ifa);
		IFA_REMREF(&ia->ia_ifa);
	}
	return(0);
}
824
/*
 * Outer subroutine:
 * Connect from a socket to a specified address.
 * Both address and port must be specified in argument sin.
 * If don't have a local address for this socket yet,
 * then pick one.
 *
 * Locking: called with the socket locked.  The socket lock is dropped
 * around the duplicate lookup, and possibly again while waiting for
 * pcbinfo->mtx; it is held on return.
 *
 * Parameters:	inp		PCB to connect
 *		nam		Destination (struct sockaddr_in)
 *		p		Process, passed on to in_pcbbind()
 *		ifscope		In/out interface scope handed to
 *				in_pcbladdr(); may be NULL
 *
 * Returns:	0			Success
 *		ECONNREFUSED		Socket was aborted while unlocked
 *		EADDRINUSE		The 4-tuple is already in use
 *	in_pcbladdr:???
 *	in_pcbbind:???
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, unsigned int *ifscope)
{
	struct sockaddr_in ifaddr;
	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
	struct inpcb *pcb;
	int error;
	int need_laddr;

	/*
	 *   Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &ifaddr, ifscope)) != 0)
		return(error);

	/* Look for an existing PCB with the same 4-tuple. */
	socket_unlock(inp->inp_socket, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr.sin_addr,
	    inp->inp_lport, 0, NULL);
	socket_lock(inp->inp_socket, 0);

	/* Check if the socket is still in a valid state. When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((inp->inp_socket->so_flags & SOF_ABORTED) != 0) {
		/*
		 * A PCB handed back by the lookup has to be released, just
		 * like on the EADDRINUSE path below; otherwise this early
		 * return would leave it referenced forever.
		 */
		if (pcb != NULL)
			in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return ECONNREFUSED;
	}

	if (pcb != NULL) {
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return (EADDRINUSE);
	}

	/*
	 * Decide once whether the local address still has to be filled in;
	 * if so, and no port is bound either, pick an anonymous port first.
	 */
	need_laddr = (inp->inp_laddr.s_addr == INADDR_ANY);
	if (need_laddr && inp->inp_lport == 0) {
		error = in_pcbbind(inp, (struct sockaddr *)0, p);
		if (error)
		    return (error);
	}

	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
		/*lock inversion issue, mostly with udp multicast packets */
		socket_unlock(inp->inp_socket, 0);
		lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
		socket_lock(inp->inp_socket, 0);
	}

	if (need_laddr) {
		inp->inp_laddr = ifaddr.sin_addr;
		inp->inp_last_outif = ifscope ? *ifscope : IFSCOPE_NONE;
		inp->inp_flags |= INP_INADDR_ANY;
	}
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	/* The foreign address changed: move the PCB to its new hash chain */
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->mtx);
	return (0);
}
894
895void
2d21ac55 896in_pcbdisconnect(struct inpcb *inp)
1c79356b
A
897{
898
899 inp->inp_faddr.s_addr = INADDR_ANY;
900 inp->inp_fport = 0;
91447636
A
901
902 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
903 /*lock inversion issue, mostly with udp multicast packets */
904 socket_unlock(inp->inp_socket, 0);
905 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
906 socket_lock(inp->inp_socket, 0);
907 }
908
1c79356b 909 in_pcbrehash(inp);
91447636
A
910 lck_rw_done(inp->inp_pcbinfo->mtx);
911
912 if (inp->inp_socket->so_state & SS_NOFDREF)
1c79356b
A
913 in_pcbdetach(inp);
914}
915
916void
2d21ac55 917in_pcbdetach(struct inpcb *inp)
1c79356b
A
918{
919 struct socket *so = inp->inp_socket;
1c79356b 920
91447636 921 if (so->so_pcb == 0) { /* we've been called twice */
2d21ac55 922 panic("in_pcbdetach: inp=%p so=%p proto=%d so_pcb is null!\n",
91447636
A
923 inp, so, so->so_proto->pr_protocol);
924 }
ab86ba33 925
1c79356b 926#if IPSEC
91447636 927 if (ipsec_bypass == 0) {
91447636 928 ipsec4_delete_pcbpolicy(inp);
91447636 929 }
1c79356b 930#endif /*IPSEC*/
91447636
A
931
932 /* mark socket state as dead */
933 if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING)
2d21ac55 934 panic("in_pcbdetach so=%p prot=%x couldn't set to STOPUSING\n", so, so->so_proto->pr_protocol);
1c79356b
A
935
936#if TEMPDEBUG
937 if (so->cached_in_sock_layer)
91447636 938 printf("in_pcbdetach for cached socket %x flags=%x\n", so, so->so_flags);
1c79356b 939 else
91447636 940 printf("in_pcbdetach for allocated socket %x flags=%x\n", so, so->so_flags);
1c79356b 941#endif
91447636 942 if ((so->so_flags & SOF_PCBCLEARING) == 0) {
2d21ac55 943 struct rtentry *rt;
6d2010ae 944 struct ip_moptions *imo;
2d21ac55 945
91447636
A
946 inp->inp_vflag = 0;
947 if (inp->inp_options)
948 (void)m_free(inp->inp_options);
2d21ac55
A
949 if ((rt = inp->inp_route.ro_rt) != NULL) {
950 inp->inp_route.ro_rt = NULL;
b0d623f7 951 rtfree(rt);
91447636 952 }
6d2010ae 953 imo = inp->inp_moptions;
91447636 954 inp->inp_moptions = NULL;
6d2010ae
A
955 if (imo != NULL)
956 IMO_REMREF(imo);
91447636
A
957 sofreelastref(so, 0);
958 inp->inp_state = INPCB_STATE_DEAD;
959 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
960 }
961}
1c79356b 962
1c79356b 963
91447636 964void
2d21ac55 965in_pcbdispose(struct inpcb *inp)
91447636
A
966{
967 struct socket *so = inp->inp_socket;
968 struct inpcbinfo *ipi = inp->inp_pcbinfo;
969
970#if TEMPDEBUG
971 if (inp->inp_state != INPCB_STATE_DEAD) {
2d21ac55 972 printf("in_pcbdispose: not dead yet? so=%p\n", so);
91447636
A
973 }
974#endif
91447636 975 if (so && so->so_usecount != 0)
6d2010ae
A
976 panic("%s: so %p so_usecount %d so_lockhistory %s\n",
977 __func__, so, so->so_usecount,
978 (so != NULL) ? solockhistory_nr(so) : "--");
91447636 979
2d21ac55 980 lck_rw_assert(ipi->mtx, LCK_RW_ASSERT_EXCLUSIVE);
91447636
A
981
982 inp->inp_gencnt = ++ipi->ipi_gencnt;
983 /*### access ipi in in_pcbremlists */
984 in_pcbremlists(inp);
985
986 if (so) {
987 if (so->so_proto->pr_flags & PR_PCBLOCK) {
988 sofreelastref(so, 0);
989 if (so->so_rcv.sb_cc || so->so_snd.sb_cc) {
990#if TEMPDEBUG
2d21ac55 991 printf("in_pcbdispose sb not cleaned up so=%p rc_cci=%x snd_cc=%x\n",
91447636
A
992 so, so->so_rcv.sb_cc, so->so_snd.sb_cc);
993#endif
994 sbrelease(&so->so_rcv);
995 sbrelease(&so->so_snd);
996 }
997 if (so->so_head != NULL)
2d21ac55 998 panic("in_pcbdispose, so=%p head still exist\n", so);
6d2010ae
A
999 lck_mtx_unlock(&inp->inpcb_mtx);
1000 lck_mtx_destroy(&inp->inpcb_mtx, ipi->mtx_grp);
9bccf70c 1001 }
91447636 1002 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
2d21ac55 1003 so->so_saved_pcb = (caddr_t) inp;
91447636
A
1004 so->so_pcb = 0;
1005 inp->inp_socket = 0;
2d21ac55
A
1006#if CONFIG_MACF_NET
1007 mac_inpcb_label_destroy(inp);
1008#endif
b0d623f7
A
1009 /*
1010 * In case there a route cached after a detach (possible
1011 * in the tcp case), make sure that it is freed before
1012 * we deallocate the structure.
1013 */
1014 if (inp->inp_route.ro_rt != NULL) {
1015 rtfree(inp->inp_route.ro_rt);
1016 inp->inp_route.ro_rt = NULL;
1017 }
91447636
A
1018 if (so->cached_in_sock_layer == 0) {
1019 zfree(ipi->ipi_zone, inp);
55e303ae 1020 }
91447636 1021 sodealloc(so);
9bccf70c 1022 }
91447636 1023#if TEMPDEBUG
1c79356b 1024 else
2d21ac55 1025 printf("in_pcbdispose: no socket for inp=%p\n", inp);
91447636 1026#endif
1c79356b
A
1027}
1028
1029/*
1030 * The calling convention of in_setsockaddr() and in_setpeeraddr() was
1031 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
1032 * in struct pr_usrreqs, so that protocols can just reference them directly
1033 * without the need for a wrapper function. The socket must have a valid
1034 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
1035 * except through a kernel programming error, so it is acceptable to panic
1036 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap
1037 * because there actually /is/ a programming error somewhere... XXX)
2d21ac55
A
1038 *
1039 * Returns: 0 Success
1040 * ENOBUFS No buffer space available
1041 * ECONNRESET Connection reset
1c79356b
A
1042 */
1043int
2d21ac55 1044in_setsockaddr(struct socket *so, struct sockaddr **nam)
1c79356b 1045{
2d21ac55
A
1046 struct inpcb *inp;
1047 struct sockaddr_in *sin;
1c79356b
A
1048
1049 /*
1050 * Do the malloc first in case it blocks.
1051 */
1052 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
0b4e3aa0
A
1053 if (sin == NULL)
1054 return ENOBUFS;
1c79356b
A
1055 bzero(sin, sizeof *sin);
1056 sin->sin_family = AF_INET;
1057 sin->sin_len = sizeof(*sin);
1058
1c79356b
A
1059 inp = sotoinpcb(so);
1060 if (!inp) {
1c79356b 1061 FREE(sin, M_SONAME);
9bccf70c 1062 return ECONNRESET;
1c79356b
A
1063 }
1064 sin->sin_port = inp->inp_lport;
1065 sin->sin_addr = inp->inp_laddr;
1c79356b
A
1066
1067 *nam = (struct sockaddr *)sin;
1068 return 0;
1069}
1070
1071int
2d21ac55 1072in_setpeeraddr(struct socket *so, struct sockaddr **nam)
1c79356b 1073{
1c79356b 1074 struct inpcb *inp;
2d21ac55 1075 struct sockaddr_in *sin;
1c79356b
A
1076
1077 /*
1078 * Do the malloc first in case it blocks.
1079 */
1080 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
0b4e3aa0
A
1081 if (sin == NULL)
1082 return ENOBUFS;
1c79356b
A
1083 bzero((caddr_t)sin, sizeof (*sin));
1084 sin->sin_family = AF_INET;
1085 sin->sin_len = sizeof(*sin);
1086
1c79356b
A
1087 inp = sotoinpcb(so);
1088 if (!inp) {
1c79356b 1089 FREE(sin, M_SONAME);
9bccf70c 1090 return ECONNRESET;
1c79356b
A
1091 }
1092 sin->sin_port = inp->inp_fport;
1093 sin->sin_addr = inp->inp_faddr;
1c79356b
A
1094
1095 *nam = (struct sockaddr *)sin;
1096 return 0;
1097}
1098
1c79356b 1099void
2d21ac55
A
1100in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1101 int errno, void (*notify)(struct inpcb *, int))
1c79356b 1102{
91447636
A
1103 struct inpcb *inp;
1104
1105 lck_rw_lock_shared(pcbinfo->mtx);
1c79356b 1106
91447636 1107 LIST_FOREACH(inp, pcbinfo->listhead, inp_list) {
9bccf70c
A
1108#if INET6
1109 if ((inp->inp_vflag & INP_IPV4) == 0)
1c79356b 1110 continue;
9bccf70c 1111#endif
1c79356b 1112 if (inp->inp_faddr.s_addr != faddr.s_addr ||
9bccf70c
A
1113 inp->inp_socket == NULL)
1114 continue;
91447636
A
1115 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
1116 continue;
1117 socket_lock(inp->inp_socket, 1);
9bccf70c 1118 (*notify)(inp, errno);
91447636
A
1119 (void)in_pcb_checkstate(inp, WNT_RELEASE, 1);
1120 socket_unlock(inp->inp_socket, 1);
1c79356b 1121 }
91447636 1122 lck_rw_done(pcbinfo->mtx);
1c79356b
A
1123}
1124
1125/*
1126 * Check for alternatives when higher level complains
1127 * about service problems. For now, invalidate cached
1128 * routing information. If the route was created dynamically
1129 * (by a redirect), time to try a default gateway again.
1130 */
1131void
2d21ac55 1132in_losing(struct inpcb *inp)
1c79356b 1133{
2d21ac55 1134 struct rtentry *rt;
1c79356b
A
1135 struct rt_addrinfo info;
1136
b0d623f7
A
1137 if ((rt = inp->inp_route.ro_rt) != NULL) {
1138 struct in_ifaddr *ia;
1139
1c79356b 1140 bzero((caddr_t)&info, sizeof(info));
b0d623f7 1141 RT_LOCK(rt);
1c79356b
A
1142 info.rti_info[RTAX_DST] =
1143 (struct sockaddr *)&inp->inp_route.ro_dst;
1144 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1145 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1146 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
b0d623f7
A
1147 if (rt->rt_flags & RTF_DYNAMIC) {
1148 /*
1149 * Prevent another thread from modifying rt_key,
1150 * rt_gateway via rt_setgate() after rt_lock is
1151 * dropped by marking the route as defunct.
1152 */
1153 rt->rt_flags |= RTF_CONDEMNED;
1154 RT_UNLOCK(rt);
1155 (void) rtrequest(RTM_DELETE, rt_key(rt),
1c79356b
A
1156 rt->rt_gateway, rt_mask(rt), rt->rt_flags,
1157 (struct rtentry **)0);
b0d623f7
A
1158 } else {
1159 RT_UNLOCK(rt);
1160 }
2d21ac55 1161 /* if the address is gone keep the old route in the pcb */
b0d623f7
A
1162 if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1163 inp->inp_route.ro_rt = NULL;
1164 rtfree(rt);
6d2010ae 1165 IFA_REMREF(&ia->ia_ifa);
2d21ac55 1166 }
1c79356b
A
1167 /*
1168 * A new route can be allocated
1169 * the next time output is attempted.
1170 */
1c79356b
A
1171 }
1172}
1173
1174/*
1175 * After a routing change, flush old routing
1176 * and allocate a (hopefully) better one.
1177 */
9bccf70c 1178void
2d21ac55 1179in_rtchange(struct inpcb *inp, __unused int errno)
1c79356b 1180{
2d21ac55
A
1181 struct rtentry *rt;
1182
1183 if ((rt = inp->inp_route.ro_rt) != NULL) {
b0d623f7
A
1184 struct in_ifaddr *ia;
1185
1186 if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) == NULL) {
ab86ba33 1187 return; /* we can't remove the route now. not sure if still ok to use src */
2d21ac55 1188 }
6d2010ae 1189 IFA_REMREF(&ia->ia_ifa);
b0d623f7 1190 rtfree(rt);
2d21ac55 1191 inp->inp_route.ro_rt = NULL;
1c79356b
A
1192 /*
1193 * A new route can be allocated the next time
1194 * output is attempted.
1195 */
1196 }
1197}
1198
1199/*
1200 * Lookup a PCB based on the local address and port.
1201 */
1202struct inpcb *
2d21ac55
A
1203in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
1204 unsigned int lport_arg, int wild_okay)
1c79356b 1205{
2d21ac55 1206 struct inpcb *inp;
1c79356b
A
1207 int matchwild = 3, wildcard;
1208 u_short lport = lport_arg;
1209
1210 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0,0,0,0,0);
1211
1212 if (!wild_okay) {
1213 struct inpcbhead *head;
1214 /*
1215 * Look for an unconnected (wildcard foreign addr) PCB that
1216 * matches the local address and port we're looking for.
1217 */
1218 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
9bccf70c
A
1219 LIST_FOREACH(inp, head, inp_hash) {
1220#if INET6
1221 if ((inp->inp_vflag & INP_IPV4) == 0)
1c79356b 1222 continue;
9bccf70c 1223#endif
1c79356b
A
1224 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1225 inp->inp_laddr.s_addr == laddr.s_addr &&
1226 inp->inp_lport == lport) {
1227 /*
1228 * Found.
1229 */
1230 return (inp);
1231 }
1232 }
1233 /*
1234 * Not found.
1235 */
1236 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0,0,0,0,0);
1237 return (NULL);
1238 } else {
1239 struct inpcbporthead *porthash;
1240 struct inpcbport *phd;
1241 struct inpcb *match = NULL;
1242 /*
1243 * Best fit PCB lookup.
1244 *
1245 * First see if this local port is in use by looking on the
1246 * port hash list.
1247 */
1248 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
1249 pcbinfo->porthashmask)];
9bccf70c 1250 LIST_FOREACH(phd, porthash, phd_hash) {
1c79356b
A
1251 if (phd->phd_port == lport)
1252 break;
1253 }
1254 if (phd != NULL) {
1255 /*
1256 * Port is in use by one or more PCBs. Look for best
1257 * fit.
1258 */
9bccf70c 1259 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
1c79356b 1260 wildcard = 0;
9bccf70c
A
1261#if INET6
1262 if ((inp->inp_vflag & INP_IPV4) == 0)
1c79356b 1263 continue;
9bccf70c 1264#endif
1c79356b
A
1265 if (inp->inp_faddr.s_addr != INADDR_ANY)
1266 wildcard++;
1267 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1268 if (laddr.s_addr == INADDR_ANY)
1269 wildcard++;
1270 else if (inp->inp_laddr.s_addr != laddr.s_addr)
1271 continue;
1272 } else {
1273 if (laddr.s_addr != INADDR_ANY)
1274 wildcard++;
1275 }
1276 if (wildcard < matchwild) {
1277 match = inp;
1278 matchwild = wildcard;
1279 if (matchwild == 0) {
1280 break;
1281 }
1282 }
1283 }
1284 }
1285 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,0,0,0,0);
1286 return (match);
1287 }
1288}
1289
6d2010ae
A
1290/*
1291 * Check if PCB exists in hash list.
1292 */
1293int
1294in_pcblookup_hash_exists(
1295 struct inpcbinfo *pcbinfo,
1296 struct in_addr faddr,
1297 u_int fport_arg,
1298 struct in_addr laddr,
1299 u_int lport_arg,
1300 int wildcard,
1301 uid_t *uid,
1302 gid_t *gid,
1303 __unused struct ifnet *ifp)
1304{
1305 struct inpcbhead *head;
1306 struct inpcb *inp;
1307 u_short fport = fport_arg, lport = lport_arg;
1308 int found;
1309
1310 *uid = UID_MAX;
1311 *gid = GID_MAX;
1312
1313 /*
1314 * We may have found the pcb in the last lookup - check this first.
1315 */
1316
1317 lck_rw_lock_shared(pcbinfo->mtx);
1318
1319 /*
1320 * First look for an exact match.
1321 */
1322 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
1323 pcbinfo->hashmask)];
1324 LIST_FOREACH(inp, head, inp_hash) {
1325#if INET6
1326 if ((inp->inp_vflag & INP_IPV4) == 0)
1327 continue;
1328#endif
1329 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1330 inp->inp_laddr.s_addr == laddr.s_addr &&
1331 inp->inp_fport == fport &&
1332 inp->inp_lport == lport) {
1333 if ((found = (inp->inp_socket != NULL))) {
1334 /*
1335 * Found.
1336 */
1337 *uid = inp->inp_socket->so_uid;
1338 *gid = inp->inp_socket->so_gid;
1339 }
1340 lck_rw_done(pcbinfo->mtx);
1341 return (found);
1342 }
1343 }
1344 if (wildcard) {
1345 struct inpcb *local_wild = NULL;
1346#if INET6
1347 struct inpcb *local_wild_mapped = NULL;
1348#endif
1349
1350 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
1351 pcbinfo->hashmask)];
1352 LIST_FOREACH(inp, head, inp_hash) {
1353#if INET6
1354 if ((inp->inp_vflag & INP_IPV4) == 0)
1355 continue;
1356#endif
1357 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1358 inp->inp_lport == lport) {
1359#if defined(NFAITH) && NFAITH > 0
1360 if (ifp && ifp->if_type == IFT_FAITH &&
1361 (inp->inp_flags & INP_FAITH) == 0)
1362 continue;
1363#endif
1364 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1365 if ((found = (inp->inp_socket != NULL))) {
1366 *uid = inp->inp_socket->so_uid;
1367 *gid = inp->inp_socket->so_gid;
1368 }
1369 lck_rw_done(pcbinfo->mtx);
1370 return (found);
1371 }
1372 else if (inp->inp_laddr.s_addr == INADDR_ANY) {
1373#if INET6
1374 if (inp->inp_socket &&
1375 INP_CHECK_SOCKAF(inp->inp_socket,
1376 AF_INET6))
1377 local_wild_mapped = inp;
1378 else
1379#endif /* INET6 */
1380 local_wild = inp;
1381 }
1382 }
1383 }
1384 if (local_wild == NULL) {
1385#if INET6
1386 if (local_wild_mapped != NULL) {
1387 if ((found = (local_wild_mapped->inp_socket != NULL))) {
1388 *uid = local_wild_mapped->inp_socket->so_uid;
1389 *gid = local_wild_mapped->inp_socket->so_gid;
1390 }
1391 lck_rw_done(pcbinfo->mtx);
1392 return (found);
1393 }
1394#endif /* INET6 */
1395 lck_rw_done(pcbinfo->mtx);
1396 return (0);
1397 }
1398 if (local_wild != NULL) {
1399 if ((found = (local_wild->inp_socket != NULL))) {
1400 *uid = local_wild->inp_socket->so_uid;
1401 *gid = local_wild->inp_socket->so_gid;
1402 }
1403 lck_rw_done(pcbinfo->mtx);
1404 return (found);
1405 }
1406 }
1407
1408 /*
1409 * Not found.
1410 */
1411 lck_rw_done(pcbinfo->mtx);
1412 return (0);
1413}
1414
1c79356b
A
1415/*
1416 * Lookup PCB in hash list.
1417 */
1418struct inpcb *
91447636
A
1419in_pcblookup_hash(
1420 struct inpcbinfo *pcbinfo,
1421 struct in_addr faddr,
1422 u_int fport_arg,
1423 struct in_addr laddr,
1424 u_int lport_arg,
1425 int wildcard,
2d21ac55 1426 __unused struct ifnet *ifp)
1c79356b
A
1427{
1428 struct inpcbhead *head;
2d21ac55 1429 struct inpcb *inp;
1c79356b
A
1430 u_short fport = fport_arg, lport = lport_arg;
1431
1432 /*
1433 * We may have found the pcb in the last lookup - check this first.
1434 */
1435
91447636 1436 lck_rw_lock_shared(pcbinfo->mtx);
1c79356b
A
1437
1438 /*
1439 * First look for an exact match.
1440 */
1441 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
9bccf70c
A
1442 LIST_FOREACH(inp, head, inp_hash) {
1443#if INET6
1444 if ((inp->inp_vflag & INP_IPV4) == 0)
1c79356b 1445 continue;
9bccf70c 1446#endif
1c79356b
A
1447 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1448 inp->inp_laddr.s_addr == laddr.s_addr &&
1449 inp->inp_fport == fport &&
1450 inp->inp_lport == lport) {
1451 /*
1452 * Found.
1453 */
91447636
A
1454 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1455 lck_rw_done(pcbinfo->mtx);
1456 return (inp);
1457 }
1458 else { /* it's there but dead, say it isn't found */
1459 lck_rw_done(pcbinfo->mtx);
1460 return(NULL);
1461 }
1c79356b
A
1462 }
1463 }
1464 if (wildcard) {
1465 struct inpcb *local_wild = NULL;
1466#if INET6
1467 struct inpcb *local_wild_mapped = NULL;
1468#endif
1469
1470 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
9bccf70c
A
1471 LIST_FOREACH(inp, head, inp_hash) {
1472#if INET6
1473 if ((inp->inp_vflag & INP_IPV4) == 0)
1c79356b 1474 continue;
9bccf70c 1475#endif
1c79356b
A
1476 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1477 inp->inp_lport == lport) {
1478#if defined(NFAITH) && NFAITH > 0
1479 if (ifp && ifp->if_type == IFT_FAITH &&
1480 (inp->inp_flags & INP_FAITH) == 0)
1481 continue;
1482#endif
91447636
A
1483 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1484 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1485 lck_rw_done(pcbinfo->mtx);
1486 return (inp);
1487 }
1488 else { /* it's there but dead, say it isn't found */
1489 lck_rw_done(pcbinfo->mtx);
1490 return(NULL);
1491 }
1492 }
1c79356b 1493 else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2d21ac55 1494#if INET6
1c79356b
A
1495 if (INP_CHECK_SOCKAF(inp->inp_socket,
1496 AF_INET6))
1497 local_wild_mapped = inp;
1498 else
2d21ac55 1499#endif /* INET6 */
1c79356b
A
1500 local_wild = inp;
1501 }
1502 }
1503 }
91447636 1504 if (local_wild == NULL) {
2d21ac55 1505#if INET6
91447636
A
1506 if (local_wild_mapped != NULL) {
1507 if (in_pcb_checkstate(local_wild_mapped, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1508 lck_rw_done(pcbinfo->mtx);
1509 return (local_wild_mapped);
1510 }
1511 else { /* it's there but dead, say it isn't found */
1512 lck_rw_done(pcbinfo->mtx);
1513 return(NULL);
1514 }
1515 }
2d21ac55 1516#endif /* INET6 */
91447636
A
1517 lck_rw_done(pcbinfo->mtx);
1518 return (NULL);
1519 }
91447636
A
1520 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1521 lck_rw_done(pcbinfo->mtx);
1522 return (local_wild);
1523 }
1524 else { /* it's there but dead, say it isn't found */
1525 lck_rw_done(pcbinfo->mtx);
1526 return(NULL);
1527 }
1c79356b
A
1528 }
1529
1530 /*
1531 * Not found.
1532 */
91447636 1533 lck_rw_done(pcbinfo->mtx);
1c79356b
A
1534 return (NULL);
1535}
1536
1537/*
1538 * Insert PCB onto various hash lists.
1539 */
1540int
2d21ac55 1541in_pcbinshash(struct inpcb *inp, int locked)
1c79356b
A
1542{
1543 struct inpcbhead *pcbhash;
1544 struct inpcbporthead *pcbporthash;
1545 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1546 struct inpcbport *phd;
1547 u_int32_t hashkey_faddr;
1548
b0d623f7
A
1549 if (!locked) {
1550 if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) {
6d2010ae 1551 /*lock inversion issue, mostly with udp multicast packets */
b0d623f7
A
1552 socket_unlock(inp->inp_socket, 0);
1553 lck_rw_lock_exclusive(pcbinfo->mtx);
1554 socket_lock(inp->inp_socket, 0);
6d2010ae
A
1555 if (inp->inp_state == INPCB_STATE_DEAD) {
1556 /* The socket got dropped when it was unlocked */
1557 lck_rw_done(pcbinfo->mtx);
1558 return(ECONNABORTED);
1559 }
b0d623f7
A
1560 }
1561 }
1562
1c79356b
A
1563#if INET6
1564 if (inp->inp_vflag & INP_IPV6)
1565 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1566 else
1567#endif /* INET6 */
1568 hashkey_faddr = inp->inp_faddr.s_addr;
1569
91447636
A
1570 inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask);
1571
91447636 1572 pcbhash = &pcbinfo->hashbase[inp->hash_element];
1c79356b
A
1573
1574 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
1575 pcbinfo->porthashmask)];
1576
1577 /*
1578 * Go through port list and look for a head for this lport.
1579 */
9bccf70c 1580 LIST_FOREACH(phd, pcbporthash, phd_hash) {
1c79356b
A
1581 if (phd->phd_port == inp->inp_lport)
1582 break;
1583 }
1584 /*
1585 * If none exists, malloc one and tack it on.
1586 */
1587 if (phd == NULL) {
0b4e3aa0 1588 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_WAITOK);
1c79356b 1589 if (phd == NULL) {
91447636
A
1590 if (!locked)
1591 lck_rw_done(pcbinfo->mtx);
1c79356b
A
1592 return (ENOBUFS); /* XXX */
1593 }
1594 phd->phd_port = inp->inp_lport;
1595 LIST_INIT(&phd->phd_pcblist);
1596 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
1597 }
1598 inp->inp_phd = phd;
1599 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
1600 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
91447636
A
1601 if (!locked)
1602 lck_rw_done(pcbinfo->mtx);
1c79356b
A
1603 return (0);
1604}
1605
1606/*
1607 * Move PCB to the proper hash bucket when { faddr, fport } have been
1608 * changed. NOTE: This does not handle the case of the lport changing (the
1609 * hashed port list would have to be updated as well), so the lport must
1610 * not change after in_pcbinshash() has been called.
1611 */
1612void
2d21ac55 1613in_pcbrehash(struct inpcb *inp)
1c79356b
A
1614{
1615 struct inpcbhead *head;
1616 u_int32_t hashkey_faddr;
1617
1618#if INET6
1619 if (inp->inp_vflag & INP_IPV6)
1620 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1621 else
1622#endif /* INET6 */
1623 hashkey_faddr = inp->inp_faddr.s_addr;
91447636
A
1624 inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
1625 inp->inp_fport, inp->inp_pcbinfo->hashmask);
1626 head = &inp->inp_pcbinfo->hashbase[inp->hash_element];
1c79356b
A
1627
1628 LIST_REMOVE(inp, inp_hash);
1629 LIST_INSERT_HEAD(head, inp, inp_hash);
1c79356b
A
1630}
1631
1632/*
1633 * Remove PCB from various lists.
1634 */
91447636 1635//###LOCK must be called with list lock held
1c79356b 1636void
2d21ac55 1637in_pcbremlists(struct inpcb *inp)
1c79356b
A
1638{
1639 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
1c79356b
A
1640
1641 if (inp->inp_lport) {
1642 struct inpcbport *phd = inp->inp_phd;
1643
1644 LIST_REMOVE(inp, inp_hash);
1645 LIST_REMOVE(inp, inp_portlist);
55e303ae 1646 if (phd != NULL && (LIST_FIRST(&phd->phd_pcblist) == NULL)) {
1c79356b
A
1647 LIST_REMOVE(phd, phd_hash);
1648 FREE(phd, M_PCB);
1649 }
1650 }
1c79356b
A
1651 LIST_REMOVE(inp, inp_list);
1652 inp->inp_pcbinfo->ipi_count--;
1653}
1654
91447636
A
1655/* Mechanism used to defer the memory release of PCBs
1656 * The pcb list will contain the pcb until the ripper can clean it up if
1657 * the following conditions are met: 1) state "DEAD", 2) wantcnt is STOPUSING
1658 * 3) usecount is null
1659 * This function will be called to either mark the pcb as
1660*/
1661int
1662in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
91447636
A
1663{
1664
1665 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
2d21ac55
A
1666 UInt32 origwant;
1667 UInt32 newwant;
91447636
A
1668
1669 switch (mode) {
1670
1671 case WNT_STOPUSING: /* try to mark the pcb as ready for recycling */
1672
1673 /* compareswap with STOPUSING, if success we're good, if it's in use, will be marked later */
1674
1675 if (locked == 0)
1676 socket_lock(pcb->inp_socket, 1);
1677 pcb->inp_state = INPCB_STATE_DEAD;
6d2010ae 1678
91447636
A
1679stopusing:
1680 if (pcb->inp_socket->so_usecount < 0)
2d21ac55 1681 panic("in_pcb_checkstate STOP pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket);
91447636
A
1682 if (locked == 0)
1683 socket_unlock(pcb->inp_socket, 1);
1684
1685 origwant = *wantcnt;
1686 if ((UInt16) origwant == 0xffff ) /* should stop using */
1687 return (WNT_STOPUSING);
1688 newwant = 0xffff;
1689 if ((UInt16) origwant == 0) {/* try to mark it as unsuable now */
2d21ac55 1690 OSCompareAndSwap(origwant, newwant, wantcnt) ;
91447636
A
1691 }
1692 return (WNT_STOPUSING);
1693 break;
1694
1695 case WNT_ACQUIRE: /* try to increase reference to pcb */
1696 /* if WNT_STOPUSING should bail out */
1697 /*
1698 * if socket state DEAD, try to set count to STOPUSING, return failed
1699 * otherwise increase cnt
1700 */
1701 do {
1702 origwant = *wantcnt;
1703 if ((UInt16) origwant == 0xffff ) {/* should stop using */
2d21ac55 1704// printf("in_pcb_checkstate: ACQ PCB was STOPUSING while release. odd pcb=%p\n", pcb);
91447636
A
1705 return (WNT_STOPUSING);
1706 }
1707 newwant = origwant + 1;
2d21ac55 1708 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
91447636
A
1709 return (WNT_ACQUIRE);
1710 break;
1711
1712 case WNT_RELEASE: /* release reference. if result is null and pcb state is DEAD,
1713 set wanted bit to STOPUSING
1714 */
1715
1716 if (locked == 0)
1717 socket_lock(pcb->inp_socket, 1);
1718
1719 do {
1720 origwant = *wantcnt;
1721 if ((UInt16) origwant == 0x0 )
2d21ac55 1722 panic("in_pcb_checkstate pcb=%p release with zero count", pcb);
91447636
A
1723 if ((UInt16) origwant == 0xffff ) {/* should stop using */
1724#if TEMPDEBUG
2d21ac55 1725 printf("in_pcb_checkstate: REL PCB was STOPUSING while release. odd pcb=%p\n", pcb);
91447636
A
1726#endif
1727 if (locked == 0)
1728 socket_unlock(pcb->inp_socket, 1);
1729 return (WNT_STOPUSING);
1730 }
1731 newwant = origwant - 1;
2d21ac55 1732 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
91447636
A
1733
1734 if (pcb->inp_state == INPCB_STATE_DEAD)
1735 goto stopusing;
1736 if (pcb->inp_socket->so_usecount < 0)
2d21ac55 1737 panic("in_pcb_checkstate RELEASE pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket);
91447636
A
1738
1739 if (locked == 0)
1740 socket_unlock(pcb->inp_socket, 1);
1741 return (WNT_RELEASE);
1742 break;
1743
1744 default:
1745
2d21ac55 1746 panic("in_pcb_checkstate: so=%p not a valid state =%x\n", pcb->inp_socket, mode);
91447636
A
1747 }
1748
1749 /* NOTREACHED */
1750 return (mode);
1751}
1752
1753/*
1754 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
1755 * The inpcb_compat data structure is passed to user space and must
b0d623f7 1756 * not change. We intentionally avoid copying pointers.
91447636
A
1757 */
1758void
1759inpcb_to_compat(
1760 struct inpcb *inp,
1761 struct inpcb_compat *inp_compat)
1762{
1763 bzero(inp_compat, sizeof(*inp_compat));
1764 inp_compat->inp_fport = inp->inp_fport;
1765 inp_compat->inp_lport = inp->inp_lport;
91447636
A
1766 inp_compat->nat_owner = inp->nat_owner;
1767 inp_compat->nat_cookie = inp->nat_cookie;
1768 inp_compat->inp_gencnt = inp->inp_gencnt;
1769 inp_compat->inp_flags = inp->inp_flags;
1770 inp_compat->inp_flow = inp->inp_flow;
1771 inp_compat->inp_vflag = inp->inp_vflag;
1772 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
1773 inp_compat->inp_ip_p = inp->inp_ip_p;
1774 inp_compat->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
1775 inp_compat->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
1776 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
1777 inp_compat->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim;
1778 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
b0d623f7 1779 inp_compat->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex;
91447636
A
1780 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
1781}
9bccf70c 1782
b0d623f7
A
1783#if !CONFIG_EMBEDDED
1784
1785void
1786inpcb_to_xinpcb64(
1787 struct inpcb *inp,
1788 struct xinpcb64 *xinp)
1789{
6d2010ae
A
1790 xinp->inp_fport = inp->inp_fport;
1791 xinp->inp_lport = inp->inp_lport;
1792 xinp->inp_gencnt = inp->inp_gencnt;
1793 xinp->inp_flags = inp->inp_flags;
1794 xinp->inp_flow = inp->inp_flow;
1795 xinp->inp_vflag = inp->inp_vflag;
1796 xinp->inp_ip_ttl = inp->inp_ip_ttl;
1797 xinp->inp_ip_p = inp->inp_ip_p;
1798 xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
1799 xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
1800 xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
1801 xinp->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim;
1802 xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
b0d623f7 1803 xinp->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex;
6d2010ae 1804 xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
b0d623f7
A
1805}
1806
1807#endif /* !CONFIG_EMBEDDED */
1808
6d2010ae 1809
b0d623f7
A
1810/*
1811 * The following routines implement this scheme:
1812 *
1813 * Callers of ip_output() that intend to cache the route in the inpcb pass
1814 * a local copy of the struct route to ip_output(). Using a local copy of
1815 * the cached route significantly simplifies things as IP no longer has to
1816 * worry about having exclusive access to the passed in struct route, since
1817 * it's defined in the caller's stack; in essence, this allows for a lock-
1818 * less operation when updating the struct route at the IP level and below,
1819 * whenever necessary. The scheme works as follows:
1820 *
1821 * Prior to dropping the socket's lock and calling ip_output(), the caller
1822 * copies the struct route from the inpcb into its stack, and adds a reference
1823 * to the cached route entry, if there was any. The socket's lock is then
1824 * dropped and ip_output() is called with a pointer to the copy of struct
1825 * route defined on the stack (not to the one in the inpcb.)
1826 *
1827 * Upon returning from ip_output(), the caller then acquires the socket's
1828 * lock and synchronizes the cache; if there is no route cached in the inpcb,
1829 * it copies the local copy of struct route (which may or may not contain any
1830 * route) back into the cache; otherwise, if the inpcb has a route cached in
1831 * it, the one in the local copy will be freed, if there's any. Trashing the
1832 * cached route in the inpcb can be avoided because ip_output() is single-
1833 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
1834 * by the socket/transport layer.)
1835 */
1836void
1837inp_route_copyout(struct inpcb *inp, struct route *dst)
1838{
1839 struct route *src = &inp->inp_route;
1840
6d2010ae 1841 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
b0d623f7 1842
0b4c1975
A
1843 /*
1844 * If the route in the PCB is not for IPv4, blow it away;
1845 * this is possible in the case of IPv4-mapped address case.
1846 */
1847 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) {
1848 rtfree(src->ro_rt);
1849 src->ro_rt = NULL;
1850 }
6d2010ae
A
1851
1852 route_copyout(dst, src, sizeof(*dst));
b0d623f7
A
1853}
1854
1855void
1856inp_route_copyin(struct inpcb *inp, struct route *src)
1857{
1858 struct route *dst = &inp->inp_route;
1859
6d2010ae 1860 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
b0d623f7
A
1861
1862 /* Minor sanity check */
1863 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
1864 panic("%s: wrong or corrupted route: %p", __func__, src);
1865
6d2010ae
A
1866 route_copyin(src, dst, sizeof(*src));
1867}
1868
1869/*
1870 * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option.
1871 */
1872void
1873inp_bindif(struct inpcb *inp, unsigned int ifscope)
1874{
1875 /*
1876 * A zero interface scope value indicates an "unbind".
1877 * Otherwise, take in whatever value the app desires;
1878 * the app may already know the scope (or force itself
1879 * to such a scope) ahead of time before the interface
1880 * gets attached. It doesn't matter either way; any
1881 * route lookup from this point on will require an
1882 * exact match for the embedded interface scope.
1883 */
1884 inp->inp_boundif = ifscope;
1885 if (inp->inp_boundif == IFSCOPE_NONE)
1886 inp->inp_flags &= ~INP_BOUND_IF;
1887 else
1888 inp->inp_flags |= INP_BOUND_IF;
1889
1890 /* Blow away any cached route in the PCB */
1891 if (inp->inp_route.ro_rt != NULL) {
1892 rtfree(inp->inp_route.ro_rt);
1893 inp->inp_route.ro_rt = NULL;
1894 }
1895}
1896
1897/*
1898 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option.
1899 */
1900int
1901inp_nocellular(struct inpcb *inp, unsigned int val)
1902{
1903 if (val) {
1904 inp->inp_flags |= INP_NO_IFT_CELLULAR;
1905 } else if (inp->inp_flags & INP_NO_IFT_CELLULAR) {
1906 /* once set, it cannot be unset */
1907 return (EINVAL);
b0d623f7 1908 }
6d2010ae
A
1909
1910 /* Blow away any cached route in the PCB */
1911 if (inp->inp_route.ro_rt != NULL) {
1912 rtfree(inp->inp_route.ro_rt);
1913 inp->inp_route.ro_rt = NULL;
1914 }
1915
1916 return (0);
b0d623f7 1917}