]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/in_pcb.c
xnu-1504.15.3.tar.gz
[apple/xnu.git] / bsd / netinet / in_pcb.c
CommitLineData
1c79356b 1/*
c910b4d9 2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
9bccf70c 61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
1c79356b
A
62 */
63
64#include <sys/param.h>
65#include <sys/systm.h>
66#include <sys/malloc.h>
67#include <sys/mbuf.h>
1c79356b 68#include <sys/domain.h>
1c79356b
A
69#include <sys/protosw.h>
70#include <sys/socket.h>
71#include <sys/socketvar.h>
72#include <sys/proc.h>
9bccf70c
A
73#ifndef __APPLE__
74#include <sys/jail.h>
75#endif
1c79356b
A
76#include <sys/kernel.h>
77#include <sys/sysctl.h>
91447636 78#include <libkern/OSAtomic.h>
1c79356b
A
79
80#include <machine/limits.h>
81
9bccf70c 82#ifdef __APPLE__
1c79356b
A
83#include <kern/zalloc.h>
84#endif
85
86#include <net/if.h>
1c79356b 87#include <net/if_types.h>
9bccf70c 88#include <net/route.h>
1c79356b
A
89
90#include <netinet/in.h>
91#include <netinet/in_pcb.h>
92#include <netinet/in_var.h>
93#include <netinet/ip_var.h>
94#if INET6
95#include <netinet/ip6.h>
96#include <netinet6/ip6_var.h>
97#endif /* INET6 */
98
99#include "faith.h"
100
101#if IPSEC
102#include <netinet6/ipsec.h>
103#include <netkey/key.h>
1c79356b
A
104#endif /* IPSEC */
105
106#include <sys/kdebug.h>
b0d623f7 107#include <sys/random.h>
1c79356b 108
9bccf70c
A
109#if IPSEC
110extern int ipsec_bypass;
111#endif
1c79356b
A
112
113#define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
114#define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
115
116struct in_addr zeroin_addr;
117
1c79356b
A
118/*
119 * These configure the range of local port addresses assigned to
120 * "unspecified" outgoing connections/packets/whatever.
121 */
9bccf70c
A
122int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
123int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
124#ifndef __APPLE__
125int ipport_firstauto = IPPORT_RESERVED; /* 1024 */
126int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */
127#else
128int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
129int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
130#endif
131int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
132int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
1c79356b
A
133
134#define RANGECHK(var, min, max) \
135 if ((var) < (min)) { (var) = (min); } \
136 else if ((var) > (max)) { (var) = (max); }
137
1c79356b
A
138static int
139sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
140{
2d21ac55 141#pragma unused(arg1, arg2)
1c79356b
A
142 int error = sysctl_handle_int(oidp,
143 oidp->oid_arg1, oidp->oid_arg2, req);
144 if (!error) {
145 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
146 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
147 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
148 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
149 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
150 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
151 }
152 return error;
153}
154
155#undef RANGECHK
156
2d21ac55 157SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports");
1c79356b
A
158
159SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
160 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
161SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
162 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
163SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
164 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
165SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
166 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
167SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
168 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
169SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
170 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
171
b0d623f7
A
172extern int udp_use_randomport;
173extern int tcp_use_randomport;
174
1c79356b
A
175/*
176 * in_pcb.c: manage the Protocol Control Blocks.
177 *
178 * NOTE: It is assumed that most of these functions will be called at
179 * splnet(). XXX - There are, unfortunately, a few exceptions to this
180 * rule that should be fixed.
181 */
182
183/*
184 * Allocate a PCB and associate it with the socket.
2d21ac55
A
185 *
186 * Returns: 0 Success
187 * ENOBUFS
188 * ENOMEM
189 * ipsec_init_policy:??? [IPSEC]
1c79356b
A
190 */
191int
2d21ac55 192in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, __unused struct proc *p)
1c79356b 193{
2d21ac55 194 struct inpcb *inp;
1c79356b 195 caddr_t temp;
9bccf70c 196#if IPSEC
91447636 197#ifndef __APPLE__
9bccf70c 198 int error;
91447636 199#endif
2d21ac55
A
200#endif
201#if CONFIG_MACF_NET
202 int mac_error;
9bccf70c 203#endif
1c79356b
A
204
205 if (so->cached_in_sock_layer == 0) {
206#if TEMPDEBUG
207 printf("PCBALLOC calling zalloc for socket %x\n", so);
208#endif
209 inp = (struct inpcb *) zalloc(pcbinfo->ipi_zone);
210 if (inp == NULL)
211 return (ENOBUFS);
212 bzero((caddr_t)inp, sizeof(*inp));
213 }
214 else {
215#if TEMPDEBUG
216 printf("PCBALLOC reusing PCB for socket %x\n", so);
217#endif
218 inp = (struct inpcb *) so->so_saved_pcb;
219 temp = inp->inp_saved_ppcb;
220 bzero((caddr_t) inp, sizeof(*inp));
221 inp->inp_saved_ppcb = temp;
222 }
223
224 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
225 inp->inp_pcbinfo = pcbinfo;
226 inp->inp_socket = so;
2d21ac55
A
227#if CONFIG_MACF_NET
228 mac_error = mac_inpcb_label_init(inp, M_WAITOK);
229 if (mac_error != 0) {
230 if (so->cached_in_sock_layer == 0)
231 zfree(pcbinfo->ipi_zone, inp);
232 return (mac_error);
233 }
234 mac_inpcb_label_associate(so, inp);
235#endif
91447636
A
236 so->so_pcb = (caddr_t)inp;
237
238 if (so->so_proto->pr_flags & PR_PCBLOCK) {
239 inp->inpcb_mtx = lck_mtx_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr);
240 if (inp->inpcb_mtx == NULL) {
2d21ac55 241 printf("in_pcballoc: can't alloc mutex! so=%p\n", so);
91447636
A
242 return(ENOMEM);
243 }
244 }
245
9bccf70c
A
246#if IPSEC
247#ifndef __APPLE__
248 if (ipsec_bypass == 0) {
249 error = ipsec_init_policy(so, &inp->inp_sp);
250 if (error != 0) {
91447636 251 zfree(pcbinfo->ipi_zone, inp);
9bccf70c
A
252 return error;
253 }
254 }
255#endif
256#endif /*IPSEC*/
2d21ac55 257#if INET6
9bccf70c
A
258 if (INP_SOCKAF(so) == AF_INET6 && !ip6_mapped_addr_on)
259 inp->inp_flags |= IN6P_IPV6_V6ONLY;
260#endif
91447636 261
9bccf70c
A
262#if INET6
263 if (ip6_auto_flowlabel)
264 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
265#endif
91447636
A
266 lck_rw_lock_exclusive(pcbinfo->mtx);
267 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
268 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
269 pcbinfo->ipi_count++;
270 lck_rw_done(pcbinfo->mtx);
1c79356b
A
271 return (0);
272}
273
2d21ac55
A
274
275/*
276 in_pcblookup_local_and_cleanup does everything
277 in_pcblookup_local does but it checks for a socket
278 that's going away. Since we know that the lock is
279 held read+write when this funciton is called, we
280 can safely dispose of this socket like the slow
281 timer would usually do and return NULL. This is
282 great for bind.
283*/
284struct inpcb*
285in_pcblookup_local_and_cleanup(
286 struct inpcbinfo *pcbinfo,
287 struct in_addr laddr,
288 u_int lport_arg,
289 int wild_okay)
290{
291 struct inpcb *inp;
292
293 /* Perform normal lookup */
294 inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
295
296 /* Check if we found a match but it's waiting to be disposed */
297 if (inp && inp->inp_wantcnt == WNT_STOPUSING) {
298 struct socket *so = inp->inp_socket;
299
300 lck_mtx_lock(inp->inpcb_mtx);
301
302 if (so->so_usecount == 0) {
b0d623f7
A
303 if (inp->inp_state != INPCB_STATE_DEAD)
304 in_pcbdetach(inp);
2d21ac55
A
305 in_pcbdispose(inp);
306 inp = NULL;
307 }
308 else {
309 lck_mtx_unlock(inp->inpcb_mtx);
310 }
311 }
312
313 return inp;
314}
315
#ifdef __APPLE_API_PRIVATE
/*
 * Radar 5523020: send a kernel event notification if a non-participating
 * socket tries to bind the port a socket who has set SOF_NOTIFYCONFLICT
 * owns.
 *
 * `port' is the contested port in network byte order; it is reported in
 * host byte order together with the pid of the calling process.
 */
static void
in_pcb_conflict_post_msg(u_int16_t port)
{
	struct kev_msg ev_msg;
	struct kev_in_portinuse in_portinuse;

	/*
	 * Both structures live on the kernel stack and are handed to
	 * kev_post_msg().  Zero them first so that any member or padding
	 * byte not explicitly set below does not carry stale stack data.
	 */
	bzero(&in_portinuse, sizeof(in_portinuse));
	bzero(&ev_msg, sizeof(ev_msg));

	in_portinuse.port = ntohs(port);	/* port in host order */
	in_portinuse.req_pid = proc_selfpid();
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_INET_SUBCLASS;
	ev_msg.event_code = KEV_INET_PORTINUSE;
	ev_msg.dv[0].data_ptr = &in_portinuse;
	ev_msg.dv[0].data_length = sizeof(struct kev_in_portinuse);
	ev_msg.dv[1].data_length = 0;
	kev_post_msg(&ev_msg);
}
#endif
339/*
340 * Returns: 0 Success
341 * EADDRNOTAVAIL Address not available.
342 * EINVAL Invalid argument
343 * EAFNOSUPPORT Address family not supported [notdef]
344 * EACCES Permission denied
345 * EADDRINUSE Address in use
346 * EAGAIN Resource unavailable, try again
347 * proc_suser:EPERM Operation not permitted
348 */
1c79356b 349int
2d21ac55 350in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
1c79356b 351{
2d21ac55 352 struct socket *so = inp->inp_socket;
9bccf70c 353 unsigned short *lastport;
1c79356b
A
354 struct sockaddr_in *sin;
355 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
b0d623f7 356 u_short lport = 0, rand_port = 0;
1c79356b 357 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
b0d623f7 358 int error, randomport, conflict = 0;
1c79356b
A
359
360 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
361 return (EADDRNOTAVAIL);
362 if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
363 return (EINVAL);
364 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
365 wild = 1;
91447636
A
366 socket_unlock(so, 0); /* keep reference on socket */
367 lck_rw_lock_exclusive(pcbinfo->mtx);
1c79356b
A
368 if (nam) {
369 sin = (struct sockaddr_in *)nam;
91447636
A
370 if (nam->sa_len != sizeof (*sin)) {
371 lck_rw_done(pcbinfo->mtx);
372 socket_lock(so, 0);
1c79356b 373 return (EINVAL);
91447636 374 }
1c79356b
A
375#ifdef notdef
376 /*
377 * We should check the family, but old programs
378 * incorrectly fail to initialize it.
379 */
91447636
A
380 if (sin->sin_family != AF_INET) {
381 lck_rw_done(pcbinfo->mtx);
382 socket_lock(so, 0);
1c79356b 383 return (EAFNOSUPPORT);
91447636 384 }
1c79356b
A
385#endif
386 lport = sin->sin_port;
387 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
388 /*
389 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
390 * allow complete duplication of binding if
391 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
392 * and a multicast address is bound on both
393 * new and duplicated sockets.
394 */
395 if (so->so_options & SO_REUSEADDR)
396 reuseport = SO_REUSEADDR|SO_REUSEPORT;
397 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
91447636 398 struct ifaddr *ifa;
1c79356b 399 sin->sin_port = 0; /* yech... */
91447636
A
400 if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin)) == 0) {
401 lck_rw_done(pcbinfo->mtx);
402 socket_lock(so, 0);
1c79356b 403 return (EADDRNOTAVAIL);
91447636
A
404 }
405 else {
406 ifafree(ifa);
407 }
1c79356b
A
408 }
409 if (lport) {
410 struct inpcb *t;
411
412 /* GROSS */
b0d623f7
A
413#if !CONFIG_EMBEDDED
414 if (ntohs(lport) < IPPORT_RESERVED && proc_suser(p)) {
91447636
A
415 lck_rw_done(pcbinfo->mtx);
416 socket_lock(so, 0);
1c79356b 417 return (EACCES);
91447636 418 }
b0d623f7 419#endif
1c79356b
A
420 if (so->so_uid &&
421 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
2d21ac55 422 t = in_pcblookup_local_and_cleanup(inp->inp_pcbinfo,
1c79356b
A
423 sin->sin_addr, lport, INPLOOKUP_WILDCARD);
424 if (t &&
425 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
426 ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
427 (t->inp_socket->so_options &
428 SO_REUSEPORT) == 0) &&
2d21ac55
A
429 (so->so_uid != t->inp_socket->so_uid) &&
430 ((t->inp_socket->so_flags & SOF_REUSESHAREUID) == 0)) {
1c79356b 431#if INET6
9bccf70c 432 if (ntohl(sin->sin_addr.s_addr) !=
1c79356b
A
433 INADDR_ANY ||
434 ntohl(t->inp_laddr.s_addr) !=
435 INADDR_ANY ||
436 INP_SOCKAF(so) ==
2d21ac55
A
437 INP_SOCKAF(t->inp_socket))
438#endif /* INET6 */
439 {
440#ifdef __APPLE_API_PRIVATE
441
442 if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0))
443 conflict = 1;
444
445 lck_rw_done(pcbinfo->mtx);
446
447 if (conflict)
448 in_pcb_conflict_post_msg(lport);
449#else
91447636 450 lck_rw_done(pcbinfo->mtx);
2d21ac55
A
451#endif /* __APPLE_API_PRIVATE */
452
91447636
A
453 socket_lock(so, 0);
454 return (EADDRINUSE);
455 }
1c79356b
A
456 }
457 }
2d21ac55 458 t = in_pcblookup_local_and_cleanup(pcbinfo, sin->sin_addr,
1c79356b
A
459 lport, wild);
460 if (t &&
461 (reuseport & t->inp_socket->so_options) == 0) {
462#if INET6
463 if (ip6_mapped_addr_on == 0 ||
464 ntohl(sin->sin_addr.s_addr) !=
465 INADDR_ANY ||
466 ntohl(t->inp_laddr.s_addr) !=
467 INADDR_ANY ||
468 INP_SOCKAF(so) ==
2d21ac55
A
469 INP_SOCKAF(t->inp_socket))
470#endif /* INET6 */
471 {
472#ifdef __APPLE_API_PRIVATE
473
474 if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0))
475 conflict = 1;
476
91447636 477 lck_rw_done(pcbinfo->mtx);
2d21ac55
A
478
479 if (conflict)
480 in_pcb_conflict_post_msg(lport);
481#else
482 lck_rw_done(pcbinfo->mtx);
483#endif /* __APPLE_API_PRIVATE */
91447636
A
484 socket_lock(so, 0);
485 return (EADDRINUSE);
486 }
1c79356b
A
487 }
488 }
489 inp->inp_laddr = sin->sin_addr;
490 }
491 if (lport == 0) {
492 u_short first, last;
493 int count;
494
b0d623f7
A
495 randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
496 (so->so_type == SOCK_STREAM ? tcp_use_randomport : udp_use_randomport);
497
1c79356b
A
498 inp->inp_flags |= INP_ANONPORT;
499
500 if (inp->inp_flags & INP_HIGHPORT) {
501 first = ipport_hifirstauto; /* sysctl */
502 last = ipport_hilastauto;
503 lastport = &pcbinfo->lasthi;
504 } else if (inp->inp_flags & INP_LOWPORT) {
b0d623f7 505 if ((error = proc_suser(p)) != 0) {
91447636
A
506 lck_rw_done(pcbinfo->mtx);
507 socket_lock(so, 0);
1c79356b 508 return error;
91447636 509 }
1c79356b
A
510 first = ipport_lowfirstauto; /* 1023 */
511 last = ipport_lowlastauto; /* 600 */
512 lastport = &pcbinfo->lastlow;
513 } else {
514 first = ipport_firstauto; /* sysctl */
515 last = ipport_lastauto;
516 lastport = &pcbinfo->lastport;
517 }
b0d623f7
A
518 /* No point in randomizing if only one port is available */
519
520 if (first == last)
521 randomport = 0;
1c79356b
A
522 /*
523 * Simple check to ensure all ports are not used up causing
524 * a deadlock here.
525 *
526 * We split the two cases (up and down) so that the direction
527 * is not being tested on each round of the loop.
528 */
529 if (first > last) {
530 /*
531 * counting down
532 */
b0d623f7
A
533 if (randomport) {
534 read_random(&rand_port, sizeof(rand_port));
535 *lastport = first - (rand_port % (first - last));
536 }
1c79356b
A
537 count = first - last;
538
539 do {
540 if (count-- < 0) { /* completely used? */
91447636
A
541 lck_rw_done(pcbinfo->mtx);
542 socket_lock(so, 0);
1c79356b 543 inp->inp_laddr.s_addr = INADDR_ANY;
9bccf70c 544 return (EADDRNOTAVAIL);
1c79356b
A
545 }
546 --*lastport;
547 if (*lastport > first || *lastport < last)
548 *lastport = first;
549 lport = htons(*lastport);
2d21ac55 550 } while (in_pcblookup_local_and_cleanup(pcbinfo,
1c79356b
A
551 inp->inp_laddr, lport, wild));
552 } else {
553 /*
554 * counting up
555 */
b0d623f7
A
556 if (randomport) {
557 read_random(&rand_port, sizeof(rand_port));
558 *lastport = first + (rand_port % (first - last));
559 }
1c79356b
A
560 count = last - first;
561
562 do {
563 if (count-- < 0) { /* completely used? */
91447636
A
564 lck_rw_done(pcbinfo->mtx);
565 socket_lock(so, 0);
1c79356b 566 inp->inp_laddr.s_addr = INADDR_ANY;
9bccf70c 567 return (EADDRNOTAVAIL);
1c79356b
A
568 }
569 ++*lastport;
570 if (*lastport < first || *lastport > last)
571 *lastport = first;
572 lport = htons(*lastport);
2d21ac55 573 } while (in_pcblookup_local_and_cleanup(pcbinfo,
1c79356b
A
574 inp->inp_laddr, lport, wild));
575 }
576 }
91447636 577 socket_lock(so, 0);
1c79356b 578 inp->inp_lport = lport;
91447636 579 if (in_pcbinshash(inp, 1) != 0) {
1c79356b
A
580 inp->inp_laddr.s_addr = INADDR_ANY;
581 inp->inp_lport = 0;
91447636 582 lck_rw_done(pcbinfo->mtx);
1c79356b
A
583 return (EAGAIN);
584 }
91447636 585 lck_rw_done(pcbinfo->mtx);
2d21ac55 586 sflt_notify(so, sock_evt_bound, NULL);
1c79356b
A
587 return (0);
588}
589
590/*
591 * Transform old in_pcbconnect() into an inner subroutine for new
592 * in_pcbconnect(): Do some validity-checking on the remote
593 * address (in mbuf 'nam') and then determine local host address
594 * (i.e., which interface) to use to access that remote host.
595 *
596 * This preserves definition of in_pcbconnect(), while supporting a
597 * slightly different version for T/TCP. (This is more than
598 * a bit of a kludge, but cleaning up the internal interfaces would
599 * have forced minor changes in every protocol).
2d21ac55
A
600 *
601 * Returns: 0 Success
602 * EINVAL Invalid argument
603 * EAFNOSUPPORT Address family not supported
604 * EADDRNOTAVAIL Address not available
1c79356b 605 */
1c79356b 606int
2d21ac55
A
607in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
608 struct sockaddr_in **plocal_sin)
1c79356b
A
609{
610 struct in_ifaddr *ia;
2d21ac55 611 struct sockaddr_in *sin = (struct sockaddr_in *)nam;
1c79356b
A
612
613 if (nam->sa_len != sizeof (*sin))
614 return (EINVAL);
615 if (sin->sin_family != AF_INET)
616 return (EAFNOSUPPORT);
617 if (sin->sin_port == 0)
618 return (EADDRNOTAVAIL);
b0d623f7
A
619
620 lck_rw_lock_shared(in_ifaddr_rwlock);
1c79356b
A
621 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
622 /*
623 * If the destination address is INADDR_ANY,
624 * use the primary local address.
625 * If the supplied address is INADDR_BROADCAST,
626 * and the primary interface supports broadcast,
627 * choose the broadcast address for that interface.
628 */
629#define satosin(sa) ((struct sockaddr_in *)(sa))
630#define sintosa(sin) ((struct sockaddr *)(sin))
631#define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
632 if (sin->sin_addr.s_addr == INADDR_ANY)
9bccf70c 633 sin->sin_addr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr;
b0d623f7 634 else if (sin->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST &&
9bccf70c
A
635 (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & IFF_BROADCAST))
636 sin->sin_addr = satosin(&TAILQ_FIRST(&in_ifaddrhead)->ia_broadaddr)->sin_addr;
1c79356b 637 }
b0d623f7
A
638 lck_rw_done(in_ifaddr_rwlock);
639
1c79356b 640 if (inp->inp_laddr.s_addr == INADDR_ANY) {
2d21ac55 641 struct route *ro;
c910b4d9 642 unsigned int ifscope;
1c79356b
A
643
644 ia = (struct in_ifaddr *)0;
c910b4d9
A
645 ifscope = (inp->inp_flags & INP_BOUND_IF) ?
646 inp->inp_boundif : IFSCOPE_NONE;
1c79356b
A
647 /*
648 * If route is known or can be allocated now,
649 * our src addr is taken from the i/f, else punt.
55e303ae
A
650 * Note that we should check the address family of the cached
651 * destination, in case of sharing the cache with IPv6.
1c79356b
A
652 */
653 ro = &inp->inp_route;
b0d623f7
A
654 if (ro->ro_rt != NULL)
655 RT_LOCK_SPIN(ro->ro_rt);
656 if (ro->ro_rt && (ro->ro_dst.sa_family != AF_INET ||
657 satosin(&ro->ro_dst)->sin_addr.s_addr !=
658 sin->sin_addr.s_addr ||
659 inp->inp_socket->so_options & SO_DONTROUTE ||
55e303ae 660 ro->ro_rt->generation_id != route_generation)) {
b0d623f7
A
661 RT_UNLOCK(ro->ro_rt);
662 rtfree(ro->ro_rt);
663 ro->ro_rt = NULL;
1c79356b
A
664 }
665 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
b0d623f7
A
666 (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
667 if (ro->ro_rt != NULL)
668 RT_UNLOCK(ro->ro_rt);
1c79356b 669 /* No route yet, so try to acquire one */
55e303ae 670 bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
1c79356b
A
671 ro->ro_dst.sa_family = AF_INET;
672 ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
673 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
674 sin->sin_addr;
b0d623f7
A
675 rtalloc_scoped_ign(ro, 0, ifscope);
676 if (ro->ro_rt != NULL)
677 RT_LOCK_SPIN(ro->ro_rt);
1c79356b
A
678 }
679 /*
680 * If we found a route, use the address
681 * corresponding to the outgoing interface
682 * unless it is the loopback (in case a route
683 * to our address on another net goes to loopback).
684 */
b0d623f7
A
685 if (ro->ro_rt != NULL) {
686 RT_LOCK_ASSERT_HELD(ro->ro_rt);
687 if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
688 ia = ifatoia(ro->ro_rt->rt_ifa);
689 if (ia)
690 ifaref(&ia->ia_ifa);
691 }
692 RT_UNLOCK(ro->ro_rt);
91447636 693 }
1c79356b
A
694 if (ia == 0) {
695 u_short fport = sin->sin_port;
696
697 sin->sin_port = 0;
698 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
91447636 699 if (ia == 0) {
c910b4d9
A
700 ia = ifatoia(ifa_ifwithnet_scoped(sintosa(sin),
701 ifscope));
91447636 702 }
1c79356b 703 sin->sin_port = fport;
91447636 704 if (ia == 0) {
b0d623f7 705 lck_rw_lock_shared(in_ifaddr_rwlock);
9bccf70c 706 ia = TAILQ_FIRST(&in_ifaddrhead);
91447636
A
707 if (ia)
708 ifaref(&ia->ia_ifa);
b0d623f7 709 lck_rw_done(in_ifaddr_rwlock);
91447636 710 }
b0d623f7 711 if (ia == 0)
1c79356b
A
712 return (EADDRNOTAVAIL);
713 }
714 /*
715 * If the destination address is multicast and an outgoing
716 * interface has been set as a multicast option, use the
717 * address of that interface as our source address.
718 */
719 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
720 inp->inp_moptions != NULL) {
721 struct ip_moptions *imo;
722 struct ifnet *ifp;
723
724 imo = inp->inp_moptions;
91447636
A
725 if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
726 ia->ia_ifp != imo->imo_multicast_ifp)) {
1c79356b 727 ifp = imo->imo_multicast_ifp;
91447636
A
728 if (ia)
729 ifafree(&ia->ia_ifa);
b0d623f7
A
730 lck_rw_lock_shared(in_ifaddr_rwlock);
731 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
1c79356b
A
732 if (ia->ia_ifp == ifp)
733 break;
91447636 734 }
b0d623f7
A
735 if (ia)
736 ifaref(&ia->ia_ifa);
737 lck_rw_done(in_ifaddr_rwlock);
738 if (ia == 0)
739 return (EADDRNOTAVAIL);
1c79356b
A
740 }
741 }
91447636
A
742 /*
743 * Don't do pcblookup call here; return interface in plocal_sin
744 * and exit to caller, that will do the lookup.
745 */
1c79356b 746 *plocal_sin = &ia->ia_addr;
91447636 747 ifafree(&ia->ia_ifa);
1c79356b
A
748 }
749 return(0);
750}
751
752/*
753 * Outer subroutine:
754 * Connect from a socket to a specified address.
755 * Both address and port must be specified in argument sin.
756 * If don't have a local address for this socket yet,
757 * then pick one.
758 */
759int
2d21ac55 760in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
1c79356b
A
761{
762 struct sockaddr_in *ifaddr;
9bccf70c 763 struct sockaddr_in *sin = (struct sockaddr_in *)nam;
91447636 764 struct inpcb *pcb;
1c79356b
A
765 int error;
766
767 /*
768 * Call inner routine, to assign local interface address.
769 */
770 if ((error = in_pcbladdr(inp, nam, &ifaddr)) != 0)
771 return(error);
772
91447636
A
773 socket_unlock(inp->inp_socket, 0);
774 pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
1c79356b 775 inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
91447636
A
776 inp->inp_lport, 0, NULL);
777 socket_lock(inp->inp_socket, 0);
778 if (pcb != NULL) {
0b4c1975 779 in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
1c79356b
A
780 return (EADDRINUSE);
781 }
782 if (inp->inp_laddr.s_addr == INADDR_ANY) {
9bccf70c
A
783 if (inp->inp_lport == 0) {
784 error = in_pcbbind(inp, (struct sockaddr *)0, p);
785 if (error)
786 return (error);
787 }
91447636
A
788 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
789 /*lock inversion issue, mostly with udp multicast packets */
790 socket_unlock(inp->inp_socket, 0);
791 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
792 socket_lock(inp->inp_socket, 0);
793 }
1c79356b 794 inp->inp_laddr = ifaddr->sin_addr;
55e303ae 795 inp->inp_flags |= INP_INADDR_ANY;
91447636
A
796 }
797 else {
798 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
799 /*lock inversion issue, mostly with udp multicast packets */
800 socket_unlock(inp->inp_socket, 0);
801 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
802 socket_lock(inp->inp_socket, 0);
803 }
1c79356b
A
804 }
805 inp->inp_faddr = sin->sin_addr;
806 inp->inp_fport = sin->sin_port;
807 in_pcbrehash(inp);
91447636 808 lck_rw_done(inp->inp_pcbinfo->mtx);
1c79356b
A
809 return (0);
810}
811
812void
2d21ac55 813in_pcbdisconnect(struct inpcb *inp)
1c79356b
A
814{
815
816 inp->inp_faddr.s_addr = INADDR_ANY;
817 inp->inp_fport = 0;
91447636
A
818
819 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
820 /*lock inversion issue, mostly with udp multicast packets */
821 socket_unlock(inp->inp_socket, 0);
822 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
823 socket_lock(inp->inp_socket, 0);
824 }
825
1c79356b 826 in_pcbrehash(inp);
91447636
A
827 lck_rw_done(inp->inp_pcbinfo->mtx);
828
829 if (inp->inp_socket->so_state & SS_NOFDREF)
1c79356b
A
830 in_pcbdetach(inp);
831}
832
833void
2d21ac55 834in_pcbdetach(struct inpcb *inp)
1c79356b
A
835{
836 struct socket *so = inp->inp_socket;
1c79356b 837
91447636 838 if (so->so_pcb == 0) { /* we've been called twice */
2d21ac55 839 panic("in_pcbdetach: inp=%p so=%p proto=%d so_pcb is null!\n",
91447636
A
840 inp, so, so->so_proto->pr_protocol);
841 }
ab86ba33 842
1c79356b 843#if IPSEC
91447636 844 if (ipsec_bypass == 0) {
91447636 845 ipsec4_delete_pcbpolicy(inp);
91447636 846 }
1c79356b 847#endif /*IPSEC*/
91447636
A
848
849 /* mark socket state as dead */
850 if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING)
2d21ac55 851 panic("in_pcbdetach so=%p prot=%x couldn't set to STOPUSING\n", so, so->so_proto->pr_protocol);
1c79356b
A
852
853#if TEMPDEBUG
854 if (so->cached_in_sock_layer)
91447636 855 printf("in_pcbdetach for cached socket %x flags=%x\n", so, so->so_flags);
1c79356b 856 else
91447636 857 printf("in_pcbdetach for allocated socket %x flags=%x\n", so, so->so_flags);
1c79356b 858#endif
91447636 859 if ((so->so_flags & SOF_PCBCLEARING) == 0) {
2d21ac55
A
860 struct rtentry *rt;
861
91447636
A
862 inp->inp_vflag = 0;
863 if (inp->inp_options)
864 (void)m_free(inp->inp_options);
2d21ac55
A
865 if ((rt = inp->inp_route.ro_rt) != NULL) {
866 inp->inp_route.ro_rt = NULL;
b0d623f7 867 rtfree(rt);
91447636
A
868 }
869 ip_freemoptions(inp->inp_moptions);
870 inp->inp_moptions = NULL;
871 sofreelastref(so, 0);
872 inp->inp_state = INPCB_STATE_DEAD;
873 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
874 }
875}
1c79356b 876
1c79356b 877
91447636 878void
2d21ac55 879in_pcbdispose(struct inpcb *inp)
91447636
A
880{
881 struct socket *so = inp->inp_socket;
882 struct inpcbinfo *ipi = inp->inp_pcbinfo;
883
884#if TEMPDEBUG
885 if (inp->inp_state != INPCB_STATE_DEAD) {
2d21ac55 886 printf("in_pcbdispose: not dead yet? so=%p\n", so);
91447636
A
887 }
888#endif
889
890 if (so && so->so_usecount != 0)
2d21ac55 891 panic("in_pcbdispose: use count=%x so=%p\n", so->so_usecount, so);
91447636 892
2d21ac55 893 lck_rw_assert(ipi->mtx, LCK_RW_ASSERT_EXCLUSIVE);
91447636
A
894
895 inp->inp_gencnt = ++ipi->ipi_gencnt;
896 /*### access ipi in in_pcbremlists */
897 in_pcbremlists(inp);
898
899 if (so) {
900 if (so->so_proto->pr_flags & PR_PCBLOCK) {
901 sofreelastref(so, 0);
902 if (so->so_rcv.sb_cc || so->so_snd.sb_cc) {
903#if TEMPDEBUG
2d21ac55 904 printf("in_pcbdispose sb not cleaned up so=%p rc_cci=%x snd_cc=%x\n",
91447636
A
905 so, so->so_rcv.sb_cc, so->so_snd.sb_cc);
906#endif
907 sbrelease(&so->so_rcv);
908 sbrelease(&so->so_snd);
909 }
910 if (so->so_head != NULL)
2d21ac55 911 panic("in_pcbdispose, so=%p head still exist\n", so);
91447636
A
912 lck_mtx_unlock(inp->inpcb_mtx);
913 lck_mtx_free(inp->inpcb_mtx, ipi->mtx_grp);
9bccf70c 914 }
91447636 915 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
2d21ac55 916 so->so_saved_pcb = (caddr_t) inp;
91447636
A
917 so->so_pcb = 0;
918 inp->inp_socket = 0;
2d21ac55
A
919#if CONFIG_MACF_NET
920 mac_inpcb_label_destroy(inp);
921#endif
b0d623f7
A
922 /*
923 * In case there a route cached after a detach (possible
924 * in the tcp case), make sure that it is freed before
925 * we deallocate the structure.
926 */
927 if (inp->inp_route.ro_rt != NULL) {
928 rtfree(inp->inp_route.ro_rt);
929 inp->inp_route.ro_rt = NULL;
930 }
91447636
A
931 if (so->cached_in_sock_layer == 0) {
932 zfree(ipi->ipi_zone, inp);
55e303ae 933 }
91447636 934 sodealloc(so);
9bccf70c 935 }
91447636 936#if TEMPDEBUG
1c79356b 937 else
2d21ac55 938 printf("in_pcbdispose: no socket for inp=%p\n", inp);
91447636 939#endif
1c79356b
A
940}
941
942/*
943 * The calling convention of in_setsockaddr() and in_setpeeraddr() was
944 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
945 * in struct pr_usrreqs, so that protocols can just reference then directly
946 * without the need for a wrapper function. The socket must have a valid
947 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
948 * except through a kernel programming error, so it is acceptable to panic
949 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap
950 * because there actually /is/ a programming error somewhere... XXX)
2d21ac55
A
951 *
952 * Returns: 0 Success
953 * ENOBUFS No buffer space available
954 * ECONNRESET Connection reset
1c79356b
A
955 */
956int
2d21ac55 957in_setsockaddr(struct socket *so, struct sockaddr **nam)
1c79356b 958{
2d21ac55
A
959 struct inpcb *inp;
960 struct sockaddr_in *sin;
1c79356b
A
961
962 /*
963 * Do the malloc first in case it blocks.
964 */
965 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
0b4e3aa0
A
966 if (sin == NULL)
967 return ENOBUFS;
1c79356b
A
968 bzero(sin, sizeof *sin);
969 sin->sin_family = AF_INET;
970 sin->sin_len = sizeof(*sin);
971
1c79356b
A
972 inp = sotoinpcb(so);
973 if (!inp) {
1c79356b 974 FREE(sin, M_SONAME);
9bccf70c 975 return ECONNRESET;
1c79356b
A
976 }
977 sin->sin_port = inp->inp_lport;
978 sin->sin_addr = inp->inp_laddr;
1c79356b
A
979
980 *nam = (struct sockaddr *)sin;
981 return 0;
982}
983
984int
2d21ac55 985in_setpeeraddr(struct socket *so, struct sockaddr **nam)
1c79356b 986{
1c79356b 987 struct inpcb *inp;
2d21ac55 988 struct sockaddr_in *sin;
1c79356b
A
989
990 /*
991 * Do the malloc first in case it blocks.
992 */
993 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
0b4e3aa0
A
994 if (sin == NULL)
995 return ENOBUFS;
1c79356b
A
996 bzero((caddr_t)sin, sizeof (*sin));
997 sin->sin_family = AF_INET;
998 sin->sin_len = sizeof(*sin);
999
1c79356b
A
1000 inp = sotoinpcb(so);
1001 if (!inp) {
1c79356b 1002 FREE(sin, M_SONAME);
9bccf70c 1003 return ECONNRESET;
1c79356b
A
1004 }
1005 sin->sin_port = inp->inp_fport;
1006 sin->sin_addr = inp->inp_faddr;
1c79356b
A
1007
1008 *nam = (struct sockaddr *)sin;
1009 return 0;
1010}
1011
1c79356b 1012void
2d21ac55
A
1013in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1014 int errno, void (*notify)(struct inpcb *, int))
1c79356b 1015{
91447636
A
1016 struct inpcb *inp;
1017
1018 lck_rw_lock_shared(pcbinfo->mtx);
1c79356b 1019
91447636 1020 LIST_FOREACH(inp, pcbinfo->listhead, inp_list) {
9bccf70c
A
1021#if INET6
1022 if ((inp->inp_vflag & INP_IPV4) == 0)
1c79356b 1023 continue;
9bccf70c 1024#endif
1c79356b 1025 if (inp->inp_faddr.s_addr != faddr.s_addr ||
9bccf70c
A
1026 inp->inp_socket == NULL)
1027 continue;
91447636
A
1028 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
1029 continue;
1030 socket_lock(inp->inp_socket, 1);
9bccf70c 1031 (*notify)(inp, errno);
91447636
A
1032 (void)in_pcb_checkstate(inp, WNT_RELEASE, 1);
1033 socket_unlock(inp->inp_socket, 1);
1c79356b 1034 }
91447636 1035 lck_rw_done(pcbinfo->mtx);
1c79356b
A
1036}
1037
1038/*
1039 * Check for alternatives when higher level complains
1040 * about service problems. For now, invalidate cached
1041 * routing information. If the route was created dynamically
1042 * (by a redirect), time to try a default gateway again.
1043 */
1044void
2d21ac55 1045in_losing(struct inpcb *inp)
1c79356b 1046{
2d21ac55 1047 struct rtentry *rt;
1c79356b
A
1048 struct rt_addrinfo info;
1049
b0d623f7
A
1050 if ((rt = inp->inp_route.ro_rt) != NULL) {
1051 struct in_ifaddr *ia;
1052
1c79356b 1053 bzero((caddr_t)&info, sizeof(info));
b0d623f7 1054 RT_LOCK(rt);
1c79356b
A
1055 info.rti_info[RTAX_DST] =
1056 (struct sockaddr *)&inp->inp_route.ro_dst;
1057 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1058 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1059 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
b0d623f7
A
1060 if (rt->rt_flags & RTF_DYNAMIC) {
1061 /*
1062 * Prevent another thread from modifying rt_key,
1063 * rt_gateway via rt_setgate() after rt_lock is
1064 * dropped by marking the route as defunct.
1065 */
1066 rt->rt_flags |= RTF_CONDEMNED;
1067 RT_UNLOCK(rt);
1068 (void) rtrequest(RTM_DELETE, rt_key(rt),
1c79356b
A
1069 rt->rt_gateway, rt_mask(rt), rt->rt_flags,
1070 (struct rtentry **)0);
b0d623f7
A
1071 } else {
1072 RT_UNLOCK(rt);
1073 }
2d21ac55 1074 /* if the address is gone keep the old route in the pcb */
b0d623f7
A
1075 if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1076 inp->inp_route.ro_rt = NULL;
1077 rtfree(rt);
1078 ifafree(&ia->ia_ifa);
2d21ac55 1079 }
1c79356b
A
1080 /*
1081 * A new route can be allocated
1082 * the next time output is attempted.
1083 */
1c79356b
A
1084 }
1085}
1086
1087/*
1088 * After a routing change, flush old routing
1089 * and allocate a (hopefully) better one.
1090 */
9bccf70c 1091void
2d21ac55 1092in_rtchange(struct inpcb *inp, __unused int errno)
1c79356b 1093{
2d21ac55
A
1094 struct rtentry *rt;
1095
1096 if ((rt = inp->inp_route.ro_rt) != NULL) {
b0d623f7
A
1097 struct in_ifaddr *ia;
1098
1099 if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) == NULL) {
ab86ba33 1100 return; /* we can't remove the route now. not sure if still ok to use src */
2d21ac55 1101 }
b0d623f7
A
1102 ifafree(&ia->ia_ifa);
1103 rtfree(rt);
2d21ac55 1104 inp->inp_route.ro_rt = NULL;
1c79356b
A
1105 /*
1106 * A new route can be allocated the next time
1107 * output is attempted.
1108 */
1109 }
1110}
1111
1112/*
1113 * Lookup a PCB based on the local address and port.
1114 */
1115struct inpcb *
2d21ac55
A
1116in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
1117 unsigned int lport_arg, int wild_okay)
1c79356b 1118{
2d21ac55 1119 struct inpcb *inp;
1c79356b
A
1120 int matchwild = 3, wildcard;
1121 u_short lport = lport_arg;
1122
1123 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0,0,0,0,0);
1124
1125 if (!wild_okay) {
1126 struct inpcbhead *head;
1127 /*
1128 * Look for an unconnected (wildcard foreign addr) PCB that
1129 * matches the local address and port we're looking for.
1130 */
1131 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
9bccf70c
A
1132 LIST_FOREACH(inp, head, inp_hash) {
1133#if INET6
1134 if ((inp->inp_vflag & INP_IPV4) == 0)
1c79356b 1135 continue;
9bccf70c 1136#endif
1c79356b
A
1137 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1138 inp->inp_laddr.s_addr == laddr.s_addr &&
1139 inp->inp_lport == lport) {
1140 /*
1141 * Found.
1142 */
1143 return (inp);
1144 }
1145 }
1146 /*
1147 * Not found.
1148 */
1149 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0,0,0,0,0);
1150 return (NULL);
1151 } else {
1152 struct inpcbporthead *porthash;
1153 struct inpcbport *phd;
1154 struct inpcb *match = NULL;
1155 /*
1156 * Best fit PCB lookup.
1157 *
1158 * First see if this local port is in use by looking on the
1159 * port hash list.
1160 */
1161 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
1162 pcbinfo->porthashmask)];
9bccf70c 1163 LIST_FOREACH(phd, porthash, phd_hash) {
1c79356b
A
1164 if (phd->phd_port == lport)
1165 break;
1166 }
1167 if (phd != NULL) {
1168 /*
1169 * Port is in use by one or more PCBs. Look for best
1170 * fit.
1171 */
9bccf70c 1172 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
1c79356b 1173 wildcard = 0;
9bccf70c
A
1174#if INET6
1175 if ((inp->inp_vflag & INP_IPV4) == 0)
1c79356b 1176 continue;
9bccf70c 1177#endif
1c79356b
A
1178 if (inp->inp_faddr.s_addr != INADDR_ANY)
1179 wildcard++;
1180 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1181 if (laddr.s_addr == INADDR_ANY)
1182 wildcard++;
1183 else if (inp->inp_laddr.s_addr != laddr.s_addr)
1184 continue;
1185 } else {
1186 if (laddr.s_addr != INADDR_ANY)
1187 wildcard++;
1188 }
1189 if (wildcard < matchwild) {
1190 match = inp;
1191 matchwild = wildcard;
1192 if (matchwild == 0) {
1193 break;
1194 }
1195 }
1196 }
1197 }
1198 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,0,0,0,0);
1199 return (match);
1200 }
1201}
1202
1203/*
1204 * Lookup PCB in hash list.
1205 */
1206struct inpcb *
91447636
A
1207in_pcblookup_hash(
1208 struct inpcbinfo *pcbinfo,
1209 struct in_addr faddr,
1210 u_int fport_arg,
1211 struct in_addr laddr,
1212 u_int lport_arg,
1213 int wildcard,
2d21ac55 1214 __unused struct ifnet *ifp)
1c79356b
A
1215{
1216 struct inpcbhead *head;
2d21ac55 1217 struct inpcb *inp;
1c79356b
A
1218 u_short fport = fport_arg, lport = lport_arg;
1219
1220 /*
1221 * We may have found the pcb in the last lookup - check this first.
1222 */
1223
91447636 1224 lck_rw_lock_shared(pcbinfo->mtx);
1c79356b
A
1225
1226 /*
1227 * First look for an exact match.
1228 */
1229 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
9bccf70c
A
1230 LIST_FOREACH(inp, head, inp_hash) {
1231#if INET6
1232 if ((inp->inp_vflag & INP_IPV4) == 0)
1c79356b 1233 continue;
9bccf70c 1234#endif
1c79356b
A
1235 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1236 inp->inp_laddr.s_addr == laddr.s_addr &&
1237 inp->inp_fport == fport &&
1238 inp->inp_lport == lport) {
1239 /*
1240 * Found.
1241 */
91447636
A
1242 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1243 lck_rw_done(pcbinfo->mtx);
1244 return (inp);
1245 }
1246 else { /* it's there but dead, say it isn't found */
1247 lck_rw_done(pcbinfo->mtx);
1248 return(NULL);
1249 }
1c79356b
A
1250 }
1251 }
1252 if (wildcard) {
1253 struct inpcb *local_wild = NULL;
1254#if INET6
1255 struct inpcb *local_wild_mapped = NULL;
1256#endif
1257
1258 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
9bccf70c
A
1259 LIST_FOREACH(inp, head, inp_hash) {
1260#if INET6
1261 if ((inp->inp_vflag & INP_IPV4) == 0)
1c79356b 1262 continue;
9bccf70c 1263#endif
1c79356b
A
1264 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1265 inp->inp_lport == lport) {
1266#if defined(NFAITH) && NFAITH > 0
1267 if (ifp && ifp->if_type == IFT_FAITH &&
1268 (inp->inp_flags & INP_FAITH) == 0)
1269 continue;
1270#endif
91447636
A
1271 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1272 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1273 lck_rw_done(pcbinfo->mtx);
1274 return (inp);
1275 }
1276 else { /* it's there but dead, say it isn't found */
1277 lck_rw_done(pcbinfo->mtx);
1278 return(NULL);
1279 }
1280 }
1c79356b 1281 else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2d21ac55 1282#if INET6
1c79356b
A
1283 if (INP_CHECK_SOCKAF(inp->inp_socket,
1284 AF_INET6))
1285 local_wild_mapped = inp;
1286 else
2d21ac55 1287#endif /* INET6 */
1c79356b
A
1288 local_wild = inp;
1289 }
1290 }
1291 }
91447636 1292 if (local_wild == NULL) {
2d21ac55 1293#if INET6
91447636
A
1294 if (local_wild_mapped != NULL) {
1295 if (in_pcb_checkstate(local_wild_mapped, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1296 lck_rw_done(pcbinfo->mtx);
1297 return (local_wild_mapped);
1298 }
1299 else { /* it's there but dead, say it isn't found */
1300 lck_rw_done(pcbinfo->mtx);
1301 return(NULL);
1302 }
1303 }
2d21ac55 1304#endif /* INET6 */
91447636
A
1305 lck_rw_done(pcbinfo->mtx);
1306 return (NULL);
1307 }
91447636
A
1308 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1309 lck_rw_done(pcbinfo->mtx);
1310 return (local_wild);
1311 }
1312 else { /* it's there but dead, say it isn't found */
1313 lck_rw_done(pcbinfo->mtx);
1314 return(NULL);
1315 }
1c79356b
A
1316 }
1317
1318 /*
1319 * Not found.
1320 */
91447636 1321 lck_rw_done(pcbinfo->mtx);
1c79356b
A
1322 return (NULL);
1323}
1324
1325/*
1326 * Insert PCB onto various hash lists.
1327 */
1328int
2d21ac55 1329in_pcbinshash(struct inpcb *inp, int locked)
1c79356b
A
1330{
1331 struct inpcbhead *pcbhash;
1332 struct inpcbporthead *pcbporthash;
1333 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1334 struct inpcbport *phd;
1335 u_int32_t hashkey_faddr;
1336
b0d623f7
A
1337 if (!locked) {
1338 if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) {
1339 /*lock inversion issue, mostly with udp multicast packets */
1340 socket_unlock(inp->inp_socket, 0);
1341 lck_rw_lock_exclusive(pcbinfo->mtx);
1342 socket_lock(inp->inp_socket, 0);
1343 }
1344 }
1345
1c79356b
A
1346#if INET6
1347 if (inp->inp_vflag & INP_IPV6)
1348 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1349 else
1350#endif /* INET6 */
1351 hashkey_faddr = inp->inp_faddr.s_addr;
1352
91447636
A
1353 inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask);
1354
91447636 1355 pcbhash = &pcbinfo->hashbase[inp->hash_element];
1c79356b
A
1356
1357 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
1358 pcbinfo->porthashmask)];
1359
1360 /*
1361 * Go through port list and look for a head for this lport.
1362 */
9bccf70c 1363 LIST_FOREACH(phd, pcbporthash, phd_hash) {
1c79356b
A
1364 if (phd->phd_port == inp->inp_lport)
1365 break;
1366 }
1367 /*
1368 * If none exists, malloc one and tack it on.
1369 */
1370 if (phd == NULL) {
0b4e3aa0 1371 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_WAITOK);
1c79356b 1372 if (phd == NULL) {
91447636
A
1373 if (!locked)
1374 lck_rw_done(pcbinfo->mtx);
1c79356b
A
1375 return (ENOBUFS); /* XXX */
1376 }
1377 phd->phd_port = inp->inp_lport;
1378 LIST_INIT(&phd->phd_pcblist);
1379 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
1380 }
1381 inp->inp_phd = phd;
1382 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
1383 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
91447636
A
1384 if (!locked)
1385 lck_rw_done(pcbinfo->mtx);
1c79356b
A
1386 return (0);
1387}
1388
1389/*
1390 * Move PCB to the proper hash bucket when { faddr, fport } have been
1391 * changed. NOTE: This does not handle the case of the lport changing (the
1392 * hashed port list would have to be updated as well), so the lport must
1393 * not change after in_pcbinshash() has been called.
1394 */
1395void
2d21ac55 1396in_pcbrehash(struct inpcb *inp)
1c79356b
A
1397{
1398 struct inpcbhead *head;
1399 u_int32_t hashkey_faddr;
1400
1401#if INET6
1402 if (inp->inp_vflag & INP_IPV6)
1403 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1404 else
1405#endif /* INET6 */
1406 hashkey_faddr = inp->inp_faddr.s_addr;
91447636
A
1407 inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
1408 inp->inp_fport, inp->inp_pcbinfo->hashmask);
1409 head = &inp->inp_pcbinfo->hashbase[inp->hash_element];
1c79356b
A
1410
1411 LIST_REMOVE(inp, inp_hash);
1412 LIST_INSERT_HEAD(head, inp, inp_hash);
1c79356b
A
1413}
1414
1415/*
1416 * Remove PCB from various lists.
1417 */
91447636 1418//###LOCK must be called with list lock held
1c79356b 1419void
2d21ac55 1420in_pcbremlists(struct inpcb *inp)
1c79356b
A
1421{
1422 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
1c79356b
A
1423
1424 if (inp->inp_lport) {
1425 struct inpcbport *phd = inp->inp_phd;
1426
1427 LIST_REMOVE(inp, inp_hash);
1428 LIST_REMOVE(inp, inp_portlist);
55e303ae 1429 if (phd != NULL && (LIST_FIRST(&phd->phd_pcblist) == NULL)) {
1c79356b
A
1430 LIST_REMOVE(phd, phd_hash);
1431 FREE(phd, M_PCB);
1432 }
1433 }
1c79356b
A
1434 LIST_REMOVE(inp, inp_list);
1435 inp->inp_pcbinfo->ipi_count--;
1436}
1437
91447636
A
1438/* Mechanism used to defer the memory release of PCBs
1439 * The pcb list will contain the pcb until the ripper can clean it up if
1440 * the following conditions are met: 1) state "DEAD", 2) wantcnt is STOPUSING
1441 * 3) usecount is null
1442 * This function will be called to either mark the pcb as
1443*/
1444int
1445in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
91447636
A
1446{
1447
1448 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
2d21ac55
A
1449 UInt32 origwant;
1450 UInt32 newwant;
91447636
A
1451
1452 switch (mode) {
1453
1454 case WNT_STOPUSING: /* try to mark the pcb as ready for recycling */
1455
1456 /* compareswap with STOPUSING, if success we're good, if it's in use, will be marked later */
1457
1458 if (locked == 0)
1459 socket_lock(pcb->inp_socket, 1);
1460 pcb->inp_state = INPCB_STATE_DEAD;
1461stopusing:
1462 if (pcb->inp_socket->so_usecount < 0)
2d21ac55 1463 panic("in_pcb_checkstate STOP pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket);
91447636
A
1464 if (locked == 0)
1465 socket_unlock(pcb->inp_socket, 1);
1466
1467 origwant = *wantcnt;
1468 if ((UInt16) origwant == 0xffff ) /* should stop using */
1469 return (WNT_STOPUSING);
1470 newwant = 0xffff;
1471 if ((UInt16) origwant == 0) {/* try to mark it as unsuable now */
2d21ac55 1472 OSCompareAndSwap(origwant, newwant, wantcnt) ;
91447636
A
1473 }
1474 return (WNT_STOPUSING);
1475 break;
1476
1477 case WNT_ACQUIRE: /* try to increase reference to pcb */
1478 /* if WNT_STOPUSING should bail out */
1479 /*
1480 * if socket state DEAD, try to set count to STOPUSING, return failed
1481 * otherwise increase cnt
1482 */
1483 do {
1484 origwant = *wantcnt;
1485 if ((UInt16) origwant == 0xffff ) {/* should stop using */
2d21ac55 1486// printf("in_pcb_checkstate: ACQ PCB was STOPUSING while release. odd pcb=%p\n", pcb);
91447636
A
1487 return (WNT_STOPUSING);
1488 }
1489 newwant = origwant + 1;
2d21ac55 1490 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
91447636
A
1491 return (WNT_ACQUIRE);
1492 break;
1493
1494 case WNT_RELEASE: /* release reference. if result is null and pcb state is DEAD,
1495 set wanted bit to STOPUSING
1496 */
1497
1498 if (locked == 0)
1499 socket_lock(pcb->inp_socket, 1);
1500
1501 do {
1502 origwant = *wantcnt;
1503 if ((UInt16) origwant == 0x0 )
2d21ac55 1504 panic("in_pcb_checkstate pcb=%p release with zero count", pcb);
91447636
A
1505 if ((UInt16) origwant == 0xffff ) {/* should stop using */
1506#if TEMPDEBUG
2d21ac55 1507 printf("in_pcb_checkstate: REL PCB was STOPUSING while release. odd pcb=%p\n", pcb);
91447636
A
1508#endif
1509 if (locked == 0)
1510 socket_unlock(pcb->inp_socket, 1);
1511 return (WNT_STOPUSING);
1512 }
1513 newwant = origwant - 1;
2d21ac55 1514 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
91447636
A
1515
1516 if (pcb->inp_state == INPCB_STATE_DEAD)
1517 goto stopusing;
1518 if (pcb->inp_socket->so_usecount < 0)
2d21ac55 1519 panic("in_pcb_checkstate RELEASE pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket);
91447636
A
1520
1521 if (locked == 0)
1522 socket_unlock(pcb->inp_socket, 1);
1523 return (WNT_RELEASE);
1524 break;
1525
1526 default:
1527
2d21ac55 1528 panic("in_pcb_checkstate: so=%p not a valid state =%x\n", pcb->inp_socket, mode);
91447636
A
1529 }
1530
1531 /* NOTREACHED */
1532 return (mode);
1533}
1534
1535/*
1536 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
1537 * The inpcb_compat data structure is passed to user space and must
b0d623f7 1538 * not change. We intentionally avoid copying pointers.
91447636
A
1539 */
1540void
1541inpcb_to_compat(
1542 struct inpcb *inp,
1543 struct inpcb_compat *inp_compat)
1544{
1545 bzero(inp_compat, sizeof(*inp_compat));
1546 inp_compat->inp_fport = inp->inp_fport;
1547 inp_compat->inp_lport = inp->inp_lport;
91447636
A
1548 inp_compat->nat_owner = inp->nat_owner;
1549 inp_compat->nat_cookie = inp->nat_cookie;
1550 inp_compat->inp_gencnt = inp->inp_gencnt;
1551 inp_compat->inp_flags = inp->inp_flags;
1552 inp_compat->inp_flow = inp->inp_flow;
1553 inp_compat->inp_vflag = inp->inp_vflag;
1554 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
1555 inp_compat->inp_ip_p = inp->inp_ip_p;
1556 inp_compat->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
1557 inp_compat->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
1558 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
1559 inp_compat->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim;
1560 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
b0d623f7 1561 inp_compat->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex;
91447636
A
1562 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
1563}
9bccf70c 1564
b0d623f7
A
1565#if !CONFIG_EMBEDDED
1566
1567void
1568inpcb_to_xinpcb64(
1569 struct inpcb *inp,
1570 struct xinpcb64 *xinp)
1571{
1572 xinp->inp_fport = inp->inp_fport;
1573 xinp->inp_lport = inp->inp_lport;
1574 xinp->inp_gencnt = inp->inp_gencnt;
1575 xinp->inp_flags = inp->inp_flags;
1576 xinp->inp_flow = inp->inp_flow;
1577 xinp->inp_vflag = inp->inp_vflag;
1578 xinp->inp_ip_ttl = inp->inp_ip_ttl;
1579 xinp->inp_ip_p = inp->inp_ip_p;
1580 xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
1581 xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
1582 xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
1583 xinp->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim;
1584 xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
1585 xinp->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex;
1586 xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
1587}
1588
1589#endif /* !CONFIG_EMBEDDED */
1590
1591/*
1592 * The following routines implement this scheme:
1593 *
1594 * Callers of ip_output() that intend to cache the route in the inpcb pass
1595 * a local copy of the struct route to ip_output(). Using a local copy of
1596 * the cached route significantly simplifies things as IP no longer has to
1597 * worry about having exclusive access to the passed in struct route, since
1598 * it's defined in the caller's stack; in essence, this allows for a lock-
1599 * less operation when updating the struct route at the IP level and below,
1600 * whenever necessary. The scheme works as follows:
1601 *
1602 * Prior to dropping the socket's lock and calling ip_output(), the caller
1603 * copies the struct route from the inpcb into its stack, and adds a reference
1604 * to the cached route entry, if there was any. The socket's lock is then
1605 * dropped and ip_output() is called with a pointer to the copy of struct
1606 * route defined on the stack (not to the one in the inpcb.)
1607 *
1608 * Upon returning from ip_output(), the caller then acquires the socket's
1609 * lock and synchronizes the cache; if there is no route cached in the inpcb,
1610 * it copies the local copy of struct route (which may or may not contain any
1611 * route) back into the cache; otherwise, if the inpcb has a route cached in
1612 * it, the one in the local copy will be freed, if there's any. Trashing the
1613 * cached route in the inpcb can be avoided because ip_output() is single-
1614 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
1615 * by the socket/transport layer.)
1616 */
1617void
1618inp_route_copyout(struct inpcb *inp, struct route *dst)
1619{
1620 struct route *src = &inp->inp_route;
1621
1622 lck_mtx_assert(inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
1623
0b4c1975
A
1624 /*
1625 * If the route in the PCB is not for IPv4, blow it away;
1626 * this is possible in the case of IPv4-mapped address case.
1627 */
1628 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) {
1629 rtfree(src->ro_rt);
1630 src->ro_rt = NULL;
1631 }
b0d623f7
A
1632
1633 /* Copy everything (rt, dst, flags) from PCB */
1634 bcopy(src, dst, sizeof (*dst));
1635
1636 /* Hold one reference for the local copy of struct route */
1637 if (dst->ro_rt != NULL)
1638 RT_ADDREF(dst->ro_rt);
1639}
1640
1641void
1642inp_route_copyin(struct inpcb *inp, struct route *src)
1643{
1644 struct route *dst = &inp->inp_route;
1645
1646 lck_mtx_assert(inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
1647
1648 /* Minor sanity check */
1649 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
1650 panic("%s: wrong or corrupted route: %p", __func__, src);
1651
1652 /* No cached route in the PCB? */
1653 if (dst->ro_rt == NULL) {
1654 /*
1655 * Copy everything (rt, dst, flags) from ip_output();
1656 * the reference to the route was held at the time
1657 * it was allocated and is kept intact.
1658 */
1659 bcopy(src, dst, sizeof (*dst));
1660 } else if (src->ro_rt != NULL) {
1661 /*
1662 * If the same, update just the ro_flags and ditch the one
1663 * in the local copy. Else ditch the one that is currently
1664 * cached, and cache what we got back from ip_output().
1665 */
1666 if (dst->ro_rt == src->ro_rt) {
1667 dst->ro_flags = src->ro_flags;
1668 rtfree(src->ro_rt);
1669 src->ro_rt = NULL;
1670 } else {
1671 rtfree(dst->ro_rt);
1672 bcopy(src, dst, sizeof (*dst));
1673 }
1674 }
1675}