]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/in_pcb.c
xnu-1699.22.73.tar.gz
[apple/xnu.git] / bsd / netinet / in_pcb.c
1 /*
2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
62 */
63
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/domain.h>
69 #include <sys/protosw.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/proc.h>
73 #ifndef __APPLE__
74 #include <sys/jail.h>
75 #endif
76 #include <sys/kernel.h>
77 #include <sys/sysctl.h>
78 #include <sys/mcache.h>
79 #include <sys/kauth.h>
80 #include <sys/priv.h>
81 #include <libkern/OSAtomic.h>
82
83 #include <machine/limits.h>
84
85 #ifdef __APPLE__
86 #include <kern/zalloc.h>
87 #endif
88
89 #include <net/if.h>
90 #include <net/if_types.h>
91 #include <net/route.h>
92
93 #include <netinet/in.h>
94 #include <netinet/in_pcb.h>
95 #include <netinet/in_var.h>
96 #include <netinet/ip_var.h>
97 #if INET6
98 #include <netinet/ip6.h>
99 #include <netinet6/ip6_var.h>
100 #endif /* INET6 */
101
102 #include "faith.h"
103
104 #if IPSEC
105 #include <netinet6/ipsec.h>
106 #include <netkey/key.h>
107 #endif /* IPSEC */
108
109 #include <sys/kdebug.h>
110 #include <sys/random.h>
111
112 #if IPSEC
113 extern int ipsec_bypass;
114 #endif
115
116 #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
117 #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
118
119 struct in_addr zeroin_addr;
120
121 /*
122 * These configure the range of local port addresses assigned to
123 * "unspecified" outgoing connections/packets/whatever.
124 */
125 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
126 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
127 #ifndef __APPLE__
128 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */
129 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */
130 #else
131 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
132 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
133 #endif
134 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
135 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
136
137 #define RANGECHK(var, min, max) \
138 if ((var) < (min)) { (var) = (min); } \
139 else if ((var) > (max)) { (var) = (max); }
140
141 static int
142 sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
143 {
144 #pragma unused(arg1, arg2)
145 int error = sysctl_handle_int(oidp,
146 oidp->oid_arg1, oidp->oid_arg2, req);
147 if (!error) {
148 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
149 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
150 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
151 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
152 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
153 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
154 }
155 return error;
156 }
157
158 #undef RANGECHK
159
160 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports");
161
162 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
163 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
164 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
165 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
166 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
167 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
168 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
169 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
170 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
171 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
172 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
173 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
174
175 extern int udp_use_randomport;
176 extern int tcp_use_randomport;
177
178 /*
179 * in_pcb.c: manage the Protocol Control Blocks.
180 *
181 * NOTE: It is assumed that most of these functions will be called at
182 * splnet(). XXX - There are, unfortunately, a few exceptions to this
183 * rule that should be fixed.
184 */
185
186 /*
187 * Allocate a PCB and associate it with the socket.
188 *
189 * Returns: 0 Success
190 * ENOBUFS
191 * ENOMEM
192 * ipsec_init_policy:??? [IPSEC]
193 */
194 int
195 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, __unused struct proc *p)
196 {
197 struct inpcb *inp;
198 caddr_t temp;
199 #if IPSEC
200 #ifndef __APPLE__
201 int error;
202 #endif
203 #endif
204 #if CONFIG_MACF_NET
205 int mac_error;
206 #endif
207
208 if (so->cached_in_sock_layer == 0) {
209 #if TEMPDEBUG
210 printf("PCBALLOC calling zalloc for socket %x\n", so);
211 #endif
212 inp = (struct inpcb *) zalloc(pcbinfo->ipi_zone);
213 if (inp == NULL)
214 return (ENOBUFS);
215 bzero((caddr_t)inp, sizeof(*inp));
216 }
217 else {
218 #if TEMPDEBUG
219 printf("PCBALLOC reusing PCB for socket %x\n", so);
220 #endif
221 inp = (struct inpcb *) so->so_saved_pcb;
222 temp = inp->inp_saved_ppcb;
223 bzero((caddr_t) inp, sizeof(*inp));
224 inp->inp_saved_ppcb = temp;
225 }
226
227 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
228 inp->inp_pcbinfo = pcbinfo;
229 inp->inp_socket = so;
230 #if CONFIG_MACF_NET
231 mac_error = mac_inpcb_label_init(inp, M_WAITOK);
232 if (mac_error != 0) {
233 if (so->cached_in_sock_layer == 0)
234 zfree(pcbinfo->ipi_zone, inp);
235 return (mac_error);
236 }
237 mac_inpcb_label_associate(so, inp);
238 #endif
239 // make sure inp_stat is always 64bit aligned
240 inp->inp_stat = (struct inp_stat*)P2ROUNDUP(inp->inp_stat_store, sizeof(u_int64_t));
241 if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store)
242 + sizeof(*inp->inp_stat) > sizeof(inp->inp_stat_store)) {
243 panic("insufficient space to align inp_stat");
244 }
245
246 so->so_pcb = (caddr_t)inp;
247
248 if (so->so_proto->pr_flags & PR_PCBLOCK) {
249 lck_mtx_init(&inp->inpcb_mtx, pcbinfo->mtx_grp, pcbinfo->mtx_attr);
250 }
251
252 #if IPSEC
253 #ifndef __APPLE__
254 if (ipsec_bypass == 0) {
255 error = ipsec_init_policy(so, &inp->inp_sp);
256 if (error != 0) {
257 zfree(pcbinfo->ipi_zone, inp);
258 return error;
259 }
260 }
261 #endif
262 #endif /*IPSEC*/
263 #if INET6
264 if (INP_SOCKAF(so) == AF_INET6 && !ip6_mapped_addr_on)
265 inp->inp_flags |= IN6P_IPV6_V6ONLY;
266 #endif
267
268 #if INET6
269 if (ip6_auto_flowlabel)
270 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
271 #endif
272 lck_rw_lock_exclusive(pcbinfo->mtx);
273 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
274 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
275 pcbinfo->ipi_count++;
276 lck_rw_done(pcbinfo->mtx);
277 return (0);
278 }
279
280
281 /*
282 in_pcblookup_local_and_cleanup does everything
283 in_pcblookup_local does but it checks for a socket
284 that's going away. Since we know that the lock is
285 held read+write when this funciton is called, we
286 can safely dispose of this socket like the slow
287 timer would usually do and return NULL. This is
288 great for bind.
289 */
290 struct inpcb*
291 in_pcblookup_local_and_cleanup(
292 struct inpcbinfo *pcbinfo,
293 struct in_addr laddr,
294 u_int lport_arg,
295 int wild_okay)
296 {
297 struct inpcb *inp;
298
299 /* Perform normal lookup */
300 inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
301
302 /* Check if we found a match but it's waiting to be disposed */
303 if (inp && inp->inp_wantcnt == WNT_STOPUSING) {
304 struct socket *so = inp->inp_socket;
305
306 lck_mtx_lock(&inp->inpcb_mtx);
307
308 if (so->so_usecount == 0) {
309 if (inp->inp_state != INPCB_STATE_DEAD)
310 in_pcbdetach(inp);
311 in_pcbdispose(inp);
312 inp = NULL;
313 }
314 else {
315 lck_mtx_unlock(&inp->inpcb_mtx);
316 }
317 }
318
319 return inp;
320 }
321
#ifdef __APPLE_API_PRIVATE
/*
 * Radar 5523020: post a KEV_INET_PORTINUSE kernel event when a
 * non-participating socket tries to bind a port owned by a socket that
 * has set SOF_NOTIFYCONFLICT.
 *
 * 'port' is passed through ntohs() before being stored in the event, so
 * the event carries it in host order; the event also carries the pid of
 * the calling process.
 */
static void
in_pcb_conflict_post_msg(u_int16_t port)
{
	struct kev_in_portinuse payload;
	struct kev_msg msg;

	bzero(&payload, sizeof (payload));
	payload.port = ntohs(port);	/* port in host order */
	payload.req_pid = proc_selfpid();

	bzero(&msg, sizeof (msg));
	msg.vendor_code = KEV_VENDOR_APPLE;
	msg.kev_class = KEV_NETWORK_CLASS;
	msg.kev_subclass = KEV_INET_SUBCLASS;
	msg.event_code = KEV_INET_PORTINUSE;

	/* Single data vector; a zero length in the next slot ends the list */
	msg.dv[0].data_ptr = &payload;
	msg.dv[0].data_length = sizeof (payload);
	msg.dv[1].data_length = 0;

	kev_post_msg(&msg);
}
#endif
347 /*
348 * Returns: 0 Success
349 * EADDRNOTAVAIL Address not available.
350 * EINVAL Invalid argument
351 * EAFNOSUPPORT Address family not supported [notdef]
352 * EACCES Permission denied
353 * EADDRINUSE Address in use
354 * EAGAIN Resource unavailable, try again
355 * priv_check_cred:EPERM Operation not permitted
356 */
357 int
358 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
359 {
360 struct socket *so = inp->inp_socket;
361 unsigned short *lastport;
362 struct sockaddr_in *sin;
363 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
364 u_short lport = 0, rand_port = 0;
365 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
366 int error, randomport, conflict = 0;
367 kauth_cred_t cred;
368
369 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
370 return (EADDRNOTAVAIL);
371 if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
372 return (EINVAL);
373 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
374 wild = 1;
375 socket_unlock(so, 0); /* keep reference on socket */
376 lck_rw_lock_exclusive(pcbinfo->mtx);
377 if (nam) {
378 unsigned int outif = 0;
379
380 sin = (struct sockaddr_in *)nam;
381 if (nam->sa_len != sizeof (*sin)) {
382 lck_rw_done(pcbinfo->mtx);
383 socket_lock(so, 0);
384 return (EINVAL);
385 }
386 #ifdef notdef
387 /*
388 * We should check the family, but old programs
389 * incorrectly fail to initialize it.
390 */
391 if (sin->sin_family != AF_INET) {
392 lck_rw_done(pcbinfo->mtx);
393 socket_lock(so, 0);
394 return (EAFNOSUPPORT);
395 }
396 #endif
397 lport = sin->sin_port;
398 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
399 /*
400 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
401 * allow complete duplication of binding if
402 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
403 * and a multicast address is bound on both
404 * new and duplicated sockets.
405 */
406 if (so->so_options & SO_REUSEADDR)
407 reuseport = SO_REUSEADDR|SO_REUSEPORT;
408 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
409 struct ifaddr *ifa;
410 sin->sin_port = 0; /* yech... */
411 if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin)) == 0) {
412 lck_rw_done(pcbinfo->mtx);
413 socket_lock(so, 0);
414 return (EADDRNOTAVAIL);
415 }
416 else {
417 IFA_LOCK(ifa);
418 outif = ifa->ifa_ifp->if_index;
419 IFA_UNLOCK(ifa);
420 IFA_REMREF(ifa);
421 }
422 }
423 if (lport) {
424 struct inpcb *t;
425
426 /* GROSS */
427 #if !CONFIG_EMBEDDED
428 if (ntohs(lport) < IPPORT_RESERVED) {
429 cred = kauth_cred_proc_ref(p);
430 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
431 kauth_cred_unref(&cred);
432 if (error != 0) {
433 lck_rw_done(pcbinfo->mtx);
434 socket_lock(so, 0);
435 return (EACCES);
436 }
437 }
438 #endif
439 if (so->so_uid &&
440 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
441 t = in_pcblookup_local_and_cleanup(inp->inp_pcbinfo,
442 sin->sin_addr, lport, INPLOOKUP_WILDCARD);
443 if (t &&
444 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
445 ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
446 (t->inp_socket->so_options &
447 SO_REUSEPORT) == 0) &&
448 (so->so_uid != t->inp_socket->so_uid) &&
449 ((t->inp_socket->so_flags & SOF_REUSESHAREUID) == 0)) {
450 #if INET6
451 if (ntohl(sin->sin_addr.s_addr) !=
452 INADDR_ANY ||
453 ntohl(t->inp_laddr.s_addr) !=
454 INADDR_ANY ||
455 INP_SOCKAF(so) ==
456 INP_SOCKAF(t->inp_socket))
457 #endif /* INET6 */
458 {
459 #ifdef __APPLE_API_PRIVATE
460
461 if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0))
462 conflict = 1;
463
464 lck_rw_done(pcbinfo->mtx);
465
466 if (conflict)
467 in_pcb_conflict_post_msg(lport);
468 #else
469 lck_rw_done(pcbinfo->mtx);
470 #endif /* __APPLE_API_PRIVATE */
471
472 socket_lock(so, 0);
473 return (EADDRINUSE);
474 }
475 }
476 }
477 t = in_pcblookup_local_and_cleanup(pcbinfo, sin->sin_addr,
478 lport, wild);
479 if (t &&
480 (reuseport & t->inp_socket->so_options) == 0) {
481 #if INET6
482 if (ip6_mapped_addr_on == 0 ||
483 ntohl(sin->sin_addr.s_addr) !=
484 INADDR_ANY ||
485 ntohl(t->inp_laddr.s_addr) !=
486 INADDR_ANY ||
487 INP_SOCKAF(so) ==
488 INP_SOCKAF(t->inp_socket))
489 #endif /* INET6 */
490 {
491 #ifdef __APPLE_API_PRIVATE
492
493 if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0))
494 conflict = 1;
495
496 lck_rw_done(pcbinfo->mtx);
497
498 if (conflict)
499 in_pcb_conflict_post_msg(lport);
500 #else
501 lck_rw_done(pcbinfo->mtx);
502 #endif /* __APPLE_API_PRIVATE */
503 socket_lock(so, 0);
504 return (EADDRINUSE);
505 }
506 }
507 }
508 inp->inp_laddr = sin->sin_addr;
509 inp->inp_last_outif = outif;
510 }
511 if (lport == 0) {
512 u_short first, last;
513 int count;
514
515 randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
516 (so->so_type == SOCK_STREAM ? tcp_use_randomport : udp_use_randomport);
517
518 inp->inp_flags |= INP_ANONPORT;
519
520 if (inp->inp_flags & INP_HIGHPORT) {
521 first = ipport_hifirstauto; /* sysctl */
522 last = ipport_hilastauto;
523 lastport = &pcbinfo->lasthi;
524 } else if (inp->inp_flags & INP_LOWPORT) {
525 cred = kauth_cred_proc_ref(p);
526 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
527 kauth_cred_unref(&cred);
528 if (error != 0) {
529 lck_rw_done(pcbinfo->mtx);
530 socket_lock(so, 0);
531 return error;
532 }
533 first = ipport_lowfirstauto; /* 1023 */
534 last = ipport_lowlastauto; /* 600 */
535 lastport = &pcbinfo->lastlow;
536 } else {
537 first = ipport_firstauto; /* sysctl */
538 last = ipport_lastauto;
539 lastport = &pcbinfo->lastport;
540 }
541 /* No point in randomizing if only one port is available */
542
543 if (first == last)
544 randomport = 0;
545 /*
546 * Simple check to ensure all ports are not used up causing
547 * a deadlock here.
548 *
549 * We split the two cases (up and down) so that the direction
550 * is not being tested on each round of the loop.
551 */
552 if (first > last) {
553 /*
554 * counting down
555 */
556 if (randomport) {
557 read_random(&rand_port, sizeof(rand_port));
558 *lastport = first - (rand_port % (first - last));
559 }
560 count = first - last;
561
562 do {
563 if (count-- < 0) { /* completely used? */
564 lck_rw_done(pcbinfo->mtx);
565 socket_lock(so, 0);
566 inp->inp_laddr.s_addr = INADDR_ANY;
567 inp->inp_last_outif = 0;
568 return (EADDRNOTAVAIL);
569 }
570 --*lastport;
571 if (*lastport > first || *lastport < last)
572 *lastport = first;
573 lport = htons(*lastport);
574 } while (in_pcblookup_local_and_cleanup(pcbinfo,
575 inp->inp_laddr, lport, wild));
576 } else {
577 /*
578 * counting up
579 */
580 if (randomport) {
581 read_random(&rand_port, sizeof(rand_port));
582 *lastport = first + (rand_port % (first - last));
583 }
584 count = last - first;
585
586 do {
587 if (count-- < 0) { /* completely used? */
588 lck_rw_done(pcbinfo->mtx);
589 socket_lock(so, 0);
590 inp->inp_laddr.s_addr = INADDR_ANY;
591 inp->inp_last_outif = 0;
592 return (EADDRNOTAVAIL);
593 }
594 ++*lastport;
595 if (*lastport < first || *lastport > last)
596 *lastport = first;
597 lport = htons(*lastport);
598 } while (in_pcblookup_local_and_cleanup(pcbinfo,
599 inp->inp_laddr, lport, wild));
600 }
601 }
602 socket_lock(so, 0);
603 inp->inp_lport = lport;
604 if (in_pcbinshash(inp, 1) != 0) {
605 inp->inp_laddr.s_addr = INADDR_ANY;
606 inp->inp_lport = 0;
607 inp->inp_last_outif = 0;
608 lck_rw_done(pcbinfo->mtx);
609 return (EAGAIN);
610 }
611 lck_rw_done(pcbinfo->mtx);
612 sflt_notify(so, sock_evt_bound, NULL);
613 return (0);
614 }
615
616 /*
617 * Transform old in_pcbconnect() into an inner subroutine for new
618 * in_pcbconnect(): Do some validity-checking on the remote
619 * address (in mbuf 'nam') and then determine local host address
620 * (i.e., which interface) to use to access that remote host.
621 *
622 * This preserves definition of in_pcbconnect(), while supporting a
623 * slightly different version for T/TCP. (This is more than
624 * a bit of a kludge, but cleaning up the internal interfaces would
625 * have forced minor changes in every protocol).
626 *
627 * Returns: 0 Success
628 * EINVAL Invalid argument
629 * EAFNOSUPPORT Address family not supported
630 * EADDRNOTAVAIL Address not available
631 */
632 int
633 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
634 struct sockaddr_in *plocal_sin, unsigned int *out_ifscope)
635 {
636 struct in_ifaddr *ia;
637 struct sockaddr_in *sin = (struct sockaddr_in *)nam;
638
639 if (nam->sa_len != sizeof (*sin))
640 return (EINVAL);
641 if (sin->sin_family != AF_INET)
642 return (EAFNOSUPPORT);
643 if (sin->sin_port == 0)
644 return (EADDRNOTAVAIL);
645
646 lck_rw_lock_shared(in_ifaddr_rwlock);
647 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
648 ia = TAILQ_FIRST(&in_ifaddrhead);
649 /*
650 * If the destination address is INADDR_ANY,
651 * use the primary local address.
652 * If the supplied address is INADDR_BROADCAST,
653 * and the primary interface supports broadcast,
654 * choose the broadcast address for that interface.
655 */
656 #define satosin(sa) ((struct sockaddr_in *)(sa))
657 #define sintosa(sin) ((struct sockaddr *)(sin))
658 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
659 IFA_LOCK_SPIN(&ia->ia_ifa);
660 if (sin->sin_addr.s_addr == INADDR_ANY)
661 sin->sin_addr = IA_SIN(ia)->sin_addr;
662 else if (sin->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST &&
663 (ia->ia_ifp->if_flags & IFF_BROADCAST))
664 sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr;
665 IFA_UNLOCK(&ia->ia_ifa);
666 ia = NULL;
667 }
668 lck_rw_done(in_ifaddr_rwlock);
669
670 if (inp->inp_laddr.s_addr == INADDR_ANY) {
671 struct route *ro;
672 unsigned int ifscope = IFSCOPE_NONE;
673 unsigned int nocell;
674 /*
675 * If the socket is bound to a specifc interface, the
676 * optional scoped takes precedence over that if it
677 * is set by the caller.
678 */
679 ia = (struct in_ifaddr *)0;
680
681 if (out_ifscope != NULL && *out_ifscope != IFSCOPE_NONE)
682 ifscope = *out_ifscope;
683 else if (inp->inp_flags & INP_BOUND_IF)
684 ifscope = inp->inp_boundif;
685
686 nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
687 /*
688 * If route is known or can be allocated now,
689 * our src addr is taken from the i/f, else punt.
690 * Note that we should check the address family of the cached
691 * destination, in case of sharing the cache with IPv6.
692 */
693 ro = &inp->inp_route;
694 if (ro->ro_rt != NULL)
695 RT_LOCK_SPIN(ro->ro_rt);
696 if (ro->ro_rt && (ro->ro_dst.sa_family != AF_INET ||
697 satosin(&ro->ro_dst)->sin_addr.s_addr !=
698 sin->sin_addr.s_addr ||
699 inp->inp_socket->so_options & SO_DONTROUTE ||
700 ro->ro_rt->generation_id != route_generation)) {
701 RT_UNLOCK(ro->ro_rt);
702 rtfree(ro->ro_rt);
703 ro->ro_rt = NULL;
704 }
705 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
706 (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
707 if (ro->ro_rt != NULL)
708 RT_UNLOCK(ro->ro_rt);
709 /* No route yet, so try to acquire one */
710 bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
711 ro->ro_dst.sa_family = AF_INET;
712 ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
713 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
714 sin->sin_addr;
715 rtalloc_scoped(ro, ifscope);
716 if (ro->ro_rt != NULL)
717 RT_LOCK_SPIN(ro->ro_rt);
718 }
719 /*
720 * If the route points to a cellular interface and the
721 * caller forbids our using interfaces of such type,
722 * pretend that there is no route.
723 */
724 if (nocell && ro->ro_rt != NULL) {
725 RT_LOCK_ASSERT_HELD(ro->ro_rt);
726 if (ro->ro_rt->rt_ifp->if_type == IFT_CELLULAR) {
727 RT_UNLOCK(ro->ro_rt);
728 rtfree(ro->ro_rt);
729 ro->ro_rt = NULL;
730 }
731 }
732 /*
733 * If we found a route, use the address
734 * corresponding to the outgoing interface
735 * unless it is the loopback (in case a route
736 * to our address on another net goes to loopback).
737 */
738 if (ro->ro_rt != NULL) {
739 /* Become a regular mutex */
740 RT_CONVERT_LOCK(ro->ro_rt);
741 if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
742 ia = ifatoia(ro->ro_rt->rt_ifa);
743 if (ia) {
744 IFA_ADDREF(&ia->ia_ifa);
745 }
746 }
747 RT_UNLOCK(ro->ro_rt);
748 }
749 if (ia == 0) {
750 u_short fport = sin->sin_port;
751
752 sin->sin_port = 0;
753 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
754 if (ia == 0) {
755 ia = ifatoia(ifa_ifwithnet_scoped(sintosa(sin),
756 ifscope));
757 }
758 sin->sin_port = fport;
759 if (ia == 0) {
760 lck_rw_lock_shared(in_ifaddr_rwlock);
761 ia = TAILQ_FIRST(&in_ifaddrhead);
762 if (ia)
763 IFA_ADDREF(&ia->ia_ifa);
764 lck_rw_done(in_ifaddr_rwlock);
765 }
766 /*
767 * If the source address belongs to a cellular interface
768 * and the socket forbids our using interfaces of such
769 * type, pretend that there is no source address.
770 */
771 if (nocell && ia != NULL &&
772 ia->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR) {
773 IFA_REMREF(&ia->ia_ifa);
774 ia = NULL;
775 }
776 if (ia == 0)
777 return (EADDRNOTAVAIL);
778 }
779 /*
780 * If the destination address is multicast and an outgoing
781 * interface has been set as a multicast option, use the
782 * address of that interface as our source address.
783 */
784 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
785 inp->inp_moptions != NULL) {
786 struct ip_moptions *imo;
787 struct ifnet *ifp;
788
789 imo = inp->inp_moptions;
790 IMO_LOCK(imo);
791 if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
792 ia->ia_ifp != imo->imo_multicast_ifp)) {
793 ifp = imo->imo_multicast_ifp;
794 if (ia)
795 IFA_REMREF(&ia->ia_ifa);
796 lck_rw_lock_shared(in_ifaddr_rwlock);
797 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
798 if (ia->ia_ifp == ifp)
799 break;
800 }
801 if (ia)
802 IFA_ADDREF(&ia->ia_ifa);
803 lck_rw_done(in_ifaddr_rwlock);
804 if (ia == 0) {
805 IMO_UNLOCK(imo);
806 return (EADDRNOTAVAIL);
807 }
808 }
809 IMO_UNLOCK(imo);
810 }
811 /*
812 * Don't do pcblookup call here; return interface in plocal_sin
813 * and exit to caller, that will do the lookup.
814 */
815 IFA_LOCK_SPIN(&ia->ia_ifa);
816 *plocal_sin = ia->ia_addr;
817 if (out_ifscope != NULL)
818 *out_ifscope = ia->ia_ifp->if_index;
819 IFA_UNLOCK(&ia->ia_ifa);
820 IFA_REMREF(&ia->ia_ifa);
821 }
822 return(0);
823 }
824
/*
 * Outer subroutine:
 * Connect from a socket to a specified address.
 * Both address and port must be specified in argument sin.
 * If don't have a local address for this socket yet,
 * then pick one.
 *
 * Entered with the socket locked.  The socket lock is dropped around
 * the duplicate-connection lookup, and may be dropped again while
 * waiting for the pcbinfo lock; the function returns with the socket
 * locked and the pcbinfo lock released.
 *
 * Returns:	0			Success
 *		ECONNREFUSED		Socket was aborted while unlocked
 *		EADDRINUSE		An identical connection already exists
 *		in_pcbladdr:???
 *		in_pcbbind:???
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, unsigned int *ifscope)
{
	struct sockaddr_in ifaddr;
	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
	struct inpcb *pcb;
	int error;
	int laddr_unset;

	/*
	 * Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &ifaddr, ifscope)) != 0)
		return (error);

	socket_unlock(inp->inp_socket, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr.sin_addr,
	    inp->inp_lport, 0, NULL);
	socket_lock(inp->inp_socket, 0);

	/*
	 * Check if the socket is still in a valid state. When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((inp->inp_socket->so_flags & SOF_ABORTED) != 0) {
		/*
		 * A PCB returned by the lookup above is released on the
		 * EADDRINUSE path below; it must be released here too or
		 * the reference would never be dropped.
		 */
		if (pcb != NULL)
			in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return ECONNREFUSED;
	}

	if (pcb != NULL) {
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return (EADDRINUSE);
	}

	/* Decide before any further unlocking whether a source is needed */
	laddr_unset = (inp->inp_laddr.s_addr == INADDR_ANY);

	if (laddr_unset && inp->inp_lport == 0) {
		error = in_pcbbind(inp, (struct sockaddr *)0, p);
		if (error)
			return (error);
	}

	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
		/* lock inversion issue, mostly with udp multicast packets */
		socket_unlock(inp->inp_socket, 0);
		lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
		socket_lock(inp->inp_socket, 0);
	}

	if (laddr_unset) {
		inp->inp_laddr = ifaddr.sin_addr;
		inp->inp_last_outif = ifscope ? *ifscope : IFSCOPE_NONE;
		inp->inp_flags |= INP_INADDR_ANY;
	}
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->mtx);
	return (0);
}
894
895 void
896 in_pcbdisconnect(struct inpcb *inp)
897 {
898
899 inp->inp_faddr.s_addr = INADDR_ANY;
900 inp->inp_fport = 0;
901
902 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
903 /*lock inversion issue, mostly with udp multicast packets */
904 socket_unlock(inp->inp_socket, 0);
905 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
906 socket_lock(inp->inp_socket, 0);
907 }
908
909 in_pcbrehash(inp);
910 lck_rw_done(inp->inp_pcbinfo->mtx);
911
912 if (inp->inp_socket->so_state & SS_NOFDREF)
913 in_pcbdetach(inp);
914 }
915
916 void
917 in_pcbdetach(struct inpcb *inp)
918 {
919 struct socket *so = inp->inp_socket;
920
921 if (so->so_pcb == 0) { /* we've been called twice */
922 panic("in_pcbdetach: inp=%p so=%p proto=%d so_pcb is null!\n",
923 inp, so, so->so_proto->pr_protocol);
924 }
925
926 #if IPSEC
927 if (ipsec_bypass == 0) {
928 ipsec4_delete_pcbpolicy(inp);
929 }
930 #endif /*IPSEC*/
931
932 /* mark socket state as dead */
933 if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING)
934 panic("in_pcbdetach so=%p prot=%x couldn't set to STOPUSING\n", so, so->so_proto->pr_protocol);
935
936 #if TEMPDEBUG
937 if (so->cached_in_sock_layer)
938 printf("in_pcbdetach for cached socket %x flags=%x\n", so, so->so_flags);
939 else
940 printf("in_pcbdetach for allocated socket %x flags=%x\n", so, so->so_flags);
941 #endif
942 if ((so->so_flags & SOF_PCBCLEARING) == 0) {
943 struct rtentry *rt;
944 struct ip_moptions *imo;
945
946 inp->inp_vflag = 0;
947 if (inp->inp_options)
948 (void)m_free(inp->inp_options);
949 if ((rt = inp->inp_route.ro_rt) != NULL) {
950 inp->inp_route.ro_rt = NULL;
951 rtfree(rt);
952 }
953 imo = inp->inp_moptions;
954 inp->inp_moptions = NULL;
955 if (imo != NULL)
956 IMO_REMREF(imo);
957 sofreelastref(so, 0);
958 inp->inp_state = INPCB_STATE_DEAD;
959 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
960 }
961 }
962
963
964 void
965 in_pcbdispose(struct inpcb *inp)
966 {
967 struct socket *so = inp->inp_socket;
968 struct inpcbinfo *ipi = inp->inp_pcbinfo;
969
970 #if TEMPDEBUG
971 if (inp->inp_state != INPCB_STATE_DEAD) {
972 printf("in_pcbdispose: not dead yet? so=%p\n", so);
973 }
974 #endif
975 if (so && so->so_usecount != 0)
976 panic("%s: so %p so_usecount %d so_lockhistory %s\n",
977 __func__, so, so->so_usecount,
978 (so != NULL) ? solockhistory_nr(so) : "--");
979
980 lck_rw_assert(ipi->mtx, LCK_RW_ASSERT_EXCLUSIVE);
981
982 inp->inp_gencnt = ++ipi->ipi_gencnt;
983 /*### access ipi in in_pcbremlists */
984 in_pcbremlists(inp);
985
986 if (so) {
987 if (so->so_proto->pr_flags & PR_PCBLOCK) {
988 sofreelastref(so, 0);
989 if (so->so_rcv.sb_cc || so->so_snd.sb_cc) {
990 #if TEMPDEBUG
991 printf("in_pcbdispose sb not cleaned up so=%p rc_cci=%x snd_cc=%x\n",
992 so, so->so_rcv.sb_cc, so->so_snd.sb_cc);
993 #endif
994 sbrelease(&so->so_rcv);
995 sbrelease(&so->so_snd);
996 }
997 if (so->so_head != NULL)
998 panic("in_pcbdispose, so=%p head still exist\n", so);
999 lck_mtx_unlock(&inp->inpcb_mtx);
1000 lck_mtx_destroy(&inp->inpcb_mtx, ipi->mtx_grp);
1001 }
1002 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
1003 so->so_saved_pcb = (caddr_t) inp;
1004 so->so_pcb = 0;
1005 inp->inp_socket = 0;
1006 #if CONFIG_MACF_NET
1007 mac_inpcb_label_destroy(inp);
1008 #endif
1009 /*
1010 * In case there a route cached after a detach (possible
1011 * in the tcp case), make sure that it is freed before
1012 * we deallocate the structure.
1013 */
1014 if (inp->inp_route.ro_rt != NULL) {
1015 rtfree(inp->inp_route.ro_rt);
1016 inp->inp_route.ro_rt = NULL;
1017 }
1018 if (so->cached_in_sock_layer == 0) {
1019 zfree(ipi->ipi_zone, inp);
1020 }
1021 sodealloc(so);
1022 }
1023 #if TEMPDEBUG
1024 else
1025 printf("in_pcbdispose: no socket for inp=%p\n", inp);
1026 #endif
1027 }
1028
1029 /*
1030 * The calling convention of in_setsockaddr() and in_setpeeraddr() was
1031 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
1032 * in struct pr_usrreqs, so that protocols can just reference then directly
1033 * without the need for a wrapper function. The socket must have a valid
1034 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
1035 * except through a kernel programming error, so it is acceptable to panic
1036 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap
1037 * because there actually /is/ a programming error somewhere... XXX)
1038 *
1039 * Returns: 0 Success
1040 * ENOBUFS No buffer space available
1041 * ECONNRESET Connection reset
1042 */
1043 int
1044 in_setsockaddr(struct socket *so, struct sockaddr **nam)
1045 {
1046 struct inpcb *inp;
1047 struct sockaddr_in *sin;
1048
1049 /*
1050 * Do the malloc first in case it blocks.
1051 */
1052 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
1053 if (sin == NULL)
1054 return ENOBUFS;
1055 bzero(sin, sizeof *sin);
1056 sin->sin_family = AF_INET;
1057 sin->sin_len = sizeof(*sin);
1058
1059 inp = sotoinpcb(so);
1060 if (!inp) {
1061 FREE(sin, M_SONAME);
1062 return ECONNRESET;
1063 }
1064 sin->sin_port = inp->inp_lport;
1065 sin->sin_addr = inp->inp_laddr;
1066
1067 *nam = (struct sockaddr *)sin;
1068 return 0;
1069 }
1070
1071 int
1072 in_setpeeraddr(struct socket *so, struct sockaddr **nam)
1073 {
1074 struct inpcb *inp;
1075 struct sockaddr_in *sin;
1076
1077 /*
1078 * Do the malloc first in case it blocks.
1079 */
1080 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
1081 if (sin == NULL)
1082 return ENOBUFS;
1083 bzero((caddr_t)sin, sizeof (*sin));
1084 sin->sin_family = AF_INET;
1085 sin->sin_len = sizeof(*sin);
1086
1087 inp = sotoinpcb(so);
1088 if (!inp) {
1089 FREE(sin, M_SONAME);
1090 return ECONNRESET;
1091 }
1092 sin->sin_port = inp->inp_fport;
1093 sin->sin_addr = inp->inp_faddr;
1094
1095 *nam = (struct sockaddr *)sin;
1096 return 0;
1097 }
1098
1099 void
1100 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1101 int errno, void (*notify)(struct inpcb *, int))
1102 {
1103 struct inpcb *inp;
1104
1105 lck_rw_lock_shared(pcbinfo->mtx);
1106
1107 LIST_FOREACH(inp, pcbinfo->listhead, inp_list) {
1108 #if INET6
1109 if ((inp->inp_vflag & INP_IPV4) == 0)
1110 continue;
1111 #endif
1112 if (inp->inp_faddr.s_addr != faddr.s_addr ||
1113 inp->inp_socket == NULL)
1114 continue;
1115 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
1116 continue;
1117 socket_lock(inp->inp_socket, 1);
1118 (*notify)(inp, errno);
1119 (void)in_pcb_checkstate(inp, WNT_RELEASE, 1);
1120 socket_unlock(inp->inp_socket, 1);
1121 }
1122 lck_rw_done(pcbinfo->mtx);
1123 }
1124
1125 /*
1126 * Check for alternatives when higher level complains
1127 * about service problems. For now, invalidate cached
1128 * routing information. If the route was created dynamically
1129 * (by a redirect), time to try a default gateway again.
1130 */
1131 void
1132 in_losing(struct inpcb *inp)
1133 {
1134 struct rtentry *rt;
1135 struct rt_addrinfo info;
1136
1137 if ((rt = inp->inp_route.ro_rt) != NULL) {
1138 struct in_ifaddr *ia;
1139
1140 bzero((caddr_t)&info, sizeof(info));
1141 RT_LOCK(rt);
1142 info.rti_info[RTAX_DST] =
1143 (struct sockaddr *)&inp->inp_route.ro_dst;
1144 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1145 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1146 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
1147 if (rt->rt_flags & RTF_DYNAMIC) {
1148 /*
1149 * Prevent another thread from modifying rt_key,
1150 * rt_gateway via rt_setgate() after rt_lock is
1151 * dropped by marking the route as defunct.
1152 */
1153 rt->rt_flags |= RTF_CONDEMNED;
1154 RT_UNLOCK(rt);
1155 (void) rtrequest(RTM_DELETE, rt_key(rt),
1156 rt->rt_gateway, rt_mask(rt), rt->rt_flags,
1157 (struct rtentry **)0);
1158 } else {
1159 RT_UNLOCK(rt);
1160 }
1161 /* if the address is gone keep the old route in the pcb */
1162 if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1163 inp->inp_route.ro_rt = NULL;
1164 rtfree(rt);
1165 IFA_REMREF(&ia->ia_ifa);
1166 }
1167 /*
1168 * A new route can be allocated
1169 * the next time output is attempted.
1170 */
1171 }
1172 }
1173
1174 /*
1175 * After a routing change, flush old routing
1176 * and allocate a (hopefully) better one.
1177 */
1178 void
1179 in_rtchange(struct inpcb *inp, __unused int errno)
1180 {
1181 struct rtentry *rt;
1182
1183 if ((rt = inp->inp_route.ro_rt) != NULL) {
1184 struct in_ifaddr *ia;
1185
1186 if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) == NULL) {
1187 return; /* we can't remove the route now. not sure if still ok to use src */
1188 }
1189 IFA_REMREF(&ia->ia_ifa);
1190 rtfree(rt);
1191 inp->inp_route.ro_rt = NULL;
1192 /*
1193 * A new route can be allocated the next time
1194 * output is attempted.
1195 */
1196 }
1197 }
1198
1199 /*
1200 * Lookup a PCB based on the local address and port.
1201 */
1202 struct inpcb *
1203 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
1204 unsigned int lport_arg, int wild_okay)
1205 {
1206 struct inpcb *inp;
1207 int matchwild = 3, wildcard;
1208 u_short lport = lport_arg;
1209
1210 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0,0,0,0,0);
1211
1212 if (!wild_okay) {
1213 struct inpcbhead *head;
1214 /*
1215 * Look for an unconnected (wildcard foreign addr) PCB that
1216 * matches the local address and port we're looking for.
1217 */
1218 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
1219 LIST_FOREACH(inp, head, inp_hash) {
1220 #if INET6
1221 if ((inp->inp_vflag & INP_IPV4) == 0)
1222 continue;
1223 #endif
1224 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1225 inp->inp_laddr.s_addr == laddr.s_addr &&
1226 inp->inp_lport == lport) {
1227 /*
1228 * Found.
1229 */
1230 return (inp);
1231 }
1232 }
1233 /*
1234 * Not found.
1235 */
1236 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0,0,0,0,0);
1237 return (NULL);
1238 } else {
1239 struct inpcbporthead *porthash;
1240 struct inpcbport *phd;
1241 struct inpcb *match = NULL;
1242 /*
1243 * Best fit PCB lookup.
1244 *
1245 * First see if this local port is in use by looking on the
1246 * port hash list.
1247 */
1248 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
1249 pcbinfo->porthashmask)];
1250 LIST_FOREACH(phd, porthash, phd_hash) {
1251 if (phd->phd_port == lport)
1252 break;
1253 }
1254 if (phd != NULL) {
1255 /*
1256 * Port is in use by one or more PCBs. Look for best
1257 * fit.
1258 */
1259 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
1260 wildcard = 0;
1261 #if INET6
1262 if ((inp->inp_vflag & INP_IPV4) == 0)
1263 continue;
1264 #endif
1265 if (inp->inp_faddr.s_addr != INADDR_ANY)
1266 wildcard++;
1267 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1268 if (laddr.s_addr == INADDR_ANY)
1269 wildcard++;
1270 else if (inp->inp_laddr.s_addr != laddr.s_addr)
1271 continue;
1272 } else {
1273 if (laddr.s_addr != INADDR_ANY)
1274 wildcard++;
1275 }
1276 if (wildcard < matchwild) {
1277 match = inp;
1278 matchwild = wildcard;
1279 if (matchwild == 0) {
1280 break;
1281 }
1282 }
1283 }
1284 }
1285 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,0,0,0,0);
1286 return (match);
1287 }
1288 }
1289
1290 /*
1291 * Check if PCB exists in hash list.
1292 */
1293 int
1294 in_pcblookup_hash_exists(
1295 struct inpcbinfo *pcbinfo,
1296 struct in_addr faddr,
1297 u_int fport_arg,
1298 struct in_addr laddr,
1299 u_int lport_arg,
1300 int wildcard,
1301 uid_t *uid,
1302 gid_t *gid,
1303 __unused struct ifnet *ifp)
1304 {
1305 struct inpcbhead *head;
1306 struct inpcb *inp;
1307 u_short fport = fport_arg, lport = lport_arg;
1308 int found;
1309
1310 *uid = UID_MAX;
1311 *gid = GID_MAX;
1312
1313 /*
1314 * We may have found the pcb in the last lookup - check this first.
1315 */
1316
1317 lck_rw_lock_shared(pcbinfo->mtx);
1318
1319 /*
1320 * First look for an exact match.
1321 */
1322 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
1323 pcbinfo->hashmask)];
1324 LIST_FOREACH(inp, head, inp_hash) {
1325 #if INET6
1326 if ((inp->inp_vflag & INP_IPV4) == 0)
1327 continue;
1328 #endif
1329 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1330 inp->inp_laddr.s_addr == laddr.s_addr &&
1331 inp->inp_fport == fport &&
1332 inp->inp_lport == lport) {
1333 if ((found = (inp->inp_socket != NULL))) {
1334 /*
1335 * Found.
1336 */
1337 *uid = inp->inp_socket->so_uid;
1338 *gid = inp->inp_socket->so_gid;
1339 }
1340 lck_rw_done(pcbinfo->mtx);
1341 return (found);
1342 }
1343 }
1344 if (wildcard) {
1345 struct inpcb *local_wild = NULL;
1346 #if INET6
1347 struct inpcb *local_wild_mapped = NULL;
1348 #endif
1349
1350 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
1351 pcbinfo->hashmask)];
1352 LIST_FOREACH(inp, head, inp_hash) {
1353 #if INET6
1354 if ((inp->inp_vflag & INP_IPV4) == 0)
1355 continue;
1356 #endif
1357 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1358 inp->inp_lport == lport) {
1359 #if defined(NFAITH) && NFAITH > 0
1360 if (ifp && ifp->if_type == IFT_FAITH &&
1361 (inp->inp_flags & INP_FAITH) == 0)
1362 continue;
1363 #endif
1364 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1365 if ((found = (inp->inp_socket != NULL))) {
1366 *uid = inp->inp_socket->so_uid;
1367 *gid = inp->inp_socket->so_gid;
1368 }
1369 lck_rw_done(pcbinfo->mtx);
1370 return (found);
1371 }
1372 else if (inp->inp_laddr.s_addr == INADDR_ANY) {
1373 #if INET6
1374 if (inp->inp_socket &&
1375 INP_CHECK_SOCKAF(inp->inp_socket,
1376 AF_INET6))
1377 local_wild_mapped = inp;
1378 else
1379 #endif /* INET6 */
1380 local_wild = inp;
1381 }
1382 }
1383 }
1384 if (local_wild == NULL) {
1385 #if INET6
1386 if (local_wild_mapped != NULL) {
1387 if ((found = (local_wild_mapped->inp_socket != NULL))) {
1388 *uid = local_wild_mapped->inp_socket->so_uid;
1389 *gid = local_wild_mapped->inp_socket->so_gid;
1390 }
1391 lck_rw_done(pcbinfo->mtx);
1392 return (found);
1393 }
1394 #endif /* INET6 */
1395 lck_rw_done(pcbinfo->mtx);
1396 return (0);
1397 }
1398 if (local_wild != NULL) {
1399 if ((found = (local_wild->inp_socket != NULL))) {
1400 *uid = local_wild->inp_socket->so_uid;
1401 *gid = local_wild->inp_socket->so_gid;
1402 }
1403 lck_rw_done(pcbinfo->mtx);
1404 return (found);
1405 }
1406 }
1407
1408 /*
1409 * Not found.
1410 */
1411 lck_rw_done(pcbinfo->mtx);
1412 return (0);
1413 }
1414
1415 /*
1416 * Lookup PCB in hash list.
1417 */
1418 struct inpcb *
1419 in_pcblookup_hash(
1420 struct inpcbinfo *pcbinfo,
1421 struct in_addr faddr,
1422 u_int fport_arg,
1423 struct in_addr laddr,
1424 u_int lport_arg,
1425 int wildcard,
1426 __unused struct ifnet *ifp)
1427 {
1428 struct inpcbhead *head;
1429 struct inpcb *inp;
1430 u_short fport = fport_arg, lport = lport_arg;
1431
1432 /*
1433 * We may have found the pcb in the last lookup - check this first.
1434 */
1435
1436 lck_rw_lock_shared(pcbinfo->mtx);
1437
1438 /*
1439 * First look for an exact match.
1440 */
1441 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
1442 LIST_FOREACH(inp, head, inp_hash) {
1443 #if INET6
1444 if ((inp->inp_vflag & INP_IPV4) == 0)
1445 continue;
1446 #endif
1447 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1448 inp->inp_laddr.s_addr == laddr.s_addr &&
1449 inp->inp_fport == fport &&
1450 inp->inp_lport == lport) {
1451 /*
1452 * Found.
1453 */
1454 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1455 lck_rw_done(pcbinfo->mtx);
1456 return (inp);
1457 }
1458 else { /* it's there but dead, say it isn't found */
1459 lck_rw_done(pcbinfo->mtx);
1460 return(NULL);
1461 }
1462 }
1463 }
1464 if (wildcard) {
1465 struct inpcb *local_wild = NULL;
1466 #if INET6
1467 struct inpcb *local_wild_mapped = NULL;
1468 #endif
1469
1470 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
1471 LIST_FOREACH(inp, head, inp_hash) {
1472 #if INET6
1473 if ((inp->inp_vflag & INP_IPV4) == 0)
1474 continue;
1475 #endif
1476 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1477 inp->inp_lport == lport) {
1478 #if defined(NFAITH) && NFAITH > 0
1479 if (ifp && ifp->if_type == IFT_FAITH &&
1480 (inp->inp_flags & INP_FAITH) == 0)
1481 continue;
1482 #endif
1483 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1484 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1485 lck_rw_done(pcbinfo->mtx);
1486 return (inp);
1487 }
1488 else { /* it's there but dead, say it isn't found */
1489 lck_rw_done(pcbinfo->mtx);
1490 return(NULL);
1491 }
1492 }
1493 else if (inp->inp_laddr.s_addr == INADDR_ANY) {
1494 #if INET6
1495 if (INP_CHECK_SOCKAF(inp->inp_socket,
1496 AF_INET6))
1497 local_wild_mapped = inp;
1498 else
1499 #endif /* INET6 */
1500 local_wild = inp;
1501 }
1502 }
1503 }
1504 if (local_wild == NULL) {
1505 #if INET6
1506 if (local_wild_mapped != NULL) {
1507 if (in_pcb_checkstate(local_wild_mapped, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1508 lck_rw_done(pcbinfo->mtx);
1509 return (local_wild_mapped);
1510 }
1511 else { /* it's there but dead, say it isn't found */
1512 lck_rw_done(pcbinfo->mtx);
1513 return(NULL);
1514 }
1515 }
1516 #endif /* INET6 */
1517 lck_rw_done(pcbinfo->mtx);
1518 return (NULL);
1519 }
1520 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1521 lck_rw_done(pcbinfo->mtx);
1522 return (local_wild);
1523 }
1524 else { /* it's there but dead, say it isn't found */
1525 lck_rw_done(pcbinfo->mtx);
1526 return(NULL);
1527 }
1528 }
1529
1530 /*
1531 * Not found.
1532 */
1533 lck_rw_done(pcbinfo->mtx);
1534 return (NULL);
1535 }
1536
1537 /*
1538 * Insert PCB onto various hash lists.
1539 */
1540 int
1541 in_pcbinshash(struct inpcb *inp, int locked)
1542 {
1543 struct inpcbhead *pcbhash;
1544 struct inpcbporthead *pcbporthash;
1545 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1546 struct inpcbport *phd;
1547 u_int32_t hashkey_faddr;
1548
1549 if (!locked) {
1550 if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) {
1551 /*lock inversion issue, mostly with udp multicast packets */
1552 socket_unlock(inp->inp_socket, 0);
1553 lck_rw_lock_exclusive(pcbinfo->mtx);
1554 socket_lock(inp->inp_socket, 0);
1555 if (inp->inp_state == INPCB_STATE_DEAD) {
1556 /* The socket got dropped when it was unlocked */
1557 lck_rw_done(pcbinfo->mtx);
1558 return(ECONNABORTED);
1559 }
1560 }
1561 }
1562
1563 #if INET6
1564 if (inp->inp_vflag & INP_IPV6)
1565 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1566 else
1567 #endif /* INET6 */
1568 hashkey_faddr = inp->inp_faddr.s_addr;
1569
1570 inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask);
1571
1572 pcbhash = &pcbinfo->hashbase[inp->hash_element];
1573
1574 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
1575 pcbinfo->porthashmask)];
1576
1577 /*
1578 * Go through port list and look for a head for this lport.
1579 */
1580 LIST_FOREACH(phd, pcbporthash, phd_hash) {
1581 if (phd->phd_port == inp->inp_lport)
1582 break;
1583 }
1584 /*
1585 * If none exists, malloc one and tack it on.
1586 */
1587 if (phd == NULL) {
1588 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_WAITOK);
1589 if (phd == NULL) {
1590 if (!locked)
1591 lck_rw_done(pcbinfo->mtx);
1592 return (ENOBUFS); /* XXX */
1593 }
1594 phd->phd_port = inp->inp_lport;
1595 LIST_INIT(&phd->phd_pcblist);
1596 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
1597 }
1598 inp->inp_phd = phd;
1599 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
1600 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
1601 if (!locked)
1602 lck_rw_done(pcbinfo->mtx);
1603 return (0);
1604 }
1605
1606 /*
1607 * Move PCB to the proper hash bucket when { faddr, fport } have been
1608 * changed. NOTE: This does not handle the case of the lport changing (the
1609 * hashed port list would have to be updated as well), so the lport must
1610 * not change after in_pcbinshash() has been called.
1611 */
1612 void
1613 in_pcbrehash(struct inpcb *inp)
1614 {
1615 struct inpcbhead *head;
1616 u_int32_t hashkey_faddr;
1617
1618 #if INET6
1619 if (inp->inp_vflag & INP_IPV6)
1620 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1621 else
1622 #endif /* INET6 */
1623 hashkey_faddr = inp->inp_faddr.s_addr;
1624 inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
1625 inp->inp_fport, inp->inp_pcbinfo->hashmask);
1626 head = &inp->inp_pcbinfo->hashbase[inp->hash_element];
1627
1628 LIST_REMOVE(inp, inp_hash);
1629 LIST_INSERT_HEAD(head, inp, inp_hash);
1630 }
1631
1632 /*
1633 * Remove PCB from various lists.
1634 */
1635 //###LOCK must be called with list lock held
1636 void
1637 in_pcbremlists(struct inpcb *inp)
1638 {
1639 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
1640
1641 if (inp->inp_lport) {
1642 struct inpcbport *phd = inp->inp_phd;
1643
1644 LIST_REMOVE(inp, inp_hash);
1645 LIST_REMOVE(inp, inp_portlist);
1646 if (phd != NULL && (LIST_FIRST(&phd->phd_pcblist) == NULL)) {
1647 LIST_REMOVE(phd, phd_hash);
1648 FREE(phd, M_PCB);
1649 }
1650 }
1651 LIST_REMOVE(inp, inp_list);
1652 inp->inp_pcbinfo->ipi_count--;
1653 }
1654
1655 /* Mechanism used to defer the memory release of PCBs
1656 * The pcb list will contain the pcb until the ripper can clean it up if
1657 * the following conditions are met: 1) state "DEAD", 2) wantcnt is STOPUSING
1658 * 3) usecount is null
1659 * This function will be called to either mark the pcb as
1660 */
1661 int
1662 in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
1663 {
1664
1665 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
1666 UInt32 origwant;
1667 UInt32 newwant;
1668
1669 switch (mode) {
1670
1671 case WNT_STOPUSING: /* try to mark the pcb as ready for recycling */
1672
1673 /* compareswap with STOPUSING, if success we're good, if it's in use, will be marked later */
1674
1675 if (locked == 0)
1676 socket_lock(pcb->inp_socket, 1);
1677 pcb->inp_state = INPCB_STATE_DEAD;
1678
1679 stopusing:
1680 if (pcb->inp_socket->so_usecount < 0)
1681 panic("in_pcb_checkstate STOP pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket);
1682 if (locked == 0)
1683 socket_unlock(pcb->inp_socket, 1);
1684
1685 origwant = *wantcnt;
1686 if ((UInt16) origwant == 0xffff ) /* should stop using */
1687 return (WNT_STOPUSING);
1688 newwant = 0xffff;
1689 if ((UInt16) origwant == 0) {/* try to mark it as unsuable now */
1690 OSCompareAndSwap(origwant, newwant, wantcnt) ;
1691 }
1692 return (WNT_STOPUSING);
1693 break;
1694
1695 case WNT_ACQUIRE: /* try to increase reference to pcb */
1696 /* if WNT_STOPUSING should bail out */
1697 /*
1698 * if socket state DEAD, try to set count to STOPUSING, return failed
1699 * otherwise increase cnt
1700 */
1701 do {
1702 origwant = *wantcnt;
1703 if ((UInt16) origwant == 0xffff ) {/* should stop using */
1704 // printf("in_pcb_checkstate: ACQ PCB was STOPUSING while release. odd pcb=%p\n", pcb);
1705 return (WNT_STOPUSING);
1706 }
1707 newwant = origwant + 1;
1708 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
1709 return (WNT_ACQUIRE);
1710 break;
1711
1712 case WNT_RELEASE: /* release reference. if result is null and pcb state is DEAD,
1713 set wanted bit to STOPUSING
1714 */
1715
1716 if (locked == 0)
1717 socket_lock(pcb->inp_socket, 1);
1718
1719 do {
1720 origwant = *wantcnt;
1721 if ((UInt16) origwant == 0x0 )
1722 panic("in_pcb_checkstate pcb=%p release with zero count", pcb);
1723 if ((UInt16) origwant == 0xffff ) {/* should stop using */
1724 #if TEMPDEBUG
1725 printf("in_pcb_checkstate: REL PCB was STOPUSING while release. odd pcb=%p\n", pcb);
1726 #endif
1727 if (locked == 0)
1728 socket_unlock(pcb->inp_socket, 1);
1729 return (WNT_STOPUSING);
1730 }
1731 newwant = origwant - 1;
1732 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
1733
1734 if (pcb->inp_state == INPCB_STATE_DEAD)
1735 goto stopusing;
1736 if (pcb->inp_socket->so_usecount < 0)
1737 panic("in_pcb_checkstate RELEASE pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket);
1738
1739 if (locked == 0)
1740 socket_unlock(pcb->inp_socket, 1);
1741 return (WNT_RELEASE);
1742 break;
1743
1744 default:
1745
1746 panic("in_pcb_checkstate: so=%p not a valid state =%x\n", pcb->inp_socket, mode);
1747 }
1748
1749 /* NOTREACHED */
1750 return (mode);
1751 }
1752
1753 /*
1754 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
1755 * The inpcb_compat data structure is passed to user space and must
1756 * not change. We intentionally avoid copying pointers.
1757 */
1758 void
1759 inpcb_to_compat(
1760 struct inpcb *inp,
1761 struct inpcb_compat *inp_compat)
1762 {
1763 bzero(inp_compat, sizeof(*inp_compat));
1764 inp_compat->inp_fport = inp->inp_fport;
1765 inp_compat->inp_lport = inp->inp_lport;
1766 inp_compat->nat_owner = inp->nat_owner;
1767 inp_compat->nat_cookie = inp->nat_cookie;
1768 inp_compat->inp_gencnt = inp->inp_gencnt;
1769 inp_compat->inp_flags = inp->inp_flags;
1770 inp_compat->inp_flow = inp->inp_flow;
1771 inp_compat->inp_vflag = inp->inp_vflag;
1772 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
1773 inp_compat->inp_ip_p = inp->inp_ip_p;
1774 inp_compat->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
1775 inp_compat->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
1776 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
1777 inp_compat->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim;
1778 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
1779 inp_compat->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex;
1780 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
1781 }
1782
1783 #if !CONFIG_EMBEDDED
1784
1785 void
1786 inpcb_to_xinpcb64(
1787 struct inpcb *inp,
1788 struct xinpcb64 *xinp)
1789 {
1790 xinp->inp_fport = inp->inp_fport;
1791 xinp->inp_lport = inp->inp_lport;
1792 xinp->inp_gencnt = inp->inp_gencnt;
1793 xinp->inp_flags = inp->inp_flags;
1794 xinp->inp_flow = inp->inp_flow;
1795 xinp->inp_vflag = inp->inp_vflag;
1796 xinp->inp_ip_ttl = inp->inp_ip_ttl;
1797 xinp->inp_ip_p = inp->inp_ip_p;
1798 xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
1799 xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
1800 xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
1801 xinp->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim;
1802 xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
1803 xinp->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex;
1804 xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
1805 }
1806
1807 #endif /* !CONFIG_EMBEDDED */
1808
1809
1810 /*
1811 * The following routines implement this scheme:
1812 *
1813 * Callers of ip_output() that intend to cache the route in the inpcb pass
1814 * a local copy of the struct route to ip_output(). Using a local copy of
1815 * the cached route significantly simplifies things as IP no longer has to
1816 * worry about having exclusive access to the passed in struct route, since
1817 * it's defined in the caller's stack; in essence, this allows for a lock-
1818 * less operation when updating the struct route at the IP level and below,
1819 * whenever necessary. The scheme works as follows:
1820 *
1821 * Prior to dropping the socket's lock and calling ip_output(), the caller
1822 * copies the struct route from the inpcb into its stack, and adds a reference
1823 * to the cached route entry, if there was any. The socket's lock is then
1824 * dropped and ip_output() is called with a pointer to the copy of struct
1825 * route defined on the stack (not to the one in the inpcb.)
1826 *
1827 * Upon returning from ip_output(), the caller then acquires the socket's
1828 * lock and synchronizes the cache; if there is no route cached in the inpcb,
1829 * it copies the local copy of struct route (which may or may not contain any
1830 * route) back into the cache; otherwise, if the inpcb has a route cached in
1831 * it, the one in the local copy will be freed, if there's any. Trashing the
1832 * cached route in the inpcb can be avoided because ip_output() is single-
1833 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
1834 * by the socket/transport layer.)
1835 */
1836 void
1837 inp_route_copyout(struct inpcb *inp, struct route *dst)
1838 {
1839 struct route *src = &inp->inp_route;
1840
1841 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
1842
1843 /*
1844 * If the route in the PCB is not for IPv4, blow it away;
1845 * this is possible in the case of IPv4-mapped address case.
1846 */
1847 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) {
1848 rtfree(src->ro_rt);
1849 src->ro_rt = NULL;
1850 }
1851
1852 route_copyout(dst, src, sizeof(*dst));
1853 }
1854
1855 void
1856 inp_route_copyin(struct inpcb *inp, struct route *src)
1857 {
1858 struct route *dst = &inp->inp_route;
1859
1860 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
1861
1862 /* Minor sanity check */
1863 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
1864 panic("%s: wrong or corrupted route: %p", __func__, src);
1865
1866 route_copyin(src, dst, sizeof(*src));
1867 }
1868
1869 /*
1870 * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option.
1871 */
1872 void
1873 inp_bindif(struct inpcb *inp, unsigned int ifscope)
1874 {
1875 /*
1876 * A zero interface scope value indicates an "unbind".
1877 * Otherwise, take in whatever value the app desires;
1878 * the app may already know the scope (or force itself
1879 * to such a scope) ahead of time before the interface
1880 * gets attached. It doesn't matter either way; any
1881 * route lookup from this point on will require an
1882 * exact match for the embedded interface scope.
1883 */
1884 inp->inp_boundif = ifscope;
1885 if (inp->inp_boundif == IFSCOPE_NONE)
1886 inp->inp_flags &= ~INP_BOUND_IF;
1887 else
1888 inp->inp_flags |= INP_BOUND_IF;
1889
1890 /* Blow away any cached route in the PCB */
1891 if (inp->inp_route.ro_rt != NULL) {
1892 rtfree(inp->inp_route.ro_rt);
1893 inp->inp_route.ro_rt = NULL;
1894 }
1895 }
1896
1897 /*
1898 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option.
1899 */
1900 int
1901 inp_nocellular(struct inpcb *inp, unsigned int val)
1902 {
1903 if (val) {
1904 inp->inp_flags |= INP_NO_IFT_CELLULAR;
1905 } else if (inp->inp_flags & INP_NO_IFT_CELLULAR) {
1906 /* once set, it cannot be unset */
1907 return (EINVAL);
1908 }
1909
1910 /* Blow away any cached route in the PCB */
1911 if (inp->inp_route.ro_rt != NULL) {
1912 rtfree(inp->inp_route.ro_rt);
1913 inp->inp_route.ro_rt = NULL;
1914 }
1915
1916 return (0);
1917 }