]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/in_pcb.c
xnu-1228.7.58.tar.gz
[apple/xnu.git] / bsd / netinet / in_pcb.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
62 */
63
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/domain.h>
69 #include <sys/protosw.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/proc.h>
73 #ifndef __APPLE__
74 #include <sys/jail.h>
75 #endif
76 #include <sys/kernel.h>
77 #include <sys/sysctl.h>
78 #include <libkern/OSAtomic.h>
79
80 #include <machine/limits.h>
81
82 #ifdef __APPLE__
83 #include <kern/zalloc.h>
84 #endif
85
86 #include <net/if.h>
87 #include <net/if_types.h>
88 #include <net/route.h>
89
90 #include <netinet/in.h>
91 #include <netinet/in_pcb.h>
92 #include <netinet/in_var.h>
93 #include <netinet/ip_var.h>
94 #if INET6
95 #include <netinet/ip6.h>
96 #include <netinet6/ip6_var.h>
97 #endif /* INET6 */
98
99 #include "faith.h"
100
101 #if IPSEC
102 #include <netinet6/ipsec.h>
103 #include <netkey/key.h>
104 #endif /* IPSEC */
105
106 #include <sys/kdebug.h>
107
108 #if IPSEC
109 extern int ipsec_bypass;
110 #endif
111
112 extern u_long route_generation;
113
114 #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
115 #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
116
117 struct in_addr zeroin_addr;
118
119 /*
120 * These configure the range of local port addresses assigned to
121 * "unspecified" outgoing connections/packets/whatever.
122 */
123 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
124 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
125 #ifndef __APPLE__
126 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */
127 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */
128 #else
129 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
130 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
131 #endif
132 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
133 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
134
135 #define RANGECHK(var, min, max) \
136 if ((var) < (min)) { (var) = (min); } \
137 else if ((var) > (max)) { (var) = (max); }
138
139 static int
140 sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
141 {
142 #pragma unused(arg1, arg2)
143 int error = sysctl_handle_int(oidp,
144 oidp->oid_arg1, oidp->oid_arg2, req);
145 if (!error) {
146 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
147 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
148 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
149 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
150 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
151 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
152 }
153 return error;
154 }
155
156 #undef RANGECHK
157
158 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports");
159
160 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
161 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
162 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
163 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
164 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
165 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
166 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
167 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
168 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
169 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
170 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
171 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
172
173 /*
174 * in_pcb.c: manage the Protocol Control Blocks.
175 *
176 * NOTE: It is assumed that most of these functions will be called at
177 * splnet(). XXX - There are, unfortunately, a few exceptions to this
178 * rule that should be fixed.
179 */
180
181 /*
182 * Allocate a PCB and associate it with the socket.
183 *
184 * Returns: 0 Success
185 * ENOBUFS
186 * ENOMEM
187 * ipsec_init_policy:??? [IPSEC]
188 */
189 int
190 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, __unused struct proc *p)
191 {
192 struct inpcb *inp;
193 caddr_t temp;
194 #if IPSEC
195 #ifndef __APPLE__
196 int error;
197 #endif
198 #endif
199 #if CONFIG_MACF_NET
200 int mac_error;
201 #endif
202
203 if (so->cached_in_sock_layer == 0) {
204 #if TEMPDEBUG
205 printf("PCBALLOC calling zalloc for socket %x\n", so);
206 #endif
207 inp = (struct inpcb *) zalloc(pcbinfo->ipi_zone);
208 if (inp == NULL)
209 return (ENOBUFS);
210 bzero((caddr_t)inp, sizeof(*inp));
211 }
212 else {
213 #if TEMPDEBUG
214 printf("PCBALLOC reusing PCB for socket %x\n", so);
215 #endif
216 inp = (struct inpcb *) so->so_saved_pcb;
217 temp = inp->inp_saved_ppcb;
218 bzero((caddr_t) inp, sizeof(*inp));
219 inp->inp_saved_ppcb = temp;
220 }
221
222 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
223 inp->inp_pcbinfo = pcbinfo;
224 inp->inp_socket = so;
225 #if CONFIG_MACF_NET
226 mac_error = mac_inpcb_label_init(inp, M_WAITOK);
227 if (mac_error != 0) {
228 if (so->cached_in_sock_layer == 0)
229 zfree(pcbinfo->ipi_zone, inp);
230 return (mac_error);
231 }
232 mac_inpcb_label_associate(so, inp);
233 #endif
234 so->so_pcb = (caddr_t)inp;
235
236 if (so->so_proto->pr_flags & PR_PCBLOCK) {
237 inp->inpcb_mtx = lck_mtx_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr);
238 if (inp->inpcb_mtx == NULL) {
239 printf("in_pcballoc: can't alloc mutex! so=%p\n", so);
240 return(ENOMEM);
241 }
242 }
243
244 #if IPSEC
245 #ifndef __APPLE__
246 if (ipsec_bypass == 0) {
247 error = ipsec_init_policy(so, &inp->inp_sp);
248 if (error != 0) {
249 zfree(pcbinfo->ipi_zone, inp);
250 return error;
251 }
252 }
253 #endif
254 #endif /*IPSEC*/
255 #if INET6
256 if (INP_SOCKAF(so) == AF_INET6 && !ip6_mapped_addr_on)
257 inp->inp_flags |= IN6P_IPV6_V6ONLY;
258 #endif
259
260 #if INET6
261 if (ip6_auto_flowlabel)
262 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
263 #endif
264 lck_rw_lock_exclusive(pcbinfo->mtx);
265 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
266 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
267 pcbinfo->ipi_count++;
268 lck_rw_done(pcbinfo->mtx);
269 return (0);
270 }
271
272
273 /*
274 in_pcblookup_local_and_cleanup does everything
275 in_pcblookup_local does but it checks for a socket
276 that's going away. Since we know that the lock is
277 held read+write when this funciton is called, we
278 can safely dispose of this socket like the slow
279 timer would usually do and return NULL. This is
280 great for bind.
281 */
282 struct inpcb*
283 in_pcblookup_local_and_cleanup(
284 struct inpcbinfo *pcbinfo,
285 struct in_addr laddr,
286 u_int lport_arg,
287 int wild_okay)
288 {
289 struct inpcb *inp;
290
291 /* Perform normal lookup */
292 inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
293
294 /* Check if we found a match but it's waiting to be disposed */
295 if (inp && inp->inp_wantcnt == WNT_STOPUSING) {
296 struct socket *so = inp->inp_socket;
297
298 lck_mtx_lock(inp->inpcb_mtx);
299
300 if (so->so_usecount == 0) {
301 in_pcbdispose(inp);
302 inp = NULL;
303 }
304 else {
305 lck_mtx_unlock(inp->inpcb_mtx);
306 }
307 }
308
309 return inp;
310 }
311
#ifdef __APPLE_API_PRIVATE
/*
 * Radar 5523020: send a kernel event notification if a non-participating
 * socket tries to bind the port that a socket which has set
 * SOF_NOTIFYCONFLICT owns.
 *
 * `port' is in network byte order.  The KEV_INET_PORTINUSE event carries
 * the port in host order together with the pid of the calling process.
 */
void
in_pcb_conflict_post_msg(u_int16_t port)
{
	struct kev_msg ev_msg;
	struct kev_in_portinuse in_portinuse;

	/*
	 * Both structures live on the stack and only some of their fields
	 * are assigned below; zero them first so nothing stale is handed
	 * to kev_post_msg().
	 */
	bzero(&in_portinuse, sizeof (in_portinuse));
	bzero(&ev_msg, sizeof (ev_msg));

	in_portinuse.port = ntohs(port);	/* port in host order */
	in_portinuse.req_pid = proc_selfpid();
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_INET_SUBCLASS;
	ev_msg.event_code = KEV_INET_PORTINUSE;
	ev_msg.dv[0].data_ptr = &in_portinuse;
	ev_msg.dv[0].data_length = sizeof(struct kev_in_portinuse);
	ev_msg.dv[1].data_length = 0;
	kev_post_msg(&ev_msg);
}
#endif
334 /*
335 * Returns: 0 Success
336 * EADDRNOTAVAIL Address not available.
337 * EINVAL Invalid argument
338 * EAFNOSUPPORT Address family not supported [notdef]
339 * EACCES Permission denied
340 * EADDRINUSE Address in use
341 * EAGAIN Resource unavailable, try again
342 * proc_suser:EPERM Operation not permitted
343 */
344 int
345 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
346 {
347 struct socket *so = inp->inp_socket;
348 unsigned short *lastport;
349 struct sockaddr_in *sin;
350 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
351 u_short lport = 0;
352 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
353 int error, conflict = 0;
354
355 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
356 return (EADDRNOTAVAIL);
357 if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
358 return (EINVAL);
359 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
360 wild = 1;
361 socket_unlock(so, 0); /* keep reference on socket */
362 lck_rw_lock_exclusive(pcbinfo->mtx);
363 if (nam) {
364 sin = (struct sockaddr_in *)nam;
365 if (nam->sa_len != sizeof (*sin)) {
366 lck_rw_done(pcbinfo->mtx);
367 socket_lock(so, 0);
368 return (EINVAL);
369 }
370 #ifdef notdef
371 /*
372 * We should check the family, but old programs
373 * incorrectly fail to initialize it.
374 */
375 if (sin->sin_family != AF_INET) {
376 lck_rw_done(pcbinfo->mtx);
377 socket_lock(so, 0);
378 return (EAFNOSUPPORT);
379 }
380 #endif
381 lport = sin->sin_port;
382 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
383 /*
384 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
385 * allow complete duplication of binding if
386 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
387 * and a multicast address is bound on both
388 * new and duplicated sockets.
389 */
390 if (so->so_options & SO_REUSEADDR)
391 reuseport = SO_REUSEADDR|SO_REUSEPORT;
392 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
393 struct ifaddr *ifa;
394 sin->sin_port = 0; /* yech... */
395 if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin)) == 0) {
396 lck_rw_done(pcbinfo->mtx);
397 socket_lock(so, 0);
398 return (EADDRNOTAVAIL);
399 }
400 else {
401 ifafree(ifa);
402 }
403 }
404 if (lport) {
405 struct inpcb *t;
406
407 /* GROSS */
408 if (ntohs(lport) < IPPORT_RESERVED && p &&
409 proc_suser(p)) {
410 lck_rw_done(pcbinfo->mtx);
411 socket_lock(so, 0);
412 return (EACCES);
413 }
414 if (so->so_uid &&
415 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
416 t = in_pcblookup_local_and_cleanup(inp->inp_pcbinfo,
417 sin->sin_addr, lport, INPLOOKUP_WILDCARD);
418 if (t &&
419 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
420 ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
421 (t->inp_socket->so_options &
422 SO_REUSEPORT) == 0) &&
423 (so->so_uid != t->inp_socket->so_uid) &&
424 ((t->inp_socket->so_flags & SOF_REUSESHAREUID) == 0)) {
425 #if INET6
426 if (ntohl(sin->sin_addr.s_addr) !=
427 INADDR_ANY ||
428 ntohl(t->inp_laddr.s_addr) !=
429 INADDR_ANY ||
430 INP_SOCKAF(so) ==
431 INP_SOCKAF(t->inp_socket))
432 #endif /* INET6 */
433 {
434 #ifdef __APPLE_API_PRIVATE
435
436 if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0))
437 conflict = 1;
438
439 lck_rw_done(pcbinfo->mtx);
440
441 if (conflict)
442 in_pcb_conflict_post_msg(lport);
443 #else
444 lck_rw_done(pcbinfo->mtx);
445 #endif /* __APPLE_API_PRIVATE */
446
447 socket_lock(so, 0);
448 return (EADDRINUSE);
449 }
450 }
451 }
452 t = in_pcblookup_local_and_cleanup(pcbinfo, sin->sin_addr,
453 lport, wild);
454 if (t &&
455 (reuseport & t->inp_socket->so_options) == 0) {
456 #if INET6
457 if (ip6_mapped_addr_on == 0 ||
458 ntohl(sin->sin_addr.s_addr) !=
459 INADDR_ANY ||
460 ntohl(t->inp_laddr.s_addr) !=
461 INADDR_ANY ||
462 INP_SOCKAF(so) ==
463 INP_SOCKAF(t->inp_socket))
464 #endif /* INET6 */
465 {
466 #ifdef __APPLE_API_PRIVATE
467
468 if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0))
469 conflict = 1;
470
471 lck_rw_done(pcbinfo->mtx);
472
473 if (conflict)
474 in_pcb_conflict_post_msg(lport);
475 #else
476 lck_rw_done(pcbinfo->mtx);
477 #endif /* __APPLE_API_PRIVATE */
478 socket_lock(so, 0);
479 return (EADDRINUSE);
480 }
481 }
482 }
483 inp->inp_laddr = sin->sin_addr;
484 }
485 if (lport == 0) {
486 u_short first, last;
487 int count;
488
489 inp->inp_flags |= INP_ANONPORT;
490
491 if (inp->inp_flags & INP_HIGHPORT) {
492 first = ipport_hifirstauto; /* sysctl */
493 last = ipport_hilastauto;
494 lastport = &pcbinfo->lasthi;
495 } else if (inp->inp_flags & INP_LOWPORT) {
496 if (p && (error = proc_suser(p))) {
497 lck_rw_done(pcbinfo->mtx);
498 socket_lock(so, 0);
499 return error;
500 }
501 first = ipport_lowfirstauto; /* 1023 */
502 last = ipport_lowlastauto; /* 600 */
503 lastport = &pcbinfo->lastlow;
504 } else {
505 first = ipport_firstauto; /* sysctl */
506 last = ipport_lastauto;
507 lastport = &pcbinfo->lastport;
508 }
509 /*
510 * Simple check to ensure all ports are not used up causing
511 * a deadlock here.
512 *
513 * We split the two cases (up and down) so that the direction
514 * is not being tested on each round of the loop.
515 */
516 if (first > last) {
517 /*
518 * counting down
519 */
520 count = first - last;
521
522 do {
523 if (count-- < 0) { /* completely used? */
524 lck_rw_done(pcbinfo->mtx);
525 socket_lock(so, 0);
526 inp->inp_laddr.s_addr = INADDR_ANY;
527 return (EADDRNOTAVAIL);
528 }
529 --*lastport;
530 if (*lastport > first || *lastport < last)
531 *lastport = first;
532 lport = htons(*lastport);
533 } while (in_pcblookup_local_and_cleanup(pcbinfo,
534 inp->inp_laddr, lport, wild));
535 } else {
536 /*
537 * counting up
538 */
539 count = last - first;
540
541 do {
542 if (count-- < 0) { /* completely used? */
543 lck_rw_done(pcbinfo->mtx);
544 socket_lock(so, 0);
545 inp->inp_laddr.s_addr = INADDR_ANY;
546 return (EADDRNOTAVAIL);
547 }
548 ++*lastport;
549 if (*lastport < first || *lastport > last)
550 *lastport = first;
551 lport = htons(*lastport);
552 } while (in_pcblookup_local_and_cleanup(pcbinfo,
553 inp->inp_laddr, lport, wild));
554 }
555 }
556 socket_lock(so, 0);
557 inp->inp_lport = lport;
558 if (in_pcbinshash(inp, 1) != 0) {
559 inp->inp_laddr.s_addr = INADDR_ANY;
560 inp->inp_lport = 0;
561 lck_rw_done(pcbinfo->mtx);
562 return (EAGAIN);
563 }
564 lck_rw_done(pcbinfo->mtx);
565 sflt_notify(so, sock_evt_bound, NULL);
566 return (0);
567 }
568
#if CONFIG_FORCE_OUT_IFP
/*
 * pdp_context_route_locked is loosely based on rtalloc_ign_locked with
 * the hope that it can be used anywhere rtalloc_ign_locked is.
 *
 * It makes ro->ro_rt refer to a route whose interface is `ifp'.  A cached
 * route that already uses ifp and is RTF_UP is kept; any other cached
 * route is released.  The replacement is the "fake" route hung off the
 * first in_ifaddr found on ifp, created on first use.  If ifp is NULL,
 * has no IPv4 address, or the fake route cannot be built, ro->ro_rt is
 * left NULL.
 */
__private_extern__ void
pdp_context_route_locked(ifnet_t ifp, struct route *ro)
{
	struct in_ifaddr *ia;
	struct rtentry *rt;

	rt = ro->ro_rt;
	if (rt != NULL) {
		/* the cached route is already what we want */
		if (rt->rt_ifp == ifp && (rt->rt_flags & RTF_UP))
			return;

		rtfree_locked(rt);
		ro->ro_rt = NULL;
	}

	if (ifp == NULL)
		return;

	/* Find the first IP address, we will use a fake route off of that */
	for (ia = TAILQ_FIRST(&in_ifaddrhead);
	    ia != NULL && ia->ia_ifp != ifp;
	    ia = TAILQ_NEXT(ia, ia_link))
		continue;

	/* Hrmm no IP addresses here :( */
	if (ia == NULL)
		return;

	rt = ia->ia_route;
	if (rt == NULL) {
		struct sockaddr *ifa = ia->ia_ifa.ifa_addr;

		/* Allocate and set up a fake route */
		rt = rte_alloc();
		if (rt == NULL)
			return;

		bzero(rt, sizeof(*rt));
		rt->rt_flags = RTF_UP | RTF_STATIC;
		if (rt_setgate(rt, ifa, ifa) != 0) {
			rte_free(rt);
			return;
		}
		/*
		 * Explicitly zero the key so that:
		 *   rt_tables[rt_key(rt)->sa_family] == rt_tables[0] == NULL
		 */
		bzero(rt_key(rt), ifa->sa_len);

		rtsetifa(rt, &ia->ia_ifa);
		rt->rt_ifp = rt->rt_ifa->ifa_ifp;

		/* Take a reference for the ia pointer to this */
		ia->ia_route = rt;
		rtref(rt);

		/*
		 * One more rtentry floating around that is not
		 * linked to the routing table.
		 */
		(void) OSIncrementAtomic((SInt32 *)&rttrash);
	}

	rt->generation_id = route_generation;
	rtref(rt);	/* this reference belongs to ro */
	ro->ro_rt = rt;
}
#endif
639
640 /*
641 * Transform old in_pcbconnect() into an inner subroutine for new
642 * in_pcbconnect(): Do some validity-checking on the remote
643 * address (in mbuf 'nam') and then determine local host address
644 * (i.e., which interface) to use to access that remote host.
645 *
646 * This preserves definition of in_pcbconnect(), while supporting a
647 * slightly different version for T/TCP. (This is more than
648 * a bit of a kludge, but cleaning up the internal interfaces would
649 * have forced minor changes in every protocol).
650 *
651 * Returns: 0 Success
652 * EINVAL Invalid argument
653 * EAFNOSUPPORT Address family not supported
654 * EADDRNOTAVAIL Address not available
655 */
656 int
657 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
658 struct sockaddr_in **plocal_sin)
659 {
660 struct in_ifaddr *ia;
661 struct sockaddr_in *sin = (struct sockaddr_in *)nam;
662
663 if (nam->sa_len != sizeof (*sin))
664 return (EINVAL);
665 if (sin->sin_family != AF_INET)
666 return (EAFNOSUPPORT);
667 if (sin->sin_port == 0)
668 return (EADDRNOTAVAIL);
669
670 lck_mtx_lock(rt_mtx);
671
672 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
673 /*
674 * If the destination address is INADDR_ANY,
675 * use the primary local address.
676 * If the supplied address is INADDR_BROADCAST,
677 * and the primary interface supports broadcast,
678 * choose the broadcast address for that interface.
679 */
680 #define satosin(sa) ((struct sockaddr_in *)(sa))
681 #define sintosa(sin) ((struct sockaddr *)(sin))
682 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
683 if (sin->sin_addr.s_addr == INADDR_ANY)
684 sin->sin_addr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr;
685 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
686 (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & IFF_BROADCAST))
687 sin->sin_addr = satosin(&TAILQ_FIRST(&in_ifaddrhead)->ia_broadaddr)->sin_addr;
688 }
689 if (inp->inp_laddr.s_addr == INADDR_ANY) {
690 struct route *ro;
691
692 ia = (struct in_ifaddr *)0;
693 /*
694 * If route is known or can be allocated now,
695 * our src addr is taken from the i/f, else punt.
696 * Note that we should check the address family of the cached
697 * destination, in case of sharing the cache with IPv6.
698 */
699 ro = &inp->inp_route;
700 if (ro->ro_rt &&
701 (ro->ro_dst.sa_family != AF_INET ||
702 satosin(&ro->ro_dst)->sin_addr.s_addr !=
703 sin->sin_addr.s_addr ||
704 inp->inp_socket->so_options & SO_DONTROUTE ||
705 ro->ro_rt->generation_id != route_generation)) {
706 rtfree_locked(ro->ro_rt);
707 ro->ro_rt = (struct rtentry *)0;
708 }
709 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
710 (ro->ro_rt == (struct rtentry *)0 ||
711 ro->ro_rt->rt_ifp == 0)) {
712 /* No route yet, so try to acquire one */
713 bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
714 ro->ro_dst.sa_family = AF_INET;
715 ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
716 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
717 sin->sin_addr;
718 #if CONFIG_FORCE_OUT_IFP
719 /* If the socket has requested a specific interface, use that address */
720 if (inp->pdp_ifp != NULL) {
721 pdp_context_route_locked(inp->pdp_ifp, ro);
722 }
723 else
724 #endif /* CONFIG_FORCE_OUT_IFP */
725 rtalloc_ign_locked(ro, 0UL);
726 }
727 /*
728 * If we found a route, use the address
729 * corresponding to the outgoing interface
730 * unless it is the loopback (in case a route
731 * to our address on another net goes to loopback).
732 */
733 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
734 ia = ifatoia(ro->ro_rt->rt_ifa);
735 if (ia)
736 ifaref(&ia->ia_ifa);
737 }
738 if (ia == 0) {
739 u_short fport = sin->sin_port;
740
741 sin->sin_port = 0;
742 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
743 if (ia == 0) {
744 ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
745 }
746 sin->sin_port = fport;
747 if (ia == 0) {
748 ia = TAILQ_FIRST(&in_ifaddrhead);
749 if (ia)
750 ifaref(&ia->ia_ifa);
751 }
752 if (ia == 0) {
753 lck_mtx_unlock(rt_mtx);
754 return (EADDRNOTAVAIL);
755 }
756 }
757 /*
758 * If the destination address is multicast and an outgoing
759 * interface has been set as a multicast option, use the
760 * address of that interface as our source address.
761 */
762 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
763 inp->inp_moptions != NULL) {
764 struct ip_moptions *imo;
765 struct ifnet *ifp;
766
767 imo = inp->inp_moptions;
768 if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
769 ia->ia_ifp != imo->imo_multicast_ifp)) {
770 ifp = imo->imo_multicast_ifp;
771 if (ia)
772 ifafree(&ia->ia_ifa);
773 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link)
774 if (ia->ia_ifp == ifp)
775 break;
776 if (ia == 0) {
777 lck_mtx_unlock(rt_mtx);
778 return (EADDRNOTAVAIL);
779 }
780 ifaref(&ia->ia_ifa);
781 }
782 }
783 /*
784 * Don't do pcblookup call here; return interface in plocal_sin
785 * and exit to caller, that will do the lookup.
786 */
787 *plocal_sin = &ia->ia_addr;
788 ifafree(&ia->ia_ifa);
789 }
790 lck_mtx_unlock(rt_mtx);
791 return(0);
792 }
793
794 /*
795 * Outer subroutine:
796 * Connect from a socket to a specified address.
797 * Both address and port must be specified in argument sin.
798 * If don't have a local address for this socket yet,
799 * then pick one.
800 */
801 int
802 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
803 {
804 struct sockaddr_in *ifaddr;
805 struct sockaddr_in *sin = (struct sockaddr_in *)nam;
806 struct inpcb *pcb;
807 int error;
808
809 /*
810 * Call inner routine, to assign local interface address.
811 */
812 if ((error = in_pcbladdr(inp, nam, &ifaddr)) != 0)
813 return(error);
814
815 socket_unlock(inp->inp_socket, 0);
816 pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
817 inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
818 inp->inp_lport, 0, NULL);
819 socket_lock(inp->inp_socket, 0);
820 if (pcb != NULL) {
821 in_pcb_checkstate(pcb, WNT_RELEASE, 0);
822 return (EADDRINUSE);
823 }
824 if (inp->inp_laddr.s_addr == INADDR_ANY) {
825 if (inp->inp_lport == 0) {
826 error = in_pcbbind(inp, (struct sockaddr *)0, p);
827 if (error)
828 return (error);
829 }
830 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
831 /*lock inversion issue, mostly with udp multicast packets */
832 socket_unlock(inp->inp_socket, 0);
833 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
834 socket_lock(inp->inp_socket, 0);
835 }
836 inp->inp_laddr = ifaddr->sin_addr;
837 inp->inp_flags |= INP_INADDR_ANY;
838 }
839 else {
840 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
841 /*lock inversion issue, mostly with udp multicast packets */
842 socket_unlock(inp->inp_socket, 0);
843 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
844 socket_lock(inp->inp_socket, 0);
845 }
846 }
847 inp->inp_faddr = sin->sin_addr;
848 inp->inp_fport = sin->sin_port;
849 in_pcbrehash(inp);
850 lck_rw_done(inp->inp_pcbinfo->mtx);
851 return (0);
852 }
853
854 void
855 in_pcbdisconnect(struct inpcb *inp)
856 {
857
858 inp->inp_faddr.s_addr = INADDR_ANY;
859 inp->inp_fport = 0;
860
861 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
862 /*lock inversion issue, mostly with udp multicast packets */
863 socket_unlock(inp->inp_socket, 0);
864 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
865 socket_lock(inp->inp_socket, 0);
866 }
867
868 in_pcbrehash(inp);
869 lck_rw_done(inp->inp_pcbinfo->mtx);
870
871 if (inp->inp_socket->so_state & SS_NOFDREF)
872 in_pcbdetach(inp);
873 }
874
875 void
876 in_pcbdetach(struct inpcb *inp)
877 {
878 struct socket *so = inp->inp_socket;
879
880 if (so->so_pcb == 0) { /* we've been called twice */
881 panic("in_pcbdetach: inp=%p so=%p proto=%d so_pcb is null!\n",
882 inp, so, so->so_proto->pr_protocol);
883 }
884
885 #if IPSEC
886 if (ipsec_bypass == 0) {
887 ipsec4_delete_pcbpolicy(inp);
888 }
889 #endif /*IPSEC*/
890
891 /* mark socket state as dead */
892 if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING)
893 panic("in_pcbdetach so=%p prot=%x couldn't set to STOPUSING\n", so, so->so_proto->pr_protocol);
894
895 #if TEMPDEBUG
896 if (so->cached_in_sock_layer)
897 printf("in_pcbdetach for cached socket %x flags=%x\n", so, so->so_flags);
898 else
899 printf("in_pcbdetach for allocated socket %x flags=%x\n", so, so->so_flags);
900 #endif
901 if ((so->so_flags & SOF_PCBCLEARING) == 0) {
902 struct rtentry *rt;
903
904 inp->inp_vflag = 0;
905 if (inp->inp_options)
906 (void)m_free(inp->inp_options);
907 lck_mtx_lock(rt_mtx);
908 if ((rt = inp->inp_route.ro_rt) != NULL) {
909 inp->inp_route.ro_rt = NULL;
910 rtfree_locked(rt);
911 }
912 lck_mtx_unlock(rt_mtx);
913 ip_freemoptions(inp->inp_moptions);
914 inp->inp_moptions = NULL;
915 sofreelastref(so, 0);
916 inp->inp_state = INPCB_STATE_DEAD;
917 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
918 }
919 }
920
921
922 void
923 in_pcbdispose(struct inpcb *inp)
924 {
925 struct socket *so = inp->inp_socket;
926 struct inpcbinfo *ipi = inp->inp_pcbinfo;
927
928 #if TEMPDEBUG
929 if (inp->inp_state != INPCB_STATE_DEAD) {
930 printf("in_pcbdispose: not dead yet? so=%p\n", so);
931 }
932 #endif
933
934 if (so && so->so_usecount != 0)
935 panic("in_pcbdispose: use count=%x so=%p\n", so->so_usecount, so);
936
937 lck_rw_assert(ipi->mtx, LCK_RW_ASSERT_EXCLUSIVE);
938
939 inp->inp_gencnt = ++ipi->ipi_gencnt;
940 /*### access ipi in in_pcbremlists */
941 in_pcbremlists(inp);
942
943 if (so) {
944 if (so->so_proto->pr_flags & PR_PCBLOCK) {
945 sofreelastref(so, 0);
946 if (so->so_rcv.sb_cc || so->so_snd.sb_cc) {
947 #if TEMPDEBUG
948 printf("in_pcbdispose sb not cleaned up so=%p rc_cci=%x snd_cc=%x\n",
949 so, so->so_rcv.sb_cc, so->so_snd.sb_cc);
950 #endif
951 sbrelease(&so->so_rcv);
952 sbrelease(&so->so_snd);
953 }
954 if (so->so_head != NULL)
955 panic("in_pcbdispose, so=%p head still exist\n", so);
956 lck_mtx_unlock(inp->inpcb_mtx);
957 lck_mtx_free(inp->inpcb_mtx, ipi->mtx_grp);
958 }
959 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
960 so->so_saved_pcb = (caddr_t) inp;
961 so->so_pcb = 0;
962 inp->inp_socket = 0;
963 inp->reserved[0] = (u_int32_t)so;
964 #if CONFIG_MACF_NET
965 mac_inpcb_label_destroy(inp);
966 #endif
967 if (so->cached_in_sock_layer == 0) {
968 zfree(ipi->ipi_zone, inp);
969 }
970 sodealloc(so);
971 }
972 #if TEMPDEBUG
973 else
974 printf("in_pcbdispose: no socket for inp=%p\n", inp);
975 #endif
976 }
977
978 /*
979 * The calling convention of in_setsockaddr() and in_setpeeraddr() was
980 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
981 * in struct pr_usrreqs, so that protocols can just reference then directly
982 * without the need for a wrapper function. The socket must have a valid
983 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
984 * except through a kernel programming error, so it is acceptable to panic
985 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap
986 * because there actually /is/ a programming error somewhere... XXX)
987 *
988 * Returns: 0 Success
989 * ENOBUFS No buffer space available
990 * ECONNRESET Connection reset
991 */
992 int
993 in_setsockaddr(struct socket *so, struct sockaddr **nam)
994 {
995 struct inpcb *inp;
996 struct sockaddr_in *sin;
997
998 /*
999 * Do the malloc first in case it blocks.
1000 */
1001 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
1002 if (sin == NULL)
1003 return ENOBUFS;
1004 bzero(sin, sizeof *sin);
1005 sin->sin_family = AF_INET;
1006 sin->sin_len = sizeof(*sin);
1007
1008 inp = sotoinpcb(so);
1009 if (!inp) {
1010 FREE(sin, M_SONAME);
1011 return ECONNRESET;
1012 }
1013 sin->sin_port = inp->inp_lport;
1014 sin->sin_addr = inp->inp_laddr;
1015
1016 *nam = (struct sockaddr *)sin;
1017 return 0;
1018 }
1019
1020 int
1021 in_setpeeraddr(struct socket *so, struct sockaddr **nam)
1022 {
1023 struct inpcb *inp;
1024 struct sockaddr_in *sin;
1025
1026 /*
1027 * Do the malloc first in case it blocks.
1028 */
1029 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
1030 if (sin == NULL)
1031 return ENOBUFS;
1032 bzero((caddr_t)sin, sizeof (*sin));
1033 sin->sin_family = AF_INET;
1034 sin->sin_len = sizeof(*sin);
1035
1036 inp = sotoinpcb(so);
1037 if (!inp) {
1038 FREE(sin, M_SONAME);
1039 return ECONNRESET;
1040 }
1041 sin->sin_port = inp->inp_fport;
1042 sin->sin_addr = inp->inp_faddr;
1043
1044 *nam = (struct sockaddr *)sin;
1045 return 0;
1046 }
1047
1048 void
1049 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1050 int errno, void (*notify)(struct inpcb *, int))
1051 {
1052 struct inpcb *inp;
1053
1054 lck_rw_lock_shared(pcbinfo->mtx);
1055
1056 LIST_FOREACH(inp, pcbinfo->listhead, inp_list) {
1057 #if INET6
1058 if ((inp->inp_vflag & INP_IPV4) == 0)
1059 continue;
1060 #endif
1061 if (inp->inp_faddr.s_addr != faddr.s_addr ||
1062 inp->inp_socket == NULL)
1063 continue;
1064 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
1065 continue;
1066 socket_lock(inp->inp_socket, 1);
1067 (*notify)(inp, errno);
1068 (void)in_pcb_checkstate(inp, WNT_RELEASE, 1);
1069 socket_unlock(inp->inp_socket, 1);
1070 }
1071 lck_rw_done(pcbinfo->mtx);
1072 }
1073
1074 void
1075 in_pcbpurgeif0(
1076 struct inpcb *head,
1077 struct ifnet *ifp)
1078 {
1079 struct inpcb *inp;
1080 struct ip_moptions *imo;
1081 int i, gap;
1082
1083 for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
1084 imo = inp->inp_moptions;
1085 if ((inp->inp_vflag & INP_IPV4) &&
1086 imo != NULL) {
1087 /*
1088 * Unselect the outgoing interface if it is being
1089 * detached.
1090 */
1091 if (imo->imo_multicast_ifp == ifp)
1092 imo->imo_multicast_ifp = NULL;
1093
1094 /*
1095 * Drop multicast group membership if we joined
1096 * through the interface being detached.
1097 */
1098 for (i = 0, gap = 0; i < imo->imo_num_memberships;
1099 i++) {
1100 if (imo->imo_membership[i]->inm_ifp == ifp) {
1101 in_delmulti(&imo->imo_membership[i]);
1102 gap++;
1103 } else if (gap != 0)
1104 imo->imo_membership[i - gap] =
1105 imo->imo_membership[i];
1106 }
1107 imo->imo_num_memberships -= gap;
1108 }
1109 }
1110 }
1111
1112 /*
1113 * Check for alternatives when higher level complains
1114 * about service problems. For now, invalidate cached
1115 * routing information. If the route was created dynamically
1116 * (by a redirect), time to try a default gateway again.
1117 */
1118 void
1119 in_losing(struct inpcb *inp)
1120 {
1121 struct rtentry *rt;
1122 struct rt_addrinfo info;
1123
1124 if ((rt = inp->inp_route.ro_rt)) {
1125 lck_mtx_lock(rt_mtx);
1126 /* Check again, this time while holding the lock */
1127 if ((rt = inp->inp_route.ro_rt) == NULL) {
1128 lck_mtx_unlock(rt_mtx);
1129 return;
1130 }
1131 bzero((caddr_t)&info, sizeof(info));
1132 info.rti_info[RTAX_DST] =
1133 (struct sockaddr *)&inp->inp_route.ro_dst;
1134 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1135 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1136 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
1137 if (rt->rt_flags & RTF_DYNAMIC)
1138 (void) rtrequest_locked(RTM_DELETE, rt_key(rt),
1139 rt->rt_gateway, rt_mask(rt), rt->rt_flags,
1140 (struct rtentry **)0);
1141 /* if the address is gone keep the old route in the pcb */
1142 if ((ifa_foraddr(inp->inp_laddr.s_addr)) != 0) {
1143 inp->inp_route.ro_rt = 0;
1144 rtfree_locked(rt);
1145 }
1146 lck_mtx_unlock(rt_mtx);
1147 /*
1148 * A new route can be allocated
1149 * the next time output is attempted.
1150 */
1151 }
1152 }
1153
1154 /*
1155 * After a routing change, flush old routing
1156 * and allocate a (hopefully) better one.
1157 */
1158 void
1159 in_rtchange(struct inpcb *inp, __unused int errno)
1160 {
1161 struct rtentry *rt;
1162
1163 if ((rt = inp->inp_route.ro_rt) != NULL) {
1164 if ((ifa_foraddr(inp->inp_laddr.s_addr)) == 0)
1165 return; /* we can't remove the route now. not sure if still ok to use src */
1166 lck_mtx_lock(rt_mtx);
1167 /* Check again, this time while holding the lock */
1168 if ((rt = inp->inp_route.ro_rt) == NULL) {
1169 lck_mtx_unlock(rt_mtx);
1170 return;
1171 }
1172 rtfree_locked(rt);
1173 inp->inp_route.ro_rt = NULL;
1174 lck_mtx_unlock(rt_mtx);
1175 /*
1176 * A new route can be allocated the next time
1177 * output is attempted.
1178 */
1179 }
1180 }
1181
1182 /*
1183 * Lookup a PCB based on the local address and port.
1184 */
1185 struct inpcb *
1186 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
1187 unsigned int lport_arg, int wild_okay)
1188 {
1189 struct inpcb *inp;
1190 int matchwild = 3, wildcard;
1191 u_short lport = lport_arg;
1192
1193 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0,0,0,0,0);
1194
1195 if (!wild_okay) {
1196 struct inpcbhead *head;
1197 /*
1198 * Look for an unconnected (wildcard foreign addr) PCB that
1199 * matches the local address and port we're looking for.
1200 */
1201 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
1202 LIST_FOREACH(inp, head, inp_hash) {
1203 #if INET6
1204 if ((inp->inp_vflag & INP_IPV4) == 0)
1205 continue;
1206 #endif
1207 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1208 inp->inp_laddr.s_addr == laddr.s_addr &&
1209 inp->inp_lport == lport) {
1210 /*
1211 * Found.
1212 */
1213 return (inp);
1214 }
1215 }
1216 /*
1217 * Not found.
1218 */
1219 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0,0,0,0,0);
1220 return (NULL);
1221 } else {
1222 struct inpcbporthead *porthash;
1223 struct inpcbport *phd;
1224 struct inpcb *match = NULL;
1225 /*
1226 * Best fit PCB lookup.
1227 *
1228 * First see if this local port is in use by looking on the
1229 * port hash list.
1230 */
1231 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
1232 pcbinfo->porthashmask)];
1233 LIST_FOREACH(phd, porthash, phd_hash) {
1234 if (phd->phd_port == lport)
1235 break;
1236 }
1237 if (phd != NULL) {
1238 /*
1239 * Port is in use by one or more PCBs. Look for best
1240 * fit.
1241 */
1242 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
1243 wildcard = 0;
1244 #if INET6
1245 if ((inp->inp_vflag & INP_IPV4) == 0)
1246 continue;
1247 #endif
1248 if (inp->inp_faddr.s_addr != INADDR_ANY)
1249 wildcard++;
1250 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1251 if (laddr.s_addr == INADDR_ANY)
1252 wildcard++;
1253 else if (inp->inp_laddr.s_addr != laddr.s_addr)
1254 continue;
1255 } else {
1256 if (laddr.s_addr != INADDR_ANY)
1257 wildcard++;
1258 }
1259 if (wildcard < matchwild) {
1260 match = inp;
1261 matchwild = wildcard;
1262 if (matchwild == 0) {
1263 break;
1264 }
1265 }
1266 }
1267 }
1268 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,0,0,0,0);
1269 return (match);
1270 }
1271 }
1272
1273 /*
1274 * Lookup PCB in hash list.
1275 */
1276 struct inpcb *
1277 in_pcblookup_hash(
1278 struct inpcbinfo *pcbinfo,
1279 struct in_addr faddr,
1280 u_int fport_arg,
1281 struct in_addr laddr,
1282 u_int lport_arg,
1283 int wildcard,
1284 __unused struct ifnet *ifp)
1285 {
1286 struct inpcbhead *head;
1287 struct inpcb *inp;
1288 u_short fport = fport_arg, lport = lport_arg;
1289
1290 /*
1291 * We may have found the pcb in the last lookup - check this first.
1292 */
1293
1294 lck_rw_lock_shared(pcbinfo->mtx);
1295
1296 /*
1297 * First look for an exact match.
1298 */
1299 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
1300 LIST_FOREACH(inp, head, inp_hash) {
1301 #if INET6
1302 if ((inp->inp_vflag & INP_IPV4) == 0)
1303 continue;
1304 #endif
1305 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1306 inp->inp_laddr.s_addr == laddr.s_addr &&
1307 inp->inp_fport == fport &&
1308 inp->inp_lport == lport) {
1309 /*
1310 * Found.
1311 */
1312 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1313 lck_rw_done(pcbinfo->mtx);
1314 return (inp);
1315 }
1316 else { /* it's there but dead, say it isn't found */
1317 lck_rw_done(pcbinfo->mtx);
1318 return(NULL);
1319 }
1320 }
1321 }
1322 if (wildcard) {
1323 struct inpcb *local_wild = NULL;
1324 #if INET6
1325 struct inpcb *local_wild_mapped = NULL;
1326 #endif
1327
1328 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
1329 LIST_FOREACH(inp, head, inp_hash) {
1330 #if INET6
1331 if ((inp->inp_vflag & INP_IPV4) == 0)
1332 continue;
1333 #endif
1334 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1335 inp->inp_lport == lport) {
1336 #if defined(NFAITH) && NFAITH > 0
1337 if (ifp && ifp->if_type == IFT_FAITH &&
1338 (inp->inp_flags & INP_FAITH) == 0)
1339 continue;
1340 #endif
1341 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1342 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1343 lck_rw_done(pcbinfo->mtx);
1344 return (inp);
1345 }
1346 else { /* it's there but dead, say it isn't found */
1347 lck_rw_done(pcbinfo->mtx);
1348 return(NULL);
1349 }
1350 }
1351 else if (inp->inp_laddr.s_addr == INADDR_ANY) {
1352 #if INET6
1353 if (INP_CHECK_SOCKAF(inp->inp_socket,
1354 AF_INET6))
1355 local_wild_mapped = inp;
1356 else
1357 #endif /* INET6 */
1358 local_wild = inp;
1359 }
1360 }
1361 }
1362 if (local_wild == NULL) {
1363 #if INET6
1364 if (local_wild_mapped != NULL) {
1365 if (in_pcb_checkstate(local_wild_mapped, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1366 lck_rw_done(pcbinfo->mtx);
1367 return (local_wild_mapped);
1368 }
1369 else { /* it's there but dead, say it isn't found */
1370 lck_rw_done(pcbinfo->mtx);
1371 return(NULL);
1372 }
1373 }
1374 #endif /* INET6 */
1375 lck_rw_done(pcbinfo->mtx);
1376 return (NULL);
1377 }
1378 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1379 lck_rw_done(pcbinfo->mtx);
1380 return (local_wild);
1381 }
1382 else { /* it's there but dead, say it isn't found */
1383 lck_rw_done(pcbinfo->mtx);
1384 return(NULL);
1385 }
1386 }
1387
1388 /*
1389 * Not found.
1390 */
1391 lck_rw_done(pcbinfo->mtx);
1392 return (NULL);
1393 }
1394
1395 /*
1396 * Insert PCB onto various hash lists.
1397 */
1398 int
1399 in_pcbinshash(struct inpcb *inp, int locked)
1400 {
1401 struct inpcbhead *pcbhash;
1402 struct inpcbporthead *pcbporthash;
1403 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1404 struct inpcbport *phd;
1405 u_int32_t hashkey_faddr;
1406
1407 #if INET6
1408 if (inp->inp_vflag & INP_IPV6)
1409 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1410 else
1411 #endif /* INET6 */
1412 hashkey_faddr = inp->inp_faddr.s_addr;
1413
1414 inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask);
1415
1416 if (!locked) {
1417 if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) {
1418 /*lock inversion issue, mostly with udp multicast packets */
1419 socket_unlock(inp->inp_socket, 0);
1420 lck_rw_lock_exclusive(pcbinfo->mtx);
1421 socket_lock(inp->inp_socket, 0);
1422 }
1423 }
1424
1425 pcbhash = &pcbinfo->hashbase[inp->hash_element];
1426
1427 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
1428 pcbinfo->porthashmask)];
1429
1430 /*
1431 * Go through port list and look for a head for this lport.
1432 */
1433 LIST_FOREACH(phd, pcbporthash, phd_hash) {
1434 if (phd->phd_port == inp->inp_lport)
1435 break;
1436 }
1437 /*
1438 * If none exists, malloc one and tack it on.
1439 */
1440 if (phd == NULL) {
1441 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_WAITOK);
1442 if (phd == NULL) {
1443 if (!locked)
1444 lck_rw_done(pcbinfo->mtx);
1445 return (ENOBUFS); /* XXX */
1446 }
1447 phd->phd_port = inp->inp_lport;
1448 LIST_INIT(&phd->phd_pcblist);
1449 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
1450 }
1451 inp->inp_phd = phd;
1452 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
1453 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
1454 if (!locked)
1455 lck_rw_done(pcbinfo->mtx);
1456 return (0);
1457 }
1458
1459 /*
1460 * Move PCB to the proper hash bucket when { faddr, fport } have been
1461 * changed. NOTE: This does not handle the case of the lport changing (the
1462 * hashed port list would have to be updated as well), so the lport must
1463 * not change after in_pcbinshash() has been called.
1464 */
1465 void
1466 in_pcbrehash(struct inpcb *inp)
1467 {
1468 struct inpcbhead *head;
1469 u_int32_t hashkey_faddr;
1470
1471 #if INET6
1472 if (inp->inp_vflag & INP_IPV6)
1473 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1474 else
1475 #endif /* INET6 */
1476 hashkey_faddr = inp->inp_faddr.s_addr;
1477 inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
1478 inp->inp_fport, inp->inp_pcbinfo->hashmask);
1479 head = &inp->inp_pcbinfo->hashbase[inp->hash_element];
1480
1481 LIST_REMOVE(inp, inp_hash);
1482 LIST_INSERT_HEAD(head, inp, inp_hash);
1483 }
1484
1485 /*
1486 * Remove PCB from various lists.
1487 */
1488 //###LOCK must be called with list lock held
1489 void
1490 in_pcbremlists(struct inpcb *inp)
1491 {
1492 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
1493
1494 if (inp->inp_lport) {
1495 struct inpcbport *phd = inp->inp_phd;
1496
1497 LIST_REMOVE(inp, inp_hash);
1498 LIST_REMOVE(inp, inp_portlist);
1499 if (phd != NULL && (LIST_FIRST(&phd->phd_pcblist) == NULL)) {
1500 LIST_REMOVE(phd, phd_hash);
1501 FREE(phd, M_PCB);
1502 }
1503 }
1504 LIST_REMOVE(inp, inp_list);
1505 inp->inp_pcbinfo->ipi_count--;
1506 }
1507
1508 static void in_pcb_detach_port( struct inpcb *inp);
1509 int
1510 in_pcb_grab_port (struct inpcbinfo *pcbinfo,
1511 u_short options,
1512 struct in_addr laddr,
1513 u_short *lport,
1514 struct in_addr faddr,
1515 u_short fport,
1516 u_int cookie,
1517 u_char owner_id)
1518 {
1519 struct inpcb *inp, *pcb;
1520 struct sockaddr_in sin;
1521 struct proc *p = current_proc();
1522 int stat;
1523
1524
1525 pcbinfo->nat_dummy_socket.so_pcb = 0;
1526 pcbinfo->nat_dummy_socket.so_options = 0;
1527 if (*lport) {
1528 /* The grabber wants a particular port */
1529
1530 if (faddr.s_addr || fport) {
1531 /*
1532 * This is either the second half of an active connect, or
1533 * it's from the acceptance of an incoming connection.
1534 */
1535 if (laddr.s_addr == 0) {
1536 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1537 return EINVAL;
1538 }
1539
1540 inp = in_pcblookup_hash(pcbinfo, faddr, fport, laddr, *lport, 0, NULL);
1541 if (inp) {
1542 /* pcb was found, its count was upped. need to decrease it here */
1543 in_pcb_checkstate(inp, WNT_RELEASE, 0);
1544 if (!(IN_MULTICAST(ntohl(laddr.s_addr)))) {
1545 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1546 return (EADDRINUSE);
1547 }
1548 }
1549
1550 stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p);
1551 if (stat) {
1552 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1553 return stat;
1554 }
1555 pcb = sotoinpcb(&pcbinfo->nat_dummy_socket);
1556 pcb->inp_vflag |= INP_IPV4;
1557
1558 pcb->inp_lport = *lport;
1559 pcb->inp_laddr.s_addr = laddr.s_addr;
1560
1561 pcb->inp_faddr = faddr;
1562 pcb->inp_fport = fport;
1563
1564 lck_rw_lock_exclusive(pcbinfo->mtx);
1565 in_pcbinshash(pcb, 1);
1566 lck_rw_done(pcbinfo->mtx);
1567 }
1568 else {
1569 /*
1570 * This is either a bind for a passive socket, or it's the
1571 * first part of bind-connect sequence (not likely since an
1572 * ephemeral port is usually used in this case). Or, it's
1573 * the result of a connection acceptance when the foreign
1574 * address/port cannot be provided (which requires the SO_REUSEADDR
1575 * flag if laddr is not multicast).
1576 */
1577
1578 stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p);
1579 if (stat) {
1580 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1581 return stat;
1582 }
1583 pcb = sotoinpcb(&pcbinfo->nat_dummy_socket);
1584 pcb->inp_vflag |= INP_IPV4;
1585
1586 pcbinfo->nat_dummy_socket.so_options = options;
1587 bzero(&sin, sizeof(struct sockaddr_in));
1588 sin.sin_len = sizeof(struct sockaddr_in);
1589 sin.sin_family = AF_INET;
1590 sin.sin_addr.s_addr = laddr.s_addr;
1591 sin.sin_port = *lport;
1592
1593 socket_lock(&pcbinfo->nat_dummy_socket, 1);
1594 stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb,
1595 (struct sockaddr *) &sin, p);
1596 if (stat) {
1597 socket_unlock(&pcbinfo->nat_dummy_socket, 1); /*detach first */
1598 in_pcb_detach_port(pcb); /* will restore dummy pcb */
1599 return stat;
1600 }
1601 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1602 }
1603 }
1604 else {
1605 /* The grabber wants an ephemeral port */
1606
1607 stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p);
1608 if (stat) {
1609 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1610 return stat;
1611 }
1612 pcb = sotoinpcb(&pcbinfo->nat_dummy_socket);
1613 pcb->inp_vflag |= INP_IPV4;
1614
1615 bzero(&sin, sizeof(struct sockaddr_in));
1616 sin.sin_len = sizeof(struct sockaddr_in);
1617 sin.sin_family = AF_INET;
1618 sin.sin_addr.s_addr = laddr.s_addr;
1619 sin.sin_port = 0;
1620
1621 if (faddr.s_addr || fport) {
1622 /*
1623 * Not sure if this case will be used - could occur when connect
1624 * is called, skipping the bind.
1625 */
1626
1627 if (laddr.s_addr == 0) {
1628 in_pcb_detach_port(pcb); /* restores dummy pcb */
1629 return EINVAL;
1630 }
1631
1632 socket_lock(&pcbinfo->nat_dummy_socket, 1);
1633 stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb,
1634 (struct sockaddr *) &sin, p);
1635 if (stat) {
1636 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1637 in_pcb_detach_port(pcb); /* restores dummy pcb */
1638 return stat;
1639 }
1640
1641 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1642 inp = in_pcblookup_hash(pcbinfo, faddr, fport,
1643 pcb->inp_laddr, pcb->inp_lport, 0, NULL);
1644 if (inp) {
1645 /* pcb was found, its count was upped. need to decrease it here */
1646 in_pcb_checkstate(inp, WNT_RELEASE, 0);
1647 in_pcb_detach_port(pcb);
1648 return (EADDRINUSE);
1649 }
1650
1651 lck_rw_lock_exclusive(pcbinfo->mtx);
1652 pcb->inp_faddr = faddr;
1653 pcb->inp_fport = fport;
1654 in_pcbrehash(pcb);
1655 lck_rw_done(pcbinfo->mtx);
1656 }
1657 else {
1658 /*
1659 * This is a simple bind of an ephemeral port. The local addr
1660 * may or may not be defined.
1661 */
1662
1663 socket_lock(&pcbinfo->nat_dummy_socket, 1);
1664 stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb,
1665 (struct sockaddr *) &sin, p);
1666 if (stat) {
1667 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1668 in_pcb_detach_port(pcb);
1669 return stat;
1670 }
1671 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1672 }
1673 *lport = pcb->inp_lport;
1674 }
1675
1676
1677 pcb->nat_owner = owner_id;
1678 pcb->nat_cookie = cookie;
1679 pcb->inp_ppcb = (caddr_t) pcbinfo->dummy_cb;
1680 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; /* restores dummypcb */
1681 return 0;
1682 }
1683
1684 /* 3962035 - in_pcb_letgo_port needs a special case function for detaching */
1685 static void
1686 in_pcb_detach_port(
1687 struct inpcb *inp)
1688 {
1689 struct socket *so = inp->inp_socket;
1690 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1691
1692 if (so != &pcbinfo->nat_dummy_socket)
1693 panic("in_pcb_detach_port: not a dummy_sock: so=%p, inp=%p\n", so, inp);
1694 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
1695 /*### access ipi in in_pcbremlists */
1696 in_pcbremlists(inp);
1697
1698 inp->inp_socket = 0;
1699 inp->reserved[0] = (u_int32_t) so;
1700 zfree(pcbinfo->ipi_zone, inp);
1701 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; /* restores dummypcb */
1702 }
1703
1704 int
1705 in_pcb_letgo_port(struct inpcbinfo *pcbinfo, struct in_addr laddr, u_short lport,
1706 struct in_addr faddr, u_short fport, u_char owner_id)
1707 {
1708 struct inpcbhead *head;
1709 struct inpcb *inp;
1710
1711 /*
1712 * First look for an exact match.
1713 */
1714
1715 lck_rw_lock_exclusive(pcbinfo->mtx);
1716 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
1717 for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
1718 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1719 inp->inp_laddr.s_addr == laddr.s_addr &&
1720 inp->inp_fport == fport &&
1721 inp->inp_lport == lport &&
1722 inp->nat_owner == owner_id) {
1723 /*
1724 * Found.
1725 */
1726 in_pcb_detach_port(inp);
1727 lck_rw_done(pcbinfo->mtx);
1728 return 0;
1729 }
1730 }
1731
1732 lck_rw_done(pcbinfo->mtx);
1733 return ENOENT;
1734 }
1735
1736 u_char
1737 in_pcb_get_owner(struct inpcbinfo *pcbinfo,
1738 struct in_addr laddr, u_short lport,
1739 struct in_addr faddr, u_short fport,
1740 u_int *cookie)
1741
1742 {
1743 struct inpcb *inp;
1744 u_char owner_id = INPCB_NO_OWNER;
1745 struct inpcbport *phd;
1746 struct inpcbporthead *porthash;
1747
1748
1749 if (IN_MULTICAST(laddr.s_addr)) {
1750 /*
1751 * Walk through PCB's looking for registered
1752 * owners.
1753 */
1754
1755 lck_rw_lock_shared(pcbinfo->mtx);
1756 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
1757 pcbinfo->porthashmask)];
1758 for (phd = porthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) {
1759 if (phd->phd_port == lport)
1760 break;
1761 }
1762
1763 if (phd == 0) {
1764 lck_rw_done(pcbinfo->mtx);
1765 return INPCB_NO_OWNER;
1766 }
1767
1768 owner_id = INPCB_NO_OWNER;
1769 for (inp = phd->phd_pcblist.lh_first; inp != NULL;
1770 inp = inp->inp_portlist.le_next) {
1771
1772 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1773 if (inp->nat_owner == 0)
1774 owner_id |= INPCB_OWNED_BY_X;
1775 else
1776 owner_id |= inp->nat_owner;
1777 }
1778 }
1779
1780 lck_rw_done(pcbinfo->mtx);
1781 return owner_id;
1782 }
1783 else {
1784 inp = in_pcblookup_hash(pcbinfo, faddr, fport,
1785 laddr, lport, 1, NULL);
1786 if (inp) {
1787 /* pcb was found, its count was upped. need to decrease it here */
1788 /* if we found it, that pcb is already locked by the caller */
1789 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING)
1790 return(INPCB_NO_OWNER);
1791
1792 if (inp->nat_owner) {
1793 owner_id = inp->nat_owner;
1794 *cookie = inp->nat_cookie;
1795 }
1796 else {
1797 owner_id = INPCB_OWNED_BY_X;
1798 }
1799 }
1800 else
1801 owner_id = INPCB_NO_OWNER;
1802
1803 return owner_id;
1804 }
1805 }
1806
1807 int
1808 in_pcb_new_share_client(struct inpcbinfo *pcbinfo, u_char *owner_id)
1809 {
1810
1811 int i;
1812
1813
1814 for (i=0; i < INPCB_MAX_IDS; i++) {
1815 if ((pcbinfo->all_owners & (1 << i)) == 0) {
1816 pcbinfo->all_owners |= (1 << i);
1817 *owner_id = (1 << i);
1818 return 0;
1819 }
1820 }
1821
1822 return ENOSPC;
1823 }
1824
1825 int
1826 in_pcb_rem_share_client(struct inpcbinfo *pcbinfo, u_char owner_id)
1827 {
1828 struct inpcb *inp;
1829
1830
1831 lck_rw_lock_exclusive(pcbinfo->mtx);
1832 if (pcbinfo->all_owners & owner_id) {
1833 pcbinfo->all_owners &= ~owner_id;
1834 for (inp = pcbinfo->listhead->lh_first; inp != NULL; inp = inp->inp_list.le_next) {
1835 if (inp->nat_owner & owner_id) {
1836 if (inp->nat_owner == owner_id)
1837 /*
1838 * Deallocate the pcb
1839 */
1840 in_pcb_detach_port(inp);
1841 else
1842 inp->nat_owner &= ~owner_id;
1843 }
1844 }
1845 }
1846 else {
1847 lck_rw_done(pcbinfo->mtx);
1848 return ENOENT;
1849 }
1850
1851 lck_rw_done(pcbinfo->mtx);
1852 return 0;
1853 }
1854
1855
1856
1857 void in_pcb_nat_init(struct inpcbinfo *pcbinfo, int afamily,
1858 int pfamily, int protocol)
1859 {
1860 int stat;
1861 struct proc *p = current_proc();
1862
1863 bzero(&pcbinfo->nat_dummy_socket, sizeof(struct socket));
1864 #if CONFIG_MACF_NET
1865 mac_socket_label_init(&pcbinfo->nat_dummy_socket, M_WAITOK);
1866 #endif
1867 pcbinfo->nat_dummy_socket.so_proto = pffindproto_locked(afamily, pfamily, protocol);
1868 pcbinfo->all_owners = 0;
1869 stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p);
1870 if (stat)
1871 panic("in_pcb_nat_init: can't alloc fakepcb err=%d\n", stat);
1872 pcbinfo->nat_dummy_pcb = (struct inpcb *)pcbinfo->nat_dummy_socket.so_pcb;
1873 }
1874
1875 /* Mechanism used to defer the memory release of PCBs
1876 * The pcb list will contain the pcb until the ripper can clean it up if
1877 * the following conditions are met: 1) state "DEAD", 2) wantcnt is STOPUSING
1878 * 3) usecount is null
1879 * This function will be called to either mark the pcb as
1880 */
1881 int
1882 in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
1883 {
1884
1885 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
1886 UInt32 origwant;
1887 UInt32 newwant;
1888
1889 switch (mode) {
1890
1891 case WNT_STOPUSING: /* try to mark the pcb as ready for recycling */
1892
1893 /* compareswap with STOPUSING, if success we're good, if it's in use, will be marked later */
1894
1895 if (locked == 0)
1896 socket_lock(pcb->inp_socket, 1);
1897 pcb->inp_state = INPCB_STATE_DEAD;
1898 stopusing:
1899 if (pcb->inp_socket->so_usecount < 0)
1900 panic("in_pcb_checkstate STOP pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket);
1901 if (locked == 0)
1902 socket_unlock(pcb->inp_socket, 1);
1903
1904 origwant = *wantcnt;
1905 if ((UInt16) origwant == 0xffff ) /* should stop using */
1906 return (WNT_STOPUSING);
1907 newwant = 0xffff;
1908 if ((UInt16) origwant == 0) {/* try to mark it as unsuable now */
1909 OSCompareAndSwap(origwant, newwant, wantcnt) ;
1910 }
1911 return (WNT_STOPUSING);
1912 break;
1913
1914 case WNT_ACQUIRE: /* try to increase reference to pcb */
1915 /* if WNT_STOPUSING should bail out */
1916 /*
1917 * if socket state DEAD, try to set count to STOPUSING, return failed
1918 * otherwise increase cnt
1919 */
1920 do {
1921 origwant = *wantcnt;
1922 if ((UInt16) origwant == 0xffff ) {/* should stop using */
1923 // printf("in_pcb_checkstate: ACQ PCB was STOPUSING while release. odd pcb=%p\n", pcb);
1924 return (WNT_STOPUSING);
1925 }
1926 newwant = origwant + 1;
1927 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
1928 return (WNT_ACQUIRE);
1929 break;
1930
1931 case WNT_RELEASE: /* release reference. if result is null and pcb state is DEAD,
1932 set wanted bit to STOPUSING
1933 */
1934
1935 if (locked == 0)
1936 socket_lock(pcb->inp_socket, 1);
1937
1938 do {
1939 origwant = *wantcnt;
1940 if ((UInt16) origwant == 0x0 )
1941 panic("in_pcb_checkstate pcb=%p release with zero count", pcb);
1942 if ((UInt16) origwant == 0xffff ) {/* should stop using */
1943 #if TEMPDEBUG
1944 printf("in_pcb_checkstate: REL PCB was STOPUSING while release. odd pcb=%p\n", pcb);
1945 #endif
1946 if (locked == 0)
1947 socket_unlock(pcb->inp_socket, 1);
1948 return (WNT_STOPUSING);
1949 }
1950 newwant = origwant - 1;
1951 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
1952
1953 if (pcb->inp_state == INPCB_STATE_DEAD)
1954 goto stopusing;
1955 if (pcb->inp_socket->so_usecount < 0)
1956 panic("in_pcb_checkstate RELEASE pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket);
1957
1958 if (locked == 0)
1959 socket_unlock(pcb->inp_socket, 1);
1960 return (WNT_RELEASE);
1961 break;
1962
1963 default:
1964
1965 panic("in_pcb_checkstate: so=%p not a valid state =%x\n", pcb->inp_socket, mode);
1966 }
1967
1968 /* NOTREACHED */
1969 return (mode);
1970 }
1971
1972 /*
1973 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
1974 * The inpcb_compat data structure is passed to user space and must
1975 * not change. We intentionally avoid copying pointers. The socket is
1976 * the one exception, though we probably shouldn't copy that either.
1977 */
1978 void
1979 inpcb_to_compat(
1980 struct inpcb *inp,
1981 struct inpcb_compat *inp_compat)
1982 {
1983 bzero(inp_compat, sizeof(*inp_compat));
1984 inp_compat->inp_fport = inp->inp_fport;
1985 inp_compat->inp_lport = inp->inp_lport;
1986 inp_compat->inp_socket = inp->inp_socket;
1987 inp_compat->nat_owner = inp->nat_owner;
1988 inp_compat->nat_cookie = inp->nat_cookie;
1989 inp_compat->inp_gencnt = inp->inp_gencnt;
1990 inp_compat->inp_flags = inp->inp_flags;
1991 inp_compat->inp_flow = inp->inp_flow;
1992 inp_compat->inp_vflag = inp->inp_vflag;
1993 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
1994 inp_compat->inp_ip_p = inp->inp_ip_p;
1995 inp_compat->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
1996 inp_compat->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
1997 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
1998 inp_compat->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim;
1999 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2000 inp_compat->inp6_ifindex = inp->inp6_ifindex;
2001 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2002 }
2003