]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/in_pcb.c
fce3bb78b8b2a9c8f87bf63f50fa31f961e4c004
[apple/xnu.git] / bsd / netinet / in_pcb.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
62 */
63
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/domain.h>
69 #include <sys/protosw.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/proc.h>
73 #ifndef __APPLE__
74 #include <sys/jail.h>
75 #endif
76 #include <sys/kernel.h>
77 #include <sys/sysctl.h>
78 #include <libkern/OSAtomic.h>
79
80 #include <machine/limits.h>
81
82 #ifdef __APPLE__
83 #include <kern/zalloc.h>
84 #endif
85
86 #include <net/if.h>
87 #include <net/if_types.h>
88 #include <net/route.h>
89
90 #include <netinet/in.h>
91 #include <netinet/in_pcb.h>
92 #include <netinet/in_var.h>
93 #include <netinet/ip_var.h>
94 #if INET6
95 #include <netinet/ip6.h>
96 #include <netinet6/ip6_var.h>
97 #endif /* INET6 */
98
99 #include "faith.h"
100
101 #if IPSEC
102 #include <netinet6/ipsec.h>
103 #include <netkey/key.h>
104 #endif /* IPSEC */
105
106 #include <sys/kdebug.h>
107
108 #if IPSEC
109 extern int ipsec_bypass;
110 #endif
111
112 extern u_long route_generation;
113
114 #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
115 #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
116
117 struct in_addr zeroin_addr;
118
119 /*
120 * These configure the range of local port addresses assigned to
121 * "unspecified" outgoing connections/packets/whatever.
122 */
123 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
124 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
125 #ifndef __APPLE__
126 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */
127 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */
128 #else
129 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
130 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
131 #endif
132 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
133 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
134
135 #define RANGECHK(var, min, max) \
136 if ((var) < (min)) { (var) = (min); } \
137 else if ((var) > (max)) { (var) = (max); }
138
139 static int
140 sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
141 {
142 #pragma unused(arg1, arg2)
143 int error = sysctl_handle_int(oidp,
144 oidp->oid_arg1, oidp->oid_arg2, req);
145 if (!error) {
146 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
147 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
148 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
149 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
150 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
151 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
152 }
153 return error;
154 }
155
156 #undef RANGECHK
157
158 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports");
159
160 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
161 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
162 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
163 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
164 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
165 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
166 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
167 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
168 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
169 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
170 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
171 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
172
173 /*
174 * in_pcb.c: manage the Protocol Control Blocks.
175 *
176 * NOTE: It is assumed that most of these functions will be called at
177 * splnet(). XXX - There are, unfortunately, a few exceptions to this
178 * rule that should be fixed.
179 */
180
181 /*
182 * Allocate a PCB and associate it with the socket.
183 *
184 * Returns: 0 Success
185 * ENOBUFS
186 * ENOMEM
187 * ipsec_init_policy:??? [IPSEC]
188 */
189 int
190 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, __unused struct proc *p)
191 {
192 struct inpcb *inp;
193 caddr_t temp;
194 #if IPSEC
195 #ifndef __APPLE__
196 int error;
197 #endif
198 #endif
199 #if CONFIG_MACF_NET
200 int mac_error;
201 #endif
202
203 if (so->cached_in_sock_layer == 0) {
204 #if TEMPDEBUG
205 printf("PCBALLOC calling zalloc for socket %x\n", so);
206 #endif
207 inp = (struct inpcb *) zalloc(pcbinfo->ipi_zone);
208 if (inp == NULL)
209 return (ENOBUFS);
210 bzero((caddr_t)inp, sizeof(*inp));
211 }
212 else {
213 #if TEMPDEBUG
214 printf("PCBALLOC reusing PCB for socket %x\n", so);
215 #endif
216 inp = (struct inpcb *) so->so_saved_pcb;
217 temp = inp->inp_saved_ppcb;
218 bzero((caddr_t) inp, sizeof(*inp));
219 inp->inp_saved_ppcb = temp;
220 }
221
222 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
223 inp->inp_pcbinfo = pcbinfo;
224 inp->inp_socket = so;
225 #if CONFIG_MACF_NET
226 mac_error = mac_inpcb_label_init(inp, M_WAITOK);
227 if (mac_error != 0) {
228 if (so->cached_in_sock_layer == 0)
229 zfree(pcbinfo->ipi_zone, inp);
230 return (mac_error);
231 }
232 mac_inpcb_label_associate(so, inp);
233 #endif
234 #if CONFIG_IP_EDGEHOLE
235 ip_edgehole_attach(inp);
236 #endif
237 so->so_pcb = (caddr_t)inp;
238
239 if (so->so_proto->pr_flags & PR_PCBLOCK) {
240 inp->inpcb_mtx = lck_mtx_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr);
241 if (inp->inpcb_mtx == NULL) {
242 printf("in_pcballoc: can't alloc mutex! so=%p\n", so);
243 return(ENOMEM);
244 }
245 }
246
247 #if IPSEC
248 #ifndef __APPLE__
249 if (ipsec_bypass == 0) {
250 error = ipsec_init_policy(so, &inp->inp_sp);
251 if (error != 0) {
252 zfree(pcbinfo->ipi_zone, inp);
253 return error;
254 }
255 }
256 #endif
257 #endif /*IPSEC*/
258 #if INET6
259 if (INP_SOCKAF(so) == AF_INET6 && !ip6_mapped_addr_on)
260 inp->inp_flags |= IN6P_IPV6_V6ONLY;
261 #endif
262
263 #if INET6
264 if (ip6_auto_flowlabel)
265 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
266 #endif
267 lck_rw_lock_exclusive(pcbinfo->mtx);
268 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
269 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
270 pcbinfo->ipi_count++;
271 lck_rw_done(pcbinfo->mtx);
272 return (0);
273 }
274
275
276 /*
277 in_pcblookup_local_and_cleanup does everything
278 in_pcblookup_local does but it checks for a socket
279 that's going away. Since we know that the lock is
280 held read+write when this funciton is called, we
281 can safely dispose of this socket like the slow
282 timer would usually do and return NULL. This is
283 great for bind.
284 */
285 struct inpcb*
286 in_pcblookup_local_and_cleanup(
287 struct inpcbinfo *pcbinfo,
288 struct in_addr laddr,
289 u_int lport_arg,
290 int wild_okay)
291 {
292 struct inpcb *inp;
293
294 /* Perform normal lookup */
295 inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
296
297 /* Check if we found a match but it's waiting to be disposed */
298 if (inp && inp->inp_wantcnt == WNT_STOPUSING) {
299 struct socket *so = inp->inp_socket;
300
301 lck_mtx_lock(inp->inpcb_mtx);
302
303 if (so->so_usecount == 0) {
304 in_pcbdispose(inp);
305 inp = NULL;
306 }
307 else {
308 lck_mtx_unlock(inp->inpcb_mtx);
309 }
310 }
311
312 return inp;
313 }
314
#ifdef __APPLE_API_PRIVATE
/*
 * Radar 5523020: send a kernel event notification if a non-participating
 * socket tries to bind the port owned by a socket that has set
 * SOF_NOTIFYCONFLICT.
 *
 * 'port' is in network byte order; the event carries it in host order
 * together with the pid of the calling process.
 *
 * The function returns nothing; it used to be declared without a return
 * type (implicit int) while never returning a value.
 */
void
in_pcb_conflict_post_msg(u_int16_t port)
{
	struct kev_msg ev_msg;
	struct kev_in_portinuse in_portinuse;

	/*
	 * Both structures live on the stack and the whole of in_portinuse
	 * is handed to kev_post_msg() below.  Zero them first so that no
	 * member or padding byte that is not explicitly assigned carries
	 * stale stack contents out of this function.
	 */
	bzero(&in_portinuse, sizeof (in_portinuse));
	bzero(&ev_msg, sizeof (ev_msg));

	in_portinuse.port = ntohs(port);	/* port in host order */
	in_portinuse.req_pid = proc_selfpid();
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_INET_SUBCLASS;
	ev_msg.event_code = KEV_INET_PORTINUSE;
	ev_msg.dv[0].data_ptr = &in_portinuse;
	ev_msg.dv[0].data_length = sizeof(struct kev_in_portinuse);
	ev_msg.dv[1].data_length = 0;
	kev_post_msg(&ev_msg);
}
#endif
337 /*
338 * Returns: 0 Success
339 * EADDRNOTAVAIL Address not available.
340 * EINVAL Invalid argument
341 * EAFNOSUPPORT Address family not supported [notdef]
342 * EACCES Permission denied
343 * EADDRINUSE Address in use
344 * EAGAIN Resource unavailable, try again
345 * proc_suser:EPERM Operation not permitted
346 */
347 int
348 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
349 {
350 struct socket *so = inp->inp_socket;
351 unsigned short *lastport;
352 struct sockaddr_in *sin;
353 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
354 u_short lport = 0;
355 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
356 int error, conflict = 0;
357
358 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
359 return (EADDRNOTAVAIL);
360 if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
361 return (EINVAL);
362 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
363 wild = 1;
364 socket_unlock(so, 0); /* keep reference on socket */
365 lck_rw_lock_exclusive(pcbinfo->mtx);
366 if (nam) {
367 sin = (struct sockaddr_in *)nam;
368 if (nam->sa_len != sizeof (*sin)) {
369 lck_rw_done(pcbinfo->mtx);
370 socket_lock(so, 0);
371 return (EINVAL);
372 }
373 #ifdef notdef
374 /*
375 * We should check the family, but old programs
376 * incorrectly fail to initialize it.
377 */
378 if (sin->sin_family != AF_INET) {
379 lck_rw_done(pcbinfo->mtx);
380 socket_lock(so, 0);
381 return (EAFNOSUPPORT);
382 }
383 #endif
384 lport = sin->sin_port;
385 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
386 /*
387 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
388 * allow complete duplication of binding if
389 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
390 * and a multicast address is bound on both
391 * new and duplicated sockets.
392 */
393 if (so->so_options & SO_REUSEADDR)
394 reuseport = SO_REUSEADDR|SO_REUSEPORT;
395 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
396 struct ifaddr *ifa;
397 sin->sin_port = 0; /* yech... */
398 if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin)) == 0) {
399 lck_rw_done(pcbinfo->mtx);
400 socket_lock(so, 0);
401 return (EADDRNOTAVAIL);
402 }
403 else {
404 ifafree(ifa);
405 }
406 }
407 if (lport) {
408 struct inpcb *t;
409
410 /* GROSS */
411 if (ntohs(lport) < IPPORT_RESERVED && p &&
412 proc_suser(p)) {
413 lck_rw_done(pcbinfo->mtx);
414 socket_lock(so, 0);
415 return (EACCES);
416 }
417 if (so->so_uid &&
418 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
419 t = in_pcblookup_local_and_cleanup(inp->inp_pcbinfo,
420 sin->sin_addr, lport, INPLOOKUP_WILDCARD);
421 if (t &&
422 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
423 ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
424 (t->inp_socket->so_options &
425 SO_REUSEPORT) == 0) &&
426 (so->so_uid != t->inp_socket->so_uid) &&
427 ((t->inp_socket->so_flags & SOF_REUSESHAREUID) == 0)) {
428 #if INET6
429 if (ntohl(sin->sin_addr.s_addr) !=
430 INADDR_ANY ||
431 ntohl(t->inp_laddr.s_addr) !=
432 INADDR_ANY ||
433 INP_SOCKAF(so) ==
434 INP_SOCKAF(t->inp_socket))
435 #endif /* INET6 */
436 {
437 #ifdef __APPLE_API_PRIVATE
438
439 if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0))
440 conflict = 1;
441
442 lck_rw_done(pcbinfo->mtx);
443
444 if (conflict)
445 in_pcb_conflict_post_msg(lport);
446 #else
447 lck_rw_done(pcbinfo->mtx);
448 #endif /* __APPLE_API_PRIVATE */
449
450 socket_lock(so, 0);
451 return (EADDRINUSE);
452 }
453 }
454 }
455 t = in_pcblookup_local_and_cleanup(pcbinfo, sin->sin_addr,
456 lport, wild);
457 if (t &&
458 (reuseport & t->inp_socket->so_options) == 0) {
459 #if INET6
460 if (ip6_mapped_addr_on == 0 ||
461 ntohl(sin->sin_addr.s_addr) !=
462 INADDR_ANY ||
463 ntohl(t->inp_laddr.s_addr) !=
464 INADDR_ANY ||
465 INP_SOCKAF(so) ==
466 INP_SOCKAF(t->inp_socket))
467 #endif /* INET6 */
468 {
469 #ifdef __APPLE_API_PRIVATE
470
471 if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0))
472 conflict = 1;
473
474 lck_rw_done(pcbinfo->mtx);
475
476 if (conflict)
477 in_pcb_conflict_post_msg(lport);
478 #else
479 lck_rw_done(pcbinfo->mtx);
480 #endif /* __APPLE_API_PRIVATE */
481 socket_lock(so, 0);
482 return (EADDRINUSE);
483 }
484 }
485 }
486 inp->inp_laddr = sin->sin_addr;
487 }
488 if (lport == 0) {
489 u_short first, last;
490 int count;
491
492 inp->inp_flags |= INP_ANONPORT;
493
494 if (inp->inp_flags & INP_HIGHPORT) {
495 first = ipport_hifirstauto; /* sysctl */
496 last = ipport_hilastauto;
497 lastport = &pcbinfo->lasthi;
498 } else if (inp->inp_flags & INP_LOWPORT) {
499 if (p && (error = proc_suser(p))) {
500 lck_rw_done(pcbinfo->mtx);
501 socket_lock(so, 0);
502 return error;
503 }
504 first = ipport_lowfirstauto; /* 1023 */
505 last = ipport_lowlastauto; /* 600 */
506 lastport = &pcbinfo->lastlow;
507 } else {
508 first = ipport_firstauto; /* sysctl */
509 last = ipport_lastauto;
510 lastport = &pcbinfo->lastport;
511 }
512 /*
513 * Simple check to ensure all ports are not used up causing
514 * a deadlock here.
515 *
516 * We split the two cases (up and down) so that the direction
517 * is not being tested on each round of the loop.
518 */
519 if (first > last) {
520 /*
521 * counting down
522 */
523 count = first - last;
524
525 do {
526 if (count-- < 0) { /* completely used? */
527 lck_rw_done(pcbinfo->mtx);
528 socket_lock(so, 0);
529 inp->inp_laddr.s_addr = INADDR_ANY;
530 return (EADDRNOTAVAIL);
531 }
532 --*lastport;
533 if (*lastport > first || *lastport < last)
534 *lastport = first;
535 lport = htons(*lastport);
536 } while (in_pcblookup_local_and_cleanup(pcbinfo,
537 inp->inp_laddr, lport, wild));
538 } else {
539 /*
540 * counting up
541 */
542 count = last - first;
543
544 do {
545 if (count-- < 0) { /* completely used? */
546 lck_rw_done(pcbinfo->mtx);
547 socket_lock(so, 0);
548 inp->inp_laddr.s_addr = INADDR_ANY;
549 return (EADDRNOTAVAIL);
550 }
551 ++*lastport;
552 if (*lastport < first || *lastport > last)
553 *lastport = first;
554 lport = htons(*lastport);
555 } while (in_pcblookup_local_and_cleanup(pcbinfo,
556 inp->inp_laddr, lport, wild));
557 }
558 }
559 socket_lock(so, 0);
560 inp->inp_lport = lport;
561 if (in_pcbinshash(inp, 1) != 0) {
562 inp->inp_laddr.s_addr = INADDR_ANY;
563 inp->inp_lport = 0;
564 lck_rw_done(pcbinfo->mtx);
565 return (EAGAIN);
566 }
567 lck_rw_done(pcbinfo->mtx);
568 sflt_notify(so, sock_evt_bound, NULL);
569 return (0);
570 }
571
#if CONFIG_FORCE_OUT_IFP
/*
 * Build the fake route hanging off 'ia' and cache it in ia->ia_route.
 * The route is not linked into any routing table.  Returns the new
 * route with one reference held on behalf of ia->ia_route, or NULL if
 * it could not be allocated or set up.
 */
static struct rtentry *
pdp_fake_route_create(struct in_ifaddr *ia)
{
	struct sockaddr *addr = ia->ia_ifa.ifa_addr;
	struct rtentry *fake;

	fake = rte_alloc();
	if (fake == NULL)
		return (NULL);

	bzero(fake, sizeof (*fake));
	fake->rt_flags = RTF_UP | RTF_STATIC;
	if (rt_setgate(fake, addr, addr) != 0) {
		rte_free(fake);
		return (NULL);
	}
	/*
	 * Explicitly zero the key so that:
	 *   rt_tables[rt_key(rt)->sa_family] == rt_tables[0] == NULL
	 */
	bzero(rt_key(fake), addr->sa_len);

	rtsetifa(fake, &ia->ia_ifa);
	fake->rt_ifp = fake->rt_ifa->ifa_ifp;

	/* Take a reference for the ia pointer to this */
	ia->ia_route = fake;
	rtref(fake);

	/*
	 * One more rtentry floating around that is not
	 * linked to the routing table.
	 */
	(void) OSIncrementAtomic((SInt32 *)&rttrash);

	return (fake);
}

/*
 * pdp_context_route_locked is loosely based on rtalloc_ign_locked with
 * the hope that it can be used anywhere rtalloc_ign_locked is.
 *
 * On return ro->ro_rt is either a referenced route whose interface is
 * 'ifp', or NULL when 'ifp' is NULL, has no IPv4 address, or the fake
 * route could not be created.  A cached route that is up and already
 * uses 'ifp' is kept as is.
 */
__private_extern__ void
pdp_context_route_locked(ifnet_t ifp, struct route *ro)
{
	struct in_ifaddr *ia;
	struct rtentry *rt = ro->ro_rt;

	if (rt != NULL) {
		/* Nothing to do if the cached route already fits. */
		if (rt->rt_ifp == ifp && (rt->rt_flags & RTF_UP))
			return;

		rtfree_locked(rt);
		ro->ro_rt = NULL;
	}

	if (ifp == NULL)
		return;

	/* Find the first IP address, we will use a fake route off of that */
	TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
		if (ia->ia_ifp == ifp)
			break;
	}

	/* Hrmm no IP addresses here :( */
	if (ia == NULL)
		return;

	rt = ia->ia_route;
	if (rt == NULL) {
		/* Allocate and set up a fake route */
		rt = pdp_fake_route_create(ia);
		if (rt == NULL)
			return;
	}

	rt->generation_id = route_generation;
	rtref(rt);	/* increment the reference count */
	ro->ro_rt = rt;
}
#endif
642
643 /*
644 * Transform old in_pcbconnect() into an inner subroutine for new
645 * in_pcbconnect(): Do some validity-checking on the remote
646 * address (in mbuf 'nam') and then determine local host address
647 * (i.e., which interface) to use to access that remote host.
648 *
649 * This preserves definition of in_pcbconnect(), while supporting a
650 * slightly different version for T/TCP. (This is more than
651 * a bit of a kludge, but cleaning up the internal interfaces would
652 * have forced minor changes in every protocol).
653 *
654 * Returns: 0 Success
655 * EINVAL Invalid argument
656 * EAFNOSUPPORT Address family not supported
657 * EADDRNOTAVAIL Address not available
658 */
659 int
660 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
661 struct sockaddr_in **plocal_sin)
662 {
663 struct in_ifaddr *ia;
664 struct sockaddr_in *sin = (struct sockaddr_in *)nam;
665
666 if (nam->sa_len != sizeof (*sin))
667 return (EINVAL);
668 if (sin->sin_family != AF_INET)
669 return (EAFNOSUPPORT);
670 if (sin->sin_port == 0)
671 return (EADDRNOTAVAIL);
672
673 lck_mtx_lock(rt_mtx);
674
675 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
676 /*
677 * If the destination address is INADDR_ANY,
678 * use the primary local address.
679 * If the supplied address is INADDR_BROADCAST,
680 * and the primary interface supports broadcast,
681 * choose the broadcast address for that interface.
682 */
683 #define satosin(sa) ((struct sockaddr_in *)(sa))
684 #define sintosa(sin) ((struct sockaddr *)(sin))
685 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
686 if (sin->sin_addr.s_addr == INADDR_ANY)
687 sin->sin_addr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr;
688 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
689 (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & IFF_BROADCAST))
690 sin->sin_addr = satosin(&TAILQ_FIRST(&in_ifaddrhead)->ia_broadaddr)->sin_addr;
691 }
692 if (inp->inp_laddr.s_addr == INADDR_ANY) {
693 struct route *ro;
694
695 ia = (struct in_ifaddr *)0;
696 /*
697 * If route is known or can be allocated now,
698 * our src addr is taken from the i/f, else punt.
699 * Note that we should check the address family of the cached
700 * destination, in case of sharing the cache with IPv6.
701 */
702 ro = &inp->inp_route;
703 if (ro->ro_rt &&
704 (ro->ro_dst.sa_family != AF_INET ||
705 satosin(&ro->ro_dst)->sin_addr.s_addr !=
706 sin->sin_addr.s_addr ||
707 inp->inp_socket->so_options & SO_DONTROUTE ||
708 ro->ro_rt->generation_id != route_generation)) {
709 rtfree_locked(ro->ro_rt);
710 ro->ro_rt = (struct rtentry *)0;
711 }
712 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
713 (ro->ro_rt == (struct rtentry *)0 ||
714 ro->ro_rt->rt_ifp == 0)) {
715 /* No route yet, so try to acquire one */
716 bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
717 ro->ro_dst.sa_family = AF_INET;
718 ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
719 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
720 sin->sin_addr;
721 #if CONFIG_FORCE_OUT_IFP
722 /* If the socket has requested a specific interface, use that address */
723 if (inp->pdp_ifp != NULL) {
724 pdp_context_route_locked(inp->pdp_ifp, ro);
725 }
726 else
727 #endif /* CONFIG_FORCE_OUT_IFP */
728 rtalloc_ign_locked(ro, 0UL);
729 }
730 /*
731 * If we found a route, use the address
732 * corresponding to the outgoing interface
733 * unless it is the loopback (in case a route
734 * to our address on another net goes to loopback).
735 */
736 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
737 ia = ifatoia(ro->ro_rt->rt_ifa);
738 if (ia)
739 ifaref(&ia->ia_ifa);
740 }
741 if (ia == 0) {
742 u_short fport = sin->sin_port;
743
744 sin->sin_port = 0;
745 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
746 if (ia == 0) {
747 ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
748 }
749 sin->sin_port = fport;
750 if (ia == 0) {
751 ia = TAILQ_FIRST(&in_ifaddrhead);
752 if (ia)
753 ifaref(&ia->ia_ifa);
754 }
755 if (ia == 0) {
756 lck_mtx_unlock(rt_mtx);
757 return (EADDRNOTAVAIL);
758 }
759 }
760 /*
761 * If the destination address is multicast and an outgoing
762 * interface has been set as a multicast option, use the
763 * address of that interface as our source address.
764 */
765 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
766 inp->inp_moptions != NULL) {
767 struct ip_moptions *imo;
768 struct ifnet *ifp;
769
770 imo = inp->inp_moptions;
771 if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
772 ia->ia_ifp != imo->imo_multicast_ifp)) {
773 ifp = imo->imo_multicast_ifp;
774 if (ia)
775 ifafree(&ia->ia_ifa);
776 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link)
777 if (ia->ia_ifp == ifp)
778 break;
779 if (ia == 0) {
780 lck_mtx_unlock(rt_mtx);
781 return (EADDRNOTAVAIL);
782 }
783 ifaref(&ia->ia_ifa);
784 }
785 }
786 /*
787 * Don't do pcblookup call here; return interface in plocal_sin
788 * and exit to caller, that will do the lookup.
789 */
790 *plocal_sin = &ia->ia_addr;
791 ifafree(&ia->ia_ifa);
792 }
793 lck_mtx_unlock(rt_mtx);
794 return(0);
795 }
796
797 /*
798 * Outer subroutine:
799 * Connect from a socket to a specified address.
800 * Both address and port must be specified in argument sin.
801 * If don't have a local address for this socket yet,
802 * then pick one.
803 */
804 int
805 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
806 {
807 struct sockaddr_in *ifaddr;
808 struct sockaddr_in *sin = (struct sockaddr_in *)nam;
809 struct inpcb *pcb;
810 int error;
811
812 /*
813 * Call inner routine, to assign local interface address.
814 */
815 if ((error = in_pcbladdr(inp, nam, &ifaddr)) != 0)
816 return(error);
817
818 socket_unlock(inp->inp_socket, 0);
819 pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
820 inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
821 inp->inp_lport, 0, NULL);
822 socket_lock(inp->inp_socket, 0);
823 if (pcb != NULL) {
824 in_pcb_checkstate(pcb, WNT_RELEASE, 0);
825 return (EADDRINUSE);
826 }
827 if (inp->inp_laddr.s_addr == INADDR_ANY) {
828 if (inp->inp_lport == 0) {
829 error = in_pcbbind(inp, (struct sockaddr *)0, p);
830 if (error)
831 return (error);
832 }
833 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
834 /*lock inversion issue, mostly with udp multicast packets */
835 socket_unlock(inp->inp_socket, 0);
836 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
837 socket_lock(inp->inp_socket, 0);
838 }
839 inp->inp_laddr = ifaddr->sin_addr;
840 inp->inp_flags |= INP_INADDR_ANY;
841 }
842 else {
843 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
844 /*lock inversion issue, mostly with udp multicast packets */
845 socket_unlock(inp->inp_socket, 0);
846 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
847 socket_lock(inp->inp_socket, 0);
848 }
849 }
850 inp->inp_faddr = sin->sin_addr;
851 inp->inp_fport = sin->sin_port;
852 in_pcbrehash(inp);
853 lck_rw_done(inp->inp_pcbinfo->mtx);
854 return (0);
855 }
856
857 void
858 in_pcbdisconnect(struct inpcb *inp)
859 {
860
861 inp->inp_faddr.s_addr = INADDR_ANY;
862 inp->inp_fport = 0;
863
864 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
865 /*lock inversion issue, mostly with udp multicast packets */
866 socket_unlock(inp->inp_socket, 0);
867 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
868 socket_lock(inp->inp_socket, 0);
869 }
870
871 in_pcbrehash(inp);
872 lck_rw_done(inp->inp_pcbinfo->mtx);
873
874 if (inp->inp_socket->so_state & SS_NOFDREF)
875 in_pcbdetach(inp);
876 }
877
878 void
879 in_pcbdetach(struct inpcb *inp)
880 {
881 struct socket *so = inp->inp_socket;
882
883 if (so->so_pcb == 0) { /* we've been called twice */
884 panic("in_pcbdetach: inp=%p so=%p proto=%d so_pcb is null!\n",
885 inp, so, so->so_proto->pr_protocol);
886 }
887
888 #if IPSEC
889 if (ipsec_bypass == 0) {
890 ipsec4_delete_pcbpolicy(inp);
891 }
892 #endif /*IPSEC*/
893
894 /* mark socket state as dead */
895 if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING)
896 panic("in_pcbdetach so=%p prot=%x couldn't set to STOPUSING\n", so, so->so_proto->pr_protocol);
897
898 #if TEMPDEBUG
899 if (so->cached_in_sock_layer)
900 printf("in_pcbdetach for cached socket %x flags=%x\n", so, so->so_flags);
901 else
902 printf("in_pcbdetach for allocated socket %x flags=%x\n", so, so->so_flags);
903 #endif
904 if ((so->so_flags & SOF_PCBCLEARING) == 0) {
905 struct rtentry *rt;
906
907 inp->inp_vflag = 0;
908 if (inp->inp_options)
909 (void)m_free(inp->inp_options);
910 lck_mtx_lock(rt_mtx);
911 if ((rt = inp->inp_route.ro_rt) != NULL) {
912 inp->inp_route.ro_rt = NULL;
913 rtfree_locked(rt);
914 }
915 lck_mtx_unlock(rt_mtx);
916 ip_freemoptions(inp->inp_moptions);
917 inp->inp_moptions = NULL;
918 sofreelastref(so, 0);
919 inp->inp_state = INPCB_STATE_DEAD;
920 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
921 }
922 }
923
924
925 void
926 in_pcbdispose(struct inpcb *inp)
927 {
928 struct socket *so = inp->inp_socket;
929 struct inpcbinfo *ipi = inp->inp_pcbinfo;
930
931 #if TEMPDEBUG
932 if (inp->inp_state != INPCB_STATE_DEAD) {
933 printf("in_pcbdispose: not dead yet? so=%p\n", so);
934 }
935 #endif
936
937 if (so && so->so_usecount != 0)
938 panic("in_pcbdispose: use count=%x so=%p\n", so->so_usecount, so);
939
940 lck_rw_assert(ipi->mtx, LCK_RW_ASSERT_EXCLUSIVE);
941
942 inp->inp_gencnt = ++ipi->ipi_gencnt;
943 /*### access ipi in in_pcbremlists */
944 in_pcbremlists(inp);
945
946 if (so) {
947 if (so->so_proto->pr_flags & PR_PCBLOCK) {
948 sofreelastref(so, 0);
949 if (so->so_rcv.sb_cc || so->so_snd.sb_cc) {
950 #if TEMPDEBUG
951 printf("in_pcbdispose sb not cleaned up so=%p rc_cci=%x snd_cc=%x\n",
952 so, so->so_rcv.sb_cc, so->so_snd.sb_cc);
953 #endif
954 sbrelease(&so->so_rcv);
955 sbrelease(&so->so_snd);
956 }
957 if (so->so_head != NULL)
958 panic("in_pcbdispose, so=%p head still exist\n", so);
959 lck_mtx_unlock(inp->inpcb_mtx);
960 lck_mtx_free(inp->inpcb_mtx, ipi->mtx_grp);
961 }
962 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
963 so->so_saved_pcb = (caddr_t) inp;
964 so->so_pcb = 0;
965 inp->inp_socket = 0;
966 inp->reserved[0] = (u_int32_t)so;
967 #if CONFIG_MACF_NET
968 mac_inpcb_label_destroy(inp);
969 #endif
970 if (so->cached_in_sock_layer == 0) {
971 zfree(ipi->ipi_zone, inp);
972 }
973 sodealloc(so);
974 }
975 #if TEMPDEBUG
976 else
977 printf("in_pcbdispose: no socket for inp=%p\n", inp);
978 #endif
979 }
980
981 /*
982 * The calling convention of in_setsockaddr() and in_setpeeraddr() was
983 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
984 * in struct pr_usrreqs, so that protocols can just reference then directly
985 * without the need for a wrapper function. The socket must have a valid
986 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
987 * except through a kernel programming error, so it is acceptable to panic
988 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap
989 * because there actually /is/ a programming error somewhere... XXX)
990 *
991 * Returns: 0 Success
992 * ENOBUFS No buffer space available
993 * ECONNRESET Connection reset
994 */
995 int
996 in_setsockaddr(struct socket *so, struct sockaddr **nam)
997 {
998 struct inpcb *inp;
999 struct sockaddr_in *sin;
1000
1001 /*
1002 * Do the malloc first in case it blocks.
1003 */
1004 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
1005 if (sin == NULL)
1006 return ENOBUFS;
1007 bzero(sin, sizeof *sin);
1008 sin->sin_family = AF_INET;
1009 sin->sin_len = sizeof(*sin);
1010
1011 inp = sotoinpcb(so);
1012 if (!inp) {
1013 FREE(sin, M_SONAME);
1014 return ECONNRESET;
1015 }
1016 sin->sin_port = inp->inp_lport;
1017 sin->sin_addr = inp->inp_laddr;
1018
1019 *nam = (struct sockaddr *)sin;
1020 return 0;
1021 }
1022
1023 int
1024 in_setpeeraddr(struct socket *so, struct sockaddr **nam)
1025 {
1026 struct inpcb *inp;
1027 struct sockaddr_in *sin;
1028
1029 /*
1030 * Do the malloc first in case it blocks.
1031 */
1032 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
1033 if (sin == NULL)
1034 return ENOBUFS;
1035 bzero((caddr_t)sin, sizeof (*sin));
1036 sin->sin_family = AF_INET;
1037 sin->sin_len = sizeof(*sin);
1038
1039 inp = sotoinpcb(so);
1040 if (!inp) {
1041 FREE(sin, M_SONAME);
1042 return ECONNRESET;
1043 }
1044 sin->sin_port = inp->inp_fport;
1045 sin->sin_addr = inp->inp_faddr;
1046
1047 *nam = (struct sockaddr *)sin;
1048 return 0;
1049 }
1050
1051 void
1052 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1053 int errno, void (*notify)(struct inpcb *, int))
1054 {
1055 struct inpcb *inp;
1056
1057 lck_rw_lock_shared(pcbinfo->mtx);
1058
1059 LIST_FOREACH(inp, pcbinfo->listhead, inp_list) {
1060 #if INET6
1061 if ((inp->inp_vflag & INP_IPV4) == 0)
1062 continue;
1063 #endif
1064 if (inp->inp_faddr.s_addr != faddr.s_addr ||
1065 inp->inp_socket == NULL)
1066 continue;
1067 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
1068 continue;
1069 socket_lock(inp->inp_socket, 1);
1070 (*notify)(inp, errno);
1071 (void)in_pcb_checkstate(inp, WNT_RELEASE, 1);
1072 socket_unlock(inp->inp_socket, 1);
1073 }
1074 lck_rw_done(pcbinfo->mtx);
1075 }
1076
1077 void
1078 in_pcbpurgeif0(
1079 struct inpcb *head,
1080 struct ifnet *ifp)
1081 {
1082 struct inpcb *inp;
1083 struct ip_moptions *imo;
1084 int i, gap;
1085
1086 for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
1087 imo = inp->inp_moptions;
1088 if ((inp->inp_vflag & INP_IPV4) &&
1089 imo != NULL) {
1090 /*
1091 * Unselect the outgoing interface if it is being
1092 * detached.
1093 */
1094 if (imo->imo_multicast_ifp == ifp)
1095 imo->imo_multicast_ifp = NULL;
1096
1097 /*
1098 * Drop multicast group membership if we joined
1099 * through the interface being detached.
1100 */
1101 for (i = 0, gap = 0; i < imo->imo_num_memberships;
1102 i++) {
1103 if (imo->imo_membership[i]->inm_ifp == ifp) {
1104 in_delmulti(&imo->imo_membership[i]);
1105 gap++;
1106 } else if (gap != 0)
1107 imo->imo_membership[i - gap] =
1108 imo->imo_membership[i];
1109 }
1110 imo->imo_num_memberships -= gap;
1111 }
1112 }
1113 }
1114
1115 /*
1116 * Check for alternatives when higher level complains
1117 * about service problems. For now, invalidate cached
1118 * routing information. If the route was created dynamically
1119 * (by a redirect), time to try a default gateway again.
1120 */
1121 void
1122 in_losing(struct inpcb *inp)
1123 {
1124 struct rtentry *rt;
1125 struct rt_addrinfo info;
1126
1127 if ((rt = inp->inp_route.ro_rt)) {
1128 lck_mtx_lock(rt_mtx);
1129 /* Check again, this time while holding the lock */
1130 if ((rt = inp->inp_route.ro_rt) == NULL) {
1131 lck_mtx_unlock(rt_mtx);
1132 return;
1133 }
1134 bzero((caddr_t)&info, sizeof(info));
1135 info.rti_info[RTAX_DST] =
1136 (struct sockaddr *)&inp->inp_route.ro_dst;
1137 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1138 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1139 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
1140 if (rt->rt_flags & RTF_DYNAMIC)
1141 (void) rtrequest_locked(RTM_DELETE, rt_key(rt),
1142 rt->rt_gateway, rt_mask(rt), rt->rt_flags,
1143 (struct rtentry **)0);
1144 /* if the address is gone keep the old route in the pcb */
1145 if ((ifa_foraddr(inp->inp_laddr.s_addr)) != 0) {
1146 inp->inp_route.ro_rt = 0;
1147 rtfree_locked(rt);
1148 }
1149 lck_mtx_unlock(rt_mtx);
1150 /*
1151 * A new route can be allocated
1152 * the next time output is attempted.
1153 */
1154 }
1155 }
1156
1157 /*
1158 * After a routing change, flush old routing
1159 * and allocate a (hopefully) better one.
1160 */
1161 void
1162 in_rtchange(struct inpcb *inp, __unused int errno)
1163 {
1164 struct rtentry *rt;
1165
1166 if ((rt = inp->inp_route.ro_rt) != NULL) {
1167 if ((ifa_foraddr(inp->inp_laddr.s_addr)) == 0)
1168 return; /* we can't remove the route now. not sure if still ok to use src */
1169 lck_mtx_lock(rt_mtx);
1170 /* Check again, this time while holding the lock */
1171 if ((rt = inp->inp_route.ro_rt) == NULL) {
1172 lck_mtx_unlock(rt_mtx);
1173 return;
1174 }
1175 rtfree_locked(rt);
1176 inp->inp_route.ro_rt = NULL;
1177 lck_mtx_unlock(rt_mtx);
1178 /*
1179 * A new route can be allocated the next time
1180 * output is attempted.
1181 */
1182 }
1183 }
1184
1185 /*
1186 * Lookup a PCB based on the local address and port.
1187 */
1188 struct inpcb *
1189 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
1190 unsigned int lport_arg, int wild_okay)
1191 {
1192 struct inpcb *inp;
1193 int matchwild = 3, wildcard;
1194 u_short lport = lport_arg;
1195
1196 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0,0,0,0,0);
1197
1198 if (!wild_okay) {
1199 struct inpcbhead *head;
1200 /*
1201 * Look for an unconnected (wildcard foreign addr) PCB that
1202 * matches the local address and port we're looking for.
1203 */
1204 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
1205 LIST_FOREACH(inp, head, inp_hash) {
1206 #if INET6
1207 if ((inp->inp_vflag & INP_IPV4) == 0)
1208 continue;
1209 #endif
1210 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1211 inp->inp_laddr.s_addr == laddr.s_addr &&
1212 inp->inp_lport == lport) {
1213 /*
1214 * Found.
1215 */
1216 return (inp);
1217 }
1218 }
1219 /*
1220 * Not found.
1221 */
1222 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0,0,0,0,0);
1223 return (NULL);
1224 } else {
1225 struct inpcbporthead *porthash;
1226 struct inpcbport *phd;
1227 struct inpcb *match = NULL;
1228 /*
1229 * Best fit PCB lookup.
1230 *
1231 * First see if this local port is in use by looking on the
1232 * port hash list.
1233 */
1234 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
1235 pcbinfo->porthashmask)];
1236 LIST_FOREACH(phd, porthash, phd_hash) {
1237 if (phd->phd_port == lport)
1238 break;
1239 }
1240 if (phd != NULL) {
1241 /*
1242 * Port is in use by one or more PCBs. Look for best
1243 * fit.
1244 */
1245 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
1246 wildcard = 0;
1247 #if INET6
1248 if ((inp->inp_vflag & INP_IPV4) == 0)
1249 continue;
1250 #endif
1251 if (inp->inp_faddr.s_addr != INADDR_ANY)
1252 wildcard++;
1253 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1254 if (laddr.s_addr == INADDR_ANY)
1255 wildcard++;
1256 else if (inp->inp_laddr.s_addr != laddr.s_addr)
1257 continue;
1258 } else {
1259 if (laddr.s_addr != INADDR_ANY)
1260 wildcard++;
1261 }
1262 if (wildcard < matchwild) {
1263 match = inp;
1264 matchwild = wildcard;
1265 if (matchwild == 0) {
1266 break;
1267 }
1268 }
1269 }
1270 }
1271 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,0,0,0,0);
1272 return (match);
1273 }
1274 }
1275
1276 /*
1277 * Lookup PCB in hash list.
1278 */
1279 struct inpcb *
1280 in_pcblookup_hash(
1281 struct inpcbinfo *pcbinfo,
1282 struct in_addr faddr,
1283 u_int fport_arg,
1284 struct in_addr laddr,
1285 u_int lport_arg,
1286 int wildcard,
1287 __unused struct ifnet *ifp)
1288 {
1289 struct inpcbhead *head;
1290 struct inpcb *inp;
1291 u_short fport = fport_arg, lport = lport_arg;
1292
1293 /*
1294 * We may have found the pcb in the last lookup - check this first.
1295 */
1296
1297 lck_rw_lock_shared(pcbinfo->mtx);
1298
1299 /*
1300 * First look for an exact match.
1301 */
1302 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
1303 LIST_FOREACH(inp, head, inp_hash) {
1304 #if INET6
1305 if ((inp->inp_vflag & INP_IPV4) == 0)
1306 continue;
1307 #endif
1308 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1309 inp->inp_laddr.s_addr == laddr.s_addr &&
1310 inp->inp_fport == fport &&
1311 inp->inp_lport == lport) {
1312 /*
1313 * Found.
1314 */
1315 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1316 lck_rw_done(pcbinfo->mtx);
1317 return (inp);
1318 }
1319 else { /* it's there but dead, say it isn't found */
1320 lck_rw_done(pcbinfo->mtx);
1321 return(NULL);
1322 }
1323 }
1324 }
1325 if (wildcard) {
1326 struct inpcb *local_wild = NULL;
1327 #if INET6
1328 struct inpcb *local_wild_mapped = NULL;
1329 #endif
1330
1331 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
1332 LIST_FOREACH(inp, head, inp_hash) {
1333 #if INET6
1334 if ((inp->inp_vflag & INP_IPV4) == 0)
1335 continue;
1336 #endif
1337 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1338 inp->inp_lport == lport) {
1339 #if defined(NFAITH) && NFAITH > 0
1340 if (ifp && ifp->if_type == IFT_FAITH &&
1341 (inp->inp_flags & INP_FAITH) == 0)
1342 continue;
1343 #endif
1344 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1345 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1346 lck_rw_done(pcbinfo->mtx);
1347 return (inp);
1348 }
1349 else { /* it's there but dead, say it isn't found */
1350 lck_rw_done(pcbinfo->mtx);
1351 return(NULL);
1352 }
1353 }
1354 else if (inp->inp_laddr.s_addr == INADDR_ANY) {
1355 #if INET6
1356 if (INP_CHECK_SOCKAF(inp->inp_socket,
1357 AF_INET6))
1358 local_wild_mapped = inp;
1359 else
1360 #endif /* INET6 */
1361 local_wild = inp;
1362 }
1363 }
1364 }
1365 if (local_wild == NULL) {
1366 #if INET6
1367 if (local_wild_mapped != NULL) {
1368 if (in_pcb_checkstate(local_wild_mapped, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1369 lck_rw_done(pcbinfo->mtx);
1370 return (local_wild_mapped);
1371 }
1372 else { /* it's there but dead, say it isn't found */
1373 lck_rw_done(pcbinfo->mtx);
1374 return(NULL);
1375 }
1376 }
1377 #endif /* INET6 */
1378 lck_rw_done(pcbinfo->mtx);
1379 return (NULL);
1380 }
1381 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1382 lck_rw_done(pcbinfo->mtx);
1383 return (local_wild);
1384 }
1385 else { /* it's there but dead, say it isn't found */
1386 lck_rw_done(pcbinfo->mtx);
1387 return(NULL);
1388 }
1389 }
1390
1391 /*
1392 * Not found.
1393 */
1394 lck_rw_done(pcbinfo->mtx);
1395 return (NULL);
1396 }
1397
1398 /*
1399 * Insert PCB onto various hash lists.
1400 */
1401 int
1402 in_pcbinshash(struct inpcb *inp, int locked)
1403 {
1404 struct inpcbhead *pcbhash;
1405 struct inpcbporthead *pcbporthash;
1406 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1407 struct inpcbport *phd;
1408 u_int32_t hashkey_faddr;
1409
1410 #if INET6
1411 if (inp->inp_vflag & INP_IPV6)
1412 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1413 else
1414 #endif /* INET6 */
1415 hashkey_faddr = inp->inp_faddr.s_addr;
1416
1417 inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask);
1418
1419 if (!locked) {
1420 if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) {
1421 /*lock inversion issue, mostly with udp multicast packets */
1422 socket_unlock(inp->inp_socket, 0);
1423 lck_rw_lock_exclusive(pcbinfo->mtx);
1424 socket_lock(inp->inp_socket, 0);
1425 }
1426 }
1427
1428 pcbhash = &pcbinfo->hashbase[inp->hash_element];
1429
1430 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
1431 pcbinfo->porthashmask)];
1432
1433 /*
1434 * Go through port list and look for a head for this lport.
1435 */
1436 LIST_FOREACH(phd, pcbporthash, phd_hash) {
1437 if (phd->phd_port == inp->inp_lport)
1438 break;
1439 }
1440 /*
1441 * If none exists, malloc one and tack it on.
1442 */
1443 if (phd == NULL) {
1444 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_WAITOK);
1445 if (phd == NULL) {
1446 if (!locked)
1447 lck_rw_done(pcbinfo->mtx);
1448 return (ENOBUFS); /* XXX */
1449 }
1450 phd->phd_port = inp->inp_lport;
1451 LIST_INIT(&phd->phd_pcblist);
1452 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
1453 }
1454 inp->inp_phd = phd;
1455 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
1456 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
1457 if (!locked)
1458 lck_rw_done(pcbinfo->mtx);
1459 return (0);
1460 }
1461
1462 /*
1463 * Move PCB to the proper hash bucket when { faddr, fport } have been
1464 * changed. NOTE: This does not handle the case of the lport changing (the
1465 * hashed port list would have to be updated as well), so the lport must
1466 * not change after in_pcbinshash() has been called.
1467 */
1468 void
1469 in_pcbrehash(struct inpcb *inp)
1470 {
1471 struct inpcbhead *head;
1472 u_int32_t hashkey_faddr;
1473
1474 #if INET6
1475 if (inp->inp_vflag & INP_IPV6)
1476 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1477 else
1478 #endif /* INET6 */
1479 hashkey_faddr = inp->inp_faddr.s_addr;
1480 inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
1481 inp->inp_fport, inp->inp_pcbinfo->hashmask);
1482 head = &inp->inp_pcbinfo->hashbase[inp->hash_element];
1483
1484 LIST_REMOVE(inp, inp_hash);
1485 LIST_INSERT_HEAD(head, inp, inp_hash);
1486 }
1487
1488 /*
1489 * Remove PCB from various lists.
1490 */
1491 //###LOCK must be called with list lock held
1492 void
1493 in_pcbremlists(struct inpcb *inp)
1494 {
1495 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
1496
1497 if (inp->inp_lport) {
1498 struct inpcbport *phd = inp->inp_phd;
1499
1500 LIST_REMOVE(inp, inp_hash);
1501 LIST_REMOVE(inp, inp_portlist);
1502 if (phd != NULL && (LIST_FIRST(&phd->phd_pcblist) == NULL)) {
1503 LIST_REMOVE(phd, phd_hash);
1504 FREE(phd, M_PCB);
1505 }
1506 }
1507 LIST_REMOVE(inp, inp_list);
1508 inp->inp_pcbinfo->ipi_count--;
1509 }
1510
1511 static void in_pcb_detach_port( struct inpcb *inp);
1512 int
1513 in_pcb_grab_port (struct inpcbinfo *pcbinfo,
1514 u_short options,
1515 struct in_addr laddr,
1516 u_short *lport,
1517 struct in_addr faddr,
1518 u_short fport,
1519 u_int cookie,
1520 u_char owner_id)
1521 {
1522 struct inpcb *inp, *pcb;
1523 struct sockaddr_in sin;
1524 struct proc *p = current_proc();
1525 int stat;
1526
1527
1528 pcbinfo->nat_dummy_socket.so_pcb = 0;
1529 pcbinfo->nat_dummy_socket.so_options = 0;
1530 if (*lport) {
1531 /* The grabber wants a particular port */
1532
1533 if (faddr.s_addr || fport) {
1534 /*
1535 * This is either the second half of an active connect, or
1536 * it's from the acceptance of an incoming connection.
1537 */
1538 if (laddr.s_addr == 0) {
1539 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1540 return EINVAL;
1541 }
1542
1543 inp = in_pcblookup_hash(pcbinfo, faddr, fport, laddr, *lport, 0, NULL);
1544 if (inp) {
1545 /* pcb was found, its count was upped. need to decrease it here */
1546 in_pcb_checkstate(inp, WNT_RELEASE, 0);
1547 if (!(IN_MULTICAST(ntohl(laddr.s_addr)))) {
1548 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1549 return (EADDRINUSE);
1550 }
1551 }
1552
1553 stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p);
1554 if (stat) {
1555 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1556 return stat;
1557 }
1558 pcb = sotoinpcb(&pcbinfo->nat_dummy_socket);
1559 pcb->inp_vflag |= INP_IPV4;
1560
1561 pcb->inp_lport = *lport;
1562 pcb->inp_laddr.s_addr = laddr.s_addr;
1563
1564 pcb->inp_faddr = faddr;
1565 pcb->inp_fport = fport;
1566
1567 lck_rw_lock_exclusive(pcbinfo->mtx);
1568 in_pcbinshash(pcb, 1);
1569 lck_rw_done(pcbinfo->mtx);
1570 }
1571 else {
1572 /*
1573 * This is either a bind for a passive socket, or it's the
1574 * first part of bind-connect sequence (not likely since an
1575 * ephemeral port is usually used in this case). Or, it's
1576 * the result of a connection acceptance when the foreign
1577 * address/port cannot be provided (which requires the SO_REUSEADDR
1578 * flag if laddr is not multicast).
1579 */
1580
1581 stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p);
1582 if (stat) {
1583 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1584 return stat;
1585 }
1586 pcb = sotoinpcb(&pcbinfo->nat_dummy_socket);
1587 pcb->inp_vflag |= INP_IPV4;
1588
1589 pcbinfo->nat_dummy_socket.so_options = options;
1590 bzero(&sin, sizeof(struct sockaddr_in));
1591 sin.sin_len = sizeof(struct sockaddr_in);
1592 sin.sin_family = AF_INET;
1593 sin.sin_addr.s_addr = laddr.s_addr;
1594 sin.sin_port = *lport;
1595
1596 socket_lock(&pcbinfo->nat_dummy_socket, 1);
1597 stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb,
1598 (struct sockaddr *) &sin, p);
1599 if (stat) {
1600 socket_unlock(&pcbinfo->nat_dummy_socket, 1); /*detach first */
1601 in_pcb_detach_port(pcb); /* will restore dummy pcb */
1602 return stat;
1603 }
1604 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1605 }
1606 }
1607 else {
1608 /* The grabber wants an ephemeral port */
1609
1610 stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p);
1611 if (stat) {
1612 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1613 return stat;
1614 }
1615 pcb = sotoinpcb(&pcbinfo->nat_dummy_socket);
1616 pcb->inp_vflag |= INP_IPV4;
1617
1618 bzero(&sin, sizeof(struct sockaddr_in));
1619 sin.sin_len = sizeof(struct sockaddr_in);
1620 sin.sin_family = AF_INET;
1621 sin.sin_addr.s_addr = laddr.s_addr;
1622 sin.sin_port = 0;
1623
1624 if (faddr.s_addr || fport) {
1625 /*
1626 * Not sure if this case will be used - could occur when connect
1627 * is called, skipping the bind.
1628 */
1629
1630 if (laddr.s_addr == 0) {
1631 in_pcb_detach_port(pcb); /* restores dummy pcb */
1632 return EINVAL;
1633 }
1634
1635 socket_lock(&pcbinfo->nat_dummy_socket, 1);
1636 stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb,
1637 (struct sockaddr *) &sin, p);
1638 if (stat) {
1639 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1640 in_pcb_detach_port(pcb); /* restores dummy pcb */
1641 return stat;
1642 }
1643
1644 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1645 inp = in_pcblookup_hash(pcbinfo, faddr, fport,
1646 pcb->inp_laddr, pcb->inp_lport, 0, NULL);
1647 if (inp) {
1648 /* pcb was found, its count was upped. need to decrease it here */
1649 in_pcb_checkstate(inp, WNT_RELEASE, 0);
1650 in_pcb_detach_port(pcb);
1651 return (EADDRINUSE);
1652 }
1653
1654 lck_rw_lock_exclusive(pcbinfo->mtx);
1655 pcb->inp_faddr = faddr;
1656 pcb->inp_fport = fport;
1657 in_pcbrehash(pcb);
1658 lck_rw_done(pcbinfo->mtx);
1659 }
1660 else {
1661 /*
1662 * This is a simple bind of an ephemeral port. The local addr
1663 * may or may not be defined.
1664 */
1665
1666 socket_lock(&pcbinfo->nat_dummy_socket, 1);
1667 stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb,
1668 (struct sockaddr *) &sin, p);
1669 if (stat) {
1670 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1671 in_pcb_detach_port(pcb);
1672 return stat;
1673 }
1674 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1675 }
1676 *lport = pcb->inp_lport;
1677 }
1678
1679
1680 pcb->nat_owner = owner_id;
1681 pcb->nat_cookie = cookie;
1682 pcb->inp_ppcb = (caddr_t) pcbinfo->dummy_cb;
1683 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; /* restores dummypcb */
1684 return 0;
1685 }
1686
1687 /* 3962035 - in_pcb_letgo_port needs a special case function for detaching */
1688 static void
1689 in_pcb_detach_port(
1690 struct inpcb *inp)
1691 {
1692 struct socket *so = inp->inp_socket;
1693 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1694
1695 if (so != &pcbinfo->nat_dummy_socket)
1696 panic("in_pcb_detach_port: not a dummy_sock: so=%p, inp=%p\n", so, inp);
1697 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
1698 /*### access ipi in in_pcbremlists */
1699 in_pcbremlists(inp);
1700
1701 inp->inp_socket = 0;
1702 inp->reserved[0] = (u_int32_t) so;
1703 zfree(pcbinfo->ipi_zone, inp);
1704 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; /* restores dummypcb */
1705 }
1706
1707 int
1708 in_pcb_letgo_port(struct inpcbinfo *pcbinfo, struct in_addr laddr, u_short lport,
1709 struct in_addr faddr, u_short fport, u_char owner_id)
1710 {
1711 struct inpcbhead *head;
1712 struct inpcb *inp;
1713
1714 /*
1715 * First look for an exact match.
1716 */
1717
1718 lck_rw_lock_exclusive(pcbinfo->mtx);
1719 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
1720 for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
1721 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1722 inp->inp_laddr.s_addr == laddr.s_addr &&
1723 inp->inp_fport == fport &&
1724 inp->inp_lport == lport &&
1725 inp->nat_owner == owner_id) {
1726 /*
1727 * Found.
1728 */
1729 in_pcb_detach_port(inp);
1730 lck_rw_done(pcbinfo->mtx);
1731 return 0;
1732 }
1733 }
1734
1735 lck_rw_done(pcbinfo->mtx);
1736 return ENOENT;
1737 }
1738
1739 u_char
1740 in_pcb_get_owner(struct inpcbinfo *pcbinfo,
1741 struct in_addr laddr, u_short lport,
1742 struct in_addr faddr, u_short fport,
1743 u_int *cookie)
1744
1745 {
1746 struct inpcb *inp;
1747 u_char owner_id = INPCB_NO_OWNER;
1748 struct inpcbport *phd;
1749 struct inpcbporthead *porthash;
1750
1751
1752 if (IN_MULTICAST(laddr.s_addr)) {
1753 /*
1754 * Walk through PCB's looking for registered
1755 * owners.
1756 */
1757
1758 lck_rw_lock_shared(pcbinfo->mtx);
1759 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
1760 pcbinfo->porthashmask)];
1761 for (phd = porthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) {
1762 if (phd->phd_port == lport)
1763 break;
1764 }
1765
1766 if (phd == 0) {
1767 lck_rw_done(pcbinfo->mtx);
1768 return INPCB_NO_OWNER;
1769 }
1770
1771 owner_id = INPCB_NO_OWNER;
1772 for (inp = phd->phd_pcblist.lh_first; inp != NULL;
1773 inp = inp->inp_portlist.le_next) {
1774
1775 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1776 if (inp->nat_owner == 0)
1777 owner_id |= INPCB_OWNED_BY_X;
1778 else
1779 owner_id |= inp->nat_owner;
1780 }
1781 }
1782
1783 lck_rw_done(pcbinfo->mtx);
1784 return owner_id;
1785 }
1786 else {
1787 inp = in_pcblookup_hash(pcbinfo, faddr, fport,
1788 laddr, lport, 1, NULL);
1789 if (inp) {
1790 /* pcb was found, its count was upped. need to decrease it here */
1791 /* if we found it, that pcb is already locked by the caller */
1792 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING)
1793 return(INPCB_NO_OWNER);
1794
1795 if (inp->nat_owner) {
1796 owner_id = inp->nat_owner;
1797 *cookie = inp->nat_cookie;
1798 }
1799 else {
1800 owner_id = INPCB_OWNED_BY_X;
1801 }
1802 }
1803 else
1804 owner_id = INPCB_NO_OWNER;
1805
1806 return owner_id;
1807 }
1808 }
1809
1810 int
1811 in_pcb_new_share_client(struct inpcbinfo *pcbinfo, u_char *owner_id)
1812 {
1813
1814 int i;
1815
1816
1817 for (i=0; i < INPCB_MAX_IDS; i++) {
1818 if ((pcbinfo->all_owners & (1 << i)) == 0) {
1819 pcbinfo->all_owners |= (1 << i);
1820 *owner_id = (1 << i);
1821 return 0;
1822 }
1823 }
1824
1825 return ENOSPC;
1826 }
1827
1828 int
1829 in_pcb_rem_share_client(struct inpcbinfo *pcbinfo, u_char owner_id)
1830 {
1831 struct inpcb *inp;
1832
1833
1834 lck_rw_lock_exclusive(pcbinfo->mtx);
1835 if (pcbinfo->all_owners & owner_id) {
1836 pcbinfo->all_owners &= ~owner_id;
1837 for (inp = pcbinfo->listhead->lh_first; inp != NULL; inp = inp->inp_list.le_next) {
1838 if (inp->nat_owner & owner_id) {
1839 if (inp->nat_owner == owner_id)
1840 /*
1841 * Deallocate the pcb
1842 */
1843 in_pcb_detach_port(inp);
1844 else
1845 inp->nat_owner &= ~owner_id;
1846 }
1847 }
1848 }
1849 else {
1850 lck_rw_done(pcbinfo->mtx);
1851 return ENOENT;
1852 }
1853
1854 lck_rw_done(pcbinfo->mtx);
1855 return 0;
1856 }
1857
1858
1859
1860 void in_pcb_nat_init(struct inpcbinfo *pcbinfo, int afamily,
1861 int pfamily, int protocol)
1862 {
1863 int stat;
1864 struct proc *p = current_proc();
1865
1866 bzero(&pcbinfo->nat_dummy_socket, sizeof(struct socket));
1867 #if CONFIG_MACF_NET
1868 mac_socket_label_init(&pcbinfo->nat_dummy_socket, M_WAITOK);
1869 #endif
1870 pcbinfo->nat_dummy_socket.so_proto = pffindproto_locked(afamily, pfamily, protocol);
1871 pcbinfo->all_owners = 0;
1872 stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p);
1873 if (stat)
1874 panic("in_pcb_nat_init: can't alloc fakepcb err=%d\n", stat);
1875 pcbinfo->nat_dummy_pcb = (struct inpcb *)pcbinfo->nat_dummy_socket.so_pcb;
1876 }
1877
1878 /* Mechanism used to defer the memory release of PCBs
1879 * The pcb list will contain the pcb until the ripper can clean it up if
1880 * the following conditions are met: 1) state "DEAD", 2) wantcnt is STOPUSING
1881 * 3) usecount is null
1882 * This function will be called to either mark the pcb as
1883 */
1884 int
1885 in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
1886 {
1887
1888 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
1889 UInt32 origwant;
1890 UInt32 newwant;
1891
1892 switch (mode) {
1893
1894 case WNT_STOPUSING: /* try to mark the pcb as ready for recycling */
1895
1896 /* compareswap with STOPUSING, if success we're good, if it's in use, will be marked later */
1897
1898 if (locked == 0)
1899 socket_lock(pcb->inp_socket, 1);
1900 pcb->inp_state = INPCB_STATE_DEAD;
1901 stopusing:
1902 if (pcb->inp_socket->so_usecount < 0)
1903 panic("in_pcb_checkstate STOP pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket);
1904 if (locked == 0)
1905 socket_unlock(pcb->inp_socket, 1);
1906
1907 origwant = *wantcnt;
1908 if ((UInt16) origwant == 0xffff ) /* should stop using */
1909 return (WNT_STOPUSING);
1910 newwant = 0xffff;
1911 if ((UInt16) origwant == 0) {/* try to mark it as unsuable now */
1912 OSCompareAndSwap(origwant, newwant, wantcnt) ;
1913 }
1914 return (WNT_STOPUSING);
1915 break;
1916
1917 case WNT_ACQUIRE: /* try to increase reference to pcb */
1918 /* if WNT_STOPUSING should bail out */
1919 /*
1920 * if socket state DEAD, try to set count to STOPUSING, return failed
1921 * otherwise increase cnt
1922 */
1923 do {
1924 origwant = *wantcnt;
1925 if ((UInt16) origwant == 0xffff ) {/* should stop using */
1926 // printf("in_pcb_checkstate: ACQ PCB was STOPUSING while release. odd pcb=%p\n", pcb);
1927 return (WNT_STOPUSING);
1928 }
1929 newwant = origwant + 1;
1930 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
1931 return (WNT_ACQUIRE);
1932 break;
1933
1934 case WNT_RELEASE: /* release reference. if result is null and pcb state is DEAD,
1935 set wanted bit to STOPUSING
1936 */
1937
1938 if (locked == 0)
1939 socket_lock(pcb->inp_socket, 1);
1940
1941 do {
1942 origwant = *wantcnt;
1943 if ((UInt16) origwant == 0x0 )
1944 panic("in_pcb_checkstate pcb=%p release with zero count", pcb);
1945 if ((UInt16) origwant == 0xffff ) {/* should stop using */
1946 #if TEMPDEBUG
1947 printf("in_pcb_checkstate: REL PCB was STOPUSING while release. odd pcb=%p\n", pcb);
1948 #endif
1949 if (locked == 0)
1950 socket_unlock(pcb->inp_socket, 1);
1951 return (WNT_STOPUSING);
1952 }
1953 newwant = origwant - 1;
1954 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
1955
1956 if (pcb->inp_state == INPCB_STATE_DEAD)
1957 goto stopusing;
1958 if (pcb->inp_socket->so_usecount < 0)
1959 panic("in_pcb_checkstate RELEASE pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket);
1960
1961 if (locked == 0)
1962 socket_unlock(pcb->inp_socket, 1);
1963 return (WNT_RELEASE);
1964 break;
1965
1966 default:
1967
1968 panic("in_pcb_checkstate: so=%p not a valid state =%x\n", pcb->inp_socket, mode);
1969 }
1970
1971 /* NOTREACHED */
1972 return (mode);
1973 }
1974
1975 /*
1976 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
1977 * The inpcb_compat data structure is passed to user space and must
1978 * not change. We intentionally avoid copying pointers. The socket is
1979 * the one exception, though we probably shouldn't copy that either.
1980 */
1981 void
1982 inpcb_to_compat(
1983 struct inpcb *inp,
1984 struct inpcb_compat *inp_compat)
1985 {
1986 bzero(inp_compat, sizeof(*inp_compat));
1987 inp_compat->inp_fport = inp->inp_fport;
1988 inp_compat->inp_lport = inp->inp_lport;
1989 inp_compat->inp_socket = inp->inp_socket;
1990 inp_compat->nat_owner = inp->nat_owner;
1991 inp_compat->nat_cookie = inp->nat_cookie;
1992 inp_compat->inp_gencnt = inp->inp_gencnt;
1993 inp_compat->inp_flags = inp->inp_flags;
1994 inp_compat->inp_flow = inp->inp_flow;
1995 inp_compat->inp_vflag = inp->inp_vflag;
1996 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
1997 inp_compat->inp_ip_p = inp->inp_ip_p;
1998 inp_compat->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
1999 inp_compat->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
2000 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2001 inp_compat->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim;
2002 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2003 inp_compat->inp6_ifindex = inp->inp6_ifindex;
2004 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2005 }
2006