]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/in_pcb.c
xnu-792.2.4.tar.gz
[apple/xnu.git] / bsd / netinet / in_pcb.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * Copyright (c) 1982, 1986, 1991, 1993, 1995
24 * The Regents of the University of California. All rights reserved.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions
28 * are met:
29 * 1. Redistributions of source code must retain the above copyright
30 * notice, this list of conditions and the following disclaimer.
31 * 2. Redistributions in binary form must reproduce the above copyright
32 * notice, this list of conditions and the following disclaimer in the
33 * documentation and/or other materials provided with the distribution.
34 * 3. All advertising materials mentioning features or use of this software
35 * must display the following acknowledgement:
36 * This product includes software developed by the University of
37 * California, Berkeley and its contributors.
38 * 4. Neither the name of the University nor the names of its contributors
39 * may be used to endorse or promote products derived from this software
40 * without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 *
54 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
55 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
56 */
57
58 #include <sys/param.h>
59 #include <sys/systm.h>
60 #include <sys/malloc.h>
61 #include <sys/mbuf.h>
62 #include <sys/domain.h>
63 #include <sys/protosw.h>
64 #include <sys/socket.h>
65 #include <sys/socketvar.h>
66 #include <sys/proc.h>
67 #ifndef __APPLE__
68 #include <sys/jail.h>
69 #endif
70 #include <sys/kernel.h>
71 #include <sys/sysctl.h>
72 #include <libkern/OSAtomic.h>
73
74 #include <machine/limits.h>
75
76 #ifdef __APPLE__
77 #include <kern/zalloc.h>
78 #endif
79
80 #include <net/if.h>
81 #include <net/if_types.h>
82 #include <net/route.h>
83
84 #include <netinet/in.h>
85 #include <netinet/in_pcb.h>
86 #include <netinet/in_var.h>
87 #include <netinet/ip_var.h>
88 #if INET6
89 #include <netinet/ip6.h>
90 #include <netinet6/ip6_var.h>
91 #endif /* INET6 */
92
93 #include "faith.h"
94
95 #if IPSEC
96 #include <netinet6/ipsec.h>
97 #include <netkey/key.h>
98 #endif /* IPSEC */
99
100 #include <sys/kdebug.h>
101
102 #if IPSEC
103 extern int ipsec_bypass;
104 extern lck_mtx_t *sadb_mutex;
105 #endif
106
107 extern u_long route_generation;
108
109 #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
110 #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
111
112 struct in_addr zeroin_addr;
113
114 /*
115 * These configure the range of local port addresses assigned to
116 * "unspecified" outgoing connections/packets/whatever.
117 */
118 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
119 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
120 #ifndef __APPLE__
121 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */
122 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */
123 #else
124 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
125 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
126 #endif
127 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
128 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
129
130 #define RANGECHK(var, min, max) \
131 if ((var) < (min)) { (var) = (min); } \
132 else if ((var) > (max)) { (var) = (max); }
133
134 static int
135 sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
136 {
137 int error = sysctl_handle_int(oidp,
138 oidp->oid_arg1, oidp->oid_arg2, req);
139 if (!error) {
140 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
141 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
142 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
143 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
144 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
145 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
146 }
147 return error;
148 }
149
150 #undef RANGECHK
151
152 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
153
154 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
155 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
156 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
157 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
158 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
159 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
160 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
161 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
162 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
163 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
164 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
165 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
166
167 /*
168 * in_pcb.c: manage the Protocol Control Blocks.
169 *
170 * NOTE: It is assumed that most of these functions will be called at
171 * splnet(). XXX - There are, unfortunately, a few exceptions to this
172 * rule that should be fixed.
173 */
174
175 /*
176 * Allocate a PCB and associate it with the socket.
177 */
178 int
179 in_pcballoc(so, pcbinfo, p)
180 struct socket *so;
181 struct inpcbinfo *pcbinfo;
182 struct proc *p;
183 {
184 register struct inpcb *inp;
185 caddr_t temp;
186 #if IPSEC
187 #ifndef __APPLE__
188 int error;
189 #endif
190 #endif
191
192 if (so->cached_in_sock_layer == 0) {
193 #if TEMPDEBUG
194 printf("PCBALLOC calling zalloc for socket %x\n", so);
195 #endif
196 inp = (struct inpcb *) zalloc(pcbinfo->ipi_zone);
197 if (inp == NULL)
198 return (ENOBUFS);
199 bzero((caddr_t)inp, sizeof(*inp));
200 }
201 else {
202 #if TEMPDEBUG
203 printf("PCBALLOC reusing PCB for socket %x\n", so);
204 #endif
205 inp = (struct inpcb *) so->so_saved_pcb;
206 temp = inp->inp_saved_ppcb;
207 bzero((caddr_t) inp, sizeof(*inp));
208 inp->inp_saved_ppcb = temp;
209 }
210
211 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
212 inp->inp_pcbinfo = pcbinfo;
213 inp->inp_socket = so;
214 so->so_pcb = (caddr_t)inp;
215
216 if (so->so_proto->pr_flags & PR_PCBLOCK) {
217 inp->inpcb_mtx = lck_mtx_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr);
218 if (inp->inpcb_mtx == NULL) {
219 printf("in_pcballoc: can't alloc mutex! so=%x\n", so);
220 return(ENOMEM);
221 }
222 }
223
224 #if IPSEC
225 #ifndef __APPLE__
226 if (ipsec_bypass == 0) {
227 lck_mtx_lock(sadb_mutex);
228 error = ipsec_init_policy(so, &inp->inp_sp);
229 lck_mtx_unlock(sadb_mutex);
230 if (error != 0) {
231 zfree(pcbinfo->ipi_zone, inp);
232 return error;
233 }
234 }
235 #endif
236 #endif /*IPSEC*/
237 #if defined(INET6)
238 if (INP_SOCKAF(so) == AF_INET6 && !ip6_mapped_addr_on)
239 inp->inp_flags |= IN6P_IPV6_V6ONLY;
240 #endif
241
242 #if INET6
243 if (ip6_auto_flowlabel)
244 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
245 #endif
246 lck_rw_lock_exclusive(pcbinfo->mtx);
247 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
248 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
249 pcbinfo->ipi_count++;
250 lck_rw_done(pcbinfo->mtx);
251 return (0);
252 }
253
254 int
255 in_pcbbind(inp, nam, p)
256 register struct inpcb *inp;
257 struct sockaddr *nam;
258 struct proc *p;
259 {
260 register struct socket *so = inp->inp_socket;
261 unsigned short *lastport;
262 struct sockaddr_in *sin;
263 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
264 u_short lport = 0;
265 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
266 int error;
267
268 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
269 return (EADDRNOTAVAIL);
270 if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
271 return (EINVAL);
272 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
273 wild = 1;
274 socket_unlock(so, 0); /* keep reference on socket */
275 lck_rw_lock_exclusive(pcbinfo->mtx);
276 if (nam) {
277 sin = (struct sockaddr_in *)nam;
278 if (nam->sa_len != sizeof (*sin)) {
279 lck_rw_done(pcbinfo->mtx);
280 socket_lock(so, 0);
281 return (EINVAL);
282 }
283 #ifdef notdef
284 /*
285 * We should check the family, but old programs
286 * incorrectly fail to initialize it.
287 */
288 if (sin->sin_family != AF_INET) {
289 lck_rw_done(pcbinfo->mtx);
290 socket_lock(so, 0);
291 return (EAFNOSUPPORT);
292 }
293 #endif
294 lport = sin->sin_port;
295 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
296 /*
297 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
298 * allow complete duplication of binding if
299 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
300 * and a multicast address is bound on both
301 * new and duplicated sockets.
302 */
303 if (so->so_options & SO_REUSEADDR)
304 reuseport = SO_REUSEADDR|SO_REUSEPORT;
305 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
306 struct ifaddr *ifa;
307 sin->sin_port = 0; /* yech... */
308 if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin)) == 0) {
309 lck_rw_done(pcbinfo->mtx);
310 socket_lock(so, 0);
311 return (EADDRNOTAVAIL);
312 }
313 else {
314 ifafree(ifa);
315 }
316 }
317 if (lport) {
318 struct inpcb *t;
319
320 /* GROSS */
321 if (ntohs(lport) < IPPORT_RESERVED && p &&
322 proc_suser(p)) {
323 lck_rw_done(pcbinfo->mtx);
324 socket_lock(so, 0);
325 return (EACCES);
326 }
327 if (so->so_uid &&
328 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
329 t = in_pcblookup_local(inp->inp_pcbinfo,
330 sin->sin_addr, lport, INPLOOKUP_WILDCARD);
331 if (t &&
332 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
333 ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
334 (t->inp_socket->so_options &
335 SO_REUSEPORT) == 0) &&
336 (so->so_uid != t->inp_socket->so_uid)) {
337 #if INET6
338 if (ntohl(sin->sin_addr.s_addr) !=
339 INADDR_ANY ||
340 ntohl(t->inp_laddr.s_addr) !=
341 INADDR_ANY ||
342 INP_SOCKAF(so) ==
343 INP_SOCKAF(t->inp_socket)) {
344 #endif /* defined(INET6) */
345 lck_rw_done(pcbinfo->mtx);
346 socket_lock(so, 0);
347 return (EADDRINUSE);
348 }
349 }
350 }
351 t = in_pcblookup_local(pcbinfo, sin->sin_addr,
352 lport, wild);
353 if (t &&
354 (reuseport & t->inp_socket->so_options) == 0) {
355 #if INET6
356 if (ip6_mapped_addr_on == 0 ||
357 ntohl(sin->sin_addr.s_addr) !=
358 INADDR_ANY ||
359 ntohl(t->inp_laddr.s_addr) !=
360 INADDR_ANY ||
361 INP_SOCKAF(so) ==
362 INP_SOCKAF(t->inp_socket)) {
363 #endif /* defined(INET6) */
364 lck_rw_done(pcbinfo->mtx);
365 socket_lock(so, 0);
366 return (EADDRINUSE);
367 }
368 }
369 }
370 inp->inp_laddr = sin->sin_addr;
371 }
372 if (lport == 0) {
373 u_short first, last;
374 int count;
375
376 inp->inp_flags |= INP_ANONPORT;
377
378 if (inp->inp_flags & INP_HIGHPORT) {
379 first = ipport_hifirstauto; /* sysctl */
380 last = ipport_hilastauto;
381 lastport = &pcbinfo->lasthi;
382 } else if (inp->inp_flags & INP_LOWPORT) {
383 if (p && (error = proc_suser(p))) {
384 lck_rw_done(pcbinfo->mtx);
385 socket_lock(so, 0);
386 return error;
387 }
388 first = ipport_lowfirstauto; /* 1023 */
389 last = ipport_lowlastauto; /* 600 */
390 lastport = &pcbinfo->lastlow;
391 } else {
392 first = ipport_firstauto; /* sysctl */
393 last = ipport_lastauto;
394 lastport = &pcbinfo->lastport;
395 }
396 /*
397 * Simple check to ensure all ports are not used up causing
398 * a deadlock here.
399 *
400 * We split the two cases (up and down) so that the direction
401 * is not being tested on each round of the loop.
402 */
403 if (first > last) {
404 /*
405 * counting down
406 */
407 count = first - last;
408
409 do {
410 if (count-- < 0) { /* completely used? */
411 lck_rw_done(pcbinfo->mtx);
412 socket_lock(so, 0);
413 inp->inp_laddr.s_addr = INADDR_ANY;
414 return (EADDRNOTAVAIL);
415 }
416 --*lastport;
417 if (*lastport > first || *lastport < last)
418 *lastport = first;
419 lport = htons(*lastport);
420 } while (in_pcblookup_local(pcbinfo,
421 inp->inp_laddr, lport, wild));
422 } else {
423 /*
424 * counting up
425 */
426 count = last - first;
427
428 do {
429 if (count-- < 0) { /* completely used? */
430 lck_rw_done(pcbinfo->mtx);
431 socket_lock(so, 0);
432 inp->inp_laddr.s_addr = INADDR_ANY;
433 return (EADDRNOTAVAIL);
434 }
435 ++*lastport;
436 if (*lastport < first || *lastport > last)
437 *lastport = first;
438 lport = htons(*lastport);
439 } while (in_pcblookup_local(pcbinfo,
440 inp->inp_laddr, lport, wild));
441 }
442 }
443 socket_lock(so, 0);
444 inp->inp_lport = lport;
445 if (in_pcbinshash(inp, 1) != 0) {
446 inp->inp_laddr.s_addr = INADDR_ANY;
447 inp->inp_lport = 0;
448 lck_rw_done(pcbinfo->mtx);
449 return (EAGAIN);
450 }
451 lck_rw_done(pcbinfo->mtx);
452 return (0);
453 }
454
455 /*
456 * Transform old in_pcbconnect() into an inner subroutine for new
457 * in_pcbconnect(): Do some validity-checking on the remote
458 * address (in mbuf 'nam') and then determine local host address
459 * (i.e., which interface) to use to access that remote host.
460 *
461 * This preserves definition of in_pcbconnect(), while supporting a
462 * slightly different version for T/TCP. (This is more than
463 * a bit of a kludge, but cleaning up the internal interfaces would
464 * have forced minor changes in every protocol).
465 */
466
467 int
468 in_pcbladdr(inp, nam, plocal_sin)
469 register struct inpcb *inp;
470 struct sockaddr *nam;
471 struct sockaddr_in **plocal_sin;
472 {
473 struct in_ifaddr *ia;
474 register struct sockaddr_in *sin = (struct sockaddr_in *)nam;
475
476 if (nam->sa_len != sizeof (*sin))
477 return (EINVAL);
478 if (sin->sin_family != AF_INET)
479 return (EAFNOSUPPORT);
480 if (sin->sin_port == 0)
481 return (EADDRNOTAVAIL);
482 lck_mtx_lock(rt_mtx);
483 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
484 /*
485 * If the destination address is INADDR_ANY,
486 * use the primary local address.
487 * If the supplied address is INADDR_BROADCAST,
488 * and the primary interface supports broadcast,
489 * choose the broadcast address for that interface.
490 */
491 #define satosin(sa) ((struct sockaddr_in *)(sa))
492 #define sintosa(sin) ((struct sockaddr *)(sin))
493 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
494 if (sin->sin_addr.s_addr == INADDR_ANY)
495 sin->sin_addr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr;
496 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
497 (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & IFF_BROADCAST))
498 sin->sin_addr = satosin(&TAILQ_FIRST(&in_ifaddrhead)->ia_broadaddr)->sin_addr;
499 }
500 if (inp->inp_laddr.s_addr == INADDR_ANY) {
501 register struct route *ro;
502
503 ia = (struct in_ifaddr *)0;
504 /*
505 * If route is known or can be allocated now,
506 * our src addr is taken from the i/f, else punt.
507 * Note that we should check the address family of the cached
508 * destination, in case of sharing the cache with IPv6.
509 */
510 ro = &inp->inp_route;
511 if (ro->ro_rt &&
512 (ro->ro_dst.sa_family != AF_INET ||
513 satosin(&ro->ro_dst)->sin_addr.s_addr !=
514 sin->sin_addr.s_addr ||
515 inp->inp_socket->so_options & SO_DONTROUTE ||
516 ro->ro_rt->generation_id != route_generation)) {
517 rtfree_locked(ro->ro_rt);
518 ro->ro_rt = (struct rtentry *)0;
519 }
520 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
521 (ro->ro_rt == (struct rtentry *)0 ||
522 ro->ro_rt->rt_ifp == 0)) {
523 /* No route yet, so try to acquire one */
524 bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
525 ro->ro_dst.sa_family = AF_INET;
526 ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
527 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
528 sin->sin_addr;
529 rtalloc_ign_locked(ro, 0UL);
530 }
531 /*
532 * If we found a route, use the address
533 * corresponding to the outgoing interface
534 * unless it is the loopback (in case a route
535 * to our address on another net goes to loopback).
536 */
537 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
538 ia = ifatoia(ro->ro_rt->rt_ifa);
539 if (ia)
540 ifaref(&ia->ia_ifa);
541 }
542 if (ia == 0) {
543 u_short fport = sin->sin_port;
544
545 sin->sin_port = 0;
546 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
547 if (ia == 0) {
548 ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
549 }
550 sin->sin_port = fport;
551 if (ia == 0) {
552 ia = TAILQ_FIRST(&in_ifaddrhead);
553 if (ia)
554 ifaref(&ia->ia_ifa);
555 }
556 if (ia == 0) {
557 lck_mtx_unlock(rt_mtx);
558 return (EADDRNOTAVAIL);
559 }
560 }
561 /*
562 * If the destination address is multicast and an outgoing
563 * interface has been set as a multicast option, use the
564 * address of that interface as our source address.
565 */
566 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
567 inp->inp_moptions != NULL) {
568 struct ip_moptions *imo;
569 struct ifnet *ifp;
570
571 imo = inp->inp_moptions;
572 if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
573 ia->ia_ifp != imo->imo_multicast_ifp)) {
574 ifp = imo->imo_multicast_ifp;
575 if (ia)
576 ifafree(&ia->ia_ifa);
577 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link)
578 if (ia->ia_ifp == ifp)
579 break;
580 if (ia == 0) {
581 lck_mtx_unlock(rt_mtx);
582 return (EADDRNOTAVAIL);
583 }
584 ifaref(ia);
585 }
586 }
587 /*
588 * Don't do pcblookup call here; return interface in plocal_sin
589 * and exit to caller, that will do the lookup.
590 */
591 *plocal_sin = &ia->ia_addr;
592 ifafree(&ia->ia_ifa);
593 }
594 lck_mtx_unlock(rt_mtx);
595 return(0);
596 }
597
598 /*
599 * Outer subroutine:
600 * Connect from a socket to a specified address.
601 * Both address and port must be specified in argument sin.
602 * If don't have a local address for this socket yet,
603 * then pick one.
604 */
605 int
606 in_pcbconnect(inp, nam, p)
607 register struct inpcb *inp;
608 struct sockaddr *nam;
609 struct proc *p;
610 {
611 struct sockaddr_in *ifaddr;
612 struct sockaddr_in *sin = (struct sockaddr_in *)nam;
613 struct inpcb *pcb;
614 int error;
615
616 /*
617 * Call inner routine, to assign local interface address.
618 */
619 if ((error = in_pcbladdr(inp, nam, &ifaddr)) != 0)
620 return(error);
621
622 socket_unlock(inp->inp_socket, 0);
623 pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
624 inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
625 inp->inp_lport, 0, NULL);
626 socket_lock(inp->inp_socket, 0);
627 if (pcb != NULL) {
628 in_pcb_checkstate(pcb, WNT_RELEASE, 0);
629 return (EADDRINUSE);
630 }
631 if (inp->inp_laddr.s_addr == INADDR_ANY) {
632 if (inp->inp_lport == 0) {
633 error = in_pcbbind(inp, (struct sockaddr *)0, p);
634 if (error)
635 return (error);
636 }
637 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
638 /*lock inversion issue, mostly with udp multicast packets */
639 socket_unlock(inp->inp_socket, 0);
640 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
641 socket_lock(inp->inp_socket, 0);
642 }
643 inp->inp_laddr = ifaddr->sin_addr;
644 inp->inp_flags |= INP_INADDR_ANY;
645 }
646 else {
647 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
648 /*lock inversion issue, mostly with udp multicast packets */
649 socket_unlock(inp->inp_socket, 0);
650 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
651 socket_lock(inp->inp_socket, 0);
652 }
653 }
654 inp->inp_faddr = sin->sin_addr;
655 inp->inp_fport = sin->sin_port;
656 in_pcbrehash(inp);
657 lck_rw_done(inp->inp_pcbinfo->mtx);
658 return (0);
659 }
660
661 void
662 in_pcbdisconnect(inp)
663 struct inpcb *inp;
664 {
665
666 inp->inp_faddr.s_addr = INADDR_ANY;
667 inp->inp_fport = 0;
668
669 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
670 /*lock inversion issue, mostly with udp multicast packets */
671 socket_unlock(inp->inp_socket, 0);
672 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
673 socket_lock(inp->inp_socket, 0);
674 }
675
676 in_pcbrehash(inp);
677 lck_rw_done(inp->inp_pcbinfo->mtx);
678
679 if (inp->inp_socket->so_state & SS_NOFDREF)
680 in_pcbdetach(inp);
681 }
682
683 void
684 in_pcbdetach(inp)
685 struct inpcb *inp;
686 {
687 struct socket *so = inp->inp_socket;
688 struct rtentry *rt = inp->inp_route.ro_rt;
689
690 if (so->so_pcb == 0) { /* we've been called twice */
691 panic("in_pcbdetach: inp=%x so=%x proto=%x so_pcb is null!\n",
692 inp, so, so->so_proto->pr_protocol);
693 }
694
695 #if IPSEC
696 if (ipsec_bypass == 0) {
697 lck_mtx_lock(sadb_mutex);
698 ipsec4_delete_pcbpolicy(inp);
699 lck_mtx_unlock(sadb_mutex);
700 }
701 #endif /*IPSEC*/
702
703 /* mark socket state as dead */
704 if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING)
705 panic("in_pcbdetach so=%x prot=%x couldn't set to STOPUSING\n", so, so->so_proto->pr_protocol);
706
707 #if TEMPDEBUG
708 if (so->cached_in_sock_layer)
709 printf("in_pcbdetach for cached socket %x flags=%x\n", so, so->so_flags);
710 else
711 printf("in_pcbdetach for allocated socket %x flags=%x\n", so, so->so_flags);
712 #endif
713 if ((so->so_flags & SOF_PCBCLEARING) == 0) {
714 inp->inp_vflag = 0;
715 if (inp->inp_options)
716 (void)m_free(inp->inp_options);
717 if (rt) {
718 /*
719 * route deletion requires reference count to be <= zero
720 */
721 lck_mtx_lock(rt_mtx);
722 if ((rt->rt_flags & RTF_DELCLONE) &&
723 (rt->rt_flags & RTF_WASCLONED) &&
724 (rt->rt_refcnt <= 1)) {
725 rtunref(rt);
726 rt->rt_flags &= ~RTF_UP;
727 rtrequest_locked(RTM_DELETE, rt_key(rt),
728 rt->rt_gateway, rt_mask(rt),
729 rt->rt_flags, (struct rtentry **)0);
730 }
731 else {
732 rtfree_locked(rt);
733 inp->inp_route.ro_rt = 0;
734 }
735 lck_mtx_unlock(rt_mtx);
736 }
737 ip_freemoptions(inp->inp_moptions);
738 inp->inp_moptions = NULL;
739 sofreelastref(so, 0);
740 inp->inp_state = INPCB_STATE_DEAD;
741 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
742 }
743 }
744
745
746 void
747 in_pcbdispose(inp)
748 struct inpcb *inp;
749 {
750 struct socket *so = inp->inp_socket;
751 struct inpcbinfo *ipi = inp->inp_pcbinfo;
752
753 #if TEMPDEBUG
754 if (inp->inp_state != INPCB_STATE_DEAD) {
755 printf("in_pcbdispose: not dead yet? so=%x\n", so);
756 }
757 #endif
758
759 if (so && so->so_usecount != 0)
760 panic("in_pcbdispose: use count=%x so=%x\n", so->so_usecount, so);
761
762
763 inp->inp_gencnt = ++ipi->ipi_gencnt;
764 /*### access ipi in in_pcbremlists */
765 in_pcbremlists(inp);
766
767 if (so) {
768 if (so->so_proto->pr_flags & PR_PCBLOCK) {
769 sofreelastref(so, 0);
770 if (so->so_rcv.sb_cc || so->so_snd.sb_cc) {
771 #if TEMPDEBUG
772 printf("in_pcbdispose sb not cleaned up so=%x rc_cci=%x snd_cc=%x\n",
773 so, so->so_rcv.sb_cc, so->so_snd.sb_cc);
774 #endif
775 sbrelease(&so->so_rcv);
776 sbrelease(&so->so_snd);
777 }
778 if (so->so_head != NULL)
779 panic("in_pcbdispose, so=%x head still exist\n", so);
780 lck_mtx_unlock(inp->inpcb_mtx);
781 lck_mtx_free(inp->inpcb_mtx, ipi->mtx_grp);
782 }
783 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
784 so->so_saved_pcb = (caddr_t) inp;
785 so->so_pcb = 0;
786 inp->inp_socket = 0;
787 inp->reserved[0] = so;
788 if (so->cached_in_sock_layer == 0) {
789 zfree(ipi->ipi_zone, inp);
790 }
791 sodealloc(so);
792 }
793 #if TEMPDEBUG
794 else
795 printf("in_pcbdispose: no socket for inp=%x\n", inp);
796 #endif
797 }
798
799 /*
800 * The calling convention of in_setsockaddr() and in_setpeeraddr() was
801 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
802 * in struct pr_usrreqs, so that protocols can just reference them directly
803 * without the need for a wrapper function. The socket must have a valid
804 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
805 * except through a kernel programming error, so it is acceptable to panic
806 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap
807 * because there actually /is/ a programming error somewhere... XXX)
808 */
809 int
810 in_setsockaddr(so, nam)
811 struct socket *so;
812 struct sockaddr **nam;
813 {
814 register struct inpcb *inp;
815 register struct sockaddr_in *sin;
816
817 /*
818 * Do the malloc first in case it blocks.
819 */
820 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
821 if (sin == NULL)
822 return ENOBUFS;
823 bzero(sin, sizeof *sin);
824 sin->sin_family = AF_INET;
825 sin->sin_len = sizeof(*sin);
826
827 inp = sotoinpcb(so);
828 if (!inp) {
829 FREE(sin, M_SONAME);
830 return ECONNRESET;
831 }
832 sin->sin_port = inp->inp_lport;
833 sin->sin_addr = inp->inp_laddr;
834
835 *nam = (struct sockaddr *)sin;
836 return 0;
837 }
838
839 int
840 in_setpeeraddr(so, nam)
841 struct socket *so;
842 struct sockaddr **nam;
843 {
844 struct inpcb *inp;
845 register struct sockaddr_in *sin;
846
847 /*
848 * Do the malloc first in case it blocks.
849 */
850 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
851 if (sin == NULL)
852 return ENOBUFS;
853 bzero((caddr_t)sin, sizeof (*sin));
854 sin->sin_family = AF_INET;
855 sin->sin_len = sizeof(*sin);
856
857 inp = sotoinpcb(so);
858 if (!inp) {
859 FREE(sin, M_SONAME);
860 return ECONNRESET;
861 }
862 sin->sin_port = inp->inp_fport;
863 sin->sin_addr = inp->inp_faddr;
864
865 *nam = (struct sockaddr *)sin;
866 return 0;
867 }
868
869 void
870 in_pcbnotifyall(pcbinfo, faddr, errno, notify)
871 struct inpcbinfo *pcbinfo;
872 struct in_addr faddr;
873 void (*notify) (struct inpcb *, int);
874 {
875 struct inpcb *inp;
876
877 lck_rw_lock_shared(pcbinfo->mtx);
878
879 LIST_FOREACH(inp, pcbinfo->listhead, inp_list) {
880 #if INET6
881 if ((inp->inp_vflag & INP_IPV4) == 0)
882 continue;
883 #endif
884 if (inp->inp_faddr.s_addr != faddr.s_addr ||
885 inp->inp_socket == NULL)
886 continue;
887 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
888 continue;
889 socket_lock(inp->inp_socket, 1);
890 (*notify)(inp, errno);
891 (void)in_pcb_checkstate(inp, WNT_RELEASE, 1);
892 socket_unlock(inp->inp_socket, 1);
893 }
894 lck_rw_done(pcbinfo->mtx);
895 }
896
897 void
898 in_pcbpurgeif0(
899 struct inpcb *head,
900 struct ifnet *ifp)
901 {
902 struct inpcb *inp;
903 struct ip_moptions *imo;
904 int i, gap;
905
906 for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
907 imo = inp->inp_moptions;
908 if ((inp->inp_vflag & INP_IPV4) &&
909 imo != NULL) {
910 /*
911 * Unselect the outgoing interface if it is being
912 * detached.
913 */
914 if (imo->imo_multicast_ifp == ifp)
915 imo->imo_multicast_ifp = NULL;
916
917 /*
918 * Drop multicast group membership if we joined
919 * through the interface being detached.
920 */
921 for (i = 0, gap = 0; i < imo->imo_num_memberships;
922 i++) {
923 if (imo->imo_membership[i]->inm_ifp == ifp) {
924 in_delmulti(&imo->imo_membership[i]);
925 gap++;
926 } else if (gap != 0)
927 imo->imo_membership[i - gap] =
928 imo->imo_membership[i];
929 }
930 imo->imo_num_memberships -= gap;
931 }
932 }
933 }
934
935 /*
936 * Check for alternatives when higher level complains
937 * about service problems. For now, invalidate cached
938 * routing information. If the route was created dynamically
939 * (by a redirect), time to try a default gateway again.
940 */
941 void
942 in_losing(inp)
943 struct inpcb *inp;
944 {
945 register struct rtentry *rt;
946 struct rt_addrinfo info;
947
948 if ((rt = inp->inp_route.ro_rt)) {
949 lck_mtx_lock(rt_mtx);
950 bzero((caddr_t)&info, sizeof(info));
951 info.rti_info[RTAX_DST] =
952 (struct sockaddr *)&inp->inp_route.ro_dst;
953 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
954 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
955 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
956 if (rt->rt_flags & RTF_DYNAMIC)
957 (void) rtrequest_locked(RTM_DELETE, rt_key(rt),
958 rt->rt_gateway, rt_mask(rt), rt->rt_flags,
959 (struct rtentry **)0);
960 inp->inp_route.ro_rt = 0;
961 rtfree_locked(rt);
962 lck_mtx_unlock(rt_mtx);
963 /*
964 * A new route can be allocated
965 * the next time output is attempted.
966 */
967 }
968 }
969
970 /*
971 * After a routing change, flush old routing
972 * and allocate a (hopefully) better one.
973 */
974 void
975 in_rtchange(inp, errno)
976 register struct inpcb *inp;
977 int errno;
978 {
979 if (inp->inp_route.ro_rt) {
980 if ((ifa_foraddr(inp->inp_laddr.s_addr)) == 0)
981 return; /* we can't remove the route now. not sure if still ok to use src */
982 rtfree(inp->inp_route.ro_rt);
983 inp->inp_route.ro_rt = 0;
984 /*
985 * A new route can be allocated the next time
986 * output is attempted.
987 */
988 }
989 }
990
991 /*
992 * Lookup a PCB based on the local address and port.
993 */
994 struct inpcb *
995 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
996 struct inpcbinfo *pcbinfo;
997 struct in_addr laddr;
998 u_int lport_arg;
999 int wild_okay;
1000 {
1001 register struct inpcb *inp;
1002 int matchwild = 3, wildcard;
1003 u_short lport = lport_arg;
1004
1005 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0,0,0,0,0);
1006
1007 if (!wild_okay) {
1008 struct inpcbhead *head;
1009 /*
1010 * Look for an unconnected (wildcard foreign addr) PCB that
1011 * matches the local address and port we're looking for.
1012 */
1013 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
1014 LIST_FOREACH(inp, head, inp_hash) {
1015 #if INET6
1016 if ((inp->inp_vflag & INP_IPV4) == 0)
1017 continue;
1018 #endif
1019 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1020 inp->inp_laddr.s_addr == laddr.s_addr &&
1021 inp->inp_lport == lport) {
1022 /*
1023 * Found.
1024 */
1025 return (inp);
1026 }
1027 }
1028 /*
1029 * Not found.
1030 */
1031 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0,0,0,0,0);
1032 return (NULL);
1033 } else {
1034 struct inpcbporthead *porthash;
1035 struct inpcbport *phd;
1036 struct inpcb *match = NULL;
1037 /*
1038 * Best fit PCB lookup.
1039 *
1040 * First see if this local port is in use by looking on the
1041 * port hash list.
1042 */
1043 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
1044 pcbinfo->porthashmask)];
1045 LIST_FOREACH(phd, porthash, phd_hash) {
1046 if (phd->phd_port == lport)
1047 break;
1048 }
1049 if (phd != NULL) {
1050 /*
1051 * Port is in use by one or more PCBs. Look for best
1052 * fit.
1053 */
1054 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
1055 wildcard = 0;
1056 #if INET6
1057 if ((inp->inp_vflag & INP_IPV4) == 0)
1058 continue;
1059 #endif
1060 if (inp->inp_faddr.s_addr != INADDR_ANY)
1061 wildcard++;
1062 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1063 if (laddr.s_addr == INADDR_ANY)
1064 wildcard++;
1065 else if (inp->inp_laddr.s_addr != laddr.s_addr)
1066 continue;
1067 } else {
1068 if (laddr.s_addr != INADDR_ANY)
1069 wildcard++;
1070 }
1071 if (wildcard < matchwild) {
1072 match = inp;
1073 matchwild = wildcard;
1074 if (matchwild == 0) {
1075 break;
1076 }
1077 }
1078 }
1079 }
1080 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,0,0,0,0);
1081 return (match);
1082 }
1083 }
1084
1085 /*
1086 * Lookup PCB in hash list.
1087 */
1088 struct inpcb *
1089 in_pcblookup_hash(
1090 struct inpcbinfo *pcbinfo,
1091 struct in_addr faddr,
1092 u_int fport_arg,
1093 struct in_addr laddr,
1094 u_int lport_arg,
1095 int wildcard,
1096 struct ifnet *ifp)
1097 {
1098 struct inpcbhead *head;
1099 register struct inpcb *inp;
1100 u_short fport = fport_arg, lport = lport_arg;
1101
1102 /*
1103 * We may have found the pcb in the last lookup - check this first.
1104 */
1105
1106 lck_rw_lock_shared(pcbinfo->mtx);
1107
1108 /*
1109 * First look for an exact match.
1110 */
1111 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
1112 LIST_FOREACH(inp, head, inp_hash) {
1113 #if INET6
1114 if ((inp->inp_vflag & INP_IPV4) == 0)
1115 continue;
1116 #endif
1117 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1118 inp->inp_laddr.s_addr == laddr.s_addr &&
1119 inp->inp_fport == fport &&
1120 inp->inp_lport == lport) {
1121 /*
1122 * Found.
1123 */
1124 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1125 lck_rw_done(pcbinfo->mtx);
1126 return (inp);
1127 }
1128 else { /* it's there but dead, say it isn't found */
1129 lck_rw_done(pcbinfo->mtx);
1130 return(NULL);
1131 }
1132 }
1133 }
1134 if (wildcard) {
1135 struct inpcb *local_wild = NULL;
1136 #if INET6
1137 struct inpcb *local_wild_mapped = NULL;
1138 #endif
1139
1140 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
1141 LIST_FOREACH(inp, head, inp_hash) {
1142 #if INET6
1143 if ((inp->inp_vflag & INP_IPV4) == 0)
1144 continue;
1145 #endif
1146 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1147 inp->inp_lport == lport) {
1148 #if defined(NFAITH) && NFAITH > 0
1149 if (ifp && ifp->if_type == IFT_FAITH &&
1150 (inp->inp_flags & INP_FAITH) == 0)
1151 continue;
1152 #endif
1153 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1154 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1155 lck_rw_done(pcbinfo->mtx);
1156 return (inp);
1157 }
1158 else { /* it's there but dead, say it isn't found */
1159 lck_rw_done(pcbinfo->mtx);
1160 return(NULL);
1161 }
1162 }
1163 else if (inp->inp_laddr.s_addr == INADDR_ANY) {
1164 #if defined(INET6)
1165 if (INP_CHECK_SOCKAF(inp->inp_socket,
1166 AF_INET6))
1167 local_wild_mapped = inp;
1168 else
1169 #endif /* defined(INET6) */
1170 local_wild = inp;
1171 }
1172 }
1173 }
1174 #if defined(INET6)
1175 if (local_wild == NULL) {
1176 if (local_wild_mapped != NULL) {
1177 if (in_pcb_checkstate(local_wild_mapped, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1178 lck_rw_done(pcbinfo->mtx);
1179 return (local_wild_mapped);
1180 }
1181 else { /* it's there but dead, say it isn't found */
1182 lck_rw_done(pcbinfo->mtx);
1183 return(NULL);
1184 }
1185 }
1186 lck_rw_done(pcbinfo->mtx);
1187 return (NULL);
1188 }
1189 #endif /* defined(INET6) */
1190 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1191 lck_rw_done(pcbinfo->mtx);
1192 return (local_wild);
1193 }
1194 else { /* it's there but dead, say it isn't found */
1195 lck_rw_done(pcbinfo->mtx);
1196 return(NULL);
1197 }
1198 }
1199
1200 /*
1201 * Not found.
1202 */
1203 lck_rw_done(pcbinfo->mtx);
1204 return (NULL);
1205 }
1206
1207 /*
1208 * Insert PCB onto various hash lists.
1209 */
1210 int
1211 in_pcbinshash(inp, locked)
1212 struct inpcb *inp;
1213 int locked; /* list already locked exclusive */
1214 {
1215 struct inpcbhead *pcbhash;
1216 struct inpcbporthead *pcbporthash;
1217 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1218 struct inpcbport *phd;
1219 u_int32_t hashkey_faddr;
1220
1221 #if INET6
1222 if (inp->inp_vflag & INP_IPV6)
1223 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1224 else
1225 #endif /* INET6 */
1226 hashkey_faddr = inp->inp_faddr.s_addr;
1227
1228 inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask);
1229
1230 if (!locked) {
1231 if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) {
1232 /*lock inversion issue, mostly with udp multicast packets */
1233 socket_unlock(inp->inp_socket, 0);
1234 lck_rw_lock_exclusive(pcbinfo->mtx);
1235 socket_lock(inp->inp_socket, 0);
1236 }
1237 }
1238
1239 pcbhash = &pcbinfo->hashbase[inp->hash_element];
1240
1241 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
1242 pcbinfo->porthashmask)];
1243
1244 /*
1245 * Go through port list and look for a head for this lport.
1246 */
1247 LIST_FOREACH(phd, pcbporthash, phd_hash) {
1248 if (phd->phd_port == inp->inp_lport)
1249 break;
1250 }
1251 /*
1252 * If none exists, malloc one and tack it on.
1253 */
1254 if (phd == NULL) {
1255 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_WAITOK);
1256 if (phd == NULL) {
1257 if (!locked)
1258 lck_rw_done(pcbinfo->mtx);
1259 return (ENOBUFS); /* XXX */
1260 }
1261 phd->phd_port = inp->inp_lport;
1262 LIST_INIT(&phd->phd_pcblist);
1263 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
1264 }
1265 inp->inp_phd = phd;
1266 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
1267 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
1268 if (!locked)
1269 lck_rw_done(pcbinfo->mtx);
1270 return (0);
1271 }
1272
1273 /*
1274 * Move PCB to the proper hash bucket when { faddr, fport } have been
1275 * changed. NOTE: This does not handle the case of the lport changing (the
1276 * hashed port list would have to be updated as well), so the lport must
1277 * not change after in_pcbinshash() has been called.
1278 */
1279 void
1280 in_pcbrehash(inp)
1281 struct inpcb *inp;
1282 {
1283 struct inpcbhead *head;
1284 u_int32_t hashkey_faddr;
1285
1286 #if INET6
1287 if (inp->inp_vflag & INP_IPV6)
1288 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1289 else
1290 #endif /* INET6 */
1291 hashkey_faddr = inp->inp_faddr.s_addr;
1292 inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
1293 inp->inp_fport, inp->inp_pcbinfo->hashmask);
1294 head = &inp->inp_pcbinfo->hashbase[inp->hash_element];
1295
1296 LIST_REMOVE(inp, inp_hash);
1297 LIST_INSERT_HEAD(head, inp, inp_hash);
1298 }
1299
1300 /*
1301 * Remove PCB from various lists.
1302 */
1303 //###LOCK must be called with list lock held
1304 void
1305 in_pcbremlists(inp)
1306 struct inpcb *inp;
1307 {
1308 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
1309
1310 if (inp->inp_lport) {
1311 struct inpcbport *phd = inp->inp_phd;
1312
1313 LIST_REMOVE(inp, inp_hash);
1314 LIST_REMOVE(inp, inp_portlist);
1315 if (phd != NULL && (LIST_FIRST(&phd->phd_pcblist) == NULL)) {
1316 LIST_REMOVE(phd, phd_hash);
1317 FREE(phd, M_PCB);
1318 }
1319 }
1320 LIST_REMOVE(inp, inp_list);
1321 inp->inp_pcbinfo->ipi_count--;
1322 }
1323
1324 static void in_pcb_detach_port( struct inpcb *inp);
1325 int
1326 in_pcb_grab_port (struct inpcbinfo *pcbinfo,
1327 u_short options,
1328 struct in_addr laddr,
1329 u_short *lport,
1330 struct in_addr faddr,
1331 u_short fport,
1332 u_int cookie,
1333 u_char owner_id)
1334 {
1335 struct inpcb *inp, *pcb;
1336 struct sockaddr_in sin;
1337 struct proc *p = current_proc();
1338 int stat;
1339
1340
1341 pcbinfo->nat_dummy_socket.so_pcb = 0;
1342 pcbinfo->nat_dummy_socket.so_options = 0;
1343 if (*lport) {
1344 /* The grabber wants a particular port */
1345
1346 if (faddr.s_addr || fport) {
1347 /*
1348 * This is either the second half of an active connect, or
1349 * it's from the acceptance of an incoming connection.
1350 */
1351 if (laddr.s_addr == 0) {
1352 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1353 return EINVAL;
1354 }
1355
1356 inp = in_pcblookup_hash(pcbinfo, faddr, fport, laddr, *lport, 0, NULL);
1357 if (inp) {
1358 /* pcb was found, its count was upped. need to decrease it here */
1359 in_pcb_checkstate(inp, WNT_RELEASE, 0);
1360 if (!(IN_MULTICAST(ntohl(laddr.s_addr)))) {
1361 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1362 return (EADDRINUSE);
1363 }
1364 }
1365
1366 stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p);
1367 if (stat) {
1368 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1369 return stat;
1370 }
1371 pcb = sotoinpcb(&pcbinfo->nat_dummy_socket);
1372 pcb->inp_vflag |= INP_IPV4;
1373
1374 pcb->inp_lport = *lport;
1375 pcb->inp_laddr.s_addr = laddr.s_addr;
1376
1377 pcb->inp_faddr = faddr;
1378 pcb->inp_fport = fport;
1379
1380 lck_rw_lock_exclusive(pcbinfo->mtx);
1381 in_pcbinshash(pcb, 1);
1382 lck_rw_done(pcbinfo->mtx);
1383 }
1384 else {
1385 /*
1386 * This is either a bind for a passive socket, or it's the
1387 * first part of bind-connect sequence (not likely since an
1388 * ephemeral port is usually used in this case). Or, it's
1389 * the result of a connection acceptance when the foreign
1390 * address/port cannot be provided (which requires the SO_REUSEADDR
1391 * flag if laddr is not multicast).
1392 */
1393
1394 stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p);
1395 if (stat) {
1396 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1397 return stat;
1398 }
1399 pcb = sotoinpcb(&pcbinfo->nat_dummy_socket);
1400 pcb->inp_vflag |= INP_IPV4;
1401
1402 pcbinfo->nat_dummy_socket.so_options = options;
1403 bzero(&sin, sizeof(struct sockaddr_in));
1404 sin.sin_len = sizeof(struct sockaddr_in);
1405 sin.sin_family = AF_INET;
1406 sin.sin_addr.s_addr = laddr.s_addr;
1407 sin.sin_port = *lport;
1408
1409 socket_lock(&pcbinfo->nat_dummy_socket, 1);
1410 stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb,
1411 (struct sockaddr *) &sin, p);
1412 if (stat) {
1413 socket_unlock(&pcbinfo->nat_dummy_socket, 1); /*detach first */
1414 in_pcb_detach_port(pcb); /* will restore dummy pcb */
1415 return stat;
1416 }
1417 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1418 }
1419 }
1420 else {
1421 /* The grabber wants an ephemeral port */
1422
1423 stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p);
1424 if (stat) {
1425 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1426 return stat;
1427 }
1428 pcb = sotoinpcb(&pcbinfo->nat_dummy_socket);
1429 pcb->inp_vflag |= INP_IPV4;
1430
1431 bzero(&sin, sizeof(struct sockaddr_in));
1432 sin.sin_len = sizeof(struct sockaddr_in);
1433 sin.sin_family = AF_INET;
1434 sin.sin_addr.s_addr = laddr.s_addr;
1435 sin.sin_port = 0;
1436
1437 if (faddr.s_addr || fport) {
1438 /*
1439 * Not sure if this case will be used - could occur when connect
1440 * is called, skipping the bind.
1441 */
1442
1443 if (laddr.s_addr == 0) {
1444 in_pcb_detach_port(pcb); /* restores dummy pcb */
1445 return EINVAL;
1446 }
1447
1448 socket_lock(&pcbinfo->nat_dummy_socket, 1);
1449 stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb,
1450 (struct sockaddr *) &sin, p);
1451 if (stat) {
1452 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1453 in_pcb_detach_port(pcb); /* restores dummy pcb */
1454 return stat;
1455 }
1456
1457 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1458 inp = in_pcblookup_hash(pcbinfo, faddr, fport,
1459 pcb->inp_laddr, pcb->inp_lport, 0, NULL);
1460 if (inp) {
1461 /* pcb was found, its count was upped. need to decrease it here */
1462 in_pcb_checkstate(inp, WNT_RELEASE, 0);
1463 in_pcb_detach_port(pcb);
1464 return (EADDRINUSE);
1465 }
1466
1467 lck_rw_lock_exclusive(pcbinfo->mtx);
1468 pcb->inp_faddr = faddr;
1469 pcb->inp_fport = fport;
1470 in_pcbrehash(pcb);
1471 lck_rw_done(pcbinfo->mtx);
1472 }
1473 else {
1474 /*
1475 * This is a simple bind of an ephemeral port. The local addr
1476 * may or may not be defined.
1477 */
1478
1479 socket_lock(&pcbinfo->nat_dummy_socket, 1);
1480 stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb,
1481 (struct sockaddr *) &sin, p);
1482 if (stat) {
1483 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1484 in_pcb_detach_port(pcb);
1485 return stat;
1486 }
1487 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1488 }
1489 *lport = pcb->inp_lport;
1490 }
1491
1492
1493 pcb->nat_owner = owner_id;
1494 pcb->nat_cookie = cookie;
1495 pcb->inp_ppcb = (caddr_t) pcbinfo->dummy_cb;
1496 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; /* restores dummypcb */
1497 return 0;
1498 }
1499
1500 /* 3962035 - in_pcb_letgo_port needs a special case function for detaching */
1501 static void
1502 in_pcb_detach_port(
1503 struct inpcb *inp)
1504 {
1505 struct socket *so = inp->inp_socket;
1506 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1507
1508 if (so != &pcbinfo->nat_dummy_socket)
1509 panic("in_pcb_detach_port: not a dummy_sock: so=%x, inp=%x\n", so, inp);
1510 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
1511 /*### access ipi in in_pcbremlists */
1512 in_pcbremlists(inp);
1513
1514 inp->inp_socket = 0;
1515 inp->reserved[0] = so;
1516 zfree(pcbinfo->ipi_zone, inp);
1517 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; /* restores dummypcb */
1518 }
1519
1520 int
1521 in_pcb_letgo_port(struct inpcbinfo *pcbinfo, struct in_addr laddr, u_short lport,
1522 struct in_addr faddr, u_short fport, u_char owner_id)
1523 {
1524 struct inpcbhead *head;
1525 register struct inpcb *inp;
1526
1527
1528 /*
1529 * First look for an exact match.
1530 */
1531
1532 lck_rw_lock_exclusive(pcbinfo->mtx);
1533 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
1534 for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
1535 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1536 inp->inp_laddr.s_addr == laddr.s_addr &&
1537 inp->inp_fport == fport &&
1538 inp->inp_lport == lport &&
1539 inp->nat_owner == owner_id) {
1540 /*
1541 * Found.
1542 */
1543 in_pcb_detach_port(inp);
1544 lck_rw_done(pcbinfo->mtx);
1545 return 0;
1546 }
1547 }
1548
1549 lck_rw_done(pcbinfo->mtx);
1550 return ENOENT;
1551 }
1552
1553 u_char
1554 in_pcb_get_owner(struct inpcbinfo *pcbinfo,
1555 struct in_addr laddr, u_short lport,
1556 struct in_addr faddr, u_short fport,
1557 u_int *cookie)
1558
1559 {
1560 struct inpcb *inp;
1561 u_char owner_id = INPCB_NO_OWNER;
1562 struct inpcbport *phd;
1563 struct inpcbporthead *porthash;
1564
1565
1566 if (IN_MULTICAST(laddr.s_addr)) {
1567 /*
1568 * Walk through PCB's looking for registered
1569 * owners.
1570 */
1571
1572 lck_rw_lock_shared(pcbinfo->mtx);
1573 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
1574 pcbinfo->porthashmask)];
1575 for (phd = porthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) {
1576 if (phd->phd_port == lport)
1577 break;
1578 }
1579
1580 if (phd == 0) {
1581 lck_rw_done(pcbinfo->mtx);
1582 return INPCB_NO_OWNER;
1583 }
1584
1585 owner_id = INPCB_NO_OWNER;
1586 for (inp = phd->phd_pcblist.lh_first; inp != NULL;
1587 inp = inp->inp_portlist.le_next) {
1588
1589 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1590 if (inp->nat_owner == 0)
1591 owner_id |= INPCB_OWNED_BY_X;
1592 else
1593 owner_id |= inp->nat_owner;
1594 }
1595 }
1596
1597 lck_rw_done(pcbinfo->mtx);
1598 return owner_id;
1599 }
1600 else {
1601 inp = in_pcblookup_hash(pcbinfo, faddr, fport,
1602 laddr, lport, 1, NULL);
1603 if (inp) {
1604 /* pcb was found, its count was upped. need to decrease it here */
1605 /* if we found it, that pcb is already locked by the caller */
1606 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING)
1607 return(INPCB_NO_OWNER);
1608
1609 if (inp->nat_owner) {
1610 owner_id = inp->nat_owner;
1611 *cookie = inp->nat_cookie;
1612 }
1613 else {
1614 owner_id = INPCB_OWNED_BY_X;
1615 }
1616 }
1617 else
1618 owner_id = INPCB_NO_OWNER;
1619
1620 return owner_id;
1621 }
1622 }
1623
1624 int
1625 in_pcb_new_share_client(struct inpcbinfo *pcbinfo, u_char *owner_id)
1626 {
1627
1628 int i;
1629
1630
1631 for (i=0; i < INPCB_MAX_IDS; i++) {
1632 if ((pcbinfo->all_owners & (1 << i)) == 0) {
1633 pcbinfo->all_owners |= (1 << i);
1634 *owner_id = (1 << i);
1635 return 0;
1636 }
1637 }
1638
1639 return ENOSPC;
1640 }
1641
1642 int
1643 in_pcb_rem_share_client(struct inpcbinfo *pcbinfo, u_char owner_id)
1644 {
1645 struct inpcb *inp;
1646
1647
1648 lck_rw_lock_exclusive(pcbinfo->mtx);
1649 if (pcbinfo->all_owners & owner_id) {
1650 pcbinfo->all_owners &= ~owner_id;
1651 for (inp = pcbinfo->listhead->lh_first; inp != NULL; inp = inp->inp_list.le_next) {
1652 if (inp->nat_owner & owner_id) {
1653 if (inp->nat_owner == owner_id)
1654 /*
1655 * Deallocate the pcb
1656 */
1657 in_pcb_detach_port(inp);
1658 else
1659 inp->nat_owner &= ~owner_id;
1660 }
1661 }
1662 }
1663 else {
1664 lck_rw_done(pcbinfo->mtx);
1665 return ENOENT;
1666 }
1667
1668 lck_rw_done(pcbinfo->mtx);
1669 return 0;
1670 }
1671
1672
1673
1674 void in_pcb_nat_init(struct inpcbinfo *pcbinfo, int afamily,
1675 int pfamily, int protocol)
1676 {
1677 int stat;
1678 struct proc *p = current_proc();
1679
1680 bzero(&pcbinfo->nat_dummy_socket, sizeof(struct socket));
1681 pcbinfo->nat_dummy_socket.so_proto = pffindproto_locked(afamily, pfamily, protocol);
1682 pcbinfo->all_owners = 0;
1683 stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p);
1684 if (stat)
1685 panic("in_pcb_nat_init: can't alloc fakepcb err=%\n", stat);
1686 pcbinfo->nat_dummy_pcb = pcbinfo->nat_dummy_socket.so_pcb;
1687 }
1688
1689 /* Mechanism used to defer the memory release of PCBs
1690 * The pcb list will contain the pcb until the ripper can clean it up if
1691 * the following conditions are met: 1) state "DEAD", 2) wantcnt is STOPUSING
1692 * 3) usecount is null
1693 * This function will be called to either mark the pcb as dead and no
 * longer usable (WNT_STOPUSING), to take a reference on it (WNT_ACQUIRE),
 * or to drop a reference (WNT_RELEASE).
 *
 * The reference count lives in the low 16 bits of inp_wantcnt; the value
 * 0xffff there means "stop using this pcb".
 *
 * mode:   WNT_STOPUSING, WNT_ACQUIRE or WNT_RELEASE; anything else panics.
 * locked: when 0, WNT_STOPUSING and WNT_RELEASE take and release the
 *         socket lock themselves; when non-zero they leave it alone
 *         (presumably because the caller holds it - confirm with callers).
 *         WNT_ACQUIRE never touches the socket lock.
 *
 * Returns:
 *   WNT_STOPUSING  always for mode WNT_STOPUSING; for WNT_ACQUIRE when the
 *                  pcb is already marked 0xffff (no reference taken); for
 *                  WNT_RELEASE when the pcb is already marked 0xffff or
 *                  its state is INPCB_STATE_DEAD after the decrement.
 *   WNT_ACQUIRE    the count was incremented.
 *   WNT_RELEASE    the count was decremented and the pcb is not dead.
1694 */
1695 int
1696 in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
1697
1698 {
1699
1700 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
1701 UInt32 origwant;
1702 UInt32 newwant;
1703
1704 switch (mode) {
1705
1706 case WNT_STOPUSING: /* try to mark the pcb as ready for recycling */
1707
1708 /* compareswap with STOPUSING, if success we're good, if it's in use, will be marked later */
1709
1710 if (locked == 0)
1711 socket_lock(pcb->inp_socket, 1);
1712 pcb->inp_state = INPCB_STATE_DEAD;
/*
 * Shared tail: WNT_RELEASE jumps here (with the socket lock taken if
 * locked == 0) once it has dropped its reference on a DEAD pcb.
 */
1713 stopusing:
1714 if (pcb->inp_socket->so_usecount < 0)
1715 panic("in_pcb_checkstate STOP pcb=%x so=%x usecount is negative\n", pcb, pcb->inp_socket);
1716 if (locked == 0)
1717 socket_unlock(pcb->inp_socket, 1);
1718
1719 origwant = *wantcnt;
1720 if ((UInt16) origwant == 0xffff ) /* should stop using */
1721 return (WNT_STOPUSING);
1722 newwant = 0xffff;
/*
 * Only an unreferenced pcb is marked now.  The result of the swap is
 * deliberately not checked: the function returns WNT_STOPUSING whether
 * or not the mark was applied.
 */
1723 if ((UInt16) origwant == 0) {/* try to mark it as unsuable now */
1724 OSCompareAndSwap(origwant, newwant, (UInt32 *) wantcnt) ;
1725 }
1726 return (WNT_STOPUSING);
1727 break;
1728
1729 case WNT_ACQUIRE: /* try to increase reference to pcb */
1730 /* if WNT_STOPUSING should bail out */
1731 /*
1732 * if socket state DEAD, try to set count to STOPUSING, return failed
1733 * otherwise increase cnt
1734 */
/* Retry the increment until the compare-and-swap succeeds. */
1735 do {
1736 origwant = *wantcnt;
1737 if ((UInt16) origwant == 0xffff ) {/* should stop using */
1738 // printf("in_pcb_checkstate: ACQ PCB was STOPUSING while release. odd pcb=%x\n", pcb);
1739 return (WNT_STOPUSING);
1740 }
1741 newwant = origwant + 1;
1742 } while (!OSCompareAndSwap(origwant, newwant, (UInt32 *) wantcnt));
1743 return (WNT_ACQUIRE);
1744 break;
1745
1746 case WNT_RELEASE: /* release reference. if result is null and pcb state is DEAD,
1747 set wanted bit to STOPUSING
1748 */
1749
1750 if (locked == 0)
1751 socket_lock(pcb->inp_socket, 1);
1752
/* Retry the decrement until the compare-and-swap succeeds. */
1753 do {
1754 origwant = *wantcnt;
1755 if ((UInt16) origwant == 0x0 )
1756 panic("in_pcb_checkstate pcb=%x release with zero count", pcb);
1757 if ((UInt16) origwant == 0xffff ) {/* should stop using */
1758 #if TEMPDEBUG
1759 printf("in_pcb_checkstate: REL PCB was STOPUSING while release. odd pcb=%x\n", pcb);
1760 #endif
1761 if (locked == 0)
1762 socket_unlock(pcb->inp_socket, 1);
1763 return (WNT_STOPUSING);
1764 }
1765 newwant = origwant - 1;
1766 } while (!OSCompareAndSwap(origwant, newwant, (UInt32 *) wantcnt));
1767
/*
 * A dead pcb finishes through the WNT_STOPUSING tail above, which also
 * releases the socket lock taken here and returns WNT_STOPUSING.
 */
1768 if (pcb->inp_state == INPCB_STATE_DEAD)
1769 goto stopusing;
1770 if (pcb->inp_socket->so_usecount < 0)
1771 panic("in_pcb_checkstate RELEASE pcb=%x so=%x usecount is negative\n", pcb, pcb->inp_socket);
1772
1773 if (locked == 0)
1774 socket_unlock(pcb->inp_socket, 1);
1775 return (WNT_RELEASE);
1776 break;
1777
1778 default:
1779
1780 panic("in_pcb_checkstate: so=%x not a valid state =%x\n", pcb->inp_socket, mode);
1781 }
1782
1783 /* NOTREACHED */
1784 return (mode);
1785 }
1786
1787 /*
1788 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
1789 * The inpcb_compat data structure is passed to user space and must
1790 * not change. We intentionally avoid copying pointers. The socket is
1791 * the one exception, though we probably shouldn't copy that either.
1792 */
1793 void
1794 inpcb_to_compat(
1795 struct inpcb *inp,
1796 struct inpcb_compat *inp_compat)
1797 {
1798 bzero(inp_compat, sizeof(*inp_compat));
1799 inp_compat->inp_fport = inp->inp_fport;
1800 inp_compat->inp_lport = inp->inp_lport;
1801 inp_compat->inp_socket = inp->inp_socket;
1802 inp_compat->nat_owner = inp->nat_owner;
1803 inp_compat->nat_cookie = inp->nat_cookie;
1804 inp_compat->inp_gencnt = inp->inp_gencnt;
1805 inp_compat->inp_flags = inp->inp_flags;
1806 inp_compat->inp_flow = inp->inp_flow;
1807 inp_compat->inp_vflag = inp->inp_vflag;
1808 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
1809 inp_compat->inp_ip_p = inp->inp_ip_p;
1810 inp_compat->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
1811 inp_compat->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
1812 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
1813 inp_compat->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim;
1814 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
1815 inp_compat->inp6_ifindex = inp->inp6_ifindex;
1816 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
1817 }
1818
1819 #ifndef __APPLE__
1820 prison_xinpcb(struct proc *p, struct inpcb *inp)
1821 {
1822 if (!p->p_prison)
1823 return (0);
1824 if (ntohl(inp->inp_laddr.s_addr) == p->p_prison->pr_ip)
1825 return (0);
1826 return (1);
1827 }
1828 #endif