]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/in_pcb.c
xnu-792.18.15.tar.gz
[apple/xnu.git] / bsd / netinet / in_pcb.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
62 */
63
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/domain.h>
69 #include <sys/protosw.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/proc.h>
73 #ifndef __APPLE__
74 #include <sys/jail.h>
75 #endif
76 #include <sys/kernel.h>
77 #include <sys/sysctl.h>
78 #include <libkern/OSAtomic.h>
79
80 #include <machine/limits.h>
81
82 #ifdef __APPLE__
83 #include <kern/zalloc.h>
84 #endif
85
86 #include <net/if.h>
87 #include <net/if_types.h>
88 #include <net/route.h>
89
90 #include <netinet/in.h>
91 #include <netinet/in_pcb.h>
92 #include <netinet/in_var.h>
93 #include <netinet/ip_var.h>
94 #if INET6
95 #include <netinet/ip6.h>
96 #include <netinet6/ip6_var.h>
97 #endif /* INET6 */
98
99 #include "faith.h"
100
101 #if IPSEC
102 #include <netinet6/ipsec.h>
103 #include <netkey/key.h>
104 #endif /* IPSEC */
105
106 #include <sys/kdebug.h>
107
108 #if IPSEC
109 extern int ipsec_bypass;
110 extern lck_mtx_t *sadb_mutex;
111 #endif
112
113 extern u_long route_generation;
114
115 #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
116 #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
117
118 struct in_addr zeroin_addr;
119
120 /*
121 * These configure the range of local port addresses assigned to
122 * "unspecified" outgoing connections/packets/whatever.
123 */
124 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
125 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
126 #ifndef __APPLE__
127 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */
128 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */
129 #else
130 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
131 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
132 #endif
133 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
134 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
135
136 #define RANGECHK(var, min, max) \
137 if ((var) < (min)) { (var) = (min); } \
138 else if ((var) > (max)) { (var) = (max); }
139
140 static int
141 sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
142 {
143 int error = sysctl_handle_int(oidp,
144 oidp->oid_arg1, oidp->oid_arg2, req);
145 if (!error) {
146 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
147 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
148 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
149 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
150 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
151 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
152 }
153 return error;
154 }
155
156 #undef RANGECHK
157
158 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
159
160 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
161 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
162 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
163 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
164 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
165 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
166 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
167 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
168 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
169 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
170 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
171 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
172
173 /*
174 * in_pcb.c: manage the Protocol Control Blocks.
175 *
176 * NOTE: It is assumed that most of these functions will be called at
177 * splnet(). XXX - There are, unfortunately, a few exceptions to this
178 * rule that should be fixed.
179 */
180
181 /*
182 * Allocate a PCB and associate it with the socket.
183 */
184 int
185 in_pcballoc(so, pcbinfo, p)
186 struct socket *so;
187 struct inpcbinfo *pcbinfo;
188 struct proc *p;
189 {
190 register struct inpcb *inp;
191 caddr_t temp;
192 #if IPSEC
193 #ifndef __APPLE__
194 int error;
195 #endif
196 #endif
197
198 if (so->cached_in_sock_layer == 0) {
199 #if TEMPDEBUG
200 printf("PCBALLOC calling zalloc for socket %x\n", so);
201 #endif
202 inp = (struct inpcb *) zalloc(pcbinfo->ipi_zone);
203 if (inp == NULL)
204 return (ENOBUFS);
205 bzero((caddr_t)inp, sizeof(*inp));
206 }
207 else {
208 #if TEMPDEBUG
209 printf("PCBALLOC reusing PCB for socket %x\n", so);
210 #endif
211 inp = (struct inpcb *) so->so_saved_pcb;
212 temp = inp->inp_saved_ppcb;
213 bzero((caddr_t) inp, sizeof(*inp));
214 inp->inp_saved_ppcb = temp;
215 }
216
217 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
218 inp->inp_pcbinfo = pcbinfo;
219 inp->inp_socket = so;
220 so->so_pcb = (caddr_t)inp;
221
222 if (so->so_proto->pr_flags & PR_PCBLOCK) {
223 inp->inpcb_mtx = lck_mtx_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr);
224 if (inp->inpcb_mtx == NULL) {
225 printf("in_pcballoc: can't alloc mutex! so=%x\n", so);
226 return(ENOMEM);
227 }
228 }
229
230 #if IPSEC
231 #ifndef __APPLE__
232 if (ipsec_bypass == 0) {
233 lck_mtx_lock(sadb_mutex);
234 error = ipsec_init_policy(so, &inp->inp_sp);
235 lck_mtx_unlock(sadb_mutex);
236 if (error != 0) {
237 zfree(pcbinfo->ipi_zone, inp);
238 return error;
239 }
240 }
241 #endif
242 #endif /*IPSEC*/
243 #if defined(INET6)
244 if (INP_SOCKAF(so) == AF_INET6 && !ip6_mapped_addr_on)
245 inp->inp_flags |= IN6P_IPV6_V6ONLY;
246 #endif
247
248 #if INET6
249 if (ip6_auto_flowlabel)
250 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
251 #endif
252 lck_rw_lock_exclusive(pcbinfo->mtx);
253 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
254 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
255 pcbinfo->ipi_count++;
256 lck_rw_done(pcbinfo->mtx);
257 return (0);
258 }
259
260 int
261 in_pcbbind(inp, nam, p)
262 register struct inpcb *inp;
263 struct sockaddr *nam;
264 struct proc *p;
265 {
266 register struct socket *so = inp->inp_socket;
267 unsigned short *lastport;
268 struct sockaddr_in *sin;
269 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
270 u_short lport = 0;
271 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
272 int error;
273
274 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
275 return (EADDRNOTAVAIL);
276 if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
277 return (EINVAL);
278 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
279 wild = 1;
280 socket_unlock(so, 0); /* keep reference on socket */
281 lck_rw_lock_exclusive(pcbinfo->mtx);
282 if (nam) {
283 sin = (struct sockaddr_in *)nam;
284 if (nam->sa_len != sizeof (*sin)) {
285 lck_rw_done(pcbinfo->mtx);
286 socket_lock(so, 0);
287 return (EINVAL);
288 }
289 #ifdef notdef
290 /*
291 * We should check the family, but old programs
292 * incorrectly fail to initialize it.
293 */
294 if (sin->sin_family != AF_INET) {
295 lck_rw_done(pcbinfo->mtx);
296 socket_lock(so, 0);
297 return (EAFNOSUPPORT);
298 }
299 #endif
300 lport = sin->sin_port;
301 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
302 /*
303 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
304 * allow complete duplication of binding if
305 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
306 * and a multicast address is bound on both
307 * new and duplicated sockets.
308 */
309 if (so->so_options & SO_REUSEADDR)
310 reuseport = SO_REUSEADDR|SO_REUSEPORT;
311 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
312 struct ifaddr *ifa;
313 sin->sin_port = 0; /* yech... */
314 if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin)) == 0) {
315 lck_rw_done(pcbinfo->mtx);
316 socket_lock(so, 0);
317 return (EADDRNOTAVAIL);
318 }
319 else {
320 ifafree(ifa);
321 }
322 }
323 if (lport) {
324 struct inpcb *t;
325
326 /* GROSS */
327 if (ntohs(lport) < IPPORT_RESERVED && p &&
328 proc_suser(p)) {
329 lck_rw_done(pcbinfo->mtx);
330 socket_lock(so, 0);
331 return (EACCES);
332 }
333 if (so->so_uid &&
334 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
335 t = in_pcblookup_local(inp->inp_pcbinfo,
336 sin->sin_addr, lport, INPLOOKUP_WILDCARD);
337 if (t &&
338 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
339 ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
340 (t->inp_socket->so_options &
341 SO_REUSEPORT) == 0) &&
342 (so->so_uid != t->inp_socket->so_uid)) {
343 #if INET6
344 if (ntohl(sin->sin_addr.s_addr) !=
345 INADDR_ANY ||
346 ntohl(t->inp_laddr.s_addr) !=
347 INADDR_ANY ||
348 INP_SOCKAF(so) ==
349 INP_SOCKAF(t->inp_socket)) {
350 #endif /* defined(INET6) */
351 lck_rw_done(pcbinfo->mtx);
352 socket_lock(so, 0);
353 return (EADDRINUSE);
354 }
355 }
356 }
357 t = in_pcblookup_local(pcbinfo, sin->sin_addr,
358 lport, wild);
359 if (t &&
360 (reuseport & t->inp_socket->so_options) == 0) {
361 #if INET6
362 if (ip6_mapped_addr_on == 0 ||
363 ntohl(sin->sin_addr.s_addr) !=
364 INADDR_ANY ||
365 ntohl(t->inp_laddr.s_addr) !=
366 INADDR_ANY ||
367 INP_SOCKAF(so) ==
368 INP_SOCKAF(t->inp_socket)) {
369 #endif /* defined(INET6) */
370 lck_rw_done(pcbinfo->mtx);
371 socket_lock(so, 0);
372 return (EADDRINUSE);
373 }
374 }
375 }
376 inp->inp_laddr = sin->sin_addr;
377 }
378 if (lport == 0) {
379 u_short first, last;
380 int count;
381
382 inp->inp_flags |= INP_ANONPORT;
383
384 if (inp->inp_flags & INP_HIGHPORT) {
385 first = ipport_hifirstauto; /* sysctl */
386 last = ipport_hilastauto;
387 lastport = &pcbinfo->lasthi;
388 } else if (inp->inp_flags & INP_LOWPORT) {
389 if (p && (error = proc_suser(p))) {
390 lck_rw_done(pcbinfo->mtx);
391 socket_lock(so, 0);
392 return error;
393 }
394 first = ipport_lowfirstauto; /* 1023 */
395 last = ipport_lowlastauto; /* 600 */
396 lastport = &pcbinfo->lastlow;
397 } else {
398 first = ipport_firstauto; /* sysctl */
399 last = ipport_lastauto;
400 lastport = &pcbinfo->lastport;
401 }
402 /*
403 * Simple check to ensure all ports are not used up causing
404 * a deadlock here.
405 *
406 * We split the two cases (up and down) so that the direction
407 * is not being tested on each round of the loop.
408 */
409 if (first > last) {
410 /*
411 * counting down
412 */
413 count = first - last;
414
415 do {
416 if (count-- < 0) { /* completely used? */
417 lck_rw_done(pcbinfo->mtx);
418 socket_lock(so, 0);
419 inp->inp_laddr.s_addr = INADDR_ANY;
420 return (EADDRNOTAVAIL);
421 }
422 --*lastport;
423 if (*lastport > first || *lastport < last)
424 *lastport = first;
425 lport = htons(*lastport);
426 } while (in_pcblookup_local(pcbinfo,
427 inp->inp_laddr, lport, wild));
428 } else {
429 /*
430 * counting up
431 */
432 count = last - first;
433
434 do {
435 if (count-- < 0) { /* completely used? */
436 lck_rw_done(pcbinfo->mtx);
437 socket_lock(so, 0);
438 inp->inp_laddr.s_addr = INADDR_ANY;
439 return (EADDRNOTAVAIL);
440 }
441 ++*lastport;
442 if (*lastport < first || *lastport > last)
443 *lastport = first;
444 lport = htons(*lastport);
445 } while (in_pcblookup_local(pcbinfo,
446 inp->inp_laddr, lport, wild));
447 }
448 }
449 socket_lock(so, 0);
450 inp->inp_lport = lport;
451 if (in_pcbinshash(inp, 1) != 0) {
452 inp->inp_laddr.s_addr = INADDR_ANY;
453 inp->inp_lport = 0;
454 lck_rw_done(pcbinfo->mtx);
455 return (EAGAIN);
456 }
457 lck_rw_done(pcbinfo->mtx);
458 return (0);
459 }
460
461 /*
462 * Transform old in_pcbconnect() into an inner subroutine for new
463 * in_pcbconnect(): Do some validity-checking on the remote
464 * address (in mbuf 'nam') and then determine local host address
465 * (i.e., which interface) to use to access that remote host.
466 *
467 * This preserves definition of in_pcbconnect(), while supporting a
468 * slightly different version for T/TCP. (This is more than
469 * a bit of a kludge, but cleaning up the internal interfaces would
470 * have forced minor changes in every protocol).
471 */
472
473 int
474 in_pcbladdr(inp, nam, plocal_sin)
475 register struct inpcb *inp;
476 struct sockaddr *nam;
477 struct sockaddr_in **plocal_sin;
478 {
479 struct in_ifaddr *ia;
480 register struct sockaddr_in *sin = (struct sockaddr_in *)nam;
481
482 if (nam->sa_len != sizeof (*sin))
483 return (EINVAL);
484 if (sin->sin_family != AF_INET)
485 return (EAFNOSUPPORT);
486 if (sin->sin_port == 0)
487 return (EADDRNOTAVAIL);
488 lck_mtx_lock(rt_mtx);
489 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
490 /*
491 * If the destination address is INADDR_ANY,
492 * use the primary local address.
493 * If the supplied address is INADDR_BROADCAST,
494 * and the primary interface supports broadcast,
495 * choose the broadcast address for that interface.
496 */
497 #define satosin(sa) ((struct sockaddr_in *)(sa))
498 #define sintosa(sin) ((struct sockaddr *)(sin))
499 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
500 if (sin->sin_addr.s_addr == INADDR_ANY)
501 sin->sin_addr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr;
502 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
503 (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & IFF_BROADCAST))
504 sin->sin_addr = satosin(&TAILQ_FIRST(&in_ifaddrhead)->ia_broadaddr)->sin_addr;
505 }
506 if (inp->inp_laddr.s_addr == INADDR_ANY) {
507 register struct route *ro;
508
509 ia = (struct in_ifaddr *)0;
510 /*
511 * If route is known or can be allocated now,
512 * our src addr is taken from the i/f, else punt.
513 * Note that we should check the address family of the cached
514 * destination, in case of sharing the cache with IPv6.
515 */
516 ro = &inp->inp_route;
517 if (ro->ro_rt &&
518 (ro->ro_dst.sa_family != AF_INET ||
519 satosin(&ro->ro_dst)->sin_addr.s_addr !=
520 sin->sin_addr.s_addr ||
521 inp->inp_socket->so_options & SO_DONTROUTE ||
522 ro->ro_rt->generation_id != route_generation)) {
523 rtfree_locked(ro->ro_rt);
524 ro->ro_rt = (struct rtentry *)0;
525 }
526 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
527 (ro->ro_rt == (struct rtentry *)0 ||
528 ro->ro_rt->rt_ifp == 0)) {
529 /* No route yet, so try to acquire one */
530 bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
531 ro->ro_dst.sa_family = AF_INET;
532 ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
533 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
534 sin->sin_addr;
535 rtalloc_ign_locked(ro, 0UL);
536 }
537 /*
538 * If we found a route, use the address
539 * corresponding to the outgoing interface
540 * unless it is the loopback (in case a route
541 * to our address on another net goes to loopback).
542 */
543 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
544 ia = ifatoia(ro->ro_rt->rt_ifa);
545 if (ia)
546 ifaref(&ia->ia_ifa);
547 }
548 if (ia == 0) {
549 u_short fport = sin->sin_port;
550
551 sin->sin_port = 0;
552 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
553 if (ia == 0) {
554 ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
555 }
556 sin->sin_port = fport;
557 if (ia == 0) {
558 ia = TAILQ_FIRST(&in_ifaddrhead);
559 if (ia)
560 ifaref(&ia->ia_ifa);
561 }
562 if (ia == 0) {
563 lck_mtx_unlock(rt_mtx);
564 return (EADDRNOTAVAIL);
565 }
566 }
567 /*
568 * If the destination address is multicast and an outgoing
569 * interface has been set as a multicast option, use the
570 * address of that interface as our source address.
571 */
572 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
573 inp->inp_moptions != NULL) {
574 struct ip_moptions *imo;
575 struct ifnet *ifp;
576
577 imo = inp->inp_moptions;
578 if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
579 ia->ia_ifp != imo->imo_multicast_ifp)) {
580 ifp = imo->imo_multicast_ifp;
581 if (ia)
582 ifafree(&ia->ia_ifa);
583 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link)
584 if (ia->ia_ifp == ifp)
585 break;
586 if (ia == 0) {
587 lck_mtx_unlock(rt_mtx);
588 return (EADDRNOTAVAIL);
589 }
590 ifaref(ia);
591 }
592 }
593 /*
594 * Don't do pcblookup call here; return interface in plocal_sin
595 * and exit to caller, that will do the lookup.
596 */
597 *plocal_sin = &ia->ia_addr;
598 ifafree(&ia->ia_ifa);
599 }
600 lck_mtx_unlock(rt_mtx);
601 return(0);
602 }
603
604 /*
605 * Outer subroutine:
606 * Connect from a socket to a specified address.
607 * Both address and port must be specified in argument sin.
608 * If don't have a local address for this socket yet,
609 * then pick one.
610 */
611 int
612 in_pcbconnect(inp, nam, p)
613 register struct inpcb *inp;
614 struct sockaddr *nam;
615 struct proc *p;
616 {
617 struct sockaddr_in *ifaddr;
618 struct sockaddr_in *sin = (struct sockaddr_in *)nam;
619 struct inpcb *pcb;
620 int error;
621
622 /*
623 * Call inner routine, to assign local interface address.
624 */
625 if ((error = in_pcbladdr(inp, nam, &ifaddr)) != 0)
626 return(error);
627
628 socket_unlock(inp->inp_socket, 0);
629 pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
630 inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
631 inp->inp_lport, 0, NULL);
632 socket_lock(inp->inp_socket, 0);
633 if (pcb != NULL) {
634 in_pcb_checkstate(pcb, WNT_RELEASE, 0);
635 return (EADDRINUSE);
636 }
637 if (inp->inp_laddr.s_addr == INADDR_ANY) {
638 if (inp->inp_lport == 0) {
639 error = in_pcbbind(inp, (struct sockaddr *)0, p);
640 if (error)
641 return (error);
642 }
643 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
644 /*lock inversion issue, mostly with udp multicast packets */
645 socket_unlock(inp->inp_socket, 0);
646 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
647 socket_lock(inp->inp_socket, 0);
648 }
649 inp->inp_laddr = ifaddr->sin_addr;
650 inp->inp_flags |= INP_INADDR_ANY;
651 }
652 else {
653 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
654 /*lock inversion issue, mostly with udp multicast packets */
655 socket_unlock(inp->inp_socket, 0);
656 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
657 socket_lock(inp->inp_socket, 0);
658 }
659 }
660 inp->inp_faddr = sin->sin_addr;
661 inp->inp_fport = sin->sin_port;
662 in_pcbrehash(inp);
663 lck_rw_done(inp->inp_pcbinfo->mtx);
664 return (0);
665 }
666
667 void
668 in_pcbdisconnect(inp)
669 struct inpcb *inp;
670 {
671
672 inp->inp_faddr.s_addr = INADDR_ANY;
673 inp->inp_fport = 0;
674
675 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
676 /*lock inversion issue, mostly with udp multicast packets */
677 socket_unlock(inp->inp_socket, 0);
678 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
679 socket_lock(inp->inp_socket, 0);
680 }
681
682 in_pcbrehash(inp);
683 lck_rw_done(inp->inp_pcbinfo->mtx);
684
685 if (inp->inp_socket->so_state & SS_NOFDREF)
686 in_pcbdetach(inp);
687 }
688
689 void
690 in_pcbdetach(inp)
691 struct inpcb *inp;
692 {
693 struct socket *so = inp->inp_socket;
694 struct rtentry *rt = inp->inp_route.ro_rt;
695
696 if (so->so_pcb == 0) { /* we've been called twice */
697 panic("in_pcbdetach: inp=%x so=%x proto=%x so_pcb is null!\n",
698 inp, so, so->so_proto->pr_protocol);
699 }
700
701 #if IPSEC
702 if (ipsec_bypass == 0) {
703 lck_mtx_lock(sadb_mutex);
704 ipsec4_delete_pcbpolicy(inp);
705 lck_mtx_unlock(sadb_mutex);
706 }
707 #endif /*IPSEC*/
708
709 /* mark socket state as dead */
710 if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING)
711 panic("in_pcbdetach so=%x prot=%x couldn't set to STOPUSING\n", so, so->so_proto->pr_protocol);
712
713 #if TEMPDEBUG
714 if (so->cached_in_sock_layer)
715 printf("in_pcbdetach for cached socket %x flags=%x\n", so, so->so_flags);
716 else
717 printf("in_pcbdetach for allocated socket %x flags=%x\n", so, so->so_flags);
718 #endif
719 if ((so->so_flags & SOF_PCBCLEARING) == 0) {
720 inp->inp_vflag = 0;
721 if (inp->inp_options)
722 (void)m_free(inp->inp_options);
723 if (rt) {
724 /*
725 * route deletion requires reference count to be <= zero
726 */
727 lck_mtx_lock(rt_mtx);
728 if ((rt->rt_flags & RTF_DELCLONE) &&
729 (rt->rt_flags & RTF_WASCLONED) &&
730 (rt->rt_refcnt <= 1)) {
731 rtunref(rt);
732 rt->rt_flags &= ~RTF_UP;
733 rtrequest_locked(RTM_DELETE, rt_key(rt),
734 rt->rt_gateway, rt_mask(rt),
735 rt->rt_flags, (struct rtentry **)0);
736 }
737 else {
738 rtfree_locked(rt);
739 inp->inp_route.ro_rt = 0;
740 }
741 lck_mtx_unlock(rt_mtx);
742 }
743 ip_freemoptions(inp->inp_moptions);
744 inp->inp_moptions = NULL;
745 sofreelastref(so, 0);
746 inp->inp_state = INPCB_STATE_DEAD;
747 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
748 }
749 }
750
751
752 void
753 in_pcbdispose(inp)
754 struct inpcb *inp;
755 {
756 struct socket *so = inp->inp_socket;
757 struct inpcbinfo *ipi = inp->inp_pcbinfo;
758
759 #if TEMPDEBUG
760 if (inp->inp_state != INPCB_STATE_DEAD) {
761 printf("in_pcbdispose: not dead yet? so=%x\n", so);
762 }
763 #endif
764
765 if (so && so->so_usecount != 0)
766 panic("in_pcbdispose: use count=%x so=%x\n", so->so_usecount, so);
767
768
769 inp->inp_gencnt = ++ipi->ipi_gencnt;
770 /*### access ipi in in_pcbremlists */
771 in_pcbremlists(inp);
772
773 if (so) {
774 if (so->so_proto->pr_flags & PR_PCBLOCK) {
775 sofreelastref(so, 0);
776 if (so->so_rcv.sb_cc || so->so_snd.sb_cc) {
777 #if TEMPDEBUG
778 printf("in_pcbdispose sb not cleaned up so=%x rc_cci=%x snd_cc=%x\n",
779 so, so->so_rcv.sb_cc, so->so_snd.sb_cc);
780 #endif
781 sbrelease(&so->so_rcv);
782 sbrelease(&so->so_snd);
783 }
784 if (so->so_head != NULL)
785 panic("in_pcbdispose, so=%x head still exist\n", so);
786 lck_mtx_unlock(inp->inpcb_mtx);
787 lck_mtx_free(inp->inpcb_mtx, ipi->mtx_grp);
788 }
789 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
790 so->so_saved_pcb = (caddr_t) inp;
791 so->so_pcb = 0;
792 inp->inp_socket = 0;
793 inp->reserved[0] = so;
794 if (so->cached_in_sock_layer == 0) {
795 zfree(ipi->ipi_zone, inp);
796 }
797 sodealloc(so);
798 }
799 #if TEMPDEBUG
800 else
801 printf("in_pcbdispose: no socket for inp=%x\n", inp);
802 #endif
803 }
804
805 /*
806 * The calling convention of in_setsockaddr() and in_setpeeraddr() was
807 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
808 * in struct pr_usrreqs, so that protocols can just reference then directly
809 * without the need for a wrapper function. The socket must have a valid
810 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
811 * except through a kernel programming error, so it is acceptable to panic
812 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap
813 * because there actually /is/ a programming error somewhere... XXX)
814 */
815 int
816 in_setsockaddr(so, nam)
817 struct socket *so;
818 struct sockaddr **nam;
819 {
820 register struct inpcb *inp;
821 register struct sockaddr_in *sin;
822
823 /*
824 * Do the malloc first in case it blocks.
825 */
826 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
827 if (sin == NULL)
828 return ENOBUFS;
829 bzero(sin, sizeof *sin);
830 sin->sin_family = AF_INET;
831 sin->sin_len = sizeof(*sin);
832
833 inp = sotoinpcb(so);
834 if (!inp) {
835 FREE(sin, M_SONAME);
836 return ECONNRESET;
837 }
838 sin->sin_port = inp->inp_lport;
839 sin->sin_addr = inp->inp_laddr;
840
841 *nam = (struct sockaddr *)sin;
842 return 0;
843 }
844
845 int
846 in_setpeeraddr(so, nam)
847 struct socket *so;
848 struct sockaddr **nam;
849 {
850 struct inpcb *inp;
851 register struct sockaddr_in *sin;
852
853 /*
854 * Do the malloc first in case it blocks.
855 */
856 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
857 if (sin == NULL)
858 return ENOBUFS;
859 bzero((caddr_t)sin, sizeof (*sin));
860 sin->sin_family = AF_INET;
861 sin->sin_len = sizeof(*sin);
862
863 inp = sotoinpcb(so);
864 if (!inp) {
865 FREE(sin, M_SONAME);
866 return ECONNRESET;
867 }
868 sin->sin_port = inp->inp_fport;
869 sin->sin_addr = inp->inp_faddr;
870
871 *nam = (struct sockaddr *)sin;
872 return 0;
873 }
874
875 void
876 in_pcbnotifyall(pcbinfo, faddr, errno, notify)
877 struct inpcbinfo *pcbinfo;
878 struct in_addr faddr;
879 void (*notify) (struct inpcb *, int);
880 {
881 struct inpcb *inp;
882
883 lck_rw_lock_shared(pcbinfo->mtx);
884
885 LIST_FOREACH(inp, pcbinfo->listhead, inp_list) {
886 #if INET6
887 if ((inp->inp_vflag & INP_IPV4) == 0)
888 continue;
889 #endif
890 if (inp->inp_faddr.s_addr != faddr.s_addr ||
891 inp->inp_socket == NULL)
892 continue;
893 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
894 continue;
895 socket_lock(inp->inp_socket, 1);
896 (*notify)(inp, errno);
897 (void)in_pcb_checkstate(inp, WNT_RELEASE, 1);
898 socket_unlock(inp->inp_socket, 1);
899 }
900 lck_rw_done(pcbinfo->mtx);
901 }
902
903 void
904 in_pcbpurgeif0(
905 struct inpcb *head,
906 struct ifnet *ifp)
907 {
908 struct inpcb *inp;
909 struct ip_moptions *imo;
910 int i, gap;
911
912 for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
913 imo = inp->inp_moptions;
914 if ((inp->inp_vflag & INP_IPV4) &&
915 imo != NULL) {
916 /*
917 * Unselect the outgoing interface if it is being
918 * detached.
919 */
920 if (imo->imo_multicast_ifp == ifp)
921 imo->imo_multicast_ifp = NULL;
922
923 /*
924 * Drop multicast group membership if we joined
925 * through the interface being detached.
926 */
927 for (i = 0, gap = 0; i < imo->imo_num_memberships;
928 i++) {
929 if (imo->imo_membership[i]->inm_ifp == ifp) {
930 in_delmulti(&imo->imo_membership[i]);
931 gap++;
932 } else if (gap != 0)
933 imo->imo_membership[i - gap] =
934 imo->imo_membership[i];
935 }
936 imo->imo_num_memberships -= gap;
937 }
938 }
939 }
940
941 /*
942 * Check for alternatives when higher level complains
943 * about service problems. For now, invalidate cached
944 * routing information. If the route was created dynamically
945 * (by a redirect), time to try a default gateway again.
946 */
947 void
948 in_losing(inp)
949 struct inpcb *inp;
950 {
951 register struct rtentry *rt;
952 struct rt_addrinfo info;
953
954 if ((rt = inp->inp_route.ro_rt)) {
955 lck_mtx_lock(rt_mtx);
956 bzero((caddr_t)&info, sizeof(info));
957 info.rti_info[RTAX_DST] =
958 (struct sockaddr *)&inp->inp_route.ro_dst;
959 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
960 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
961 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
962 if (rt->rt_flags & RTF_DYNAMIC)
963 (void) rtrequest_locked(RTM_DELETE, rt_key(rt),
964 rt->rt_gateway, rt_mask(rt), rt->rt_flags,
965 (struct rtentry **)0);
966 inp->inp_route.ro_rt = 0;
967 rtfree_locked(rt);
968 lck_mtx_unlock(rt_mtx);
969 /*
970 * A new route can be allocated
971 * the next time output is attempted.
972 */
973 }
974 }
975
976 /*
977 * After a routing change, flush old routing
978 * and allocate a (hopefully) better one.
979 */
980 void
981 in_rtchange(inp, errno)
982 register struct inpcb *inp;
983 int errno;
984 {
985 if (inp->inp_route.ro_rt) {
986 if ((ifa_foraddr(inp->inp_laddr.s_addr)) == 0)
987 return; /* we can't remove the route now. not sure if still ok to use src */
988 rtfree(inp->inp_route.ro_rt);
989 inp->inp_route.ro_rt = 0;
990 /*
991 * A new route can be allocated the next time
992 * output is attempted.
993 */
994 }
995 }
996
997 /*
998 * Lookup a PCB based on the local address and port.
999 */
1000 struct inpcb *
1001 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
1002 struct inpcbinfo *pcbinfo;
1003 struct in_addr laddr;
1004 u_int lport_arg;
1005 int wild_okay;
1006 {
1007 register struct inpcb *inp;
1008 int matchwild = 3, wildcard;
1009 u_short lport = lport_arg;
1010
1011 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0,0,0,0,0);
1012
1013 if (!wild_okay) {
1014 struct inpcbhead *head;
1015 /*
1016 * Look for an unconnected (wildcard foreign addr) PCB that
1017 * matches the local address and port we're looking for.
1018 */
1019 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
1020 LIST_FOREACH(inp, head, inp_hash) {
1021 #if INET6
1022 if ((inp->inp_vflag & INP_IPV4) == 0)
1023 continue;
1024 #endif
1025 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1026 inp->inp_laddr.s_addr == laddr.s_addr &&
1027 inp->inp_lport == lport) {
1028 /*
1029 * Found.
1030 */
1031 return (inp);
1032 }
1033 }
1034 /*
1035 * Not found.
1036 */
1037 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0,0,0,0,0);
1038 return (NULL);
1039 } else {
1040 struct inpcbporthead *porthash;
1041 struct inpcbport *phd;
1042 struct inpcb *match = NULL;
1043 /*
1044 * Best fit PCB lookup.
1045 *
1046 * First see if this local port is in use by looking on the
1047 * port hash list.
1048 */
1049 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
1050 pcbinfo->porthashmask)];
1051 LIST_FOREACH(phd, porthash, phd_hash) {
1052 if (phd->phd_port == lport)
1053 break;
1054 }
1055 if (phd != NULL) {
1056 /*
1057 * Port is in use by one or more PCBs. Look for best
1058 * fit.
1059 */
1060 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
1061 wildcard = 0;
1062 #if INET6
1063 if ((inp->inp_vflag & INP_IPV4) == 0)
1064 continue;
1065 #endif
1066 if (inp->inp_faddr.s_addr != INADDR_ANY)
1067 wildcard++;
1068 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1069 if (laddr.s_addr == INADDR_ANY)
1070 wildcard++;
1071 else if (inp->inp_laddr.s_addr != laddr.s_addr)
1072 continue;
1073 } else {
1074 if (laddr.s_addr != INADDR_ANY)
1075 wildcard++;
1076 }
1077 if (wildcard < matchwild) {
1078 match = inp;
1079 matchwild = wildcard;
1080 if (matchwild == 0) {
1081 break;
1082 }
1083 }
1084 }
1085 }
1086 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,0,0,0,0);
1087 return (match);
1088 }
1089 }
1090
1091 /*
1092 * Lookup PCB in hash list.
1093 */
1094 struct inpcb *
1095 in_pcblookup_hash(
1096 struct inpcbinfo *pcbinfo,
1097 struct in_addr faddr,
1098 u_int fport_arg,
1099 struct in_addr laddr,
1100 u_int lport_arg,
1101 int wildcard,
1102 struct ifnet *ifp)
1103 {
1104 struct inpcbhead *head;
1105 register struct inpcb *inp;
1106 u_short fport = fport_arg, lport = lport_arg;
1107
1108 /*
1109 * We may have found the pcb in the last lookup - check this first.
1110 */
1111
1112 lck_rw_lock_shared(pcbinfo->mtx);
1113
1114 /*
1115 * First look for an exact match.
1116 */
1117 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
1118 LIST_FOREACH(inp, head, inp_hash) {
1119 #if INET6
1120 if ((inp->inp_vflag & INP_IPV4) == 0)
1121 continue;
1122 #endif
1123 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1124 inp->inp_laddr.s_addr == laddr.s_addr &&
1125 inp->inp_fport == fport &&
1126 inp->inp_lport == lport) {
1127 /*
1128 * Found.
1129 */
1130 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1131 lck_rw_done(pcbinfo->mtx);
1132 return (inp);
1133 }
1134 else { /* it's there but dead, say it isn't found */
1135 lck_rw_done(pcbinfo->mtx);
1136 return(NULL);
1137 }
1138 }
1139 }
1140 if (wildcard) {
1141 struct inpcb *local_wild = NULL;
1142 #if INET6
1143 struct inpcb *local_wild_mapped = NULL;
1144 #endif
1145
1146 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
1147 LIST_FOREACH(inp, head, inp_hash) {
1148 #if INET6
1149 if ((inp->inp_vflag & INP_IPV4) == 0)
1150 continue;
1151 #endif
1152 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1153 inp->inp_lport == lport) {
1154 #if defined(NFAITH) && NFAITH > 0
1155 if (ifp && ifp->if_type == IFT_FAITH &&
1156 (inp->inp_flags & INP_FAITH) == 0)
1157 continue;
1158 #endif
1159 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1160 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1161 lck_rw_done(pcbinfo->mtx);
1162 return (inp);
1163 }
1164 else { /* it's there but dead, say it isn't found */
1165 lck_rw_done(pcbinfo->mtx);
1166 return(NULL);
1167 }
1168 }
1169 else if (inp->inp_laddr.s_addr == INADDR_ANY) {
1170 #if defined(INET6)
1171 if (INP_CHECK_SOCKAF(inp->inp_socket,
1172 AF_INET6))
1173 local_wild_mapped = inp;
1174 else
1175 #endif /* defined(INET6) */
1176 local_wild = inp;
1177 }
1178 }
1179 }
1180 #if defined(INET6)
1181 if (local_wild == NULL) {
1182 if (local_wild_mapped != NULL) {
1183 if (in_pcb_checkstate(local_wild_mapped, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1184 lck_rw_done(pcbinfo->mtx);
1185 return (local_wild_mapped);
1186 }
1187 else { /* it's there but dead, say it isn't found */
1188 lck_rw_done(pcbinfo->mtx);
1189 return(NULL);
1190 }
1191 }
1192 lck_rw_done(pcbinfo->mtx);
1193 return (NULL);
1194 }
1195 #endif /* defined(INET6) */
1196 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1197 lck_rw_done(pcbinfo->mtx);
1198 return (local_wild);
1199 }
1200 else { /* it's there but dead, say it isn't found */
1201 lck_rw_done(pcbinfo->mtx);
1202 return(NULL);
1203 }
1204 }
1205
1206 /*
1207 * Not found.
1208 */
1209 lck_rw_done(pcbinfo->mtx);
1210 return (NULL);
1211 }
1212
1213 /*
1214 * Insert PCB onto various hash lists.
1215 */
1216 int
1217 in_pcbinshash(inp, locked)
1218 struct inpcb *inp;
1219 int locked; /* list already locked exclusive */
1220 {
1221 struct inpcbhead *pcbhash;
1222 struct inpcbporthead *pcbporthash;
1223 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1224 struct inpcbport *phd;
1225 u_int32_t hashkey_faddr;
1226
1227 #if INET6
1228 if (inp->inp_vflag & INP_IPV6)
1229 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1230 else
1231 #endif /* INET6 */
1232 hashkey_faddr = inp->inp_faddr.s_addr;
1233
1234 inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask);
1235
1236 if (!locked) {
1237 if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) {
1238 /*lock inversion issue, mostly with udp multicast packets */
1239 socket_unlock(inp->inp_socket, 0);
1240 lck_rw_lock_exclusive(pcbinfo->mtx);
1241 socket_lock(inp->inp_socket, 0);
1242 }
1243 }
1244
1245 pcbhash = &pcbinfo->hashbase[inp->hash_element];
1246
1247 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
1248 pcbinfo->porthashmask)];
1249
1250 /*
1251 * Go through port list and look for a head for this lport.
1252 */
1253 LIST_FOREACH(phd, pcbporthash, phd_hash) {
1254 if (phd->phd_port == inp->inp_lport)
1255 break;
1256 }
1257 /*
1258 * If none exists, malloc one and tack it on.
1259 */
1260 if (phd == NULL) {
1261 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_WAITOK);
1262 if (phd == NULL) {
1263 if (!locked)
1264 lck_rw_done(pcbinfo->mtx);
1265 return (ENOBUFS); /* XXX */
1266 }
1267 phd->phd_port = inp->inp_lport;
1268 LIST_INIT(&phd->phd_pcblist);
1269 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
1270 }
1271 inp->inp_phd = phd;
1272 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
1273 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
1274 if (!locked)
1275 lck_rw_done(pcbinfo->mtx);
1276 return (0);
1277 }
1278
1279 /*
1280 * Move PCB to the proper hash bucket when { faddr, fport } have been
1281 * changed. NOTE: This does not handle the case of the lport changing (the
1282 * hashed port list would have to be updated as well), so the lport must
1283 * not change after in_pcbinshash() has been called.
1284 */
1285 void
1286 in_pcbrehash(inp)
1287 struct inpcb *inp;
1288 {
1289 struct inpcbhead *head;
1290 u_int32_t hashkey_faddr;
1291
1292 #if INET6
1293 if (inp->inp_vflag & INP_IPV6)
1294 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1295 else
1296 #endif /* INET6 */
1297 hashkey_faddr = inp->inp_faddr.s_addr;
1298 inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
1299 inp->inp_fport, inp->inp_pcbinfo->hashmask);
1300 head = &inp->inp_pcbinfo->hashbase[inp->hash_element];
1301
1302 LIST_REMOVE(inp, inp_hash);
1303 LIST_INSERT_HEAD(head, inp, inp_hash);
1304 }
1305
1306 /*
1307 * Remove PCB from various lists.
1308 */
1309 //###LOCK must be called with list lock held
1310 void
1311 in_pcbremlists(inp)
1312 struct inpcb *inp;
1313 {
1314 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
1315
1316 if (inp->inp_lport) {
1317 struct inpcbport *phd = inp->inp_phd;
1318
1319 LIST_REMOVE(inp, inp_hash);
1320 LIST_REMOVE(inp, inp_portlist);
1321 if (phd != NULL && (LIST_FIRST(&phd->phd_pcblist) == NULL)) {
1322 LIST_REMOVE(phd, phd_hash);
1323 FREE(phd, M_PCB);
1324 }
1325 }
1326 LIST_REMOVE(inp, inp_list);
1327 inp->inp_pcbinfo->ipi_count--;
1328 }
1329
1330 static void in_pcb_detach_port( struct inpcb *inp);
1331 int
1332 in_pcb_grab_port (struct inpcbinfo *pcbinfo,
1333 u_short options,
1334 struct in_addr laddr,
1335 u_short *lport,
1336 struct in_addr faddr,
1337 u_short fport,
1338 u_int cookie,
1339 u_char owner_id)
1340 {
1341 struct inpcb *inp, *pcb;
1342 struct sockaddr_in sin;
1343 struct proc *p = current_proc();
1344 int stat;
1345
1346
1347 pcbinfo->nat_dummy_socket.so_pcb = 0;
1348 pcbinfo->nat_dummy_socket.so_options = 0;
1349 if (*lport) {
1350 /* The grabber wants a particular port */
1351
1352 if (faddr.s_addr || fport) {
1353 /*
1354 * This is either the second half of an active connect, or
1355 * it's from the acceptance of an incoming connection.
1356 */
1357 if (laddr.s_addr == 0) {
1358 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1359 return EINVAL;
1360 }
1361
1362 inp = in_pcblookup_hash(pcbinfo, faddr, fport, laddr, *lport, 0, NULL);
1363 if (inp) {
1364 /* pcb was found, its count was upped. need to decrease it here */
1365 in_pcb_checkstate(inp, WNT_RELEASE, 0);
1366 if (!(IN_MULTICAST(ntohl(laddr.s_addr)))) {
1367 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1368 return (EADDRINUSE);
1369 }
1370 }
1371
1372 stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p);
1373 if (stat) {
1374 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1375 return stat;
1376 }
1377 pcb = sotoinpcb(&pcbinfo->nat_dummy_socket);
1378 pcb->inp_vflag |= INP_IPV4;
1379
1380 pcb->inp_lport = *lport;
1381 pcb->inp_laddr.s_addr = laddr.s_addr;
1382
1383 pcb->inp_faddr = faddr;
1384 pcb->inp_fport = fport;
1385
1386 lck_rw_lock_exclusive(pcbinfo->mtx);
1387 in_pcbinshash(pcb, 1);
1388 lck_rw_done(pcbinfo->mtx);
1389 }
1390 else {
1391 /*
1392 * This is either a bind for a passive socket, or it's the
1393 * first part of bind-connect sequence (not likely since an
1394 * ephemeral port is usually used in this case). Or, it's
1395 * the result of a connection acceptance when the foreign
1396 * address/port cannot be provided (which requires the SO_REUSEADDR
1397 * flag if laddr is not multicast).
1398 */
1399
1400 stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p);
1401 if (stat) {
1402 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1403 return stat;
1404 }
1405 pcb = sotoinpcb(&pcbinfo->nat_dummy_socket);
1406 pcb->inp_vflag |= INP_IPV4;
1407
1408 pcbinfo->nat_dummy_socket.so_options = options;
1409 bzero(&sin, sizeof(struct sockaddr_in));
1410 sin.sin_len = sizeof(struct sockaddr_in);
1411 sin.sin_family = AF_INET;
1412 sin.sin_addr.s_addr = laddr.s_addr;
1413 sin.sin_port = *lport;
1414
1415 socket_lock(&pcbinfo->nat_dummy_socket, 1);
1416 stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb,
1417 (struct sockaddr *) &sin, p);
1418 if (stat) {
1419 socket_unlock(&pcbinfo->nat_dummy_socket, 1); /*detach first */
1420 in_pcb_detach_port(pcb); /* will restore dummy pcb */
1421 return stat;
1422 }
1423 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1424 }
1425 }
1426 else {
1427 /* The grabber wants an ephemeral port */
1428
1429 stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p);
1430 if (stat) {
1431 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb;
1432 return stat;
1433 }
1434 pcb = sotoinpcb(&pcbinfo->nat_dummy_socket);
1435 pcb->inp_vflag |= INP_IPV4;
1436
1437 bzero(&sin, sizeof(struct sockaddr_in));
1438 sin.sin_len = sizeof(struct sockaddr_in);
1439 sin.sin_family = AF_INET;
1440 sin.sin_addr.s_addr = laddr.s_addr;
1441 sin.sin_port = 0;
1442
1443 if (faddr.s_addr || fport) {
1444 /*
1445 * Not sure if this case will be used - could occur when connect
1446 * is called, skipping the bind.
1447 */
1448
1449 if (laddr.s_addr == 0) {
1450 in_pcb_detach_port(pcb); /* restores dummy pcb */
1451 return EINVAL;
1452 }
1453
1454 socket_lock(&pcbinfo->nat_dummy_socket, 1);
1455 stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb,
1456 (struct sockaddr *) &sin, p);
1457 if (stat) {
1458 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1459 in_pcb_detach_port(pcb); /* restores dummy pcb */
1460 return stat;
1461 }
1462
1463 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1464 inp = in_pcblookup_hash(pcbinfo, faddr, fport,
1465 pcb->inp_laddr, pcb->inp_lport, 0, NULL);
1466 if (inp) {
1467 /* pcb was found, its count was upped. need to decrease it here */
1468 in_pcb_checkstate(inp, WNT_RELEASE, 0);
1469 in_pcb_detach_port(pcb);
1470 return (EADDRINUSE);
1471 }
1472
1473 lck_rw_lock_exclusive(pcbinfo->mtx);
1474 pcb->inp_faddr = faddr;
1475 pcb->inp_fport = fport;
1476 in_pcbrehash(pcb);
1477 lck_rw_done(pcbinfo->mtx);
1478 }
1479 else {
1480 /*
1481 * This is a simple bind of an ephemeral port. The local addr
1482 * may or may not be defined.
1483 */
1484
1485 socket_lock(&pcbinfo->nat_dummy_socket, 1);
1486 stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb,
1487 (struct sockaddr *) &sin, p);
1488 if (stat) {
1489 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1490 in_pcb_detach_port(pcb);
1491 return stat;
1492 }
1493 socket_unlock(&pcbinfo->nat_dummy_socket, 1);
1494 }
1495 *lport = pcb->inp_lport;
1496 }
1497
1498
1499 pcb->nat_owner = owner_id;
1500 pcb->nat_cookie = cookie;
1501 pcb->inp_ppcb = (caddr_t) pcbinfo->dummy_cb;
1502 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; /* restores dummypcb */
1503 return 0;
1504 }
1505
1506 /* 3962035 - in_pcb_letgo_port needs a special case function for detaching */
1507 static void
1508 in_pcb_detach_port(
1509 struct inpcb *inp)
1510 {
1511 struct socket *so = inp->inp_socket;
1512 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1513
1514 if (so != &pcbinfo->nat_dummy_socket)
1515 panic("in_pcb_detach_port: not a dummy_sock: so=%x, inp=%x\n", so, inp);
1516 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
1517 /*### access ipi in in_pcbremlists */
1518 in_pcbremlists(inp);
1519
1520 inp->inp_socket = 0;
1521 inp->reserved[0] = so;
1522 zfree(pcbinfo->ipi_zone, inp);
1523 pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; /* restores dummypcb */
1524 }
1525
1526 int
1527 in_pcb_letgo_port(struct inpcbinfo *pcbinfo, struct in_addr laddr, u_short lport,
1528 struct in_addr faddr, u_short fport, u_char owner_id)
1529 {
1530 struct inpcbhead *head;
1531 register struct inpcb *inp;
1532
1533
1534 /*
1535 * First look for an exact match.
1536 */
1537
1538 lck_rw_lock_exclusive(pcbinfo->mtx);
1539 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
1540 for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
1541 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1542 inp->inp_laddr.s_addr == laddr.s_addr &&
1543 inp->inp_fport == fport &&
1544 inp->inp_lport == lport &&
1545 inp->nat_owner == owner_id) {
1546 /*
1547 * Found.
1548 */
1549 in_pcb_detach_port(inp);
1550 lck_rw_done(pcbinfo->mtx);
1551 return 0;
1552 }
1553 }
1554
1555 lck_rw_done(pcbinfo->mtx);
1556 return ENOENT;
1557 }
1558
1559 u_char
1560 in_pcb_get_owner(struct inpcbinfo *pcbinfo,
1561 struct in_addr laddr, u_short lport,
1562 struct in_addr faddr, u_short fport,
1563 u_int *cookie)
1564
1565 {
1566 struct inpcb *inp;
1567 u_char owner_id = INPCB_NO_OWNER;
1568 struct inpcbport *phd;
1569 struct inpcbporthead *porthash;
1570
1571
1572 if (IN_MULTICAST(laddr.s_addr)) {
1573 /*
1574 * Walk through PCB's looking for registered
1575 * owners.
1576 */
1577
1578 lck_rw_lock_shared(pcbinfo->mtx);
1579 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
1580 pcbinfo->porthashmask)];
1581 for (phd = porthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) {
1582 if (phd->phd_port == lport)
1583 break;
1584 }
1585
1586 if (phd == 0) {
1587 lck_rw_done(pcbinfo->mtx);
1588 return INPCB_NO_OWNER;
1589 }
1590
1591 owner_id = INPCB_NO_OWNER;
1592 for (inp = phd->phd_pcblist.lh_first; inp != NULL;
1593 inp = inp->inp_portlist.le_next) {
1594
1595 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1596 if (inp->nat_owner == 0)
1597 owner_id |= INPCB_OWNED_BY_X;
1598 else
1599 owner_id |= inp->nat_owner;
1600 }
1601 }
1602
1603 lck_rw_done(pcbinfo->mtx);
1604 return owner_id;
1605 }
1606 else {
1607 inp = in_pcblookup_hash(pcbinfo, faddr, fport,
1608 laddr, lport, 1, NULL);
1609 if (inp) {
1610 /* pcb was found, its count was upped. need to decrease it here */
1611 /* if we found it, that pcb is already locked by the caller */
1612 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING)
1613 return(INPCB_NO_OWNER);
1614
1615 if (inp->nat_owner) {
1616 owner_id = inp->nat_owner;
1617 *cookie = inp->nat_cookie;
1618 }
1619 else {
1620 owner_id = INPCB_OWNED_BY_X;
1621 }
1622 }
1623 else
1624 owner_id = INPCB_NO_OWNER;
1625
1626 return owner_id;
1627 }
1628 }
1629
1630 int
1631 in_pcb_new_share_client(struct inpcbinfo *pcbinfo, u_char *owner_id)
1632 {
1633
1634 int i;
1635
1636
1637 for (i=0; i < INPCB_MAX_IDS; i++) {
1638 if ((pcbinfo->all_owners & (1 << i)) == 0) {
1639 pcbinfo->all_owners |= (1 << i);
1640 *owner_id = (1 << i);
1641 return 0;
1642 }
1643 }
1644
1645 return ENOSPC;
1646 }
1647
1648 int
1649 in_pcb_rem_share_client(struct inpcbinfo *pcbinfo, u_char owner_id)
1650 {
1651 struct inpcb *inp;
1652
1653
1654 lck_rw_lock_exclusive(pcbinfo->mtx);
1655 if (pcbinfo->all_owners & owner_id) {
1656 pcbinfo->all_owners &= ~owner_id;
1657 for (inp = pcbinfo->listhead->lh_first; inp != NULL; inp = inp->inp_list.le_next) {
1658 if (inp->nat_owner & owner_id) {
1659 if (inp->nat_owner == owner_id)
1660 /*
1661 * Deallocate the pcb
1662 */
1663 in_pcb_detach_port(inp);
1664 else
1665 inp->nat_owner &= ~owner_id;
1666 }
1667 }
1668 }
1669 else {
1670 lck_rw_done(pcbinfo->mtx);
1671 return ENOENT;
1672 }
1673
1674 lck_rw_done(pcbinfo->mtx);
1675 return 0;
1676 }
1677
1678
1679
1680 void in_pcb_nat_init(struct inpcbinfo *pcbinfo, int afamily,
1681 int pfamily, int protocol)
1682 {
1683 int stat;
1684 struct proc *p = current_proc();
1685
1686 bzero(&pcbinfo->nat_dummy_socket, sizeof(struct socket));
1687 pcbinfo->nat_dummy_socket.so_proto = pffindproto_locked(afamily, pfamily, protocol);
1688 pcbinfo->all_owners = 0;
1689 stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p);
1690 if (stat)
1691 panic("in_pcb_nat_init: can't alloc fakepcb err=%\n", stat);
1692 pcbinfo->nat_dummy_pcb = pcbinfo->nat_dummy_socket.so_pcb;
1693 }
1694
1695 /* Mechanism used to defer the memory release of PCBs
1696 * The pcb list will contain the pcb until the ripper can clean it up if
1697 * the following conditions are met: 1) state "DEAD", 2) wantcnt is STOPUSING
1698 * 3) usecount is null
1699 * This function will be called to either mark the pcb as
1700 */
1701 int
1702 in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
1703
1704 {
1705
1706 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
1707 UInt32 origwant;
1708 UInt32 newwant;
1709
1710 switch (mode) {
1711
1712 case WNT_STOPUSING: /* try to mark the pcb as ready for recycling */
1713
1714 /* compareswap with STOPUSING, if success we're good, if it's in use, will be marked later */
1715
1716 if (locked == 0)
1717 socket_lock(pcb->inp_socket, 1);
1718 pcb->inp_state = INPCB_STATE_DEAD;
1719 stopusing:
1720 if (pcb->inp_socket->so_usecount < 0)
1721 panic("in_pcb_checkstate STOP pcb=%x so=%x usecount is negative\n", pcb, pcb->inp_socket);
1722 if (locked == 0)
1723 socket_unlock(pcb->inp_socket, 1);
1724
1725 origwant = *wantcnt;
1726 if ((UInt16) origwant == 0xffff ) /* should stop using */
1727 return (WNT_STOPUSING);
1728 newwant = 0xffff;
1729 if ((UInt16) origwant == 0) {/* try to mark it as unsuable now */
1730 OSCompareAndSwap(origwant, newwant, (UInt32 *) wantcnt) ;
1731 }
1732 return (WNT_STOPUSING);
1733 break;
1734
1735 case WNT_ACQUIRE: /* try to increase reference to pcb */
1736 /* if WNT_STOPUSING should bail out */
1737 /*
1738 * if socket state DEAD, try to set count to STOPUSING, return failed
1739 * otherwise increase cnt
1740 */
1741 do {
1742 origwant = *wantcnt;
1743 if ((UInt16) origwant == 0xffff ) {/* should stop using */
1744 // printf("in_pcb_checkstate: ACQ PCB was STOPUSING while release. odd pcb=%x\n", pcb);
1745 return (WNT_STOPUSING);
1746 }
1747 newwant = origwant + 1;
1748 } while (!OSCompareAndSwap(origwant, newwant, (UInt32 *) wantcnt));
1749 return (WNT_ACQUIRE);
1750 break;
1751
1752 case WNT_RELEASE: /* release reference. if result is null and pcb state is DEAD,
1753 set wanted bit to STOPUSING
1754 */
1755
1756 if (locked == 0)
1757 socket_lock(pcb->inp_socket, 1);
1758
1759 do {
1760 origwant = *wantcnt;
1761 if ((UInt16) origwant == 0x0 )
1762 panic("in_pcb_checkstate pcb=%x release with zero count", pcb);
1763 if ((UInt16) origwant == 0xffff ) {/* should stop using */
1764 #if TEMPDEBUG
1765 printf("in_pcb_checkstate: REL PCB was STOPUSING while release. odd pcb=%x\n", pcb);
1766 #endif
1767 if (locked == 0)
1768 socket_unlock(pcb->inp_socket, 1);
1769 return (WNT_STOPUSING);
1770 }
1771 newwant = origwant - 1;
1772 } while (!OSCompareAndSwap(origwant, newwant, (UInt32 *) wantcnt));
1773
1774 if (pcb->inp_state == INPCB_STATE_DEAD)
1775 goto stopusing;
1776 if (pcb->inp_socket->so_usecount < 0)
1777 panic("in_pcb_checkstate RELEASE pcb=%x so=%x usecount is negative\n", pcb, pcb->inp_socket);
1778
1779 if (locked == 0)
1780 socket_unlock(pcb->inp_socket, 1);
1781 return (WNT_RELEASE);
1782 break;
1783
1784 default:
1785
1786 panic("in_pcb_checkstate: so=%x not a valid state =%x\n", pcb->inp_socket, mode);
1787 }
1788
1789 /* NOTREACHED */
1790 return (mode);
1791 }
1792
1793 /*
1794 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
1795 * The inpcb_compat data structure is passed to user space and must
1796 * not change. We intentionally avoid copying pointers. The socket is
1797 * the one exception, though we probably shouldn't copy that either.
1798 */
1799 void
1800 inpcb_to_compat(
1801 struct inpcb *inp,
1802 struct inpcb_compat *inp_compat)
1803 {
1804 bzero(inp_compat, sizeof(*inp_compat));
1805 inp_compat->inp_fport = inp->inp_fport;
1806 inp_compat->inp_lport = inp->inp_lport;
1807 inp_compat->inp_socket = inp->inp_socket;
1808 inp_compat->nat_owner = inp->nat_owner;
1809 inp_compat->nat_cookie = inp->nat_cookie;
1810 inp_compat->inp_gencnt = inp->inp_gencnt;
1811 inp_compat->inp_flags = inp->inp_flags;
1812 inp_compat->inp_flow = inp->inp_flow;
1813 inp_compat->inp_vflag = inp->inp_vflag;
1814 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
1815 inp_compat->inp_ip_p = inp->inp_ip_p;
1816 inp_compat->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
1817 inp_compat->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
1818 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
1819 inp_compat->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim;
1820 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
1821 inp_compat->inp6_ifindex = inp->inp6_ifindex;
1822 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
1823 }
1824
1825 #ifndef __APPLE__
1826 prison_xinpcb(struct proc *p, struct inpcb *inp)
1827 {
1828 if (!p->p_prison)
1829 return (0);
1830 if (ntohl(inp->inp_laddr.s_addr) == p->p_prison->pr_ip)
1831 return (0);
1832 return (1);
1833 }
1834 #endif