]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/in_pcb.c
xnu-2782.30.5.tar.gz
[apple/xnu.git] / bsd / netinet / in_pcb.c
CommitLineData
1c79356b 1/*
fe8ab488 2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
9bccf70c 61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
1c79356b
A
62 */
63
64#include <sys/param.h>
65#include <sys/systm.h>
66#include <sys/malloc.h>
67#include <sys/mbuf.h>
1c79356b 68#include <sys/domain.h>
1c79356b
A
69#include <sys/protosw.h>
70#include <sys/socket.h>
71#include <sys/socketvar.h>
72#include <sys/proc.h>
73#include <sys/kernel.h>
74#include <sys/sysctl.h>
6d2010ae
A
75#include <sys/mcache.h>
76#include <sys/kauth.h>
77#include <sys/priv.h>
39236c6e
A
78#include <sys/proc_uuid_policy.h>
79#include <sys/syslog.h>
fe8ab488 80#include <sys/priv.h>
39236c6e 81
91447636 82#include <libkern/OSAtomic.h>
316670eb 83#include <kern/locks.h>
1c79356b
A
84
85#include <machine/limits.h>
86
1c79356b 87#include <kern/zalloc.h>
1c79356b
A
88
89#include <net/if.h>
1c79356b 90#include <net/if_types.h>
9bccf70c 91#include <net/route.h>
316670eb
A
92#include <net/flowhash.h>
93#include <net/flowadv.h>
fe8ab488 94#include <net/ntstat.h>
1c79356b
A
95
96#include <netinet/in.h>
97#include <netinet/in_pcb.h>
98#include <netinet/in_var.h>
99#include <netinet/ip_var.h>
100#if INET6
101#include <netinet/ip6.h>
102#include <netinet6/ip6_var.h>
103#endif /* INET6 */
104
1c79356b 105#include <sys/kdebug.h>
b0d623f7 106#include <sys/random.h>
39236c6e 107
316670eb 108#include <dev/random/randomdev.h>
39236c6e 109#include <mach/boolean.h>
1c79356b 110
fe8ab488
A
111#if NECP
112#include <net/necp.h>
9bccf70c 113#endif
1c79356b 114
39236c6e
A
115static lck_grp_t *inpcb_lock_grp;
116static lck_attr_t *inpcb_lock_attr;
117static lck_grp_attr_t *inpcb_lock_grp_attr;
118decl_lck_mtx_data(static, inpcb_lock); /* global INPCB lock */
119decl_lck_mtx_data(static, inpcb_timeout_lock);
120
121static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);
122
123static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */
124static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
125static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */
126static boolean_t inpcb_fast_timer_on = FALSE;
fe8ab488
A
127
128/*
129 * If the total number of gc reqs is above a threshold, schedule
130 * garbage collect timer sooner
131 */
132static boolean_t inpcb_toomany_gcreq = FALSE;
133
134#define INPCB_GCREQ_THRESHOLD 50000
135#define INPCB_TOOMANY_GCREQ_TIMER (hz/10) /* 10 times a second */
136
39236c6e
A
/* Timer plumbing implemented further down in this file. */
static void inpcb_timeout(void *);
static void inpcb_sched_timeout(struct timeval *);

/* Leeway, in seconds, granted to lazy timers. */
int inpcb_timeout_lazy = 10;	/* 10 seconds leeway for lazy timers */

extern int tvtohz(struct timeval *);

#if CONFIG_PROC_UUID_POLICY
static void inp_update_cellular_policy(struct inpcb *, boolean_t);
#if NECP
static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
#endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */

/* kdebug codes for the PCB lookup paths. */
#define	DBG_FNC_PCB_LOOKUP	NETDBG_CODE(DBG_NETTCP, (6 << 8))
#define	DBG_FNC_PCB_HLOOKUP	NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
1c79356b 151
1c79356b
A
152/*
153 * These configure the range of local port addresses assigned to
154 * "unspecified" outgoing connections/packets/whatever.
155 */
9bccf70c
A
156int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
157int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
39236c6e
A
158int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
159int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
9bccf70c
A
160int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
161int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
1c79356b 162
39236c6e 163#define RANGECHK(var, min, max) \
1c79356b
A
164 if ((var) < (min)) { (var) = (min); } \
165 else if ((var) > (max)) { (var) = (max); }
166
1c79356b
A
167static int
168sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
169{
2d21ac55 170#pragma unused(arg1, arg2)
39236c6e
A
171 int error;
172
173 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
1c79356b
A
174 if (!error) {
175 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
176 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
177 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
178 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
179 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
180 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
181 }
39236c6e 182 return (error);
1c79356b
A
183}
184
185#undef RANGECHK
186
39236c6e
A
187SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
188 CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports");
189
190SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
191 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
192 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
193SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
194 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
195 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
196SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
197 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
198 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
199SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
200 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
201 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
202SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
203 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
204 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
205SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
206 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
207 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
1c79356b 208
b0d623f7
A
209extern int udp_use_randomport;
210extern int tcp_use_randomport;
211
316670eb
A
212/* Structs used for flowhash computation */
213struct inp_flowhash_key_addr {
214 union {
215 struct in_addr v4;
216 struct in6_addr v6;
217 u_int8_t addr8[16];
218 u_int16_t addr16[8];
219 u_int32_t addr32[4];
220 } infha;
221};
222
223struct inp_flowhash_key {
39236c6e 224 struct inp_flowhash_key_addr infh_laddr;
316670eb
A
225 struct inp_flowhash_key_addr infh_faddr;
226 u_int32_t infh_lport;
227 u_int32_t infh_fport;
228 u_int32_t infh_af;
229 u_int32_t infh_proto;
230 u_int32_t infh_rand1;
231 u_int32_t infh_rand2;
232};
233
39236c6e
A
234static u_int32_t inp_hash_seed = 0;
235
236static int infc_cmp(const struct inpcb *, const struct inpcb *);
237
238/* Flags used by inp_fc_getinp */
239#define INPFC_SOLOCKED 0x1
240#define INPFC_REMOVE 0x2
241static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);
242
243static void inp_fc_feedback(struct inpcb *);
244extern void tcp_remove_from_time_wait(struct inpcb *inp);
316670eb 245
39236c6e 246decl_lck_mtx_data(static, inp_fc_lck);
316670eb 247
bd504ef0
A
248RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
249RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
250RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);
316670eb 251
bd504ef0
A
252/*
253 * Use this inp as a key to find an inp in the flowhash tree.
254 * Accesses to it are protected by inp_fc_lck.
255 */
256struct inpcb key_inp;
316670eb 257
1c79356b
A
258/*
259 * in_pcb.c: manage the Protocol Control Blocks.
1c79356b
A
260 */
261
316670eb 262void
39236c6e 263in_pcbinit(void)
316670eb 264{
39236c6e 265 static int inpcb_initialized = 0;
316670eb 266
39236c6e
A
267 VERIFY(!inpcb_initialized);
268 inpcb_initialized = 1;
316670eb 269
39236c6e
A
270 inpcb_lock_grp_attr = lck_grp_attr_alloc_init();
271 inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr);
272 inpcb_lock_attr = lck_attr_alloc_init();
273 lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr);
274 lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr);
275
276 /*
277 * Initialize data structures required to deliver
278 * flow advisories.
279 */
280 lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr);
bd504ef0 281 lck_mtx_lock(&inp_fc_lck);
316670eb 282 RB_INIT(&inp_fc_tree);
bd504ef0
A
283 bzero(&key_inp, sizeof(key_inp));
284 lck_mtx_unlock(&inp_fc_lck);
316670eb
A
285}
286
39236c6e
A
287#define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \
288 ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
289static void
290inpcb_timeout(void *arg)
291{
292#pragma unused(arg)
293 struct inpcbinfo *ipi;
294 boolean_t t, gc;
295 struct intimercount gccnt, tmcnt;
296 struct timeval leeway;
fe8ab488
A
297 boolean_t toomany_gc = FALSE;
298
299 if (arg != NULL) {
300 VERIFY(arg == &inpcb_toomany_gcreq);
301 toomany_gc = *(boolean_t *)arg;
302 }
39236c6e
A
303
304 /*
305 * Update coarse-grained networking timestamp (in sec.); the idea
306 * is to piggy-back on the timeout callout to update the counter
307 * returnable via net_uptime().
308 */
309 net_update_uptime();
310
fe8ab488
A
311 bzero(&gccnt, sizeof(gccnt));
312 bzero(&tmcnt, sizeof(tmcnt));
313
39236c6e
A
314 lck_mtx_lock_spin(&inpcb_timeout_lock);
315 gc = inpcb_garbage_collecting;
316 inpcb_garbage_collecting = FALSE;
39236c6e
A
317
318 t = inpcb_ticking;
319 inpcb_ticking = FALSE;
320
321 if (gc || t) {
322 lck_mtx_unlock(&inpcb_timeout_lock);
323
324 lck_mtx_lock(&inpcb_lock);
325 TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
326 if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
327 bzero(&ipi->ipi_gc_req,
328 sizeof(ipi->ipi_gc_req));
329 if (gc && ipi->ipi_gc != NULL) {
330 ipi->ipi_gc(ipi);
331 gccnt.intimer_lazy +=
332 ipi->ipi_gc_req.intimer_lazy;
333 gccnt.intimer_fast +=
334 ipi->ipi_gc_req.intimer_fast;
335 gccnt.intimer_nodelay +=
336 ipi->ipi_gc_req.intimer_nodelay;
337 }
338 }
339 if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
340 bzero(&ipi->ipi_timer_req,
341 sizeof(ipi->ipi_timer_req));
342 if (t && ipi->ipi_timer != NULL) {
343 ipi->ipi_timer(ipi);
344 tmcnt.intimer_lazy +=
345 ipi->ipi_timer_req.intimer_lazy;
346 tmcnt.intimer_lazy +=
347 ipi->ipi_timer_req.intimer_fast;
348 tmcnt.intimer_nodelay +=
349 ipi->ipi_timer_req.intimer_nodelay;
350 }
351 }
352 }
353 lck_mtx_unlock(&inpcb_lock);
354 lck_mtx_lock_spin(&inpcb_timeout_lock);
355 }
356
357 /* lock was dropped above, so check first before overriding */
358 if (!inpcb_garbage_collecting)
359 inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
360 if (!inpcb_ticking)
361 inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
362
363 /* re-arm the timer if there's work to do */
fe8ab488
A
364 if (toomany_gc) {
365 inpcb_toomany_gcreq = FALSE;
366 } else {
367 inpcb_timeout_run--;
368 VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);
369 }
39236c6e
A
370
371 bzero(&leeway, sizeof(leeway));
372 leeway.tv_sec = inpcb_timeout_lazy;
373 if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0)
374 inpcb_sched_timeout(NULL);
375 else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5)
376 /* be lazy when idle with little activity */
377 inpcb_sched_timeout(&leeway);
378 else
379 inpcb_sched_timeout(NULL);
380
381 lck_mtx_unlock(&inpcb_timeout_lock);
382}
383
384static void
385inpcb_sched_timeout(struct timeval *leeway)
386{
387 lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
388
389 if (inpcb_timeout_run == 0 &&
390 (inpcb_garbage_collecting || inpcb_ticking)) {
391 lck_mtx_convert_spin(&inpcb_timeout_lock);
392 inpcb_timeout_run++;
393 if (leeway == NULL) {
394 inpcb_fast_timer_on = TRUE;
395 timeout(inpcb_timeout, NULL, hz);
396 } else {
397 inpcb_fast_timer_on = FALSE;
398 timeout_with_leeway(inpcb_timeout, NULL, hz,
399 tvtohz(leeway));
400 }
401 } else if (inpcb_timeout_run == 1 &&
402 leeway == NULL && !inpcb_fast_timer_on) {
403 /*
404 * Since the request was for a fast timer but the
405 * scheduled timer is a lazy timer, try to schedule
406 * another instance of fast timer also
407 */
408 lck_mtx_convert_spin(&inpcb_timeout_lock);
409 inpcb_timeout_run++;
410 inpcb_fast_timer_on = TRUE;
411 timeout(inpcb_timeout, NULL, hz);
412 }
413}
414
415void
416inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
417{
418 struct timeval leeway;
fe8ab488 419 u_int32_t gccnt;
39236c6e
A
420 lck_mtx_lock_spin(&inpcb_timeout_lock);
421 inpcb_garbage_collecting = TRUE;
fe8ab488
A
422
423 gccnt = ipi->ipi_gc_req.intimer_nodelay +
424 ipi->ipi_gc_req.intimer_fast;
425
426 if (gccnt > INPCB_GCREQ_THRESHOLD && !inpcb_toomany_gcreq) {
427 inpcb_toomany_gcreq = TRUE;
428
429 /*
430 * There are toomany pcbs waiting to be garbage collected,
431 * schedule a much faster timeout in addition to
432 * the caller's request
433 */
434 lck_mtx_convert_spin(&inpcb_timeout_lock);
435 timeout(inpcb_timeout, (void *)&inpcb_toomany_gcreq,
436 INPCB_TOOMANY_GCREQ_TIMER);
437 }
438
39236c6e
A
439 switch (type) {
440 case INPCB_TIMER_NODELAY:
441 atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1);
442 inpcb_sched_timeout(NULL);
443 break;
444 case INPCB_TIMER_FAST:
445 atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
446 inpcb_sched_timeout(NULL);
447 break;
448 default:
449 atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);
450 leeway.tv_sec = inpcb_timeout_lazy;
451 leeway.tv_usec = 0;
452 inpcb_sched_timeout(&leeway);
453 break;
454 }
455 lck_mtx_unlock(&inpcb_timeout_lock);
456}
457
458void
459inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
460{
461 struct timeval leeway;
462 lck_mtx_lock_spin(&inpcb_timeout_lock);
463 inpcb_ticking = TRUE;
464 switch (type) {
465 case INPCB_TIMER_NODELAY:
466 atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1);
467 inpcb_sched_timeout(NULL);
468 break;
469 case INPCB_TIMER_FAST:
470 atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
471 inpcb_sched_timeout(NULL);
472 break;
473 default:
474 atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1);
475 leeway.tv_sec = inpcb_timeout_lazy;
476 leeway.tv_usec = 0;
477 inpcb_sched_timeout(&leeway);
478 break;
479 }
480 lck_mtx_unlock(&inpcb_timeout_lock);
481}
482
483void
484in_pcbinfo_attach(struct inpcbinfo *ipi)
485{
486 struct inpcbinfo *ipi0;
487
488 lck_mtx_lock(&inpcb_lock);
489 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
490 if (ipi0 == ipi) {
491 panic("%s: ipi %p already in the list\n",
492 __func__, ipi);
493 /* NOTREACHED */
494 }
495 }
496 TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
497 lck_mtx_unlock(&inpcb_lock);
498}
499
500int
501in_pcbinfo_detach(struct inpcbinfo *ipi)
502{
503 struct inpcbinfo *ipi0;
504 int error = 0;
505
506 lck_mtx_lock(&inpcb_lock);
507 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
508 if (ipi0 == ipi)
509 break;
510 }
511 if (ipi0 != NULL)
512 TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
513 else
514 error = ENXIO;
515 lck_mtx_unlock(&inpcb_lock);
516
517 return (error);
518}
519
1c79356b
A
520/*
521 * Allocate a PCB and associate it with the socket.
2d21ac55
A
522 *
523 * Returns: 0 Success
524 * ENOBUFS
525 * ENOMEM
1c79356b
A
526 */
527int
39236c6e 528in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
1c79356b 529{
39236c6e 530#pragma unused(p)
2d21ac55 531 struct inpcb *inp;
39236c6e 532 caddr_t temp;
2d21ac55
A
533#if CONFIG_MACF_NET
534 int mac_error;
39236c6e 535#endif /* CONFIG_MACF_NET */
1c79356b 536
39236c6e
A
537 if (!so->cached_in_sock_layer) {
538 inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone);
539 if (inp == NULL)
540 return (ENOBUFS);
541 bzero((caddr_t)inp, sizeof (*inp));
542 } else {
543 inp = (struct inpcb *)(void *)so->so_saved_pcb;
544 temp = inp->inp_saved_ppcb;
545 bzero((caddr_t)inp, sizeof (*inp));
546 inp->inp_saved_ppcb = temp;
1c79356b
A
547 }
548
549 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
550 inp->inp_pcbinfo = pcbinfo;
551 inp->inp_socket = so;
2d21ac55
A
552#if CONFIG_MACF_NET
553 mac_error = mac_inpcb_label_init(inp, M_WAITOK);
554 if (mac_error != 0) {
39236c6e 555 if (!so->cached_in_sock_layer)
2d21ac55
A
556 zfree(pcbinfo->ipi_zone, inp);
557 return (mac_error);
558 }
559 mac_inpcb_label_associate(so, inp);
39236c6e
A
560#endif /* CONFIG_MACF_NET */
561 /* make sure inp_stat is always 64-bit aligned */
562 inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
563 sizeof (u_int64_t));
564 if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
565 sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) {
566 panic("%s: insufficient space to align inp_stat", __func__);
567 /* NOTREACHED */
568 }
569
570 /* make sure inp_cstat is always 64-bit aligned */
571 inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
572 sizeof (u_int64_t));
573 if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
574 sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) {
575 panic("%s: insufficient space to align inp_cstat", __func__);
576 /* NOTREACHED */
577 }
578
579 /* make sure inp_wstat is always 64-bit aligned */
580 inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
581 sizeof (u_int64_t));
582 if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
583 sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) {
584 panic("%s: insufficient space to align inp_wstat", __func__);
585 /* NOTREACHED */
6d2010ae
A
586 }
587
fe8ab488
A
588 /* make sure inp_Wstat is always 64-bit aligned */
589 inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
590 sizeof (u_int64_t));
591 if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
592 sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) {
593 panic("%s: insufficient space to align inp_Wstat", __func__);
594 /* NOTREACHED */
595 }
596
91447636
A
597 so->so_pcb = (caddr_t)inp;
598
599 if (so->so_proto->pr_flags & PR_PCBLOCK) {
39236c6e
A
600 lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
601 pcbinfo->ipi_lock_attr);
91447636
A
602 }
603
2d21ac55 604#if INET6
39236c6e 605 if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on)
9bccf70c 606 inp->inp_flags |= IN6P_IPV6_V6ONLY;
39236c6e 607
9bccf70c
A
608 if (ip6_auto_flowlabel)
609 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
39236c6e
A
610#endif /* INET6 */
611
612 (void) inp_update_policy(inp);
613
614 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
91447636 615 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
39236c6e 616 LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
91447636 617 pcbinfo->ipi_count++;
39236c6e 618 lck_rw_done(pcbinfo->ipi_lock);
1c79356b
A
619 return (0);
620}
621
2d21ac55 622/*
39236c6e
A
623 * in_pcblookup_local_and_cleanup does everything
624 * in_pcblookup_local does but it checks for a socket
625 * that's going away. Since we know that the lock is
626 * held read+write when this funciton is called, we
627 * can safely dispose of this socket like the slow
628 * timer would usually do and return NULL. This is
629 * great for bind.
630 */
631struct inpcb *
632in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
633 u_int lport_arg, int wild_okay)
2d21ac55
A
634{
635 struct inpcb *inp;
39236c6e 636
2d21ac55
A
637 /* Perform normal lookup */
638 inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
39236c6e 639
2d21ac55 640 /* Check if we found a match but it's waiting to be disposed */
39236c6e 641 if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
2d21ac55 642 struct socket *so = inp->inp_socket;
39236c6e 643
6d2010ae 644 lck_mtx_lock(&inp->inpcb_mtx);
39236c6e 645
2d21ac55 646 if (so->so_usecount == 0) {
b0d623f7
A
647 if (inp->inp_state != INPCB_STATE_DEAD)
648 in_pcbdetach(inp);
39236c6e 649 in_pcbdispose(inp); /* will unlock & destroy */
2d21ac55 650 inp = NULL;
39236c6e 651 } else {
6d2010ae 652 lck_mtx_unlock(&inp->inpcb_mtx);
2d21ac55
A
653 }
654 }
39236c6e
A
655
656 return (inp);
2d21ac55
A
657}
658
c910b4d9 659static void
2d21ac55
A
660in_pcb_conflict_post_msg(u_int16_t port)
661{
39236c6e
A
662 /*
663 * Radar 5523020 send a kernel event notification if a
664 * non-participating socket tries to bind the port a socket
665 * who has set SOF_NOTIFYCONFLICT owns.
2d21ac55 666 */
39236c6e 667 struct kev_msg ev_msg;
2d21ac55
A
668 struct kev_in_portinuse in_portinuse;
669
39236c6e
A
670 bzero(&in_portinuse, sizeof (struct kev_in_portinuse));
671 bzero(&ev_msg, sizeof (struct kev_msg));
2d21ac55
A
672 in_portinuse.port = ntohs(port); /* port in host order */
673 in_portinuse.req_pid = proc_selfpid();
674 ev_msg.vendor_code = KEV_VENDOR_APPLE;
675 ev_msg.kev_class = KEV_NETWORK_CLASS;
676 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
677 ev_msg.event_code = KEV_INET_PORTINUSE;
678 ev_msg.dv[0].data_ptr = &in_portinuse;
39236c6e 679 ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse);
2d21ac55
A
680 ev_msg.dv[1].data_length = 0;
681 kev_post_msg(&ev_msg);
682}
39236c6e 683
2d21ac55 684/*
39236c6e
A
685 * Bind an INPCB to an address and/or port. This routine should not alter
686 * the caller-supplied local address "nam".
687 *
2d21ac55
A
688 * Returns: 0 Success
689 * EADDRNOTAVAIL Address not available.
690 * EINVAL Invalid argument
691 * EAFNOSUPPORT Address family not supported [notdef]
692 * EACCES Permission denied
693 * EADDRINUSE Address in use
694 * EAGAIN Resource unavailable, try again
6d2010ae 695 * priv_check_cred:EPERM Operation not permitted
2d21ac55 696 */
1c79356b 697int
2d21ac55 698in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
1c79356b 699{
2d21ac55 700 struct socket *so = inp->inp_socket;
9bccf70c 701 unsigned short *lastport;
1c79356b 702 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
b0d623f7 703 u_short lport = 0, rand_port = 0;
1c79356b 704 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
b0d623f7 705 int error, randomport, conflict = 0;
fe8ab488 706 boolean_t anonport = FALSE;
6d2010ae 707 kauth_cred_t cred;
fe8ab488
A
708 struct in_addr laddr;
709 struct ifnet *outif = NULL;
1c79356b
A
710
711 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
712 return (EADDRNOTAVAIL);
39236c6e 713 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
1c79356b 714 return (EINVAL);
39236c6e 715 if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
1c79356b 716 wild = 1;
91447636 717 socket_unlock(so, 0); /* keep reference on socket */
39236c6e 718 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
fe8ab488
A
719
720 bzero(&laddr, sizeof(laddr));
721
39236c6e 722 if (nam != NULL) {
6d2010ae 723
39236c6e
A
724 if (nam->sa_len != sizeof (struct sockaddr_in)) {
725 lck_rw_done(pcbinfo->ipi_lock);
91447636 726 socket_lock(so, 0);
1c79356b 727 return (EINVAL);
91447636 728 }
39236c6e 729#if 0
1c79356b
A
730 /*
731 * We should check the family, but old programs
732 * incorrectly fail to initialize it.
733 */
39236c6e
A
734 if (nam->sa_family != AF_INET) {
735 lck_rw_done(pcbinfo->ipi_lock);
91447636 736 socket_lock(so, 0);
1c79356b 737 return (EAFNOSUPPORT);
91447636 738 }
39236c6e
A
739#endif /* 0 */
740 lport = SIN(nam)->sin_port;
741
742 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
1c79356b
A
743 /*
744 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
745 * allow complete duplication of binding if
746 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
747 * and a multicast address is bound on both
748 * new and duplicated sockets.
749 */
750 if (so->so_options & SO_REUSEADDR)
751 reuseport = SO_REUSEADDR|SO_REUSEPORT;
39236c6e
A
752 } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
753 struct sockaddr_in sin;
91447636 754 struct ifaddr *ifa;
39236c6e
A
755
756 /* Sanitized for interface address searches */
757 bzero(&sin, sizeof (sin));
758 sin.sin_family = AF_INET;
759 sin.sin_len = sizeof (struct sockaddr_in);
760 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
761
762 ifa = ifa_ifwithaddr(SA(&sin));
763 if (ifa == NULL) {
764 lck_rw_done(pcbinfo->ipi_lock);
91447636 765 socket_lock(so, 0);
1c79356b 766 return (EADDRNOTAVAIL);
39236c6e
A
767 } else {
768 /*
769 * Opportunistically determine the outbound
770 * interface that may be used; this may not
771 * hold true if we end up using a route
772 * going over a different interface, e.g.
773 * when sending to a local address. This
774 * will get updated again after sending.
775 */
6d2010ae 776 IFA_LOCK(ifa);
316670eb 777 outif = ifa->ifa_ifp;
6d2010ae
A
778 IFA_UNLOCK(ifa);
779 IFA_REMREF(ifa);
91447636 780 }
1c79356b 781 }
39236c6e 782 if (lport != 0) {
1c79356b 783 struct inpcb *t;
39236c6e 784 uid_t u;
1c79356b 785
6d2010ae
A
786 if (ntohs(lport) < IPPORT_RESERVED) {
787 cred = kauth_cred_proc_ref(p);
39236c6e
A
788 error = priv_check_cred(cred,
789 PRIV_NETINET_RESERVEDPORT, 0);
6d2010ae
A
790 kauth_cred_unref(&cred);
791 if (error != 0) {
39236c6e 792 lck_rw_done(pcbinfo->ipi_lock);
6d2010ae
A
793 socket_lock(so, 0);
794 return (EACCES);
795 }
91447636 796 }
39236c6e
A
797 if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
798 (u = kauth_cred_getuid(so->so_cred)) != 0 &&
799 (t = in_pcblookup_local_and_cleanup(
800 inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
801 INPLOOKUP_WILDCARD)) != NULL &&
802 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
803 t->inp_laddr.s_addr != INADDR_ANY ||
804 !(t->inp_socket->so_options & SO_REUSEPORT)) &&
805 (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
806 !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
807 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
808 t->inp_laddr.s_addr != INADDR_ANY)) {
809 if ((t->inp_socket->so_flags &
810 SOF_NOTIFYCONFLICT) &&
811 !(so->so_flags & SOF_NOTIFYCONFLICT))
812 conflict = 1;
813
814 lck_rw_done(pcbinfo->ipi_lock);
815
816 if (conflict)
817 in_pcb_conflict_post_msg(lport);
2d21ac55 818
39236c6e
A
819 socket_lock(so, 0);
820 return (EADDRINUSE);
1c79356b 821 }
39236c6e
A
822 t = in_pcblookup_local_and_cleanup(pcbinfo,
823 SIN(nam)->sin_addr, lport, wild);
824 if (t != NULL &&
1c79356b
A
825 (reuseport & t->inp_socket->so_options) == 0) {
826#if INET6
39236c6e
A
827 if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
828 t->inp_laddr.s_addr != INADDR_ANY ||
829 SOCK_DOM(so) != PF_INET6 ||
830 SOCK_DOM(t->inp_socket) != PF_INET6)
2d21ac55
A
831#endif /* INET6 */
832 {
2d21ac55 833
39236c6e
A
834 if ((t->inp_socket->so_flags &
835 SOF_NOTIFYCONFLICT) &&
836 !(so->so_flags & SOF_NOTIFYCONFLICT))
2d21ac55
A
837 conflict = 1;
838
39236c6e 839 lck_rw_done(pcbinfo->ipi_lock);
2d21ac55
A
840
841 if (conflict)
842 in_pcb_conflict_post_msg(lport);
91447636
A
843 socket_lock(so, 0);
844 return (EADDRINUSE);
845 }
1c79356b
A
846 }
847 }
fe8ab488 848 laddr = SIN(nam)->sin_addr;
1c79356b
A
849 }
850 if (lport == 0) {
851 u_short first, last;
852 int count;
853
39236c6e
A
854 randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
855 (so->so_type == SOCK_STREAM ? tcp_use_randomport :
856 udp_use_randomport);
857
858 /*
fe8ab488
A
859 * Even though this looks similar to the code in
860 * in6_pcbsetport, the v6 vs v4 checks are different.
39236c6e 861 */
fe8ab488 862 anonport = TRUE;
1c79356b
A
863 if (inp->inp_flags & INP_HIGHPORT) {
864 first = ipport_hifirstauto; /* sysctl */
865 last = ipport_hilastauto;
39236c6e 866 lastport = &pcbinfo->ipi_lasthi;
1c79356b 867 } else if (inp->inp_flags & INP_LOWPORT) {
6d2010ae 868 cred = kauth_cred_proc_ref(p);
39236c6e
A
869 error = priv_check_cred(cred,
870 PRIV_NETINET_RESERVEDPORT, 0);
6d2010ae
A
871 kauth_cred_unref(&cred);
872 if (error != 0) {
39236c6e 873 lck_rw_done(pcbinfo->ipi_lock);
91447636 874 socket_lock(so, 0);
39236c6e 875 return (error);
91447636 876 }
1c79356b
A
877 first = ipport_lowfirstauto; /* 1023 */
878 last = ipport_lowlastauto; /* 600 */
39236c6e 879 lastport = &pcbinfo->ipi_lastlow;
1c79356b
A
880 } else {
881 first = ipport_firstauto; /* sysctl */
882 last = ipport_lastauto;
39236c6e 883 lastport = &pcbinfo->ipi_lastport;
1c79356b 884 }
b0d623f7
A
885 /* No point in randomizing if only one port is available */
886
887 if (first == last)
39236c6e 888 randomport = 0;
1c79356b
A
889 /*
890 * Simple check to ensure all ports are not used up causing
891 * a deadlock here.
892 *
893 * We split the two cases (up and down) so that the direction
894 * is not being tested on each round of the loop.
895 */
896 if (first > last) {
897 /*
898 * counting down
899 */
b0d623f7 900 if (randomport) {
39236c6e
A
901 read_random(&rand_port, sizeof (rand_port));
902 *lastport =
903 first - (rand_port % (first - last));
b0d623f7 904 }
1c79356b
A
905 count = first - last;
906
907 do {
908 if (count-- < 0) { /* completely used? */
39236c6e 909 lck_rw_done(pcbinfo->ipi_lock);
91447636 910 socket_lock(so, 0);
9bccf70c 911 return (EADDRNOTAVAIL);
1c79356b
A
912 }
913 --*lastport;
914 if (*lastport > first || *lastport < last)
915 *lastport = first;
916 lport = htons(*lastport);
2d21ac55 917 } while (in_pcblookup_local_and_cleanup(pcbinfo,
fe8ab488
A
918 ((laddr.s_addr != INADDR_ANY) ? laddr :
919 inp->inp_laddr), lport, wild));
1c79356b
A
920 } else {
921 /*
922 * counting up
923 */
b0d623f7 924 if (randomport) {
39236c6e
A
925 read_random(&rand_port, sizeof (rand_port));
926 *lastport =
927 first + (rand_port % (first - last));
b0d623f7 928 }
1c79356b
A
929 count = last - first;
930
931 do {
932 if (count-- < 0) { /* completely used? */
39236c6e 933 lck_rw_done(pcbinfo->ipi_lock);
91447636 934 socket_lock(so, 0);
9bccf70c 935 return (EADDRNOTAVAIL);
1c79356b
A
936 }
937 ++*lastport;
938 if (*lastport < first || *lastport > last)
939 *lastport = first;
940 lport = htons(*lastport);
2d21ac55 941 } while (in_pcblookup_local_and_cleanup(pcbinfo,
fe8ab488
A
942 ((laddr.s_addr != INADDR_ANY) ? laddr :
943 inp->inp_laddr), lport, wild));
1c79356b
A
944 }
945 }
91447636 946 socket_lock(so, 0);
fe8ab488
A
947 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
948 lck_rw_done(pcbinfo->ipi_lock);
949 return (EINVAL);
950 }
951
952 if (laddr.s_addr != INADDR_ANY) {
953 inp->inp_laddr = laddr;
954 inp->inp_last_outifp = outif;
955 }
1c79356b 956 inp->inp_lport = lport;
fe8ab488
A
957 if (anonport)
958 inp->inp_flags |= INP_ANONPORT;
959
91447636 960 if (in_pcbinshash(inp, 1) != 0) {
1c79356b 961 inp->inp_laddr.s_addr = INADDR_ANY;
316670eb 962 inp->inp_last_outifp = NULL;
fe8ab488
A
963
964 inp->inp_lport = 0;
965 if (anonport)
966 inp->inp_flags &= ~INP_ANONPORT;
39236c6e 967 lck_rw_done(pcbinfo->ipi_lock);
1c79356b
A
968 return (EAGAIN);
969 }
39236c6e 970 lck_rw_done(pcbinfo->ipi_lock);
2d21ac55 971 sflt_notify(so, sock_evt_bound, NULL);
1c79356b
A
972 return (0);
973}
974
975/*
39236c6e
A
976 * Transform old in_pcbconnect() into an inner subroutine for new
977 * in_pcbconnect(); do some validity-checking on the remote address
978 * (in "nam") and then determine local host address (i.e., which
979 * interface) to use to access that remote host.
980 *
981 * This routine may alter the caller-supplied remote address "nam".
1c79356b 982 *
39236c6e
A
983 * The caller may override the bound-to-interface setting of the socket
984 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
985 *
986 * This routine might return an ifp with a reference held if the caller
987 * provides a non-NULL outif, even in the error case. The caller is
988 * responsible for releasing its reference.
2d21ac55
A
989 *
990 * Returns: 0 Success
991 * EINVAL Invalid argument
992 * EAFNOSUPPORT Address family not supported
993 * EADDRNOTAVAIL Address not available
1c79356b 994 */
1c79356b 995int
39236c6e
A
996in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
997 unsigned int ifscope, struct ifnet **outif)
1c79356b 998{
39236c6e
A
999 struct route *ro = &inp->inp_route;
1000 struct in_ifaddr *ia = NULL;
1001 struct sockaddr_in sin;
1002 int error = 0;
fe8ab488 1003 boolean_t restricted = FALSE;
39236c6e
A
1004
1005 if (outif != NULL)
1006 *outif = NULL;
1007 if (nam->sa_len != sizeof (struct sockaddr_in))
1c79356b 1008 return (EINVAL);
39236c6e 1009 if (SIN(nam)->sin_family != AF_INET)
1c79356b 1010 return (EAFNOSUPPORT);
39236c6e 1011 if (SIN(nam)->sin_port == 0)
1c79356b 1012 return (EADDRNOTAVAIL);
b0d623f7 1013
39236c6e
A
1014 /*
1015 * If the destination address is INADDR_ANY,
1016 * use the primary local address.
1017 * If the supplied address is INADDR_BROADCAST,
1018 * and the primary interface supports broadcast,
1019 * choose the broadcast address for that interface.
1020 */
1021 if (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
1022 SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST) {
1023 lck_rw_lock_shared(in_ifaddr_rwlock);
1024 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
1025 ia = TAILQ_FIRST(&in_ifaddrhead);
1026 IFA_LOCK_SPIN(&ia->ia_ifa);
1027 if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
1028 SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
1029 } else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
1030 SIN(nam)->sin_addr =
1031 SIN(&ia->ia_broadaddr)->sin_addr;
1032 }
1033 IFA_UNLOCK(&ia->ia_ifa);
1034 ia = NULL;
1035 }
1036 lck_rw_done(in_ifaddr_rwlock);
1037 }
1038 /*
1039 * Otherwise, if the socket has already bound the source, just use it.
1040 */
1041 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1042 VERIFY(ia == NULL);
1043 *laddr = inp->inp_laddr;
1044 return (0);
1c79356b 1045 }
6d2010ae 1046
39236c6e
A
1047 /*
1048 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
1049 * then it overrides the sticky ifscope set for the socket.
1050 */
1051 if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF))
1052 ifscope = inp->inp_boundifp->if_index;
6d2010ae 1053
39236c6e
A
1054 /*
1055 * If route is known or can be allocated now,
1056 * our src addr is taken from the i/f, else punt.
1057 * Note that we should check the address family of the cached
1058 * destination, in case of sharing the cache with IPv6.
1059 */
1060 if (ro->ro_rt != NULL)
1061 RT_LOCK_SPIN(ro->ro_rt);
1062 if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
1063 SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
1064 (inp->inp_socket->so_options & SO_DONTROUTE)) {
b0d623f7 1065 if (ro->ro_rt != NULL)
b0d623f7 1066 RT_UNLOCK(ro->ro_rt);
39236c6e
A
1067 ROUTE_RELEASE(ro);
1068 }
1069 if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
1070 (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
1071 if (ro->ro_rt != NULL)
1072 RT_UNLOCK(ro->ro_rt);
1073 ROUTE_RELEASE(ro);
1074 /* No route yet, so try to acquire one */
1075 bzero(&ro->ro_dst, sizeof (struct sockaddr_in));
1076 ro->ro_dst.sa_family = AF_INET;
1077 ro->ro_dst.sa_len = sizeof (struct sockaddr_in);
1078 SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
1079 rtalloc_scoped(ro, ifscope);
1080 if (ro->ro_rt != NULL)
1081 RT_LOCK_SPIN(ro->ro_rt);
1082 }
1083 /* Sanitized local copy for interface address searches */
1084 bzero(&sin, sizeof (sin));
1085 sin.sin_family = AF_INET;
1086 sin.sin_len = sizeof (struct sockaddr_in);
1087 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
1088 /*
1089 * If we did not find (or use) a route, assume dest is reachable
1090 * on a directly connected network and try to find a corresponding
1091 * interface to take the source address from.
1092 */
1093 if (ro->ro_rt == NULL) {
1094 VERIFY(ia == NULL);
1095 ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
1096 if (ia == NULL)
1097 ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
1098 error = ((ia == NULL) ? ENETUNREACH : 0);
1099 goto done;
1100 }
1101 RT_LOCK_ASSERT_HELD(ro->ro_rt);
1102 /*
1103 * If the outgoing interface on the route found is not
1104 * a loopback interface, use the address from that interface.
1105 */
1106 if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
1107 VERIFY(ia == NULL);
6d2010ae
A
1108 /*
1109 * If the route points to a cellular interface and the
1110 * caller forbids our using interfaces of such type,
1111 * pretend that there is no route.
fe8ab488 1112 * Apply the same logic for expensive interfaces.
6d2010ae 1113 */
fe8ab488 1114 if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
39236c6e
A
1115 RT_UNLOCK(ro->ro_rt);
1116 ROUTE_RELEASE(ro);
1117 error = EHOSTUNREACH;
fe8ab488 1118 restricted = TRUE;
39236c6e 1119 } else {
6d2010ae
A
1120 /* Become a regular mutex */
1121 RT_CONVERT_LOCK(ro->ro_rt);
39236c6e
A
1122 ia = ifatoia(ro->ro_rt->rt_ifa);
1123 IFA_ADDREF(&ia->ia_ifa);
b0d623f7 1124 RT_UNLOCK(ro->ro_rt);
39236c6e 1125 error = 0;
91447636 1126 }
39236c6e
A
1127 goto done;
1128 }
1129 VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
1130 RT_UNLOCK(ro->ro_rt);
1131 /*
1132 * The outgoing interface is marked with 'loopback net', so a route
1133 * to ourselves is here.
1134 * Try to find the interface of the destination address and then
1135 * take the address from there. That interface is not necessarily
1136 * a loopback interface.
1137 */
1138 VERIFY(ia == NULL);
1139 ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
1140 if (ia == NULL)
1141 ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
1142 if (ia == NULL)
1143 ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
1144 if (ia == NULL) {
1145 RT_LOCK(ro->ro_rt);
1146 ia = ifatoia(ro->ro_rt->rt_ifa);
1147 if (ia != NULL)
1148 IFA_ADDREF(&ia->ia_ifa);
1149 RT_UNLOCK(ro->ro_rt);
1150 }
1151 error = ((ia == NULL) ? ENETUNREACH : 0);
1152
1153done:
1154 /*
1155 * If the destination address is multicast and an outgoing
1156 * interface has been set as a multicast option, use the
1157 * address of that interface as our source address.
1158 */
15129b1c 1159 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
39236c6e
A
1160 inp->inp_moptions != NULL) {
1161 struct ip_moptions *imo;
1162 struct ifnet *ifp;
1163
1164 imo = inp->inp_moptions;
1165 IMO_LOCK(imo);
1166 if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
1167 ia->ia_ifp != imo->imo_multicast_ifp)) {
1168 ifp = imo->imo_multicast_ifp;
1169 if (ia != NULL)
6d2010ae 1170 IFA_REMREF(&ia->ia_ifa);
39236c6e
A
1171 lck_rw_lock_shared(in_ifaddr_rwlock);
1172 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
1173 if (ia->ia_ifp == ifp)
1174 break;
6d2010ae 1175 }
39236c6e
A
1176 if (ia != NULL)
1177 IFA_ADDREF(&ia->ia_ifa);
1178 lck_rw_done(in_ifaddr_rwlock);
1179 if (ia == NULL)
1180 error = EADDRNOTAVAIL;
15129b1c
A
1181 else
1182 error = 0;
1c79356b 1183 }
39236c6e
A
1184 IMO_UNLOCK(imo);
1185 }
1186 /*
1187 * Don't do pcblookup call here; return interface in laddr
1188 * and exit to caller, that will do the lookup.
1189 */
1190 if (ia != NULL) {
1c79356b 1191 /*
39236c6e
A
1192 * If the source address belongs to a cellular interface
1193 * and the socket forbids our using interfaces of such
1194 * type, pretend that there is no source address.
fe8ab488 1195 * Apply the same logic for expensive interfaces.
1c79356b 1196 */
39236c6e 1197 IFA_LOCK_SPIN(&ia->ia_ifa);
fe8ab488 1198 if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
39236c6e
A
1199 IFA_UNLOCK(&ia->ia_ifa);
1200 error = EHOSTUNREACH;
fe8ab488 1201 restricted = TRUE;
39236c6e
A
1202 } else if (error == 0) {
1203 *laddr = ia->ia_addr.sin_addr;
1204 if (outif != NULL) {
1205 struct ifnet *ifp;
1206
1207 if (ro->ro_rt != NULL)
1208 ifp = ro->ro_rt->rt_ifp;
1209 else
1210 ifp = ia->ia_ifp;
1211
1212 VERIFY(ifp != NULL);
1213 IFA_CONVERT_LOCK(&ia->ia_ifa);
1214 ifnet_reference(ifp); /* for caller */
1215 if (*outif != NULL)
1216 ifnet_release(*outif);
1217 *outif = ifp;
1c79356b 1218 }
39236c6e
A
1219 IFA_UNLOCK(&ia->ia_ifa);
1220 } else {
1221 IFA_UNLOCK(&ia->ia_ifa);
1c79356b 1222 }
6d2010ae 1223 IFA_REMREF(&ia->ia_ifa);
39236c6e
A
1224 ia = NULL;
1225 }
1226
fe8ab488 1227 if (restricted && error == EHOSTUNREACH) {
39236c6e
A
1228 soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
1229 SO_FILT_HINT_IFDENIED));
1c79356b 1230 }
39236c6e
A
1231
1232 return (error);
1c79356b
A
1233}
1234
/*
 * Outer subroutine:
 * Connect a socket to the address given in "nam"; both the address and
 * the port must be specified there.  If the socket has no local address
 * yet, one is chosen (and a local port is bound if none is set).
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 *
 * Returns 0 on success, any error from in_pcbladdr() or in_pcbbind(),
 * ECONNREFUSED if the socket was aborted while it was unlocked, or
 * EADDRINUSE if an identical connection already exists.
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct sockaddr_in *dst = (struct sockaddr_in *)(void *)nam;
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *pcbinfo;
	struct inpcb *existing;
	struct in_addr laddr;
	boolean_t laddr_unset;
	int error;

	/*
	 * Call inner routine, to assign local interface address.
	 */
	error = in_pcbladdr(inp, nam, &laddr, ifscope, outif);
	if (error != 0)
		return (error);

	/* The hash lookup is performed with the socket unlocked */
	socket_unlock(so, 0);
	existing = in_pcblookup_hash(inp->inp_pcbinfo, dst->sin_addr,
	    dst->sin_port, inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state. When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0)
		return (ECONNREFUSED);

	if (existing != NULL) {
		/* Give back the PCB the lookup handed us; 4-tuple is taken */
		in_pcb_checkstate(existing, WNT_RELEASE,
		    existing == inp ? 1 : 0);
		return (EADDRINUSE);
	}

	/*
	 * Decide now whether the local address still has to be filled in;
	 * the decision must not be revisited after the socket lock has
	 * been dropped below.
	 */
	laddr_unset = (inp->inp_laddr.s_addr == INADDR_ANY);
	if (laddr_unset && inp->inp_lport == 0) {
		error = in_pcbbind(inp, NULL, p);
		if (error)
			return (error);
	}

	pcbinfo = inp->inp_pcbinfo;
	if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
		/*
		 * Lock inversion issue, mostly with udp
		 * multicast packets.
		 */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	if (laddr_unset) {
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
		inp->inp_flags |= INP_INADDR_ANY;
	}

	inp->inp_faddr = dst->sin_addr;
	inp->inp_fport = dst->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
		nstat_pcb_invalidate_cache(inp);
	in_pcbrehash(inp);
	lck_rw_done(pcbinfo->ipi_lock);
	return (0);
}
1317
1318void
2d21ac55 1319in_pcbdisconnect(struct inpcb *inp)
1c79356b 1320{
39236c6e 1321 struct socket *so = inp->inp_socket;
1c79356b 1322
fe8ab488
A
1323 if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
1324 nstat_pcb_cache(inp);
1325
1c79356b
A
1326 inp->inp_faddr.s_addr = INADDR_ANY;
1327 inp->inp_fport = 0;
91447636 1328
39236c6e
A
1329 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1330 /* lock inversion issue, mostly with udp multicast packets */
1331 socket_unlock(so, 0);
1332 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1333 socket_lock(so, 0);
91447636
A
1334 }
1335
1c79356b 1336 in_pcbrehash(inp);
39236c6e
A
1337 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1338 /*
1339 * A multipath subflow socket would have its SS_NOFDREF set by default,
1340 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
1341 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
1342 */
1343 if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF))
1c79356b
A
1344 in_pcbdetach(inp);
1345}
1346
1347void
2d21ac55 1348in_pcbdetach(struct inpcb *inp)
1c79356b
A
1349{
1350 struct socket *so = inp->inp_socket;
1c79356b 1351
39236c6e
A
1352 if (so->so_pcb == NULL) {
1353 /* PCB has been disposed */
1354 panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__,
1355 inp, so, SOCK_PROTO(so));
1356 /* NOTREACHED */
91447636 1357 }
fe8ab488 1358
1c79356b 1359#if IPSEC
39236c6e
A
1360 if (inp->inp_sp != NULL) {
1361 (void) ipsec4_delete_pcbpolicy(inp);
91447636 1362 }
39236c6e 1363#endif /* IPSEC */
fe8ab488
A
1364
1365 /*
1366 * Let NetworkStatistics know this PCB is going away
1367 * before we detach it.
1368 */
1369 if (nstat_collect &&
1370 (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP))
1371 nstat_pcb_detach(inp);
91447636 1372 /* mark socket state as dead */
39236c6e
A
1373 if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
1374 panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
1375 __func__, so, SOCK_PROTO(so));
1376 /* NOTREACHED */
1377 }
1c79356b 1378
39236c6e 1379 if (!(so->so_flags & SOF_PCBCLEARING)) {
6d2010ae 1380 struct ip_moptions *imo;
2d21ac55 1381
91447636 1382 inp->inp_vflag = 0;
39236c6e
A
1383 if (inp->inp_options != NULL) {
1384 (void) m_free(inp->inp_options);
1385 inp->inp_options = NULL;
91447636 1386 }
39236c6e 1387 ROUTE_RELEASE(&inp->inp_route);
6d2010ae 1388 imo = inp->inp_moptions;
91447636 1389 inp->inp_moptions = NULL;
6d2010ae
A
1390 if (imo != NULL)
1391 IMO_REMREF(imo);
91447636
A
1392 sofreelastref(so, 0);
1393 inp->inp_state = INPCB_STATE_DEAD;
39236c6e
A
1394 /* makes sure we're not called twice from so_close */
1395 so->so_flags |= SOF_PCBCLEARING;
1396
1397 inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
91447636
A
1398 }
1399}
1c79356b 1400
1c79356b 1401
39236c6e
A
1402void
1403in_pcbdispose(struct inpcb *inp)
91447636
A
1404{
1405 struct socket *so = inp->inp_socket;
1406 struct inpcbinfo *ipi = inp->inp_pcbinfo;
1407
39236c6e
A
1408 if (so != NULL && so->so_usecount != 0) {
1409 panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n",
1410 __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
1411 solockhistory_nr(so));
1412 /* NOTREACHED */
1413 } else if (inp->inp_wantcnt != WNT_STOPUSING) {
1414 if (so != NULL) {
1415 panic_plain("%s: inp %p invalid wantcnt %d, so %p "
1416 "[%d,%d] usecount %d retaincnt %d state 0x%x "
1417 "flags 0x%x lockhistory %s\n", __func__, inp,
1418 inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
1419 so->so_usecount, so->so_retaincnt, so->so_state,
1420 so->so_flags, solockhistory_nr(so));
1421 /* NOTREACHED */
1422 } else {
1423 panic("%s: inp %p invalid wantcnt %d no socket\n",
1424 __func__, inp, inp->inp_wantcnt);
1425 /* NOTREACHED */
1426 }
91447636 1427 }
91447636 1428
39236c6e 1429 lck_rw_assert(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);
91447636
A
1430
1431 inp->inp_gencnt = ++ipi->ipi_gencnt;
316670eb 1432 /* access ipi in in_pcbremlists */
91447636 1433 in_pcbremlists(inp);
316670eb 1434
39236c6e 1435 if (so != NULL) {
91447636
A
1436 if (so->so_proto->pr_flags & PR_PCBLOCK) {
1437 sofreelastref(so, 0);
39236c6e
A
1438 if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
1439 /*
1440 * selthreadclear() already called
1441 * during sofreelastref() above.
1442 */
91447636
A
1443 sbrelease(&so->so_rcv);
1444 sbrelease(&so->so_snd);
1445 }
39236c6e
A
1446 if (so->so_head != NULL) {
1447 panic("%s: so=%p head still exist\n",
1448 __func__, so);
1449 /* NOTREACHED */
1450 }
1451 lck_mtx_unlock(&inp->inpcb_mtx);
1452 lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
9bccf70c 1453 }
39236c6e
A
1454 /* makes sure we're not called twice from so_close */
1455 so->so_flags |= SOF_PCBCLEARING;
1456 so->so_saved_pcb = (caddr_t)inp;
1457 so->so_pcb = NULL;
1458 inp->inp_socket = NULL;
2d21ac55
A
1459#if CONFIG_MACF_NET
1460 mac_inpcb_label_destroy(inp);
39236c6e 1461#endif /* CONFIG_MACF_NET */
b0d623f7
A
1462 /*
1463 * In case there a route cached after a detach (possible
1464 * in the tcp case), make sure that it is freed before
1465 * we deallocate the structure.
1466 */
39236c6e
A
1467 ROUTE_RELEASE(&inp->inp_route);
1468 if (!so->cached_in_sock_layer) {
91447636 1469 zfree(ipi->ipi_zone, inp);
55e303ae 1470 }
91447636 1471 sodealloc(so);
9bccf70c 1472 }
1c79356b
A
1473}
1474
1475/*
39236c6e 1476 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
1c79356b
A
1477 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
1478 * in struct pr_usrreqs, so that protocols can just reference then directly
39236c6e 1479 * without the need for a wrapper function.
1c79356b
A
1480 */
1481int
39236c6e 1482in_getsockaddr(struct socket *so, struct sockaddr **nam)
1c79356b 1483{
2d21ac55
A
1484 struct inpcb *inp;
1485 struct sockaddr_in *sin;
1c79356b
A
1486
1487 /*
1488 * Do the malloc first in case it blocks.
1489 */
39236c6e 1490 MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
0b4e3aa0 1491 if (sin == NULL)
39236c6e
A
1492 return (ENOBUFS);
1493 bzero(sin, sizeof (*sin));
1c79356b 1494 sin->sin_family = AF_INET;
39236c6e 1495 sin->sin_len = sizeof (*sin);
1c79356b 1496
39236c6e 1497 if ((inp = sotoinpcb(so)) == NULL) {
1c79356b 1498 FREE(sin, M_SONAME);
39236c6e 1499 return (EINVAL);
1c79356b
A
1500 }
1501 sin->sin_port = inp->inp_lport;
1502 sin->sin_addr = inp->inp_laddr;
1c79356b
A
1503
1504 *nam = (struct sockaddr *)sin;
39236c6e 1505 return (0);
1c79356b
A
1506}
1507
1508int
39236c6e 1509in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss)
1c79356b 1510{
39236c6e 1511 struct sockaddr_in *sin = SIN(ss);
1c79356b 1512 struct inpcb *inp;
1c79356b 1513
39236c6e
A
1514 VERIFY(ss != NULL);
1515 bzero(ss, sizeof (*ss));
1516
1c79356b 1517 sin->sin_family = AF_INET;
39236c6e 1518 sin->sin_len = sizeof (*sin);
1c79356b 1519
fe8ab488
A
1520 if ((inp = sotoinpcb(so)) == NULL
1521#if NECP
1522 || (necp_socket_should_use_flow_divert(inp))
1523#endif /* NECP */
1524 )
39236c6e
A
1525 return (inp == NULL ? EINVAL : EPROTOTYPE);
1526
1527 sin->sin_port = inp->inp_lport;
1528 sin->sin_addr = inp->inp_laddr;
1529 return (0);
1530}
1531
1532int
1533in_getpeeraddr(struct socket *so, struct sockaddr **nam)
1534{
1535 struct inpcb *inp;
1536 struct sockaddr_in *sin;
1537
1538 /*
1539 * Do the malloc first in case it blocks.
1540 */
1541 MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
1542 if (sin == NULL)
1543 return (ENOBUFS);
1544 bzero((caddr_t)sin, sizeof (*sin));
1545 sin->sin_family = AF_INET;
1546 sin->sin_len = sizeof (*sin);
1547
1548 if ((inp = sotoinpcb(so)) == NULL) {
1c79356b 1549 FREE(sin, M_SONAME);
39236c6e 1550 return (EINVAL);
1c79356b
A
1551 }
1552 sin->sin_port = inp->inp_fport;
1553 sin->sin_addr = inp->inp_faddr;
1c79356b
A
1554
1555 *nam = (struct sockaddr *)sin;
39236c6e
A
1556 return (0);
1557}
1558
1559int
1560in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss)
1561{
1562 struct sockaddr_in *sin = SIN(ss);
1563 struct inpcb *inp;
1564
1565 VERIFY(ss != NULL);
1566 bzero(ss, sizeof (*ss));
1567
1568 sin->sin_family = AF_INET;
1569 sin->sin_len = sizeof (*sin);
1570
fe8ab488
A
1571 if ((inp = sotoinpcb(so)) == NULL
1572#if NECP
1573 || (necp_socket_should_use_flow_divert(inp))
1574#endif /* NECP */
1575 ) {
39236c6e
A
1576 return (inp == NULL ? EINVAL : EPROTOTYPE);
1577 }
1578
1579 sin->sin_port = inp->inp_fport;
1580 sin->sin_addr = inp->inp_faddr;
1581 return (0);
1c79356b
A
1582}
1583
1c79356b 1584void
2d21ac55 1585in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
39236c6e 1586 int errno, void (*notify)(struct inpcb *, int))
1c79356b 1587{
91447636
A
1588 struct inpcb *inp;
1589
39236c6e 1590 lck_rw_lock_shared(pcbinfo->ipi_lock);
1c79356b 1591
39236c6e 1592 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
9bccf70c 1593#if INET6
39236c6e 1594 if (!(inp->inp_vflag & INP_IPV4))
1c79356b 1595 continue;
39236c6e 1596#endif /* INET6 */
1c79356b 1597 if (inp->inp_faddr.s_addr != faddr.s_addr ||
9bccf70c 1598 inp->inp_socket == NULL)
39236c6e
A
1599 continue;
1600 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
91447636
A
1601 continue;
1602 socket_lock(inp->inp_socket, 1);
9bccf70c 1603 (*notify)(inp, errno);
39236c6e 1604 (void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
91447636 1605 socket_unlock(inp->inp_socket, 1);
1c79356b 1606 }
39236c6e 1607 lck_rw_done(pcbinfo->ipi_lock);
1c79356b
A
1608}
1609
1610/*
1611 * Check for alternatives when higher level complains
1612 * about service problems. For now, invalidate cached
1613 * routing information. If the route was created dynamically
1614 * (by a redirect), time to try a default gateway again.
1615 */
1616void
2d21ac55 1617in_losing(struct inpcb *inp)
1c79356b 1618{
39236c6e 1619 boolean_t release = FALSE;
2d21ac55 1620 struct rtentry *rt;
1c79356b
A
1621 struct rt_addrinfo info;
1622
b0d623f7 1623 if ((rt = inp->inp_route.ro_rt) != NULL) {
39236c6e 1624 struct in_ifaddr *ia = NULL;
b0d623f7 1625
39236c6e 1626 bzero((caddr_t)&info, sizeof (info));
b0d623f7 1627 RT_LOCK(rt);
1c79356b 1628 info.rti_info[RTAX_DST] =
39236c6e 1629 (struct sockaddr *)&inp->inp_route.ro_dst;
1c79356b
A
1630 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1631 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1632 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
b0d623f7
A
1633 if (rt->rt_flags & RTF_DYNAMIC) {
1634 /*
1635 * Prevent another thread from modifying rt_key,
1636 * rt_gateway via rt_setgate() after rt_lock is
1637 * dropped by marking the route as defunct.
1638 */
1639 rt->rt_flags |= RTF_CONDEMNED;
1640 RT_UNLOCK(rt);
1641 (void) rtrequest(RTM_DELETE, rt_key(rt),
39236c6e 1642 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
b0d623f7
A
1643 } else {
1644 RT_UNLOCK(rt);
1645 }
2d21ac55 1646 /* if the address is gone keep the old route in the pcb */
39236c6e
A
1647 if (inp->inp_laddr.s_addr != INADDR_ANY &&
1648 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1649 /*
1650 * Address is around; ditch the route. A new route
1651 * can be allocated the next time output is attempted.
1652 */
1653 release = TRUE;
2d21ac55 1654 }
39236c6e
A
1655 if (ia != NULL)
1656 IFA_REMREF(&ia->ia_ifa);
1c79356b 1657 }
39236c6e
A
1658 if (rt == NULL || release)
1659 ROUTE_RELEASE(&inp->inp_route);
1c79356b
A
1660}
1661
1662/*
1663 * After a routing change, flush old routing
1664 * and allocate a (hopefully) better one.
1665 */
9bccf70c 1666void
39236c6e 1667in_rtchange(struct inpcb *inp, int errno)
1c79356b 1668{
39236c6e
A
1669#pragma unused(errno)
1670 boolean_t release = FALSE;
2d21ac55
A
1671 struct rtentry *rt;
1672
1673 if ((rt = inp->inp_route.ro_rt) != NULL) {
39236c6e 1674 struct in_ifaddr *ia = NULL;
b0d623f7 1675
39236c6e
A
1676 /* if address is gone, keep the old route */
1677 if (inp->inp_laddr.s_addr != INADDR_ANY &&
1678 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1679 /*
1680 * Address is around; ditch the route. A new route
1681 * can be allocated the next time output is attempted.
1682 */
1683 release = TRUE;
2d21ac55 1684 }
39236c6e
A
1685 if (ia != NULL)
1686 IFA_REMREF(&ia->ia_ifa);
1c79356b 1687 }
39236c6e
A
1688 if (rt == NULL || release)
1689 ROUTE_RELEASE(&inp->inp_route);
1c79356b
A
1690}
1691
1692/*
1693 * Lookup a PCB based on the local address and port.
1694 */
1695struct inpcb *
2d21ac55 1696in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
39236c6e 1697 unsigned int lport_arg, int wild_okay)
1c79356b 1698{
2d21ac55 1699 struct inpcb *inp;
1c79356b
A
1700 int matchwild = 3, wildcard;
1701 u_short lport = lport_arg;
1702
39236c6e 1703 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);
1c79356b
A
1704
1705 if (!wild_okay) {
1706 struct inpcbhead *head;
1707 /*
1708 * Look for an unconnected (wildcard foreign addr) PCB that
1709 * matches the local address and port we're looking for.
1710 */
39236c6e
A
1711 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
1712 pcbinfo->ipi_hashmask)];
9bccf70c
A
1713 LIST_FOREACH(inp, head, inp_hash) {
1714#if INET6
39236c6e 1715 if (!(inp->inp_vflag & INP_IPV4))
1c79356b 1716 continue;
39236c6e 1717#endif /* INET6 */
1c79356b
A
1718 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1719 inp->inp_laddr.s_addr == laddr.s_addr &&
1720 inp->inp_lport == lport) {
1721 /*
1722 * Found.
1723 */
1724 return (inp);
1725 }
1726 }
1727 /*
1728 * Not found.
1729 */
39236c6e 1730 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
1c79356b
A
1731 return (NULL);
1732 } else {
1733 struct inpcbporthead *porthash;
1734 struct inpcbport *phd;
1735 struct inpcb *match = NULL;
1736 /*
1737 * Best fit PCB lookup.
1738 *
1739 * First see if this local port is in use by looking on the
1740 * port hash list.
1741 */
39236c6e
A
1742 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
1743 pcbinfo->ipi_porthashmask)];
9bccf70c 1744 LIST_FOREACH(phd, porthash, phd_hash) {
1c79356b
A
1745 if (phd->phd_port == lport)
1746 break;
1747 }
1748 if (phd != NULL) {
1749 /*
1750 * Port is in use by one or more PCBs. Look for best
1751 * fit.
1752 */
9bccf70c 1753 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
1c79356b 1754 wildcard = 0;
9bccf70c 1755#if INET6
39236c6e 1756 if (!(inp->inp_vflag & INP_IPV4))
1c79356b 1757 continue;
39236c6e 1758#endif /* INET6 */
1c79356b
A
1759 if (inp->inp_faddr.s_addr != INADDR_ANY)
1760 wildcard++;
1761 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1762 if (laddr.s_addr == INADDR_ANY)
1763 wildcard++;
39236c6e
A
1764 else if (inp->inp_laddr.s_addr !=
1765 laddr.s_addr)
1c79356b
A
1766 continue;
1767 } else {
1768 if (laddr.s_addr != INADDR_ANY)
1769 wildcard++;
1770 }
1771 if (wildcard < matchwild) {
1772 match = inp;
1773 matchwild = wildcard;
1774 if (matchwild == 0) {
1775 break;
1776 }
1777 }
1778 }
1779 }
39236c6e
A
1780 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
1781 0, 0, 0, 0);
1c79356b
A
1782 return (match);
1783 }
1784}
1785
6d2010ae
A
1786/*
1787 * Check if PCB exists in hash list.
1788 */
1789int
39236c6e
A
1790in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1791 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
1792 uid_t *uid, gid_t *gid, struct ifnet *ifp)
6d2010ae
A
1793{
1794 struct inpcbhead *head;
1795 struct inpcb *inp;
1796 u_short fport = fport_arg, lport = lport_arg;
39236c6e
A
1797 int found = 0;
1798 struct inpcb *local_wild = NULL;
1799#if INET6
1800 struct inpcb *local_wild_mapped = NULL;
1801#endif /* INET6 */
6d2010ae
A
1802
1803 *uid = UID_MAX;
1804 *gid = GID_MAX;
316670eb 1805
6d2010ae
A
1806 /*
1807 * We may have found the pcb in the last lookup - check this first.
1808 */
1809
39236c6e 1810 lck_rw_lock_shared(pcbinfo->ipi_lock);
6d2010ae
A
1811
1812 /*
1813 * First look for an exact match.
1814 */
39236c6e
A
1815 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
1816 pcbinfo->ipi_hashmask)];
6d2010ae
A
1817 LIST_FOREACH(inp, head, inp_hash) {
1818#if INET6
39236c6e 1819 if (!(inp->inp_vflag & INP_IPV4))
6d2010ae 1820 continue;
39236c6e 1821#endif /* INET6 */
fe8ab488 1822 if (inp_restricted_recv(inp, ifp))
316670eb
A
1823 continue;
1824
6d2010ae
A
1825 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1826 inp->inp_laddr.s_addr == laddr.s_addr &&
1827 inp->inp_fport == fport &&
1828 inp->inp_lport == lport) {
1829 if ((found = (inp->inp_socket != NULL))) {
1830 /*
1831 * Found.
1832 */
316670eb
A
1833 *uid = kauth_cred_getuid(
1834 inp->inp_socket->so_cred);
1835 *gid = kauth_cred_getgid(
1836 inp->inp_socket->so_cred);
6d2010ae 1837 }
39236c6e 1838 lck_rw_done(pcbinfo->ipi_lock);
6d2010ae
A
1839 return (found);
1840 }
1841 }
6d2010ae 1842
39236c6e
A
1843 if (!wildcard) {
1844 /*
1845 * Not found.
1846 */
1847 lck_rw_done(pcbinfo->ipi_lock);
1848 return (0);
1849 }
316670eb 1850
39236c6e
A
1851 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
1852 pcbinfo->ipi_hashmask)];
1853 LIST_FOREACH(inp, head, inp_hash) {
6d2010ae 1854#if INET6
39236c6e
A
1855 if (!(inp->inp_vflag & INP_IPV4))
1856 continue;
6d2010ae 1857#endif /* INET6 */
fe8ab488 1858 if (inp_restricted_recv(inp, ifp))
39236c6e
A
1859 continue;
1860
1861 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1862 inp->inp_lport == lport) {
1863 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1864 if ((found = (inp->inp_socket != NULL))) {
316670eb 1865 *uid = kauth_cred_getuid(
39236c6e 1866 inp->inp_socket->so_cred);
316670eb 1867 *gid = kauth_cred_getgid(
39236c6e 1868 inp->inp_socket->so_cred);
6d2010ae 1869 }
39236c6e 1870 lck_rw_done(pcbinfo->ipi_lock);
6d2010ae 1871 return (found);
39236c6e
A
1872 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
1873#if INET6
1874 if (inp->inp_socket &&
1875 SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
1876 local_wild_mapped = inp;
1877 else
6d2010ae 1878#endif /* INET6 */
39236c6e
A
1879 local_wild = inp;
1880 }
6d2010ae 1881 }
39236c6e
A
1882 }
1883 if (local_wild == NULL) {
1884#if INET6
1885 if (local_wild_mapped != NULL) {
1886 if ((found = (local_wild_mapped->inp_socket != NULL))) {
316670eb 1887 *uid = kauth_cred_getuid(
39236c6e 1888 local_wild_mapped->inp_socket->so_cred);
316670eb 1889 *gid = kauth_cred_getgid(
39236c6e 1890 local_wild_mapped->inp_socket->so_cred);
6d2010ae 1891 }
39236c6e 1892 lck_rw_done(pcbinfo->ipi_lock);
6d2010ae
A
1893 return (found);
1894 }
39236c6e
A
1895#endif /* INET6 */
1896 lck_rw_done(pcbinfo->ipi_lock);
1897 return (0);
6d2010ae 1898 }
39236c6e
A
1899 if ((found = (local_wild->inp_socket != NULL))) {
1900 *uid = kauth_cred_getuid(
1901 local_wild->inp_socket->so_cred);
1902 *gid = kauth_cred_getgid(
1903 local_wild->inp_socket->so_cred);
1904 }
1905 lck_rw_done(pcbinfo->ipi_lock);
1906 return (found);
6d2010ae
A
1907}
1908
1c79356b
A
1909/*
1910 * Lookup PCB in hash list.
1911 */
1912struct inpcb *
39236c6e
A
1913in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1914 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
1915 struct ifnet *ifp)
1c79356b
A
1916{
1917 struct inpcbhead *head;
2d21ac55 1918 struct inpcb *inp;
1c79356b 1919 u_short fport = fport_arg, lport = lport_arg;
39236c6e
A
1920 struct inpcb *local_wild = NULL;
1921#if INET6
1922 struct inpcb *local_wild_mapped = NULL;
1923#endif /* INET6 */
1c79356b
A
1924
1925 /*
1926 * We may have found the pcb in the last lookup - check this first.
1927 */
1928
39236c6e 1929 lck_rw_lock_shared(pcbinfo->ipi_lock);
1c79356b
A
1930
1931 /*
1932 * First look for an exact match.
1933 */
39236c6e
A
1934 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
1935 pcbinfo->ipi_hashmask)];
9bccf70c
A
1936 LIST_FOREACH(inp, head, inp_hash) {
1937#if INET6
39236c6e 1938 if (!(inp->inp_vflag & INP_IPV4))
1c79356b 1939 continue;
39236c6e 1940#endif /* INET6 */
fe8ab488 1941 if (inp_restricted_recv(inp, ifp))
316670eb
A
1942 continue;
1943
1c79356b
A
1944 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1945 inp->inp_laddr.s_addr == laddr.s_addr &&
1946 inp->inp_fport == fport &&
1947 inp->inp_lport == lport) {
1948 /*
1949 * Found.
1950 */
39236c6e
A
1951 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
1952 WNT_STOPUSING) {
1953 lck_rw_done(pcbinfo->ipi_lock);
91447636 1954 return (inp);
39236c6e
A
1955 } else {
1956 /* it's there but dead, say it isn't found */
1957 lck_rw_done(pcbinfo->ipi_lock);
316670eb 1958 return (NULL);
91447636 1959 }
1c79356b
A
1960 }
1961 }
1c79356b 1962
39236c6e
A
1963 if (!wildcard) {
1964 /*
1965 * Not found.
1966 */
1967 lck_rw_done(pcbinfo->ipi_lock);
1968 return (NULL);
1969 }
1970
1971 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
1972 pcbinfo->ipi_hashmask)];
1973 LIST_FOREACH(inp, head, inp_hash) {
9bccf70c 1974#if INET6
39236c6e
A
1975 if (!(inp->inp_vflag & INP_IPV4))
1976 continue;
1977#endif /* INET6 */
fe8ab488 1978 if (inp_restricted_recv(inp, ifp))
39236c6e
A
1979 continue;
1980
1981 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1982 inp->inp_lport == lport) {
1983 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1984 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
1985 WNT_STOPUSING) {
1986 lck_rw_done(pcbinfo->ipi_lock);
1987 return (inp);
1988 } else {
1989 /* it's dead; say it isn't found */
1990 lck_rw_done(pcbinfo->ipi_lock);
1991 return (NULL);
91447636 1992 }
39236c6e 1993 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2d21ac55 1994#if INET6
39236c6e
A
1995 if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
1996 local_wild_mapped = inp;
1997 else
2d21ac55 1998#endif /* INET6 */
1c79356b 1999 local_wild = inp;
1c79356b
A
2000 }
2001 }
39236c6e
A
2002 }
2003 if (local_wild == NULL) {
2d21ac55 2004#if INET6
39236c6e
A
2005 if (local_wild_mapped != NULL) {
2006 if (in_pcb_checkstate(local_wild_mapped,
2007 WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2008 lck_rw_done(pcbinfo->ipi_lock);
2009 return (local_wild_mapped);
2010 } else {
2011 /* it's dead; say it isn't found */
2012 lck_rw_done(pcbinfo->ipi_lock);
2013 return (NULL);
91447636 2014 }
91447636 2015 }
39236c6e
A
2016#endif /* INET6 */
2017 lck_rw_done(pcbinfo->ipi_lock);
2018 return (NULL);
2019 }
2020 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2021 lck_rw_done(pcbinfo->ipi_lock);
2022 return (local_wild);
1c79356b 2023 }
1c79356b 2024 /*
39236c6e 2025 * It's either not found or is already dead.
1c79356b 2026 */
39236c6e 2027 lck_rw_done(pcbinfo->ipi_lock);
1c79356b
A
2028 return (NULL);
2029}
2030
2031/*
2032 * Insert PCB onto various hash lists.
2033 */
2034int
2d21ac55 2035in_pcbinshash(struct inpcb *inp, int locked)
1c79356b
A
2036{
2037 struct inpcbhead *pcbhash;
2038 struct inpcbporthead *pcbporthash;
2039 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
2040 struct inpcbport *phd;
2041 u_int32_t hashkey_faddr;
2042
39236c6e
A
2043 if (!locked) {
2044 if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
2045 /*
2046 * Lock inversion issue, mostly with udp
2047 * multicast packets
2048 */
2049 socket_unlock(inp->inp_socket, 0);
2050 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
2051 socket_lock(inp->inp_socket, 0);
6d2010ae 2052 if (inp->inp_state == INPCB_STATE_DEAD) {
39236c6e
A
2053 /*
2054 * The socket got dropped when
2055 * it was unlocked
2056 */
2057 lck_rw_done(pcbinfo->ipi_lock);
2058 return (ECONNABORTED);
6d2010ae 2059 }
39236c6e
A
2060 }
2061 }
b0d623f7 2062
1c79356b
A
2063#if INET6
2064 if (inp->inp_vflag & INP_IPV6)
2065 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2066 else
2067#endif /* INET6 */
39236c6e 2068 hashkey_faddr = inp->inp_faddr.s_addr;
1c79356b 2069
39236c6e
A
2070 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2071 inp->inp_fport, pcbinfo->ipi_hashmask);
91447636 2072
39236c6e 2073 pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];
1c79356b 2074
39236c6e
A
2075 pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
2076 pcbinfo->ipi_porthashmask)];
1c79356b
A
2077
2078 /*
2079 * Go through port list and look for a head for this lport.
2080 */
9bccf70c 2081 LIST_FOREACH(phd, pcbporthash, phd_hash) {
1c79356b
A
2082 if (phd->phd_port == inp->inp_lport)
2083 break;
2084 }
316670eb
A
2085
2086 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
2087
1c79356b
A
2088 /*
2089 * If none exists, malloc one and tack it on.
2090 */
2091 if (phd == NULL) {
39236c6e
A
2092 MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport),
2093 M_PCB, M_WAITOK);
1c79356b 2094 if (phd == NULL) {
91447636 2095 if (!locked)
39236c6e 2096 lck_rw_done(pcbinfo->ipi_lock);
1c79356b
A
2097 return (ENOBUFS); /* XXX */
2098 }
2099 phd->phd_port = inp->inp_lport;
2100 LIST_INIT(&phd->phd_pcblist);
2101 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
2102 }
fe8ab488
A
2103
2104 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
1c79356b
A
2105 inp->inp_phd = phd;
2106 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
2107 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
fe8ab488
A
2108 inp->inp_flags2 |= INP2_INHASHLIST;
2109
91447636 2110 if (!locked)
39236c6e 2111 lck_rw_done(pcbinfo->ipi_lock);
fe8ab488
A
2112
2113#if NECP
2114 // This call catches the original setting of the local address
2115 inp_update_necp_policy(inp, NULL, NULL, 0);
2116#endif /* NECP */
2117
1c79356b
A
2118 return (0);
2119}
2120
2121/*
2122 * Move PCB to the proper hash bucket when { faddr, fport } have been
2123 * changed. NOTE: This does not handle the case of the lport changing (the
2124 * hashed port list would have to be updated as well), so the lport must
2125 * not change after in_pcbinshash() has been called.
2126 */
2127void
2d21ac55 2128in_pcbrehash(struct inpcb *inp)
1c79356b
A
2129{
2130 struct inpcbhead *head;
2131 u_int32_t hashkey_faddr;
2132
2133#if INET6
2134 if (inp->inp_vflag & INP_IPV6)
2135 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2136 else
2137#endif /* INET6 */
39236c6e
A
2138 hashkey_faddr = inp->inp_faddr.s_addr;
2139
2140 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2141 inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
2142 head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];
1c79356b 2143
fe8ab488
A
2144 if (inp->inp_flags2 & INP2_INHASHLIST) {
2145 LIST_REMOVE(inp, inp_hash);
2146 inp->inp_flags2 &= ~INP2_INHASHLIST;
2147 }
2148
2149 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
1c79356b 2150 LIST_INSERT_HEAD(head, inp, inp_hash);
fe8ab488
A
2151 inp->inp_flags2 |= INP2_INHASHLIST;
2152
2153#if NECP
2154 // This call catches updates to the remote addresses
2155 inp_update_necp_policy(inp, NULL, NULL, 0);
2156#endif /* NECP */
1c79356b
A
2157}
2158
2159/*
2160 * Remove PCB from various lists.
316670eb 2161 * Must be called pcbinfo lock is held in exclusive mode.
1c79356b
A
2162 */
2163void
2d21ac55 2164in_pcbremlists(struct inpcb *inp)
1c79356b
A
2165{
2166 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
1c79356b 2167
fe8ab488
A
2168 /*
2169 * Check if it's in hashlist -- an inp is placed in hashlist when
2170 * it's local port gets assigned. So it should also be present
2171 * in the port list.
2172 */
2173 if (inp->inp_flags2 & INP2_INHASHLIST) {
1c79356b
A
2174 struct inpcbport *phd = inp->inp_phd;
2175
fe8ab488
A
2176 VERIFY(phd != NULL && inp->inp_lport > 0);
2177
1c79356b 2178 LIST_REMOVE(inp, inp_hash);
fe8ab488
A
2179 inp->inp_hash.le_next = NULL;
2180 inp->inp_hash.le_prev = NULL;
2181
1c79356b 2182 LIST_REMOVE(inp, inp_portlist);
fe8ab488
A
2183 inp->inp_portlist.le_next = NULL;
2184 inp->inp_portlist.le_prev = NULL;
2185 if (LIST_EMPTY(&phd->phd_pcblist)) {
1c79356b
A
2186 LIST_REMOVE(phd, phd_hash);
2187 FREE(phd, M_PCB);
2188 }
fe8ab488
A
2189 inp->inp_phd = NULL;
2190 inp->inp_flags2 &= ~INP2_INHASHLIST;
1c79356b 2191 }
fe8ab488 2192 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
39236c6e
A
2193
2194 if (inp->inp_flags2 & INP2_TIMEWAIT) {
2195 /* Remove from time-wait queue */
2196 tcp_remove_from_time_wait(inp);
2197 inp->inp_flags2 &= ~INP2_TIMEWAIT;
2198 VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
2199 inp->inp_pcbinfo->ipi_twcount--;
2200 } else {
2201 /* Remove from global inp list if it is not time-wait */
2202 LIST_REMOVE(inp, inp_list);
2203 }
316670eb 2204
bd504ef0 2205 if (inp->inp_flags2 & INP2_IN_FCTREE) {
39236c6e 2206 inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE));
bd504ef0
A
2207 VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
2208 }
39236c6e 2209
1c79356b
A
2210 inp->inp_pcbinfo->ipi_count--;
2211}
2212
39236c6e
A
2213/*
2214 * Mechanism used to defer the memory release of PCBs
2215 * The pcb list will contain the pcb until the reaper can clean it up if
2216 * the following conditions are met:
2217 * 1) state "DEAD",
2218 * 2) wantcnt is STOPUSING
2219 * 3) usecount is 0
91447636 2220 * This function will be called to either mark the pcb as
39236c6e 2221 */
91447636
A
2222int
2223in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
91447636 2224{
39236c6e 2225 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
2d21ac55
A
2226 UInt32 origwant;
2227 UInt32 newwant;
91447636
A
2228
2229 switch (mode) {
39236c6e
A
2230 case WNT_STOPUSING:
2231 /*
2232 * Try to mark the pcb as ready for recycling. CAS with
2233 * STOPUSING, if success we're good, if it's in use, will
2234 * be marked later
2235 */
2236 if (locked == 0)
2237 socket_lock(pcb->inp_socket, 1);
2238 pcb->inp_state = INPCB_STATE_DEAD;
91447636 2239
39236c6e
A
2240stopusing:
2241 if (pcb->inp_socket->so_usecount < 0) {
2242 panic("%s: pcb=%p so=%p usecount is negative\n",
2243 __func__, pcb, pcb->inp_socket);
2244 /* NOTREACHED */
2245 }
2246 if (locked == 0)
2247 socket_unlock(pcb->inp_socket, 1);
91447636 2248
39236c6e 2249 inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);
6d2010ae 2250
39236c6e
A
2251 origwant = *wantcnt;
2252 if ((UInt16) origwant == 0xffff) /* should stop using */
2253 return (WNT_STOPUSING);
2254 newwant = 0xffff;
2255 if ((UInt16) origwant == 0) {
2256 /* try to mark it as unsuable now */
2257 OSCompareAndSwap(origwant, newwant, wantcnt);
2258 }
2259 return (WNT_STOPUSING);
2260 break;
91447636 2261
39236c6e
A
2262 case WNT_ACQUIRE:
2263 /*
2264 * Try to increase reference to pcb. If WNT_STOPUSING
2265 * should bail out. If socket state DEAD, try to set count
2266 * to STOPUSING, return failed otherwise increase cnt.
2267 */
2268 do {
91447636 2269 origwant = *wantcnt;
39236c6e
A
2270 if ((UInt16) origwant == 0xffff) {
2271 /* should stop using */
91447636 2272 return (WNT_STOPUSING);
91447636 2273 }
39236c6e
A
2274 newwant = origwant + 1;
2275 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2276 return (WNT_ACQUIRE);
2277 break;
91447636 2278
39236c6e
A
2279 case WNT_RELEASE:
2280 /*
2281 * Release reference. If result is null and pcb state
2282 * is DEAD, set wanted bit to STOPUSING
2283 */
2284 if (locked == 0)
2285 socket_lock(pcb->inp_socket, 1);
91447636 2286
39236c6e
A
2287 do {
2288 origwant = *wantcnt;
2289 if ((UInt16) origwant == 0x0) {
2290 panic("%s: pcb=%p release with zero count",
2291 __func__, pcb);
2292 /* NOTREACHED */
2293 }
2294 if ((UInt16) origwant == 0xffff) {
2295 /* should stop using */
2296 if (locked == 0)
2297 socket_unlock(pcb->inp_socket, 1);
2298 return (WNT_STOPUSING);
2299 }
2300 newwant = origwant - 1;
2301 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2302
2303 if (pcb->inp_state == INPCB_STATE_DEAD)
2304 goto stopusing;
2305 if (pcb->inp_socket->so_usecount < 0) {
2306 panic("%s: RELEASE pcb=%p so=%p usecount is negative\n",
2307 __func__, pcb, pcb->inp_socket);
2308 /* NOTREACHED */
2309 }
91447636 2310
39236c6e
A
2311 if (locked == 0)
2312 socket_unlock(pcb->inp_socket, 1);
2313 return (WNT_RELEASE);
2314 break;
91447636 2315
39236c6e
A
2316 default:
2317 panic("%s: so=%p not a valid state =%x\n", __func__,
2318 pcb->inp_socket, mode);
2319 /* NOTREACHED */
91447636
A
2320 }
2321
2322 /* NOTREACHED */
2323 return (mode);
2324}
2325
2326/*
2327 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
2328 * The inpcb_compat data structure is passed to user space and must
b0d623f7 2329 * not change. We intentionally avoid copying pointers.
91447636
A
2330 */
2331void
39236c6e 2332inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
91447636 2333{
39236c6e 2334 bzero(inp_compat, sizeof (*inp_compat));
91447636
A
2335 inp_compat->inp_fport = inp->inp_fport;
2336 inp_compat->inp_lport = inp->inp_lport;
316670eb 2337 inp_compat->nat_owner = 0;
39236c6e 2338 inp_compat->nat_cookie = 0;
91447636
A
2339 inp_compat->inp_gencnt = inp->inp_gencnt;
2340 inp_compat->inp_flags = inp->inp_flags;
2341 inp_compat->inp_flow = inp->inp_flow;
2342 inp_compat->inp_vflag = inp->inp_vflag;
2343 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
2344 inp_compat->inp_ip_p = inp->inp_ip_p;
39236c6e
A
2345 inp_compat->inp_dependfaddr.inp6_foreign =
2346 inp->inp_dependfaddr.inp6_foreign;
2347 inp_compat->inp_dependladdr.inp6_local =
2348 inp->inp_dependladdr.inp6_local;
91447636 2349 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
39236c6e 2350 inp_compat->inp_depend6.inp6_hlim = 0;
91447636 2351 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
39236c6e 2352 inp_compat->inp_depend6.inp6_ifindex = 0;
91447636
A
2353 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2354}
9bccf70c 2355
b0d623f7 2356void
39236c6e 2357inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
b0d623f7 2358{
6d2010ae
A
2359 xinp->inp_fport = inp->inp_fport;
2360 xinp->inp_lport = inp->inp_lport;
2361 xinp->inp_gencnt = inp->inp_gencnt;
2362 xinp->inp_flags = inp->inp_flags;
2363 xinp->inp_flow = inp->inp_flow;
2364 xinp->inp_vflag = inp->inp_vflag;
2365 xinp->inp_ip_ttl = inp->inp_ip_ttl;
2366 xinp->inp_ip_p = inp->inp_ip_p;
2367 xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
2368 xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
2369 xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
39236c6e 2370 xinp->inp_depend6.inp6_hlim = 0;
6d2010ae 2371 xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
39236c6e 2372 xinp->inp_depend6.inp6_ifindex = 0;
6d2010ae 2373 xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
b0d623f7
A
2374}
2375
b0d623f7
A
2376/*
2377 * The following routines implement this scheme:
2378 *
2379 * Callers of ip_output() that intend to cache the route in the inpcb pass
2380 * a local copy of the struct route to ip_output(). Using a local copy of
2381 * the cached route significantly simplifies things as IP no longer has to
2382 * worry about having exclusive access to the passed in struct route, since
2383 * it's defined in the caller's stack; in essence, this allows for a lock-
2384 * less operation when updating the struct route at the IP level and below,
2385 * whenever necessary. The scheme works as follows:
2386 *
2387 * Prior to dropping the socket's lock and calling ip_output(), the caller
2388 * copies the struct route from the inpcb into its stack, and adds a reference
2389 * to the cached route entry, if there was any. The socket's lock is then
2390 * dropped and ip_output() is called with a pointer to the copy of struct
2391 * route defined on the stack (not to the one in the inpcb.)
2392 *
2393 * Upon returning from ip_output(), the caller then acquires the socket's
2394 * lock and synchronizes the cache; if there is no route cached in the inpcb,
2395 * it copies the local copy of struct route (which may or may not contain any
2396 * route) back into the cache; otherwise, if the inpcb has a route cached in
2397 * it, the one in the local copy will be freed, if there's any. Trashing the
2398 * cached route in the inpcb can be avoided because ip_output() is single-
2399 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
2400 * by the socket/transport layer.)
2401 */
2402void
2403inp_route_copyout(struct inpcb *inp, struct route *dst)
2404{
2405 struct route *src = &inp->inp_route;
2406
6d2010ae 2407 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
b0d623f7 2408
0b4c1975 2409 /*
39236c6e 2410 * If the route in the PCB is stale or not for IPv4, blow it away;
0b4c1975
A
2411 * this is possible in the case of IPv4-mapped address case.
2412 */
39236c6e
A
2413 if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET)
2414 ROUTE_RELEASE(src);
316670eb 2415
39236c6e 2416 route_copyout(dst, src, sizeof (*dst));
b0d623f7
A
2417}
2418
2419void
2420inp_route_copyin(struct inpcb *inp, struct route *src)
2421{
2422 struct route *dst = &inp->inp_route;
2423
6d2010ae 2424 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
b0d623f7
A
2425
2426 /* Minor sanity check */
2427 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
2428 panic("%s: wrong or corrupted route: %p", __func__, src);
2429
39236c6e 2430 route_copyin(src, dst, sizeof (*src));
6d2010ae
A
2431}
2432
2433/*
2434 * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option.
2435 */
316670eb 2436int
39236c6e 2437inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
6d2010ae 2438{
316670eb
A
2439 struct ifnet *ifp = NULL;
2440
2441 ifnet_head_lock_shared();
2442 if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
2443 (ifp = ifindex2ifnet[ifscope]) == NULL)) {
2444 ifnet_head_done();
2445 return (ENXIO);
2446 }
2447 ifnet_head_done();
2448
2449 VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);
2450
6d2010ae
A
2451 /*
2452 * A zero interface scope value indicates an "unbind".
2453 * Otherwise, take in whatever value the app desires;
2454 * the app may already know the scope (or force itself
2455 * to such a scope) ahead of time before the interface
2456 * gets attached. It doesn't matter either way; any
2457 * route lookup from this point on will require an
2458 * exact match for the embedded interface scope.
2459 */
316670eb
A
2460 inp->inp_boundifp = ifp;
2461 if (inp->inp_boundifp == NULL)
6d2010ae
A
2462 inp->inp_flags &= ~INP_BOUND_IF;
2463 else
2464 inp->inp_flags |= INP_BOUND_IF;
2465
2466 /* Blow away any cached route in the PCB */
39236c6e
A
2467 ROUTE_RELEASE(&inp->inp_route);
2468
2469 if (pifp != NULL)
2470 *pifp = ifp;
316670eb
A
2471
2472 return (0);
6d2010ae
A
2473}
2474
2475/*
39236c6e
A
2476 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2477 * as well as for setting PROC_UUID_NO_CELLULAR policy.
6d2010ae 2478 */
39236c6e
A
2479void
2480inp_set_nocellular(struct inpcb *inp)
6d2010ae 2481{
39236c6e 2482 inp->inp_flags |= INP_NO_IFT_CELLULAR;
6d2010ae
A
2483
2484 /* Blow away any cached route in the PCB */
39236c6e
A
2485 ROUTE_RELEASE(&inp->inp_route);
2486}
2487
2488/*
2489 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2490 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
2491 */
2492void
2493inp_clear_nocellular(struct inpcb *inp)
2494{
2495 struct socket *so = inp->inp_socket;
2496
2497 /*
2498 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
2499 * has a higher precendence than INP_NO_IFT_CELLULAR. Clear the flag
2500 * if and only if the socket is unrestricted.
2501 */
2502 if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
2503 inp->inp_flags &= ~INP_NO_IFT_CELLULAR;
2504
2505 /* Blow away any cached route in the PCB */
2506 ROUTE_RELEASE(&inp->inp_route);
6d2010ae 2507 }
39236c6e 2508}
6d2010ae 2509
fe8ab488
A
2510void
2511inp_set_noexpensive(struct inpcb *inp)
2512{
2513 inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;
2514
2515 /* Blow away any cached route in the PCB */
2516 ROUTE_RELEASE(&inp->inp_route);
2517}
2518
2519void
2520inp_set_awdl_unrestricted(struct inpcb *inp)
2521{
2522 inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;
2523
2524 /* Blow away any cached route in the PCB */
2525 ROUTE_RELEASE(&inp->inp_route);
2526}
2527
2528boolean_t
2529inp_get_awdl_unrestricted(struct inpcb *inp)
2530{
2531 return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
2532}
2533
2534void
2535inp_clear_awdl_unrestricted(struct inpcb *inp)
2536{
2537 inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;
2538
2539 /* Blow away any cached route in the PCB */
2540 ROUTE_RELEASE(&inp->inp_route);
2541}
2542
2543#if NECP
39236c6e 2544/*
fe8ab488 2545 * Called when PROC_UUID_NECP_APP_POLICY is set.
39236c6e
A
2546 */
2547void
fe8ab488 2548inp_set_want_app_policy(struct inpcb *inp)
39236c6e 2549{
fe8ab488 2550 inp->inp_flags2 |= INP2_WANT_APP_POLICY;
39236c6e
A
2551}
2552
2553/*
fe8ab488 2554 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
39236c6e
A
2555 */
2556void
fe8ab488 2557inp_clear_want_app_policy(struct inpcb *inp)
39236c6e 2558{
fe8ab488 2559 inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
b0d623f7 2560}
fe8ab488 2561#endif /* NECP */
316670eb
A
2562
2563/*
2564 * Calculate flow hash for an inp, used by an interface to identify a
2565 * flow. When an interface provides flow control advisory, this flow
2566 * hash is used as an identifier.
2567 */
2568u_int32_t
2569inp_calc_flowhash(struct inpcb *inp)
2570{
2571 struct inp_flowhash_key fh __attribute__((aligned(8)));
2572 u_int32_t flowhash = 0;
bd504ef0 2573 struct inpcb *tmp_inp = NULL;
316670eb
A
2574
2575 if (inp_hash_seed == 0)
2576 inp_hash_seed = RandomULong();
2577
2578 bzero(&fh, sizeof (fh));
2579
2580 bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr));
2581 bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr));
2582
2583 fh.infh_lport = inp->inp_lport;
2584 fh.infh_fport = inp->inp_fport;
2585 fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
2586 fh.infh_proto = inp->inp_ip_p;
2587 fh.infh_rand1 = RandomULong();
2588 fh.infh_rand2 = RandomULong();
2589
2590try_again:
2591 flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed);
2592 if (flowhash == 0) {
2593 /* try to get a non-zero flowhash */
2594 inp_hash_seed = RandomULong();
2595 goto try_again;
2596 }
2597
bd504ef0 2598 inp->inp_flowhash = flowhash;
316670eb 2599
bd504ef0 2600 /* Insert the inp into inp_fc_tree */
39236c6e 2601 lck_mtx_lock_spin(&inp_fc_lck);
bd504ef0
A
2602 tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
2603 if (tmp_inp != NULL) {
316670eb 2604 /*
bd504ef0
A
2605 * There is a different inp with the same flowhash.
2606 * There can be a collision on flow hash but the
39236c6e 2607 * probability is low. Let's recompute the
bd504ef0 2608 * flowhash.
316670eb
A
2609 */
2610 lck_mtx_unlock(&inp_fc_lck);
bd504ef0
A
2611 /* recompute hash seed */
2612 inp_hash_seed = RandomULong();
2613 goto try_again;
316670eb 2614 }
39236c6e 2615
bd504ef0
A
2616 RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
2617 inp->inp_flags2 |= INP2_IN_FCTREE;
316670eb 2618 lck_mtx_unlock(&inp_fc_lck);
bd504ef0 2619
39236c6e
A
2620 return (flowhash);
2621}
2622
2623void
2624inp_flowadv(uint32_t flowhash)
2625{
2626 struct inpcb *inp;
2627
2628 inp = inp_fc_getinp(flowhash, 0);
2629
2630 if (inp == NULL)
2631 return;
2632 inp_fc_feedback(inp);
316670eb
A
2633}
2634
bd504ef0
A
2635/*
2636 * Function to compare inp_fc_entries in inp flow control tree
2637 */
2638static inline int
2639infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
316670eb 2640{
bd504ef0 2641 return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
39236c6e 2642 sizeof(inp1->inp_flowhash)));
bd504ef0 2643}
316670eb 2644
39236c6e 2645static struct inpcb *
bd504ef0
A
2646inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
2647{
2648 struct inpcb *inp = NULL;
2649 int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;
316670eb
A
2650
2651 lck_mtx_lock_spin(&inp_fc_lck);
bd504ef0
A
2652 key_inp.inp_flowhash = flowhash;
2653 inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
2654 if (inp == NULL) {
316670eb
A
2655 /* inp is not present, return */
2656 lck_mtx_unlock(&inp_fc_lck);
2657 return (NULL);
2658 }
2659
bd504ef0
A
2660 if (flags & INPFC_REMOVE) {
2661 RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
2662 lck_mtx_unlock(&inp_fc_lck);
316670eb 2663
bd504ef0
A
2664 bzero(&(inp->infc_link), sizeof (inp->infc_link));
2665 inp->inp_flags2 &= ~INP2_IN_FCTREE;
2666 return (NULL);
316670eb 2667 }
39236c6e 2668
bd504ef0
A
2669 if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING)
2670 inp = NULL;
316670eb
A
2671 lck_mtx_unlock(&inp_fc_lck);
2672
bd504ef0 2673 return (inp);
316670eb
A
2674}
2675
39236c6e 2676static void
316670eb
A
2677inp_fc_feedback(struct inpcb *inp)
2678{
2679 struct socket *so = inp->inp_socket;
2680
2681 /* we already hold a want_cnt on this inp, socket can't be null */
39236c6e 2682 VERIFY(so != NULL);
316670eb
A
2683 socket_lock(so, 1);
2684
2685 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
2686 socket_unlock(so, 1);
2687 return;
2688 }
2689
fe8ab488
A
2690 if (inp->inp_sndinprog_cnt > 0)
2691 inp->inp_flags |= INP_FC_FEEDBACK;
2692
316670eb
A
2693 /*
2694 * Return if the connection is not in flow-controlled state.
2695 * This can happen if the connection experienced
2696 * loss while it was in flow controlled state
2697 */
2698 if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
2699 socket_unlock(so, 1);
2700 return;
2701 }
2702 inp_reset_fc_state(inp);
2703
39236c6e 2704 if (SOCK_TYPE(so) == SOCK_STREAM)
316670eb
A
2705 inp_fc_unthrottle_tcp(inp);
2706
2707 socket_unlock(so, 1);
2708}
2709
2710void
2711inp_reset_fc_state(struct inpcb *inp)
2712{
2713 struct socket *so = inp->inp_socket;
2714 int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
2715 int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;
2716
2717 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
2718
2719 if (suspended) {
2720 so->so_flags &= ~(SOF_SUSPENDED);
2721 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
2722 }
2723
316670eb
A
2724 /* Give a write wakeup to unblock the socket */
2725 if (needwakeup)
2726 sowwakeup(so);
2727}
2728
2729int
2730inp_set_fc_state(struct inpcb *inp, int advcode)
2731{
bd504ef0 2732 struct inpcb *tmp_inp = NULL;
316670eb 2733 /*
39236c6e 2734 * If there was a feedback from the interface when
316670eb
A
2735 * send operation was in progress, we should ignore
2736 * this flow advisory to avoid a race between setting
2737 * flow controlled state and receiving feedback from
2738 * the interface
2739 */
2740 if (inp->inp_flags & INP_FC_FEEDBACK)
39236c6e 2741 return (0);
316670eb
A
2742
2743 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
39236c6e
A
2744 if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
2745 INPFC_SOLOCKED)) != NULL) {
2746 if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING)
bd504ef0
A
2747 return (0);
2748 VERIFY(tmp_inp == inp);
316670eb
A
2749 switch (advcode) {
2750 case FADV_FLOW_CONTROLLED:
2751 inp->inp_flags |= INP_FLOW_CONTROLLED;
2752 break;
2753 case FADV_SUSPENDED:
2754 inp->inp_flags |= INP_FLOW_SUSPENDED;
2755 soevent(inp->inp_socket,
2756 (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));
2757
2758 /* Record the fact that suspend event was sent */
2759 inp->inp_socket->so_flags |= SOF_SUSPENDED;
2760 break;
2761 }
bd504ef0 2762 return (1);
316670eb 2763 }
39236c6e 2764 return (0);
316670eb
A
2765}
2766
2767/*
2768 * Handler for SO_FLUSH socket option.
2769 */
2770int
2771inp_flush(struct inpcb *inp, int optval)
2772{
2773 u_int32_t flowhash = inp->inp_flowhash;
39236c6e 2774 struct ifnet *rtifp, *oifp;
316670eb
A
2775
2776 /* Either all classes or one of the valid ones */
2777 if (optval != SO_TC_ALL && !SO_VALID_TC(optval))
2778 return (EINVAL);
2779
2780 /* We need a flow hash for identification */
2781 if (flowhash == 0)
2782 return (0);
2783
39236c6e
A
2784 /* Grab the interfaces from the route and pcb */
2785 rtifp = ((inp->inp_route.ro_rt != NULL) ?
2786 inp->inp_route.ro_rt->rt_ifp : NULL);
2787 oifp = inp->inp_last_outifp;
2788
2789 if (rtifp != NULL)
2790 if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
2791 if (oifp != NULL && oifp != rtifp)
2792 if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
316670eb
A
2793
2794 return (0);
2795}
2796
2797/*
2798 * Clear the INP_INADDR_ANY flag (special case for PPP only)
2799 */
39236c6e
A
2800void
2801inp_clear_INP_INADDR_ANY(struct socket *so)
316670eb
A
2802{
2803 struct inpcb *inp = NULL;
2804
2805 socket_lock(so, 1);
2806 inp = sotoinpcb(so);
2807 if (inp) {
2808 inp->inp_flags &= ~INP_INADDR_ANY;
2809 }
2810 socket_unlock(so, 1);
2811}
2812
39236c6e
A
2813void
2814inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
2815{
2816 struct socket *so = inp->inp_socket;
2817
2818 soprocinfo->spi_pid = so->last_pid;
fe8ab488
A
2819 if (so->last_pid != 0)
2820 uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
39236c6e
A
2821 /*
2822 * When not delegated, the effective pid is the same as the real pid
2823 */
fe8ab488 2824 if (so->so_flags & SOF_DELEGATED) {
39236c6e 2825 soprocinfo->spi_epid = so->e_pid;
fe8ab488
A
2826 if (so->e_pid != 0)
2827 uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
2828 } else {
39236c6e 2829 soprocinfo->spi_epid = so->last_pid;
fe8ab488 2830 }
39236c6e
A
2831}
2832
2833int
2834inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
2835 struct so_procinfo *soprocinfo)
2836{
2837 struct inpcb *inp = NULL;
2838 int found = 0;
2839
2840 bzero(soprocinfo, sizeof (struct so_procinfo));
2841
2842 if (!flowhash)
2843 return (-1);
2844
2845 lck_rw_lock_shared(pcbinfo->ipi_lock);
2846 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
2847 if (inp->inp_state != INPCB_STATE_DEAD &&
2848 inp->inp_socket != NULL &&
2849 inp->inp_flowhash == flowhash) {
2850 found = 1;
2851 inp_get_soprocinfo(inp, soprocinfo);
2852 break;
2853 }
2854 }
2855 lck_rw_done(pcbinfo->ipi_lock);
2856
2857 return (found);
2858}
2859
2860#if CONFIG_PROC_UUID_POLICY
2861static void
2862inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
2863{
2864 struct socket *so = inp->inp_socket;
2865 int before, after;
2866
2867 VERIFY(so != NULL);
2868 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
2869
fe8ab488 2870 before = INP_NO_CELLULAR(inp);
39236c6e
A
2871 if (set) {
2872 inp_set_nocellular(inp);
2873 } else {
2874 inp_clear_nocellular(inp);
2875 }
fe8ab488 2876 after = INP_NO_CELLULAR(inp);
39236c6e
A
2877 if (net_io_policy_log && (before != after)) {
2878 static const char *ok = "OK";
2879 static const char *nok = "NOACCESS";
2880 uuid_string_t euuid_buf;
2881 pid_t epid;
2882
2883 if (so->so_flags & SOF_DELEGATED) {
2884 uuid_unparse(so->e_uuid, euuid_buf);
2885 epid = so->e_pid;
2886 } else {
2887 uuid_unparse(so->last_uuid, euuid_buf);
2888 epid = so->last_pid;
2889 }
2890
2891 /* allow this socket to generate another notification event */
2892 so->so_ifdenied_notifies = 0;
2893
2894 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
2895 "euuid %s%s %s->%s\n", __func__,
2896 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
2897 SOCK_TYPE(so), epid, euuid_buf,
2898 (so->so_flags & SOF_DELEGATED) ?
2899 " [delegated]" : "",
2900 ((before < after) ? ok : nok),
2901 ((before < after) ? nok : ok));
2902 }
2903}
2904
fe8ab488 2905#if NECP
39236c6e 2906static void
fe8ab488 2907inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
39236c6e
A
2908{
2909 struct socket *so = inp->inp_socket;
2910 int before, after;
2911
2912 VERIFY(so != NULL);
2913 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
2914
fe8ab488 2915 before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
39236c6e 2916 if (set) {
fe8ab488 2917 inp_set_want_app_policy(inp);
39236c6e 2918 } else {
fe8ab488 2919 inp_clear_want_app_policy(inp);
39236c6e 2920 }
fe8ab488 2921 after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
39236c6e
A
2922 if (net_io_policy_log && (before != after)) {
2923 static const char *wanted = "WANTED";
2924 static const char *unwanted = "UNWANTED";
2925 uuid_string_t euuid_buf;
2926 pid_t epid;
2927
2928 if (so->so_flags & SOF_DELEGATED) {
2929 uuid_unparse(so->e_uuid, euuid_buf);
2930 epid = so->e_pid;
2931 } else {
2932 uuid_unparse(so->last_uuid, euuid_buf);
2933 epid = so->last_pid;
2934 }
2935
2936 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
2937 "euuid %s%s %s->%s\n", __func__,
2938 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
2939 SOCK_TYPE(so), epid, euuid_buf,
2940 (so->so_flags & SOF_DELEGATED) ?
2941 " [delegated]" : "",
2942 ((before < after) ? unwanted : wanted),
2943 ((before < after) ? wanted : unwanted));
2944 }
2945}
fe8ab488 2946#endif /* NECP */
39236c6e
A
2947#endif /* !CONFIG_PROC_UUID_POLICY */
2948
fe8ab488
A
2949#if NECP
2950void
2951inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
2952{
2953 necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
2954 if (necp_socket_should_rescope(inp) &&
2955 inp->inp_lport == 0 &&
2956 inp->inp_laddr.s_addr == INADDR_ANY &&
2957 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
2958 // If we should rescope, and the socket is not yet bound
2959 inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
2960 }
2961}
2962#endif /* NECP */
2963
39236c6e
A
/*
 * Refresh the per-socket policy state of a PCB from the process UUID
 * policy table.
 *
 * With CONFIG_PROC_UUID_POLICY the lookup is keyed by the socket's
 * e_uuid when SOF_DELEGATED is set and by last_uuid otherwise.  Returns
 * 0 when nothing needs to be done, when the lookup succeeds, or when
 * the UUID is absent (ENOENT); any other lookup error is returned
 * as-is.  Without CONFIG_PROC_UUID_POLICY this always returns 0.
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t policy_flags = 0;
	int32_t prev_gencnt;
	int error;

	if (!net_io_policy_uuid ||
	    so == NULL || inp->inp_state == INPCB_STATE_DEAD)
		return (0);

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED))
		return (0);

	prev_gencnt = so->so_policy_gencnt;
	error = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
	    so->e_uuid : so->last_uuid), &policy_flags,
	    &so->so_policy_gencnt);

	/* any failure other than a missing entry is handed to the caller */
	if (error != 0 && error != ENOENT)
		return (error);

	/*
	 * Discard the cached generation count if the entry is gone
	 * (ENOENT), so that the checks below are performed.
	 */
	if (error == ENOENT && prev_gencnt != 0)
		so->so_policy_gencnt = 0;

	/*
	 * Only when the generation count has changed are the policy
	 * flags inspected.  A flag that is not present is cleared on
	 * the PCB; a flag that is present is applied only if the lookup
	 * itself succeeded.
	 */
	if (prev_gencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (!(policy_flags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		} else if (error == 0) {
			inp_update_cellular_policy(inp, TRUE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (!(policy_flags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, FALSE);
		} else if (error == 0) {
			inp_update_necp_want_app_policy(inp, TRUE);
		}
#endif /* NECP */
	}

	return (0);
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return (0);
#endif /* !CONFIG_PROC_UUID_POLICY */
}
fe8ab488
A
3024/*
3025 * Called when we need to enforce policy restrictions in the input path.
3026 *
3027 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
3028 */
39236c6e 3029boolean_t
fe8ab488 3030inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
39236c6e
A
3031{
3032 VERIFY(inp != NULL);
3033
fe8ab488
A
3034 /*
3035 * Inbound restrictions.
3036 */
39236c6e
A
3037 if (!sorestrictrecv)
3038 return (FALSE);
3039
fe8ab488
A
3040 if (ifp == NULL)
3041 return (FALSE);
3042
3043 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
3044 return (TRUE);
3045
3046 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
3047 return (TRUE);
3048
3049 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
3050 return (TRUE);
3051
3052 if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV))
39236c6e
A
3053 return (FALSE);
3054
3055 if (inp->inp_flags & INP_RECV_ANYIF)
3056 return (FALSE);
3057
3058 if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp)
3059 return (FALSE);
3060
3061 return (TRUE);
3062}
fe8ab488
A
3063
3064/*
3065 * Called when we need to enforce policy restrictions in the output path.
3066 *
3067 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
3068 */
3069boolean_t
3070inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3071{
3072 VERIFY(inp != NULL);
3073
3074 /*
3075 * Outbound restrictions.
3076 */
3077 if (!sorestrictsend)
3078 return (FALSE);
3079
3080 if (ifp == NULL)
3081 return (FALSE);
3082
3083 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
3084 return (TRUE);
3085
3086 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
3087 return (TRUE);
3088
3089 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
3090 return (TRUE);
3091
3092 return (FALSE);
3093}