]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/in_pcb.c
xnu-3248.60.10.tar.gz
[apple/xnu.git] / bsd / netinet / in_pcb.c
CommitLineData
1c79356b 1/*
3e170ce0 2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
9bccf70c 61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
1c79356b
A
62 */
63
64#include <sys/param.h>
65#include <sys/systm.h>
66#include <sys/malloc.h>
67#include <sys/mbuf.h>
1c79356b 68#include <sys/domain.h>
1c79356b
A
69#include <sys/protosw.h>
70#include <sys/socket.h>
71#include <sys/socketvar.h>
72#include <sys/proc.h>
73#include <sys/kernel.h>
74#include <sys/sysctl.h>
6d2010ae
A
75#include <sys/mcache.h>
76#include <sys/kauth.h>
77#include <sys/priv.h>
39236c6e
A
78#include <sys/proc_uuid_policy.h>
79#include <sys/syslog.h>
fe8ab488 80#include <sys/priv.h>
39236c6e 81
91447636 82#include <libkern/OSAtomic.h>
316670eb 83#include <kern/locks.h>
1c79356b
A
84
85#include <machine/limits.h>
86
1c79356b 87#include <kern/zalloc.h>
1c79356b
A
88
89#include <net/if.h>
1c79356b 90#include <net/if_types.h>
9bccf70c 91#include <net/route.h>
316670eb
A
92#include <net/flowhash.h>
93#include <net/flowadv.h>
fe8ab488 94#include <net/ntstat.h>
1c79356b
A
95
96#include <netinet/in.h>
97#include <netinet/in_pcb.h>
98#include <netinet/in_var.h>
99#include <netinet/ip_var.h>
100#if INET6
101#include <netinet/ip6.h>
102#include <netinet6/ip6_var.h>
103#endif /* INET6 */
104
1c79356b 105#include <sys/kdebug.h>
b0d623f7 106#include <sys/random.h>
39236c6e 107
316670eb 108#include <dev/random/randomdev.h>
39236c6e 109#include <mach/boolean.h>
1c79356b 110
fe8ab488
A
111#if NECP
112#include <net/necp.h>
9bccf70c 113#endif
1c79356b 114
39236c6e
A
115static lck_grp_t *inpcb_lock_grp;
116static lck_attr_t *inpcb_lock_attr;
117static lck_grp_attr_t *inpcb_lock_grp_attr;
118decl_lck_mtx_data(static, inpcb_lock); /* global INPCB lock */
119decl_lck_mtx_data(static, inpcb_timeout_lock);
120
121static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);
122
123static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */
124static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
125static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */
126static boolean_t inpcb_fast_timer_on = FALSE;
fe8ab488
A
127
128/*
129 * If the total number of gc reqs is above a threshold, schedule
130 * garbage collect timer sooner
131 */
132static boolean_t inpcb_toomany_gcreq = FALSE;
133
134#define INPCB_GCREQ_THRESHOLD 50000
135#define INPCB_TOOMANY_GCREQ_TIMER (hz/10) /* 10 times a second */
136
39236c6e
A
/* Timer plumbing implemented further down in this file */
static void	inpcb_sched_timeout(struct timeval *);
static void	inpcb_timeout(void *);

/* 10 seconds leeway for lazy timers */
int		inpcb_timeout_lazy = 10;

extern int	tvtohz(struct timeval *);

#if CONFIG_PROC_UUID_POLICY
static void	inp_update_cellular_policy(struct inpcb *, boolean_t);
#if NECP
static void	inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
#endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */

/* kdebug trace codes for the PCB lookup paths */
#define	DBG_FNC_PCB_LOOKUP	NETDBG_CODE(DBG_NETTCP, (6 << 8))
#define	DBG_FNC_PCB_HLOOKUP	NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
1c79356b 151
1c79356b
A
152/*
153 * These configure the range of local port addresses assigned to
154 * "unspecified" outgoing connections/packets/whatever.
155 */
9bccf70c
A
156int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
157int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
39236c6e
A
158int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
159int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
9bccf70c
A
160int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
161int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
1c79356b 162
/*
 * Clamp the lvalue "var" into the closed interval [min, max].
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement: it can be followed by a semicolon and used as the body of
 * an unbraced if/else without capturing the caller's "else".
 * Note that "var" is evaluated more than once.
 */
#define	RANGECHK(var, min, max)						\
	do {								\
		if ((var) < (min)) {					\
			(var) = (min);					\
		} else if ((var) > (max)) {				\
			(var) = (max);					\
		}							\
	} while (0)
166
1c79356b
A
167static int
168sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
169{
2d21ac55 170#pragma unused(arg1, arg2)
39236c6e
A
171 int error;
172
173 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
1c79356b
A
174 if (!error) {
175 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
176 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
177 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
178 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
179 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
180 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
181 }
39236c6e 182 return (error);
1c79356b
A
183}
184
185#undef RANGECHK
186
39236c6e
A
187SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
188 CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports");
189
190SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
191 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
192 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
193SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
194 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
195 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
196SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
197 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
198 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
199SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
200 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
201 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
202SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
203 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
204 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
205SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
206 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
207 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
1c79356b 208
b0d623f7
A
/*
 * Per-protocol switches consulted by in_pcbbind() when it picks an
 * anonymous port: non-zero requests a randomized starting point.
 */
extern int	udp_use_randomport;
extern int	tcp_use_randomport;
211
316670eb
A
212/* Structs used for flowhash computation */
213struct inp_flowhash_key_addr {
214 union {
215 struct in_addr v4;
216 struct in6_addr v6;
217 u_int8_t addr8[16];
218 u_int16_t addr16[8];
219 u_int32_t addr32[4];
220 } infha;
221};
222
223struct inp_flowhash_key {
39236c6e 224 struct inp_flowhash_key_addr infh_laddr;
316670eb
A
225 struct inp_flowhash_key_addr infh_faddr;
226 u_int32_t infh_lport;
227 u_int32_t infh_fport;
228 u_int32_t infh_af;
229 u_int32_t infh_proto;
230 u_int32_t infh_rand1;
231 u_int32_t infh_rand2;
232};
233
39236c6e
A
234static u_int32_t inp_hash_seed = 0;
235
236static int infc_cmp(const struct inpcb *, const struct inpcb *);
237
238/* Flags used by inp_fc_getinp */
239#define INPFC_SOLOCKED 0x1
240#define INPFC_REMOVE 0x2
241static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);
242
243static void inp_fc_feedback(struct inpcb *);
244extern void tcp_remove_from_time_wait(struct inpcb *inp);
316670eb 245
39236c6e 246decl_lck_mtx_data(static, inp_fc_lck);
316670eb 247
bd504ef0
A
248RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
249RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
250RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);
316670eb 251
bd504ef0
A
252/*
253 * Use this inp as a key to find an inp in the flowhash tree.
254 * Accesses to it are protected by inp_fc_lck.
255 */
256struct inpcb key_inp;
316670eb 257
1c79356b
A
258/*
259 * in_pcb.c: manage the Protocol Control Blocks.
1c79356b
A
260 */
261
316670eb 262void
39236c6e 263in_pcbinit(void)
316670eb 264{
39236c6e 265 static int inpcb_initialized = 0;
316670eb 266
39236c6e
A
267 VERIFY(!inpcb_initialized);
268 inpcb_initialized = 1;
316670eb 269
39236c6e
A
270 inpcb_lock_grp_attr = lck_grp_attr_alloc_init();
271 inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr);
272 inpcb_lock_attr = lck_attr_alloc_init();
273 lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr);
274 lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr);
275
276 /*
277 * Initialize data structures required to deliver
278 * flow advisories.
279 */
280 lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr);
bd504ef0 281 lck_mtx_lock(&inp_fc_lck);
316670eb 282 RB_INIT(&inp_fc_tree);
bd504ef0
A
283 bzero(&key_inp, sizeof(key_inp));
284 lck_mtx_unlock(&inp_fc_lck);
316670eb
A
285}
286
39236c6e
A
287#define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \
288 ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
289static void
290inpcb_timeout(void *arg)
291{
292#pragma unused(arg)
293 struct inpcbinfo *ipi;
294 boolean_t t, gc;
295 struct intimercount gccnt, tmcnt;
296 struct timeval leeway;
fe8ab488
A
297 boolean_t toomany_gc = FALSE;
298
299 if (arg != NULL) {
300 VERIFY(arg == &inpcb_toomany_gcreq);
301 toomany_gc = *(boolean_t *)arg;
302 }
39236c6e
A
303
304 /*
305 * Update coarse-grained networking timestamp (in sec.); the idea
306 * is to piggy-back on the timeout callout to update the counter
307 * returnable via net_uptime().
308 */
309 net_update_uptime();
310
fe8ab488
A
311 bzero(&gccnt, sizeof(gccnt));
312 bzero(&tmcnt, sizeof(tmcnt));
313
39236c6e
A
314 lck_mtx_lock_spin(&inpcb_timeout_lock);
315 gc = inpcb_garbage_collecting;
316 inpcb_garbage_collecting = FALSE;
39236c6e
A
317
318 t = inpcb_ticking;
319 inpcb_ticking = FALSE;
320
321 if (gc || t) {
322 lck_mtx_unlock(&inpcb_timeout_lock);
323
324 lck_mtx_lock(&inpcb_lock);
325 TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
326 if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
327 bzero(&ipi->ipi_gc_req,
328 sizeof(ipi->ipi_gc_req));
329 if (gc && ipi->ipi_gc != NULL) {
330 ipi->ipi_gc(ipi);
331 gccnt.intimer_lazy +=
332 ipi->ipi_gc_req.intimer_lazy;
333 gccnt.intimer_fast +=
334 ipi->ipi_gc_req.intimer_fast;
335 gccnt.intimer_nodelay +=
336 ipi->ipi_gc_req.intimer_nodelay;
337 }
338 }
339 if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
340 bzero(&ipi->ipi_timer_req,
341 sizeof(ipi->ipi_timer_req));
342 if (t && ipi->ipi_timer != NULL) {
343 ipi->ipi_timer(ipi);
344 tmcnt.intimer_lazy +=
345 ipi->ipi_timer_req.intimer_lazy;
346 tmcnt.intimer_lazy +=
347 ipi->ipi_timer_req.intimer_fast;
348 tmcnt.intimer_nodelay +=
349 ipi->ipi_timer_req.intimer_nodelay;
350 }
351 }
352 }
353 lck_mtx_unlock(&inpcb_lock);
354 lck_mtx_lock_spin(&inpcb_timeout_lock);
355 }
356
357 /* lock was dropped above, so check first before overriding */
358 if (!inpcb_garbage_collecting)
359 inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
360 if (!inpcb_ticking)
361 inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
362
363 /* re-arm the timer if there's work to do */
fe8ab488
A
364 if (toomany_gc) {
365 inpcb_toomany_gcreq = FALSE;
366 } else {
367 inpcb_timeout_run--;
368 VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);
369 }
39236c6e
A
370
371 bzero(&leeway, sizeof(leeway));
372 leeway.tv_sec = inpcb_timeout_lazy;
373 if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0)
374 inpcb_sched_timeout(NULL);
375 else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5)
376 /* be lazy when idle with little activity */
377 inpcb_sched_timeout(&leeway);
378 else
379 inpcb_sched_timeout(NULL);
380
381 lck_mtx_unlock(&inpcb_timeout_lock);
382}
383
384static void
385inpcb_sched_timeout(struct timeval *leeway)
386{
387 lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
388
389 if (inpcb_timeout_run == 0 &&
390 (inpcb_garbage_collecting || inpcb_ticking)) {
391 lck_mtx_convert_spin(&inpcb_timeout_lock);
392 inpcb_timeout_run++;
393 if (leeway == NULL) {
394 inpcb_fast_timer_on = TRUE;
395 timeout(inpcb_timeout, NULL, hz);
396 } else {
397 inpcb_fast_timer_on = FALSE;
398 timeout_with_leeway(inpcb_timeout, NULL, hz,
399 tvtohz(leeway));
400 }
401 } else if (inpcb_timeout_run == 1 &&
402 leeway == NULL && !inpcb_fast_timer_on) {
403 /*
404 * Since the request was for a fast timer but the
405 * scheduled timer is a lazy timer, try to schedule
406 * another instance of fast timer also
407 */
408 lck_mtx_convert_spin(&inpcb_timeout_lock);
409 inpcb_timeout_run++;
410 inpcb_fast_timer_on = TRUE;
411 timeout(inpcb_timeout, NULL, hz);
412 }
413}
414
415void
416inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
417{
418 struct timeval leeway;
fe8ab488 419 u_int32_t gccnt;
39236c6e
A
420 lck_mtx_lock_spin(&inpcb_timeout_lock);
421 inpcb_garbage_collecting = TRUE;
fe8ab488
A
422
423 gccnt = ipi->ipi_gc_req.intimer_nodelay +
424 ipi->ipi_gc_req.intimer_fast;
425
426 if (gccnt > INPCB_GCREQ_THRESHOLD && !inpcb_toomany_gcreq) {
427 inpcb_toomany_gcreq = TRUE;
428
429 /*
430 * There are toomany pcbs waiting to be garbage collected,
431 * schedule a much faster timeout in addition to
432 * the caller's request
433 */
434 lck_mtx_convert_spin(&inpcb_timeout_lock);
435 timeout(inpcb_timeout, (void *)&inpcb_toomany_gcreq,
436 INPCB_TOOMANY_GCREQ_TIMER);
437 }
438
39236c6e
A
439 switch (type) {
440 case INPCB_TIMER_NODELAY:
441 atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1);
442 inpcb_sched_timeout(NULL);
443 break;
444 case INPCB_TIMER_FAST:
445 atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
446 inpcb_sched_timeout(NULL);
447 break;
448 default:
449 atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);
450 leeway.tv_sec = inpcb_timeout_lazy;
451 leeway.tv_usec = 0;
452 inpcb_sched_timeout(&leeway);
453 break;
454 }
455 lck_mtx_unlock(&inpcb_timeout_lock);
456}
457
458void
459inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
460{
461 struct timeval leeway;
462 lck_mtx_lock_spin(&inpcb_timeout_lock);
463 inpcb_ticking = TRUE;
464 switch (type) {
465 case INPCB_TIMER_NODELAY:
466 atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1);
467 inpcb_sched_timeout(NULL);
468 break;
469 case INPCB_TIMER_FAST:
470 atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
471 inpcb_sched_timeout(NULL);
472 break;
473 default:
474 atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1);
475 leeway.tv_sec = inpcb_timeout_lazy;
476 leeway.tv_usec = 0;
477 inpcb_sched_timeout(&leeway);
478 break;
479 }
480 lck_mtx_unlock(&inpcb_timeout_lock);
481}
482
483void
484in_pcbinfo_attach(struct inpcbinfo *ipi)
485{
486 struct inpcbinfo *ipi0;
487
488 lck_mtx_lock(&inpcb_lock);
489 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
490 if (ipi0 == ipi) {
491 panic("%s: ipi %p already in the list\n",
492 __func__, ipi);
493 /* NOTREACHED */
494 }
495 }
496 TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
497 lck_mtx_unlock(&inpcb_lock);
498}
499
500int
501in_pcbinfo_detach(struct inpcbinfo *ipi)
502{
503 struct inpcbinfo *ipi0;
504 int error = 0;
505
506 lck_mtx_lock(&inpcb_lock);
507 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
508 if (ipi0 == ipi)
509 break;
510 }
511 if (ipi0 != NULL)
512 TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
513 else
514 error = ENXIO;
515 lck_mtx_unlock(&inpcb_lock);
516
517 return (error);
518}
519
1c79356b
A
520/*
521 * Allocate a PCB and associate it with the socket.
2d21ac55
A
522 *
523 * Returns: 0 Success
524 * ENOBUFS
525 * ENOMEM
1c79356b
A
526 */
527int
39236c6e 528in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
1c79356b 529{
39236c6e 530#pragma unused(p)
2d21ac55 531 struct inpcb *inp;
39236c6e 532 caddr_t temp;
2d21ac55
A
533#if CONFIG_MACF_NET
534 int mac_error;
39236c6e 535#endif /* CONFIG_MACF_NET */
1c79356b 536
3e170ce0 537 if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
39236c6e
A
538 inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone);
539 if (inp == NULL)
540 return (ENOBUFS);
541 bzero((caddr_t)inp, sizeof (*inp));
542 } else {
543 inp = (struct inpcb *)(void *)so->so_saved_pcb;
544 temp = inp->inp_saved_ppcb;
545 bzero((caddr_t)inp, sizeof (*inp));
546 inp->inp_saved_ppcb = temp;
1c79356b
A
547 }
548
549 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
550 inp->inp_pcbinfo = pcbinfo;
551 inp->inp_socket = so;
2d21ac55
A
552#if CONFIG_MACF_NET
553 mac_error = mac_inpcb_label_init(inp, M_WAITOK);
554 if (mac_error != 0) {
3e170ce0 555 if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0)
2d21ac55
A
556 zfree(pcbinfo->ipi_zone, inp);
557 return (mac_error);
558 }
559 mac_inpcb_label_associate(so, inp);
39236c6e
A
560#endif /* CONFIG_MACF_NET */
561 /* make sure inp_stat is always 64-bit aligned */
562 inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
563 sizeof (u_int64_t));
564 if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
565 sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) {
566 panic("%s: insufficient space to align inp_stat", __func__);
567 /* NOTREACHED */
568 }
569
570 /* make sure inp_cstat is always 64-bit aligned */
571 inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
572 sizeof (u_int64_t));
573 if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
574 sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) {
575 panic("%s: insufficient space to align inp_cstat", __func__);
576 /* NOTREACHED */
577 }
578
579 /* make sure inp_wstat is always 64-bit aligned */
580 inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
581 sizeof (u_int64_t));
582 if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
583 sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) {
584 panic("%s: insufficient space to align inp_wstat", __func__);
585 /* NOTREACHED */
6d2010ae
A
586 }
587
fe8ab488
A
588 /* make sure inp_Wstat is always 64-bit aligned */
589 inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
590 sizeof (u_int64_t));
591 if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
592 sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) {
593 panic("%s: insufficient space to align inp_Wstat", __func__);
594 /* NOTREACHED */
595 }
596
91447636
A
597 so->so_pcb = (caddr_t)inp;
598
599 if (so->so_proto->pr_flags & PR_PCBLOCK) {
39236c6e
A
600 lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
601 pcbinfo->ipi_lock_attr);
91447636
A
602 }
603
2d21ac55 604#if INET6
39236c6e 605 if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on)
9bccf70c 606 inp->inp_flags |= IN6P_IPV6_V6ONLY;
39236c6e 607
9bccf70c
A
608 if (ip6_auto_flowlabel)
609 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
39236c6e
A
610#endif /* INET6 */
611
612 (void) inp_update_policy(inp);
613
614 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
91447636 615 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
39236c6e 616 LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
91447636 617 pcbinfo->ipi_count++;
39236c6e 618 lck_rw_done(pcbinfo->ipi_lock);
1c79356b
A
619 return (0);
620}
621
2d21ac55 622/*
39236c6e
A
623 * in_pcblookup_local_and_cleanup does everything
624 * in_pcblookup_local does but it checks for a socket
625 * that's going away. Since we know that the lock is
626 * held read+write when this funciton is called, we
627 * can safely dispose of this socket like the slow
628 * timer would usually do and return NULL. This is
629 * great for bind.
630 */
631struct inpcb *
632in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
633 u_int lport_arg, int wild_okay)
2d21ac55
A
634{
635 struct inpcb *inp;
39236c6e 636
2d21ac55
A
637 /* Perform normal lookup */
638 inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
39236c6e 639
2d21ac55 640 /* Check if we found a match but it's waiting to be disposed */
39236c6e 641 if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
2d21ac55 642 struct socket *so = inp->inp_socket;
39236c6e 643
6d2010ae 644 lck_mtx_lock(&inp->inpcb_mtx);
39236c6e 645
2d21ac55 646 if (so->so_usecount == 0) {
b0d623f7
A
647 if (inp->inp_state != INPCB_STATE_DEAD)
648 in_pcbdetach(inp);
39236c6e 649 in_pcbdispose(inp); /* will unlock & destroy */
2d21ac55 650 inp = NULL;
39236c6e 651 } else {
6d2010ae 652 lck_mtx_unlock(&inp->inpcb_mtx);
2d21ac55
A
653 }
654 }
39236c6e
A
655
656 return (inp);
2d21ac55
A
657}
658
c910b4d9 659static void
2d21ac55
A
660in_pcb_conflict_post_msg(u_int16_t port)
661{
39236c6e
A
662 /*
663 * Radar 5523020 send a kernel event notification if a
664 * non-participating socket tries to bind the port a socket
665 * who has set SOF_NOTIFYCONFLICT owns.
2d21ac55 666 */
39236c6e 667 struct kev_msg ev_msg;
2d21ac55
A
668 struct kev_in_portinuse in_portinuse;
669
39236c6e
A
670 bzero(&in_portinuse, sizeof (struct kev_in_portinuse));
671 bzero(&ev_msg, sizeof (struct kev_msg));
2d21ac55
A
672 in_portinuse.port = ntohs(port); /* port in host order */
673 in_portinuse.req_pid = proc_selfpid();
674 ev_msg.vendor_code = KEV_VENDOR_APPLE;
675 ev_msg.kev_class = KEV_NETWORK_CLASS;
676 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
677 ev_msg.event_code = KEV_INET_PORTINUSE;
678 ev_msg.dv[0].data_ptr = &in_portinuse;
39236c6e 679 ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse);
2d21ac55
A
680 ev_msg.dv[1].data_length = 0;
681 kev_post_msg(&ev_msg);
682}
39236c6e 683
2d21ac55 684/*
39236c6e
A
685 * Bind an INPCB to an address and/or port. This routine should not alter
686 * the caller-supplied local address "nam".
687 *
2d21ac55
A
688 * Returns: 0 Success
689 * EADDRNOTAVAIL Address not available.
690 * EINVAL Invalid argument
691 * EAFNOSUPPORT Address family not supported [notdef]
692 * EACCES Permission denied
693 * EADDRINUSE Address in use
694 * EAGAIN Resource unavailable, try again
6d2010ae 695 * priv_check_cred:EPERM Operation not permitted
2d21ac55 696 */
1c79356b 697int
2d21ac55 698in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
1c79356b 699{
2d21ac55 700 struct socket *so = inp->inp_socket;
9bccf70c 701 unsigned short *lastport;
1c79356b 702 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
b0d623f7 703 u_short lport = 0, rand_port = 0;
1c79356b 704 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
b0d623f7 705 int error, randomport, conflict = 0;
fe8ab488 706 boolean_t anonport = FALSE;
6d2010ae 707 kauth_cred_t cred;
fe8ab488
A
708 struct in_addr laddr;
709 struct ifnet *outif = NULL;
1c79356b
A
710
711 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
712 return (EADDRNOTAVAIL);
39236c6e 713 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
1c79356b 714 return (EINVAL);
39236c6e 715 if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
1c79356b 716 wild = 1;
fe8ab488
A
717
718 bzero(&laddr, sizeof(laddr));
719
4bd07ac2
A
720 socket_unlock(so, 0); /* keep reference on socket */
721 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
722
39236c6e 723 if (nam != NULL) {
6d2010ae 724
39236c6e
A
725 if (nam->sa_len != sizeof (struct sockaddr_in)) {
726 lck_rw_done(pcbinfo->ipi_lock);
91447636 727 socket_lock(so, 0);
1c79356b 728 return (EINVAL);
91447636 729 }
39236c6e 730#if 0
1c79356b
A
731 /*
732 * We should check the family, but old programs
733 * incorrectly fail to initialize it.
734 */
39236c6e
A
735 if (nam->sa_family != AF_INET) {
736 lck_rw_done(pcbinfo->ipi_lock);
91447636 737 socket_lock(so, 0);
1c79356b 738 return (EAFNOSUPPORT);
91447636 739 }
39236c6e
A
740#endif /* 0 */
741 lport = SIN(nam)->sin_port;
742
743 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
1c79356b
A
744 /*
745 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
746 * allow complete duplication of binding if
747 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
748 * and a multicast address is bound on both
749 * new and duplicated sockets.
750 */
751 if (so->so_options & SO_REUSEADDR)
752 reuseport = SO_REUSEADDR|SO_REUSEPORT;
39236c6e
A
753 } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
754 struct sockaddr_in sin;
91447636 755 struct ifaddr *ifa;
39236c6e
A
756
757 /* Sanitized for interface address searches */
758 bzero(&sin, sizeof (sin));
759 sin.sin_family = AF_INET;
760 sin.sin_len = sizeof (struct sockaddr_in);
761 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
762
763 ifa = ifa_ifwithaddr(SA(&sin));
764 if (ifa == NULL) {
765 lck_rw_done(pcbinfo->ipi_lock);
91447636 766 socket_lock(so, 0);
1c79356b 767 return (EADDRNOTAVAIL);
39236c6e
A
768 } else {
769 /*
770 * Opportunistically determine the outbound
771 * interface that may be used; this may not
772 * hold true if we end up using a route
773 * going over a different interface, e.g.
774 * when sending to a local address. This
775 * will get updated again after sending.
776 */
6d2010ae 777 IFA_LOCK(ifa);
316670eb 778 outif = ifa->ifa_ifp;
6d2010ae
A
779 IFA_UNLOCK(ifa);
780 IFA_REMREF(ifa);
91447636 781 }
1c79356b 782 }
39236c6e 783 if (lport != 0) {
1c79356b 784 struct inpcb *t;
39236c6e 785 uid_t u;
1c79356b 786
6d2010ae
A
787 if (ntohs(lport) < IPPORT_RESERVED) {
788 cred = kauth_cred_proc_ref(p);
39236c6e
A
789 error = priv_check_cred(cred,
790 PRIV_NETINET_RESERVEDPORT, 0);
6d2010ae
A
791 kauth_cred_unref(&cred);
792 if (error != 0) {
39236c6e 793 lck_rw_done(pcbinfo->ipi_lock);
6d2010ae
A
794 socket_lock(so, 0);
795 return (EACCES);
796 }
91447636 797 }
39236c6e
A
798 if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
799 (u = kauth_cred_getuid(so->so_cred)) != 0 &&
800 (t = in_pcblookup_local_and_cleanup(
801 inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
802 INPLOOKUP_WILDCARD)) != NULL &&
803 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
804 t->inp_laddr.s_addr != INADDR_ANY ||
805 !(t->inp_socket->so_options & SO_REUSEPORT)) &&
806 (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
807 !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
808 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
809 t->inp_laddr.s_addr != INADDR_ANY)) {
810 if ((t->inp_socket->so_flags &
811 SOF_NOTIFYCONFLICT) &&
812 !(so->so_flags & SOF_NOTIFYCONFLICT))
813 conflict = 1;
814
815 lck_rw_done(pcbinfo->ipi_lock);
816
817 if (conflict)
818 in_pcb_conflict_post_msg(lport);
2d21ac55 819
39236c6e
A
820 socket_lock(so, 0);
821 return (EADDRINUSE);
1c79356b 822 }
39236c6e
A
823 t = in_pcblookup_local_and_cleanup(pcbinfo,
824 SIN(nam)->sin_addr, lport, wild);
825 if (t != NULL &&
1c79356b
A
826 (reuseport & t->inp_socket->so_options) == 0) {
827#if INET6
39236c6e
A
828 if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
829 t->inp_laddr.s_addr != INADDR_ANY ||
830 SOCK_DOM(so) != PF_INET6 ||
831 SOCK_DOM(t->inp_socket) != PF_INET6)
2d21ac55
A
832#endif /* INET6 */
833 {
2d21ac55 834
39236c6e
A
835 if ((t->inp_socket->so_flags &
836 SOF_NOTIFYCONFLICT) &&
837 !(so->so_flags & SOF_NOTIFYCONFLICT))
2d21ac55
A
838 conflict = 1;
839
39236c6e 840 lck_rw_done(pcbinfo->ipi_lock);
2d21ac55
A
841
842 if (conflict)
843 in_pcb_conflict_post_msg(lport);
91447636
A
844 socket_lock(so, 0);
845 return (EADDRINUSE);
846 }
1c79356b
A
847 }
848 }
fe8ab488 849 laddr = SIN(nam)->sin_addr;
1c79356b
A
850 }
851 if (lport == 0) {
852 u_short first, last;
853 int count;
854
39236c6e
A
855 randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
856 (so->so_type == SOCK_STREAM ? tcp_use_randomport :
857 udp_use_randomport);
858
859 /*
fe8ab488
A
860 * Even though this looks similar to the code in
861 * in6_pcbsetport, the v6 vs v4 checks are different.
39236c6e 862 */
fe8ab488 863 anonport = TRUE;
1c79356b
A
864 if (inp->inp_flags & INP_HIGHPORT) {
865 first = ipport_hifirstauto; /* sysctl */
866 last = ipport_hilastauto;
39236c6e 867 lastport = &pcbinfo->ipi_lasthi;
1c79356b 868 } else if (inp->inp_flags & INP_LOWPORT) {
6d2010ae 869 cred = kauth_cred_proc_ref(p);
39236c6e
A
870 error = priv_check_cred(cred,
871 PRIV_NETINET_RESERVEDPORT, 0);
6d2010ae
A
872 kauth_cred_unref(&cred);
873 if (error != 0) {
39236c6e 874 lck_rw_done(pcbinfo->ipi_lock);
91447636 875 socket_lock(so, 0);
39236c6e 876 return (error);
91447636 877 }
1c79356b
A
878 first = ipport_lowfirstauto; /* 1023 */
879 last = ipport_lowlastauto; /* 600 */
39236c6e 880 lastport = &pcbinfo->ipi_lastlow;
1c79356b
A
881 } else {
882 first = ipport_firstauto; /* sysctl */
883 last = ipport_lastauto;
39236c6e 884 lastport = &pcbinfo->ipi_lastport;
1c79356b 885 }
b0d623f7
A
886 /* No point in randomizing if only one port is available */
887
888 if (first == last)
39236c6e 889 randomport = 0;
1c79356b
A
890 /*
891 * Simple check to ensure all ports are not used up causing
892 * a deadlock here.
893 *
894 * We split the two cases (up and down) so that the direction
895 * is not being tested on each round of the loop.
896 */
897 if (first > last) {
898 /*
899 * counting down
900 */
b0d623f7 901 if (randomport) {
39236c6e
A
902 read_random(&rand_port, sizeof (rand_port));
903 *lastport =
904 first - (rand_port % (first - last));
b0d623f7 905 }
1c79356b
A
906 count = first - last;
907
908 do {
909 if (count-- < 0) { /* completely used? */
39236c6e 910 lck_rw_done(pcbinfo->ipi_lock);
91447636 911 socket_lock(so, 0);
9bccf70c 912 return (EADDRNOTAVAIL);
1c79356b
A
913 }
914 --*lastport;
915 if (*lastport > first || *lastport < last)
916 *lastport = first;
917 lport = htons(*lastport);
2d21ac55 918 } while (in_pcblookup_local_and_cleanup(pcbinfo,
fe8ab488
A
919 ((laddr.s_addr != INADDR_ANY) ? laddr :
920 inp->inp_laddr), lport, wild));
1c79356b
A
921 } else {
922 /*
923 * counting up
924 */
b0d623f7 925 if (randomport) {
39236c6e
A
926 read_random(&rand_port, sizeof (rand_port));
927 *lastport =
928 first + (rand_port % (first - last));
b0d623f7 929 }
1c79356b
A
930 count = last - first;
931
932 do {
933 if (count-- < 0) { /* completely used? */
39236c6e 934 lck_rw_done(pcbinfo->ipi_lock);
91447636 935 socket_lock(so, 0);
9bccf70c 936 return (EADDRNOTAVAIL);
1c79356b
A
937 }
938 ++*lastport;
939 if (*lastport < first || *lastport > last)
940 *lastport = first;
941 lport = htons(*lastport);
2d21ac55 942 } while (in_pcblookup_local_and_cleanup(pcbinfo,
fe8ab488
A
943 ((laddr.s_addr != INADDR_ANY) ? laddr :
944 inp->inp_laddr), lport, wild));
1c79356b
A
945 }
946 }
91447636 947 socket_lock(so, 0);
4bd07ac2
A
948
949 /*
950 * We unlocked socket's protocol lock for a long time.
951 * The socket might have been dropped/defuncted.
952 * Checking if world has changed since.
953 */
954 if (inp->inp_state == INPCB_STATE_DEAD) {
955 lck_rw_done(pcbinfo->ipi_lock);
956 return (ECONNABORTED);
957 }
958
fe8ab488
A
959 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
960 lck_rw_done(pcbinfo->ipi_lock);
961 return (EINVAL);
962 }
963
964 if (laddr.s_addr != INADDR_ANY) {
965 inp->inp_laddr = laddr;
966 inp->inp_last_outifp = outif;
967 }
1c79356b 968 inp->inp_lport = lport;
fe8ab488
A
969 if (anonport)
970 inp->inp_flags |= INP_ANONPORT;
971
91447636 972 if (in_pcbinshash(inp, 1) != 0) {
1c79356b 973 inp->inp_laddr.s_addr = INADDR_ANY;
316670eb 974 inp->inp_last_outifp = NULL;
fe8ab488
A
975
976 inp->inp_lport = 0;
977 if (anonport)
978 inp->inp_flags &= ~INP_ANONPORT;
39236c6e 979 lck_rw_done(pcbinfo->ipi_lock);
1c79356b
A
980 return (EAGAIN);
981 }
39236c6e 982 lck_rw_done(pcbinfo->ipi_lock);
2d21ac55 983 sflt_notify(so, sock_evt_bound, NULL);
1c79356b
A
984 return (0);
985}
986
987/*
39236c6e
A
988 * Transform old in_pcbconnect() into an inner subroutine for new
989 * in_pcbconnect(); do some validity-checking on the remote address
990 * (in "nam") and then determine local host address (i.e., which
991 * interface) to use to access that remote host.
992 *
993 * This routine may alter the caller-supplied remote address "nam".
1c79356b 994 *
39236c6e
A
995 * The caller may override the bound-to-interface setting of the socket
996 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
997 *
998 * This routine might return an ifp with a reference held if the caller
999 * provides a non-NULL outif, even in the error case. The caller is
1000 * responsible for releasing its reference.
2d21ac55
A
1001 *
1002 * Returns: 0 Success
1003 * EINVAL Invalid argument
1004 * EAFNOSUPPORT Address family not supported
1005 * EADDRNOTAVAIL Address not available
1c79356b 1006 */
1c79356b 1007int
39236c6e
A
1008in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
1009 unsigned int ifscope, struct ifnet **outif)
1c79356b 1010{
39236c6e
A
1011 struct route *ro = &inp->inp_route;
1012 struct in_ifaddr *ia = NULL;
1013 struct sockaddr_in sin;
1014 int error = 0;
fe8ab488 1015 boolean_t restricted = FALSE;
39236c6e
A
1016
1017 if (outif != NULL)
1018 *outif = NULL;
1019 if (nam->sa_len != sizeof (struct sockaddr_in))
1c79356b 1020 return (EINVAL);
39236c6e 1021 if (SIN(nam)->sin_family != AF_INET)
1c79356b 1022 return (EAFNOSUPPORT);
39236c6e 1023 if (SIN(nam)->sin_port == 0)
1c79356b 1024 return (EADDRNOTAVAIL);
b0d623f7 1025
39236c6e
A
1026 /*
1027 * If the destination address is INADDR_ANY,
1028 * use the primary local address.
1029 * If the supplied address is INADDR_BROADCAST,
1030 * and the primary interface supports broadcast,
1031 * choose the broadcast address for that interface.
1032 */
1033 if (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
1034 SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST) {
1035 lck_rw_lock_shared(in_ifaddr_rwlock);
1036 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
1037 ia = TAILQ_FIRST(&in_ifaddrhead);
1038 IFA_LOCK_SPIN(&ia->ia_ifa);
1039 if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
1040 SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
1041 } else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
1042 SIN(nam)->sin_addr =
1043 SIN(&ia->ia_broadaddr)->sin_addr;
1044 }
1045 IFA_UNLOCK(&ia->ia_ifa);
1046 ia = NULL;
1047 }
1048 lck_rw_done(in_ifaddr_rwlock);
1049 }
1050 /*
1051 * Otherwise, if the socket has already bound the source, just use it.
1052 */
1053 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1054 VERIFY(ia == NULL);
1055 *laddr = inp->inp_laddr;
1056 return (0);
1c79356b 1057 }
6d2010ae 1058
39236c6e
A
1059 /*
1060 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
1061 * then it overrides the sticky ifscope set for the socket.
1062 */
1063 if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF))
1064 ifscope = inp->inp_boundifp->if_index;
6d2010ae 1065
39236c6e
A
1066 /*
1067 * If route is known or can be allocated now,
1068 * our src addr is taken from the i/f, else punt.
1069 * Note that we should check the address family of the cached
1070 * destination, in case of sharing the cache with IPv6.
1071 */
1072 if (ro->ro_rt != NULL)
1073 RT_LOCK_SPIN(ro->ro_rt);
1074 if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
1075 SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
1076 (inp->inp_socket->so_options & SO_DONTROUTE)) {
b0d623f7 1077 if (ro->ro_rt != NULL)
b0d623f7 1078 RT_UNLOCK(ro->ro_rt);
39236c6e
A
1079 ROUTE_RELEASE(ro);
1080 }
1081 if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
1082 (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
1083 if (ro->ro_rt != NULL)
1084 RT_UNLOCK(ro->ro_rt);
1085 ROUTE_RELEASE(ro);
1086 /* No route yet, so try to acquire one */
1087 bzero(&ro->ro_dst, sizeof (struct sockaddr_in));
1088 ro->ro_dst.sa_family = AF_INET;
1089 ro->ro_dst.sa_len = sizeof (struct sockaddr_in);
1090 SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
1091 rtalloc_scoped(ro, ifscope);
1092 if (ro->ro_rt != NULL)
1093 RT_LOCK_SPIN(ro->ro_rt);
1094 }
1095 /* Sanitized local copy for interface address searches */
1096 bzero(&sin, sizeof (sin));
1097 sin.sin_family = AF_INET;
1098 sin.sin_len = sizeof (struct sockaddr_in);
1099 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
1100 /*
1101 * If we did not find (or use) a route, assume dest is reachable
1102 * on a directly connected network and try to find a corresponding
1103 * interface to take the source address from.
1104 */
1105 if (ro->ro_rt == NULL) {
1106 VERIFY(ia == NULL);
1107 ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
1108 if (ia == NULL)
1109 ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
1110 error = ((ia == NULL) ? ENETUNREACH : 0);
1111 goto done;
1112 }
1113 RT_LOCK_ASSERT_HELD(ro->ro_rt);
1114 /*
1115 * If the outgoing interface on the route found is not
1116 * a loopback interface, use the address from that interface.
1117 */
1118 if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
1119 VERIFY(ia == NULL);
6d2010ae
A
1120 /*
1121 * If the route points to a cellular interface and the
1122 * caller forbids our using interfaces of such type,
1123 * pretend that there is no route.
fe8ab488 1124 * Apply the same logic for expensive interfaces.
6d2010ae 1125 */
fe8ab488 1126 if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
39236c6e
A
1127 RT_UNLOCK(ro->ro_rt);
1128 ROUTE_RELEASE(ro);
1129 error = EHOSTUNREACH;
fe8ab488 1130 restricted = TRUE;
39236c6e 1131 } else {
6d2010ae
A
1132 /* Become a regular mutex */
1133 RT_CONVERT_LOCK(ro->ro_rt);
39236c6e
A
1134 ia = ifatoia(ro->ro_rt->rt_ifa);
1135 IFA_ADDREF(&ia->ia_ifa);
b0d623f7 1136 RT_UNLOCK(ro->ro_rt);
39236c6e 1137 error = 0;
91447636 1138 }
39236c6e
A
1139 goto done;
1140 }
1141 VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
1142 RT_UNLOCK(ro->ro_rt);
1143 /*
1144 * The outgoing interface is marked with 'loopback net', so a route
1145 * to ourselves is here.
1146 * Try to find the interface of the destination address and then
1147 * take the address from there. That interface is not necessarily
1148 * a loopback interface.
1149 */
1150 VERIFY(ia == NULL);
1151 ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
1152 if (ia == NULL)
1153 ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
1154 if (ia == NULL)
1155 ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
1156 if (ia == NULL) {
1157 RT_LOCK(ro->ro_rt);
1158 ia = ifatoia(ro->ro_rt->rt_ifa);
1159 if (ia != NULL)
1160 IFA_ADDREF(&ia->ia_ifa);
1161 RT_UNLOCK(ro->ro_rt);
1162 }
1163 error = ((ia == NULL) ? ENETUNREACH : 0);
1164
1165done:
1166 /*
1167 * If the destination address is multicast and an outgoing
1168 * interface has been set as a multicast option, use the
1169 * address of that interface as our source address.
1170 */
15129b1c 1171 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
39236c6e
A
1172 inp->inp_moptions != NULL) {
1173 struct ip_moptions *imo;
1174 struct ifnet *ifp;
1175
1176 imo = inp->inp_moptions;
1177 IMO_LOCK(imo);
1178 if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
1179 ia->ia_ifp != imo->imo_multicast_ifp)) {
1180 ifp = imo->imo_multicast_ifp;
1181 if (ia != NULL)
6d2010ae 1182 IFA_REMREF(&ia->ia_ifa);
39236c6e
A
1183 lck_rw_lock_shared(in_ifaddr_rwlock);
1184 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
1185 if (ia->ia_ifp == ifp)
1186 break;
6d2010ae 1187 }
39236c6e
A
1188 if (ia != NULL)
1189 IFA_ADDREF(&ia->ia_ifa);
1190 lck_rw_done(in_ifaddr_rwlock);
1191 if (ia == NULL)
1192 error = EADDRNOTAVAIL;
15129b1c
A
1193 else
1194 error = 0;
1c79356b 1195 }
39236c6e
A
1196 IMO_UNLOCK(imo);
1197 }
1198 /*
1199 * Don't do pcblookup call here; return interface in laddr
1200 * and exit to caller, that will do the lookup.
1201 */
1202 if (ia != NULL) {
1c79356b 1203 /*
39236c6e
A
1204 * If the source address belongs to a cellular interface
1205 * and the socket forbids our using interfaces of such
1206 * type, pretend that there is no source address.
fe8ab488 1207 * Apply the same logic for expensive interfaces.
1c79356b 1208 */
39236c6e 1209 IFA_LOCK_SPIN(&ia->ia_ifa);
fe8ab488 1210 if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
39236c6e
A
1211 IFA_UNLOCK(&ia->ia_ifa);
1212 error = EHOSTUNREACH;
fe8ab488 1213 restricted = TRUE;
39236c6e
A
1214 } else if (error == 0) {
1215 *laddr = ia->ia_addr.sin_addr;
1216 if (outif != NULL) {
1217 struct ifnet *ifp;
1218
1219 if (ro->ro_rt != NULL)
1220 ifp = ro->ro_rt->rt_ifp;
1221 else
1222 ifp = ia->ia_ifp;
1223
1224 VERIFY(ifp != NULL);
1225 IFA_CONVERT_LOCK(&ia->ia_ifa);
1226 ifnet_reference(ifp); /* for caller */
1227 if (*outif != NULL)
1228 ifnet_release(*outif);
1229 *outif = ifp;
1c79356b 1230 }
39236c6e
A
1231 IFA_UNLOCK(&ia->ia_ifa);
1232 } else {
1233 IFA_UNLOCK(&ia->ia_ifa);
1c79356b 1234 }
6d2010ae 1235 IFA_REMREF(&ia->ia_ifa);
39236c6e
A
1236 ia = NULL;
1237 }
1238
fe8ab488 1239 if (restricted && error == EHOSTUNREACH) {
39236c6e
A
1240 soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
1241 SO_FILT_HINT_IFDENIED));
1c79356b 1242 }
39236c6e
A
1243
1244 return (error);
1c79356b
A
1245}
1246
/*
 * Outer subroutine:
 * Connect a socket's PCB to the foreign address and port given in "nam";
 * both must be specified.  If the PCB has no local address yet, one is
 * chosen through in_pcbladdr(), and an ephemeral local port is bound
 * through in_pcbbind() when none is set.
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 *
 * The socket lock is dropped and re-taken while looking for a
 * conflicting PCB and, possibly, while acquiring the pcbinfo lock.
 *
 * Returns 0 on success; any error from in_pcbladdr() or in_pcbbind();
 * ECONNREFUSED if the socket was aborted while unlocked; EADDRINUSE if
 * the same 4-tuple is already in the hash; EINVAL if a local address
 * is set but no local port.
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
	struct socket *so = inp->inp_socket;
	struct in_addr src;
	struct inpcb *clash;
	boolean_t need_laddr;
	int rc;

	/* Call inner routine, to assign local interface address. */
	rc = in_pcbladdr(inp, nam, &src, ifscope, outif);
	if (rc != 0) {
		return (rc);
	}

	socket_unlock(so, 0);
	clash = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr,
	    sin->sin_port, inp->inp_laddr.s_addr ? inp->inp_laddr : src,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state. When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0) {
		return (ECONNREFUSED);
	}

	if (clash != NULL) {
		/* Drop the want-count reference taken by the lookup. */
		in_pcb_checkstate(clash, WNT_RELEASE, clash == inp ? 1 : 0);
		return (EADDRINUSE);
	}

	need_laddr = (inp->inp_laddr.s_addr == INADDR_ANY);
	if (need_laddr) {
		if (inp->inp_lport == 0) {
			rc = in_pcbbind(inp, NULL, p);
			if (rc) {
				return (rc);
			}
		}
	} else if (inp->inp_lport == 0) {
		/*
		 * Usage of IP_PKTINFO without a local port already
		 * specified will cause the kernel to panic, see
		 * rdar://problem/18508185.  For now return an error to
		 * avoid that; this routine can be refactored to handle
		 * the case better in the future.
		 */
		return (EINVAL);
	}

	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
		/*
		 * Lock inversion issue, mostly with udp multicast packets:
		 * release the socket lock before blocking on the pcbinfo
		 * lock, then re-take it.
		 */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	if (need_laddr) {
		inp->inp_laddr = src;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
		inp->inp_flags |= INP_INADDR_ANY;
	}

	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_pcb_invalidate_cache(inp);
	}
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	return (0);
}
1339
1340void
2d21ac55 1341in_pcbdisconnect(struct inpcb *inp)
1c79356b 1342{
39236c6e 1343 struct socket *so = inp->inp_socket;
1c79356b 1344
fe8ab488
A
1345 if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
1346 nstat_pcb_cache(inp);
1347
1c79356b
A
1348 inp->inp_faddr.s_addr = INADDR_ANY;
1349 inp->inp_fport = 0;
91447636 1350
39236c6e
A
1351 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1352 /* lock inversion issue, mostly with udp multicast packets */
1353 socket_unlock(so, 0);
1354 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1355 socket_lock(so, 0);
91447636
A
1356 }
1357
1c79356b 1358 in_pcbrehash(inp);
39236c6e
A
1359 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1360 /*
1361 * A multipath subflow socket would have its SS_NOFDREF set by default,
1362 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
1363 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
1364 */
1365 if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF))
1c79356b
A
1366 in_pcbdetach(inp);
1367}
1368
1369void
2d21ac55 1370in_pcbdetach(struct inpcb *inp)
1c79356b
A
1371{
1372 struct socket *so = inp->inp_socket;
1c79356b 1373
39236c6e
A
1374 if (so->so_pcb == NULL) {
1375 /* PCB has been disposed */
1376 panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__,
1377 inp, so, SOCK_PROTO(so));
1378 /* NOTREACHED */
91447636 1379 }
fe8ab488 1380
1c79356b 1381#if IPSEC
39236c6e
A
1382 if (inp->inp_sp != NULL) {
1383 (void) ipsec4_delete_pcbpolicy(inp);
91447636 1384 }
39236c6e 1385#endif /* IPSEC */
fe8ab488
A
1386
1387 /*
1388 * Let NetworkStatistics know this PCB is going away
1389 * before we detach it.
1390 */
1391 if (nstat_collect &&
1392 (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP))
1393 nstat_pcb_detach(inp);
3e170ce0
A
1394
1395 /* Free memory buffer held for generating keep alives */
1396 if (inp->inp_keepalive_data != NULL) {
1397 FREE(inp->inp_keepalive_data, M_TEMP);
1398 inp->inp_keepalive_data = NULL;
1399 }
1400
91447636 1401 /* mark socket state as dead */
39236c6e
A
1402 if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
1403 panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
1404 __func__, so, SOCK_PROTO(so));
1405 /* NOTREACHED */
1406 }
1c79356b 1407
39236c6e 1408 if (!(so->so_flags & SOF_PCBCLEARING)) {
6d2010ae 1409 struct ip_moptions *imo;
2d21ac55 1410
91447636 1411 inp->inp_vflag = 0;
39236c6e
A
1412 if (inp->inp_options != NULL) {
1413 (void) m_free(inp->inp_options);
1414 inp->inp_options = NULL;
91447636 1415 }
39236c6e 1416 ROUTE_RELEASE(&inp->inp_route);
6d2010ae 1417 imo = inp->inp_moptions;
91447636 1418 inp->inp_moptions = NULL;
6d2010ae
A
1419 if (imo != NULL)
1420 IMO_REMREF(imo);
91447636
A
1421 sofreelastref(so, 0);
1422 inp->inp_state = INPCB_STATE_DEAD;
39236c6e
A
1423 /* makes sure we're not called twice from so_close */
1424 so->so_flags |= SOF_PCBCLEARING;
1425
1426 inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
91447636
A
1427 }
1428}
1c79356b 1429
1c79356b 1430
39236c6e
A
1431void
1432in_pcbdispose(struct inpcb *inp)
91447636
A
1433{
1434 struct socket *so = inp->inp_socket;
1435 struct inpcbinfo *ipi = inp->inp_pcbinfo;
1436
39236c6e
A
1437 if (so != NULL && so->so_usecount != 0) {
1438 panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n",
1439 __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
1440 solockhistory_nr(so));
1441 /* NOTREACHED */
1442 } else if (inp->inp_wantcnt != WNT_STOPUSING) {
1443 if (so != NULL) {
1444 panic_plain("%s: inp %p invalid wantcnt %d, so %p "
1445 "[%d,%d] usecount %d retaincnt %d state 0x%x "
1446 "flags 0x%x lockhistory %s\n", __func__, inp,
1447 inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
1448 so->so_usecount, so->so_retaincnt, so->so_state,
1449 so->so_flags, solockhistory_nr(so));
1450 /* NOTREACHED */
1451 } else {
1452 panic("%s: inp %p invalid wantcnt %d no socket\n",
1453 __func__, inp, inp->inp_wantcnt);
1454 /* NOTREACHED */
1455 }
91447636 1456 }
91447636 1457
39236c6e 1458 lck_rw_assert(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);
91447636
A
1459
1460 inp->inp_gencnt = ++ipi->ipi_gencnt;
316670eb 1461 /* access ipi in in_pcbremlists */
91447636 1462 in_pcbremlists(inp);
316670eb 1463
39236c6e 1464 if (so != NULL) {
91447636
A
1465 if (so->so_proto->pr_flags & PR_PCBLOCK) {
1466 sofreelastref(so, 0);
39236c6e
A
1467 if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
1468 /*
1469 * selthreadclear() already called
1470 * during sofreelastref() above.
1471 */
91447636
A
1472 sbrelease(&so->so_rcv);
1473 sbrelease(&so->so_snd);
1474 }
39236c6e
A
1475 if (so->so_head != NULL) {
1476 panic("%s: so=%p head still exist\n",
1477 __func__, so);
1478 /* NOTREACHED */
1479 }
1480 lck_mtx_unlock(&inp->inpcb_mtx);
1481 lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
9bccf70c 1482 }
39236c6e
A
1483 /* makes sure we're not called twice from so_close */
1484 so->so_flags |= SOF_PCBCLEARING;
1485 so->so_saved_pcb = (caddr_t)inp;
1486 so->so_pcb = NULL;
1487 inp->inp_socket = NULL;
2d21ac55
A
1488#if CONFIG_MACF_NET
1489 mac_inpcb_label_destroy(inp);
39236c6e 1490#endif /* CONFIG_MACF_NET */
b0d623f7
A
1491 /*
1492 * In case there a route cached after a detach (possible
1493 * in the tcp case), make sure that it is freed before
1494 * we deallocate the structure.
1495 */
39236c6e 1496 ROUTE_RELEASE(&inp->inp_route);
3e170ce0 1497 if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
91447636 1498 zfree(ipi->ipi_zone, inp);
55e303ae 1499 }
91447636 1500 sodealloc(so);
9bccf70c 1501 }
1c79356b
A
1502}
1503
1504/*
39236c6e 1505 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
1c79356b
A
1506 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
1507 * in struct pr_usrreqs, so that protocols can just reference then directly
39236c6e 1508 * without the need for a wrapper function.
1c79356b
A
1509 */
1510int
39236c6e 1511in_getsockaddr(struct socket *so, struct sockaddr **nam)
1c79356b 1512{
2d21ac55
A
1513 struct inpcb *inp;
1514 struct sockaddr_in *sin;
1c79356b
A
1515
1516 /*
1517 * Do the malloc first in case it blocks.
1518 */
39236c6e 1519 MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
0b4e3aa0 1520 if (sin == NULL)
39236c6e
A
1521 return (ENOBUFS);
1522 bzero(sin, sizeof (*sin));
1c79356b 1523 sin->sin_family = AF_INET;
39236c6e 1524 sin->sin_len = sizeof (*sin);
1c79356b 1525
39236c6e 1526 if ((inp = sotoinpcb(so)) == NULL) {
1c79356b 1527 FREE(sin, M_SONAME);
39236c6e 1528 return (EINVAL);
1c79356b
A
1529 }
1530 sin->sin_port = inp->inp_lport;
1531 sin->sin_addr = inp->inp_laddr;
1c79356b
A
1532
1533 *nam = (struct sockaddr *)sin;
39236c6e 1534 return (0);
1c79356b
A
1535}
1536
1537int
39236c6e 1538in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss)
1c79356b 1539{
39236c6e 1540 struct sockaddr_in *sin = SIN(ss);
1c79356b 1541 struct inpcb *inp;
1c79356b 1542
39236c6e
A
1543 VERIFY(ss != NULL);
1544 bzero(ss, sizeof (*ss));
1545
1c79356b 1546 sin->sin_family = AF_INET;
39236c6e 1547 sin->sin_len = sizeof (*sin);
1c79356b 1548
fe8ab488
A
1549 if ((inp = sotoinpcb(so)) == NULL
1550#if NECP
1551 || (necp_socket_should_use_flow_divert(inp))
1552#endif /* NECP */
1553 )
39236c6e
A
1554 return (inp == NULL ? EINVAL : EPROTOTYPE);
1555
1556 sin->sin_port = inp->inp_lport;
1557 sin->sin_addr = inp->inp_laddr;
1558 return (0);
1559}
1560
1561int
1562in_getpeeraddr(struct socket *so, struct sockaddr **nam)
1563{
1564 struct inpcb *inp;
1565 struct sockaddr_in *sin;
1566
1567 /*
1568 * Do the malloc first in case it blocks.
1569 */
1570 MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
1571 if (sin == NULL)
1572 return (ENOBUFS);
1573 bzero((caddr_t)sin, sizeof (*sin));
1574 sin->sin_family = AF_INET;
1575 sin->sin_len = sizeof (*sin);
1576
1577 if ((inp = sotoinpcb(so)) == NULL) {
1c79356b 1578 FREE(sin, M_SONAME);
39236c6e 1579 return (EINVAL);
1c79356b
A
1580 }
1581 sin->sin_port = inp->inp_fport;
1582 sin->sin_addr = inp->inp_faddr;
1c79356b
A
1583
1584 *nam = (struct sockaddr *)sin;
39236c6e
A
1585 return (0);
1586}
1587
1588int
1589in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss)
1590{
1591 struct sockaddr_in *sin = SIN(ss);
1592 struct inpcb *inp;
1593
1594 VERIFY(ss != NULL);
1595 bzero(ss, sizeof (*ss));
1596
1597 sin->sin_family = AF_INET;
1598 sin->sin_len = sizeof (*sin);
1599
fe8ab488
A
1600 if ((inp = sotoinpcb(so)) == NULL
1601#if NECP
1602 || (necp_socket_should_use_flow_divert(inp))
1603#endif /* NECP */
1604 ) {
39236c6e
A
1605 return (inp == NULL ? EINVAL : EPROTOTYPE);
1606 }
1607
1608 sin->sin_port = inp->inp_fport;
1609 sin->sin_addr = inp->inp_faddr;
1610 return (0);
1c79356b
A
1611}
1612
1c79356b 1613void
2d21ac55 1614in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
39236c6e 1615 int errno, void (*notify)(struct inpcb *, int))
1c79356b 1616{
91447636
A
1617 struct inpcb *inp;
1618
39236c6e 1619 lck_rw_lock_shared(pcbinfo->ipi_lock);
1c79356b 1620
39236c6e 1621 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
9bccf70c 1622#if INET6
39236c6e 1623 if (!(inp->inp_vflag & INP_IPV4))
1c79356b 1624 continue;
39236c6e 1625#endif /* INET6 */
1c79356b 1626 if (inp->inp_faddr.s_addr != faddr.s_addr ||
9bccf70c 1627 inp->inp_socket == NULL)
39236c6e
A
1628 continue;
1629 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
91447636
A
1630 continue;
1631 socket_lock(inp->inp_socket, 1);
9bccf70c 1632 (*notify)(inp, errno);
39236c6e 1633 (void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
91447636 1634 socket_unlock(inp->inp_socket, 1);
1c79356b 1635 }
39236c6e 1636 lck_rw_done(pcbinfo->ipi_lock);
1c79356b
A
1637}
1638
1639/*
1640 * Check for alternatives when higher level complains
1641 * about service problems. For now, invalidate cached
1642 * routing information. If the route was created dynamically
1643 * (by a redirect), time to try a default gateway again.
1644 */
1645void
2d21ac55 1646in_losing(struct inpcb *inp)
1c79356b 1647{
39236c6e 1648 boolean_t release = FALSE;
2d21ac55 1649 struct rtentry *rt;
1c79356b 1650
b0d623f7 1651 if ((rt = inp->inp_route.ro_rt) != NULL) {
39236c6e 1652 struct in_ifaddr *ia = NULL;
b0d623f7 1653
b0d623f7 1654 RT_LOCK(rt);
b0d623f7
A
1655 if (rt->rt_flags & RTF_DYNAMIC) {
1656 /*
1657 * Prevent another thread from modifying rt_key,
1658 * rt_gateway via rt_setgate() after rt_lock is
1659 * dropped by marking the route as defunct.
1660 */
1661 rt->rt_flags |= RTF_CONDEMNED;
1662 RT_UNLOCK(rt);
1663 (void) rtrequest(RTM_DELETE, rt_key(rt),
39236c6e 1664 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
b0d623f7
A
1665 } else {
1666 RT_UNLOCK(rt);
1667 }
2d21ac55 1668 /* if the address is gone keep the old route in the pcb */
39236c6e
A
1669 if (inp->inp_laddr.s_addr != INADDR_ANY &&
1670 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1671 /*
1672 * Address is around; ditch the route. A new route
1673 * can be allocated the next time output is attempted.
1674 */
1675 release = TRUE;
2d21ac55 1676 }
39236c6e
A
1677 if (ia != NULL)
1678 IFA_REMREF(&ia->ia_ifa);
1c79356b 1679 }
39236c6e
A
1680 if (rt == NULL || release)
1681 ROUTE_RELEASE(&inp->inp_route);
1c79356b
A
1682}
1683
1684/*
1685 * After a routing change, flush old routing
1686 * and allocate a (hopefully) better one.
1687 */
9bccf70c 1688void
39236c6e 1689in_rtchange(struct inpcb *inp, int errno)
1c79356b 1690{
39236c6e
A
1691#pragma unused(errno)
1692 boolean_t release = FALSE;
2d21ac55
A
1693 struct rtentry *rt;
1694
1695 if ((rt = inp->inp_route.ro_rt) != NULL) {
39236c6e 1696 struct in_ifaddr *ia = NULL;
b0d623f7 1697
39236c6e
A
1698 /* if address is gone, keep the old route */
1699 if (inp->inp_laddr.s_addr != INADDR_ANY &&
1700 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1701 /*
1702 * Address is around; ditch the route. A new route
1703 * can be allocated the next time output is attempted.
1704 */
1705 release = TRUE;
2d21ac55 1706 }
39236c6e
A
1707 if (ia != NULL)
1708 IFA_REMREF(&ia->ia_ifa);
1c79356b 1709 }
39236c6e
A
1710 if (rt == NULL || release)
1711 ROUTE_RELEASE(&inp->inp_route);
1c79356b
A
1712}
1713
1714/*
1715 * Lookup a PCB based on the local address and port.
1716 */
1717struct inpcb *
2d21ac55 1718in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
39236c6e 1719 unsigned int lport_arg, int wild_okay)
1c79356b 1720{
2d21ac55 1721 struct inpcb *inp;
1c79356b
A
1722 int matchwild = 3, wildcard;
1723 u_short lport = lport_arg;
1724
39236c6e 1725 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);
1c79356b
A
1726
1727 if (!wild_okay) {
1728 struct inpcbhead *head;
1729 /*
1730 * Look for an unconnected (wildcard foreign addr) PCB that
1731 * matches the local address and port we're looking for.
1732 */
39236c6e
A
1733 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
1734 pcbinfo->ipi_hashmask)];
9bccf70c
A
1735 LIST_FOREACH(inp, head, inp_hash) {
1736#if INET6
39236c6e 1737 if (!(inp->inp_vflag & INP_IPV4))
1c79356b 1738 continue;
39236c6e 1739#endif /* INET6 */
1c79356b
A
1740 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1741 inp->inp_laddr.s_addr == laddr.s_addr &&
1742 inp->inp_lport == lport) {
1743 /*
1744 * Found.
1745 */
1746 return (inp);
1747 }
1748 }
1749 /*
1750 * Not found.
1751 */
39236c6e 1752 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
1c79356b
A
1753 return (NULL);
1754 } else {
1755 struct inpcbporthead *porthash;
1756 struct inpcbport *phd;
1757 struct inpcb *match = NULL;
1758 /*
1759 * Best fit PCB lookup.
1760 *
1761 * First see if this local port is in use by looking on the
1762 * port hash list.
1763 */
39236c6e
A
1764 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
1765 pcbinfo->ipi_porthashmask)];
9bccf70c 1766 LIST_FOREACH(phd, porthash, phd_hash) {
1c79356b
A
1767 if (phd->phd_port == lport)
1768 break;
1769 }
1770 if (phd != NULL) {
1771 /*
1772 * Port is in use by one or more PCBs. Look for best
1773 * fit.
1774 */
9bccf70c 1775 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
1c79356b 1776 wildcard = 0;
9bccf70c 1777#if INET6
39236c6e 1778 if (!(inp->inp_vflag & INP_IPV4))
1c79356b 1779 continue;
39236c6e 1780#endif /* INET6 */
1c79356b
A
1781 if (inp->inp_faddr.s_addr != INADDR_ANY)
1782 wildcard++;
1783 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1784 if (laddr.s_addr == INADDR_ANY)
1785 wildcard++;
39236c6e
A
1786 else if (inp->inp_laddr.s_addr !=
1787 laddr.s_addr)
1c79356b
A
1788 continue;
1789 } else {
1790 if (laddr.s_addr != INADDR_ANY)
1791 wildcard++;
1792 }
1793 if (wildcard < matchwild) {
1794 match = inp;
1795 matchwild = wildcard;
1796 if (matchwild == 0) {
1797 break;
1798 }
1799 }
1800 }
1801 }
39236c6e
A
1802 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
1803 0, 0, 0, 0);
1c79356b
A
1804 return (match);
1805 }
1806}
1807
6d2010ae
A
1808/*
1809 * Check if PCB exists in hash list.
1810 */
1811int
39236c6e
A
1812in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1813 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
1814 uid_t *uid, gid_t *gid, struct ifnet *ifp)
6d2010ae
A
1815{
1816 struct inpcbhead *head;
1817 struct inpcb *inp;
1818 u_short fport = fport_arg, lport = lport_arg;
39236c6e
A
1819 int found = 0;
1820 struct inpcb *local_wild = NULL;
1821#if INET6
1822 struct inpcb *local_wild_mapped = NULL;
1823#endif /* INET6 */
6d2010ae
A
1824
1825 *uid = UID_MAX;
1826 *gid = GID_MAX;
316670eb 1827
6d2010ae
A
1828 /*
1829 * We may have found the pcb in the last lookup - check this first.
1830 */
1831
39236c6e 1832 lck_rw_lock_shared(pcbinfo->ipi_lock);
6d2010ae
A
1833
1834 /*
1835 * First look for an exact match.
1836 */
39236c6e
A
1837 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
1838 pcbinfo->ipi_hashmask)];
6d2010ae
A
1839 LIST_FOREACH(inp, head, inp_hash) {
1840#if INET6
39236c6e 1841 if (!(inp->inp_vflag & INP_IPV4))
6d2010ae 1842 continue;
39236c6e 1843#endif /* INET6 */
fe8ab488 1844 if (inp_restricted_recv(inp, ifp))
316670eb
A
1845 continue;
1846
6d2010ae
A
1847 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1848 inp->inp_laddr.s_addr == laddr.s_addr &&
1849 inp->inp_fport == fport &&
1850 inp->inp_lport == lport) {
1851 if ((found = (inp->inp_socket != NULL))) {
1852 /*
1853 * Found.
1854 */
316670eb
A
1855 *uid = kauth_cred_getuid(
1856 inp->inp_socket->so_cred);
1857 *gid = kauth_cred_getgid(
1858 inp->inp_socket->so_cred);
6d2010ae 1859 }
39236c6e 1860 lck_rw_done(pcbinfo->ipi_lock);
6d2010ae
A
1861 return (found);
1862 }
1863 }
6d2010ae 1864
39236c6e
A
1865 if (!wildcard) {
1866 /*
1867 * Not found.
1868 */
1869 lck_rw_done(pcbinfo->ipi_lock);
1870 return (0);
1871 }
316670eb 1872
39236c6e
A
1873 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
1874 pcbinfo->ipi_hashmask)];
1875 LIST_FOREACH(inp, head, inp_hash) {
6d2010ae 1876#if INET6
39236c6e
A
1877 if (!(inp->inp_vflag & INP_IPV4))
1878 continue;
6d2010ae 1879#endif /* INET6 */
fe8ab488 1880 if (inp_restricted_recv(inp, ifp))
39236c6e
A
1881 continue;
1882
1883 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1884 inp->inp_lport == lport) {
1885 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1886 if ((found = (inp->inp_socket != NULL))) {
316670eb 1887 *uid = kauth_cred_getuid(
39236c6e 1888 inp->inp_socket->so_cred);
316670eb 1889 *gid = kauth_cred_getgid(
39236c6e 1890 inp->inp_socket->so_cred);
6d2010ae 1891 }
39236c6e 1892 lck_rw_done(pcbinfo->ipi_lock);
6d2010ae 1893 return (found);
39236c6e
A
1894 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
1895#if INET6
1896 if (inp->inp_socket &&
1897 SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
1898 local_wild_mapped = inp;
1899 else
6d2010ae 1900#endif /* INET6 */
39236c6e
A
1901 local_wild = inp;
1902 }
6d2010ae 1903 }
39236c6e
A
1904 }
1905 if (local_wild == NULL) {
1906#if INET6
1907 if (local_wild_mapped != NULL) {
1908 if ((found = (local_wild_mapped->inp_socket != NULL))) {
316670eb 1909 *uid = kauth_cred_getuid(
39236c6e 1910 local_wild_mapped->inp_socket->so_cred);
316670eb 1911 *gid = kauth_cred_getgid(
39236c6e 1912 local_wild_mapped->inp_socket->so_cred);
6d2010ae 1913 }
39236c6e 1914 lck_rw_done(pcbinfo->ipi_lock);
6d2010ae
A
1915 return (found);
1916 }
39236c6e
A
1917#endif /* INET6 */
1918 lck_rw_done(pcbinfo->ipi_lock);
1919 return (0);
6d2010ae 1920 }
39236c6e
A
1921 if ((found = (local_wild->inp_socket != NULL))) {
1922 *uid = kauth_cred_getuid(
1923 local_wild->inp_socket->so_cred);
1924 *gid = kauth_cred_getgid(
1925 local_wild->inp_socket->so_cred);
1926 }
1927 lck_rw_done(pcbinfo->ipi_lock);
1928 return (found);
6d2010ae
A
1929}
1930
1c79356b
A
1931/*
1932 * Lookup PCB in hash list.
1933 */
1934struct inpcb *
39236c6e
A
1935in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1936 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
1937 struct ifnet *ifp)
1c79356b
A
1938{
1939 struct inpcbhead *head;
2d21ac55 1940 struct inpcb *inp;
1c79356b 1941 u_short fport = fport_arg, lport = lport_arg;
39236c6e
A
1942 struct inpcb *local_wild = NULL;
1943#if INET6
1944 struct inpcb *local_wild_mapped = NULL;
1945#endif /* INET6 */
1c79356b
A
1946
1947 /*
1948 * We may have found the pcb in the last lookup - check this first.
1949 */
1950
39236c6e 1951 lck_rw_lock_shared(pcbinfo->ipi_lock);
1c79356b
A
1952
1953 /*
1954 * First look for an exact match.
1955 */
39236c6e
A
1956 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
1957 pcbinfo->ipi_hashmask)];
9bccf70c
A
1958 LIST_FOREACH(inp, head, inp_hash) {
1959#if INET6
39236c6e 1960 if (!(inp->inp_vflag & INP_IPV4))
1c79356b 1961 continue;
39236c6e 1962#endif /* INET6 */
fe8ab488 1963 if (inp_restricted_recv(inp, ifp))
316670eb
A
1964 continue;
1965
1c79356b
A
1966 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1967 inp->inp_laddr.s_addr == laddr.s_addr &&
1968 inp->inp_fport == fport &&
1969 inp->inp_lport == lport) {
1970 /*
1971 * Found.
1972 */
39236c6e
A
1973 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
1974 WNT_STOPUSING) {
1975 lck_rw_done(pcbinfo->ipi_lock);
91447636 1976 return (inp);
39236c6e
A
1977 } else {
1978 /* it's there but dead, say it isn't found */
1979 lck_rw_done(pcbinfo->ipi_lock);
316670eb 1980 return (NULL);
91447636 1981 }
1c79356b
A
1982 }
1983 }
1c79356b 1984
39236c6e
A
1985 if (!wildcard) {
1986 /*
1987 * Not found.
1988 */
1989 lck_rw_done(pcbinfo->ipi_lock);
1990 return (NULL);
1991 }
1992
1993 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
1994 pcbinfo->ipi_hashmask)];
1995 LIST_FOREACH(inp, head, inp_hash) {
9bccf70c 1996#if INET6
39236c6e
A
1997 if (!(inp->inp_vflag & INP_IPV4))
1998 continue;
1999#endif /* INET6 */
fe8ab488 2000 if (inp_restricted_recv(inp, ifp))
39236c6e
A
2001 continue;
2002
2003 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2004 inp->inp_lport == lport) {
2005 if (inp->inp_laddr.s_addr == laddr.s_addr) {
2006 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2007 WNT_STOPUSING) {
2008 lck_rw_done(pcbinfo->ipi_lock);
2009 return (inp);
2010 } else {
2011 /* it's dead; say it isn't found */
2012 lck_rw_done(pcbinfo->ipi_lock);
2013 return (NULL);
91447636 2014 }
39236c6e 2015 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2d21ac55 2016#if INET6
39236c6e
A
2017 if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
2018 local_wild_mapped = inp;
2019 else
2d21ac55 2020#endif /* INET6 */
1c79356b 2021 local_wild = inp;
1c79356b
A
2022 }
2023 }
39236c6e
A
2024 }
2025 if (local_wild == NULL) {
2d21ac55 2026#if INET6
39236c6e
A
2027 if (local_wild_mapped != NULL) {
2028 if (in_pcb_checkstate(local_wild_mapped,
2029 WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2030 lck_rw_done(pcbinfo->ipi_lock);
2031 return (local_wild_mapped);
2032 } else {
2033 /* it's dead; say it isn't found */
2034 lck_rw_done(pcbinfo->ipi_lock);
2035 return (NULL);
91447636 2036 }
91447636 2037 }
39236c6e
A
2038#endif /* INET6 */
2039 lck_rw_done(pcbinfo->ipi_lock);
2040 return (NULL);
2041 }
2042 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2043 lck_rw_done(pcbinfo->ipi_lock);
2044 return (local_wild);
1c79356b 2045 }
1c79356b 2046 /*
39236c6e 2047 * It's either not found or is already dead.
1c79356b 2048 */
39236c6e 2049 lck_rw_done(pcbinfo->ipi_lock);
1c79356b
A
2050 return (NULL);
2051}
2052
2053/*
4bd07ac2
A
2054 * @brief Insert PCB onto various hash lists.
2055 *
2056 * @param inp Pointer to internet protocol control block
2057 * @param locked Implies if ipi_lock (protecting pcb list)
2058 * is already locked or not.
2059 *
2060 * @return int error on failure and 0 on success
1c79356b
A
2061 */
2062int
2d21ac55 2063in_pcbinshash(struct inpcb *inp, int locked)
1c79356b
A
2064{
2065 struct inpcbhead *pcbhash;
2066 struct inpcbporthead *pcbporthash;
2067 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
2068 struct inpcbport *phd;
2069 u_int32_t hashkey_faddr;
2070
39236c6e
A
2071 if (!locked) {
2072 if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
2073 /*
2074 * Lock inversion issue, mostly with udp
2075 * multicast packets
2076 */
2077 socket_unlock(inp->inp_socket, 0);
2078 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
2079 socket_lock(inp->inp_socket, 0);
39236c6e
A
2080 }
2081 }
b0d623f7 2082
4bd07ac2
A
2083 /*
2084 * This routine or its caller may have given up
2085 * socket's protocol lock briefly.
2086 * During that time the socket may have been dropped.
2087 * Safe-guarding against that.
2088 */
2089 if (inp->inp_state == INPCB_STATE_DEAD) {
2090 if (!locked) {
2091 lck_rw_done(pcbinfo->ipi_lock);
2092 }
2093 return (ECONNABORTED);
2094 }
2095
2096
1c79356b
A
2097#if INET6
2098 if (inp->inp_vflag & INP_IPV6)
2099 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2100 else
2101#endif /* INET6 */
39236c6e 2102 hashkey_faddr = inp->inp_faddr.s_addr;
1c79356b 2103
39236c6e
A
2104 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2105 inp->inp_fport, pcbinfo->ipi_hashmask);
91447636 2106
39236c6e 2107 pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];
1c79356b 2108
39236c6e
A
2109 pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
2110 pcbinfo->ipi_porthashmask)];
1c79356b
A
2111
2112 /*
2113 * Go through port list and look for a head for this lport.
2114 */
9bccf70c 2115 LIST_FOREACH(phd, pcbporthash, phd_hash) {
1c79356b
A
2116 if (phd->phd_port == inp->inp_lport)
2117 break;
2118 }
316670eb 2119
1c79356b
A
2120 /*
2121 * If none exists, malloc one and tack it on.
2122 */
2123 if (phd == NULL) {
39236c6e
A
2124 MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport),
2125 M_PCB, M_WAITOK);
1c79356b 2126 if (phd == NULL) {
91447636 2127 if (!locked)
39236c6e 2128 lck_rw_done(pcbinfo->ipi_lock);
1c79356b
A
2129 return (ENOBUFS); /* XXX */
2130 }
2131 phd->phd_port = inp->inp_lport;
2132 LIST_INIT(&phd->phd_pcblist);
2133 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
2134 }
fe8ab488
A
2135
2136 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
1c79356b
A
2137 inp->inp_phd = phd;
2138 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
2139 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
fe8ab488
A
2140 inp->inp_flags2 |= INP2_INHASHLIST;
2141
91447636 2142 if (!locked)
39236c6e 2143 lck_rw_done(pcbinfo->ipi_lock);
fe8ab488
A
2144
2145#if NECP
2146 // This call catches the original setting of the local address
2147 inp_update_necp_policy(inp, NULL, NULL, 0);
2148#endif /* NECP */
2149
1c79356b
A
2150 return (0);
2151}
2152
2153/*
2154 * Move PCB to the proper hash bucket when { faddr, fport } have been
2155 * changed. NOTE: This does not handle the case of the lport changing (the
2156 * hashed port list would have to be updated as well), so the lport must
2157 * not change after in_pcbinshash() has been called.
2158 */
2159void
2d21ac55 2160in_pcbrehash(struct inpcb *inp)
1c79356b
A
2161{
2162 struct inpcbhead *head;
2163 u_int32_t hashkey_faddr;
2164
2165#if INET6
2166 if (inp->inp_vflag & INP_IPV6)
2167 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2168 else
2169#endif /* INET6 */
39236c6e
A
2170 hashkey_faddr = inp->inp_faddr.s_addr;
2171
2172 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2173 inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
2174 head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];
1c79356b 2175
fe8ab488
A
2176 if (inp->inp_flags2 & INP2_INHASHLIST) {
2177 LIST_REMOVE(inp, inp_hash);
2178 inp->inp_flags2 &= ~INP2_INHASHLIST;
2179 }
2180
2181 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
1c79356b 2182 LIST_INSERT_HEAD(head, inp, inp_hash);
fe8ab488
A
2183 inp->inp_flags2 |= INP2_INHASHLIST;
2184
2185#if NECP
2186 // This call catches updates to the remote addresses
2187 inp_update_necp_policy(inp, NULL, NULL, 0);
2188#endif /* NECP */
1c79356b
A
2189}
2190
2191/*
2192 * Remove PCB from various lists.
316670eb 2193 * Must be called pcbinfo lock is held in exclusive mode.
1c79356b
A
2194 */
2195void
2d21ac55 2196in_pcbremlists(struct inpcb *inp)
1c79356b
A
2197{
2198 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
1c79356b 2199
fe8ab488
A
2200 /*
2201 * Check if it's in hashlist -- an inp is placed in hashlist when
2202 * it's local port gets assigned. So it should also be present
2203 * in the port list.
2204 */
2205 if (inp->inp_flags2 & INP2_INHASHLIST) {
1c79356b
A
2206 struct inpcbport *phd = inp->inp_phd;
2207
fe8ab488
A
2208 VERIFY(phd != NULL && inp->inp_lport > 0);
2209
1c79356b 2210 LIST_REMOVE(inp, inp_hash);
fe8ab488
A
2211 inp->inp_hash.le_next = NULL;
2212 inp->inp_hash.le_prev = NULL;
2213
1c79356b 2214 LIST_REMOVE(inp, inp_portlist);
fe8ab488
A
2215 inp->inp_portlist.le_next = NULL;
2216 inp->inp_portlist.le_prev = NULL;
2217 if (LIST_EMPTY(&phd->phd_pcblist)) {
1c79356b
A
2218 LIST_REMOVE(phd, phd_hash);
2219 FREE(phd, M_PCB);
2220 }
fe8ab488
A
2221 inp->inp_phd = NULL;
2222 inp->inp_flags2 &= ~INP2_INHASHLIST;
1c79356b 2223 }
fe8ab488 2224 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
39236c6e
A
2225
2226 if (inp->inp_flags2 & INP2_TIMEWAIT) {
2227 /* Remove from time-wait queue */
2228 tcp_remove_from_time_wait(inp);
2229 inp->inp_flags2 &= ~INP2_TIMEWAIT;
2230 VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
2231 inp->inp_pcbinfo->ipi_twcount--;
2232 } else {
2233 /* Remove from global inp list if it is not time-wait */
2234 LIST_REMOVE(inp, inp_list);
2235 }
316670eb 2236
bd504ef0 2237 if (inp->inp_flags2 & INP2_IN_FCTREE) {
39236c6e 2238 inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE));
bd504ef0
A
2239 VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
2240 }
39236c6e 2241
1c79356b
A
2242 inp->inp_pcbinfo->ipi_count--;
2243}
2244
39236c6e
A
2245/*
2246 * Mechanism used to defer the memory release of PCBs
2247 * The pcb list will contain the pcb until the reaper can clean it up if
2248 * the following conditions are met:
2249 * 1) state "DEAD",
2250 * 2) wantcnt is STOPUSING
2251 * 3) usecount is 0
91447636 2252 * This function will be called to either mark the pcb as
39236c6e 2253 */
91447636
A
2254int
2255in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
91447636 2256{
39236c6e 2257 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
2d21ac55
A
2258 UInt32 origwant;
2259 UInt32 newwant;
91447636
A
2260
2261 switch (mode) {
39236c6e
A
2262 case WNT_STOPUSING:
2263 /*
2264 * Try to mark the pcb as ready for recycling. CAS with
2265 * STOPUSING, if success we're good, if it's in use, will
2266 * be marked later
2267 */
2268 if (locked == 0)
2269 socket_lock(pcb->inp_socket, 1);
2270 pcb->inp_state = INPCB_STATE_DEAD;
91447636 2271
39236c6e
A
2272stopusing:
2273 if (pcb->inp_socket->so_usecount < 0) {
2274 panic("%s: pcb=%p so=%p usecount is negative\n",
2275 __func__, pcb, pcb->inp_socket);
2276 /* NOTREACHED */
2277 }
2278 if (locked == 0)
2279 socket_unlock(pcb->inp_socket, 1);
91447636 2280
39236c6e 2281 inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);
6d2010ae 2282
39236c6e
A
2283 origwant = *wantcnt;
2284 if ((UInt16) origwant == 0xffff) /* should stop using */
2285 return (WNT_STOPUSING);
2286 newwant = 0xffff;
2287 if ((UInt16) origwant == 0) {
2288 /* try to mark it as unsuable now */
2289 OSCompareAndSwap(origwant, newwant, wantcnt);
2290 }
2291 return (WNT_STOPUSING);
2292 break;
91447636 2293
39236c6e
A
2294 case WNT_ACQUIRE:
2295 /*
2296 * Try to increase reference to pcb. If WNT_STOPUSING
2297 * should bail out. If socket state DEAD, try to set count
2298 * to STOPUSING, return failed otherwise increase cnt.
2299 */
2300 do {
91447636 2301 origwant = *wantcnt;
39236c6e
A
2302 if ((UInt16) origwant == 0xffff) {
2303 /* should stop using */
91447636 2304 return (WNT_STOPUSING);
91447636 2305 }
39236c6e
A
2306 newwant = origwant + 1;
2307 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2308 return (WNT_ACQUIRE);
2309 break;
91447636 2310
39236c6e
A
2311 case WNT_RELEASE:
2312 /*
2313 * Release reference. If result is null and pcb state
2314 * is DEAD, set wanted bit to STOPUSING
2315 */
2316 if (locked == 0)
2317 socket_lock(pcb->inp_socket, 1);
91447636 2318
39236c6e
A
2319 do {
2320 origwant = *wantcnt;
2321 if ((UInt16) origwant == 0x0) {
2322 panic("%s: pcb=%p release with zero count",
2323 __func__, pcb);
2324 /* NOTREACHED */
2325 }
2326 if ((UInt16) origwant == 0xffff) {
2327 /* should stop using */
2328 if (locked == 0)
2329 socket_unlock(pcb->inp_socket, 1);
2330 return (WNT_STOPUSING);
2331 }
2332 newwant = origwant - 1;
2333 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2334
2335 if (pcb->inp_state == INPCB_STATE_DEAD)
2336 goto stopusing;
2337 if (pcb->inp_socket->so_usecount < 0) {
2338 panic("%s: RELEASE pcb=%p so=%p usecount is negative\n",
2339 __func__, pcb, pcb->inp_socket);
2340 /* NOTREACHED */
2341 }
91447636 2342
39236c6e
A
2343 if (locked == 0)
2344 socket_unlock(pcb->inp_socket, 1);
2345 return (WNT_RELEASE);
2346 break;
91447636 2347
39236c6e
A
2348 default:
2349 panic("%s: so=%p not a valid state =%x\n", __func__,
2350 pcb->inp_socket, mode);
2351 /* NOTREACHED */
91447636
A
2352 }
2353
2354 /* NOTREACHED */
2355 return (mode);
2356}
2357
2358/*
2359 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
2360 * The inpcb_compat data structure is passed to user space and must
b0d623f7 2361 * not change. We intentionally avoid copying pointers.
91447636
A
2362 */
2363void
39236c6e 2364inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
91447636 2365{
39236c6e 2366 bzero(inp_compat, sizeof (*inp_compat));
91447636
A
2367 inp_compat->inp_fport = inp->inp_fport;
2368 inp_compat->inp_lport = inp->inp_lport;
316670eb 2369 inp_compat->nat_owner = 0;
39236c6e 2370 inp_compat->nat_cookie = 0;
91447636
A
2371 inp_compat->inp_gencnt = inp->inp_gencnt;
2372 inp_compat->inp_flags = inp->inp_flags;
2373 inp_compat->inp_flow = inp->inp_flow;
2374 inp_compat->inp_vflag = inp->inp_vflag;
2375 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
2376 inp_compat->inp_ip_p = inp->inp_ip_p;
39236c6e
A
2377 inp_compat->inp_dependfaddr.inp6_foreign =
2378 inp->inp_dependfaddr.inp6_foreign;
2379 inp_compat->inp_dependladdr.inp6_local =
2380 inp->inp_dependladdr.inp6_local;
91447636 2381 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
39236c6e 2382 inp_compat->inp_depend6.inp6_hlim = 0;
91447636 2383 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
39236c6e 2384 inp_compat->inp_depend6.inp6_ifindex = 0;
91447636
A
2385 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2386}
9bccf70c 2387
b0d623f7 2388void
39236c6e 2389inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
b0d623f7 2390{
6d2010ae
A
2391 xinp->inp_fport = inp->inp_fport;
2392 xinp->inp_lport = inp->inp_lport;
2393 xinp->inp_gencnt = inp->inp_gencnt;
2394 xinp->inp_flags = inp->inp_flags;
2395 xinp->inp_flow = inp->inp_flow;
2396 xinp->inp_vflag = inp->inp_vflag;
2397 xinp->inp_ip_ttl = inp->inp_ip_ttl;
2398 xinp->inp_ip_p = inp->inp_ip_p;
2399 xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
2400 xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
2401 xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
39236c6e 2402 xinp->inp_depend6.inp6_hlim = 0;
6d2010ae 2403 xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
39236c6e 2404 xinp->inp_depend6.inp6_ifindex = 0;
6d2010ae 2405 xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
b0d623f7
A
2406}
2407
b0d623f7
A
2408/*
2409 * The following routines implement this scheme:
2410 *
2411 * Callers of ip_output() that intend to cache the route in the inpcb pass
2412 * a local copy of the struct route to ip_output(). Using a local copy of
2413 * the cached route significantly simplifies things as IP no longer has to
2414 * worry about having exclusive access to the passed in struct route, since
2415 * it's defined in the caller's stack; in essence, this allows for a lock-
2416 * less operation when updating the struct route at the IP level and below,
2417 * whenever necessary. The scheme works as follows:
2418 *
2419 * Prior to dropping the socket's lock and calling ip_output(), the caller
2420 * copies the struct route from the inpcb into its stack, and adds a reference
2421 * to the cached route entry, if there was any. The socket's lock is then
2422 * dropped and ip_output() is called with a pointer to the copy of struct
2423 * route defined on the stack (not to the one in the inpcb.)
2424 *
2425 * Upon returning from ip_output(), the caller then acquires the socket's
2426 * lock and synchronizes the cache; if there is no route cached in the inpcb,
2427 * it copies the local copy of struct route (which may or may not contain any
2428 * route) back into the cache; otherwise, if the inpcb has a route cached in
2429 * it, the one in the local copy will be freed, if there's any. Trashing the
2430 * cached route in the inpcb can be avoided because ip_output() is single-
2431 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
2432 * by the socket/transport layer.)
2433 */
2434void
2435inp_route_copyout(struct inpcb *inp, struct route *dst)
2436{
2437 struct route *src = &inp->inp_route;
2438
6d2010ae 2439 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
b0d623f7 2440
0b4c1975 2441 /*
39236c6e 2442 * If the route in the PCB is stale or not for IPv4, blow it away;
0b4c1975
A
2443 * this is possible in the case of IPv4-mapped address case.
2444 */
39236c6e
A
2445 if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET)
2446 ROUTE_RELEASE(src);
316670eb 2447
39236c6e 2448 route_copyout(dst, src, sizeof (*dst));
b0d623f7
A
2449}
2450
2451void
2452inp_route_copyin(struct inpcb *inp, struct route *src)
2453{
2454 struct route *dst = &inp->inp_route;
2455
6d2010ae 2456 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
b0d623f7
A
2457
2458 /* Minor sanity check */
2459 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
2460 panic("%s: wrong or corrupted route: %p", __func__, src);
2461
39236c6e 2462 route_copyin(src, dst, sizeof (*src));
6d2010ae
A
2463}
2464
2465/*
2466 * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option.
2467 */
316670eb 2468int
39236c6e 2469inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
6d2010ae 2470{
316670eb
A
2471 struct ifnet *ifp = NULL;
2472
2473 ifnet_head_lock_shared();
2474 if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
2475 (ifp = ifindex2ifnet[ifscope]) == NULL)) {
2476 ifnet_head_done();
2477 return (ENXIO);
2478 }
2479 ifnet_head_done();
2480
2481 VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);
2482
6d2010ae
A
2483 /*
2484 * A zero interface scope value indicates an "unbind".
2485 * Otherwise, take in whatever value the app desires;
2486 * the app may already know the scope (or force itself
2487 * to such a scope) ahead of time before the interface
2488 * gets attached. It doesn't matter either way; any
2489 * route lookup from this point on will require an
2490 * exact match for the embedded interface scope.
2491 */
316670eb
A
2492 inp->inp_boundifp = ifp;
2493 if (inp->inp_boundifp == NULL)
6d2010ae
A
2494 inp->inp_flags &= ~INP_BOUND_IF;
2495 else
2496 inp->inp_flags |= INP_BOUND_IF;
2497
2498 /* Blow away any cached route in the PCB */
39236c6e
A
2499 ROUTE_RELEASE(&inp->inp_route);
2500
2501 if (pifp != NULL)
2502 *pifp = ifp;
316670eb
A
2503
2504 return (0);
6d2010ae
A
2505}
2506
2507/*
39236c6e
A
2508 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2509 * as well as for setting PROC_UUID_NO_CELLULAR policy.
6d2010ae 2510 */
39236c6e
A
2511void
2512inp_set_nocellular(struct inpcb *inp)
6d2010ae 2513{
39236c6e 2514 inp->inp_flags |= INP_NO_IFT_CELLULAR;
6d2010ae
A
2515
2516 /* Blow away any cached route in the PCB */
39236c6e
A
2517 ROUTE_RELEASE(&inp->inp_route);
2518}
2519
2520/*
2521 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2522 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
2523 */
2524void
2525inp_clear_nocellular(struct inpcb *inp)
2526{
2527 struct socket *so = inp->inp_socket;
2528
2529 /*
2530 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
2531 * has a higher precendence than INP_NO_IFT_CELLULAR. Clear the flag
2532 * if and only if the socket is unrestricted.
2533 */
2534 if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
2535 inp->inp_flags &= ~INP_NO_IFT_CELLULAR;
2536
2537 /* Blow away any cached route in the PCB */
2538 ROUTE_RELEASE(&inp->inp_route);
6d2010ae 2539 }
39236c6e 2540}
6d2010ae 2541
fe8ab488
A
2542void
2543inp_set_noexpensive(struct inpcb *inp)
2544{
2545 inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;
2546
2547 /* Blow away any cached route in the PCB */
2548 ROUTE_RELEASE(&inp->inp_route);
2549}
2550
2551void
2552inp_set_awdl_unrestricted(struct inpcb *inp)
2553{
2554 inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;
2555
2556 /* Blow away any cached route in the PCB */
2557 ROUTE_RELEASE(&inp->inp_route);
2558}
2559
2560boolean_t
2561inp_get_awdl_unrestricted(struct inpcb *inp)
2562{
2563 return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
2564}
2565
2566void
2567inp_clear_awdl_unrestricted(struct inpcb *inp)
2568{
2569 inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;
2570
2571 /* Blow away any cached route in the PCB */
2572 ROUTE_RELEASE(&inp->inp_route);
2573}
2574
2575#if NECP
39236c6e 2576/*
fe8ab488 2577 * Called when PROC_UUID_NECP_APP_POLICY is set.
39236c6e
A
2578 */
2579void
fe8ab488 2580inp_set_want_app_policy(struct inpcb *inp)
39236c6e 2581{
fe8ab488 2582 inp->inp_flags2 |= INP2_WANT_APP_POLICY;
39236c6e
A
2583}
2584
2585/*
fe8ab488 2586 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
39236c6e
A
2587 */
2588void
fe8ab488 2589inp_clear_want_app_policy(struct inpcb *inp)
39236c6e 2590{
fe8ab488 2591 inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
b0d623f7 2592}
fe8ab488 2593#endif /* NECP */
316670eb
A
2594
2595/*
2596 * Calculate flow hash for an inp, used by an interface to identify a
2597 * flow. When an interface provides flow control advisory, this flow
2598 * hash is used as an identifier.
2599 */
2600u_int32_t
2601inp_calc_flowhash(struct inpcb *inp)
2602{
2603 struct inp_flowhash_key fh __attribute__((aligned(8)));
2604 u_int32_t flowhash = 0;
bd504ef0 2605 struct inpcb *tmp_inp = NULL;
316670eb
A
2606
2607 if (inp_hash_seed == 0)
2608 inp_hash_seed = RandomULong();
2609
2610 bzero(&fh, sizeof (fh));
2611
2612 bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr));
2613 bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr));
2614
2615 fh.infh_lport = inp->inp_lport;
2616 fh.infh_fport = inp->inp_fport;
2617 fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
2618 fh.infh_proto = inp->inp_ip_p;
2619 fh.infh_rand1 = RandomULong();
2620 fh.infh_rand2 = RandomULong();
2621
2622try_again:
2623 flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed);
2624 if (flowhash == 0) {
2625 /* try to get a non-zero flowhash */
2626 inp_hash_seed = RandomULong();
2627 goto try_again;
2628 }
2629
bd504ef0 2630 inp->inp_flowhash = flowhash;
316670eb 2631
bd504ef0 2632 /* Insert the inp into inp_fc_tree */
39236c6e 2633 lck_mtx_lock_spin(&inp_fc_lck);
bd504ef0
A
2634 tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
2635 if (tmp_inp != NULL) {
316670eb 2636 /*
bd504ef0
A
2637 * There is a different inp with the same flowhash.
2638 * There can be a collision on flow hash but the
39236c6e 2639 * probability is low. Let's recompute the
bd504ef0 2640 * flowhash.
316670eb
A
2641 */
2642 lck_mtx_unlock(&inp_fc_lck);
bd504ef0
A
2643 /* recompute hash seed */
2644 inp_hash_seed = RandomULong();
2645 goto try_again;
316670eb 2646 }
39236c6e 2647
bd504ef0
A
2648 RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
2649 inp->inp_flags2 |= INP2_IN_FCTREE;
316670eb 2650 lck_mtx_unlock(&inp_fc_lck);
bd504ef0 2651
39236c6e
A
2652 return (flowhash);
2653}
2654
2655void
2656inp_flowadv(uint32_t flowhash)
2657{
2658 struct inpcb *inp;
2659
2660 inp = inp_fc_getinp(flowhash, 0);
2661
2662 if (inp == NULL)
2663 return;
2664 inp_fc_feedback(inp);
316670eb
A
2665}
2666
bd504ef0
A
2667/*
2668 * Function to compare inp_fc_entries in inp flow control tree
2669 */
2670static inline int
2671infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
316670eb 2672{
bd504ef0 2673 return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
39236c6e 2674 sizeof(inp1->inp_flowhash)));
bd504ef0 2675}
316670eb 2676
39236c6e 2677static struct inpcb *
bd504ef0
A
2678inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
2679{
2680 struct inpcb *inp = NULL;
2681 int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;
316670eb
A
2682
2683 lck_mtx_lock_spin(&inp_fc_lck);
bd504ef0
A
2684 key_inp.inp_flowhash = flowhash;
2685 inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
2686 if (inp == NULL) {
316670eb
A
2687 /* inp is not present, return */
2688 lck_mtx_unlock(&inp_fc_lck);
2689 return (NULL);
2690 }
2691
bd504ef0
A
2692 if (flags & INPFC_REMOVE) {
2693 RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
2694 lck_mtx_unlock(&inp_fc_lck);
316670eb 2695
bd504ef0
A
2696 bzero(&(inp->infc_link), sizeof (inp->infc_link));
2697 inp->inp_flags2 &= ~INP2_IN_FCTREE;
2698 return (NULL);
316670eb 2699 }
39236c6e 2700
bd504ef0
A
2701 if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING)
2702 inp = NULL;
316670eb
A
2703 lck_mtx_unlock(&inp_fc_lck);
2704
bd504ef0 2705 return (inp);
316670eb
A
2706}
2707
39236c6e 2708static void
316670eb
A
2709inp_fc_feedback(struct inpcb *inp)
2710{
2711 struct socket *so = inp->inp_socket;
2712
2713 /* we already hold a want_cnt on this inp, socket can't be null */
39236c6e 2714 VERIFY(so != NULL);
316670eb
A
2715 socket_lock(so, 1);
2716
2717 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
2718 socket_unlock(so, 1);
2719 return;
2720 }
2721
fe8ab488
A
2722 if (inp->inp_sndinprog_cnt > 0)
2723 inp->inp_flags |= INP_FC_FEEDBACK;
2724
316670eb
A
2725 /*
2726 * Return if the connection is not in flow-controlled state.
2727 * This can happen if the connection experienced
2728 * loss while it was in flow controlled state
2729 */
2730 if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
2731 socket_unlock(so, 1);
2732 return;
2733 }
2734 inp_reset_fc_state(inp);
2735
39236c6e 2736 if (SOCK_TYPE(so) == SOCK_STREAM)
316670eb
A
2737 inp_fc_unthrottle_tcp(inp);
2738
2739 socket_unlock(so, 1);
2740}
2741
2742void
2743inp_reset_fc_state(struct inpcb *inp)
2744{
2745 struct socket *so = inp->inp_socket;
2746 int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
2747 int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;
2748
2749 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
2750
2751 if (suspended) {
2752 so->so_flags &= ~(SOF_SUSPENDED);
2753 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
2754 }
2755
316670eb
A
2756 /* Give a write wakeup to unblock the socket */
2757 if (needwakeup)
2758 sowwakeup(so);
2759}
2760
2761int
2762inp_set_fc_state(struct inpcb *inp, int advcode)
2763{
bd504ef0 2764 struct inpcb *tmp_inp = NULL;
316670eb 2765 /*
39236c6e 2766 * If there was a feedback from the interface when
316670eb
A
2767 * send operation was in progress, we should ignore
2768 * this flow advisory to avoid a race between setting
2769 * flow controlled state and receiving feedback from
2770 * the interface
2771 */
2772 if (inp->inp_flags & INP_FC_FEEDBACK)
39236c6e 2773 return (0);
316670eb
A
2774
2775 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
39236c6e
A
2776 if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
2777 INPFC_SOLOCKED)) != NULL) {
2778 if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING)
bd504ef0
A
2779 return (0);
2780 VERIFY(tmp_inp == inp);
316670eb
A
2781 switch (advcode) {
2782 case FADV_FLOW_CONTROLLED:
2783 inp->inp_flags |= INP_FLOW_CONTROLLED;
2784 break;
2785 case FADV_SUSPENDED:
2786 inp->inp_flags |= INP_FLOW_SUSPENDED;
2787 soevent(inp->inp_socket,
2788 (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));
2789
2790 /* Record the fact that suspend event was sent */
2791 inp->inp_socket->so_flags |= SOF_SUSPENDED;
2792 break;
2793 }
bd504ef0 2794 return (1);
316670eb 2795 }
39236c6e 2796 return (0);
316670eb
A
2797}
2798
2799/*
2800 * Handler for SO_FLUSH socket option.
2801 */
2802int
2803inp_flush(struct inpcb *inp, int optval)
2804{
2805 u_int32_t flowhash = inp->inp_flowhash;
39236c6e 2806 struct ifnet *rtifp, *oifp;
316670eb
A
2807
2808 /* Either all classes or one of the valid ones */
2809 if (optval != SO_TC_ALL && !SO_VALID_TC(optval))
2810 return (EINVAL);
2811
2812 /* We need a flow hash for identification */
2813 if (flowhash == 0)
2814 return (0);
2815
39236c6e
A
2816 /* Grab the interfaces from the route and pcb */
2817 rtifp = ((inp->inp_route.ro_rt != NULL) ?
2818 inp->inp_route.ro_rt->rt_ifp : NULL);
2819 oifp = inp->inp_last_outifp;
2820
2821 if (rtifp != NULL)
2822 if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
2823 if (oifp != NULL && oifp != rtifp)
2824 if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
316670eb
A
2825
2826 return (0);
2827}
2828
2829/*
2830 * Clear the INP_INADDR_ANY flag (special case for PPP only)
2831 */
39236c6e
A
2832void
2833inp_clear_INP_INADDR_ANY(struct socket *so)
316670eb
A
2834{
2835 struct inpcb *inp = NULL;
2836
2837 socket_lock(so, 1);
2838 inp = sotoinpcb(so);
2839 if (inp) {
2840 inp->inp_flags &= ~INP_INADDR_ANY;
2841 }
2842 socket_unlock(so, 1);
2843}
2844
39236c6e
A
2845void
2846inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
2847{
2848 struct socket *so = inp->inp_socket;
2849
2850 soprocinfo->spi_pid = so->last_pid;
fe8ab488
A
2851 if (so->last_pid != 0)
2852 uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
39236c6e
A
2853 /*
2854 * When not delegated, the effective pid is the same as the real pid
2855 */
fe8ab488 2856 if (so->so_flags & SOF_DELEGATED) {
3e170ce0 2857 soprocinfo->spi_delegated = 1;
39236c6e 2858 soprocinfo->spi_epid = so->e_pid;
3e170ce0 2859 uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
fe8ab488 2860 } else {
3e170ce0 2861 soprocinfo->spi_delegated = 0;
39236c6e 2862 soprocinfo->spi_epid = so->last_pid;
fe8ab488 2863 }
39236c6e
A
2864}
2865
2866int
2867inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
2868 struct so_procinfo *soprocinfo)
2869{
2870 struct inpcb *inp = NULL;
2871 int found = 0;
2872
2873 bzero(soprocinfo, sizeof (struct so_procinfo));
2874
2875 if (!flowhash)
2876 return (-1);
2877
2878 lck_rw_lock_shared(pcbinfo->ipi_lock);
2879 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
2880 if (inp->inp_state != INPCB_STATE_DEAD &&
2881 inp->inp_socket != NULL &&
2882 inp->inp_flowhash == flowhash) {
2883 found = 1;
2884 inp_get_soprocinfo(inp, soprocinfo);
2885 break;
2886 }
2887 }
2888 lck_rw_done(pcbinfo->ipi_lock);
2889
2890 return (found);
2891}
2892
2893#if CONFIG_PROC_UUID_POLICY
2894static void
2895inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
2896{
2897 struct socket *so = inp->inp_socket;
2898 int before, after;
2899
2900 VERIFY(so != NULL);
2901 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
2902
fe8ab488 2903 before = INP_NO_CELLULAR(inp);
39236c6e
A
2904 if (set) {
2905 inp_set_nocellular(inp);
2906 } else {
2907 inp_clear_nocellular(inp);
2908 }
fe8ab488 2909 after = INP_NO_CELLULAR(inp);
39236c6e
A
2910 if (net_io_policy_log && (before != after)) {
2911 static const char *ok = "OK";
2912 static const char *nok = "NOACCESS";
2913 uuid_string_t euuid_buf;
2914 pid_t epid;
2915
2916 if (so->so_flags & SOF_DELEGATED) {
2917 uuid_unparse(so->e_uuid, euuid_buf);
2918 epid = so->e_pid;
2919 } else {
2920 uuid_unparse(so->last_uuid, euuid_buf);
2921 epid = so->last_pid;
2922 }
2923
2924 /* allow this socket to generate another notification event */
2925 so->so_ifdenied_notifies = 0;
2926
2927 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
2928 "euuid %s%s %s->%s\n", __func__,
2929 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
2930 SOCK_TYPE(so), epid, euuid_buf,
2931 (so->so_flags & SOF_DELEGATED) ?
2932 " [delegated]" : "",
2933 ((before < after) ? ok : nok),
2934 ((before < after) ? nok : ok));
2935 }
2936}
2937
#if NECP
/*
 * Set (`set' is TRUE) or clear (`set' is FALSE) the "want app policy"
 * state (INP2_WANT_APP_POLICY in inp_flags2) on `inp'.
 *
 * When net_io_policy_log is enabled and the flag actually changed, the
 * transition is logged at LOG_DEBUG as "UNWANTED->WANTED" or
 * "WANTED->UNWANTED", together with the effective pid/UUID of the socket.
 *
 * The PCB must have a socket and must not be in INPCB_STATE_DEAD.
 */
static void
inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	const char *from_state, *to_state;
	uuid_string_t euuid_buf;
	pid_t epid;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	/* Sample the flag on both sides of the update */
	before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	if (set) {
		inp_set_want_app_policy(inp);
	} else {
		inp_clear_want_app_policy(inp);
	}
	after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);

	/* Nothing more to do unless logging is on and the flag changed */
	if (!net_io_policy_log || before == after)
		return;

	/* Effective identity: the delegate's if delegated, else the owner's */
	if (so->so_flags & SOF_DELEGATED) {
		uuid_unparse(so->e_uuid, euuid_buf);
		epid = so->e_pid;
	} else {
		uuid_unparse(so->last_uuid, euuid_buf);
		epid = so->last_pid;
	}

	/* before < after means the flag was just turned on */
	if (before < after) {
		from_state = "UNWANTED";
		to_state = "WANTED";
	} else {
		from_state = "WANTED";
		to_state = "UNWANTED";
	}

	log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
	    "euuid %s%s %s->%s\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
	    SOCK_TYPE(so), epid, euuid_buf,
	    (so->so_flags & SOF_DELEGATED) ? " [delegated]" : "",
	    from_state, to_state);
}
#endif /* NECP */
39236c6e
A
2980#endif /* !CONFIG_PROC_UUID_POLICY */
2981
fe8ab488
A
#if NECP
/*
 * Re-run NECP policy matching for `inp' with the given address and
 * bound-interface overrides.  Afterwards, if NECP reports that the socket
 * should be rescoped and the socket is not yet bound (no local port, IPv4
 * local address INADDR_ANY and IPv6 local address unspecified), bind it
 * to the rescope interface index.
 */
void
inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr,
    struct sockaddr *override_remote_addr, u_int override_bound_interface)
{
	necp_socket_find_policy_match(inp, override_local_addr,
	    override_remote_addr, override_bound_interface);

	if (!necp_socket_should_rescope(inp))
		return;

	/* Leave sockets that already have a local binding alone */
	if (inp->inp_lport != 0 ||
	    inp->inp_laddr.s_addr != INADDR_ANY ||
	    !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
		return;

	inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
}
#endif /* NECP */
2996
39236c6e
A
/*
 * Re-evaluate the per-process UUID policy for the socket owning `inp'
 * and apply any change to the PCB.
 *
 * With CONFIG_PROC_UUID_POLICY, the policy table is consulted using the
 * delegate's UUID for delegated sockets and the owner's UUID otherwise.
 * If the socket's cached policy generation count changed as a result,
 * the no-cellular restriction (and, with NECP, the want-app-policy
 * flag) is updated from the returned policy flags.
 *
 * Returns 0 on success, including when the UUID has no entry (ENOENT)
 * or when the check does not apply; otherwise the lookup error.
 * Without CONFIG_PROC_UUID_POLICY this always returns 0.
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *sock = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t prev_gencnt;
	int err;

	if (!net_io_policy_uuid)
		return (0);
	if (sock == NULL || inp->inp_state == INPCB_STATE_DEAD)
		return (0);

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (sock->last_pid == 0 && !(sock->so_flags & SOF_DELEGATED))
		return (0);

	prev_gencnt = sock->so_policy_gencnt;
	if (sock->so_flags & SOF_DELEGATED) {
		err = proc_uuid_policy_lookup(sock->e_uuid, &pflags,
		    &sock->so_policy_gencnt);
	} else {
		err = proc_uuid_policy_lookup(sock->last_uuid, &pflags,
		    &sock->so_policy_gencnt);
	}

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go thru the checks below.
	 */
	if (err == ENOENT && prev_gencnt != 0)
		sock->so_policy_gencnt = 0;

	/* Any error other than ENOENT is handed back untouched */
	if (err != 0 && err != ENOENT)
		return (err);

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly.  If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if (prev_gencnt != sock->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (pflags & PROC_UUID_NO_CELLULAR) {
			if (err == 0)
				inp_update_cellular_policy(inp, TRUE);
		} else {
			inp_update_cellular_policy(inp, FALSE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (pflags & PROC_UUID_NECP_APP_POLICY) {
			if (err == 0)
				inp_update_necp_want_app_policy(inp, TRUE);
		} else {
			inp_update_necp_want_app_policy(inp, FALSE);
		}
#endif /* NECP */
	}

	/* Only 0 or ENOENT reach this point; both count as success */
	return (0);
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return (0);
#endif /* !CONFIG_PROC_UUID_POLICY */
}
fe8ab488
A
3057/*
3058 * Called when we need to enforce policy restrictions in the input path.
3059 *
3060 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
3061 */
39236c6e 3062boolean_t
fe8ab488 3063inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
39236c6e
A
3064{
3065 VERIFY(inp != NULL);
3066
fe8ab488
A
3067 /*
3068 * Inbound restrictions.
3069 */
39236c6e
A
3070 if (!sorestrictrecv)
3071 return (FALSE);
3072
fe8ab488
A
3073 if (ifp == NULL)
3074 return (FALSE);
3075
3076 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
3077 return (TRUE);
3078
3079 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
3080 return (TRUE);
3081
3082 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
3083 return (TRUE);
3084
3085 if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV))
39236c6e
A
3086 return (FALSE);
3087
3088 if (inp->inp_flags & INP_RECV_ANYIF)
3089 return (FALSE);
3090
3091 if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp)
3092 return (FALSE);
3093
3094 return (TRUE);
3095}
fe8ab488
A
3096
3097/*
3098 * Called when we need to enforce policy restrictions in the output path.
3099 *
3100 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
3101 */
3102boolean_t
3103inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3104{
3105 VERIFY(inp != NULL);
3106
3107 /*
3108 * Outbound restrictions.
3109 */
3110 if (!sorestrictsend)
3111 return (FALSE);
3112
3113 if (ifp == NULL)
3114 return (FALSE);
3115
3116 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
3117 return (TRUE);
3118
3119 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
3120 return (TRUE);
3121
3122 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
3123 return (TRUE);
3124
3125 return (FALSE);
3126}