]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/in_pcb.c
xnu-3789.21.4.tar.gz
[apple/xnu.git] / bsd / netinet / in_pcb.c
CommitLineData
1c79356b 1/*
39037602 2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
9bccf70c 61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
1c79356b
A
62 */
63
64#include <sys/param.h>
65#include <sys/systm.h>
66#include <sys/malloc.h>
67#include <sys/mbuf.h>
1c79356b 68#include <sys/domain.h>
1c79356b
A
69#include <sys/protosw.h>
70#include <sys/socket.h>
71#include <sys/socketvar.h>
72#include <sys/proc.h>
73#include <sys/kernel.h>
74#include <sys/sysctl.h>
6d2010ae
A
75#include <sys/mcache.h>
76#include <sys/kauth.h>
77#include <sys/priv.h>
39236c6e
A
78#include <sys/proc_uuid_policy.h>
79#include <sys/syslog.h>
fe8ab488 80#include <sys/priv.h>
39037602 81#include <net/dlil.h>
39236c6e 82
91447636 83#include <libkern/OSAtomic.h>
316670eb 84#include <kern/locks.h>
1c79356b
A
85
86#include <machine/limits.h>
87
1c79356b 88#include <kern/zalloc.h>
1c79356b
A
89
90#include <net/if.h>
1c79356b 91#include <net/if_types.h>
9bccf70c 92#include <net/route.h>
316670eb
A
93#include <net/flowhash.h>
94#include <net/flowadv.h>
fe8ab488 95#include <net/ntstat.h>
1c79356b
A
96
97#include <netinet/in.h>
98#include <netinet/in_pcb.h>
99#include <netinet/in_var.h>
100#include <netinet/ip_var.h>
101#if INET6
102#include <netinet/ip6.h>
103#include <netinet6/ip6_var.h>
104#endif /* INET6 */
105
1c79356b 106#include <sys/kdebug.h>
b0d623f7 107#include <sys/random.h>
39236c6e 108
316670eb 109#include <dev/random/randomdev.h>
39236c6e 110#include <mach/boolean.h>
1c79356b 111
39037602
A
112#include <pexpert/pexpert.h>
113
fe8ab488
A
114#if NECP
115#include <net/necp.h>
9bccf70c 116#endif
1c79356b 117
39037602
A
118#include <sys/stat.h>
119#include <sys/ubc.h>
120#include <sys/vnode.h>
121
39236c6e
A
122static lck_grp_t *inpcb_lock_grp;
123static lck_attr_t *inpcb_lock_attr;
124static lck_grp_attr_t *inpcb_lock_grp_attr;
125decl_lck_mtx_data(static, inpcb_lock); /* global INPCB lock */
126decl_lck_mtx_data(static, inpcb_timeout_lock);
127
128static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);
129
130static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */
131static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
132static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */
133static boolean_t inpcb_fast_timer_on = FALSE;
39037602 134static boolean_t intcoproc_unrestricted = FALSE;
fe8ab488 135
743345f9
A
136extern char *proc_best_name(proc_t);
137
fe8ab488
A
138/*
139 * If the total number of gc reqs is above a threshold, schedule
140 * garbage collect timer sooner
141 */
142static boolean_t inpcb_toomany_gcreq = FALSE;
143
144#define INPCB_GCREQ_THRESHOLD 50000
fe8ab488 145
39037602
A
146static thread_call_t inpcb_thread_call, inpcb_fast_thread_call;
147static void inpcb_sched_timeout(void);
148static void inpcb_sched_lazy_timeout(void);
149static void _inpcb_sched_timeout(unsigned int);
150static void inpcb_timeout(void *, void *);
151const int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */
39236c6e
A
152extern int tvtohz(struct timeval *);
153
154#if CONFIG_PROC_UUID_POLICY
155static void inp_update_cellular_policy(struct inpcb *, boolean_t);
fe8ab488
A
156#if NECP
157static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
158#endif /* NECP */
39236c6e
A
159#endif /* !CONFIG_PROC_UUID_POLICY */
160
39236c6e
A
161#define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
162#define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
1c79356b 163
1c79356b
A
164/*
165 * These configure the range of local port addresses assigned to
166 * "unspecified" outgoing connections/packets/whatever.
167 */
9bccf70c
A
168int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
169int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
39236c6e
A
170int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
171int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
9bccf70c
A
172int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
173int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
1c79356b 174
39236c6e 175#define RANGECHK(var, min, max) \
1c79356b
A
176 if ((var) < (min)) { (var) = (min); } \
177 else if ((var) > (max)) { (var) = (max); }
178
1c79356b
A
179static int
180sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
181{
2d21ac55 182#pragma unused(arg1, arg2)
39236c6e
A
183 int error;
184
185 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
1c79356b
A
186 if (!error) {
187 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
188 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
189 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
190 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
191 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
192 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
193 }
39236c6e 194 return (error);
1c79356b
A
195}
196
197#undef RANGECHK
198
39236c6e
A
199SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
200 CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports");
201
202SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
203 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
204 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
205SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
206 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
207 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
208SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
209 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
210 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
211SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
212 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
213 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
214SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
215 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
216 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
217SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
218 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
219 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
1c79356b 220
39037602
A
221static uint32_t apn_fallbk_debug = 0;
222#define apn_fallbk_log(x) do { if (apn_fallbk_debug >= 1) log x; } while (0)
223
224static boolean_t apn_fallbk_enabled = FALSE;
225
b0d623f7
A
226extern int udp_use_randomport;
227extern int tcp_use_randomport;
228
316670eb
A
229/* Structs used for flowhash computation */
/*
 * One address slot of a flowhash key.  The union overlays an IPv4
 * address, an IPv6 address and raw 8/16/32-bit views of the same
 * 16 bytes of storage.
 */
struct inp_flowhash_key_addr {
	union {
		struct in_addr	v4;		/* IPv4 view */
		struct in6_addr	v6;		/* IPv6 view */
		u_int8_t	addr8[16];	/* byte view */
		u_int16_t	addr16[8];	/* 16-bit word view */
		u_int32_t	addr32[4];	/* 32-bit word view */
	} infha;
};
239
/*
 * Key material that is hashed to produce a flow hash.
 * NOTE(review): field meanings below are inferred from the names; the
 * code that fills this structure is not visible here -- confirm.
 */
struct inp_flowhash_key {
	struct inp_flowhash_key_addr	infh_laddr;	/* presumably local address */
	struct inp_flowhash_key_addr	infh_faddr;	/* presumably foreign address */
	u_int32_t			infh_lport;	/* presumably local port */
	u_int32_t			infh_fport;	/* presumably foreign port */
	u_int32_t			infh_af;	/* presumably address family */
	u_int32_t			infh_proto;	/* presumably protocol */
	u_int32_t			infh_rand1;	/* presumably random salt */
	u_int32_t			infh_rand2;	/* presumably random salt */
};
250
39236c6e
A
251static u_int32_t inp_hash_seed = 0;
252
253static int infc_cmp(const struct inpcb *, const struct inpcb *);
254
255/* Flags used by inp_fc_getinp */
256#define INPFC_SOLOCKED 0x1
257#define INPFC_REMOVE 0x2
258static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);
259
260static void inp_fc_feedback(struct inpcb *);
261extern void tcp_remove_from_time_wait(struct inpcb *inp);
316670eb 262
39236c6e 263decl_lck_mtx_data(static, inp_fc_lck);
316670eb 264
bd504ef0
A
265RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
266RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
267RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);
316670eb 268
bd504ef0
A
269/*
270 * Use this inp as a key to find an inp in the flowhash tree.
271 * Accesses to it are protected by inp_fc_lck.
272 */
273struct inpcb key_inp;
316670eb 274
1c79356b
A
275/*
276 * in_pcb.c: manage the Protocol Control Blocks.
1c79356b
A
277 */
278
316670eb 279void
39236c6e 280in_pcbinit(void)
316670eb 281{
39236c6e 282 static int inpcb_initialized = 0;
316670eb 283
39236c6e
A
284 VERIFY(!inpcb_initialized);
285 inpcb_initialized = 1;
316670eb 286
39236c6e
A
287 inpcb_lock_grp_attr = lck_grp_attr_alloc_init();
288 inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr);
289 inpcb_lock_attr = lck_attr_alloc_init();
290 lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr);
291 lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr);
39037602
A
292 inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout,
293 NULL, THREAD_CALL_PRIORITY_KERNEL);
294 inpcb_fast_thread_call = thread_call_allocate_with_priority(
295 inpcb_timeout, NULL, THREAD_CALL_PRIORITY_KERNEL);
296 if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL)
297 panic("unable to alloc the inpcb thread call");
39236c6e
A
298
299 /*
300 * Initialize data structures required to deliver
301 * flow advisories.
302 */
303 lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr);
bd504ef0 304 lck_mtx_lock(&inp_fc_lck);
316670eb 305 RB_INIT(&inp_fc_tree);
bd504ef0
A
306 bzero(&key_inp, sizeof(key_inp));
307 lck_mtx_unlock(&inp_fc_lck);
39037602
A
308
309 PE_parse_boot_argn("intcoproc_unrestricted", &intcoproc_unrestricted,
310 sizeof (intcoproc_unrestricted));
316670eb
A
311}
312
39236c6e
A
313#define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \
314 ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
315static void
39037602 316inpcb_timeout(void *arg0, void *arg1)
39236c6e 317{
39037602 318#pragma unused(arg0)
39236c6e
A
319 struct inpcbinfo *ipi;
320 boolean_t t, gc;
321 struct intimercount gccnt, tmcnt;
fe8ab488
A
322 boolean_t toomany_gc = FALSE;
323
39037602
A
324 if (arg1 != NULL) {
325 VERIFY(arg1 == &inpcb_toomany_gcreq);
326 toomany_gc = *(boolean_t *)arg1;
fe8ab488 327 }
39236c6e
A
328
329 /*
330 * Update coarse-grained networking timestamp (in sec.); the idea
331 * is to piggy-back on the timeout callout to update the counter
332 * returnable via net_uptime().
333 */
334 net_update_uptime();
335
fe8ab488
A
336 bzero(&gccnt, sizeof(gccnt));
337 bzero(&tmcnt, sizeof(tmcnt));
338
39236c6e
A
339 lck_mtx_lock_spin(&inpcb_timeout_lock);
340 gc = inpcb_garbage_collecting;
341 inpcb_garbage_collecting = FALSE;
39236c6e
A
342
343 t = inpcb_ticking;
344 inpcb_ticking = FALSE;
345
346 if (gc || t) {
347 lck_mtx_unlock(&inpcb_timeout_lock);
348
349 lck_mtx_lock(&inpcb_lock);
350 TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
351 if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
352 bzero(&ipi->ipi_gc_req,
353 sizeof(ipi->ipi_gc_req));
354 if (gc && ipi->ipi_gc != NULL) {
355 ipi->ipi_gc(ipi);
356 gccnt.intimer_lazy +=
357 ipi->ipi_gc_req.intimer_lazy;
358 gccnt.intimer_fast +=
359 ipi->ipi_gc_req.intimer_fast;
360 gccnt.intimer_nodelay +=
361 ipi->ipi_gc_req.intimer_nodelay;
362 }
363 }
364 if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
365 bzero(&ipi->ipi_timer_req,
366 sizeof(ipi->ipi_timer_req));
367 if (t && ipi->ipi_timer != NULL) {
368 ipi->ipi_timer(ipi);
369 tmcnt.intimer_lazy +=
370 ipi->ipi_timer_req.intimer_lazy;
371 tmcnt.intimer_lazy +=
372 ipi->ipi_timer_req.intimer_fast;
373 tmcnt.intimer_nodelay +=
374 ipi->ipi_timer_req.intimer_nodelay;
375 }
376 }
377 }
378 lck_mtx_unlock(&inpcb_lock);
379 lck_mtx_lock_spin(&inpcb_timeout_lock);
380 }
381
382 /* lock was dropped above, so check first before overriding */
383 if (!inpcb_garbage_collecting)
384 inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
385 if (!inpcb_ticking)
386 inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
387
388 /* re-arm the timer if there's work to do */
fe8ab488
A
389 if (toomany_gc) {
390 inpcb_toomany_gcreq = FALSE;
391 } else {
392 inpcb_timeout_run--;
393 VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);
394 }
39236c6e 395
39236c6e 396 if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0)
39037602 397 inpcb_sched_timeout();
39236c6e
A
398 else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5)
399 /* be lazy when idle with little activity */
39037602 400 inpcb_sched_lazy_timeout();
39236c6e 401 else
39037602 402 inpcb_sched_timeout();
39236c6e
A
403
404 lck_mtx_unlock(&inpcb_timeout_lock);
405}
406
/*
 * Request a timer with no leeway (offset 0).
 */
static void
inpcb_sched_timeout(void)
{
	const unsigned int no_leeway = 0;

	_inpcb_sched_timeout(no_leeway);
}
412
413static void
414inpcb_sched_lazy_timeout(void)
415{
416 _inpcb_sched_timeout(inpcb_timeout_lazy);
417}
39236c6e 418
39037602
A
419static void
420_inpcb_sched_timeout(unsigned int offset)
421{
422 uint64_t deadline, leeway;
423
424 clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline);
425 lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
39236c6e 426 if (inpcb_timeout_run == 0 &&
39037602 427 (inpcb_garbage_collecting || inpcb_ticking)) {
39236c6e
A
428 lck_mtx_convert_spin(&inpcb_timeout_lock);
429 inpcb_timeout_run++;
39037602 430 if (offset == 0) {
39236c6e 431 inpcb_fast_timer_on = TRUE;
39037602
A
432 thread_call_enter_delayed(inpcb_thread_call,
433 deadline);
39236c6e
A
434 } else {
435 inpcb_fast_timer_on = FALSE;
39037602
A
436 clock_interval_to_absolutetime_interval(offset,
437 NSEC_PER_SEC, &leeway);
438 thread_call_enter_delayed_with_leeway(
439 inpcb_thread_call, NULL, deadline, leeway,
440 THREAD_CALL_DELAY_LEEWAY);
39236c6e
A
441 }
442 } else if (inpcb_timeout_run == 1 &&
39037602 443 offset == 0 && !inpcb_fast_timer_on) {
39236c6e
A
444 /*
445 * Since the request was for a fast timer but the
446 * scheduled timer is a lazy timer, try to schedule
39037602 447 * another instance of fast timer also.
39236c6e
A
448 */
449 lck_mtx_convert_spin(&inpcb_timeout_lock);
450 inpcb_timeout_run++;
451 inpcb_fast_timer_on = TRUE;
39037602 452 thread_call_enter_delayed(inpcb_fast_thread_call, deadline);
39236c6e
A
453 }
454}
455
456void
457inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
458{
fe8ab488 459 u_int32_t gccnt;
39037602
A
460 uint64_t deadline;
461
39236c6e
A
462 lck_mtx_lock_spin(&inpcb_timeout_lock);
463 inpcb_garbage_collecting = TRUE;
fe8ab488
A
464 gccnt = ipi->ipi_gc_req.intimer_nodelay +
465 ipi->ipi_gc_req.intimer_fast;
466
467 if (gccnt > INPCB_GCREQ_THRESHOLD && !inpcb_toomany_gcreq) {
468 inpcb_toomany_gcreq = TRUE;
469
470 /*
471 * There are toomany pcbs waiting to be garbage collected,
472 * schedule a much faster timeout in addition to
473 * the caller's request
474 */
475 lck_mtx_convert_spin(&inpcb_timeout_lock);
39037602
A
476 clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);
477 thread_call_enter1_delayed(inpcb_thread_call,
478 &inpcb_toomany_gcreq, deadline);
fe8ab488
A
479 }
480
39236c6e
A
481 switch (type) {
482 case INPCB_TIMER_NODELAY:
483 atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1);
39037602 484 inpcb_sched_timeout();
39236c6e
A
485 break;
486 case INPCB_TIMER_FAST:
487 atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
39037602 488 inpcb_sched_timeout();
39236c6e
A
489 break;
490 default:
491 atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);
39037602 492 inpcb_sched_lazy_timeout();
39236c6e
A
493 break;
494 }
495 lck_mtx_unlock(&inpcb_timeout_lock);
496}
497
498void
499inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
500{
39037602 501
39236c6e
A
502 lck_mtx_lock_spin(&inpcb_timeout_lock);
503 inpcb_ticking = TRUE;
504 switch (type) {
505 case INPCB_TIMER_NODELAY:
506 atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1);
39037602 507 inpcb_sched_timeout();
39236c6e
A
508 break;
509 case INPCB_TIMER_FAST:
510 atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
39037602 511 inpcb_sched_timeout();
39236c6e
A
512 break;
513 default:
514 atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1);
39037602 515 inpcb_sched_lazy_timeout();
39236c6e
A
516 break;
517 }
518 lck_mtx_unlock(&inpcb_timeout_lock);
519}
520
521void
522in_pcbinfo_attach(struct inpcbinfo *ipi)
523{
524 struct inpcbinfo *ipi0;
525
526 lck_mtx_lock(&inpcb_lock);
527 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
528 if (ipi0 == ipi) {
529 panic("%s: ipi %p already in the list\n",
530 __func__, ipi);
531 /* NOTREACHED */
532 }
533 }
534 TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
535 lck_mtx_unlock(&inpcb_lock);
536}
537
538int
539in_pcbinfo_detach(struct inpcbinfo *ipi)
540{
541 struct inpcbinfo *ipi0;
542 int error = 0;
543
544 lck_mtx_lock(&inpcb_lock);
545 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
546 if (ipi0 == ipi)
547 break;
548 }
549 if (ipi0 != NULL)
550 TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
551 else
552 error = ENXIO;
553 lck_mtx_unlock(&inpcb_lock);
554
555 return (error);
556}
557
1c79356b
A
558/*
559 * Allocate a PCB and associate it with the socket.
2d21ac55
A
560 *
561 * Returns: 0 Success
562 * ENOBUFS
563 * ENOMEM
1c79356b
A
564 */
565int
39236c6e 566in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
1c79356b 567{
39236c6e 568#pragma unused(p)
2d21ac55 569 struct inpcb *inp;
39236c6e 570 caddr_t temp;
2d21ac55
A
571#if CONFIG_MACF_NET
572 int mac_error;
39236c6e 573#endif /* CONFIG_MACF_NET */
1c79356b 574
3e170ce0 575 if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
39236c6e
A
576 inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone);
577 if (inp == NULL)
578 return (ENOBUFS);
579 bzero((caddr_t)inp, sizeof (*inp));
580 } else {
581 inp = (struct inpcb *)(void *)so->so_saved_pcb;
582 temp = inp->inp_saved_ppcb;
583 bzero((caddr_t)inp, sizeof (*inp));
584 inp->inp_saved_ppcb = temp;
1c79356b
A
585 }
586
587 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
588 inp->inp_pcbinfo = pcbinfo;
589 inp->inp_socket = so;
2d21ac55
A
590#if CONFIG_MACF_NET
591 mac_error = mac_inpcb_label_init(inp, M_WAITOK);
592 if (mac_error != 0) {
3e170ce0 593 if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0)
2d21ac55
A
594 zfree(pcbinfo->ipi_zone, inp);
595 return (mac_error);
596 }
597 mac_inpcb_label_associate(so, inp);
39236c6e
A
598#endif /* CONFIG_MACF_NET */
599 /* make sure inp_stat is always 64-bit aligned */
600 inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
601 sizeof (u_int64_t));
602 if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
603 sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) {
604 panic("%s: insufficient space to align inp_stat", __func__);
605 /* NOTREACHED */
606 }
607
608 /* make sure inp_cstat is always 64-bit aligned */
609 inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
610 sizeof (u_int64_t));
611 if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
612 sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) {
613 panic("%s: insufficient space to align inp_cstat", __func__);
614 /* NOTREACHED */
615 }
616
617 /* make sure inp_wstat is always 64-bit aligned */
618 inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
619 sizeof (u_int64_t));
620 if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
621 sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) {
622 panic("%s: insufficient space to align inp_wstat", __func__);
623 /* NOTREACHED */
6d2010ae
A
624 }
625
fe8ab488
A
626 /* make sure inp_Wstat is always 64-bit aligned */
627 inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
628 sizeof (u_int64_t));
629 if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
630 sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) {
631 panic("%s: insufficient space to align inp_Wstat", __func__);
632 /* NOTREACHED */
633 }
39037602 634
91447636
A
635 so->so_pcb = (caddr_t)inp;
636
637 if (so->so_proto->pr_flags & PR_PCBLOCK) {
39236c6e
A
638 lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
639 pcbinfo->ipi_lock_attr);
91447636
A
640 }
641
2d21ac55 642#if INET6
39236c6e 643 if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on)
9bccf70c 644 inp->inp_flags |= IN6P_IPV6_V6ONLY;
39236c6e 645
9bccf70c
A
646 if (ip6_auto_flowlabel)
647 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
39236c6e 648#endif /* INET6 */
39037602
A
649 if (intcoproc_unrestricted)
650 inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
39236c6e
A
651
652 (void) inp_update_policy(inp);
653
654 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
91447636 655 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
39236c6e 656 LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
91447636 657 pcbinfo->ipi_count++;
39236c6e 658 lck_rw_done(pcbinfo->ipi_lock);
1c79356b
A
659 return (0);
660}
661
2d21ac55 662/*
39236c6e
A
663 * in_pcblookup_local_and_cleanup does everything
664 * in_pcblookup_local does but it checks for a socket
665 * that's going away. Since we know that the lock is
666 * held read+write when this funciton is called, we
667 * can safely dispose of this socket like the slow
668 * timer would usually do and return NULL. This is
669 * great for bind.
670 */
671struct inpcb *
672in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
673 u_int lport_arg, int wild_okay)
2d21ac55
A
674{
675 struct inpcb *inp;
39236c6e 676
2d21ac55
A
677 /* Perform normal lookup */
678 inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
39236c6e 679
2d21ac55 680 /* Check if we found a match but it's waiting to be disposed */
39236c6e 681 if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
2d21ac55 682 struct socket *so = inp->inp_socket;
39236c6e 683
6d2010ae 684 lck_mtx_lock(&inp->inpcb_mtx);
39236c6e 685
2d21ac55 686 if (so->so_usecount == 0) {
b0d623f7
A
687 if (inp->inp_state != INPCB_STATE_DEAD)
688 in_pcbdetach(inp);
39236c6e 689 in_pcbdispose(inp); /* will unlock & destroy */
2d21ac55 690 inp = NULL;
39236c6e 691 } else {
6d2010ae 692 lck_mtx_unlock(&inp->inpcb_mtx);
2d21ac55
A
693 }
694 }
39236c6e
A
695
696 return (inp);
2d21ac55
A
697}
698
c910b4d9 699static void
2d21ac55
A
700in_pcb_conflict_post_msg(u_int16_t port)
701{
39236c6e
A
702 /*
703 * Radar 5523020 send a kernel event notification if a
704 * non-participating socket tries to bind the port a socket
705 * who has set SOF_NOTIFYCONFLICT owns.
2d21ac55 706 */
39236c6e 707 struct kev_msg ev_msg;
2d21ac55
A
708 struct kev_in_portinuse in_portinuse;
709
39236c6e
A
710 bzero(&in_portinuse, sizeof (struct kev_in_portinuse));
711 bzero(&ev_msg, sizeof (struct kev_msg));
2d21ac55
A
712 in_portinuse.port = ntohs(port); /* port in host order */
713 in_portinuse.req_pid = proc_selfpid();
714 ev_msg.vendor_code = KEV_VENDOR_APPLE;
715 ev_msg.kev_class = KEV_NETWORK_CLASS;
716 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
717 ev_msg.event_code = KEV_INET_PORTINUSE;
718 ev_msg.dv[0].data_ptr = &in_portinuse;
39236c6e 719 ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse);
2d21ac55 720 ev_msg.dv[1].data_length = 0;
39037602 721 dlil_post_complete_msg(NULL, &ev_msg);
2d21ac55 722}
39236c6e 723
2d21ac55 724/*
39236c6e
A
725 * Bind an INPCB to an address and/or port. This routine should not alter
726 * the caller-supplied local address "nam".
727 *
2d21ac55
A
728 * Returns: 0 Success
729 * EADDRNOTAVAIL Address not available.
730 * EINVAL Invalid argument
731 * EAFNOSUPPORT Address family not supported [notdef]
732 * EACCES Permission denied
733 * EADDRINUSE Address in use
734 * EAGAIN Resource unavailable, try again
6d2010ae 735 * priv_check_cred:EPERM Operation not permitted
2d21ac55 736 */
1c79356b 737int
2d21ac55 738in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
1c79356b 739{
2d21ac55 740 struct socket *so = inp->inp_socket;
9bccf70c 741 unsigned short *lastport;
1c79356b 742 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
b0d623f7 743 u_short lport = 0, rand_port = 0;
1c79356b 744 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
b0d623f7 745 int error, randomport, conflict = 0;
fe8ab488 746 boolean_t anonport = FALSE;
6d2010ae 747 kauth_cred_t cred;
fe8ab488
A
748 struct in_addr laddr;
749 struct ifnet *outif = NULL;
1c79356b
A
750
751 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
752 return (EADDRNOTAVAIL);
39236c6e 753 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
1c79356b 754 return (EINVAL);
39236c6e 755 if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
1c79356b 756 wild = 1;
fe8ab488
A
757
758 bzero(&laddr, sizeof(laddr));
759
4bd07ac2
A
760 socket_unlock(so, 0); /* keep reference on socket */
761 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
762
39236c6e 763 if (nam != NULL) {
6d2010ae 764
39236c6e
A
765 if (nam->sa_len != sizeof (struct sockaddr_in)) {
766 lck_rw_done(pcbinfo->ipi_lock);
91447636 767 socket_lock(so, 0);
1c79356b 768 return (EINVAL);
91447636 769 }
39236c6e 770#if 0
1c79356b
A
771 /*
772 * We should check the family, but old programs
773 * incorrectly fail to initialize it.
774 */
39236c6e
A
775 if (nam->sa_family != AF_INET) {
776 lck_rw_done(pcbinfo->ipi_lock);
91447636 777 socket_lock(so, 0);
1c79356b 778 return (EAFNOSUPPORT);
91447636 779 }
39236c6e
A
780#endif /* 0 */
781 lport = SIN(nam)->sin_port;
782
783 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
1c79356b
A
784 /*
785 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
786 * allow complete duplication of binding if
787 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
788 * and a multicast address is bound on both
789 * new and duplicated sockets.
790 */
791 if (so->so_options & SO_REUSEADDR)
792 reuseport = SO_REUSEADDR|SO_REUSEPORT;
39236c6e
A
793 } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
794 struct sockaddr_in sin;
91447636 795 struct ifaddr *ifa;
39236c6e
A
796
797 /* Sanitized for interface address searches */
798 bzero(&sin, sizeof (sin));
799 sin.sin_family = AF_INET;
800 sin.sin_len = sizeof (struct sockaddr_in);
801 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
802
803 ifa = ifa_ifwithaddr(SA(&sin));
804 if (ifa == NULL) {
805 lck_rw_done(pcbinfo->ipi_lock);
91447636 806 socket_lock(so, 0);
1c79356b 807 return (EADDRNOTAVAIL);
39236c6e
A
808 } else {
809 /*
810 * Opportunistically determine the outbound
811 * interface that may be used; this may not
812 * hold true if we end up using a route
813 * going over a different interface, e.g.
814 * when sending to a local address. This
815 * will get updated again after sending.
816 */
6d2010ae 817 IFA_LOCK(ifa);
316670eb 818 outif = ifa->ifa_ifp;
6d2010ae
A
819 IFA_UNLOCK(ifa);
820 IFA_REMREF(ifa);
91447636 821 }
1c79356b 822 }
39236c6e 823 if (lport != 0) {
1c79356b 824 struct inpcb *t;
39236c6e 825 uid_t u;
1c79356b 826
6d2010ae
A
827 if (ntohs(lport) < IPPORT_RESERVED) {
828 cred = kauth_cred_proc_ref(p);
39236c6e
A
829 error = priv_check_cred(cred,
830 PRIV_NETINET_RESERVEDPORT, 0);
6d2010ae
A
831 kauth_cred_unref(&cred);
832 if (error != 0) {
39236c6e 833 lck_rw_done(pcbinfo->ipi_lock);
6d2010ae
A
834 socket_lock(so, 0);
835 return (EACCES);
836 }
91447636 837 }
39236c6e
A
838 if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
839 (u = kauth_cred_getuid(so->so_cred)) != 0 &&
840 (t = in_pcblookup_local_and_cleanup(
841 inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
842 INPLOOKUP_WILDCARD)) != NULL &&
843 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
844 t->inp_laddr.s_addr != INADDR_ANY ||
845 !(t->inp_socket->so_options & SO_REUSEPORT)) &&
846 (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
847 !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
848 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
849 t->inp_laddr.s_addr != INADDR_ANY)) {
850 if ((t->inp_socket->so_flags &
851 SOF_NOTIFYCONFLICT) &&
852 !(so->so_flags & SOF_NOTIFYCONFLICT))
853 conflict = 1;
854
855 lck_rw_done(pcbinfo->ipi_lock);
856
857 if (conflict)
858 in_pcb_conflict_post_msg(lport);
2d21ac55 859
39236c6e
A
860 socket_lock(so, 0);
861 return (EADDRINUSE);
1c79356b 862 }
39236c6e
A
863 t = in_pcblookup_local_and_cleanup(pcbinfo,
864 SIN(nam)->sin_addr, lport, wild);
865 if (t != NULL &&
1c79356b
A
866 (reuseport & t->inp_socket->so_options) == 0) {
867#if INET6
39236c6e
A
868 if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
869 t->inp_laddr.s_addr != INADDR_ANY ||
870 SOCK_DOM(so) != PF_INET6 ||
871 SOCK_DOM(t->inp_socket) != PF_INET6)
2d21ac55
A
872#endif /* INET6 */
873 {
2d21ac55 874
39236c6e
A
875 if ((t->inp_socket->so_flags &
876 SOF_NOTIFYCONFLICT) &&
877 !(so->so_flags & SOF_NOTIFYCONFLICT))
2d21ac55
A
878 conflict = 1;
879
39236c6e 880 lck_rw_done(pcbinfo->ipi_lock);
2d21ac55
A
881
882 if (conflict)
883 in_pcb_conflict_post_msg(lport);
91447636
A
884 socket_lock(so, 0);
885 return (EADDRINUSE);
886 }
1c79356b
A
887 }
888 }
fe8ab488 889 laddr = SIN(nam)->sin_addr;
1c79356b
A
890 }
891 if (lport == 0) {
892 u_short first, last;
893 int count;
894
39236c6e
A
895 randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
896 (so->so_type == SOCK_STREAM ? tcp_use_randomport :
897 udp_use_randomport);
898
899 /*
fe8ab488
A
900 * Even though this looks similar to the code in
901 * in6_pcbsetport, the v6 vs v4 checks are different.
39236c6e 902 */
fe8ab488 903 anonport = TRUE;
1c79356b
A
904 if (inp->inp_flags & INP_HIGHPORT) {
905 first = ipport_hifirstauto; /* sysctl */
906 last = ipport_hilastauto;
39236c6e 907 lastport = &pcbinfo->ipi_lasthi;
1c79356b 908 } else if (inp->inp_flags & INP_LOWPORT) {
6d2010ae 909 cred = kauth_cred_proc_ref(p);
39236c6e
A
910 error = priv_check_cred(cred,
911 PRIV_NETINET_RESERVEDPORT, 0);
6d2010ae
A
912 kauth_cred_unref(&cred);
913 if (error != 0) {
39236c6e 914 lck_rw_done(pcbinfo->ipi_lock);
91447636 915 socket_lock(so, 0);
39236c6e 916 return (error);
91447636 917 }
1c79356b
A
918 first = ipport_lowfirstauto; /* 1023 */
919 last = ipport_lowlastauto; /* 600 */
39236c6e 920 lastport = &pcbinfo->ipi_lastlow;
1c79356b
A
921 } else {
922 first = ipport_firstauto; /* sysctl */
923 last = ipport_lastauto;
39236c6e 924 lastport = &pcbinfo->ipi_lastport;
1c79356b 925 }
b0d623f7
A
926 /* No point in randomizing if only one port is available */
927
928 if (first == last)
39236c6e 929 randomport = 0;
1c79356b
A
930 /*
931 * Simple check to ensure all ports are not used up causing
932 * a deadlock here.
933 *
934 * We split the two cases (up and down) so that the direction
935 * is not being tested on each round of the loop.
936 */
937 if (first > last) {
938 /*
939 * counting down
940 */
b0d623f7 941 if (randomport) {
39236c6e
A
942 read_random(&rand_port, sizeof (rand_port));
943 *lastport =
944 first - (rand_port % (first - last));
b0d623f7 945 }
1c79356b
A
946 count = first - last;
947
948 do {
949 if (count-- < 0) { /* completely used? */
39236c6e 950 lck_rw_done(pcbinfo->ipi_lock);
91447636 951 socket_lock(so, 0);
9bccf70c 952 return (EADDRNOTAVAIL);
1c79356b
A
953 }
954 --*lastport;
955 if (*lastport > first || *lastport < last)
956 *lastport = first;
957 lport = htons(*lastport);
2d21ac55 958 } while (in_pcblookup_local_and_cleanup(pcbinfo,
39037602 959 ((laddr.s_addr != INADDR_ANY) ? laddr :
fe8ab488 960 inp->inp_laddr), lport, wild));
1c79356b
A
961 } else {
962 /*
963 * counting up
964 */
b0d623f7 965 if (randomport) {
39236c6e
A
966 read_random(&rand_port, sizeof (rand_port));
967 *lastport =
968 first + (rand_port % (first - last));
b0d623f7 969 }
1c79356b
A
970 count = last - first;
971
972 do {
973 if (count-- < 0) { /* completely used? */
39236c6e 974 lck_rw_done(pcbinfo->ipi_lock);
91447636 975 socket_lock(so, 0);
9bccf70c 976 return (EADDRNOTAVAIL);
1c79356b
A
977 }
978 ++*lastport;
979 if (*lastport < first || *lastport > last)
980 *lastport = first;
981 lport = htons(*lastport);
2d21ac55 982 } while (in_pcblookup_local_and_cleanup(pcbinfo,
fe8ab488
A
983 ((laddr.s_addr != INADDR_ANY) ? laddr :
984 inp->inp_laddr), lport, wild));
1c79356b
A
985 }
986 }
91447636 987 socket_lock(so, 0);
4bd07ac2
A
988
989 /*
990 * We unlocked socket's protocol lock for a long time.
991 * The socket might have been dropped/defuncted.
992 * Checking if world has changed since.
993 */
994 if (inp->inp_state == INPCB_STATE_DEAD) {
995 lck_rw_done(pcbinfo->ipi_lock);
996 return (ECONNABORTED);
997 }
998
fe8ab488
A
999 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
1000 lck_rw_done(pcbinfo->ipi_lock);
1001 return (EINVAL);
1002 }
1003
1004 if (laddr.s_addr != INADDR_ANY) {
1005 inp->inp_laddr = laddr;
1006 inp->inp_last_outifp = outif;
1007 }
1c79356b 1008 inp->inp_lport = lport;
fe8ab488
A
1009 if (anonport)
1010 inp->inp_flags |= INP_ANONPORT;
1011
91447636 1012 if (in_pcbinshash(inp, 1) != 0) {
1c79356b 1013 inp->inp_laddr.s_addr = INADDR_ANY;
316670eb 1014 inp->inp_last_outifp = NULL;
fe8ab488
A
1015
1016 inp->inp_lport = 0;
1017 if (anonport)
1018 inp->inp_flags &= ~INP_ANONPORT;
39236c6e 1019 lck_rw_done(pcbinfo->ipi_lock);
1c79356b
A
1020 return (EAGAIN);
1021 }
39236c6e 1022 lck_rw_done(pcbinfo->ipi_lock);
2d21ac55 1023 sflt_notify(so, sock_evt_bound, NULL);
1c79356b
A
1024 return (0);
1025}
1026
39037602
A
1027#define APN_FALLBACK_IP_FILTER(a) \
1028 (IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \
1029 IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \
1030 IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \
1031 IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \
1032 IN_PRIVATE(ntohl((a)->sin_addr.s_addr)))
1033
1034#define APN_FALLBACK_NOTIF_INTERVAL 2 /* Magic Number */
1035static uint64_t last_apn_fallback = 0;
1036
1037static boolean_t
1038apn_fallback_required (proc_t proc, struct socket *so, struct sockaddr_in *p_dstv4)
1039{
1040 uint64_t timenow;
1041 struct sockaddr_storage lookup_default_addr;
1042 struct rtentry *rt = NULL;
1043
1044 VERIFY(proc != NULL);
1045
1046 if (apn_fallbk_enabled == FALSE)
1047 return FALSE;
1048
1049 if (proc == kernproc)
1050 return FALSE;
1051
1052 if (so && (so->so_options & SO_NOAPNFALLBK))
1053 return FALSE;
1054
1055 timenow = net_uptime();
1056 if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) {
1057 apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n"));
1058 return FALSE;
1059 }
1060
1061 if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4))
1062 return FALSE;
1063
1064 /* Check if we have unscoped IPv6 default route through cellular */
1065 bzero(&lookup_default_addr, sizeof(lookup_default_addr));
1066 lookup_default_addr.ss_family = AF_INET6;
1067 lookup_default_addr.ss_len = sizeof(struct sockaddr_in6);
1068
1069 rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
1070 if (NULL == rt) {
1071 apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
1072 "unscoped default IPv6 route.\n"));
1073 return FALSE;
1074 }
1075
1076 if (!IFNET_IS_CELLULAR(rt->rt_ifp)) {
1077 rtfree(rt);
1078 apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
1079 "unscoped default IPv6 route through cellular interface.\n"));
1080 return FALSE;
1081 }
1082
1083 /*
1084 * We have a default IPv6 route, ensure that
1085 * we do not have IPv4 default route before triggering
1086 * the event
1087 */
1088 rtfree(rt);
1089 rt = NULL;
1090
1091 bzero(&lookup_default_addr, sizeof(lookup_default_addr));
1092 lookup_default_addr.ss_family = AF_INET;
1093 lookup_default_addr.ss_len = sizeof(struct sockaddr_in);
1094
1095 rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
1096
1097 if (rt) {
1098 rtfree(rt);
1099 rt = NULL;
1100 apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped "
1101 "IPv4 default route!\n"));
1102 return FALSE;
1103 }
1104
1105 {
1106 /*
1107 * We disable APN fallback if the binary is not a third-party app.
1108 * Note that platform daemons use their process name as a
1109 * bundle ID so we filter out bundle IDs without dots.
1110 */
1111 const char *bundle_id = cs_identity_get(proc);
1112 if (bundle_id == NULL ||
1113 bundle_id[0] == '\0' ||
1114 strchr(bundle_id, '.') == NULL ||
1115 strncmp(bundle_id, "com.apple.", sizeof("com.apple.") - 1) == 0) {
1116 apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-"
1117 "party bundle ID \"%s\"!\n", (bundle_id ? bundle_id : "NULL")));
1118 return FALSE;
1119 }
1120 }
1121
1122 {
1123 /*
1124 * The Apple App Store IPv6 requirement started on
1125 * June 1st, 2016 at 12:00:00 AM PDT.
1126 * We disable APN fallback if the binary is more recent than that.
1127 * We check both atime and birthtime since birthtime is not always supported.
1128 */
1129 static const long ipv6_start_date = 1464764400L;
1130 vfs_context_t context;
1131 struct stat64 sb;
1132 int vn_stat_error;
1133
1134 bzero(&sb, sizeof(struct stat64));
1135 context = vfs_context_create(NULL);
1136 vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, context);
1137 (void)vfs_context_rele(context);
1138
1139 if (vn_stat_error != 0 ||
1140 sb.st_atimespec.tv_sec >= ipv6_start_date ||
1141 sb.st_birthtimespec.tv_sec >= ipv6_start_date) {
1142 apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary "
1143 "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n",
1144 vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec,
1145 sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec));
1146 return FALSE;
1147 }
1148 }
1149 return TRUE;
1150}
1151
1152static void
1153apn_fallback_trigger(proc_t proc)
1154{
1155 pid_t pid = 0;
1156 struct kev_msg ev_msg;
1157 struct kev_netevent_apnfallbk_data apnfallbk_data;
1158
1159 last_apn_fallback = net_uptime();
1160 pid = proc_pid(proc);
1161 uuid_t application_uuid;
1162 uuid_clear(application_uuid);
1163 proc_getexecutableuuid(proc, application_uuid,
1164 sizeof(application_uuid));
1165
1166 bzero(&ev_msg, sizeof (struct kev_msg));
1167 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1168 ev_msg.kev_class = KEV_NETWORK_CLASS;
1169 ev_msg.kev_subclass = KEV_NETEVENT_SUBCLASS;
1170 ev_msg.event_code = KEV_NETEVENT_APNFALLBACK;
1171
1172 bzero(&apnfallbk_data, sizeof(apnfallbk_data));
1173 apnfallbk_data.epid = pid;
1174 uuid_copy(apnfallbk_data.euuid, application_uuid);
1175
1176 ev_msg.dv[0].data_ptr = &apnfallbk_data;
1177 ev_msg.dv[0].data_length = sizeof(apnfallbk_data);
1178 kev_post_msg(&ev_msg);
1179 apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n"));
1180}
1181
1c79356b 1182/*
39236c6e
A
1183 * Transform old in_pcbconnect() into an inner subroutine for new
1184 * in_pcbconnect(); do some validity-checking on the remote address
1185 * (in "nam") and then determine local host address (i.e., which
1186 * interface) to use to access that remote host.
1187 *
1188 * This routine may alter the caller-supplied remote address "nam".
1c79356b 1189 *
39236c6e
A
1190 * The caller may override the bound-to-interface setting of the socket
1191 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1192 *
1193 * This routine might return an ifp with a reference held if the caller
1194 * provides a non-NULL outif, even in the error case. The caller is
1195 * responsible for releasing its reference.
2d21ac55
A
1196 *
1197 * Returns: 0 Success
1198 * EINVAL Invalid argument
1199 * EAFNOSUPPORT Address family not supported
1200 * EADDRNOTAVAIL Address not available
1c79356b 1201 */
1c79356b 1202int
39236c6e 1203in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
39037602 1204 unsigned int ifscope, struct ifnet **outif, int raw)
1c79356b 1205{
39236c6e
A
1206 struct route *ro = &inp->inp_route;
1207 struct in_ifaddr *ia = NULL;
1208 struct sockaddr_in sin;
1209 int error = 0;
fe8ab488 1210 boolean_t restricted = FALSE;
39236c6e
A
1211
1212 if (outif != NULL)
1213 *outif = NULL;
1214 if (nam->sa_len != sizeof (struct sockaddr_in))
1c79356b 1215 return (EINVAL);
39236c6e 1216 if (SIN(nam)->sin_family != AF_INET)
1c79356b 1217 return (EAFNOSUPPORT);
39037602 1218 if (raw == 0 && SIN(nam)->sin_port == 0)
1c79356b 1219 return (EADDRNOTAVAIL);
b0d623f7 1220
39236c6e
A
1221 /*
1222 * If the destination address is INADDR_ANY,
1223 * use the primary local address.
1224 * If the supplied address is INADDR_BROADCAST,
1225 * and the primary interface supports broadcast,
1226 * choose the broadcast address for that interface.
1227 */
39037602
A
1228 if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
1229 SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) {
39236c6e
A
1230 lck_rw_lock_shared(in_ifaddr_rwlock);
1231 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
1232 ia = TAILQ_FIRST(&in_ifaddrhead);
1233 IFA_LOCK_SPIN(&ia->ia_ifa);
1234 if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
1235 SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
1236 } else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
1237 SIN(nam)->sin_addr =
1238 SIN(&ia->ia_broadaddr)->sin_addr;
1239 }
1240 IFA_UNLOCK(&ia->ia_ifa);
1241 ia = NULL;
1242 }
1243 lck_rw_done(in_ifaddr_rwlock);
1244 }
1245 /*
1246 * Otherwise, if the socket has already bound the source, just use it.
1247 */
1248 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1249 VERIFY(ia == NULL);
1250 *laddr = inp->inp_laddr;
1251 return (0);
1c79356b 1252 }
6d2010ae 1253
39236c6e
A
1254 /*
1255 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
1256 * then it overrides the sticky ifscope set for the socket.
1257 */
1258 if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF))
1259 ifscope = inp->inp_boundifp->if_index;
6d2010ae 1260
39236c6e
A
1261 /*
1262 * If route is known or can be allocated now,
1263 * our src addr is taken from the i/f, else punt.
1264 * Note that we should check the address family of the cached
1265 * destination, in case of sharing the cache with IPv6.
1266 */
1267 if (ro->ro_rt != NULL)
1268 RT_LOCK_SPIN(ro->ro_rt);
1269 if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
1270 SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
1271 (inp->inp_socket->so_options & SO_DONTROUTE)) {
b0d623f7 1272 if (ro->ro_rt != NULL)
b0d623f7 1273 RT_UNLOCK(ro->ro_rt);
39236c6e
A
1274 ROUTE_RELEASE(ro);
1275 }
1276 if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
1277 (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
1278 if (ro->ro_rt != NULL)
1279 RT_UNLOCK(ro->ro_rt);
1280 ROUTE_RELEASE(ro);
1281 /* No route yet, so try to acquire one */
1282 bzero(&ro->ro_dst, sizeof (struct sockaddr_in));
1283 ro->ro_dst.sa_family = AF_INET;
1284 ro->ro_dst.sa_len = sizeof (struct sockaddr_in);
1285 SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
1286 rtalloc_scoped(ro, ifscope);
1287 if (ro->ro_rt != NULL)
1288 RT_LOCK_SPIN(ro->ro_rt);
1289 }
1290 /* Sanitized local copy for interface address searches */
1291 bzero(&sin, sizeof (sin));
1292 sin.sin_family = AF_INET;
1293 sin.sin_len = sizeof (struct sockaddr_in);
1294 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
1295 /*
1296 * If we did not find (or use) a route, assume dest is reachable
1297 * on a directly connected network and try to find a corresponding
1298 * interface to take the source address from.
1299 */
1300 if (ro->ro_rt == NULL) {
39037602
A
1301 proc_t proc = current_proc();
1302
39236c6e
A
1303 VERIFY(ia == NULL);
1304 ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
1305 if (ia == NULL)
1306 ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
1307 error = ((ia == NULL) ? ENETUNREACH : 0);
743345f9 1308
39037602
A
1309 if (apn_fallback_required(proc, inp->inp_socket,
1310 (void *)nam))
1311 apn_fallback_trigger(proc);
1312
39236c6e
A
1313 goto done;
1314 }
1315 RT_LOCK_ASSERT_HELD(ro->ro_rt);
1316 /*
1317 * If the outgoing interface on the route found is not
1318 * a loopback interface, use the address from that interface.
1319 */
1320 if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
1321 VERIFY(ia == NULL);
6d2010ae
A
1322 /*
1323 * If the route points to a cellular interface and the
1324 * caller forbids our using interfaces of such type,
1325 * pretend that there is no route.
fe8ab488 1326 * Apply the same logic for expensive interfaces.
6d2010ae 1327 */
fe8ab488 1328 if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
39236c6e
A
1329 RT_UNLOCK(ro->ro_rt);
1330 ROUTE_RELEASE(ro);
1331 error = EHOSTUNREACH;
fe8ab488 1332 restricted = TRUE;
39236c6e 1333 } else {
6d2010ae
A
1334 /* Become a regular mutex */
1335 RT_CONVERT_LOCK(ro->ro_rt);
39236c6e
A
1336 ia = ifatoia(ro->ro_rt->rt_ifa);
1337 IFA_ADDREF(&ia->ia_ifa);
b0d623f7 1338 RT_UNLOCK(ro->ro_rt);
39236c6e 1339 error = 0;
91447636 1340 }
39236c6e
A
1341 goto done;
1342 }
1343 VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
1344 RT_UNLOCK(ro->ro_rt);
1345 /*
1346 * The outgoing interface is marked with 'loopback net', so a route
1347 * to ourselves is here.
1348 * Try to find the interface of the destination address and then
1349 * take the address from there. That interface is not necessarily
1350 * a loopback interface.
1351 */
1352 VERIFY(ia == NULL);
1353 ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
1354 if (ia == NULL)
1355 ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
1356 if (ia == NULL)
1357 ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
1358 if (ia == NULL) {
1359 RT_LOCK(ro->ro_rt);
1360 ia = ifatoia(ro->ro_rt->rt_ifa);
1361 if (ia != NULL)
1362 IFA_ADDREF(&ia->ia_ifa);
1363 RT_UNLOCK(ro->ro_rt);
1364 }
1365 error = ((ia == NULL) ? ENETUNREACH : 0);
1366
1367done:
1368 /*
1369 * If the destination address is multicast and an outgoing
1370 * interface has been set as a multicast option, use the
1371 * address of that interface as our source address.
1372 */
15129b1c 1373 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
39236c6e
A
1374 inp->inp_moptions != NULL) {
1375 struct ip_moptions *imo;
1376 struct ifnet *ifp;
1377
1378 imo = inp->inp_moptions;
1379 IMO_LOCK(imo);
1380 if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
1381 ia->ia_ifp != imo->imo_multicast_ifp)) {
1382 ifp = imo->imo_multicast_ifp;
1383 if (ia != NULL)
6d2010ae 1384 IFA_REMREF(&ia->ia_ifa);
39236c6e
A
1385 lck_rw_lock_shared(in_ifaddr_rwlock);
1386 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
1387 if (ia->ia_ifp == ifp)
1388 break;
6d2010ae 1389 }
39236c6e
A
1390 if (ia != NULL)
1391 IFA_ADDREF(&ia->ia_ifa);
1392 lck_rw_done(in_ifaddr_rwlock);
1393 if (ia == NULL)
1394 error = EADDRNOTAVAIL;
15129b1c
A
1395 else
1396 error = 0;
1c79356b 1397 }
39236c6e
A
1398 IMO_UNLOCK(imo);
1399 }
1400 /*
1401 * Don't do pcblookup call here; return interface in laddr
1402 * and exit to caller, that will do the lookup.
1403 */
1404 if (ia != NULL) {
1c79356b 1405 /*
39236c6e
A
1406 * If the source address belongs to a cellular interface
1407 * and the socket forbids our using interfaces of such
1408 * type, pretend that there is no source address.
fe8ab488 1409 * Apply the same logic for expensive interfaces.
1c79356b 1410 */
39236c6e 1411 IFA_LOCK_SPIN(&ia->ia_ifa);
fe8ab488 1412 if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
39236c6e
A
1413 IFA_UNLOCK(&ia->ia_ifa);
1414 error = EHOSTUNREACH;
fe8ab488 1415 restricted = TRUE;
39236c6e
A
1416 } else if (error == 0) {
1417 *laddr = ia->ia_addr.sin_addr;
1418 if (outif != NULL) {
1419 struct ifnet *ifp;
1420
1421 if (ro->ro_rt != NULL)
1422 ifp = ro->ro_rt->rt_ifp;
1423 else
1424 ifp = ia->ia_ifp;
1425
1426 VERIFY(ifp != NULL);
1427 IFA_CONVERT_LOCK(&ia->ia_ifa);
1428 ifnet_reference(ifp); /* for caller */
1429 if (*outif != NULL)
1430 ifnet_release(*outif);
1431 *outif = ifp;
1c79356b 1432 }
39236c6e
A
1433 IFA_UNLOCK(&ia->ia_ifa);
1434 } else {
1435 IFA_UNLOCK(&ia->ia_ifa);
1c79356b 1436 }
6d2010ae 1437 IFA_REMREF(&ia->ia_ifa);
39236c6e
A
1438 ia = NULL;
1439 }
1440
fe8ab488 1441 if (restricted && error == EHOSTUNREACH) {
39236c6e
A
1442 soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
1443 SO_FILT_HINT_IFDENIED));
1c79356b 1444 }
39236c6e
A
1445
1446 return (error);
1c79356b
A
1447}
1448
/*
 * Outer subroutine:
 * Connect from a socket to a specified address.
 * Both address and port must be specified in argument sin.
 * If don't have a local address for this socket yet,
 * then pick one.
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 *
 * The socket lock is dropped and re-taken around the PCB hash lookup,
 * and possibly again while acquiring the pcbinfo lock.
 *
 * Returns:	0		Success
 *		ECONNREFUSED	Socket was aborted while it was unlocked
 *		EADDRINUSE	The 4-tuple is already used by another PCB
 *		EINVAL		Local address set but no local port
 *		(other)		Error from in_pcbladdr() or in_pcbbind()
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
	struct inpcb *pcb;
	int error;
	struct socket *so = inp->inp_socket;

	/*
	 * Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0)
		return (error);

	socket_unlock(so, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state. When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 *
	 * A PCB returned by the lookup above carries a reference that must
	 * be given back (as the EADDRINUSE path below does); release it
	 * here as well so that it is not leaked on the abort path.
	 */
	if ((so->so_flags & SOF_ABORTED) != 0) {
		if (pcb != NULL)
			in_pcb_checkstate(pcb, WNT_RELEASE,
			    pcb == inp ? 1 : 0);
		return (ECONNREFUSED);
	}

	if (pcb != NULL) {
		/* Our own socket lock is already held when pcb == inp. */
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return (EADDRINUSE);
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			/* Not bound yet: pick an anonymous local port. */
			error = in_pcbbind(inp, NULL, p);
			if (error)
				return (error);
		}
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		/*
		 * Usage of IP_PKTINFO, without local port already
		 * specified will cause kernel to panic,
		 * see rdar://problem/18508185.
		 * For now returning error to avoid a kernel panic
		 * This routines can be refactored and handle this better
		 * in future.
		 */
		if (inp->inp_lport == 0)
			return (EINVAL);
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
	}

	/* Record the peer and move the PCB to its new hash position. */
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
		nstat_pcb_invalidate_cache(inp);
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	return (0);
}
1541
1542void
2d21ac55 1543in_pcbdisconnect(struct inpcb *inp)
1c79356b 1544{
39236c6e 1545 struct socket *so = inp->inp_socket;
1c79356b 1546
fe8ab488
A
1547 if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
1548 nstat_pcb_cache(inp);
1549
1c79356b
A
1550 inp->inp_faddr.s_addr = INADDR_ANY;
1551 inp->inp_fport = 0;
91447636 1552
39236c6e
A
1553 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1554 /* lock inversion issue, mostly with udp multicast packets */
1555 socket_unlock(so, 0);
1556 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1557 socket_lock(so, 0);
91447636
A
1558 }
1559
1c79356b 1560 in_pcbrehash(inp);
39236c6e
A
1561 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1562 /*
1563 * A multipath subflow socket would have its SS_NOFDREF set by default,
1564 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
1565 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
1566 */
1567 if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF))
1c79356b
A
1568 in_pcbdetach(inp);
1569}
1570
1571void
2d21ac55 1572in_pcbdetach(struct inpcb *inp)
1c79356b
A
1573{
1574 struct socket *so = inp->inp_socket;
1c79356b 1575
39236c6e
A
1576 if (so->so_pcb == NULL) {
1577 /* PCB has been disposed */
1578 panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__,
1579 inp, so, SOCK_PROTO(so));
1580 /* NOTREACHED */
91447636 1581 }
39037602 1582
1c79356b 1583#if IPSEC
39236c6e
A
1584 if (inp->inp_sp != NULL) {
1585 (void) ipsec4_delete_pcbpolicy(inp);
91447636 1586 }
39236c6e 1587#endif /* IPSEC */
39037602 1588
fe8ab488
A
1589 /*
1590 * Let NetworkStatistics know this PCB is going away
1591 * before we detach it.
1592 */
39037602 1593 if (nstat_collect &&
fe8ab488
A
1594 (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP))
1595 nstat_pcb_detach(inp);
3e170ce0
A
1596
1597 /* Free memory buffer held for generating keep alives */
1598 if (inp->inp_keepalive_data != NULL) {
1599 FREE(inp->inp_keepalive_data, M_TEMP);
1600 inp->inp_keepalive_data = NULL;
1601 }
1602
91447636 1603 /* mark socket state as dead */
39236c6e
A
1604 if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
1605 panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
1606 __func__, so, SOCK_PROTO(so));
1607 /* NOTREACHED */
1608 }
1c79356b 1609
39236c6e 1610 if (!(so->so_flags & SOF_PCBCLEARING)) {
6d2010ae 1611 struct ip_moptions *imo;
2d21ac55 1612
91447636 1613 inp->inp_vflag = 0;
39236c6e
A
1614 if (inp->inp_options != NULL) {
1615 (void) m_free(inp->inp_options);
1616 inp->inp_options = NULL;
91447636 1617 }
39236c6e 1618 ROUTE_RELEASE(&inp->inp_route);
6d2010ae 1619 imo = inp->inp_moptions;
91447636
A
1620 inp->inp_moptions = NULL;
1621 sofreelastref(so, 0);
1622 inp->inp_state = INPCB_STATE_DEAD;
39236c6e
A
1623 /* makes sure we're not called twice from so_close */
1624 so->so_flags |= SOF_PCBCLEARING;
1625
1626 inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
39037602
A
1627
1628 /*
1629 * See inp_join_group() for why we need to unlock
1630 */
1631 if (imo != NULL) {
1632 socket_unlock(so, 0);
1633 IMO_REMREF(imo);
1634 socket_lock(so, 0);
1635 }
91447636
A
1636 }
1637}
1c79356b 1638
1c79356b 1639
39236c6e
A
1640void
1641in_pcbdispose(struct inpcb *inp)
91447636
A
1642{
1643 struct socket *so = inp->inp_socket;
1644 struct inpcbinfo *ipi = inp->inp_pcbinfo;
1645
39236c6e
A
1646 if (so != NULL && so->so_usecount != 0) {
1647 panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n",
1648 __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
1649 solockhistory_nr(so));
1650 /* NOTREACHED */
1651 } else if (inp->inp_wantcnt != WNT_STOPUSING) {
1652 if (so != NULL) {
1653 panic_plain("%s: inp %p invalid wantcnt %d, so %p "
1654 "[%d,%d] usecount %d retaincnt %d state 0x%x "
1655 "flags 0x%x lockhistory %s\n", __func__, inp,
1656 inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
1657 so->so_usecount, so->so_retaincnt, so->so_state,
1658 so->so_flags, solockhistory_nr(so));
1659 /* NOTREACHED */
1660 } else {
1661 panic("%s: inp %p invalid wantcnt %d no socket\n",
1662 __func__, inp, inp->inp_wantcnt);
1663 /* NOTREACHED */
1664 }
91447636 1665 }
91447636 1666
39236c6e 1667 lck_rw_assert(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);
91447636
A
1668
1669 inp->inp_gencnt = ++ipi->ipi_gencnt;
316670eb 1670 /* access ipi in in_pcbremlists */
91447636 1671 in_pcbremlists(inp);
316670eb 1672
39236c6e 1673 if (so != NULL) {
91447636
A
1674 if (so->so_proto->pr_flags & PR_PCBLOCK) {
1675 sofreelastref(so, 0);
39236c6e
A
1676 if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
1677 /*
1678 * selthreadclear() already called
1679 * during sofreelastref() above.
1680 */
91447636
A
1681 sbrelease(&so->so_rcv);
1682 sbrelease(&so->so_snd);
1683 }
39236c6e
A
1684 if (so->so_head != NULL) {
1685 panic("%s: so=%p head still exist\n",
1686 __func__, so);
1687 /* NOTREACHED */
1688 }
1689 lck_mtx_unlock(&inp->inpcb_mtx);
1690 lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
9bccf70c 1691 }
39236c6e
A
1692 /* makes sure we're not called twice from so_close */
1693 so->so_flags |= SOF_PCBCLEARING;
1694 so->so_saved_pcb = (caddr_t)inp;
1695 so->so_pcb = NULL;
1696 inp->inp_socket = NULL;
2d21ac55
A
1697#if CONFIG_MACF_NET
1698 mac_inpcb_label_destroy(inp);
39236c6e 1699#endif /* CONFIG_MACF_NET */
39037602
A
1700#if NECP
1701 necp_inpcb_dispose(inp);
1702#endif /* NECP */
b0d623f7
A
1703 /*
1704 * In case there a route cached after a detach (possible
1705 * in the tcp case), make sure that it is freed before
1706 * we deallocate the structure.
1707 */
39236c6e 1708 ROUTE_RELEASE(&inp->inp_route);
3e170ce0 1709 if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
91447636 1710 zfree(ipi->ipi_zone, inp);
55e303ae 1711 }
91447636 1712 sodealloc(so);
9bccf70c 1713 }
1c79356b
A
1714}
1715
1716/*
39236c6e 1717 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
1c79356b
A
1718 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
1719 * in struct pr_usrreqs, so that protocols can just reference then directly
39236c6e 1720 * without the need for a wrapper function.
1c79356b
A
1721 */
1722int
39236c6e 1723in_getsockaddr(struct socket *so, struct sockaddr **nam)
1c79356b 1724{
2d21ac55
A
1725 struct inpcb *inp;
1726 struct sockaddr_in *sin;
1c79356b
A
1727
1728 /*
1729 * Do the malloc first in case it blocks.
1730 */
39236c6e 1731 MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
0b4e3aa0 1732 if (sin == NULL)
39236c6e
A
1733 return (ENOBUFS);
1734 bzero(sin, sizeof (*sin));
1c79356b 1735 sin->sin_family = AF_INET;
39236c6e 1736 sin->sin_len = sizeof (*sin);
1c79356b 1737
39236c6e 1738 if ((inp = sotoinpcb(so)) == NULL) {
1c79356b 1739 FREE(sin, M_SONAME);
39236c6e 1740 return (EINVAL);
1c79356b
A
1741 }
1742 sin->sin_port = inp->inp_lport;
1743 sin->sin_addr = inp->inp_laddr;
1c79356b
A
1744
1745 *nam = (struct sockaddr *)sin;
39236c6e 1746 return (0);
1c79356b
A
1747}
1748
1749int
39236c6e 1750in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss)
1c79356b 1751{
39236c6e 1752 struct sockaddr_in *sin = SIN(ss);
1c79356b 1753 struct inpcb *inp;
1c79356b 1754
39236c6e
A
1755 VERIFY(ss != NULL);
1756 bzero(ss, sizeof (*ss));
1757
1c79356b 1758 sin->sin_family = AF_INET;
39236c6e 1759 sin->sin_len = sizeof (*sin);
1c79356b 1760
fe8ab488
A
1761 if ((inp = sotoinpcb(so)) == NULL
1762#if NECP
1763 || (necp_socket_should_use_flow_divert(inp))
1764#endif /* NECP */
1765 )
39236c6e
A
1766 return (inp == NULL ? EINVAL : EPROTOTYPE);
1767
1768 sin->sin_port = inp->inp_lport;
1769 sin->sin_addr = inp->inp_laddr;
1770 return (0);
1771}
1772
1773int
1774in_getpeeraddr(struct socket *so, struct sockaddr **nam)
1775{
1776 struct inpcb *inp;
1777 struct sockaddr_in *sin;
1778
1779 /*
1780 * Do the malloc first in case it blocks.
1781 */
1782 MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
1783 if (sin == NULL)
1784 return (ENOBUFS);
1785 bzero((caddr_t)sin, sizeof (*sin));
1786 sin->sin_family = AF_INET;
1787 sin->sin_len = sizeof (*sin);
1788
1789 if ((inp = sotoinpcb(so)) == NULL) {
1c79356b 1790 FREE(sin, M_SONAME);
39236c6e 1791 return (EINVAL);
1c79356b
A
1792 }
1793 sin->sin_port = inp->inp_fport;
1794 sin->sin_addr = inp->inp_faddr;
1c79356b
A
1795
1796 *nam = (struct sockaddr *)sin;
39236c6e
A
1797 return (0);
1798}
1799
1800int
1801in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss)
1802{
1803 struct sockaddr_in *sin = SIN(ss);
1804 struct inpcb *inp;
1805
1806 VERIFY(ss != NULL);
1807 bzero(ss, sizeof (*ss));
1808
1809 sin->sin_family = AF_INET;
1810 sin->sin_len = sizeof (*sin);
1811
fe8ab488
A
1812 if ((inp = sotoinpcb(so)) == NULL
1813#if NECP
1814 || (necp_socket_should_use_flow_divert(inp))
1815#endif /* NECP */
1816 ) {
39236c6e
A
1817 return (inp == NULL ? EINVAL : EPROTOTYPE);
1818 }
1819
1820 sin->sin_port = inp->inp_fport;
1821 sin->sin_addr = inp->inp_faddr;
1822 return (0);
1c79356b
A
1823}
1824
1c79356b 1825void
2d21ac55 1826in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
39236c6e 1827 int errno, void (*notify)(struct inpcb *, int))
1c79356b 1828{
91447636
A
1829 struct inpcb *inp;
1830
39236c6e 1831 lck_rw_lock_shared(pcbinfo->ipi_lock);
1c79356b 1832
39236c6e 1833 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
9bccf70c 1834#if INET6
39236c6e 1835 if (!(inp->inp_vflag & INP_IPV4))
1c79356b 1836 continue;
39236c6e 1837#endif /* INET6 */
1c79356b 1838 if (inp->inp_faddr.s_addr != faddr.s_addr ||
9bccf70c 1839 inp->inp_socket == NULL)
39236c6e
A
1840 continue;
1841 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
91447636
A
1842 continue;
1843 socket_lock(inp->inp_socket, 1);
9bccf70c 1844 (*notify)(inp, errno);
39236c6e 1845 (void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
91447636 1846 socket_unlock(inp->inp_socket, 1);
1c79356b 1847 }
39236c6e 1848 lck_rw_done(pcbinfo->ipi_lock);
1c79356b
A
1849}
1850
1851/*
1852 * Check for alternatives when higher level complains
1853 * about service problems. For now, invalidate cached
1854 * routing information. If the route was created dynamically
1855 * (by a redirect), time to try a default gateway again.
1856 */
1857void
2d21ac55 1858in_losing(struct inpcb *inp)
1c79356b 1859{
39236c6e 1860 boolean_t release = FALSE;
2d21ac55 1861 struct rtentry *rt;
1c79356b 1862
b0d623f7 1863 if ((rt = inp->inp_route.ro_rt) != NULL) {
39236c6e 1864 struct in_ifaddr *ia = NULL;
b0d623f7 1865
b0d623f7 1866 RT_LOCK(rt);
b0d623f7
A
1867 if (rt->rt_flags & RTF_DYNAMIC) {
1868 /*
1869 * Prevent another thread from modifying rt_key,
1870 * rt_gateway via rt_setgate() after rt_lock is
1871 * dropped by marking the route as defunct.
1872 */
1873 rt->rt_flags |= RTF_CONDEMNED;
1874 RT_UNLOCK(rt);
1875 (void) rtrequest(RTM_DELETE, rt_key(rt),
39236c6e 1876 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
b0d623f7
A
1877 } else {
1878 RT_UNLOCK(rt);
1879 }
2d21ac55 1880 /* if the address is gone keep the old route in the pcb */
39236c6e
A
1881 if (inp->inp_laddr.s_addr != INADDR_ANY &&
1882 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1883 /*
1884 * Address is around; ditch the route. A new route
1885 * can be allocated the next time output is attempted.
1886 */
1887 release = TRUE;
2d21ac55 1888 }
39236c6e
A
1889 if (ia != NULL)
1890 IFA_REMREF(&ia->ia_ifa);
1c79356b 1891 }
39236c6e
A
1892 if (rt == NULL || release)
1893 ROUTE_RELEASE(&inp->inp_route);
1c79356b
A
1894}
1895
1896/*
1897 * After a routing change, flush old routing
1898 * and allocate a (hopefully) better one.
1899 */
9bccf70c 1900void
39236c6e 1901in_rtchange(struct inpcb *inp, int errno)
1c79356b 1902{
39236c6e
A
1903#pragma unused(errno)
1904 boolean_t release = FALSE;
2d21ac55
A
1905 struct rtentry *rt;
1906
1907 if ((rt = inp->inp_route.ro_rt) != NULL) {
39236c6e 1908 struct in_ifaddr *ia = NULL;
b0d623f7 1909
39236c6e
A
1910 /* if address is gone, keep the old route */
1911 if (inp->inp_laddr.s_addr != INADDR_ANY &&
1912 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
1913 /*
1914 * Address is around; ditch the route. A new route
1915 * can be allocated the next time output is attempted.
1916 */
1917 release = TRUE;
2d21ac55 1918 }
39236c6e
A
1919 if (ia != NULL)
1920 IFA_REMREF(&ia->ia_ifa);
1c79356b 1921 }
39236c6e
A
1922 if (rt == NULL || release)
1923 ROUTE_RELEASE(&inp->inp_route);
1c79356b
A
1924}
1925
1926/*
1927 * Lookup a PCB based on the local address and port.
1928 */
1929struct inpcb *
2d21ac55 1930in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
39236c6e 1931 unsigned int lport_arg, int wild_okay)
1c79356b 1932{
2d21ac55 1933 struct inpcb *inp;
1c79356b
A
1934 int matchwild = 3, wildcard;
1935 u_short lport = lport_arg;
1936
39236c6e 1937 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);
1c79356b
A
1938
1939 if (!wild_okay) {
1940 struct inpcbhead *head;
1941 /*
1942 * Look for an unconnected (wildcard foreign addr) PCB that
1943 * matches the local address and port we're looking for.
1944 */
39236c6e
A
1945 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
1946 pcbinfo->ipi_hashmask)];
9bccf70c
A
1947 LIST_FOREACH(inp, head, inp_hash) {
1948#if INET6
39236c6e 1949 if (!(inp->inp_vflag & INP_IPV4))
1c79356b 1950 continue;
39236c6e 1951#endif /* INET6 */
1c79356b
A
1952 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1953 inp->inp_laddr.s_addr == laddr.s_addr &&
1954 inp->inp_lport == lport) {
1955 /*
1956 * Found.
1957 */
1958 return (inp);
1959 }
1960 }
1961 /*
1962 * Not found.
1963 */
39236c6e 1964 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
1c79356b
A
1965 return (NULL);
1966 } else {
1967 struct inpcbporthead *porthash;
1968 struct inpcbport *phd;
1969 struct inpcb *match = NULL;
1970 /*
1971 * Best fit PCB lookup.
1972 *
1973 * First see if this local port is in use by looking on the
1974 * port hash list.
1975 */
39236c6e
A
1976 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
1977 pcbinfo->ipi_porthashmask)];
9bccf70c 1978 LIST_FOREACH(phd, porthash, phd_hash) {
1c79356b
A
1979 if (phd->phd_port == lport)
1980 break;
1981 }
1982 if (phd != NULL) {
1983 /*
1984 * Port is in use by one or more PCBs. Look for best
1985 * fit.
1986 */
9bccf70c 1987 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
1c79356b 1988 wildcard = 0;
9bccf70c 1989#if INET6
39236c6e 1990 if (!(inp->inp_vflag & INP_IPV4))
1c79356b 1991 continue;
39236c6e 1992#endif /* INET6 */
1c79356b
A
1993 if (inp->inp_faddr.s_addr != INADDR_ANY)
1994 wildcard++;
1995 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1996 if (laddr.s_addr == INADDR_ANY)
1997 wildcard++;
39236c6e
A
1998 else if (inp->inp_laddr.s_addr !=
1999 laddr.s_addr)
1c79356b
A
2000 continue;
2001 } else {
2002 if (laddr.s_addr != INADDR_ANY)
2003 wildcard++;
2004 }
2005 if (wildcard < matchwild) {
2006 match = inp;
2007 matchwild = wildcard;
2008 if (matchwild == 0) {
2009 break;
2010 }
2011 }
2012 }
2013 }
39236c6e
A
2014 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
2015 0, 0, 0, 0);
1c79356b
A
2016 return (match);
2017 }
2018}
2019
6d2010ae
A
2020/*
2021 * Check if PCB exists in hash list.
2022 */
2023int
39236c6e
A
2024in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2025 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2026 uid_t *uid, gid_t *gid, struct ifnet *ifp)
6d2010ae
A
2027{
2028 struct inpcbhead *head;
2029 struct inpcb *inp;
2030 u_short fport = fport_arg, lport = lport_arg;
39236c6e
A
2031 int found = 0;
2032 struct inpcb *local_wild = NULL;
2033#if INET6
2034 struct inpcb *local_wild_mapped = NULL;
2035#endif /* INET6 */
6d2010ae
A
2036
2037 *uid = UID_MAX;
2038 *gid = GID_MAX;
316670eb 2039
6d2010ae
A
2040 /*
2041 * We may have found the pcb in the last lookup - check this first.
2042 */
2043
39236c6e 2044 lck_rw_lock_shared(pcbinfo->ipi_lock);
6d2010ae
A
2045
2046 /*
2047 * First look for an exact match.
2048 */
39236c6e
A
2049 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2050 pcbinfo->ipi_hashmask)];
6d2010ae
A
2051 LIST_FOREACH(inp, head, inp_hash) {
2052#if INET6
39236c6e 2053 if (!(inp->inp_vflag & INP_IPV4))
6d2010ae 2054 continue;
39236c6e 2055#endif /* INET6 */
fe8ab488 2056 if (inp_restricted_recv(inp, ifp))
316670eb
A
2057 continue;
2058
6d2010ae
A
2059 if (inp->inp_faddr.s_addr == faddr.s_addr &&
2060 inp->inp_laddr.s_addr == laddr.s_addr &&
2061 inp->inp_fport == fport &&
2062 inp->inp_lport == lport) {
2063 if ((found = (inp->inp_socket != NULL))) {
2064 /*
2065 * Found.
2066 */
316670eb
A
2067 *uid = kauth_cred_getuid(
2068 inp->inp_socket->so_cred);
2069 *gid = kauth_cred_getgid(
2070 inp->inp_socket->so_cred);
6d2010ae 2071 }
39236c6e 2072 lck_rw_done(pcbinfo->ipi_lock);
6d2010ae
A
2073 return (found);
2074 }
2075 }
6d2010ae 2076
39236c6e
A
2077 if (!wildcard) {
2078 /*
2079 * Not found.
2080 */
2081 lck_rw_done(pcbinfo->ipi_lock);
2082 return (0);
2083 }
316670eb 2084
39236c6e
A
2085 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2086 pcbinfo->ipi_hashmask)];
2087 LIST_FOREACH(inp, head, inp_hash) {
6d2010ae 2088#if INET6
39236c6e
A
2089 if (!(inp->inp_vflag & INP_IPV4))
2090 continue;
6d2010ae 2091#endif /* INET6 */
fe8ab488 2092 if (inp_restricted_recv(inp, ifp))
39236c6e
A
2093 continue;
2094
2095 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2096 inp->inp_lport == lport) {
2097 if (inp->inp_laddr.s_addr == laddr.s_addr) {
2098 if ((found = (inp->inp_socket != NULL))) {
316670eb 2099 *uid = kauth_cred_getuid(
39236c6e 2100 inp->inp_socket->so_cred);
316670eb 2101 *gid = kauth_cred_getgid(
39236c6e 2102 inp->inp_socket->so_cred);
6d2010ae 2103 }
39236c6e 2104 lck_rw_done(pcbinfo->ipi_lock);
6d2010ae 2105 return (found);
39236c6e
A
2106 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2107#if INET6
2108 if (inp->inp_socket &&
2109 SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
2110 local_wild_mapped = inp;
2111 else
6d2010ae 2112#endif /* INET6 */
39236c6e
A
2113 local_wild = inp;
2114 }
6d2010ae 2115 }
39236c6e
A
2116 }
2117 if (local_wild == NULL) {
2118#if INET6
2119 if (local_wild_mapped != NULL) {
2120 if ((found = (local_wild_mapped->inp_socket != NULL))) {
316670eb 2121 *uid = kauth_cred_getuid(
39236c6e 2122 local_wild_mapped->inp_socket->so_cred);
316670eb 2123 *gid = kauth_cred_getgid(
39236c6e 2124 local_wild_mapped->inp_socket->so_cred);
6d2010ae 2125 }
39236c6e 2126 lck_rw_done(pcbinfo->ipi_lock);
6d2010ae
A
2127 return (found);
2128 }
39236c6e
A
2129#endif /* INET6 */
2130 lck_rw_done(pcbinfo->ipi_lock);
2131 return (0);
6d2010ae 2132 }
39236c6e
A
2133 if ((found = (local_wild->inp_socket != NULL))) {
2134 *uid = kauth_cred_getuid(
2135 local_wild->inp_socket->so_cred);
2136 *gid = kauth_cred_getgid(
2137 local_wild->inp_socket->so_cred);
2138 }
2139 lck_rw_done(pcbinfo->ipi_lock);
2140 return (found);
6d2010ae
A
2141}
2142
1c79356b
A
2143/*
2144 * Lookup PCB in hash list.
2145 */
2146struct inpcb *
39236c6e
A
2147in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2148 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2149 struct ifnet *ifp)
1c79356b
A
2150{
2151 struct inpcbhead *head;
2d21ac55 2152 struct inpcb *inp;
1c79356b 2153 u_short fport = fport_arg, lport = lport_arg;
39236c6e
A
2154 struct inpcb *local_wild = NULL;
2155#if INET6
2156 struct inpcb *local_wild_mapped = NULL;
2157#endif /* INET6 */
1c79356b
A
2158
2159 /*
2160 * We may have found the pcb in the last lookup - check this first.
2161 */
2162
39236c6e 2163 lck_rw_lock_shared(pcbinfo->ipi_lock);
1c79356b
A
2164
2165 /*
2166 * First look for an exact match.
2167 */
39236c6e
A
2168 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2169 pcbinfo->ipi_hashmask)];
9bccf70c
A
2170 LIST_FOREACH(inp, head, inp_hash) {
2171#if INET6
39236c6e 2172 if (!(inp->inp_vflag & INP_IPV4))
1c79356b 2173 continue;
39236c6e 2174#endif /* INET6 */
fe8ab488 2175 if (inp_restricted_recv(inp, ifp))
316670eb
A
2176 continue;
2177
1c79356b
A
2178 if (inp->inp_faddr.s_addr == faddr.s_addr &&
2179 inp->inp_laddr.s_addr == laddr.s_addr &&
2180 inp->inp_fport == fport &&
2181 inp->inp_lport == lport) {
2182 /*
2183 * Found.
2184 */
39236c6e
A
2185 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2186 WNT_STOPUSING) {
2187 lck_rw_done(pcbinfo->ipi_lock);
91447636 2188 return (inp);
39236c6e
A
2189 } else {
2190 /* it's there but dead, say it isn't found */
2191 lck_rw_done(pcbinfo->ipi_lock);
316670eb 2192 return (NULL);
91447636 2193 }
1c79356b
A
2194 }
2195 }
1c79356b 2196
39236c6e
A
2197 if (!wildcard) {
2198 /*
2199 * Not found.
2200 */
2201 lck_rw_done(pcbinfo->ipi_lock);
2202 return (NULL);
2203 }
2204
2205 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2206 pcbinfo->ipi_hashmask)];
2207 LIST_FOREACH(inp, head, inp_hash) {
9bccf70c 2208#if INET6
39236c6e
A
2209 if (!(inp->inp_vflag & INP_IPV4))
2210 continue;
2211#endif /* INET6 */
fe8ab488 2212 if (inp_restricted_recv(inp, ifp))
39236c6e
A
2213 continue;
2214
2215 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2216 inp->inp_lport == lport) {
2217 if (inp->inp_laddr.s_addr == laddr.s_addr) {
2218 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2219 WNT_STOPUSING) {
2220 lck_rw_done(pcbinfo->ipi_lock);
2221 return (inp);
2222 } else {
2223 /* it's dead; say it isn't found */
2224 lck_rw_done(pcbinfo->ipi_lock);
2225 return (NULL);
91447636 2226 }
39236c6e 2227 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2d21ac55 2228#if INET6
39236c6e
A
2229 if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
2230 local_wild_mapped = inp;
2231 else
2d21ac55 2232#endif /* INET6 */
1c79356b 2233 local_wild = inp;
1c79356b
A
2234 }
2235 }
39236c6e
A
2236 }
2237 if (local_wild == NULL) {
2d21ac55 2238#if INET6
39236c6e
A
2239 if (local_wild_mapped != NULL) {
2240 if (in_pcb_checkstate(local_wild_mapped,
2241 WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2242 lck_rw_done(pcbinfo->ipi_lock);
2243 return (local_wild_mapped);
2244 } else {
2245 /* it's dead; say it isn't found */
2246 lck_rw_done(pcbinfo->ipi_lock);
2247 return (NULL);
91447636 2248 }
91447636 2249 }
39236c6e
A
2250#endif /* INET6 */
2251 lck_rw_done(pcbinfo->ipi_lock);
2252 return (NULL);
2253 }
2254 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2255 lck_rw_done(pcbinfo->ipi_lock);
2256 return (local_wild);
1c79356b 2257 }
1c79356b 2258 /*
39236c6e 2259 * It's either not found or is already dead.
1c79356b 2260 */
39236c6e 2261 lck_rw_done(pcbinfo->ipi_lock);
1c79356b
A
2262 return (NULL);
2263}
2264
2265/*
4bd07ac2
A
2266 * @brief Insert PCB onto various hash lists.
2267 *
2268 * @param inp Pointer to internet protocol control block
2269 * @param locked Implies if ipi_lock (protecting pcb list)
2270 * is already locked or not.
2271 *
2272 * @return int error on failure and 0 on success
1c79356b
A
2273 */
2274int
2d21ac55 2275in_pcbinshash(struct inpcb *inp, int locked)
1c79356b
A
2276{
2277 struct inpcbhead *pcbhash;
2278 struct inpcbporthead *pcbporthash;
2279 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
2280 struct inpcbport *phd;
2281 u_int32_t hashkey_faddr;
2282
39236c6e
A
2283 if (!locked) {
2284 if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
2285 /*
2286 * Lock inversion issue, mostly with udp
2287 * multicast packets
2288 */
2289 socket_unlock(inp->inp_socket, 0);
2290 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
2291 socket_lock(inp->inp_socket, 0);
39236c6e
A
2292 }
2293 }
b0d623f7 2294
4bd07ac2
A
2295 /*
2296 * This routine or its caller may have given up
2297 * socket's protocol lock briefly.
2298 * During that time the socket may have been dropped.
2299 * Safe-guarding against that.
2300 */
2301 if (inp->inp_state == INPCB_STATE_DEAD) {
2302 if (!locked) {
2303 lck_rw_done(pcbinfo->ipi_lock);
2304 }
2305 return (ECONNABORTED);
2306 }
2307
2308
1c79356b
A
2309#if INET6
2310 if (inp->inp_vflag & INP_IPV6)
2311 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2312 else
2313#endif /* INET6 */
39236c6e 2314 hashkey_faddr = inp->inp_faddr.s_addr;
1c79356b 2315
39236c6e
A
2316 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2317 inp->inp_fport, pcbinfo->ipi_hashmask);
91447636 2318
39236c6e 2319 pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];
1c79356b 2320
39236c6e
A
2321 pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
2322 pcbinfo->ipi_porthashmask)];
1c79356b
A
2323
2324 /*
2325 * Go through port list and look for a head for this lport.
2326 */
9bccf70c 2327 LIST_FOREACH(phd, pcbporthash, phd_hash) {
1c79356b
A
2328 if (phd->phd_port == inp->inp_lport)
2329 break;
2330 }
316670eb 2331
1c79356b
A
2332 /*
2333 * If none exists, malloc one and tack it on.
2334 */
2335 if (phd == NULL) {
39236c6e
A
2336 MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport),
2337 M_PCB, M_WAITOK);
1c79356b 2338 if (phd == NULL) {
91447636 2339 if (!locked)
39236c6e 2340 lck_rw_done(pcbinfo->ipi_lock);
1c79356b
A
2341 return (ENOBUFS); /* XXX */
2342 }
2343 phd->phd_port = inp->inp_lport;
2344 LIST_INIT(&phd->phd_pcblist);
2345 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
2346 }
fe8ab488
A
2347
2348 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
1c79356b
A
2349 inp->inp_phd = phd;
2350 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
2351 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
fe8ab488
A
2352 inp->inp_flags2 |= INP2_INHASHLIST;
2353
91447636 2354 if (!locked)
39236c6e 2355 lck_rw_done(pcbinfo->ipi_lock);
39037602 2356
fe8ab488
A
2357#if NECP
2358 // This call catches the original setting of the local address
2359 inp_update_necp_policy(inp, NULL, NULL, 0);
2360#endif /* NECP */
39037602 2361
1c79356b
A
2362 return (0);
2363}
2364
2365/*
2366 * Move PCB to the proper hash bucket when { faddr, fport } have been
2367 * changed. NOTE: This does not handle the case of the lport changing (the
2368 * hashed port list would have to be updated as well), so the lport must
2369 * not change after in_pcbinshash() has been called.
2370 */
2371void
2d21ac55 2372in_pcbrehash(struct inpcb *inp)
1c79356b
A
2373{
2374 struct inpcbhead *head;
2375 u_int32_t hashkey_faddr;
2376
2377#if INET6
2378 if (inp->inp_vflag & INP_IPV6)
2379 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2380 else
2381#endif /* INET6 */
39236c6e
A
2382 hashkey_faddr = inp->inp_faddr.s_addr;
2383
2384 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2385 inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
2386 head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];
1c79356b 2387
fe8ab488
A
2388 if (inp->inp_flags2 & INP2_INHASHLIST) {
2389 LIST_REMOVE(inp, inp_hash);
2390 inp->inp_flags2 &= ~INP2_INHASHLIST;
2391 }
2392
2393 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
1c79356b 2394 LIST_INSERT_HEAD(head, inp, inp_hash);
fe8ab488 2395 inp->inp_flags2 |= INP2_INHASHLIST;
39037602 2396
fe8ab488
A
2397#if NECP
2398 // This call catches updates to the remote addresses
2399 inp_update_necp_policy(inp, NULL, NULL, 0);
2400#endif /* NECP */
1c79356b
A
2401}
2402
2403/*
2404 * Remove PCB from various lists.
316670eb 2405 * Must be called pcbinfo lock is held in exclusive mode.
1c79356b
A
2406 */
2407void
2d21ac55 2408in_pcbremlists(struct inpcb *inp)
1c79356b
A
2409{
2410 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
1c79356b 2411
fe8ab488
A
2412 /*
2413 * Check if it's in hashlist -- an inp is placed in hashlist when
39037602 2414 * it's local port gets assigned. So it should also be present
fe8ab488
A
2415 * in the port list.
2416 */
2417 if (inp->inp_flags2 & INP2_INHASHLIST) {
1c79356b
A
2418 struct inpcbport *phd = inp->inp_phd;
2419
fe8ab488
A
2420 VERIFY(phd != NULL && inp->inp_lport > 0);
2421
1c79356b 2422 LIST_REMOVE(inp, inp_hash);
fe8ab488
A
2423 inp->inp_hash.le_next = NULL;
2424 inp->inp_hash.le_prev = NULL;
2425
1c79356b 2426 LIST_REMOVE(inp, inp_portlist);
fe8ab488
A
2427 inp->inp_portlist.le_next = NULL;
2428 inp->inp_portlist.le_prev = NULL;
2429 if (LIST_EMPTY(&phd->phd_pcblist)) {
1c79356b
A
2430 LIST_REMOVE(phd, phd_hash);
2431 FREE(phd, M_PCB);
2432 }
fe8ab488
A
2433 inp->inp_phd = NULL;
2434 inp->inp_flags2 &= ~INP2_INHASHLIST;
1c79356b 2435 }
fe8ab488 2436 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
39236c6e
A
2437
2438 if (inp->inp_flags2 & INP2_TIMEWAIT) {
2439 /* Remove from time-wait queue */
2440 tcp_remove_from_time_wait(inp);
2441 inp->inp_flags2 &= ~INP2_TIMEWAIT;
2442 VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
2443 inp->inp_pcbinfo->ipi_twcount--;
2444 } else {
2445 /* Remove from global inp list if it is not time-wait */
2446 LIST_REMOVE(inp, inp_list);
2447 }
316670eb 2448
bd504ef0 2449 if (inp->inp_flags2 & INP2_IN_FCTREE) {
39236c6e 2450 inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE));
bd504ef0
A
2451 VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
2452 }
39236c6e 2453
1c79356b
A
2454 inp->inp_pcbinfo->ipi_count--;
2455}
2456
39236c6e
A
2457/*
2458 * Mechanism used to defer the memory release of PCBs
2459 * The pcb list will contain the pcb until the reaper can clean it up if
2460 * the following conditions are met:
2461 * 1) state "DEAD",
2462 * 2) wantcnt is STOPUSING
2463 * 3) usecount is 0
91447636 2464 * This function will be called to either mark the pcb as
39236c6e 2465 */
91447636
A
2466int
2467in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
91447636 2468{
39236c6e 2469 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
2d21ac55
A
2470 UInt32 origwant;
2471 UInt32 newwant;
91447636
A
2472
2473 switch (mode) {
39236c6e
A
2474 case WNT_STOPUSING:
2475 /*
2476 * Try to mark the pcb as ready for recycling. CAS with
2477 * STOPUSING, if success we're good, if it's in use, will
2478 * be marked later
2479 */
2480 if (locked == 0)
2481 socket_lock(pcb->inp_socket, 1);
2482 pcb->inp_state = INPCB_STATE_DEAD;
91447636 2483
39236c6e
A
2484stopusing:
2485 if (pcb->inp_socket->so_usecount < 0) {
2486 panic("%s: pcb=%p so=%p usecount is negative\n",
2487 __func__, pcb, pcb->inp_socket);
2488 /* NOTREACHED */
2489 }
2490 if (locked == 0)
2491 socket_unlock(pcb->inp_socket, 1);
91447636 2492
39236c6e 2493 inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);
6d2010ae 2494
39236c6e
A
2495 origwant = *wantcnt;
2496 if ((UInt16) origwant == 0xffff) /* should stop using */
2497 return (WNT_STOPUSING);
2498 newwant = 0xffff;
2499 if ((UInt16) origwant == 0) {
2500 /* try to mark it as unsuable now */
2501 OSCompareAndSwap(origwant, newwant, wantcnt);
2502 }
2503 return (WNT_STOPUSING);
91447636 2504
39236c6e
A
2505 case WNT_ACQUIRE:
2506 /*
2507 * Try to increase reference to pcb. If WNT_STOPUSING
2508 * should bail out. If socket state DEAD, try to set count
2509 * to STOPUSING, return failed otherwise increase cnt.
2510 */
2511 do {
91447636 2512 origwant = *wantcnt;
39236c6e
A
2513 if ((UInt16) origwant == 0xffff) {
2514 /* should stop using */
91447636 2515 return (WNT_STOPUSING);
91447636 2516 }
39236c6e
A
2517 newwant = origwant + 1;
2518 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2519 return (WNT_ACQUIRE);
91447636 2520
39236c6e
A
2521 case WNT_RELEASE:
2522 /*
2523 * Release reference. If result is null and pcb state
2524 * is DEAD, set wanted bit to STOPUSING
2525 */
2526 if (locked == 0)
2527 socket_lock(pcb->inp_socket, 1);
91447636 2528
39236c6e
A
2529 do {
2530 origwant = *wantcnt;
2531 if ((UInt16) origwant == 0x0) {
2532 panic("%s: pcb=%p release with zero count",
2533 __func__, pcb);
2534 /* NOTREACHED */
2535 }
2536 if ((UInt16) origwant == 0xffff) {
2537 /* should stop using */
2538 if (locked == 0)
2539 socket_unlock(pcb->inp_socket, 1);
2540 return (WNT_STOPUSING);
2541 }
2542 newwant = origwant - 1;
2543 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2544
2545 if (pcb->inp_state == INPCB_STATE_DEAD)
2546 goto stopusing;
2547 if (pcb->inp_socket->so_usecount < 0) {
2548 panic("%s: RELEASE pcb=%p so=%p usecount is negative\n",
2549 __func__, pcb, pcb->inp_socket);
2550 /* NOTREACHED */
2551 }
91447636 2552
39236c6e
A
2553 if (locked == 0)
2554 socket_unlock(pcb->inp_socket, 1);
2555 return (WNT_RELEASE);
91447636 2556
39236c6e
A
2557 default:
2558 panic("%s: so=%p not a valid state =%x\n", __func__,
2559 pcb->inp_socket, mode);
2560 /* NOTREACHED */
91447636
A
2561 }
2562
2563 /* NOTREACHED */
2564 return (mode);
2565}
2566
2567/*
2568 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
2569 * The inpcb_compat data structure is passed to user space and must
b0d623f7 2570 * not change. We intentionally avoid copying pointers.
91447636
A
2571 */
2572void
39236c6e 2573inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
91447636 2574{
39236c6e 2575 bzero(inp_compat, sizeof (*inp_compat));
91447636
A
2576 inp_compat->inp_fport = inp->inp_fport;
2577 inp_compat->inp_lport = inp->inp_lport;
316670eb 2578 inp_compat->nat_owner = 0;
39236c6e 2579 inp_compat->nat_cookie = 0;
91447636
A
2580 inp_compat->inp_gencnt = inp->inp_gencnt;
2581 inp_compat->inp_flags = inp->inp_flags;
2582 inp_compat->inp_flow = inp->inp_flow;
2583 inp_compat->inp_vflag = inp->inp_vflag;
2584 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
2585 inp_compat->inp_ip_p = inp->inp_ip_p;
39236c6e
A
2586 inp_compat->inp_dependfaddr.inp6_foreign =
2587 inp->inp_dependfaddr.inp6_foreign;
2588 inp_compat->inp_dependladdr.inp6_local =
2589 inp->inp_dependladdr.inp6_local;
91447636 2590 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
39236c6e 2591 inp_compat->inp_depend6.inp6_hlim = 0;
91447636 2592 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
39236c6e 2593 inp_compat->inp_depend6.inp6_ifindex = 0;
91447636
A
2594 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2595}
9bccf70c 2596
b0d623f7 2597void
39236c6e 2598inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
b0d623f7 2599{
6d2010ae
A
2600 xinp->inp_fport = inp->inp_fport;
2601 xinp->inp_lport = inp->inp_lport;
2602 xinp->inp_gencnt = inp->inp_gencnt;
2603 xinp->inp_flags = inp->inp_flags;
2604 xinp->inp_flow = inp->inp_flow;
2605 xinp->inp_vflag = inp->inp_vflag;
2606 xinp->inp_ip_ttl = inp->inp_ip_ttl;
2607 xinp->inp_ip_p = inp->inp_ip_p;
2608 xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
2609 xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
2610 xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
39236c6e 2611 xinp->inp_depend6.inp6_hlim = 0;
6d2010ae 2612 xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
39236c6e 2613 xinp->inp_depend6.inp6_ifindex = 0;
6d2010ae 2614 xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
b0d623f7
A
2615}
2616
b0d623f7
A
2617/*
2618 * The following routines implement this scheme:
2619 *
2620 * Callers of ip_output() that intend to cache the route in the inpcb pass
2621 * a local copy of the struct route to ip_output(). Using a local copy of
2622 * the cached route significantly simplifies things as IP no longer has to
2623 * worry about having exclusive access to the passed in struct route, since
2624 * it's defined in the caller's stack; in essence, this allows for a lock-
2625 * less operation when updating the struct route at the IP level and below,
2626 * whenever necessary. The scheme works as follows:
2627 *
2628 * Prior to dropping the socket's lock and calling ip_output(), the caller
2629 * copies the struct route from the inpcb into its stack, and adds a reference
2630 * to the cached route entry, if there was any. The socket's lock is then
2631 * dropped and ip_output() is called with a pointer to the copy of struct
2632 * route defined on the stack (not to the one in the inpcb.)
2633 *
2634 * Upon returning from ip_output(), the caller then acquires the socket's
2635 * lock and synchronizes the cache; if there is no route cached in the inpcb,
2636 * it copies the local copy of struct route (which may or may not contain any
2637 * route) back into the cache; otherwise, if the inpcb has a route cached in
2638 * it, the one in the local copy will be freed, if there's any. Trashing the
2639 * cached route in the inpcb can be avoided because ip_output() is single-
2640 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
2641 * by the socket/transport layer.)
2642 */
2643void
2644inp_route_copyout(struct inpcb *inp, struct route *dst)
2645{
2646 struct route *src = &inp->inp_route;
2647
6d2010ae 2648 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
b0d623f7 2649
0b4c1975 2650 /*
39236c6e 2651 * If the route in the PCB is stale or not for IPv4, blow it away;
0b4c1975
A
2652 * this is possible in the case of IPv4-mapped address case.
2653 */
39236c6e
A
2654 if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET)
2655 ROUTE_RELEASE(src);
316670eb 2656
39236c6e 2657 route_copyout(dst, src, sizeof (*dst));
b0d623f7
A
2658}
2659
2660void
2661inp_route_copyin(struct inpcb *inp, struct route *src)
2662{
2663 struct route *dst = &inp->inp_route;
2664
6d2010ae 2665 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
b0d623f7
A
2666
2667 /* Minor sanity check */
2668 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
2669 panic("%s: wrong or corrupted route: %p", __func__, src);
2670
39236c6e 2671 route_copyin(src, dst, sizeof (*src));
6d2010ae
A
2672}
2673
2674/*
39037602 2675 * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option.
6d2010ae 2676 */
316670eb 2677int
39236c6e 2678inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
6d2010ae 2679{
316670eb
A
2680 struct ifnet *ifp = NULL;
2681
2682 ifnet_head_lock_shared();
2683 if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
2684 (ifp = ifindex2ifnet[ifscope]) == NULL)) {
2685 ifnet_head_done();
2686 return (ENXIO);
2687 }
2688 ifnet_head_done();
2689
2690 VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);
2691
6d2010ae
A
2692 /*
2693 * A zero interface scope value indicates an "unbind".
2694 * Otherwise, take in whatever value the app desires;
2695 * the app may already know the scope (or force itself
2696 * to such a scope) ahead of time before the interface
2697 * gets attached. It doesn't matter either way; any
2698 * route lookup from this point on will require an
2699 * exact match for the embedded interface scope.
2700 */
316670eb
A
2701 inp->inp_boundifp = ifp;
2702 if (inp->inp_boundifp == NULL)
6d2010ae
A
2703 inp->inp_flags &= ~INP_BOUND_IF;
2704 else
2705 inp->inp_flags |= INP_BOUND_IF;
2706
2707 /* Blow away any cached route in the PCB */
39236c6e
A
2708 ROUTE_RELEASE(&inp->inp_route);
2709
2710 if (pifp != NULL)
2711 *pifp = ifp;
316670eb
A
2712
2713 return (0);
6d2010ae
A
2714}
2715
2716/*
39236c6e
A
2717 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2718 * as well as for setting PROC_UUID_NO_CELLULAR policy.
6d2010ae 2719 */
39236c6e
A
2720void
2721inp_set_nocellular(struct inpcb *inp)
6d2010ae 2722{
39236c6e 2723 inp->inp_flags |= INP_NO_IFT_CELLULAR;
6d2010ae
A
2724
2725 /* Blow away any cached route in the PCB */
39236c6e
A
2726 ROUTE_RELEASE(&inp->inp_route);
2727}
2728
2729/*
2730 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
2731 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
2732 */
2733void
2734inp_clear_nocellular(struct inpcb *inp)
2735{
2736 struct socket *so = inp->inp_socket;
2737
2738 /*
2739 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
2740 * has a higher precendence than INP_NO_IFT_CELLULAR. Clear the flag
2741 * if and only if the socket is unrestricted.
2742 */
2743 if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
2744 inp->inp_flags &= ~INP_NO_IFT_CELLULAR;
2745
2746 /* Blow away any cached route in the PCB */
2747 ROUTE_RELEASE(&inp->inp_route);
6d2010ae 2748 }
39236c6e 2749}
6d2010ae 2750
fe8ab488
A
2751void
2752inp_set_noexpensive(struct inpcb *inp)
2753{
2754 inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;
2755
2756 /* Blow away any cached route in the PCB */
2757 ROUTE_RELEASE(&inp->inp_route);
2758}
2759
2760void
2761inp_set_awdl_unrestricted(struct inpcb *inp)
2762{
2763 inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;
2764
2765 /* Blow away any cached route in the PCB */
2766 ROUTE_RELEASE(&inp->inp_route);
2767}
2768
2769boolean_t
2770inp_get_awdl_unrestricted(struct inpcb *inp)
2771{
2772 return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
2773}
2774
2775void
2776inp_clear_awdl_unrestricted(struct inpcb *inp)
2777{
2778 inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;
2779
2780 /* Blow away any cached route in the PCB */
2781 ROUTE_RELEASE(&inp->inp_route);
2782}
2783
39037602
A
2784void
2785inp_set_intcoproc_allowed(struct inpcb *inp)
2786{
2787 inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
2788
2789 /* Blow away any cached route in the PCB */
2790 ROUTE_RELEASE(&inp->inp_route);
2791}
2792
2793boolean_t
2794inp_get_intcoproc_allowed(struct inpcb *inp)
2795{
2796 return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE;
2797}
2798
2799void
2800inp_clear_intcoproc_allowed(struct inpcb *inp)
2801{
2802 inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED;
2803
2804 /* Blow away any cached route in the PCB */
2805 ROUTE_RELEASE(&inp->inp_route);
2806}
2807
fe8ab488 2808#if NECP
39236c6e 2809/*
fe8ab488 2810 * Called when PROC_UUID_NECP_APP_POLICY is set.
39236c6e
A
2811 */
2812void
fe8ab488 2813inp_set_want_app_policy(struct inpcb *inp)
39236c6e 2814{
fe8ab488 2815 inp->inp_flags2 |= INP2_WANT_APP_POLICY;
39236c6e
A
2816}
2817
2818/*
fe8ab488 2819 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
39236c6e
A
2820 */
2821void
fe8ab488 2822inp_clear_want_app_policy(struct inpcb *inp)
39236c6e 2823{
fe8ab488 2824 inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
b0d623f7 2825}
fe8ab488 2826#endif /* NECP */
316670eb
A
2827
2828/*
2829 * Calculate flow hash for an inp, used by an interface to identify a
2830 * flow. When an interface provides flow control advisory, this flow
2831 * hash is used as an identifier.
2832 */
2833u_int32_t
2834inp_calc_flowhash(struct inpcb *inp)
2835{
2836 struct inp_flowhash_key fh __attribute__((aligned(8)));
2837 u_int32_t flowhash = 0;
bd504ef0 2838 struct inpcb *tmp_inp = NULL;
316670eb
A
2839
2840 if (inp_hash_seed == 0)
2841 inp_hash_seed = RandomULong();
2842
2843 bzero(&fh, sizeof (fh));
2844
2845 bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr));
2846 bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr));
2847
2848 fh.infh_lport = inp->inp_lport;
2849 fh.infh_fport = inp->inp_fport;
2850 fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
2851 fh.infh_proto = inp->inp_ip_p;
2852 fh.infh_rand1 = RandomULong();
2853 fh.infh_rand2 = RandomULong();
2854
2855try_again:
2856 flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed);
2857 if (flowhash == 0) {
2858 /* try to get a non-zero flowhash */
2859 inp_hash_seed = RandomULong();
2860 goto try_again;
2861 }
2862
bd504ef0 2863 inp->inp_flowhash = flowhash;
316670eb 2864
bd504ef0 2865 /* Insert the inp into inp_fc_tree */
39236c6e 2866 lck_mtx_lock_spin(&inp_fc_lck);
bd504ef0
A
2867 tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
2868 if (tmp_inp != NULL) {
316670eb 2869 /*
bd504ef0
A
2870 * There is a different inp with the same flowhash.
2871 * There can be a collision on flow hash but the
39236c6e 2872 * probability is low. Let's recompute the
bd504ef0 2873 * flowhash.
316670eb
A
2874 */
2875 lck_mtx_unlock(&inp_fc_lck);
bd504ef0
A
2876 /* recompute hash seed */
2877 inp_hash_seed = RandomULong();
2878 goto try_again;
316670eb 2879 }
39236c6e 2880
bd504ef0
A
2881 RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
2882 inp->inp_flags2 |= INP2_IN_FCTREE;
316670eb 2883 lck_mtx_unlock(&inp_fc_lck);
bd504ef0 2884
39236c6e
A
2885 return (flowhash);
2886}
2887
2888void
2889inp_flowadv(uint32_t flowhash)
2890{
2891 struct inpcb *inp;
2892
2893 inp = inp_fc_getinp(flowhash, 0);
2894
2895 if (inp == NULL)
2896 return;
2897 inp_fc_feedback(inp);
316670eb
A
2898}
2899
bd504ef0
A
2900/*
2901 * Function to compare inp_fc_entries in inp flow control tree
2902 */
2903static inline int
2904infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
316670eb 2905{
bd504ef0 2906 return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
39236c6e 2907 sizeof(inp1->inp_flowhash)));
bd504ef0 2908}
316670eb 2909
39236c6e 2910static struct inpcb *
bd504ef0
A
2911inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
2912{
2913 struct inpcb *inp = NULL;
2914 int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;
316670eb
A
2915
2916 lck_mtx_lock_spin(&inp_fc_lck);
bd504ef0
A
2917 key_inp.inp_flowhash = flowhash;
2918 inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
2919 if (inp == NULL) {
316670eb
A
2920 /* inp is not present, return */
2921 lck_mtx_unlock(&inp_fc_lck);
2922 return (NULL);
2923 }
2924
bd504ef0
A
2925 if (flags & INPFC_REMOVE) {
2926 RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
2927 lck_mtx_unlock(&inp_fc_lck);
316670eb 2928
bd504ef0
A
2929 bzero(&(inp->infc_link), sizeof (inp->infc_link));
2930 inp->inp_flags2 &= ~INP2_IN_FCTREE;
2931 return (NULL);
316670eb 2932 }
39236c6e 2933
bd504ef0
A
2934 if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING)
2935 inp = NULL;
316670eb
A
2936 lck_mtx_unlock(&inp_fc_lck);
2937
bd504ef0 2938 return (inp);
316670eb
A
2939}
2940
39236c6e 2941static void
316670eb
A
2942inp_fc_feedback(struct inpcb *inp)
2943{
2944 struct socket *so = inp->inp_socket;
2945
2946 /* we already hold a want_cnt on this inp, socket can't be null */
39236c6e 2947 VERIFY(so != NULL);
316670eb
A
2948 socket_lock(so, 1);
2949
2950 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
2951 socket_unlock(so, 1);
2952 return;
2953 }
2954
fe8ab488
A
2955 if (inp->inp_sndinprog_cnt > 0)
2956 inp->inp_flags |= INP_FC_FEEDBACK;
2957
316670eb
A
2958 /*
2959 * Return if the connection is not in flow-controlled state.
2960 * This can happen if the connection experienced
2961 * loss while it was in flow controlled state
2962 */
2963 if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
2964 socket_unlock(so, 1);
2965 return;
2966 }
2967 inp_reset_fc_state(inp);
2968
39236c6e 2969 if (SOCK_TYPE(so) == SOCK_STREAM)
316670eb
A
2970 inp_fc_unthrottle_tcp(inp);
2971
2972 socket_unlock(so, 1);
2973}
2974
2975void
2976inp_reset_fc_state(struct inpcb *inp)
2977{
2978 struct socket *so = inp->inp_socket;
2979 int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
2980 int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;
2981
2982 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
2983
2984 if (suspended) {
2985 so->so_flags &= ~(SOF_SUSPENDED);
2986 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
2987 }
2988
316670eb
A
2989 /* Give a write wakeup to unblock the socket */
2990 if (needwakeup)
2991 sowwakeup(so);
2992}
2993
2994int
2995inp_set_fc_state(struct inpcb *inp, int advcode)
2996{
bd504ef0 2997 struct inpcb *tmp_inp = NULL;
316670eb 2998 /*
39236c6e 2999 * If there was a feedback from the interface when
316670eb
A
3000 * send operation was in progress, we should ignore
3001 * this flow advisory to avoid a race between setting
3002 * flow controlled state and receiving feedback from
3003 * the interface
3004 */
3005 if (inp->inp_flags & INP_FC_FEEDBACK)
39236c6e 3006 return (0);
316670eb
A
3007
3008 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
39236c6e
A
3009 if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
3010 INPFC_SOLOCKED)) != NULL) {
3011 if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING)
bd504ef0
A
3012 return (0);
3013 VERIFY(tmp_inp == inp);
316670eb
A
3014 switch (advcode) {
3015 case FADV_FLOW_CONTROLLED:
3016 inp->inp_flags |= INP_FLOW_CONTROLLED;
3017 break;
3018 case FADV_SUSPENDED:
3019 inp->inp_flags |= INP_FLOW_SUSPENDED;
3020 soevent(inp->inp_socket,
3021 (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));
3022
3023 /* Record the fact that suspend event was sent */
3024 inp->inp_socket->so_flags |= SOF_SUSPENDED;
3025 break;
3026 }
bd504ef0 3027 return (1);
316670eb 3028 }
39236c6e 3029 return (0);
316670eb
A
3030}
3031
3032/*
3033 * Handler for SO_FLUSH socket option.
3034 */
3035int
3036inp_flush(struct inpcb *inp, int optval)
3037{
3038 u_int32_t flowhash = inp->inp_flowhash;
39236c6e 3039 struct ifnet *rtifp, *oifp;
316670eb
A
3040
3041 /* Either all classes or one of the valid ones */
3042 if (optval != SO_TC_ALL && !SO_VALID_TC(optval))
3043 return (EINVAL);
3044
3045 /* We need a flow hash for identification */
3046 if (flowhash == 0)
3047 return (0);
3048
39236c6e
A
3049 /* Grab the interfaces from the route and pcb */
3050 rtifp = ((inp->inp_route.ro_rt != NULL) ?
3051 inp->inp_route.ro_rt->rt_ifp : NULL);
3052 oifp = inp->inp_last_outifp;
3053
3054 if (rtifp != NULL)
3055 if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3056 if (oifp != NULL && oifp != rtifp)
3057 if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
316670eb
A
3058
3059 return (0);
3060}
3061
3062/*
3063 * Clear the INP_INADDR_ANY flag (special case for PPP only)
3064 */
39236c6e
A
3065void
3066inp_clear_INP_INADDR_ANY(struct socket *so)
316670eb
A
3067{
3068 struct inpcb *inp = NULL;
3069
3070 socket_lock(so, 1);
3071 inp = sotoinpcb(so);
3072 if (inp) {
3073 inp->inp_flags &= ~INP_INADDR_ANY;
3074 }
3075 socket_unlock(so, 1);
3076}
3077
39236c6e
A
3078void
3079inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
3080{
3081 struct socket *so = inp->inp_socket;
3082
3083 soprocinfo->spi_pid = so->last_pid;
fe8ab488
A
3084 if (so->last_pid != 0)
3085 uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
39236c6e
A
3086 /*
3087 * When not delegated, the effective pid is the same as the real pid
3088 */
fe8ab488 3089 if (so->so_flags & SOF_DELEGATED) {
3e170ce0 3090 soprocinfo->spi_delegated = 1;
39236c6e 3091 soprocinfo->spi_epid = so->e_pid;
3e170ce0 3092 uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
fe8ab488 3093 } else {
3e170ce0 3094 soprocinfo->spi_delegated = 0;
39236c6e 3095 soprocinfo->spi_epid = so->last_pid;
fe8ab488 3096 }
39236c6e
A
3097}
3098
3099int
3100inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
3101 struct so_procinfo *soprocinfo)
3102{
3103 struct inpcb *inp = NULL;
3104 int found = 0;
3105
3106 bzero(soprocinfo, sizeof (struct so_procinfo));
3107
3108 if (!flowhash)
3109 return (-1);
3110
3111 lck_rw_lock_shared(pcbinfo->ipi_lock);
3112 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
3113 if (inp->inp_state != INPCB_STATE_DEAD &&
3114 inp->inp_socket != NULL &&
3115 inp->inp_flowhash == flowhash) {
3116 found = 1;
3117 inp_get_soprocinfo(inp, soprocinfo);
3118 break;
3119 }
3120 }
3121 lck_rw_done(pcbinfo->ipi_lock);
3122
3123 return (found);
3124}
3125
3126#if CONFIG_PROC_UUID_POLICY
3127static void
3128inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
3129{
3130 struct socket *so = inp->inp_socket;
3131 int before, after;
3132
3133 VERIFY(so != NULL);
3134 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
3135
fe8ab488 3136 before = INP_NO_CELLULAR(inp);
39236c6e
A
3137 if (set) {
3138 inp_set_nocellular(inp);
3139 } else {
3140 inp_clear_nocellular(inp);
3141 }
fe8ab488 3142 after = INP_NO_CELLULAR(inp);
39236c6e
A
3143 if (net_io_policy_log && (before != after)) {
3144 static const char *ok = "OK";
3145 static const char *nok = "NOACCESS";
3146 uuid_string_t euuid_buf;
3147 pid_t epid;
3148
3149 if (so->so_flags & SOF_DELEGATED) {
3150 uuid_unparse(so->e_uuid, euuid_buf);
3151 epid = so->e_pid;
3152 } else {
3153 uuid_unparse(so->last_uuid, euuid_buf);
3154 epid = so->last_pid;
3155 }
3156
3157 /* allow this socket to generate another notification event */
3158 so->so_ifdenied_notifies = 0;
3159
3160 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
3161 "euuid %s%s %s->%s\n", __func__,
3162 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
3163 SOCK_TYPE(so), epid, euuid_buf,
3164 (so->so_flags & SOF_DELEGATED) ?
3165 " [delegated]" : "",
3166 ((before < after) ? ok : nok),
3167 ((before < after) ? nok : ok));
3168 }
3169}
3170
fe8ab488 3171#if NECP
39236c6e 3172static void
fe8ab488 3173inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
39236c6e
A
3174{
3175 struct socket *so = inp->inp_socket;
3176 int before, after;
3177
3178 VERIFY(so != NULL);
3179 VERIFY(inp->inp_state != INPCB_STATE_DEAD);
3180
fe8ab488 3181 before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
39236c6e 3182 if (set) {
fe8ab488 3183 inp_set_want_app_policy(inp);
39236c6e 3184 } else {
fe8ab488 3185 inp_clear_want_app_policy(inp);
39236c6e 3186 }
fe8ab488 3187 after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
39236c6e
A
3188 if (net_io_policy_log && (before != after)) {
3189 static const char *wanted = "WANTED";
3190 static const char *unwanted = "UNWANTED";
3191 uuid_string_t euuid_buf;
3192 pid_t epid;
3193
3194 if (so->so_flags & SOF_DELEGATED) {
3195 uuid_unparse(so->e_uuid, euuid_buf);
3196 epid = so->e_pid;
3197 } else {
3198 uuid_unparse(so->last_uuid, euuid_buf);
3199 epid = so->last_pid;
3200 }
3201
3202 log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
3203 "euuid %s%s %s->%s\n", __func__,
3204 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
3205 SOCK_TYPE(so), epid, euuid_buf,
3206 (so->so_flags & SOF_DELEGATED) ?
3207 " [delegated]" : "",
3208 ((before < after) ? unwanted : wanted),
3209 ((before < after) ? wanted : unwanted));
3210 }
3211}
fe8ab488 3212#endif /* NECP */
39236c6e
A
3213#endif /* CONFIG_PROC_UUID_POLICY */
3214
fe8ab488
A
3215#if NECP
3216void
3217inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
3218{
3219 necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
3220 if (necp_socket_should_rescope(inp) &&
3221 inp->inp_lport == 0 &&
3222 inp->inp_laddr.s_addr == INADDR_ANY &&
3223 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
3224 // If we should rescope, and the socket is not yet bound
3225 inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
3226 }
3227}
3228#endif /* NECP */
3229
39236c6e
A
/*
 * Refresh the per-socket UUID policy state of inp.
 *
 * With CONFIG_PROC_UUID_POLICY, looks up the policy flags for the
 * socket's effective UUID (e_uuid when delegated, last_uuid otherwise)
 * and, if the policy generation count changed, updates the cellular
 * policy and (with NECP) the want-app-policy flag.  Returns 0 when
 * policy checking does not apply or the UUID has no entry (ENOENT),
 * otherwise the error from proc_uuid_policy_lookup().
 *
 * Without CONFIG_PROC_UUID_POLICY this always returns 0.
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t prev_gencnt;
	int err = 0;

	if (!net_io_policy_uuid ||
	    so == NULL || inp->inp_state == INPCB_STATE_DEAD)
		return (0);

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED))
		return (0);

	prev_gencnt = so->so_policy_gencnt;
	err = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
	    so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt);

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go thru the checks below.
	 */
	if (err == ENOENT && prev_gencnt != 0)
		so->so_policy_gencnt = 0;

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly.  If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if ((err == 0 || err == ENOENT) &&
	    prev_gencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (!(pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		} else if (err == 0) {
			inp_update_cellular_policy(inp, TRUE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, FALSE);
		} else if (err == 0) {
			inp_update_necp_want_app_policy(inp, TRUE);
		}
#endif /* NECP */
	}

	/* a missing table entry is not reported as an error */
	if (err == ENOENT)
		return (0);

	return (err);
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return (0);
#endif /* !CONFIG_PROC_UUID_POLICY */
}
39037602
A
3290
3291static unsigned int log_restricted;
3292SYSCTL_DECL(_net_inet);
3293SYSCTL_INT(_net_inet, OID_AUTO, log_restricted,
3294 CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0,
3295 "Log network restrictions");
fe8ab488
A
3296/*
3297 * Called when we need to enforce policy restrictions in the input path.
3298 *
3299 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
3300 */
39037602
A
3301static boolean_t
3302_inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
39236c6e
A
3303{
3304 VERIFY(inp != NULL);
3305
fe8ab488
A
3306 /*
3307 * Inbound restrictions.
3308 */
39236c6e
A
3309 if (!sorestrictrecv)
3310 return (FALSE);
3311
fe8ab488
A
3312 if (ifp == NULL)
3313 return (FALSE);
3314
3315 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
3316 return (TRUE);
3317
3318 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
3319 return (TRUE);
3320
3321 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
3322 return (TRUE);
39037602 3323
fe8ab488 3324 if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV))
39236c6e
A
3325 return (FALSE);
3326
3327 if (inp->inp_flags & INP_RECV_ANYIF)
3328 return (FALSE);
3329
3330 if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp)
3331 return (FALSE);
3332
39037602
A
3333 if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp))
3334 return (TRUE);
3335
39236c6e
A
3336 return (TRUE);
3337}
fe8ab488 3338
39037602
A
3339boolean_t
3340inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
3341{
3342 boolean_t ret;
3343
3344 ret = _inp_restricted_recv(inp, ifp);
3345 if (ret == TRUE && log_restricted) {
743345f9
A
3346 printf("pid %d (%s) is unable to receive packets on %s\n",
3347 current_proc()->p_pid, proc_best_name(current_proc()),
3348 ifp->if_xname);
39037602
A
3349 }
3350 return (ret);
3351}
3352
fe8ab488
A
3353/*
3354 * Called when we need to enforce policy restrictions in the output path.
3355 *
3356 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
3357 */
39037602
A
3358static boolean_t
3359_inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
fe8ab488
A
3360{
3361 VERIFY(inp != NULL);
3362
3363 /*
3364 * Outbound restrictions.
3365 */
3366 if (!sorestrictsend)
3367 return (FALSE);
3368
3369 if (ifp == NULL)
3370 return (FALSE);
3371
3372 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
3373 return (TRUE);
3374
3375 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
3376 return (TRUE);
3377
3378 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
3379 return (TRUE);
3380
39037602
A
3381 if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp))
3382 return (TRUE);
3383
fe8ab488
A
3384 return (FALSE);
3385}
39037602
A
3386
3387boolean_t
3388inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3389{
3390 boolean_t ret;
3391
3392 ret = _inp_restricted_send(inp, ifp);
3393 if (ret == TRUE && log_restricted) {
743345f9
A
3394 printf("pid %d (%s) is unable to transmit packets on %s\n",
3395 current_proc()->p_pid, proc_best_name(current_proc()),
3396 ifp->if_xname);
39037602
A
3397 }
3398 return (ret);
3399}
3400
3401inline void
3402inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack)
3403{
3404 struct ifnet *ifp = inp->inp_last_outifp;
3405 struct socket *so = inp->inp_socket;
3406 if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
3407 (ifp->if_type == IFT_CELLULAR ||
3408 ifp->if_subfamily == IFNET_SUBFAMILY_WIFI)) {
3409 int32_t unsent;
3410
3411 so->so_snd.sb_flags |= SB_SNDBYTE_CNT;
3412
3413 /*
3414 * There can be data outstanding before the connection
3415 * becomes established -- TFO case
3416 */
3417 if (so->so_snd.sb_cc > 0)
3418 inp_incr_sndbytes_total(so, so->so_snd.sb_cc);
3419
3420 unsent = inp_get_sndbytes_allunsent(so, th_ack);
3421 if (unsent > 0)
3422 inp_incr_sndbytes_unsent(so, unsent);
3423 }
3424}
3425
3426inline void
3427inp_incr_sndbytes_total(struct socket *so, int32_t len)
3428{
3429 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3430 struct ifnet *ifp = inp->inp_last_outifp;
3431
3432 if (ifp != NULL) {
3433 VERIFY(ifp->if_sndbyte_total >= 0);
3434 OSAddAtomic64(len, &ifp->if_sndbyte_total);
3435 }
3436}
3437
3438inline void
3439inp_decr_sndbytes_total(struct socket *so, int32_t len)
3440{
3441 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3442 struct ifnet *ifp = inp->inp_last_outifp;
3443
3444 if (ifp != NULL) {
3445 VERIFY(ifp->if_sndbyte_total >= len);
3446 OSAddAtomic64(-len, &ifp->if_sndbyte_total);
3447 }
3448}
3449
3450inline void
3451inp_incr_sndbytes_unsent(struct socket *so, int32_t len)
3452{
3453 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3454 struct ifnet *ifp = inp->inp_last_outifp;
3455
3456 if (ifp != NULL) {
3457 VERIFY(ifp->if_sndbyte_unsent >= 0);
3458 OSAddAtomic64(len, &ifp->if_sndbyte_unsent);
3459 }
3460}
3461
3462inline void
3463inp_decr_sndbytes_unsent(struct socket *so, int32_t len)
3464{
3465 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3466 struct ifnet *ifp = inp->inp_last_outifp;
3467
3468 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT))
3469 return;
3470
3471 if (ifp != NULL) {
3472 if (ifp->if_sndbyte_unsent >= len)
3473 OSAddAtomic64(-len, &ifp->if_sndbyte_unsent);
3474 else
3475 ifp->if_sndbyte_unsent = 0;
3476 }
3477}
3478
3479inline void
3480inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
3481{
3482 int32_t len;
3483
3484 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT))
3485 return;
3486
3487 len = inp_get_sndbytes_allunsent(so, th_ack);
3488 inp_decr_sndbytes_unsent(so, len);
3489}