]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
39037602 | 2 | * Copyright (c) 2000-2016 Apple Inc. All rights reserved. |
5d5c5d0d | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
39236c6e | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
39236c6e | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
39236c6e | 17 | * |
2d21ac55 A |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
39236c6e | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* | |
29 | * Copyright (c) 1982, 1986, 1991, 1993, 1995 | |
30 | * The Regents of the University of California. All rights reserved. | |
31 | * | |
32 | * Redistribution and use in source and binary forms, with or without | |
33 | * modification, are permitted provided that the following conditions | |
34 | * are met: | |
35 | * 1. Redistributions of source code must retain the above copyright | |
36 | * notice, this list of conditions and the following disclaimer. | |
37 | * 2. Redistributions in binary form must reproduce the above copyright | |
38 | * notice, this list of conditions and the following disclaimer in the | |
39 | * documentation and/or other materials provided with the distribution. | |
40 | * 3. All advertising materials mentioning features or use of this software | |
41 | * must display the following acknowledgement: | |
42 | * This product includes software developed by the University of | |
43 | * California, Berkeley and its contributors. | |
44 | * 4. Neither the name of the University nor the names of its contributors | |
45 | * may be used to endorse or promote products derived from this software | |
46 | * without specific prior written permission. | |
47 | * | |
48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
58 | * SUCH DAMAGE. | |
59 | * | |
60 | * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 | |
9bccf70c | 61 | * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $ |
1c79356b A |
62 | */ |
63 | ||
64 | #include <sys/param.h> | |
65 | #include <sys/systm.h> | |
66 | #include <sys/malloc.h> | |
67 | #include <sys/mbuf.h> | |
1c79356b | 68 | #include <sys/domain.h> |
1c79356b A |
69 | #include <sys/protosw.h> |
70 | #include <sys/socket.h> | |
71 | #include <sys/socketvar.h> | |
72 | #include <sys/proc.h> | |
73 | #include <sys/kernel.h> | |
74 | #include <sys/sysctl.h> | |
6d2010ae A |
75 | #include <sys/mcache.h> |
76 | #include <sys/kauth.h> | |
77 | #include <sys/priv.h> | |
39236c6e A |
78 | #include <sys/proc_uuid_policy.h> |
79 | #include <sys/syslog.h> | |
fe8ab488 | 80 | #include <sys/priv.h> |
39037602 | 81 | #include <net/dlil.h> |
39236c6e | 82 | |
91447636 | 83 | #include <libkern/OSAtomic.h> |
316670eb | 84 | #include <kern/locks.h> |
1c79356b A |
85 | |
86 | #include <machine/limits.h> | |
87 | ||
1c79356b | 88 | #include <kern/zalloc.h> |
1c79356b A |
89 | |
90 | #include <net/if.h> | |
1c79356b | 91 | #include <net/if_types.h> |
9bccf70c | 92 | #include <net/route.h> |
316670eb A |
93 | #include <net/flowhash.h> |
94 | #include <net/flowadv.h> | |
fe8ab488 | 95 | #include <net/ntstat.h> |
1c79356b A |
96 | |
97 | #include <netinet/in.h> | |
98 | #include <netinet/in_pcb.h> | |
99 | #include <netinet/in_var.h> | |
100 | #include <netinet/ip_var.h> | |
101 | #if INET6 | |
102 | #include <netinet/ip6.h> | |
103 | #include <netinet6/ip6_var.h> | |
104 | #endif /* INET6 */ | |
105 | ||
1c79356b | 106 | #include <sys/kdebug.h> |
b0d623f7 | 107 | #include <sys/random.h> |
39236c6e | 108 | |
316670eb | 109 | #include <dev/random/randomdev.h> |
39236c6e | 110 | #include <mach/boolean.h> |
1c79356b | 111 | |
39037602 A |
112 | #include <pexpert/pexpert.h> |
113 | ||
fe8ab488 A |
114 | #if NECP |
115 | #include <net/necp.h> | |
9bccf70c | 116 | #endif |
1c79356b | 117 | |
39037602 A |
118 | #include <sys/stat.h> |
119 | #include <sys/ubc.h> | |
120 | #include <sys/vnode.h> | |
121 | ||
39236c6e A |
122 | static lck_grp_t *inpcb_lock_grp; |
123 | static lck_attr_t *inpcb_lock_attr; | |
124 | static lck_grp_attr_t *inpcb_lock_grp_attr; | |
125 | decl_lck_mtx_data(static, inpcb_lock); /* global INPCB lock */ | |
126 | decl_lck_mtx_data(static, inpcb_timeout_lock); | |
127 | ||
128 | static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head); | |
129 | ||
130 | static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */ | |
131 | static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */ | |
132 | static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */ | |
133 | static boolean_t inpcb_fast_timer_on = FALSE; | |
39037602 | 134 | static boolean_t intcoproc_unrestricted = FALSE; |
fe8ab488 | 135 | |
743345f9 A |
136 | extern char *proc_best_name(proc_t); |
137 | ||
fe8ab488 A |
138 | /* |
139 | * If the total number of gc reqs is above a threshold, schedule | |
140 | * garbage collect timer sooner | |
141 | */ | |
142 | static boolean_t inpcb_toomany_gcreq = FALSE; | |
143 | ||
144 | #define INPCB_GCREQ_THRESHOLD 50000 | |
fe8ab488 | 145 | |
39037602 A |
146 | static thread_call_t inpcb_thread_call, inpcb_fast_thread_call; |
147 | static void inpcb_sched_timeout(void); | |
148 | static void inpcb_sched_lazy_timeout(void); | |
149 | static void _inpcb_sched_timeout(unsigned int); | |
150 | static void inpcb_timeout(void *, void *); | |
151 | const int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */ | |
39236c6e A |
152 | extern int tvtohz(struct timeval *); |
153 | ||
154 | #if CONFIG_PROC_UUID_POLICY | |
155 | static void inp_update_cellular_policy(struct inpcb *, boolean_t); | |
fe8ab488 A |
156 | #if NECP |
157 | static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t); | |
158 | #endif /* NECP */ | |
39236c6e A |
159 | #endif /* CONFIG_PROC_UUID_POLICY */ |
160 | ||
39236c6e A |
161 | #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8)) |
162 | #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1)) | |
1c79356b | 163 | |
1c79356b A |
164 | /* |
165 | * These configure the range of local port addresses assigned to | |
166 | * "unspecified" outgoing connections/packets/whatever. | |
167 | */ | |
9bccf70c A |
168 | int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ |
169 | int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ | |
39236c6e A |
170 | int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ |
171 | int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ | |
9bccf70c A |
172 | int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ |
173 | int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ | |
1c79356b | 174 | |
39236c6e | 175 | #define RANGECHK(var, min, max) \ |
1c79356b A |
176 | if ((var) < (min)) { (var) = (min); } \ |
177 | else if ((var) > (max)) { (var) = (max); } | |
178 | ||
1c79356b A |
179 | static int |
180 | sysctl_net_ipport_check SYSCTL_HANDLER_ARGS | |
181 | { | |
2d21ac55 | 182 | #pragma unused(arg1, arg2) |
39236c6e A |
183 | int error; |
184 | ||
185 | error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); | |
1c79356b A |
186 | if (!error) { |
187 | RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); | |
188 | RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); | |
189 | RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); | |
190 | RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); | |
191 | RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); | |
192 | RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); | |
193 | } | |
39236c6e | 194 | return (error); |
1c79356b A |
195 | } |
196 | ||
197 | #undef RANGECHK | |
198 | ||
39236c6e A |
199 | SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, |
200 | CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports"); | |
201 | ||
202 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, | |
203 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
204 | &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); | |
205 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, | |
206 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
207 | &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); | |
208 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, | |
209 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
210 | &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); | |
211 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, | |
212 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
213 | &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); | |
214 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, | |
215 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
216 | &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); | |
217 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, | |
218 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
219 | &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); | |
1c79356b | 220 | |
39037602 A |
221 | static uint32_t apn_fallbk_debug = 0; |
222 | #define apn_fallbk_log(x) do { if (apn_fallbk_debug >= 1) log x; } while (0) | |
223 | ||
224 | static boolean_t apn_fallbk_enabled = FALSE; | |
225 | ||
b0d623f7 A |
226 | extern int udp_use_randomport; |
227 | extern int tcp_use_randomport; | |
228 | ||
316670eb A |
229 | /* Structs used for flowhash computation */ |
230 | struct inp_flowhash_key_addr { | |
231 | union { | |
232 | struct in_addr v4; | |
233 | struct in6_addr v6; | |
234 | u_int8_t addr8[16]; | |
235 | u_int16_t addr16[8]; | |
236 | u_int32_t addr32[4]; | |
237 | } infha; | |
238 | }; | |
239 | ||
240 | struct inp_flowhash_key { | |
39236c6e | 241 | struct inp_flowhash_key_addr infh_laddr; |
316670eb A |
242 | struct inp_flowhash_key_addr infh_faddr; |
243 | u_int32_t infh_lport; | |
244 | u_int32_t infh_fport; | |
245 | u_int32_t infh_af; | |
246 | u_int32_t infh_proto; | |
247 | u_int32_t infh_rand1; | |
248 | u_int32_t infh_rand2; | |
249 | }; | |
250 | ||
39236c6e A |
251 | static u_int32_t inp_hash_seed = 0; |
252 | ||
253 | static int infc_cmp(const struct inpcb *, const struct inpcb *); | |
254 | ||
255 | /* Flags used by inp_fc_getinp */ | |
256 | #define INPFC_SOLOCKED 0x1 | |
257 | #define INPFC_REMOVE 0x2 | |
258 | static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t); | |
259 | ||
260 | static void inp_fc_feedback(struct inpcb *); | |
261 | extern void tcp_remove_from_time_wait(struct inpcb *inp); | |
316670eb | 262 | |
39236c6e | 263 | decl_lck_mtx_data(static, inp_fc_lck); |
316670eb | 264 | |
bd504ef0 A |
265 | RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree; |
266 | RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp); | |
267 | RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp); | |
316670eb | 268 | |
bd504ef0 A |
269 | /* |
270 | * Use this inp as a key to find an inp in the flowhash tree. | |
271 | * Accesses to it are protected by inp_fc_lck. | |
272 | */ | |
273 | struct inpcb key_inp; | |
316670eb | 274 | |
1c79356b A |
275 | /* |
276 | * in_pcb.c: manage the Protocol Control Blocks. | |
1c79356b A |
277 | */ |
278 | ||
316670eb | 279 | void |
39236c6e | 280 | in_pcbinit(void) |
316670eb | 281 | { |
39236c6e | 282 | static int inpcb_initialized = 0; |
316670eb | 283 | |
39236c6e A |
284 | VERIFY(!inpcb_initialized); |
285 | inpcb_initialized = 1; | |
316670eb | 286 | |
39236c6e A |
287 | inpcb_lock_grp_attr = lck_grp_attr_alloc_init(); |
288 | inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr); | |
289 | inpcb_lock_attr = lck_attr_alloc_init(); | |
290 | lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr); | |
291 | lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr); | |
39037602 A |
292 | inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout, |
293 | NULL, THREAD_CALL_PRIORITY_KERNEL); | |
294 | inpcb_fast_thread_call = thread_call_allocate_with_priority( | |
295 | inpcb_timeout, NULL, THREAD_CALL_PRIORITY_KERNEL); | |
296 | if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL) | |
297 | panic("unable to alloc the inpcb thread call"); | |
39236c6e A |
298 | |
299 | /* | |
300 | * Initialize data structures required to deliver | |
301 | * flow advisories. | |
302 | */ | |
303 | lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr); | |
bd504ef0 | 304 | lck_mtx_lock(&inp_fc_lck); |
316670eb | 305 | RB_INIT(&inp_fc_tree); |
bd504ef0 A |
306 | bzero(&key_inp, sizeof(key_inp)); |
307 | lck_mtx_unlock(&inp_fc_lck); | |
39037602 A |
308 | |
309 | PE_parse_boot_argn("intcoproc_unrestricted", &intcoproc_unrestricted, | |
310 | sizeof (intcoproc_unrestricted)); | |
316670eb A |
311 | } |
312 | ||
39236c6e A |
313 | #define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \ |
314 | ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0)) | |
315 | static void | |
39037602 | 316 | inpcb_timeout(void *arg0, void *arg1) |
39236c6e | 317 | { |
39037602 | 318 | #pragma unused(arg0) |
39236c6e A |
319 | struct inpcbinfo *ipi; |
320 | boolean_t t, gc; | |
321 | struct intimercount gccnt, tmcnt; | |
fe8ab488 A |
322 | boolean_t toomany_gc = FALSE; |
323 | ||
39037602 A |
324 | if (arg1 != NULL) { |
325 | VERIFY(arg1 == &inpcb_toomany_gcreq); | |
326 | toomany_gc = *(boolean_t *)arg1; | |
fe8ab488 | 327 | } |
39236c6e A |
328 | |
329 | /* | |
330 | * Update coarse-grained networking timestamp (in sec.); the idea | |
331 | * is to piggy-back on the timeout callout to update the counter | |
332 | * returnable via net_uptime(). | |
333 | */ | |
334 | net_update_uptime(); | |
335 | ||
fe8ab488 A |
336 | bzero(&gccnt, sizeof(gccnt)); |
337 | bzero(&tmcnt, sizeof(tmcnt)); | |
338 | ||
39236c6e A |
339 | lck_mtx_lock_spin(&inpcb_timeout_lock); |
340 | gc = inpcb_garbage_collecting; | |
341 | inpcb_garbage_collecting = FALSE; | |
39236c6e A |
342 | |
343 | t = inpcb_ticking; | |
344 | inpcb_ticking = FALSE; | |
345 | ||
346 | if (gc || t) { | |
347 | lck_mtx_unlock(&inpcb_timeout_lock); | |
348 | ||
349 | lck_mtx_lock(&inpcb_lock); | |
350 | TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) { | |
351 | if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) { | |
352 | bzero(&ipi->ipi_gc_req, | |
353 | sizeof(ipi->ipi_gc_req)); | |
354 | if (gc && ipi->ipi_gc != NULL) { | |
355 | ipi->ipi_gc(ipi); | |
356 | gccnt.intimer_lazy += | |
357 | ipi->ipi_gc_req.intimer_lazy; | |
358 | gccnt.intimer_fast += | |
359 | ipi->ipi_gc_req.intimer_fast; | |
360 | gccnt.intimer_nodelay += | |
361 | ipi->ipi_gc_req.intimer_nodelay; | |
362 | } | |
363 | } | |
364 | if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) { | |
365 | bzero(&ipi->ipi_timer_req, | |
366 | sizeof(ipi->ipi_timer_req)); | |
367 | if (t && ipi->ipi_timer != NULL) { | |
368 | ipi->ipi_timer(ipi); | |
369 | tmcnt.intimer_lazy += | |
370 | ipi->ipi_timer_req.intimer_lazy; | |
371 | tmcnt.intimer_lazy += | |
372 | ipi->ipi_timer_req.intimer_fast; | |
373 | tmcnt.intimer_nodelay += | |
374 | ipi->ipi_timer_req.intimer_nodelay; | |
375 | } | |
376 | } | |
377 | } | |
378 | lck_mtx_unlock(&inpcb_lock); | |
379 | lck_mtx_lock_spin(&inpcb_timeout_lock); | |
380 | } | |
381 | ||
382 | /* lock was dropped above, so check first before overriding */ | |
383 | if (!inpcb_garbage_collecting) | |
384 | inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt); | |
385 | if (!inpcb_ticking) | |
386 | inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt); | |
387 | ||
388 | /* re-arm the timer if there's work to do */ | |
fe8ab488 A |
389 | if (toomany_gc) { |
390 | inpcb_toomany_gcreq = FALSE; | |
391 | } else { | |
392 | inpcb_timeout_run--; | |
393 | VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2); | |
394 | } | |
39236c6e | 395 | |
39236c6e | 396 | if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) |
39037602 | 397 | inpcb_sched_timeout(); |
39236c6e A |
398 | else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) |
399 | /* be lazy when idle with little activity */ | |
39037602 | 400 | inpcb_sched_lazy_timeout(); |
39236c6e | 401 | else |
39037602 | 402 | inpcb_sched_timeout(); |
39236c6e A |
403 | |
404 | lck_mtx_unlock(&inpcb_timeout_lock); | |
405 | } | |
406 | ||
/*
 * Request a timer run without leeway (offset 0).  Caller holds
 * inpcb_timeout_lock; _inpcb_sched_timeout() asserts it.
 */
static void
inpcb_sched_timeout(void)
{
	_inpcb_sched_timeout(0);
}
412 | ||
413 | static void | |
414 | inpcb_sched_lazy_timeout(void) | |
415 | { | |
416 | _inpcb_sched_timeout(inpcb_timeout_lazy); | |
417 | } | |
39236c6e | 418 | |
39037602 A |
419 | static void |
420 | _inpcb_sched_timeout(unsigned int offset) | |
421 | { | |
422 | uint64_t deadline, leeway; | |
423 | ||
424 | clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline); | |
425 | lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED); | |
39236c6e | 426 | if (inpcb_timeout_run == 0 && |
39037602 | 427 | (inpcb_garbage_collecting || inpcb_ticking)) { |
39236c6e A |
428 | lck_mtx_convert_spin(&inpcb_timeout_lock); |
429 | inpcb_timeout_run++; | |
39037602 | 430 | if (offset == 0) { |
39236c6e | 431 | inpcb_fast_timer_on = TRUE; |
39037602 A |
432 | thread_call_enter_delayed(inpcb_thread_call, |
433 | deadline); | |
39236c6e A |
434 | } else { |
435 | inpcb_fast_timer_on = FALSE; | |
39037602 A |
436 | clock_interval_to_absolutetime_interval(offset, |
437 | NSEC_PER_SEC, &leeway); | |
438 | thread_call_enter_delayed_with_leeway( | |
439 | inpcb_thread_call, NULL, deadline, leeway, | |
440 | THREAD_CALL_DELAY_LEEWAY); | |
39236c6e A |
441 | } |
442 | } else if (inpcb_timeout_run == 1 && | |
39037602 | 443 | offset == 0 && !inpcb_fast_timer_on) { |
39236c6e A |
444 | /* |
445 | * Since the request was for a fast timer but the | |
446 | * scheduled timer is a lazy timer, try to schedule | |
39037602 | 447 | * another instance of fast timer also. |
39236c6e A |
448 | */ |
449 | lck_mtx_convert_spin(&inpcb_timeout_lock); | |
450 | inpcb_timeout_run++; | |
451 | inpcb_fast_timer_on = TRUE; | |
39037602 | 452 | thread_call_enter_delayed(inpcb_fast_thread_call, deadline); |
39236c6e A |
453 | } |
454 | } | |
455 | ||
456 | void | |
457 | inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type) | |
458 | { | |
fe8ab488 | 459 | u_int32_t gccnt; |
39037602 A |
460 | uint64_t deadline; |
461 | ||
39236c6e A |
462 | lck_mtx_lock_spin(&inpcb_timeout_lock); |
463 | inpcb_garbage_collecting = TRUE; | |
fe8ab488 A |
464 | gccnt = ipi->ipi_gc_req.intimer_nodelay + |
465 | ipi->ipi_gc_req.intimer_fast; | |
466 | ||
467 | if (gccnt > INPCB_GCREQ_THRESHOLD && !inpcb_toomany_gcreq) { | |
468 | inpcb_toomany_gcreq = TRUE; | |
469 | ||
470 | /* | |
471 | * There are toomany pcbs waiting to be garbage collected, | |
472 | * schedule a much faster timeout in addition to | |
473 | * the caller's request | |
474 | */ | |
475 | lck_mtx_convert_spin(&inpcb_timeout_lock); | |
39037602 A |
476 | clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline); |
477 | thread_call_enter1_delayed(inpcb_thread_call, | |
478 | &inpcb_toomany_gcreq, deadline); | |
fe8ab488 A |
479 | } |
480 | ||
39236c6e A |
481 | switch (type) { |
482 | case INPCB_TIMER_NODELAY: | |
483 | atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1); | |
39037602 | 484 | inpcb_sched_timeout(); |
39236c6e A |
485 | break; |
486 | case INPCB_TIMER_FAST: | |
487 | atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1); | |
39037602 | 488 | inpcb_sched_timeout(); |
39236c6e A |
489 | break; |
490 | default: | |
491 | atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1); | |
39037602 | 492 | inpcb_sched_lazy_timeout(); |
39236c6e A |
493 | break; |
494 | } | |
495 | lck_mtx_unlock(&inpcb_timeout_lock); | |
496 | } | |
497 | ||
498 | void | |
499 | inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type) | |
500 | { | |
39037602 | 501 | |
39236c6e A |
502 | lck_mtx_lock_spin(&inpcb_timeout_lock); |
503 | inpcb_ticking = TRUE; | |
504 | switch (type) { | |
505 | case INPCB_TIMER_NODELAY: | |
506 | atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1); | |
39037602 | 507 | inpcb_sched_timeout(); |
39236c6e A |
508 | break; |
509 | case INPCB_TIMER_FAST: | |
510 | atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1); | |
39037602 | 511 | inpcb_sched_timeout(); |
39236c6e A |
512 | break; |
513 | default: | |
514 | atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1); | |
39037602 | 515 | inpcb_sched_lazy_timeout(); |
39236c6e A |
516 | break; |
517 | } | |
518 | lck_mtx_unlock(&inpcb_timeout_lock); | |
519 | } | |
520 | ||
521 | void | |
522 | in_pcbinfo_attach(struct inpcbinfo *ipi) | |
523 | { | |
524 | struct inpcbinfo *ipi0; | |
525 | ||
526 | lck_mtx_lock(&inpcb_lock); | |
527 | TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) { | |
528 | if (ipi0 == ipi) { | |
529 | panic("%s: ipi %p already in the list\n", | |
530 | __func__, ipi); | |
531 | /* NOTREACHED */ | |
532 | } | |
533 | } | |
534 | TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry); | |
535 | lck_mtx_unlock(&inpcb_lock); | |
536 | } | |
537 | ||
538 | int | |
539 | in_pcbinfo_detach(struct inpcbinfo *ipi) | |
540 | { | |
541 | struct inpcbinfo *ipi0; | |
542 | int error = 0; | |
543 | ||
544 | lck_mtx_lock(&inpcb_lock); | |
545 | TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) { | |
546 | if (ipi0 == ipi) | |
547 | break; | |
548 | } | |
549 | if (ipi0 != NULL) | |
550 | TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry); | |
551 | else | |
552 | error = ENXIO; | |
553 | lck_mtx_unlock(&inpcb_lock); | |
554 | ||
555 | return (error); | |
556 | } | |
557 | ||
1c79356b A |
558 | /* |
559 | * Allocate a PCB and associate it with the socket. | |
2d21ac55 A |
560 | * |
561 | * Returns: 0 Success | |
562 | * ENOBUFS | |
563 | * ENOMEM | |
1c79356b A |
564 | */ |
565 | int | |
39236c6e | 566 | in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) |
1c79356b | 567 | { |
39236c6e | 568 | #pragma unused(p) |
2d21ac55 | 569 | struct inpcb *inp; |
39236c6e | 570 | caddr_t temp; |
2d21ac55 A |
571 | #if CONFIG_MACF_NET |
572 | int mac_error; | |
39236c6e | 573 | #endif /* CONFIG_MACF_NET */ |
1c79356b | 574 | |
3e170ce0 | 575 | if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) { |
39236c6e A |
576 | inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone); |
577 | if (inp == NULL) | |
578 | return (ENOBUFS); | |
579 | bzero((caddr_t)inp, sizeof (*inp)); | |
580 | } else { | |
581 | inp = (struct inpcb *)(void *)so->so_saved_pcb; | |
582 | temp = inp->inp_saved_ppcb; | |
583 | bzero((caddr_t)inp, sizeof (*inp)); | |
584 | inp->inp_saved_ppcb = temp; | |
1c79356b A |
585 | } |
586 | ||
587 | inp->inp_gencnt = ++pcbinfo->ipi_gencnt; | |
588 | inp->inp_pcbinfo = pcbinfo; | |
589 | inp->inp_socket = so; | |
2d21ac55 A |
590 | #if CONFIG_MACF_NET |
591 | mac_error = mac_inpcb_label_init(inp, M_WAITOK); | |
592 | if (mac_error != 0) { | |
3e170ce0 | 593 | if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) |
2d21ac55 A |
594 | zfree(pcbinfo->ipi_zone, inp); |
595 | return (mac_error); | |
596 | } | |
597 | mac_inpcb_label_associate(so, inp); | |
39236c6e A |
598 | #endif /* CONFIG_MACF_NET */ |
599 | /* make sure inp_stat is always 64-bit aligned */ | |
600 | inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store, | |
601 | sizeof (u_int64_t)); | |
602 | if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) + | |
603 | sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) { | |
604 | panic("%s: insufficient space to align inp_stat", __func__); | |
605 | /* NOTREACHED */ | |
606 | } | |
607 | ||
608 | /* make sure inp_cstat is always 64-bit aligned */ | |
609 | inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store, | |
610 | sizeof (u_int64_t)); | |
611 | if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) + | |
612 | sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) { | |
613 | panic("%s: insufficient space to align inp_cstat", __func__); | |
614 | /* NOTREACHED */ | |
615 | } | |
616 | ||
617 | /* make sure inp_wstat is always 64-bit aligned */ | |
618 | inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store, | |
619 | sizeof (u_int64_t)); | |
620 | if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) + | |
621 | sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) { | |
622 | panic("%s: insufficient space to align inp_wstat", __func__); | |
623 | /* NOTREACHED */ | |
6d2010ae A |
624 | } |
625 | ||
fe8ab488 A |
626 | /* make sure inp_Wstat is always 64-bit aligned */ |
627 | inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store, | |
628 | sizeof (u_int64_t)); | |
629 | if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) + | |
630 | sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) { | |
631 | panic("%s: insufficient space to align inp_Wstat", __func__); | |
632 | /* NOTREACHED */ | |
633 | } | |
39037602 | 634 | |
91447636 A |
635 | so->so_pcb = (caddr_t)inp; |
636 | ||
637 | if (so->so_proto->pr_flags & PR_PCBLOCK) { | |
39236c6e A |
638 | lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp, |
639 | pcbinfo->ipi_lock_attr); | |
91447636 A |
640 | } |
641 | ||
2d21ac55 | 642 | #if INET6 |
39236c6e | 643 | if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) |
9bccf70c | 644 | inp->inp_flags |= IN6P_IPV6_V6ONLY; |
39236c6e | 645 | |
9bccf70c A |
646 | if (ip6_auto_flowlabel) |
647 | inp->inp_flags |= IN6P_AUTOFLOWLABEL; | |
39236c6e | 648 | #endif /* INET6 */ |
39037602 A |
649 | if (intcoproc_unrestricted) |
650 | inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED; | |
39236c6e A |
651 | |
652 | (void) inp_update_policy(inp); | |
653 | ||
654 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); | |
91447636 | 655 | inp->inp_gencnt = ++pcbinfo->ipi_gencnt; |
39236c6e | 656 | LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list); |
91447636 | 657 | pcbinfo->ipi_count++; |
39236c6e | 658 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
659 | return (0); |
660 | } | |
661 | ||
2d21ac55 | 662 | /* |
39236c6e A |
663 | * in_pcblookup_local_and_cleanup does everything |
664 | * in_pcblookup_local does but it checks for a socket | |
665 | * that's going away. Since we know that the lock is | |
666 | * held read+write when this funciton is called, we | |
667 | * can safely dispose of this socket like the slow | |
668 | * timer would usually do and return NULL. This is | |
669 | * great for bind. | |
670 | */ | |
671 | struct inpcb * | |
672 | in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr, | |
673 | u_int lport_arg, int wild_okay) | |
2d21ac55 A |
674 | { |
675 | struct inpcb *inp; | |
39236c6e | 676 | |
2d21ac55 A |
677 | /* Perform normal lookup */ |
678 | inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay); | |
39236c6e | 679 | |
2d21ac55 | 680 | /* Check if we found a match but it's waiting to be disposed */ |
39236c6e | 681 | if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) { |
2d21ac55 | 682 | struct socket *so = inp->inp_socket; |
39236c6e | 683 | |
6d2010ae | 684 | lck_mtx_lock(&inp->inpcb_mtx); |
39236c6e | 685 | |
2d21ac55 | 686 | if (so->so_usecount == 0) { |
b0d623f7 A |
687 | if (inp->inp_state != INPCB_STATE_DEAD) |
688 | in_pcbdetach(inp); | |
39236c6e | 689 | in_pcbdispose(inp); /* will unlock & destroy */ |
2d21ac55 | 690 | inp = NULL; |
39236c6e | 691 | } else { |
6d2010ae | 692 | lck_mtx_unlock(&inp->inpcb_mtx); |
2d21ac55 A |
693 | } |
694 | } | |
39236c6e A |
695 | |
696 | return (inp); | |
2d21ac55 A |
697 | } |
698 | ||
c910b4d9 | 699 | static void |
2d21ac55 A |
700 | in_pcb_conflict_post_msg(u_int16_t port) |
701 | { | |
39236c6e A |
702 | /* |
703 | * Radar 5523020 send a kernel event notification if a | |
704 | * non-participating socket tries to bind the port a socket | |
705 | * who has set SOF_NOTIFYCONFLICT owns. | |
2d21ac55 | 706 | */ |
39236c6e | 707 | struct kev_msg ev_msg; |
2d21ac55 A |
708 | struct kev_in_portinuse in_portinuse; |
709 | ||
39236c6e A |
710 | bzero(&in_portinuse, sizeof (struct kev_in_portinuse)); |
711 | bzero(&ev_msg, sizeof (struct kev_msg)); | |
2d21ac55 A |
712 | in_portinuse.port = ntohs(port); /* port in host order */ |
713 | in_portinuse.req_pid = proc_selfpid(); | |
714 | ev_msg.vendor_code = KEV_VENDOR_APPLE; | |
715 | ev_msg.kev_class = KEV_NETWORK_CLASS; | |
716 | ev_msg.kev_subclass = KEV_INET_SUBCLASS; | |
717 | ev_msg.event_code = KEV_INET_PORTINUSE; | |
718 | ev_msg.dv[0].data_ptr = &in_portinuse; | |
39236c6e | 719 | ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse); |
2d21ac55 | 720 | ev_msg.dv[1].data_length = 0; |
39037602 | 721 | dlil_post_complete_msg(NULL, &ev_msg); |
2d21ac55 | 722 | } |
39236c6e | 723 | |
2d21ac55 | 724 | /* |
39236c6e A |
725 | * Bind an INPCB to an address and/or port. This routine should not alter |
726 | * the caller-supplied local address "nam". | |
727 | * | |
2d21ac55 A |
728 | * Returns: 0 Success |
729 | * EADDRNOTAVAIL Address not available. | |
730 | * EINVAL Invalid argument | |
731 | * EAFNOSUPPORT Address family not supported [notdef] | |
732 | * EACCES Permission denied | |
733 | * EADDRINUSE Address in use | |
734 | * EAGAIN Resource unavailable, try again | |
6d2010ae | 735 | * priv_check_cred:EPERM Operation not permitted |
2d21ac55 | 736 | */ |
1c79356b | 737 | int |
2d21ac55 | 738 | in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) |
1c79356b | 739 | { |
2d21ac55 | 740 | struct socket *so = inp->inp_socket; |
9bccf70c | 741 | unsigned short *lastport; |
1c79356b | 742 | struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; |
b0d623f7 | 743 | u_short lport = 0, rand_port = 0; |
1c79356b | 744 | int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); |
b0d623f7 | 745 | int error, randomport, conflict = 0; |
fe8ab488 | 746 | boolean_t anonport = FALSE; |
6d2010ae | 747 | kauth_cred_t cred; |
fe8ab488 A |
748 | struct in_addr laddr; |
749 | struct ifnet *outif = NULL; | |
1c79356b A |
750 | |
751 | if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ | |
752 | return (EADDRNOTAVAIL); | |
39236c6e | 753 | if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) |
1c79356b | 754 | return (EINVAL); |
39236c6e | 755 | if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) |
1c79356b | 756 | wild = 1; |
fe8ab488 A |
757 | |
758 | bzero(&laddr, sizeof(laddr)); | |
759 | ||
4bd07ac2 A |
760 | socket_unlock(so, 0); /* keep reference on socket */ |
761 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); | |
762 | ||
39236c6e | 763 | if (nam != NULL) { |
6d2010ae | 764 | |
39236c6e A |
765 | if (nam->sa_len != sizeof (struct sockaddr_in)) { |
766 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 767 | socket_lock(so, 0); |
1c79356b | 768 | return (EINVAL); |
91447636 | 769 | } |
39236c6e | 770 | #if 0 |
1c79356b A |
771 | /* |
772 | * We should check the family, but old programs | |
773 | * incorrectly fail to initialize it. | |
774 | */ | |
39236c6e A |
775 | if (nam->sa_family != AF_INET) { |
776 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 777 | socket_lock(so, 0); |
1c79356b | 778 | return (EAFNOSUPPORT); |
91447636 | 779 | } |
39236c6e A |
780 | #endif /* 0 */ |
781 | lport = SIN(nam)->sin_port; | |
782 | ||
783 | if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) { | |
1c79356b A |
784 | /* |
785 | * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; | |
786 | * allow complete duplication of binding if | |
787 | * SO_REUSEPORT is set, or if SO_REUSEADDR is set | |
788 | * and a multicast address is bound on both | |
789 | * new and duplicated sockets. | |
790 | */ | |
791 | if (so->so_options & SO_REUSEADDR) | |
792 | reuseport = SO_REUSEADDR|SO_REUSEPORT; | |
39236c6e A |
793 | } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) { |
794 | struct sockaddr_in sin; | |
91447636 | 795 | struct ifaddr *ifa; |
39236c6e A |
796 | |
797 | /* Sanitized for interface address searches */ | |
798 | bzero(&sin, sizeof (sin)); | |
799 | sin.sin_family = AF_INET; | |
800 | sin.sin_len = sizeof (struct sockaddr_in); | |
801 | sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr; | |
802 | ||
803 | ifa = ifa_ifwithaddr(SA(&sin)); | |
804 | if (ifa == NULL) { | |
805 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 806 | socket_lock(so, 0); |
1c79356b | 807 | return (EADDRNOTAVAIL); |
39236c6e A |
808 | } else { |
809 | /* | |
810 | * Opportunistically determine the outbound | |
811 | * interface that may be used; this may not | |
812 | * hold true if we end up using a route | |
813 | * going over a different interface, e.g. | |
814 | * when sending to a local address. This | |
815 | * will get updated again after sending. | |
816 | */ | |
6d2010ae | 817 | IFA_LOCK(ifa); |
316670eb | 818 | outif = ifa->ifa_ifp; |
6d2010ae A |
819 | IFA_UNLOCK(ifa); |
820 | IFA_REMREF(ifa); | |
91447636 | 821 | } |
1c79356b | 822 | } |
39236c6e | 823 | if (lport != 0) { |
1c79356b | 824 | struct inpcb *t; |
39236c6e | 825 | uid_t u; |
1c79356b | 826 | |
6d2010ae A |
827 | if (ntohs(lport) < IPPORT_RESERVED) { |
828 | cred = kauth_cred_proc_ref(p); | |
39236c6e A |
829 | error = priv_check_cred(cred, |
830 | PRIV_NETINET_RESERVEDPORT, 0); | |
6d2010ae A |
831 | kauth_cred_unref(&cred); |
832 | if (error != 0) { | |
39236c6e | 833 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae A |
834 | socket_lock(so, 0); |
835 | return (EACCES); | |
836 | } | |
91447636 | 837 | } |
39236c6e A |
838 | if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) && |
839 | (u = kauth_cred_getuid(so->so_cred)) != 0 && | |
840 | (t = in_pcblookup_local_and_cleanup( | |
841 | inp->inp_pcbinfo, SIN(nam)->sin_addr, lport, | |
842 | INPLOOKUP_WILDCARD)) != NULL && | |
843 | (SIN(nam)->sin_addr.s_addr != INADDR_ANY || | |
844 | t->inp_laddr.s_addr != INADDR_ANY || | |
845 | !(t->inp_socket->so_options & SO_REUSEPORT)) && | |
846 | (u != kauth_cred_getuid(t->inp_socket->so_cred)) && | |
847 | !(t->inp_socket->so_flags & SOF_REUSESHAREUID) && | |
848 | (SIN(nam)->sin_addr.s_addr != INADDR_ANY || | |
849 | t->inp_laddr.s_addr != INADDR_ANY)) { | |
850 | if ((t->inp_socket->so_flags & | |
851 | SOF_NOTIFYCONFLICT) && | |
852 | !(so->so_flags & SOF_NOTIFYCONFLICT)) | |
853 | conflict = 1; | |
854 | ||
855 | lck_rw_done(pcbinfo->ipi_lock); | |
856 | ||
857 | if (conflict) | |
858 | in_pcb_conflict_post_msg(lport); | |
2d21ac55 | 859 | |
39236c6e A |
860 | socket_lock(so, 0); |
861 | return (EADDRINUSE); | |
1c79356b | 862 | } |
39236c6e A |
863 | t = in_pcblookup_local_and_cleanup(pcbinfo, |
864 | SIN(nam)->sin_addr, lport, wild); | |
865 | if (t != NULL && | |
1c79356b A |
866 | (reuseport & t->inp_socket->so_options) == 0) { |
867 | #if INET6 | |
39236c6e A |
868 | if (SIN(nam)->sin_addr.s_addr != INADDR_ANY || |
869 | t->inp_laddr.s_addr != INADDR_ANY || | |
870 | SOCK_DOM(so) != PF_INET6 || | |
871 | SOCK_DOM(t->inp_socket) != PF_INET6) | |
2d21ac55 A |
872 | #endif /* INET6 */ |
873 | { | |
2d21ac55 | 874 | |
39236c6e A |
875 | if ((t->inp_socket->so_flags & |
876 | SOF_NOTIFYCONFLICT) && | |
877 | !(so->so_flags & SOF_NOTIFYCONFLICT)) | |
2d21ac55 A |
878 | conflict = 1; |
879 | ||
39236c6e | 880 | lck_rw_done(pcbinfo->ipi_lock); |
2d21ac55 A |
881 | |
882 | if (conflict) | |
883 | in_pcb_conflict_post_msg(lport); | |
91447636 A |
884 | socket_lock(so, 0); |
885 | return (EADDRINUSE); | |
886 | } | |
1c79356b A |
887 | } |
888 | } | |
fe8ab488 | 889 | laddr = SIN(nam)->sin_addr; |
1c79356b A |
890 | } |
891 | if (lport == 0) { | |
892 | u_short first, last; | |
893 | int count; | |
894 | ||
39236c6e A |
895 | randomport = (so->so_flags & SOF_BINDRANDOMPORT) || |
896 | (so->so_type == SOCK_STREAM ? tcp_use_randomport : | |
897 | udp_use_randomport); | |
898 | ||
899 | /* | |
fe8ab488 A |
900 | * Even though this looks similar to the code in |
901 | * in6_pcbsetport, the v6 vs v4 checks are different. | |
39236c6e | 902 | */ |
fe8ab488 | 903 | anonport = TRUE; |
1c79356b A |
904 | if (inp->inp_flags & INP_HIGHPORT) { |
905 | first = ipport_hifirstauto; /* sysctl */ | |
906 | last = ipport_hilastauto; | |
39236c6e | 907 | lastport = &pcbinfo->ipi_lasthi; |
1c79356b | 908 | } else if (inp->inp_flags & INP_LOWPORT) { |
6d2010ae | 909 | cred = kauth_cred_proc_ref(p); |
39236c6e A |
910 | error = priv_check_cred(cred, |
911 | PRIV_NETINET_RESERVEDPORT, 0); | |
6d2010ae A |
912 | kauth_cred_unref(&cred); |
913 | if (error != 0) { | |
39236c6e | 914 | lck_rw_done(pcbinfo->ipi_lock); |
91447636 | 915 | socket_lock(so, 0); |
39236c6e | 916 | return (error); |
91447636 | 917 | } |
1c79356b A |
918 | first = ipport_lowfirstauto; /* 1023 */ |
919 | last = ipport_lowlastauto; /* 600 */ | |
39236c6e | 920 | lastport = &pcbinfo->ipi_lastlow; |
1c79356b A |
921 | } else { |
922 | first = ipport_firstauto; /* sysctl */ | |
923 | last = ipport_lastauto; | |
39236c6e | 924 | lastport = &pcbinfo->ipi_lastport; |
1c79356b | 925 | } |
b0d623f7 A |
926 | /* No point in randomizing if only one port is available */ |
927 | ||
928 | if (first == last) | |
39236c6e | 929 | randomport = 0; |
1c79356b A |
930 | /* |
931 | * Simple check to ensure all ports are not used up causing | |
932 | * a deadlock here. | |
933 | * | |
934 | * We split the two cases (up and down) so that the direction | |
935 | * is not being tested on each round of the loop. | |
936 | */ | |
937 | if (first > last) { | |
938 | /* | |
939 | * counting down | |
940 | */ | |
b0d623f7 | 941 | if (randomport) { |
39236c6e A |
942 | read_random(&rand_port, sizeof (rand_port)); |
943 | *lastport = | |
944 | first - (rand_port % (first - last)); | |
b0d623f7 | 945 | } |
1c79356b A |
946 | count = first - last; |
947 | ||
948 | do { | |
949 | if (count-- < 0) { /* completely used? */ | |
39236c6e | 950 | lck_rw_done(pcbinfo->ipi_lock); |
91447636 | 951 | socket_lock(so, 0); |
9bccf70c | 952 | return (EADDRNOTAVAIL); |
1c79356b A |
953 | } |
954 | --*lastport; | |
955 | if (*lastport > first || *lastport < last) | |
956 | *lastport = first; | |
957 | lport = htons(*lastport); | |
2d21ac55 | 958 | } while (in_pcblookup_local_and_cleanup(pcbinfo, |
39037602 | 959 | ((laddr.s_addr != INADDR_ANY) ? laddr : |
fe8ab488 | 960 | inp->inp_laddr), lport, wild)); |
1c79356b A |
961 | } else { |
962 | /* | |
963 | * counting up | |
964 | */ | |
b0d623f7 | 965 | if (randomport) { |
39236c6e A |
966 | read_random(&rand_port, sizeof (rand_port)); |
967 | *lastport = | |
968 | first + (rand_port % (first - last)); | |
b0d623f7 | 969 | } |
1c79356b A |
970 | count = last - first; |
971 | ||
972 | do { | |
973 | if (count-- < 0) { /* completely used? */ | |
39236c6e | 974 | lck_rw_done(pcbinfo->ipi_lock); |
91447636 | 975 | socket_lock(so, 0); |
9bccf70c | 976 | return (EADDRNOTAVAIL); |
1c79356b A |
977 | } |
978 | ++*lastport; | |
979 | if (*lastport < first || *lastport > last) | |
980 | *lastport = first; | |
981 | lport = htons(*lastport); | |
2d21ac55 | 982 | } while (in_pcblookup_local_and_cleanup(pcbinfo, |
fe8ab488 A |
983 | ((laddr.s_addr != INADDR_ANY) ? laddr : |
984 | inp->inp_laddr), lport, wild)); | |
1c79356b A |
985 | } |
986 | } | |
91447636 | 987 | socket_lock(so, 0); |
4bd07ac2 A |
988 | |
989 | /* | |
990 | * We unlocked socket's protocol lock for a long time. | |
991 | * The socket might have been dropped/defuncted. | |
992 | * Checking if world has changed since. | |
993 | */ | |
994 | if (inp->inp_state == INPCB_STATE_DEAD) { | |
995 | lck_rw_done(pcbinfo->ipi_lock); | |
996 | return (ECONNABORTED); | |
997 | } | |
998 | ||
fe8ab488 A |
999 | if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) { |
1000 | lck_rw_done(pcbinfo->ipi_lock); | |
1001 | return (EINVAL); | |
1002 | } | |
1003 | ||
1004 | if (laddr.s_addr != INADDR_ANY) { | |
1005 | inp->inp_laddr = laddr; | |
1006 | inp->inp_last_outifp = outif; | |
1007 | } | |
1c79356b | 1008 | inp->inp_lport = lport; |
fe8ab488 A |
1009 | if (anonport) |
1010 | inp->inp_flags |= INP_ANONPORT; | |
1011 | ||
91447636 | 1012 | if (in_pcbinshash(inp, 1) != 0) { |
1c79356b | 1013 | inp->inp_laddr.s_addr = INADDR_ANY; |
316670eb | 1014 | inp->inp_last_outifp = NULL; |
fe8ab488 A |
1015 | |
1016 | inp->inp_lport = 0; | |
1017 | if (anonport) | |
1018 | inp->inp_flags &= ~INP_ANONPORT; | |
39236c6e | 1019 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
1020 | return (EAGAIN); |
1021 | } | |
39236c6e | 1022 | lck_rw_done(pcbinfo->ipi_lock); |
2d21ac55 | 1023 | sflt_notify(so, sock_evt_bound, NULL); |
1c79356b A |
1024 | return (0); |
1025 | } | |
1026 | ||
39037602 A |
1027 | #define APN_FALLBACK_IP_FILTER(a) \ |
1028 | (IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \ | |
1029 | IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \ | |
1030 | IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \ | |
1031 | IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \ | |
1032 | IN_PRIVATE(ntohl((a)->sin_addr.s_addr))) | |
1033 | ||
1034 | #define APN_FALLBACK_NOTIF_INTERVAL 2 /* Magic Number */ | |
1035 | static uint64_t last_apn_fallback = 0; | |
1036 | ||
1037 | static boolean_t | |
1038 | apn_fallback_required (proc_t proc, struct socket *so, struct sockaddr_in *p_dstv4) | |
1039 | { | |
1040 | uint64_t timenow; | |
1041 | struct sockaddr_storage lookup_default_addr; | |
1042 | struct rtentry *rt = NULL; | |
1043 | ||
1044 | VERIFY(proc != NULL); | |
1045 | ||
1046 | if (apn_fallbk_enabled == FALSE) | |
1047 | return FALSE; | |
1048 | ||
1049 | if (proc == kernproc) | |
1050 | return FALSE; | |
1051 | ||
1052 | if (so && (so->so_options & SO_NOAPNFALLBK)) | |
1053 | return FALSE; | |
1054 | ||
1055 | timenow = net_uptime(); | |
1056 | if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) { | |
1057 | apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n")); | |
1058 | return FALSE; | |
1059 | } | |
1060 | ||
1061 | if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4)) | |
1062 | return FALSE; | |
1063 | ||
1064 | /* Check if we have unscoped IPv6 default route through cellular */ | |
1065 | bzero(&lookup_default_addr, sizeof(lookup_default_addr)); | |
1066 | lookup_default_addr.ss_family = AF_INET6; | |
1067 | lookup_default_addr.ss_len = sizeof(struct sockaddr_in6); | |
1068 | ||
1069 | rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0); | |
1070 | if (NULL == rt) { | |
1071 | apn_fallbk_log((LOG_INFO, "APN fallback notification could not find " | |
1072 | "unscoped default IPv6 route.\n")); | |
1073 | return FALSE; | |
1074 | } | |
1075 | ||
1076 | if (!IFNET_IS_CELLULAR(rt->rt_ifp)) { | |
1077 | rtfree(rt); | |
1078 | apn_fallbk_log((LOG_INFO, "APN fallback notification could not find " | |
1079 | "unscoped default IPv6 route through cellular interface.\n")); | |
1080 | return FALSE; | |
1081 | } | |
1082 | ||
1083 | /* | |
1084 | * We have a default IPv6 route, ensure that | |
1085 | * we do not have IPv4 default route before triggering | |
1086 | * the event | |
1087 | */ | |
1088 | rtfree(rt); | |
1089 | rt = NULL; | |
1090 | ||
1091 | bzero(&lookup_default_addr, sizeof(lookup_default_addr)); | |
1092 | lookup_default_addr.ss_family = AF_INET; | |
1093 | lookup_default_addr.ss_len = sizeof(struct sockaddr_in); | |
1094 | ||
1095 | rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0); | |
1096 | ||
1097 | if (rt) { | |
1098 | rtfree(rt); | |
1099 | rt = NULL; | |
1100 | apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped " | |
1101 | "IPv4 default route!\n")); | |
1102 | return FALSE; | |
1103 | } | |
1104 | ||
1105 | { | |
1106 | /* | |
1107 | * We disable APN fallback if the binary is not a third-party app. | |
1108 | * Note that platform daemons use their process name as a | |
1109 | * bundle ID so we filter out bundle IDs without dots. | |
1110 | */ | |
1111 | const char *bundle_id = cs_identity_get(proc); | |
1112 | if (bundle_id == NULL || | |
1113 | bundle_id[0] == '\0' || | |
1114 | strchr(bundle_id, '.') == NULL || | |
1115 | strncmp(bundle_id, "com.apple.", sizeof("com.apple.") - 1) == 0) { | |
1116 | apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-" | |
1117 | "party bundle ID \"%s\"!\n", (bundle_id ? bundle_id : "NULL"))); | |
1118 | return FALSE; | |
1119 | } | |
1120 | } | |
1121 | ||
1122 | { | |
1123 | /* | |
1124 | * The Apple App Store IPv6 requirement started on | |
1125 | * June 1st, 2016 at 12:00:00 AM PDT. | |
1126 | * We disable APN fallback if the binary is more recent than that. | |
1127 | * We check both atime and birthtime since birthtime is not always supported. | |
1128 | */ | |
1129 | static const long ipv6_start_date = 1464764400L; | |
1130 | vfs_context_t context; | |
1131 | struct stat64 sb; | |
1132 | int vn_stat_error; | |
1133 | ||
1134 | bzero(&sb, sizeof(struct stat64)); | |
1135 | context = vfs_context_create(NULL); | |
1136 | vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, context); | |
1137 | (void)vfs_context_rele(context); | |
1138 | ||
1139 | if (vn_stat_error != 0 || | |
1140 | sb.st_atimespec.tv_sec >= ipv6_start_date || | |
1141 | sb.st_birthtimespec.tv_sec >= ipv6_start_date) { | |
1142 | apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary " | |
1143 | "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n", | |
1144 | vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec, | |
1145 | sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec)); | |
1146 | return FALSE; | |
1147 | } | |
1148 | } | |
1149 | return TRUE; | |
1150 | } | |
1151 | ||
1152 | static void | |
1153 | apn_fallback_trigger(proc_t proc) | |
1154 | { | |
1155 | pid_t pid = 0; | |
1156 | struct kev_msg ev_msg; | |
1157 | struct kev_netevent_apnfallbk_data apnfallbk_data; | |
1158 | ||
1159 | last_apn_fallback = net_uptime(); | |
1160 | pid = proc_pid(proc); | |
1161 | uuid_t application_uuid; | |
1162 | uuid_clear(application_uuid); | |
1163 | proc_getexecutableuuid(proc, application_uuid, | |
1164 | sizeof(application_uuid)); | |
1165 | ||
1166 | bzero(&ev_msg, sizeof (struct kev_msg)); | |
1167 | ev_msg.vendor_code = KEV_VENDOR_APPLE; | |
1168 | ev_msg.kev_class = KEV_NETWORK_CLASS; | |
1169 | ev_msg.kev_subclass = KEV_NETEVENT_SUBCLASS; | |
1170 | ev_msg.event_code = KEV_NETEVENT_APNFALLBACK; | |
1171 | ||
1172 | bzero(&apnfallbk_data, sizeof(apnfallbk_data)); | |
1173 | apnfallbk_data.epid = pid; | |
1174 | uuid_copy(apnfallbk_data.euuid, application_uuid); | |
1175 | ||
1176 | ev_msg.dv[0].data_ptr = &apnfallbk_data; | |
1177 | ev_msg.dv[0].data_length = sizeof(apnfallbk_data); | |
1178 | kev_post_msg(&ev_msg); | |
1179 | apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n")); | |
1180 | } | |
1181 | ||
1c79356b | 1182 | /* |
39236c6e A |
1183 | * Transform old in_pcbconnect() into an inner subroutine for new |
1184 | * in_pcbconnect(); do some validity-checking on the remote address | |
1185 | * (in "nam") and then determine local host address (i.e., which | |
1186 | * interface) to use to access that remote host. | |
1187 | * | |
1188 | * This routine may alter the caller-supplied remote address "nam". | |
1c79356b | 1189 | * |
39236c6e A |
1190 | * The caller may override the bound-to-interface setting of the socket |
1191 | * by specifying the ifscope parameter (e.g. from IP_PKTINFO.) | |
1192 | * | |
1193 | * This routine might return an ifp with a reference held if the caller | |
1194 | * provides a non-NULL outif, even in the error case. The caller is | |
1195 | * responsible for releasing its reference. | |
2d21ac55 A |
1196 | * |
1197 | * Returns: 0 Success | |
1198 | * EINVAL Invalid argument | |
1199 | * EAFNOSUPPORT Address family not supported | |
1200 | * EADDRNOTAVAIL Address not available | |
1c79356b | 1201 | */ |
1c79356b | 1202 | int |
39236c6e | 1203 | in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr, |
39037602 | 1204 | unsigned int ifscope, struct ifnet **outif, int raw) |
1c79356b | 1205 | { |
39236c6e A |
1206 | struct route *ro = &inp->inp_route; |
1207 | struct in_ifaddr *ia = NULL; | |
1208 | struct sockaddr_in sin; | |
1209 | int error = 0; | |
fe8ab488 | 1210 | boolean_t restricted = FALSE; |
39236c6e A |
1211 | |
1212 | if (outif != NULL) | |
1213 | *outif = NULL; | |
1214 | if (nam->sa_len != sizeof (struct sockaddr_in)) | |
1c79356b | 1215 | return (EINVAL); |
39236c6e | 1216 | if (SIN(nam)->sin_family != AF_INET) |
1c79356b | 1217 | return (EAFNOSUPPORT); |
39037602 | 1218 | if (raw == 0 && SIN(nam)->sin_port == 0) |
1c79356b | 1219 | return (EADDRNOTAVAIL); |
b0d623f7 | 1220 | |
39236c6e A |
1221 | /* |
1222 | * If the destination address is INADDR_ANY, | |
1223 | * use the primary local address. | |
1224 | * If the supplied address is INADDR_BROADCAST, | |
1225 | * and the primary interface supports broadcast, | |
1226 | * choose the broadcast address for that interface. | |
1227 | */ | |
39037602 A |
1228 | if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY || |
1229 | SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) { | |
39236c6e A |
1230 | lck_rw_lock_shared(in_ifaddr_rwlock); |
1231 | if (!TAILQ_EMPTY(&in_ifaddrhead)) { | |
1232 | ia = TAILQ_FIRST(&in_ifaddrhead); | |
1233 | IFA_LOCK_SPIN(&ia->ia_ifa); | |
1234 | if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) { | |
1235 | SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr; | |
1236 | } else if (ia->ia_ifp->if_flags & IFF_BROADCAST) { | |
1237 | SIN(nam)->sin_addr = | |
1238 | SIN(&ia->ia_broadaddr)->sin_addr; | |
1239 | } | |
1240 | IFA_UNLOCK(&ia->ia_ifa); | |
1241 | ia = NULL; | |
1242 | } | |
1243 | lck_rw_done(in_ifaddr_rwlock); | |
1244 | } | |
1245 | /* | |
1246 | * Otherwise, if the socket has already bound the source, just use it. | |
1247 | */ | |
1248 | if (inp->inp_laddr.s_addr != INADDR_ANY) { | |
1249 | VERIFY(ia == NULL); | |
1250 | *laddr = inp->inp_laddr; | |
1251 | return (0); | |
1c79356b | 1252 | } |
6d2010ae | 1253 | |
39236c6e A |
1254 | /* |
1255 | * If the ifscope is specified by the caller (e.g. IP_PKTINFO) | |
1256 | * then it overrides the sticky ifscope set for the socket. | |
1257 | */ | |
1258 | if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF)) | |
1259 | ifscope = inp->inp_boundifp->if_index; | |
6d2010ae | 1260 | |
39236c6e A |
1261 | /* |
1262 | * If route is known or can be allocated now, | |
1263 | * our src addr is taken from the i/f, else punt. | |
1264 | * Note that we should check the address family of the cached | |
1265 | * destination, in case of sharing the cache with IPv6. | |
1266 | */ | |
1267 | if (ro->ro_rt != NULL) | |
1268 | RT_LOCK_SPIN(ro->ro_rt); | |
1269 | if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET || | |
1270 | SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr || | |
1271 | (inp->inp_socket->so_options & SO_DONTROUTE)) { | |
b0d623f7 | 1272 | if (ro->ro_rt != NULL) |
b0d623f7 | 1273 | RT_UNLOCK(ro->ro_rt); |
39236c6e A |
1274 | ROUTE_RELEASE(ro); |
1275 | } | |
1276 | if (!(inp->inp_socket->so_options & SO_DONTROUTE) && | |
1277 | (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) { | |
1278 | if (ro->ro_rt != NULL) | |
1279 | RT_UNLOCK(ro->ro_rt); | |
1280 | ROUTE_RELEASE(ro); | |
1281 | /* No route yet, so try to acquire one */ | |
1282 | bzero(&ro->ro_dst, sizeof (struct sockaddr_in)); | |
1283 | ro->ro_dst.sa_family = AF_INET; | |
1284 | ro->ro_dst.sa_len = sizeof (struct sockaddr_in); | |
1285 | SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr; | |
1286 | rtalloc_scoped(ro, ifscope); | |
1287 | if (ro->ro_rt != NULL) | |
1288 | RT_LOCK_SPIN(ro->ro_rt); | |
1289 | } | |
1290 | /* Sanitized local copy for interface address searches */ | |
1291 | bzero(&sin, sizeof (sin)); | |
1292 | sin.sin_family = AF_INET; | |
1293 | sin.sin_len = sizeof (struct sockaddr_in); | |
1294 | sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr; | |
1295 | /* | |
1296 | * If we did not find (or use) a route, assume dest is reachable | |
1297 | * on a directly connected network and try to find a corresponding | |
1298 | * interface to take the source address from. | |
1299 | */ | |
1300 | if (ro->ro_rt == NULL) { | |
39037602 A |
1301 | proc_t proc = current_proc(); |
1302 | ||
39236c6e A |
1303 | VERIFY(ia == NULL); |
1304 | ia = ifatoia(ifa_ifwithdstaddr(SA(&sin))); | |
1305 | if (ia == NULL) | |
1306 | ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope)); | |
1307 | error = ((ia == NULL) ? ENETUNREACH : 0); | |
743345f9 | 1308 | |
39037602 A |
1309 | if (apn_fallback_required(proc, inp->inp_socket, |
1310 | (void *)nam)) | |
1311 | apn_fallback_trigger(proc); | |
1312 | ||
39236c6e A |
1313 | goto done; |
1314 | } | |
1315 | RT_LOCK_ASSERT_HELD(ro->ro_rt); | |
1316 | /* | |
1317 | * If the outgoing interface on the route found is not | |
1318 | * a loopback interface, use the address from that interface. | |
1319 | */ | |
1320 | if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { | |
1321 | VERIFY(ia == NULL); | |
6d2010ae A |
1322 | /* |
1323 | * If the route points to a cellular interface and the | |
1324 | * caller forbids our using interfaces of such type, | |
1325 | * pretend that there is no route. | |
fe8ab488 | 1326 | * Apply the same logic for expensive interfaces. |
6d2010ae | 1327 | */ |
fe8ab488 | 1328 | if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) { |
39236c6e A |
1329 | RT_UNLOCK(ro->ro_rt); |
1330 | ROUTE_RELEASE(ro); | |
1331 | error = EHOSTUNREACH; | |
fe8ab488 | 1332 | restricted = TRUE; |
39236c6e | 1333 | } else { |
6d2010ae A |
1334 | /* Become a regular mutex */ |
1335 | RT_CONVERT_LOCK(ro->ro_rt); | |
39236c6e A |
1336 | ia = ifatoia(ro->ro_rt->rt_ifa); |
1337 | IFA_ADDREF(&ia->ia_ifa); | |
b0d623f7 | 1338 | RT_UNLOCK(ro->ro_rt); |
39236c6e | 1339 | error = 0; |
91447636 | 1340 | } |
39236c6e A |
1341 | goto done; |
1342 | } | |
1343 | VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK); | |
1344 | RT_UNLOCK(ro->ro_rt); | |
1345 | /* | |
1346 | * The outgoing interface is marked with 'loopback net', so a route | |
1347 | * to ourselves is here. | |
1348 | * Try to find the interface of the destination address and then | |
1349 | * take the address from there. That interface is not necessarily | |
1350 | * a loopback interface. | |
1351 | */ | |
1352 | VERIFY(ia == NULL); | |
1353 | ia = ifatoia(ifa_ifwithdstaddr(SA(&sin))); | |
1354 | if (ia == NULL) | |
1355 | ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope)); | |
1356 | if (ia == NULL) | |
1357 | ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope)); | |
1358 | if (ia == NULL) { | |
1359 | RT_LOCK(ro->ro_rt); | |
1360 | ia = ifatoia(ro->ro_rt->rt_ifa); | |
1361 | if (ia != NULL) | |
1362 | IFA_ADDREF(&ia->ia_ifa); | |
1363 | RT_UNLOCK(ro->ro_rt); | |
1364 | } | |
1365 | error = ((ia == NULL) ? ENETUNREACH : 0); | |
1366 | ||
1367 | done: | |
1368 | /* | |
1369 | * If the destination address is multicast and an outgoing | |
1370 | * interface has been set as a multicast option, use the | |
1371 | * address of that interface as our source address. | |
1372 | */ | |
15129b1c | 1373 | if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) && |
39236c6e A |
1374 | inp->inp_moptions != NULL) { |
1375 | struct ip_moptions *imo; | |
1376 | struct ifnet *ifp; | |
1377 | ||
1378 | imo = inp->inp_moptions; | |
1379 | IMO_LOCK(imo); | |
1380 | if (imo->imo_multicast_ifp != NULL && (ia == NULL || | |
1381 | ia->ia_ifp != imo->imo_multicast_ifp)) { | |
1382 | ifp = imo->imo_multicast_ifp; | |
1383 | if (ia != NULL) | |
6d2010ae | 1384 | IFA_REMREF(&ia->ia_ifa); |
39236c6e A |
1385 | lck_rw_lock_shared(in_ifaddr_rwlock); |
1386 | TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { | |
1387 | if (ia->ia_ifp == ifp) | |
1388 | break; | |
6d2010ae | 1389 | } |
39236c6e A |
1390 | if (ia != NULL) |
1391 | IFA_ADDREF(&ia->ia_ifa); | |
1392 | lck_rw_done(in_ifaddr_rwlock); | |
1393 | if (ia == NULL) | |
1394 | error = EADDRNOTAVAIL; | |
15129b1c A |
1395 | else |
1396 | error = 0; | |
1c79356b | 1397 | } |
39236c6e A |
1398 | IMO_UNLOCK(imo); |
1399 | } | |
1400 | /* | |
1401 | * Don't do pcblookup call here; return interface in laddr | |
1402 | * and exit to caller, that will do the lookup. | |
1403 | */ | |
1404 | if (ia != NULL) { | |
1c79356b | 1405 | /* |
39236c6e A |
1406 | * If the source address belongs to a cellular interface |
1407 | * and the socket forbids our using interfaces of such | |
1408 | * type, pretend that there is no source address. | |
fe8ab488 | 1409 | * Apply the same logic for expensive interfaces. |
1c79356b | 1410 | */ |
39236c6e | 1411 | IFA_LOCK_SPIN(&ia->ia_ifa); |
fe8ab488 | 1412 | if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) { |
39236c6e A |
1413 | IFA_UNLOCK(&ia->ia_ifa); |
1414 | error = EHOSTUNREACH; | |
fe8ab488 | 1415 | restricted = TRUE; |
39236c6e A |
1416 | } else if (error == 0) { |
1417 | *laddr = ia->ia_addr.sin_addr; | |
1418 | if (outif != NULL) { | |
1419 | struct ifnet *ifp; | |
1420 | ||
1421 | if (ro->ro_rt != NULL) | |
1422 | ifp = ro->ro_rt->rt_ifp; | |
1423 | else | |
1424 | ifp = ia->ia_ifp; | |
1425 | ||
1426 | VERIFY(ifp != NULL); | |
1427 | IFA_CONVERT_LOCK(&ia->ia_ifa); | |
1428 | ifnet_reference(ifp); /* for caller */ | |
1429 | if (*outif != NULL) | |
1430 | ifnet_release(*outif); | |
1431 | *outif = ifp; | |
1c79356b | 1432 | } |
39236c6e A |
1433 | IFA_UNLOCK(&ia->ia_ifa); |
1434 | } else { | |
1435 | IFA_UNLOCK(&ia->ia_ifa); | |
1c79356b | 1436 | } |
6d2010ae | 1437 | IFA_REMREF(&ia->ia_ifa); |
39236c6e A |
1438 | ia = NULL; |
1439 | } | |
1440 | ||
fe8ab488 | 1441 | if (restricted && error == EHOSTUNREACH) { |
39236c6e A |
1442 | soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED | |
1443 | SO_FILT_HINT_IFDENIED)); | |
1c79356b | 1444 | } |
39236c6e A |
1445 | |
1446 | return (error); | |
1c79356b A |
1447 | } |
1448 | ||
/*
 * Outer subroutine:
 * Connect from a socket to a specified address.
 * Both address and port must be specified in argument sin.
 * If don't have a local address for this socket yet,
 * then pick one.
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 *
 * The socket lock is dropped and re-taken around the PCB hash lookup and,
 * when the pcbinfo lock is contended, around acquiring that lock.
 *
 * Returns:	0			Success
 *		ECONNREFUSED		Socket was aborted while unlocked
 *		EADDRINUSE		A PCB with the same 4-tuple exists
 *		EINVAL			Local address set but no local port
 *		in_pcbladdr:???		[anything in_pcbladdr can return]
 *		in_pcbbind:???		[anything in_pcbbind can return]
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
	struct inpcb *pcb;
	int error;
	struct socket *so = inp->inp_socket;

	/*
	 *   Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0)
		return (error);

	socket_unlock(so, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state. When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0) {
		/*
		 * Drop the reference on any PCB the lookup handed back,
		 * exactly as the EADDRINUSE path below does; otherwise it
		 * would be leaked on this early return.
		 */
		if (pcb != NULL)
			in_pcb_checkstate(pcb, WNT_RELEASE,
			    pcb == inp ? 1 : 0);
		return (ECONNREFUSED);
	}

	if (pcb != NULL) {
		/* "locked" is 1 only when the match is our own, locked PCB */
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return (EADDRINUSE);
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			/* No local port yet: pick an ephemeral one */
			error = in_pcbbind(inp, NULL, p);
			if (error)
				return (error);
		}
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		/*
		 * Usage of IP_PKTINFO, without local port already
		 * specified will cause kernel to panic,
		 * see rdar://problem/18508185.
		 * For now returning error to avoid a kernel panic
		 * This routines can be refactored and handle this better
		 * in future.
		 */
		if (inp->inp_lport == 0)
			return (EINVAL);
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
	}
	/* pcbinfo lock is held exclusively from here until lck_rw_done() */
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
		nstat_pcb_invalidate_cache(inp);
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	return (0);
}
1541 | ||
1542 | void | |
2d21ac55 | 1543 | in_pcbdisconnect(struct inpcb *inp) |
1c79356b | 1544 | { |
39236c6e | 1545 | struct socket *so = inp->inp_socket; |
1c79356b | 1546 | |
fe8ab488 A |
1547 | if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) |
1548 | nstat_pcb_cache(inp); | |
1549 | ||
1c79356b A |
1550 | inp->inp_faddr.s_addr = INADDR_ANY; |
1551 | inp->inp_fport = 0; | |
91447636 | 1552 | |
39236c6e A |
1553 | if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { |
1554 | /* lock inversion issue, mostly with udp multicast packets */ | |
1555 | socket_unlock(so, 0); | |
1556 | lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); | |
1557 | socket_lock(so, 0); | |
91447636 A |
1558 | } |
1559 | ||
1c79356b | 1560 | in_pcbrehash(inp); |
39236c6e A |
1561 | lck_rw_done(inp->inp_pcbinfo->ipi_lock); |
1562 | /* | |
1563 | * A multipath subflow socket would have its SS_NOFDREF set by default, | |
1564 | * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB; | |
1565 | * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared. | |
1566 | */ | |
1567 | if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) | |
1c79356b A |
1568 | in_pcbdetach(inp); |
1569 | } | |
1570 | ||
1571 | void | |
2d21ac55 | 1572 | in_pcbdetach(struct inpcb *inp) |
1c79356b A |
1573 | { |
1574 | struct socket *so = inp->inp_socket; | |
1c79356b | 1575 | |
39236c6e A |
1576 | if (so->so_pcb == NULL) { |
1577 | /* PCB has been disposed */ | |
1578 | panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__, | |
1579 | inp, so, SOCK_PROTO(so)); | |
1580 | /* NOTREACHED */ | |
91447636 | 1581 | } |
39037602 | 1582 | |
1c79356b | 1583 | #if IPSEC |
39236c6e A |
1584 | if (inp->inp_sp != NULL) { |
1585 | (void) ipsec4_delete_pcbpolicy(inp); | |
91447636 | 1586 | } |
39236c6e | 1587 | #endif /* IPSEC */ |
39037602 | 1588 | |
fe8ab488 A |
1589 | /* |
1590 | * Let NetworkStatistics know this PCB is going away | |
1591 | * before we detach it. | |
1592 | */ | |
39037602 | 1593 | if (nstat_collect && |
fe8ab488 A |
1594 | (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) |
1595 | nstat_pcb_detach(inp); | |
3e170ce0 A |
1596 | |
1597 | /* Free memory buffer held for generating keep alives */ | |
1598 | if (inp->inp_keepalive_data != NULL) { | |
1599 | FREE(inp->inp_keepalive_data, M_TEMP); | |
1600 | inp->inp_keepalive_data = NULL; | |
1601 | } | |
1602 | ||
91447636 | 1603 | /* mark socket state as dead */ |
39236c6e A |
1604 | if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) { |
1605 | panic("%s: so=%p proto=%d couldn't set to STOPUSING\n", | |
1606 | __func__, so, SOCK_PROTO(so)); | |
1607 | /* NOTREACHED */ | |
1608 | } | |
1c79356b | 1609 | |
39236c6e | 1610 | if (!(so->so_flags & SOF_PCBCLEARING)) { |
6d2010ae | 1611 | struct ip_moptions *imo; |
2d21ac55 | 1612 | |
91447636 | 1613 | inp->inp_vflag = 0; |
39236c6e A |
1614 | if (inp->inp_options != NULL) { |
1615 | (void) m_free(inp->inp_options); | |
1616 | inp->inp_options = NULL; | |
91447636 | 1617 | } |
39236c6e | 1618 | ROUTE_RELEASE(&inp->inp_route); |
6d2010ae | 1619 | imo = inp->inp_moptions; |
91447636 A |
1620 | inp->inp_moptions = NULL; |
1621 | sofreelastref(so, 0); | |
1622 | inp->inp_state = INPCB_STATE_DEAD; | |
39236c6e A |
1623 | /* makes sure we're not called twice from so_close */ |
1624 | so->so_flags |= SOF_PCBCLEARING; | |
1625 | ||
1626 | inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST); | |
39037602 A |
1627 | |
1628 | /* | |
1629 | * See inp_join_group() for why we need to unlock | |
1630 | */ | |
1631 | if (imo != NULL) { | |
1632 | socket_unlock(so, 0); | |
1633 | IMO_REMREF(imo); | |
1634 | socket_lock(so, 0); | |
1635 | } | |
91447636 A |
1636 | } |
1637 | } | |
1c79356b | 1638 | |
1c79356b | 1639 | |
39236c6e A |
1640 | void |
1641 | in_pcbdispose(struct inpcb *inp) | |
91447636 A |
1642 | { |
1643 | struct socket *so = inp->inp_socket; | |
1644 | struct inpcbinfo *ipi = inp->inp_pcbinfo; | |
1645 | ||
39236c6e A |
1646 | if (so != NULL && so->so_usecount != 0) { |
1647 | panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n", | |
1648 | __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount, | |
1649 | solockhistory_nr(so)); | |
1650 | /* NOTREACHED */ | |
1651 | } else if (inp->inp_wantcnt != WNT_STOPUSING) { | |
1652 | if (so != NULL) { | |
1653 | panic_plain("%s: inp %p invalid wantcnt %d, so %p " | |
1654 | "[%d,%d] usecount %d retaincnt %d state 0x%x " | |
1655 | "flags 0x%x lockhistory %s\n", __func__, inp, | |
1656 | inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so), | |
1657 | so->so_usecount, so->so_retaincnt, so->so_state, | |
1658 | so->so_flags, solockhistory_nr(so)); | |
1659 | /* NOTREACHED */ | |
1660 | } else { | |
1661 | panic("%s: inp %p invalid wantcnt %d no socket\n", | |
1662 | __func__, inp, inp->inp_wantcnt); | |
1663 | /* NOTREACHED */ | |
1664 | } | |
91447636 | 1665 | } |
91447636 | 1666 | |
39236c6e | 1667 | lck_rw_assert(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE); |
91447636 A |
1668 | |
1669 | inp->inp_gencnt = ++ipi->ipi_gencnt; | |
316670eb | 1670 | /* access ipi in in_pcbremlists */ |
91447636 | 1671 | in_pcbremlists(inp); |
316670eb | 1672 | |
39236c6e | 1673 | if (so != NULL) { |
91447636 A |
1674 | if (so->so_proto->pr_flags & PR_PCBLOCK) { |
1675 | sofreelastref(so, 0); | |
39236c6e A |
1676 | if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) { |
1677 | /* | |
1678 | * selthreadclear() already called | |
1679 | * during sofreelastref() above. | |
1680 | */ | |
91447636 A |
1681 | sbrelease(&so->so_rcv); |
1682 | sbrelease(&so->so_snd); | |
1683 | } | |
39236c6e A |
1684 | if (so->so_head != NULL) { |
1685 | panic("%s: so=%p head still exist\n", | |
1686 | __func__, so); | |
1687 | /* NOTREACHED */ | |
1688 | } | |
1689 | lck_mtx_unlock(&inp->inpcb_mtx); | |
1690 | lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp); | |
9bccf70c | 1691 | } |
39236c6e A |
1692 | /* makes sure we're not called twice from so_close */ |
1693 | so->so_flags |= SOF_PCBCLEARING; | |
1694 | so->so_saved_pcb = (caddr_t)inp; | |
1695 | so->so_pcb = NULL; | |
1696 | inp->inp_socket = NULL; | |
2d21ac55 A |
1697 | #if CONFIG_MACF_NET |
1698 | mac_inpcb_label_destroy(inp); | |
39236c6e | 1699 | #endif /* CONFIG_MACF_NET */ |
39037602 A |
1700 | #if NECP |
1701 | necp_inpcb_dispose(inp); | |
1702 | #endif /* NECP */ | |
b0d623f7 A |
1703 | /* |
1704 | * In case there a route cached after a detach (possible | |
1705 | * in the tcp case), make sure that it is freed before | |
1706 | * we deallocate the structure. | |
1707 | */ | |
39236c6e | 1708 | ROUTE_RELEASE(&inp->inp_route); |
3e170ce0 | 1709 | if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) { |
91447636 | 1710 | zfree(ipi->ipi_zone, inp); |
55e303ae | 1711 | } |
91447636 | 1712 | sodealloc(so); |
9bccf70c | 1713 | } |
1c79356b A |
1714 | } |
1715 | ||
1716 | /* | |
39236c6e | 1717 | * The calling convention of in_getsockaddr() and in_getpeeraddr() was |
1c79356b A |
1718 | * modified to match the pru_sockaddr() and pru_peeraddr() entry points |
1719 | * in struct pr_usrreqs, so that protocols can just reference them directly |
39236c6e | 1720 | * without the need for a wrapper function. |
1c79356b A |
1721 | */ |
1722 | int | |
39236c6e | 1723 | in_getsockaddr(struct socket *so, struct sockaddr **nam) |
1c79356b | 1724 | { |
2d21ac55 A |
1725 | struct inpcb *inp; |
1726 | struct sockaddr_in *sin; | |
1c79356b A |
1727 | |
1728 | /* | |
1729 | * Do the malloc first in case it blocks. | |
1730 | */ | |
39236c6e | 1731 | MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK); |
0b4e3aa0 | 1732 | if (sin == NULL) |
39236c6e A |
1733 | return (ENOBUFS); |
1734 | bzero(sin, sizeof (*sin)); | |
1c79356b | 1735 | sin->sin_family = AF_INET; |
39236c6e | 1736 | sin->sin_len = sizeof (*sin); |
1c79356b | 1737 | |
39236c6e | 1738 | if ((inp = sotoinpcb(so)) == NULL) { |
1c79356b | 1739 | FREE(sin, M_SONAME); |
39236c6e | 1740 | return (EINVAL); |
1c79356b A |
1741 | } |
1742 | sin->sin_port = inp->inp_lport; | |
1743 | sin->sin_addr = inp->inp_laddr; | |
1c79356b A |
1744 | |
1745 | *nam = (struct sockaddr *)sin; | |
39236c6e | 1746 | return (0); |
1c79356b A |
1747 | } |
1748 | ||
1749 | int | |
39236c6e | 1750 | in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss) |
1c79356b | 1751 | { |
39236c6e | 1752 | struct sockaddr_in *sin = SIN(ss); |
1c79356b | 1753 | struct inpcb *inp; |
1c79356b | 1754 | |
39236c6e A |
1755 | VERIFY(ss != NULL); |
1756 | bzero(ss, sizeof (*ss)); | |
1757 | ||
1c79356b | 1758 | sin->sin_family = AF_INET; |
39236c6e | 1759 | sin->sin_len = sizeof (*sin); |
1c79356b | 1760 | |
fe8ab488 A |
1761 | if ((inp = sotoinpcb(so)) == NULL |
1762 | #if NECP | |
1763 | || (necp_socket_should_use_flow_divert(inp)) | |
1764 | #endif /* NECP */ | |
1765 | ) | |
39236c6e A |
1766 | return (inp == NULL ? EINVAL : EPROTOTYPE); |
1767 | ||
1768 | sin->sin_port = inp->inp_lport; | |
1769 | sin->sin_addr = inp->inp_laddr; | |
1770 | return (0); | |
1771 | } | |
1772 | ||
1773 | int | |
1774 | in_getpeeraddr(struct socket *so, struct sockaddr **nam) | |
1775 | { | |
1776 | struct inpcb *inp; | |
1777 | struct sockaddr_in *sin; | |
1778 | ||
1779 | /* | |
1780 | * Do the malloc first in case it blocks. | |
1781 | */ | |
1782 | MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK); | |
1783 | if (sin == NULL) | |
1784 | return (ENOBUFS); | |
1785 | bzero((caddr_t)sin, sizeof (*sin)); | |
1786 | sin->sin_family = AF_INET; | |
1787 | sin->sin_len = sizeof (*sin); | |
1788 | ||
1789 | if ((inp = sotoinpcb(so)) == NULL) { | |
1c79356b | 1790 | FREE(sin, M_SONAME); |
39236c6e | 1791 | return (EINVAL); |
1c79356b A |
1792 | } |
1793 | sin->sin_port = inp->inp_fport; | |
1794 | sin->sin_addr = inp->inp_faddr; | |
1c79356b A |
1795 | |
1796 | *nam = (struct sockaddr *)sin; | |
39236c6e A |
1797 | return (0); |
1798 | } | |
1799 | ||
1800 | int | |
1801 | in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss) | |
1802 | { | |
1803 | struct sockaddr_in *sin = SIN(ss); | |
1804 | struct inpcb *inp; | |
1805 | ||
1806 | VERIFY(ss != NULL); | |
1807 | bzero(ss, sizeof (*ss)); | |
1808 | ||
1809 | sin->sin_family = AF_INET; | |
1810 | sin->sin_len = sizeof (*sin); | |
1811 | ||
fe8ab488 A |
1812 | if ((inp = sotoinpcb(so)) == NULL |
1813 | #if NECP | |
1814 | || (necp_socket_should_use_flow_divert(inp)) | |
1815 | #endif /* NECP */ | |
1816 | ) { | |
39236c6e A |
1817 | return (inp == NULL ? EINVAL : EPROTOTYPE); |
1818 | } | |
1819 | ||
1820 | sin->sin_port = inp->inp_fport; | |
1821 | sin->sin_addr = inp->inp_faddr; | |
1822 | return (0); | |
1c79356b A |
1823 | } |
1824 | ||
1c79356b | 1825 | void |
2d21ac55 | 1826 | in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
39236c6e | 1827 | int errno, void (*notify)(struct inpcb *, int)) |
1c79356b | 1828 | { |
91447636 A |
1829 | struct inpcb *inp; |
1830 | ||
39236c6e | 1831 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
1c79356b | 1832 | |
39236c6e | 1833 | LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { |
9bccf70c | 1834 | #if INET6 |
39236c6e | 1835 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1836 | continue; |
39236c6e | 1837 | #endif /* INET6 */ |
1c79356b | 1838 | if (inp->inp_faddr.s_addr != faddr.s_addr || |
9bccf70c | 1839 | inp->inp_socket == NULL) |
39236c6e A |
1840 | continue; |
1841 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) | |
91447636 A |
1842 | continue; |
1843 | socket_lock(inp->inp_socket, 1); | |
9bccf70c | 1844 | (*notify)(inp, errno); |
39236c6e | 1845 | (void) in_pcb_checkstate(inp, WNT_RELEASE, 1); |
91447636 | 1846 | socket_unlock(inp->inp_socket, 1); |
1c79356b | 1847 | } |
39236c6e | 1848 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
1849 | } |
1850 | ||
1851 | /* | |
1852 | * Check for alternatives when higher level complains | |
1853 | * about service problems. For now, invalidate cached | |
1854 | * routing information. If the route was created dynamically | |
1855 | * (by a redirect), time to try a default gateway again. | |
1856 | */ | |
1857 | void | |
2d21ac55 | 1858 | in_losing(struct inpcb *inp) |
1c79356b | 1859 | { |
39236c6e | 1860 | boolean_t release = FALSE; |
2d21ac55 | 1861 | struct rtentry *rt; |
1c79356b | 1862 | |
b0d623f7 | 1863 | if ((rt = inp->inp_route.ro_rt) != NULL) { |
39236c6e | 1864 | struct in_ifaddr *ia = NULL; |
b0d623f7 | 1865 | |
b0d623f7 | 1866 | RT_LOCK(rt); |
b0d623f7 A |
1867 | if (rt->rt_flags & RTF_DYNAMIC) { |
1868 | /* | |
1869 | * Prevent another thread from modifying rt_key, | |
1870 | * rt_gateway via rt_setgate() after rt_lock is | |
1871 | * dropped by marking the route as defunct. | |
1872 | */ | |
1873 | rt->rt_flags |= RTF_CONDEMNED; | |
1874 | RT_UNLOCK(rt); | |
1875 | (void) rtrequest(RTM_DELETE, rt_key(rt), | |
39236c6e | 1876 | rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); |
b0d623f7 A |
1877 | } else { |
1878 | RT_UNLOCK(rt); | |
1879 | } | |
2d21ac55 | 1880 | /* if the address is gone keep the old route in the pcb */ |
39236c6e A |
1881 | if (inp->inp_laddr.s_addr != INADDR_ANY && |
1882 | (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { | |
1883 | /* | |
1884 | * Address is around; ditch the route. A new route | |
1885 | * can be allocated the next time output is attempted. | |
1886 | */ | |
1887 | release = TRUE; | |
2d21ac55 | 1888 | } |
39236c6e A |
1889 | if (ia != NULL) |
1890 | IFA_REMREF(&ia->ia_ifa); | |
1c79356b | 1891 | } |
39236c6e A |
1892 | if (rt == NULL || release) |
1893 | ROUTE_RELEASE(&inp->inp_route); | |
1c79356b A |
1894 | } |
1895 | ||
1896 | /* | |
1897 | * After a routing change, flush old routing | |
1898 | * and allocate a (hopefully) better one. | |
1899 | */ | |
9bccf70c | 1900 | void |
39236c6e | 1901 | in_rtchange(struct inpcb *inp, int errno) |
1c79356b | 1902 | { |
39236c6e A |
1903 | #pragma unused(errno) |
1904 | boolean_t release = FALSE; | |
2d21ac55 A |
1905 | struct rtentry *rt; |
1906 | ||
1907 | if ((rt = inp->inp_route.ro_rt) != NULL) { | |
39236c6e | 1908 | struct in_ifaddr *ia = NULL; |
b0d623f7 | 1909 | |
39236c6e A |
1910 | /* if address is gone, keep the old route */ |
1911 | if (inp->inp_laddr.s_addr != INADDR_ANY && | |
1912 | (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { | |
1913 | /* | |
1914 | * Address is around; ditch the route. A new route | |
1915 | * can be allocated the next time output is attempted. | |
1916 | */ | |
1917 | release = TRUE; | |
2d21ac55 | 1918 | } |
39236c6e A |
1919 | if (ia != NULL) |
1920 | IFA_REMREF(&ia->ia_ifa); | |
1c79356b | 1921 | } |
39236c6e A |
1922 | if (rt == NULL || release) |
1923 | ROUTE_RELEASE(&inp->inp_route); | |
1c79356b A |
1924 | } |
1925 | ||
1926 | /* | |
1927 | * Lookup a PCB based on the local address and port. | |
1928 | */ | |
1929 | struct inpcb * | |
2d21ac55 | 1930 | in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, |
39236c6e | 1931 | unsigned int lport_arg, int wild_okay) |
1c79356b | 1932 | { |
2d21ac55 | 1933 | struct inpcb *inp; |
1c79356b A |
1934 | int matchwild = 3, wildcard; |
1935 | u_short lport = lport_arg; | |
1936 | ||
39236c6e | 1937 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0); |
1c79356b A |
1938 | |
1939 | if (!wild_okay) { | |
1940 | struct inpcbhead *head; | |
1941 | /* | |
1942 | * Look for an unconnected (wildcard foreign addr) PCB that | |
1943 | * matches the local address and port we're looking for. | |
1944 | */ | |
39236c6e A |
1945 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, |
1946 | pcbinfo->ipi_hashmask)]; | |
9bccf70c A |
1947 | LIST_FOREACH(inp, head, inp_hash) { |
1948 | #if INET6 | |
39236c6e | 1949 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1950 | continue; |
39236c6e | 1951 | #endif /* INET6 */ |
1c79356b A |
1952 | if (inp->inp_faddr.s_addr == INADDR_ANY && |
1953 | inp->inp_laddr.s_addr == laddr.s_addr && | |
1954 | inp->inp_lport == lport) { | |
1955 | /* | |
1956 | * Found. | |
1957 | */ | |
1958 | return (inp); | |
1959 | } | |
1960 | } | |
1961 | /* | |
1962 | * Not found. | |
1963 | */ | |
39236c6e | 1964 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0); |
1c79356b A |
1965 | return (NULL); |
1966 | } else { | |
1967 | struct inpcbporthead *porthash; | |
1968 | struct inpcbport *phd; | |
1969 | struct inpcb *match = NULL; | |
1970 | /* | |
1971 | * Best fit PCB lookup. | |
1972 | * | |
1973 | * First see if this local port is in use by looking on the | |
1974 | * port hash list. | |
1975 | */ | |
39236c6e A |
1976 | porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, |
1977 | pcbinfo->ipi_porthashmask)]; | |
9bccf70c | 1978 | LIST_FOREACH(phd, porthash, phd_hash) { |
1c79356b A |
1979 | if (phd->phd_port == lport) |
1980 | break; | |
1981 | } | |
1982 | if (phd != NULL) { | |
1983 | /* | |
1984 | * Port is in use by one or more PCBs. Look for best | |
1985 | * fit. | |
1986 | */ | |
9bccf70c | 1987 | LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { |
1c79356b | 1988 | wildcard = 0; |
9bccf70c | 1989 | #if INET6 |
39236c6e | 1990 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1991 | continue; |
39236c6e | 1992 | #endif /* INET6 */ |
1c79356b A |
1993 | if (inp->inp_faddr.s_addr != INADDR_ANY) |
1994 | wildcard++; | |
1995 | if (inp->inp_laddr.s_addr != INADDR_ANY) { | |
1996 | if (laddr.s_addr == INADDR_ANY) | |
1997 | wildcard++; | |
39236c6e A |
1998 | else if (inp->inp_laddr.s_addr != |
1999 | laddr.s_addr) | |
1c79356b A |
2000 | continue; |
2001 | } else { | |
2002 | if (laddr.s_addr != INADDR_ANY) | |
2003 | wildcard++; | |
2004 | } | |
2005 | if (wildcard < matchwild) { | |
2006 | match = inp; | |
2007 | matchwild = wildcard; | |
2008 | if (matchwild == 0) { | |
2009 | break; | |
2010 | } | |
2011 | } | |
2012 | } | |
2013 | } | |
39236c6e A |
2014 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match, |
2015 | 0, 0, 0, 0); | |
1c79356b A |
2016 | return (match); |
2017 | } | |
2018 | } | |
2019 | ||
6d2010ae A |
2020 | /* |
2021 | * Check if PCB exists in hash list. | |
2022 | */ | |
2023 | int | |
39236c6e A |
2024 | in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
2025 | u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard, | |
2026 | uid_t *uid, gid_t *gid, struct ifnet *ifp) | |
6d2010ae A |
2027 | { |
2028 | struct inpcbhead *head; | |
2029 | struct inpcb *inp; | |
2030 | u_short fport = fport_arg, lport = lport_arg; | |
39236c6e A |
2031 | int found = 0; |
2032 | struct inpcb *local_wild = NULL; | |
2033 | #if INET6 | |
2034 | struct inpcb *local_wild_mapped = NULL; | |
2035 | #endif /* INET6 */ | |
6d2010ae A |
2036 | |
2037 | *uid = UID_MAX; | |
2038 | *gid = GID_MAX; | |
316670eb | 2039 | |
6d2010ae A |
2040 | /* |
2041 | * We may have found the pcb in the last lookup - check this first. | |
2042 | */ | |
2043 | ||
39236c6e | 2044 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
6d2010ae A |
2045 | |
2046 | /* | |
2047 | * First look for an exact match. | |
2048 | */ | |
39236c6e A |
2049 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, |
2050 | pcbinfo->ipi_hashmask)]; | |
6d2010ae A |
2051 | LIST_FOREACH(inp, head, inp_hash) { |
2052 | #if INET6 | |
39236c6e | 2053 | if (!(inp->inp_vflag & INP_IPV4)) |
6d2010ae | 2054 | continue; |
39236c6e | 2055 | #endif /* INET6 */ |
fe8ab488 | 2056 | if (inp_restricted_recv(inp, ifp)) |
316670eb A |
2057 | continue; |
2058 | ||
6d2010ae A |
2059 | if (inp->inp_faddr.s_addr == faddr.s_addr && |
2060 | inp->inp_laddr.s_addr == laddr.s_addr && | |
2061 | inp->inp_fport == fport && | |
2062 | inp->inp_lport == lport) { | |
2063 | if ((found = (inp->inp_socket != NULL))) { | |
2064 | /* | |
2065 | * Found. | |
2066 | */ | |
316670eb A |
2067 | *uid = kauth_cred_getuid( |
2068 | inp->inp_socket->so_cred); | |
2069 | *gid = kauth_cred_getgid( | |
2070 | inp->inp_socket->so_cred); | |
6d2010ae | 2071 | } |
39236c6e | 2072 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae A |
2073 | return (found); |
2074 | } | |
2075 | } | |
6d2010ae | 2076 | |
39236c6e A |
2077 | if (!wildcard) { |
2078 | /* | |
2079 | * Not found. | |
2080 | */ | |
2081 | lck_rw_done(pcbinfo->ipi_lock); | |
2082 | return (0); | |
2083 | } | |
316670eb | 2084 | |
39236c6e A |
2085 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, |
2086 | pcbinfo->ipi_hashmask)]; | |
2087 | LIST_FOREACH(inp, head, inp_hash) { | |
6d2010ae | 2088 | #if INET6 |
39236c6e A |
2089 | if (!(inp->inp_vflag & INP_IPV4)) |
2090 | continue; | |
6d2010ae | 2091 | #endif /* INET6 */ |
fe8ab488 | 2092 | if (inp_restricted_recv(inp, ifp)) |
39236c6e A |
2093 | continue; |
2094 | ||
2095 | if (inp->inp_faddr.s_addr == INADDR_ANY && | |
2096 | inp->inp_lport == lport) { | |
2097 | if (inp->inp_laddr.s_addr == laddr.s_addr) { | |
2098 | if ((found = (inp->inp_socket != NULL))) { | |
316670eb | 2099 | *uid = kauth_cred_getuid( |
39236c6e | 2100 | inp->inp_socket->so_cred); |
316670eb | 2101 | *gid = kauth_cred_getgid( |
39236c6e | 2102 | inp->inp_socket->so_cred); |
6d2010ae | 2103 | } |
39236c6e | 2104 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae | 2105 | return (found); |
39236c6e A |
2106 | } else if (inp->inp_laddr.s_addr == INADDR_ANY) { |
2107 | #if INET6 | |
2108 | if (inp->inp_socket && | |
2109 | SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) | |
2110 | local_wild_mapped = inp; | |
2111 | else | |
6d2010ae | 2112 | #endif /* INET6 */ |
39236c6e A |
2113 | local_wild = inp; |
2114 | } | |
6d2010ae | 2115 | } |
39236c6e A |
2116 | } |
2117 | if (local_wild == NULL) { | |
2118 | #if INET6 | |
2119 | if (local_wild_mapped != NULL) { | |
2120 | if ((found = (local_wild_mapped->inp_socket != NULL))) { | |
316670eb | 2121 | *uid = kauth_cred_getuid( |
39236c6e | 2122 | local_wild_mapped->inp_socket->so_cred); |
316670eb | 2123 | *gid = kauth_cred_getgid( |
39236c6e | 2124 | local_wild_mapped->inp_socket->so_cred); |
6d2010ae | 2125 | } |
39236c6e | 2126 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae A |
2127 | return (found); |
2128 | } | |
39236c6e A |
2129 | #endif /* INET6 */ |
2130 | lck_rw_done(pcbinfo->ipi_lock); | |
2131 | return (0); | |
6d2010ae | 2132 | } |
39236c6e A |
2133 | if ((found = (local_wild->inp_socket != NULL))) { |
2134 | *uid = kauth_cred_getuid( | |
2135 | local_wild->inp_socket->so_cred); | |
2136 | *gid = kauth_cred_getgid( | |
2137 | local_wild->inp_socket->so_cred); | |
2138 | } | |
2139 | lck_rw_done(pcbinfo->ipi_lock); | |
2140 | return (found); | |
6d2010ae A |
2141 | } |
2142 | ||
1c79356b A |
2143 | /* |
2144 | * Lookup PCB in hash list. | |
2145 | */ | |
2146 | struct inpcb * | |
39236c6e A |
2147 | in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
2148 | u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard, | |
2149 | struct ifnet *ifp) | |
1c79356b A |
2150 | { |
2151 | struct inpcbhead *head; | |
2d21ac55 | 2152 | struct inpcb *inp; |
1c79356b | 2153 | u_short fport = fport_arg, lport = lport_arg; |
39236c6e A |
2154 | struct inpcb *local_wild = NULL; |
2155 | #if INET6 | |
2156 | struct inpcb *local_wild_mapped = NULL; | |
2157 | #endif /* INET6 */ | |
1c79356b A |
2158 | |
2159 | /* | |
2160 | * We may have found the pcb in the last lookup - check this first. | |
2161 | */ | |
2162 | ||
39236c6e | 2163 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
1c79356b A |
2164 | |
2165 | /* | |
2166 | * First look for an exact match. | |
2167 | */ | |
39236c6e A |
2168 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, |
2169 | pcbinfo->ipi_hashmask)]; | |
9bccf70c A |
2170 | LIST_FOREACH(inp, head, inp_hash) { |
2171 | #if INET6 | |
39236c6e | 2172 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 2173 | continue; |
39236c6e | 2174 | #endif /* INET6 */ |
fe8ab488 | 2175 | if (inp_restricted_recv(inp, ifp)) |
316670eb A |
2176 | continue; |
2177 | ||
1c79356b A |
2178 | if (inp->inp_faddr.s_addr == faddr.s_addr && |
2179 | inp->inp_laddr.s_addr == laddr.s_addr && | |
2180 | inp->inp_fport == fport && | |
2181 | inp->inp_lport == lport) { | |
2182 | /* | |
2183 | * Found. | |
2184 | */ | |
39236c6e A |
2185 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != |
2186 | WNT_STOPUSING) { | |
2187 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 2188 | return (inp); |
39236c6e A |
2189 | } else { |
2190 | /* it's there but dead, say it isn't found */ | |
2191 | lck_rw_done(pcbinfo->ipi_lock); | |
316670eb | 2192 | return (NULL); |
91447636 | 2193 | } |
1c79356b A |
2194 | } |
2195 | } | |
1c79356b | 2196 | |
39236c6e A |
2197 | if (!wildcard) { |
2198 | /* | |
2199 | * Not found. | |
2200 | */ | |
2201 | lck_rw_done(pcbinfo->ipi_lock); | |
2202 | return (NULL); | |
2203 | } | |
2204 | ||
2205 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, | |
2206 | pcbinfo->ipi_hashmask)]; | |
2207 | LIST_FOREACH(inp, head, inp_hash) { | |
9bccf70c | 2208 | #if INET6 |
39236c6e A |
2209 | if (!(inp->inp_vflag & INP_IPV4)) |
2210 | continue; | |
2211 | #endif /* INET6 */ | |
fe8ab488 | 2212 | if (inp_restricted_recv(inp, ifp)) |
39236c6e A |
2213 | continue; |
2214 | ||
2215 | if (inp->inp_faddr.s_addr == INADDR_ANY && | |
2216 | inp->inp_lport == lport) { | |
2217 | if (inp->inp_laddr.s_addr == laddr.s_addr) { | |
2218 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != | |
2219 | WNT_STOPUSING) { | |
2220 | lck_rw_done(pcbinfo->ipi_lock); | |
2221 | return (inp); | |
2222 | } else { | |
2223 | /* it's dead; say it isn't found */ | |
2224 | lck_rw_done(pcbinfo->ipi_lock); | |
2225 | return (NULL); | |
91447636 | 2226 | } |
39236c6e | 2227 | } else if (inp->inp_laddr.s_addr == INADDR_ANY) { |
2d21ac55 | 2228 | #if INET6 |
39236c6e A |
2229 | if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) |
2230 | local_wild_mapped = inp; | |
2231 | else | |
2d21ac55 | 2232 | #endif /* INET6 */ |
1c79356b | 2233 | local_wild = inp; |
1c79356b A |
2234 | } |
2235 | } | |
39236c6e A |
2236 | } |
2237 | if (local_wild == NULL) { | |
2d21ac55 | 2238 | #if INET6 |
39236c6e A |
2239 | if (local_wild_mapped != NULL) { |
2240 | if (in_pcb_checkstate(local_wild_mapped, | |
2241 | WNT_ACQUIRE, 0) != WNT_STOPUSING) { | |
2242 | lck_rw_done(pcbinfo->ipi_lock); | |
2243 | return (local_wild_mapped); | |
2244 | } else { | |
2245 | /* it's dead; say it isn't found */ | |
2246 | lck_rw_done(pcbinfo->ipi_lock); | |
2247 | return (NULL); | |
91447636 | 2248 | } |
91447636 | 2249 | } |
39236c6e A |
2250 | #endif /* INET6 */ |
2251 | lck_rw_done(pcbinfo->ipi_lock); | |
2252 | return (NULL); | |
2253 | } | |
2254 | if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) { | |
2255 | lck_rw_done(pcbinfo->ipi_lock); | |
2256 | return (local_wild); | |
1c79356b | 2257 | } |
1c79356b | 2258 | /* |
39236c6e | 2259 | * It's either not found or is already dead. |
1c79356b | 2260 | */ |
39236c6e | 2261 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
2262 | return (NULL); |
2263 | } | |
2264 | ||
2265 | /* | |
4bd07ac2 A |
2266 | * @brief Insert PCB onto various hash lists. |
2267 | * | |
2268 | * @param inp Pointer to internet protocol control block | |
2269 | * @param locked Implies if ipi_lock (protecting pcb list) | |
2270 | * is already locked or not. | |
2271 | * | |
2272 | * @return int error on failure and 0 on success | |
1c79356b A |
2273 | */ |
2274 | int | |
2d21ac55 | 2275 | in_pcbinshash(struct inpcb *inp, int locked) |
1c79356b A |
2276 | { |
2277 | struct inpcbhead *pcbhash; | |
2278 | struct inpcbporthead *pcbporthash; | |
2279 | struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; | |
2280 | struct inpcbport *phd; | |
2281 | u_int32_t hashkey_faddr; | |
2282 | ||
39236c6e A |
2283 | if (!locked) { |
2284 | if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) { | |
2285 | /* | |
2286 | * Lock inversion issue, mostly with udp | |
2287 | * multicast packets | |
2288 | */ | |
2289 | socket_unlock(inp->inp_socket, 0); | |
2290 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); | |
2291 | socket_lock(inp->inp_socket, 0); | |
39236c6e A |
2292 | } |
2293 | } | |
b0d623f7 | 2294 | |
4bd07ac2 A |
2295 | /* |
2296 | * This routine or its caller may have given up | |
2297 | * socket's protocol lock briefly. | |
2298 | * During that time the socket may have been dropped. | |
2299 | * Safe-guarding against that. | |
2300 | */ | |
2301 | if (inp->inp_state == INPCB_STATE_DEAD) { | |
2302 | if (!locked) { | |
2303 | lck_rw_done(pcbinfo->ipi_lock); | |
2304 | } | |
2305 | return (ECONNABORTED); | |
2306 | } | |
2307 | ||
2308 | ||
1c79356b A |
2309 | #if INET6 |
2310 | if (inp->inp_vflag & INP_IPV6) | |
2311 | hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; | |
2312 | else | |
2313 | #endif /* INET6 */ | |
39236c6e | 2314 | hashkey_faddr = inp->inp_faddr.s_addr; |
1c79356b | 2315 | |
39236c6e A |
2316 | inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, |
2317 | inp->inp_fport, pcbinfo->ipi_hashmask); | |
91447636 | 2318 | |
39236c6e | 2319 | pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element]; |
1c79356b | 2320 | |
39236c6e A |
2321 | pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport, |
2322 | pcbinfo->ipi_porthashmask)]; | |
1c79356b A |
2323 | |
2324 | /* | |
2325 | * Go through port list and look for a head for this lport. | |
2326 | */ | |
9bccf70c | 2327 | LIST_FOREACH(phd, pcbporthash, phd_hash) { |
1c79356b A |
2328 | if (phd->phd_port == inp->inp_lport) |
2329 | break; | |
2330 | } | |
316670eb | 2331 | |
1c79356b A |
2332 | /* |
2333 | * If none exists, malloc one and tack it on. | |
2334 | */ | |
2335 | if (phd == NULL) { | |
39236c6e A |
2336 | MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport), |
2337 | M_PCB, M_WAITOK); | |
1c79356b | 2338 | if (phd == NULL) { |
91447636 | 2339 | if (!locked) |
39236c6e | 2340 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
2341 | return (ENOBUFS); /* XXX */ |
2342 | } | |
2343 | phd->phd_port = inp->inp_lport; | |
2344 | LIST_INIT(&phd->phd_pcblist); | |
2345 | LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); | |
2346 | } | |
fe8ab488 A |
2347 | |
2348 | VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); | |
1c79356b A |
2349 | inp->inp_phd = phd; |
2350 | LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); | |
2351 | LIST_INSERT_HEAD(pcbhash, inp, inp_hash); | |
fe8ab488 A |
2352 | inp->inp_flags2 |= INP2_INHASHLIST; |
2353 | ||
91447636 | 2354 | if (!locked) |
39236c6e | 2355 | lck_rw_done(pcbinfo->ipi_lock); |
39037602 | 2356 | |
fe8ab488 A |
2357 | #if NECP |
2358 | // This call catches the original setting of the local address | |
2359 | inp_update_necp_policy(inp, NULL, NULL, 0); | |
2360 | #endif /* NECP */ | |
39037602 | 2361 | |
1c79356b A |
2362 | return (0); |
2363 | } | |
2364 | ||
2365 | /* | |
2366 | * Move PCB to the proper hash bucket when { faddr, fport } have been | |
2367 | * changed. NOTE: This does not handle the case of the lport changing (the | |
2368 | * hashed port list would have to be updated as well), so the lport must | |
2369 | * not change after in_pcbinshash() has been called. | |
2370 | */ | |
2371 | void | |
2d21ac55 | 2372 | in_pcbrehash(struct inpcb *inp) |
1c79356b A |
2373 | { |
2374 | struct inpcbhead *head; | |
2375 | u_int32_t hashkey_faddr; | |
2376 | ||
2377 | #if INET6 | |
2378 | if (inp->inp_vflag & INP_IPV6) | |
2379 | hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; | |
2380 | else | |
2381 | #endif /* INET6 */ | |
39236c6e A |
2382 | hashkey_faddr = inp->inp_faddr.s_addr; |
2383 | ||
2384 | inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, | |
2385 | inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask); | |
2386 | head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element]; | |
1c79356b | 2387 | |
fe8ab488 A |
2388 | if (inp->inp_flags2 & INP2_INHASHLIST) { |
2389 | LIST_REMOVE(inp, inp_hash); | |
2390 | inp->inp_flags2 &= ~INP2_INHASHLIST; | |
2391 | } | |
2392 | ||
2393 | VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); | |
1c79356b | 2394 | LIST_INSERT_HEAD(head, inp, inp_hash); |
fe8ab488 | 2395 | inp->inp_flags2 |= INP2_INHASHLIST; |
39037602 | 2396 | |
fe8ab488 A |
2397 | #if NECP |
2398 | // This call catches updates to the remote addresses | |
2399 | inp_update_necp_policy(inp, NULL, NULL, 0); | |
2400 | #endif /* NECP */ | |
1c79356b A |
2401 | } |
2402 | ||
2403 | /* | |
2404 | * Remove PCB from various lists. | |
316670eb | 2405 | * Must be called with the pcbinfo lock held in exclusive mode. |
1c79356b A |
2406 | */ |
2407 | void | |
2d21ac55 | 2408 | in_pcbremlists(struct inpcb *inp) |
1c79356b A |
2409 | { |
2410 | inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; | |
1c79356b | 2411 | |
fe8ab488 A |
2412 | /* |
2413 | * Check if it's in hashlist -- an inp is placed in hashlist when | |
39037602 | 2414 | * it's local port gets assigned. So it should also be present |
fe8ab488 A |
2415 | * in the port list. |
2416 | */ | |
2417 | if (inp->inp_flags2 & INP2_INHASHLIST) { | |
1c79356b A |
2418 | struct inpcbport *phd = inp->inp_phd; |
2419 | ||
fe8ab488 A |
2420 | VERIFY(phd != NULL && inp->inp_lport > 0); |
2421 | ||
1c79356b | 2422 | LIST_REMOVE(inp, inp_hash); |
fe8ab488 A |
2423 | inp->inp_hash.le_next = NULL; |
2424 | inp->inp_hash.le_prev = NULL; | |
2425 | ||
1c79356b | 2426 | LIST_REMOVE(inp, inp_portlist); |
fe8ab488 A |
2427 | inp->inp_portlist.le_next = NULL; |
2428 | inp->inp_portlist.le_prev = NULL; | |
2429 | if (LIST_EMPTY(&phd->phd_pcblist)) { | |
1c79356b A |
2430 | LIST_REMOVE(phd, phd_hash); |
2431 | FREE(phd, M_PCB); | |
2432 | } | |
fe8ab488 A |
2433 | inp->inp_phd = NULL; |
2434 | inp->inp_flags2 &= ~INP2_INHASHLIST; | |
1c79356b | 2435 | } |
fe8ab488 | 2436 | VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); |
39236c6e A |
2437 | |
2438 | if (inp->inp_flags2 & INP2_TIMEWAIT) { | |
2439 | /* Remove from time-wait queue */ | |
2440 | tcp_remove_from_time_wait(inp); | |
2441 | inp->inp_flags2 &= ~INP2_TIMEWAIT; | |
2442 | VERIFY(inp->inp_pcbinfo->ipi_twcount != 0); | |
2443 | inp->inp_pcbinfo->ipi_twcount--; | |
2444 | } else { | |
2445 | /* Remove from global inp list if it is not time-wait */ | |
2446 | LIST_REMOVE(inp, inp_list); | |
2447 | } | |
316670eb | 2448 | |
bd504ef0 | 2449 | if (inp->inp_flags2 & INP2_IN_FCTREE) { |
39236c6e | 2450 | inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE)); |
bd504ef0 A |
2451 | VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE)); |
2452 | } | |
39236c6e | 2453 | |
1c79356b A |
2454 | inp->inp_pcbinfo->ipi_count--; |
2455 | } | |
2456 | ||
39236c6e A |
2457 | /* |
2458 | * Mechanism used to defer the memory release of PCBs | |
2459 | * The pcb list will contain the pcb until the reaper can clean it up if | |
2460 | * the following conditions are met: | |
2461 | * 1) state "DEAD", | |
2462 | * 2) wantcnt is STOPUSING | |
2463 | * 3) usecount is 0 | |
91447636 | 2464 | * This function will be called to either mark the pcb as |
39236c6e | 2465 | */ |
91447636 A |
2466 | int |
2467 | in_pcb_checkstate(struct inpcb *pcb, int mode, int locked) | |
91447636 | 2468 | { |
39236c6e | 2469 | volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt; |
2d21ac55 A |
2470 | UInt32 origwant; |
2471 | UInt32 newwant; | |
91447636 A |
2472 | |
2473 | switch (mode) { | |
39236c6e A |
2474 | case WNT_STOPUSING: |
2475 | /* | |
2476 | * Try to mark the pcb as ready for recycling. CAS with | |
2477 | * STOPUSING, if success we're good, if it's in use, will | |
2478 | * be marked later | |
2479 | */ | |
2480 | if (locked == 0) | |
2481 | socket_lock(pcb->inp_socket, 1); | |
2482 | pcb->inp_state = INPCB_STATE_DEAD; | |
91447636 | 2483 | |
39236c6e A |
2484 | stopusing: |
2485 | if (pcb->inp_socket->so_usecount < 0) { | |
2486 | panic("%s: pcb=%p so=%p usecount is negative\n", | |
2487 | __func__, pcb, pcb->inp_socket); | |
2488 | /* NOTREACHED */ | |
2489 | } | |
2490 | if (locked == 0) | |
2491 | socket_unlock(pcb->inp_socket, 1); | |
91447636 | 2492 | |
39236c6e | 2493 | inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST); |
6d2010ae | 2494 | |
39236c6e A |
2495 | origwant = *wantcnt; |
2496 | if ((UInt16) origwant == 0xffff) /* should stop using */ | |
2497 | return (WNT_STOPUSING); | |
2498 | newwant = 0xffff; | |
2499 | if ((UInt16) origwant == 0) { | |
2500 | /* try to mark it as unsuable now */ | |
2501 | OSCompareAndSwap(origwant, newwant, wantcnt); | |
2502 | } | |
2503 | return (WNT_STOPUSING); | |
91447636 | 2504 | |
39236c6e A |
2505 | case WNT_ACQUIRE: |
2506 | /* | |
2507 | * Try to increase reference to pcb. If WNT_STOPUSING | |
2508 | * should bail out. If socket state DEAD, try to set count | |
2509 | * to STOPUSING, return failed otherwise increase cnt. | |
2510 | */ | |
2511 | do { | |
91447636 | 2512 | origwant = *wantcnt; |
39236c6e A |
2513 | if ((UInt16) origwant == 0xffff) { |
2514 | /* should stop using */ | |
91447636 | 2515 | return (WNT_STOPUSING); |
91447636 | 2516 | } |
39236c6e A |
2517 | newwant = origwant + 1; |
2518 | } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); | |
2519 | return (WNT_ACQUIRE); | |
91447636 | 2520 | |
39236c6e A |
2521 | case WNT_RELEASE: |
2522 | /* | |
2523 | * Release reference. If result is null and pcb state | |
2524 | * is DEAD, set wanted bit to STOPUSING | |
2525 | */ | |
2526 | if (locked == 0) | |
2527 | socket_lock(pcb->inp_socket, 1); | |
91447636 | 2528 | |
39236c6e A |
2529 | do { |
2530 | origwant = *wantcnt; | |
2531 | if ((UInt16) origwant == 0x0) { | |
2532 | panic("%s: pcb=%p release with zero count", | |
2533 | __func__, pcb); | |
2534 | /* NOTREACHED */ | |
2535 | } | |
2536 | if ((UInt16) origwant == 0xffff) { | |
2537 | /* should stop using */ | |
2538 | if (locked == 0) | |
2539 | socket_unlock(pcb->inp_socket, 1); | |
2540 | return (WNT_STOPUSING); | |
2541 | } | |
2542 | newwant = origwant - 1; | |
2543 | } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); | |
2544 | ||
2545 | if (pcb->inp_state == INPCB_STATE_DEAD) | |
2546 | goto stopusing; | |
2547 | if (pcb->inp_socket->so_usecount < 0) { | |
2548 | panic("%s: RELEASE pcb=%p so=%p usecount is negative\n", | |
2549 | __func__, pcb, pcb->inp_socket); | |
2550 | /* NOTREACHED */ | |
2551 | } | |
91447636 | 2552 | |
39236c6e A |
2553 | if (locked == 0) |
2554 | socket_unlock(pcb->inp_socket, 1); | |
2555 | return (WNT_RELEASE); | |
91447636 | 2556 | |
39236c6e A |
2557 | default: |
2558 | panic("%s: so=%p not a valid state =%x\n", __func__, | |
2559 | pcb->inp_socket, mode); | |
2560 | /* NOTREACHED */ | |
91447636 A |
2561 | } |
2562 | ||
2563 | /* NOTREACHED */ | |
2564 | return (mode); | |
2565 | } | |
2566 | ||
2567 | /* | |
2568 | * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat. | |
2569 | * The inpcb_compat data structure is passed to user space and must | |
b0d623f7 | 2570 | * not change. We intentionally avoid copying pointers. |
91447636 A |
2571 | */ |
2572 | void | |
39236c6e | 2573 | inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat) |
91447636 | 2574 | { |
39236c6e | 2575 | bzero(inp_compat, sizeof (*inp_compat)); |
91447636 A |
2576 | inp_compat->inp_fport = inp->inp_fport; |
2577 | inp_compat->inp_lport = inp->inp_lport; | |
316670eb | 2578 | inp_compat->nat_owner = 0; |
39236c6e | 2579 | inp_compat->nat_cookie = 0; |
91447636 A |
2580 | inp_compat->inp_gencnt = inp->inp_gencnt; |
2581 | inp_compat->inp_flags = inp->inp_flags; | |
2582 | inp_compat->inp_flow = inp->inp_flow; | |
2583 | inp_compat->inp_vflag = inp->inp_vflag; | |
2584 | inp_compat->inp_ip_ttl = inp->inp_ip_ttl; | |
2585 | inp_compat->inp_ip_p = inp->inp_ip_p; | |
39236c6e A |
2586 | inp_compat->inp_dependfaddr.inp6_foreign = |
2587 | inp->inp_dependfaddr.inp6_foreign; | |
2588 | inp_compat->inp_dependladdr.inp6_local = | |
2589 | inp->inp_dependladdr.inp6_local; | |
91447636 | 2590 | inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; |
39236c6e | 2591 | inp_compat->inp_depend6.inp6_hlim = 0; |
91447636 | 2592 | inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; |
39236c6e | 2593 | inp_compat->inp_depend6.inp6_ifindex = 0; |
91447636 A |
2594 | inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; |
2595 | } | |
9bccf70c | 2596 | |
b0d623f7 | 2597 | void |
39236c6e | 2598 | inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp) |
b0d623f7 | 2599 | { |
6d2010ae A |
2600 | xinp->inp_fport = inp->inp_fport; |
2601 | xinp->inp_lport = inp->inp_lport; | |
2602 | xinp->inp_gencnt = inp->inp_gencnt; | |
2603 | xinp->inp_flags = inp->inp_flags; | |
2604 | xinp->inp_flow = inp->inp_flow; | |
2605 | xinp->inp_vflag = inp->inp_vflag; | |
2606 | xinp->inp_ip_ttl = inp->inp_ip_ttl; | |
2607 | xinp->inp_ip_p = inp->inp_ip_p; | |
2608 | xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign; | |
2609 | xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local; | |
2610 | xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; | |
39236c6e | 2611 | xinp->inp_depend6.inp6_hlim = 0; |
6d2010ae | 2612 | xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; |
39236c6e | 2613 | xinp->inp_depend6.inp6_ifindex = 0; |
6d2010ae | 2614 | xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; |
b0d623f7 A |
2615 | } |
2616 | ||
b0d623f7 A |
2617 | /* |
2618 | * The following routines implement this scheme: | |
2619 | * | |
2620 | * Callers of ip_output() that intend to cache the route in the inpcb pass | |
2621 | * a local copy of the struct route to ip_output(). Using a local copy of | |
2622 | * the cached route significantly simplifies things as IP no longer has to | |
2623 | * worry about having exclusive access to the passed in struct route, since | |
2624 | * it's defined in the caller's stack; in essence, this allows for a lock- | |
2625 | * less operation when updating the struct route at the IP level and below, | |
2626 | * whenever necessary. The scheme works as follows: | |
2627 | * | |
2628 | * Prior to dropping the socket's lock and calling ip_output(), the caller | |
2629 | * copies the struct route from the inpcb into its stack, and adds a reference | |
2630 | * to the cached route entry, if there was any. The socket's lock is then | |
2631 | * dropped and ip_output() is called with a pointer to the copy of struct | |
2632 | * route defined on the stack (not to the one in the inpcb.) | |
2633 | * | |
2634 | * Upon returning from ip_output(), the caller then acquires the socket's | |
2635 | * lock and synchronizes the cache; if there is no route cached in the inpcb, | |
2636 | * it copies the local copy of struct route (which may or may not contain any | |
2637 | * route) back into the cache; otherwise, if the inpcb has a route cached in | |
2638 | * it, the one in the local copy will be freed, if there's any. Trashing the | |
2639 | * cached route in the inpcb can be avoided because ip_output() is single- | |
2640 | * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized | |
2641 | * by the socket/transport layer.) | |
2642 | */ | |
2643 | void | |
2644 | inp_route_copyout(struct inpcb *inp, struct route *dst) | |
2645 | { | |
2646 | struct route *src = &inp->inp_route; | |
2647 | ||
6d2010ae | 2648 | lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); |
b0d623f7 | 2649 | |
0b4c1975 | 2650 | /* |
39236c6e | 2651 | * If the route in the PCB is stale or not for IPv4, blow it away; |
0b4c1975 A |
2652 | * this is possible in the case of IPv4-mapped address case. |
2653 | */ | |
39236c6e A |
2654 | if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) |
2655 | ROUTE_RELEASE(src); | |
316670eb | 2656 | |
39236c6e | 2657 | route_copyout(dst, src, sizeof (*dst)); |
b0d623f7 A |
2658 | } |
2659 | ||
2660 | void | |
2661 | inp_route_copyin(struct inpcb *inp, struct route *src) | |
2662 | { | |
2663 | struct route *dst = &inp->inp_route; | |
2664 | ||
6d2010ae | 2665 | lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); |
b0d623f7 A |
2666 | |
2667 | /* Minor sanity check */ | |
2668 | if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) | |
2669 | panic("%s: wrong or corrupted route: %p", __func__, src); | |
2670 | ||
39236c6e | 2671 | route_copyin(src, dst, sizeof (*src)); |
6d2010ae A |
2672 | } |
2673 | ||
2674 | /* | |
39037602 | 2675 | * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option. |
6d2010ae | 2676 | */ |
316670eb | 2677 | int |
39236c6e | 2678 | inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp) |
6d2010ae | 2679 | { |
316670eb A |
2680 | struct ifnet *ifp = NULL; |
2681 | ||
2682 | ifnet_head_lock_shared(); | |
2683 | if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE && | |
2684 | (ifp = ifindex2ifnet[ifscope]) == NULL)) { | |
2685 | ifnet_head_done(); | |
2686 | return (ENXIO); | |
2687 | } | |
2688 | ifnet_head_done(); | |
2689 | ||
2690 | VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE); | |
2691 | ||
6d2010ae A |
2692 | /* |
2693 | * A zero interface scope value indicates an "unbind". | |
2694 | * Otherwise, take in whatever value the app desires; | |
2695 | * the app may already know the scope (or force itself | |
2696 | * to such a scope) ahead of time before the interface | |
2697 | * gets attached. It doesn't matter either way; any | |
2698 | * route lookup from this point on will require an | |
2699 | * exact match for the embedded interface scope. | |
2700 | */ | |
316670eb A |
2701 | inp->inp_boundifp = ifp; |
2702 | if (inp->inp_boundifp == NULL) | |
6d2010ae A |
2703 | inp->inp_flags &= ~INP_BOUND_IF; |
2704 | else | |
2705 | inp->inp_flags |= INP_BOUND_IF; | |
2706 | ||
2707 | /* Blow away any cached route in the PCB */ | |
39236c6e A |
2708 | ROUTE_RELEASE(&inp->inp_route); |
2709 | ||
2710 | if (pifp != NULL) | |
2711 | *pifp = ifp; | |
316670eb A |
2712 | |
2713 | return (0); | |
6d2010ae A |
2714 | } |
2715 | ||
2716 | /* | |
39236c6e A |
2717 | * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option, |
2718 | * as well as for setting PROC_UUID_NO_CELLULAR policy. | |
6d2010ae | 2719 | */ |
39236c6e A |
2720 | void |
2721 | inp_set_nocellular(struct inpcb *inp) | |
6d2010ae | 2722 | { |
39236c6e | 2723 | inp->inp_flags |= INP_NO_IFT_CELLULAR; |
6d2010ae A |
2724 | |
2725 | /* Blow away any cached route in the PCB */ | |
39236c6e A |
2726 | ROUTE_RELEASE(&inp->inp_route); |
2727 | } | |
2728 | ||
2729 | /* | |
2730 | * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option, | |
2731 | * as well as for clearing PROC_UUID_NO_CELLULAR policy. | |
2732 | */ | |
2733 | void | |
2734 | inp_clear_nocellular(struct inpcb *inp) | |
2735 | { | |
2736 | struct socket *so = inp->inp_socket; | |
2737 | ||
2738 | /* | |
2739 | * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket | |
2740 | * has a higher precendence than INP_NO_IFT_CELLULAR. Clear the flag | |
2741 | * if and only if the socket is unrestricted. | |
2742 | */ | |
2743 | if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) { | |
2744 | inp->inp_flags &= ~INP_NO_IFT_CELLULAR; | |
2745 | ||
2746 | /* Blow away any cached route in the PCB */ | |
2747 | ROUTE_RELEASE(&inp->inp_route); | |
6d2010ae | 2748 | } |
39236c6e | 2749 | } |
6d2010ae | 2750 | |
fe8ab488 A |
2751 | void |
2752 | inp_set_noexpensive(struct inpcb *inp) | |
2753 | { | |
2754 | inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE; | |
2755 | ||
2756 | /* Blow away any cached route in the PCB */ | |
2757 | ROUTE_RELEASE(&inp->inp_route); | |
2758 | } | |
2759 | ||
2760 | void | |
2761 | inp_set_awdl_unrestricted(struct inpcb *inp) | |
2762 | { | |
2763 | inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED; | |
2764 | ||
2765 | /* Blow away any cached route in the PCB */ | |
2766 | ROUTE_RELEASE(&inp->inp_route); | |
2767 | } | |
2768 | ||
2769 | boolean_t | |
2770 | inp_get_awdl_unrestricted(struct inpcb *inp) | |
2771 | { | |
2772 | return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE; | |
2773 | } | |
2774 | ||
2775 | void | |
2776 | inp_clear_awdl_unrestricted(struct inpcb *inp) | |
2777 | { | |
2778 | inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED; | |
2779 | ||
2780 | /* Blow away any cached route in the PCB */ | |
2781 | ROUTE_RELEASE(&inp->inp_route); | |
2782 | } | |
2783 | ||
39037602 A |
2784 | void |
2785 | inp_set_intcoproc_allowed(struct inpcb *inp) | |
2786 | { | |
2787 | inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED; | |
2788 | ||
2789 | /* Blow away any cached route in the PCB */ | |
2790 | ROUTE_RELEASE(&inp->inp_route); | |
2791 | } | |
2792 | ||
2793 | boolean_t | |
2794 | inp_get_intcoproc_allowed(struct inpcb *inp) | |
2795 | { | |
2796 | return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE; | |
2797 | } | |
2798 | ||
2799 | void | |
2800 | inp_clear_intcoproc_allowed(struct inpcb *inp) | |
2801 | { | |
2802 | inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED; | |
2803 | ||
2804 | /* Blow away any cached route in the PCB */ | |
2805 | ROUTE_RELEASE(&inp->inp_route); | |
2806 | } | |
2807 | ||
fe8ab488 | 2808 | #if NECP |
39236c6e | 2809 | /* |
fe8ab488 | 2810 | * Called when PROC_UUID_NECP_APP_POLICY is set. |
39236c6e A |
2811 | */ |
2812 | void | |
fe8ab488 | 2813 | inp_set_want_app_policy(struct inpcb *inp) |
39236c6e | 2814 | { |
fe8ab488 | 2815 | inp->inp_flags2 |= INP2_WANT_APP_POLICY; |
39236c6e A |
2816 | } |
2817 | ||
2818 | /* | |
fe8ab488 | 2819 | * Called when PROC_UUID_NECP_APP_POLICY is cleared. |
39236c6e A |
2820 | */ |
2821 | void | |
fe8ab488 | 2822 | inp_clear_want_app_policy(struct inpcb *inp) |
39236c6e | 2823 | { |
fe8ab488 | 2824 | inp->inp_flags2 &= ~INP2_WANT_APP_POLICY; |
b0d623f7 | 2825 | } |
fe8ab488 | 2826 | #endif /* NECP */ |
316670eb A |
2827 | |
2828 | /* | |
2829 | * Calculate flow hash for an inp, used by an interface to identify a | |
2830 | * flow. When an interface provides flow control advisory, this flow | |
2831 | * hash is used as an identifier. | |
2832 | */ | |
2833 | u_int32_t | |
2834 | inp_calc_flowhash(struct inpcb *inp) | |
2835 | { | |
2836 | struct inp_flowhash_key fh __attribute__((aligned(8))); | |
2837 | u_int32_t flowhash = 0; | |
bd504ef0 | 2838 | struct inpcb *tmp_inp = NULL; |
316670eb A |
2839 | |
2840 | if (inp_hash_seed == 0) | |
2841 | inp_hash_seed = RandomULong(); | |
2842 | ||
2843 | bzero(&fh, sizeof (fh)); | |
2844 | ||
2845 | bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr)); | |
2846 | bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr)); | |
2847 | ||
2848 | fh.infh_lport = inp->inp_lport; | |
2849 | fh.infh_fport = inp->inp_fport; | |
2850 | fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET; | |
2851 | fh.infh_proto = inp->inp_ip_p; | |
2852 | fh.infh_rand1 = RandomULong(); | |
2853 | fh.infh_rand2 = RandomULong(); | |
2854 | ||
2855 | try_again: | |
2856 | flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed); | |
2857 | if (flowhash == 0) { | |
2858 | /* try to get a non-zero flowhash */ | |
2859 | inp_hash_seed = RandomULong(); | |
2860 | goto try_again; | |
2861 | } | |
2862 | ||
bd504ef0 | 2863 | inp->inp_flowhash = flowhash; |
316670eb | 2864 | |
bd504ef0 | 2865 | /* Insert the inp into inp_fc_tree */ |
39236c6e | 2866 | lck_mtx_lock_spin(&inp_fc_lck); |
bd504ef0 A |
2867 | tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp); |
2868 | if (tmp_inp != NULL) { | |
316670eb | 2869 | /* |
bd504ef0 A |
2870 | * There is a different inp with the same flowhash. |
2871 | * There can be a collision on flow hash but the | |
39236c6e | 2872 | * probability is low. Let's recompute the |
bd504ef0 | 2873 | * flowhash. |
316670eb A |
2874 | */ |
2875 | lck_mtx_unlock(&inp_fc_lck); | |
bd504ef0 A |
2876 | /* recompute hash seed */ |
2877 | inp_hash_seed = RandomULong(); | |
2878 | goto try_again; | |
316670eb | 2879 | } |
39236c6e | 2880 | |
bd504ef0 A |
2881 | RB_INSERT(inp_fc_tree, &inp_fc_tree, inp); |
2882 | inp->inp_flags2 |= INP2_IN_FCTREE; | |
316670eb | 2883 | lck_mtx_unlock(&inp_fc_lck); |
bd504ef0 | 2884 | |
39236c6e A |
2885 | return (flowhash); |
2886 | } | |
2887 | ||
2888 | void | |
2889 | inp_flowadv(uint32_t flowhash) | |
2890 | { | |
2891 | struct inpcb *inp; | |
2892 | ||
2893 | inp = inp_fc_getinp(flowhash, 0); | |
2894 | ||
2895 | if (inp == NULL) | |
2896 | return; | |
2897 | inp_fc_feedback(inp); | |
316670eb A |
2898 | } |
2899 | ||
bd504ef0 A |
2900 | /* |
2901 | * Function to compare inp_fc_entries in inp flow control tree | |
2902 | */ | |
2903 | static inline int | |
2904 | infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2) | |
316670eb | 2905 | { |
bd504ef0 | 2906 | return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash), |
39236c6e | 2907 | sizeof(inp1->inp_flowhash))); |
bd504ef0 | 2908 | } |
316670eb | 2909 | |
39236c6e | 2910 | static struct inpcb * |
bd504ef0 A |
2911 | inp_fc_getinp(u_int32_t flowhash, u_int32_t flags) |
2912 | { | |
2913 | struct inpcb *inp = NULL; | |
2914 | int locked = (flags & INPFC_SOLOCKED) ? 1 : 0; | |
316670eb A |
2915 | |
2916 | lck_mtx_lock_spin(&inp_fc_lck); | |
bd504ef0 A |
2917 | key_inp.inp_flowhash = flowhash; |
2918 | inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp); | |
2919 | if (inp == NULL) { | |
316670eb A |
2920 | /* inp is not present, return */ |
2921 | lck_mtx_unlock(&inp_fc_lck); | |
2922 | return (NULL); | |
2923 | } | |
2924 | ||
bd504ef0 A |
2925 | if (flags & INPFC_REMOVE) { |
2926 | RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp); | |
2927 | lck_mtx_unlock(&inp_fc_lck); | |
316670eb | 2928 | |
bd504ef0 A |
2929 | bzero(&(inp->infc_link), sizeof (inp->infc_link)); |
2930 | inp->inp_flags2 &= ~INP2_IN_FCTREE; | |
2931 | return (NULL); | |
316670eb | 2932 | } |
39236c6e | 2933 | |
bd504ef0 A |
2934 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) |
2935 | inp = NULL; | |
316670eb A |
2936 | lck_mtx_unlock(&inp_fc_lck); |
2937 | ||
bd504ef0 | 2938 | return (inp); |
316670eb A |
2939 | } |
2940 | ||
39236c6e | 2941 | static void |
316670eb A |
2942 | inp_fc_feedback(struct inpcb *inp) |
2943 | { | |
2944 | struct socket *so = inp->inp_socket; | |
2945 | ||
2946 | /* we already hold a want_cnt on this inp, socket can't be null */ | |
39236c6e | 2947 | VERIFY(so != NULL); |
316670eb A |
2948 | socket_lock(so, 1); |
2949 | ||
2950 | if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { | |
2951 | socket_unlock(so, 1); | |
2952 | return; | |
2953 | } | |
2954 | ||
fe8ab488 A |
2955 | if (inp->inp_sndinprog_cnt > 0) |
2956 | inp->inp_flags |= INP_FC_FEEDBACK; | |
2957 | ||
316670eb A |
2958 | /* |
2959 | * Return if the connection is not in flow-controlled state. | |
2960 | * This can happen if the connection experienced | |
2961 | * loss while it was in flow controlled state | |
2962 | */ | |
2963 | if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) { | |
2964 | socket_unlock(so, 1); | |
2965 | return; | |
2966 | } | |
2967 | inp_reset_fc_state(inp); | |
2968 | ||
39236c6e | 2969 | if (SOCK_TYPE(so) == SOCK_STREAM) |
316670eb A |
2970 | inp_fc_unthrottle_tcp(inp); |
2971 | ||
2972 | socket_unlock(so, 1); | |
2973 | } | |
2974 | ||
2975 | void | |
2976 | inp_reset_fc_state(struct inpcb *inp) | |
2977 | { | |
2978 | struct socket *so = inp->inp_socket; | |
2979 | int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0; | |
2980 | int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0; | |
2981 | ||
2982 | inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED); | |
2983 | ||
2984 | if (suspended) { | |
2985 | so->so_flags &= ~(SOF_SUSPENDED); | |
2986 | soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME)); | |
2987 | } | |
2988 | ||
316670eb A |
2989 | /* Give a write wakeup to unblock the socket */ |
2990 | if (needwakeup) | |
2991 | sowwakeup(so); | |
2992 | } | |
2993 | ||
2994 | int | |
2995 | inp_set_fc_state(struct inpcb *inp, int advcode) | |
2996 | { | |
bd504ef0 | 2997 | struct inpcb *tmp_inp = NULL; |
316670eb | 2998 | /* |
39236c6e | 2999 | * If there was a feedback from the interface when |
316670eb A |
3000 | * send operation was in progress, we should ignore |
3001 | * this flow advisory to avoid a race between setting | |
3002 | * flow controlled state and receiving feedback from | |
3003 | * the interface | |
3004 | */ | |
3005 | if (inp->inp_flags & INP_FC_FEEDBACK) | |
39236c6e | 3006 | return (0); |
316670eb A |
3007 | |
3008 | inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED); | |
39236c6e A |
3009 | if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash, |
3010 | INPFC_SOLOCKED)) != NULL) { | |
3011 | if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) | |
bd504ef0 A |
3012 | return (0); |
3013 | VERIFY(tmp_inp == inp); | |
316670eb A |
3014 | switch (advcode) { |
3015 | case FADV_FLOW_CONTROLLED: | |
3016 | inp->inp_flags |= INP_FLOW_CONTROLLED; | |
3017 | break; | |
3018 | case FADV_SUSPENDED: | |
3019 | inp->inp_flags |= INP_FLOW_SUSPENDED; | |
3020 | soevent(inp->inp_socket, | |
3021 | (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND)); | |
3022 | ||
3023 | /* Record the fact that suspend event was sent */ | |
3024 | inp->inp_socket->so_flags |= SOF_SUSPENDED; | |
3025 | break; | |
3026 | } | |
bd504ef0 | 3027 | return (1); |
316670eb | 3028 | } |
39236c6e | 3029 | return (0); |
316670eb A |
3030 | } |
3031 | ||
3032 | /* | |
3033 | * Handler for SO_FLUSH socket option. | |
3034 | */ | |
3035 | int | |
3036 | inp_flush(struct inpcb *inp, int optval) | |
3037 | { | |
3038 | u_int32_t flowhash = inp->inp_flowhash; | |
39236c6e | 3039 | struct ifnet *rtifp, *oifp; |
316670eb A |
3040 | |
3041 | /* Either all classes or one of the valid ones */ | |
3042 | if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) | |
3043 | return (EINVAL); | |
3044 | ||
3045 | /* We need a flow hash for identification */ | |
3046 | if (flowhash == 0) | |
3047 | return (0); | |
3048 | ||
39236c6e A |
3049 | /* Grab the interfaces from the route and pcb */ |
3050 | rtifp = ((inp->inp_route.ro_rt != NULL) ? | |
3051 | inp->inp_route.ro_rt->rt_ifp : NULL); | |
3052 | oifp = inp->inp_last_outifp; | |
3053 | ||
3054 | if (rtifp != NULL) | |
3055 | if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0); | |
3056 | if (oifp != NULL && oifp != rtifp) | |
3057 | if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0); | |
316670eb A |
3058 | |
3059 | return (0); | |
3060 | } | |
3061 | ||
3062 | /* | |
3063 | * Clear the INP_INADDR_ANY flag (special case for PPP only) | |
3064 | */ | |
39236c6e A |
3065 | void |
3066 | inp_clear_INP_INADDR_ANY(struct socket *so) | |
316670eb A |
3067 | { |
3068 | struct inpcb *inp = NULL; | |
3069 | ||
3070 | socket_lock(so, 1); | |
3071 | inp = sotoinpcb(so); | |
3072 | if (inp) { | |
3073 | inp->inp_flags &= ~INP_INADDR_ANY; | |
3074 | } | |
3075 | socket_unlock(so, 1); | |
3076 | } | |
3077 | ||
39236c6e A |
3078 | void |
3079 | inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo) | |
3080 | { | |
3081 | struct socket *so = inp->inp_socket; | |
3082 | ||
3083 | soprocinfo->spi_pid = so->last_pid; | |
fe8ab488 A |
3084 | if (so->last_pid != 0) |
3085 | uuid_copy(soprocinfo->spi_uuid, so->last_uuid); | |
39236c6e A |
3086 | /* |
3087 | * When not delegated, the effective pid is the same as the real pid | |
3088 | */ | |
fe8ab488 | 3089 | if (so->so_flags & SOF_DELEGATED) { |
3e170ce0 | 3090 | soprocinfo->spi_delegated = 1; |
39236c6e | 3091 | soprocinfo->spi_epid = so->e_pid; |
3e170ce0 | 3092 | uuid_copy(soprocinfo->spi_euuid, so->e_uuid); |
fe8ab488 | 3093 | } else { |
3e170ce0 | 3094 | soprocinfo->spi_delegated = 0; |
39236c6e | 3095 | soprocinfo->spi_epid = so->last_pid; |
fe8ab488 | 3096 | } |
39236c6e A |
3097 | } |
3098 | ||
3099 | int | |
3100 | inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash, | |
3101 | struct so_procinfo *soprocinfo) | |
3102 | { | |
3103 | struct inpcb *inp = NULL; | |
3104 | int found = 0; | |
3105 | ||
3106 | bzero(soprocinfo, sizeof (struct so_procinfo)); | |
3107 | ||
3108 | if (!flowhash) | |
3109 | return (-1); | |
3110 | ||
3111 | lck_rw_lock_shared(pcbinfo->ipi_lock); | |
3112 | LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { | |
3113 | if (inp->inp_state != INPCB_STATE_DEAD && | |
3114 | inp->inp_socket != NULL && | |
3115 | inp->inp_flowhash == flowhash) { | |
3116 | found = 1; | |
3117 | inp_get_soprocinfo(inp, soprocinfo); | |
3118 | break; | |
3119 | } | |
3120 | } | |
3121 | lck_rw_done(pcbinfo->ipi_lock); | |
3122 | ||
3123 | return (found); | |
3124 | } | |
3125 | ||
3126 | #if CONFIG_PROC_UUID_POLICY | |
3127 | static void | |
3128 | inp_update_cellular_policy(struct inpcb *inp, boolean_t set) | |
3129 | { | |
3130 | struct socket *so = inp->inp_socket; | |
3131 | int before, after; | |
3132 | ||
3133 | VERIFY(so != NULL); | |
3134 | VERIFY(inp->inp_state != INPCB_STATE_DEAD); | |
3135 | ||
fe8ab488 | 3136 | before = INP_NO_CELLULAR(inp); |
39236c6e A |
3137 | if (set) { |
3138 | inp_set_nocellular(inp); | |
3139 | } else { | |
3140 | inp_clear_nocellular(inp); | |
3141 | } | |
fe8ab488 | 3142 | after = INP_NO_CELLULAR(inp); |
39236c6e A |
3143 | if (net_io_policy_log && (before != after)) { |
3144 | static const char *ok = "OK"; | |
3145 | static const char *nok = "NOACCESS"; | |
3146 | uuid_string_t euuid_buf; | |
3147 | pid_t epid; | |
3148 | ||
3149 | if (so->so_flags & SOF_DELEGATED) { | |
3150 | uuid_unparse(so->e_uuid, euuid_buf); | |
3151 | epid = so->e_pid; | |
3152 | } else { | |
3153 | uuid_unparse(so->last_uuid, euuid_buf); | |
3154 | epid = so->last_pid; | |
3155 | } | |
3156 | ||
3157 | /* allow this socket to generate another notification event */ | |
3158 | so->so_ifdenied_notifies = 0; | |
3159 | ||
3160 | log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d " | |
3161 | "euuid %s%s %s->%s\n", __func__, | |
3162 | (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), | |
3163 | SOCK_TYPE(so), epid, euuid_buf, | |
3164 | (so->so_flags & SOF_DELEGATED) ? | |
3165 | " [delegated]" : "", | |
3166 | ((before < after) ? ok : nok), | |
3167 | ((before < after) ? nok : ok)); | |
3168 | } | |
3169 | } | |
3170 | ||
fe8ab488 | 3171 | #if NECP |
39236c6e | 3172 | static void |
fe8ab488 | 3173 | inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set) |
39236c6e A |
3174 | { |
3175 | struct socket *so = inp->inp_socket; | |
3176 | int before, after; | |
3177 | ||
3178 | VERIFY(so != NULL); | |
3179 | VERIFY(inp->inp_state != INPCB_STATE_DEAD); | |
3180 | ||
fe8ab488 | 3181 | before = (inp->inp_flags2 & INP2_WANT_APP_POLICY); |
39236c6e | 3182 | if (set) { |
fe8ab488 | 3183 | inp_set_want_app_policy(inp); |
39236c6e | 3184 | } else { |
fe8ab488 | 3185 | inp_clear_want_app_policy(inp); |
39236c6e | 3186 | } |
fe8ab488 | 3187 | after = (inp->inp_flags2 & INP2_WANT_APP_POLICY); |
39236c6e A |
3188 | if (net_io_policy_log && (before != after)) { |
3189 | static const char *wanted = "WANTED"; | |
3190 | static const char *unwanted = "UNWANTED"; | |
3191 | uuid_string_t euuid_buf; | |
3192 | pid_t epid; | |
3193 | ||
3194 | if (so->so_flags & SOF_DELEGATED) { | |
3195 | uuid_unparse(so->e_uuid, euuid_buf); | |
3196 | epid = so->e_pid; | |
3197 | } else { | |
3198 | uuid_unparse(so->last_uuid, euuid_buf); | |
3199 | epid = so->last_pid; | |
3200 | } | |
3201 | ||
3202 | log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d " | |
3203 | "euuid %s%s %s->%s\n", __func__, | |
3204 | (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), | |
3205 | SOCK_TYPE(so), epid, euuid_buf, | |
3206 | (so->so_flags & SOF_DELEGATED) ? | |
3207 | " [delegated]" : "", | |
3208 | ((before < after) ? unwanted : wanted), | |
3209 | ((before < after) ? wanted : unwanted)); | |
3210 | } | |
3211 | } | |
fe8ab488 | 3212 | #endif /* NECP */ |
39236c6e A |
3213 | #endif /* CONFIG_PROC_UUID_POLICY */ |
3214 | ||
fe8ab488 A |
3215 | #if NECP |
3216 | void | |
3217 | inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface) | |
3218 | { | |
3219 | necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface); | |
3220 | if (necp_socket_should_rescope(inp) && | |
3221 | inp->inp_lport == 0 && | |
3222 | inp->inp_laddr.s_addr == INADDR_ANY && | |
3223 | IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { | |
3224 | // If we should rescope, and the socket is not yet bound | |
3225 | inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL); | |
3226 | } | |
3227 | } | |
3228 | #endif /* NECP */ | |
3229 | ||
39236c6e A |
/*
 * Refresh the per-process UUID policy state of an inp.
 *
 * Without CONFIG_PROC_UUID_POLICY this is a no-op returning 0.  Otherwise
 * the policy table is consulted for the socket's effective UUID (the
 * delegate's when SOF_DELEGATED is set), and if the policy generation
 * count changed, the cellular policy and (with NECP) the want-app-policy
 * flag are updated.  Returns 0 on success or when the UUID has no entry
 * (ENOENT); any other lookup error is returned as is.
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t prev_gencnt;
	int error = 0;

	if (!net_io_policy_uuid) {
		return (0);
	}
	if (so == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		return (0);
	}

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED)) {
		return (0);
	}

	prev_gencnt = so->so_policy_gencnt;
	error = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
	    so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt);

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go thru the checks below.
	 */
	if (error == ENOENT && prev_gencnt != 0) {
		so->so_policy_gencnt = 0;
	}

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly.  If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if ((error == 0 || error == ENOENT) &&
	    prev_gencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (error == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (error == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, FALSE);
		}
#endif /* NECP */
	}

	if (error == ENOENT) {
		return (0);
	}
	return (error);
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return (0);
#endif /* !CONFIG_PROC_UUID_POLICY */
}
39037602 A |
3290 | |
3291 | static unsigned int log_restricted; | |
3292 | SYSCTL_DECL(_net_inet); | |
3293 | SYSCTL_INT(_net_inet, OID_AUTO, log_restricted, | |
3294 | CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0, | |
3295 | "Log network restrictions"); | |
fe8ab488 A |
3296 | /* |
3297 | * Called when we need to enforce policy restrictions in the input path. | |
3298 | * | |
3299 | * Returns TRUE if we're not allowed to receive data, otherwise FALSE. | |
3300 | */ | |
39037602 A |
3301 | static boolean_t |
3302 | _inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp) | |
39236c6e A |
3303 | { |
3304 | VERIFY(inp != NULL); | |
3305 | ||
fe8ab488 A |
3306 | /* |
3307 | * Inbound restrictions. | |
3308 | */ | |
39236c6e A |
3309 | if (!sorestrictrecv) |
3310 | return (FALSE); | |
3311 | ||
fe8ab488 A |
3312 | if (ifp == NULL) |
3313 | return (FALSE); | |
3314 | ||
3315 | if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) | |
3316 | return (TRUE); | |
3317 | ||
3318 | if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) | |
3319 | return (TRUE); | |
3320 | ||
3321 | if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) | |
3322 | return (TRUE); | |
39037602 | 3323 | |
fe8ab488 | 3324 | if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) |
39236c6e A |
3325 | return (FALSE); |
3326 | ||
3327 | if (inp->inp_flags & INP_RECV_ANYIF) | |
3328 | return (FALSE); | |
3329 | ||
3330 | if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) | |
3331 | return (FALSE); | |
3332 | ||
39037602 A |
3333 | if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) |
3334 | return (TRUE); | |
3335 | ||
39236c6e A |
3336 | return (TRUE); |
3337 | } | |
fe8ab488 | 3338 | |
39037602 A |
3339 | boolean_t |
3340 | inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp) | |
3341 | { | |
3342 | boolean_t ret; | |
3343 | ||
3344 | ret = _inp_restricted_recv(inp, ifp); | |
3345 | if (ret == TRUE && log_restricted) { | |
743345f9 A |
3346 | printf("pid %d (%s) is unable to receive packets on %s\n", |
3347 | current_proc()->p_pid, proc_best_name(current_proc()), | |
3348 | ifp->if_xname); | |
39037602 A |
3349 | } |
3350 | return (ret); | |
3351 | } | |
3352 | ||
fe8ab488 A |
3353 | /* |
3354 | * Called when we need to enforce policy restrictions in the output path. | |
3355 | * | |
3356 | * Returns TRUE if we're not allowed to send data out, otherwise FALSE. | |
3357 | */ | |
39037602 A |
3358 | static boolean_t |
3359 | _inp_restricted_send(struct inpcb *inp, struct ifnet *ifp) | |
fe8ab488 A |
3360 | { |
3361 | VERIFY(inp != NULL); | |
3362 | ||
3363 | /* | |
3364 | * Outbound restrictions. | |
3365 | */ | |
3366 | if (!sorestrictsend) | |
3367 | return (FALSE); | |
3368 | ||
3369 | if (ifp == NULL) | |
3370 | return (FALSE); | |
3371 | ||
3372 | if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) | |
3373 | return (TRUE); | |
3374 | ||
3375 | if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) | |
3376 | return (TRUE); | |
3377 | ||
3378 | if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) | |
3379 | return (TRUE); | |
3380 | ||
39037602 A |
3381 | if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) |
3382 | return (TRUE); | |
3383 | ||
fe8ab488 A |
3384 | return (FALSE); |
3385 | } | |
39037602 A |
3386 | |
3387 | boolean_t | |
3388 | inp_restricted_send(struct inpcb *inp, struct ifnet *ifp) | |
3389 | { | |
3390 | boolean_t ret; | |
3391 | ||
3392 | ret = _inp_restricted_send(inp, ifp); | |
3393 | if (ret == TRUE && log_restricted) { | |
743345f9 A |
3394 | printf("pid %d (%s) is unable to transmit packets on %s\n", |
3395 | current_proc()->p_pid, proc_best_name(current_proc()), | |
3396 | ifp->if_xname); | |
39037602 A |
3397 | } |
3398 | return (ret); | |
3399 | } | |
3400 | ||
3401 | inline void | |
3402 | inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack) | |
3403 | { | |
3404 | struct ifnet *ifp = inp->inp_last_outifp; | |
3405 | struct socket *so = inp->inp_socket; | |
3406 | if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) && | |
3407 | (ifp->if_type == IFT_CELLULAR || | |
3408 | ifp->if_subfamily == IFNET_SUBFAMILY_WIFI)) { | |
3409 | int32_t unsent; | |
3410 | ||
3411 | so->so_snd.sb_flags |= SB_SNDBYTE_CNT; | |
3412 | ||
3413 | /* | |
3414 | * There can be data outstanding before the connection | |
3415 | * becomes established -- TFO case | |
3416 | */ | |
3417 | if (so->so_snd.sb_cc > 0) | |
3418 | inp_incr_sndbytes_total(so, so->so_snd.sb_cc); | |
3419 | ||
3420 | unsent = inp_get_sndbytes_allunsent(so, th_ack); | |
3421 | if (unsent > 0) | |
3422 | inp_incr_sndbytes_unsent(so, unsent); | |
3423 | } | |
3424 | } | |
3425 | ||
3426 | inline void | |
3427 | inp_incr_sndbytes_total(struct socket *so, int32_t len) | |
3428 | { | |
3429 | struct inpcb *inp = (struct inpcb *)so->so_pcb; | |
3430 | struct ifnet *ifp = inp->inp_last_outifp; | |
3431 | ||
3432 | if (ifp != NULL) { | |
3433 | VERIFY(ifp->if_sndbyte_total >= 0); | |
3434 | OSAddAtomic64(len, &ifp->if_sndbyte_total); | |
3435 | } | |
3436 | } | |
3437 | ||
3438 | inline void | |
3439 | inp_decr_sndbytes_total(struct socket *so, int32_t len) | |
3440 | { | |
3441 | struct inpcb *inp = (struct inpcb *)so->so_pcb; | |
3442 | struct ifnet *ifp = inp->inp_last_outifp; | |
3443 | ||
3444 | if (ifp != NULL) { | |
3445 | VERIFY(ifp->if_sndbyte_total >= len); | |
3446 | OSAddAtomic64(-len, &ifp->if_sndbyte_total); | |
3447 | } | |
3448 | } | |
3449 | ||
3450 | inline void | |
3451 | inp_incr_sndbytes_unsent(struct socket *so, int32_t len) | |
3452 | { | |
3453 | struct inpcb *inp = (struct inpcb *)so->so_pcb; | |
3454 | struct ifnet *ifp = inp->inp_last_outifp; | |
3455 | ||
3456 | if (ifp != NULL) { | |
3457 | VERIFY(ifp->if_sndbyte_unsent >= 0); | |
3458 | OSAddAtomic64(len, &ifp->if_sndbyte_unsent); | |
3459 | } | |
3460 | } | |
3461 | ||
3462 | inline void | |
3463 | inp_decr_sndbytes_unsent(struct socket *so, int32_t len) | |
3464 | { | |
3465 | struct inpcb *inp = (struct inpcb *)so->so_pcb; | |
3466 | struct ifnet *ifp = inp->inp_last_outifp; | |
3467 | ||
3468 | if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) | |
3469 | return; | |
3470 | ||
3471 | if (ifp != NULL) { | |
3472 | if (ifp->if_sndbyte_unsent >= len) | |
3473 | OSAddAtomic64(-len, &ifp->if_sndbyte_unsent); | |
3474 | else | |
3475 | ifp->if_sndbyte_unsent = 0; | |
3476 | } | |
3477 | } | |
3478 | ||
3479 | inline void | |
3480 | inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack) | |
3481 | { | |
3482 | int32_t len; | |
3483 | ||
3484 | if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) | |
3485 | return; | |
3486 | ||
3487 | len = inp_get_sndbytes_allunsent(so, th_ack); | |
3488 | inp_decr_sndbytes_unsent(so, len); | |
3489 | } |