]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
a39ff7e2 | 2 | * Copyright (c) 2000-2017 Apple Inc. All rights reserved. |
5d5c5d0d | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
39236c6e | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
39236c6e | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
39236c6e | 17 | * |
2d21ac55 A |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
39236c6e | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* | |
29 | * Copyright (c) 1982, 1986, 1991, 1993, 1995 | |
30 | * The Regents of the University of California. All rights reserved. | |
31 | * | |
32 | * Redistribution and use in source and binary forms, with or without | |
33 | * modification, are permitted provided that the following conditions | |
34 | * are met: | |
35 | * 1. Redistributions of source code must retain the above copyright | |
36 | * notice, this list of conditions and the following disclaimer. | |
37 | * 2. Redistributions in binary form must reproduce the above copyright | |
38 | * notice, this list of conditions and the following disclaimer in the | |
39 | * documentation and/or other materials provided with the distribution. | |
40 | * 3. All advertising materials mentioning features or use of this software | |
41 | * must display the following acknowledgement: | |
42 | * This product includes software developed by the University of | |
43 | * California, Berkeley and its contributors. | |
44 | * 4. Neither the name of the University nor the names of its contributors | |
45 | * may be used to endorse or promote products derived from this software | |
46 | * without specific prior written permission. | |
47 | * | |
48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
58 | * SUCH DAMAGE. | |
59 | * | |
60 | * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 | |
9bccf70c | 61 | * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $ |
1c79356b A |
62 | */ |
63 | ||
64 | #include <sys/param.h> | |
65 | #include <sys/systm.h> | |
66 | #include <sys/malloc.h> | |
67 | #include <sys/mbuf.h> | |
1c79356b | 68 | #include <sys/domain.h> |
1c79356b A |
69 | #include <sys/protosw.h> |
70 | #include <sys/socket.h> | |
71 | #include <sys/socketvar.h> | |
72 | #include <sys/proc.h> | |
73 | #include <sys/kernel.h> | |
74 | #include <sys/sysctl.h> | |
6d2010ae A |
75 | #include <sys/mcache.h> |
76 | #include <sys/kauth.h> | |
77 | #include <sys/priv.h> | |
39236c6e A |
78 | #include <sys/proc_uuid_policy.h> |
79 | #include <sys/syslog.h> | |
fe8ab488 | 80 | #include <sys/priv.h> |
39037602 | 81 | #include <net/dlil.h> |
39236c6e | 82 | |
91447636 | 83 | #include <libkern/OSAtomic.h> |
316670eb | 84 | #include <kern/locks.h> |
1c79356b A |
85 | |
86 | #include <machine/limits.h> | |
87 | ||
1c79356b | 88 | #include <kern/zalloc.h> |
1c79356b A |
89 | |
90 | #include <net/if.h> | |
1c79356b | 91 | #include <net/if_types.h> |
9bccf70c | 92 | #include <net/route.h> |
316670eb A |
93 | #include <net/flowhash.h> |
94 | #include <net/flowadv.h> | |
fe8ab488 | 95 | #include <net/ntstat.h> |
1c79356b A |
96 | |
97 | #include <netinet/in.h> | |
98 | #include <netinet/in_pcb.h> | |
99 | #include <netinet/in_var.h> | |
100 | #include <netinet/ip_var.h> | |
101 | #if INET6 | |
102 | #include <netinet/ip6.h> | |
103 | #include <netinet6/ip6_var.h> | |
104 | #endif /* INET6 */ | |
105 | ||
1c79356b | 106 | #include <sys/kdebug.h> |
b0d623f7 | 107 | #include <sys/random.h> |
39236c6e | 108 | |
316670eb | 109 | #include <dev/random/randomdev.h> |
39236c6e | 110 | #include <mach/boolean.h> |
1c79356b | 111 | |
39037602 A |
112 | #include <pexpert/pexpert.h> |
113 | ||
fe8ab488 A |
114 | #if NECP |
115 | #include <net/necp.h> | |
9bccf70c | 116 | #endif |
1c79356b | 117 | |
39037602 A |
118 | #include <sys/stat.h> |
119 | #include <sys/ubc.h> | |
120 | #include <sys/vnode.h> | |
121 | ||
39236c6e A |
122 | static lck_grp_t *inpcb_lock_grp; |
123 | static lck_attr_t *inpcb_lock_attr; | |
124 | static lck_grp_attr_t *inpcb_lock_grp_attr; | |
125 | decl_lck_mtx_data(static, inpcb_lock); /* global INPCB lock */ | |
126 | decl_lck_mtx_data(static, inpcb_timeout_lock); | |
127 | ||
128 | static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head); | |
129 | ||
130 | static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */ | |
131 | static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */ | |
132 | static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */ | |
133 | static boolean_t inpcb_fast_timer_on = FALSE; | |
fe8ab488 | 134 | |
743345f9 A |
135 | extern char *proc_best_name(proc_t); |
136 | ||
fe8ab488 | 137 | #define INPCB_GCREQ_THRESHOLD 50000 |
fe8ab488 | 138 | |
39037602 A |
139 | static thread_call_t inpcb_thread_call, inpcb_fast_thread_call; |
140 | static void inpcb_sched_timeout(void); | |
141 | static void inpcb_sched_lazy_timeout(void); | |
142 | static void _inpcb_sched_timeout(unsigned int); | |
143 | static void inpcb_timeout(void *, void *); | |
144 | const int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */ | |
39236c6e A |
145 | extern int tvtohz(struct timeval *); |
146 | ||
147 | #if CONFIG_PROC_UUID_POLICY | |
148 | static void inp_update_cellular_policy(struct inpcb *, boolean_t); | |
fe8ab488 A |
149 | #if NECP |
150 | static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t); | |
151 | #endif /* NECP */ | |
39236c6e A |
152 | #endif /* CONFIG_PROC_UUID_POLICY */ |
153 | ||
39236c6e A |
154 | #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8)) |
155 | #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1)) | |
1c79356b | 156 | |
1c79356b A |
157 | /* |
158 | * These configure the range of local port addresses assigned to | |
159 | * "unspecified" outgoing connections/packets/whatever. | |
160 | */ | |
9bccf70c A |
161 | int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ |
162 | int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ | |
39236c6e A |
163 | int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ |
164 | int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ | |
9bccf70c A |
165 | int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ |
166 | int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ | |
1c79356b | 167 | |
39236c6e | 168 | #define RANGECHK(var, min, max) \ |
1c79356b A |
169 | if ((var) < (min)) { (var) = (min); } \ |
170 | else if ((var) > (max)) { (var) = (max); } | |
171 | ||
1c79356b A |
172 | static int |
173 | sysctl_net_ipport_check SYSCTL_HANDLER_ARGS | |
174 | { | |
2d21ac55 | 175 | #pragma unused(arg1, arg2) |
39236c6e A |
176 | int error; |
177 | ||
178 | error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); | |
1c79356b A |
179 | if (!error) { |
180 | RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); | |
181 | RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); | |
182 | RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); | |
183 | RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); | |
184 | RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); | |
185 | RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); | |
186 | } | |
39236c6e | 187 | return (error); |
1c79356b A |
188 | } |
189 | ||
190 | #undef RANGECHK | |
191 | ||
39236c6e A |
192 | SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, |
193 | CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports"); | |
194 | ||
195 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, | |
196 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
197 | &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); | |
198 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, | |
199 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
200 | &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); | |
201 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, | |
202 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
203 | &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); | |
204 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, | |
205 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
206 | &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); | |
207 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, | |
208 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
209 | &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); | |
210 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, | |
211 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
212 | &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); | |
1c79356b | 213 | |
39037602 A |
214 | static uint32_t apn_fallbk_debug = 0; |
215 | #define apn_fallbk_log(x) do { if (apn_fallbk_debug >= 1) log x; } while (0) | |
216 | ||
5ba3f43e A |
217 | #if CONFIG_EMBEDDED |
218 | static boolean_t apn_fallbk_enabled = TRUE; | |
219 | ||
220 | SYSCTL_DECL(_net_inet); | |
221 | SYSCTL_NODE(_net_inet, OID_AUTO, apn_fallback, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "APN Fallback"); | |
222 | SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED, | |
223 | &apn_fallbk_debug, 0, "APN fallback debug enable"); | |
224 | #else | |
39037602 | 225 | static boolean_t apn_fallbk_enabled = FALSE; |
5ba3f43e | 226 | #endif |
39037602 | 227 | |
b0d623f7 A |
228 | extern int udp_use_randomport; |
229 | extern int tcp_use_randomport; | |
230 | ||
316670eb A |
231 | /* Structs used for flowhash computation */ |
232 | struct inp_flowhash_key_addr { | |
233 | union { | |
234 | struct in_addr v4; | |
235 | struct in6_addr v6; | |
236 | u_int8_t addr8[16]; | |
237 | u_int16_t addr16[8]; | |
238 | u_int32_t addr32[4]; | |
239 | } infha; | |
240 | }; | |
241 | ||
242 | struct inp_flowhash_key { | |
39236c6e | 243 | struct inp_flowhash_key_addr infh_laddr; |
316670eb A |
244 | struct inp_flowhash_key_addr infh_faddr; |
245 | u_int32_t infh_lport; | |
246 | u_int32_t infh_fport; | |
247 | u_int32_t infh_af; | |
248 | u_int32_t infh_proto; | |
249 | u_int32_t infh_rand1; | |
250 | u_int32_t infh_rand2; | |
251 | }; | |
252 | ||
39236c6e A |
253 | static u_int32_t inp_hash_seed = 0; |
254 | ||
255 | static int infc_cmp(const struct inpcb *, const struct inpcb *); | |
256 | ||
257 | /* Flags used by inp_fc_getinp */ | |
258 | #define INPFC_SOLOCKED 0x1 | |
259 | #define INPFC_REMOVE 0x2 | |
260 | static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t); | |
261 | ||
262 | static void inp_fc_feedback(struct inpcb *); | |
263 | extern void tcp_remove_from_time_wait(struct inpcb *inp); | |
316670eb | 264 | |
39236c6e | 265 | decl_lck_mtx_data(static, inp_fc_lck); |
316670eb | 266 | |
bd504ef0 A |
267 | RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree; |
268 | RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp); | |
269 | RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp); | |
316670eb | 270 | |
bd504ef0 A |
271 | /* |
272 | * Use this inp as a key to find an inp in the flowhash tree. | |
273 | * Accesses to it are protected by inp_fc_lck. | |
274 | */ | |
275 | struct inpcb key_inp; | |
316670eb | 276 | |
1c79356b A |
277 | /* |
278 | * in_pcb.c: manage the Protocol Control Blocks. | |
1c79356b A |
279 | */ |
280 | ||
316670eb | 281 | void |
39236c6e | 282 | in_pcbinit(void) |
316670eb | 283 | { |
39236c6e | 284 | static int inpcb_initialized = 0; |
316670eb | 285 | |
39236c6e A |
286 | VERIFY(!inpcb_initialized); |
287 | inpcb_initialized = 1; | |
316670eb | 288 | |
39236c6e A |
289 | inpcb_lock_grp_attr = lck_grp_attr_alloc_init(); |
290 | inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr); | |
291 | inpcb_lock_attr = lck_attr_alloc_init(); | |
292 | lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr); | |
293 | lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr); | |
39037602 A |
294 | inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout, |
295 | NULL, THREAD_CALL_PRIORITY_KERNEL); | |
296 | inpcb_fast_thread_call = thread_call_allocate_with_priority( | |
297 | inpcb_timeout, NULL, THREAD_CALL_PRIORITY_KERNEL); | |
298 | if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL) | |
299 | panic("unable to alloc the inpcb thread call"); | |
39236c6e A |
300 | |
301 | /* | |
302 | * Initialize data structures required to deliver | |
303 | * flow advisories. | |
304 | */ | |
305 | lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr); | |
bd504ef0 | 306 | lck_mtx_lock(&inp_fc_lck); |
316670eb | 307 | RB_INIT(&inp_fc_tree); |
bd504ef0 A |
308 | bzero(&key_inp, sizeof(key_inp)); |
309 | lck_mtx_unlock(&inp_fc_lck); | |
316670eb A |
310 | } |
311 | ||
39236c6e A |
312 | #define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \ |
313 | ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0)) | |
314 | static void | |
39037602 | 315 | inpcb_timeout(void *arg0, void *arg1) |
39236c6e | 316 | { |
5ba3f43e | 317 | #pragma unused(arg0, arg1) |
39236c6e A |
318 | struct inpcbinfo *ipi; |
319 | boolean_t t, gc; | |
320 | struct intimercount gccnt, tmcnt; | |
39236c6e A |
321 | |
322 | /* | |
323 | * Update coarse-grained networking timestamp (in sec.); the idea | |
324 | * is to piggy-back on the timeout callout to update the counter | |
325 | * returnable via net_uptime(). | |
326 | */ | |
327 | net_update_uptime(); | |
328 | ||
fe8ab488 A |
329 | bzero(&gccnt, sizeof(gccnt)); |
330 | bzero(&tmcnt, sizeof(tmcnt)); | |
331 | ||
39236c6e A |
332 | lck_mtx_lock_spin(&inpcb_timeout_lock); |
333 | gc = inpcb_garbage_collecting; | |
334 | inpcb_garbage_collecting = FALSE; | |
39236c6e A |
335 | |
336 | t = inpcb_ticking; | |
337 | inpcb_ticking = FALSE; | |
338 | ||
339 | if (gc || t) { | |
340 | lck_mtx_unlock(&inpcb_timeout_lock); | |
341 | ||
342 | lck_mtx_lock(&inpcb_lock); | |
343 | TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) { | |
344 | if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) { | |
345 | bzero(&ipi->ipi_gc_req, | |
346 | sizeof(ipi->ipi_gc_req)); | |
347 | if (gc && ipi->ipi_gc != NULL) { | |
348 | ipi->ipi_gc(ipi); | |
349 | gccnt.intimer_lazy += | |
350 | ipi->ipi_gc_req.intimer_lazy; | |
351 | gccnt.intimer_fast += | |
352 | ipi->ipi_gc_req.intimer_fast; | |
353 | gccnt.intimer_nodelay += | |
354 | ipi->ipi_gc_req.intimer_nodelay; | |
355 | } | |
356 | } | |
357 | if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) { | |
358 | bzero(&ipi->ipi_timer_req, | |
359 | sizeof(ipi->ipi_timer_req)); | |
360 | if (t && ipi->ipi_timer != NULL) { | |
361 | ipi->ipi_timer(ipi); | |
362 | tmcnt.intimer_lazy += | |
363 | ipi->ipi_timer_req.intimer_lazy; | |
5ba3f43e | 364 | tmcnt.intimer_fast += |
39236c6e A |
365 | ipi->ipi_timer_req.intimer_fast; |
366 | tmcnt.intimer_nodelay += | |
367 | ipi->ipi_timer_req.intimer_nodelay; | |
368 | } | |
369 | } | |
370 | } | |
371 | lck_mtx_unlock(&inpcb_lock); | |
372 | lck_mtx_lock_spin(&inpcb_timeout_lock); | |
373 | } | |
374 | ||
375 | /* lock was dropped above, so check first before overriding */ | |
376 | if (!inpcb_garbage_collecting) | |
377 | inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt); | |
378 | if (!inpcb_ticking) | |
379 | inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt); | |
380 | ||
381 | /* re-arm the timer if there's work to do */ | |
5ba3f43e A |
382 | inpcb_timeout_run--; |
383 | VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2); | |
39236c6e | 384 | |
39236c6e | 385 | if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) |
39037602 | 386 | inpcb_sched_timeout(); |
39236c6e A |
387 | else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) |
388 | /* be lazy when idle with little activity */ | |
39037602 | 389 | inpcb_sched_lazy_timeout(); |
39236c6e | 390 | else |
39037602 | 391 | inpcb_sched_timeout(); |
39236c6e A |
392 | |
393 | lck_mtx_unlock(&inpcb_timeout_lock); | |
394 | } | |
395 | ||
396 | static void | |
39037602 | 397 | inpcb_sched_timeout(void) |
39236c6e | 398 | { |
39037602 A |
399 | _inpcb_sched_timeout(0); |
400 | } | |
401 | ||
402 | static void | |
403 | inpcb_sched_lazy_timeout(void) | |
404 | { | |
405 | _inpcb_sched_timeout(inpcb_timeout_lazy); | |
406 | } | |
39236c6e | 407 | |
39037602 A |
408 | static void |
409 | _inpcb_sched_timeout(unsigned int offset) | |
410 | { | |
411 | uint64_t deadline, leeway; | |
412 | ||
413 | clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline); | |
5ba3f43e | 414 | LCK_MTX_ASSERT(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED); |
39236c6e | 415 | if (inpcb_timeout_run == 0 && |
39037602 | 416 | (inpcb_garbage_collecting || inpcb_ticking)) { |
39236c6e A |
417 | lck_mtx_convert_spin(&inpcb_timeout_lock); |
418 | inpcb_timeout_run++; | |
39037602 | 419 | if (offset == 0) { |
39236c6e | 420 | inpcb_fast_timer_on = TRUE; |
39037602 A |
421 | thread_call_enter_delayed(inpcb_thread_call, |
422 | deadline); | |
39236c6e A |
423 | } else { |
424 | inpcb_fast_timer_on = FALSE; | |
39037602 A |
425 | clock_interval_to_absolutetime_interval(offset, |
426 | NSEC_PER_SEC, &leeway); | |
427 | thread_call_enter_delayed_with_leeway( | |
428 | inpcb_thread_call, NULL, deadline, leeway, | |
429 | THREAD_CALL_DELAY_LEEWAY); | |
39236c6e A |
430 | } |
431 | } else if (inpcb_timeout_run == 1 && | |
39037602 | 432 | offset == 0 && !inpcb_fast_timer_on) { |
39236c6e A |
433 | /* |
434 | * Since the request was for a fast timer but the | |
435 | * scheduled timer is a lazy timer, try to schedule | |
39037602 | 436 | * another instance of fast timer also. |
39236c6e A |
437 | */ |
438 | lck_mtx_convert_spin(&inpcb_timeout_lock); | |
439 | inpcb_timeout_run++; | |
440 | inpcb_fast_timer_on = TRUE; | |
39037602 | 441 | thread_call_enter_delayed(inpcb_fast_thread_call, deadline); |
39236c6e A |
442 | } |
443 | } | |
444 | ||
445 | void | |
446 | inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type) | |
447 | { | |
fe8ab488 | 448 | u_int32_t gccnt; |
39037602 | 449 | |
39236c6e A |
450 | lck_mtx_lock_spin(&inpcb_timeout_lock); |
451 | inpcb_garbage_collecting = TRUE; | |
fe8ab488 A |
452 | gccnt = ipi->ipi_gc_req.intimer_nodelay + |
453 | ipi->ipi_gc_req.intimer_fast; | |
454 | ||
5ba3f43e A |
455 | if (gccnt > INPCB_GCREQ_THRESHOLD) { |
456 | type = INPCB_TIMER_FAST; | |
fe8ab488 A |
457 | } |
458 | ||
39236c6e A |
459 | switch (type) { |
460 | case INPCB_TIMER_NODELAY: | |
461 | atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1); | |
39037602 | 462 | inpcb_sched_timeout(); |
39236c6e A |
463 | break; |
464 | case INPCB_TIMER_FAST: | |
465 | atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1); | |
39037602 | 466 | inpcb_sched_timeout(); |
39236c6e A |
467 | break; |
468 | default: | |
469 | atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1); | |
39037602 | 470 | inpcb_sched_lazy_timeout(); |
39236c6e A |
471 | break; |
472 | } | |
473 | lck_mtx_unlock(&inpcb_timeout_lock); | |
474 | } | |
475 | ||
476 | void | |
477 | inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type) | |
478 | { | |
39037602 | 479 | |
39236c6e A |
480 | lck_mtx_lock_spin(&inpcb_timeout_lock); |
481 | inpcb_ticking = TRUE; | |
482 | switch (type) { | |
483 | case INPCB_TIMER_NODELAY: | |
484 | atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1); | |
39037602 | 485 | inpcb_sched_timeout(); |
39236c6e A |
486 | break; |
487 | case INPCB_TIMER_FAST: | |
488 | atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1); | |
39037602 | 489 | inpcb_sched_timeout(); |
39236c6e A |
490 | break; |
491 | default: | |
492 | atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1); | |
39037602 | 493 | inpcb_sched_lazy_timeout(); |
39236c6e A |
494 | break; |
495 | } | |
496 | lck_mtx_unlock(&inpcb_timeout_lock); | |
497 | } | |
498 | ||
499 | void | |
500 | in_pcbinfo_attach(struct inpcbinfo *ipi) | |
501 | { | |
502 | struct inpcbinfo *ipi0; | |
503 | ||
504 | lck_mtx_lock(&inpcb_lock); | |
505 | TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) { | |
506 | if (ipi0 == ipi) { | |
507 | panic("%s: ipi %p already in the list\n", | |
508 | __func__, ipi); | |
509 | /* NOTREACHED */ | |
510 | } | |
511 | } | |
512 | TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry); | |
513 | lck_mtx_unlock(&inpcb_lock); | |
514 | } | |
515 | ||
516 | int | |
517 | in_pcbinfo_detach(struct inpcbinfo *ipi) | |
518 | { | |
519 | struct inpcbinfo *ipi0; | |
520 | int error = 0; | |
521 | ||
522 | lck_mtx_lock(&inpcb_lock); | |
523 | TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) { | |
524 | if (ipi0 == ipi) | |
525 | break; | |
526 | } | |
527 | if (ipi0 != NULL) | |
528 | TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry); | |
529 | else | |
530 | error = ENXIO; | |
531 | lck_mtx_unlock(&inpcb_lock); | |
532 | ||
533 | return (error); | |
534 | } | |
535 | ||
1c79356b A |
536 | /* |
537 | * Allocate a PCB and associate it with the socket. | |
2d21ac55 A |
538 | * |
539 | * Returns: 0 Success | |
540 | * ENOBUFS | |
541 | * ENOMEM | |
1c79356b A |
542 | */ |
543 | int | |
39236c6e | 544 | in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) |
1c79356b | 545 | { |
39236c6e | 546 | #pragma unused(p) |
2d21ac55 | 547 | struct inpcb *inp; |
39236c6e | 548 | caddr_t temp; |
2d21ac55 A |
549 | #if CONFIG_MACF_NET |
550 | int mac_error; | |
39236c6e | 551 | #endif /* CONFIG_MACF_NET */ |
1c79356b | 552 | |
3e170ce0 | 553 | if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) { |
39236c6e A |
554 | inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone); |
555 | if (inp == NULL) | |
556 | return (ENOBUFS); | |
557 | bzero((caddr_t)inp, sizeof (*inp)); | |
558 | } else { | |
559 | inp = (struct inpcb *)(void *)so->so_saved_pcb; | |
560 | temp = inp->inp_saved_ppcb; | |
561 | bzero((caddr_t)inp, sizeof (*inp)); | |
562 | inp->inp_saved_ppcb = temp; | |
1c79356b A |
563 | } |
564 | ||
565 | inp->inp_gencnt = ++pcbinfo->ipi_gencnt; | |
566 | inp->inp_pcbinfo = pcbinfo; | |
567 | inp->inp_socket = so; | |
2d21ac55 A |
568 | #if CONFIG_MACF_NET |
569 | mac_error = mac_inpcb_label_init(inp, M_WAITOK); | |
570 | if (mac_error != 0) { | |
3e170ce0 | 571 | if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) |
2d21ac55 A |
572 | zfree(pcbinfo->ipi_zone, inp); |
573 | return (mac_error); | |
574 | } | |
575 | mac_inpcb_label_associate(so, inp); | |
39236c6e A |
576 | #endif /* CONFIG_MACF_NET */ |
577 | /* make sure inp_stat is always 64-bit aligned */ | |
578 | inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store, | |
579 | sizeof (u_int64_t)); | |
580 | if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) + | |
581 | sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) { | |
582 | panic("%s: insufficient space to align inp_stat", __func__); | |
583 | /* NOTREACHED */ | |
584 | } | |
585 | ||
586 | /* make sure inp_cstat is always 64-bit aligned */ | |
587 | inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store, | |
588 | sizeof (u_int64_t)); | |
589 | if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) + | |
590 | sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) { | |
591 | panic("%s: insufficient space to align inp_cstat", __func__); | |
592 | /* NOTREACHED */ | |
593 | } | |
594 | ||
595 | /* make sure inp_wstat is always 64-bit aligned */ | |
596 | inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store, | |
597 | sizeof (u_int64_t)); | |
598 | if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) + | |
599 | sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) { | |
600 | panic("%s: insufficient space to align inp_wstat", __func__); | |
601 | /* NOTREACHED */ | |
6d2010ae A |
602 | } |
603 | ||
fe8ab488 A |
604 | /* make sure inp_Wstat is always 64-bit aligned */ |
605 | inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store, | |
606 | sizeof (u_int64_t)); | |
607 | if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) + | |
608 | sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) { | |
609 | panic("%s: insufficient space to align inp_Wstat", __func__); | |
610 | /* NOTREACHED */ | |
611 | } | |
39037602 | 612 | |
91447636 A |
613 | so->so_pcb = (caddr_t)inp; |
614 | ||
615 | if (so->so_proto->pr_flags & PR_PCBLOCK) { | |
39236c6e A |
616 | lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp, |
617 | pcbinfo->ipi_lock_attr); | |
91447636 A |
618 | } |
619 | ||
2d21ac55 | 620 | #if INET6 |
39236c6e | 621 | if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) |
9bccf70c | 622 | inp->inp_flags |= IN6P_IPV6_V6ONLY; |
39236c6e | 623 | |
9bccf70c A |
624 | if (ip6_auto_flowlabel) |
625 | inp->inp_flags |= IN6P_AUTOFLOWLABEL; | |
39236c6e | 626 | #endif /* INET6 */ |
39037602 A |
627 | if (intcoproc_unrestricted) |
628 | inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED; | |
39236c6e A |
629 | |
630 | (void) inp_update_policy(inp); | |
631 | ||
632 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); | |
91447636 | 633 | inp->inp_gencnt = ++pcbinfo->ipi_gencnt; |
39236c6e | 634 | LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list); |
91447636 | 635 | pcbinfo->ipi_count++; |
39236c6e | 636 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
637 | return (0); |
638 | } | |
639 | ||
2d21ac55 | 640 | /* |
39236c6e A |
641 | * in_pcblookup_local_and_cleanup does everything |
642 | * in_pcblookup_local does but it checks for a socket | |
643 | * that's going away. Since we know that the lock is | |
644 | * held read+write when this funciton is called, we | |
645 | * can safely dispose of this socket like the slow | |
646 | * timer would usually do and return NULL. This is | |
647 | * great for bind. | |
648 | */ | |
649 | struct inpcb * | |
650 | in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr, | |
651 | u_int lport_arg, int wild_okay) | |
2d21ac55 A |
652 | { |
653 | struct inpcb *inp; | |
39236c6e | 654 | |
2d21ac55 A |
655 | /* Perform normal lookup */ |
656 | inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay); | |
39236c6e | 657 | |
2d21ac55 | 658 | /* Check if we found a match but it's waiting to be disposed */ |
39236c6e | 659 | if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) { |
2d21ac55 | 660 | struct socket *so = inp->inp_socket; |
39236c6e | 661 | |
5ba3f43e | 662 | socket_lock(so, 0); |
39236c6e | 663 | |
2d21ac55 | 664 | if (so->so_usecount == 0) { |
b0d623f7 A |
665 | if (inp->inp_state != INPCB_STATE_DEAD) |
666 | in_pcbdetach(inp); | |
39236c6e | 667 | in_pcbdispose(inp); /* will unlock & destroy */ |
2d21ac55 | 668 | inp = NULL; |
39236c6e | 669 | } else { |
5ba3f43e | 670 | socket_unlock(so, 0); |
2d21ac55 A |
671 | } |
672 | } | |
39236c6e A |
673 | |
674 | return (inp); | |
2d21ac55 A |
675 | } |
676 | ||
c910b4d9 | 677 | static void |
2d21ac55 A |
678 | in_pcb_conflict_post_msg(u_int16_t port) |
679 | { | |
39236c6e A |
680 | /* |
681 | * Radar 5523020 send a kernel event notification if a | |
682 | * non-participating socket tries to bind the port a socket | |
683 | * who has set SOF_NOTIFYCONFLICT owns. | |
2d21ac55 | 684 | */ |
39236c6e | 685 | struct kev_msg ev_msg; |
2d21ac55 A |
686 | struct kev_in_portinuse in_portinuse; |
687 | ||
39236c6e A |
688 | bzero(&in_portinuse, sizeof (struct kev_in_portinuse)); |
689 | bzero(&ev_msg, sizeof (struct kev_msg)); | |
2d21ac55 A |
690 | in_portinuse.port = ntohs(port); /* port in host order */ |
691 | in_portinuse.req_pid = proc_selfpid(); | |
692 | ev_msg.vendor_code = KEV_VENDOR_APPLE; | |
693 | ev_msg.kev_class = KEV_NETWORK_CLASS; | |
694 | ev_msg.kev_subclass = KEV_INET_SUBCLASS; | |
695 | ev_msg.event_code = KEV_INET_PORTINUSE; | |
696 | ev_msg.dv[0].data_ptr = &in_portinuse; | |
39236c6e | 697 | ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse); |
2d21ac55 | 698 | ev_msg.dv[1].data_length = 0; |
39037602 | 699 | dlil_post_complete_msg(NULL, &ev_msg); |
2d21ac55 | 700 | } |
39236c6e | 701 | |
2d21ac55 | 702 | /* |
39236c6e A |
703 | * Bind an INPCB to an address and/or port. This routine should not alter |
704 | * the caller-supplied local address "nam". | |
705 | * | |
2d21ac55 A |
706 | * Returns: 0 Success |
707 | * EADDRNOTAVAIL Address not available. | |
708 | * EINVAL Invalid argument | |
709 | * EAFNOSUPPORT Address family not supported [notdef] | |
710 | * EACCES Permission denied | |
711 | * EADDRINUSE Address in use | |
712 | * EAGAIN Resource unavailable, try again | |
6d2010ae | 713 | * priv_check_cred:EPERM Operation not permitted |
2d21ac55 | 714 | */ |
1c79356b | 715 | int |
2d21ac55 | 716 | in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) |
1c79356b | 717 | { |
2d21ac55 | 718 | struct socket *so = inp->inp_socket; |
9bccf70c | 719 | unsigned short *lastport; |
1c79356b | 720 | struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; |
b0d623f7 | 721 | u_short lport = 0, rand_port = 0; |
1c79356b | 722 | int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); |
b0d623f7 | 723 | int error, randomport, conflict = 0; |
fe8ab488 | 724 | boolean_t anonport = FALSE; |
6d2010ae | 725 | kauth_cred_t cred; |
fe8ab488 A |
726 | struct in_addr laddr; |
727 | struct ifnet *outif = NULL; | |
1c79356b A |
728 | |
729 | if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ | |
730 | return (EADDRNOTAVAIL); | |
39236c6e | 731 | if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) |
1c79356b | 732 | wild = 1; |
fe8ab488 A |
733 | |
734 | bzero(&laddr, sizeof(laddr)); | |
735 | ||
4bd07ac2 A |
736 | socket_unlock(so, 0); /* keep reference on socket */ |
737 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); | |
a39ff7e2 A |
738 | if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) { |
739 | /* another thread completed the bind */ | |
740 | lck_rw_done(pcbinfo->ipi_lock); | |
741 | socket_lock(so, 0); | |
742 | return (EINVAL); | |
743 | } | |
4bd07ac2 | 744 | |
39236c6e | 745 | if (nam != NULL) { |
39236c6e A |
746 | if (nam->sa_len != sizeof (struct sockaddr_in)) { |
747 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 748 | socket_lock(so, 0); |
1c79356b | 749 | return (EINVAL); |
91447636 | 750 | } |
39236c6e | 751 | #if 0 |
1c79356b A |
752 | /* |
753 | * We should check the family, but old programs | |
754 | * incorrectly fail to initialize it. | |
755 | */ | |
39236c6e A |
756 | if (nam->sa_family != AF_INET) { |
757 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 758 | socket_lock(so, 0); |
1c79356b | 759 | return (EAFNOSUPPORT); |
91447636 | 760 | } |
39236c6e A |
761 | #endif /* 0 */ |
762 | lport = SIN(nam)->sin_port; | |
763 | ||
764 | if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) { | |
1c79356b A |
765 | /* |
766 | * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; | |
767 | * allow complete duplication of binding if | |
768 | * SO_REUSEPORT is set, or if SO_REUSEADDR is set | |
769 | * and a multicast address is bound on both | |
770 | * new and duplicated sockets. | |
771 | */ | |
772 | if (so->so_options & SO_REUSEADDR) | |
773 | reuseport = SO_REUSEADDR|SO_REUSEPORT; | |
39236c6e A |
774 | } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) { |
775 | struct sockaddr_in sin; | |
91447636 | 776 | struct ifaddr *ifa; |
39236c6e A |
777 | |
778 | /* Sanitized for interface address searches */ | |
779 | bzero(&sin, sizeof (sin)); | |
780 | sin.sin_family = AF_INET; | |
781 | sin.sin_len = sizeof (struct sockaddr_in); | |
782 | sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr; | |
783 | ||
784 | ifa = ifa_ifwithaddr(SA(&sin)); | |
785 | if (ifa == NULL) { | |
786 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 787 | socket_lock(so, 0); |
1c79356b | 788 | return (EADDRNOTAVAIL); |
39236c6e A |
789 | } else { |
790 | /* | |
791 | * Opportunistically determine the outbound | |
792 | * interface that may be used; this may not | |
793 | * hold true if we end up using a route | |
794 | * going over a different interface, e.g. | |
795 | * when sending to a local address. This | |
796 | * will get updated again after sending. | |
797 | */ | |
6d2010ae | 798 | IFA_LOCK(ifa); |
316670eb | 799 | outif = ifa->ifa_ifp; |
6d2010ae A |
800 | IFA_UNLOCK(ifa); |
801 | IFA_REMREF(ifa); | |
91447636 | 802 | } |
1c79356b | 803 | } |
39236c6e | 804 | if (lport != 0) { |
1c79356b | 805 | struct inpcb *t; |
39236c6e | 806 | uid_t u; |
1c79356b | 807 | |
5ba3f43e | 808 | #if !CONFIG_EMBEDDED |
6d2010ae A |
809 | if (ntohs(lport) < IPPORT_RESERVED) { |
810 | cred = kauth_cred_proc_ref(p); | |
39236c6e A |
811 | error = priv_check_cred(cred, |
812 | PRIV_NETINET_RESERVEDPORT, 0); | |
6d2010ae A |
813 | kauth_cred_unref(&cred); |
814 | if (error != 0) { | |
39236c6e | 815 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae A |
816 | socket_lock(so, 0); |
817 | return (EACCES); | |
818 | } | |
91447636 | 819 | } |
5ba3f43e | 820 | #endif /* !CONFIG_EMBEDDED */ |
39236c6e A |
821 | if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) && |
822 | (u = kauth_cred_getuid(so->so_cred)) != 0 && | |
823 | (t = in_pcblookup_local_and_cleanup( | |
824 | inp->inp_pcbinfo, SIN(nam)->sin_addr, lport, | |
825 | INPLOOKUP_WILDCARD)) != NULL && | |
826 | (SIN(nam)->sin_addr.s_addr != INADDR_ANY || | |
827 | t->inp_laddr.s_addr != INADDR_ANY || | |
828 | !(t->inp_socket->so_options & SO_REUSEPORT)) && | |
829 | (u != kauth_cred_getuid(t->inp_socket->so_cred)) && | |
830 | !(t->inp_socket->so_flags & SOF_REUSESHAREUID) && | |
831 | (SIN(nam)->sin_addr.s_addr != INADDR_ANY || | |
832 | t->inp_laddr.s_addr != INADDR_ANY)) { | |
833 | if ((t->inp_socket->so_flags & | |
834 | SOF_NOTIFYCONFLICT) && | |
835 | !(so->so_flags & SOF_NOTIFYCONFLICT)) | |
836 | conflict = 1; | |
837 | ||
838 | lck_rw_done(pcbinfo->ipi_lock); | |
839 | ||
840 | if (conflict) | |
841 | in_pcb_conflict_post_msg(lport); | |
2d21ac55 | 842 | |
39236c6e A |
843 | socket_lock(so, 0); |
844 | return (EADDRINUSE); | |
1c79356b | 845 | } |
39236c6e A |
846 | t = in_pcblookup_local_and_cleanup(pcbinfo, |
847 | SIN(nam)->sin_addr, lport, wild); | |
848 | if (t != NULL && | |
1c79356b A |
849 | (reuseport & t->inp_socket->so_options) == 0) { |
850 | #if INET6 | |
39236c6e A |
851 | if (SIN(nam)->sin_addr.s_addr != INADDR_ANY || |
852 | t->inp_laddr.s_addr != INADDR_ANY || | |
853 | SOCK_DOM(so) != PF_INET6 || | |
854 | SOCK_DOM(t->inp_socket) != PF_INET6) | |
2d21ac55 A |
855 | #endif /* INET6 */ |
856 | { | |
2d21ac55 | 857 | |
39236c6e A |
858 | if ((t->inp_socket->so_flags & |
859 | SOF_NOTIFYCONFLICT) && | |
860 | !(so->so_flags & SOF_NOTIFYCONFLICT)) | |
2d21ac55 A |
861 | conflict = 1; |
862 | ||
39236c6e | 863 | lck_rw_done(pcbinfo->ipi_lock); |
2d21ac55 A |
864 | |
865 | if (conflict) | |
866 | in_pcb_conflict_post_msg(lport); | |
91447636 A |
867 | socket_lock(so, 0); |
868 | return (EADDRINUSE); | |
869 | } | |
1c79356b A |
870 | } |
871 | } | |
fe8ab488 | 872 | laddr = SIN(nam)->sin_addr; |
1c79356b A |
873 | } |
874 | if (lport == 0) { | |
875 | u_short first, last; | |
876 | int count; | |
5ba3f43e | 877 | bool found; |
1c79356b | 878 | |
39236c6e A |
879 | randomport = (so->so_flags & SOF_BINDRANDOMPORT) || |
880 | (so->so_type == SOCK_STREAM ? tcp_use_randomport : | |
881 | udp_use_randomport); | |
882 | ||
883 | /* | |
fe8ab488 A |
884 | * Even though this looks similar to the code in |
885 | * in6_pcbsetport, the v6 vs v4 checks are different. | |
39236c6e | 886 | */ |
fe8ab488 | 887 | anonport = TRUE; |
1c79356b A |
888 | if (inp->inp_flags & INP_HIGHPORT) { |
889 | first = ipport_hifirstauto; /* sysctl */ | |
890 | last = ipport_hilastauto; | |
39236c6e | 891 | lastport = &pcbinfo->ipi_lasthi; |
1c79356b | 892 | } else if (inp->inp_flags & INP_LOWPORT) { |
6d2010ae | 893 | cred = kauth_cred_proc_ref(p); |
39236c6e A |
894 | error = priv_check_cred(cred, |
895 | PRIV_NETINET_RESERVEDPORT, 0); | |
6d2010ae A |
896 | kauth_cred_unref(&cred); |
897 | if (error != 0) { | |
39236c6e | 898 | lck_rw_done(pcbinfo->ipi_lock); |
91447636 | 899 | socket_lock(so, 0); |
39236c6e | 900 | return (error); |
91447636 | 901 | } |
1c79356b A |
902 | first = ipport_lowfirstauto; /* 1023 */ |
903 | last = ipport_lowlastauto; /* 600 */ | |
39236c6e | 904 | lastport = &pcbinfo->ipi_lastlow; |
1c79356b A |
905 | } else { |
906 | first = ipport_firstauto; /* sysctl */ | |
907 | last = ipport_lastauto; | |
39236c6e | 908 | lastport = &pcbinfo->ipi_lastport; |
1c79356b | 909 | } |
b0d623f7 A |
910 | /* No point in randomizing if only one port is available */ |
911 | ||
912 | if (first == last) | |
39236c6e | 913 | randomport = 0; |
1c79356b A |
914 | /* |
915 | * Simple check to ensure all ports are not used up causing | |
916 | * a deadlock here. | |
917 | * | |
918 | * We split the two cases (up and down) so that the direction | |
919 | * is not being tested on each round of the loop. | |
920 | */ | |
921 | if (first > last) { | |
5ba3f43e A |
922 | struct in_addr lookup_addr; |
923 | ||
1c79356b A |
924 | /* |
925 | * counting down | |
926 | */ | |
b0d623f7 | 927 | if (randomport) { |
5ba3f43e | 928 | read_frandom(&rand_port, sizeof (rand_port)); |
39236c6e A |
929 | *lastport = |
930 | first - (rand_port % (first - last)); | |
b0d623f7 | 931 | } |
1c79356b A |
932 | count = first - last; |
933 | ||
5ba3f43e A |
934 | lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr : |
935 | inp->inp_laddr; | |
936 | ||
937 | found = false; | |
1c79356b A |
938 | do { |
939 | if (count-- < 0) { /* completely used? */ | |
39236c6e | 940 | lck_rw_done(pcbinfo->ipi_lock); |
91447636 | 941 | socket_lock(so, 0); |
9bccf70c | 942 | return (EADDRNOTAVAIL); |
1c79356b A |
943 | } |
944 | --*lastport; | |
945 | if (*lastport > first || *lastport < last) | |
946 | *lastport = first; | |
947 | lport = htons(*lastport); | |
5ba3f43e A |
948 | |
949 | found = in_pcblookup_local_and_cleanup(pcbinfo, | |
950 | lookup_addr, lport, wild) == NULL; | |
951 | } while (!found); | |
1c79356b | 952 | } else { |
5ba3f43e A |
953 | struct in_addr lookup_addr; |
954 | ||
1c79356b A |
955 | /* |
956 | * counting up | |
957 | */ | |
b0d623f7 | 958 | if (randomport) { |
5ba3f43e | 959 | read_frandom(&rand_port, sizeof (rand_port)); |
39236c6e A |
960 | *lastport = |
961 | first + (rand_port % (first - last)); | |
b0d623f7 | 962 | } |
1c79356b A |
963 | count = last - first; |
964 | ||
5ba3f43e A |
965 | lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr : |
966 | inp->inp_laddr; | |
967 | ||
968 | found = false; | |
1c79356b A |
969 | do { |
970 | if (count-- < 0) { /* completely used? */ | |
39236c6e | 971 | lck_rw_done(pcbinfo->ipi_lock); |
91447636 | 972 | socket_lock(so, 0); |
9bccf70c | 973 | return (EADDRNOTAVAIL); |
1c79356b A |
974 | } |
975 | ++*lastport; | |
976 | if (*lastport < first || *lastport > last) | |
977 | *lastport = first; | |
978 | lport = htons(*lastport); | |
5ba3f43e A |
979 | |
980 | found = in_pcblookup_local_and_cleanup(pcbinfo, | |
981 | lookup_addr, lport, wild) == NULL; | |
982 | } while (!found); | |
1c79356b A |
983 | } |
984 | } | |
91447636 | 985 | socket_lock(so, 0); |
4bd07ac2 A |
986 | |
987 | /* | |
988 | * We unlocked socket's protocol lock for a long time. | |
989 | * The socket might have been dropped/defuncted. | |
990 | * Checking if world has changed since. | |
991 | */ | |
992 | if (inp->inp_state == INPCB_STATE_DEAD) { | |
993 | lck_rw_done(pcbinfo->ipi_lock); | |
994 | return (ECONNABORTED); | |
995 | } | |
996 | ||
fe8ab488 A |
997 | if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) { |
998 | lck_rw_done(pcbinfo->ipi_lock); | |
999 | return (EINVAL); | |
1000 | } | |
1001 | ||
1002 | if (laddr.s_addr != INADDR_ANY) { | |
1003 | inp->inp_laddr = laddr; | |
1004 | inp->inp_last_outifp = outif; | |
1005 | } | |
1c79356b | 1006 | inp->inp_lport = lport; |
fe8ab488 A |
1007 | if (anonport) |
1008 | inp->inp_flags |= INP_ANONPORT; | |
1009 | ||
91447636 | 1010 | if (in_pcbinshash(inp, 1) != 0) { |
1c79356b | 1011 | inp->inp_laddr.s_addr = INADDR_ANY; |
316670eb | 1012 | inp->inp_last_outifp = NULL; |
fe8ab488 A |
1013 | |
1014 | inp->inp_lport = 0; | |
1015 | if (anonport) | |
1016 | inp->inp_flags &= ~INP_ANONPORT; | |
39236c6e | 1017 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
1018 | return (EAGAIN); |
1019 | } | |
39236c6e | 1020 | lck_rw_done(pcbinfo->ipi_lock); |
2d21ac55 | 1021 | sflt_notify(so, sock_evt_bound, NULL); |
1c79356b A |
1022 | return (0); |
1023 | } | |
1024 | ||
39037602 A |
1025 | #define APN_FALLBACK_IP_FILTER(a) \ |
1026 | (IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \ | |
1027 | IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \ | |
1028 | IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \ | |
1029 | IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \ | |
1030 | IN_PRIVATE(ntohl((a)->sin_addr.s_addr))) | |
1031 | ||
1032 | #define APN_FALLBACK_NOTIF_INTERVAL 2 /* Magic Number */ | |
1033 | static uint64_t last_apn_fallback = 0; | |
1034 | ||
1035 | static boolean_t | |
1036 | apn_fallback_required (proc_t proc, struct socket *so, struct sockaddr_in *p_dstv4) | |
1037 | { | |
1038 | uint64_t timenow; | |
1039 | struct sockaddr_storage lookup_default_addr; | |
1040 | struct rtentry *rt = NULL; | |
1041 | ||
1042 | VERIFY(proc != NULL); | |
1043 | ||
1044 | if (apn_fallbk_enabled == FALSE) | |
1045 | return FALSE; | |
1046 | ||
1047 | if (proc == kernproc) | |
1048 | return FALSE; | |
1049 | ||
1050 | if (so && (so->so_options & SO_NOAPNFALLBK)) | |
1051 | return FALSE; | |
1052 | ||
1053 | timenow = net_uptime(); | |
1054 | if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) { | |
1055 | apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n")); | |
1056 | return FALSE; | |
1057 | } | |
1058 | ||
1059 | if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4)) | |
1060 | return FALSE; | |
1061 | ||
1062 | /* Check if we have unscoped IPv6 default route through cellular */ | |
1063 | bzero(&lookup_default_addr, sizeof(lookup_default_addr)); | |
1064 | lookup_default_addr.ss_family = AF_INET6; | |
1065 | lookup_default_addr.ss_len = sizeof(struct sockaddr_in6); | |
1066 | ||
1067 | rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0); | |
1068 | if (NULL == rt) { | |
1069 | apn_fallbk_log((LOG_INFO, "APN fallback notification could not find " | |
1070 | "unscoped default IPv6 route.\n")); | |
1071 | return FALSE; | |
1072 | } | |
1073 | ||
1074 | if (!IFNET_IS_CELLULAR(rt->rt_ifp)) { | |
1075 | rtfree(rt); | |
1076 | apn_fallbk_log((LOG_INFO, "APN fallback notification could not find " | |
1077 | "unscoped default IPv6 route through cellular interface.\n")); | |
1078 | return FALSE; | |
1079 | } | |
1080 | ||
1081 | /* | |
1082 | * We have a default IPv6 route, ensure that | |
1083 | * we do not have IPv4 default route before triggering | |
1084 | * the event | |
1085 | */ | |
1086 | rtfree(rt); | |
1087 | rt = NULL; | |
1088 | ||
1089 | bzero(&lookup_default_addr, sizeof(lookup_default_addr)); | |
1090 | lookup_default_addr.ss_family = AF_INET; | |
1091 | lookup_default_addr.ss_len = sizeof(struct sockaddr_in); | |
1092 | ||
1093 | rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0); | |
1094 | ||
1095 | if (rt) { | |
1096 | rtfree(rt); | |
1097 | rt = NULL; | |
1098 | apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped " | |
1099 | "IPv4 default route!\n")); | |
1100 | return FALSE; | |
1101 | } | |
1102 | ||
1103 | { | |
1104 | /* | |
1105 | * We disable APN fallback if the binary is not a third-party app. | |
1106 | * Note that platform daemons use their process name as a | |
1107 | * bundle ID so we filter out bundle IDs without dots. | |
1108 | */ | |
1109 | const char *bundle_id = cs_identity_get(proc); | |
1110 | if (bundle_id == NULL || | |
1111 | bundle_id[0] == '\0' || | |
1112 | strchr(bundle_id, '.') == NULL || | |
1113 | strncmp(bundle_id, "com.apple.", sizeof("com.apple.") - 1) == 0) { | |
1114 | apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-" | |
1115 | "party bundle ID \"%s\"!\n", (bundle_id ? bundle_id : "NULL"))); | |
1116 | return FALSE; | |
1117 | } | |
1118 | } | |
1119 | ||
1120 | { | |
1121 | /* | |
1122 | * The Apple App Store IPv6 requirement started on | |
1123 | * June 1st, 2016 at 12:00:00 AM PDT. | |
1124 | * We disable APN fallback if the binary is more recent than that. | |
1125 | * We check both atime and birthtime since birthtime is not always supported. | |
1126 | */ | |
1127 | static const long ipv6_start_date = 1464764400L; | |
1128 | vfs_context_t context; | |
1129 | struct stat64 sb; | |
1130 | int vn_stat_error; | |
1131 | ||
1132 | bzero(&sb, sizeof(struct stat64)); | |
1133 | context = vfs_context_create(NULL); | |
1134 | vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, context); | |
1135 | (void)vfs_context_rele(context); | |
1136 | ||
1137 | if (vn_stat_error != 0 || | |
1138 | sb.st_atimespec.tv_sec >= ipv6_start_date || | |
1139 | sb.st_birthtimespec.tv_sec >= ipv6_start_date) { | |
1140 | apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary " | |
1141 | "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n", | |
1142 | vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec, | |
1143 | sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec)); | |
1144 | return FALSE; | |
1145 | } | |
1146 | } | |
1147 | return TRUE; | |
1148 | } | |
1149 | ||
1150 | static void | |
1151 | apn_fallback_trigger(proc_t proc) | |
1152 | { | |
1153 | pid_t pid = 0; | |
1154 | struct kev_msg ev_msg; | |
1155 | struct kev_netevent_apnfallbk_data apnfallbk_data; | |
1156 | ||
1157 | last_apn_fallback = net_uptime(); | |
1158 | pid = proc_pid(proc); | |
1159 | uuid_t application_uuid; | |
1160 | uuid_clear(application_uuid); | |
1161 | proc_getexecutableuuid(proc, application_uuid, | |
1162 | sizeof(application_uuid)); | |
1163 | ||
1164 | bzero(&ev_msg, sizeof (struct kev_msg)); | |
1165 | ev_msg.vendor_code = KEV_VENDOR_APPLE; | |
1166 | ev_msg.kev_class = KEV_NETWORK_CLASS; | |
1167 | ev_msg.kev_subclass = KEV_NETEVENT_SUBCLASS; | |
1168 | ev_msg.event_code = KEV_NETEVENT_APNFALLBACK; | |
1169 | ||
1170 | bzero(&apnfallbk_data, sizeof(apnfallbk_data)); | |
1171 | apnfallbk_data.epid = pid; | |
1172 | uuid_copy(apnfallbk_data.euuid, application_uuid); | |
1173 | ||
1174 | ev_msg.dv[0].data_ptr = &apnfallbk_data; | |
1175 | ev_msg.dv[0].data_length = sizeof(apnfallbk_data); | |
1176 | kev_post_msg(&ev_msg); | |
1177 | apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n")); | |
1178 | } | |
1179 | ||
1c79356b | 1180 | /* |
39236c6e A |
1181 | * Transform old in_pcbconnect() into an inner subroutine for new |
1182 | * in_pcbconnect(); do some validity-checking on the remote address | |
1183 | * (in "nam") and then determine local host address (i.e., which | |
1184 | * interface) to use to access that remote host. | |
1185 | * | |
1186 | * This routine may alter the caller-supplied remote address "nam". | |
1c79356b | 1187 | * |
39236c6e A |
1188 | * The caller may override the bound-to-interface setting of the socket |
1189 | * by specifying the ifscope parameter (e.g. from IP_PKTINFO.) | |
1190 | * | |
1191 | * This routine might return an ifp with a reference held if the caller | |
1192 | * provides a non-NULL outif, even in the error case. The caller is | |
1193 | * responsible for releasing its reference. | |
2d21ac55 A |
1194 | * |
1195 | * Returns: 0 Success | |
1196 | * EINVAL Invalid argument | |
1197 | * EAFNOSUPPORT Address family not supported | |
1198 | * EADDRNOTAVAIL Address not available | |
1c79356b | 1199 | */ |
1c79356b | 1200 | int |
39236c6e | 1201 | in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr, |
39037602 | 1202 | unsigned int ifscope, struct ifnet **outif, int raw) |
1c79356b | 1203 | { |
39236c6e A |
1204 | struct route *ro = &inp->inp_route; |
1205 | struct in_ifaddr *ia = NULL; | |
1206 | struct sockaddr_in sin; | |
1207 | int error = 0; | |
fe8ab488 | 1208 | boolean_t restricted = FALSE; |
39236c6e A |
1209 | |
1210 | if (outif != NULL) | |
1211 | *outif = NULL; | |
1212 | if (nam->sa_len != sizeof (struct sockaddr_in)) | |
1c79356b | 1213 | return (EINVAL); |
39236c6e | 1214 | if (SIN(nam)->sin_family != AF_INET) |
1c79356b | 1215 | return (EAFNOSUPPORT); |
39037602 | 1216 | if (raw == 0 && SIN(nam)->sin_port == 0) |
1c79356b | 1217 | return (EADDRNOTAVAIL); |
b0d623f7 | 1218 | |
39236c6e A |
1219 | /* |
1220 | * If the destination address is INADDR_ANY, | |
1221 | * use the primary local address. | |
1222 | * If the supplied address is INADDR_BROADCAST, | |
1223 | * and the primary interface supports broadcast, | |
1224 | * choose the broadcast address for that interface. | |
1225 | */ | |
39037602 A |
1226 | if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY || |
1227 | SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) { | |
39236c6e A |
1228 | lck_rw_lock_shared(in_ifaddr_rwlock); |
1229 | if (!TAILQ_EMPTY(&in_ifaddrhead)) { | |
1230 | ia = TAILQ_FIRST(&in_ifaddrhead); | |
1231 | IFA_LOCK_SPIN(&ia->ia_ifa); | |
1232 | if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) { | |
1233 | SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr; | |
1234 | } else if (ia->ia_ifp->if_flags & IFF_BROADCAST) { | |
1235 | SIN(nam)->sin_addr = | |
1236 | SIN(&ia->ia_broadaddr)->sin_addr; | |
1237 | } | |
1238 | IFA_UNLOCK(&ia->ia_ifa); | |
1239 | ia = NULL; | |
1240 | } | |
1241 | lck_rw_done(in_ifaddr_rwlock); | |
1242 | } | |
1243 | /* | |
1244 | * Otherwise, if the socket has already bound the source, just use it. | |
1245 | */ | |
1246 | if (inp->inp_laddr.s_addr != INADDR_ANY) { | |
1247 | VERIFY(ia == NULL); | |
1248 | *laddr = inp->inp_laddr; | |
1249 | return (0); | |
1c79356b | 1250 | } |
6d2010ae | 1251 | |
39236c6e A |
1252 | /* |
1253 | * If the ifscope is specified by the caller (e.g. IP_PKTINFO) | |
1254 | * then it overrides the sticky ifscope set for the socket. | |
1255 | */ | |
1256 | if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF)) | |
1257 | ifscope = inp->inp_boundifp->if_index; | |
6d2010ae | 1258 | |
39236c6e A |
1259 | /* |
1260 | * If route is known or can be allocated now, | |
1261 | * our src addr is taken from the i/f, else punt. | |
1262 | * Note that we should check the address family of the cached | |
1263 | * destination, in case of sharing the cache with IPv6. | |
1264 | */ | |
1265 | if (ro->ro_rt != NULL) | |
1266 | RT_LOCK_SPIN(ro->ro_rt); | |
1267 | if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET || | |
1268 | SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr || | |
1269 | (inp->inp_socket->so_options & SO_DONTROUTE)) { | |
b0d623f7 | 1270 | if (ro->ro_rt != NULL) |
b0d623f7 | 1271 | RT_UNLOCK(ro->ro_rt); |
39236c6e A |
1272 | ROUTE_RELEASE(ro); |
1273 | } | |
1274 | if (!(inp->inp_socket->so_options & SO_DONTROUTE) && | |
1275 | (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) { | |
1276 | if (ro->ro_rt != NULL) | |
1277 | RT_UNLOCK(ro->ro_rt); | |
1278 | ROUTE_RELEASE(ro); | |
1279 | /* No route yet, so try to acquire one */ | |
1280 | bzero(&ro->ro_dst, sizeof (struct sockaddr_in)); | |
1281 | ro->ro_dst.sa_family = AF_INET; | |
1282 | ro->ro_dst.sa_len = sizeof (struct sockaddr_in); | |
1283 | SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr; | |
1284 | rtalloc_scoped(ro, ifscope); | |
1285 | if (ro->ro_rt != NULL) | |
1286 | RT_LOCK_SPIN(ro->ro_rt); | |
1287 | } | |
1288 | /* Sanitized local copy for interface address searches */ | |
1289 | bzero(&sin, sizeof (sin)); | |
1290 | sin.sin_family = AF_INET; | |
1291 | sin.sin_len = sizeof (struct sockaddr_in); | |
1292 | sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr; | |
1293 | /* | |
1294 | * If we did not find (or use) a route, assume dest is reachable | |
1295 | * on a directly connected network and try to find a corresponding | |
1296 | * interface to take the source address from. | |
1297 | */ | |
1298 | if (ro->ro_rt == NULL) { | |
39037602 A |
1299 | proc_t proc = current_proc(); |
1300 | ||
39236c6e A |
1301 | VERIFY(ia == NULL); |
1302 | ia = ifatoia(ifa_ifwithdstaddr(SA(&sin))); | |
1303 | if (ia == NULL) | |
1304 | ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope)); | |
1305 | error = ((ia == NULL) ? ENETUNREACH : 0); | |
743345f9 | 1306 | |
39037602 A |
1307 | if (apn_fallback_required(proc, inp->inp_socket, |
1308 | (void *)nam)) | |
1309 | apn_fallback_trigger(proc); | |
1310 | ||
39236c6e A |
1311 | goto done; |
1312 | } | |
1313 | RT_LOCK_ASSERT_HELD(ro->ro_rt); | |
1314 | /* | |
1315 | * If the outgoing interface on the route found is not | |
1316 | * a loopback interface, use the address from that interface. | |
1317 | */ | |
1318 | if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { | |
1319 | VERIFY(ia == NULL); | |
6d2010ae A |
1320 | /* |
1321 | * If the route points to a cellular interface and the | |
1322 | * caller forbids our using interfaces of such type, | |
1323 | * pretend that there is no route. | |
fe8ab488 | 1324 | * Apply the same logic for expensive interfaces. |
6d2010ae | 1325 | */ |
fe8ab488 | 1326 | if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) { |
39236c6e A |
1327 | RT_UNLOCK(ro->ro_rt); |
1328 | ROUTE_RELEASE(ro); | |
1329 | error = EHOSTUNREACH; | |
fe8ab488 | 1330 | restricted = TRUE; |
39236c6e | 1331 | } else { |
6d2010ae A |
1332 | /* Become a regular mutex */ |
1333 | RT_CONVERT_LOCK(ro->ro_rt); | |
39236c6e A |
1334 | ia = ifatoia(ro->ro_rt->rt_ifa); |
1335 | IFA_ADDREF(&ia->ia_ifa); | |
b0d623f7 | 1336 | RT_UNLOCK(ro->ro_rt); |
39236c6e | 1337 | error = 0; |
91447636 | 1338 | } |
39236c6e A |
1339 | goto done; |
1340 | } | |
1341 | VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK); | |
1342 | RT_UNLOCK(ro->ro_rt); | |
1343 | /* | |
1344 | * The outgoing interface is marked with 'loopback net', so a route | |
1345 | * to ourselves is here. | |
1346 | * Try to find the interface of the destination address and then | |
1347 | * take the address from there. That interface is not necessarily | |
1348 | * a loopback interface. | |
1349 | */ | |
1350 | VERIFY(ia == NULL); | |
1351 | ia = ifatoia(ifa_ifwithdstaddr(SA(&sin))); | |
1352 | if (ia == NULL) | |
1353 | ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope)); | |
1354 | if (ia == NULL) | |
1355 | ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope)); | |
1356 | if (ia == NULL) { | |
1357 | RT_LOCK(ro->ro_rt); | |
1358 | ia = ifatoia(ro->ro_rt->rt_ifa); | |
1359 | if (ia != NULL) | |
1360 | IFA_ADDREF(&ia->ia_ifa); | |
1361 | RT_UNLOCK(ro->ro_rt); | |
1362 | } | |
1363 | error = ((ia == NULL) ? ENETUNREACH : 0); | |
1364 | ||
1365 | done: | |
1366 | /* | |
1367 | * If the destination address is multicast and an outgoing | |
1368 | * interface has been set as a multicast option, use the | |
1369 | * address of that interface as our source address. | |
1370 | */ | |
15129b1c | 1371 | if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) && |
39236c6e A |
1372 | inp->inp_moptions != NULL) { |
1373 | struct ip_moptions *imo; | |
1374 | struct ifnet *ifp; | |
1375 | ||
1376 | imo = inp->inp_moptions; | |
1377 | IMO_LOCK(imo); | |
1378 | if (imo->imo_multicast_ifp != NULL && (ia == NULL || | |
1379 | ia->ia_ifp != imo->imo_multicast_ifp)) { | |
1380 | ifp = imo->imo_multicast_ifp; | |
1381 | if (ia != NULL) | |
6d2010ae | 1382 | IFA_REMREF(&ia->ia_ifa); |
39236c6e A |
1383 | lck_rw_lock_shared(in_ifaddr_rwlock); |
1384 | TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { | |
1385 | if (ia->ia_ifp == ifp) | |
1386 | break; | |
6d2010ae | 1387 | } |
39236c6e A |
1388 | if (ia != NULL) |
1389 | IFA_ADDREF(&ia->ia_ifa); | |
1390 | lck_rw_done(in_ifaddr_rwlock); | |
1391 | if (ia == NULL) | |
1392 | error = EADDRNOTAVAIL; | |
15129b1c A |
1393 | else |
1394 | error = 0; | |
1c79356b | 1395 | } |
39236c6e A |
1396 | IMO_UNLOCK(imo); |
1397 | } | |
1398 | /* | |
1399 | * Don't do pcblookup call here; return interface in laddr | |
1400 | * and exit to caller, that will do the lookup. | |
1401 | */ | |
1402 | if (ia != NULL) { | |
1c79356b | 1403 | /* |
39236c6e A |
1404 | * If the source address belongs to a cellular interface |
1405 | * and the socket forbids our using interfaces of such | |
1406 | * type, pretend that there is no source address. | |
fe8ab488 | 1407 | * Apply the same logic for expensive interfaces. |
1c79356b | 1408 | */ |
39236c6e | 1409 | IFA_LOCK_SPIN(&ia->ia_ifa); |
fe8ab488 | 1410 | if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) { |
39236c6e A |
1411 | IFA_UNLOCK(&ia->ia_ifa); |
1412 | error = EHOSTUNREACH; | |
fe8ab488 | 1413 | restricted = TRUE; |
39236c6e A |
1414 | } else if (error == 0) { |
1415 | *laddr = ia->ia_addr.sin_addr; | |
1416 | if (outif != NULL) { | |
1417 | struct ifnet *ifp; | |
1418 | ||
1419 | if (ro->ro_rt != NULL) | |
1420 | ifp = ro->ro_rt->rt_ifp; | |
1421 | else | |
1422 | ifp = ia->ia_ifp; | |
1423 | ||
1424 | VERIFY(ifp != NULL); | |
1425 | IFA_CONVERT_LOCK(&ia->ia_ifa); | |
1426 | ifnet_reference(ifp); /* for caller */ | |
1427 | if (*outif != NULL) | |
1428 | ifnet_release(*outif); | |
1429 | *outif = ifp; | |
1c79356b | 1430 | } |
39236c6e A |
1431 | IFA_UNLOCK(&ia->ia_ifa); |
1432 | } else { | |
1433 | IFA_UNLOCK(&ia->ia_ifa); | |
1c79356b | 1434 | } |
6d2010ae | 1435 | IFA_REMREF(&ia->ia_ifa); |
39236c6e A |
1436 | ia = NULL; |
1437 | } | |
1438 | ||
fe8ab488 | 1439 | if (restricted && error == EHOSTUNREACH) { |
39236c6e A |
1440 | soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED | |
1441 | SO_FILT_HINT_IFDENIED)); | |
1c79356b | 1442 | } |
39236c6e A |
1443 | |
1444 | return (error); | |
1c79356b A |
1445 | } |
1446 | ||
1447 | /* | |
1448 | * Outer subroutine: | |
1449 | * Connect from a socket to a specified address. | |
1450 | * Both address and port must be specified in argument nam. |
1451 | * If the socket does not have a local address yet, |
1452 | * then pick one. |
39236c6e A |
1453 | * |
1454 | * The caller may override the bound-to-interface setting of the socket | |
1455 | * by specifying the ifscope parameter (e.g. from IP_PKTINFO.) | |
1c79356b A |
1456 | */ |
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct sockaddr_in *dst = (struct sockaddr_in *)(void *)nam;
	struct socket *so = inp->inp_socket;
	struct in_addr src;
	struct inpcb *match;
	boolean_t adopt_src;
	int error;

	/* Let the inner routine choose the local interface address. */
	error = in_pcbladdr(inp, nam, &src, ifscope, outif, 0);
	if (error != 0) {
		return (error);
	}

	/* The hash lookup is done with the socket unlocked. */
	socket_unlock(so, 0);
	match = in_pcblookup_hash(inp->inp_pcbinfo, dst->sin_addr,
	    dst->sin_port, inp->inp_laddr.s_addr ? inp->inp_laddr : src,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Re-validate the socket now that it is locked again: while it was
	 * unlocked, this embryonic socket can have been aborted by another
	 * thread closing the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0) {
		return (ECONNREFUSED);
	}

	/* The 4-tuple is already taken: drop the lookup's hold and fail. */
	if (match != NULL) {
		in_pcb_checkstate(match, WNT_RELEASE, match == inp ? 1 : 0);
		return (EADDRINUSE);
	}

	/*
	 * Decide once whether the socket still lacks a local address; the
	 * answer is latched here because the steps below may drop the
	 * socket lock.
	 */
	adopt_src = (inp->inp_laddr.s_addr == INADDR_ANY) ? TRUE : FALSE;
	if (adopt_src) {
		if (inp->inp_lport == 0) {
			error = in_pcbbind(inp, NULL, p);
			if (error) {
				return (error);
			}
		}
	} else if (inp->inp_lport == 0) {
		/*
		 * Using IP_PKTINFO without a local port already specified
		 * would cause a kernel panic (rdar://problem/18508185);
		 * fail with an error instead.  This routine could be
		 * refactored to handle the case better in the future.
		 */
		return (EINVAL);
	}

	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
		/*
		 * Lock inversion issue, mostly with udp multicast packets:
		 * release the socket, block on the pcbinfo lock, then take
		 * the socket lock again.
		 */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	if (adopt_src) {
		inp->inp_laddr = src;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
		inp->inp_flags |= INP_INADDR_ANY;
	}

	inp->inp_faddr = dst->sin_addr;
	inp->inp_fport = dst->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_pcb_invalidate_cache(inp);
	}
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	return (0);
}
1539 | ||
1540 | void | |
2d21ac55 | 1541 | in_pcbdisconnect(struct inpcb *inp) |
1c79356b | 1542 | { |
39236c6e | 1543 | struct socket *so = inp->inp_socket; |
1c79356b | 1544 | |
fe8ab488 A |
1545 | if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) |
1546 | nstat_pcb_cache(inp); | |
1547 | ||
1c79356b A |
1548 | inp->inp_faddr.s_addr = INADDR_ANY; |
1549 | inp->inp_fport = 0; | |
91447636 | 1550 | |
39236c6e A |
1551 | if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { |
1552 | /* lock inversion issue, mostly with udp multicast packets */ | |
1553 | socket_unlock(so, 0); | |
1554 | lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); | |
1555 | socket_lock(so, 0); | |
91447636 A |
1556 | } |
1557 | ||
1c79356b | 1558 | in_pcbrehash(inp); |
39236c6e A |
1559 | lck_rw_done(inp->inp_pcbinfo->ipi_lock); |
1560 | /* | |
1561 | * A multipath subflow socket would have its SS_NOFDREF set by default, | |
1562 | * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB; | |
1563 | * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared. | |
1564 | */ | |
1565 | if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) | |
1c79356b A |
1566 | in_pcbdetach(inp); |
1567 | } | |
1568 | ||
1569 | void | |
2d21ac55 | 1570 | in_pcbdetach(struct inpcb *inp) |
1c79356b A |
1571 | { |
1572 | struct socket *so = inp->inp_socket; | |
1c79356b | 1573 | |
39236c6e A |
1574 | if (so->so_pcb == NULL) { |
1575 | /* PCB has been disposed */ | |
1576 | panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__, | |
1577 | inp, so, SOCK_PROTO(so)); | |
1578 | /* NOTREACHED */ | |
91447636 | 1579 | } |
39037602 | 1580 | |
1c79356b | 1581 | #if IPSEC |
39236c6e A |
1582 | if (inp->inp_sp != NULL) { |
1583 | (void) ipsec4_delete_pcbpolicy(inp); | |
91447636 | 1584 | } |
39236c6e | 1585 | #endif /* IPSEC */ |
39037602 | 1586 | |
5ba3f43e A |
1587 | if (inp->inp_stat != NULL && SOCK_PROTO(so) == IPPROTO_UDP) { |
1588 | if (inp->inp_stat->rxpackets == 0 && inp->inp_stat->txpackets == 0) { | |
1589 | INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_no_data); | |
1590 | } | |
1591 | } | |
1592 | ||
fe8ab488 A |
1593 | /* |
1594 | * Let NetworkStatistics know this PCB is going away | |
1595 | * before we detach it. | |
1596 | */ | |
39037602 | 1597 | if (nstat_collect && |
fe8ab488 A |
1598 | (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) |
1599 | nstat_pcb_detach(inp); | |
3e170ce0 A |
1600 | |
1601 | /* Free memory buffer held for generating keep alives */ | |
1602 | if (inp->inp_keepalive_data != NULL) { | |
1603 | FREE(inp->inp_keepalive_data, M_TEMP); | |
1604 | inp->inp_keepalive_data = NULL; | |
1605 | } | |
1606 | ||
91447636 | 1607 | /* mark socket state as dead */ |
39236c6e A |
1608 | if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) { |
1609 | panic("%s: so=%p proto=%d couldn't set to STOPUSING\n", | |
1610 | __func__, so, SOCK_PROTO(so)); | |
1611 | /* NOTREACHED */ | |
1612 | } | |
1c79356b | 1613 | |
39236c6e | 1614 | if (!(so->so_flags & SOF_PCBCLEARING)) { |
6d2010ae | 1615 | struct ip_moptions *imo; |
2d21ac55 | 1616 | |
91447636 | 1617 | inp->inp_vflag = 0; |
39236c6e A |
1618 | if (inp->inp_options != NULL) { |
1619 | (void) m_free(inp->inp_options); | |
1620 | inp->inp_options = NULL; | |
91447636 | 1621 | } |
39236c6e | 1622 | ROUTE_RELEASE(&inp->inp_route); |
6d2010ae | 1623 | imo = inp->inp_moptions; |
91447636 A |
1624 | inp->inp_moptions = NULL; |
1625 | sofreelastref(so, 0); | |
1626 | inp->inp_state = INPCB_STATE_DEAD; | |
39236c6e A |
1627 | /* makes sure we're not called twice from so_close */ |
1628 | so->so_flags |= SOF_PCBCLEARING; | |
1629 | ||
1630 | inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST); | |
39037602 A |
1631 | |
1632 | /* | |
1633 | * See inp_join_group() for why we need to unlock | |
1634 | */ | |
1635 | if (imo != NULL) { | |
1636 | socket_unlock(so, 0); | |
1637 | IMO_REMREF(imo); | |
1638 | socket_lock(so, 0); | |
1639 | } | |
91447636 A |
1640 | } |
1641 | } | |
1c79356b | 1642 | |
1c79356b | 1643 | |
39236c6e A |
1644 | void |
1645 | in_pcbdispose(struct inpcb *inp) | |
91447636 A |
1646 | { |
1647 | struct socket *so = inp->inp_socket; | |
1648 | struct inpcbinfo *ipi = inp->inp_pcbinfo; | |
1649 | ||
39236c6e A |
1650 | if (so != NULL && so->so_usecount != 0) { |
1651 | panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n", | |
1652 | __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount, | |
1653 | solockhistory_nr(so)); | |
1654 | /* NOTREACHED */ | |
1655 | } else if (inp->inp_wantcnt != WNT_STOPUSING) { | |
1656 | if (so != NULL) { | |
1657 | panic_plain("%s: inp %p invalid wantcnt %d, so %p " | |
1658 | "[%d,%d] usecount %d retaincnt %d state 0x%x " | |
1659 | "flags 0x%x lockhistory %s\n", __func__, inp, | |
1660 | inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so), | |
1661 | so->so_usecount, so->so_retaincnt, so->so_state, | |
1662 | so->so_flags, solockhistory_nr(so)); | |
1663 | /* NOTREACHED */ | |
1664 | } else { | |
1665 | panic("%s: inp %p invalid wantcnt %d no socket\n", | |
1666 | __func__, inp, inp->inp_wantcnt); | |
1667 | /* NOTREACHED */ | |
1668 | } | |
91447636 | 1669 | } |
91447636 | 1670 | |
5ba3f43e | 1671 | LCK_RW_ASSERT(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE); |
91447636 A |
1672 | |
1673 | inp->inp_gencnt = ++ipi->ipi_gencnt; | |
316670eb | 1674 | /* access ipi in in_pcbremlists */ |
91447636 | 1675 | in_pcbremlists(inp); |
316670eb | 1676 | |
39236c6e | 1677 | if (so != NULL) { |
91447636 A |
1678 | if (so->so_proto->pr_flags & PR_PCBLOCK) { |
1679 | sofreelastref(so, 0); | |
39236c6e A |
1680 | if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) { |
1681 | /* | |
1682 | * selthreadclear() already called | |
1683 | * during sofreelastref() above. | |
1684 | */ | |
91447636 A |
1685 | sbrelease(&so->so_rcv); |
1686 | sbrelease(&so->so_snd); | |
1687 | } | |
39236c6e A |
1688 | if (so->so_head != NULL) { |
1689 | panic("%s: so=%p head still exist\n", | |
1690 | __func__, so); | |
1691 | /* NOTREACHED */ | |
1692 | } | |
1693 | lck_mtx_unlock(&inp->inpcb_mtx); | |
5ba3f43e A |
1694 | |
1695 | #if NECP | |
1696 | necp_inpcb_remove_cb(inp); | |
1697 | #endif /* NECP */ | |
1698 | ||
39236c6e | 1699 | lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp); |
9bccf70c | 1700 | } |
39236c6e A |
1701 | /* makes sure we're not called twice from so_close */ |
1702 | so->so_flags |= SOF_PCBCLEARING; | |
1703 | so->so_saved_pcb = (caddr_t)inp; | |
1704 | so->so_pcb = NULL; | |
1705 | inp->inp_socket = NULL; | |
2d21ac55 A |
1706 | #if CONFIG_MACF_NET |
1707 | mac_inpcb_label_destroy(inp); | |
39236c6e | 1708 | #endif /* CONFIG_MACF_NET */ |
39037602 A |
1709 | #if NECP |
1710 | necp_inpcb_dispose(inp); | |
1711 | #endif /* NECP */ | |
b0d623f7 A |
1712 | /* |
1713 | * In case there a route cached after a detach (possible | |
1714 | * in the tcp case), make sure that it is freed before | |
1715 | * we deallocate the structure. | |
1716 | */ | |
39236c6e | 1717 | ROUTE_RELEASE(&inp->inp_route); |
3e170ce0 | 1718 | if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) { |
91447636 | 1719 | zfree(ipi->ipi_zone, inp); |
55e303ae | 1720 | } |
91447636 | 1721 | sodealloc(so); |
9bccf70c | 1722 | } |
1c79356b A |
1723 | } |
1724 | ||
1725 | /* | |
39236c6e | 1726 | * The calling convention of in_getsockaddr() and in_getpeeraddr() was |
1c79356b A |
1727 | * modified to match the pru_sockaddr() and pru_peeraddr() entry points |
1728 | * in struct pr_usrreqs, so that protocols can just reference then directly | |
39236c6e | 1729 | * without the need for a wrapper function. |
1c79356b A |
1730 | */ |
1731 | int | |
39236c6e | 1732 | in_getsockaddr(struct socket *so, struct sockaddr **nam) |
1c79356b | 1733 | { |
2d21ac55 A |
1734 | struct inpcb *inp; |
1735 | struct sockaddr_in *sin; | |
1c79356b A |
1736 | |
1737 | /* | |
1738 | * Do the malloc first in case it blocks. | |
1739 | */ | |
39236c6e | 1740 | MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK); |
0b4e3aa0 | 1741 | if (sin == NULL) |
39236c6e A |
1742 | return (ENOBUFS); |
1743 | bzero(sin, sizeof (*sin)); | |
1c79356b | 1744 | sin->sin_family = AF_INET; |
39236c6e | 1745 | sin->sin_len = sizeof (*sin); |
1c79356b | 1746 | |
39236c6e | 1747 | if ((inp = sotoinpcb(so)) == NULL) { |
1c79356b | 1748 | FREE(sin, M_SONAME); |
39236c6e | 1749 | return (EINVAL); |
1c79356b A |
1750 | } |
1751 | sin->sin_port = inp->inp_lport; | |
1752 | sin->sin_addr = inp->inp_laddr; | |
1c79356b A |
1753 | |
1754 | *nam = (struct sockaddr *)sin; | |
39236c6e | 1755 | return (0); |
1c79356b A |
1756 | } |
1757 | ||
1758 | int | |
5ba3f43e | 1759 | in_getsockaddr_s(struct socket *so, struct sockaddr_in *ss) |
1c79356b | 1760 | { |
5ba3f43e | 1761 | struct sockaddr_in *sin = ss; |
1c79356b | 1762 | struct inpcb *inp; |
1c79356b | 1763 | |
39236c6e A |
1764 | VERIFY(ss != NULL); |
1765 | bzero(ss, sizeof (*ss)); | |
1766 | ||
1c79356b | 1767 | sin->sin_family = AF_INET; |
39236c6e | 1768 | sin->sin_len = sizeof (*sin); |
1c79356b | 1769 | |
5ba3f43e A |
1770 | if ((inp = sotoinpcb(so)) == NULL) |
1771 | return (EINVAL); | |
39236c6e A |
1772 | |
1773 | sin->sin_port = inp->inp_lport; | |
1774 | sin->sin_addr = inp->inp_laddr; | |
1775 | return (0); | |
1776 | } | |
1777 | ||
1778 | int | |
1779 | in_getpeeraddr(struct socket *so, struct sockaddr **nam) | |
1780 | { | |
1781 | struct inpcb *inp; | |
1782 | struct sockaddr_in *sin; | |
1783 | ||
1784 | /* | |
1785 | * Do the malloc first in case it blocks. | |
1786 | */ | |
1787 | MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK); | |
1788 | if (sin == NULL) | |
1789 | return (ENOBUFS); | |
1790 | bzero((caddr_t)sin, sizeof (*sin)); | |
1791 | sin->sin_family = AF_INET; | |
1792 | sin->sin_len = sizeof (*sin); | |
1793 | ||
1794 | if ((inp = sotoinpcb(so)) == NULL) { | |
1c79356b | 1795 | FREE(sin, M_SONAME); |
39236c6e | 1796 | return (EINVAL); |
1c79356b A |
1797 | } |
1798 | sin->sin_port = inp->inp_fport; | |
1799 | sin->sin_addr = inp->inp_faddr; | |
1c79356b A |
1800 | |
1801 | *nam = (struct sockaddr *)sin; | |
39236c6e A |
1802 | return (0); |
1803 | } | |
1804 | ||
1c79356b | 1805 | void |
2d21ac55 | 1806 | in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
39236c6e | 1807 | int errno, void (*notify)(struct inpcb *, int)) |
1c79356b | 1808 | { |
91447636 A |
1809 | struct inpcb *inp; |
1810 | ||
39236c6e | 1811 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
1c79356b | 1812 | |
39236c6e | 1813 | LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { |
9bccf70c | 1814 | #if INET6 |
39236c6e | 1815 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1816 | continue; |
39236c6e | 1817 | #endif /* INET6 */ |
1c79356b | 1818 | if (inp->inp_faddr.s_addr != faddr.s_addr || |
9bccf70c | 1819 | inp->inp_socket == NULL) |
39236c6e A |
1820 | continue; |
1821 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) | |
91447636 A |
1822 | continue; |
1823 | socket_lock(inp->inp_socket, 1); | |
9bccf70c | 1824 | (*notify)(inp, errno); |
39236c6e | 1825 | (void) in_pcb_checkstate(inp, WNT_RELEASE, 1); |
91447636 | 1826 | socket_unlock(inp->inp_socket, 1); |
1c79356b | 1827 | } |
39236c6e | 1828 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
1829 | } |
1830 | ||
1831 | /* | |
1832 | * Check for alternatives when higher level complains | |
1833 | * about service problems. For now, invalidate cached | |
1834 | * routing information. If the route was created dynamically | |
1835 | * (by a redirect), time to try a default gateway again. | |
1836 | */ | |
1837 | void | |
2d21ac55 | 1838 | in_losing(struct inpcb *inp) |
1c79356b | 1839 | { |
39236c6e | 1840 | boolean_t release = FALSE; |
2d21ac55 | 1841 | struct rtentry *rt; |
1c79356b | 1842 | |
b0d623f7 | 1843 | if ((rt = inp->inp_route.ro_rt) != NULL) { |
39236c6e | 1844 | struct in_ifaddr *ia = NULL; |
b0d623f7 | 1845 | |
b0d623f7 | 1846 | RT_LOCK(rt); |
b0d623f7 A |
1847 | if (rt->rt_flags & RTF_DYNAMIC) { |
1848 | /* | |
1849 | * Prevent another thread from modifying rt_key, | |
1850 | * rt_gateway via rt_setgate() after rt_lock is | |
1851 | * dropped by marking the route as defunct. | |
1852 | */ | |
1853 | rt->rt_flags |= RTF_CONDEMNED; | |
1854 | RT_UNLOCK(rt); | |
1855 | (void) rtrequest(RTM_DELETE, rt_key(rt), | |
39236c6e | 1856 | rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); |
b0d623f7 A |
1857 | } else { |
1858 | RT_UNLOCK(rt); | |
1859 | } | |
2d21ac55 | 1860 | /* if the address is gone keep the old route in the pcb */ |
39236c6e A |
1861 | if (inp->inp_laddr.s_addr != INADDR_ANY && |
1862 | (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { | |
1863 | /* | |
1864 | * Address is around; ditch the route. A new route | |
1865 | * can be allocated the next time output is attempted. | |
1866 | */ | |
1867 | release = TRUE; | |
2d21ac55 | 1868 | } |
39236c6e A |
1869 | if (ia != NULL) |
1870 | IFA_REMREF(&ia->ia_ifa); | |
1c79356b | 1871 | } |
39236c6e A |
1872 | if (rt == NULL || release) |
1873 | ROUTE_RELEASE(&inp->inp_route); | |
1c79356b A |
1874 | } |
1875 | ||
1876 | /* | |
1877 | * After a routing change, flush old routing | |
1878 | * and allocate a (hopefully) better one. | |
1879 | */ | |
9bccf70c | 1880 | void |
39236c6e | 1881 | in_rtchange(struct inpcb *inp, int errno) |
1c79356b | 1882 | { |
39236c6e A |
1883 | #pragma unused(errno) |
1884 | boolean_t release = FALSE; | |
2d21ac55 A |
1885 | struct rtentry *rt; |
1886 | ||
1887 | if ((rt = inp->inp_route.ro_rt) != NULL) { | |
39236c6e | 1888 | struct in_ifaddr *ia = NULL; |
b0d623f7 | 1889 | |
39236c6e A |
1890 | /* if address is gone, keep the old route */ |
1891 | if (inp->inp_laddr.s_addr != INADDR_ANY && | |
1892 | (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { | |
1893 | /* | |
1894 | * Address is around; ditch the route. A new route | |
1895 | * can be allocated the next time output is attempted. | |
1896 | */ | |
1897 | release = TRUE; | |
2d21ac55 | 1898 | } |
39236c6e A |
1899 | if (ia != NULL) |
1900 | IFA_REMREF(&ia->ia_ifa); | |
1c79356b | 1901 | } |
39236c6e A |
1902 | if (rt == NULL || release) |
1903 | ROUTE_RELEASE(&inp->inp_route); | |
1c79356b A |
1904 | } |
1905 | ||
1906 | /* | |
1907 | * Lookup a PCB based on the local address and port. | |
1908 | */ | |
1909 | struct inpcb * | |
2d21ac55 | 1910 | in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, |
39236c6e | 1911 | unsigned int lport_arg, int wild_okay) |
1c79356b | 1912 | { |
2d21ac55 | 1913 | struct inpcb *inp; |
1c79356b A |
1914 | int matchwild = 3, wildcard; |
1915 | u_short lport = lport_arg; | |
1916 | ||
39236c6e | 1917 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0); |
1c79356b A |
1918 | |
1919 | if (!wild_okay) { | |
1920 | struct inpcbhead *head; | |
1921 | /* | |
1922 | * Look for an unconnected (wildcard foreign addr) PCB that | |
1923 | * matches the local address and port we're looking for. | |
1924 | */ | |
39236c6e A |
1925 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, |
1926 | pcbinfo->ipi_hashmask)]; | |
9bccf70c A |
1927 | LIST_FOREACH(inp, head, inp_hash) { |
1928 | #if INET6 | |
39236c6e | 1929 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1930 | continue; |
39236c6e | 1931 | #endif /* INET6 */ |
1c79356b A |
1932 | if (inp->inp_faddr.s_addr == INADDR_ANY && |
1933 | inp->inp_laddr.s_addr == laddr.s_addr && | |
1934 | inp->inp_lport == lport) { | |
1935 | /* | |
1936 | * Found. | |
1937 | */ | |
1938 | return (inp); | |
1939 | } | |
1940 | } | |
1941 | /* | |
1942 | * Not found. | |
1943 | */ | |
39236c6e | 1944 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0); |
1c79356b A |
1945 | return (NULL); |
1946 | } else { | |
1947 | struct inpcbporthead *porthash; | |
1948 | struct inpcbport *phd; | |
1949 | struct inpcb *match = NULL; | |
1950 | /* | |
1951 | * Best fit PCB lookup. | |
1952 | * | |
1953 | * First see if this local port is in use by looking on the | |
1954 | * port hash list. | |
1955 | */ | |
39236c6e A |
1956 | porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, |
1957 | pcbinfo->ipi_porthashmask)]; | |
9bccf70c | 1958 | LIST_FOREACH(phd, porthash, phd_hash) { |
1c79356b A |
1959 | if (phd->phd_port == lport) |
1960 | break; | |
1961 | } | |
1962 | if (phd != NULL) { | |
1963 | /* | |
1964 | * Port is in use by one or more PCBs. Look for best | |
1965 | * fit. | |
1966 | */ | |
9bccf70c | 1967 | LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { |
1c79356b | 1968 | wildcard = 0; |
9bccf70c | 1969 | #if INET6 |
39236c6e | 1970 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1971 | continue; |
39236c6e | 1972 | #endif /* INET6 */ |
1c79356b A |
1973 | if (inp->inp_faddr.s_addr != INADDR_ANY) |
1974 | wildcard++; | |
1975 | if (inp->inp_laddr.s_addr != INADDR_ANY) { | |
1976 | if (laddr.s_addr == INADDR_ANY) | |
1977 | wildcard++; | |
39236c6e A |
1978 | else if (inp->inp_laddr.s_addr != |
1979 | laddr.s_addr) | |
1c79356b A |
1980 | continue; |
1981 | } else { | |
1982 | if (laddr.s_addr != INADDR_ANY) | |
1983 | wildcard++; | |
1984 | } | |
1985 | if (wildcard < matchwild) { | |
1986 | match = inp; | |
1987 | matchwild = wildcard; | |
1988 | if (matchwild == 0) { | |
1989 | break; | |
1990 | } | |
1991 | } | |
1992 | } | |
1993 | } | |
39236c6e A |
1994 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match, |
1995 | 0, 0, 0, 0); | |
1c79356b A |
1996 | return (match); |
1997 | } | |
1998 | } | |
1999 | ||
6d2010ae A |
2000 | /* |
2001 | * Check if PCB exists in hash list. | |
2002 | */ | |
2003 | int | |
39236c6e A |
2004 | in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
2005 | u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard, | |
2006 | uid_t *uid, gid_t *gid, struct ifnet *ifp) | |
6d2010ae A |
2007 | { |
2008 | struct inpcbhead *head; | |
2009 | struct inpcb *inp; | |
2010 | u_short fport = fport_arg, lport = lport_arg; | |
39236c6e A |
2011 | int found = 0; |
2012 | struct inpcb *local_wild = NULL; | |
2013 | #if INET6 | |
2014 | struct inpcb *local_wild_mapped = NULL; | |
2015 | #endif /* INET6 */ | |
6d2010ae A |
2016 | |
2017 | *uid = UID_MAX; | |
2018 | *gid = GID_MAX; | |
316670eb | 2019 | |
6d2010ae A |
2020 | /* |
2021 | * We may have found the pcb in the last lookup - check this first. | |
2022 | */ | |
2023 | ||
39236c6e | 2024 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
6d2010ae A |
2025 | |
2026 | /* | |
2027 | * First look for an exact match. | |
2028 | */ | |
39236c6e A |
2029 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, |
2030 | pcbinfo->ipi_hashmask)]; | |
6d2010ae A |
2031 | LIST_FOREACH(inp, head, inp_hash) { |
2032 | #if INET6 | |
39236c6e | 2033 | if (!(inp->inp_vflag & INP_IPV4)) |
6d2010ae | 2034 | continue; |
39236c6e | 2035 | #endif /* INET6 */ |
fe8ab488 | 2036 | if (inp_restricted_recv(inp, ifp)) |
316670eb A |
2037 | continue; |
2038 | ||
6d2010ae A |
2039 | if (inp->inp_faddr.s_addr == faddr.s_addr && |
2040 | inp->inp_laddr.s_addr == laddr.s_addr && | |
2041 | inp->inp_fport == fport && | |
2042 | inp->inp_lport == lport) { | |
2043 | if ((found = (inp->inp_socket != NULL))) { | |
2044 | /* | |
2045 | * Found. | |
2046 | */ | |
316670eb A |
2047 | *uid = kauth_cred_getuid( |
2048 | inp->inp_socket->so_cred); | |
2049 | *gid = kauth_cred_getgid( | |
2050 | inp->inp_socket->so_cred); | |
6d2010ae | 2051 | } |
39236c6e | 2052 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae A |
2053 | return (found); |
2054 | } | |
2055 | } | |
6d2010ae | 2056 | |
39236c6e A |
2057 | if (!wildcard) { |
2058 | /* | |
2059 | * Not found. | |
2060 | */ | |
2061 | lck_rw_done(pcbinfo->ipi_lock); | |
2062 | return (0); | |
2063 | } | |
316670eb | 2064 | |
39236c6e A |
2065 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, |
2066 | pcbinfo->ipi_hashmask)]; | |
2067 | LIST_FOREACH(inp, head, inp_hash) { | |
6d2010ae | 2068 | #if INET6 |
39236c6e A |
2069 | if (!(inp->inp_vflag & INP_IPV4)) |
2070 | continue; | |
6d2010ae | 2071 | #endif /* INET6 */ |
fe8ab488 | 2072 | if (inp_restricted_recv(inp, ifp)) |
39236c6e A |
2073 | continue; |
2074 | ||
2075 | if (inp->inp_faddr.s_addr == INADDR_ANY && | |
2076 | inp->inp_lport == lport) { | |
2077 | if (inp->inp_laddr.s_addr == laddr.s_addr) { | |
2078 | if ((found = (inp->inp_socket != NULL))) { | |
316670eb | 2079 | *uid = kauth_cred_getuid( |
39236c6e | 2080 | inp->inp_socket->so_cred); |
316670eb | 2081 | *gid = kauth_cred_getgid( |
39236c6e | 2082 | inp->inp_socket->so_cred); |
6d2010ae | 2083 | } |
39236c6e | 2084 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae | 2085 | return (found); |
39236c6e A |
2086 | } else if (inp->inp_laddr.s_addr == INADDR_ANY) { |
2087 | #if INET6 | |
2088 | if (inp->inp_socket && | |
2089 | SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) | |
2090 | local_wild_mapped = inp; | |
2091 | else | |
6d2010ae | 2092 | #endif /* INET6 */ |
39236c6e A |
2093 | local_wild = inp; |
2094 | } | |
6d2010ae | 2095 | } |
39236c6e A |
2096 | } |
2097 | if (local_wild == NULL) { | |
2098 | #if INET6 | |
2099 | if (local_wild_mapped != NULL) { | |
2100 | if ((found = (local_wild_mapped->inp_socket != NULL))) { | |
316670eb | 2101 | *uid = kauth_cred_getuid( |
39236c6e | 2102 | local_wild_mapped->inp_socket->so_cred); |
316670eb | 2103 | *gid = kauth_cred_getgid( |
39236c6e | 2104 | local_wild_mapped->inp_socket->so_cred); |
6d2010ae | 2105 | } |
39236c6e | 2106 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae A |
2107 | return (found); |
2108 | } | |
39236c6e A |
2109 | #endif /* INET6 */ |
2110 | lck_rw_done(pcbinfo->ipi_lock); | |
2111 | return (0); | |
6d2010ae | 2112 | } |
39236c6e A |
2113 | if ((found = (local_wild->inp_socket != NULL))) { |
2114 | *uid = kauth_cred_getuid( | |
2115 | local_wild->inp_socket->so_cred); | |
2116 | *gid = kauth_cred_getgid( | |
2117 | local_wild->inp_socket->so_cred); | |
2118 | } | |
2119 | lck_rw_done(pcbinfo->ipi_lock); | |
2120 | return (found); | |
6d2010ae A |
2121 | } |
2122 | ||
1c79356b A |
2123 | /* |
2124 | * Lookup PCB in hash list. | |
2125 | */ | |
2126 | struct inpcb * | |
39236c6e A |
2127 | in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
2128 | u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard, | |
2129 | struct ifnet *ifp) | |
1c79356b A |
2130 | { |
2131 | struct inpcbhead *head; | |
2d21ac55 | 2132 | struct inpcb *inp; |
1c79356b | 2133 | u_short fport = fport_arg, lport = lport_arg; |
39236c6e A |
2134 | struct inpcb *local_wild = NULL; |
2135 | #if INET6 | |
2136 | struct inpcb *local_wild_mapped = NULL; | |
2137 | #endif /* INET6 */ | |
1c79356b A |
2138 | |
2139 | /* | |
2140 | * We may have found the pcb in the last lookup - check this first. | |
2141 | */ | |
2142 | ||
39236c6e | 2143 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
1c79356b A |
2144 | |
2145 | /* | |
2146 | * First look for an exact match. | |
2147 | */ | |
39236c6e A |
2148 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, |
2149 | pcbinfo->ipi_hashmask)]; | |
9bccf70c A |
2150 | LIST_FOREACH(inp, head, inp_hash) { |
2151 | #if INET6 | |
39236c6e | 2152 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 2153 | continue; |
39236c6e | 2154 | #endif /* INET6 */ |
fe8ab488 | 2155 | if (inp_restricted_recv(inp, ifp)) |
316670eb A |
2156 | continue; |
2157 | ||
1c79356b A |
2158 | if (inp->inp_faddr.s_addr == faddr.s_addr && |
2159 | inp->inp_laddr.s_addr == laddr.s_addr && | |
2160 | inp->inp_fport == fport && | |
2161 | inp->inp_lport == lport) { | |
2162 | /* | |
2163 | * Found. | |
2164 | */ | |
39236c6e A |
2165 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != |
2166 | WNT_STOPUSING) { | |
2167 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 2168 | return (inp); |
39236c6e A |
2169 | } else { |
2170 | /* it's there but dead, say it isn't found */ | |
2171 | lck_rw_done(pcbinfo->ipi_lock); | |
316670eb | 2172 | return (NULL); |
91447636 | 2173 | } |
1c79356b A |
2174 | } |
2175 | } | |
1c79356b | 2176 | |
39236c6e A |
2177 | if (!wildcard) { |
2178 | /* | |
2179 | * Not found. | |
2180 | */ | |
2181 | lck_rw_done(pcbinfo->ipi_lock); | |
2182 | return (NULL); | |
2183 | } | |
2184 | ||
2185 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, | |
2186 | pcbinfo->ipi_hashmask)]; | |
2187 | LIST_FOREACH(inp, head, inp_hash) { | |
9bccf70c | 2188 | #if INET6 |
39236c6e A |
2189 | if (!(inp->inp_vflag & INP_IPV4)) |
2190 | continue; | |
2191 | #endif /* INET6 */ | |
fe8ab488 | 2192 | if (inp_restricted_recv(inp, ifp)) |
39236c6e A |
2193 | continue; |
2194 | ||
2195 | if (inp->inp_faddr.s_addr == INADDR_ANY && | |
2196 | inp->inp_lport == lport) { | |
2197 | if (inp->inp_laddr.s_addr == laddr.s_addr) { | |
2198 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != | |
2199 | WNT_STOPUSING) { | |
2200 | lck_rw_done(pcbinfo->ipi_lock); | |
2201 | return (inp); | |
2202 | } else { | |
2203 | /* it's dead; say it isn't found */ | |
2204 | lck_rw_done(pcbinfo->ipi_lock); | |
2205 | return (NULL); | |
91447636 | 2206 | } |
39236c6e | 2207 | } else if (inp->inp_laddr.s_addr == INADDR_ANY) { |
2d21ac55 | 2208 | #if INET6 |
39236c6e A |
2209 | if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) |
2210 | local_wild_mapped = inp; | |
2211 | else | |
2d21ac55 | 2212 | #endif /* INET6 */ |
1c79356b | 2213 | local_wild = inp; |
1c79356b A |
2214 | } |
2215 | } | |
39236c6e A |
2216 | } |
2217 | if (local_wild == NULL) { | |
2d21ac55 | 2218 | #if INET6 |
39236c6e A |
2219 | if (local_wild_mapped != NULL) { |
2220 | if (in_pcb_checkstate(local_wild_mapped, | |
2221 | WNT_ACQUIRE, 0) != WNT_STOPUSING) { | |
2222 | lck_rw_done(pcbinfo->ipi_lock); | |
2223 | return (local_wild_mapped); | |
2224 | } else { | |
2225 | /* it's dead; say it isn't found */ | |
2226 | lck_rw_done(pcbinfo->ipi_lock); | |
2227 | return (NULL); | |
91447636 | 2228 | } |
91447636 | 2229 | } |
39236c6e A |
2230 | #endif /* INET6 */ |
2231 | lck_rw_done(pcbinfo->ipi_lock); | |
2232 | return (NULL); | |
2233 | } | |
2234 | if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) { | |
2235 | lck_rw_done(pcbinfo->ipi_lock); | |
2236 | return (local_wild); | |
1c79356b | 2237 | } |
1c79356b | 2238 | /* |
39236c6e | 2239 | * It's either not found or is already dead. |
1c79356b | 2240 | */ |
39236c6e | 2241 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
2242 | return (NULL); |
2243 | } | |
2244 | ||
2245 | /* | |
4bd07ac2 A |
2246 | * @brief Insert PCB onto various hash lists. |
2247 | * | |
2248 | * @param inp Pointer to internet protocol control block | |
2249 | * @param locked Implies if ipi_lock (protecting pcb list) | |
2250 | * is already locked or not. | |
2251 | * | |
2252 | * @return int error on failure and 0 on success | |
1c79356b A |
2253 | */ |
2254 | int | |
2d21ac55 | 2255 | in_pcbinshash(struct inpcb *inp, int locked) |
1c79356b A |
2256 | { |
2257 | struct inpcbhead *pcbhash; | |
2258 | struct inpcbporthead *pcbporthash; | |
2259 | struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; | |
2260 | struct inpcbport *phd; | |
2261 | u_int32_t hashkey_faddr; | |
2262 | ||
39236c6e A |
2263 | if (!locked) { |
2264 | if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) { | |
2265 | /* | |
2266 | * Lock inversion issue, mostly with udp | |
2267 | * multicast packets | |
2268 | */ | |
2269 | socket_unlock(inp->inp_socket, 0); | |
2270 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); | |
2271 | socket_lock(inp->inp_socket, 0); | |
39236c6e A |
2272 | } |
2273 | } | |
b0d623f7 | 2274 | |
4bd07ac2 A |
2275 | /* |
2276 | * This routine or its caller may have given up | |
2277 | * socket's protocol lock briefly. | |
2278 | * During that time the socket may have been dropped. | |
2279 | * Safe-guarding against that. | |
2280 | */ | |
2281 | if (inp->inp_state == INPCB_STATE_DEAD) { | |
2282 | if (!locked) { | |
2283 | lck_rw_done(pcbinfo->ipi_lock); | |
2284 | } | |
2285 | return (ECONNABORTED); | |
2286 | } | |
2287 | ||
2288 | ||
1c79356b A |
2289 | #if INET6 |
2290 | if (inp->inp_vflag & INP_IPV6) | |
2291 | hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; | |
2292 | else | |
2293 | #endif /* INET6 */ | |
39236c6e | 2294 | hashkey_faddr = inp->inp_faddr.s_addr; |
1c79356b | 2295 | |
39236c6e A |
2296 | inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, |
2297 | inp->inp_fport, pcbinfo->ipi_hashmask); | |
91447636 | 2298 | |
39236c6e | 2299 | pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element]; |
1c79356b | 2300 | |
39236c6e A |
2301 | pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport, |
2302 | pcbinfo->ipi_porthashmask)]; | |
1c79356b A |
2303 | |
2304 | /* | |
2305 | * Go through port list and look for a head for this lport. | |
2306 | */ | |
9bccf70c | 2307 | LIST_FOREACH(phd, pcbporthash, phd_hash) { |
1c79356b A |
2308 | if (phd->phd_port == inp->inp_lport) |
2309 | break; | |
2310 | } | |
316670eb | 2311 | |
1c79356b A |
2312 | /* |
2313 | * If none exists, malloc one and tack it on. | |
2314 | */ | |
2315 | if (phd == NULL) { | |
39236c6e A |
2316 | MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport), |
2317 | M_PCB, M_WAITOK); | |
1c79356b | 2318 | if (phd == NULL) { |
91447636 | 2319 | if (!locked) |
39236c6e | 2320 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
2321 | return (ENOBUFS); /* XXX */ |
2322 | } | |
2323 | phd->phd_port = inp->inp_lport; | |
2324 | LIST_INIT(&phd->phd_pcblist); | |
2325 | LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); | |
2326 | } | |
fe8ab488 A |
2327 | |
2328 | VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); | |
5ba3f43e A |
2329 | |
2330 | ||
1c79356b A |
2331 | inp->inp_phd = phd; |
2332 | LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); | |
2333 | LIST_INSERT_HEAD(pcbhash, inp, inp_hash); | |
fe8ab488 A |
2334 | inp->inp_flags2 |= INP2_INHASHLIST; |
2335 | ||
91447636 | 2336 | if (!locked) |
39236c6e | 2337 | lck_rw_done(pcbinfo->ipi_lock); |
39037602 | 2338 | |
fe8ab488 A |
2339 | #if NECP |
2340 | // This call catches the original setting of the local address | |
2341 | inp_update_necp_policy(inp, NULL, NULL, 0); | |
2342 | #endif /* NECP */ | |
39037602 | 2343 | |
1c79356b A |
2344 | return (0); |
2345 | } | |
2346 | ||
2347 | /* | |
2348 | * Move PCB to the proper hash bucket when { faddr, fport } have been | |
2349 | * changed. NOTE: This does not handle the case of the lport changing (the | |
2350 | * hashed port list would have to be updated as well), so the lport must | |
2351 | * not change after in_pcbinshash() has been called. | |
2352 | */ | |
2353 | void | |
2d21ac55 | 2354 | in_pcbrehash(struct inpcb *inp) |
1c79356b A |
2355 | { |
2356 | struct inpcbhead *head; | |
2357 | u_int32_t hashkey_faddr; | |
2358 | ||
2359 | #if INET6 | |
2360 | if (inp->inp_vflag & INP_IPV6) | |
2361 | hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; | |
2362 | else | |
2363 | #endif /* INET6 */ | |
39236c6e A |
2364 | hashkey_faddr = inp->inp_faddr.s_addr; |
2365 | ||
2366 | inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, | |
2367 | inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask); | |
2368 | head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element]; | |
1c79356b | 2369 | |
fe8ab488 A |
2370 | if (inp->inp_flags2 & INP2_INHASHLIST) { |
2371 | LIST_REMOVE(inp, inp_hash); | |
2372 | inp->inp_flags2 &= ~INP2_INHASHLIST; | |
2373 | } | |
2374 | ||
2375 | VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); | |
1c79356b | 2376 | LIST_INSERT_HEAD(head, inp, inp_hash); |
fe8ab488 | 2377 | inp->inp_flags2 |= INP2_INHASHLIST; |
39037602 | 2378 | |
fe8ab488 A |
2379 | #if NECP |
2380 | // This call catches updates to the remote addresses | |
2381 | inp_update_necp_policy(inp, NULL, NULL, 0); | |
2382 | #endif /* NECP */ | |
1c79356b A |
2383 | } |
2384 | ||
2385 | /* | |
2386 | * Remove PCB from various lists. | |
316670eb | 2387 | * Must be called pcbinfo lock is held in exclusive mode. |
1c79356b A |
2388 | */ |
2389 | void | |
2d21ac55 | 2390 | in_pcbremlists(struct inpcb *inp) |
1c79356b A |
2391 | { |
2392 | inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; | |
1c79356b | 2393 | |
fe8ab488 A |
2394 | /* |
2395 | * Check if it's in hashlist -- an inp is placed in hashlist when | |
39037602 | 2396 | * it's local port gets assigned. So it should also be present |
fe8ab488 A |
2397 | * in the port list. |
2398 | */ | |
2399 | if (inp->inp_flags2 & INP2_INHASHLIST) { | |
1c79356b A |
2400 | struct inpcbport *phd = inp->inp_phd; |
2401 | ||
fe8ab488 A |
2402 | VERIFY(phd != NULL && inp->inp_lport > 0); |
2403 | ||
1c79356b | 2404 | LIST_REMOVE(inp, inp_hash); |
fe8ab488 A |
2405 | inp->inp_hash.le_next = NULL; |
2406 | inp->inp_hash.le_prev = NULL; | |
2407 | ||
1c79356b | 2408 | LIST_REMOVE(inp, inp_portlist); |
fe8ab488 A |
2409 | inp->inp_portlist.le_next = NULL; |
2410 | inp->inp_portlist.le_prev = NULL; | |
2411 | if (LIST_EMPTY(&phd->phd_pcblist)) { | |
1c79356b A |
2412 | LIST_REMOVE(phd, phd_hash); |
2413 | FREE(phd, M_PCB); | |
2414 | } | |
fe8ab488 A |
2415 | inp->inp_phd = NULL; |
2416 | inp->inp_flags2 &= ~INP2_INHASHLIST; | |
1c79356b | 2417 | } |
fe8ab488 | 2418 | VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); |
39236c6e A |
2419 | |
2420 | if (inp->inp_flags2 & INP2_TIMEWAIT) { | |
2421 | /* Remove from time-wait queue */ | |
2422 | tcp_remove_from_time_wait(inp); | |
2423 | inp->inp_flags2 &= ~INP2_TIMEWAIT; | |
2424 | VERIFY(inp->inp_pcbinfo->ipi_twcount != 0); | |
2425 | inp->inp_pcbinfo->ipi_twcount--; | |
2426 | } else { | |
2427 | /* Remove from global inp list if it is not time-wait */ | |
2428 | LIST_REMOVE(inp, inp_list); | |
2429 | } | |
316670eb | 2430 | |
bd504ef0 | 2431 | if (inp->inp_flags2 & INP2_IN_FCTREE) { |
39236c6e | 2432 | inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE)); |
bd504ef0 A |
2433 | VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE)); |
2434 | } | |
39236c6e | 2435 | |
1c79356b A |
2436 | inp->inp_pcbinfo->ipi_count--; |
2437 | } | |
2438 | ||
39236c6e A |
2439 | /* |
2440 | * Mechanism used to defer the memory release of PCBs | |
2441 | * The pcb list will contain the pcb until the reaper can clean it up if | |
2442 | * the following conditions are met: | |
2443 | * 1) state "DEAD", | |
2444 | * 2) wantcnt is STOPUSING | |
2445 | * 3) usecount is 0 | |
91447636 | 2446 | * This function will be called to either mark the pcb as |
39236c6e | 2447 | */ |
91447636 A |
2448 | int |
2449 | in_pcb_checkstate(struct inpcb *pcb, int mode, int locked) | |
91447636 | 2450 | { |
39236c6e | 2451 | volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt; |
2d21ac55 A |
2452 | UInt32 origwant; |
2453 | UInt32 newwant; | |
91447636 A |
2454 | |
2455 | switch (mode) { | |
39236c6e A |
2456 | case WNT_STOPUSING: |
2457 | /* | |
2458 | * Try to mark the pcb as ready for recycling. CAS with | |
2459 | * STOPUSING, if success we're good, if it's in use, will | |
2460 | * be marked later | |
2461 | */ | |
2462 | if (locked == 0) | |
2463 | socket_lock(pcb->inp_socket, 1); | |
2464 | pcb->inp_state = INPCB_STATE_DEAD; | |
91447636 | 2465 | |
39236c6e A |
2466 | stopusing: |
2467 | if (pcb->inp_socket->so_usecount < 0) { | |
2468 | panic("%s: pcb=%p so=%p usecount is negative\n", | |
2469 | __func__, pcb, pcb->inp_socket); | |
2470 | /* NOTREACHED */ | |
2471 | } | |
2472 | if (locked == 0) | |
2473 | socket_unlock(pcb->inp_socket, 1); | |
91447636 | 2474 | |
39236c6e | 2475 | inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST); |
6d2010ae | 2476 | |
39236c6e A |
2477 | origwant = *wantcnt; |
2478 | if ((UInt16) origwant == 0xffff) /* should stop using */ | |
2479 | return (WNT_STOPUSING); | |
2480 | newwant = 0xffff; | |
2481 | if ((UInt16) origwant == 0) { | |
2482 | /* try to mark it as unsuable now */ | |
2483 | OSCompareAndSwap(origwant, newwant, wantcnt); | |
2484 | } | |
2485 | return (WNT_STOPUSING); | |
91447636 | 2486 | |
39236c6e A |
2487 | case WNT_ACQUIRE: |
2488 | /* | |
2489 | * Try to increase reference to pcb. If WNT_STOPUSING | |
2490 | * should bail out. If socket state DEAD, try to set count | |
2491 | * to STOPUSING, return failed otherwise increase cnt. | |
2492 | */ | |
2493 | do { | |
91447636 | 2494 | origwant = *wantcnt; |
39236c6e A |
2495 | if ((UInt16) origwant == 0xffff) { |
2496 | /* should stop using */ | |
91447636 | 2497 | return (WNT_STOPUSING); |
91447636 | 2498 | } |
39236c6e A |
2499 | newwant = origwant + 1; |
2500 | } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); | |
2501 | return (WNT_ACQUIRE); | |
91447636 | 2502 | |
39236c6e A |
2503 | case WNT_RELEASE: |
2504 | /* | |
2505 | * Release reference. If result is null and pcb state | |
2506 | * is DEAD, set wanted bit to STOPUSING | |
2507 | */ | |
2508 | if (locked == 0) | |
2509 | socket_lock(pcb->inp_socket, 1); | |
91447636 | 2510 | |
39236c6e A |
2511 | do { |
2512 | origwant = *wantcnt; | |
2513 | if ((UInt16) origwant == 0x0) { | |
2514 | panic("%s: pcb=%p release with zero count", | |
2515 | __func__, pcb); | |
2516 | /* NOTREACHED */ | |
2517 | } | |
2518 | if ((UInt16) origwant == 0xffff) { | |
2519 | /* should stop using */ | |
2520 | if (locked == 0) | |
2521 | socket_unlock(pcb->inp_socket, 1); | |
2522 | return (WNT_STOPUSING); | |
2523 | } | |
2524 | newwant = origwant - 1; | |
2525 | } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); | |
2526 | ||
2527 | if (pcb->inp_state == INPCB_STATE_DEAD) | |
2528 | goto stopusing; | |
2529 | if (pcb->inp_socket->so_usecount < 0) { | |
2530 | panic("%s: RELEASE pcb=%p so=%p usecount is negative\n", | |
2531 | __func__, pcb, pcb->inp_socket); | |
2532 | /* NOTREACHED */ | |
2533 | } | |
91447636 | 2534 | |
39236c6e A |
2535 | if (locked == 0) |
2536 | socket_unlock(pcb->inp_socket, 1); | |
2537 | return (WNT_RELEASE); | |
91447636 | 2538 | |
39236c6e A |
2539 | default: |
2540 | panic("%s: so=%p not a valid state =%x\n", __func__, | |
2541 | pcb->inp_socket, mode); | |
2542 | /* NOTREACHED */ | |
91447636 A |
2543 | } |
2544 | ||
2545 | /* NOTREACHED */ | |
2546 | return (mode); | |
2547 | } | |
2548 | ||
2549 | /* | |
2550 | * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat. | |
2551 | * The inpcb_compat data structure is passed to user space and must | |
b0d623f7 | 2552 | * not change. We intentionally avoid copying pointers. |
91447636 A |
2553 | */ |
2554 | void | |
39236c6e | 2555 | inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat) |
91447636 | 2556 | { |
39236c6e | 2557 | bzero(inp_compat, sizeof (*inp_compat)); |
91447636 A |
2558 | inp_compat->inp_fport = inp->inp_fport; |
2559 | inp_compat->inp_lport = inp->inp_lport; | |
316670eb | 2560 | inp_compat->nat_owner = 0; |
39236c6e | 2561 | inp_compat->nat_cookie = 0; |
91447636 A |
2562 | inp_compat->inp_gencnt = inp->inp_gencnt; |
2563 | inp_compat->inp_flags = inp->inp_flags; | |
2564 | inp_compat->inp_flow = inp->inp_flow; | |
2565 | inp_compat->inp_vflag = inp->inp_vflag; | |
2566 | inp_compat->inp_ip_ttl = inp->inp_ip_ttl; | |
2567 | inp_compat->inp_ip_p = inp->inp_ip_p; | |
39236c6e A |
2568 | inp_compat->inp_dependfaddr.inp6_foreign = |
2569 | inp->inp_dependfaddr.inp6_foreign; | |
2570 | inp_compat->inp_dependladdr.inp6_local = | |
2571 | inp->inp_dependladdr.inp6_local; | |
91447636 | 2572 | inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; |
39236c6e | 2573 | inp_compat->inp_depend6.inp6_hlim = 0; |
91447636 | 2574 | inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; |
39236c6e | 2575 | inp_compat->inp_depend6.inp6_ifindex = 0; |
91447636 A |
2576 | inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; |
2577 | } | |
9bccf70c | 2578 | |
5ba3f43e | 2579 | #if !CONFIG_EMBEDDED |
b0d623f7 | 2580 | void |
39236c6e | 2581 | inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp) |
b0d623f7 | 2582 | { |
6d2010ae A |
2583 | xinp->inp_fport = inp->inp_fport; |
2584 | xinp->inp_lport = inp->inp_lport; | |
2585 | xinp->inp_gencnt = inp->inp_gencnt; | |
2586 | xinp->inp_flags = inp->inp_flags; | |
2587 | xinp->inp_flow = inp->inp_flow; | |
2588 | xinp->inp_vflag = inp->inp_vflag; | |
2589 | xinp->inp_ip_ttl = inp->inp_ip_ttl; | |
2590 | xinp->inp_ip_p = inp->inp_ip_p; | |
2591 | xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign; | |
2592 | xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local; | |
2593 | xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; | |
39236c6e | 2594 | xinp->inp_depend6.inp6_hlim = 0; |
6d2010ae | 2595 | xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; |
39236c6e | 2596 | xinp->inp_depend6.inp6_ifindex = 0; |
6d2010ae | 2597 | xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; |
b0d623f7 | 2598 | } |
5ba3f43e | 2599 | #endif /* !CONFIG_EMBEDDED */ |
b0d623f7 | 2600 | |
b0d623f7 A |
2601 | /* |
2602 | * The following routines implement this scheme: | |
2603 | * | |
2604 | * Callers of ip_output() that intend to cache the route in the inpcb pass | |
2605 | * a local copy of the struct route to ip_output(). Using a local copy of | |
2606 | * the cached route significantly simplifies things as IP no longer has to | |
2607 | * worry about having exclusive access to the passed in struct route, since | |
2608 | * it's defined in the caller's stack; in essence, this allows for a lock- | |
2609 | * less operation when updating the struct route at the IP level and below, | |
2610 | * whenever necessary. The scheme works as follows: | |
2611 | * | |
2612 | * Prior to dropping the socket's lock and calling ip_output(), the caller | |
2613 | * copies the struct route from the inpcb into its stack, and adds a reference | |
2614 | * to the cached route entry, if there was any. The socket's lock is then | |
2615 | * dropped and ip_output() is called with a pointer to the copy of struct | |
2616 | * route defined on the stack (not to the one in the inpcb.) | |
2617 | * | |
2618 | * Upon returning from ip_output(), the caller then acquires the socket's | |
2619 | * lock and synchronizes the cache; if there is no route cached in the inpcb, | |
2620 | * it copies the local copy of struct route (which may or may not contain any | |
2621 | * route) back into the cache; otherwise, if the inpcb has a route cached in | |
2622 | * it, the one in the local copy will be freed, if there's any. Trashing the | |
2623 | * cached route in the inpcb can be avoided because ip_output() is single- | |
2624 | * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized | |
2625 | * by the socket/transport layer.) | |
2626 | */ | |
2627 | void | |
2628 | inp_route_copyout(struct inpcb *inp, struct route *dst) | |
2629 | { | |
2630 | struct route *src = &inp->inp_route; | |
2631 | ||
5ba3f43e | 2632 | socket_lock_assert_owned(inp->inp_socket); |
b0d623f7 | 2633 | |
0b4c1975 | 2634 | /* |
39236c6e | 2635 | * If the route in the PCB is stale or not for IPv4, blow it away; |
0b4c1975 A |
2636 | * this is possible in the case of IPv4-mapped address case. |
2637 | */ | |
39236c6e A |
2638 | if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) |
2639 | ROUTE_RELEASE(src); | |
316670eb | 2640 | |
39236c6e | 2641 | route_copyout(dst, src, sizeof (*dst)); |
b0d623f7 A |
2642 | } |
2643 | ||
2644 | void | |
2645 | inp_route_copyin(struct inpcb *inp, struct route *src) | |
2646 | { | |
2647 | struct route *dst = &inp->inp_route; | |
2648 | ||
5ba3f43e | 2649 | socket_lock_assert_owned(inp->inp_socket); |
b0d623f7 A |
2650 | |
2651 | /* Minor sanity check */ | |
2652 | if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) | |
2653 | panic("%s: wrong or corrupted route: %p", __func__, src); | |
2654 | ||
39236c6e | 2655 | route_copyin(src, dst, sizeof (*src)); |
6d2010ae A |
2656 | } |
2657 | ||
2658 | /* | |
39037602 | 2659 | * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option. |
6d2010ae | 2660 | */ |
316670eb | 2661 | int |
39236c6e | 2662 | inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp) |
6d2010ae | 2663 | { |
316670eb A |
2664 | struct ifnet *ifp = NULL; |
2665 | ||
2666 | ifnet_head_lock_shared(); | |
2667 | if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE && | |
2668 | (ifp = ifindex2ifnet[ifscope]) == NULL)) { | |
2669 | ifnet_head_done(); | |
2670 | return (ENXIO); | |
2671 | } | |
2672 | ifnet_head_done(); | |
2673 | ||
2674 | VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE); | |
2675 | ||
6d2010ae A |
2676 | /* |
2677 | * A zero interface scope value indicates an "unbind". | |
2678 | * Otherwise, take in whatever value the app desires; | |
2679 | * the app may already know the scope (or force itself | |
2680 | * to such a scope) ahead of time before the interface | |
2681 | * gets attached. It doesn't matter either way; any | |
2682 | * route lookup from this point on will require an | |
2683 | * exact match for the embedded interface scope. | |
2684 | */ | |
316670eb A |
2685 | inp->inp_boundifp = ifp; |
2686 | if (inp->inp_boundifp == NULL) | |
6d2010ae A |
2687 | inp->inp_flags &= ~INP_BOUND_IF; |
2688 | else | |
2689 | inp->inp_flags |= INP_BOUND_IF; | |
2690 | ||
2691 | /* Blow away any cached route in the PCB */ | |
39236c6e A |
2692 | ROUTE_RELEASE(&inp->inp_route); |
2693 | ||
2694 | if (pifp != NULL) | |
2695 | *pifp = ifp; | |
316670eb A |
2696 | |
2697 | return (0); | |
6d2010ae A |
2698 | } |
2699 | ||
2700 | /* | |
39236c6e A |
2701 | * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option, |
2702 | * as well as for setting PROC_UUID_NO_CELLULAR policy. | |
6d2010ae | 2703 | */ |
39236c6e A |
2704 | void |
2705 | inp_set_nocellular(struct inpcb *inp) | |
6d2010ae | 2706 | { |
39236c6e | 2707 | inp->inp_flags |= INP_NO_IFT_CELLULAR; |
6d2010ae A |
2708 | |
2709 | /* Blow away any cached route in the PCB */ | |
39236c6e A |
2710 | ROUTE_RELEASE(&inp->inp_route); |
2711 | } | |
2712 | ||
2713 | /* | |
2714 | * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option, | |
2715 | * as well as for clearing PROC_UUID_NO_CELLULAR policy. | |
2716 | */ | |
2717 | void | |
2718 | inp_clear_nocellular(struct inpcb *inp) | |
2719 | { | |
2720 | struct socket *so = inp->inp_socket; | |
2721 | ||
2722 | /* | |
2723 | * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket | |
2724 | * has a higher precendence than INP_NO_IFT_CELLULAR. Clear the flag | |
2725 | * if and only if the socket is unrestricted. | |
2726 | */ | |
2727 | if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) { | |
2728 | inp->inp_flags &= ~INP_NO_IFT_CELLULAR; | |
2729 | ||
2730 | /* Blow away any cached route in the PCB */ | |
2731 | ROUTE_RELEASE(&inp->inp_route); | |
6d2010ae | 2732 | } |
39236c6e | 2733 | } |
6d2010ae | 2734 | |
fe8ab488 A |
2735 | void |
2736 | inp_set_noexpensive(struct inpcb *inp) | |
2737 | { | |
2738 | inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE; | |
2739 | ||
2740 | /* Blow away any cached route in the PCB */ | |
2741 | ROUTE_RELEASE(&inp->inp_route); | |
2742 | } | |
2743 | ||
2744 | void | |
2745 | inp_set_awdl_unrestricted(struct inpcb *inp) | |
2746 | { | |
2747 | inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED; | |
2748 | ||
2749 | /* Blow away any cached route in the PCB */ | |
2750 | ROUTE_RELEASE(&inp->inp_route); | |
2751 | } | |
2752 | ||
2753 | boolean_t | |
2754 | inp_get_awdl_unrestricted(struct inpcb *inp) | |
2755 | { | |
2756 | return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE; | |
2757 | } | |
2758 | ||
2759 | void | |
2760 | inp_clear_awdl_unrestricted(struct inpcb *inp) | |
2761 | { | |
2762 | inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED; | |
2763 | ||
2764 | /* Blow away any cached route in the PCB */ | |
2765 | ROUTE_RELEASE(&inp->inp_route); | |
2766 | } | |
2767 | ||
39037602 A |
2768 | void |
2769 | inp_set_intcoproc_allowed(struct inpcb *inp) | |
2770 | { | |
2771 | inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED; | |
2772 | ||
2773 | /* Blow away any cached route in the PCB */ | |
2774 | ROUTE_RELEASE(&inp->inp_route); | |
2775 | } | |
2776 | ||
2777 | boolean_t | |
2778 | inp_get_intcoproc_allowed(struct inpcb *inp) | |
2779 | { | |
2780 | return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE; | |
2781 | } | |
2782 | ||
2783 | void | |
2784 | inp_clear_intcoproc_allowed(struct inpcb *inp) | |
2785 | { | |
2786 | inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED; | |
2787 | ||
2788 | /* Blow away any cached route in the PCB */ | |
2789 | ROUTE_RELEASE(&inp->inp_route); | |
2790 | } | |
2791 | ||
fe8ab488 | 2792 | #if NECP |
39236c6e | 2793 | /* |
fe8ab488 | 2794 | * Called when PROC_UUID_NECP_APP_POLICY is set. |
39236c6e A |
2795 | */ |
2796 | void | |
fe8ab488 | 2797 | inp_set_want_app_policy(struct inpcb *inp) |
39236c6e | 2798 | { |
fe8ab488 | 2799 | inp->inp_flags2 |= INP2_WANT_APP_POLICY; |
39236c6e A |
2800 | } |
2801 | ||
2802 | /* | |
fe8ab488 | 2803 | * Called when PROC_UUID_NECP_APP_POLICY is cleared. |
39236c6e A |
2804 | */ |
2805 | void | |
fe8ab488 | 2806 | inp_clear_want_app_policy(struct inpcb *inp) |
39236c6e | 2807 | { |
fe8ab488 | 2808 | inp->inp_flags2 &= ~INP2_WANT_APP_POLICY; |
b0d623f7 | 2809 | } |
fe8ab488 | 2810 | #endif /* NECP */ |
316670eb A |
2811 | |
2812 | /* | |
2813 | * Calculate flow hash for an inp, used by an interface to identify a | |
2814 | * flow. When an interface provides flow control advisory, this flow | |
2815 | * hash is used as an identifier. | |
2816 | */ | |
2817 | u_int32_t | |
2818 | inp_calc_flowhash(struct inpcb *inp) | |
2819 | { | |
2820 | struct inp_flowhash_key fh __attribute__((aligned(8))); | |
2821 | u_int32_t flowhash = 0; | |
bd504ef0 | 2822 | struct inpcb *tmp_inp = NULL; |
316670eb A |
2823 | |
2824 | if (inp_hash_seed == 0) | |
2825 | inp_hash_seed = RandomULong(); | |
2826 | ||
2827 | bzero(&fh, sizeof (fh)); | |
2828 | ||
2829 | bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr)); | |
2830 | bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr)); | |
2831 | ||
2832 | fh.infh_lport = inp->inp_lport; | |
2833 | fh.infh_fport = inp->inp_fport; | |
2834 | fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET; | |
2835 | fh.infh_proto = inp->inp_ip_p; | |
2836 | fh.infh_rand1 = RandomULong(); | |
2837 | fh.infh_rand2 = RandomULong(); | |
2838 | ||
2839 | try_again: | |
2840 | flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed); | |
2841 | if (flowhash == 0) { | |
2842 | /* try to get a non-zero flowhash */ | |
2843 | inp_hash_seed = RandomULong(); | |
2844 | goto try_again; | |
2845 | } | |
2846 | ||
bd504ef0 | 2847 | inp->inp_flowhash = flowhash; |
316670eb | 2848 | |
bd504ef0 | 2849 | /* Insert the inp into inp_fc_tree */ |
39236c6e | 2850 | lck_mtx_lock_spin(&inp_fc_lck); |
bd504ef0 A |
2851 | tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp); |
2852 | if (tmp_inp != NULL) { | |
316670eb | 2853 | /* |
bd504ef0 A |
2854 | * There is a different inp with the same flowhash. |
2855 | * There can be a collision on flow hash but the | |
39236c6e | 2856 | * probability is low. Let's recompute the |
bd504ef0 | 2857 | * flowhash. |
316670eb A |
2858 | */ |
2859 | lck_mtx_unlock(&inp_fc_lck); | |
bd504ef0 A |
2860 | /* recompute hash seed */ |
2861 | inp_hash_seed = RandomULong(); | |
2862 | goto try_again; | |
316670eb | 2863 | } |
39236c6e | 2864 | |
bd504ef0 A |
2865 | RB_INSERT(inp_fc_tree, &inp_fc_tree, inp); |
2866 | inp->inp_flags2 |= INP2_IN_FCTREE; | |
316670eb | 2867 | lck_mtx_unlock(&inp_fc_lck); |
bd504ef0 | 2868 | |
39236c6e A |
2869 | return (flowhash); |
2870 | } | |
2871 | ||
2872 | void | |
2873 | inp_flowadv(uint32_t flowhash) | |
2874 | { | |
2875 | struct inpcb *inp; | |
2876 | ||
2877 | inp = inp_fc_getinp(flowhash, 0); | |
2878 | ||
2879 | if (inp == NULL) | |
2880 | return; | |
2881 | inp_fc_feedback(inp); | |
316670eb A |
2882 | } |
2883 | ||
bd504ef0 A |
2884 | /* |
2885 | * Function to compare inp_fc_entries in inp flow control tree | |
2886 | */ | |
2887 | static inline int | |
2888 | infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2) | |
316670eb | 2889 | { |
bd504ef0 | 2890 | return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash), |
39236c6e | 2891 | sizeof(inp1->inp_flowhash))); |
bd504ef0 | 2892 | } |
316670eb | 2893 | |
39236c6e | 2894 | static struct inpcb * |
bd504ef0 A |
2895 | inp_fc_getinp(u_int32_t flowhash, u_int32_t flags) |
2896 | { | |
2897 | struct inpcb *inp = NULL; | |
2898 | int locked = (flags & INPFC_SOLOCKED) ? 1 : 0; | |
316670eb A |
2899 | |
2900 | lck_mtx_lock_spin(&inp_fc_lck); | |
bd504ef0 A |
2901 | key_inp.inp_flowhash = flowhash; |
2902 | inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp); | |
2903 | if (inp == NULL) { | |
316670eb A |
2904 | /* inp is not present, return */ |
2905 | lck_mtx_unlock(&inp_fc_lck); | |
2906 | return (NULL); | |
2907 | } | |
2908 | ||
bd504ef0 A |
2909 | if (flags & INPFC_REMOVE) { |
2910 | RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp); | |
2911 | lck_mtx_unlock(&inp_fc_lck); | |
316670eb | 2912 | |
bd504ef0 A |
2913 | bzero(&(inp->infc_link), sizeof (inp->infc_link)); |
2914 | inp->inp_flags2 &= ~INP2_IN_FCTREE; | |
2915 | return (NULL); | |
316670eb | 2916 | } |
39236c6e | 2917 | |
bd504ef0 A |
2918 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) |
2919 | inp = NULL; | |
316670eb A |
2920 | lck_mtx_unlock(&inp_fc_lck); |
2921 | ||
bd504ef0 | 2922 | return (inp); |
316670eb A |
2923 | } |
2924 | ||
39236c6e | 2925 | static void |
316670eb A |
2926 | inp_fc_feedback(struct inpcb *inp) |
2927 | { | |
2928 | struct socket *so = inp->inp_socket; | |
2929 | ||
2930 | /* we already hold a want_cnt on this inp, socket can't be null */ | |
39236c6e | 2931 | VERIFY(so != NULL); |
316670eb A |
2932 | socket_lock(so, 1); |
2933 | ||
2934 | if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { | |
2935 | socket_unlock(so, 1); | |
2936 | return; | |
2937 | } | |
2938 | ||
fe8ab488 A |
2939 | if (inp->inp_sndinprog_cnt > 0) |
2940 | inp->inp_flags |= INP_FC_FEEDBACK; | |
2941 | ||
316670eb A |
2942 | /* |
2943 | * Return if the connection is not in flow-controlled state. | |
2944 | * This can happen if the connection experienced | |
2945 | * loss while it was in flow controlled state | |
2946 | */ | |
2947 | if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) { | |
2948 | socket_unlock(so, 1); | |
2949 | return; | |
2950 | } | |
2951 | inp_reset_fc_state(inp); | |
2952 | ||
39236c6e | 2953 | if (SOCK_TYPE(so) == SOCK_STREAM) |
316670eb A |
2954 | inp_fc_unthrottle_tcp(inp); |
2955 | ||
2956 | socket_unlock(so, 1); | |
2957 | } | |
2958 | ||
2959 | void | |
2960 | inp_reset_fc_state(struct inpcb *inp) | |
2961 | { | |
2962 | struct socket *so = inp->inp_socket; | |
2963 | int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0; | |
2964 | int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0; | |
2965 | ||
2966 | inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED); | |
2967 | ||
2968 | if (suspended) { | |
2969 | so->so_flags &= ~(SOF_SUSPENDED); | |
2970 | soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME)); | |
2971 | } | |
2972 | ||
316670eb A |
2973 | /* Give a write wakeup to unblock the socket */ |
2974 | if (needwakeup) | |
2975 | sowwakeup(so); | |
2976 | } | |
2977 | ||
2978 | int | |
2979 | inp_set_fc_state(struct inpcb *inp, int advcode) | |
2980 | { | |
bd504ef0 | 2981 | struct inpcb *tmp_inp = NULL; |
316670eb | 2982 | /* |
39236c6e | 2983 | * If there was a feedback from the interface when |
316670eb A |
2984 | * send operation was in progress, we should ignore |
2985 | * this flow advisory to avoid a race between setting | |
2986 | * flow controlled state and receiving feedback from | |
2987 | * the interface | |
2988 | */ | |
2989 | if (inp->inp_flags & INP_FC_FEEDBACK) | |
39236c6e | 2990 | return (0); |
316670eb A |
2991 | |
2992 | inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED); | |
39236c6e A |
2993 | if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash, |
2994 | INPFC_SOLOCKED)) != NULL) { | |
2995 | if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) | |
bd504ef0 A |
2996 | return (0); |
2997 | VERIFY(tmp_inp == inp); | |
316670eb A |
2998 | switch (advcode) { |
2999 | case FADV_FLOW_CONTROLLED: | |
3000 | inp->inp_flags |= INP_FLOW_CONTROLLED; | |
3001 | break; | |
3002 | case FADV_SUSPENDED: | |
3003 | inp->inp_flags |= INP_FLOW_SUSPENDED; | |
3004 | soevent(inp->inp_socket, | |
3005 | (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND)); | |
3006 | ||
3007 | /* Record the fact that suspend event was sent */ | |
3008 | inp->inp_socket->so_flags |= SOF_SUSPENDED; | |
3009 | break; | |
3010 | } | |
bd504ef0 | 3011 | return (1); |
316670eb | 3012 | } |
39236c6e | 3013 | return (0); |
316670eb A |
3014 | } |
3015 | ||
3016 | /* | |
3017 | * Handler for SO_FLUSH socket option. | |
3018 | */ | |
3019 | int | |
3020 | inp_flush(struct inpcb *inp, int optval) | |
3021 | { | |
3022 | u_int32_t flowhash = inp->inp_flowhash; | |
39236c6e | 3023 | struct ifnet *rtifp, *oifp; |
316670eb A |
3024 | |
3025 | /* Either all classes or one of the valid ones */ | |
3026 | if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) | |
3027 | return (EINVAL); | |
3028 | ||
3029 | /* We need a flow hash for identification */ | |
3030 | if (flowhash == 0) | |
3031 | return (0); | |
3032 | ||
39236c6e A |
3033 | /* Grab the interfaces from the route and pcb */ |
3034 | rtifp = ((inp->inp_route.ro_rt != NULL) ? | |
3035 | inp->inp_route.ro_rt->rt_ifp : NULL); | |
3036 | oifp = inp->inp_last_outifp; | |
3037 | ||
3038 | if (rtifp != NULL) | |
3039 | if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0); | |
3040 | if (oifp != NULL && oifp != rtifp) | |
3041 | if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0); | |
316670eb A |
3042 | |
3043 | return (0); | |
3044 | } | |
3045 | ||
3046 | /* | |
3047 | * Clear the INP_INADDR_ANY flag (special case for PPP only) | |
3048 | */ | |
39236c6e A |
3049 | void |
3050 | inp_clear_INP_INADDR_ANY(struct socket *so) | |
316670eb A |
3051 | { |
3052 | struct inpcb *inp = NULL; | |
3053 | ||
3054 | socket_lock(so, 1); | |
3055 | inp = sotoinpcb(so); | |
3056 | if (inp) { | |
3057 | inp->inp_flags &= ~INP_INADDR_ANY; | |
3058 | } | |
3059 | socket_unlock(so, 1); | |
3060 | } | |
3061 | ||
39236c6e A |
3062 | void |
3063 | inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo) | |
3064 | { | |
3065 | struct socket *so = inp->inp_socket; | |
3066 | ||
3067 | soprocinfo->spi_pid = so->last_pid; | |
fe8ab488 A |
3068 | if (so->last_pid != 0) |
3069 | uuid_copy(soprocinfo->spi_uuid, so->last_uuid); | |
39236c6e A |
3070 | /* |
3071 | * When not delegated, the effective pid is the same as the real pid | |
3072 | */ | |
fe8ab488 | 3073 | if (so->so_flags & SOF_DELEGATED) { |
3e170ce0 | 3074 | soprocinfo->spi_delegated = 1; |
39236c6e | 3075 | soprocinfo->spi_epid = so->e_pid; |
3e170ce0 | 3076 | uuid_copy(soprocinfo->spi_euuid, so->e_uuid); |
fe8ab488 | 3077 | } else { |
3e170ce0 | 3078 | soprocinfo->spi_delegated = 0; |
39236c6e | 3079 | soprocinfo->spi_epid = so->last_pid; |
fe8ab488 | 3080 | } |
39236c6e A |
3081 | } |
3082 | ||
3083 | int | |
3084 | inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash, | |
3085 | struct so_procinfo *soprocinfo) | |
3086 | { | |
3087 | struct inpcb *inp = NULL; | |
3088 | int found = 0; | |
3089 | ||
3090 | bzero(soprocinfo, sizeof (struct so_procinfo)); | |
3091 | ||
3092 | if (!flowhash) | |
3093 | return (-1); | |
3094 | ||
3095 | lck_rw_lock_shared(pcbinfo->ipi_lock); | |
3096 | LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { | |
3097 | if (inp->inp_state != INPCB_STATE_DEAD && | |
3098 | inp->inp_socket != NULL && | |
3099 | inp->inp_flowhash == flowhash) { | |
3100 | found = 1; | |
3101 | inp_get_soprocinfo(inp, soprocinfo); | |
3102 | break; | |
3103 | } | |
3104 | } | |
3105 | lck_rw_done(pcbinfo->ipi_lock); | |
3106 | ||
3107 | return (found); | |
3108 | } | |
3109 | ||
3110 | #if CONFIG_PROC_UUID_POLICY | |
3111 | static void | |
3112 | inp_update_cellular_policy(struct inpcb *inp, boolean_t set) | |
3113 | { | |
3114 | struct socket *so = inp->inp_socket; | |
3115 | int before, after; | |
3116 | ||
3117 | VERIFY(so != NULL); | |
3118 | VERIFY(inp->inp_state != INPCB_STATE_DEAD); | |
3119 | ||
fe8ab488 | 3120 | before = INP_NO_CELLULAR(inp); |
39236c6e A |
3121 | if (set) { |
3122 | inp_set_nocellular(inp); | |
3123 | } else { | |
3124 | inp_clear_nocellular(inp); | |
3125 | } | |
fe8ab488 | 3126 | after = INP_NO_CELLULAR(inp); |
39236c6e A |
3127 | if (net_io_policy_log && (before != after)) { |
3128 | static const char *ok = "OK"; | |
3129 | static const char *nok = "NOACCESS"; | |
3130 | uuid_string_t euuid_buf; | |
3131 | pid_t epid; | |
3132 | ||
3133 | if (so->so_flags & SOF_DELEGATED) { | |
3134 | uuid_unparse(so->e_uuid, euuid_buf); | |
3135 | epid = so->e_pid; | |
3136 | } else { | |
3137 | uuid_unparse(so->last_uuid, euuid_buf); | |
3138 | epid = so->last_pid; | |
3139 | } | |
3140 | ||
3141 | /* allow this socket to generate another notification event */ | |
3142 | so->so_ifdenied_notifies = 0; | |
3143 | ||
3144 | log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d " | |
3145 | "euuid %s%s %s->%s\n", __func__, | |
3146 | (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), | |
3147 | SOCK_TYPE(so), epid, euuid_buf, | |
3148 | (so->so_flags & SOF_DELEGATED) ? | |
3149 | " [delegated]" : "", | |
3150 | ((before < after) ? ok : nok), | |
3151 | ((before < after) ? nok : ok)); | |
3152 | } | |
3153 | } | |
3154 | ||
#if NECP
/*
 * Set or clear INP2_WANT_APP_POLICY on an inp.  When net_io_policy_log
 * is enabled and the flag actually changed, log the transition.
 */
static void
inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	if (set) {
		inp_set_want_app_policy(inp);
	} else {
		inp_clear_want_app_policy(inp);
	}
	after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);

	if (net_io_policy_log && (before != after)) {
		static const char *wanted = "WANTED";
		static const char *unwanted = "UNWANTED";
		uuid_string_t euuid_buf;
		pid_t epid;
		int delegated = (so->so_flags & SOF_DELEGATED) ? 1 : 0;

		/* Report the effective identity of the socket's owner */
		if (delegated) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    delegated ? " [delegated]" : "",
		    ((before < after) ? unwanted : wanted),
		    ((before < after) ? wanted : unwanted));
	}
}
#endif /* NECP */
39236c6e A |
3197 | #endif /* CONFIG_PROC_UUID_POLICY */ |
3198 | ||
fe8ab488 A |
#if NECP
/*
 * Re-evaluate the NECP policy match for an inp, using the override
 * addresses / bound interface when supplied.  If NECP asks for the
 * socket to be rescoped and the socket is not yet bound (no local port,
 * unspecified IPv4 and IPv6 local address), bind it to the rescope
 * interface index.
 */
void
inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
{
	necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);

	if (!necp_socket_should_rescope(inp)) {
		return;
	}

	/* Leave sockets that are already bound alone */
	if (inp->inp_lport != 0 ||
	    inp->inp_laddr.s_addr != INADDR_ANY ||
	    !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
		return;
	}

	// We should rescope, and the socket is not yet bound
	inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
}
#endif /* NECP */
3213 | ||
39236c6e A |
/*
 * Refresh the per-process-UUID policy state of an inp.
 *
 * Looks up the policy flags for the socket's (effective) UUID and, when
 * the policy generation count changed, updates the cellular and (with
 * NECP) the want-app-policy settings.  Returns 0 on success or when the
 * UUID has no entry (ENOENT); any other lookup error is returned as is.
 * Without CONFIG_PROC_UUID_POLICY this is a no-op returning 0.
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t ogencnt;
	int err = 0;

	/* Nothing to do when disabled or the PCB is detached or dead */
	if (!net_io_policy_uuid ||
	    so == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		return (0);
	}

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED)) {
		return (0);
	}

	ogencnt = so->so_policy_gencnt;
	err = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
	    so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt);

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go thru the checks below.
	 */
	if (err == ENOENT && ogencnt != 0) {
		so->so_policy_gencnt = 0;
	}

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly.  If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, FALSE);
		}
#endif /* NECP */
	}

	/* A missing entry is not an error for the caller */
	if (err == ENOENT) {
		return (0);
	}
	return (err);
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return (0);
#endif /* !CONFIG_PROC_UUID_POLICY */
}
39037602 A |
3274 | |
3275 | static unsigned int log_restricted; | |
3276 | SYSCTL_DECL(_net_inet); | |
3277 | SYSCTL_INT(_net_inet, OID_AUTO, log_restricted, | |
3278 | CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0, | |
3279 | "Log network restrictions"); | |
fe8ab488 A |
3280 | /* |
3281 | * Called when we need to enforce policy restrictions in the input path. | |
3282 | * | |
3283 | * Returns TRUE if we're not allowed to receive data, otherwise FALSE. | |
3284 | */ | |
39037602 A |
3285 | static boolean_t |
3286 | _inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp) | |
39236c6e A |
3287 | { |
3288 | VERIFY(inp != NULL); | |
3289 | ||
fe8ab488 A |
3290 | /* |
3291 | * Inbound restrictions. | |
3292 | */ | |
39236c6e A |
3293 | if (!sorestrictrecv) |
3294 | return (FALSE); | |
3295 | ||
fe8ab488 A |
3296 | if (ifp == NULL) |
3297 | return (FALSE); | |
3298 | ||
3299 | if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) | |
3300 | return (TRUE); | |
3301 | ||
3302 | if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) | |
3303 | return (TRUE); | |
3304 | ||
3305 | if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) | |
3306 | return (TRUE); | |
39037602 | 3307 | |
fe8ab488 | 3308 | if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) |
39236c6e A |
3309 | return (FALSE); |
3310 | ||
3311 | if (inp->inp_flags & INP_RECV_ANYIF) | |
3312 | return (FALSE); | |
3313 | ||
3314 | if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) | |
3315 | return (FALSE); | |
3316 | ||
39037602 A |
3317 | if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) |
3318 | return (TRUE); | |
3319 | ||
39236c6e A |
3320 | return (TRUE); |
3321 | } | |
fe8ab488 | 3322 | |
39037602 A |
3323 | boolean_t |
3324 | inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp) | |
3325 | { | |
3326 | boolean_t ret; | |
3327 | ||
3328 | ret = _inp_restricted_recv(inp, ifp); | |
3329 | if (ret == TRUE && log_restricted) { | |
743345f9 A |
3330 | printf("pid %d (%s) is unable to receive packets on %s\n", |
3331 | current_proc()->p_pid, proc_best_name(current_proc()), | |
3332 | ifp->if_xname); | |
39037602 A |
3333 | } |
3334 | return (ret); | |
3335 | } | |
3336 | ||
fe8ab488 A |
3337 | /* |
3338 | * Called when we need to enforce policy restrictions in the output path. | |
3339 | * | |
3340 | * Returns TRUE if we're not allowed to send data out, otherwise FALSE. | |
3341 | */ | |
39037602 A |
3342 | static boolean_t |
3343 | _inp_restricted_send(struct inpcb *inp, struct ifnet *ifp) | |
fe8ab488 A |
3344 | { |
3345 | VERIFY(inp != NULL); | |
3346 | ||
3347 | /* | |
3348 | * Outbound restrictions. | |
3349 | */ | |
3350 | if (!sorestrictsend) | |
3351 | return (FALSE); | |
3352 | ||
3353 | if (ifp == NULL) | |
3354 | return (FALSE); | |
3355 | ||
3356 | if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) | |
3357 | return (TRUE); | |
3358 | ||
3359 | if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) | |
3360 | return (TRUE); | |
3361 | ||
3362 | if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) | |
3363 | return (TRUE); | |
3364 | ||
39037602 A |
3365 | if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) |
3366 | return (TRUE); | |
3367 | ||
fe8ab488 A |
3368 | return (FALSE); |
3369 | } | |
39037602 A |
3370 | |
3371 | boolean_t | |
3372 | inp_restricted_send(struct inpcb *inp, struct ifnet *ifp) | |
3373 | { | |
3374 | boolean_t ret; | |
3375 | ||
3376 | ret = _inp_restricted_send(inp, ifp); | |
3377 | if (ret == TRUE && log_restricted) { | |
743345f9 A |
3378 | printf("pid %d (%s) is unable to transmit packets on %s\n", |
3379 | current_proc()->p_pid, proc_best_name(current_proc()), | |
3380 | ifp->if_xname); | |
39037602 A |
3381 | } |
3382 | return (ret); | |
3383 | } | |
3384 | ||
3385 | inline void | |
3386 | inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack) | |
3387 | { | |
3388 | struct ifnet *ifp = inp->inp_last_outifp; | |
3389 | struct socket *so = inp->inp_socket; | |
3390 | if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) && | |
3391 | (ifp->if_type == IFT_CELLULAR || | |
3392 | ifp->if_subfamily == IFNET_SUBFAMILY_WIFI)) { | |
3393 | int32_t unsent; | |
3394 | ||
3395 | so->so_snd.sb_flags |= SB_SNDBYTE_CNT; | |
3396 | ||
3397 | /* | |
3398 | * There can be data outstanding before the connection | |
3399 | * becomes established -- TFO case | |
3400 | */ | |
3401 | if (so->so_snd.sb_cc > 0) | |
3402 | inp_incr_sndbytes_total(so, so->so_snd.sb_cc); | |
3403 | ||
3404 | unsent = inp_get_sndbytes_allunsent(so, th_ack); | |
3405 | if (unsent > 0) | |
3406 | inp_incr_sndbytes_unsent(so, unsent); | |
3407 | } | |
3408 | } | |
3409 | ||
3410 | inline void | |
3411 | inp_incr_sndbytes_total(struct socket *so, int32_t len) | |
3412 | { | |
3413 | struct inpcb *inp = (struct inpcb *)so->so_pcb; | |
3414 | struct ifnet *ifp = inp->inp_last_outifp; | |
3415 | ||
3416 | if (ifp != NULL) { | |
3417 | VERIFY(ifp->if_sndbyte_total >= 0); | |
3418 | OSAddAtomic64(len, &ifp->if_sndbyte_total); | |
3419 | } | |
3420 | } | |
3421 | ||
3422 | inline void | |
3423 | inp_decr_sndbytes_total(struct socket *so, int32_t len) | |
3424 | { | |
3425 | struct inpcb *inp = (struct inpcb *)so->so_pcb; | |
3426 | struct ifnet *ifp = inp->inp_last_outifp; | |
3427 | ||
3428 | if (ifp != NULL) { | |
3429 | VERIFY(ifp->if_sndbyte_total >= len); | |
3430 | OSAddAtomic64(-len, &ifp->if_sndbyte_total); | |
3431 | } | |
3432 | } | |
3433 | ||
3434 | inline void | |
3435 | inp_incr_sndbytes_unsent(struct socket *so, int32_t len) | |
3436 | { | |
3437 | struct inpcb *inp = (struct inpcb *)so->so_pcb; | |
3438 | struct ifnet *ifp = inp->inp_last_outifp; | |
3439 | ||
3440 | if (ifp != NULL) { | |
3441 | VERIFY(ifp->if_sndbyte_unsent >= 0); | |
3442 | OSAddAtomic64(len, &ifp->if_sndbyte_unsent); | |
3443 | } | |
3444 | } | |
3445 | ||
3446 | inline void | |
3447 | inp_decr_sndbytes_unsent(struct socket *so, int32_t len) | |
3448 | { | |
3449 | struct inpcb *inp = (struct inpcb *)so->so_pcb; | |
3450 | struct ifnet *ifp = inp->inp_last_outifp; | |
3451 | ||
3452 | if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) | |
3453 | return; | |
3454 | ||
3455 | if (ifp != NULL) { | |
3456 | if (ifp->if_sndbyte_unsent >= len) | |
3457 | OSAddAtomic64(-len, &ifp->if_sndbyte_unsent); | |
3458 | else | |
3459 | ifp->if_sndbyte_unsent = 0; | |
3460 | } | |
3461 | } | |
3462 | ||
3463 | inline void | |
3464 | inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack) | |
3465 | { | |
3466 | int32_t len; | |
3467 | ||
3468 | if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) | |
3469 | return; | |
3470 | ||
3471 | len = inp_get_sndbytes_allunsent(so, th_ack); | |
3472 | inp_decr_sndbytes_unsent(so, len); | |
3473 | } | |
5ba3f43e A |
3474 | |
3475 | ||
3476 | inline void | |
3477 | inp_set_activity_bitmap(struct inpcb *inp) | |
3478 | { | |
3479 | in_stat_set_activity_bitmap(&inp->inp_nw_activity, net_uptime()); | |
3480 | } | |
3481 | ||
3482 | inline void | |
3483 | inp_get_activity_bitmap(struct inpcb *inp, activity_bitmap_t *ab) | |
3484 | { | |
3485 | bcopy(&inp->inp_nw_activity, ab, sizeof (*ab)); | |
3486 | } |