]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
fe8ab488 | 2 | * Copyright (c) 2000-2014 Apple Inc. All rights reserved. |
5d5c5d0d | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
39236c6e | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
39236c6e | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
39236c6e | 17 | * |
2d21ac55 A |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
39236c6e | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* | |
29 | * Copyright (c) 1982, 1986, 1991, 1993, 1995 | |
30 | * The Regents of the University of California. All rights reserved. | |
31 | * | |
32 | * Redistribution and use in source and binary forms, with or without | |
33 | * modification, are permitted provided that the following conditions | |
34 | * are met: | |
35 | * 1. Redistributions of source code must retain the above copyright | |
36 | * notice, this list of conditions and the following disclaimer. | |
37 | * 2. Redistributions in binary form must reproduce the above copyright | |
38 | * notice, this list of conditions and the following disclaimer in the | |
39 | * documentation and/or other materials provided with the distribution. | |
40 | * 3. All advertising materials mentioning features or use of this software | |
41 | * must display the following acknowledgement: | |
42 | * This product includes software developed by the University of | |
43 | * California, Berkeley and its contributors. | |
44 | * 4. Neither the name of the University nor the names of its contributors | |
45 | * may be used to endorse or promote products derived from this software | |
46 | * without specific prior written permission. | |
47 | * | |
48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
58 | * SUCH DAMAGE. | |
59 | * | |
60 | * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 | |
9bccf70c | 61 | * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $ |
1c79356b A |
62 | */ |
63 | ||
64 | #include <sys/param.h> | |
65 | #include <sys/systm.h> | |
66 | #include <sys/malloc.h> | |
67 | #include <sys/mbuf.h> | |
1c79356b | 68 | #include <sys/domain.h> |
1c79356b A |
69 | #include <sys/protosw.h> |
70 | #include <sys/socket.h> | |
71 | #include <sys/socketvar.h> | |
72 | #include <sys/proc.h> | |
73 | #include <sys/kernel.h> | |
74 | #include <sys/sysctl.h> | |
6d2010ae A |
75 | #include <sys/mcache.h> |
76 | #include <sys/kauth.h> | |
77 | #include <sys/priv.h> | |
39236c6e A |
78 | #include <sys/proc_uuid_policy.h> |
79 | #include <sys/syslog.h> | |
fe8ab488 | 80 | #include <sys/priv.h> |
39236c6e | 81 | |
91447636 | 82 | #include <libkern/OSAtomic.h> |
316670eb | 83 | #include <kern/locks.h> |
1c79356b A |
84 | |
85 | #include <machine/limits.h> | |
86 | ||
1c79356b | 87 | #include <kern/zalloc.h> |
1c79356b A |
88 | |
89 | #include <net/if.h> | |
1c79356b | 90 | #include <net/if_types.h> |
9bccf70c | 91 | #include <net/route.h> |
316670eb A |
92 | #include <net/flowhash.h> |
93 | #include <net/flowadv.h> | |
fe8ab488 | 94 | #include <net/ntstat.h> |
1c79356b A |
95 | |
96 | #include <netinet/in.h> | |
97 | #include <netinet/in_pcb.h> | |
98 | #include <netinet/in_var.h> | |
99 | #include <netinet/ip_var.h> | |
100 | #if INET6 | |
101 | #include <netinet/ip6.h> | |
102 | #include <netinet6/ip6_var.h> | |
103 | #endif /* INET6 */ | |
104 | ||
1c79356b | 105 | #include <sys/kdebug.h> |
b0d623f7 | 106 | #include <sys/random.h> |
39236c6e | 107 | |
316670eb | 108 | #include <dev/random/randomdev.h> |
39236c6e | 109 | #include <mach/boolean.h> |
1c79356b | 110 | |
fe8ab488 A |
111 | #if NECP |
112 | #include <net/necp.h> | |
9bccf70c | 113 | #endif |
1c79356b | 114 | |
39236c6e A |
115 | static lck_grp_t *inpcb_lock_grp; |
116 | static lck_attr_t *inpcb_lock_attr; | |
117 | static lck_grp_attr_t *inpcb_lock_grp_attr; | |
118 | decl_lck_mtx_data(static, inpcb_lock); /* global INPCB lock */ | |
119 | decl_lck_mtx_data(static, inpcb_timeout_lock); | |
120 | ||
121 | static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head); | |
122 | ||
123 | static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */ | |
124 | static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */ | |
125 | static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */ | |
126 | static boolean_t inpcb_fast_timer_on = FALSE; | |
fe8ab488 A |
127 | |
128 | /* | |
129 | * If the total number of gc reqs is above a threshold, schedule | |
130 | * garbage collect timer sooner | |
131 | */ | |
132 | static boolean_t inpcb_toomany_gcreq = FALSE; | |
133 | ||
134 | #define INPCB_GCREQ_THRESHOLD 50000 | |
135 | #define INPCB_TOOMANY_GCREQ_TIMER (hz/10) /* 10 times a second */ | |
136 | ||
39236c6e A |
137 | static void inpcb_sched_timeout(struct timeval *); |
138 | static void inpcb_timeout(void *); | |
139 | int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */ | |
140 | extern int tvtohz(struct timeval *); | |
141 | ||
#if CONFIG_PROC_UUID_POLICY
/* Per-process policy helpers; only built when UUID policies are configured. */
static void inp_update_cellular_policy(struct inpcb *, boolean_t);
#if NECP
static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
#endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */

/*
 * kdebug trace codes in the DBG_NETTCP subclass.
 * NOTE(review): presumably emitted by the PCB lookup routines, which are
 * not visible in this part of the file -- confirm against their users.
 */
#define	DBG_FNC_PCB_LOOKUP	NETDBG_CODE(DBG_NETTCP, (6 << 8))
#define	DBG_FNC_PCB_HLOOKUP	NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
1c79356b | 151 | |
1c79356b A |
152 | /* |
153 | * These configure the range of local port addresses assigned to | |
154 | * "unspecified" outgoing connections/packets/whatever. | |
155 | */ | |
9bccf70c A |
156 | int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ |
157 | int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ | |
39236c6e A |
158 | int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ |
159 | int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ | |
9bccf70c A |
160 | int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ |
161 | int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ | |
1c79356b | 162 | |
39236c6e | 163 | #define RANGECHK(var, min, max) \ |
1c79356b A |
164 | if ((var) < (min)) { (var) = (min); } \ |
165 | else if ((var) > (max)) { (var) = (max); } | |
166 | ||
1c79356b A |
167 | static int |
168 | sysctl_net_ipport_check SYSCTL_HANDLER_ARGS | |
169 | { | |
2d21ac55 | 170 | #pragma unused(arg1, arg2) |
39236c6e A |
171 | int error; |
172 | ||
173 | error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); | |
1c79356b A |
174 | if (!error) { |
175 | RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); | |
176 | RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); | |
177 | RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); | |
178 | RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); | |
179 | RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); | |
180 | RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); | |
181 | } | |
39236c6e | 182 | return (error); |
1c79356b A |
183 | } |
184 | ||
185 | #undef RANGECHK | |
186 | ||
39236c6e A |
187 | SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, |
188 | CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports"); | |
189 | ||
190 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, | |
191 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
192 | &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); | |
193 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, | |
194 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
195 | &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); | |
196 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, | |
197 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
198 | &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); | |
199 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, | |
200 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
201 | &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); | |
202 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, | |
203 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
204 | &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); | |
205 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, | |
206 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
207 | &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); | |
1c79356b | 208 | |
b0d623f7 A |
209 | extern int udp_use_randomport; |
210 | extern int tcp_use_randomport; | |
211 | ||
316670eb A |
/* Structs used for flowhash computation */

/*
 * One endpoint address of a flow.  The union lets the same 16 bytes be
 * viewed as an IPv4 address, an IPv6 address, or as raw 8/16/32-bit words.
 */
struct inp_flowhash_key_addr {
	union {
		struct in_addr	v4;
		struct in6_addr	v6;
		u_int8_t	addr8[16];
		u_int16_t	addr16[8];
		u_int32_t	addr32[4];
	} infha;
};

/*
 * Complete flow hash key: local and foreign address, local and foreign
 * port, address family, protocol, and two extra 32-bit words.
 */
struct inp_flowhash_key {
	struct inp_flowhash_key_addr	infh_laddr;
	struct inp_flowhash_key_addr	infh_faddr;
	u_int32_t			infh_lport;
	u_int32_t			infh_fport;
	u_int32_t			infh_af;
	u_int32_t			infh_proto;
	u_int32_t			infh_rand1;
	u_int32_t			infh_rand2;
};
233 | ||
39236c6e A |
234 | static u_int32_t inp_hash_seed = 0; |
235 | ||
236 | static int infc_cmp(const struct inpcb *, const struct inpcb *); | |
237 | ||
238 | /* Flags used by inp_fc_getinp */ | |
239 | #define INPFC_SOLOCKED 0x1 | |
240 | #define INPFC_REMOVE 0x2 | |
241 | static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t); | |
242 | ||
243 | static void inp_fc_feedback(struct inpcb *); | |
244 | extern void tcp_remove_from_time_wait(struct inpcb *inp); | |
316670eb | 245 | |
39236c6e | 246 | decl_lck_mtx_data(static, inp_fc_lck); |
316670eb | 247 | |
bd504ef0 A |
248 | RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree; |
249 | RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp); | |
250 | RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp); | |
316670eb | 251 | |
bd504ef0 A |
252 | /* |
253 | * Use this inp as a key to find an inp in the flowhash tree. | |
254 | * Accesses to it are protected by inp_fc_lck. | |
255 | */ | |
256 | struct inpcb key_inp; | |
316670eb | 257 | |
1c79356b A |
258 | /* |
259 | * in_pcb.c: manage the Protocol Control Blocks. | |
1c79356b A |
260 | */ |
261 | ||
316670eb | 262 | void |
39236c6e | 263 | in_pcbinit(void) |
316670eb | 264 | { |
39236c6e | 265 | static int inpcb_initialized = 0; |
316670eb | 266 | |
39236c6e A |
267 | VERIFY(!inpcb_initialized); |
268 | inpcb_initialized = 1; | |
316670eb | 269 | |
39236c6e A |
270 | inpcb_lock_grp_attr = lck_grp_attr_alloc_init(); |
271 | inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr); | |
272 | inpcb_lock_attr = lck_attr_alloc_init(); | |
273 | lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr); | |
274 | lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr); | |
275 | ||
276 | /* | |
277 | * Initialize data structures required to deliver | |
278 | * flow advisories. | |
279 | */ | |
280 | lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr); | |
bd504ef0 | 281 | lck_mtx_lock(&inp_fc_lck); |
316670eb | 282 | RB_INIT(&inp_fc_tree); |
bd504ef0 A |
283 | bzero(&key_inp, sizeof(key_inp)); |
284 | lck_mtx_unlock(&inp_fc_lck); | |
316670eb A |
285 | } |
286 | ||
39236c6e A |
287 | #define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \ |
288 | ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0)) | |
289 | static void | |
290 | inpcb_timeout(void *arg) | |
291 | { | |
292 | #pragma unused(arg) | |
293 | struct inpcbinfo *ipi; | |
294 | boolean_t t, gc; | |
295 | struct intimercount gccnt, tmcnt; | |
296 | struct timeval leeway; | |
fe8ab488 A |
297 | boolean_t toomany_gc = FALSE; |
298 | ||
299 | if (arg != NULL) { | |
300 | VERIFY(arg == &inpcb_toomany_gcreq); | |
301 | toomany_gc = *(boolean_t *)arg; | |
302 | } | |
39236c6e A |
303 | |
304 | /* | |
305 | * Update coarse-grained networking timestamp (in sec.); the idea | |
306 | * is to piggy-back on the timeout callout to update the counter | |
307 | * returnable via net_uptime(). | |
308 | */ | |
309 | net_update_uptime(); | |
310 | ||
fe8ab488 A |
311 | bzero(&gccnt, sizeof(gccnt)); |
312 | bzero(&tmcnt, sizeof(tmcnt)); | |
313 | ||
39236c6e A |
314 | lck_mtx_lock_spin(&inpcb_timeout_lock); |
315 | gc = inpcb_garbage_collecting; | |
316 | inpcb_garbage_collecting = FALSE; | |
39236c6e A |
317 | |
318 | t = inpcb_ticking; | |
319 | inpcb_ticking = FALSE; | |
320 | ||
321 | if (gc || t) { | |
322 | lck_mtx_unlock(&inpcb_timeout_lock); | |
323 | ||
324 | lck_mtx_lock(&inpcb_lock); | |
325 | TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) { | |
326 | if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) { | |
327 | bzero(&ipi->ipi_gc_req, | |
328 | sizeof(ipi->ipi_gc_req)); | |
329 | if (gc && ipi->ipi_gc != NULL) { | |
330 | ipi->ipi_gc(ipi); | |
331 | gccnt.intimer_lazy += | |
332 | ipi->ipi_gc_req.intimer_lazy; | |
333 | gccnt.intimer_fast += | |
334 | ipi->ipi_gc_req.intimer_fast; | |
335 | gccnt.intimer_nodelay += | |
336 | ipi->ipi_gc_req.intimer_nodelay; | |
337 | } | |
338 | } | |
339 | if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) { | |
340 | bzero(&ipi->ipi_timer_req, | |
341 | sizeof(ipi->ipi_timer_req)); | |
342 | if (t && ipi->ipi_timer != NULL) { | |
343 | ipi->ipi_timer(ipi); | |
344 | tmcnt.intimer_lazy += | |
345 | ipi->ipi_timer_req.intimer_lazy; | |
346 | tmcnt.intimer_lazy += | |
347 | ipi->ipi_timer_req.intimer_fast; | |
348 | tmcnt.intimer_nodelay += | |
349 | ipi->ipi_timer_req.intimer_nodelay; | |
350 | } | |
351 | } | |
352 | } | |
353 | lck_mtx_unlock(&inpcb_lock); | |
354 | lck_mtx_lock_spin(&inpcb_timeout_lock); | |
355 | } | |
356 | ||
357 | /* lock was dropped above, so check first before overriding */ | |
358 | if (!inpcb_garbage_collecting) | |
359 | inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt); | |
360 | if (!inpcb_ticking) | |
361 | inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt); | |
362 | ||
363 | /* re-arm the timer if there's work to do */ | |
fe8ab488 A |
364 | if (toomany_gc) { |
365 | inpcb_toomany_gcreq = FALSE; | |
366 | } else { | |
367 | inpcb_timeout_run--; | |
368 | VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2); | |
369 | } | |
39236c6e A |
370 | |
371 | bzero(&leeway, sizeof(leeway)); | |
372 | leeway.tv_sec = inpcb_timeout_lazy; | |
373 | if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) | |
374 | inpcb_sched_timeout(NULL); | |
375 | else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) | |
376 | /* be lazy when idle with little activity */ | |
377 | inpcb_sched_timeout(&leeway); | |
378 | else | |
379 | inpcb_sched_timeout(NULL); | |
380 | ||
381 | lck_mtx_unlock(&inpcb_timeout_lock); | |
382 | } | |
383 | ||
384 | static void | |
385 | inpcb_sched_timeout(struct timeval *leeway) | |
386 | { | |
387 | lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED); | |
388 | ||
389 | if (inpcb_timeout_run == 0 && | |
390 | (inpcb_garbage_collecting || inpcb_ticking)) { | |
391 | lck_mtx_convert_spin(&inpcb_timeout_lock); | |
392 | inpcb_timeout_run++; | |
393 | if (leeway == NULL) { | |
394 | inpcb_fast_timer_on = TRUE; | |
395 | timeout(inpcb_timeout, NULL, hz); | |
396 | } else { | |
397 | inpcb_fast_timer_on = FALSE; | |
398 | timeout_with_leeway(inpcb_timeout, NULL, hz, | |
399 | tvtohz(leeway)); | |
400 | } | |
401 | } else if (inpcb_timeout_run == 1 && | |
402 | leeway == NULL && !inpcb_fast_timer_on) { | |
403 | /* | |
404 | * Since the request was for a fast timer but the | |
405 | * scheduled timer is a lazy timer, try to schedule | |
406 | * another instance of fast timer also | |
407 | */ | |
408 | lck_mtx_convert_spin(&inpcb_timeout_lock); | |
409 | inpcb_timeout_run++; | |
410 | inpcb_fast_timer_on = TRUE; | |
411 | timeout(inpcb_timeout, NULL, hz); | |
412 | } | |
413 | } | |
414 | ||
415 | void | |
416 | inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type) | |
417 | { | |
418 | struct timeval leeway; | |
fe8ab488 | 419 | u_int32_t gccnt; |
39236c6e A |
420 | lck_mtx_lock_spin(&inpcb_timeout_lock); |
421 | inpcb_garbage_collecting = TRUE; | |
fe8ab488 A |
422 | |
423 | gccnt = ipi->ipi_gc_req.intimer_nodelay + | |
424 | ipi->ipi_gc_req.intimer_fast; | |
425 | ||
426 | if (gccnt > INPCB_GCREQ_THRESHOLD && !inpcb_toomany_gcreq) { | |
427 | inpcb_toomany_gcreq = TRUE; | |
428 | ||
429 | /* | |
430 | * There are toomany pcbs waiting to be garbage collected, | |
431 | * schedule a much faster timeout in addition to | |
432 | * the caller's request | |
433 | */ | |
434 | lck_mtx_convert_spin(&inpcb_timeout_lock); | |
435 | timeout(inpcb_timeout, (void *)&inpcb_toomany_gcreq, | |
436 | INPCB_TOOMANY_GCREQ_TIMER); | |
437 | } | |
438 | ||
39236c6e A |
439 | switch (type) { |
440 | case INPCB_TIMER_NODELAY: | |
441 | atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1); | |
442 | inpcb_sched_timeout(NULL); | |
443 | break; | |
444 | case INPCB_TIMER_FAST: | |
445 | atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1); | |
446 | inpcb_sched_timeout(NULL); | |
447 | break; | |
448 | default: | |
449 | atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1); | |
450 | leeway.tv_sec = inpcb_timeout_lazy; | |
451 | leeway.tv_usec = 0; | |
452 | inpcb_sched_timeout(&leeway); | |
453 | break; | |
454 | } | |
455 | lck_mtx_unlock(&inpcb_timeout_lock); | |
456 | } | |
457 | ||
458 | void | |
459 | inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type) | |
460 | { | |
461 | struct timeval leeway; | |
462 | lck_mtx_lock_spin(&inpcb_timeout_lock); | |
463 | inpcb_ticking = TRUE; | |
464 | switch (type) { | |
465 | case INPCB_TIMER_NODELAY: | |
466 | atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1); | |
467 | inpcb_sched_timeout(NULL); | |
468 | break; | |
469 | case INPCB_TIMER_FAST: | |
470 | atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1); | |
471 | inpcb_sched_timeout(NULL); | |
472 | break; | |
473 | default: | |
474 | atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1); | |
475 | leeway.tv_sec = inpcb_timeout_lazy; | |
476 | leeway.tv_usec = 0; | |
477 | inpcb_sched_timeout(&leeway); | |
478 | break; | |
479 | } | |
480 | lck_mtx_unlock(&inpcb_timeout_lock); | |
481 | } | |
482 | ||
483 | void | |
484 | in_pcbinfo_attach(struct inpcbinfo *ipi) | |
485 | { | |
486 | struct inpcbinfo *ipi0; | |
487 | ||
488 | lck_mtx_lock(&inpcb_lock); | |
489 | TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) { | |
490 | if (ipi0 == ipi) { | |
491 | panic("%s: ipi %p already in the list\n", | |
492 | __func__, ipi); | |
493 | /* NOTREACHED */ | |
494 | } | |
495 | } | |
496 | TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry); | |
497 | lck_mtx_unlock(&inpcb_lock); | |
498 | } | |
499 | ||
500 | int | |
501 | in_pcbinfo_detach(struct inpcbinfo *ipi) | |
502 | { | |
503 | struct inpcbinfo *ipi0; | |
504 | int error = 0; | |
505 | ||
506 | lck_mtx_lock(&inpcb_lock); | |
507 | TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) { | |
508 | if (ipi0 == ipi) | |
509 | break; | |
510 | } | |
511 | if (ipi0 != NULL) | |
512 | TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry); | |
513 | else | |
514 | error = ENXIO; | |
515 | lck_mtx_unlock(&inpcb_lock); | |
516 | ||
517 | return (error); | |
518 | } | |
519 | ||
1c79356b A |
520 | /* |
521 | * Allocate a PCB and associate it with the socket. | |
2d21ac55 A |
522 | * |
523 | * Returns: 0 Success | |
524 | * ENOBUFS | |
525 | * ENOMEM | |
1c79356b A |
526 | */ |
527 | int | |
39236c6e | 528 | in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) |
1c79356b | 529 | { |
39236c6e | 530 | #pragma unused(p) |
2d21ac55 | 531 | struct inpcb *inp; |
39236c6e | 532 | caddr_t temp; |
2d21ac55 A |
533 | #if CONFIG_MACF_NET |
534 | int mac_error; | |
39236c6e | 535 | #endif /* CONFIG_MACF_NET */ |
1c79356b | 536 | |
39236c6e A |
537 | if (!so->cached_in_sock_layer) { |
538 | inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone); | |
539 | if (inp == NULL) | |
540 | return (ENOBUFS); | |
541 | bzero((caddr_t)inp, sizeof (*inp)); | |
542 | } else { | |
543 | inp = (struct inpcb *)(void *)so->so_saved_pcb; | |
544 | temp = inp->inp_saved_ppcb; | |
545 | bzero((caddr_t)inp, sizeof (*inp)); | |
546 | inp->inp_saved_ppcb = temp; | |
1c79356b A |
547 | } |
548 | ||
549 | inp->inp_gencnt = ++pcbinfo->ipi_gencnt; | |
550 | inp->inp_pcbinfo = pcbinfo; | |
551 | inp->inp_socket = so; | |
2d21ac55 A |
552 | #if CONFIG_MACF_NET |
553 | mac_error = mac_inpcb_label_init(inp, M_WAITOK); | |
554 | if (mac_error != 0) { | |
39236c6e | 555 | if (!so->cached_in_sock_layer) |
2d21ac55 A |
556 | zfree(pcbinfo->ipi_zone, inp); |
557 | return (mac_error); | |
558 | } | |
559 | mac_inpcb_label_associate(so, inp); | |
39236c6e A |
560 | #endif /* CONFIG_MACF_NET */ |
561 | /* make sure inp_stat is always 64-bit aligned */ | |
562 | inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store, | |
563 | sizeof (u_int64_t)); | |
564 | if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) + | |
565 | sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) { | |
566 | panic("%s: insufficient space to align inp_stat", __func__); | |
567 | /* NOTREACHED */ | |
568 | } | |
569 | ||
570 | /* make sure inp_cstat is always 64-bit aligned */ | |
571 | inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store, | |
572 | sizeof (u_int64_t)); | |
573 | if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) + | |
574 | sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) { | |
575 | panic("%s: insufficient space to align inp_cstat", __func__); | |
576 | /* NOTREACHED */ | |
577 | } | |
578 | ||
579 | /* make sure inp_wstat is always 64-bit aligned */ | |
580 | inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store, | |
581 | sizeof (u_int64_t)); | |
582 | if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) + | |
583 | sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) { | |
584 | panic("%s: insufficient space to align inp_wstat", __func__); | |
585 | /* NOTREACHED */ | |
6d2010ae A |
586 | } |
587 | ||
fe8ab488 A |
588 | /* make sure inp_Wstat is always 64-bit aligned */ |
589 | inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store, | |
590 | sizeof (u_int64_t)); | |
591 | if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) + | |
592 | sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) { | |
593 | panic("%s: insufficient space to align inp_Wstat", __func__); | |
594 | /* NOTREACHED */ | |
595 | } | |
596 | ||
91447636 A |
597 | so->so_pcb = (caddr_t)inp; |
598 | ||
599 | if (so->so_proto->pr_flags & PR_PCBLOCK) { | |
39236c6e A |
600 | lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp, |
601 | pcbinfo->ipi_lock_attr); | |
91447636 A |
602 | } |
603 | ||
2d21ac55 | 604 | #if INET6 |
39236c6e | 605 | if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) |
9bccf70c | 606 | inp->inp_flags |= IN6P_IPV6_V6ONLY; |
39236c6e | 607 | |
9bccf70c A |
608 | if (ip6_auto_flowlabel) |
609 | inp->inp_flags |= IN6P_AUTOFLOWLABEL; | |
39236c6e A |
610 | #endif /* INET6 */ |
611 | ||
612 | (void) inp_update_policy(inp); | |
613 | ||
614 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); | |
91447636 | 615 | inp->inp_gencnt = ++pcbinfo->ipi_gencnt; |
39236c6e | 616 | LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list); |
91447636 | 617 | pcbinfo->ipi_count++; |
39236c6e | 618 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
619 | return (0); |
620 | } | |
621 | ||
2d21ac55 | 622 | /* |
39236c6e A |
623 | * in_pcblookup_local_and_cleanup does everything |
624 | * in_pcblookup_local does but it checks for a socket | |
625 | * that's going away. Since we know that the lock is | |
626 | * held read+write when this funciton is called, we | |
627 | * can safely dispose of this socket like the slow | |
628 | * timer would usually do and return NULL. This is | |
629 | * great for bind. | |
630 | */ | |
631 | struct inpcb * | |
632 | in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr, | |
633 | u_int lport_arg, int wild_okay) | |
2d21ac55 A |
634 | { |
635 | struct inpcb *inp; | |
39236c6e | 636 | |
2d21ac55 A |
637 | /* Perform normal lookup */ |
638 | inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay); | |
39236c6e | 639 | |
2d21ac55 | 640 | /* Check if we found a match but it's waiting to be disposed */ |
39236c6e | 641 | if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) { |
2d21ac55 | 642 | struct socket *so = inp->inp_socket; |
39236c6e | 643 | |
6d2010ae | 644 | lck_mtx_lock(&inp->inpcb_mtx); |
39236c6e | 645 | |
2d21ac55 | 646 | if (so->so_usecount == 0) { |
b0d623f7 A |
647 | if (inp->inp_state != INPCB_STATE_DEAD) |
648 | in_pcbdetach(inp); | |
39236c6e | 649 | in_pcbdispose(inp); /* will unlock & destroy */ |
2d21ac55 | 650 | inp = NULL; |
39236c6e | 651 | } else { |
6d2010ae | 652 | lck_mtx_unlock(&inp->inpcb_mtx); |
2d21ac55 A |
653 | } |
654 | } | |
39236c6e A |
655 | |
656 | return (inp); | |
2d21ac55 A |
657 | } |
658 | ||
c910b4d9 | 659 | static void |
2d21ac55 A |
660 | in_pcb_conflict_post_msg(u_int16_t port) |
661 | { | |
39236c6e A |
662 | /* |
663 | * Radar 5523020 send a kernel event notification if a | |
664 | * non-participating socket tries to bind the port a socket | |
665 | * who has set SOF_NOTIFYCONFLICT owns. | |
2d21ac55 | 666 | */ |
39236c6e | 667 | struct kev_msg ev_msg; |
2d21ac55 A |
668 | struct kev_in_portinuse in_portinuse; |
669 | ||
39236c6e A |
670 | bzero(&in_portinuse, sizeof (struct kev_in_portinuse)); |
671 | bzero(&ev_msg, sizeof (struct kev_msg)); | |
2d21ac55 A |
672 | in_portinuse.port = ntohs(port); /* port in host order */ |
673 | in_portinuse.req_pid = proc_selfpid(); | |
674 | ev_msg.vendor_code = KEV_VENDOR_APPLE; | |
675 | ev_msg.kev_class = KEV_NETWORK_CLASS; | |
676 | ev_msg.kev_subclass = KEV_INET_SUBCLASS; | |
677 | ev_msg.event_code = KEV_INET_PORTINUSE; | |
678 | ev_msg.dv[0].data_ptr = &in_portinuse; | |
39236c6e | 679 | ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse); |
2d21ac55 A |
680 | ev_msg.dv[1].data_length = 0; |
681 | kev_post_msg(&ev_msg); | |
682 | } | |
39236c6e | 683 | |
2d21ac55 | 684 | /* |
39236c6e A |
685 | * Bind an INPCB to an address and/or port. This routine should not alter |
686 | * the caller-supplied local address "nam". | |
687 | * | |
2d21ac55 A |
688 | * Returns: 0 Success |
689 | * EADDRNOTAVAIL Address not available. | |
690 | * EINVAL Invalid argument | |
691 | * EAFNOSUPPORT Address family not supported [notdef] | |
692 | * EACCES Permission denied | |
693 | * EADDRINUSE Address in use | |
694 | * EAGAIN Resource unavailable, try again | |
6d2010ae | 695 | * priv_check_cred:EPERM Operation not permitted |
2d21ac55 | 696 | */ |
1c79356b | 697 | int |
2d21ac55 | 698 | in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) |
1c79356b | 699 | { |
2d21ac55 | 700 | struct socket *so = inp->inp_socket; |
9bccf70c | 701 | unsigned short *lastport; |
1c79356b | 702 | struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; |
b0d623f7 | 703 | u_short lport = 0, rand_port = 0; |
1c79356b | 704 | int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); |
b0d623f7 | 705 | int error, randomport, conflict = 0; |
fe8ab488 | 706 | boolean_t anonport = FALSE; |
6d2010ae | 707 | kauth_cred_t cred; |
fe8ab488 A |
708 | struct in_addr laddr; |
709 | struct ifnet *outif = NULL; | |
1c79356b A |
710 | |
711 | if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ | |
712 | return (EADDRNOTAVAIL); | |
39236c6e | 713 | if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) |
1c79356b | 714 | return (EINVAL); |
39236c6e | 715 | if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) |
1c79356b | 716 | wild = 1; |
91447636 | 717 | socket_unlock(so, 0); /* keep reference on socket */ |
39236c6e | 718 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); |
fe8ab488 A |
719 | |
720 | bzero(&laddr, sizeof(laddr)); | |
721 | ||
39236c6e | 722 | if (nam != NULL) { |
6d2010ae | 723 | |
39236c6e A |
724 | if (nam->sa_len != sizeof (struct sockaddr_in)) { |
725 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 726 | socket_lock(so, 0); |
1c79356b | 727 | return (EINVAL); |
91447636 | 728 | } |
39236c6e | 729 | #if 0 |
1c79356b A |
730 | /* |
731 | * We should check the family, but old programs | |
732 | * incorrectly fail to initialize it. | |
733 | */ | |
39236c6e A |
734 | if (nam->sa_family != AF_INET) { |
735 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 736 | socket_lock(so, 0); |
1c79356b | 737 | return (EAFNOSUPPORT); |
91447636 | 738 | } |
39236c6e A |
739 | #endif /* 0 */ |
740 | lport = SIN(nam)->sin_port; | |
741 | ||
742 | if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) { | |
1c79356b A |
743 | /* |
744 | * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; | |
745 | * allow complete duplication of binding if | |
746 | * SO_REUSEPORT is set, or if SO_REUSEADDR is set | |
747 | * and a multicast address is bound on both | |
748 | * new and duplicated sockets. | |
749 | */ | |
750 | if (so->so_options & SO_REUSEADDR) | |
751 | reuseport = SO_REUSEADDR|SO_REUSEPORT; | |
39236c6e A |
752 | } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) { |
753 | struct sockaddr_in sin; | |
91447636 | 754 | struct ifaddr *ifa; |
39236c6e A |
755 | |
756 | /* Sanitized for interface address searches */ | |
757 | bzero(&sin, sizeof (sin)); | |
758 | sin.sin_family = AF_INET; | |
759 | sin.sin_len = sizeof (struct sockaddr_in); | |
760 | sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr; | |
761 | ||
762 | ifa = ifa_ifwithaddr(SA(&sin)); | |
763 | if (ifa == NULL) { | |
764 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 765 | socket_lock(so, 0); |
1c79356b | 766 | return (EADDRNOTAVAIL); |
39236c6e A |
767 | } else { |
768 | /* | |
769 | * Opportunistically determine the outbound | |
770 | * interface that may be used; this may not | |
771 | * hold true if we end up using a route | |
772 | * going over a different interface, e.g. | |
773 | * when sending to a local address. This | |
774 | * will get updated again after sending. | |
775 | */ | |
6d2010ae | 776 | IFA_LOCK(ifa); |
316670eb | 777 | outif = ifa->ifa_ifp; |
6d2010ae A |
778 | IFA_UNLOCK(ifa); |
779 | IFA_REMREF(ifa); | |
91447636 | 780 | } |
1c79356b | 781 | } |
39236c6e | 782 | if (lport != 0) { |
1c79356b | 783 | struct inpcb *t; |
39236c6e | 784 | uid_t u; |
1c79356b | 785 | |
6d2010ae A |
786 | if (ntohs(lport) < IPPORT_RESERVED) { |
787 | cred = kauth_cred_proc_ref(p); | |
39236c6e A |
788 | error = priv_check_cred(cred, |
789 | PRIV_NETINET_RESERVEDPORT, 0); | |
6d2010ae A |
790 | kauth_cred_unref(&cred); |
791 | if (error != 0) { | |
39236c6e | 792 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae A |
793 | socket_lock(so, 0); |
794 | return (EACCES); | |
795 | } | |
91447636 | 796 | } |
39236c6e A |
797 | if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) && |
798 | (u = kauth_cred_getuid(so->so_cred)) != 0 && | |
799 | (t = in_pcblookup_local_and_cleanup( | |
800 | inp->inp_pcbinfo, SIN(nam)->sin_addr, lport, | |
801 | INPLOOKUP_WILDCARD)) != NULL && | |
802 | (SIN(nam)->sin_addr.s_addr != INADDR_ANY || | |
803 | t->inp_laddr.s_addr != INADDR_ANY || | |
804 | !(t->inp_socket->so_options & SO_REUSEPORT)) && | |
805 | (u != kauth_cred_getuid(t->inp_socket->so_cred)) && | |
806 | !(t->inp_socket->so_flags & SOF_REUSESHAREUID) && | |
807 | (SIN(nam)->sin_addr.s_addr != INADDR_ANY || | |
808 | t->inp_laddr.s_addr != INADDR_ANY)) { | |
809 | if ((t->inp_socket->so_flags & | |
810 | SOF_NOTIFYCONFLICT) && | |
811 | !(so->so_flags & SOF_NOTIFYCONFLICT)) | |
812 | conflict = 1; | |
813 | ||
814 | lck_rw_done(pcbinfo->ipi_lock); | |
815 | ||
816 | if (conflict) | |
817 | in_pcb_conflict_post_msg(lport); | |
2d21ac55 | 818 | |
39236c6e A |
819 | socket_lock(so, 0); |
820 | return (EADDRINUSE); | |
1c79356b | 821 | } |
39236c6e A |
822 | t = in_pcblookup_local_and_cleanup(pcbinfo, |
823 | SIN(nam)->sin_addr, lport, wild); | |
824 | if (t != NULL && | |
1c79356b A |
825 | (reuseport & t->inp_socket->so_options) == 0) { |
826 | #if INET6 | |
39236c6e A |
827 | if (SIN(nam)->sin_addr.s_addr != INADDR_ANY || |
828 | t->inp_laddr.s_addr != INADDR_ANY || | |
829 | SOCK_DOM(so) != PF_INET6 || | |
830 | SOCK_DOM(t->inp_socket) != PF_INET6) | |
2d21ac55 A |
831 | #endif /* INET6 */ |
832 | { | |
2d21ac55 | 833 | |
39236c6e A |
834 | if ((t->inp_socket->so_flags & |
835 | SOF_NOTIFYCONFLICT) && | |
836 | !(so->so_flags & SOF_NOTIFYCONFLICT)) | |
2d21ac55 A |
837 | conflict = 1; |
838 | ||
39236c6e | 839 | lck_rw_done(pcbinfo->ipi_lock); |
2d21ac55 A |
840 | |
841 | if (conflict) | |
842 | in_pcb_conflict_post_msg(lport); | |
91447636 A |
843 | socket_lock(so, 0); |
844 | return (EADDRINUSE); | |
845 | } | |
1c79356b A |
846 | } |
847 | } | |
fe8ab488 | 848 | laddr = SIN(nam)->sin_addr; |
1c79356b A |
849 | } |
850 | if (lport == 0) { | |
851 | u_short first, last; | |
852 | int count; | |
853 | ||
39236c6e A |
854 | randomport = (so->so_flags & SOF_BINDRANDOMPORT) || |
855 | (so->so_type == SOCK_STREAM ? tcp_use_randomport : | |
856 | udp_use_randomport); | |
857 | ||
858 | /* | |
fe8ab488 A |
859 | * Even though this looks similar to the code in |
860 | * in6_pcbsetport, the v6 vs v4 checks are different. | |
39236c6e | 861 | */ |
fe8ab488 | 862 | anonport = TRUE; |
1c79356b A |
863 | if (inp->inp_flags & INP_HIGHPORT) { |
864 | first = ipport_hifirstauto; /* sysctl */ | |
865 | last = ipport_hilastauto; | |
39236c6e | 866 | lastport = &pcbinfo->ipi_lasthi; |
1c79356b | 867 | } else if (inp->inp_flags & INP_LOWPORT) { |
6d2010ae | 868 | cred = kauth_cred_proc_ref(p); |
39236c6e A |
869 | error = priv_check_cred(cred, |
870 | PRIV_NETINET_RESERVEDPORT, 0); | |
6d2010ae A |
871 | kauth_cred_unref(&cred); |
872 | if (error != 0) { | |
39236c6e | 873 | lck_rw_done(pcbinfo->ipi_lock); |
91447636 | 874 | socket_lock(so, 0); |
39236c6e | 875 | return (error); |
91447636 | 876 | } |
1c79356b A |
877 | first = ipport_lowfirstauto; /* 1023 */ |
878 | last = ipport_lowlastauto; /* 600 */ | |
39236c6e | 879 | lastport = &pcbinfo->ipi_lastlow; |
1c79356b A |
880 | } else { |
881 | first = ipport_firstauto; /* sysctl */ | |
882 | last = ipport_lastauto; | |
39236c6e | 883 | lastport = &pcbinfo->ipi_lastport; |
1c79356b | 884 | } |
b0d623f7 A |
885 | /* No point in randomizing if only one port is available */ |
886 | ||
887 | if (first == last) | |
39236c6e | 888 | randomport = 0; |
1c79356b A |
889 | /* |
890 | * Simple check to ensure all ports are not used up causing | |
891 | * a deadlock here. | |
892 | * | |
893 | * We split the two cases (up and down) so that the direction | |
894 | * is not being tested on each round of the loop. | |
895 | */ | |
896 | if (first > last) { | |
897 | /* | |
898 | * counting down | |
899 | */ | |
b0d623f7 | 900 | if (randomport) { |
39236c6e A |
901 | read_random(&rand_port, sizeof (rand_port)); |
902 | *lastport = | |
903 | first - (rand_port % (first - last)); | |
b0d623f7 | 904 | } |
1c79356b A |
905 | count = first - last; |
906 | ||
907 | do { | |
908 | if (count-- < 0) { /* completely used? */ | |
39236c6e | 909 | lck_rw_done(pcbinfo->ipi_lock); |
91447636 | 910 | socket_lock(so, 0); |
9bccf70c | 911 | return (EADDRNOTAVAIL); |
1c79356b A |
912 | } |
913 | --*lastport; | |
914 | if (*lastport > first || *lastport < last) | |
915 | *lastport = first; | |
916 | lport = htons(*lastport); | |
2d21ac55 | 917 | } while (in_pcblookup_local_and_cleanup(pcbinfo, |
fe8ab488 A |
918 | ((laddr.s_addr != INADDR_ANY) ? laddr : |
919 | inp->inp_laddr), lport, wild)); | |
1c79356b A |
920 | } else { |
921 | /* | |
922 | * counting up | |
923 | */ | |
b0d623f7 | 924 | if (randomport) { |
39236c6e A |
925 | read_random(&rand_port, sizeof (rand_port)); |
926 | *lastport = | |
927 | first + (rand_port % (first - last)); | |
b0d623f7 | 928 | } |
1c79356b A |
929 | count = last - first; |
930 | ||
931 | do { | |
932 | if (count-- < 0) { /* completely used? */ | |
39236c6e | 933 | lck_rw_done(pcbinfo->ipi_lock); |
91447636 | 934 | socket_lock(so, 0); |
9bccf70c | 935 | return (EADDRNOTAVAIL); |
1c79356b A |
936 | } |
937 | ++*lastport; | |
938 | if (*lastport < first || *lastport > last) | |
939 | *lastport = first; | |
940 | lport = htons(*lastport); | |
2d21ac55 | 941 | } while (in_pcblookup_local_and_cleanup(pcbinfo, |
fe8ab488 A |
942 | ((laddr.s_addr != INADDR_ANY) ? laddr : |
943 | inp->inp_laddr), lport, wild)); | |
1c79356b A |
944 | } |
945 | } | |
91447636 | 946 | socket_lock(so, 0); |
fe8ab488 A |
947 | if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) { |
948 | lck_rw_done(pcbinfo->ipi_lock); | |
949 | return (EINVAL); | |
950 | } | |
951 | ||
952 | if (laddr.s_addr != INADDR_ANY) { | |
953 | inp->inp_laddr = laddr; | |
954 | inp->inp_last_outifp = outif; | |
955 | } | |
1c79356b | 956 | inp->inp_lport = lport; |
fe8ab488 A |
957 | if (anonport) |
958 | inp->inp_flags |= INP_ANONPORT; | |
959 | ||
91447636 | 960 | if (in_pcbinshash(inp, 1) != 0) { |
1c79356b | 961 | inp->inp_laddr.s_addr = INADDR_ANY; |
316670eb | 962 | inp->inp_last_outifp = NULL; |
fe8ab488 A |
963 | |
964 | inp->inp_lport = 0; | |
965 | if (anonport) | |
966 | inp->inp_flags &= ~INP_ANONPORT; | |
39236c6e | 967 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
968 | return (EAGAIN); |
969 | } | |
39236c6e | 970 | lck_rw_done(pcbinfo->ipi_lock); |
2d21ac55 | 971 | sflt_notify(so, sock_evt_bound, NULL); |
1c79356b A |
972 | return (0); |
973 | } | |
974 | ||
975 | /* | |
39236c6e A |
976 | * Transform old in_pcbconnect() into an inner subroutine for new |
977 | * in_pcbconnect(); do some validity-checking on the remote address | |
978 | * (in "nam") and then determine local host address (i.e., which | |
979 | * interface) to use to access that remote host. | |
980 | * | |
981 | * This routine may alter the caller-supplied remote address "nam". | |
1c79356b | 982 | * |
39236c6e A |
983 | * The caller may override the bound-to-interface setting of the socket |
984 | * by specifying the ifscope parameter (e.g. from IP_PKTINFO.) | |
985 | * | |
986 | * This routine might return an ifp with a reference held if the caller | |
987 | * provides a non-NULL outif, even in the error case. The caller is | |
988 | * responsible for releasing its reference. | |
2d21ac55 A |
989 | * |
990 | * Returns: 0 Success | |
991 | * EINVAL Invalid argument | |
992 | * EAFNOSUPPORT Address family not supported | |
993 | * EADDRNOTAVAIL Address not available | |
1c79356b | 994 | */ |
1c79356b | 995 | int |
39236c6e A |
996 | in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr, |
997 | unsigned int ifscope, struct ifnet **outif) | |
1c79356b | 998 | { |
39236c6e A |
999 | struct route *ro = &inp->inp_route; |
1000 | struct in_ifaddr *ia = NULL; | |
1001 | struct sockaddr_in sin; | |
1002 | int error = 0; | |
fe8ab488 | 1003 | boolean_t restricted = FALSE; |
39236c6e A |
1004 | |
1005 | if (outif != NULL) | |
1006 | *outif = NULL; | |
1007 | if (nam->sa_len != sizeof (struct sockaddr_in)) | |
1c79356b | 1008 | return (EINVAL); |
39236c6e | 1009 | if (SIN(nam)->sin_family != AF_INET) |
1c79356b | 1010 | return (EAFNOSUPPORT); |
39236c6e | 1011 | if (SIN(nam)->sin_port == 0) |
1c79356b | 1012 | return (EADDRNOTAVAIL); |
b0d623f7 | 1013 | |
39236c6e A |
1014 | /* |
1015 | * If the destination address is INADDR_ANY, | |
1016 | * use the primary local address. | |
1017 | * If the supplied address is INADDR_BROADCAST, | |
1018 | * and the primary interface supports broadcast, | |
1019 | * choose the broadcast address for that interface. | |
1020 | */ | |
1021 | if (SIN(nam)->sin_addr.s_addr == INADDR_ANY || | |
1022 | SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST) { | |
1023 | lck_rw_lock_shared(in_ifaddr_rwlock); | |
1024 | if (!TAILQ_EMPTY(&in_ifaddrhead)) { | |
1025 | ia = TAILQ_FIRST(&in_ifaddrhead); | |
1026 | IFA_LOCK_SPIN(&ia->ia_ifa); | |
1027 | if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) { | |
1028 | SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr; | |
1029 | } else if (ia->ia_ifp->if_flags & IFF_BROADCAST) { | |
1030 | SIN(nam)->sin_addr = | |
1031 | SIN(&ia->ia_broadaddr)->sin_addr; | |
1032 | } | |
1033 | IFA_UNLOCK(&ia->ia_ifa); | |
1034 | ia = NULL; | |
1035 | } | |
1036 | lck_rw_done(in_ifaddr_rwlock); | |
1037 | } | |
1038 | /* | |
1039 | * Otherwise, if the socket has already bound the source, just use it. | |
1040 | */ | |
1041 | if (inp->inp_laddr.s_addr != INADDR_ANY) { | |
1042 | VERIFY(ia == NULL); | |
1043 | *laddr = inp->inp_laddr; | |
1044 | return (0); | |
1c79356b | 1045 | } |
6d2010ae | 1046 | |
39236c6e A |
1047 | /* |
1048 | * If the ifscope is specified by the caller (e.g. IP_PKTINFO) | |
1049 | * then it overrides the sticky ifscope set for the socket. | |
1050 | */ | |
1051 | if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF)) | |
1052 | ifscope = inp->inp_boundifp->if_index; | |
6d2010ae | 1053 | |
39236c6e A |
1054 | /* |
1055 | * If route is known or can be allocated now, | |
1056 | * our src addr is taken from the i/f, else punt. | |
1057 | * Note that we should check the address family of the cached | |
1058 | * destination, in case of sharing the cache with IPv6. | |
1059 | */ | |
1060 | if (ro->ro_rt != NULL) | |
1061 | RT_LOCK_SPIN(ro->ro_rt); | |
1062 | if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET || | |
1063 | SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr || | |
1064 | (inp->inp_socket->so_options & SO_DONTROUTE)) { | |
b0d623f7 | 1065 | if (ro->ro_rt != NULL) |
b0d623f7 | 1066 | RT_UNLOCK(ro->ro_rt); |
39236c6e A |
1067 | ROUTE_RELEASE(ro); |
1068 | } | |
1069 | if (!(inp->inp_socket->so_options & SO_DONTROUTE) && | |
1070 | (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) { | |
1071 | if (ro->ro_rt != NULL) | |
1072 | RT_UNLOCK(ro->ro_rt); | |
1073 | ROUTE_RELEASE(ro); | |
1074 | /* No route yet, so try to acquire one */ | |
1075 | bzero(&ro->ro_dst, sizeof (struct sockaddr_in)); | |
1076 | ro->ro_dst.sa_family = AF_INET; | |
1077 | ro->ro_dst.sa_len = sizeof (struct sockaddr_in); | |
1078 | SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr; | |
1079 | rtalloc_scoped(ro, ifscope); | |
1080 | if (ro->ro_rt != NULL) | |
1081 | RT_LOCK_SPIN(ro->ro_rt); | |
1082 | } | |
1083 | /* Sanitized local copy for interface address searches */ | |
1084 | bzero(&sin, sizeof (sin)); | |
1085 | sin.sin_family = AF_INET; | |
1086 | sin.sin_len = sizeof (struct sockaddr_in); | |
1087 | sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr; | |
1088 | /* | |
1089 | * If we did not find (or use) a route, assume dest is reachable | |
1090 | * on a directly connected network and try to find a corresponding | |
1091 | * interface to take the source address from. | |
1092 | */ | |
1093 | if (ro->ro_rt == NULL) { | |
1094 | VERIFY(ia == NULL); | |
1095 | ia = ifatoia(ifa_ifwithdstaddr(SA(&sin))); | |
1096 | if (ia == NULL) | |
1097 | ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope)); | |
1098 | error = ((ia == NULL) ? ENETUNREACH : 0); | |
1099 | goto done; | |
1100 | } | |
1101 | RT_LOCK_ASSERT_HELD(ro->ro_rt); | |
1102 | /* | |
1103 | * If the outgoing interface on the route found is not | |
1104 | * a loopback interface, use the address from that interface. | |
1105 | */ | |
1106 | if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { | |
1107 | VERIFY(ia == NULL); | |
6d2010ae A |
1108 | /* |
1109 | * If the route points to a cellular interface and the | |
1110 | * caller forbids our using interfaces of such type, | |
1111 | * pretend that there is no route. | |
fe8ab488 | 1112 | * Apply the same logic for expensive interfaces. |
6d2010ae | 1113 | */ |
fe8ab488 | 1114 | if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) { |
39236c6e A |
1115 | RT_UNLOCK(ro->ro_rt); |
1116 | ROUTE_RELEASE(ro); | |
1117 | error = EHOSTUNREACH; | |
fe8ab488 | 1118 | restricted = TRUE; |
39236c6e | 1119 | } else { |
6d2010ae A |
1120 | /* Become a regular mutex */ |
1121 | RT_CONVERT_LOCK(ro->ro_rt); | |
39236c6e A |
1122 | ia = ifatoia(ro->ro_rt->rt_ifa); |
1123 | IFA_ADDREF(&ia->ia_ifa); | |
b0d623f7 | 1124 | RT_UNLOCK(ro->ro_rt); |
39236c6e | 1125 | error = 0; |
91447636 | 1126 | } |
39236c6e A |
1127 | goto done; |
1128 | } | |
1129 | VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK); | |
1130 | RT_UNLOCK(ro->ro_rt); | |
1131 | /* | |
1132 | * The outgoing interface is marked with 'loopback net', so a route | |
1133 | * to ourselves is here. | |
1134 | * Try to find the interface of the destination address and then | |
1135 | * take the address from there. That interface is not necessarily | |
1136 | * a loopback interface. | |
1137 | */ | |
1138 | VERIFY(ia == NULL); | |
1139 | ia = ifatoia(ifa_ifwithdstaddr(SA(&sin))); | |
1140 | if (ia == NULL) | |
1141 | ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope)); | |
1142 | if (ia == NULL) | |
1143 | ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope)); | |
1144 | if (ia == NULL) { | |
1145 | RT_LOCK(ro->ro_rt); | |
1146 | ia = ifatoia(ro->ro_rt->rt_ifa); | |
1147 | if (ia != NULL) | |
1148 | IFA_ADDREF(&ia->ia_ifa); | |
1149 | RT_UNLOCK(ro->ro_rt); | |
1150 | } | |
1151 | error = ((ia == NULL) ? ENETUNREACH : 0); | |
1152 | ||
1153 | done: | |
1154 | /* | |
1155 | * If the destination address is multicast and an outgoing | |
1156 | * interface has been set as a multicast option, use the | |
1157 | * address of that interface as our source address. | |
1158 | */ | |
15129b1c | 1159 | if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) && |
39236c6e A |
1160 | inp->inp_moptions != NULL) { |
1161 | struct ip_moptions *imo; | |
1162 | struct ifnet *ifp; | |
1163 | ||
1164 | imo = inp->inp_moptions; | |
1165 | IMO_LOCK(imo); | |
1166 | if (imo->imo_multicast_ifp != NULL && (ia == NULL || | |
1167 | ia->ia_ifp != imo->imo_multicast_ifp)) { | |
1168 | ifp = imo->imo_multicast_ifp; | |
1169 | if (ia != NULL) | |
6d2010ae | 1170 | IFA_REMREF(&ia->ia_ifa); |
39236c6e A |
1171 | lck_rw_lock_shared(in_ifaddr_rwlock); |
1172 | TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { | |
1173 | if (ia->ia_ifp == ifp) | |
1174 | break; | |
6d2010ae | 1175 | } |
39236c6e A |
1176 | if (ia != NULL) |
1177 | IFA_ADDREF(&ia->ia_ifa); | |
1178 | lck_rw_done(in_ifaddr_rwlock); | |
1179 | if (ia == NULL) | |
1180 | error = EADDRNOTAVAIL; | |
15129b1c A |
1181 | else |
1182 | error = 0; | |
1c79356b | 1183 | } |
39236c6e A |
1184 | IMO_UNLOCK(imo); |
1185 | } | |
1186 | /* | |
1187 | * Don't do pcblookup call here; return interface in laddr | |
1188 | * and exit to caller, that will do the lookup. | |
1189 | */ | |
1190 | if (ia != NULL) { | |
1c79356b | 1191 | /* |
39236c6e A |
1192 | * If the source address belongs to a cellular interface |
1193 | * and the socket forbids our using interfaces of such | |
1194 | * type, pretend that there is no source address. | |
fe8ab488 | 1195 | * Apply the same logic for expensive interfaces. |
1c79356b | 1196 | */ |
39236c6e | 1197 | IFA_LOCK_SPIN(&ia->ia_ifa); |
fe8ab488 | 1198 | if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) { |
39236c6e A |
1199 | IFA_UNLOCK(&ia->ia_ifa); |
1200 | error = EHOSTUNREACH; | |
fe8ab488 | 1201 | restricted = TRUE; |
39236c6e A |
1202 | } else if (error == 0) { |
1203 | *laddr = ia->ia_addr.sin_addr; | |
1204 | if (outif != NULL) { | |
1205 | struct ifnet *ifp; | |
1206 | ||
1207 | if (ro->ro_rt != NULL) | |
1208 | ifp = ro->ro_rt->rt_ifp; | |
1209 | else | |
1210 | ifp = ia->ia_ifp; | |
1211 | ||
1212 | VERIFY(ifp != NULL); | |
1213 | IFA_CONVERT_LOCK(&ia->ia_ifa); | |
1214 | ifnet_reference(ifp); /* for caller */ | |
1215 | if (*outif != NULL) | |
1216 | ifnet_release(*outif); | |
1217 | *outif = ifp; | |
1c79356b | 1218 | } |
39236c6e A |
1219 | IFA_UNLOCK(&ia->ia_ifa); |
1220 | } else { | |
1221 | IFA_UNLOCK(&ia->ia_ifa); | |
1c79356b | 1222 | } |
6d2010ae | 1223 | IFA_REMREF(&ia->ia_ifa); |
39236c6e A |
1224 | ia = NULL; |
1225 | } | |
1226 | ||
fe8ab488 | 1227 | if (restricted && error == EHOSTUNREACH) { |
39236c6e A |
1228 | soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED | |
1229 | SO_FILT_HINT_IFDENIED)); | |
1c79356b | 1230 | } |
39236c6e A |
1231 | |
1232 | return (error); | |
1c79356b A |
1233 | } |
1234 | ||
/*
 * Outer subroutine:
 * Connect a socket's PCB to the remote address/port given in "nam"
 * (both must be specified).  A local address is chosen through
 * in_pcbladdr() when the socket does not have one yet, and a local port
 * is obtained through in_pcbbind() when none is bound.
 *
 * The caller may override the socket's bound-to-interface setting by
 * passing an ifscope (e.g. from IP_PKTINFO); ifscope and outif are
 * forwarded unchanged to in_pcbladdr().
 *
 * The socket lock is dropped and re-taken inside this routine (around
 * the hash lookup, and possibly while waiting for the pcbinfo lock).
 *
 * Returns 0 on success, an error from in_pcbladdr()/in_pcbbind(),
 * ECONNREFUSED if the socket was aborted while unlocked, or EADDRINUSE
 * if a PCB with the same 4-tuple already exists.
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	struct in_addr laddr;
	struct inpcb *match;
	boolean_t unbound;
	int error;

	/* Let the inner routine pick the local interface address. */
	error = in_pcbladdr(inp, nam, &laddr, ifscope, outif);
	if (error != 0) {
		return (error);
	}

	/* Look for an existing PCB with the would-be 4-tuple. */
	socket_unlock(so, 0);
	match = in_pcblookup_hash(pcbinfo, sin->sin_addr, sin->sin_port,
	    (inp->inp_laddr.s_addr != INADDR_ANY) ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * The socket was unlocked above; an embryonic socket can be
	 * aborted in that window if another thread closes the listener
	 * (radar 7947600), so re-validate its state.
	 */
	if ((so->so_flags & SOF_ABORTED) != 0) {
		return (ECONNREFUSED);
	}

	if (match != NULL) {
		/* Drop the want-reference taken by the lookup. */
		in_pcb_checkstate(match, WNT_RELEASE, (match == inp) ? 1 : 0);
		return (EADDRINUSE);
	}

	/* Remember whether the local address still has to be filled in. */
	unbound = (inp->inp_laddr.s_addr == INADDR_ANY);
	if (unbound && inp->inp_lport == 0) {
		error = in_pcbbind(inp, NULL, p);
		if (error) {
			return (error);
		}
	}

	if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
		/*
		 * Lock inversion issue, mostly with udp multicast
		 * packets: release the socket lock, block on the
		 * pcbinfo lock, then take the socket lock again.
		 */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	if (unbound) {
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
		inp->inp_flags |= INP_INADDR_ANY;
	}

	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_pcb_invalidate_cache(inp);
	}
	in_pcbrehash(inp);
	lck_rw_done(pcbinfo->ipi_lock);
	return (0);
}
1317 | ||
1318 | void | |
2d21ac55 | 1319 | in_pcbdisconnect(struct inpcb *inp) |
1c79356b | 1320 | { |
39236c6e | 1321 | struct socket *so = inp->inp_socket; |
1c79356b | 1322 | |
fe8ab488 A |
1323 | if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) |
1324 | nstat_pcb_cache(inp); | |
1325 | ||
1c79356b A |
1326 | inp->inp_faddr.s_addr = INADDR_ANY; |
1327 | inp->inp_fport = 0; | |
91447636 | 1328 | |
39236c6e A |
1329 | if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { |
1330 | /* lock inversion issue, mostly with udp multicast packets */ | |
1331 | socket_unlock(so, 0); | |
1332 | lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); | |
1333 | socket_lock(so, 0); | |
91447636 A |
1334 | } |
1335 | ||
1c79356b | 1336 | in_pcbrehash(inp); |
39236c6e A |
1337 | lck_rw_done(inp->inp_pcbinfo->ipi_lock); |
1338 | /* | |
1339 | * A multipath subflow socket would have its SS_NOFDREF set by default, | |
1340 | * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB; | |
1341 | * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared. | |
1342 | */ | |
1343 | if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) | |
1c79356b A |
1344 | in_pcbdetach(inp); |
1345 | } | |
1346 | ||
1347 | void | |
2d21ac55 | 1348 | in_pcbdetach(struct inpcb *inp) |
1c79356b A |
1349 | { |
1350 | struct socket *so = inp->inp_socket; | |
1c79356b | 1351 | |
39236c6e A |
1352 | if (so->so_pcb == NULL) { |
1353 | /* PCB has been disposed */ | |
1354 | panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__, | |
1355 | inp, so, SOCK_PROTO(so)); | |
1356 | /* NOTREACHED */ | |
91447636 | 1357 | } |
fe8ab488 | 1358 | |
1c79356b | 1359 | #if IPSEC |
39236c6e A |
1360 | if (inp->inp_sp != NULL) { |
1361 | (void) ipsec4_delete_pcbpolicy(inp); | |
91447636 | 1362 | } |
39236c6e | 1363 | #endif /* IPSEC */ |
fe8ab488 A |
1364 | |
1365 | /* | |
1366 | * Let NetworkStatistics know this PCB is going away | |
1367 | * before we detach it. | |
1368 | */ | |
1369 | if (nstat_collect && | |
1370 | (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) | |
1371 | nstat_pcb_detach(inp); | |
91447636 | 1372 | /* mark socket state as dead */ |
39236c6e A |
1373 | if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) { |
1374 | panic("%s: so=%p proto=%d couldn't set to STOPUSING\n", | |
1375 | __func__, so, SOCK_PROTO(so)); | |
1376 | /* NOTREACHED */ | |
1377 | } | |
1c79356b | 1378 | |
39236c6e | 1379 | if (!(so->so_flags & SOF_PCBCLEARING)) { |
6d2010ae | 1380 | struct ip_moptions *imo; |
2d21ac55 | 1381 | |
91447636 | 1382 | inp->inp_vflag = 0; |
39236c6e A |
1383 | if (inp->inp_options != NULL) { |
1384 | (void) m_free(inp->inp_options); | |
1385 | inp->inp_options = NULL; | |
91447636 | 1386 | } |
39236c6e | 1387 | ROUTE_RELEASE(&inp->inp_route); |
6d2010ae | 1388 | imo = inp->inp_moptions; |
91447636 | 1389 | inp->inp_moptions = NULL; |
6d2010ae A |
1390 | if (imo != NULL) |
1391 | IMO_REMREF(imo); | |
91447636 A |
1392 | sofreelastref(so, 0); |
1393 | inp->inp_state = INPCB_STATE_DEAD; | |
39236c6e A |
1394 | /* makes sure we're not called twice from so_close */ |
1395 | so->so_flags |= SOF_PCBCLEARING; | |
1396 | ||
1397 | inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST); | |
91447636 A |
1398 | } |
1399 | } | |
1c79356b | 1400 | |
1c79356b | 1401 | |
39236c6e A |
1402 | void |
1403 | in_pcbdispose(struct inpcb *inp) | |
91447636 A |
1404 | { |
1405 | struct socket *so = inp->inp_socket; | |
1406 | struct inpcbinfo *ipi = inp->inp_pcbinfo; | |
1407 | ||
39236c6e A |
1408 | if (so != NULL && so->so_usecount != 0) { |
1409 | panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n", | |
1410 | __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount, | |
1411 | solockhistory_nr(so)); | |
1412 | /* NOTREACHED */ | |
1413 | } else if (inp->inp_wantcnt != WNT_STOPUSING) { | |
1414 | if (so != NULL) { | |
1415 | panic_plain("%s: inp %p invalid wantcnt %d, so %p " | |
1416 | "[%d,%d] usecount %d retaincnt %d state 0x%x " | |
1417 | "flags 0x%x lockhistory %s\n", __func__, inp, | |
1418 | inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so), | |
1419 | so->so_usecount, so->so_retaincnt, so->so_state, | |
1420 | so->so_flags, solockhistory_nr(so)); | |
1421 | /* NOTREACHED */ | |
1422 | } else { | |
1423 | panic("%s: inp %p invalid wantcnt %d no socket\n", | |
1424 | __func__, inp, inp->inp_wantcnt); | |
1425 | /* NOTREACHED */ | |
1426 | } | |
91447636 | 1427 | } |
91447636 | 1428 | |
39236c6e | 1429 | lck_rw_assert(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE); |
91447636 A |
1430 | |
1431 | inp->inp_gencnt = ++ipi->ipi_gencnt; | |
316670eb | 1432 | /* access ipi in in_pcbremlists */ |
91447636 | 1433 | in_pcbremlists(inp); |
316670eb | 1434 | |
39236c6e | 1435 | if (so != NULL) { |
91447636 A |
1436 | if (so->so_proto->pr_flags & PR_PCBLOCK) { |
1437 | sofreelastref(so, 0); | |
39236c6e A |
1438 | if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) { |
1439 | /* | |
1440 | * selthreadclear() already called | |
1441 | * during sofreelastref() above. | |
1442 | */ | |
91447636 A |
1443 | sbrelease(&so->so_rcv); |
1444 | sbrelease(&so->so_snd); | |
1445 | } | |
39236c6e A |
1446 | if (so->so_head != NULL) { |
1447 | panic("%s: so=%p head still exist\n", | |
1448 | __func__, so); | |
1449 | /* NOTREACHED */ | |
1450 | } | |
1451 | lck_mtx_unlock(&inp->inpcb_mtx); | |
1452 | lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp); | |
9bccf70c | 1453 | } |
39236c6e A |
1454 | /* makes sure we're not called twice from so_close */ |
1455 | so->so_flags |= SOF_PCBCLEARING; | |
1456 | so->so_saved_pcb = (caddr_t)inp; | |
1457 | so->so_pcb = NULL; | |
1458 | inp->inp_socket = NULL; | |
2d21ac55 A |
1459 | #if CONFIG_MACF_NET |
1460 | mac_inpcb_label_destroy(inp); | |
39236c6e | 1461 | #endif /* CONFIG_MACF_NET */ |
b0d623f7 A |
1462 | /* |
1463 | * In case there a route cached after a detach (possible | |
1464 | * in the tcp case), make sure that it is freed before | |
1465 | * we deallocate the structure. | |
1466 | */ | |
39236c6e A |
1467 | ROUTE_RELEASE(&inp->inp_route); |
1468 | if (!so->cached_in_sock_layer) { | |
91447636 | 1469 | zfree(ipi->ipi_zone, inp); |
55e303ae | 1470 | } |
91447636 | 1471 | sodealloc(so); |
9bccf70c | 1472 | } |
1c79356b A |
1473 | } |
1474 | ||
1475 | /* | |
39236c6e | 1476 | * The calling convention of in_getsockaddr() and in_getpeeraddr() was |
1c79356b A |
1477 | * modified to match the pru_sockaddr() and pru_peeraddr() entry points |
1478 | * in struct pr_usrreqs, so that protocols can just reference then directly | |
39236c6e | 1479 | * without the need for a wrapper function. |
1c79356b A |
1480 | */ |
1481 | int | |
39236c6e | 1482 | in_getsockaddr(struct socket *so, struct sockaddr **nam) |
1c79356b | 1483 | { |
2d21ac55 A |
1484 | struct inpcb *inp; |
1485 | struct sockaddr_in *sin; | |
1c79356b A |
1486 | |
1487 | /* | |
1488 | * Do the malloc first in case it blocks. | |
1489 | */ | |
39236c6e | 1490 | MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK); |
0b4e3aa0 | 1491 | if (sin == NULL) |
39236c6e A |
1492 | return (ENOBUFS); |
1493 | bzero(sin, sizeof (*sin)); | |
1c79356b | 1494 | sin->sin_family = AF_INET; |
39236c6e | 1495 | sin->sin_len = sizeof (*sin); |
1c79356b | 1496 | |
39236c6e | 1497 | if ((inp = sotoinpcb(so)) == NULL) { |
1c79356b | 1498 | FREE(sin, M_SONAME); |
39236c6e | 1499 | return (EINVAL); |
1c79356b A |
1500 | } |
1501 | sin->sin_port = inp->inp_lport; | |
1502 | sin->sin_addr = inp->inp_laddr; | |
1c79356b A |
1503 | |
1504 | *nam = (struct sockaddr *)sin; | |
39236c6e | 1505 | return (0); |
1c79356b A |
1506 | } |
1507 | ||
1508 | int | |
39236c6e | 1509 | in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss) |
1c79356b | 1510 | { |
39236c6e | 1511 | struct sockaddr_in *sin = SIN(ss); |
1c79356b | 1512 | struct inpcb *inp; |
1c79356b | 1513 | |
39236c6e A |
1514 | VERIFY(ss != NULL); |
1515 | bzero(ss, sizeof (*ss)); | |
1516 | ||
1c79356b | 1517 | sin->sin_family = AF_INET; |
39236c6e | 1518 | sin->sin_len = sizeof (*sin); |
1c79356b | 1519 | |
fe8ab488 A |
1520 | if ((inp = sotoinpcb(so)) == NULL |
1521 | #if NECP | |
1522 | || (necp_socket_should_use_flow_divert(inp)) | |
1523 | #endif /* NECP */ | |
1524 | ) | |
39236c6e A |
1525 | return (inp == NULL ? EINVAL : EPROTOTYPE); |
1526 | ||
1527 | sin->sin_port = inp->inp_lport; | |
1528 | sin->sin_addr = inp->inp_laddr; | |
1529 | return (0); | |
1530 | } | |
1531 | ||
1532 | int | |
1533 | in_getpeeraddr(struct socket *so, struct sockaddr **nam) | |
1534 | { | |
1535 | struct inpcb *inp; | |
1536 | struct sockaddr_in *sin; | |
1537 | ||
1538 | /* | |
1539 | * Do the malloc first in case it blocks. | |
1540 | */ | |
1541 | MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK); | |
1542 | if (sin == NULL) | |
1543 | return (ENOBUFS); | |
1544 | bzero((caddr_t)sin, sizeof (*sin)); | |
1545 | sin->sin_family = AF_INET; | |
1546 | sin->sin_len = sizeof (*sin); | |
1547 | ||
1548 | if ((inp = sotoinpcb(so)) == NULL) { | |
1c79356b | 1549 | FREE(sin, M_SONAME); |
39236c6e | 1550 | return (EINVAL); |
1c79356b A |
1551 | } |
1552 | sin->sin_port = inp->inp_fport; | |
1553 | sin->sin_addr = inp->inp_faddr; | |
1c79356b A |
1554 | |
1555 | *nam = (struct sockaddr *)sin; | |
39236c6e A |
1556 | return (0); |
1557 | } | |
1558 | ||
1559 | int | |
1560 | in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss) | |
1561 | { | |
1562 | struct sockaddr_in *sin = SIN(ss); | |
1563 | struct inpcb *inp; | |
1564 | ||
1565 | VERIFY(ss != NULL); | |
1566 | bzero(ss, sizeof (*ss)); | |
1567 | ||
1568 | sin->sin_family = AF_INET; | |
1569 | sin->sin_len = sizeof (*sin); | |
1570 | ||
fe8ab488 A |
1571 | if ((inp = sotoinpcb(so)) == NULL |
1572 | #if NECP | |
1573 | || (necp_socket_should_use_flow_divert(inp)) | |
1574 | #endif /* NECP */ | |
1575 | ) { | |
39236c6e A |
1576 | return (inp == NULL ? EINVAL : EPROTOTYPE); |
1577 | } | |
1578 | ||
1579 | sin->sin_port = inp->inp_fport; | |
1580 | sin->sin_addr = inp->inp_faddr; | |
1581 | return (0); | |
1c79356b A |
1582 | } |
1583 | ||
1c79356b | 1584 | void |
2d21ac55 | 1585 | in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
39236c6e | 1586 | int errno, void (*notify)(struct inpcb *, int)) |
1c79356b | 1587 | { |
91447636 A |
1588 | struct inpcb *inp; |
1589 | ||
39236c6e | 1590 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
1c79356b | 1591 | |
39236c6e | 1592 | LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { |
9bccf70c | 1593 | #if INET6 |
39236c6e | 1594 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1595 | continue; |
39236c6e | 1596 | #endif /* INET6 */ |
1c79356b | 1597 | if (inp->inp_faddr.s_addr != faddr.s_addr || |
9bccf70c | 1598 | inp->inp_socket == NULL) |
39236c6e A |
1599 | continue; |
1600 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) | |
91447636 A |
1601 | continue; |
1602 | socket_lock(inp->inp_socket, 1); | |
9bccf70c | 1603 | (*notify)(inp, errno); |
39236c6e | 1604 | (void) in_pcb_checkstate(inp, WNT_RELEASE, 1); |
91447636 | 1605 | socket_unlock(inp->inp_socket, 1); |
1c79356b | 1606 | } |
39236c6e | 1607 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
1608 | } |
1609 | ||
1610 | /* | |
1611 | * Check for alternatives when higher level complains | |
1612 | * about service problems. For now, invalidate cached | |
1613 | * routing information. If the route was created dynamically | |
1614 | * (by a redirect), time to try a default gateway again. | |
1615 | */ | |
1616 | void | |
2d21ac55 | 1617 | in_losing(struct inpcb *inp) |
1c79356b | 1618 | { |
39236c6e | 1619 | boolean_t release = FALSE; |
2d21ac55 | 1620 | struct rtentry *rt; |
1c79356b A |
1621 | struct rt_addrinfo info; |
1622 | ||
b0d623f7 | 1623 | if ((rt = inp->inp_route.ro_rt) != NULL) { |
39236c6e | 1624 | struct in_ifaddr *ia = NULL; |
b0d623f7 | 1625 | |
39236c6e | 1626 | bzero((caddr_t)&info, sizeof (info)); |
b0d623f7 | 1627 | RT_LOCK(rt); |
1c79356b | 1628 | info.rti_info[RTAX_DST] = |
39236c6e | 1629 | (struct sockaddr *)&inp->inp_route.ro_dst; |
1c79356b A |
1630 | info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; |
1631 | info.rti_info[RTAX_NETMASK] = rt_mask(rt); | |
1632 | rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); | |
b0d623f7 A |
1633 | if (rt->rt_flags & RTF_DYNAMIC) { |
1634 | /* | |
1635 | * Prevent another thread from modifying rt_key, | |
1636 | * rt_gateway via rt_setgate() after rt_lock is | |
1637 | * dropped by marking the route as defunct. | |
1638 | */ | |
1639 | rt->rt_flags |= RTF_CONDEMNED; | |
1640 | RT_UNLOCK(rt); | |
1641 | (void) rtrequest(RTM_DELETE, rt_key(rt), | |
39236c6e | 1642 | rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); |
b0d623f7 A |
1643 | } else { |
1644 | RT_UNLOCK(rt); | |
1645 | } | |
2d21ac55 | 1646 | /* if the address is gone keep the old route in the pcb */ |
39236c6e A |
1647 | if (inp->inp_laddr.s_addr != INADDR_ANY && |
1648 | (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { | |
1649 | /* | |
1650 | * Address is around; ditch the route. A new route | |
1651 | * can be allocated the next time output is attempted. | |
1652 | */ | |
1653 | release = TRUE; | |
2d21ac55 | 1654 | } |
39236c6e A |
1655 | if (ia != NULL) |
1656 | IFA_REMREF(&ia->ia_ifa); | |
1c79356b | 1657 | } |
39236c6e A |
1658 | if (rt == NULL || release) |
1659 | ROUTE_RELEASE(&inp->inp_route); | |
1c79356b A |
1660 | } |
1661 | ||
1662 | /* | |
1663 | * After a routing change, flush old routing | |
1664 | * and allocate a (hopefully) better one. | |
1665 | */ | |
9bccf70c | 1666 | void |
39236c6e | 1667 | in_rtchange(struct inpcb *inp, int errno) |
1c79356b | 1668 | { |
39236c6e A |
1669 | #pragma unused(errno) |
1670 | boolean_t release = FALSE; | |
2d21ac55 A |
1671 | struct rtentry *rt; |
1672 | ||
1673 | if ((rt = inp->inp_route.ro_rt) != NULL) { | |
39236c6e | 1674 | struct in_ifaddr *ia = NULL; |
b0d623f7 | 1675 | |
39236c6e A |
1676 | /* if address is gone, keep the old route */ |
1677 | if (inp->inp_laddr.s_addr != INADDR_ANY && | |
1678 | (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { | |
1679 | /* | |
1680 | * Address is around; ditch the route. A new route | |
1681 | * can be allocated the next time output is attempted. | |
1682 | */ | |
1683 | release = TRUE; | |
2d21ac55 | 1684 | } |
39236c6e A |
1685 | if (ia != NULL) |
1686 | IFA_REMREF(&ia->ia_ifa); | |
1c79356b | 1687 | } |
39236c6e A |
1688 | if (rt == NULL || release) |
1689 | ROUTE_RELEASE(&inp->inp_route); | |
1c79356b A |
1690 | } |
1691 | ||
1692 | /* | |
1693 | * Lookup a PCB based on the local address and port. | |
1694 | */ | |
1695 | struct inpcb * | |
2d21ac55 | 1696 | in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, |
39236c6e | 1697 | unsigned int lport_arg, int wild_okay) |
1c79356b | 1698 | { |
2d21ac55 | 1699 | struct inpcb *inp; |
1c79356b A |
1700 | int matchwild = 3, wildcard; |
1701 | u_short lport = lport_arg; | |
1702 | ||
39236c6e | 1703 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0); |
1c79356b A |
1704 | |
1705 | if (!wild_okay) { | |
1706 | struct inpcbhead *head; | |
1707 | /* | |
1708 | * Look for an unconnected (wildcard foreign addr) PCB that | |
1709 | * matches the local address and port we're looking for. | |
1710 | */ | |
39236c6e A |
1711 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, |
1712 | pcbinfo->ipi_hashmask)]; | |
9bccf70c A |
1713 | LIST_FOREACH(inp, head, inp_hash) { |
1714 | #if INET6 | |
39236c6e | 1715 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1716 | continue; |
39236c6e | 1717 | #endif /* INET6 */ |
1c79356b A |
1718 | if (inp->inp_faddr.s_addr == INADDR_ANY && |
1719 | inp->inp_laddr.s_addr == laddr.s_addr && | |
1720 | inp->inp_lport == lport) { | |
1721 | /* | |
1722 | * Found. | |
1723 | */ | |
1724 | return (inp); | |
1725 | } | |
1726 | } | |
1727 | /* | |
1728 | * Not found. | |
1729 | */ | |
39236c6e | 1730 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0); |
1c79356b A |
1731 | return (NULL); |
1732 | } else { | |
1733 | struct inpcbporthead *porthash; | |
1734 | struct inpcbport *phd; | |
1735 | struct inpcb *match = NULL; | |
1736 | /* | |
1737 | * Best fit PCB lookup. | |
1738 | * | |
1739 | * First see if this local port is in use by looking on the | |
1740 | * port hash list. | |
1741 | */ | |
39236c6e A |
1742 | porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, |
1743 | pcbinfo->ipi_porthashmask)]; | |
9bccf70c | 1744 | LIST_FOREACH(phd, porthash, phd_hash) { |
1c79356b A |
1745 | if (phd->phd_port == lport) |
1746 | break; | |
1747 | } | |
1748 | if (phd != NULL) { | |
1749 | /* | |
1750 | * Port is in use by one or more PCBs. Look for best | |
1751 | * fit. | |
1752 | */ | |
9bccf70c | 1753 | LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { |
1c79356b | 1754 | wildcard = 0; |
9bccf70c | 1755 | #if INET6 |
39236c6e | 1756 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1757 | continue; |
39236c6e | 1758 | #endif /* INET6 */ |
1c79356b A |
1759 | if (inp->inp_faddr.s_addr != INADDR_ANY) |
1760 | wildcard++; | |
1761 | if (inp->inp_laddr.s_addr != INADDR_ANY) { | |
1762 | if (laddr.s_addr == INADDR_ANY) | |
1763 | wildcard++; | |
39236c6e A |
1764 | else if (inp->inp_laddr.s_addr != |
1765 | laddr.s_addr) | |
1c79356b A |
1766 | continue; |
1767 | } else { | |
1768 | if (laddr.s_addr != INADDR_ANY) | |
1769 | wildcard++; | |
1770 | } | |
1771 | if (wildcard < matchwild) { | |
1772 | match = inp; | |
1773 | matchwild = wildcard; | |
1774 | if (matchwild == 0) { | |
1775 | break; | |
1776 | } | |
1777 | } | |
1778 | } | |
1779 | } | |
39236c6e A |
1780 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match, |
1781 | 0, 0, 0, 0); | |
1c79356b A |
1782 | return (match); |
1783 | } | |
1784 | } | |
1785 | ||
6d2010ae A |
1786 | /* |
1787 | * Check if PCB exists in hash list. | |
1788 | */ | |
1789 | int | |
39236c6e A |
1790 | in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
1791 | u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard, | |
1792 | uid_t *uid, gid_t *gid, struct ifnet *ifp) | |
6d2010ae A |
1793 | { |
1794 | struct inpcbhead *head; | |
1795 | struct inpcb *inp; | |
1796 | u_short fport = fport_arg, lport = lport_arg; | |
39236c6e A |
1797 | int found = 0; |
1798 | struct inpcb *local_wild = NULL; | |
1799 | #if INET6 | |
1800 | struct inpcb *local_wild_mapped = NULL; | |
1801 | #endif /* INET6 */ | |
6d2010ae A |
1802 | |
1803 | *uid = UID_MAX; | |
1804 | *gid = GID_MAX; | |
316670eb | 1805 | |
6d2010ae A |
1806 | /* |
1807 | * We may have found the pcb in the last lookup - check this first. | |
1808 | */ | |
1809 | ||
39236c6e | 1810 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
6d2010ae A |
1811 | |
1812 | /* | |
1813 | * First look for an exact match. | |
1814 | */ | |
39236c6e A |
1815 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, |
1816 | pcbinfo->ipi_hashmask)]; | |
6d2010ae A |
1817 | LIST_FOREACH(inp, head, inp_hash) { |
1818 | #if INET6 | |
39236c6e | 1819 | if (!(inp->inp_vflag & INP_IPV4)) |
6d2010ae | 1820 | continue; |
39236c6e | 1821 | #endif /* INET6 */ |
fe8ab488 | 1822 | if (inp_restricted_recv(inp, ifp)) |
316670eb A |
1823 | continue; |
1824 | ||
6d2010ae A |
1825 | if (inp->inp_faddr.s_addr == faddr.s_addr && |
1826 | inp->inp_laddr.s_addr == laddr.s_addr && | |
1827 | inp->inp_fport == fport && | |
1828 | inp->inp_lport == lport) { | |
1829 | if ((found = (inp->inp_socket != NULL))) { | |
1830 | /* | |
1831 | * Found. | |
1832 | */ | |
316670eb A |
1833 | *uid = kauth_cred_getuid( |
1834 | inp->inp_socket->so_cred); | |
1835 | *gid = kauth_cred_getgid( | |
1836 | inp->inp_socket->so_cred); | |
6d2010ae | 1837 | } |
39236c6e | 1838 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae A |
1839 | return (found); |
1840 | } | |
1841 | } | |
6d2010ae | 1842 | |
39236c6e A |
1843 | if (!wildcard) { |
1844 | /* | |
1845 | * Not found. | |
1846 | */ | |
1847 | lck_rw_done(pcbinfo->ipi_lock); | |
1848 | return (0); | |
1849 | } | |
316670eb | 1850 | |
39236c6e A |
1851 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, |
1852 | pcbinfo->ipi_hashmask)]; | |
1853 | LIST_FOREACH(inp, head, inp_hash) { | |
6d2010ae | 1854 | #if INET6 |
39236c6e A |
1855 | if (!(inp->inp_vflag & INP_IPV4)) |
1856 | continue; | |
6d2010ae | 1857 | #endif /* INET6 */ |
fe8ab488 | 1858 | if (inp_restricted_recv(inp, ifp)) |
39236c6e A |
1859 | continue; |
1860 | ||
1861 | if (inp->inp_faddr.s_addr == INADDR_ANY && | |
1862 | inp->inp_lport == lport) { | |
1863 | if (inp->inp_laddr.s_addr == laddr.s_addr) { | |
1864 | if ((found = (inp->inp_socket != NULL))) { | |
316670eb | 1865 | *uid = kauth_cred_getuid( |
39236c6e | 1866 | inp->inp_socket->so_cred); |
316670eb | 1867 | *gid = kauth_cred_getgid( |
39236c6e | 1868 | inp->inp_socket->so_cred); |
6d2010ae | 1869 | } |
39236c6e | 1870 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae | 1871 | return (found); |
39236c6e A |
1872 | } else if (inp->inp_laddr.s_addr == INADDR_ANY) { |
1873 | #if INET6 | |
1874 | if (inp->inp_socket && | |
1875 | SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) | |
1876 | local_wild_mapped = inp; | |
1877 | else | |
6d2010ae | 1878 | #endif /* INET6 */ |
39236c6e A |
1879 | local_wild = inp; |
1880 | } | |
6d2010ae | 1881 | } |
39236c6e A |
1882 | } |
1883 | if (local_wild == NULL) { | |
1884 | #if INET6 | |
1885 | if (local_wild_mapped != NULL) { | |
1886 | if ((found = (local_wild_mapped->inp_socket != NULL))) { | |
316670eb | 1887 | *uid = kauth_cred_getuid( |
39236c6e | 1888 | local_wild_mapped->inp_socket->so_cred); |
316670eb | 1889 | *gid = kauth_cred_getgid( |
39236c6e | 1890 | local_wild_mapped->inp_socket->so_cred); |
6d2010ae | 1891 | } |
39236c6e | 1892 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae A |
1893 | return (found); |
1894 | } | |
39236c6e A |
1895 | #endif /* INET6 */ |
1896 | lck_rw_done(pcbinfo->ipi_lock); | |
1897 | return (0); | |
6d2010ae | 1898 | } |
39236c6e A |
1899 | if ((found = (local_wild->inp_socket != NULL))) { |
1900 | *uid = kauth_cred_getuid( | |
1901 | local_wild->inp_socket->so_cred); | |
1902 | *gid = kauth_cred_getgid( | |
1903 | local_wild->inp_socket->so_cred); | |
1904 | } | |
1905 | lck_rw_done(pcbinfo->ipi_lock); | |
1906 | return (found); | |
6d2010ae A |
1907 | } |
1908 | ||
1c79356b A |
1909 | /* |
1910 | * Lookup PCB in hash list. | |
1911 | */ | |
1912 | struct inpcb * | |
39236c6e A |
1913 | in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
1914 | u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard, | |
1915 | struct ifnet *ifp) | |
1c79356b A |
1916 | { |
1917 | struct inpcbhead *head; | |
2d21ac55 | 1918 | struct inpcb *inp; |
1c79356b | 1919 | u_short fport = fport_arg, lport = lport_arg; |
39236c6e A |
1920 | struct inpcb *local_wild = NULL; |
1921 | #if INET6 | |
1922 | struct inpcb *local_wild_mapped = NULL; | |
1923 | #endif /* INET6 */ | |
1c79356b A |
1924 | |
1925 | /* | |
1926 | * We may have found the pcb in the last lookup - check this first. | |
1927 | */ | |
1928 | ||
39236c6e | 1929 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
1c79356b A |
1930 | |
1931 | /* | |
1932 | * First look for an exact match. | |
1933 | */ | |
39236c6e A |
1934 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, |
1935 | pcbinfo->ipi_hashmask)]; | |
9bccf70c A |
1936 | LIST_FOREACH(inp, head, inp_hash) { |
1937 | #if INET6 | |
39236c6e | 1938 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1939 | continue; |
39236c6e | 1940 | #endif /* INET6 */ |
fe8ab488 | 1941 | if (inp_restricted_recv(inp, ifp)) |
316670eb A |
1942 | continue; |
1943 | ||
1c79356b A |
1944 | if (inp->inp_faddr.s_addr == faddr.s_addr && |
1945 | inp->inp_laddr.s_addr == laddr.s_addr && | |
1946 | inp->inp_fport == fport && | |
1947 | inp->inp_lport == lport) { | |
1948 | /* | |
1949 | * Found. | |
1950 | */ | |
39236c6e A |
1951 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != |
1952 | WNT_STOPUSING) { | |
1953 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 1954 | return (inp); |
39236c6e A |
1955 | } else { |
1956 | /* it's there but dead, say it isn't found */ | |
1957 | lck_rw_done(pcbinfo->ipi_lock); | |
316670eb | 1958 | return (NULL); |
91447636 | 1959 | } |
1c79356b A |
1960 | } |
1961 | } | |
1c79356b | 1962 | |
39236c6e A |
1963 | if (!wildcard) { |
1964 | /* | |
1965 | * Not found. | |
1966 | */ | |
1967 | lck_rw_done(pcbinfo->ipi_lock); | |
1968 | return (NULL); | |
1969 | } | |
1970 | ||
1971 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, | |
1972 | pcbinfo->ipi_hashmask)]; | |
1973 | LIST_FOREACH(inp, head, inp_hash) { | |
9bccf70c | 1974 | #if INET6 |
39236c6e A |
1975 | if (!(inp->inp_vflag & INP_IPV4)) |
1976 | continue; | |
1977 | #endif /* INET6 */ | |
fe8ab488 | 1978 | if (inp_restricted_recv(inp, ifp)) |
39236c6e A |
1979 | continue; |
1980 | ||
1981 | if (inp->inp_faddr.s_addr == INADDR_ANY && | |
1982 | inp->inp_lport == lport) { | |
1983 | if (inp->inp_laddr.s_addr == laddr.s_addr) { | |
1984 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != | |
1985 | WNT_STOPUSING) { | |
1986 | lck_rw_done(pcbinfo->ipi_lock); | |
1987 | return (inp); | |
1988 | } else { | |
1989 | /* it's dead; say it isn't found */ | |
1990 | lck_rw_done(pcbinfo->ipi_lock); | |
1991 | return (NULL); | |
91447636 | 1992 | } |
39236c6e | 1993 | } else if (inp->inp_laddr.s_addr == INADDR_ANY) { |
2d21ac55 | 1994 | #if INET6 |
39236c6e A |
1995 | if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) |
1996 | local_wild_mapped = inp; | |
1997 | else | |
2d21ac55 | 1998 | #endif /* INET6 */ |
1c79356b | 1999 | local_wild = inp; |
1c79356b A |
2000 | } |
2001 | } | |
39236c6e A |
2002 | } |
2003 | if (local_wild == NULL) { | |
2d21ac55 | 2004 | #if INET6 |
39236c6e A |
2005 | if (local_wild_mapped != NULL) { |
2006 | if (in_pcb_checkstate(local_wild_mapped, | |
2007 | WNT_ACQUIRE, 0) != WNT_STOPUSING) { | |
2008 | lck_rw_done(pcbinfo->ipi_lock); | |
2009 | return (local_wild_mapped); | |
2010 | } else { | |
2011 | /* it's dead; say it isn't found */ | |
2012 | lck_rw_done(pcbinfo->ipi_lock); | |
2013 | return (NULL); | |
91447636 | 2014 | } |
91447636 | 2015 | } |
39236c6e A |
2016 | #endif /* INET6 */ |
2017 | lck_rw_done(pcbinfo->ipi_lock); | |
2018 | return (NULL); | |
2019 | } | |
2020 | if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) { | |
2021 | lck_rw_done(pcbinfo->ipi_lock); | |
2022 | return (local_wild); | |
1c79356b | 2023 | } |
1c79356b | 2024 | /* |
39236c6e | 2025 | * It's either not found or is already dead. |
1c79356b | 2026 | */ |
39236c6e | 2027 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
2028 | return (NULL); |
2029 | } | |
2030 | ||
2031 | /* | |
2032 | * Insert PCB onto various hash lists. | |
2033 | */ | |
2034 | int | |
2d21ac55 | 2035 | in_pcbinshash(struct inpcb *inp, int locked) |
1c79356b A |
2036 | { |
2037 | struct inpcbhead *pcbhash; | |
2038 | struct inpcbporthead *pcbporthash; | |
2039 | struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; | |
2040 | struct inpcbport *phd; | |
2041 | u_int32_t hashkey_faddr; | |
2042 | ||
39236c6e A |
2043 | if (!locked) { |
2044 | if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) { | |
2045 | /* | |
2046 | * Lock inversion issue, mostly with udp | |
2047 | * multicast packets | |
2048 | */ | |
2049 | socket_unlock(inp->inp_socket, 0); | |
2050 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); | |
2051 | socket_lock(inp->inp_socket, 0); | |
6d2010ae | 2052 | if (inp->inp_state == INPCB_STATE_DEAD) { |
39236c6e A |
2053 | /* |
2054 | * The socket got dropped when | |
2055 | * it was unlocked | |
2056 | */ | |
2057 | lck_rw_done(pcbinfo->ipi_lock); | |
2058 | return (ECONNABORTED); | |
6d2010ae | 2059 | } |
39236c6e A |
2060 | } |
2061 | } | |
b0d623f7 | 2062 | |
1c79356b A |
2063 | #if INET6 |
2064 | if (inp->inp_vflag & INP_IPV6) | |
2065 | hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; | |
2066 | else | |
2067 | #endif /* INET6 */ | |
39236c6e | 2068 | hashkey_faddr = inp->inp_faddr.s_addr; |
1c79356b | 2069 | |
39236c6e A |
2070 | inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, |
2071 | inp->inp_fport, pcbinfo->ipi_hashmask); | |
91447636 | 2072 | |
39236c6e | 2073 | pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element]; |
1c79356b | 2074 | |
39236c6e A |
2075 | pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport, |
2076 | pcbinfo->ipi_porthashmask)]; | |
1c79356b A |
2077 | |
2078 | /* | |
2079 | * Go through port list and look for a head for this lport. | |
2080 | */ | |
9bccf70c | 2081 | LIST_FOREACH(phd, pcbporthash, phd_hash) { |
1c79356b A |
2082 | if (phd->phd_port == inp->inp_lport) |
2083 | break; | |
2084 | } | |
316670eb A |
2085 | |
2086 | VERIFY(inp->inp_state != INPCB_STATE_DEAD); | |
2087 | ||
1c79356b A |
2088 | /* |
2089 | * If none exists, malloc one and tack it on. | |
2090 | */ | |
2091 | if (phd == NULL) { | |
39236c6e A |
2092 | MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport), |
2093 | M_PCB, M_WAITOK); | |
1c79356b | 2094 | if (phd == NULL) { |
91447636 | 2095 | if (!locked) |
39236c6e | 2096 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
2097 | return (ENOBUFS); /* XXX */ |
2098 | } | |
2099 | phd->phd_port = inp->inp_lport; | |
2100 | LIST_INIT(&phd->phd_pcblist); | |
2101 | LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); | |
2102 | } | |
fe8ab488 A |
2103 | |
2104 | VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); | |
1c79356b A |
2105 | inp->inp_phd = phd; |
2106 | LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); | |
2107 | LIST_INSERT_HEAD(pcbhash, inp, inp_hash); | |
fe8ab488 A |
2108 | inp->inp_flags2 |= INP2_INHASHLIST; |
2109 | ||
91447636 | 2110 | if (!locked) |
39236c6e | 2111 | lck_rw_done(pcbinfo->ipi_lock); |
fe8ab488 A |
2112 | |
2113 | #if NECP | |
2114 | // This call catches the original setting of the local address | |
2115 | inp_update_necp_policy(inp, NULL, NULL, 0); | |
2116 | #endif /* NECP */ | |
2117 | ||
1c79356b A |
2118 | return (0); |
2119 | } | |
2120 | ||
2121 | /* | |
2122 | * Move PCB to the proper hash bucket when { faddr, fport } have been | |
2123 | * changed. NOTE: This does not handle the case of the lport changing (the | |
2124 | * hashed port list would have to be updated as well), so the lport must | |
2125 | * not change after in_pcbinshash() has been called. | |
2126 | */ | |
2127 | void | |
2d21ac55 | 2128 | in_pcbrehash(struct inpcb *inp) |
1c79356b A |
2129 | { |
2130 | struct inpcbhead *head; | |
2131 | u_int32_t hashkey_faddr; | |
2132 | ||
2133 | #if INET6 | |
2134 | if (inp->inp_vflag & INP_IPV6) | |
2135 | hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; | |
2136 | else | |
2137 | #endif /* INET6 */ | |
39236c6e A |
2138 | hashkey_faddr = inp->inp_faddr.s_addr; |
2139 | ||
2140 | inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, | |
2141 | inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask); | |
2142 | head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element]; | |
1c79356b | 2143 | |
fe8ab488 A |
2144 | if (inp->inp_flags2 & INP2_INHASHLIST) { |
2145 | LIST_REMOVE(inp, inp_hash); | |
2146 | inp->inp_flags2 &= ~INP2_INHASHLIST; | |
2147 | } | |
2148 | ||
2149 | VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); | |
1c79356b | 2150 | LIST_INSERT_HEAD(head, inp, inp_hash); |
fe8ab488 A |
2151 | inp->inp_flags2 |= INP2_INHASHLIST; |
2152 | ||
2153 | #if NECP | |
2154 | // This call catches updates to the remote addresses | |
2155 | inp_update_necp_policy(inp, NULL, NULL, 0); | |
2156 | #endif /* NECP */ | |
1c79356b A |
2157 | } |
2158 | ||
2159 | /* | |
2160 | * Remove PCB from various lists. | |
316670eb | 2161 | * Must be called pcbinfo lock is held in exclusive mode. |
1c79356b A |
2162 | */ |
2163 | void | |
2d21ac55 | 2164 | in_pcbremlists(struct inpcb *inp) |
1c79356b A |
2165 | { |
2166 | inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; | |
1c79356b | 2167 | |
fe8ab488 A |
2168 | /* |
2169 | * Check if it's in hashlist -- an inp is placed in hashlist when | |
2170 | * it's local port gets assigned. So it should also be present | |
2171 | * in the port list. | |
2172 | */ | |
2173 | if (inp->inp_flags2 & INP2_INHASHLIST) { | |
1c79356b A |
2174 | struct inpcbport *phd = inp->inp_phd; |
2175 | ||
fe8ab488 A |
2176 | VERIFY(phd != NULL && inp->inp_lport > 0); |
2177 | ||
1c79356b | 2178 | LIST_REMOVE(inp, inp_hash); |
fe8ab488 A |
2179 | inp->inp_hash.le_next = NULL; |
2180 | inp->inp_hash.le_prev = NULL; | |
2181 | ||
1c79356b | 2182 | LIST_REMOVE(inp, inp_portlist); |
fe8ab488 A |
2183 | inp->inp_portlist.le_next = NULL; |
2184 | inp->inp_portlist.le_prev = NULL; | |
2185 | if (LIST_EMPTY(&phd->phd_pcblist)) { | |
1c79356b A |
2186 | LIST_REMOVE(phd, phd_hash); |
2187 | FREE(phd, M_PCB); | |
2188 | } | |
fe8ab488 A |
2189 | inp->inp_phd = NULL; |
2190 | inp->inp_flags2 &= ~INP2_INHASHLIST; | |
1c79356b | 2191 | } |
fe8ab488 | 2192 | VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); |
39236c6e A |
2193 | |
2194 | if (inp->inp_flags2 & INP2_TIMEWAIT) { | |
2195 | /* Remove from time-wait queue */ | |
2196 | tcp_remove_from_time_wait(inp); | |
2197 | inp->inp_flags2 &= ~INP2_TIMEWAIT; | |
2198 | VERIFY(inp->inp_pcbinfo->ipi_twcount != 0); | |
2199 | inp->inp_pcbinfo->ipi_twcount--; | |
2200 | } else { | |
2201 | /* Remove from global inp list if it is not time-wait */ | |
2202 | LIST_REMOVE(inp, inp_list); | |
2203 | } | |
316670eb | 2204 | |
bd504ef0 | 2205 | if (inp->inp_flags2 & INP2_IN_FCTREE) { |
39236c6e | 2206 | inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE)); |
bd504ef0 A |
2207 | VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE)); |
2208 | } | |
39236c6e | 2209 | |
1c79356b A |
2210 | inp->inp_pcbinfo->ipi_count--; |
2211 | } | |
2212 | ||
39236c6e A |
2213 | /* |
2214 | * Mechanism used to defer the memory release of PCBs | |
2215 | * The pcb list will contain the pcb until the reaper can clean it up if | |
2216 | * the following conditions are met: | |
2217 | * 1) state "DEAD", | |
2218 | * 2) wantcnt is STOPUSING | |
2219 | * 3) usecount is 0 | |
91447636 | 2220 | * This function will be called to either mark the pcb as |
39236c6e | 2221 | */ |
91447636 A |
2222 | int |
2223 | in_pcb_checkstate(struct inpcb *pcb, int mode, int locked) | |
91447636 | 2224 | { |
39236c6e | 2225 | volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt; |
2d21ac55 A |
2226 | UInt32 origwant; |
2227 | UInt32 newwant; | |
91447636 A |
2228 | |
2229 | switch (mode) { | |
39236c6e A |
2230 | case WNT_STOPUSING: |
2231 | /* | |
2232 | * Try to mark the pcb as ready for recycling. CAS with | |
2233 | * STOPUSING, if success we're good, if it's in use, will | |
2234 | * be marked later | |
2235 | */ | |
2236 | if (locked == 0) | |
2237 | socket_lock(pcb->inp_socket, 1); | |
2238 | pcb->inp_state = INPCB_STATE_DEAD; | |
91447636 | 2239 | |
39236c6e A |
2240 | stopusing: |
2241 | if (pcb->inp_socket->so_usecount < 0) { | |
2242 | panic("%s: pcb=%p so=%p usecount is negative\n", | |
2243 | __func__, pcb, pcb->inp_socket); | |
2244 | /* NOTREACHED */ | |
2245 | } | |
2246 | if (locked == 0) | |
2247 | socket_unlock(pcb->inp_socket, 1); | |
91447636 | 2248 | |
39236c6e | 2249 | inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST); |
6d2010ae | 2250 | |
39236c6e A |
2251 | origwant = *wantcnt; |
2252 | if ((UInt16) origwant == 0xffff) /* should stop using */ | |
2253 | return (WNT_STOPUSING); | |
2254 | newwant = 0xffff; | |
2255 | if ((UInt16) origwant == 0) { | |
2256 | /* try to mark it as unsuable now */ | |
2257 | OSCompareAndSwap(origwant, newwant, wantcnt); | |
2258 | } | |
2259 | return (WNT_STOPUSING); | |
2260 | break; | |
91447636 | 2261 | |
39236c6e A |
2262 | case WNT_ACQUIRE: |
2263 | /* | |
2264 | * Try to increase reference to pcb. If WNT_STOPUSING | |
2265 | * should bail out. If socket state DEAD, try to set count | |
2266 | * to STOPUSING, return failed otherwise increase cnt. | |
2267 | */ | |
2268 | do { | |
91447636 | 2269 | origwant = *wantcnt; |
39236c6e A |
2270 | if ((UInt16) origwant == 0xffff) { |
2271 | /* should stop using */ | |
91447636 | 2272 | return (WNT_STOPUSING); |
91447636 | 2273 | } |
39236c6e A |
2274 | newwant = origwant + 1; |
2275 | } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); | |
2276 | return (WNT_ACQUIRE); | |
2277 | break; | |
91447636 | 2278 | |
39236c6e A |
2279 | case WNT_RELEASE: |
2280 | /* | |
2281 | * Release reference. If result is null and pcb state | |
2282 | * is DEAD, set wanted bit to STOPUSING | |
2283 | */ | |
2284 | if (locked == 0) | |
2285 | socket_lock(pcb->inp_socket, 1); | |
91447636 | 2286 | |
39236c6e A |
2287 | do { |
2288 | origwant = *wantcnt; | |
2289 | if ((UInt16) origwant == 0x0) { | |
2290 | panic("%s: pcb=%p release with zero count", | |
2291 | __func__, pcb); | |
2292 | /* NOTREACHED */ | |
2293 | } | |
2294 | if ((UInt16) origwant == 0xffff) { | |
2295 | /* should stop using */ | |
2296 | if (locked == 0) | |
2297 | socket_unlock(pcb->inp_socket, 1); | |
2298 | return (WNT_STOPUSING); | |
2299 | } | |
2300 | newwant = origwant - 1; | |
2301 | } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); | |
2302 | ||
2303 | if (pcb->inp_state == INPCB_STATE_DEAD) | |
2304 | goto stopusing; | |
2305 | if (pcb->inp_socket->so_usecount < 0) { | |
2306 | panic("%s: RELEASE pcb=%p so=%p usecount is negative\n", | |
2307 | __func__, pcb, pcb->inp_socket); | |
2308 | /* NOTREACHED */ | |
2309 | } | |
91447636 | 2310 | |
39236c6e A |
2311 | if (locked == 0) |
2312 | socket_unlock(pcb->inp_socket, 1); | |
2313 | return (WNT_RELEASE); | |
2314 | break; | |
91447636 | 2315 | |
39236c6e A |
2316 | default: |
2317 | panic("%s: so=%p not a valid state =%x\n", __func__, | |
2318 | pcb->inp_socket, mode); | |
2319 | /* NOTREACHED */ | |
91447636 A |
2320 | } |
2321 | ||
2322 | /* NOTREACHED */ | |
2323 | return (mode); | |
2324 | } | |
2325 | ||
2326 | /* | |
2327 | * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat. | |
2328 | * The inpcb_compat data structure is passed to user space and must | |
b0d623f7 | 2329 | * not change. We intentionally avoid copying pointers. |
91447636 A |
2330 | */ |
2331 | void | |
39236c6e | 2332 | inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat) |
91447636 | 2333 | { |
39236c6e | 2334 | bzero(inp_compat, sizeof (*inp_compat)); |
91447636 A |
2335 | inp_compat->inp_fport = inp->inp_fport; |
2336 | inp_compat->inp_lport = inp->inp_lport; | |
316670eb | 2337 | inp_compat->nat_owner = 0; |
39236c6e | 2338 | inp_compat->nat_cookie = 0; |
91447636 A |
2339 | inp_compat->inp_gencnt = inp->inp_gencnt; |
2340 | inp_compat->inp_flags = inp->inp_flags; | |
2341 | inp_compat->inp_flow = inp->inp_flow; | |
2342 | inp_compat->inp_vflag = inp->inp_vflag; | |
2343 | inp_compat->inp_ip_ttl = inp->inp_ip_ttl; | |
2344 | inp_compat->inp_ip_p = inp->inp_ip_p; | |
39236c6e A |
2345 | inp_compat->inp_dependfaddr.inp6_foreign = |
2346 | inp->inp_dependfaddr.inp6_foreign; | |
2347 | inp_compat->inp_dependladdr.inp6_local = | |
2348 | inp->inp_dependladdr.inp6_local; | |
91447636 | 2349 | inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; |
39236c6e | 2350 | inp_compat->inp_depend6.inp6_hlim = 0; |
91447636 | 2351 | inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; |
39236c6e | 2352 | inp_compat->inp_depend6.inp6_ifindex = 0; |
91447636 A |
2353 | inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; |
2354 | } | |
9bccf70c | 2355 | |
b0d623f7 | 2356 | void |
39236c6e | 2357 | inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp) |
b0d623f7 | 2358 | { |
6d2010ae A |
2359 | xinp->inp_fport = inp->inp_fport; |
2360 | xinp->inp_lport = inp->inp_lport; | |
2361 | xinp->inp_gencnt = inp->inp_gencnt; | |
2362 | xinp->inp_flags = inp->inp_flags; | |
2363 | xinp->inp_flow = inp->inp_flow; | |
2364 | xinp->inp_vflag = inp->inp_vflag; | |
2365 | xinp->inp_ip_ttl = inp->inp_ip_ttl; | |
2366 | xinp->inp_ip_p = inp->inp_ip_p; | |
2367 | xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign; | |
2368 | xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local; | |
2369 | xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; | |
39236c6e | 2370 | xinp->inp_depend6.inp6_hlim = 0; |
6d2010ae | 2371 | xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; |
39236c6e | 2372 | xinp->inp_depend6.inp6_ifindex = 0; |
6d2010ae | 2373 | xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; |
b0d623f7 A |
2374 | } |
2375 | ||
b0d623f7 A |
2376 | /* |
2377 | * The following routines implement this scheme: | |
2378 | * | |
2379 | * Callers of ip_output() that intend to cache the route in the inpcb pass | |
2380 | * a local copy of the struct route to ip_output(). Using a local copy of | |
2381 | * the cached route significantly simplifies things as IP no longer has to | |
2382 | * worry about having exclusive access to the passed in struct route, since | |
2383 | * it's defined in the caller's stack; in essence, this allows for a lock- | |
2384 | * less operation when updating the struct route at the IP level and below, | |
2385 | * whenever necessary. The scheme works as follows: | |
2386 | * | |
2387 | * Prior to dropping the socket's lock and calling ip_output(), the caller | |
2388 | * copies the struct route from the inpcb into its stack, and adds a reference | |
2389 | * to the cached route entry, if there was any. The socket's lock is then | |
2390 | * dropped and ip_output() is called with a pointer to the copy of struct | |
2391 | * route defined on the stack (not to the one in the inpcb.) | |
2392 | * | |
2393 | * Upon returning from ip_output(), the caller then acquires the socket's | |
2394 | * lock and synchronizes the cache; if there is no route cached in the inpcb, | |
2395 | * it copies the local copy of struct route (which may or may not contain any | |
2396 | * route) back into the cache; otherwise, if the inpcb has a route cached in | |
2397 | * it, the one in the local copy will be freed, if there's any. Trashing the | |
2398 | * cached route in the inpcb can be avoided because ip_output() is single- | |
2399 | * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized | |
2400 | * by the socket/transport layer.) | |
2401 | */ | |
2402 | void | |
2403 | inp_route_copyout(struct inpcb *inp, struct route *dst) | |
2404 | { | |
2405 | struct route *src = &inp->inp_route; | |
2406 | ||
6d2010ae | 2407 | lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); |
b0d623f7 | 2408 | |
0b4c1975 | 2409 | /* |
39236c6e | 2410 | * If the route in the PCB is stale or not for IPv4, blow it away; |
0b4c1975 A |
2411 | * this is possible in the case of IPv4-mapped address case. |
2412 | */ | |
39236c6e A |
2413 | if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) |
2414 | ROUTE_RELEASE(src); | |
316670eb | 2415 | |
39236c6e | 2416 | route_copyout(dst, src, sizeof (*dst)); |
b0d623f7 A |
2417 | } |
2418 | ||
2419 | void | |
2420 | inp_route_copyin(struct inpcb *inp, struct route *src) | |
2421 | { | |
2422 | struct route *dst = &inp->inp_route; | |
2423 | ||
6d2010ae | 2424 | lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); |
b0d623f7 A |
2425 | |
2426 | /* Minor sanity check */ | |
2427 | if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) | |
2428 | panic("%s: wrong or corrupted route: %p", __func__, src); | |
2429 | ||
39236c6e | 2430 | route_copyin(src, dst, sizeof (*src)); |
6d2010ae A |
2431 | } |
2432 | ||
2433 | /* | |
2434 | * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option. | |
2435 | */ | |
316670eb | 2436 | int |
39236c6e | 2437 | inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp) |
6d2010ae | 2438 | { |
316670eb A |
2439 | struct ifnet *ifp = NULL; |
2440 | ||
2441 | ifnet_head_lock_shared(); | |
2442 | if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE && | |
2443 | (ifp = ifindex2ifnet[ifscope]) == NULL)) { | |
2444 | ifnet_head_done(); | |
2445 | return (ENXIO); | |
2446 | } | |
2447 | ifnet_head_done(); | |
2448 | ||
2449 | VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE); | |
2450 | ||
6d2010ae A |
2451 | /* |
2452 | * A zero interface scope value indicates an "unbind". | |
2453 | * Otherwise, take in whatever value the app desires; | |
2454 | * the app may already know the scope (or force itself | |
2455 | * to such a scope) ahead of time before the interface | |
2456 | * gets attached. It doesn't matter either way; any | |
2457 | * route lookup from this point on will require an | |
2458 | * exact match for the embedded interface scope. | |
2459 | */ | |
316670eb A |
2460 | inp->inp_boundifp = ifp; |
2461 | if (inp->inp_boundifp == NULL) | |
6d2010ae A |
2462 | inp->inp_flags &= ~INP_BOUND_IF; |
2463 | else | |
2464 | inp->inp_flags |= INP_BOUND_IF; | |
2465 | ||
2466 | /* Blow away any cached route in the PCB */ | |
39236c6e A |
2467 | ROUTE_RELEASE(&inp->inp_route); |
2468 | ||
2469 | if (pifp != NULL) | |
2470 | *pifp = ifp; | |
316670eb A |
2471 | |
2472 | return (0); | |
6d2010ae A |
2473 | } |
2474 | ||
2475 | /* | |
39236c6e A |
2476 | * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option, |
2477 | * as well as for setting PROC_UUID_NO_CELLULAR policy. | |
6d2010ae | 2478 | */ |
39236c6e A |
2479 | void |
2480 | inp_set_nocellular(struct inpcb *inp) | |
6d2010ae | 2481 | { |
39236c6e | 2482 | inp->inp_flags |= INP_NO_IFT_CELLULAR; |
6d2010ae A |
2483 | |
2484 | /* Blow away any cached route in the PCB */ | |
39236c6e A |
2485 | ROUTE_RELEASE(&inp->inp_route); |
2486 | } | |
2487 | ||
2488 | /* | |
2489 | * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option, | |
2490 | * as well as for clearing PROC_UUID_NO_CELLULAR policy. | |
2491 | */ | |
2492 | void | |
2493 | inp_clear_nocellular(struct inpcb *inp) | |
2494 | { | |
2495 | struct socket *so = inp->inp_socket; | |
2496 | ||
2497 | /* | |
2498 | * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket | |
2499 | * has a higher precendence than INP_NO_IFT_CELLULAR. Clear the flag | |
2500 | * if and only if the socket is unrestricted. | |
2501 | */ | |
2502 | if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) { | |
2503 | inp->inp_flags &= ~INP_NO_IFT_CELLULAR; | |
2504 | ||
2505 | /* Blow away any cached route in the PCB */ | |
2506 | ROUTE_RELEASE(&inp->inp_route); | |
6d2010ae | 2507 | } |
39236c6e | 2508 | } |
6d2010ae | 2509 | |
fe8ab488 A |
2510 | void |
2511 | inp_set_noexpensive(struct inpcb *inp) | |
2512 | { | |
2513 | inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE; | |
2514 | ||
2515 | /* Blow away any cached route in the PCB */ | |
2516 | ROUTE_RELEASE(&inp->inp_route); | |
2517 | } | |
2518 | ||
2519 | void | |
2520 | inp_set_awdl_unrestricted(struct inpcb *inp) | |
2521 | { | |
2522 | inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED; | |
2523 | ||
2524 | /* Blow away any cached route in the PCB */ | |
2525 | ROUTE_RELEASE(&inp->inp_route); | |
2526 | } | |
2527 | ||
2528 | boolean_t | |
2529 | inp_get_awdl_unrestricted(struct inpcb *inp) | |
2530 | { | |
2531 | return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE; | |
2532 | } | |
2533 | ||
2534 | void | |
2535 | inp_clear_awdl_unrestricted(struct inpcb *inp) | |
2536 | { | |
2537 | inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED; | |
2538 | ||
2539 | /* Blow away any cached route in the PCB */ | |
2540 | ROUTE_RELEASE(&inp->inp_route); | |
2541 | } | |
2542 | ||
2543 | #if NECP | |
39236c6e | 2544 | /* |
fe8ab488 | 2545 | * Called when PROC_UUID_NECP_APP_POLICY is set. |
39236c6e A |
2546 | */ |
2547 | void | |
fe8ab488 | 2548 | inp_set_want_app_policy(struct inpcb *inp) |
39236c6e | 2549 | { |
fe8ab488 | 2550 | inp->inp_flags2 |= INP2_WANT_APP_POLICY; |
39236c6e A |
2551 | } |
2552 | ||
2553 | /* | |
fe8ab488 | 2554 | * Called when PROC_UUID_NECP_APP_POLICY is cleared. |
39236c6e A |
2555 | */ |
2556 | void | |
fe8ab488 | 2557 | inp_clear_want_app_policy(struct inpcb *inp) |
39236c6e | 2558 | { |
fe8ab488 | 2559 | inp->inp_flags2 &= ~INP2_WANT_APP_POLICY; |
b0d623f7 | 2560 | } |
fe8ab488 | 2561 | #endif /* NECP */ |
316670eb A |
2562 | |
2563 | /* | |
2564 | * Calculate flow hash for an inp, used by an interface to identify a | |
2565 | * flow. When an interface provides flow control advisory, this flow | |
2566 | * hash is used as an identifier. | |
2567 | */ | |
2568 | u_int32_t | |
2569 | inp_calc_flowhash(struct inpcb *inp) | |
2570 | { | |
2571 | struct inp_flowhash_key fh __attribute__((aligned(8))); | |
2572 | u_int32_t flowhash = 0; | |
bd504ef0 | 2573 | struct inpcb *tmp_inp = NULL; |
316670eb A |
2574 | |
2575 | if (inp_hash_seed == 0) | |
2576 | inp_hash_seed = RandomULong(); | |
2577 | ||
2578 | bzero(&fh, sizeof (fh)); | |
2579 | ||
2580 | bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr)); | |
2581 | bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr)); | |
2582 | ||
2583 | fh.infh_lport = inp->inp_lport; | |
2584 | fh.infh_fport = inp->inp_fport; | |
2585 | fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET; | |
2586 | fh.infh_proto = inp->inp_ip_p; | |
2587 | fh.infh_rand1 = RandomULong(); | |
2588 | fh.infh_rand2 = RandomULong(); | |
2589 | ||
2590 | try_again: | |
2591 | flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed); | |
2592 | if (flowhash == 0) { | |
2593 | /* try to get a non-zero flowhash */ | |
2594 | inp_hash_seed = RandomULong(); | |
2595 | goto try_again; | |
2596 | } | |
2597 | ||
bd504ef0 | 2598 | inp->inp_flowhash = flowhash; |
316670eb | 2599 | |
bd504ef0 | 2600 | /* Insert the inp into inp_fc_tree */ |
39236c6e | 2601 | lck_mtx_lock_spin(&inp_fc_lck); |
bd504ef0 A |
2602 | tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp); |
2603 | if (tmp_inp != NULL) { | |
316670eb | 2604 | /* |
bd504ef0 A |
2605 | * There is a different inp with the same flowhash. |
2606 | * There can be a collision on flow hash but the | |
39236c6e | 2607 | * probability is low. Let's recompute the |
bd504ef0 | 2608 | * flowhash. |
316670eb A |
2609 | */ |
2610 | lck_mtx_unlock(&inp_fc_lck); | |
bd504ef0 A |
2611 | /* recompute hash seed */ |
2612 | inp_hash_seed = RandomULong(); | |
2613 | goto try_again; | |
316670eb | 2614 | } |
39236c6e | 2615 | |
bd504ef0 A |
2616 | RB_INSERT(inp_fc_tree, &inp_fc_tree, inp); |
2617 | inp->inp_flags2 |= INP2_IN_FCTREE; | |
316670eb | 2618 | lck_mtx_unlock(&inp_fc_lck); |
bd504ef0 | 2619 | |
39236c6e A |
2620 | return (flowhash); |
2621 | } | |
2622 | ||
2623 | void | |
2624 | inp_flowadv(uint32_t flowhash) | |
2625 | { | |
2626 | struct inpcb *inp; | |
2627 | ||
2628 | inp = inp_fc_getinp(flowhash, 0); | |
2629 | ||
2630 | if (inp == NULL) | |
2631 | return; | |
2632 | inp_fc_feedback(inp); | |
316670eb A |
2633 | } |
2634 | ||
bd504ef0 A |
2635 | /* |
2636 | * Function to compare inp_fc_entries in inp flow control tree | |
2637 | */ | |
2638 | static inline int | |
2639 | infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2) | |
316670eb | 2640 | { |
bd504ef0 | 2641 | return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash), |
39236c6e | 2642 | sizeof(inp1->inp_flowhash))); |
bd504ef0 | 2643 | } |
316670eb | 2644 | |
39236c6e | 2645 | static struct inpcb * |
bd504ef0 A |
2646 | inp_fc_getinp(u_int32_t flowhash, u_int32_t flags) |
2647 | { | |
2648 | struct inpcb *inp = NULL; | |
2649 | int locked = (flags & INPFC_SOLOCKED) ? 1 : 0; | |
316670eb A |
2650 | |
2651 | lck_mtx_lock_spin(&inp_fc_lck); | |
bd504ef0 A |
2652 | key_inp.inp_flowhash = flowhash; |
2653 | inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp); | |
2654 | if (inp == NULL) { | |
316670eb A |
2655 | /* inp is not present, return */ |
2656 | lck_mtx_unlock(&inp_fc_lck); | |
2657 | return (NULL); | |
2658 | } | |
2659 | ||
bd504ef0 A |
2660 | if (flags & INPFC_REMOVE) { |
2661 | RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp); | |
2662 | lck_mtx_unlock(&inp_fc_lck); | |
316670eb | 2663 | |
bd504ef0 A |
2664 | bzero(&(inp->infc_link), sizeof (inp->infc_link)); |
2665 | inp->inp_flags2 &= ~INP2_IN_FCTREE; | |
2666 | return (NULL); | |
316670eb | 2667 | } |
39236c6e | 2668 | |
bd504ef0 A |
2669 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) |
2670 | inp = NULL; | |
316670eb A |
2671 | lck_mtx_unlock(&inp_fc_lck); |
2672 | ||
bd504ef0 | 2673 | return (inp); |
316670eb A |
2674 | } |
2675 | ||
39236c6e | 2676 | static void |
316670eb A |
2677 | inp_fc_feedback(struct inpcb *inp) |
2678 | { | |
2679 | struct socket *so = inp->inp_socket; | |
2680 | ||
2681 | /* we already hold a want_cnt on this inp, socket can't be null */ | |
39236c6e | 2682 | VERIFY(so != NULL); |
316670eb A |
2683 | socket_lock(so, 1); |
2684 | ||
2685 | if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { | |
2686 | socket_unlock(so, 1); | |
2687 | return; | |
2688 | } | |
2689 | ||
fe8ab488 A |
2690 | if (inp->inp_sndinprog_cnt > 0) |
2691 | inp->inp_flags |= INP_FC_FEEDBACK; | |
2692 | ||
316670eb A |
2693 | /* |
2694 | * Return if the connection is not in flow-controlled state. | |
2695 | * This can happen if the connection experienced | |
2696 | * loss while it was in flow controlled state | |
2697 | */ | |
2698 | if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) { | |
2699 | socket_unlock(so, 1); | |
2700 | return; | |
2701 | } | |
2702 | inp_reset_fc_state(inp); | |
2703 | ||
39236c6e | 2704 | if (SOCK_TYPE(so) == SOCK_STREAM) |
316670eb A |
2705 | inp_fc_unthrottle_tcp(inp); |
2706 | ||
2707 | socket_unlock(so, 1); | |
2708 | } | |
2709 | ||
2710 | void | |
2711 | inp_reset_fc_state(struct inpcb *inp) | |
2712 | { | |
2713 | struct socket *so = inp->inp_socket; | |
2714 | int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0; | |
2715 | int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0; | |
2716 | ||
2717 | inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED); | |
2718 | ||
2719 | if (suspended) { | |
2720 | so->so_flags &= ~(SOF_SUSPENDED); | |
2721 | soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME)); | |
2722 | } | |
2723 | ||
316670eb A |
2724 | /* Give a write wakeup to unblock the socket */ |
2725 | if (needwakeup) | |
2726 | sowwakeup(so); | |
2727 | } | |
2728 | ||
2729 | int | |
2730 | inp_set_fc_state(struct inpcb *inp, int advcode) | |
2731 | { | |
bd504ef0 | 2732 | struct inpcb *tmp_inp = NULL; |
316670eb | 2733 | /* |
39236c6e | 2734 | * If there was a feedback from the interface when |
316670eb A |
2735 | * send operation was in progress, we should ignore |
2736 | * this flow advisory to avoid a race between setting | |
2737 | * flow controlled state and receiving feedback from | |
2738 | * the interface | |
2739 | */ | |
2740 | if (inp->inp_flags & INP_FC_FEEDBACK) | |
39236c6e | 2741 | return (0); |
316670eb A |
2742 | |
2743 | inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED); | |
39236c6e A |
2744 | if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash, |
2745 | INPFC_SOLOCKED)) != NULL) { | |
2746 | if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) | |
bd504ef0 A |
2747 | return (0); |
2748 | VERIFY(tmp_inp == inp); | |
316670eb A |
2749 | switch (advcode) { |
2750 | case FADV_FLOW_CONTROLLED: | |
2751 | inp->inp_flags |= INP_FLOW_CONTROLLED; | |
2752 | break; | |
2753 | case FADV_SUSPENDED: | |
2754 | inp->inp_flags |= INP_FLOW_SUSPENDED; | |
2755 | soevent(inp->inp_socket, | |
2756 | (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND)); | |
2757 | ||
2758 | /* Record the fact that suspend event was sent */ | |
2759 | inp->inp_socket->so_flags |= SOF_SUSPENDED; | |
2760 | break; | |
2761 | } | |
bd504ef0 | 2762 | return (1); |
316670eb | 2763 | } |
39236c6e | 2764 | return (0); |
316670eb A |
2765 | } |
2766 | ||
2767 | /* | |
2768 | * Handler for SO_FLUSH socket option. | |
2769 | */ | |
2770 | int | |
2771 | inp_flush(struct inpcb *inp, int optval) | |
2772 | { | |
2773 | u_int32_t flowhash = inp->inp_flowhash; | |
39236c6e | 2774 | struct ifnet *rtifp, *oifp; |
316670eb A |
2775 | |
2776 | /* Either all classes or one of the valid ones */ | |
2777 | if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) | |
2778 | return (EINVAL); | |
2779 | ||
2780 | /* We need a flow hash for identification */ | |
2781 | if (flowhash == 0) | |
2782 | return (0); | |
2783 | ||
39236c6e A |
2784 | /* Grab the interfaces from the route and pcb */ |
2785 | rtifp = ((inp->inp_route.ro_rt != NULL) ? | |
2786 | inp->inp_route.ro_rt->rt_ifp : NULL); | |
2787 | oifp = inp->inp_last_outifp; | |
2788 | ||
2789 | if (rtifp != NULL) | |
2790 | if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0); | |
2791 | if (oifp != NULL && oifp != rtifp) | |
2792 | if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0); | |
316670eb A |
2793 | |
2794 | return (0); | |
2795 | } | |
2796 | ||
2797 | /* | |
2798 | * Clear the INP_INADDR_ANY flag (special case for PPP only) | |
2799 | */ | |
39236c6e A |
2800 | void |
2801 | inp_clear_INP_INADDR_ANY(struct socket *so) | |
316670eb A |
2802 | { |
2803 | struct inpcb *inp = NULL; | |
2804 | ||
2805 | socket_lock(so, 1); | |
2806 | inp = sotoinpcb(so); | |
2807 | if (inp) { | |
2808 | inp->inp_flags &= ~INP_INADDR_ANY; | |
2809 | } | |
2810 | socket_unlock(so, 1); | |
2811 | } | |
2812 | ||
39236c6e A |
2813 | void |
2814 | inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo) | |
2815 | { | |
2816 | struct socket *so = inp->inp_socket; | |
2817 | ||
2818 | soprocinfo->spi_pid = so->last_pid; | |
fe8ab488 A |
2819 | if (so->last_pid != 0) |
2820 | uuid_copy(soprocinfo->spi_uuid, so->last_uuid); | |
39236c6e A |
2821 | /* |
2822 | * When not delegated, the effective pid is the same as the real pid | |
2823 | */ | |
fe8ab488 | 2824 | if (so->so_flags & SOF_DELEGATED) { |
39236c6e | 2825 | soprocinfo->spi_epid = so->e_pid; |
fe8ab488 A |
2826 | if (so->e_pid != 0) |
2827 | uuid_copy(soprocinfo->spi_euuid, so->e_uuid); | |
2828 | } else { | |
39236c6e | 2829 | soprocinfo->spi_epid = so->last_pid; |
fe8ab488 | 2830 | } |
39236c6e A |
2831 | } |
2832 | ||
2833 | int | |
2834 | inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash, | |
2835 | struct so_procinfo *soprocinfo) | |
2836 | { | |
2837 | struct inpcb *inp = NULL; | |
2838 | int found = 0; | |
2839 | ||
2840 | bzero(soprocinfo, sizeof (struct so_procinfo)); | |
2841 | ||
2842 | if (!flowhash) | |
2843 | return (-1); | |
2844 | ||
2845 | lck_rw_lock_shared(pcbinfo->ipi_lock); | |
2846 | LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { | |
2847 | if (inp->inp_state != INPCB_STATE_DEAD && | |
2848 | inp->inp_socket != NULL && | |
2849 | inp->inp_flowhash == flowhash) { | |
2850 | found = 1; | |
2851 | inp_get_soprocinfo(inp, soprocinfo); | |
2852 | break; | |
2853 | } | |
2854 | } | |
2855 | lck_rw_done(pcbinfo->ipi_lock); | |
2856 | ||
2857 | return (found); | |
2858 | } | |
2859 | ||
2860 | #if CONFIG_PROC_UUID_POLICY | |
2861 | static void | |
2862 | inp_update_cellular_policy(struct inpcb *inp, boolean_t set) | |
2863 | { | |
2864 | struct socket *so = inp->inp_socket; | |
2865 | int before, after; | |
2866 | ||
2867 | VERIFY(so != NULL); | |
2868 | VERIFY(inp->inp_state != INPCB_STATE_DEAD); | |
2869 | ||
fe8ab488 | 2870 | before = INP_NO_CELLULAR(inp); |
39236c6e A |
2871 | if (set) { |
2872 | inp_set_nocellular(inp); | |
2873 | } else { | |
2874 | inp_clear_nocellular(inp); | |
2875 | } | |
fe8ab488 | 2876 | after = INP_NO_CELLULAR(inp); |
39236c6e A |
2877 | if (net_io_policy_log && (before != after)) { |
2878 | static const char *ok = "OK"; | |
2879 | static const char *nok = "NOACCESS"; | |
2880 | uuid_string_t euuid_buf; | |
2881 | pid_t epid; | |
2882 | ||
2883 | if (so->so_flags & SOF_DELEGATED) { | |
2884 | uuid_unparse(so->e_uuid, euuid_buf); | |
2885 | epid = so->e_pid; | |
2886 | } else { | |
2887 | uuid_unparse(so->last_uuid, euuid_buf); | |
2888 | epid = so->last_pid; | |
2889 | } | |
2890 | ||
2891 | /* allow this socket to generate another notification event */ | |
2892 | so->so_ifdenied_notifies = 0; | |
2893 | ||
2894 | log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d " | |
2895 | "euuid %s%s %s->%s\n", __func__, | |
2896 | (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), | |
2897 | SOCK_TYPE(so), epid, euuid_buf, | |
2898 | (so->so_flags & SOF_DELEGATED) ? | |
2899 | " [delegated]" : "", | |
2900 | ((before < after) ? ok : nok), | |
2901 | ((before < after) ? nok : ok)); | |
2902 | } | |
2903 | } | |
2904 | ||
fe8ab488 | 2905 | #if NECP |
39236c6e | 2906 | static void |
fe8ab488 | 2907 | inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set) |
39236c6e A |
2908 | { |
2909 | struct socket *so = inp->inp_socket; | |
2910 | int before, after; | |
2911 | ||
2912 | VERIFY(so != NULL); | |
2913 | VERIFY(inp->inp_state != INPCB_STATE_DEAD); | |
2914 | ||
fe8ab488 | 2915 | before = (inp->inp_flags2 & INP2_WANT_APP_POLICY); |
39236c6e | 2916 | if (set) { |
fe8ab488 | 2917 | inp_set_want_app_policy(inp); |
39236c6e | 2918 | } else { |
fe8ab488 | 2919 | inp_clear_want_app_policy(inp); |
39236c6e | 2920 | } |
fe8ab488 | 2921 | after = (inp->inp_flags2 & INP2_WANT_APP_POLICY); |
39236c6e A |
2922 | if (net_io_policy_log && (before != after)) { |
2923 | static const char *wanted = "WANTED"; | |
2924 | static const char *unwanted = "UNWANTED"; | |
2925 | uuid_string_t euuid_buf; | |
2926 | pid_t epid; | |
2927 | ||
2928 | if (so->so_flags & SOF_DELEGATED) { | |
2929 | uuid_unparse(so->e_uuid, euuid_buf); | |
2930 | epid = so->e_pid; | |
2931 | } else { | |
2932 | uuid_unparse(so->last_uuid, euuid_buf); | |
2933 | epid = so->last_pid; | |
2934 | } | |
2935 | ||
2936 | log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d " | |
2937 | "euuid %s%s %s->%s\n", __func__, | |
2938 | (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), | |
2939 | SOCK_TYPE(so), epid, euuid_buf, | |
2940 | (so->so_flags & SOF_DELEGATED) ? | |
2941 | " [delegated]" : "", | |
2942 | ((before < after) ? unwanted : wanted), | |
2943 | ((before < after) ? wanted : unwanted)); | |
2944 | } | |
2945 | } | |
fe8ab488 | 2946 | #endif /* NECP */ |
39236c6e A |
2947 | #endif /* CONFIG_PROC_UUID_POLICY */ | |
2948 | ||
fe8ab488 A |
2949 | #if NECP |
2950 | void | |
2951 | inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface) | |
2952 | { | |
2953 | necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface); | |
2954 | if (necp_socket_should_rescope(inp) && | |
2955 | inp->inp_lport == 0 && | |
2956 | inp->inp_laddr.s_addr == INADDR_ANY && | |
2957 | IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { | |
2958 | // If we should rescope, and the socket is not yet bound | |
2959 | inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL); | |
2960 | } | |
2961 | } | |
2962 | #endif /* NECP */ | |
2963 | ||
39236c6e A |
/*
 * Refresh the per-process UUID policy flags cached on a PCB.
 *
 * Looks up the policy for the socket's effective UUID (e_uuid when
 * delegated, last_uuid otherwise) and, if the policy generation count
 * changed, updates the no-cellular state (and, with NECP, the
 * want-app-policy state) accordingly.
 *
 * Returns 0 on success or when there is nothing to do (policy checks
 * disabled, no socket, dead PCB, exempt kernel socket, or no policy
 * entry); otherwise the error from proc_uuid_policy_lookup().
 * Always returns 0 when CONFIG_PROC_UUID_POLICY is not configured.
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t ogencnt;
	int err = 0;

	if (!net_io_policy_uuid)
		return (0);
	if (so == NULL || inp->inp_state == INPCB_STATE_DEAD)
		return (0);

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED))
		return (0);

	ogencnt = so->so_policy_gencnt;
	err = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
	    so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt);

	if (err == ENOENT) {
		/*
		 * Discard cached generation count if the entry is gone,
		 * so that we go thru the checks below.
		 */
		if (ogencnt != 0)
			so->so_policy_gencnt = 0;
	} else if (err != 0) {
		/* Any other lookup failure is handed back unchanged */
		return (err);
	}

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly.  If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if (ogencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (pflags & PROC_UUID_NO_CELLULAR) {
			if (err == 0)
				inp_update_cellular_policy(inp, TRUE);
		} else {
			inp_update_cellular_policy(inp, FALSE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (pflags & PROC_UUID_NECP_APP_POLICY) {
			if (err == 0)
				inp_update_necp_want_app_policy(inp, TRUE);
		} else {
			inp_update_necp_want_app_policy(inp, FALSE);
		}
#endif /* NECP */
	}

	/* Only 0 and ENOENT reach this point; both are reported as 0 */
	return (0);
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return (0);
#endif /* !CONFIG_PROC_UUID_POLICY */
}
fe8ab488 A |
3024 | /* |
3025 | * Called when we need to enforce policy restrictions in the input path. | |
3026 | * | |
3027 | * Returns TRUE if we're not allowed to receive data, otherwise FALSE. | |
3028 | */ | |
39236c6e | 3029 | boolean_t |
fe8ab488 | 3030 | inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp) |
39236c6e A |
3031 | { |
3032 | VERIFY(inp != NULL); | |
3033 | ||
fe8ab488 A |
3034 | /* |
3035 | * Inbound restrictions. | |
3036 | */ | |
39236c6e A |
3037 | if (!sorestrictrecv) |
3038 | return (FALSE); | |
3039 | ||
fe8ab488 A |
3040 | if (ifp == NULL) |
3041 | return (FALSE); | |
3042 | ||
3043 | if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) | |
3044 | return (TRUE); | |
3045 | ||
3046 | if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) | |
3047 | return (TRUE); | |
3048 | ||
3049 | if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) | |
3050 | return (TRUE); | |
3051 | ||
3052 | if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) | |
39236c6e A |
3053 | return (FALSE); |
3054 | ||
3055 | if (inp->inp_flags & INP_RECV_ANYIF) | |
3056 | return (FALSE); | |
3057 | ||
3058 | if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) | |
3059 | return (FALSE); | |
3060 | ||
3061 | return (TRUE); | |
3062 | } | |
fe8ab488 A |
3063 | |
3064 | /* | |
3065 | * Called when we need to enforce policy restrictions in the output path. | |
3066 | * | |
3067 | * Returns TRUE if we're not allowed to send data out, otherwise FALSE. | |
3068 | */ | |
3069 | boolean_t | |
3070 | inp_restricted_send(struct inpcb *inp, struct ifnet *ifp) | |
3071 | { | |
3072 | VERIFY(inp != NULL); | |
3073 | ||
3074 | /* | |
3075 | * Outbound restrictions. | |
3076 | */ | |
3077 | if (!sorestrictsend) | |
3078 | return (FALSE); | |
3079 | ||
3080 | if (ifp == NULL) | |
3081 | return (FALSE); | |
3082 | ||
3083 | if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) | |
3084 | return (TRUE); | |
3085 | ||
3086 | if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) | |
3087 | return (TRUE); | |
3088 | ||
3089 | if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) | |
3090 | return (TRUE); | |
3091 | ||
3092 | return (FALSE); | |
3093 | } |