]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
3e170ce0 | 2 | * Copyright (c) 2000-2015 Apple Inc. All rights reserved. |
5d5c5d0d | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
39236c6e | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
39236c6e | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
39236c6e | 17 | * |
2d21ac55 A |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
39236c6e | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* | |
29 | * Copyright (c) 1982, 1986, 1991, 1993, 1995 | |
30 | * The Regents of the University of California. All rights reserved. | |
31 | * | |
32 | * Redistribution and use in source and binary forms, with or without | |
33 | * modification, are permitted provided that the following conditions | |
34 | * are met: | |
35 | * 1. Redistributions of source code must retain the above copyright | |
36 | * notice, this list of conditions and the following disclaimer. | |
37 | * 2. Redistributions in binary form must reproduce the above copyright | |
38 | * notice, this list of conditions and the following disclaimer in the | |
39 | * documentation and/or other materials provided with the distribution. | |
40 | * 3. All advertising materials mentioning features or use of this software | |
41 | * must display the following acknowledgement: | |
42 | * This product includes software developed by the University of | |
43 | * California, Berkeley and its contributors. | |
44 | * 4. Neither the name of the University nor the names of its contributors | |
45 | * may be used to endorse or promote products derived from this software | |
46 | * without specific prior written permission. | |
47 | * | |
48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
58 | * SUCH DAMAGE. | |
59 | * | |
60 | * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 | |
9bccf70c | 61 | * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $ |
1c79356b A |
62 | */ |
63 | ||
64 | #include <sys/param.h> | |
65 | #include <sys/systm.h> | |
66 | #include <sys/malloc.h> | |
67 | #include <sys/mbuf.h> | |
1c79356b | 68 | #include <sys/domain.h> |
1c79356b A |
69 | #include <sys/protosw.h> |
70 | #include <sys/socket.h> | |
71 | #include <sys/socketvar.h> | |
72 | #include <sys/proc.h> | |
73 | #include <sys/kernel.h> | |
74 | #include <sys/sysctl.h> | |
6d2010ae A |
75 | #include <sys/mcache.h> |
76 | #include <sys/kauth.h> | |
77 | #include <sys/priv.h> | |
39236c6e A |
78 | #include <sys/proc_uuid_policy.h> |
79 | #include <sys/syslog.h> | |
fe8ab488 | 80 | #include <sys/priv.h> |
39236c6e | 81 | |
91447636 | 82 | #include <libkern/OSAtomic.h> |
316670eb | 83 | #include <kern/locks.h> |
1c79356b A |
84 | |
85 | #include <machine/limits.h> | |
86 | ||
1c79356b | 87 | #include <kern/zalloc.h> |
1c79356b A |
88 | |
89 | #include <net/if.h> | |
1c79356b | 90 | #include <net/if_types.h> |
9bccf70c | 91 | #include <net/route.h> |
316670eb A |
92 | #include <net/flowhash.h> |
93 | #include <net/flowadv.h> | |
fe8ab488 | 94 | #include <net/ntstat.h> |
1c79356b A |
95 | |
96 | #include <netinet/in.h> | |
97 | #include <netinet/in_pcb.h> | |
98 | #include <netinet/in_var.h> | |
99 | #include <netinet/ip_var.h> | |
100 | #if INET6 | |
101 | #include <netinet/ip6.h> | |
102 | #include <netinet6/ip6_var.h> | |
103 | #endif /* INET6 */ | |
104 | ||
1c79356b | 105 | #include <sys/kdebug.h> |
b0d623f7 | 106 | #include <sys/random.h> |
39236c6e | 107 | |
316670eb | 108 | #include <dev/random/randomdev.h> |
39236c6e | 109 | #include <mach/boolean.h> |
1c79356b | 110 | |
fe8ab488 A |
111 | #if NECP |
112 | #include <net/necp.h> | |
9bccf70c | 113 | #endif |
1c79356b | 114 | |
39236c6e A |
115 | static lck_grp_t *inpcb_lock_grp; |
116 | static lck_attr_t *inpcb_lock_attr; | |
117 | static lck_grp_attr_t *inpcb_lock_grp_attr; | |
118 | decl_lck_mtx_data(static, inpcb_lock); /* global INPCB lock */ | |
119 | decl_lck_mtx_data(static, inpcb_timeout_lock); | |
120 | ||
121 | static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head); | |
122 | ||
123 | static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */ | |
124 | static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */ | |
125 | static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */ | |
126 | static boolean_t inpcb_fast_timer_on = FALSE; | |
fe8ab488 A |
127 | |
128 | /* | |
129 | * If the total number of gc reqs is above a threshold, schedule | |
130 | * garbage collect timer sooner | |
131 | */ | |
132 | static boolean_t inpcb_toomany_gcreq = FALSE; | |
133 | ||
134 | #define INPCB_GCREQ_THRESHOLD 50000 | |
135 | #define INPCB_TOOMANY_GCREQ_TIMER (hz/10) /* 10 times a second */ | |
136 | ||
39236c6e A |
137 | static void inpcb_sched_timeout(struct timeval *); |
138 | static void inpcb_timeout(void *); | |
139 | int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */ | |
140 | extern int tvtohz(struct timeval *); | |
141 | ||
142 | #if CONFIG_PROC_UUID_POLICY | |
143 | static void inp_update_cellular_policy(struct inpcb *, boolean_t); | |
fe8ab488 A |
144 | #if NECP |
145 | static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t); | |
146 | #endif /* NECP */ | |
39236c6e A |
147 | #endif /* !CONFIG_PROC_UUID_POLICY */ |
148 | ||
39236c6e A |
149 | #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8)) |
150 | #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1)) | |
1c79356b | 151 | |
1c79356b A |
152 | /* |
153 | * These configure the range of local port addresses assigned to | |
154 | * "unspecified" outgoing connections/packets/whatever. | |
155 | */ | |
9bccf70c A |
156 | int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ |
157 | int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ | |
39236c6e A |
158 | int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ |
159 | int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ | |
9bccf70c A |
160 | int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ |
161 | int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ | |
1c79356b | 162 | |
39236c6e | 163 | #define RANGECHK(var, min, max) \ |
1c79356b A |
164 | if ((var) < (min)) { (var) = (min); } \ |
165 | else if ((var) > (max)) { (var) = (max); } | |
166 | ||
1c79356b A |
167 | static int |
168 | sysctl_net_ipport_check SYSCTL_HANDLER_ARGS | |
169 | { | |
2d21ac55 | 170 | #pragma unused(arg1, arg2) |
39236c6e A |
171 | int error; |
172 | ||
173 | error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); | |
1c79356b A |
174 | if (!error) { |
175 | RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); | |
176 | RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); | |
177 | RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); | |
178 | RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); | |
179 | RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); | |
180 | RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); | |
181 | } | |
39236c6e | 182 | return (error); |
1c79356b A |
183 | } |
184 | ||
185 | #undef RANGECHK | |
186 | ||
39236c6e A |
187 | SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, |
188 | CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports"); | |
189 | ||
190 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, | |
191 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
192 | &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); | |
193 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, | |
194 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
195 | &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); | |
196 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, | |
197 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
198 | &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); | |
199 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, | |
200 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
201 | &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); | |
202 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, | |
203 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
204 | &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); | |
205 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, | |
206 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
207 | &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); | |
1c79356b | 208 | |
b0d623f7 A |
209 | extern int udp_use_randomport; |
210 | extern int tcp_use_randomport; | |
211 | ||
316670eb A |
212 | /* Structs used for flowhash computation */ |
213 | struct inp_flowhash_key_addr { | |
214 | union { | |
215 | struct in_addr v4; | |
216 | struct in6_addr v6; | |
217 | u_int8_t addr8[16]; | |
218 | u_int16_t addr16[8]; | |
219 | u_int32_t addr32[4]; | |
220 | } infha; | |
221 | }; | |
222 | ||
223 | struct inp_flowhash_key { | |
39236c6e | 224 | struct inp_flowhash_key_addr infh_laddr; |
316670eb A |
225 | struct inp_flowhash_key_addr infh_faddr; |
226 | u_int32_t infh_lport; | |
227 | u_int32_t infh_fport; | |
228 | u_int32_t infh_af; | |
229 | u_int32_t infh_proto; | |
230 | u_int32_t infh_rand1; | |
231 | u_int32_t infh_rand2; | |
232 | }; | |
233 | ||
39236c6e A |
234 | static u_int32_t inp_hash_seed = 0; |
235 | ||
236 | static int infc_cmp(const struct inpcb *, const struct inpcb *); | |
237 | ||
238 | /* Flags used by inp_fc_getinp */ | |
239 | #define INPFC_SOLOCKED 0x1 | |
240 | #define INPFC_REMOVE 0x2 | |
241 | static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t); | |
242 | ||
243 | static void inp_fc_feedback(struct inpcb *); | |
244 | extern void tcp_remove_from_time_wait(struct inpcb *inp); | |
316670eb | 245 | |
39236c6e | 246 | decl_lck_mtx_data(static, inp_fc_lck); |
316670eb | 247 | |
bd504ef0 A |
248 | RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree; |
249 | RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp); | |
250 | RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp); | |
316670eb | 251 | |
bd504ef0 A |
252 | /* |
253 | * Use this inp as a key to find an inp in the flowhash tree. | |
254 | * Accesses to it are protected by inp_fc_lck. | |
255 | */ | |
256 | struct inpcb key_inp; | |
316670eb | 257 | |
1c79356b A |
258 | /* |
259 | * in_pcb.c: manage the Protocol Control Blocks. | |
1c79356b A |
260 | */ |
261 | ||
316670eb | 262 | void |
39236c6e | 263 | in_pcbinit(void) |
316670eb | 264 | { |
39236c6e | 265 | static int inpcb_initialized = 0; |
316670eb | 266 | |
39236c6e A |
267 | VERIFY(!inpcb_initialized); |
268 | inpcb_initialized = 1; | |
316670eb | 269 | |
39236c6e A |
270 | inpcb_lock_grp_attr = lck_grp_attr_alloc_init(); |
271 | inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr); | |
272 | inpcb_lock_attr = lck_attr_alloc_init(); | |
273 | lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr); | |
274 | lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr); | |
275 | ||
276 | /* | |
277 | * Initialize data structures required to deliver | |
278 | * flow advisories. | |
279 | */ | |
280 | lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr); | |
bd504ef0 | 281 | lck_mtx_lock(&inp_fc_lck); |
316670eb | 282 | RB_INIT(&inp_fc_tree); |
bd504ef0 A |
283 | bzero(&key_inp, sizeof(key_inp)); |
284 | lck_mtx_unlock(&inp_fc_lck); | |
316670eb A |
285 | } |
286 | ||
39236c6e A |
287 | #define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \ |
288 | ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0)) | |
289 | static void | |
290 | inpcb_timeout(void *arg) | |
291 | { | |
292 | #pragma unused(arg) | |
293 | struct inpcbinfo *ipi; | |
294 | boolean_t t, gc; | |
295 | struct intimercount gccnt, tmcnt; | |
296 | struct timeval leeway; | |
fe8ab488 A |
297 | boolean_t toomany_gc = FALSE; |
298 | ||
299 | if (arg != NULL) { | |
300 | VERIFY(arg == &inpcb_toomany_gcreq); | |
301 | toomany_gc = *(boolean_t *)arg; | |
302 | } | |
39236c6e A |
303 | |
304 | /* | |
305 | * Update coarse-grained networking timestamp (in sec.); the idea | |
306 | * is to piggy-back on the timeout callout to update the counter | |
307 | * returnable via net_uptime(). | |
308 | */ | |
309 | net_update_uptime(); | |
310 | ||
fe8ab488 A |
311 | bzero(&gccnt, sizeof(gccnt)); |
312 | bzero(&tmcnt, sizeof(tmcnt)); | |
313 | ||
39236c6e A |
314 | lck_mtx_lock_spin(&inpcb_timeout_lock); |
315 | gc = inpcb_garbage_collecting; | |
316 | inpcb_garbage_collecting = FALSE; | |
39236c6e A |
317 | |
318 | t = inpcb_ticking; | |
319 | inpcb_ticking = FALSE; | |
320 | ||
321 | if (gc || t) { | |
322 | lck_mtx_unlock(&inpcb_timeout_lock); | |
323 | ||
324 | lck_mtx_lock(&inpcb_lock); | |
325 | TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) { | |
326 | if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) { | |
327 | bzero(&ipi->ipi_gc_req, | |
328 | sizeof(ipi->ipi_gc_req)); | |
329 | if (gc && ipi->ipi_gc != NULL) { | |
330 | ipi->ipi_gc(ipi); | |
331 | gccnt.intimer_lazy += | |
332 | ipi->ipi_gc_req.intimer_lazy; | |
333 | gccnt.intimer_fast += | |
334 | ipi->ipi_gc_req.intimer_fast; | |
335 | gccnt.intimer_nodelay += | |
336 | ipi->ipi_gc_req.intimer_nodelay; | |
337 | } | |
338 | } | |
339 | if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) { | |
340 | bzero(&ipi->ipi_timer_req, | |
341 | sizeof(ipi->ipi_timer_req)); | |
342 | if (t && ipi->ipi_timer != NULL) { | |
343 | ipi->ipi_timer(ipi); | |
344 | tmcnt.intimer_lazy += | |
345 | ipi->ipi_timer_req.intimer_lazy; | |
346 | tmcnt.intimer_lazy += | |
347 | ipi->ipi_timer_req.intimer_fast; | |
348 | tmcnt.intimer_nodelay += | |
349 | ipi->ipi_timer_req.intimer_nodelay; | |
350 | } | |
351 | } | |
352 | } | |
353 | lck_mtx_unlock(&inpcb_lock); | |
354 | lck_mtx_lock_spin(&inpcb_timeout_lock); | |
355 | } | |
356 | ||
357 | /* lock was dropped above, so check first before overriding */ | |
358 | if (!inpcb_garbage_collecting) | |
359 | inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt); | |
360 | if (!inpcb_ticking) | |
361 | inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt); | |
362 | ||
363 | /* re-arm the timer if there's work to do */ | |
fe8ab488 A |
364 | if (toomany_gc) { |
365 | inpcb_toomany_gcreq = FALSE; | |
366 | } else { | |
367 | inpcb_timeout_run--; | |
368 | VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2); | |
369 | } | |
39236c6e A |
370 | |
371 | bzero(&leeway, sizeof(leeway)); | |
372 | leeway.tv_sec = inpcb_timeout_lazy; | |
373 | if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) | |
374 | inpcb_sched_timeout(NULL); | |
375 | else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) | |
376 | /* be lazy when idle with little activity */ | |
377 | inpcb_sched_timeout(&leeway); | |
378 | else | |
379 | inpcb_sched_timeout(NULL); | |
380 | ||
381 | lck_mtx_unlock(&inpcb_timeout_lock); | |
382 | } | |
383 | ||
384 | static void | |
385 | inpcb_sched_timeout(struct timeval *leeway) | |
386 | { | |
387 | lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED); | |
388 | ||
389 | if (inpcb_timeout_run == 0 && | |
390 | (inpcb_garbage_collecting || inpcb_ticking)) { | |
391 | lck_mtx_convert_spin(&inpcb_timeout_lock); | |
392 | inpcb_timeout_run++; | |
393 | if (leeway == NULL) { | |
394 | inpcb_fast_timer_on = TRUE; | |
395 | timeout(inpcb_timeout, NULL, hz); | |
396 | } else { | |
397 | inpcb_fast_timer_on = FALSE; | |
398 | timeout_with_leeway(inpcb_timeout, NULL, hz, | |
399 | tvtohz(leeway)); | |
400 | } | |
401 | } else if (inpcb_timeout_run == 1 && | |
402 | leeway == NULL && !inpcb_fast_timer_on) { | |
403 | /* | |
404 | * Since the request was for a fast timer but the | |
405 | * scheduled timer is a lazy timer, try to schedule | |
406 | * another instance of fast timer also | |
407 | */ | |
408 | lck_mtx_convert_spin(&inpcb_timeout_lock); | |
409 | inpcb_timeout_run++; | |
410 | inpcb_fast_timer_on = TRUE; | |
411 | timeout(inpcb_timeout, NULL, hz); | |
412 | } | |
413 | } | |
414 | ||
415 | void | |
416 | inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type) | |
417 | { | |
418 | struct timeval leeway; | |
fe8ab488 | 419 | u_int32_t gccnt; |
39236c6e A |
420 | lck_mtx_lock_spin(&inpcb_timeout_lock); |
421 | inpcb_garbage_collecting = TRUE; | |
fe8ab488 A |
422 | |
423 | gccnt = ipi->ipi_gc_req.intimer_nodelay + | |
424 | ipi->ipi_gc_req.intimer_fast; | |
425 | ||
426 | if (gccnt > INPCB_GCREQ_THRESHOLD && !inpcb_toomany_gcreq) { | |
427 | inpcb_toomany_gcreq = TRUE; | |
428 | ||
429 | /* | |
430 | * There are toomany pcbs waiting to be garbage collected, | |
431 | * schedule a much faster timeout in addition to | |
432 | * the caller's request | |
433 | */ | |
434 | lck_mtx_convert_spin(&inpcb_timeout_lock); | |
435 | timeout(inpcb_timeout, (void *)&inpcb_toomany_gcreq, | |
436 | INPCB_TOOMANY_GCREQ_TIMER); | |
437 | } | |
438 | ||
39236c6e A |
439 | switch (type) { |
440 | case INPCB_TIMER_NODELAY: | |
441 | atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1); | |
442 | inpcb_sched_timeout(NULL); | |
443 | break; | |
444 | case INPCB_TIMER_FAST: | |
445 | atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1); | |
446 | inpcb_sched_timeout(NULL); | |
447 | break; | |
448 | default: | |
449 | atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1); | |
450 | leeway.tv_sec = inpcb_timeout_lazy; | |
451 | leeway.tv_usec = 0; | |
452 | inpcb_sched_timeout(&leeway); | |
453 | break; | |
454 | } | |
455 | lck_mtx_unlock(&inpcb_timeout_lock); | |
456 | } | |
457 | ||
458 | void | |
459 | inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type) | |
460 | { | |
461 | struct timeval leeway; | |
462 | lck_mtx_lock_spin(&inpcb_timeout_lock); | |
463 | inpcb_ticking = TRUE; | |
464 | switch (type) { | |
465 | case INPCB_TIMER_NODELAY: | |
466 | atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1); | |
467 | inpcb_sched_timeout(NULL); | |
468 | break; | |
469 | case INPCB_TIMER_FAST: | |
470 | atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1); | |
471 | inpcb_sched_timeout(NULL); | |
472 | break; | |
473 | default: | |
474 | atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1); | |
475 | leeway.tv_sec = inpcb_timeout_lazy; | |
476 | leeway.tv_usec = 0; | |
477 | inpcb_sched_timeout(&leeway); | |
478 | break; | |
479 | } | |
480 | lck_mtx_unlock(&inpcb_timeout_lock); | |
481 | } | |
482 | ||
483 | void | |
484 | in_pcbinfo_attach(struct inpcbinfo *ipi) | |
485 | { | |
486 | struct inpcbinfo *ipi0; | |
487 | ||
488 | lck_mtx_lock(&inpcb_lock); | |
489 | TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) { | |
490 | if (ipi0 == ipi) { | |
491 | panic("%s: ipi %p already in the list\n", | |
492 | __func__, ipi); | |
493 | /* NOTREACHED */ | |
494 | } | |
495 | } | |
496 | TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry); | |
497 | lck_mtx_unlock(&inpcb_lock); | |
498 | } | |
499 | ||
500 | int | |
501 | in_pcbinfo_detach(struct inpcbinfo *ipi) | |
502 | { | |
503 | struct inpcbinfo *ipi0; | |
504 | int error = 0; | |
505 | ||
506 | lck_mtx_lock(&inpcb_lock); | |
507 | TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) { | |
508 | if (ipi0 == ipi) | |
509 | break; | |
510 | } | |
511 | if (ipi0 != NULL) | |
512 | TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry); | |
513 | else | |
514 | error = ENXIO; | |
515 | lck_mtx_unlock(&inpcb_lock); | |
516 | ||
517 | return (error); | |
518 | } | |
519 | ||
1c79356b A |
520 | /* |
521 | * Allocate a PCB and associate it with the socket. | |
2d21ac55 A |
522 | * |
523 | * Returns: 0 Success | |
524 | * ENOBUFS | |
525 | * ENOMEM | |
1c79356b A |
526 | */ |
527 | int | |
39236c6e | 528 | in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) |
1c79356b | 529 | { |
39236c6e | 530 | #pragma unused(p) |
2d21ac55 | 531 | struct inpcb *inp; |
39236c6e | 532 | caddr_t temp; |
2d21ac55 A |
533 | #if CONFIG_MACF_NET |
534 | int mac_error; | |
39236c6e | 535 | #endif /* CONFIG_MACF_NET */ |
1c79356b | 536 | |
3e170ce0 | 537 | if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) { |
39236c6e A |
538 | inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone); |
539 | if (inp == NULL) | |
540 | return (ENOBUFS); | |
541 | bzero((caddr_t)inp, sizeof (*inp)); | |
542 | } else { | |
543 | inp = (struct inpcb *)(void *)so->so_saved_pcb; | |
544 | temp = inp->inp_saved_ppcb; | |
545 | bzero((caddr_t)inp, sizeof (*inp)); | |
546 | inp->inp_saved_ppcb = temp; | |
1c79356b A |
547 | } |
548 | ||
549 | inp->inp_gencnt = ++pcbinfo->ipi_gencnt; | |
550 | inp->inp_pcbinfo = pcbinfo; | |
551 | inp->inp_socket = so; | |
2d21ac55 A |
552 | #if CONFIG_MACF_NET |
553 | mac_error = mac_inpcb_label_init(inp, M_WAITOK); | |
554 | if (mac_error != 0) { | |
3e170ce0 | 555 | if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) |
2d21ac55 A |
556 | zfree(pcbinfo->ipi_zone, inp); |
557 | return (mac_error); | |
558 | } | |
559 | mac_inpcb_label_associate(so, inp); | |
39236c6e A |
560 | #endif /* CONFIG_MACF_NET */ |
561 | /* make sure inp_stat is always 64-bit aligned */ | |
562 | inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store, | |
563 | sizeof (u_int64_t)); | |
564 | if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) + | |
565 | sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) { | |
566 | panic("%s: insufficient space to align inp_stat", __func__); | |
567 | /* NOTREACHED */ | |
568 | } | |
569 | ||
570 | /* make sure inp_cstat is always 64-bit aligned */ | |
571 | inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store, | |
572 | sizeof (u_int64_t)); | |
573 | if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) + | |
574 | sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) { | |
575 | panic("%s: insufficient space to align inp_cstat", __func__); | |
576 | /* NOTREACHED */ | |
577 | } | |
578 | ||
579 | /* make sure inp_wstat is always 64-bit aligned */ | |
580 | inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store, | |
581 | sizeof (u_int64_t)); | |
582 | if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) + | |
583 | sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) { | |
584 | panic("%s: insufficient space to align inp_wstat", __func__); | |
585 | /* NOTREACHED */ | |
6d2010ae A |
586 | } |
587 | ||
fe8ab488 A |
588 | /* make sure inp_Wstat is always 64-bit aligned */ |
589 | inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store, | |
590 | sizeof (u_int64_t)); | |
591 | if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) + | |
592 | sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) { | |
593 | panic("%s: insufficient space to align inp_Wstat", __func__); | |
594 | /* NOTREACHED */ | |
595 | } | |
596 | ||
91447636 A |
597 | so->so_pcb = (caddr_t)inp; |
598 | ||
599 | if (so->so_proto->pr_flags & PR_PCBLOCK) { | |
39236c6e A |
600 | lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp, |
601 | pcbinfo->ipi_lock_attr); | |
91447636 A |
602 | } |
603 | ||
2d21ac55 | 604 | #if INET6 |
39236c6e | 605 | if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) |
9bccf70c | 606 | inp->inp_flags |= IN6P_IPV6_V6ONLY; |
39236c6e | 607 | |
9bccf70c A |
608 | if (ip6_auto_flowlabel) |
609 | inp->inp_flags |= IN6P_AUTOFLOWLABEL; | |
39236c6e A |
610 | #endif /* INET6 */ |
611 | ||
612 | (void) inp_update_policy(inp); | |
613 | ||
614 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); | |
91447636 | 615 | inp->inp_gencnt = ++pcbinfo->ipi_gencnt; |
39236c6e | 616 | LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list); |
91447636 | 617 | pcbinfo->ipi_count++; |
39236c6e | 618 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
619 | return (0); |
620 | } | |
621 | ||
2d21ac55 | 622 | /* |
39236c6e A |
623 | * in_pcblookup_local_and_cleanup does everything |
624 | * in_pcblookup_local does but it checks for a socket | |
625 | * that's going away. Since we know that the lock is | |
626 | * held read+write when this funciton is called, we | |
627 | * can safely dispose of this socket like the slow | |
628 | * timer would usually do and return NULL. This is | |
629 | * great for bind. | |
630 | */ | |
631 | struct inpcb * | |
632 | in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr, | |
633 | u_int lport_arg, int wild_okay) | |
2d21ac55 A |
634 | { |
635 | struct inpcb *inp; | |
39236c6e | 636 | |
2d21ac55 A |
637 | /* Perform normal lookup */ |
638 | inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay); | |
39236c6e | 639 | |
2d21ac55 | 640 | /* Check if we found a match but it's waiting to be disposed */ |
39236c6e | 641 | if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) { |
2d21ac55 | 642 | struct socket *so = inp->inp_socket; |
39236c6e | 643 | |
6d2010ae | 644 | lck_mtx_lock(&inp->inpcb_mtx); |
39236c6e | 645 | |
2d21ac55 | 646 | if (so->so_usecount == 0) { |
b0d623f7 A |
647 | if (inp->inp_state != INPCB_STATE_DEAD) |
648 | in_pcbdetach(inp); | |
39236c6e | 649 | in_pcbdispose(inp); /* will unlock & destroy */ |
2d21ac55 | 650 | inp = NULL; |
39236c6e | 651 | } else { |
6d2010ae | 652 | lck_mtx_unlock(&inp->inpcb_mtx); |
2d21ac55 A |
653 | } |
654 | } | |
39236c6e A |
655 | |
656 | return (inp); | |
2d21ac55 A |
657 | } |
658 | ||
c910b4d9 | 659 | static void |
2d21ac55 A |
660 | in_pcb_conflict_post_msg(u_int16_t port) |
661 | { | |
39236c6e A |
662 | /* |
663 | * Radar 5523020 send a kernel event notification if a | |
664 | * non-participating socket tries to bind the port a socket | |
665 | * who has set SOF_NOTIFYCONFLICT owns. | |
2d21ac55 | 666 | */ |
39236c6e | 667 | struct kev_msg ev_msg; |
2d21ac55 A |
668 | struct kev_in_portinuse in_portinuse; |
669 | ||
39236c6e A |
670 | bzero(&in_portinuse, sizeof (struct kev_in_portinuse)); |
671 | bzero(&ev_msg, sizeof (struct kev_msg)); | |
2d21ac55 A |
672 | in_portinuse.port = ntohs(port); /* port in host order */ |
673 | in_portinuse.req_pid = proc_selfpid(); | |
674 | ev_msg.vendor_code = KEV_VENDOR_APPLE; | |
675 | ev_msg.kev_class = KEV_NETWORK_CLASS; | |
676 | ev_msg.kev_subclass = KEV_INET_SUBCLASS; | |
677 | ev_msg.event_code = KEV_INET_PORTINUSE; | |
678 | ev_msg.dv[0].data_ptr = &in_portinuse; | |
39236c6e | 679 | ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse); |
2d21ac55 A |
680 | ev_msg.dv[1].data_length = 0; |
681 | kev_post_msg(&ev_msg); | |
682 | } | |
39236c6e | 683 | |
2d21ac55 | 684 | /* |
39236c6e A |
685 | * Bind an INPCB to an address and/or port. This routine should not alter |
686 | * the caller-supplied local address "nam". | |
687 | * | |
2d21ac55 A |
688 | * Returns: 0 Success |
689 | * EADDRNOTAVAIL Address not available. | |
690 | * EINVAL Invalid argument | |
691 | * EAFNOSUPPORT Address family not supported [notdef] | |
692 | * EACCES Permission denied | |
693 | * EADDRINUSE Address in use | |
694 | * EAGAIN Resource unavailable, try again | |
6d2010ae | 695 | * priv_check_cred:EPERM Operation not permitted |
2d21ac55 | 696 | */ |
1c79356b | 697 | int |
2d21ac55 | 698 | in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) |
1c79356b | 699 | { |
2d21ac55 | 700 | struct socket *so = inp->inp_socket; |
9bccf70c | 701 | unsigned short *lastport; |
1c79356b | 702 | struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; |
b0d623f7 | 703 | u_short lport = 0, rand_port = 0; |
1c79356b | 704 | int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); |
b0d623f7 | 705 | int error, randomport, conflict = 0; |
fe8ab488 | 706 | boolean_t anonport = FALSE; |
6d2010ae | 707 | kauth_cred_t cred; |
fe8ab488 A |
708 | struct in_addr laddr; |
709 | struct ifnet *outif = NULL; | |
1c79356b A |
710 | |
711 | if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ | |
712 | return (EADDRNOTAVAIL); | |
39236c6e | 713 | if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) |
1c79356b | 714 | return (EINVAL); |
39236c6e | 715 | if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) |
1c79356b | 716 | wild = 1; |
fe8ab488 A |
717 | |
718 | bzero(&laddr, sizeof(laddr)); | |
719 | ||
4bd07ac2 A |
720 | socket_unlock(so, 0); /* keep reference on socket */ |
721 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); | |
722 | ||
39236c6e | 723 | if (nam != NULL) { |
6d2010ae | 724 | |
39236c6e A |
725 | if (nam->sa_len != sizeof (struct sockaddr_in)) { |
726 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 727 | socket_lock(so, 0); |
1c79356b | 728 | return (EINVAL); |
91447636 | 729 | } |
39236c6e | 730 | #if 0 |
1c79356b A |
731 | /* |
732 | * We should check the family, but old programs | |
733 | * incorrectly fail to initialize it. | |
734 | */ | |
39236c6e A |
735 | if (nam->sa_family != AF_INET) { |
736 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 737 | socket_lock(so, 0); |
1c79356b | 738 | return (EAFNOSUPPORT); |
91447636 | 739 | } |
39236c6e A |
740 | #endif /* 0 */ |
741 | lport = SIN(nam)->sin_port; | |
742 | ||
743 | if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) { | |
1c79356b A |
744 | /* |
745 | * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; | |
746 | * allow complete duplication of binding if | |
747 | * SO_REUSEPORT is set, or if SO_REUSEADDR is set | |
748 | * and a multicast address is bound on both | |
749 | * new and duplicated sockets. | |
750 | */ | |
751 | if (so->so_options & SO_REUSEADDR) | |
752 | reuseport = SO_REUSEADDR|SO_REUSEPORT; | |
39236c6e A |
753 | } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) { |
754 | struct sockaddr_in sin; | |
91447636 | 755 | struct ifaddr *ifa; |
39236c6e A |
756 | |
757 | /* Sanitized for interface address searches */ | |
758 | bzero(&sin, sizeof (sin)); | |
759 | sin.sin_family = AF_INET; | |
760 | sin.sin_len = sizeof (struct sockaddr_in); | |
761 | sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr; | |
762 | ||
763 | ifa = ifa_ifwithaddr(SA(&sin)); | |
764 | if (ifa == NULL) { | |
765 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 766 | socket_lock(so, 0); |
1c79356b | 767 | return (EADDRNOTAVAIL); |
39236c6e A |
768 | } else { |
769 | /* | |
770 | * Opportunistically determine the outbound | |
771 | * interface that may be used; this may not | |
772 | * hold true if we end up using a route | |
773 | * going over a different interface, e.g. | |
774 | * when sending to a local address. This | |
775 | * will get updated again after sending. | |
776 | */ | |
6d2010ae | 777 | IFA_LOCK(ifa); |
316670eb | 778 | outif = ifa->ifa_ifp; |
6d2010ae A |
779 | IFA_UNLOCK(ifa); |
780 | IFA_REMREF(ifa); | |
91447636 | 781 | } |
1c79356b | 782 | } |
39236c6e | 783 | if (lport != 0) { |
1c79356b | 784 | struct inpcb *t; |
39236c6e | 785 | uid_t u; |
1c79356b | 786 | |
6d2010ae A |
787 | if (ntohs(lport) < IPPORT_RESERVED) { |
788 | cred = kauth_cred_proc_ref(p); | |
39236c6e A |
789 | error = priv_check_cred(cred, |
790 | PRIV_NETINET_RESERVEDPORT, 0); | |
6d2010ae A |
791 | kauth_cred_unref(&cred); |
792 | if (error != 0) { | |
39236c6e | 793 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae A |
794 | socket_lock(so, 0); |
795 | return (EACCES); | |
796 | } | |
91447636 | 797 | } |
39236c6e A |
798 | if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) && |
799 | (u = kauth_cred_getuid(so->so_cred)) != 0 && | |
800 | (t = in_pcblookup_local_and_cleanup( | |
801 | inp->inp_pcbinfo, SIN(nam)->sin_addr, lport, | |
802 | INPLOOKUP_WILDCARD)) != NULL && | |
803 | (SIN(nam)->sin_addr.s_addr != INADDR_ANY || | |
804 | t->inp_laddr.s_addr != INADDR_ANY || | |
805 | !(t->inp_socket->so_options & SO_REUSEPORT)) && | |
806 | (u != kauth_cred_getuid(t->inp_socket->so_cred)) && | |
807 | !(t->inp_socket->so_flags & SOF_REUSESHAREUID) && | |
808 | (SIN(nam)->sin_addr.s_addr != INADDR_ANY || | |
809 | t->inp_laddr.s_addr != INADDR_ANY)) { | |
810 | if ((t->inp_socket->so_flags & | |
811 | SOF_NOTIFYCONFLICT) && | |
812 | !(so->so_flags & SOF_NOTIFYCONFLICT)) | |
813 | conflict = 1; | |
814 | ||
815 | lck_rw_done(pcbinfo->ipi_lock); | |
816 | ||
817 | if (conflict) | |
818 | in_pcb_conflict_post_msg(lport); | |
2d21ac55 | 819 | |
39236c6e A |
820 | socket_lock(so, 0); |
821 | return (EADDRINUSE); | |
1c79356b | 822 | } |
39236c6e A |
823 | t = in_pcblookup_local_and_cleanup(pcbinfo, |
824 | SIN(nam)->sin_addr, lport, wild); | |
825 | if (t != NULL && | |
1c79356b A |
826 | (reuseport & t->inp_socket->so_options) == 0) { |
827 | #if INET6 | |
39236c6e A |
828 | if (SIN(nam)->sin_addr.s_addr != INADDR_ANY || |
829 | t->inp_laddr.s_addr != INADDR_ANY || | |
830 | SOCK_DOM(so) != PF_INET6 || | |
831 | SOCK_DOM(t->inp_socket) != PF_INET6) | |
2d21ac55 A |
832 | #endif /* INET6 */ |
833 | { | |
2d21ac55 | 834 | |
39236c6e A |
835 | if ((t->inp_socket->so_flags & |
836 | SOF_NOTIFYCONFLICT) && | |
837 | !(so->so_flags & SOF_NOTIFYCONFLICT)) | |
2d21ac55 A |
838 | conflict = 1; |
839 | ||
39236c6e | 840 | lck_rw_done(pcbinfo->ipi_lock); |
2d21ac55 A |
841 | |
842 | if (conflict) | |
843 | in_pcb_conflict_post_msg(lport); | |
91447636 A |
844 | socket_lock(so, 0); |
845 | return (EADDRINUSE); | |
846 | } | |
1c79356b A |
847 | } |
848 | } | |
fe8ab488 | 849 | laddr = SIN(nam)->sin_addr; |
1c79356b A |
850 | } |
851 | if (lport == 0) { | |
852 | u_short first, last; | |
853 | int count; | |
854 | ||
39236c6e A |
855 | randomport = (so->so_flags & SOF_BINDRANDOMPORT) || |
856 | (so->so_type == SOCK_STREAM ? tcp_use_randomport : | |
857 | udp_use_randomport); | |
858 | ||
859 | /* | |
fe8ab488 A |
860 | * Even though this looks similar to the code in |
861 | * in6_pcbsetport, the v6 vs v4 checks are different. | |
39236c6e | 862 | */ |
fe8ab488 | 863 | anonport = TRUE; |
1c79356b A |
864 | if (inp->inp_flags & INP_HIGHPORT) { |
865 | first = ipport_hifirstauto; /* sysctl */ | |
866 | last = ipport_hilastauto; | |
39236c6e | 867 | lastport = &pcbinfo->ipi_lasthi; |
1c79356b | 868 | } else if (inp->inp_flags & INP_LOWPORT) { |
6d2010ae | 869 | cred = kauth_cred_proc_ref(p); |
39236c6e A |
870 | error = priv_check_cred(cred, |
871 | PRIV_NETINET_RESERVEDPORT, 0); | |
6d2010ae A |
872 | kauth_cred_unref(&cred); |
873 | if (error != 0) { | |
39236c6e | 874 | lck_rw_done(pcbinfo->ipi_lock); |
91447636 | 875 | socket_lock(so, 0); |
39236c6e | 876 | return (error); |
91447636 | 877 | } |
1c79356b A |
878 | first = ipport_lowfirstauto; /* 1023 */ |
879 | last = ipport_lowlastauto; /* 600 */ | |
39236c6e | 880 | lastport = &pcbinfo->ipi_lastlow; |
1c79356b A |
881 | } else { |
882 | first = ipport_firstauto; /* sysctl */ | |
883 | last = ipport_lastauto; | |
39236c6e | 884 | lastport = &pcbinfo->ipi_lastport; |
1c79356b | 885 | } |
b0d623f7 A |
886 | /* No point in randomizing if only one port is available */ |
887 | ||
888 | if (first == last) | |
39236c6e | 889 | randomport = 0; |
1c79356b A |
890 | /* |
891 | * Simple check to ensure all ports are not used up causing | |
892 | * a deadlock here. | |
893 | * | |
894 | * We split the two cases (up and down) so that the direction | |
895 | * is not being tested on each round of the loop. | |
896 | */ | |
897 | if (first > last) { | |
898 | /* | |
899 | * counting down | |
900 | */ | |
b0d623f7 | 901 | if (randomport) { |
39236c6e A |
902 | read_random(&rand_port, sizeof (rand_port)); |
903 | *lastport = | |
904 | first - (rand_port % (first - last)); | |
b0d623f7 | 905 | } |
1c79356b A |
906 | count = first - last; |
907 | ||
908 | do { | |
909 | if (count-- < 0) { /* completely used? */ | |
39236c6e | 910 | lck_rw_done(pcbinfo->ipi_lock); |
91447636 | 911 | socket_lock(so, 0); |
9bccf70c | 912 | return (EADDRNOTAVAIL); |
1c79356b A |
913 | } |
914 | --*lastport; | |
915 | if (*lastport > first || *lastport < last) | |
916 | *lastport = first; | |
917 | lport = htons(*lastport); | |
2d21ac55 | 918 | } while (in_pcblookup_local_and_cleanup(pcbinfo, |
fe8ab488 A |
919 | ((laddr.s_addr != INADDR_ANY) ? laddr : |
920 | inp->inp_laddr), lport, wild)); | |
1c79356b A |
921 | } else { |
922 | /* | |
923 | * counting up | |
924 | */ | |
b0d623f7 | 925 | if (randomport) { |
39236c6e A |
926 | read_random(&rand_port, sizeof (rand_port)); |
927 | *lastport = | |
928 | first + (rand_port % (first - last)); | |
b0d623f7 | 929 | } |
1c79356b A |
930 | count = last - first; |
931 | ||
932 | do { | |
933 | if (count-- < 0) { /* completely used? */ | |
39236c6e | 934 | lck_rw_done(pcbinfo->ipi_lock); |
91447636 | 935 | socket_lock(so, 0); |
9bccf70c | 936 | return (EADDRNOTAVAIL); |
1c79356b A |
937 | } |
938 | ++*lastport; | |
939 | if (*lastport < first || *lastport > last) | |
940 | *lastport = first; | |
941 | lport = htons(*lastport); | |
2d21ac55 | 942 | } while (in_pcblookup_local_and_cleanup(pcbinfo, |
fe8ab488 A |
943 | ((laddr.s_addr != INADDR_ANY) ? laddr : |
944 | inp->inp_laddr), lport, wild)); | |
1c79356b A |
945 | } |
946 | } | |
91447636 | 947 | socket_lock(so, 0); |
4bd07ac2 A |
948 | |
949 | /* | |
950 | * We unlocked socket's protocol lock for a long time. | |
951 | * The socket might have been dropped/defuncted. | |
952 | * Checking if world has changed since. | |
953 | */ | |
954 | if (inp->inp_state == INPCB_STATE_DEAD) { | |
955 | lck_rw_done(pcbinfo->ipi_lock); | |
956 | return (ECONNABORTED); | |
957 | } | |
958 | ||
fe8ab488 A |
959 | if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) { |
960 | lck_rw_done(pcbinfo->ipi_lock); | |
961 | return (EINVAL); | |
962 | } | |
963 | ||
964 | if (laddr.s_addr != INADDR_ANY) { | |
965 | inp->inp_laddr = laddr; | |
966 | inp->inp_last_outifp = outif; | |
967 | } | |
1c79356b | 968 | inp->inp_lport = lport; |
fe8ab488 A |
969 | if (anonport) |
970 | inp->inp_flags |= INP_ANONPORT; | |
971 | ||
91447636 | 972 | if (in_pcbinshash(inp, 1) != 0) { |
1c79356b | 973 | inp->inp_laddr.s_addr = INADDR_ANY; |
316670eb | 974 | inp->inp_last_outifp = NULL; |
fe8ab488 A |
975 | |
976 | inp->inp_lport = 0; | |
977 | if (anonport) | |
978 | inp->inp_flags &= ~INP_ANONPORT; | |
39236c6e | 979 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
980 | return (EAGAIN); |
981 | } | |
39236c6e | 982 | lck_rw_done(pcbinfo->ipi_lock); |
2d21ac55 | 983 | sflt_notify(so, sock_evt_bound, NULL); |
1c79356b A |
984 | return (0); |
985 | } | |
986 | ||
987 | /* | |
39236c6e A |
988 | * Transform old in_pcbconnect() into an inner subroutine for new |
989 | * in_pcbconnect(); do some validity-checking on the remote address | |
990 | * (in "nam") and then determine local host address (i.e., which | |
991 | * interface) to use to access that remote host. | |
992 | * | |
993 | * This routine may alter the caller-supplied remote address "nam". | |
1c79356b | 994 | * |
39236c6e A |
995 | * The caller may override the bound-to-interface setting of the socket |
996 | * by specifying the ifscope parameter (e.g. from IP_PKTINFO.) | |
997 | * | |
998 | * This routine might return an ifp with a reference held if the caller | |
999 | * provides a non-NULL outif, even in the error case. The caller is | |
1000 | * responsible for releasing its reference. | |
2d21ac55 A |
1001 | * |
1002 | * Returns: 0 Success | |
1003 | * EINVAL Invalid argument | |
1004 | * EAFNOSUPPORT Address family not supported | |
1005 | * EADDRNOTAVAIL Address not available | |
1c79356b | 1006 | */ |
1c79356b | 1007 | int |
39236c6e A |
1008 | in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr, |
1009 | unsigned int ifscope, struct ifnet **outif) | |
1c79356b | 1010 | { |
39236c6e A |
1011 | struct route *ro = &inp->inp_route; |
1012 | struct in_ifaddr *ia = NULL; | |
1013 | struct sockaddr_in sin; | |
1014 | int error = 0; | |
fe8ab488 | 1015 | boolean_t restricted = FALSE; |
39236c6e A |
1016 | |
1017 | if (outif != NULL) | |
1018 | *outif = NULL; | |
1019 | if (nam->sa_len != sizeof (struct sockaddr_in)) | |
1c79356b | 1020 | return (EINVAL); |
39236c6e | 1021 | if (SIN(nam)->sin_family != AF_INET) |
1c79356b | 1022 | return (EAFNOSUPPORT); |
39236c6e | 1023 | if (SIN(nam)->sin_port == 0) |
1c79356b | 1024 | return (EADDRNOTAVAIL); |
b0d623f7 | 1025 | |
39236c6e A |
1026 | /* |
1027 | * If the destination address is INADDR_ANY, | |
1028 | * use the primary local address. | |
1029 | * If the supplied address is INADDR_BROADCAST, | |
1030 | * and the primary interface supports broadcast, | |
1031 | * choose the broadcast address for that interface. | |
1032 | */ | |
1033 | if (SIN(nam)->sin_addr.s_addr == INADDR_ANY || | |
1034 | SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST) { | |
1035 | lck_rw_lock_shared(in_ifaddr_rwlock); | |
1036 | if (!TAILQ_EMPTY(&in_ifaddrhead)) { | |
1037 | ia = TAILQ_FIRST(&in_ifaddrhead); | |
1038 | IFA_LOCK_SPIN(&ia->ia_ifa); | |
1039 | if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) { | |
1040 | SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr; | |
1041 | } else if (ia->ia_ifp->if_flags & IFF_BROADCAST) { | |
1042 | SIN(nam)->sin_addr = | |
1043 | SIN(&ia->ia_broadaddr)->sin_addr; | |
1044 | } | |
1045 | IFA_UNLOCK(&ia->ia_ifa); | |
1046 | ia = NULL; | |
1047 | } | |
1048 | lck_rw_done(in_ifaddr_rwlock); | |
1049 | } | |
1050 | /* | |
1051 | * Otherwise, if the socket has already bound the source, just use it. | |
1052 | */ | |
1053 | if (inp->inp_laddr.s_addr != INADDR_ANY) { | |
1054 | VERIFY(ia == NULL); | |
1055 | *laddr = inp->inp_laddr; | |
1056 | return (0); | |
1c79356b | 1057 | } |
6d2010ae | 1058 | |
39236c6e A |
1059 | /* |
1060 | * If the ifscope is specified by the caller (e.g. IP_PKTINFO) | |
1061 | * then it overrides the sticky ifscope set for the socket. | |
1062 | */ | |
1063 | if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF)) | |
1064 | ifscope = inp->inp_boundifp->if_index; | |
6d2010ae | 1065 | |
39236c6e A |
1066 | /* |
1067 | * If route is known or can be allocated now, | |
1068 | * our src addr is taken from the i/f, else punt. | |
1069 | * Note that we should check the address family of the cached | |
1070 | * destination, in case of sharing the cache with IPv6. | |
1071 | */ | |
1072 | if (ro->ro_rt != NULL) | |
1073 | RT_LOCK_SPIN(ro->ro_rt); | |
1074 | if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET || | |
1075 | SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr || | |
1076 | (inp->inp_socket->so_options & SO_DONTROUTE)) { | |
b0d623f7 | 1077 | if (ro->ro_rt != NULL) |
b0d623f7 | 1078 | RT_UNLOCK(ro->ro_rt); |
39236c6e A |
1079 | ROUTE_RELEASE(ro); |
1080 | } | |
1081 | if (!(inp->inp_socket->so_options & SO_DONTROUTE) && | |
1082 | (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) { | |
1083 | if (ro->ro_rt != NULL) | |
1084 | RT_UNLOCK(ro->ro_rt); | |
1085 | ROUTE_RELEASE(ro); | |
1086 | /* No route yet, so try to acquire one */ | |
1087 | bzero(&ro->ro_dst, sizeof (struct sockaddr_in)); | |
1088 | ro->ro_dst.sa_family = AF_INET; | |
1089 | ro->ro_dst.sa_len = sizeof (struct sockaddr_in); | |
1090 | SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr; | |
1091 | rtalloc_scoped(ro, ifscope); | |
1092 | if (ro->ro_rt != NULL) | |
1093 | RT_LOCK_SPIN(ro->ro_rt); | |
1094 | } | |
1095 | /* Sanitized local copy for interface address searches */ | |
1096 | bzero(&sin, sizeof (sin)); | |
1097 | sin.sin_family = AF_INET; | |
1098 | sin.sin_len = sizeof (struct sockaddr_in); | |
1099 | sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr; | |
1100 | /* | |
1101 | * If we did not find (or use) a route, assume dest is reachable | |
1102 | * on a directly connected network and try to find a corresponding | |
1103 | * interface to take the source address from. | |
1104 | */ | |
1105 | if (ro->ro_rt == NULL) { | |
1106 | VERIFY(ia == NULL); | |
1107 | ia = ifatoia(ifa_ifwithdstaddr(SA(&sin))); | |
1108 | if (ia == NULL) | |
1109 | ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope)); | |
1110 | error = ((ia == NULL) ? ENETUNREACH : 0); | |
1111 | goto done; | |
1112 | } | |
1113 | RT_LOCK_ASSERT_HELD(ro->ro_rt); | |
1114 | /* | |
1115 | * If the outgoing interface on the route found is not | |
1116 | * a loopback interface, use the address from that interface. | |
1117 | */ | |
1118 | if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { | |
1119 | VERIFY(ia == NULL); | |
6d2010ae A |
1120 | /* |
1121 | * If the route points to a cellular interface and the | |
1122 | * caller forbids our using interfaces of such type, | |
1123 | * pretend that there is no route. | |
fe8ab488 | 1124 | * Apply the same logic for expensive interfaces. |
6d2010ae | 1125 | */ |
fe8ab488 | 1126 | if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) { |
39236c6e A |
1127 | RT_UNLOCK(ro->ro_rt); |
1128 | ROUTE_RELEASE(ro); | |
1129 | error = EHOSTUNREACH; | |
fe8ab488 | 1130 | restricted = TRUE; |
39236c6e | 1131 | } else { |
6d2010ae A |
1132 | /* Become a regular mutex */ |
1133 | RT_CONVERT_LOCK(ro->ro_rt); | |
39236c6e A |
1134 | ia = ifatoia(ro->ro_rt->rt_ifa); |
1135 | IFA_ADDREF(&ia->ia_ifa); | |
b0d623f7 | 1136 | RT_UNLOCK(ro->ro_rt); |
39236c6e | 1137 | error = 0; |
91447636 | 1138 | } |
39236c6e A |
1139 | goto done; |
1140 | } | |
1141 | VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK); | |
1142 | RT_UNLOCK(ro->ro_rt); | |
1143 | /* | |
1144 | * The outgoing interface is marked with 'loopback net', so a route | |
1145 | * to ourselves is here. | |
1146 | * Try to find the interface of the destination address and then | |
1147 | * take the address from there. That interface is not necessarily | |
1148 | * a loopback interface. | |
1149 | */ | |
1150 | VERIFY(ia == NULL); | |
1151 | ia = ifatoia(ifa_ifwithdstaddr(SA(&sin))); | |
1152 | if (ia == NULL) | |
1153 | ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope)); | |
1154 | if (ia == NULL) | |
1155 | ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope)); | |
1156 | if (ia == NULL) { | |
1157 | RT_LOCK(ro->ro_rt); | |
1158 | ia = ifatoia(ro->ro_rt->rt_ifa); | |
1159 | if (ia != NULL) | |
1160 | IFA_ADDREF(&ia->ia_ifa); | |
1161 | RT_UNLOCK(ro->ro_rt); | |
1162 | } | |
1163 | error = ((ia == NULL) ? ENETUNREACH : 0); | |
1164 | ||
1165 | done: | |
1166 | /* | |
1167 | * If the destination address is multicast and an outgoing | |
1168 | * interface has been set as a multicast option, use the | |
1169 | * address of that interface as our source address. | |
1170 | */ | |
15129b1c | 1171 | if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) && |
39236c6e A |
1172 | inp->inp_moptions != NULL) { |
1173 | struct ip_moptions *imo; | |
1174 | struct ifnet *ifp; | |
1175 | ||
1176 | imo = inp->inp_moptions; | |
1177 | IMO_LOCK(imo); | |
1178 | if (imo->imo_multicast_ifp != NULL && (ia == NULL || | |
1179 | ia->ia_ifp != imo->imo_multicast_ifp)) { | |
1180 | ifp = imo->imo_multicast_ifp; | |
1181 | if (ia != NULL) | |
6d2010ae | 1182 | IFA_REMREF(&ia->ia_ifa); |
39236c6e A |
1183 | lck_rw_lock_shared(in_ifaddr_rwlock); |
1184 | TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { | |
1185 | if (ia->ia_ifp == ifp) | |
1186 | break; | |
6d2010ae | 1187 | } |
39236c6e A |
1188 | if (ia != NULL) |
1189 | IFA_ADDREF(&ia->ia_ifa); | |
1190 | lck_rw_done(in_ifaddr_rwlock); | |
1191 | if (ia == NULL) | |
1192 | error = EADDRNOTAVAIL; | |
15129b1c A |
1193 | else |
1194 | error = 0; | |
1c79356b | 1195 | } |
39236c6e A |
1196 | IMO_UNLOCK(imo); |
1197 | } | |
1198 | /* | |
1199 | * Don't do pcblookup call here; return interface in laddr | |
1200 | * and exit to caller, that will do the lookup. | |
1201 | */ | |
1202 | if (ia != NULL) { | |
1c79356b | 1203 | /* |
39236c6e A |
1204 | * If the source address belongs to a cellular interface |
1205 | * and the socket forbids our using interfaces of such | |
1206 | * type, pretend that there is no source address. | |
fe8ab488 | 1207 | * Apply the same logic for expensive interfaces. |
1c79356b | 1208 | */ |
39236c6e | 1209 | IFA_LOCK_SPIN(&ia->ia_ifa); |
fe8ab488 | 1210 | if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) { |
39236c6e A |
1211 | IFA_UNLOCK(&ia->ia_ifa); |
1212 | error = EHOSTUNREACH; | |
fe8ab488 | 1213 | restricted = TRUE; |
39236c6e A |
1214 | } else if (error == 0) { |
1215 | *laddr = ia->ia_addr.sin_addr; | |
1216 | if (outif != NULL) { | |
1217 | struct ifnet *ifp; | |
1218 | ||
1219 | if (ro->ro_rt != NULL) | |
1220 | ifp = ro->ro_rt->rt_ifp; | |
1221 | else | |
1222 | ifp = ia->ia_ifp; | |
1223 | ||
1224 | VERIFY(ifp != NULL); | |
1225 | IFA_CONVERT_LOCK(&ia->ia_ifa); | |
1226 | ifnet_reference(ifp); /* for caller */ | |
1227 | if (*outif != NULL) | |
1228 | ifnet_release(*outif); | |
1229 | *outif = ifp; | |
1c79356b | 1230 | } |
39236c6e A |
1231 | IFA_UNLOCK(&ia->ia_ifa); |
1232 | } else { | |
1233 | IFA_UNLOCK(&ia->ia_ifa); | |
1c79356b | 1234 | } |
6d2010ae | 1235 | IFA_REMREF(&ia->ia_ifa); |
39236c6e A |
1236 | ia = NULL; |
1237 | } | |
1238 | ||
fe8ab488 | 1239 | if (restricted && error == EHOSTUNREACH) { |
39236c6e A |
1240 | soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED | |
1241 | SO_FILT_HINT_IFDENIED)); | |
1c79356b | 1242 | } |
39236c6e A |
1243 | |
1244 | return (error); | |
1c79356b A |
1245 | } |
1246 | ||
1247 | /* | |
1248 | * Outer subroutine: | |
1249 | * Connect from a socket to a specified address. | |
1250 | * Both address and port must be specified in argument sin. | |
1251 | * If don't have a local address for this socket yet, | |
1252 | * then pick one. | |
39236c6e A |
1253 | * |
1254 | * The caller may override the bound-to-interface setting of the socket | |
1255 | * by specifying the ifscope parameter (e.g. from IP_PKTINFO.) | |
1c79356b A |
1256 | */ |
1257 | int | |
316670eb | 1258 | in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, |
39236c6e | 1259 | unsigned int ifscope, struct ifnet **outif) |
1c79356b | 1260 | { |
39236c6e | 1261 | struct in_addr laddr; |
316670eb | 1262 | struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam; |
91447636 | 1263 | struct inpcb *pcb; |
1c79356b | 1264 | int error; |
fe8ab488 | 1265 | struct socket *so = inp->inp_socket; |
1c79356b A |
1266 | |
1267 | /* | |
1268 | * Call inner routine, to assign local interface address. | |
1269 | */ | |
39236c6e A |
1270 | if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif)) != 0) |
1271 | return (error); | |
1c79356b | 1272 | |
fe8ab488 | 1273 | socket_unlock(so, 0); |
91447636 | 1274 | pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, |
39236c6e | 1275 | inp->inp_laddr.s_addr ? inp->inp_laddr : laddr, |
91447636 | 1276 | inp->inp_lport, 0, NULL); |
fe8ab488 | 1277 | socket_lock(so, 0); |
6d2010ae | 1278 | |
39236c6e A |
1279 | /* |
1280 | * Check if the socket is still in a valid state. When we unlock this | |
1281 | * embryonic socket, it can get aborted if another thread is closing | |
6d2010ae A |
1282 | * the listener (radar 7947600). |
1283 | */ | |
fe8ab488 | 1284 | if ((so->so_flags & SOF_ABORTED) != 0) |
39236c6e | 1285 | return (ECONNREFUSED); |
6d2010ae | 1286 | |
91447636 | 1287 | if (pcb != NULL) { |
0b4c1975 | 1288 | in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0); |
1c79356b A |
1289 | return (EADDRINUSE); |
1290 | } | |
1291 | if (inp->inp_laddr.s_addr == INADDR_ANY) { | |
9bccf70c | 1292 | if (inp->inp_lport == 0) { |
39236c6e | 1293 | error = in_pcbbind(inp, NULL, p); |
9bccf70c | 1294 | if (error) |
39236c6e | 1295 | return (error); |
9bccf70c | 1296 | } |
39236c6e A |
1297 | if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { |
1298 | /* | |
1299 | * Lock inversion issue, mostly with udp | |
1300 | * multicast packets. | |
1301 | */ | |
fe8ab488 | 1302 | socket_unlock(so, 0); |
39236c6e | 1303 | lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); |
fe8ab488 | 1304 | socket_lock(so, 0); |
91447636 | 1305 | } |
39236c6e A |
1306 | inp->inp_laddr = laddr; |
1307 | /* no reference needed */ | |
316670eb | 1308 | inp->inp_last_outifp = (outif != NULL) ? *outif : NULL; |
55e303ae | 1309 | inp->inp_flags |= INP_INADDR_ANY; |
39236c6e | 1310 | } else { |
3e170ce0 A |
1311 | /* |
1312 | * Usage of IP_PKTINFO, without local port already | |
1313 | * speficified will cause kernel to panic, | |
1314 | * see rdar://problem/18508185. | |
1315 | * For now returning error to avoid a kernel panic | |
1316 | * This routines can be refactored and handle this better | |
1317 | * in future. | |
1318 | */ | |
1319 | if (inp->inp_lport == 0) | |
1320 | return (EINVAL); | |
39236c6e A |
1321 | if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { |
1322 | /* | |
1323 | * Lock inversion issue, mostly with udp | |
1324 | * multicast packets. | |
1325 | */ | |
fe8ab488 | 1326 | socket_unlock(so, 0); |
39236c6e | 1327 | lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); |
fe8ab488 | 1328 | socket_lock(so, 0); |
91447636 | 1329 | } |
1c79356b A |
1330 | } |
1331 | inp->inp_faddr = sin->sin_addr; | |
1332 | inp->inp_fport = sin->sin_port; | |
fe8ab488 A |
1333 | if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) |
1334 | nstat_pcb_invalidate_cache(inp); | |
1c79356b | 1335 | in_pcbrehash(inp); |
39236c6e | 1336 | lck_rw_done(inp->inp_pcbinfo->ipi_lock); |
1c79356b A |
1337 | return (0); |
1338 | } | |
1339 | ||
1340 | void | |
2d21ac55 | 1341 | in_pcbdisconnect(struct inpcb *inp) |
1c79356b | 1342 | { |
39236c6e | 1343 | struct socket *so = inp->inp_socket; |
1c79356b | 1344 | |
fe8ab488 A |
1345 | if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) |
1346 | nstat_pcb_cache(inp); | |
1347 | ||
1c79356b A |
1348 | inp->inp_faddr.s_addr = INADDR_ANY; |
1349 | inp->inp_fport = 0; | |
91447636 | 1350 | |
39236c6e A |
1351 | if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { |
1352 | /* lock inversion issue, mostly with udp multicast packets */ | |
1353 | socket_unlock(so, 0); | |
1354 | lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); | |
1355 | socket_lock(so, 0); | |
91447636 A |
1356 | } |
1357 | ||
1c79356b | 1358 | in_pcbrehash(inp); |
39236c6e A |
1359 | lck_rw_done(inp->inp_pcbinfo->ipi_lock); |
1360 | /* | |
1361 | * A multipath subflow socket would have its SS_NOFDREF set by default, | |
1362 | * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB; | |
1363 | * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared. | |
1364 | */ | |
1365 | if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) | |
1c79356b A |
1366 | in_pcbdetach(inp); |
1367 | } | |
1368 | ||
1369 | void | |
2d21ac55 | 1370 | in_pcbdetach(struct inpcb *inp) |
1c79356b A |
1371 | { |
1372 | struct socket *so = inp->inp_socket; | |
1c79356b | 1373 | |
39236c6e A |
1374 | if (so->so_pcb == NULL) { |
1375 | /* PCB has been disposed */ | |
1376 | panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__, | |
1377 | inp, so, SOCK_PROTO(so)); | |
1378 | /* NOTREACHED */ | |
91447636 | 1379 | } |
fe8ab488 | 1380 | |
1c79356b | 1381 | #if IPSEC |
39236c6e A |
1382 | if (inp->inp_sp != NULL) { |
1383 | (void) ipsec4_delete_pcbpolicy(inp); | |
91447636 | 1384 | } |
39236c6e | 1385 | #endif /* IPSEC */ |
fe8ab488 A |
1386 | |
1387 | /* | |
1388 | * Let NetworkStatistics know this PCB is going away | |
1389 | * before we detach it. | |
1390 | */ | |
1391 | if (nstat_collect && | |
1392 | (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) | |
1393 | nstat_pcb_detach(inp); | |
3e170ce0 A |
1394 | |
1395 | /* Free memory buffer held for generating keep alives */ | |
1396 | if (inp->inp_keepalive_data != NULL) { | |
1397 | FREE(inp->inp_keepalive_data, M_TEMP); | |
1398 | inp->inp_keepalive_data = NULL; | |
1399 | } | |
1400 | ||
91447636 | 1401 | /* mark socket state as dead */ |
39236c6e A |
1402 | if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) { |
1403 | panic("%s: so=%p proto=%d couldn't set to STOPUSING\n", | |
1404 | __func__, so, SOCK_PROTO(so)); | |
1405 | /* NOTREACHED */ | |
1406 | } | |
1c79356b | 1407 | |
39236c6e | 1408 | if (!(so->so_flags & SOF_PCBCLEARING)) { |
6d2010ae | 1409 | struct ip_moptions *imo; |
2d21ac55 | 1410 | |
91447636 | 1411 | inp->inp_vflag = 0; |
39236c6e A |
1412 | if (inp->inp_options != NULL) { |
1413 | (void) m_free(inp->inp_options); | |
1414 | inp->inp_options = NULL; | |
91447636 | 1415 | } |
39236c6e | 1416 | ROUTE_RELEASE(&inp->inp_route); |
6d2010ae | 1417 | imo = inp->inp_moptions; |
91447636 | 1418 | inp->inp_moptions = NULL; |
6d2010ae A |
1419 | if (imo != NULL) |
1420 | IMO_REMREF(imo); | |
91447636 A |
1421 | sofreelastref(so, 0); |
1422 | inp->inp_state = INPCB_STATE_DEAD; | |
39236c6e A |
1423 | /* makes sure we're not called twice from so_close */ |
1424 | so->so_flags |= SOF_PCBCLEARING; | |
1425 | ||
1426 | inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST); | |
91447636 A |
1427 | } |
1428 | } | |
1c79356b | 1429 | |
1c79356b | 1430 | |
39236c6e A |
1431 | void |
1432 | in_pcbdispose(struct inpcb *inp) | |
91447636 A |
1433 | { |
1434 | struct socket *so = inp->inp_socket; | |
1435 | struct inpcbinfo *ipi = inp->inp_pcbinfo; | |
1436 | ||
39236c6e A |
1437 | if (so != NULL && so->so_usecount != 0) { |
1438 | panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n", | |
1439 | __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount, | |
1440 | solockhistory_nr(so)); | |
1441 | /* NOTREACHED */ | |
1442 | } else if (inp->inp_wantcnt != WNT_STOPUSING) { | |
1443 | if (so != NULL) { | |
1444 | panic_plain("%s: inp %p invalid wantcnt %d, so %p " | |
1445 | "[%d,%d] usecount %d retaincnt %d state 0x%x " | |
1446 | "flags 0x%x lockhistory %s\n", __func__, inp, | |
1447 | inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so), | |
1448 | so->so_usecount, so->so_retaincnt, so->so_state, | |
1449 | so->so_flags, solockhistory_nr(so)); | |
1450 | /* NOTREACHED */ | |
1451 | } else { | |
1452 | panic("%s: inp %p invalid wantcnt %d no socket\n", | |
1453 | __func__, inp, inp->inp_wantcnt); | |
1454 | /* NOTREACHED */ | |
1455 | } | |
91447636 | 1456 | } |
91447636 | 1457 | |
39236c6e | 1458 | lck_rw_assert(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE); |
91447636 A |
1459 | |
1460 | inp->inp_gencnt = ++ipi->ipi_gencnt; | |
316670eb | 1461 | /* access ipi in in_pcbremlists */ |
91447636 | 1462 | in_pcbremlists(inp); |
316670eb | 1463 | |
39236c6e | 1464 | if (so != NULL) { |
91447636 A |
1465 | if (so->so_proto->pr_flags & PR_PCBLOCK) { |
1466 | sofreelastref(so, 0); | |
39236c6e A |
1467 | if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) { |
1468 | /* | |
1469 | * selthreadclear() already called | |
1470 | * during sofreelastref() above. | |
1471 | */ | |
91447636 A |
1472 | sbrelease(&so->so_rcv); |
1473 | sbrelease(&so->so_snd); | |
1474 | } | |
39236c6e A |
1475 | if (so->so_head != NULL) { |
1476 | panic("%s: so=%p head still exist\n", | |
1477 | __func__, so); | |
1478 | /* NOTREACHED */ | |
1479 | } | |
1480 | lck_mtx_unlock(&inp->inpcb_mtx); | |
1481 | lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp); | |
9bccf70c | 1482 | } |
39236c6e A |
1483 | /* makes sure we're not called twice from so_close */ |
1484 | so->so_flags |= SOF_PCBCLEARING; | |
1485 | so->so_saved_pcb = (caddr_t)inp; | |
1486 | so->so_pcb = NULL; | |
1487 | inp->inp_socket = NULL; | |
2d21ac55 A |
1488 | #if CONFIG_MACF_NET |
1489 | mac_inpcb_label_destroy(inp); | |
39236c6e | 1490 | #endif /* CONFIG_MACF_NET */ |
b0d623f7 A |
1491 | /* |
1492 | * In case there a route cached after a detach (possible | |
1493 | * in the tcp case), make sure that it is freed before | |
1494 | * we deallocate the structure. | |
1495 | */ | |
39236c6e | 1496 | ROUTE_RELEASE(&inp->inp_route); |
3e170ce0 | 1497 | if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) { |
91447636 | 1498 | zfree(ipi->ipi_zone, inp); |
55e303ae | 1499 | } |
91447636 | 1500 | sodealloc(so); |
9bccf70c | 1501 | } |
1c79356b A |
1502 | } |
1503 | ||
1504 | /* | |
39236c6e | 1505 | * The calling convention of in_getsockaddr() and in_getpeeraddr() was |
1c79356b A |
1506 | * modified to match the pru_sockaddr() and pru_peeraddr() entry points |
1507 | * in struct pr_usrreqs, so that protocols can just reference then directly | |
39236c6e | 1508 | * without the need for a wrapper function. |
1c79356b A |
1509 | */ |
1510 | int | |
39236c6e | 1511 | in_getsockaddr(struct socket *so, struct sockaddr **nam) |
1c79356b | 1512 | { |
2d21ac55 A |
1513 | struct inpcb *inp; |
1514 | struct sockaddr_in *sin; | |
1c79356b A |
1515 | |
1516 | /* | |
1517 | * Do the malloc first in case it blocks. | |
1518 | */ | |
39236c6e | 1519 | MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK); |
0b4e3aa0 | 1520 | if (sin == NULL) |
39236c6e A |
1521 | return (ENOBUFS); |
1522 | bzero(sin, sizeof (*sin)); | |
1c79356b | 1523 | sin->sin_family = AF_INET; |
39236c6e | 1524 | sin->sin_len = sizeof (*sin); |
1c79356b | 1525 | |
39236c6e | 1526 | if ((inp = sotoinpcb(so)) == NULL) { |
1c79356b | 1527 | FREE(sin, M_SONAME); |
39236c6e | 1528 | return (EINVAL); |
1c79356b A |
1529 | } |
1530 | sin->sin_port = inp->inp_lport; | |
1531 | sin->sin_addr = inp->inp_laddr; | |
1c79356b A |
1532 | |
1533 | *nam = (struct sockaddr *)sin; | |
39236c6e | 1534 | return (0); |
1c79356b A |
1535 | } |
1536 | ||
1537 | int | |
39236c6e | 1538 | in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss) |
1c79356b | 1539 | { |
39236c6e | 1540 | struct sockaddr_in *sin = SIN(ss); |
1c79356b | 1541 | struct inpcb *inp; |
1c79356b | 1542 | |
39236c6e A |
1543 | VERIFY(ss != NULL); |
1544 | bzero(ss, sizeof (*ss)); | |
1545 | ||
1c79356b | 1546 | sin->sin_family = AF_INET; |
39236c6e | 1547 | sin->sin_len = sizeof (*sin); |
1c79356b | 1548 | |
fe8ab488 A |
1549 | if ((inp = sotoinpcb(so)) == NULL |
1550 | #if NECP | |
1551 | || (necp_socket_should_use_flow_divert(inp)) | |
1552 | #endif /* NECP */ | |
1553 | ) | |
39236c6e A |
1554 | return (inp == NULL ? EINVAL : EPROTOTYPE); |
1555 | ||
1556 | sin->sin_port = inp->inp_lport; | |
1557 | sin->sin_addr = inp->inp_laddr; | |
1558 | return (0); | |
1559 | } | |
1560 | ||
1561 | int | |
1562 | in_getpeeraddr(struct socket *so, struct sockaddr **nam) | |
1563 | { | |
1564 | struct inpcb *inp; | |
1565 | struct sockaddr_in *sin; | |
1566 | ||
1567 | /* | |
1568 | * Do the malloc first in case it blocks. | |
1569 | */ | |
1570 | MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK); | |
1571 | if (sin == NULL) | |
1572 | return (ENOBUFS); | |
1573 | bzero((caddr_t)sin, sizeof (*sin)); | |
1574 | sin->sin_family = AF_INET; | |
1575 | sin->sin_len = sizeof (*sin); | |
1576 | ||
1577 | if ((inp = sotoinpcb(so)) == NULL) { | |
1c79356b | 1578 | FREE(sin, M_SONAME); |
39236c6e | 1579 | return (EINVAL); |
1c79356b A |
1580 | } |
1581 | sin->sin_port = inp->inp_fport; | |
1582 | sin->sin_addr = inp->inp_faddr; | |
1c79356b A |
1583 | |
1584 | *nam = (struct sockaddr *)sin; | |
39236c6e A |
1585 | return (0); |
1586 | } | |
1587 | ||
1588 | int | |
1589 | in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss) | |
1590 | { | |
1591 | struct sockaddr_in *sin = SIN(ss); | |
1592 | struct inpcb *inp; | |
1593 | ||
1594 | VERIFY(ss != NULL); | |
1595 | bzero(ss, sizeof (*ss)); | |
1596 | ||
1597 | sin->sin_family = AF_INET; | |
1598 | sin->sin_len = sizeof (*sin); | |
1599 | ||
fe8ab488 A |
1600 | if ((inp = sotoinpcb(so)) == NULL |
1601 | #if NECP | |
1602 | || (necp_socket_should_use_flow_divert(inp)) | |
1603 | #endif /* NECP */ | |
1604 | ) { | |
39236c6e A |
1605 | return (inp == NULL ? EINVAL : EPROTOTYPE); |
1606 | } | |
1607 | ||
1608 | sin->sin_port = inp->inp_fport; | |
1609 | sin->sin_addr = inp->inp_faddr; | |
1610 | return (0); | |
1c79356b A |
1611 | } |
1612 | ||
1c79356b | 1613 | void |
2d21ac55 | 1614 | in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
39236c6e | 1615 | int errno, void (*notify)(struct inpcb *, int)) |
1c79356b | 1616 | { |
91447636 A |
1617 | struct inpcb *inp; |
1618 | ||
39236c6e | 1619 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
1c79356b | 1620 | |
39236c6e | 1621 | LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { |
9bccf70c | 1622 | #if INET6 |
39236c6e | 1623 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1624 | continue; |
39236c6e | 1625 | #endif /* INET6 */ |
1c79356b | 1626 | if (inp->inp_faddr.s_addr != faddr.s_addr || |
9bccf70c | 1627 | inp->inp_socket == NULL) |
39236c6e A |
1628 | continue; |
1629 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) | |
91447636 A |
1630 | continue; |
1631 | socket_lock(inp->inp_socket, 1); | |
9bccf70c | 1632 | (*notify)(inp, errno); |
39236c6e | 1633 | (void) in_pcb_checkstate(inp, WNT_RELEASE, 1); |
91447636 | 1634 | socket_unlock(inp->inp_socket, 1); |
1c79356b | 1635 | } |
39236c6e | 1636 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
1637 | } |
1638 | ||
1639 | /* | |
1640 | * Check for alternatives when higher level complains | |
1641 | * about service problems. For now, invalidate cached | |
1642 | * routing information. If the route was created dynamically | |
1643 | * (by a redirect), time to try a default gateway again. | |
1644 | */ | |
1645 | void | |
2d21ac55 | 1646 | in_losing(struct inpcb *inp) |
1c79356b | 1647 | { |
39236c6e | 1648 | boolean_t release = FALSE; |
2d21ac55 | 1649 | struct rtentry *rt; |
1c79356b | 1650 | |
b0d623f7 | 1651 | if ((rt = inp->inp_route.ro_rt) != NULL) { |
39236c6e | 1652 | struct in_ifaddr *ia = NULL; |
b0d623f7 | 1653 | |
b0d623f7 | 1654 | RT_LOCK(rt); |
b0d623f7 A |
1655 | if (rt->rt_flags & RTF_DYNAMIC) { |
1656 | /* | |
1657 | * Prevent another thread from modifying rt_key, | |
1658 | * rt_gateway via rt_setgate() after rt_lock is | |
1659 | * dropped by marking the route as defunct. | |
1660 | */ | |
1661 | rt->rt_flags |= RTF_CONDEMNED; | |
1662 | RT_UNLOCK(rt); | |
1663 | (void) rtrequest(RTM_DELETE, rt_key(rt), | |
39236c6e | 1664 | rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); |
b0d623f7 A |
1665 | } else { |
1666 | RT_UNLOCK(rt); | |
1667 | } | |
2d21ac55 | 1668 | /* if the address is gone keep the old route in the pcb */ |
39236c6e A |
1669 | if (inp->inp_laddr.s_addr != INADDR_ANY && |
1670 | (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { | |
1671 | /* | |
1672 | * Address is around; ditch the route. A new route | |
1673 | * can be allocated the next time output is attempted. | |
1674 | */ | |
1675 | release = TRUE; | |
2d21ac55 | 1676 | } |
39236c6e A |
1677 | if (ia != NULL) |
1678 | IFA_REMREF(&ia->ia_ifa); | |
1c79356b | 1679 | } |
39236c6e A |
1680 | if (rt == NULL || release) |
1681 | ROUTE_RELEASE(&inp->inp_route); | |
1c79356b A |
1682 | } |
1683 | ||
1684 | /* | |
1685 | * After a routing change, flush old routing | |
1686 | * and allocate a (hopefully) better one. | |
1687 | */ | |
9bccf70c | 1688 | void |
39236c6e | 1689 | in_rtchange(struct inpcb *inp, int errno) |
1c79356b | 1690 | { |
39236c6e A |
1691 | #pragma unused(errno) |
1692 | boolean_t release = FALSE; | |
2d21ac55 A |
1693 | struct rtentry *rt; |
1694 | ||
1695 | if ((rt = inp->inp_route.ro_rt) != NULL) { | |
39236c6e | 1696 | struct in_ifaddr *ia = NULL; |
b0d623f7 | 1697 | |
39236c6e A |
1698 | /* if address is gone, keep the old route */ |
1699 | if (inp->inp_laddr.s_addr != INADDR_ANY && | |
1700 | (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { | |
1701 | /* | |
1702 | * Address is around; ditch the route. A new route | |
1703 | * can be allocated the next time output is attempted. | |
1704 | */ | |
1705 | release = TRUE; | |
2d21ac55 | 1706 | } |
39236c6e A |
1707 | if (ia != NULL) |
1708 | IFA_REMREF(&ia->ia_ifa); | |
1c79356b | 1709 | } |
39236c6e A |
1710 | if (rt == NULL || release) |
1711 | ROUTE_RELEASE(&inp->inp_route); | |
1c79356b A |
1712 | } |
1713 | ||
1714 | /* | |
1715 | * Lookup a PCB based on the local address and port. | |
1716 | */ | |
1717 | struct inpcb * | |
2d21ac55 | 1718 | in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, |
39236c6e | 1719 | unsigned int lport_arg, int wild_okay) |
1c79356b | 1720 | { |
2d21ac55 | 1721 | struct inpcb *inp; |
1c79356b A |
1722 | int matchwild = 3, wildcard; |
1723 | u_short lport = lport_arg; | |
1724 | ||
39236c6e | 1725 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0); |
1c79356b A |
1726 | |
1727 | if (!wild_okay) { | |
1728 | struct inpcbhead *head; | |
1729 | /* | |
1730 | * Look for an unconnected (wildcard foreign addr) PCB that | |
1731 | * matches the local address and port we're looking for. | |
1732 | */ | |
39236c6e A |
1733 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, |
1734 | pcbinfo->ipi_hashmask)]; | |
9bccf70c A |
1735 | LIST_FOREACH(inp, head, inp_hash) { |
1736 | #if INET6 | |
39236c6e | 1737 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1738 | continue; |
39236c6e | 1739 | #endif /* INET6 */ |
1c79356b A |
1740 | if (inp->inp_faddr.s_addr == INADDR_ANY && |
1741 | inp->inp_laddr.s_addr == laddr.s_addr && | |
1742 | inp->inp_lport == lport) { | |
1743 | /* | |
1744 | * Found. | |
1745 | */ | |
1746 | return (inp); | |
1747 | } | |
1748 | } | |
1749 | /* | |
1750 | * Not found. | |
1751 | */ | |
39236c6e | 1752 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0); |
1c79356b A |
1753 | return (NULL); |
1754 | } else { | |
1755 | struct inpcbporthead *porthash; | |
1756 | struct inpcbport *phd; | |
1757 | struct inpcb *match = NULL; | |
1758 | /* | |
1759 | * Best fit PCB lookup. | |
1760 | * | |
1761 | * First see if this local port is in use by looking on the | |
1762 | * port hash list. | |
1763 | */ | |
39236c6e A |
1764 | porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, |
1765 | pcbinfo->ipi_porthashmask)]; | |
9bccf70c | 1766 | LIST_FOREACH(phd, porthash, phd_hash) { |
1c79356b A |
1767 | if (phd->phd_port == lport) |
1768 | break; | |
1769 | } | |
1770 | if (phd != NULL) { | |
1771 | /* | |
1772 | * Port is in use by one or more PCBs. Look for best | |
1773 | * fit. | |
1774 | */ | |
9bccf70c | 1775 | LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { |
1c79356b | 1776 | wildcard = 0; |
9bccf70c | 1777 | #if INET6 |
39236c6e | 1778 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1779 | continue; |
39236c6e | 1780 | #endif /* INET6 */ |
1c79356b A |
1781 | if (inp->inp_faddr.s_addr != INADDR_ANY) |
1782 | wildcard++; | |
1783 | if (inp->inp_laddr.s_addr != INADDR_ANY) { | |
1784 | if (laddr.s_addr == INADDR_ANY) | |
1785 | wildcard++; | |
39236c6e A |
1786 | else if (inp->inp_laddr.s_addr != |
1787 | laddr.s_addr) | |
1c79356b A |
1788 | continue; |
1789 | } else { | |
1790 | if (laddr.s_addr != INADDR_ANY) | |
1791 | wildcard++; | |
1792 | } | |
1793 | if (wildcard < matchwild) { | |
1794 | match = inp; | |
1795 | matchwild = wildcard; | |
1796 | if (matchwild == 0) { | |
1797 | break; | |
1798 | } | |
1799 | } | |
1800 | } | |
1801 | } | |
39236c6e A |
1802 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match, |
1803 | 0, 0, 0, 0); | |
1c79356b A |
1804 | return (match); |
1805 | } | |
1806 | } | |
1807 | ||
6d2010ae A |
1808 | /* |
1809 | * Check if PCB exists in hash list. | |
1810 | */ | |
1811 | int | |
39236c6e A |
1812 | in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
1813 | u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard, | |
1814 | uid_t *uid, gid_t *gid, struct ifnet *ifp) | |
6d2010ae A |
1815 | { |
1816 | struct inpcbhead *head; | |
1817 | struct inpcb *inp; | |
1818 | u_short fport = fport_arg, lport = lport_arg; | |
39236c6e A |
1819 | int found = 0; |
1820 | struct inpcb *local_wild = NULL; | |
1821 | #if INET6 | |
1822 | struct inpcb *local_wild_mapped = NULL; | |
1823 | #endif /* INET6 */ | |
6d2010ae A |
1824 | |
1825 | *uid = UID_MAX; | |
1826 | *gid = GID_MAX; | |
316670eb | 1827 | |
6d2010ae A |
1828 | /* |
1829 | * We may have found the pcb in the last lookup - check this first. | |
1830 | */ | |
1831 | ||
39236c6e | 1832 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
6d2010ae A |
1833 | |
1834 | /* | |
1835 | * First look for an exact match. | |
1836 | */ | |
39236c6e A |
1837 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, |
1838 | pcbinfo->ipi_hashmask)]; | |
6d2010ae A |
1839 | LIST_FOREACH(inp, head, inp_hash) { |
1840 | #if INET6 | |
39236c6e | 1841 | if (!(inp->inp_vflag & INP_IPV4)) |
6d2010ae | 1842 | continue; |
39236c6e | 1843 | #endif /* INET6 */ |
fe8ab488 | 1844 | if (inp_restricted_recv(inp, ifp)) |
316670eb A |
1845 | continue; |
1846 | ||
6d2010ae A |
1847 | if (inp->inp_faddr.s_addr == faddr.s_addr && |
1848 | inp->inp_laddr.s_addr == laddr.s_addr && | |
1849 | inp->inp_fport == fport && | |
1850 | inp->inp_lport == lport) { | |
1851 | if ((found = (inp->inp_socket != NULL))) { | |
1852 | /* | |
1853 | * Found. | |
1854 | */ | |
316670eb A |
1855 | *uid = kauth_cred_getuid( |
1856 | inp->inp_socket->so_cred); | |
1857 | *gid = kauth_cred_getgid( | |
1858 | inp->inp_socket->so_cred); | |
6d2010ae | 1859 | } |
39236c6e | 1860 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae A |
1861 | return (found); |
1862 | } | |
1863 | } | |
6d2010ae | 1864 | |
39236c6e A |
1865 | if (!wildcard) { |
1866 | /* | |
1867 | * Not found. | |
1868 | */ | |
1869 | lck_rw_done(pcbinfo->ipi_lock); | |
1870 | return (0); | |
1871 | } | |
316670eb | 1872 | |
39236c6e A |
1873 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, |
1874 | pcbinfo->ipi_hashmask)]; | |
1875 | LIST_FOREACH(inp, head, inp_hash) { | |
6d2010ae | 1876 | #if INET6 |
39236c6e A |
1877 | if (!(inp->inp_vflag & INP_IPV4)) |
1878 | continue; | |
6d2010ae | 1879 | #endif /* INET6 */ |
fe8ab488 | 1880 | if (inp_restricted_recv(inp, ifp)) |
39236c6e A |
1881 | continue; |
1882 | ||
1883 | if (inp->inp_faddr.s_addr == INADDR_ANY && | |
1884 | inp->inp_lport == lport) { | |
1885 | if (inp->inp_laddr.s_addr == laddr.s_addr) { | |
1886 | if ((found = (inp->inp_socket != NULL))) { | |
316670eb | 1887 | *uid = kauth_cred_getuid( |
39236c6e | 1888 | inp->inp_socket->so_cred); |
316670eb | 1889 | *gid = kauth_cred_getgid( |
39236c6e | 1890 | inp->inp_socket->so_cred); |
6d2010ae | 1891 | } |
39236c6e | 1892 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae | 1893 | return (found); |
39236c6e A |
1894 | } else if (inp->inp_laddr.s_addr == INADDR_ANY) { |
1895 | #if INET6 | |
1896 | if (inp->inp_socket && | |
1897 | SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) | |
1898 | local_wild_mapped = inp; | |
1899 | else | |
6d2010ae | 1900 | #endif /* INET6 */ |
39236c6e A |
1901 | local_wild = inp; |
1902 | } | |
6d2010ae | 1903 | } |
39236c6e A |
1904 | } |
1905 | if (local_wild == NULL) { | |
1906 | #if INET6 | |
1907 | if (local_wild_mapped != NULL) { | |
1908 | if ((found = (local_wild_mapped->inp_socket != NULL))) { | |
316670eb | 1909 | *uid = kauth_cred_getuid( |
39236c6e | 1910 | local_wild_mapped->inp_socket->so_cred); |
316670eb | 1911 | *gid = kauth_cred_getgid( |
39236c6e | 1912 | local_wild_mapped->inp_socket->so_cred); |
6d2010ae | 1913 | } |
39236c6e | 1914 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae A |
1915 | return (found); |
1916 | } | |
39236c6e A |
1917 | #endif /* INET6 */ |
1918 | lck_rw_done(pcbinfo->ipi_lock); | |
1919 | return (0); | |
6d2010ae | 1920 | } |
39236c6e A |
1921 | if ((found = (local_wild->inp_socket != NULL))) { |
1922 | *uid = kauth_cred_getuid( | |
1923 | local_wild->inp_socket->so_cred); | |
1924 | *gid = kauth_cred_getgid( | |
1925 | local_wild->inp_socket->so_cred); | |
1926 | } | |
1927 | lck_rw_done(pcbinfo->ipi_lock); | |
1928 | return (found); | |
6d2010ae A |
1929 | } |
1930 | ||
1c79356b A |
1931 | /* |
1932 | * Lookup PCB in hash list. | |
1933 | */ | |
1934 | struct inpcb * | |
39236c6e A |
1935 | in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
1936 | u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard, | |
1937 | struct ifnet *ifp) | |
1c79356b A |
1938 | { |
1939 | struct inpcbhead *head; | |
2d21ac55 | 1940 | struct inpcb *inp; |
1c79356b | 1941 | u_short fport = fport_arg, lport = lport_arg; |
39236c6e A |
1942 | struct inpcb *local_wild = NULL; |
1943 | #if INET6 | |
1944 | struct inpcb *local_wild_mapped = NULL; | |
1945 | #endif /* INET6 */ | |
1c79356b A |
1946 | |
1947 | /* | |
1948 | * We may have found the pcb in the last lookup - check this first. | |
1949 | */ | |
1950 | ||
39236c6e | 1951 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
1c79356b A |
1952 | |
1953 | /* | |
1954 | * First look for an exact match. | |
1955 | */ | |
39236c6e A |
1956 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, |
1957 | pcbinfo->ipi_hashmask)]; | |
9bccf70c A |
1958 | LIST_FOREACH(inp, head, inp_hash) { |
1959 | #if INET6 | |
39236c6e | 1960 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1961 | continue; |
39236c6e | 1962 | #endif /* INET6 */ |
fe8ab488 | 1963 | if (inp_restricted_recv(inp, ifp)) |
316670eb A |
1964 | continue; |
1965 | ||
1c79356b A |
1966 | if (inp->inp_faddr.s_addr == faddr.s_addr && |
1967 | inp->inp_laddr.s_addr == laddr.s_addr && | |
1968 | inp->inp_fport == fport && | |
1969 | inp->inp_lport == lport) { | |
1970 | /* | |
1971 | * Found. | |
1972 | */ | |
39236c6e A |
1973 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != |
1974 | WNT_STOPUSING) { | |
1975 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 1976 | return (inp); |
39236c6e A |
1977 | } else { |
1978 | /* it's there but dead, say it isn't found */ | |
1979 | lck_rw_done(pcbinfo->ipi_lock); | |
316670eb | 1980 | return (NULL); |
91447636 | 1981 | } |
1c79356b A |
1982 | } |
1983 | } | |
1c79356b | 1984 | |
39236c6e A |
1985 | if (!wildcard) { |
1986 | /* | |
1987 | * Not found. | |
1988 | */ | |
1989 | lck_rw_done(pcbinfo->ipi_lock); | |
1990 | return (NULL); | |
1991 | } | |
1992 | ||
1993 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, | |
1994 | pcbinfo->ipi_hashmask)]; | |
1995 | LIST_FOREACH(inp, head, inp_hash) { | |
9bccf70c | 1996 | #if INET6 |
39236c6e A |
1997 | if (!(inp->inp_vflag & INP_IPV4)) |
1998 | continue; | |
1999 | #endif /* INET6 */ | |
fe8ab488 | 2000 | if (inp_restricted_recv(inp, ifp)) |
39236c6e A |
2001 | continue; |
2002 | ||
2003 | if (inp->inp_faddr.s_addr == INADDR_ANY && | |
2004 | inp->inp_lport == lport) { | |
2005 | if (inp->inp_laddr.s_addr == laddr.s_addr) { | |
2006 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != | |
2007 | WNT_STOPUSING) { | |
2008 | lck_rw_done(pcbinfo->ipi_lock); | |
2009 | return (inp); | |
2010 | } else { | |
2011 | /* it's dead; say it isn't found */ | |
2012 | lck_rw_done(pcbinfo->ipi_lock); | |
2013 | return (NULL); | |
91447636 | 2014 | } |
39236c6e | 2015 | } else if (inp->inp_laddr.s_addr == INADDR_ANY) { |
2d21ac55 | 2016 | #if INET6 |
39236c6e A |
2017 | if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) |
2018 | local_wild_mapped = inp; | |
2019 | else | |
2d21ac55 | 2020 | #endif /* INET6 */ |
1c79356b | 2021 | local_wild = inp; |
1c79356b A |
2022 | } |
2023 | } | |
39236c6e A |
2024 | } |
2025 | if (local_wild == NULL) { | |
2d21ac55 | 2026 | #if INET6 |
39236c6e A |
2027 | if (local_wild_mapped != NULL) { |
2028 | if (in_pcb_checkstate(local_wild_mapped, | |
2029 | WNT_ACQUIRE, 0) != WNT_STOPUSING) { | |
2030 | lck_rw_done(pcbinfo->ipi_lock); | |
2031 | return (local_wild_mapped); | |
2032 | } else { | |
2033 | /* it's dead; say it isn't found */ | |
2034 | lck_rw_done(pcbinfo->ipi_lock); | |
2035 | return (NULL); | |
91447636 | 2036 | } |
91447636 | 2037 | } |
39236c6e A |
2038 | #endif /* INET6 */ |
2039 | lck_rw_done(pcbinfo->ipi_lock); | |
2040 | return (NULL); | |
2041 | } | |
2042 | if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) { | |
2043 | lck_rw_done(pcbinfo->ipi_lock); | |
2044 | return (local_wild); | |
1c79356b | 2045 | } |
1c79356b | 2046 | /* |
39236c6e | 2047 | * It's either not found or is already dead. |
1c79356b | 2048 | */ |
39236c6e | 2049 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
2050 | return (NULL); |
2051 | } | |
2052 | ||
2053 | /* | |
4bd07ac2 A |
2054 | * @brief Insert PCB onto various hash lists. |
2055 | * | |
2056 | * @param inp Pointer to internet protocol control block | |
2057 | * @param locked Implies if ipi_lock (protecting pcb list) | |
2058 | * is already locked or not. | |
2059 | * | |
2060 | * @return int error on failure and 0 on success | |
1c79356b A |
2061 | */ |
2062 | int | |
2d21ac55 | 2063 | in_pcbinshash(struct inpcb *inp, int locked) |
1c79356b A |
2064 | { |
2065 | struct inpcbhead *pcbhash; | |
2066 | struct inpcbporthead *pcbporthash; | |
2067 | struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; | |
2068 | struct inpcbport *phd; | |
2069 | u_int32_t hashkey_faddr; | |
2070 | ||
39236c6e A |
2071 | if (!locked) { |
2072 | if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) { | |
2073 | /* | |
2074 | * Lock inversion issue, mostly with udp | |
2075 | * multicast packets | |
2076 | */ | |
2077 | socket_unlock(inp->inp_socket, 0); | |
2078 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); | |
2079 | socket_lock(inp->inp_socket, 0); | |
39236c6e A |
2080 | } |
2081 | } | |
b0d623f7 | 2082 | |
4bd07ac2 A |
2083 | /* |
2084 | * This routine or its caller may have given up | |
2085 | * socket's protocol lock briefly. | |
2086 | * During that time the socket may have been dropped. | |
2087 | * Safe-guarding against that. | |
2088 | */ | |
2089 | if (inp->inp_state == INPCB_STATE_DEAD) { | |
2090 | if (!locked) { | |
2091 | lck_rw_done(pcbinfo->ipi_lock); | |
2092 | } | |
2093 | return (ECONNABORTED); | |
2094 | } | |
2095 | ||
2096 | ||
1c79356b A |
2097 | #if INET6 |
2098 | if (inp->inp_vflag & INP_IPV6) | |
2099 | hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; | |
2100 | else | |
2101 | #endif /* INET6 */ | |
39236c6e | 2102 | hashkey_faddr = inp->inp_faddr.s_addr; |
1c79356b | 2103 | |
39236c6e A |
2104 | inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, |
2105 | inp->inp_fport, pcbinfo->ipi_hashmask); | |
91447636 | 2106 | |
39236c6e | 2107 | pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element]; |
1c79356b | 2108 | |
39236c6e A |
2109 | pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport, |
2110 | pcbinfo->ipi_porthashmask)]; | |
1c79356b A |
2111 | |
2112 | /* | |
2113 | * Go through port list and look for a head for this lport. | |
2114 | */ | |
9bccf70c | 2115 | LIST_FOREACH(phd, pcbporthash, phd_hash) { |
1c79356b A |
2116 | if (phd->phd_port == inp->inp_lport) |
2117 | break; | |
2118 | } | |
316670eb | 2119 | |
1c79356b A |
2120 | /* |
2121 | * If none exists, malloc one and tack it on. | |
2122 | */ | |
2123 | if (phd == NULL) { | |
39236c6e A |
2124 | MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport), |
2125 | M_PCB, M_WAITOK); | |
1c79356b | 2126 | if (phd == NULL) { |
91447636 | 2127 | if (!locked) |
39236c6e | 2128 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
2129 | return (ENOBUFS); /* XXX */ |
2130 | } | |
2131 | phd->phd_port = inp->inp_lport; | |
2132 | LIST_INIT(&phd->phd_pcblist); | |
2133 | LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); | |
2134 | } | |
fe8ab488 A |
2135 | |
2136 | VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); | |
1c79356b A |
2137 | inp->inp_phd = phd; |
2138 | LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); | |
2139 | LIST_INSERT_HEAD(pcbhash, inp, inp_hash); | |
fe8ab488 A |
2140 | inp->inp_flags2 |= INP2_INHASHLIST; |
2141 | ||
91447636 | 2142 | if (!locked) |
39236c6e | 2143 | lck_rw_done(pcbinfo->ipi_lock); |
fe8ab488 A |
2144 | |
2145 | #if NECP | |
2146 | // This call catches the original setting of the local address | |
2147 | inp_update_necp_policy(inp, NULL, NULL, 0); | |
2148 | #endif /* NECP */ | |
2149 | ||
1c79356b A |
2150 | return (0); |
2151 | } | |
2152 | ||
2153 | /* | |
2154 | * Move PCB to the proper hash bucket when { faddr, fport } have been | |
2155 | * changed. NOTE: This does not handle the case of the lport changing (the | |
2156 | * hashed port list would have to be updated as well), so the lport must | |
2157 | * not change after in_pcbinshash() has been called. | |
2158 | */ | |
2159 | void | |
2d21ac55 | 2160 | in_pcbrehash(struct inpcb *inp) |
1c79356b A |
2161 | { |
2162 | struct inpcbhead *head; | |
2163 | u_int32_t hashkey_faddr; | |
2164 | ||
2165 | #if INET6 | |
2166 | if (inp->inp_vflag & INP_IPV6) | |
2167 | hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; | |
2168 | else | |
2169 | #endif /* INET6 */ | |
39236c6e A |
2170 | hashkey_faddr = inp->inp_faddr.s_addr; |
2171 | ||
2172 | inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, | |
2173 | inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask); | |
2174 | head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element]; | |
1c79356b | 2175 | |
fe8ab488 A |
2176 | if (inp->inp_flags2 & INP2_INHASHLIST) { |
2177 | LIST_REMOVE(inp, inp_hash); | |
2178 | inp->inp_flags2 &= ~INP2_INHASHLIST; | |
2179 | } | |
2180 | ||
2181 | VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); | |
1c79356b | 2182 | LIST_INSERT_HEAD(head, inp, inp_hash); |
fe8ab488 A |
2183 | inp->inp_flags2 |= INP2_INHASHLIST; |
2184 | ||
2185 | #if NECP | |
2186 | // This call catches updates to the remote addresses | |
2187 | inp_update_necp_policy(inp, NULL, NULL, 0); | |
2188 | #endif /* NECP */ | |
1c79356b A |
2189 | } |
2190 | ||
2191 | /* | |
2192 | * Remove PCB from various lists. | |
316670eb | 2193 | * Must be called pcbinfo lock is held in exclusive mode. |
1c79356b A |
2194 | */ |
2195 | void | |
2d21ac55 | 2196 | in_pcbremlists(struct inpcb *inp) |
1c79356b A |
2197 | { |
2198 | inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; | |
1c79356b | 2199 | |
fe8ab488 A |
2200 | /* |
2201 | * Check if it's in hashlist -- an inp is placed in hashlist when | |
2202 | * it's local port gets assigned. So it should also be present | |
2203 | * in the port list. | |
2204 | */ | |
2205 | if (inp->inp_flags2 & INP2_INHASHLIST) { | |
1c79356b A |
2206 | struct inpcbport *phd = inp->inp_phd; |
2207 | ||
fe8ab488 A |
2208 | VERIFY(phd != NULL && inp->inp_lport > 0); |
2209 | ||
1c79356b | 2210 | LIST_REMOVE(inp, inp_hash); |
fe8ab488 A |
2211 | inp->inp_hash.le_next = NULL; |
2212 | inp->inp_hash.le_prev = NULL; | |
2213 | ||
1c79356b | 2214 | LIST_REMOVE(inp, inp_portlist); |
fe8ab488 A |
2215 | inp->inp_portlist.le_next = NULL; |
2216 | inp->inp_portlist.le_prev = NULL; | |
2217 | if (LIST_EMPTY(&phd->phd_pcblist)) { | |
1c79356b A |
2218 | LIST_REMOVE(phd, phd_hash); |
2219 | FREE(phd, M_PCB); | |
2220 | } | |
fe8ab488 A |
2221 | inp->inp_phd = NULL; |
2222 | inp->inp_flags2 &= ~INP2_INHASHLIST; | |
1c79356b | 2223 | } |
fe8ab488 | 2224 | VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); |
39236c6e A |
2225 | |
2226 | if (inp->inp_flags2 & INP2_TIMEWAIT) { | |
2227 | /* Remove from time-wait queue */ | |
2228 | tcp_remove_from_time_wait(inp); | |
2229 | inp->inp_flags2 &= ~INP2_TIMEWAIT; | |
2230 | VERIFY(inp->inp_pcbinfo->ipi_twcount != 0); | |
2231 | inp->inp_pcbinfo->ipi_twcount--; | |
2232 | } else { | |
2233 | /* Remove from global inp list if it is not time-wait */ | |
2234 | LIST_REMOVE(inp, inp_list); | |
2235 | } | |
316670eb | 2236 | |
bd504ef0 | 2237 | if (inp->inp_flags2 & INP2_IN_FCTREE) { |
39236c6e | 2238 | inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE)); |
bd504ef0 A |
2239 | VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE)); |
2240 | } | |
39236c6e | 2241 | |
1c79356b A |
2242 | inp->inp_pcbinfo->ipi_count--; |
2243 | } | |
2244 | ||
39236c6e A |
2245 | /* |
2246 | * Mechanism used to defer the memory release of PCBs | |
2247 | * The pcb list will contain the pcb until the reaper can clean it up if | |
2248 | * the following conditions are met: | |
2249 | * 1) state "DEAD", | |
2250 | * 2) wantcnt is STOPUSING | |
2251 | * 3) usecount is 0 | |
91447636 | 2252 | * This function will be called to either mark the pcb as |
39236c6e | 2253 | */ |
91447636 A |
2254 | int |
2255 | in_pcb_checkstate(struct inpcb *pcb, int mode, int locked) | |
91447636 | 2256 | { |
39236c6e | 2257 | volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt; |
2d21ac55 A |
2258 | UInt32 origwant; |
2259 | UInt32 newwant; | |
91447636 A |
2260 | |
2261 | switch (mode) { | |
39236c6e A |
2262 | case WNT_STOPUSING: |
2263 | /* | |
2264 | * Try to mark the pcb as ready for recycling. CAS with | |
2265 | * STOPUSING, if success we're good, if it's in use, will | |
2266 | * be marked later | |
2267 | */ | |
2268 | if (locked == 0) | |
2269 | socket_lock(pcb->inp_socket, 1); | |
2270 | pcb->inp_state = INPCB_STATE_DEAD; | |
91447636 | 2271 | |
39236c6e A |
2272 | stopusing: |
2273 | if (pcb->inp_socket->so_usecount < 0) { | |
2274 | panic("%s: pcb=%p so=%p usecount is negative\n", | |
2275 | __func__, pcb, pcb->inp_socket); | |
2276 | /* NOTREACHED */ | |
2277 | } | |
2278 | if (locked == 0) | |
2279 | socket_unlock(pcb->inp_socket, 1); | |
91447636 | 2280 | |
39236c6e | 2281 | inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST); |
6d2010ae | 2282 | |
39236c6e A |
2283 | origwant = *wantcnt; |
2284 | if ((UInt16) origwant == 0xffff) /* should stop using */ | |
2285 | return (WNT_STOPUSING); | |
2286 | newwant = 0xffff; | |
2287 | if ((UInt16) origwant == 0) { | |
2288 | /* try to mark it as unsuable now */ | |
2289 | OSCompareAndSwap(origwant, newwant, wantcnt); | |
2290 | } | |
2291 | return (WNT_STOPUSING); | |
2292 | break; | |
91447636 | 2293 | |
39236c6e A |
2294 | case WNT_ACQUIRE: |
2295 | /* | |
2296 | * Try to increase reference to pcb. If WNT_STOPUSING | |
2297 | * should bail out. If socket state DEAD, try to set count | |
2298 | * to STOPUSING, return failed otherwise increase cnt. | |
2299 | */ | |
2300 | do { | |
91447636 | 2301 | origwant = *wantcnt; |
39236c6e A |
2302 | if ((UInt16) origwant == 0xffff) { |
2303 | /* should stop using */ | |
91447636 | 2304 | return (WNT_STOPUSING); |
91447636 | 2305 | } |
39236c6e A |
2306 | newwant = origwant + 1; |
2307 | } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); | |
2308 | return (WNT_ACQUIRE); | |
2309 | break; | |
91447636 | 2310 | |
39236c6e A |
2311 | case WNT_RELEASE: |
2312 | /* | |
2313 | * Release reference. If result is null and pcb state | |
2314 | * is DEAD, set wanted bit to STOPUSING | |
2315 | */ | |
2316 | if (locked == 0) | |
2317 | socket_lock(pcb->inp_socket, 1); | |
91447636 | 2318 | |
39236c6e A |
2319 | do { |
2320 | origwant = *wantcnt; | |
2321 | if ((UInt16) origwant == 0x0) { | |
2322 | panic("%s: pcb=%p release with zero count", | |
2323 | __func__, pcb); | |
2324 | /* NOTREACHED */ | |
2325 | } | |
2326 | if ((UInt16) origwant == 0xffff) { | |
2327 | /* should stop using */ | |
2328 | if (locked == 0) | |
2329 | socket_unlock(pcb->inp_socket, 1); | |
2330 | return (WNT_STOPUSING); | |
2331 | } | |
2332 | newwant = origwant - 1; | |
2333 | } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); | |
2334 | ||
2335 | if (pcb->inp_state == INPCB_STATE_DEAD) | |
2336 | goto stopusing; | |
2337 | if (pcb->inp_socket->so_usecount < 0) { | |
2338 | panic("%s: RELEASE pcb=%p so=%p usecount is negative\n", | |
2339 | __func__, pcb, pcb->inp_socket); | |
2340 | /* NOTREACHED */ | |
2341 | } | |
91447636 | 2342 | |
39236c6e A |
2343 | if (locked == 0) |
2344 | socket_unlock(pcb->inp_socket, 1); | |
2345 | return (WNT_RELEASE); | |
2346 | break; | |
91447636 | 2347 | |
39236c6e A |
2348 | default: |
2349 | panic("%s: so=%p not a valid state =%x\n", __func__, | |
2350 | pcb->inp_socket, mode); | |
2351 | /* NOTREACHED */ | |
91447636 A |
2352 | } |
2353 | ||
2354 | /* NOTREACHED */ | |
2355 | return (mode); | |
2356 | } | |
2357 | ||
2358 | /* | |
2359 | * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat. | |
2360 | * The inpcb_compat data structure is passed to user space and must | |
b0d623f7 | 2361 | * not change. We intentionally avoid copying pointers. |
91447636 A |
2362 | */ |
2363 | void | |
39236c6e | 2364 | inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat) |
91447636 | 2365 | { |
39236c6e | 2366 | bzero(inp_compat, sizeof (*inp_compat)); |
91447636 A |
2367 | inp_compat->inp_fport = inp->inp_fport; |
2368 | inp_compat->inp_lport = inp->inp_lport; | |
316670eb | 2369 | inp_compat->nat_owner = 0; |
39236c6e | 2370 | inp_compat->nat_cookie = 0; |
91447636 A |
2371 | inp_compat->inp_gencnt = inp->inp_gencnt; |
2372 | inp_compat->inp_flags = inp->inp_flags; | |
2373 | inp_compat->inp_flow = inp->inp_flow; | |
2374 | inp_compat->inp_vflag = inp->inp_vflag; | |
2375 | inp_compat->inp_ip_ttl = inp->inp_ip_ttl; | |
2376 | inp_compat->inp_ip_p = inp->inp_ip_p; | |
39236c6e A |
2377 | inp_compat->inp_dependfaddr.inp6_foreign = |
2378 | inp->inp_dependfaddr.inp6_foreign; | |
2379 | inp_compat->inp_dependladdr.inp6_local = | |
2380 | inp->inp_dependladdr.inp6_local; | |
91447636 | 2381 | inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; |
39236c6e | 2382 | inp_compat->inp_depend6.inp6_hlim = 0; |
91447636 | 2383 | inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; |
39236c6e | 2384 | inp_compat->inp_depend6.inp6_ifindex = 0; |
91447636 A |
2385 | inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; |
2386 | } | |
9bccf70c | 2387 | |
b0d623f7 | 2388 | void |
39236c6e | 2389 | inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp) |
b0d623f7 | 2390 | { |
6d2010ae A |
2391 | xinp->inp_fport = inp->inp_fport; |
2392 | xinp->inp_lport = inp->inp_lport; | |
2393 | xinp->inp_gencnt = inp->inp_gencnt; | |
2394 | xinp->inp_flags = inp->inp_flags; | |
2395 | xinp->inp_flow = inp->inp_flow; | |
2396 | xinp->inp_vflag = inp->inp_vflag; | |
2397 | xinp->inp_ip_ttl = inp->inp_ip_ttl; | |
2398 | xinp->inp_ip_p = inp->inp_ip_p; | |
2399 | xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign; | |
2400 | xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local; | |
2401 | xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; | |
39236c6e | 2402 | xinp->inp_depend6.inp6_hlim = 0; |
6d2010ae | 2403 | xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; |
39236c6e | 2404 | xinp->inp_depend6.inp6_ifindex = 0; |
6d2010ae | 2405 | xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; |
b0d623f7 A |
2406 | } |
2407 | ||
b0d623f7 A |
2408 | /* |
2409 | * The following routines implement this scheme: | |
2410 | * | |
2411 | * Callers of ip_output() that intend to cache the route in the inpcb pass | |
2412 | * a local copy of the struct route to ip_output(). Using a local copy of | |
2413 | * the cached route significantly simplifies things as IP no longer has to | |
2414 | * worry about having exclusive access to the passed in struct route, since | |
2415 | * it's defined in the caller's stack; in essence, this allows for a lock- | |
2416 | * less operation when updating the struct route at the IP level and below, | |
2417 | * whenever necessary. The scheme works as follows: | |
2418 | * | |
2419 | * Prior to dropping the socket's lock and calling ip_output(), the caller | |
2420 | * copies the struct route from the inpcb into its stack, and adds a reference | |
2421 | * to the cached route entry, if there was any. The socket's lock is then | |
2422 | * dropped and ip_output() is called with a pointer to the copy of struct | |
2423 | * route defined on the stack (not to the one in the inpcb.) | |
2424 | * | |
2425 | * Upon returning from ip_output(), the caller then acquires the socket's | |
2426 | * lock and synchronizes the cache; if there is no route cached in the inpcb, | |
2427 | * it copies the local copy of struct route (which may or may not contain any | |
2428 | * route) back into the cache; otherwise, if the inpcb has a route cached in | |
2429 | * it, the one in the local copy will be freed, if there's any. Trashing the | |
2430 | * cached route in the inpcb can be avoided because ip_output() is single- | |
2431 | * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized | |
2432 | * by the socket/transport layer.) | |
2433 | */ | |
2434 | void | |
2435 | inp_route_copyout(struct inpcb *inp, struct route *dst) | |
2436 | { | |
2437 | struct route *src = &inp->inp_route; | |
2438 | ||
6d2010ae | 2439 | lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); |
b0d623f7 | 2440 | |
0b4c1975 | 2441 | /* |
39236c6e | 2442 | * If the route in the PCB is stale or not for IPv4, blow it away; |
0b4c1975 A |
2443 | * this is possible in the case of IPv4-mapped address case. |
2444 | */ | |
39236c6e A |
2445 | if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) |
2446 | ROUTE_RELEASE(src); | |
316670eb | 2447 | |
39236c6e | 2448 | route_copyout(dst, src, sizeof (*dst)); |
b0d623f7 A |
2449 | } |
2450 | ||
2451 | void | |
2452 | inp_route_copyin(struct inpcb *inp, struct route *src) | |
2453 | { | |
2454 | struct route *dst = &inp->inp_route; | |
2455 | ||
6d2010ae | 2456 | lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); |
b0d623f7 A |
2457 | |
2458 | /* Minor sanity check */ | |
2459 | if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) | |
2460 | panic("%s: wrong or corrupted route: %p", __func__, src); | |
2461 | ||
39236c6e | 2462 | route_copyin(src, dst, sizeof (*src)); |
6d2010ae A |
2463 | } |
2464 | ||
2465 | /* | |
2466 | * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option. | |
2467 | */ | |
316670eb | 2468 | int |
39236c6e | 2469 | inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp) |
6d2010ae | 2470 | { |
316670eb A |
2471 | struct ifnet *ifp = NULL; |
2472 | ||
2473 | ifnet_head_lock_shared(); | |
2474 | if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE && | |
2475 | (ifp = ifindex2ifnet[ifscope]) == NULL)) { | |
2476 | ifnet_head_done(); | |
2477 | return (ENXIO); | |
2478 | } | |
2479 | ifnet_head_done(); | |
2480 | ||
2481 | VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE); | |
2482 | ||
6d2010ae A |
2483 | /* |
2484 | * A zero interface scope value indicates an "unbind". | |
2485 | * Otherwise, take in whatever value the app desires; | |
2486 | * the app may already know the scope (or force itself | |
2487 | * to such a scope) ahead of time before the interface | |
2488 | * gets attached. It doesn't matter either way; any | |
2489 | * route lookup from this point on will require an | |
2490 | * exact match for the embedded interface scope. | |
2491 | */ | |
316670eb A |
2492 | inp->inp_boundifp = ifp; |
2493 | if (inp->inp_boundifp == NULL) | |
6d2010ae A |
2494 | inp->inp_flags &= ~INP_BOUND_IF; |
2495 | else | |
2496 | inp->inp_flags |= INP_BOUND_IF; | |
2497 | ||
2498 | /* Blow away any cached route in the PCB */ | |
39236c6e A |
2499 | ROUTE_RELEASE(&inp->inp_route); |
2500 | ||
2501 | if (pifp != NULL) | |
2502 | *pifp = ifp; | |
316670eb A |
2503 | |
2504 | return (0); | |
6d2010ae A |
2505 | } |
2506 | ||
2507 | /* | |
39236c6e A |
2508 | * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option, |
2509 | * as well as for setting PROC_UUID_NO_CELLULAR policy. | |
6d2010ae | 2510 | */ |
39236c6e A |
2511 | void |
2512 | inp_set_nocellular(struct inpcb *inp) | |
6d2010ae | 2513 | { |
39236c6e | 2514 | inp->inp_flags |= INP_NO_IFT_CELLULAR; |
6d2010ae A |
2515 | |
2516 | /* Blow away any cached route in the PCB */ | |
39236c6e A |
2517 | ROUTE_RELEASE(&inp->inp_route); |
2518 | } | |
2519 | ||
2520 | /* | |
2521 | * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option, | |
2522 | * as well as for clearing PROC_UUID_NO_CELLULAR policy. | |
2523 | */ | |
2524 | void | |
2525 | inp_clear_nocellular(struct inpcb *inp) | |
2526 | { | |
2527 | struct socket *so = inp->inp_socket; | |
2528 | ||
2529 | /* | |
2530 | * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket | |
2531 | * has a higher precendence than INP_NO_IFT_CELLULAR. Clear the flag | |
2532 | * if and only if the socket is unrestricted. | |
2533 | */ | |
2534 | if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) { | |
2535 | inp->inp_flags &= ~INP_NO_IFT_CELLULAR; | |
2536 | ||
2537 | /* Blow away any cached route in the PCB */ | |
2538 | ROUTE_RELEASE(&inp->inp_route); | |
6d2010ae | 2539 | } |
39236c6e | 2540 | } |
6d2010ae | 2541 | |
fe8ab488 A |
2542 | void |
2543 | inp_set_noexpensive(struct inpcb *inp) | |
2544 | { | |
2545 | inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE; | |
2546 | ||
2547 | /* Blow away any cached route in the PCB */ | |
2548 | ROUTE_RELEASE(&inp->inp_route); | |
2549 | } | |
2550 | ||
2551 | void | |
2552 | inp_set_awdl_unrestricted(struct inpcb *inp) | |
2553 | { | |
2554 | inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED; | |
2555 | ||
2556 | /* Blow away any cached route in the PCB */ | |
2557 | ROUTE_RELEASE(&inp->inp_route); | |
2558 | } | |
2559 | ||
2560 | boolean_t | |
2561 | inp_get_awdl_unrestricted(struct inpcb *inp) | |
2562 | { | |
2563 | return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE; | |
2564 | } | |
2565 | ||
2566 | void | |
2567 | inp_clear_awdl_unrestricted(struct inpcb *inp) | |
2568 | { | |
2569 | inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED; | |
2570 | ||
2571 | /* Blow away any cached route in the PCB */ | |
2572 | ROUTE_RELEASE(&inp->inp_route); | |
2573 | } | |
2574 | ||
2575 | #if NECP | |
39236c6e | 2576 | /* |
fe8ab488 | 2577 | * Called when PROC_UUID_NECP_APP_POLICY is set. |
39236c6e A |
2578 | */ |
2579 | void | |
fe8ab488 | 2580 | inp_set_want_app_policy(struct inpcb *inp) |
39236c6e | 2581 | { |
fe8ab488 | 2582 | inp->inp_flags2 |= INP2_WANT_APP_POLICY; |
39236c6e A |
2583 | } |
2584 | ||
2585 | /* | |
fe8ab488 | 2586 | * Called when PROC_UUID_NECP_APP_POLICY is cleared. |
39236c6e A |
2587 | */ |
2588 | void | |
fe8ab488 | 2589 | inp_clear_want_app_policy(struct inpcb *inp) |
39236c6e | 2590 | { |
fe8ab488 | 2591 | inp->inp_flags2 &= ~INP2_WANT_APP_POLICY; |
b0d623f7 | 2592 | } |
fe8ab488 | 2593 | #endif /* NECP */ |
316670eb A |
2594 | |
2595 | /* | |
2596 | * Calculate flow hash for an inp, used by an interface to identify a | |
2597 | * flow. When an interface provides flow control advisory, this flow | |
2598 | * hash is used as an identifier. | |
2599 | */ | |
2600 | u_int32_t | |
2601 | inp_calc_flowhash(struct inpcb *inp) | |
2602 | { | |
2603 | struct inp_flowhash_key fh __attribute__((aligned(8))); | |
2604 | u_int32_t flowhash = 0; | |
bd504ef0 | 2605 | struct inpcb *tmp_inp = NULL; |
316670eb A |
2606 | |
2607 | if (inp_hash_seed == 0) | |
2608 | inp_hash_seed = RandomULong(); | |
2609 | ||
2610 | bzero(&fh, sizeof (fh)); | |
2611 | ||
2612 | bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr)); | |
2613 | bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr)); | |
2614 | ||
2615 | fh.infh_lport = inp->inp_lport; | |
2616 | fh.infh_fport = inp->inp_fport; | |
2617 | fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET; | |
2618 | fh.infh_proto = inp->inp_ip_p; | |
2619 | fh.infh_rand1 = RandomULong(); | |
2620 | fh.infh_rand2 = RandomULong(); | |
2621 | ||
2622 | try_again: | |
2623 | flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed); | |
2624 | if (flowhash == 0) { | |
2625 | /* try to get a non-zero flowhash */ | |
2626 | inp_hash_seed = RandomULong(); | |
2627 | goto try_again; | |
2628 | } | |
2629 | ||
bd504ef0 | 2630 | inp->inp_flowhash = flowhash; |
316670eb | 2631 | |
bd504ef0 | 2632 | /* Insert the inp into inp_fc_tree */ |
39236c6e | 2633 | lck_mtx_lock_spin(&inp_fc_lck); |
bd504ef0 A |
2634 | tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp); |
2635 | if (tmp_inp != NULL) { | |
316670eb | 2636 | /* |
bd504ef0 A |
2637 | * There is a different inp with the same flowhash. |
2638 | * There can be a collision on flow hash but the | |
39236c6e | 2639 | * probability is low. Let's recompute the |
bd504ef0 | 2640 | * flowhash. |
316670eb A |
2641 | */ |
2642 | lck_mtx_unlock(&inp_fc_lck); | |
bd504ef0 A |
2643 | /* recompute hash seed */ |
2644 | inp_hash_seed = RandomULong(); | |
2645 | goto try_again; | |
316670eb | 2646 | } |
39236c6e | 2647 | |
bd504ef0 A |
2648 | RB_INSERT(inp_fc_tree, &inp_fc_tree, inp); |
2649 | inp->inp_flags2 |= INP2_IN_FCTREE; | |
316670eb | 2650 | lck_mtx_unlock(&inp_fc_lck); |
bd504ef0 | 2651 | |
39236c6e A |
2652 | return (flowhash); |
2653 | } | |
2654 | ||
2655 | void | |
2656 | inp_flowadv(uint32_t flowhash) | |
2657 | { | |
2658 | struct inpcb *inp; | |
2659 | ||
2660 | inp = inp_fc_getinp(flowhash, 0); | |
2661 | ||
2662 | if (inp == NULL) | |
2663 | return; | |
2664 | inp_fc_feedback(inp); | |
316670eb A |
2665 | } |
2666 | ||
bd504ef0 A |
2667 | /* |
2668 | * Function to compare inp_fc_entries in inp flow control tree | |
2669 | */ | |
2670 | static inline int | |
2671 | infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2) | |
316670eb | 2672 | { |
bd504ef0 | 2673 | return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash), |
39236c6e | 2674 | sizeof(inp1->inp_flowhash))); |
bd504ef0 | 2675 | } |
316670eb | 2676 | |
39236c6e | 2677 | static struct inpcb * |
bd504ef0 A |
2678 | inp_fc_getinp(u_int32_t flowhash, u_int32_t flags) |
2679 | { | |
2680 | struct inpcb *inp = NULL; | |
2681 | int locked = (flags & INPFC_SOLOCKED) ? 1 : 0; | |
316670eb A |
2682 | |
2683 | lck_mtx_lock_spin(&inp_fc_lck); | |
bd504ef0 A |
2684 | key_inp.inp_flowhash = flowhash; |
2685 | inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp); | |
2686 | if (inp == NULL) { | |
316670eb A |
2687 | /* inp is not present, return */ |
2688 | lck_mtx_unlock(&inp_fc_lck); | |
2689 | return (NULL); | |
2690 | } | |
2691 | ||
bd504ef0 A |
2692 | if (flags & INPFC_REMOVE) { |
2693 | RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp); | |
2694 | lck_mtx_unlock(&inp_fc_lck); | |
316670eb | 2695 | |
bd504ef0 A |
2696 | bzero(&(inp->infc_link), sizeof (inp->infc_link)); |
2697 | inp->inp_flags2 &= ~INP2_IN_FCTREE; | |
2698 | return (NULL); | |
316670eb | 2699 | } |
39236c6e | 2700 | |
bd504ef0 A |
2701 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) |
2702 | inp = NULL; | |
316670eb A |
2703 | lck_mtx_unlock(&inp_fc_lck); |
2704 | ||
bd504ef0 | 2705 | return (inp); |
316670eb A |
2706 | } |
2707 | ||
39236c6e | 2708 | static void |
316670eb A |
2709 | inp_fc_feedback(struct inpcb *inp) |
2710 | { | |
2711 | struct socket *so = inp->inp_socket; | |
2712 | ||
2713 | /* we already hold a want_cnt on this inp, socket can't be null */ | |
39236c6e | 2714 | VERIFY(so != NULL); |
316670eb A |
2715 | socket_lock(so, 1); |
2716 | ||
2717 | if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { | |
2718 | socket_unlock(so, 1); | |
2719 | return; | |
2720 | } | |
2721 | ||
fe8ab488 A |
2722 | if (inp->inp_sndinprog_cnt > 0) |
2723 | inp->inp_flags |= INP_FC_FEEDBACK; | |
2724 | ||
316670eb A |
2725 | /* |
2726 | * Return if the connection is not in flow-controlled state. | |
2727 | * This can happen if the connection experienced | |
2728 | * loss while it was in flow controlled state | |
2729 | */ | |
2730 | if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) { | |
2731 | socket_unlock(so, 1); | |
2732 | return; | |
2733 | } | |
2734 | inp_reset_fc_state(inp); | |
2735 | ||
39236c6e | 2736 | if (SOCK_TYPE(so) == SOCK_STREAM) |
316670eb A |
2737 | inp_fc_unthrottle_tcp(inp); |
2738 | ||
2739 | socket_unlock(so, 1); | |
2740 | } | |
2741 | ||
2742 | void | |
2743 | inp_reset_fc_state(struct inpcb *inp) | |
2744 | { | |
2745 | struct socket *so = inp->inp_socket; | |
2746 | int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0; | |
2747 | int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0; | |
2748 | ||
2749 | inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED); | |
2750 | ||
2751 | if (suspended) { | |
2752 | so->so_flags &= ~(SOF_SUSPENDED); | |
2753 | soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME)); | |
2754 | } | |
2755 | ||
316670eb A |
2756 | /* Give a write wakeup to unblock the socket */ |
2757 | if (needwakeup) | |
2758 | sowwakeup(so); | |
2759 | } | |
2760 | ||
2761 | int | |
2762 | inp_set_fc_state(struct inpcb *inp, int advcode) | |
2763 | { | |
bd504ef0 | 2764 | struct inpcb *tmp_inp = NULL; |
316670eb | 2765 | /* |
39236c6e | 2766 | * If there was a feedback from the interface when |
316670eb A |
2767 | * send operation was in progress, we should ignore |
2768 | * this flow advisory to avoid a race between setting | |
2769 | * flow controlled state and receiving feedback from | |
2770 | * the interface | |
2771 | */ | |
2772 | if (inp->inp_flags & INP_FC_FEEDBACK) | |
39236c6e | 2773 | return (0); |
316670eb A |
2774 | |
2775 | inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED); | |
39236c6e A |
2776 | if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash, |
2777 | INPFC_SOLOCKED)) != NULL) { | |
2778 | if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) | |
bd504ef0 A |
2779 | return (0); |
2780 | VERIFY(tmp_inp == inp); | |
316670eb A |
2781 | switch (advcode) { |
2782 | case FADV_FLOW_CONTROLLED: | |
2783 | inp->inp_flags |= INP_FLOW_CONTROLLED; | |
2784 | break; | |
2785 | case FADV_SUSPENDED: | |
2786 | inp->inp_flags |= INP_FLOW_SUSPENDED; | |
2787 | soevent(inp->inp_socket, | |
2788 | (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND)); | |
2789 | ||
2790 | /* Record the fact that suspend event was sent */ | |
2791 | inp->inp_socket->so_flags |= SOF_SUSPENDED; | |
2792 | break; | |
2793 | } | |
bd504ef0 | 2794 | return (1); |
316670eb | 2795 | } |
39236c6e | 2796 | return (0); |
316670eb A |
2797 | } |
2798 | ||
2799 | /* | |
2800 | * Handler for SO_FLUSH socket option. | |
2801 | */ | |
2802 | int | |
2803 | inp_flush(struct inpcb *inp, int optval) | |
2804 | { | |
2805 | u_int32_t flowhash = inp->inp_flowhash; | |
39236c6e | 2806 | struct ifnet *rtifp, *oifp; |
316670eb A |
2807 | |
2808 | /* Either all classes or one of the valid ones */ | |
2809 | if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) | |
2810 | return (EINVAL); | |
2811 | ||
2812 | /* We need a flow hash for identification */ | |
2813 | if (flowhash == 0) | |
2814 | return (0); | |
2815 | ||
39236c6e A |
2816 | /* Grab the interfaces from the route and pcb */ |
2817 | rtifp = ((inp->inp_route.ro_rt != NULL) ? | |
2818 | inp->inp_route.ro_rt->rt_ifp : NULL); | |
2819 | oifp = inp->inp_last_outifp; | |
2820 | ||
2821 | if (rtifp != NULL) | |
2822 | if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0); | |
2823 | if (oifp != NULL && oifp != rtifp) | |
2824 | if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0); | |
316670eb A |
2825 | |
2826 | return (0); | |
2827 | } | |
2828 | ||
2829 | /* | |
2830 | * Clear the INP_INADDR_ANY flag (special case for PPP only) | |
2831 | */ | |
39236c6e A |
2832 | void |
2833 | inp_clear_INP_INADDR_ANY(struct socket *so) | |
316670eb A |
2834 | { |
2835 | struct inpcb *inp = NULL; | |
2836 | ||
2837 | socket_lock(so, 1); | |
2838 | inp = sotoinpcb(so); | |
2839 | if (inp) { | |
2840 | inp->inp_flags &= ~INP_INADDR_ANY; | |
2841 | } | |
2842 | socket_unlock(so, 1); | |
2843 | } | |
2844 | ||
39236c6e A |
2845 | void |
2846 | inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo) | |
2847 | { | |
2848 | struct socket *so = inp->inp_socket; | |
2849 | ||
2850 | soprocinfo->spi_pid = so->last_pid; | |
fe8ab488 A |
2851 | if (so->last_pid != 0) |
2852 | uuid_copy(soprocinfo->spi_uuid, so->last_uuid); | |
39236c6e A |
2853 | /* |
2854 | * When not delegated, the effective pid is the same as the real pid | |
2855 | */ | |
fe8ab488 | 2856 | if (so->so_flags & SOF_DELEGATED) { |
3e170ce0 | 2857 | soprocinfo->spi_delegated = 1; |
39236c6e | 2858 | soprocinfo->spi_epid = so->e_pid; |
3e170ce0 | 2859 | uuid_copy(soprocinfo->spi_euuid, so->e_uuid); |
fe8ab488 | 2860 | } else { |
3e170ce0 | 2861 | soprocinfo->spi_delegated = 0; |
39236c6e | 2862 | soprocinfo->spi_epid = so->last_pid; |
fe8ab488 | 2863 | } |
39236c6e A |
2864 | } |
2865 | ||
2866 | int | |
2867 | inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash, | |
2868 | struct so_procinfo *soprocinfo) | |
2869 | { | |
2870 | struct inpcb *inp = NULL; | |
2871 | int found = 0; | |
2872 | ||
2873 | bzero(soprocinfo, sizeof (struct so_procinfo)); | |
2874 | ||
2875 | if (!flowhash) | |
2876 | return (-1); | |
2877 | ||
2878 | lck_rw_lock_shared(pcbinfo->ipi_lock); | |
2879 | LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { | |
2880 | if (inp->inp_state != INPCB_STATE_DEAD && | |
2881 | inp->inp_socket != NULL && | |
2882 | inp->inp_flowhash == flowhash) { | |
2883 | found = 1; | |
2884 | inp_get_soprocinfo(inp, soprocinfo); | |
2885 | break; | |
2886 | } | |
2887 | } | |
2888 | lck_rw_done(pcbinfo->ipi_lock); | |
2889 | ||
2890 | return (found); | |
2891 | } | |
2892 | ||
2893 | #if CONFIG_PROC_UUID_POLICY | |
2894 | static void | |
2895 | inp_update_cellular_policy(struct inpcb *inp, boolean_t set) | |
2896 | { | |
2897 | struct socket *so = inp->inp_socket; | |
2898 | int before, after; | |
2899 | ||
2900 | VERIFY(so != NULL); | |
2901 | VERIFY(inp->inp_state != INPCB_STATE_DEAD); | |
2902 | ||
fe8ab488 | 2903 | before = INP_NO_CELLULAR(inp); |
39236c6e A |
2904 | if (set) { |
2905 | inp_set_nocellular(inp); | |
2906 | } else { | |
2907 | inp_clear_nocellular(inp); | |
2908 | } | |
fe8ab488 | 2909 | after = INP_NO_CELLULAR(inp); |
39236c6e A |
2910 | if (net_io_policy_log && (before != after)) { |
2911 | static const char *ok = "OK"; | |
2912 | static const char *nok = "NOACCESS"; | |
2913 | uuid_string_t euuid_buf; | |
2914 | pid_t epid; | |
2915 | ||
2916 | if (so->so_flags & SOF_DELEGATED) { | |
2917 | uuid_unparse(so->e_uuid, euuid_buf); | |
2918 | epid = so->e_pid; | |
2919 | } else { | |
2920 | uuid_unparse(so->last_uuid, euuid_buf); | |
2921 | epid = so->last_pid; | |
2922 | } | |
2923 | ||
2924 | /* allow this socket to generate another notification event */ | |
2925 | so->so_ifdenied_notifies = 0; | |
2926 | ||
2927 | log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d " | |
2928 | "euuid %s%s %s->%s\n", __func__, | |
2929 | (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), | |
2930 | SOCK_TYPE(so), epid, euuid_buf, | |
2931 | (so->so_flags & SOF_DELEGATED) ? | |
2932 | " [delegated]" : "", | |
2933 | ((before < after) ? ok : nok), | |
2934 | ((before < after) ? nok : ok)); | |
2935 | } | |
2936 | } | |
2937 | ||
fe8ab488 | 2938 | #if NECP |
39236c6e | 2939 | static void |
fe8ab488 | 2940 | inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set) |
39236c6e A |
2941 | { |
2942 | struct socket *so = inp->inp_socket; | |
2943 | int before, after; | |
2944 | ||
2945 | VERIFY(so != NULL); | |
2946 | VERIFY(inp->inp_state != INPCB_STATE_DEAD); | |
2947 | ||
fe8ab488 | 2948 | before = (inp->inp_flags2 & INP2_WANT_APP_POLICY); |
39236c6e | 2949 | if (set) { |
fe8ab488 | 2950 | inp_set_want_app_policy(inp); |
39236c6e | 2951 | } else { |
fe8ab488 | 2952 | inp_clear_want_app_policy(inp); |
39236c6e | 2953 | } |
fe8ab488 | 2954 | after = (inp->inp_flags2 & INP2_WANT_APP_POLICY); |
39236c6e A |
2955 | if (net_io_policy_log && (before != after)) { |
2956 | static const char *wanted = "WANTED"; | |
2957 | static const char *unwanted = "UNWANTED"; | |
2958 | uuid_string_t euuid_buf; | |
2959 | pid_t epid; | |
2960 | ||
2961 | if (so->so_flags & SOF_DELEGATED) { | |
2962 | uuid_unparse(so->e_uuid, euuid_buf); | |
2963 | epid = so->e_pid; | |
2964 | } else { | |
2965 | uuid_unparse(so->last_uuid, euuid_buf); | |
2966 | epid = so->last_pid; | |
2967 | } | |
2968 | ||
2969 | log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d " | |
2970 | "euuid %s%s %s->%s\n", __func__, | |
2971 | (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), | |
2972 | SOCK_TYPE(so), epid, euuid_buf, | |
2973 | (so->so_flags & SOF_DELEGATED) ? | |
2974 | " [delegated]" : "", | |
2975 | ((before < after) ? unwanted : wanted), | |
2976 | ((before < after) ? wanted : unwanted)); | |
2977 | } | |
2978 | } | |
fe8ab488 | 2979 | #endif /* NECP */ |
39236c6e A |
2980 | #endif /* !CONFIG_PROC_UUID_POLICY */ |
2981 | ||
fe8ab488 A |
2982 | #if NECP |
2983 | void | |
2984 | inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface) | |
2985 | { | |
2986 | necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface); | |
2987 | if (necp_socket_should_rescope(inp) && | |
2988 | inp->inp_lport == 0 && | |
2989 | inp->inp_laddr.s_addr == INADDR_ANY && | |
2990 | IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { | |
2991 | // If we should rescope, and the socket is not yet bound | |
2992 | inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL); | |
2993 | } | |
2994 | } | |
2995 | #endif /* NECP */ | |
2996 | ||
/*
 * Refresh the per-process-UUID policy state cached on the PCB's socket.
 *
 * With CONFIG_PROC_UUID_POLICY, the policy table is consulted using the
 * socket's effective UUID (e_uuid when delegated, last_uuid otherwise).
 * If the policy generation count changed, the cellular policy (and,
 * with NECP, the want-app-policy flag) is updated from the returned
 * flags; a missing table entry (ENOENT) is treated as all flags
 * cleared.  Returns 0 on success or ENOENT, otherwise the lookup error.
 * Nothing is done, and 0 is returned, when net_io_policy_uuid is off,
 * the PCB has no socket or is dead, or the socket is kernel-created
 * and not delegated.
 *
 * Without CONFIG_PROC_UUID_POLICY this is a no-op returning 0.
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t ogencnt;
	int err = 0;

	if (!net_io_policy_uuid) {
		return (0);
	}
	if (so == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		return (0);
	}

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED)) {
		return (0);
	}

	ogencnt = so->so_policy_gencnt;
	err = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
	    so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt);

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go thru the checks below.
	 */
	if (err == ENOENT && ogencnt != 0) {
		so->so_policy_gencnt = 0;
	}

	/* Any error other than ENOENT is handed back unchanged */
	if (err != 0 && err != ENOENT) {
		return (err);
	}

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly.  If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if (ogencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, FALSE);
		}
#endif /* NECP */
	}

	/* err is 0 or ENOENT here; neither is reported as a failure */
	return (0);
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return (0);
#endif /* !CONFIG_PROC_UUID_POLICY */
}
fe8ab488 A |
3057 | /* |
3058 | * Called when we need to enforce policy restrictions in the input path. | |
3059 | * | |
3060 | * Returns TRUE if we're not allowed to receive data, otherwise FALSE. | |
3061 | */ | |
39236c6e | 3062 | boolean_t |
fe8ab488 | 3063 | inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp) |
39236c6e A |
3064 | { |
3065 | VERIFY(inp != NULL); | |
3066 | ||
fe8ab488 A |
3067 | /* |
3068 | * Inbound restrictions. | |
3069 | */ | |
39236c6e A |
3070 | if (!sorestrictrecv) |
3071 | return (FALSE); | |
3072 | ||
fe8ab488 A |
3073 | if (ifp == NULL) |
3074 | return (FALSE); | |
3075 | ||
3076 | if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) | |
3077 | return (TRUE); | |
3078 | ||
3079 | if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) | |
3080 | return (TRUE); | |
3081 | ||
3082 | if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) | |
3083 | return (TRUE); | |
3084 | ||
3085 | if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) | |
39236c6e A |
3086 | return (FALSE); |
3087 | ||
3088 | if (inp->inp_flags & INP_RECV_ANYIF) | |
3089 | return (FALSE); | |
3090 | ||
3091 | if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) | |
3092 | return (FALSE); | |
3093 | ||
3094 | return (TRUE); | |
3095 | } | |
fe8ab488 A |
3096 | |
3097 | /* | |
3098 | * Called when we need to enforce policy restrictions in the output path. | |
3099 | * | |
3100 | * Returns TRUE if we're not allowed to send data out, otherwise FALSE. | |
3101 | */ | |
3102 | boolean_t | |
3103 | inp_restricted_send(struct inpcb *inp, struct ifnet *ifp) | |
3104 | { | |
3105 | VERIFY(inp != NULL); | |
3106 | ||
3107 | /* | |
3108 | * Outbound restrictions. | |
3109 | */ | |
3110 | if (!sorestrictsend) | |
3111 | return (FALSE); | |
3112 | ||
3113 | if (ifp == NULL) | |
3114 | return (FALSE); | |
3115 | ||
3116 | if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) | |
3117 | return (TRUE); | |
3118 | ||
3119 | if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) | |
3120 | return (TRUE); | |
3121 | ||
3122 | if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) | |
3123 | return (TRUE); | |
3124 | ||
3125 | return (FALSE); | |
3126 | } |