]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
39236c6e | 2 | * Copyright (c) 2000-2013 Apple Inc. All rights reserved. |
5d5c5d0d | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
39236c6e | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
39236c6e | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
39236c6e | 17 | * |
2d21ac55 A |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
39236c6e | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* | |
29 | * Copyright (c) 1982, 1986, 1991, 1993, 1995 | |
30 | * The Regents of the University of California. All rights reserved. | |
31 | * | |
32 | * Redistribution and use in source and binary forms, with or without | |
33 | * modification, are permitted provided that the following conditions | |
34 | * are met: | |
35 | * 1. Redistributions of source code must retain the above copyright | |
36 | * notice, this list of conditions and the following disclaimer. | |
37 | * 2. Redistributions in binary form must reproduce the above copyright | |
38 | * notice, this list of conditions and the following disclaimer in the | |
39 | * documentation and/or other materials provided with the distribution. | |
40 | * 3. All advertising materials mentioning features or use of this software | |
41 | * must display the following acknowledgement: | |
42 | * This product includes software developed by the University of | |
43 | * California, Berkeley and its contributors. | |
44 | * 4. Neither the name of the University nor the names of its contributors | |
45 | * may be used to endorse or promote products derived from this software | |
46 | * without specific prior written permission. | |
47 | * | |
48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
58 | * SUCH DAMAGE. | |
59 | * | |
60 | * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 | |
9bccf70c | 61 | * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $ |
1c79356b A |
62 | */ |
63 | ||
64 | #include <sys/param.h> | |
65 | #include <sys/systm.h> | |
66 | #include <sys/malloc.h> | |
67 | #include <sys/mbuf.h> | |
1c79356b | 68 | #include <sys/domain.h> |
1c79356b A |
69 | #include <sys/protosw.h> |
70 | #include <sys/socket.h> | |
71 | #include <sys/socketvar.h> | |
72 | #include <sys/proc.h> | |
73 | #include <sys/kernel.h> | |
74 | #include <sys/sysctl.h> | |
6d2010ae A |
75 | #include <sys/mcache.h> |
76 | #include <sys/kauth.h> | |
77 | #include <sys/priv.h> | |
39236c6e A |
78 | #include <sys/proc_uuid_policy.h> |
79 | #include <sys/syslog.h> | |
80 | ||
91447636 | 81 | #include <libkern/OSAtomic.h> |
316670eb | 82 | #include <kern/locks.h> |
1c79356b A |
83 | |
84 | #include <machine/limits.h> | |
85 | ||
1c79356b | 86 | #include <kern/zalloc.h> |
1c79356b A |
87 | |
88 | #include <net/if.h> | |
1c79356b | 89 | #include <net/if_types.h> |
9bccf70c | 90 | #include <net/route.h> |
316670eb A |
91 | #include <net/flowhash.h> |
92 | #include <net/flowadv.h> | |
1c79356b A |
93 | |
94 | #include <netinet/in.h> | |
95 | #include <netinet/in_pcb.h> | |
96 | #include <netinet/in_var.h> | |
97 | #include <netinet/ip_var.h> | |
98 | #if INET6 | |
99 | #include <netinet/ip6.h> | |
100 | #include <netinet6/ip6_var.h> | |
101 | #endif /* INET6 */ | |
102 | ||
1c79356b A |
103 | #if IPSEC |
104 | #include <netinet6/ipsec.h> | |
105 | #include <netkey/key.h> | |
1c79356b A |
106 | #endif /* IPSEC */ |
107 | ||
108 | #include <sys/kdebug.h> | |
b0d623f7 | 109 | #include <sys/random.h> |
39236c6e | 110 | |
316670eb | 111 | #include <dev/random/randomdev.h> |
39236c6e | 112 | #include <mach/boolean.h> |
1c79356b | 113 | |
39236c6e A |
114 | #if FLOW_DIVERT |
115 | #include <netinet/flow_divert.h> | |
9bccf70c | 116 | #endif |
1c79356b | 117 | |
39236c6e A |
118 | static lck_grp_t *inpcb_lock_grp; |
119 | static lck_attr_t *inpcb_lock_attr; | |
120 | static lck_grp_attr_t *inpcb_lock_grp_attr; | |
121 | decl_lck_mtx_data(static, inpcb_lock); /* global INPCB lock */ | |
122 | decl_lck_mtx_data(static, inpcb_timeout_lock); | |
123 | ||
124 | static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head); | |
125 | ||
126 | static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */ | |
127 | static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */ | |
128 | static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */ | |
129 | static boolean_t inpcb_fast_timer_on = FALSE; | |
130 | static void inpcb_sched_timeout(struct timeval *); | |
131 | static void inpcb_timeout(void *); | |
132 | int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */ | |
133 | extern int tvtohz(struct timeval *); | |
134 | ||
135 | #if CONFIG_PROC_UUID_POLICY | |
136 | static void inp_update_cellular_policy(struct inpcb *, boolean_t); | |
137 | #if FLOW_DIVERT | |
138 | static void inp_update_flow_divert_policy(struct inpcb *, boolean_t); | |
139 | #endif /* FLOW_DIVERT */ | |
140 | #endif /* !CONFIG_PROC_UUID_POLICY */ | |
141 | ||
142 | #if IPSEC | |
143 | extern int ipsec_bypass; | |
144 | #endif /* IPSEC */ | |
1c79356b | 145 | |
39236c6e A |
146 | #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8)) |
147 | #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1)) | |
1c79356b | 148 | |
1c79356b A |
149 | /* |
150 | * These configure the range of local port addresses assigned to | |
151 | * "unspecified" outgoing connections/packets/whatever. | |
152 | */ | |
9bccf70c A |
153 | int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ |
154 | int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ | |
39236c6e A |
155 | int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ |
156 | int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ | |
9bccf70c A |
157 | int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ |
158 | int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ | |
1c79356b | 159 | |
/*
 * Clamp the lvalue "var" into the closed interval [min, max].
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * can be used safely as the body of an unbraced if/else.  "var" is
 * evaluated more than once, so it must be free of side effects.
 */
#define	RANGECHK(var, min, max)						\
	do {								\
		if ((var) < (min)) {					\
			(var) = (min);					\
		} else if ((var) > (max)) {				\
			(var) = (max);					\
		}							\
	} while (0)
163 | ||
1c79356b A |
164 | static int |
165 | sysctl_net_ipport_check SYSCTL_HANDLER_ARGS | |
166 | { | |
2d21ac55 | 167 | #pragma unused(arg1, arg2) |
39236c6e A |
168 | int error; |
169 | ||
170 | error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); | |
1c79356b A |
171 | if (!error) { |
172 | RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); | |
173 | RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); | |
174 | RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); | |
175 | RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); | |
176 | RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); | |
177 | RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); | |
178 | } | |
39236c6e | 179 | return (error); |
1c79356b A |
180 | } |
181 | ||
182 | #undef RANGECHK | |
183 | ||
39236c6e A |
184 | SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, |
185 | CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports"); | |
186 | ||
187 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, | |
188 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
189 | &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); | |
190 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, | |
191 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
192 | &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); | |
193 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, | |
194 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
195 | &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); | |
196 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, | |
197 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
198 | &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); | |
199 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, | |
200 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
201 | &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); | |
202 | SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, | |
203 | CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, | |
204 | &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); | |
1c79356b | 205 | |
b0d623f7 A |
206 | extern int udp_use_randomport; |
207 | extern int tcp_use_randomport; | |
208 | ||
316670eb A |
209 | /* Structs used for flowhash computation */ |
210 | struct inp_flowhash_key_addr { | |
211 | union { | |
212 | struct in_addr v4; | |
213 | struct in6_addr v6; | |
214 | u_int8_t addr8[16]; | |
215 | u_int16_t addr16[8]; | |
216 | u_int32_t addr32[4]; | |
217 | } infha; | |
218 | }; | |
219 | ||
220 | struct inp_flowhash_key { | |
39236c6e | 221 | struct inp_flowhash_key_addr infh_laddr; |
316670eb A |
222 | struct inp_flowhash_key_addr infh_faddr; |
223 | u_int32_t infh_lport; | |
224 | u_int32_t infh_fport; | |
225 | u_int32_t infh_af; | |
226 | u_int32_t infh_proto; | |
227 | u_int32_t infh_rand1; | |
228 | u_int32_t infh_rand2; | |
229 | }; | |
230 | ||
39236c6e A |
231 | static u_int32_t inp_hash_seed = 0; |
232 | ||
233 | static int infc_cmp(const struct inpcb *, const struct inpcb *); | |
234 | ||
235 | /* Flags used by inp_fc_getinp */ | |
236 | #define INPFC_SOLOCKED 0x1 | |
237 | #define INPFC_REMOVE 0x2 | |
238 | static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t); | |
239 | ||
240 | static void inp_fc_feedback(struct inpcb *); | |
241 | extern void tcp_remove_from_time_wait(struct inpcb *inp); | |
316670eb | 242 | |
39236c6e | 243 | decl_lck_mtx_data(static, inp_fc_lck); |
316670eb | 244 | |
bd504ef0 A |
245 | RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree; |
246 | RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp); | |
247 | RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp); | |
316670eb | 248 | |
bd504ef0 A |
249 | /* |
250 | * Use this inp as a key to find an inp in the flowhash tree. | |
251 | * Accesses to it are protected by inp_fc_lck. | |
252 | */ | |
253 | struct inpcb key_inp; | |
316670eb | 254 | |
1c79356b A |
255 | /* |
256 | * in_pcb.c: manage the Protocol Control Blocks. | |
1c79356b A |
257 | */ |
258 | ||
316670eb | 259 | void |
39236c6e | 260 | in_pcbinit(void) |
316670eb | 261 | { |
39236c6e | 262 | static int inpcb_initialized = 0; |
316670eb | 263 | |
39236c6e A |
264 | VERIFY(!inpcb_initialized); |
265 | inpcb_initialized = 1; | |
316670eb | 266 | |
39236c6e A |
267 | inpcb_lock_grp_attr = lck_grp_attr_alloc_init(); |
268 | inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr); | |
269 | inpcb_lock_attr = lck_attr_alloc_init(); | |
270 | lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr); | |
271 | lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr); | |
272 | ||
273 | /* | |
274 | * Initialize data structures required to deliver | |
275 | * flow advisories. | |
276 | */ | |
277 | lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr); | |
bd504ef0 | 278 | lck_mtx_lock(&inp_fc_lck); |
316670eb | 279 | RB_INIT(&inp_fc_tree); |
bd504ef0 A |
280 | bzero(&key_inp, sizeof(key_inp)); |
281 | lck_mtx_unlock(&inp_fc_lck); | |
316670eb A |
282 | } |
283 | ||
39236c6e A |
284 | #define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \ |
285 | ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0)) | |
286 | static void | |
287 | inpcb_timeout(void *arg) | |
288 | { | |
289 | #pragma unused(arg) | |
290 | struct inpcbinfo *ipi; | |
291 | boolean_t t, gc; | |
292 | struct intimercount gccnt, tmcnt; | |
293 | struct timeval leeway; | |
294 | ||
295 | /* | |
296 | * Update coarse-grained networking timestamp (in sec.); the idea | |
297 | * is to piggy-back on the timeout callout to update the counter | |
298 | * returnable via net_uptime(). | |
299 | */ | |
300 | net_update_uptime(); | |
301 | ||
302 | lck_mtx_lock_spin(&inpcb_timeout_lock); | |
303 | gc = inpcb_garbage_collecting; | |
304 | inpcb_garbage_collecting = FALSE; | |
305 | bzero(&gccnt, sizeof(gccnt)); | |
306 | bzero(&tmcnt, sizeof(tmcnt)); | |
307 | ||
308 | t = inpcb_ticking; | |
309 | inpcb_ticking = FALSE; | |
310 | ||
311 | if (gc || t) { | |
312 | lck_mtx_unlock(&inpcb_timeout_lock); | |
313 | ||
314 | lck_mtx_lock(&inpcb_lock); | |
315 | TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) { | |
316 | if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) { | |
317 | bzero(&ipi->ipi_gc_req, | |
318 | sizeof(ipi->ipi_gc_req)); | |
319 | if (gc && ipi->ipi_gc != NULL) { | |
320 | ipi->ipi_gc(ipi); | |
321 | gccnt.intimer_lazy += | |
322 | ipi->ipi_gc_req.intimer_lazy; | |
323 | gccnt.intimer_fast += | |
324 | ipi->ipi_gc_req.intimer_fast; | |
325 | gccnt.intimer_nodelay += | |
326 | ipi->ipi_gc_req.intimer_nodelay; | |
327 | } | |
328 | } | |
329 | if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) { | |
330 | bzero(&ipi->ipi_timer_req, | |
331 | sizeof(ipi->ipi_timer_req)); | |
332 | if (t && ipi->ipi_timer != NULL) { | |
333 | ipi->ipi_timer(ipi); | |
334 | tmcnt.intimer_lazy += | |
335 | ipi->ipi_timer_req.intimer_lazy; | |
336 | tmcnt.intimer_lazy += | |
337 | ipi->ipi_timer_req.intimer_fast; | |
338 | tmcnt.intimer_nodelay += | |
339 | ipi->ipi_timer_req.intimer_nodelay; | |
340 | } | |
341 | } | |
342 | } | |
343 | lck_mtx_unlock(&inpcb_lock); | |
344 | lck_mtx_lock_spin(&inpcb_timeout_lock); | |
345 | } | |
346 | ||
347 | /* lock was dropped above, so check first before overriding */ | |
348 | if (!inpcb_garbage_collecting) | |
349 | inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt); | |
350 | if (!inpcb_ticking) | |
351 | inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt); | |
352 | ||
353 | /* re-arm the timer if there's work to do */ | |
354 | inpcb_timeout_run--; | |
355 | VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2); | |
356 | ||
357 | bzero(&leeway, sizeof(leeway)); | |
358 | leeway.tv_sec = inpcb_timeout_lazy; | |
359 | if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) | |
360 | inpcb_sched_timeout(NULL); | |
361 | else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) | |
362 | /* be lazy when idle with little activity */ | |
363 | inpcb_sched_timeout(&leeway); | |
364 | else | |
365 | inpcb_sched_timeout(NULL); | |
366 | ||
367 | lck_mtx_unlock(&inpcb_timeout_lock); | |
368 | } | |
369 | ||
370 | static void | |
371 | inpcb_sched_timeout(struct timeval *leeway) | |
372 | { | |
373 | lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED); | |
374 | ||
375 | if (inpcb_timeout_run == 0 && | |
376 | (inpcb_garbage_collecting || inpcb_ticking)) { | |
377 | lck_mtx_convert_spin(&inpcb_timeout_lock); | |
378 | inpcb_timeout_run++; | |
379 | if (leeway == NULL) { | |
380 | inpcb_fast_timer_on = TRUE; | |
381 | timeout(inpcb_timeout, NULL, hz); | |
382 | } else { | |
383 | inpcb_fast_timer_on = FALSE; | |
384 | timeout_with_leeway(inpcb_timeout, NULL, hz, | |
385 | tvtohz(leeway)); | |
386 | } | |
387 | } else if (inpcb_timeout_run == 1 && | |
388 | leeway == NULL && !inpcb_fast_timer_on) { | |
389 | /* | |
390 | * Since the request was for a fast timer but the | |
391 | * scheduled timer is a lazy timer, try to schedule | |
392 | * another instance of fast timer also | |
393 | */ | |
394 | lck_mtx_convert_spin(&inpcb_timeout_lock); | |
395 | inpcb_timeout_run++; | |
396 | inpcb_fast_timer_on = TRUE; | |
397 | timeout(inpcb_timeout, NULL, hz); | |
398 | } | |
399 | } | |
400 | ||
401 | void | |
402 | inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type) | |
403 | { | |
404 | struct timeval leeway; | |
405 | lck_mtx_lock_spin(&inpcb_timeout_lock); | |
406 | inpcb_garbage_collecting = TRUE; | |
407 | switch (type) { | |
408 | case INPCB_TIMER_NODELAY: | |
409 | atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1); | |
410 | inpcb_sched_timeout(NULL); | |
411 | break; | |
412 | case INPCB_TIMER_FAST: | |
413 | atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1); | |
414 | inpcb_sched_timeout(NULL); | |
415 | break; | |
416 | default: | |
417 | atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1); | |
418 | leeway.tv_sec = inpcb_timeout_lazy; | |
419 | leeway.tv_usec = 0; | |
420 | inpcb_sched_timeout(&leeway); | |
421 | break; | |
422 | } | |
423 | lck_mtx_unlock(&inpcb_timeout_lock); | |
424 | } | |
425 | ||
426 | void | |
427 | inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type) | |
428 | { | |
429 | struct timeval leeway; | |
430 | lck_mtx_lock_spin(&inpcb_timeout_lock); | |
431 | inpcb_ticking = TRUE; | |
432 | switch (type) { | |
433 | case INPCB_TIMER_NODELAY: | |
434 | atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1); | |
435 | inpcb_sched_timeout(NULL); | |
436 | break; | |
437 | case INPCB_TIMER_FAST: | |
438 | atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1); | |
439 | inpcb_sched_timeout(NULL); | |
440 | break; | |
441 | default: | |
442 | atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1); | |
443 | leeway.tv_sec = inpcb_timeout_lazy; | |
444 | leeway.tv_usec = 0; | |
445 | inpcb_sched_timeout(&leeway); | |
446 | break; | |
447 | } | |
448 | lck_mtx_unlock(&inpcb_timeout_lock); | |
449 | } | |
450 | ||
451 | void | |
452 | in_pcbinfo_attach(struct inpcbinfo *ipi) | |
453 | { | |
454 | struct inpcbinfo *ipi0; | |
455 | ||
456 | lck_mtx_lock(&inpcb_lock); | |
457 | TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) { | |
458 | if (ipi0 == ipi) { | |
459 | panic("%s: ipi %p already in the list\n", | |
460 | __func__, ipi); | |
461 | /* NOTREACHED */ | |
462 | } | |
463 | } | |
464 | TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry); | |
465 | lck_mtx_unlock(&inpcb_lock); | |
466 | } | |
467 | ||
468 | int | |
469 | in_pcbinfo_detach(struct inpcbinfo *ipi) | |
470 | { | |
471 | struct inpcbinfo *ipi0; | |
472 | int error = 0; | |
473 | ||
474 | lck_mtx_lock(&inpcb_lock); | |
475 | TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) { | |
476 | if (ipi0 == ipi) | |
477 | break; | |
478 | } | |
479 | if (ipi0 != NULL) | |
480 | TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry); | |
481 | else | |
482 | error = ENXIO; | |
483 | lck_mtx_unlock(&inpcb_lock); | |
484 | ||
485 | return (error); | |
486 | } | |
487 | ||
1c79356b A |
488 | /* |
489 | * Allocate a PCB and associate it with the socket. | |
2d21ac55 A |
490 | * |
491 | * Returns: 0 Success | |
492 | * ENOBUFS | |
493 | * ENOMEM | |
494 | * ipsec_init_policy:??? [IPSEC] | |
1c79356b A |
495 | */ |
496 | int | |
39236c6e | 497 | in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) |
1c79356b | 498 | { |
39236c6e | 499 | #pragma unused(p) |
2d21ac55 | 500 | struct inpcb *inp; |
39236c6e | 501 | caddr_t temp; |
2d21ac55 A |
502 | #if CONFIG_MACF_NET |
503 | int mac_error; | |
39236c6e | 504 | #endif /* CONFIG_MACF_NET */ |
1c79356b | 505 | |
39236c6e A |
506 | if (!so->cached_in_sock_layer) { |
507 | inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone); | |
508 | if (inp == NULL) | |
509 | return (ENOBUFS); | |
510 | bzero((caddr_t)inp, sizeof (*inp)); | |
511 | } else { | |
512 | inp = (struct inpcb *)(void *)so->so_saved_pcb; | |
513 | temp = inp->inp_saved_ppcb; | |
514 | bzero((caddr_t)inp, sizeof (*inp)); | |
515 | inp->inp_saved_ppcb = temp; | |
1c79356b A |
516 | } |
517 | ||
518 | inp->inp_gencnt = ++pcbinfo->ipi_gencnt; | |
519 | inp->inp_pcbinfo = pcbinfo; | |
520 | inp->inp_socket = so; | |
2d21ac55 A |
521 | #if CONFIG_MACF_NET |
522 | mac_error = mac_inpcb_label_init(inp, M_WAITOK); | |
523 | if (mac_error != 0) { | |
39236c6e | 524 | if (!so->cached_in_sock_layer) |
2d21ac55 A |
525 | zfree(pcbinfo->ipi_zone, inp); |
526 | return (mac_error); | |
527 | } | |
528 | mac_inpcb_label_associate(so, inp); | |
39236c6e A |
529 | #endif /* CONFIG_MACF_NET */ |
530 | /* make sure inp_stat is always 64-bit aligned */ | |
531 | inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store, | |
532 | sizeof (u_int64_t)); | |
533 | if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) + | |
534 | sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) { | |
535 | panic("%s: insufficient space to align inp_stat", __func__); | |
536 | /* NOTREACHED */ | |
537 | } | |
538 | ||
539 | /* make sure inp_cstat is always 64-bit aligned */ | |
540 | inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store, | |
541 | sizeof (u_int64_t)); | |
542 | if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) + | |
543 | sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) { | |
544 | panic("%s: insufficient space to align inp_cstat", __func__); | |
545 | /* NOTREACHED */ | |
546 | } | |
547 | ||
548 | /* make sure inp_wstat is always 64-bit aligned */ | |
549 | inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store, | |
550 | sizeof (u_int64_t)); | |
551 | if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) + | |
552 | sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) { | |
553 | panic("%s: insufficient space to align inp_wstat", __func__); | |
554 | /* NOTREACHED */ | |
6d2010ae A |
555 | } |
556 | ||
91447636 A |
557 | so->so_pcb = (caddr_t)inp; |
558 | ||
559 | if (so->so_proto->pr_flags & PR_PCBLOCK) { | |
39236c6e A |
560 | lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp, |
561 | pcbinfo->ipi_lock_attr); | |
91447636 A |
562 | } |
563 | ||
39236c6e | 564 | |
2d21ac55 | 565 | #if INET6 |
39236c6e | 566 | if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) |
9bccf70c | 567 | inp->inp_flags |= IN6P_IPV6_V6ONLY; |
39236c6e | 568 | |
9bccf70c A |
569 | if (ip6_auto_flowlabel) |
570 | inp->inp_flags |= IN6P_AUTOFLOWLABEL; | |
39236c6e A |
571 | #endif /* INET6 */ |
572 | ||
573 | (void) inp_update_policy(inp); | |
574 | ||
575 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); | |
91447636 | 576 | inp->inp_gencnt = ++pcbinfo->ipi_gencnt; |
39236c6e | 577 | LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list); |
91447636 | 578 | pcbinfo->ipi_count++; |
39236c6e | 579 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
580 | return (0); |
581 | } | |
582 | ||
2d21ac55 | 583 | /* |
39236c6e A |
584 | * in_pcblookup_local_and_cleanup does everything |
585 | * in_pcblookup_local does but it checks for a socket | |
586 | * that's going away. Since we know that the lock is | |
587 | * held read+write when this funciton is called, we | |
588 | * can safely dispose of this socket like the slow | |
589 | * timer would usually do and return NULL. This is | |
590 | * great for bind. | |
591 | */ | |
592 | struct inpcb * | |
593 | in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr, | |
594 | u_int lport_arg, int wild_okay) | |
2d21ac55 A |
595 | { |
596 | struct inpcb *inp; | |
39236c6e | 597 | |
2d21ac55 A |
598 | /* Perform normal lookup */ |
599 | inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay); | |
39236c6e | 600 | |
2d21ac55 | 601 | /* Check if we found a match but it's waiting to be disposed */ |
39236c6e | 602 | if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) { |
2d21ac55 | 603 | struct socket *so = inp->inp_socket; |
39236c6e | 604 | |
6d2010ae | 605 | lck_mtx_lock(&inp->inpcb_mtx); |
39236c6e | 606 | |
2d21ac55 | 607 | if (so->so_usecount == 0) { |
b0d623f7 A |
608 | if (inp->inp_state != INPCB_STATE_DEAD) |
609 | in_pcbdetach(inp); | |
39236c6e | 610 | in_pcbdispose(inp); /* will unlock & destroy */ |
2d21ac55 | 611 | inp = NULL; |
39236c6e | 612 | } else { |
6d2010ae | 613 | lck_mtx_unlock(&inp->inpcb_mtx); |
2d21ac55 A |
614 | } |
615 | } | |
39236c6e A |
616 | |
617 | return (inp); | |
2d21ac55 A |
618 | } |
619 | ||
c910b4d9 | 620 | static void |
2d21ac55 A |
621 | in_pcb_conflict_post_msg(u_int16_t port) |
622 | { | |
39236c6e A |
623 | /* |
624 | * Radar 5523020 send a kernel event notification if a | |
625 | * non-participating socket tries to bind the port a socket | |
626 | * who has set SOF_NOTIFYCONFLICT owns. | |
2d21ac55 | 627 | */ |
39236c6e | 628 | struct kev_msg ev_msg; |
2d21ac55 A |
629 | struct kev_in_portinuse in_portinuse; |
630 | ||
39236c6e A |
631 | bzero(&in_portinuse, sizeof (struct kev_in_portinuse)); |
632 | bzero(&ev_msg, sizeof (struct kev_msg)); | |
2d21ac55 A |
633 | in_portinuse.port = ntohs(port); /* port in host order */ |
634 | in_portinuse.req_pid = proc_selfpid(); | |
635 | ev_msg.vendor_code = KEV_VENDOR_APPLE; | |
636 | ev_msg.kev_class = KEV_NETWORK_CLASS; | |
637 | ev_msg.kev_subclass = KEV_INET_SUBCLASS; | |
638 | ev_msg.event_code = KEV_INET_PORTINUSE; | |
639 | ev_msg.dv[0].data_ptr = &in_portinuse; | |
39236c6e | 640 | ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse); |
2d21ac55 A |
641 | ev_msg.dv[1].data_length = 0; |
642 | kev_post_msg(&ev_msg); | |
643 | } | |
39236c6e | 644 | |
2d21ac55 | 645 | /* |
39236c6e A |
646 | * Bind an INPCB to an address and/or port. This routine should not alter |
647 | * the caller-supplied local address "nam". | |
648 | * | |
2d21ac55 A |
649 | * Returns: 0 Success |
650 | * EADDRNOTAVAIL Address not available. | |
651 | * EINVAL Invalid argument | |
652 | * EAFNOSUPPORT Address family not supported [notdef] | |
653 | * EACCES Permission denied | |
654 | * EADDRINUSE Address in use | |
655 | * EAGAIN Resource unavailable, try again | |
6d2010ae | 656 | * priv_check_cred:EPERM Operation not permitted |
2d21ac55 | 657 | */ |
1c79356b | 658 | int |
2d21ac55 | 659 | in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) |
1c79356b | 660 | { |
2d21ac55 | 661 | struct socket *so = inp->inp_socket; |
9bccf70c | 662 | unsigned short *lastport; |
1c79356b | 663 | struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; |
b0d623f7 | 664 | u_short lport = 0, rand_port = 0; |
1c79356b | 665 | int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); |
b0d623f7 | 666 | int error, randomport, conflict = 0; |
6d2010ae | 667 | kauth_cred_t cred; |
1c79356b A |
668 | |
669 | if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ | |
670 | return (EADDRNOTAVAIL); | |
39236c6e | 671 | if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) |
1c79356b | 672 | return (EINVAL); |
39236c6e | 673 | if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) |
1c79356b | 674 | wild = 1; |
91447636 | 675 | socket_unlock(so, 0); /* keep reference on socket */ |
39236c6e A |
676 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); |
677 | if (nam != NULL) { | |
316670eb | 678 | struct ifnet *outif = NULL; |
6d2010ae | 679 | |
39236c6e A |
680 | if (nam->sa_len != sizeof (struct sockaddr_in)) { |
681 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 682 | socket_lock(so, 0); |
1c79356b | 683 | return (EINVAL); |
91447636 | 684 | } |
39236c6e | 685 | #if 0 |
1c79356b A |
686 | /* |
687 | * We should check the family, but old programs | |
688 | * incorrectly fail to initialize it. | |
689 | */ | |
39236c6e A |
690 | if (nam->sa_family != AF_INET) { |
691 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 692 | socket_lock(so, 0); |
1c79356b | 693 | return (EAFNOSUPPORT); |
91447636 | 694 | } |
39236c6e A |
695 | #endif /* 0 */ |
696 | lport = SIN(nam)->sin_port; | |
697 | ||
698 | if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) { | |
1c79356b A |
699 | /* |
700 | * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; | |
701 | * allow complete duplication of binding if | |
702 | * SO_REUSEPORT is set, or if SO_REUSEADDR is set | |
703 | * and a multicast address is bound on both | |
704 | * new and duplicated sockets. | |
705 | */ | |
706 | if (so->so_options & SO_REUSEADDR) | |
707 | reuseport = SO_REUSEADDR|SO_REUSEPORT; | |
39236c6e A |
708 | } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) { |
709 | struct sockaddr_in sin; | |
91447636 | 710 | struct ifaddr *ifa; |
39236c6e A |
711 | |
712 | /* Sanitized for interface address searches */ | |
713 | bzero(&sin, sizeof (sin)); | |
714 | sin.sin_family = AF_INET; | |
715 | sin.sin_len = sizeof (struct sockaddr_in); | |
716 | sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr; | |
717 | ||
718 | ifa = ifa_ifwithaddr(SA(&sin)); | |
719 | if (ifa == NULL) { | |
720 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 721 | socket_lock(so, 0); |
1c79356b | 722 | return (EADDRNOTAVAIL); |
39236c6e A |
723 | } else { |
724 | /* | |
725 | * Opportunistically determine the outbound | |
726 | * interface that may be used; this may not | |
727 | * hold true if we end up using a route | |
728 | * going over a different interface, e.g. | |
729 | * when sending to a local address. This | |
730 | * will get updated again after sending. | |
731 | */ | |
6d2010ae | 732 | IFA_LOCK(ifa); |
316670eb | 733 | outif = ifa->ifa_ifp; |
6d2010ae A |
734 | IFA_UNLOCK(ifa); |
735 | IFA_REMREF(ifa); | |
91447636 | 736 | } |
1c79356b | 737 | } |
39236c6e | 738 | if (lport != 0) { |
1c79356b | 739 | struct inpcb *t; |
39236c6e | 740 | uid_t u; |
1c79356b A |
741 | |
742 | /* GROSS */ | |
6d2010ae A |
743 | if (ntohs(lport) < IPPORT_RESERVED) { |
744 | cred = kauth_cred_proc_ref(p); | |
39236c6e A |
745 | error = priv_check_cred(cred, |
746 | PRIV_NETINET_RESERVEDPORT, 0); | |
6d2010ae A |
747 | kauth_cred_unref(&cred); |
748 | if (error != 0) { | |
39236c6e | 749 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae A |
750 | socket_lock(so, 0); |
751 | return (EACCES); | |
752 | } | |
91447636 | 753 | } |
39236c6e A |
754 | if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) && |
755 | (u = kauth_cred_getuid(so->so_cred)) != 0 && | |
756 | (t = in_pcblookup_local_and_cleanup( | |
757 | inp->inp_pcbinfo, SIN(nam)->sin_addr, lport, | |
758 | INPLOOKUP_WILDCARD)) != NULL && | |
759 | (SIN(nam)->sin_addr.s_addr != INADDR_ANY || | |
760 | t->inp_laddr.s_addr != INADDR_ANY || | |
761 | !(t->inp_socket->so_options & SO_REUSEPORT)) && | |
762 | (u != kauth_cred_getuid(t->inp_socket->so_cred)) && | |
763 | !(t->inp_socket->so_flags & SOF_REUSESHAREUID) && | |
764 | (SIN(nam)->sin_addr.s_addr != INADDR_ANY || | |
765 | t->inp_laddr.s_addr != INADDR_ANY)) { | |
766 | if ((t->inp_socket->so_flags & | |
767 | SOF_NOTIFYCONFLICT) && | |
768 | !(so->so_flags & SOF_NOTIFYCONFLICT)) | |
769 | conflict = 1; | |
770 | ||
771 | lck_rw_done(pcbinfo->ipi_lock); | |
772 | ||
773 | if (conflict) | |
774 | in_pcb_conflict_post_msg(lport); | |
2d21ac55 | 775 | |
39236c6e A |
776 | socket_lock(so, 0); |
777 | return (EADDRINUSE); | |
1c79356b | 778 | } |
39236c6e A |
779 | t = in_pcblookup_local_and_cleanup(pcbinfo, |
780 | SIN(nam)->sin_addr, lport, wild); | |
781 | if (t != NULL && | |
1c79356b A |
782 | (reuseport & t->inp_socket->so_options) == 0) { |
783 | #if INET6 | |
39236c6e A |
784 | if (SIN(nam)->sin_addr.s_addr != INADDR_ANY || |
785 | t->inp_laddr.s_addr != INADDR_ANY || | |
786 | SOCK_DOM(so) != PF_INET6 || | |
787 | SOCK_DOM(t->inp_socket) != PF_INET6) | |
2d21ac55 A |
788 | #endif /* INET6 */ |
789 | { | |
2d21ac55 | 790 | |
39236c6e A |
791 | if ((t->inp_socket->so_flags & |
792 | SOF_NOTIFYCONFLICT) && | |
793 | !(so->so_flags & SOF_NOTIFYCONFLICT)) | |
2d21ac55 A |
794 | conflict = 1; |
795 | ||
39236c6e | 796 | lck_rw_done(pcbinfo->ipi_lock); |
2d21ac55 A |
797 | |
798 | if (conflict) | |
799 | in_pcb_conflict_post_msg(lport); | |
91447636 A |
800 | socket_lock(so, 0); |
801 | return (EADDRINUSE); | |
802 | } | |
1c79356b A |
803 | } |
804 | } | |
39236c6e | 805 | inp->inp_laddr = SIN(nam)->sin_addr; |
316670eb | 806 | inp->inp_last_outifp = outif; |
1c79356b A |
807 | } |
808 | if (lport == 0) { | |
809 | u_short first, last; | |
810 | int count; | |
811 | ||
39236c6e A |
812 | randomport = (so->so_flags & SOF_BINDRANDOMPORT) || |
813 | (so->so_type == SOCK_STREAM ? tcp_use_randomport : | |
814 | udp_use_randomport); | |
815 | ||
816 | /* | |
817 | * TODO: | |
818 | * | |
819 | * The following should be moved into its own routine and | |
820 | * thus can be shared with in6_pcbsetport(); the latter | |
821 | * currently duplicates the logic. | |
822 | */ | |
b0d623f7 | 823 | |
1c79356b A |
824 | inp->inp_flags |= INP_ANONPORT; |
825 | ||
826 | if (inp->inp_flags & INP_HIGHPORT) { | |
827 | first = ipport_hifirstauto; /* sysctl */ | |
828 | last = ipport_hilastauto; | |
39236c6e | 829 | lastport = &pcbinfo->ipi_lasthi; |
1c79356b | 830 | } else if (inp->inp_flags & INP_LOWPORT) { |
6d2010ae | 831 | cred = kauth_cred_proc_ref(p); |
39236c6e A |
832 | error = priv_check_cred(cred, |
833 | PRIV_NETINET_RESERVEDPORT, 0); | |
6d2010ae A |
834 | kauth_cred_unref(&cred); |
835 | if (error != 0) { | |
39236c6e | 836 | lck_rw_done(pcbinfo->ipi_lock); |
91447636 | 837 | socket_lock(so, 0); |
39236c6e | 838 | return (error); |
91447636 | 839 | } |
1c79356b A |
840 | first = ipport_lowfirstauto; /* 1023 */ |
841 | last = ipport_lowlastauto; /* 600 */ | |
39236c6e | 842 | lastport = &pcbinfo->ipi_lastlow; |
1c79356b A |
843 | } else { |
844 | first = ipport_firstauto; /* sysctl */ | |
845 | last = ipport_lastauto; | |
39236c6e | 846 | lastport = &pcbinfo->ipi_lastport; |
1c79356b | 847 | } |
b0d623f7 A |
848 | /* No point in randomizing if only one port is available */ |
849 | ||
850 | if (first == last) | |
39236c6e | 851 | randomport = 0; |
1c79356b A |
852 | /* |
853 | * Simple check to ensure all ports are not used up causing | |
854 | * a deadlock here. | |
855 | * | |
856 | * We split the two cases (up and down) so that the direction | |
857 | * is not being tested on each round of the loop. | |
858 | */ | |
859 | if (first > last) { | |
860 | /* | |
861 | * counting down | |
862 | */ | |
b0d623f7 | 863 | if (randomport) { |
39236c6e A |
864 | read_random(&rand_port, sizeof (rand_port)); |
865 | *lastport = | |
866 | first - (rand_port % (first - last)); | |
b0d623f7 | 867 | } |
1c79356b A |
868 | count = first - last; |
869 | ||
870 | do { | |
871 | if (count-- < 0) { /* completely used? */ | |
39236c6e | 872 | lck_rw_done(pcbinfo->ipi_lock); |
91447636 | 873 | socket_lock(so, 0); |
1c79356b | 874 | inp->inp_laddr.s_addr = INADDR_ANY; |
316670eb | 875 | inp->inp_last_outifp = NULL; |
9bccf70c | 876 | return (EADDRNOTAVAIL); |
1c79356b A |
877 | } |
878 | --*lastport; | |
879 | if (*lastport > first || *lastport < last) | |
880 | *lastport = first; | |
881 | lport = htons(*lastport); | |
2d21ac55 | 882 | } while (in_pcblookup_local_and_cleanup(pcbinfo, |
39236c6e | 883 | inp->inp_laddr, lport, wild)); |
1c79356b A |
884 | } else { |
885 | /* | |
886 | * counting up | |
887 | */ | |
b0d623f7 | 888 | if (randomport) { |
39236c6e A |
889 | read_random(&rand_port, sizeof (rand_port)); |
890 | *lastport = | |
891 | first + (rand_port % (first - last)); | |
b0d623f7 | 892 | } |
1c79356b A |
893 | count = last - first; |
894 | ||
895 | do { | |
896 | if (count-- < 0) { /* completely used? */ | |
39236c6e | 897 | lck_rw_done(pcbinfo->ipi_lock); |
91447636 | 898 | socket_lock(so, 0); |
1c79356b | 899 | inp->inp_laddr.s_addr = INADDR_ANY; |
316670eb | 900 | inp->inp_last_outifp = NULL; |
9bccf70c | 901 | return (EADDRNOTAVAIL); |
1c79356b A |
902 | } |
903 | ++*lastport; | |
904 | if (*lastport < first || *lastport > last) | |
905 | *lastport = first; | |
906 | lport = htons(*lastport); | |
2d21ac55 | 907 | } while (in_pcblookup_local_and_cleanup(pcbinfo, |
39236c6e | 908 | inp->inp_laddr, lport, wild)); |
1c79356b A |
909 | } |
910 | } | |
91447636 | 911 | socket_lock(so, 0); |
1c79356b | 912 | inp->inp_lport = lport; |
91447636 | 913 | if (in_pcbinshash(inp, 1) != 0) { |
1c79356b A |
914 | inp->inp_laddr.s_addr = INADDR_ANY; |
915 | inp->inp_lport = 0; | |
316670eb | 916 | inp->inp_last_outifp = NULL; |
39236c6e | 917 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
918 | return (EAGAIN); |
919 | } | |
39236c6e | 920 | lck_rw_done(pcbinfo->ipi_lock); |
2d21ac55 | 921 | sflt_notify(so, sock_evt_bound, NULL); |
1c79356b A |
922 | return (0); |
923 | } | |
924 | ||
925 | /* | |
39236c6e A |
926 | * Transform old in_pcbconnect() into an inner subroutine for new |
927 | * in_pcbconnect(); do some validity-checking on the remote address | |
928 | * (in "nam") and then determine local host address (i.e., which | |
929 | * interface) to use to access that remote host. | |
930 | * | |
931 | * This routine may alter the caller-supplied remote address "nam". | |
1c79356b | 932 | * |
39236c6e A |
933 | * The caller may override the bound-to-interface setting of the socket |
934 | * by specifying the ifscope parameter (e.g. from IP_PKTINFO.) | |
935 | * | |
936 | * This routine might return an ifp with a reference held if the caller | |
937 | * provides a non-NULL outif, even in the error case. The caller is | |
938 | * responsible for releasing its reference. | |
2d21ac55 A |
939 | * |
940 | * Returns: 0 Success | |
941 | * EINVAL Invalid argument | |
942 | * EAFNOSUPPORT Address family not supported | |
943 | * EADDRNOTAVAIL Address not available | |
1c79356b | 944 | */ |
1c79356b | 945 | int |
39236c6e A |
946 | in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr, |
947 | unsigned int ifscope, struct ifnet **outif) | |
1c79356b | 948 | { |
39236c6e A |
949 | boolean_t nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR); |
950 | struct route *ro = &inp->inp_route; | |
951 | struct in_ifaddr *ia = NULL; | |
952 | struct sockaddr_in sin; | |
953 | int error = 0; | |
954 | ||
955 | if (outif != NULL) | |
956 | *outif = NULL; | |
957 | if (nam->sa_len != sizeof (struct sockaddr_in)) | |
1c79356b | 958 | return (EINVAL); |
39236c6e | 959 | if (SIN(nam)->sin_family != AF_INET) |
1c79356b | 960 | return (EAFNOSUPPORT); |
39236c6e | 961 | if (SIN(nam)->sin_port == 0) |
1c79356b | 962 | return (EADDRNOTAVAIL); |
b0d623f7 | 963 | |
39236c6e A |
964 | /* |
965 | * If the destination address is INADDR_ANY, | |
966 | * use the primary local address. | |
967 | * If the supplied address is INADDR_BROADCAST, | |
968 | * and the primary interface supports broadcast, | |
969 | * choose the broadcast address for that interface. | |
970 | */ | |
971 | if (SIN(nam)->sin_addr.s_addr == INADDR_ANY || | |
972 | SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST) { | |
973 | lck_rw_lock_shared(in_ifaddr_rwlock); | |
974 | if (!TAILQ_EMPTY(&in_ifaddrhead)) { | |
975 | ia = TAILQ_FIRST(&in_ifaddrhead); | |
976 | IFA_LOCK_SPIN(&ia->ia_ifa); | |
977 | if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) { | |
978 | SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr; | |
979 | } else if (ia->ia_ifp->if_flags & IFF_BROADCAST) { | |
980 | SIN(nam)->sin_addr = | |
981 | SIN(&ia->ia_broadaddr)->sin_addr; | |
982 | } | |
983 | IFA_UNLOCK(&ia->ia_ifa); | |
984 | ia = NULL; | |
985 | } | |
986 | lck_rw_done(in_ifaddr_rwlock); | |
987 | } | |
988 | /* | |
989 | * Otherwise, if the socket has already bound the source, just use it. | |
990 | */ | |
991 | if (inp->inp_laddr.s_addr != INADDR_ANY) { | |
992 | VERIFY(ia == NULL); | |
993 | *laddr = inp->inp_laddr; | |
994 | return (0); | |
1c79356b | 995 | } |
6d2010ae | 996 | |
39236c6e A |
997 | /* |
998 | * If the ifscope is specified by the caller (e.g. IP_PKTINFO) | |
999 | * then it overrides the sticky ifscope set for the socket. | |
1000 | */ | |
1001 | if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF)) | |
1002 | ifscope = inp->inp_boundifp->if_index; | |
6d2010ae | 1003 | |
39236c6e A |
1004 | /* |
1005 | * If route is known or can be allocated now, | |
1006 | * our src addr is taken from the i/f, else punt. | |
1007 | * Note that we should check the address family of the cached | |
1008 | * destination, in case of sharing the cache with IPv6. | |
1009 | */ | |
1010 | if (ro->ro_rt != NULL) | |
1011 | RT_LOCK_SPIN(ro->ro_rt); | |
1012 | if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET || | |
1013 | SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr || | |
1014 | (inp->inp_socket->so_options & SO_DONTROUTE)) { | |
b0d623f7 | 1015 | if (ro->ro_rt != NULL) |
b0d623f7 | 1016 | RT_UNLOCK(ro->ro_rt); |
39236c6e A |
1017 | ROUTE_RELEASE(ro); |
1018 | } | |
1019 | if (!(inp->inp_socket->so_options & SO_DONTROUTE) && | |
1020 | (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) { | |
1021 | if (ro->ro_rt != NULL) | |
1022 | RT_UNLOCK(ro->ro_rt); | |
1023 | ROUTE_RELEASE(ro); | |
1024 | /* No route yet, so try to acquire one */ | |
1025 | bzero(&ro->ro_dst, sizeof (struct sockaddr_in)); | |
1026 | ro->ro_dst.sa_family = AF_INET; | |
1027 | ro->ro_dst.sa_len = sizeof (struct sockaddr_in); | |
1028 | SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr; | |
1029 | rtalloc_scoped(ro, ifscope); | |
1030 | if (ro->ro_rt != NULL) | |
1031 | RT_LOCK_SPIN(ro->ro_rt); | |
1032 | } | |
1033 | /* Sanitized local copy for interface address searches */ | |
1034 | bzero(&sin, sizeof (sin)); | |
1035 | sin.sin_family = AF_INET; | |
1036 | sin.sin_len = sizeof (struct sockaddr_in); | |
1037 | sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr; | |
1038 | /* | |
1039 | * If we did not find (or use) a route, assume dest is reachable | |
1040 | * on a directly connected network and try to find a corresponding | |
1041 | * interface to take the source address from. | |
1042 | */ | |
1043 | if (ro->ro_rt == NULL) { | |
1044 | VERIFY(ia == NULL); | |
1045 | ia = ifatoia(ifa_ifwithdstaddr(SA(&sin))); | |
1046 | if (ia == NULL) | |
1047 | ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope)); | |
1048 | error = ((ia == NULL) ? ENETUNREACH : 0); | |
1049 | goto done; | |
1050 | } | |
1051 | RT_LOCK_ASSERT_HELD(ro->ro_rt); | |
1052 | /* | |
1053 | * If the outgoing interface on the route found is not | |
1054 | * a loopback interface, use the address from that interface. | |
1055 | */ | |
1056 | if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { | |
1057 | VERIFY(ia == NULL); | |
6d2010ae A |
1058 | /* |
1059 | * If the route points to a cellular interface and the | |
1060 | * caller forbids our using interfaces of such type, | |
1061 | * pretend that there is no route. | |
1062 | */ | |
39236c6e A |
1063 | if (nocell && IFNET_IS_CELLULAR(ro->ro_rt->rt_ifp)) { |
1064 | RT_UNLOCK(ro->ro_rt); | |
1065 | ROUTE_RELEASE(ro); | |
1066 | error = EHOSTUNREACH; | |
1067 | } else { | |
6d2010ae A |
1068 | /* Become a regular mutex */ |
1069 | RT_CONVERT_LOCK(ro->ro_rt); | |
39236c6e A |
1070 | ia = ifatoia(ro->ro_rt->rt_ifa); |
1071 | IFA_ADDREF(&ia->ia_ifa); | |
b0d623f7 | 1072 | RT_UNLOCK(ro->ro_rt); |
39236c6e | 1073 | error = 0; |
91447636 | 1074 | } |
39236c6e A |
1075 | goto done; |
1076 | } | |
1077 | VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK); | |
1078 | RT_UNLOCK(ro->ro_rt); | |
1079 | /* | |
1080 | * The outgoing interface is marked with 'loopback net', so a route | |
1081 | * to ourselves is here. | |
1082 | * Try to find the interface of the destination address and then | |
1083 | * take the address from there. That interface is not necessarily | |
1084 | * a loopback interface. | |
1085 | */ | |
1086 | VERIFY(ia == NULL); | |
1087 | ia = ifatoia(ifa_ifwithdstaddr(SA(&sin))); | |
1088 | if (ia == NULL) | |
1089 | ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope)); | |
1090 | if (ia == NULL) | |
1091 | ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope)); | |
1092 | if (ia == NULL) { | |
1093 | RT_LOCK(ro->ro_rt); | |
1094 | ia = ifatoia(ro->ro_rt->rt_ifa); | |
1095 | if (ia != NULL) | |
1096 | IFA_ADDREF(&ia->ia_ifa); | |
1097 | RT_UNLOCK(ro->ro_rt); | |
1098 | } | |
1099 | error = ((ia == NULL) ? ENETUNREACH : 0); | |
1100 | ||
1101 | done: | |
1102 | /* | |
1103 | * If the destination address is multicast and an outgoing | |
1104 | * interface has been set as a multicast option, use the | |
1105 | * address of that interface as our source address. | |
1106 | */ | |
15129b1c | 1107 | if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) && |
39236c6e A |
1108 | inp->inp_moptions != NULL) { |
1109 | struct ip_moptions *imo; | |
1110 | struct ifnet *ifp; | |
1111 | ||
1112 | imo = inp->inp_moptions; | |
1113 | IMO_LOCK(imo); | |
1114 | if (imo->imo_multicast_ifp != NULL && (ia == NULL || | |
1115 | ia->ia_ifp != imo->imo_multicast_ifp)) { | |
1116 | ifp = imo->imo_multicast_ifp; | |
1117 | if (ia != NULL) | |
6d2010ae | 1118 | IFA_REMREF(&ia->ia_ifa); |
39236c6e A |
1119 | lck_rw_lock_shared(in_ifaddr_rwlock); |
1120 | TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { | |
1121 | if (ia->ia_ifp == ifp) | |
1122 | break; | |
6d2010ae | 1123 | } |
39236c6e A |
1124 | if (ia != NULL) |
1125 | IFA_ADDREF(&ia->ia_ifa); | |
1126 | lck_rw_done(in_ifaddr_rwlock); | |
1127 | if (ia == NULL) | |
1128 | error = EADDRNOTAVAIL; | |
15129b1c A |
1129 | else |
1130 | error = 0; | |
1c79356b | 1131 | } |
39236c6e A |
1132 | IMO_UNLOCK(imo); |
1133 | } | |
1134 | /* | |
1135 | * Don't do pcblookup call here; return interface in laddr | |
1136 | * and exit to caller, that will do the lookup. | |
1137 | */ | |
1138 | if (ia != NULL) { | |
1c79356b | 1139 | /* |
39236c6e A |
1140 | * If the source address belongs to a cellular interface |
1141 | * and the socket forbids our using interfaces of such | |
1142 | * type, pretend that there is no source address. | |
1c79356b | 1143 | */ |
39236c6e A |
1144 | IFA_LOCK_SPIN(&ia->ia_ifa); |
1145 | if (nocell && IFNET_IS_CELLULAR(ia->ia_ifa.ifa_ifp)) { | |
1146 | IFA_UNLOCK(&ia->ia_ifa); | |
1147 | error = EHOSTUNREACH; | |
1148 | } else if (error == 0) { | |
1149 | *laddr = ia->ia_addr.sin_addr; | |
1150 | if (outif != NULL) { | |
1151 | struct ifnet *ifp; | |
1152 | ||
1153 | if (ro->ro_rt != NULL) | |
1154 | ifp = ro->ro_rt->rt_ifp; | |
1155 | else | |
1156 | ifp = ia->ia_ifp; | |
1157 | ||
1158 | VERIFY(ifp != NULL); | |
1159 | IFA_CONVERT_LOCK(&ia->ia_ifa); | |
1160 | ifnet_reference(ifp); /* for caller */ | |
1161 | if (*outif != NULL) | |
1162 | ifnet_release(*outif); | |
1163 | *outif = ifp; | |
1c79356b | 1164 | } |
39236c6e A |
1165 | IFA_UNLOCK(&ia->ia_ifa); |
1166 | } else { | |
1167 | IFA_UNLOCK(&ia->ia_ifa); | |
1c79356b | 1168 | } |
6d2010ae | 1169 | IFA_REMREF(&ia->ia_ifa); |
39236c6e A |
1170 | ia = NULL; |
1171 | } | |
1172 | ||
1173 | if (nocell && error == EHOSTUNREACH) { | |
1174 | soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED | | |
1175 | SO_FILT_HINT_IFDENIED)); | |
1c79356b | 1176 | } |
39236c6e A |
1177 | |
1178 | return (error); | |
1c79356b A |
1179 | } |
1180 | ||
/*
 * Outer subroutine:
 * Connect from a socket to a specified address.
 * Both address and port must be specified in argument sin.
 * If don't have a local address for this socket yet,
 * then pick one.
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 *
 * Parameters:
 *	inp	PCB being connected; its socket lock is dropped and
 *		re-taken inside this routine.
 *	nam	Remote address; may be modified by in_pcbladdr().
 *	p	Process on whose behalf an implicit in_pcbbind() is done.
 *	ifscope	Interface scope override, passed to in_pcbladdr().
 *	outif	Optional; filled in by in_pcbladdr().
 *
 * Returns:	0		Success
 *		ECONNREFUSED	Socket was aborted while it was unlocked
 *		EADDRINUSE	An identical 4-tuple is already in the hash
 *		(other)		Error from in_pcbladdr() or in_pcbbind()
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
	struct inpcb *pcb;
	int error;

	/*
	 * Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif)) != 0)
		return (error);

	/* The hash lookup is done with our own socket lock dropped */
	socket_unlock(inp->inp_socket, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(inp->inp_socket, 0);

	/*
	 * Check if the socket is still in a valid state. When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((inp->inp_socket->so_flags & SOF_ABORTED) != 0) {
		/*
		 * A pcb handed back by the lookup has to be released on
		 * every exit path, exactly as the EADDRINUSE case below
		 * does; otherwise its reference is never dropped.
		 */
		if (pcb != NULL) {
			in_pcb_checkstate(pcb, WNT_RELEASE,
			    pcb == inp ? 1 : 0);
		}
		return (ECONNREFUSED);
	}

	if (pcb != NULL) {
		/* The socket lock is already held only if pcb is inp itself */
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return (EADDRINUSE);
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			/* No local port yet: let in_pcbbind() choose one */
			error = in_pcbbind(inp, NULL, p);
			if (error)
				return (error);
		}
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(inp->inp_socket, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(inp->inp_socket, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(inp->inp_socket, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(inp->inp_socket, 0);
		}
	}
	/* Record the peer and rehash under the exclusive pcbinfo lock */
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	return (0);
}
1260 | ||
1261 | void | |
2d21ac55 | 1262 | in_pcbdisconnect(struct inpcb *inp) |
1c79356b | 1263 | { |
39236c6e | 1264 | struct socket *so = inp->inp_socket; |
1c79356b A |
1265 | |
1266 | inp->inp_faddr.s_addr = INADDR_ANY; | |
1267 | inp->inp_fport = 0; | |
91447636 | 1268 | |
39236c6e A |
1269 | if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { |
1270 | /* lock inversion issue, mostly with udp multicast packets */ | |
1271 | socket_unlock(so, 0); | |
1272 | lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); | |
1273 | socket_lock(so, 0); | |
91447636 A |
1274 | } |
1275 | ||
1c79356b | 1276 | in_pcbrehash(inp); |
39236c6e A |
1277 | lck_rw_done(inp->inp_pcbinfo->ipi_lock); |
1278 | /* | |
1279 | * A multipath subflow socket would have its SS_NOFDREF set by default, | |
1280 | * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB; | |
1281 | * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared. | |
1282 | */ | |
1283 | if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) | |
1c79356b A |
1284 | in_pcbdetach(inp); |
1285 | } | |
1286 | ||
1287 | void | |
2d21ac55 | 1288 | in_pcbdetach(struct inpcb *inp) |
1c79356b A |
1289 | { |
1290 | struct socket *so = inp->inp_socket; | |
1c79356b | 1291 | |
39236c6e A |
1292 | if (so->so_pcb == NULL) { |
1293 | /* PCB has been disposed */ | |
1294 | panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__, | |
1295 | inp, so, SOCK_PROTO(so)); | |
1296 | /* NOTREACHED */ | |
91447636 | 1297 | } |
ab86ba33 | 1298 | |
1c79356b | 1299 | #if IPSEC |
39236c6e A |
1300 | if (inp->inp_sp != NULL) { |
1301 | (void) ipsec4_delete_pcbpolicy(inp); | |
91447636 | 1302 | } |
39236c6e | 1303 | #endif /* IPSEC */ |
91447636 A |
1304 | |
1305 | /* mark socket state as dead */ | |
39236c6e A |
1306 | if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) { |
1307 | panic("%s: so=%p proto=%d couldn't set to STOPUSING\n", | |
1308 | __func__, so, SOCK_PROTO(so)); | |
1309 | /* NOTREACHED */ | |
1310 | } | |
1c79356b | 1311 | |
39236c6e | 1312 | if (!(so->so_flags & SOF_PCBCLEARING)) { |
6d2010ae | 1313 | struct ip_moptions *imo; |
2d21ac55 | 1314 | |
91447636 | 1315 | inp->inp_vflag = 0; |
39236c6e A |
1316 | if (inp->inp_options != NULL) { |
1317 | (void) m_free(inp->inp_options); | |
1318 | inp->inp_options = NULL; | |
91447636 | 1319 | } |
39236c6e | 1320 | ROUTE_RELEASE(&inp->inp_route); |
6d2010ae | 1321 | imo = inp->inp_moptions; |
91447636 | 1322 | inp->inp_moptions = NULL; |
6d2010ae A |
1323 | if (imo != NULL) |
1324 | IMO_REMREF(imo); | |
91447636 A |
1325 | sofreelastref(so, 0); |
1326 | inp->inp_state = INPCB_STATE_DEAD; | |
39236c6e A |
1327 | /* makes sure we're not called twice from so_close */ |
1328 | so->so_flags |= SOF_PCBCLEARING; | |
1329 | ||
1330 | inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST); | |
91447636 A |
1331 | } |
1332 | } | |
1c79356b | 1333 | |
1c79356b | 1334 | |
39236c6e A |
1335 | void |
1336 | in_pcbdispose(struct inpcb *inp) | |
91447636 A |
1337 | { |
1338 | struct socket *so = inp->inp_socket; | |
1339 | struct inpcbinfo *ipi = inp->inp_pcbinfo; | |
1340 | ||
39236c6e A |
1341 | if (so != NULL && so->so_usecount != 0) { |
1342 | panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n", | |
1343 | __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount, | |
1344 | solockhistory_nr(so)); | |
1345 | /* NOTREACHED */ | |
1346 | } else if (inp->inp_wantcnt != WNT_STOPUSING) { | |
1347 | if (so != NULL) { | |
1348 | panic_plain("%s: inp %p invalid wantcnt %d, so %p " | |
1349 | "[%d,%d] usecount %d retaincnt %d state 0x%x " | |
1350 | "flags 0x%x lockhistory %s\n", __func__, inp, | |
1351 | inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so), | |
1352 | so->so_usecount, so->so_retaincnt, so->so_state, | |
1353 | so->so_flags, solockhistory_nr(so)); | |
1354 | /* NOTREACHED */ | |
1355 | } else { | |
1356 | panic("%s: inp %p invalid wantcnt %d no socket\n", | |
1357 | __func__, inp, inp->inp_wantcnt); | |
1358 | /* NOTREACHED */ | |
1359 | } | |
91447636 | 1360 | } |
91447636 | 1361 | |
39236c6e | 1362 | lck_rw_assert(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE); |
91447636 A |
1363 | |
1364 | inp->inp_gencnt = ++ipi->ipi_gencnt; | |
316670eb | 1365 | /* access ipi in in_pcbremlists */ |
91447636 | 1366 | in_pcbremlists(inp); |
316670eb | 1367 | |
39236c6e | 1368 | if (so != NULL) { |
91447636 A |
1369 | if (so->so_proto->pr_flags & PR_PCBLOCK) { |
1370 | sofreelastref(so, 0); | |
39236c6e A |
1371 | if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) { |
1372 | /* | |
1373 | * selthreadclear() already called | |
1374 | * during sofreelastref() above. | |
1375 | */ | |
91447636 A |
1376 | sbrelease(&so->so_rcv); |
1377 | sbrelease(&so->so_snd); | |
1378 | } | |
39236c6e A |
1379 | if (so->so_head != NULL) { |
1380 | panic("%s: so=%p head still exist\n", | |
1381 | __func__, so); | |
1382 | /* NOTREACHED */ | |
1383 | } | |
1384 | lck_mtx_unlock(&inp->inpcb_mtx); | |
1385 | lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp); | |
9bccf70c | 1386 | } |
39236c6e A |
1387 | /* makes sure we're not called twice from so_close */ |
1388 | so->so_flags |= SOF_PCBCLEARING; | |
1389 | so->so_saved_pcb = (caddr_t)inp; | |
1390 | so->so_pcb = NULL; | |
1391 | inp->inp_socket = NULL; | |
2d21ac55 A |
1392 | #if CONFIG_MACF_NET |
1393 | mac_inpcb_label_destroy(inp); | |
39236c6e | 1394 | #endif /* CONFIG_MACF_NET */ |
b0d623f7 A |
1395 | /* |
1396 | * In case there a route cached after a detach (possible | |
1397 | * in the tcp case), make sure that it is freed before | |
1398 | * we deallocate the structure. | |
1399 | */ | |
39236c6e A |
1400 | ROUTE_RELEASE(&inp->inp_route); |
1401 | if (!so->cached_in_sock_layer) { | |
91447636 | 1402 | zfree(ipi->ipi_zone, inp); |
55e303ae | 1403 | } |
91447636 | 1404 | sodealloc(so); |
9bccf70c | 1405 | } |
1c79356b A |
1406 | } |
1407 | ||
1408 | /* | |
39236c6e | 1409 | * The calling convention of in_getsockaddr() and in_getpeeraddr() was |
1c79356b A |
1410 | * modified to match the pru_sockaddr() and pru_peeraddr() entry points |
1411 | * in struct pr_usrreqs, so that protocols can just reference then directly | |
39236c6e | 1412 | * without the need for a wrapper function. |
1c79356b A |
1413 | */ |
1414 | int | |
39236c6e | 1415 | in_getsockaddr(struct socket *so, struct sockaddr **nam) |
1c79356b | 1416 | { |
2d21ac55 A |
1417 | struct inpcb *inp; |
1418 | struct sockaddr_in *sin; | |
1c79356b A |
1419 | |
1420 | /* | |
1421 | * Do the malloc first in case it blocks. | |
1422 | */ | |
39236c6e | 1423 | MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK); |
0b4e3aa0 | 1424 | if (sin == NULL) |
39236c6e A |
1425 | return (ENOBUFS); |
1426 | bzero(sin, sizeof (*sin)); | |
1c79356b | 1427 | sin->sin_family = AF_INET; |
39236c6e | 1428 | sin->sin_len = sizeof (*sin); |
1c79356b | 1429 | |
39236c6e | 1430 | if ((inp = sotoinpcb(so)) == NULL) { |
1c79356b | 1431 | FREE(sin, M_SONAME); |
39236c6e | 1432 | return (EINVAL); |
1c79356b A |
1433 | } |
1434 | sin->sin_port = inp->inp_lport; | |
1435 | sin->sin_addr = inp->inp_laddr; | |
1c79356b A |
1436 | |
1437 | *nam = (struct sockaddr *)sin; | |
39236c6e | 1438 | return (0); |
1c79356b A |
1439 | } |
1440 | ||
1441 | int | |
39236c6e | 1442 | in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss) |
1c79356b | 1443 | { |
39236c6e | 1444 | struct sockaddr_in *sin = SIN(ss); |
1c79356b | 1445 | struct inpcb *inp; |
1c79356b | 1446 | |
39236c6e A |
1447 | VERIFY(ss != NULL); |
1448 | bzero(ss, sizeof (*ss)); | |
1449 | ||
1c79356b | 1450 | sin->sin_family = AF_INET; |
39236c6e | 1451 | sin->sin_len = sizeof (*sin); |
1c79356b | 1452 | |
39236c6e A |
1453 | if ((inp = sotoinpcb(so)) == NULL || |
1454 | (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) | |
1455 | return (inp == NULL ? EINVAL : EPROTOTYPE); | |
1456 | ||
1457 | sin->sin_port = inp->inp_lport; | |
1458 | sin->sin_addr = inp->inp_laddr; | |
1459 | return (0); | |
1460 | } | |
1461 | ||
1462 | int | |
1463 | in_getpeeraddr(struct socket *so, struct sockaddr **nam) | |
1464 | { | |
1465 | struct inpcb *inp; | |
1466 | struct sockaddr_in *sin; | |
1467 | ||
1468 | /* | |
1469 | * Do the malloc first in case it blocks. | |
1470 | */ | |
1471 | MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK); | |
1472 | if (sin == NULL) | |
1473 | return (ENOBUFS); | |
1474 | bzero((caddr_t)sin, sizeof (*sin)); | |
1475 | sin->sin_family = AF_INET; | |
1476 | sin->sin_len = sizeof (*sin); | |
1477 | ||
1478 | if ((inp = sotoinpcb(so)) == NULL) { | |
1c79356b | 1479 | FREE(sin, M_SONAME); |
39236c6e | 1480 | return (EINVAL); |
1c79356b A |
1481 | } |
1482 | sin->sin_port = inp->inp_fport; | |
1483 | sin->sin_addr = inp->inp_faddr; | |
1c79356b A |
1484 | |
1485 | *nam = (struct sockaddr *)sin; | |
39236c6e A |
1486 | return (0); |
1487 | } | |
1488 | ||
1489 | int | |
1490 | in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss) | |
1491 | { | |
1492 | struct sockaddr_in *sin = SIN(ss); | |
1493 | struct inpcb *inp; | |
1494 | ||
1495 | VERIFY(ss != NULL); | |
1496 | bzero(ss, sizeof (*ss)); | |
1497 | ||
1498 | sin->sin_family = AF_INET; | |
1499 | sin->sin_len = sizeof (*sin); | |
1500 | ||
1501 | if ((inp = sotoinpcb(so)) == NULL || | |
1502 | (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { | |
1503 | return (inp == NULL ? EINVAL : EPROTOTYPE); | |
1504 | } | |
1505 | ||
1506 | sin->sin_port = inp->inp_fport; | |
1507 | sin->sin_addr = inp->inp_faddr; | |
1508 | return (0); | |
1c79356b A |
1509 | } |
1510 | ||
1c79356b | 1511 | void |
2d21ac55 | 1512 | in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
39236c6e | 1513 | int errno, void (*notify)(struct inpcb *, int)) |
1c79356b | 1514 | { |
91447636 A |
1515 | struct inpcb *inp; |
1516 | ||
39236c6e | 1517 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
1c79356b | 1518 | |
39236c6e | 1519 | LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { |
9bccf70c | 1520 | #if INET6 |
39236c6e | 1521 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1522 | continue; |
39236c6e | 1523 | #endif /* INET6 */ |
1c79356b | 1524 | if (inp->inp_faddr.s_addr != faddr.s_addr || |
9bccf70c | 1525 | inp->inp_socket == NULL) |
39236c6e A |
1526 | continue; |
1527 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) | |
91447636 A |
1528 | continue; |
1529 | socket_lock(inp->inp_socket, 1); | |
9bccf70c | 1530 | (*notify)(inp, errno); |
39236c6e | 1531 | (void) in_pcb_checkstate(inp, WNT_RELEASE, 1); |
91447636 | 1532 | socket_unlock(inp->inp_socket, 1); |
1c79356b | 1533 | } |
39236c6e | 1534 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
1535 | } |
1536 | ||
1537 | /* | |
1538 | * Check for alternatives when higher level complains | |
1539 | * about service problems. For now, invalidate cached | |
1540 | * routing information. If the route was created dynamically | |
1541 | * (by a redirect), time to try a default gateway again. | |
1542 | */ | |
1543 | void | |
2d21ac55 | 1544 | in_losing(struct inpcb *inp) |
1c79356b | 1545 | { |
39236c6e | 1546 | boolean_t release = FALSE; |
2d21ac55 | 1547 | struct rtentry *rt; |
1c79356b A |
1548 | struct rt_addrinfo info; |
1549 | ||
b0d623f7 | 1550 | if ((rt = inp->inp_route.ro_rt) != NULL) { |
39236c6e | 1551 | struct in_ifaddr *ia = NULL; |
b0d623f7 | 1552 | |
39236c6e | 1553 | bzero((caddr_t)&info, sizeof (info)); |
b0d623f7 | 1554 | RT_LOCK(rt); |
1c79356b | 1555 | info.rti_info[RTAX_DST] = |
39236c6e | 1556 | (struct sockaddr *)&inp->inp_route.ro_dst; |
1c79356b A |
1557 | info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; |
1558 | info.rti_info[RTAX_NETMASK] = rt_mask(rt); | |
1559 | rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); | |
b0d623f7 A |
1560 | if (rt->rt_flags & RTF_DYNAMIC) { |
1561 | /* | |
1562 | * Prevent another thread from modifying rt_key, | |
1563 | * rt_gateway via rt_setgate() after rt_lock is | |
1564 | * dropped by marking the route as defunct. | |
1565 | */ | |
1566 | rt->rt_flags |= RTF_CONDEMNED; | |
1567 | RT_UNLOCK(rt); | |
1568 | (void) rtrequest(RTM_DELETE, rt_key(rt), | |
39236c6e | 1569 | rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); |
b0d623f7 A |
1570 | } else { |
1571 | RT_UNLOCK(rt); | |
1572 | } | |
2d21ac55 | 1573 | /* if the address is gone keep the old route in the pcb */ |
39236c6e A |
1574 | if (inp->inp_laddr.s_addr != INADDR_ANY && |
1575 | (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { | |
1576 | /* | |
1577 | * Address is around; ditch the route. A new route | |
1578 | * can be allocated the next time output is attempted. | |
1579 | */ | |
1580 | release = TRUE; | |
2d21ac55 | 1581 | } |
39236c6e A |
1582 | if (ia != NULL) |
1583 | IFA_REMREF(&ia->ia_ifa); | |
1c79356b | 1584 | } |
39236c6e A |
1585 | if (rt == NULL || release) |
1586 | ROUTE_RELEASE(&inp->inp_route); | |
1c79356b A |
1587 | } |
1588 | ||
1589 | /* | |
1590 | * After a routing change, flush old routing | |
1591 | * and allocate a (hopefully) better one. | |
1592 | */ | |
9bccf70c | 1593 | void |
39236c6e | 1594 | in_rtchange(struct inpcb *inp, int errno) |
1c79356b | 1595 | { |
39236c6e A |
1596 | #pragma unused(errno) |
1597 | boolean_t release = FALSE; | |
2d21ac55 A |
1598 | struct rtentry *rt; |
1599 | ||
1600 | if ((rt = inp->inp_route.ro_rt) != NULL) { | |
39236c6e | 1601 | struct in_ifaddr *ia = NULL; |
b0d623f7 | 1602 | |
39236c6e A |
1603 | /* if address is gone, keep the old route */ |
1604 | if (inp->inp_laddr.s_addr != INADDR_ANY && | |
1605 | (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { | |
1606 | /* | |
1607 | * Address is around; ditch the route. A new route | |
1608 | * can be allocated the next time output is attempted. | |
1609 | */ | |
1610 | release = TRUE; | |
2d21ac55 | 1611 | } |
39236c6e A |
1612 | if (ia != NULL) |
1613 | IFA_REMREF(&ia->ia_ifa); | |
1c79356b | 1614 | } |
39236c6e A |
1615 | if (rt == NULL || release) |
1616 | ROUTE_RELEASE(&inp->inp_route); | |
1c79356b A |
1617 | } |
1618 | ||
1619 | /* | |
1620 | * Lookup a PCB based on the local address and port. | |
1621 | */ | |
1622 | struct inpcb * | |
2d21ac55 | 1623 | in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, |
39236c6e | 1624 | unsigned int lport_arg, int wild_okay) |
1c79356b | 1625 | { |
2d21ac55 | 1626 | struct inpcb *inp; |
1c79356b A |
1627 | int matchwild = 3, wildcard; |
1628 | u_short lport = lport_arg; | |
1629 | ||
39236c6e | 1630 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0); |
1c79356b A |
1631 | |
1632 | if (!wild_okay) { | |
1633 | struct inpcbhead *head; | |
1634 | /* | |
1635 | * Look for an unconnected (wildcard foreign addr) PCB that | |
1636 | * matches the local address and port we're looking for. | |
1637 | */ | |
39236c6e A |
1638 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, |
1639 | pcbinfo->ipi_hashmask)]; | |
9bccf70c A |
1640 | LIST_FOREACH(inp, head, inp_hash) { |
1641 | #if INET6 | |
39236c6e | 1642 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1643 | continue; |
39236c6e | 1644 | #endif /* INET6 */ |
1c79356b A |
1645 | if (inp->inp_faddr.s_addr == INADDR_ANY && |
1646 | inp->inp_laddr.s_addr == laddr.s_addr && | |
1647 | inp->inp_lport == lport) { | |
1648 | /* | |
1649 | * Found. | |
1650 | */ | |
1651 | return (inp); | |
1652 | } | |
1653 | } | |
1654 | /* | |
1655 | * Not found. | |
1656 | */ | |
39236c6e | 1657 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0); |
1c79356b A |
1658 | return (NULL); |
1659 | } else { | |
1660 | struct inpcbporthead *porthash; | |
1661 | struct inpcbport *phd; | |
1662 | struct inpcb *match = NULL; | |
1663 | /* | |
1664 | * Best fit PCB lookup. | |
1665 | * | |
1666 | * First see if this local port is in use by looking on the | |
1667 | * port hash list. | |
1668 | */ | |
39236c6e A |
1669 | porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, |
1670 | pcbinfo->ipi_porthashmask)]; | |
9bccf70c | 1671 | LIST_FOREACH(phd, porthash, phd_hash) { |
1c79356b A |
1672 | if (phd->phd_port == lport) |
1673 | break; | |
1674 | } | |
1675 | if (phd != NULL) { | |
1676 | /* | |
1677 | * Port is in use by one or more PCBs. Look for best | |
1678 | * fit. | |
1679 | */ | |
9bccf70c | 1680 | LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { |
1c79356b | 1681 | wildcard = 0; |
9bccf70c | 1682 | #if INET6 |
39236c6e | 1683 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1684 | continue; |
39236c6e | 1685 | #endif /* INET6 */ |
1c79356b A |
1686 | if (inp->inp_faddr.s_addr != INADDR_ANY) |
1687 | wildcard++; | |
1688 | if (inp->inp_laddr.s_addr != INADDR_ANY) { | |
1689 | if (laddr.s_addr == INADDR_ANY) | |
1690 | wildcard++; | |
39236c6e A |
1691 | else if (inp->inp_laddr.s_addr != |
1692 | laddr.s_addr) | |
1c79356b A |
1693 | continue; |
1694 | } else { | |
1695 | if (laddr.s_addr != INADDR_ANY) | |
1696 | wildcard++; | |
1697 | } | |
1698 | if (wildcard < matchwild) { | |
1699 | match = inp; | |
1700 | matchwild = wildcard; | |
1701 | if (matchwild == 0) { | |
1702 | break; | |
1703 | } | |
1704 | } | |
1705 | } | |
1706 | } | |
39236c6e A |
1707 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match, |
1708 | 0, 0, 0, 0); | |
1c79356b A |
1709 | return (match); |
1710 | } | |
1711 | } | |
1712 | ||
6d2010ae A |
1713 | /* |
1714 | * Check if PCB exists in hash list. | |
1715 | */ | |
1716 | int | |
39236c6e A |
1717 | in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
1718 | u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard, | |
1719 | uid_t *uid, gid_t *gid, struct ifnet *ifp) | |
6d2010ae A |
1720 | { |
1721 | struct inpcbhead *head; | |
1722 | struct inpcb *inp; | |
1723 | u_short fport = fport_arg, lport = lport_arg; | |
39236c6e A |
1724 | int found = 0; |
1725 | struct inpcb *local_wild = NULL; | |
1726 | #if INET6 | |
1727 | struct inpcb *local_wild_mapped = NULL; | |
1728 | #endif /* INET6 */ | |
6d2010ae A |
1729 | |
1730 | *uid = UID_MAX; | |
1731 | *gid = GID_MAX; | |
316670eb | 1732 | |
6d2010ae A |
1733 | /* |
1734 | * We may have found the pcb in the last lookup - check this first. | |
1735 | */ | |
1736 | ||
39236c6e | 1737 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
6d2010ae A |
1738 | |
1739 | /* | |
1740 | * First look for an exact match. | |
1741 | */ | |
39236c6e A |
1742 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, |
1743 | pcbinfo->ipi_hashmask)]; | |
6d2010ae A |
1744 | LIST_FOREACH(inp, head, inp_hash) { |
1745 | #if INET6 | |
39236c6e | 1746 | if (!(inp->inp_vflag & INP_IPV4)) |
6d2010ae | 1747 | continue; |
39236c6e A |
1748 | #endif /* INET6 */ |
1749 | if (inp_restricted(inp, ifp)) | |
1750 | continue; | |
1751 | ||
1752 | if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && | |
1753 | (inp->inp_flags & INP_NO_IFT_CELLULAR)) | |
316670eb A |
1754 | continue; |
1755 | ||
6d2010ae A |
1756 | if (inp->inp_faddr.s_addr == faddr.s_addr && |
1757 | inp->inp_laddr.s_addr == laddr.s_addr && | |
1758 | inp->inp_fport == fport && | |
1759 | inp->inp_lport == lport) { | |
1760 | if ((found = (inp->inp_socket != NULL))) { | |
1761 | /* | |
1762 | * Found. | |
1763 | */ | |
316670eb A |
1764 | *uid = kauth_cred_getuid( |
1765 | inp->inp_socket->so_cred); | |
1766 | *gid = kauth_cred_getgid( | |
1767 | inp->inp_socket->so_cred); | |
6d2010ae | 1768 | } |
39236c6e | 1769 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae A |
1770 | return (found); |
1771 | } | |
1772 | } | |
6d2010ae | 1773 | |
39236c6e A |
1774 | if (!wildcard) { |
1775 | /* | |
1776 | * Not found. | |
1777 | */ | |
1778 | lck_rw_done(pcbinfo->ipi_lock); | |
1779 | return (0); | |
1780 | } | |
316670eb | 1781 | |
39236c6e A |
1782 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, |
1783 | pcbinfo->ipi_hashmask)]; | |
1784 | LIST_FOREACH(inp, head, inp_hash) { | |
6d2010ae | 1785 | #if INET6 |
39236c6e A |
1786 | if (!(inp->inp_vflag & INP_IPV4)) |
1787 | continue; | |
6d2010ae | 1788 | #endif /* INET6 */ |
39236c6e A |
1789 | if (inp_restricted(inp, ifp)) |
1790 | continue; | |
1791 | ||
1792 | if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && | |
1793 | (inp->inp_flags & INP_NO_IFT_CELLULAR)) | |
1794 | continue; | |
1795 | ||
1796 | if (inp->inp_faddr.s_addr == INADDR_ANY && | |
1797 | inp->inp_lport == lport) { | |
1798 | if (inp->inp_laddr.s_addr == laddr.s_addr) { | |
1799 | if ((found = (inp->inp_socket != NULL))) { | |
316670eb | 1800 | *uid = kauth_cred_getuid( |
39236c6e | 1801 | inp->inp_socket->so_cred); |
316670eb | 1802 | *gid = kauth_cred_getgid( |
39236c6e | 1803 | inp->inp_socket->so_cred); |
6d2010ae | 1804 | } |
39236c6e | 1805 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae | 1806 | return (found); |
39236c6e A |
1807 | } else if (inp->inp_laddr.s_addr == INADDR_ANY) { |
1808 | #if INET6 | |
1809 | if (inp->inp_socket && | |
1810 | SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) | |
1811 | local_wild_mapped = inp; | |
1812 | else | |
6d2010ae | 1813 | #endif /* INET6 */ |
39236c6e A |
1814 | local_wild = inp; |
1815 | } | |
6d2010ae | 1816 | } |
39236c6e A |
1817 | } |
1818 | if (local_wild == NULL) { | |
1819 | #if INET6 | |
1820 | if (local_wild_mapped != NULL) { | |
1821 | if ((found = (local_wild_mapped->inp_socket != NULL))) { | |
316670eb | 1822 | *uid = kauth_cred_getuid( |
39236c6e | 1823 | local_wild_mapped->inp_socket->so_cred); |
316670eb | 1824 | *gid = kauth_cred_getgid( |
39236c6e | 1825 | local_wild_mapped->inp_socket->so_cred); |
6d2010ae | 1826 | } |
39236c6e | 1827 | lck_rw_done(pcbinfo->ipi_lock); |
6d2010ae A |
1828 | return (found); |
1829 | } | |
39236c6e A |
1830 | #endif /* INET6 */ |
1831 | lck_rw_done(pcbinfo->ipi_lock); | |
1832 | return (0); | |
6d2010ae | 1833 | } |
39236c6e A |
1834 | if ((found = (local_wild->inp_socket != NULL))) { |
1835 | *uid = kauth_cred_getuid( | |
1836 | local_wild->inp_socket->so_cred); | |
1837 | *gid = kauth_cred_getgid( | |
1838 | local_wild->inp_socket->so_cred); | |
1839 | } | |
1840 | lck_rw_done(pcbinfo->ipi_lock); | |
1841 | return (found); | |
6d2010ae A |
1842 | } |
1843 | ||
1c79356b A |
1844 | /* |
1845 | * Lookup PCB in hash list. | |
1846 | */ | |
1847 | struct inpcb * | |
39236c6e A |
1848 | in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
1849 | u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard, | |
1850 | struct ifnet *ifp) | |
1c79356b A |
1851 | { |
1852 | struct inpcbhead *head; | |
2d21ac55 | 1853 | struct inpcb *inp; |
1c79356b | 1854 | u_short fport = fport_arg, lport = lport_arg; |
39236c6e A |
1855 | struct inpcb *local_wild = NULL; |
1856 | #if INET6 | |
1857 | struct inpcb *local_wild_mapped = NULL; | |
1858 | #endif /* INET6 */ | |
1c79356b A |
1859 | |
1860 | /* | |
1861 | * We may have found the pcb in the last lookup - check this first. | |
1862 | */ | |
1863 | ||
39236c6e | 1864 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
1c79356b A |
1865 | |
1866 | /* | |
1867 | * First look for an exact match. | |
1868 | */ | |
39236c6e A |
1869 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, |
1870 | pcbinfo->ipi_hashmask)]; | |
9bccf70c A |
1871 | LIST_FOREACH(inp, head, inp_hash) { |
1872 | #if INET6 | |
39236c6e | 1873 | if (!(inp->inp_vflag & INP_IPV4)) |
1c79356b | 1874 | continue; |
39236c6e A |
1875 | #endif /* INET6 */ |
1876 | if (inp_restricted(inp, ifp)) | |
1877 | continue; | |
1878 | ||
1879 | if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && | |
1880 | (inp->inp_flags & INP_NO_IFT_CELLULAR)) | |
316670eb A |
1881 | continue; |
1882 | ||
1c79356b A |
1883 | if (inp->inp_faddr.s_addr == faddr.s_addr && |
1884 | inp->inp_laddr.s_addr == laddr.s_addr && | |
1885 | inp->inp_fport == fport && | |
1886 | inp->inp_lport == lport) { | |
1887 | /* | |
1888 | * Found. | |
1889 | */ | |
39236c6e A |
1890 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != |
1891 | WNT_STOPUSING) { | |
1892 | lck_rw_done(pcbinfo->ipi_lock); | |
91447636 | 1893 | return (inp); |
39236c6e A |
1894 | } else { |
1895 | /* it's there but dead, say it isn't found */ | |
1896 | lck_rw_done(pcbinfo->ipi_lock); | |
316670eb | 1897 | return (NULL); |
91447636 | 1898 | } |
1c79356b A |
1899 | } |
1900 | } | |
1c79356b | 1901 | |
39236c6e A |
1902 | if (!wildcard) { |
1903 | /* | |
1904 | * Not found. | |
1905 | */ | |
1906 | lck_rw_done(pcbinfo->ipi_lock); | |
1907 | return (NULL); | |
1908 | } | |
1909 | ||
1910 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, | |
1911 | pcbinfo->ipi_hashmask)]; | |
1912 | LIST_FOREACH(inp, head, inp_hash) { | |
9bccf70c | 1913 | #if INET6 |
39236c6e A |
1914 | if (!(inp->inp_vflag & INP_IPV4)) |
1915 | continue; | |
1916 | #endif /* INET6 */ | |
1917 | if (inp_restricted(inp, ifp)) | |
1918 | continue; | |
316670eb | 1919 | |
39236c6e A |
1920 | if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && |
1921 | (inp->inp_flags & INP_NO_IFT_CELLULAR)) | |
1922 | continue; | |
1923 | ||
1924 | if (inp->inp_faddr.s_addr == INADDR_ANY && | |
1925 | inp->inp_lport == lport) { | |
1926 | if (inp->inp_laddr.s_addr == laddr.s_addr) { | |
1927 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != | |
1928 | WNT_STOPUSING) { | |
1929 | lck_rw_done(pcbinfo->ipi_lock); | |
1930 | return (inp); | |
1931 | } else { | |
1932 | /* it's dead; say it isn't found */ | |
1933 | lck_rw_done(pcbinfo->ipi_lock); | |
1934 | return (NULL); | |
91447636 | 1935 | } |
39236c6e | 1936 | } else if (inp->inp_laddr.s_addr == INADDR_ANY) { |
2d21ac55 | 1937 | #if INET6 |
39236c6e A |
1938 | if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) |
1939 | local_wild_mapped = inp; | |
1940 | else | |
2d21ac55 | 1941 | #endif /* INET6 */ |
1c79356b | 1942 | local_wild = inp; |
1c79356b A |
1943 | } |
1944 | } | |
39236c6e A |
1945 | } |
1946 | if (local_wild == NULL) { | |
2d21ac55 | 1947 | #if INET6 |
39236c6e A |
1948 | if (local_wild_mapped != NULL) { |
1949 | if (in_pcb_checkstate(local_wild_mapped, | |
1950 | WNT_ACQUIRE, 0) != WNT_STOPUSING) { | |
1951 | lck_rw_done(pcbinfo->ipi_lock); | |
1952 | return (local_wild_mapped); | |
1953 | } else { | |
1954 | /* it's dead; say it isn't found */ | |
1955 | lck_rw_done(pcbinfo->ipi_lock); | |
1956 | return (NULL); | |
91447636 | 1957 | } |
91447636 | 1958 | } |
39236c6e A |
1959 | #endif /* INET6 */ |
1960 | lck_rw_done(pcbinfo->ipi_lock); | |
1961 | return (NULL); | |
1962 | } | |
1963 | if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) { | |
1964 | lck_rw_done(pcbinfo->ipi_lock); | |
1965 | return (local_wild); | |
1c79356b | 1966 | } |
1c79356b | 1967 | /* |
39236c6e | 1968 | * It's either not found or is already dead. |
1c79356b | 1969 | */ |
39236c6e | 1970 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
1971 | return (NULL); |
1972 | } | |
1973 | ||
1974 | /* | |
1975 | * Insert PCB onto various hash lists. | |
1976 | */ | |
1977 | int | |
2d21ac55 | 1978 | in_pcbinshash(struct inpcb *inp, int locked) |
1c79356b A |
1979 | { |
1980 | struct inpcbhead *pcbhash; | |
1981 | struct inpcbporthead *pcbporthash; | |
1982 | struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; | |
1983 | struct inpcbport *phd; | |
1984 | u_int32_t hashkey_faddr; | |
1985 | ||
39236c6e A |
1986 | if (!locked) { |
1987 | if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) { | |
1988 | /* | |
1989 | * Lock inversion issue, mostly with udp | |
1990 | * multicast packets | |
1991 | */ | |
1992 | socket_unlock(inp->inp_socket, 0); | |
1993 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); | |
1994 | socket_lock(inp->inp_socket, 0); | |
6d2010ae | 1995 | if (inp->inp_state == INPCB_STATE_DEAD) { |
39236c6e A |
1996 | /* |
1997 | * The socket got dropped when | |
1998 | * it was unlocked | |
1999 | */ | |
2000 | lck_rw_done(pcbinfo->ipi_lock); | |
2001 | return (ECONNABORTED); | |
6d2010ae | 2002 | } |
39236c6e A |
2003 | } |
2004 | } | |
b0d623f7 | 2005 | |
1c79356b A |
2006 | #if INET6 |
2007 | if (inp->inp_vflag & INP_IPV6) | |
2008 | hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; | |
2009 | else | |
2010 | #endif /* INET6 */ | |
39236c6e | 2011 | hashkey_faddr = inp->inp_faddr.s_addr; |
1c79356b | 2012 | |
39236c6e A |
2013 | inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, |
2014 | inp->inp_fport, pcbinfo->ipi_hashmask); | |
91447636 | 2015 | |
39236c6e | 2016 | pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element]; |
1c79356b | 2017 | |
39236c6e A |
2018 | pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport, |
2019 | pcbinfo->ipi_porthashmask)]; | |
1c79356b A |
2020 | |
2021 | /* | |
2022 | * Go through port list and look for a head for this lport. | |
2023 | */ | |
9bccf70c | 2024 | LIST_FOREACH(phd, pcbporthash, phd_hash) { |
1c79356b A |
2025 | if (phd->phd_port == inp->inp_lport) |
2026 | break; | |
2027 | } | |
316670eb A |
2028 | |
2029 | VERIFY(inp->inp_state != INPCB_STATE_DEAD); | |
2030 | ||
1c79356b A |
2031 | /* |
2032 | * If none exists, malloc one and tack it on. | |
2033 | */ | |
2034 | if (phd == NULL) { | |
39236c6e A |
2035 | MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport), |
2036 | M_PCB, M_WAITOK); | |
1c79356b | 2037 | if (phd == NULL) { |
91447636 | 2038 | if (!locked) |
39236c6e | 2039 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
2040 | return (ENOBUFS); /* XXX */ |
2041 | } | |
2042 | phd->phd_port = inp->inp_lport; | |
2043 | LIST_INIT(&phd->phd_pcblist); | |
2044 | LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); | |
2045 | } | |
2046 | inp->inp_phd = phd; | |
2047 | LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); | |
2048 | LIST_INSERT_HEAD(pcbhash, inp, inp_hash); | |
91447636 | 2049 | if (!locked) |
39236c6e | 2050 | lck_rw_done(pcbinfo->ipi_lock); |
1c79356b A |
2051 | return (0); |
2052 | } | |
2053 | ||
2054 | /* | |
2055 | * Move PCB to the proper hash bucket when { faddr, fport } have been | |
2056 | * changed. NOTE: This does not handle the case of the lport changing (the | |
2057 | * hashed port list would have to be updated as well), so the lport must | |
2058 | * not change after in_pcbinshash() has been called. | |
2059 | */ | |
2060 | void | |
2d21ac55 | 2061 | in_pcbrehash(struct inpcb *inp) |
1c79356b A |
2062 | { |
2063 | struct inpcbhead *head; | |
2064 | u_int32_t hashkey_faddr; | |
2065 | ||
2066 | #if INET6 | |
2067 | if (inp->inp_vflag & INP_IPV6) | |
2068 | hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; | |
2069 | else | |
2070 | #endif /* INET6 */ | |
39236c6e A |
2071 | hashkey_faddr = inp->inp_faddr.s_addr; |
2072 | ||
2073 | inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, | |
2074 | inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask); | |
2075 | head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element]; | |
1c79356b A |
2076 | |
2077 | LIST_REMOVE(inp, inp_hash); | |
2078 | LIST_INSERT_HEAD(head, inp, inp_hash); | |
1c79356b A |
2079 | } |
2080 | ||
2081 | /* | |
2082 | * Remove PCB from various lists. | |
316670eb | 2083 | * Must be called pcbinfo lock is held in exclusive mode. |
1c79356b A |
2084 | */ |
2085 | void | |
2d21ac55 | 2086 | in_pcbremlists(struct inpcb *inp) |
1c79356b A |
2087 | { |
2088 | inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; | |
1c79356b A |
2089 | |
2090 | if (inp->inp_lport) { | |
2091 | struct inpcbport *phd = inp->inp_phd; | |
2092 | ||
2093 | LIST_REMOVE(inp, inp_hash); | |
2094 | LIST_REMOVE(inp, inp_portlist); | |
55e303ae | 2095 | if (phd != NULL && (LIST_FIRST(&phd->phd_pcblist) == NULL)) { |
1c79356b A |
2096 | LIST_REMOVE(phd, phd_hash); |
2097 | FREE(phd, M_PCB); | |
2098 | } | |
2099 | } | |
39236c6e A |
2100 | |
2101 | if (inp->inp_flags2 & INP2_TIMEWAIT) { | |
2102 | /* Remove from time-wait queue */ | |
2103 | tcp_remove_from_time_wait(inp); | |
2104 | inp->inp_flags2 &= ~INP2_TIMEWAIT; | |
2105 | VERIFY(inp->inp_pcbinfo->ipi_twcount != 0); | |
2106 | inp->inp_pcbinfo->ipi_twcount--; | |
2107 | } else { | |
2108 | /* Remove from global inp list if it is not time-wait */ | |
2109 | LIST_REMOVE(inp, inp_list); | |
2110 | } | |
316670eb | 2111 | |
bd504ef0 | 2112 | if (inp->inp_flags2 & INP2_IN_FCTREE) { |
39236c6e | 2113 | inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE)); |
bd504ef0 A |
2114 | VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE)); |
2115 | } | |
39236c6e | 2116 | |
1c79356b A |
2117 | inp->inp_pcbinfo->ipi_count--; |
2118 | } | |
2119 | ||
39236c6e A |
2120 | /* |
2121 | * Mechanism used to defer the memory release of PCBs | |
2122 | * The pcb list will contain the pcb until the reaper can clean it up if | |
2123 | * the following conditions are met: | |
2124 | * 1) state "DEAD", | |
2125 | * 2) wantcnt is STOPUSING | |
2126 | * 3) usecount is 0 | |
91447636 | 2127 | * This function will be called to either mark the pcb as |
39236c6e | 2128 | */ |
91447636 A |
2129 | int |
2130 | in_pcb_checkstate(struct inpcb *pcb, int mode, int locked) | |
91447636 | 2131 | { |
39236c6e | 2132 | volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt; |
2d21ac55 A |
2133 | UInt32 origwant; |
2134 | UInt32 newwant; | |
91447636 A |
2135 | |
2136 | switch (mode) { | |
39236c6e A |
2137 | case WNT_STOPUSING: |
2138 | /* | |
2139 | * Try to mark the pcb as ready for recycling. CAS with | |
2140 | * STOPUSING, if success we're good, if it's in use, will | |
2141 | * be marked later | |
2142 | */ | |
2143 | if (locked == 0) | |
2144 | socket_lock(pcb->inp_socket, 1); | |
2145 | pcb->inp_state = INPCB_STATE_DEAD; | |
91447636 | 2146 | |
39236c6e A |
2147 | stopusing: |
2148 | if (pcb->inp_socket->so_usecount < 0) { | |
2149 | panic("%s: pcb=%p so=%p usecount is negative\n", | |
2150 | __func__, pcb, pcb->inp_socket); | |
2151 | /* NOTREACHED */ | |
2152 | } | |
2153 | if (locked == 0) | |
2154 | socket_unlock(pcb->inp_socket, 1); | |
91447636 | 2155 | |
39236c6e | 2156 | inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST); |
6d2010ae | 2157 | |
39236c6e A |
2158 | origwant = *wantcnt; |
2159 | if ((UInt16) origwant == 0xffff) /* should stop using */ | |
2160 | return (WNT_STOPUSING); | |
2161 | newwant = 0xffff; | |
2162 | if ((UInt16) origwant == 0) { | |
2163 | /* try to mark it as unsuable now */ | |
2164 | OSCompareAndSwap(origwant, newwant, wantcnt); | |
2165 | } | |
2166 | return (WNT_STOPUSING); | |
2167 | break; | |
91447636 | 2168 | |
39236c6e A |
2169 | case WNT_ACQUIRE: |
2170 | /* | |
2171 | * Try to increase reference to pcb. If WNT_STOPUSING | |
2172 | * should bail out. If socket state DEAD, try to set count | |
2173 | * to STOPUSING, return failed otherwise increase cnt. | |
2174 | */ | |
2175 | do { | |
91447636 | 2176 | origwant = *wantcnt; |
39236c6e A |
2177 | if ((UInt16) origwant == 0xffff) { |
2178 | /* should stop using */ | |
91447636 | 2179 | return (WNT_STOPUSING); |
91447636 | 2180 | } |
39236c6e A |
2181 | newwant = origwant + 1; |
2182 | } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); | |
2183 | return (WNT_ACQUIRE); | |
2184 | break; | |
91447636 | 2185 | |
39236c6e A |
2186 | case WNT_RELEASE: |
2187 | /* | |
2188 | * Release reference. If result is null and pcb state | |
2189 | * is DEAD, set wanted bit to STOPUSING | |
2190 | */ | |
2191 | if (locked == 0) | |
2192 | socket_lock(pcb->inp_socket, 1); | |
91447636 | 2193 | |
39236c6e A |
2194 | do { |
2195 | origwant = *wantcnt; | |
2196 | if ((UInt16) origwant == 0x0) { | |
2197 | panic("%s: pcb=%p release with zero count", | |
2198 | __func__, pcb); | |
2199 | /* NOTREACHED */ | |
2200 | } | |
2201 | if ((UInt16) origwant == 0xffff) { | |
2202 | /* should stop using */ | |
2203 | if (locked == 0) | |
2204 | socket_unlock(pcb->inp_socket, 1); | |
2205 | return (WNT_STOPUSING); | |
2206 | } | |
2207 | newwant = origwant - 1; | |
2208 | } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); | |
2209 | ||
2210 | if (pcb->inp_state == INPCB_STATE_DEAD) | |
2211 | goto stopusing; | |
2212 | if (pcb->inp_socket->so_usecount < 0) { | |
2213 | panic("%s: RELEASE pcb=%p so=%p usecount is negative\n", | |
2214 | __func__, pcb, pcb->inp_socket); | |
2215 | /* NOTREACHED */ | |
2216 | } | |
91447636 | 2217 | |
39236c6e A |
2218 | if (locked == 0) |
2219 | socket_unlock(pcb->inp_socket, 1); | |
2220 | return (WNT_RELEASE); | |
2221 | break; | |
91447636 | 2222 | |
39236c6e A |
2223 | default: |
2224 | panic("%s: so=%p not a valid state =%x\n", __func__, | |
2225 | pcb->inp_socket, mode); | |
2226 | /* NOTREACHED */ | |
91447636 A |
2227 | } |
2228 | ||
2229 | /* NOTREACHED */ | |
2230 | return (mode); | |
2231 | } | |
2232 | ||
2233 | /* | |
2234 | * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat. | |
2235 | * The inpcb_compat data structure is passed to user space and must | |
b0d623f7 | 2236 | * not change. We intentionally avoid copying pointers. |
91447636 A |
2237 | */ |
2238 | void | |
39236c6e | 2239 | inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat) |
91447636 | 2240 | { |
39236c6e | 2241 | bzero(inp_compat, sizeof (*inp_compat)); |
91447636 A |
2242 | inp_compat->inp_fport = inp->inp_fport; |
2243 | inp_compat->inp_lport = inp->inp_lport; | |
316670eb | 2244 | inp_compat->nat_owner = 0; |
39236c6e | 2245 | inp_compat->nat_cookie = 0; |
91447636 A |
2246 | inp_compat->inp_gencnt = inp->inp_gencnt; |
2247 | inp_compat->inp_flags = inp->inp_flags; | |
2248 | inp_compat->inp_flow = inp->inp_flow; | |
2249 | inp_compat->inp_vflag = inp->inp_vflag; | |
2250 | inp_compat->inp_ip_ttl = inp->inp_ip_ttl; | |
2251 | inp_compat->inp_ip_p = inp->inp_ip_p; | |
39236c6e A |
2252 | inp_compat->inp_dependfaddr.inp6_foreign = |
2253 | inp->inp_dependfaddr.inp6_foreign; | |
2254 | inp_compat->inp_dependladdr.inp6_local = | |
2255 | inp->inp_dependladdr.inp6_local; | |
91447636 | 2256 | inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; |
39236c6e | 2257 | inp_compat->inp_depend6.inp6_hlim = 0; |
91447636 | 2258 | inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; |
39236c6e | 2259 | inp_compat->inp_depend6.inp6_ifindex = 0; |
91447636 A |
2260 | inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; |
2261 | } | |
9bccf70c | 2262 | |
b0d623f7 | 2263 | void |
39236c6e | 2264 | inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp) |
b0d623f7 | 2265 | { |
6d2010ae A |
2266 | xinp->inp_fport = inp->inp_fport; |
2267 | xinp->inp_lport = inp->inp_lport; | |
2268 | xinp->inp_gencnt = inp->inp_gencnt; | |
2269 | xinp->inp_flags = inp->inp_flags; | |
2270 | xinp->inp_flow = inp->inp_flow; | |
2271 | xinp->inp_vflag = inp->inp_vflag; | |
2272 | xinp->inp_ip_ttl = inp->inp_ip_ttl; | |
2273 | xinp->inp_ip_p = inp->inp_ip_p; | |
2274 | xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign; | |
2275 | xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local; | |
2276 | xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; | |
39236c6e | 2277 | xinp->inp_depend6.inp6_hlim = 0; |
6d2010ae | 2278 | xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; |
39236c6e | 2279 | xinp->inp_depend6.inp6_ifindex = 0; |
6d2010ae | 2280 | xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; |
b0d623f7 A |
2281 | } |
2282 | ||
b0d623f7 A |
2283 | /* |
2284 | * The following routines implement this scheme: | |
2285 | * | |
2286 | * Callers of ip_output() that intend to cache the route in the inpcb pass | |
2287 | * a local copy of the struct route to ip_output(). Using a local copy of | |
2288 | * the cached route significantly simplifies things as IP no longer has to | |
2289 | * worry about having exclusive access to the passed in struct route, since | |
2290 | * it's defined in the caller's stack; in essence, this allows for a lock- | |
2291 | * less operation when updating the struct route at the IP level and below, | |
2292 | * whenever necessary. The scheme works as follows: | |
2293 | * | |
2294 | * Prior to dropping the socket's lock and calling ip_output(), the caller | |
2295 | * copies the struct route from the inpcb into its stack, and adds a reference | |
2296 | * to the cached route entry, if there was any. The socket's lock is then | |
2297 | * dropped and ip_output() is called with a pointer to the copy of struct | |
2298 | * route defined on the stack (not to the one in the inpcb.) | |
2299 | * | |
2300 | * Upon returning from ip_output(), the caller then acquires the socket's | |
2301 | * lock and synchronizes the cache; if there is no route cached in the inpcb, | |
2302 | * it copies the local copy of struct route (which may or may not contain any | |
2303 | * route) back into the cache; otherwise, if the inpcb has a route cached in | |
2304 | * it, the one in the local copy will be freed, if there's any. Trashing the | |
2305 | * cached route in the inpcb can be avoided because ip_output() is single- | |
2306 | * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized | |
2307 | * by the socket/transport layer.) | |
2308 | */ | |
2309 | void | |
2310 | inp_route_copyout(struct inpcb *inp, struct route *dst) | |
2311 | { | |
2312 | struct route *src = &inp->inp_route; | |
2313 | ||
6d2010ae | 2314 | lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); |
b0d623f7 | 2315 | |
0b4c1975 | 2316 | /* |
39236c6e | 2317 | * If the route in the PCB is stale or not for IPv4, blow it away; |
0b4c1975 A |
2318 | * this is possible in the case of IPv4-mapped address case. |
2319 | */ | |
39236c6e A |
2320 | if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) |
2321 | ROUTE_RELEASE(src); | |
316670eb | 2322 | |
39236c6e | 2323 | route_copyout(dst, src, sizeof (*dst)); |
b0d623f7 A |
2324 | } |
2325 | ||
2326 | void | |
2327 | inp_route_copyin(struct inpcb *inp, struct route *src) | |
2328 | { | |
2329 | struct route *dst = &inp->inp_route; | |
2330 | ||
6d2010ae | 2331 | lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); |
b0d623f7 A |
2332 | |
2333 | /* Minor sanity check */ | |
2334 | if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) | |
2335 | panic("%s: wrong or corrupted route: %p", __func__, src); | |
2336 | ||
39236c6e | 2337 | route_copyin(src, dst, sizeof (*src)); |
6d2010ae A |
2338 | } |
2339 | ||
2340 | /* | |
2341 | * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option. | |
2342 | */ | |
316670eb | 2343 | int |
39236c6e | 2344 | inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp) |
6d2010ae | 2345 | { |
316670eb A |
2346 | struct ifnet *ifp = NULL; |
2347 | ||
2348 | ifnet_head_lock_shared(); | |
2349 | if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE && | |
2350 | (ifp = ifindex2ifnet[ifscope]) == NULL)) { | |
2351 | ifnet_head_done(); | |
2352 | return (ENXIO); | |
2353 | } | |
2354 | ifnet_head_done(); | |
2355 | ||
2356 | VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE); | |
2357 | ||
6d2010ae A |
2358 | /* |
2359 | * A zero interface scope value indicates an "unbind". | |
2360 | * Otherwise, take in whatever value the app desires; | |
2361 | * the app may already know the scope (or force itself | |
2362 | * to such a scope) ahead of time before the interface | |
2363 | * gets attached. It doesn't matter either way; any | |
2364 | * route lookup from this point on will require an | |
2365 | * exact match for the embedded interface scope. | |
2366 | */ | |
316670eb A |
2367 | inp->inp_boundifp = ifp; |
2368 | if (inp->inp_boundifp == NULL) | |
6d2010ae A |
2369 | inp->inp_flags &= ~INP_BOUND_IF; |
2370 | else | |
2371 | inp->inp_flags |= INP_BOUND_IF; | |
2372 | ||
2373 | /* Blow away any cached route in the PCB */ | |
39236c6e A |
2374 | ROUTE_RELEASE(&inp->inp_route); |
2375 | ||
2376 | if (pifp != NULL) | |
2377 | *pifp = ifp; | |
316670eb A |
2378 | |
2379 | return (0); | |
6d2010ae A |
2380 | } |
2381 | ||
2382 | /* | |
39236c6e A |
2383 | * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option, |
2384 | * as well as for setting PROC_UUID_NO_CELLULAR policy. | |
6d2010ae | 2385 | */ |
39236c6e A |
2386 | void |
2387 | inp_set_nocellular(struct inpcb *inp) | |
6d2010ae | 2388 | { |
39236c6e | 2389 | inp->inp_flags |= INP_NO_IFT_CELLULAR; |
6d2010ae A |
2390 | |
2391 | /* Blow away any cached route in the PCB */ | |
39236c6e A |
2392 | ROUTE_RELEASE(&inp->inp_route); |
2393 | } | |
2394 | ||
2395 | /* | |
2396 | * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option, | |
2397 | * as well as for clearing PROC_UUID_NO_CELLULAR policy. | |
2398 | */ | |
2399 | void | |
2400 | inp_clear_nocellular(struct inpcb *inp) | |
2401 | { | |
2402 | struct socket *so = inp->inp_socket; | |
2403 | ||
2404 | /* | |
2405 | * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket | |
2406 | * has a higher precendence than INP_NO_IFT_CELLULAR. Clear the flag | |
2407 | * if and only if the socket is unrestricted. | |
2408 | */ | |
2409 | if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) { | |
2410 | inp->inp_flags &= ~INP_NO_IFT_CELLULAR; | |
2411 | ||
2412 | /* Blow away any cached route in the PCB */ | |
2413 | ROUTE_RELEASE(&inp->inp_route); | |
6d2010ae | 2414 | } |
39236c6e | 2415 | } |
6d2010ae | 2416 | |
39236c6e A |
2417 | #if FLOW_DIVERT |
2418 | /* | |
2419 | * Called when PROC_UUID_FLOW_DIVERT is set. | |
2420 | */ | |
2421 | void | |
2422 | inp_set_flow_divert(struct inpcb *inp) | |
2423 | { | |
2424 | inp->inp_flags2 |= INP2_WANT_FLOW_DIVERT; | |
2425 | } | |
2426 | ||
2427 | /* | |
2428 | * Called when PROC_UUID_FLOW_DIVERT is cleared. | |
2429 | */ | |
2430 | void | |
2431 | inp_clear_flow_divert(struct inpcb *inp) | |
2432 | { | |
2433 | inp->inp_flags2 &= ~INP2_WANT_FLOW_DIVERT; | |
b0d623f7 | 2434 | } |
39236c6e | 2435 | #endif /* FLOW_DIVERT */ |
316670eb A |
2436 | |
2437 | /* | |
2438 | * Calculate flow hash for an inp, used by an interface to identify a | |
2439 | * flow. When an interface provides flow control advisory, this flow | |
2440 | * hash is used as an identifier. | |
2441 | */ | |
2442 | u_int32_t | |
2443 | inp_calc_flowhash(struct inpcb *inp) | |
2444 | { | |
2445 | struct inp_flowhash_key fh __attribute__((aligned(8))); | |
2446 | u_int32_t flowhash = 0; | |
bd504ef0 | 2447 | struct inpcb *tmp_inp = NULL; |
316670eb A |
2448 | |
2449 | if (inp_hash_seed == 0) | |
2450 | inp_hash_seed = RandomULong(); | |
2451 | ||
2452 | bzero(&fh, sizeof (fh)); | |
2453 | ||
2454 | bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr)); | |
2455 | bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr)); | |
2456 | ||
2457 | fh.infh_lport = inp->inp_lport; | |
2458 | fh.infh_fport = inp->inp_fport; | |
2459 | fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET; | |
2460 | fh.infh_proto = inp->inp_ip_p; | |
2461 | fh.infh_rand1 = RandomULong(); | |
2462 | fh.infh_rand2 = RandomULong(); | |
2463 | ||
2464 | try_again: | |
2465 | flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed); | |
2466 | if (flowhash == 0) { | |
2467 | /* try to get a non-zero flowhash */ | |
2468 | inp_hash_seed = RandomULong(); | |
2469 | goto try_again; | |
2470 | } | |
2471 | ||
bd504ef0 | 2472 | inp->inp_flowhash = flowhash; |
316670eb | 2473 | |
bd504ef0 | 2474 | /* Insert the inp into inp_fc_tree */ |
39236c6e | 2475 | lck_mtx_lock_spin(&inp_fc_lck); |
bd504ef0 A |
2476 | tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp); |
2477 | if (tmp_inp != NULL) { | |
316670eb | 2478 | /* |
bd504ef0 A |
2479 | * There is a different inp with the same flowhash. |
2480 | * There can be a collision on flow hash but the | |
39236c6e | 2481 | * probability is low. Let's recompute the |
bd504ef0 | 2482 | * flowhash. |
316670eb A |
2483 | */ |
2484 | lck_mtx_unlock(&inp_fc_lck); | |
bd504ef0 A |
2485 | /* recompute hash seed */ |
2486 | inp_hash_seed = RandomULong(); | |
2487 | goto try_again; | |
316670eb | 2488 | } |
39236c6e | 2489 | |
bd504ef0 A |
2490 | RB_INSERT(inp_fc_tree, &inp_fc_tree, inp); |
2491 | inp->inp_flags2 |= INP2_IN_FCTREE; | |
316670eb | 2492 | lck_mtx_unlock(&inp_fc_lck); |
bd504ef0 | 2493 | |
39236c6e A |
2494 | return (flowhash); |
2495 | } | |
2496 | ||
/*
 * Look up the inp registered under the given flow hash and, if one is
 * found, deliver flow control feedback to it.
 */
void
inp_flowadv(uint32_t flowhash)
{
	struct inpcb *inp = inp_fc_getinp(flowhash, 0);

	if (inp != NULL)
		inp_fc_feedback(inp);
}
2508 | ||
bd504ef0 A |
2509 | /* |
2510 | * Function to compare inp_fc_entries in inp flow control tree | |
2511 | */ | |
2512 | static inline int | |
2513 | infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2) | |
316670eb | 2514 | { |
bd504ef0 | 2515 | return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash), |
39236c6e | 2516 | sizeof(inp1->inp_flowhash))); |
bd504ef0 | 2517 | } |
316670eb | 2518 | |
39236c6e | 2519 | static struct inpcb * |
bd504ef0 A |
2520 | inp_fc_getinp(u_int32_t flowhash, u_int32_t flags) |
2521 | { | |
2522 | struct inpcb *inp = NULL; | |
2523 | int locked = (flags & INPFC_SOLOCKED) ? 1 : 0; | |
316670eb A |
2524 | |
2525 | lck_mtx_lock_spin(&inp_fc_lck); | |
bd504ef0 A |
2526 | key_inp.inp_flowhash = flowhash; |
2527 | inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp); | |
2528 | if (inp == NULL) { | |
316670eb A |
2529 | /* inp is not present, return */ |
2530 | lck_mtx_unlock(&inp_fc_lck); | |
2531 | return (NULL); | |
2532 | } | |
2533 | ||
bd504ef0 A |
2534 | if (flags & INPFC_REMOVE) { |
2535 | RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp); | |
2536 | lck_mtx_unlock(&inp_fc_lck); | |
316670eb | 2537 | |
bd504ef0 A |
2538 | bzero(&(inp->infc_link), sizeof (inp->infc_link)); |
2539 | inp->inp_flags2 &= ~INP2_IN_FCTREE; | |
2540 | return (NULL); | |
316670eb | 2541 | } |
39236c6e | 2542 | |
bd504ef0 A |
2543 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) |
2544 | inp = NULL; | |
316670eb A |
2545 | lck_mtx_unlock(&inp_fc_lck); |
2546 | ||
bd504ef0 | 2547 | return (inp); |
316670eb A |
2548 | } |
2549 | ||
39236c6e | 2550 | static void |
316670eb A |
2551 | inp_fc_feedback(struct inpcb *inp) |
2552 | { | |
2553 | struct socket *so = inp->inp_socket; | |
2554 | ||
2555 | /* we already hold a want_cnt on this inp, socket can't be null */ | |
39236c6e | 2556 | VERIFY(so != NULL); |
316670eb A |
2557 | socket_lock(so, 1); |
2558 | ||
2559 | if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { | |
2560 | socket_unlock(so, 1); | |
2561 | return; | |
2562 | } | |
2563 | ||
2564 | /* | |
2565 | * Return if the connection is not in flow-controlled state. | |
2566 | * This can happen if the connection experienced | |
2567 | * loss while it was in flow controlled state | |
2568 | */ | |
2569 | if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) { | |
2570 | socket_unlock(so, 1); | |
2571 | return; | |
2572 | } | |
2573 | inp_reset_fc_state(inp); | |
2574 | ||
39236c6e | 2575 | if (SOCK_TYPE(so) == SOCK_STREAM) |
316670eb A |
2576 | inp_fc_unthrottle_tcp(inp); |
2577 | ||
2578 | socket_unlock(so, 1); | |
2579 | } | |
2580 | ||
2581 | void | |
2582 | inp_reset_fc_state(struct inpcb *inp) | |
2583 | { | |
2584 | struct socket *so = inp->inp_socket; | |
2585 | int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0; | |
2586 | int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0; | |
2587 | ||
2588 | inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED); | |
2589 | ||
2590 | if (suspended) { | |
2591 | so->so_flags &= ~(SOF_SUSPENDED); | |
2592 | soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME)); | |
2593 | } | |
2594 | ||
2595 | if (inp->inp_sndinprog_cnt > 0) | |
2596 | inp->inp_flags |= INP_FC_FEEDBACK; | |
2597 | ||
2598 | /* Give a write wakeup to unblock the socket */ | |
2599 | if (needwakeup) | |
2600 | sowwakeup(so); | |
2601 | } | |
2602 | ||
2603 | int | |
2604 | inp_set_fc_state(struct inpcb *inp, int advcode) | |
2605 | { | |
bd504ef0 | 2606 | struct inpcb *tmp_inp = NULL; |
316670eb | 2607 | /* |
39236c6e | 2608 | * If there was a feedback from the interface when |
316670eb A |
2609 | * send operation was in progress, we should ignore |
2610 | * this flow advisory to avoid a race between setting | |
2611 | * flow controlled state and receiving feedback from | |
2612 | * the interface | |
2613 | */ | |
2614 | if (inp->inp_flags & INP_FC_FEEDBACK) | |
39236c6e | 2615 | return (0); |
316670eb A |
2616 | |
2617 | inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED); | |
39236c6e A |
2618 | if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash, |
2619 | INPFC_SOLOCKED)) != NULL) { | |
2620 | if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) | |
bd504ef0 A |
2621 | return (0); |
2622 | VERIFY(tmp_inp == inp); | |
316670eb A |
2623 | switch (advcode) { |
2624 | case FADV_FLOW_CONTROLLED: | |
2625 | inp->inp_flags |= INP_FLOW_CONTROLLED; | |
2626 | break; | |
2627 | case FADV_SUSPENDED: | |
2628 | inp->inp_flags |= INP_FLOW_SUSPENDED; | |
2629 | soevent(inp->inp_socket, | |
2630 | (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND)); | |
2631 | ||
2632 | /* Record the fact that suspend event was sent */ | |
2633 | inp->inp_socket->so_flags |= SOF_SUSPENDED; | |
2634 | break; | |
2635 | } | |
bd504ef0 | 2636 | return (1); |
316670eb | 2637 | } |
39236c6e | 2638 | return (0); |
316670eb A |
2639 | } |
2640 | ||
2641 | /* | |
2642 | * Handler for SO_FLUSH socket option. | |
2643 | */ | |
2644 | int | |
2645 | inp_flush(struct inpcb *inp, int optval) | |
2646 | { | |
2647 | u_int32_t flowhash = inp->inp_flowhash; | |
39236c6e | 2648 | struct ifnet *rtifp, *oifp; |
316670eb A |
2649 | |
2650 | /* Either all classes or one of the valid ones */ | |
2651 | if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) | |
2652 | return (EINVAL); | |
2653 | ||
2654 | /* We need a flow hash for identification */ | |
2655 | if (flowhash == 0) | |
2656 | return (0); | |
2657 | ||
39236c6e A |
2658 | /* Grab the interfaces from the route and pcb */ |
2659 | rtifp = ((inp->inp_route.ro_rt != NULL) ? | |
2660 | inp->inp_route.ro_rt->rt_ifp : NULL); | |
2661 | oifp = inp->inp_last_outifp; | |
2662 | ||
2663 | if (rtifp != NULL) | |
2664 | if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0); | |
2665 | if (oifp != NULL && oifp != rtifp) | |
2666 | if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0); | |
316670eb A |
2667 | |
2668 | return (0); | |
2669 | } | |
2670 | ||
2671 | /* | |
2672 | * Clear the INP_INADDR_ANY flag (special case for PPP only) | |
2673 | */ | |
39236c6e A |
2674 | void |
2675 | inp_clear_INP_INADDR_ANY(struct socket *so) | |
316670eb A |
2676 | { |
2677 | struct inpcb *inp = NULL; | |
2678 | ||
2679 | socket_lock(so, 1); | |
2680 | inp = sotoinpcb(so); | |
2681 | if (inp) { | |
2682 | inp->inp_flags &= ~INP_INADDR_ANY; | |
2683 | } | |
2684 | socket_unlock(so, 1); | |
2685 | } | |
2686 | ||
39236c6e A |
2687 | void |
2688 | inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo) | |
2689 | { | |
2690 | struct socket *so = inp->inp_socket; | |
2691 | ||
2692 | soprocinfo->spi_pid = so->last_pid; | |
2693 | /* | |
2694 | * When not delegated, the effective pid is the same as the real pid | |
2695 | */ | |
2696 | if (so->so_flags & SOF_DELEGATED) | |
2697 | soprocinfo->spi_epid = so->e_pid; | |
2698 | else | |
2699 | soprocinfo->spi_epid = so->last_pid; | |
2700 | } | |
2701 | ||
2702 | int | |
2703 | inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash, | |
2704 | struct so_procinfo *soprocinfo) | |
2705 | { | |
2706 | struct inpcb *inp = NULL; | |
2707 | int found = 0; | |
2708 | ||
2709 | bzero(soprocinfo, sizeof (struct so_procinfo)); | |
2710 | ||
2711 | if (!flowhash) | |
2712 | return (-1); | |
2713 | ||
2714 | lck_rw_lock_shared(pcbinfo->ipi_lock); | |
2715 | LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { | |
2716 | if (inp->inp_state != INPCB_STATE_DEAD && | |
2717 | inp->inp_socket != NULL && | |
2718 | inp->inp_flowhash == flowhash) { | |
2719 | found = 1; | |
2720 | inp_get_soprocinfo(inp, soprocinfo); | |
2721 | break; | |
2722 | } | |
2723 | } | |
2724 | lck_rw_done(pcbinfo->ipi_lock); | |
2725 | ||
2726 | return (found); | |
2727 | } | |
2728 | ||
2729 | #if CONFIG_PROC_UUID_POLICY | |
2730 | static void | |
2731 | inp_update_cellular_policy(struct inpcb *inp, boolean_t set) | |
2732 | { | |
2733 | struct socket *so = inp->inp_socket; | |
2734 | int before, after; | |
2735 | ||
2736 | VERIFY(so != NULL); | |
2737 | VERIFY(inp->inp_state != INPCB_STATE_DEAD); | |
2738 | ||
2739 | before = (inp->inp_flags & INP_NO_IFT_CELLULAR); | |
2740 | if (set) { | |
2741 | inp_set_nocellular(inp); | |
2742 | } else { | |
2743 | inp_clear_nocellular(inp); | |
2744 | } | |
2745 | after = (inp->inp_flags & INP_NO_IFT_CELLULAR); | |
2746 | if (net_io_policy_log && (before != after)) { | |
2747 | static const char *ok = "OK"; | |
2748 | static const char *nok = "NOACCESS"; | |
2749 | uuid_string_t euuid_buf; | |
2750 | pid_t epid; | |
2751 | ||
2752 | if (so->so_flags & SOF_DELEGATED) { | |
2753 | uuid_unparse(so->e_uuid, euuid_buf); | |
2754 | epid = so->e_pid; | |
2755 | } else { | |
2756 | uuid_unparse(so->last_uuid, euuid_buf); | |
2757 | epid = so->last_pid; | |
2758 | } | |
2759 | ||
2760 | /* allow this socket to generate another notification event */ | |
2761 | so->so_ifdenied_notifies = 0; | |
2762 | ||
2763 | log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d " | |
2764 | "euuid %s%s %s->%s\n", __func__, | |
2765 | (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), | |
2766 | SOCK_TYPE(so), epid, euuid_buf, | |
2767 | (so->so_flags & SOF_DELEGATED) ? | |
2768 | " [delegated]" : "", | |
2769 | ((before < after) ? ok : nok), | |
2770 | ((before < after) ? nok : ok)); | |
2771 | } | |
2772 | } | |
2773 | ||
2774 | #if FLOW_DIVERT | |
2775 | static void | |
2776 | inp_update_flow_divert_policy(struct inpcb *inp, boolean_t set) | |
2777 | { | |
2778 | struct socket *so = inp->inp_socket; | |
2779 | int before, after; | |
2780 | ||
2781 | VERIFY(so != NULL); | |
2782 | VERIFY(inp->inp_state != INPCB_STATE_DEAD); | |
2783 | ||
2784 | if (set && !(inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { | |
2785 | set = !flow_divert_is_dns_service(so); | |
2786 | } | |
2787 | ||
2788 | before = (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT); | |
2789 | if (set) { | |
2790 | inp_set_flow_divert(inp); | |
2791 | } else { | |
2792 | inp_clear_flow_divert(inp); | |
2793 | } | |
2794 | after = (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT); | |
2795 | if (net_io_policy_log && (before != after)) { | |
2796 | static const char *wanted = "WANTED"; | |
2797 | static const char *unwanted = "UNWANTED"; | |
2798 | uuid_string_t euuid_buf; | |
2799 | pid_t epid; | |
2800 | ||
2801 | if (so->so_flags & SOF_DELEGATED) { | |
2802 | uuid_unparse(so->e_uuid, euuid_buf); | |
2803 | epid = so->e_pid; | |
2804 | } else { | |
2805 | uuid_unparse(so->last_uuid, euuid_buf); | |
2806 | epid = so->last_pid; | |
2807 | } | |
2808 | ||
2809 | log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d " | |
2810 | "euuid %s%s %s->%s\n", __func__, | |
2811 | (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), | |
2812 | SOCK_TYPE(so), epid, euuid_buf, | |
2813 | (so->so_flags & SOF_DELEGATED) ? | |
2814 | " [delegated]" : "", | |
2815 | ((before < after) ? unwanted : wanted), | |
2816 | ((before < after) ? wanted : unwanted)); | |
2817 | } | |
2818 | } | |
2819 | #endif /* FLOW_DIVERT */ | |
2820 | #endif /* CONFIG_PROC_UUID_POLICY */ | |
2821 | ||
/*
 * Re-evaluate the per-process UUID policy for the socket attached to
 * inp and update the cellular (and, with FLOW_DIVERT, flow divert)
 * policy flags when the policy generation count has changed.
 *
 * Returns 0 on success or when there is nothing to do (including when
 * the lookup reports ENOENT); any other lookup error is returned as-is.
 * Without CONFIG_PROC_UUID_POLICY this always returns 0.
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t ogencnt;
	int err = 0;

	if (!net_io_policy_uuid)
		return (0);
	if (so == NULL || inp->inp_state == INPCB_STATE_DEAD)
		return (0);

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED))
		return (0);

	/* Look up by the effective UUID; the lookup refreshes the gencnt */
	ogencnt = so->so_policy_gencnt;
	err = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
	    so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt);

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go thru the checks below.
	 */
	if (err == ENOENT && ogencnt != 0)
		so->so_policy_gencnt = 0;

	/* Any failure other than "no entry" is handed back to the caller */
	if (err != 0 && err != ENOENT)
		return (err);

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly.  If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if (ogencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (!(pflags & PROC_UUID_NO_CELLULAR))
			inp_update_cellular_policy(inp, FALSE);
		else if (err == 0)
			inp_update_cellular_policy(inp, TRUE);
#if FLOW_DIVERT
		/* update flow divert policy for this socket */
		if (!(pflags & PROC_UUID_FLOW_DIVERT))
			inp_update_flow_divert_policy(inp, FALSE);
		else if (err == 0)
			inp_update_flow_divert_policy(inp, TRUE);
#endif /* FLOW_DIVERT */
	}

	/* err is either 0 or ENOENT here; both are reported as success */
	return (0);
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return (0);
#endif /* !CONFIG_PROC_UUID_POLICY */
}
2882 | ||
2883 | boolean_t | |
2884 | inp_restricted(struct inpcb *inp, struct ifnet *ifp) | |
2885 | { | |
2886 | VERIFY(inp != NULL); | |
2887 | ||
2888 | if (!sorestrictrecv) | |
2889 | return (FALSE); | |
2890 | ||
2891 | if (ifp == NULL || !(ifp->if_eflags & IFEF_RESTRICTED_RECV)) | |
2892 | return (FALSE); | |
2893 | ||
2894 | if (inp->inp_flags & INP_RECV_ANYIF) | |
2895 | return (FALSE); | |
2896 | ||
2897 | if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) | |
2898 | return (FALSE); | |
2899 | ||
2900 | return (TRUE); | |
2901 | } |