1 /*
2 * Copyright (c) 1998-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1988, 1990, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
62 */
63 /*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
69
70 #include <sys/param.h>
71 #include <sys/systm.h>
72 #include <sys/filedesc.h>
73 #include <sys/proc.h>
74 #include <sys/proc_internal.h>
75 #include <sys/kauth.h>
76 #include <sys/file_internal.h>
77 #include <sys/fcntl.h>
78 #include <sys/malloc.h>
79 #include <sys/mbuf.h>
80 #include <sys/domain.h>
81 #include <sys/kernel.h>
82 #include <sys/event.h>
83 #include <sys/poll.h>
84 #include <sys/protosw.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/resourcevar.h>
88 #include <sys/signalvar.h>
89 #include <sys/sysctl.h>
90 #include <sys/syslog.h>
91 #include <sys/uio.h>
92 #include <sys/uio_internal.h>
93 #include <sys/ev.h>
94 #include <sys/kdebug.h>
95 #include <sys/un.h>
96 #include <sys/user.h>
97 #include <sys/priv.h>
98 #include <sys/kern_event.h>
99 #include <net/route.h>
100 #include <net/init.h>
101 #include <net/net_api_stats.h>
102 #include <net/ntstat.h>
103 #include <net/content_filter.h>
104 #include <netinet/in.h>
105 #include <netinet/in_pcb.h>
106 #include <netinet/in_tclass.h>
107 #include <netinet/in_var.h>
108 #include <netinet/tcp_var.h>
109 #include <netinet/ip6.h>
110 #include <netinet6/ip6_var.h>
111 #include <netinet/flow_divert.h>
112 #include <kern/zalloc.h>
113 #include <kern/locks.h>
114 #include <machine/limits.h>
115 #include <libkern/OSAtomic.h>
116 #include <pexpert/pexpert.h>
117 #include <kern/assert.h>
118 #include <kern/task.h>
119 #include <kern/policy_internal.h>
120
121 #include <sys/kpi_mbuf.h>
122 #include <sys/mcache.h>
123 #include <sys/unpcb.h>
124 #include <libkern/section_keywords.h>
125
126 #if CONFIG_MACF
127 #include <security/mac_framework.h>
128 #endif /* MAC */
129
130 #if MULTIPATH
131 #include <netinet/mp_pcb.h>
132 #include <netinet/mptcp_var.h>
133 #endif /* MULTIPATH */
134
135 #define ROUNDUP(a, b) (((a) + ((b) - 1)) & (~((b) - 1)))
136
137 #if DEBUG || DEVELOPMENT
138 #define DEBUG_KERNEL_ADDRPERM(_v) (_v)
139 #else
140 #define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
141 #endif
142
143 /* TODO: this should be in a header file somewhere */
144 extern char *proc_name_address(void *p);
145
146 static u_int32_t so_cache_hw; /* High water mark for socache */
147 static u_int32_t so_cache_timeouts; /* number of timeouts */
148 static u_int32_t so_cache_max_freed; /* max freed per timeout */
149 static u_int32_t cached_sock_count = 0;
150 STAILQ_HEAD(, socket) so_cache_head;
151 int max_cached_sock_count = MAX_CACHED_SOCKETS;
152 static u_int32_t so_cache_time;
153 static int socketinit_done;
154 static struct zone *so_cache_zone;
155
156 static LCK_GRP_DECLARE(so_cache_mtx_grp, "so_cache");
157 static LCK_MTX_DECLARE(so_cache_mtx, &so_cache_mtx_grp);
158
159 #include <machine/limits.h>
160
161 static int filt_sorattach(struct knote *kn, struct kevent_qos_s *kev);
162 static void filt_sordetach(struct knote *kn);
163 static int filt_soread(struct knote *kn, long hint);
164 static int filt_sortouch(struct knote *kn, struct kevent_qos_s *kev);
165 static int filt_sorprocess(struct knote *kn, struct kevent_qos_s *kev);
166
167 static int filt_sowattach(struct knote *kn, struct kevent_qos_s *kev);
168 static void filt_sowdetach(struct knote *kn);
169 static int filt_sowrite(struct knote *kn, long hint);
170 static int filt_sowtouch(struct knote *kn, struct kevent_qos_s *kev);
171 static int filt_sowprocess(struct knote *kn, struct kevent_qos_s *kev);
172
173 static int filt_sockattach(struct knote *kn, struct kevent_qos_s *kev);
174 static void filt_sockdetach(struct knote *kn);
175 static int filt_sockev(struct knote *kn, long hint);
176 static int filt_socktouch(struct knote *kn, struct kevent_qos_s *kev);
177 static int filt_sockprocess(struct knote *kn, struct kevent_qos_s *kev);
178
179 static int sooptcopyin_timeval(struct sockopt *, struct timeval *);
180 static int sooptcopyout_timeval(struct sockopt *, const struct timeval *);
181
182 SECURITY_READ_ONLY_EARLY(struct filterops) soread_filtops = {
183 .f_isfd = 1,
184 .f_attach = filt_sorattach,
185 .f_detach = filt_sordetach,
186 .f_event = filt_soread,
187 .f_touch = filt_sortouch,
188 .f_process = filt_sorprocess,
189 };
190
191 SECURITY_READ_ONLY_EARLY(struct filterops) sowrite_filtops = {
192 .f_isfd = 1,
193 .f_attach = filt_sowattach,
194 .f_detach = filt_sowdetach,
195 .f_event = filt_sowrite,
196 .f_touch = filt_sowtouch,
197 .f_process = filt_sowprocess,
198 };
199
200 SECURITY_READ_ONLY_EARLY(struct filterops) sock_filtops = {
201 .f_isfd = 1,
202 .f_attach = filt_sockattach,
203 .f_detach = filt_sockdetach,
204 .f_event = filt_sockev,
205 .f_touch = filt_socktouch,
206 .f_process = filt_sockprocess,
207 };
208
209 SECURITY_READ_ONLY_EARLY(struct filterops) soexcept_filtops = {
210 .f_isfd = 1,
211 .f_attach = filt_sorattach,
212 .f_detach = filt_sordetach,
213 .f_event = filt_soread,
214 .f_touch = filt_sortouch,
215 .f_process = filt_sorprocess,
216 };
217
218 SYSCTL_DECL(_kern_ipc);
219
220 #define EVEN_MORE_LOCKING_DEBUG 0
221
222 int socket_debug = 0;
223 SYSCTL_INT(_kern_ipc, OID_AUTO, socket_debug,
224 CTLFLAG_RW | CTLFLAG_LOCKED, &socket_debug, 0, "");
225
226 static unsigned long sodefunct_calls = 0;
227 SYSCTL_LONG(_kern_ipc, OID_AUTO, sodefunct_calls, CTLFLAG_LOCKED,
228 &sodefunct_calls, "");
229
230 ZONE_DECLARE(socket_zone, "socket", sizeof(struct socket), ZC_ZFREE_CLEARMEM);
231 so_gen_t so_gencnt; /* generation count for sockets */
232
233 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
234 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
235
236 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
237 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
238 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
239 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
240 #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
241 #define DBG_FNC_SOSEND_LIST NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 3)
242 #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
243 #define DBG_FNC_SORECEIVE_LIST NETDBG_CODE(DBG_NETSOCK, (8 << 8) | 3)
244 #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))
245
246 #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES)
247
248 int somaxconn = SOMAXCONN;
249 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn,
250 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxconn, 0, "");
251
252 /* Should we get a maximum also ??? */
253 static int sosendmaxchain = 65536;
254 static int sosendminchain = 16384;
255 static int sorecvmincopy = 16384;
256 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain,
257 CTLFLAG_RW | CTLFLAG_LOCKED, &sosendminchain, 0, "");
258 SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy,
259 CTLFLAG_RW | CTLFLAG_LOCKED, &sorecvmincopy, 0, "");
260
261 /*
262 * Set to enable jumbo clusters (if available) for large writes when
263 * the socket is marked with SOF_MULTIPAGES; see below.
264 */
265 int sosendjcl = 1;
266 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl,
267 CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl, 0, "");
268
269 /*
270 * Set this to ignore SOF_MULTIPAGES and use jumbo clusters for large
271 * writes on the socket for all protocols on any network interfaces,
272 * depending upon sosendjcl above. Be extra careful when setting this
273 * to 1, because sending packets that cross physical pages down to
274 * broken drivers (those that falsely assume that the physical pages
275 * are contiguous) might lead to system panics or silent data corruption.
276 * When set to 0, the system will respect SOF_MULTIPAGES, which is set
277 * only for TCP sockets whose outgoing interface is IFNET_MULTIPAGES
278 * capable. Set this to 1 only for testing/debugging purposes.
279 */
280 int sosendjcl_ignore_capab = 0;
281 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab,
282 CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl_ignore_capab, 0, "");
283
284 /*
285 * Set this to ignore SOF1_IF_2KCL and use big clusters for large
286 * writes on the socket for all protocols on any network interfaces.
287 * Be extra careful when setting this to 1, because sending down packets with
288 * clusters larger than 2 KB might lead to system panics or data corruption.
289 * When set to 0, the system will respect SOF1_IF_2KCL, which is set
290 * on the outgoing interface.
291 * Set this to 1 for testing/debugging purposes only.
292 */
293 int sosendbigcl_ignore_capab = 0;
294 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendbigcl_ignore_capab,
295 CTLFLAG_RW | CTLFLAG_LOCKED, &sosendbigcl_ignore_capab, 0, "");
296
297 int sodefunctlog = 0;
298 SYSCTL_INT(_kern_ipc, OID_AUTO, sodefunctlog, CTLFLAG_RW | CTLFLAG_LOCKED,
299 &sodefunctlog, 0, "");
300
301 int sothrottlelog = 0;
302 SYSCTL_INT(_kern_ipc, OID_AUTO, sothrottlelog, CTLFLAG_RW | CTLFLAG_LOCKED,
303 &sothrottlelog, 0, "");
304
305 int sorestrictrecv = 1;
306 SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictrecv, CTLFLAG_RW | CTLFLAG_LOCKED,
307 &sorestrictrecv, 0, "Enable inbound interface restrictions");
308
309 int sorestrictsend = 1;
310 SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictsend, CTLFLAG_RW | CTLFLAG_LOCKED,
311 &sorestrictsend, 0, "Enable outbound interface restrictions");
312
313 int soreserveheadroom = 1;
314 SYSCTL_INT(_kern_ipc, OID_AUTO, soreserveheadroom, CTLFLAG_RW | CTLFLAG_LOCKED,
315 &soreserveheadroom, 0, "To allocate contiguous datagram buffers");
316
317 #if (DEBUG || DEVELOPMENT)
318 int so_notsent_lowat_check = 1;
319 SYSCTL_INT(_kern_ipc, OID_AUTO, notsent_lowat, CTLFLAG_RW | CTLFLAG_LOCKED,
320 &so_notsent_lowat_check, 0, "enable/disable notsent lowat check");
321 #endif /* DEBUG || DEVELOPMENT */
322
323 int so_accept_list_waits = 0;
324 #if (DEBUG || DEVELOPMENT)
325 SYSCTL_INT(_kern_ipc, OID_AUTO, accept_list_waits, CTLFLAG_RW | CTLFLAG_LOCKED,
326 &so_accept_list_waits, 0, "number of waits for listener incomp list");
327 #endif /* DEBUG || DEVELOPMENT */
328
329 extern struct inpcbinfo tcbinfo;
330
331 /* TODO: these should be in a header file */
332 extern int get_inpcb_str_size(void);
333 extern int get_tcp_str_size(void);
334
335 vm_size_t so_cache_zone_element_size;
336
337 static int sodelayed_copy(struct socket *, struct uio *, struct mbuf **,
338 user_ssize_t *);
339 static void cached_sock_alloc(struct socket **, zalloc_flags_t);
340 static void cached_sock_free(struct socket *);
341
342 /*
343 * Maximum number of extended background idle sockets per process.
344 * Set to zero to disable further setting of the option.
345 */
346
347 #define SO_IDLE_BK_IDLE_MAX_PER_PROC 1
348 #define SO_IDLE_BK_IDLE_TIME 600
349 #define SO_IDLE_BK_IDLE_RCV_HIWAT 131072
350
351 struct soextbkidlestat soextbkidlestat;
352
353 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxextbkidleperproc,
354 CTLFLAG_RW | CTLFLAG_LOCKED, &soextbkidlestat.so_xbkidle_maxperproc, 0,
355 "Maximum of extended background idle sockets per process");
356
357 SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidletime, CTLFLAG_RW | CTLFLAG_LOCKED,
358 &soextbkidlestat.so_xbkidle_time, 0,
359 "Time in seconds to keep extended background idle sockets");
360
361 SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidlercvhiwat, CTLFLAG_RW | CTLFLAG_LOCKED,
362 &soextbkidlestat.so_xbkidle_rcvhiwat, 0,
363 "High water mark for extended background idle sockets");
364
365 SYSCTL_STRUCT(_kern_ipc, OID_AUTO, extbkidlestat, CTLFLAG_RD | CTLFLAG_LOCKED,
366 &soextbkidlestat, soextbkidlestat, "");
367
368 int so_set_extended_bk_idle(struct socket *, int);
369
370
371 /*
372 * SOTCDB_NO_DSCP is set by default, to prevent the networking stack from
373 * setting the DSCP code on the packet based on the service class; see
374 * <rdar://problem/11277343> for details.
375 */
376 __private_extern__ u_int32_t sotcdb = 0;
377 SYSCTL_INT(_kern_ipc, OID_AUTO, sotcdb, CTLFLAG_RW | CTLFLAG_LOCKED,
378 &sotcdb, 0, "");
379
380 void
381 socketinit(void)
382 {
383 _CASSERT(sizeof(so_gencnt) == sizeof(uint64_t));
384 VERIFY(IS_P2ALIGNED(&so_gencnt, sizeof(uint32_t)));
385
386 #ifdef __LP64__
387 _CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user64_sa_endpoints));
388 _CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user64_sa_endpoints, sae_srcif));
389 _CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user64_sa_endpoints, sae_srcaddr));
390 _CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user64_sa_endpoints, sae_srcaddrlen));
391 _CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user64_sa_endpoints, sae_dstaddr));
392 _CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user64_sa_endpoints, sae_dstaddrlen));
393 #else
394 _CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user32_sa_endpoints));
395 _CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user32_sa_endpoints, sae_srcif));
396 _CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user32_sa_endpoints, sae_srcaddr));
397 _CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user32_sa_endpoints, sae_srcaddrlen));
398 _CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user32_sa_endpoints, sae_dstaddr));
399 _CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user32_sa_endpoints, sae_dstaddrlen));
400 #endif
401
402 if (socketinit_done) {
403 printf("socketinit: already called...\n");
404 return;
405 }
406 socketinit_done = 1;
407
408 PE_parse_boot_argn("socket_debug", &socket_debug,
409 sizeof(socket_debug));
410
411 STAILQ_INIT(&so_cache_head);
412
413 so_cache_zone_element_size = (vm_size_t)(sizeof(struct socket) + 4
414 + get_inpcb_str_size() + 4 + get_tcp_str_size());
415
416 so_cache_zone = zone_create("socache zone", so_cache_zone_element_size,
417 ZC_ZFREE_CLEARMEM | ZC_NOENCRYPT);
418
419 bzero(&soextbkidlestat, sizeof(struct soextbkidlestat));
420 soextbkidlestat.so_xbkidle_maxperproc = SO_IDLE_BK_IDLE_MAX_PER_PROC;
421 soextbkidlestat.so_xbkidle_time = SO_IDLE_BK_IDLE_TIME;
422 soextbkidlestat.so_xbkidle_rcvhiwat = SO_IDLE_BK_IDLE_RCV_HIWAT;
423
424 in_pcbinit();
425 socket_tclass_init();
426 #if MULTIPATH
427 mp_pcbinit();
428 #endif /* MULTIPATH */
429 }
430
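/*
 * Allocate a socket from the socket cache.  Reuse an entry from
 * so_cache_head when one is available (zeroing the socket but
 * preserving its saved pcb pointer); otherwise carve a fresh
 * socket + inpcb + tcpcb block out of so_cache_zone and record
 * the aligned pcb offsets.  The result is tagged
 * SOF1_CACHED_IN_SOCK_LAYER so that sodealloc() returns it to
 * the cache instead of socket_zone.
 */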
431 static void
432 cached_sock_alloc(struct socket **so, zalloc_flags_t how)
433 {
434 caddr_t temp;
435 uintptr_t offset;
436
437 lck_mtx_lock(&so_cache_mtx);
438
439 if (!STAILQ_EMPTY(&so_cache_head)) {
440 VERIFY(cached_sock_count > 0);
441
442 *so = STAILQ_FIRST(&so_cache_head);
443 STAILQ_REMOVE_HEAD(&so_cache_head, so_cache_ent);
444 STAILQ_NEXT((*so), so_cache_ent) = NULL;
445
446 cached_sock_count--;
447 lck_mtx_unlock(&so_cache_mtx);
448
449 temp = (*so)->so_saved_pcb;
450 bzero((caddr_t)*so, sizeof(struct socket));
451
452 (*so)->so_saved_pcb = temp;
453 } else {
454 lck_mtx_unlock(&so_cache_mtx);
455
456 *so = zalloc_flags(so_cache_zone, how | Z_ZERO);
457
458 /*
459 * Define offsets for extra structures into our
460 * single block of memory. Align extra structures
461 * on longword boundaries.
462 */
463
464 offset = (uintptr_t)*so;
465 offset += sizeof(struct socket);
466
467 offset = ALIGN(offset);
468
469 (*so)->so_saved_pcb = (caddr_t)offset;
470 offset += get_inpcb_str_size();
471
472 offset = ALIGN(offset);
473
474 ((struct inpcb *)(void *)(*so)->so_saved_pcb)->inp_saved_ppcb =
475 (caddr_t)offset;
476 }
477
478 OSBitOrAtomic(SOF1_CACHED_IN_SOCK_LAYER, &(*so)->so_flags1);
479 }
480
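/*
 * Return a cache-layer socket to so_cache_head for reuse, or free it
 * back to so_cache_zone when the cache already holds
 * max_cached_sock_count entries.  Cached entries are timestamped so
 * that so_cache_timer() can reclaim those that sit unused too long.
 */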
481 static void
482 cached_sock_free(struct socket *so)
483 {
484 lck_mtx_lock(&so_cache_mtx);
485
486 so_cache_time = net_uptime();
487 if (++cached_sock_count > max_cached_sock_count) {
488 --cached_sock_count;
489 lck_mtx_unlock(&so_cache_mtx);
490 zfree(so_cache_zone, so);
491 } else {
492 if (so_cache_hw < cached_sock_count) {
493 so_cache_hw = cached_sock_count;
494 }
495
496 STAILQ_INSERT_TAIL(&so_cache_head, so, so_cache_ent);
497
498 so->cache_timestamp = so_cache_time;
499 lck_mtx_unlock(&so_cache_mtx);
500 }
501 }
502
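/*
 * Record the calling process as the socket's most recent owner
 * (pid, unique pid and executable UUID), and refresh the originator
 * UUID.  Sockets created in-kernel via sock_socket() keep
 * last_pid == 0 and are left untouched.
 */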
503 void
504 so_update_last_owner_locked(struct socket *so, proc_t self)
505 {
506 if (so->last_pid != 0) {
507 /*
508 * last_pid and last_upid should remain zero for sockets
509 * created using sock_socket. The check above achieves that.
510 */
511 if (self == PROC_NULL) {
512 self = current_proc();
513 }
514
515 if (so->last_upid != proc_uniqueid(self) ||
516 so->last_pid != proc_pid(self)) {
517 so->last_upid = proc_uniqueid(self);
518 so->last_pid = proc_pid(self);
519 proc_getexecutableuuid(self, so->last_uuid,
520 sizeof(so->last_uuid));
521 if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
522 (*so->so_proto->pr_update_last_owner)(so, self, NULL);
523 }
524 }
525 proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid));
526 }
527 }
528
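/*
 * Refresh the inpcb-level policy state for PF_INET/PF_INET6 sockets.
 */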
529 void
530 so_update_policy(struct socket *so)
531 {
532 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
533 (void) inp_update_policy(sotoinpcb(so));
534 }
535 }
536
537 #if NECP
538 static void
539 so_update_necp_policy(struct socket *so, struct sockaddr *override_local_addr,
540 struct sockaddr *override_remote_addr)
541 {
542 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
543 inp_update_necp_policy(sotoinpcb(so), override_local_addr,
544 override_remote_addr, 0);
545 }
546 }
547 #endif /* NECP */
548
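/*
 * Periodic reaper for the socket cache: free entries that have been
 * cached longer than SO_CACHE_TIME_LIMIT, at most
 * SO_CACHE_MAX_FREE_BATCH per invocation.  Returns TRUE when entries
 * remain so that the caller reschedules the timer.
 */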
549 boolean_t
550 so_cache_timer(void)
551 {
552 struct socket *p;
553 int n_freed = 0;
554 boolean_t rc = FALSE;
555
556 lck_mtx_lock(&so_cache_mtx);
557 so_cache_timeouts++;
558 so_cache_time = net_uptime();
559
560 while (!STAILQ_EMPTY(&so_cache_head)) {
561 VERIFY(cached_sock_count > 0);
562 p = STAILQ_FIRST(&so_cache_head);
563 if ((so_cache_time - p->cache_timestamp) <
564 SO_CACHE_TIME_LIMIT) {
565 break;
566 }
567
568 STAILQ_REMOVE_HEAD(&so_cache_head, so_cache_ent);
569 --cached_sock_count;
570
571 zfree(so_cache_zone, p);
572
573 if (++n_freed >= SO_CACHE_MAX_FREE_BATCH) {
574 so_cache_max_freed++;
575 break;
576 }
577 }
578
579 /* Schedule again if there is more to clean up */
580 if (!STAILQ_EMPTY(&so_cache_head)) {
581 rc = TRUE;
582 }
583
584 lck_mtx_unlock(&so_cache_mtx);
585 return rc;
586 }
587
588 /*
589 * Get a socket structure from our zone, and initialize it.
590 * We don't implement `waitok' yet (see comments in uipc_domain.c).
591 * Note that it would probably be better to allocate socket
592 * and PCB at the same time, but I'm not convinced that all
593 * the protocols can be easily modified to do this.
594 */
595 struct socket *
596 soalloc(int waitok, int dom, int type)
597 {
598 zalloc_flags_t how = waitok ? Z_WAITOK : Z_NOWAIT;
599 struct socket *so;
600
601 if ((dom == PF_INET) && (type == SOCK_STREAM)) {
602 cached_sock_alloc(&so, how);
603 } else {
604 so = zalloc_flags(socket_zone, how | Z_ZERO);
605 }
606 if (so != NULL) {
607 so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt);
608
609 /*
610 * Increment the socket allocation statistics
611 */
612 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_alloc_total);
613 }
614
615 return so;
616 }
617
618 int
619 socreate_internal(int dom, struct socket **aso, int type, int proto,
620 struct proc *p, uint32_t flags, struct proc *ep)
621 {
622 struct protosw *prp;
623 struct socket *so;
624 int error = 0;
625 #if defined(XNU_TARGET_OS_OSX)
626 pid_t rpid = -1;
627 #endif
628
629 #if TCPDEBUG
630 extern int tcpconsdebug;
631 #endif
632
633 VERIFY(aso != NULL);
634 *aso = NULL;
635
636 if (proto != 0) {
637 prp = pffindproto(dom, proto, type);
638 } else {
639 prp = pffindtype(dom, type);
640 }
641
642 if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL) {
643 if (pffinddomain(dom) == NULL) {
644 return EAFNOSUPPORT;
645 }
646 if (proto != 0) {
647 if (pffindprotonotype(dom, proto) != NULL) {
648 return EPROTOTYPE;
649 }
650 }
651 return EPROTONOSUPPORT;
652 }
653 if (prp->pr_type != type) {
654 return EPROTOTYPE;
655 }
656 so = soalloc(1, dom, type);
657 if (so == NULL) {
658 return ENOBUFS;
659 }
660
661 switch (dom) {
662 case PF_LOCAL:
663 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_local_total);
664 break;
665 case PF_INET:
666 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_inet_total);
667 if (type == SOCK_STREAM) {
668 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_stream_total);
669 } else {
670 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_total);
671 }
672 break;
673 case PF_ROUTE:
674 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_route_total);
675 break;
676 case PF_NDRV:
677 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_ndrv_total);
678 break;
679 case PF_KEY:
680 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_key_total);
681 break;
682 case PF_INET6:
683 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_inet6_total);
684 if (type == SOCK_STREAM) {
685 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet6_stream_total);
686 } else {
687 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet6_dgram_total);
688 }
689 break;
690 case PF_SYSTEM:
691 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_system_total);
692 break;
693 case PF_MULTIPATH:
694 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_multipath_total);
695 break;
696 default:
697 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_other_total);
698 break;
699 }
700
701 if (flags & SOCF_MPTCP) {
702 so->so_state |= SS_NBIO;
703 }
704
705 TAILQ_INIT(&so->so_incomp);
706 TAILQ_INIT(&so->so_comp);
707 so->so_type = type;
708 so->last_upid = proc_uniqueid(p);
709 so->last_pid = proc_pid(p);
710 proc_getexecutableuuid(p, so->last_uuid, sizeof(so->last_uuid));
711 proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid));
712
713 if (ep != PROC_NULL && ep != p) {
714 so->e_upid = proc_uniqueid(ep);
715 so->e_pid = proc_pid(ep);
716 proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid));
717 so->so_flags |= SOF_DELEGATED;
718 #if defined(XNU_TARGET_OS_OSX)
719 if (ep->p_responsible_pid != so->e_pid) {
720 rpid = ep->p_responsible_pid;
721 }
722 #endif
723 }
724
725 #if defined(XNU_TARGET_OS_OSX)
726 if (rpid < 0 && p->p_responsible_pid != so->last_pid) {
727 rpid = p->p_responsible_pid;
728 }
729
730 so->so_rpid = -1;
731 uuid_clear(so->so_ruuid);
732 if (rpid >= 0) {
733 proc_t rp = proc_find(rpid);
734 if (rp != PROC_NULL) {
735 proc_getexecutableuuid(rp, so->so_ruuid, sizeof(so->so_ruuid));
736 so->so_rpid = rpid;
737 proc_rele(rp);
738 }
739 }
740 #endif
741
742 so->so_cred = kauth_cred_proc_ref(p);
743 if (!suser(kauth_cred_get(), NULL)) {
744 so->so_state |= SS_PRIV;
745 }
746
747 so->so_proto = prp;
748 so->so_rcv.sb_flags |= SB_RECV;
749 so->so_rcv.sb_so = so->so_snd.sb_so = so;
750 so->next_lock_lr = 0;
751 so->next_unlock_lr = 0;
752
753 /*
754 * Attachment will create the per-pcb lock if necessary and
755 * increase the refcount for creation; make sure it's done before
756 * the socket is inserted in lists.
757 */
758 so->so_usecount++;
759
760 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
761 if (error != 0) {
762 /*
763 * Warning:
764 * If so_pcb is not zero, the socket will be leaked,
765 * so the protocol attachment handler must be coded carefully
766 */
767 so->so_state |= SS_NOFDREF;
768 VERIFY(so->so_usecount > 0);
769 so->so_usecount--;
770 sofreelastref(so, 1); /* will deallocate the socket */
771 return error;
772 }
773
774 /*
775 * Note: needs so_pcb to be set after pru_attach
776 */
777 if (prp->pr_update_last_owner != NULL) {
778 (*prp->pr_update_last_owner)(so, p, ep);
779 }
780
781 atomic_add_32(&prp->pr_domain->dom_refs, 1);
782
783 /* Attach socket filters for this protocol */
784 sflt_initsock(so);
785 #if TCPDEBUG
786 if (tcpconsdebug == 2) {
787 so->so_options |= SO_DEBUG;
788 }
789 #endif
790 so_set_default_traffic_class(so);
791
792 /*
793 * If this thread or task is marked to create backgrounded sockets,
794 * mark the socket as background.
795 */
796 if (!(flags & SOCF_MPTCP) &&
797 proc_get_effective_thread_policy(current_thread(), TASK_POLICY_NEW_SOCKETS_BG)) {
798 socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND);
799 so->so_background_thread = current_thread();
800 }
801
802 switch (dom) {
803 /*
804 * Don't mark Unix domain or system
805 * eligible for defunct by default.
806 */
807 case PF_LOCAL:
808 case PF_SYSTEM:
809 so->so_flags |= SOF_NODEFUNCT;
810 break;
811 default:
812 break;
813 }
814
815 /*
816 * Entitlements can't be checked at socket creation time except if the
817 * application requested a feature guarded by a privilege (cf. socket
818 * delegation).
819 * The priv(9) and the Sandboxing APIs are designed with the idea that
820 * a privilege check should only be triggered by a userland request.
821 * A privilege check at socket creation time is time consuming and
822 * could trigger many authorisation error messages from the security
823 * APIs.
824 */
825
826 *aso = so;
827
828 return 0;
829 }
830
831 /*
832 * Returns: 0 Success
833 * EAFNOSUPPORT
834 * EPROTOTYPE
835 * EPROTONOSUPPORT
836 * ENOBUFS
837 * <pru_attach>:ENOBUFS[AF_UNIX]
838 * <pru_attach>:ENOBUFS[TCP]
839 * <pru_attach>:ENOMEM[TCP]
840 * <pru_attach>:??? [other protocol families, IPSEC]
841 */
842 int
843 socreate(int dom, struct socket **aso, int type, int proto)
844 {
845 return socreate_internal(dom, aso, type, proto, current_proc(), 0,
846 PROC_NULL);
847 }
848
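/*
 * Variant of socreate() for delegated sockets: the socket is created
 * by the caller on behalf of the process identified by epid, which
 * must exist (ESRCH otherwise) unless it is the caller itself.
 */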
849 int
850 socreate_delegate(int dom, struct socket **aso, int type, int proto, pid_t epid)
851 {
852 int error = 0;
853 struct proc *ep = PROC_NULL;
854
855 if ((proc_selfpid() != epid) && ((ep = proc_find(epid)) == PROC_NULL)) {
856 error = ESRCH;
857 goto done;
858 }
859
860 error = socreate_internal(dom, aso, type, proto, current_proc(), 0, ep);
861
862 /*
863 * It might not be wise to hold the proc reference when calling
864 * socreate_internal since it calls soalloc with M_WAITOK
865 */
866 done:
867 if (ep != PROC_NULL) {
868 proc_rele(ep);
869 }
870
871 return error;
872 }
873
874 /*
875 * Returns: 0 Success
876 * <pru_bind>:EINVAL Invalid argument [COMMON_START]
877 * <pru_bind>:EAFNOSUPPORT Address family not supported
878 * <pru_bind>:EADDRNOTAVAIL Address not available.
879 * <pru_bind>:EINVAL Invalid argument
880 * <pru_bind>:EAFNOSUPPORT Address family not supported [notdef]
881 * <pru_bind>:EACCES Permission denied
882 * <pru_bind>:EADDRINUSE Address in use
883 * <pru_bind>:EAGAIN Resource unavailable, try again
884 * <pru_bind>:EPERM Operation not permitted
885 * <pru_bind>:???
886 * <sf_bind>:???
887 *
888 * Notes: It's not possible to fully enumerate the return codes above,
889 * since socket filter authors and protocol family authors may
890 * not choose to limit their error returns to those listed, even
891 * though this may result in some software operating incorrectly.
892 *
893 * The error codes which are enumerated above are those known to
894 * be returned by the tcp_usr_bind function supplied.
895 */
896 int
897 sobindlock(struct socket *so, struct sockaddr *nam, int dolock)
898 {
899 struct proc *p = current_proc();
900 int error = 0;
901
902 if (dolock) {
903 socket_lock(so, 1);
904 }
905
906 so_update_last_owner_locked(so, p);
907 so_update_policy(so);
908
909 #if NECP
910 so_update_necp_policy(so, nam, NULL);
911 #endif /* NECP */
912
913 /*
914 * If this is a bind request on a socket that has been marked
915 * as inactive, reject it now before we go any further.
916 */
917 if (so->so_flags & SOF_DEFUNCT) {
918 error = EINVAL;
919 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
920 __func__, proc_pid(p), proc_best_name(p),
921 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
922 SOCK_DOM(so), SOCK_TYPE(so), error);
923 goto out;
924 }
925
926 /* Socket filter */
927 error = sflt_bind(so, nam);
928
929 if (error == 0) {
930 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
931 }
932 out:
933 if (dolock) {
934 socket_unlock(so, 1);
935 }
936
937 if (error == EJUSTRETURN) {
938 error = 0;
939 }
940
941 return error;
942 }
943
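/*
 * Final teardown of a socket structure: drop the credential reference,
 * detach socket filters (and any content filter state), bump the
 * generation count, then hand the memory back to the socket cache or
 * to socket_zone depending on how it was allocated.
 */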
944 void
945 sodealloc(struct socket *so)
946 {
947 kauth_cred_unref(&so->so_cred);
948
949 /* Remove any filters */
950 sflt_termsock(so);
951
952 #if CONTENT_FILTER
953 cfil_sock_detach(so);
954 #endif /* CONTENT_FILTER */
955
956 so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt);
957
958 if (so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) {
959 cached_sock_free(so);
960 } else {
961 zfree(socket_zone, so);
962 }
963 }
964
965 /*
966 * Returns: 0 Success
967 * EINVAL
968 * EOPNOTSUPP
969 * <pru_listen>:EINVAL[AF_UNIX]
970 * <pru_listen>:EINVAL[TCP]
971 * <pru_listen>:EADDRNOTAVAIL[TCP] Address not available.
972 * <pru_listen>:EINVAL[TCP] Invalid argument
973 * <pru_listen>:EAFNOSUPPORT[TCP] Address family not supported [notdef]
974 * <pru_listen>:EACCES[TCP] Permission denied
975 * <pru_listen>:EADDRINUSE[TCP] Address in use
976 * <pru_listen>:EAGAIN[TCP] Resource unavailable, try again
977 * <pru_listen>:EPERM[TCP] Operation not permitted
978 * <sf_listen>:???
979 *
980 * Notes: Other <pru_listen> returns depend on the protocol family; all
981 * <sf_listen> returns depend on what the filter author causes
982 * their filter to return.
983 */
984 int
985 solisten(struct socket *so, int backlog)
986 {
987 struct proc *p = current_proc();
988 int error = 0;
989
990 socket_lock(so, 1);
991
992 so_update_last_owner_locked(so, p);
993 so_update_policy(so);
994
995 #if NECP
996 so_update_necp_policy(so, NULL, NULL);
997 #endif /* NECP */
998
999 if (so->so_proto == NULL) {
1000 error = EINVAL;
1001 goto out;
1002 }
1003 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
1004 error = EOPNOTSUPP;
1005 goto out;
1006 }
1007
1008 /*
1009 * If the listen request is made on a socket that is not fully
1010 * disconnected, or on a socket that has been marked as inactive,
1011 * reject the request now.
1012 */
1013 if ((so->so_state &
1014 (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) ||
1015 (so->so_flags & SOF_DEFUNCT)) {
1016 error = EINVAL;
1017 if (so->so_flags & SOF_DEFUNCT) {
1018 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
1019 "(%d)\n", __func__, proc_pid(p),
1020 proc_best_name(p),
1021 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
1022 SOCK_DOM(so), SOCK_TYPE(so), error);
1023 }
1024 goto out;
1025 }
1026
1027 if ((so->so_restrictions & SO_RESTRICT_DENY_IN) != 0) {
1028 error = EPERM;
1029 goto out;
1030 }
1031
1032 error = sflt_listen(so);
1033 if (error == 0) {
1034 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
1035 }
1036
1037 if (error) {
1038 if (error == EJUSTRETURN) {
1039 error = 0;
1040 }
1041 goto out;
1042 }
1043
1044 if (TAILQ_EMPTY(&so->so_comp)) {
1045 so->so_options |= SO_ACCEPTCONN;
1046 }
1047 /*
1048 * POSIX: The implementation may have an upper limit on the length of
1049 * the listen queue, either global or per accepting socket. If backlog
1050 * exceeds this limit, the length of the listen queue is set to the
1051 * limit.
1052 *
1053 * If listen() is called with a backlog argument value that is less
1054 * than 0, the function behaves as if it had been called with a backlog
1055 * argument value of 0.
1056 *
1057 * A backlog argument of 0 may allow the socket to accept connections,
1058 * in which case the length of the listen queue may be set to an
1059 * implementation-defined minimum value.
1060 */
1061 if (backlog <= 0 || backlog > somaxconn) {
1062 backlog = somaxconn;
1063 }
1064
1065 so->so_qlimit = backlog;
1066 out:
1067 socket_unlock(so, 1);
1068 return error;
1069 }
1070
1071 /*
1072 * The "accept list lock" protects the fields related to the listener queues
1073 * because we can unlock a socket to respect the lock ordering between
1074 * the listener socket and its client sockets. The lock ordering requires
1075 * acquiring the client socket before the listener socket.
1076 *
1077 * The accept list lock serializes access to the following fields:
1078 * - of the listener socket:
1079 * - so_comp
1080 * - so_incomp
1081 * - so_qlen
1082 * - so_inqlen
1083 * - of client sockets that are in so_comp or so_incomp:
1084 * - so_head
1085 * - so_list
1086 *
1087 * As one can see, the accept list lock protects the consistency of the
1088 * linkage of the client sockets.
1089 *
1090 * Note that those fields may be read without holding the accept list lock
1091 * for a preflight provided the accept list lock is taken when committing
1092 * to take an action based on the result of the preflight. The preflight
1093 * saves the cost of doing the unlock/lock dance.
1094 */
1095 void
1096 so_acquire_accept_list(struct socket *head, struct socket *so)
1097 {
1098 lck_mtx_t *mutex_held;
1099
1100 if (head->so_proto->pr_getlock == NULL) {
1101 return;
1102 }
1103 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
1104 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1105
1106 if (!(head->so_flags1 & SOF1_ACCEPT_LIST_HELD)) {
1107 head->so_flags1 |= SOF1_ACCEPT_LIST_HELD;
1108 return;
1109 }
1110 if (so != NULL) {
1111 socket_unlock(so, 0);
1112 }
1113 while (head->so_flags1 & SOF1_ACCEPT_LIST_HELD) {
1114 so_accept_list_waits += 1;
1115 msleep((caddr_t)&head->so_incomp, mutex_held,
1116 PSOCK | PCATCH, __func__, NULL);
1117 }
1118 head->so_flags1 |= SOF1_ACCEPT_LIST_HELD;
1119 if (so != NULL) {
1120 socket_unlock(head, 0);
1121 socket_lock(so, 0);
1122 socket_lock(head, 0);
1123 }
1124 }
1125
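/*
 * Release the accept list "lock" taken by so_acquire_accept_list()
 * and wake up any thread waiting to acquire it.
 */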
1126 void
1127 so_release_accept_list(struct socket *head)
1128 {
1129 if (head->so_proto->pr_getlock != NULL) {
1130 lck_mtx_t *mutex_held;
1131
1132 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
1133 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1134
1135 head->so_flags1 &= ~SOF1_ACCEPT_LIST_HELD;
1136 wakeup((caddr_t)&head->so_incomp);
1137 }
1138 }
1139
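/*
 * Drop the last reference on a socket.  Sockets whose pcb is still
 * attached, that still have a file descriptor, or that sit on a
 * listener's completed queue only have their select/upcall state
 * cleared; otherwise the socket is unlinked from the listener's
 * incomplete queue if necessary, its buffers are flushed and, when
 * 'dealloc' is set, the socket structure itself is freed.
 */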
1140 void
1141 sofreelastref(struct socket *so, int dealloc)
1142 {
1143 struct socket *head = so->so_head;
1144
1145 /* Assume socket is locked */
1146
1147 if (!(so->so_flags & SOF_PCBCLEARING) || !(so->so_state & SS_NOFDREF)) {
1148 selthreadclear(&so->so_snd.sb_sel);
1149 selthreadclear(&so->so_rcv.sb_sel);
1150 so->so_rcv.sb_flags &= ~(SB_SEL | SB_UPCALL);
1151 so->so_snd.sb_flags &= ~(SB_SEL | SB_UPCALL);
1152 so->so_event = sonullevent;
1153 return;
1154 }
1155 if (head != NULL) {
1156 /*
1157 * Need to lock the listener when the protocol has
1158 * per socket locks
1159 */
1160 if (head->so_proto->pr_getlock != NULL) {
1161 socket_lock(head, 1);
1162 so_acquire_accept_list(head, so);
1163 }
1164 if (so->so_state & SS_INCOMP) {
1165 so->so_state &= ~SS_INCOMP;
1166 TAILQ_REMOVE(&head->so_incomp, so, so_list);
1167 head->so_incqlen--;
1168 head->so_qlen--;
1169 so->so_head = NULL;
1170
1171 if (head->so_proto->pr_getlock != NULL) {
1172 so_release_accept_list(head);
1173 socket_unlock(head, 1);
1174 }
1175 } else if (so->so_state & SS_COMP) {
1176 if (head->so_proto->pr_getlock != NULL) {
1177 so_release_accept_list(head);
1178 socket_unlock(head, 1);
1179 }
1180 /*
1181 * We must not decommission a socket that's
1182 * on the accept(2) queue. If we do, then
1183 * accept(2) may hang after select(2) indicated
1184 * that the listening socket was ready.
1185 */
1186 selthreadclear(&so->so_snd.sb_sel);
1187 selthreadclear(&so->so_rcv.sb_sel);
1188 so->so_rcv.sb_flags &= ~(SB_SEL | SB_UPCALL);
1189 so->so_snd.sb_flags &= ~(SB_SEL | SB_UPCALL);
1190 so->so_event = sonullevent;
1191 return;
1192 } else {
1193 if (head->so_proto->pr_getlock != NULL) {
1194 so_release_accept_list(head);
1195 socket_unlock(head, 1);
1196 }
1197 printf("sofree: not queued\n");
1198 }
1199 }
1200 sowflush(so);
1201 sorflush(so);
1202
1203 #if FLOW_DIVERT
1204 if (so->so_flags & SOF_FLOW_DIVERT) {
1205 flow_divert_detach(so);
1206 }
1207 #endif /* FLOW_DIVERT */
1208
1209 /* 3932268: disable upcall */
1210 so->so_rcv.sb_flags &= ~SB_UPCALL;
1211 so->so_snd.sb_flags &= ~(SB_UPCALL | SB_SNDBYTE_CNT);
1212 so->so_event = sonullevent;
1213
1214 if (dealloc) {
1215 sodealloc(so);
1216 }
1217 }
1218
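/*
 * Called with the socket locked during close: when the protocol asked
 * for it (SOF_UPCALLCLOSEWAIT) and an upcall is still outstanding,
 * disable further upcalls and sleep until so_upcallusecount drains.
 */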
1219 void
1220 soclose_wait_locked(struct socket *so)
1221 {
1222 lck_mtx_t *mutex_held;
1223
1224 if (so->so_proto->pr_getlock != NULL) {
1225 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1226 } else {
1227 mutex_held = so->so_proto->pr_domain->dom_mtx;
1228 }
1229 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1230
1231 /*
1232 * Double check here and return if there's no outstanding upcall;
1233 * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
1234 */
1235 if (!so->so_upcallusecount || !(so->so_flags & SOF_UPCALLCLOSEWAIT)) {
1236 return;
1237 }
1238 so->so_rcv.sb_flags &= ~SB_UPCALL;
1239 so->so_snd.sb_flags &= ~SB_UPCALL;
1240 so->so_flags |= SOF_CLOSEWAIT;
1241
1242 (void) msleep((caddr_t)&so->so_upcallusecount, mutex_held, (PZERO - 1),
1243 "soclose_wait_locked", NULL);
1244 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1245 so->so_flags &= ~SOF_CLOSEWAIT;
1246 }
1247
1248 /*
1249 * Close a socket on last file table reference removal.
1250 * Initiate disconnect if connected.
1251 * Free socket when disconnect complete.
1252 */
1253 int
1254 soclose_locked(struct socket *so)
1255 {
1256 int error = 0;
1257 struct timespec ts;
1258
1259 if (so->so_usecount == 0) {
1260 panic("soclose: so=%p refcount=0\n", so);
1261 /* NOTREACHED */
1262 }
1263
1264 sflt_notify(so, sock_evt_closing, NULL);
1265
1266 if (so->so_upcallusecount) {
1267 soclose_wait_locked(so);
1268 }
1269
1270 #if CONTENT_FILTER
1271 /*
1272 * We have to wait until the content filters are done
1273 */
1274 if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
1275 cfil_sock_close_wait(so);
1276 cfil_sock_is_closed(so);
1277 cfil_sock_detach(so);
1278 }
1279 #endif /* CONTENT_FILTER */
1280
1281 if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) {
1282 soresume(current_proc(), so, 1);
1283 so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED;
1284 }
1285
1286 if ((so->so_options & SO_ACCEPTCONN)) {
1287 struct socket *sp, *sonext;
1288 int persocklock = 0;
1289 int incomp_overflow_only;
1290
1291 /*
1292 * We do not want new connections to be added
1293 * to the connection queues
1294 */
1295 so->so_options &= ~SO_ACCEPTCONN;
1296
1297 /*
1298 * We can drop the lock on the listener once
1299 * we've acquired the incoming list
1300 */
1301 if (so->so_proto->pr_getlock != NULL) {
1302 persocklock = 1;
1303 so_acquire_accept_list(so, NULL);
1304 socket_unlock(so, 0);
1305 }
1306 again:
1307 incomp_overflow_only = 1;
1308
1309 TAILQ_FOREACH_SAFE(sp, &so->so_incomp, so_list, sonext) {
1310 /*
1311 * Radar 5350314
1312 * Skip sockets thrown away by tcpdropdropblreq;
1313 * they will get cleaned up by the garbage collection.
1314 * Otherwise, remove the incomp socket from the queue
1315 * and let soabort trigger the appropriate cleanup.
1316 */
1317 if (sp->so_flags & SOF_OVERFLOW) {
1318 continue;
1319 }
1320
1321 if (persocklock != 0) {
1322 socket_lock(sp, 1);
1323 }
1324
1325 /*
1326 * Radar 27945981
1327 * The extra reference for the list ensures the
1328 * validity of the socket pointer when we perform the
1329 * unlock of the head above.
1330 */
1331 if (sp->so_state & SS_INCOMP) {
1332 sp->so_state &= ~SS_INCOMP;
1333 sp->so_head = NULL;
1334 TAILQ_REMOVE(&so->so_incomp, sp, so_list);
1335 so->so_incqlen--;
1336 so->so_qlen--;
1337
1338 (void) soabort(sp);
1339 } else {
1340 panic("%s sp %p in so_incomp but !SS_INCOMP",
1341 __func__, sp);
1342 }
1343
1344 if (persocklock != 0) {
1345 socket_unlock(sp, 1);
1346 }
1347 }
1348
1349 TAILQ_FOREACH_SAFE(sp, &so->so_comp, so_list, sonext) {
1350 /* Dequeue from so_comp since sofree() won't do it */
1351 if (persocklock != 0) {
1352 socket_lock(sp, 1);
1353 }
1354
1355 if (sp->so_state & SS_COMP) {
1356 sp->so_state &= ~SS_COMP;
1357 sp->so_head = NULL;
1358 TAILQ_REMOVE(&so->so_comp, sp, so_list);
1359 so->so_qlen--;
1360
1361 (void) soabort(sp);
1362 } else {
1363 panic("%s sp %p in so_comp but !SS_COMP",
1364 __func__, sp);
1365 }
1366
1367 if (persocklock) {
1368 socket_unlock(sp, 1);
1369 }
1370 }
1371
1372 if (incomp_overflow_only == 0 && !TAILQ_EMPTY(&so->so_incomp)) {
1373 #if (DEBUG | DEVELOPMENT)
1374 panic("%s head %p so_incomp not empty\n", __func__, so);
1375 #endif /* (DEVELOPMENT || DEBUG) */
1376
1377 goto again;
1378 }
1379
1380 if (!TAILQ_EMPTY(&so->so_comp)) {
1381 #if (DEBUG | DEVELOPMENT)
1382 panic("%s head %p so_comp not empty\n", __func__, so);
1383 #endif /* (DEVELOPMENT || DEBUG) */
1384
1385 goto again;
1386 }
1387
1388 if (persocklock) {
1389 socket_lock(so, 0);
1390 so_release_accept_list(so);
1391 }
1392 }
1393 if (so->so_pcb == NULL) {
1394 /* 3915887: mark the socket as ready for dealloc */
1395 so->so_flags |= SOF_PCBCLEARING;
1396 goto discard;
1397 }
1398 if (so->so_state & SS_ISCONNECTED) {
1399 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
1400 error = sodisconnectlocked(so);
1401 if (error) {
1402 goto drop;
1403 }
1404 }
1405 if (so->so_options & SO_LINGER) {
1406 lck_mtx_t *mutex_held;
1407
1408 if ((so->so_state & SS_ISDISCONNECTING) &&
1409 (so->so_state & SS_NBIO)) {
1410 goto drop;
1411 }
1412 if (so->so_proto->pr_getlock != NULL) {
1413 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1414 } else {
1415 mutex_held = so->so_proto->pr_domain->dom_mtx;
1416 }
1417 while (so->so_state & SS_ISCONNECTED) {
1418 ts.tv_sec = (so->so_linger / 100);
1419 ts.tv_nsec = (so->so_linger % 100) *
1420 NSEC_PER_USEC * 1000 * 10;
1421 error = msleep((caddr_t)&so->so_timeo,
1422 mutex_held, PSOCK | PCATCH, "soclose", &ts);
1423 if (error) {
1424 /*
1425 * It's OK when the timer fires;
1426 * don't report an error.
1427 */
1428 if (error == EWOULDBLOCK) {
1429 error = 0;
1430 }
1431 break;
1432 }
1433 }
1434 }
1435 }
1436 drop:
1437 if (so->so_usecount == 0) {
1438 panic("soclose: usecount is zero so=%p\n", so);
1439 /* NOTREACHED */
1440 }
1441 if (so->so_pcb != NULL && !(so->so_flags & SOF_PCBCLEARING)) {
1442 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
1443 if (error == 0) {
1444 error = error2;
1445 }
1446 }
1447 if (so->so_usecount <= 0) {
1448 panic("soclose: usecount is zero so=%p\n", so);
1449 /* NOTREACHED */
1450 }
1451 discard:
1452 if (so->so_pcb != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
1453 (so->so_state & SS_NOFDREF)) {
1454 panic("soclose: NOFDREF");
1455 /* NOTREACHED */
1456 }
1457 so->so_state |= SS_NOFDREF;
1458
1459 if ((so->so_flags & SOF_KNOTE) != 0) {
1460 KNOTE(&so->so_klist, SO_FILT_HINT_LOCKED);
1461 }
1462
1463 atomic_add_32(&so->so_proto->pr_domain->dom_refs, -1);
1464
1465 VERIFY(so->so_usecount > 0);
1466 so->so_usecount--;
1467 sofree(so);
1468 return error;
1469 }
1470
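/*
 * File-table entry point for close: take the socket lock and perform
 * the full close unless the socket is also retained in the kernel
 * (so_retaincnt != 0), in which case only the file descriptor's
 * use count is dropped.
 */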
1471 int
1472 soclose(struct socket *so)
1473 {
1474 int error = 0;
1475 socket_lock(so, 1);
1476
1477 if (so->so_retaincnt == 0) {
1478 error = soclose_locked(so);
1479 } else {
1480 /*
1481 * If the FD is going away but the socket is
1482 * retained in the kernel, remove its reference.
1483 */
1484 so->so_usecount--;
1485 if (so->so_usecount < 2) {
1486 panic("soclose: retaincnt non null and so=%p "
1487 "usecount=%d\n", so, so->so_usecount);
1488 }
1489 }
1490 socket_unlock(so, 1);
1491 return error;
1492 }
1493
1494 /*
1495 * Must be called at splnet...
1496 */
1497 /* Should already be locked */
1498 int
1499 soabort(struct socket *so)
1500 {
1501 int error;
1502
1503 #ifdef MORE_LOCKING_DEBUG
1504 lck_mtx_t *mutex_held;
1505
1506 if (so->so_proto->pr_getlock != NULL) {
1507 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
1508 } else {
1509 mutex_held = so->so_proto->pr_domain->dom_mtx;
1510 }
1511 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1512 #endif
1513
1514 if ((so->so_flags & SOF_ABORTED) == 0) {
1515 so->so_flags |= SOF_ABORTED;
1516 error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
1517 if (error) {
1518 sofree(so);
1519 return error;
1520 }
1521 }
1522 return 0;
1523 }
1524
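/*
 * Complete an accept(2) on a socket taken off the completed queue:
 * clear SS_NOFDREF (the socket is about to be given a file
 * descriptor) and let the protocol fill in the peer address.
 */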
1525 int
1526 soacceptlock(struct socket *so, struct sockaddr **nam, int dolock)
1527 {
1528 int error;
1529
1530 if (dolock) {
1531 socket_lock(so, 1);
1532 }
1533
1534 so_update_last_owner_locked(so, PROC_NULL);
1535 so_update_policy(so);
1536 #if NECP
1537 so_update_necp_policy(so, NULL, NULL);
1538 #endif /* NECP */
1539
1540 if ((so->so_state & SS_NOFDREF) == 0) {
1541 panic("soaccept: !NOFDREF");
1542 }
1543 so->so_state &= ~SS_NOFDREF;
1544 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
1545
1546 if (dolock) {
1547 socket_unlock(so, 1);
1548 }
1549 return error;
1550 }
1551
1552 int
1553 soaccept(struct socket *so, struct sockaddr **nam)
1554 {
1555 return soacceptlock(so, nam, 1);
1556 }
1557
1558 int
1559 soacceptfilter(struct socket *so, struct socket *head)
1560 {
1561 struct sockaddr *local = NULL, *remote = NULL;
1562 int error = 0;
1563
1564 /*
1565 * Hold the lock even if this socket has not been made visible
1566 * to the filter(s). For sockets with global locks, this protects
1567 * against the head or peer going away.
1568 */
1569 socket_lock(so, 1);
1570 if (sogetaddr_locked(so, &remote, 1) != 0 ||
1571 sogetaddr_locked(so, &local, 0) != 0) {
1572 so->so_state &= ~SS_NOFDREF;
1573 socket_unlock(so, 1);
1574 soclose(so);
1575 /* Out of resources; try it again next time */
1576 error = ECONNABORTED;
1577 goto done;
1578 }
1579
1580 error = sflt_accept(head, so, local, remote);
1581
1582 /*
1583 * If we get EJUSTRETURN from one of the filters, mark this socket
1584 * as inactive and return it anyway. This newly accepted socket
1585 * will be disconnected later before we hand it off to the caller.
1586 */
1587 if (error == EJUSTRETURN) {
1588 error = 0;
1589 (void) sosetdefunct(current_proc(), so,
1590 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
1591 }
1592
1593 if (error != 0) {
1594 /*
1595 * This may seem like a duplication to the above error
1596 * handling part when we return ECONNABORTED, except
1597 * the following is done while holding the lock since
1598 * the socket has been exposed to the filter(s) earlier.
1599 */
1600 so->so_state &= ~SS_NOFDREF;
1601 socket_unlock(so, 1);
1602 soclose(so);
1603 /* Propagate socket filter's error code to the caller */
1604 } else {
1605 socket_unlock(so, 1);
1606 }
1607 done:
1608 /* Callee checks for NULL pointer */
1609 sock_freeaddr(remote);
1610 sock_freeaddr(local);
1611 return error;
1612 }
1613
1614 /*
1615 * Returns: 0 Success
1616 * EOPNOTSUPP Operation not supported on socket
1617 * EISCONN Socket is connected
1618 * <pru_connect>:EADDRNOTAVAIL Address not available.
1619 * <pru_connect>:EINVAL Invalid argument
1620 * <pru_connect>:EAFNOSUPPORT Address family not supported [notdef]
1621 * <pru_connect>:EACCES Permission denied
1622 * <pru_connect>:EADDRINUSE Address in use
1623 * <pru_connect>:EAGAIN Resource unavailable, try again
1624 * <pru_connect>:EPERM Operation not permitted
1625 * <sf_connect_out>:??? [anything a filter writer might set]
1626 */
1627 int
1628 soconnectlock(struct socket *so, struct sockaddr *nam, int dolock)
1629 {
1630 int error;
1631 struct proc *p = current_proc();
1632
1633 if (dolock) {
1634 socket_lock(so, 1);
1635 }
1636
1637 so_update_last_owner_locked(so, p);
1638 so_update_policy(so);
1639
1640 #if NECP
1641 so_update_necp_policy(so, NULL, nam);
1642 #endif /* NECP */
1643
1644 /*
1645 * If this is a listening socket or if this is a previously-accepted
1646 * socket that has been marked as inactive, reject the connect request.
1647 */
1648 if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
1649 error = EOPNOTSUPP;
1650 if (so->so_flags & SOF_DEFUNCT) {
1651 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
1652 "(%d)\n", __func__, proc_pid(p),
1653 proc_best_name(p),
1654 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
1655 SOCK_DOM(so), SOCK_TYPE(so), error);
1656 }
1657 if (dolock) {
1658 socket_unlock(so, 1);
1659 }
1660 return error;
1661 }
1662
1663 if ((so->so_restrictions & SO_RESTRICT_DENY_OUT) != 0) {
1664 if (dolock) {
1665 socket_unlock(so, 1);
1666 }
1667 return EPERM;
1668 }
1669
1670 /*
1671 * If protocol is connection-based, can only connect once.
1672 * Otherwise, if connected, try to disconnect first.
1673 * This allows user to disconnect by connecting to, e.g.,
1674 * a null address.
1675 */
1676 if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING) &&
1677 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
1678 (error = sodisconnectlocked(so)))) {
1679 error = EISCONN;
1680 } else {
1681 /*
1682 * Run connect filter before calling protocol:
1683 * - non-blocking connect returns before completion;
1684 */
1685 error = sflt_connectout(so, nam);
1686 if (error != 0) {
1687 if (error == EJUSTRETURN) {
1688 error = 0;
1689 }
1690 } else {
1691 error = (*so->so_proto->pr_usrreqs->pru_connect)
1692 (so, nam, p);
1693 if (error != 0) {
1694 so->so_state &= ~SS_ISCONNECTING;
1695 }
1696 }
1697 }
1698 if (dolock) {
1699 socket_unlock(so, 1);
1700 }
1701 return error;
1702 }
1703
1704 int
1705 soconnect(struct socket *so, struct sockaddr *nam)
1706 {
1707 return soconnectlock(so, nam, 1);
1708 }
1709
1710 /*
1711 * Returns: 0 Success
1712 * <pru_connect2>:EINVAL[AF_UNIX]
1713 * <pru_connect2>:EPROTOTYPE[AF_UNIX]
1714 * <pru_connect2>:??? [other protocol families]
1715 *
1716 * Notes: <pru_connect2> is not supported by [TCP].
1717 */
1718 int
1719 soconnect2(struct socket *so1, struct socket *so2)
1720 {
1721 int error;
1722
1723 socket_lock(so1, 1);
1724 if (so2->so_proto->pr_lock) {
1725 socket_lock(so2, 1);
1726 }
1727
1728 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
1729
1730 socket_unlock(so1, 1);
1731 if (so2->so_proto->pr_lock) {
1732 socket_unlock(so2, 1);
1733 }
1734 return error;
1735 }
1736
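/*
 * Extended connect (connectx) with the socket already locked: applies
 * the same listener/defunct/restriction checks as soconnectlock(),
 * sets up the idempotent-data and preconnect-data flags used e.g. for
 * TCP Fast Open, then hands the request to the protocol's
 * pru_connectx handler.
 */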
1737 int
1738 soconnectxlocked(struct socket *so, struct sockaddr *src,
1739 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
1740 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
1741 uint32_t arglen, uio_t auio, user_ssize_t *bytes_written)
1742 {
1743 int error;
1744
1745 so_update_last_owner_locked(so, p);
1746 so_update_policy(so);
1747
1748 /*
1749 * If this is a listening socket or if this is a previously-accepted
1750 * socket that has been marked as inactive, reject the connect request.
1751 */
1752 if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
1753 error = EOPNOTSUPP;
1754 if (so->so_flags & SOF_DEFUNCT) {
1755 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
1756 "(%d)\n", __func__, proc_pid(p),
1757 proc_best_name(p),
1758 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
1759 SOCK_DOM(so), SOCK_TYPE(so), error);
1760 }
1761 return error;
1762 }
1763
1764 if ((so->so_restrictions & SO_RESTRICT_DENY_OUT) != 0) {
1765 return EPERM;
1766 }
1767
1768 /*
1769 * If protocol is connection-based, can only connect once
1770 * unless PR_MULTICONN is set. Otherwise, if connected,
1771 * try to disconnect first. This allows user to disconnect
1772 * by connecting to, e.g., a null address.
1773 */
1774 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) &&
1775 !(so->so_proto->pr_flags & PR_MULTICONN) &&
1776 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
1777 (error = sodisconnectlocked(so)) != 0)) {
1778 error = EISCONN;
1779 } else {
1780 if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
1781 (flags & CONNECT_DATA_IDEMPOTENT)) {
1782 so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
1783
1784 if (flags & CONNECT_DATA_AUTHENTICATED) {
1785 so->so_flags1 |= SOF1_DATA_AUTHENTICATED;
1786 }
1787 }
1788
1789 /*
1790 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
1791 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
1792 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
1793 * Case 3 allows user to combine write with connect even if they have
1794 * no use for TFO (such as regular TCP, and UDP).
1795 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
1796 */
1797 if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
1798 ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio)) {
1799 so->so_flags1 |= SOF1_PRECONNECT_DATA;
1800 }
1801
1802 /*
1803 * If a user sets data idempotent and does not pass an uio, or
1804 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error; reset
1805 * SOF1_DATA_IDEMPOTENT.
1806 */
1807 if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
1808 (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
1809 /* We should return EINVAL instead perhaps. */
1810 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
1811 }
1812
1813 /*
1814 * Run connect filter before calling protocol:
1815 * - non-blocking connect returns before completion;
1816 */
1817 error = sflt_connectout(so, dst);
1818 if (error != 0) {
1819 /* Disable PRECONNECT_DATA, as we don't need to send a SYN anymore. */
1820 so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
1821 if (error == EJUSTRETURN) {
1822 error = 0;
1823 }
1824 } else {
1825 error = (*so->so_proto->pr_usrreqs->pru_connectx)
1826 (so, src, dst, p, ifscope, aid, pcid,
1827 flags, arg, arglen, auio, bytes_written);
1828 if (error != 0) {
1829 so->so_state &= ~SS_ISCONNECTING;
1830 if (error != EINPROGRESS) {
1831 so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
1832 }
1833 }
1834 }
1835 }
1836
1837 return error;
1838 }
1839
1840 int
1841 sodisconnectlocked(struct socket *so)
1842 {
1843 int error;
1844
1845 if ((so->so_state & SS_ISCONNECTED) == 0) {
1846 error = ENOTCONN;
1847 goto bad;
1848 }
1849 if (so->so_state & SS_ISDISCONNECTING) {
1850 error = EALREADY;
1851 goto bad;
1852 }
1853
1854 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
1855 if (error == 0) {
1856 sflt_notify(so, sock_evt_disconnected, NULL);
1857 }
1858
1859 bad:
1860 return error;
1861 }
1862
1863 /* Locking version */
1864 int
1865 sodisconnect(struct socket *so)
1866 {
1867 int error;
1868
1869 socket_lock(so, 1);
1870 error = sodisconnectlocked(so);
1871 socket_unlock(so, 1);
1872 return error;
1873 }
1874
1875 int
1876 sodisconnectxlocked(struct socket *so, sae_associd_t aid, sae_connid_t cid)
1877 {
1878 int error;
1879
1880 /*
1881 * Call the protocol disconnectx handler; let it handle all
1882 * matters related to the connection state of this session.
1883 */
1884 error = (*so->so_proto->pr_usrreqs->pru_disconnectx)(so, aid, cid);
1885 if (error == 0) {
1886 /*
1887 * The event applies only for the session, not for
1888 * the disconnection of individual subflows.
1889 */
1890 if (so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) {
1891 sflt_notify(so, sock_evt_disconnected, NULL);
1892 }
1893 }
1894 return error;
1895 }
1896
1897 int
1898 sodisconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid)
1899 {
1900 int error;
1901
1902 socket_lock(so, 1);
1903 error = sodisconnectxlocked(so, aid, cid);
1904 socket_unlock(so, 1);
1905 return error;
1906 }
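
/*
 * Userspace sketch; illustrative only, assuming the public Darwin
 * disconnectx(2) interface. With the wildcard association and
 * connection IDs this tears down the socket's whole session, which
 * is what lands in sodisconnectx() above for ordinary sockets.
 *
 *	#include <sys/socket.h>
 *
 *	static int
 *	drop_session(int s)
 *	{
 *		return disconnectx(s, SAE_ASSOCID_ANY, SAE_CONNID_ANY);
 *	}
 */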
1907
1908 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
1909
1910 /*
1911 * sosendcheck will lock the socket buffer if it isn't locked and
1912 * verify that there is space for the data being inserted.
1913 *
1914 * Returns: 0 Success
1915 * EPIPE
1916 * sblock:EWOULDBLOCK
1917 * sblock:EINTR
1918 * sbwait:EBADF
1919 * sbwait:EINTR
1920 * [so_error]:???
1921 */
1922 int
1923 sosendcheck(struct socket *so, struct sockaddr *addr, user_ssize_t resid,
1924 int32_t clen, int32_t atomic, int flags, int *sblocked)
1925 {
1926 int error = 0;
1927 int32_t space;
1928 int assumelock = 0;
1929
1930 restart:
1931 if (*sblocked == 0) {
1932 if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
1933 so->so_send_filt_thread != 0 &&
1934 so->so_send_filt_thread == current_thread()) {
1935 /*
1936 * We're being called recursively from a filter,
1937 * allow this to continue. Radar 4150520.
1938 * Don't set sblocked because we don't want
1939 * to perform an unlock later.
1940 */
1941 assumelock = 1;
1942 } else {
1943 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
1944 if (error) {
1945 if (so->so_flags & SOF_DEFUNCT) {
1946 goto defunct;
1947 }
1948 return error;
1949 }
1950 *sblocked = 1;
1951 }
1952 }
1953
1954 /*
1955 * If a send attempt is made on a socket that has been marked
1956 * as inactive (disconnected), reject the request.
1957 */
1958 if (so->so_flags & SOF_DEFUNCT) {
1959 defunct:
1960 error = EPIPE;
1961 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
1962 __func__, proc_selfpid(), proc_best_name(current_proc()),
1963 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
1964 SOCK_DOM(so), SOCK_TYPE(so), error);
1965 return error;
1966 }
1967
1968 if (so->so_state & SS_CANTSENDMORE) {
1969 #if CONTENT_FILTER
1970 /*
1971 * Can re-inject data of half closed connections
1972 */
1973 if ((so->so_state & SS_ISDISCONNECTED) == 0 &&
1974 so->so_snd.sb_cfil_thread == current_thread() &&
1975 cfil_sock_data_pending(&so->so_snd) != 0) {
1976 CFIL_LOG(LOG_INFO,
1977 "so %llx ignore SS_CANTSENDMORE",
1978 (uint64_t)DEBUG_KERNEL_ADDRPERM(so));
1979 } else
1980 #endif /* CONTENT_FILTER */
1981 return EPIPE;
1982 }
1983 if (so->so_error) {
1984 error = so->so_error;
1985 so->so_error = 0;
1986 return error;
1987 }
1988
1989 if ((so->so_state & SS_ISCONNECTED) == 0) {
1990 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
1991 if (((so->so_state & SS_ISCONFIRMING) == 0) &&
1992 (resid != 0 || clen == 0) &&
1993 !(so->so_flags1 & SOF1_PRECONNECT_DATA)) {
1994 return ENOTCONN;
1995 }
1996 } else if (addr == 0) {
1997 return (so->so_proto->pr_flags & PR_CONNREQUIRED) ?
1998 ENOTCONN : EDESTADDRREQ;
1999 }
2000 }
2001
2002 space = sbspace(&so->so_snd);
2003
2004 if (flags & MSG_OOB) {
2005 space += 1024;
2006 }
2007 if ((atomic && resid > so->so_snd.sb_hiwat) ||
2008 clen > so->so_snd.sb_hiwat) {
2009 return EMSGSIZE;
2010 }
2011
2012 if ((space < resid + clen &&
2013 (atomic || (space < (int32_t)so->so_snd.sb_lowat) ||
2014 space < clen)) ||
2015 (so->so_type == SOCK_STREAM && so_wait_for_if_feedback(so))) {
2016 /*
2017 * don't block the connectx call when there's more data
2018 * than can be copied.
2019 */
2020 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
2021 if (space == 0) {
2022 return EWOULDBLOCK;
2023 }
2024 if (space < (int32_t)so->so_snd.sb_lowat) {
2025 return 0;
2026 }
2027 }
2028 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) ||
2029 assumelock) {
2030 return EWOULDBLOCK;
2031 }
2032 sbunlock(&so->so_snd, TRUE); /* keep socket locked */
2033 *sblocked = 0;
2034 error = sbwait(&so->so_snd);
2035 if (error) {
2036 if (so->so_flags & SOF_DEFUNCT) {
2037 goto defunct;
2038 }
2039 return error;
2040 }
2041 goto restart;
2042 }
2043 return 0;
2044 }
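
/*
 * Userspace sketch of the behavior sosendcheck() gives a non-blocking
 * sender: when the send buffer has no room, send(2) fails with
 * EWOULDBLOCK/EAGAIN and the caller waits for POLLOUT before retrying.
 * Illustrative only; standard POSIX calls, hypothetical helper name.
 *
 *	#include <sys/socket.h>
 *	#include <errno.h>
 *	#include <poll.h>
 *	#include <unistd.h>
 *
 *	static ssize_t
 *	send_all_nonblocking(int s, const char *buf, size_t len)
 *	{
 *		size_t off = 0;
 *
 *		while (off < len) {
 *			ssize_t n = send(s, buf + off, len - off, 0);
 *			if (n > 0) {
 *				off += (size_t)n;
 *				continue;
 *			}
 *			if (n == -1 && (errno == EWOULDBLOCK || errno == EAGAIN)) {
 *				struct pollfd pfd = { .fd = s, .events = POLLOUT };
 *				if (poll(&pfd, 1, -1) == -1 && errno != EINTR) {
 *					return -1;
 *				}
 *				continue;
 *			}
 *			return -1;	// EPIPE, ENOTCONN, ...
 *		}
 *		return (ssize_t)off;
 *	}
 */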
2045
2046 /*
2047 * Send on a socket.
2048 * If send must go all at once and message is larger than
2049 * send buffering, then hard error.
2050 * Lock against other senders.
2051 * If must go all at once and not enough room now, then
2052 * inform user that this would block and do nothing.
2053 * Otherwise, if nonblocking, send as much as possible.
2054 * The data to be sent is described by "uio" if nonzero,
2055 * otherwise by the mbuf chain "top" (which must be null
2056 * if uio is not). Data provided in mbuf chain must be small
2057 * enough to send all at once.
2058 *
2059 * Returns nonzero on error, timeout or signal; callers
2060 * must check for short counts if EINTR/ERESTART are returned.
2061 * Data and control buffers are freed on return.
2062 *
2063 * Returns: 0 Success
2064 * EOPNOTSUPP
2065 * EINVAL
2066 * ENOBUFS
2067 * uiomove:EFAULT
2068 * sosendcheck:EPIPE
2069 * sosendcheck:EWOULDBLOCK
2070 * sosendcheck:EINTR
2071 * sosendcheck:EBADF
2072 * sosendcheck:EINTR
2073 * sosendcheck:??? [value from so_error]
2074 * <pru_send>:ECONNRESET[TCP]
2075 * <pru_send>:EINVAL[TCP]
2076 * <pru_send>:ENOBUFS[TCP]
2077 * <pru_send>:EADDRINUSE[TCP]
2078 * <pru_send>:EADDRNOTAVAIL[TCP]
2079 * <pru_send>:EAFNOSUPPORT[TCP]
2080 * <pru_send>:EACCES[TCP]
2081 * <pru_send>:EAGAIN[TCP]
2082 * <pru_send>:EPERM[TCP]
2083 * <pru_send>:EMSGSIZE[TCP]
2084 * <pru_send>:EHOSTUNREACH[TCP]
2085 * <pru_send>:ENETUNREACH[TCP]
2086 * <pru_send>:ENETDOWN[TCP]
2087 * <pru_send>:ENOMEM[TCP]
2088 * <pru_send>:ENOBUFS[TCP]
2089 * <pru_send>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2090 * <pru_send>:EINVAL[AF_UNIX]
2091 * <pru_send>:EOPNOTSUPP[AF_UNIX]
2092 * <pru_send>:EPIPE[AF_UNIX]
2093 * <pru_send>:ENOTCONN[AF_UNIX]
2094 * <pru_send>:EISCONN[AF_UNIX]
2095 * <pru_send>:???[AF_UNIX] [whatever a filter author chooses]
2096 * <sf_data_out>:??? [whatever a filter author chooses]
2097 *
2098 * Notes: Other <pru_send> returns depend on the protocol family; all
2099 * <sf_data_out> returns depend on what the filter author causes
2100 * their filter to return.
2101 */
2102 int
2103 sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
2104 struct mbuf *top, struct mbuf *control, int flags)
2105 {
2106 struct mbuf **mp;
2107 struct mbuf *m, *freelist = NULL;
2108 user_ssize_t space, len, resid, orig_resid;
2109 int clen = 0, error, dontroute, mlen, sendflags;
2110 int atomic = sosendallatonce(so) || top;
2111 int sblocked = 0;
2112 struct proc *p = current_proc();
2113 uint16_t headroom = 0;
2114 boolean_t en_tracing = FALSE;
2115
2116 if (uio != NULL) {
2117 resid = uio_resid(uio);
2118 } else {
2119 resid = top->m_pkthdr.len;
2120 }
2121
2122 KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START), so, resid,
2123 so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);
2124
2125 socket_lock(so, 1);
2126
2127 /*
2128 * trace if tracing & network (vs. unix) sockets &
2129 * non-loopback
2130 */
2131 if (ENTR_SHOULDTRACE &&
2132 (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
2133 struct inpcb *inp = sotoinpcb(so);
2134 if (inp->inp_last_outifp != NULL &&
2135 !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
2136 en_tracing = TRUE;
2137 KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START,
2138 VM_KERNEL_ADDRPERM(so),
2139 ((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0),
2140 (int64_t)resid);
2141 orig_resid = resid;
2142 }
2143 }
2144
2145 /*
2146 * Re-injection should not affect process accounting
2147 */
2148 if ((flags & MSG_SKIPCFIL) == 0) {
2149 so_update_last_owner_locked(so, p);
2150 so_update_policy(so);
2151
2152 #if NECP
2153 so_update_necp_policy(so, NULL, addr);
2154 #endif /* NECP */
2155 }
2156
2157 if (so->so_type != SOCK_STREAM && (flags & MSG_OOB) != 0) {
2158 error = EOPNOTSUPP;
2159 goto out_locked;
2160 }
2161
2162 /*
2163 * In theory resid should be unsigned.
2164 * However, space must be signed, as it might be less than 0
2165 * if we over-committed, and we must use a signed comparison
2166 * of space and resid. On the other hand, a negative resid
2167 * causes us to loop sending 0-length segments to the protocol.
2168 *
2169 * Usually, MSG_EOR isn't used on SOCK_STREAM type sockets.
2170 *
2171 * Note: We limit resid to be a positive int value as we use
2172 * imin() to set bytes_to_copy -- radr://14558484
2173 */
2174 if (resid < 0 || resid > INT_MAX ||
2175 (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
2176 error = EINVAL;
2177 goto out_locked;
2178 }
2179
2180 dontroute = (flags & MSG_DONTROUTE) &&
2181 (so->so_options & SO_DONTROUTE) == 0 &&
2182 (so->so_proto->pr_flags & PR_ATOMIC);
2183 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
2184
2185 if (control != NULL) {
2186 clen = control->m_len;
2187 }
2188
2189 if (soreserveheadroom != 0) {
2190 headroom = so->so_pktheadroom;
2191 }
2192
2193 do {
2194 error = sosendcheck(so, addr, resid, clen, atomic, flags,
2195 &sblocked);
2196 if (error) {
2197 goto out_locked;
2198 }
2199
2200 mp = &top;
2201 space = sbspace(&so->so_snd) - clen;
2202 space += ((flags & MSG_OOB) ? 1024 : 0);
2203
2204 do {
2205 if (uio == NULL) {
2206 /*
2207 * Data is prepackaged in "top".
2208 */
2209 resid = 0;
2210 if (flags & MSG_EOR) {
2211 top->m_flags |= M_EOR;
2212 }
2213 } else {
2214 int chainlength;
2215 int bytes_to_copy;
2216 boolean_t jumbocl;
2217 boolean_t bigcl;
2218 int bytes_to_alloc;
2219
2220 bytes_to_copy = imin(resid, space);
2221
2222 bytes_to_alloc = bytes_to_copy;
2223 if (top == NULL) {
2224 bytes_to_alloc += headroom;
2225 }
2226
2227 if (sosendminchain > 0) {
2228 chainlength = 0;
2229 } else {
2230 chainlength = sosendmaxchain;
2231 }
2232
2233 /*
2234 * Use big 4 KB cluster when the outgoing interface
2235 * does not prefer 2 KB clusters
2236 */
2237 bigcl = !(so->so_flags1 & SOF1_IF_2KCL) ||
2238 sosendbigcl_ignore_capab;
2239
2240 /*
2241 * Attempt to use larger than system page-size
2242 * clusters for large writes only if there is
2243 * a jumbo cluster pool and if the socket is
2244 * marked accordingly.
2245 */
2246 jumbocl = sosendjcl && njcl > 0 &&
2247 ((so->so_flags & SOF_MULTIPAGES) ||
2248 sosendjcl_ignore_capab) &&
2249 bigcl;
2250
2251 socket_unlock(so, 0);
2252
2253 do {
2254 int num_needed;
2255 int hdrs_needed = (top == NULL) ? 1 : 0;
2256
2257 /*
2258 * Try to maintain a local cache of mbuf
2259 * clusters needed to complete this
2260 * write. The list is further limited
2261 * to the number that are currently
2262 * needed to fill the socket. This
2263 * mechanism allows a large number of
2264 * mbufs/clusters to be grabbed under
2265 * a single mbuf lock... if we can't
2266 * get any clusters, then fall back to
2267 * trying for mbufs. If we fail early
2268 * (or miscalculate the number needed),
2269 * make sure to release any clusters
2270 * we haven't yet consumed.
2271 */
2272 if (freelist == NULL &&
2273 bytes_to_alloc > MBIGCLBYTES &&
2274 jumbocl) {
2275 num_needed =
2276 bytes_to_alloc / M16KCLBYTES;
2277
2278 if ((bytes_to_alloc -
2279 (num_needed * M16KCLBYTES))
2280 >= MINCLSIZE) {
2281 num_needed++;
2282 }
2283
2284 freelist =
2285 m_getpackets_internal(
2286 (unsigned int *)&num_needed,
2287 hdrs_needed, M_WAIT, 0,
2288 M16KCLBYTES);
2289 /*
2290 * Fall back to 4K cluster size
2291 * if allocation failed
2292 */
2293 }
2294
2295 if (freelist == NULL &&
2296 bytes_to_alloc > MCLBYTES &&
2297 bigcl) {
2298 num_needed =
2299 bytes_to_alloc / MBIGCLBYTES;
2300
2301 if ((bytes_to_alloc -
2302 (num_needed * MBIGCLBYTES)) >=
2303 MINCLSIZE) {
2304 num_needed++;
2305 }
2306
2307 freelist =
2308 m_getpackets_internal(
2309 (unsigned int *)&num_needed,
2310 hdrs_needed, M_WAIT, 0,
2311 MBIGCLBYTES);
2312 /*
2313 * Fall back to cluster size
2314 * if allocation failed
2315 */
2316 }
2317
2318 /*
2319 * Allocate a cluster as we want to
2320 * avoid splitting the data into more
2321 * than one segment; using MINCLSIZE
2322 * would lead us to allocate two mbufs
2323 */
2324 if (soreserveheadroom != 0 &&
2325 freelist == NULL &&
2326 ((top == NULL &&
2327 bytes_to_alloc > _MHLEN) ||
2328 bytes_to_alloc > _MLEN)) {
2329 num_needed = ROUNDUP(bytes_to_alloc, MCLBYTES) /
2330 MCLBYTES;
2331 freelist =
2332 m_getpackets_internal(
2333 (unsigned int *)&num_needed,
2334 hdrs_needed, M_WAIT, 0,
2335 MCLBYTES);
2336 /*
2337 * Fall back to a single mbuf
2338 * if allocation failed
2339 */
2340 } else if (freelist == NULL &&
2341 bytes_to_alloc > MINCLSIZE) {
2342 num_needed =
2343 bytes_to_alloc / MCLBYTES;
2344
2345 if ((bytes_to_alloc -
2346 (num_needed * MCLBYTES)) >=
2347 MINCLSIZE) {
2348 num_needed++;
2349 }
2350
2351 freelist =
2352 m_getpackets_internal(
2353 (unsigned int *)&num_needed,
2354 hdrs_needed, M_WAIT, 0,
2355 MCLBYTES);
2356 /*
2357 * Fall back to a single mbuf
2358 * if allocation failed
2359 */
2360 }
2361 /*
2362 * For datagram protocols, leave
2363 * headroom for protocol headers
2364 * in the first cluster of the chain
2365 */
2366 if (freelist != NULL && atomic &&
2367 top == NULL && headroom > 0) {
2368 freelist->m_data += headroom;
2369 }
2370
2371 /*
2372 * Fall back to regular mbufs without
2373 * reserving the socket headroom
2374 */
2375 if (freelist == NULL) {
2376 if (SOCK_TYPE(so) != SOCK_STREAM || bytes_to_alloc <= MINCLSIZE) {
2377 if (top == NULL) {
2378 MGETHDR(freelist,
2379 M_WAIT, MT_DATA);
2380 } else {
2381 MGET(freelist,
2382 M_WAIT, MT_DATA);
2383 }
2384 }
2385
2386 if (freelist == NULL) {
2387 error = ENOBUFS;
2388 socket_lock(so, 0);
2389 goto out_locked;
2390 }
2391 /*
2392 * For datagram protocols,
2393 * leave room for protocol
2394 * headers in first mbuf.
2395 */
2396 if (atomic && top == NULL &&
2397 bytes_to_copy < MHLEN) {
2398 MH_ALIGN(freelist,
2399 bytes_to_copy);
2400 }
2401 }
2402 m = freelist;
2403 freelist = m->m_next;
2404 m->m_next = NULL;
2405
2406 if ((m->m_flags & M_EXT)) {
2407 mlen = m->m_ext.ext_size -
2408 M_LEADINGSPACE(m);
2409 } else if ((m->m_flags & M_PKTHDR)) {
2410 mlen =
2411 MHLEN - M_LEADINGSPACE(m);
2412 } else {
2413 mlen = MLEN - M_LEADINGSPACE(m);
2414 }
2415 len = imin(mlen, bytes_to_copy);
2416
2417 chainlength += len;
2418
2419 space -= len;
2420
2421 error = uiomove(mtod(m, caddr_t),
2422 len, uio);
2423
2424 resid = uio_resid(uio);
2425
2426 m->m_len = len;
2427 *mp = m;
2428 top->m_pkthdr.len += len;
2429 if (error) {
2430 break;
2431 }
2432 mp = &m->m_next;
2433 if (resid <= 0) {
2434 if (flags & MSG_EOR) {
2435 top->m_flags |= M_EOR;
2436 }
2437 break;
2438 }
2439 bytes_to_copy = min(resid, space);
2440 } while (space > 0 &&
2441 (chainlength < sosendmaxchain || atomic ||
2442 resid < MINCLSIZE));
2443
2444 socket_lock(so, 0);
2445
2446 if (error) {
2447 goto out_locked;
2448 }
2449 }
2450
2451 if (dontroute) {
2452 so->so_options |= SO_DONTROUTE;
2453 }
2454
2455 /*
2456 * Compute flags here, for pru_send and NKEs
2457 *
2458 * If the user set MSG_EOF, the protocol
2459 * understands this flag, and there is nothing
2460 * left to send, then use PRU_SEND_EOF instead of PRU_SEND.
2461 */
2462 sendflags = (flags & MSG_OOB) ? PRUS_OOB :
2463 ((flags & MSG_EOF) &&
2464 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
2465 (resid <= 0)) ? PRUS_EOF :
2466 /* If there is more to send set PRUS_MORETOCOME */
2467 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
2468
2469 if ((flags & MSG_SKIPCFIL) == 0) {
2470 /*
2471 * Socket filter processing
2472 */
2473 error = sflt_data_out(so, addr, &top,
2474 &control, (sendflags & MSG_OOB) ?
2475 sock_data_filt_flag_oob : 0);
2476 if (error) {
2477 if (error == EJUSTRETURN) {
2478 error = 0;
2479 goto packet_consumed;
2480 }
2481 goto out_locked;
2482 }
2483 #if CONTENT_FILTER
2484 /*
2485 * Content filter processing
2486 */
2487 error = cfil_sock_data_out(so, addr, top,
2488 control, sendflags);
2489 if (error) {
2490 if (error == EJUSTRETURN) {
2491 error = 0;
2492 goto packet_consumed;
2493 }
2494 goto out_locked;
2495 }
2496 #endif /* CONTENT_FILTER */
2497 }
2498 error = (*so->so_proto->pr_usrreqs->pru_send)
2499 (so, sendflags, top, addr, control, p);
2500
2501 packet_consumed:
2502 if (dontroute) {
2503 so->so_options &= ~SO_DONTROUTE;
2504 }
2505
2506 clen = 0;
2507 control = NULL;
2508 top = NULL;
2509 mp = &top;
2510 if (error) {
2511 goto out_locked;
2512 }
2513 } while (resid && space > 0);
2514 } while (resid);
2515
2516 out_locked:
2517 if (sblocked) {
2518 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
2519 } else {
2520 socket_unlock(so, 1);
2521 }
2522 if (top != NULL) {
2523 m_freem(top);
2524 }
2525 if (control != NULL) {
2526 m_freem(control);
2527 }
2528 if (freelist != NULL) {
2529 m_freem_list(freelist);
2530 }
2531
2532 soclearfastopen(so);
2533
2534 if (en_tracing) {
2535 /* resid passed here is the bytes left in uio */
2536 KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END,
2537 VM_KERNEL_ADDRPERM(so),
2538 ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
2539 (int64_t)(orig_resid - resid));
2540 }
2541 KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, so, resid,
2542 so->so_snd.sb_cc, space, error);
2543
2544 return error;
2545 }
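
/*
 * Userspace sketch of the EMSGSIZE rule enforced above for atomic
 * protocols: a single datagram larger than the send buffer's
 * high-water mark (SO_SNDBUF) cannot be sent. Illustrative only;
 * standard sockets API, loopback discard port used as a placeholder.
 *
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		int s = socket(AF_INET, SOCK_DGRAM, 0);
 *		int sndbuf = 0;
 *		socklen_t optlen = sizeof(sndbuf);
 *		struct sockaddr_in dst;
 *		char *big;
 *		size_t biglen;
 *
 *		getsockopt(s, SOL_SOCKET, SO_SNDBUF, &sndbuf, &optlen);
 *
 *		memset(&dst, 0, sizeof(dst));
 *		dst.sin_len = sizeof(dst);
 *		dst.sin_family = AF_INET;
 *		dst.sin_port = htons(9);
 *		dst.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
 *
 *		biglen = (size_t)sndbuf + 1;
 *		big = calloc(1, biglen);
 *		if (sendto(s, big, biglen, 0,
 *		    (struct sockaddr *)&dst, sizeof(dst)) == -1) {
 *			perror("sendto");	// EMSGSIZE expected
 *		}
 *		free(big);
 *		close(s);
 *		return 0;
 *	}
 */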
2546
2547 int
2548 sosend_reinject(struct socket *so, struct sockaddr *addr, struct mbuf *top, struct mbuf *control, uint32_t sendflags)
2549 {
2550 struct mbuf *m0 = NULL, *control_end = NULL;
2551
2552 socket_lock_assert_owned(so);
2553
2554 /*
2555 * top must point to the mbuf chain to be sent.
2556 * If control is not NULL, top must be a packet header
2557 */
2558 VERIFY(top != NULL &&
2559 (control == NULL || top->m_flags & M_PKTHDR));
2560
2561 /*
2562 * If control is not passed in, see if we can get it
2563 * from top.
2564 */
2565 if (control == NULL && (top->m_flags & M_PKTHDR) == 0) {
2566 // Locate start of control if present and start of data
2567 for (m0 = top; m0 != NULL; m0 = m0->m_next) {
2568 if (m0->m_flags & M_PKTHDR) {
2569 top = m0;
2570 break;
2571 } else if (m0->m_type == MT_CONTROL) {
2572 if (control == NULL) {
2573 // Found start of control
2574 control = m0;
2575 }
2576 if (control != NULL && m0->m_next != NULL && m0->m_next->m_type != MT_CONTROL) {
2577 // Found end of control
2578 control_end = m0;
2579 }
2580 }
2581 }
2582 if (control_end != NULL) {
2583 control_end->m_next = NULL;
2584 }
2585 }
2586
2587 int error = (*so->so_proto->pr_usrreqs->pru_send)
2588 (so, sendflags, top, addr, control, current_proc());
2589
2590 return error;
2591 }
2592
2593 /*
2594 * Supports only connected sockets (no address) without ancillary data
2595 * (control mbuf), for atomic protocols
2596 */
2597 int
2598 sosend_list(struct socket *so, struct uio **uioarray, u_int uiocnt, int flags)
2599 {
2600 struct mbuf *m, *freelist = NULL;
2601 user_ssize_t len, resid;
2602 int error, dontroute, mlen;
2603 int atomic = sosendallatonce(so);
2604 int sblocked = 0;
2605 struct proc *p = current_proc();
2606 u_int uiofirst = 0;
2607 u_int uiolast = 0;
2608 struct mbuf *top = NULL;
2609 uint16_t headroom = 0;
2610 boolean_t bigcl;
2611
2612 KERNEL_DEBUG((DBG_FNC_SOSEND_LIST | DBG_FUNC_START), so, uiocnt,
2613 so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);
2614
2615 if (so->so_type != SOCK_DGRAM) {
2616 error = EINVAL;
2617 goto out;
2618 }
2619 if (atomic == 0) {
2620 error = EINVAL;
2621 goto out;
2622 }
2623 if (so->so_proto->pr_usrreqs->pru_send_list == NULL) {
2624 error = EPROTONOSUPPORT;
2625 goto out;
2626 }
2627 if (flags & ~(MSG_DONTWAIT | MSG_NBIO)) {
2628 error = EINVAL;
2629 goto out;
2630 }
2631 resid = uio_array_resid(uioarray, uiocnt);
2632
2633 /*
2634 * In theory resid should be unsigned.
2635 * However, space must be signed, as it might be less than 0
2636 * if we over-committed, and we must use a signed comparison
2637 * of space and resid. On the other hand, a negative resid
2638 * causes us to loop sending 0-length segments to the protocol.
2639 *
2640 * Note: We limit resid to be a positive int value as we use
2641 * imin() to set bytes_to_copy -- radr://14558484
2642 */
2643 if (resid < 0 || resid > INT_MAX) {
2644 error = EINVAL;
2645 goto out;
2646 }
2647
2648 socket_lock(so, 1);
2649 so_update_last_owner_locked(so, p);
2650 so_update_policy(so);
2651
2652 #if NECP
2653 so_update_necp_policy(so, NULL, NULL);
2654 #endif /* NECP */
2655
2656 dontroute = (flags & MSG_DONTROUTE) &&
2657 (so->so_options & SO_DONTROUTE) == 0 &&
2658 (so->so_proto->pr_flags & PR_ATOMIC);
2659 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
2660
2661 error = sosendcheck(so, NULL, resid, 0, atomic, flags, &sblocked);
2662 if (error) {
2663 goto release;
2664 }
2665
2666 /*
2667 * Use big 4 KB clusters when the outgoing interface does not prefer
2668 * 2 KB clusters
2669 */
2670 bigcl = !(so->so_flags1 & SOF1_IF_2KCL) || sosendbigcl_ignore_capab;
2671
2672 if (soreserveheadroom != 0) {
2673 headroom = so->so_pktheadroom;
2674 }
2675
2676 do {
2677 int i;
2678 int num_needed = 0;
2679 int chainlength;
2680 size_t maxpktlen = 0;
2681 int bytes_to_alloc;
2682
2683 if (sosendminchain > 0) {
2684 chainlength = 0;
2685 } else {
2686 chainlength = sosendmaxchain;
2687 }
2688
2689 socket_unlock(so, 0);
2690
2691 /*
2692 * Find a set of uio that fit in a reasonable number
2693 * of mbuf packets
2694 */
2695 for (i = uiofirst; i < uiocnt; i++) {
2696 struct uio *auio = uioarray[i];
2697
2698 len = uio_resid(auio);
2699
2700 /* Do nothing for empty messages */
2701 if (len == 0) {
2702 continue;
2703 }
2704
2705 num_needed += 1;
2706 uiolast += 1;
2707
2708 if (len > maxpktlen) {
2709 maxpktlen = len;
2710 }
2711
2712 chainlength += len;
2713 if (chainlength > sosendmaxchain) {
2714 break;
2715 }
2716 }
2717 /*
2718 * Nothing left to send
2719 */
2720 if (num_needed == 0) {
2721 socket_lock(so, 0);
2722 break;
2723 }
2724 /*
2725 * Allocate a buffer large enough to include headroom space for
2726 * the network and link headers
2727 *
2728 */
2729 bytes_to_alloc = maxpktlen + headroom;
2730
2731 /*
2732 * Allocate a single contiguous buffer of the smallest available
2733 * size when possible
2734 */
2735 if (bytes_to_alloc > MCLBYTES &&
2736 bytes_to_alloc <= MBIGCLBYTES && bigcl) {
2737 freelist = m_getpackets_internal(
2738 (unsigned int *)&num_needed,
2739 num_needed, M_WAIT, 1,
2740 MBIGCLBYTES);
2741 } else if (bytes_to_alloc > _MHLEN &&
2742 bytes_to_alloc <= MCLBYTES) {
2743 freelist = m_getpackets_internal(
2744 (unsigned int *)&num_needed,
2745 num_needed, M_WAIT, 1,
2746 MCLBYTES);
2747 } else {
2748 freelist = m_allocpacket_internal(
2749 (unsigned int *)&num_needed,
2750 bytes_to_alloc, NULL, M_WAIT, 1, 0);
2751 }
2752
2753 if (freelist == NULL) {
2754 socket_lock(so, 0);
2755 error = ENOMEM;
2756 goto release;
2757 }
2758 /*
2759 * Copy each uio of the set into its own mbuf packet
2760 */
2761 for (i = uiofirst, m = freelist;
2762 i < uiolast && m != NULL;
2763 i++) {
2764 int bytes_to_copy;
2765 struct mbuf *n;
2766 struct uio *auio = uioarray[i];
2767
2768 bytes_to_copy = uio_resid(auio);
2769
2770 /* Do nothing for empty messages */
2771 if (bytes_to_copy == 0) {
2772 continue;
2773 }
2774 /*
2775 * Leave headroom for protocol headers
2776 * in the first mbuf of the chain
2777 */
2778 m->m_data += headroom;
2779
2780 for (n = m; n != NULL; n = n->m_next) {
2781 if ((m->m_flags & M_EXT)) {
2782 mlen = m->m_ext.ext_size -
2783 M_LEADINGSPACE(m);
2784 } else if ((m->m_flags & M_PKTHDR)) {
2785 mlen =
2786 MHLEN - M_LEADINGSPACE(m);
2787 } else {
2788 mlen = MLEN - M_LEADINGSPACE(m);
2789 }
2790 len = imin(mlen, bytes_to_copy);
2791
2792 /*
2793 * Note: uiomove() decrements the iovec
2794 * length
2795 */
2796 error = uiomove(mtod(n, caddr_t),
2797 len, auio);
2798 if (error != 0) {
2799 break;
2800 }
2801 n->m_len = len;
2802 m->m_pkthdr.len += len;
2803
2804 VERIFY(m->m_pkthdr.len <= maxpktlen);
2805
2806 bytes_to_copy -= len;
2807 resid -= len;
2808 }
2809 if (m->m_pkthdr.len == 0) {
2810 printf(
2811 "%s:%d so %llx pkt %llx type %u len null\n",
2812 __func__, __LINE__,
2813 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
2814 (uint64_t)DEBUG_KERNEL_ADDRPERM(m),
2815 m->m_type);
2816 }
2817 if (error != 0) {
2818 break;
2819 }
2820 m = m->m_nextpkt;
2821 }
2822
2823 socket_lock(so, 0);
2824
2825 if (error) {
2826 goto release;
2827 }
2828 top = freelist;
2829 freelist = NULL;
2830
2831 if (dontroute) {
2832 so->so_options |= SO_DONTROUTE;
2833 }
2834
2835 if ((flags & MSG_SKIPCFIL) == 0) {
2836 struct mbuf **prevnextp = NULL;
2837
2838 for (i = uiofirst, m = top;
2839 i < uiolast && m != NULL;
2840 i++) {
2841 struct mbuf *nextpkt = m->m_nextpkt;
2842
2843 /*
2844 * Socket filter processing
2845 */
2846 error = sflt_data_out(so, NULL, &m,
2847 NULL, 0);
2848 if (error != 0 && error != EJUSTRETURN) {
2849 goto release;
2850 }
2851
2852 #if CONTENT_FILTER
2853 if (error == 0) {
2854 /*
2855 * Content filter processing
2856 */
2857 error = cfil_sock_data_out(so, NULL, m,
2858 NULL, 0);
2859 if (error != 0 && error != EJUSTRETURN) {
2860 goto release;
2861 }
2862 }
2863 #endif /* CONTENT_FILTER */
2864 /*
2865 * Remove packet from the list when
2866 * swallowed by a filter
2867 */
2868 if (error == EJUSTRETURN) {
2869 error = 0;
2870 if (prevnextp != NULL) {
2871 *prevnextp = nextpkt;
2872 } else {
2873 top = nextpkt;
2874 }
2875 }
2876
2877 m = nextpkt;
2878 if (m != NULL) {
2879 prevnextp = &m->m_nextpkt;
2880 }
2881 }
2882 }
2883 if (top != NULL) {
2884 error = (*so->so_proto->pr_usrreqs->pru_send_list)
2885 (so, 0, top, NULL, NULL, p);
2886 }
2887
2888 if (dontroute) {
2889 so->so_options &= ~SO_DONTROUTE;
2890 }
2891
2892 top = NULL;
2893 uiofirst = uiolast;
2894 } while (resid > 0 && error == 0);
2895 release:
2896 if (sblocked) {
2897 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
2898 } else {
2899 socket_unlock(so, 1);
2900 }
2901 out:
2902 if (top != NULL) {
2903 m_freem(top);
2904 }
2905 if (freelist != NULL) {
2906 m_freem_list(freelist);
2907 }
2908
2909 KERNEL_DEBUG(DBG_FNC_SOSEND_LIST | DBG_FUNC_END, so, resid,
2910 so->so_snd.sb_cc, 0, error);
2911
2912 return error;
2913 }
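
/*
 * Userspace sketch of the constraints checked above (connected
 * datagram socket, no destination address, no ancillary data, one
 * atomic record per message). The batched entry point itself is not
 * a public interface, so a portable per-datagram loop is shown
 * instead; illustrative only, hypothetical helper name.
 *
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <unistd.h>
 *
 *	static int
 *	send_burst(int s, const struct sockaddr_in *dst,
 *	    const char *msgs[], const size_t lens[], unsigned int count)
 *	{
 *		unsigned int i;
 *
 *		if (connect(s, (const struct sockaddr *)dst,
 *		    sizeof(*dst)) == -1) {
 *			return -1;
 *		}
 *		for (i = 0; i < count; i++) {
 *			if (send(s, msgs[i], lens[i], 0) == -1) {
 *				return -1;
 *			}
 *		}
 *		return 0;
 *	}
 */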
2914
2915 /*
2916 * May return ERESTART when packet is dropped by MAC policy check
2917 */
2918 static int
2919 soreceive_addr(struct proc *p, struct socket *so, struct sockaddr **psa,
2920 int flags, struct mbuf **mp, struct mbuf **nextrecordp, int canwait)
2921 {
2922 int error = 0;
2923 struct mbuf *m = *mp;
2924 struct mbuf *nextrecord = *nextrecordp;
2925
2926 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
2927 #if CONFIG_MACF_SOCKET_SUBSET
2928 /*
2929 * Call the MAC framework for policy checking if we're in
2930 * the user process context and the socket isn't connected.
2931 */
2932 if (p != kernproc && !(so->so_state & SS_ISCONNECTED)) {
2933 struct mbuf *m0 = m;
2934 /*
2935 * Dequeue this record (temporarily) from the receive
2936 * list since we're about to drop the socket's lock
2937 * where a new record may arrive and be appended to
2938 * the list. Upon MAC policy failure, the record
2939 * will be freed. Otherwise, we'll add it back to
2940 * the head of the list. We cannot rely on SB_LOCK
2941 * because the append operation uses the socket's lock.
2942 */
2943 do {
2944 m->m_nextpkt = NULL;
2945 sbfree(&so->so_rcv, m);
2946 m = m->m_next;
2947 } while (m != NULL);
2948 m = m0;
2949 so->so_rcv.sb_mb = nextrecord;
2950 SB_EMPTY_FIXUP(&so->so_rcv);
2951 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1a");
2952 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1a");
2953 socket_unlock(so, 0);
2954
2955 if (mac_socket_check_received(proc_ucred(p), so,
2956 mtod(m, struct sockaddr *)) != 0) {
2957 /*
2958 * MAC policy failure; free this record and
2959 * process the next record (or block until
2960 * one is available). We have adjusted sb_cc
2961 * and sb_mbcnt above so there is no need to
2962 * call sbfree() again.
2963 */
2964 m_freem(m);
2965 /*
2966 * Clear SB_LOCK but don't unlock the socket.
2967 * Process the next record or wait for one.
2968 */
2969 socket_lock(so, 0);
2970 sbunlock(&so->so_rcv, TRUE); /* stay locked */
2971 error = ERESTART;
2972 goto done;
2973 }
2974 socket_lock(so, 0);
2975 /*
2976 * If the socket has been defunct'd, drop it.
2977 */
2978 if (so->so_flags & SOF_DEFUNCT) {
2979 m_freem(m);
2980 error = ENOTCONN;
2981 goto done;
2982 }
2983 /*
2984 * Re-adjust the socket receive list and re-enqueue
2985 * the record in front of any packets which may have
2986 * been appended while we dropped the lock.
2987 */
2988 for (m = m0; m->m_next != NULL; m = m->m_next) {
2989 sballoc(&so->so_rcv, m);
2990 }
2991 sballoc(&so->so_rcv, m);
2992 if (so->so_rcv.sb_mb == NULL) {
2993 so->so_rcv.sb_lastrecord = m0;
2994 so->so_rcv.sb_mbtail = m;
2995 }
2996 m = m0;
2997 nextrecord = m->m_nextpkt = so->so_rcv.sb_mb;
2998 so->so_rcv.sb_mb = m;
2999 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1b");
3000 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1b");
3001 }
3002 #endif /* CONFIG_MACF_SOCKET_SUBSET */
3003 if (psa != NULL) {
3004 *psa = dup_sockaddr(mtod(m, struct sockaddr *), canwait);
3005 if ((*psa == NULL) && (flags & MSG_NEEDSA)) {
3006 error = EWOULDBLOCK;
3007 goto done;
3008 }
3009 }
3010 if (flags & MSG_PEEK) {
3011 m = m->m_next;
3012 } else {
3013 sbfree(&so->so_rcv, m);
3014 if (m->m_next == NULL && so->so_rcv.sb_cc != 0) {
3015 panic("%s: about to create invalid socketbuf",
3016 __func__);
3017 /* NOTREACHED */
3018 }
3019 MFREE(m, so->so_rcv.sb_mb);
3020 m = so->so_rcv.sb_mb;
3021 if (m != NULL) {
3022 m->m_nextpkt = nextrecord;
3023 } else {
3024 so->so_rcv.sb_mb = nextrecord;
3025 SB_EMPTY_FIXUP(&so->so_rcv);
3026 }
3027 }
3028 done:
3029 *mp = m;
3030 *nextrecordp = nextrecord;
3031
3032 return error;
3033 }
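
/*
 * Userspace sketch of what the address handling above produces for an
 * unconnected datagram socket: each record is preceded by the peer's
 * address (the MT_SONAME mbuf), which recvfrom(2) copies out alongside
 * the data. Illustrative only; standard sockets API, hypothetical
 * helper name.
 *
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <arpa/inet.h>
 *	#include <stdio.h>
 *
 *	static ssize_t
 *	recv_one_datagram(int s, char *buf, size_t len)
 *	{
 *		struct sockaddr_in from;
 *		socklen_t fromlen = sizeof(from);
 *		ssize_t n = recvfrom(s, buf, len, 0,
 *		    (struct sockaddr *)&from, &fromlen);
 *
 *		if (n >= 0) {
 *			char addr[INET_ADDRSTRLEN];
 *			inet_ntop(AF_INET, &from.sin_addr, addr, sizeof(addr));
 *			printf("%zd bytes from %s:%u\n", n, addr,
 *			    (unsigned int)ntohs(from.sin_port));
 *		}
 *		return n;
 *	}
 */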
3034
3035 /*
3036 * When peeking SCM_RIGHTS, the actual file descriptors are not yet created
3037 * so clear the data portion in order not to leak the file pointers
3038 */
3039 static void
3040 sopeek_scm_rights(struct mbuf *rights)
3041 {
3042 struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
3043
3044 if (cm->cmsg_type == SCM_RIGHTS) {
3045 memset(cm + 1, 0, cm->cmsg_len - sizeof(*cm));
3046 }
3047 }
3048
3049 /*
3050 * Process one or more MT_CONTROL mbufs present before any data mbufs
3051 * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
3052 * just copy the data; if !MSG_PEEK, we call into the protocol to
3053 * perform externalization.
3054 */
3055 static int
3056 soreceive_ctl(struct socket *so, struct mbuf **controlp, int flags,
3057 struct mbuf **mp, struct mbuf **nextrecordp)
3058 {
3059 int error = 0;
3060 struct mbuf *cm = NULL, *cmn;
3061 struct mbuf **cme = &cm;
3062 struct sockbuf *sb_rcv = &so->so_rcv;
3063 struct mbuf **msgpcm = NULL;
3064 struct mbuf *m = *mp;
3065 struct mbuf *nextrecord = *nextrecordp;
3066 struct protosw *pr = so->so_proto;
3067
3068 /*
3069 * Externalizing the control messages would require us to
3070 * drop the socket's lock below. Once we re-acquire the
3071 * lock, the mbuf chain might change. In order to preserve
3072 * consistency, we unlink all control messages from the
3073 * first mbuf chain in one shot and link them separately
3074 * onto a different chain.
3075 */
3076 do {
3077 if (flags & MSG_PEEK) {
3078 if (controlp != NULL) {
3079 if (*controlp == NULL) {
3080 msgpcm = controlp;
3081 }
3082 *controlp = m_copy(m, 0, m->m_len);
3083
3084 /*
3085 * If we failed to allocate an mbuf,
3086 * release any previously allocated
3087 * mbufs for control data. Return
3088 * an error. Keep the mbufs in the
3089 * socket as this is using
3090 * MSG_PEEK flag.
3091 */
3092 if (*controlp == NULL) {
3093 m_freem(*msgpcm);
3094 error = ENOBUFS;
3095 goto done;
3096 }
3097
3098 sopeek_scm_rights(*controlp);
3099
3100 controlp = &(*controlp)->m_next;
3101 }
3102 m = m->m_next;
3103 } else {
3104 m->m_nextpkt = NULL;
3105 sbfree(sb_rcv, m);
3106 sb_rcv->sb_mb = m->m_next;
3107 m->m_next = NULL;
3108 *cme = m;
3109 cme = &(*cme)->m_next;
3110 m = sb_rcv->sb_mb;
3111 }
3112 } while (m != NULL && m->m_type == MT_CONTROL);
3113
3114 if (!(flags & MSG_PEEK)) {
3115 if (sb_rcv->sb_mb != NULL) {
3116 sb_rcv->sb_mb->m_nextpkt = nextrecord;
3117 } else {
3118 sb_rcv->sb_mb = nextrecord;
3119 SB_EMPTY_FIXUP(sb_rcv);
3120 }
3121 if (nextrecord == NULL) {
3122 sb_rcv->sb_lastrecord = m;
3123 }
3124 }
3125
3126 SBLASTRECORDCHK(&so->so_rcv, "soreceive ctl");
3127 SBLASTMBUFCHK(&so->so_rcv, "soreceive ctl");
3128
3129 while (cm != NULL) {
3130 int cmsg_type;
3131
3132 cmn = cm->m_next;
3133 cm->m_next = NULL;
3134 cmsg_type = mtod(cm, struct cmsghdr *)->cmsg_type;
3135
3136 /*
3137 * Call the protocol to externalize SCM_RIGHTS message
3138 * and return the modified message to the caller upon
3139 * success. Otherwise, all other control messages are
3140 * returned unmodified to the caller. Note that we
3141 * only get into this loop if MSG_PEEK is not set.
3142 */
3143 if (pr->pr_domain->dom_externalize != NULL &&
3144 cmsg_type == SCM_RIGHTS) {
3145 /*
3146 * Release socket lock: see 3903171. This
3147 * would also allow more records to be appended
3148 * to the socket buffer. We still have SB_LOCK
3149 * set on it, so we can be sure that the head
3150 * of the mbuf chain won't change.
3151 */
3152 socket_unlock(so, 0);
3153 error = (*pr->pr_domain->dom_externalize)(cm);
3154 socket_lock(so, 0);
3155 } else {
3156 error = 0;
3157 }
3158
3159 if (controlp != NULL && error == 0) {
3160 *controlp = cm;
3161 controlp = &(*controlp)->m_next;
3162 } else {
3163 (void) m_free(cm);
3164 }
3165 cm = cmn;
3166 }
3167 /*
3168 * Update the value of nextrecord in case we received new
3169 * records when the socket was unlocked above for
3170 * externalizing SCM_RIGHTS.
3171 */
3172 if (m != NULL) {
3173 nextrecord = sb_rcv->sb_mb->m_nextpkt;
3174 } else {
3175 nextrecord = sb_rcv->sb_mb;
3176 }
3177
3178 done:
3179 *mp = m;
3180 *nextrecordp = nextrecord;
3181
3182 return error;
3183 }
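
/*
 * Userspace sketch of receiving SCM_RIGHTS control data that the
 * externalization above turns into real descriptors. Note that a
 * MSG_PEEK of the same message returns zeroed descriptor slots (see
 * sopeek_scm_rights() above); descriptors are only created on an
 * actual receive. Illustrative only; standard CMSG macros, with an
 * arbitrary cap of 8 descriptors per message and a hypothetical
 * helper name.
 *
 *	#include <sys/socket.h>
 *	#include <sys/uio.h>
 *	#include <string.h>
 *
 *	static int
 *	recv_fds(int s, int *fds, int maxfds)
 *	{
 *		char data;
 *		struct iovec iov = { .iov_base = &data, .iov_len = 1 };
 *		union {
 *			struct cmsghdr hdr;
 *			char buf[CMSG_SPACE(sizeof(int) * 8)];
 *		} cmsgbuf;
 *		struct msghdr msg;
 *		struct cmsghdr *cm;
 *		int n = 0;
 *
 *		memset(&msg, 0, sizeof(msg));
 *		msg.msg_iov = &iov;
 *		msg.msg_iovlen = 1;
 *		msg.msg_control = cmsgbuf.buf;
 *		msg.msg_controllen = sizeof(cmsgbuf.buf);
 *
 *		if (recvmsg(s, &msg, 0) == -1) {
 *			return -1;
 *		}
 *		for (cm = CMSG_FIRSTHDR(&msg); cm != NULL;
 *		    cm = CMSG_NXTHDR(&msg, cm)) {
 *			if (cm->cmsg_level != SOL_SOCKET ||
 *			    cm->cmsg_type != SCM_RIGHTS) {
 *				continue;
 *			}
 *			int cnt = (int)((cm->cmsg_len - CMSG_LEN(0)) /
 *			    sizeof(int));
 *			for (int i = 0; i < cnt && n < maxfds; i++, n++) {
 *				memcpy(&fds[n],
 *				    (char *)CMSG_DATA(cm) + i * sizeof(int),
 *				    sizeof(int));
 *			}
 *		}
 *		return n;	// number of descriptors received
 *	}
 */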
3184
3185 /*
3186 * If we have less data than requested, block awaiting more
3187 * (subject to any timeout) if:
3188 * 1. the current count is less than the low water mark, or
3189 * 2. MSG_WAITALL is set, and it is possible to do the entire
3190 * receive operation at once if we block (resid <= hiwat).
3191 * 3. MSG_DONTWAIT is not set
3192 * If MSG_WAITALL is set but resid is larger than the receive buffer,
3193 * we have to do the receive in sections, and thus risk returning
3194 * a short count if a timeout or signal occurs after we start.
3195 */
3196 static boolean_t
3197 so_should_wait(struct socket *so, struct uio *uio, struct mbuf *m, int flags)
3198 {
3199 struct protosw *pr = so->so_proto;
3200
3201 /* No mbufs in the receive-queue? Wait! */
3202 if (m == NULL) {
3203 return true;
3204 }
3205
3206 /* Not enough data in the receive socket-buffer - we may have to wait */
3207 if ((flags & MSG_DONTWAIT) == 0 && so->so_rcv.sb_cc < uio_resid(uio) &&
3208 m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0) {
3209 /*
3210 * Application did set the low-water mark, so we should wait for
3211 * this data to be present.
3212 */
3213 if (so->so_rcv.sb_cc < so->so_rcv.sb_lowat) {
3214 return true;
3215 }
3216
3217 /*
3218 * Application wants all the data - so let's try to do the
3219 * receive-operation at once by waiting for everything to
3220 * be there.
3221 */
3222 if ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat) {
3223 return true;
3224 }
3225 }
3226
3227 return false;
3228 }
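
/*
 * Userspace sketch of the two wait conditions above. SO_RCVLOWAT
 * raises the low-water mark a blocking read waits for, and
 * MSG_WAITALL asks for the entire request before returning (a signal,
 * shutdown or error can still produce a short count). Illustrative
 * only; standard sockets API, hypothetical helper name.
 *
 *	#include <sys/socket.h>
 *
 *	static ssize_t
 *	recv_record(int s, void *buf, size_t len)
 *	{
 *		int lowat = 512;
 *
 *		(void)setsockopt(s, SOL_SOCKET, SO_RCVLOWAT,
 *		    &lowat, sizeof(lowat));
 *		return recv(s, buf, len, MSG_WAITALL);
 *	}
 */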
3229
3230 /*
3231 * Implement receive operations on a socket.
3232 * We depend on the way that records are added to the sockbuf
3233 * by sbappend*. In particular, each record (mbufs linked through m_next)
3234 * must begin with an address if the protocol so specifies,
3235 * followed by an optional mbuf or mbufs containing ancillary data,
3236 * and then zero or more mbufs of data.
3237 * In order to avoid blocking network interrupts for the entire time here,
3238 * we splx() while doing the actual copy to user space.
3239 * Although the sockbuf is locked, new data may still be appended,
3240 * and thus we must maintain consistency of the sockbuf during that time.
3241 *
3242 * The caller may receive the data as a single mbuf chain by supplying
3243 * an mbuf **mp0 for use in returning the chain. The uio is then used
3244 * only for the count in uio_resid.
3245 *
3246 * Returns: 0 Success
3247 * ENOBUFS
3248 * ENOTCONN
3249 * EWOULDBLOCK
3250 * uiomove:EFAULT
3251 * sblock:EWOULDBLOCK
3252 * sblock:EINTR
3253 * sbwait:EBADF
3254 * sbwait:EINTR
3255 * sodelayed_copy:EFAULT
3256 * <pru_rcvoob>:EINVAL[TCP]
3257 * <pru_rcvoob>:EWOULDBLOCK[TCP]
3258 * <pru_rcvoob>:???
3259 * <pr_domain->dom_externalize>:EMSGSIZE[AF_UNIX]
3260 * <pr_domain->dom_externalize>:ENOBUFS[AF_UNIX]
3261 * <pr_domain->dom_externalize>:???
3262 *
3263 * Notes: Additional return values from calls through <pru_rcvoob> and
3264 * <pr_domain->dom_externalize> depend on protocols other than
3265 * TCP or AF_UNIX, which are documented above.
3266 */
3267 int
3268 soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
3269 struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
3270 {
3271 struct mbuf *m, **mp, *ml = NULL;
3272 struct mbuf *nextrecord, *free_list;
3273 int flags, error, offset;
3274 user_ssize_t len;
3275 struct protosw *pr = so->so_proto;
3276 int moff, type = 0;
3277 user_ssize_t orig_resid = uio_resid(uio);
3278 user_ssize_t delayed_copy_len;
3279 int can_delay;
3280 struct proc *p = current_proc();
3281 boolean_t en_tracing = FALSE;
3282
3283 /*
3284 * Sanity check on the length passed by caller as we are making 'int'
3285 * comparisons
3286 */
3287 if (orig_resid < 0 || orig_resid > INT_MAX) {
3288 return EINVAL;
3289 }
3290
3291 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, so,
3292 uio_resid(uio), so->so_rcv.sb_cc, so->so_rcv.sb_lowat,
3293 so->so_rcv.sb_hiwat);
3294
3295 socket_lock(so, 1);
3296 so_update_last_owner_locked(so, p);
3297 so_update_policy(so);
3298
3299 #ifdef MORE_LOCKING_DEBUG
3300 if (so->so_usecount == 1) {
3301 panic("%s: so=%x no other reference on socket\n", __func__, so);
3302 /* NOTREACHED */
3303 }
3304 #endif
3305 mp = mp0;
3306 if (psa != NULL) {
3307 *psa = NULL;
3308 }
3309 if (controlp != NULL) {
3310 *controlp = NULL;
3311 }
3312 if (flagsp != NULL) {
3313 flags = *flagsp & ~MSG_EOR;
3314 } else {
3315 flags = 0;
3316 }
3317
3318 /*
3319 * If a recv attempt is made on a previously-accepted socket
3320 * that has been marked as inactive (disconnected), reject
3321 * the request.
3322 */
3323 if (so->so_flags & SOF_DEFUNCT) {
3324 struct sockbuf *sb = &so->so_rcv;
3325
3326 error = ENOTCONN;
3327 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
3328 __func__, proc_pid(p), proc_best_name(p),
3329 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
3330 SOCK_DOM(so), SOCK_TYPE(so), error);
3331 /*
3332 * This socket should have been disconnected and flushed
3333 * prior to being returned from sodefunct(); there should
3334 * be no data on its receive list, so panic otherwise.
3335 */
3336 if (so->so_state & SS_DEFUNCT) {
3337 sb_empty_assert(sb, __func__);
3338 }
3339 socket_unlock(so, 1);
3340 return error;
3341 }
3342
3343 if ((so->so_flags1 & SOF1_PRECONNECT_DATA) &&
3344 pr->pr_usrreqs->pru_preconnect) {
3345 /*
3346 * A user may set the CONNECT_RESUME_ON_READ_WRITE flag but not
3347 * call write() right after this. *If* the app calls a read,
3348 * we do not want to block this read indefinitely. Thus,
3349 * we trigger a connect so that the session gets initiated.
3350 */
3351 error = (*pr->pr_usrreqs->pru_preconnect)(so);
3352
3353 if (error) {
3354 socket_unlock(so, 1);
3355 return error;
3356 }
3357 }
3358
3359 if (ENTR_SHOULDTRACE &&
3360 (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
3361 /*
3362 * enable energy tracing for inet sockets that go over
3363 * non-loopback interfaces only.
3364 */
3365 struct inpcb *inp = sotoinpcb(so);
3366 if (inp->inp_last_outifp != NULL &&
3367 !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
3368 en_tracing = TRUE;
3369 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_START,
3370 VM_KERNEL_ADDRPERM(so),
3371 ((so->so_state & SS_NBIO) ?
3372 kEnTrFlagNonBlocking : 0),
3373 (int64_t)orig_resid);
3374 }
3375 }
3376
3377 /*
3378 * When SO_WANTOOBFLAG is set we try to get out-of-band data
3379 * regardless of the flags argument. Here is the case where
3380 * out-of-band data is not inline.
3381 */
3382 if ((flags & MSG_OOB) ||
3383 ((so->so_options & SO_WANTOOBFLAG) != 0 &&
3384 (so->so_options & SO_OOBINLINE) == 0 &&
3385 (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
3386 m = m_get(M_WAIT, MT_DATA);
3387 if (m == NULL) {
3388 socket_unlock(so, 1);
3389 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
3390 ENOBUFS, 0, 0, 0, 0);
3391 return ENOBUFS;
3392 }
3393 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
3394 if (error) {
3395 goto bad;
3396 }
3397 socket_unlock(so, 0);
3398 do {
3399 error = uiomove(mtod(m, caddr_t),
3400 imin(uio_resid(uio), m->m_len), uio);
3401 m = m_free(m);
3402 } while (uio_resid(uio) && error == 0 && m != NULL);
3403 socket_lock(so, 0);
3404 bad:
3405 if (m != NULL) {
3406 m_freem(m);
3407 }
3408
3409 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
3410 if (error == EWOULDBLOCK || error == EINVAL) {
3411 /*
3412 * Let's try to get normal data:
3413 * EWOULDBLOCK: out-of-band data not
3414 * received yet. EINVAL: out-of-band data
3415 * already read.
3416 */
3417 error = 0;
3418 goto nooob;
3419 } else if (error == 0 && flagsp != NULL) {
3420 *flagsp |= MSG_OOB;
3421 }
3422 }
3423 socket_unlock(so, 1);
3424 if (en_tracing) {
3425 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
3426 VM_KERNEL_ADDRPERM(so), 0,
3427 (int64_t)(orig_resid - uio_resid(uio)));
3428 }
3429 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
3430 0, 0, 0, 0);
3431
3432 return error;
3433 }
3434 nooob:
3435 if (mp != NULL) {
3436 *mp = NULL;
3437 }
3438
3439 if (so->so_state & SS_ISCONFIRMING && uio_resid(uio)) {
3440 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
3441 }
3442
3443 free_list = NULL;
3444 delayed_copy_len = 0;
3445 restart:
3446 #ifdef MORE_LOCKING_DEBUG
3447 if (so->so_usecount <= 1) {
3448 printf("soreceive: sblock so=0x%llx ref=%d on socket\n",
3449 (uint64_t)DEBUG_KERNEL_ADDRPERM(so), so->so_usecount);
3450 }
3451 #endif
3452 /*
3453 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
3454 * and if so just return to the caller. This could happen when
3455 * soreceive() is called by a socket upcall function during the
3456 * time the socket is freed. The socket buffer would have been
3457 * locked across the upcall, therefore we cannot put this thread
3458 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
3459 * we may livelock), because the lock on the socket buffer will
3460 * only be released when the upcall routine returns to its caller.
3461 * Because the socket has been officially closed, there can be
3462 * no further read on it.
3463 *
3464 * A multipath subflow socket would have its SS_NOFDREF set by
3465 * default, so check for SOF_MP_SUBFLOW socket flag; when the
3466 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
3467 */
3468 if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
3469 (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
3470 socket_unlock(so, 1);
3471 return 0;
3472 }
3473
3474 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
3475 if (error) {
3476 socket_unlock(so, 1);
3477 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
3478 0, 0, 0, 0);
3479 if (en_tracing) {
3480 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
3481 VM_KERNEL_ADDRPERM(so), 0,
3482 (int64_t)(orig_resid - uio_resid(uio)));
3483 }
3484 return error;
3485 }
3486
3487 m = so->so_rcv.sb_mb;
3488 if (so_should_wait(so, uio, m, flags)) {
3489 /*
3490 * Panic if we notice inconsistencies in the socket's
3491 * receive list; both sb_mb and sb_cc should correctly
3492 * reflect the contents of the list, otherwise we may
3493 * end up with false positives during select() or poll()
3494 * which could put the application in a bad state.
3495 */
3496 SB_MB_CHECK(&so->so_rcv);
3497
3498 if (so->so_error) {
3499 if (m != NULL) {
3500 goto dontblock;
3501 }
3502 error = so->so_error;
3503 if ((flags & MSG_PEEK) == 0) {
3504 so->so_error = 0;
3505 }
3506 goto release;
3507 }
3508 if (so->so_state & SS_CANTRCVMORE) {
3509 #if CONTENT_FILTER
3510 /*
3511 * Deal with half closed connections
3512 */
3513 if ((so->so_state & SS_ISDISCONNECTED) == 0 &&
3514 cfil_sock_data_pending(&so->so_rcv) != 0) {
3515 CFIL_LOG(LOG_INFO,
3516 "so %llx ignore SS_CANTRCVMORE",
3517 (uint64_t)DEBUG_KERNEL_ADDRPERM(so));
3518 } else
3519 #endif /* CONTENT_FILTER */
3520 if (m != NULL) {
3521 goto dontblock;
3522 } else {
3523 goto release;
3524 }
3525 }
3526 for (; m != NULL; m = m->m_next) {
3527 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
3528 m = so->so_rcv.sb_mb;
3529 goto dontblock;
3530 }
3531 }
3532 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0 &&
3533 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
3534 error = ENOTCONN;
3535 goto release;
3536 }
3537 if (uio_resid(uio) == 0) {
3538 goto release;
3539 }
3540
3541 if ((so->so_state & SS_NBIO) ||
3542 (flags & (MSG_DONTWAIT | MSG_NBIO))) {
3543 error = EWOULDBLOCK;
3544 goto release;
3545 }
3546 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
3547 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
3548 sbunlock(&so->so_rcv, TRUE); /* keep socket locked */
3549 #if EVEN_MORE_LOCKING_DEBUG
3550 if (socket_debug) {
3551 printf("Waiting for socket data\n");
3552 }
3553 #endif
3554
3555 /*
3556 * Depending on the protocol (e.g. TCP), the following
3557 * might cause the socket lock to be dropped and later
3558 * be reacquired, and more data could have arrived and
3559 * have been appended to the receive socket buffer by
3560 * the time it returns. Therefore, we sleep in
3561 * sbwait() below only if the wait-condition is still
3562 * true.
3563 */
3564 if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL) {
3565 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
3566 }
3567
3568 error = 0;
3569 if (so_should_wait(so, uio, so->so_rcv.sb_mb, flags)) {
3570 error = sbwait(&so->so_rcv);
3571 }
3572
3573 #if EVEN_MORE_LOCKING_DEBUG
3574 if (socket_debug) {
3575 printf("SORECEIVE - sbwait returned %d\n", error);
3576 }
3577 #endif
3578 if (so->so_usecount < 1) {
3579 panic("%s: after 2nd sblock so=%p ref=%d on socket\n",
3580 __func__, so, so->so_usecount);
3581 /* NOTREACHED */
3582 }
3583 if (error) {
3584 socket_unlock(so, 1);
3585 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
3586 0, 0, 0, 0);
3587 if (en_tracing) {
3588 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
3589 VM_KERNEL_ADDRPERM(so), 0,
3590 (int64_t)(orig_resid - uio_resid(uio)));
3591 }
3592 return error;
3593 }
3594 goto restart;
3595 }
3596 dontblock:
3597 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
3598 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
3599 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
3600 nextrecord = m->m_nextpkt;
3601
3602 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
3603 error = soreceive_addr(p, so, psa, flags, &m, &nextrecord,
3604 mp0 == NULL);
3605 if (error == ERESTART) {
3606 goto restart;
3607 } else if (error != 0) {
3608 goto release;
3609 }
3610 orig_resid = 0;
3611 }
3612
3613 /*
3614 * Process one or more MT_CONTROL mbufs present before any data mbufs
3615 * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
3616 * just copy the data; if !MSG_PEEK, we call into the protocol to
3617 * perform externalization.
3618 */
3619 if (m != NULL && m->m_type == MT_CONTROL) {
3620 error = soreceive_ctl(so, controlp, flags, &m, &nextrecord);
3621 if (error != 0) {
3622 goto release;
3623 }
3624 orig_resid = 0;
3625 }
3626
3627 if (m != NULL) {
3628 if (!(flags & MSG_PEEK)) {
3629 /*
3630 * We get here because m points to an mbuf following
3631 * any MT_SONAME or MT_CONTROL mbufs which have been
3632 * processed above. In any case, m should be pointing
3633 * to the head of the mbuf chain, and the nextrecord
3634 * should be either NULL or equal to m->m_nextpkt.
3635 * See comments above about SB_LOCK.
3636 */
3637 if (m != so->so_rcv.sb_mb ||
3638 m->m_nextpkt != nextrecord) {
3639 panic("%s: post-control !sync so=%p m=%p "
3640 "nextrecord=%p\n", __func__, so, m,
3641 nextrecord);
3642 /* NOTREACHED */
3643 }
3644 if (nextrecord == NULL) {
3645 so->so_rcv.sb_lastrecord = m;
3646 }
3647 }
3648 type = m->m_type;
3649 if (type == MT_OOBDATA) {
3650 flags |= MSG_OOB;
3651 }
3652 } else {
3653 if (!(flags & MSG_PEEK)) {
3654 SB_EMPTY_FIXUP(&so->so_rcv);
3655 }
3656 }
3657 SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
3658 SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");
3659
3660 moff = 0;
3661 offset = 0;
3662
3663 if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy) {
3664 can_delay = 1;
3665 } else {
3666 can_delay = 0;
3667 }
3668
3669 while (m != NULL &&
3670 (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
3671 if (m->m_type == MT_OOBDATA) {
3672 if (type != MT_OOBDATA) {
3673 break;
3674 }
3675 } else if (type == MT_OOBDATA) {
3676 break;
3677 }
3678
3679 if (m->m_type != MT_OOBDATA && m->m_type != MT_DATA &&
3680 m->m_type != MT_HEADER) {
3681 break;
3682 }
3683 /*
3684 * Make sure to always set the MSG_OOB flag when getting
3685 * out-of-band data inline.
3686 */
3687 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
3688 (so->so_options & SO_OOBINLINE) != 0 &&
3689 (so->so_state & SS_RCVATMARK) != 0) {
3690 flags |= MSG_OOB;
3691 }
3692 so->so_state &= ~SS_RCVATMARK;
3693 len = uio_resid(uio) - delayed_copy_len;
3694 if (so->so_oobmark && len > so->so_oobmark - offset) {
3695 len = so->so_oobmark - offset;
3696 }
3697 if (len > m->m_len - moff) {
3698 len = m->m_len - moff;
3699 }
3700 /*
3701 * If mp is set, just pass back the mbufs.
3702 * Otherwise copy them out via the uio, then free.
3703 * Sockbuf must be consistent here (points to current mbuf,
3704 * it points to next record) when we drop priority;
3705 * we must note any additions to the sockbuf when we
3706 * block interrupts again.
3707 */
3708 if (mp == NULL) {
3709 SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
3710 SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
3711 if (can_delay && len == m->m_len) {
3712 /*
3713 * only delay the copy if we're consuming the
3714 * mbuf and we're NOT in MSG_PEEK mode
3715 * and we have enough data to make it worthwhile
3716 * to drop and retake the lock... can_delay
3717 * reflects the state of the 2 latter
3718 * constraints; moff should always be zero
3719 * in these cases
3720 */
3721 delayed_copy_len += len;
3722 } else {
3723 if (delayed_copy_len) {
3724 error = sodelayed_copy(so, uio,
3725 &free_list, &delayed_copy_len);
3726
3727 if (error) {
3728 goto release;
3729 }
3730 /*
3731 * can only get here if MSG_PEEK is not
3732 * set; therefore, m should point at the
3733 * head of the rcv queue; if it doesn't,
3734 * it means something drastically
3735 * changed while we were out from behind
3736 * the lock in sodelayed_copy. perhaps
3737 * a RST on the stream. in any event,
3738 * the stream has been interrupted. it's
3739 * probably best just to return whatever
3740 * data we've moved and let the caller
3741 * sort it out...
3742 */
3743 if (m != so->so_rcv.sb_mb) {
3744 break;
3745 }
3746 }
3747 socket_unlock(so, 0);
3748 error = uiomove(mtod(m, caddr_t) + moff,
3749 (int)len, uio);
3750 socket_lock(so, 0);
3751
3752 if (error) {
3753 goto release;
3754 }
3755 }
3756 } else {
3757 uio_setresid(uio, (uio_resid(uio) - len));
3758 }
3759 if (len == m->m_len - moff) {
3760 if (m->m_flags & M_EOR) {
3761 flags |= MSG_EOR;
3762 }
3763 if (flags & MSG_PEEK) {
3764 m = m->m_next;
3765 moff = 0;
3766 } else {
3767 nextrecord = m->m_nextpkt;
3768 sbfree(&so->so_rcv, m);
3769 m->m_nextpkt = NULL;
3770
3771 if (mp != NULL) {
3772 *mp = m;
3773 mp = &m->m_next;
3774 so->so_rcv.sb_mb = m = m->m_next;
3775 *mp = NULL;
3776 } else {
3777 if (free_list == NULL) {
3778 free_list = m;
3779 } else {
3780 ml->m_next = m;
3781 }
3782 ml = m;
3783 so->so_rcv.sb_mb = m = m->m_next;
3784 ml->m_next = NULL;
3785 }
3786 if (m != NULL) {
3787 m->m_nextpkt = nextrecord;
3788 if (nextrecord == NULL) {
3789 so->so_rcv.sb_lastrecord = m;
3790 }
3791 } else {
3792 so->so_rcv.sb_mb = nextrecord;
3793 SB_EMPTY_FIXUP(&so->so_rcv);
3794 }
3795 SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
3796 SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
3797 }
3798 } else {
3799 if (flags & MSG_PEEK) {
3800 moff += len;
3801 } else {
3802 if (mp != NULL) {
3803 int copy_flag;
3804
3805 if (flags & MSG_DONTWAIT) {
3806 copy_flag = M_DONTWAIT;
3807 } else {
3808 copy_flag = M_WAIT;
3809 }
3810 *mp = m_copym(m, 0, len, copy_flag);
3811 /*
3812 * Failed to allocate an mbuf?
3813 * Adjust uio_resid back, it was
3814 * adjusted down by len bytes which
3815 * we didn't copy over.
3816 */
3817 if (*mp == NULL) {
3818 uio_setresid(uio,
3819 (uio_resid(uio) + len));
3820 break;
3821 }
3822 }
3823 m->m_data += len;
3824 m->m_len -= len;
3825 so->so_rcv.sb_cc -= len;
3826 }
3827 }
3828 if (so->so_oobmark) {
3829 if ((flags & MSG_PEEK) == 0) {
3830 so->so_oobmark -= len;
3831 if (so->so_oobmark == 0) {
3832 so->so_state |= SS_RCVATMARK;
3833 break;
3834 }
3835 } else {
3836 offset += len;
3837 if (offset == so->so_oobmark) {
3838 break;
3839 }
3840 }
3841 }
3842 if (flags & MSG_EOR) {
3843 break;
3844 }
3845 /*
3846 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set
3847 * (for non-atomic socket), we must not quit until
3848 * "uio->uio_resid == 0" or an error termination.
3849 * If a signal/timeout occurs, return with a short
3850 * count but without error. Keep sockbuf locked
3851 * against other readers.
3852 */
3853 while (flags & (MSG_WAITALL | MSG_WAITSTREAM) && m == NULL &&
3854 (uio_resid(uio) - delayed_copy_len) > 0 &&
3855 !sosendallatonce(so) && !nextrecord) {
3856 if (so->so_error || ((so->so_state & SS_CANTRCVMORE)
3857 #if CONTENT_FILTER
3858 && cfil_sock_data_pending(&so->so_rcv) == 0
3859 #endif /* CONTENT_FILTER */
3860 )) {
3861 goto release;
3862 }
3863
3864 /*
3865 * Depending on the protocol (e.g. TCP), the following
3866 * might cause the socket lock to be dropped and later
3867 * be reacquired, and more data could have arrived and
3868 * have been appended to the receive socket buffer by
3869 * the time it returns. Therefore, we sleep in
3870 * sbwait() below only if the socket buffer is
3871 * empty, in order to avoid a false sleep.
3872 */
3873 if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL) {
3874 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
3875 }
3876
3877 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
3878 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
3879
3880 if (so->so_rcv.sb_mb == NULL && sbwait(&so->so_rcv)) {
3881 error = 0;
3882 goto release;
3883 }
3884 /*
3885 * have to wait until after we get back from the sbwait
3886 * to do the copy because we will drop the lock if we
3887 * have enough data that has been delayed... by dropping
3888 * the lock we open up a window allowing the netisr
3889 * thread to process the incoming packets and to change
3890 * the state of this socket... we're issuing the sbwait
3891 * because the socket is empty and we're expecting the
3892 * netisr thread to wake us up when more packets arrive;
3893 * if we allow that processing to happen and then sbwait
3894 * we could stall forever with packets sitting in the
3895 * socket if no further packets arrive from the remote
3896 * side.
3897 *
3898 * we want to copy before we've collected all the data
3899 * to satisfy this request to allow the copy to overlap
3900 * the incoming packet processing on an MP system
3901 */
3902 if (delayed_copy_len > sorecvmincopy &&
3903 (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
3904 error = sodelayed_copy(so, uio,
3905 &free_list, &delayed_copy_len);
3906
3907 if (error) {
3908 goto release;
3909 }
3910 }
3911 m = so->so_rcv.sb_mb;
3912 if (m != NULL) {
3913 nextrecord = m->m_nextpkt;
3914 }
3915 SB_MB_CHECK(&so->so_rcv);
3916 }
3917 }
3918 #ifdef MORE_LOCKING_DEBUG
3919 if (so->so_usecount <= 1) {
3920 panic("%s: after big while so=%p ref=%d on socket\n",
3921 __func__, so, so->so_usecount);
3922 /* NOTREACHED */
3923 }
3924 #endif
3925
3926 if (m != NULL && pr->pr_flags & PR_ATOMIC) {
3927 if (so->so_options & SO_DONTTRUNC) {
3928 flags |= MSG_RCVMORE;
3929 } else {
3930 flags |= MSG_TRUNC;
3931 if ((flags & MSG_PEEK) == 0) {
3932 (void) sbdroprecord(&so->so_rcv);
3933 }
3934 }
3935 }
3936
3937 /*
3938 * pru_rcvd below (for TCP) may cause more data to be received
3939 * if the socket lock is dropped prior to sending the ACK; some
3940 * legacy OpenTransport applications don't handle this well
3941 * (if it receives less data than requested while MSG_HAVEMORE
3942 * is set), and so we set the flag now based on what we know
3943 * prior to calling pru_rcvd.
3944 */
3945 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) {
3946 flags |= MSG_HAVEMORE;
3947 }
3948
3949 if ((flags & MSG_PEEK) == 0) {
3950 if (m == NULL) {
3951 so->so_rcv.sb_mb = nextrecord;
3952 /*
3953 * First part is an inline SB_EMPTY_FIXUP(). Second
3954 * part makes sure sb_lastrecord is up-to-date if
3955 * there is still data in the socket buffer.
3956 */
3957 if (so->so_rcv.sb_mb == NULL) {
3958 so->so_rcv.sb_mbtail = NULL;
3959 so->so_rcv.sb_lastrecord = NULL;
3960 } else if (nextrecord->m_nextpkt == NULL) {
3961 so->so_rcv.sb_lastrecord = nextrecord;
3962 }
3963 SB_MB_CHECK(&so->so_rcv);
3964 }
3965 SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
3966 SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
3967 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) {
3968 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
3969 }
3970 }
3971
3972 if (delayed_copy_len) {
3973 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
3974 if (error) {
3975 goto release;
3976 }
3977 }
3978 if (free_list != NULL) {
3979 m_freem_list(free_list);
3980 free_list = NULL;
3981 }
3982
3983 if (orig_resid == uio_resid(uio) && orig_resid &&
3984 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
3985 sbunlock(&so->so_rcv, TRUE); /* keep socket locked */
3986 goto restart;
3987 }
3988
3989 if (flagsp != NULL) {
3990 *flagsp |= flags;
3991 }
3992 release:
3993 #ifdef MORE_LOCKING_DEBUG
3994 if (so->so_usecount <= 1) {
3995 panic("%s: release so=%p ref=%d on socket\n", __func__,
3996 so, so->so_usecount);
3997 /* NOTREACHED */
3998 }
3999 #endif
4000 if (delayed_copy_len) {
4001 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
4002 }
4003
4004 if (free_list != NULL) {
4005 m_freem_list(free_list);
4006 }
4007
4008 sbunlock(&so->so_rcv, FALSE); /* will unlock socket */
4009
4010 if (en_tracing) {
4011 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
4012 VM_KERNEL_ADDRPERM(so),
4013 ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
4014 (int64_t)(orig_resid - uio_resid(uio)));
4015 }
4016 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, so, uio_resid(uio),
4017 so->so_rcv.sb_cc, 0, error);
4018
4019 return error;
4020 }
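
/*
 * Illustrative sketch (userspace caller, placeholder names): the
 * MSG_WAITALL/MSG_WAITSTREAM loop above means a blocking read on a stream
 * socket normally returns only once the full request is satisfied, the
 * peer closes, an error is pending, or a signal/timeout cuts it short with
 * a partial count; on an atomic (datagram) protocol, excess bytes are
 * dropped with MSG_TRUNC unless SO_DONTTRUNC is set.
 *
 *	uint8_t buf[4096];
 *	ssize_t n = recv(fd, buf, sizeof(buf), MSG_WAITALL);
 *	if (n < 0) {
 *		// EWOULDBLOCK on a non-blocking socket with nothing queued,
 *		// otherwise whatever soreceive() reported
 *	} else if (n < (ssize_t)sizeof(buf)) {
 *		// short read: EOF, a pending so_error, or an interrupted wait
 *	}
 */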
4021
4022 /*
4023 * Returns: 0 Success
4024 * uiomove:EFAULT
4025 */
4026 static int
4027 sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list,
4028 user_ssize_t *resid)
4029 {
4030 int error = 0;
4031 struct mbuf *m;
4032
4033 m = *free_list;
4034
4035 socket_unlock(so, 0);
4036
4037 while (m != NULL && error == 0) {
4038 error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
4039 m = m->m_next;
4040 }
4041 m_freem_list(*free_list);
4042
4043 *free_list = NULL;
4044 *resid = 0;
4045
4046 socket_lock(so, 0);
4047
4048 return error;
4049 }
4050
4051 static int
4052 sodelayed_copy_list(struct socket *so, struct recv_msg_elem *msgarray,
4053 u_int uiocnt, struct mbuf **free_list, user_ssize_t *resid)
4054 {
4055 #pragma unused(so)
4056 int error = 0;
4057 struct mbuf *ml, *m;
4058 int i = 0;
4059 struct uio *auio;
4060
4061 for (ml = *free_list, i = 0; ml != NULL && i < uiocnt;
4062 ml = ml->m_nextpkt, i++) {
4063 auio = msgarray[i].uio;
4064 for (m = ml; m != NULL; m = m->m_next) {
4065 error = uiomove(mtod(m, caddr_t), m->m_len, auio);
4066 if (error != 0) {
4067 goto out;
4068 }
4069 }
4070 }
4071 out:
4072 m_freem_list(*free_list);
4073
4074 *free_list = NULL;
4075 *resid = 0;
4076
4077 return error;
4078 }
4079
4080 int
4081 soreceive_list(struct socket *so, struct recv_msg_elem *msgarray, u_int uiocnt,
4082 int *flagsp)
4083 {
4084 struct mbuf *m;
4085 struct mbuf *nextrecord;
4086 struct mbuf *ml = NULL, *free_list = NULL, *free_tail = NULL;
4087 int error;
4088 user_ssize_t len, pktlen, delayed_copy_len = 0;
4089 struct protosw *pr = so->so_proto;
4090 user_ssize_t resid;
4091 struct proc *p = current_proc();
4092 struct uio *auio = NULL;
4093 int npkts = 0;
4094 int sblocked = 0;
4095 struct sockaddr **psa = NULL;
4096 struct mbuf **controlp = NULL;
4097 int can_delay;
4098 int flags;
4099 struct mbuf *free_others = NULL;
4100
4101 KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_START,
4102 so, uiocnt,
4103 so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat);
4104
4105 /*
4106 * Sanity checks:
4107 * - Only supports the don't-wait flags
4108 * - Only supports datagram sockets (could be extended to raw)
4109 * - Must be atomic
4110 * - Protocol must support packet chains
4111 * - The uio array must not be NULL (should we panic?)
4112 */
4113 if (flagsp != NULL) {
4114 flags = *flagsp;
4115 } else {
4116 flags = 0;
4117 }
4118 if (flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA |
4119 MSG_NBIO)) {
4120 printf("%s invalid flags 0x%x\n", __func__, flags);
4121 error = EINVAL;
4122 goto out;
4123 }
4124 if (so->so_type != SOCK_DGRAM) {
4125 error = EINVAL;
4126 goto out;
4127 }
4128 if (sosendallatonce(so) == 0) {
4129 error = EINVAL;
4130 goto out;
4131 }
4132 if (so->so_proto->pr_usrreqs->pru_send_list == NULL) {
4133 error = EPROTONOSUPPORT;
4134 goto out;
4135 }
4136 if (msgarray == NULL) {
4137 printf("%s uioarray is NULL\n", __func__);
4138 error = EINVAL;
4139 goto out;
4140 }
4141 if (uiocnt == 0) {
4142 printf("%s uiocnt is 0\n", __func__);
4143 error = EINVAL;
4144 goto out;
4145 }
4146 /*
4147 * Sanity check on the length passed by caller as we are making 'int'
4148 * comparisons
4149 */
4150 resid = recv_msg_array_resid(msgarray, uiocnt);
4151 if (resid < 0 || resid > INT_MAX) {
4152 error = EINVAL;
4153 goto out;
4154 }
4155
4156 if (!(flags & MSG_PEEK) && sorecvmincopy > 0) {
4157 can_delay = 1;
4158 } else {
4159 can_delay = 0;
4160 }
4161
4162 socket_lock(so, 1);
4163 so_update_last_owner_locked(so, p);
4164 so_update_policy(so);
4165
4166 #if NECP
4167 so_update_necp_policy(so, NULL, NULL);
4168 #endif /* NECP */
4169
4170 /*
4171 * If a recv attempt is made on a previously-accepted socket
4172 * that has been marked as inactive (disconnected), reject
4173 * the request.
4174 */
4175 if (so->so_flags & SOF_DEFUNCT) {
4176 struct sockbuf *sb = &so->so_rcv;
4177
4178 error = ENOTCONN;
4179 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
4180 __func__, proc_pid(p), proc_best_name(p),
4181 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
4182 SOCK_DOM(so), SOCK_TYPE(so), error);
4183 /*
4184 * This socket should have been disconnected and flushed
4185 * prior to being returned from sodefunct(); there should
4186 * be no data on its receive list, so panic otherwise.
4187 */
4188 if (so->so_state & SS_DEFUNCT) {
4189 sb_empty_assert(sb, __func__);
4190 }
4191 goto release;
4192 }
4193
4194 next:
4195 /*
4196 * The uio may be empty
4197 */
4198 if (npkts >= uiocnt) {
4199 error = 0;
4200 goto release;
4201 }
4202 restart:
4203 /*
4204 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
4205 * and if so just return to the caller. This could happen when
4206 * soreceive() is called by a socket upcall function during the
4207 * time the socket is freed. The socket buffer would have been
4208 * locked across the upcall, therefore we cannot put this thread
4209 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
4210 * we may livelock), because the lock on the socket buffer will
4211 * only be released when the upcall routine returns to its caller.
4212 * Because the socket has been officially closed, there can be
4213 * no further read on it.
4214 */
4215 if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
4216 (SS_NOFDREF | SS_CANTRCVMORE)) {
4217 error = 0;
4218 goto release;
4219 }
4220
4221 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
4222 if (error) {
4223 goto release;
4224 }
4225 sblocked = 1;
4226
4227 m = so->so_rcv.sb_mb;
4228 /*
4229 * Block awaiting more datagrams if needed
4230 */
4231 if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
4232 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
4233 ((flags & MSG_WAITALL) && npkts < uiocnt))))) {
4234 /*
4235 * Panic if we notice inconsistencies in the socket's
4236 * receive list; both sb_mb and sb_cc should correctly
4237 * reflect the contents of the list, otherwise we may
4238 * end up with false positives during select() or poll()
4239 * which could put the application in a bad state.
4240 */
4241 SB_MB_CHECK(&so->so_rcv);
4242
4243 if (so->so_error) {
4244 error = so->so_error;
4245 if ((flags & MSG_PEEK) == 0) {
4246 so->so_error = 0;
4247 }
4248 goto release;
4249 }
4250 if (so->so_state & SS_CANTRCVMORE) {
4251 goto release;
4252 }
4253 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0 &&
4254 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
4255 error = ENOTCONN;
4256 goto release;
4257 }
4258 if ((so->so_state & SS_NBIO) ||
4259 (flags & (MSG_DONTWAIT | MSG_NBIO))) {
4260 error = EWOULDBLOCK;
4261 goto release;
4262 }
4263 /*
4264 * Do not block if we got some data
4265 */
4266 if (free_list != NULL) {
4267 error = 0;
4268 goto release;
4269 }
4270
4271 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
4272 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
4273
4274 sbunlock(&so->so_rcv, TRUE); /* keep socket locked */
4275 sblocked = 0;
4276
4277 error = sbwait(&so->so_rcv);
4278 if (error) {
4279 goto release;
4280 }
4281 goto restart;
4282 }
4283
4284 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
4285 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
4286 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
4287
4288 /*
4289 * Consume the current uio index as we have a datagram
4290 */
4291 auio = msgarray[npkts].uio;
4292 resid = uio_resid(auio);
4293 msgarray[npkts].which |= SOCK_MSG_DATA;
4294 psa = (msgarray[npkts].which & SOCK_MSG_SA) ?
4295 &msgarray[npkts].psa : NULL;
4296 controlp = (msgarray[npkts].which & SOCK_MSG_CONTROL) ?
4297 &msgarray[npkts].controlp : NULL;
4298 npkts += 1;
4299 nextrecord = m->m_nextpkt;
4300
4301 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
4302 error = soreceive_addr(p, so, psa, flags, &m, &nextrecord, 1);
4303 if (error == ERESTART) {
4304 goto restart;
4305 } else if (error != 0) {
4306 goto release;
4307 }
4308 }
4309
4310 if (m != NULL && m->m_type == MT_CONTROL) {
4311 error = soreceive_ctl(so, controlp, flags, &m, &nextrecord);
4312 if (error != 0) {
4313 goto release;
4314 }
4315 }
4316
4317 if (m->m_pkthdr.len == 0) {
4318 printf("%s:%d so %llx pkt %llx type %u pktlen null\n",
4319 __func__, __LINE__,
4320 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
4321 (uint64_t)DEBUG_KERNEL_ADDRPERM(m),
4322 m->m_type);
4323 }
4324
4325 /*
4326 * Loop to copy the mbufs of the current record
4327 * Support zero length packets
4328 */
4329 ml = NULL;
4330 pktlen = 0;
4331 while (m != NULL && (len = resid - pktlen) >= 0 && error == 0) {
4332 if (m->m_len == 0) {
4333 panic("%p m_len zero", m);
4334 }
4335 if (m->m_type == 0) {
4336 panic("%p m_type zero", m);
4337 }
4338 /*
4339 * Clip to the residual length
4340 */
4341 if (len > m->m_len) {
4342 len = m->m_len;
4343 }
4344 pktlen += len;
4345 /*
4346 * Copy the mbufs via the uio or delay the copy
4347 * The sockbuf must be consistent here (sb_mb points to the current
4348 * mbuf, m_nextpkt to the next record) when we drop priority;
4349 * we must note any additions to the sockbuf when we
4350 * block interrupts again.
4351 */
4352 if (len > 0 && can_delay == 0) {
4353 socket_unlock(so, 0);
4354 error = uiomove(mtod(m, caddr_t), (int)len, auio);
4355 socket_lock(so, 0);
4356 if (error) {
4357 goto release;
4358 }
4359 } else {
4360 delayed_copy_len += len;
4361 }
4362
4363 if (len == m->m_len) {
4364 /*
4365 * m was entirely copied
4366 */
4367 sbfree(&so->so_rcv, m);
4368 nextrecord = m->m_nextpkt;
4369 m->m_nextpkt = NULL;
4370
4371 /*
4372 * Set the first packet to the head of the free list
4373 */
4374 if (free_list == NULL) {
4375 free_list = m;
4376 }
4377 /*
4378 * Link current packet to tail of free list
4379 */
4380 if (ml == NULL) {
4381 if (free_tail != NULL) {
4382 free_tail->m_nextpkt = m;
4383 }
4384 free_tail = m;
4385 }
4386 /*
4387 * Link current mbuf to last mbuf of current packet
4388 */
4389 if (ml != NULL) {
4390 ml->m_next = m;
4391 }
4392 ml = m;
4393
4394 /*
4395 * Move next buf to head of socket buffer
4396 */
4397 so->so_rcv.sb_mb = m = ml->m_next;
4398 ml->m_next = NULL;
4399
4400 if (m != NULL) {
4401 m->m_nextpkt = nextrecord;
4402 if (nextrecord == NULL) {
4403 so->so_rcv.sb_lastrecord = m;
4404 }
4405 } else {
4406 so->so_rcv.sb_mb = nextrecord;
4407 SB_EMPTY_FIXUP(&so->so_rcv);
4408 }
4409 SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
4410 SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
4411 } else {
4412 /*
4413 * Stop the loop on partial copy
4414 */
4415 break;
4416 }
4417 }
4418 #ifdef MORE_LOCKING_DEBUG
4419 if (so->so_usecount <= 1) {
4420 panic("%s: after big while so=%llx ref=%d on socket\n",
4421 __func__,
4422 (uint64_t)DEBUG_KERNEL_ADDRPERM(so), so->so_usecount);
4423 /* NOTREACHED */
4424 }
4425 #endif
4426 /*
4427 * Tell the caller we made a partial copy
4428 */
4429 if (m != NULL) {
4430 if (so->so_options & SO_DONTTRUNC) {
4431 /*
4432 * Copyout first the freelist then the partial mbuf
4433 */
4434 socket_unlock(so, 0);
4435 if (delayed_copy_len) {
4436 error = sodelayed_copy_list(so, msgarray,
4437 uiocnt, &free_list, &delayed_copy_len);
4438 }
4439
4440 if (error == 0) {
4441 error = uiomove(mtod(m, caddr_t), (int)len,
4442 auio);
4443 }
4444 socket_lock(so, 0);
4445 if (error) {
4446 goto release;
4447 }
4448
4449 m->m_data += len;
4450 m->m_len -= len;
4451 so->so_rcv.sb_cc -= len;
4452 flags |= MSG_RCVMORE;
4453 } else {
4454 (void) sbdroprecord(&so->so_rcv);
4455 nextrecord = so->so_rcv.sb_mb;
4456 m = NULL;
4457 flags |= MSG_TRUNC;
4458 }
4459 }
4460
4461 if (m == NULL) {
4462 so->so_rcv.sb_mb = nextrecord;
4463 /*
4464 * First part is an inline SB_EMPTY_FIXUP(). Second
4465 * part makes sure sb_lastrecord is up-to-date if
4466 * there is still data in the socket buffer.
4467 */
4468 if (so->so_rcv.sb_mb == NULL) {
4469 so->so_rcv.sb_mbtail = NULL;
4470 so->so_rcv.sb_lastrecord = NULL;
4471 } else if (nextrecord->m_nextpkt == NULL) {
4472 so->so_rcv.sb_lastrecord = nextrecord;
4473 }
4474 SB_MB_CHECK(&so->so_rcv);
4475 }
4476 SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
4477 SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
4478
4479 /*
4480 * We can continue to the next packet as long as:
4481 * - We haven't exhausted the uio array
4482 * - There was no error
4483 * - A packet was not truncated
4484 * - We can still receive more data
4485 */
4486 if (npkts < uiocnt && error == 0 &&
4487 (flags & (MSG_RCVMORE | MSG_TRUNC)) == 0 &&
4488 (so->so_state & SS_CANTRCVMORE) == 0) {
4489 sbunlock(&so->so_rcv, TRUE); /* keep socket locked */
4490 sblocked = 0;
4491
4492 goto next;
4493 }
4494 if (flagsp != NULL) {
4495 *flagsp |= flags;
4496 }
4497
4498 release:
4499 /*
4500 * pru_rcvd may cause more data to be received if the socket lock
4501 * is dropped so we set MSG_HAVEMORE now based on what we know.
4502 * That way the caller won't be surprised if it receives less data
4503 * than requested.
4504 */
4505 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) {
4506 flags |= MSG_HAVEMORE;
4507 }
4508
4509 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) {
4510 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
4511 }
4512
4513 if (sblocked) {
4514 sbunlock(&so->so_rcv, FALSE); /* will unlock socket */
4515 } else {
4516 socket_unlock(so, 1);
4517 }
4518
4519 if (delayed_copy_len) {
4520 error = sodelayed_copy_list(so, msgarray, uiocnt,
4521 &free_list, &delayed_copy_len);
4522 }
4523 out:
4524 /*
4525 * Amortize the cost of freeing the mbufs
4526 */
4527 if (free_list != NULL) {
4528 m_freem_list(free_list);
4529 }
4530 if (free_others != NULL) {
4531 m_freem_list(free_others);
4532 }
4533
4534 KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_END, error,
4535 0, 0, 0, 0);
4536 return error;
4537 }
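
/*
 * Illustrative sketch (in-kernel caller; uio_create()/uio_addiov() and
 * CAST_USER_ADDR_T are the generic kernel uio API, buffer names are
 * placeholders): a caller such as the recvmsg_x() path hands
 * soreceive_list() an array of recv_msg_elem entries, one per datagram
 * slot, each carrying a uio plus SOCK_MSG_* bits saying whether an
 * address/control mbuf should be returned.
 *
 *	struct recv_msg_elem elem;
 *	int flags = MSG_DONTWAIT;
 *
 *	bzero(&elem, sizeof(elem));
 *	elem.uio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
 *	uio_addiov(elem.uio, CAST_USER_ADDR_T(kbuf), kbuflen);
 *	elem.which = SOCK_MSG_SA;	// also hand back the source address
 *	error = soreceive_list(so, &elem, 1, &flags);
 *	// on success elem.psa holds the sender address (when requested)
 */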
4538
4539 static int
4540 so_statistics_event_to_nstat_event(int64_t *input_options,
4541 uint64_t *nstat_event)
4542 {
4543 int error = 0;
4544 switch (*input_options) {
4545 case SO_STATISTICS_EVENT_ENTER_CELLFALLBACK:
4546 *nstat_event = NSTAT_EVENT_SRC_ENTER_CELLFALLBACK;
4547 break;
4548 case SO_STATISTICS_EVENT_EXIT_CELLFALLBACK:
4549 *nstat_event = NSTAT_EVENT_SRC_EXIT_CELLFALLBACK;
4550 break;
4551 #if (DEBUG || DEVELOPMENT)
4552 case SO_STATISTICS_EVENT_RESERVED_1:
4553 *nstat_event = NSTAT_EVENT_SRC_RESERVED_1;
4554 break;
4555 case SO_STATISTICS_EVENT_RESERVED_2:
4556 *nstat_event = NSTAT_EVENT_SRC_RESERVED_2;
4557 break;
4558 #endif /* (DEBUG || DEVELOPMENT) */
4559 default:
4560 error = EINVAL;
4561 break;
4562 }
4563 return error;
4564 }
4565
4566 /*
4567 * Returns: 0 Success
4568 * EINVAL
4569 * ENOTCONN
4570 * <pru_shutdown>:EINVAL
4571 * <pru_shutdown>:EADDRNOTAVAIL[TCP]
4572 * <pru_shutdown>:ENOBUFS[TCP]
4573 * <pru_shutdown>:EMSGSIZE[TCP]
4574 * <pru_shutdown>:EHOSTUNREACH[TCP]
4575 * <pru_shutdown>:ENETUNREACH[TCP]
4576 * <pru_shutdown>:ENETDOWN[TCP]
4577 * <pru_shutdown>:ENOMEM[TCP]
4578 * <pru_shutdown>:EACCES[TCP]
4579 * <pru_shutdown>:EMSGSIZE[TCP]
4580 * <pru_shutdown>:ENOBUFS[TCP]
4581 * <pru_shutdown>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
4582 * <pru_shutdown>:??? [other protocol families]
4583 */
4584 int
4585 soshutdown(struct socket *so, int how)
4586 {
4587 int error;
4588
4589 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_START, how, 0, 0, 0, 0);
4590
4591 switch (how) {
4592 case SHUT_RD:
4593 case SHUT_WR:
4594 case SHUT_RDWR:
4595 socket_lock(so, 1);
4596 if ((so->so_state &
4597 (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
4598 error = ENOTCONN;
4599 } else {
4600 error = soshutdownlock(so, how);
4601 }
4602 socket_unlock(so, 1);
4603 break;
4604 default:
4605 error = EINVAL;
4606 break;
4607 }
4608
4609 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, how, error, 0, 0, 0);
4610
4611 return error;
4612 }
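
/*
 * Illustrative sketch (userspace, placeholder fd): soshutdown() rejects any
 * "how" other than SHUT_RD/SHUT_WR/SHUT_RDWR with EINVAL and returns
 * ENOTCONN when the socket is not connected, so the usual TCP half-close is:
 *
 *	if (shutdown(fd, SHUT_WR) == -1) {
 *		// ENOTCONN: never connected or already torn down
 *		// EINVAL:   bad "how" argument
 *	}
 *	// keep reading until recv() returns 0 (peer's FIN), then close(fd)
 */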
4613
4614 int
4615 soshutdownlock_final(struct socket *so, int how)
4616 {
4617 struct protosw *pr = so->so_proto;
4618 int error = 0;
4619
4620 sflt_notify(so, sock_evt_shutdown, &how);
4621
4622 if (how != SHUT_WR) {
4623 if ((so->so_state & SS_CANTRCVMORE) != 0) {
4624 /* read already shut down */
4625 error = ENOTCONN;
4626 goto done;
4627 }
4628 sorflush(so);
4629 }
4630 if (how != SHUT_RD) {
4631 if ((so->so_state & SS_CANTSENDMORE) != 0) {
4632 /* write already shut down */
4633 error = ENOTCONN;
4634 goto done;
4635 }
4636 error = (*pr->pr_usrreqs->pru_shutdown)(so);
4637 }
4638 done:
4639 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN, how, 1, 0, 0, 0);
4640 return error;
4641 }
4642
4643 int
4644 soshutdownlock(struct socket *so, int how)
4645 {
4646 int error = 0;
4647
4648 #if CONTENT_FILTER
4649 /*
4650 * A content filter may delay the actual shutdown until it
4651 * has processed the pending data
4652 */
4653 if (so->so_flags & SOF_CONTENT_FILTER) {
4654 error = cfil_sock_shutdown(so, &how);
4655 if (error == EJUSTRETURN) {
4656 error = 0;
4657 goto done;
4658 } else if (error != 0) {
4659 goto done;
4660 }
4661 }
4662 #endif /* CONTENT_FILTER */
4663
4664 error = soshutdownlock_final(so, how);
4665
4666 done:
4667 return error;
4668 }
4669
4670 void
4671 sowflush(struct socket *so)
4672 {
4673 struct sockbuf *sb = &so->so_snd;
4674
4675 /*
4676 * Obtain lock on the socket buffer (SB_LOCK). This is required
4677 * to prevent the socket buffer from being unexpectedly altered
4678 * while it is used by another thread in socket send/receive.
4679 *
4680 * sblock() must not fail here, hence the assertion.
4681 */
4682 (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
4683 VERIFY(sb->sb_flags & SB_LOCK);
4684
4685 sb->sb_flags &= ~(SB_SEL | SB_UPCALL);
4686 sb->sb_flags |= SB_DROP;
4687 sb->sb_upcall = NULL;
4688 sb->sb_upcallarg = NULL;
4689
4690 sbunlock(sb, TRUE); /* keep socket locked */
4691
4692 selthreadclear(&sb->sb_sel);
4693 sbrelease(sb);
4694 }
4695
4696 void
4697 sorflush(struct socket *so)
4698 {
4699 struct sockbuf *sb = &so->so_rcv;
4700 struct protosw *pr = so->so_proto;
4701 struct sockbuf asb;
4702 #ifdef notyet
4703 lck_mtx_t *mutex_held;
4704 /*
4705 * XXX: This code is currently commented out, because we may get here
4706 * as part of sofreelastref(), and at that time, pr_getlock() may no
4707 * longer be able to return us the lock; this will be fixed in future.
4708 */
4709 if (so->so_proto->pr_getlock != NULL) {
4710 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
4711 } else {
4712 mutex_held = so->so_proto->pr_domain->dom_mtx;
4713 }
4714
4715 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
4716 #endif /* notyet */
4717
4718 sflt_notify(so, sock_evt_flush_read, NULL);
4719
4720 socantrcvmore(so);
4721
4722 /*
4723 * Obtain lock on the socket buffer (SB_LOCK). This is required
4724 * to prevent the socket buffer from being unexpectedly altered
4725 * while it is used by another thread in socket send/receive.
4726 *
4727 * sblock() must not fail here, hence the assertion.
4728 */
4729 (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
4730 VERIFY(sb->sb_flags & SB_LOCK);
4731
4732 /*
4733 * Copy only the relevant fields from "sb" to "asb" which we
4734 * need for sbrelease() to function. In particular, skip
4735 * sb_sel as it contains the wait queue linkage, which would
4736 * wreak havoc if we were to issue selthreadclear() on "asb".
4737 * Make sure to not carry over SB_LOCK in "asb", as we need
4738 * to acquire it later as part of sbrelease().
4739 */
4740 bzero(&asb, sizeof(asb));
4741 asb.sb_cc = sb->sb_cc;
4742 asb.sb_hiwat = sb->sb_hiwat;
4743 asb.sb_mbcnt = sb->sb_mbcnt;
4744 asb.sb_mbmax = sb->sb_mbmax;
4745 asb.sb_ctl = sb->sb_ctl;
4746 asb.sb_lowat = sb->sb_lowat;
4747 asb.sb_mb = sb->sb_mb;
4748 asb.sb_mbtail = sb->sb_mbtail;
4749 asb.sb_lastrecord = sb->sb_lastrecord;
4750 asb.sb_so = sb->sb_so;
4751 asb.sb_flags = sb->sb_flags;
4752 asb.sb_flags &= ~(SB_LOCK | SB_SEL | SB_KNOTE | SB_UPCALL);
4753 asb.sb_flags |= SB_DROP;
4754
4755 /*
4756 * Ideally we'd bzero() these and preserve the ones we need;
4757 * but to do that we'd need to shuffle things around in the
4758 * sockbuf, and we can't do it now because there are KEXTS
4759 * that are directly referring to the socket structure.
4760 *
4761 * Setting SB_DROP acts as a barrier to prevent further appends.
4762 * Clearing SB_SEL is done for selthreadclear() below.
4763 */
4764 sb->sb_cc = 0;
4765 sb->sb_hiwat = 0;
4766 sb->sb_mbcnt = 0;
4767 sb->sb_mbmax = 0;
4768 sb->sb_ctl = 0;
4769 sb->sb_lowat = 0;
4770 sb->sb_mb = NULL;
4771 sb->sb_mbtail = NULL;
4772 sb->sb_lastrecord = NULL;
4773 sb->sb_timeo.tv_sec = 0;
4774 sb->sb_timeo.tv_usec = 0;
4775 sb->sb_upcall = NULL;
4776 sb->sb_upcallarg = NULL;
4777 sb->sb_flags &= ~(SB_SEL | SB_UPCALL);
4778 sb->sb_flags |= SB_DROP;
4779
4780 sbunlock(sb, TRUE); /* keep socket locked */
4781
4782 /*
4783 * Note that selthreadclear() is called on the original "sb" and
4784 * not the local "asb" because of the way wait queue linkage is
4785 * implemented. Given that selwakeup() may be triggered, SB_SEL
4786 * should no longer be set (cleared above.)
4787 */
4788 selthreadclear(&sb->sb_sel);
4789
4790 if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose) {
4791 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
4792 }
4793
4794 sbrelease(&asb);
4795 }
4796
4797 /*
4798 * Perhaps this routine, and sooptcopyout(), below, ought to come in
4799 * an additional variant to handle the case where the option value needs
4800 * to be some kind of integer, but not a specific size.
4801 * In addition to their use here, these functions are also called by the
4802 * protocol-level pr_ctloutput() routines.
4803 *
4804 * Returns: 0 Success
4805 * EINVAL
4806 * copyin:EFAULT
4807 */
4808 int
4809 sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
4810 {
4811 size_t valsize;
4812
4813 /*
4814 * If the user gives us more than we wanted, we ignore it,
4815 * but if we don't get the minimum length the caller
4816 * wants, we return EINVAL. On success, sopt->sopt_valsize
4817 * is set to however much we actually retrieved.
4818 */
4819 if ((valsize = sopt->sopt_valsize) < minlen) {
4820 return EINVAL;
4821 }
4822 if (valsize > len) {
4823 sopt->sopt_valsize = valsize = len;
4824 }
4825
4826 if (sopt->sopt_p != kernproc) {
4827 return copyin(sopt->sopt_val, buf, valsize);
4828 }
4829
4830 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
4831 return 0;
4832 }
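
/*
 * Illustrative sketch: the usual pattern in this file and in protocol
 * pr_ctloutput() handlers is to pull a fixed-size value and bail out on
 * error, e.g. for an int-valued option:
 *
 *	int optval;
 *
 *	error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval));
 *	if (error != 0)
 *		goto out;
 *	// optval now holds the caller's value; sopt->sopt_valsize has been
 *	// clamped to sizeof(optval) if the caller supplied more.
 */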
4833
4834 /*
4835 * sooptcopyin_timeval
4836 * Copy in a timeval value into tv_p, and take into account whether the
4837 * calling process is 64-bit or 32-bit. Moved the sanity checking
4838 * code here so that we can verify the 64-bit tv_sec value before we lose
4839 * the top 32-bits assigning tv64.tv_sec to tv_p->tv_sec.
4840 */
4841 static int
4842 sooptcopyin_timeval(struct sockopt *sopt, struct timeval *tv_p)
4843 {
4844 int error;
4845
4846 if (proc_is64bit(sopt->sopt_p)) {
4847 struct user64_timeval tv64;
4848
4849 if (sopt->sopt_valsize < sizeof(tv64)) {
4850 return EINVAL;
4851 }
4852
4853 sopt->sopt_valsize = sizeof(tv64);
4854 if (sopt->sopt_p != kernproc) {
4855 error = copyin(sopt->sopt_val, &tv64, sizeof(tv64));
4856 if (error != 0) {
4857 return error;
4858 }
4859 } else {
4860 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv64,
4861 sizeof(tv64));
4862 }
4863 if (tv64.tv_sec < 0 || tv64.tv_sec > LONG_MAX ||
4864 tv64.tv_usec < 0 || tv64.tv_usec >= 1000000) {
4865 return EDOM;
4866 }
4867
4868 tv_p->tv_sec = tv64.tv_sec;
4869 tv_p->tv_usec = tv64.tv_usec;
4870 } else {
4871 struct user32_timeval tv32;
4872
4873 if (sopt->sopt_valsize < sizeof(tv32)) {
4874 return EINVAL;
4875 }
4876
4877 sopt->sopt_valsize = sizeof(tv32);
4878 if (sopt->sopt_p != kernproc) {
4879 error = copyin(sopt->sopt_val, &tv32, sizeof(tv32));
4880 if (error != 0) {
4881 return error;
4882 }
4883 } else {
4884 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv32,
4885 sizeof(tv32));
4886 }
4887 #ifndef __LP64__
4888 /*
4889 * K64todo "comparison is always false due to
4890 * limited range of data type"
4891 */
4892 if (tv32.tv_sec < 0 || tv32.tv_sec > LONG_MAX ||
4893 tv32.tv_usec < 0 || tv32.tv_usec >= 1000000) {
4894 return EDOM;
4895 }
4896 #endif
4897 tv_p->tv_sec = tv32.tv_sec;
4898 tv_p->tv_usec = tv32.tv_usec;
4899 }
4900 return 0;
4901 }
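
/*
 * Illustrative sketch (userspace, placeholder fd): the range checks above
 * mean a timeout with a negative tv_sec or tv_usec outside [0, 1000000)
 * fails with EDOM instead of being silently accepted:
 *
 *	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 *	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == -1) {
 *		// EDOM for an out-of-range timeval, EINVAL for a short one
 *	}
 */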
4902
4903 int
4904 soopt_cred_check(struct socket *so, int priv, boolean_t allow_root,
4905 boolean_t ignore_delegate)
4906 {
4907 kauth_cred_t cred = NULL;
4908 proc_t ep = PROC_NULL;
4909 uid_t uid;
4910 int error = 0;
4911
4912 if (ignore_delegate == false && so->so_flags & SOF_DELEGATED) {
4913 ep = proc_find(so->e_pid);
4914 if (ep) {
4915 cred = kauth_cred_proc_ref(ep);
4916 }
4917 }
4918
4919 uid = kauth_cred_getuid(cred ? cred : so->so_cred);
4920
4921 /* uid is 0 for root */
4922 if (uid != 0 || !allow_root) {
4923 error = priv_check_cred(cred ? cred : so->so_cred, priv, 0);
4924 }
4925 if (cred) {
4926 kauth_cred_unref(&cred);
4927 }
4928 if (ep != PROC_NULL) {
4929 proc_rele(ep);
4930 }
4931
4932 return error;
4933 }
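
/*
 * Illustrative sketch: this is how the restricted options below use the
 * helper -- check the (possibly delegated) credential against a specific
 * privilege and only then flip the inpcb state, as in the
 * SO_AWDL_UNRESTRICTED case:
 *
 *	error = soopt_cred_check(so, PRIV_NET_RESTRICTED_AWDL, false, false);
 *	if (error == 0)
 *		inp_set_awdl_unrestricted(sotoinpcb(so));
 */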
4934
4935 /*
4936 * Returns: 0 Success
4937 * EINVAL
4938 * ENOPROTOOPT
4939 * ENOBUFS
4940 * EDOM
4941 * sooptcopyin:EINVAL
4942 * sooptcopyin:EFAULT
4943 * sooptcopyin_timeval:EINVAL
4944 * sooptcopyin_timeval:EFAULT
4945 * sooptcopyin_timeval:EDOM
4946 * <pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
4947 * <pr_ctloutput>:???
4948 * sflt_attach_private:??? [whatever a filter author chooses]
4949 * <sf_setoption>:??? [whatever a filter author chooses]
4950 *
4951 * Notes: Other <pr_ctloutput> returns depend on the protocol family; all
4952 * <sf_setoption> returns depend on what the filter author causes
4953 * their filter to return.
4954 */
4955 int
4956 sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
4957 {
4958 int error, optval;
4959 int64_t long_optval;
4960 struct linger l;
4961 struct timeval tv;
4962
4963 if (sopt->sopt_dir != SOPT_SET) {
4964 sopt->sopt_dir = SOPT_SET;
4965 }
4966
4967 if (dolock) {
4968 socket_lock(so, 1);
4969 }
4970
4971 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
4972 (SS_CANTRCVMORE | SS_CANTSENDMORE) &&
4973 (so->so_flags & SOF_NPX_SETOPTSHUT) == 0) {
4974 /* the socket has been shutdown, no more sockopt's */
4975 error = EINVAL;
4976 goto out;
4977 }
4978
4979 error = sflt_setsockopt(so, sopt);
4980 if (error != 0) {
4981 if (error == EJUSTRETURN) {
4982 error = 0;
4983 }
4984 goto out;
4985 }
4986
4987 if (sopt->sopt_level != SOL_SOCKET) {
4988 if (so->so_proto != NULL &&
4989 so->so_proto->pr_ctloutput != NULL) {
4990 error = (*so->so_proto->pr_ctloutput)(so, sopt);
4991 goto out;
4992 }
4993 error = ENOPROTOOPT;
4994 } else {
4995 /*
4996 * Allow socket-level (SOL_SOCKET) options to be filtered by
4997 * the protocol layer, if needed. A zero value returned from
4998 * the handler means use default socket-level processing as
4999 * done by the rest of this routine. Otherwise, any other
5000 * return value indicates that the option is unsupported.
5001 */
5002 if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs->
5003 pru_socheckopt(so, sopt)) != 0) {
5004 goto out;
5005 }
5006
5007 error = 0;
5008 switch (sopt->sopt_name) {
5009 case SO_LINGER:
5010 case SO_LINGER_SEC:
5011 error = sooptcopyin(sopt, &l, sizeof(l), sizeof(l));
5012 if (error != 0) {
5013 goto out;
5014 }
5015
5016 so->so_linger = (sopt->sopt_name == SO_LINGER) ?
5017 l.l_linger : l.l_linger * hz;
5018 if (l.l_onoff != 0) {
5019 so->so_options |= SO_LINGER;
5020 } else {
5021 so->so_options &= ~SO_LINGER;
5022 }
5023 break;
5024
5025 case SO_DEBUG:
5026 case SO_KEEPALIVE:
5027 case SO_DONTROUTE:
5028 case SO_USELOOPBACK:
5029 case SO_BROADCAST:
5030 case SO_REUSEADDR:
5031 case SO_REUSEPORT:
5032 case SO_OOBINLINE:
5033 case SO_TIMESTAMP:
5034 case SO_TIMESTAMP_MONOTONIC:
5035 case SO_TIMESTAMP_CONTINUOUS:
5036 case SO_DONTTRUNC:
5037 case SO_WANTMORE:
5038 case SO_WANTOOBFLAG:
5039 case SO_NOWAKEFROMSLEEP:
5040 case SO_NOAPNFALLBK:
5041 error = sooptcopyin(sopt, &optval, sizeof(optval),
5042 sizeof(optval));
5043 if (error != 0) {
5044 goto out;
5045 }
5046 if (optval) {
5047 so->so_options |= sopt->sopt_name;
5048 } else {
5049 so->so_options &= ~sopt->sopt_name;
5050 }
5051 break;
5052
5053 case SO_SNDBUF:
5054 case SO_RCVBUF:
5055 case SO_SNDLOWAT:
5056 case SO_RCVLOWAT:
5057 error = sooptcopyin(sopt, &optval, sizeof(optval),
5058 sizeof(optval));
5059 if (error != 0) {
5060 goto out;
5061 }
5062
5063 /*
5064 * Values < 1 make no sense for any of these
5065 * options, so disallow them.
5066 */
5067 if (optval < 1) {
5068 error = EINVAL;
5069 goto out;
5070 }
5071
5072 switch (sopt->sopt_name) {
5073 case SO_SNDBUF:
5074 case SO_RCVBUF: {
5075 struct sockbuf *sb =
5076 (sopt->sopt_name == SO_SNDBUF) ?
5077 &so->so_snd : &so->so_rcv;
5078 if (sbreserve(sb, (u_int32_t)optval) == 0) {
5079 error = ENOBUFS;
5080 goto out;
5081 }
5082 sb->sb_flags |= SB_USRSIZE;
5083 sb->sb_flags &= ~SB_AUTOSIZE;
5084 sb->sb_idealsize = (u_int32_t)optval;
5085 break;
5086 }
5087 /*
5088 * Make sure the low-water is never greater than
5089 * the high-water.
5090 */
5091 case SO_SNDLOWAT: {
5092 int space = sbspace(&so->so_snd);
5093 u_int32_t hiwat = so->so_snd.sb_hiwat;
5094
5095 if (so->so_snd.sb_flags & SB_UNIX) {
5096 struct unpcb *unp =
5097 (struct unpcb *)(so->so_pcb);
5098 if (unp != NULL &&
5099 unp->unp_conn != NULL) {
5100 hiwat += unp->unp_conn->unp_cc;
5101 }
5102 }
5103
5104 so->so_snd.sb_lowat =
5105 (optval > hiwat) ?
5106 hiwat : optval;
5107
5108 if (space >= so->so_snd.sb_lowat) {
5109 sowwakeup(so);
5110 }
5111 break;
5112 }
5113 case SO_RCVLOWAT: {
5114 int64_t data_len;
5115 so->so_rcv.sb_lowat =
5116 (optval > so->so_rcv.sb_hiwat) ?
5117 so->so_rcv.sb_hiwat : optval;
5118 data_len = so->so_rcv.sb_cc
5119 - so->so_rcv.sb_ctl;
5120 if (data_len >= so->so_rcv.sb_lowat) {
5121 sorwakeup(so);
5122 }
5123 break;
5124 }
5125 }
5126 break;
5127
5128 case SO_SNDTIMEO:
5129 case SO_RCVTIMEO:
5130 error = sooptcopyin_timeval(sopt, &tv);
5131 if (error != 0) {
5132 goto out;
5133 }
5134
5135 switch (sopt->sopt_name) {
5136 case SO_SNDTIMEO:
5137 so->so_snd.sb_timeo = tv;
5138 break;
5139 case SO_RCVTIMEO:
5140 so->so_rcv.sb_timeo = tv;
5141 break;
5142 }
5143 break;
5144
5145 case SO_NKE: {
5146 struct so_nke nke;
5147
5148 error = sooptcopyin(sopt, &nke, sizeof(nke),
5149 sizeof(nke));
5150 if (error != 0) {
5151 goto out;
5152 }
5153
5154 error = sflt_attach_internal(so, nke.nke_handle);
5155 break;
5156 }
5157
5158 case SO_NOSIGPIPE:
5159 error = sooptcopyin(sopt, &optval, sizeof(optval),
5160 sizeof(optval));
5161 if (error != 0) {
5162 goto out;
5163 }
5164 if (optval != 0) {
5165 so->so_flags |= SOF_NOSIGPIPE;
5166 } else {
5167 so->so_flags &= ~SOF_NOSIGPIPE;
5168 }
5169 break;
5170
5171 case SO_NOADDRERR:
5172 error = sooptcopyin(sopt, &optval, sizeof(optval),
5173 sizeof(optval));
5174 if (error != 0) {
5175 goto out;
5176 }
5177 if (optval != 0) {
5178 so->so_flags |= SOF_NOADDRAVAIL;
5179 } else {
5180 so->so_flags &= ~SOF_NOADDRAVAIL;
5181 }
5182 break;
5183
5184 case SO_REUSESHAREUID:
5185 error = sooptcopyin(sopt, &optval, sizeof(optval),
5186 sizeof(optval));
5187 if (error != 0) {
5188 goto out;
5189 }
5190 if (optval != 0) {
5191 so->so_flags |= SOF_REUSESHAREUID;
5192 } else {
5193 so->so_flags &= ~SOF_REUSESHAREUID;
5194 }
5195 break;
5196
5197 case SO_NOTIFYCONFLICT:
5198 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
5199 error = EPERM;
5200 goto out;
5201 }
5202 error = sooptcopyin(sopt, &optval, sizeof(optval),
5203 sizeof(optval));
5204 if (error != 0) {
5205 goto out;
5206 }
5207 if (optval != 0) {
5208 so->so_flags |= SOF_NOTIFYCONFLICT;
5209 } else {
5210 so->so_flags &= ~SOF_NOTIFYCONFLICT;
5211 }
5212 break;
5213
5214 case SO_RESTRICTIONS:
5215 error = sooptcopyin(sopt, &optval, sizeof(optval),
5216 sizeof(optval));
5217 if (error != 0) {
5218 goto out;
5219 }
5220
5221 error = so_set_restrictions(so, optval);
5222 break;
5223
5224 case SO_AWDL_UNRESTRICTED:
5225 if (SOCK_DOM(so) != PF_INET &&
5226 SOCK_DOM(so) != PF_INET6) {
5227 error = EOPNOTSUPP;
5228 goto out;
5229 }
5230 error = sooptcopyin(sopt, &optval, sizeof(optval),
5231 sizeof(optval));
5232 if (error != 0) {
5233 goto out;
5234 }
5235 if (optval != 0) {
5236 error = soopt_cred_check(so,
5237 PRIV_NET_RESTRICTED_AWDL, false, false);
5238 if (error == 0) {
5239 inp_set_awdl_unrestricted(
5240 sotoinpcb(so));
5241 }
5242 } else {
5243 inp_clear_awdl_unrestricted(sotoinpcb(so));
5244 }
5245 break;
5246 case SO_INTCOPROC_ALLOW:
5247 if (SOCK_DOM(so) != PF_INET6) {
5248 error = EOPNOTSUPP;
5249 goto out;
5250 }
5251 error = sooptcopyin(sopt, &optval, sizeof(optval),
5252 sizeof(optval));
5253 if (error != 0) {
5254 goto out;
5255 }
5256 if (optval != 0 &&
5257 inp_get_intcoproc_allowed(sotoinpcb(so)) == FALSE) {
5258 error = soopt_cred_check(so,
5259 PRIV_NET_RESTRICTED_INTCOPROC, false, false);
5260 if (error == 0) {
5261 inp_set_intcoproc_allowed(
5262 sotoinpcb(so));
5263 }
5264 } else if (optval == 0) {
5265 inp_clear_intcoproc_allowed(sotoinpcb(so));
5266 }
5267 break;
5268
5269 case SO_LABEL:
5270 error = EOPNOTSUPP;
5271 break;
5272
5273 case SO_UPCALLCLOSEWAIT:
5274 error = sooptcopyin(sopt, &optval, sizeof(optval),
5275 sizeof(optval));
5276 if (error != 0) {
5277 goto out;
5278 }
5279 if (optval != 0) {
5280 so->so_flags |= SOF_UPCALLCLOSEWAIT;
5281 } else {
5282 so->so_flags &= ~SOF_UPCALLCLOSEWAIT;
5283 }
5284 break;
5285
5286 case SO_RANDOMPORT:
5287 error = sooptcopyin(sopt, &optval, sizeof(optval),
5288 sizeof(optval));
5289 if (error != 0) {
5290 goto out;
5291 }
5292 if (optval != 0) {
5293 so->so_flags |= SOF_BINDRANDOMPORT;
5294 } else {
5295 so->so_flags &= ~SOF_BINDRANDOMPORT;
5296 }
5297 break;
5298
5299 case SO_NP_EXTENSIONS: {
5300 struct so_np_extensions sonpx;
5301
5302 error = sooptcopyin(sopt, &sonpx, sizeof(sonpx),
5303 sizeof(sonpx));
5304 if (error != 0) {
5305 goto out;
5306 }
5307 if (sonpx.npx_mask & ~SONPX_MASK_VALID) {
5308 error = EINVAL;
5309 goto out;
5310 }
5311 /*
5312 * Only one bit defined for now
5313 */
5314 if ((sonpx.npx_mask & SONPX_SETOPTSHUT)) {
5315 if ((sonpx.npx_flags & SONPX_SETOPTSHUT)) {
5316 so->so_flags |= SOF_NPX_SETOPTSHUT;
5317 } else {
5318 so->so_flags &= ~SOF_NPX_SETOPTSHUT;
5319 }
5320 }
5321 break;
5322 }
5323
5324 case SO_TRAFFIC_CLASS: {
5325 error = sooptcopyin(sopt, &optval, sizeof(optval),
5326 sizeof(optval));
5327 if (error != 0) {
5328 goto out;
5329 }
5330 if (optval >= SO_TC_NET_SERVICE_OFFSET) {
5331 int netsvc = optval - SO_TC_NET_SERVICE_OFFSET;
5332 error = so_set_net_service_type(so, netsvc);
5333 goto out;
5334 }
5335 error = so_set_traffic_class(so, optval);
5336 if (error != 0) {
5337 goto out;
5338 }
5339 so->so_flags1 &= ~SOF1_TC_NET_SERV_TYPE;
5340 so->so_netsvctype = _NET_SERVICE_TYPE_UNSPEC;
5341 break;
5342 }
5343
5344 case SO_RECV_TRAFFIC_CLASS: {
5345 error = sooptcopyin(sopt, &optval, sizeof(optval),
5346 sizeof(optval));
5347 if (error != 0) {
5348 goto out;
5349 }
5350 if (optval == 0) {
5351 so->so_flags &= ~SOF_RECV_TRAFFIC_CLASS;
5352 } else {
5353 so->so_flags |= SOF_RECV_TRAFFIC_CLASS;
5354 }
5355 break;
5356 }
5357
5358 #if (DEVELOPMENT || DEBUG)
5359 case SO_TRAFFIC_CLASS_DBG: {
5360 struct so_tcdbg so_tcdbg;
5361
5362 error = sooptcopyin(sopt, &so_tcdbg,
5363 sizeof(struct so_tcdbg), sizeof(struct so_tcdbg));
5364 if (error != 0) {
5365 goto out;
5366 }
5367 error = so_set_tcdbg(so, &so_tcdbg);
5368 if (error != 0) {
5369 goto out;
5370 }
5371 break;
5372 }
5373 #endif /* (DEVELOPMENT || DEBUG) */
5374
5375 case SO_PRIVILEGED_TRAFFIC_CLASS:
5376 error = priv_check_cred(kauth_cred_get(),
5377 PRIV_NET_PRIVILEGED_TRAFFIC_CLASS, 0);
5378 if (error != 0) {
5379 goto out;
5380 }
5381 error = sooptcopyin(sopt, &optval, sizeof(optval),
5382 sizeof(optval));
5383 if (error != 0) {
5384 goto out;
5385 }
5386 if (optval == 0) {
5387 so->so_flags &= ~SOF_PRIVILEGED_TRAFFIC_CLASS;
5388 } else {
5389 so->so_flags |= SOF_PRIVILEGED_TRAFFIC_CLASS;
5390 }
5391 break;
5392
5393 #if (DEVELOPMENT || DEBUG)
5394 case SO_DEFUNCTIT:
5395 error = sosetdefunct(current_proc(), so, 0, FALSE);
5396 if (error == 0) {
5397 error = sodefunct(current_proc(), so, 0);
5398 }
5399
5400 break;
5401 #endif /* (DEVELOPMENT || DEBUG) */
5402
5403 case SO_DEFUNCTOK:
5404 error = sooptcopyin(sopt, &optval, sizeof(optval),
5405 sizeof(optval));
5406 if (error != 0 || (so->so_flags & SOF_DEFUNCT)) {
5407 if (error == 0) {
5408 error = EBADF;
5409 }
5410 goto out;
5411 }
5412 /*
5413 * Any process can set SO_DEFUNCTOK (clear
5414 * SOF_NODEFUNCT), but only root can clear
5415 * SO_DEFUNCTOK (set SOF_NODEFUNCT).
5416 */
5417 if (optval == 0 &&
5418 kauth_cred_issuser(kauth_cred_get()) == 0) {
5419 error = EPERM;
5420 goto out;
5421 }
5422 if (optval) {
5423 so->so_flags &= ~SOF_NODEFUNCT;
5424 } else {
5425 so->so_flags |= SOF_NODEFUNCT;
5426 }
5427
5428 if (SOCK_DOM(so) == PF_INET ||
5429 SOCK_DOM(so) == PF_INET6) {
5430 char s[MAX_IPv6_STR_LEN];
5431 char d[MAX_IPv6_STR_LEN];
5432 struct inpcb *inp = sotoinpcb(so);
5433
5434 SODEFUNCTLOG("%s[%d, %s]: so 0x%llx "
5435 "[%s %s:%d -> %s:%d] is now marked "
5436 "as %seligible for "
5437 "defunct\n", __func__, proc_selfpid(),
5438 proc_best_name(current_proc()),
5439 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
5440 (SOCK_TYPE(so) == SOCK_STREAM) ?
5441 "TCP" : "UDP", inet_ntop(SOCK_DOM(so),
5442 ((SOCK_DOM(so) == PF_INET) ?
5443 (void *)&inp->inp_laddr.s_addr :
5444 (void *)&inp->in6p_laddr), s, sizeof(s)),
5445 ntohs(inp->in6p_lport),
5446 inet_ntop(SOCK_DOM(so),
5447 (SOCK_DOM(so) == PF_INET) ?
5448 (void *)&inp->inp_faddr.s_addr :
5449 (void *)&inp->in6p_faddr, d, sizeof(d)),
5450 ntohs(inp->in6p_fport),
5451 (so->so_flags & SOF_NODEFUNCT) ?
5452 "not " : "");
5453 } else {
5454 SODEFUNCTLOG("%s[%d, %s]: so 0x%llx [%d,%d] "
5455 "is now marked as %seligible for "
5456 "defunct\n",
5457 __func__, proc_selfpid(),
5458 proc_best_name(current_proc()),
5459 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
5460 SOCK_DOM(so), SOCK_TYPE(so),
5461 (so->so_flags & SOF_NODEFUNCT) ?
5462 "not " : "");
5463 }
5464 break;
5465
5466 case SO_ISDEFUNCT:
5467 /* This option is not settable */
5468 error = EINVAL;
5469 break;
5470
5471 case SO_OPPORTUNISTIC:
5472 error = sooptcopyin(sopt, &optval, sizeof(optval),
5473 sizeof(optval));
5474 if (error == 0) {
5475 error = so_set_opportunistic(so, optval);
5476 }
5477 break;
5478
5479 case SO_FLUSH:
5480 /* This option is handled by lower layer(s) */
5481 error = 0;
5482 break;
5483
5484 case SO_RECV_ANYIF:
5485 error = sooptcopyin(sopt, &optval, sizeof(optval),
5486 sizeof(optval));
5487 if (error == 0) {
5488 error = so_set_recv_anyif(so, optval);
5489 }
5490 break;
5491
5492 case SO_TRAFFIC_MGT_BACKGROUND: {
5493 /* This option is handled by lower layer(s) */
5494 error = 0;
5495 break;
5496 }
5497
5498 #if FLOW_DIVERT
5499 case SO_FLOW_DIVERT_TOKEN:
5500 error = flow_divert_token_set(so, sopt);
5501 break;
5502 #endif /* FLOW_DIVERT */
5503
5504
5505 case SO_DELEGATED:
5506 if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
5507 sizeof(optval))) != 0) {
5508 break;
5509 }
5510
5511 error = so_set_effective_pid(so, optval, sopt->sopt_p, true);
5512 break;
5513
5514 case SO_DELEGATED_UUID: {
5515 uuid_t euuid;
5516
5517 if ((error = sooptcopyin(sopt, &euuid, sizeof(euuid),
5518 sizeof(euuid))) != 0) {
5519 break;
5520 }
5521
5522 error = so_set_effective_uuid(so, euuid, sopt->sopt_p, true);
5523 break;
5524 }
5525
5526 #if NECP
5527 case SO_NECP_ATTRIBUTES:
5528 error = necp_set_socket_attributes(so, sopt);
5529 break;
5530
5531 case SO_NECP_CLIENTUUID: {
5532 if (SOCK_DOM(so) == PF_MULTIPATH) {
5533 /* Handled by MPTCP itself */
5534 break;
5535 }
5536
5537 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
5538 error = EINVAL;
5539 goto out;
5540 }
5541
5542 struct inpcb *inp = sotoinpcb(so);
5543 if (!uuid_is_null(inp->necp_client_uuid)) {
5544 // Clear out the old client UUID if present
5545 necp_inpcb_remove_cb(inp);
5546 }
5547
5548 error = sooptcopyin(sopt, &inp->necp_client_uuid,
5549 sizeof(uuid_t), sizeof(uuid_t));
5550 if (error != 0) {
5551 goto out;
5552 }
5553
5554 if (uuid_is_null(inp->necp_client_uuid)) {
5555 error = EINVAL;
5556 goto out;
5557 }
5558
5559 pid_t current_pid = proc_pid(current_proc());
5560 error = necp_client_register_socket_flow(current_pid,
5561 inp->necp_client_uuid, inp);
5562 if (error != 0) {
5563 uuid_clear(inp->necp_client_uuid);
5564 goto out;
5565 }
5566
5567 if (inp->inp_lport != 0) {
5568 // There is a bound local port, so this is not
5569 // a fresh socket. Assign to the client.
5570 necp_client_assign_from_socket(current_pid, inp->necp_client_uuid, inp);
5571 }
5572
5573 break;
5574 }
5575 case SO_NECP_LISTENUUID: {
5576 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
5577 error = EINVAL;
5578 goto out;
5579 }
5580
5581 struct inpcb *inp = sotoinpcb(so);
5582 if (!uuid_is_null(inp->necp_client_uuid)) {
5583 error = EINVAL;
5584 goto out;
5585 }
5586
5587 error = sooptcopyin(sopt, &inp->necp_client_uuid,
5588 sizeof(uuid_t), sizeof(uuid_t));
5589 if (error != 0) {
5590 goto out;
5591 }
5592
5593 if (uuid_is_null(inp->necp_client_uuid)) {
5594 error = EINVAL;
5595 goto out;
5596 }
5597
5598 error = necp_client_register_socket_listener(proc_pid(current_proc()),
5599 inp->necp_client_uuid, inp);
5600 if (error != 0) {
5601 uuid_clear(inp->necp_client_uuid);
5602 goto out;
5603 }
5604
5605 // Mark that the port registration is held by NECP
5606 inp->inp_flags2 |= INP2_EXTERNAL_PORT;
5607
5608 break;
5609 }
5610 #endif /* NECP */
5611
5612 case SO_EXTENDED_BK_IDLE:
5613 error = sooptcopyin(sopt, &optval, sizeof(optval),
5614 sizeof(optval));
5615 if (error == 0) {
5616 error = so_set_extended_bk_idle(so, optval);
5617 }
5618 break;
5619
5620 case SO_MARK_CELLFALLBACK:
5621 error = sooptcopyin(sopt, &optval, sizeof(optval),
5622 sizeof(optval));
5623 if (error != 0) {
5624 goto out;
5625 }
5626 if (optval < 0) {
5627 error = EINVAL;
5628 goto out;
5629 }
5630 if (optval == 0) {
5631 so->so_flags1 &= ~SOF1_CELLFALLBACK;
5632 } else {
5633 so->so_flags1 |= SOF1_CELLFALLBACK;
5634 }
5635 break;
5636
5637 case SO_STATISTICS_EVENT:
5638 error = sooptcopyin(sopt, &long_optval,
5639 sizeof(long_optval), sizeof(long_optval));
5640 if (error != 0) {
5641 goto out;
5642 }
5643 u_int64_t nstat_event = 0;
5644 error = so_statistics_event_to_nstat_event(
5645 &long_optval, &nstat_event);
5646 if (error != 0) {
5647 goto out;
5648 }
5649 nstat_pcb_event(sotoinpcb(so), nstat_event);
5650 break;
5651
5652 case SO_NET_SERVICE_TYPE: {
5653 error = sooptcopyin(sopt, &optval, sizeof(optval),
5654 sizeof(optval));
5655 if (error != 0) {
5656 goto out;
5657 }
5658 error = so_set_net_service_type(so, optval);
5659 break;
5660 }
5661
5662 case SO_QOSMARKING_POLICY_OVERRIDE:
5663 error = priv_check_cred(kauth_cred_get(),
5664 PRIV_NET_QOSMARKING_POLICY_OVERRIDE, 0);
5665 if (error != 0) {
5666 goto out;
5667 }
5668 error = sooptcopyin(sopt, &optval, sizeof(optval),
5669 sizeof(optval));
5670 if (error != 0) {
5671 goto out;
5672 }
5673 if (optval == 0) {
5674 so->so_flags1 &= ~SOF1_QOSMARKING_POLICY_OVERRIDE;
5675 } else {
5676 so->so_flags1 |= SOF1_QOSMARKING_POLICY_OVERRIDE;
5677 }
5678 break;
5679
5680 case SO_MPKL_SEND_INFO: {
5681 struct so_mpkl_send_info so_mpkl_send_info;
5682
5683 error = sooptcopyin(sopt, &so_mpkl_send_info,
5684 sizeof(struct so_mpkl_send_info), sizeof(struct so_mpkl_send_info));
5685 if (error != 0) {
5686 goto out;
5687 }
5688 uuid_copy(so->so_mpkl_send_uuid, so_mpkl_send_info.mpkl_uuid);
5689 so->so_mpkl_send_proto = so_mpkl_send_info.mpkl_proto;
5690
5691 if (uuid_is_null(so->so_mpkl_send_uuid) && so->so_mpkl_send_proto == 0) {
5692 so->so_flags1 &= ~SOF1_MPKL_SEND_INFO;
5693 } else {
5694 so->so_flags1 |= SOF1_MPKL_SEND_INFO;
5695 }
5696 break;
5697 }
5698 case SO_WANT_KEV_SOCKET_CLOSED: {
5699 error = sooptcopyin(sopt, &optval, sizeof(optval),
5700 sizeof(optval));
5701 if (error != 0) {
5702 goto out;
5703 }
5704 if (optval == 0) {
5705 so->so_flags1 &= ~SOF1_WANT_KEV_SOCK_CLOSED;
5706 } else {
5707 so->so_flags1 |= SOF1_WANT_KEV_SOCK_CLOSED;
5708 }
5709 break;
5710 }
5711 default:
5712 error = ENOPROTOOPT;
5713 break;
5714 }
5715 if (error == 0 && so->so_proto != NULL &&
5716 so->so_proto->pr_ctloutput != NULL) {
5717 (void) so->so_proto->pr_ctloutput(so, sopt);
5718 }
5719 }
5720 out:
5721 if (dolock) {
5722 socket_unlock(so, 1);
5723 }
5724 return error;
5725 }
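
/*
 * Illustrative sketch (userspace, placeholder fd): note the SO_LINGER vs
 * SO_LINGER_SEC handling above -- SO_LINGER stores l_linger as given while
 * SO_LINGER_SEC scales it by hz, so a seconds-based linger on this platform
 * is typically expressed as:
 *
 *	struct linger l = { .l_onoff = 1, .l_linger = 5 };	// 5 seconds
 *	if (setsockopt(fd, SOL_SOCKET, SO_LINGER_SEC, &l, sizeof(l)) == -1) {
 *		// EINVAL if the socket has already been fully shut down
 *	}
 */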
5726
5727 /* Helper routines for getsockopt */
5728 int
5729 sooptcopyout(struct sockopt *sopt, void *buf, size_t len)
5730 {
5731 int error;
5732 size_t valsize;
5733
5734 error = 0;
5735
5736 /*
5737 * Documented get behavior is that we always return a value,
5738 * possibly truncated to fit in the user's buffer.
5739 * Traditional behavior is that we always tell the user
5740 * precisely how much we copied, rather than something useful
5741 * like the total amount we had available for her.
5742 * Note that this interface is not idempotent; the entire answer must be
5743 * generated ahead of time.
5744 */
5745 valsize = min(len, sopt->sopt_valsize);
5746 sopt->sopt_valsize = valsize;
5747 if (sopt->sopt_val != USER_ADDR_NULL) {
5748 if (sopt->sopt_p != kernproc) {
5749 error = copyout(buf, sopt->sopt_val, valsize);
5750 } else {
5751 bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
5752 }
5753 }
5754 return error;
5755 }
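
/*
 * Illustrative sketch (userspace, placeholder fd): per the comment above, a
 * get always returns a value, possibly truncated, and the reported length is
 * what was actually copied rather than what was available:
 *
 *	char small[2];
 *	socklen_t optlen = sizeof(small);
 *	getsockopt(fd, SOL_SOCKET, SO_TYPE, small, &optlen);
 *	// optlen is now 2 and "small" holds the first two bytes of the int
 */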
5756
5757 static int
5758 sooptcopyout_timeval(struct sockopt *sopt, const struct timeval *tv_p)
5759 {
5760 int error;
5761 size_t len;
5762 struct user64_timeval tv64 = {};
5763 struct user32_timeval tv32 = {};
5764 const void * val;
5765 size_t valsize;
5766
5767 error = 0;
5768 if (proc_is64bit(sopt->sopt_p)) {
5769 len = sizeof(tv64);
5770 tv64.tv_sec = tv_p->tv_sec;
5771 tv64.tv_usec = tv_p->tv_usec;
5772 val = &tv64;
5773 } else {
5774 len = sizeof(tv32);
5775 tv32.tv_sec = tv_p->tv_sec;
5776 tv32.tv_usec = tv_p->tv_usec;
5777 val = &tv32;
5778 }
5779 valsize = min(len, sopt->sopt_valsize);
5780 sopt->sopt_valsize = valsize;
5781 if (sopt->sopt_val != USER_ADDR_NULL) {
5782 if (sopt->sopt_p != kernproc) {
5783 error = copyout(val, sopt->sopt_val, valsize);
5784 } else {
5785 bcopy(val, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
5786 }
5787 }
5788 return error;
5789 }
5790
5791 /*
5792 * Return: 0 Success
5793 * ENOPROTOOPT
5794 * <pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
5795 * <pr_ctloutput>:???
5796 * <sf_getoption>:???
5797 */
5798 int
5799 sogetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
5800 {
5801 int error, optval;
5802 struct linger l;
5803 struct timeval tv;
5804
5805 if (sopt->sopt_dir != SOPT_GET) {
5806 sopt->sopt_dir = SOPT_GET;
5807 }
5808
5809 if (dolock) {
5810 socket_lock(so, 1);
5811 }
5812
5813 error = sflt_getsockopt(so, sopt);
5814 if (error != 0) {
5815 if (error == EJUSTRETURN) {
5816 error = 0;
5817 }
5818 goto out;
5819 }
5820
5821 if (sopt->sopt_level != SOL_SOCKET) {
5822 if (so->so_proto != NULL &&
5823 so->so_proto->pr_ctloutput != NULL) {
5824 error = (*so->so_proto->pr_ctloutput)(so, sopt);
5825 goto out;
5826 }
5827 error = ENOPROTOOPT;
5828 } else {
5829 /*
5830 * Allow socket-level (SOL_SOCKET) options to be filtered by
5831 * the protocol layer, if needed. A zero value returned from
5832 * the handler means use default socket-level processing as
5833 * done by the rest of this routine. Otherwise, any other
5834 * return value indicates that the option is unsupported.
5835 */
5836 if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs->
5837 pru_socheckopt(so, sopt)) != 0) {
5838 goto out;
5839 }
5840
5841 error = 0;
5842 switch (sopt->sopt_name) {
5843 case SO_LINGER:
5844 case SO_LINGER_SEC:
5845 l.l_onoff = ((so->so_options & SO_LINGER) ? 1 : 0);
5846 l.l_linger = (sopt->sopt_name == SO_LINGER) ?
5847 so->so_linger : so->so_linger / hz;
5848 error = sooptcopyout(sopt, &l, sizeof(l));
5849 break;
5850
5851 case SO_USELOOPBACK:
5852 case SO_DONTROUTE:
5853 case SO_DEBUG:
5854 case SO_KEEPALIVE:
5855 case SO_REUSEADDR:
5856 case SO_REUSEPORT:
5857 case SO_BROADCAST:
5858 case SO_OOBINLINE:
5859 case SO_TIMESTAMP:
5860 case SO_TIMESTAMP_MONOTONIC:
5861 case SO_TIMESTAMP_CONTINUOUS:
5862 case SO_DONTTRUNC:
5863 case SO_WANTMORE:
5864 case SO_WANTOOBFLAG:
5865 case SO_NOWAKEFROMSLEEP:
5866 case SO_NOAPNFALLBK:
5867 optval = so->so_options & sopt->sopt_name;
5868 integer:
5869 error = sooptcopyout(sopt, &optval, sizeof(optval));
5870 break;
5871
5872 case SO_TYPE:
5873 optval = so->so_type;
5874 goto integer;
5875
5876 case SO_NREAD:
5877 if (so->so_proto->pr_flags & PR_ATOMIC) {
5878 int pkt_total;
5879 struct mbuf *m1;
5880
5881 pkt_total = 0;
5882 m1 = so->so_rcv.sb_mb;
5883 while (m1 != NULL) {
5884 if (m1->m_type == MT_DATA ||
5885 m1->m_type == MT_HEADER ||
5886 m1->m_type == MT_OOBDATA) {
5887 pkt_total += m1->m_len;
5888 }
5889 m1 = m1->m_next;
5890 }
5891 optval = pkt_total;
5892 } else {
5893 optval = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
5894 }
5895 goto integer;
5896
5897 case SO_NUMRCVPKT:
5898 if (so->so_proto->pr_flags & PR_ATOMIC) {
5899 int cnt = 0;
5900 struct mbuf *m1;
5901
5902 m1 = so->so_rcv.sb_mb;
5903 while (m1 != NULL) {
5904 cnt += 1;
5905 m1 = m1->m_nextpkt;
5906 }
5907 optval = cnt;
5908 goto integer;
5909 } else {
5910 error = ENOPROTOOPT;
5911 break;
5912 }
5913
5914 case SO_NWRITE:
5915 optval = so->so_snd.sb_cc;
5916 goto integer;
5917
5918 case SO_ERROR:
5919 optval = so->so_error;
5920 so->so_error = 0;
5921 goto integer;
5922
5923 case SO_SNDBUF: {
5924 u_int32_t hiwat = so->so_snd.sb_hiwat;
5925
5926 if (so->so_snd.sb_flags & SB_UNIX) {
5927 struct unpcb *unp =
5928 (struct unpcb *)(so->so_pcb);
5929 if (unp != NULL && unp->unp_conn != NULL) {
5930 hiwat += unp->unp_conn->unp_cc;
5931 }
5932 }
5933
5934 optval = hiwat;
5935 goto integer;
5936 }
5937 case SO_RCVBUF:
5938 optval = so->so_rcv.sb_hiwat;
5939 goto integer;
5940
5941 case SO_SNDLOWAT:
5942 optval = so->so_snd.sb_lowat;
5943 goto integer;
5944
5945 case SO_RCVLOWAT:
5946 optval = so->so_rcv.sb_lowat;
5947 goto integer;
5948
5949 case SO_SNDTIMEO:
5950 case SO_RCVTIMEO:
5951 tv = (sopt->sopt_name == SO_SNDTIMEO ?
5952 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
5953
5954 error = sooptcopyout_timeval(sopt, &tv);
5955 break;
5956
5957 case SO_NOSIGPIPE:
5958 optval = (so->so_flags & SOF_NOSIGPIPE);
5959 goto integer;
5960
5961 case SO_NOADDRERR:
5962 optval = (so->so_flags & SOF_NOADDRAVAIL);
5963 goto integer;
5964
5965 case SO_REUSESHAREUID:
5966 optval = (so->so_flags & SOF_REUSESHAREUID);
5967 goto integer;
5968
5969
5970 case SO_NOTIFYCONFLICT:
5971 optval = (so->so_flags & SOF_NOTIFYCONFLICT);
5972 goto integer;
5973
5974 case SO_RESTRICTIONS:
5975 optval = so_get_restrictions(so);
5976 goto integer;
5977
5978 case SO_AWDL_UNRESTRICTED:
5979 if (SOCK_DOM(so) == PF_INET ||
5980 SOCK_DOM(so) == PF_INET6) {
5981 optval = inp_get_awdl_unrestricted(
5982 sotoinpcb(so));
5983 goto integer;
5984 } else {
5985 error = EOPNOTSUPP;
5986 }
5987 break;
5988
5989 case SO_INTCOPROC_ALLOW:
5990 if (SOCK_DOM(so) == PF_INET6) {
5991 optval = inp_get_intcoproc_allowed(
5992 sotoinpcb(so));
5993 goto integer;
5994 } else {
5995 error = EOPNOTSUPP;
5996 }
5997 break;
5998
5999 case SO_LABEL:
6000 error = EOPNOTSUPP;
6001 break;
6002
6003 case SO_PEERLABEL:
6004 error = EOPNOTSUPP;
6005 break;
6006
6007 #ifdef __APPLE_API_PRIVATE
6008 case SO_UPCALLCLOSEWAIT:
6009 optval = (so->so_flags & SOF_UPCALLCLOSEWAIT);
6010 goto integer;
6011 #endif
6012 case SO_RANDOMPORT:
6013 optval = (so->so_flags & SOF_BINDRANDOMPORT);
6014 goto integer;
6015
6016 case SO_NP_EXTENSIONS: {
6017 struct so_np_extensions sonpx = {};
6018
6019 sonpx.npx_flags = (so->so_flags & SOF_NPX_SETOPTSHUT) ?
6020 SONPX_SETOPTSHUT : 0;
6021 sonpx.npx_mask = SONPX_MASK_VALID;
6022
6023 error = sooptcopyout(sopt, &sonpx,
6024 sizeof(struct so_np_extensions));
6025 break;
6026 }
6027
6028 case SO_TRAFFIC_CLASS:
6029 optval = so->so_traffic_class;
6030 goto integer;
6031
6032 case SO_RECV_TRAFFIC_CLASS:
6033 optval = (so->so_flags & SOF_RECV_TRAFFIC_CLASS);
6034 goto integer;
6035
6036 #if (DEVELOPMENT || DEBUG)
6037 case SO_TRAFFIC_CLASS_DBG:
6038 error = sogetopt_tcdbg(so, sopt);
6039 break;
6040 #endif /* (DEVELOPMENT || DEBUG) */
6041
6042 case SO_PRIVILEGED_TRAFFIC_CLASS:
6043 optval = (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS);
6044 goto integer;
6045
6046 case SO_DEFUNCTOK:
6047 optval = !(so->so_flags & SOF_NODEFUNCT);
6048 goto integer;
6049
6050 case SO_ISDEFUNCT:
6051 optval = (so->so_flags & SOF_DEFUNCT);
6052 goto integer;
6053
6054 case SO_OPPORTUNISTIC:
6055 optval = so_get_opportunistic(so);
6056 goto integer;
6057
6058 case SO_FLUSH:
6059 /* This option is not gettable */
6060 error = EINVAL;
6061 break;
6062
6063 case SO_RECV_ANYIF:
6064 optval = so_get_recv_anyif(so);
6065 goto integer;
6066
6067 case SO_TRAFFIC_MGT_BACKGROUND:
6068 /* This option is handled by lower layer(s) */
6069 if (so->so_proto != NULL &&
6070 so->so_proto->pr_ctloutput != NULL) {
6071 (void) so->so_proto->pr_ctloutput(so, sopt);
6072 }
6073 break;
6074
6075 #if FLOW_DIVERT
6076 case SO_FLOW_DIVERT_TOKEN:
6077 error = flow_divert_token_get(so, sopt);
6078 break;
6079 #endif /* FLOW_DIVERT */
6080
6081 #if NECP
6082 case SO_NECP_ATTRIBUTES:
6083 error = necp_get_socket_attributes(so, sopt);
6084 break;
6085
6086 case SO_NECP_CLIENTUUID: {
6087 uuid_t *ncu;
6088
6089 if (SOCK_DOM(so) == PF_MULTIPATH) {
6090 ncu = &mpsotomppcb(so)->necp_client_uuid;
6091 } else if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
6092 ncu = &sotoinpcb(so)->necp_client_uuid;
6093 } else {
6094 error = EINVAL;
6095 goto out;
6096 }
6097
6098 error = sooptcopyout(sopt, ncu, sizeof(uuid_t));
6099 break;
6100 }
6101
6102 case SO_NECP_LISTENUUID: {
6103 uuid_t *nlu;
6104
6105 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
6106 if (sotoinpcb(so)->inp_flags2 & INP2_EXTERNAL_PORT) {
6107 nlu = &sotoinpcb(so)->necp_client_uuid;
6108 } else {
6109 error = ENOENT;
6110 goto out;
6111 }
6112 } else {
6113 error = EINVAL;
6114 goto out;
6115 }
6116
6117 error = sooptcopyout(sopt, nlu, sizeof(uuid_t));
6118 break;
6119 }
6120 #endif /* NECP */
6121
6122 #if CONTENT_FILTER
6123 case SO_CFIL_SOCK_ID: {
6124 cfil_sock_id_t sock_id;
6125
6126 sock_id = cfil_sock_id_from_socket(so);
6127
6128 error = sooptcopyout(sopt, &sock_id,
6129 sizeof(cfil_sock_id_t));
6130 break;
6131 }
6132 #endif /* CONTENT_FILTER */
6133
6134 case SO_EXTENDED_BK_IDLE:
6135 optval = (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED);
6136 goto integer;
6137 case SO_MARK_CELLFALLBACK:
6138 optval = ((so->so_flags1 & SOF1_CELLFALLBACK) > 0)
6139 ? 1 : 0;
6140 goto integer;
6141 case SO_NET_SERVICE_TYPE: {
6142 if ((so->so_flags1 & SOF1_TC_NET_SERV_TYPE)) {
6143 optval = so->so_netsvctype;
6144 } else {
6145 optval = NET_SERVICE_TYPE_BE;
6146 }
6147 goto integer;
6148 }
6149 case SO_NETSVC_MARKING_LEVEL:
6150 optval = so_get_netsvc_marking_level(so);
6151 goto integer;
6152
6153 case SO_MPKL_SEND_INFO: {
6154 struct so_mpkl_send_info so_mpkl_send_info;
6155
6156 uuid_copy(so_mpkl_send_info.mpkl_uuid, so->so_mpkl_send_uuid);
6157 so_mpkl_send_info.mpkl_proto = so->so_mpkl_send_proto;
6158 error = sooptcopyout(sopt, &so_mpkl_send_info,
6159 sizeof(struct so_mpkl_send_info));
6160 break;
6161 }
6162 default:
6163 error = ENOPROTOOPT;
6164 break;
6165 }
6166 }
6167 out:
6168 if (dolock) {
6169 socket_unlock(so, 1);
6170 }
6171 return error;
6172 }
6173
6174 /*
6175 * The size limit on our soopt_getm is different from that on FreeBSD.
6176 * We limit the size of options to MCLBYTES. This will have to change
6177 * if we need to define options that need more space than MCLBYTES.
6178 */
6179 int
6180 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
6181 {
6182 struct mbuf *m, *m_prev;
6183 int sopt_size = sopt->sopt_valsize;
6184 int how;
6185
6186 if (sopt_size <= 0 || sopt_size > MCLBYTES) {
6187 return EMSGSIZE;
6188 }
6189
6190 how = sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT;
6191 MGET(m, how, MT_DATA);
6192 if (m == NULL) {
6193 return ENOBUFS;
6194 }
6195 if (sopt_size > MLEN) {
6196 MCLGET(m, how);
6197 if ((m->m_flags & M_EXT) == 0) {
6198 m_free(m);
6199 return ENOBUFS;
6200 }
6201 m->m_len = min(MCLBYTES, sopt_size);
6202 } else {
6203 m->m_len = min(MLEN, sopt_size);
6204 }
6205 sopt_size -= m->m_len;
6206 *mp = m;
6207 m_prev = m;
6208
6209 while (sopt_size > 0) {
6210 MGET(m, how, MT_DATA);
6211 if (m == NULL) {
6212 m_freem(*mp);
6213 return ENOBUFS;
6214 }
6215 if (sopt_size > MLEN) {
6216 MCLGET(m, how);
6217 if ((m->m_flags & M_EXT) == 0) {
6218 m_freem(*mp);
6219 m_freem(m);
6220 return ENOBUFS;
6221 }
6222 m->m_len = min(MCLBYTES, sopt_size);
6223 } else {
6224 m->m_len = min(MLEN, sopt_size);
6225 }
6226 sopt_size -= m->m_len;
6227 m_prev->m_next = m;
6228 m_prev = m;
6229 }
6230 return 0;
6231 }
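
/*
 * Illustrative sketch, not part of the original source: a protocol
 * ctloutput path that needs a variable-length option value could pair
 * soopt_getm() with soopt_mcopyin() roughly as below, subject to the
 * MCLBYTES cap described above. The handler name and the parsing step
 * are hypothetical; only soopt_getm()/soopt_mcopyin() come from this
 * file.
 *
 *	static int
 *	example_ctloutput_in(struct sockopt *sopt)
 *	{
 *		struct mbuf *m = NULL;
 *		int error;
 *
 *		error = soopt_getm(sopt, &m);	// chain of at most MCLBYTES
 *		if (error != 0)
 *			return (error);
 *		error = soopt_mcopyin(sopt, m);	// frees the chain on error
 *		if (error != 0)
 *			return (error);
 *		// ... parse the option data held in 'm' ...
 *		m_freem(m);
 *		return (0);
 *	}
 */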
6232
6233 /* copyin sopt data into mbuf chain */
6234 int
6235 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
6236 {
6237 struct mbuf *m0 = m;
6238
6239 if (sopt->sopt_val == USER_ADDR_NULL) {
6240 return 0;
6241 }
6242 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
6243 if (sopt->sopt_p != kernproc) {
6244 int error;
6245
6246 error = copyin(sopt->sopt_val, mtod(m, char *),
6247 m->m_len);
6248 if (error != 0) {
6249 m_freem(m0);
6250 return error;
6251 }
6252 } else {
6253 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val),
6254 mtod(m, char *), m->m_len);
6255 }
6256 sopt->sopt_valsize -= m->m_len;
6257 sopt->sopt_val += m->m_len;
6258 m = m->m_next;
6259 }
6260 /* the chain should have been allocated with enough space at ip6_sooptmcopyin() */
6261 if (m != NULL) {
6262 panic("soopt_mcopyin");
6263 /* NOTREACHED */
6264 }
6265 return 0;
6266 }
6267
6268 /* copyout mbuf chain data into soopt */
6269 int
6270 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
6271 {
6272 struct mbuf *m0 = m;
6273 size_t valsize = 0;
6274
6275 if (sopt->sopt_val == USER_ADDR_NULL) {
6276 return 0;
6277 }
6278 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
6279 if (sopt->sopt_p != kernproc) {
6280 int error;
6281
6282 error = copyout(mtod(m, char *), sopt->sopt_val,
6283 m->m_len);
6284 if (error != 0) {
6285 m_freem(m0);
6286 return error;
6287 }
6288 } else {
6289 bcopy(mtod(m, char *),
6290 CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
6291 }
6292 sopt->sopt_valsize -= m->m_len;
6293 sopt->sopt_val += m->m_len;
6294 valsize += m->m_len;
6295 m = m->m_next;
6296 }
6297 if (m != NULL) {
6298 /* a sufficiently large sockopt buffer should have been supplied from user-land */
6299 m_freem(m0);
6300 return EINVAL;
6301 }
6302 sopt->sopt_valsize = valsize;
6303 return 0;
6304 }
6305
6306 void
6307 sohasoutofband(struct socket *so)
6308 {
6309 if (so->so_pgid < 0) {
6310 gsignal(-so->so_pgid, SIGURG);
6311 } else if (so->so_pgid > 0) {
6312 proc_signal(so->so_pgid, SIGURG);
6313 }
6314 selwakeup(&so->so_rcv.sb_sel);
6315 if (so->so_rcv.sb_flags & SB_KNOTE) {
6316 KNOTE(&so->so_rcv.sb_sel.si_note,
6317 (NOTE_OOB | SO_FILT_HINT_LOCKED));
6318 }
6319 }
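
/*
 * Illustrative user-space sketch, not part of the original source: the
 * SIGURG raised above is delivered to the socket's owning process or
 * process group, which an application establishes with F_SETOWN. A
 * minimal sketch, where urg_handler is a hypothetical handler:
 *
 *	signal(SIGURG, urg_handler);
 *	fcntl(fd, F_SETOWN, getpid());	// direct SIGURG at this process
 */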
6320
6321 int
6322 sopoll(struct socket *so, int events, kauth_cred_t cred, void * wql)
6323 {
6324 #pragma unused(cred)
6325 struct proc *p = current_proc();
6326 int revents = 0;
6327
6328 socket_lock(so, 1);
6329 so_update_last_owner_locked(so, PROC_NULL);
6330 so_update_policy(so);
6331
6332 if (events & (POLLIN | POLLRDNORM)) {
6333 if (soreadable(so)) {
6334 revents |= events & (POLLIN | POLLRDNORM);
6335 }
6336 }
6337
6338 if (events & (POLLOUT | POLLWRNORM)) {
6339 if (sowriteable(so)) {
6340 revents |= events & (POLLOUT | POLLWRNORM);
6341 }
6342 }
6343
6344 if (events & (POLLPRI | POLLRDBAND)) {
6345 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
6346 revents |= events & (POLLPRI | POLLRDBAND);
6347 }
6348 }
6349
6350 if (revents == 0) {
6351 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
6352 /*
6353 * Darwin sets the flag first,
6354 * BSD calls selrecord first
6355 */
6356 so->so_rcv.sb_flags |= SB_SEL;
6357 selrecord(p, &so->so_rcv.sb_sel, wql);
6358 }
6359
6360 if (events & (POLLOUT | POLLWRNORM)) {
6361 /*
6362 * Darwin sets the flag first,
6363 * BSD calls selrecord first
6364 */
6365 so->so_snd.sb_flags |= SB_SEL;
6366 selrecord(p, &so->so_snd.sb_sel, wql);
6367 }
6368 }
6369
6370 socket_unlock(so, 1);
6371 return revents;
6372 }
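
/*
 * Illustrative user-space sketch, not part of the original source:
 * sopoll() above is what ultimately services poll(2) on a socket, so
 * pending urgent data shows up as POLLPRI exactly as computed above.
 * A minimal sketch, assuming a connected stream socket in 'fd':
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLPRI };
 *
 *	if (poll(&pfd, 1, 5000) > 0 && (pfd.revents & POLLPRI)) {
 *		char oob;
 *		(void) recv(fd, &oob, 1, MSG_OOB);	// fetch the OOB byte
 *	}
 */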
6373
6374 int
6375 soo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
6376 {
6377 struct socket *so = (struct socket *)fp->fp_glob->fg_data;
6378 int result;
6379
6380 socket_lock(so, 1);
6381 so_update_last_owner_locked(so, PROC_NULL);
6382 so_update_policy(so);
6383
6384 switch (kn->kn_filter) {
6385 case EVFILT_READ:
6386 kn->kn_filtid = EVFILTID_SOREAD;
6387 break;
6388 case EVFILT_WRITE:
6389 kn->kn_filtid = EVFILTID_SOWRITE;
6390 break;
6391 case EVFILT_SOCK:
6392 kn->kn_filtid = EVFILTID_SCK;
6393 break;
6394 case EVFILT_EXCEPT:
6395 kn->kn_filtid = EVFILTID_SOEXCEPT;
6396 break;
6397 default:
6398 socket_unlock(so, 1);
6399 knote_set_error(kn, EINVAL);
6400 return 0;
6401 }
6402
6403 /*
6404 * call the appropriate sub-filter attach
6405 * with the socket still locked
6406 */
6407 result = knote_fops(kn)->f_attach(kn, kev);
6408
6409 socket_unlock(so, 1);
6410
6411 return result;
6412 }
6413
6414 static int
6415 filt_soread_common(struct knote *kn, struct kevent_qos_s *kev, struct socket *so)
6416 {
6417 int retval = 0;
6418 int64_t data = 0;
6419
6420 if (so->so_options & SO_ACCEPTCONN) {
6421 /*
6422 * Radar 6615193 handle the listen case dynamically
6423 * for the kqueue read filter. This allows calling listen()
6424 * after registering the kqueue EVFILT_READ.
6425 */
6426
6427 retval = !TAILQ_EMPTY(&so->so_comp);
6428 data = so->so_qlen;
6429 goto out;
6430 }
6431
6432 /* socket isn't a listener */
6433 /*
6434 * NOTE_LOWAT specifies a new low water mark in data, i.e.
6435 * the bytes of protocol data. We therefore exclude any
6436 * control bytes.
6437 */
6438 data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
6439
6440 if (kn->kn_sfflags & NOTE_OOB) {
6441 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
6442 kn->kn_fflags |= NOTE_OOB;
6443 data -= so->so_oobmark;
6444 retval = 1;
6445 goto out;
6446 }
6447 }
6448
6449 if ((so->so_state & SS_CANTRCVMORE)
6450 #if CONTENT_FILTER
6451 && cfil_sock_data_pending(&so->so_rcv) == 0
6452 #endif /* CONTENT_FILTER */
6453 ) {
6454 kn->kn_flags |= EV_EOF;
6455 kn->kn_fflags = so->so_error;
6456 retval = 1;
6457 goto out;
6458 }
6459
6460 if (so->so_error) { /* temporary udp error */
6461 retval = 1;
6462 goto out;
6463 }
6464
6465 int64_t lowwat = so->so_rcv.sb_lowat;
6466 /*
6467 * Ensure that when NOTE_LOWAT is used, the derived
6468 * low water mark is bounded by socket's rcv buf's
6469 * high and low water mark values.
6470 */
6471 if (kn->kn_sfflags & NOTE_LOWAT) {
6472 if (kn->kn_sdata > so->so_rcv.sb_hiwat) {
6473 lowwat = so->so_rcv.sb_hiwat;
6474 } else if (kn->kn_sdata > lowwat) {
6475 lowwat = kn->kn_sdata;
6476 }
6477 }
6478
6479 /*
6480 * While the `data` field is the amount of data to read,
6481 * 0-sized packets need to wake up the kqueue, see 58140856,
6482 * so we need to take control bytes into account too.
6483 */
6484 retval = (so->so_rcv.sb_cc >= lowwat);
6485
6486 out:
6487 if (retval && kev) {
6488 knote_fill_kevent(kn, kev, data);
6489 }
6490 return retval;
6491 }
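
/*
 * Illustrative user-space sketch, not part of the original source: the
 * NOTE_LOWAT handling above lets a kevent(2) client require a minimum
 * number of readable bytes before EVFILT_READ fires, clipped to the
 * receive buffer's high water mark. A minimal sketch, assuming an
 * existing kqueue in 'kq' and a connected socket in 'fd':
 *
 *	struct kevent kev;
 *
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 512, NULL);
 *	(void) kevent(kq, &kev, 1, NULL, 0, NULL);	// register only
 *
 *	if (kevent(kq, NULL, 0, &kev, 1, NULL) == 1) {
 *		// kev.data reports the readable byte count (>= 512 here)
 *	}
 */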
6492
6493 static int
6494 filt_sorattach(struct knote *kn, __unused struct kevent_qos_s *kev)
6495 {
6496 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6497
6498 /* socket locked */
6499
6500 /*
6501 * If the caller explicitly asked for OOB results (e.g. poll())
6502 * from EVFILT_READ, then save that off in the hookid field
6503 * and reserve the kn_flags EV_OOBAND bit for output only.
6504 */
6505 if (kn->kn_filter == EVFILT_READ &&
6506 kn->kn_flags & EV_OOBAND) {
6507 kn->kn_flags &= ~EV_OOBAND;
6508 kn->kn_hook32 = EV_OOBAND;
6509 } else {
6510 kn->kn_hook32 = 0;
6511 }
6512 if (KNOTE_ATTACH(&so->so_rcv.sb_sel.si_note, kn)) {
6513 so->so_rcv.sb_flags |= SB_KNOTE;
6514 }
6515
6516 /* indicate if event is already fired */
6517 return filt_soread_common(kn, NULL, so);
6518 }
6519
6520 static void
6521 filt_sordetach(struct knote *kn)
6522 {
6523 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6524
6525 socket_lock(so, 1);
6526 if (so->so_rcv.sb_flags & SB_KNOTE) {
6527 if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn)) {
6528 so->so_rcv.sb_flags &= ~SB_KNOTE;
6529 }
6530 }
6531 socket_unlock(so, 1);
6532 }
6533
6534 /*ARGSUSED*/
6535 static int
6536 filt_soread(struct knote *kn, long hint)
6537 {
6538 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6539 int retval;
6540
6541 if ((hint & SO_FILT_HINT_LOCKED) == 0) {
6542 socket_lock(so, 1);
6543 }
6544
6545 retval = filt_soread_common(kn, NULL, so);
6546
6547 if ((hint & SO_FILT_HINT_LOCKED) == 0) {
6548 socket_unlock(so, 1);
6549 }
6550
6551 return retval;
6552 }
6553
6554 static int
6555 filt_sortouch(struct knote *kn, struct kevent_qos_s *kev)
6556 {
6557 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6558 int retval;
6559
6560 socket_lock(so, 1);
6561
6562 /* save off the new input fflags and data */
6563 kn->kn_sfflags = kev->fflags;
6564 kn->kn_sdata = kev->data;
6565
6566 /* determine if changes result in fired events */
6567 retval = filt_soread_common(kn, NULL, so);
6568
6569 socket_unlock(so, 1);
6570
6571 return retval;
6572 }
6573
6574 static int
6575 filt_sorprocess(struct knote *kn, struct kevent_qos_s *kev)
6576 {
6577 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6578 int retval;
6579
6580 socket_lock(so, 1);
6581 retval = filt_soread_common(kn, kev, so);
6582 socket_unlock(so, 1);
6583
6584 return retval;
6585 }
6586
6587 int
6588 so_wait_for_if_feedback(struct socket *so)
6589 {
6590 if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) &&
6591 (so->so_state & SS_ISCONNECTED)) {
6592 struct inpcb *inp = sotoinpcb(so);
6593 if (INP_WAIT_FOR_IF_FEEDBACK(inp)) {
6594 return 1;
6595 }
6596 }
6597 return 0;
6598 }
6599
6600 static int
6601 filt_sowrite_common(struct knote *kn, struct kevent_qos_s *kev, struct socket *so)
6602 {
6603 int ret = 0;
6604 int64_t data = sbspace(&so->so_snd);
6605
6606 if (so->so_state & SS_CANTSENDMORE) {
6607 kn->kn_flags |= EV_EOF;
6608 kn->kn_fflags = so->so_error;
6609 ret = 1;
6610 goto out;
6611 }
6612
6613 if (so->so_error) { /* temporary udp error */
6614 ret = 1;
6615 goto out;
6616 }
6617
6618 if (!socanwrite(so)) {
6619 ret = 0;
6620 goto out;
6621 }
6622
6623 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
6624 ret = 1;
6625 goto out;
6626 }
6627
6628 int64_t lowwat = so->so_snd.sb_lowat;
6629
6630 if (kn->kn_sfflags & NOTE_LOWAT) {
6631 if (kn->kn_sdata > so->so_snd.sb_hiwat) {
6632 lowwat = so->so_snd.sb_hiwat;
6633 } else if (kn->kn_sdata > lowwat) {
6634 lowwat = kn->kn_sdata;
6635 }
6636 }
6637
6638 if (data >= lowwat) {
6639 if ((so->so_flags & SOF_NOTSENT_LOWAT)
6640 #if (DEBUG || DEVELOPMENT)
6641 && so_notsent_lowat_check == 1
6642 #endif /* DEBUG || DEVELOPMENT */
6643 ) {
6644 if ((SOCK_DOM(so) == PF_INET ||
6645 SOCK_DOM(so) == PF_INET6) &&
6646 so->so_type == SOCK_STREAM) {
6647 ret = tcp_notsent_lowat_check(so);
6648 }
6649 #if MPTCP
6650 else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
6651 (SOCK_PROTO(so) == IPPROTO_TCP)) {
6652 ret = mptcp_notsent_lowat_check(so);
6653 }
6654 #endif
6655 else {
6656 ret = 1;
6657 goto out;
6658 }
6659 } else {
6660 ret = 1;
6661 }
6662 }
6663 if (so_wait_for_if_feedback(so)) {
6664 ret = 0;
6665 }
6666
6667 out:
6668 if (ret && kev) {
6669 knote_fill_kevent(kn, kev, data);
6670 }
6671 return ret;
6672 }
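
/*
 * Illustrative user-space sketch, not part of the original source: as
 * with the read filter, NOTE_LOWAT lets a kevent(2) client require a
 * minimum amount of free send-buffer space before EVFILT_WRITE fires.
 * The SOF_NOTSENT_LOWAT branch above is reached when a not-sent low
 * water mark has been configured on the socket (typically via the
 * TCP_NOTSENT_LOWAT option; that mapping is stated here as an
 * assumption, not something shown in this file). A minimal sketch:
 *
 *	struct kevent kev;
 *
 *	EV_SET(&kev, fd, EVFILT_WRITE, EV_ADD, NOTE_LOWAT, 4096, NULL);
 *	(void) kevent(kq, &kev, 1, NULL, 0, NULL);
 *	// when delivered, kev.data holds the available send-buffer space
 */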
6673
6674 static int
6675 filt_sowattach(struct knote *kn, __unused struct kevent_qos_s *kev)
6676 {
6677 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6678
6679 /* socket locked */
6680 if (KNOTE_ATTACH(&so->so_snd.sb_sel.si_note, kn)) {
6681 so->so_snd.sb_flags |= SB_KNOTE;
6682 }
6683
6684 /* determine if it's already fired */
6685 return filt_sowrite_common(kn, NULL, so);
6686 }
6687
6688 static void
6689 filt_sowdetach(struct knote *kn)
6690 {
6691 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6692 socket_lock(so, 1);
6693
6694 if (so->so_snd.sb_flags & SB_KNOTE) {
6695 if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn)) {
6696 so->so_snd.sb_flags &= ~SB_KNOTE;
6697 }
6698 }
6699 socket_unlock(so, 1);
6700 }
6701
6702 /*ARGSUSED*/
6703 static int
6704 filt_sowrite(struct knote *kn, long hint)
6705 {
6706 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6707 int ret;
6708
6709 if ((hint & SO_FILT_HINT_LOCKED) == 0) {
6710 socket_lock(so, 1);
6711 }
6712
6713 ret = filt_sowrite_common(kn, NULL, so);
6714
6715 if ((hint & SO_FILT_HINT_LOCKED) == 0) {
6716 socket_unlock(so, 1);
6717 }
6718
6719 return ret;
6720 }
6721
6722 static int
6723 filt_sowtouch(struct knote *kn, struct kevent_qos_s *kev)
6724 {
6725 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6726 int ret;
6727
6728 socket_lock(so, 1);
6729
6730 /* save off the new input fflags and data */
6731 kn->kn_sfflags = kev->fflags;
6732 kn->kn_sdata = kev->data;
6733
6734 /* determine if these changes result in a triggered event */
6735 ret = filt_sowrite_common(kn, NULL, so);
6736
6737 socket_unlock(so, 1);
6738
6739 return ret;
6740 }
6741
6742 static int
6743 filt_sowprocess(struct knote *kn, struct kevent_qos_s *kev)
6744 {
6745 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6746 int ret;
6747
6748 socket_lock(so, 1);
6749 ret = filt_sowrite_common(kn, kev, so);
6750 socket_unlock(so, 1);
6751
6752 return ret;
6753 }
6754
6755 static int
6756 filt_sockev_common(struct knote *kn, struct kevent_qos_s *kev,
6757 struct socket *so, long ev_hint)
6758 {
6759 int ret = 0;
6760 int64_t data = 0;
6761 uint32_t level_trigger = 0;
6762
6763 if (ev_hint & SO_FILT_HINT_CONNRESET) {
6764 kn->kn_fflags |= NOTE_CONNRESET;
6765 }
6766 if (ev_hint & SO_FILT_HINT_TIMEOUT) {
6767 kn->kn_fflags |= NOTE_TIMEOUT;
6768 }
6769 if (ev_hint & SO_FILT_HINT_NOSRCADDR) {
6770 kn->kn_fflags |= NOTE_NOSRCADDR;
6771 }
6772 if (ev_hint & SO_FILT_HINT_IFDENIED) {
6773 kn->kn_fflags |= NOTE_IFDENIED;
6774 }
6775 if (ev_hint & SO_FILT_HINT_KEEPALIVE) {
6776 kn->kn_fflags |= NOTE_KEEPALIVE;
6777 }
6778 if (ev_hint & SO_FILT_HINT_ADAPTIVE_WTIMO) {
6779 kn->kn_fflags |= NOTE_ADAPTIVE_WTIMO;
6780 }
6781 if (ev_hint & SO_FILT_HINT_ADAPTIVE_RTIMO) {
6782 kn->kn_fflags |= NOTE_ADAPTIVE_RTIMO;
6783 }
6784 if ((ev_hint & SO_FILT_HINT_CONNECTED) ||
6785 (so->so_state & SS_ISCONNECTED)) {
6786 kn->kn_fflags |= NOTE_CONNECTED;
6787 level_trigger |= NOTE_CONNECTED;
6788 }
6789 if ((ev_hint & SO_FILT_HINT_DISCONNECTED) ||
6790 (so->so_state & SS_ISDISCONNECTED)) {
6791 kn->kn_fflags |= NOTE_DISCONNECTED;
6792 level_trigger |= NOTE_DISCONNECTED;
6793 }
6794 if (ev_hint & SO_FILT_HINT_CONNINFO_UPDATED) {
6795 if (so->so_proto != NULL &&
6796 (so->so_proto->pr_flags & PR_EVCONNINFO)) {
6797 kn->kn_fflags |= NOTE_CONNINFO_UPDATED;
6798 }
6799 }
6800
6801 if ((ev_hint & SO_FILT_HINT_NOTIFY_ACK) ||
6802 tcp_notify_ack_active(so)) {
6803 kn->kn_fflags |= NOTE_NOTIFY_ACK;
6804 }
6805
6806 if ((so->so_state & SS_CANTRCVMORE)
6807 #if CONTENT_FILTER
6808 && cfil_sock_data_pending(&so->so_rcv) == 0
6809 #endif /* CONTENT_FILTER */
6810 ) {
6811 kn->kn_fflags |= NOTE_READCLOSED;
6812 level_trigger |= NOTE_READCLOSED;
6813 }
6814
6815 if (so->so_state & SS_CANTSENDMORE) {
6816 kn->kn_fflags |= NOTE_WRITECLOSED;
6817 level_trigger |= NOTE_WRITECLOSED;
6818 }
6819
6820 if ((ev_hint & SO_FILT_HINT_SUSPEND) ||
6821 (so->so_flags & SOF_SUSPENDED)) {
6822 kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
6823
6824 /* If resume event was delivered before, reset it */
6825 kn->kn_hook32 &= ~NOTE_RESUME;
6826
6827 kn->kn_fflags |= NOTE_SUSPEND;
6828 level_trigger |= NOTE_SUSPEND;
6829 }
6830
6831 if ((ev_hint & SO_FILT_HINT_RESUME) ||
6832 (so->so_flags & SOF_SUSPENDED) == 0) {
6833 kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
6834
6835 /* If suspend event was delivered before, reset it */
6836 kn->kn_hook32 &= ~NOTE_SUSPEND;
6837
6838 kn->kn_fflags |= NOTE_RESUME;
6839 level_trigger |= NOTE_RESUME;
6840 }
6841
6842 if (so->so_error != 0) {
6843 ret = 1;
6844 data = so->so_error;
6845 kn->kn_flags |= EV_EOF;
6846 } else {
6847 u_int32_t data32 = 0;
6848 get_sockev_state(so, &data32);
6849 data = data32;
6850 }
6851
6852 /* Reset any events that are not requested on this knote */
6853 kn->kn_fflags &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK);
6854 level_trigger &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK);
6855
6856 /* Find the level triggered events that have already been delivered */
6857 level_trigger &= kn->kn_hook32;
6858 level_trigger &= EVFILT_SOCK_LEVEL_TRIGGER_MASK;
6859
6860 /* Do not deliver level triggered events more than once */
6861 if ((kn->kn_fflags & ~level_trigger) != 0) {
6862 ret = 1;
6863 }
6864
6865 if (ret && kev) {
6866 /*
6867 * Store the state of the events being delivered. This
6868 * state can be used to deliver level triggered events
6869 * at least once and still avoid waking up the application
6870 * multiple times as long as the event is active.
6871 */
6872 if (kn->kn_fflags != 0) {
6873 kn->kn_hook32 |= (kn->kn_fflags &
6874 EVFILT_SOCK_LEVEL_TRIGGER_MASK);
6875 }
6876
6877 /*
6878 * NOTE_RESUME and NOTE_SUSPEND are an exception: deliver
6879 * only one of them and remember which one was
6880 * delivered last.
6881 */
6882 if (kn->kn_fflags & NOTE_SUSPEND) {
6883 kn->kn_hook32 &= ~NOTE_RESUME;
6884 }
6885 if (kn->kn_fflags & NOTE_RESUME) {
6886 kn->kn_hook32 &= ~NOTE_SUSPEND;
6887 }
6888
6889 knote_fill_kevent(kn, kev, data);
6890 }
6891 return ret;
6892 }
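
/*
 * Illustrative user-space sketch, not part of the original source: the
 * level-trigger bookkeeping above ensures state-style events such as
 * NOTE_CONNECTED or NOTE_SUSPEND are reported once per transition
 * rather than on every poll. Registration could look roughly as below,
 * assuming the build has access to the EVFILT_SOCK and NOTE_* socket
 * event definitions (they are not in the public SDK headers):
 *
 *	struct kevent kev;
 *
 *	EV_SET(&kev, fd, EVFILT_SOCK, EV_ADD | EV_CLEAR,
 *	    NOTE_CONNECTED | NOTE_DISCONNECTED | NOTE_READCLOSED, 0, NULL);
 *	(void) kevent(kq, &kev, 1, NULL, 0, NULL);
 */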
6893
6894 static int
6895 filt_sockattach(struct knote *kn, __unused struct kevent_qos_s *kev)
6896 {
6897 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6898
6899 /* socket locked */
6900 kn->kn_hook32 = 0;
6901 if (KNOTE_ATTACH(&so->so_klist, kn)) {
6902 so->so_flags |= SOF_KNOTE;
6903 }
6904
6905 /* determine if event already fired */
6906 return filt_sockev_common(kn, NULL, so, 0);
6907 }
6908
6909 static void
6910 filt_sockdetach(struct knote *kn)
6911 {
6912 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6913 socket_lock(so, 1);
6914
6915 if ((so->so_flags & SOF_KNOTE) != 0) {
6916 if (KNOTE_DETACH(&so->so_klist, kn)) {
6917 so->so_flags &= ~SOF_KNOTE;
6918 }
6919 }
6920 socket_unlock(so, 1);
6921 }
6922
6923 static int
6924 filt_sockev(struct knote *kn, long hint)
6925 {
6926 int ret = 0, locked = 0;
6927 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6928 long ev_hint = (hint & SO_FILT_HINT_EV);
6929
6930 if ((hint & SO_FILT_HINT_LOCKED) == 0) {
6931 socket_lock(so, 1);
6932 locked = 1;
6933 }
6934
6935 ret = filt_sockev_common(kn, NULL, so, ev_hint);
6936
6937 if (locked) {
6938 socket_unlock(so, 1);
6939 }
6940
6941 return ret;
6942 }
6943
6944
6945
6946 /*
6947 * filt_socktouch - update event state
6948 */
6949 static int
6950 filt_socktouch(
6951 struct knote *kn,
6952 struct kevent_qos_s *kev)
6953 {
6954 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6955 uint32_t changed_flags;
6956 int ret;
6957
6958 socket_lock(so, 1);
6959
6960 /* compare the previously requested fflags against the delivered state */
6961 changed_flags = (kn->kn_sfflags ^ kn->kn_hook32);
6962
6963 /* save off the new input fflags and data */
6964 kn->kn_sfflags = kev->fflags;
6965 kn->kn_sdata = kev->data;
6966
6967 /* restrict the current results to the (smaller?) set of new interest */
6968 /*
6969 * For compatibility with previous implementations, we leave kn_fflags
6970 * as they were before.
6971 */
6972 //kn->kn_fflags &= kev->fflags;
6973
6974 /*
6975 * Since we keep track of events that are already
6976 * delivered, if any of those events are not requested
6977 * anymore the state related to them can be reset
6978 */
6979 kn->kn_hook32 &= ~(changed_flags & EVFILT_SOCK_LEVEL_TRIGGER_MASK);
6980
6981 /* determine if we have events to deliver */
6982 ret = filt_sockev_common(kn, NULL, so, 0);
6983
6984 socket_unlock(so, 1);
6985
6986 return ret;
6987 }
6988
6989 /*
6990 * filt_sockprocess - query event fired state and return data
6991 */
6992 static int
6993 filt_sockprocess(struct knote *kn, struct kevent_qos_s *kev)
6994 {
6995 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6996 int ret = 0;
6997
6998 socket_lock(so, 1);
6999
7000 ret = filt_sockev_common(kn, kev, so, 0);
7001
7002 socket_unlock(so, 1);
7003
7004 return ret;
7005 }
7006
7007 void
7008 get_sockev_state(struct socket *so, u_int32_t *statep)
7009 {
7010 u_int32_t state = *(statep);
7011
7012 /*
7013 * If the state variable is already used by a previous event,
7014 * reset it.
7015 */
7016 if (state != 0) {
7017 return;
7018 }
7019
7020 if (so->so_state & SS_ISCONNECTED) {
7021 state |= SOCKEV_CONNECTED;
7022 } else {
7023 state &= ~(SOCKEV_CONNECTED);
7024 }
7025 state |= ((so->so_state & SS_ISDISCONNECTED) ? SOCKEV_DISCONNECTED : 0);
7026 *(statep) = state;
7027 }
7028
7029 #define SO_LOCK_HISTORY_STR_LEN \
7030 (2 * SO_LCKDBG_MAX * (2 + (2 * sizeof (void *)) + 1) + 1)
7031
7032 __private_extern__ const char *
7033 solockhistory_nr(struct socket *so)
7034 {
7035 size_t n = 0;
7036 int i;
7037 static char lock_history_str[SO_LOCK_HISTORY_STR_LEN];
7038
7039 bzero(lock_history_str, sizeof(lock_history_str));
7040 for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) {
7041 n += scnprintf(lock_history_str + n,
7042 SO_LOCK_HISTORY_STR_LEN - n, "%p:%p ",
7043 so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX],
7044 so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX]);
7045 }
7046 return lock_history_str;
7047 }
7048
7049 lck_mtx_t *
7050 socket_getlock(struct socket *so, int flags)
7051 {
7052 if (so->so_proto->pr_getlock != NULL) {
7053 return (*so->so_proto->pr_getlock)(so, flags);
7054 } else {
7055 return so->so_proto->pr_domain->dom_mtx;
7056 }
7057 }
7058
7059 void
7060 socket_lock(struct socket *so, int refcount)
7061 {
7062 void *lr_saved;
7063
7064 lr_saved = __builtin_return_address(0);
7065
7066 if (so->so_proto->pr_lock) {
7067 (*so->so_proto->pr_lock)(so, refcount, lr_saved);
7068 } else {
7069 #ifdef MORE_LOCKING_DEBUG
7070 LCK_MTX_ASSERT(so->so_proto->pr_domain->dom_mtx,
7071 LCK_MTX_ASSERT_NOTOWNED);
7072 #endif
7073 lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
7074 if (refcount) {
7075 so->so_usecount++;
7076 }
7077 so->lock_lr[so->next_lock_lr] = lr_saved;
7078 so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
7079 }
7080 }
7081
7082 void
7083 socket_lock_assert_owned(struct socket *so)
7084 {
7085 lck_mtx_t *mutex_held;
7086
7087 if (so->so_proto->pr_getlock != NULL) {
7088 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
7089 } else {
7090 mutex_held = so->so_proto->pr_domain->dom_mtx;
7091 }
7092
7093 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
7094 }
7095
7096 int
7097 socket_try_lock(struct socket *so)
7098 {
7099 lck_mtx_t *mtx;
7100
7101 if (so->so_proto->pr_getlock != NULL) {
7102 mtx = (*so->so_proto->pr_getlock)(so, 0);
7103 } else {
7104 mtx = so->so_proto->pr_domain->dom_mtx;
7105 }
7106
7107 return lck_mtx_try_lock(mtx);
7108 }
7109
7110 void
7111 socket_unlock(struct socket *so, int refcount)
7112 {
7113 void *lr_saved;
7114 lck_mtx_t *mutex_held;
7115
7116 lr_saved = __builtin_return_address(0);
7117
7118 if (so == NULL || so->so_proto == NULL) {
7119 panic("%s: null so_proto so=%p\n", __func__, so);
7120 /* NOTREACHED */
7121 }
7122
7123 if (so->so_proto->pr_unlock) {
7124 (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
7125 } else {
7126 mutex_held = so->so_proto->pr_domain->dom_mtx;
7127 #ifdef MORE_LOCKING_DEBUG
7128 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
7129 #endif
7130 so->unlock_lr[so->next_unlock_lr] = lr_saved;
7131 so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
7132
7133 if (refcount) {
7134 if (so->so_usecount <= 0) {
7135 panic("%s: bad refcount=%d so=%p (%d, %d, %d) "
7136 "lrh=%s", __func__, so->so_usecount, so,
7137 SOCK_DOM(so), so->so_type,
7138 SOCK_PROTO(so), solockhistory_nr(so));
7139 /* NOTREACHED */
7140 }
7141
7142 so->so_usecount--;
7143 if (so->so_usecount == 0) {
7144 sofreelastref(so, 1);
7145 }
7146 }
7147 lck_mtx_unlock(mutex_held);
7148 }
7149 }
7150
7151 /* Called with socket locked, will unlock socket */
7152 void
7153 sofree(struct socket *so)
7154 {
7155 lck_mtx_t *mutex_held;
7156
7157 if (so->so_proto->pr_getlock != NULL) {
7158 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
7159 } else {
7160 mutex_held = so->so_proto->pr_domain->dom_mtx;
7161 }
7162 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
7163
7164 sofreelastref(so, 0);
7165 }
7166
7167 void
7168 soreference(struct socket *so)
7169 {
7170 socket_lock(so, 1); /* lock & take one reference on the socket */
7171 socket_unlock(so, 0); /* unlock only */
7172 }
7173
7174 void
7175 sodereference(struct socket *so)
7176 {
7177 socket_lock(so, 0);
7178 socket_unlock(so, 1);
7179 }
7180
7181 /*
7182 * Set or clear SOF_MULTIPAGES on the socket to enable or disable the
7183 * possibility of using jumbo clusters. The caller must hold
7184 * the socket lock.
7185 */
7186 void
7187 somultipages(struct socket *so, boolean_t set)
7188 {
7189 if (set) {
7190 so->so_flags |= SOF_MULTIPAGES;
7191 } else {
7192 so->so_flags &= ~SOF_MULTIPAGES;
7193 }
7194 }
7195
7196 void
7197 soif2kcl(struct socket *so, boolean_t set)
7198 {
7199 if (set) {
7200 so->so_flags1 |= SOF1_IF_2KCL;
7201 } else {
7202 so->so_flags1 &= ~SOF1_IF_2KCL;
7203 }
7204 }
7205
7206 int
7207 so_isdstlocal(struct socket *so)
7208 {
7209 struct inpcb *inp = (struct inpcb *)so->so_pcb;
7210
7211 if (SOCK_DOM(so) == PF_INET) {
7212 return inaddr_local(inp->inp_faddr);
7213 } else if (SOCK_DOM(so) == PF_INET6) {
7214 return in6addr_local(&inp->in6p_faddr);
7215 }
7216
7217 return 0;
7218 }
7219
7220 int
7221 sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
7222 {
7223 struct sockbuf *rcv, *snd;
7224 int err = 0, defunct;
7225
7226 rcv = &so->so_rcv;
7227 snd = &so->so_snd;
7228
7229 defunct = (so->so_flags & SOF_DEFUNCT);
7230 if (defunct) {
7231 if (!(snd->sb_flags & rcv->sb_flags & SB_DROP)) {
7232 panic("%s: SB_DROP not set", __func__);
7233 /* NOTREACHED */
7234 }
7235 goto done;
7236 }
7237
7238 if (so->so_flags & SOF_NODEFUNCT) {
7239 if (noforce) {
7240 err = EOPNOTSUPP;
7241 if (p != PROC_NULL) {
7242 SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
7243 "name %s level %d) so 0x%llx [%d,%d] "
7244 "is not eligible for defunct "
7245 "(%d)\n", __func__, proc_selfpid(),
7246 proc_best_name(current_proc()), proc_pid(p),
7247 proc_best_name(p), level,
7248 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7249 SOCK_DOM(so), SOCK_TYPE(so), err);
7250 }
7251 return err;
7252 }
7253 so->so_flags &= ~SOF_NODEFUNCT;
7254 if (p != PROC_NULL) {
7255 SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
7256 "name %s level %d) so 0x%llx [%d,%d] "
7257 "defunct by force "
7258 "(%d)\n", __func__, proc_selfpid(),
7259 proc_best_name(current_proc()), proc_pid(p),
7260 proc_best_name(p), level,
7261 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7262 SOCK_DOM(so), SOCK_TYPE(so), err);
7263 }
7264 } else if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
7265 struct inpcb *inp = (struct inpcb *)so->so_pcb;
7266 struct ifnet *ifp = inp->inp_last_outifp;
7267
7268 if (ifp && IFNET_IS_CELLULAR(ifp)) {
7269 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nocell);
7270 } else if (so->so_flags & SOF_DELEGATED) {
7271 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd);
7272 } else if (soextbkidlestat.so_xbkidle_time == 0) {
7273 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_notime);
7274 } else if (noforce && p != PROC_NULL) {
7275 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_active);
7276
7277 so->so_flags1 |= SOF1_EXTEND_BK_IDLE_INPROG;
7278 so->so_extended_bk_start = net_uptime();
7279 OSBitOrAtomic(P_LXBKIDLEINPROG, &p->p_ladvflag);
7280
7281 inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY);
7282
7283 err = EOPNOTSUPP;
7284 SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
7285 "name %s level %d) so 0x%llx [%d,%d] "
7286 "extend bk idle "
7287 "(%d)\n", __func__, proc_selfpid(),
7288 proc_best_name(current_proc()), proc_pid(p),
7289 proc_best_name(p), level,
7290 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7291 SOCK_DOM(so), SOCK_TYPE(so), err);
7292 return err;
7293 } else {
7294 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_forced);
7295 }
7296 }
7297
7298 so->so_flags |= SOF_DEFUNCT;
7299
7300 /* Prevent further data from being appended to the socket buffers */
7301 snd->sb_flags |= SB_DROP;
7302 rcv->sb_flags |= SB_DROP;
7303
7304 /* Flush any existing data in the socket buffers */
7305 if (rcv->sb_cc != 0) {
7306 rcv->sb_flags &= ~SB_SEL;
7307 selthreadclear(&rcv->sb_sel);
7308 sbrelease(rcv);
7309 }
7310 if (snd->sb_cc != 0) {
7311 snd->sb_flags &= ~SB_SEL;
7312 selthreadclear(&snd->sb_sel);
7313 sbrelease(snd);
7314 }
7315
7316 done:
7317 if (p != PROC_NULL) {
7318 SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s level %d) "
7319 "so 0x%llx [%d,%d] %s defunct%s\n", __func__,
7320 proc_selfpid(), proc_best_name(current_proc()),
7321 proc_pid(p), proc_best_name(p), level,
7322 (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
7323 SOCK_TYPE(so), defunct ? "is already" : "marked as",
7324 (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ?
7325 " extbkidle" : "");
7326 }
7327 return err;
7328 }
7329
7330 int
7331 sodefunct(struct proc *p, struct socket *so, int level)
7332 {
7333 struct sockbuf *rcv, *snd;
7334
7335 if (!(so->so_flags & SOF_DEFUNCT)) {
7336 panic("%s improperly called", __func__);
7337 /* NOTREACHED */
7338 }
7339 if (so->so_state & SS_DEFUNCT) {
7340 goto done;
7341 }
7342
7343 rcv = &so->so_rcv;
7344 snd = &so->so_snd;
7345
7346 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
7347 char s[MAX_IPv6_STR_LEN];
7348 char d[MAX_IPv6_STR_LEN];
7349 struct inpcb *inp = sotoinpcb(so);
7350
7351 if (p != PROC_NULL) {
7352 SODEFUNCTLOG(
7353 "%s[%d, %s]: (target pid %d name %s level %d) "
7354 "so 0x%llx [%s %s:%d -> %s:%d] is now defunct "
7355 "[rcv_si 0x%x, snd_si 0x%x, rcv_fl 0x%x, "
7356 " snd_fl 0x%x]\n", __func__,
7357 proc_selfpid(), proc_best_name(current_proc()),
7358 proc_pid(p), proc_best_name(p), level,
7359 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7360 (SOCK_TYPE(so) == SOCK_STREAM) ? "TCP" : "UDP",
7361 inet_ntop(SOCK_DOM(so), ((SOCK_DOM(so) == PF_INET) ?
7362 (void *)&inp->inp_laddr.s_addr :
7363 (void *)&inp->in6p_laddr),
7364 s, sizeof(s)), ntohs(inp->in6p_lport),
7365 inet_ntop(SOCK_DOM(so), (SOCK_DOM(so) == PF_INET) ?
7366 (void *)&inp->inp_faddr.s_addr :
7367 (void *)&inp->in6p_faddr,
7368 d, sizeof(d)), ntohs(inp->in6p_fport),
7369 (uint32_t)rcv->sb_sel.si_flags,
7370 (uint32_t)snd->sb_sel.si_flags,
7371 rcv->sb_flags, snd->sb_flags);
7372 }
7373 } else if (p != PROC_NULL) {
7374 SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s level %d) "
7375 "so 0x%llx [%d,%d] is now defunct [rcv_si 0x%x, "
7376 "snd_si 0x%x, rcv_fl 0x%x, snd_fl 0x%x]\n", __func__,
7377 proc_selfpid(), proc_best_name(current_proc()),
7378 proc_pid(p), proc_best_name(p), level,
7379 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7380 SOCK_DOM(so), SOCK_TYPE(so),
7381 (uint32_t)rcv->sb_sel.si_flags,
7382 (uint32_t)snd->sb_sel.si_flags, rcv->sb_flags,
7383 snd->sb_flags);
7384 }
7385
7386 /*
7387 * Unwedge threads blocked on sbwait() and sb_lock().
7388 */
7389 sbwakeup(rcv);
7390 sbwakeup(snd);
7391
7392 so->so_flags1 |= SOF1_DEFUNCTINPROG;
7393 if (rcv->sb_flags & SB_LOCK) {
7394 sbunlock(rcv, TRUE); /* keep socket locked */
7395 }
7396 if (snd->sb_flags & SB_LOCK) {
7397 sbunlock(snd, TRUE); /* keep socket locked */
7398 }
7399 /*
7400 * Flush the buffers and disconnect. We explicitly call shutdown
7401 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
7402 * states are set for the socket. This would also flush out data
7403 * hanging off the receive list of this socket.
7404 */
7405 (void) soshutdownlock_final(so, SHUT_RD);
7406 (void) soshutdownlock_final(so, SHUT_WR);
7407 (void) sodisconnectlocked(so);
7408
7409 /*
7410 * Explicitly handle connectionless-protocol disconnection
7411 * and release any remaining data in the socket buffers.
7412 */
7413 if (!(so->so_state & SS_ISDISCONNECTED)) {
7414 (void) soisdisconnected(so);
7415 }
7416
7417 if (so->so_error == 0) {
7418 so->so_error = EBADF;
7419 }
7420
7421 if (rcv->sb_cc != 0) {
7422 rcv->sb_flags &= ~SB_SEL;
7423 selthreadclear(&rcv->sb_sel);
7424 sbrelease(rcv);
7425 }
7426 if (snd->sb_cc != 0) {
7427 snd->sb_flags &= ~SB_SEL;
7428 selthreadclear(&snd->sb_sel);
7429 sbrelease(snd);
7430 }
7431 so->so_state |= SS_DEFUNCT;
7432 OSIncrementAtomicLong((volatile long *)&sodefunct_calls);
7433
7434 done:
7435 return 0;
7436 }
7437
7438 int
7439 soresume(struct proc *p, struct socket *so, int locked)
7440 {
7441 if (locked == 0) {
7442 socket_lock(so, 1);
7443 }
7444
7445 if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) {
7446 SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s) so 0x%llx "
7447 "[%d,%d] resumed from bk idle\n",
7448 __func__, proc_selfpid(), proc_best_name(current_proc()),
7449 proc_pid(p), proc_best_name(p),
7450 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7451 SOCK_DOM(so), SOCK_TYPE(so));
7452
7453 so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG;
7454 so->so_extended_bk_start = 0;
7455 OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag);
7456
7457 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resumed);
7458 OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active);
7459 VERIFY(soextbkidlestat.so_xbkidle_active >= 0);
7460 }
7461 if (locked == 0) {
7462 socket_unlock(so, 1);
7463 }
7464
7465 return 0;
7466 }
7467
7468 /*
7469 * Does not attempt to account for sockets that are delegated from
7470 * the current process
7471 */
7472 int
7473 so_set_extended_bk_idle(struct socket *so, int optval)
7474 {
7475 int error = 0;
7476
7477 if ((SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) ||
7478 SOCK_PROTO(so) != IPPROTO_TCP) {
7479 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_notsupp);
7480 error = EOPNOTSUPP;
7481 } else if (optval == 0) {
7482 so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED;
7483
7484 soresume(current_proc(), so, 1);
7485 } else {
7486 struct proc *p = current_proc();
7487 struct fileproc *fp;
7488 int count = 0;
7489
7490 /*
7491 * Unlock the socket to avoid a lock ordering issue with
7492 * the proc fd table lock.
7493 */
7494 socket_unlock(so, 0);
7495
7496 proc_fdlock(p);
7497 fdt_foreach(fp, p) {
7498 struct socket *so2;
7499
7500 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
7501 continue;
7502 }
7503
7504 so2 = (struct socket *)fp->fp_glob->fg_data;
7505 if (so != so2 &&
7506 so2->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
7507 count++;
7508 }
7509 if (count >= soextbkidlestat.so_xbkidle_maxperproc) {
7510 break;
7511 }
7512 }
7513 proc_fdunlock(p);
7514
7515 socket_lock(so, 0);
7516
7517 if (count >= soextbkidlestat.so_xbkidle_maxperproc) {
7518 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_toomany);
7519 error = EBUSY;
7520 } else if (so->so_flags & SOF_DELEGATED) {
7521 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd);
7522 error = EBUSY;
7523 } else {
7524 so->so_flags1 |= SOF1_EXTEND_BK_IDLE_WANTED;
7525 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_wantok);
7526 }
7527 SODEFUNCTLOG("%s[%d, %s]: so 0x%llx [%d,%d] "
7528 "%s marked for extended bk idle\n",
7529 __func__, proc_selfpid(), proc_best_name(current_proc()),
7530 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7531 SOCK_DOM(so), SOCK_TYPE(so),
7532 (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ?
7533 "is" : "not");
7534 }
7535
7536 return error;
7537 }
7538
7539 static void
7540 so_stop_extended_bk_idle(struct socket *so)
7541 {
7542 so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG;
7543 so->so_extended_bk_start = 0;
7544
7545 OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active);
7546 VERIFY(soextbkidlestat.so_xbkidle_active >= 0);
7547 /*
7548 * Force defunct
7549 */
7550 sosetdefunct(current_proc(), so,
7551 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
7552 if (so->so_flags & SOF_DEFUNCT) {
7553 sodefunct(current_proc(), so,
7554 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
7555 }
7556 }
7557
7558 void
7559 so_drain_extended_bk_idle(struct socket *so)
7560 {
7561 if (so && (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) {
7562 /*
7563 * Only penalize sockets that have outstanding data
7564 */
7565 if (so->so_rcv.sb_cc || so->so_snd.sb_cc) {
7566 so_stop_extended_bk_idle(so);
7567
7568 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_drained);
7569 }
7570 }
7571 }
7572
7573 /*
7574 * Return value tells whether the socket is still in extended background idle
7575 */
7576 int
7577 so_check_extended_bk_idle_time(struct socket *so)
7578 {
7579 int ret = 1;
7580
7581 if ((so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) {
7582 SODEFUNCTLOG("%s[%d, %s]: so 0x%llx [%d,%d]\n",
7583 __func__, proc_selfpid(), proc_best_name(current_proc()),
7584 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7585 SOCK_DOM(so), SOCK_TYPE(so));
7586 if (net_uptime() - so->so_extended_bk_start >
7587 soextbkidlestat.so_xbkidle_time) {
7588 so_stop_extended_bk_idle(so);
7589
7590 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_expired);
7591
7592 ret = 0;
7593 } else {
7594 struct inpcb *inp = (struct inpcb *)so->so_pcb;
7595
7596 inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY);
7597 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resched);
7598 }
7599 }
7600
7601 return ret;
7602 }
7603
7604 void
7605 resume_proc_sockets(proc_t p)
7606 {
7607 if (p->p_ladvflag & P_LXBKIDLEINPROG) {
7608 struct fileproc *fp;
7609 struct socket *so;
7610
7611 proc_fdlock(p);
7612 fdt_foreach(fp, p) {
7613 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
7614 continue;
7615 }
7616
7617 so = (struct socket *)fp->fp_glob->fg_data;
7618 (void) soresume(p, so, 0);
7619 }
7620 proc_fdunlock(p);
7621
7622 OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag);
7623 }
7624 }
7625
7626 __private_extern__ int
7627 so_set_recv_anyif(struct socket *so, int optval)
7628 {
7629 int ret = 0;
7630
7631 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
7632 if (optval) {
7633 sotoinpcb(so)->inp_flags |= INP_RECV_ANYIF;
7634 } else {
7635 sotoinpcb(so)->inp_flags &= ~INP_RECV_ANYIF;
7636 }
7637 }
7638
7639
7640 return ret;
7641 }
7642
7643 __private_extern__ int
7644 so_get_recv_anyif(struct socket *so)
7645 {
7646 int ret = 0;
7647
7648 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
7649 ret = (sotoinpcb(so)->inp_flags & INP_RECV_ANYIF) ? 1 : 0;
7650 }
7651
7652 return ret;
7653 }
7654
7655 int
7656 so_set_restrictions(struct socket *so, uint32_t vals)
7657 {
7658 int nocell_old, nocell_new;
7659 int noexpensive_old, noexpensive_new;
7660 int noconstrained_old, noconstrained_new;
7661
7662 /*
7663 * Deny-type restrictions are trapdoors; once set they cannot be
7664 * unset for the lifetime of the socket. This allows them to be
7665 * issued by a framework on behalf of the application without
7666 * having to worry that they can be undone.
7667 *
7668 * Note here that socket-level restrictions override any protocol-
7669 * level restrictions. For instance, a SO_RESTRICT_DENY_CELLULAR
7670 * restriction issued on the socket has higher precedence
7671 * than INP_NO_IFT_CELLULAR. The latter is affected by the UUID
7672 * policy PROC_UUID_NO_CELLULAR for unrestricted sockets only,
7673 * i.e. when SO_RESTRICT_DENY_CELLULAR has not been issued.
7674 */
7675 nocell_old = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
7676 noexpensive_old = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
7677 noconstrained_old = (so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED);
7678 so->so_restrictions |= (vals & (SO_RESTRICT_DENY_IN |
7679 SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR |
7680 SO_RESTRICT_DENY_EXPENSIVE | SO_RESTRICT_DENY_CONSTRAINED));
7681 nocell_new = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
7682 noexpensive_new = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
7683 noconstrained_new = (so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED);
7684
7685 /* we can only set, not clear restrictions */
7686 if ((nocell_new - nocell_old) == 0 &&
7687 (noexpensive_new - noexpensive_old) == 0 &&
7688 (noconstrained_new - noconstrained_old) == 0) {
7689 return 0;
7690 }
7691 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
7692 if (nocell_new - nocell_old != 0) {
7693 /*
7694 * if deny cellular is now set, do what's needed
7695 * for INPCB
7696 */
7697 inp_set_nocellular(sotoinpcb(so));
7698 }
7699 if (noexpensive_new - noexpensive_old != 0) {
7700 inp_set_noexpensive(sotoinpcb(so));
7701 }
7702 if (noconstrained_new - noconstrained_old != 0) {
7703 inp_set_noconstrained(sotoinpcb(so));
7704 }
7705 }
7706
7707 if (SOCK_DOM(so) == PF_MULTIPATH) {
7708 mptcp_set_restrictions(so);
7709 }
7710
7711 return 0;
7712 }
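
/*
 * Illustrative sketch, not part of the original source: because the
 * deny bits are trapdoors, an in-kernel caller that wants to keep a
 * socket off cellular and expensive interfaces only needs to set the
 * corresponding flags once; later calls may add restrictions but can
 * never clear them. A minimal sketch, with the socket locked:
 *
 *	(void) so_set_restrictions(so,
 *	    SO_RESTRICT_DENY_CELLULAR | SO_RESTRICT_DENY_EXPENSIVE);
 */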
7713
7714 uint32_t
7715 so_get_restrictions(struct socket *so)
7716 {
7717 return so->so_restrictions & (SO_RESTRICT_DENY_IN |
7718 SO_RESTRICT_DENY_OUT |
7719 SO_RESTRICT_DENY_CELLULAR | SO_RESTRICT_DENY_EXPENSIVE);
7720 }
7721
7722 int
7723 so_set_effective_pid(struct socket *so, int epid, struct proc *p, boolean_t check_cred)
7724 {
7725 struct proc *ep = PROC_NULL;
7726 int error = 0;
7727
7728 /* pid 0 is reserved for kernel */
7729 if (epid == 0) {
7730 error = EINVAL;
7731 goto done;
7732 }
7733
7734 /*
7735 * If this is an in-kernel socket, prevent its delegate
7736 * association from changing unless the socket option is
7737 * coming from within the kernel itself.
7738 */
7739 if (so->last_pid == 0 && p != kernproc) {
7740 error = EACCES;
7741 goto done;
7742 }
7743
7744 /*
7745 * If this is issued by a process that's recorded as the
7746 * real owner of the socket, or if the pid is the same as
7747 * the process's own pid, then proceed. Otherwise ensure
7748 * that the issuing process has the necessary privileges.
7749 */
7750 if (check_cred && (epid != so->last_pid || epid != proc_pid(p))) {
7751 if ((error = priv_check_cred(kauth_cred_get(),
7752 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
7753 error = EACCES;
7754 goto done;
7755 }
7756 }
7757
7758 /* Find the process that corresponds to the effective pid */
7759 if ((ep = proc_find(epid)) == PROC_NULL) {
7760 error = ESRCH;
7761 goto done;
7762 }
7763
7764 /*
7765 * If a process tries to delegate the socket to itself, then
7766 * there's really nothing to do; treat it as a way for the
7767 * delegate association to be cleared. Note that we check
7768 * the passed-in proc rather than calling proc_selfpid(),
7769 * as we need to check the process issuing the socket option
7770 * which could be kernproc. Given that we don't allow 0 for
7771 * effective pid, it means that a delegated in-kernel socket
7772 * stays delegated during its lifetime (which is probably OK.)
7773 */
7774 if (epid == proc_pid(p)) {
7775 so->so_flags &= ~SOF_DELEGATED;
7776 so->e_upid = 0;
7777 so->e_pid = 0;
7778 uuid_clear(so->e_uuid);
7779 } else {
7780 so->so_flags |= SOF_DELEGATED;
7781 so->e_upid = proc_uniqueid(ep);
7782 so->e_pid = proc_pid(ep);
7783 proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid));
7784
7785 #if defined(XNU_TARGET_OS_OSX)
7786 if (ep->p_responsible_pid != so->e_pid) {
7787 proc_t rp = proc_find(ep->p_responsible_pid);
7788 if (rp != PROC_NULL) {
7789 proc_getexecutableuuid(rp, so->so_ruuid, sizeof(so->so_ruuid));
7790 so->so_rpid = ep->p_responsible_pid;
7791 proc_rele(rp);
7792 } else {
7793 uuid_clear(so->so_ruuid);
7794 so->so_rpid = -1;
7795 }
7796 }
7797 #endif
7798 }
7799 if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
7800 (*so->so_proto->pr_update_last_owner)(so, NULL, ep);
7801 }
7802 done:
7803 if (error == 0 && net_io_policy_log) {
7804 uuid_string_t buf;
7805
7806 uuid_unparse(so->e_uuid, buf);
7807 log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
7808 "euuid %s%s\n", __func__, proc_name_address(p),
7809 proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7810 SOCK_DOM(so), SOCK_TYPE(so),
7811 so->e_pid, proc_name_address(ep), buf,
7812 ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
7813 } else if (error != 0 && net_io_policy_log) {
7814 log(LOG_ERR, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
7815 "ERROR (%d)\n", __func__, proc_name_address(p),
7816 proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7817 SOCK_DOM(so), SOCK_TYPE(so),
7818 epid, (ep == PROC_NULL) ? "PROC_NULL" :
7819 proc_name_address(ep), error);
7820 }
7821
7822 /* Update this socket's policy upon success */
7823 if (error == 0) {
7824 so->so_policy_gencnt *= -1;
7825 so_update_policy(so);
7826 #if NECP
7827 so_update_necp_policy(so, NULL, NULL);
7828 #endif /* NECP */
7829 }
7830
7831 if (ep != PROC_NULL) {
7832 proc_rele(ep);
7833 }
7834
7835 return error;
7836 }
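
/*
 * Illustrative sketch, not part of the original source: the clearing
 * behaviour described above means that delegating a socket to the
 * issuing process itself simply drops SOF_DELEGATED. A minimal
 * in-kernel sketch, with the socket locked, 'p' the issuing proc and
 * 'target_pid' a hypothetical pid:
 *
 *	error = so_set_effective_pid(so, target_pid, p, TRUE);
 *	// target_pid == proc_pid(p) clears the delegate association;
 *	// with check_cred set, any pid that does not match both
 *	// so->last_pid and proc_pid(p) requires
 *	// PRIV_NET_PRIVILEGED_SOCKET_DELEGATE.
 */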
7837
7838 int
7839 so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p, boolean_t check_cred)
7840 {
7841 uuid_string_t buf;
7842 uuid_t uuid;
7843 int error = 0;
7844
7845 /* UUID must not be all-zeroes (reserved for kernel) */
7846 if (uuid_is_null(euuid)) {
7847 error = EINVAL;
7848 goto done;
7849 }
7850
7851 /*
7852 * If this is an in-kernel socket, prevent its delegate
7853 * association from changing unless the socket option is
7854 * coming from within the kernel itself.
7855 */
7856 if (so->last_pid == 0 && p != kernproc) {
7857 error = EACCES;
7858 goto done;
7859 }
7860
7861 /* Get the UUID of the issuing process */
7862 proc_getexecutableuuid(p, uuid, sizeof(uuid));
7863
7864 /*
7865 * If this is issued by a process that's recorded as the
7866 * real owner of the socket, or if the uuid is the same as
7867 * the process's own uuid, then proceed. Otherwise ensure
7868 * that the issuing process has the necessary privileges.
7869 */
7870 if (check_cred &&
7871 (uuid_compare(euuid, so->last_uuid) != 0 ||
7872 uuid_compare(euuid, uuid) != 0)) {
7873 if ((error = priv_check_cred(kauth_cred_get(),
7874 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
7875 error = EACCES;
7876 goto done;
7877 }
7878 }
7879
7880 /*
7881 * If a process tries to delegate the socket to itself, then
7882 * there's really nothing to do; treat it as a way for the
7883 * delegate association to be cleared. Note that we check
7884 * the uuid of the passed-in proc rather than that of the
7885 * current process, as we need to check the process issuing
7886 * the socket option which could be kernproc itself. Given
7887 * that we don't allow 0 for effective uuid, it means that
7888 * a delegated in-kernel socket stays delegated during its
7889 * lifetime (which is okay.)
7890 */
7891 if (uuid_compare(euuid, uuid) == 0) {
7892 so->so_flags &= ~SOF_DELEGATED;
7893 so->e_upid = 0;
7894 so->e_pid = 0;
7895 uuid_clear(so->e_uuid);
7896 } else {
7897 so->so_flags |= SOF_DELEGATED;
7898 /*
7899 * Unlike so_set_effective_pid(), we only have the UUID
7900 * here and the process ID is not known. Inherit the
7901 * real {pid,upid} of the socket.
7902 */
7903 so->e_upid = so->last_upid;
7904 so->e_pid = so->last_pid;
7905 uuid_copy(so->e_uuid, euuid);
7906 }
7907 /*
7908 * The following will clear the effective process name as it's the same
7909 * as the real process
7910 */
7911 if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
7912 (*so->so_proto->pr_update_last_owner)(so, NULL, NULL);
7913 }
7914 done:
7915 if (error == 0 && net_io_policy_log) {
7916 uuid_unparse(so->e_uuid, buf);
7917 log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d "
7918 "euuid %s%s\n", __func__, proc_name_address(p), proc_pid(p),
7919 (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
7920 SOCK_TYPE(so), so->e_pid, buf,
7921 ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
7922 } else if (error != 0 && net_io_policy_log) {
7923 uuid_unparse(euuid, buf);
7924 log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] euuid %s "
7925 "ERROR (%d)\n", __func__, proc_name_address(p), proc_pid(p),
7926 (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
7927 SOCK_TYPE(so), buf, error);
7928 }
7929
7930 /* Update this socket's policy upon success */
7931 if (error == 0) {
7932 so->so_policy_gencnt *= -1;
7933 so_update_policy(so);
7934 #if NECP
7935 so_update_necp_policy(so, NULL, NULL);
7936 #endif /* NECP */
7937 }
7938
7939 return error;
7940 }
7941
7942 void
7943 netpolicy_post_msg(uint32_t ev_code, struct netpolicy_event_data *ev_data,
7944 uint32_t ev_datalen)
7945 {
7946 struct kev_msg ev_msg;
7947
7948 /*
7949 * A netpolicy event always starts with a netpolicy_event_data
7950 * structure, but the caller can provide for a longer event
7951 * structure to post, depending on the event code.
7952 */
7953 VERIFY(ev_data != NULL && ev_datalen >= sizeof(*ev_data));
7954
7955 bzero(&ev_msg, sizeof(ev_msg));
7956 ev_msg.vendor_code = KEV_VENDOR_APPLE;
7957 ev_msg.kev_class = KEV_NETWORK_CLASS;
7958 ev_msg.kev_subclass = KEV_NETPOLICY_SUBCLASS;
7959 ev_msg.event_code = ev_code;
7960
7961 ev_msg.dv[0].data_ptr = ev_data;
7962 ev_msg.dv[0].data_length = ev_datalen;
7963
7964 kev_post_msg(&ev_msg);
7965 }
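
/*
 * Illustrative sketch, not part of the original source: the shortest
 * possible netpolicy event is just the base structure; event-specific
 * structures embed it at offset zero and pass a larger length. The
 * event code below is an assumption used only for illustration:
 *
 *	struct netpolicy_event_data ev_data;
 *
 *	bzero(&ev_data, sizeof(ev_data));
 *	// ... fill in the base fields for this event ...
 *	netpolicy_post_msg(KEV_NETPOLICY_IFDENIED, &ev_data,
 *	    sizeof(ev_data));
 */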
7966
7967 void
7968 socket_post_kev_msg(uint32_t ev_code,
7969 struct kev_socket_event_data *ev_data,
7970 uint32_t ev_datalen)
7971 {
7972 struct kev_msg ev_msg;
7973
7974 bzero(&ev_msg, sizeof(ev_msg));
7975 ev_msg.vendor_code = KEV_VENDOR_APPLE;
7976 ev_msg.kev_class = KEV_NETWORK_CLASS;
7977 ev_msg.kev_subclass = KEV_SOCKET_SUBCLASS;
7978 ev_msg.event_code = ev_code;
7979
7980 ev_msg.dv[0].data_ptr = ev_data;
7981 ev_msg.dv[0].data_length = ev_datalen;
7982
7983 kev_post_msg(&ev_msg);
7984 }
7985
7986 void
7987 socket_post_kev_msg_closed(struct socket *so)
7988 {
7989 struct kev_socket_closed ev = {};
7990 struct sockaddr *socksa = NULL, *peersa = NULL;
7991 int err;
7992
7993 if ((so->so_flags1 & SOF1_WANT_KEV_SOCK_CLOSED) == 0) {
7994 return;
7995 }
7996 err = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &socksa);
7997 if (err == 0) {
7998 err = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so,
7999 &peersa);
8000 if (err == 0) {
8001 memcpy(&ev.ev_data.kev_sockname, socksa,
8002 min(socksa->sa_len,
8003 sizeof(ev.ev_data.kev_sockname)));
8004 memcpy(&ev.ev_data.kev_peername, peersa,
8005 min(peersa->sa_len,
8006 sizeof(ev.ev_data.kev_peername)));
8007 socket_post_kev_msg(KEV_SOCKET_CLOSED,
8008 &ev.ev_data, sizeof(ev));
8009 }
8010 }
8011 FREE(socksa, M_SONAME);
8012 FREE(peersa, M_SONAME);
8013 }