apple/xnu.git: bsd/kern/uipc_socket.c @ 607af6d3c5aba890df75bbf09b4240c933bc4e38
1 /*
2 * Copyright (c) 1998-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1988, 1990, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
62 */
63 /*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
69
70 #include <sys/param.h>
71 #include <sys/systm.h>
72 #include <sys/filedesc.h>
73 #include <sys/proc.h>
74 #include <sys/proc_internal.h>
75 #include <sys/kauth.h>
76 #include <sys/file_internal.h>
77 #include <sys/fcntl.h>
78 #include <sys/malloc.h>
79 #include <sys/mbuf.h>
80 #include <sys/domain.h>
81 #include <sys/kernel.h>
82 #include <sys/event.h>
83 #include <sys/poll.h>
84 #include <sys/protosw.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/resourcevar.h>
88 #include <sys/signalvar.h>
89 #include <sys/sysctl.h>
90 #include <sys/syslog.h>
91 #include <sys/uio.h>
92 #include <sys/uio_internal.h>
93 #include <sys/ev.h>
94 #include <sys/kdebug.h>
95 #include <sys/un.h>
96 #include <sys/user.h>
97 #include <sys/priv.h>
98 #include <sys/kern_event.h>
99 #include <net/route.h>
100 #include <net/init.h>
101 #include <net/net_api_stats.h>
102 #include <net/ntstat.h>
103 #include <net/content_filter.h>
104 #include <netinet/in.h>
105 #include <netinet/in_pcb.h>
106 #include <netinet/in_tclass.h>
107 #include <netinet/in_var.h>
108 #include <netinet/tcp_var.h>
109 #include <netinet/ip6.h>
110 #include <netinet6/ip6_var.h>
111 #include <netinet/flow_divert.h>
112 #include <kern/zalloc.h>
113 #include <kern/locks.h>
114 #include <machine/limits.h>
115 #include <libkern/OSAtomic.h>
116 #include <pexpert/pexpert.h>
117 #include <kern/assert.h>
118 #include <kern/task.h>
119 #include <kern/policy_internal.h>
120
121 #include <sys/kpi_mbuf.h>
122 #include <sys/mcache.h>
123 #include <sys/unpcb.h>
124 #include <libkern/section_keywords.h>
125
126 #if CONFIG_MACF
127 #include <security/mac_framework.h>
128 #endif /* MAC */
129
130 #if MULTIPATH
131 #include <netinet/mp_pcb.h>
132 #include <netinet/mptcp_var.h>
133 #endif /* MULTIPATH */
134
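/* Round 'a' up to the next multiple of 'b'; assumes 'b' is a power of two */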
135 #define ROUNDUP(a, b) (((a) + ((b) - 1)) & (~((b) - 1)))
136
137 #if DEBUG || DEVELOPMENT
138 #define DEBUG_KERNEL_ADDRPERM(_v) (_v)
139 #else
140 #define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
141 #endif
142
143 /* TODO: this should be in a header file somewhere */
144 extern char *proc_name_address(void *p);
145
146 static u_int32_t so_cache_hw; /* High water mark for socache */
147 static u_int32_t so_cache_timeouts; /* number of timeouts */
148 static u_int32_t so_cache_max_freed; /* max freed per timeout */
149 static u_int32_t cached_sock_count = 0;
150 STAILQ_HEAD(, socket) so_cache_head;
151 int max_cached_sock_count = MAX_CACHED_SOCKETS;
152 static u_int32_t so_cache_time;
153 static int socketinit_done;
154 static struct zone *so_cache_zone;
155
156 static lck_grp_t *so_cache_mtx_grp;
157 static lck_attr_t *so_cache_mtx_attr;
158 static lck_grp_attr_t *so_cache_mtx_grp_attr;
159 static lck_mtx_t *so_cache_mtx;
160
161 #include <machine/limits.h>
162
163 static int filt_sorattach(struct knote *kn, struct kevent_qos_s *kev);
164 static void filt_sordetach(struct knote *kn);
165 static int filt_soread(struct knote *kn, long hint);
166 static int filt_sortouch(struct knote *kn, struct kevent_qos_s *kev);
167 static int filt_sorprocess(struct knote *kn, struct kevent_qos_s *kev);
168
169 static int filt_sowattach(struct knote *kn, struct kevent_qos_s *kev);
170 static void filt_sowdetach(struct knote *kn);
171 static int filt_sowrite(struct knote *kn, long hint);
172 static int filt_sowtouch(struct knote *kn, struct kevent_qos_s *kev);
173 static int filt_sowprocess(struct knote *kn, struct kevent_qos_s *kev);
174
175 static int filt_sockattach(struct knote *kn, struct kevent_qos_s *kev);
176 static void filt_sockdetach(struct knote *kn);
177 static int filt_sockev(struct knote *kn, long hint);
178 static int filt_socktouch(struct knote *kn, struct kevent_qos_s *kev);
179 static int filt_sockprocess(struct knote *kn, struct kevent_qos_s *kev);
180
181 static int sooptcopyin_timeval(struct sockopt *, struct timeval *);
182 static int sooptcopyout_timeval(struct sockopt *, const struct timeval *);
183
184 SECURITY_READ_ONLY_EARLY(struct filterops) soread_filtops = {
185 .f_isfd = 1,
186 .f_attach = filt_sorattach,
187 .f_detach = filt_sordetach,
188 .f_event = filt_soread,
189 .f_touch = filt_sortouch,
190 .f_process = filt_sorprocess,
191 };
192
193 SECURITY_READ_ONLY_EARLY(struct filterops) sowrite_filtops = {
194 .f_isfd = 1,
195 .f_attach = filt_sowattach,
196 .f_detach = filt_sowdetach,
197 .f_event = filt_sowrite,
198 .f_touch = filt_sowtouch,
199 .f_process = filt_sowprocess,
200 };
201
202 SECURITY_READ_ONLY_EARLY(struct filterops) sock_filtops = {
203 .f_isfd = 1,
204 .f_attach = filt_sockattach,
205 .f_detach = filt_sockdetach,
206 .f_event = filt_sockev,
207 .f_touch = filt_socktouch,
208 .f_process = filt_sockprocess,
209 };
210
211 SECURITY_READ_ONLY_EARLY(struct filterops) soexcept_filtops = {
212 .f_isfd = 1,
213 .f_attach = filt_sorattach,
214 .f_detach = filt_sordetach,
215 .f_event = filt_soread,
216 .f_touch = filt_sortouch,
217 .f_process = filt_sorprocess,
218 };
219
220 SYSCTL_DECL(_kern_ipc);
221
222 #define EVEN_MORE_LOCKING_DEBUG 0
223
224 int socket_debug = 0;
225 SYSCTL_INT(_kern_ipc, OID_AUTO, socket_debug,
226 CTLFLAG_RW | CTLFLAG_LOCKED, &socket_debug, 0, "");
227
228 static unsigned long sodefunct_calls = 0;
229 SYSCTL_LONG(_kern_ipc, OID_AUTO, sodefunct_calls, CTLFLAG_LOCKED,
230 &sodefunct_calls, "");
231
232 ZONE_DECLARE(socket_zone, "socket", sizeof(struct socket), ZC_ZFREE_CLEARMEM);
233 so_gen_t so_gencnt; /* generation count for sockets */
234
235 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
236 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
237
238 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
239 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
240 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
241 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
242 #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
243 #define DBG_FNC_SOSEND_LIST NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 3)
244 #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
245 #define DBG_FNC_SORECEIVE_LIST NETDBG_CODE(DBG_NETSOCK, (8 << 8) | 3)
246 #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))
247
248 #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES)
249
250 int somaxconn = SOMAXCONN;
251 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn,
252 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxconn, 0, "");
253
254 /* Should we get a maximum also ??? */
255 static int sosendmaxchain = 65536;
256 static int sosendminchain = 16384;
257 static int sorecvmincopy = 16384;
258 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain,
259 CTLFLAG_RW | CTLFLAG_LOCKED, &sosendminchain, 0, "");
260 SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy,
261 CTLFLAG_RW | CTLFLAG_LOCKED, &sorecvmincopy, 0, "");
262
263 /*
264 * Set to enable jumbo clusters (if available) for large writes when
265 * the socket is marked with SOF_MULTIPAGES; see below.
266 */
267 int sosendjcl = 1;
268 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl,
269 CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl, 0, "");
270
271 /*
272 * Set this to ignore SOF_MULTIPAGES and use jumbo clusters for large
273 * writes on the socket for all protocols on any network interfaces,
274 * depending upon sosendjcl above. Be extra careful when setting this
275 * to 1, because sending packets that cross physical pages down to
276 * broken drivers (those that falsely assume that the physical pages
277 * are contiguous) might lead to system panics or silent data corruption.
278 * When set to 0, the system will respect SOF_MULTIPAGES, which is set
279 * only for TCP sockets whose outgoing interface is IFNET_MULTIPAGES
280 * capable. Set this to 1 only for testing/debugging purposes.
281 */
282 int sosendjcl_ignore_capab = 0;
283 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab,
284 CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl_ignore_capab, 0, "");
285
286 /*
287 * Set this to ignore SOF1_IF_2KCL and use big clusters for large
288 * writes on the socket for all protocols on any network interfaces.
289 * Be extra careful when setting this to 1, because sending down packets with
290 * clusters larger than 2 KB might lead to system panics or data corruption.
291 * When set to 0, the system will respect SOF1_IF_2KCL, which is set
292 * on the outgoing interface.
293 * Set this to 1 for testing/debugging purposes only.
294 */
295 int sosendbigcl_ignore_capab = 0;
296 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendbigcl_ignore_capab,
297 CTLFLAG_RW | CTLFLAG_LOCKED, &sosendbigcl_ignore_capab, 0, "");
298
299 int sodefunctlog = 0;
300 SYSCTL_INT(_kern_ipc, OID_AUTO, sodefunctlog, CTLFLAG_RW | CTLFLAG_LOCKED,
301 &sodefunctlog, 0, "");
302
303 int sothrottlelog = 0;
304 SYSCTL_INT(_kern_ipc, OID_AUTO, sothrottlelog, CTLFLAG_RW | CTLFLAG_LOCKED,
305 &sothrottlelog, 0, "");
306
307 int sorestrictrecv = 1;
308 SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictrecv, CTLFLAG_RW | CTLFLAG_LOCKED,
309 &sorestrictrecv, 0, "Enable inbound interface restrictions");
310
311 int sorestrictsend = 1;
312 SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictsend, CTLFLAG_RW | CTLFLAG_LOCKED,
313 &sorestrictsend, 0, "Enable outbound interface restrictions");
314
315 int soreserveheadroom = 1;
316 SYSCTL_INT(_kern_ipc, OID_AUTO, soreserveheadroom, CTLFLAG_RW | CTLFLAG_LOCKED,
317 &soreserveheadroom, 0, "To allocate contiguous datagram buffers");
318
319 #if (DEBUG || DEVELOPMENT)
320 int so_notsent_lowat_check = 1;
321 SYSCTL_INT(_kern_ipc, OID_AUTO, notsent_lowat, CTLFLAG_RW | CTLFLAG_LOCKED,
322 &so_notsent_lowat_check, 0, "enable/disable notsent lowat check");
323 #endif /* DEBUG || DEVELOPMENT */
324
325 int so_accept_list_waits = 0;
326 #if (DEBUG || DEVELOPMENT)
327 SYSCTL_INT(_kern_ipc, OID_AUTO, accept_list_waits, CTLFLAG_RW | CTLFLAG_LOCKED,
328 &so_accept_list_waits, 0, "number of waits for listener incomp list");
329 #endif /* DEBUG || DEVELOPMENT */
330
331 extern struct inpcbinfo tcbinfo;
332
333 /* TODO: these should be in header file */
334 extern int get_inpcb_str_size(void);
335 extern int get_tcp_str_size(void);
336
337 vm_size_t so_cache_zone_element_size;
338
339 static int sodelayed_copy(struct socket *, struct uio *, struct mbuf **,
340 user_ssize_t *);
341 static void cached_sock_alloc(struct socket **, zalloc_flags_t);
342 static void cached_sock_free(struct socket *);
343
344 /*
345 * Maximum number of extended background idle sockets per process.
346 * Set to zero to disable further setting of the option.
347 */
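/* Defaults: one such socket per process, 600 seconds of idle time, 128 KB receive high-water mark */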
348
349 #define SO_IDLE_BK_IDLE_MAX_PER_PROC 1
350 #define SO_IDLE_BK_IDLE_TIME 600
351 #define SO_IDLE_BK_IDLE_RCV_HIWAT 131072
352
353 struct soextbkidlestat soextbkidlestat;
354
355 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxextbkidleperproc,
356 CTLFLAG_RW | CTLFLAG_LOCKED, &soextbkidlestat.so_xbkidle_maxperproc, 0,
357 "Maximum of extended background idle sockets per process");
358
359 SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidletime, CTLFLAG_RW | CTLFLAG_LOCKED,
360 &soextbkidlestat.so_xbkidle_time, 0,
361 "Time in seconds to keep extended background idle sockets");
362
363 SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidlercvhiwat, CTLFLAG_RW | CTLFLAG_LOCKED,
364 &soextbkidlestat.so_xbkidle_rcvhiwat, 0,
365 "High water mark for extended background idle sockets");
366
367 SYSCTL_STRUCT(_kern_ipc, OID_AUTO, extbkidlestat, CTLFLAG_RD | CTLFLAG_LOCKED,
368 &soextbkidlestat, soextbkidlestat, "");
369
370 int so_set_extended_bk_idle(struct socket *, int);
371
372
373 /*
374 * SOTCDB_NO_DSCP is set by default, to prevent the networking stack from
375 * setting the DSCP code on the packet based on the service class; see
376 * <rdar://problem/11277343> for details.
377 */
378 __private_extern__ u_int32_t sotcdb = 0;
379 SYSCTL_INT(_kern_ipc, OID_AUTO, sotcdb, CTLFLAG_RW | CTLFLAG_LOCKED,
380 &sotcdb, 0, "");
381
382 void
383 socketinit(void)
384 {
385 _CASSERT(sizeof(so_gencnt) == sizeof(uint64_t));
386 VERIFY(IS_P2ALIGNED(&so_gencnt, sizeof(uint32_t)));
387
388 #ifdef __LP64__
389 _CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user64_sa_endpoints));
390 _CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user64_sa_endpoints, sae_srcif));
391 _CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user64_sa_endpoints, sae_srcaddr));
392 _CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user64_sa_endpoints, sae_srcaddrlen));
393 _CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user64_sa_endpoints, sae_dstaddr));
394 _CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user64_sa_endpoints, sae_dstaddrlen));
395 #else
396 _CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user32_sa_endpoints));
397 _CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user32_sa_endpoints, sae_srcif));
398 _CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user32_sa_endpoints, sae_srcaddr));
399 _CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user32_sa_endpoints, sae_srcaddrlen));
400 _CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user32_sa_endpoints, sae_dstaddr));
401 _CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user32_sa_endpoints, sae_dstaddrlen));
402 #endif
403
404 if (socketinit_done) {
405 printf("socketinit: already called...\n");
406 return;
407 }
408 socketinit_done = 1;
409
410 PE_parse_boot_argn("socket_debug", &socket_debug,
411 sizeof(socket_debug));
412
413 /*
414 * allocate lock group attribute and group for socket cache mutex
415 */
416 so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
417 so_cache_mtx_grp = lck_grp_alloc_init("so_cache",
418 so_cache_mtx_grp_attr);
419
420 /*
421 * allocate the lock attribute for socket cache mutex
422 */
423 so_cache_mtx_attr = lck_attr_alloc_init();
424
425 /* cached sockets mutex */
426 so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr);
427 if (so_cache_mtx == NULL) {
428 panic("%s: unable to allocate so_cache_mtx\n", __func__);
429 /* NOTREACHED */
430 }
431 STAILQ_INIT(&so_cache_head);
432
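/*
 * Each cached element holds the socket followed by a saved inpcb and a
 * saved TCP pcb; the extra 4 bytes per structure leave slack for the
 * longword alignment done in cached_sock_alloc().
 */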
433 so_cache_zone_element_size = (vm_size_t)(sizeof(struct socket) + 4
434 + get_inpcb_str_size() + 4 + get_tcp_str_size());
435
436 so_cache_zone = zone_create("socache zone", so_cache_zone_element_size,
437 ZC_ZFREE_CLEARMEM | ZC_NOENCRYPT);
438
439 bzero(&soextbkidlestat, sizeof(struct soextbkidlestat));
440 soextbkidlestat.so_xbkidle_maxperproc = SO_IDLE_BK_IDLE_MAX_PER_PROC;
441 soextbkidlestat.so_xbkidle_time = SO_IDLE_BK_IDLE_TIME;
442 soextbkidlestat.so_xbkidle_rcvhiwat = SO_IDLE_BK_IDLE_RCV_HIWAT;
443
444 in_pcbinit();
445 sflt_init();
446 socket_tclass_init();
447 #if MULTIPATH
448 mp_pcbinit();
449 #endif /* MULTIPATH */
450 }
451
452 static void
453 cached_sock_alloc(struct socket **so, zalloc_flags_t how)
454 {
455 caddr_t temp;
456 uintptr_t offset;
457
458 lck_mtx_lock(so_cache_mtx);
459
460 if (!STAILQ_EMPTY(&so_cache_head)) {
461 VERIFY(cached_sock_count > 0);
462
463 *so = STAILQ_FIRST(&so_cache_head);
464 STAILQ_REMOVE_HEAD(&so_cache_head, so_cache_ent);
465 STAILQ_NEXT((*so), so_cache_ent) = NULL;
466
467 cached_sock_count--;
468 lck_mtx_unlock(so_cache_mtx);
469
470 temp = (*so)->so_saved_pcb;
471 bzero((caddr_t)*so, sizeof(struct socket));
472
473 (*so)->so_saved_pcb = temp;
474 } else {
475 lck_mtx_unlock(so_cache_mtx);
476
477 *so = zalloc_flags(so_cache_zone, how | Z_ZERO);
478
479 /*
480 * Define offsets for extra structures into our
481 * single block of memory. Align extra structures
482 * on longword boundaries.
483 */
484
485 offset = (uintptr_t)*so;
486 offset += sizeof(struct socket);
487
488 offset = ALIGN(offset);
489
490 (*so)->so_saved_pcb = (caddr_t)offset;
491 offset += get_inpcb_str_size();
492
493 offset = ALIGN(offset);
494
495 ((struct inpcb *)(void *)(*so)->so_saved_pcb)->inp_saved_ppcb =
496 (caddr_t)offset;
497 }
498
499 OSBitOrAtomic(SOF1_CACHED_IN_SOCK_LAYER, &(*so)->so_flags1);
500 }
501
502 static void
503 cached_sock_free(struct socket *so)
504 {
505 lck_mtx_lock(so_cache_mtx);
506
507 so_cache_time = net_uptime();
508 if (++cached_sock_count > max_cached_sock_count) {
509 --cached_sock_count;
510 lck_mtx_unlock(so_cache_mtx);
511 zfree(so_cache_zone, so);
512 } else {
513 if (so_cache_hw < cached_sock_count) {
514 so_cache_hw = cached_sock_count;
515 }
516
517 STAILQ_INSERT_TAIL(&so_cache_head, so, so_cache_ent);
518
519 so->cache_timestamp = so_cache_time;
520 lck_mtx_unlock(so_cache_mtx);
521 }
522 }
523
524 void
525 so_update_last_owner_locked(struct socket *so, proc_t self)
526 {
527 if (so->last_pid != 0) {
528 /*
529 * last_pid and last_upid should remain zero for sockets
530 * created using sock_socket. The check above achieves that
531 */
532 if (self == PROC_NULL) {
533 self = current_proc();
534 }
535
536 if (so->last_upid != proc_uniqueid(self) ||
537 so->last_pid != proc_pid(self)) {
538 so->last_upid = proc_uniqueid(self);
539 so->last_pid = proc_pid(self);
540 proc_getexecutableuuid(self, so->last_uuid,
541 sizeof(so->last_uuid));
542 if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
543 (*so->so_proto->pr_update_last_owner)(so, self, NULL);
544 }
545 }
546 proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid));
547 }
548 }
549
550 void
551 so_update_policy(struct socket *so)
552 {
553 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
554 (void) inp_update_policy(sotoinpcb(so));
555 }
556 }
557
558 #if NECP
559 static void
560 so_update_necp_policy(struct socket *so, struct sockaddr *override_local_addr,
561 struct sockaddr *override_remote_addr)
562 {
563 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
564 inp_update_necp_policy(sotoinpcb(so), override_local_addr,
565 override_remote_addr, 0);
566 }
567 }
568 #endif /* NECP */
569
570 boolean_t
571 so_cache_timer(void)
572 {
573 struct socket *p;
574 int n_freed = 0;
575 boolean_t rc = FALSE;
576
577 lck_mtx_lock(so_cache_mtx);
578 so_cache_timeouts++;
579 so_cache_time = net_uptime();
580
581 while (!STAILQ_EMPTY(&so_cache_head)) {
582 VERIFY(cached_sock_count > 0);
583 p = STAILQ_FIRST(&so_cache_head);
584 if ((so_cache_time - p->cache_timestamp) <
585 SO_CACHE_TIME_LIMIT) {
586 break;
587 }
588
589 STAILQ_REMOVE_HEAD(&so_cache_head, so_cache_ent);
590 --cached_sock_count;
591
592 zfree(so_cache_zone, p);
593
594 if (++n_freed >= SO_CACHE_MAX_FREE_BATCH) {
595 so_cache_max_freed++;
596 break;
597 }
598 }
599
600 /* Schedule again if there is more to clean up */
601 if (!STAILQ_EMPTY(&so_cache_head)) {
602 rc = TRUE;
603 }
604
605 lck_mtx_unlock(so_cache_mtx);
606 return rc;
607 }
608
609 /*
610 * Get a socket structure from our zone, and initialize it.
611 * We don't implement `waitok' yet (see comments in uipc_domain.c).
612 * Note that it would probably be better to allocate socket
613 * and PCB at the same time, but I'm not convinced that all
614 * the protocols can be easily modified to do this.
615 */
616 struct socket *
617 soalloc(int waitok, int dom, int type)
618 {
619 zalloc_flags_t how = waitok ? Z_WAITOK : Z_NOWAIT;
620 struct socket *so;
621
622 if ((dom == PF_INET) && (type == SOCK_STREAM)) {
623 cached_sock_alloc(&so, how);
624 } else {
625 so = zalloc_flags(socket_zone, how | Z_ZERO);
626 }
627 if (so != NULL) {
628 so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt);
629
630 /*
631 * Increment the socket allocation statistics
632 */
633 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_alloc_total);
634 }
635
636 return so;
637 }
638
639 int
640 socreate_internal(int dom, struct socket **aso, int type, int proto,
641 struct proc *p, uint32_t flags, struct proc *ep)
642 {
643 struct protosw *prp;
644 struct socket *so;
645 int error = 0;
646 #if defined(XNU_TARGET_OS_OSX)
647 pid_t rpid = -1;
648 #endif
649
650 #if TCPDEBUG
651 extern int tcpconsdebug;
652 #endif
653
654 VERIFY(aso != NULL);
655 *aso = NULL;
656
657 if (proto != 0) {
658 prp = pffindproto(dom, proto, type);
659 } else {
660 prp = pffindtype(dom, type);
661 }
662
663 if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL) {
664 if (pffinddomain(dom) == NULL) {
665 return EAFNOSUPPORT;
666 }
667 if (proto != 0) {
668 if (pffindprotonotype(dom, proto) != NULL) {
669 return EPROTOTYPE;
670 }
671 }
672 return EPROTONOSUPPORT;
673 }
674 if (prp->pr_type != type) {
675 return EPROTOTYPE;
676 }
677 so = soalloc(1, dom, type);
678 if (so == NULL) {
679 return ENOBUFS;
680 }
681
682 switch (dom) {
683 case PF_LOCAL:
684 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_local_total);
685 break;
686 case PF_INET:
687 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_inet_total);
688 if (type == SOCK_STREAM) {
689 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_stream_total);
690 } else {
691 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_total);
692 }
693 break;
694 case PF_ROUTE:
695 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_route_total);
696 break;
697 case PF_NDRV:
698 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_ndrv_total);
699 break;
700 case PF_KEY:
701 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_key_total);
702 break;
703 case PF_INET6:
704 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_inet6_total);
705 if (type == SOCK_STREAM) {
706 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet6_stream_total);
707 } else {
708 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet6_dgram_total);
709 }
710 break;
711 case PF_SYSTEM:
712 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_system_total);
713 break;
714 case PF_MULTIPATH:
715 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_multipath_total);
716 break;
717 default:
718 INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_other_total);
719 break;
720 }
721
722 if (flags & SOCF_MPTCP) {
723 so->so_state |= SS_NBIO;
724 }
725
726 TAILQ_INIT(&so->so_incomp);
727 TAILQ_INIT(&so->so_comp);
728 so->so_type = type;
729 so->last_upid = proc_uniqueid(p);
730 so->last_pid = proc_pid(p);
731 proc_getexecutableuuid(p, so->last_uuid, sizeof(so->last_uuid));
732 proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid));
733
734 if (ep != PROC_NULL && ep != p) {
735 so->e_upid = proc_uniqueid(ep);
736 so->e_pid = proc_pid(ep);
737 proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid));
738 so->so_flags |= SOF_DELEGATED;
739 #if defined(XNU_TARGET_OS_OSX)
740 if (ep->p_responsible_pid != so->e_pid) {
741 rpid = ep->p_responsible_pid;
742 }
743 #endif
744 }
745
746 #if defined(XNU_TARGET_OS_OSX)
747 if (rpid < 0 && p->p_responsible_pid != so->last_pid) {
748 rpid = p->p_responsible_pid;
749 }
750
751 so->so_rpid = -1;
752 uuid_clear(so->so_ruuid);
753 if (rpid >= 0) {
754 proc_t rp = proc_find(rpid);
755 if (rp != PROC_NULL) {
756 proc_getexecutableuuid(rp, so->so_ruuid, sizeof(so->so_ruuid));
757 so->so_rpid = rpid;
758 proc_rele(rp);
759 }
760 }
761 #endif
762
763 so->so_cred = kauth_cred_proc_ref(p);
764 if (!suser(kauth_cred_get(), NULL)) {
765 so->so_state |= SS_PRIV;
766 }
767
768 so->so_proto = prp;
769 so->so_rcv.sb_flags |= SB_RECV;
770 so->so_rcv.sb_so = so->so_snd.sb_so = so;
771 so->next_lock_lr = 0;
772 so->next_unlock_lr = 0;
773
774 /*
775 * Attachment will create the per-pcb lock if necessary and
776 * increase the refcount for creation; make sure this is done
777 * before the socket is inserted in any lists.
778 */
779 so->so_usecount++;
780
781 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
782 if (error != 0) {
783 /*
784 * Warning:
785 * If so_pcb is not zero, the socket will be leaked,
786 * so the protocol attachment handler must be coded carefully.
787 */
788 so->so_state |= SS_NOFDREF;
789 VERIFY(so->so_usecount > 0);
790 so->so_usecount--;
791 sofreelastref(so, 1); /* will deallocate the socket */
792 return error;
793 }
794
795 /*
796 * Note: needs so_pcb to be set after pru_attach
797 */
798 if (prp->pr_update_last_owner != NULL) {
799 (*prp->pr_update_last_owner)(so, p, ep);
800 }
801
802 atomic_add_32(&prp->pr_domain->dom_refs, 1);
803
804 /* Attach socket filters for this protocol */
805 sflt_initsock(so);
806 #if TCPDEBUG
807 if (tcpconsdebug == 2) {
808 so->so_options |= SO_DEBUG;
809 }
810 #endif
811 so_set_default_traffic_class(so);
812
813 /*
814 * If this thread or task is marked to create backgrounded sockets,
815 * mark the socket as background.
816 */
817 if (!(flags & SOCF_MPTCP) &&
818 proc_get_effective_thread_policy(current_thread(), TASK_POLICY_NEW_SOCKETS_BG)) {
819 socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND);
820 so->so_background_thread = current_thread();
821 }
822
823 switch (dom) {
824 /*
825 * Don't mark Unix domain or system sockets
826 * eligible for defunct by default.
827 */
828 case PF_LOCAL:
829 case PF_SYSTEM:
830 so->so_flags |= SOF_NODEFUNCT;
831 break;
832 default:
833 break;
834 }
835
836 /*
837 * Entitlements can't be checked at socket creation time except if the
838 * application requested a feature guarded by a privilege (c.f., socket
839 * delegation).
840 * The priv(9) and the Sandboxing APIs are designed with the idea that
841 * a privilege check should only be triggered by a userland request.
842 * A privilege check at socket creation time is time consuming and
843 * could trigger many authorisation error messages from the security
844 * APIs.
845 */
846
847 *aso = so;
848
849 return 0;
850 }
851
852 /*
853 * Returns: 0 Success
854 * EAFNOSUPPORT
855 * EPROTOTYPE
856 * EPROTONOSUPPORT
857 * ENOBUFS
858 * <pru_attach>:ENOBUFS[AF_UNIX]
859 * <pru_attach>:ENOBUFS[TCP]
860 * <pru_attach>:ENOMEM[TCP]
861 * <pru_attach>:??? [other protocol families, IPSEC]
862 */
863 int
864 socreate(int dom, struct socket **aso, int type, int proto)
865 {
866 return socreate_internal(dom, aso, type, proto, current_proc(), 0,
867 PROC_NULL);
868 }
869
870 int
871 socreate_delegate(int dom, struct socket **aso, int type, int proto, pid_t epid)
872 {
873 int error = 0;
874 struct proc *ep = PROC_NULL;
875
876 if ((proc_selfpid() != epid) && ((ep = proc_find(epid)) == PROC_NULL)) {
877 error = ESRCH;
878 goto done;
879 }
880
881 error = socreate_internal(dom, aso, type, proto, current_proc(), 0, ep);
882
883 /*
884 * It might not be wise to hold the proc reference when calling
885 * socreate_internal since it calls soalloc with M_WAITOK
886 */
887 done:
888 if (ep != PROC_NULL) {
889 proc_rele(ep);
890 }
891
892 return error;
893 }
894
895 /*
896 * Returns: 0 Success
897 * <pru_bind>:EINVAL Invalid argument [COMMON_START]
898 * <pru_bind>:EAFNOSUPPORT Address family not supported
899 * <pru_bind>:EADDRNOTAVAIL Address not available.
900 * <pru_bind>:EINVAL Invalid argument
901 * <pru_bind>:EAFNOSUPPORT Address family not supported [notdef]
902 * <pru_bind>:EACCES Permission denied
903 * <pru_bind>:EADDRINUSE Address in use
904 * <pru_bind>:EAGAIN Resource unavailable, try again
905 * <pru_bind>:EPERM Operation not permitted
906 * <pru_bind>:???
907 * <sf_bind>:???
908 *
909 * Notes: It's not possible to fully enumerate the return codes above,
910 * since socket filter authors and protocol family authors may
911 * not choose to limit their error returns to those listed, even
912 * though this may result in some software operating incorrectly.
913 *
914 * The error codes which are enumerated above are those known to
915 * be returned by the tcp_usr_bind function supplied.
916 */
917 int
918 sobindlock(struct socket *so, struct sockaddr *nam, int dolock)
919 {
920 struct proc *p = current_proc();
921 int error = 0;
922
923 if (dolock) {
924 socket_lock(so, 1);
925 }
926
927 so_update_last_owner_locked(so, p);
928 so_update_policy(so);
929
930 #if NECP
931 so_update_necp_policy(so, nam, NULL);
932 #endif /* NECP */
933
934 /*
935 * If this is a bind request on a socket that has been marked
936 * as inactive, reject it now before we go any further.
937 */
938 if (so->so_flags & SOF_DEFUNCT) {
939 error = EINVAL;
940 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
941 __func__, proc_pid(p), proc_best_name(p),
942 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
943 SOCK_DOM(so), SOCK_TYPE(so), error);
944 goto out;
945 }
946
947 /* Socket filter */
948 error = sflt_bind(so, nam);
949
950 if (error == 0) {
951 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
952 }
953 out:
954 if (dolock) {
955 socket_unlock(so, 1);
956 }
957
958 if (error == EJUSTRETURN) {
959 error = 0;
960 }
961
962 return error;
963 }
964
965 void
966 sodealloc(struct socket *so)
967 {
968 kauth_cred_unref(&so->so_cred);
969
970 /* Remove any filters */
971 sflt_termsock(so);
972
973 #if CONTENT_FILTER
974 cfil_sock_detach(so);
975 #endif /* CONTENT_FILTER */
976
977 so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt);
978
979 if (so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) {
980 cached_sock_free(so);
981 } else {
982 zfree(socket_zone, so);
983 }
984 }
985
986 /*
987 * Returns: 0 Success
988 * EINVAL
989 * EOPNOTSUPP
990 * <pru_listen>:EINVAL[AF_UNIX]
991 * <pru_listen>:EINVAL[TCP]
992 * <pru_listen>:EADDRNOTAVAIL[TCP] Address not available.
993 * <pru_listen>:EINVAL[TCP] Invalid argument
994 * <pru_listen>:EAFNOSUPPORT[TCP] Address family not supported [notdef]
995 * <pru_listen>:EACCES[TCP] Permission denied
996 * <pru_listen>:EADDRINUSE[TCP] Address in use
997 * <pru_listen>:EAGAIN[TCP] Resource unavailable, try again
998 * <pru_listen>:EPERM[TCP] Operation not permitted
999 * <sf_listen>:???
1000 *
1001 * Notes: Other <pru_listen> returns depend on the protocol family; all
1002 * <sf_listen> returns depend on what the filter author causes
1003 * their filter to return.
1004 */
1005 int
1006 solisten(struct socket *so, int backlog)
1007 {
1008 struct proc *p = current_proc();
1009 int error = 0;
1010
1011 socket_lock(so, 1);
1012
1013 so_update_last_owner_locked(so, p);
1014 so_update_policy(so);
1015
1016 #if NECP
1017 so_update_necp_policy(so, NULL, NULL);
1018 #endif /* NECP */
1019
1020 if (so->so_proto == NULL) {
1021 error = EINVAL;
1022 goto out;
1023 }
1024 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
1025 error = EOPNOTSUPP;
1026 goto out;
1027 }
1028
1029 /*
1030 * If the listen request is made on a socket that is not fully
1031 * disconnected, or on a socket that has been marked as inactive,
1032 * reject the request now.
1033 */
1034 if ((so->so_state &
1035 (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) ||
1036 (so->so_flags & SOF_DEFUNCT)) {
1037 error = EINVAL;
1038 if (so->so_flags & SOF_DEFUNCT) {
1039 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
1040 "(%d)\n", __func__, proc_pid(p),
1041 proc_best_name(p),
1042 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
1043 SOCK_DOM(so), SOCK_TYPE(so), error);
1044 }
1045 goto out;
1046 }
1047
1048 if ((so->so_restrictions & SO_RESTRICT_DENY_IN) != 0) {
1049 error = EPERM;
1050 goto out;
1051 }
1052
1053 error = sflt_listen(so);
1054 if (error == 0) {
1055 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
1056 }
1057
1058 if (error) {
1059 if (error == EJUSTRETURN) {
1060 error = 0;
1061 }
1062 goto out;
1063 }
1064
1065 if (TAILQ_EMPTY(&so->so_comp)) {
1066 so->so_options |= SO_ACCEPTCONN;
1067 }
1068 /*
1069 * POSIX: The implementation may have an upper limit on the length of
1070 * the listen queue, either global or per accepting socket. If backlog
1071 * exceeds this limit, the length of the listen queue is set to the
1072 * limit.
1073 *
1074 * If listen() is called with a backlog argument value that is less
1075 * than 0, the function behaves as if it had been called with a backlog
1076 * argument value of 0.
1077 *
1078 * A backlog argument of 0 may allow the socket to accept connections,
1079 * in which case the length of the listen queue may be set to an
1080 * implementation-defined minimum value.
1081 */
1082 if (backlog <= 0 || backlog > somaxconn) {
1083 backlog = somaxconn;
1084 }
1085
1086 so->so_qlimit = backlog;
1087 out:
1088 socket_unlock(so, 1);
1089 return error;
1090 }
1091
1092 /*
1093 * The "accept list lock" protects the fields related to the listener queues
1094 * because we can unlock a socket to respect the lock ordering between
1095 * the listener socket and its clients sockets. The lock ordering is first to
1096 * acquire the client socket before the listener socket.
1097 *
1098 * The accept list lock serializes access to the following fields:
1099 * - of the listener socket:
1100 * - so_comp
1101 * - so_incomp
1102 * - so_qlen
1103 * - so_inqlen
1104 * - of client sockets that are in so_comp or so_incomp:
1105 * - so_head
1106 * - so_list
1107 *
1108 * As one can see, the accept list lock protects the consistency of the
1109 * linkage of the client sockets.
1110 *
1111 * Note that those fields may be read without holding the accept list lock
1112 * for a preflight provided the accept list lock is taken when committing
1113 * to take an action based on the result of the preflight. The preflight
1114 * saves the cost of doing the unlock/lock dance.
1115 */
1116 void
1117 so_acquire_accept_list(struct socket *head, struct socket *so)
1118 {
1119 lck_mtx_t *mutex_held;
1120
1121 if (head->so_proto->pr_getlock == NULL) {
1122 return;
1123 }
1124 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
1125 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1126
1127 if (!(head->so_flags1 & SOF1_ACCEPT_LIST_HELD)) {
1128 head->so_flags1 |= SOF1_ACCEPT_LIST_HELD;
1129 return;
1130 }
1131 if (so != NULL) {
1132 socket_unlock(so, 0);
1133 }
1134 while (head->so_flags1 & SOF1_ACCEPT_LIST_HELD) {
1135 so_accept_list_waits += 1;
1136 msleep((caddr_t)&head->so_incomp, mutex_held,
1137 PSOCK | PCATCH, __func__, NULL);
1138 }
1139 head->so_flags1 |= SOF1_ACCEPT_LIST_HELD;
1140 if (so != NULL) {
1141 socket_unlock(head, 0);
1142 socket_lock(so, 0);
1143 socket_lock(head, 0);
1144 }
1145 }
1146
1147 void
1148 so_release_accept_list(struct socket *head)
1149 {
1150 if (head->so_proto->pr_getlock != NULL) {
1151 lck_mtx_t *mutex_held;
1152
1153 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
1154 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1155
1156 head->so_flags1 &= ~SOF1_ACCEPT_LIST_HELD;
1157 wakeup((caddr_t)&head->so_incomp);
1158 }
1159 }
1160
1161 void
1162 sofreelastref(struct socket *so, int dealloc)
1163 {
1164 struct socket *head = so->so_head;
1165
1166 /* Assume socket is locked */
1167
1168 if (!(so->so_flags & SOF_PCBCLEARING) || !(so->so_state & SS_NOFDREF)) {
1169 selthreadclear(&so->so_snd.sb_sel);
1170 selthreadclear(&so->so_rcv.sb_sel);
1171 so->so_rcv.sb_flags &= ~(SB_SEL | SB_UPCALL);
1172 so->so_snd.sb_flags &= ~(SB_SEL | SB_UPCALL);
1173 so->so_event = sonullevent;
1174 return;
1175 }
1176 if (head != NULL) {
1177 /*
1178 * Need to lock the listener when the protocol has
1179 * per socket locks
1180 */
1181 if (head->so_proto->pr_getlock != NULL) {
1182 socket_lock(head, 1);
1183 so_acquire_accept_list(head, so);
1184 }
1185 if (so->so_state & SS_INCOMP) {
1186 so->so_state &= ~SS_INCOMP;
1187 TAILQ_REMOVE(&head->so_incomp, so, so_list);
1188 head->so_incqlen--;
1189 head->so_qlen--;
1190 so->so_head = NULL;
1191
1192 if (head->so_proto->pr_getlock != NULL) {
1193 so_release_accept_list(head);
1194 socket_unlock(head, 1);
1195 }
1196 } else if (so->so_state & SS_COMP) {
1197 if (head->so_proto->pr_getlock != NULL) {
1198 so_release_accept_list(head);
1199 socket_unlock(head, 1);
1200 }
1201 /*
1202 * We must not decommission a socket that's
1203 * on the accept(2) queue. If we do, then
1204 * accept(2) may hang after select(2) indicated
1205 * that the listening socket was ready.
1206 */
1207 selthreadclear(&so->so_snd.sb_sel);
1208 selthreadclear(&so->so_rcv.sb_sel);
1209 so->so_rcv.sb_flags &= ~(SB_SEL | SB_UPCALL);
1210 so->so_snd.sb_flags &= ~(SB_SEL | SB_UPCALL);
1211 so->so_event = sonullevent;
1212 return;
1213 } else {
1214 if (head->so_proto->pr_getlock != NULL) {
1215 so_release_accept_list(head);
1216 socket_unlock(head, 1);
1217 }
1218 printf("sofree: not queued\n");
1219 }
1220 }
1221 sowflush(so);
1222 sorflush(so);
1223
1224 #if FLOW_DIVERT
1225 if (so->so_flags & SOF_FLOW_DIVERT) {
1226 flow_divert_detach(so);
1227 }
1228 #endif /* FLOW_DIVERT */
1229
1230 /* 3932268: disable upcall */
1231 so->so_rcv.sb_flags &= ~SB_UPCALL;
1232 so->so_snd.sb_flags &= ~(SB_UPCALL | SB_SNDBYTE_CNT);
1233 so->so_event = sonullevent;
1234
1235 if (dealloc) {
1236 sodealloc(so);
1237 }
1238 }
1239
1240 void
1241 soclose_wait_locked(struct socket *so)
1242 {
1243 lck_mtx_t *mutex_held;
1244
1245 if (so->so_proto->pr_getlock != NULL) {
1246 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1247 } else {
1248 mutex_held = so->so_proto->pr_domain->dom_mtx;
1249 }
1250 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1251
1252 /*
1253 * Double check here and return if there's no outstanding upcall;
1254 * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
1255 */
1256 if (!so->so_upcallusecount || !(so->so_flags & SOF_UPCALLCLOSEWAIT)) {
1257 return;
1258 }
1259 so->so_rcv.sb_flags &= ~SB_UPCALL;
1260 so->so_snd.sb_flags &= ~SB_UPCALL;
1261 so->so_flags |= SOF_CLOSEWAIT;
1262
1263 (void) msleep((caddr_t)&so->so_upcallusecount, mutex_held, (PZERO - 1),
1264 "soclose_wait_locked", NULL);
1265 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1266 so->so_flags &= ~SOF_CLOSEWAIT;
1267 }
1268
1269 /*
1270 * Close a socket on last file table reference removal.
1271 * Initiate disconnect if connected.
1272 * Free socket when disconnect complete.
1273 */
1274 int
1275 soclose_locked(struct socket *so)
1276 {
1277 int error = 0;
1278 struct timespec ts;
1279
1280 if (so->so_usecount == 0) {
1281 panic("soclose: so=%p refcount=0\n", so);
1282 /* NOTREACHED */
1283 }
1284
1285 sflt_notify(so, sock_evt_closing, NULL);
1286
1287 if (so->so_upcallusecount) {
1288 soclose_wait_locked(so);
1289 }
1290
1291 #if CONTENT_FILTER
1292 /*
1293 * We have to wait until the content filters are done
1294 */
1295 if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
1296 cfil_sock_close_wait(so);
1297 cfil_sock_is_closed(so);
1298 cfil_sock_detach(so);
1299 }
1300 #endif /* CONTENT_FILTER */
1301
1302 if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) {
1303 soresume(current_proc(), so, 1);
1304 so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED;
1305 }
1306
1307 if ((so->so_options & SO_ACCEPTCONN)) {
1308 struct socket *sp, *sonext;
1309 int persocklock = 0;
1310 int incomp_overflow_only;
1311
1312 /*
1313 * We do not want new connections to be added
1314 * to the connection queues
1315 */
1316 so->so_options &= ~SO_ACCEPTCONN;
1317
1318 /*
1319 * We can drop the lock on the listener once
1320 * we've acquired the incoming list
1321 */
1322 if (so->so_proto->pr_getlock != NULL) {
1323 persocklock = 1;
1324 so_acquire_accept_list(so, NULL);
1325 socket_unlock(so, 0);
1326 }
1327 again:
1328 incomp_overflow_only = 1;
1329
1330 TAILQ_FOREACH_SAFE(sp, &so->so_incomp, so_list, sonext) {
1331 /*
1332 * Radar 5350314
1333 * skip sockets thrown away by tcp_dropdropablreq();
1334 * they will get cleaned up by the garbage collection.
1335 * Otherwise, remove the incomp socket from the queue
1336 * and let soabort trigger the appropriate cleanup.
1337 */
1338 if (sp->so_flags & SOF_OVERFLOW) {
1339 continue;
1340 }
1341
1342 if (persocklock != 0) {
1343 socket_lock(sp, 1);
1344 }
1345
1346 /*
1347 * Radar 27945981
1348 * The extra reference held for the list ensures the
1349 * validity of the socket pointer when we perform the
1350 * unlock of the head above.
1351 */
1352 if (sp->so_state & SS_INCOMP) {
1353 sp->so_state &= ~SS_INCOMP;
1354 sp->so_head = NULL;
1355 TAILQ_REMOVE(&so->so_incomp, sp, so_list);
1356 so->so_incqlen--;
1357 so->so_qlen--;
1358
1359 (void) soabort(sp);
1360 } else {
1361 panic("%s sp %p in so_incomp but !SS_INCOMP",
1362 __func__, sp);
1363 }
1364
1365 if (persocklock != 0) {
1366 socket_unlock(sp, 1);
1367 }
1368 }
1369
1370 TAILQ_FOREACH_SAFE(sp, &so->so_comp, so_list, sonext) {
1371 /* Dequeue from so_comp since sofree() won't do it */
1372 if (persocklock != 0) {
1373 socket_lock(sp, 1);
1374 }
1375
1376 if (sp->so_state & SS_COMP) {
1377 sp->so_state &= ~SS_COMP;
1378 sp->so_head = NULL;
1379 TAILQ_REMOVE(&so->so_comp, sp, so_list);
1380 so->so_qlen--;
1381
1382 (void) soabort(sp);
1383 } else {
1384 panic("%s sp %p in so_comp but !SS_COMP",
1385 __func__, sp);
1386 }
1387
1388 if (persocklock) {
1389 socket_unlock(sp, 1);
1390 }
1391 }
1392
1393 if (incomp_overflow_only == 0 && !TAILQ_EMPTY(&so->so_incomp)) {
1394 #if (DEBUG | DEVELOPMENT)
1395 panic("%s head %p so_incomp not empty\n", __func__, so);
1396 #endif /* (DEVELOPMENT || DEBUG) */
1397
1398 goto again;
1399 }
1400
1401 if (!TAILQ_EMPTY(&so->so_comp)) {
1402 #if (DEBUG | DEVELOPMENT)
1403 panic("%s head %p so_comp not empty\n", __func__, so);
1404 #endif /* (DEVELOPMENT || DEBUG) */
1405
1406 goto again;
1407 }
1408
1409 if (persocklock) {
1410 socket_lock(so, 0);
1411 so_release_accept_list(so);
1412 }
1413 }
1414 if (so->so_pcb == NULL) {
1415 /* 3915887: mark the socket as ready for dealloc */
1416 so->so_flags |= SOF_PCBCLEARING;
1417 goto discard;
1418 }
1419 if (so->so_state & SS_ISCONNECTED) {
1420 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
1421 error = sodisconnectlocked(so);
1422 if (error) {
1423 goto drop;
1424 }
1425 }
1426 if (so->so_options & SO_LINGER) {
1427 lck_mtx_t *mutex_held;
1428
1429 if ((so->so_state & SS_ISDISCONNECTING) &&
1430 (so->so_state & SS_NBIO)) {
1431 goto drop;
1432 }
1433 if (so->so_proto->pr_getlock != NULL) {
1434 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1435 } else {
1436 mutex_held = so->so_proto->pr_domain->dom_mtx;
1437 }
1438 while (so->so_state & SS_ISCONNECTED) {
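/*
 * The linger interval is kept in 1/100-second ticks
 * here; convert it to a timespec for msleep().
 */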
1439 ts.tv_sec = (so->so_linger / 100);
1440 ts.tv_nsec = (so->so_linger % 100) *
1441 NSEC_PER_USEC * 1000 * 10;
1442 error = msleep((caddr_t)&so->so_timeo,
1443 mutex_held, PSOCK | PCATCH, "soclose", &ts);
1444 if (error) {
1445 /*
1446 * It's OK when the timer fires;
1447 * don't report an error
1448 */
1449 if (error == EWOULDBLOCK) {
1450 error = 0;
1451 }
1452 break;
1453 }
1454 }
1455 }
1456 }
1457 drop:
1458 if (so->so_usecount == 0) {
1459 panic("soclose: usecount is zero so=%p\n", so);
1460 /* NOTREACHED */
1461 }
1462 if (so->so_pcb != NULL && !(so->so_flags & SOF_PCBCLEARING)) {
1463 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
1464 if (error == 0) {
1465 error = error2;
1466 }
1467 }
1468 if (so->so_usecount <= 0) {
1469 panic("soclose: usecount is zero so=%p\n", so);
1470 /* NOTREACHED */
1471 }
1472 discard:
1473 if (so->so_pcb != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
1474 (so->so_state & SS_NOFDREF)) {
1475 panic("soclose: NOFDREF");
1476 /* NOTREACHED */
1477 }
1478 so->so_state |= SS_NOFDREF;
1479
1480 if ((so->so_flags & SOF_KNOTE) != 0) {
1481 KNOTE(&so->so_klist, SO_FILT_HINT_LOCKED);
1482 }
1483
1484 atomic_add_32(&so->so_proto->pr_domain->dom_refs, -1);
1485
1486 VERIFY(so->so_usecount > 0);
1487 so->so_usecount--;
1488 sofree(so);
1489 return error;
1490 }
1491
1492 int
1493 soclose(struct socket *so)
1494 {
1495 int error = 0;
1496 socket_lock(so, 1);
1497
1498 if (so->so_retaincnt == 0) {
1499 error = soclose_locked(so);
1500 } else {
1501 /*
1502 * If the FD is going away but the socket is
1503 * retained in the kernel, remove its reference
1504 */
1505 so->so_usecount--;
1506 if (so->so_usecount < 2) {
1507 panic("soclose: retaincnt non null and so=%p "
1508 "usecount=%d\n", so, so->so_usecount);
1509 }
1510 }
1511 socket_unlock(so, 1);
1512 return error;
1513 }
1514
1515 /*
1516 * Must be called at splnet...
1517 */
1518 /* Should already be locked */
1519 int
1520 soabort(struct socket *so)
1521 {
1522 int error;
1523
1524 #ifdef MORE_LOCKING_DEBUG
1525 lck_mtx_t *mutex_held;
1526
1527 if (so->so_proto->pr_getlock != NULL) {
1528 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
1529 } else {
1530 mutex_held = so->so_proto->pr_domain->dom_mtx;
1531 }
1532 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1533 #endif
1534
1535 if ((so->so_flags & SOF_ABORTED) == 0) {
1536 so->so_flags |= SOF_ABORTED;
1537 error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
1538 if (error) {
1539 sofree(so);
1540 return error;
1541 }
1542 }
1543 return 0;
1544 }
1545
1546 int
1547 soacceptlock(struct socket *so, struct sockaddr **nam, int dolock)
1548 {
1549 int error;
1550
1551 if (dolock) {
1552 socket_lock(so, 1);
1553 }
1554
1555 so_update_last_owner_locked(so, PROC_NULL);
1556 so_update_policy(so);
1557 #if NECP
1558 so_update_necp_policy(so, NULL, NULL);
1559 #endif /* NECP */
1560
1561 if ((so->so_state & SS_NOFDREF) == 0) {
1562 panic("soaccept: !NOFDREF");
1563 }
1564 so->so_state &= ~SS_NOFDREF;
1565 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
1566
1567 if (dolock) {
1568 socket_unlock(so, 1);
1569 }
1570 return error;
1571 }
1572
1573 int
1574 soaccept(struct socket *so, struct sockaddr **nam)
1575 {
1576 return soacceptlock(so, nam, 1);
1577 }
1578
1579 int
1580 soacceptfilter(struct socket *so, struct socket *head)
1581 {
1582 struct sockaddr *local = NULL, *remote = NULL;
1583 int error = 0;
1584
1585 /*
1586 * Hold the lock even if this socket has not been made visible
1587 * to the filter(s). For sockets with global locks, this protects
1588 * against the head or peer going away
1589 */
1590 socket_lock(so, 1);
1591 if (sogetaddr_locked(so, &remote, 1) != 0 ||
1592 sogetaddr_locked(so, &local, 0) != 0) {
1593 so->so_state &= ~SS_NOFDREF;
1594 socket_unlock(so, 1);
1595 soclose(so);
1596 /* Out of resources; try it again next time */
1597 error = ECONNABORTED;
1598 goto done;
1599 }
1600
1601 error = sflt_accept(head, so, local, remote);
1602
1603 /*
1604 * If we get EJUSTRETURN from one of the filters, mark this socket
1605 * as inactive and return it anyway. This newly accepted socket
1606 * will be disconnected later before we hand it off to the caller.
1607 */
1608 if (error == EJUSTRETURN) {
1609 error = 0;
1610 (void) sosetdefunct(current_proc(), so,
1611 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
1612 }
1613
1614 if (error != 0) {
1615 /*
1616 * This may seem like a duplication of the above error
1617 * handling part when we return ECONNABORTED, except
1618 * the following is done while holding the lock since
1619 * the socket has been exposed to the filter(s) earlier.
1620 */
1621 so->so_state &= ~SS_NOFDREF;
1622 socket_unlock(so, 1);
1623 soclose(so);
1624 /* Propagate socket filter's error code to the caller */
1625 } else {
1626 socket_unlock(so, 1);
1627 }
1628 done:
1629 /* Callee checks for NULL pointer */
1630 sock_freeaddr(remote);
1631 sock_freeaddr(local);
1632 return error;
1633 }
1634
1635 /*
1636 * Returns: 0 Success
1637 * EOPNOTSUPP Operation not supported on socket
1638 * EISCONN Socket is connected
1639 * <pru_connect>:EADDRNOTAVAIL Address not available.
1640 * <pru_connect>:EINVAL Invalid argument
1641 * <pru_connect>:EAFNOSUPPORT Address family not supported [notdef]
1642 * <pru_connect>:EACCES Permission denied
1643 * <pru_connect>:EADDRINUSE Address in use
1644 * <pru_connect>:EAGAIN Resource unavailable, try again
1645 * <pru_connect>:EPERM Operation not permitted
1646 * <sf_connect_out>:??? [anything a filter writer might set]
1647 */
1648 int
1649 soconnectlock(struct socket *so, struct sockaddr *nam, int dolock)
1650 {
1651 int error;
1652 struct proc *p = current_proc();
1653
1654 if (dolock) {
1655 socket_lock(so, 1);
1656 }
1657
1658 so_update_last_owner_locked(so, p);
1659 so_update_policy(so);
1660
1661 #if NECP
1662 so_update_necp_policy(so, NULL, nam);
1663 #endif /* NECP */
1664
1665 /*
1666 * If this is a listening socket or if this is a previously-accepted
1667 * socket that has been marked as inactive, reject the connect request.
1668 */
1669 if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
1670 error = EOPNOTSUPP;
1671 if (so->so_flags & SOF_DEFUNCT) {
1672 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
1673 "(%d)\n", __func__, proc_pid(p),
1674 proc_best_name(p),
1675 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
1676 SOCK_DOM(so), SOCK_TYPE(so), error);
1677 }
1678 if (dolock) {
1679 socket_unlock(so, 1);
1680 }
1681 return error;
1682 }
1683
1684 if ((so->so_restrictions & SO_RESTRICT_DENY_OUT) != 0) {
1685 if (dolock) {
1686 socket_unlock(so, 1);
1687 }
1688 return EPERM;
1689 }
1690
1691 /*
1692 * If protocol is connection-based, can only connect once.
1693 * Otherwise, if connected, try to disconnect first.
1694 * This allows user to disconnect by connecting to, e.g.,
1695 * a null address.
1696 */
1697 if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING) &&
1698 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
1699 (error = sodisconnectlocked(so)))) {
1700 error = EISCONN;
1701 } else {
1702 /*
1703 * Run connect filter before calling protocol:
1704 * - non-blocking connect returns before completion;
1705 */
1706 error = sflt_connectout(so, nam);
1707 if (error != 0) {
1708 if (error == EJUSTRETURN) {
1709 error = 0;
1710 }
1711 } else {
1712 error = (*so->so_proto->pr_usrreqs->pru_connect)
1713 (so, nam, p);
1714 if (error != 0) {
1715 so->so_state &= ~SS_ISCONNECTING;
1716 }
1717 }
1718 }
1719 if (dolock) {
1720 socket_unlock(so, 1);
1721 }
1722 return error;
1723 }
1724
1725 int
1726 soconnect(struct socket *so, struct sockaddr *nam)
1727 {
1728 return soconnectlock(so, nam, 1);
1729 }
1730
1731 /*
1732 * Returns: 0 Success
1733 * <pru_connect2>:EINVAL[AF_UNIX]
1734 * <pru_connect2>:EPROTOTYPE[AF_UNIX]
1735 * <pru_connect2>:??? [other protocol families]
1736 *
1737 * Notes: <pru_connect2> is not supported by [TCP].
1738 */
1739 int
1740 soconnect2(struct socket *so1, struct socket *so2)
1741 {
1742 int error;
1743
1744 socket_lock(so1, 1);
1745 if (so2->so_proto->pr_lock) {
1746 socket_lock(so2, 1);
1747 }
1748
1749 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
1750
1751 socket_unlock(so1, 1);
1752 if (so2->so_proto->pr_lock) {
1753 socket_unlock(so2, 1);
1754 }
1755 return error;
1756 }
1757
1758 int
1759 soconnectxlocked(struct socket *so, struct sockaddr *src,
1760 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
1761 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
1762 uint32_t arglen, uio_t auio, user_ssize_t *bytes_written)
1763 {
1764 int error;
1765
1766 so_update_last_owner_locked(so, p);
1767 so_update_policy(so);
1768
1769 /*
1770 * If this is a listening socket or if this is a previously-accepted
1771 * socket that has been marked as inactive, reject the connect request.
1772 */
1773 if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
1774 error = EOPNOTSUPP;
1775 if (so->so_flags & SOF_DEFUNCT) {
1776 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
1777 "(%d)\n", __func__, proc_pid(p),
1778 proc_best_name(p),
1779 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
1780 SOCK_DOM(so), SOCK_TYPE(so), error);
1781 }
1782 return error;
1783 }
1784
1785 if ((so->so_restrictions & SO_RESTRICT_DENY_OUT) != 0) {
1786 return EPERM;
1787 }
1788
1789 /*
1790 * If protocol is connection-based, can only connect once
1791 * unless PR_MULTICONN is set. Otherwise, if connected,
1792 * try to disconnect first. This allows user to disconnect
1793 * by connecting to, e.g., a null address.
1794 */
1795 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) &&
1796 !(so->so_proto->pr_flags & PR_MULTICONN) &&
1797 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
1798 (error = sodisconnectlocked(so)) != 0)) {
1799 error = EISCONN;
1800 } else {
1801 if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
1802 (flags & CONNECT_DATA_IDEMPOTENT)) {
1803 so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
1804
1805 if (flags & CONNECT_DATA_AUTHENTICATED) {
1806 so->so_flags1 |= SOF1_DATA_AUTHENTICATED;
1807 }
1808 }
1809
1810 /*
1811 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
1812 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
1813 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
1814 * Case 3 allows user to combine write with connect even if they have
1815 * no use for TFO (such as regular TCP, and UDP).
1816 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
1817 */
1818 if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
1819 ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio)) {
1820 so->so_flags1 |= SOF1_PRECONNECT_DATA;
1821 }
1822
1823 /*
1824 * If a user sets data idempotent and does not pass an uio, or
1825 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error; reset
1826 * SOF1_DATA_IDEMPOTENT.
1827 */
1828 if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
1829 (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
1830 /* We should return EINVAL instead perhaps. */
1831 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
1832 }
1833
1834 /*
1835 * Run connect filter before calling protocol:
1836 * - non-blocking connect returns before completion;
1837 */
1838 error = sflt_connectout(so, dst);
1839 if (error != 0) {
1840 /* Disable PRECONNECT_DATA, as we don't need to send a SYN anymore. */
1841 so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
1842 if (error == EJUSTRETURN) {
1843 error = 0;
1844 }
1845 } else {
1846 error = (*so->so_proto->pr_usrreqs->pru_connectx)
1847 (so, src, dst, p, ifscope, aid, pcid,
1848 flags, arg, arglen, auio, bytes_written);
1849 if (error != 0) {
1850 so->so_state &= ~SS_ISCONNECTING;
1851 if (error != EINPROGRESS) {
1852 so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
1853 }
1854 }
1855 }
1856 }
1857
1858 return error;
1859 }
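/*
 * Hedged userspace sketch (not part of the original source): the
 * CONNECT_DATA_IDEMPOTENT / CONNECT_RESUME_ON_READ_WRITE handling above is
 * what backs TCP Fast Open style use of connectx(2). Assuming the documented
 * connectx() prototype and sa_endpoints_t layout, a client that wants its
 * request sent along with the SYN might do:
 *
 *	sa_endpoints_t ep = {
 *		.sae_dstaddr = (struct sockaddr *)&sin,
 *		.sae_dstaddrlen = sizeof(sin),
 *	};
 *	struct iovec iov = { .iov_base = req, .iov_len = reqlen };
 *	size_t sent = 0;
 *	sae_connid_t cid;
 *
 *	if (connectx(s, &ep, SAE_ASSOCID_ANY, CONNECT_DATA_IDEMPOTENT,
 *	    &iov, 1, &sent, &cid) == -1)
 *		err(1, "connectx");
 */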
1860
1861 int
1862 sodisconnectlocked(struct socket *so)
1863 {
1864 int error;
1865
1866 if ((so->so_state & SS_ISCONNECTED) == 0) {
1867 error = ENOTCONN;
1868 goto bad;
1869 }
1870 if (so->so_state & SS_ISDISCONNECTING) {
1871 error = EALREADY;
1872 goto bad;
1873 }
1874
1875 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
1876 if (error == 0) {
1877 sflt_notify(so, sock_evt_disconnected, NULL);
1878 }
1879
1880 bad:
1881 return error;
1882 }
1883
1884 /* Locking version */
1885 int
1886 sodisconnect(struct socket *so)
1887 {
1888 int error;
1889
1890 socket_lock(so, 1);
1891 error = sodisconnectlocked(so);
1892 socket_unlock(so, 1);
1893 return error;
1894 }
1895
1896 int
1897 sodisconnectxlocked(struct socket *so, sae_associd_t aid, sae_connid_t cid)
1898 {
1899 int error;
1900
1901 /*
1902 * Call the protocol disconnectx handler; let it handle all
1903 * matters related to the connection state of this session.
1904 */
1905 error = (*so->so_proto->pr_usrreqs->pru_disconnectx)(so, aid, cid);
1906 if (error == 0) {
1907 /*
1908 * The event applies only for the session, not for
1909 * the disconnection of individual subflows.
1910 */
1911 if (so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) {
1912 sflt_notify(so, sock_evt_disconnected, NULL);
1913 }
1914 }
1915 return error;
1916 }
1917
1918 int
1919 sodisconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid)
1920 {
1921 int error;
1922
1923 socket_lock(so, 1);
1924 error = sodisconnectxlocked(so, aid, cid);
1925 socket_unlock(so, 1);
1926 return error;
1927 }
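/*
 * Hedged userspace sketch (not part of the original source): this path
 * presumably backs disconnectx(2). Assuming the documented prototype,
 * tearing down whatever connectx(2) established looks like:
 *
 *	if (disconnectx(s, SAE_ASSOCID_ANY, SAE_CONNID_ANY) == -1)
 *		err(1, "disconnectx");
 */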
1928
1929 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
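/*
 * For reference (not part of the original source): SBLOCKWAIT() simply maps
 * the caller's MSG_DONTWAIT flag onto sblock()'s wait behavior:
 *
 *	SBLOCKWAIT(MSG_DONTWAIT) -> 0         sblock() fails with EWOULDBLOCK
 *	                                      if the buffer is already locked
 *	SBLOCKWAIT(0)            -> SBL_WAIT  sblock() sleeps until the
 *	                                      sockbuf lock is available
 */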
1930
1931 /*
1932 * sosendcheck will lock the socket buffer if it isn't locked and
1933 * verify that there is space for the data being inserted.
1934 *
1935 * Returns: 0 Success
1936 * EPIPE
1937 * sblock:EWOULDBLOCK
1938 * sblock:EINTR
1939 * sbwait:EBADF
1940 * sbwait:EINTR
1941 * [so_error]:???
1942 */
1943 int
1944 sosendcheck(struct socket *so, struct sockaddr *addr, user_ssize_t resid,
1945 int32_t clen, int32_t atomic, int flags, int *sblocked)
1946 {
1947 int error = 0;
1948 int32_t space;
1949 int assumelock = 0;
1950
1951 restart:
1952 if (*sblocked == 0) {
1953 if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
1954 so->so_send_filt_thread != 0 &&
1955 so->so_send_filt_thread == current_thread()) {
1956 /*
1957 * We're being called recursively from a filter,
1958 * allow this to continue. Radar 4150520.
1959 * Don't set sblocked because we don't want
1960 * to perform an unlock later.
1961 */
1962 assumelock = 1;
1963 } else {
1964 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
1965 if (error) {
1966 if (so->so_flags & SOF_DEFUNCT) {
1967 goto defunct;
1968 }
1969 return error;
1970 }
1971 *sblocked = 1;
1972 }
1973 }
1974
1975 /*
1976 * If a send attempt is made on a socket that has been marked
1977 * as inactive (disconnected), reject the request.
1978 */
1979 if (so->so_flags & SOF_DEFUNCT) {
1980 defunct:
1981 error = EPIPE;
1982 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
1983 __func__, proc_selfpid(), proc_best_name(current_proc()),
1984 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
1985 SOCK_DOM(so), SOCK_TYPE(so), error);
1986 return error;
1987 }
1988
1989 if (so->so_state & SS_CANTSENDMORE) {
1990 #if CONTENT_FILTER
1991 /*
1992 * Can re-inject data of half closed connections
1993 */
1994 if ((so->so_state & SS_ISDISCONNECTED) == 0 &&
1995 so->so_snd.sb_cfil_thread == current_thread() &&
1996 cfil_sock_data_pending(&so->so_snd) != 0) {
1997 CFIL_LOG(LOG_INFO,
1998 "so %llx ignore SS_CANTSENDMORE",
1999 (uint64_t)DEBUG_KERNEL_ADDRPERM(so));
2000 } else
2001 #endif /* CONTENT_FILTER */
2002 return EPIPE;
2003 }
2004 if (so->so_error) {
2005 error = so->so_error;
2006 so->so_error = 0;
2007 return error;
2008 }
2009
2010 if ((so->so_state & SS_ISCONNECTED) == 0) {
2011 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
2012 if (((so->so_state & SS_ISCONFIRMING) == 0) &&
2013 (resid != 0 || clen == 0) &&
2014 !(so->so_flags1 & SOF1_PRECONNECT_DATA)) {
2015 return ENOTCONN;
2016 }
2017 } else if (addr == 0) {
2018 return (so->so_proto->pr_flags & PR_CONNREQUIRED) ?
2019 ENOTCONN : EDESTADDRREQ;
2020 }
2021 }
2022
2023 space = sbspace(&so->so_snd);
2024
2025 if (flags & MSG_OOB) {
2026 space += 1024;
2027 }
2028 if ((atomic && resid > so->so_snd.sb_hiwat) ||
2029 clen > so->so_snd.sb_hiwat) {
2030 return EMSGSIZE;
2031 }
2032
2033 if ((space < resid + clen &&
2034 (atomic || (space < (int32_t)so->so_snd.sb_lowat) ||
2035 space < clen)) ||
2036 (so->so_type == SOCK_STREAM && so_wait_for_if_feedback(so))) {
2037 /*
2038 * don't block the connectx call when there's more data
2039 * than can be copied.
2040 */
2041 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
2042 if (space == 0) {
2043 return EWOULDBLOCK;
2044 }
2045 if (space < (int32_t)so->so_snd.sb_lowat) {
2046 return 0;
2047 }
2048 }
2049 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) ||
2050 assumelock) {
2051 return EWOULDBLOCK;
2052 }
2053 sbunlock(&so->so_snd, TRUE); /* keep socket locked */
2054 *sblocked = 0;
2055 error = sbwait(&so->so_snd);
2056 if (error) {
2057 if (so->so_flags & SOF_DEFUNCT) {
2058 goto defunct;
2059 }
2060 return error;
2061 }
2062 goto restart;
2063 }
2064 return 0;
2065 }
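/*
 * Caller pattern, for reference (not part of the original source): sosend()
 * and sosend_list() below call sosendcheck() at the top of their send loops
 * with the socket lock held; on success the send buffer is locked and
 * *sblocked is set, so the caller must eventually sbunlock(). A minimal
 * sketch of that shape (ignoring the recursive-filter case that leaves
 * *sblocked clear):
 *
 *	int sblocked = 0;
 *
 *	socket_lock(so, 1);
 *	do {
 *		error = sosendcheck(so, addr, resid, clen, atomic, flags,
 *		    &sblocked);
 *		if (error)
 *			break;
 *		... build an mbuf chain and call pru_send ...
 *	} while (resid);
 *	if (sblocked)
 *		sbunlock(&so->so_snd, FALSE);
 *	else
 *		socket_unlock(so, 1);
 *
 * sbunlock(..., FALSE) also drops the socket lock, mirroring the cleanup at
 * the end of sosend() below.
 */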
2066
2067 /*
2068 * Send on a socket.
2069 * If send must go all at once and message is larger than
2070 * send buffering, then hard error.
2071 * Lock against other senders.
2072 * If must go all at once and not enough room now, then
2073 * inform user that this would block and do nothing.
2074 * Otherwise, if nonblocking, send as much as possible.
2075 * The data to be sent is described by "uio" if nonzero,
2076 * otherwise by the mbuf chain "top" (which must be null
2077 * if uio is not). Data provided in mbuf chain must be small
2078 * enough to send all at once.
2079 *
2080 * Returns nonzero on error, timeout or signal; callers
2081 * must check for short counts if EINTR/ERESTART are returned.
2082 * Data and control buffers are freed on return.
2083 *
2084 * Returns: 0 Success
2085 * EOPNOTSUPP
2086 * EINVAL
2087 * ENOBUFS
2088 * uiomove:EFAULT
2089 * sosendcheck:EPIPE
2090 * sosendcheck:EWOULDBLOCK
2091 * sosendcheck:EINTR
2092 * sosendcheck:EBADF
2093 * sosendcheck:EINTR
2094 * sosendcheck:??? [value from so_error]
2095 * <pru_send>:ECONNRESET[TCP]
2096 * <pru_send>:EINVAL[TCP]
2097 * <pru_send>:ENOBUFS[TCP]
2098 * <pru_send>:EADDRINUSE[TCP]
2099 * <pru_send>:EADDRNOTAVAIL[TCP]
2100 * <pru_send>:EAFNOSUPPORT[TCP]
2101 * <pru_send>:EACCES[TCP]
2102 * <pru_send>:EAGAIN[TCP]
2103 * <pru_send>:EPERM[TCP]
2104 * <pru_send>:EMSGSIZE[TCP]
2105 * <pru_send>:EHOSTUNREACH[TCP]
2106 * <pru_send>:ENETUNREACH[TCP]
2107 * <pru_send>:ENETDOWN[TCP]
2108 * <pru_send>:ENOMEM[TCP]
2109 * <pru_send>:ENOBUFS[TCP]
2110 * <pru_send>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2111 * <pru_send>:EINVAL[AF_UNIX]
2112 * <pru_send>:EOPNOTSUPP[AF_UNIX]
2113 * <pru_send>:EPIPE[AF_UNIX]
2114 * <pru_send>:ENOTCONN[AF_UNIX]
2115 * <pru_send>:EISCONN[AF_UNIX]
2116 * <pru_send>:???[AF_UNIX] [whatever a filter author chooses]
2117 * <sf_data_out>:??? [whatever a filter author chooses]
2118 *
2119 * Notes: Other <pru_send> returns depend on the protocol family; all
2120 * <sf_data_out> returns depend on what the filter author causes
2121 * their filter to return.
2122 */
2123 int
2124 sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
2125 struct mbuf *top, struct mbuf *control, int flags)
2126 {
2127 struct mbuf **mp;
2128 struct mbuf *m, *freelist = NULL;
2129 user_ssize_t space, len, resid, orig_resid;
2130 int clen = 0, error, dontroute, mlen, sendflags;
2131 int atomic = sosendallatonce(so) || top;
2132 int sblocked = 0;
2133 struct proc *p = current_proc();
2134 uint16_t headroom = 0;
2135 boolean_t en_tracing = FALSE;
2136
2137 if (uio != NULL) {
2138 resid = uio_resid(uio);
2139 } else {
2140 resid = top->m_pkthdr.len;
2141 }
2142
2143 KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START), so, resid,
2144 so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);
2145
2146 socket_lock(so, 1);
2147
2148 /*
2149 * trace if tracing & network (vs. unix) sockets &
2150 * non-loopback
2151 */
2152 if (ENTR_SHOULDTRACE &&
2153 (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
2154 struct inpcb *inp = sotoinpcb(so);
2155 if (inp->inp_last_outifp != NULL &&
2156 !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
2157 en_tracing = TRUE;
2158 KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START,
2159 VM_KERNEL_ADDRPERM(so),
2160 ((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0),
2161 (int64_t)resid);
2162 orig_resid = resid;
2163 }
2164 }
2165
2166 /*
2167 * Re-injection should not affect process accounting
2168 */
2169 if ((flags & MSG_SKIPCFIL) == 0) {
2170 so_update_last_owner_locked(so, p);
2171 so_update_policy(so);
2172
2173 #if NECP
2174 so_update_necp_policy(so, NULL, addr);
2175 #endif /* NECP */
2176 }
2177
2178 if (so->so_type != SOCK_STREAM && (flags & MSG_OOB) != 0) {
2179 error = EOPNOTSUPP;
2180 goto out_locked;
2181 }
2182
2183 /*
2184 * In theory resid should be unsigned.
2185 * However, space must be signed, as it might be less than 0
2186 * if we over-committed, and we must use a signed comparison
2187 * of space and resid. On the other hand, a negative resid
2188 * causes us to loop sending 0-length segments to the protocol.
2189 *
2190 * Usually, MSG_EOR isn't used on SOCK_STREAM type sockets.
2191 *
2192 * Note: We limit resid to be a positive int value as we use
2193 * imin() to set bytes_to_copy -- radr://14558484
2194 */
2195 if (resid < 0 || resid > INT_MAX ||
2196 (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
2197 error = EINVAL;
2198 goto out_locked;
2199 }
2200
2201 dontroute = (flags & MSG_DONTROUTE) &&
2202 (so->so_options & SO_DONTROUTE) == 0 &&
2203 (so->so_proto->pr_flags & PR_ATOMIC);
2204 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
2205
2206 if (control != NULL) {
2207 clen = control->m_len;
2208 }
2209
2210 if (soreserveheadroom != 0) {
2211 headroom = so->so_pktheadroom;
2212 }
2213
2214 do {
2215 error = sosendcheck(so, addr, resid, clen, atomic, flags,
2216 &sblocked);
2217 if (error) {
2218 goto out_locked;
2219 }
2220
2221 mp = &top;
2222 space = sbspace(&so->so_snd) - clen;
2223 space += ((flags & MSG_OOB) ? 1024 : 0);
2224
2225 do {
2226 if (uio == NULL) {
2227 /*
2228 * Data is prepackaged in "top".
2229 */
2230 resid = 0;
2231 if (flags & MSG_EOR) {
2232 top->m_flags |= M_EOR;
2233 }
2234 } else {
2235 int chainlength;
2236 int bytes_to_copy;
2237 boolean_t jumbocl;
2238 boolean_t bigcl;
2239 int bytes_to_alloc;
2240
2241 bytes_to_copy = imin(resid, space);
2242
2243 bytes_to_alloc = bytes_to_copy;
2244 if (top == NULL) {
2245 bytes_to_alloc += headroom;
2246 }
2247
2248 if (sosendminchain > 0) {
2249 chainlength = 0;
2250 } else {
2251 chainlength = sosendmaxchain;
2252 }
2253
2254 /*
2255 * Use big 4 KB cluster when the outgoing interface
2256 * does not prefer 2 KB clusters
2257 */
2258 bigcl = !(so->so_flags1 & SOF1_IF_2KCL) ||
2259 sosendbigcl_ignore_capab;
2260
2261 /*
2262 * Attempt to use larger than system page-size
2263 * clusters for large writes only if there is
2264 * a jumbo cluster pool and if the socket is
2265 * marked accordingly.
2266 */
2267 jumbocl = sosendjcl && njcl > 0 &&
2268 ((so->so_flags & SOF_MULTIPAGES) ||
2269 sosendjcl_ignore_capab) &&
2270 bigcl;
2271
2272 socket_unlock(so, 0);
2273
2274 do {
2275 int num_needed;
2276 int hdrs_needed = (top == NULL) ? 1 : 0;
2277
2278 /*
2279 * Try to maintain a local cache of mbuf
2280 * clusters needed to complete this
2281 * write. The list is further limited to
2282 * the number that are currently needed
2283 * to fill the socket. This mechanism
2284 * allows a large number of mbufs/
2285 * clusters to be grabbed under a single
2286 * mbuf lock. If we can't get any
2287 * clusters, then fall back to trying
2288 * for mbufs. If we fail early (or
2289 * miscalculate the number needed), make
2290 * sure to release any clusters we
2291 * haven't yet consumed.
2292 */
2293 if (freelist == NULL &&
2294 bytes_to_alloc > MBIGCLBYTES &&
2295 jumbocl) {
2296 num_needed =
2297 bytes_to_alloc / M16KCLBYTES;
2298
2299 if ((bytes_to_alloc -
2300 (num_needed * M16KCLBYTES))
2301 >= MINCLSIZE) {
2302 num_needed++;
2303 }
2304
2305 freelist =
2306 m_getpackets_internal(
2307 (unsigned int *)&num_needed,
2308 hdrs_needed, M_WAIT, 0,
2309 M16KCLBYTES);
2310 /*
2311 * Fall back to 4K cluster size
2312 * if allocation failed
2313 */
2314 }
2315
2316 if (freelist == NULL &&
2317 bytes_to_alloc > MCLBYTES &&
2318 bigcl) {
2319 num_needed =
2320 bytes_to_alloc / MBIGCLBYTES;
2321
2322 if ((bytes_to_alloc -
2323 (num_needed * MBIGCLBYTES)) >=
2324 MINCLSIZE) {
2325 num_needed++;
2326 }
2327
2328 freelist =
2329 m_getpackets_internal(
2330 (unsigned int *)&num_needed,
2331 hdrs_needed, M_WAIT, 0,
2332 MBIGCLBYTES);
2333 /*
2334 * Fall back to cluster size
2335 * if allocation failed
2336 */
2337 }
2338
2339 /*
2340 * Allocate a cluster as we want to
2341 * avoid splitting the data into more
2342 * than one segment; using MINCLSIZE
2343 * would lead us to allocate two mbufs
2344 */
2345 if (soreserveheadroom != 0 &&
2346 freelist == NULL &&
2347 ((top == NULL &&
2348 bytes_to_alloc > _MHLEN) ||
2349 bytes_to_alloc > _MLEN)) {
2350 num_needed = ROUNDUP(bytes_to_alloc, MCLBYTES) /
2351 MCLBYTES;
2352 freelist =
2353 m_getpackets_internal(
2354 (unsigned int *)&num_needed,
2355 hdrs_needed, M_WAIT, 0,
2356 MCLBYTES);
2357 /*
2358 * Fall back to a single mbuf
2359 * if allocation failed
2360 */
2361 } else if (freelist == NULL &&
2362 bytes_to_alloc > MINCLSIZE) {
2363 num_needed =
2364 bytes_to_alloc / MCLBYTES;
2365
2366 if ((bytes_to_alloc -
2367 (num_needed * MCLBYTES)) >=
2368 MINCLSIZE) {
2369 num_needed++;
2370 }
2371
2372 freelist =
2373 m_getpackets_internal(
2374 (unsigned int *)&num_needed,
2375 hdrs_needed, M_WAIT, 0,
2376 MCLBYTES);
2377 /*
2378 * Fall back to a single mbuf
2379 * if allocation failed
2380 */
2381 }
2382 /*
2383 * For datagram protocols, leave
2384 * headroom for protocol headers
2385 * in the first cluster of the chain
2386 */
2387 if (freelist != NULL && atomic &&
2388 top == NULL && headroom > 0) {
2389 freelist->m_data += headroom;
2390 }
2391
2392 /*
2393 * Fall back to regular mbufs without
2394 * reserving the socket headroom
2395 */
2396 if (freelist == NULL) {
2397 if (SOCK_TYPE(so) != SOCK_STREAM || bytes_to_alloc <= MINCLSIZE) {
2398 if (top == NULL) {
2399 MGETHDR(freelist,
2400 M_WAIT, MT_DATA);
2401 } else {
2402 MGET(freelist,
2403 M_WAIT, MT_DATA);
2404 }
2405 }
2406
2407 if (freelist == NULL) {
2408 error = ENOBUFS;
2409 socket_lock(so, 0);
2410 goto out_locked;
2411 }
2412 /*
2413 * For datagram protocols,
2414 * leave room for protocol
2415 * headers in first mbuf.
2416 */
2417 if (atomic && top == NULL &&
2418 bytes_to_copy < MHLEN) {
2419 MH_ALIGN(freelist,
2420 bytes_to_copy);
2421 }
2422 }
2423 m = freelist;
2424 freelist = m->m_next;
2425 m->m_next = NULL;
2426
2427 if ((m->m_flags & M_EXT)) {
2428 mlen = m->m_ext.ext_size -
2429 M_LEADINGSPACE(m);
2430 } else if ((m->m_flags & M_PKTHDR)) {
2431 mlen =
2432 MHLEN - M_LEADINGSPACE(m);
2433 } else {
2434 mlen = MLEN - M_LEADINGSPACE(m);
2435 }
2436 len = imin(mlen, bytes_to_copy);
2437
2438 chainlength += len;
2439
2440 space -= len;
2441
2442 error = uiomove(mtod(m, caddr_t),
2443 len, uio);
2444
2445 resid = uio_resid(uio);
2446
2447 m->m_len = len;
2448 *mp = m;
2449 top->m_pkthdr.len += len;
2450 if (error) {
2451 break;
2452 }
2453 mp = &m->m_next;
2454 if (resid <= 0) {
2455 if (flags & MSG_EOR) {
2456 top->m_flags |= M_EOR;
2457 }
2458 break;
2459 }
2460 bytes_to_copy = min(resid, space);
2461 } while (space > 0 &&
2462 (chainlength < sosendmaxchain || atomic ||
2463 resid < MINCLSIZE));
2464
2465 socket_lock(so, 0);
2466
2467 if (error) {
2468 goto out_locked;
2469 }
2470 }
2471
2472 if (dontroute) {
2473 so->so_options |= SO_DONTROUTE;
2474 }
2475
2476 /*
2477 * Compute flags here, for pru_send and NKEs
2478 *
2479 * If the user set MSG_EOF, the protocol
2480 * understands this flag, and there is nothing left to
2481 * send, then use PRU_SEND_EOF instead of PRU_SEND.
2482 */
2483 sendflags = (flags & MSG_OOB) ? PRUS_OOB :
2484 ((flags & MSG_EOF) &&
2485 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
2486 (resid <= 0)) ? PRUS_EOF :
2487 /* If there is more to send set PRUS_MORETOCOME */
2488 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
2489
2490 if ((flags & MSG_SKIPCFIL) == 0) {
2491 /*
2492 * Socket filter processing
2493 */
2494 error = sflt_data_out(so, addr, &top,
2495 &control, (sendflags & MSG_OOB) ?
2496 sock_data_filt_flag_oob : 0);
2497 if (error) {
2498 if (error == EJUSTRETURN) {
2499 error = 0;
2500 clen = 0;
2501 control = NULL;
2502 top = NULL;
2503 }
2504 goto out_locked;
2505 }
2506 #if CONTENT_FILTER
2507 /*
2508 * Content filter processing
2509 */
2510 error = cfil_sock_data_out(so, addr, top,
2511 control, sendflags);
2512 if (error) {
2513 if (error == EJUSTRETURN) {
2514 error = 0;
2515 clen = 0;
2516 control = NULL;
2517 top = NULL;
2518 }
2519 goto out_locked;
2520 }
2521 #endif /* CONTENT_FILTER */
2522 }
2523 error = (*so->so_proto->pr_usrreqs->pru_send)
2524 (so, sendflags, top, addr, control, p);
2525
2526 if (dontroute) {
2527 so->so_options &= ~SO_DONTROUTE;
2528 }
2529
2530 clen = 0;
2531 control = NULL;
2532 top = NULL;
2533 mp = &top;
2534 if (error) {
2535 goto out_locked;
2536 }
2537 } while (resid && space > 0);
2538 } while (resid);
2539
2540 out_locked:
2541 if (sblocked) {
2542 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
2543 } else {
2544 socket_unlock(so, 1);
2545 }
2546 if (top != NULL) {
2547 m_freem(top);
2548 }
2549 if (control != NULL) {
2550 m_freem(control);
2551 }
2552 if (freelist != NULL) {
2553 m_freem_list(freelist);
2554 }
2555
2556 soclearfastopen(so);
2557
2558 if (en_tracing) {
2559 /* resid passed here is the bytes left in uio */
2560 KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END,
2561 VM_KERNEL_ADDRPERM(so),
2562 ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
2563 (int64_t)(orig_resid - resid));
2564 }
2565 KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, so, resid,
2566 so->so_snd.sb_cc, space, error);
2567
2568 return error;
2569 }
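/*
 * Hedged in-kernel usage sketch (not part of the original source): callers
 * hand sosend() either a uio or a prebuilt mbuf chain in "top". Assuming the
 * kernel uio_create()/uio_addiov()/uio_free() helpers, a simple kernel-space
 * write of a buffer could look like:
 *
 *	uio_t auio;
 *	int error;
 *
 *	auio = uio_create(1, 0, UIO_SYSSPACE, UIO_WRITE);
 *	uio_addiov(auio, CAST_USER_ADDR_T(buf), buflen);
 *	error = sosend(so, NULL, auio, NULL, NULL, 0);
 *	uio_free(auio);
 */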
2570
2571 int
2572 sosend_reinject(struct socket *so, struct sockaddr *addr, struct mbuf *top, struct mbuf *control, uint32_t sendflags)
2573 {
2574 struct mbuf *m0 = NULL, *control_end = NULL;
2575
2576 socket_lock_assert_owned(so);
2577
2578 /*
2579 * top must point to an mbuf chain to be sent.
2580 * If control is not NULL, top must be a packet header
2581 */
2582 VERIFY(top != NULL &&
2583 (control == NULL || top->m_flags & M_PKTHDR));
2584
2585 /*
2586 * If control is not passed in, see if we can get it
2587 * from top.
2588 */
2589 if (control == NULL && (top->m_flags & M_PKTHDR) == 0) {
2590 // Locate start of control if present and start of data
2591 for (m0 = top; m0 != NULL; m0 = m0->m_next) {
2592 if (m0->m_flags & M_PKTHDR) {
2593 top = m0;
2594 break;
2595 } else if (m0->m_type == MT_CONTROL) {
2596 if (control == NULL) {
2597 // Found start of control
2598 control = m0;
2599 }
2600 if (control != NULL && m0->m_next != NULL && m0->m_next->m_type != MT_CONTROL) {
2601 // Found end of control
2602 control_end = m0;
2603 }
2604 }
2605 }
2606 if (control_end != NULL) {
2607 control_end->m_next = NULL;
2608 }
2609 }
2610
2611 int error = (*so->so_proto->pr_usrreqs->pru_send)
2612 (so, sendflags, top, addr, control, current_proc());
2613
2614 return error;
2615 }
2616
2617 /*
2618 * Supports only connected sockets (no address) without ancillary data
2619 * (control mbuf), for atomic protocols
2620 */
2621 int
2622 sosend_list(struct socket *so, struct uio **uioarray, u_int uiocnt, int flags)
2623 {
2624 struct mbuf *m, *freelist = NULL;
2625 user_ssize_t len, resid;
2626 int error, dontroute, mlen;
2627 int atomic = sosendallatonce(so);
2628 int sblocked = 0;
2629 struct proc *p = current_proc();
2630 u_int uiofirst = 0;
2631 u_int uiolast = 0;
2632 struct mbuf *top = NULL;
2633 uint16_t headroom = 0;
2634 boolean_t bigcl;
2635
2636 KERNEL_DEBUG((DBG_FNC_SOSEND_LIST | DBG_FUNC_START), so, uiocnt,
2637 so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);
2638
2639 if (so->so_type != SOCK_DGRAM) {
2640 error = EINVAL;
2641 goto out;
2642 }
2643 if (atomic == 0) {
2644 error = EINVAL;
2645 goto out;
2646 }
2647 if (so->so_proto->pr_usrreqs->pru_send_list == NULL) {
2648 error = EPROTONOSUPPORT;
2649 goto out;
2650 }
2651 if (flags & ~(MSG_DONTWAIT | MSG_NBIO)) {
2652 error = EINVAL;
2653 goto out;
2654 }
2655 resid = uio_array_resid(uioarray, uiocnt);
2656
2657 /*
2658 * In theory resid should be unsigned.
2659 * However, space must be signed, as it might be less than 0
2660 * if we over-committed, and we must use a signed comparison
2661 * of space and resid. On the other hand, a negative resid
2662 * causes us to loop sending 0-length segments to the protocol.
2663 *
2664 * Note: We limit resid to be a positive int value as we use
2665 * imin() to set bytes_to_copy -- radr://14558484
2666 */
2667 if (resid < 0 || resid > INT_MAX) {
2668 error = EINVAL;
2669 goto out;
2670 }
2671
2672 socket_lock(so, 1);
2673 so_update_last_owner_locked(so, p);
2674 so_update_policy(so);
2675
2676 #if NECP
2677 so_update_necp_policy(so, NULL, NULL);
2678 #endif /* NECP */
2679
2680 dontroute = (flags & MSG_DONTROUTE) &&
2681 (so->so_options & SO_DONTROUTE) == 0 &&
2682 (so->so_proto->pr_flags & PR_ATOMIC);
2683 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
2684
2685 error = sosendcheck(so, NULL, resid, 0, atomic, flags, &sblocked);
2686 if (error) {
2687 goto release;
2688 }
2689
2690 /*
2691 * Use big 4 KB clusters when the outgoing interface does not prefer
2692 * 2 KB clusters
2693 */
2694 bigcl = !(so->so_flags1 & SOF1_IF_2KCL) || sosendbigcl_ignore_capab;
2695
2696 if (soreserveheadroom != 0) {
2697 headroom = so->so_pktheadroom;
2698 }
2699
2700 do {
2701 int i;
2702 int num_needed = 0;
2703 int chainlength;
2704 size_t maxpktlen = 0;
2705 int bytes_to_alloc;
2706
2707 if (sosendminchain > 0) {
2708 chainlength = 0;
2709 } else {
2710 chainlength = sosendmaxchain;
2711 }
2712
2713 socket_unlock(so, 0);
2714
2715 /*
2716 * Find a set of uios that fits in a reasonable number
2717 * of mbuf packets
2718 */
2719 for (i = uiofirst; i < uiocnt; i++) {
2720 struct uio *auio = uioarray[i];
2721
2722 len = uio_resid(auio);
2723
2724 /* Do nothing for empty messages */
2725 if (len == 0) {
2726 continue;
2727 }
2728
2729 num_needed += 1;
2730 uiolast += 1;
2731
2732 if (len > maxpktlen) {
2733 maxpktlen = len;
2734 }
2735
2736 chainlength += len;
2737 if (chainlength > sosendmaxchain) {
2738 break;
2739 }
2740 }
2741 /*
2742 * Nothing left to send
2743 */
2744 if (num_needed == 0) {
2745 socket_lock(so, 0);
2746 break;
2747 }
2748 /*
2749 * Allocate a buffer large enough to include headroom space for
2750 * network and link headers
2751 *
2752 */
2753 bytes_to_alloc = maxpktlen + headroom;
2754
2755 /*
2756 * Allocate a single contiguous buffer of the smallest available
2757 * size when possible
2758 */
2759 if (bytes_to_alloc > MCLBYTES &&
2760 bytes_to_alloc <= MBIGCLBYTES && bigcl) {
2761 freelist = m_getpackets_internal(
2762 (unsigned int *)&num_needed,
2763 num_needed, M_WAIT, 1,
2764 MBIGCLBYTES);
2765 } else if (bytes_to_alloc > _MHLEN &&
2766 bytes_to_alloc <= MCLBYTES) {
2767 freelist = m_getpackets_internal(
2768 (unsigned int *)&num_needed,
2769 num_needed, M_WAIT, 1,
2770 MCLBYTES);
2771 } else {
2772 freelist = m_allocpacket_internal(
2773 (unsigned int *)&num_needed,
2774 bytes_to_alloc, NULL, M_WAIT, 1, 0);
2775 }
2776
2777 if (freelist == NULL) {
2778 socket_lock(so, 0);
2779 error = ENOMEM;
2780 goto release;
2781 }
2782 /*
2783 * Copy each uio of the set into its own mbuf packet
2784 */
2785 for (i = uiofirst, m = freelist;
2786 i < uiolast && m != NULL;
2787 i++) {
2788 int bytes_to_copy;
2789 struct mbuf *n;
2790 struct uio *auio = uioarray[i];
2791
2792 bytes_to_copy = uio_resid(auio);
2793
2794 /* Do nothing for empty messages */
2795 if (bytes_to_copy == 0) {
2796 continue;
2797 }
2798 /*
2799 * Leave headroom for protocol headers
2800 * in the first mbuf of the chain
2801 */
2802 m->m_data += headroom;
2803
2804 for (n = m; n != NULL; n = n->m_next) {
2805 if ((m->m_flags & M_EXT)) {
2806 mlen = m->m_ext.ext_size -
2807 M_LEADINGSPACE(m);
2808 } else if ((m->m_flags & M_PKTHDR)) {
2809 mlen =
2810 MHLEN - M_LEADINGSPACE(m);
2811 } else {
2812 mlen = MLEN - M_LEADINGSPACE(m);
2813 }
2814 len = imin(mlen, bytes_to_copy);
2815
2816 /*
2817 * Note: uiomove() decrements the iovec
2818 * length
2819 */
2820 error = uiomove(mtod(n, caddr_t),
2821 len, auio);
2822 if (error != 0) {
2823 break;
2824 }
2825 n->m_len = len;
2826 m->m_pkthdr.len += len;
2827
2828 VERIFY(m->m_pkthdr.len <= maxpktlen);
2829
2830 bytes_to_copy -= len;
2831 resid -= len;
2832 }
2833 if (m->m_pkthdr.len == 0) {
2834 printf(
2835 "%s:%d so %llx pkt %llx type %u len null\n",
2836 __func__, __LINE__,
2837 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
2838 (uint64_t)DEBUG_KERNEL_ADDRPERM(m),
2839 m->m_type);
2840 }
2841 if (error != 0) {
2842 break;
2843 }
2844 m = m->m_nextpkt;
2845 }
2846
2847 socket_lock(so, 0);
2848
2849 if (error) {
2850 goto release;
2851 }
2852 top = freelist;
2853 freelist = NULL;
2854
2855 if (dontroute) {
2856 so->so_options |= SO_DONTROUTE;
2857 }
2858
2859 if ((flags & MSG_SKIPCFIL) == 0) {
2860 struct mbuf **prevnextp = NULL;
2861
2862 for (i = uiofirst, m = top;
2863 i < uiolast && m != NULL;
2864 i++) {
2865 struct mbuf *nextpkt = m->m_nextpkt;
2866
2867 /*
2868 * Socket filter processing
2869 */
2870 error = sflt_data_out(so, NULL, &m,
2871 NULL, 0);
2872 if (error != 0 && error != EJUSTRETURN) {
2873 goto release;
2874 }
2875
2876 #if CONTENT_FILTER
2877 if (error == 0) {
2878 /*
2879 * Content filter processing
2880 */
2881 error = cfil_sock_data_out(so, NULL, m,
2882 NULL, 0);
2883 if (error != 0 && error != EJUSTRETURN) {
2884 goto release;
2885 }
2886 }
2887 #endif /* CONTENT_FILTER */
2888 /*
2889 * Remove packet from the list when
2890 * swallowed by a filter
2891 */
2892 if (error == EJUSTRETURN) {
2893 error = 0;
2894 if (prevnextp != NULL) {
2895 *prevnextp = nextpkt;
2896 } else {
2897 top = nextpkt;
2898 }
2899 }
2900
2901 m = nextpkt;
2902 if (m != NULL) {
2903 prevnextp = &m->m_nextpkt;
2904 }
2905 }
2906 }
2907 if (top != NULL) {
2908 error = (*so->so_proto->pr_usrreqs->pru_send_list)
2909 (so, 0, top, NULL, NULL, p);
2910 }
2911
2912 if (dontroute) {
2913 so->so_options &= ~SO_DONTROUTE;
2914 }
2915
2916 top = NULL;
2917 uiofirst = uiolast;
2918 } while (resid > 0 && error == 0);
2919 release:
2920 if (sblocked) {
2921 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
2922 } else {
2923 socket_unlock(so, 1);
2924 }
2925 out:
2926 if (top != NULL) {
2927 m_freem(top);
2928 }
2929 if (freelist != NULL) {
2930 m_freem_list(freelist);
2931 }
2932
2933 KERNEL_DEBUG(DBG_FNC_SOSEND_LIST | DBG_FUNC_END, so, resid,
2934 so->so_snd.sb_cc, 0, error);
2935
2936 return error;
2937 }
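/*
 * Hedged usage sketch (not part of the original source): sosend_list() sends
 * a batch of datagrams, one per uio, on a connected SOCK_DGRAM socket. With
 * the same uio helpers assumed above, two datagrams could be queued as:
 *
 *	struct uio *uios[2];
 *
 *	uios[0] = uio_create(1, 0, UIO_SYSSPACE, UIO_WRITE);
 *	uio_addiov(uios[0], CAST_USER_ADDR_T(pkt0), len0);
 *	uios[1] = uio_create(1, 0, UIO_SYSSPACE, UIO_WRITE);
 *	uio_addiov(uios[1], CAST_USER_ADDR_T(pkt1), len1);
 *	error = sosend_list(so, uios, 2, 0);
 */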
2938
2939 /*
2940 * May return ERESTART when packet is dropped by MAC policy check
2941 */
2942 static int
2943 soreceive_addr(struct proc *p, struct socket *so, struct sockaddr **psa,
2944 int flags, struct mbuf **mp, struct mbuf **nextrecordp, int canwait)
2945 {
2946 int error = 0;
2947 struct mbuf *m = *mp;
2948 struct mbuf *nextrecord = *nextrecordp;
2949
2950 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
2951 #if CONFIG_MACF_SOCKET_SUBSET
2952 /*
2953 * Call the MAC framework for policy checking if we're in
2954 * the user process context and the socket isn't connected.
2955 */
2956 if (p != kernproc && !(so->so_state & SS_ISCONNECTED)) {
2957 struct mbuf *m0 = m;
2958 /*
2959 * Dequeue this record (temporarily) from the receive
2960 * list since we're about to drop the socket's lock
2961 * where a new record may arrive and be appended to
2962 * the list. Upon MAC policy failure, the record
2963 * will be freed. Otherwise, we'll add it back to
2964 * the head of the list. We cannot rely on SB_LOCK
2965 * because append operation uses the socket's lock.
2966 */
2967 do {
2968 m->m_nextpkt = NULL;
2969 sbfree(&so->so_rcv, m);
2970 m = m->m_next;
2971 } while (m != NULL);
2972 m = m0;
2973 so->so_rcv.sb_mb = nextrecord;
2974 SB_EMPTY_FIXUP(&so->so_rcv);
2975 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1a");
2976 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1a");
2977 socket_unlock(so, 0);
2978
2979 if (mac_socket_check_received(proc_ucred(p), so,
2980 mtod(m, struct sockaddr *)) != 0) {
2981 /*
2982 * MAC policy failure; free this record and
2983 * process the next record (or block until
2984 * one is available). We have adjusted sb_cc
2985 * and sb_mbcnt above so there is no need to
2986 * call sbfree() again.
2987 */
2988 m_freem(m);
2989 /*
2990 * Clear SB_LOCK but don't unlock the socket.
2991 * Process the next record or wait for one.
2992 */
2993 socket_lock(so, 0);
2994 sbunlock(&so->so_rcv, TRUE); /* stay locked */
2995 error = ERESTART;
2996 goto done;
2997 }
2998 socket_lock(so, 0);
2999 /*
3000 * If the socket has been defunct'd, drop it.
3001 */
3002 if (so->so_flags & SOF_DEFUNCT) {
3003 m_freem(m);
3004 error = ENOTCONN;
3005 goto done;
3006 }
3007 /*
3008 * Re-adjust the socket receive list and re-enqueue
3009 * the record in front of any packets which may have
3010 * been appended while we dropped the lock.
3011 */
3012 for (m = m0; m->m_next != NULL; m = m->m_next) {
3013 sballoc(&so->so_rcv, m);
3014 }
3015 sballoc(&so->so_rcv, m);
3016 if (so->so_rcv.sb_mb == NULL) {
3017 so->so_rcv.sb_lastrecord = m0;
3018 so->so_rcv.sb_mbtail = m;
3019 }
3020 m = m0;
3021 nextrecord = m->m_nextpkt = so->so_rcv.sb_mb;
3022 so->so_rcv.sb_mb = m;
3023 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1b");
3024 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1b");
3025 }
3026 #endif /* CONFIG_MACF_SOCKET_SUBSET */
3027 if (psa != NULL) {
3028 *psa = dup_sockaddr(mtod(m, struct sockaddr *), canwait);
3029 if ((*psa == NULL) && (flags & MSG_NEEDSA)) {
3030 error = EWOULDBLOCK;
3031 goto done;
3032 }
3033 }
3034 if (flags & MSG_PEEK) {
3035 m = m->m_next;
3036 } else {
3037 sbfree(&so->so_rcv, m);
3038 if (m->m_next == NULL && so->so_rcv.sb_cc != 0) {
3039 panic("%s: about to create invalid socketbuf",
3040 __func__);
3041 /* NOTREACHED */
3042 }
3043 MFREE(m, so->so_rcv.sb_mb);
3044 m = so->so_rcv.sb_mb;
3045 if (m != NULL) {
3046 m->m_nextpkt = nextrecord;
3047 } else {
3048 so->so_rcv.sb_mb = nextrecord;
3049 SB_EMPTY_FIXUP(&so->so_rcv);
3050 }
3051 }
3052 done:
3053 *mp = m;
3054 *nextrecordp = nextrecord;
3055
3056 return error;
3057 }
3058
3059 /*
3060 * Process one or more MT_CONTROL mbufs present before any data mbufs
3061 * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
3062 * just copy the data; if !MSG_PEEK, we call into the protocol to
3063 * perform externalization.
3064 */
3065 static int
3066 soreceive_ctl(struct socket *so, struct mbuf **controlp, int flags,
3067 struct mbuf **mp, struct mbuf **nextrecordp)
3068 {
3069 int error = 0;
3070 struct mbuf *cm = NULL, *cmn;
3071 struct mbuf **cme = &cm;
3072 struct sockbuf *sb_rcv = &so->so_rcv;
3073 struct mbuf **msgpcm = NULL;
3074 struct mbuf *m = *mp;
3075 struct mbuf *nextrecord = *nextrecordp;
3076 struct protosw *pr = so->so_proto;
3077
3078 /*
3079 * Externalizing the control messages would require us to
3080 * drop the socket's lock below. Once we re-acquire the
3081 * lock, the mbuf chain might change. In order to preserve
3082 * consistency, we unlink all control messages from the
3083 * first mbuf chain in one shot and link them separately
3084 * onto a different chain.
3085 */
3086 do {
3087 if (flags & MSG_PEEK) {
3088 if (controlp != NULL) {
3089 if (*controlp == NULL) {
3090 msgpcm = controlp;
3091 }
3092 *controlp = m_copy(m, 0, m->m_len);
3093
3094 /*
3095 * If we failed to allocate an mbuf,
3096 * release any previously allocated
3097 * mbufs for control data. Return
3098 * an error. Keep the mbufs in the
3099 * socket as this is using
3100 * MSG_PEEK flag.
3101 */
3102 if (*controlp == NULL) {
3103 m_freem(*msgpcm);
3104 error = ENOBUFS;
3105 goto done;
3106 }
3107 controlp = &(*controlp)->m_next;
3108 }
3109 m = m->m_next;
3110 } else {
3111 m->m_nextpkt = NULL;
3112 sbfree(sb_rcv, m);
3113 sb_rcv->sb_mb = m->m_next;
3114 m->m_next = NULL;
3115 *cme = m;
3116 cme = &(*cme)->m_next;
3117 m = sb_rcv->sb_mb;
3118 }
3119 } while (m != NULL && m->m_type == MT_CONTROL);
3120
3121 if (!(flags & MSG_PEEK)) {
3122 if (sb_rcv->sb_mb != NULL) {
3123 sb_rcv->sb_mb->m_nextpkt = nextrecord;
3124 } else {
3125 sb_rcv->sb_mb = nextrecord;
3126 SB_EMPTY_FIXUP(sb_rcv);
3127 }
3128 if (nextrecord == NULL) {
3129 sb_rcv->sb_lastrecord = m;
3130 }
3131 }
3132
3133 SBLASTRECORDCHK(&so->so_rcv, "soreceive ctl");
3134 SBLASTMBUFCHK(&so->so_rcv, "soreceive ctl");
3135
3136 while (cm != NULL) {
3137 int cmsg_type;
3138
3139 cmn = cm->m_next;
3140 cm->m_next = NULL;
3141 cmsg_type = mtod(cm, struct cmsghdr *)->cmsg_type;
3142
3143 /*
3144 * Call the protocol to externalize SCM_RIGHTS message
3145 * and return the modified message to the caller upon
3146 * success. Otherwise, all other control messages are
3147 * returned unmodified to the caller. Note that we
3148 * only get into this loop if MSG_PEEK is not set.
3149 */
3150 if (pr->pr_domain->dom_externalize != NULL &&
3151 cmsg_type == SCM_RIGHTS) {
3152 /*
3153 * Release socket lock: see 3903171. This
3154 * would also allow more records to be appended
3155 * to the socket buffer. We still have SB_LOCK
3156 * set on it, so we can be sure that the head
3157 * of the mbuf chain won't change.
3158 */
3159 socket_unlock(so, 0);
3160 error = (*pr->pr_domain->dom_externalize)(cm);
3161 socket_lock(so, 0);
3162 } else {
3163 error = 0;
3164 }
3165
3166 if (controlp != NULL && error == 0) {
3167 *controlp = cm;
3168 controlp = &(*controlp)->m_next;
3169 } else {
3170 (void) m_free(cm);
3171 }
3172 cm = cmn;
3173 }
3174 /*
3175 * Update the value of nextrecord in case we received new
3176 * records when the socket was unlocked above for
3177 * externalizing SCM_RIGHTS.
3178 */
3179 if (m != NULL) {
3180 nextrecord = sb_rcv->sb_mb->m_nextpkt;
3181 } else {
3182 nextrecord = sb_rcv->sb_mb;
3183 }
3184
3185 done:
3186 *mp = m;
3187 *nextrecordp = nextrecord;
3188
3189 return error;
3190 }
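/*
 * For context (not part of the original source): the SCM_RIGHTS
 * externalization above is what turns in-flight descriptors into usable file
 * descriptors for the receiver. A standard userspace consumer of such
 * control data looks like:
 *
 *	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *	char cbuf[CMSG_SPACE(sizeof(int))];
 *	struct msghdr msg = { 0 };
 *	struct cmsghdr *cmsg;
 *	int fd = -1;
 *
 *	msg.msg_iov = &iov;
 *	msg.msg_iovlen = 1;
 *	msg.msg_control = cbuf;
 *	msg.msg_controllen = sizeof(cbuf);
 *	if (recvmsg(s, &msg, 0) >= 0 &&
 *	    (cmsg = CMSG_FIRSTHDR(&msg)) != NULL &&
 *	    cmsg->cmsg_level == SOL_SOCKET &&
 *	    cmsg->cmsg_type == SCM_RIGHTS)
 *		memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
 */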
3191
3192 /*
3193 * If we have less data than requested, block awaiting more
3194 * (subject to any timeout) if:
3195 * 1. the current count is less than the low water mark, or
3196 * 2. MSG_WAITALL is set, and it is possible to do the entire
3197 * receive operation at once if we block (resid <= hiwat), and
3198 * 3. MSG_DONTWAIT is not set.
3199 * If MSG_WAITALL is set but resid is larger than the receive buffer,
3200 * we have to do the receive in sections, and thus risk returning
3201 * a short count if a timeout or signal occurs after we start.
3202 */
3203 static boolean_t
3204 so_should_wait(struct socket *so, struct uio *uio, struct mbuf *m, int flags)
3205 {
3206 struct protosw *pr = so->so_proto;
3207
3208 /* No mbufs in the receive-queue? Wait! */
3209 if (m == NULL) {
3210 return true;
3211 }
3212
3213 /* Not enough data in the receive socket-buffer - we may have to wait */
3214 if ((flags & MSG_DONTWAIT) == 0 && so->so_rcv.sb_cc < uio_resid(uio) &&
3215 m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0) {
3216 /*
3217 * Application did set the lowater-mark, so we should wait for
3218 * this data to be present.
3219 */
3220 if (so->so_rcv.sb_cc < so->so_rcv.sb_lowat) {
3221 return true;
3222 }
3223
3224 /*
3225 * Application wants all the data - so let's try to do the
3226 * receive-operation at once by waiting for everything to
3227 * be there.
3228 */
3229 if ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat) {
3230 return true;
3231 }
3232 }
3233
3234 return false;
3235 }
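/*
 * For context (not part of the original source): the low-water check above
 * is what SO_RCVLOWAT feeds (it sets so_rcv.sb_lowat). A standard userspace
 * example that makes a blocking recv() wait until at least 512 bytes are
 * queued (or an error/EOF occurs):
 *
 *	int lowat = 512;
 *
 *	if (setsockopt(s, SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof(lowat)) == 0)
 *		n = recv(s, buf, sizeof(buf), 0);
 */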
3236
3237 /*
3238 * Implement receive operations on a socket.
3239 * We depend on the way that records are added to the sockbuf
3240 * by sbappend*. In particular, each record (mbufs linked through m_next)
3241 * must begin with an address if the protocol so specifies,
3242 * followed by an optional mbuf or mbufs containing ancillary data,
3243 * and then zero or more mbufs of data.
3244 * In order to avoid blocking network interrupts for the entire time here,
3245 * we splx() while doing the actual copy to user space.
3246 * Although the sockbuf is locked, new data may still be appended,
3247 * and thus we must maintain consistency of the sockbuf during that time.
3248 *
3249 * The caller may receive the data as a single mbuf chain by supplying
3250 * an mbuf **mp0 for use in returning the chain. The uio is then used
3251 * only for the count in uio_resid.
3252 *
3253 * Returns: 0 Success
3254 * ENOBUFS
3255 * ENOTCONN
3256 * EWOULDBLOCK
3257 * uiomove:EFAULT
3258 * sblock:EWOULDBLOCK
3259 * sblock:EINTR
3260 * sbwait:EBADF
3261 * sbwait:EINTR
3262 * sodelayed_copy:EFAULT
3263 * <pru_rcvoob>:EINVAL[TCP]
3264 * <pru_rcvoob>:EWOULDBLOCK[TCP]
3265 * <pru_rcvoob>:???
3266 * <pr_domain->dom_externalize>:EMSGSIZE[AF_UNIX]
3267 * <pr_domain->dom_externalize>:ENOBUFS[AF_UNIX]
3268 * <pr_domain->dom_externalize>:???
3269 *
3270 * Notes: Additional return values from calls through <pru_rcvoob> and
3271 * <pr_domain->dom_externalize> depend on protocols other than
3272 * TCP or AF_UNIX, which are documented above.
3273 */
3274 int
3275 soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
3276 struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
3277 {
3278 struct mbuf *m, **mp, *ml = NULL;
3279 struct mbuf *nextrecord, *free_list;
3280 int flags, error, offset;
3281 user_ssize_t len;
3282 struct protosw *pr = so->so_proto;
3283 int moff, type = 0;
3284 user_ssize_t orig_resid = uio_resid(uio);
3285 user_ssize_t delayed_copy_len;
3286 int can_delay;
3287 struct proc *p = current_proc();
3288 boolean_t en_tracing = FALSE;
3289
3290 /*
3291 * Sanity check on the length passed by caller as we are making 'int'
3292 * comparisons
3293 */
3294 if (orig_resid < 0 || orig_resid > INT_MAX) {
3295 return EINVAL;
3296 }
3297
3298 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, so,
3299 uio_resid(uio), so->so_rcv.sb_cc, so->so_rcv.sb_lowat,
3300 so->so_rcv.sb_hiwat);
3301
3302 socket_lock(so, 1);
3303 so_update_last_owner_locked(so, p);
3304 so_update_policy(so);
3305
3306 #ifdef MORE_LOCKING_DEBUG
3307 if (so->so_usecount == 1) {
3308 panic("%s: so=%x no other reference on socket\n", __func__, so);
3309 /* NOTREACHED */
3310 }
3311 #endif
3312 mp = mp0;
3313 if (psa != NULL) {
3314 *psa = NULL;
3315 }
3316 if (controlp != NULL) {
3317 *controlp = NULL;
3318 }
3319 if (flagsp != NULL) {
3320 flags = *flagsp & ~MSG_EOR;
3321 } else {
3322 flags = 0;
3323 }
3324
3325 /*
3326 * If a recv attempt is made on a previously-accepted socket
3327 * that has been marked as inactive (disconnected), reject
3328 * the request.
3329 */
3330 if (so->so_flags & SOF_DEFUNCT) {
3331 struct sockbuf *sb = &so->so_rcv;
3332
3333 error = ENOTCONN;
3334 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
3335 __func__, proc_pid(p), proc_best_name(p),
3336 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
3337 SOCK_DOM(so), SOCK_TYPE(so), error);
3338 /*
3339 * This socket should have been disconnected and flushed
3340 * prior to being returned from sodefunct(); there should
3341 * be no data on its receive list, so panic otherwise.
3342 */
3343 if (so->so_state & SS_DEFUNCT) {
3344 sb_empty_assert(sb, __func__);
3345 }
3346 socket_unlock(so, 1);
3347 return error;
3348 }
3349
3350 if ((so->so_flags1 & SOF1_PRECONNECT_DATA) &&
3351 pr->pr_usrreqs->pru_preconnect) {
3352 /*
3353 * A user may set the CONNECT_RESUME_ON_READ_WRITE flag but not
3354 * call write() right after this. *If* the app calls a read,
3355 * we do not want to block this read indefinitely. Thus,
3356 * we trigger a connect so that the session gets initiated.
3357 */
3358 error = (*pr->pr_usrreqs->pru_preconnect)(so);
3359
3360 if (error) {
3361 socket_unlock(so, 1);
3362 return error;
3363 }
3364 }
3365
3366 if (ENTR_SHOULDTRACE &&
3367 (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
3368 /*
3369 * enable energy tracing for inet sockets that go over
3370 * non-loopback interfaces only.
3371 */
3372 struct inpcb *inp = sotoinpcb(so);
3373 if (inp->inp_last_outifp != NULL &&
3374 !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
3375 en_tracing = TRUE;
3376 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_START,
3377 VM_KERNEL_ADDRPERM(so),
3378 ((so->so_state & SS_NBIO) ?
3379 kEnTrFlagNonBlocking : 0),
3380 (int64_t)orig_resid);
3381 }
3382 }
3383
3384 /*
3385 * When SO_WANTOOBFLAG is set we try to get out-of-band data
3386 * regardless of the flags argument. Here is the case where
3387 * out-of-band data is not inline.
3388 */
3389 if ((flags & MSG_OOB) ||
3390 ((so->so_options & SO_WANTOOBFLAG) != 0 &&
3391 (so->so_options & SO_OOBINLINE) == 0 &&
3392 (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
3393 m = m_get(M_WAIT, MT_DATA);
3394 if (m == NULL) {
3395 socket_unlock(so, 1);
3396 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
3397 ENOBUFS, 0, 0, 0, 0);
3398 return ENOBUFS;
3399 }
3400 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
3401 if (error) {
3402 goto bad;
3403 }
3404 socket_unlock(so, 0);
3405 do {
3406 error = uiomove(mtod(m, caddr_t),
3407 imin(uio_resid(uio), m->m_len), uio);
3408 m = m_free(m);
3409 } while (uio_resid(uio) && error == 0 && m != NULL);
3410 socket_lock(so, 0);
3411 bad:
3412 if (m != NULL) {
3413 m_freem(m);
3414 }
3415
3416 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
3417 if (error == EWOULDBLOCK || error == EINVAL) {
3418 /*
3419 * Let's try to get normal data:
3420 * EWOULDBLOCK: out-of-band data not
3421 * received yet. EINVAL: out-of-band data
3422 * already read.
3423 */
3424 error = 0;
3425 goto nooob;
3426 } else if (error == 0 && flagsp != NULL) {
3427 *flagsp |= MSG_OOB;
3428 }
3429 }
3430 socket_unlock(so, 1);
3431 if (en_tracing) {
3432 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
3433 VM_KERNEL_ADDRPERM(so), 0,
3434 (int64_t)(orig_resid - uio_resid(uio)));
3435 }
3436 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
3437 0, 0, 0, 0);
3438
3439 return error;
3440 }
3441 nooob:
3442 if (mp != NULL) {
3443 *mp = NULL;
3444 }
3445
3446 if (so->so_state & SS_ISCONFIRMING && uio_resid(uio)) {
3447 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
3448 }
3449
3450 free_list = NULL;
3451 delayed_copy_len = 0;
3452 restart:
3453 #ifdef MORE_LOCKING_DEBUG
3454 if (so->so_usecount <= 1) {
3455 printf("soreceive: sblock so=0x%llx ref=%d on socket\n",
3456 (uint64_t)DEBUG_KERNEL_ADDRPERM(so), so->so_usecount);
3457 }
3458 #endif
3459 /*
3460 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
3461 * and if so just return to the caller. This could happen when
3462 * soreceive() is called by a socket upcall function during the
3463 * time the socket is freed. The socket buffer would have been
3464 * locked across the upcall, therefore we cannot put this thread
3465 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
3466 * we may livelock), because the lock on the socket buffer will
3467 * only be released when the upcall routine returns to its caller.
3468 * Because the socket has been officially closed, there can be
3469 * no further read on it.
3470 *
3471 * A multipath subflow socket would have its SS_NOFDREF set by
3472 * default, so check for SOF_MP_SUBFLOW socket flag; when the
3473 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
3474 */
3475 if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
3476 (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
3477 socket_unlock(so, 1);
3478 return 0;
3479 }
3480
3481 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
3482 if (error) {
3483 socket_unlock(so, 1);
3484 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
3485 0, 0, 0, 0);
3486 if (en_tracing) {
3487 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
3488 VM_KERNEL_ADDRPERM(so), 0,
3489 (int64_t)(orig_resid - uio_resid(uio)));
3490 }
3491 return error;
3492 }
3493
3494 m = so->so_rcv.sb_mb;
3495 if (so_should_wait(so, uio, m, flags)) {
3496 /*
3497 * Panic if we notice inconsistencies in the socket's
3498 * receive list; both sb_mb and sb_cc should correctly
3499 * reflect the contents of the list, otherwise we may
3500 * end up with false positives during select() or poll()
3501 * which could put the application in a bad state.
3502 */
3503 SB_MB_CHECK(&so->so_rcv);
3504
3505 if (so->so_error) {
3506 if (m != NULL) {
3507 goto dontblock;
3508 }
3509 error = so->so_error;
3510 if ((flags & MSG_PEEK) == 0) {
3511 so->so_error = 0;
3512 }
3513 goto release;
3514 }
3515 if (so->so_state & SS_CANTRCVMORE) {
3516 #if CONTENT_FILTER
3517 /*
3518 * Deal with half closed connections
3519 */
3520 if ((so->so_state & SS_ISDISCONNECTED) == 0 &&
3521 cfil_sock_data_pending(&so->so_rcv) != 0) {
3522 CFIL_LOG(LOG_INFO,
3523 "so %llx ignore SS_CANTRCVMORE",
3524 (uint64_t)DEBUG_KERNEL_ADDRPERM(so));
3525 } else
3526 #endif /* CONTENT_FILTER */
3527 if (m != NULL) {
3528 goto dontblock;
3529 } else {
3530 goto release;
3531 }
3532 }
3533 for (; m != NULL; m = m->m_next) {
3534 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
3535 m = so->so_rcv.sb_mb;
3536 goto dontblock;
3537 }
3538 }
3539 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0 &&
3540 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
3541 error = ENOTCONN;
3542 goto release;
3543 }
3544 if (uio_resid(uio) == 0) {
3545 goto release;
3546 }
3547
3548 if ((so->so_state & SS_NBIO) ||
3549 (flags & (MSG_DONTWAIT | MSG_NBIO))) {
3550 error = EWOULDBLOCK;
3551 goto release;
3552 }
3553 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
3554 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
3555 sbunlock(&so->so_rcv, TRUE); /* keep socket locked */
3556 #if EVEN_MORE_LOCKING_DEBUG
3557 if (socket_debug) {
3558 printf("Waiting for socket data\n");
3559 }
3560 #endif
3561
3562 /*
3563 * Depending on the protocol (e.g. TCP), the following
3564 * might cause the socket lock to be dropped and later
3565 * be reacquired, and more data could have arrived and
3566 * have been appended to the receive socket buffer by
3567 * the time it returns. Therefore, we sleep in
3568 * sbwait() below only if the wait-condition is still
3569 * true.
3570 */
3571 if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL) {
3572 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
3573 }
3574
3575 error = 0;
3576 if (so_should_wait(so, uio, so->so_rcv.sb_mb, flags)) {
3577 error = sbwait(&so->so_rcv);
3578 }
3579
3580 #if EVEN_MORE_LOCKING_DEBUG
3581 if (socket_debug) {
3582 printf("SORECEIVE - sbwait returned %d\n", error);
3583 }
3584 #endif
3585 if (so->so_usecount < 1) {
3586 panic("%s: after 2nd sblock so=%p ref=%d on socket\n",
3587 __func__, so, so->so_usecount);
3588 /* NOTREACHED */
3589 }
3590 if (error) {
3591 socket_unlock(so, 1);
3592 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
3593 0, 0, 0, 0);
3594 if (en_tracing) {
3595 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
3596 VM_KERNEL_ADDRPERM(so), 0,
3597 (int64_t)(orig_resid - uio_resid(uio)));
3598 }
3599 return error;
3600 }
3601 goto restart;
3602 }
3603 dontblock:
3604 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
3605 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
3606 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
3607 nextrecord = m->m_nextpkt;
3608
3609 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
3610 error = soreceive_addr(p, so, psa, flags, &m, &nextrecord,
3611 mp0 == NULL);
3612 if (error == ERESTART) {
3613 goto restart;
3614 } else if (error != 0) {
3615 goto release;
3616 }
3617 orig_resid = 0;
3618 }
3619
3620 /*
3621 * Process one or more MT_CONTROL mbufs present before any data mbufs
3622 * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
3623 * just copy the data; if !MSG_PEEK, we call into the protocol to
3624 * perform externalization.
3625 */
3626 if (m != NULL && m->m_type == MT_CONTROL) {
3627 error = soreceive_ctl(so, controlp, flags, &m, &nextrecord);
3628 if (error != 0) {
3629 goto release;
3630 }
3631 orig_resid = 0;
3632 }
3633
3634 if (m != NULL) {
3635 if (!(flags & MSG_PEEK)) {
3636 /*
3637 * We get here because m points to an mbuf following
3638 * any MT_SONAME or MT_CONTROL mbufs which have been
3639 * processed above. In any case, m should be pointing
3640 * to the head of the mbuf chain, and the nextrecord
3641 * should be either NULL or equal to m->m_nextpkt.
3642 * See comments above about SB_LOCK.
3643 */
3644 if (m != so->so_rcv.sb_mb ||
3645 m->m_nextpkt != nextrecord) {
3646 panic("%s: post-control !sync so=%p m=%p "
3647 "nextrecord=%p\n", __func__, so, m,
3648 nextrecord);
3649 /* NOTREACHED */
3650 }
3651 if (nextrecord == NULL) {
3652 so->so_rcv.sb_lastrecord = m;
3653 }
3654 }
3655 type = m->m_type;
3656 if (type == MT_OOBDATA) {
3657 flags |= MSG_OOB;
3658 }
3659 } else {
3660 if (!(flags & MSG_PEEK)) {
3661 SB_EMPTY_FIXUP(&so->so_rcv);
3662 }
3663 }
3664 SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
3665 SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");
3666
3667 moff = 0;
3668 offset = 0;
3669
3670 if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy) {
3671 can_delay = 1;
3672 } else {
3673 can_delay = 0;
3674 }
3675
3676 while (m != NULL &&
3677 (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
3678 if (m->m_type == MT_OOBDATA) {
3679 if (type != MT_OOBDATA) {
3680 break;
3681 }
3682 } else if (type == MT_OOBDATA) {
3683 break;
3684 }
3685 /*
3686 * Make sure to always set MSG_OOB event when getting
3687 * out of band data inline.
3688 */
3689 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
3690 (so->so_options & SO_OOBINLINE) != 0 &&
3691 (so->so_state & SS_RCVATMARK) != 0) {
3692 flags |= MSG_OOB;
3693 }
3694 so->so_state &= ~SS_RCVATMARK;
3695 len = uio_resid(uio) - delayed_copy_len;
3696 if (so->so_oobmark && len > so->so_oobmark - offset) {
3697 len = so->so_oobmark - offset;
3698 }
3699 if (len > m->m_len - moff) {
3700 len = m->m_len - moff;
3701 }
3702 /*
3703 * If mp is set, just pass back the mbufs.
3704 * Otherwise copy them out via the uio, then free.
3705 * Sockbuf must be consistent here (points to current mbuf,
3706 * it points to next record) when we drop priority;
3707 * we must note any additions to the sockbuf when we
3708 * block interrupts again.
3709 */
3710 if (mp == NULL) {
3711 SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
3712 SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
3713 if (can_delay && len == m->m_len) {
3714 /*
3715 * only delay the copy if we're consuming the
3716 * mbuf and we're NOT in MSG_PEEK mode
3717 * and we have enough data to make it worthwile
3718 * to drop and retake the lock... can_delay
3719 * reflects the state of the 2 latter
3720 * constraints moff should always be zero
3721 * in these cases
3722 */
3723 delayed_copy_len += len;
3724 } else {
3725 if (delayed_copy_len) {
3726 error = sodelayed_copy(so, uio,
3727 &free_list, &delayed_copy_len);
3728
3729 if (error) {
3730 goto release;
3731 }
3732 /*
3733 * We can only get here if MSG_PEEK is not
3734 * set; therefore, m should point at the
3735 * head of the rcv queue. If it doesn't,
3736 * it means something drastically
3737 * changed while we were out from behind
3738 * the lock in sodelayed_copy, perhaps
3739 * a RST on the stream. In any event,
3740 * the stream has been interrupted. It's
3741 * probably best just to return whatever
3742 * data we've moved and let the caller
3743 * sort it out...
3744 */
3745 if (m != so->so_rcv.sb_mb) {
3746 break;
3747 }
3748 }
3749 socket_unlock(so, 0);
3750 error = uiomove(mtod(m, caddr_t) + moff,
3751 (int)len, uio);
3752 socket_lock(so, 0);
3753
3754 if (error) {
3755 goto release;
3756 }
3757 }
3758 } else {
3759 uio_setresid(uio, (uio_resid(uio) - len));
3760 }
3761 if (len == m->m_len - moff) {
3762 if (m->m_flags & M_EOR) {
3763 flags |= MSG_EOR;
3764 }
3765 if (flags & MSG_PEEK) {
3766 m = m->m_next;
3767 moff = 0;
3768 } else {
3769 nextrecord = m->m_nextpkt;
3770 sbfree(&so->so_rcv, m);
3771 m->m_nextpkt = NULL;
3772
3773 if (mp != NULL) {
3774 *mp = m;
3775 mp = &m->m_next;
3776 so->so_rcv.sb_mb = m = m->m_next;
3777 *mp = NULL;
3778 } else {
3779 if (free_list == NULL) {
3780 free_list = m;
3781 } else {
3782 ml->m_next = m;
3783 }
3784 ml = m;
3785 so->so_rcv.sb_mb = m = m->m_next;
3786 ml->m_next = NULL;
3787 }
3788 if (m != NULL) {
3789 m->m_nextpkt = nextrecord;
3790 if (nextrecord == NULL) {
3791 so->so_rcv.sb_lastrecord = m;
3792 }
3793 } else {
3794 so->so_rcv.sb_mb = nextrecord;
3795 SB_EMPTY_FIXUP(&so->so_rcv);
3796 }
3797 SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
3798 SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
3799 }
3800 } else {
3801 if (flags & MSG_PEEK) {
3802 moff += len;
3803 } else {
3804 if (mp != NULL) {
3805 int copy_flag;
3806
3807 if (flags & MSG_DONTWAIT) {
3808 copy_flag = M_DONTWAIT;
3809 } else {
3810 copy_flag = M_WAIT;
3811 }
3812 *mp = m_copym(m, 0, len, copy_flag);
3813 /*
3814 * Failed to allocate an mbuf?
3815 * Adjust uio_resid back, it was
3816 * adjusted down by len bytes which
3817 * we didn't copy over.
3818 */
3819 if (*mp == NULL) {
3820 uio_setresid(uio,
3821 (uio_resid(uio) + len));
3822 break;
3823 }
3824 }
3825 m->m_data += len;
3826 m->m_len -= len;
3827 so->so_rcv.sb_cc -= len;
3828 }
3829 }
3830 if (so->so_oobmark) {
3831 if ((flags & MSG_PEEK) == 0) {
3832 so->so_oobmark -= len;
3833 if (so->so_oobmark == 0) {
3834 so->so_state |= SS_RCVATMARK;
3835 break;
3836 }
3837 } else {
3838 offset += len;
3839 if (offset == so->so_oobmark) {
3840 break;
3841 }
3842 }
3843 }
3844 if (flags & MSG_EOR) {
3845 break;
3846 }
3847 /*
3848 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set
3849 * (for non-atomic socket), we must not quit until
3850 * "uio->uio_resid == 0" or an error termination.
3851 * If a signal/timeout occurs, return with a short
3852 * count but without error. Keep sockbuf locked
3853 * against other readers.
3854 */
3855 while (flags & (MSG_WAITALL | MSG_WAITSTREAM) && m == NULL &&
3856 (uio_resid(uio) - delayed_copy_len) > 0 &&
3857 !sosendallatonce(so) && !nextrecord) {
3858 if (so->so_error || ((so->so_state & SS_CANTRCVMORE)
3859 #if CONTENT_FILTER
3860 && cfil_sock_data_pending(&so->so_rcv) == 0
3861 #endif /* CONTENT_FILTER */
3862 )) {
3863 goto release;
3864 }
3865
3866 /*
3867 * Depending on the protocol (e.g. TCP), the following
3868 * might cause the socket lock to be dropped and later
3869 * be reacquired, and more data could have arrived and
3870 * have been appended to the receive socket buffer by
3871 * the time it returns. Therefore, we sleep in
3872 * sbwait() below only if the socket buffer is
3873 * empty, in order to avoid a false sleep.
3874 */
3875 if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL) {
3876 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
3877 }
3878
3879 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
3880 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
3881
3882 if (so->so_rcv.sb_mb == NULL && sbwait(&so->so_rcv)) {
3883 error = 0;
3884 goto release;
3885 }
3886 /*
3887 * We have to wait until after we get back from the sbwait
3888 * to do the copy, because we will drop the lock if we
3889 * have enough data that has been delayed. By dropping
3890 * the lock we open up a window allowing the netisr
3891 * thread to process the incoming packets and to change
3892 * the state of this socket; we're issuing the sbwait
3893 * because the socket is empty and we're expecting the
3894 * netisr thread to wake us up when more packets arrive.
3895 * If we allow that processing to happen and then sbwait,
3896 * we could stall forever with packets sitting in the
3897 * socket if no further packets arrive from the remote
3898 * side.
3899 *
3900 * We want to copy before we've collected all the data
3901 * to satisfy this request, to allow the copy to overlap
3902 * the incoming packet processing on an MP system.
3903 */
3904 if (delayed_copy_len > sorecvmincopy &&
3905 (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
3906 error = sodelayed_copy(so, uio,
3907 &free_list, &delayed_copy_len);
3908
3909 if (error) {
3910 goto release;
3911 }
3912 }
3913 m = so->so_rcv.sb_mb;
3914 if (m != NULL) {
3915 nextrecord = m->m_nextpkt;
3916 }
3917 SB_MB_CHECK(&so->so_rcv);
3918 }
3919 }
3920 #ifdef MORE_LOCKING_DEBUG
3921 if (so->so_usecount <= 1) {
3922 panic("%s: after big while so=%p ref=%d on socket\n",
3923 __func__, so, so->so_usecount);
3924 /* NOTREACHED */
3925 }
3926 #endif
3927
3928 if (m != NULL && pr->pr_flags & PR_ATOMIC) {
3929 if (so->so_options & SO_DONTTRUNC) {
3930 flags |= MSG_RCVMORE;
3931 } else {
3932 flags |= MSG_TRUNC;
3933 if ((flags & MSG_PEEK) == 0) {
3934 (void) sbdroprecord(&so->so_rcv);
3935 }
3936 }
3937 }
3938
3939 /*
3940 * pru_rcvd below (for TCP) may cause more data to be received
3941 * if the socket lock is dropped prior to sending the ACK; some
3942 * legacy OpenTransport applications don't handle this well
3943 * (if it receives less data than requested while MSG_HAVEMORE
3944 * is set), and so we set the flag now based on what we know
3945 * prior to calling pru_rcvd.
3946 */
3947 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) {
3948 flags |= MSG_HAVEMORE;
3949 }
3950
3951 if ((flags & MSG_PEEK) == 0) {
3952 if (m == NULL) {
3953 so->so_rcv.sb_mb = nextrecord;
3954 /*
3955 * First part is an inline SB_EMPTY_FIXUP(). Second
3956 * part makes sure sb_lastrecord is up-to-date if
3957 * there is still data in the socket buffer.
3958 */
3959 if (so->so_rcv.sb_mb == NULL) {
3960 so->so_rcv.sb_mbtail = NULL;
3961 so->so_rcv.sb_lastrecord = NULL;
3962 } else if (nextrecord->m_nextpkt == NULL) {
3963 so->so_rcv.sb_lastrecord = nextrecord;
3964 }
3965 SB_MB_CHECK(&so->so_rcv);
3966 }
3967 SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
3968 SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
3969 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) {
3970 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
3971 }
3972 }
3973
3974 if (delayed_copy_len) {
3975 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
3976 if (error) {
3977 goto release;
3978 }
3979 }
3980 if (free_list != NULL) {
3981 m_freem_list(free_list);
3982 free_list = NULL;
3983 }
3984
3985 if (orig_resid == uio_resid(uio) && orig_resid &&
3986 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
3987 sbunlock(&so->so_rcv, TRUE); /* keep socket locked */
3988 goto restart;
3989 }
3990
3991 if (flagsp != NULL) {
3992 *flagsp |= flags;
3993 }
3994 release:
3995 #ifdef MORE_LOCKING_DEBUG
3996 if (so->so_usecount <= 1) {
3997 panic("%s: release so=%p ref=%d on socket\n", __func__,
3998 so, so->so_usecount);
3999 /* NOTREACHED */
4000 }
4001 #endif
4002 if (delayed_copy_len) {
4003 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
4004 }
4005
4006 if (free_list != NULL) {
4007 m_freem_list(free_list);
4008 }
4009
4010 sbunlock(&so->so_rcv, FALSE); /* will unlock socket */
4011
4012 if (en_tracing) {
4013 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
4014 VM_KERNEL_ADDRPERM(so),
4015 ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
4016 (int64_t)(orig_resid - uio_resid(uio)));
4017 }
4018 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, so, uio_resid(uio),
4019 so->so_rcv.sb_cc, 0, error);
4020
4021 return error;
4022 }
4023
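/*
 * Illustrative sketch (hypothetical, user-space; not built as part of
 * the kernel): the MSG_WAITALL handling above keeps the sockbuf locked
 * and loops until the request is satisfied, but a signal, a receive
 * timeout, or end-of-stream can still yield a short count without an
 * error, so callers should check the returned length.
 */
#if 0 /* illustration only */
#include <sys/socket.h>

static ssize_t
read_exactly(int fd, void *buf, size_t len)
{
	/*
	 * MSG_WAITALL asks the kernel to block until 'len' bytes are
	 * available, but the call may still return a short count on a
	 * signal, a timeout, or when the peer closes the connection.
	 */
	return recv(fd, buf, len, MSG_WAITALL);
}
#endif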
4024 /*
4025 * Returns: 0 Success
4026 * uiomove:EFAULT
4027 */
4028 static int
4029 sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list,
4030 user_ssize_t *resid)
4031 {
4032 int error = 0;
4033 struct mbuf *m;
4034
4035 m = *free_list;
4036
4037 socket_unlock(so, 0);
4038
4039 while (m != NULL && error == 0) {
4040 error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
4041 m = m->m_next;
4042 }
4043 m_freem_list(*free_list);
4044
4045 *free_list = NULL;
4046 *resid = 0;
4047
4048 socket_lock(so, 0);
4049
4050 return error;
4051 }
4052
4053 static int
4054 sodelayed_copy_list(struct socket *so, struct recv_msg_elem *msgarray,
4055 u_int uiocnt, struct mbuf **free_list, user_ssize_t *resid)
4056 {
4057 #pragma unused(so)
4058 int error = 0;
4059 struct mbuf *ml, *m;
4060 int i = 0;
4061 struct uio *auio;
4062
4063 for (ml = *free_list, i = 0; ml != NULL && i < uiocnt;
4064 ml = ml->m_nextpkt, i++) {
4065 auio = msgarray[i].uio;
4066 for (m = ml; m != NULL; m = m->m_next) {
4067 error = uiomove(mtod(m, caddr_t), m->m_len, auio);
4068 if (error != 0) {
4069 goto out;
4070 }
4071 }
4072 }
4073 out:
4074 m_freem_list(*free_list);
4075
4076 *free_list = NULL;
4077 *resid = 0;
4078
4079 return error;
4080 }
4081
4082 int
4083 soreceive_list(struct socket *so, struct recv_msg_elem *msgarray, u_int uiocnt,
4084 int *flagsp)
4085 {
4086 struct mbuf *m;
4087 struct mbuf *nextrecord;
4088 struct mbuf *ml = NULL, *free_list = NULL, *free_tail = NULL;
4089 int error;
4090 user_ssize_t len, pktlen, delayed_copy_len = 0;
4091 struct protosw *pr = so->so_proto;
4092 user_ssize_t resid;
4093 struct proc *p = current_proc();
4094 struct uio *auio = NULL;
4095 int npkts = 0;
4096 int sblocked = 0;
4097 struct sockaddr **psa = NULL;
4098 struct mbuf **controlp = NULL;
4099 int can_delay;
4100 int flags;
4101 struct mbuf *free_others = NULL;
4102
4103 KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_START,
4104 so, uiocnt,
4105 so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat);
4106
4107 /*
4108 * Sanity checks:
4109 * - Only a limited set of receive flags is supported
4110 * - Only datagram sockets are supported (could be extended to raw)
4111 * - The socket must be atomic
4112 * - The protocol must support packet chains
4113 * - The uio array must not be NULL or empty
4114 */
4115 if (flagsp != NULL) {
4116 flags = *flagsp;
4117 } else {
4118 flags = 0;
4119 }
4120 if (flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA |
4121 MSG_NBIO)) {
4122 printf("%s invalid flags 0x%x\n", __func__, flags);
4123 error = EINVAL;
4124 goto out;
4125 }
4126 if (so->so_type != SOCK_DGRAM) {
4127 error = EINVAL;
4128 goto out;
4129 }
4130 if (sosendallatonce(so) == 0) {
4131 error = EINVAL;
4132 goto out;
4133 }
4134 if (so->so_proto->pr_usrreqs->pru_send_list == NULL) {
4135 error = EPROTONOSUPPORT;
4136 goto out;
4137 }
4138 if (msgarray == NULL) {
4139 printf("%s uioarray is NULL\n", __func__);
4140 error = EINVAL;
4141 goto out;
4142 }
4143 if (uiocnt == 0) {
4144 printf("%s uiocnt is 0\n", __func__);
4145 error = EINVAL;
4146 goto out;
4147 }
4148 /*
4149 * Sanity check on the length passed by caller as we are making 'int'
4150 * comparisons
4151 */
4152 resid = recv_msg_array_resid(msgarray, uiocnt);
4153 if (resid < 0 || resid > INT_MAX) {
4154 error = EINVAL;
4155 goto out;
4156 }
4157
4158 if (!(flags & MSG_PEEK) && sorecvmincopy > 0) {
4159 can_delay = 1;
4160 } else {
4161 can_delay = 0;
4162 }
4163
4164 socket_lock(so, 1);
4165 so_update_last_owner_locked(so, p);
4166 so_update_policy(so);
4167
4168 #if NECP
4169 so_update_necp_policy(so, NULL, NULL);
4170 #endif /* NECP */
4171
4172 /*
4173 * If a recv attempt is made on a previously-accepted socket
4174 * that has been marked as inactive (disconnected), reject
4175 * the request.
4176 */
4177 if (so->so_flags & SOF_DEFUNCT) {
4178 struct sockbuf *sb = &so->so_rcv;
4179
4180 error = ENOTCONN;
4181 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
4182 __func__, proc_pid(p), proc_best_name(p),
4183 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
4184 SOCK_DOM(so), SOCK_TYPE(so), error);
4185 /*
4186 * This socket should have been disconnected and flushed
4187 * prior to being returned from sodefunct(); there should
4188 * be no data on its receive list, so panic otherwise.
4189 */
4190 if (so->so_state & SS_DEFUNCT) {
4191 sb_empty_assert(sb, __func__);
4192 }
4193 goto release;
4194 }
4195
4196 next:
4197 /*
4198 * The uio array may be empty or already exhausted
4199 */
4200 if (npkts >= uiocnt) {
4201 error = 0;
4202 goto release;
4203 }
4204 restart:
4205 /*
4206 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
4207 * and if so just return to the caller. This could happen when
4208 * soreceive() is called by a socket upcall function during the
4209 * time the socket is freed. The socket buffer would have been
4210 * locked across the upcall, therefore we cannot put this thread
4211 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
4212 * we may livelock), because the lock on the socket buffer will
4213 * only be released when the upcall routine returns to its caller.
4214 * Because the socket has been officially closed, there can be
4215 * no further read on it.
4216 */
4217 if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
4218 (SS_NOFDREF | SS_CANTRCVMORE)) {
4219 error = 0;
4220 goto release;
4221 }
4222
4223 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
4224 if (error) {
4225 goto release;
4226 }
4227 sblocked = 1;
4228
4229 m = so->so_rcv.sb_mb;
4230 /*
4231 * Block awaiting more datagrams if needed
4232 */
4233 if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
4234 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
4235 ((flags & MSG_WAITALL) && npkts < uiocnt))))) {
4236 /*
4237 * Panic if we notice inconsistencies in the socket's
4238 * receive list; both sb_mb and sb_cc should correctly
4239 * reflect the contents of the list, otherwise we may
4240 * end up with false positives during select() or poll()
4241 * which could put the application in a bad state.
4242 */
4243 SB_MB_CHECK(&so->so_rcv);
4244
4245 if (so->so_error) {
4246 error = so->so_error;
4247 if ((flags & MSG_PEEK) == 0) {
4248 so->so_error = 0;
4249 }
4250 goto release;
4251 }
4252 if (so->so_state & SS_CANTRCVMORE) {
4253 goto release;
4254 }
4255 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0 &&
4256 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
4257 error = ENOTCONN;
4258 goto release;
4259 }
4260 if ((so->so_state & SS_NBIO) ||
4261 (flags & (MSG_DONTWAIT | MSG_NBIO))) {
4262 error = EWOULDBLOCK;
4263 goto release;
4264 }
4265 /*
4266 * Do not block if we got some data
4267 */
4268 if (free_list != NULL) {
4269 error = 0;
4270 goto release;
4271 }
4272
4273 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
4274 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
4275
4276 sbunlock(&so->so_rcv, TRUE); /* keep socket locked */
4277 sblocked = 0;
4278
4279 error = sbwait(&so->so_rcv);
4280 if (error) {
4281 goto release;
4282 }
4283 goto restart;
4284 }
4285
4286 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
4287 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
4288 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
4289
4290 /*
4291 * Consume the current uio index as we have a datagram
4292 */
4293 auio = msgarray[npkts].uio;
4294 resid = uio_resid(auio);
4295 msgarray[npkts].which |= SOCK_MSG_DATA;
4296 psa = (msgarray[npkts].which & SOCK_MSG_SA) ?
4297 &msgarray[npkts].psa : NULL;
4298 controlp = (msgarray[npkts].which & SOCK_MSG_CONTROL) ?
4299 &msgarray[npkts].controlp : NULL;
4300 npkts += 1;
4301 nextrecord = m->m_nextpkt;
4302
4303 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
4304 error = soreceive_addr(p, so, psa, flags, &m, &nextrecord, 1);
4305 if (error == ERESTART) {
4306 goto restart;
4307 } else if (error != 0) {
4308 goto release;
4309 }
4310 }
4311
4312 if (m != NULL && m->m_type == MT_CONTROL) {
4313 error = soreceive_ctl(so, controlp, flags, &m, &nextrecord);
4314 if (error != 0) {
4315 goto release;
4316 }
4317 }
4318
4319 if (m->m_pkthdr.len == 0) {
4320 printf("%s:%d so %llx pkt %llx type %u pktlen null\n",
4321 __func__, __LINE__,
4322 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
4323 (uint64_t)DEBUG_KERNEL_ADDRPERM(m),
4324 m->m_type);
4325 }
4326
4327 /*
4328 * Loop to copy the mbufs of the current record
4329 * Support zero length packets
4330 */
4331 ml = NULL;
4332 pktlen = 0;
4333 while (m != NULL && (len = resid - pktlen) >= 0 && error == 0) {
4334 if (m->m_len == 0) {
4335 panic("%p m_len zero", m);
4336 }
4337 if (m->m_type == 0) {
4338 panic("%p m_type zero", m);
4339 }
4340 /*
4341 * Clip to the residual length
4342 */
4343 if (len > m->m_len) {
4344 len = m->m_len;
4345 }
4346 pktlen += len;
4347 /*
4348 * Copy the mbufs via the uio or delay the copy
4349 * Sockbuf must be consistent here (sb_mb points to the
4350 * current mbuf, its m_nextpkt points to the next record)
4351 * when we drop the socket lock; we must note any additions
4352 * to the sockbuf when we reacquire it.
4353 */
4354 if (len > 0 && can_delay == 0) {
4355 socket_unlock(so, 0);
4356 error = uiomove(mtod(m, caddr_t), (int)len, auio);
4357 socket_lock(so, 0);
4358 if (error) {
4359 goto release;
4360 }
4361 } else {
4362 delayed_copy_len += len;
4363 }
4364
4365 if (len == m->m_len) {
4366 /*
4367 * m was entirely copied
4368 */
4369 sbfree(&so->so_rcv, m);
4370 nextrecord = m->m_nextpkt;
4371 m->m_nextpkt = NULL;
4372
4373 /*
4374 * Set the first packet to the head of the free list
4375 */
4376 if (free_list == NULL) {
4377 free_list = m;
4378 }
4379 /*
4380 * Link current packet to tail of free list
4381 */
4382 if (ml == NULL) {
4383 if (free_tail != NULL) {
4384 free_tail->m_nextpkt = m;
4385 }
4386 free_tail = m;
4387 }
4388 /*
4389 * Link current mbuf to last mbuf of current packet
4390 */
4391 if (ml != NULL) {
4392 ml->m_next = m;
4393 }
4394 ml = m;
4395
4396 /*
4397 * Move next buf to head of socket buffer
4398 */
4399 so->so_rcv.sb_mb = m = ml->m_next;
4400 ml->m_next = NULL;
4401
4402 if (m != NULL) {
4403 m->m_nextpkt = nextrecord;
4404 if (nextrecord == NULL) {
4405 so->so_rcv.sb_lastrecord = m;
4406 }
4407 } else {
4408 so->so_rcv.sb_mb = nextrecord;
4409 SB_EMPTY_FIXUP(&so->so_rcv);
4410 }
4411 SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
4412 SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
4413 } else {
4414 /*
4415 * Stop the loop on partial copy
4416 */
4417 break;
4418 }
4419 }
4420 #ifdef MORE_LOCKING_DEBUG
4421 if (so->so_usecount <= 1) {
4422 panic("%s: after big while so=%llx ref=%d on socket\n",
4423 __func__,
4424 (uint64_t)DEBUG_KERNEL_ADDRPERM(so), so->so_usecount);
4425 /* NOTREACHED */
4426 }
4427 #endif
4428 /*
4429 * Tell the caller we made a partial copy
4430 */
4431 if (m != NULL) {
4432 if (so->so_options & SO_DONTTRUNC) {
4433 /*
4434 * Copyout first the freelist then the partial mbuf
4435 */
4436 socket_unlock(so, 0);
4437 if (delayed_copy_len) {
4438 error = sodelayed_copy_list(so, msgarray,
4439 uiocnt, &free_list, &delayed_copy_len);
4440 }
4441
4442 if (error == 0) {
4443 error = uiomove(mtod(m, caddr_t), (int)len,
4444 auio);
4445 }
4446 socket_lock(so, 0);
4447 if (error) {
4448 goto release;
4449 }
4450
4451 m->m_data += len;
4452 m->m_len -= len;
4453 so->so_rcv.sb_cc -= len;
4454 flags |= MSG_RCVMORE;
4455 } else {
4456 (void) sbdroprecord(&so->so_rcv);
4457 nextrecord = so->so_rcv.sb_mb;
4458 m = NULL;
4459 flags |= MSG_TRUNC;
4460 }
4461 }
4462
4463 if (m == NULL) {
4464 so->so_rcv.sb_mb = nextrecord;
4465 /*
4466 * First part is an inline SB_EMPTY_FIXUP(). Second
4467 * part makes sure sb_lastrecord is up-to-date if
4468 * there is still data in the socket buffer.
4469 */
4470 if (so->so_rcv.sb_mb == NULL) {
4471 so->so_rcv.sb_mbtail = NULL;
4472 so->so_rcv.sb_lastrecord = NULL;
4473 } else if (nextrecord->m_nextpkt == NULL) {
4474 so->so_rcv.sb_lastrecord = nextrecord;
4475 }
4476 SB_MB_CHECK(&so->so_rcv);
4477 }
4478 SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
4479 SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
4480
4481 /*
4482 * We can continue to the next packet as long as:
4483 * - We haven't exhausted the uio array
4484 * - There was no error
4485 * - A packet was not truncated
4486 * - We can still receive more data
4487 */
4488 if (npkts < uiocnt && error == 0 &&
4489 (flags & (MSG_RCVMORE | MSG_TRUNC)) == 0 &&
4490 (so->so_state & SS_CANTRCVMORE) == 0) {
4491 sbunlock(&so->so_rcv, TRUE); /* keep socket locked */
4492 sblocked = 0;
4493
4494 goto next;
4495 }
4496 if (flagsp != NULL) {
4497 *flagsp |= flags;
4498 }
4499
4500 release:
4501 /*
4502 * pru_rcvd may cause more data to be received if the socket lock
4503 * is dropped so we set MSG_HAVEMORE now based on what we know.
4504 * That way the caller won't be surprised if it receives less data
4505 * than requested.
4506 */
4507 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) {
4508 flags |= MSG_HAVEMORE;
4509 }
4510
4511 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) {
4512 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
4513 }
4514
4515 if (sblocked) {
4516 sbunlock(&so->so_rcv, FALSE); /* will unlock socket */
4517 } else {
4518 socket_unlock(so, 1);
4519 }
4520
4521 if (delayed_copy_len) {
4522 error = sodelayed_copy_list(so, msgarray, uiocnt,
4523 &free_list, &delayed_copy_len);
4524 }
4525 out:
4526 /*
4527 * Amortize the cost of freeing the mbufs
4528 */
4529 if (free_list != NULL) {
4530 m_freem_list(free_list);
4531 }
4532 if (free_others != NULL) {
4533 m_freem_list(free_others);
4534 }
4535
4536 KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_END, error,
4537 0, 0, 0, 0);
4538 return error;
4539 }
4540
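/*
 * Illustrative sketch (hypothetical, user-space; not built as part of
 * the kernel): soreceive_list() above batches what would otherwise be
 * a sequence of per-datagram receives such as the loop below, paying
 * for the sockbuf lock and the mbuf freeing once per batch instead of
 * once per datagram.
 */
#if 0 /* illustration only */
#include <sys/socket.h>

static int
drain_datagrams(int fd, char bufs[][2048], struct sockaddr_storage *from,
    socklen_t *fromlen, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		fromlen[i] = sizeof(from[i]);
		/* One syscall and one lock/unlock cycle per datagram */
		ssize_t len = recvfrom(fd, bufs[i], sizeof(bufs[i]),
		    MSG_DONTWAIT, (struct sockaddr *)&from[i], &fromlen[i]);
		if (len < 0) {
			break;	/* e.g. EWOULDBLOCK: nothing more queued */
		}
	}
	return i;	/* number of datagrams received */
}
#endif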
4541 static int
4542 so_statistics_event_to_nstat_event(int64_t *input_options,
4543 uint64_t *nstat_event)
4544 {
4545 int error = 0;
4546 switch (*input_options) {
4547 case SO_STATISTICS_EVENT_ENTER_CELLFALLBACK:
4548 *nstat_event = NSTAT_EVENT_SRC_ENTER_CELLFALLBACK;
4549 break;
4550 case SO_STATISTICS_EVENT_EXIT_CELLFALLBACK:
4551 *nstat_event = NSTAT_EVENT_SRC_EXIT_CELLFALLBACK;
4552 break;
4553 #if (DEBUG || DEVELOPMENT)
4554 case SO_STATISTICS_EVENT_RESERVED_1:
4555 *nstat_event = NSTAT_EVENT_SRC_RESERVED_1;
4556 break;
4557 case SO_STATISTICS_EVENT_RESERVED_2:
4558 *nstat_event = NSTAT_EVENT_SRC_RESERVED_2;
4559 break;
4560 #endif /* (DEBUG || DEVELOPMENT) */
4561 default:
4562 error = EINVAL;
4563 break;
4564 }
4565 return error;
4566 }
4567
4568 /*
4569 * Returns: 0 Success
4570 * EINVAL
4571 * ENOTCONN
4572 * <pru_shutdown>:EINVAL
4573 * <pru_shutdown>:EADDRNOTAVAIL[TCP]
4574 * <pru_shutdown>:ENOBUFS[TCP]
4575 * <pru_shutdown>:EMSGSIZE[TCP]
4576 * <pru_shutdown>:EHOSTUNREACH[TCP]
4577 * <pru_shutdown>:ENETUNREACH[TCP]
4578 * <pru_shutdown>:ENETDOWN[TCP]
4579 * <pru_shutdown>:ENOMEM[TCP]
4580 * <pru_shutdown>:EACCES[TCP]
4581 * <pru_shutdown>:EMSGSIZE[TCP]
4582 * <pru_shutdown>:ENOBUFS[TCP]
4583 * <pru_shutdown>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
4584 * <pru_shutdown>:??? [other protocol families]
4585 */
4586 int
4587 soshutdown(struct socket *so, int how)
4588 {
4589 int error;
4590
4591 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_START, how, 0, 0, 0, 0);
4592
4593 switch (how) {
4594 case SHUT_RD:
4595 case SHUT_WR:
4596 case SHUT_RDWR:
4597 socket_lock(so, 1);
4598 if ((so->so_state &
4599 (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
4600 error = ENOTCONN;
4601 } else {
4602 error = soshutdownlock(so, how);
4603 }
4604 socket_unlock(so, 1);
4605 break;
4606 default:
4607 error = EINVAL;
4608 break;
4609 }
4610
4611 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, how, error, 0, 0, 0);
4612
4613 return error;
4614 }
4615
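/*
 * Illustrative sketch (hypothetical, user-space; not built as part of
 * the kernel): as enforced by soshutdown() above, shutdown(2) fails
 * with ENOTCONN unless the socket is connected (or in the middle of
 * connecting/disconnecting), and SHUT_WR performs a half-close that
 * still allows the local side to keep receiving.
 */
#if 0 /* illustration only */
#include <sys/socket.h>
#include <errno.h>

static int
half_close_write(int fd)
{
	/* Stop sending; the peer sees EOF but we can still read */
	if (shutdown(fd, SHUT_WR) == -1) {
		return errno;	/* e.g. ENOTCONN, or EINVAL for a bad 'how' */
	}
	return 0;
}
#endif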
4616 int
4617 soshutdownlock_final(struct socket *so, int how)
4618 {
4619 struct protosw *pr = so->so_proto;
4620 int error = 0;
4621
4622 sflt_notify(so, sock_evt_shutdown, &how);
4623
4624 if (how != SHUT_WR) {
4625 if ((so->so_state & SS_CANTRCVMORE) != 0) {
4626 /* read already shut down */
4627 error = ENOTCONN;
4628 goto done;
4629 }
4630 sorflush(so);
4631 }
4632 if (how != SHUT_RD) {
4633 if ((so->so_state & SS_CANTSENDMORE) != 0) {
4634 /* write already shut down */
4635 error = ENOTCONN;
4636 goto done;
4637 }
4638 error = (*pr->pr_usrreqs->pru_shutdown)(so);
4639 }
4640 done:
4641 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN, how, 1, 0, 0, 0);
4642 return error;
4643 }
4644
4645 int
4646 soshutdownlock(struct socket *so, int how)
4647 {
4648 int error = 0;
4649
4650 #if CONTENT_FILTER
4651 /*
4652 * A content filter may delay the actual shutdown until it
4653 * has processed the pending data
4654 */
4655 if (so->so_flags & SOF_CONTENT_FILTER) {
4656 error = cfil_sock_shutdown(so, &how);
4657 if (error == EJUSTRETURN) {
4658 error = 0;
4659 goto done;
4660 } else if (error != 0) {
4661 goto done;
4662 }
4663 }
4664 #endif /* CONTENT_FILTER */
4665
4666 error = soshutdownlock_final(so, how);
4667
4668 done:
4669 return error;
4670 }
4671
4672 void
4673 sowflush(struct socket *so)
4674 {
4675 struct sockbuf *sb = &so->so_snd;
4676
4677 /*
4678 * Obtain lock on the socket buffer (SB_LOCK). This is required
4679 * to prevent the socket buffer from being unexpectedly altered
4680 * while it is used by another thread in socket send/receive.
4681 *
4682 * sblock() must not fail here, hence the assertion.
4683 */
4684 (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
4685 VERIFY(sb->sb_flags & SB_LOCK);
4686
4687 sb->sb_flags &= ~(SB_SEL | SB_UPCALL);
4688 sb->sb_flags |= SB_DROP;
4689 sb->sb_upcall = NULL;
4690 sb->sb_upcallarg = NULL;
4691
4692 sbunlock(sb, TRUE); /* keep socket locked */
4693
4694 selthreadclear(&sb->sb_sel);
4695 sbrelease(sb);
4696 }
4697
4698 void
4699 sorflush(struct socket *so)
4700 {
4701 struct sockbuf *sb = &so->so_rcv;
4702 struct protosw *pr = so->so_proto;
4703 struct sockbuf asb;
4704 #ifdef notyet
4705 lck_mtx_t *mutex_held;
4706 /*
4707 * XXX: This code is currently commented out, because we may get here
4708 * as part of sofreelastref(), and at that time, pr_getlock() may no
4709 * longer be able to return us the lock; this will be fixed in future.
4710 */
4711 if (so->so_proto->pr_getlock != NULL) {
4712 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
4713 } else {
4714 mutex_held = so->so_proto->pr_domain->dom_mtx;
4715 }
4716
4717 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
4718 #endif /* notyet */
4719
4720 sflt_notify(so, sock_evt_flush_read, NULL);
4721
4722 socantrcvmore(so);
4723
4724 /*
4725 * Obtain lock on the socket buffer (SB_LOCK). This is required
4726 * to prevent the socket buffer from being unexpectedly altered
4727 * while it is used by another thread in socket send/receive.
4728 *
4729 * sblock() must not fail here, hence the assertion.
4730 */
4731 (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
4732 VERIFY(sb->sb_flags & SB_LOCK);
4733
4734 /*
4735 * Copy only the relevant fields from "sb" to "asb" which we
4736 * need for sbrelease() to function. In particular, skip
4737 * sb_sel as it contains the wait queue linkage, which would
4738 * wreak havoc if we were to issue selthreadclear() on "asb".
4739 * Make sure to not carry over SB_LOCK in "asb", as we need
4740 * to acquire it later as part of sbrelease().
4741 */
4742 bzero(&asb, sizeof(asb));
4743 asb.sb_cc = sb->sb_cc;
4744 asb.sb_hiwat = sb->sb_hiwat;
4745 asb.sb_mbcnt = sb->sb_mbcnt;
4746 asb.sb_mbmax = sb->sb_mbmax;
4747 asb.sb_ctl = sb->sb_ctl;
4748 asb.sb_lowat = sb->sb_lowat;
4749 asb.sb_mb = sb->sb_mb;
4750 asb.sb_mbtail = sb->sb_mbtail;
4751 asb.sb_lastrecord = sb->sb_lastrecord;
4752 asb.sb_so = sb->sb_so;
4753 asb.sb_flags = sb->sb_flags;
4754 asb.sb_flags &= ~(SB_LOCK | SB_SEL | SB_KNOTE | SB_UPCALL);
4755 asb.sb_flags |= SB_DROP;
4756
4757 /*
4758 * Ideally we'd bzero() these and preserve the ones we need;
4759 * but to do that we'd need to shuffle things around in the
4760 * sockbuf, and we can't do it now because there are KEXTS
4761 * that are directly referring to the socket structure.
4762 *
4763 * Setting SB_DROP acts as a barrier to prevent further appends.
4764 * Clearing SB_SEL is done for selthreadclear() below.
4765 */
4766 sb->sb_cc = 0;
4767 sb->sb_hiwat = 0;
4768 sb->sb_mbcnt = 0;
4769 sb->sb_mbmax = 0;
4770 sb->sb_ctl = 0;
4771 sb->sb_lowat = 0;
4772 sb->sb_mb = NULL;
4773 sb->sb_mbtail = NULL;
4774 sb->sb_lastrecord = NULL;
4775 sb->sb_timeo.tv_sec = 0;
4776 sb->sb_timeo.tv_usec = 0;
4777 sb->sb_upcall = NULL;
4778 sb->sb_upcallarg = NULL;
4779 sb->sb_flags &= ~(SB_SEL | SB_UPCALL);
4780 sb->sb_flags |= SB_DROP;
4781
4782 sbunlock(sb, TRUE); /* keep socket locked */
4783
4784 /*
4785 * Note that selthreadclear() is called on the original "sb" and
4786 * not the local "asb" because of the way wait queue linkage is
4787 * implemented. Given that selwakeup() may be triggered, SB_SEL
4788 * should no longer be set (cleared above.)
4789 */
4790 selthreadclear(&sb->sb_sel);
4791
4792 if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose) {
4793 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
4794 }
4795
4796 sbrelease(&asb);
4797 }
4798
4799 /*
4800 * Perhaps this routine, and sooptcopyout(), below, ought to come in
4801 * an additional variant to handle the case where the option value needs
4802 * to be some kind of integer, but not a specific size.
4803 * In addition to their use here, these functions are also called by the
4804 * protocol-level pr_ctloutput() routines.
4805 *
4806 * Returns: 0 Success
4807 * EINVAL
4808 * copyin:EFAULT
4809 */
4810 int
4811 sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
4812 {
4813 size_t valsize;
4814
4815 /*
4816 * If the user gives us more than we wanted, we ignore it,
4817 * but if we don't get the minimum length the caller
4818 * wants, we return EINVAL. On success, sopt->sopt_valsize
4819 * is set to however much we actually retrieved.
4820 */
4821 if ((valsize = sopt->sopt_valsize) < minlen) {
4822 return EINVAL;
4823 }
4824 if (valsize > len) {
4825 sopt->sopt_valsize = valsize = len;
4826 }
4827
4828 if (sopt->sopt_p != kernproc) {
4829 return copyin(sopt->sopt_val, buf, valsize);
4830 }
4831
4832 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
4833 return 0;
4834 }
4835
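/*
 * Illustrative sketch (hypothetical handler; only sooptcopyin() is
 * real): the usual consumer pattern, as used throughout sosetoptlock()
 * below and by protocol pr_ctloutput() routines, is to copy in a
 * fixed-size value while enforcing a minimum length.
 */
#if 0 /* illustration only */
static int
example_set_int_option(struct socket *so, struct sockopt *sopt)
{
#pragma unused(so)
	int optval, error;

	/*
	 * EINVAL if the caller supplied fewer than sizeof(int) bytes,
	 * EFAULT if the copyin from user space fails; extra bytes
	 * beyond sizeof(int) are silently ignored.
	 */
	error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval));
	if (error != 0) {
		return error;
	}
	/* ... apply 'optval' to the hypothetical per-socket state ... */
	return 0;
}
#endif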
4836 /*
4837 * sooptcopyin_timeval
4838 * Copy in a timeval value into tv_p, and take into account whether the
4839 * the calling process is 64-bit or 32-bit. Moved the sanity checking
4840 * code here so that we can verify the 64-bit tv_sec value before we lose
4841 * the top 32-bits assigning tv64.tv_sec to tv_p->tv_sec.
4842 */
4843 static int
4844 sooptcopyin_timeval(struct sockopt *sopt, struct timeval *tv_p)
4845 {
4846 int error;
4847
4848 if (proc_is64bit(sopt->sopt_p)) {
4849 struct user64_timeval tv64;
4850
4851 if (sopt->sopt_valsize < sizeof(tv64)) {
4852 return EINVAL;
4853 }
4854
4855 sopt->sopt_valsize = sizeof(tv64);
4856 if (sopt->sopt_p != kernproc) {
4857 error = copyin(sopt->sopt_val, &tv64, sizeof(tv64));
4858 if (error != 0) {
4859 return error;
4860 }
4861 } else {
4862 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv64,
4863 sizeof(tv64));
4864 }
4865 if (tv64.tv_sec < 0 || tv64.tv_sec > LONG_MAX ||
4866 tv64.tv_usec < 0 || tv64.tv_usec >= 1000000) {
4867 return EDOM;
4868 }
4869
4870 tv_p->tv_sec = tv64.tv_sec;
4871 tv_p->tv_usec = tv64.tv_usec;
4872 } else {
4873 struct user32_timeval tv32;
4874
4875 if (sopt->sopt_valsize < sizeof(tv32)) {
4876 return EINVAL;
4877 }
4878
4879 sopt->sopt_valsize = sizeof(tv32);
4880 if (sopt->sopt_p != kernproc) {
4881 error = copyin(sopt->sopt_val, &tv32, sizeof(tv32));
4882 if (error != 0) {
4883 return error;
4884 }
4885 } else {
4886 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv32,
4887 sizeof(tv32));
4888 }
4889 #ifndef __LP64__
4890 /*
4891 * K64todo "comparison is always false due to
4892 * limited range of data type"
4893 */
4894 if (tv32.tv_sec < 0 || tv32.tv_sec > LONG_MAX ||
4895 tv32.tv_usec < 0 || tv32.tv_usec >= 1000000) {
4896 return EDOM;
4897 }
4898 #endif
4899 tv_p->tv_sec = tv32.tv_sec;
4900 tv_p->tv_usec = tv32.tv_usec;
4901 }
4902 return 0;
4903 }
4904
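/*
 * Illustrative sketch (hypothetical, user-space; not built as part of
 * the kernel): sooptcopyin_timeval() above backs SO_RCVTIMEO and
 * SO_SNDTIMEO, which take a struct timeval and reject out-of-range
 * values (negative fields, or tv_usec >= 1000000) with EDOM.
 */
#if 0 /* illustration only */
#include <sys/socket.h>
#include <sys/time.h>

static int
set_recv_timeout(int fd, long seconds)
{
	struct timeval tv = { .tv_sec = seconds, .tv_usec = 0 };

	/* tv_usec must stay within [0, 1000000) or the kernel returns EDOM */
	return setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
}
#endif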
4905 int
4906 soopt_cred_check(struct socket *so, int priv, boolean_t allow_root,
4907 boolean_t ignore_delegate)
4908 {
4909 kauth_cred_t cred = NULL;
4910 proc_t ep = PROC_NULL;
4911 uid_t uid;
4912 int error = 0;
4913
4914 if (ignore_delegate == false && so->so_flags & SOF_DELEGATED) {
4915 ep = proc_find(so->e_pid);
4916 if (ep) {
4917 cred = kauth_cred_proc_ref(ep);
4918 }
4919 }
4920
4921 uid = kauth_cred_getuid(cred ? cred : so->so_cred);
4922
4923 /* uid is 0 for root */
4924 if (uid != 0 || !allow_root) {
4925 error = priv_check_cred(cred ? cred : so->so_cred, priv, 0);
4926 }
4927 if (cred) {
4928 kauth_cred_unref(&cred);
4929 }
4930 if (ep != PROC_NULL) {
4931 proc_rele(ep);
4932 }
4933
4934 return error;
4935 }
4936
4937 /*
4938 * Returns: 0 Success
4939 * EINVAL
4940 * ENOPROTOOPT
4941 * ENOBUFS
4942 * EDOM
4943 * sooptcopyin:EINVAL
4944 * sooptcopyin:EFAULT
4945 * sooptcopyin_timeval:EINVAL
4946 * sooptcopyin_timeval:EFAULT
4947 * sooptcopyin_timeval:EDOM
4948 * <pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
4949 * <pr_ctloutput>:???
4950 * sflt_attach_private:??? [whatever a filter author chooses]
4951 * <sf_setoption>:??? [whatever a filter author chooses]
4952 *
4953 * Notes: Other <pr_ctloutput> returns depend on the protocol family; all
4954 * <sf_setoption> returns depend on what the filter author causes
4955 * their filter to return.
4956 */
4957 int
4958 sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
4959 {
4960 int error, optval;
4961 int64_t long_optval;
4962 struct linger l;
4963 struct timeval tv;
4964
4965 if (sopt->sopt_dir != SOPT_SET) {
4966 sopt->sopt_dir = SOPT_SET;
4967 }
4968
4969 if (dolock) {
4970 socket_lock(so, 1);
4971 }
4972
4973 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
4974 (SS_CANTRCVMORE | SS_CANTSENDMORE) &&
4975 (so->so_flags & SOF_NPX_SETOPTSHUT) == 0) {
4976 /* the socket has been shutdown, no more sockopt's */
4977 error = EINVAL;
4978 goto out;
4979 }
4980
4981 error = sflt_setsockopt(so, sopt);
4982 if (error != 0) {
4983 if (error == EJUSTRETURN) {
4984 error = 0;
4985 }
4986 goto out;
4987 }
4988
4989 if (sopt->sopt_level != SOL_SOCKET) {
4990 if (so->so_proto != NULL &&
4991 so->so_proto->pr_ctloutput != NULL) {
4992 error = (*so->so_proto->pr_ctloutput)(so, sopt);
4993 goto out;
4994 }
4995 error = ENOPROTOOPT;
4996 } else {
4997 /*
4998 * Allow socket-level (SOL_SOCKET) options to be filtered by
4999 * the protocol layer, if needed. A zero value returned from
5000 * the handler means use default socket-level processing as
5001 * done by the rest of this routine. Otherwise, any other
5002 * return value indicates that the option is unsupported.
5003 */
5004 if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs->
5005 pru_socheckopt(so, sopt)) != 0) {
5006 goto out;
5007 }
5008
5009 error = 0;
5010 switch (sopt->sopt_name) {
5011 case SO_LINGER:
5012 case SO_LINGER_SEC:
5013 error = sooptcopyin(sopt, &l, sizeof(l), sizeof(l));
5014 if (error != 0) {
5015 goto out;
5016 }
5017
5018 so->so_linger = (sopt->sopt_name == SO_LINGER) ?
5019 l.l_linger : l.l_linger * hz;
5020 if (l.l_onoff != 0) {
5021 so->so_options |= SO_LINGER;
5022 } else {
5023 so->so_options &= ~SO_LINGER;
5024 }
5025 break;
5026
5027 case SO_DEBUG:
5028 case SO_KEEPALIVE:
5029 case SO_DONTROUTE:
5030 case SO_USELOOPBACK:
5031 case SO_BROADCAST:
5032 case SO_REUSEADDR:
5033 case SO_REUSEPORT:
5034 case SO_OOBINLINE:
5035 case SO_TIMESTAMP:
5036 case SO_TIMESTAMP_MONOTONIC:
5037 case SO_TIMESTAMP_CONTINUOUS:
5038 case SO_DONTTRUNC:
5039 case SO_WANTMORE:
5040 case SO_WANTOOBFLAG:
5041 case SO_NOWAKEFROMSLEEP:
5042 case SO_NOAPNFALLBK:
5043 error = sooptcopyin(sopt, &optval, sizeof(optval),
5044 sizeof(optval));
5045 if (error != 0) {
5046 goto out;
5047 }
5048 if (optval) {
5049 so->so_options |= sopt->sopt_name;
5050 } else {
5051 so->so_options &= ~sopt->sopt_name;
5052 }
5053 break;
5054
5055 case SO_SNDBUF:
5056 case SO_RCVBUF:
5057 case SO_SNDLOWAT:
5058 case SO_RCVLOWAT:
5059 error = sooptcopyin(sopt, &optval, sizeof(optval),
5060 sizeof(optval));
5061 if (error != 0) {
5062 goto out;
5063 }
5064
5065 /*
5066 * Values < 1 make no sense for any of these
5067 * options, so disallow them.
5068 */
5069 if (optval < 1) {
5070 error = EINVAL;
5071 goto out;
5072 }
5073
5074 switch (sopt->sopt_name) {
5075 case SO_SNDBUF:
5076 case SO_RCVBUF: {
5077 struct sockbuf *sb =
5078 (sopt->sopt_name == SO_SNDBUF) ?
5079 &so->so_snd : &so->so_rcv;
5080 if (sbreserve(sb, (u_int32_t)optval) == 0) {
5081 error = ENOBUFS;
5082 goto out;
5083 }
5084 sb->sb_flags |= SB_USRSIZE;
5085 sb->sb_flags &= ~SB_AUTOSIZE;
5086 sb->sb_idealsize = (u_int32_t)optval;
5087 break;
5088 }
5089 /*
5090 * Make sure the low-water is never greater than
5091 * the high-water.
5092 */
5093 case SO_SNDLOWAT: {
5094 int space = sbspace(&so->so_snd);
5095 u_int32_t hiwat = so->so_snd.sb_hiwat;
5096
5097 if (so->so_snd.sb_flags & SB_UNIX) {
5098 struct unpcb *unp =
5099 (struct unpcb *)(so->so_pcb);
5100 if (unp != NULL &&
5101 unp->unp_conn != NULL) {
5102 hiwat += unp->unp_conn->unp_cc;
5103 }
5104 }
5105
5106 so->so_snd.sb_lowat =
5107 (optval > hiwat) ?
5108 hiwat : optval;
5109
5110 if (space >= so->so_snd.sb_lowat) {
5111 sowwakeup(so);
5112 }
5113 break;
5114 }
5115 case SO_RCVLOWAT: {
5116 int64_t data_len;
5117 so->so_rcv.sb_lowat =
5118 (optval > so->so_rcv.sb_hiwat) ?
5119 so->so_rcv.sb_hiwat : optval;
5120 data_len = so->so_rcv.sb_cc
5121 - so->so_rcv.sb_ctl;
5122 if (data_len >= so->so_rcv.sb_lowat) {
5123 sorwakeup(so);
5124 }
5125 break;
5126 }
5127 }
5128 break;
5129
5130 case SO_SNDTIMEO:
5131 case SO_RCVTIMEO:
5132 error = sooptcopyin_timeval(sopt, &tv);
5133 if (error != 0) {
5134 goto out;
5135 }
5136
5137 switch (sopt->sopt_name) {
5138 case SO_SNDTIMEO:
5139 so->so_snd.sb_timeo = tv;
5140 break;
5141 case SO_RCVTIMEO:
5142 so->so_rcv.sb_timeo = tv;
5143 break;
5144 }
5145 break;
5146
5147 case SO_NKE: {
5148 struct so_nke nke;
5149
5150 error = sooptcopyin(sopt, &nke, sizeof(nke),
5151 sizeof(nke));
5152 if (error != 0) {
5153 goto out;
5154 }
5155
5156 error = sflt_attach_internal(so, nke.nke_handle);
5157 break;
5158 }
5159
5160 case SO_NOSIGPIPE:
5161 error = sooptcopyin(sopt, &optval, sizeof(optval),
5162 sizeof(optval));
5163 if (error != 0) {
5164 goto out;
5165 }
5166 if (optval != 0) {
5167 so->so_flags |= SOF_NOSIGPIPE;
5168 } else {
5169 so->so_flags &= ~SOF_NOSIGPIPE;
5170 }
5171 break;
5172
5173 case SO_NOADDRERR:
5174 error = sooptcopyin(sopt, &optval, sizeof(optval),
5175 sizeof(optval));
5176 if (error != 0) {
5177 goto out;
5178 }
5179 if (optval != 0) {
5180 so->so_flags |= SOF_NOADDRAVAIL;
5181 } else {
5182 so->so_flags &= ~SOF_NOADDRAVAIL;
5183 }
5184 break;
5185
5186 case SO_REUSESHAREUID:
5187 error = sooptcopyin(sopt, &optval, sizeof(optval),
5188 sizeof(optval));
5189 if (error != 0) {
5190 goto out;
5191 }
5192 if (optval != 0) {
5193 so->so_flags |= SOF_REUSESHAREUID;
5194 } else {
5195 so->so_flags &= ~SOF_REUSESHAREUID;
5196 }
5197 break;
5198
5199 case SO_NOTIFYCONFLICT:
5200 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
5201 error = EPERM;
5202 goto out;
5203 }
5204 error = sooptcopyin(sopt, &optval, sizeof(optval),
5205 sizeof(optval));
5206 if (error != 0) {
5207 goto out;
5208 }
5209 if (optval != 0) {
5210 so->so_flags |= SOF_NOTIFYCONFLICT;
5211 } else {
5212 so->so_flags &= ~SOF_NOTIFYCONFLICT;
5213 }
5214 break;
5215
5216 case SO_RESTRICTIONS:
5217 error = sooptcopyin(sopt, &optval, sizeof(optval),
5218 sizeof(optval));
5219 if (error != 0) {
5220 goto out;
5221 }
5222
5223 error = so_set_restrictions(so, optval);
5224 break;
5225
5226 case SO_AWDL_UNRESTRICTED:
5227 if (SOCK_DOM(so) != PF_INET &&
5228 SOCK_DOM(so) != PF_INET6) {
5229 error = EOPNOTSUPP;
5230 goto out;
5231 }
5232 error = sooptcopyin(sopt, &optval, sizeof(optval),
5233 sizeof(optval));
5234 if (error != 0) {
5235 goto out;
5236 }
5237 if (optval != 0) {
5238 error = soopt_cred_check(so,
5239 PRIV_NET_RESTRICTED_AWDL, false, false);
5240 if (error == 0) {
5241 inp_set_awdl_unrestricted(
5242 sotoinpcb(so));
5243 }
5244 } else {
5245 inp_clear_awdl_unrestricted(sotoinpcb(so));
5246 }
5247 break;
5248 case SO_INTCOPROC_ALLOW:
5249 if (SOCK_DOM(so) != PF_INET6) {
5250 error = EOPNOTSUPP;
5251 goto out;
5252 }
5253 error = sooptcopyin(sopt, &optval, sizeof(optval),
5254 sizeof(optval));
5255 if (error != 0) {
5256 goto out;
5257 }
5258 if (optval != 0 &&
5259 inp_get_intcoproc_allowed(sotoinpcb(so)) == FALSE) {
5260 error = soopt_cred_check(so,
5261 PRIV_NET_RESTRICTED_INTCOPROC, false, false);
5262 if (error == 0) {
5263 inp_set_intcoproc_allowed(
5264 sotoinpcb(so));
5265 }
5266 } else if (optval == 0) {
5267 inp_clear_intcoproc_allowed(sotoinpcb(so));
5268 }
5269 break;
5270
5271 case SO_LABEL:
5272 error = EOPNOTSUPP;
5273 break;
5274
5275 case SO_UPCALLCLOSEWAIT:
5276 error = sooptcopyin(sopt, &optval, sizeof(optval),
5277 sizeof(optval));
5278 if (error != 0) {
5279 goto out;
5280 }
5281 if (optval != 0) {
5282 so->so_flags |= SOF_UPCALLCLOSEWAIT;
5283 } else {
5284 so->so_flags &= ~SOF_UPCALLCLOSEWAIT;
5285 }
5286 break;
5287
5288 case SO_RANDOMPORT:
5289 error = sooptcopyin(sopt, &optval, sizeof(optval),
5290 sizeof(optval));
5291 if (error != 0) {
5292 goto out;
5293 }
5294 if (optval != 0) {
5295 so->so_flags |= SOF_BINDRANDOMPORT;
5296 } else {
5297 so->so_flags &= ~SOF_BINDRANDOMPORT;
5298 }
5299 break;
5300
5301 case SO_NP_EXTENSIONS: {
5302 struct so_np_extensions sonpx;
5303
5304 error = sooptcopyin(sopt, &sonpx, sizeof(sonpx),
5305 sizeof(sonpx));
5306 if (error != 0) {
5307 goto out;
5308 }
5309 if (sonpx.npx_mask & ~SONPX_MASK_VALID) {
5310 error = EINVAL;
5311 goto out;
5312 }
5313 /*
5314 * Only one bit defined for now
5315 */
5316 if ((sonpx.npx_mask & SONPX_SETOPTSHUT)) {
5317 if ((sonpx.npx_flags & SONPX_SETOPTSHUT)) {
5318 so->so_flags |= SOF_NPX_SETOPTSHUT;
5319 } else {
5320 so->so_flags &= ~SOF_NPX_SETOPTSHUT;
5321 }
5322 }
5323 break;
5324 }
5325
5326 case SO_TRAFFIC_CLASS: {
5327 error = sooptcopyin(sopt, &optval, sizeof(optval),
5328 sizeof(optval));
5329 if (error != 0) {
5330 goto out;
5331 }
5332 if (optval >= SO_TC_NET_SERVICE_OFFSET) {
5333 int netsvc = optval - SO_TC_NET_SERVICE_OFFSET;
5334 error = so_set_net_service_type(so, netsvc);
5335 goto out;
5336 }
5337 error = so_set_traffic_class(so, optval);
5338 if (error != 0) {
5339 goto out;
5340 }
5341 so->so_flags1 &= ~SOF1_TC_NET_SERV_TYPE;
5342 so->so_netsvctype = _NET_SERVICE_TYPE_UNSPEC;
5343 break;
5344 }
5345
5346 case SO_RECV_TRAFFIC_CLASS: {
5347 error = sooptcopyin(sopt, &optval, sizeof(optval),
5348 sizeof(optval));
5349 if (error != 0) {
5350 goto out;
5351 }
5352 if (optval == 0) {
5353 so->so_flags &= ~SOF_RECV_TRAFFIC_CLASS;
5354 } else {
5355 so->so_flags |= SOF_RECV_TRAFFIC_CLASS;
5356 }
5357 break;
5358 }
5359
5360 #if (DEVELOPMENT || DEBUG)
5361 case SO_TRAFFIC_CLASS_DBG: {
5362 struct so_tcdbg so_tcdbg;
5363
5364 error = sooptcopyin(sopt, &so_tcdbg,
5365 sizeof(struct so_tcdbg), sizeof(struct so_tcdbg));
5366 if (error != 0) {
5367 goto out;
5368 }
5369 error = so_set_tcdbg(so, &so_tcdbg);
5370 if (error != 0) {
5371 goto out;
5372 }
5373 break;
5374 }
5375 #endif /* (DEVELOPMENT || DEBUG) */
5376
5377 case SO_PRIVILEGED_TRAFFIC_CLASS:
5378 error = priv_check_cred(kauth_cred_get(),
5379 PRIV_NET_PRIVILEGED_TRAFFIC_CLASS, 0);
5380 if (error != 0) {
5381 goto out;
5382 }
5383 error = sooptcopyin(sopt, &optval, sizeof(optval),
5384 sizeof(optval));
5385 if (error != 0) {
5386 goto out;
5387 }
5388 if (optval == 0) {
5389 so->so_flags &= ~SOF_PRIVILEGED_TRAFFIC_CLASS;
5390 } else {
5391 so->so_flags |= SOF_PRIVILEGED_TRAFFIC_CLASS;
5392 }
5393 break;
5394
5395 #if (DEVELOPMENT || DEBUG)
5396 case SO_DEFUNCTIT:
5397 error = sosetdefunct(current_proc(), so, 0, FALSE);
5398 if (error == 0) {
5399 error = sodefunct(current_proc(), so, 0);
5400 }
5401
5402 break;
5403 #endif /* (DEVELOPMENT || DEBUG) */
5404
5405 case SO_DEFUNCTOK:
5406 error = sooptcopyin(sopt, &optval, sizeof(optval),
5407 sizeof(optval));
5408 if (error != 0 || (so->so_flags & SOF_DEFUNCT)) {
5409 if (error == 0) {
5410 error = EBADF;
5411 }
5412 goto out;
5413 }
5414 /*
5415 * Any process can set SO_DEFUNCTOK (clear
5416 * SOF_NODEFUNCT), but only root can clear
5417 * SO_DEFUNCTOK (set SOF_NODEFUNCT).
5418 */
5419 if (optval == 0 &&
5420 kauth_cred_issuser(kauth_cred_get()) == 0) {
5421 error = EPERM;
5422 goto out;
5423 }
5424 if (optval) {
5425 so->so_flags &= ~SOF_NODEFUNCT;
5426 } else {
5427 so->so_flags |= SOF_NODEFUNCT;
5428 }
5429
5430 if (SOCK_DOM(so) == PF_INET ||
5431 SOCK_DOM(so) == PF_INET6) {
5432 char s[MAX_IPv6_STR_LEN];
5433 char d[MAX_IPv6_STR_LEN];
5434 struct inpcb *inp = sotoinpcb(so);
5435
5436 SODEFUNCTLOG("%s[%d, %s]: so 0x%llx "
5437 "[%s %s:%d -> %s:%d] is now marked "
5438 "as %seligible for "
5439 "defunct\n", __func__, proc_selfpid(),
5440 proc_best_name(current_proc()),
5441 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
5442 (SOCK_TYPE(so) == SOCK_STREAM) ?
5443 "TCP" : "UDP", inet_ntop(SOCK_DOM(so),
5444 ((SOCK_DOM(so) == PF_INET) ?
5445 (void *)&inp->inp_laddr.s_addr :
5446 (void *)&inp->in6p_laddr), s, sizeof(s)),
5447 ntohs(inp->in6p_lport),
5448 inet_ntop(SOCK_DOM(so),
5449 (SOCK_DOM(so) == PF_INET) ?
5450 (void *)&inp->inp_faddr.s_addr :
5451 (void *)&inp->in6p_faddr, d, sizeof(d)),
5452 ntohs(inp->in6p_fport),
5453 (so->so_flags & SOF_NODEFUNCT) ?
5454 "not " : "");
5455 } else {
5456 SODEFUNCTLOG("%s[%d, %s]: so 0x%llx [%d,%d] "
5457 "is now marked as %seligible for "
5458 "defunct\n",
5459 __func__, proc_selfpid(),
5460 proc_best_name(current_proc()),
5461 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
5462 SOCK_DOM(so), SOCK_TYPE(so),
5463 (so->so_flags & SOF_NODEFUNCT) ?
5464 "not " : "");
5465 }
5466 break;
5467
5468 case SO_ISDEFUNCT:
5469 /* This option is not settable */
5470 error = EINVAL;
5471 break;
5472
5473 case SO_OPPORTUNISTIC:
5474 error = sooptcopyin(sopt, &optval, sizeof(optval),
5475 sizeof(optval));
5476 if (error == 0) {
5477 error = so_set_opportunistic(so, optval);
5478 }
5479 break;
5480
5481 case SO_FLUSH:
5482 /* This option is handled by lower layer(s) */
5483 error = 0;
5484 break;
5485
5486 case SO_RECV_ANYIF:
5487 error = sooptcopyin(sopt, &optval, sizeof(optval),
5488 sizeof(optval));
5489 if (error == 0) {
5490 error = so_set_recv_anyif(so, optval);
5491 }
5492 break;
5493
5494 case SO_TRAFFIC_MGT_BACKGROUND: {
5495 /* This option is handled by lower layer(s) */
5496 error = 0;
5497 break;
5498 }
5499
5500 #if FLOW_DIVERT
5501 case SO_FLOW_DIVERT_TOKEN:
5502 error = flow_divert_token_set(so, sopt);
5503 break;
5504 #endif /* FLOW_DIVERT */
5505
5506
5507 case SO_DELEGATED:
5508 if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
5509 sizeof(optval))) != 0) {
5510 break;
5511 }
5512
5513 error = so_set_effective_pid(so, optval, sopt->sopt_p, true);
5514 break;
5515
5516 case SO_DELEGATED_UUID: {
5517 uuid_t euuid;
5518
5519 if ((error = sooptcopyin(sopt, &euuid, sizeof(euuid),
5520 sizeof(euuid))) != 0) {
5521 break;
5522 }
5523
5524 error = so_set_effective_uuid(so, euuid, sopt->sopt_p, true);
5525 break;
5526 }
5527
5528 #if NECP
5529 case SO_NECP_ATTRIBUTES:
5530 error = necp_set_socket_attributes(so, sopt);
5531 break;
5532
5533 case SO_NECP_CLIENTUUID: {
5534 if (SOCK_DOM(so) == PF_MULTIPATH) {
5535 /* Handled by MPTCP itself */
5536 break;
5537 }
5538
5539 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
5540 error = EINVAL;
5541 goto out;
5542 }
5543
5544 struct inpcb *inp = sotoinpcb(so);
5545 if (!uuid_is_null(inp->necp_client_uuid)) {
5546 // Clear out the old client UUID if present
5547 necp_inpcb_remove_cb(inp);
5548 }
5549
5550 error = sooptcopyin(sopt, &inp->necp_client_uuid,
5551 sizeof(uuid_t), sizeof(uuid_t));
5552 if (error != 0) {
5553 goto out;
5554 }
5555
5556 if (uuid_is_null(inp->necp_client_uuid)) {
5557 error = EINVAL;
5558 goto out;
5559 }
5560
5561 pid_t current_pid = proc_pid(current_proc());
5562 error = necp_client_register_socket_flow(current_pid,
5563 inp->necp_client_uuid, inp);
5564 if (error != 0) {
5565 uuid_clear(inp->necp_client_uuid);
5566 goto out;
5567 }
5568
5569 if (inp->inp_lport != 0) {
5570 // There is a bound local port, so this is not
5571 // a fresh socket. Assign to the client.
5572 necp_client_assign_from_socket(current_pid, inp->necp_client_uuid, inp);
5573 }
5574
5575 break;
5576 }
5577 case SO_NECP_LISTENUUID: {
5578 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
5579 error = EINVAL;
5580 goto out;
5581 }
5582
5583 struct inpcb *inp = sotoinpcb(so);
5584 if (!uuid_is_null(inp->necp_client_uuid)) {
5585 error = EINVAL;
5586 goto out;
5587 }
5588
5589 error = sooptcopyin(sopt, &inp->necp_client_uuid,
5590 sizeof(uuid_t), sizeof(uuid_t));
5591 if (error != 0) {
5592 goto out;
5593 }
5594
5595 if (uuid_is_null(inp->necp_client_uuid)) {
5596 error = EINVAL;
5597 goto out;
5598 }
5599
5600 error = necp_client_register_socket_listener(proc_pid(current_proc()),
5601 inp->necp_client_uuid, inp);
5602 if (error != 0) {
5603 uuid_clear(inp->necp_client_uuid);
5604 goto out;
5605 }
5606
5607 // Mark that the port registration is held by NECP
5608 inp->inp_flags2 |= INP2_EXTERNAL_PORT;
5609
5610 break;
5611 }
5612 #endif /* NECP */
5613
5614 case SO_EXTENDED_BK_IDLE:
5615 error = sooptcopyin(sopt, &optval, sizeof(optval),
5616 sizeof(optval));
5617 if (error == 0) {
5618 error = so_set_extended_bk_idle(so, optval);
5619 }
5620 break;
5621
5622 case SO_MARK_CELLFALLBACK:
5623 error = sooptcopyin(sopt, &optval, sizeof(optval),
5624 sizeof(optval));
5625 if (error != 0) {
5626 goto out;
5627 }
5628 if (optval < 0) {
5629 error = EINVAL;
5630 goto out;
5631 }
5632 if (optval == 0) {
5633 so->so_flags1 &= ~SOF1_CELLFALLBACK;
5634 } else {
5635 so->so_flags1 |= SOF1_CELLFALLBACK;
5636 }
5637 break;
5638
5639 case SO_STATISTICS_EVENT:
5640 error = sooptcopyin(sopt, &long_optval,
5641 sizeof(long_optval), sizeof(long_optval));
5642 if (error != 0) {
5643 goto out;
5644 }
5645 u_int64_t nstat_event = 0;
5646 error = so_statistics_event_to_nstat_event(
5647 &long_optval, &nstat_event);
5648 if (error != 0) {
5649 goto out;
5650 }
5651 nstat_pcb_event(sotoinpcb(so), nstat_event);
5652 break;
5653
5654 case SO_NET_SERVICE_TYPE: {
5655 error = sooptcopyin(sopt, &optval, sizeof(optval),
5656 sizeof(optval));
5657 if (error != 0) {
5658 goto out;
5659 }
5660 error = so_set_net_service_type(so, optval);
5661 break;
5662 }
5663
5664 case SO_QOSMARKING_POLICY_OVERRIDE:
5665 error = priv_check_cred(kauth_cred_get(),
5666 PRIV_NET_QOSMARKING_POLICY_OVERRIDE, 0);
5667 if (error != 0) {
5668 goto out;
5669 }
5670 error = sooptcopyin(sopt, &optval, sizeof(optval),
5671 sizeof(optval));
5672 if (error != 0) {
5673 goto out;
5674 }
5675 if (optval == 0) {
5676 so->so_flags1 &= ~SOF1_QOSMARKING_POLICY_OVERRIDE;
5677 } else {
5678 so->so_flags1 |= SOF1_QOSMARKING_POLICY_OVERRIDE;
5679 }
5680 break;
5681
5682 case SO_MPKL_SEND_INFO: {
5683 struct so_mpkl_send_info so_mpkl_send_info;
5684
5685 error = sooptcopyin(sopt, &so_mpkl_send_info,
5686 sizeof(struct so_mpkl_send_info), sizeof(struct so_mpkl_send_info));
5687 if (error != 0) {
5688 goto out;
5689 }
5690 uuid_copy(so->so_mpkl_send_uuid, so_mpkl_send_info.mpkl_uuid);
5691 so->so_mpkl_send_proto = so_mpkl_send_info.mpkl_proto;
5692
5693 if (uuid_is_null(so->so_mpkl_send_uuid) && so->so_mpkl_send_proto == 0) {
5694 so->so_flags1 &= ~SOF1_MPKL_SEND_INFO;
5695 } else {
5696 so->so_flags1 |= SOF1_MPKL_SEND_INFO;
5697 }
5698 break;
5699 }
5700 case SO_WANT_KEV_SOCKET_CLOSED: {
5701 error = sooptcopyin(sopt, &optval, sizeof(optval),
5702 sizeof(optval));
5703 if (error != 0) {
5704 goto out;
5705 }
5706 if (optval == 0) {
5707 so->so_flags1 &= ~SOF1_WANT_KEV_SOCK_CLOSED;
5708 } else {
5709 so->so_flags1 |= SOF1_WANT_KEV_SOCK_CLOSED;
5710 }
5711 break;
5712 }
5713 default:
5714 error = ENOPROTOOPT;
5715 break;
5716 }
5717 if (error == 0 && so->so_proto != NULL &&
5718 so->so_proto->pr_ctloutput != NULL) {
5719 (void) so->so_proto->pr_ctloutput(so, sopt);
5720 }
5721 }
5722 out:
5723 if (dolock) {
5724 socket_unlock(so, 1);
5725 }
5726 return error;
5727 }
5728
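/*
 * Illustrative sketch (hypothetical, user-space; not built as part of
 * the kernel): per the SO_LINGER handling in sosetoptlock() above,
 * SO_LINGER_SEC is the seconds-based variant (the supplied l_linger
 * is scaled by hz before being stored in so_linger).
 */
#if 0 /* illustration only */
#include <sys/socket.h>

static int
linger_on_close(int fd, int seconds)
{
	struct linger l = { .l_onoff = 1, .l_linger = seconds };

	/*
	 * Block close(2) for up to 'seconds' while unsent data drains;
	 * the plain SO_LINGER variant skips the hz scaling shown above.
	 */
	return setsockopt(fd, SOL_SOCKET, SO_LINGER_SEC, &l, sizeof(l));
}
#endif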
5729 /* Helper routines for getsockopt */
5730 int
5731 sooptcopyout(struct sockopt *sopt, void *buf, size_t len)
5732 {
5733 int error;
5734 size_t valsize;
5735
5736 error = 0;
5737
5738 /*
5739 * Documented get behavior is that we always return a value,
5740 * possibly truncated to fit in the user's buffer.
5741 * Traditional behavior is that we always tell the user
5742 * precisely how much we copied, rather than something useful
5743 * like the total amount we had available for her.
5744 * Note that this interface is not idempotent; the entire answer must
5745 * be generated ahead of time.
5746 */
5747 valsize = min(len, sopt->sopt_valsize);
5748 sopt->sopt_valsize = valsize;
5749 if (sopt->sopt_val != USER_ADDR_NULL) {
5750 if (sopt->sopt_p != kernproc) {
5751 error = copyout(buf, sopt->sopt_val, valsize);
5752 } else {
5753 bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
5754 }
5755 }
5756 return error;
5757 }
5758
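/*
 * Illustrative sketch (hypothetical, user-space; not built as part of
 * the kernel): as the comment in sooptcopyout() describes, getsockopt(2)
 * always copies a value out, possibly truncated to the caller's buffer,
 * and reports how many bytes were actually copied through *optlen.
 */
#if 0 /* illustration only */
#include <sys/socket.h>

static int
get_send_buffer_size(int fd, int *size)
{
	socklen_t len = sizeof(*size);

	/*
	 * On return 'len' holds the number of bytes copied out, which
	 * may be smaller than the option's full size.
	 */
	return getsockopt(fd, SOL_SOCKET, SO_SNDBUF, size, &len);
}
#endif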
5759 static int
5760 sooptcopyout_timeval(struct sockopt *sopt, const struct timeval *tv_p)
5761 {
5762 int error;
5763 size_t len;
5764 struct user64_timeval tv64 = {};
5765 struct user32_timeval tv32 = {};
5766 const void * val;
5767 size_t valsize;
5768
5769 error = 0;
5770 if (proc_is64bit(sopt->sopt_p)) {
5771 len = sizeof(tv64);
5772 tv64.tv_sec = tv_p->tv_sec;
5773 tv64.tv_usec = tv_p->tv_usec;
5774 val = &tv64;
5775 } else {
5776 len = sizeof(tv32);
5777 tv32.tv_sec = tv_p->tv_sec;
5778 tv32.tv_usec = tv_p->tv_usec;
5779 val = &tv32;
5780 }
5781 valsize = min(len, sopt->sopt_valsize);
5782 sopt->sopt_valsize = valsize;
5783 if (sopt->sopt_val != USER_ADDR_NULL) {
5784 if (sopt->sopt_p != kernproc) {
5785 error = copyout(val, sopt->sopt_val, valsize);
5786 } else {
5787 bcopy(val, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
5788 }
5789 }
5790 return error;
5791 }
5792
5793 /*
5794 * Return: 0 Success
5795 * ENOPROTOOPT
5796 * <pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
5797 * <pr_ctloutput>:???
5798 * <sf_getoption>:???
5799 */
5800 int
5801 sogetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
5802 {
5803 int error, optval;
5804 struct linger l;
5805 struct timeval tv;
5806
5807 if (sopt->sopt_dir != SOPT_GET) {
5808 sopt->sopt_dir = SOPT_GET;
5809 }
5810
5811 if (dolock) {
5812 socket_lock(so, 1);
5813 }
5814
5815 error = sflt_getsockopt(so, sopt);
5816 if (error != 0) {
5817 if (error == EJUSTRETURN) {
5818 error = 0;
5819 }
5820 goto out;
5821 }
5822
5823 if (sopt->sopt_level != SOL_SOCKET) {
5824 if (so->so_proto != NULL &&
5825 so->so_proto->pr_ctloutput != NULL) {
5826 error = (*so->so_proto->pr_ctloutput)(so, sopt);
5827 goto out;
5828 }
5829 error = ENOPROTOOPT;
5830 } else {
5831 /*
5832 * Allow socket-level (SOL_SOCKET) options to be filtered by
5833 * the protocol layer, if needed. A zero value returned from
5834 * the handler means use default socket-level processing as
5835 * done by the rest of this routine. Otherwise, any other
5836 * return value indicates that the option is unsupported.
5837 */
5838 if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs->
5839 pru_socheckopt(so, sopt)) != 0) {
5840 goto out;
5841 }
5842
5843 error = 0;
5844 switch (sopt->sopt_name) {
5845 case SO_LINGER:
5846 case SO_LINGER_SEC:
5847 l.l_onoff = ((so->so_options & SO_LINGER) ? 1 : 0);
5848 l.l_linger = (sopt->sopt_name == SO_LINGER) ?
5849 so->so_linger : so->so_linger / hz;
5850 error = sooptcopyout(sopt, &l, sizeof(l));
5851 break;
5852
5853 case SO_USELOOPBACK:
5854 case SO_DONTROUTE:
5855 case SO_DEBUG:
5856 case SO_KEEPALIVE:
5857 case SO_REUSEADDR:
5858 case SO_REUSEPORT:
5859 case SO_BROADCAST:
5860 case SO_OOBINLINE:
5861 case SO_TIMESTAMP:
5862 case SO_TIMESTAMP_MONOTONIC:
5863 case SO_TIMESTAMP_CONTINUOUS:
5864 case SO_DONTTRUNC:
5865 case SO_WANTMORE:
5866 case SO_WANTOOBFLAG:
5867 case SO_NOWAKEFROMSLEEP:
5868 case SO_NOAPNFALLBK:
5869 optval = so->so_options & sopt->sopt_name;
5870 integer:
5871 error = sooptcopyout(sopt, &optval, sizeof(optval));
5872 break;
5873
5874 case SO_TYPE:
5875 optval = so->so_type;
5876 goto integer;
5877
5878 case SO_NREAD:
5879 if (so->so_proto->pr_flags & PR_ATOMIC) {
5880 int pkt_total;
5881 struct mbuf *m1;
5882
5883 pkt_total = 0;
5884 m1 = so->so_rcv.sb_mb;
5885 while (m1 != NULL) {
5886 if (m1->m_type == MT_DATA ||
5887 m1->m_type == MT_HEADER ||
5888 m1->m_type == MT_OOBDATA) {
5889 pkt_total += m1->m_len;
5890 }
5891 m1 = m1->m_next;
5892 }
5893 optval = pkt_total;
5894 } else {
5895 optval = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
5896 }
5897 goto integer;
5898
5899 case SO_NUMRCVPKT:
5900 if (so->so_proto->pr_flags & PR_ATOMIC) {
5901 int cnt = 0;
5902 struct mbuf *m1;
5903
5904 m1 = so->so_rcv.sb_mb;
5905 while (m1 != NULL) {
5906 cnt += 1;
5907 m1 = m1->m_nextpkt;
5908 }
5909 optval = cnt;
5910 goto integer;
5911 } else {
5912 error = ENOPROTOOPT;
5913 break;
5914 }
5915
5916 case SO_NWRITE:
5917 optval = so->so_snd.sb_cc;
5918 goto integer;
5919
5920 case SO_ERROR:
5921 optval = so->so_error;
5922 so->so_error = 0;
5923 goto integer;
5924
5925 case SO_SNDBUF: {
5926 u_int32_t hiwat = so->so_snd.sb_hiwat;
5927
5928 if (so->so_snd.sb_flags & SB_UNIX) {
5929 struct unpcb *unp =
5930 (struct unpcb *)(so->so_pcb);
5931 if (unp != NULL && unp->unp_conn != NULL) {
5932 hiwat += unp->unp_conn->unp_cc;
5933 }
5934 }
5935
5936 optval = hiwat;
5937 goto integer;
5938 }
5939 case SO_RCVBUF:
5940 optval = so->so_rcv.sb_hiwat;
5941 goto integer;
5942
5943 case SO_SNDLOWAT:
5944 optval = so->so_snd.sb_lowat;
5945 goto integer;
5946
5947 case SO_RCVLOWAT:
5948 optval = so->so_rcv.sb_lowat;
5949 goto integer;
5950
5951 case SO_SNDTIMEO:
5952 case SO_RCVTIMEO:
5953 tv = (sopt->sopt_name == SO_SNDTIMEO ?
5954 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
5955
5956 error = sooptcopyout_timeval(sopt, &tv);
5957 break;
5958
5959 case SO_NOSIGPIPE:
5960 optval = (so->so_flags & SOF_NOSIGPIPE);
5961 goto integer;
5962
5963 case SO_NOADDRERR:
5964 optval = (so->so_flags & SOF_NOADDRAVAIL);
5965 goto integer;
5966
5967 case SO_REUSESHAREUID:
5968 optval = (so->so_flags & SOF_REUSESHAREUID);
5969 goto integer;
5970
5971
5972 case SO_NOTIFYCONFLICT:
5973 optval = (so->so_flags & SOF_NOTIFYCONFLICT);
5974 goto integer;
5975
5976 case SO_RESTRICTIONS:
5977 optval = so_get_restrictions(so);
5978 goto integer;
5979
5980 case SO_AWDL_UNRESTRICTED:
5981 if (SOCK_DOM(so) == PF_INET ||
5982 SOCK_DOM(so) == PF_INET6) {
5983 optval = inp_get_awdl_unrestricted(
5984 sotoinpcb(so));
5985 goto integer;
5986 } else {
5987 error = EOPNOTSUPP;
5988 }
5989 break;
5990
5991 case SO_INTCOPROC_ALLOW:
5992 if (SOCK_DOM(so) == PF_INET6) {
5993 optval = inp_get_intcoproc_allowed(
5994 sotoinpcb(so));
5995 goto integer;
5996 } else {
5997 error = EOPNOTSUPP;
5998 }
5999 break;
6000
6001 case SO_LABEL:
6002 error = EOPNOTSUPP;
6003 break;
6004
6005 case SO_PEERLABEL:
6006 error = EOPNOTSUPP;
6007 break;
6008
6009 #ifdef __APPLE_API_PRIVATE
6010 case SO_UPCALLCLOSEWAIT:
6011 optval = (so->so_flags & SOF_UPCALLCLOSEWAIT);
6012 goto integer;
6013 #endif
6014 case SO_RANDOMPORT:
6015 optval = (so->so_flags & SOF_BINDRANDOMPORT);
6016 goto integer;
6017
6018 case SO_NP_EXTENSIONS: {
6019 struct so_np_extensions sonpx = {};
6020
6021 sonpx.npx_flags = (so->so_flags & SOF_NPX_SETOPTSHUT) ?
6022 SONPX_SETOPTSHUT : 0;
6023 sonpx.npx_mask = SONPX_MASK_VALID;
6024
6025 error = sooptcopyout(sopt, &sonpx,
6026 sizeof(struct so_np_extensions));
6027 break;
6028 }
6029
6030 case SO_TRAFFIC_CLASS:
6031 optval = so->so_traffic_class;
6032 goto integer;
6033
6034 case SO_RECV_TRAFFIC_CLASS:
6035 optval = (so->so_flags & SOF_RECV_TRAFFIC_CLASS);
6036 goto integer;
6037
6038 #if (DEVELOPMENT || DEBUG)
6039 case SO_TRAFFIC_CLASS_DBG:
6040 error = sogetopt_tcdbg(so, sopt);
6041 break;
6042 #endif /* (DEVELOPMENT || DEBUG) */
6043
6044 case SO_PRIVILEGED_TRAFFIC_CLASS:
6045 optval = (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS);
6046 goto integer;
6047
6048 case SO_DEFUNCTOK:
6049 optval = !(so->so_flags & SOF_NODEFUNCT);
6050 goto integer;
6051
6052 case SO_ISDEFUNCT:
6053 optval = (so->so_flags & SOF_DEFUNCT);
6054 goto integer;
6055
6056 case SO_OPPORTUNISTIC:
6057 optval = so_get_opportunistic(so);
6058 goto integer;
6059
6060 case SO_FLUSH:
6061 /* This option is not gettable */
6062 error = EINVAL;
6063 break;
6064
6065 case SO_RECV_ANYIF:
6066 optval = so_get_recv_anyif(so);
6067 goto integer;
6068
6069 case SO_TRAFFIC_MGT_BACKGROUND:
6070 /* This option is handled by lower layer(s) */
6071 if (so->so_proto != NULL &&
6072 so->so_proto->pr_ctloutput != NULL) {
6073 (void) so->so_proto->pr_ctloutput(so, sopt);
6074 }
6075 break;
6076
6077 #if FLOW_DIVERT
6078 case SO_FLOW_DIVERT_TOKEN:
6079 error = flow_divert_token_get(so, sopt);
6080 break;
6081 #endif /* FLOW_DIVERT */
6082
6083 #if NECP
6084 case SO_NECP_ATTRIBUTES:
6085 error = necp_get_socket_attributes(so, sopt);
6086 break;
6087
6088 case SO_NECP_CLIENTUUID: {
6089 uuid_t *ncu;
6090
6091 if (SOCK_DOM(so) == PF_MULTIPATH) {
6092 ncu = &mpsotomppcb(so)->necp_client_uuid;
6093 } else if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
6094 ncu = &sotoinpcb(so)->necp_client_uuid;
6095 } else {
6096 error = EINVAL;
6097 goto out;
6098 }
6099
6100 error = sooptcopyout(sopt, ncu, sizeof(uuid_t));
6101 break;
6102 }
6103
6104 case SO_NECP_LISTENUUID: {
6105 uuid_t *nlu;
6106
6107 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
6108 if (sotoinpcb(so)->inp_flags2 & INP2_EXTERNAL_PORT) {
6109 nlu = &sotoinpcb(so)->necp_client_uuid;
6110 } else {
6111 error = ENOENT;
6112 goto out;
6113 }
6114 } else {
6115 error = EINVAL;
6116 goto out;
6117 }
6118
6119 error = sooptcopyout(sopt, nlu, sizeof(uuid_t));
6120 break;
6121 }
6122 #endif /* NECP */
6123
6124 #if CONTENT_FILTER
6125 case SO_CFIL_SOCK_ID: {
6126 cfil_sock_id_t sock_id;
6127
6128 sock_id = cfil_sock_id_from_socket(so);
6129
6130 error = sooptcopyout(sopt, &sock_id,
6131 sizeof(cfil_sock_id_t));
6132 break;
6133 }
6134 #endif /* CONTENT_FILTER */
6135
6136 case SO_EXTENDED_BK_IDLE:
6137 optval = (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED);
6138 goto integer;
6139 case SO_MARK_CELLFALLBACK:
6140 optval = ((so->so_flags1 & SOF1_CELLFALLBACK) > 0)
6141 ? 1 : 0;
6142 goto integer;
6143 case SO_NET_SERVICE_TYPE: {
6144 if ((so->so_flags1 & SOF1_TC_NET_SERV_TYPE)) {
6145 optval = so->so_netsvctype;
6146 } else {
6147 optval = NET_SERVICE_TYPE_BE;
6148 }
6149 goto integer;
6150 }
6151 case SO_NETSVC_MARKING_LEVEL:
6152 optval = so_get_netsvc_marking_level(so);
6153 goto integer;
6154
6155 case SO_MPKL_SEND_INFO: {
6156 struct so_mpkl_send_info so_mpkl_send_info;
6157
6158 uuid_copy(so_mpkl_send_info.mpkl_uuid, so->so_mpkl_send_uuid);
6159 so_mpkl_send_info.mpkl_proto = so->so_mpkl_send_proto;
6160 error = sooptcopyout(sopt, &so_mpkl_send_info,
6161 sizeof(struct so_mpkl_send_info));
6162 break;
6163 }
6164 default:
6165 error = ENOPROTOOPT;
6166 break;
6167 }
6168 }
6169 out:
6170 if (dolock) {
6171 socket_unlock(so, 1);
6172 }
6173 return error;
6174 }
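/*
 * Illustrative user-space sketch (guarded out of compilation): one of the
 * plain integer options handled by the switch above, SO_NET_SERVICE_TYPE,
 * reads back the service type previously set on the socket, or
 * NET_SERVICE_TYPE_BE when none was ever set.  Error handling is abbreviated.
 */
#if 0
#include <sys/socket.h>

static int
query_net_service_type(int sock)
{
	int nst = 0;
	socklen_t len = sizeof(nst);

	if (getsockopt(sock, SOL_SOCKET, SO_NET_SERVICE_TYPE, &nst, &len) != 0) {
		return -1;
	}
	return nst;	/* e.g. NET_SERVICE_TYPE_BE, _VI, _VO, ... */
}
#endif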
6175
6176 /*
6177 * The size limit on our soopt_getm differs from that on FreeBSD.
6178 * We limit the size of options to MCLBYTES. This will have to change
6179 * if we need to define options that need more space than MCLBYTES.
6180 */
6181 int
6182 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
6183 {
6184 struct mbuf *m, *m_prev;
6185 int sopt_size = sopt->sopt_valsize;
6186 int how;
6187
6188 if (sopt_size <= 0 || sopt_size > MCLBYTES) {
6189 return EMSGSIZE;
6190 }
6191
6192 how = sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT;
6193 MGET(m, how, MT_DATA);
6194 if (m == NULL) {
6195 return ENOBUFS;
6196 }
6197 if (sopt_size > MLEN) {
6198 MCLGET(m, how);
6199 if ((m->m_flags & M_EXT) == 0) {
6200 m_free(m);
6201 return ENOBUFS;
6202 }
6203 m->m_len = min(MCLBYTES, sopt_size);
6204 } else {
6205 m->m_len = min(MLEN, sopt_size);
6206 }
6207 sopt_size -= m->m_len;
6208 *mp = m;
6209 m_prev = m;
6210
6211 while (sopt_size > 0) {
6212 MGET(m, how, MT_DATA);
6213 if (m == NULL) {
6214 m_freem(*mp);
6215 return ENOBUFS;
6216 }
6217 if (sopt_size > MLEN) {
6218 MCLGET(m, how);
6219 if ((m->m_flags & M_EXT) == 0) {
6220 m_freem(*mp);
6221 m_freem(m);
6222 return ENOBUFS;
6223 }
6224 m->m_len = min(MCLBYTES, sopt_size);
6225 } else {
6226 m->m_len = min(MLEN, sopt_size);
6227 }
6228 sopt_size -= m->m_len;
6229 m_prev->m_next = m;
6230 m_prev = m;
6231 }
6232 return 0;
6233 }
6234
6235 /* copyin sopt data into mbuf chain */
6236 int
6237 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
6238 {
6239 struct mbuf *m0 = m;
6240
6241 if (sopt->sopt_val == USER_ADDR_NULL) {
6242 return 0;
6243 }
6244 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
6245 if (sopt->sopt_p != kernproc) {
6246 int error;
6247
6248 error = copyin(sopt->sopt_val, mtod(m, char *),
6249 m->m_len);
6250 if (error != 0) {
6251 m_freem(m0);
6252 return error;
6253 }
6254 } else {
6255 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val),
6256 mtod(m, char *), m->m_len);
6257 }
6258 sopt->sopt_valsize -= m->m_len;
6259 sopt->sopt_val += m->m_len;
6260 m = m->m_next;
6261 }
6262 /* sufficient space should have been allocated by ip6_sooptmcopyin() */
6263 if (m != NULL) {
6264 panic("soopt_mcopyin");
6265 /* NOTREACHED */
6266 }
6267 return 0;
6268 }
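/*
 * Simplified sketch of the typical in-kernel calling sequence for the two
 * helpers above (roughly what the IPv6 option code does): size an mbuf chain
 * for the option value, then copy the caller's bytes into it.  Names other
 * than soopt_getm()/soopt_mcopyin() are illustrative only.
 */
#if 0
static int
sockopt_to_mbuf_chain(struct sockopt *sopt, struct mbuf **chain)
{
	int error;

	error = soopt_getm(sopt, chain);	/* EMSGSIZE if larger than MCLBYTES */
	if (error != 0) {
		return error;
	}
	/* soopt_mcopyin() frees the chain itself if the copyin fails */
	return soopt_mcopyin(sopt, *chain);
}
#endif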
6269
6270 /* copyout mbuf chain data into soopt */
6271 int
6272 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
6273 {
6274 struct mbuf *m0 = m;
6275 size_t valsize = 0;
6276
6277 if (sopt->sopt_val == USER_ADDR_NULL) {
6278 return 0;
6279 }
6280 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
6281 if (sopt->sopt_p != kernproc) {
6282 int error;
6283
6284 error = copyout(mtod(m, char *), sopt->sopt_val,
6285 m->m_len);
6286 if (error != 0) {
6287 m_freem(m0);
6288 return error;
6289 }
6290 } else {
6291 bcopy(mtod(m, char *),
6292 CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
6293 }
6294 sopt->sopt_valsize -= m->m_len;
6295 sopt->sopt_val += m->m_len;
6296 valsize += m->m_len;
6297 m = m->m_next;
6298 }
6299 if (m != NULL) {
6300 /* a sufficiently large soopt buffer should have been supplied from user-land */
6301 m_freem(m0);
6302 return EINVAL;
6303 }
6304 sopt->sopt_valsize = valsize;
6305 return 0;
6306 }
6307
6308 void
6309 sohasoutofband(struct socket *so)
6310 {
6311 if (so->so_pgid < 0) {
6312 gsignal(-so->so_pgid, SIGURG);
6313 } else if (so->so_pgid > 0) {
6314 proc_signal(so->so_pgid, SIGURG);
6315 }
6316 selwakeup(&so->so_rcv.sb_sel);
6317 if (so->so_rcv.sb_flags & SB_KNOTE) {
6318 KNOTE(&so->so_rcv.sb_sel.si_note,
6319 (NOTE_OOB | SO_FILT_HINT_LOCKED));
6320 }
6321 }
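/*
 * Illustrative user-space sketch (guarded out of compilation): to receive the
 * SIGURG raised above, a process must first claim ownership of the descriptor
 * with F_SETOWN; the urgent byte itself is then fetched with MSG_OOB (unless
 * SO_OOBINLINE is set).  Real handlers should defer work to the main loop.
 */
#if 0
#include <fcntl.h>
#include <signal.h>
#include <sys/socket.h>
#include <unistd.h>

static void
on_urgent(int signo)
{
	(void)signo;	/* only async-signal-safe work belongs here */
}

static void
arm_urgent_notification(int sock)
{
	signal(SIGURG, on_urgent);
	(void)fcntl(sock, F_SETOWN, getpid());	/* route SIGURG to this process */
}

static ssize_t
read_urgent_byte(int sock, char *byte)
{
	/* typically called from the main loop after SIGURG has fired */
	return recv(sock, byte, 1, MSG_OOB);
}
#endif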
6322
6323 int
6324 sopoll(struct socket *so, int events, kauth_cred_t cred, void * wql)
6325 {
6326 #pragma unused(cred)
6327 struct proc *p = current_proc();
6328 int revents = 0;
6329
6330 socket_lock(so, 1);
6331 so_update_last_owner_locked(so, PROC_NULL);
6332 so_update_policy(so);
6333
6334 if (events & (POLLIN | POLLRDNORM)) {
6335 if (soreadable(so)) {
6336 revents |= events & (POLLIN | POLLRDNORM);
6337 }
6338 }
6339
6340 if (events & (POLLOUT | POLLWRNORM)) {
6341 if (sowriteable(so)) {
6342 revents |= events & (POLLOUT | POLLWRNORM);
6343 }
6344 }
6345
6346 if (events & (POLLPRI | POLLRDBAND)) {
6347 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
6348 revents |= events & (POLLPRI | POLLRDBAND);
6349 }
6350 }
6351
6352 if (revents == 0) {
6353 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
6354 /*
6355 * Darwin sets the flag first,
6356 * BSD calls selrecord first
6357 */
6358 so->so_rcv.sb_flags |= SB_SEL;
6359 selrecord(p, &so->so_rcv.sb_sel, wql);
6360 }
6361
6362 if (events & (POLLOUT | POLLWRNORM)) {
6363 /*
6364 * Darwin sets the flag first,
6365 * BSD calls selrecord first
6366 */
6367 so->so_snd.sb_flags |= SB_SEL;
6368 selrecord(p, &so->so_snd.sb_sel, wql);
6369 }
6370 }
6371
6372 socket_unlock(so, 1);
6373 return revents;
6374 }
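/*
 * Illustrative user-space sketch (guarded out of compilation): the event
 * mapping implemented by sopoll() above, seen from the caller's side.
 * POLLPRI/POLLRDBAND report the urgent-data mark tracked through so_oobmark
 * and SS_RCVATMARK.
 */
#if 0
#include <poll.h>

static short
wait_readable_or_urgent(int sock, int timeout_ms)
{
	struct pollfd pfd = { .fd = sock, .events = POLLIN | POLLPRI };

	if (poll(&pfd, 1, timeout_ms) <= 0) {
		return 0;	/* timed out or failed */
	}
	return pfd.revents;
}
#endif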
6375
6376 int
6377 soo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
6378 {
6379 struct socket *so = (struct socket *)fp->fp_glob->fg_data;
6380 int result;
6381
6382 socket_lock(so, 1);
6383 so_update_last_owner_locked(so, PROC_NULL);
6384 so_update_policy(so);
6385
6386 switch (kn->kn_filter) {
6387 case EVFILT_READ:
6388 kn->kn_filtid = EVFILTID_SOREAD;
6389 break;
6390 case EVFILT_WRITE:
6391 kn->kn_filtid = EVFILTID_SOWRITE;
6392 break;
6393 case EVFILT_SOCK:
6394 kn->kn_filtid = EVFILTID_SCK;
6395 break;
6396 case EVFILT_EXCEPT:
6397 kn->kn_filtid = EVFILTID_SOEXCEPT;
6398 break;
6399 default:
6400 socket_unlock(so, 1);
6401 knote_set_error(kn, EINVAL);
6402 return 0;
6403 }
6404
6405 /*
6406 * call the appropriate sub-filter attach
6407 * with the socket still locked
6408 */
6409 result = knote_fops(kn)->f_attach(kn, kev);
6410
6411 socket_unlock(so, 1);
6412
6413 return result;
6414 }
6415
6416 static int
6417 filt_soread_common(struct knote *kn, struct kevent_qos_s *kev, struct socket *so)
6418 {
6419 int retval = 0;
6420 int64_t data = 0;
6421
6422 if (so->so_options & SO_ACCEPTCONN) {
6423 /*
6424 * Radar 6615193: handle the listen case dynamically for the
6425 * kqueue read filter. This allows listen() to be called after
6426 * the kqueue EVFILT_READ knote has been registered.
6427 */
6428
6429 retval = !TAILQ_EMPTY(&so->so_comp);
6430 data = so->so_qlen;
6431 goto out;
6432 }
6433
6434 /* socket isn't a listener */
6435 /*
6436 * NOTE_LOWAT specifies a new low-water mark in data, i.e.
6437 * the bytes of protocol data. We therefore exclude any
6438 * control bytes.
6439 */
6440 data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
6441
6442 if (kn->kn_sfflags & NOTE_OOB) {
6443 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
6444 kn->kn_fflags |= NOTE_OOB;
6445 data -= so->so_oobmark;
6446 retval = 1;
6447 goto out;
6448 }
6449 }
6450
6451 if ((so->so_state & SS_CANTRCVMORE)
6452 #if CONTENT_FILTER
6453 && cfil_sock_data_pending(&so->so_rcv) == 0
6454 #endif /* CONTENT_FILTER */
6455 ) {
6456 kn->kn_flags |= EV_EOF;
6457 kn->kn_fflags = so->so_error;
6458 retval = 1;
6459 goto out;
6460 }
6461
6462 if (so->so_error) { /* temporary udp error */
6463 retval = 1;
6464 goto out;
6465 }
6466
6467 int64_t lowwat = so->so_rcv.sb_lowat;
6468 /*
6469 * Ensure that when NOTE_LOWAT is used, the derived
6470 * low water mark is bounded by socket's rcv buf's
6471 * high and low water mark values.
6472 */
6473 if (kn->kn_sfflags & NOTE_LOWAT) {
6474 if (kn->kn_sdata > so->so_rcv.sb_hiwat) {
6475 lowwat = so->so_rcv.sb_hiwat;
6476 } else if (kn->kn_sdata > lowwat) {
6477 lowwat = kn->kn_sdata;
6478 }
6479 }
6480
6481 /*
6482 * While the `data` field is the amount of data to read,
6483 * 0-sized packets need to wake up the kqueue, see 58140856,
6484 * so we need to take control bytes into account too.
6485 */
6486 retval = (so->so_rcv.sb_cc >= lowwat);
6487
6488 out:
6489 if (retval && kev) {
6490 knote_fill_kevent(kn, kev, data);
6491 }
6492 return retval;
6493 }
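/*
 * Illustrative user-space sketch (guarded out of compilation): registering
 * EVFILT_READ with NOTE_LOWAT, the case whose low-water mark is clamped to
 * the receive buffer's high-water mark in filt_soread_common() above.
 * Error handling is abbreviated.
 */
#if 0
#include <stdint.h>
#include <sys/event.h>
#include <sys/socket.h>
#include <unistd.h>

static void
wait_for_bytes(int sock, int64_t lowat)
{
	int kq = kqueue();
	struct kevent ev;

	/* NOTE_LOWAT goes in fflags; the requested low-water mark rides in data */
	EV_SET(&ev, sock, EVFILT_READ, EV_ADD | EV_ENABLE, NOTE_LOWAT, lowat, NULL);
	(void)kevent(kq, &ev, 1, NULL, 0, NULL);

	/* block until the filter fires; ev.data then reports the readable bytes */
	(void)kevent(kq, NULL, 0, &ev, 1, NULL);
	close(kq);
}
#endif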
6494
6495 static int
6496 filt_sorattach(struct knote *kn, __unused struct kevent_qos_s *kev)
6497 {
6498 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6499
6500 /* socket locked */
6501
6502 /*
6503 * If the caller explicitly asked for OOB results (e.g. poll())
6504 * from EVFILT_READ, then save that off in the kn_hook32 field
6505 * and reserve the kn_flags EV_OOBAND bit for output only.
6506 */
6507 if (kn->kn_filter == EVFILT_READ &&
6508 kn->kn_flags & EV_OOBAND) {
6509 kn->kn_flags &= ~EV_OOBAND;
6510 kn->kn_hook32 = EV_OOBAND;
6511 } else {
6512 kn->kn_hook32 = 0;
6513 }
6514 if (KNOTE_ATTACH(&so->so_rcv.sb_sel.si_note, kn)) {
6515 so->so_rcv.sb_flags |= SB_KNOTE;
6516 }
6517
6518 /* indicate whether the event has already fired */
6519 return filt_soread_common(kn, NULL, so);
6520 }
6521
6522 static void
6523 filt_sordetach(struct knote *kn)
6524 {
6525 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6526
6527 socket_lock(so, 1);
6528 if (so->so_rcv.sb_flags & SB_KNOTE) {
6529 if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn)) {
6530 so->so_rcv.sb_flags &= ~SB_KNOTE;
6531 }
6532 }
6533 socket_unlock(so, 1);
6534 }
6535
6536 /*ARGSUSED*/
6537 static int
6538 filt_soread(struct knote *kn, long hint)
6539 {
6540 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6541 int retval;
6542
6543 if ((hint & SO_FILT_HINT_LOCKED) == 0) {
6544 socket_lock(so, 1);
6545 }
6546
6547 retval = filt_soread_common(kn, NULL, so);
6548
6549 if ((hint & SO_FILT_HINT_LOCKED) == 0) {
6550 socket_unlock(so, 1);
6551 }
6552
6553 return retval;
6554 }
6555
6556 static int
6557 filt_sortouch(struct knote *kn, struct kevent_qos_s *kev)
6558 {
6559 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6560 int retval;
6561
6562 socket_lock(so, 1);
6563
6564 /* save off the new input fflags and data */
6565 kn->kn_sfflags = kev->fflags;
6566 kn->kn_sdata = kev->data;
6567
6568 /* determine if changes result in fired events */
6569 retval = filt_soread_common(kn, NULL, so);
6570
6571 socket_unlock(so, 1);
6572
6573 return retval;
6574 }
6575
6576 static int
6577 filt_sorprocess(struct knote *kn, struct kevent_qos_s *kev)
6578 {
6579 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6580 int retval;
6581
6582 socket_lock(so, 1);
6583 retval = filt_soread_common(kn, kev, so);
6584 socket_unlock(so, 1);
6585
6586 return retval;
6587 }
6588
6589 int
6590 so_wait_for_if_feedback(struct socket *so)
6591 {
6592 if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) &&
6593 (so->so_state & SS_ISCONNECTED)) {
6594 struct inpcb *inp = sotoinpcb(so);
6595 if (INP_WAIT_FOR_IF_FEEDBACK(inp)) {
6596 return 1;
6597 }
6598 }
6599 return 0;
6600 }
6601
6602 static int
6603 filt_sowrite_common(struct knote *kn, struct kevent_qos_s *kev, struct socket *so)
6604 {
6605 int ret = 0;
6606 int64_t data = sbspace(&so->so_snd);
6607
6608 if (so->so_state & SS_CANTSENDMORE) {
6609 kn->kn_flags |= EV_EOF;
6610 kn->kn_fflags = so->so_error;
6611 ret = 1;
6612 goto out;
6613 }
6614
6615 if (so->so_error) { /* temporary udp error */
6616 ret = 1;
6617 goto out;
6618 }
6619
6620 if (!socanwrite(so)) {
6621 ret = 0;
6622 goto out;
6623 }
6624
6625 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
6626 ret = 1;
6627 goto out;
6628 }
6629
6630 int64_t lowwat = so->so_snd.sb_lowat;
6631
6632 if (kn->kn_sfflags & NOTE_LOWAT) {
6633 if (kn->kn_sdata > so->so_snd.sb_hiwat) {
6634 lowwat = so->so_snd.sb_hiwat;
6635 } else if (kn->kn_sdata > lowwat) {
6636 lowwat = kn->kn_sdata;
6637 }
6638 }
6639
6640 if (data >= lowwat) {
6641 if ((so->so_flags & SOF_NOTSENT_LOWAT)
6642 #if (DEBUG || DEVELOPMENT)
6643 && so_notsent_lowat_check == 1
6644 #endif /* DEBUG || DEVELOPMENT */
6645 ) {
6646 if ((SOCK_DOM(so) == PF_INET ||
6647 SOCK_DOM(so) == PF_INET6) &&
6648 so->so_type == SOCK_STREAM) {
6649 ret = tcp_notsent_lowat_check(so);
6650 }
6651 #if MPTCP
6652 else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
6653 (SOCK_PROTO(so) == IPPROTO_TCP)) {
6654 ret = mptcp_notsent_lowat_check(so);
6655 }
6656 #endif
6657 else {
6658 ret = 1;
6659 goto out;
6660 }
6661 } else {
6662 ret = 1;
6663 }
6664 }
6665 if (so_wait_for_if_feedback(so)) {
6666 ret = 0;
6667 }
6668
6669 out:
6670 if (ret && kev) {
6671 knote_fill_kevent(kn, kev, data);
6672 }
6673 return ret;
6674 }
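/*
 * Illustrative user-space sketch (guarded out of compilation): the
 * SOF_NOTSENT_LOWAT branch above is reached when the application sets
 * TCP_NOTSENT_LOWAT and then arms EVFILT_WRITE, so the knote fires only once
 * the amount of unsent data drops below the chosen threshold.  Assumes a
 * connected TCP socket; error handling is abbreviated.
 */
#if 0
#include <sys/event.h>
#include <sys/socket.h>
#include <netinet/tcp.h>

static int
arm_write_filter(int kq, int sock)
{
	int notsent = 16 * 1024;	/* fire once unsent data drops below 16KB */
	struct kevent ev;

	if (setsockopt(sock, IPPROTO_TCP, TCP_NOTSENT_LOWAT,
	    &notsent, sizeof(notsent)) != 0) {
		return -1;
	}
	EV_SET(&ev, sock, EVFILT_WRITE, EV_ADD | EV_ENABLE, 0, 0, NULL);
	return kevent(kq, &ev, 1, NULL, 0, NULL);
}
#endif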
6675
6676 static int
6677 filt_sowattach(struct knote *kn, __unused struct kevent_qos_s *kev)
6678 {
6679 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6680
6681 /* socket locked */
6682 if (KNOTE_ATTACH(&so->so_snd.sb_sel.si_note, kn)) {
6683 so->so_snd.sb_flags |= SB_KNOTE;
6684 }
6685
6686 /* determine if it has already fired */
6687 return filt_sowrite_common(kn, NULL, so);
6688 }
6689
6690 static void
6691 filt_sowdetach(struct knote *kn)
6692 {
6693 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6694 socket_lock(so, 1);
6695
6696 if (so->so_snd.sb_flags & SB_KNOTE) {
6697 if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn)) {
6698 so->so_snd.sb_flags &= ~SB_KNOTE;
6699 }
6700 }
6701 socket_unlock(so, 1);
6702 }
6703
6704 /*ARGSUSED*/
6705 static int
6706 filt_sowrite(struct knote *kn, long hint)
6707 {
6708 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6709 int ret;
6710
6711 if ((hint & SO_FILT_HINT_LOCKED) == 0) {
6712 socket_lock(so, 1);
6713 }
6714
6715 ret = filt_sowrite_common(kn, NULL, so);
6716
6717 if ((hint & SO_FILT_HINT_LOCKED) == 0) {
6718 socket_unlock(so, 1);
6719 }
6720
6721 return ret;
6722 }
6723
6724 static int
6725 filt_sowtouch(struct knote *kn, struct kevent_qos_s *kev)
6726 {
6727 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6728 int ret;
6729
6730 socket_lock(so, 1);
6731
6732 /* save off the new input fflags and data */
6733 kn->kn_sfflags = kev->fflags;
6734 kn->kn_sdata = kev->data;
6735
6736 /* determine if these changes result in a triggered event */
6737 ret = filt_sowrite_common(kn, NULL, so);
6738
6739 socket_unlock(so, 1);
6740
6741 return ret;
6742 }
6743
6744 static int
6745 filt_sowprocess(struct knote *kn, struct kevent_qos_s *kev)
6746 {
6747 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6748 int ret;
6749
6750 socket_lock(so, 1);
6751 ret = filt_sowrite_common(kn, kev, so);
6752 socket_unlock(so, 1);
6753
6754 return ret;
6755 }
6756
6757 static int
6758 filt_sockev_common(struct knote *kn, struct kevent_qos_s *kev,
6759 struct socket *so, long ev_hint)
6760 {
6761 int ret = 0;
6762 int64_t data = 0;
6763 uint32_t level_trigger = 0;
6764
6765 if (ev_hint & SO_FILT_HINT_CONNRESET) {
6766 kn->kn_fflags |= NOTE_CONNRESET;
6767 }
6768 if (ev_hint & SO_FILT_HINT_TIMEOUT) {
6769 kn->kn_fflags |= NOTE_TIMEOUT;
6770 }
6771 if (ev_hint & SO_FILT_HINT_NOSRCADDR) {
6772 kn->kn_fflags |= NOTE_NOSRCADDR;
6773 }
6774 if (ev_hint & SO_FILT_HINT_IFDENIED) {
6775 kn->kn_fflags |= NOTE_IFDENIED;
6776 }
6777 if (ev_hint & SO_FILT_HINT_KEEPALIVE) {
6778 kn->kn_fflags |= NOTE_KEEPALIVE;
6779 }
6780 if (ev_hint & SO_FILT_HINT_ADAPTIVE_WTIMO) {
6781 kn->kn_fflags |= NOTE_ADAPTIVE_WTIMO;
6782 }
6783 if (ev_hint & SO_FILT_HINT_ADAPTIVE_RTIMO) {
6784 kn->kn_fflags |= NOTE_ADAPTIVE_RTIMO;
6785 }
6786 if ((ev_hint & SO_FILT_HINT_CONNECTED) ||
6787 (so->so_state & SS_ISCONNECTED)) {
6788 kn->kn_fflags |= NOTE_CONNECTED;
6789 level_trigger |= NOTE_CONNECTED;
6790 }
6791 if ((ev_hint & SO_FILT_HINT_DISCONNECTED) ||
6792 (so->so_state & SS_ISDISCONNECTED)) {
6793 kn->kn_fflags |= NOTE_DISCONNECTED;
6794 level_trigger |= NOTE_DISCONNECTED;
6795 }
6796 if (ev_hint & SO_FILT_HINT_CONNINFO_UPDATED) {
6797 if (so->so_proto != NULL &&
6798 (so->so_proto->pr_flags & PR_EVCONNINFO)) {
6799 kn->kn_fflags |= NOTE_CONNINFO_UPDATED;
6800 }
6801 }
6802
6803 if ((ev_hint & SO_FILT_HINT_NOTIFY_ACK) ||
6804 tcp_notify_ack_active(so)) {
6805 kn->kn_fflags |= NOTE_NOTIFY_ACK;
6806 }
6807
6808 if ((so->so_state & SS_CANTRCVMORE)
6809 #if CONTENT_FILTER
6810 && cfil_sock_data_pending(&so->so_rcv) == 0
6811 #endif /* CONTENT_FILTER */
6812 ) {
6813 kn->kn_fflags |= NOTE_READCLOSED;
6814 level_trigger |= NOTE_READCLOSED;
6815 }
6816
6817 if (so->so_state & SS_CANTSENDMORE) {
6818 kn->kn_fflags |= NOTE_WRITECLOSED;
6819 level_trigger |= NOTE_WRITECLOSED;
6820 }
6821
6822 if ((ev_hint & SO_FILT_HINT_SUSPEND) ||
6823 (so->so_flags & SOF_SUSPENDED)) {
6824 kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
6825
6826 /* If resume event was delivered before, reset it */
6827 kn->kn_hook32 &= ~NOTE_RESUME;
6828
6829 kn->kn_fflags |= NOTE_SUSPEND;
6830 level_trigger |= NOTE_SUSPEND;
6831 }
6832
6833 if ((ev_hint & SO_FILT_HINT_RESUME) ||
6834 (so->so_flags & SOF_SUSPENDED) == 0) {
6835 kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
6836
6837 /* If suspend event was delivered before, reset it */
6838 kn->kn_hook32 &= ~NOTE_SUSPEND;
6839
6840 kn->kn_fflags |= NOTE_RESUME;
6841 level_trigger |= NOTE_RESUME;
6842 }
6843
6844 if (so->so_error != 0) {
6845 ret = 1;
6846 data = so->so_error;
6847 kn->kn_flags |= EV_EOF;
6848 } else {
6849 u_int32_t data32 = 0;
6850 get_sockev_state(so, &data32);
6851 data = data32;
6852 }
6853
6854 /* Reset any events that are not requested on this knote */
6855 kn->kn_fflags &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK);
6856 level_trigger &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK);
6857
6858 /* Find the level-triggered events that have already been delivered */
6859 level_trigger &= kn->kn_hook32;
6860 level_trigger &= EVFILT_SOCK_LEVEL_TRIGGER_MASK;
6861
6862 /* Do not deliver level-triggered events more than once */
6863 if ((kn->kn_fflags & ~level_trigger) != 0) {
6864 ret = 1;
6865 }
6866
6867 if (ret && kev) {
6868 /*
6869 * Store the state of the events being delivered. This
6870 * state can be used to deliver level triggered events
6871 * at least once and still avoid waking up the application
6872 * multiple times as long as the event is active.
6873 */
6874 if (kn->kn_fflags != 0) {
6875 kn->kn_hook32 |= (kn->kn_fflags &
6876 EVFILT_SOCK_LEVEL_TRIGGER_MASK);
6877 }
6878
6879 /*
6880 * NOTE_RESUME and NOTE_SUSPEND are an exception: deliver
6881 * only one of them, and remember which one was
6882 * delivered last
6883 */
6884 if (kn->kn_fflags & NOTE_SUSPEND) {
6885 kn->kn_hook32 &= ~NOTE_RESUME;
6886 }
6887 if (kn->kn_fflags & NOTE_RESUME) {
6888 kn->kn_hook32 &= ~NOTE_SUSPEND;
6889 }
6890
6891 knote_fill_kevent(kn, kev, data);
6892 }
6893 return ret;
6894 }
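/*
 * Illustrative sketch (guarded out of compilation): EVFILT_SOCK and its
 * NOTE_* constants are private Darwin SPI, so the names below assume a build
 * environment where those definitions are visible.  A watcher registered for
 * both NOTE_SUSPEND and NOTE_RESUME observes the once-only delivery policy
 * implemented above: whichever state change happened last is reported, never
 * both at once.
 */
#if 0
#include <sys/event.h>

static int
watch_connection_state(int kq, int sock)
{
	struct kevent ev;

	EV_SET(&ev, sock, EVFILT_SOCK, EV_ADD | EV_CLEAR,
	    NOTE_CONNECTED | NOTE_DISCONNECTED | NOTE_SUSPEND | NOTE_RESUME,
	    0, NULL);
	return kevent(kq, &ev, 1, NULL, 0, NULL);
}
#endif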
6895
6896 static int
6897 filt_sockattach(struct knote *kn, __unused struct kevent_qos_s *kev)
6898 {
6899 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6900
6901 /* socket locked */
6902 kn->kn_hook32 = 0;
6903 if (KNOTE_ATTACH(&so->so_klist, kn)) {
6904 so->so_flags |= SOF_KNOTE;
6905 }
6906
6907 /* determine if the event has already fired */
6908 return filt_sockev_common(kn, NULL, so, 0);
6909 }
6910
6911 static void
6912 filt_sockdetach(struct knote *kn)
6913 {
6914 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6915 socket_lock(so, 1);
6916
6917 if ((so->so_flags & SOF_KNOTE) != 0) {
6918 if (KNOTE_DETACH(&so->so_klist, kn)) {
6919 so->so_flags &= ~SOF_KNOTE;
6920 }
6921 }
6922 socket_unlock(so, 1);
6923 }
6924
6925 static int
6926 filt_sockev(struct knote *kn, long hint)
6927 {
6928 int ret = 0, locked = 0;
6929 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6930 long ev_hint = (hint & SO_FILT_HINT_EV);
6931
6932 if ((hint & SO_FILT_HINT_LOCKED) == 0) {
6933 socket_lock(so, 1);
6934 locked = 1;
6935 }
6936
6937 ret = filt_sockev_common(kn, NULL, so, ev_hint);
6938
6939 if (locked) {
6940 socket_unlock(so, 1);
6941 }
6942
6943 return ret;
6944 }
6945
6946
6947
6948 /*
6949 * filt_socktouch - update event state
6950 */
6951 static int
6952 filt_socktouch(
6953 struct knote *kn,
6954 struct kevent_qos_s *kev)
6955 {
6956 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6957 uint32_t changed_flags;
6958 int ret;
6959
6960 socket_lock(so, 1);
6961
6962 /* note which bits differ between the old interest set and the delivered state */
6963 changed_flags = (kn->kn_sfflags ^ kn->kn_hook32);
6964
6965 /* save off the new input fflags and data */
6966 kn->kn_sfflags = kev->fflags;
6967 kn->kn_sdata = kev->data;
6968
6969 /* restrict the current results to the (smaller?) set of new interest */
6970 /*
6971 * For compatibility with previous implementations, we leave kn_fflags
6972 * as they were before.
6973 */
6974 //kn->kn_fflags &= kev->fflags;
6975
6976 /*
6977 * Since we keep track of events that are already
6978 * delivered, if any of those events are not requested
6979 * anymore the state related to them can be reset
6980 */
6981 kn->kn_hook32 &= ~(changed_flags & EVFILT_SOCK_LEVEL_TRIGGER_MASK);
6982
6983 /* determine if we have events to deliver */
6984 ret = filt_sockev_common(kn, NULL, so, 0);
6985
6986 socket_unlock(so, 1);
6987
6988 return ret;
6989 }
6990
6991 /*
6992 * filt_sockprocess - query event fired state and return data
6993 */
6994 static int
6995 filt_sockprocess(struct knote *kn, struct kevent_qos_s *kev)
6996 {
6997 struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
6998 int ret = 0;
6999
7000 socket_lock(so, 1);
7001
7002 ret = filt_sockev_common(kn, kev, so, 0);
7003
7004 socket_unlock(so, 1);
7005
7006 return ret;
7007 }
7008
7009 void
7010 get_sockev_state(struct socket *so, u_int32_t *statep)
7011 {
7012 u_int32_t state = *(statep);
7013
7014 /*
7015 * If the state variable was already set by a previous event,
7016 * leave it unchanged.
7017 */
7018 if (state != 0) {
7019 return;
7020 }
7021
7022 if (so->so_state & SS_ISCONNECTED) {
7023 state |= SOCKEV_CONNECTED;
7024 } else {
7025 state &= ~(SOCKEV_CONNECTED);
7026 }
7027 state |= ((so->so_state & SS_ISDISCONNECTED) ? SOCKEV_DISCONNECTED : 0);
7028 *(statep) = state;
7029 }
7030
7031 #define SO_LOCK_HISTORY_STR_LEN \
7032 (2 * SO_LCKDBG_MAX * (2 + (2 * sizeof (void *)) + 1) + 1)
7033
7034 __private_extern__ const char *
7035 solockhistory_nr(struct socket *so)
7036 {
7037 size_t n = 0;
7038 int i;
7039 static char lock_history_str[SO_LOCK_HISTORY_STR_LEN];
7040
7041 bzero(lock_history_str, sizeof(lock_history_str));
7042 for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) {
7043 n += scnprintf(lock_history_str + n,
7044 SO_LOCK_HISTORY_STR_LEN - n, "%p:%p ",
7045 so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX],
7046 so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX]);
7047 }
7048 return lock_history_str;
7049 }
7050
7051 lck_mtx_t *
7052 socket_getlock(struct socket *so, int flags)
7053 {
7054 if (so->so_proto->pr_getlock != NULL) {
7055 return (*so->so_proto->pr_getlock)(so, flags);
7056 } else {
7057 return so->so_proto->pr_domain->dom_mtx;
7058 }
7059 }
7060
7061 void
7062 socket_lock(struct socket *so, int refcount)
7063 {
7064 void *lr_saved;
7065
7066 lr_saved = __builtin_return_address(0);
7067
7068 if (so->so_proto->pr_lock) {
7069 (*so->so_proto->pr_lock)(so, refcount, lr_saved);
7070 } else {
7071 #ifdef MORE_LOCKING_DEBUG
7072 LCK_MTX_ASSERT(so->so_proto->pr_domain->dom_mtx,
7073 LCK_MTX_ASSERT_NOTOWNED);
7074 #endif
7075 lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
7076 if (refcount) {
7077 so->so_usecount++;
7078 }
7079 so->lock_lr[so->next_lock_lr] = lr_saved;
7080 so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
7081 }
7082 }
7083
7084 void
7085 socket_lock_assert_owned(struct socket *so)
7086 {
7087 lck_mtx_t *mutex_held;
7088
7089 if (so->so_proto->pr_getlock != NULL) {
7090 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
7091 } else {
7092 mutex_held = so->so_proto->pr_domain->dom_mtx;
7093 }
7094
7095 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
7096 }
7097
7098 int
7099 socket_try_lock(struct socket *so)
7100 {
7101 lck_mtx_t *mtx;
7102
7103 if (so->so_proto->pr_getlock != NULL) {
7104 mtx = (*so->so_proto->pr_getlock)(so, 0);
7105 } else {
7106 mtx = so->so_proto->pr_domain->dom_mtx;
7107 }
7108
7109 return lck_mtx_try_lock(mtx);
7110 }
7111
7112 void
7113 socket_unlock(struct socket *so, int refcount)
7114 {
7115 void *lr_saved;
7116 lck_mtx_t *mutex_held;
7117
7118 lr_saved = __builtin_return_address(0);
7119
7120 if (so == NULL || so->so_proto == NULL) {
7121 panic("%s: null so_proto so=%p\n", __func__, so);
7122 /* NOTREACHED */
7123 }
7124
7125 if (so->so_proto->pr_unlock) {
7126 (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
7127 } else {
7128 mutex_held = so->so_proto->pr_domain->dom_mtx;
7129 #ifdef MORE_LOCKING_DEBUG
7130 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
7131 #endif
7132 so->unlock_lr[so->next_unlock_lr] = lr_saved;
7133 so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
7134
7135 if (refcount) {
7136 if (so->so_usecount <= 0) {
7137 panic("%s: bad refcount=%d so=%p (%d, %d, %d) "
7138 "lrh=%s", __func__, so->so_usecount, so,
7139 SOCK_DOM(so), so->so_type,
7140 SOCK_PROTO(so), solockhistory_nr(so));
7141 /* NOTREACHED */
7142 }
7143
7144 so->so_usecount--;
7145 if (so->so_usecount == 0) {
7146 sofreelastref(so, 1);
7147 }
7148 }
7149 lck_mtx_unlock(mutex_held);
7150 }
7151 }
7152
7153 /* Called with socket locked, will unlock socket */
7154 void
7155 sofree(struct socket *so)
7156 {
7157 lck_mtx_t *mutex_held;
7158
7159 if (so->so_proto->pr_getlock != NULL) {
7160 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
7161 } else {
7162 mutex_held = so->so_proto->pr_domain->dom_mtx;
7163 }
7164 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
7165
7166 sofreelastref(so, 0);
7167 }
7168
7169 void
7170 soreference(struct socket *so)
7171 {
7172 socket_lock(so, 1); /* lock and take one reference on the socket */
7173 socket_unlock(so, 0); /* unlock only */
7174 }
7175
7176 void
7177 sodereference(struct socket *so)
7178 {
7179 socket_lock(so, 0);
7180 socket_unlock(so, 1);
7181 }
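/*
 * Simplified sketch of the usual discipline for the helpers above: callers
 * outside the protocol layer take the mutex and a use-count reference
 * together, do their work, then drop both; releasing the last reference from
 * socket_unlock() ends up in sofreelastref().  The wrapper name is
 * illustrative only.
 */
#if 0
static void
with_socket_locked(struct socket *so, void (*work)(struct socket *))
{
	socket_lock(so, 1);	/* lock + so_usecount++ */
	work(so);
	socket_unlock(so, 1);	/* so_usecount-- + unlock (may free the socket) */
}
#endif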
7182
7183 /*
7184 * Set or clear SOF_MULTIPAGES on the socket to enable or disable the
7185 * possibility of using jumbo clusters. The caller must hold
7186 * the socket lock.
7187 */
7188 void
7189 somultipages(struct socket *so, boolean_t set)
7190 {
7191 if (set) {
7192 so->so_flags |= SOF_MULTIPAGES;
7193 } else {
7194 so->so_flags &= ~SOF_MULTIPAGES;
7195 }
7196 }
7197
7198 void
7199 soif2kcl(struct socket *so, boolean_t set)
7200 {
7201 if (set) {
7202 so->so_flags1 |= SOF1_IF_2KCL;
7203 } else {
7204 so->so_flags1 &= ~SOF1_IF_2KCL;
7205 }
7206 }
7207
7208 int
7209 so_isdstlocal(struct socket *so)
7210 {
7211 struct inpcb *inp = (struct inpcb *)so->so_pcb;
7212
7213 if (SOCK_DOM(so) == PF_INET) {
7214 return inaddr_local(inp->inp_faddr);
7215 } else if (SOCK_DOM(so) == PF_INET6) {
7216 return in6addr_local(&inp->in6p_faddr);
7217 }
7218
7219 return 0;
7220 }
7221
7222 int
7223 sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
7224 {
7225 struct sockbuf *rcv, *snd;
7226 int err = 0, defunct;
7227
7228 rcv = &so->so_rcv;
7229 snd = &so->so_snd;
7230
7231 defunct = (so->so_flags & SOF_DEFUNCT);
7232 if (defunct) {
7233 if (!(snd->sb_flags & rcv->sb_flags & SB_DROP)) {
7234 panic("%s: SB_DROP not set", __func__);
7235 /* NOTREACHED */
7236 }
7237 goto done;
7238 }
7239
7240 if (so->so_flags & SOF_NODEFUNCT) {
7241 if (noforce) {
7242 err = EOPNOTSUPP;
7243 if (p != PROC_NULL) {
7244 SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
7245 "name %s level %d) so 0x%llx [%d,%d] "
7246 "is not eligible for defunct "
7247 "(%d)\n", __func__, proc_selfpid(),
7248 proc_best_name(current_proc()), proc_pid(p),
7249 proc_best_name(p), level,
7250 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7251 SOCK_DOM(so), SOCK_TYPE(so), err);
7252 }
7253 return err;
7254 }
7255 so->so_flags &= ~SOF_NODEFUNCT;
7256 if (p != PROC_NULL) {
7257 SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
7258 "name %s level %d) so 0x%llx [%d,%d] "
7259 "defunct by force "
7260 "(%d)\n", __func__, proc_selfpid(),
7261 proc_best_name(current_proc()), proc_pid(p),
7262 proc_best_name(p), level,
7263 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7264 SOCK_DOM(so), SOCK_TYPE(so), err);
7265 }
7266 } else if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
7267 struct inpcb *inp = (struct inpcb *)so->so_pcb;
7268 struct ifnet *ifp = inp->inp_last_outifp;
7269
7270 if (ifp && IFNET_IS_CELLULAR(ifp)) {
7271 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nocell);
7272 } else if (so->so_flags & SOF_DELEGATED) {
7273 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd);
7274 } else if (soextbkidlestat.so_xbkidle_time == 0) {
7275 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_notime);
7276 } else if (noforce && p != PROC_NULL) {
7277 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_active);
7278
7279 so->so_flags1 |= SOF1_EXTEND_BK_IDLE_INPROG;
7280 so->so_extended_bk_start = net_uptime();
7281 OSBitOrAtomic(P_LXBKIDLEINPROG, &p->p_ladvflag);
7282
7283 inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY);
7284
7285 err = EOPNOTSUPP;
7286 SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
7287 "name %s level %d) so 0x%llx [%d,%d] "
7288 "extend bk idle "
7289 "(%d)\n", __func__, proc_selfpid(),
7290 proc_best_name(current_proc()), proc_pid(p),
7291 proc_best_name(p), level,
7292 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7293 SOCK_DOM(so), SOCK_TYPE(so), err);
7294 return err;
7295 } else {
7296 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_forced);
7297 }
7298 }
7299
7300 so->so_flags |= SOF_DEFUNCT;
7301
7302 /* Prevent further data from being appended to the socket buffers */
7303 snd->sb_flags |= SB_DROP;
7304 rcv->sb_flags |= SB_DROP;
7305
7306 /* Flush any existing data in the socket buffers */
7307 if (rcv->sb_cc != 0) {
7308 rcv->sb_flags &= ~SB_SEL;
7309 selthreadclear(&rcv->sb_sel);
7310 sbrelease(rcv);
7311 }
7312 if (snd->sb_cc != 0) {
7313 snd->sb_flags &= ~SB_SEL;
7314 selthreadclear(&snd->sb_sel);
7315 sbrelease(snd);
7316 }
7317
7318 done:
7319 if (p != PROC_NULL) {
7320 SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s level %d) "
7321 "so 0x%llx [%d,%d] %s defunct%s\n", __func__,
7322 proc_selfpid(), proc_best_name(current_proc()),
7323 proc_pid(p), proc_best_name(p), level,
7324 (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
7325 SOCK_TYPE(so), defunct ? "is already" : "marked as",
7326 (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ?
7327 " extbkidle" : "");
7328 }
7329 return err;
7330 }
7331
7332 int
7333 sodefunct(struct proc *p, struct socket *so, int level)
7334 {
7335 struct sockbuf *rcv, *snd;
7336
7337 if (!(so->so_flags & SOF_DEFUNCT)) {
7338 panic("%s improperly called", __func__);
7339 /* NOTREACHED */
7340 }
7341 if (so->so_state & SS_DEFUNCT) {
7342 goto done;
7343 }
7344
7345 rcv = &so->so_rcv;
7346 snd = &so->so_snd;
7347
7348 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
7349 char s[MAX_IPv6_STR_LEN];
7350 char d[MAX_IPv6_STR_LEN];
7351 struct inpcb *inp = sotoinpcb(so);
7352
7353 if (p != PROC_NULL) {
7354 SODEFUNCTLOG(
7355 "%s[%d, %s]: (target pid %d name %s level %d) "
7356 "so 0x%llx [%s %s:%d -> %s:%d] is now defunct "
7357 "[rcv_si 0x%x, snd_si 0x%x, rcv_fl 0x%x, "
7358 " snd_fl 0x%x]\n", __func__,
7359 proc_selfpid(), proc_best_name(current_proc()),
7360 proc_pid(p), proc_best_name(p), level,
7361 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7362 (SOCK_TYPE(so) == SOCK_STREAM) ? "TCP" : "UDP",
7363 inet_ntop(SOCK_DOM(so), ((SOCK_DOM(so) == PF_INET) ?
7364 (void *)&inp->inp_laddr.s_addr :
7365 (void *)&inp->in6p_laddr),
7366 s, sizeof(s)), ntohs(inp->in6p_lport),
7367 inet_ntop(SOCK_DOM(so), (SOCK_DOM(so) == PF_INET) ?
7368 (void *)&inp->inp_faddr.s_addr :
7369 (void *)&inp->in6p_faddr,
7370 d, sizeof(d)), ntohs(inp->in6p_fport),
7371 (uint32_t)rcv->sb_sel.si_flags,
7372 (uint32_t)snd->sb_sel.si_flags,
7373 rcv->sb_flags, snd->sb_flags);
7374 }
7375 } else if (p != PROC_NULL) {
7376 SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s level %d) "
7377 "so 0x%llx [%d,%d] is now defunct [rcv_si 0x%x, "
7378 "snd_si 0x%x, rcv_fl 0x%x, snd_fl 0x%x]\n", __func__,
7379 proc_selfpid(), proc_best_name(current_proc()),
7380 proc_pid(p), proc_best_name(p), level,
7381 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7382 SOCK_DOM(so), SOCK_TYPE(so),
7383 (uint32_t)rcv->sb_sel.si_flags,
7384 (uint32_t)snd->sb_sel.si_flags, rcv->sb_flags,
7385 snd->sb_flags);
7386 }
7387
7388 /*
7389 * Unwedge threads blocked on sbwait() and sb_lock().
7390 */
7391 sbwakeup(rcv);
7392 sbwakeup(snd);
7393
7394 so->so_flags1 |= SOF1_DEFUNCTINPROG;
7395 if (rcv->sb_flags & SB_LOCK) {
7396 sbunlock(rcv, TRUE); /* keep socket locked */
7397 }
7398 if (snd->sb_flags & SB_LOCK) {
7399 sbunlock(snd, TRUE); /* keep socket locked */
7400 }
7401 /*
7402 * Flush the buffers and disconnect. We explicitly call shutdown
7403 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
7404 * states are set for the socket. This would also flush out data
7405 * hanging off the receive list of this socket.
7406 */
7407 (void) soshutdownlock_final(so, SHUT_RD);
7408 (void) soshutdownlock_final(so, SHUT_WR);
7409 (void) sodisconnectlocked(so);
7410
7411 /*
7412 * Explicitly handle connectionless-protocol disconnection
7413 * and release any remaining data in the socket buffers.
7414 */
7415 if (!(so->so_state & SS_ISDISCONNECTED)) {
7416 (void) soisdisconnected(so);
7417 }
7418
7419 if (so->so_error == 0) {
7420 so->so_error = EBADF;
7421 }
7422
7423 if (rcv->sb_cc != 0) {
7424 rcv->sb_flags &= ~SB_SEL;
7425 selthreadclear(&rcv->sb_sel);
7426 sbrelease(rcv);
7427 }
7428 if (snd->sb_cc != 0) {
7429 snd->sb_flags &= ~SB_SEL;
7430 selthreadclear(&snd->sb_sel);
7431 sbrelease(snd);
7432 }
7433 so->so_state |= SS_DEFUNCT;
7434 OSIncrementAtomicLong((volatile long *)&sodefunct_calls);
7435
7436 done:
7437 return 0;
7438 }
7439
7440 int
7441 soresume(struct proc *p, struct socket *so, int locked)
7442 {
7443 if (locked == 0) {
7444 socket_lock(so, 1);
7445 }
7446
7447 if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) {
7448 SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s) so 0x%llx "
7449 "[%d,%d] resumed from bk idle\n",
7450 __func__, proc_selfpid(), proc_best_name(current_proc()),
7451 proc_pid(p), proc_best_name(p),
7452 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7453 SOCK_DOM(so), SOCK_TYPE(so));
7454
7455 so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG;
7456 so->so_extended_bk_start = 0;
7457 OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag);
7458
7459 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resumed);
7460 OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active);
7461 VERIFY(soextbkidlestat.so_xbkidle_active >= 0);
7462 }
7463 if (locked == 0) {
7464 socket_unlock(so, 1);
7465 }
7466
7467 return 0;
7468 }
7469
7470 /*
7471 * Does not attempt to account for sockets that are delegated from
7472 * the current process
7473 */
7474 int
7475 so_set_extended_bk_idle(struct socket *so, int optval)
7476 {
7477 int error = 0;
7478
7479 if ((SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) ||
7480 SOCK_PROTO(so) != IPPROTO_TCP) {
7481 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_notsupp);
7482 error = EOPNOTSUPP;
7483 } else if (optval == 0) {
7484 so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED;
7485
7486 soresume(current_proc(), so, 1);
7487 } else {
7488 struct proc *p = current_proc();
7489 struct fileproc *fp;
7490 int count = 0;
7491
7492 /*
7493 * Unlock socket to avoid lock ordering issue with
7494 * the proc fd table lock
7495 */
7496 socket_unlock(so, 0);
7497
7498 proc_fdlock(p);
7499 fdt_foreach(fp, p) {
7500 struct socket *so2;
7501
7502 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
7503 continue;
7504 }
7505
7506 so2 = (struct socket *)fp->fp_glob->fg_data;
7507 if (so != so2 &&
7508 so2->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
7509 count++;
7510 }
7511 if (count >= soextbkidlestat.so_xbkidle_maxperproc) {
7512 break;
7513 }
7514 }
7515 proc_fdunlock(p);
7516
7517 socket_lock(so, 0);
7518
7519 if (count >= soextbkidlestat.so_xbkidle_maxperproc) {
7520 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_toomany);
7521 error = EBUSY;
7522 } else if (so->so_flags & SOF_DELEGATED) {
7523 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd);
7524 error = EBUSY;
7525 } else {
7526 so->so_flags1 |= SOF1_EXTEND_BK_IDLE_WANTED;
7527 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_wantok);
7528 }
7529 SODEFUNCTLOG("%s[%d, %s]: so 0x%llx [%d,%d] "
7530 "%s marked for extended bk idle\n",
7531 __func__, proc_selfpid(), proc_best_name(current_proc()),
7532 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7533 SOCK_DOM(so), SOCK_TYPE(so),
7534 (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ?
7535 "is" : "not");
7536 }
7537
7538 return error;
7539 }
7540
7541 static void
7542 so_stop_extended_bk_idle(struct socket *so)
7543 {
7544 so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG;
7545 so->so_extended_bk_start = 0;
7546
7547 OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active);
7548 VERIFY(soextbkidlestat.so_xbkidle_active >= 0);
7549 /*
7550 * Force defunct
7551 */
7552 sosetdefunct(current_proc(), so,
7553 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
7554 if (so->so_flags & SOF_DEFUNCT) {
7555 sodefunct(current_proc(), so,
7556 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
7557 }
7558 }
7559
7560 void
7561 so_drain_extended_bk_idle(struct socket *so)
7562 {
7563 if (so && (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) {
7564 /*
7565 * Only penalize sockets that have outstanding data
7566 */
7567 if (so->so_rcv.sb_cc || so->so_snd.sb_cc) {
7568 so_stop_extended_bk_idle(so);
7569
7570 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_drained);
7571 }
7572 }
7573 }
7574
7575 /*
7576 * The return value tells whether the socket is still in extended background idle
7577 */
7578 int
7579 so_check_extended_bk_idle_time(struct socket *so)
7580 {
7581 int ret = 1;
7582
7583 if ((so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) {
7584 SODEFUNCTLOG("%s[%d, %s]: so 0x%llx [%d,%d]\n",
7585 __func__, proc_selfpid(), proc_best_name(current_proc()),
7586 (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7587 SOCK_DOM(so), SOCK_TYPE(so));
7588 if (net_uptime() - so->so_extended_bk_start >
7589 soextbkidlestat.so_xbkidle_time) {
7590 so_stop_extended_bk_idle(so);
7591
7592 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_expired);
7593
7594 ret = 0;
7595 } else {
7596 struct inpcb *inp = (struct inpcb *)so->so_pcb;
7597
7598 inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY);
7599 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resched);
7600 }
7601 }
7602
7603 return ret;
7604 }
7605
7606 void
7607 resume_proc_sockets(proc_t p)
7608 {
7609 if (p->p_ladvflag & P_LXBKIDLEINPROG) {
7610 struct fileproc *fp;
7611 struct socket *so;
7612
7613 proc_fdlock(p);
7614 fdt_foreach(fp, p) {
7615 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
7616 continue;
7617 }
7618
7619 so = (struct socket *)fp->fp_glob->fg_data;
7620 (void) soresume(p, so, 0);
7621 }
7622 proc_fdunlock(p);
7623
7624 OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag);
7625 }
7626 }
7627
7628 __private_extern__ int
7629 so_set_recv_anyif(struct socket *so, int optval)
7630 {
7631 int ret = 0;
7632
7633 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
7634 if (optval) {
7635 sotoinpcb(so)->inp_flags |= INP_RECV_ANYIF;
7636 } else {
7637 sotoinpcb(so)->inp_flags &= ~INP_RECV_ANYIF;
7638 }
7639 }
7640
7641
7642 return ret;
7643 }
7644
7645 __private_extern__ int
7646 so_get_recv_anyif(struct socket *so)
7647 {
7648 int ret = 0;
7649
7650 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
7651 ret = (sotoinpcb(so)->inp_flags & INP_RECV_ANYIF) ? 1 : 0;
7652 }
7653
7654 return ret;
7655 }
7656
7657 int
7658 so_set_restrictions(struct socket *so, uint32_t vals)
7659 {
7660 int nocell_old, nocell_new;
7661 int noexpensive_old, noexpensive_new;
7662 int noconstrained_old, noconstrained_new;
7663
7664 /*
7665 * Deny-type restrictions are trapdoors; once set they cannot be
7666 * unset for the lifetime of the socket. This allows them to be
7667 * issued by a framework on behalf of the application without
7668 * having to worry that they can be undone.
7669 *
7670 * Note here that socket-level restrictions override any protocol-
7671 * level restrictions. For instance, a SO_RESTRICT_DENY_CELLULAR
7672 * restriction issued on the socket has a higher precedence
7673 * than INP_NO_IFT_CELLULAR. The latter is affected by the UUID
7674 * policy PROC_UUID_NO_CELLULAR for unrestricted sockets only,
7675 * i.e. when SO_RESTRICT_DENY_CELLULAR has not been issued.
7676 */
7677 nocell_old = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
7678 noexpensive_old = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
7679 noconstrained_old = (so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED);
7680 so->so_restrictions |= (vals & (SO_RESTRICT_DENY_IN |
7681 SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR |
7682 SO_RESTRICT_DENY_EXPENSIVE | SO_RESTRICT_DENY_CONSTRAINED));
7683 nocell_new = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
7684 noexpensive_new = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
7685 noconstrained_new = (so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED);
7686
7687 /* we can only set, not clear restrictions */
7688 if ((nocell_new - nocell_old) == 0 &&
7689 (noexpensive_new - noexpensive_old) == 0 &&
7690 (noconstrained_new - noconstrained_old) == 0) {
7691 return 0;
7692 }
7693 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
7694 if (nocell_new - nocell_old != 0) {
7695 /*
7696 * if deny cellular is now set, do what's needed
7697 * for INPCB
7698 */
7699 inp_set_nocellular(sotoinpcb(so));
7700 }
7701 if (noexpensive_new - noexpensive_old != 0) {
7702 inp_set_noexpensive(sotoinpcb(so));
7703 }
7704 if (noconstrained_new - noconstrained_old != 0) {
7705 inp_set_noconstrained(sotoinpcb(so));
7706 }
7707 }
7708
7709 if (SOCK_DOM(so) == PF_MULTIPATH) {
7710 mptcp_set_restrictions(so);
7711 }
7712
7713 return 0;
7714 }
7715
7716 uint32_t
7717 so_get_restrictions(struct socket *so)
7718 {
7719 return so->so_restrictions & (SO_RESTRICT_DENY_IN |
7720 SO_RESTRICT_DENY_OUT |
7721 SO_RESTRICT_DENY_CELLULAR | SO_RESTRICT_DENY_EXPENSIVE);
7722 }
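/*
 * Illustrative user-space sketch (guarded out of compilation): SO_RESTRICTIONS
 * is private Darwin SPI, so the option and flag names below assume a build
 * environment where they are visible.  Because the deny bits are trapdoors,
 * issuing the option again can only widen the restriction set, never narrow it.
 */
#if 0
#include <stdint.h>
#include <sys/socket.h>

static int
deny_cellular(int sock)
{
	uint32_t restrictions = SO_RESTRICT_DENY_CELLULAR;

	return setsockopt(sock, SOL_SOCKET, SO_RESTRICTIONS,
	    &restrictions, sizeof(restrictions));
}
#endif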
7723
7724 int
7725 so_set_effective_pid(struct socket *so, int epid, struct proc *p, boolean_t check_cred)
7726 {
7727 struct proc *ep = PROC_NULL;
7728 int error = 0;
7729
7730 /* pid 0 is reserved for kernel */
7731 if (epid == 0) {
7732 error = EINVAL;
7733 goto done;
7734 }
7735
7736 /*
7737 * If this is an in-kernel socket, prevent its delegate
7738 * association from changing unless the socket option is
7739 * coming from within the kernel itself.
7740 */
7741 if (so->last_pid == 0 && p != kernproc) {
7742 error = EACCES;
7743 goto done;
7744 }
7745
7746 /*
7747 * If this is issued by a process that's recorded as the
7748 * real owner of the socket, or if the pid is the same as
7749 * the process's own pid, then proceed. Otherwise ensure
7750 * that the issuing process has the necessary privileges.
7751 */
7752 if (check_cred && (epid != so->last_pid || epid != proc_pid(p))) {
7753 if ((error = priv_check_cred(kauth_cred_get(),
7754 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
7755 error = EACCES;
7756 goto done;
7757 }
7758 }
7759
7760 /* Find the process that corresponds to the effective pid */
7761 if ((ep = proc_find(epid)) == PROC_NULL) {
7762 error = ESRCH;
7763 goto done;
7764 }
7765
7766 /*
7767 * If a process tries to delegate the socket to itself, then
7768 * there's really nothing to do; treat it as a way for the
7769 * delegate association to be cleared. Note that we check
7770 * the passed-in proc rather than calling proc_selfpid(),
7771 * as we need to check the process issuing the socket option
7772 * which could be kernproc. Given that we don't allow 0 for
7773 * effective pid, it means that a delegated in-kernel socket
7774 * stays delegated during its lifetime (which is probably OK.)
7775 */
7776 if (epid == proc_pid(p)) {
7777 so->so_flags &= ~SOF_DELEGATED;
7778 so->e_upid = 0;
7779 so->e_pid = 0;
7780 uuid_clear(so->e_uuid);
7781 } else {
7782 so->so_flags |= SOF_DELEGATED;
7783 so->e_upid = proc_uniqueid(ep);
7784 so->e_pid = proc_pid(ep);
7785 proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid));
7786
7787 #if defined(XNU_TARGET_OS_OSX)
7788 if (ep->p_responsible_pid != so->e_pid) {
7789 proc_t rp = proc_find(ep->p_responsible_pid);
7790 if (rp != PROC_NULL) {
7791 proc_getexecutableuuid(rp, so->so_ruuid, sizeof(so->so_ruuid));
7792 so->so_rpid = ep->p_responsible_pid;
7793 proc_rele(rp);
7794 } else {
7795 uuid_clear(so->so_ruuid);
7796 so->so_rpid = -1;
7797 }
7798 }
7799 #endif
7800 }
7801 if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
7802 (*so->so_proto->pr_update_last_owner)(so, NULL, ep);
7803 }
7804 done:
7805 if (error == 0 && net_io_policy_log) {
7806 uuid_string_t buf;
7807
7808 uuid_unparse(so->e_uuid, buf);
7809 log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
7810 "euuid %s%s\n", __func__, proc_name_address(p),
7811 proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7812 SOCK_DOM(so), SOCK_TYPE(so),
7813 so->e_pid, proc_name_address(ep), buf,
7814 ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
7815 } else if (error != 0 && net_io_policy_log) {
7816 log(LOG_ERR, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
7817 "ERROR (%d)\n", __func__, proc_name_address(p),
7818 proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7819 SOCK_DOM(so), SOCK_TYPE(so),
7820 epid, (ep == PROC_NULL) ? "PROC_NULL" :
7821 proc_name_address(ep), error);
7822 }
7823
7824 /* Update this socket's policy upon success */
7825 if (error == 0) {
7826 so->so_policy_gencnt *= -1;
7827 so_update_policy(so);
7828 #if NECP
7829 so_update_necp_policy(so, NULL, NULL);
7830 #endif /* NECP */
7831 }
7832
7833 if (ep != PROC_NULL) {
7834 proc_rele(ep);
7835 }
7836
7837 return error;
7838 }
7839
7840 int
7841 so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p, boolean_t check_cred)
7842 {
7843 uuid_string_t buf;
7844 uuid_t uuid;
7845 int error = 0;
7846
7847 /* UUID must not be all-zeroes (reserved for kernel) */
7848 if (uuid_is_null(euuid)) {
7849 error = EINVAL;
7850 goto done;
7851 }
7852
7853 /*
7854 * If this is an in-kernel socket, prevent its delegate
7855 * association from changing unless the socket option is
7856 * coming from within the kernel itself.
7857 */
7858 if (so->last_pid == 0 && p != kernproc) {
7859 error = EACCES;
7860 goto done;
7861 }
7862
7863 /* Get the UUID of the issuing process */
7864 proc_getexecutableuuid(p, uuid, sizeof(uuid));
7865
7866 /*
7867 * If this is issued by a process that's recorded as the
7868 * real owner of the socket, or if the uuid is the same as
7869 * the process's own uuid, then proceed. Otherwise ensure
7870 * that the issuing process has the necessary privileges.
7871 */
7872 if (check_cred &&
7873 (uuid_compare(euuid, so->last_uuid) != 0 ||
7874 uuid_compare(euuid, uuid) != 0)) {
7875 if ((error = priv_check_cred(kauth_cred_get(),
7876 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
7877 error = EACCES;
7878 goto done;
7879 }
7880 }
7881
7882 /*
7883 * If a process tries to delegate the socket to itself, then
7884 * there's really nothing to do; treat it as a way for the
7885 * delegate association to be cleared. Note that we check
7886 * the uuid of the passed-in proc rather than that of the
7887 * current process, as we need to check the process issuing
7888 * the socket option which could be kernproc itself. Given
7889 * that we don't allow 0 for effective uuid, it means that
7890 * a delegated in-kernel socket stays delegated during its
7891 * lifetime (which is okay.)
7892 */
7893 if (uuid_compare(euuid, uuid) == 0) {
7894 so->so_flags &= ~SOF_DELEGATED;
7895 so->e_upid = 0;
7896 so->e_pid = 0;
7897 uuid_clear(so->e_uuid);
7898 } else {
7899 so->so_flags |= SOF_DELEGATED;
7900 /*
7901 * Unlike so_set_effective_pid(), we only have the UUID
7902 * here and the process ID is not known. Inherit the
7903 * real {pid,upid} of the socket.
7904 */
7905 so->e_upid = so->last_upid;
7906 so->e_pid = so->last_pid;
7907 uuid_copy(so->e_uuid, euuid);
7908 }
7909 /*
7910 * The following will clear the effective process name as it is the same
7911 * as that of the real process
7912 */
7913 if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
7914 (*so->so_proto->pr_update_last_owner)(so, NULL, NULL);
7915 }
7916 done:
7917 if (error == 0 && net_io_policy_log) {
7918 uuid_unparse(so->e_uuid, buf);
7919 log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d "
7920 "euuid %s%s\n", __func__, proc_name_address(p), proc_pid(p),
7921 (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
7922 SOCK_TYPE(so), so->e_pid, buf,
7923 ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
7924 } else if (error != 0 && net_io_policy_log) {
7925 uuid_unparse(euuid, buf);
7926 log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] euuid %s "
7927 "ERROR (%d)\n", __func__, proc_name_address(p), proc_pid(p),
7928 (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
7929 SOCK_TYPE(so), buf, error);
7930 }
7931
7932 /* Update this socket's policy upon success */
7933 if (error == 0) {
7934 so->so_policy_gencnt *= -1;
7935 so_update_policy(so);
7936 #if NECP
7937 so_update_necp_policy(so, NULL, NULL);
7938 #endif /* NECP */
7939 }
7940
7941 return error;
7942 }
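/*
 * Illustrative user-space sketch (guarded out of compilation): SO_DELEGATED
 * and SO_DELEGATED_UUID are private Darwin SPI, so the option names below
 * assume a build environment where they are visible.  A privileged broker
 * opening sockets on behalf of another application could delegate them
 * roughly as follows, after which the delegate's network policy is applied
 * as in the two functions above.
 */
#if 0
#include <sys/socket.h>
#include <sys/types.h>
#include <uuid/uuid.h>

static int
delegate_socket(int sock, pid_t epid, const uuid_t euuid)
{
	if (setsockopt(sock, SOL_SOCKET, SO_DELEGATED,
	    &epid, sizeof(epid)) != 0) {
		return -1;
	}
	return setsockopt(sock, SOL_SOCKET, SO_DELEGATED_UUID,
	    euuid, sizeof(uuid_t));
}
#endif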
7943
7944 void
7945 netpolicy_post_msg(uint32_t ev_code, struct netpolicy_event_data *ev_data,
7946 uint32_t ev_datalen)
7947 {
7948 struct kev_msg ev_msg;
7949
7950 /*
7951 * A netpolicy event always starts with a netpolicy_event_data
7952 * structure, but the caller can provide for a longer event
7953 * structure to post, depending on the event code.
7954 */
7955 VERIFY(ev_data != NULL && ev_datalen >= sizeof(*ev_data));
7956
7957 bzero(&ev_msg, sizeof(ev_msg));
7958 ev_msg.vendor_code = KEV_VENDOR_APPLE;
7959 ev_msg.kev_class = KEV_NETWORK_CLASS;
7960 ev_msg.kev_subclass = KEV_NETPOLICY_SUBCLASS;
7961 ev_msg.event_code = ev_code;
7962
7963 ev_msg.dv[0].data_ptr = ev_data;
7964 ev_msg.dv[0].data_length = ev_datalen;
7965
7966 kev_post_msg(&ev_msg);
7967 }
7968
7969 void
7970 socket_post_kev_msg(uint32_t ev_code,
7971 struct kev_socket_event_data *ev_data,
7972 uint32_t ev_datalen)
7973 {
7974 struct kev_msg ev_msg;
7975
7976 bzero(&ev_msg, sizeof(ev_msg));
7977 ev_msg.vendor_code = KEV_VENDOR_APPLE;
7978 ev_msg.kev_class = KEV_NETWORK_CLASS;
7979 ev_msg.kev_subclass = KEV_SOCKET_SUBCLASS;
7980 ev_msg.event_code = ev_code;
7981
7982 ev_msg.dv[0].data_ptr = ev_data;
7983 ev_msg.dv[0].data_length = ev_datalen;
7984
7985 kev_post_msg(&ev_msg);
7986 }
7987
7988 void
7989 socket_post_kev_msg_closed(struct socket *so)
7990 {
7991 struct kev_socket_closed ev = {};
7992 struct sockaddr *socksa = NULL, *peersa = NULL;
7993 int err;
7994
7995 if ((so->so_flags1 & SOF1_WANT_KEV_SOCK_CLOSED) == 0) {
7996 return;
7997 }
7998 err = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &socksa);
7999 if (err == 0) {
8000 err = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so,
8001 &peersa);
8002 if (err == 0) {
8003 memcpy(&ev.ev_data.kev_sockname, socksa,
8004 min(socksa->sa_len,
8005 sizeof(ev.ev_data.kev_sockname)));
8006 memcpy(&ev.ev_data.kev_peername, peersa,
8007 min(peersa->sa_len,
8008 sizeof(ev.ev_data.kev_peername)));
8009 socket_post_kev_msg(KEV_SOCKET_CLOSED,
8010 &ev.ev_data, sizeof(ev));
8011 }
8012 }
8013 if (socksa != NULL) {
8014 FREE(socksa, M_SONAME);
8015 }
8016 if (peersa != NULL) {
8017 FREE(peersa, M_SONAME);
8018 }
8019 }