/*
 * Copyright (c) 1998-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/uio_internal.h>
#include <sys/kdebug.h>
#include <sys/kern_event.h>
#include <net/route.h>
#include <net/init.h>
#include <net/net_api_stats.h>
#include <net/ntstat.h>
#include <net/content_filter.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>
#include <netinet/in_var.h>
#include <netinet/tcp_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/flow_divert.h>
#include <kern/zalloc.h>
#include <kern/locks.h>
#include <machine/limits.h>
#include <libkern/OSAtomic.h>
#include <pexpert/pexpert.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <kern/policy_internal.h>

#include <sys/kpi_mbuf.h>
#include <sys/mcache.h>
#include <sys/unpcb.h>
#include <libkern/section_keywords.h>

#include <security/mac_framework.h>

#if MULTIPATH
#include <netinet/mp_pcb.h>
#include <netinet/mptcp_var.h>
#endif /* MULTIPATH */
#define ROUNDUP(a, b) (((a) + ((b) - 1)) & (~((b) - 1)))
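
/*
 * Quick illustration (added; not in the original source): ROUNDUP() assumes
 * "b" is a power of two and rounds "a" up to the next multiple of "b", e.g.
 *
 *	ROUNDUP(13, 8)   == (13 + 7) & ~7       == 16
 *	ROUNDUP(16, 8)   == (16 + 7) & ~7       == 16
 *	ROUNDUP(1, 2048) == (1 + 2047) & ~2047  == 2048
 */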
#if DEBUG || DEVELOPMENT
#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
#else
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#endif /* DEBUG || DEVELOPMENT */
/* TODO: this should be in a header file somewhere */
extern char *proc_name_address(void *p);

static u_int32_t so_cache_hw;           /* High water mark for socache */
static u_int32_t so_cache_timeouts;     /* number of timeouts */
static u_int32_t so_cache_max_freed;    /* max freed per timeout */
static u_int32_t cached_sock_count = 0;
STAILQ_HEAD(, socket) so_cache_head;
int max_cached_sock_count = MAX_CACHED_SOCKETS;
static u_int32_t so_cache_time;
static int socketinit_done;
static struct zone *so_cache_zone;

static lck_grp_t *so_cache_mtx_grp;
static lck_attr_t *so_cache_mtx_attr;
static lck_grp_attr_t *so_cache_mtx_grp_attr;
static lck_mtx_t *so_cache_mtx;
#include <machine/limits.h>
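
/*
 * Summary note (added for clarity): the globals above implement a small
 * cache of freed socket structures.  cached_sock_free() below pushes
 * sockets onto so_cache_head (bounded by max_cached_sock_count),
 * cached_sock_alloc() pops them off again, and the periodic cache sweep
 * frees entries older than SO_CACHE_TIME_LIMIT, all under so_cache_mtx.
 */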
static int filt_sorattach(struct knote *kn, struct kevent_qos_s *kev);
static void filt_sordetach(struct knote *kn);
static int filt_soread(struct knote *kn, long hint);
static int filt_sortouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_sorprocess(struct knote *kn, struct kevent_qos_s *kev);

static int filt_sowattach(struct knote *kn, struct kevent_qos_s *kev);
static void filt_sowdetach(struct knote *kn);
static int filt_sowrite(struct knote *kn, long hint);
static int filt_sowtouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_sowprocess(struct knote *kn, struct kevent_qos_s *kev);

static int filt_sockattach(struct knote *kn, struct kevent_qos_s *kev);
static void filt_sockdetach(struct knote *kn);
static int filt_sockev(struct knote *kn, long hint);
static int filt_socktouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_sockprocess(struct knote *kn, struct kevent_qos_s *kev);

static int sooptcopyin_timeval(struct sockopt *, struct timeval *);
static int sooptcopyout_timeval(struct sockopt *, const struct timeval *);
SECURITY_READ_ONLY_EARLY(struct filterops) soread_filtops = {
	.f_attach = filt_sorattach,
	.f_detach = filt_sordetach,
	.f_event = filt_soread,
	.f_touch = filt_sortouch,
	.f_process = filt_sorprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) sowrite_filtops = {
	.f_attach = filt_sowattach,
	.f_detach = filt_sowdetach,
	.f_event = filt_sowrite,
	.f_touch = filt_sowtouch,
	.f_process = filt_sowprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) sock_filtops = {
	.f_attach = filt_sockattach,
	.f_detach = filt_sockdetach,
	.f_event = filt_sockev,
	.f_touch = filt_socktouch,
	.f_process = filt_sockprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) soexcept_filtops = {
	.f_attach = filt_sorattach,
	.f_detach = filt_sordetach,
	.f_event = filt_soread,
	.f_touch = filt_sortouch,
	.f_process = filt_sorprocess,
};
SYSCTL_DECL(_kern_ipc);
#define EVEN_MORE_LOCKING_DEBUG 0

int socket_debug = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, socket_debug,
    CTLFLAG_RW | CTLFLAG_LOCKED, &socket_debug, 0, "");

static unsigned long sodefunct_calls = 0;
SYSCTL_LONG(_kern_ipc, OID_AUTO, sodefunct_calls, CTLFLAG_LOCKED,
    &sodefunct_calls, "");

static int socket_zone = M_SOCKET;
so_gen_t so_gencnt;     /* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

#define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3)
#define DBG_FNC_SOSEND          NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
#define DBG_FNC_SOSEND_LIST     NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 3)
#define DBG_FNC_SORECEIVE       NETDBG_CODE(DBG_NETSOCK, (8 << 8))
#define DBG_FNC_SORECEIVE_LIST  NETDBG_CODE(DBG_NETSOCK, (8 << 8) | 3)
#define DBG_FNC_SOSHUTDOWN      NETDBG_CODE(DBG_NETSOCK, (9 << 8))

#define MAX_SOOPTGETM_SIZE      (128 * MCLBYTES)

int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxconn, 0, "");

/* Should we get a maximum also ??? */
static int sosendmaxchain = 65536;
static int sosendminchain = 16384;
static int sorecvmincopy = 16384;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendminchain, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sorecvmincopy, 0, "");
/*
 * Set to enable jumbo clusters (if available) for large writes when
 * the socket is marked with SOF_MULTIPAGES; see below.
 */
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl, 0, "");
/*
 * Set this to ignore SOF_MULTIPAGES and use jumbo clusters for large
 * writes on the socket for all protocols on any network interfaces,
 * depending upon sosendjcl above.  Be extra careful when setting this
 * to 1, because sending down packets that cross physical pages down to
 * broken drivers (those that falsely assume that the physical pages
 * are contiguous) might lead to system panics or silent data corruption.
 * When set to 0, the system will respect SOF_MULTIPAGES, which is set
 * only for TCP sockets whose outgoing interface is IFNET_MULTIPAGES
 * capable.  Set this to 1 only for testing/debugging purposes.
 */
int sosendjcl_ignore_capab = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl_ignore_capab, 0, "");
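
/*
 * Illustrative user-space sketch (added; not part of this file): the
 * tunables above are published under the kern.ipc sysctl node, so a test
 * tool could read one of them roughly as follows (names derived from the
 * SYSCTL_INT() declarations above):
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int val = 0;
 *		size_t len = sizeof(val);
 *
 *		if (sysctlbyname("kern.ipc.sosendjcl_ignore_capab",
 *		    &val, &len, NULL, 0) == 0)
 *			printf("sosendjcl_ignore_capab=%d\n", val);
 *		return 0;
 *	}
 */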
/*
 * Set this to ignore SOF1_IF_2KCL and use big clusters for large
 * writes on the socket for all protocols on any network interfaces.
 * Be extra careful when setting this to 1, because sending down packets with
 * clusters larger than 2 KB might lead to system panics or data corruption.
 * When set to 0, the system will respect SOF1_IF_2KCL, which is set
 * on the outgoing interface.
 * Set this to 1 for testing/debugging purposes only.
 */
int sosendbigcl_ignore_capab = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendbigcl_ignore_capab,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendbigcl_ignore_capab, 0, "");
int sodefunctlog = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sodefunctlog, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sodefunctlog, 0, "");

int sothrottlelog = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sothrottlelog, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sothrottlelog, 0, "");

int sorestrictrecv = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictrecv, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sorestrictrecv, 0, "Enable inbound interface restrictions");

int sorestrictsend = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictsend, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sorestrictsend, 0, "Enable outbound interface restrictions");

int soreserveheadroom = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, soreserveheadroom, CTLFLAG_RW | CTLFLAG_LOCKED,
    &soreserveheadroom, 0, "To allocate contiguous datagram buffers");

#if (DEBUG || DEVELOPMENT)
int so_notsent_lowat_check = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, notsent_lowat, CTLFLAG_RW | CTLFLAG_LOCKED,
    &so_notsent_lowat_check, 0, "enable/disable notsent lowat check");
#endif /* DEBUG || DEVELOPMENT */

int so_accept_list_waits = 0;
#if (DEBUG || DEVELOPMENT)
SYSCTL_INT(_kern_ipc, OID_AUTO, accept_list_waits, CTLFLAG_RW | CTLFLAG_LOCKED,
    &so_accept_list_waits, 0, "number of waits for listener incomp list");
#endif /* DEBUG || DEVELOPMENT */
extern struct inpcbinfo tcbinfo;

/* TODO: these should be in header file */
extern int get_inpcb_str_size(void);
extern int get_tcp_str_size(void);

vm_size_t so_cache_zone_element_size;

static int sodelayed_copy(struct socket *, struct uio *, struct mbuf **,
    user_ssize_t *);
static void cached_sock_alloc(struct socket **, int);
static void cached_sock_free(struct socket *);

/*
 * Maximum of extended background idle sockets per process
 * Set to zero to disable further setting of the option
 */
#define SO_IDLE_BK_IDLE_MAX_PER_PROC    1
#define SO_IDLE_BK_IDLE_TIME            600
#define SO_IDLE_BK_IDLE_RCV_HIWAT       131072

struct soextbkidlestat soextbkidlestat;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxextbkidleperproc,
    CTLFLAG_RW | CTLFLAG_LOCKED, &soextbkidlestat.so_xbkidle_maxperproc, 0,
    "Maximum of extended background idle sockets per process");

SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidletime, CTLFLAG_RW | CTLFLAG_LOCKED,
    &soextbkidlestat.so_xbkidle_time, 0,
    "Time in seconds to keep extended background idle sockets");

SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidlercvhiwat, CTLFLAG_RW | CTLFLAG_LOCKED,
    &soextbkidlestat.so_xbkidle_rcvhiwat, 0,
    "High water mark for extended background idle sockets");

SYSCTL_STRUCT(_kern_ipc, OID_AUTO, extbkidlestat, CTLFLAG_RD | CTLFLAG_LOCKED,
    &soextbkidlestat, soextbkidlestat, "");

int so_set_extended_bk_idle(struct socket *, int);
/*
 * SOTCDB_NO_DSCP is set by default, to prevent the networking stack from
 * setting the DSCP code on the packet based on the service class; see
 * <rdar://problem/11277343> for details.
 */
__private_extern__ u_int32_t sotcdb = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sotcdb, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sotcdb, 0, "");
	_CASSERT(sizeof(so_gencnt) == sizeof(uint64_t));
	VERIFY(IS_P2ALIGNED(&so_gencnt, sizeof(uint32_t)));

	_CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user64_sa_endpoints));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user64_sa_endpoints, sae_srcif));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user64_sa_endpoints, sae_srcaddr));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user64_sa_endpoints, sae_srcaddrlen));
	_CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user64_sa_endpoints, sae_dstaddr));
	_CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user64_sa_endpoints, sae_dstaddrlen));

	_CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user32_sa_endpoints));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user32_sa_endpoints, sae_srcif));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user32_sa_endpoints, sae_srcaddr));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user32_sa_endpoints, sae_srcaddrlen));
	_CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user32_sa_endpoints, sae_dstaddr));
	_CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user32_sa_endpoints, sae_dstaddrlen));

	if (socketinit_done) {
		printf("socketinit: already called...\n");

	PE_parse_boot_argn("socket_debug", &socket_debug,
	    sizeof(socket_debug));

	/*
	 * allocate lock group attribute and group for socket cache mutex
	 */
	so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
	so_cache_mtx_grp = lck_grp_alloc_init("so_cache",
	    so_cache_mtx_grp_attr);

	/*
	 * allocate the lock attribute for socket cache mutex
	 */
	so_cache_mtx_attr = lck_attr_alloc_init();

	/* cached sockets mutex */
	so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr);
	if (so_cache_mtx == NULL) {
		panic("%s: unable to allocate so_cache_mtx\n", __func__);

	STAILQ_INIT(&so_cache_head);

	so_cache_zone_element_size = (vm_size_t)(sizeof(struct socket) + 4
	    + get_inpcb_str_size() + 4 + get_tcp_str_size());

	so_cache_zone = zinit(so_cache_zone_element_size,
	    (120000 * so_cache_zone_element_size), 8192, "socache zone");
	zone_change(so_cache_zone, Z_CALLERACCT, FALSE);
	zone_change(so_cache_zone, Z_NOENCRYPT, TRUE);

	bzero(&soextbkidlestat, sizeof(struct soextbkidlestat));
	soextbkidlestat.so_xbkidle_maxperproc = SO_IDLE_BK_IDLE_MAX_PER_PROC;
	soextbkidlestat.so_xbkidle_time = SO_IDLE_BK_IDLE_TIME;
	soextbkidlestat.so_xbkidle_rcvhiwat = SO_IDLE_BK_IDLE_RCV_HIWAT;

	socket_tclass_init();

#endif /* MULTIPATH */
cached_sock_alloc(struct socket **so, int waitok)
	lck_mtx_lock(so_cache_mtx);

	if (!STAILQ_EMPTY(&so_cache_head)) {
		VERIFY(cached_sock_count > 0);

		*so = STAILQ_FIRST(&so_cache_head);
		STAILQ_REMOVE_HEAD(&so_cache_head, so_cache_ent);
		STAILQ_NEXT((*so), so_cache_ent) = NULL;

		lck_mtx_unlock(so_cache_mtx);

		temp = (*so)->so_saved_pcb;
		bzero((caddr_t)*so, sizeof(struct socket));

		(*so)->so_saved_pcb = temp;

		lck_mtx_unlock(so_cache_mtx);

		*so = (struct socket *)zalloc(so_cache_zone);
		*so = (struct socket *)zalloc_noblock(so_cache_zone);

		bzero((caddr_t)*so, sizeof(struct socket));

		/*
		 * Define offsets for extra structures into our
		 * single block of memory. Align extra structures
		 * on longword boundaries.
		 */
		offset = (uintptr_t)*so;
		offset += sizeof(struct socket);

		offset = ALIGN(offset);

		(*so)->so_saved_pcb = (caddr_t)offset;
		offset += get_inpcb_str_size();

		offset = ALIGN(offset);

		((struct inpcb *)(void *)(*so)->so_saved_pcb)->inp_saved_ppcb =

	OSBitOrAtomic(SOF1_CACHED_IN_SOCK_LAYER, &(*so)->so_flags1);
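
/*
 * Layout sketch (added for clarity, based on the offset math above): each
 * so_cache_zone element is one contiguous block carved up roughly as
 *
 *	+------------------+  <- start of zone element
 *	| struct socket    |
 *	+------------------+  <- ALIGN()'ed; (*so)->so_saved_pcb
 *	| inpcb storage    |     (get_inpcb_str_size() bytes)
 *	+------------------+  <- ALIGN()'ed; inp_saved_ppcb
 *	| tcpcb storage    |     (get_tcp_str_size() bytes)
 *	+------------------+
 *
 * which matches so_cache_zone_element_size as computed in socketinit().
 */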
cached_sock_free(struct socket *so)
	lck_mtx_lock(so_cache_mtx);

	so_cache_time = net_uptime();
	if (++cached_sock_count > max_cached_sock_count) {
		lck_mtx_unlock(so_cache_mtx);
		zfree(so_cache_zone, so);

		if (so_cache_hw < cached_sock_count) {
			so_cache_hw = cached_sock_count;

		STAILQ_INSERT_TAIL(&so_cache_head, so, so_cache_ent);

		so->cache_timestamp = so_cache_time;
		lck_mtx_unlock(so_cache_mtx);
so_update_last_owner_locked(struct socket *so, proc_t self)
	if (so->last_pid != 0) {
		/*
		 * last_pid and last_upid should remain zero for sockets
		 * created using sock_socket. The check above achieves that
		 */
		if (self == PROC_NULL) {
			self = current_proc();

		if (so->last_upid != proc_uniqueid(self) ||
		    so->last_pid != proc_pid(self)) {
			so->last_upid = proc_uniqueid(self);
			so->last_pid = proc_pid(self);
			proc_getexecutableuuid(self, so->last_uuid,
			    sizeof(so->last_uuid));
			if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
				(*so->so_proto->pr_update_last_owner)(so, self, NULL);

		proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid));
so_update_policy(struct socket *so)
	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		(void) inp_update_policy(sotoinpcb(so));

so_update_necp_policy(struct socket *so, struct sockaddr *override_local_addr,
    struct sockaddr *override_remote_addr)
	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		inp_update_necp_policy(sotoinpcb(so), override_local_addr,
		    override_remote_addr, 0);
	boolean_t rc = FALSE;

	lck_mtx_lock(so_cache_mtx);

	so_cache_time = net_uptime();

	while (!STAILQ_EMPTY(&so_cache_head)) {
		VERIFY(cached_sock_count > 0);
		p = STAILQ_FIRST(&so_cache_head);
		if ((so_cache_time - p->cache_timestamp) <
		    SO_CACHE_TIME_LIMIT) {

		STAILQ_REMOVE_HEAD(&so_cache_head, so_cache_ent);

		zfree(so_cache_zone, p);

		if (++n_freed >= SO_CACHE_MAX_FREE_BATCH) {
			so_cache_max_freed++;

	/* Schedule again if there is more to cleanup */
	if (!STAILQ_EMPTY(&so_cache_head)) {

	lck_mtx_unlock(so_cache_mtx);
/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
soalloc(int waitok, int dom, int type)
	if ((dom == PF_INET) && (type == SOCK_STREAM)) {
		cached_sock_alloc(&so, waitok);
		MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone,
		bzero(so, sizeof(*so));

	so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt);
	so->so_zone = socket_zone;

	/*
	 * Increment the socket allocation statistics
	 */
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_alloc_total);

#if CONFIG_MACF_SOCKET
	/* Convert waitok to M_WAITOK/M_NOWAIT for MAC Framework. */
	if (mac_socket_label_init(so, !waitok) != 0) {
#endif /* MAC_SOCKET */
socreate_internal(int dom, struct socket **aso, int type, int proto,
    struct proc *p, uint32_t flags, struct proc *ep)
#if defined(XNU_TARGET_OS_OSX)
	extern int tcpconsdebug;

	prp = pffindproto(dom, proto, type);
	prp = pffindtype(dom, type);

	if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL) {
		if (pffinddomain(dom) == NULL) {
		if (pffindprotonotype(dom, proto) != NULL) {
		return EPROTONOSUPPORT;
	if (prp->pr_type != type) {
	so = soalloc(1, dom, type);

		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_local_total);
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_inet_total);
		if (type == SOCK_STREAM) {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_stream_total);
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_total);
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_route_total);
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_ndrv_total);
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_key_total);
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_inet6_total);
		if (type == SOCK_STREAM) {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet6_stream_total);
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet6_dgram_total);
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_system_total);
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_multipath_total);
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_other_total);
	if (flags & SOCF_MPTCP) {
		so->so_state |= SS_NBIO;

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);

	so->last_upid = proc_uniqueid(p);
	so->last_pid = proc_pid(p);
	proc_getexecutableuuid(p, so->last_uuid, sizeof(so->last_uuid));
	proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid));

	if (ep != PROC_NULL && ep != p) {
		so->e_upid = proc_uniqueid(ep);
		so->e_pid = proc_pid(ep);
		proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid));
		so->so_flags |= SOF_DELEGATED;
#if defined(XNU_TARGET_OS_OSX)
		if (ep->p_responsible_pid != so->e_pid) {
			rpid = ep->p_responsible_pid;

#if defined(XNU_TARGET_OS_OSX)
	if (rpid < 0 && p->p_responsible_pid != so->last_pid) {
		rpid = p->p_responsible_pid;

	uuid_clear(so->so_ruuid);
		proc_t rp = proc_find(rpid);
		if (rp != PROC_NULL) {
			proc_getexecutableuuid(rp, so->so_ruuid, sizeof(so->so_ruuid));

	so->so_cred = kauth_cred_proc_ref(p);
	if (!suser(kauth_cred_get(), NULL)) {
		so->so_state |= SS_PRIV;

	so->so_rcv.sb_flags |= SB_RECV;
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	so->next_lock_lr = 0;
	so->next_unlock_lr = 0;

#if CONFIG_MACF_SOCKET
	mac_socket_label_associate(kauth_cred_get(), so);
#endif /* MAC_SOCKET */
	/*
	 * Attachment will create the per pcb lock if necessary and
	 * increase refcount for creation, make sure it's done before
	 * socket is inserted in lists.
	 */
	error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
		/*
		 * If so_pcb is not zero, the socket will be leaked,
		 * so protocol attachment handler must be coded carefully
		 */
		so->so_state |= SS_NOFDREF;
		VERIFY(so->so_usecount > 0);
		sofreelastref(so, 1);   /* will deallocate the socket */

	/*
	 * Note: needs so_pcb to be set after pru_attach
	 */
	if (prp->pr_update_last_owner != NULL) {
		(*prp->pr_update_last_owner)(so, p, ep);

	atomic_add_32(&prp->pr_domain->dom_refs, 1);
	TAILQ_INIT(&so->so_evlist);

	/* Attach socket filters for this protocol */

	if (tcpconsdebug == 2) {
		so->so_options |= SO_DEBUG;

	so_set_default_traffic_class(so);

	/*
	 * If this thread or task is marked to create backgrounded sockets,
	 * mark the socket as background.
	 */
	if (!(flags & SOCF_MPTCP) &&
	    proc_get_effective_thread_policy(current_thread(), TASK_POLICY_NEW_SOCKETS_BG)) {
		socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND);
		so->so_background_thread = current_thread();

		/*
		 * Don't mark Unix domain or system
		 * eligible for defunct by default.
		 */
		so->so_flags |= SOF_NODEFUNCT;

	/*
	 * Entitlements can't be checked at socket creation time except if the
	 * application requested a feature guarded by a privilege (c.f., socket
	 * The priv(9) and the Sandboxing APIs are designed with the idea that
	 * a privilege check should only be triggered by a userland request.
	 * A privilege check at socket creation time is time consuming and
	 * could trigger many authorisation error messages from the security
	 */

/*
 * <pru_attach>:ENOBUFS[AF_UNIX]
 * <pru_attach>:ENOBUFS[TCP]
 * <pru_attach>:ENOMEM[TCP]
 * <pru_attach>:???		[other protocol families, IPSEC]
 */
socreate(int dom, struct socket **aso, int type, int proto)
	return socreate_internal(dom, aso, type, proto, current_proc(), 0,

socreate_delegate(int dom, struct socket **aso, int type, int proto, pid_t epid)
	struct proc *ep = PROC_NULL;

	if ((proc_selfpid() != epid) && ((ep = proc_find(epid)) == PROC_NULL)) {

	error = socreate_internal(dom, aso, type, proto, current_proc(), 0, ep);

	/*
	 * It might not be wise to hold the proc reference when calling
	 * socreate_internal since it calls soalloc with M_WAITOK
	 */
	if (ep != PROC_NULL) {
/*
 *	<pru_bind>:EINVAL		Invalid argument [COMMON_START]
 *	<pru_bind>:EAFNOSUPPORT		Address family not supported
 *	<pru_bind>:EADDRNOTAVAIL	Address not available.
 *	<pru_bind>:EINVAL		Invalid argument
 *	<pru_bind>:EAFNOSUPPORT		Address family not supported [notdef]
 *	<pru_bind>:EACCES		Permission denied
 *	<pru_bind>:EADDRINUSE		Address in use
 *	<pru_bind>:EAGAIN		Resource unavailable, try again
 *	<pru_bind>:EPERM		Operation not permitted
 *
 * Notes:	It's not possible to fully enumerate the return codes above,
 *		since socket filter authors and protocol family authors may
 *		not choose to limit their error returns to those listed, even
 *		though this may result in some software operating incorrectly.
 *
 *		The error codes which are enumerated above are those known to
 *		be returned by the tcp_usr_bind function supplied.
 */
sobindlock(struct socket *so, struct sockaddr *nam, int dolock)
	struct proc *p = current_proc();

	so_update_last_owner_locked(so, p);
	so_update_policy(so);

	so_update_necp_policy(so, nam, NULL);

	/*
	 * If this is a bind request on a socket that has been marked
	 * as inactive, reject it now before we go any further.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
		    __func__, proc_pid(p), proc_best_name(p),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so), error);

	error = sflt_bind(so, nam);

	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);

	socket_unlock(so, 1);

	if (error == EJUSTRETURN) {
sodealloc(struct socket *so)
	kauth_cred_unref(&so->so_cred);

	/* Remove any filters */

	cfil_sock_detach(so);
#endif /* CONTENT_FILTER */

	/* Delete the state allocated for msg queues on a socket */
	if (so->so_flags & SOF_ENABLE_MSGS) {
		FREE(so->so_msg_state, M_TEMP);
		so->so_msg_state = NULL;

	VERIFY(so->so_msg_state == NULL);

	so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt);

#if CONFIG_MACF_SOCKET
	mac_socket_label_destroy(so);
#endif /* MAC_SOCKET */

	if (so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) {
		cached_sock_free(so);
		FREE_ZONE(so, sizeof(*so), so->so_zone);
/*
 * Returns:	0			Success
 *	<pru_listen>:EINVAL[AF_UNIX]
 *	<pru_listen>:EINVAL[TCP]
 *	<pru_listen>:EADDRNOTAVAIL[TCP]	Address not available.
 *	<pru_listen>:EINVAL[TCP]	Invalid argument
 *	<pru_listen>:EAFNOSUPPORT[TCP]	Address family not supported [notdef]
 *	<pru_listen>:EACCES[TCP]	Permission denied
 *	<pru_listen>:EADDRINUSE[TCP]	Address in use
 *	<pru_listen>:EAGAIN[TCP]	Resource unavailable, try again
 *	<pru_listen>:EPERM[TCP]		Operation not permitted
 *
 * Notes:	Other <pru_listen> returns depend on the protocol family; all
 *		<sf_listen> returns depend on what the filter author causes
 *		their filter to return.
 */
solisten(struct socket *so, int backlog)
	struct proc *p = current_proc();

	so_update_last_owner_locked(so, p);
	so_update_policy(so);

	so_update_necp_policy(so, NULL, NULL);

	if (so->so_proto == NULL) {
	if ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {

	/*
	 * If the listen request is made on a socket that is not fully
	 * disconnected, or on a socket that has been marked as inactive,
	 * reject the request now.
	 */
	    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) ||
	    (so->so_flags & SOF_DEFUNCT)) {
		if (so->so_flags & SOF_DEFUNCT) {
			SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
			    "(%d)\n", __func__, proc_pid(p),
			    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
			    SOCK_DOM(so), SOCK_TYPE(so), error);

	if ((so->so_restrictions & SO_RESTRICT_DENY_IN) != 0) {

	error = sflt_listen(so);
	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);

	if (error == EJUSTRETURN) {

	if (TAILQ_EMPTY(&so->so_comp)) {
		so->so_options |= SO_ACCEPTCONN;
	/*
	 * POSIX: The implementation may have an upper limit on the length of
	 * the listen queue-either global or per accepting socket. If backlog
	 * exceeds this limit, the length of the listen queue is set to the
	 * limit.
	 *
	 * If listen() is called with a backlog argument value that is less
	 * than 0, the function behaves as if it had been called with a backlog
	 * argument value of 0.
	 *
	 * A backlog argument of 0 may allow the socket to accept connections,
	 * in which case the length of the listen queue may be set to an
	 * implementation-defined minimum value.
	 */
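
	/*
	 * Worked example (added; follows from the clamp below, assuming the
	 * default somaxconn of SOMAXCONN):
	 *
	 *	listen(s, -1)    -> so_qlimit = somaxconn
	 *	listen(s, 0)     -> so_qlimit = somaxconn
	 *	listen(s, 10)    -> so_qlimit = 10
	 *	listen(s, 65535) -> so_qlimit = somaxconn
	 *
	 * somaxconn itself is tunable via the kern.ipc.somaxconn sysctl
	 * declared earlier in this file.
	 */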
	if (backlog <= 0 || backlog > somaxconn) {
		backlog = somaxconn;

	so->so_qlimit = backlog;

	socket_unlock(so, 1);
/*
 * The "accept list lock" protects the fields related to the listener queues
 * because we can unlock a socket to respect the lock ordering between
 * the listener socket and its client sockets. The lock ordering is first to
 * acquire the client socket before the listener socket.
 *
 * The accept list lock serializes access to the following fields:
 * - of the listener socket:
 * - of client sockets that are in so_comp or so_incomp:
 *
 * As one can see the accept list lock protects the consistency of the
 * linkage of the client sockets.
 *
 * Note that those fields may be read without holding the accept list lock
 * for a preflight provided the accept list lock is taken when committing
 * to take an action based on the result of the preflight. The preflight
 * saves the cost of doing the unlock/lock dance.
 */
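
/*
 * Illustrative sketch (added; not from the original source) of the
 * preflight pattern described above, using the helpers defined below:
 *
 *	if (so->so_state & SS_INCOMP) {		// preflight, no list lock
 *		socket_lock(head, 1);
 *		so_acquire_accept_list(head, so);
 *		if (so->so_state & SS_INCOMP) {	// re-check before committing
 *			so->so_state &= ~SS_INCOMP;
 *			TAILQ_REMOVE(&head->so_incomp, so, so_list);
 *		}
 *		so_release_accept_list(head);
 *		socket_unlock(head, 1);
 *	}
 */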
so_acquire_accept_list(struct socket *head, struct socket *so)
	lck_mtx_t *mutex_held;

	if (head->so_proto->pr_getlock == NULL) {
	mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (!(head->so_flags1 & SOF1_ACCEPT_LIST_HELD)) {
		head->so_flags1 |= SOF1_ACCEPT_LIST_HELD;

	socket_unlock(so, 0);

	while (head->so_flags1 & SOF1_ACCEPT_LIST_HELD) {
		so_accept_list_waits += 1;
		msleep((caddr_t)&head->so_incomp, mutex_held,
		    PSOCK | PCATCH, __func__, NULL);

	head->so_flags1 |= SOF1_ACCEPT_LIST_HELD;

	socket_unlock(head, 0);
	socket_lock(head, 0);
so_release_accept_list(struct socket *head)
	if (head->so_proto->pr_getlock != NULL) {
		lck_mtx_t *mutex_held;

		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
		LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

	head->so_flags1 &= ~SOF1_ACCEPT_LIST_HELD;
	wakeup((caddr_t)&head->so_incomp);
sofreelastref(struct socket *so, int dealloc)
	struct socket *head = so->so_head;

	/* Assume socket is locked */

	if (!(so->so_flags & SOF_PCBCLEARING) || !(so->so_state & SS_NOFDREF)) {
		selthreadclear(&so->so_snd.sb_sel);
		selthreadclear(&so->so_rcv.sb_sel);
		so->so_rcv.sb_flags &= ~(SB_SEL | SB_UPCALL);
		so->so_snd.sb_flags &= ~(SB_SEL | SB_UPCALL);
		so->so_event = sonullevent;

		/*
		 * Need to lock the listener when the protocol has
		 */
		if (head->so_proto->pr_getlock != NULL) {
			socket_lock(head, 1);
			so_acquire_accept_list(head, so);

		if (so->so_state & SS_INCOMP) {
			so->so_state &= ~SS_INCOMP;
			TAILQ_REMOVE(&head->so_incomp, so, so_list);

			if (head->so_proto->pr_getlock != NULL) {
				so_release_accept_list(head);
				socket_unlock(head, 1);
		} else if (so->so_state & SS_COMP) {
			if (head->so_proto->pr_getlock != NULL) {
				so_release_accept_list(head);
				socket_unlock(head, 1);
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue. If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
			selthreadclear(&so->so_snd.sb_sel);
			selthreadclear(&so->so_rcv.sb_sel);
			so->so_rcv.sb_flags &= ~(SB_SEL | SB_UPCALL);
			so->so_snd.sb_flags &= ~(SB_SEL | SB_UPCALL);
			so->so_event = sonullevent;

			if (head->so_proto->pr_getlock != NULL) {
				so_release_accept_list(head);
				socket_unlock(head, 1);
			printf("sofree: not queued\n");

	if (so->so_flags & SOF_FLOW_DIVERT) {
		flow_divert_detach(so);
#endif /* FLOW_DIVERT */

	/* 3932268: disable upcall */
	so->so_rcv.sb_flags &= ~SB_UPCALL;
	so->so_snd.sb_flags &= ~(SB_UPCALL | SB_SNDBYTE_CNT);
	so->so_event = sonullevent;
soclose_wait_locked(struct socket *so)
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

	/*
	 * Double check here and return if there's no outstanding upcall;
	 * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
	 */
	if (!so->so_upcallusecount || !(so->so_flags & SOF_UPCALLCLOSEWAIT)) {
	so->so_rcv.sb_flags &= ~SB_UPCALL;
	so->so_snd.sb_flags &= ~SB_UPCALL;
	so->so_flags |= SOF_CLOSEWAIT;

	(void) msleep((caddr_t)&so->so_upcallusecount, mutex_held, (PZERO - 1),
	    "soclose_wait_locked", NULL);
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
	so->so_flags &= ~SOF_CLOSEWAIT;
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
soclose_locked(struct socket *so)
	if (so->so_usecount == 0) {
		panic("soclose: so=%p refcount=0\n", so);

	sflt_notify(so, sock_evt_closing, NULL);

	if (so->so_upcallusecount) {
		soclose_wait_locked(so);

	/*
	 * We have to wait until the content filters are done
	 */
	if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
		cfil_sock_close_wait(so);
		cfil_sock_is_closed(so);
		cfil_sock_detach(so);
#endif /* CONTENT_FILTER */

	if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) {
		soresume(current_proc(), so, 1);
		so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED;

	if ((so->so_options & SO_ACCEPTCONN)) {
		struct socket *sp, *sonext;
		int persocklock = 0;
		int incomp_overflow_only;

		/*
		 * We do not want new connection to be added
		 * to the connection queues
		 */
		so->so_options &= ~SO_ACCEPTCONN;

		/*
		 * We can drop the lock on the listener once
		 * we've acquired the incoming list
		 */
		if (so->so_proto->pr_getlock != NULL) {
			so_acquire_accept_list(so, NULL);
			socket_unlock(so, 0);

		incomp_overflow_only = 1;

		TAILQ_FOREACH_SAFE(sp, &so->so_incomp, so_list, sonext) {
			/*
			 * skip sockets thrown away by tcpdropdropblreq
			 * they will get cleaned up by the garbage collection.
			 * otherwise, remove the incomp socket from the queue
			 * and let soabort trigger the appropriate cleanup.
			 */
			if (sp->so_flags & SOF_OVERFLOW) {

			if (persocklock != 0) {

			/*
			 * The extra reference for the list ensures the
			 * validity of the socket pointer when we perform the
			 * unlock of the head above
			 */
			if (sp->so_state & SS_INCOMP) {
				sp->so_state &= ~SS_INCOMP;

				TAILQ_REMOVE(&so->so_incomp, sp, so_list);

				panic("%s sp %p in so_incomp but !SS_INCOMP",

			if (persocklock != 0) {
				socket_unlock(sp, 1);
		TAILQ_FOREACH_SAFE(sp, &so->so_comp, so_list, sonext) {
			/* Dequeue from so_comp since sofree() won't do it */
			if (persocklock != 0) {

			if (sp->so_state & SS_COMP) {
				sp->so_state &= ~SS_COMP;

				TAILQ_REMOVE(&so->so_comp, sp, so_list);

				panic("%s sp %p in so_comp but !SS_COMP",

			socket_unlock(sp, 1);

		if (incomp_overflow_only == 0 && !TAILQ_EMPTY(&so->so_incomp)) {
#if (DEBUG | DEVELOPMENT)
			panic("%s head %p so_comp not empty\n", __func__, so);
#endif /* (DEVELOPMENT || DEBUG) */

		if (!TAILQ_EMPTY(&so->so_comp)) {
#if (DEBUG | DEVELOPMENT)
			panic("%s head %p so_comp not empty\n", __func__, so);
#endif /* (DEVELOPMENT || DEBUG) */

			so_release_accept_list(so);

	if (so->so_pcb == NULL) {
		/* 3915887: mark the socket as ready for dealloc */
		so->so_flags |= SOF_PCBCLEARING;

	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnectlocked(so);

		if (so->so_options & SO_LINGER) {
			lck_mtx_t *mutex_held;

			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO)) {
			if (so->so_proto->pr_getlock != NULL) {
				mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
				mutex_held = so->so_proto->pr_domain->dom_mtx;
			while (so->so_state & SS_ISCONNECTED) {
				ts.tv_sec = (so->so_linger / 100);
				ts.tv_nsec = (so->so_linger % 100) *
				    NSEC_PER_USEC * 1000 * 10;
				error = msleep((caddr_t)&so->so_timeo,
				    mutex_held, PSOCK | PCATCH, "soclose", &ts);
				/*
				 * It's OK when the time fires,
				 * don't report an error
				 */
				if (error == EWOULDBLOCK) {

	if (so->so_usecount == 0) {
		panic("soclose: usecount is zero so=%p\n", so);

	if (so->so_pcb != NULL && !(so->so_flags & SOF_PCBCLEARING)) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);

	if (so->so_usecount <= 0) {
		panic("soclose: usecount is zero so=%p\n", so);

	if (so->so_pcb != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
	    (so->so_state & SS_NOFDREF)) {
		panic("soclose: NOFDREF");

	so->so_state |= SS_NOFDREF;

	if ((so->so_flags & SOF_KNOTE) != 0) {
		KNOTE(&so->so_klist, SO_FILT_HINT_LOCKED);

	atomic_add_32(&so->so_proto->pr_domain->dom_refs, -1);

	VERIFY(so->so_usecount > 0);
soclose(struct socket *so)
	if (so->so_retaincnt == 0) {
		error = soclose_locked(so);
		/*
		 * if the FD is going away, but socket is
		 * retained in kernel remove its reference
		 */
		if (so->so_usecount < 2) {
			panic("soclose: retaincnt non null and so=%p "
			    "usecount=%d\n", so, so->so_usecount);

	socket_unlock(so, 1);
/*
 * Must be called at splnet...
 */
/* Should already be locked */
soabort(struct socket *so)
#ifdef MORE_LOCKING_DEBUG
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

	if ((so->so_flags & SOF_ABORTED) == 0) {
		so->so_flags |= SOF_ABORTED;
		error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
soacceptlock(struct socket *so, struct sockaddr **nam, int dolock)
	so_update_last_owner_locked(so, PROC_NULL);
	so_update_policy(so);

	so_update_necp_policy(so, NULL, NULL);

	if ((so->so_state & SS_NOFDREF) == 0) {
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);

	socket_unlock(so, 1);

soaccept(struct socket *so, struct sockaddr **nam)
	return soacceptlock(so, nam, 1);
soacceptfilter(struct socket *so, struct socket *head)
	struct sockaddr *local = NULL, *remote = NULL;

	/*
	 * Hold the lock even if this socket has not been made visible
	 * to the filter(s). For sockets with global locks, this protects
	 * against the head or peer going away
	 */
	if (sogetaddr_locked(so, &remote, 1) != 0 ||
	    sogetaddr_locked(so, &local, 0) != 0) {
		so->so_state &= ~SS_NOFDREF;
		socket_unlock(so, 1);

		/* Out of resources; try it again next time */
		error = ECONNABORTED;

	error = sflt_accept(head, so, local, remote);

	/*
	 * If we get EJUSTRETURN from one of the filters, mark this socket
	 * as inactive and return it anyway. This newly accepted socket
	 * will be disconnected later before we hand it off to the caller.
	 */
	if (error == EJUSTRETURN) {
		(void) sosetdefunct(current_proc(), so,
		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);

		/*
		 * This may seem like a duplication to the above error
		 * handling part when we return ECONNABORTED, except
		 * the following is done while holding the lock since
		 * the socket has been exposed to the filter(s) earlier.
		 */
		so->so_state &= ~SS_NOFDREF;
		socket_unlock(so, 1);

		/* Propagate socket filter's error code to the caller */
		socket_unlock(so, 1);

	/* Callee checks for NULL pointer */
	sock_freeaddr(remote);
	sock_freeaddr(local);
/*
 * Returns:	0			Success
 *		EOPNOTSUPP		Operation not supported on socket
 *		EISCONN			Socket is connected
 *	<pru_connect>:EADDRNOTAVAIL	Address not available.
 *	<pru_connect>:EINVAL		Invalid argument
 *	<pru_connect>:EAFNOSUPPORT	Address family not supported [notdef]
 *	<pru_connect>:EACCES		Permission denied
 *	<pru_connect>:EADDRINUSE	Address in use
 *	<pru_connect>:EAGAIN		Resource unavailable, try again
 *	<pru_connect>:EPERM		Operation not permitted
 *	<sf_connect_out>:???		[anything a filter writer might set]
 */
soconnectlock(struct socket *so, struct sockaddr *nam, int dolock)
	struct proc *p = current_proc();

	so_update_last_owner_locked(so, p);
	so_update_policy(so);

	so_update_necp_policy(so, NULL, nam);

	/*
	 * If this is a listening socket or if this is a previously-accepted
	 * socket that has been marked as inactive, reject the connect request.
	 */
	if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
		if (so->so_flags & SOF_DEFUNCT) {
			SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
			    "(%d)\n", __func__, proc_pid(p),
			    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
			    SOCK_DOM(so), SOCK_TYPE(so), error);

		socket_unlock(so, 1);

	if ((so->so_restrictions & SO_RESTRICT_DENY_OUT) != 0) {
		socket_unlock(so, 1);

	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnectlocked(so)))) {

		/*
		 * Run connect filter before calling protocol:
		 * - non-blocking connect returns before completion;
		 */
		error = sflt_connectout(so, nam);

		if (error == EJUSTRETURN) {

		error = (*so->so_proto->pr_usrreqs->pru_connect)

		so->so_state &= ~SS_ISCONNECTING;

	socket_unlock(so, 1);

soconnect(struct socket *so, struct sockaddr *nam)
	return soconnectlock(so, nam, 1);
/*
 * Returns:	0			Success
 *	<pru_connect2>:EINVAL[AF_UNIX]
 *	<pru_connect2>:EPROTOTYPE[AF_UNIX]
 *	<pru_connect2>:???		[other protocol families]
 *
 * Notes:	<pru_connect2> is not supported by [TCP].
 */
soconnect2(struct socket *so1, struct socket *so2)
	socket_lock(so1, 1);
	if (so2->so_proto->pr_lock) {
		socket_lock(so2, 1);

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);

	socket_unlock(so1, 1);
	if (so2->so_proto->pr_lock) {
		socket_unlock(so2, 1);
soconnectxlocked(struct socket *so, struct sockaddr *src,
    struct sockaddr *dst, struct proc *p, uint32_t ifscope,
    sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
    uint32_t arglen, uio_t auio, user_ssize_t *bytes_written)
	so_update_last_owner_locked(so, p);
	so_update_policy(so);

	/*
	 * If this is a listening socket or if this is a previously-accepted
	 * socket that has been marked as inactive, reject the connect request.
	 */
	if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
		if (so->so_flags & SOF_DEFUNCT) {
			SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
			    "(%d)\n", __func__, proc_pid(p),
			    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
			    SOCK_DOM(so), SOCK_TYPE(so), error);

	if ((so->so_restrictions & SO_RESTRICT_DENY_OUT) != 0) {

	/*
	 * If protocol is connection-based, can only connect once
	 * unless PR_MULTICONN is set. Otherwise, if connected,
	 * try to disconnect first. This allows user to disconnect
	 * by connecting to, e.g., a null address.
	 */
	if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) &&
	    !(so->so_proto->pr_flags & PR_MULTICONN) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnectlocked(so)) != 0)) {

	if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
	    (flags & CONNECT_DATA_IDEMPOTENT)) {
		so->so_flags1 |= SOF1_DATA_IDEMPOTENT;

		if (flags & CONNECT_DATA_AUTHENTICATED) {
			so->so_flags1 |= SOF1_DATA_AUTHENTICATED;

	/*
	 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
	 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
	 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
	 * Case 3 allows user to combine write with connect even if they have
	 * no use for TFO (such as regular TCP, and UDP).
	 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
	 */
	if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
	    ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio)) {
		so->so_flags1 |= SOF1_PRECONNECT_DATA;

	/*
	 * If a user sets data idempotent and does not pass an uio, or
	 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
	 * SOF1_DATA_IDEMPOTENT.
	 */
	if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
	    (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
		/* We should return EINVAL instead perhaps. */
		so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;

	/*
	 * Run connect filter before calling protocol:
	 * - non-blocking connect returns before completion;
	 */
	error = sflt_connectout(so, dst);
		/* Disable PRECONNECT_DATA, as we don't need to send a SYN anymore. */
		so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
		if (error == EJUSTRETURN) {

		error = (*so->so_proto->pr_usrreqs->pru_connectx)
		    (so, src, dst, p, ifscope, aid, pcid,
		    flags, arg, arglen, auio, bytes_written);

		so->so_state &= ~SS_ISCONNECTING;
		if (error != EINPROGRESS) {
			so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
sodisconnectlocked(struct socket *so)
	if ((so->so_state & SS_ISCONNECTED) == 0) {
	if (so->so_state & SS_ISDISCONNECTING) {

	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
		sflt_notify(so, sock_evt_disconnected, NULL);

/* Locking version */
sodisconnect(struct socket *so)
	error = sodisconnectlocked(so);
	socket_unlock(so, 1);

sodisconnectxlocked(struct socket *so, sae_associd_t aid, sae_connid_t cid)
	/*
	 * Call the protocol disconnectx handler; let it handle all
	 * matters related to the connection state of this session.
	 */
	error = (*so->so_proto->pr_usrreqs->pru_disconnectx)(so, aid, cid);
		/*
		 * The event applies only for the session, not for
		 * the disconnection of individual subflows.
		 */
		if (so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) {
			sflt_notify(so, sock_evt_disconnected, NULL);

sodisconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid)
	error = sodisconnectxlocked(so, aid, cid);
	socket_unlock(so, 1);
1973 * sosendcheck will lock the socket buffer if it isn't locked and
1974 * verify that there is space for the data being inserted.
1976 * Returns: 0 Success
1978 * sblock:EWOULDBLOCK
1985 sosendcheck(struct socket
*so
, struct sockaddr
*addr
, user_ssize_t resid
,
1986 int32_t clen
, int32_t atomic
, int flags
, int *sblocked
,
1987 struct mbuf
*control
)
1994 if (*sblocked
== 0) {
1995 if ((so
->so_snd
.sb_flags
& SB_LOCK
) != 0 &&
1996 so
->so_send_filt_thread
!= 0 &&
1997 so
->so_send_filt_thread
== current_thread()) {
1999 * We're being called recursively from a filter,
2000 * allow this to continue. Radar 4150520.
2001 * Don't set sblocked because we don't want
2002 * to perform an unlock later.
2006 error
= sblock(&so
->so_snd
, SBLOCKWAIT(flags
));
2008 if (so
->so_flags
& SOF_DEFUNCT
) {
2018 * If a send attempt is made on a socket that has been marked
2019 * as inactive (disconnected), reject the request.
2021 if (so
->so_flags
& SOF_DEFUNCT
) {
2024 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
2025 __func__
, proc_selfpid(), proc_best_name(current_proc()),
2026 (uint64_t)DEBUG_KERNEL_ADDRPERM(so
),
2027 SOCK_DOM(so
), SOCK_TYPE(so
), error
);
2031 if (so
->so_state
& SS_CANTSENDMORE
) {
2034 * Can re-inject data of half closed connections
2036 if ((so
->so_state
& SS_ISDISCONNECTED
) == 0 &&
2037 so
->so_snd
.sb_cfil_thread
== current_thread() &&
2038 cfil_sock_data_pending(&so
->so_snd
) != 0) {
2040 "so %llx ignore SS_CANTSENDMORE",
2041 (uint64_t)DEBUG_KERNEL_ADDRPERM(so
));
2043 #endif /* CONTENT_FILTER */
		error = so->so_error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
			if (((so->so_state & SS_ISCONFIRMING) == 0) &&
			    (resid != 0 || clen == 0) &&
			    !(so->so_flags1 & SOF1_PRECONNECT_DATA)) {
		} else if (addr == 0) {
			return (so->so_proto->pr_flags & PR_CONNREQUIRED) ?
			       ENOTCONN : EDESTADDRREQ;

	if (so->so_flags & SOF_ENABLE_MSGS) {
		space = msgq_sbspace(so, control);
		space = sbspace(&so->so_snd);

	if (flags & MSG_OOB) {

	if ((atomic && resid > so->so_snd.sb_hiwat) ||
	    clen > so->so_snd.sb_hiwat) {

	if ((space < resid + clen &&
	    (atomic || (space < (int32_t)so->so_snd.sb_lowat) ||
	    (so->so_type == SOCK_STREAM && so_wait_for_if_feedback(so))) {
		/*
		 * don't block the connectx call when there's more data
		 * than can be copied.
		 */
		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
			if (space < (int32_t)so->so_snd.sb_lowat) {

		if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) ||

		sbunlock(&so->so_snd, TRUE);    /* keep socket locked */
		error = sbwait(&so->so_snd);
		if (so->so_flags & SOF_DEFUNCT
/*
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not). Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 *
 * Returns:	0			Success
 *
 *	sosendcheck:EWOULDBLOCK
 *	sosendcheck:???			[value from so_error]
 *	<pru_send>:ECONNRESET[TCP]
 *	<pru_send>:EINVAL[TCP]
 *	<pru_send>:ENOBUFS[TCP]
 *	<pru_send>:EADDRINUSE[TCP]
 *	<pru_send>:EADDRNOTAVAIL[TCP]
 *	<pru_send>:EAFNOSUPPORT[TCP]
 *	<pru_send>:EACCES[TCP]
 *	<pru_send>:EAGAIN[TCP]
 *	<pru_send>:EPERM[TCP]
 *	<pru_send>:EMSGSIZE[TCP]
 *	<pru_send>:EHOSTUNREACH[TCP]
 *	<pru_send>:ENETUNREACH[TCP]
 *	<pru_send>:ENETDOWN[TCP]
 *	<pru_send>:ENOMEM[TCP]
 *	<pru_send>:ENOBUFS[TCP]
 *	<pru_send>:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
 *	<pru_send>:EINVAL[AF_UNIX]
 *	<pru_send>:EOPNOTSUPP[AF_UNIX]
 *	<pru_send>:EPIPE[AF_UNIX]
 *	<pru_send>:ENOTCONN[AF_UNIX]
 *	<pru_send>:EISCONN[AF_UNIX]
 *	<pru_send>:???[AF_UNIX]		[whatever a filter author chooses]
 *	<sf_data_out>:???		[whatever a filter author chooses]
 *
 * Notes:	Other <pru_send> returns depend on the protocol family; all
 *		<sf_data_out> returns depend on what the filter author causes
 *		their filter to return.
 */
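
/*
 * Illustrative sketch (added; simplified) of the two calling modes described
 * above: either a uio describing user data, or a prebuilt mbuf chain "top"
 * with no uio:
 *
 *	// data described by a uio; addr may be NULL on connected sockets
 *	error = sosend(so, addr, uio, NULL, control, flags);
 *
 *	// data already packaged in an mbuf chain
 *	error = sosend(so, addr, NULL, top, control, flags);
 */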
sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags)
	struct mbuf *m, *freelist = NULL;
	user_ssize_t space, len, resid, orig_resid;
	int clen = 0, error, dontroute, mlen, sendflags;
	int atomic = sosendallatonce(so) || top;
	struct proc *p = current_proc();
	struct mbuf *control_copy = NULL;
	uint16_t headroom = 0;
	boolean_t en_tracing = FALSE;

		resid = uio_resid(uio);
		resid = top->m_pkthdr.len;

	KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START), so, resid,
	    so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);
	/*
	 * trace if tracing & network (vs. unix) sockets & and
	 */
	if (ENTR_SHOULDTRACE &&
	    (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
		struct inpcb *inp = sotoinpcb(so);
		if (inp->inp_last_outifp != NULL &&
		    !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
			KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START,
			    VM_KERNEL_ADDRPERM(so),
			    ((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0),

	/*
	 * Re-injection should not affect process accounting
	 */
	if ((flags & MSG_SKIPCFIL) == 0) {
		so_update_last_owner_locked(so, p);
		so_update_policy(so);

		so_update_necp_policy(so, NULL, addr);

	if (so->so_type != SOCK_STREAM && (flags & MSG_OOB) != 0) {
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid. On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Usually, MSG_EOR isn't used on SOCK_STREAM type sockets.
	 * But it will be used by sockets doing message delivery.
	 *
	 * Note: We limit resid to be a positive int value as we use
	 * imin() to set bytes_to_copy -- radr://14558484
	 */
	if (resid < 0 || resid > INT_MAX || (so->so_type == SOCK_STREAM &&
	    !(so->so_flags & SOF_ENABLE_MSGS) && (flags & MSG_EOR))) {

	dontroute = (flags & MSG_DONTROUTE) &&
	    (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);

	if (control != NULL) {
		clen = control->m_len;

	if (soreserveheadroom != 0) {
		headroom = so->so_pktheadroom;

		error = sosendcheck(so, addr, resid, clen, atomic, flags,
		    &sblocked, control);
2270 if (so
->so_flags
& SOF_ENABLE_MSGS
) {
2271 space
= msgq_sbspace(so
, control
);
2273 space
= sbspace(&so
->so_snd
) - clen
;
2275 space
+= ((flags
& MSG_OOB
) ? 1024 : 0);
2280 * Data is prepackaged in "top".
2283 if (flags
& MSG_EOR
) {
2284 top
->m_flags
|= M_EOR
;
2293 bytes_to_copy
= imin(resid
, space
);
2295 bytes_to_alloc
= bytes_to_copy
;
2297 bytes_to_alloc
+= headroom
;
2300 if (sosendminchain
> 0) {
2303 chainlength
= sosendmaxchain
;
2307 * Use big 4 KB cluster when the outgoing interface
2308 * does not prefer 2 KB clusters
2310 bigcl
= !(so
->so_flags1
& SOF1_IF_2KCL
) ||
2311 sosendbigcl_ignore_capab
;
2314 * Attempt to use larger than system page-size
2315 * clusters for large writes only if there is
2316 * a jumbo cluster pool and if the socket is
2317 * marked accordingly.
2319 jumbocl
= sosendjcl
&& njcl
> 0 &&
2320 ((so
->so_flags
& SOF_MULTIPAGES
) ||
2321 sosendjcl_ignore_capab
) &&
2324 socket_unlock(so
, 0);
				int hdrs_needed = (top == NULL) ? 1 : 0;

				/*
				 * try to maintain a local cache of mbuf
				 * clusters needed to complete this
				 * write the list is further limited to
				 * the number that are currently needed
				 * to fill the socket this mechanism
				 * allows a large number of mbufs/
				 * clusters to be grabbed under a single
				 * mbuf lock... if we can't get any
				 * clusters, then fall back to trying
				 * for mbufs if we fail early (or
				 * miscalculate the number needed) make
				 * sure to release any clusters we
				 * haven't yet consumed.
				 */
				if (freelist == NULL &&
				    bytes_to_alloc > MBIGCLBYTES &&

				    bytes_to_alloc / M16KCLBYTES;

					if ((bytes_to_alloc -
					    (num_needed * M16KCLBYTES))

					    m_getpackets_internal(
						(unsigned int *)&num_needed,
						hdrs_needed, M_WAIT, 0,

					/*
					 * Fall back to 4K cluster size
					 * if allocation failed
					 */

				if (freelist == NULL &&
				    bytes_to_alloc > MCLBYTES &&

				    bytes_to_alloc / MBIGCLBYTES;

					if ((bytes_to_alloc -
					    (num_needed * MBIGCLBYTES)) >=

					    m_getpackets_internal(
						(unsigned int *)&num_needed,
						hdrs_needed, M_WAIT, 0,

					/*
					 * Fall back to cluster size
					 * if allocation failed
					 */

				/*
				 * Allocate a cluster as we want to
				 * avoid splitting the data in more
				 * than one segment and using MINCLSIZE
				 * would lead us to allocate two mbufs
				 */
				if (soreserveheadroom != 0 &&

				    bytes_to_alloc > _MHLEN) ||
				    bytes_to_alloc > _MLEN)) {
					num_needed = ROUNDUP(bytes_to_alloc, MCLBYTES) /

					    m_getpackets_internal(
						(unsigned int *)&num_needed,
						hdrs_needed, M_WAIT, 0,

					/*
					 * Fall back to a single mbuf
					 * if allocation failed
					 */
				} else if (freelist == NULL &&
				    bytes_to_alloc > MINCLSIZE) {

					    bytes_to_alloc / MCLBYTES;

					if ((bytes_to_alloc -
					    (num_needed * MCLBYTES)) >=

					    m_getpackets_internal(
						(unsigned int *)&num_needed,
						hdrs_needed, M_WAIT, 0,

					/*
					 * Fall back to a single mbuf
					 * if allocation failed
					 */

				/*
				 * For datagram protocols, leave
				 * headroom for protocol headers
				 * in the first cluster of the chain
				 */
				if (freelist != NULL && atomic &&
				    top == NULL && headroom > 0) {
					freelist->m_data += headroom;

				/*
				 * Fall back to regular mbufs without
				 * reserving the socket headroom
				 */
				if (freelist == NULL) {

				if (freelist == NULL) {

					/*
					 * For datagram protocols,
					 * leave room for protocol
					 * headers in first mbuf.
					 */
					if (atomic && top == NULL &&
					    bytes_to_copy < MHLEN) {

				freelist = m->m_next;

				if ((m->m_flags & M_EXT)) {
					mlen = m->m_ext.ext_size -

				} else if ((m->m_flags & M_PKTHDR)) {
					    MHLEN - M_LEADINGSPACE(m);

					mlen = MLEN - M_LEADINGSPACE(m);

				len = imin(mlen, bytes_to_copy);

					error = uiomove(mtod(m, caddr_t),

					resid = uio_resid(uio);

					top->m_pkthdr.len += len;

				if (flags & MSG_EOR) {
					top->m_flags |= M_EOR;

				bytes_to_copy = min(resid, space);
			} while (space > 0 &&
			    (chainlength < sosendmaxchain || atomic ||
			    resid < MINCLSIZE));
			so->so_options |= SO_DONTROUTE;

		/*
		 * Compute flags here, for pru_send and NKEs
		 *
		 * If the user set MSG_EOF, the protocol
		 * understands this flag and nothing left to
		 * send then use PRU_SEND_EOF instead of PRU_SEND.
		 */
		sendflags = (flags & MSG_OOB) ? PRUS_OOB :
		    ((flags & MSG_EOF) &&
		    (so->so_proto->pr_flags & PR_IMPLOPCL) &&
		    (resid <= 0)) ? PRUS_EOF :
		    /* If there is more to send set PRUS_MORETOCOME */
		    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;

		if ((flags & MSG_SKIPCFIL) == 0) {
			/*
			 * Socket filter processing
			 */
			error = sflt_data_out(so, addr, &top,
			    &control, (sendflags & MSG_OOB) ?
			    sock_data_filt_flag_oob : 0);

			if (error == EJUSTRETURN) {

			/*
			 * Content filter processing
			 */
			error = cfil_sock_data_out(so, addr, top,
			    control, sendflags);

			if (error == EJUSTRETURN) {

#endif /* CONTENT_FILTER */

		if (so->so_flags & SOF_ENABLE_MSGS) {
			/*
			 * Make a copy of control mbuf,
			 * so that msg priority can be
			 * passed to subsequent mbufs.
			 */
			control_copy = m_dup(control, M_NOWAIT);

		error = (*so->so_proto->pr_usrreqs->pru_send)
		    (so, sendflags, top, addr, control, p);

			so->so_options &= ~SO_DONTROUTE;

			control = control_copy;
			control_copy = NULL;

	} while (resid && space > 0);

	sbunlock(&so->so_snd, FALSE);	/* will unlock socket */

	socket_unlock(so, 1);

	if (control != NULL) {

	if (freelist != NULL) {
		m_freem_list(freelist);

	if (control_copy != NULL) {
		m_freem(control_copy);

	soclearfastopen(so);

		/* resid passed here is the bytes left in uio */
		KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END,
		    VM_KERNEL_ADDRPERM(so),
		    ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
		    (int64_t)(orig_resid - resid));

	KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, so, resid,
	    so->so_snd.sb_cc, space, error);
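
/*
 * Illustrative sketch (not built): the two ways a kernel caller can hand
 * data to sosend(), per the block comment above -- either a "uio"
 * describing the bytes, or a pre-built mbuf chain passed in "top" (in
 * which case uio must be NULL).  The wrapper name and buffer below are
 * assumptions for illustration only; they are not part of this file.
 */
#if 0
static int
sosend_sketch(struct socket *so, void *buf, size_t len)
{
	/* Path 1: describe kernel-space data with a uio. */
	uio_t auio = uio_create(1, 0, UIO_SYSSPACE, UIO_WRITE);
	int error;

	if (auio == NULL) {
		return ENOMEM;
	}
	uio_addiov(auio, CAST_USER_ADDR_T(buf), len);
	error = sosend(so, NULL, auio, NULL, NULL, 0);
	uio_free(auio);

	/*
	 * Path 2: hand over an already-built mbuf chain in "top"; sosend()
	 * frees the data and control buffers on return, success or failure:
	 *	error = sosend(so, NULL, NULL, top, NULL, 0);
	 */
	return error;
}
#endif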
sosend_reinject(struct socket *so, struct sockaddr *addr, struct mbuf *top, struct mbuf *control, uint32_t sendflags)
	struct mbuf *m0 = NULL, *control_end = NULL;

	socket_lock_assert_owned(so);

	/*
	 * top must point to the mbuf chain to be sent.
	 * If control is not NULL, top must be a packet header
	 */
	VERIFY(top != NULL &&
	    (control == NULL || top->m_flags & M_PKTHDR));

	/*
	 * If control is not passed in, see if we can get it
	 */
	if (control == NULL && (top->m_flags & M_PKTHDR) == 0) {
		// Locate start of control if present and start of data
		for (m0 = top; m0 != NULL; m0 = m0->m_next) {
			if (m0->m_flags & M_PKTHDR) {

			} else if (m0->m_type == MT_CONTROL) {
				if (control == NULL) {
					// Found start of control

				if (control != NULL && m0->m_next != NULL && m0->m_next->m_type != MT_CONTROL) {
					// Found end of control

		if (control_end != NULL) {
			control_end->m_next = NULL;

	int error = (*so->so_proto->pr_usrreqs->pru_send)
	    (so, sendflags, top, addr, control, current_proc());
/*
 * Supports only connected sockets (no address) without ancillary data
 * (control mbuf) for atomic protocols
 */
sosend_list(struct socket *so, struct uio **uioarray, u_int uiocnt, int flags)
	struct mbuf *m, *freelist = NULL;
	user_ssize_t len, resid;
	int error, dontroute, mlen;
	int atomic = sosendallatonce(so);
	struct proc *p = current_proc();
	struct mbuf *top = NULL;
	uint16_t headroom = 0;

	KERNEL_DEBUG((DBG_FNC_SOSEND_LIST | DBG_FUNC_START), so, uiocnt,
	    so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);

	if (so->so_type != SOCK_DGRAM) {

	if (so->so_proto->pr_usrreqs->pru_send_list == NULL) {
		error = EPROTONOSUPPORT;

	if (flags & ~(MSG_DONTWAIT | MSG_NBIO)) {

	resid = uio_array_resid(uioarray, uiocnt);

	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid. On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Note: We limit resid to be a positive int value as we use
	 * imin() to set bytes_to_copy -- radr://14558484
	 */
	if (resid < 0 || resid > INT_MAX) {

	so_update_last_owner_locked(so, p);
	so_update_policy(so);

	so_update_necp_policy(so, NULL, NULL);

	dontroute = (flags & MSG_DONTROUTE) &&
	    (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);

	error = sosendcheck(so, NULL, resid, 0, atomic, flags,

	/*
	 * Use big 4 KB clusters when the outgoing interface does not prefer
	 */
	bigcl = !(so->so_flags1 & SOF1_IF_2KCL) || sosendbigcl_ignore_capab;

	if (soreserveheadroom != 0) {
		headroom = so->so_pktheadroom;

		size_t maxpktlen = 0;

		if (sosendminchain > 0) {

			chainlength = sosendmaxchain;

		socket_unlock(so, 0);

		/*
		 * Find a set of uio that fit in a reasonable number
		 */
		for (i = uiofirst; i < uiocnt; i++) {
			struct uio *auio = uioarray[i];

			len = uio_resid(auio);

			/* Do nothing for empty messages */

			if (len > maxpktlen) {

			if (chainlength > sosendmaxchain) {

		/*
		 * Nothing left to send
		 */
		if (num_needed == 0) {

		/*
		 * Allocate buffer large enough to include headroom space for
		 * network and link header
		 */
		bytes_to_alloc = maxpktlen + headroom;

		/*
		 * Allocate a single contiguous buffer of the smallest available
		 * size when possible
		 */
		if (bytes_to_alloc > MCLBYTES &&
		    bytes_to_alloc <= MBIGCLBYTES && bigcl) {
			freelist = m_getpackets_internal(
				(unsigned int *)&num_needed,
				num_needed, M_WAIT, 1,

		} else if (bytes_to_alloc > _MHLEN &&
		    bytes_to_alloc <= MCLBYTES) {
			freelist = m_getpackets_internal(
				(unsigned int *)&num_needed,
				num_needed, M_WAIT, 1,

			freelist = m_allocpacket_internal(
				(unsigned int *)&num_needed,
				bytes_to_alloc, NULL, M_WAIT, 1, 0);
		if (freelist == NULL) {

		/*
		 * Copy each uio of the set into its own mbuf packet
		 */
		for (i = uiofirst, m = freelist;
		    i < uiolast && m != NULL;

			struct uio *auio = uioarray[i];

			bytes_to_copy = uio_resid(auio);

			/* Do nothing for empty messages */
			if (bytes_to_copy == 0) {

			/*
			 * Leave headroom for protocol headers
			 * in the first mbuf of the chain
			 */
			m->m_data += headroom;

			for (n = m; n != NULL; n = n->m_next) {
				if ((m->m_flags & M_EXT)) {
					mlen = m->m_ext.ext_size -

				} else if ((m->m_flags & M_PKTHDR)) {
					    MHLEN - M_LEADINGSPACE(m);

					mlen = MLEN - M_LEADINGSPACE(m);

				len = imin(mlen, bytes_to_copy);

				/*
				 * Note: uiomove() decrements the iovec
				 */
				error = uiomove(mtod(n, caddr_t),

				m->m_pkthdr.len += len;

				VERIFY(m->m_pkthdr.len <= maxpktlen);

				bytes_to_copy -= len;

			if (m->m_pkthdr.len == 0) {
				    "%s:%d so %llx pkt %llx type %u len null\n",
				    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
				    (uint64_t)DEBUG_KERNEL_ADDRPERM(m),

			so->so_options |= SO_DONTROUTE;

		if ((flags & MSG_SKIPCFIL) == 0) {
			struct mbuf **prevnextp = NULL;

			for (i = uiofirst, m = top;
			    i < uiolast && m != NULL;

				struct mbuf *nextpkt = m->m_nextpkt;

				/*
				 * Socket filter processing
				 */
				error = sflt_data_out(so, NULL, &m,

				if (error != 0 && error != EJUSTRETURN) {

				/*
				 * Content filter processing
				 */
				error = cfil_sock_data_out(so, NULL, m,

				if (error != 0 && error != EJUSTRETURN) {

#endif /* CONTENT_FILTER */
				/*
				 * Remove packet from the list when
				 * swallowed by a filter
				 */
				if (error == EJUSTRETURN) {

					if (prevnextp != NULL) {
						*prevnextp = nextpkt;

					prevnextp = &m->m_nextpkt;

			error = (*so->so_proto->pr_usrreqs->pru_send_list)
			    (so, 0, top, NULL, NULL, p);

			so->so_options &= ~SO_DONTROUTE;

	} while (resid > 0 && error == 0);

	sbunlock(&so->so_snd, FALSE);	/* will unlock socket */

	socket_unlock(so, 1);

	if (freelist != NULL) {
		m_freem_list(freelist);

	KERNEL_DEBUG(DBG_FNC_SOSEND_LIST | DBG_FUNC_END, so, resid,
	    so->so_snd.sb_cc, 0, error);
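
/*
 * Illustrative sketch (not built): sosend_list() takes an array of uio's,
 * one per datagram, and requires a connected SOCK_DGRAM socket with no
 * address or control mbufs (see the comment above sosend_list()).  The
 * two-packet setup below is an assumption for illustration only.
 */
#if 0
static int
sosend_list_sketch(struct socket *so, void *pkt0, size_t len0,
    void *pkt1, size_t len1)
{
	struct uio *uioarray[2] = { NULL, NULL };
	int error;

	uioarray[0] = uio_create(1, 0, UIO_SYSSPACE, UIO_WRITE);
	uioarray[1] = uio_create(1, 0, UIO_SYSSPACE, UIO_WRITE);
	if (uioarray[0] == NULL || uioarray[1] == NULL) {
		error = ENOMEM;
		goto out;
	}
	uio_addiov(uioarray[0], CAST_USER_ADDR_T(pkt0), len0);
	uio_addiov(uioarray[1], CAST_USER_ADDR_T(pkt1), len1);

	/* One datagram is sent per uio; MSG_DONTWAIT keeps it non-blocking. */
	error = sosend_list(so, uioarray, 2, MSG_DONTWAIT);
out:
	if (uioarray[0] != NULL) {
		uio_free(uioarray[0]);
	}
	if (uioarray[1] != NULL) {
		uio_free(uioarray[1]);
	}
	return error;
}
#endif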
/*
 * May return ERESTART when packet is dropped by MAC policy check
 */
soreceive_addr(struct proc *p, struct socket *so, struct sockaddr **psa,
    int flags, struct mbuf **mp, struct mbuf **nextrecordp, int canwait)
	struct mbuf *m = *mp;
	struct mbuf *nextrecord = *nextrecordp;

	KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
#if CONFIG_MACF_SOCKET_SUBSET
	/*
	 * Call the MAC framework for policy checking if we're in
	 * the user process context and the socket isn't connected.
	 */
	if (p != kernproc && !(so->so_state & SS_ISCONNECTED)) {
		struct mbuf *m0 = m;
		/*
		 * Dequeue this record (temporarily) from the receive
		 * list since we're about to drop the socket's lock
		 * where a new record may arrive and be appended to
		 * the list. Upon MAC policy failure, the record
		 * will be freed. Otherwise, we'll add it back to
		 * the head of the list. We cannot rely on SB_LOCK
		 * because append operation uses the socket's lock.
		 */
			m->m_nextpkt = NULL;
			sbfree(&so->so_rcv, m);
		} while (m != NULL);

		so->so_rcv.sb_mb = nextrecord;
		SB_EMPTY_FIXUP(&so->so_rcv);
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 1a");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 1a");
		socket_unlock(so, 0);

		if (mac_socket_check_received(proc_ucred(p), so,
		    mtod(m, struct sockaddr *)) != 0) {
			/*
			 * MAC policy failure; free this record and
			 * process the next record (or block until
			 * one is available). We have adjusted sb_cc
			 * and sb_mbcnt above so there is no need to
			 * call sbfree() again.
			 */

			/*
			 * Clear SB_LOCK but don't unlock the socket.
			 * Process the next record or wait for one.
			 */
			sbunlock(&so->so_rcv, TRUE);	/* stay locked */

		/*
		 * If the socket has been defunct'd, drop it.
		 */
		if (so->so_flags & SOF_DEFUNCT) {

		/*
		 * Re-adjust the socket receive list and re-enqueue
		 * the record in front of any packets which may have
		 * been appended while we dropped the lock.
		 */
		for (m = m0; m->m_next != NULL; m = m->m_next) {
			sballoc(&so->so_rcv, m);
		sballoc(&so->so_rcv, m);
		if (so->so_rcv.sb_mb == NULL) {
			so->so_rcv.sb_lastrecord = m0;
			so->so_rcv.sb_mbtail = m;

		nextrecord = m->m_nextpkt = so->so_rcv.sb_mb;
		so->so_rcv.sb_mb = m;
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 1b");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 1b");
#endif /* CONFIG_MACF_SOCKET_SUBSET */

		*psa = dup_sockaddr(mtod(m, struct sockaddr *), canwait);
		if ((*psa == NULL) && (flags & MSG_NEEDSA)) {
			error = EWOULDBLOCK;

	if (flags & MSG_PEEK) {

		sbfree(&so->so_rcv, m);
		if (m->m_next == NULL && so->so_rcv.sb_cc != 0) {
			panic("%s: about to create invalid socketbuf",

		MFREE(m, so->so_rcv.sb_mb);
		m = so->so_rcv.sb_mb;
			m->m_nextpkt = nextrecord;

			so->so_rcv.sb_mb = nextrecord;
			SB_EMPTY_FIXUP(&so->so_rcv);

	*nextrecordp = nextrecord;
/*
 * Process one or more MT_CONTROL mbufs present before any data mbufs
 * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
 * just copy the data; if !MSG_PEEK, we call into the protocol to
 * perform externalization.
 */
soreceive_ctl(struct socket *so, struct mbuf **controlp, int flags,
    struct mbuf **mp, struct mbuf **nextrecordp)
	struct mbuf *cm = NULL, *cmn;
	struct mbuf **cme = &cm;
	struct sockbuf *sb_rcv = &so->so_rcv;
	struct mbuf **msgpcm = NULL;
	struct mbuf *m = *mp;
	struct mbuf *nextrecord = *nextrecordp;
	struct protosw *pr = so->so_proto;

	/*
	 * Externalizing the control messages would require us to
	 * drop the socket's lock below. Once we re-acquire the
	 * lock, the mbuf chain might change. In order to preserve
	 * consistency, we unlink all control messages from the
	 * first mbuf chain in one shot and link them separately
	 * onto a different chain.
	 */
		if (flags & MSG_PEEK) {
			if (controlp != NULL) {
				if (*controlp == NULL) {

				*controlp = m_copy(m, 0, m->m_len);

				/*
				 * If we failed to allocate an mbuf,
				 * release any previously allocated
				 * mbufs for control data. Return
				 * an error. Keep the mbufs in the
				 * socket as this is using
				 */
				if (*controlp == NULL) {

				controlp = &(*controlp)->m_next;

			m->m_nextpkt = NULL;

			sb_rcv->sb_mb = m->m_next;

			cme = &(*cme)->m_next;

	} while (m != NULL && m->m_type == MT_CONTROL);

	if (!(flags & MSG_PEEK)) {
		if (sb_rcv->sb_mb != NULL) {
			sb_rcv->sb_mb->m_nextpkt = nextrecord;

			sb_rcv->sb_mb = nextrecord;
			SB_EMPTY_FIXUP(sb_rcv);

		if (nextrecord == NULL) {
			sb_rcv->sb_lastrecord = m;

	SBLASTRECORDCHK(&so->so_rcv, "soreceive ctl");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive ctl");

	while (cm != NULL) {

		cmsg_type = mtod(cm, struct cmsghdr *)->cmsg_type;

		/*
		 * Call the protocol to externalize SCM_RIGHTS message
		 * and return the modified message to the caller upon
		 * success. Otherwise, all other control messages are
		 * returned unmodified to the caller. Note that we
		 * only get into this loop if MSG_PEEK is not set.
		 */
		if (pr->pr_domain->dom_externalize != NULL &&
		    cmsg_type == SCM_RIGHTS) {
			/*
			 * Release socket lock: see 3903171. This
			 * would also allow more records to be appended
			 * to the socket buffer. We still have SB_LOCK
			 * set on it, so we can be sure that the head
			 * of the mbuf chain won't change.
			 */
			socket_unlock(so, 0);
			error = (*pr->pr_domain->dom_externalize)(cm);

		if (controlp != NULL && error == 0) {

			controlp = &(*controlp)->m_next;

	/*
	 * Update the value of nextrecord in case we received new
	 * records when the socket was unlocked above for
	 * externalizing SCM_RIGHTS.
	 */
		nextrecord = sb_rcv->sb_mb->m_nextpkt;

		nextrecord = sb_rcv->sb_mb;

	*nextrecordp = nextrecord;
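
/*
 * Illustrative sketch (not built, user-space view): once the protocol's
 * dom_externalize hook has turned an SCM_RIGHTS message into file
 * descriptors, a receiver typically walks the returned control data with
 * the standard CMSG macros.  This is a generic POSIX example, not code
 * from this file.
 */
#if 0
static int
recv_fd_sketch(int sock)
{
	char data[64], cbuf[CMSG_SPACE(sizeof(int))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	int fd = -1;

	if (recvmsg(sock, &msg, 0) < 0) {
		return -1;
	}
	for (struct cmsghdr *cm = CMSG_FIRSTHDR(&msg); cm != NULL;
	    cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == SOL_SOCKET &&
		    cm->cmsg_type == SCM_RIGHTS) {
			memcpy(&fd, CMSG_DATA(cm), sizeof(fd));
		}
	}
	return fd;	/* externalized descriptor, or -1 if none present */
}
#endif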
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*. In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain. The uio is then used
 * only for the count in uio_resid.
 *
 * Returns:	0			Success
 *		sblock:EWOULDBLOCK
 *		sodelayed_copy:EFAULT
 *	<pru_rcvoob>:EINVAL[TCP]
 *	<pru_rcvoob>:EWOULDBLOCK[TCP]
 *	<pr_domain->dom_externalize>:EMSGSIZE[AF_UNIX]
 *	<pr_domain->dom_externalize>:ENOBUFS[AF_UNIX]
 *	<pr_domain->dom_externalize>:???
 *
 * Notes:	Additional return values from calls through <pru_rcvoob> and
 *		<pr_domain->dom_externalize> depend on protocols other than
 *		TCP or AF_UNIX, which are documented above.
 */
soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
	struct mbuf *m, **mp, *ml = NULL;
	struct mbuf *nextrecord, *free_list;
	int flags, error, offset;
	struct protosw *pr = so->so_proto;
	user_ssize_t orig_resid = uio_resid(uio);
	user_ssize_t delayed_copy_len;
	struct proc *p = current_proc();
	boolean_t en_tracing = FALSE;

	/*
	 * Sanity check on the length passed by caller as we are making 'int'
	 */
	if (orig_resid < 0 || orig_resid > INT_MAX) {

	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, so,
	    uio_resid(uio), so->so_rcv.sb_cc, so->so_rcv.sb_lowat,
	    so->so_rcv.sb_hiwat);

	so_update_last_owner_locked(so, p);
	so_update_policy(so);

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount == 1) {
		panic("%s: so=%x no other reference on socket\n", __func__, so);

	if (controlp != NULL) {

	if (flagsp != NULL) {
		flags = *flagsp & ~MSG_EOR;

	/*
	 * If a recv attempt is made on a previously-accepted socket
	 * that has been marked as inactive (disconnected), reject
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		struct sockbuf *sb = &so->so_rcv;

		SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
		    __func__, proc_pid(p), proc_best_name(p),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so), error);
		/*
		 * This socket should have been disconnected and flushed
		 * prior to being returned from sodefunct(); there should
		 * be no data on its receive list, so panic otherwise.
		 */
		if (so->so_state & SS_DEFUNCT) {
			sb_empty_assert(sb, __func__);

		socket_unlock(so, 1);

	if ((so->so_flags1 & SOF1_PRECONNECT_DATA) &&
	    pr->pr_usrreqs->pru_preconnect) {
		/*
		 * A user may set the CONNECT_RESUME_ON_READ_WRITE-flag but not
		 * call write() right after this. *If* the app calls a read
		 * we do not want to block this read indefinitely. Thus,
		 * we trigger a connect so that the session gets initiated.
		 */
		error = (*pr->pr_usrreqs->pru_preconnect)(so);

			socket_unlock(so, 1);

	if (ENTR_SHOULDTRACE &&
	    (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
		/*
		 * enable energy tracing for inet sockets that go over
		 * non-loopback interfaces only.
		 */
		struct inpcb *inp = sotoinpcb(so);
		if (inp->inp_last_outifp != NULL &&
		    !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
			KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_START,
			    VM_KERNEL_ADDRPERM(so),
			    ((so->so_state & SS_NBIO) ?
			    kEnTrFlagNonBlocking : 0),
			    (int64_t)orig_resid);

	/*
	 * When SO_WANTOOBFLAG is set we try to get out-of-band data
	 * regardless of the flags argument. Here is the case where
	 * out-of-band data is not inline.
	 */
	if ((flags & MSG_OOB) ||
	    ((so->so_options & SO_WANTOOBFLAG) != 0 &&
	    (so->so_options & SO_OOBINLINE) == 0 &&
	    (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
		m = m_get(M_WAIT, MT_DATA);

			socket_unlock(so, 1);
			KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
			    ENOBUFS, 0, 0, 0, 0);

		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);

		socket_unlock(so, 0);

			error = uiomove(mtod(m, caddr_t),
			    imin(uio_resid(uio), m->m_len), uio);

		} while (uio_resid(uio) && error == 0 && m != NULL);
		if ((so->so_options & SO_WANTOOBFLAG) != 0) {
			if (error == EWOULDBLOCK || error == EINVAL) {
				/*
				 * Let's try to get normal data:
				 * EWOULDBLOCK: out-of-band data not
				 * received yet. EINVAL: out-of-band data
				 */

		} else if (error == 0 && flagsp != NULL) {

		socket_unlock(so, 1);

			KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
			    VM_KERNEL_ADDRPERM(so), 0,
			    (int64_t)(orig_resid - uio_resid(uio)));

		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,

	if (so->so_state & SS_ISCONFIRMING && uio_resid(uio)) {
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

	delayed_copy_len = 0;

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1) {
		printf("soreceive: sblock so=0x%llx ref=%d on socket\n",
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so), so->so_usecount);

	/*
	 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
	 * and if so just return to the caller. This could happen when
	 * soreceive() is called by a socket upcall function during the
	 * time the socket is freed. The socket buffer would have been
	 * locked across the upcall, therefore we cannot put this thread
	 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
	 * we may livelock), because the lock on the socket buffer will
	 * only be released when the upcall routine returns to its caller.
	 * Because the socket has been officially closed, there can be
	 * no further read on it.
	 *
	 * A multipath subflow socket would have its SS_NOFDREF set by
	 * default, so check for SOF_MP_SUBFLOW socket flag; when the
	 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
	    (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
		socket_unlock(so, 1);

	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));

		socket_unlock(so, 1);
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,

			KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
			    VM_KERNEL_ADDRPERM(so), 0,
			    (int64_t)(orig_resid - uio_resid(uio)));

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio_resid(uio)) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
		/*
		 * Panic if we notice inconsistencies in the socket's
		 * receive list; both sb_mb and sb_cc should correctly
		 * reflect the contents of the list, otherwise we may
		 * end up with false positives during select() or poll()
		 * which could put the application in a bad state.
		 */
		SB_MB_CHECK(&so->so_rcv);

			error = so->so_error;
			if ((flags & MSG_PEEK) == 0) {

		if (so->so_state & SS_CANTRCVMORE) {
			/*
			 * Deal with half closed connections
			 */
			if ((so->so_state & SS_ISDISCONNECTED) == 0 &&
			    cfil_sock_data_pending(&so->so_rcv) != 0) {
				    "so %llx ignore SS_CANTRCVMORE",
				    (uint64_t)DEBUG_KERNEL_ADDRPERM(so));
#endif /* CONTENT_FILTER */

		for (; m != NULL; m = m->m_next) {
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;

		if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {

		if (uio_resid(uio) == 0) {

		if ((so->so_state & SS_NBIO) ||
		    (flags & (MSG_DONTWAIT | MSG_NBIO))) {
			error = EWOULDBLOCK;

		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
		sbunlock(&so->so_rcv, TRUE);	/* keep socket locked */
#if EVEN_MORE_LOCKING_DEBUG
			printf("Waiting for socket data\n");

		error = sbwait(&so->so_rcv);
#if EVEN_MORE_LOCKING_DEBUG
			printf("SORECEIVE - sbwait returned %d\n", error);

		if (so->so_usecount < 1) {
			panic("%s: after 2nd sblock so=%p ref=%d on socket\n",
			    __func__, so, so->so_usecount);

			socket_unlock(so, 1);
			KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,

				KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
				    VM_KERNEL_ADDRPERM(so), 0,
				    (int64_t)(orig_resid - uio_resid(uio)));

	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;

	if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
		error = soreceive_addr(p, so, psa, flags, &m, &nextrecord,

		if (error == ERESTART) {
		} else if (error != 0) {

	/*
	 * Process one or more MT_CONTROL mbufs present before any data mbufs
	 * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
	 * just copy the data; if !MSG_PEEK, we call into the protocol to
	 * perform externalization.
	 */
	if (m != NULL && m->m_type == MT_CONTROL) {
		error = soreceive_ctl(so, controlp, flags, &m, &nextrecord);

	/*
	 * If the socket is a TCP socket with message delivery
	 * enabled, then create a control msg to deliver the
	 * relative TCP sequence number for this data. Waiting
	 * until this point will protect against failures to
	 * allocate an mbuf for control msgs.
	 */
	if (so->so_type == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP &&
	    (so->so_flags & SOF_ENABLE_MSGS) && controlp != NULL) {
		struct mbuf *seq_cm;

		seq_cm = sbcreatecontrol((caddr_t)&m->m_pkthdr.msg_seq,
		    sizeof(uint32_t), SCM_SEQNUM, SOL_SOCKET);
		if (seq_cm == NULL) {
			/* unable to allocate a control mbuf */

			controlp = &seq_cm->m_next;

		if (!(flags & MSG_PEEK)) {
			/*
			 * We get here because m points to an mbuf following
			 * any MT_SONAME or MT_CONTROL mbufs which have been
			 * processed above. In any case, m should be pointing
			 * to the head of the mbuf chain, and the nextrecord
			 * should be either NULL or equal to m->m_nextpkt.
			 * See comments above about SB_LOCK.
			 */
			if (m != so->so_rcv.sb_mb ||
			    m->m_nextpkt != nextrecord) {
				panic("%s: post-control !sync so=%p m=%p "
				    "nextrecord=%p\n", __func__, so, m,

			if (nextrecord == NULL) {
				so->so_rcv.sb_lastrecord = m;

		if (type == MT_OOBDATA) {

		if (!(flags & MSG_PEEK)) {
			SB_EMPTY_FIXUP(&so->so_rcv);

	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy) {

	    (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA) {

		} else if (type == MT_OOBDATA) {

		/*
		 * Make sure to always set MSG_OOB event when getting
		 * out of band data inline.
		 */
		if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
		    (so->so_options & SO_OOBINLINE) != 0 &&
		    (so->so_state & SS_RCVATMARK) != 0) {

		so->so_state &= ~SS_RCVATMARK;
		len = uio_resid(uio) - delayed_copy_len;
		if (so->so_oobmark && len > so->so_oobmark - offset) {
			len = so->so_oobmark - offset;

		if (len > m->m_len - moff) {
			len = m->m_len - moff;

		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			if (can_delay && len == m->m_len) {
				/*
				 * only delay the copy if we're consuming the
				 * mbuf and we're NOT in MSG_PEEK mode
				 * and we have enough data to make it worthwhile
				 * to drop and retake the lock... can_delay
				 * reflects the state of the 2 latter
				 * constraints moff should always be zero
				 */
				delayed_copy_len += len;

				if (delayed_copy_len) {
					error = sodelayed_copy(so, uio,
					    &free_list, &delayed_copy_len);

					/*
					 * can only get here if MSG_PEEK is not
					 * set therefore, m should point at the
					 * head of the rcv queue; if it doesn't,
					 * it means something drastically
					 * changed while we were out from behind
					 * the lock in sodelayed_copy. perhaps
					 * a RST on the stream. in any event,
					 * the stream has been interrupted. it's
					 * probably best just to return whatever
					 * data we've moved and let the caller
					 */
					if (m != so->so_rcv.sb_mb) {

				socket_unlock(so, 0);
				error = uiomove(mtod(m, caddr_t) + moff,

			uio_setresid(uio, (uio_resid(uio) - len));

		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR) {

			if (flags & MSG_PEEK) {

				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				m->m_nextpkt = NULL;

				/*
				 * If this packet is an unordered packet
				 * (indicated by M_UNORDERED_DATA flag), remove
				 * the additional bytes added to the
				 * receive socket buffer size.
				 */
				if ((so->so_flags & SOF_ENABLE_MSGS) &&
				    (m->m_flags & M_UNORDERED_DATA) &&
				    sbreserve(&so->so_rcv,
				    so->so_rcv.sb_hiwat - m->m_len)) {
					if (so->so_msg_state->msg_uno_bytes >
					    msg_uno_bytes -= m->m_len;

					m->m_flags &= ~M_UNORDERED_DATA;

					so->so_rcv.sb_mb = m = m->m_next;

					if (free_list == NULL) {

					so->so_rcv.sb_mb = m = m->m_next;

					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL) {
						so->so_rcv.sb_lastrecord = m;

					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);

				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");

			if (flags & MSG_PEEK) {

				if (flags & MSG_DONTWAIT) {
					copy_flag = M_DONTWAIT;

				*mp = m_copym(m, 0, len, copy_flag);

					/*
					 * Failed to allocate an mbuf?
					 * Adjust uio_resid back, it was
					 * adjusted down by len bytes which
					 * we didn't copy over.
					 */
					    (uio_resid(uio) + len));

				so->so_rcv.sb_cc -= len;

		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					/*
					 * delay posting the actual event until
					 * after any delayed copy processing
					 */

				if (offset == so->so_oobmark) {

		if (flags & MSG_EOR) {

		/*
		 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set
		 * (for non-atomic socket), we must not quit until
		 * "uio->uio_resid == 0" or an error termination.
		 * If a signal/timeout occurs, return with a short
		 * count but without error. Keep sockbuf locked
		 * against other readers.
		 */
		while (flags & (MSG_WAITALL | MSG_WAITSTREAM) && m == NULL &&
		    (uio_resid(uio) - delayed_copy_len) > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || ((so->so_state & SS_CANTRCVMORE)
			    && cfil_sock_data_pending(&so->so_rcv) == 0
#endif /* CONTENT_FILTER */

			/*
			 * Depending on the protocol (e.g. TCP), the following
			 * might cause the socket lock to be dropped and later
			 * be reacquired, and more data could have arrived and
			 * have been appended to the receive socket buffer by
			 * the time it returns. Therefore, we only sleep in
			 * sbwait() below if and only if the socket buffer is
			 * empty, in order to avoid a false sleep.
			 */
			if (pr->pr_flags & PR_WANTRCVD && so->so_pcb &&
			    (((struct inpcb *)so->so_pcb)->inp_state !=
			    INPCB_STATE_DEAD)) {
				(*pr->pr_usrreqs->pru_rcvd)(so, flags);

			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");

			if (so->so_rcv.sb_mb == NULL && sbwait(&so->so_rcv)) {
			/*
			 * have to wait until after we get back from the sbwait
			 * to do the copy because we will drop the lock if we
			 * have enough data that has been delayed... by dropping
			 * the lock we open up a window allowing the netisr
			 * thread to process the incoming packets and to change
			 * the state of this socket... we're issuing the sbwait
			 * because the socket is empty and we're expecting the
			 * netisr thread to wake us up when more packets arrive;
			 * if we allow that processing to happen and then sbwait
			 * we could stall forever with packets sitting in the
			 * socket if no further packets arrive from the remote
			 *
			 * we want to copy before we've collected all the data
			 * to satisfy this request to allow the copy to overlap
			 * the incoming packet processing on an MP system
			 */
			if (delayed_copy_len > sorecvmincopy &&
			    (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
				error = sodelayed_copy(so, uio,
				    &free_list, &delayed_copy_len);

			m = so->so_rcv.sb_mb;

				nextrecord = m->m_nextpkt;

			SB_MB_CHECK(&so->so_rcv);

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1) {
		panic("%s: after big while so=%p ref=%d on socket\n",
		    __func__, so, so->so_usecount);

	if (m != NULL && pr->pr_flags & PR_ATOMIC) {
		if (so->so_options & SO_DONTTRUNC) {
			flags |= MSG_RCVMORE;

			if ((flags & MSG_PEEK) == 0) {
				(void) sbdroprecord(&so->so_rcv);

	/*
	 * pru_rcvd below (for TCP) may cause more data to be received
	 * if the socket lock is dropped prior to sending the ACK; some
	 * legacy OpenTransport applications don't handle this well
	 * (if it receives less data than requested while MSG_HAVEMORE
	 * is set), and so we set the flag now based on what we know
	 * prior to calling pru_rcvd.
	 */
	if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) {
		flags |= MSG_HAVEMORE;

	if ((flags & MSG_PEEK) == 0) {
			so->so_rcv.sb_mb = nextrecord;
			/*
			 * First part is an inline SB_EMPTY_FIXUP(). Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL) {
				so->so_rcv.sb_lastrecord = nextrecord;

			SB_MB_CHECK(&so->so_rcv);

		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) {
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);

	if (delayed_copy_len) {
		error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);

	if (free_list != NULL) {
		m_freem_list(free_list);

		postevent(so, 0, EV_OOB);

	if (orig_resid == uio_resid(uio) && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv, TRUE);	/* keep socket locked */

	if (flagsp != NULL) {

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1) {
		panic("%s: release so=%p ref=%d on socket\n", __func__,
		    so, so->so_usecount);

	if (delayed_copy_len) {
		error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);

	if (free_list != NULL) {
		m_freem_list(free_list);

	sbunlock(&so->so_rcv, FALSE);	/* will unlock socket */

		KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
		    VM_KERNEL_ADDRPERM(so),
		    ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
		    (int64_t)(orig_resid - uio_resid(uio)));

	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, so, uio_resid(uio),
	    so->so_rcv.sb_cc, 0, error);
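
/*
 * Illustrative sketch (not built): per the soreceive() block comment, a
 * kernel caller can either let the data be copied out through "uio", or
 * pass a non-NULL "mp0" to get the record back as an mbuf chain, in which
 * case the uio only supplies the residual byte count.  The wrapper below,
 * and the assumption that a uio carrying only a residual count is
 * acceptable here, are for illustration only.
 */
#if 0
static int
soreceive_mbuf_sketch(struct socket *so, size_t want, struct mbuf **chain)
{
	uio_t auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
	int flags = MSG_DONTWAIT;
	int error;

	if (auio == NULL) {
		return ENOMEM;
	}
	/* Only the residual count matters when mp0 is non-NULL. */
	uio_setresid(auio, (user_ssize_t)want);
	error = soreceive(so, NULL, auio, chain, NULL, &flags);
	uio_free(auio);
	return error;
}
#endif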
/*
 * Returns:	0			Success
 */
sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list,
    user_ssize_t *resid)

	socket_unlock(so, 0);

	while (m != NULL && error == 0) {
		error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);

	m_freem_list(*free_list);
sodelayed_copy_list(struct socket *so, struct recv_msg_elem *msgarray,
    u_int uiocnt, struct mbuf **free_list, user_ssize_t *resid)

	struct mbuf *ml, *m;

	for (ml = *free_list, i = 0; ml != NULL && i < uiocnt;
	    ml = ml->m_nextpkt, i++) {
		auio = msgarray[i].uio;
		for (m = ml; m != NULL; m = m->m_next) {
			error = uiomove(mtod(m, caddr_t), m->m_len, auio);

	m_freem_list(*free_list);
soreceive_list(struct socket *so, struct recv_msg_elem *msgarray, u_int uiocnt,

	struct mbuf *nextrecord;
	struct mbuf *ml = NULL, *free_list = NULL, *free_tail = NULL;
	user_ssize_t len, pktlen, delayed_copy_len = 0;
	struct protosw *pr = so->so_proto;
	struct proc *p = current_proc();
	struct uio *auio = NULL;
	struct sockaddr **psa = NULL;
	struct mbuf **controlp = NULL;
	struct mbuf *free_others = NULL;

	KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_START,
	    so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat);

	/*
	 * - Only supports don't wait flags
	 * - Only supports datagram sockets (could be extended to raw)
	 * - Protocol must support packet chains
	 * - The uio array is NULL (should we panic?)
	 */
	if (flagsp != NULL) {

	if (flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA |
		printf("%s invalid flags 0x%x\n", __func__, flags);

	if (so->so_type != SOCK_DGRAM) {

	if (sosendallatonce(so) == 0) {

	if (so->so_proto->pr_usrreqs->pru_send_list == NULL) {
		error = EPROTONOSUPPORT;

	if (msgarray == NULL) {
		printf("%s uioarray is NULL\n", __func__);

		printf("%s uiocnt is 0\n", __func__);

	/*
	 * Sanity check on the length passed by caller as we are making 'int'
	 */
	resid = recv_msg_array_resid(msgarray, uiocnt);
	if (resid < 0 || resid > INT_MAX) {

	if (!(flags & MSG_PEEK) && sorecvmincopy > 0) {

	so_update_last_owner_locked(so, p);
	so_update_policy(so);

	so_update_necp_policy(so, NULL, NULL);

	/*
	 * If a recv attempt is made on a previously-accepted socket
	 * that has been marked as inactive (disconnected), reject
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		struct sockbuf *sb = &so->so_rcv;

		SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
		    __func__, proc_pid(p), proc_best_name(p),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so), error);
		/*
		 * This socket should have been disconnected and flushed
		 * prior to being returned from sodefunct(); there should
		 * be no data on its receive list, so panic otherwise.
		 */
		if (so->so_state & SS_DEFUNCT) {
			sb_empty_assert(sb, __func__);

	/*
	 * The uio may be empty
	 */
	if (npkts >= uiocnt) {

	/*
	 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
	 * and if so just return to the caller. This could happen when
	 * soreceive() is called by a socket upcall function during the
	 * time the socket is freed. The socket buffer would have been
	 * locked across the upcall, therefore we cannot put this thread
	 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
	 * we may livelock), because the lock on the socket buffer will
	 * only be released when the upcall routine returns to its caller.
	 * Because the socket has been officially closed, there can be
	 * no further read on it.
	 */
	if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
	    (SS_NOFDREF | SS_CANTRCVMORE)) {

	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));

	m = so->so_rcv.sb_mb;
	/*
	 * Block awaiting more datagram if needed
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && npkts < uiocnt))))) {
		/*
		 * Panic if we notice inconsistencies in the socket's
		 * receive list; both sb_mb and sb_cc should correctly
		 * reflect the contents of the list, otherwise we may
		 * end up with false positives during select() or poll()
		 * which could put the application in a bad state.
		 */
		SB_MB_CHECK(&so->so_rcv);

			error = so->so_error;
			if ((flags & MSG_PEEK) == 0) {

		if (so->so_state & SS_CANTRCVMORE) {

		if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {

		if ((so->so_state & SS_NBIO) ||
		    (flags & (MSG_DONTWAIT | MSG_NBIO))) {
			error = EWOULDBLOCK;

		/*
		 * Do not block if we got some data
		 */
		if (free_list != NULL) {

		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");

		sbunlock(&so->so_rcv, TRUE);	/* keep socket locked */

		error = sbwait(&so->so_rcv);

	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	/*
	 * Consume the current uio index as we have a datagram
	 */
	auio = msgarray[npkts].uio;
	resid = uio_resid(auio);
	msgarray[npkts].which |= SOCK_MSG_DATA;
	psa = (msgarray[npkts].which & SOCK_MSG_SA) ?
	    &msgarray[npkts].psa : NULL;
	controlp = (msgarray[npkts].which & SOCK_MSG_CONTROL) ?
	    &msgarray[npkts].controlp : NULL;

	nextrecord = m->m_nextpkt;

	if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
		error = soreceive_addr(p, so, psa, flags, &m, &nextrecord, 1);
		if (error == ERESTART) {
		} else if (error != 0) {

	if (m != NULL && m->m_type == MT_CONTROL) {
		error = soreceive_ctl(so, controlp, flags, &m, &nextrecord);

	if (m->m_pkthdr.len == 0) {
		printf("%s:%d so %llx pkt %llx type %u pktlen null\n",
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(m),

	/*
	 * Loop to copy the mbufs of the current record
	 * Support zero length packets
	 */
	while (m != NULL && (len = resid - pktlen) >= 0 && error == 0) {
		if (m->m_len == 0) {
			panic("%p m_len zero", m);

		if (m->m_type == 0) {
			panic("%p m_type zero", m);

		/*
		 * Clip to the residual length
		 */
		if (len > m->m_len) {

		/*
		 * Copy the mbufs via the uio or delay the copy
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (len > 0 && can_delay == 0) {
			socket_unlock(so, 0);
			error = uiomove(mtod(m, caddr_t), (int)len, auio);

			delayed_copy_len += len;

		if (len == m->m_len) {
			/*
			 * m was entirely copied
			 */
			sbfree(&so->so_rcv, m);
			nextrecord = m->m_nextpkt;
			m->m_nextpkt = NULL;

			/*
			 * Set the first packet to the head of the free list
			 */
			if (free_list == NULL) {

			/*
			 * Link current packet to tail of free list
			 */
				if (free_tail != NULL) {
					free_tail->m_nextpkt = m;

			/*
			 * Link current mbuf to last mbuf of current packet
			 */

			/*
			 * Move next buf to head of socket buffer
			 */
			so->so_rcv.sb_mb = m = ml->m_next;

				m->m_nextpkt = nextrecord;
				if (nextrecord == NULL) {
					so->so_rcv.sb_lastrecord = m;

				so->so_rcv.sb_mb = nextrecord;
				SB_EMPTY_FIXUP(&so->so_rcv);

			SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");

			/*
			 * Stop the loop on partial copy
			 */

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1) {
		panic("%s: after big while so=%llx ref=%d on socket\n",
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so), so->so_usecount);

	/*
	 * Tell the caller we made a partial copy
	 */
		if (so->so_options & SO_DONTTRUNC) {
			/*
			 * Copyout first the freelist then the partial mbuf
			 */
			socket_unlock(so, 0);
			if (delayed_copy_len) {
				error = sodelayed_copy_list(so, msgarray,
				    uiocnt, &free_list, &delayed_copy_len);

				error = uiomove(mtod(m, caddr_t), (int)len,

			so->so_rcv.sb_cc -= len;
			flags |= MSG_RCVMORE;

			(void) sbdroprecord(&so->so_rcv);
			nextrecord = so->so_rcv.sb_mb;

		so->so_rcv.sb_mb = nextrecord;
		/*
		 * First part is an inline SB_EMPTY_FIXUP(). Second
		 * part makes sure sb_lastrecord is up-to-date if
		 * there is still data in the socket buffer.
		 */
		if (so->so_rcv.sb_mb == NULL) {
			so->so_rcv.sb_mbtail = NULL;
			so->so_rcv.sb_lastrecord = NULL;
		} else if (nextrecord->m_nextpkt == NULL) {
			so->so_rcv.sb_lastrecord = nextrecord;

		SB_MB_CHECK(&so->so_rcv);

	SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");

	/*
	 * We can continue to the next packet as long as:
	 * - We haven't exhausted the uio array
	 * - There was no error
	 * - A packet was not truncated
	 * - We can still receive more data
	 */
	if (npkts < uiocnt && error == 0 &&
	    (flags & (MSG_RCVMORE | MSG_TRUNC)) == 0 &&
	    (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv, TRUE);	/* keep socket locked */

	if (flagsp != NULL) {

	/*
	 * pru_rcvd may cause more data to be received if the socket lock
	 * is dropped so we set MSG_HAVEMORE now based on what we know.
	 * That way the caller won't be surprised if it receives less data
	 */
	if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) {
		flags |= MSG_HAVEMORE;

	if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) {
		(*pr->pr_usrreqs->pru_rcvd)(so, flags);

	sbunlock(&so->so_rcv, FALSE);	/* will unlock socket */

	socket_unlock(so, 1);

	if (delayed_copy_len) {
		error = sodelayed_copy_list(so, msgarray, uiocnt,
		    &free_list, &delayed_copy_len);

	/*
	 * Amortize the cost of freeing the mbufs
	 */
	if (free_list != NULL) {
		m_freem_list(free_list);

	if (free_others != NULL) {
		m_freem_list(free_others);

	KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_END, error,
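
/*
 * Illustrative sketch (not built): soreceive_list() fills one
 * recv_msg_elem per datagram; the caller pre-populates each element's uio
 * and asks for the peer address and/or control data through the "which"
 * bits (SOCK_MSG_SA / SOCK_MSG_CONTROL), as read back above.  The element
 * setup and the assumption that the trailing parameter is an "int *flagsp"
 * are for illustration only.
 */
#if 0
static int
soreceive_list_sketch(struct socket *so, struct recv_msg_elem *elems,
    u_int cnt)
{
	int flags = MSG_DONTWAIT;
	u_int i;

	for (i = 0; i < cnt; i++) {
		/* also request the source address of each datagram */
		elems[i].which |= SOCK_MSG_SA;
	}
	/* On return, elements with SOCK_MSG_DATA set carry a datagram. */
	return soreceive_list(so, elems, cnt, &flags);
}
#endif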
so_statistics_event_to_nstat_event(int64_t *input_options,
    uint64_t *nstat_event)

	switch (*input_options) {
	case SO_STATISTICS_EVENT_ENTER_CELLFALLBACK:
		*nstat_event = NSTAT_EVENT_SRC_ENTER_CELLFALLBACK;

	case SO_STATISTICS_EVENT_EXIT_CELLFALLBACK:
		*nstat_event = NSTAT_EVENT_SRC_EXIT_CELLFALLBACK;

#if (DEBUG || DEVELOPMENT)
	case SO_STATISTICS_EVENT_RESERVED_1:
		*nstat_event = NSTAT_EVENT_SRC_RESERVED_1;

	case SO_STATISTICS_EVENT_RESERVED_2:
		*nstat_event = NSTAT_EVENT_SRC_RESERVED_2;

#endif /* (DEBUG || DEVELOPMENT) */
/*
 * Returns:	0			Success
 *	<pru_shutdown>:EINVAL
 *	<pru_shutdown>:EADDRNOTAVAIL[TCP]
 *	<pru_shutdown>:ENOBUFS[TCP]
 *	<pru_shutdown>:EMSGSIZE[TCP]
 *	<pru_shutdown>:EHOSTUNREACH[TCP]
 *	<pru_shutdown>:ENETUNREACH[TCP]
 *	<pru_shutdown>:ENETDOWN[TCP]
 *	<pru_shutdown>:ENOMEM[TCP]
 *	<pru_shutdown>:EACCES[TCP]
 *	<pru_shutdown>:EMSGSIZE[TCP]
 *	<pru_shutdown>:ENOBUFS[TCP]
 *	<pru_shutdown>:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
 *	<pru_shutdown>:???		[other protocol families]
 */
soshutdown(struct socket *so, int how)

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_START, how, 0, 0, 0, 0);

	    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {

		error = soshutdownlock(so, how);

		socket_unlock(so, 1);

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, how, error, 0, 0, 0);
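
/*
 * Illustrative sketch (not built, user-space view): the "how" argument is
 * the standard shutdown(2) value.  Per soshutdownlock_final() below,
 * anything other than SHUT_WR shuts the read side, and anything other
 * than SHUT_RD shuts the write side, so SHUT_RDWR does both.
 */
#if 0
static void
shutdown_sketch(int sock)
{
	shutdown(sock, SHUT_WR);	/* stop sending, keep receiving */
	shutdown(sock, SHUT_RDWR);	/* then stop both directions */
}
#endif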
soshutdownlock_final(struct socket *so, int how)
	struct protosw *pr = so->so_proto;

	sflt_notify(so, sock_evt_shutdown, &how);

	if (how != SHUT_WR) {
		if ((so->so_state & SS_CANTRCVMORE) != 0) {
			/* read already shut down */

		postevent(so, 0, EV_RCLOSED);

	if (how != SHUT_RD) {
		if ((so->so_state & SS_CANTSENDMORE) != 0) {
			/* write already shut down */

		error = (*pr->pr_usrreqs->pru_shutdown)(so);
		postevent(so, 0, EV_WCLOSED);

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN, how, 1, 0, 0, 0);
soshutdownlock(struct socket *so, int how)

	/*
	 * A content filter may delay the actual shutdown until it
	 * has processed the pending data
	 */
	if (so->so_flags & SOF_CONTENT_FILTER) {
		error = cfil_sock_shutdown(so, &how);
		if (error == EJUSTRETURN) {

		} else if (error != 0) {

#endif /* CONTENT_FILTER */

	error = soshutdownlock_final(so, how);
sowflush(struct socket *so)
	struct sockbuf *sb = &so->so_snd;

	/*
	 * Obtain lock on the socket buffer (SB_LOCK). This is required
	 * to prevent the socket buffer from being unexpectedly altered
	 * while it is used by another thread in socket send/receive.
	 *
	 * sblock() must not fail here, hence the assertion.
	 */
	(void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
	VERIFY(sb->sb_flags & SB_LOCK);

	sb->sb_flags &= ~(SB_SEL | SB_UPCALL);
	sb->sb_flags |= SB_DROP;
	sb->sb_upcall = NULL;
	sb->sb_upcallarg = NULL;

	sbunlock(sb, TRUE);	/* keep socket locked */

	selthreadclear(&sb->sb_sel);
sorflush(struct socket *so)
	struct sockbuf *sb = &so->so_rcv;
	struct protosw *pr = so->so_proto;
	lck_mtx_t *mutex_held;
	/*
	 * XXX: This code is currently commented out, because we may get here
	 * as part of sofreelastref(), and at that time, pr_getlock() may no
	 * longer be able to return us the lock; this will be fixed in future.
	 */
	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);

		mutex_held = so->so_proto->pr_domain->dom_mtx;

	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

	sflt_notify(so, sock_evt_flush_read, NULL);

	/*
	 * Obtain lock on the socket buffer (SB_LOCK). This is required
	 * to prevent the socket buffer from being unexpectedly altered
	 * while it is used by another thread in socket send/receive.
	 *
	 * sblock() must not fail here, hence the assertion.
	 */
	(void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
	VERIFY(sb->sb_flags & SB_LOCK);

	/*
	 * Copy only the relevant fields from "sb" to "asb" which we
	 * need for sbrelease() to function. In particular, skip
	 * sb_sel as it contains the wait queue linkage, which would
	 * wreak havoc if we were to issue selthreadclear() on "asb".
	 * Make sure to not carry over SB_LOCK in "asb", as we need
	 * to acquire it later as part of sbrelease().
	 */
	bzero(&asb, sizeof(asb));
	asb.sb_cc = sb->sb_cc;
	asb.sb_hiwat = sb->sb_hiwat;
	asb.sb_mbcnt = sb->sb_mbcnt;
	asb.sb_mbmax = sb->sb_mbmax;
	asb.sb_ctl = sb->sb_ctl;
	asb.sb_lowat = sb->sb_lowat;
	asb.sb_mb = sb->sb_mb;
	asb.sb_mbtail = sb->sb_mbtail;
	asb.sb_lastrecord = sb->sb_lastrecord;
	asb.sb_so = sb->sb_so;
	asb.sb_flags = sb->sb_flags;
	asb.sb_flags &= ~(SB_LOCK | SB_SEL | SB_KNOTE | SB_UPCALL);
	asb.sb_flags |= SB_DROP;

	/*
	 * Ideally we'd bzero() these and preserve the ones we need;
	 * but to do that we'd need to shuffle things around in the
	 * sockbuf, and we can't do it now because there are KEXTS
	 * that are directly referring to the socket structure.
	 *
	 * Setting SB_DROP acts as a barrier to prevent further appends.
	 * Clearing SB_SEL is done for selthreadclear() below.
	 */
	sb->sb_mbtail = NULL;
	sb->sb_lastrecord = NULL;
	sb->sb_timeo.tv_sec = 0;
	sb->sb_timeo.tv_usec = 0;
	sb->sb_upcall = NULL;
	sb->sb_upcallarg = NULL;
	sb->sb_flags &= ~(SB_SEL | SB_UPCALL);
	sb->sb_flags |= SB_DROP;

	sbunlock(sb, TRUE);	/* keep socket locked */

	/*
	 * Note that selthreadclear() is called on the original "sb" and
	 * not the local "asb" because of the way wait queue linkage is
	 * implemented. Given that selwakeup() may be triggered, SB_SEL
	 * should no longer be set (cleared above.)
	 */
	selthreadclear(&sb->sb_sel);

	if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose) {
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 *
 * Returns:	0			Success
 */
int
sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
{
	size_t valsize;

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen) {
		return EINVAL;
	}
	if (valsize > len) {
		sopt->sopt_valsize = valsize = len;
	}

	if (sopt->sopt_p != kernproc) {
		return copyin(sopt->sopt_val, buf, valsize);
	}

	bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
	return 0;
}
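
/*
 * Illustrative user-space sketch (not part of this file, excluded from the
 * build): the copy-in semantics above are easiest to see through
 * setsockopt(2).  An over-long value is silently truncated to what the
 * option needs, while a value shorter than the minimum length comes back
 * as EINVAL for fixed-size options.
 */
#if 0 /* example only */
#include <sys/socket.h>
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int s = socket(AF_INET, SOCK_DGRAM, 0);
	int on = 1;
	char tiny = 1;

	/* Exactly sizeof(int): accepted. */
	if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) != 0)
		perror("setsockopt(int)");

	/* One byte is less than the option's minimum length: rejected. */
	if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &tiny, sizeof(tiny)) != 0)
		printf("short value rejected: errno=%d (EINVAL expected)\n", errno);

	close(s);
	return 0;
}
#endif /* example only */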
/*
 * sooptcopyin_timeval
 *	Copy in a timeval value into tv_p, and take into account whether
 *	the calling process is 64-bit or 32-bit.  Moved the sanity checking
 *	code here so that we can verify the 64-bit tv_sec value before we lose
 *	the top 32-bits assigning tv64.tv_sec to tv_p->tv_sec.
 */
static int
sooptcopyin_timeval(struct sockopt *sopt, struct timeval *tv_p)
{
	int error;

	if (proc_is64bit(sopt->sopt_p)) {
		struct user64_timeval tv64;

		if (sopt->sopt_valsize < sizeof(tv64)) {
			return EINVAL;
		}

		sopt->sopt_valsize = sizeof(tv64);
		if (sopt->sopt_p != kernproc) {
			error = copyin(sopt->sopt_val, &tv64, sizeof(tv64));
			if (error != 0) {
				return error;
			}
		} else {
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv64,
			    sizeof(tv64));
		}
		if (tv64.tv_sec < 0 || tv64.tv_sec > LONG_MAX ||
		    tv64.tv_usec < 0 || tv64.tv_usec >= 1000000) {
			return EDOM;
		}

		tv_p->tv_sec = tv64.tv_sec;
		tv_p->tv_usec = tv64.tv_usec;
	} else {
		struct user32_timeval tv32;

		if (sopt->sopt_valsize < sizeof(tv32)) {
			return EINVAL;
		}

		sopt->sopt_valsize = sizeof(tv32);
		if (sopt->sopt_p != kernproc) {
			error = copyin(sopt->sopt_val, &tv32, sizeof(tv32));
			if (error != 0) {
				return error;
			}
		} else {
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv32,
			    sizeof(tv32));
		}

		/*
		 * K64todo "comparison is always false due to
		 * limited range of data type"
		 */
		if (tv32.tv_sec < 0 || tv32.tv_sec > LONG_MAX ||
		    tv32.tv_usec < 0 || tv32.tv_usec >= 1000000) {
			return EDOM;
		}

		tv_p->tv_sec = tv32.tv_sec;
		tv_p->tv_usec = tv32.tv_usec;
	}
	return 0;
}
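
/*
 * Illustrative user-space sketch (not part of this file, excluded from the
 * build): the range checks above surface to callers of setsockopt(2) for
 * SO_RCVTIMEO/SO_SNDTIMEO; a tv_usec outside [0, 1000000) is expected to be
 * rejected with EDOM.
 */
#if 0 /* example only */
#include <sys/socket.h>
#include <sys/time.h>
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int s = socket(AF_INET, SOCK_STREAM, 0);
	struct timeval good = { .tv_sec = 2, .tv_usec = 500000 };
	struct timeval bad  = { .tv_sec = 2, .tv_usec = 1000000 }; /* out of range */

	if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &good, sizeof(good)) != 0)
		perror("SO_RCVTIMEO (valid)");

	if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &bad, sizeof(bad)) != 0)
		printf("out-of-range timeval rejected: errno=%d (EDOM expected)\n", errno);

	close(s);
	return 0;
}
#endif /* example only */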
int
soopt_cred_check(struct socket *so, int priv, boolean_t allow_root,
    boolean_t ignore_delegate)
{
	kauth_cred_t cred = NULL;
	proc_t ep = PROC_NULL;
	uid_t uid;
	int error = 0;

	if (ignore_delegate == false && so->so_flags & SOF_DELEGATED) {
		ep = proc_find(so->e_pid);
		if (ep) {
			cred = kauth_cred_proc_ref(ep);
		}
	}

	uid = kauth_cred_getuid(cred ? cred : so->so_cred);

	/* uid is 0 for root */
	if (uid != 0 || !allow_root) {
		error = priv_check_cred(cred ? cred : so->so_cred, priv, 0);
	}

	if (cred) {
		kauth_cred_unref(&cred);
	}
	if (ep != PROC_NULL) {
		proc_rele(ep);
	}

	return error;
}
/*
 * Returns:	0			Success
 *	sooptcopyin:EINVAL
 *	sooptcopyin:EFAULT
 *	sooptcopyin_timeval:EINVAL
 *	sooptcopyin_timeval:EFAULT
 *	sooptcopyin_timeval:EDOM
 *	<pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
 *	<pr_ctloutput>:???
 *	sflt_attach_private:???	[whatever a filter author chooses]
 *	<sf_setoption>:???	[whatever a filter author chooses]
 *
 * Notes:	Other <pru_listen> returns depend on the protocol family; all
 *		<sf_listen> returns depend on what the filter author causes
 *		their filter to return.
 */
int
sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
{
	int error, optval;
	int64_t long_optval;
	struct linger l;
	struct timeval tv;
#if CONFIG_MACF_SOCKET
	struct mac extmac;
#endif /* MAC_SOCKET */

	if (sopt->sopt_dir != SOPT_SET) {
		sopt->sopt_dir = SOPT_SET;
	}

	if (dolock) {
		socket_lock(so, 1);
	}

	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
	    (SS_CANTRCVMORE | SS_CANTSENDMORE) &&
	    (so->so_flags & SOF_NPX_SETOPTSHUT) == 0) {
		/* the socket has been shutdown, no more sockopt's */
		error = EINVAL;
		goto out;
	}

	error = sflt_setsockopt(so, sopt);
	if (error != 0) {
		if (error == EJUSTRETURN) {
			error = 0;
		}
		goto out;
	}

	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto != NULL &&
		    so->so_proto->pr_ctloutput != NULL) {
			error = (*so->so_proto->pr_ctloutput)(so, sopt);
			goto out;
		}
		error = ENOPROTOOPT;
	} else {
		/*
		 * Allow socket-level (SOL_SOCKET) options to be filtered by
		 * the protocol layer, if needed.  A zero value returned from
		 * the handler means use default socket-level processing as
		 * done by the rest of this routine.  Otherwise, any other
		 * return value indicates that the option is unsupported.
		 */
		if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs->
		    pru_socheckopt(so, sopt)) != 0) {
			goto out;
		}

		error = 0;
		switch (sopt->sopt_name) {
		case SO_LINGER:
		case SO_LINGER_SEC:
			error = sooptcopyin(sopt, &l, sizeof(l), sizeof(l));
			if (error != 0) {
				goto out;
			}

			so->so_linger = (sopt->sopt_name == SO_LINGER) ?
			    l.l_linger : l.l_linger * hz;
			if (l.l_onoff != 0) {
				so->so_options |= SO_LINGER;
			} else {
				so->so_options &= ~SO_LINGER;
			}
			break;

		case SO_USELOOPBACK:
		case SO_TIMESTAMP_MONOTONIC:
		case SO_TIMESTAMP_CONTINUOUS:
		case SO_WANTOOBFLAG:
		case SO_NOWAKEFROMSLEEP:
		case SO_NOAPNFALLBK:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval) {
				so->so_options |= sopt->sopt_name;
			} else {
				so->so_options &= ~sopt->sopt_name;
			}
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto out;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF: {
				struct sockbuf *sb =
				    (sopt->sopt_name == SO_SNDBUF) ?
				    &so->so_snd : &so->so_rcv;
				if (sbreserve(sb, (u_int32_t)optval) == 0) {
					error = ENOBUFS;
					goto out;
				}
				sb->sb_flags |= SB_USRSIZE;
				sb->sb_flags &= ~SB_AUTOSIZE;
				sb->sb_idealsize = (u_int32_t)optval;
				break;
			}
			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT: {
				int space = sbspace(&so->so_snd);
				u_int32_t hiwat = so->so_snd.sb_hiwat;

				if (so->so_snd.sb_flags & SB_UNIX) {
					struct unpcb *unp =
					    (struct unpcb *)(so->so_pcb);
					if (unp != NULL &&
					    unp->unp_conn != NULL) {
						hiwat += unp->unp_conn->unp_cc;
					}
				}

				so->so_snd.sb_lowat =
				    (optval > hiwat) ?
				    hiwat : optval;

				if (space >= so->so_snd.sb_lowat) {
					sowwakeup(so);
				}
				break;
			}
			case SO_RCVLOWAT: {
				int64_t data_len;

				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				data_len = so->so_rcv.sb_cc
				    - so->so_rcv.sb_ctl;
				if (data_len >= so->so_rcv.sb_lowat) {
					sorwakeup(so);
				}
				break;
			}
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin_timeval(sopt, &tv);
			if (error != 0) {
				goto out;
			}

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = tv;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = tv;
				break;
			}
			break;

		case SO_NKE: {
			struct so_nke nke;

			error = sooptcopyin(sopt, &nke, sizeof(nke),
			    sizeof(nke));
			if (error != 0) {
				goto out;
			}

			error = sflt_attach_internal(so, nke.nke_handle);
			break;
		}
		case SO_NOSIGPIPE:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval != 0) {
				so->so_flags |= SOF_NOSIGPIPE;
			} else {
				so->so_flags &= ~SOF_NOSIGPIPE;
			}
			break;

		case SO_NOADDRERR:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval != 0) {
				so->so_flags |= SOF_NOADDRAVAIL;
			} else {
				so->so_flags &= ~SOF_NOADDRAVAIL;
			}
			break;

		case SO_REUSESHAREUID:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval != 0) {
				so->so_flags |= SOF_REUSESHAREUID;
			} else {
				so->so_flags &= ~SOF_REUSESHAREUID;
			}
			break;

		case SO_NOTIFYCONFLICT:
			if (kauth_cred_issuser(kauth_cred_get()) == 0) {
				error = EPERM;
				goto out;
			}
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval != 0) {
				so->so_flags |= SOF_NOTIFYCONFLICT;
			} else {
				so->so_flags &= ~SOF_NOTIFYCONFLICT;
			}
			break;

		case SO_RESTRICTIONS:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}

			error = so_set_restrictions(so, optval);
			break;

		case SO_AWDL_UNRESTRICTED:
			if (SOCK_DOM(so) != PF_INET &&
			    SOCK_DOM(so) != PF_INET6) {
				error = EOPNOTSUPP;
				goto out;
			}
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval != 0) {
				error = soopt_cred_check(so,
				    PRIV_NET_RESTRICTED_AWDL, false, false);
				if (error == 0) {
					inp_set_awdl_unrestricted(
						sotoinpcb(so));
				}
			} else {
				inp_clear_awdl_unrestricted(sotoinpcb(so));
			}
			break;

		case SO_INTCOPROC_ALLOW:
			if (SOCK_DOM(so) != PF_INET6) {
				error = EOPNOTSUPP;
				goto out;
			}
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval != 0 &&
			    inp_get_intcoproc_allowed(sotoinpcb(so)) == FALSE) {
				error = soopt_cred_check(so,
				    PRIV_NET_RESTRICTED_INTCOPROC, false, false);
				if (error == 0) {
					inp_set_intcoproc_allowed(
						sotoinpcb(so));
				}
			} else if (optval == 0) {
				inp_clear_intcoproc_allowed(sotoinpcb(so));
			}
			break;

		case SO_LABEL:
#if CONFIG_MACF_SOCKET
			if ((error = sooptcopyin(sopt, &extmac, sizeof(extmac),
			    sizeof(extmac))) != 0) {
				goto out;
			}

			error = mac_setsockopt_label(proc_ucred(sopt->sopt_p),
			    so, &extmac);
#else
			error = EOPNOTSUPP;
#endif /* MAC_SOCKET */
			break;
		case SO_UPCALLCLOSEWAIT:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval != 0) {
				so->so_flags |= SOF_UPCALLCLOSEWAIT;
			} else {
				so->so_flags &= ~SOF_UPCALLCLOSEWAIT;
			}
			break;

		case SO_RANDOMPORT:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval != 0) {
				so->so_flags |= SOF_BINDRANDOMPORT;
			} else {
				so->so_flags &= ~SOF_BINDRANDOMPORT;
			}
			break;

		case SO_NP_EXTENSIONS: {
			struct so_np_extensions sonpx;

			error = sooptcopyin(sopt, &sonpx, sizeof(sonpx),
			    sizeof(sonpx));
			if (error != 0) {
				goto out;
			}
			if (sonpx.npx_mask & ~SONPX_MASK_VALID) {
				error = EINVAL;
				goto out;
			}
			/*
			 * Only one bit defined for now
			 */
			if ((sonpx.npx_mask & SONPX_SETOPTSHUT)) {
				if ((sonpx.npx_flags & SONPX_SETOPTSHUT)) {
					so->so_flags |= SOF_NPX_SETOPTSHUT;
				} else {
					so->so_flags &= ~SOF_NPX_SETOPTSHUT;
				}
			}
			break;
		}

		case SO_TRAFFIC_CLASS: {
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval >= SO_TC_NET_SERVICE_OFFSET) {
				int netsvc = optval - SO_TC_NET_SERVICE_OFFSET;
				error = so_set_net_service_type(so, netsvc);
				goto out;
			}
			error = so_set_traffic_class(so, optval);
			if (error != 0) {
				goto out;
			}
			so->so_flags1 &= ~SOF1_TC_NET_SERV_TYPE;
			so->so_netsvctype = _NET_SERVICE_TYPE_UNSPEC;
			break;
		}

		case SO_RECV_TRAFFIC_CLASS: {
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval == 0) {
				so->so_flags &= ~SOF_RECV_TRAFFIC_CLASS;
			} else {
				so->so_flags |= SOF_RECV_TRAFFIC_CLASS;
			}
			break;
		}

#if (DEVELOPMENT || DEBUG)
		case SO_TRAFFIC_CLASS_DBG: {
			struct so_tcdbg so_tcdbg;

			error = sooptcopyin(sopt, &so_tcdbg,
			    sizeof(struct so_tcdbg), sizeof(struct so_tcdbg));
			if (error != 0) {
				goto out;
			}
			error = so_set_tcdbg(so, &so_tcdbg);
			if (error != 0) {
				goto out;
			}
			break;
		}
#endif /* (DEVELOPMENT || DEBUG) */

		case SO_PRIVILEGED_TRAFFIC_CLASS:
			error = priv_check_cred(kauth_cred_get(),
			    PRIV_NET_PRIVILEGED_TRAFFIC_CLASS, 0);
			if (error != 0) {
				goto out;
			}
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval == 0) {
				so->so_flags &= ~SOF_PRIVILEGED_TRAFFIC_CLASS;
			} else {
				so->so_flags |= SOF_PRIVILEGED_TRAFFIC_CLASS;
			}
			break;

#if (DEVELOPMENT || DEBUG)
		case SO_DEFUNCTIT:
			error = sosetdefunct(current_proc(), so, 0, FALSE);
			if (error == 0) {
				error = sodefunct(current_proc(), so, 0);
			}
			break;
#endif /* (DEVELOPMENT || DEBUG) */
		case SO_DEFUNCTOK:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0 || (so->so_flags & SOF_DEFUNCT)) {
				if (error == 0) {
					error = EBADF;
				}
				goto out;
			}
			/*
			 * Any process can set SO_DEFUNCTOK (clear
			 * SOF_NODEFUNCT), but only root can clear
			 * SO_DEFUNCTOK (set SOF_NODEFUNCT).
			 */
			if (optval == 0 &&
			    kauth_cred_issuser(kauth_cred_get()) == 0) {
				error = EPERM;
				goto out;
			}
			if (optval) {
				so->so_flags &= ~SOF_NODEFUNCT;
			} else {
				so->so_flags |= SOF_NODEFUNCT;
			}

			if (SOCK_DOM(so) == PF_INET ||
			    SOCK_DOM(so) == PF_INET6) {
				char s[MAX_IPv6_STR_LEN];
				char d[MAX_IPv6_STR_LEN];
				struct inpcb *inp = sotoinpcb(so);

				SODEFUNCTLOG("%s[%d, %s]: so 0x%llx "
				    "[%s %s:%d -> %s:%d] is now marked "
				    "as %seligible for "
				    "defunct\n", __func__, proc_selfpid(),
				    proc_best_name(current_proc()),
				    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
				    (SOCK_TYPE(so) == SOCK_STREAM) ?
				    "TCP" : "UDP", inet_ntop(SOCK_DOM(so),
				    ((SOCK_DOM(so) == PF_INET) ?
				    (void *)&inp->inp_laddr.s_addr :
				    (void *)&inp->in6p_laddr), s, sizeof(s)),
				    ntohs(inp->in6p_lport),
				    inet_ntop(SOCK_DOM(so),
				    (SOCK_DOM(so) == PF_INET) ?
				    (void *)&inp->inp_faddr.s_addr :
				    (void *)&inp->in6p_faddr, d, sizeof(d)),
				    ntohs(inp->in6p_fport),
				    (so->so_flags & SOF_NODEFUNCT) ?
				    "not " : "");
			} else {
				SODEFUNCTLOG("%s[%d, %s]: so 0x%llx [%d,%d] "
				    "is now marked as %seligible for "
				    "defunct\n",
				    __func__, proc_selfpid(),
				    proc_best_name(current_proc()),
				    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
				    SOCK_DOM(so), SOCK_TYPE(so),
				    (so->so_flags & SOF_NODEFUNCT) ?
				    "not " : "");
			}
			break;

		case SO_ISDEFUNCT:
			/* This option is not settable */
			error = EINVAL;
			break;
		case SO_OPPORTUNISTIC:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error == 0) {
				error = so_set_opportunistic(so, optval);
			}
			break;

		case SO_FLUSH:
			/* This option is handled by lower layer(s) */
			error = 0;
			break;

		case SO_RECV_ANYIF:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error == 0) {
				error = so_set_recv_anyif(so, optval);
			}
			break;

		case SO_TRAFFIC_MGT_BACKGROUND: {
			/* This option is handled by lower layer(s) */
			error = 0;
			break;
		}

#if FLOW_DIVERT
		case SO_FLOW_DIVERT_TOKEN:
			error = flow_divert_token_set(so, sopt);
			break;
#endif /* FLOW_DIVERT */

		case SO_DELEGATED:
			if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval))) != 0) {
				break;
			}

			error = so_set_effective_pid(so, optval, sopt->sopt_p, true);
			break;

		case SO_DELEGATED_UUID: {
			uuid_t euuid;

			if ((error = sooptcopyin(sopt, &euuid, sizeof(euuid),
			    sizeof(euuid))) != 0) {
				break;
			}

			error = so_set_effective_uuid(so, euuid, sopt->sopt_p, true);
			break;
		}

		case SO_NECP_ATTRIBUTES:
			error = necp_set_socket_attributes(so, sopt);
			break;

		case SO_NECP_CLIENTUUID: {
			if (SOCK_DOM(so) == PF_MULTIPATH) {
				/* Handled by MPTCP itself */
				break;
			}

			if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
				error = EINVAL;
				goto out;
			}

			struct inpcb *inp = sotoinpcb(so);
			if (!uuid_is_null(inp->necp_client_uuid)) {
				// Clear out the old client UUID if present
				necp_inpcb_remove_cb(inp);
			}

			error = sooptcopyin(sopt, &inp->necp_client_uuid,
			    sizeof(uuid_t), sizeof(uuid_t));
			if (error != 0) {
				goto out;
			}

			if (uuid_is_null(inp->necp_client_uuid)) {
				error = EINVAL;
				goto out;
			}

			pid_t current_pid = proc_pid(current_proc());
			error = necp_client_register_socket_flow(current_pid,
			    inp->necp_client_uuid, inp);
			if (error != 0) {
				uuid_clear(inp->necp_client_uuid);
				goto out;
			}

			if (inp->inp_lport != 0) {
				// There is a bound local port, so this is not
				// a fresh socket. Assign to the client.
				necp_client_assign_from_socket(current_pid, inp->necp_client_uuid, inp);
			}

			break;
		}
		case SO_NECP_LISTENUUID: {
			if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
				error = EINVAL;
				goto out;
			}

			struct inpcb *inp = sotoinpcb(so);
			if (!uuid_is_null(inp->necp_client_uuid)) {
				error = EINVAL;
				goto out;
			}

			error = sooptcopyin(sopt, &inp->necp_client_uuid,
			    sizeof(uuid_t), sizeof(uuid_t));
			if (error != 0) {
				goto out;
			}

			if (uuid_is_null(inp->necp_client_uuid)) {
				error = EINVAL;
				goto out;
			}

			error = necp_client_register_socket_listener(proc_pid(current_proc()),
			    inp->necp_client_uuid, inp);
			if (error != 0) {
				uuid_clear(inp->necp_client_uuid);
				goto out;
			}

			// Mark that the port registration is held by NECP
			inp->inp_flags2 |= INP2_EXTERNAL_PORT;

			break;
		}

		case SO_EXTENDED_BK_IDLE:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error == 0) {
				error = so_set_extended_bk_idle(so, optval);
			}
			break;

		case SO_MARK_CELLFALLBACK:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval < 0) {
				error = EINVAL;
				goto out;
			}
			if (optval == 0) {
				so->so_flags1 &= ~SOF1_CELLFALLBACK;
			} else {
				so->so_flags1 |= SOF1_CELLFALLBACK;
			}
			break;

		case SO_STATISTICS_EVENT:
			error = sooptcopyin(sopt, &long_optval,
			    sizeof(long_optval), sizeof(long_optval));
			if (error != 0) {
				goto out;
			}
			u_int64_t nstat_event = 0;
			error = so_statistics_event_to_nstat_event(
				&long_optval, &nstat_event);
			if (error != 0) {
				goto out;
			}
			nstat_pcb_event(sotoinpcb(so), nstat_event);
			break;

		case SO_NET_SERVICE_TYPE: {
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			error = so_set_net_service_type(so, optval);
			break;
		}

		case SO_QOSMARKING_POLICY_OVERRIDE:
			error = priv_check_cred(kauth_cred_get(),
			    PRIV_NET_QOSMARKING_POLICY_OVERRIDE, 0);
			if (error != 0) {
				goto out;
			}
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval == 0) {
				so->so_flags1 &= ~SOF1_QOSMARKING_POLICY_OVERRIDE;
			} else {
				so->so_flags1 |= SOF1_QOSMARKING_POLICY_OVERRIDE;
			}
			break;

		case SO_MPKL_SEND_INFO: {
			struct so_mpkl_send_info so_mpkl_send_info;

			error = sooptcopyin(sopt, &so_mpkl_send_info,
			    sizeof(struct so_mpkl_send_info), sizeof(struct so_mpkl_send_info));
			if (error != 0) {
				goto out;
			}
			uuid_copy(so->so_mpkl_send_uuid, so_mpkl_send_info.mpkl_uuid);
			so->so_mpkl_send_proto = so_mpkl_send_info.mpkl_proto;

			if (uuid_is_null(so->so_mpkl_send_uuid) && so->so_mpkl_send_proto == 0) {
				so->so_flags1 &= ~SOF1_MPKL_SEND_INFO;
			} else {
				so->so_flags1 |= SOF1_MPKL_SEND_INFO;
			}
			break;
		}

		default:
			error = ENOPROTOOPT;
			break;
		}

		if (error == 0 && so->so_proto != NULL &&
		    so->so_proto->pr_ctloutput != NULL) {
			(void) so->so_proto->pr_ctloutput(so, sopt);
		}
	}
out:
	if (dolock) {
		socket_unlock(so, 1);
	}
	return error;
}
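
/*
 * Illustrative user-space sketch (not part of this file, excluded from the
 * build): the SO_LINGER/SO_LINGER_SEC case above stores l_linger either
 * unscaled or multiplied by hz; from the caller's point of view,
 * SO_LINGER_SEC takes the linger interval in seconds.
 */
#if 0 /* example only */
#include <sys/socket.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int s = socket(AF_INET, SOCK_STREAM, 0);
	struct linger lg = { .l_onoff = 1, .l_linger = 5 };

	/* Linger for up to 5 seconds on close(). */
	if (setsockopt(s, SOL_SOCKET, SO_LINGER_SEC, &lg, sizeof(lg)) != 0)
		perror("SO_LINGER_SEC");

	close(s);
	return 0;
}
#endif /* example only */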
/* Helper routines for getsockopt */
int
sooptcopyout(struct sockopt *sopt, void *buf, size_t len)
{
	int error = 0;
	size_t valsize;

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer must
	 * be generated ahead of time.
	 */
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != USER_ADDR_NULL) {
		if (sopt->sopt_p != kernproc) {
			error = copyout(buf, sopt->sopt_val, valsize);
		} else {
			bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
		}
	}
	return error;
}
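
/*
 * Illustrative user-space sketch (not part of this file, excluded from the
 * build): getsockopt(2) reports back how many bytes were actually copied
 * out, possibly truncated to the caller's buffer, per the comment above.
 */
#if 0 /* example only */
#include <sys/socket.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int s = socket(AF_INET, SOCK_DGRAM, 0);
	int rcvbuf = 0;
	socklen_t len = sizeof(rcvbuf);

	if (getsockopt(s, SOL_SOCKET, SO_RCVBUF, &rcvbuf, &len) == 0)
		printf("SO_RCVBUF=%d (%u bytes copied out)\n",
		    rcvbuf, (unsigned)len);

	close(s);
	return 0;
}
#endif /* example only */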
static int
sooptcopyout_timeval(struct sockopt *sopt, const struct timeval *tv_p)
{
	int error = 0;
	size_t len;
	struct user64_timeval tv64 = {};
	struct user32_timeval tv32 = {};
	const void *val;
	size_t valsize;

	if (proc_is64bit(sopt->sopt_p)) {
		len = sizeof(tv64);
		tv64.tv_sec = tv_p->tv_sec;
		tv64.tv_usec = tv_p->tv_usec;
		val = &tv64;
	} else {
		len = sizeof(tv32);
		tv32.tv_sec = tv_p->tv_sec;
		tv32.tv_usec = tv_p->tv_usec;
		val = &tv32;
	}
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != USER_ADDR_NULL) {
		if (sopt->sopt_p != kernproc) {
			error = copyout(val, sopt->sopt_val, valsize);
		} else {
			bcopy(val, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
		}
	}
	return error;
}
/*
 * Returns:	0			Success
 *	<pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
 *	<pr_ctloutput>:???
 *	<sf_getoption>:???
 */
int
sogetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
{
	int error, optval;
	struct linger l;
	struct timeval tv;
#if CONFIG_MACF_SOCKET
	struct mac extmac;
#endif /* MAC_SOCKET */

	if (sopt->sopt_dir != SOPT_GET) {
		sopt->sopt_dir = SOPT_GET;
	}

	if (dolock) {
		socket_lock(so, 1);
	}

	error = sflt_getsockopt(so, sopt);
	if (error != 0) {
		if (error == EJUSTRETURN) {
			error = 0;
		}
		goto out;
	}

	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto != NULL &&
		    so->so_proto->pr_ctloutput != NULL) {
			error = (*so->so_proto->pr_ctloutput)(so, sopt);
			goto out;
		}
		error = ENOPROTOOPT;
	} else {
		/*
		 * Allow socket-level (SOL_SOCKET) options to be filtered by
		 * the protocol layer, if needed.  A zero value returned from
		 * the handler means use default socket-level processing as
		 * done by the rest of this routine.  Otherwise, any other
		 * return value indicates that the option is unsupported.
		 */
		if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs->
		    pru_socheckopt(so, sopt)) != 0) {
			goto out;
		}

		error = 0;
		switch (sopt->sopt_name) {
		case SO_LINGER:
		case SO_LINGER_SEC:
			l.l_onoff = ((so->so_options & SO_LINGER) ? 1 : 0);
			l.l_linger = (sopt->sopt_name == SO_LINGER) ?
			    so->so_linger : so->so_linger / hz;
			error = sooptcopyout(sopt, &l, sizeof(l));
			break;

		case SO_USELOOPBACK:
		case SO_TIMESTAMP_MONOTONIC:
		case SO_TIMESTAMP_CONTINUOUS:
		case SO_WANTOOBFLAG:
		case SO_NOWAKEFROMSLEEP:
		case SO_NOAPNFALLBK:
			optval = so->so_options & sopt->sopt_name;
integer:
			error = sooptcopyout(sopt, &optval, sizeof(optval));
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_NREAD:
			if (so->so_proto->pr_flags & PR_ATOMIC) {
				int pkt_total = 0;
				struct mbuf *m1;

				m1 = so->so_rcv.sb_mb;
				while (m1 != NULL) {
					if (m1->m_type == MT_DATA ||
					    m1->m_type == MT_HEADER ||
					    m1->m_type == MT_OOBDATA) {
						pkt_total += m1->m_len;
					}
					m1 = m1->m_next;
				}
				optval = pkt_total;
			} else {
				optval = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
			}
			goto integer;

		case SO_NUMRCVPKT:
			if (so->so_proto->pr_flags & PR_ATOMIC) {
				int cnt = 0;
				struct mbuf *m1;

				m1 = so->so_rcv.sb_mb;
				while (m1 != NULL) {
					cnt += 1;
					m1 = m1->m_nextpkt;
				}
				optval = cnt;
				goto integer;
			} else {
				error = ENOPROTOOPT;
				break;
			}

		case SO_NWRITE:
			optval = so->so_snd.sb_cc;
			goto integer;

		case SO_ERROR:
			optval = so->so_error;
			goto integer;

		case SO_SNDBUF: {
			u_int32_t hiwat = so->so_snd.sb_hiwat;

			if (so->so_snd.sb_flags & SB_UNIX) {
				struct unpcb *unp =
				    (struct unpcb *)(so->so_pcb);
				if (unp != NULL && unp->unp_conn != NULL) {
					hiwat += unp->unp_conn->unp_cc;
				}
			}

			optval = hiwat;
			goto integer;
		}
		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			tv = (sopt->sopt_name == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			error = sooptcopyout_timeval(sopt, &tv);
			break;
		case SO_NOSIGPIPE:
			optval = (so->so_flags & SOF_NOSIGPIPE);
			goto integer;

		case SO_NOADDRERR:
			optval = (so->so_flags & SOF_NOADDRAVAIL);
			goto integer;

		case SO_REUSESHAREUID:
			optval = (so->so_flags & SOF_REUSESHAREUID);
			goto integer;

		case SO_NOTIFYCONFLICT:
			optval = (so->so_flags & SOF_NOTIFYCONFLICT);
			goto integer;

		case SO_RESTRICTIONS:
			optval = so_get_restrictions(so);
			goto integer;

		case SO_AWDL_UNRESTRICTED:
			if (SOCK_DOM(so) == PF_INET ||
			    SOCK_DOM(so) == PF_INET6) {
				optval = inp_get_awdl_unrestricted(
					sotoinpcb(so));
				goto integer;
			} else {
				error = EOPNOTSUPP;
			}
			break;

		case SO_INTCOPROC_ALLOW:
			if (SOCK_DOM(so) == PF_INET6) {
				optval = inp_get_intcoproc_allowed(
					sotoinpcb(so));
				goto integer;
			} else {
				error = EOPNOTSUPP;
			}
			break;

		case SO_LABEL:
#if CONFIG_MACF_SOCKET
			if ((error = sooptcopyin(sopt, &extmac, sizeof(extmac),
			    sizeof(extmac))) != 0 ||
			    (error = mac_socket_label_get(proc_ucred(
			    sopt->sopt_p), so, &extmac)) != 0) {
				break;
			}

			error = sooptcopyout(sopt, &extmac, sizeof(extmac));
#else
			error = EOPNOTSUPP;
#endif /* MAC_SOCKET */
			break;

		case SO_PEERLABEL:
#if CONFIG_MACF_SOCKET
			if ((error = sooptcopyin(sopt, &extmac, sizeof(extmac),
			    sizeof(extmac))) != 0 ||
			    (error = mac_socketpeer_label_get(proc_ucred(
			    sopt->sopt_p), so, &extmac)) != 0) {
				break;
			}

			error = sooptcopyout(sopt, &extmac, sizeof(extmac));
#else
			error = EOPNOTSUPP;
#endif /* MAC_SOCKET */
			break;

#ifdef __APPLE_API_PRIVATE
		case SO_UPCALLCLOSEWAIT:
			optval = (so->so_flags & SOF_UPCALLCLOSEWAIT);
			goto integer;
#endif
		case SO_RANDOMPORT:
			optval = (so->so_flags & SOF_BINDRANDOMPORT);
			goto integer;

		case SO_NP_EXTENSIONS: {
			struct so_np_extensions sonpx = {};

			sonpx.npx_flags = (so->so_flags & SOF_NPX_SETOPTSHUT) ?
			    SONPX_SETOPTSHUT : 0;
			sonpx.npx_mask = SONPX_MASK_VALID;

			error = sooptcopyout(sopt, &sonpx,
			    sizeof(struct so_np_extensions));
			break;
		}
		case SO_TRAFFIC_CLASS:
			optval = so->so_traffic_class;
			goto integer;

		case SO_RECV_TRAFFIC_CLASS:
			optval = (so->so_flags & SOF_RECV_TRAFFIC_CLASS);
			goto integer;

		case SO_TRAFFIC_CLASS_STATS:
			error = sooptcopyout(sopt, &so->so_tc_stats,
			    sizeof(so->so_tc_stats));
			break;

#if (DEVELOPMENT || DEBUG)
		case SO_TRAFFIC_CLASS_DBG:
			error = sogetopt_tcdbg(so, sopt);
			break;
#endif /* (DEVELOPMENT || DEBUG) */

		case SO_PRIVILEGED_TRAFFIC_CLASS:
			optval = (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS);
			goto integer;

		case SO_DEFUNCTOK:
			optval = !(so->so_flags & SOF_NODEFUNCT);
			goto integer;

		case SO_ISDEFUNCT:
			optval = (so->so_flags & SOF_DEFUNCT);
			goto integer;

		case SO_OPPORTUNISTIC:
			optval = so_get_opportunistic(so);
			goto integer;

		case SO_FLUSH:
			/* This option is not gettable */
			error = EINVAL;
			break;

		case SO_RECV_ANYIF:
			optval = so_get_recv_anyif(so);
			goto integer;

		case SO_TRAFFIC_MGT_BACKGROUND:
			/* This option is handled by lower layer(s) */
			if (so->so_proto != NULL &&
			    so->so_proto->pr_ctloutput != NULL) {
				(void) so->so_proto->pr_ctloutput(so, sopt);
			}
			break;

#if FLOW_DIVERT
		case SO_FLOW_DIVERT_TOKEN:
			error = flow_divert_token_get(so, sopt);
			break;
#endif /* FLOW_DIVERT */
		case SO_NECP_ATTRIBUTES:
			error = necp_get_socket_attributes(so, sopt);
			break;

		case SO_NECP_CLIENTUUID: {
			uuid_t *ncu;

			if (SOCK_DOM(so) == PF_MULTIPATH) {
				ncu = &mpsotomppcb(so)->necp_client_uuid;
			} else if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
				ncu = &sotoinpcb(so)->necp_client_uuid;
			} else {
				error = EINVAL;
				goto out;
			}

			error = sooptcopyout(sopt, ncu, sizeof(uuid_t));
			break;
		}

		case SO_NECP_LISTENUUID: {
			uuid_t *nlu;

			if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
				if (sotoinpcb(so)->inp_flags2 & INP2_EXTERNAL_PORT) {
					nlu = &sotoinpcb(so)->necp_client_uuid;
				} else {
					error = ENOENT;
					goto out;
				}
			} else {
				error = EINVAL;
				goto out;
			}

			error = sooptcopyout(sopt, nlu, sizeof(uuid_t));
			break;
		}

#if CONTENT_FILTER
		case SO_CFIL_SOCK_ID: {
			cfil_sock_id_t sock_id;

			sock_id = cfil_sock_id_from_socket(so);

			error = sooptcopyout(sopt, &sock_id,
			    sizeof(cfil_sock_id_t));
			break;
		}
#endif /* CONTENT_FILTER */

		case SO_EXTENDED_BK_IDLE:
			optval = (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED);
			goto integer;

		case SO_MARK_CELLFALLBACK:
			optval = ((so->so_flags1 & SOF1_CELLFALLBACK) > 0)
			    ? 1 : 0;
			goto integer;

		case SO_NET_SERVICE_TYPE: {
			if ((so->so_flags1 & SOF1_TC_NET_SERV_TYPE)) {
				optval = so->so_netsvctype;
			} else {
				optval = NET_SERVICE_TYPE_BE;
			}
			goto integer;
		}

		case SO_NETSVC_MARKING_LEVEL:
			optval = so_get_netsvc_marking_level(so);
			goto integer;

		case SO_MPKL_SEND_INFO: {
			struct so_mpkl_send_info so_mpkl_send_info;

			uuid_copy(so_mpkl_send_info.mpkl_uuid, so->so_mpkl_send_uuid);
			so_mpkl_send_info.mpkl_proto = so->so_mpkl_send_proto;
			error = sooptcopyout(sopt, &so_mpkl_send_info,
			    sizeof(struct so_mpkl_send_info));
			break;
		}

		default:
			error = ENOPROTOOPT;
			break;
		}
	}
out:
	if (dolock) {
		socket_unlock(so, 1);
	}
	return error;
}
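
/*
 * Illustrative user-space sketch (not part of this file, excluded from the
 * build): SO_NREAD, handled above, reports the bytes of protocol data
 * queued in the receive buffer.
 */
#if 0 /* example only */
#include <sys/socket.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fds[2];
	int nread = 0;
	socklen_t len = sizeof(nread);

	socketpair(AF_UNIX, SOCK_DGRAM, 0, fds);
	write(fds[0], "hello", 5);

	if (getsockopt(fds[1], SOL_SOCKET, SO_NREAD, &nread, &len) == 0)
		printf("SO_NREAD=%d\n", nread);

	close(fds[0]);
	close(fds[1]);
	return 0;
}
#endif /* example only */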
/*
 * The size limits on our soopt_getm are different from those on FreeBSD.
 * We limit the size of options to MCLBYTES. This will have to change
 * if we need to define options that need more space than MCLBYTES.
 */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
	struct mbuf *m, *m_prev;
	int sopt_size = sopt->sopt_valsize;
	int how;

	if (sopt_size <= 0 || sopt_size > MCLBYTES) {
		return EMSGSIZE;
	}

	how = sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT;
	MGET(m, how, MT_DATA);
	if (m == NULL) {
		return ENOBUFS;
	}
	if (sopt_size > MLEN) {
		MCLGET(m, how);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return ENOBUFS;
		}
		m->m_len = min(MCLBYTES, sopt_size);
	} else {
		m->m_len = min(MLEN, sopt_size);
	}
	sopt_size -= m->m_len;
	*mp = m;
	m_prev = m;

	while (sopt_size > 0) {
		MGET(m, how, MT_DATA);
		if (m == NULL) {
			m_freem(*mp);
			return ENOBUFS;
		}
		if (sopt_size > MLEN) {
			MCLGET(m, how);
			if ((m->m_flags & M_EXT) == 0) {
				m_freem(*mp);
				m_freem(m);
				return ENOBUFS;
			}
			m->m_len = min(MCLBYTES, sopt_size);
		} else {
			m->m_len = min(MLEN, sopt_size);
		}
		sopt_size -= m->m_len;
		m_prev->m_next = m;
		m_prev = m;
	}
	return 0;
}
/* copyin sopt data into mbuf chain */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;
	int error;

	if (sopt->sopt_val == USER_ADDR_NULL) {
		return 0;
	}
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != kernproc) {
			error = copyin(sopt->sopt_val, mtod(m, char *),
			    m->m_len);
			if (error != 0) {
				m_freem(m0);
				return error;
			}
		} else {
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val),
			    mtod(m, char *), m->m_len);
		}
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val += m->m_len;
		m = m->m_next;
	}
	/* should be allocated with enough space at ip6_sooptmcopyin() */
	if (m != NULL) {
		panic("soopt_mcopyin");
		/* NOTREACHED */
	}
	return 0;
}

/* copyout mbuf chain data into soopt */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;
	size_t valsize = 0;
	int error;

	if (sopt->sopt_val == USER_ADDR_NULL) {
		return 0;
	}
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != kernproc) {
			error = copyout(mtod(m, char *), sopt->sopt_val,
			    m->m_len);
			if (error != 0) {
				break;
			}
		} else {
			bcopy(mtod(m, char *),
			    CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
		}
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val += m->m_len;
		valsize += m->m_len;
		m = m->m_next;
	}
	if (m != NULL) {
		/* enough soopt buffer should be given from user-land */
		m_freem(m0);
		return EINVAL;
	}
	sopt->sopt_valsize = valsize;
	return 0;
}
void
sohasoutofband(struct socket *so)
{
	if (so->so_pgid < 0) {
		gsignal(-so->so_pgid, SIGURG);
	} else if (so->so_pgid > 0) {
		proc_signal(so->so_pgid, SIGURG);
	}
	selwakeup(&so->so_rcv.sb_sel);
	if (so->so_rcv.sb_flags & SB_KNOTE) {
		KNOTE(&so->so_rcv.sb_sel.si_note,
		    (NOTE_OOB | SO_FILT_HINT_LOCKED));
	}
}
int
sopoll(struct socket *so, int events, kauth_cred_t cred, void * wql)
{
#pragma unused(cred)
	struct proc *p = current_proc();
	int revents = 0;

	socket_lock(so, 1);
	so_update_last_owner_locked(so, PROC_NULL);
	so_update_policy(so);

	if (events & (POLLIN | POLLRDNORM)) {
		if (soreadable(so)) {
			revents |= events & (POLLIN | POLLRDNORM);
		}
	}

	if (events & (POLLOUT | POLLWRNORM)) {
		if (sowriteable(so)) {
			revents |= events & (POLLOUT | POLLWRNORM);
		}
	}

	if (events & (POLLPRI | POLLRDBAND)) {
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
			revents |= events & (POLLPRI | POLLRDBAND);
		}
	}

	if (revents == 0) {
		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			/*
			 * Darwin sets the flag first,
			 * BSD calls selrecord first
			 */
			so->so_rcv.sb_flags |= SB_SEL;
			selrecord(p, &so->so_rcv.sb_sel, wql);
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			/*
			 * Darwin sets the flag first,
			 * BSD calls selrecord first
			 */
			so->so_snd.sb_flags |= SB_SEL;
			selrecord(p, &so->so_snd.sb_sel, wql);
		}
	}

	socket_unlock(so, 1);
	return revents;
}
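
/*
 * Illustrative user-space sketch (not part of this file, excluded from the
 * build): poll(2) on a socket is serviced by sopoll() above; POLLIN is
 * reported once the receive buffer is readable.
 */
#if 0 /* example only */
#include <poll.h>
#include <sys/socket.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fds[2];
	struct pollfd pfd;

	socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
	write(fds[0], "x", 1);

	pfd.fd = fds[1];
	pfd.events = POLLIN | POLLPRI;
	pfd.revents = 0;

	if (poll(&pfd, 1, 1000) > 0 && (pfd.revents & POLLIN))
		printf("socket is readable\n");

	close(fds[0]);
	close(fds[1]);
	return 0;
}
#endif /* example only */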
int
soo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)fp->f_fglob->fg_data;
	int result;

	socket_lock(so, 1);
	so_update_last_owner_locked(so, PROC_NULL);
	so_update_policy(so);

#if CONFIG_MACF_SOCKET
	proc_t p = knote_get_kq(kn)->kq_p;
	if (mac_socket_check_kqfilter(proc_ucred(p), kn, so) != 0) {
		socket_unlock(so, 1);
		knote_set_error(kn, EPERM);
		return 0;
	}
#endif /* MAC_SOCKET */

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_filtid = EVFILTID_SOREAD;
		break;
	case EVFILT_WRITE:
		kn->kn_filtid = EVFILTID_SOWRITE;
		break;
	case EVFILT_SOCK:
		kn->kn_filtid = EVFILTID_SCK;
		break;
	case EVFILT_EXCEPT:
		kn->kn_filtid = EVFILTID_SOEXCEPT;
		break;
	default:
		socket_unlock(so, 1);
		knote_set_error(kn, EINVAL);
		return 0;
	}

	/*
	 * call the appropriate sub-filter attach
	 * with the socket still locked
	 */
	result = knote_fops(kn)->f_attach(kn, kev);

	socket_unlock(so, 1);

	return result;
}
static int
filt_soread_common(struct knote *kn, struct kevent_qos_s *kev, struct socket *so)
{
	int retval = 0;
	int64_t data = 0;

	if (so->so_options & SO_ACCEPTCONN) {
		/*
		 * Radar 6615193 handle the listen case dynamically
		 * for kqueue read filter. This allows listen() to be
		 * called after registering the kqueue EVFILT_READ.
		 */
		retval = !TAILQ_EMPTY(&so->so_comp);
		goto out;
	}

	/* socket isn't a listener */
	/*
	 * NOTE_LOWAT specifies new low water mark in data, i.e.
	 * the bytes of protocol data. We therefore exclude any
	 * control bytes.
	 */
	data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;

	if (kn->kn_sfflags & NOTE_OOB) {
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
			kn->kn_fflags |= NOTE_OOB;
			data -= so->so_oobmark;
			retval = 1;
			goto out;
		}
	}

	if ((so->so_state & SS_CANTRCVMORE)
#if CONTENT_FILTER
	    && cfil_sock_data_pending(&so->so_rcv) == 0
#endif /* CONTENT_FILTER */
	    ) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		retval = 1;
		goto out;
	}

	if (so->so_error) {	/* temporary udp error */
		retval = 1;
		goto out;
	}

	int64_t lowwat = so->so_rcv.sb_lowat;
	/*
	 * Ensure that when NOTE_LOWAT is used, the derived
	 * low water mark is bounded by socket's rcv buf's
	 * high and low water mark values.
	 */
	if (kn->kn_sfflags & NOTE_LOWAT) {
		if (kn->kn_sdata > so->so_rcv.sb_hiwat) {
			lowwat = so->so_rcv.sb_hiwat;
		} else if (kn->kn_sdata > lowwat) {
			lowwat = kn->kn_sdata;
		}
	}

	/*
	 * While the `data` field is the amount of data to read,
	 * 0-sized packets need to wake up the kqueue, see 58140856,
	 * so we need to take control bytes into account too.
	 */
	retval = (so->so_rcv.sb_cc >= lowwat);

out:
	if (retval && kev) {
		knote_fill_kevent(kn, kev, data);
	}
	return retval;
}
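
/*
 * Illustrative user-space sketch (not part of this file, excluded from the
 * build): NOTE_LOWAT, handled in filt_soread_common() above, raises the
 * readability threshold for an EVFILT_READ knote; the kernel clips the
 * requested value to the receive buffer's high-water mark.
 */
#if 0 /* example only */
#include <sys/event.h>
#include <sys/socket.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fds[2], kq = kqueue();
	struct kevent change, event;

	socketpair(AF_UNIX, SOCK_STREAM, 0, fds);

	/* Only report readability once at least 16 bytes are queued. */
	EV_SET(&change, fds[1], EVFILT_READ, EV_ADD, NOTE_LOWAT, 16, NULL);
	kevent(kq, &change, 1, NULL, 0, NULL);

	write(fds[0], "0123456789abcdef", 16);

	if (kevent(kq, NULL, 0, &event, 1, NULL) > 0)
		printf("readable, %ld bytes pending\n", (long)event.data);

	close(fds[0]);
	close(fds[1]);
	close(kq);
	return 0;
}
#endif /* example only */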
static int
filt_sorattach(struct knote *kn, __unused struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	/*
	 * If the caller explicitly asked for OOB results (e.g. poll())
	 * from EVFILT_READ, then save that off in the hookid field
	 * and reserve the kn_flags EV_OOBAND bit for output only.
	 */
	if (kn->kn_filter == EVFILT_READ &&
	    kn->kn_flags & EV_OOBAND) {
		kn->kn_flags &= ~EV_OOBAND;
		kn->kn_hook32 = EV_OOBAND;
	} else {
		kn->kn_hook32 = 0;
	}
	if (KNOTE_ATTACH(&so->so_rcv.sb_sel.si_note, kn)) {
		so->so_rcv.sb_flags |= SB_KNOTE;
	}

	/* indicate if event is already fired */
	return filt_soread_common(kn, NULL, so);
}

static void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	socket_lock(so, 1);
	if (so->so_rcv.sb_flags & SB_KNOTE) {
		if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn)) {
			so->so_rcv.sb_flags &= ~SB_KNOTE;
		}
	}
	socket_unlock(so, 1);
}

static int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
	int retval;

	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
		socket_lock(so, 1);
	}

	retval = filt_soread_common(kn, NULL, so);

	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
		socket_unlock(so, 1);
	}

	return retval;
}

static int
filt_sortouch(struct knote *kn, struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
	int retval;

	socket_lock(so, 1);

	/* save off the new input fflags and data */
	kn->kn_sfflags = kev->fflags;
	kn->kn_sdata = kev->data;

	/* determine if changes result in fired events */
	retval = filt_soread_common(kn, NULL, so);

	socket_unlock(so, 1);

	return retval;
}

static int
filt_sorprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
	int retval;

	socket_lock(so, 1);
	retval = filt_soread_common(kn, kev, so);
	socket_unlock(so, 1);

	return retval;
}

int
so_wait_for_if_feedback(struct socket *so)
{
	if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) &&
	    (so->so_state & SS_ISCONNECTED)) {
		struct inpcb *inp = sotoinpcb(so);
		if (INP_WAIT_FOR_IF_FEEDBACK(inp)) {
			return 1;
		}
	}
	return 0;
}
static int
filt_sowrite_common(struct knote *kn, struct kevent_qos_s *kev, struct socket *so)
{
	int ret = 0;
	int64_t data = sbspace(&so->so_snd);

	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		ret = 1;
		goto out;
	}

	if (so->so_error) {	/* temporary udp error */
		ret = 1;
		goto out;
	}

	if (!socanwrite(so)) {
		ret = 0;
		goto out;
	}

	if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
		ret = 1;
		goto out;
	}

	int64_t lowwat = so->so_snd.sb_lowat;

	if (kn->kn_sfflags & NOTE_LOWAT) {
		if (kn->kn_sdata > so->so_snd.sb_hiwat) {
			lowwat = so->so_snd.sb_hiwat;
		} else if (kn->kn_sdata > lowwat) {
			lowwat = kn->kn_sdata;
		}
	}

	if (data >= lowwat) {
		if ((so->so_flags & SOF_NOTSENT_LOWAT)
#if (DEBUG || DEVELOPMENT)
		    && so_notsent_lowat_check == 1
#endif /* DEBUG || DEVELOPMENT */
		    ) {
			if ((SOCK_DOM(so) == PF_INET ||
			    SOCK_DOM(so) == PF_INET6) &&
			    so->so_type == SOCK_STREAM) {
				ret = tcp_notsent_lowat_check(so);
			} else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
			    (SOCK_PROTO(so) == IPPROTO_TCP)) {
				ret = mptcp_notsent_lowat_check(so);
			} else {
				ret = 1;
				goto out;
			}
		} else {
			ret = 1;
		}
	}
	if (so_wait_for_if_feedback(so)) {
		ret = 0;
	}

out:
	if (ret && kev) {
		knote_fill_kevent(kn, kev, data);
	}
	return ret;
}
static int
filt_sowattach(struct knote *kn, __unused struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	/* socket locked */
	if (KNOTE_ATTACH(&so->so_snd.sb_sel.si_note, kn)) {
		so->so_snd.sb_flags |= SB_KNOTE;
	}

	/* determine if its already fired */
	return filt_sowrite_common(kn, NULL, so);
}

static void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	socket_lock(so, 1);

	if (so->so_snd.sb_flags & SB_KNOTE) {
		if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn)) {
			so->so_snd.sb_flags &= ~SB_KNOTE;
		}
	}
	socket_unlock(so, 1);
}

static int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
	int ret;

	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
		socket_lock(so, 1);
	}

	ret = filt_sowrite_common(kn, NULL, so);

	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
		socket_unlock(so, 1);
	}

	return ret;
}

static int
filt_sowtouch(struct knote *kn, struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
	int ret;

	socket_lock(so, 1);

	/* save off the new input fflags and data */
	kn->kn_sfflags = kev->fflags;
	kn->kn_sdata = kev->data;

	/* determine if these changes result in a triggered event */
	ret = filt_sowrite_common(kn, NULL, so);

	socket_unlock(so, 1);

	return ret;
}

static int
filt_sowprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
	int ret;

	socket_lock(so, 1);
	ret = filt_sowrite_common(kn, kev, so);
	socket_unlock(so, 1);

	return ret;
}
static int
filt_sockev_common(struct knote *kn, struct kevent_qos_s *kev,
    struct socket *so, long ev_hint)
{
	int ret = 0;
	int64_t data = 0;
	uint32_t level_trigger = 0;

	if (ev_hint & SO_FILT_HINT_CONNRESET) {
		kn->kn_fflags |= NOTE_CONNRESET;
	}
	if (ev_hint & SO_FILT_HINT_TIMEOUT) {
		kn->kn_fflags |= NOTE_TIMEOUT;
	}
	if (ev_hint & SO_FILT_HINT_NOSRCADDR) {
		kn->kn_fflags |= NOTE_NOSRCADDR;
	}
	if (ev_hint & SO_FILT_HINT_IFDENIED) {
		kn->kn_fflags |= NOTE_IFDENIED;
	}
	if (ev_hint & SO_FILT_HINT_KEEPALIVE) {
		kn->kn_fflags |= NOTE_KEEPALIVE;
	}
	if (ev_hint & SO_FILT_HINT_ADAPTIVE_WTIMO) {
		kn->kn_fflags |= NOTE_ADAPTIVE_WTIMO;
	}
	if (ev_hint & SO_FILT_HINT_ADAPTIVE_RTIMO) {
		kn->kn_fflags |= NOTE_ADAPTIVE_RTIMO;
	}
	if ((ev_hint & SO_FILT_HINT_CONNECTED) ||
	    (so->so_state & SS_ISCONNECTED)) {
		kn->kn_fflags |= NOTE_CONNECTED;
		level_trigger |= NOTE_CONNECTED;
	}
	if ((ev_hint & SO_FILT_HINT_DISCONNECTED) ||
	    (so->so_state & SS_ISDISCONNECTED)) {
		kn->kn_fflags |= NOTE_DISCONNECTED;
		level_trigger |= NOTE_DISCONNECTED;
	}
	if (ev_hint & SO_FILT_HINT_CONNINFO_UPDATED) {
		if (so->so_proto != NULL &&
		    (so->so_proto->pr_flags & PR_EVCONNINFO)) {
			kn->kn_fflags |= NOTE_CONNINFO_UPDATED;
		}
	}
	if ((ev_hint & SO_FILT_HINT_NOTIFY_ACK) ||
	    tcp_notify_ack_active(so)) {
		kn->kn_fflags |= NOTE_NOTIFY_ACK;
	}

	if ((so->so_state & SS_CANTRCVMORE)
#if CONTENT_FILTER
	    && cfil_sock_data_pending(&so->so_rcv) == 0
#endif /* CONTENT_FILTER */
	    ) {
		kn->kn_fflags |= NOTE_READCLOSED;
		level_trigger |= NOTE_READCLOSED;
	}

	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_fflags |= NOTE_WRITECLOSED;
		level_trigger |= NOTE_WRITECLOSED;
	}

	if ((ev_hint & SO_FILT_HINT_SUSPEND) ||
	    (so->so_flags & SOF_SUSPENDED)) {
		kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);

		/* If resume event was delivered before, reset it */
		kn->kn_hook32 &= ~NOTE_RESUME;

		kn->kn_fflags |= NOTE_SUSPEND;
		level_trigger |= NOTE_SUSPEND;
	}

	if ((ev_hint & SO_FILT_HINT_RESUME) ||
	    (so->so_flags & SOF_SUSPENDED) == 0) {
		kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);

		/* If suspend event was delivered before, reset it */
		kn->kn_hook32 &= ~NOTE_SUSPEND;

		kn->kn_fflags |= NOTE_RESUME;
		level_trigger |= NOTE_RESUME;
	}

	if (so->so_error != 0) {
		ret = 1;
		data = so->so_error;
		kn->kn_flags |= EV_EOF;
	} else {
		u_int32_t data32 = 0;
		get_sockev_state(so, &data32);
		data = data32;
	}

	/* Reset any events that are not requested on this knote */
	kn->kn_fflags &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK);
	level_trigger &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK);

	/* Find the level triggered events that are already delivered */
	level_trigger &= kn->kn_hook32;
	level_trigger &= EVFILT_SOCK_LEVEL_TRIGGER_MASK;

	/* Do not deliver level triggered events more than once */
	if ((kn->kn_fflags & ~level_trigger) != 0) {
		ret = 1;
	}

	if (ret && kev) {
		/*
		 * Store the state of the events being delivered. This
		 * state can be used to deliver level triggered events
		 * at least once and still avoid waking up the application
		 * multiple times as long as the event is active.
		 */
		if (kn->kn_fflags != 0) {
			kn->kn_hook32 |= (kn->kn_fflags &
			    EVFILT_SOCK_LEVEL_TRIGGER_MASK);
		}

		/*
		 * NOTE_RESUME and NOTE_SUSPEND are an exception, deliver
		 * only one of them and remember the last one that was
		 * delivered.
		 */
		if (kn->kn_fflags & NOTE_SUSPEND) {
			kn->kn_hook32 &= ~NOTE_RESUME;
		}
		if (kn->kn_fflags & NOTE_RESUME) {
			kn->kn_hook32 &= ~NOTE_SUSPEND;
		}

		knote_fill_kevent(kn, kev, data);
	}
	return ret;
}
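
/*
 * Illustrative user-space sketch (not part of this file, excluded from the
 * build): EVFILT_SOCK, the Apple-specific socket-state filter serviced by
 * filt_sockev_common() above, reports state changes such as NOTE_CONNECTED
 * and NOTE_READCLOSED in kevent fflags; level-triggered state is delivered
 * once and remembered, as described in the comments above.
 */
#if 0 /* example only */
#include <sys/event.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int s = socket(AF_INET, SOCK_STREAM, 0);
	int kq = kqueue();
	struct kevent change, event;
	struct timespec zero = { 0, 0 };

	EV_SET(&change, s, EVFILT_SOCK, EV_ADD,
	    NOTE_CONNECTED | NOTE_DISCONNECTED | NOTE_READCLOSED, 0, NULL);
	kevent(kq, &change, 1, NULL, 0, NULL);

	/* ... connect(s, ...) elsewhere; a later kevent() call then
	 * reports the matching NOTE_* bits in event.fflags ... */

	if (kevent(kq, NULL, 0, &event, 1, &zero) > 0)
		printf("socket event fflags=0x%x\n", event.fflags);

	close(s);
	close(kq);
	return 0;
}
#endif /* example only */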
static int
filt_sockattach(struct knote *kn, __unused struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	/* socket locked */
	if (KNOTE_ATTACH(&so->so_klist, kn)) {
		so->so_flags |= SOF_KNOTE;
	}

	/* determine if event already fired */
	return filt_sockev_common(kn, NULL, so, 0);
}

static void
filt_sockdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	socket_lock(so, 1);

	if ((so->so_flags & SOF_KNOTE) != 0) {
		if (KNOTE_DETACH(&so->so_klist, kn)) {
			so->so_flags &= ~SOF_KNOTE;
		}
	}
	socket_unlock(so, 1);
}

static int
filt_sockev(struct knote *kn, long hint)
{
	int ret = 0, locked = 0;
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
	long ev_hint = (hint & SO_FILT_HINT_EV);

	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
		socket_lock(so, 1);
		locked = 1;
	}

	ret = filt_sockev_common(kn, NULL, so, ev_hint);

	if (locked) {
		socket_unlock(so, 1);
	}

	return ret;
}
/*
 * filt_socktouch - update event state
 */
static int
filt_socktouch(
	struct knote *kn,
	struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
	uint32_t changed_flags;
	int ret;

	socket_lock(so, 1);

	/* save off the [result] data and fflags */
	changed_flags = (kn->kn_sfflags ^ kn->kn_hook32);

	/* save off the new input fflags and data */
	kn->kn_sfflags = kev->fflags;
	kn->kn_sdata = kev->data;

	/* restrict the current results to the (smaller?) set of new interest */
	/*
	 * For compatibility with previous implementations, we leave kn_fflags
	 * as they were before.
	 */
	//kn->kn_fflags &= kev->fflags;

	/*
	 * Since we keep track of events that are already
	 * delivered, if any of those events are not requested
	 * anymore the state related to them can be reset
	 */
	kn->kn_hook32 &= ~(changed_flags & EVFILT_SOCK_LEVEL_TRIGGER_MASK);

	/* determine if we have events to deliver */
	ret = filt_sockev_common(kn, NULL, so, 0);

	socket_unlock(so, 1);

	return ret;
}

/*
 * filt_sockprocess - query event fired state and return data
 */
static int
filt_sockprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
	int ret;

	socket_lock(so, 1);

	ret = filt_sockev_common(kn, kev, so, 0);

	socket_unlock(so, 1);

	return ret;
}
void
get_sockev_state(struct socket *so, u_int32_t *statep)
{
	u_int32_t state = *(statep);

	/*
	 * If the state variable is already used by a previous event,
	 * leave it alone.
	 */
	if (state != 0) {
		return;
	}

	if (so->so_state & SS_ISCONNECTED) {
		state |= SOCKEV_CONNECTED;
	} else {
		state &= ~(SOCKEV_CONNECTED);
	}
	state |= ((so->so_state & SS_ISDISCONNECTED) ? SOCKEV_DISCONNECTED : 0);
	*(statep) = state;
}

#define SO_LOCK_HISTORY_STR_LEN \
	(2 * SO_LCKDBG_MAX * (2 + (2 * sizeof (void *)) + 1) + 1)

__private_extern__ const char *
solockhistory_nr(struct socket *so)
{
	int n = 0;
	int i;
	static char lock_history_str[SO_LOCK_HISTORY_STR_LEN];

	bzero(lock_history_str, sizeof(lock_history_str));
	for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) {
		n += scnprintf(lock_history_str + n,
		    SO_LOCK_HISTORY_STR_LEN - n, "%p:%p ",
		    so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX],
		    so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX]);
	}
	return lock_history_str;
}
lck_mtx_t *
socket_getlock(struct socket *so, int flags)
{
	if (so->so_proto->pr_getlock != NULL) {
		return (*so->so_proto->pr_getlock)(so, flags);
	} else {
		return so->so_proto->pr_domain->dom_mtx;
	}
}

void
socket_lock(struct socket *so, int refcount)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	if (so->so_proto->pr_lock) {
		(*so->so_proto->pr_lock)(so, refcount, lr_saved);
	} else {
#ifdef MORE_LOCKING_DEBUG
		LCK_MTX_ASSERT(so->so_proto->pr_domain->dom_mtx,
		    LCK_MTX_ASSERT_NOTOWNED);
#endif
		lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
		if (refcount) {
			so->so_usecount++;
		}
		so->lock_lr[so->next_lock_lr] = lr_saved;
		so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
	}
}

void
socket_lock_assert_owned(struct socket *so)
{
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	} else {
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	}

	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
}

int
socket_try_lock(struct socket *so)
{
	lck_mtx_t *mtx;

	if (so->so_proto->pr_getlock != NULL) {
		mtx = (*so->so_proto->pr_getlock)(so, 0);
	} else {
		mtx = so->so_proto->pr_domain->dom_mtx;
	}

	return lck_mtx_try_lock(mtx);
}

void
socket_unlock(struct socket *so, int refcount)
{
	void *lr_saved;
	lck_mtx_t *mutex_held;

	lr_saved = __builtin_return_address(0);

	if (so == NULL || so->so_proto == NULL) {
		panic("%s: null so_proto so=%p\n", __func__, so);
		/* NOTREACHED */
	}

	if (so->so_proto->pr_unlock) {
		(*so->so_proto->pr_unlock)(so, refcount, lr_saved);
	} else {
		mutex_held = so->so_proto->pr_domain->dom_mtx;
#ifdef MORE_LOCKING_DEBUG
		LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif
		so->unlock_lr[so->next_unlock_lr] = lr_saved;
		so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;

		if (refcount) {
			if (so->so_usecount <= 0) {
				panic("%s: bad refcount=%d so=%p (%d, %d, %d) "
				    "lrh=%s", __func__, so->so_usecount, so,
				    SOCK_DOM(so), so->so_type,
				    SOCK_PROTO(so), solockhistory_nr(so));
				/* NOTREACHED */
			}

			so->so_usecount--;
			if (so->so_usecount == 0) {
				sofreelastref(so, 1);
			}
		}
		lck_mtx_unlock(mutex_held);
	}
}
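
/*
 * In-kernel usage sketch (excluded from the build): the helpers above pair a
 * mutex acquisition with an optional use-count reference.  The caller name
 * "example_poke_socket" is hypothetical and only for illustration.
 */
#if 0 /* example only */
static void
example_poke_socket(struct socket *so)
{
	socket_lock(so, 1);             /* lock and take a use-count reference */
	socket_lock_assert_owned(so);   /* sanity check while holding it */

	/* ... operate on so->so_rcv / so->so_snd here ... */

	socket_unlock(so, 1);           /* drop the reference and unlock; may
	                                 * free the socket on last reference */
}
#endif /* example only */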
/* Called with socket locked, will unlock socket */
void
sofree(struct socket *so)
{
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	} else {
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

	sofreelastref(so, 0);
}

void
soreference(struct socket *so)
{
	socket_lock(so, 1);	/* locks & take one reference on socket */
	socket_unlock(so, 0);	/* unlock only */
}

void
sodereference(struct socket *so)
{
	socket_lock(so, 0);
	socket_unlock(so, 1);
}

/*
 * Set or clear SOF_MULTIPAGES on the socket to enable or disable the
 * possibility of using jumbo clusters.  Caller must ensure to hold
 * the socket lock.
 */
void
somultipages(struct socket *so, boolean_t set)
{
	if (set) {
		so->so_flags |= SOF_MULTIPAGES;
	} else {
		so->so_flags &= ~SOF_MULTIPAGES;
	}
}

void
soif2kcl(struct socket *so, boolean_t set)
{
	if (set) {
		so->so_flags1 |= SOF1_IF_2KCL;
	} else {
		so->so_flags1 &= ~SOF1_IF_2KCL;
	}
}

int
so_isdstlocal(struct socket *so)
{
	struct inpcb *inp = (struct inpcb *)so->so_pcb;

	if (SOCK_DOM(so) == PF_INET) {
		return inaddr_local(inp->inp_faddr);
	} else if (SOCK_DOM(so) == PF_INET6) {
		return in6addr_local(&inp->in6p_faddr);
	}

	return 0;
}
int
sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
{
	struct sockbuf *rcv, *snd;
	int err = 0, defunct;

	rcv = &so->so_rcv;
	snd = &so->so_snd;

	defunct = (so->so_flags & SOF_DEFUNCT);
	if (defunct) {
		if (!(snd->sb_flags & rcv->sb_flags & SB_DROP)) {
			panic("%s: SB_DROP not set", __func__);
			/* NOTREACHED */
		}
		goto done;
	}

	if (so->so_flags & SOF_NODEFUNCT) {
		if (noforce) {
			err = EOPNOTSUPP;
			if (p != PROC_NULL) {
				SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
				    "name %s level %d) so 0x%llx [%d,%d] "
				    "is not eligible for defunct "
				    "(%d)\n", __func__, proc_selfpid(),
				    proc_best_name(current_proc()), proc_pid(p),
				    proc_best_name(p), level,
				    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
				    SOCK_DOM(so), SOCK_TYPE(so), err);
			}
			return err;
		}
		so->so_flags &= ~SOF_NODEFUNCT;
		if (p != PROC_NULL) {
			SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
			    "name %s level %d) so 0x%llx [%d,%d] "
			    "(%d)\n", __func__, proc_selfpid(),
			    proc_best_name(current_proc()), proc_pid(p),
			    proc_best_name(p), level,
			    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
			    SOCK_DOM(so), SOCK_TYPE(so), err);
		}
	} else if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
		struct inpcb *inp = (struct inpcb *)so->so_pcb;
		struct ifnet *ifp = inp->inp_last_outifp;

		if (ifp && IFNET_IS_CELLULAR(ifp)) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nocell);
		} else if (so->so_flags & SOF_DELEGATED) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd);
		} else if (soextbkidlestat.so_xbkidle_time == 0) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_notime);
		} else if (noforce && p != PROC_NULL) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_active);

			so->so_flags1 |= SOF1_EXTEND_BK_IDLE_INPROG;
			so->so_extended_bk_start = net_uptime();
			OSBitOrAtomic(P_LXBKIDLEINPROG, &p->p_ladvflag);

			inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY);

			err = EOPNOTSUPP;
			SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
			    "name %s level %d) so 0x%llx [%d,%d] "
			    "(%d)\n", __func__, proc_selfpid(),
			    proc_best_name(current_proc()), proc_pid(p),
			    proc_best_name(p), level,
			    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
			    SOCK_DOM(so), SOCK_TYPE(so), err);
			return err;
		} else {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_forced);
		}
	}

	so->so_flags |= SOF_DEFUNCT;

	/* Prevent further data from being appended to the socket buffers */
	snd->sb_flags |= SB_DROP;
	rcv->sb_flags |= SB_DROP;

	/* Flush any existing data in the socket buffers */
	if (rcv->sb_cc != 0) {
		rcv->sb_flags &= ~SB_SEL;
		selthreadclear(&rcv->sb_sel);
	}
	if (snd->sb_cc != 0) {
		snd->sb_flags &= ~SB_SEL;
		selthreadclear(&snd->sb_sel);
	}

done:
	if (p != PROC_NULL) {
		SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s level %d) "
		    "so 0x%llx [%d,%d] %s defunct%s\n", __func__,
		    proc_selfpid(), proc_best_name(current_proc()),
		    proc_pid(p), proc_best_name(p), level,
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), defunct ? "is already" : "marked as",
		    (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ?
		    " extbkidle" : "");
	}
	return err;
}
int
sodefunct(struct proc *p, struct socket *so, int level)
{
	struct sockbuf *rcv, *snd;

	if (!(so->so_flags & SOF_DEFUNCT)) {
		panic("%s improperly called", __func__);
		/* NOTREACHED */
	}
	if (so->so_state & SS_DEFUNCT) {
		goto done;
	}

	rcv = &so->so_rcv;
	snd = &so->so_snd;

	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		char s[MAX_IPv6_STR_LEN];
		char d[MAX_IPv6_STR_LEN];
		struct inpcb *inp = sotoinpcb(so);

		if (p != PROC_NULL) {
			SODEFUNCTLOG(
				"%s[%d, %s]: (target pid %d name %s level %d) "
				"so 0x%llx [%s %s:%d -> %s:%d] is now defunct "
				"[rcv_si 0x%x, snd_si 0x%x, rcv_fl 0x%x, "
				" snd_fl 0x%x]\n", __func__,
				proc_selfpid(), proc_best_name(current_proc()),
				proc_pid(p), proc_best_name(p), level,
				(uint64_t)DEBUG_KERNEL_ADDRPERM(so),
				(SOCK_TYPE(so) == SOCK_STREAM) ? "TCP" : "UDP",
				inet_ntop(SOCK_DOM(so), ((SOCK_DOM(so) == PF_INET) ?
				(void *)&inp->inp_laddr.s_addr :
				(void *)&inp->in6p_laddr),
				s, sizeof(s)), ntohs(inp->in6p_lport),
				inet_ntop(SOCK_DOM(so), (SOCK_DOM(so) == PF_INET) ?
				(void *)&inp->inp_faddr.s_addr :
				(void *)&inp->in6p_faddr,
				d, sizeof(d)), ntohs(inp->in6p_fport),
				(uint32_t)rcv->sb_sel.si_flags,
				(uint32_t)snd->sb_sel.si_flags,
				rcv->sb_flags, snd->sb_flags);
		}
	} else if (p != PROC_NULL) {
		SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s level %d) "
		    "so 0x%llx [%d,%d] is now defunct [rcv_si 0x%x, "
		    "snd_si 0x%x, rcv_fl 0x%x, snd_fl 0x%x]\n", __func__,
		    proc_selfpid(), proc_best_name(current_proc()),
		    proc_pid(p), proc_best_name(p), level,
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so),
		    (uint32_t)rcv->sb_sel.si_flags,
		    (uint32_t)snd->sb_sel.si_flags, rcv->sb_flags,
		    snd->sb_flags);
	}

	/*
	 * Unwedge threads blocked on sbwait() and sb_lock().
	 */
	so->so_flags1 |= SOF1_DEFUNCTINPROG;
	if (rcv->sb_flags & SB_LOCK) {
		sbunlock(rcv, TRUE);	/* keep socket locked */
	}
	if (snd->sb_flags & SB_LOCK) {
		sbunlock(snd, TRUE);	/* keep socket locked */
	}

	/*
	 * Flush the buffers and disconnect.  We explicitly call shutdown
	 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
	 * states are set for the socket.  This would also flush out data
	 * hanging off the receive list of this socket.
	 */
	(void) soshutdownlock_final(so, SHUT_RD);
	(void) soshutdownlock_final(so, SHUT_WR);
	(void) sodisconnectlocked(so);

	/*
	 * Explicitly handle connectionless-protocol disconnection
	 * and release any remaining data in the socket buffers.
	 */
	if (!(so->so_state & SS_ISDISCONNECTED)) {
		(void) soisdisconnected(so);
	}

	if (so->so_error == 0) {
		so->so_error = EBADF;
	}

	if (rcv->sb_cc != 0) {
		rcv->sb_flags &= ~SB_SEL;
		selthreadclear(&rcv->sb_sel);
	}
	if (snd->sb_cc != 0) {
		snd->sb_flags &= ~SB_SEL;
		selthreadclear(&snd->sb_sel);
	}

	so->so_state |= SS_DEFUNCT;
	OSIncrementAtomicLong((volatile long *)&sodefunct_calls);

done:
	return 0;
}
int
soresume(struct proc *p, struct socket *so, int locked)
{
	if (locked == 0) {
		socket_lock(so, 1);
	}

	if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) {
		SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s) so 0x%llx "
		    "[%d,%d] resumed from bk idle\n",
		    __func__, proc_selfpid(), proc_best_name(current_proc()),
		    proc_pid(p), proc_best_name(p),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so));

		so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG;
		so->so_extended_bk_start = 0;
		OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag);

		OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resumed);
		OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active);
		VERIFY(soextbkidlestat.so_xbkidle_active >= 0);
	}
	if (locked == 0) {
		socket_unlock(so, 1);
	}

	return 0;
}
/*
 * Does not attempt to account for sockets that are delegated from
 * the current process
 */
int
so_set_extended_bk_idle(struct socket *so, int optval)
{
	int error = 0;

	if ((SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) ||
	    SOCK_PROTO(so) != IPPROTO_TCP) {
		OSDecrementAtomic(&soextbkidlestat.so_xbkidle_notsupp);
		error = EOPNOTSUPP;
	} else if (optval == 0) {
		so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED;

		soresume(current_proc(), so, 1);
	} else {
		struct proc *p = current_proc();
		int i;
		struct filedesc *fdp;
		int count = 0;

		/*
		 * Unlock socket to avoid lock ordering issue with
		 * the proc fd table lock
		 */
		socket_unlock(so, 0);

		proc_fdlock(p);
		fdp = p->p_fd;
		for (i = 0; i < fdp->fd_nfiles; i++) {
			struct fileproc *fp = fdp->fd_ofiles[i];
			struct socket *so2;

			if (fp == NULL ||
			    (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
			    FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET) {
				continue;
			}

			so2 = (struct socket *)fp->f_fglob->fg_data;
			if (so != so2 &&
			    so2->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
				count++;
			}
			if (count >= soextbkidlestat.so_xbkidle_maxperproc) {
				break;
			}
		}
		proc_fdunlock(p);

		socket_lock(so, 0);

		if (count >= soextbkidlestat.so_xbkidle_maxperproc) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_toomany);
			error = EBUSY;
		} else if (so->so_flags & SOF_DELEGATED) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd);
			error = EBUSY;
		} else {
			so->so_flags1 |= SOF1_EXTEND_BK_IDLE_WANTED;
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_wantok);
		}
		SODEFUNCTLOG("%s[%d, %s]: so 0x%llx [%d,%d] "
		    "%s marked for extended bk idle\n",
		    __func__, proc_selfpid(), proc_best_name(current_proc()),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so),
		    (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ?
		    "is" : "not");
	}

	return error;
}

static void
so_stop_extended_bk_idle(struct socket *so)
{
	so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG;
	so->so_extended_bk_start = 0;

	OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active);
	VERIFY(soextbkidlestat.so_xbkidle_active >= 0);

	sosetdefunct(current_proc(), so,
	    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
	if (so->so_flags & SOF_DEFUNCT) {
		sodefunct(current_proc(), so,
		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
	}
}
void
so_drain_extended_bk_idle(struct socket *so)
{
	if (so && (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) {
		/*
		 * Only penalize sockets that have outstanding data
		 */
		if (so->so_rcv.sb_cc || so->so_snd.sb_cc) {
			so_stop_extended_bk_idle(so);

			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_drained);
		}
	}
}

/*
 * Return value tells if socket is still in extended background idle
 */
int
so_check_extended_bk_idle_time(struct socket *so)
{
	int ret = 1;

	if ((so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) {
		SODEFUNCTLOG("%s[%d, %s]: so 0x%llx [%d,%d]\n",
		    __func__, proc_selfpid(), proc_best_name(current_proc()),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so));
		if (net_uptime() - so->so_extended_bk_start >
		    soextbkidlestat.so_xbkidle_time) {
			so_stop_extended_bk_idle(so);

			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_expired);

			ret = 0;
		} else {
			struct inpcb *inp = (struct inpcb *)so->so_pcb;

			inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY);
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resched);
		}
	}

	return ret;
}
void
resume_proc_sockets(proc_t p)
{
	if (p->p_ladvflag & P_LXBKIDLEINPROG) {
		struct filedesc *fdp;
		int i;

		proc_fdlock(p);
		fdp = p->p_fd;
		for (i = 0; i < fdp->fd_nfiles; i++) {
			struct fileproc *fp;
			struct socket *so;

			fp = fdp->fd_ofiles[i];
			if (fp == NULL ||
			    (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
			    FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET) {
				continue;
			}

			so = (struct socket *)fp->f_fglob->fg_data;
			(void) soresume(p, so, 0);
		}
		proc_fdunlock(p);

		OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag);
	}
}
__private_extern__ int
so_set_recv_anyif(struct socket *so, int optval)
{
	int ret = 0;

#if INET6
	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
#else
	if (SOCK_DOM(so) == PF_INET) {
#endif /* !INET6 */
		if (optval) {
			sotoinpcb(so)->inp_flags |= INP_RECV_ANYIF;
		} else {
			sotoinpcb(so)->inp_flags &= ~INP_RECV_ANYIF;
		}
	}

	return ret;
}

__private_extern__ int
so_get_recv_anyif(struct socket *so)
{
	int ret = 0;

#if INET6
	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
#else
	if (SOCK_DOM(so) == PF_INET) {
#endif /* !INET6 */
		ret = (sotoinpcb(so)->inp_flags & INP_RECV_ANYIF) ? 1 : 0;
	}

	return ret;
}
int
so_set_restrictions(struct socket *so, uint32_t vals)
{
	int nocell_old, nocell_new;
	int noexpensive_old, noexpensive_new;
	int noconstrained_old, noconstrained_new;

	/*
	 * Deny-type restrictions are trapdoors; once set they cannot be
	 * unset for the lifetime of the socket.  This allows them to be
	 * issued by a framework on behalf of the application without
	 * having to worry that they can be undone.
	 *
	 * Note here that socket-level restrictions override any protocol
	 * level restrictions.  For instance, SO_RESTRICT_DENY_CELLULAR
	 * socket restriction issued on the socket has a higher precedence
	 * than INP_NO_IFT_CELLULAR.  The latter is affected by the UUID
	 * policy PROC_UUID_NO_CELLULAR for unrestricted sockets only,
	 * i.e. when SO_RESTRICT_DENY_CELLULAR has not been issued.
	 */
	nocell_old = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
	noexpensive_old = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
	noconstrained_old = (so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED);
	so->so_restrictions |= (vals & (SO_RESTRICT_DENY_IN |
	    SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR |
	    SO_RESTRICT_DENY_EXPENSIVE | SO_RESTRICT_DENY_CONSTRAINED));
	nocell_new = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
	noexpensive_new = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
	noconstrained_new = (so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED);

	/* we can only set, not clear restrictions */
	if ((nocell_new - nocell_old) == 0 &&
	    (noexpensive_new - noexpensive_old) == 0 &&
	    (noconstrained_new - noconstrained_old) == 0) {
		return 0;
	}

#if INET6
	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
#else /* !INET6 */
	if (SOCK_DOM(so) == PF_INET) {
#endif /* !INET6 */
		if (nocell_new - nocell_old != 0) {
			/*
			 * if deny cellular is now set, do what's needed
			 * for INPCB
			 */
			inp_set_nocellular(sotoinpcb(so));
		}
		if (noexpensive_new - noexpensive_old != 0) {
			inp_set_noexpensive(sotoinpcb(so));
		}
		if (noconstrained_new - noconstrained_old != 0) {
			inp_set_noconstrained(sotoinpcb(so));
		}
	}

	if (SOCK_DOM(so) == PF_MULTIPATH) {
		mptcp_set_restrictions(so);
	}

	return 0;
}
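
/*
 * Illustrative sketch (guarded out): exercising the trapdoor behaviour
 * described above from user space.  This assumes the private
 * SO_RESTRICTIONS socket option carries the SO_RESTRICT_DENY_* bits to
 * so_set_restrictions(); once a deny bit is set, a later call that omits
 * the bit does not clear it.
 */
#if 0
#include <sys/socket.h>
#include <stdint.h>
#include <stdio.h>

static void
deny_cellular_forever(int fd)
{
	uint32_t vals = SO_RESTRICT_DENY_CELLULAR;

	/* Set the deny-cellular trapdoor */
	if (setsockopt(fd, SOL_SOCKET, SO_RESTRICTIONS,
	    &vals, sizeof(vals)) == -1) {
		perror("setsockopt(SO_RESTRICTIONS)");
		return;
	}

	/* A later call with vals == 0 is a no-op: restrictions only add up */
	vals = 0;
	(void) setsockopt(fd, SOL_SOCKET, SO_RESTRICTIONS,
	    &vals, sizeof(vals));
}
#endif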
uint32_t
so_get_restrictions(struct socket *so)
{
	return so->so_restrictions & (SO_RESTRICT_DENY_IN |
	       SO_RESTRICT_DENY_OUT |
	       SO_RESTRICT_DENY_CELLULAR | SO_RESTRICT_DENY_EXPENSIVE);
}
int
so_set_effective_pid(struct socket *so, int epid, struct proc *p, boolean_t check_cred)
{
	struct proc *ep = PROC_NULL;
	int error = 0;

	/* pid 0 is reserved for kernel */
	if (epid == 0) {
		error = EINVAL;
		goto done;
	}

	/*
	 * If this is an in-kernel socket, prevent its delegate
	 * association from changing unless the socket option is
	 * coming from within the kernel itself.
	 */
	if (so->last_pid == 0 && p != kernproc) {
		error = EACCES;
		goto done;
	}

	/*
	 * If this is issued by a process that's recorded as the
	 * real owner of the socket, or if the pid is the same as
	 * the process's own pid, then proceed.  Otherwise ensure
	 * that the issuing process has the necessary privileges.
	 */
	if (check_cred && (epid != so->last_pid || epid != proc_pid(p))) {
		if ((error = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
			error = EACCES;
			goto done;
		}
	}

	/* Find the process that corresponds to the effective pid */
	if ((ep = proc_find(epid)) == PROC_NULL) {
		error = ESRCH;
		goto done;
	}

	/*
	 * If a process tries to delegate the socket to itself, then
	 * there's really nothing to do; treat it as a way for the
	 * delegate association to be cleared.  Note that we check
	 * the passed-in proc rather than calling proc_selfpid(),
	 * as we need to check the process issuing the socket option
	 * which could be kernproc.  Given that we don't allow 0 for
	 * effective pid, it means that a delegated in-kernel socket
	 * stays delegated during its lifetime (which is probably OK.)
	 */
	if (epid == proc_pid(p)) {
		so->so_flags &= ~SOF_DELEGATED;
		so->e_upid = 0;
		so->e_pid = 0;
		uuid_clear(so->e_uuid);
	} else {
		so->so_flags |= SOF_DELEGATED;
		so->e_upid = proc_uniqueid(ep);
		so->e_pid = proc_pid(ep);
		proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid));

#if defined(XNU_TARGET_OS_OSX)
		if (ep->p_responsible_pid != so->e_pid) {
			proc_t rp = proc_find(ep->p_responsible_pid);
			if (rp != PROC_NULL) {
				proc_getexecutableuuid(rp, so->so_ruuid,
				    sizeof(so->so_ruuid));
				so->so_rpid = ep->p_responsible_pid;
				proc_rele(rp);
			} else {
				uuid_clear(so->so_ruuid);
				so->so_rpid = -1;
			}
		}
#endif
	}
	if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
		(*so->so_proto->pr_update_last_owner)(so, NULL, ep);
	}
done:
	if (error == 0 && net_io_policy_log) {
		uuid_string_t buf;

		uuid_unparse(so->e_uuid, buf);
		log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
		    "euuid %s%s\n", __func__, proc_name_address(p),
		    proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so),
		    so->e_pid, proc_name_address(ep), buf,
		    ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
	} else if (error != 0 && net_io_policy_log) {
		log(LOG_ERR, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
		    "ERROR (%d)\n", __func__, proc_name_address(p),
		    proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so),
		    epid, (ep == PROC_NULL) ? "PROC_NULL" :
		    proc_name_address(ep), error);
	}

	/* Update this socket's policy upon success */
	if (error == 0) {
		so->so_policy_gencnt *= -1;
		so_update_policy(so);
#if NECP
		so_update_necp_policy(so, NULL, NULL);
#endif /* NECP */
	}

	if (ep != PROC_NULL) {
		proc_rele(ep);
	}

	return error;
}
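
/*
 * Illustrative sketch (guarded out): delegating a socket to another
 * process by pid.  This assumes the private SO_DELEGATED socket option
 * reaches so_set_effective_pid() with check_cred set, so a caller that is
 * neither the real owner nor delegating to itself needs the
 * PRIV_NET_PRIVILEGED_SOCKET_DELEGATE privilege or the call fails with
 * EACCES.
 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>
#include <stdio.h>

static int
delegate_socket_to_pid(int fd, pid_t epid)
{
	if (setsockopt(fd, SOL_SOCKET, SO_DELEGATED,
	    &epid, sizeof(epid)) == -1) {
		perror("setsockopt(SO_DELEGATED)");
		return -1;
	}
	/* Delegating to our own pid simply clears SOF_DELEGATED */
	return 0;
}
#endif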
int
so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p, boolean_t check_cred)
{
	uuid_string_t buf;
	uuid_t uuid;
	int error = 0;

	/* UUID must not be all-zeroes (reserved for kernel) */
	if (uuid_is_null(euuid)) {
		error = EINVAL;
		goto done;
	}

	/*
	 * If this is an in-kernel socket, prevent its delegate
	 * association from changing unless the socket option is
	 * coming from within the kernel itself.
	 */
	if (so->last_pid == 0 && p != kernproc) {
		error = EACCES;
		goto done;
	}

	/* Get the UUID of the issuing process */
	proc_getexecutableuuid(p, uuid, sizeof(uuid));

	/*
	 * If this is issued by a process that's recorded as the
	 * real owner of the socket, or if the uuid is the same as
	 * the process's own uuid, then proceed.  Otherwise ensure
	 * that the issuing process has the necessary privileges.
	 */
	if (check_cred &&
	    (uuid_compare(euuid, so->last_uuid) != 0 ||
	    uuid_compare(euuid, uuid) != 0)) {
		if ((error = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
			error = EACCES;
			goto done;
		}
	}

	/*
	 * If a process tries to delegate the socket to itself, then
	 * there's really nothing to do; treat it as a way for the
	 * delegate association to be cleared.  Note that we check
	 * the uuid of the passed-in proc rather than that of the
	 * current process, as we need to check the process issuing
	 * the socket option which could be kernproc itself.  Given
	 * that we don't allow 0 for effective uuid, it means that
	 * a delegated in-kernel socket stays delegated during its
	 * lifetime (which is okay.)
	 */
	if (uuid_compare(euuid, uuid) == 0) {
		so->so_flags &= ~SOF_DELEGATED;
		so->e_upid = 0;
		so->e_pid = 0;
		uuid_clear(so->e_uuid);
	} else {
		so->so_flags |= SOF_DELEGATED;
		/*
		 * Unlike so_set_effective_pid(), we only have the UUID
		 * here and the process ID is not known.  Inherit the
		 * real {pid,upid} of the socket.
		 */
		so->e_upid = so->last_upid;
		so->e_pid = so->last_pid;
		uuid_copy(so->e_uuid, euuid);
	}
	/*
	 * The following will clear the effective process name as it is the
	 * same as the real process
	 */
	if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
		(*so->so_proto->pr_update_last_owner)(so, NULL, NULL);
	}
done:
	if (error == 0 && net_io_policy_log) {
		uuid_unparse(so->e_uuid, buf);
		log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s\n", __func__, proc_name_address(p), proc_pid(p),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), so->e_pid, buf,
		    ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
	} else if (error != 0 && net_io_policy_log) {
		uuid_unparse(euuid, buf);
		log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] euuid %s "
		    "ERROR (%d)\n", __func__, proc_name_address(p), proc_pid(p),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), buf, error);
	}

	/* Update this socket's policy upon success */
	if (error == 0) {
		so->so_policy_gencnt *= -1;
		so_update_policy(so);
#if NECP
		so_update_necp_policy(so, NULL, NULL);
#endif /* NECP */
	}

	return error;
}
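
/*
 * Illustrative sketch (guarded out): the UUID flavour of delegation.
 * This assumes the private SO_DELEGATED_UUID socket option reaches
 * so_set_effective_uuid(); unlike the pid variant, only the executable
 * UUID is known, so e_pid/e_upid are inherited from the socket's real
 * owner as noted in the comment above.
 */
#if 0
#include <sys/socket.h>
#include <uuid/uuid.h>
#include <stdio.h>

static int
delegate_socket_to_uuid(int fd, const uuid_t euuid)
{
	if (setsockopt(fd, SOL_SOCKET, SO_DELEGATED_UUID,
	    euuid, sizeof(uuid_t)) == -1) {
		perror("setsockopt(SO_DELEGATED_UUID)");
		return -1;
	}
	return 0;
}
#endif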
void
netpolicy_post_msg(uint32_t ev_code, struct netpolicy_event_data *ev_data,
    uint32_t ev_datalen)
{
	struct kev_msg ev_msg;

	/*
	 * A netpolicy event always starts with a netpolicy_event_data
	 * structure, but the caller can provide for a longer event
	 * structure to post, depending on the event code.
	 */
	VERIFY(ev_data != NULL && ev_datalen >= sizeof(*ev_data));

	bzero(&ev_msg, sizeof(ev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_NETPOLICY_SUBCLASS;
	ev_msg.event_code = ev_code;

	ev_msg.dv[0].data_ptr = ev_data;
	ev_msg.dv[0].data_length = ev_datalen;

	kev_post_msg(&ev_msg);
}
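
/*
 * Illustrative sketch (guarded out): how a kernel-side caller can post a
 * longer event whose first member is the mandatory netpolicy_event_data,
 * as the comment above describes.  The wrapper struct and event code here
 * are hypothetical placeholders, not definitions from this file.
 */
#if 0
struct kev_netpolicy_example {
	struct netpolicy_event_data ev_data;     /* must come first */
	uint32_t                    ev_example_extra; /* event-specific payload */
};

static void
post_example_netpolicy_event(uint32_t extra)
{
	struct kev_netpolicy_example ev;

	bzero(&ev, sizeof(ev));
	/* fill in ev.ev_data with the subject process identity here */
	ev.ev_example_extra = extra;

	/* KEV_NETPOLICY_EXAMPLE is a stand-in for a real event code */
	netpolicy_post_msg(KEV_NETPOLICY_EXAMPLE, &ev.ev_data, sizeof(ev));
}
#endif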
void
socket_post_kev_msg(uint32_t ev_code,
    struct kev_socket_event_data *ev_data,
    uint32_t ev_datalen)
{
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof(ev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_SOCKET_SUBCLASS;
	ev_msg.event_code = ev_code;

	ev_msg.dv[0].data_ptr = ev_data;
	ev_msg.dv[0].data_length = ev_datalen;

	kev_post_msg(&ev_msg);
}
void
socket_post_kev_msg_closed(struct socket *so)
{
	struct kev_socket_closed ev;
	struct sockaddr *socksa = NULL, *peersa = NULL;
	int err;

	bzero(&ev, sizeof(ev));
	err = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &socksa);
	if (err == 0) {
		err = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so,
		    &peersa);
		if (err == 0) {
			memcpy(&ev.ev_data.kev_sockname, socksa,
			    min(socksa->sa_len,
			    sizeof(ev.ev_data.kev_sockname)));
			memcpy(&ev.ev_data.kev_peername, peersa,
			    min(peersa->sa_len,
			    sizeof(ev.ev_data.kev_peername)));
			socket_post_kev_msg(KEV_SOCKET_CLOSED,
			    &ev.ev_data, sizeof(ev));
		}
	}
	if (socksa != NULL) {
		FREE(socksa, M_SONAME);
	}
	if (peersa != NULL) {
		FREE(peersa, M_SONAME);
	}
}
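
/*
 * Illustrative sketch (guarded out): a user-space listener for the
 * KEV_SOCKET_CLOSED events posted above, using a PF_SYSTEM kernel-event
 * socket filtered to KEV_SOCKET_SUBCLASS.  The subclass and event-code
 * constants are assumed to come from the kernel's private headers; only
 * the generic kern_event_msg header is parsed here.
 */
#if 0
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/kern_event.h>
#include <strings.h>
#include <stdio.h>
#include <unistd.h>

static void
watch_socket_closed_events(void)
{
	struct kev_request req;
	char buf[1024];
	int fd;

	fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
	if (fd == -1) {
		perror("socket(PF_SYSTEM)");
		return;
	}

	/* Only deliver KEV_NETWORK_CLASS / KEV_SOCKET_SUBCLASS events */
	bzero(&req, sizeof(req));
	req.vendor_code = KEV_VENDOR_APPLE;
	req.kev_class = KEV_NETWORK_CLASS;
	req.kev_subclass = KEV_SOCKET_SUBCLASS;
	if (ioctl(fd, SIOCSKEVFILT, &req) == -1) {
		perror("ioctl(SIOCSKEVFILT)");
		close(fd);
		return;
	}

	for (;;) {
		struct kern_event_msg *msg = (struct kern_event_msg *)buf;
		ssize_t n = recv(fd, buf, sizeof(buf), 0);

		if (n <= 0) {
			break;
		}
		if (msg->event_code == KEV_SOCKET_CLOSED) {
			printf("socket closed event, %u bytes\n",
			    msg->total_size);
		}
	}
	close(fd);
}
#endif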