2 * Copyright (c) 1998-2015 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
30 * Copyright (c) 1982, 1986, 1988, 1990, 1993
31 * The Regents of the University of California. All rights reserved.
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
70 #include <sys/param.h>
71 #include <sys/systm.h>
72 #include <sys/filedesc.h>
74 #include <sys/proc_internal.h>
75 #include <sys/kauth.h>
76 #include <sys/file_internal.h>
77 #include <sys/fcntl.h>
78 #include <sys/malloc.h>
80 #include <sys/domain.h>
81 #include <sys/kernel.h>
82 #include <sys/event.h>
84 #include <sys/protosw.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/resourcevar.h>
88 #include <sys/signalvar.h>
89 #include <sys/sysctl.h>
90 #include <sys/syslog.h>
92 #include <sys/uio_internal.h>
94 #include <sys/kdebug.h>
98 #include <sys/kern_event.h>
99 #include <net/route.h>
100 #include <net/init.h>
101 #include <net/ntstat.h>
102 #include <net/content_filter.h>
103 #include <netinet/in.h>
104 #include <netinet/in_pcb.h>
105 #include <netinet/ip6.h>
106 #include <netinet6/ip6_var.h>
107 #include <netinet/flow_divert.h>
108 #include <kern/zalloc.h>
109 #include <kern/locks.h>
110 #include <machine/limits.h>
111 #include <libkern/OSAtomic.h>
112 #include <pexpert/pexpert.h>
113 #include <kern/assert.h>
114 #include <kern/task.h>
115 #include <sys/kpi_mbuf.h>
116 #include <sys/mcache.h>
117 #include <sys/unpcb.h>
120 #include <security/mac.h>
121 #include <security/mac_framework.h>
125 #include <netinet/mp_pcb.h>
126 #include <netinet/mptcp_var.h>
127 #endif /* MULTIPATH */
129 /* TODO: this should be in a header file somewhere */
130 extern char *proc_name_address(void *p
);
132 static u_int32_t so_cache_hw
; /* High water mark for socache */
133 static u_int32_t so_cache_timeouts
; /* number of timeouts */
134 static u_int32_t so_cache_max_freed
; /* max freed per timeout */
135 static u_int32_t cached_sock_count
= 0;
136 STAILQ_HEAD(, socket
) so_cache_head
;
137 int max_cached_sock_count
= MAX_CACHED_SOCKETS
;
138 static u_int32_t so_cache_time
;
139 static int socketinit_done
;
140 static struct zone
*so_cache_zone
;
142 static lck_grp_t
*so_cache_mtx_grp
;
143 static lck_attr_t
*so_cache_mtx_attr
;
144 static lck_grp_attr_t
*so_cache_mtx_grp_attr
;
145 static lck_mtx_t
*so_cache_mtx
;
147 #include <machine/limits.h>
149 static void filt_sordetach(struct knote
*kn
);
150 static int filt_soread(struct knote
*kn
, long hint
);
151 static void filt_sowdetach(struct knote
*kn
);
152 static int filt_sowrite(struct knote
*kn
, long hint
);
153 static void filt_sockdetach(struct knote
*kn
);
154 static int filt_sockev(struct knote
*kn
, long hint
);
156 static int sooptcopyin_timeval(struct sockopt
*, struct timeval
*);
157 static int sooptcopyout_timeval(struct sockopt
*, const struct timeval
*);
159 static struct filterops soread_filtops
= {
161 .f_detach
= filt_sordetach
,
162 .f_event
= filt_soread
,
165 static struct filterops sowrite_filtops
= {
167 .f_detach
= filt_sowdetach
,
168 .f_event
= filt_sowrite
,
171 static struct filterops sock_filtops
= {
173 .f_detach
= filt_sockdetach
,
174 .f_event
= filt_sockev
,
177 SYSCTL_DECL(_kern_ipc
);
179 #define EVEN_MORE_LOCKING_DEBUG 0
181 int socket_debug
= 0;
182 SYSCTL_INT(_kern_ipc
, OID_AUTO
, socket_debug
,
183 CTLFLAG_RW
| CTLFLAG_LOCKED
, &socket_debug
, 0, "");
185 static int socket_zone
= M_SOCKET
;
186 so_gen_t so_gencnt
; /* generation count for sockets */
188 MALLOC_DEFINE(M_SONAME
, "soname", "socket name");
189 MALLOC_DEFINE(M_PCB
, "pcb", "protocol control block");
191 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
192 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
193 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
194 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
195 #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
196 #define DBG_FNC_SOSEND_LIST NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 3)
197 #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
198 #define DBG_FNC_SORECEIVE_LIST NETDBG_CODE(DBG_NETSOCK, (8 << 8) | 3)
199 #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))
201 #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES)
203 int somaxconn
= SOMAXCONN
;
204 SYSCTL_INT(_kern_ipc
, KIPC_SOMAXCONN
, somaxconn
,
205 CTLFLAG_RW
| CTLFLAG_LOCKED
, &somaxconn
, 0, "");
207 /* Should we get a maximum also ??? */
208 static int sosendmaxchain
= 65536;
209 static int sosendminchain
= 16384;
210 static int sorecvmincopy
= 16384;
211 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sosendminchain
,
212 CTLFLAG_RW
| CTLFLAG_LOCKED
, &sosendminchain
, 0, "");
213 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sorecvmincopy
,
214 CTLFLAG_RW
| CTLFLAG_LOCKED
, &sorecvmincopy
, 0, "");
217 * Set to enable jumbo clusters (if available) for large writes when
218 * the socket is marked with SOF_MULTIPAGES; see below.
221 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sosendjcl
,
222 CTLFLAG_RW
| CTLFLAG_LOCKED
, &sosendjcl
, 0, "");
225 * Set this to ignore SOF_MULTIPAGES and use jumbo clusters for large
226 * writes on the socket for all protocols on any network interfaces,
227 * depending upon sosendjcl above. Be extra careful when setting this
228 * to 1, because sending down packets that cross physical pages down to
229 * broken drivers (those that falsely assume that the physical pages
230 * are contiguous) might lead to system panics or silent data corruption.
231 * When set to 0, the system will respect SOF_MULTIPAGES, which is set
232 * only for TCP sockets whose outgoing interface is IFNET_MULTIPAGES
233 * capable. Set this to 1 only for testing/debugging purposes.
235 int sosendjcl_ignore_capab
= 0;
236 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sosendjcl_ignore_capab
,
237 CTLFLAG_RW
| CTLFLAG_LOCKED
, &sosendjcl_ignore_capab
, 0, "");
239 int sosendbigcl_ignore_capab
= 0;
240 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sosendbigcl_ignore_capab
,
241 CTLFLAG_RW
| CTLFLAG_LOCKED
, &sosendbigcl_ignore_capab
, 0, "");
243 int sodefunctlog
= 0;
244 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sodefunctlog
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
245 &sodefunctlog
, 0, "");
247 int sothrottlelog
= 0;
248 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sothrottlelog
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
249 &sothrottlelog
, 0, "");
251 int sorestrictrecv
= 1;
252 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sorestrictrecv
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
253 &sorestrictrecv
, 0, "Enable inbound interface restrictions");
255 int sorestrictsend
= 1;
256 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sorestrictsend
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
257 &sorestrictsend
, 0, "Enable outbound interface restrictions");
259 extern struct inpcbinfo tcbinfo
;
261 /* TODO: these should be in header file */
262 extern int get_inpcb_str_size(void);
263 extern int get_tcp_str_size(void);
265 static unsigned int sl_zone_size
; /* size of sockaddr_list */
266 static struct zone
*sl_zone
; /* zone for sockaddr_list */
268 static unsigned int se_zone_size
; /* size of sockaddr_entry */
269 static struct zone
*se_zone
; /* zone for sockaddr_entry */
271 vm_size_t so_cache_zone_element_size
;
273 static int sodelayed_copy(struct socket
*, struct uio
*, struct mbuf
**, user_ssize_t
*);
274 static void cached_sock_alloc(struct socket
**, int);
275 static void cached_sock_free(struct socket
*);
278 * SOTCDB_NO_DSCP is set by default, to prevent the networking stack from
279 * setting the DSCP code on the packet based on the service class; see
280 * <rdar://problem/11277343> for details.
282 __private_extern__ u_int32_t sotcdb
= SOTCDB_NO_DSCP
;
283 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sotcdb
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
289 _CASSERT(sizeof(so_gencnt
) == sizeof(uint64_t));
290 VERIFY(IS_P2ALIGNED(&so_gencnt
, sizeof(uint32_t)));
292 if (socketinit_done
) {
293 printf("socketinit: already called...\n");
298 PE_parse_boot_argn("socket_debug", &socket_debug
,
299 sizeof (socket_debug
));
302 * allocate lock group attribute and group for socket cache mutex
304 so_cache_mtx_grp_attr
= lck_grp_attr_alloc_init();
305 so_cache_mtx_grp
= lck_grp_alloc_init("so_cache",
306 so_cache_mtx_grp_attr
);
309 * allocate the lock attribute for socket cache mutex
311 so_cache_mtx_attr
= lck_attr_alloc_init();
313 /* cached sockets mutex */
314 so_cache_mtx
= lck_mtx_alloc_init(so_cache_mtx_grp
, so_cache_mtx_attr
);
315 if (so_cache_mtx
== NULL
) {
316 panic("%s: unable to allocate so_cache_mtx\n", __func__
);
319 STAILQ_INIT(&so_cache_head
);
321 so_cache_zone_element_size
= (vm_size_t
)(sizeof (struct socket
) + 4
322 + get_inpcb_str_size() + 4 + get_tcp_str_size());
324 so_cache_zone
= zinit(so_cache_zone_element_size
,
325 (120000 * so_cache_zone_element_size
), 8192, "socache zone");
326 zone_change(so_cache_zone
, Z_CALLERACCT
, FALSE
);
327 zone_change(so_cache_zone
, Z_NOENCRYPT
, TRUE
);
329 sl_zone_size
= sizeof (struct sockaddr_list
);
330 if ((sl_zone
= zinit(sl_zone_size
, 1024 * sl_zone_size
, 1024,
331 "sockaddr_list")) == NULL
) {
332 panic("%s: unable to allocate sockaddr_list zone\n", __func__
);
335 zone_change(sl_zone
, Z_CALLERACCT
, FALSE
);
336 zone_change(sl_zone
, Z_EXPAND
, TRUE
);
338 se_zone_size
= sizeof (struct sockaddr_entry
);
339 if ((se_zone
= zinit(se_zone_size
, 1024 * se_zone_size
, 1024,
340 "sockaddr_entry")) == NULL
) {
341 panic("%s: unable to allocate sockaddr_entry zone\n", __func__
);
344 zone_change(se_zone
, Z_CALLERACCT
, FALSE
);
345 zone_change(se_zone
, Z_EXPAND
, TRUE
);
350 socket_tclass_init();
353 #endif /* MULTIPATH */
357 cached_sock_alloc(struct socket
**so
, int waitok
)
362 lck_mtx_lock(so_cache_mtx
);
364 if (!STAILQ_EMPTY(&so_cache_head
)) {
365 VERIFY(cached_sock_count
> 0);
367 *so
= STAILQ_FIRST(&so_cache_head
);
368 STAILQ_REMOVE_HEAD(&so_cache_head
, so_cache_ent
);
369 STAILQ_NEXT((*so
), so_cache_ent
) = NULL
;
372 lck_mtx_unlock(so_cache_mtx
);
374 temp
= (*so
)->so_saved_pcb
;
375 bzero((caddr_t
)*so
, sizeof (struct socket
));
377 (*so
)->so_saved_pcb
= temp
;
380 lck_mtx_unlock(so_cache_mtx
);
383 *so
= (struct socket
*)zalloc(so_cache_zone
);
385 *so
= (struct socket
*)zalloc_noblock(so_cache_zone
);
390 bzero((caddr_t
)*so
, sizeof (struct socket
));
393 * Define offsets for extra structures into our
394 * single block of memory. Align extra structures
395 * on longword boundaries.
398 offset
= (uintptr_t)*so
;
399 offset
+= sizeof (struct socket
);
401 offset
= ALIGN(offset
);
403 (*so
)->so_saved_pcb
= (caddr_t
)offset
;
404 offset
+= get_inpcb_str_size();
406 offset
= ALIGN(offset
);
408 ((struct inpcb
*)(void *)(*so
)->so_saved_pcb
)->inp_saved_ppcb
=
412 (*so
)->cached_in_sock_layer
= true;
416 cached_sock_free(struct socket
*so
)
419 lck_mtx_lock(so_cache_mtx
);
421 so_cache_time
= net_uptime();
422 if (++cached_sock_count
> max_cached_sock_count
) {
424 lck_mtx_unlock(so_cache_mtx
);
425 zfree(so_cache_zone
, so
);
427 if (so_cache_hw
< cached_sock_count
)
428 so_cache_hw
= cached_sock_count
;
430 STAILQ_INSERT_TAIL(&so_cache_head
, so
, so_cache_ent
);
432 so
->cache_timestamp
= so_cache_time
;
433 lck_mtx_unlock(so_cache_mtx
);
438 so_update_last_owner_locked(struct socket
*so
, proc_t self
)
440 if (so
->last_pid
!= 0) {
442 * last_pid and last_upid should remain zero for sockets
443 * created using sock_socket. The check above achieves that
445 if (self
== PROC_NULL
)
446 self
= current_proc();
448 if (so
->last_upid
!= proc_uniqueid(self
) ||
449 so
->last_pid
!= proc_pid(self
)) {
450 so
->last_upid
= proc_uniqueid(self
);
451 so
->last_pid
= proc_pid(self
);
452 proc_getexecutableuuid(self
, so
->last_uuid
,
453 sizeof (so
->last_uuid
));
455 proc_pidoriginatoruuid(so
->so_vuuid
, sizeof(so
->so_vuuid
));
460 so_update_policy(struct socket
*so
)
462 if (SOCK_DOM(so
) == PF_INET
|| SOCK_DOM(so
) == PF_INET6
)
463 (void) inp_update_policy(sotoinpcb(so
));
468 so_update_necp_policy(struct socket
*so
, struct sockaddr
*override_local_addr
, struct sockaddr
*override_remote_addr
)
470 if (SOCK_DOM(so
) == PF_INET
|| SOCK_DOM(so
) == PF_INET6
)
471 inp_update_necp_policy(sotoinpcb(so
), override_local_addr
, override_remote_addr
, 0);
480 boolean_t rc
= FALSE
;
482 lck_mtx_lock(so_cache_mtx
);
484 so_cache_time
= net_uptime();
486 while (!STAILQ_EMPTY(&so_cache_head
)) {
487 VERIFY(cached_sock_count
> 0);
488 p
= STAILQ_FIRST(&so_cache_head
);
489 if ((so_cache_time
- p
->cache_timestamp
) <
493 STAILQ_REMOVE_HEAD(&so_cache_head
, so_cache_ent
);
496 zfree(so_cache_zone
, p
);
498 if (++n_freed
>= SO_CACHE_MAX_FREE_BATCH
) {
499 so_cache_max_freed
++;
504 /* Schedule again if there is more to cleanup */
505 if (!STAILQ_EMPTY(&so_cache_head
))
508 lck_mtx_unlock(so_cache_mtx
);
513 * Get a socket structure from our zone, and initialize it.
514 * We don't implement `waitok' yet (see comments in uipc_domain.c).
515 * Note that it would probably be better to allocate socket
516 * and PCB at the same time, but I'm not convinced that all
517 * the protocols can be easily modified to do this.
520 soalloc(int waitok
, int dom
, int type
)
524 if ((dom
== PF_INET
) && (type
== SOCK_STREAM
)) {
525 cached_sock_alloc(&so
, waitok
);
527 MALLOC_ZONE(so
, struct socket
*, sizeof (*so
), socket_zone
,
530 bzero(so
, sizeof (*so
));
533 so
->so_gencnt
= OSIncrementAtomic64((SInt64
*)&so_gencnt
);
534 so
->so_zone
= socket_zone
;
535 #if CONFIG_MACF_SOCKET
536 /* Convert waitok to M_WAITOK/M_NOWAIT for MAC Framework. */
537 if (mac_socket_label_init(so
, !waitok
) != 0) {
541 #endif /* MAC_SOCKET */
548 socreate_internal(int dom
, struct socket
**aso
, int type
, int proto
,
549 struct proc
*p
, uint32_t flags
, struct proc
*ep
)
556 extern int tcpconsdebug
;
563 prp
= pffindproto(dom
, proto
, type
);
565 prp
= pffindtype(dom
, type
);
567 if (prp
== NULL
|| prp
->pr_usrreqs
->pru_attach
== NULL
) {
568 if (pffinddomain(dom
) == NULL
)
569 return (EAFNOSUPPORT
);
571 if (pffindprotonotype(dom
, proto
) != NULL
)
574 return (EPROTONOSUPPORT
);
576 if (prp
->pr_type
!= type
)
578 so
= soalloc(1, dom
, type
);
582 if (flags
& SOCF_ASYNC
)
583 so
->so_state
|= SS_NBIO
;
585 if (flags
& SOCF_MP_SUBFLOW
) {
587 * A multipath subflow socket is used internally in the kernel,
588 * therefore it does not have a file desciptor associated by
591 so
->so_state
|= SS_NOFDREF
;
592 so
->so_flags
|= SOF_MP_SUBFLOW
;
594 #endif /* MULTIPATH */
596 TAILQ_INIT(&so
->so_incomp
);
597 TAILQ_INIT(&so
->so_comp
);
599 so
->last_upid
= proc_uniqueid(p
);
600 so
->last_pid
= proc_pid(p
);
601 proc_getexecutableuuid(p
, so
->last_uuid
, sizeof (so
->last_uuid
));
602 proc_pidoriginatoruuid(so
->so_vuuid
, sizeof(so
->so_vuuid
));
604 if (ep
!= PROC_NULL
&& ep
!= p
) {
605 so
->e_upid
= proc_uniqueid(ep
);
606 so
->e_pid
= proc_pid(ep
);
607 proc_getexecutableuuid(ep
, so
->e_uuid
, sizeof (so
->e_uuid
));
608 so
->so_flags
|= SOF_DELEGATED
;
611 so
->so_cred
= kauth_cred_proc_ref(p
);
612 if (!suser(kauth_cred_get(), NULL
))
613 so
->so_state
|= SS_PRIV
;
616 so
->so_rcv
.sb_flags
|= SB_RECV
;
617 so
->so_rcv
.sb_so
= so
->so_snd
.sb_so
= so
;
618 so
->next_lock_lr
= 0;
619 so
->next_unlock_lr
= 0;
621 #if CONFIG_MACF_SOCKET
622 mac_socket_label_associate(kauth_cred_get(), so
);
623 #endif /* MAC_SOCKET */
626 * Attachment will create the per pcb lock if necessary and
627 * increase refcount for creation, make sure it's done before
628 * socket is inserted in lists.
632 error
= (*prp
->pr_usrreqs
->pru_attach
)(so
, proto
, p
);
636 * If so_pcb is not zero, the socket will be leaked,
637 * so protocol attachment handler must be coded carefuly
639 so
->so_state
|= SS_NOFDREF
;
641 sofreelastref(so
, 1); /* will deallocate the socket */
645 atomic_add_32(&prp
->pr_domain
->dom_refs
, 1);
646 TAILQ_INIT(&so
->so_evlist
);
648 /* Attach socket filters for this protocol */
651 if (tcpconsdebug
== 2)
652 so
->so_options
|= SO_DEBUG
;
654 so_set_default_traffic_class(so
);
657 * If this thread or task is marked to create backgrounded sockets,
658 * mark the socket as background.
660 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_NEW_SOCKETS_BG
)) {
661 socket_set_traffic_mgt_flags(so
, TRAFFIC_MGT_SO_BACKGROUND
);
662 so
->so_background_thread
= current_thread();
667 * Don't mark Unix domain, system or multipath sockets as
668 * eligible for defunct by default.
673 so
->so_flags
|= SOF_NODEFUNCT
;
680 * Entitlements can't be checked at socket creation time except if the
681 * application requested a feature guarded by a privilege (c.f., socket
683 * The priv(9) and the Sandboxing APIs are designed with the idea that
684 * a privilege check should only be triggered by a userland request.
685 * A privilege check at socket creation time is time consuming and
686 * could trigger many authorisation error messages from the security
701 * <pru_attach>:ENOBUFS[AF_UNIX]
702 * <pru_attach>:ENOBUFS[TCP]
703 * <pru_attach>:ENOMEM[TCP]
704 * <pru_attach>:??? [other protocol families, IPSEC]
707 socreate(int dom
, struct socket
**aso
, int type
, int proto
)
709 return (socreate_internal(dom
, aso
, type
, proto
, current_proc(), 0,
714 socreate_delegate(int dom
, struct socket
**aso
, int type
, int proto
, pid_t epid
)
717 struct proc
*ep
= PROC_NULL
;
719 if ((proc_selfpid() != epid
) && ((ep
= proc_find(epid
)) == PROC_NULL
)) {
724 error
= socreate_internal(dom
, aso
, type
, proto
, current_proc(), 0, ep
);
727 * It might not be wise to hold the proc reference when calling
728 * socreate_internal since it calls soalloc with M_WAITOK
739 * <pru_bind>:EINVAL Invalid argument [COMMON_START]
740 * <pru_bind>:EAFNOSUPPORT Address family not supported
741 * <pru_bind>:EADDRNOTAVAIL Address not available.
742 * <pru_bind>:EINVAL Invalid argument
743 * <pru_bind>:EAFNOSUPPORT Address family not supported [notdef]
744 * <pru_bind>:EACCES Permission denied
745 * <pru_bind>:EADDRINUSE Address in use
746 * <pru_bind>:EAGAIN Resource unavailable, try again
747 * <pru_bind>:EPERM Operation not permitted
751 * Notes: It's not possible to fully enumerate the return codes above,
752 * since socket filter authors and protocol family authors may
753 * not choose to limit their error returns to those listed, even
754 * though this may result in some software operating incorrectly.
756 * The error codes which are enumerated above are those known to
757 * be returned by the tcp_usr_bind function supplied.
760 sobindlock(struct socket
*so
, struct sockaddr
*nam
, int dolock
)
762 struct proc
*p
= current_proc();
767 VERIFY(so
->so_usecount
> 1);
769 so_update_last_owner_locked(so
, p
);
770 so_update_policy(so
);
773 so_update_necp_policy(so
, nam
, NULL
);
777 * If this is a bind request on a socket that has been marked
778 * as inactive, reject it now before we go any further.
780 if (so
->so_flags
& SOF_DEFUNCT
) {
782 SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n",
783 __func__
, proc_pid(p
), (uint64_t)VM_KERNEL_ADDRPERM(so
),
784 SOCK_DOM(so
), SOCK_TYPE(so
), error
));
789 error
= sflt_bind(so
, nam
);
792 error
= (*so
->so_proto
->pr_usrreqs
->pru_bind
)(so
, nam
, p
);
795 socket_unlock(so
, 1);
797 if (error
== EJUSTRETURN
)
804 sodealloc(struct socket
*so
)
806 kauth_cred_unref(&so
->so_cred
);
808 /* Remove any filters */
812 cfil_sock_detach(so
);
813 #endif /* CONTENT_FILTER */
815 /* Delete the state allocated for msg queues on a socket */
816 if (so
->so_flags
& SOF_ENABLE_MSGS
) {
817 FREE(so
->so_msg_state
, M_TEMP
);
818 so
->so_msg_state
= NULL
;
820 VERIFY(so
->so_msg_state
== NULL
);
822 so
->so_gencnt
= OSIncrementAtomic64((SInt64
*)&so_gencnt
);
824 #if CONFIG_MACF_SOCKET
825 mac_socket_label_destroy(so
);
826 #endif /* MAC_SOCKET */
828 if (so
->cached_in_sock_layer
) {
829 cached_sock_free(so
);
831 FREE_ZONE(so
, sizeof (*so
), so
->so_zone
);
839 * <pru_listen>:EINVAL[AF_UNIX]
840 * <pru_listen>:EINVAL[TCP]
841 * <pru_listen>:EADDRNOTAVAIL[TCP] Address not available.
842 * <pru_listen>:EINVAL[TCP] Invalid argument
843 * <pru_listen>:EAFNOSUPPORT[TCP] Address family not supported [notdef]
844 * <pru_listen>:EACCES[TCP] Permission denied
845 * <pru_listen>:EADDRINUSE[TCP] Address in use
846 * <pru_listen>:EAGAIN[TCP] Resource unavailable, try again
847 * <pru_listen>:EPERM[TCP] Operation not permitted
850 * Notes: Other <pru_listen> returns depend on the protocol family; all
851 * <sf_listen> returns depend on what the filter author causes
852 * their filter to return.
855 solisten(struct socket
*so
, int backlog
)
857 struct proc
*p
= current_proc();
862 so_update_last_owner_locked(so
, p
);
863 so_update_policy(so
);
866 so_update_necp_policy(so
, NULL
, NULL
);
869 if (so
->so_proto
== NULL
) {
873 if ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) == 0) {
879 * If the listen request is made on a socket that is not fully
880 * disconnected, or on a socket that has been marked as inactive,
881 * reject the request now.
884 (SS_ISCONNECTED
|SS_ISCONNECTING
|SS_ISDISCONNECTING
)) ||
885 (so
->so_flags
& SOF_DEFUNCT
)) {
887 if (so
->so_flags
& SOF_DEFUNCT
) {
888 SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] "
889 "(%d)\n", __func__
, proc_pid(p
),
890 (uint64_t)VM_KERNEL_ADDRPERM(so
),
891 SOCK_DOM(so
), SOCK_TYPE(so
), error
));
896 if ((so
->so_restrictions
& SO_RESTRICT_DENY_IN
) != 0) {
901 error
= sflt_listen(so
);
903 error
= (*so
->so_proto
->pr_usrreqs
->pru_listen
)(so
, p
);
906 if (error
== EJUSTRETURN
)
911 if (TAILQ_EMPTY(&so
->so_comp
))
912 so
->so_options
|= SO_ACCEPTCONN
;
914 * POSIX: The implementation may have an upper limit on the length of
915 * the listen queue-either global or per accepting socket. If backlog
916 * exceeds this limit, the length of the listen queue is set to the
919 * If listen() is called with a backlog argument value that is less
920 * than 0, the function behaves as if it had been called with a backlog
921 * argument value of 0.
923 * A backlog argument of 0 may allow the socket to accept connections,
924 * in which case the length of the listen queue may be set to an
925 * implementation-defined minimum value.
927 if (backlog
<= 0 || backlog
> somaxconn
)
930 so
->so_qlimit
= backlog
;
932 socket_unlock(so
, 1);
937 sofreelastref(struct socket
*so
, int dealloc
)
939 struct socket
*head
= so
->so_head
;
941 /* Assume socket is locked */
943 if (!(so
->so_flags
& SOF_PCBCLEARING
) || !(so
->so_state
& SS_NOFDREF
)) {
944 selthreadclear(&so
->so_snd
.sb_sel
);
945 selthreadclear(&so
->so_rcv
.sb_sel
);
946 so
->so_rcv
.sb_flags
&= ~(SB_SEL
|SB_UPCALL
);
947 so
->so_snd
.sb_flags
&= ~(SB_SEL
|SB_UPCALL
);
948 so
->so_event
= sonullevent
;
952 socket_lock(head
, 1);
953 if (so
->so_state
& SS_INCOMP
) {
954 TAILQ_REMOVE(&head
->so_incomp
, so
, so_list
);
956 } else if (so
->so_state
& SS_COMP
) {
958 * We must not decommission a socket that's
959 * on the accept(2) queue. If we do, then
960 * accept(2) may hang after select(2) indicated
961 * that the listening socket was ready.
963 selthreadclear(&so
->so_snd
.sb_sel
);
964 selthreadclear(&so
->so_rcv
.sb_sel
);
965 so
->so_rcv
.sb_flags
&= ~(SB_SEL
|SB_UPCALL
);
966 so
->so_snd
.sb_flags
&= ~(SB_SEL
|SB_UPCALL
);
967 so
->so_event
= sonullevent
;
968 socket_unlock(head
, 1);
971 panic("sofree: not queued");
974 so
->so_state
&= ~SS_INCOMP
;
976 socket_unlock(head
, 1);
982 if (so
->so_flags
& SOF_FLOW_DIVERT
) {
983 flow_divert_detach(so
);
985 #endif /* FLOW_DIVERT */
987 /* 3932268: disable upcall */
988 so
->so_rcv
.sb_flags
&= ~SB_UPCALL
;
989 so
->so_snd
.sb_flags
&= ~SB_UPCALL
;
990 so
->so_event
= sonullevent
;
997 soclose_wait_locked(struct socket
*so
)
999 lck_mtx_t
*mutex_held
;
1001 if (so
->so_proto
->pr_getlock
!= NULL
)
1002 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
1004 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1005 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
1008 * Double check here and return if there's no outstanding upcall;
1009 * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
1011 if (!so
->so_upcallusecount
|| !(so
->so_flags
& SOF_UPCALLCLOSEWAIT
))
1013 so
->so_rcv
.sb_flags
&= ~SB_UPCALL
;
1014 so
->so_snd
.sb_flags
&= ~SB_UPCALL
;
1015 so
->so_flags
|= SOF_CLOSEWAIT
;
1016 (void) msleep((caddr_t
)&so
->so_upcallusecount
, mutex_held
, (PZERO
- 1),
1017 "soclose_wait_locked", NULL
);
1018 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
1019 so
->so_flags
&= ~SOF_CLOSEWAIT
;
1023 * Close a socket on last file table reference removal.
1024 * Initiate disconnect if connected.
1025 * Free socket when disconnect complete.
1028 soclose_locked(struct socket
*so
)
1031 lck_mtx_t
*mutex_held
;
1034 if (so
->so_usecount
== 0) {
1035 panic("soclose: so=%p refcount=0\n", so
);
1039 sflt_notify(so
, sock_evt_closing
, NULL
);
1041 if (so
->so_upcallusecount
)
1042 soclose_wait_locked(so
);
1046 * We have to wait until the content filters are done
1048 if ((so
->so_flags
& SOF_CONTENT_FILTER
) != 0) {
1049 cfil_sock_close_wait(so
);
1050 cfil_sock_is_closed(so
);
1051 cfil_sock_detach(so
);
1053 #endif /* CONTENT_FILTER */
1055 if ((so
->so_options
& SO_ACCEPTCONN
)) {
1056 struct socket
*sp
, *sonext
;
1060 * We do not want new connection to be added
1061 * to the connection queues
1063 so
->so_options
&= ~SO_ACCEPTCONN
;
1065 for (sp
= TAILQ_FIRST(&so
->so_incomp
);
1066 sp
!= NULL
; sp
= sonext
) {
1067 sonext
= TAILQ_NEXT(sp
, so_list
);
1071 * skip sockets thrown away by tcpdropdropblreq
1072 * they will get cleanup by the garbage collection.
1073 * otherwise, remove the incomp socket from the queue
1074 * and let soabort trigger the appropriate cleanup.
1076 if (sp
->so_flags
& SOF_OVERFLOW
)
1079 if (so
->so_proto
->pr_getlock
!= NULL
) {
1081 * Lock ordering for consistency with the
1082 * rest of the stack, we lock the socket
1083 * first and then grabb the head.
1085 socket_unlock(so
, 0);
1091 TAILQ_REMOVE(&so
->so_incomp
, sp
, so_list
);
1094 if (sp
->so_state
& SS_INCOMP
) {
1095 sp
->so_state
&= ~SS_INCOMP
;
1102 socket_unlock(sp
, 1);
1105 while ((sp
= TAILQ_FIRST(&so
->so_comp
)) != NULL
) {
1106 /* Dequeue from so_comp since sofree() won't do it */
1107 TAILQ_REMOVE(&so
->so_comp
, sp
, so_list
);
1110 if (so
->so_proto
->pr_getlock
!= NULL
) {
1111 socket_unlock(so
, 0);
1115 if (sp
->so_state
& SS_COMP
) {
1116 sp
->so_state
&= ~SS_COMP
;
1122 if (so
->so_proto
->pr_getlock
!= NULL
) {
1123 socket_unlock(sp
, 1);
1128 if (so
->so_pcb
== NULL
) {
1129 /* 3915887: mark the socket as ready for dealloc */
1130 so
->so_flags
|= SOF_PCBCLEARING
;
1133 if (so
->so_state
& SS_ISCONNECTED
) {
1134 if ((so
->so_state
& SS_ISDISCONNECTING
) == 0) {
1135 error
= sodisconnectlocked(so
);
1139 if (so
->so_options
& SO_LINGER
) {
1140 if ((so
->so_state
& SS_ISDISCONNECTING
) &&
1141 (so
->so_state
& SS_NBIO
))
1143 if (so
->so_proto
->pr_getlock
!= NULL
)
1144 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
1146 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1147 while (so
->so_state
& SS_ISCONNECTED
) {
1148 ts
.tv_sec
= (so
->so_linger
/100);
1149 ts
.tv_nsec
= (so
->so_linger
% 100) *
1150 NSEC_PER_USEC
* 1000 * 10;
1151 error
= msleep((caddr_t
)&so
->so_timeo
,
1152 mutex_held
, PSOCK
| PCATCH
, "soclose", &ts
);
1155 * It's OK when the time fires,
1156 * don't report an error
1158 if (error
== EWOULDBLOCK
)
1166 if (so
->so_usecount
== 0) {
1167 panic("soclose: usecount is zero so=%p\n", so
);
1170 if (so
->so_pcb
!= NULL
&& !(so
->so_flags
& SOF_PCBCLEARING
)) {
1171 int error2
= (*so
->so_proto
->pr_usrreqs
->pru_detach
)(so
);
1175 if (so
->so_usecount
<= 0) {
1176 panic("soclose: usecount is zero so=%p\n", so
);
1180 if (so
->so_pcb
!= NULL
&& !(so
->so_flags
& SOF_MP_SUBFLOW
) &&
1181 (so
->so_state
& SS_NOFDREF
)) {
1182 panic("soclose: NOFDREF");
1185 so
->so_state
|= SS_NOFDREF
;
1187 if (so
->so_flags
& SOF_MP_SUBFLOW
)
1188 so
->so_flags
&= ~SOF_MP_SUBFLOW
;
1190 if ((so
->so_flags
& SOF_KNOTE
) != 0)
1191 KNOTE(&so
->so_klist
, SO_FILT_HINT_LOCKED
);
1193 atomic_add_32(&so
->so_proto
->pr_domain
->dom_refs
, -1);
1202 soclose(struct socket
*so
)
1207 if (so
->so_retaincnt
== 0) {
1208 error
= soclose_locked(so
);
1211 * if the FD is going away, but socket is
1212 * retained in kernel remove its reference
1215 if (so
->so_usecount
< 2)
1216 panic("soclose: retaincnt non null and so=%p "
1217 "usecount=%d\n", so
, so
->so_usecount
);
1219 socket_unlock(so
, 1);
1224 * Must be called at splnet...
1226 /* Should already be locked */
1228 soabort(struct socket
*so
)
1232 #ifdef MORE_LOCKING_DEBUG
1233 lck_mtx_t
*mutex_held
;
1235 if (so
->so_proto
->pr_getlock
!= NULL
)
1236 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
1238 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1239 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
1242 if ((so
->so_flags
& SOF_ABORTED
) == 0) {
1243 so
->so_flags
|= SOF_ABORTED
;
1244 error
= (*so
->so_proto
->pr_usrreqs
->pru_abort
)(so
);
1254 soacceptlock(struct socket
*so
, struct sockaddr
**nam
, int dolock
)
1261 so_update_last_owner_locked(so
, PROC_NULL
);
1262 so_update_policy(so
);
1264 so_update_necp_policy(so
, NULL
, NULL
);
1267 if ((so
->so_state
& SS_NOFDREF
) == 0)
1268 panic("soaccept: !NOFDREF");
1269 so
->so_state
&= ~SS_NOFDREF
;
1270 error
= (*so
->so_proto
->pr_usrreqs
->pru_accept
)(so
, nam
);
1273 socket_unlock(so
, 1);
1278 soaccept(struct socket
*so
, struct sockaddr
**nam
)
1280 return (soacceptlock(so
, nam
, 1));
1284 soacceptfilter(struct socket
*so
)
1286 struct sockaddr
*local
= NULL
, *remote
= NULL
;
1288 struct socket
*head
= so
->so_head
;
1291 * Hold the lock even if this socket has not been made visible
1292 * to the filter(s). For sockets with global locks, this protects
1293 * against the head or peer going away
1296 if (sogetaddr_locked(so
, &remote
, 1) != 0 ||
1297 sogetaddr_locked(so
, &local
, 0) != 0) {
1298 so
->so_state
&= ~(SS_NOFDREF
| SS_COMP
);
1300 socket_unlock(so
, 1);
1302 /* Out of resources; try it again next time */
1303 error
= ECONNABORTED
;
1307 error
= sflt_accept(head
, so
, local
, remote
);
1310 * If we get EJUSTRETURN from one of the filters, mark this socket
1311 * as inactive and return it anyway. This newly accepted socket
1312 * will be disconnected later before we hand it off to the caller.
1314 if (error
== EJUSTRETURN
) {
1316 (void) sosetdefunct(current_proc(), so
,
1317 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL
, FALSE
);
1322 * This may seem like a duplication to the above error
1323 * handling part when we return ECONNABORTED, except
1324 * the following is done while holding the lock since
1325 * the socket has been exposed to the filter(s) earlier.
1327 so
->so_state
&= ~(SS_NOFDREF
| SS_COMP
);
1329 socket_unlock(so
, 1);
1331 /* Propagate socket filter's error code to the caller */
1333 socket_unlock(so
, 1);
1336 /* Callee checks for NULL pointer */
1337 sock_freeaddr(remote
);
1338 sock_freeaddr(local
);
1343 * Returns: 0 Success
1344 * EOPNOTSUPP Operation not supported on socket
1345 * EISCONN Socket is connected
1346 * <pru_connect>:EADDRNOTAVAIL Address not available.
1347 * <pru_connect>:EINVAL Invalid argument
1348 * <pru_connect>:EAFNOSUPPORT Address family not supported [notdef]
1349 * <pru_connect>:EACCES Permission denied
1350 * <pru_connect>:EADDRINUSE Address in use
1351 * <pru_connect>:EAGAIN Resource unavailable, try again
1352 * <pru_connect>:EPERM Operation not permitted
1353 * <sf_connect_out>:??? [anything a filter writer might set]
1356 soconnectlock(struct socket
*so
, struct sockaddr
*nam
, int dolock
)
1359 struct proc
*p
= current_proc();
1364 so_update_last_owner_locked(so
, p
);
1365 so_update_policy(so
);
1368 so_update_necp_policy(so
, NULL
, nam
);
1372 * If this is a listening socket or if this is a previously-accepted
1373 * socket that has been marked as inactive, reject the connect request.
1375 if ((so
->so_options
& SO_ACCEPTCONN
) || (so
->so_flags
& SOF_DEFUNCT
)) {
1377 if (so
->so_flags
& SOF_DEFUNCT
) {
1378 SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] "
1379 "(%d)\n", __func__
, proc_pid(p
),
1380 (uint64_t)VM_KERNEL_ADDRPERM(so
),
1381 SOCK_DOM(so
), SOCK_TYPE(so
), error
));
1384 socket_unlock(so
, 1);
1388 if ((so
->so_restrictions
& SO_RESTRICT_DENY_OUT
) != 0) {
1390 socket_unlock(so
, 1);
1395 * If protocol is connection-based, can only connect once.
1396 * Otherwise, if connected, try to disconnect first.
1397 * This allows user to disconnect by connecting to, e.g.,
1400 if (so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
) &&
1401 ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) ||
1402 (error
= sodisconnectlocked(so
)))) {
1406 * Run connect filter before calling protocol:
1407 * - non-blocking connect returns before completion;
1409 error
= sflt_connectout(so
, nam
);
1411 if (error
== EJUSTRETURN
)
1414 error
= (*so
->so_proto
->pr_usrreqs
->pru_connect
)
1419 socket_unlock(so
, 1);
1424 soconnect(struct socket
*so
, struct sockaddr
*nam
)
1426 return (soconnectlock(so
, nam
, 1));
1430 * Returns: 0 Success
1431 * <pru_connect2>:EINVAL[AF_UNIX]
1432 * <pru_connect2>:EPROTOTYPE[AF_UNIX]
1433 * <pru_connect2>:??? [other protocol families]
1435 * Notes: <pru_connect2> is not supported by [TCP].
1438 soconnect2(struct socket
*so1
, struct socket
*so2
)
1442 socket_lock(so1
, 1);
1443 if (so2
->so_proto
->pr_lock
)
1444 socket_lock(so2
, 1);
1446 error
= (*so1
->so_proto
->pr_usrreqs
->pru_connect2
)(so1
, so2
);
1448 socket_unlock(so1
, 1);
1449 if (so2
->so_proto
->pr_lock
)
1450 socket_unlock(so2
, 1);
1455 soconnectxlocked(struct socket
*so
, struct sockaddr_list
**src_sl
,
1456 struct sockaddr_list
**dst_sl
, struct proc
*p
, uint32_t ifscope
,
1457 associd_t aid
, connid_t
*pcid
, uint32_t flags
, void *arg
,
1462 so_update_last_owner_locked(so
, p
);
1463 so_update_policy(so
);
1466 * If this is a listening socket or if this is a previously-accepted
1467 * socket that has been marked as inactive, reject the connect request.
1469 if ((so
->so_options
& SO_ACCEPTCONN
) || (so
->so_flags
& SOF_DEFUNCT
)) {
1471 if (so
->so_flags
& SOF_DEFUNCT
) {
1472 SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] "
1473 "(%d)\n", __func__
, proc_pid(p
),
1474 (uint64_t)VM_KERNEL_ADDRPERM(so
),
1475 SOCK_DOM(so
), SOCK_TYPE(so
), error
));
1480 if ((so
->so_restrictions
& SO_RESTRICT_DENY_OUT
) != 0)
1484 * If protocol is connection-based, can only connect once
1485 * unless PR_MULTICONN is set. Otherwise, if connected,
1486 * try to disconnect first. This allows user to disconnect
1487 * by connecting to, e.g., a null address.
1489 if ((so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
)) &&
1490 !(so
->so_proto
->pr_flags
& PR_MULTICONN
) &&
1491 ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) ||
1492 (error
= sodisconnectlocked(so
)) != 0)) {
1496 * Run connect filter before calling protocol:
1497 * - non-blocking connect returns before completion;
1499 error
= sflt_connectxout(so
, dst_sl
);
1501 if (error
== EJUSTRETURN
)
1504 error
= (*so
->so_proto
->pr_usrreqs
->pru_connectx
)
1505 (so
, src_sl
, dst_sl
, p
, ifscope
, aid
, pcid
,
1506 flags
, arg
, arglen
);
1514 sodisconnectlocked(struct socket
*so
)
1518 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
1522 if (so
->so_state
& SS_ISDISCONNECTING
) {
1527 error
= (*so
->so_proto
->pr_usrreqs
->pru_disconnect
)(so
);
1529 sflt_notify(so
, sock_evt_disconnected
, NULL
);
1535 /* Locking version */
1537 sodisconnect(struct socket
*so
)
1542 error
= sodisconnectlocked(so
);
1543 socket_unlock(so
, 1);
1548 sodisconnectxlocked(struct socket
*so
, associd_t aid
, connid_t cid
)
1553 * Call the protocol disconnectx handler; let it handle all
1554 * matters related to the connection state of this session.
1556 error
= (*so
->so_proto
->pr_usrreqs
->pru_disconnectx
)(so
, aid
, cid
);
1559 * The event applies only for the session, not for
1560 * the disconnection of individual subflows.
1562 if (so
->so_state
& (SS_ISDISCONNECTING
|SS_ISDISCONNECTED
))
1563 sflt_notify(so
, sock_evt_disconnected
, NULL
);
1569 sodisconnectx(struct socket
*so
, associd_t aid
, connid_t cid
)
1574 error
= sodisconnectxlocked(so
, aid
, cid
);
1575 socket_unlock(so
, 1);
1580 sopeelofflocked(struct socket
*so
, associd_t aid
, struct socket
**psop
)
1582 return ((*so
->so_proto
->pr_usrreqs
->pru_peeloff
)(so
, aid
, psop
));
1585 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
1588 * sosendcheck will lock the socket buffer if it isn't locked and
1589 * verify that there is space for the data being inserted.
1591 * Returns: 0 Success
1593 * sblock:EWOULDBLOCK
1600 sosendcheck(struct socket
*so
, struct sockaddr
*addr
, user_ssize_t resid
,
1601 int32_t clen
, int32_t atomic
, int flags
, int *sblocked
,
1602 struct mbuf
*control
)
1609 if (*sblocked
== 0) {
1610 if ((so
->so_snd
.sb_flags
& SB_LOCK
) != 0 &&
1611 so
->so_send_filt_thread
!= 0 &&
1612 so
->so_send_filt_thread
== current_thread()) {
1614 * We're being called recursively from a filter,
1615 * allow this to continue. Radar 4150520.
1616 * Don't set sblocked because we don't want
1617 * to perform an unlock later.
1621 error
= sblock(&so
->so_snd
, SBLOCKWAIT(flags
));
1623 if (so
->so_flags
& SOF_DEFUNCT
)
1632 * If a send attempt is made on a socket that has been marked
1633 * as inactive (disconnected), reject the request.
1635 if (so
->so_flags
& SOF_DEFUNCT
) {
1638 SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n",
1639 __func__
, proc_selfpid(), (uint64_t)VM_KERNEL_ADDRPERM(so
),
1640 SOCK_DOM(so
), SOCK_TYPE(so
), error
));
1644 if (so
->so_state
& SS_CANTSENDMORE
) {
1647 * Can re-inject data of half closed connections
1649 if ((so
->so_state
& SS_ISDISCONNECTED
) == 0 &&
1650 so
->so_snd
.sb_cfil_thread
== current_thread() &&
1651 cfil_sock_data_pending(&so
->so_snd
) != 0)
1653 "so %llx ignore SS_CANTSENDMORE",
1654 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1656 #endif /* CONTENT_FILTER */
1660 error
= so
->so_error
;
1665 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
1666 if ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) != 0) {
1667 if (((so
->so_state
& SS_ISCONFIRMING
) == 0) &&
1668 (resid
!= 0 || clen
== 0)) {
1671 * MPTCP Fast Join sends data before the
1672 * socket is truly connected.
1674 if ((so
->so_flags
& (SOF_MP_SUBFLOW
|
1675 SOF_MPTCP_FASTJOIN
)) !=
1676 (SOF_MP_SUBFLOW
| SOF_MPTCP_FASTJOIN
))
1680 } else if (addr
== 0 && !(flags
&MSG_HOLD
)) {
1681 return ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) ?
1682 ENOTCONN
: EDESTADDRREQ
);
1685 if (so
->so_flags
& SOF_ENABLE_MSGS
)
1686 space
= msgq_sbspace(so
, control
);
1688 space
= sbspace(&so
->so_snd
);
1690 if (flags
& MSG_OOB
)
1692 if ((atomic
&& resid
> so
->so_snd
.sb_hiwat
) ||
1693 clen
> so
->so_snd
.sb_hiwat
)
1696 if ((space
< resid
+ clen
&&
1697 (atomic
|| space
< (int32_t)so
->so_snd
.sb_lowat
|| space
< clen
)) ||
1698 (so
->so_type
== SOCK_STREAM
&& so_wait_for_if_feedback(so
))) {
1699 if ((so
->so_state
& SS_NBIO
) || (flags
& MSG_NBIO
) ||
1701 return (EWOULDBLOCK
);
1703 sbunlock(&so
->so_snd
, TRUE
); /* keep socket locked */
1705 error
= sbwait(&so
->so_snd
);
1707 if (so
->so_flags
& SOF_DEFUNCT
)
1718 * If send must go all at once and message is larger than
1719 * send buffering, then hard error.
1720 * Lock against other senders.
1721 * If must go all at once and not enough room now, then
1722 * inform user that this would block and do nothing.
1723 * Otherwise, if nonblocking, send as much as possible.
1724 * The data to be sent is described by "uio" if nonzero,
1725 * otherwise by the mbuf chain "top" (which must be null
1726 * if uio is not). Data provided in mbuf chain must be small
1727 * enough to send all at once.
1729 * Returns nonzero on error, timeout or signal; callers
1730 * must check for short counts if EINTR/ERESTART are returned.
1731 * Data and control buffers are freed on return.
1733 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
1734 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
1735 * point at the mbuf chain being constructed and go from there.
1737 * Returns: 0 Success
1743 * sosendcheck:EWOULDBLOCK
1747 * sosendcheck:??? [value from so_error]
1748 * <pru_send>:ECONNRESET[TCP]
1749 * <pru_send>:EINVAL[TCP]
1750 * <pru_send>:ENOBUFS[TCP]
1751 * <pru_send>:EADDRINUSE[TCP]
1752 * <pru_send>:EADDRNOTAVAIL[TCP]
1753 * <pru_send>:EAFNOSUPPORT[TCP]
1754 * <pru_send>:EACCES[TCP]
1755 * <pru_send>:EAGAIN[TCP]
1756 * <pru_send>:EPERM[TCP]
1757 * <pru_send>:EMSGSIZE[TCP]
1758 * <pru_send>:EHOSTUNREACH[TCP]
1759 * <pru_send>:ENETUNREACH[TCP]
1760 * <pru_send>:ENETDOWN[TCP]
1761 * <pru_send>:ENOMEM[TCP]
1762 * <pru_send>:ENOBUFS[TCP]
1763 * <pru_send>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1764 * <pru_send>:EINVAL[AF_UNIX]
1765 * <pru_send>:EOPNOTSUPP[AF_UNIX]
1766 * <pru_send>:EPIPE[AF_UNIX]
1767 * <pru_send>:ENOTCONN[AF_UNIX]
1768 * <pru_send>:EISCONN[AF_UNIX]
1769 * <pru_send>:???[AF_UNIX] [whatever a filter author chooses]
1770 * <sf_data_out>:??? [whatever a filter author chooses]
1772 * Notes: Other <pru_send> returns depend on the protocol family; all
1773 * <sf_data_out> returns depend on what the filter author causes
1774 * their filter to return.
1777 sosend(struct socket
*so
, struct sockaddr
*addr
, struct uio
*uio
,
1778 struct mbuf
*top
, struct mbuf
*control
, int flags
)
1781 struct mbuf
*m
, *freelist
= NULL
;
1782 user_ssize_t space
, len
, resid
;
1783 int clen
= 0, error
, dontroute
, mlen
, sendflags
;
1784 int atomic
= sosendallatonce(so
) || top
;
1786 struct proc
*p
= current_proc();
1787 struct mbuf
*control_copy
= NULL
;
1790 resid
= uio_resid(uio
);
1792 resid
= top
->m_pkthdr
.len
;
1794 KERNEL_DEBUG((DBG_FNC_SOSEND
| DBG_FUNC_START
), so
, resid
,
1795 so
->so_snd
.sb_cc
, so
->so_snd
.sb_lowat
, so
->so_snd
.sb_hiwat
);
1800 * Re-injection should not affect process accounting
1802 if ((flags
& MSG_SKIPCFIL
) == 0) {
1803 so_update_last_owner_locked(so
, p
);
1804 so_update_policy(so
);
1807 so_update_necp_policy(so
, NULL
, addr
);
1811 if (so
->so_type
!= SOCK_STREAM
&& (flags
& MSG_OOB
) != 0) {
1813 socket_unlock(so
, 1);
1818 * In theory resid should be unsigned.
1819 * However, space must be signed, as it might be less than 0
1820 * if we over-committed, and we must use a signed comparison
1821 * of space and resid. On the other hand, a negative resid
1822 * causes us to loop sending 0-length segments to the protocol.
1824 * Usually, MSG_EOR isn't used on SOCK_STREAM type sockets.
1825 * But it will be used by sockets doing message delivery.
1827 * Note: We limit resid to be a positive int value as we use
1828 * imin() to set bytes_to_copy -- radr://14558484
1830 if (resid
< 0 || resid
> INT_MAX
|| (so
->so_type
== SOCK_STREAM
&&
1831 !(so
->so_flags
& SOF_ENABLE_MSGS
) && (flags
& MSG_EOR
))) {
1833 socket_unlock(so
, 1);
1837 dontroute
= (flags
& MSG_DONTROUTE
) &&
1838 (so
->so_options
& SO_DONTROUTE
) == 0 &&
1839 (so
->so_proto
->pr_flags
& PR_ATOMIC
);
1840 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgsnd
);
1842 if (control
!= NULL
)
1843 clen
= control
->m_len
;
1846 error
= sosendcheck(so
, addr
, resid
, clen
, atomic
, flags
,
1847 &sblocked
, control
);
1852 if (so
->so_flags
& SOF_ENABLE_MSGS
)
1853 space
= msgq_sbspace(so
, control
);
1855 space
= sbspace(&so
->so_snd
) - clen
;
1856 space
+= ((flags
& MSG_OOB
) ? 1024 : 0);
1861 * Data is prepackaged in "top".
1864 if (flags
& MSG_EOR
)
1865 top
->m_flags
|= M_EOR
;
1872 bytes_to_copy
= imin(resid
, space
);
1874 if (sosendminchain
> 0)
1877 chainlength
= sosendmaxchain
;
1880 * Use big 4 KB cluster only when outgoing
1881 * interface does not want 2 LB clusters
1884 !(so
->so_flags1
& SOF1_IF_2KCL
) ||
1885 sosendbigcl_ignore_capab
;
1888 * Attempt to use larger than system page-size
1889 * clusters for large writes only if there is
1890 * a jumbo cluster pool and if the socket is
1891 * marked accordingly.
1893 jumbocl
= sosendjcl
&& njcl
> 0 &&
1894 ((so
->so_flags
& SOF_MULTIPAGES
) ||
1895 sosendjcl_ignore_capab
) &&
1898 socket_unlock(so
, 0);
1902 int hdrs_needed
= (top
== NULL
) ? 1 : 0;
1905 * try to maintain a local cache of mbuf
1906 * clusters needed to complete this
1907 * write the list is further limited to
1908 * the number that are currently needed
1909 * to fill the socket this mechanism
1910 * allows a large number of mbufs/
1911 * clusters to be grabbed under a single
1912 * mbuf lock... if we can't get any
1913 * clusters, than fall back to trying
1914 * for mbufs if we fail early (or
1915 * miscalcluate the number needed) make
1916 * sure to release any clusters we
1917 * haven't yet consumed.
1919 if (freelist
== NULL
&&
1920 bytes_to_copy
> MBIGCLBYTES
&&
1923 bytes_to_copy
/ M16KCLBYTES
;
1925 if ((bytes_to_copy
-
1926 (num_needed
* M16KCLBYTES
))
1931 m_getpackets_internal(
1932 (unsigned int *)&num_needed
,
1933 hdrs_needed
, M_WAIT
, 0,
1936 * Fall back to 4K cluster size
1937 * if allocation failed
1941 if (freelist
== NULL
&&
1942 bytes_to_copy
> MCLBYTES
&&
1945 bytes_to_copy
/ MBIGCLBYTES
;
1947 if ((bytes_to_copy
-
1948 (num_needed
* MBIGCLBYTES
)) >=
1953 m_getpackets_internal(
1954 (unsigned int *)&num_needed
,
1955 hdrs_needed
, M_WAIT
, 0,
1958 * Fall back to cluster size
1959 * if allocation failed
1963 if (freelist
== NULL
&&
1964 bytes_to_copy
> MINCLSIZE
) {
1966 bytes_to_copy
/ MCLBYTES
;
1968 if ((bytes_to_copy
-
1969 (num_needed
* MCLBYTES
)) >=
1974 m_getpackets_internal(
1975 (unsigned int *)&num_needed
,
1976 hdrs_needed
, M_WAIT
, 0,
1979 * Fall back to a single mbuf
1980 * if allocation failed
1984 if (freelist
== NULL
) {
1992 if (freelist
== NULL
) {
1998 * For datagram protocols,
1999 * leave room for protocol
2000 * headers in first mbuf.
2002 if (atomic
&& top
== NULL
&&
2003 bytes_to_copy
< MHLEN
) {
2009 freelist
= m
->m_next
;
2012 if ((m
->m_flags
& M_EXT
))
2013 mlen
= m
->m_ext
.ext_size
;
2014 else if ((m
->m_flags
& M_PKTHDR
))
2016 MHLEN
- m_leadingspace(m
);
2019 len
= imin(mlen
, bytes_to_copy
);
2025 error
= uiomove(mtod(m
, caddr_t
),
2028 resid
= uio_resid(uio
);
2032 top
->m_pkthdr
.len
+= len
;
2037 if (flags
& MSG_EOR
)
2038 top
->m_flags
|= M_EOR
;
2041 bytes_to_copy
= min(resid
, space
);
2043 } while (space
> 0 &&
2044 (chainlength
< sosendmaxchain
|| atomic
||
2045 resid
< MINCLSIZE
));
2053 if (flags
& (MSG_HOLD
|MSG_SEND
)) {
2054 /* Enqueue for later, go away if HOLD */
2056 if (so
->so_temp
&& (flags
& MSG_FLUSH
)) {
2057 m_freem(so
->so_temp
);
2061 so
->so_tail
->m_next
= top
;
2068 if (flags
& MSG_HOLD
) {
2075 so
->so_options
|= SO_DONTROUTE
;
2077 /* Compute flags here, for pru_send and NKEs */
2078 sendflags
= (flags
& MSG_OOB
) ? PRUS_OOB
:
2080 * If the user set MSG_EOF, the protocol
2081 * understands this flag and nothing left to
2082 * send then use PRU_SEND_EOF instead of PRU_SEND.
2084 ((flags
& MSG_EOF
) &&
2085 (so
->so_proto
->pr_flags
& PR_IMPLOPCL
) &&
2086 (resid
<= 0)) ? PRUS_EOF
:
2087 /* If there is more to send set PRUS_MORETOCOME */
2088 (resid
> 0 && space
> 0) ? PRUS_MORETOCOME
: 0;
2090 if ((flags
& MSG_SKIPCFIL
) == 0) {
2092 * Socket filter processing
2094 error
= sflt_data_out(so
, addr
, &top
,
2095 &control
, (sendflags
& MSG_OOB
) ?
2096 sock_data_filt_flag_oob
: 0);
2098 if (error
== EJUSTRETURN
) {
2108 * Content filter processing
2110 error
= cfil_sock_data_out(so
, addr
, top
,
2111 control
, (sendflags
& MSG_OOB
) ?
2112 sock_data_filt_flag_oob
: 0);
2114 if (error
== EJUSTRETURN
) {
2122 #endif /* CONTENT_FILTER */
2124 if (so
->so_flags
& SOF_ENABLE_MSGS
) {
2126 * Make a copy of control mbuf,
2127 * so that msg priority can be
2128 * passed to subsequent mbufs.
2130 control_copy
= m_dup(control
, M_NOWAIT
);
2132 error
= (*so
->so_proto
->pr_usrreqs
->pru_send
)
2133 (so
, sendflags
, top
, addr
, control
, p
);
2135 if (flags
& MSG_SEND
)
2139 so
->so_options
&= ~SO_DONTROUTE
;
2142 control
= control_copy
;
2143 control_copy
= NULL
;
2148 } while (resid
&& space
> 0);
2153 sbunlock(&so
->so_snd
, FALSE
); /* will unlock socket */
2155 socket_unlock(so
, 1);
2159 if (control
!= NULL
)
2161 if (freelist
!= NULL
)
2162 m_freem_list(freelist
);
2163 if (control_copy
!= NULL
)
2164 m_freem(control_copy
);
2166 KERNEL_DEBUG(DBG_FNC_SOSEND
| DBG_FUNC_END
, so
, resid
, so
->so_snd
.sb_cc
,
2173 sosend_list(struct socket
*so
, struct sockaddr
*addr
, struct uio
**uioarray
,
2174 u_int uiocnt
, struct mbuf
*top
, struct mbuf
*control
, int flags
)
2176 struct mbuf
*m
, *freelist
= NULL
;
2177 user_ssize_t len
, resid
;
2178 int clen
= 0, error
, dontroute
, mlen
;
2179 int atomic
= sosendallatonce(so
) || top
;
2181 struct proc
*p
= current_proc();
2185 KERNEL_DEBUG((DBG_FNC_SOSEND_LIST
| DBG_FUNC_START
), so
, uiocnt
,
2186 so
->so_snd
.sb_cc
, so
->so_snd
.sb_lowat
, so
->so_snd
.sb_hiwat
);
2188 if (so
->so_type
!= SOCK_DGRAM
) {
2196 if (so
->so_proto
->pr_usrreqs
->pru_send_list
== NULL
) {
2197 error
= EPROTONOSUPPORT
;
2200 if (flags
& ~(MSG_DONTWAIT
| MSG_NBIO
)) {
2204 if (uioarray
!= NULL
)
2205 resid
= uio_array_resid(uioarray
, uiocnt
);
2207 resid
= mbuf_pkt_list_len(top
);
2210 * In theory resid should be unsigned.
2211 * However, space must be signed, as it might be less than 0
2212 * if we over-committed, and we must use a signed comparison
2213 * of space and resid. On the other hand, a negative resid
2214 * causes us to loop sending 0-length segments to the protocol.
2216 * Note: We limit resid to be a positive int value as we use
2217 * imin() to set bytes_to_copy -- radr://14558484
2219 if (resid
< 0 || resid
> INT_MAX
) {
2224 * Disallow functionality not currently supported
2225 * Note: Will need to treat arrays of addresses and controls
2228 printf("%s addr not supported\n", __func__
);
2232 if (control
!= NULL
) {
2233 printf("%s control not supported\n", __func__
);
2239 so_update_last_owner_locked(so
, p
);
2240 so_update_policy(so
);
2243 so_update_necp_policy(so
, NULL
, addr
);
2246 dontroute
= (flags
& MSG_DONTROUTE
) &&
2247 (so
->so_options
& SO_DONTROUTE
) == 0 &&
2248 (so
->so_proto
->pr_flags
& PR_ATOMIC
);
2249 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgsnd
);
2251 if (control
!= NULL
)
2252 clen
= control
->m_len
;
2254 error
= sosendcheck(so
, addr
, resid
, clen
, atomic
, flags
,
2255 &sblocked
, control
);
2262 if (uioarray
== NULL
) {
2264 * Data is prepackaged in "top".
2270 size_t maxpktlen
= 0;
2272 if (sosendminchain
> 0)
2275 chainlength
= sosendmaxchain
;
2277 socket_unlock(so
, 0);
2280 * Find a set of uio that fit in a reasonable number
2283 for (i
= uiofirst
; i
< uiocnt
; i
++) {
2284 struct uio
*auio
= uioarray
[i
];
2286 len
= uio_resid(auio
);
2288 /* Do nothing for empty messages */
2295 if (len
> maxpktlen
)
2299 if (chainlength
> sosendmaxchain
)
2303 * Nothing left to send
2305 if (num_needed
== 0) {
2310 * Allocate the mbuf packets at once
2312 freelist
= m_allocpacket_internal(
2313 (unsigned int *)&num_needed
,
2314 maxpktlen
, NULL
, M_WAIT
, 1, 0);
2316 if (freelist
== NULL
) {
2322 * Copy each uio of the set into its own mbuf packet
2324 for (i
= uiofirst
, m
= freelist
;
2325 i
< uiolast
&& m
!= NULL
;
2329 struct uio
*auio
= uioarray
[i
];
2331 bytes_to_copy
= uio_resid(auio
);
2333 /* Do nothing for empty messages */
2334 if (bytes_to_copy
== 0)
2337 for (n
= m
; n
!= NULL
; n
= n
->m_next
) {
2338 mlen
= mbuf_maxlen(n
);
2340 len
= imin(mlen
, bytes_to_copy
);
2343 * Note: uiomove() decrements the iovec
2346 error
= uiomove(mtod(n
, caddr_t
),
2351 m
->m_pkthdr
.len
+= len
;
2353 VERIFY(m
->m_pkthdr
.len
<= maxpktlen
);
2355 bytes_to_copy
-= len
;
2358 if (m
->m_pkthdr
.len
== 0) {
2359 printf("%s so %llx pkt %llx len null\n",
2361 (uint64_t)VM_KERNEL_ADDRPERM(so
),
2362 (uint64_t)VM_KERNEL_ADDRPERM(m
));
2378 so
->so_options
|= SO_DONTROUTE
;
2380 if ((flags
& MSG_SKIPCFIL
) == 0) {
2381 struct mbuf
**prevnextp
= NULL
;
2383 for (i
= uiofirst
, m
= top
;
2384 i
< uiolast
&& m
!= NULL
;
2386 struct mbuf
*nextpkt
= m
->m_nextpkt
;
2389 * Socket filter processing
2391 error
= sflt_data_out(so
, addr
, &m
,
2393 if (error
!= 0 && error
!= EJUSTRETURN
)
2399 * Content filter processing
2401 error
= cfil_sock_data_out(so
, addr
, m
,
2403 if (error
!= 0 && error
!= EJUSTRETURN
)
2406 #endif /* CONTENT_FILTER */
2408 * Remove packet from the list when
2409 * swallowed by a filter
2411 if (error
== EJUSTRETURN
) {
2413 if (prevnextp
!= NULL
)
2414 *prevnextp
= nextpkt
;
2421 prevnextp
= &m
->m_nextpkt
;
2425 error
= (*so
->so_proto
->pr_usrreqs
->pru_send_list
)
2426 (so
, 0, top
, addr
, control
, p
);
2429 so
->so_options
&= ~SO_DONTROUTE
;
2434 } while (resid
> 0 && error
== 0);
2437 sbunlock(&so
->so_snd
, FALSE
); /* will unlock socket */
2439 socket_unlock(so
, 1);
2443 if (control
!= NULL
)
2445 if (freelist
!= NULL
)
2446 m_freem_list(freelist
);
2448 KERNEL_DEBUG(DBG_FNC_SOSEND_LIST
| DBG_FUNC_END
, so
, resid
,
2449 so
->so_snd
.sb_cc
, 0, error
);
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 *
 * Returns:	0			Success
 *	sblock:EWOULDBLOCK
 *	sodelayed_copy:EFAULT
 *	<pru_rcvoob>:EINVAL[TCP]
 *	<pru_rcvoob>:EWOULDBLOCK[TCP]
 *	<pr_domain->dom_externalize>:EMSGSIZE[AF_UNIX]
 *	<pr_domain->dom_externalize>:ENOBUFS[AF_UNIX]
 *	<pr_domain->dom_externalize>:???
 *
 * Notes:	Additional return values from calls through <pru_rcvoob> and
 *		<pr_domain->dom_externalize> depend on protocols other than
 *		TCP or AF_UNIX, which are documented above.
 */
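/*
 * Illustrative sketch (not part of this file): how the record layout that
 * soreceive() depends on -- optional address, optional control mbufs, then
 * data -- surfaces to a user process through recvmsg(2) on a datagram
 * socket.  The buffer sizes and the idea that the peer supplies ancillary
 * data are assumptions made for the example.
 */
#if 0	/* user-space example, not compiled into the kernel */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <stdio.h>
#include <string.h>

static void
example_recvmsg(int fd)
{
	char data[2048];
	char cbuf[1024];
	struct sockaddr_storage from;
	struct iovec iov = { .iov_base = data, .iov_len = sizeof (data) };
	struct msghdr msg;
	struct cmsghdr *cmsg;
	ssize_t n;

	memset(&msg, 0, sizeof (msg));
	msg.msg_name = &from;			/* record's MT_SONAME, if any */
	msg.msg_namelen = sizeof (from);
	msg.msg_iov = &iov;			/* record's data mbufs */
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;			/* record's MT_CONTROL mbufs */
	msg.msg_controllen = sizeof (cbuf);

	n = recvmsg(fd, &msg, 0);
	if (n < 0)
		return;
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
	    cmsg = CMSG_NXTHDR(&msg, cmsg))
		printf("cmsg level %d type %d\n",
		    cmsg->cmsg_level, cmsg->cmsg_type);
}
#endif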
soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
	struct mbuf *m, **mp, *ml = NULL;
	struct mbuf *nextrecord, *free_list;
	int flags, error, offset;
	struct protosw *pr = so->so_proto;
	user_ssize_t orig_resid = uio_resid(uio);
	user_ssize_t delayed_copy_len;
	struct proc *p = current_proc();

	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, so, uio_resid(uio),
	    so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat);
	/*
	 * Sanity check on the length passed by caller as we are making 'int'
	 */
	if (orig_resid < 0 || orig_resid > INT_MAX)
2518 so_update_last_owner_locked(so
, p
);
2519 so_update_policy(so
);
2521 #ifdef MORE_LOCKING_DEBUG
2522 if (so
->so_usecount
== 1) {
2523 panic("%s: so=%x no other reference on socket\n", __func__
, so
);
2530 if (controlp
!= NULL
)
2533 flags
= *flagsp
&~ MSG_EOR
;
2538 * If a recv attempt is made on a previously-accepted socket
2539 * that has been marked as inactive (disconnected), reject
2542 if (so
->so_flags
& SOF_DEFUNCT
) {
2543 struct sockbuf
*sb
= &so
->so_rcv
;
2546 SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n",
2547 __func__
, proc_pid(p
), (uint64_t)VM_KERNEL_ADDRPERM(so
),
2548 SOCK_DOM(so
), SOCK_TYPE(so
), error
));
2550 * This socket should have been disconnected and flushed
2551 * prior to being returned from sodefunct(); there should
2552 * be no data on its receive list, so panic otherwise.
2554 if (so
->so_state
& SS_DEFUNCT
)
2555 sb_empty_assert(sb
, __func__
);
2556 socket_unlock(so
, 1);
	 * When SO_WANTOOBFLAG is set we try to get out-of-band data
	 * regardless of the flags argument. Here is the case where
	 * out-of-band data is not inline.
2565 if ((flags
& MSG_OOB
) ||
2566 ((so
->so_options
& SO_WANTOOBFLAG
) != 0 &&
2567 (so
->so_options
& SO_OOBINLINE
) == 0 &&
2568 (so
->so_oobmark
|| (so
->so_state
& SS_RCVATMARK
)))) {
2569 m
= m_get(M_WAIT
, MT_DATA
);
2571 socket_unlock(so
, 1);
2572 KERNEL_DEBUG(DBG_FNC_SORECEIVE
| DBG_FUNC_END
,
2573 ENOBUFS
, 0, 0, 0, 0);
2576 error
= (*pr
->pr_usrreqs
->pru_rcvoob
)(so
, m
, flags
& MSG_PEEK
);
2579 socket_unlock(so
, 0);
2581 error
= uiomove(mtod(m
, caddr_t
),
2582 imin(uio_resid(uio
), m
->m_len
), uio
);
2584 } while (uio_resid(uio
) && error
== 0 && m
!= NULL
);
2590 if ((so
->so_options
& SO_WANTOOBFLAG
) != 0) {
2591 if (error
== EWOULDBLOCK
|| error
== EINVAL
) {
				 * Let's try to get normal data:
				 * EWOULDBLOCK: out-of-band data not
				 * received yet. EINVAL: out-of-band data
2600 } else if (error
== 0 && flagsp
!= NULL
) {
2604 socket_unlock(so
, 1);
2605 KERNEL_DEBUG(DBG_FNC_SORECEIVE
| DBG_FUNC_END
, error
,
2614 if (so
->so_state
& SS_ISCONFIRMING
&& uio_resid(uio
)) {
2615 (*pr
->pr_usrreqs
->pru_rcvd
)(so
, 0);
2619 delayed_copy_len
= 0;
2621 #ifdef MORE_LOCKING_DEBUG
2622 if (so
->so_usecount
<= 1)
2623 printf("soreceive: sblock so=0x%llx ref=%d on socket\n",
2624 (uint64_t)VM_KERNEL_ADDRPERM(so
), so
->so_usecount
);
	/*
	 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
	 * and if so just return to the caller.  This could happen when
	 * soreceive() is called by a socket upcall function during the
	 * time the socket is freed.  The socket buffer would have been
	 * locked across the upcall, therefore we cannot put this thread
	 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
	 * we may livelock), because the lock on the socket buffer will
	 * only be released when the upcall routine returns to its caller.
	 * Because the socket has been officially closed, there can be
	 * no further read on it.
	 *
	 * A multipath subflow socket would have its SS_NOFDREF set by
	 * default, so check for SOF_MP_SUBFLOW socket flag; when the
	 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
	    (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
		socket_unlock(so, 1);
2648 error
= sblock(&so
->so_rcv
, SBLOCKWAIT(flags
));
2650 socket_unlock(so
, 1);
2651 KERNEL_DEBUG(DBG_FNC_SORECEIVE
| DBG_FUNC_END
, error
,
2656 m
= so
->so_rcv
.sb_mb
;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio_resid(uio)) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
		/*
		 * Panic if we notice inconsistencies in the socket's
		 * receive list; both sb_mb and sb_cc should correctly
		 * reflect the contents of the list, otherwise we may
		 * end up with false positives during select() or poll()
		 * which could put the application in a bad state.
		 */
		SB_MB_CHECK(&so->so_rcv);
2685 error
= so
->so_error
;
2686 if ((flags
& MSG_PEEK
) == 0)
2690 if (so
->so_state
& SS_CANTRCVMORE
) {
2693 * Deal with half closed connections
2695 if ((so
->so_state
& SS_ISDISCONNECTED
) == 0 &&
2696 cfil_sock_data_pending(&so
->so_rcv
) != 0)
2698 "so %llx ignore SS_CANTRCVMORE",
2699 (uint64_t)VM_KERNEL_ADDRPERM(so
));
2701 #endif /* CONTENT_FILTER */
2707 for (; m
!= NULL
; m
= m
->m_next
)
2708 if (m
->m_type
== MT_OOBDATA
|| (m
->m_flags
& M_EOR
)) {
2709 m
= so
->so_rcv
.sb_mb
;
2712 if ((so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
)) == 0 &&
2713 (so
->so_proto
->pr_flags
& PR_CONNREQUIRED
)) {
2717 if (uio_resid(uio
) == 0)
2719 if ((so
->so_state
& SS_NBIO
) ||
2720 (flags
& (MSG_DONTWAIT
|MSG_NBIO
))) {
2721 error
= EWOULDBLOCK
;
2724 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive sbwait 1");
2725 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive sbwait 1");
2726 sbunlock(&so
->so_rcv
, TRUE
); /* keep socket locked */
2727 #if EVEN_MORE_LOCKING_DEBUG
2729 printf("Waiting for socket data\n");
2732 error
= sbwait(&so
->so_rcv
);
2733 #if EVEN_MORE_LOCKING_DEBUG
2735 printf("SORECEIVE - sbwait returned %d\n", error
);
2737 if (so
->so_usecount
< 1) {
2738 panic("%s: after 2nd sblock so=%p ref=%d on socket\n",
2739 __func__
, so
, so
->so_usecount
);
2743 socket_unlock(so
, 1);
2744 KERNEL_DEBUG(DBG_FNC_SORECEIVE
| DBG_FUNC_END
, error
,
2751 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgrcv
);
2752 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 1");
2753 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 1");
2754 nextrecord
= m
->m_nextpkt
;
2755 if ((pr
->pr_flags
& PR_ADDR
) && m
->m_type
== MT_SONAME
) {
2756 KASSERT(m
->m_type
== MT_SONAME
, ("receive 1a"));
2757 #if CONFIG_MACF_SOCKET_SUBSET
2759 * Call the MAC framework for policy checking if we're in
2760 * the user process context and the socket isn't connected.
2762 if (p
!= kernproc
&& !(so
->so_state
& SS_ISCONNECTED
)) {
2763 struct mbuf
*m0
= m
;
			/*
			 * Dequeue this record (temporarily) from the receive
			 * list since we're about to drop the socket's lock
			 * where a new record may arrive and be appended to
			 * the list.  Upon MAC policy failure, the record
			 * will be freed.  Otherwise, we'll add it back to
			 * the head of the list.  We cannot rely on SB_LOCK
			 * because append operation uses the socket's lock.
			 */
2774 m
->m_nextpkt
= NULL
;
2775 sbfree(&so
->so_rcv
, m
);
2777 } while (m
!= NULL
);
2779 so
->so_rcv
.sb_mb
= nextrecord
;
2780 SB_EMPTY_FIXUP(&so
->so_rcv
);
2781 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 1a");
2782 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 1a");
2783 socket_unlock(so
, 0);
2785 if (mac_socket_check_received(proc_ucred(p
), so
,
2786 mtod(m
, struct sockaddr
*)) != 0) {
				/*
				 * MAC policy failure; free this record and
				 * process the next record (or block until
				 * one is available).  We have adjusted sb_cc
				 * and sb_mbcnt above so there is no need to
				 * call sbfree() again.
				 */
2796 } while (m
!= NULL
);
			/*
			 * Clear SB_LOCK but don't unlock the socket.
			 * Process the next record or wait for one.
			 */
2802 sbunlock(&so
->so_rcv
, TRUE
); /* stay locked */
2807 * If the socket has been defunct'd, drop it.
2809 if (so
->so_flags
& SOF_DEFUNCT
) {
		/*
		 * Re-adjust the socket receive list and re-enqueue
		 * the record in front of any packets which may have
		 * been appended while we dropped the lock.
		 */
2819 for (m
= m0
; m
->m_next
!= NULL
; m
= m
->m_next
)
2820 sballoc(&so
->so_rcv
, m
);
2821 sballoc(&so
->so_rcv
, m
);
2822 if (so
->so_rcv
.sb_mb
== NULL
) {
2823 so
->so_rcv
.sb_lastrecord
= m0
;
2824 so
->so_rcv
.sb_mbtail
= m
;
2827 nextrecord
= m
->m_nextpkt
= so
->so_rcv
.sb_mb
;
2828 so
->so_rcv
.sb_mb
= m
;
2829 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 1b");
2830 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 1b");
2832 #endif /* CONFIG_MACF_SOCKET_SUBSET */
2835 *psa
= dup_sockaddr(mtod(m
, struct sockaddr
*),
2837 if ((*psa
== NULL
) && (flags
& MSG_NEEDSA
)) {
2838 error
= EWOULDBLOCK
;
2842 if (flags
& MSG_PEEK
) {
2845 sbfree(&so
->so_rcv
, m
);
2846 if (m
->m_next
== NULL
&& so
->so_rcv
.sb_cc
!= 0) {
2847 panic("%s: about to create invalid socketbuf",
2851 MFREE(m
, so
->so_rcv
.sb_mb
);
2852 m
= so
->so_rcv
.sb_mb
;
2854 m
->m_nextpkt
= nextrecord
;
2856 so
->so_rcv
.sb_mb
= nextrecord
;
2857 SB_EMPTY_FIXUP(&so
->so_rcv
);
	/*
	 * Process one or more MT_CONTROL mbufs present before any data mbufs
	 * in the first mbuf chain on the socket buffer.  If MSG_PEEK, we
	 * just copy the data; if !MSG_PEEK, we call into the protocol to
	 * perform externalization.
	 */
2868 if (m
!= NULL
&& m
->m_type
== MT_CONTROL
) {
2869 struct mbuf
*cm
= NULL
, *cmn
;
2870 struct mbuf
**cme
= &cm
;
2871 struct sockbuf
*sb_rcv
= &so
->so_rcv
;
2872 struct mbuf
**msgpcm
= NULL
;
		/*
		 * Externalizing the control messages would require us to
		 * drop the socket's lock below.  Once we re-acquire the
		 * lock, the mbuf chain might change.  In order to preserve
		 * consistency, we unlink all control messages from the
		 * first mbuf chain in one shot and link them separately
		 * onto a different chain.
		 */
2883 if (flags
& MSG_PEEK
) {
2884 if (controlp
!= NULL
) {
2885 if (*controlp
== NULL
) {
2888 *controlp
= m_copy(m
, 0, m
->m_len
);
2891 * If we failed to allocate an mbuf,
2892 * release any previously allocated
2893 * mbufs for control data. Return
2894 * an error. Keep the mbufs in the
2895 * socket as this is using
2898 if (*controlp
== NULL
) {
2903 controlp
= &(*controlp
)->m_next
;
2907 m
->m_nextpkt
= NULL
;
2909 sb_rcv
->sb_mb
= m
->m_next
;
2912 cme
= &(*cme
)->m_next
;
2915 } while (m
!= NULL
&& m
->m_type
== MT_CONTROL
);
2917 if (!(flags
& MSG_PEEK
)) {
2918 if (sb_rcv
->sb_mb
!= NULL
) {
2919 sb_rcv
->sb_mb
->m_nextpkt
= nextrecord
;
2921 sb_rcv
->sb_mb
= nextrecord
;
2922 SB_EMPTY_FIXUP(sb_rcv
);
2924 if (nextrecord
== NULL
)
2925 sb_rcv
->sb_lastrecord
= m
;
2928 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive ctl");
2929 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive ctl");
2931 while (cm
!= NULL
) {
2936 cmsg_type
= mtod(cm
, struct cmsghdr
*)->cmsg_type
;
			/*
			 * Call the protocol to externalize SCM_RIGHTS message
			 * and return the modified message to the caller upon
			 * success.  Otherwise, all other control messages are
			 * returned unmodified to the caller.  Note that we
			 * only get into this loop if MSG_PEEK is not set.
			 */
2945 if (pr
->pr_domain
->dom_externalize
!= NULL
&&
2946 cmsg_type
== SCM_RIGHTS
) {
				/*
				 * Release socket lock: see 3903171.  This
				 * would also allow more records to be appended
				 * to the socket buffer.  We still have SB_LOCK
				 * set on it, so we can be sure that the head
				 * of the mbuf chain won't change.
				 */
2954 socket_unlock(so
, 0);
2955 error
= (*pr
->pr_domain
->dom_externalize
)(cm
);
2961 if (controlp
!= NULL
&& error
== 0) {
2963 controlp
= &(*controlp
)->m_next
;
		/*
		 * Update the value of nextrecord in case we received new
		 * records when the socket was unlocked above for
		 * externalizing SCM_RIGHTS.
		 */
2976 nextrecord
= sb_rcv
->sb_mb
->m_nextpkt
;
2978 nextrecord
= sb_rcv
->sb_mb
;
	/*
	 * If the socket is a TCP socket with message delivery
	 * enabled, then create a control msg to deliver the
	 * relative TCP sequence number for this data.  Waiting
	 * until this point will protect against failures to
	 * allocate an mbuf for control msgs.
	 */
2989 if (so
->so_type
== SOCK_STREAM
&& SOCK_PROTO(so
) == IPPROTO_TCP
&&
2990 (so
->so_flags
& SOF_ENABLE_MSGS
) && controlp
!= NULL
) {
2991 struct mbuf
*seq_cm
;
2993 seq_cm
= sbcreatecontrol((caddr_t
)&m
->m_pkthdr
.msg_seq
,
2994 sizeof (uint32_t), SCM_SEQNUM
, SOL_SOCKET
);
2995 if (seq_cm
== NULL
) {
2996 /* unable to allocate a control mbuf */
3001 controlp
= &seq_cm
->m_next
;
3005 if (!(flags
& MSG_PEEK
)) {
		/*
		 * We get here because m points to an mbuf following
		 * any MT_SONAME or MT_CONTROL mbufs which have been
		 * processed above.  In any case, m should be pointing
		 * to the head of the mbuf chain, and the nextrecord
		 * should be either NULL or equal to m->m_nextpkt.
		 * See comments above about SB_LOCK.
		 */
3014 if (m
!= so
->so_rcv
.sb_mb
||
3015 m
->m_nextpkt
!= nextrecord
) {
3016 panic("%s: post-control !sync so=%p m=%p "
3017 "nextrecord=%p\n", __func__
, so
, m
,
3021 if (nextrecord
== NULL
)
3022 so
->so_rcv
.sb_lastrecord
= m
;
3025 if (type
== MT_OOBDATA
)
3028 if (!(flags
& MSG_PEEK
)) {
3029 SB_EMPTY_FIXUP(&so
->so_rcv
);
3032 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 2");
3033 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 2");
3038 if (!(flags
& MSG_PEEK
) && uio_resid(uio
) > sorecvmincopy
)
3046 (uio_resid(uio
) - delayed_copy_len
) > 0 && error
== 0) {
3047 if (m
->m_type
== MT_OOBDATA
) {
3048 if (type
!= MT_OOBDATA
)
3050 } else if (type
== MT_OOBDATA
) {
			 * Make sure to always set MSG_OOB event when getting
			 * out of band data inline.
3057 if ((so
->so_options
& SO_WANTOOBFLAG
) != 0 &&
3058 (so
->so_options
& SO_OOBINLINE
) != 0 &&
3059 (so
->so_state
& SS_RCVATMARK
) != 0) {
3062 so
->so_state
&= ~SS_RCVATMARK
;
3063 len
= uio_resid(uio
) - delayed_copy_len
;
3064 if (so
->so_oobmark
&& len
> so
->so_oobmark
- offset
)
3065 len
= so
->so_oobmark
- offset
;
3066 if (len
> m
->m_len
- moff
)
3067 len
= m
->m_len
- moff
;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
3077 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive uiomove");
3078 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive uiomove");
3079 if (can_delay
&& len
== m
->m_len
) {
				 * only delay the copy if we're consuming the
				 * mbuf and we're NOT in MSG_PEEK mode
				 * and we have enough data to make it worthwhile
				 * to drop and retake the lock... can_delay
				 * reflects the state of the 2 latter
				 * constraints; moff should always be zero
3089 delayed_copy_len
+= len
;
3091 if (delayed_copy_len
) {
3092 error
= sodelayed_copy(so
, uio
,
3093 &free_list
, &delayed_copy_len
);
3099 * can only get here if MSG_PEEK is not
3100 * set therefore, m should point at the
3101 * head of the rcv queue; if it doesn't,
3102 * it means something drastically
3103 * changed while we were out from behind
3104 * the lock in sodelayed_copy. perhaps
3105 * a RST on the stream. in any event,
3106 * the stream has been interrupted. it's
3107 * probably best just to return whatever
3108 * data we've moved and let the caller
3111 if (m
!= so
->so_rcv
.sb_mb
) {
3115 socket_unlock(so
, 0);
3116 error
= uiomove(mtod(m
, caddr_t
) + moff
,
3124 uio_setresid(uio
, (uio_resid(uio
) - len
));
3126 if (len
== m
->m_len
- moff
) {
3127 if (m
->m_flags
& M_EOR
)
3129 if (flags
& MSG_PEEK
) {
3133 nextrecord
= m
->m_nextpkt
;
3134 sbfree(&so
->so_rcv
, m
);
3135 m
->m_nextpkt
= NULL
;
3138 * If this packet is an unordered packet
3139 * (indicated by M_UNORDERED_DATA flag), remove
3140 * the additional bytes added to the
3141 * receive socket buffer size.
3143 if ((so
->so_flags
& SOF_ENABLE_MSGS
) &&
3145 (m
->m_flags
& M_UNORDERED_DATA
) &&
3146 sbreserve(&so
->so_rcv
,
3147 so
->so_rcv
.sb_hiwat
- m
->m_len
)) {
3148 if (so
->so_msg_state
->msg_uno_bytes
>
3151 msg_uno_bytes
-= m
->m_len
;
3156 m
->m_flags
&= ~M_UNORDERED_DATA
;
3162 so
->so_rcv
.sb_mb
= m
= m
->m_next
;
3165 if (free_list
== NULL
)
3170 so
->so_rcv
.sb_mb
= m
= m
->m_next
;
3174 m
->m_nextpkt
= nextrecord
;
3175 if (nextrecord
== NULL
)
3176 so
->so_rcv
.sb_lastrecord
= m
;
3178 so
->so_rcv
.sb_mb
= nextrecord
;
3179 SB_EMPTY_FIXUP(&so
->so_rcv
);
3181 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 3");
3182 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 3");
3185 if (flags
& MSG_PEEK
) {
3191 if (flags
& MSG_DONTWAIT
)
3192 copy_flag
= M_DONTWAIT
;
3195 *mp
= m_copym(m
, 0, len
, copy_flag
);
3197 * Failed to allocate an mbuf?
3198 * Adjust uio_resid back, it was
3199 * adjusted down by len bytes which
3200 * we didn't copy over.
3204 (uio_resid(uio
) + len
));
3210 so
->so_rcv
.sb_cc
-= len
;
3213 if (so
->so_oobmark
) {
3214 if ((flags
& MSG_PEEK
) == 0) {
3215 so
->so_oobmark
-= len
;
3216 if (so
->so_oobmark
== 0) {
3217 so
->so_state
|= SS_RCVATMARK
;
3219 * delay posting the actual event until
3220 * after any delayed copy processing
3228 if (offset
== so
->so_oobmark
)
3232 if (flags
& MSG_EOR
)
	/*
	 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set
	 * (for non-atomic socket), we must not quit until
	 * "uio->uio_resid == 0" or an error termination.
	 * If a signal/timeout occurs, return with a short
	 * count but without error.  Keep sockbuf locked
	 * against other readers.
	 */
3242 while (flags
& (MSG_WAITALL
|MSG_WAITSTREAM
) && m
== NULL
&&
3243 (uio_resid(uio
) - delayed_copy_len
) > 0 &&
3244 !sosendallatonce(so
) && !nextrecord
) {
3245 if (so
->so_error
|| ((so
->so_state
& SS_CANTRCVMORE
)
3247 && cfil_sock_data_pending(&so
->so_rcv
) == 0
3248 #endif /* CONTENT_FILTER */
		/*
		 * Depending on the protocol (e.g. TCP), the following
		 * might cause the socket lock to be dropped and later
		 * be reacquired, and more data could have arrived and
		 * have been appended to the receive socket buffer by
		 * the time it returns.  Therefore, we only sleep in
		 * sbwait() below if and only if the socket buffer is
		 * empty, in order to avoid a false sleep.
		 */
3261 if (pr
->pr_flags
& PR_WANTRCVD
&& so
->so_pcb
&&
3262 (((struct inpcb
*)so
->so_pcb
)->inp_state
!=
3264 (*pr
->pr_usrreqs
->pru_rcvd
)(so
, flags
);
3266 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive sbwait 2");
3267 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive sbwait 2");
3269 if (so
->so_rcv
.sb_mb
== NULL
&& sbwait(&so
->so_rcv
)) {
3274 * have to wait until after we get back from the sbwait
3275 * to do the copy because we will drop the lock if we
3276 * have enough data that has been delayed... by dropping
3277 * the lock we open up a window allowing the netisr
3278 * thread to process the incoming packets and to change
3279 * the state of this socket... we're issuing the sbwait
3280 * because the socket is empty and we're expecting the
3281 * netisr thread to wake us up when more packets arrive;
3282 * if we allow that processing to happen and then sbwait
3283 * we could stall forever with packets sitting in the
3284 * socket if no further packets arrive from the remote
3287 * we want to copy before we've collected all the data
3288 * to satisfy this request to allow the copy to overlap
3289 * the incoming packet processing on an MP system
3291 if (delayed_copy_len
> sorecvmincopy
&&
3292 (delayed_copy_len
> (so
->so_rcv
.sb_hiwat
/ 2))) {
3293 error
= sodelayed_copy(so
, uio
,
3294 &free_list
, &delayed_copy_len
);
3299 m
= so
->so_rcv
.sb_mb
;
3301 nextrecord
= m
->m_nextpkt
;
3303 SB_MB_CHECK(&so
->so_rcv
);
3306 #ifdef MORE_LOCKING_DEBUG
3307 if (so
->so_usecount
<= 1) {
3308 panic("%s: after big while so=%p ref=%d on socket\n",
3309 __func__
, so
, so
->so_usecount
);
3314 if (m
!= NULL
&& pr
->pr_flags
& PR_ATOMIC
) {
3315 if (so
->so_options
& SO_DONTTRUNC
) {
3316 flags
|= MSG_RCVMORE
;
3319 if ((flags
& MSG_PEEK
) == 0)
3320 (void) sbdroprecord(&so
->so_rcv
);
	/*
	 * pru_rcvd below (for TCP) may cause more data to be received
	 * if the socket lock is dropped prior to sending the ACK; some
	 * legacy OpenTransport applications don't handle this well
	 * (if it receives less data than requested while MSG_HAVEMORE
	 * is set), and so we set the flag now based on what we know
	 * prior to calling pru_rcvd.
	 */
3332 if ((so
->so_options
& SO_WANTMORE
) && so
->so_rcv
.sb_cc
> 0)
3333 flags
|= MSG_HAVEMORE
;
3335 if ((flags
& MSG_PEEK
) == 0) {
3337 so
->so_rcv
.sb_mb
= nextrecord
;
		/*
		 * First part is an inline SB_EMPTY_FIXUP().  Second
		 * part makes sure sb_lastrecord is up-to-date if
		 * there is still data in the socket buffer.
		 */
3343 if (so
->so_rcv
.sb_mb
== NULL
) {
3344 so
->so_rcv
.sb_mbtail
= NULL
;
3345 so
->so_rcv
.sb_lastrecord
= NULL
;
3346 } else if (nextrecord
->m_nextpkt
== NULL
) {
3347 so
->so_rcv
.sb_lastrecord
= nextrecord
;
3349 SB_MB_CHECK(&so
->so_rcv
);
3351 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 4");
3352 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 4");
3353 if (pr
->pr_flags
& PR_WANTRCVD
&& so
->so_pcb
)
3354 (*pr
->pr_usrreqs
->pru_rcvd
)(so
, flags
);
3357 if (delayed_copy_len
) {
3358 error
= sodelayed_copy(so
, uio
, &free_list
, &delayed_copy_len
);
3362 if (free_list
!= NULL
) {
3363 m_freem_list(free_list
);
3367 postevent(so
, 0, EV_OOB
);
3369 if (orig_resid
== uio_resid(uio
) && orig_resid
&&
3370 (flags
& MSG_EOR
) == 0 && (so
->so_state
& SS_CANTRCVMORE
) == 0) {
3371 sbunlock(&so
->so_rcv
, TRUE
); /* keep socket locked */
3378 #ifdef MORE_LOCKING_DEBUG
3379 if (so
->so_usecount
<= 1) {
3380 panic("%s: release so=%p ref=%d on socket\n", __func__
,
3381 so
, so
->so_usecount
);
3385 if (delayed_copy_len
)
3386 error
= sodelayed_copy(so
, uio
, &free_list
, &delayed_copy_len
);
3388 if (free_list
!= NULL
)
3389 m_freem_list(free_list
);
3391 sbunlock(&so
->so_rcv
, FALSE
); /* will unlock socket */
3393 KERNEL_DEBUG(DBG_FNC_SORECEIVE
| DBG_FUNC_END
, so
, uio_resid(uio
),
3394 so
->so_rcv
.sb_cc
, 0, error
);
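/*
 * Illustrative sketch (not part of this file): MSG_PEEK and MSG_WAITALL as
 * seen from user space by a caller of soreceive().  MSG_PEEK leaves the data
 * queued on so_rcv (the !MSG_PEEK paths above are skipped), and MSG_WAITALL
 * normally keeps the receive looping until the full request is satisfied or
 * an error/EOF/signal cuts it short.  Sizes and the two-step pattern are
 * assumptions made for the example.
 */
#if 0	/* user-space example, not compiled into the kernel */
#include <sys/types.h>
#include <sys/socket.h>

static ssize_t
example_peek_then_read(int fd, char *buf, size_t want)
{
	ssize_t n;

	/* Look at what is queued without consuming it. */
	n = recv(fd, buf, want, MSG_PEEK);
	if (n <= 0)
		return (n);

	/* Now consume the data, waiting for the full amount if possible. */
	return (recv(fd, buf, want, MSG_WAITALL));
}
#endif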
/*
 * Returns:	0			Success
 */
static int
sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list,
    user_ssize_t *resid)
3412 socket_unlock(so
, 0);
3414 while (m
!= NULL
&& error
== 0) {
3415 error
= uiomove(mtod(m
, caddr_t
), (int)m
->m_len
, uio
);
3418 m_freem_list(*free_list
);
int
soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray,
    u_int uiocnt, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	struct mbuf *m, **mp;
	struct mbuf *nextrecord;
	struct mbuf *ml = NULL, *free_list = NULL;
	int flags, error, offset;
	struct protosw *pr = so->so_proto;
	user_ssize_t orig_resid, resid;
	struct proc *p = current_proc();
	struct uio *auio = NULL;
3444 KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST
| DBG_FUNC_START
,
3446 so
->so_rcv
.sb_cc
, so
->so_rcv
.sb_lowat
, so
->so_rcv
.sb_hiwat
);
3451 if (controlp
!= NULL
)
3454 flags
= *flagsp
&~ MSG_EOR
;
3458 * Disallow functionality not currently supported
3461 printf("%s mp0 not supported\n", __func__
);
3466 printf("%s sockaddr not supported\n", __func__
);
3470 if (controlp
!= NULL
) {
3471 printf("%s control not supported\n", __func__
);
3478 * - Only supports don't wait flags
3479 * - Only support datagram sockets (could be extended to raw)
3481 * - Protocol must support packet chains
3482 * - The uio array is NULL (should we panic?)
3484 if (flags
& ~(MSG_DONTWAIT
| MSG_NBIO
)) {
3485 printf("%s flags not supported\n", __func__
);
3489 if (so
->so_type
!= SOCK_DGRAM
) {
3493 if (sosendallatonce(so
) == 0) {
3497 if (so
->so_proto
->pr_usrreqs
->pru_send_list
== NULL
) {
3498 error
= EPROTONOSUPPORT
;
3501 if (uioarray
== NULL
) {
3502 printf("%s uioarray is NULL\n", __func__
);
3507 printf("%s uiocnt is 0\n", __func__
);
	/*
	 * Sanity check on the length passed by caller as we are making 'int'
	 */
	resid = orig_resid = uio_array_resid(uioarray, uiocnt);
	if (orig_resid < 0 || orig_resid > INT_MAX) {
3522 so_update_last_owner_locked(so
, p
);
3523 so_update_policy(so
);
3526 so_update_necp_policy(so
, NULL
, NULL
);
3530 * If a recv attempt is made on a previously-accepted socket
3531 * that has been marked as inactive (disconnected), reject
3534 if (so
->so_flags
& SOF_DEFUNCT
) {
3535 struct sockbuf
*sb
= &so
->so_rcv
;
3538 SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n",
3539 __func__
, proc_pid(p
), (uint64_t)VM_KERNEL_ADDRPERM(so
),
3540 SOCK_DOM(so
), SOCK_TYPE(so
), error
));
3542 * This socket should have been disconnected and flushed
3543 * prior to being returned from sodefunct(); there should
3544 * be no data on its receive list, so panic otherwise.
3546 if (so
->so_state
& SS_DEFUNCT
)
3547 sb_empty_assert(sb
, __func__
);
3554 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
3555 * and if so just return to the caller. This could happen when
3556 * soreceive() is called by a socket upcall function during the
3557 * time the socket is freed. The socket buffer would have been
3558 * locked across the upcall, therefore we cannot put this thread
3559 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
3560 * we may livelock), because the lock on the socket buffer will
3561 * only be released when the upcall routine returns to its caller.
3562 * Because the socket has been officially closed, there can be
3563 * no further read on it.
3565 if ((so
->so_state
& (SS_NOFDREF
| SS_CANTRCVMORE
)) ==
3566 (SS_NOFDREF
| SS_CANTRCVMORE
)) {
3571 error
= sblock(&so
->so_rcv
, SBLOCKWAIT(flags
));
3581 while (uio_resid(auio
) == 0) {
3589 m
= so
->so_rcv
.sb_mb
;
3591 * Block awaiting more datagram if needed
3595 * Panic if we notice inconsistencies in the socket's
3596 * receive list; both sb_mb and sb_cc should correctly
3597 * reflect the contents of the list, otherwise we may
3598 * end up with false positives during select() or poll()
3599 * which could put the application in a bad state.
3601 SB_MB_CHECK(&so
->so_rcv
);
3604 error
= so
->so_error
;
3607 if (so
->so_state
& SS_CANTRCVMORE
) {
3610 if ((so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
)) == 0 &&
3611 (so
->so_proto
->pr_flags
& PR_CONNREQUIRED
)) {
3615 if ((so
->so_state
& SS_NBIO
) ||
3616 (flags
& (MSG_DONTWAIT
|MSG_NBIO
))) {
3617 error
= EWOULDBLOCK
;
3621 * Do not block if we got some data
3622 * Note: We could use MSG_WAITALL to wait
3624 resid
= uio_array_resid(uioarray
, uiocnt
);
3625 if (resid
!= orig_resid
) {
3630 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive sbwait 1");
3631 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive sbwait 1");
3633 sbunlock(&so
->so_rcv
, TRUE
); /* keep socket locked */
3636 error
= sbwait(&so
->so_rcv
);
3643 if (m
->m_pkthdr
.len
== 0) {
3644 printf("%s so %llx pkt %llx len is null\n",
3646 (uint64_t)VM_KERNEL_ADDRPERM(so
),
3647 (uint64_t)VM_KERNEL_ADDRPERM(m
));
3650 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgrcv
);
3651 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 1");
3652 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 1");
3655 * Consume the current uio index as we have a datagram
3658 nextrecord
= m
->m_nextpkt
;
3660 #if SO_RECEIVE_LIST_SOCKADDR_NOT_YET
3661 if ((pr
->pr_flags
& PR_ADDR
) && m
->m_type
== MT_SONAME
) {
3663 * to be adapted from soreceive()
3666 #endif /* SO_RECEIVE_LIST_SOCKADDR_NOT_YET */
3668 #if SO_RECEIVE_LIST_CONTROL_NOT_YET
3670 * Process one or more MT_CONTROL mbufs present before any data mbufs
3671 * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
3672 * just copy the data; if !MSG_PEEK, we call into the protocol to
3673 * perform externalization.
3675 if (m
!= NULL
&& m
->m_type
== MT_CONTROL
) {
3677 * to be adapted from soreceive()
3680 #endif /* SO_RECEIVE_LIST_CONTROL_NOT_YET */
3685 * Loop to copy out the mbufs of the current record
3687 while (m
!= NULL
&& uio_resid(auio
) > 0 && error
== 0) {
3688 len
= uio_resid(auio
);
3691 printf("%s: so %llx m %llx m_len is 0\n",
3693 (uint64_t)VM_KERNEL_ADDRPERM(so
),
3694 (uint64_t)VM_KERNEL_ADDRPERM(m
));
3697 * Clip to the residual length
3702 * If mp is set, just pass back the mbufs.
3703 * Otherwise copy them out via the uio, then free.
3704 * Sockbuf must be consistent here (points to current mbuf,
3705 * it points to next record) when we drop priority;
3706 * we must note any additions to the sockbuf when we
3707 * block interrupts again.
3710 uio_setresid(auio
, (uio_resid(auio
) - len
));
3712 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive uiomove");
3713 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive uiomove");
3715 socket_unlock(so
, 0);
3716 error
= uiomove(mtod(m
, caddr_t
), (int)len
, auio
);
3722 if (len
== m
->m_len
) {
3724 * m was entirely copied
3726 nextrecord
= m
->m_nextpkt
;
3727 sbfree(&so
->so_rcv
, m
);
3728 m
->m_nextpkt
= NULL
;
3736 so
->so_rcv
.sb_mb
= m
= m
->m_next
;
3739 if (free_list
== NULL
)
3744 so
->so_rcv
.sb_mb
= m
= m
->m_next
;
3746 ml
->m_nextpkt
= NULL
;
3749 m
->m_nextpkt
= nextrecord
;
3750 if (nextrecord
== NULL
)
3751 so
->so_rcv
.sb_lastrecord
= m
;
3753 so
->so_rcv
.sb_mb
= nextrecord
;
3754 SB_EMPTY_FIXUP(&so
->so_rcv
);
3756 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 3");
3757 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 3");
3760 * Stop the loop on partial copy
3765 if (flags
& MSG_DONTWAIT
)
3766 copy_flag
= M_DONTWAIT
;
3769 *mp
= m_copym(m
, 0, len
, copy_flag
);
3771 * Failed to allocate an mbuf?
3772 * Adjust uio_resid back, it was
3773 * adjusted down by len bytes which
3774 * we didn't copy over.
3778 (uio_resid(auio
) + len
));
3786 #ifdef MORE_LOCKING_DEBUG
3787 if (so
->so_usecount
<= 1) {
3788 panic("%s: after big while so=%llx ref=%d on socket\n",
3790 (uint64_t)VM_KERNEL_ADDRPERM(so
), so
->so_usecount
);
3795 * Tell the caller we made a partial copy
3798 if (so
->so_options
& SO_DONTTRUNC
) {
3801 so
->so_rcv
.sb_cc
-= len
;
3802 flags
|= MSG_RCVMORE
;
3804 (void) sbdroprecord(&so
->so_rcv
);
3805 nextrecord
= so
->so_rcv
.sb_mb
;
3812 so
->so_rcv
.sb_mb
= nextrecord
;
3814 * First part is an inline SB_EMPTY_FIXUP(). Second
3815 * part makes sure sb_lastrecord is up-to-date if
3816 * there is still data in the socket buffer.
3818 if (so
->so_rcv
.sb_mb
== NULL
) {
3819 so
->so_rcv
.sb_mbtail
= NULL
;
3820 so
->so_rcv
.sb_lastrecord
= NULL
;
3821 } else if (nextrecord
->m_nextpkt
== NULL
) {
3822 so
->so_rcv
.sb_lastrecord
= nextrecord
;
3824 SB_MB_CHECK(&so
->so_rcv
);
3826 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 4");
3827 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 4");
	/*
	 * We can continue to the next packet as long as:
	 * - We haven't exhausted the uio array
	 * - There was no error
	 * - A packet was not truncated
	 * - We can still receive more data
	 */
	if (i < uiocnt && error == 0 &&
	    (flags & (MSG_RCVMORE | MSG_TRUNC)) == 0
	    && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv, TRUE);	/* keep socket locked */
	/*
	 * pru_rcvd may cause more data to be received if the socket lock
	 * is dropped so we set MSG_HAVEMORE now based on what we know.
	 * That way the caller won't be surprised if it receives less data
	 * than requested.
	 */
	if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
		flags |= MSG_HAVEMORE;
	if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
		(*pr->pr_usrreqs->pru_rcvd)(so, flags);

	sbunlock(&so->so_rcv, FALSE);	/* will unlock socket */

	socket_unlock(so, 1);

	if (free_list != NULL)
		m_freem_list(free_list);

	KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_END, error,
/*
 * Returns:	0			Success
 *	<pru_shutdown>:EINVAL
 *	<pru_shutdown>:EADDRNOTAVAIL[TCP]
 *	<pru_shutdown>:ENOBUFS[TCP]
 *	<pru_shutdown>:EMSGSIZE[TCP]
 *	<pru_shutdown>:EHOSTUNREACH[TCP]
 *	<pru_shutdown>:ENETUNREACH[TCP]
 *	<pru_shutdown>:ENETDOWN[TCP]
 *	<pru_shutdown>:ENOMEM[TCP]
 *	<pru_shutdown>:EACCES[TCP]
 *	<pru_shutdown>:EMSGSIZE[TCP]
 *	<pru_shutdown>:ENOBUFS[TCP]
 *	<pru_shutdown>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
 *	<pru_shutdown>:??? [other protocol families]
 */
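/*
 * Illustrative sketch (not part of this file): a user process driving the
 * path below with shutdown(2).  SHUT_WR performs the half-close handled by
 * soshutdownlock_final() (pru_shutdown plus EV_WCLOSED), while the reader
 * keeps draining until recv() returns 0.  Error handling is trimmed.
 */
#if 0	/* user-space example, not compiled into the kernel */
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>

static void
example_half_close(int fd)
{
	char buf[512];
	ssize_t n;

	/* No more writes from our side; the peer sees EOF (FIN on TCP). */
	(void) shutdown(fd, SHUT_WR);

	/* Still allowed to read whatever the peer has left to send. */
	while ((n = recv(fd, buf, sizeof (buf), 0)) > 0)
		;

	close(fd);
}
#endif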
3894 soshutdown(struct socket
*so
, int how
)
3898 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN
| DBG_FUNC_START
, how
, 0, 0, 0, 0);
3906 (SS_ISCONNECTED
|SS_ISCONNECTING
|SS_ISDISCONNECTING
)) == 0) {
3909 error
= soshutdownlock(so
, how
);
3911 socket_unlock(so
, 1);
3918 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN
| DBG_FUNC_END
, how
, error
, 0, 0, 0);
3924 soshutdownlock_final(struct socket
*so
, int how
)
3926 struct protosw
*pr
= so
->so_proto
;
3929 sflt_notify(so
, sock_evt_shutdown
, &how
);
3931 if (how
!= SHUT_WR
) {
3932 if ((so
->so_state
& SS_CANTRCVMORE
) != 0) {
3933 /* read already shut down */
3938 postevent(so
, 0, EV_RCLOSED
);
3940 if (how
!= SHUT_RD
) {
3941 if ((so
->so_state
& SS_CANTSENDMORE
) != 0) {
3942 /* write already shut down */
3946 error
= (*pr
->pr_usrreqs
->pru_shutdown
)(so
);
3947 postevent(so
, 0, EV_WCLOSED
);
3950 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN
, how
, 1, 0, 0, 0);
3955 soshutdownlock(struct socket
*so
, int how
)
3961 * A content filter may delay the actual shutdown until it
3962 * has processed the pending data
3964 if (so
->so_flags
& SOF_CONTENT_FILTER
) {
3965 error
= cfil_sock_shutdown(so
, &how
);
3966 if (error
== EJUSTRETURN
) {
3969 } else if (error
!= 0) {
3973 #endif /* CONTENT_FILTER */
3975 error
= soshutdownlock_final(so
, how
);
void
sowflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_snd;

	lck_mtx_t *mutex_held;
	/*
	 * XXX: This code is currently commented out, because we may get here
	 * as part of sofreelastref(), and at that time, pr_getlock() may no
	 * longer be able to return us the lock; this will be fixed in future.
	 */
	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	/*
	 * Obtain lock on the socket buffer (SB_LOCK).  This is required
	 * to prevent the socket buffer from being unexpectedly altered
	 * while it is used by another thread in socket send/receive.
	 *
	 * sblock() must not fail here, hence the assertion.
	 */
	(void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
	VERIFY(sb->sb_flags & SB_LOCK);

	sb->sb_flags &= ~(SB_SEL|SB_UPCALL);
	sb->sb_flags |= SB_DROP;
	sb->sb_upcall = NULL;
	sb->sb_upcallarg = NULL;

	sbunlock(sb, TRUE);	/* keep socket locked */

	selthreadclear(&sb->sb_sel);
}
void
sorflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	struct protosw *pr = so->so_proto;
	struct sockbuf asb;

	lck_mtx_t *mutex_held;
	/*
	 * XXX: This code is currently commented out, because we may get here
	 * as part of sofreelastref(), and at that time, pr_getlock() may no
	 * longer be able to return us the lock; this will be fixed in future.
	 */
	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	sflt_notify(so, sock_evt_flush_read, NULL);

	/*
	 * Obtain lock on the socket buffer (SB_LOCK).  This is required
	 * to prevent the socket buffer from being unexpectedly altered
	 * while it is used by another thread in socket send/receive.
	 *
	 * sblock() must not fail here, hence the assertion.
	 */
	(void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
	VERIFY(sb->sb_flags & SB_LOCK);

	/*
	 * Copy only the relevant fields from "sb" to "asb" which we
	 * need for sbrelease() to function.  In particular, skip
	 * sb_sel as it contains the wait queue linkage, which would
	 * wreak havoc if we were to issue selthreadclear() on "asb".
	 * Make sure to not carry over SB_LOCK in "asb", as we need
	 * to acquire it later as part of sbrelease().
	 */
	bzero(&asb, sizeof (asb));
	asb.sb_cc = sb->sb_cc;
	asb.sb_hiwat = sb->sb_hiwat;
	asb.sb_mbcnt = sb->sb_mbcnt;
	asb.sb_mbmax = sb->sb_mbmax;
	asb.sb_ctl = sb->sb_ctl;
	asb.sb_lowat = sb->sb_lowat;
	asb.sb_mb = sb->sb_mb;
	asb.sb_mbtail = sb->sb_mbtail;
	asb.sb_lastrecord = sb->sb_lastrecord;
	asb.sb_so = sb->sb_so;
	asb.sb_flags = sb->sb_flags;
	asb.sb_flags &= ~(SB_LOCK|SB_SEL|SB_KNOTE|SB_UPCALL);
	asb.sb_flags |= SB_DROP;

	/*
	 * Ideally we'd bzero() these and preserve the ones we need;
	 * but to do that we'd need to shuffle things around in the
	 * sockbuf, and we can't do it now because there are KEXTS
	 * that are directly referring to the socket structure.
	 *
	 * Setting SB_DROP acts as a barrier to prevent further appends.
	 * Clearing SB_SEL is done for selthreadclear() below.
	 */
	sb->sb_mbtail = NULL;
	sb->sb_lastrecord = NULL;
	sb->sb_timeo.tv_sec = 0;
	sb->sb_timeo.tv_usec = 0;
	sb->sb_upcall = NULL;
	sb->sb_upcallarg = NULL;
	sb->sb_flags &= ~(SB_SEL|SB_UPCALL);
	sb->sb_flags |= SB_DROP;

	sbunlock(sb, TRUE);	/* keep socket locked */

	/*
	 * Note that selthreadclear() is called on the original "sb" and
	 * not the local "asb" because of the way wait queue linkage is
	 * implemented.  Given that selwakeup() may be triggered, SB_SEL
	 * should no longer be set (cleared above.)
	 */
	selthreadclear(&sb->sb_sel);

	if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 *
 * Returns:	0			Success
 */
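/*
 * Illustrative sketch (not part of this file): how a protocol-level
 * pr_ctloutput() handler typically uses sooptcopyin()/sooptcopyout() for a
 * plain integer option.  The option handler name, the struct mypcb type and
 * its "foo" field are hypothetical; only the sooptcopyin()/sooptcopyout()
 * calling convention is taken from this file.
 */
#if 0	/* sketch only, not compiled */
static int
myproto_ctloutput_foo(struct mypcb *pcb, struct sockopt *sopt)
{
	int error;
	int optval;

	if (sopt->sopt_dir == SOPT_SET) {
		/* Reject buffers shorter than an int; extra bytes are ignored. */
		error = sooptcopyin(sopt, &optval, sizeof (optval),
		    sizeof (optval));
		if (error != 0)
			return (error);
		pcb->foo = optval;
		return (0);
	}

	/* SOPT_GET: copy the current value back, truncating if needed. */
	optval = pcb->foo;
	return (sooptcopyout(sopt, &optval, sizeof (optval)));
}
#endif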
int
sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
{
	size_t	valsize;

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		return (EINVAL);
	if (valsize > len)
		sopt->sopt_valsize = valsize = len;

	if (sopt->sopt_p != kernproc)
		return (copyin(sopt->sopt_val, buf, valsize));

	bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
	return (0);
}
/*
 * sooptcopyin_timeval
 *	Copy in a timeval value into tv_p, and take into account whether the
 *	calling process is 64-bit or 32-bit.  Moved the sanity checking
 *	code here so that we can verify the 64-bit tv_sec value before we lose
 *	the top 32-bits assigning tv64.tv_sec to tv_p->tv_sec.
 */
static int
sooptcopyin_timeval(struct sockopt *sopt, struct timeval *tv_p)
{
	int error;

	if (proc_is64bit(sopt->sopt_p)) {
		struct user64_timeval tv64;

		if (sopt->sopt_valsize < sizeof (tv64))
			return (EINVAL);

		sopt->sopt_valsize = sizeof (tv64);
		if (sopt->sopt_p != kernproc) {
			error = copyin(sopt->sopt_val, &tv64, sizeof (tv64));
			if (error != 0)
				return (error);
		} else {
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv64,
			    sizeof (tv64));
		}
		if (tv64.tv_sec < 0 || tv64.tv_sec > LONG_MAX ||
		    tv64.tv_usec < 0 || tv64.tv_usec >= 1000000)
			return (EDOM);

		tv_p->tv_sec = tv64.tv_sec;
		tv_p->tv_usec = tv64.tv_usec;
	} else {
		struct user32_timeval tv32;

		if (sopt->sopt_valsize < sizeof (tv32))
			return (EINVAL);

		sopt->sopt_valsize = sizeof (tv32);
		if (sopt->sopt_p != kernproc) {
			error = copyin(sopt->sopt_val, &tv32, sizeof (tv32));
			if (error != 0)
				return (error);
		} else {
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv32,
			    sizeof (tv32));
		}
		/*
		 * K64todo "comparison is always false due to
		 * limited range of data type"
		 */
		if (tv32.tv_sec < 0 || tv32.tv_sec > LONG_MAX ||
		    tv32.tv_usec < 0 || tv32.tv_usec >= 1000000)
			return (EDOM);

		tv_p->tv_sec = tv32.tv_sec;
		tv_p->tv_usec = tv32.tv_usec;
	}
	return (0);
}
/*
 * Returns:	0			Success
 *	sooptcopyin:EINVAL
 *	sooptcopyin:EFAULT
 *	sooptcopyin_timeval:EINVAL
 *	sooptcopyin_timeval:EFAULT
 *	sooptcopyin_timeval:EDOM
 *	<pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
 *	<pr_ctloutput>:???
 *	sflt_attach_private:??? [whatever a filter author chooses]
 *	<sf_setoption>:??? [whatever a filter author chooses]
 *
 * Notes:	Other <pru_listen> returns depend on the protocol family; all
 *		<sf_listen> returns depend on what the filter author causes
 *		their filter to return.
 */
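/*
 * Illustrative sketch (not part of this file): user-space setsockopt(2)
 * calls that end up in sosetoptlock() below.  SO_LINGER takes a struct
 * linger and SO_RCVTIMEO takes a struct timeval, which the code below reads
 * via sooptcopyin() and sooptcopyin_timeval() respectively.  The chosen
 * values are arbitrary.
 */
#if 0	/* user-space example, not compiled into the kernel */
#include <sys/socket.h>
#include <sys/time.h>

static int
example_setsockopt(int fd)
{
	struct linger l = { .l_onoff = 1, .l_linger = 5 };	/* linger up to 5s on close */
	struct timeval tv = { .tv_sec = 2, .tv_usec = 0 };	/* 2 second receive timeout */

	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &l, sizeof (l)) == -1)
		return (-1);
	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof (tv)) == -1)
		return (-1);
	return (0);
}
#endif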
4239 sosetoptlock(struct socket
*so
, struct sockopt
*sopt
, int dolock
)
4244 #if CONFIG_MACF_SOCKET
4246 #endif /* MAC_SOCKET */
4248 if (sopt
->sopt_dir
!= SOPT_SET
)
4249 sopt
->sopt_dir
= SOPT_SET
;
4254 if ((so
->so_state
& (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) ==
4255 (SS_CANTRCVMORE
| SS_CANTSENDMORE
) &&
4256 (so
->so_flags
& SOF_NPX_SETOPTSHUT
) == 0) {
4257 /* the socket has been shutdown, no more sockopt's */
4262 error
= sflt_setsockopt(so
, sopt
);
4264 if (error
== EJUSTRETURN
)
4269 if (sopt
->sopt_level
!= SOL_SOCKET
) {
4270 if (so
->so_proto
!= NULL
&&
4271 so
->so_proto
->pr_ctloutput
!= NULL
) {
4272 error
= (*so
->so_proto
->pr_ctloutput
)(so
, sopt
);
4275 error
= ENOPROTOOPT
;
		/*
		 * Allow socket-level (SOL_SOCKET) options to be filtered by
		 * the protocol layer, if needed.  A zero value returned from
		 * the handler means use default socket-level processing as
		 * done by the rest of this routine.  Otherwise, any other
		 * return value indicates that the option is unsupported.
		 */
4284 if (so
->so_proto
!= NULL
&& (error
= so
->so_proto
->pr_usrreqs
->
4285 pru_socheckopt(so
, sopt
)) != 0)
4289 switch (sopt
->sopt_name
) {
4292 error
= sooptcopyin(sopt
, &l
, sizeof (l
), sizeof (l
));
4296 so
->so_linger
= (sopt
->sopt_name
== SO_LINGER
) ?
4297 l
.l_linger
: l
.l_linger
* hz
;
4299 so
->so_options
|= SO_LINGER
;
4301 so
->so_options
&= ~SO_LINGER
;
4307 case SO_USELOOPBACK
:
4313 case SO_TIMESTAMP_MONOTONIC
:
4316 case SO_WANTOOBFLAG
:
4317 case SO_NOWAKEFROMSLEEP
:
4318 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4323 so
->so_options
|= sopt
->sopt_name
;
4325 so
->so_options
&= ~sopt
->sopt_name
;
4332 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4338 * Values < 1 make no sense for any of these
4339 * options, so disallow them.
4346 switch (sopt
->sopt_name
) {
4349 struct sockbuf
*sb
=
4350 (sopt
->sopt_name
== SO_SNDBUF
) ?
4351 &so
->so_snd
: &so
->so_rcv
;
4352 if (sbreserve(sb
, (u_int32_t
)optval
) == 0) {
4356 sb
->sb_flags
|= SB_USRSIZE
;
4357 sb
->sb_flags
&= ~SB_AUTOSIZE
;
4358 sb
->sb_idealsize
= (u_int32_t
)optval
;
4362 * Make sure the low-water is never greater than
4366 int space
= sbspace(&so
->so_snd
);
4367 u_int32_t hiwat
= so
->so_snd
.sb_hiwat
;
4369 if (so
->so_snd
.sb_flags
& SB_UNIX
) {
4371 (struct unpcb
*)(so
->so_pcb
);
4372 if (unp
!= NULL
&& unp
->unp_conn
!= NULL
) {
4373 hiwat
+= unp
->unp_conn
->unp_cc
;
4377 so
->so_snd
.sb_lowat
=
4381 if (space
>= so
->so_snd
.sb_lowat
) {
4388 so
->so_rcv
.sb_lowat
=
4389 (optval
> so
->so_rcv
.sb_hiwat
) ?
4390 so
->so_rcv
.sb_hiwat
: optval
;
4391 data_len
= so
->so_rcv
.sb_cc
4392 - so
->so_rcv
.sb_ctl
;
4393 if (data_len
>= so
->so_rcv
.sb_lowat
)
4402 error
= sooptcopyin_timeval(sopt
, &tv
);
4406 switch (sopt
->sopt_name
) {
4408 so
->so_snd
.sb_timeo
= tv
;
4411 so
->so_rcv
.sb_timeo
= tv
;
4419 error
= sooptcopyin(sopt
, &nke
, sizeof (nke
),
4424 error
= sflt_attach_internal(so
, nke
.nke_handle
);
4429 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4434 so
->so_flags
|= SOF_NOSIGPIPE
;
4436 so
->so_flags
&= ~SOF_NOSIGPIPE
;
4440 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4445 so
->so_flags
|= SOF_NOADDRAVAIL
;
4447 so
->so_flags
&= ~SOF_NOADDRAVAIL
;
4450 case SO_REUSESHAREUID
:
4451 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4456 so
->so_flags
|= SOF_REUSESHAREUID
;
4458 so
->so_flags
&= ~SOF_REUSESHAREUID
;
4461 case SO_NOTIFYCONFLICT
:
4462 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
4466 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4471 so
->so_flags
|= SOF_NOTIFYCONFLICT
;
4473 so
->so_flags
&= ~SOF_NOTIFYCONFLICT
;
4476 case SO_RESTRICTIONS
:
4477 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4482 error
= so_set_restrictions(so
, optval
);
4485 case SO_AWDL_UNRESTRICTED
:
4486 if (SOCK_DOM(so
) != PF_INET
&&
4487 SOCK_DOM(so
) != PF_INET6
) {
4491 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
4496 kauth_cred_t cred
= NULL
;
4497 proc_t ep
= PROC_NULL
;
4499 if (so
->so_flags
& SOF_DELEGATED
) {
4500 ep
= proc_find(so
->e_pid
);
4502 cred
= kauth_cred_proc_ref(ep
);
4504 error
= priv_check_cred(
4505 cred
? cred
: so
->so_cred
,
4506 PRIV_NET_RESTRICTED_AWDL
, 0);
4508 inp_set_awdl_unrestricted(
4511 kauth_cred_unref(&cred
);
4512 if (ep
!= PROC_NULL
)
4515 inp_clear_awdl_unrestricted(sotoinpcb(so
));
4519 #if CONFIG_MACF_SOCKET
4520 if ((error
= sooptcopyin(sopt
, &extmac
, sizeof (extmac
),
4521 sizeof (extmac
))) != 0)
4524 error
= mac_setsockopt_label(proc_ucred(sopt
->sopt_p
),
4528 #endif /* MAC_SOCKET */
4531 case SO_UPCALLCLOSEWAIT
:
4532 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4537 so
->so_flags
|= SOF_UPCALLCLOSEWAIT
;
4539 so
->so_flags
&= ~SOF_UPCALLCLOSEWAIT
;
4543 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4548 so
->so_flags
|= SOF_BINDRANDOMPORT
;
4550 so
->so_flags
&= ~SOF_BINDRANDOMPORT
;
4553 case SO_NP_EXTENSIONS
: {
4554 struct so_np_extensions sonpx
;
4556 error
= sooptcopyin(sopt
, &sonpx
, sizeof (sonpx
),
4560 if (sonpx
.npx_mask
& ~SONPX_MASK_VALID
) {
4565 * Only one bit defined for now
4567 if ((sonpx
.npx_mask
& SONPX_SETOPTSHUT
)) {
4568 if ((sonpx
.npx_flags
& SONPX_SETOPTSHUT
))
4569 so
->so_flags
|= SOF_NPX_SETOPTSHUT
;
4571 so
->so_flags
&= ~SOF_NPX_SETOPTSHUT
;
4576 case SO_TRAFFIC_CLASS
: {
4577 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4581 error
= so_set_traffic_class(so
, optval
);
4587 case SO_RECV_TRAFFIC_CLASS
: {
4588 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4593 so
->so_flags
&= ~SOF_RECV_TRAFFIC_CLASS
;
4595 so
->so_flags
|= SOF_RECV_TRAFFIC_CLASS
;
4599 case SO_TRAFFIC_CLASS_DBG
: {
4600 struct so_tcdbg so_tcdbg
;
4602 error
= sooptcopyin(sopt
, &so_tcdbg
,
4603 sizeof (struct so_tcdbg
), sizeof (struct so_tcdbg
));
4606 error
= so_set_tcdbg(so
, &so_tcdbg
);
4612 case SO_PRIVILEGED_TRAFFIC_CLASS
:
4613 error
= priv_check_cred(kauth_cred_get(),
4614 PRIV_NET_PRIVILEGED_TRAFFIC_CLASS
, 0);
4617 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4622 so
->so_flags
&= ~SOF_PRIVILEGED_TRAFFIC_CLASS
;
4624 so
->so_flags
|= SOF_PRIVILEGED_TRAFFIC_CLASS
;
4628 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4630 if (error
!= 0 || (so
->so_flags
& SOF_DEFUNCT
)) {
			/*
			 * Any process can set SO_DEFUNCTOK (clear
			 * SOF_NODEFUNCT), but only root can clear
			 * SO_DEFUNCTOK (set SOF_NODEFUNCT).
			 */
4641 kauth_cred_issuser(kauth_cred_get()) == 0) {
4646 so
->so_flags
&= ~SOF_NODEFUNCT
;
4648 so
->so_flags
|= SOF_NODEFUNCT
;
4650 if (SOCK_DOM(so
) == PF_INET
||
4651 SOCK_DOM(so
) == PF_INET6
) {
4652 char s
[MAX_IPv6_STR_LEN
];
4653 char d
[MAX_IPv6_STR_LEN
];
4654 struct inpcb
*inp
= sotoinpcb(so
);
4656 SODEFUNCTLOG(("%s[%d]: so 0x%llx [%s %s:%d -> "
4657 "%s:%d] is now marked as %seligible for "
4658 "defunct\n", __func__
, proc_selfpid(),
4659 (uint64_t)VM_KERNEL_ADDRPERM(so
),
4660 (SOCK_TYPE(so
) == SOCK_STREAM
) ?
4661 "TCP" : "UDP", inet_ntop(SOCK_DOM(so
),
4662 ((SOCK_DOM(so
) == PF_INET
) ?
4663 (void *)&inp
->inp_laddr
.s_addr
:
4664 (void *)&inp
->in6p_laddr
), s
, sizeof (s
)),
4665 ntohs(inp
->in6p_lport
),
4666 inet_ntop(SOCK_DOM(so
),
4667 (SOCK_DOM(so
) == PF_INET
) ?
4668 (void *)&inp
->inp_faddr
.s_addr
:
4669 (void *)&inp
->in6p_faddr
, d
, sizeof (d
)),
4670 ntohs(inp
->in6p_fport
),
4671 (so
->so_flags
& SOF_NODEFUNCT
) ?
4674 SODEFUNCTLOG(("%s[%d]: so 0x%llx [%d,%d] is "
4675 "now marked as %seligible for defunct\n",
4676 __func__
, proc_selfpid(),
4677 (uint64_t)VM_KERNEL_ADDRPERM(so
),
4678 SOCK_DOM(so
), SOCK_TYPE(so
),
4679 (so
->so_flags
& SOF_NODEFUNCT
) ?
4685 /* This option is not settable */
4689 case SO_OPPORTUNISTIC
:
4690 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4693 error
= so_set_opportunistic(so
, optval
);
4697 /* This option is handled by lower layer(s) */
4702 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4705 error
= so_set_recv_anyif(so
, optval
);
4708 case SO_TRAFFIC_MGT_BACKGROUND
: {
4709 /* This option is handled by lower layer(s) */
4715 case SO_FLOW_DIVERT_TOKEN
:
4716 error
= flow_divert_token_set(so
, sopt
);
4718 #endif /* FLOW_DIVERT */
4722 if ((error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4723 sizeof (optval
))) != 0)
4726 error
= so_set_effective_pid(so
, optval
, sopt
->sopt_p
);
4729 case SO_DELEGATED_UUID
: {
4732 if ((error
= sooptcopyin(sopt
, &euuid
, sizeof (euuid
),
4733 sizeof (euuid
))) != 0)
4736 error
= so_set_effective_uuid(so
, euuid
, sopt
->sopt_p
);
4741 case SO_NECP_ATTRIBUTES
:
4742 error
= necp_set_socket_attributes(so
, sopt
);
4747 case SO_MPTCP_FASTJOIN
:
4748 if (!((so
->so_flags
& SOF_MP_SUBFLOW
) ||
4749 ((SOCK_CHECK_DOM(so
, PF_MULTIPATH
)) &&
4750 (SOCK_CHECK_PROTO(so
, IPPROTO_TCP
))))) {
4751 error
= ENOPROTOOPT
;
4755 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
4760 so
->so_flags
&= ~SOF_MPTCP_FASTJOIN
;
4762 so
->so_flags
|= SOF_MPTCP_FASTJOIN
;
4767 error
= ENOPROTOOPT
;
4770 if (error
== 0 && so
->so_proto
!= NULL
&&
4771 so
->so_proto
->pr_ctloutput
!= NULL
) {
4772 (void) so
->so_proto
->pr_ctloutput(so
, sopt
);
4777 socket_unlock(so
, 1);
4781 /* Helper routines for getsockopt */
4783 sooptcopyout(struct sockopt
*sopt
, void *buf
, size_t len
)
	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer must
	 * be generated ahead of time.
	 */
4799 valsize
= min(len
, sopt
->sopt_valsize
);
4800 sopt
->sopt_valsize
= valsize
;
4801 if (sopt
->sopt_val
!= USER_ADDR_NULL
) {
4802 if (sopt
->sopt_p
!= kernproc
)
4803 error
= copyout(buf
, sopt
->sopt_val
, valsize
);
4805 bcopy(buf
, CAST_DOWN(caddr_t
, sopt
->sopt_val
), valsize
);
4811 sooptcopyout_timeval(struct sockopt
*sopt
, const struct timeval
*tv_p
)
4815 struct user64_timeval tv64
;
4816 struct user32_timeval tv32
;
4821 if (proc_is64bit(sopt
->sopt_p
)) {
4822 len
= sizeof (tv64
);
4823 tv64
.tv_sec
= tv_p
->tv_sec
;
4824 tv64
.tv_usec
= tv_p
->tv_usec
;
4827 len
= sizeof (tv32
);
4828 tv32
.tv_sec
= tv_p
->tv_sec
;
4829 tv32
.tv_usec
= tv_p
->tv_usec
;
4832 valsize
= min(len
, sopt
->sopt_valsize
);
4833 sopt
->sopt_valsize
= valsize
;
4834 if (sopt
->sopt_val
!= USER_ADDR_NULL
) {
4835 if (sopt
->sopt_p
!= kernproc
)
4836 error
= copyout(val
, sopt
->sopt_val
, valsize
);
4838 bcopy(val
, CAST_DOWN(caddr_t
, sopt
->sopt_val
), valsize
);
4846 * <pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
4847 * <pr_ctloutput>:???
4848 * <sf_getoption>:???
4851 sogetoptlock(struct socket
*so
, struct sockopt
*sopt
, int dolock
)
4856 #if CONFIG_MACF_SOCKET
4858 #endif /* MAC_SOCKET */
4860 if (sopt
->sopt_dir
!= SOPT_GET
)
4861 sopt
->sopt_dir
= SOPT_GET
;
4866 error
= sflt_getsockopt(so
, sopt
);
4868 if (error
== EJUSTRETURN
)
4873 if (sopt
->sopt_level
!= SOL_SOCKET
) {
4874 if (so
->so_proto
!= NULL
&&
4875 so
->so_proto
->pr_ctloutput
!= NULL
) {
4876 error
= (*so
->so_proto
->pr_ctloutput
)(so
, sopt
);
4879 error
= ENOPROTOOPT
;
4882 * Allow socket-level (SOL_SOCKET) options to be filtered by
4883 * the protocol layer, if needed. A zero value returned from
4884 * the handler means use default socket-level processing as
4885 * done by the rest of this routine. Otherwise, any other
4886 * return value indicates that the option is unsupported.
4888 if (so
->so_proto
!= NULL
&& (error
= so
->so_proto
->pr_usrreqs
->
4889 pru_socheckopt(so
, sopt
)) != 0)
4893 switch (sopt
->sopt_name
) {
4896 l
.l_onoff
= ((so
->so_options
& SO_LINGER
) ? 1 : 0);
4897 l
.l_linger
= (sopt
->sopt_name == SO_LINGER) ?
            so->so_linger : so->so_linger / hz;
        error = sooptcopyout(sopt, &l, sizeof (l));
        break;

    case SO_USELOOPBACK:
    /* ... */
    case SO_TIMESTAMP_MONOTONIC:
    /* ... */
    case SO_WANTOOBFLAG:
    case SO_NOWAKEFROMSLEEP:
        optval = so->so_options & sopt->sopt_name;
integer:
        error = sooptcopyout(sopt, &optval, sizeof (optval));
        break;

    case SO_TYPE:
        optval = so->so_type;
        goto integer;

    case SO_NREAD:
        if (so->so_proto->pr_flags & PR_ATOMIC) {
            int pkt_total = 0;
            struct mbuf *m1;

            m1 = so->so_rcv.sb_mb;
            while (m1 != NULL) {
                if (m1->m_type == MT_DATA ||
                    m1->m_type == MT_HEADER ||
                    m1->m_type == MT_OOBDATA)
                    pkt_total += m1->m_len;
                m1 = m1->m_next;
            }
            optval = pkt_total;
        } else {
            optval = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
        }
        goto integer;

    case SO_NUMRCVPKT:
        if (so->so_proto->pr_flags & PR_ATOMIC) {
            int cnt = 0;
            struct mbuf *m1;

            m1 = so->so_rcv.sb_mb;
            while (m1 != NULL) {
                if (m1->m_type == MT_DATA ||
                    m1->m_type == MT_HEADER ||
                    m1->m_type == MT_OOBDATA)
                    cnt += 1;
                m1 = m1->m_nextpkt;
            }
            optval = cnt;
            goto integer;
        } else {
            error = EINVAL;
            break;
        }

    case SO_NWRITE:
        optval = so->so_snd.sb_cc;
        goto integer;

    case SO_ERROR:
        optval = so->so_error;
        so->so_error = 0;
        goto integer;

    case SO_SNDBUF: {
        u_int32_t hiwat = so->so_snd.sb_hiwat;

        if (so->so_snd.sb_flags & SB_UNIX) {
            struct unpcb *unp =
                (struct unpcb *)(so->so_pcb);
            if (unp != NULL && unp->unp_conn != NULL) {
                hiwat += unp->unp_conn->unp_cc;
            }
        }

        optval = hiwat;
        goto integer;
    }
    case SO_RCVBUF:
        optval = so->so_rcv.sb_hiwat;
        goto integer;

    case SO_SNDLOWAT:
        optval = so->so_snd.sb_lowat;
        goto integer;

    case SO_RCVLOWAT:
        optval = so->so_rcv.sb_lowat;
        goto integer;

    case SO_SNDTIMEO:
    case SO_RCVTIMEO:
        tv = (sopt->sopt_name == SO_SNDTIMEO ?
            so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

        error = sooptcopyout_timeval(sopt, &tv);
        break;

    case SO_NOSIGPIPE:
        optval = (so->so_flags & SOF_NOSIGPIPE);
        goto integer;

    case SO_NOADDRERR:
        optval = (so->so_flags & SOF_NOADDRAVAIL);
        goto integer;

    case SO_REUSESHAREUID:
        optval = (so->so_flags & SOF_REUSESHAREUID);
        goto integer;

    case SO_NOTIFYCONFLICT:
        optval = (so->so_flags & SOF_NOTIFYCONFLICT);
        goto integer;

    case SO_RESTRICTIONS:
        optval = so_get_restrictions(so);
        goto integer;

    case SO_AWDL_UNRESTRICTED:
        if (SOCK_DOM(so) == PF_INET ||
            SOCK_DOM(so) == PF_INET6) {
            optval = inp_get_awdl_unrestricted(
                sotoinpcb(so));
            goto integer;
        } else
            error = EOPNOTSUPP;
        break;

    case SO_LABEL:
#if CONFIG_MACF_SOCKET
        if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac),
            sizeof (extmac))) != 0 ||
            (error = mac_socket_label_get(proc_ucred(
            sopt->sopt_p), so, &extmac)) != 0)
            break;

        error = sooptcopyout(sopt, &extmac, sizeof (extmac));
#else
        error = EOPNOTSUPP;
#endif /* MAC_SOCKET */
        break;

    case SO_PEERLABEL:
#if CONFIG_MACF_SOCKET
        if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac),
            sizeof (extmac))) != 0 ||
            (error = mac_socketpeer_label_get(proc_ucred(
            sopt->sopt_p), so, &extmac)) != 0)
            break;

        error = sooptcopyout(sopt, &extmac, sizeof (extmac));
#else
        error = EOPNOTSUPP;
#endif /* MAC_SOCKET */
        break;

#ifdef __APPLE_API_PRIVATE
    case SO_UPCALLCLOSEWAIT:
        optval = (so->so_flags & SOF_UPCALLCLOSEWAIT);
        goto integer;
#endif
    case SO_RANDOMPORT:
        optval = (so->so_flags & SOF_BINDRANDOMPORT);
        goto integer;

    case SO_NP_EXTENSIONS: {
        struct so_np_extensions sonpx;

        sonpx.npx_flags = (so->so_flags & SOF_NPX_SETOPTSHUT) ?
            SONPX_SETOPTSHUT : 0;
        sonpx.npx_mask = SONPX_MASK_VALID;

        error = sooptcopyout(sopt, &sonpx,
            sizeof (struct so_np_extensions));
        break;
    }

    case SO_TRAFFIC_CLASS:
        optval = so->so_traffic_class;
        goto integer;

    case SO_RECV_TRAFFIC_CLASS:
        optval = (so->so_flags & SOF_RECV_TRAFFIC_CLASS);
        goto integer;

    case SO_TRAFFIC_CLASS_STATS:
        error = sooptcopyout(sopt, &so->so_tc_stats,
            sizeof (so->so_tc_stats));
        break;

    case SO_TRAFFIC_CLASS_DBG:
        error = sogetopt_tcdbg(so, sopt);
        break;

    case SO_PRIVILEGED_TRAFFIC_CLASS:
        optval = (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS);
        goto integer;

    case SO_DEFUNCTOK:
        optval = !(so->so_flags & SOF_NODEFUNCT);
        goto integer;

    case SO_ISDEFUNCT:
        optval = (so->so_flags & SOF_DEFUNCT);
        goto integer;

    case SO_OPPORTUNISTIC:
        optval = so_get_opportunistic(so);
        goto integer;

    case SO_FLUSH:
        /* This option is not gettable */
        error = EINVAL;
        break;

    case SO_RECV_ANYIF:
        optval = so_get_recv_anyif(so);
        goto integer;

    case SO_TRAFFIC_MGT_BACKGROUND:
        /* This option is handled by lower layer(s) */
        if (so->so_proto != NULL &&
            so->so_proto->pr_ctloutput != NULL) {
            (void) so->so_proto->pr_ctloutput(so, sopt);
        }
        break;

#if FLOW_DIVERT
    case SO_FLOW_DIVERT_TOKEN:
        error = flow_divert_token_get(so, sopt);
        break;
#endif /* FLOW_DIVERT */

#if NECP
    case SO_NECP_ATTRIBUTES:
        error = necp_get_socket_attributes(so, sopt);
        break;
#endif /* NECP */

#if CONTENT_FILTER
    case SO_CFIL_SOCK_ID: {
        cfil_sock_id_t sock_id;

        sock_id = cfil_sock_id_from_socket(so);

        error = sooptcopyout(sopt, &sock_id,
            sizeof (cfil_sock_id_t));
        break;
    }
#endif /* CONTENT_FILTER */

#if MPTCP
    case SO_MPTCP_FASTJOIN:
        if (!((so->so_flags & SOF_MP_SUBFLOW) ||
            ((SOCK_CHECK_DOM(so, PF_MULTIPATH)) &&
            (SOCK_CHECK_PROTO(so, IPPROTO_TCP))))) {
            error = ENOPROTOOPT;
            break;
        }
        optval = (so->so_flags & SOF_MPTCP_FASTJOIN);
        goto integer;
#endif /* MPTCP */

    default:
        error = ENOPROTOOPT;
        break;
    }
    /* ... */
    socket_unlock(so, 1);
    return (error);
}
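/*
 * Illustrative userland sketch (not part of this file; "fd" and the
 * error handling are assumptions): most of the integer-valued cases
 * above surface through getsockopt(2) at the SOL_SOCKET level, e.g.
 * reading the bytes currently queued for receive via SO_NREAD:
 *
 *	int nread = 0;
 *	socklen_t len = sizeof (nread);
 *	if (getsockopt(fd, SOL_SOCKET, SO_NREAD, &nread, &len) == 0)
 *		printf("%d bytes readable\n", nread);
 */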
/*
 * The size limits on our soopt_getm are different from those on FreeBSD.
 * We limit the size of options to MCLBYTES. This will have to change
 * if we need to define options that need more space than MCLBYTES.
 */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
    struct mbuf *m, *m_prev;
    int sopt_size = sopt->sopt_valsize;
    int how;

    if (sopt_size <= 0 || sopt_size > MCLBYTES)
        return (EMSGSIZE);

    how = sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT;
    MGET(m, how, MT_DATA);
    if (m == NULL)
        return (ENOBUFS);
    if (sopt_size > MLEN) {
        MCLGET(m, how);
        if ((m->m_flags & M_EXT) == 0) {
            m_free(m);
            return (ENOBUFS);
        }
        m->m_len = min(MCLBYTES, sopt_size);
    } else {
        m->m_len = min(MLEN, sopt_size);
    }
    sopt_size -= m->m_len;
    *mp = m;
    m_prev = m;

    while (sopt_size > 0) {
        MGET(m, how, MT_DATA);
        if (m == NULL) {
            m_freem(*mp);
            return (ENOBUFS);
        }
        if (sopt_size > MLEN) {
            MCLGET(m, how);
            if ((m->m_flags & M_EXT) == 0) {
                m_freem(*mp);
                m_freem(m);
                return (ENOBUFS);
            }
            m->m_len = min(MCLBYTES, sopt_size);
        } else {
            m->m_len = min(MLEN, sopt_size);
        }
        sopt_size -= m->m_len;
        m_prev->m_next = m;
        m_prev = m;
    }
    return (0);
}
/* copyin sopt data into mbuf chain */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
    struct mbuf *m0 = m;

    if (sopt->sopt_val == USER_ADDR_NULL)
        return (0);
    while (m != NULL && sopt->sopt_valsize >= m->m_len) {
        if (sopt->sopt_p != kernproc) {
            int error;

            error = copyin(sopt->sopt_val, mtod(m, char *),
                m->m_len);
            if (error != 0) {
                m_freem(m0);
                return (error);
            }
        } else {
            bcopy(CAST_DOWN(caddr_t, sopt->sopt_val),
                mtod(m, char *), m->m_len);
        }
        sopt->sopt_valsize -= m->m_len;
        sopt->sopt_val += m->m_len;
        m = m->m_next;
    }
    /* the chain should have been sized large enough by ip6_sooptmcopyin() */
    if (m != NULL)
        panic("soopt_mcopyin");
    return (0);
}
/* copyout mbuf chain data into soopt */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
    struct mbuf *m0 = m;
    size_t valsize = 0;

    if (sopt->sopt_val == USER_ADDR_NULL)
        return (0);
    while (m != NULL && sopt->sopt_valsize >= m->m_len) {
        if (sopt->sopt_p != kernproc) {
            int error;

            error = copyout(mtod(m, char *), sopt->sopt_val,
                m->m_len);
            if (error != 0) {
                m_freem(m0);
                return (error);
            }
        } else {
            bcopy(mtod(m, char *),
                CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
        }
        sopt->sopt_valsize -= m->m_len;
        sopt->sopt_val += m->m_len;
        valsize += m->m_len;
        m = m->m_next;
    }
    if (m != NULL) {
        /* the user-supplied sockopt buffer should have been large enough */
        m_freem(m0);
        return (EINVAL);
    }
    sopt->sopt_valsize = valsize;
    return (0);
}
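/*
 * soopt_getm()/soopt_mcopyin() together stage a setsockopt(2) value into
 * an mbuf chain for protocol handlers that expect mbufs (e.g. the IPv6
 * option code referenced above), and soopt_mcopyout() performs the
 * reverse trip for getsockopt(2).  The walk stops when either the chain
 * or the user-supplied buffer is exhausted, so callers size the chain
 * with soopt_getm() first.
 */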
void
sohasoutofband(struct socket *so)
{
    if (so->so_pgid < 0)
        gsignal(-so->so_pgid, SIGURG);
    else if (so->so_pgid > 0)
        proc_signal(so->so_pgid, SIGURG);
    selwakeup(&so->so_rcv.sb_sel);
}
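/*
 * A negative so_pgid names a process group, a positive one a single
 * process; in both cases SIGURG announces the arrival of out-of-band
 * data, and select/poll waiters on the receive buffer are woken as well.
 */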
int
sopoll(struct socket *so, int events, kauth_cred_t cred, void * wql)
{
#pragma unused(cred)
    struct proc *p = current_proc();
    int revents = 0;

    socket_lock(so, 1);
    so_update_last_owner_locked(so, PROC_NULL);
    so_update_policy(so);

    if (events & (POLLIN | POLLRDNORM))
        if (soreadable(so))
            revents |= events & (POLLIN | POLLRDNORM);

    if (events & (POLLOUT | POLLWRNORM))
        if (sowriteable(so))
            revents |= events & (POLLOUT | POLLWRNORM);

    if (events & (POLLPRI | POLLRDBAND))
        if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
            revents |= events & (POLLPRI | POLLRDBAND);

    if (revents == 0) {
        if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
            /*
             * Darwin sets the flag first,
             * BSD calls selrecord first
             */
            so->so_rcv.sb_flags |= SB_SEL;
            selrecord(p, &so->so_rcv.sb_sel, wql);
        }

        if (events & (POLLOUT | POLLWRNORM)) {
            /*
             * Darwin sets the flag first,
             * BSD calls selrecord first
             */
            so->so_snd.sb_flags |= SB_SEL;
            selrecord(p, &so->so_snd.sb_sel, wql);
        }
    }

    socket_unlock(so, 1);
    return (revents);
}
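/*
 * sopoll() answers the level-triggered poll(2)/select(2) questions
 * directly (readable, writable, at the OOB mark) and only calls
 * selrecord() to register the waiter when nothing is ready yet; setting
 * SB_SEL on the buffer is what later lets the wakeup path know a
 * selecting thread is parked there.
 */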
int
soo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
{
#pragma unused(fp)
#if !CONFIG_MACF_SOCKET
#pragma unused(ctx)
#endif /* MAC_SOCKET */
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
    struct klist *skl;

    socket_lock(so, 1);
    so_update_last_owner_locked(so, PROC_NULL);
    so_update_policy(so);

#if CONFIG_MACF_SOCKET
    if (mac_socket_check_kqfilter(proc_ucred(vfs_context_proc(ctx)),
        kn, so) != 0) {
        socket_unlock(so, 1);
        return (1);
    }
#endif /* MAC_SOCKET */

    switch (kn->kn_filter) {
    case EVFILT_READ:
        kn->kn_fop = &soread_filtops;
        /*
         * If the caller explicitly asked for OOB results (e.g. poll()),
         * save that off in the hookid field and reserve the kn_flags
         * EV_OOBAND bit for output only.
         */
        if (kn->kn_flags & EV_OOBAND) {
            kn->kn_flags &= ~EV_OOBAND;
            kn->kn_hookid = EV_OOBAND;
        } else {
            kn->kn_hookid = 0;
        }
        skl = &so->so_rcv.sb_sel.si_note;
        break;
    case EVFILT_WRITE:
        kn->kn_fop = &sowrite_filtops;
        skl = &so->so_snd.sb_sel.si_note;
        break;
    case EVFILT_SOCK:
        kn->kn_fop = &sock_filtops;
        skl = &so->so_klist;
        break;
    default:
        socket_unlock(so, 1);
        return (1);
    }

    if (KNOTE_ATTACH(skl, kn)) {
        switch (kn->kn_filter) {
        case EVFILT_READ:
            so->so_rcv.sb_flags |= SB_KNOTE;
            break;
        case EVFILT_WRITE:
            so->so_snd.sb_flags |= SB_KNOTE;
            break;
        case EVFILT_SOCK:
            so->so_flags |= SOF_KNOTE;
            break;
        default:
            socket_unlock(so, 1);
            return (1);
        }
    }
    socket_unlock(so, 1);
    return (0);
}
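/*
 * Illustrative userland sketch (not kernel code; "fd" is assumed to be a
 * connected socket and "kq" a descriptor from kqueue()): attaching an
 * EVFILT_READ knote to a socket goes through this path.
 *
 *	struct kevent ev;
 *	EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	(void) kevent(kq, &ev, 1, NULL, 0, NULL);
 */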
static void
filt_sordetach(struct knote *kn)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    socket_lock(so, 1);
    if (so->so_rcv.sb_flags & SB_KNOTE)
        if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
            so->so_rcv.sb_flags &= ~SB_KNOTE;
    socket_unlock(so, 1);
}
static int
filt_soread(struct knote *kn, long hint)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_lock(so, 1);

    if (so->so_options & SO_ACCEPTCONN) {
        int isempty;

        /*
         * Radar 6615193 handle the listen case dynamically
         * for the kqueue read filter. This allows calling listen()
         * after registering the kqueue EVFILT_READ.
         */

        kn->kn_data = so->so_qlen;
        isempty = ! TAILQ_EMPTY(&so->so_comp);

        if ((hint & SO_FILT_HINT_LOCKED) == 0)
            socket_unlock(so, 1);

        return (isempty);
    }

    /* socket isn't a listener */
    kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;

    /*
     * Clear out EV_OOBAND that filt_soread may have set in the
     * past.
     */
    kn->kn_flags &= ~EV_OOBAND;

    if ((so->so_oobmark) || (so->so_state & SS_RCVATMARK)) {
        kn->kn_flags |= EV_OOBAND;
        /*
         * If caller registered explicit interest in OOB data,
         * return immediately (data == amount beyond mark, for
         * legacy reasons - that should be changed later).
         */
        if (kn->kn_hookid == EV_OOBAND) {
            /*
             * When so_state is SS_RCVATMARK, so_oobmark
             * is 0.
             */
            kn->kn_data -= so->so_oobmark;
            if ((hint & SO_FILT_HINT_LOCKED) == 0)
                socket_unlock(so, 1);
            return (1);
        }
    }

    if ((so->so_state & SS_CANTRCVMORE)
#if CONTENT_FILTER
        && cfil_sock_data_pending(&so->so_rcv) == 0
#endif /* CONTENT_FILTER */
        ) {
        kn->kn_flags |= EV_EOF;
        kn->kn_fflags = so->so_error;
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
            socket_unlock(so, 1);
        return (1);
    }

    if (so->so_error) {	/* temporary udp error */
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
            socket_unlock(so, 1);
        return (1);
    }

    int64_t lowwat = so->so_rcv.sb_lowat;
    if (kn->kn_sfflags & NOTE_LOWAT) {
        if (kn->kn_sdata > so->so_rcv.sb_hiwat)
            lowwat = so->so_rcv.sb_hiwat;
        else if (kn->kn_sdata > lowwat)
            lowwat = kn->kn_sdata;
    }

    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_unlock(so, 1);

    return (kn->kn_data >= lowwat);
}
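/*
 * The read filter is level-triggered on kn_data (bytes available minus
 * control data).  NOTE_LOWAT lets the watcher raise the firing threshold
 * per-knote, clamped to the receive buffer's high-water mark; without it
 * the socket's own receive low-water mark applies.
 */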
static void
filt_sowdetach(struct knote *kn)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    socket_lock(so, 1);
    if (so->so_snd.sb_flags & SB_KNOTE)
        if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
            so->so_snd.sb_flags &= ~SB_KNOTE;
    socket_unlock(so, 1);
}
static int
so_wait_for_if_feedback(struct socket *so)
{
    if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) &&
        (so->so_state & SS_ISCONNECTED)) {
        struct inpcb *inp = sotoinpcb(so);
        if (INP_WAIT_FOR_IF_FEEDBACK(inp))
            return (1);
    }
    return (0);
}
static int
filt_sowrite(struct knote *kn, long hint)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
    int ret = 0;

    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_lock(so, 1);

    kn->kn_data = sbspace(&so->so_snd);
    if (so->so_state & SS_CANTSENDMORE) {
        kn->kn_flags |= EV_EOF;
        kn->kn_fflags = so->so_error;
        ret = 1;
        goto out;
    }
    if (so->so_error) {	/* temporary udp error */
        ret = 1;
        goto out;
    }
    if (((so->so_state & SS_ISCONNECTED) == 0) &&
        (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
        ret = 0;
        goto out;
    }
    int64_t lowwat = so->so_snd.sb_lowat;
    if (kn->kn_sfflags & NOTE_LOWAT) {
        if (kn->kn_sdata > so->so_snd.sb_hiwat)
            lowwat = so->so_snd.sb_hiwat;
        else if (kn->kn_sdata > lowwat)
            lowwat = kn->kn_sdata;
    }
    if (kn->kn_data >= lowwat) {
        if (so->so_flags & SOF_NOTSENT_LOWAT) {
            if ((SOCK_DOM(so) == PF_INET
                || SOCK_DOM(so) == PF_INET6)
                && so->so_type == SOCK_STREAM) {
                ret = tcp_notsent_lowat_check(so);
            }
#if MPTCP
            else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
                (SOCK_PROTO(so) == IPPROTO_TCP)) {
                ret = mptcp_notsent_lowat_check(so);
            }
#endif
            else {
                ret = 1;
                goto out;
            }
        } else {
            ret = 1;
        }
    }
    if (so_wait_for_if_feedback(so))
        ret = 0;
out:
    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_unlock(so, 1);
    return (ret);
}
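/*
 * The write filter normally fires once sbspace() reaches the low-water
 * mark, but with SOF_NOTSENT_LOWAT set the decision is deferred to the
 * transport (TCP, or MPTCP for multipath subflows), which also accounts
 * for data that is buffered but not yet sent.  A pending interface
 * feedback wait suppresses the event.
 */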
static void
filt_sockdetach(struct knote *kn)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    socket_lock(so, 1);
    if ((so->so_flags & SOF_KNOTE) != 0)
        if (KNOTE_DETACH(&so->so_klist, kn))
            so->so_flags &= ~SOF_KNOTE;
    socket_unlock(so, 1);
}
static int
filt_sockev(struct knote *kn, long hint)
{
    int ret = 0, locked = 0;
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
    long ev_hint = (hint & SO_FILT_HINT_EV);

    if ((hint & SO_FILT_HINT_LOCKED) == 0) {
        socket_lock(so, 1);
        locked = 1;
    }

    if (ev_hint & SO_FILT_HINT_CONNRESET) {
        if (kn->kn_sfflags & NOTE_CONNRESET)
            kn->kn_fflags |= NOTE_CONNRESET;
    }
    if (ev_hint & SO_FILT_HINT_TIMEOUT) {
        if (kn->kn_sfflags & NOTE_TIMEOUT)
            kn->kn_fflags |= NOTE_TIMEOUT;
    }
    if (ev_hint & SO_FILT_HINT_NOSRCADDR) {
        if (kn->kn_sfflags & NOTE_NOSRCADDR)
            kn->kn_fflags |= NOTE_NOSRCADDR;
    }
    if (ev_hint & SO_FILT_HINT_IFDENIED) {
        if ((kn->kn_sfflags & NOTE_IFDENIED))
            kn->kn_fflags |= NOTE_IFDENIED;
    }
    if (ev_hint & SO_FILT_HINT_KEEPALIVE) {
        if (kn->kn_sfflags & NOTE_KEEPALIVE)
            kn->kn_fflags |= NOTE_KEEPALIVE;
    }
    if (ev_hint & SO_FILT_HINT_ADAPTIVE_WTIMO) {
        if (kn->kn_sfflags & NOTE_ADAPTIVE_WTIMO)
            kn->kn_fflags |= NOTE_ADAPTIVE_WTIMO;
    }
    if (ev_hint & SO_FILT_HINT_ADAPTIVE_RTIMO) {
        if (kn->kn_sfflags & NOTE_ADAPTIVE_RTIMO)
            kn->kn_fflags |= NOTE_ADAPTIVE_RTIMO;
    }
    if (ev_hint & SO_FILT_HINT_CONNECTED) {
        if (kn->kn_sfflags & NOTE_CONNECTED)
            kn->kn_fflags |= NOTE_CONNECTED;
    }
    if (ev_hint & SO_FILT_HINT_DISCONNECTED) {
        if (kn->kn_sfflags & NOTE_DISCONNECTED)
            kn->kn_fflags |= NOTE_DISCONNECTED;
    }
    if (ev_hint & SO_FILT_HINT_CONNINFO_UPDATED) {
        if (so->so_proto != NULL &&
            (so->so_proto->pr_flags & PR_EVCONNINFO) &&
            (kn->kn_sfflags & NOTE_CONNINFO_UPDATED))
            kn->kn_fflags |= NOTE_CONNINFO_UPDATED;
    }

    if ((kn->kn_sfflags & NOTE_READCLOSED) &&
        (so->so_state & SS_CANTRCVMORE)
#if CONTENT_FILTER
        && cfil_sock_data_pending(&so->so_rcv) == 0
#endif /* CONTENT_FILTER */
        )
        kn->kn_fflags |= NOTE_READCLOSED;

    if ((kn->kn_sfflags & NOTE_WRITECLOSED) &&
        (so->so_state & SS_CANTSENDMORE))
        kn->kn_fflags |= NOTE_WRITECLOSED;

    if ((kn->kn_sfflags & NOTE_SUSPEND) &&
        ((ev_hint & SO_FILT_HINT_SUSPEND) ||
        (so->so_flags & SOF_SUSPENDED))) {
        kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
        kn->kn_fflags |= NOTE_SUSPEND;
    }

    if ((kn->kn_sfflags & NOTE_RESUME) &&
        ((ev_hint & SO_FILT_HINT_RESUME) ||
        (so->so_flags & SOF_SUSPENDED) == 0)) {
        kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
        kn->kn_fflags |= NOTE_RESUME;
    }

    if (so->so_error != 0) {
        ret = 1;
        kn->kn_data = so->so_error;
        kn->kn_flags |= EV_EOF;
    } else {
        get_sockev_state(so, (u_int32_t *)&(kn->kn_data));
    }

    if (kn->kn_fflags != 0)
        ret = 1;

    if (locked)
        socket_unlock(so, 1);

    return (ret);
}
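/*
 * Each EVFILT_SOCK hint bit from the protocol is latched into kn_fflags
 * only if the watcher subscribed to the matching NOTE_* flag, and the
 * knote fires once any requested note (or a socket error) is pending.
 * When there is no error, kn_data carries the connection-state bits
 * produced by get_sockev_state() below.
 */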
void
get_sockev_state(struct socket *so, u_int32_t *statep)
{
    u_int32_t state = *(statep);

    if (so->so_state & SS_ISCONNECTED)
        state |= SOCKEV_CONNECTED;
    else
        state &= ~(SOCKEV_CONNECTED);
    state |= ((so->so_state & SS_ISDISCONNECTED) ? SOCKEV_DISCONNECTED : 0);
    *(statep) = state;
}
#define SO_LOCK_HISTORY_STR_LEN \
    (2 * SO_LCKDBG_MAX * (2 + (2 * sizeof (void *)) + 1) + 1)

__private_extern__ const char *
solockhistory_nr(struct socket *so)
{
    size_t n = 0;
    int i;
    static char lock_history_str[SO_LOCK_HISTORY_STR_LEN];

    bzero(lock_history_str, sizeof (lock_history_str));
    for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) {
        n += snprintf(lock_history_str + n,
            SO_LOCK_HISTORY_STR_LEN - n, "%p:%p ",
            so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX],
            so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX]);
    }
    return (lock_history_str);
}
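/*
 * lock_lr/unlock_lr are small ring buffers of caller return addresses;
 * the history string walks them newest-first and prints each pair as
 * "lock:unlock", which is what the refcount panic in socket_unlock()
 * reports.
 */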
int
socket_lock(struct socket *so, int refcount)
{
    int error = 0;
    void *lr_saved;

    lr_saved = __builtin_return_address(0);

    if (so->so_proto->pr_lock) {
        error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
    } else {
#ifdef MORE_LOCKING_DEBUG
        lck_mtx_assert(so->so_proto->pr_domain->dom_mtx,
            LCK_MTX_ASSERT_NOTOWNED);
#endif
        lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
        if (refcount)
            so->so_usecount++;
        so->lock_lr[so->next_lock_lr] = lr_saved;
        so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
    }

    return (error);
}
int
socket_unlock(struct socket *so, int refcount)
{
    int error = 0;
    void *lr_saved;
    lck_mtx_t *mutex_held;

    lr_saved = __builtin_return_address(0);

    if (so->so_proto == NULL) {
        panic("%s: null so_proto so=%p\n", __func__, so);
        /* NOTREACHED */
    }

    if (so && so->so_proto->pr_unlock) {
        error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
    } else {
        mutex_held = so->so_proto->pr_domain->dom_mtx;
#ifdef MORE_LOCKING_DEBUG
        lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif
        so->unlock_lr[so->next_unlock_lr] = lr_saved;
        so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;

        if (refcount) {
            if (so->so_usecount <= 0) {
                panic("%s: bad refcount=%d so=%p (%d, %d, %d) "
                    "lrh=%s", __func__, so->so_usecount, so,
                    SOCK_DOM(so), so->so_type,
                    SOCK_PROTO(so), solockhistory_nr(so));
                /* NOTREACHED */
            }

            so->so_usecount--;
            if (so->so_usecount == 0)
                sofreelastref(so, 1);
        }
        lck_mtx_unlock(mutex_held);
    }

    return (error);
}
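/*
 * Typical in-kernel usage of the pair above: take the lock with a
 * reference, operate on the socket, then drop both.
 *
 *	socket_lock(so, 1);
 *	... examine or mutate so ...
 *	socket_unlock(so, 1);
 *
 * Protocols that manage their own locking (pr_lock/pr_unlock, e.g. TCP)
 * are simply delegated to; otherwise the domain mutex is used and the
 * use count is maintained here, with the last reference funneled into
 * sofreelastref().
 */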
/* Called with socket locked, will unlock socket */
void
sofree(struct socket *so)
{
    lck_mtx_t *mutex_held;

    if (so->so_proto->pr_getlock != NULL)
        mutex_held = (*so->so_proto->pr_getlock)(so, 0);
    else
        mutex_held = so->so_proto->pr_domain->dom_mtx;
    lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

    sofreelastref(so, 0);
}

void
soreference(struct socket *so)
{
    socket_lock(so, 1);	/* locks & take one reference on socket */
    socket_unlock(so, 0);	/* unlock only */
}

void
sodereference(struct socket *so)
{
    socket_lock(so, 0);
    socket_unlock(so, 1);
}
/*
 * Set or clear SOF_MULTIPAGES on the socket to enable or disable the
 * possibility of using jumbo clusters.  Caller must ensure to hold
 * the socket lock.
 */
void
somultipages(struct socket *so, boolean_t set)
{
    if (set)
        so->so_flags |= SOF_MULTIPAGES;
    else
        so->so_flags &= ~SOF_MULTIPAGES;
}

void
soif2kcl(struct socket *so, boolean_t set)
{
    if (set)
        so->so_flags1 |= SOF1_IF_2KCL;
    else
        so->so_flags1 &= ~SOF1_IF_2KCL;
}
int
so_isdstlocal(struct socket *so)
{
    struct inpcb *inp = (struct inpcb *)so->so_pcb;

    if (SOCK_DOM(so) == PF_INET)
        return (inaddr_local(inp->inp_faddr));
    else if (SOCK_DOM(so) == PF_INET6)
        return (in6addr_local(&inp->in6p_faddr));

    return (0);
}
int
sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
{
    struct sockbuf *rcv, *snd;
    int err = 0, defunct;

    rcv = &so->so_rcv;
    snd = &so->so_snd;

    defunct = (so->so_flags & SOF_DEFUNCT);
    if (defunct) {
        if (!(snd->sb_flags & rcv->sb_flags & SB_DROP)) {
            panic("%s: SB_DROP not set", __func__);
            /* NOTREACHED */
        }
        goto done;
    }

    if (so->so_flags & SOF_NODEFUNCT) {
        if (noforce) {
            err = EOPNOTSUPP;
            SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) "
                "so 0x%llx [%d,%d] is not eligible for defunct "
                "(%d)\n", __func__, proc_selfpid(), proc_pid(p),
                level, (uint64_t)VM_KERNEL_ADDRPERM(so),
                SOCK_DOM(so), SOCK_TYPE(so), err));
            return (err);
        }
        so->so_flags &= ~SOF_NODEFUNCT;
        SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx "
            "[%d,%d] defunct by force\n", __func__, proc_selfpid(),
            proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so),
            SOCK_DOM(so), SOCK_TYPE(so)));
    }

    so->so_flags |= SOF_DEFUNCT;

    /* Prevent further data from being appended to the socket buffers */
    snd->sb_flags |= SB_DROP;
    rcv->sb_flags |= SB_DROP;

    /* Flush any existing data in the socket buffers */
    if (rcv->sb_cc != 0) {
        rcv->sb_flags &= ~SB_SEL;
        selthreadclear(&rcv->sb_sel);
        sbrelease(rcv);
    }
    if (snd->sb_cc != 0) {
        snd->sb_flags &= ~SB_SEL;
        selthreadclear(&snd->sb_sel);
        sbrelease(snd);
    }

done:
    SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx [%d,%d] %s "
        "defunct\n", __func__, proc_selfpid(), proc_pid(p), level,
        (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so),
        defunct ? "is already" : "marked as"));

    return (err);
}
int
sodefunct(struct proc *p, struct socket *so, int level)
{
    struct sockbuf *rcv, *snd;

    if (!(so->so_flags & SOF_DEFUNCT)) {
        panic("%s improperly called", __func__);
        /* NOTREACHED */
    }
    if (so->so_state & SS_DEFUNCT)
        goto done;

    rcv = &so->so_rcv;
    snd = &so->so_snd;

    if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
        char s[MAX_IPv6_STR_LEN];
        char d[MAX_IPv6_STR_LEN];
        struct inpcb *inp = sotoinpcb(so);

        SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx [%s "
            "%s:%d -> %s:%d] is now defunct [rcv_si 0x%x, snd_si 0x%x, "
            "rcv_fl 0x%x, snd_fl 0x%x]\n", __func__, proc_selfpid(),
            proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so),
            (SOCK_TYPE(so) == SOCK_STREAM) ? "TCP" : "UDP",
            inet_ntop(SOCK_DOM(so), ((SOCK_DOM(so) == PF_INET) ?
            (void *)&inp->inp_laddr.s_addr : (void *)&inp->in6p_laddr),
            s, sizeof (s)), ntohs(inp->in6p_lport),
            inet_ntop(SOCK_DOM(so), (SOCK_DOM(so) == PF_INET) ?
            (void *)&inp->inp_faddr.s_addr : (void *)&inp->in6p_faddr,
            d, sizeof (d)), ntohs(inp->in6p_fport),
            (uint32_t)rcv->sb_sel.si_flags,
            (uint32_t)snd->sb_sel.si_flags,
            rcv->sb_flags, snd->sb_flags));
    } else {
        SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx "
            "[%d,%d] is now defunct [rcv_si 0x%x, snd_si 0x%x, "
            "rcv_fl 0x%x, snd_fl 0x%x]\n", __func__, proc_selfpid(),
            proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so),
            SOCK_DOM(so), SOCK_TYPE(so), (uint32_t)rcv->sb_sel.si_flags,
            (uint32_t)snd->sb_sel.si_flags, rcv->sb_flags,
            snd->sb_flags));
    }

    /*
     * Unwedge threads blocked on sbwait() and sb_lock().
     */
    sbwakeup(rcv);
    sbwakeup(snd);

    so->so_flags1 |= SOF1_DEFUNCTINPROG;
    if (rcv->sb_flags & SB_LOCK)
        sbunlock(rcv, TRUE);	/* keep socket locked */
    if (snd->sb_flags & SB_LOCK)
        sbunlock(snd, TRUE);	/* keep socket locked */

    /*
     * Flush the buffers and disconnect.  We explicitly call shutdown
     * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
     * states are set for the socket.  This would also flush out data
     * hanging off the receive list of this socket.
     */
    (void) soshutdownlock_final(so, SHUT_RD);
    (void) soshutdownlock_final(so, SHUT_WR);
    (void) sodisconnectlocked(so);

    /*
     * Explicitly handle connectionless-protocol disconnection
     * and release any remaining data in the socket buffers.
     */
    if (!(so->so_state & SS_ISDISCONNECTED))
        (void) soisdisconnected(so);

    if (so->so_error == 0)
        so->so_error = EBADF;

    if (rcv->sb_cc != 0) {
        rcv->sb_flags &= ~SB_SEL;
        selthreadclear(&rcv->sb_sel);
        sbrelease(rcv);
    }
    if (snd->sb_cc != 0) {
        snd->sb_flags &= ~SB_SEL;
        selthreadclear(&snd->sb_sel);
        sbrelease(snd);
    }
    so->so_state |= SS_DEFUNCT;

done:
    return (0);
}
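/*
 * Defuncting is two-phased: sosetdefunct() only marks the socket
 * (SOF_DEFUNCT) and arms SB_DROP so no new data is queued, while
 * sodefunct() later performs the irreversible teardown: shutdown in both
 * directions, disconnect, flush the buffers, force so_error to EBADF and
 * set SS_DEFUNCT so blocked callers see a dead socket.
 */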
__private_extern__ int
so_set_recv_anyif(struct socket *so, int optval)
{
    int ret = 0;

#if INET6
    if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
#else
    if (SOCK_DOM(so) == PF_INET) {
#endif /* !INET6 */
        if (optval)
            sotoinpcb(so)->inp_flags |= INP_RECV_ANYIF;
        else
            sotoinpcb(so)->inp_flags &= ~INP_RECV_ANYIF;
    }

    return (ret);
}

__private_extern__ int
so_get_recv_anyif(struct socket *so)
{
    int ret = 0;

#if INET6
    if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
#else
    if (SOCK_DOM(so) == PF_INET) {
#endif /* !INET6 */
        ret = (sotoinpcb(so)->inp_flags & INP_RECV_ANYIF) ? 1 : 0;
    }

    return (ret);
}
int
so_set_restrictions(struct socket *so, uint32_t vals)
{
    int nocell_old, nocell_new;
    int noexpensive_old, noexpensive_new;

    /*
     * Deny-type restrictions are trapdoors; once set they cannot be
     * unset for the lifetime of the socket.  This allows them to be
     * issued by a framework on behalf of the application without
     * having to worry that they can be undone.
     *
     * Note here that socket-level restrictions override any protocol
     * level restrictions.  For instance, SO_RESTRICT_DENY_CELLULAR
     * socket restriction issued on the socket has a higher precedence
     * than INP_NO_IFT_CELLULAR.  The latter is affected by the UUID
     * policy PROC_UUID_NO_CELLULAR for unrestricted sockets only,
     * i.e. when SO_RESTRICT_DENY_CELLULAR has not been issued.
     */
    nocell_old = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
    noexpensive_old = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
    so->so_restrictions |= (vals & (SO_RESTRICT_DENY_IN |
        SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR |
        SO_RESTRICT_DENY_EXPENSIVE));
    nocell_new = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
    noexpensive_new = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);

    /* we can only set, not clear restrictions */
    if ((nocell_new - nocell_old) == 0 &&
        (noexpensive_new - noexpensive_old) == 0)
        return (0);
#if INET6
    if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
#else
    if (SOCK_DOM(so) == PF_INET) {
#endif /* !INET6 */
        if (nocell_new - nocell_old != 0) {
            /*
             * if deny cellular is now set, do what's needed
             * for INPCB
             */
            inp_set_nocellular(sotoinpcb(so));
        }
        if (noexpensive_new - noexpensive_old != 0) {
            inp_set_noexpensive(sotoinpcb(so));
        }
    }

    return (0);
}

uint32_t
so_get_restrictions(struct socket *so)
{
    return (so->so_restrictions & (SO_RESTRICT_DENY_IN |
        SO_RESTRICT_DENY_OUT |
        SO_RESTRICT_DENY_CELLULAR | SO_RESTRICT_DENY_EXPENSIVE));
}
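/*
 * Illustrative userland sketch (not kernel code; "fd" and the caller's
 * entitlement to apply restrictions are assumptions): a framework can
 * make a socket permanently avoid cellular interfaces with
 *
 *	int deny = SO_RESTRICT_DENY_CELLULAR;
 *	(void) setsockopt(fd, SOL_SOCKET, SO_RESTRICTIONS,
 *	    &deny, sizeof (deny));
 *
 * Per the trapdoor comment above, a later setsockopt() cannot clear the
 * bit again.
 */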
struct sockaddr_entry *
sockaddrentry_alloc(int how)
{
    struct sockaddr_entry *se;

    se = (how == M_WAITOK) ? zalloc(se_zone) : zalloc_noblock(se_zone);
    if (se != NULL)
        bzero(se, se_zone_size);

    return (se);
}

void
sockaddrentry_free(struct sockaddr_entry *se)
{
    if (se->se_addr != NULL) {
        FREE(se->se_addr, M_SONAME);
        se->se_addr = NULL;
    }
    zfree(se_zone, se);
}

struct sockaddr_entry *
sockaddrentry_dup(const struct sockaddr_entry *src_se, int how)
{
    struct sockaddr_entry *dst_se;

    dst_se = sockaddrentry_alloc(how);
    if (dst_se != NULL) {
        int len = src_se->se_addr->sa_len;

        MALLOC(dst_se->se_addr, struct sockaddr *,
            len, M_SONAME, how | M_ZERO);
        if (dst_se->se_addr != NULL) {
            bcopy(src_se->se_addr, dst_se->se_addr, len);
        } else {
            sockaddrentry_free(dst_se);
            dst_se = NULL;
        }
    }

    return (dst_se);
}
struct sockaddr_list *
sockaddrlist_alloc(int how)
{
    struct sockaddr_list *sl;

    sl = (how == M_WAITOK) ? zalloc(sl_zone) : zalloc_noblock(sl_zone);
    if (sl != NULL) {
        bzero(sl, sl_zone_size);
        TAILQ_INIT(&sl->sl_head);
    }
    return (sl);
}

void
sockaddrlist_free(struct sockaddr_list *sl)
{
    struct sockaddr_entry *se, *tse;

    TAILQ_FOREACH_SAFE(se, &sl->sl_head, se_link, tse) {
        sockaddrlist_remove(sl, se);
        sockaddrentry_free(se);
    }
    VERIFY(sl->sl_cnt == 0 && TAILQ_EMPTY(&sl->sl_head));
    zfree(sl_zone, sl);
}

void
sockaddrlist_insert(struct sockaddr_list *sl, struct sockaddr_entry *se)
{
    VERIFY(!(se->se_flags & SEF_ATTACHED));
    se->se_flags |= SEF_ATTACHED;
    TAILQ_INSERT_TAIL(&sl->sl_head, se, se_link);
    sl->sl_cnt++;
    VERIFY(sl->sl_cnt != 0);
}

void
sockaddrlist_remove(struct sockaddr_list *sl, struct sockaddr_entry *se)
{
    VERIFY(se->se_flags & SEF_ATTACHED);
    se->se_flags &= ~SEF_ATTACHED;
    VERIFY(sl->sl_cnt != 0);
    sl->sl_cnt--;
    TAILQ_REMOVE(&sl->sl_head, se, se_link);
}

struct sockaddr_list *
sockaddrlist_dup(const struct sockaddr_list *src_sl, int how)
{
    struct sockaddr_entry *src_se, *tse;
    struct sockaddr_list *dst_sl;

    dst_sl = sockaddrlist_alloc(how);
    if (dst_sl == NULL)
        return (NULL);

    TAILQ_FOREACH_SAFE(src_se, &src_sl->sl_head, se_link, tse) {
        struct sockaddr_entry *dst_se;

        if (src_se->se_addr == NULL)
            continue;

        dst_se = sockaddrentry_dup(src_se, how);
        if (dst_se == NULL) {
            sockaddrlist_free(dst_sl);
            return (NULL);
        }

        sockaddrlist_insert(dst_sl, dst_se);
    }
    VERIFY(src_sl->sl_cnt == dst_sl->sl_cnt);

    return (dst_sl);
}
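/*
 * sockaddr_list is a counted TAILQ of sockaddr_entry nodes that own
 * their sockaddr copies, so duplication is a deep copy and the
 * destination list can outlive the source.  These lists back the
 * connectx()/multipath address bookkeeping elsewhere in the socket
 * layer.
 */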
int
so_set_effective_pid(struct socket *so, int epid, struct proc *p)
{
    struct proc *ep = PROC_NULL;
    int error = 0;

    /* pid 0 is reserved for kernel */
    if (epid == 0) {
        error = EINVAL;
        goto done;
    }

    /*
     * If this is an in-kernel socket, prevent its delegate
     * association from changing unless the socket option is
     * coming from within the kernel itself.
     */
    if (so->last_pid == 0 && p != kernproc) {
        error = EACCES;
        goto done;
    }

    /*
     * If this is issued by a process that's recorded as the
     * real owner of the socket, or if the pid is the same as
     * the process's own pid, then proceed.  Otherwise ensure
     * that the issuing process has the necessary privileges.
     */
    if (epid != so->last_pid || epid != proc_pid(p)) {
        if ((error = priv_check_cred(kauth_cred_get(),
            PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
            error = EACCES;
            goto done;
        }
    }

    /* Find the process that corresponds to the effective pid */
    if ((ep = proc_find(epid)) == PROC_NULL) {
        error = ESRCH;
        goto done;
    }

    /*
     * If a process tries to delegate the socket to itself, then
     * there's really nothing to do; treat it as a way for the
     * delegate association to be cleared.  Note that we check
     * the passed-in proc rather than calling proc_selfpid(),
     * as we need to check the process issuing the socket option
     * which could be kernproc.  Given that we don't allow 0 for
     * effective pid, it means that a delegated in-kernel socket
     * stays delegated during its lifetime (which is probably OK.)
     */
    if (epid == proc_pid(p)) {
        so->so_flags &= ~SOF_DELEGATED;
        so->e_upid = 0;
        so->e_pid = 0;
        uuid_clear(so->e_uuid);
    } else {
        so->so_flags |= SOF_DELEGATED;
        so->e_upid = proc_uniqueid(ep);
        so->e_pid = proc_pid(ep);
        proc_getexecutableuuid(ep, so->e_uuid, sizeof (so->e_uuid));
    }

done:
    if (error == 0 && net_io_policy_log) {
        uuid_string_t buf;

        uuid_unparse(so->e_uuid, buf);
        log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
            "euuid %s%s\n", __func__, proc_name_address(p),
            proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
            SOCK_TYPE(so), so->e_pid, proc_name_address(ep), buf,
            ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
    } else if (error != 0 && net_io_policy_log) {
        log(LOG_ERR, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
            "ERROR (%d)\n", __func__, proc_name_address(p),
            proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
            SOCK_TYPE(so), epid, (ep == PROC_NULL) ? "PROC_NULL" :
            proc_name_address(ep), error);
    }

    /* Update this socket's policy upon success */
    if (error == 0) {
        so->so_policy_gencnt *= -1;
        so_update_policy(so);
#if NECP
        so_update_necp_policy(so, NULL, NULL);
#endif /* NECP */
    }

    if (ep != PROC_NULL)
        proc_rele(ep);

    return (error);
}
int
so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p)
{
    uuid_string_t buf;
    uuid_t uuid;
    int error = 0;

    /* UUID must not be all-zeroes (reserved for kernel) */
    if (uuid_is_null(euuid)) {
        error = EINVAL;
        goto done;
    }

    /*
     * If this is an in-kernel socket, prevent its delegate
     * association from changing unless the socket option is
     * coming from within the kernel itself.
     */
    if (so->last_pid == 0 && p != kernproc) {
        error = EACCES;
        goto done;
    }

    /* Get the UUID of the issuing process */
    proc_getexecutableuuid(p, uuid, sizeof (uuid));

    /*
     * If this is issued by a process that's recorded as the
     * real owner of the socket, or if the uuid is the same as
     * the process's own uuid, then proceed.  Otherwise ensure
     * that the issuing process has the necessary privileges.
     */
    if (uuid_compare(euuid, so->last_uuid) != 0 ||
        uuid_compare(euuid, uuid) != 0) {
        if ((error = priv_check_cred(kauth_cred_get(),
            PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
            error = EACCES;
            goto done;
        }
    }

    /*
     * If a process tries to delegate the socket to itself, then
     * there's really nothing to do; treat it as a way for the
     * delegate association to be cleared.  Note that we check
     * the uuid of the passed-in proc rather than that of the
     * current process, as we need to check the process issuing
     * the socket option which could be kernproc itself.  Given
     * that we don't allow 0 for effective uuid, it means that
     * a delegated in-kernel socket stays delegated during its
     * lifetime (which is okay.)
     */
    if (uuid_compare(euuid, uuid) == 0) {
        so->so_flags &= ~SOF_DELEGATED;
        so->e_upid = 0;
        so->e_pid = 0;
        uuid_clear(so->e_uuid);
    } else {
        so->so_flags |= SOF_DELEGATED;
        /*
         * Unlike so_set_effective_pid(), we only have the UUID
         * here and the process ID is not known.  Inherit the
         * real {pid,upid} of the socket.
         */
        so->e_upid = so->last_upid;
        so->e_pid = so->last_pid;
        uuid_copy(so->e_uuid, euuid);
    }

done:
    if (error == 0 && net_io_policy_log) {
        uuid_unparse(so->e_uuid, buf);
        log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d "
            "euuid %s%s\n", __func__, proc_name_address(p), proc_pid(p),
            (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
            SOCK_TYPE(so), so->e_pid, buf,
            ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
    } else if (error != 0 && net_io_policy_log) {
        uuid_unparse(euuid, buf);
        log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] euuid %s "
            "ERROR (%d)\n", __func__, proc_name_address(p), proc_pid(p),
            (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
            SOCK_TYPE(so), buf, error);
    }

    /* Update this socket's policy upon success */
    if (error == 0) {
        so->so_policy_gencnt *= -1;
        so_update_policy(so);
#if NECP
        so_update_necp_policy(so, NULL, NULL);
#endif /* NECP */
    }

    return (error);
}
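/*
 * Both delegation setters feed the e_pid/e_upid/e_uuid "effective owner"
 * fields that network policy (and NECP, when built in) evaluates in
 * place of the socket's real owner; so_policy_gencnt is negated to
 * invalidate the cached decision before so_update_policy() re-evaluates
 * it.  Userland reaches these paths through the privileged SO_DELEGATED
 * and SO_DELEGATED_UUID socket options handled earlier in this file.
 */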
void
netpolicy_post_msg(uint32_t ev_code, struct netpolicy_event_data *ev_data,
    uint32_t ev_datalen)
{
    struct kev_msg ev_msg;

    /*
     * A netpolicy event always starts with a netpolicy_event_data
     * structure, but the caller can provide for a longer event
     * structure to post, depending on the event code.
     */
    VERIFY(ev_data != NULL && ev_datalen >= sizeof (*ev_data));

    bzero(&ev_msg, sizeof (ev_msg));
    ev_msg.vendor_code = KEV_VENDOR_APPLE;
    ev_msg.kev_class = KEV_NETWORK_CLASS;
    ev_msg.kev_subclass = KEV_NETPOLICY_SUBCLASS;
    ev_msg.event_code = ev_code;

    ev_msg.dv[0].data_ptr = ev_data;
    ev_msg.dv[0].data_length = ev_datalen;

    kev_post_msg(&ev_msg);
}
void
socket_post_kev_msg(uint32_t ev_code,
    struct kev_socket_event_data *ev_data,
    uint32_t ev_datalen)
{
    struct kev_msg ev_msg;

    bzero(&ev_msg, sizeof (ev_msg));
    ev_msg.vendor_code = KEV_VENDOR_APPLE;
    ev_msg.kev_class = KEV_NETWORK_CLASS;
    ev_msg.kev_subclass = KEV_SOCKET_SUBCLASS;
    ev_msg.event_code = ev_code;

    ev_msg.dv[0].data_ptr = ev_data;
    ev_msg.dv[0].data_length = ev_datalen;

    kev_post_msg(&ev_msg);
}
void
socket_post_kev_msg_closed(struct socket *so)
{
    struct kev_socket_closed ev;
    struct sockaddr *socksa = NULL, *peersa = NULL;
    int err;

    bzero(&ev, sizeof (ev));
    err = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &socksa);
    if (err == 0) {
        err = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so,
            &peersa);
        if (err == 0) {
            memcpy(&ev.ev_data.kev_sockname, socksa,
                min(socksa->sa_len,
                sizeof (ev.ev_data.kev_sockname)));
            memcpy(&ev.ev_data.kev_peername, peersa,
                min(peersa->sa_len,
                sizeof (ev.ev_data.kev_peername)));
            socket_post_kev_msg(KEV_SOCKET_CLOSED,
                &ev.ev_data, sizeof (ev));
        }
    }
    if (socksa != NULL)
        FREE(socksa, M_SONAME);
    if (peersa != NULL)
        FREE(peersa, M_SONAME);
}