2 * Copyright (c) 1998-2013 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
30 * Copyright (c) 1982, 1986, 1988, 1990, 1993
31 * The Regents of the University of California. All rights reserved.
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
70 #include <sys/param.h>
71 #include <sys/systm.h>
72 #include <sys/filedesc.h>
74 #include <sys/proc_internal.h>
75 #include <sys/kauth.h>
76 #include <sys/file_internal.h>
77 #include <sys/fcntl.h>
78 #include <sys/malloc.h>
80 #include <sys/domain.h>
81 #include <sys/kernel.h>
82 #include <sys/event.h>
84 #include <sys/protosw.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/resourcevar.h>
88 #include <sys/signalvar.h>
89 #include <sys/sysctl.h>
90 #include <sys/syslog.h>
93 #include <sys/kdebug.h>
97 #include <sys/kern_event.h>
98 #include <net/route.h>
100 #include <net/ntstat.h>
101 #include <netinet/in.h>
102 #include <netinet/in_pcb.h>
103 #include <netinet/ip6.h>
104 #include <netinet6/ip6_var.h>
105 #include <netinet/flow_divert.h>
106 #include <kern/zalloc.h>
107 #include <kern/locks.h>
108 #include <machine/limits.h>
109 #include <libkern/OSAtomic.h>
110 #include <pexpert/pexpert.h>
111 #include <kern/assert.h>
112 #include <kern/task.h>
113 #include <sys/kpi_mbuf.h>
114 #include <sys/mcache.h>
117 #include <security/mac.h>
118 #include <security/mac_framework.h>
122 #include <netinet/mp_pcb.h>
123 #endif /* MULTIPATH */
125 /* TODO: this should be in a header file somewhere */
126 extern char *proc_name_address(void *p
);
128 static u_int32_t so_cache_hw
; /* High water mark for socache */
129 static u_int32_t so_cache_timeouts
; /* number of timeouts */
130 static u_int32_t so_cache_max_freed
; /* max freed per timeout */
131 static u_int32_t cached_sock_count
= 0;
132 STAILQ_HEAD(, socket
) so_cache_head
;
133 int max_cached_sock_count
= MAX_CACHED_SOCKETS
;
134 static u_int32_t so_cache_time
;
135 static int socketinit_done
;
136 static struct zone
*so_cache_zone
;
138 static lck_grp_t
*so_cache_mtx_grp
;
139 static lck_attr_t
*so_cache_mtx_attr
;
140 static lck_grp_attr_t
*so_cache_mtx_grp_attr
;
141 static lck_mtx_t
*so_cache_mtx
;
143 #include <machine/limits.h>
145 static void filt_sordetach(struct knote
*kn
);
146 static int filt_soread(struct knote
*kn
, long hint
);
147 static void filt_sowdetach(struct knote
*kn
);
148 static int filt_sowrite(struct knote
*kn
, long hint
);
149 static void filt_sockdetach(struct knote
*kn
);
150 static int filt_sockev(struct knote
*kn
, long hint
);
152 static int sooptcopyin_timeval(struct sockopt
*, struct timeval
*);
153 static int sooptcopyout_timeval(struct sockopt
*, const struct timeval
*);
155 static struct filterops soread_filtops
= {
157 .f_detach
= filt_sordetach
,
158 .f_event
= filt_soread
,
161 static struct filterops sowrite_filtops
= {
163 .f_detach
= filt_sowdetach
,
164 .f_event
= filt_sowrite
,
167 static struct filterops sock_filtops
= {
169 .f_detach
= filt_sockdetach
,
170 .f_event
= filt_sockev
,
173 #define EVEN_MORE_LOCKING_DEBUG 0
174 int socket_debug
= 0;
175 static int socket_zone
= M_SOCKET
;
176 so_gen_t so_gencnt
; /* generation count for sockets */
178 MALLOC_DEFINE(M_SONAME
, "soname", "socket name");
179 MALLOC_DEFINE(M_PCB
, "pcb", "protocol control block");
181 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
182 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
183 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
184 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
185 #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
186 #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
187 #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))
189 #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES)
191 SYSCTL_DECL(_kern_ipc
);
193 int somaxconn
= SOMAXCONN
;
194 SYSCTL_INT(_kern_ipc
, KIPC_SOMAXCONN
, somaxconn
,
195 CTLFLAG_RW
| CTLFLAG_LOCKED
, &somaxconn
, 0, "");
197 /* Should we get a maximum also ??? */
198 static int sosendmaxchain
= 65536;
199 static int sosendminchain
= 16384;
200 static int sorecvmincopy
= 16384;
201 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sosendminchain
,
202 CTLFLAG_RW
| CTLFLAG_LOCKED
, &sosendminchain
, 0, "");
203 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sorecvmincopy
,
204 CTLFLAG_RW
| CTLFLAG_LOCKED
, &sorecvmincopy
, 0, "");
207 * Set to enable jumbo clusters (if available) for large writes when
208 * the socket is marked with SOF_MULTIPAGES; see below.
211 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sosendjcl
,
212 CTLFLAG_RW
| CTLFLAG_LOCKED
, &sosendjcl
, 0, "");
215 * Set this to ignore SOF_MULTIPAGES and use jumbo clusters for large
216 * writes on the socket for all protocols on any network interfaces,
217 * depending upon sosendjcl above. Be extra careful when setting this
218 * to 1, because sending down packets that cross physical pages down to
219 * broken drivers (those that falsely assume that the physical pages
220 * are contiguous) might lead to system panics or silent data corruption.
221 * When set to 0, the system will respect SOF_MULTIPAGES, which is set
222 * only for TCP sockets whose outgoing interface is IFNET_MULTIPAGES
223 * capable. Set this to 1 only for testing/debugging purposes.
225 int sosendjcl_ignore_capab
= 0;
226 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sosendjcl_ignore_capab
,
227 CTLFLAG_RW
| CTLFLAG_LOCKED
, &sosendjcl_ignore_capab
, 0, "");
229 int sodefunctlog
= 0;
230 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sodefunctlog
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
231 &sodefunctlog
, 0, "");
233 int sothrottlelog
= 0;
234 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sothrottlelog
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
235 &sothrottlelog
, 0, "");
237 int sorestrictrecv
= 1;
238 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sorestrictrecv
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
239 &sorestrictrecv
, 0, "Enable inbound interface restrictions");
242 * Socket operation routines.
243 * These routines are called by the routines in
244 * sys_socket.c or from a system process, and
245 * implement the semantics of socket operations by
246 * switching out to the protocol specific routines.
250 extern void postevent(struct socket
*, struct sockbuf
*, int);
251 extern void evsofree(struct socket
*);
252 extern int tcp_notsent_lowat_check(struct socket
*so
);
253 extern struct inpcbinfo tcbinfo
;
255 /* TODO: these should be in header file */
256 extern int get_inpcb_str_size(void);
257 extern int get_tcp_str_size(void);
259 static unsigned int sl_zone_size
; /* size of sockaddr_list */
260 static struct zone
*sl_zone
; /* zone for sockaddr_list */
262 static unsigned int se_zone_size
; /* size of sockaddr_entry */
263 static struct zone
*se_zone
; /* zone for sockaddr_entry */
265 vm_size_t so_cache_zone_element_size
;
267 static int sodelayed_copy(struct socket
*, struct uio
*, struct mbuf
**, user_ssize_t
*);
268 static void cached_sock_alloc(struct socket
**, int);
269 static void cached_sock_free(struct socket
*);
272 * SOTCDB_NO_DSCP is set by default, to prevent the networking stack from
273 * setting the DSCP code on the packet based on the service class; see
274 * <rdar://problem/11277343> for details.
276 __private_extern__ u_int32_t sotcdb
= SOTCDB_NO_DSCP
;
277 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sotcdb
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
283 if (socketinit_done
) {
284 printf("socketinit: already called...\n");
289 PE_parse_boot_argn("socket_debug", &socket_debug
,
290 sizeof (socket_debug
));
293 * allocate lock group attribute and group for socket cache mutex
295 so_cache_mtx_grp_attr
= lck_grp_attr_alloc_init();
296 so_cache_mtx_grp
= lck_grp_alloc_init("so_cache",
297 so_cache_mtx_grp_attr
);
300 * allocate the lock attribute for socket cache mutex
302 so_cache_mtx_attr
= lck_attr_alloc_init();
304 /* cached sockets mutex */
305 so_cache_mtx
= lck_mtx_alloc_init(so_cache_mtx_grp
, so_cache_mtx_attr
);
306 if (so_cache_mtx
== NULL
) {
307 panic("%s: unable to allocate so_cache_mtx\n", __func__
);
310 STAILQ_INIT(&so_cache_head
);
312 so_cache_zone_element_size
= (vm_size_t
)(sizeof (struct socket
) + 4
313 + get_inpcb_str_size() + 4 + get_tcp_str_size());
315 so_cache_zone
= zinit(so_cache_zone_element_size
,
316 (120000 * so_cache_zone_element_size
), 8192, "socache zone");
317 zone_change(so_cache_zone
, Z_CALLERACCT
, FALSE
);
318 zone_change(so_cache_zone
, Z_NOENCRYPT
, TRUE
);
320 sl_zone_size
= sizeof (struct sockaddr_list
);
321 if ((sl_zone
= zinit(sl_zone_size
, 1024 * sl_zone_size
, 1024,
322 "sockaddr_list")) == NULL
) {
323 panic("%s: unable to allocate sockaddr_list zone\n", __func__
);
326 zone_change(sl_zone
, Z_CALLERACCT
, FALSE
);
327 zone_change(sl_zone
, Z_EXPAND
, TRUE
);
329 se_zone_size
= sizeof (struct sockaddr_entry
);
330 if ((se_zone
= zinit(se_zone_size
, 1024 * se_zone_size
, 1024,
331 "sockaddr_entry")) == NULL
) {
332 panic("%s: unable to allocate sockaddr_entry zone\n", __func__
);
335 zone_change(se_zone
, Z_CALLERACCT
, FALSE
);
336 zone_change(se_zone
, Z_EXPAND
, TRUE
);
341 socket_tclass_init();
344 #endif /* MULTIPATH */
348 cached_sock_alloc(struct socket
**so
, int waitok
)
353 lck_mtx_lock(so_cache_mtx
);
355 if (!STAILQ_EMPTY(&so_cache_head
)) {
356 VERIFY(cached_sock_count
> 0);
358 *so
= STAILQ_FIRST(&so_cache_head
);
359 STAILQ_REMOVE_HEAD(&so_cache_head
, so_cache_ent
);
360 STAILQ_NEXT((*so
), so_cache_ent
) = NULL
;
363 lck_mtx_unlock(so_cache_mtx
);
365 temp
= (*so
)->so_saved_pcb
;
366 bzero((caddr_t
)*so
, sizeof (struct socket
));
368 (*so
)->so_saved_pcb
= temp
;
371 lck_mtx_unlock(so_cache_mtx
);
374 *so
= (struct socket
*)zalloc(so_cache_zone
);
376 *so
= (struct socket
*)zalloc_noblock(so_cache_zone
);
381 bzero((caddr_t
)*so
, sizeof (struct socket
));
384 * Define offsets for extra structures into our
385 * single block of memory. Align extra structures
386 * on longword boundaries.
389 offset
= (uintptr_t)*so
;
390 offset
+= sizeof (struct socket
);
392 offset
= ALIGN(offset
);
394 (*so
)->so_saved_pcb
= (caddr_t
)offset
;
395 offset
+= get_inpcb_str_size();
397 offset
= ALIGN(offset
);
399 ((struct inpcb
*)(void *)(*so
)->so_saved_pcb
)->inp_saved_ppcb
=
403 (*so
)->cached_in_sock_layer
= true;
407 cached_sock_free(struct socket
*so
)
410 lck_mtx_lock(so_cache_mtx
);
412 so_cache_time
= net_uptime();
413 if (++cached_sock_count
> max_cached_sock_count
) {
415 lck_mtx_unlock(so_cache_mtx
);
416 zfree(so_cache_zone
, so
);
418 if (so_cache_hw
< cached_sock_count
)
419 so_cache_hw
= cached_sock_count
;
421 STAILQ_INSERT_TAIL(&so_cache_head
, so
, so_cache_ent
);
423 so
->cache_timestamp
= so_cache_time
;
424 lck_mtx_unlock(so_cache_mtx
);
429 so_update_last_owner_locked(struct socket
*so
, proc_t self
)
431 if (so
->last_pid
!= 0) {
433 * last_pid and last_upid should remain zero for sockets
434 * created using sock_socket. The check above achieves that
436 if (self
== PROC_NULL
)
437 self
= current_proc();
439 if (so
->last_upid
!= proc_uniqueid(self
) ||
440 so
->last_pid
!= proc_pid(self
)) {
441 so
->last_upid
= proc_uniqueid(self
);
442 so
->last_pid
= proc_pid(self
);
443 proc_getexecutableuuid(self
, so
->last_uuid
,
444 sizeof (so
->last_uuid
));
450 so_update_policy(struct socket
*so
)
452 if (SOCK_DOM(so
) == PF_INET
|| SOCK_DOM(so
) == PF_INET6
)
453 (void) inp_update_policy(sotoinpcb(so
));
461 boolean_t rc
= FALSE
;
463 lck_mtx_lock(so_cache_mtx
);
465 so_cache_time
= net_uptime();
467 while (!STAILQ_EMPTY(&so_cache_head
)) {
468 VERIFY(cached_sock_count
> 0);
469 p
= STAILQ_FIRST(&so_cache_head
);
470 if ((so_cache_time
- p
->cache_timestamp
) <
474 STAILQ_REMOVE_HEAD(&so_cache_head
, so_cache_ent
);
477 zfree(so_cache_zone
, p
);
479 if (++n_freed
>= SO_CACHE_MAX_FREE_BATCH
) {
480 so_cache_max_freed
++;
485 /* Schedule again if there is more to cleanup */
486 if (!STAILQ_EMPTY(&so_cache_head
))
489 lck_mtx_unlock(so_cache_mtx
);
494 * Get a socket structure from our zone, and initialize it.
495 * We don't implement `waitok' yet (see comments in uipc_domain.c).
496 * Note that it would probably be better to allocate socket
497 * and PCB at the same time, but I'm not convinced that all
498 * the protocols can be easily modified to do this.
501 soalloc(int waitok
, int dom
, int type
)
505 if ((dom
== PF_INET
) && (type
== SOCK_STREAM
)) {
506 cached_sock_alloc(&so
, waitok
);
508 MALLOC_ZONE(so
, struct socket
*, sizeof (*so
), socket_zone
,
511 bzero(so
, sizeof (*so
));
514 so
->so_gencnt
= ++so_gencnt
;
515 so
->so_zone
= socket_zone
;
516 #if CONFIG_MACF_SOCKET
517 /* Convert waitok to M_WAITOK/M_NOWAIT for MAC Framework. */
518 if (mac_socket_label_init(so
, !waitok
) != 0) {
522 #endif /* MAC_SOCKET */
529 socreate_internal(int dom
, struct socket
**aso
, int type
, int proto
,
530 struct proc
*p
, uint32_t flags
, struct proc
*ep
)
537 extern int tcpconsdebug
;
544 prp
= pffindproto(dom
, proto
, type
);
546 prp
= pffindtype(dom
, type
);
548 if (prp
== NULL
|| prp
->pr_usrreqs
->pru_attach
== NULL
) {
549 if (pffinddomain(dom
) == NULL
)
550 return (EAFNOSUPPORT
);
552 if (pffindprotonotype(dom
, proto
) != NULL
)
555 return (EPROTONOSUPPORT
);
557 if (prp
->pr_type
!= type
)
559 so
= soalloc(1, dom
, type
);
563 if (flags
& SOCF_ASYNC
)
564 so
->so_state
|= SS_NBIO
;
566 if (flags
& SOCF_MP_SUBFLOW
) {
568 * A multipath subflow socket is used internally in the kernel,
569 * therefore it does not have a file desciptor associated by
572 so
->so_state
|= SS_NOFDREF
;
573 so
->so_flags
|= SOF_MP_SUBFLOW
;
575 #endif /* MULTIPATH */
577 TAILQ_INIT(&so
->so_incomp
);
578 TAILQ_INIT(&so
->so_comp
);
580 so
->last_upid
= proc_uniqueid(p
);
581 so
->last_pid
= proc_pid(p
);
582 proc_getexecutableuuid(p
, so
->last_uuid
, sizeof (so
->last_uuid
));
584 if (ep
!= PROC_NULL
&& ep
!= p
) {
585 so
->e_upid
= proc_uniqueid(ep
);
586 so
->e_pid
= proc_pid(ep
);
587 proc_getexecutableuuid(ep
, so
->e_uuid
, sizeof (so
->e_uuid
));
588 so
->so_flags
|= SOF_DELEGATED
;
591 so
->so_cred
= kauth_cred_proc_ref(p
);
592 if (!suser(kauth_cred_get(), NULL
))
593 so
->so_state
|= SS_PRIV
;
596 so
->so_rcv
.sb_flags
|= SB_RECV
;
597 so
->so_rcv
.sb_so
= so
->so_snd
.sb_so
= so
;
598 so
->next_lock_lr
= 0;
599 so
->next_unlock_lr
= 0;
601 #if CONFIG_MACF_SOCKET
602 mac_socket_label_associate(kauth_cred_get(), so
);
603 #endif /* MAC_SOCKET */
606 * Attachment will create the per pcb lock if necessary and
607 * increase refcount for creation, make sure it's done before
608 * socket is inserted in lists.
612 error
= (*prp
->pr_usrreqs
->pru_attach
)(so
, proto
, p
);
616 * If so_pcb is not zero, the socket will be leaked,
617 * so protocol attachment handler must be coded carefuly
619 so
->so_state
|= SS_NOFDREF
;
621 sofreelastref(so
, 1); /* will deallocate the socket */
625 atomic_add_32(&prp
->pr_domain
->dom_refs
, 1);
626 TAILQ_INIT(&so
->so_evlist
);
628 /* Attach socket filters for this protocol */
631 if (tcpconsdebug
== 2)
632 so
->so_options
|= SO_DEBUG
;
634 so_set_default_traffic_class(so
);
637 * If this thread or task is marked to create backgrounded sockets,
638 * mark the socket as background.
640 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_NEW_SOCKETS_BG
)) {
641 socket_set_traffic_mgt_flags(so
, TRAFFIC_MGT_SO_BACKGROUND
);
642 so
->so_background_thread
= current_thread();
647 * Don't mark Unix domain, system or multipath sockets as
648 * eligible for defunct by default.
653 so
->so_flags
|= SOF_NODEFUNCT
;
670 * <pru_attach>:ENOBUFS[AF_UNIX]
671 * <pru_attach>:ENOBUFS[TCP]
672 * <pru_attach>:ENOMEM[TCP]
673 * <pru_attach>:??? [other protocol families, IPSEC]
676 socreate(int dom
, struct socket
**aso
, int type
, int proto
)
678 return (socreate_internal(dom
, aso
, type
, proto
, current_proc(), 0,
683 socreate_delegate(int dom
, struct socket
**aso
, int type
, int proto
, pid_t epid
)
686 struct proc
*ep
= PROC_NULL
;
688 if ((proc_selfpid() != epid
) && ((ep
= proc_find(epid
)) == PROC_NULL
)) {
693 error
= socreate_internal(dom
, aso
, type
, proto
, current_proc(), 0, ep
);
696 * It might not be wise to hold the proc reference when calling
697 * socreate_internal since it calls soalloc with M_WAITOK
708 * <pru_bind>:EINVAL Invalid argument [COMMON_START]
709 * <pru_bind>:EAFNOSUPPORT Address family not supported
710 * <pru_bind>:EADDRNOTAVAIL Address not available.
711 * <pru_bind>:EINVAL Invalid argument
712 * <pru_bind>:EAFNOSUPPORT Address family not supported [notdef]
713 * <pru_bind>:EACCES Permission denied
714 * <pru_bind>:EADDRINUSE Address in use
715 * <pru_bind>:EAGAIN Resource unavailable, try again
716 * <pru_bind>:EPERM Operation not permitted
720 * Notes: It's not possible to fully enumerate the return codes above,
721 * since socket filter authors and protocol family authors may
722 * not choose to limit their error returns to those listed, even
723 * though this may result in some software operating incorrectly.
725 * The error codes which are enumerated above are those known to
726 * be returned by the tcp_usr_bind function supplied.
729 sobindlock(struct socket
*so
, struct sockaddr
*nam
, int dolock
)
731 struct proc
*p
= current_proc();
736 VERIFY(so
->so_usecount
> 1);
738 so_update_last_owner_locked(so
, p
);
739 so_update_policy(so
);
742 * If this is a bind request on a socket that has been marked
743 * as inactive, reject it now before we go any further.
745 if (so
->so_flags
& SOF_DEFUNCT
) {
747 SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n",
748 __func__
, proc_pid(p
), (uint64_t)VM_KERNEL_ADDRPERM(so
),
749 SOCK_DOM(so
), SOCK_TYPE(so
), error
));
754 error
= sflt_bind(so
, nam
);
757 error
= (*so
->so_proto
->pr_usrreqs
->pru_bind
)(so
, nam
, p
);
760 socket_unlock(so
, 1);
762 if (error
== EJUSTRETURN
)
769 sodealloc(struct socket
*so
)
771 kauth_cred_unref(&so
->so_cred
);
773 /* Remove any filters */
776 /* Delete the state allocated for msg queues on a socket */
777 if (so
->so_flags
& SOF_ENABLE_MSGS
) {
778 FREE(so
->so_msg_state
, M_TEMP
);
779 so
->so_msg_state
= NULL
;
781 VERIFY(so
->so_msg_state
== NULL
);
783 so
->so_gencnt
= ++so_gencnt
;
785 #if CONFIG_MACF_SOCKET
786 mac_socket_label_destroy(so
);
787 #endif /* MAC_SOCKET */
789 if (so
->cached_in_sock_layer
) {
790 cached_sock_free(so
);
792 FREE_ZONE(so
, sizeof (*so
), so
->so_zone
);
800 * <pru_listen>:EINVAL[AF_UNIX]
801 * <pru_listen>:EINVAL[TCP]
802 * <pru_listen>:EADDRNOTAVAIL[TCP] Address not available.
803 * <pru_listen>:EINVAL[TCP] Invalid argument
804 * <pru_listen>:EAFNOSUPPORT[TCP] Address family not supported [notdef]
805 * <pru_listen>:EACCES[TCP] Permission denied
806 * <pru_listen>:EADDRINUSE[TCP] Address in use
807 * <pru_listen>:EAGAIN[TCP] Resource unavailable, try again
808 * <pru_listen>:EPERM[TCP] Operation not permitted
811 * Notes: Other <pru_listen> returns depend on the protocol family; all
812 * <sf_listen> returns depend on what the filter author causes
813 * their filter to return.
816 solisten(struct socket
*so
, int backlog
)
818 struct proc
*p
= current_proc();
823 so_update_last_owner_locked(so
, p
);
824 so_update_policy(so
);
826 if (so
->so_proto
== NULL
) {
830 if ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) == 0) {
836 * If the listen request is made on a socket that is not fully
837 * disconnected, or on a socket that has been marked as inactive,
838 * reject the request now.
841 (SS_ISCONNECTED
|SS_ISCONNECTING
|SS_ISDISCONNECTING
)) ||
842 (so
->so_flags
& SOF_DEFUNCT
)) {
844 if (so
->so_flags
& SOF_DEFUNCT
) {
845 SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] "
846 "(%d)\n", __func__
, proc_pid(p
),
847 (uint64_t)VM_KERNEL_ADDRPERM(so
),
848 SOCK_DOM(so
), SOCK_TYPE(so
), error
));
853 if ((so
->so_restrictions
& SO_RESTRICT_DENY_IN
) != 0) {
858 error
= sflt_listen(so
);
860 error
= (*so
->so_proto
->pr_usrreqs
->pru_listen
)(so
, p
);
863 if (error
== EJUSTRETURN
)
868 if (TAILQ_EMPTY(&so
->so_comp
))
869 so
->so_options
|= SO_ACCEPTCONN
;
871 * POSIX: The implementation may have an upper limit on the length of
872 * the listen queue-either global or per accepting socket. If backlog
873 * exceeds this limit, the length of the listen queue is set to the
876 * If listen() is called with a backlog argument value that is less
877 * than 0, the function behaves as if it had been called with a backlog
878 * argument value of 0.
880 * A backlog argument of 0 may allow the socket to accept connections,
881 * in which case the length of the listen queue may be set to an
882 * implementation-defined minimum value.
884 if (backlog
<= 0 || backlog
> somaxconn
)
887 so
->so_qlimit
= backlog
;
889 socket_unlock(so
, 1);
894 sofreelastref(struct socket
*so
, int dealloc
)
896 struct socket
*head
= so
->so_head
;
898 /* Assume socket is locked */
900 if (!(so
->so_flags
& SOF_PCBCLEARING
) || !(so
->so_state
& SS_NOFDREF
)) {
901 selthreadclear(&so
->so_snd
.sb_sel
);
902 selthreadclear(&so
->so_rcv
.sb_sel
);
903 so
->so_rcv
.sb_flags
&= ~(SB_SEL
|SB_UPCALL
);
904 so
->so_snd
.sb_flags
&= ~(SB_SEL
|SB_UPCALL
);
909 socket_lock(head
, 1);
910 if (so
->so_state
& SS_INCOMP
) {
911 TAILQ_REMOVE(&head
->so_incomp
, so
, so_list
);
913 } else if (so
->so_state
& SS_COMP
) {
915 * We must not decommission a socket that's
916 * on the accept(2) queue. If we do, then
917 * accept(2) may hang after select(2) indicated
918 * that the listening socket was ready.
920 selthreadclear(&so
->so_snd
.sb_sel
);
921 selthreadclear(&so
->so_rcv
.sb_sel
);
922 so
->so_rcv
.sb_flags
&= ~(SB_SEL
|SB_UPCALL
);
923 so
->so_snd
.sb_flags
&= ~(SB_SEL
|SB_UPCALL
);
925 socket_unlock(head
, 1);
928 panic("sofree: not queued");
931 so
->so_state
&= ~SS_INCOMP
;
933 socket_unlock(head
, 1);
939 if (so
->so_flags
& SOF_FLOW_DIVERT
) {
940 flow_divert_detach(so
);
942 #endif /* FLOW_DIVERT */
944 /* 3932268: disable upcall */
945 so
->so_rcv
.sb_flags
&= ~SB_UPCALL
;
946 so
->so_snd
.sb_flags
&= ~SB_UPCALL
;
954 soclose_wait_locked(struct socket
*so
)
956 lck_mtx_t
*mutex_held
;
958 if (so
->so_proto
->pr_getlock
!= NULL
)
959 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
961 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
962 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
965 * Double check here and return if there's no outstanding upcall;
966 * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
968 if (!so
->so_upcallusecount
|| !(so
->so_flags
& SOF_UPCALLCLOSEWAIT
))
970 so
->so_rcv
.sb_flags
&= ~SB_UPCALL
;
971 so
->so_snd
.sb_flags
&= ~SB_UPCALL
;
972 so
->so_flags
|= SOF_CLOSEWAIT
;
973 (void) msleep((caddr_t
)&so
->so_upcallusecount
, mutex_held
, (PZERO
- 1),
974 "soclose_wait_locked", NULL
);
975 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
976 so
->so_flags
&= ~SOF_CLOSEWAIT
;
980 * Close a socket on last file table reference removal.
981 * Initiate disconnect if connected.
982 * Free socket when disconnect complete.
985 soclose_locked(struct socket
*so
)
988 lck_mtx_t
*mutex_held
;
991 if (so
->so_usecount
== 0) {
992 panic("soclose: so=%p refcount=0\n", so
);
996 sflt_notify(so
, sock_evt_closing
, NULL
);
998 if (so
->so_upcallusecount
)
999 soclose_wait_locked(so
);
1001 if ((so
->so_options
& SO_ACCEPTCONN
)) {
1002 struct socket
*sp
, *sonext
;
1006 * We do not want new connection to be added
1007 * to the connection queues
1009 so
->so_options
&= ~SO_ACCEPTCONN
;
1011 for (sp
= TAILQ_FIRST(&so
->so_incomp
);
1012 sp
!= NULL
; sp
= sonext
) {
1013 sonext
= TAILQ_NEXT(sp
, so_list
);
1017 * skip sockets thrown away by tcpdropdropblreq
1018 * they will get cleanup by the garbage collection.
1019 * otherwise, remove the incomp socket from the queue
1020 * and let soabort trigger the appropriate cleanup.
1022 if (sp
->so_flags
& SOF_OVERFLOW
)
1025 if (so
->so_proto
->pr_getlock
!= NULL
) {
1027 * Lock ordering for consistency with the
1028 * rest of the stack, we lock the socket
1029 * first and then grabb the head.
1031 socket_unlock(so
, 0);
1037 TAILQ_REMOVE(&so
->so_incomp
, sp
, so_list
);
1040 if (sp
->so_state
& SS_INCOMP
) {
1041 sp
->so_state
&= ~SS_INCOMP
;
1048 socket_unlock(sp
, 1);
1051 while ((sp
= TAILQ_FIRST(&so
->so_comp
)) != NULL
) {
1052 /* Dequeue from so_comp since sofree() won't do it */
1053 TAILQ_REMOVE(&so
->so_comp
, sp
, so_list
);
1056 if (so
->so_proto
->pr_getlock
!= NULL
) {
1057 socket_unlock(so
, 0);
1061 if (sp
->so_state
& SS_COMP
) {
1062 sp
->so_state
&= ~SS_COMP
;
1068 if (so
->so_proto
->pr_getlock
!= NULL
) {
1069 socket_unlock(sp
, 1);
1074 if (so
->so_pcb
== NULL
) {
1075 /* 3915887: mark the socket as ready for dealloc */
1076 so
->so_flags
|= SOF_PCBCLEARING
;
1079 if (so
->so_state
& SS_ISCONNECTED
) {
1080 if ((so
->so_state
& SS_ISDISCONNECTING
) == 0) {
1081 error
= sodisconnectlocked(so
);
1085 if (so
->so_options
& SO_LINGER
) {
1086 if ((so
->so_state
& SS_ISDISCONNECTING
) &&
1087 (so
->so_state
& SS_NBIO
))
1089 if (so
->so_proto
->pr_getlock
!= NULL
)
1090 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
1092 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1093 while (so
->so_state
& SS_ISCONNECTED
) {
1094 ts
.tv_sec
= (so
->so_linger
/100);
1095 ts
.tv_nsec
= (so
->so_linger
% 100) *
1096 NSEC_PER_USEC
* 1000 * 10;
1097 error
= msleep((caddr_t
)&so
->so_timeo
,
1098 mutex_held
, PSOCK
| PCATCH
, "soclose", &ts
);
1101 * It's OK when the time fires,
1102 * don't report an error
1104 if (error
== EWOULDBLOCK
)
1112 if (so
->so_usecount
== 0) {
1113 panic("soclose: usecount is zero so=%p\n", so
);
1116 if (so
->so_pcb
!= NULL
&& !(so
->so_flags
& SOF_PCBCLEARING
)) {
1118 * Let NetworkStatistics know this PCB is going away
1119 * before we detach it.
1121 if (nstat_collect
&&
1122 (SOCK_DOM(so
) == PF_INET
|| SOCK_DOM(so
) == PF_INET6
))
1123 nstat_pcb_detach(so
->so_pcb
);
1125 int error2
= (*so
->so_proto
->pr_usrreqs
->pru_detach
)(so
);
1129 if (so
->so_usecount
<= 0) {
1130 panic("soclose: usecount is zero so=%p\n", so
);
1134 if (so
->so_pcb
!= NULL
&& !(so
->so_flags
& SOF_MP_SUBFLOW
) &&
1135 (so
->so_state
& SS_NOFDREF
)) {
1136 panic("soclose: NOFDREF");
1139 so
->so_state
|= SS_NOFDREF
;
1141 if (so
->so_flags
& SOF_MP_SUBFLOW
)
1142 so
->so_flags
&= ~SOF_MP_SUBFLOW
;
1144 if ((so
->so_flags
& SOF_KNOTE
) != 0)
1145 KNOTE(&so
->so_klist
, SO_FILT_HINT_LOCKED
);
1147 atomic_add_32(&so
->so_proto
->pr_domain
->dom_refs
, -1);
1156 soclose(struct socket
*so
)
1161 if (so
->so_retaincnt
== 0) {
1162 error
= soclose_locked(so
);
1165 * if the FD is going away, but socket is
1166 * retained in kernel remove its reference
1169 if (so
->so_usecount
< 2)
1170 panic("soclose: retaincnt non null and so=%p "
1171 "usecount=%d\n", so
, so
->so_usecount
);
1173 socket_unlock(so
, 1);
1178 * Must be called at splnet...
1180 /* Should already be locked */
1182 soabort(struct socket
*so
)
1186 #ifdef MORE_LOCKING_DEBUG
1187 lck_mtx_t
*mutex_held
;
1189 if (so
->so_proto
->pr_getlock
!= NULL
)
1190 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
1192 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1193 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
1196 if ((so
->so_flags
& SOF_ABORTED
) == 0) {
1197 so
->so_flags
|= SOF_ABORTED
;
1198 error
= (*so
->so_proto
->pr_usrreqs
->pru_abort
)(so
);
1208 soacceptlock(struct socket
*so
, struct sockaddr
**nam
, int dolock
)
1215 so_update_last_owner_locked(so
, PROC_NULL
);
1216 so_update_policy(so
);
1218 if ((so
->so_state
& SS_NOFDREF
) == 0)
1219 panic("soaccept: !NOFDREF");
1220 so
->so_state
&= ~SS_NOFDREF
;
1221 error
= (*so
->so_proto
->pr_usrreqs
->pru_accept
)(so
, nam
);
1224 socket_unlock(so
, 1);
1229 soaccept(struct socket
*so
, struct sockaddr
**nam
)
1231 return (soacceptlock(so
, nam
, 1));
1235 soacceptfilter(struct socket
*so
)
1237 struct sockaddr
*local
= NULL
, *remote
= NULL
;
1239 struct socket
*head
= so
->so_head
;
1242 * Hold the lock even if this socket has not been made visible
1243 * to the filter(s). For sockets with global locks, this protects
1244 * against the head or peer going away
1247 if (sogetaddr_locked(so
, &remote
, 1) != 0 ||
1248 sogetaddr_locked(so
, &local
, 0) != 0) {
1249 so
->so_state
&= ~(SS_NOFDREF
| SS_COMP
);
1251 socket_unlock(so
, 1);
1253 /* Out of resources; try it again next time */
1254 error
= ECONNABORTED
;
1258 error
= sflt_accept(head
, so
, local
, remote
);
1261 * If we get EJUSTRETURN from one of the filters, mark this socket
1262 * as inactive and return it anyway. This newly accepted socket
1263 * will be disconnected later before we hand it off to the caller.
1265 if (error
== EJUSTRETURN
) {
1267 (void) sosetdefunct(current_proc(), so
,
1268 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL
, FALSE
);
1273 * This may seem like a duplication to the above error
1274 * handling part when we return ECONNABORTED, except
1275 * the following is done while holding the lock since
1276 * the socket has been exposed to the filter(s) earlier.
1278 so
->so_state
&= ~(SS_NOFDREF
| SS_COMP
);
1280 socket_unlock(so
, 1);
1282 /* Propagate socket filter's error code to the caller */
1284 socket_unlock(so
, 1);
1287 /* Callee checks for NULL pointer */
1288 sock_freeaddr(remote
);
1289 sock_freeaddr(local
);
1294 * Returns: 0 Success
1295 * EOPNOTSUPP Operation not supported on socket
1296 * EISCONN Socket is connected
1297 * <pru_connect>:EADDRNOTAVAIL Address not available.
1298 * <pru_connect>:EINVAL Invalid argument
1299 * <pru_connect>:EAFNOSUPPORT Address family not supported [notdef]
1300 * <pru_connect>:EACCES Permission denied
1301 * <pru_connect>:EADDRINUSE Address in use
1302 * <pru_connect>:EAGAIN Resource unavailable, try again
1303 * <pru_connect>:EPERM Operation not permitted
1304 * <sf_connect_out>:??? [anything a filter writer might set]
1307 soconnectlock(struct socket
*so
, struct sockaddr
*nam
, int dolock
)
1310 struct proc
*p
= current_proc();
1315 so_update_last_owner_locked(so
, p
);
1316 so_update_policy(so
);
1319 * If this is a listening socket or if this is a previously-accepted
1320 * socket that has been marked as inactive, reject the connect request.
1322 if ((so
->so_options
& SO_ACCEPTCONN
) || (so
->so_flags
& SOF_DEFUNCT
)) {
1324 if (so
->so_flags
& SOF_DEFUNCT
) {
1325 SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] "
1326 "(%d)\n", __func__
, proc_pid(p
),
1327 (uint64_t)VM_KERNEL_ADDRPERM(so
),
1328 SOCK_DOM(so
), SOCK_TYPE(so
), error
));
1331 socket_unlock(so
, 1);
1335 if ((so
->so_restrictions
& SO_RESTRICT_DENY_OUT
) != 0) {
1337 socket_unlock(so
, 1);
1342 * If protocol is connection-based, can only connect once.
1343 * Otherwise, if connected, try to disconnect first.
1344 * This allows user to disconnect by connecting to, e.g.,
1347 if (so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
) &&
1348 ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) ||
1349 (error
= sodisconnectlocked(so
)))) {
1353 * Run connect filter before calling protocol:
1354 * - non-blocking connect returns before completion;
1356 error
= sflt_connectout(so
, nam
);
1358 if (error
== EJUSTRETURN
)
1361 error
= (*so
->so_proto
->pr_usrreqs
->pru_connect
)
1366 socket_unlock(so
, 1);
1371 soconnect(struct socket
*so
, struct sockaddr
*nam
)
1373 return (soconnectlock(so
, nam
, 1));
1377 * Returns: 0 Success
1378 * <pru_connect2>:EINVAL[AF_UNIX]
1379 * <pru_connect2>:EPROTOTYPE[AF_UNIX]
1380 * <pru_connect2>:??? [other protocol families]
1382 * Notes: <pru_connect2> is not supported by [TCP].
1385 soconnect2(struct socket
*so1
, struct socket
*so2
)
1389 socket_lock(so1
, 1);
1390 if (so2
->so_proto
->pr_lock
)
1391 socket_lock(so2
, 1);
1393 error
= (*so1
->so_proto
->pr_usrreqs
->pru_connect2
)(so1
, so2
);
1395 socket_unlock(so1
, 1);
1396 if (so2
->so_proto
->pr_lock
)
1397 socket_unlock(so2
, 1);
1402 soconnectxlocked(struct socket
*so
, struct sockaddr_list
**src_sl
,
1403 struct sockaddr_list
**dst_sl
, struct proc
*p
, uint32_t ifscope
,
1404 associd_t aid
, connid_t
*pcid
, uint32_t flags
, void *arg
,
1410 * If this is a listening socket or if this is a previously-accepted
1411 * socket that has been marked as inactive, reject the connect request.
1413 if ((so
->so_options
& SO_ACCEPTCONN
) || (so
->so_flags
& SOF_DEFUNCT
)) {
1415 if (so
->so_flags
& SOF_DEFUNCT
) {
1416 SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] "
1417 "(%d)\n", __func__
, proc_pid(p
),
1418 (uint64_t)VM_KERNEL_ADDRPERM(so
),
1419 SOCK_DOM(so
), SOCK_TYPE(so
), error
));
1424 if ((so
->so_restrictions
& SO_RESTRICT_DENY_OUT
) != 0)
1428 * If protocol is connection-based, can only connect once
1429 * unless PR_MULTICONN is set. Otherwise, if connected,
1430 * try to disconnect first. This allows user to disconnect
1431 * by connecting to, e.g., a null address.
1433 if ((so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
)) &&
1434 !(so
->so_proto
->pr_flags
& PR_MULTICONN
) &&
1435 ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) ||
1436 (error
= sodisconnectlocked(so
)) != 0)) {
1440 * Run connect filter before calling protocol:
1441 * - non-blocking connect returns before completion;
1443 error
= sflt_connectxout(so
, dst_sl
);
1445 if (error
== EJUSTRETURN
)
1448 error
= (*so
->so_proto
->pr_usrreqs
->pru_connectx
)
1449 (so
, src_sl
, dst_sl
, p
, ifscope
, aid
, pcid
,
1450 flags
, arg
, arglen
);
1458 sodisconnectlocked(struct socket
*so
)
1462 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
1466 if (so
->so_state
& SS_ISDISCONNECTING
) {
1471 error
= (*so
->so_proto
->pr_usrreqs
->pru_disconnect
)(so
);
1473 sflt_notify(so
, sock_evt_disconnected
, NULL
);
1479 /* Locking version */
1481 sodisconnect(struct socket
*so
)
1486 error
= sodisconnectlocked(so
);
1487 socket_unlock(so
, 1);
1492 sodisconnectxlocked(struct socket
*so
, associd_t aid
, connid_t cid
)
1497 * Call the protocol disconnectx handler; let it handle all
1498 * matters related to the connection state of this session.
1500 error
= (*so
->so_proto
->pr_usrreqs
->pru_disconnectx
)(so
, aid
, cid
);
1503 * The event applies only for the session, not for
1504 * the disconnection of individual subflows.
1506 if (so
->so_state
& (SS_ISDISCONNECTING
|SS_ISDISCONNECTED
))
1507 sflt_notify(so
, sock_evt_disconnected
, NULL
);
1513 sodisconnectx(struct socket
*so
, associd_t aid
, connid_t cid
)
1518 error
= sodisconnectxlocked(so
, aid
, cid
);
1519 socket_unlock(so
, 1);
1524 sopeelofflocked(struct socket
*so
, associd_t aid
, struct socket
**psop
)
1526 return ((*so
->so_proto
->pr_usrreqs
->pru_peeloff
)(so
, aid
, psop
));
1529 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
1532 * sosendcheck will lock the socket buffer if it isn't locked and
1533 * verify that there is space for the data being inserted.
1535 * Returns: 0 Success
1537 * sblock:EWOULDBLOCK
1544 sosendcheck(struct socket
*so
, struct sockaddr
*addr
, user_ssize_t resid
,
1545 int32_t clen
, int32_t atomic
, int flags
, int *sblocked
,
1546 struct mbuf
*control
)
1553 if (*sblocked
== 0) {
1554 if ((so
->so_snd
.sb_flags
& SB_LOCK
) != 0 &&
1555 so
->so_send_filt_thread
!= 0 &&
1556 so
->so_send_filt_thread
== current_thread()) {
1558 * We're being called recursively from a filter,
1559 * allow this to continue. Radar 4150520.
1560 * Don't set sblocked because we don't want
1561 * to perform an unlock later.
1565 error
= sblock(&so
->so_snd
, SBLOCKWAIT(flags
));
1567 if (so
->so_flags
& SOF_DEFUNCT
)
1576 * If a send attempt is made on a socket that has been marked
1577 * as inactive (disconnected), reject the request.
1579 if (so
->so_flags
& SOF_DEFUNCT
) {
1582 SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n",
1583 __func__
, proc_selfpid(), (uint64_t)VM_KERNEL_ADDRPERM(so
),
1584 SOCK_DOM(so
), SOCK_TYPE(so
), error
));
1588 if (so
->so_state
& SS_CANTSENDMORE
)
1592 error
= so
->so_error
;
1597 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
1598 if ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) != 0) {
1599 if ((so
->so_state
& SS_ISCONFIRMING
) == 0 &&
1600 !(resid
== 0 && clen
!= 0))
1602 } else if (addr
== 0 && !(flags
&MSG_HOLD
)) {
1603 return ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) ?
1604 ENOTCONN
: EDESTADDRREQ
);
1607 if (so
->so_flags
& SOF_ENABLE_MSGS
)
1608 space
= msgq_sbspace(so
, control
);
1610 space
= sbspace(&so
->so_snd
);
1612 if (flags
& MSG_OOB
)
1614 if ((atomic
&& resid
> so
->so_snd
.sb_hiwat
) ||
1615 clen
> so
->so_snd
.sb_hiwat
)
1618 if ((space
< resid
+ clen
&&
1619 (atomic
|| space
< (int32_t)so
->so_snd
.sb_lowat
|| space
< clen
)) ||
1620 (so
->so_type
== SOCK_STREAM
&& so_wait_for_if_feedback(so
))) {
1621 if ((so
->so_state
& SS_NBIO
) || (flags
& MSG_NBIO
) ||
1623 return (EWOULDBLOCK
);
1625 sbunlock(&so
->so_snd
, TRUE
); /* keep socket locked */
1627 error
= sbwait(&so
->so_snd
);
1629 if (so
->so_flags
& SOF_DEFUNCT
)
1640 * If send must go all at once and message is larger than
1641 * send buffering, then hard error.
1642 * Lock against other senders.
1643 * If must go all at once and not enough room now, then
1644 * inform user that this would block and do nothing.
1645 * Otherwise, if nonblocking, send as much as possible.
1646 * The data to be sent is described by "uio" if nonzero,
1647 * otherwise by the mbuf chain "top" (which must be null
1648 * if uio is not). Data provided in mbuf chain must be small
1649 * enough to send all at once.
1651 * Returns nonzero on error, timeout or signal; callers
1652 * must check for short counts if EINTR/ERESTART are returned.
1653 * Data and control buffers are freed on return.
1655 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
1656 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
1657 * point at the mbuf chain being constructed and go from there.
1659 * Returns: 0 Success
1665 * sosendcheck:EWOULDBLOCK
1669 * sosendcheck:??? [value from so_error]
1670 * <pru_send>:ECONNRESET[TCP]
1671 * <pru_send>:EINVAL[TCP]
1672 * <pru_send>:ENOBUFS[TCP]
1673 * <pru_send>:EADDRINUSE[TCP]
1674 * <pru_send>:EADDRNOTAVAIL[TCP]
1675 * <pru_send>:EAFNOSUPPORT[TCP]
1676 * <pru_send>:EACCES[TCP]
1677 * <pru_send>:EAGAIN[TCP]
1678 * <pru_send>:EPERM[TCP]
1679 * <pru_send>:EMSGSIZE[TCP]
1680 * <pru_send>:EHOSTUNREACH[TCP]
1681 * <pru_send>:ENETUNREACH[TCP]
1682 * <pru_send>:ENETDOWN[TCP]
1683 * <pru_send>:ENOMEM[TCP]
1684 * <pru_send>:ENOBUFS[TCP]
1685 * <pru_send>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1686 * <pru_send>:EINVAL[AF_UNIX]
1687 * <pru_send>:EOPNOTSUPP[AF_UNIX]
1688 * <pru_send>:EPIPE[AF_UNIX]
1689 * <pru_send>:ENOTCONN[AF_UNIX]
1690 * <pru_send>:EISCONN[AF_UNIX]
1691 * <pru_send>:???[AF_UNIX] [whatever a filter author chooses]
1692 * <sf_data_out>:??? [whatever a filter author chooses]
1694 * Notes: Other <pru_send> returns depend on the protocol family; all
1695 * <sf_data_out> returns depend on what the filter author causes
1696 * their filter to return.
1699 sosend(struct socket
*so
, struct sockaddr
*addr
, struct uio
*uio
,
1700 struct mbuf
*top
, struct mbuf
*control
, int flags
)
1703 struct mbuf
*m
, *freelist
= NULL
;
1704 user_ssize_t space
, len
, resid
;
1705 int clen
= 0, error
, dontroute
, mlen
, sendflags
;
1706 int atomic
= sosendallatonce(so
) || top
;
1708 struct proc
*p
= current_proc();
1709 struct mbuf
*control_copy
= NULL
;
1712 resid
= uio_resid(uio
);
1714 resid
= top
->m_pkthdr
.len
;
1716 KERNEL_DEBUG((DBG_FNC_SOSEND
| DBG_FUNC_START
), so
, resid
,
1717 so
->so_snd
.sb_cc
, so
->so_snd
.sb_lowat
, so
->so_snd
.sb_hiwat
);
1720 so_update_last_owner_locked(so
, p
);
1721 so_update_policy(so
);
1723 if (so
->so_type
!= SOCK_STREAM
&& (flags
& MSG_OOB
) != 0) {
1725 socket_unlock(so
, 1);
1730 * In theory resid should be unsigned.
1731 * However, space must be signed, as it might be less than 0
1732 * if we over-committed, and we must use a signed comparison
1733 * of space and resid. On the other hand, a negative resid
1734 * causes us to loop sending 0-length segments to the protocol.
1736 * Usually, MSG_EOR isn't used on SOCK_STREAM type sockets.
1737 * But it will be used by sockets doing message delivery.
1739 * Note: We limit resid to be a positive 32 bits value as we use
1740 * imin() to set bytes_to_copy -- radr://14558484
1742 if ((int32_t)resid
< 0 || (so
->so_type
== SOCK_STREAM
&&
1743 !(so
->so_flags
& SOF_ENABLE_MSGS
) && (flags
& MSG_EOR
))) {
1745 socket_unlock(so
, 1);
1749 dontroute
= (flags
& MSG_DONTROUTE
) &&
1750 (so
->so_options
& SO_DONTROUTE
) == 0 &&
1751 (so
->so_proto
->pr_flags
& PR_ATOMIC
);
1752 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgsnd
);
1754 if (control
!= NULL
)
1755 clen
= control
->m_len
;
1758 error
= sosendcheck(so
, addr
, resid
, clen
, atomic
, flags
,
1759 &sblocked
, control
);
1764 if (so
->so_flags
& SOF_ENABLE_MSGS
)
1765 space
= msgq_sbspace(so
, control
);
1767 space
= sbspace(&so
->so_snd
) - clen
;
1768 space
+= ((flags
& MSG_OOB
) ? 1024 : 0);
1773 * Data is prepackaged in "top".
1776 if (flags
& MSG_EOR
)
1777 top
->m_flags
|= M_EOR
;
1783 bytes_to_copy
= imin(resid
, space
);
1785 if (sosendminchain
> 0)
1788 chainlength
= sosendmaxchain
;
1791 * Attempt to use larger than system page-size
1792 * clusters for large writes only if there is
1793 * a jumbo cluster pool and if the socket is
1794 * marked accordingly.
1796 jumbocl
= sosendjcl
&& njcl
> 0 &&
1797 ((so
->so_flags
& SOF_MULTIPAGES
) ||
1798 sosendjcl_ignore_capab
);
1800 socket_unlock(so
, 0);
1804 int hdrs_needed
= (top
== NULL
) ? 1 : 0;
1807 * try to maintain a local cache of mbuf
1808 * clusters needed to complete this
1809 * write the list is further limited to
1810 * the number that are currently needed
1811 * to fill the socket this mechanism
1812 * allows a large number of mbufs/
1813 * clusters to be grabbed under a single
1814 * mbuf lock... if we can't get any
1815 * clusters, than fall back to trying
1816 * for mbufs if we fail early (or
1817 * miscalcluate the number needed) make
1818 * sure to release any clusters we
1819 * haven't yet consumed.
1821 if (freelist
== NULL
&&
1822 bytes_to_copy
> MBIGCLBYTES
&&
1825 bytes_to_copy
/ M16KCLBYTES
;
1827 if ((bytes_to_copy
-
1828 (num_needed
* M16KCLBYTES
))
1833 m_getpackets_internal(
1834 (unsigned int *)&num_needed
,
1835 hdrs_needed
, M_WAIT
, 0,
1838 * Fall back to 4K cluster size
1839 * if allocation failed
1843 if (freelist
== NULL
&&
1844 bytes_to_copy
> MCLBYTES
) {
1846 bytes_to_copy
/ MBIGCLBYTES
;
1848 if ((bytes_to_copy
-
1849 (num_needed
* MBIGCLBYTES
)) >=
1854 m_getpackets_internal(
1855 (unsigned int *)&num_needed
,
1856 hdrs_needed
, M_WAIT
, 0,
1859 * Fall back to cluster size
1860 * if allocation failed
1864 if (freelist
== NULL
&&
1865 bytes_to_copy
> MINCLSIZE
) {
1867 bytes_to_copy
/ MCLBYTES
;
1869 if ((bytes_to_copy
-
1870 (num_needed
* MCLBYTES
)) >=
1875 m_getpackets_internal(
1876 (unsigned int *)&num_needed
,
1877 hdrs_needed
, M_WAIT
, 0,
1880 * Fall back to a single mbuf
1881 * if allocation failed
1885 if (freelist
== NULL
) {
1893 if (freelist
== NULL
) {
1899 * For datagram protocols,
1900 * leave room for protocol
1901 * headers in first mbuf.
1903 if (atomic
&& top
== NULL
&&
1904 bytes_to_copy
< MHLEN
) {
1910 freelist
= m
->m_next
;
1913 if ((m
->m_flags
& M_EXT
))
1914 mlen
= m
->m_ext
.ext_size
;
1915 else if ((m
->m_flags
& M_PKTHDR
))
1917 MHLEN
- m_leadingspace(m
);
1920 len
= imin(mlen
, bytes_to_copy
);
1926 error
= uiomove(mtod(m
, caddr_t
),
1929 resid
= uio_resid(uio
);
1933 top
->m_pkthdr
.len
+= len
;
1938 if (flags
& MSG_EOR
)
1939 top
->m_flags
|= M_EOR
;
1942 bytes_to_copy
= min(resid
, space
);
1944 } while (space
> 0 &&
1945 (chainlength
< sosendmaxchain
|| atomic
||
1946 resid
< MINCLSIZE
));
1954 if (flags
& (MSG_HOLD
|MSG_SEND
)) {
1955 /* Enqueue for later, go away if HOLD */
1957 if (so
->so_temp
&& (flags
& MSG_FLUSH
)) {
1958 m_freem(so
->so_temp
);
1962 so
->so_tail
->m_next
= top
;
1969 if (flags
& MSG_HOLD
) {
1976 so
->so_options
|= SO_DONTROUTE
;
1978 /* Compute flags here, for pru_send and NKEs */
1979 sendflags
= (flags
& MSG_OOB
) ? PRUS_OOB
:
1981 * If the user set MSG_EOF, the protocol
1982 * understands this flag and nothing left to
1983 * send then use PRU_SEND_EOF instead of PRU_SEND.
1985 ((flags
& MSG_EOF
) &&
1986 (so
->so_proto
->pr_flags
& PR_IMPLOPCL
) &&
1987 (resid
<= 0)) ? PRUS_EOF
:
1988 /* If there is more to send set PRUS_MORETOCOME */
1989 (resid
> 0 && space
> 0) ? PRUS_MORETOCOME
: 0;
1992 * Socket filter processing
1994 error
= sflt_data_out(so
, addr
, &top
,
1995 &control
, (sendflags
& MSG_OOB
) ?
1996 sock_data_filt_flag_oob
: 0);
1998 if (error
== EJUSTRETURN
) {
2008 * End Socket filter processing
2011 if (so
->so_flags
& SOF_ENABLE_MSGS
) {
2013 * Make a copy of control mbuf,
2014 * so that msg priority can be
2015 * passed to subsequent mbufs.
2017 control_copy
= m_dup(control
, M_NOWAIT
);
2019 error
= (*so
->so_proto
->pr_usrreqs
->pru_send
)
2020 (so
, sendflags
, top
, addr
, control
, p
);
2022 if (flags
& MSG_SEND
)
2026 so
->so_options
&= ~SO_DONTROUTE
;
2029 control
= control_copy
;
2030 control_copy
= NULL
;
2035 } while (resid
&& space
> 0);
2040 sbunlock(&so
->so_snd
, FALSE
); /* will unlock socket */
2042 socket_unlock(so
, 1);
2046 if (control
!= NULL
)
2048 if (freelist
!= NULL
)
2049 m_freem_list(freelist
);
2050 if (control_copy
!= NULL
)
2051 m_freem(control_copy
);
2053 KERNEL_DEBUG(DBG_FNC_SOSEND
| DBG_FUNC_END
, so
, resid
, so
->so_snd
.sb_cc
,
2060 * Implement receive operations on a socket.
2061 * We depend on the way that records are added to the sockbuf
2062 * by sbappend*. In particular, each record (mbufs linked through m_next)
2063 * must begin with an address if the protocol so specifies,
2064 * followed by an optional mbuf or mbufs containing ancillary data,
2065 * and then zero or more mbufs of data.
2066 * In order to avoid blocking network interrupts for the entire time here,
2067 * we splx() while doing the actual copy to user space.
2068 * Although the sockbuf is locked, new data may still be appended,
2069 * and thus we must maintain consistency of the sockbuf during that time.
2071 * The caller may receive the data as a single mbuf chain by supplying
2072 * an mbuf **mp0 for use in returning the chain. The uio is then used
2073 * only for the count in uio_resid.
2075 * Returns: 0 Success
2080 * sblock:EWOULDBLOCK
2084 * sodelayed_copy:EFAULT
2085 * <pru_rcvoob>:EINVAL[TCP]
2086 * <pru_rcvoob>:EWOULDBLOCK[TCP]
2088 * <pr_domain->dom_externalize>:EMSGSIZE[AF_UNIX]
2089 * <pr_domain->dom_externalize>:ENOBUFS[AF_UNIX]
2090 * <pr_domain->dom_externalize>:???
2092 * Notes: Additional return values from calls through <pru_rcvoob> and
2093 * <pr_domain->dom_externalize> depend on protocols other than
2094 * TCP or AF_UNIX, which are documented above.
2097 soreceive(struct socket
*so
, struct sockaddr
**psa
, struct uio
*uio
,
2098 struct mbuf
**mp0
, struct mbuf
**controlp
, int *flagsp
)
2100 struct mbuf
*m
, **mp
, *ml
= NULL
;
2101 struct mbuf
*nextrecord
, *free_list
;
2102 int flags
, error
, offset
;
2104 struct protosw
*pr
= so
->so_proto
;
2106 user_ssize_t orig_resid
= uio_resid(uio
);
2107 user_ssize_t delayed_copy_len
;
2110 struct proc
*p
= current_proc();
2112 KERNEL_DEBUG(DBG_FNC_SORECEIVE
| DBG_FUNC_START
, so
, uio_resid(uio
),
2113 so
->so_rcv
.sb_cc
, so
->so_rcv
.sb_lowat
, so
->so_rcv
.sb_hiwat
);
2116 so_update_last_owner_locked(so
, p
);
2117 so_update_policy(so
);
2119 #ifdef MORE_LOCKING_DEBUG
2120 if (so
->so_usecount
== 1) {
2121 panic("%s: so=%x no other reference on socket\n", __func__
, so
);
2128 if (controlp
!= NULL
)
2131 flags
= *flagsp
&~ MSG_EOR
;
2136 * If a recv attempt is made on a previously-accepted socket
2137 * that has been marked as inactive (disconnected), reject
2140 if (so
->so_flags
& SOF_DEFUNCT
) {
2141 struct sockbuf
*sb
= &so
->so_rcv
;
2144 SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n",
2145 __func__
, proc_pid(p
), (uint64_t)VM_KERNEL_ADDRPERM(so
),
2146 SOCK_DOM(so
), SOCK_TYPE(so
), error
));
2148 * This socket should have been disconnected and flushed
2149 * prior to being returned from sodefunct(); there should
2150 * be no data on its receive list, so panic otherwise.
2152 if (so
->so_state
& SS_DEFUNCT
)
2153 sb_empty_assert(sb
, __func__
);
2154 socket_unlock(so
, 1);
2159 * When SO_WANTOOBFLAG is set we try to get out-of-band data
2160 * regardless of the flags argument. Here is the case were
2161 * out-of-band data is not inline.
2163 if ((flags
& MSG_OOB
) ||
2164 ((so
->so_options
& SO_WANTOOBFLAG
) != 0 &&
2165 (so
->so_options
& SO_OOBINLINE
) == 0 &&
2166 (so
->so_oobmark
|| (so
->so_state
& SS_RCVATMARK
)))) {
2167 m
= m_get(M_WAIT
, MT_DATA
);
2169 socket_unlock(so
, 1);
2170 KERNEL_DEBUG(DBG_FNC_SORECEIVE
| DBG_FUNC_END
,
2171 ENOBUFS
, 0, 0, 0, 0);
2174 error
= (*pr
->pr_usrreqs
->pru_rcvoob
)(so
, m
, flags
& MSG_PEEK
);
2177 socket_unlock(so
, 0);
2179 error
= uiomove(mtod(m
, caddr_t
),
2180 imin(uio_resid(uio
), m
->m_len
), uio
);
2182 } while (uio_resid(uio
) && error
== 0 && m
!= NULL
);
2188 if ((so
->so_options
& SO_WANTOOBFLAG
) != 0) {
2189 if (error
== EWOULDBLOCK
|| error
== EINVAL
) {
2191 * Let's try to get normal data:
2192 * EWOULDBLOCK: out-of-band data not
2193 * receive yet. EINVAL: out-of-band data
2198 } else if (error
== 0 && flagsp
!= NULL
) {
2202 socket_unlock(so
, 1);
2203 KERNEL_DEBUG(DBG_FNC_SORECEIVE
| DBG_FUNC_END
, error
,
2211 if (so
->so_state
& SS_ISCONFIRMING
&& uio_resid(uio
))
2212 (*pr
->pr_usrreqs
->pru_rcvd
)(so
, 0);
2215 delayed_copy_len
= 0;
2217 #ifdef MORE_LOCKING_DEBUG
2218 if (so
->so_usecount
<= 1)
2219 printf("soreceive: sblock so=%p ref=%d on socket\n",
2220 so
, so
->so_usecount
);
2223 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
2224 * and if so just return to the caller. This could happen when
2225 * soreceive() is called by a socket upcall function during the
2226 * time the socket is freed. The socket buffer would have been
2227 * locked across the upcall, therefore we cannot put this thread
2228 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
2229 * we may livelock), because the lock on the socket buffer will
2230 * only be released when the upcall routine returns to its caller.
2231 * Because the socket has been officially closed, there can be
2232 * no further read on it.
2234 * A multipath subflow socket would have its SS_NOFDREF set by
2235 * default, so check for SOF_MP_SUBFLOW socket flag; when the
2236 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
2238 if ((so
->so_state
& (SS_NOFDREF
| SS_CANTRCVMORE
)) ==
2239 (SS_NOFDREF
| SS_CANTRCVMORE
) && !(so
->so_flags
& SOF_MP_SUBFLOW
)) {
2240 socket_unlock(so
, 1);
2244 error
= sblock(&so
->so_rcv
, SBLOCKWAIT(flags
));
2246 socket_unlock(so
, 1);
2247 KERNEL_DEBUG(DBG_FNC_SORECEIVE
| DBG_FUNC_END
, error
,
2252 m
= so
->so_rcv
.sb_mb
;
2254 * If we have less data than requested, block awaiting more
2255 * (subject to any timeout) if:
2256 * 1. the current count is less than the low water mark, or
2257 * 2. MSG_WAITALL is set, and it is possible to do the entire
2258 * receive operation at once if we block (resid <= hiwat).
2259 * 3. MSG_DONTWAIT is not set
2260 * If MSG_WAITALL is set but resid is larger than the receive buffer,
2261 * we have to do the receive in sections, and thus risk returning
2262 * a short count if a timeout or signal occurs after we start.
2264 if (m
== NULL
|| (((flags
& MSG_DONTWAIT
) == 0 &&
2265 so
->so_rcv
.sb_cc
< uio_resid(uio
)) &&
2266 (so
->so_rcv
.sb_cc
< so
->so_rcv
.sb_lowat
||
2267 ((flags
& MSG_WAITALL
) && uio_resid(uio
) <= so
->so_rcv
.sb_hiwat
)) &&
2268 m
->m_nextpkt
== NULL
&& (pr
->pr_flags
& PR_ATOMIC
) == 0)) {
		/*
		 * Panic if we notice inconsistencies in the socket's
		 * receive list; both sb_mb and sb_cc should correctly
		 * reflect the contents of the list, otherwise we may
		 * end up with false positives during select() or poll()
		 * which could put the application in a bad state.
		 */
		SB_MB_CHECK(&so->so_rcv);

			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
		if (so->so_state & SS_CANTRCVMORE) {
			for (; m != NULL; m = m->m_next)
				if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
			m = so->so_rcv.sb_mb;
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
		if (uio_resid(uio) == 0)
		if ((so->so_state & SS_NBIO) ||
		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
			error = EWOULDBLOCK;
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
		sbunlock(&so->so_rcv, TRUE);	/* keep socket locked */
#if EVEN_MORE_LOCKING_DEBUG
			printf("Waiting for socket data\n");
		error = sbwait(&so->so_rcv);
#if EVEN_MORE_LOCKING_DEBUG
			printf("SORECEIVE - sbwait returned %d\n", error);
		if (so->so_usecount < 1) {
			panic("%s: after 2nd sblock so=%p ref=%d on socket\n",
			    __func__, so, so->so_usecount);
			socket_unlock(so, 1);
			KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,

	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
#if CONFIG_MACF_SOCKET_SUBSET
		/*
		 * Call the MAC framework for policy checking if we're in
		 * the user process context and the socket isn't connected.
		 */
		if (p != kernproc && !(so->so_state & SS_ISCONNECTED)) {
			struct mbuf *m0 = m;
			/*
			 * Dequeue this record (temporarily) from the receive
			 * list since we're about to drop the socket's lock
			 * where a new record may arrive and be appended to
			 * the list.  Upon MAC policy failure, the record
			 * will be freed.  Otherwise, we'll add it back to
			 * the head of the list.  We cannot rely on SB_LOCK
			 * because the append operation uses the socket's lock.
			 */
				m->m_nextpkt = NULL;
				sbfree(&so->so_rcv, m);
			} while (m != NULL);
			so->so_rcv.sb_mb = nextrecord;
			SB_EMPTY_FIXUP(&so->so_rcv);
			SBLASTRECORDCHK(&so->so_rcv, "soreceive 1a");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive 1a");
			socket_unlock(so, 0);
			if (mac_socket_check_received(proc_ucred(p), so,
			    mtod(m, struct sockaddr *)) != 0) {
				/*
				 * MAC policy failure; free this record and
				 * process the next record (or block until
				 * one is available).  We have adjusted sb_cc
				 * and sb_mbcnt above so there is no need to
				 * call sbfree() again.
				 */
				} while (m != NULL);
				/*
				 * Clear SB_LOCK but don't unlock the socket.
				 * Process the next record or wait for one.
				 */
				sbunlock(&so->so_rcv, TRUE);	/* stay locked */
			/*
			 * If the socket has been defunct'd, drop it.
			 */
			if (so->so_flags & SOF_DEFUNCT) {
			/*
			 * Re-adjust the socket receive list and re-enqueue
			 * the record in front of any packets which may have
			 * been appended while we dropped the lock.
			 */
			for (m = m0; m->m_next != NULL; m = m->m_next)
				sballoc(&so->so_rcv, m);
			sballoc(&so->so_rcv, m);
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_lastrecord = m0;
				so->so_rcv.sb_mbtail = m;
			nextrecord = m->m_nextpkt = so->so_rcv.sb_mb;
			so->so_rcv.sb_mb = m;
			SBLASTRECORDCHK(&so->so_rcv, "soreceive 1b");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive 1b");
#endif /* CONFIG_MACF_SOCKET_SUBSET */
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
			if ((*psa == NULL) && (flags & MSG_NEEDSA)) {
				error = EWOULDBLOCK;
		if (flags & MSG_PEEK) {
			sbfree(&so->so_rcv, m);
			if (m->m_next == NULL && so->so_rcv.sb_cc != 0) {
				panic("%s: about to create invalid socketbuf",
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
				m->m_nextpkt = nextrecord;
				so->so_rcv.sb_mb = nextrecord;
				SB_EMPTY_FIXUP(&so->so_rcv);

	/*
	 * Process one or more MT_CONTROL mbufs present before any data mbufs
	 * in the first mbuf chain on the socket buffer.  If MSG_PEEK, we
	 * just copy the data; if !MSG_PEEK, we call into the protocol to
	 * perform externalization.
	 */
	if (m != NULL && m->m_type == MT_CONTROL) {
		struct mbuf *cm = NULL, *cmn;
		struct mbuf **cme = &cm;
		struct sockbuf *sb_rcv = &so->so_rcv;
		struct mbuf **msgpcm = NULL;

		/*
		 * Externalizing the control messages would require us to
		 * drop the socket's lock below.  Once we re-acquire the
		 * lock, the mbuf chain might change.  In order to preserve
		 * consistency, we unlink all control messages from the
		 * first mbuf chain in one shot and link them separately
		 * onto a different chain.
		 */
			if (flags & MSG_PEEK) {
				if (controlp != NULL) {
					if (*controlp == NULL) {
					*controlp = m_copy(m, 0, m->m_len);
					/*
					 * If we failed to allocate an mbuf,
					 * release any previously allocated
					 * mbufs for control data.  Return
					 * an error.  Keep the mbufs in the
					 * socket as this is using
					 */
					if (*controlp == NULL) {
					controlp = &(*controlp)->m_next;
				m->m_nextpkt = NULL;
				sb_rcv->sb_mb = m->m_next;
				cme = &(*cme)->m_next;
		} while (m != NULL && m->m_type == MT_CONTROL);

		if (!(flags & MSG_PEEK)) {
			if (sb_rcv->sb_mb != NULL) {
				sb_rcv->sb_mb->m_nextpkt = nextrecord;
				sb_rcv->sb_mb = nextrecord;
				SB_EMPTY_FIXUP(sb_rcv);
			if (nextrecord == NULL)
				sb_rcv->sb_lastrecord = m;

		SBLASTRECORDCHK(&so->so_rcv, "soreceive ctl");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive ctl");

		while (cm != NULL) {
			cmsg_type = mtod(cm, struct cmsghdr *)->cmsg_type;
			/*
			 * Call the protocol to externalize SCM_RIGHTS message
			 * and return the modified message to the caller upon
			 * success.  Otherwise, all other control messages are
			 * returned unmodified to the caller.  Note that we
			 * only get into this loop if MSG_PEEK is not set.
			 */
			if (pr->pr_domain->dom_externalize != NULL &&
			    cmsg_type == SCM_RIGHTS) {
				/*
				 * Release socket lock: see 3903171.  This
				 * would also allow more records to be appended
				 * to the socket buffer.  We still have SB_LOCK
				 * set on it, so we can be sure that the head
				 * of the mbuf chain won't change.
				 */
				socket_unlock(so, 0);
				error = (*pr->pr_domain->dom_externalize)(cm);
			if (controlp != NULL && error == 0) {
				controlp = &(*controlp)->m_next;

		/*
		 * Update the value of nextrecord in case we received new
		 * records when the socket was unlocked above for
		 * externalizing SCM_RIGHTS.
		 */
			nextrecord = sb_rcv->sb_mb->m_nextpkt;
			nextrecord = sb_rcv->sb_mb;

	/*
	 * If the socket is a TCP socket with message delivery
	 * enabled, then create a control msg to deliver the
	 * relative TCP sequence number for this data.  Waiting
	 * until this point will protect against failures to
	 * allocate an mbuf for control msgs.
	 */
	if (so->so_type == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP &&
	    (so->so_flags & SOF_ENABLE_MSGS) && controlp != NULL) {
		struct mbuf *seq_cm;

		seq_cm = sbcreatecontrol((caddr_t)&m->m_pkthdr.msg_seq,
		    sizeof (uint32_t), SCM_SEQNUM, SOL_SOCKET);
		if (seq_cm == NULL) {
			/* unable to allocate a control mbuf */
		controlp = &seq_cm->m_next;

	if (!(flags & MSG_PEEK)) {
		/*
		 * We get here because m points to an mbuf following
		 * any MT_SONAME or MT_CONTROL mbufs which have been
		 * processed above.  In any case, m should be pointing
		 * to the head of the mbuf chain, and the nextrecord
		 * should be either NULL or equal to m->m_nextpkt.
		 * See comments above about SB_LOCK.
		 */
		if (m != so->so_rcv.sb_mb ||
		    m->m_nextpkt != nextrecord) {
			panic("%s: post-control !sync so=%p m=%p "
			    "nextrecord=%p\n", __func__, so, m,
		if (nextrecord == NULL)
			so->so_rcv.sb_lastrecord = m;

	if (type == MT_OOBDATA)

		if (!(flags & MSG_PEEK)) {
			SB_EMPTY_FIXUP(&so->so_rcv);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");
	if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)

	    (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
		} else if (type == MT_OOBDATA) {
		/*
		 * Make sure to always set MSG_OOB event when getting
		 * out of band data inline.
		 */
		if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
		    (so->so_options & SO_OOBINLINE) != 0 &&
		    (so->so_state & SS_RCVATMARK) != 0) {
			so->so_state &= ~SS_RCVATMARK;
		len = uio_resid(uio) - delayed_copy_len;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			if (can_delay && len == m->m_len) {
				/*
				 * only delay the copy if we're consuming the
				 * mbuf and we're NOT in MSG_PEEK mode
				 * and we have enough data to make it worthwhile
				 * to drop and retake the lock... can_delay
				 * reflects the state of the 2 latter
				 * constraints; moff should always be zero
				 */
				delayed_copy_len += len;
				if (delayed_copy_len) {
					error = sodelayed_copy(so, uio,
					    &free_list, &delayed_copy_len);
					/*
					 * can only get here if MSG_PEEK is not
					 * set; therefore, m should point at the
					 * head of the rcv queue; if it doesn't,
					 * it means something drastically
					 * changed while we were out from behind
					 * the lock in sodelayed_copy.  perhaps
					 * a RST on the stream.  in any event,
					 * the stream has been interrupted.  it's
					 * probably best just to return whatever
					 * data we've moved and let the caller
					 */
					if (m != so->so_rcv.sb_mb) {
				socket_unlock(so, 0);
				error = uiomove(mtod(m, caddr_t) + moff,
			uio_setresid(uio, (uio_resid(uio) - len));
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
			if (flags & MSG_PEEK) {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				m->m_nextpkt = NULL;
				/*
				 * If this packet is an unordered packet
				 * (indicated by M_UNORDERED_DATA flag), remove
				 * the additional bytes added to the
				 * receive socket buffer size.
				 */
				if ((so->so_flags & SOF_ENABLE_MSGS) &&
				    (m->m_flags & M_UNORDERED_DATA) &&
				    sbreserve(&so->so_rcv,
				    so->so_rcv.sb_hiwat - m->m_len)) {
					if (so->so_msg_state->msg_uno_bytes >
						    msg_uno_bytes -= m->m_len;
					m->m_flags &= ~M_UNORDERED_DATA;
					so->so_rcv.sb_mb = m = m->m_next;
					if (free_list == NULL)
					so->so_rcv.sb_mb = m = m->m_next;
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			if (flags & MSG_PEEK) {
				if (flags & MSG_DONTWAIT)
					copy_flag = M_DONTWAIT;
				*mp = m_copym(m, 0, len, copy_flag);
					/*
					 * Failed to allocate an mbuf?
					 * Adjust uio_resid back, it was
					 * adjusted down by len bytes which
					 * we didn't copy over.
					 */
					    (uio_resid(uio) + len));
				so->so_rcv.sb_cc -= len;
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					/*
					 * delay posting the actual event until
					 * after any delayed copy processing
					 */
				if (offset == so->so_oobmark)
		if (flags & MSG_EOR)
		/*
		 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set
		 * (for non-atomic socket), we must not quit until
		 * "uio->uio_resid == 0" or an error termination.
		 * If a signal/timeout occurs, return with a short
		 * count but without error.  Keep sockbuf locked
		 * against other readers.
		 */
		while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == NULL &&
		    (uio_resid(uio) - delayed_copy_len) > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
			/*
			 * Depending on the protocol (e.g. TCP), the following
			 * might cause the socket lock to be dropped and later
			 * be reacquired, and more data could have arrived and
			 * have been appended to the receive socket buffer by
			 * the time it returns.  Therefore, we only sleep in
			 * sbwait() below if and only if the socket buffer is
			 * empty, in order to avoid a false sleep.
			 */
			if (pr->pr_flags & PR_WANTRCVD && so->so_pcb &&
			    (((struct inpcb *)so->so_pcb)->inp_state !=
				(*pr->pr_usrreqs->pru_rcvd)(so, flags);

			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");

			if (so->so_rcv.sb_mb == NULL && sbwait(&so->so_rcv)) {
			/*
			 * have to wait until after we get back from the sbwait
			 * to do the copy because we will drop the lock if we
			 * have enough data that has been delayed... by dropping
			 * the lock we open up a window allowing the netisr
			 * thread to process the incoming packets and to change
			 * the state of this socket... we're issuing the sbwait
			 * because the socket is empty and we're expecting the
			 * netisr thread to wake us up when more packets arrive;
			 * if we allow that processing to happen and then sbwait
			 * we could stall forever with packets sitting in the
			 * socket if no further packets arrive from the remote
			 *
			 * we want to copy before we've collected all the data
			 * to satisfy this request to allow the copy to overlap
			 * the incoming packet processing on an MP system
			 */
			if (delayed_copy_len > sorecvmincopy &&
			    (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
				error = sodelayed_copy(so, uio,
				    &free_list, &delayed_copy_len);
			m = so->so_rcv.sb_mb;
				nextrecord = m->m_nextpkt;
			SB_MB_CHECK(&so->so_rcv);

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1) {
		panic("%s: after big while so=%p ref=%d on socket\n",
		    __func__, so, so->so_usecount);

	if (m != NULL && pr->pr_flags & PR_ATOMIC) {
		if (so->so_options & SO_DONTTRUNC) {
			flags |= MSG_RCVMORE;
			if ((flags & MSG_PEEK) == 0)
				(void) sbdroprecord(&so->so_rcv);
	/*
	 * pru_rcvd below (for TCP) may cause more data to be received
	 * if the socket lock is dropped prior to sending the ACK; some
	 * legacy OpenTransport applications don't handle this well
	 * (if it receives less data than requested while MSG_HAVEMORE
	 * is set), and so we set the flag now based on what we know
	 * prior to calling pru_rcvd.
	 */
	if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
		flags |= MSG_HAVEMORE;

	if ((flags & MSG_PEEK) == 0) {
			so->so_rcv.sb_mb = nextrecord;
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL) {
				so->so_rcv.sb_lastrecord = nextrecord;
			SB_MB_CHECK(&so->so_rcv);
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);

	if (delayed_copy_len) {
		error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
	if (free_list != NULL) {
		m_freem_list(free_list);
		postevent(so, 0, EV_OOB);

	if (orig_resid == uio_resid(uio) && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv, TRUE);	/* keep socket locked */

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1) {
		panic("%s: release so=%p ref=%d on socket\n", __func__,
		    so, so->so_usecount);
	if (delayed_copy_len)
		error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
	if (free_list != NULL)
		m_freem_list(free_list);

	sbunlock(&so->so_rcv, FALSE);	/* will unlock socket */

	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, so, uio_resid(uio),
	    so->so_rcv.sb_cc, 0, error);
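
/*
 * Illustrative sketch (not part of the original source): how the
 * MSG_WAITALL / MSG_DONTWAIT semantics described in the comments above
 * look from a user-space caller's perspective.  The descriptor "fd" is
 * assumed to be a connected stream socket; the helper names are made up
 * for this example.
 */
#if 0	/* user-space example only */
#include <sys/socket.h>
#include <errno.h>

static ssize_t
read_exact(int fd, void *buf, size_t len)
{
	/*
	 * MSG_WAITALL asks soreceive() not to return until the full
	 * request is satisfied (or EOF/error occurs); a signal or
	 * timeout may still yield a short count, as noted above.
	 */
	return (recv(fd, buf, len, MSG_WAITALL));
}

static ssize_t
read_nonblocking(int fd, void *buf, size_t len)
{
	/* MSG_DONTWAIT makes this single call behave as non-blocking. */
	ssize_t n = recv(fd, buf, len, MSG_DONTWAIT);

	if (n == -1 && (errno == EWOULDBLOCK || errno == EAGAIN))
		n = 0;	/* no data ready right now */
	return (n);
}
#endif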
/*
 * Returns:	0			Success
 */
sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list,
    user_ssize_t *resid)

	socket_unlock(so, 0);

	while (m != NULL && error == 0) {
		error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
	m_freem_list(*free_list);
/*
 * Returns:	0			Success
 *	<pru_shutdown>:EINVAL
 *	<pru_shutdown>:EADDRNOTAVAIL[TCP]
 *	<pru_shutdown>:ENOBUFS[TCP]
 *	<pru_shutdown>:EMSGSIZE[TCP]
 *	<pru_shutdown>:EHOSTUNREACH[TCP]
 *	<pru_shutdown>:ENETUNREACH[TCP]
 *	<pru_shutdown>:ENETDOWN[TCP]
 *	<pru_shutdown>:ENOMEM[TCP]
 *	<pru_shutdown>:EACCES[TCP]
 *	<pru_shutdown>:EMSGSIZE[TCP]
 *	<pru_shutdown>:ENOBUFS[TCP]
 *	<pru_shutdown>:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
 *	<pru_shutdown>:???		[other protocol families]
 */
soshutdown(struct socket *so, int how)

	    (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) == 0) {
		error = soshutdownlock(so, how);
	socket_unlock(so, 1);

soshutdownlock(struct socket *so, int how)
	struct protosw *pr = so->so_proto;

	sflt_notify(so, sock_evt_shutdown, &how);

	if (how != SHUT_WR) {
		if ((so->so_state & SS_CANTRCVMORE) != 0) {
			/* read already shut down */
		postevent(so, 0, EV_RCLOSED);
	if (how != SHUT_RD) {
		if ((so->so_state & SS_CANTSENDMORE) != 0) {
			/* write already shut down */
		error = (*pr->pr_usrreqs->pru_shutdown)(so);
		postevent(so, 0, EV_WCLOSED);

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0, 0, 0, 0, 0);
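
/*
 * Illustrative sketch (not part of the original source): the SHUT_RD /
 * SHUT_WR handling in soshutdownlock() above corresponds directly to the
 * user-space shutdown(2) interface.  "fd" is assumed to be a connected
 * socket descriptor.
 */
#if 0	/* user-space example only */
#include <sys/socket.h>

static int
half_close_for_write(int fd)
{
	/*
	 * Stops further sends (SS_CANTSENDMORE ends up set via
	 * pru_shutdown); the read side remains usable.
	 */
	return (shutdown(fd, SHUT_WR));
}
#endif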
sowflush(struct socket *so)
	struct sockbuf *sb = &so->so_snd;
	lck_mtx_t *mutex_held;
	/*
	 * XXX: This code is currently commented out, because we may get here
	 * as part of sofreelastref(), and at that time, pr_getlock() may no
	 * longer be able to return us the lock; this will be fixed in the
	 * future.
	 */
	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	/*
	 * Obtain lock on the socket buffer (SB_LOCK).  This is required
	 * to prevent the socket buffer from being unexpectedly altered
	 * while it is used by another thread in socket send/receive.
	 *
	 * sblock() must not fail here, hence the assertion.
	 */
	(void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
	VERIFY(sb->sb_flags & SB_LOCK);

	sb->sb_flags &= ~(SB_SEL|SB_UPCALL);
	sb->sb_flags |= SB_DROP;
	sb->sb_upcall = NULL;
	sb->sb_upcallarg = NULL;

	sbunlock(sb, TRUE);	/* keep socket locked */

	selthreadclear(&sb->sb_sel);

sorflush(struct socket *so)
	struct sockbuf *sb = &so->so_rcv;
	struct protosw *pr = so->so_proto;
	lck_mtx_t *mutex_held;
	/*
	 * XXX: This code is currently commented out, because we may get here
	 * as part of sofreelastref(), and at that time, pr_getlock() may no
	 * longer be able to return us the lock; this will be fixed in the
	 * future.
	 */
	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	sflt_notify(so, sock_evt_flush_read, NULL);

	/*
	 * Obtain lock on the socket buffer (SB_LOCK).  This is required
	 * to prevent the socket buffer from being unexpectedly altered
	 * while it is used by another thread in socket send/receive.
	 *
	 * sblock() must not fail here, hence the assertion.
	 */
	(void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
	VERIFY(sb->sb_flags & SB_LOCK);

	/*
	 * Copy only the relevant fields from "sb" to "asb" which we
	 * need for sbrelease() to function.  In particular, skip
	 * sb_sel as it contains the wait queue linkage, which would
	 * wreak havoc if we were to issue selthreadclear() on "asb".
	 * Make sure to not carry over SB_LOCK in "asb", as we need
	 * to acquire it later as part of sbrelease().
	 */
	bzero(&asb, sizeof (asb));
	asb.sb_cc = sb->sb_cc;
	asb.sb_hiwat = sb->sb_hiwat;
	asb.sb_mbcnt = sb->sb_mbcnt;
	asb.sb_mbmax = sb->sb_mbmax;
	asb.sb_ctl = sb->sb_ctl;
	asb.sb_lowat = sb->sb_lowat;
	asb.sb_mb = sb->sb_mb;
	asb.sb_mbtail = sb->sb_mbtail;
	asb.sb_lastrecord = sb->sb_lastrecord;
	asb.sb_so = sb->sb_so;
	asb.sb_flags = sb->sb_flags;
	asb.sb_flags &= ~(SB_LOCK|SB_SEL|SB_KNOTE|SB_UPCALL);
	asb.sb_flags |= SB_DROP;

	/*
	 * Ideally we'd bzero() these and preserve the ones we need;
	 * but to do that we'd need to shuffle things around in the
	 * sockbuf, and we can't do it now because there are KEXTS
	 * that are directly referring to the socket structure.
	 *
	 * Setting SB_DROP acts as a barrier to prevent further appends.
	 * Clearing SB_SEL is done for selthreadclear() below.
	 */
	sb->sb_mbtail = NULL;
	sb->sb_lastrecord = NULL;
	sb->sb_timeo.tv_sec = 0;
	sb->sb_timeo.tv_usec = 0;
	sb->sb_upcall = NULL;
	sb->sb_upcallarg = NULL;
	sb->sb_flags &= ~(SB_SEL|SB_UPCALL);
	sb->sb_flags |= SB_DROP;

	sbunlock(sb, TRUE);	/* keep socket locked */

	/*
	 * Note that selthreadclear() is called on the original "sb" and
	 * not the local "asb" because of the way wait queue linkage is
	 * implemented.  Given that selwakeup() may be triggered, SB_SEL
	 * should no longer be set (cleared above.)
	 */
	selthreadclear(&sb->sb_sel);

	if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 *
 * Returns:	0			Success
 */
sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		sopt->sopt_valsize = valsize = len;

	if (sopt->sopt_p != kernproc)
		return (copyin(sopt->sopt_val, buf, valsize));

	bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
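
/*
 * Illustrative sketch (not part of the original source): the typical
 * calling pattern for sooptcopyin() inside a SOPT_SET handler, mirroring
 * the SOL_SOCKET cases in sosetoptlock() below.  "so" and "sopt" are the
 * usual socket/sockopt pair passed to such a handler; the "out" label is
 * hypothetical.
 */
#if 0	/* illustrative example only */
	int optval, error;

	/* Require at least sizeof (optval) bytes from the caller. */
	error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval));
	if (error != 0)
		goto out;		/* EINVAL or EFAULT from copyin */
	if (optval)
		so->so_flags |= SOF_NOSIGPIPE;
	else
		so->so_flags &= ~SOF_NOSIGPIPE;
#endif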
/*
 * sooptcopyin_timeval
 *	Copy in a timeval value into tv_p, and take into account whether
 *	the calling process is 64-bit or 32-bit.  Moved the sanity checking
 *	code here so that we can verify the 64-bit tv_sec value before we
 *	lose the top 32-bits assigning tv64.tv_sec to tv_p->tv_sec.
 */
sooptcopyin_timeval(struct sockopt *sopt, struct timeval *tv_p)

	if (proc_is64bit(sopt->sopt_p)) {
		struct user64_timeval tv64;

		if (sopt->sopt_valsize < sizeof (tv64))

		sopt->sopt_valsize = sizeof (tv64);
		if (sopt->sopt_p != kernproc) {
			error = copyin(sopt->sopt_val, &tv64, sizeof (tv64));
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv64,
		if (tv64.tv_sec < 0 || tv64.tv_sec > LONG_MAX ||
		    tv64.tv_usec < 0 || tv64.tv_usec >= 1000000)

		tv_p->tv_sec = tv64.tv_sec;
		tv_p->tv_usec = tv64.tv_usec;
	} else {
		struct user32_timeval tv32;

		if (sopt->sopt_valsize < sizeof (tv32))

		sopt->sopt_valsize = sizeof (tv32);
		if (sopt->sopt_p != kernproc) {
			error = copyin(sopt->sopt_val, &tv32, sizeof (tv32));
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv32,
		/*
		 * K64todo "comparison is always false due to
		 * limited range of data type"
		 */
		if (tv32.tv_sec < 0 || tv32.tv_sec > LONG_MAX ||
		    tv32.tv_usec < 0 || tv32.tv_usec >= 1000000)

		tv_p->tv_sec = tv32.tv_sec;
		tv_p->tv_usec = tv32.tv_usec;
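
/*
 * Illustrative sketch (not part of the original source): setting a receive
 * timeout from user space.  The value crosses into the kernel through
 * sooptcopyin_timeval() above, so tv_sec must be non-negative and tv_usec
 * must be in [0, 1000000), otherwise the call fails (EDOM per the Returns
 * list below).
 */
#if 0	/* user-space example only */
#include <sys/socket.h>
#include <sys/time.h>

static int
set_recv_timeout(int fd, time_t seconds)
{
	struct timeval tv = { .tv_sec = seconds, .tv_usec = 0 };

	return (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof (tv)));
}
#endif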
/*
 * Returns:	0			Success
 *	sooptcopyin:EINVAL
 *	sooptcopyin:EFAULT
 *	sooptcopyin_timeval:EINVAL
 *	sooptcopyin_timeval:EFAULT
 *	sooptcopyin_timeval:EDOM
 *	<pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
 *	<pr_ctloutput>:???
 *	sflt_attach_private:???	[whatever a filter author chooses]
 *	<sf_setoption>:???	[whatever a filter author chooses]
 *
 * Notes:	Other <pru_listen> returns depend on the protocol family; all
 *		<sf_listen> returns depend on what the filter author causes
 *		their filter to return.
 */
sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
#if CONFIG_MACF_SOCKET
#endif /* MAC_SOCKET */

	if (sopt->sopt_dir != SOPT_SET)
		sopt->sopt_dir = SOPT_SET;

	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
	    (SS_CANTRCVMORE | SS_CANTSENDMORE) &&
	    (so->so_flags & SOF_NPX_SETOPTSHUT) == 0) {
		/* the socket has been shutdown, no more sockopt's */

	error = sflt_setsockopt(so, sopt);
		if (error == EJUSTRETURN)

	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto != NULL &&
		    so->so_proto->pr_ctloutput != NULL) {
			error = (*so->so_proto->pr_ctloutput)(so, sopt);
		error = ENOPROTOOPT;
	/*
	 * Allow socket-level (SOL_SOCKET) options to be filtered by
	 * the protocol layer, if needed.  A zero value returned from
	 * the handler means use default socket-level processing as
	 * done by the rest of this routine.  Otherwise, any other
	 * return value indicates that the option is unsupported.
	 */
	if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs->
	    pru_socheckopt(so, sopt)) != 0)

	switch (sopt->sopt_name) {
		error = sooptcopyin(sopt, &l, sizeof (l), sizeof (l));
		so->so_linger = (sopt->sopt_name == SO_LINGER) ?
		    l.l_linger : l.l_linger * hz;
			so->so_options |= SO_LINGER;
			so->so_options &= ~SO_LINGER;

	case SO_USELOOPBACK:
	case SO_TIMESTAMP_MONOTONIC:
	case SO_WANTOOBFLAG:
		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_options |= sopt->sopt_name;
			so->so_options &= ~sopt->sopt_name;

		error = sooptcopyin(sopt, &optval, sizeof (optval),
		/*
		 * Values < 1 make no sense for any of these
		 * options, so disallow them.
		 */
		switch (sopt->sopt_name) {
			struct sockbuf *sb =
			    (sopt->sopt_name == SO_SNDBUF) ?
			    &so->so_snd : &so->so_rcv;
			if (sbreserve(sb, (u_int32_t)optval) == 0) {
			sb->sb_flags |= SB_USRSIZE;
			sb->sb_flags &= ~SB_AUTOSIZE;
			sb->sb_idealsize = (u_int32_t)optval;
		/*
		 * Make sure the low-water is never greater than
			so->so_snd.sb_lowat =
			    (optval > so->so_snd.sb_hiwat) ?
			    so->so_snd.sb_hiwat : optval;
			so->so_rcv.sb_lowat =
			    (optval > so->so_rcv.sb_hiwat) ?
			    so->so_rcv.sb_hiwat : optval;

		error = sooptcopyin_timeval(sopt, &tv);
		switch (sopt->sopt_name) {
			so->so_snd.sb_timeo = tv;
			so->so_rcv.sb_timeo = tv;

		error = sooptcopyin(sopt, &nke, sizeof (nke),
		error = sflt_attach_internal(so, nke.nke_handle);

		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_flags |= SOF_NOSIGPIPE;
			so->so_flags &= ~SOF_NOSIGPIPE;

		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_flags |= SOF_NOADDRAVAIL;
			so->so_flags &= ~SOF_NOADDRAVAIL;

	case SO_REUSESHAREUID:
		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_flags |= SOF_REUSESHAREUID;
			so->so_flags &= ~SOF_REUSESHAREUID;

	case SO_NOTIFYCONFLICT:
		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_flags |= SOF_NOTIFYCONFLICT;
			so->so_flags &= ~SOF_NOTIFYCONFLICT;

	case SO_RESTRICTIONS:
		error = sooptcopyin(sopt, &optval, sizeof (optval),
		error = so_set_restrictions(so, optval);

#if CONFIG_MACF_SOCKET
		if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac),
		    sizeof (extmac))) != 0)
		error = mac_setsockopt_label(proc_ucred(sopt->sopt_p),
#endif /* MAC_SOCKET */

	case SO_UPCALLCLOSEWAIT:
		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_flags |= SOF_UPCALLCLOSEWAIT;
			so->so_flags &= ~SOF_UPCALLCLOSEWAIT;

		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_flags |= SOF_BINDRANDOMPORT;
			so->so_flags &= ~SOF_BINDRANDOMPORT;

	case SO_NP_EXTENSIONS: {
		struct so_np_extensions sonpx;

		error = sooptcopyin(sopt, &sonpx, sizeof (sonpx),
		if (sonpx.npx_mask & ~SONPX_MASK_VALID) {
		/*
		 * Only one bit defined for now
		 */
		if ((sonpx.npx_mask & SONPX_SETOPTSHUT)) {
			if ((sonpx.npx_flags & SONPX_SETOPTSHUT))
				so->so_flags |= SOF_NPX_SETOPTSHUT;
				so->so_flags &= ~SOF_NPX_SETOPTSHUT;

	case SO_TRAFFIC_CLASS: {
		error = sooptcopyin(sopt, &optval, sizeof (optval),
		error = so_set_traffic_class(so, optval);

	case SO_RECV_TRAFFIC_CLASS: {
		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_flags &= ~SOF_RECV_TRAFFIC_CLASS;
			so->so_flags |= SOF_RECV_TRAFFIC_CLASS;

	case SO_TRAFFIC_CLASS_DBG: {
		struct so_tcdbg so_tcdbg;

		error = sooptcopyin(sopt, &so_tcdbg,
		    sizeof (struct so_tcdbg), sizeof (struct so_tcdbg));
		error = so_set_tcdbg(so, &so_tcdbg);

	case SO_PRIVILEGED_TRAFFIC_CLASS:
		error = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_PRIVILEGED_TRAFFIC_CLASS, 0);
		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_flags &= ~SOF_PRIVILEGED_TRAFFIC_CLASS;
			so->so_flags |= SOF_PRIVILEGED_TRAFFIC_CLASS;

		error = sooptcopyin(sopt, &optval, sizeof (optval),
		if (error != 0 || (so->so_flags & SOF_DEFUNCT)) {
		/*
		 * Any process can set SO_DEFUNCTOK (clear
		 * SOF_NODEFUNCT), but only root can clear
		 * SO_DEFUNCTOK (set SOF_NODEFUNCT).
		 */
		    kauth_cred_issuser(kauth_cred_get()) == 0) {
			so->so_flags &= ~SOF_NODEFUNCT;
			so->so_flags |= SOF_NODEFUNCT;

		if (SOCK_DOM(so) == PF_INET ||
		    SOCK_DOM(so) == PF_INET6) {
			char s[MAX_IPv6_STR_LEN];
			char d[MAX_IPv6_STR_LEN];
			struct inpcb *inp = sotoinpcb(so);

			SODEFUNCTLOG(("%s[%d]: so 0x%llx [%s %s:%d -> "
			    "%s:%d] is now marked as %seligible for "
			    "defunct\n", __func__, proc_selfpid(),
			    (uint64_t)VM_KERNEL_ADDRPERM(so),
			    (SOCK_TYPE(so) == SOCK_STREAM) ?
			    "TCP" : "UDP", inet_ntop(SOCK_DOM(so),
			    ((SOCK_DOM(so) == PF_INET) ?
			    (void *)&inp->inp_laddr.s_addr :
			    (void *)&inp->in6p_laddr), s, sizeof (s)),
			    ntohs(inp->in6p_lport),
			    inet_ntop(SOCK_DOM(so),
			    (SOCK_DOM(so) == PF_INET) ?
			    (void *)&inp->inp_faddr.s_addr :
			    (void *)&inp->in6p_faddr, d, sizeof (d)),
			    ntohs(inp->in6p_fport),
			    (so->so_flags & SOF_NODEFUNCT) ?
			SODEFUNCTLOG(("%s[%d]: so 0x%llx [%d,%d] is "
			    "now marked as %seligible for defunct\n",
			    __func__, proc_selfpid(),
			    (uint64_t)VM_KERNEL_ADDRPERM(so),
			    SOCK_DOM(so), SOCK_TYPE(so),
			    (so->so_flags & SOF_NODEFUNCT) ?

		/* This option is not settable */

	case SO_OPPORTUNISTIC:
		error = sooptcopyin(sopt, &optval, sizeof (optval),
		error = so_set_opportunistic(so, optval);

		/* This option is handled by lower layer(s) */

		error = sooptcopyin(sopt, &optval, sizeof (optval),
		error = so_set_recv_anyif(so, optval);

	case SO_TRAFFIC_MGT_BACKGROUND: {
		/* This option is handled by lower layer(s) */

	case SO_FLOW_DIVERT_TOKEN:
		error = flow_divert_token_set(so, sopt);
#endif /* FLOW_DIVERT */

		if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
		    sizeof (optval))) != 0)
		error = so_set_effective_pid(so, optval, sopt->sopt_p);

	case SO_DELEGATED_UUID: {
		if ((error = sooptcopyin(sopt, &euuid, sizeof (euuid),
		    sizeof (euuid))) != 0)
		error = so_set_effective_uuid(so, euuid, sopt->sopt_p);

		error = ENOPROTOOPT;

	if (error == 0 && so->so_proto != NULL &&
	    so->so_proto->pr_ctloutput != NULL) {
		(void) so->so_proto->pr_ctloutput(so, sopt);

		socket_unlock(so, 1);
/* Helper routines for getsockopt */
sooptcopyout(struct sockopt *sopt, void *buf, size_t len)
	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer must
	 * be generated ahead of time.
	 */
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != USER_ADDR_NULL) {
		if (sopt->sopt_p != kernproc)
			error = copyout(buf, sopt->sopt_val, valsize);
			bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
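
/*
 * Illustrative sketch (not part of the original source): the matching
 * calling pattern for sooptcopyout() inside a SOPT_GET handler, as used
 * by the SOL_SOCKET cases in sogetoptlock() below.  Truncation to the
 * caller's buffer is handled by sooptcopyout() itself.
 */
#if 0	/* illustrative example only */
	int optval, error;

	optval = (so->so_flags & SOF_NOSIGPIPE) ? 1 : 0;
	error = sooptcopyout(sopt, &optval, sizeof (optval));
#endif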
sooptcopyout_timeval(struct sockopt *sopt, const struct timeval *tv_p)
	struct user64_timeval tv64;
	struct user32_timeval tv32;

	if (proc_is64bit(sopt->sopt_p)) {
		len = sizeof (tv64);
		tv64.tv_sec = tv_p->tv_sec;
		tv64.tv_usec = tv_p->tv_usec;
	} else {
		len = sizeof (tv32);
		tv32.tv_sec = tv_p->tv_sec;
		tv32.tv_usec = tv_p->tv_usec;
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != USER_ADDR_NULL) {
		if (sopt->sopt_p != kernproc)
			error = copyout(val, sopt->sopt_val, valsize);
			bcopy(val, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
/*
 *	<pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
 *	<pr_ctloutput>:???
 *	<sf_getoption>:???
 */
sogetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
#if CONFIG_MACF_SOCKET
#endif /* MAC_SOCKET */

	if (sopt->sopt_dir != SOPT_GET)
		sopt->sopt_dir = SOPT_GET;

	error = sflt_getsockopt(so, sopt);
		if (error == EJUSTRETURN)

	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto != NULL &&
		    so->so_proto->pr_ctloutput != NULL) {
			error = (*so->so_proto->pr_ctloutput)(so, sopt);
		error = ENOPROTOOPT;
	/*
	 * Allow socket-level (SOL_SOCKET) options to be filtered by
	 * the protocol layer, if needed.  A zero value returned from
	 * the handler means use default socket-level processing as
	 * done by the rest of this routine.  Otherwise, any other
	 * return value indicates that the option is unsupported.
	 */
	if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs->
	    pru_socheckopt(so, sopt)) != 0)

	switch (sopt->sopt_name) {
		l.l_onoff = ((so->so_options & SO_LINGER) ? 1 : 0);
		l.l_linger = (sopt->sopt_name == SO_LINGER) ?
		    so->so_linger : so->so_linger / hz;
		error = sooptcopyout(sopt, &l, sizeof (l));

	case SO_USELOOPBACK:
	case SO_TIMESTAMP_MONOTONIC:
	case SO_WANTOOBFLAG:
		optval = so->so_options & sopt->sopt_name;
		error = sooptcopyout(sopt, &optval, sizeof (optval));

		optval = so->so_type;

		if (so->so_proto->pr_flags & PR_ATOMIC) {
			m1 = so->so_rcv.sb_mb;
			while (m1 != NULL) {
				if (m1->m_type == MT_DATA ||
				    m1->m_type == MT_HEADER ||
				    m1->m_type == MT_OOBDATA)
					pkt_total += m1->m_len;
			optval = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;

		optval = so->so_snd.sb_cc;

		optval = so->so_error;

		optval = so->so_snd.sb_hiwat;

		optval = so->so_rcv.sb_hiwat;

		optval = so->so_snd.sb_lowat;

		optval = so->so_rcv.sb_lowat;

		tv = (sopt->sopt_name == SO_SNDTIMEO ?
		    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
		error = sooptcopyout_timeval(sopt, &tv);

		optval = (so->so_flags & SOF_NOSIGPIPE);

		optval = (so->so_flags & SOF_NOADDRAVAIL);

	case SO_REUSESHAREUID:
		optval = (so->so_flags & SOF_REUSESHAREUID);

	case SO_NOTIFYCONFLICT:
		optval = (so->so_flags & SOF_NOTIFYCONFLICT);

	case SO_RESTRICTIONS:
		optval = so_get_restrictions(so);

#if CONFIG_MACF_SOCKET
		if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac),
		    sizeof (extmac))) != 0 ||
		    (error = mac_socket_label_get(proc_ucred(
		    sopt->sopt_p), so, &extmac)) != 0)
		error = sooptcopyout(sopt, &extmac, sizeof (extmac));
#endif /* MAC_SOCKET */

#if CONFIG_MACF_SOCKET
		if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac),
		    sizeof (extmac))) != 0 ||
		    (error = mac_socketpeer_label_get(proc_ucred(
		    sopt->sopt_p), so, &extmac)) != 0)
		error = sooptcopyout(sopt, &extmac, sizeof (extmac));
#endif /* MAC_SOCKET */

#ifdef __APPLE_API_PRIVATE
	case SO_UPCALLCLOSEWAIT:
		optval = (so->so_flags & SOF_UPCALLCLOSEWAIT);

		optval = (so->so_flags & SOF_BINDRANDOMPORT);

	case SO_NP_EXTENSIONS: {
		struct so_np_extensions sonpx;

		sonpx.npx_flags = (so->so_flags & SOF_NPX_SETOPTSHUT) ?
		    SONPX_SETOPTSHUT : 0;
		sonpx.npx_mask = SONPX_MASK_VALID;
		error = sooptcopyout(sopt, &sonpx,
		    sizeof (struct so_np_extensions));

	case SO_TRAFFIC_CLASS:
		optval = so->so_traffic_class;

	case SO_RECV_TRAFFIC_CLASS:
		optval = (so->so_flags & SOF_RECV_TRAFFIC_CLASS);

	case SO_TRAFFIC_CLASS_STATS:
		error = sooptcopyout(sopt, &so->so_tc_stats,
		    sizeof (so->so_tc_stats));

	case SO_TRAFFIC_CLASS_DBG:
		error = sogetopt_tcdbg(so, sopt);

	case SO_PRIVILEGED_TRAFFIC_CLASS:
		optval = (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS);

		optval = !(so->so_flags & SOF_NODEFUNCT);

		optval = (so->so_flags & SOF_DEFUNCT);

	case SO_OPPORTUNISTIC:
		optval = so_get_opportunistic(so);

		/* This option is not gettable */

		optval = so_get_recv_anyif(so);

	case SO_TRAFFIC_MGT_BACKGROUND:
		/* This option is handled by lower layer(s) */
		if (so->so_proto != NULL &&
		    so->so_proto->pr_ctloutput != NULL) {
			(void) so->so_proto->pr_ctloutput(so, sopt);

	case SO_FLOW_DIVERT_TOKEN:
		error = flow_divert_token_get(so, sopt);
#endif /* FLOW_DIVERT */

		error = ENOPROTOOPT;

		socket_unlock(so, 1);
/*
 * The size limits on our soopt_getm are different from those on FreeBSD.
 * We limit the size of options to MCLBYTES.  This will have to change
 * if we need to define options that need more space than MCLBYTES.
 */
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
	struct mbuf *m, *m_prev;
	int sopt_size = sopt->sopt_valsize;

	if (sopt_size <= 0 || sopt_size > MCLBYTES)

	how = sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT;
	MGET(m, how, MT_DATA);
	if (sopt_size > MLEN) {
		if ((m->m_flags & M_EXT) == 0) {
		m->m_len = min(MCLBYTES, sopt_size);
		m->m_len = min(MLEN, sopt_size);
	sopt_size -= m->m_len;

	while (sopt_size > 0) {
		MGET(m, how, MT_DATA);
		if (sopt_size > MLEN) {
			if ((m->m_flags & M_EXT) == 0) {
			m->m_len = min(MCLBYTES, sopt_size);
			m->m_len = min(MLEN, sopt_size);
		sopt_size -= m->m_len;
/* copyin sopt data into mbuf chain */
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
	struct mbuf *m0 = m;

	if (sopt->sopt_val == USER_ADDR_NULL)
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != kernproc) {
			error = copyin(sopt->sopt_val, mtod(m, char *),
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val),
			    mtod(m, char *), m->m_len);
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val += m->m_len;
		/* enough space should have been allocated in ip6_sooptmcopyin() */
		panic("soopt_mcopyin");

/* copyout mbuf chain data into soopt */
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
	struct mbuf *m0 = m;

	if (sopt->sopt_val == USER_ADDR_NULL)
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != kernproc) {
			error = copyout(mtod(m, char *), sopt->sopt_val,
			bcopy(mtod(m, char *),
			    CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val += m->m_len;
		valsize += m->m_len;
		/* enough soopt buffer should have been provided from user-land */
	sopt->sopt_valsize = valsize;
sohasoutofband(struct socket *so)
	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0)
		proc_signal(so->so_pgid, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);

sopoll(struct socket *so, int events, kauth_cred_t cred, void * wql)
#pragma unused(cred)
	struct proc *p = current_proc();

	so_update_last_owner_locked(so, PROC_NULL);
	so_update_policy(so);

	if (events & (POLLIN | POLLRDNORM))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			/*
			 * Darwin sets the flag first,
			 * BSD calls selrecord first
			 */
			so->so_rcv.sb_flags |= SB_SEL;
			selrecord(p, &so->so_rcv.sb_sel, wql);

		if (events & (POLLOUT | POLLWRNORM)) {
			/*
			 * Darwin sets the flag first,
			 * BSD calls selrecord first
			 */
			so->so_snd.sb_flags |= SB_SEL;
			selrecord(p, &so->so_snd.sb_sel, wql);

	socket_unlock(so, 1);
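
/*
 * Illustrative sketch (not part of the original source): how the event
 * mapping in sopoll() above appears to a user-space poll(2) caller.
 * POLLPRI/POLLRDBAND are reported while urgent (out-of-band) data is
 * pending, i.e. when so_oobmark is set or the socket is at the mark.
 */
#if 0	/* user-space example only */
#include <poll.h>

static int
wait_for_data_or_oob(int fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLPRI };

	/* Returns > 0 when normal or out-of-band data becomes readable. */
	return (poll(&pfd, 1, timeout_ms));
}
#endif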
soo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
#if !CONFIG_MACF_SOCKET
#endif /* MAC_SOCKET */
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	so_update_last_owner_locked(so, PROC_NULL);
	so_update_policy(so);

#if CONFIG_MACF_SOCKET
	if (mac_socket_check_kqfilter(proc_ucred(vfs_context_proc(ctx)),
		socket_unlock(so, 1);
#endif /* MAC_SOCKET */

	switch (kn->kn_filter) {
		kn->kn_fop = &soread_filtops;
		skl = &so->so_rcv.sb_sel.si_note;
		kn->kn_fop = &sowrite_filtops;
		skl = &so->so_snd.sb_sel.si_note;
		kn->kn_fop = &sock_filtops;
		skl = &so->so_klist;
		socket_unlock(so, 1);

	if (KNOTE_ATTACH(skl, kn)) {
		switch (kn->kn_filter) {
			so->so_rcv.sb_flags |= SB_KNOTE;
			so->so_snd.sb_flags |= SB_KNOTE;
			so->so_flags |= SOF_KNOTE;
			socket_unlock(so, 1);
	socket_unlock(so, 1);

filt_sordetach(struct knote *kn)
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	if (so->so_rcv.sb_flags & SB_KNOTE)
		if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
			so->so_rcv.sb_flags &= ~SB_KNOTE;
	socket_unlock(so, 1);
filt_soread(struct knote *kn, long hint)
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	if ((hint & SO_FILT_HINT_LOCKED) == 0)

	if (so->so_options & SO_ACCEPTCONN) {
		/*
		 * Radar 6615193 handle the listen case dynamically
		 * for kqueue read filter.  This allows listen() to be
		 * called after registering the kqueue EVFILT_READ.
		 */
		kn->kn_data = so->so_qlen;
		isempty = ! TAILQ_EMPTY(&so->so_comp);

		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);

	/* socket isn't a listener */
	kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;

	if (so->so_oobmark) {
		if (kn->kn_flags & EV_OOBAND) {
			kn->kn_data -= so->so_oobmark;
			if ((hint & SO_FILT_HINT_LOCKED) == 0)
				socket_unlock(so, 1);
		kn->kn_data = so->so_oobmark;
		kn->kn_flags |= EV_OOBAND;
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);

	if (so->so_state & SS_RCVATMARK) {
		if (kn->kn_flags & EV_OOBAND) {
			if ((hint & SO_FILT_HINT_LOCKED) == 0)
				socket_unlock(so, 1);
		kn->kn_flags |= EV_OOBAND;
	} else if (kn->kn_flags & EV_OOBAND) {
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);

	if (so->so_error) {	/* temporary udp error */
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);

	int64_t lowwat = so->so_rcv.sb_lowat;
	if (kn->kn_sfflags & NOTE_LOWAT) {
		if (kn->kn_sdata > so->so_rcv.sb_hiwat)
			lowwat = so->so_rcv.sb_hiwat;
		else if (kn->kn_sdata > lowwat)
			lowwat = kn->kn_sdata;

	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_unlock(so, 1);

	return ((kn->kn_flags & EV_OOBAND) || kn->kn_data >= lowwat);
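
/*
 * Illustrative sketch (not part of the original source): registering the
 * EVFILT_READ filter handled by filt_soread() above, with NOTE_LOWAT so
 * that kn_sdata supplies the low-water mark used in the readiness test.
 */
#if 0	/* user-space example only */
#include <sys/event.h>
#include <sys/time.h>

static int
wait_until_readable(int kq, int fd, int64_t lowat)
{
	struct kevent kev;

	/* Fire only once at least "lowat" bytes are available to read. */
	EV_SET(&kev, fd, EVFILT_READ, EV_ADD | EV_ENABLE, NOTE_LOWAT,
	    lowat, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		return (-1);
	return (kevent(kq, NULL, 0, &kev, 1, NULL));
}
#endif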
filt_sowdetach(struct knote *kn)
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	if (so->so_snd.sb_flags & SB_KNOTE)
		if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
			so->so_snd.sb_flags &= ~SB_KNOTE;
	socket_unlock(so, 1);

so_wait_for_if_feedback(struct socket *so)
	if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) &&
	    (so->so_state & SS_ISCONNECTED)) {
		struct inpcb *inp = sotoinpcb(so);
		if (INP_WAIT_FOR_IF_FEEDBACK(inp))

filt_sowrite(struct knote *kn, long hint)
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	if ((hint & SO_FILT_HINT_LOCKED) == 0)

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;

	if (so->so_error) {	/* temporary udp error */

	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {

	int64_t lowwat = so->so_snd.sb_lowat;
	if (kn->kn_sfflags & NOTE_LOWAT) {
		if (kn->kn_sdata > so->so_snd.sb_hiwat)
			lowwat = so->so_snd.sb_hiwat;
		else if (kn->kn_sdata > lowwat)
			lowwat = kn->kn_sdata;

	if (kn->kn_data >= lowwat) {
		if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0) {
			ret = tcp_notsent_lowat_check(so);

	if (so_wait_for_if_feedback(so))

	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_unlock(so, 1);
filt_sockdetach(struct knote *kn)
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	if ((so->so_flags & SOF_KNOTE) != 0)
		if (KNOTE_DETACH(&so->so_klist, kn))
			so->so_flags &= ~SOF_KNOTE;
	socket_unlock(so, 1);

filt_sockev(struct knote *kn, long hint)
	int ret = 0, locked = 0;
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
	long ev_hint = (hint & SO_FILT_HINT_EV);

	if ((hint & SO_FILT_HINT_LOCKED) == 0) {

	if (ev_hint & SO_FILT_HINT_CONNRESET) {
		if (kn->kn_sfflags & NOTE_CONNRESET)
			kn->kn_fflags |= NOTE_CONNRESET;
	if (ev_hint & SO_FILT_HINT_TIMEOUT) {
		if (kn->kn_sfflags & NOTE_TIMEOUT)
			kn->kn_fflags |= NOTE_TIMEOUT;
	if (ev_hint & SO_FILT_HINT_NOSRCADDR) {
		if (kn->kn_sfflags & NOTE_NOSRCADDR)
			kn->kn_fflags |= NOTE_NOSRCADDR;
	if (ev_hint & SO_FILT_HINT_IFDENIED) {
		if ((kn->kn_sfflags & NOTE_IFDENIED))
			kn->kn_fflags |= NOTE_IFDENIED;
	if (ev_hint & SO_FILT_HINT_KEEPALIVE) {
		if (kn->kn_sfflags & NOTE_KEEPALIVE)
			kn->kn_fflags |= NOTE_KEEPALIVE;
	if (ev_hint & SO_FILT_HINT_ADAPTIVE_WTIMO) {
		if (kn->kn_sfflags & NOTE_ADAPTIVE_WTIMO)
			kn->kn_fflags |= NOTE_ADAPTIVE_WTIMO;
	if (ev_hint & SO_FILT_HINT_ADAPTIVE_RTIMO) {
		if (kn->kn_sfflags & NOTE_ADAPTIVE_RTIMO)
			kn->kn_fflags |= NOTE_ADAPTIVE_RTIMO;
	if (ev_hint & SO_FILT_HINT_CONNECTED) {
		if (kn->kn_sfflags & NOTE_CONNECTED)
			kn->kn_fflags |= NOTE_CONNECTED;
	if (ev_hint & SO_FILT_HINT_DISCONNECTED) {
		if (kn->kn_sfflags & NOTE_DISCONNECTED)
			kn->kn_fflags |= NOTE_DISCONNECTED;
	if (ev_hint & SO_FILT_HINT_CONNINFO_UPDATED) {
		if (so->so_proto != NULL &&
		    (so->so_proto->pr_flags & PR_EVCONNINFO) &&
		    (kn->kn_sfflags & NOTE_CONNINFO_UPDATED))
			kn->kn_fflags |= NOTE_CONNINFO_UPDATED;

	if ((kn->kn_sfflags & NOTE_READCLOSED) &&
	    (so->so_state & SS_CANTRCVMORE))
		kn->kn_fflags |= NOTE_READCLOSED;

	if ((kn->kn_sfflags & NOTE_WRITECLOSED) &&
	    (so->so_state & SS_CANTSENDMORE))
		kn->kn_fflags |= NOTE_WRITECLOSED;

	if ((kn->kn_sfflags & NOTE_SUSPEND) &&
	    ((ev_hint & SO_FILT_HINT_SUSPEND) ||
	    (so->so_flags & SOF_SUSPENDED))) {
		kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
		kn->kn_fflags |= NOTE_SUSPEND;

	if ((kn->kn_sfflags & NOTE_RESUME) &&
	    ((ev_hint & SO_FILT_HINT_RESUME) ||
	    (so->so_flags & SOF_SUSPENDED) == 0)) {
		kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
		kn->kn_fflags |= NOTE_RESUME;

	if (so->so_error != 0) {
		kn->kn_data = so->so_error;
		kn->kn_flags |= EV_EOF;
		get_sockev_state(so, (u_int32_t *)&(kn->kn_data));

	if (kn->kn_fflags != 0)

		socket_unlock(so, 1);
get_sockev_state(struct socket *so, u_int32_t *statep)
	u_int32_t state = *(statep);

	if (so->so_state & SS_ISCONNECTED)
		state |= SOCKEV_CONNECTED;
		state &= ~(SOCKEV_CONNECTED);
	state |= ((so->so_state & SS_ISDISCONNECTED) ? SOCKEV_DISCONNECTED : 0);

#define	SO_LOCK_HISTORY_STR_LEN \
	(2 * SO_LCKDBG_MAX * (2 + (2 * sizeof (void *)) + 1) + 1)

__private_extern__ const char *
solockhistory_nr(struct socket *so)
	static char lock_history_str[SO_LOCK_HISTORY_STR_LEN];

	bzero(lock_history_str, sizeof (lock_history_str));
	for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) {
		n += snprintf(lock_history_str + n,
		    SO_LOCK_HISTORY_STR_LEN - n, "%p:%p ",
		    so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX],
		    so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX]);
	return (lock_history_str);
socket_lock(struct socket *so, int refcount)
	lr_saved = __builtin_return_address(0);

	if (so->so_proto->pr_lock) {
		error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
#ifdef MORE_LOCKING_DEBUG
		lck_mtx_assert(so->so_proto->pr_domain->dom_mtx,
		    LCK_MTX_ASSERT_NOTOWNED);
		lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
		so->lock_lr[so->next_lock_lr] = lr_saved;
		so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;

socket_unlock(struct socket *so, int refcount)
	lck_mtx_t *mutex_held;

	lr_saved = __builtin_return_address(0);

	if (so->so_proto == NULL) {
		panic("%s: null so_proto so=%p\n", __func__, so);

	if (so && so->so_proto->pr_unlock) {
		error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
		mutex_held = so->so_proto->pr_domain->dom_mtx;
#ifdef MORE_LOCKING_DEBUG
		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
		so->unlock_lr[so->next_unlock_lr] = lr_saved;
		so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;

			if (so->so_usecount <= 0) {
				panic("%s: bad refcount=%d so=%p (%d, %d, %d) "
				    "lrh=%s", __func__, so->so_usecount, so,
				    SOCK_DOM(so), so->so_type,
				    SOCK_PROTO(so), solockhistory_nr(so));

			if (so->so_usecount == 0)
				sofreelastref(so, 1);
		lck_mtx_unlock(mutex_held);
/* Called with socket locked, will unlock socket */
sofree(struct socket *so)
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	sofreelastref(so, 0);

soreference(struct socket *so)
	socket_lock(so, 1);	/* locks & take one reference on socket */
	socket_unlock(so, 0);	/* unlock only */

sodereference(struct socket *so)
	socket_unlock(so, 1);

/*
 * Set or clear SOF_MULTIPAGES on the socket to enable or disable the
 * possibility of using jumbo clusters.  Caller must ensure to hold
 */
somultipages(struct socket *so, boolean_t set)
		so->so_flags |= SOF_MULTIPAGES;
		so->so_flags &= ~SOF_MULTIPAGES;

so_isdstlocal(struct socket *so) {
	struct inpcb *inp = (struct inpcb *)so->so_pcb;

	if (SOCK_DOM(so) == PF_INET)
		return (inaddr_local(inp->inp_faddr));
	else if (SOCK_DOM(so) == PF_INET6)
		return (in6addr_local(&inp->in6p_faddr));
sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
	struct sockbuf *rcv, *snd;
	int err = 0, defunct;

	defunct = (so->so_flags & SOF_DEFUNCT);
		if (!(snd->sb_flags & rcv->sb_flags & SB_DROP)) {
			panic("%s: SB_DROP not set", __func__);

	if (so->so_flags & SOF_NODEFUNCT) {
			SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) "
			    "so 0x%llx [%d,%d] is not eligible for defunct "
			    "(%d)\n", __func__, proc_selfpid(), proc_pid(p),
			    level, (uint64_t)VM_KERNEL_ADDRPERM(so),
			    SOCK_DOM(so), SOCK_TYPE(so), err));
		so->so_flags &= ~SOF_NODEFUNCT;
		SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx "
		    "[%d,%d] defunct by force\n", __func__, proc_selfpid(),
		    proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so)));

	so->so_flags |= SOF_DEFUNCT;

	/* Prevent further data from being appended to the socket buffers */
	snd->sb_flags |= SB_DROP;
	rcv->sb_flags |= SB_DROP;

	/* Flush any existing data in the socket buffers */
	if (rcv->sb_cc != 0) {
		rcv->sb_flags &= ~SB_SEL;
		selthreadclear(&rcv->sb_sel);
	if (snd->sb_cc != 0) {
		snd->sb_flags &= ~SB_SEL;
		selthreadclear(&snd->sb_sel);

	SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx [%d,%d] %s "
	    "defunct\n", __func__, proc_selfpid(), proc_pid(p), level,
	    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so),
	    defunct ? "is already" : "marked as"));

sodefunct(struct proc *p, struct socket *so, int level)
	struct sockbuf *rcv, *snd;

	if (!(so->so_flags & SOF_DEFUNCT)) {
		panic("%s improperly called", __func__);
	if (so->so_state & SS_DEFUNCT)

	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		char s[MAX_IPv6_STR_LEN];
		char d[MAX_IPv6_STR_LEN];
		struct inpcb *inp = sotoinpcb(so);

		SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx [%s "
		    "%s:%d -> %s:%d] is now defunct [rcv_si 0x%x, snd_si 0x%x, "
		    "rcv_fl 0x%x, snd_fl 0x%x]\n", __func__, proc_selfpid(),
		    proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so),
		    (SOCK_TYPE(so) == SOCK_STREAM) ? "TCP" : "UDP",
		    inet_ntop(SOCK_DOM(so), ((SOCK_DOM(so) == PF_INET) ?
		    (void *)&inp->inp_laddr.s_addr : (void *)&inp->in6p_laddr),
		    s, sizeof (s)), ntohs(inp->in6p_lport),
		    inet_ntop(SOCK_DOM(so), (SOCK_DOM(so) == PF_INET) ?
		    (void *)&inp->inp_faddr.s_addr : (void *)&inp->in6p_faddr,
		    d, sizeof (d)), ntohs(inp->in6p_fport),
		    (uint32_t)rcv->sb_sel.si_flags,
		    (uint32_t)snd->sb_sel.si_flags,
		    rcv->sb_flags, snd->sb_flags));
		SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx "
		    "[%d,%d] is now defunct [rcv_si 0x%x, snd_si 0x%x, "
		    "rcv_fl 0x%x, snd_fl 0x%x]\n", __func__, proc_selfpid(),
		    proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so), (uint32_t)rcv->sb_sel.si_flags,
		    (uint32_t)snd->sb_sel.si_flags, rcv->sb_flags,

	/*
	 * Unwedge threads blocked on sbwait() and sb_lock().
	 */
	if (rcv->sb_flags & SB_LOCK)
		sbunlock(rcv, TRUE);	/* keep socket locked */
	if (snd->sb_flags & SB_LOCK)
		sbunlock(snd, TRUE);	/* keep socket locked */

	/*
	 * Flush the buffers and disconnect.  We explicitly call shutdown
	 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
	 * states are set for the socket.  This would also flush out data
	 * hanging off the receive list of this socket.
	 */
	(void) soshutdownlock(so, SHUT_RD);
	(void) soshutdownlock(so, SHUT_WR);
	(void) sodisconnectlocked(so);

	/*
	 * Explicitly handle connectionless-protocol disconnection
	 * and release any remaining data in the socket buffers.
	 */
	if (!(so->so_flags & SS_ISDISCONNECTED))
		(void) soisdisconnected(so);

	if (so->so_error == 0)
		so->so_error = EBADF;

	if (rcv->sb_cc != 0) {
		rcv->sb_flags &= ~SB_SEL;
		selthreadclear(&rcv->sb_sel);
	if (snd->sb_cc != 0) {
		snd->sb_flags &= ~SB_SEL;
		selthreadclear(&snd->sb_sel);
	so->so_state |= SS_DEFUNCT;
__private_extern__ int
so_set_recv_anyif(struct socket *so, int optval)
	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
	if (SOCK_DOM(so) == PF_INET) {
			sotoinpcb(so)->inp_flags |= INP_RECV_ANYIF;
			sotoinpcb(so)->inp_flags &= ~INP_RECV_ANYIF;

__private_extern__ int
so_get_recv_anyif(struct socket *so)
	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
	if (SOCK_DOM(so) == PF_INET) {
		ret = (sotoinpcb(so)->inp_flags & INP_RECV_ANYIF) ? 1 : 0;
int
so_set_restrictions(struct socket *so, uint32_t vals)
{
	int nocell_old, nocell_new;

	/*
	 * Deny-type restrictions are trapdoors; once set they cannot be
	 * unset for the lifetime of the socket.  This allows them to be
	 * issued by a framework on behalf of the application without
	 * having to worry that they can be undone.
	 *
	 * Note here that socket-level restrictions override any protocol
	 * level restrictions.  For instance, a SO_RESTRICT_DENY_CELLULAR
	 * restriction issued on the socket has a higher precedence
	 * than INP_NO_IFT_CELLULAR.  The latter is affected by the UUID
	 * policy PROC_UUID_NO_CELLULAR for unrestricted sockets only,
	 * i.e. when SO_RESTRICT_DENY_CELLULAR has not been issued.
	 */
	nocell_old = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
	so->so_restrictions |= (vals & (SO_RESTRICT_DENY_IN |
	    SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR));
	nocell_new = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);

	/* other than deny cellular, there's nothing more to do */
	if ((nocell_new - nocell_old) == 0)
		return (0);

	/* we can only set, not clear restrictions */
	VERIFY((nocell_new - nocell_old) > 0);

#if INET6
	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
#else
	if (SOCK_DOM(so) == PF_INET) {
#endif /* !INET6 */
		/* if deny cellular is now set, do what's needed for INPCB */
		inp_set_nocellular(sotoinpcb(so));
	}

	return (0);
}
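
/*
 * Return the deny-type restrictions currently recorded on the socket as a
 * mask of SO_RESTRICT_DENY_{IN,OUT,CELLULAR} bits.
 */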
uint32_t
so_get_restrictions(struct socket *so)
{
	return (so->so_restrictions & (SO_RESTRICT_DENY_IN |
	    SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR));
}
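
/*
 * sockaddr_entry/sockaddr_list helpers.  A sockaddr_list is a tail queue
 * of sockaddr_entry nodes, each owning a separately-allocated sockaddr;
 * the routines below allocate, free, duplicate, insert and remove entries
 * while keeping sl_cnt consistent with the queue contents.
 *
 * Allocate an empty sockaddr_entry from its zone; the allocation may block
 * only when `how' is M_WAITOK.
 */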
struct sockaddr_entry *
sockaddrentry_alloc(int how)
{
	struct sockaddr_entry *se;

	se = (how == M_WAITOK) ? zalloc(se_zone) : zalloc_noblock(se_zone);
	if (se != NULL)
		bzero(se, se_zone_size);

	return (se);
}
void
sockaddrentry_free(struct sockaddr_entry *se)
{
	if (se->se_addr != NULL) {
		FREE(se->se_addr, M_SONAME);
		se->se_addr = NULL;
	}
	zfree(se_zone, se);
}
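
/*
 * Duplicate a sockaddr_entry, deep-copying its sockaddr.  Returns NULL if
 * either the entry or the embedded sockaddr cannot be allocated.
 */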
struct sockaddr_entry *
sockaddrentry_dup(const struct sockaddr_entry *src_se, int how)
{
	struct sockaddr_entry *dst_se;

	dst_se = sockaddrentry_alloc(how);
	if (dst_se != NULL) {
		int len = src_se->se_addr->sa_len;

		MALLOC(dst_se->se_addr, struct sockaddr *,
		    len, M_SONAME, how | M_ZERO);
		if (dst_se->se_addr != NULL) {
			bcopy(src_se->se_addr, dst_se->se_addr, len);
		} else {
			sockaddrentry_free(dst_se);
			dst_se = NULL;
		}
	}

	return (dst_se);
}
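
/*
 * Allocate an empty sockaddr_list and initialize its tail queue head; the
 * allocation may block only when `how' is M_WAITOK.
 */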
struct sockaddr_list *
sockaddrlist_alloc(int how)
{
	struct sockaddr_list *sl;

	sl = (how == M_WAITOK) ? zalloc(sl_zone) : zalloc_noblock(sl_zone);
	if (sl != NULL) {
		bzero(sl, sl_zone_size);
		TAILQ_INIT(&sl->sl_head);
	}

	return (sl);
}
void
sockaddrlist_free(struct sockaddr_list *sl)
{
	struct sockaddr_entry *se, *tse;

	TAILQ_FOREACH_SAFE(se, &sl->sl_head, se_link, tse) {
		sockaddrlist_remove(sl, se);
		sockaddrentry_free(se);
	}
	VERIFY(sl->sl_cnt == 0 && TAILQ_EMPTY(&sl->sl_head));
	zfree(sl_zone, sl);
}
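
/*
 * Attach an entry to the tail of a list, or detach it again; both routines
 * assert the entry's SEF_ATTACHED state and keep sl_cnt in step with the
 * queue.
 */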
void
sockaddrlist_insert(struct sockaddr_list *sl, struct sockaddr_entry *se)
{
	VERIFY(!(se->se_flags & SEF_ATTACHED));
	se->se_flags |= SEF_ATTACHED;
	TAILQ_INSERT_TAIL(&sl->sl_head, se, se_link);
	sl->sl_cnt++;
	VERIFY(sl->sl_cnt != 0);
}
void
sockaddrlist_remove(struct sockaddr_list *sl, struct sockaddr_entry *se)
{
	VERIFY(se->se_flags & SEF_ATTACHED);
	se->se_flags &= ~SEF_ATTACHED;
	VERIFY(sl->sl_cnt != 0);
	sl->sl_cnt--;
	TAILQ_REMOVE(&sl->sl_head, se, se_link);
}
struct sockaddr_list *
sockaddrlist_dup(const struct sockaddr_list *src_sl, int how)
{
	struct sockaddr_entry *src_se, *tse;
	struct sockaddr_list *dst_sl;

	dst_sl = sockaddrlist_alloc(how);
	if (dst_sl == NULL)
		return (NULL);

	TAILQ_FOREACH_SAFE(src_se, &src_sl->sl_head, se_link, tse) {
		struct sockaddr_entry *dst_se;

		if (src_se->se_addr == NULL)
			continue;

		dst_se = sockaddrentry_dup(src_se, how);
		if (dst_se == NULL) {
			sockaddrlist_free(dst_sl);
			return (NULL);
		}

		sockaddrlist_insert(dst_sl, dst_se);
	}
	VERIFY(src_sl->sl_cnt == dst_sl->sl_cnt);

	return (dst_sl);
}
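
/*
 * Record an "effective" (delegated) pid on the socket so that networking
 * policy decisions can be made against the process the traffic is really
 * generated for, rather than the socket's owner.  This is normally driven
 * by a privileged socket option (SO_DELEGATED in this code base, as far as
 * this excerpt shows); issuers other than the socket's recorded owner must
 * hold PRIV_NET_PRIVILEGED_SOCKET_DELEGATE.
 */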
int
so_set_effective_pid(struct socket *so, int epid, struct proc *p)
{
	struct proc *ep = PROC_NULL;
	int error = 0;

	/* pid 0 is reserved for kernel */
	if (epid == 0) {
		error = EINVAL;
		goto done;
	}

	/*
	 * If this is an in-kernel socket, prevent its delegate
	 * association from changing unless the socket option is
	 * coming from within the kernel itself.
	 */
	if (so->last_pid == 0 && p != kernproc) {
		error = EACCES;
		goto done;
	}

	/*
	 * If this is issued by a process that's recorded as the
	 * real owner of the socket, or if the pid is the same as
	 * the process's own pid, then proceed.  Otherwise ensure
	 * that the issuing process has the necessary privileges.
	 */
	if (epid != so->last_pid || epid != proc_pid(p)) {
		if ((error = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
			error = EACCES;
			goto done;
		}
	}

	/* Find the process that corresponds to the effective pid */
	if ((ep = proc_find(epid)) == PROC_NULL) {
		error = ESRCH;
		goto done;
	}

	/*
	 * If a process tries to delegate the socket to itself, then
	 * there's really nothing to do; treat it as a way for the
	 * delegate association to be cleared.  Note that we check
	 * the passed-in proc rather than calling proc_selfpid(),
	 * as we need to check the process issuing the socket option
	 * which could be kernproc.  Given that we don't allow 0 for
	 * effective pid, it means that a delegated in-kernel socket
	 * stays delegated during its lifetime (which is probably OK.)
	 */
	if (epid == proc_pid(p)) {
		so->so_flags &= ~SOF_DELEGATED;
		so->e_upid = 0;
		so->e_pid = 0;
		uuid_clear(so->e_uuid);
	} else {
		so->so_flags |= SOF_DELEGATED;
		so->e_upid = proc_uniqueid(ep);
		so->e_pid = proc_pid(ep);
		proc_getexecutableuuid(ep, so->e_uuid, sizeof (so->e_uuid));
	}

done:
	if (error == 0 && net_io_policy_log) {
		uuid_string_t buf;

		uuid_unparse(so->e_uuid, buf);
		log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
		    "euuid %s%s\n", __func__, proc_name_address(p),
		    proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), so->e_pid, proc_name_address(ep), buf,
		    ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
	} else if (error != 0 && net_io_policy_log) {
		log(LOG_ERR, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
		    "ERROR (%d)\n", __func__, proc_name_address(p),
		    proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), epid, (ep == PROC_NULL) ? "PROC_NULL" :
		    proc_name_address(ep), error);
	}

	/* Release the reference taken by proc_find() above */
	if (ep != PROC_NULL)
		proc_rele(ep);

	return (error);
}
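
/*
 * Same idea as so_set_effective_pid(), but the delegate is identified only
 * by an executable UUID (presumably via a companion socket option such as
 * SO_DELEGATED_UUID; the entry point is outside this excerpt).  Since no
 * pid is available, the socket's real {pid, upid} are inherited as the
 * effective identity.
 */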
int
so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p)
{
	uuid_string_t buf;
	uuid_t uuid;
	int error = 0;

	/* UUID must not be all-zeroes (reserved for kernel) */
	if (uuid_is_null(euuid)) {
		error = EINVAL;
		goto done;
	}

	/*
	 * If this is an in-kernel socket, prevent its delegate
	 * association from changing unless the socket option is
	 * coming from within the kernel itself.
	 */
	if (so->last_pid == 0 && p != kernproc) {
		error = EACCES;
		goto done;
	}

	/* Get the UUID of the issuing process */
	proc_getexecutableuuid(p, uuid, sizeof (uuid));

	/*
	 * If this is issued by a process that's recorded as the
	 * real owner of the socket, or if the uuid is the same as
	 * the process's own uuid, then proceed.  Otherwise ensure
	 * that the issuing process has the necessary privileges.
	 */
	if (uuid_compare(euuid, so->last_uuid) != 0 ||
	    uuid_compare(euuid, uuid) != 0) {
		if ((error = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
			error = EACCES;
			goto done;
		}
	}

	/*
	 * If a process tries to delegate the socket to itself, then
	 * there's really nothing to do; treat it as a way for the
	 * delegate association to be cleared.  Note that we check
	 * the uuid of the passed-in proc rather than that of the
	 * current process, as we need to check the process issuing
	 * the socket option which could be kernproc itself.  Given
	 * that we don't allow 0 for effective uuid, it means that
	 * a delegated in-kernel socket stays delegated during its
	 * lifetime (which is okay.)
	 */
	if (uuid_compare(euuid, uuid) == 0) {
		so->so_flags &= ~SOF_DELEGATED;
		so->e_upid = 0;
		so->e_pid = 0;
		uuid_clear(so->e_uuid);
	} else {
		so->so_flags |= SOF_DELEGATED;
		/*
		 * Unlike so_set_effective_pid(), we only have the UUID
		 * here and the process ID is not known.  Inherit the
		 * real {pid,upid} of the socket.
		 */
		so->e_upid = so->last_upid;
		so->e_pid = so->last_pid;
		uuid_copy(so->e_uuid, euuid);
	}

done:
	if (error == 0 && net_io_policy_log) {
		uuid_unparse(so->e_uuid, buf);
		log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s\n", __func__, proc_name_address(p), proc_pid(p),
		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), so->e_pid, buf,
		    ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
	} else if (error != 0 && net_io_policy_log) {
		uuid_unparse(euuid, buf);
		log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] euuid %s "
		    "ERROR (%d)\n", __func__, proc_name_address(p), proc_pid(p),
		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), buf, error);
	}

	return (error);
}
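
/*
 * Post a network-policy change as a kernel event on KEV_NETWORK_CLASS /
 * KEV_NETPOLICY_SUBCLASS so that interested user-space listeners can
 * react.  A sketch of a caller, using a hypothetical event structure and
 * event code purely for illustration (neither is defined in this excerpt):
 *
 *	struct kev_netpolicy_ifdenied ev;
 *
 *	bzero(&ev, sizeof (ev));
 *	(fill in ev.ev_data and any event-specific fields here)
 *	netpolicy_post_msg(KEV_NETPOLICY_IFDENIED, &ev.ev_data, sizeof (ev));
 */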
void
netpolicy_post_msg(uint32_t ev_code, struct netpolicy_event_data *ev_data,
    uint32_t ev_datalen)
{
	struct kev_msg ev_msg;

	/*
	 * A netpolicy event always starts with a netpolicy_event_data
	 * structure, but the caller can provide for a longer event
	 * structure to post, depending on the event code.
	 */
	VERIFY(ev_data != NULL && ev_datalen >= sizeof (*ev_data));

	bzero(&ev_msg, sizeof (ev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_NETPOLICY_SUBCLASS;
	ev_msg.event_code = ev_code;

	ev_msg.dv[0].data_ptr = ev_data;
	ev_msg.dv[0].data_length = ev_datalen;

	kev_post_msg(&ev_msg);
}