/*
 * Copyright (c) 1998-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/kdebug.h>
#include <net/route.h>
#include <net/ntstat.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <kern/zalloc.h>
#include <kern/locks.h>
#include <machine/limits.h>
#include <libkern/OSAtomic.h>
#include <pexpert/pexpert.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <sys/kpi_mbuf.h>
#include <sys/mcache.h>

#include <security/mac.h>
#include <security/mac_framework.h>
#ifdef __APPLE__
int so_cache_timeouts = 0;
int so_cache_max_freed = 0;
int cached_sock_count = 0;
__private_extern__ int max_cached_sock_count = MAX_CACHED_SOCKETS;
struct socket *socket_cache_head = 0;
struct socket *socket_cache_tail = 0;
u_int32_t so_cache_time = 0;
int so_cache_init_done = 0;
struct zone *so_cache_zone;

static lck_grp_t *so_cache_mtx_grp;
static lck_attr_t *so_cache_mtx_attr;
static lck_grp_attr_t *so_cache_mtx_grp_attr;
lck_mtx_t *so_cache_mtx;

#include <machine/limits.h>
static void	filt_sordetach(struct knote *kn);
static int	filt_soread(struct knote *kn, long hint);
static void	filt_sowdetach(struct knote *kn);
static int	filt_sowrite(struct knote *kn, long hint);
static void	filt_sockdetach(struct knote *kn);
static int	filt_sockev(struct knote *kn, long hint);

static int
sooptcopyin_timeval(struct sockopt *sopt, struct timeval *tv_p);

static int
sooptcopyout_timeval(struct sockopt *sopt, const struct timeval *tv_p);
static struct filterops soread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_sordetach,
	.f_event = filt_soread,
};

static struct filterops sowrite_filtops = {
	.f_isfd = 1,
	.f_detach = filt_sowdetach,
	.f_event = filt_sowrite,
};

static struct filterops sock_filtops = {
	.f_isfd = 1,
	.f_detach = filt_sockdetach,
	.f_event = filt_sockev,
};
#define	EVEN_MORE_LOCKING_DEBUG	0
int socket_debug = 0;
int socket_zone = M_SOCKET;
so_gen_t so_gencnt;	/* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)
#define	DBG_FNC_SOSEND		NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
#define	DBG_FNC_SORECEIVE	NETDBG_CODE(DBG_NETSOCK, (8 << 8))
#define	DBG_FNC_SOSHUTDOWN	NETDBG_CODE(DBG_NETSOCK, (9 << 8))

#define	MAX_SOOPTGETM_SIZE	(128 * MCLBYTES)
SYSCTL_DECL(_kern_ipc);

int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW | CTLFLAG_LOCKED,
    &somaxconn, 0, "");

/* Should we get a maximum also ??? */
static int sosendmaxchain = 65536;
static int sosendminchain = 16384;
static int sorecvmincopy = 16384;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sosendminchain, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sorecvmincopy, 0, "");
/*
 * Set to enable jumbo clusters (if available) for large writes when
 * the socket is marked with SOF_MULTIPAGES; see below.
 */
int sosendjcl = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sosendjcl, 0, "");

/*
 * Set this to ignore SOF_MULTIPAGES and use jumbo clusters for large
 * writes on the socket for all protocols on any network interfaces,
 * depending upon sosendjcl above.  Be extra careful when setting this
 * to 1, because sending down packets that cross physical pages down to
 * broken drivers (those that falsely assume that the physical pages
 * are contiguous) might lead to system panics or silent data corruption.
 * When set to 0, the system will respect SOF_MULTIPAGES, which is set
 * only for TCP sockets whose outgoing interface is IFNET_MULTIPAGES
 * capable.  Set this to 1 only for testing/debugging purposes.
 */
int sosendjcl_ignore_capab = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl_ignore_capab, 0, "");
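
/*
 * Editor's note (illustrative, not part of the original source): the
 * SYSCTL_INT() entries above surface these knobs as kern.ipc.* OIDs, so
 * they can be inspected or tuned from user space.  A minimal userland
 * sketch using the standard sysctlbyname(3) interface:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int val = 0;
 *		size_t len = sizeof (val);
 *
 *		// Read the jumbo-cluster capability override (default 0).
 *		if (sysctlbyname("kern.ipc.sosendjcl_ignore_capab",
 *		    &val, &len, NULL, 0) == 0)
 *			printf("sosendjcl_ignore_capab = %d\n", val);
 *		return (0);
 *	}
 */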
int sodefunctlog = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sodefunctlog, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sodefunctlog, 0, "");

int sothrottlelog = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sothrottlelog, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sothrottlelog, 0, "");
/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */

extern void postevent(struct socket *, struct sockbuf *, int);
extern void evsofree(struct socket *);
extern int tcp_notsent_lowat_check(struct socket *so);

/* TODO: these should be in header file */
extern int get_inpcb_str_size(void);
extern int get_tcp_str_size(void);
extern struct domain *pffinddomain(int);
extern struct protosw *pffindprotonotype(int, int);
extern int soclose_locked(struct socket *);
extern int soo_kqfilter(struct fileproc *, struct knote *, struct proc *);

vm_size_t so_cache_zone_element_size;

static int sodelayed_copy(struct socket *, struct uio *, struct mbuf **, int *);
static void cached_sock_alloc(struct socket **, int);
static void cached_sock_free(struct socket *);
static void so_cache_timer(void *);

void soclose_wait_locked(struct socket *so);
int so_isdstlocal(struct socket *so);
/*
 * SOTCDB_NO_DSCP is set by default, to prevent the networking stack from
 * setting the DSCP code on the packet based on the service class; see
 * <rdar://problem/11277343> for details.
 */
__private_extern__ u_int32_t sotcdb = SOTCDB_NO_DSCP;
SYSCTL_INT(_kern_ipc, OID_AUTO, sotcdb, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sotcdb, 0, "");
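
/*
 * Editor's note (illustrative, not part of the original source): the
 * service class referred to above is the per-socket traffic class that
 * so_set_default_traffic_class() initializes below.  A hedged userland
 * sketch of overriding it, assuming <sys/socket.h> on this platform
 * exposes SO_TRAFFIC_CLASS and the SO_TC_* constants:
 *
 *	int tc = SO_TC_BK;	// mark the socket as background traffic
 *	if (setsockopt(s, SOL_SOCKET, SO_TRAFFIC_CLASS,
 *	    &tc, sizeof (tc)) == -1)
 *		perror("setsockopt(SO_TRAFFIC_CLASS)");
 */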
void
socketinit(void)
{
	vm_size_t str_size;

	if (so_cache_init_done) {
		printf("socketinit: already called...\n");
		return;
	}

	PE_parse_boot_argn("socket_debug", &socket_debug, sizeof (socket_debug));

	/*
	 * allocate lock group attribute and group for socket cache mutex
	 */
	so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();

	so_cache_mtx_grp = lck_grp_alloc_init("so_cache",
	    so_cache_mtx_grp_attr);

	/*
	 * allocate the lock attribute for socket cache mutex
	 */
	so_cache_mtx_attr = lck_attr_alloc_init();

	so_cache_init_done = 1;

	/* cached sockets mutex */
	so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr);

	if (so_cache_mtx == NULL)
		return;		/* we're hosed... */

	str_size = (vm_size_t)(sizeof (struct socket) + 4 +
	    get_inpcb_str_size() + 4 + get_tcp_str_size());

	so_cache_zone = zinit(str_size, 120000*str_size, 8192, "socache zone");
	zone_change(so_cache_zone, Z_CALLERACCT, FALSE);
	zone_change(so_cache_zone, Z_NOENCRYPT, TRUE);
#if TEMPDEBUG
	printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
#endif
	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));

	so_cache_zone_element_size = str_size;

	_CASSERT(_SO_TC_MAX == SO_TC_STATS_MAX);

	socket_tclass_init();

	socket_flowadv_init();
}
static void
cached_sock_alloc(struct socket **so, int waitok)
{
	caddr_t temp;
	register uintptr_t offset;

	lck_mtx_lock(so_cache_mtx);

	if (cached_sock_count) {
		cached_sock_count--;
		*so = socket_cache_head;
		if (*so == 0)
			panic("cached_sock_alloc: cached sock is null");

		socket_cache_head = socket_cache_head->cache_next;
		if (socket_cache_head)
			socket_cache_head->cache_prev = 0;
		else
			socket_cache_tail = 0;

		lck_mtx_unlock(so_cache_mtx);

		temp = (*so)->so_saved_pcb;
		bzero((caddr_t)*so, sizeof (struct socket));
#if TEMPDEBUG
		kprintf("cached_sock_alloc - retrieving cached sock %p - "
		    "count == %d\n", *so, cached_sock_count);
#endif
		(*so)->so_saved_pcb = temp;
		(*so)->cached_in_sock_layer = 1;
	} else {
#if TEMPDEBUG
		kprintf("Allocating cached sock %p from memory\n", *so);
#endif
		lck_mtx_unlock(so_cache_mtx);

		if (waitok)
			*so = (struct socket *)zalloc(so_cache_zone);
		else
			*so = (struct socket *)zalloc_noblock(so_cache_zone);

		if (*so == 0)
			return;

		bzero((caddr_t)*so, sizeof (struct socket));

		/*
		 * Define offsets for extra structures into our single block of
		 * memory. Align extra structures on longword boundaries.
		 */

		offset = (uintptr_t) *so;
		offset += sizeof (struct socket);

		offset = ALIGN(offset);

		(*so)->so_saved_pcb = (caddr_t)offset;
		offset += get_inpcb_str_size();

		offset = ALIGN(offset);

		((struct inpcb *)(void *)(*so)->so_saved_pcb)->inp_saved_ppcb =
		    (caddr_t)offset;
#if TEMPDEBUG
		kprintf("Allocating cached socket - %p, pcb=%p tcpcb=%p\n",
		    *so, (*so)->so_saved_pcb,
		    ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
#endif
	}

	(*so)->cached_in_sock_layer = 1;
}
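
/*
 * Editor's note (illustrative, not part of the original source): each
 * so_cache_zone element is one contiguous block laid out roughly as
 *
 *	struct socket | pad | inpcb storage | pad | tcpcb storage
 *
 * which is why socketinit() sized the element as
 * sizeof (struct socket) + 4 + get_inpcb_str_size() + 4 + get_tcp_str_size().
 * The offset arithmetic above simply carves that block up:
 *
 *	uintptr_t off = (uintptr_t)*so;
 *	off += sizeof (struct socket);
 *	off = ALIGN(off);
 *	(*so)->so_saved_pcb = (caddr_t)off;		// inpcb storage
 *	off += get_inpcb_str_size();
 *	off = ALIGN(off);
 *	... ->inp_saved_ppcb = (caddr_t)off;		// tcpcb storage
 */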
static void
cached_sock_free(struct socket *so)
{

	lck_mtx_lock(so_cache_mtx);

	if (++cached_sock_count > max_cached_sock_count) {
		--cached_sock_count;
		lck_mtx_unlock(so_cache_mtx);
#if TEMPDEBUG
		kprintf("Freeing overflowed cached socket %p\n", so);
#endif
		zfree(so_cache_zone, so);
	} else {
#if TEMPDEBUG
		kprintf("Freeing socket %p into cache\n", so);
#endif
		if (so_cache_hw < cached_sock_count)
			so_cache_hw = cached_sock_count;

		so->cache_next = socket_cache_head;
		so->cache_prev = 0;
		if (socket_cache_head)
			socket_cache_head->cache_prev = so;
		else
			socket_cache_tail = so;

		so->cache_timestamp = so_cache_time;
		socket_cache_head = so;
		lck_mtx_unlock(so_cache_mtx);
#if TEMPDEBUG
		kprintf("Freed cached sock %p into cache - count is %d\n",
		    so, cached_sock_count);
#endif
	}
}
void
so_update_last_owner_locked(struct socket *so, proc_t self)
{
	if (so->last_pid != 0) {
		if (self == NULL)
			self = current_proc();

		if (self) {
			so->last_upid = proc_uniqueid(self);
			so->last_pid = proc_pid(self);
		}
	}
}
static void
so_cache_timer(__unused void *dummy)
{
	register struct socket *p;
	register int n_freed = 0;

	lck_mtx_lock(so_cache_mtx);

	++so_cache_time;

	while ((p = socket_cache_tail)) {
		if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
			break;

		so_cache_timeouts++;

		if ((socket_cache_tail = p->cache_prev))
			p->cache_prev->cache_next = 0;
		if (--cached_sock_count == 0)
			socket_cache_head = 0;

		zfree(so_cache_zone, p);

		if (++n_freed >= SO_CACHE_MAX_FREE_BATCH) {
			so_cache_max_freed++;
			break;
		}
	}
	lck_mtx_unlock(so_cache_mtx);

	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
}
#endif /* __APPLE__ */
/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
static struct socket *
soalloc(int waitok, int dom, int type)
{
	struct socket *so;

	if ((dom == PF_INET) && (type == SOCK_STREAM)) {
		cached_sock_alloc(&so, waitok);
	} else {
		MALLOC_ZONE(so, struct socket *, sizeof (*so), socket_zone,
		    M_WAITOK);
		if (so != NULL)
			bzero(so, sizeof (*so));
	}
	if (so != NULL) {
		/* XXX race condition for reentrant kernel */
		//###LD Atomic add for so_gencnt
		so->so_gencnt = ++so_gencnt;
		so->so_zone = socket_zone;
#if CONFIG_MACF_SOCKET
		/* Convert waitok to M_WAITOK/M_NOWAIT for MAC Framework. */
		if (mac_socket_label_init(so, !waitok) != 0) {
			sodealloc(so);
			return (NULL);
		}
#endif /* MAC_SOCKET */
	}

	return (so);
}
/*
 * Returns:	0			Success
 *	<pru_attach>:ENOBUFS[AF_UNIX]
 *	<pru_attach>:ENOBUFS[TCP]
 *	<pru_attach>:ENOMEM[TCP]
 *	<pru_attach>:EISCONN[TCP]
 *	<pru_attach>:???		[other protocol families, IPSEC]
 */
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc *p = current_proc();
	register struct protosw *prp;
	register struct socket *so;
	register int error = 0;

#if TCPDEBUG
	extern int tcpconsdebug;
#endif
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);

	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) {
		if (pffinddomain(dom) == NULL) {
			return (EAFNOSUPPORT);
		}
		if (proto != 0) {
			if (pffindprotonotype(dom, proto) != NULL) {
				return (EPROTOTYPE);
			}
		}
		return (EPROTONOSUPPORT);
	}
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(1, dom, type);
	if (so == 0)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	so->last_upid = proc_uniqueid(p);
	so->last_pid = proc_pid(p);

	so->so_cred = kauth_cred_proc_ref(p);
	if (!suser(kauth_cred_get(), NULL))
		so->so_state = SS_PRIV;

	so->so_proto = prp;
	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	so->so_rcv.sb_so = so->so_snd.sb_so = so;

	so->next_lock_lr = 0;
	so->next_unlock_lr = 0;

#if CONFIG_MACF_SOCKET
	mac_socket_label_associate(kauth_cred_get(), so);
#endif /* MAC_SOCKET */

	//### Attachement will create the per pcb lock if necessary and increase refcount
	/*
	 * for creation, make sure it's done before
	 * socket is inserted in lists
	 */
	so->so_usecount++;

	error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error) {
		/*
		 * Warning:
		 * If so_pcb is not zero, the socket will be leaked,
		 * so protocol attachment handler must be coded carefully
		 */
		so->so_state |= SS_NOFDREF;
		so->so_usecount--;
		sofreelastref(so, 1);	/* will deallocate the socket */
		return (error);
	}
	prp->pr_domain->dom_refs++;
	TAILQ_INIT(&so->so_evlist);

	/* Attach socket filters for this protocol */
	sflt_initsock(so);
#if TCPDEBUG
	if (tcpconsdebug == 2)
		so->so_options |= SO_DEBUG;
#endif
	so_set_default_traffic_class(so);

	/*
	 * If this is a background thread/task, mark the socket as such.
	 */
	if (proc_get_self_isbackground() != 0) {
		socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND);
		so->so_background_thread = current_thread();
	}

	/*
	 * Don't mark Unix domain or system sockets as eligible for defunct
	 * by default.
	 */
	if (dom == PF_LOCAL || dom == PF_SYSTEM)
		so->so_flags |= SOF_NODEFUNCT;

	*aso = so;
	return (0);
}
/*
 * Returns:	0			Success
 *	<pru_bind>:EINVAL		Invalid argument [COMMON_START]
 *	<pru_bind>:EAFNOSUPPORT		Address family not supported
 *	<pru_bind>:EADDRNOTAVAIL	Address not available.
 *	<pru_bind>:EINVAL		Invalid argument
 *	<pru_bind>:EAFNOSUPPORT		Address family not supported [notdef]
 *	<pru_bind>:EACCES		Permission denied
 *	<pru_bind>:EADDRINUSE		Address in use
 *	<pru_bind>:EAGAIN		Resource unavailable, try again
 *	<pru_bind>:EPERM		Operation not permitted
 *
 * Notes:	It's not possible to fully enumerate the return codes above,
 *		since socket filter authors and protocol family authors may
 *		not choose to limit their error returns to those listed, even
 *		though this may result in some software operating incorrectly.
 *
 *		The error codes which are enumerated above are those known to
 *		be returned by the tcp_usr_bind function supplied.
 */
int
sobind(struct socket *so, struct sockaddr *nam)
{
	struct proc *p = current_proc();
	int error = 0;

	socket_lock(so, 1);
	VERIFY(so->so_usecount > 1);
	so_update_last_owner_locked(so, p);

	/*
	 * If this is a bind request on a socket that has been marked
	 * as inactive, reject it now before we go any further.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		error = EINVAL;
		SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
		    __func__, proc_pid(p), so, INP_SOCKAF(so), INP_SOCKTYPE(so),
		    error));
		goto out;
	}

	/* Socket filter */
	error = sflt_bind(so, nam);

	if (error == 0)
		error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
out:
	socket_unlock(so, 1);

	if (error == EJUSTRETURN)
		error = 0;

	return (error);
}
void
sodealloc(struct socket *so)
{
	kauth_cred_unref(&so->so_cred);

	/* Remove any filters */
	sflt_termsock(so);

	so->so_gencnt = ++so_gencnt;

#if CONFIG_MACF_SOCKET
	mac_socket_label_destroy(so);
#endif /* MAC_SOCKET */
	if (so->cached_in_sock_layer == 1) {
		cached_sock_free(so);
	} else {
		if (so->cached_in_sock_layer == -1)
			panic("sodealloc: double dealloc: so=%p\n", so);
		so->cached_in_sock_layer = -1;
		FREE_ZONE(so, sizeof (*so), so->so_zone);
	}
}
/*
 * Returns:	0			Success
 *	<pru_listen>:EINVAL[AF_UNIX]
 *	<pru_listen>:EINVAL[TCP]
 *	<pru_listen>:EADDRNOTAVAIL[TCP]	Address not available.
 *	<pru_listen>:EINVAL[TCP]	Invalid argument
 *	<pru_listen>:EAFNOSUPPORT[TCP]	Address family not supported [notdef]
 *	<pru_listen>:EACCES[TCP]	Permission denied
 *	<pru_listen>:EADDRINUSE[TCP]	Address in use
 *	<pru_listen>:EAGAIN[TCP]	Resource unavailable, try again
 *	<pru_listen>:EPERM[TCP]		Operation not permitted
 *
 * Notes:	Other <pru_listen> returns depend on the protocol family; all
 *		<sf_listen> returns depend on what the filter author causes
 *		their filter to return.
 */
int
solisten(struct socket *so, int backlog)
{
	struct proc *p = current_proc();
	int error = 0;

	socket_lock(so, 1);

	so_update_last_owner_locked(so, p);

	if (so->so_proto == NULL) {
		error = EINVAL;
		goto out;
	}
	if ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
		error = EOPNOTSUPP;
		goto out;
	}

	/*
	 * If the listen request is made on a socket that is not fully
	 * disconnected, or on a socket that has been marked as inactive,
	 * reject the request now.
	 */
	if ((so->so_state &
	    (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) ||
	    (so->so_flags & SOF_DEFUNCT)) {
		error = EINVAL;
		if (so->so_flags & SOF_DEFUNCT) {
			SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
			    __func__, proc_pid(p), so, INP_SOCKAF(so),
			    INP_SOCKTYPE(so), error));
		}
		goto out;
	}

	if ((so->so_restrictions & SO_RESTRICT_DENYIN) != 0) {
		error = EPERM;
		goto out;
	}

	error = sflt_listen(so);

	if (error == 0)
		error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);

	if (error) {
		if (error == EJUSTRETURN)
			error = 0;
		goto out;
	}

	if (TAILQ_EMPTY(&so->so_comp))
		so->so_options |= SO_ACCEPTCONN;
	/*
	 * POSIX: The implementation may have an upper limit on the length of
	 * the listen queue-either global or per accepting socket. If backlog
	 * exceeds this limit, the length of the listen queue is set to the
	 * limit.
	 *
	 * If listen() is called with a backlog argument value that is less
	 * than 0, the function behaves as if it had been called with a backlog
	 * argument value of 0.
	 *
	 * A backlog argument of 0 may allow the socket to accept connections,
	 * in which case the length of the listen queue may be set to an
	 * implementation-defined minimum value.
	 */
	if (backlog <= 0 || backlog > somaxconn)
		backlog = somaxconn;

	so->so_qlimit = backlog;
out:
	socket_unlock(so, 1);
	return (error);
}
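
/*
 * Editor's note (illustrative, not part of the original source): the
 * POSIX backlog rules documented above are visible from user space; a
 * minimal listener sketch, where any backlog outside (0, somaxconn] is
 * silently clamped to the kern.ipc.somaxconn value by this routine:
 *
 *	int s = socket(AF_INET, SOCK_STREAM, 0);
 *	struct sockaddr_in sin = { .sin_family = AF_INET,
 *	    .sin_port = htons(8080), .sin_addr = { INADDR_ANY } };
 *	if (bind(s, (struct sockaddr *)&sin, sizeof (sin)) == 0 &&
 *	    listen(s, 4096) == 0) {	// 4096 > somaxconn gets clamped
 *		// ... accept(2) loop ...
 *	}
 */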
void
sofreelastref(struct socket *so, int dealloc)
{
	struct socket *head = so->so_head;

	/* Assume socket is locked */

	if ((!(so->so_flags & SOF_PCBCLEARING)) ||
	    ((so->so_state & SS_NOFDREF) == 0)) {
		selthreadclear(&so->so_snd.sb_sel);
		selthreadclear(&so->so_rcv.sb_sel);
		so->so_rcv.sb_flags &= ~SB_UPCALL;
		so->so_snd.sb_flags &= ~SB_UPCALL;
		return;
	}
	if (head != NULL) {
		socket_lock(head, 1);
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue. If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
			selthreadclear(&so->so_snd.sb_sel);
			selthreadclear(&so->so_rcv.sb_sel);
			so->so_rcv.sb_flags &= ~SB_UPCALL;
			so->so_snd.sb_flags &= ~SB_UPCALL;
			socket_unlock(head, 1);
			return;
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~SS_INCOMP;
		socket_unlock(head, 1);
	}
	selthreadclear(&so->so_snd.sb_sel);
	sbrelease(&so->so_snd);

	sorflush(so);

	/* 3932268: disable upcall */
	so->so_rcv.sb_flags &= ~SB_UPCALL;
	so->so_snd.sb_flags &= ~SB_UPCALL;

	if (dealloc)
		sodealloc(so);
}
void
soclose_wait_locked(struct socket *so)
{
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	/*
	 * Double check here and return if there's no outstanding upcall;
	 * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
	 */
	if (!so->so_upcallusecount || !(so->so_flags & SOF_UPCALLCLOSEWAIT))
		return;

	so->so_rcv.sb_flags &= ~SB_UPCALL;
	so->so_snd.sb_flags &= ~SB_UPCALL;
	so->so_flags |= SOF_CLOSEWAIT;
	(void) msleep((caddr_t)&so->so_upcall, mutex_held, (PZERO - 1),
	    "soclose_wait_locked", NULL);
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
	so->so_flags &= ~SOF_CLOSEWAIT;
}
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose_locked(struct socket *so)
{
	int error = 0;
	lck_mtx_t *mutex_held;
	struct timespec ts;

	if (so->so_usecount == 0) {
		panic("soclose: so=%p refcount=0\n", so);
	}

	sflt_notify(so, sock_evt_closing, NULL);

	if ((so->so_options & SO_ACCEPTCONN)) {
		struct socket *sp, *sonext;

		/*
		 * We do not want new connection to be added
		 * to the connection queues
		 */
		so->so_options &= ~SO_ACCEPTCONN;

		for (sp = TAILQ_FIRST(&so->so_incomp); sp != NULL; sp = sonext) {
			sonext = TAILQ_NEXT(sp, so_list);

			/*
			 * skip sockets thrown away by tcpdropdropblreq
			 * they will get cleanup by the garbage collection.
			 * otherwise, remove the incomp socket from the queue
			 * and let soabort trigger the appropriate cleanup.
			 */
			if (sp->so_flags & SOF_OVERFLOW)
				continue;

			if (so->so_proto->pr_getlock != NULL) {
				/* lock ordering for consistency with the rest of the stack,
				 * we lock the socket first and then grabb the head.
				 */
				socket_unlock(so, 0);
				socket_lock(sp, 1);
				socket_lock(so, 0);
			}

			TAILQ_REMOVE(&so->so_incomp, sp, so_list);
			so->so_incqlen--;

			if (sp->so_state & SS_INCOMP) {
				sp->so_state &= ~SS_INCOMP;
				sp->so_head = NULL;

				(void) soabort(sp);
			}

			if (so->so_proto->pr_getlock != NULL)
				socket_unlock(sp, 1);
		}

		while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
			/* Dequeue from so_comp since sofree() won't do it */
			TAILQ_REMOVE(&so->so_comp, sp, so_list);
			so->so_qlen--;

			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(so, 0);
				socket_lock(sp, 1);
			}

			if (sp->so_state & SS_COMP) {
				sp->so_state &= ~SS_COMP;
				sp->so_head = NULL;

				(void) soabort(sp);
			}

			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(sp, 1);
				socket_lock(so, 0);
			}
		}
	}
	if (so->so_pcb == 0) {
		/* 3915887: mark the socket as ready for dealloc */
		so->so_flags |= SOF_PCBCLEARING;
	}
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnectlocked(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			if (so->so_proto->pr_getlock != NULL)
				mutex_held = (*so->so_proto->pr_getlock)(so, 0);
			else
				mutex_held = so->so_proto->pr_domain->dom_mtx;
			while (so->so_state & SS_ISCONNECTED) {
				ts.tv_sec = (so->so_linger/100);
				ts.tv_nsec = (so->so_linger % 100) *
				    NSEC_PER_USEC * 1000 * 10;
				error = msleep((caddr_t)&so->so_timeo,
				    mutex_held, PSOCK | PCATCH, "soclose", &ts);
				if (error) {
					/*
					 * It's OK when the time fires,
					 * don't report an error
					 */
					if (error == EWOULDBLOCK)
						error = 0;
					break;
				}
			}
		}
	}
drop:
	if (so->so_usecount == 0)
		panic("soclose: usecount is zero so=%p\n", so);
	if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
		/*
		 * Let NetworkStatistics know this PCB is going away
		 * before we detach it.
		 */
		if (nstat_collect &&
		    (so->so_proto->pr_domain->dom_family == AF_INET ||
		    so->so_proto->pr_domain->dom_family == AF_INET6))
			nstat_pcb_detach(so->so_pcb);

		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
	if (so->so_usecount <= 0)
		panic("soclose: usecount is zero so=%p\n", so);
	if (so->so_pcb && so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;

	if ((so->so_flags & SOF_KNOTE) != 0)
		KNOTE(&so->so_klist, SO_FILT_HINT_LOCKED);

	so->so_proto->pr_domain->dom_refs--;
	evsofree(so);

	so->so_usecount--;
	sofree(so);
	return (error);
}
int
soclose(struct socket *so)
{
	int error = 0;

	socket_lock(so, 1);

	if (so->so_upcallusecount)
		soclose_wait_locked(so);

	if (so->so_retaincnt == 0) {
		error = soclose_locked(so);
	} else {
		/*
		 * if the FD is going away, but socket is
		 * retained in kernel remove its reference
		 */
		so->so_usecount--;
		if (so->so_usecount < 2)
			panic("soclose: retaincnt non null and so=%p "
			    "usecount=%d\n", so, so->so_usecount);
	}
	socket_unlock(so, 1);
	return (error);
}
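
/*
 * Editor's note (illustrative, not part of the original source): the
 * SO_LINGER branch in soclose_locked() above is what makes close(2)
 * block while unsent data drains; the timed msleep() bounds how long
 * that wait can last.  A userland sketch that opts into a bounded
 * lingering close:
 *
 *	struct linger l = { .l_onoff = 1, .l_linger = 5 };
 *	if (setsockopt(s, SOL_SOCKET, SO_LINGER, &l, sizeof (l)) == -1)
 *		perror("setsockopt(SO_LINGER)");
 *	close(s);	// may now block briefly in soclose_locked()
 */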
/*
 * Must be called at splnet...
 */
/* Should already be locked */
int
soabort(struct socket *so)
{
	int error;

#ifdef MORE_LOCKING_DEBUG
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif

	if ((so->so_flags & SOF_ABORTED) == 0) {
		so->so_flags |= SOF_ABORTED;
		error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
		if (error) {
			sofree(so);
			return (error);
		}
	}
	return (0);
}
int
soacceptlock(struct socket *so, struct sockaddr **nam, int dolock)
{
	int error;

	if (dolock)
		socket_lock(so, 1);

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);

	if (dolock)
		socket_unlock(so, 1);
	return (error);
}

int
soaccept(struct socket *so, struct sockaddr **nam)
{
	return (soacceptlock(so, nam, 1));
}
int
soacceptfilter(struct socket *so)
{
	struct sockaddr *local = NULL, *remote = NULL;
	int error = 0;
	struct socket *head = so->so_head;

	/*
	 * Hold the lock even if this socket
	 * has not been made visible to the filter(s).
	 * For sockets with global locks, this protect against the
	 * head or peer going away
	 */
	socket_lock(so, 1);
	if (sogetaddr_locked(so, &remote, 1) != 0 ||
	    sogetaddr_locked(so, &local, 0) != 0) {
		so->so_state &= ~(SS_NOFDREF | SS_COMP);
		so->so_head = NULL;
		socket_unlock(so, 1);
		soclose(so);
		/* Out of resources; try it again next time */
		error = ECONNABORTED;
		goto done;
	}

	error = sflt_accept(head, so, local, remote);

	/*
	 * If we get EJUSTRETURN from one of the filters, mark this socket
	 * as inactive and return it anyway.  This newly accepted socket
	 * will be disconnected later before we hand it off to the caller.
	 */
	if (error == EJUSTRETURN) {
		error = 0;
		(void) sosetdefunct(current_proc(), so,
		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
	}

	if (error != 0) {
		/*
		 * This may seem like a duplication to the above error
		 * handling part when we return ECONNABORTED, except
		 * the following is done while holding the lock since
		 * the socket has been exposed to the filter(s) earlier.
		 */
		so->so_state &= ~(SS_NOFDREF | SS_COMP);
		so->so_head = NULL;
		socket_unlock(so, 1);
		soclose(so);
		/* Propagate socket filter's error code to the caller */
	} else {
		socket_unlock(so, 1);
	}
done:
	/* Callee checks for NULL pointer */
	sock_freeaddr(remote);
	sock_freeaddr(local);

	return (error);
}
/*
 * Returns:	0			Success
 *		EOPNOTSUPP		Operation not supported on socket
 *		EISCONN			Socket is connected
 *	<pru_connect>:EADDRNOTAVAIL	Address not available.
 *	<pru_connect>:EINVAL		Invalid argument
 *	<pru_connect>:EAFNOSUPPORT	Address family not supported [notdef]
 *	<pru_connect>:EACCES		Permission denied
 *	<pru_connect>:EADDRINUSE	Address in use
 *	<pru_connect>:EAGAIN		Resource unavailable, try again
 *	<pru_connect>:EPERM		Operation not permitted
 *	<sf_connect_out>:???		[anything a filter writer might set]
 */
int
soconnectlock(struct socket *so, struct sockaddr *nam, int dolock)
{
	int error;
	struct proc *p = current_proc();

	if (dolock)
		socket_lock(so, 1);

	so_update_last_owner_locked(so, p);

	/*
	 * If this is a listening socket or if this is a previously-accepted
	 * socket that has been marked as inactive, reject the connect request.
	 */
	if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
		error = EOPNOTSUPP;
		if (so->so_flags & SOF_DEFUNCT) {
			SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
			    __func__, proc_pid(p), so, INP_SOCKAF(so),
			    INP_SOCKTYPE(so), error));
		}
		if (dolock)
			socket_unlock(so, 1);
		return (error);
	}

	if ((so->so_restrictions & SO_RESTRICT_DENYOUT) != 0) {
		if (dolock)
			socket_unlock(so, 1);
		return (EPERM);
	}

	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnectlocked(so)))) {
		error = EISCONN;
	} else {
		/*
		 * Run connect filter before calling protocol:
		 * - non-blocking connect returns before completion;
		 */
		error = sflt_connectout(so, nam);

		if (error) {
			if (error == EJUSTRETURN)
				error = 0;
		} else {
			error = (*so->so_proto->pr_usrreqs->pru_connect)
			    (so, nam, p);
		}
	}
	if (dolock)
		socket_unlock(so, 1);
	return (error);
}

int
soconnect(struct socket *so, struct sockaddr *nam)
{
	return (soconnectlock(so, nam, 1));
}
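
/*
 * Editor's note (illustrative, not part of the original source): as the
 * comment above notes, a non-blocking connect returns before completion;
 * the conventional userland pattern is to wait for writability and then
 * read back SO_ERROR:
 *
 *	fcntl(s, F_SETFL, O_NONBLOCK);
 *	if (connect(s, (struct sockaddr *)&sin, sizeof (sin)) == -1 &&
 *	    errno == EINPROGRESS) {
 *		fd_set wfds;
 *		FD_ZERO(&wfds);
 *		FD_SET(s, &wfds);
 *		select(s + 1, NULL, &wfds, NULL, NULL);
 *		int err = 0;
 *		socklen_t elen = sizeof (err);
 *		getsockopt(s, SOL_SOCKET, SO_ERROR, &err, &elen);
 *		// err == 0 means the connection completed
 *	}
 */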
/*
 * Returns:	0			Success
 *	<pru_connect2>:EINVAL[AF_UNIX]
 *	<pru_connect2>:EPROTOTYPE[AF_UNIX]
 *	<pru_connect2>:???		[other protocol families]
 *
 * Notes:	<pru_connect2> is not supported by [TCP].
 */
int
soconnect2(struct socket *so1, struct socket *so2)
{
	int error;

	socket_lock(so1, 1);
	if (so2->so_proto->pr_lock)
		socket_lock(so2, 1);

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);

	socket_unlock(so1, 1);
	if (so2->so_proto->pr_lock)
		socket_unlock(so2, 1);
	return (error);
}
int
sodisconnectlocked(struct socket *so)
{
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}

	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);

	if (error == 0)
		sflt_notify(so, sock_evt_disconnected, NULL);
bad:
	return (error);
}

/* Locking version */
int
sodisconnect(struct socket *so)
{
	int error;

	socket_lock(so, 1);
	error = sodisconnectlocked(so);
	socket_unlock(so, 1);
	return (error);
}
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)

/*
 * sosendcheck will lock the socket buffer if it isn't locked and
 * verify that there is space for the data being inserted.
 *
 * Returns:	0		Success
 *	sblock:EWOULDBLOCK
 */
static int
sosendcheck(struct socket *so, struct sockaddr *addr, int32_t resid,
    int32_t clen, int32_t atomic, int flags, int *sblocked)
{
	int error = 0;
	int32_t space;
	int assumelock = 0;

restart:
	if (*sblocked == 0) {
		if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
		    so->so_send_filt_thread != 0 &&
		    so->so_send_filt_thread == current_thread()) {
			/*
			 * We're being called recursively from a filter,
			 * allow this to continue. Radar 4150520.
			 * Don't set sblocked because we don't want
			 * to perform an unlock later.
			 */
			assumelock = 1;
		} else {
			error = sblock(&so->so_snd, SBLOCKWAIT(flags));
			if (error) {
				if (so->so_flags & SOF_DEFUNCT)
					goto defunct;
				return (error);
			}
			*sblocked = 1;
		}
	}

	/*
	 * If a send attempt is made on a socket that has been marked
	 * as inactive (disconnected), reject the request.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
defunct:
		error = EPIPE;
		SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", __func__,
		    proc_selfpid(), so, INP_SOCKAF(so), INP_SOCKTYPE(so),
		    error));
		return (error);
	}

	if (so->so_state & SS_CANTSENDMORE)
		return (EPIPE);

	if (so->so_error) {
		error = so->so_error;
		so->so_error = 0;
		return (error);
	}

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
			if ((so->so_state & SS_ISCONFIRMING) == 0 &&
			    !(resid == 0 && clen != 0))
				return (ENOTCONN);
		} else if (addr == 0 && !(flags&MSG_HOLD)) {
			return ((so->so_proto->pr_flags & PR_CONNREQUIRED) ?
			    ENOTCONN : EDESTADDRREQ);
		}
	}
	space = sbspace(&so->so_snd);
	if (flags & MSG_OOB)
		space += 1024;
	if ((atomic && resid > so->so_snd.sb_hiwat) ||
	    clen > so->so_snd.sb_hiwat)
		return (EMSGSIZE);
	if ((space < resid + clen &&
	    (atomic || space < (int32_t)so->so_snd.sb_lowat || space < clen)) ||
	    (so->so_type == SOCK_STREAM && so_wait_for_if_feedback(so))) {
		if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) ||
		    assumelock) {
			return (EWOULDBLOCK);
		}
		sbunlock(&so->so_snd, 1);
		*sblocked = 0;
		error = sbwait(&so->so_snd);
		if (error) {
			if (so->so_flags & SOF_DEFUNCT)
				goto defunct;
			return (error);
		}
		goto restart;
	}

	return (0);
}
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not). Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 *
 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
 *	point at the mbuf chain being constructed and go from there.
 *
 * Returns:	0			Success
 *	sosendcheck:EWOULDBLOCK
 *	sosendcheck:???			[value from so_error]
 *	<pru_send>:ECONNRESET[TCP]
 *	<pru_send>:EINVAL[TCP]
 *	<pru_send>:ENOBUFS[TCP]
 *	<pru_send>:EADDRINUSE[TCP]
 *	<pru_send>:EADDRNOTAVAIL[TCP]
 *	<pru_send>:EAFNOSUPPORT[TCP]
 *	<pru_send>:EACCES[TCP]
 *	<pru_send>:EAGAIN[TCP]
 *	<pru_send>:EPERM[TCP]
 *	<pru_send>:EMSGSIZE[TCP]
 *	<pru_send>:EHOSTUNREACH[TCP]
 *	<pru_send>:ENETUNREACH[TCP]
 *	<pru_send>:ENETDOWN[TCP]
 *	<pru_send>:ENOMEM[TCP]
 *	<pru_send>:ENOBUFS[TCP]
 *	<pru_send>:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
 *	<pru_send>:EINVAL[AF_UNIX]
 *	<pru_send>:EOPNOTSUPP[AF_UNIX]
 *	<pru_send>:EPIPE[AF_UNIX]
 *	<pru_send>:ENOTCONN[AF_UNIX]
 *	<pru_send>:EISCONN[AF_UNIX]
 *	<pru_send>:???[AF_UNIX]		[whatever a filter author chooses]
 *	<sf_data_out>:???		[whatever a filter author chooses]
 *
 * Notes:	Other <pru_send> returns depend on the protocol family; all
 *		<sf_data_out> returns depend on what the filter author causes
 *		their filter to return.
 */
int
sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags)
{
	struct mbuf **mp;
	register struct mbuf *m, *freelist = NULL;
	register int32_t space, len, resid;
	int clen = 0, error, dontroute, mlen, sendflags;
	int atomic = sosendallatonce(so) || top;
	int sblocked = 0;
	struct proc *p = current_proc();

	if (uio) {
		// LP64todo - fix this!
		resid = uio_resid(uio);
	} else {
		resid = top->m_pkthdr.len;
	}
	KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START), so, resid,
	    so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);

	socket_lock(so, 1);
	so_update_last_owner_locked(so, p);

	if (so->so_type != SOCK_STREAM && (flags & MSG_OOB) != 0) {
		error = EOPNOTSUPP;
		socket_unlock(so, 1);
		goto out;
	}

	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		socket_unlock(so, 1);
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
	if (control)
		clen = control->m_len;

	do {
		error = sosendcheck(so, addr, resid, clen, atomic, flags,
		    &sblocked);
		if (error)
			goto release;

		mp = &top;
		space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ?
		    1024 : 0);

		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				int chainlength;
				int bytes_to_copy;
				boolean_t jumbocl;

				bytes_to_copy = imin(resid, space);

				if (sosendminchain > 0)
					chainlength = 0;
				else
					chainlength = sosendmaxchain;

				/*
				 * Attempt to use larger than system page-size
				 * clusters for large writes only if there is
				 * a jumbo cluster pool and if the socket is
				 * marked accordingly.
				 */
				jumbocl = sosendjcl && njcl > 0 &&
				    ((so->so_flags & SOF_MULTIPAGES) ||
				    sosendjcl_ignore_capab);

				socket_unlock(so, 0);

				do {
					int num_needed;
					int hdrs_needed = (top == 0) ? 1 : 0;

					/*
					 * try to maintain a local cache of mbuf
					 * clusters needed to complete this
					 * write the list is further limited to
					 * the number that are currently needed
					 * to fill the socket this mechanism
					 * allows a large number of mbufs/
					 * clusters to be grabbed under a single
					 * mbuf lock... if we can't get any
					 * clusters, than fall back to trying
					 * for mbufs if we fail early (or
					 * miscalcluate the number needed) make
					 * sure to release any clusters we
					 * haven't yet consumed.
					 */
					if (freelist == NULL &&
					    bytes_to_copy > MBIGCLBYTES &&
					    jumbocl) {
						num_needed =
						    bytes_to_copy / M16KCLBYTES;

						if ((bytes_to_copy -
						    (num_needed * M16KCLBYTES))
						    >= MINCLSIZE)
							num_needed++;

						freelist =
						    m_getpackets_internal(
						    (unsigned int *)&num_needed,
						    hdrs_needed, M_WAIT, 0,
						    M16KCLBYTES);
						/*
						 * Fall back to 4K cluster size
						 * if allocation failed
						 */
					}

					if (freelist == NULL &&
					    bytes_to_copy > MCLBYTES) {
						num_needed =
						    bytes_to_copy / MBIGCLBYTES;

						if ((bytes_to_copy -
						    (num_needed * MBIGCLBYTES)) >=
						    MINCLSIZE)
							num_needed++;

						freelist =
						    m_getpackets_internal(
						    (unsigned int *)&num_needed,
						    hdrs_needed, M_WAIT, 0,
						    MBIGCLBYTES);
						/*
						 * Fall back to cluster size
						 * if allocation failed
						 */
					}

					if (freelist == NULL &&
					    bytes_to_copy > MINCLSIZE) {
						num_needed =
						    bytes_to_copy / MCLBYTES;

						if ((bytes_to_copy -
						    (num_needed * MCLBYTES)) >=
						    MINCLSIZE)
							num_needed++;

						freelist =
						    m_getpackets_internal(
						    (unsigned int *)&num_needed,
						    hdrs_needed, M_WAIT, 0,
						    MCLBYTES);
						/*
						 * Fall back to a single mbuf
						 * if allocation failed
						 */
					}

					if (freelist == NULL) {
						if (top == 0)
							MGETHDR(freelist,
							    M_WAIT, MT_DATA);
						else
							MGET(freelist,
							    M_WAIT, MT_DATA);

						if (freelist == NULL) {
							error = ENOBUFS;
							socket_lock(so, 0);
							goto release;
						}
						/*
						 * For datagram protocols,
						 * leave room for protocol
						 * headers in first mbuf.
						 */
						if (atomic && top == 0 &&
						    bytes_to_copy < MHLEN) {
							MH_ALIGN(freelist,
							    bytes_to_copy);
						}
					}
					m = freelist;
					freelist = m->m_next;
					m->m_next = NULL;

					if ((m->m_flags & M_EXT))
						mlen = m->m_ext.ext_size;
					else if ((m->m_flags & M_PKTHDR))
						mlen =
						    MHLEN - m_leadingspace(m);
					else
						mlen = MLEN;
					len = imin(mlen, bytes_to_copy);

					chainlength += len;

					space -= len;

					error = uiomove(mtod(m, caddr_t),
					    len, uio);

					resid = uio_resid(uio);

					m->m_len = len;
					*mp = m;
					top->m_pkthdr.len += len;
					if (error)
						break;
					mp = &m->m_next;
					if (resid <= 0) {
						if (flags & MSG_EOR)
							top->m_flags |= M_EOR;
						break;
					}
					bytes_to_copy = min(resid, space);

				} while (space > 0 &&
				    (chainlength < sosendmaxchain || atomic ||
				    resid < MINCLSIZE));

				socket_lock(so, 0);

				if (error)
					goto release;
			}

			if (flags & (MSG_HOLD|MSG_SEND)) {
				/* Enqueue for later, go away if HOLD */
				register struct mbuf *mb1;
				if (so->so_temp && (flags & MSG_FLUSH)) {
					m_freem(so->so_temp);
					so->so_temp = NULL;
				}
				if (so->so_temp)
					so->so_tail->m_next = top;
				else
					so->so_temp = top;
				mb1 = top;
				while (mb1->m_next)
					mb1 = mb1->m_next;
				so->so_tail = mb1;
				if (flags & MSG_HOLD) {
					top = NULL;
					goto release;
				}
				top = so->so_temp;
			}
			if (dontroute)
				so->so_options |= SO_DONTROUTE;

			/* Compute flags here, for pru_send and NKEs */
			sendflags = (flags & MSG_OOB) ? PRUS_OOB :
			    /*
			     * If the user set MSG_EOF, the protocol
			     * understands this flag and nothing left to
			     * send then use PRU_SEND_EOF instead of PRU_SEND.
			     */
			    ((flags & MSG_EOF) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			    (resid <= 0)) ? PRUS_EOF :
			    /* If there is more to send set PRUS_MORETOCOME */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;

			/*
			 * Socket filter processing
			 */
			error = sflt_data_out(so, addr, &top, &control,
			    (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0);
			if (error) {
				if (error == EJUSTRETURN) {
					error = 0;
					clen = 0;
					control = 0;
					top = 0;
				}
				goto release;
			}
			/*
			 * End Socket filter processing
			 */

			error = (*so->so_proto->pr_usrreqs->pru_send)
			    (so, sendflags, top, addr, control, p);

			if (flags & MSG_SEND)
				so->so_temp = NULL;

			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;

			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	if (sblocked)
		sbunlock(&so->so_snd, 0);	/* will unlock socket */
	else
		socket_unlock(so, 1);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	if (freelist)
		m_freem_list(freelist);

	KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, so, resid, so->so_snd.sb_cc,
	    space, error);

	return (error);
}
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 *
 * Returns:	0			Success
 *	sblock:EWOULDBLOCK
 *	sodelayed_copy:EFAULT
 *	<pru_rcvoob>:EINVAL[TCP]
 *	<pru_rcvoob>:EWOULDBLOCK[TCP]
 *	<pr_domain->dom_externalize>:EMSGSIZE[AF_UNIX]
 *	<pr_domain->dom_externalize>:ENOBUFS[AF_UNIX]
 *	<pr_domain->dom_externalize>:???
 *
 * Notes:	Additional return values from calls through <pru_rcvoob> and
 *		<pr_domain->dom_externalize> depend on protocols other than
 *		TCP or AF_UNIX, which are documented above.
 */
int
soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	register struct mbuf *m, **mp, *ml = NULL;
	register int flags, len, error, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio_resid(uio);
	struct mbuf *free_list;
	int delayed_copy_len;
	int can_delay;
	int need_event;
	struct proc *p = current_proc();

	// LP64todo - fix this!
	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, so, uio_resid(uio),
	    so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat);

	socket_lock(so, 1);
	so_update_last_owner_locked(so, p);

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount == 1)
		panic("soreceive: so=%x no other reference on socket\n", so);
#endif
	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;

	/*
	 * If a recv attempt is made on a previously-accepted socket
	 * that has been marked as inactive (disconnected), reject
	 * the request.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		struct sockbuf *sb = &so->so_rcv;

		error = ENOTCONN;
		SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", __func__,
		    proc_pid(p), so, INP_SOCKAF(so), INP_SOCKTYPE(so), error));
		/*
		 * This socket should have been disconnected and flushed
		 * prior to being returned from sodefunct(); there should
		 * be no data on its receive list, so panic otherwise.
		 */
		if (so->so_state & SS_DEFUNCT)
			sb_empty_assert(sb, __func__);
		socket_unlock(so, 1);
		return (error);
	}
	/*
	 * When SO_WANTOOBFLAG is set we try to get out-of-band data
	 * regardless of the flags argument. Here is the case were
	 * out-of-band data is not inline.
	 */
	if ((flags & MSG_OOB) ||
	    ((so->so_options & SO_WANTOOBFLAG) != 0 &&
	    (so->so_options & SO_OOBINLINE) == 0 &&
	    (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
		m = m_get(M_WAIT, MT_DATA);
		if (m == NULL) {
			socket_unlock(so, 1);
			KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
			    ENOBUFS, 0, 0, 0, 0);
			return (ENOBUFS);
		}
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		socket_unlock(so, 0);
		do {
			error = uiomove(mtod(m, caddr_t),
			    imin(uio_resid(uio), m->m_len), uio);
			m = m_free(m);
		} while (uio_resid(uio) && error == 0 && m);
		socket_lock(so, 0);
bad:
		if (m)
			m_freem(m);

		if ((so->so_options & SO_WANTOOBFLAG) != 0) {
			if (error == EWOULDBLOCK || error == EINVAL) {
				/*
				 * Let's try to get normal data:
				 * EWOULDBLOCK: out-of-band data not
				 * receive yet. EINVAL: out-of-band data
				 * already read.
				 */
				error = 0;
				goto nooob;
			} else if (error == 0 && flagsp) {
				*flagsp |= MSG_OOB;
			}
		}
		socket_unlock(so, 1);
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
		    0, 0, 0, 0);

		return (error);
	}
nooob:
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio_resid(uio))
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

	free_list = (struct mbuf *)0;
	delayed_copy_len = 0;
restart:
#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1)
		printf("soreceive: sblock so=%p ref=%d on socket\n",
		    so, so->so_usecount);
#endif
	/*
	 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
	 * and if so just return to the caller.  This could happen when
	 * soreceive() is called by a socket upcall function during the
	 * time the socket is freed.  The socket buffer would have been
	 * locked across the upcall, therefore we cannot put this thread
	 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
	 * we may livelock), because the lock on the socket buffer will
	 * only be released when the upcall routine returns to its caller.
	 * Because the socket has been officially closed, there can be
	 * no further read on it.
	 */
	if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
	    (SS_NOFDREF | SS_CANTRCVMORE)) {
		socket_unlock(so, 1);
		return (0);
	}

	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error) {
		socket_unlock(so, 1);
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
		    0, 0, 0, 0);
		return (error);
	}

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio_resid(uio)) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
		/*
		 * Panic if we notice inconsistencies in the socket's
		 * receive list; both sb_mb and sb_cc should correctly
		 * reflect the contents of the list, otherwise we may
		 * end up with false positives during select() or poll()
		 * which could put the application in a bad state.
		 */
		SB_MB_CHECK(&so->so_rcv);

		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio_resid(uio) == 0)
			goto release;
		if ((so->so_state & SS_NBIO) ||
		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
		sbunlock(&so->so_rcv, 1);
#if EVEN_MORE_LOCKING_DEBUG
		if (socket_debug)
			printf("Waiting for socket data\n");
#endif

		error = sbwait(&so->so_rcv);
#if EVEN_MORE_LOCKING_DEBUG
		if (socket_debug)
			printf("SORECEIVE - sbwait returned %d\n", error);
#endif
		if (so->so_usecount < 1)
			panic("soreceive: after 2nd sblock so=%p ref=%d on "
			    "socket\n", so, so->so_usecount);
		if (error) {
			socket_unlock(so, 1);
			KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
			    0, 0, 0, 0);
			return (error);
		}
		goto restart;
	}
dontblock:
	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
#if CONFIG_MACF_SOCKET_SUBSET
		/*
		 * Call the MAC framework for policy checking if we're in
		 * the user process context and the socket isn't connected.
		 */
		if (p != kernproc && !(so->so_state & SS_ISCONNECTED)) {
			struct mbuf *m0 = m;
			/*
			 * Dequeue this record (temporarily) from the receive
			 * list since we're about to drop the socket's lock
			 * where a new record may arrive and be appended to
			 * the list.  Upon MAC policy failure, the record
			 * will be freed.  Otherwise, we'll add it back to
			 * the head of the list.  We cannot rely on SB_LOCK
			 * because append operation uses the socket's lock.
			 */
			do {
				m->m_nextpkt = NULL;
				sbfree(&so->so_rcv, m);
				m = m->m_next;
			} while (m != NULL);
			m = m0;
			so->so_rcv.sb_mb = nextrecord;
			SB_EMPTY_FIXUP(&so->so_rcv);
			SBLASTRECORDCHK(&so->so_rcv, "soreceive 1a");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive 1a");
			socket_unlock(so, 0);
			if (mac_socket_check_received(proc_ucred(p), so,
			    mtod(m, struct sockaddr *)) != 0) {
				/*
				 * MAC policy failure; free this record and
				 * process the next record (or block until
				 * one is available).  We have adjusted sb_cc
				 * and sb_mbcnt above so there is no need to
				 * call sbfree() again.
				 */
				do {
					m = m_free(m);
				} while (m != NULL);
				/*
				 * Clear SB_LOCK but don't unlock the socket.
				 * Process the next record or wait for one.
				 */
				socket_lock(so, 0);
				sbunlock(&so->so_rcv, 1);
				goto restart;
			}
			socket_lock(so, 0);

			/*
			 * If the socket has been defunct'd, drop it.
			 */
			if (so->so_flags & SOF_DEFUNCT) {
				m_freem(m);
				error = ENOTCONN;
				goto release;
			}

			/*
			 * Re-adjust the socket receive list and re-enqueue
			 * the record in front of any packets which may have
			 * been appended while we dropped the lock.
			 */
			for (m = m0; m->m_next != NULL; m = m->m_next)
				sballoc(&so->so_rcv, m);
			sballoc(&so->so_rcv, m);
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_lastrecord = m0;
				so->so_rcv.sb_mbtail = m;
			}
			m = m0;
			nextrecord = m->m_nextpkt = so->so_rcv.sb_mb;
			so->so_rcv.sb_mb = m;
			SBLASTRECORDCHK(&so->so_rcv, "soreceive 1b");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive 1b");
		}
#endif /* CONFIG_MACF_SOCKET_SUBSET */
		orig_resid = 0;
		if (psa) {
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
			    mp0 == 0);
			if ((*psa == 0) && (flags & MSG_NEEDSA)) {
				error = EWOULDBLOCK;
				goto release;
			}
		}
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
				panic("soreceive: about to create invalid "
				    "socketbuf");
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
			if (m != NULL) {
				m->m_nextpkt = nextrecord;
			} else {
				so->so_rcv.sb_mb = nextrecord;
				SB_EMPTY_FIXUP(&so->so_rcv);
			}
		}
	}
	/*
	 * Process one or more MT_CONTROL mbufs present before any data mbufs
	 * in the first mbuf chain on the socket buffer.  If MSG_PEEK, we
	 * just copy the data; if !MSG_PEEK, we call into the protocol to
	 * perform externalization.
	 */
	if (m != NULL && m->m_type == MT_CONTROL) {
		struct mbuf *cm = NULL, *cmn;
		struct mbuf **cme = &cm;
		struct sockbuf *sb_rcv = &so->so_rcv;
		struct mbuf **msgpcm = NULL;

		/*
		 * Externalizing the control messages would require us to
		 * drop the socket's lock below.  Once we re-acquire the
		 * lock, the mbuf chain might change.  In order to preserve
		 * consistency, we unlink all control messages from the
		 * first mbuf chain in one shot and link them separately
		 * onto a different chain.
		 */
		do {
			if (flags & MSG_PEEK) {
				if (controlp != NULL) {
					if (*controlp == NULL)
						msgpcm = controlp;
					*controlp = m_copy(m, 0, m->m_len);

					/* If we failed to allocate an mbuf,
					 * release any previously allocated
					 * mbufs for control data. Return
					 * an error. Keep the mbufs in the
					 * socket as this is using
					 * MSG_PEEK flag.
					 */
					if (*controlp == NULL) {
						m_freem(*msgpcm);
						error = ENOBUFS;
						goto release;
					}
					controlp = &(*controlp)->m_next;
				}
				m = m->m_next;
			} else {
				m->m_nextpkt = NULL;
				sbfree(sb_rcv, m);
				sb_rcv->sb_mb = m->m_next;
				m->m_next = NULL;
				*cme = m;
				cme = &(*cme)->m_next;
				m = sb_rcv->sb_mb;
			}
		} while (m != NULL && m->m_type == MT_CONTROL);

		if (!(flags & MSG_PEEK)) {
			if (sb_rcv->sb_mb != NULL) {
				sb_rcv->sb_mb->m_nextpkt = nextrecord;
			} else {
				sb_rcv->sb_mb = nextrecord;
				SB_EMPTY_FIXUP(sb_rcv);
			}
			if (nextrecord == NULL)
				sb_rcv->sb_lastrecord = m;
		}

		SBLASTRECORDCHK(&so->so_rcv, "soreceive ctl");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive ctl");

		while (cm != NULL) {
			int cmsg_type;

			cmn = cm->m_next;
			cm->m_next = NULL;
			cmsg_type = mtod(cm, struct cmsghdr *)->cmsg_type;

			/*
			 * Call the protocol to externalize SCM_RIGHTS message
			 * and return the modified message to the caller upon
			 * success.  Otherwise, all other control messages are
			 * returned unmodified to the caller.  Note that we
			 * only get into this loop if MSG_PEEK is not set.
			 */
			if (pr->pr_domain->dom_externalize != NULL &&
			    cmsg_type == SCM_RIGHTS) {
				/*
				 * Release socket lock: see 3903171.  This
				 * would also allow more records to be appended
				 * to the socket buffer.  We still have SB_LOCK
				 * set on it, so we can be sure that the head
				 * of the mbuf chain won't change.
				 */
				socket_unlock(so, 0);
				error = (*pr->pr_domain->dom_externalize)(cm);
				socket_lock(so, 0);
			} else {
				error = 0;
			}

			if (controlp != NULL && error == 0) {
				*controlp = cm;
				controlp = &(*controlp)->m_next;
				orig_resid = 0;
			} else {
				(void) m_free(cm);
			}
			cm = cmn;
		}
		/*
		 * Update the value of nextrecord in case we received new
		 * records when the socket was unlocked above for
		 * externalizing SCM_RIGHTS.
		 */
		if (m != NULL)
			nextrecord = sb_rcv->sb_mb->m_nextpkt;
		else
			nextrecord = sb_rcv->sb_mb;
		orig_resid = 0;
	}

	if (m != NULL) {
		if (!(flags & MSG_PEEK)) {
			/*
			 * We get here because m points to an mbuf following
			 * any MT_SONAME or MT_CONTROL mbufs which have been
			 * processed above.  In any case, m should be pointing
			 * to the head of the mbuf chain, and the nextrecord
			 * should be either NULL or equal to m->m_nextpkt.
			 * See comments above about SB_LOCK.
			 */
			if (m != so->so_rcv.sb_mb || m->m_nextpkt != nextrecord)
				panic("soreceive: post-control !sync so=%p "
				    "m=%p nextrecord=%p\n", so, m, nextrecord);

			if (nextrecord == NULL)
				so->so_rcv.sb_lastrecord = m;
		}
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	} else {
		if (!(flags & MSG_PEEK)) {
			SB_EMPTY_FIXUP(&so->so_rcv);
		}
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");
2380 if (!(flags
& MSG_PEEK
) && uio_resid(uio
) > sorecvmincopy
)
2387 while (m
&& (uio_resid(uio
) - delayed_copy_len
) > 0 && error
== 0) {
2388 if (m
->m_type
== MT_OOBDATA
) {
2389 if (type
!= MT_OOBDATA
)
2391 } else if (type
== MT_OOBDATA
) {
2395 * Make sure to allways set MSG_OOB event when getting
2396 * out of band data inline.
2398 if ((so
->so_options
& SO_WANTOOBFLAG
) != 0 &&
2399 (so
->so_options
& SO_OOBINLINE
) != 0 &&
2400 (so
->so_state
& SS_RCVATMARK
) != 0) {
2403 so
->so_state
&= ~SS_RCVATMARK
;
2404 len
= uio_resid(uio
) - delayed_copy_len
;
2405 if (so
->so_oobmark
&& len
> so
->so_oobmark
- offset
)
2406 len
= so
->so_oobmark
- offset
;
2407 if (len
> m
->m_len
- moff
)
2408 len
= m
->m_len
- moff
;
2410 * If mp is set, just pass back the mbufs.
2411 * Otherwise copy them out via the uio, then free.
2412 * Sockbuf must be consistent here (points to current mbuf,
2413 * it points to next record) when we drop priority;
2414 * we must note any additions to the sockbuf when we
2415 * block interrupts again.
2418 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive uiomove");
2419 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive uiomove");
2420 if (can_delay
&& len
== m
->m_len
) {
2422 * only delay the copy if we're consuming the
2423 * mbuf and we're NOT in MSG_PEEK mode
2424 * and we have enough data to make it worthwile
2425 * to drop and retake the lock... can_delay
2426 * reflects the state of the 2 latter
2427 * constraints moff should always be zero
2430 delayed_copy_len
+= len
;
2432 if (delayed_copy_len
) {
2433 error
= sodelayed_copy(so
, uio
,
2434 &free_list
, &delayed_copy_len
);
2440 * can only get here if MSG_PEEK is not
2441 * set therefore, m should point at the
2442 * head of the rcv queue; if it doesn't,
2443 * it means something drastically
2444 * changed while we were out from behind
2445 * the lock in sodelayed_copy. perhaps
2446 * a RST on the stream. in any event,
2447 * the stream has been interrupted. it's
2448 * probably best just to return whatever
2449 * data we've moved and let the caller
2452 if (m
!= so
->so_rcv
.sb_mb
) {
2456 socket_unlock(so
, 0);
2457 error
= uiomove(mtod(m
, caddr_t
) + moff
,
2465 uio_setresid(uio
, (uio_resid(uio
) - len
));
2467 if (len
== m
->m_len
- moff
) {
2468 if (m
->m_flags
& M_EOR
)
2470 if (flags
& MSG_PEEK
) {
2474 nextrecord
= m
->m_nextpkt
;
2475 sbfree(&so
->so_rcv
, m
);
2476 m
->m_nextpkt
= NULL
;
2481 so
->so_rcv
.sb_mb
= m
= m
->m_next
;
2482 *mp
= (struct mbuf
*)0;
2484 if (free_list
== NULL
)
2489 so
->so_rcv
.sb_mb
= m
= m
->m_next
;
2493 m
->m_nextpkt
= nextrecord
;
2494 if (nextrecord
== NULL
)
2495 so
->so_rcv
.sb_lastrecord
= m
;
2497 so
->so_rcv
.sb_mb
= nextrecord
;
2498 SB_EMPTY_FIXUP(&so
->so_rcv
);
2500 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 3");
2501 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 3");
2504 if (flags
& MSG_PEEK
) {
2510 if (flags
& MSG_DONTWAIT
)
2511 copy_flag
= M_DONTWAIT
;
2514 *mp
= m_copym(m
, 0, len
, copy_flag
);
2517 * Failed to allocate an mbuf.
2518 * Adjust uio_resid back, it was
2519 * adjusted down by len bytes which
2520 * we didn't copy over
2522 uio_setresid(uio
, (uio_resid(uio
) + len
));
2528 so
->so_rcv
.sb_cc
-= len
;
2531 if (so
->so_oobmark
) {
2532 if ((flags
& MSG_PEEK
) == 0) {
2533 so
->so_oobmark
-= len
;
2534 if (so
->so_oobmark
== 0) {
2535 so
->so_state
|= SS_RCVATMARK
;
2537 * delay posting the actual event until
2538 * after any delayed copy processing
2546 if (offset
== so
->so_oobmark
)
2550 if (flags
& MSG_EOR
)
2553 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set
2554 * (for non-atomic socket), we must not quit until
2555 * "uio->uio_resid == 0" or an error termination.
2556 * If a signal/timeout occurs, return with a short
2557 * count but without error. Keep sockbuf locked
2558 * against other readers.
2560 while (flags
& (MSG_WAITALL
|MSG_WAITSTREAM
) && m
== 0 &&
2561 (uio_resid(uio
) - delayed_copy_len
) > 0 &&
2562 !sosendallatonce(so
) && !nextrecord
) {
2563 if (so
->so_error
|| so
->so_state
& SS_CANTRCVMORE
)
2567 * Depending on the protocol (e.g. TCP), the following
2568 * might cause the socket lock to be dropped and later
2569 * be reacquired, and more data could have arrived and
2570 * have been appended to the receive socket buffer by
2571 * the time it returns. Therefore, we only sleep in
2572 * sbwait() below if and only if the socket buffer is
2573 * empty, in order to avoid a false sleep.
2575 if (pr
->pr_flags
& PR_WANTRCVD
&& so
->so_pcb
&&
2576 (((struct inpcb
*)so
->so_pcb
)->inp_state
!=
2578 (*pr
->pr_usrreqs
->pru_rcvd
)(so
, flags
);
2580 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive sbwait 2");
2581 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive sbwait 2");
2583 if (so
->so_rcv
.sb_mb
== NULL
&& sbwait(&so
->so_rcv
)) {
2588 * have to wait until after we get back from the sbwait
2589 * to do the copy because we will drop the lock if we
2590 * have enough data that has been delayed... by dropping
2591 * the lock we open up a window allowing the netisr
2592 * thread to process the incoming packets and to change
2593 * the state of this socket... we're issuing the sbwait
2594 * because the socket is empty and we're expecting the
2595 * netisr thread to wake us up when more packets arrive;
2596 * if we allow that processing to happen and then sbwait
2597 * we could stall forever with packets sitting in the
2598 * socket if no further packets arrive from the remote
2601 * we want to copy before we've collected all the data
2602 * to satisfy this request to allow the copy to overlap
2603 * the incoming packet processing on an MP system
2605 if (delayed_copy_len
> sorecvmincopy
&&
2606 (delayed_copy_len
> (so
->so_rcv
.sb_hiwat
/ 2))) {
2607 error
= sodelayed_copy(so
, uio
,
2608 &free_list
, &delayed_copy_len
);
2613 m
= so
->so_rcv
.sb_mb
;
2615 nextrecord
= m
->m_nextpkt
;
2617 SB_MB_CHECK(&so
->so_rcv
);
2620 #ifdef MORE_LOCKING_DEBUG
2621 if (so
->so_usecount
<= 1)
2622 panic("soreceive: after big while so=%p ref=%d on socket\n",
2623 so
, so
->so_usecount
);
2626 if (m
&& pr
->pr_flags
& PR_ATOMIC
) {
2628 if (so
->so_options
& SO_DONTTRUNC
) {
2629 flags
|= MSG_RCVMORE
;
2633 if ((flags
& MSG_PEEK
) == 0)
2634 (void) sbdroprecord(&so
->so_rcv
);
2641 * pru_rcvd below (for TCP) may cause more data to be received
2642 * if the socket lock is dropped prior to sending the ACK; some
2643 * legacy OpenTransport applications don't handle this well
2644 * (if it receives less data than requested while MSG_HAVEMORE
2645 * is set), and so we set the flag now based on what we know
2646 * prior to calling pru_rcvd.
2648 if ((so
->so_options
& SO_WANTMORE
) && so
->so_rcv
.sb_cc
> 0)
2649 flags
|= MSG_HAVEMORE
;
2651 if ((flags
& MSG_PEEK
) == 0) {
2653 so
->so_rcv
.sb_mb
= nextrecord
;
2655 * First part is an inline SB_EMPTY_FIXUP(). Second
2656 * part makes sure sb_lastrecord is up-to-date if
2657 * there is still data in the socket buffer.
2659 if (so
->so_rcv
.sb_mb
== NULL
) {
2660 so
->so_rcv
.sb_mbtail
= NULL
;
2661 so
->so_rcv
.sb_lastrecord
= NULL
;
2662 } else if (nextrecord
->m_nextpkt
== NULL
) {
2663 so
->so_rcv
.sb_lastrecord
= nextrecord
;
2665 SB_MB_CHECK(&so
->so_rcv
);
2667 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 4");
2668 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 4");
2669 if (pr
->pr_flags
& PR_WANTRCVD
&& so
->so_pcb
)
2670 (*pr
->pr_usrreqs
->pru_rcvd
)(so
, flags
);
2673 if (delayed_copy_len
) {
2674 error
= sodelayed_copy(so
, uio
, &free_list
, &delayed_copy_len
);
2680 m_freem_list((struct mbuf
*)free_list
);
2681 free_list
= (struct mbuf
*)0;
2684 postevent(so
, 0, EV_OOB
);
2686 if (orig_resid
== uio_resid(uio
) && orig_resid
&&
2687 (flags
& MSG_EOR
) == 0 && (so
->so_state
& SS_CANTRCVMORE
) == 0) {
2688 sbunlock(&so
->so_rcv
, 1);
2695 #ifdef MORE_LOCKING_DEBUG
2696 if (so
->so_usecount
<= 1)
2697 panic("soreceive: release so=%p ref=%d on socket\n",
2698 so
, so
->so_usecount
);
2700 if (delayed_copy_len
) {
2701 error
= sodelayed_copy(so
, uio
, &free_list
, &delayed_copy_len
);
2704 m_freem_list((struct mbuf
*)free_list
);
2706 sbunlock(&so
->so_rcv
, 0); /* will unlock socket */
2708 // LP64todo - fix this!
2709 KERNEL_DEBUG(DBG_FNC_SORECEIVE
| DBG_FUNC_END
, so
, uio_resid(uio
),
2710 so
->so_rcv
.sb_cc
, 0, error
);
2716 * Returns: 0 Success
2720 sodelayed_copy(struct socket
*so
, struct uio
*uio
, struct mbuf
**free_list
,
2728 socket_unlock(so
, 0);
2730 while (m
&& error
== 0) {
2732 error
= uiomove(mtod(m
, caddr_t
), (int)m
->m_len
, uio
);
2736 m_freem_list(*free_list
);
2738 *free_list
= (struct mbuf
*)NULL
;
2748 * Returns: 0 Success
2751 * <pru_shutdown>:EINVAL
2752 * <pru_shutdown>:EADDRNOTAVAIL[TCP]
2753 * <pru_shutdown>:ENOBUFS[TCP]
2754 * <pru_shutdown>:EMSGSIZE[TCP]
2755 * <pru_shutdown>:EHOSTUNREACH[TCP]
2756 * <pru_shutdown>:ENETUNREACH[TCP]
2757 * <pru_shutdown>:ENETDOWN[TCP]
2758 * <pru_shutdown>:ENOMEM[TCP]
2759 * <pru_shutdown>:EACCES[TCP]
2760 * <pru_shutdown>:EMSGSIZE[TCP]
2761 * <pru_shutdown>:ENOBUFS[TCP]
2762 * <pru_shutdown>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2763 * <pru_shutdown>:??? [other protocol families]
2766 soshutdown(struct socket
*so
, int how
)
2776 (SS_ISCONNECTED
|SS_ISCONNECTING
|SS_ISDISCONNECTING
)) == 0) {
2779 error
= soshutdownlock(so
, how
);
2781 socket_unlock(so
, 1);
2792 soshutdownlock(struct socket
*so
, int how
)
2794 struct protosw
*pr
= so
->so_proto
;
2797 sflt_notify(so
, sock_evt_shutdown
, &how
);
2799 if (how
!= SHUT_WR
) {
2800 if ((so
->so_state
& SS_CANTRCVMORE
) != 0) {
2801 /* read already shut down */
2806 postevent(so
, 0, EV_RCLOSED
);
2808 if (how
!= SHUT_RD
) {
2809 if ((so
->so_state
& SS_CANTSENDMORE
) != 0) {
2810 /* write already shut down */
2814 error
= (*pr
->pr_usrreqs
->pru_shutdown
)(so
);
2815 postevent(so
, 0, EV_WCLOSED
);
2818 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN
| DBG_FUNC_END
, 0, 0, 0, 0, 0);
2823 sorflush(struct socket
*so
)
2825 register struct sockbuf
*sb
= &so
->so_rcv
;
2826 register struct protosw
*pr
= so
->so_proto
;
2829 #ifdef MORE_LOCKING_DEBUG
2830 lck_mtx_t
*mutex_held
;
2832 if (so
->so_proto
->pr_getlock
!= NULL
)
2833 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
2835 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
2836 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
2839 sflt_notify(so
, sock_evt_flush_read
, NULL
);
2841 sb
->sb_flags
|= SB_NOINTR
;
2842 (void) sblock(sb
, M_WAIT
);
2846 selthreadclear(&sb
->sb_sel
);
2849 bzero((caddr_t
)sb
, sizeof (*sb
));
2850 sb
->sb_so
= so
; /* reestablish link to socket */
2851 if (asb
.sb_flags
& SB_KNOTE
) {
2852 sb
->sb_sel
.si_note
= asb
.sb_sel
.si_note
;
2853 sb
->sb_flags
= SB_KNOTE
;
2855 if (asb
.sb_flags
& SB_DROP
)
2856 sb
->sb_flags
|= SB_DROP
;
2857 if (asb
.sb_flags
& SB_UNIX
)
2858 sb
->sb_flags
|= SB_UNIX
;
2859 if ((pr
->pr_flags
& PR_RIGHTS
) && pr
->pr_domain
->dom_dispose
) {
2860 (*pr
->pr_domain
->dom_dispose
)(asb
.sb_mb
);
2866 * Perhaps this routine, and sooptcopyout(), below, ought to come in
2867 * an additional variant to handle the case where the option value needs
2868 * to be some kind of integer, but not a specific size.
2869 * In addition to their use here, these functions are also called by the
2870 * protocol-level pr_ctloutput() routines.
2872 * Returns: 0 Success
2877 sooptcopyin(struct sockopt
*sopt
, void *buf
, size_t len
, size_t minlen
)
2882 * If the user gives us more than we wanted, we ignore it,
2883 * but if we don't get the minimum length the caller
2884 * wants, we return EINVAL. On success, sopt->sopt_valsize
2885 * is set to however much we actually retrieved.
2887 if ((valsize
= sopt
->sopt_valsize
) < minlen
)
2890 sopt
->sopt_valsize
= valsize
= len
;
2892 if (sopt
->sopt_p
!= kernproc
)
2893 return (copyin(sopt
->sopt_val
, buf
, valsize
));
2895 bcopy(CAST_DOWN(caddr_t
, sopt
->sopt_val
), buf
, valsize
);
2900 * sooptcopyin_timeval
2901 * Copy in a timeval value into tv_p, and take into account whether the
2902 * the calling process is 64-bit or 32-bit. Moved the sanity checking
2903 * code here so that we can verify the 64-bit tv_sec value before we lose
2904 * the top 32-bits assigning tv64.tv_sec to tv_p->tv_sec.
2907 sooptcopyin_timeval(struct sockopt
*sopt
, struct timeval
* tv_p
)
2911 if (proc_is64bit(sopt
->sopt_p
)) {
2912 struct user64_timeval tv64
;
2914 if (sopt
->sopt_valsize
< sizeof(tv64
)) {
2917 sopt
->sopt_valsize
= sizeof(tv64
);
2918 if (sopt
->sopt_p
!= kernproc
) {
2919 error
= copyin(sopt
->sopt_val
, &tv64
, sizeof(tv64
));
2923 bcopy(CAST_DOWN(caddr_t
, sopt
->sopt_val
), &tv64
,
2926 if (tv64
.tv_sec
< 0 || tv64
.tv_sec
> LONG_MAX
2927 || tv64
.tv_usec
< 0 || tv64
.tv_usec
>= 1000000) {
2930 tv_p
->tv_sec
= tv64
.tv_sec
;
2931 tv_p
->tv_usec
= tv64
.tv_usec
;
2933 struct user32_timeval tv32
;
2935 if (sopt
->sopt_valsize
< sizeof(tv32
)) {
2938 sopt
->sopt_valsize
= sizeof(tv32
);
2939 if (sopt
->sopt_p
!= kernproc
) {
2940 error
= copyin(sopt
->sopt_val
, &tv32
, sizeof(tv32
));
2945 bcopy(CAST_DOWN(caddr_t
, sopt
->sopt_val
), &tv32
,
2948 #ifndef __LP64__ // K64todo "comparison is always false due to limited range of data type"
2949 if (tv32
.tv_sec
< 0 || tv32
.tv_sec
> LONG_MAX
2950 || tv32
.tv_usec
< 0 || tv32
.tv_usec
>= 1000000) {
2954 tv_p
->tv_sec
= tv32
.tv_sec
;
2955 tv_p
->tv_usec
= tv32
.tv_usec
;
2961 * Returns: 0 Success
2966 * sooptcopyin:EINVAL
2967 * sooptcopyin:EFAULT
2968 * sooptcopyin_timeval:EINVAL
2969 * sooptcopyin_timeval:EFAULT
2970 * sooptcopyin_timeval:EDOM
2971 * <pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
2972 * <pr_ctloutput>:???w
2973 * sflt_attach_private:??? [whatever a filter author chooses]
2974 * <sf_setoption>:??? [whatever a filter author chooses]
2976 * Notes: Other <pru_listen> returns depend on the protocol family; all
2977 * <sf_listen> returns depend on what the filter author causes
2978 * their filter to return.
2981 sosetopt(struct socket
*so
, struct sockopt
*sopt
)
2986 #if CONFIG_MACF_SOCKET
2988 #endif /* MAC_SOCKET */
2992 if ((so
->so_state
& (SS_CANTRCVMORE
| SS_CANTSENDMORE
))
2993 == (SS_CANTRCVMORE
| SS_CANTSENDMORE
) &&
2994 (so
->so_flags
& SOF_NPX_SETOPTSHUT
) == 0) {
2995 /* the socket has been shutdown, no more sockopt's */
3000 if (sopt
->sopt_dir
!= SOPT_SET
) {
3001 sopt
->sopt_dir
= SOPT_SET
;
3004 error
= sflt_setsockopt(so
, sopt
);
3006 if (error
== EJUSTRETURN
)
3012 if (sopt
->sopt_level
!= SOL_SOCKET
) {
3013 if (so
->so_proto
&& so
->so_proto
->pr_ctloutput
) {
3014 error
= (*so
->so_proto
->pr_ctloutput
)(so
, sopt
);
3015 socket_unlock(so
, 1);
3018 error
= ENOPROTOOPT
;
3020 switch (sopt
->sopt_name
) {
3023 error
= sooptcopyin(sopt
, &l
, sizeof (l
), sizeof (l
));
3027 so
->so_linger
= (sopt
->sopt_name
== SO_LINGER
) ?
3028 l
.l_linger
: l
.l_linger
* hz
;
3030 so
->so_options
|= SO_LINGER
;
3032 so
->so_options
&= ~SO_LINGER
;
3038 case SO_USELOOPBACK
:
3044 case SO_TIMESTAMP_MONOTONIC
:
3048 case SO_WANTOOBFLAG
:
3050 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
3055 so
->so_options
|= sopt
->sopt_name
;
3057 so
->so_options
&= ~sopt
->sopt_name
;
3064 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
3070 * Values < 1 make no sense for any of these
3071 * options, so disallow them.
3078 switch (sopt
->sopt_name
) {
3082 struct sockbuf
*sb
= (sopt
->sopt_name
== SO_SNDBUF
) ?
3083 &so
->so_snd
: &so
->so_rcv
;
3084 if (sbreserve(sb
, (u_int32_t
) optval
) == 0) {
3088 sb
->sb_flags
|= SB_USRSIZE
;
3089 sb
->sb_flags
&= ~SB_AUTOSIZE
;
3090 sb
->sb_idealsize
= (u_int32_t
)optval
;
3095 * Make sure the low-water is never greater than
3099 so
->so_snd
.sb_lowat
=
3100 (optval
> so
->so_snd
.sb_hiwat
) ?
3101 so
->so_snd
.sb_hiwat
: optval
;
3104 so
->so_rcv
.sb_lowat
=
3105 (optval
> so
->so_rcv
.sb_hiwat
) ?
3106 so
->so_rcv
.sb_hiwat
: optval
;
3113 error
= sooptcopyin_timeval(sopt
, &tv
);
3117 switch (sopt
->sopt_name
) {
3119 so
->so_snd
.sb_timeo
= tv
;
3122 so
->so_rcv
.sb_timeo
= tv
;
3131 error
= sooptcopyin(sopt
, &nke
, sizeof (nke
),
3136 error
= sflt_attach_internal(so
, nke
.nke_handle
);
3141 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
3146 so
->so_flags
|= SOF_NOSIGPIPE
;
3148 so
->so_flags
&= ~SOF_NOSIGPIPE
;
3153 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
3158 so
->so_flags
|= SOF_NOADDRAVAIL
;
3160 so
->so_flags
&= ~SOF_NOADDRAVAIL
;
3164 case SO_REUSESHAREUID
:
3165 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
3170 so
->so_flags
|= SOF_REUSESHAREUID
;
3172 so
->so_flags
&= ~SOF_REUSESHAREUID
;
3174 #ifdef __APPLE_API_PRIVATE
3175 case SO_NOTIFYCONFLICT
:
3176 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3180 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
3185 so
->so_flags
|= SOF_NOTIFYCONFLICT
;
3187 so
->so_flags
&= ~SOF_NOTIFYCONFLICT
;
3190 case SO_RESTRICTIONS
:
3191 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3195 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
3199 so
->so_restrictions
= (optval
& (SO_RESTRICT_DENYIN
|
3200 SO_RESTRICT_DENYOUT
| SO_RESTRICT_DENYSET
));
3204 #if CONFIG_MACF_SOCKET
3205 if ((error
= sooptcopyin(sopt
, &extmac
, sizeof (extmac
),
3206 sizeof (extmac
))) != 0)
3209 error
= mac_setsockopt_label(proc_ucred(sopt
->sopt_p
),
3213 #endif /* MAC_SOCKET */
3216 #ifdef __APPLE_API_PRIVATE
3217 case SO_UPCALLCLOSEWAIT
:
3218 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
3223 so
->so_flags
|= SOF_UPCALLCLOSEWAIT
;
3225 so
->so_flags
&= ~SOF_UPCALLCLOSEWAIT
;
3230 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
3235 so
->so_flags
|= SOF_BINDRANDOMPORT
;
3237 so
->so_flags
&= ~SOF_BINDRANDOMPORT
;
3240 case SO_NP_EXTENSIONS
: {
3241 struct so_np_extensions sonpx
;
3243 error
= sooptcopyin(sopt
, &sonpx
, sizeof(sonpx
), sizeof(sonpx
));
3246 if (sonpx
.npx_mask
& ~SONPX_MASK_VALID
) {
3251 * Only one bit defined for now
3253 if ((sonpx
.npx_mask
& SONPX_SETOPTSHUT
)) {
3254 if ((sonpx
.npx_flags
& SONPX_SETOPTSHUT
))
3255 so
->so_flags
|= SOF_NPX_SETOPTSHUT
;
3257 so
->so_flags
&= ~SOF_NPX_SETOPTSHUT
;
3262 case SO_TRAFFIC_CLASS
: {
3263 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
3267 error
= so_set_traffic_class(so
, optval
);
3273 case SO_RECV_TRAFFIC_CLASS
: {
3274 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
3279 so
->so_flags
&= ~SOF_RECV_TRAFFIC_CLASS
;
3281 so
->so_flags
|= SOF_RECV_TRAFFIC_CLASS
;
3285 case SO_TRAFFIC_CLASS_DBG
: {
3286 struct so_tcdbg so_tcdbg
;
3288 error
= sooptcopyin(sopt
, &so_tcdbg
,
3289 sizeof (struct so_tcdbg
), sizeof (struct so_tcdbg
));
3292 error
= so_set_tcdbg(so
, &so_tcdbg
);
3298 case SO_PRIVILEGED_TRAFFIC_CLASS
:
3299 error
= priv_check_cred(kauth_cred_get(),
3300 PRIV_NET_PRIVILEGED_TRAFFIC_CLASS
, 0);
3303 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
3308 so
->so_flags
&= ~SOF_PRIVILEGED_TRAFFIC_CLASS
;
3310 so
->so_flags
|= SOF_PRIVILEGED_TRAFFIC_CLASS
;
3314 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
3316 if (error
!= 0 || (so
->so_flags
& SOF_DEFUNCT
)) {
3322 * Any process can set SO_DEFUNCTOK (clear
3323 * SOF_NODEFUNCT), but only root can clear
3324 * SO_DEFUNCTOK (set SOF_NODEFUNCT).
3327 kauth_cred_issuser(kauth_cred_get()) == 0) {
3332 so
->so_flags
&= ~SOF_NODEFUNCT
;
3334 so
->so_flags
|= SOF_NODEFUNCT
;
3336 SODEFUNCTLOG(("%s[%d]: so %p [%d,%d] is now marked as "
3337 "%seligible for defunct\n", __func__
,
3338 proc_selfpid(), so
, INP_SOCKAF(so
),
3340 (so
->so_flags
& SOF_NODEFUNCT
) ? "not " : ""));
3344 /* This option is not settable */
3348 case SO_OPPORTUNISTIC
:
3349 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
3352 error
= so_set_opportunistic(so
, optval
);
3356 /* This option is handled by lower layer(s) */
3361 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
3364 error
= so_set_recv_anyif(so
, optval
);
3368 error
= ENOPROTOOPT
;
3371 if (error
== 0 && so
->so_proto
&& so
->so_proto
->pr_ctloutput
) {
3372 (void) ((*so
->so_proto
->pr_ctloutput
)(so
, sopt
));
3376 socket_unlock(so
, 1);
3380 /* Helper routines for getsockopt */
3382 sooptcopyout(struct sockopt
*sopt
, void *buf
, size_t len
)
3390 * Documented get behavior is that we always return a value,
3391 * possibly truncated to fit in the user's buffer.
3392 * Traditional behavior is that we always tell the user
3393 * precisely how much we copied, rather than something useful
3394 * like the total amount we had available for her.
3395 * Note that this interface is not idempotent; the entire answer must
3396 * generated ahead of time.
3398 valsize
= min(len
, sopt
->sopt_valsize
);
3399 sopt
->sopt_valsize
= valsize
;
3400 if (sopt
->sopt_val
!= USER_ADDR_NULL
) {
3401 if (sopt
->sopt_p
!= kernproc
)
3402 error
= copyout(buf
, sopt
->sopt_val
, valsize
);
3404 bcopy(buf
, CAST_DOWN(caddr_t
, sopt
->sopt_val
), valsize
);
3410 sooptcopyout_timeval(struct sockopt
*sopt
, const struct timeval
* tv_p
)
3414 struct user64_timeval tv64
;
3415 struct user32_timeval tv32
;
3420 if (proc_is64bit(sopt
->sopt_p
)) {
3422 tv64
.tv_sec
= tv_p
->tv_sec
;
3423 tv64
.tv_usec
= tv_p
->tv_usec
;
3427 tv32
.tv_sec
= tv_p
->tv_sec
;
3428 tv32
.tv_usec
= tv_p
->tv_usec
;
3431 valsize
= min(len
, sopt
->sopt_valsize
);
3432 sopt
->sopt_valsize
= valsize
;
3433 if (sopt
->sopt_val
!= USER_ADDR_NULL
) {
3434 if (sopt
->sopt_p
!= kernproc
)
3435 error
= copyout(val
, sopt
->sopt_val
, valsize
);
3437 bcopy(val
, CAST_DOWN(caddr_t
, sopt
->sopt_val
), valsize
);
3445 * <pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
3446 * <pr_ctloutput>:???
3447 * <sf_getoption>:???
3450 sogetopt(struct socket
*so
, struct sockopt
*sopt
)
3455 #if CONFIG_MACF_SOCKET
3457 #endif /* MAC_SOCKET */
3459 if (sopt
->sopt_dir
!= SOPT_GET
) {
3460 sopt
->sopt_dir
= SOPT_GET
;
3465 error
= sflt_getsockopt(so
, sopt
);
3467 if (error
== EJUSTRETURN
)
3469 socket_unlock(so
, 1);
3474 if (sopt
->sopt_level
!= SOL_SOCKET
) {
3475 if (so
->so_proto
&& so
->so_proto
->pr_ctloutput
) {
3476 error
= (*so
->so_proto
->pr_ctloutput
)(so
, sopt
);
3477 socket_unlock(so
, 1);
3480 socket_unlock(so
, 1);
3481 return (ENOPROTOOPT
);
3484 switch (sopt
->sopt_name
) {
3487 l
.l_onoff
= so
->so_options
& SO_LINGER
;
3488 l
.l_linger
= (sopt
->sopt_name
== SO_LINGER
) ?
3489 so
->so_linger
: so
->so_linger
/ hz
;
3490 error
= sooptcopyout(sopt
, &l
, sizeof (l
));
3493 case SO_USELOOPBACK
:
3502 case SO_TIMESTAMP_MONOTONIC
:
3506 case SO_WANTOOBFLAG
:
3508 optval
= so
->so_options
& sopt
->sopt_name
;
3510 error
= sooptcopyout(sopt
, &optval
, sizeof (optval
));
3514 optval
= so
->so_type
;
3519 if (so
->so_proto
->pr_flags
& PR_ATOMIC
) {
3524 m1
= so
->so_rcv
.sb_mb
;
3526 if (m1
->m_type
== MT_DATA
|| m1
->m_type
== MT_HEADER
||
3527 m1
->m_type
== MT_OOBDATA
)
3528 pkt_total
+= m1
->m_len
;
3533 optval
= so
->so_rcv
.sb_cc
- so
->so_rcv
.sb_ctl
;
3538 optval
= so
->so_snd
.sb_cc
;
3542 optval
= so
->so_error
;
3547 optval
= so
->so_snd
.sb_hiwat
;
3551 optval
= so
->so_rcv
.sb_hiwat
;
3555 optval
= so
->so_snd
.sb_lowat
;
3559 optval
= so
->so_rcv
.sb_lowat
;
3564 tv
= (sopt
->sopt_name
== SO_SNDTIMEO
?
3565 so
->so_snd
.sb_timeo
: so
->so_rcv
.sb_timeo
);
3567 error
= sooptcopyout_timeval(sopt
, &tv
);
3571 optval
= (so
->so_flags
& SOF_NOSIGPIPE
);
3575 optval
= (so
->so_flags
& SOF_NOADDRAVAIL
);
3578 case SO_REUSESHAREUID
:
3579 optval
= (so
->so_flags
& SOF_REUSESHAREUID
);
3582 #ifdef __APPLE_API_PRIVATE
3583 case SO_NOTIFYCONFLICT
:
3584 optval
= (so
->so_flags
& SOF_NOTIFYCONFLICT
);
3587 case SO_RESTRICTIONS
:
3588 optval
= so
->so_restrictions
& (SO_RESTRICT_DENYIN
|
3589 SO_RESTRICT_DENYOUT
| SO_RESTRICT_DENYSET
);
3593 #if CONFIG_MACF_SOCKET
3594 if ((error
= sooptcopyin(sopt
, &extmac
, sizeof (extmac
),
3595 sizeof (extmac
))) != 0 ||
3596 (error
= mac_socket_label_get(proc_ucred(
3597 sopt
->sopt_p
), so
, &extmac
)) != 0)
3600 error
= sooptcopyout(sopt
, &extmac
, sizeof (extmac
));
3603 #endif /* MAC_SOCKET */
3607 #if CONFIG_MACF_SOCKET
3608 if ((error
= sooptcopyin(sopt
, &extmac
, sizeof (extmac
),
3609 sizeof (extmac
))) != 0 ||
3610 (error
= mac_socketpeer_label_get(proc_ucred(
3611 sopt
->sopt_p
), so
, &extmac
)) != 0)
3614 error
= sooptcopyout(sopt
, &extmac
, sizeof (extmac
));
3617 #endif /* MAC_SOCKET */
3620 #ifdef __APPLE_API_PRIVATE
3621 case SO_UPCALLCLOSEWAIT
:
3622 optval
= (so
->so_flags
& SOF_UPCALLCLOSEWAIT
);
3626 optval
= (so
->so_flags
& SOF_BINDRANDOMPORT
);
3629 case SO_NP_EXTENSIONS
: {
3630 struct so_np_extensions sonpx
;
3632 sonpx
.npx_flags
= (so
->so_flags
& SOF_NPX_SETOPTSHUT
) ? SONPX_SETOPTSHUT
: 0;
3633 sonpx
.npx_mask
= SONPX_MASK_VALID
;
3635 error
= sooptcopyout(sopt
, &sonpx
, sizeof(struct so_np_extensions
));
3639 case SO_TRAFFIC_CLASS
:
3640 optval
= so
->so_traffic_class
;
3643 case SO_RECV_TRAFFIC_CLASS
:
3644 optval
= (so
->so_flags
& SOF_RECV_TRAFFIC_CLASS
);
3647 case SO_TRAFFIC_CLASS_STATS
:
3648 error
= sooptcopyout(sopt
, &so
->so_tc_stats
, sizeof(so
->so_tc_stats
));
3651 case SO_TRAFFIC_CLASS_DBG
:
3652 error
= sogetopt_tcdbg(so
, sopt
);
3655 case SO_PRIVILEGED_TRAFFIC_CLASS
:
3656 optval
= (so
->so_flags
& SOF_PRIVILEGED_TRAFFIC_CLASS
);
3660 optval
= !(so
->so_flags
& SOF_NODEFUNCT
);
3664 optval
= (so
->so_flags
& SOF_DEFUNCT
);
3667 case SO_OPPORTUNISTIC
:
3668 optval
= so_get_opportunistic(so
);
3672 /* This option is not gettable */
3677 optval
= so_get_recv_anyif(so
);
3681 error
= ENOPROTOOPT
;
3684 socket_unlock(so
, 1);
3688 /* The size limits on our soopt_getm is different from that on FreeBSD.
3689 * We limit the size of options to MCLBYTES. This will have to change
3690 * if we need to define options that need more space than MCLBYTES.
3693 soopt_getm(struct sockopt
*sopt
, struct mbuf
**mp
)
3695 struct mbuf
*m
, *m_prev
;
3696 int sopt_size
= sopt
->sopt_valsize
;
3699 if (sopt_size
<= 0 || sopt_size
> MCLBYTES
)
3702 how
= sopt
->sopt_p
!= kernproc
? M_WAIT
: M_DONTWAIT
;
3703 MGET(m
, how
, MT_DATA
);
3706 if (sopt_size
> MLEN
) {
3708 if ((m
->m_flags
& M_EXT
) == 0) {
3712 m
->m_len
= min(MCLBYTES
, sopt_size
);
3714 m
->m_len
= min(MLEN
, sopt_size
);
3716 sopt_size
-= m
->m_len
;
3720 while (sopt_size
> 0) {
3721 MGET(m
, how
, MT_DATA
);
3726 if (sopt_size
> MLEN
) {
3728 if ((m
->m_flags
& M_EXT
) == 0) {
3733 m
->m_len
= min(MCLBYTES
, sopt_size
);
3735 m
->m_len
= min(MLEN
, sopt_size
);
3737 sopt_size
-= m
->m_len
;
3744 /* copyin sopt data into mbuf chain */
3746 soopt_mcopyin(struct sockopt
*sopt
, struct mbuf
*m
)
3748 struct mbuf
*m0
= m
;
3750 if (sopt
->sopt_val
== USER_ADDR_NULL
)
3752 while (m
!= NULL
&& sopt
->sopt_valsize
>= m
->m_len
) {
3753 if (sopt
->sopt_p
!= kernproc
) {
3756 error
= copyin(sopt
->sopt_val
, mtod(m
, char *),
3763 bcopy(CAST_DOWN(caddr_t
, sopt
->sopt_val
),
3764 mtod(m
, char *), m
->m_len
);
3766 sopt
->sopt_valsize
-= m
->m_len
;
3767 sopt
->sopt_val
+= m
->m_len
;
3770 if (m
!= NULL
) /* should be allocated enoughly at ip6_sooptmcopyin() */
3771 panic("soopt_mcopyin");
3775 /* copyout mbuf chain data into soopt */
3777 soopt_mcopyout(struct sockopt
*sopt
, struct mbuf
*m
)
3779 struct mbuf
*m0
= m
;
3782 if (sopt
->sopt_val
== USER_ADDR_NULL
)
3784 while (m
!= NULL
&& sopt
->sopt_valsize
>= m
->m_len
) {
3785 if (sopt
->sopt_p
!= kernproc
) {
3788 error
= copyout(mtod(m
, char *), sopt
->sopt_val
,
3795 bcopy(mtod(m
, char *),
3796 CAST_DOWN(caddr_t
, sopt
->sopt_val
), m
->m_len
);
3798 sopt
->sopt_valsize
-= m
->m_len
;
3799 sopt
->sopt_val
+= m
->m_len
;
3800 valsize
+= m
->m_len
;
3804 /* enough soopt buffer should be given from user-land */
3808 sopt
->sopt_valsize
= valsize
;
3813 sohasoutofband(struct socket
*so
)
3816 if (so
->so_pgid
< 0)
3817 gsignal(-so
->so_pgid
, SIGURG
);
3818 else if (so
->so_pgid
> 0)
3819 proc_signal(so
->so_pgid
, SIGURG
);
3820 selwakeup(&so
->so_rcv
.sb_sel
);
3824 sopoll(struct socket
*so
, int events
, __unused kauth_cred_t cred
, void * wql
)
3826 struct proc
*p
= current_proc();
3831 if (events
& (POLLIN
| POLLRDNORM
))
3833 revents
|= events
& (POLLIN
| POLLRDNORM
);
3835 if (events
& (POLLOUT
| POLLWRNORM
))
3836 if (sowriteable(so
))
3837 revents
|= events
& (POLLOUT
| POLLWRNORM
);
3839 if (events
& (POLLPRI
| POLLRDBAND
))
3840 if (so
->so_oobmark
|| (so
->so_state
& SS_RCVATMARK
))
3841 revents
|= events
& (POLLPRI
| POLLRDBAND
);
3844 if (events
& (POLLIN
| POLLPRI
| POLLRDNORM
| POLLRDBAND
)) {
3846 * Darwin sets the flag first,
3847 * BSD calls selrecord first
3849 so
->so_rcv
.sb_flags
|= SB_SEL
;
3850 selrecord(p
, &so
->so_rcv
.sb_sel
, wql
);
3853 if (events
& (POLLOUT
| POLLWRNORM
)) {
3855 * Darwin sets the flag first,
3856 * BSD calls selrecord first
3858 so
->so_snd
.sb_flags
|= SB_SEL
;
3859 selrecord(p
, &so
->so_snd
.sb_sel
, wql
);
3863 socket_unlock(so
, 1);
3868 soo_kqfilter(__unused
struct fileproc
*fp
, struct knote
*kn
,
3869 __unused
struct proc
*p
)
3871 struct socket
*so
= (struct socket
*)kn
->kn_fp
->f_fglob
->fg_data
;
3876 #if CONFIG_MACF_SOCKET
3877 if (mac_socket_check_kqfilter(proc_ucred(p
), kn
, so
) != 0) {
3878 socket_unlock(so
, 1);
3881 #endif /* MAC_SOCKET */
3883 switch (kn
->kn_filter
) {
3885 kn
->kn_fop
= &soread_filtops
;
3886 skl
= &so
->so_rcv
.sb_sel
.si_note
;
3889 kn
->kn_fop
= &sowrite_filtops
;
3890 skl
= &so
->so_snd
.sb_sel
.si_note
;
3893 kn
->kn_fop
= &sock_filtops
;
3894 skl
= &so
->so_klist
;
3897 socket_unlock(so
, 1);
3901 if (KNOTE_ATTACH(skl
, kn
)) {
3902 switch(kn
->kn_filter
) {
3904 so
->so_rcv
.sb_flags
|= SB_KNOTE
;
3907 so
->so_snd
.sb_flags
|= SB_KNOTE
;
3910 so
->so_flags
|= SOF_KNOTE
;
3913 socket_unlock(so
, 1);
3917 socket_unlock(so
, 1);
3922 filt_sordetach(struct knote
*kn
)
3924 struct socket
*so
= (struct socket
*)kn
->kn_fp
->f_fglob
->fg_data
;
3927 if (so
->so_rcv
.sb_flags
& SB_KNOTE
)
3928 if (KNOTE_DETACH(&so
->so_rcv
.sb_sel
.si_note
, kn
))
3929 so
->so_rcv
.sb_flags
&= ~SB_KNOTE
;
3930 socket_unlock(so
, 1);
3935 filt_soread(struct knote
*kn
, long hint
)
3937 struct socket
*so
= (struct socket
*)kn
->kn_fp
->f_fglob
->fg_data
;
3939 if ((hint
& SO_FILT_HINT_LOCKED
) == 0)
3942 if (so
->so_options
& SO_ACCEPTCONN
) {
3945 /* Radar 6615193 handle the listen case dynamically
3946 * for kqueue read filter. This allows to call listen() after registering
3947 * the kqueue EVFILT_READ.
3950 kn
->kn_data
= so
->so_qlen
;
3951 isempty
= ! TAILQ_EMPTY(&so
->so_comp
);
3953 if ((hint
& SO_FILT_HINT_LOCKED
) == 0)
3954 socket_unlock(so
, 1);
3959 /* socket isn't a listener */
3961 kn
->kn_data
= so
->so_rcv
.sb_cc
- so
->so_rcv
.sb_ctl
;
3963 if (so
->so_oobmark
) {
3964 if (kn
->kn_flags
& EV_OOBAND
) {
3965 kn
->kn_data
-= so
->so_oobmark
;
3966 if ((hint
& SO_FILT_HINT_LOCKED
) == 0)
3967 socket_unlock(so
, 1);
3970 kn
->kn_data
= so
->so_oobmark
;
3971 kn
->kn_flags
|= EV_OOBAND
;
3973 if (so
->so_state
& SS_CANTRCVMORE
) {
3974 kn
->kn_flags
|= EV_EOF
;
3975 kn
->kn_fflags
= so
->so_error
;
3976 if ((hint
& SO_FILT_HINT_LOCKED
) == 0)
3977 socket_unlock(so
, 1);
3982 if (so
->so_state
& SS_RCVATMARK
) {
3983 if (kn
->kn_flags
& EV_OOBAND
) {
3984 if ((hint
& SO_FILT_HINT_LOCKED
) == 0)
3985 socket_unlock(so
, 1);
3988 kn
->kn_flags
|= EV_OOBAND
;
3989 } else if (kn
->kn_flags
& EV_OOBAND
) {
3991 if ((hint
& SO_FILT_HINT_LOCKED
) == 0)
3992 socket_unlock(so
, 1);
3996 if (so
->so_error
) { /* temporary udp error */
3997 if ((hint
& SO_FILT_HINT_LOCKED
) == 0)
3998 socket_unlock(so
, 1);
4002 int64_t lowwat
= so
->so_rcv
.sb_lowat
;
4003 if (kn
->kn_sfflags
& NOTE_LOWAT
)
4005 if (kn
->kn_sdata
> so
->so_rcv
.sb_hiwat
)
4006 lowwat
= so
->so_rcv
.sb_hiwat
;
4007 else if (kn
->kn_sdata
> lowwat
)
4008 lowwat
= kn
->kn_sdata
;
4011 if ((hint
& SO_FILT_HINT_LOCKED
) == 0)
4012 socket_unlock(so
, 1);
4014 return ((kn
->kn_flags
& EV_OOBAND
) || kn
->kn_data
>= lowwat
);
4018 filt_sowdetach(struct knote
*kn
)
4020 struct socket
*so
= (struct socket
*)kn
->kn_fp
->f_fglob
->fg_data
;
4023 if (so
->so_snd
.sb_flags
& SB_KNOTE
)
4024 if (KNOTE_DETACH(&so
->so_snd
.sb_sel
.si_note
, kn
))
4025 so
->so_snd
.sb_flags
&= ~SB_KNOTE
;
4026 socket_unlock(so
, 1);
4030 so_wait_for_if_feedback(struct socket
*so
)
4032 if ((so
->so_proto
->pr_domain
->dom_family
== AF_INET
||
4033 so
->so_proto
->pr_domain
->dom_family
== AF_INET6
) &&
4034 (so
->so_state
& SS_ISCONNECTED
)) {
4035 struct inpcb
*inp
= sotoinpcb(so
);
4036 if (INP_WAIT_FOR_IF_FEEDBACK(inp
))
4044 filt_sowrite(struct knote
*kn
, long hint
)
4046 struct socket
*so
= (struct socket
*)kn
->kn_fp
->f_fglob
->fg_data
;
4049 if ((hint
& SO_FILT_HINT_LOCKED
) == 0)
4052 kn
->kn_data
= sbspace(&so
->so_snd
);
4053 if (so
->so_state
& SS_CANTSENDMORE
) {
4054 kn
->kn_flags
|= EV_EOF
;
4055 kn
->kn_fflags
= so
->so_error
;
4059 if (so
->so_error
) { /* temporary udp error */
4063 if (((so
->so_state
& SS_ISCONNECTED
) == 0) &&
4064 (so
->so_proto
->pr_flags
& PR_CONNREQUIRED
)) {
4068 int64_t lowwat
= so
->so_snd
.sb_lowat
;
4069 if (kn
->kn_sfflags
& NOTE_LOWAT
)
4071 if (kn
->kn_sdata
> so
->so_snd
.sb_hiwat
)
4072 lowwat
= so
->so_snd
.sb_hiwat
;
4073 else if (kn
->kn_sdata
> lowwat
)
4074 lowwat
= kn
->kn_sdata
;
4076 if (kn
->kn_data
>= lowwat
) {
4077 if ((so
->so_flags
& SOF_NOTSENT_LOWAT
) != 0) {
4078 ret
= tcp_notsent_lowat_check(so
);
4083 if (so_wait_for_if_feedback(so
))
4086 if ((hint
& SO_FILT_HINT_LOCKED
) == 0)
4087 socket_unlock(so
, 1);
4092 filt_sockdetach(struct knote
*kn
)
4094 struct socket
*so
= (struct socket
*)kn
->kn_fp
->f_fglob
->fg_data
;
4097 if ((so
->so_flags
& SOF_KNOTE
) != 0)
4098 if (KNOTE_DETACH(&so
->so_klist
, kn
))
4099 so
->so_flags
&= ~SOF_KNOTE
;
4100 socket_unlock(so
, 1);
4104 filt_sockev(struct knote
*kn
, long hint
)
4106 int ret
= 0, locked
= 0;
4107 struct socket
*so
= (struct socket
*)kn
->kn_fp
->f_fglob
->fg_data
;
4109 if ((hint
& SO_FILT_HINT_LOCKED
) == 0) {
4114 switch (hint
& SO_FILT_HINT_EV
) {
4115 case SO_FILT_HINT_CONNRESET
:
4116 if (kn
->kn_sfflags
& NOTE_CONNRESET
)
4117 kn
->kn_fflags
|= NOTE_CONNRESET
;
4119 case SO_FILT_HINT_TIMEOUT
:
4120 if (kn
->kn_sfflags
& NOTE_TIMEOUT
)
4121 kn
->kn_fflags
|= NOTE_TIMEOUT
;
4123 case SO_FILT_HINT_NOSRCADDR
:
4124 if (kn
->kn_sfflags
& NOTE_NOSRCADDR
)
4125 kn
->kn_fflags
|= NOTE_NOSRCADDR
;
4127 case SO_FILT_HINT_IFDENIED
:
4128 if ((kn
->kn_sfflags
& NOTE_IFDENIED
))
4129 kn
->kn_fflags
|= NOTE_IFDENIED
;
4131 case SO_FILT_HINT_KEEPALIVE
:
4132 if (kn
->kn_sfflags
& NOTE_KEEPALIVE
)
4133 kn
->kn_fflags
|= NOTE_KEEPALIVE
;
4136 if ((kn
->kn_sfflags
& NOTE_READCLOSED
) &&
4137 (so
->so_state
& SS_CANTRCVMORE
))
4138 kn
->kn_fflags
|= NOTE_READCLOSED
;
4140 if ((kn
->kn_sfflags
& NOTE_WRITECLOSED
) &&
4141 (so
->so_state
& SS_CANTSENDMORE
))
4142 kn
->kn_fflags
|= NOTE_WRITECLOSED
;
4144 if ((kn
->kn_sfflags
& NOTE_SUSPEND
) &&
4145 ((hint
& SO_FILT_HINT_SUSPEND
) ||
4146 (so
->so_flags
& SOF_SUSPENDED
))) {
4148 ~(NOTE_SUSPEND
| NOTE_RESUME
);
4149 kn
->kn_fflags
|= NOTE_SUSPEND
;
4152 if ((kn
->kn_sfflags
& NOTE_RESUME
) &&
4153 ((hint
& SO_FILT_HINT_RESUME
) ||
4154 (so
->so_flags
& SOF_SUSPENDED
) == 0)) {
4156 ~(NOTE_SUSPEND
| NOTE_RESUME
);
4157 kn
->kn_fflags
|= NOTE_RESUME
;
4160 if (so
->so_error
!= 0) {
4162 kn
->kn_data
= so
->so_error
;
4163 kn
->kn_flags
|= EV_EOF
;
4165 get_sockev_state(so
, (u_int32_t
*)&(kn
->kn_data
));
4168 if (kn
->kn_fflags
!= 0)
4172 socket_unlock(so
, 1);
4178 get_sockev_state(struct socket
*so
, u_int32_t
*statep
) {
4179 u_int32_t state
= *(statep
);
4181 if (so
->so_state
& SS_ISCONNECTED
)
4182 state
|= SOCKEV_CONNECTED
;
4184 state
&= ~(SOCKEV_CONNECTED
);
4185 state
|= ((so
->so_state
& SS_ISDISCONNECTED
) ?
4186 SOCKEV_DISCONNECTED
: 0);
4191 #define SO_LOCK_HISTORY_STR_LEN (2 * SO_LCKDBG_MAX * (2 + (2 * sizeof(void *)) + 1) + 1)
4193 __private_extern__
const char * solockhistory_nr(struct socket
*so
)
4197 static char lock_history_str
[SO_LOCK_HISTORY_STR_LEN
];
4199 bzero(lock_history_str
, sizeof(lock_history_str
));
4200 for (i
= SO_LCKDBG_MAX
- 1; i
>= 0; i
--) {
4201 n
+= snprintf(lock_history_str
+ n
, SO_LOCK_HISTORY_STR_LEN
- n
, "%lx:%lx ",
4202 (uintptr_t) so
->lock_lr
[(so
->next_lock_lr
+ i
) % SO_LCKDBG_MAX
],
4203 (uintptr_t) so
->unlock_lr
[(so
->next_unlock_lr
+ i
) % SO_LCKDBG_MAX
]);
4205 return lock_history_str
;
4209 socket_lock(struct socket
*so
, int refcount
)
4214 lr_saved
= __builtin_return_address(0);
4216 if (so
->so_proto
->pr_lock
) {
4217 error
= (*so
->so_proto
->pr_lock
)(so
, refcount
, lr_saved
);
4219 #ifdef MORE_LOCKING_DEBUG
4220 lck_mtx_assert(so
->so_proto
->pr_domain
->dom_mtx
,
4221 LCK_MTX_ASSERT_NOTOWNED
);
4223 lck_mtx_lock(so
->so_proto
->pr_domain
->dom_mtx
);
4226 so
->lock_lr
[so
->next_lock_lr
] = lr_saved
;
4227 so
->next_lock_lr
= (so
->next_lock_lr
+1) % SO_LCKDBG_MAX
;
4234 socket_unlock(struct socket
*so
, int refcount
)
4238 lck_mtx_t
*mutex_held
;
4240 lr_saved
= __builtin_return_address(0);
4242 if (so
->so_proto
== NULL
)
4243 panic("socket_unlock null so_proto so=%p\n", so
);
4245 if (so
&& so
->so_proto
->pr_unlock
) {
4246 error
= (*so
->so_proto
->pr_unlock
)(so
, refcount
, lr_saved
);
4248 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
4249 #ifdef MORE_LOCKING_DEBUG
4250 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
4252 so
->unlock_lr
[so
->next_unlock_lr
] = lr_saved
;
4253 so
->next_unlock_lr
= (so
->next_unlock_lr
+1) % SO_LCKDBG_MAX
;
4256 if (so
->so_usecount
<= 0)
4257 panic("socket_unlock: bad refcount=%d so=%p (%d, %d, %d) lrh=%s",
4258 so
->so_usecount
, so
, so
->so_proto
->pr_domain
->dom_family
,
4259 so
->so_type
, so
->so_proto
->pr_protocol
,
4260 solockhistory_nr(so
));
4263 if (so
->so_usecount
== 0) {
4264 sofreelastref(so
, 1);
4267 lck_mtx_unlock(mutex_held
);
4273 /* Called with socket locked, will unlock socket */
4275 sofree(struct socket
*so
)
4278 lck_mtx_t
*mutex_held
;
4279 if (so
->so_proto
->pr_getlock
!= NULL
)
4280 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
4282 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
4283 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
4285 sofreelastref(so
, 0);
4289 soreference(struct socket
*so
)
4291 socket_lock(so
, 1); /* locks & take one reference on socket */
4292 socket_unlock(so
, 0); /* unlock only */
4296 sodereference(struct socket
*so
)
4299 socket_unlock(so
, 1);
4303 * Set or clear SOF_MULTIPAGES on the socket to enable or disable the
4304 * possibility of using jumbo clusters. Caller must ensure to hold
4308 somultipages(struct socket
*so
, boolean_t set
)
4311 so
->so_flags
|= SOF_MULTIPAGES
;
4313 so
->so_flags
&= ~SOF_MULTIPAGES
;
4317 so_isdstlocal(struct socket
*so
) {
4319 struct inpcb
*inp
= (struct inpcb
*)so
->so_pcb
;
4321 if (so
->so_proto
->pr_domain
->dom_family
== AF_INET
) {
4322 return inaddr_local(inp
->inp_faddr
);
4323 } else if (so
->so_proto
->pr_domain
->dom_family
== AF_INET6
) {
4324 return in6addr_local(&inp
->in6p_faddr
);
4330 sosetdefunct(struct proc
*p
, struct socket
*so
, int level
, boolean_t noforce
)
4332 int err
= 0, defunct
;
4334 defunct
= (so
->so_flags
& SOF_DEFUNCT
);
4336 if (!(so
->so_snd
.sb_flags
& so
->so_rcv
.sb_flags
& SB_DROP
))
4337 panic("%s: SB_DROP not set", __func__
);
4341 if (so
->so_flags
& SOF_NODEFUNCT
) {
4344 SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p "
4345 "[%d,%d] is not eligible for defunct (%d)\n",
4346 __func__
, proc_selfpid(), proc_pid(p
), level
, so
,
4347 INP_SOCKAF(so
), INP_SOCKTYPE(so
), err
));
4350 so
->so_flags
&= ~SOF_NODEFUNCT
;
4351 SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] "
4352 "defunct by force\n", __func__
, proc_selfpid(), proc_pid(p
),
4353 level
, so
, INP_SOCKAF(so
), INP_SOCKTYPE(so
)));
4356 so
->so_flags
|= SOF_DEFUNCT
;
4357 /* Prevent further data from being appended to the socket buffers */
4358 so
->so_snd
.sb_flags
|= SB_DROP
;
4359 so
->so_rcv
.sb_flags
|= SB_DROP
;
4362 SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] %s "
4363 "defunct\n", __func__
, proc_selfpid(), proc_pid(p
), level
, so
,
4364 INP_SOCKAF(so
), INP_SOCKTYPE(so
),
4365 defunct
? "is already" : "marked as"));
4371 sodefunct(struct proc
*p
, struct socket
*so
, int level
)
4373 struct sockbuf
*rcv
, *snd
;
4375 if (!(so
->so_flags
& SOF_DEFUNCT
))
4376 panic("%s improperly called", __func__
);
4378 if (so
->so_state
& SS_DEFUNCT
)
4384 SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] is now "
4385 "defunct [rcv_si 0x%x, snd_si 0x%x, rcv_fl 0x%x, snd_fl 0x%x]\n",
4386 __func__
, proc_selfpid(), proc_pid(p
), level
, so
,
4387 INP_SOCKAF(so
), INP_SOCKTYPE(so
),
4388 (uint32_t)rcv
->sb_sel
.si_flags
, (uint32_t)snd
->sb_sel
.si_flags
,
4389 (uint16_t)rcv
->sb_flags
, (uint16_t)snd
->sb_flags
));
4392 * Unwedge threads blocked on sbwait() and sb_lock().
4397 if (rcv
->sb_flags
& SB_LOCK
)
4399 if (snd
->sb_flags
& SB_LOCK
)
4403 * Flush the buffers and disconnect. We explicitly call shutdown
4404 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
4405 * states are set for the socket. This would also flush out data
4406 * hanging off the receive list of this socket.
4408 (void) soshutdownlock(so
, SHUT_RD
);
4409 (void) soshutdownlock(so
, SHUT_WR
);
4410 (void) sodisconnectlocked(so
);
4413 * Explicitly handle connectionless-protocol disconnection
4414 * and release any remaining data in the socket buffers.
4416 if (!(so
->so_flags
& SS_ISDISCONNECTED
))
4417 (void) soisdisconnected(so
);
4419 if (so
->so_error
== 0)
4420 so
->so_error
= EBADF
;
4422 if (rcv
->sb_cc
!= 0)
4424 if (snd
->sb_cc
!= 0)
4427 so
->so_state
|= SS_DEFUNCT
;
4433 __private_extern__
int
4434 so_set_recv_anyif(struct socket
*so
, int optval
)
4439 if (INP_SOCKAF(so
) == AF_INET
|| INP_SOCKAF(so
) == AF_INET6
) {
4441 if (INP_SOCKAF(so
) == AF_INET
) {
4444 sotoinpcb(so
)->inp_flags
|= INP_RECV_ANYIF
;
4446 sotoinpcb(so
)->inp_flags
&= ~INP_RECV_ANYIF
;
4448 ret
= EPROTONOSUPPORT
;
4454 __private_extern__
int
4455 so_get_recv_anyif(struct socket
*so
)
4460 if (INP_SOCKAF(so
) == AF_INET
|| INP_SOCKAF(so
) == AF_INET6
) {
4462 if (INP_SOCKAF(so
) == AF_INET
) {
4464 ret
= (sotoinpcb(so
)->inp_flags
& INP_RECV_ANYIF
) ? 1 : 0;