/*
 * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/kdebug.h>

#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <kern/zalloc.h>
#include <kern/locks.h>
#include <machine/limits.h>
#include <libkern/OSAtomic.h>
#include <pexpert/pexpert.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <sys/mcache.h>

#include <security/mac.h>
#include <security/mac_framework.h>
extern int in6_init_done;

int			so_cache_timeouts = 0;
int			so_cache_max_freed = 0;
int			cached_sock_count = 0;
__private_extern__ int	max_cached_sock_count = MAX_CACHED_SOCKETS;
struct socket		*socket_cache_head = 0;
struct socket		*socket_cache_tail = 0;
u_int32_t		so_cache_time = 0;
int			so_cache_init_done = 0;
struct zone		*so_cache_zone;

static lck_grp_t	*so_cache_mtx_grp;
static lck_attr_t	*so_cache_mtx_attr;
static lck_grp_attr_t	*so_cache_mtx_grp_attr;
lck_mtx_t		*so_cache_mtx;
#include <machine/limits.h>

static void	filt_sordetach(struct knote *kn);
static int	filt_soread(struct knote *kn, long hint);
static void	filt_sowdetach(struct knote *kn);
static int	filt_sowrite(struct knote *kn, long hint);

static int	sooptcopyin_timeval(struct sockopt *sopt, struct timeval *tv_p);
static int	sooptcopyout_timeval(struct sockopt *sopt,
    const struct timeval *tv_p);
static struct filterops soread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_sordetach,
	.f_event = filt_soread,
};

static struct filterops sowrite_filtops = {
	.f_isfd = 1,
	.f_detach = filt_sowdetach,
	.f_event = filt_sowrite,
};
#define	EVEN_MORE_LOCKING_DEBUG	0
int socket_debug = 0;
int socket_zone = M_SOCKET;
so_gen_t so_gencnt;	/* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)
#define	DBG_FNC_SOSEND		NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
#define	DBG_FNC_SORECEIVE	NETDBG_CODE(DBG_NETSOCK, (8 << 8))
#define	DBG_FNC_SOSHUTDOWN	NETDBG_CODE(DBG_NETSOCK, (9 << 8))

#define	MAX_SOOPTGETM_SIZE	(128 * MCLBYTES)
SYSCTL_DECL(_kern_ipc);

int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW | CTLFLAG_LOCKED,
    &somaxconn, 0, "");

/* Should we get a maximum also ??? */
static int sosendmaxchain = 65536;
static int sosendminchain = 16384;
static int sorecvmincopy = 16384;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sosendminchain, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sorecvmincopy, 0, "");
/*
 * Set to enable jumbo clusters (if available) for large writes when
 * the socket is marked with SOF_MULTIPAGES; see below.
 */
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sosendjcl, 0, "");

/*
 * Set this to ignore SOF_MULTIPAGES and use jumbo clusters for large
 * writes on the socket for all protocols on any network interfaces,
 * depending upon sosendjcl above.  Be extra careful when setting this
 * to 1, because sending down packets that cross physical pages down to
 * broken drivers (those that falsely assume that the physical pages
 * are contiguous) might lead to system panics or silent data corruption.
 * When set to 0, the system will respect SOF_MULTIPAGES, which is set
 * only for TCP sockets whose outgoing interface is IFNET_MULTIPAGES
 * capable.  Set this to 1 only for testing/debugging purposes.
 */
int sosendjcl_ignore_capab = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl_ignore_capab, 0, "");
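
/*
 * Illustration (not part of the kernel build): a hedged userspace sketch of
 * how the kern.ipc tunables declared above can be inspected or toggled with
 * sysctlbyname(3).  The sysctl names follow directly from the SYSCTL_INT()
 * declarations; the surrounding program is hypothetical.
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int val = 0;
 *		size_t len = sizeof (val);
 *
 *		// Read the current setting.
 *		if (sysctlbyname("kern.ipc.sosendjcl_ignore_capab",
 *		    &val, &len, NULL, 0) == 0)
 *			printf("sosendjcl_ignore_capab = %d\n", val);
 *
 *		// Enable it for testing/debugging only (see warning above).
 *		val = 1;
 *		return (sysctlbyname("kern.ipc.sosendjcl_ignore_capab",
 *		    NULL, NULL, &val, sizeof (val)));
 *	}
 */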
int sodefunctlog = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sodefunctlog, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sodefunctlog, 0, "");
/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */

extern void postevent(struct socket *, struct sockbuf *, int);
extern void evsofree(struct socket *);
/* TODO: these should be in header file */
extern int get_inpcb_str_size(void);
extern int get_tcp_str_size(void);
extern struct domain *pffinddomain(int);
extern struct protosw *pffindprotonotype(int, int);
extern int soclose_locked(struct socket *);
extern int soo_kqfilter(struct fileproc *, struct knote *, struct proc *);

#if CONFIG_EMBEDDED
extern int uthread_get_background_state(uthread_t);
#endif /* CONFIG_EMBEDDED */
vm_size_t	so_cache_zone_element_size;

static int	sodelayed_copy(struct socket *, struct uio *, struct mbuf **,
    int *);
static void	cached_sock_alloc(struct socket **, int);
static void	cached_sock_free(struct socket *);
static void	so_cache_timer(void *);

void soclose_wait_locked(struct socket *so);
int so_isdstlocal(struct socket *so);

__private_extern__ u_int32_t sotcdb = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sotcdb, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sotcdb, 0, "");
void
socketinit(void)
{
	vm_size_t str_size;

	if (so_cache_init_done) {
		printf("socketinit: already called...\n");
		return;
	}

	PE_parse_boot_argn("socket_debug", &socket_debug,
	    sizeof (socket_debug));

	/*
	 * allocate lock group attribute and group for socket cache mutex
	 */
	so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();

	so_cache_mtx_grp = lck_grp_alloc_init("so_cache",
	    so_cache_mtx_grp_attr);

	/*
	 * allocate the lock attribute for socket cache mutex
	 */
	so_cache_mtx_attr = lck_attr_alloc_init();

	so_cache_init_done = 1;

	/* cached sockets mutex */
	so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr);

	if (so_cache_mtx == NULL)
		return;		/* we're hosed... */

	str_size = (vm_size_t)(sizeof (struct socket) + 4 +
	    get_inpcb_str_size() + 4 + get_tcp_str_size());

	so_cache_zone = zinit(str_size, 120000*str_size, 8192, "socache zone");
	zone_change(so_cache_zone, Z_CALLERACCT, FALSE);
	zone_change(so_cache_zone, Z_NOENCRYPT, TRUE);

	printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);

	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));

	so_cache_zone_element_size = str_size;

	VERIFY(SO_TC_MAX == SO_TC_STATS_MAX);

	socket_tclass_init();
}
static void
cached_sock_alloc(struct socket **so, int waitok)
{
	caddr_t	temp;
	register uintptr_t offset;

	lck_mtx_lock(so_cache_mtx);

	if (cached_sock_count) {
		cached_sock_count--;
		*so = socket_cache_head;
		if (*so == 0)
			panic("cached_sock_alloc: cached sock is null");

		socket_cache_head = socket_cache_head->cache_next;
		if (socket_cache_head)
			socket_cache_head->cache_prev = 0;
		else
			socket_cache_tail = 0;

		lck_mtx_unlock(so_cache_mtx);

		temp = (*so)->so_saved_pcb;
		bzero((caddr_t)*so, sizeof (struct socket));

		kprintf("cached_sock_alloc - retrieving cached sock %p - "
		    "count == %d\n", *so, cached_sock_count);

		(*so)->so_saved_pcb = temp;
		(*so)->cached_in_sock_layer = 1;
	} else {
		kprintf("Allocating cached sock %p from memory\n", *so);

		lck_mtx_unlock(so_cache_mtx);

		if (waitok)
			*so = (struct socket *)zalloc(so_cache_zone);
		else
			*so = (struct socket *)zalloc_noblock(so_cache_zone);

		if (*so == 0)
			return;

		bzero((caddr_t)*so, sizeof (struct socket));

		/*
		 * Define offsets for extra structures into our single block
		 * of memory.  Align extra structures on longword boundaries.
		 */
		offset = (uintptr_t) *so;
		offset += sizeof (struct socket);

		offset = ALIGN(offset);

		(*so)->so_saved_pcb = (caddr_t)offset;
		offset += get_inpcb_str_size();

		offset = ALIGN(offset);

		((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb =
		    (caddr_t)offset;

		kprintf("Allocating cached socket - %p, pcb=%p tcpcb=%p\n",
		    *so, (*so)->so_saved_pcb,
		    ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);

		(*so)->cached_in_sock_layer = 1;
	}
}
static void
cached_sock_free(struct socket *so)
{
	lck_mtx_lock(so_cache_mtx);

	if (++cached_sock_count > max_cached_sock_count) {
		--cached_sock_count;
		lck_mtx_unlock(so_cache_mtx);

		kprintf("Freeing overflowed cached socket %p\n", so);

		zfree(so_cache_zone, so);
	} else {
		kprintf("Freeing socket %p into cache\n", so);

		if (so_cache_hw < cached_sock_count)
			so_cache_hw = cached_sock_count;

		so->cache_next = socket_cache_head;
		so->cache_prev = 0;
		if (socket_cache_head)
			socket_cache_head->cache_prev = so;
		else
			socket_cache_tail = so;

		so->cache_timestamp = so_cache_time;
		socket_cache_head = so;
		lck_mtx_unlock(so_cache_mtx);

		kprintf("Freed cached sock %p into cache - count is %d\n",
		    so, cached_sock_count);
	}
}
void
so_update_last_owner_locked(struct socket *so, proc_t self)
{
	if (self == NULL)
		self = current_proc();

	so->last_upid = proc_uniqueid(self);
	so->last_pid = proc_pid(self);
}
static void
so_cache_timer(__unused void *dummy)
{
	register struct socket *p;
	register int n_freed = 0;

	lck_mtx_lock(so_cache_mtx);

	while ((p = socket_cache_tail)) {
		if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
			break;

		if ((socket_cache_tail = p->cache_prev))
			p->cache_prev->cache_next = 0;
		if (--cached_sock_count == 0)
			socket_cache_head = 0;

		zfree(so_cache_zone, p);

		if (++n_freed >= SO_CACHE_MAX_FREE_BATCH) {
			so_cache_max_freed++;
			break;
		}
	}
	lck_mtx_unlock(so_cache_mtx);

	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
}
#endif /* __APPLE__ */
/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
static struct socket *
soalloc(int waitok, int dom, int type)
{
	struct socket *so;

	if ((dom == PF_INET) && (type == SOCK_STREAM)) {
		cached_sock_alloc(&so, waitok);
	} else {
		MALLOC_ZONE(so, struct socket *, sizeof (*so), socket_zone,
		    M_WAITOK);
		if (so != NULL)
			bzero(so, sizeof (*so));
	}
	if (so != NULL) {
		/* XXX race condition for reentrant kernel */
		//###LD Atomic add for so_gencnt
		so->so_gencnt = ++so_gencnt;
		so->so_zone = socket_zone;
#if CONFIG_MACF_SOCKET
		/* Convert waitok to M_WAITOK/M_NOWAIT for MAC Framework. */
		if (mac_socket_label_init(so, !waitok) != 0) {
			sodealloc(so);
			return (NULL);
		}
#endif /* MAC_SOCKET */
		so_update_last_owner_locked(so, NULL);
	}

	return (so);
}
/*
 *	<pru_attach>:ENOBUFS[AF_UNIX]
 *	<pru_attach>:ENOBUFS[TCP]
 *	<pru_attach>:ENOMEM[TCP]
 *	<pru_attach>:EISCONN[TCP]
 *	<pru_attach>:???		[other protocol families, IPSEC]
 */
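/*
 * Illustration (hypothetical caller, not part of this file's logic): a
 * hedged sketch of how a kernel client would typically create a TCP socket
 * through socreate().  Only the socreate() signature below and soclose()
 * are taken from this file; the error handling shown is illustrative.
 *
 *	struct socket *so = NULL;
 *	int error;
 *
 *	error = socreate(PF_INET, &so, SOCK_STREAM, IPPROTO_TCP);
 *	if (error != 0)
 *		return (error);	// e.g. EAFNOSUPPORT, EPROTONOSUPPORT, ENOBUFS
 *	// ... use the socket, then release it with soclose(so) ...
 */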
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc *p = current_proc();
	register struct protosw *prp;
	register struct socket *so;
	register int error = 0;
#if !CONFIG_EMBEDDED
	thread_t thread;
	struct uthread *ut;
#endif /* CONFIG_EMBEDDED */
	extern int tcpconsdebug;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);

	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) {
		if (pffinddomain(dom) == NULL) {
			return (EAFNOSUPPORT);
		}
		if (pffindprotonotype(dom, proto) != NULL) {
			return (EPROTOTYPE);
		}
		return (EPROTONOSUPPORT);
	}
	if (prp->pr_type != type)
		return (EPROTOTYPE);

	so = soalloc(1, dom, type);
	if (so == 0)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);

	so->so_uid = kauth_cred_getuid(kauth_cred_get());
	so->so_gid = kauth_cred_getgid(kauth_cred_get());
	if (!suser(kauth_cred_get(), NULL))
		so->so_state = SS_PRIV;

	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	so->so_rcv.sb_so = so->so_snd.sb_so = so;

	so->next_lock_lr = 0;
	so->next_unlock_lr = 0;

#if CONFIG_MACF_SOCKET
	mac_socket_label_associate(kauth_cred_get(), so);
#endif /* MAC_SOCKET */

	//### Attachment will create the per pcb lock if necessary and increase refcount
	/*
	 * for creation, make sure it's done before
	 * socket is inserted in lists
	 */

	error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error) {
		/*
		 * If so_pcb is not zero, the socket will be leaked,
		 * so the protocol attachment handler must be coded carefully.
		 */
		so->so_state |= SS_NOFDREF;
		sofreelastref(so, 1);	/* will deallocate the socket */
		return (error);
	}

	prp->pr_domain->dom_refs++;
	TAILQ_INIT(&so->so_evlist);

	/* Attach socket filters for this protocol */

	if (tcpconsdebug == 2)
		so->so_options |= SO_DEBUG;

	so_set_default_traffic_class(so);

	/*
	 * If this is a background thread/task, mark the socket as such.
	 */
#if CONFIG_EMBEDDED
	if (proc_get_self_isbackground() != 0)
#else /* !CONFIG_EMBEDDED */
	thread = current_thread();
	ut = get_bsdthread_info(thread);
	if (uthread_get_background_state(ut))
#endif /* !CONFIG_EMBEDDED */
	{
		socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND);
		so->so_background_thread = current_thread();
	}

	/*
	 * Don't mark Unix domain sockets as eligible for defunct by default.
	 */
	if (dom == PF_LOCAL)
		so->so_flags |= SOF_NODEFUNCT;

	/*
	 * Since v6 initialization is asynchronous and we can't hold
	 * up the main boot path, we need to at least hold off any
	 * sockets attempting to be created until the v6 stack is
	 * ready.
	 */
	if (in6_init_done == 0)
/*
 * Returns:	0			Success
 *	<pru_bind>:EINVAL		Invalid argument [COMMON_START]
 *	<pru_bind>:EAFNOSUPPORT		Address family not supported
 *	<pru_bind>:EADDRNOTAVAIL	Address not available.
 *	<pru_bind>:EINVAL		Invalid argument
 *	<pru_bind>:EAFNOSUPPORT		Address family not supported [notdef]
 *	<pru_bind>:EACCES		Permission denied
 *	<pru_bind>:EADDRINUSE		Address in use
 *	<pru_bind>:EAGAIN		Resource unavailable, try again
 *	<pru_bind>:EPERM		Operation not permitted
 *
 * Notes:	It's not possible to fully enumerate the return codes above,
 *		since socket filter authors and protocol family authors may
 *		not choose to limit their error returns to those listed, even
 *		though this may result in some software operating incorrectly.
 *
 *		The error codes which are enumerated above are those known to
 *		be returned by the tcp_usr_bind function supplied.
 */
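/*
 * Illustration (hypothetical caller, not part of this file): binding a
 * socket created with socreate() to a local IPv4 address.  Only the
 * sobind() signature below is taken from this file.
 *
 *	struct sockaddr_in sin;
 *
 *	bzero(&sin, sizeof (sin));
 *	sin.sin_family = AF_INET;
 *	sin.sin_len = sizeof (sin);
 *	sin.sin_port = htons(8080);
 *	sin.sin_addr.s_addr = htonl(INADDR_ANY);
 *
 *	error = sobind(so, (struct sockaddr *)&sin);
 */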
int
sobind(struct socket *so, struct sockaddr *nam)
{
	struct proc *p = current_proc();
	int error = 0;

	socket_lock(so, 1);

	so_update_last_owner_locked(so, p);

	/*
	 * If this is a bind request on a socket that has been marked
	 * as inactive, reject it now before we go any further.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		error = EINVAL;
		SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
		    __func__, proc_pid(p), so, INP_SOCKAF(so), INP_SOCKTYPE(so),
		    error));
		goto out;
	}

	/* Socket filter */
	error = sflt_bind(so, nam);

	if (error == 0)
		error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
out:
	socket_unlock(so, 1);

	if (error == EJUSTRETURN)
		error = 0;

	return (error);
}
void
sodealloc(struct socket *so)
{
	/* Remove any filters */

	so->so_gencnt = ++so_gencnt;

#if CONFIG_MACF_SOCKET
	mac_socket_label_destroy(so);
#endif /* MAC_SOCKET */

	if (so->cached_in_sock_layer == 1) {
		cached_sock_free(so);
	} else {
		if (so->cached_in_sock_layer == -1)
			panic("sodealloc: double dealloc: so=%p\n", so);
		so->cached_in_sock_layer = -1;
		FREE_ZONE(so, sizeof (*so), so->so_zone);
	}
}
/*
 * Returns:	0			Success
 *	<pru_listen>:EINVAL[AF_UNIX]
 *	<pru_listen>:EINVAL[TCP]
 *	<pru_listen>:EADDRNOTAVAIL[TCP]	Address not available.
 *	<pru_listen>:EINVAL[TCP]	Invalid argument
 *	<pru_listen>:EAFNOSUPPORT[TCP]	Address family not supported [notdef]
 *	<pru_listen>:EACCES[TCP]	Permission denied
 *	<pru_listen>:EADDRINUSE[TCP]	Address in use
 *	<pru_listen>:EAGAIN[TCP]	Resource unavailable, try again
 *	<pru_listen>:EPERM[TCP]		Operation not permitted
 *
 * Notes:	Other <pru_listen> returns depend on the protocol family; all
 *		<sf_listen> returns depend on what the filter author causes
 *		their filter to return.
 */
int
solisten(struct socket *so, int backlog)
{
	struct proc *p = current_proc();
	int error = 0;

	socket_lock(so, 1);

	so_update_last_owner_locked(so, p);

	if (so->so_proto == NULL) {
		error = EINVAL;
		goto out;
	}
	if ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
		error = EOPNOTSUPP;
		goto out;
	}

	/*
	 * If the listen request is made on a socket that is not fully
	 * disconnected, or on a socket that has been marked as inactive,
	 * reject the request now.
	 */
	if ((so->so_state &
	    (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) ||
	    (so->so_flags & SOF_DEFUNCT)) {
		error = EINVAL;
		if (so->so_flags & SOF_DEFUNCT) {
			SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
			    __func__, proc_pid(p), so, INP_SOCKAF(so),
			    INP_SOCKTYPE(so), error));
		}
		goto out;
	}

	if ((so->so_restrictions & SO_RESTRICT_DENYIN) != 0) {
		error = EPERM;
		goto out;
	}

	error = sflt_listen(so);

	if (error == 0)
		error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);

	if (error) {
		if (error == EJUSTRETURN)
			error = 0;
		goto out;
	}

	if (TAILQ_EMPTY(&so->so_comp))
		so->so_options |= SO_ACCEPTCONN;
	/*
	 * POSIX: The implementation may have an upper limit on the length of
	 * the listen queue-either global or per accepting socket. If backlog
	 * exceeds this limit, the length of the listen queue is set to the
	 * limit.
	 *
	 * If listen() is called with a backlog argument value that is less
	 * than 0, the function behaves as if it had been called with a backlog
	 * argument value of 0.
	 *
	 * A backlog argument of 0 may allow the socket to accept connections,
	 * in which case the length of the listen queue may be set to an
	 * implementation-defined minimum value.
	 */
	if (backlog <= 0 || backlog > somaxconn)
		backlog = somaxconn;

	so->so_qlimit = backlog;
out:
	socket_unlock(so, 1);
	return (error);
}
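
/*
 * Illustration (hypothetical, userspace view, not part of this file): the
 * backlog clamping above means out-of-range values passed to listen(2) are
 * silently capped by the kern.ipc.somaxconn tunable rather than rejected.
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, 0);
 *	// ... bind(fd, ...) ...
 *	listen(fd, -1);		// treated as somaxconn (default SOMAXCONN)
 *	listen(fd, 100000);	// likewise clamped to somaxconn
 */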
828 sofreelastref(struct socket
*so
, int dealloc
)
830 struct socket
*head
= so
->so_head
;
832 /* Assume socket is locked */
834 if ((!(so
->so_flags
& SOF_PCBCLEARING
)) ||
835 ((so
->so_state
& SS_NOFDREF
) == 0)) {
837 selthreadclear(&so
->so_snd
.sb_sel
);
838 selthreadclear(&so
->so_rcv
.sb_sel
);
839 so
->so_rcv
.sb_flags
&= ~SB_UPCALL
;
840 so
->so_snd
.sb_flags
&= ~SB_UPCALL
;
845 socket_lock(head
, 1);
846 if (so
->so_state
& SS_INCOMP
) {
847 TAILQ_REMOVE(&head
->so_incomp
, so
, so_list
);
849 } else if (so
->so_state
& SS_COMP
) {
851 * We must not decommission a socket that's
852 * on the accept(2) queue. If we do, then
853 * accept(2) may hang after select(2) indicated
854 * that the listening socket was ready.
857 selthreadclear(&so
->so_snd
.sb_sel
);
858 selthreadclear(&so
->so_rcv
.sb_sel
);
859 so
->so_rcv
.sb_flags
&= ~SB_UPCALL
;
860 so
->so_snd
.sb_flags
&= ~SB_UPCALL
;
862 socket_unlock(head
, 1);
865 panic("sofree: not queued");
868 so
->so_state
&= ~SS_INCOMP
;
870 socket_unlock(head
, 1);
873 selthreadclear(&so
->so_snd
.sb_sel
);
874 sbrelease(&so
->so_snd
);
878 /* 3932268: disable upcall */
879 so
->so_rcv
.sb_flags
&= ~SB_UPCALL
;
880 so
->so_snd
.sb_flags
&= ~SB_UPCALL
;
887 soclose_wait_locked(struct socket
*so
)
889 lck_mtx_t
*mutex_held
;
891 if (so
->so_proto
->pr_getlock
!= NULL
)
892 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
894 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
895 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
898 * Double check here and return if there's no outstanding upcall;
899 * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
901 if (!(so
->so_flags
& SOF_UPCALLINUSE
) ||
902 !(so
->so_flags
& SOF_UPCALLCLOSEWAIT
))
905 so
->so_flags
|= SOF_CLOSEWAIT
;
906 (void) msleep((caddr_t
)&so
->so_upcall
, mutex_held
, (PZERO
- 1),
907 "soclose_wait_locked", NULL
);
908 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
909 so
->so_flags
&= ~SOF_CLOSEWAIT
;
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
918 soclose_locked(struct socket
*so
)
921 lck_mtx_t
*mutex_held
;
924 if (so
->so_usecount
== 0) {
925 panic("soclose: so=%p refcount=0\n", so
);
928 sflt_notify(so
, sock_evt_closing
, NULL
);
930 if ((so
->so_options
& SO_ACCEPTCONN
)) {
931 struct socket
*sp
, *sonext
;
935 * We do not want new connection to be added
936 * to the connection queues
938 so
->so_options
&= ~SO_ACCEPTCONN
;
940 for (sp
= TAILQ_FIRST(&so
->so_incomp
); sp
!= NULL
; sp
= sonext
) {
941 sonext
= TAILQ_NEXT(sp
, so_list
);
			/*
			 * Skip sockets thrown away by tcpdropdropblreq;
			 * they will get cleaned up by the garbage collection.
			 * Otherwise, remove the incomp socket from the queue
			 * and let soabort trigger the appropriate cleanup.
			 */
949 if (sp
->so_flags
& SOF_OVERFLOW
)
952 if (so
->so_proto
->pr_getlock
!= NULL
) {
				/*
				 * Lock ordering for consistency with the rest
				 * of the stack: we lock the socket first and
				 * then grab the head.
				 */
956 socket_unlock(so
, 0);
962 TAILQ_REMOVE(&so
->so_incomp
, sp
, so_list
);
965 if (sp
->so_state
& SS_INCOMP
) {
966 sp
->so_state
&= ~SS_INCOMP
;
973 socket_unlock(sp
, 1);
976 while ((sp
= TAILQ_FIRST(&so
->so_comp
)) != NULL
) {
977 /* Dequeue from so_comp since sofree() won't do it */
978 TAILQ_REMOVE(&so
->so_comp
, sp
, so_list
);
981 if (so
->so_proto
->pr_getlock
!= NULL
) {
982 socket_unlock(so
, 0);
986 if (sp
->so_state
& SS_COMP
) {
987 sp
->so_state
&= ~SS_COMP
;
993 if (so
->so_proto
->pr_getlock
!= NULL
) {
994 socket_unlock(sp
, 1);
999 if (so
->so_pcb
== 0) {
1000 /* 3915887: mark the socket as ready for dealloc */
1001 so
->so_flags
|= SOF_PCBCLEARING
;
1004 if (so
->so_state
& SS_ISCONNECTED
) {
1005 if ((so
->so_state
& SS_ISDISCONNECTING
) == 0) {
1006 error
= sodisconnectlocked(so
);
1010 if (so
->so_options
& SO_LINGER
) {
1011 if ((so
->so_state
& SS_ISDISCONNECTING
) &&
1012 (so
->so_state
& SS_NBIO
))
1014 if (so
->so_proto
->pr_getlock
!= NULL
)
1015 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
1017 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1018 while (so
->so_state
& SS_ISCONNECTED
) {
1019 ts
.tv_sec
= (so
->so_linger
/100);
1020 ts
.tv_nsec
= (so
->so_linger
% 100) *
1021 NSEC_PER_USEC
* 1000 * 10;
1022 error
= msleep((caddr_t
)&so
->so_timeo
,
1023 mutex_held
, PSOCK
| PCATCH
, "soclose", &ts
);
1026 * It's OK when the time fires,
1027 * don't report an error
1029 if (error
== EWOULDBLOCK
)
1037 if (so
->so_usecount
== 0)
1038 panic("soclose: usecount is zero so=%p\n", so
);
1039 if (so
->so_pcb
&& !(so
->so_flags
& SOF_PCBCLEARING
)) {
1040 int error2
= (*so
->so_proto
->pr_usrreqs
->pru_detach
)(so
);
1044 if (so
->so_usecount
<= 0)
1045 panic("soclose: usecount is zero so=%p\n", so
);
1047 if (so
->so_pcb
&& so
->so_state
& SS_NOFDREF
)
1048 panic("soclose: NOFDREF");
1049 so
->so_state
|= SS_NOFDREF
;
1051 so
->so_proto
->pr_domain
->dom_refs
--;
1060 soclose(struct socket
*so
)
1065 if (so
->so_flags
& SOF_UPCALLINUSE
)
1066 soclose_wait_locked(so
);
1068 if (so
->so_retaincnt
== 0) {
1069 error
= soclose_locked(so
);
1072 * if the FD is going away, but socket is
1073 * retained in kernel remove its reference
1076 if (so
->so_usecount
< 2)
1077 panic("soclose: retaincnt non null and so=%p "
1078 "usecount=%d\n", so
, so
->so_usecount
);
1080 socket_unlock(so
, 1);
1085 * Must be called at splnet...
1087 /* Should already be locked */
1089 soabort(struct socket
*so
)
1093 #ifdef MORE_LOCKING_DEBUG
1094 lck_mtx_t
*mutex_held
;
1096 if (so
->so_proto
->pr_getlock
!= NULL
)
1097 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
1099 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1100 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
1103 if ((so
->so_flags
& SOF_ABORTED
) == 0) {
1104 so
->so_flags
|= SOF_ABORTED
;
1105 error
= (*so
->so_proto
->pr_usrreqs
->pru_abort
)(so
);
1115 soacceptlock(struct socket
*so
, struct sockaddr
**nam
, int dolock
)
1122 if ((so
->so_state
& SS_NOFDREF
) == 0)
1123 panic("soaccept: !NOFDREF");
1124 so
->so_state
&= ~SS_NOFDREF
;
1125 error
= (*so
->so_proto
->pr_usrreqs
->pru_accept
)(so
, nam
);
1128 socket_unlock(so
, 1);
1133 soaccept(struct socket
*so
, struct sockaddr
**nam
)
1135 return (soacceptlock(so
, nam
, 1));
1139 soacceptfilter(struct socket
*so
)
1141 struct sockaddr
*local
= NULL
, *remote
= NULL
;
1143 struct socket
*head
= so
->so_head
;
1146 * Hold the lock even if this socket
1147 * has not been made visible to the filter(s).
1148 * For sockets with global locks, this protect against the
1149 * head or peer going away
1152 if (sogetaddr_locked(so
, &remote
, 1) != 0 ||
1153 sogetaddr_locked(so
, &local
, 0) != 0) {
1154 so
->so_state
&= ~(SS_NOFDREF
| SS_COMP
);
1156 socket_unlock(so
, 1);
1158 /* Out of resources; try it again next time */
1159 error
= ECONNABORTED
;
1163 error
= sflt_accept(head
, so
, local
, remote
);
1166 * If we get EJUSTRETURN from one of the filters, mark this socket
1167 * as inactive and return it anyway. This newly accepted socket
1168 * will be disconnected later before we hand it off to the caller.
1170 if (error
== EJUSTRETURN
) {
1172 (void) sosetdefunct(current_proc(), so
,
1173 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL
, FALSE
);
1178 * This may seem like a duplication to the above error
1179 * handling part when we return ECONNABORTED, except
1180 * the following is done while holding the lock since
1181 * the socket has been exposed to the filter(s) earlier.
1183 so
->so_state
&= ~(SS_NOFDREF
| SS_COMP
);
1185 socket_unlock(so
, 1);
1187 /* Propagate socket filter's error code to the caller */
1189 socket_unlock(so
, 1);
1192 /* Callee checks for NULL pointer */
1193 sock_freeaddr(remote
);
1194 sock_freeaddr(local
);
/*
 * Returns:	0			Success
 *		EOPNOTSUPP		Operation not supported on socket
 *		EISCONN			Socket is connected
 *	<pru_connect>:EADDRNOTAVAIL	Address not available.
 *	<pru_connect>:EINVAL		Invalid argument
 *	<pru_connect>:EAFNOSUPPORT	Address family not supported [notdef]
 *	<pru_connect>:EACCES		Permission denied
 *	<pru_connect>:EADDRINUSE	Address in use
 *	<pru_connect>:EAGAIN		Resource unavailable, try again
 *	<pru_connect>:EPERM		Operation not permitted
 *	<sf_connect_out>:???		[anything a filter writer might set]
 */
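/*
 * Illustration (hypothetical caller, not from this file): connecting a
 * socket to a remote IPv4 peer.  Only soconnect()/soconnectlock() below are
 * taken from this file; a non-blocking connect would then be completed by
 * waiting on the socket's write event.
 *
 *	struct sockaddr_in sin;
 *
 *	bzero(&sin, sizeof (sin));
 *	sin.sin_family = AF_INET;
 *	sin.sin_len = sizeof (sin);
 *	sin.sin_port = htons(80);
 *	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
 *
 *	error = soconnect(so, (struct sockaddr *)&sin);
 */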
1212 soconnectlock(struct socket
*so
, struct sockaddr
*nam
, int dolock
)
1215 struct proc
*p
= current_proc();
1220 so_update_last_owner_locked(so
, p
);
1223 * If this is a listening socket or if this is a previously-accepted
1224 * socket that has been marked as inactive, reject the connect request.
1226 if ((so
->so_options
& SO_ACCEPTCONN
) || (so
->so_flags
& SOF_DEFUNCT
)) {
1228 if (so
->so_flags
& SOF_DEFUNCT
) {
1229 SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
1230 __func__
, proc_pid(p
), so
, INP_SOCKAF(so
),
1231 INP_SOCKTYPE(so
), error
));
1234 socket_unlock(so
, 1);
1238 if ((so
->so_restrictions
& SO_RESTRICT_DENYOUT
) != 0) {
1240 socket_unlock(so
, 1);
1245 * If protocol is connection-based, can only connect once.
1246 * Otherwise, if connected, try to disconnect first.
1247 * This allows user to disconnect by connecting to, e.g.,
1250 if (so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
) &&
1251 ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) ||
1252 (error
= sodisconnectlocked(so
)))) {
1256 * Run connect filter before calling protocol:
1257 * - non-blocking connect returns before completion;
1259 error
= sflt_connectout(so
, nam
);
1262 if (error
== EJUSTRETURN
)
1265 error
= (*so
->so_proto
->pr_usrreqs
->pru_connect
)(so
, nam
, p
);
1269 socket_unlock(so
, 1);
1274 soconnect(struct socket
*so
, struct sockaddr
*nam
)
1276 return (soconnectlock(so
, nam
, 1));
/*
 * Returns:	0			Success
 *	<pru_connect2>:EINVAL[AF_UNIX]
 *	<pru_connect2>:EPROTOTYPE[AF_UNIX]
 *	<pru_connect2>:???		[other protocol families]
 *
 * Notes:	<pru_connect2> is not supported by [TCP].
 */
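/*
 * Illustration (hypothetical, not from this file): soconnect2() is the kind
 * of primitive a socketpair(2)-style implementation would use to
 * cross-connect two freshly created AF_UNIX sockets.
 *
 *	struct socket *so1, *so2;
 *
 *	error = socreate(AF_UNIX, &so1, SOCK_STREAM, 0);
 *	if (error == 0)
 *		error = socreate(AF_UNIX, &so2, SOCK_STREAM, 0);
 *	if (error == 0)
 *		error = soconnect2(so1, so2);
 */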
1288 soconnect2(struct socket
*so1
, struct socket
*so2
)
1292 socket_lock(so1
, 1);
1293 if (so2
->so_proto
->pr_lock
)
1294 socket_lock(so2
, 1);
1296 error
= (*so1
->so_proto
->pr_usrreqs
->pru_connect2
)(so1
, so2
);
1298 socket_unlock(so1
, 1);
1299 if (so2
->so_proto
->pr_lock
)
1300 socket_unlock(so2
, 1);
1305 sodisconnectlocked(struct socket
*so
)
1309 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
1313 if (so
->so_state
& SS_ISDISCONNECTING
) {
1318 error
= (*so
->so_proto
->pr_usrreqs
->pru_disconnect
)(so
);
1321 sflt_notify(so
, sock_evt_disconnected
, NULL
);
1327 /* Locking version */
1329 sodisconnect(struct socket
*so
)
1334 error
= sodisconnectlocked(so
);
1335 socket_unlock(so
, 1);
1339 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
/*
 * sosendcheck will lock the socket buffer if it isn't locked and
 * verify that there is space for the data being inserted.
 *
 * Returns:	0		Success
 *		sblock:EWOULDBLOCK
 */
1354 sosendcheck(struct socket
*so
, struct sockaddr
*addr
, int32_t resid
, int32_t clen
,
1355 int32_t atomic
, int flags
, int *sblocked
)
1362 if (*sblocked
== 0) {
1363 if ((so
->so_snd
.sb_flags
& SB_LOCK
) != 0 &&
1364 so
->so_send_filt_thread
!= 0 &&
1365 so
->so_send_filt_thread
== current_thread()) {
1367 * We're being called recursively from a filter,
1368 * allow this to continue. Radar 4150520.
1369 * Don't set sblocked because we don't want
1370 * to perform an unlock later.
1374 error
= sblock(&so
->so_snd
, SBLOCKWAIT(flags
));
1376 if (so
->so_flags
& SOF_DEFUNCT
)
1385 * If a send attempt is made on a socket that has been marked
1386 * as inactive (disconnected), reject the request.
1388 if (so
->so_flags
& SOF_DEFUNCT
) {
1391 SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", __func__
,
1392 proc_selfpid(), so
, INP_SOCKAF(so
), INP_SOCKTYPE(so
),
1397 if (so
->so_state
& SS_CANTSENDMORE
)
1401 error
= so
->so_error
;
1406 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
1407 if ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) != 0) {
1408 if ((so
->so_state
& SS_ISCONFIRMING
) == 0 &&
1409 !(resid
== 0 && clen
!= 0))
1411 } else if (addr
== 0 && !(flags
&MSG_HOLD
)) {
1412 return ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) ?
1413 ENOTCONN
: EDESTADDRREQ
);
1416 space
= sbspace(&so
->so_snd
);
1417 if (flags
& MSG_OOB
)
1419 if ((atomic
&& resid
> so
->so_snd
.sb_hiwat
) ||
1420 clen
> so
->so_snd
.sb_hiwat
)
1422 if (space
< resid
+ clen
&&
1423 (atomic
|| space
< (int32_t)so
->so_snd
.sb_lowat
|| space
< clen
)) {
1424 if ((so
->so_state
& SS_NBIO
) || (flags
& MSG_NBIO
) ||
1426 return (EWOULDBLOCK
);
1428 sbunlock(&so
->so_snd
, 1);
1430 error
= sbwait(&so
->so_snd
);
1432 if (so
->so_flags
& SOF_DEFUNCT
)
/*
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 *
 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
 *	point at the mbuf chain being constructed and go from there.
 *
 * Returns:	0			Success
 *		sosendcheck:EWOULDBLOCK
 *		sosendcheck:???		[value from so_error]
 *	<pru_send>:ECONNRESET[TCP]
 *	<pru_send>:EINVAL[TCP]
 *	<pru_send>:ENOBUFS[TCP]
 *	<pru_send>:EADDRINUSE[TCP]
 *	<pru_send>:EADDRNOTAVAIL[TCP]
 *	<pru_send>:EAFNOSUPPORT[TCP]
 *	<pru_send>:EACCES[TCP]
 *	<pru_send>:EAGAIN[TCP]
 *	<pru_send>:EPERM[TCP]
 *	<pru_send>:EMSGSIZE[TCP]
 *	<pru_send>:EHOSTUNREACH[TCP]
 *	<pru_send>:ENETUNREACH[TCP]
 *	<pru_send>:ENETDOWN[TCP]
 *	<pru_send>:ENOMEM[TCP]
 *	<pru_send>:ENOBUFS[TCP]
 *	<pru_send>:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
 *	<pru_send>:EINVAL[AF_UNIX]
 *	<pru_send>:EOPNOTSUPP[AF_UNIX]
 *	<pru_send>:EPIPE[AF_UNIX]
 *	<pru_send>:ENOTCONN[AF_UNIX]
 *	<pru_send>:EISCONN[AF_UNIX]
 *	<pru_send>:???[AF_UNIX]		[whatever a filter author chooses]
 *	<sf_data_out>:???		[whatever a filter author chooses]
 *
 * Notes:	Other <pru_send> returns depend on the protocol family; all
 *		<sf_data_out> returns depend on what the filter author causes
 *		their filter to return.
 */
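/*
 * Illustration (hypothetical, not from this file): the two ways callers
 * typically drive sosend().  A syscall path (write/sendto) passes a uio
 * describing user buffers, while an in-kernel caller may hand over a
 * pre-built mbuf chain in "top".  Only the sosend() signature below is
 * taken from this file.
 *
 *	// syscall-style: data described by uio, no preformed mbufs
 *	error = sosend(so, NULL, uio, NULL, NULL, 0);
 *
 *	// kernel-style: data already in an mbuf chain "top"
 *	error = sosend(so, NULL, NULL, top, NULL, MSG_DONTWAIT);
 */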
1503 sosend(struct socket
*so
, struct sockaddr
*addr
, struct uio
*uio
,
1504 struct mbuf
*top
, struct mbuf
*control
, int flags
)
1507 register struct mbuf
*m
, *freelist
= NULL
;
1508 register int32_t space
, len
, resid
;
1509 int clen
= 0, error
, dontroute
, mlen
, sendflags
;
1510 int atomic
= sosendallatonce(so
) || top
;
1512 struct proc
*p
= current_proc();
1515 // LP64todo - fix this!
1516 resid
= uio_resid(uio
);
1518 resid
= top
->m_pkthdr
.len
;
1520 KERNEL_DEBUG((DBG_FNC_SOSEND
| DBG_FUNC_START
), so
, resid
,
1521 so
->so_snd
.sb_cc
, so
->so_snd
.sb_lowat
, so
->so_snd
.sb_hiwat
);
1524 so_update_last_owner_locked(so
, p
);
1526 if (so
->so_type
!= SOCK_STREAM
&& (flags
& MSG_OOB
) != 0) {
1528 socket_unlock(so
, 1);
1533 * In theory resid should be unsigned.
1534 * However, space must be signed, as it might be less than 0
1535 * if we over-committed, and we must use a signed comparison
1536 * of space and resid. On the other hand, a negative resid
1537 * causes us to loop sending 0-length segments to the protocol.
1539 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
1540 * type sockets since that's an error.
1542 if (resid
< 0 || (so
->so_type
== SOCK_STREAM
&& (flags
& MSG_EOR
))) {
1544 socket_unlock(so
, 1);
1549 (flags
& MSG_DONTROUTE
) && (so
->so_options
& SO_DONTROUTE
) == 0 &&
1550 (so
->so_proto
->pr_flags
& PR_ATOMIC
);
1551 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgsnd
);
1553 clen
= control
->m_len
;
1556 error
= sosendcheck(so
, addr
, resid
, clen
, atomic
, flags
,
1562 space
= sbspace(&so
->so_snd
) - clen
+ ((flags
& MSG_OOB
) ?
1568 * Data is prepackaged in "top".
1571 if (flags
& MSG_EOR
)
1572 top
->m_flags
|= M_EOR
;
1578 bytes_to_copy
= imin(resid
, space
);
1580 if (sosendminchain
> 0) {
1583 chainlength
= sosendmaxchain
;
1587 * Attempt to use larger than system page-size
1588 * clusters for large writes only if there is
1589 * a jumbo cluster pool and if the socket is
1590 * marked accordingly.
1592 jumbocl
= sosendjcl
&& njcl
> 0 &&
1593 ((so
->so_flags
& SOF_MULTIPAGES
) ||
1594 sosendjcl_ignore_capab
);
1596 socket_unlock(so
, 0);
1600 int hdrs_needed
= (top
== 0) ? 1 : 0;
				/*
				 * Try to maintain a local cache of mbuf
				 * clusters needed to complete this write.
				 * The list is further limited to the number
				 * currently needed to fill the socket; this
				 * mechanism allows a large number of mbufs/
				 * clusters to be grabbed under a single mbuf
				 * lock.  If we can't get any clusters, then
				 * fall back to trying for mbufs.  If we fail
				 * early (or miscalculate the number needed),
				 * make sure to release any clusters we
				 * haven't yet consumed.
				 */
1617 if (freelist
== NULL
&&
1618 bytes_to_copy
> MBIGCLBYTES
&&
1621 bytes_to_copy
/ M16KCLBYTES
;
1623 if ((bytes_to_copy
-
1624 (num_needed
* M16KCLBYTES
))
1629 m_getpackets_internal(
1630 (unsigned int *)&num_needed
,
1631 hdrs_needed
, M_WAIT
, 0,
1634 * Fall back to 4K cluster size
1635 * if allocation failed
1639 if (freelist
== NULL
&&
1640 bytes_to_copy
> MCLBYTES
) {
1642 bytes_to_copy
/ MBIGCLBYTES
;
1644 if ((bytes_to_copy
-
1645 (num_needed
* MBIGCLBYTES
)) >=
1650 m_getpackets_internal(
1651 (unsigned int *)&num_needed
,
1652 hdrs_needed
, M_WAIT
, 0,
1655 * Fall back to cluster size
1656 * if allocation failed
1660 if (freelist
== NULL
&&
1661 bytes_to_copy
> MINCLSIZE
) {
1663 bytes_to_copy
/ MCLBYTES
;
1665 if ((bytes_to_copy
-
1666 (num_needed
* MCLBYTES
)) >=
1671 m_getpackets_internal(
1672 (unsigned int *)&num_needed
,
1673 hdrs_needed
, M_WAIT
, 0,
1676 * Fall back to a single mbuf
1677 * if allocation failed
1681 if (freelist
== NULL
) {
1689 if (freelist
== NULL
) {
1695 * For datagram protocols,
1696 * leave room for protocol
1697 * headers in first mbuf.
1699 if (atomic
&& top
== 0 &&
1700 bytes_to_copy
< MHLEN
) {
1706 freelist
= m
->m_next
;
1709 if ((m
->m_flags
& M_EXT
))
1710 mlen
= m
->m_ext
.ext_size
;
1711 else if ((m
->m_flags
& M_PKTHDR
))
1713 MHLEN
- m_leadingspace(m
);
1716 len
= imin(mlen
, bytes_to_copy
);
1722 error
= uiomove(mtod(m
, caddr_t
),
1725 resid
= uio_resid(uio
);
1729 top
->m_pkthdr
.len
+= len
;
1734 if (flags
& MSG_EOR
)
1735 top
->m_flags
|= M_EOR
;
1738 bytes_to_copy
= min(resid
, space
);
1740 } while (space
> 0 &&
1741 (chainlength
< sosendmaxchain
|| atomic
||
1742 resid
< MINCLSIZE
));
1750 if (flags
& (MSG_HOLD
|MSG_SEND
)) {
1751 /* Enqueue for later, go away if HOLD */
1752 register struct mbuf
*mb1
;
1753 if (so
->so_temp
&& (flags
& MSG_FLUSH
)) {
1754 m_freem(so
->so_temp
);
1758 so
->so_tail
->m_next
= top
;
1765 if (flags
& MSG_HOLD
) {
1772 so
->so_options
|= SO_DONTROUTE
;
1774 /* Compute flags here, for pru_send and NKEs */
1775 sendflags
= (flags
& MSG_OOB
) ? PRUS_OOB
:
1777 * If the user set MSG_EOF, the protocol
1778 * understands this flag and nothing left to
1779 * send then use PRU_SEND_EOF instead of PRU_SEND.
1781 ((flags
& MSG_EOF
) &&
1782 (so
->so_proto
->pr_flags
& PR_IMPLOPCL
) &&
1785 /* If there is more to send set PRUS_MORETOCOME */
1786 (resid
> 0 && space
> 0) ? PRUS_MORETOCOME
: 0;
1789 * Socket filter processing
1791 error
= sflt_data_out(so
, addr
, &top
, &control
,
1792 (sendflags
& MSG_OOB
) ? sock_data_filt_flag_oob
: 0);
1794 if (error
== EJUSTRETURN
) {
1804 * End Socket filter processing
1807 error
= (*so
->so_proto
->pr_usrreqs
->pru_send
)
1808 (so
, sendflags
, top
, addr
, control
, p
);
1810 if (flags
& MSG_SEND
)
1814 so
->so_options
&= ~SO_DONTROUTE
;
1822 } while (resid
&& space
> 0);
1827 sbunlock(&so
->so_snd
, 0); /* will unlock socket */
1829 socket_unlock(so
, 1);
1836 m_freem_list(freelist
);
1838 KERNEL_DEBUG(DBG_FNC_SOSEND
| DBG_FUNC_END
, so
, resid
, so
->so_snd
.sb_cc
,
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 *
 * Returns:	0			Success
 *		sblock:EWOULDBLOCK
 *		sodelayed_copy:EFAULT
 *	<pru_rcvoob>:EINVAL[TCP]
 *	<pru_rcvoob>:EWOULDBLOCK[TCP]
 *	<pr_domain->dom_externalize>:EMSGSIZE[AF_UNIX]
 *	<pr_domain->dom_externalize>:ENOBUFS[AF_UNIX]
 *	<pr_domain->dom_externalize>:???
 *
 * Notes:	Additional return values from calls through <pru_rcvoob> and
 *		<pr_domain->dom_externalize> depend on protocols other than
 *		TCP or AF_UNIX, which are documented above.
 */
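/*
 * Illustration (hypothetical, not from this file): a typical recvmsg-style
 * call into soreceive(), asking for the sender's address and any control
 * (ancillary) data along with the payload described by the uio.  Only the
 * soreceive() signature below is taken from this file.
 *
 *	struct sockaddr *from = NULL;
 *	struct mbuf *control = NULL;
 *	int flags = MSG_DONTWAIT;
 *
 *	error = soreceive(so, &from, uio, NULL, &control, &flags);
 *	// On return, bits such as MSG_EOR may be set in "flags";
 *	// "from" and "control", if non-NULL, must be freed by the caller.
 */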
1882 soreceive(struct socket
*so
, struct sockaddr
**psa
, struct uio
*uio
,
1883 struct mbuf
**mp0
, struct mbuf
**controlp
, int *flagsp
)
1885 register struct mbuf
*m
, **mp
, *ml
= NULL
;
1886 register int flags
, len
, error
, offset
;
1887 struct protosw
*pr
= so
->so_proto
;
1888 struct mbuf
*nextrecord
;
1890 int orig_resid
= uio_resid(uio
);
1891 struct mbuf
*free_list
;
1892 int delayed_copy_len
;
1895 struct proc
*p
= current_proc();
1897 // LP64todo - fix this!
1898 KERNEL_DEBUG(DBG_FNC_SORECEIVE
| DBG_FUNC_START
, so
, uio_resid(uio
),
1899 so
->so_rcv
.sb_cc
, so
->so_rcv
.sb_lowat
, so
->so_rcv
.sb_hiwat
);
1902 so_update_last_owner_locked(so
, p
);
1904 #ifdef MORE_LOCKING_DEBUG
1905 if (so
->so_usecount
== 1)
1906 panic("soreceive: so=%x no other reference on socket\n", so
);
1914 flags
= *flagsp
&~ MSG_EOR
;
1919 * If a recv attempt is made on a previously-accepted socket
1920 * that has been marked as inactive (disconnected), reject
1923 if (so
->so_flags
& SOF_DEFUNCT
) {
1924 struct sockbuf
*sb
= &so
->so_rcv
;
1927 SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", __func__
,
1928 proc_pid(p
), so
, INP_SOCKAF(so
), INP_SOCKTYPE(so
), error
));
1930 * This socket should have been disconnected and flushed
1931 * prior to being returned from sodefunct(); there should
1932 * be no data on its receive list, so panic otherwise.
1934 if (so
->so_state
& SS_DEFUNCT
)
1935 sb_empty_assert(sb
, __func__
);
1936 socket_unlock(so
, 1);
	/*
	 * When SO_WANTOOBFLAG is set we try to get out-of-band data
	 * regardless of the flags argument.  Here is the case where
	 * out-of-band data is not inline.
	 */
1945 if ((flags
& MSG_OOB
) ||
1946 ((so
->so_options
& SO_WANTOOBFLAG
) != 0 &&
1947 (so
->so_options
& SO_OOBINLINE
) == 0 &&
1948 (so
->so_oobmark
|| (so
->so_state
& SS_RCVATMARK
)))) {
1949 m
= m_get(M_WAIT
, MT_DATA
);
1951 socket_unlock(so
, 1);
1952 KERNEL_DEBUG(DBG_FNC_SORECEIVE
| DBG_FUNC_END
,
1953 ENOBUFS
, 0, 0, 0, 0);
1956 error
= (*pr
->pr_usrreqs
->pru_rcvoob
)(so
, m
, flags
& MSG_PEEK
);
1959 socket_unlock(so
, 0);
1961 error
= uiomove(mtod(m
, caddr_t
),
1962 imin(uio_resid(uio
), m
->m_len
), uio
);
1964 } while (uio_resid(uio
) && error
== 0 && m
);
1970 if ((so
->so_options
& SO_WANTOOBFLAG
) != 0) {
1971 if (error
== EWOULDBLOCK
|| error
== EINVAL
) {
				 * Let's try to get normal data:
				 * EWOULDBLOCK: out-of-band data not
				 * received yet.  EINVAL: out-of-band data
1980 } else if (error
== 0 && flagsp
) {
1984 socket_unlock(so
, 1);
1985 KERNEL_DEBUG(DBG_FNC_SORECEIVE
| DBG_FUNC_END
, error
,
1992 *mp
= (struct mbuf
*)0;
1993 if (so
->so_state
& SS_ISCONFIRMING
&& uio_resid(uio
))
1994 (*pr
->pr_usrreqs
->pru_rcvd
)(so
, 0);
1997 free_list
= (struct mbuf
*)0;
1998 delayed_copy_len
= 0;
2000 #ifdef MORE_LOCKING_DEBUG
2001 if (so
->so_usecount
<= 1)
2002 printf("soreceive: sblock so=%p ref=%d on socket\n",
2003 so
, so
->so_usecount
);
2006 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
2007 * and if so just return to the caller. This could happen when
2008 * soreceive() is called by a socket upcall function during the
2009 * time the socket is freed. The socket buffer would have been
2010 * locked across the upcall, therefore we cannot put this thread
2011 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
2012 * we may livelock), because the lock on the socket buffer will
2013 * only be released when the upcall routine returns to its caller.
2014 * Because the socket has been officially closed, there can be
2015 * no further read on it.
2017 if ((so
->so_state
& (SS_NOFDREF
| SS_CANTRCVMORE
)) ==
2018 (SS_NOFDREF
| SS_CANTRCVMORE
)) {
2019 socket_unlock(so
, 1);
2023 error
= sblock(&so
->so_rcv
, SBLOCKWAIT(flags
));
2025 socket_unlock(so
, 1);
2026 KERNEL_DEBUG(DBG_FNC_SORECEIVE
| DBG_FUNC_END
, error
,
2031 m
= so
->so_rcv
.sb_mb
;
2033 * If we have less data than requested, block awaiting more
2034 * (subject to any timeout) if:
2035 * 1. the current count is less than the low water mark, or
2036 * 2. MSG_WAITALL is set, and it is possible to do the entire
2037 * receive operation at once if we block (resid <= hiwat).
2038 * 3. MSG_DONTWAIT is not set
2039 * If MSG_WAITALL is set but resid is larger than the receive buffer,
2040 * we have to do the receive in sections, and thus risk returning
2041 * a short count if a timeout or signal occurs after we start.
2043 if (m
== 0 || (((flags
& MSG_DONTWAIT
) == 0 &&
2044 so
->so_rcv
.sb_cc
< uio_resid(uio
)) &&
2045 (so
->so_rcv
.sb_cc
< so
->so_rcv
.sb_lowat
||
2046 ((flags
& MSG_WAITALL
) && uio_resid(uio
) <= so
->so_rcv
.sb_hiwat
)) &&
2047 m
->m_nextpkt
== 0 && (pr
->pr_flags
& PR_ATOMIC
) == 0)) {
2049 * Panic if we notice inconsistencies in the socket's
2050 * receive list; both sb_mb and sb_cc should correctly
2051 * reflect the contents of the list, otherwise we may
2052 * end up with false positives during select() or poll()
2053 * which could put the application in a bad state.
2055 if (m
== NULL
&& so
->so_rcv
.sb_cc
!= 0)
2056 panic("soreceive corrupted so_rcv: m %p cc %u",
2057 m
, so
->so_rcv
.sb_cc
);
2062 error
= so
->so_error
;
2063 if ((flags
& MSG_PEEK
) == 0)
2067 if (so
->so_state
& SS_CANTRCVMORE
) {
2073 for (; m
; m
= m
->m_next
)
2074 if (m
->m_type
== MT_OOBDATA
|| (m
->m_flags
& M_EOR
)) {
2075 m
= so
->so_rcv
.sb_mb
;
2078 if ((so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
)) == 0 &&
2079 (so
->so_proto
->pr_flags
& PR_CONNREQUIRED
)) {
2083 if (uio_resid(uio
) == 0)
2085 if ((so
->so_state
& SS_NBIO
) ||
2086 (flags
& (MSG_DONTWAIT
|MSG_NBIO
))) {
2087 error
= EWOULDBLOCK
;
2090 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive sbwait 1");
2091 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive sbwait 1");
2092 sbunlock(&so
->so_rcv
, 1);
2093 #if EVEN_MORE_LOCKING_DEBUG
2095 printf("Waiting for socket data\n");
2098 error
= sbwait(&so
->so_rcv
);
2099 #if EVEN_MORE_LOCKING_DEBUG
2101 printf("SORECEIVE - sbwait returned %d\n", error
);
2103 if (so
->so_usecount
< 1)
2104 panic("soreceive: after 2nd sblock so=%p ref=%d on "
2105 "socket\n", so
, so
->so_usecount
);
2107 socket_unlock(so
, 1);
2108 KERNEL_DEBUG(DBG_FNC_SORECEIVE
| DBG_FUNC_END
, error
,
2115 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgrcv
);
2116 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 1");
2117 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 1");
2118 nextrecord
= m
->m_nextpkt
;
2119 if ((pr
->pr_flags
& PR_ADDR
) && m
->m_type
== MT_SONAME
) {
2120 KASSERT(m
->m_type
== MT_SONAME
, ("receive 1a"));
2121 #if CONFIG_MACF_SOCKET_SUBSET
2123 * Call the MAC framework for policy checking if we're in
2124 * the user process context and the socket isn't connected.
2126 if (p
!= kernproc
&& !(so
->so_state
& SS_ISCONNECTED
)) {
2127 struct mbuf
*m0
= m
;
2129 * Dequeue this record (temporarily) from the receive
2130 * list since we're about to drop the socket's lock
2131 * where a new record may arrive and be appended to
2132 * the list. Upon MAC policy failure, the record
2133 * will be freed. Otherwise, we'll add it back to
2134 * the head of the list. We cannot rely on SB_LOCK
2135 * because append operation uses the socket's lock.
2138 m
->m_nextpkt
= NULL
;
2139 sbfree(&so
->so_rcv
, m
);
2141 } while (m
!= NULL
);
2143 so
->so_rcv
.sb_mb
= nextrecord
;
2144 SB_EMPTY_FIXUP(&so
->so_rcv
);
2145 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 1a");
2146 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 1a");
2147 socket_unlock(so
, 0);
2148 if (mac_socket_check_received(proc_ucred(p
), so
,
2149 mtod(m
, struct sockaddr
*)) != 0) {
2151 * MAC policy failure; free this record and
2152 * process the next record (or block until
2153 * one is available). We have adjusted sb_cc
2154 * and sb_mbcnt above so there is no need to
2155 * call sbfree() again.
2159 } while (m
!= NULL
);
2161 * Clear SB_LOCK but don't unlock the socket.
2162 * Process the next record or wait for one.
2165 sbunlock(&so
->so_rcv
, 1);
2170 * If the socket has been defunct'd, drop it.
2172 if (so
->so_flags
& SOF_DEFUNCT
) {
2178 * Re-adjust the socket receive list and re-enqueue
2179 * the record in front of any packets which may have
2180 * been appended while we dropped the lock.
2182 for (m
= m0
; m
->m_next
!= NULL
; m
= m
->m_next
)
2183 sballoc(&so
->so_rcv
, m
);
2184 sballoc(&so
->so_rcv
, m
);
2185 if (so
->so_rcv
.sb_mb
== NULL
) {
2186 so
->so_rcv
.sb_lastrecord
= m0
;
2187 so
->so_rcv
.sb_mbtail
= m
;
2190 nextrecord
= m
->m_nextpkt
= so
->so_rcv
.sb_mb
;
2191 so
->so_rcv
.sb_mb
= m
;
2192 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 1b");
2193 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 1b");
2195 #endif /* CONFIG_MACF_SOCKET_SUBSET */
2198 *psa
= dup_sockaddr(mtod(m
, struct sockaddr
*),
2200 if ((*psa
== 0) && (flags
& MSG_NEEDSA
)) {
2201 error
= EWOULDBLOCK
;
2205 if (flags
& MSG_PEEK
) {
2208 sbfree(&so
->so_rcv
, m
);
2209 if (m
->m_next
== 0 && so
->so_rcv
.sb_cc
!= 0)
2210 panic("soreceive: about to create invalid "
2212 MFREE(m
, so
->so_rcv
.sb_mb
);
2213 m
= so
->so_rcv
.sb_mb
;
2215 m
->m_nextpkt
= nextrecord
;
2217 so
->so_rcv
.sb_mb
= nextrecord
;
2218 SB_EMPTY_FIXUP(&so
->so_rcv
);
2224 * Process one or more MT_CONTROL mbufs present before any data mbufs
2225 * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
2226 * just copy the data; if !MSG_PEEK, we call into the protocol to
2227 * perform externalization.
2229 if (m
!= NULL
&& m
->m_type
== MT_CONTROL
) {
2230 struct mbuf
*cm
= NULL
, *cmn
;
2231 struct mbuf
**cme
= &cm
;
2232 struct sockbuf
*sb_rcv
= &so
->so_rcv
;
2233 struct mbuf
**msgpcm
= NULL
;
2236 * Externalizing the control messages would require us to
2237 * drop the socket's lock below. Once we re-acquire the
2238 * lock, the mbuf chain might change. In order to preserve
2239 * consistency, we unlink all control messages from the
2240 * first mbuf chain in one shot and link them separately
2241 * onto a different chain.
2244 if (flags
& MSG_PEEK
) {
2245 if (controlp
!= NULL
) {
2246 if (*controlp
== NULL
) {
2249 *controlp
= m_copy(m
, 0, m
->m_len
);
2251 /* If we failed to allocate an mbuf,
2252 * release any previously allocated
2253 * mbufs for control data. Return
2254 * an error. Keep the mbufs in the
2255 * socket as this is using
2258 if (*controlp
== NULL
) {
2263 controlp
= &(*controlp
)->m_next
;
2267 m
->m_nextpkt
= NULL
;
2269 sb_rcv
->sb_mb
= m
->m_next
;
2272 cme
= &(*cme
)->m_next
;
2275 } while (m
!= NULL
&& m
->m_type
== MT_CONTROL
);
2277 if (!(flags
& MSG_PEEK
)) {
2278 if (sb_rcv
->sb_mb
!= NULL
) {
2279 sb_rcv
->sb_mb
->m_nextpkt
= nextrecord
;
2281 sb_rcv
->sb_mb
= nextrecord
;
2282 SB_EMPTY_FIXUP(sb_rcv
);
2284 if (nextrecord
== NULL
)
2285 sb_rcv
->sb_lastrecord
= m
;
2288 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive ctl");
2289 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive ctl");
2291 while (cm
!= NULL
) {
2296 cmsg_type
= mtod(cm
, struct cmsghdr
*)->cmsg_type
;
2299 * Call the protocol to externalize SCM_RIGHTS message
2300 * and return the modified message to the caller upon
2301 * success. Otherwise, all other control messages are
2302 * returned unmodified to the caller. Note that we
2303 * only get into this loop if MSG_PEEK is not set.
2305 if (pr
->pr_domain
->dom_externalize
!= NULL
&&
2306 cmsg_type
== SCM_RIGHTS
) {
2308 * Release socket lock: see 3903171. This
2309 * would also allow more records to be appended
2310 * to the socket buffer. We still have SB_LOCK
2311 * set on it, so we can be sure that the head
2312 * of the mbuf chain won't change.
2314 socket_unlock(so
, 0);
2315 error
= (*pr
->pr_domain
->dom_externalize
)(cm
);
2321 if (controlp
!= NULL
&& error
== 0) {
2323 controlp
= &(*controlp
)->m_next
;
2331 if (sb_rcv
->sb_mb
!= NULL
)
2332 nextrecord
= sb_rcv
->sb_mb
->m_nextpkt
;
2338 if (!(flags
& MSG_PEEK
)) {
2340 * We get here because m points to an mbuf following
2341 * any MT_SONAME or MT_CONTROL mbufs which have been
2342 * processed above. In any case, m should be pointing
2343 * to the head of the mbuf chain, and the nextrecord
2344 * should be either NULL or equal to m->m_nextpkt.
2345 * See comments above about SB_LOCK.
2347 if (m
!= so
->so_rcv
.sb_mb
|| m
->m_nextpkt
!= nextrecord
)
2348 panic("soreceive: post-control !sync so=%p "
2349 "m=%p nextrecord=%p\n", so
, m
, nextrecord
);
2351 if (nextrecord
== NULL
)
2352 so
->so_rcv
.sb_lastrecord
= m
;
2355 if (type
== MT_OOBDATA
)
2358 if (!(flags
& MSG_PEEK
)) {
2359 so
->so_rcv
.sb_mb
= nextrecord
;
2360 SB_EMPTY_FIXUP(&so
->so_rcv
);
2363 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 2");
2364 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 2");
2369 if (!(flags
& MSG_PEEK
) && uio_resid(uio
) > sorecvmincopy
)
2376 while (m
&& (uio_resid(uio
) - delayed_copy_len
) > 0 && error
== 0) {
2377 if (m
->m_type
== MT_OOBDATA
) {
2378 if (type
!= MT_OOBDATA
)
2380 } else if (type
== MT_OOBDATA
) {
			 * Make sure to always set MSG_OOB event when getting
			 * out of band data inline.
2387 if ((so
->so_options
& SO_WANTOOBFLAG
) != 0 &&
2388 (so
->so_options
& SO_OOBINLINE
) != 0 &&
2389 (so
->so_state
& SS_RCVATMARK
) != 0) {
2392 so
->so_state
&= ~SS_RCVATMARK
;
2393 len
= uio_resid(uio
) - delayed_copy_len
;
2394 if (so
->so_oobmark
&& len
> so
->so_oobmark
- offset
)
2395 len
= so
->so_oobmark
- offset
;
2396 if (len
> m
->m_len
- moff
)
2397 len
= m
->m_len
- moff
;
2399 * If mp is set, just pass back the mbufs.
2400 * Otherwise copy them out via the uio, then free.
2401 * Sockbuf must be consistent here (points to current mbuf,
2402 * it points to next record) when we drop priority;
2403 * we must note any additions to the sockbuf when we
2404 * block interrupts again.
2407 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive uiomove");
2408 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive uiomove");
2409 if (can_delay
&& len
== m
->m_len
) {
2411 * only delay the copy if we're consuming the
2412 * mbuf and we're NOT in MSG_PEEK mode
2413 * and we have enough data to make it worthwile
2414 * to drop and retake the lock... can_delay
2415 * reflects the state of the 2 latter
2416 * constraints moff should always be zero
2419 delayed_copy_len
+= len
;
2421 if (delayed_copy_len
) {
2422 error
= sodelayed_copy(so
, uio
,
2423 &free_list
, &delayed_copy_len
);
2429 * can only get here if MSG_PEEK is not
2430 * set therefore, m should point at the
2431 * head of the rcv queue; if it doesn't,
2432 * it means something drastically
2433 * changed while we were out from behind
2434 * the lock in sodelayed_copy. perhaps
2435 * a RST on the stream. in any event,
2436 * the stream has been interrupted. it's
2437 * probably best just to return whatever
2438 * data we've moved and let the caller
2441 if (m
!= so
->so_rcv
.sb_mb
) {
2445 socket_unlock(so
, 0);
2446 error
= uiomove(mtod(m
, caddr_t
) + moff
,
2454 uio_setresid(uio
, (uio_resid(uio
) - len
));
2456 if (len
== m
->m_len
- moff
) {
2457 if (m
->m_flags
& M_EOR
)
2459 if (flags
& MSG_PEEK
) {
2463 nextrecord
= m
->m_nextpkt
;
2464 sbfree(&so
->so_rcv
, m
);
2465 m
->m_nextpkt
= NULL
;
2470 so
->so_rcv
.sb_mb
= m
= m
->m_next
;
2471 *mp
= (struct mbuf
*)0;
2473 if (free_list
== NULL
)
2478 so
->so_rcv
.sb_mb
= m
= m
->m_next
;
2482 m
->m_nextpkt
= nextrecord
;
2483 if (nextrecord
== NULL
)
2484 so
->so_rcv
.sb_lastrecord
= m
;
2486 so
->so_rcv
.sb_mb
= nextrecord
;
2487 SB_EMPTY_FIXUP(&so
->so_rcv
);
2489 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 3");
2490 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 3");
2493 if (flags
& MSG_PEEK
) {
2499 if (flags
& MSG_DONTWAIT
)
2500 copy_flag
= M_DONTWAIT
;
2503 *mp
= m_copym(m
, 0, len
, copy_flag
);
				/*
				 * Failed to allocate an mbuf.
				 * Adjust uio_resid back, it was
				 * adjusted down by len bytes which
				 * we didn't copy over
				 */
				uio_setresid(uio, (uio_resid(uio) + len));
			so->so_rcv.sb_cc -= len;
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					/*
					 * delay posting the actual event until
					 * after any delayed copy processing
					 */
				if (offset == so->so_oobmark)
		if (flags & MSG_EOR)
		/*
		 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set
		 * (for non-atomic socket), we must not quit until
		 * "uio->uio_resid == 0" or an error termination.
		 * If a signal/timeout occurs, return with a short
		 * count but without error. Keep sockbuf locked
		 * against other readers.
		 */
		while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 &&
		    (uio_resid(uio) - delayed_copy_len) > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
			/*
			 * Depending on the protocol (e.g. TCP), the following
			 * might cause the socket lock to be dropped and later
			 * be reacquired, and more data could have arrived and
			 * have been appended to the receive socket buffer by
			 * the time it returns. Therefore, we sleep in
			 * sbwait() below only if the socket buffer is
			 * empty, in order to avoid a false sleep.
			 */
			if (pr->pr_flags & PR_WANTRCVD && so->so_pcb &&
			    (((struct inpcb *)so->so_pcb)->inp_state !=
				(*pr->pr_usrreqs->pru_rcvd)(so, flags);

			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");

			if (so->so_rcv.sb_mb == NULL && sbwait(&so->so_rcv)) {
			/*
			 * have to wait until after we get back from the sbwait
			 * to do the copy because we will drop the lock if we
			 * have enough data that has been delayed... by dropping
			 * the lock we open up a window allowing the netisr
			 * thread to process the incoming packets and to change
			 * the state of this socket... we're issuing the sbwait
			 * because the socket is empty and we're expecting the
			 * netisr thread to wake us up when more packets arrive;
			 * if we allow that processing to happen and then sbwait
			 * we could stall forever with packets sitting in the
			 * socket if no further packets arrive from the remote
			 */
			/*
			 * we want to copy before we've collected all the data
			 * to satisfy this request to allow the copy to overlap
			 * the incoming packet processing on an MP system
			 */
			if (delayed_copy_len > sorecvmincopy &&
			    (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
				error = sodelayed_copy(so, uio,
				    &free_list, &delayed_copy_len);
			m = so->so_rcv.sb_mb;
				nextrecord = m->m_nextpkt;
#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1)
		panic("soreceive: after big while so=%p ref=%d on socket\n",
		    so, so->so_usecount);

	if (m && pr->pr_flags & PR_ATOMIC) {
		if (so->so_options & SO_DONTTRUNC) {
			flags |= MSG_RCVMORE;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	/*
	 * pru_rcvd below (for TCP) may cause more data to be received
	 * if the socket lock is dropped prior to sending the ACK; some
	 * legacy OpenTransport applications don't handle this well
	 * (if it receives less data than requested while MSG_HAVEMORE
	 * is set), and so we set the flag now based on what we know
	 * prior to calling pru_rcvd.
	 */
	if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
		flags |= MSG_HAVEMORE;

	if ((flags & MSG_PEEK) == 0) {
		so->so_rcv.sb_mb = nextrecord;
		/*
		 * First part is an inline SB_EMPTY_FIXUP(). Second
		 * part makes sure sb_lastrecord is up-to-date if
		 * there is still data in the socket buffer.
		 */
		if (so->so_rcv.sb_mb == NULL) {
			so->so_rcv.sb_mbtail = NULL;
			so->so_rcv.sb_lastrecord = NULL;
		} else if (nextrecord->m_nextpkt == NULL) {
			so->so_rcv.sb_lastrecord = nextrecord;
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);

	if (delayed_copy_len) {
		error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
		m_freem_list((struct mbuf *)free_list);
		free_list = (struct mbuf *)0;
	postevent(so, 0, EV_OOB);

	if (orig_resid == uio_resid(uio) && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv, 1);

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1)
		panic("soreceive: release so=%p ref=%d on socket\n",
		    so, so->so_usecount);
	if (delayed_copy_len) {
		error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
		m_freem_list((struct mbuf *)free_list);
	sbunlock(&so->so_rcv, 0);	/* will unlock socket */

	// LP64todo - fix this!
	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, so, uio_resid(uio),
	    so->so_rcv.sb_cc, 0, error);
/*
 * Returns:	0			Success
 */
sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list,
	socket_unlock(so, 0);

	while (m && error == 0) {
		error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
	m_freem_list(*free_list);
	*free_list = (struct mbuf *)NULL;
/*
 * Returns:	0			Success
 *	<pru_shutdown>:EINVAL
 *	<pru_shutdown>:EADDRNOTAVAIL[TCP]
 *	<pru_shutdown>:ENOBUFS[TCP]
 *	<pru_shutdown>:EMSGSIZE[TCP]
 *	<pru_shutdown>:EHOSTUNREACH[TCP]
 *	<pru_shutdown>:ENETUNREACH[TCP]
 *	<pru_shutdown>:ENETDOWN[TCP]
 *	<pru_shutdown>:ENOMEM[TCP]
 *	<pru_shutdown>:EACCES[TCP]
 *	<pru_shutdown>:EMSGSIZE[TCP]
 *	<pru_shutdown>:ENOBUFS[TCP]
 *	<pru_shutdown>:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
 *	<pru_shutdown>:???		[other protocol families]
 */
soshutdown(struct socket *so, int how)
		if ((so->so_state &
		    (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) == 0) {
			error = soshutdownlock(so, how);
		socket_unlock(so, 1);
soshutdownlock(struct socket *so, int how)
	struct protosw *pr = so->so_proto;

	sflt_notify(so, sock_evt_shutdown, &how);

	if (how != SHUT_WR) {
		if ((so->so_state & SS_CANTRCVMORE) != 0) {
			/* read already shut down */
		postevent(so, 0, EV_RCLOSED);
	if (how != SHUT_RD) {
		if ((so->so_state & SS_CANTSENDMORE) != 0) {
			/* write already shut down */
		error = (*pr->pr_usrreqs->pru_shutdown)(so);
		postevent(so, 0, EV_WCLOSED);

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0, 0, 0, 0, 0);
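/*
 * Editor's illustrative sketch (not part of this file or its build): how the
 * soshutdown()/soshutdownlock() paths above are reached from user space via
 * shutdown(2). SHUT_RD stops further receives (SS_CANTRCVMORE), SHUT_WR stops
 * further sends (SS_CANTSENDMORE), SHUT_RDWR does both. All names used below
 * are standard POSIX; the program itself is hypothetical.
 */
#if 0
#include <sys/socket.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int sv[2];
	char buf[16];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1)
		return (1);

	(void) write(sv[0], "done", 4);
	/* No more sends from sv[0]; the peer sees EOF after draining data. */
	(void) shutdown(sv[0], SHUT_WR);

	ssize_t n = read(sv[1], buf, sizeof (buf));	/* returns 4 ("done") */
	printf("got %zd bytes\n", n);
	n = read(sv[1], buf, sizeof (buf));		/* returns 0 => EOF */
	printf("eof read returned %zd\n", n);

	close(sv[0]);
	close(sv[1]);
	return (0);
}
#endif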
sorflush(struct socket *so)
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;

#ifdef MORE_LOCKING_DEBUG
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	sflt_notify(so, sock_evt_flush_read, NULL);

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAIT);

	selthreadclear(&sb->sb_sel);

	bzero((caddr_t)sb, sizeof (*sb));
	sb->sb_so = so;	/* reestablish link to socket */
	if (asb.sb_flags & SB_KNOTE) {
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	if (asb.sb_flags & SB_DROP)
		sb->sb_flags |= SB_DROP;
	if (asb.sb_flags & SB_UNIX)
		sb->sb_flags |= SB_UNIX;
	if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose) {
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 *
 * Returns:	0			Success
 */
sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL. On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		sopt->sopt_valsize = valsize = len;

	if (sopt->sopt_p != kernproc)
		return (copyin(sopt->sopt_val, buf, valsize));

	bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
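/*
 * Editor's illustrative sketch (not built): the typical calling pattern for
 * sooptcopyin() as used by the SOL_SOCKET cases in sosetopt() below -- copy
 * in a fixed-size int, bail out on error, then interpret zero/non-zero. The
 * function name so_set_example_flag() is hypothetical; it simply mirrors the
 * existing SO_NOSIGPIPE handling and reuses that flag for the example.
 */
#if 0
static int
so_set_example_flag(struct socket *so, struct sockopt *sopt)
{
	int optval, error;

	/* Require at least sizeof (optval); EINVAL otherwise. */
	error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval));
	if (error != 0)
		return (error);
	if (optval != 0)
		so->so_flags |= SOF_NOSIGPIPE;
	else
		so->so_flags &= ~SOF_NOSIGPIPE;
	return (0);
}
#endif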
/*
 * sooptcopyin_timeval
 *	Copy in a timeval value into tv_p, and take into account whether the
 *	calling process is 64-bit or 32-bit. Moved the sanity checking
 *	code here so that we can verify the 64-bit tv_sec value before we lose
 *	the top 32-bits assigning tv64.tv_sec to tv_p->tv_sec.
 */
sooptcopyin_timeval(struct sockopt *sopt, struct timeval *tv_p)
	if (proc_is64bit(sopt->sopt_p)) {
		struct user64_timeval tv64;

		if (sopt->sopt_valsize < sizeof (tv64)) {
		sopt->sopt_valsize = sizeof (tv64);
		if (sopt->sopt_p != kernproc) {
			error = copyin(sopt->sopt_val, &tv64, sizeof (tv64));
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv64,
		if (tv64.tv_sec < 0 || tv64.tv_sec > LONG_MAX ||
		    tv64.tv_usec < 0 || tv64.tv_usec >= 1000000) {
		tv_p->tv_sec = tv64.tv_sec;
		tv_p->tv_usec = tv64.tv_usec;
		struct user32_timeval tv32;

		if (sopt->sopt_valsize < sizeof (tv32)) {
		sopt->sopt_valsize = sizeof (tv32);
		if (sopt->sopt_p != kernproc) {
			error = copyin(sopt->sopt_val, &tv32, sizeof (tv32));
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv32,
#ifndef __LP64__ // K64todo "comparison is always false due to limited range of data type"
		if (tv32.tv_sec < 0 || tv32.tv_sec > LONG_MAX ||
		    tv32.tv_usec < 0 || tv32.tv_usec >= 1000000) {
		tv_p->tv_sec = tv32.tv_sec;
		tv_p->tv_usec = tv32.tv_usec;
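/*
 * Editor's illustrative sketch (not built): the user-space counterpart of the
 * validation done by sooptcopyin_timeval() above. A tv_usec outside
 * [0, 1000000) is rejected (the sosetopt() return list below notes EDOM for
 * this case). The helper name set_rcv_timeout() is hypothetical.
 */
#if 0
#include <sys/socket.h>
#include <sys/time.h>
#include <stdio.h>

/* Set a 2.5 second receive timeout on socket s. */
int
set_rcv_timeout(int s)
{
	struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };

	if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof (tv)) == -1) {
		perror("setsockopt(SO_RCVTIMEO)");
		return (-1);
	}
	return (0);
}
#endif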
/*
 * Returns:	0			Success
 *	sooptcopyin:EINVAL
 *	sooptcopyin:EFAULT
 *	sooptcopyin_timeval:EINVAL
 *	sooptcopyin_timeval:EFAULT
 *	sooptcopyin_timeval:EDOM
 *	<pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
 *	<pr_ctloutput>:???
 *	sflt_attach_private:???		[whatever a filter author chooses]
 *	<sf_setoption>:???		[whatever a filter author chooses]
 *
 * Notes:	Other <pr_ctloutput> returns depend on the protocol family;
 *		all <sf_setoption> returns depend on what the filter author
 *		causes their filter to return.
 */
sosetopt(struct socket *so, struct sockopt *sopt)
#if CONFIG_MACF_SOCKET
#endif /* MAC_SOCKET */
	so_update_last_owner_locked(so, NULL);

	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE))
	    == (SS_CANTRCVMORE | SS_CANTSENDMORE) &&
	    (so->so_flags & SOF_NPX_SETOPTSHUT) == 0) {
		/* the socket has been shutdown, no more sockopt's */

	if (sopt->sopt_dir != SOPT_SET) {
		sopt->sopt_dir = SOPT_SET;

	error = sflt_setsockopt(so, sopt);
	if (error == EJUSTRETURN)

	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			error = (*so->so_proto->pr_ctloutput)(so, sopt);
			socket_unlock(so, 1);
		error = ENOPROTOOPT;

	switch (sopt->sopt_name) {
		error = sooptcopyin(sopt, &l, sizeof (l), sizeof (l));
		so->so_linger = (sopt->sopt_name == SO_LINGER) ?
		    l.l_linger : l.l_linger * hz;
			so->so_options |= SO_LINGER;
			so->so_options &= ~SO_LINGER;

	case SO_USELOOPBACK:
	case SO_TIMESTAMP_MONOTONIC:
	case SO_WANTOOBFLAG:
		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_options |= sopt->sopt_name;
			so->so_options &= ~sopt->sopt_name;

		error = sooptcopyin(sopt, &optval, sizeof (optval),
		/*
		 * Values < 1 make no sense for any of these
		 * options, so disallow them.
		 */
		switch (sopt->sopt_name) {
			if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
			    &so->so_snd : &so->so_rcv,
			    (u_int32_t) optval) == 0) {
			if (sopt->sopt_name == SO_SNDBUF)
				so->so_snd.sb_flags |= SB_USRSIZE;
				so->so_rcv.sb_flags |= SB_USRSIZE;
			/*
			 * Make sure the low-water is never greater than
			 */
			so->so_snd.sb_lowat =
			    (optval > so->so_snd.sb_hiwat) ?
			    so->so_snd.sb_hiwat : optval;
			so->so_rcv.sb_lowat =
			    (optval > so->so_rcv.sb_hiwat) ?
			    so->so_rcv.sb_hiwat : optval;

		error = sooptcopyin_timeval(sopt, &tv);
		switch (sopt->sopt_name) {
			so->so_snd.sb_timeo = tv;
			so->so_rcv.sb_timeo = tv;
		error = sooptcopyin(sopt, &nke, sizeof (nke),
		error = sflt_attach_internal(so, nke.nke_handle);

		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_flags |= SOF_NOSIGPIPE;
			so->so_flags &= ~SOF_NOSIGPIPE;

		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_flags |= SOF_NOADDRAVAIL;
			so->so_flags &= ~SOF_NOADDRAVAIL;

	case SO_REUSESHAREUID:
		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_flags |= SOF_REUSESHAREUID;
			so->so_flags &= ~SOF_REUSESHAREUID;

#ifdef __APPLE_API_PRIVATE
	case SO_NOTIFYCONFLICT:
		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_flags |= SOF_NOTIFYCONFLICT;
			so->so_flags &= ~SOF_NOTIFYCONFLICT;

	case SO_RESTRICTIONS:
		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
		error = sooptcopyin(sopt, &optval, sizeof (optval),
		so->so_restrictions = (optval & (SO_RESTRICT_DENYIN |
		    SO_RESTRICT_DENYOUT | SO_RESTRICT_DENYSET));

#if CONFIG_MACF_SOCKET
		if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac),
		    sizeof (extmac))) != 0)
		error = mac_setsockopt_label(proc_ucred(sopt->sopt_p),
#endif /* MAC_SOCKET */

#ifdef __APPLE_API_PRIVATE
	case SO_UPCALLCLOSEWAIT:
		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_flags |= SOF_UPCALLCLOSEWAIT;
			so->so_flags &= ~SOF_UPCALLCLOSEWAIT;

		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_flags |= SOF_BINDRANDOMPORT;
			so->so_flags &= ~SOF_BINDRANDOMPORT;

	case SO_NP_EXTENSIONS: {
		struct so_np_extensions sonpx;

		error = sooptcopyin(sopt, &sonpx, sizeof (sonpx), sizeof (sonpx));
		if (sonpx.npx_mask & ~SONPX_MASK_VALID) {
		/*
		 * Only one bit defined for now
		 */
		if ((sonpx.npx_mask & SONPX_SETOPTSHUT)) {
			if ((sonpx.npx_flags & SONPX_SETOPTSHUT))
				so->so_flags |= SOF_NPX_SETOPTSHUT;
				so->so_flags &= ~SOF_NPX_SETOPTSHUT;

	case SO_TRAFFIC_CLASS: {
		error = sooptcopyin(sopt, &optval, sizeof (optval),
		error = so_set_traffic_class(so, optval);

	case SO_RECV_TRAFFIC_CLASS: {
		error = sooptcopyin(sopt, &optval, sizeof (optval),
			so->so_flags &= ~SOF_RECV_TRAFFIC_CLASS;
			so->so_flags |= SOF_RECV_TRAFFIC_CLASS;

	case SO_TRAFFIC_CLASS_DBG: {
		struct so_tcdbg so_tcdbg;

		error = sooptcopyin(sopt, &so_tcdbg, sizeof (struct so_tcdbg),
		    sizeof (struct so_tcdbg));
		error = so_set_tcdbg(so, &so_tcdbg);

		error = sooptcopyin(sopt, &optval, sizeof (optval),
		if (error != 0 || (so->so_flags & SOF_DEFUNCT)) {
		/*
		 * Any process can set SO_DEFUNCTOK (clear
		 * SOF_NODEFUNCT), but only root can clear
		 * SO_DEFUNCTOK (set SOF_NODEFUNCT).
		 */
		    kauth_cred_issuser(kauth_cred_get()) == 0) {
			so->so_flags &= ~SOF_NODEFUNCT;
			so->so_flags |= SOF_NODEFUNCT;
		SODEFUNCTLOG(("%s[%d]: so %p [%d,%d] is now marked as "
		    "%seligible for defunct\n", __func__,
		    proc_selfpid(), so, INP_SOCKAF(so),
		    (so->so_flags & SOF_NODEFUNCT) ? "not " : ""));

		/* This option is not settable */
		error = ENOPROTOOPT;

	if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
		(void) ((*so->so_proto->pr_ctloutput)(so, sopt));
	socket_unlock(so, 1);
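/*
 * Editor's illustrative sketch (not built): setting SO_LINGER from user
 * space, which lands in the SO_LINGER case of sosetopt() above. Note from
 * that code that the plain SO_LINGER value is stored unscaled (historically
 * interpreted in clock ticks on Darwin), whereas the SO_LINGER_SEC variant is
 * multiplied by hz so the caller supplies seconds. The helper name
 * enable_linger() is hypothetical.
 */
#if 0
#include <sys/socket.h>
#include <stdio.h>

/* Ask close(2) to block while unsent data drains, up to l_linger. */
int
enable_linger(int s)
{
	struct linger l = { .l_onoff = 1, .l_linger = 5 };

	if (setsockopt(s, SOL_SOCKET, SO_LINGER, &l, sizeof (l)) == -1) {
		perror("setsockopt(SO_LINGER)");
		return (-1);
	}
	return (0);
}
#endif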
/* Helper routines for getsockopt */
sooptcopyout(struct sockopt *sopt, void *buf, size_t len)
	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer must
	 * be generated ahead of time.
	 */
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != USER_ADDR_NULL) {
		if (sopt->sopt_p != kernproc)
			error = copyout(buf, sopt->sopt_val, valsize);
			bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
sooptcopyout_timeval(struct sockopt *sopt, const struct timeval *tv_p)
	struct user64_timeval tv64;
	struct user32_timeval tv32;

	if (proc_is64bit(sopt->sopt_p)) {
		tv64.tv_sec = tv_p->tv_sec;
		tv64.tv_usec = tv_p->tv_usec;
		tv32.tv_sec = tv_p->tv_sec;
		tv32.tv_usec = tv_p->tv_usec;
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != USER_ADDR_NULL) {
		if (sopt->sopt_p != kernproc)
			error = copyout(val, sopt->sopt_val, valsize);
			bcopy(val, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
/*
 *	<pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
 *	<pr_ctloutput>:???
 *	<sf_getoption>:???
 */
sogetopt(struct socket *so, struct sockopt *sopt)
#if CONFIG_MACF_SOCKET
#endif /* MAC_SOCKET */
	if (sopt->sopt_dir != SOPT_GET) {
		sopt->sopt_dir = SOPT_GET;

	so_update_last_owner_locked(so, NULL);

	error = sflt_getsockopt(so, sopt);
	if (error == EJUSTRETURN)
		socket_unlock(so, 1);

	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			error = (*so->so_proto->pr_ctloutput)(so, sopt);
			socket_unlock(so, 1);
		socket_unlock(so, 1);
		return (ENOPROTOOPT);

	switch (sopt->sopt_name) {
		l.l_onoff = so->so_options & SO_LINGER;
		l.l_linger = (sopt->sopt_name == SO_LINGER) ?
		    so->so_linger : so->so_linger / hz;
		error = sooptcopyout(sopt, &l, sizeof (l));

	case SO_USELOOPBACK:
	case SO_TIMESTAMP_MONOTONIC:
	case SO_WANTOOBFLAG:
		optval = so->so_options & sopt->sopt_name;
		error = sooptcopyout(sopt, &optval, sizeof (optval));

		optval = so->so_type;

		if (so->so_proto->pr_flags & PR_ATOMIC) {
			m1 = so->so_rcv.sb_mb;
			if (m1->m_type == MT_DATA || m1->m_type == MT_HEADER ||
			    m1->m_type == MT_OOBDATA)
				pkt_total += m1->m_len;
			optval = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;

		optval = so->so_snd.sb_cc;

		optval = so->so_error;

		optval = so->so_snd.sb_hiwat;

		optval = so->so_rcv.sb_hiwat;

		optval = so->so_snd.sb_lowat;

		optval = so->so_rcv.sb_lowat;

		tv = (sopt->sopt_name == SO_SNDTIMEO ?
		    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
		error = sooptcopyout_timeval(sopt, &tv);
		optval = (so->so_flags & SOF_NOSIGPIPE);

		optval = (so->so_flags & SOF_NOADDRAVAIL);

	case SO_REUSESHAREUID:
		optval = (so->so_flags & SOF_REUSESHAREUID);

#ifdef __APPLE_API_PRIVATE
	case SO_NOTIFYCONFLICT:
		optval = (so->so_flags & SOF_NOTIFYCONFLICT);

	case SO_RESTRICTIONS:
		optval = so->so_restrictions & (SO_RESTRICT_DENYIN |
		    SO_RESTRICT_DENYOUT | SO_RESTRICT_DENYSET);

#if CONFIG_MACF_SOCKET
		if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac),
		    sizeof (extmac))) != 0 ||
		    (error = mac_socket_label_get(proc_ucred(
		    sopt->sopt_p), so, &extmac)) != 0)
		error = sooptcopyout(sopt, &extmac, sizeof (extmac));
#endif /* MAC_SOCKET */

#if CONFIG_MACF_SOCKET
		if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac),
		    sizeof (extmac))) != 0 ||
		    (error = mac_socketpeer_label_get(proc_ucred(
		    sopt->sopt_p), so, &extmac)) != 0)
		error = sooptcopyout(sopt, &extmac, sizeof (extmac));
#endif /* MAC_SOCKET */

#ifdef __APPLE_API_PRIVATE
	case SO_UPCALLCLOSEWAIT:
		optval = (so->so_flags & SOF_UPCALLCLOSEWAIT);

		optval = (so->so_flags & SOF_BINDRANDOMPORT);

	case SO_NP_EXTENSIONS: {
		struct so_np_extensions sonpx;

		sonpx.npx_flags = (so->so_flags & SOF_NPX_SETOPTSHUT) ?
		    SONPX_SETOPTSHUT : 0;
		sonpx.npx_mask = SONPX_MASK_VALID;
		error = sooptcopyout(sopt, &sonpx,
		    sizeof (struct so_np_extensions));

	case SO_TRAFFIC_CLASS:
		optval = so->so_traffic_class;

	case SO_RECV_TRAFFIC_CLASS:
		optval = (so->so_flags & SOF_RECV_TRAFFIC_CLASS);

	case SO_TRAFFIC_CLASS_STATS:
		error = sooptcopyout(sopt, &so->so_tc_stats,
		    sizeof (so->so_tc_stats));

	case SO_TRAFFIC_CLASS_DBG:
		error = sogetopt_tcdbg(so, sopt);

		optval = !(so->so_flags & SOF_NODEFUNCT);

		optval = (so->so_flags & SOF_DEFUNCT);

		error = ENOPROTOOPT;
	socket_unlock(so, 1);
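/*
 * Editor's illustrative sketch (not built): querying pending receive bytes
 * from user space with SO_NREAD, the Darwin-specific SOL_SOCKET option that
 * appears to correspond to the sb_cc - sb_ctl computation in sogetopt()
 * above. The helper name bytes_pending() is hypothetical.
 */
#if 0
#include <sys/socket.h>
#include <stdio.h>

/* Return how many bytes are ready to be read without blocking, or -1. */
int
bytes_pending(int s)
{
	int nread = 0;
	socklen_t len = sizeof (nread);

	if (getsockopt(s, SOL_SOCKET, SO_NREAD, &nread, &len) == -1) {
		perror("getsockopt(SO_NREAD)");
		return (-1);
	}
	return (nread);
}
#endif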
/* The size limits on our soopt_getm are different from those on FreeBSD.
 * We limit the size of options to MCLBYTES. This will have to change
 * if we need to define options that need more space than MCLBYTES.
 */
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
	struct mbuf *m, *m_prev;
	int sopt_size = sopt->sopt_valsize;

	if (sopt_size <= 0 || sopt_size > MCLBYTES)

	how = sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT;
	MGET(m, how, MT_DATA);
	if (sopt_size > MLEN) {
		if ((m->m_flags & M_EXT) == 0) {
		m->m_len = min(MCLBYTES, sopt_size);
		m->m_len = min(MLEN, sopt_size);
	sopt_size -= m->m_len;

	while (sopt_size > 0) {
		MGET(m, how, MT_DATA);
		if (sopt_size > MLEN) {
			if ((m->m_flags & M_EXT) == 0) {
			m->m_len = min(MCLBYTES, sopt_size);
			m->m_len = min(MLEN, sopt_size);
		sopt_size -= m->m_len;
/* copyin sopt data into mbuf chain */
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
	struct mbuf *m0 = m;

	if (sopt->sopt_val == USER_ADDR_NULL)
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != kernproc) {
			error = copyin(sopt->sopt_val, mtod(m, char *),
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val),
			    mtod(m, char *), m->m_len);
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val += m->m_len;
	if (m != NULL)	/* should be allocated large enough at ip6_sooptmcopyin() */
		panic("soopt_mcopyin");
/* copyout mbuf chain data into soopt */
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
	struct mbuf *m0 = m;

	if (sopt->sopt_val == USER_ADDR_NULL)
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != kernproc) {
			error = copyout(mtod(m, char *), sopt->sopt_val,
			bcopy(mtod(m, char *),
			    CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val += m->m_len;
		valsize += m->m_len;
	/* enough soopt buffer should be given from user-land */
	sopt->sopt_valsize = valsize;
sohasoutofband(struct socket *so)
	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0)
		proc_signal(so->so_pgid, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
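/*
 * Editor's illustrative sketch (not built): the user-space side of the
 * urgent-data notification that sohasoutofband() delivers above. The process
 * claims SIGURG delivery with fcntl(F_SETOWN) and, once signalled (or once
 * select/poll reports an exceptional condition), pulls the out-of-band byte
 * with recv(MSG_OOB). The helper name watch_oob() is hypothetical.
 */
#if 0
#include <sys/socket.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void
on_urg(int sig)
{
	(void)sig;	/* just interrupt the blocking call */
}

int
watch_oob(int s)
{
	char oob;

	signal(SIGURG, on_urg);
	if (fcntl(s, F_SETOWN, getpid()) == -1)	/* route SIGURG here */
		return (-1);
	/* Call this after SIGURG fires; otherwise recv() may fail. */
	if (recv(s, &oob, 1, MSG_OOB) == 1)
		printf("urgent byte: %c\n", oob);
	return (0);
}
#endif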
sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void *wql)
	struct proc *p = current_proc();

	so_update_last_owner_locked(so, p);

	if (events & (POLLIN | POLLRDNORM))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

	if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
		/*
		 * Darwin sets the flag first,
		 * BSD calls selrecord first
		 */
		so->so_rcv.sb_flags |= SB_SEL;
		selrecord(p, &so->so_rcv.sb_sel, wql);

	if (events & (POLLOUT | POLLWRNORM)) {
		/*
		 * Darwin sets the flag first,
		 * BSD calls selrecord first
		 */
		so->so_snd.sb_flags |= SB_SEL;
		selrecord(p, &so->so_snd.sb_sel, wql);

	socket_unlock(so, 1);
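/*
 * Editor's illustrative sketch (not built): the user-space poll(2) call that
 * exercises sopoll() above. POLLIN/POLLRDNORM map to soreadable(),
 * POLLOUT/POLLWRNORM to sowriteable(), and POLLPRI/POLLRDBAND fire when
 * so_oobmark is set or SS_RCVATMARK is reached. The helper name
 * wait_readable() is hypothetical.
 */
#if 0
#include <poll.h>
#include <stdio.h>

/* Wait up to one second for readable or urgent data on socket s. */
int
wait_readable(int s)
{
	struct pollfd pfd = { .fd = s, .events = POLLIN | POLLPRI };
	int n = poll(&pfd, 1, 1000);

	if (n > 0 && (pfd.revents & POLLPRI))
		printf("urgent data pending\n");
	return (n);
}
#endif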
soo_kqfilter(__unused struct fileproc *fp, struct knote *kn,
    __unused struct proc *p)
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

#if CONFIG_MACF_SOCKET
	if (mac_socket_check_kqfilter(proc_ucred(p), kn, so) != 0) {
		socket_unlock(so, 1);
#endif /* MAC_SOCKET */

	switch (kn->kn_filter) {
		kn->kn_fop = &soread_filtops;
		kn->kn_fop = &sowrite_filtops;
		socket_unlock(so, 1);

	if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
		sb->sb_flags |= SB_KNOTE;
	socket_unlock(so, 1);
filt_sordetach(struct knote *kn)
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	if (so->so_rcv.sb_flags & SB_KNOTE)
		if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
			so->so_rcv.sb_flags &= ~SB_KNOTE;
	socket_unlock(so, 1);
filt_soread(struct knote *kn, long hint)
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	if ((hint & SO_FILT_HINT_LOCKED) == 0)

	if (so->so_options & SO_ACCEPTCONN) {
		/*
		 * Radar 6615193: handle the listen case dynamically for the
		 * kqueue read filter. This allows listen() to be called after
		 * the kqueue EVFILT_READ has been registered.
		 */
		kn->kn_data = so->so_qlen;
		isempty = ! TAILQ_EMPTY(&so->so_comp);

		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);

	/* socket isn't a listener */
	kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;

	if (so->so_oobmark) {
		if (kn->kn_flags & EV_OOBAND) {
			kn->kn_data -= so->so_oobmark;
			if ((hint & SO_FILT_HINT_LOCKED) == 0)
				socket_unlock(so, 1);
		kn->kn_data = so->so_oobmark;
		kn->kn_flags |= EV_OOBAND;
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);

	if (so->so_state & SS_RCVATMARK) {
		if (kn->kn_flags & EV_OOBAND) {
			if ((hint & SO_FILT_HINT_LOCKED) == 0)
				socket_unlock(so, 1);
		kn->kn_flags |= EV_OOBAND;
	} else if (kn->kn_flags & EV_OOBAND) {
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);

	if (so->so_error) {	/* temporary udp error */
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);

	int64_t lowwat = so->so_rcv.sb_lowat;
	if (kn->kn_sfflags & NOTE_LOWAT)
		if (kn->kn_sdata > so->so_rcv.sb_hiwat)
			lowwat = so->so_rcv.sb_hiwat;
		else if (kn->kn_sdata > lowwat)
			lowwat = kn->kn_sdata;

	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_unlock(so, 1);

	return ((kn->kn_flags & EV_OOBAND) || kn->kn_data >= lowwat);
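/*
 * Editor's illustrative sketch (not built): registering EVFILT_READ with
 * NOTE_LOWAT from user space, which is the path serviced by filt_soread()
 * above. The data field supplies the low-water mark; filt_soread() clamps it
 * to sb_hiwat. The helper name wait_for_bytes() is hypothetical.
 */
#if 0
#include <sys/types.h>
#include <sys/event.h>
#include <stdio.h>
#include <unistd.h>

/* Block until at least 512 bytes are buffered on socket s. */
int
wait_for_bytes(int s)
{
	int kq = kqueue();
	struct kevent ev, out;

	if (kq == -1)
		return (-1);
	EV_SET(&ev, s, EVFILT_READ, EV_ADD, NOTE_LOWAT, 512, NULL);
	if (kevent(kq, &ev, 1, &out, 1, NULL) == -1) {
		close(kq);
		return (-1);
	}
	printf("%lld bytes readable\n", (long long)out.data);
	close(kq);
	return (0);
}
#endif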
filt_sowdetach(struct knote *kn)
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	if (so->so_snd.sb_flags & SB_KNOTE)
		if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
			so->so_snd.sb_flags &= ~SB_KNOTE;
	socket_unlock(so, 1);
filt_sowrite(struct knote *kn, long hint)
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	if ((hint & SO_FILT_HINT_LOCKED) == 0)

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);

	if (so->so_error) {	/* temporary udp error */
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);

	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);

	int64_t lowwat = so->so_snd.sb_lowat;
	if (kn->kn_sfflags & NOTE_LOWAT)
		if (kn->kn_sdata > so->so_snd.sb_hiwat)
			lowwat = so->so_snd.sb_hiwat;
		else if (kn->kn_sdata > lowwat)
			lowwat = kn->kn_sdata;

	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_unlock(so, 1);
	return (kn->kn_data >= lowwat);
#define	SO_LOCK_HISTORY_STR_LEN	(2 * SO_LCKDBG_MAX * (2 + (2 * sizeof (void *)) + 1) + 1)

__private_extern__ const char *
solockhistory_nr(struct socket *so)
	static char lock_history_str[SO_LOCK_HISTORY_STR_LEN];

	bzero(lock_history_str, sizeof (lock_history_str));
	for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) {
		n += snprintf(lock_history_str + n,
		    SO_LOCK_HISTORY_STR_LEN - n, "%lx:%lx ",
		    (uintptr_t)so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX],
		    (uintptr_t)so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX]);
	return lock_history_str;
socket_lock(struct socket *so, int refcount)
	lr_saved = __builtin_return_address(0);

	if (so->so_proto->pr_lock) {
		error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
#ifdef MORE_LOCKING_DEBUG
		lck_mtx_assert(so->so_proto->pr_domain->dom_mtx,
		    LCK_MTX_ASSERT_NOTOWNED);
		lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
		so->lock_lr[so->next_lock_lr] = lr_saved;
		so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
socket_unlock(struct socket *so, int refcount)
	lck_mtx_t *mutex_held;

	lr_saved = __builtin_return_address(0);

	if (so->so_proto == NULL)
		panic("socket_unlock null so_proto so=%p\n", so);

	if (so && so->so_proto->pr_unlock) {
		error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
		mutex_held = so->so_proto->pr_domain->dom_mtx;
#ifdef MORE_LOCKING_DEBUG
		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
		so->unlock_lr[so->next_unlock_lr] = lr_saved;
		so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;

		if (so->so_usecount <= 0)
			panic("socket_unlock: bad refcount=%d so=%p (%d, %d, %d) lrh=%s",
			    so->so_usecount, so, so->so_proto->pr_domain->dom_family,
			    so->so_type, so->so_proto->pr_protocol,
			    solockhistory_nr(so));

		if (so->so_usecount == 0) {
			sofreelastref(so, 1);
		lck_mtx_unlock(mutex_held);
/* Called with socket locked, will unlock socket */
sofree(struct socket *so)
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	sofreelastref(so, 0);

soreference(struct socket *so)
	socket_lock(so, 1);	/* locks & take one reference on socket */
	socket_unlock(so, 0);	/* unlock only */

sodereference(struct socket *so)
	socket_unlock(so, 1);
/*
 * Set or clear SOF_MULTIPAGES on the socket to enable or disable the
 * possibility of using jumbo clusters. Caller must ensure to hold
 */
somultipages(struct socket *so, boolean_t set)
		so->so_flags |= SOF_MULTIPAGES;
		so->so_flags &= ~SOF_MULTIPAGES;

so_isdstlocal(struct socket *so) {
	struct inpcb *inp = (struct inpcb *)so->so_pcb;

	if (so->so_proto->pr_domain->dom_family == AF_INET) {
		return inaddr_local(inp->inp_faddr);
	} else if (so->so_proto->pr_domain->dom_family == AF_INET6) {
		return in6addr_local(&inp->in6p_faddr);
	}
sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
	int err = 0, defunct;

	defunct = (so->so_flags & SOF_DEFUNCT);
		if (!(so->so_snd.sb_flags & so->so_rcv.sb_flags & SB_DROP))
			panic("%s: SB_DROP not set", __func__);

	if (so->so_flags & SOF_NODEFUNCT) {
			SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p "
			    "[%d,%d] is not eligible for defunct (%d)\n",
			    __func__, proc_selfpid(), proc_pid(p), level, so,
			    INP_SOCKAF(so), INP_SOCKTYPE(so), err));
		so->so_flags &= ~SOF_NODEFUNCT;
		SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] "
		    "defunct by force\n", __func__, proc_selfpid(), proc_pid(p),
		    level, so, INP_SOCKAF(so), INP_SOCKTYPE(so)));

	so->so_flags |= SOF_DEFUNCT;
	/* Prevent further data from being appended to the socket buffers */
	so->so_snd.sb_flags |= SB_DROP;
	so->so_rcv.sb_flags |= SB_DROP;

	SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] %s "
	    "defunct\n", __func__, proc_selfpid(), proc_pid(p), level, so,
	    INP_SOCKAF(so), INP_SOCKTYPE(so),
	    defunct ? "is already" : "marked as"));
sodefunct(struct proc *p, struct socket *so, int level)
	struct sockbuf *rcv, *snd;

	if (!(so->so_flags & SOF_DEFUNCT))
		panic("%s improperly called", __func__);
	if (so->so_state & SS_DEFUNCT)

	SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] is now "
	    "defunct [rcv_si 0x%x, snd_si 0x%x, rcv_fl 0x%x, snd_fl 0x%x]\n",
	    __func__, proc_selfpid(), proc_pid(p), level, so,
	    INP_SOCKAF(so), INP_SOCKTYPE(so),
	    (uint32_t)rcv->sb_sel.si_flags, (uint32_t)snd->sb_sel.si_flags,
	    (uint16_t)rcv->sb_flags, (uint16_t)snd->sb_flags));

	/*
	 * Unwedge threads blocked on sbwait() and sb_lock().
	 */
	if (rcv->sb_flags & SB_LOCK)
	if (snd->sb_flags & SB_LOCK)

	/*
	 * Flush the buffers and disconnect. We explicitly call shutdown
	 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
	 * states are set for the socket. This would also flush out data
	 * hanging off the receive list of this socket.
	 */
	(void) soshutdownlock(so, SHUT_RD);
	(void) soshutdownlock(so, SHUT_WR);
	(void) sodisconnectlocked(so);

	/*
	 * Explicitly handle connectionless-protocol disconnection
	 * and release any remaining data in the socket buffers.
	 */
	if (!(so->so_state & SS_ISDISCONNECTED))
		(void) soisdisconnected(so);

	if (so->so_error == 0)
		so->so_error = EBADF;

	if (rcv->sb_cc != 0)
	if (snd->sb_cc != 0)

	so->so_state |= SS_DEFUNCT;