/*
 * Copyright (c) 1998-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)uipc_socket.c       8.3 (Berkeley) 4/15/94
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>

#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>

#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <sys/uio_internal.h>

#include <sys/kdebug.h>

#include <sys/kern_event.h>
#include <net/route.h>
#include <net/init.h>
#include <net/net_api_stats.h>
#include <net/ntstat.h>
#include <net/content_filter.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>
#include <netinet/tcp_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/flow_divert.h>
#include <kern/zalloc.h>
#include <kern/locks.h>
#include <machine/limits.h>
#include <libkern/OSAtomic.h>
#include <pexpert/pexpert.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <kern/policy_internal.h>

#include <sys/kpi_mbuf.h>
#include <sys/mcache.h>
#include <sys/unpcb.h>
#include <libkern/section_keywords.h>

#include <security/mac_framework.h>

#if MULTIPATH
#include <netinet/mp_pcb.h>
#include <netinet/mptcp_var.h>
#endif /* MULTIPATH */
#define ROUNDUP(a, b) (((a) + ((b) - 1)) & (~((b) - 1)))

#if DEBUG || DEVELOPMENT
#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
#else
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#endif
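/*
 * Illustrative note (not part of the original file): ROUNDUP() rounds 'a'
 * up to the next multiple of 'b' and assumes 'b' is a power of two, e.g.
 * ROUNDUP(10, 8) == 16 and ROUNDUP(16, 8) == 16.  A compile-time sanity
 * check of that behavior could look like:
 *
 *      _CASSERT(ROUNDUP(10, 8) == 16);
 *      _CASSERT(ROUNDUP(16, 8) == 16);
 */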
/* TODO: this should be in a header file somewhere */
extern char *proc_name_address(void *p);
extern char *proc_best_name(proc_t);

static u_int32_t        so_cache_hw;            /* High water mark for socache */
static u_int32_t        so_cache_timeouts;      /* number of timeouts */
static u_int32_t        so_cache_max_freed;     /* max freed per timeout */
static u_int32_t        cached_sock_count = 0;
STAILQ_HEAD(, socket)   so_cache_head;
int     max_cached_sock_count = MAX_CACHED_SOCKETS;
static u_int32_t        so_cache_time;
static int              socketinit_done;
static struct zone      *so_cache_zone;

static lck_grp_t        *so_cache_mtx_grp;
static lck_attr_t       *so_cache_mtx_attr;
static lck_grp_attr_t   *so_cache_mtx_grp_attr;
static lck_mtx_t        *so_cache_mtx;
#include <machine/limits.h>

static int      filt_sorattach(struct knote *kn, struct kevent_internal_s *kev);
static void     filt_sordetach(struct knote *kn);
static int      filt_soread(struct knote *kn, long hint);
static int      filt_sortouch(struct knote *kn, struct kevent_internal_s *kev);
static int      filt_sorprocess(struct knote *kn, struct filt_process_s *data,
                    struct kevent_internal_s *kev);

static int      filt_sowattach(struct knote *kn, struct kevent_internal_s *kev);
static void     filt_sowdetach(struct knote *kn);
static int      filt_sowrite(struct knote *kn, long hint);
static int      filt_sowtouch(struct knote *kn, struct kevent_internal_s *kev);
static int      filt_sowprocess(struct knote *kn, struct filt_process_s *data,
                    struct kevent_internal_s *kev);

static int      filt_sockattach(struct knote *kn, struct kevent_internal_s *kev);
static void     filt_sockdetach(struct knote *kn);
static int      filt_sockev(struct knote *kn, long hint);
static int      filt_socktouch(struct knote *kn, struct kevent_internal_s *kev);
static int      filt_sockprocess(struct knote *kn, struct filt_process_s *data,
                    struct kevent_internal_s *kev);

static int sooptcopyin_timeval(struct sockopt *, struct timeval *);
static int sooptcopyout_timeval(struct sockopt *, const struct timeval *);
SECURITY_READ_ONLY_EARLY(struct filterops) soread_filtops = {
        .f_attach = filt_sorattach,
        .f_detach = filt_sordetach,
        .f_event = filt_soread,
        .f_touch = filt_sortouch,
        .f_process = filt_sorprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) sowrite_filtops = {
        .f_attach = filt_sowattach,
        .f_detach = filt_sowdetach,
        .f_event = filt_sowrite,
        .f_touch = filt_sowtouch,
        .f_process = filt_sowprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) sock_filtops = {
        .f_attach = filt_sockattach,
        .f_detach = filt_sockdetach,
        .f_event = filt_sockev,
        .f_touch = filt_socktouch,
        .f_process = filt_sockprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) soexcept_filtops = {
        .f_attach = filt_sorattach,
        .f_detach = filt_sordetach,
        .f_event = filt_soread,
        .f_touch = filt_sortouch,
        .f_process = filt_sorprocess,
};
SYSCTL_DECL(_kern_ipc);

#define EVEN_MORE_LOCKING_DEBUG 0

int socket_debug = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, socket_debug,
        CTLFLAG_RW | CTLFLAG_LOCKED, &socket_debug, 0, "");

static unsigned long sodefunct_calls = 0;
SYSCTL_LONG(_kern_ipc, OID_AUTO, sodefunct_calls, CTLFLAG_LOCKED,
    &sodefunct_calls, "");

static int socket_zone = M_SOCKET;
so_gen_t        so_gencnt;      /* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

#define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3)
#define DBG_FNC_SOSEND          NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
#define DBG_FNC_SOSEND_LIST     NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 3)
#define DBG_FNC_SORECEIVE       NETDBG_CODE(DBG_NETSOCK, (8 << 8))
#define DBG_FNC_SORECEIVE_LIST  NETDBG_CODE(DBG_NETSOCK, (8 << 8) | 3)
#define DBG_FNC_SOSHUTDOWN      NETDBG_CODE(DBG_NETSOCK, (9 << 8))
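/*
 * Illustrative note (not part of the original file): kdebug codes such as
 * DBG_FNC_SOSEND are typically emitted as start/end pairs that bracket the
 * corresponding function, for example:
 *
 *      KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_START, so, 0, 0, 0, 0);
 *      ...
 *      KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, so, error, 0, 0, 0);
 *
 * The arguments shown here are hypothetical; only the START/END bracketing
 * pattern is implied by these definitions.
 */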
#define MAX_SOOPTGETM_SIZE      (128 * MCLBYTES)
int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn,
        CTLFLAG_RW | CTLFLAG_LOCKED, &somaxconn, 0, "");

/* Should we get a maximum also ??? */
static int sosendmaxchain = 65536;
static int sosendminchain = 16384;
static int sorecvmincopy  = 16384;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain,
        CTLFLAG_RW | CTLFLAG_LOCKED, &sosendminchain, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy,
        CTLFLAG_RW | CTLFLAG_LOCKED, &sorecvmincopy, 0, "");
/*
 * Set to enable jumbo clusters (if available) for large writes when
 * the socket is marked with SOF_MULTIPAGES; see below.
 */
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl,
        CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl, 0, "");

/*
 * Set this to ignore SOF_MULTIPAGES and use jumbo clusters for large
 * writes on the socket for all protocols on any network interfaces,
 * depending upon sosendjcl above.  Be extra careful when setting this
 * to 1, because sending down packets that cross physical pages down to
 * broken drivers (those that falsely assume that the physical pages
 * are contiguous) might lead to system panics or silent data corruption.
 * When set to 0, the system will respect SOF_MULTIPAGES, which is set
 * only for TCP sockets whose outgoing interface is IFNET_MULTIPAGES
 * capable.  Set this to 1 only for testing/debugging purposes.
 */
int sosendjcl_ignore_capab = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab,
        CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl_ignore_capab, 0, "");
/*
 * Set this to ignore SOF1_IF_2KCL and use big clusters for large
 * writes on the socket for all protocols on any network interfaces.
 * Be extra careful when setting this to 1, because sending down packets with
 * clusters larger than 2 KB might lead to system panics or data corruption.
 * When set to 0, the system will respect SOF1_IF_2KCL, which is set
 * on the outgoing interface.
 * Set this to 1 for testing/debugging purposes only.
 */
int sosendbigcl_ignore_capab = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendbigcl_ignore_capab,
        CTLFLAG_RW | CTLFLAG_LOCKED, &sosendbigcl_ignore_capab, 0, "");
int sodefunctlog = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sodefunctlog, CTLFLAG_RW | CTLFLAG_LOCKED,
        &sodefunctlog, 0, "");

int sothrottlelog = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sothrottlelog, CTLFLAG_RW | CTLFLAG_LOCKED,
        &sothrottlelog, 0, "");

int sorestrictrecv = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictrecv, CTLFLAG_RW | CTLFLAG_LOCKED,
        &sorestrictrecv, 0, "Enable inbound interface restrictions");

int sorestrictsend = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictsend, CTLFLAG_RW | CTLFLAG_LOCKED,
        &sorestrictsend, 0, "Enable outbound interface restrictions");

int soreserveheadroom = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, soreserveheadroom, CTLFLAG_RW | CTLFLAG_LOCKED,
        &soreserveheadroom, 0, "To allocate contiguous datagram buffers");
#if (DEBUG || DEVELOPMENT)
int so_notsent_lowat_check = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, notsent_lowat, CTLFLAG_RW | CTLFLAG_LOCKED,
    &so_notsent_lowat_check, 0, "enable/disable notsent lowat check");
#endif /* DEBUG || DEVELOPMENT */

int so_accept_list_waits = 0;
#if (DEBUG || DEVELOPMENT)
SYSCTL_INT(_kern_ipc, OID_AUTO, accept_list_waits, CTLFLAG_RW | CTLFLAG_LOCKED,
    &so_accept_list_waits, 0, "number of waits for listener incomp list");
#endif /* DEBUG || DEVELOPMENT */
extern struct inpcbinfo tcbinfo;

/* TODO: these should be in a header file */
extern int get_inpcb_str_size(void);
extern int get_tcp_str_size(void);

vm_size_t       so_cache_zone_element_size;

static int sodelayed_copy(struct socket *, struct uio *, struct mbuf **,
static void cached_sock_alloc(struct socket **, int);
static void cached_sock_free(struct socket *);
/*
 * Maximum number of extended background idle sockets per process.
 * Set to zero to disable further setting of the option.
 */
#define SO_IDLE_BK_IDLE_MAX_PER_PROC    1
#define SO_IDLE_BK_IDLE_TIME            600
#define SO_IDLE_BK_IDLE_RCV_HIWAT       131072

struct soextbkidlestat soextbkidlestat;

SYSCTL_UINT(_kern_ipc, OID_AUTO, maxextbkidleperproc,
        CTLFLAG_RW | CTLFLAG_LOCKED, &soextbkidlestat.so_xbkidle_maxperproc, 0,
        "Maximum of extended background idle sockets per process");

SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidletime, CTLFLAG_RW | CTLFLAG_LOCKED,
        &soextbkidlestat.so_xbkidle_time, 0,
        "Time in seconds to keep extended background idle sockets");

SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidlercvhiwat, CTLFLAG_RW | CTLFLAG_LOCKED,
        &soextbkidlestat.so_xbkidle_rcvhiwat, 0,
        "High water mark for extended background idle sockets");

SYSCTL_STRUCT(_kern_ipc, OID_AUTO, extbkidlestat, CTLFLAG_RD | CTLFLAG_LOCKED,
        &soextbkidlestat, soextbkidlestat, "");

int so_set_extended_bk_idle(struct socket *, int);
/*
 * SOTCDB_NO_DSCP is set by default, to prevent the networking stack from
 * setting the DSCP code on the packet based on the service class; see
 * <rdar://problem/11277343> for details.
 */
__private_extern__ u_int32_t sotcdb = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sotcdb, CTLFLAG_RW | CTLFLAG_LOCKED,
        _CASSERT(sizeof(so_gencnt) == sizeof(uint64_t));
        VERIFY(IS_P2ALIGNED(&so_gencnt, sizeof(uint32_t)));

        _CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user64_sa_endpoints));
        _CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user64_sa_endpoints, sae_srcif));
        _CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user64_sa_endpoints, sae_srcaddr));
        _CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user64_sa_endpoints, sae_srcaddrlen));
        _CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user64_sa_endpoints, sae_dstaddr));
        _CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user64_sa_endpoints, sae_dstaddrlen));

        _CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user32_sa_endpoints));
        _CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user32_sa_endpoints, sae_srcif));
        _CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user32_sa_endpoints, sae_srcaddr));
        _CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user32_sa_endpoints, sae_srcaddrlen));
        _CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user32_sa_endpoints, sae_dstaddr));
        _CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user32_sa_endpoints, sae_dstaddrlen));
        if (socketinit_done) {
                printf("socketinit: already called...\n");

        PE_parse_boot_argn("socket_debug", &socket_debug,
            sizeof (socket_debug));

        /*
         * allocate lock group attribute and group for socket cache mutex
         */
        so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
        so_cache_mtx_grp = lck_grp_alloc_init("so_cache",
            so_cache_mtx_grp_attr);

        /*
         * allocate the lock attribute for socket cache mutex
         */
        so_cache_mtx_attr = lck_attr_alloc_init();

        /* cached sockets mutex */
        so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr);
        if (so_cache_mtx == NULL) {
                panic("%s: unable to allocate so_cache_mtx\n", __func__);
        }

        STAILQ_INIT(&so_cache_head);

        so_cache_zone_element_size = (vm_size_t)(sizeof (struct socket) + 4
            + get_inpcb_str_size() + 4 + get_tcp_str_size());

        so_cache_zone = zinit(so_cache_zone_element_size,
            (120000 * so_cache_zone_element_size), 8192, "socache zone");
        zone_change(so_cache_zone, Z_CALLERACCT, FALSE);
        zone_change(so_cache_zone, Z_NOENCRYPT, TRUE);

        bzero(&soextbkidlestat, sizeof(struct soextbkidlestat));
        soextbkidlestat.so_xbkidle_maxperproc = SO_IDLE_BK_IDLE_MAX_PER_PROC;
        soextbkidlestat.so_xbkidle_time = SO_IDLE_BK_IDLE_TIME;
        soextbkidlestat.so_xbkidle_rcvhiwat = SO_IDLE_BK_IDLE_RCV_HIWAT;

        socket_tclass_init();

#if MULTIPATH
#endif /* MULTIPATH */
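/*
 * Illustrative sketch (not part of the original file): a cached socket is
 * carved out of a single so_cache_zone element whose size was computed
 * above as sizeof (struct socket) + 4 + get_inpcb_str_size() + 4 +
 * get_tcp_str_size().  Roughly:
 *
 *      +----------------------+  <- element start, returned as (*so)
 *      | struct socket        |
 *      +----------------------+  <- ALIGN()ed; (*so)->so_saved_pcb
 *      | inpcb storage        |
 *      +----------------------+  <- ALIGN()ed; inp_saved_ppcb
 *      | tcpcb storage        |
 *      +----------------------+
 *
 * cached_sock_alloc() below recomputes these offsets whenever it hands out
 * a fresh zone element rather than one recycled from so_cache_head.
 */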
static void
cached_sock_alloc(struct socket **so, int waitok)
{
        lck_mtx_lock(so_cache_mtx);

        if (!STAILQ_EMPTY(&so_cache_head)) {
                VERIFY(cached_sock_count > 0);

                *so = STAILQ_FIRST(&so_cache_head);
                STAILQ_REMOVE_HEAD(&so_cache_head, so_cache_ent);
                STAILQ_NEXT((*so), so_cache_ent) = NULL;

                lck_mtx_unlock(so_cache_mtx);

                temp = (*so)->so_saved_pcb;
                bzero((caddr_t)*so, sizeof (struct socket));

                (*so)->so_saved_pcb = temp;
        } else {
                lck_mtx_unlock(so_cache_mtx);

                if (waitok)
                        *so = (struct socket *)zalloc(so_cache_zone);
                else
                        *so = (struct socket *)zalloc_noblock(so_cache_zone);

                bzero((caddr_t)*so, sizeof (struct socket));

                /*
                 * Define offsets for extra structures into our
                 * single block of memory. Align extra structures
                 * on longword boundaries.
                 */

                offset = (uintptr_t)*so;
                offset += sizeof (struct socket);

                offset = ALIGN(offset);

                (*so)->so_saved_pcb = (caddr_t)offset;
                offset += get_inpcb_str_size();

                offset = ALIGN(offset);

                ((struct inpcb *)(void *)(*so)->so_saved_pcb)->inp_saved_ppcb =
                    (caddr_t)offset;
        }

        OSBitOrAtomic(SOF1_CACHED_IN_SOCK_LAYER, &(*so)->so_flags1);
static void
cached_sock_free(struct socket *so)
{
        lck_mtx_lock(so_cache_mtx);

        so_cache_time = net_uptime();
        if (++cached_sock_count > max_cached_sock_count) {
                lck_mtx_unlock(so_cache_mtx);
                zfree(so_cache_zone, so);
        } else {
                if (so_cache_hw < cached_sock_count)
                        so_cache_hw = cached_sock_count;

                STAILQ_INSERT_TAIL(&so_cache_head, so, so_cache_ent);

                so->cache_timestamp = so_cache_time;
                lck_mtx_unlock(so_cache_mtx);
so_update_last_owner_locked(struct socket *so, proc_t self)
{
        if (so->last_pid != 0) {
                /*
                 * last_pid and last_upid should remain zero for sockets
                 * created using sock_socket. The check above achieves that.
                 */
                if (self == PROC_NULL)
                        self = current_proc();

                if (so->last_upid != proc_uniqueid(self) ||
                    so->last_pid != proc_pid(self)) {
                        so->last_upid = proc_uniqueid(self);
                        so->last_pid = proc_pid(self);
                        proc_getexecutableuuid(self, so->last_uuid,
                            sizeof (so->last_uuid));
                }
                proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid));
so_update_policy(struct socket *so)
{
        if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)
                (void) inp_update_policy(sotoinpcb(so));
}

so_update_necp_policy(struct socket *so, struct sockaddr *override_local_addr,
    struct sockaddr *override_remote_addr)
{
        if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)
                inp_update_necp_policy(sotoinpcb(so), override_local_addr,
                    override_remote_addr, 0);
}
        boolean_t rc = FALSE;

        lck_mtx_lock(so_cache_mtx);

        so_cache_time = net_uptime();

        while (!STAILQ_EMPTY(&so_cache_head)) {
                VERIFY(cached_sock_count > 0);
                p = STAILQ_FIRST(&so_cache_head);
                if ((so_cache_time - p->cache_timestamp) <

                STAILQ_REMOVE_HEAD(&so_cache_head, so_cache_ent);

                zfree(so_cache_zone, p);

                if (++n_freed >= SO_CACHE_MAX_FREE_BATCH) {
                        so_cache_max_freed++;

        /* Schedule again if there is more to cleanup */
        if (!STAILQ_EMPTY(&so_cache_head))

        lck_mtx_unlock(so_cache_mtx);
/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
soalloc(int waitok, int dom, int type)
{
        if ((dom == PF_INET) && (type == SOCK_STREAM)) {
                cached_sock_alloc(&so, waitok);
        } else {
                MALLOC_ZONE(so, struct socket *, sizeof (*so), socket_zone,

                        bzero(so, sizeof (*so));
        }

                so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt);
                so->so_zone = socket_zone;

                /*
                 * Increment the socket allocation statistics
                 */
                INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_alloc_total);

#if CONFIG_MACF_SOCKET
                /* Convert waitok to M_WAITOK/M_NOWAIT for MAC Framework. */
                if (mac_socket_label_init(so, !waitok) != 0) {

#endif /* MAC_SOCKET */
socreate_internal(int dom, struct socket **aso, int type, int proto,
    struct proc *p, uint32_t flags, struct proc *ep)
{
        extern int tcpconsdebug;

                prp = pffindproto(dom, proto, type);

                prp = pffindtype(dom, type);

        if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL) {
                if (pffinddomain(dom) == NULL)
                        return (EAFNOSUPPORT);

                        if (pffindprotonotype(dom, proto) != NULL)

                return (EPROTONOSUPPORT);
        }
        if (prp->pr_type != type)

        so = soalloc(1, dom, type);

                        INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_local_total);

                        INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_inet_total);
                        if (type == SOCK_STREAM) {
                                INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_stream_total);

                                INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_total);

                        INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_route_total);

                        INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_ndrv_total);

                        INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_key_total);

                        INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_inet6_total);
                        if (type == SOCK_STREAM) {
                                INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet6_stream_total);

                                INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet6_dgram_total);

                        INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_system_total);

                        INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_multipath_total);

                        INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_other_total);

        if (flags & SOCF_ASYNC)
                so->so_state |= SS_NBIO;

        TAILQ_INIT(&so->so_incomp);
        TAILQ_INIT(&so->so_comp);

        so->last_upid = proc_uniqueid(p);
        so->last_pid = proc_pid(p);
        proc_getexecutableuuid(p, so->last_uuid, sizeof (so->last_uuid));
        proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid));

        if (ep != PROC_NULL && ep != p) {
                so->e_upid = proc_uniqueid(ep);
                so->e_pid = proc_pid(ep);
                proc_getexecutableuuid(ep, so->e_uuid, sizeof (so->e_uuid));
                so->so_flags |= SOF_DELEGATED;
        }

        so->so_cred = kauth_cred_proc_ref(p);
        if (!suser(kauth_cred_get(), NULL))
                so->so_state |= SS_PRIV;

        so->so_rcv.sb_flags |= SB_RECV;
        so->so_rcv.sb_so = so->so_snd.sb_so = so;
        so->next_lock_lr = 0;
        so->next_unlock_lr = 0;

#if CONFIG_MACF_SOCKET
        mac_socket_label_associate(kauth_cred_get(), so);
#endif /* MAC_SOCKET */

        /*
         * Attachment will create the per pcb lock if necessary and
         * increase refcount for creation, make sure it's done before
         * socket is inserted in lists.
         */

        error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);

                /*
                 * If so_pcb is not zero, the socket will be leaked,
                 * so protocol attachment handler must be coded carefully
                 */
                so->so_state |= SS_NOFDREF;
                VERIFY(so->so_usecount > 0);

                sofreelastref(so, 1);   /* will deallocate the socket */

        atomic_add_32(&prp->pr_domain->dom_refs, 1);
        TAILQ_INIT(&so->so_evlist);

        /* Attach socket filters for this protocol */

        if (tcpconsdebug == 2)
                so->so_options |= SO_DEBUG;

        so_set_default_traffic_class(so);

        /*
         * If this thread or task is marked to create backgrounded sockets,
         * mark the socket as background.
         */
        if (proc_get_effective_thread_policy(current_thread(),
            TASK_POLICY_NEW_SOCKETS_BG)) {
                socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND);
                so->so_background_thread = current_thread();
        }

        /*
         * Don't mark Unix domain, system or multipath sockets as
         * eligible for defunct by default.
         */
                so->so_flags |= SOF_NODEFUNCT;

        /*
         * Entitlements can't be checked at socket creation time except if the
         * application requested a feature guarded by a privilege (c.f., socket
         * The priv(9) and the Sandboxing APIs are designed with the idea that
         * a privilege check should only be triggered by a userland request.
         * A privilege check at socket creation time is time consuming and
         * could trigger many authorisation error messages from the security
         */
/*
 *      <pru_attach>:ENOBUFS[AF_UNIX]
 *      <pru_attach>:ENOBUFS[TCP]
 *      <pru_attach>:ENOMEM[TCP]
 *      <pru_attach>:???                [other protocol families, IPSEC]
 */
socreate(int dom, struct socket **aso, int type, int proto)
{
        return (socreate_internal(dom, aso, type, proto, current_proc(), 0,
socreate_delegate(int dom, struct socket **aso, int type, int proto, pid_t epid)
{
        struct proc *ep = PROC_NULL;

        if ((proc_selfpid() != epid) && ((ep = proc_find(epid)) == PROC_NULL)) {

        error = socreate_internal(dom, aso, type, proto, current_proc(), 0, ep);

        /*
         * It might not be wise to hold the proc reference when calling
         * socreate_internal since it calls soalloc with M_WAITOK.
         */
/*
 *      <pru_bind>:EINVAL               Invalid argument [COMMON_START]
 *      <pru_bind>:EAFNOSUPPORT         Address family not supported
 *      <pru_bind>:EADDRNOTAVAIL        Address not available.
 *      <pru_bind>:EINVAL               Invalid argument
 *      <pru_bind>:EAFNOSUPPORT         Address family not supported [notdef]
 *      <pru_bind>:EACCES               Permission denied
 *      <pru_bind>:EADDRINUSE           Address in use
 *      <pru_bind>:EAGAIN               Resource unavailable, try again
 *      <pru_bind>:EPERM                Operation not permitted
 *
 * Notes:       It's not possible to fully enumerate the return codes above,
 *              since socket filter authors and protocol family authors may
 *              not choose to limit their error returns to those listed, even
 *              though this may result in some software operating incorrectly.
 *
 *              The error codes which are enumerated above are those known to
 *              be returned by the tcp_usr_bind function supplied.
 */
sobindlock(struct socket *so, struct sockaddr *nam, int dolock)
{
        struct proc *p = current_proc();

        so_update_last_owner_locked(so, p);
        so_update_policy(so);

        so_update_necp_policy(so, nam, NULL);

        /*
         * If this is a bind request on a socket that has been marked
         * as inactive, reject it now before we go any further.
         */
        if (so->so_flags & SOF_DEFUNCT) {
                SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
                    __func__, proc_pid(p), proc_best_name(p),
                    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                    SOCK_DOM(so), SOCK_TYPE(so), error);

        error = sflt_bind(so, nam);

                error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);

                socket_unlock(so, 1);

        if (error == EJUSTRETURN)
sodealloc(struct socket *so)
{
        kauth_cred_unref(&so->so_cred);

        /* Remove any filters */

#if CONTENT_FILTER
        cfil_sock_detach(so);
#endif /* CONTENT_FILTER */

        /* Delete the state allocated for msg queues on a socket */
        if (so->so_flags & SOF_ENABLE_MSGS) {
                FREE(so->so_msg_state, M_TEMP);
                so->so_msg_state = NULL;
        }
        VERIFY(so->so_msg_state == NULL);

        so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt);

#if CONFIG_MACF_SOCKET
        mac_socket_label_destroy(so);
#endif /* MAC_SOCKET */

        if (so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) {
                cached_sock_free(so);
        } else {
                FREE_ZONE(so, sizeof (*so), so->so_zone);
        }
/*
 *      <pru_listen>:EINVAL[AF_UNIX]
 *      <pru_listen>:EINVAL[TCP]
 *      <pru_listen>:EADDRNOTAVAIL[TCP] Address not available.
 *      <pru_listen>:EINVAL[TCP]        Invalid argument
 *      <pru_listen>:EAFNOSUPPORT[TCP]  Address family not supported [notdef]
 *      <pru_listen>:EACCES[TCP]        Permission denied
 *      <pru_listen>:EADDRINUSE[TCP]    Address in use
 *      <pru_listen>:EAGAIN[TCP]        Resource unavailable, try again
 *      <pru_listen>:EPERM[TCP]         Operation not permitted
 *
 * Notes:       Other <pru_listen> returns depend on the protocol family; all
 *              <sf_listen> returns depend on what the filter author causes
 *              their filter to return.
 */
solisten(struct socket *so, int backlog)
{
        struct proc *p = current_proc();

        so_update_last_owner_locked(so, p);
        so_update_policy(so);

        so_update_necp_policy(so, NULL, NULL);

        if (so->so_proto == NULL) {

        if ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {

        /*
         * If the listen request is made on a socket that is not fully
         * disconnected, or on a socket that has been marked as inactive,
         * reject the request now.
         */
        if ((so->so_state &
            (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) ||
            (so->so_flags & SOF_DEFUNCT)) {
                if (so->so_flags & SOF_DEFUNCT) {
                        SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
                            "(%d)\n", __func__, proc_pid(p),
                            (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                            SOCK_DOM(so), SOCK_TYPE(so), error);
                }

        if ((so->so_restrictions & SO_RESTRICT_DENY_IN) != 0) {

        error = sflt_listen(so);

                error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);

                if (error == EJUSTRETURN)

        if (TAILQ_EMPTY(&so->so_comp))
                so->so_options |= SO_ACCEPTCONN;
        /*
         * POSIX: The implementation may have an upper limit on the length of
         * the listen queue-either global or per accepting socket. If backlog
         * exceeds this limit, the length of the listen queue is set to the
         * limit.
         *
         * If listen() is called with a backlog argument value that is less
         * than 0, the function behaves as if it had been called with a backlog
         * argument value of 0.
         *
         * A backlog argument of 0 may allow the socket to accept connections,
         * in which case the length of the listen queue may be set to an
         * implementation-defined minimum value.
         */
        if (backlog <= 0 || backlog > somaxconn)
                backlog = somaxconn;

        so->so_qlimit = backlog;

        socket_unlock(so, 1);
/*
 * The "accept list lock" protects the fields related to the listener queues
 * because we can unlock a socket to respect the lock ordering between
 * the listener socket and its client sockets. The lock ordering is first to
 * acquire the client socket before the listener socket.
 *
 * The accept list lock serializes access to the following fields:
 * - of the listener socket:
 * - of client sockets that are in so_comp or so_incomp:
 *
 * As one can see the accept list lock protects the consistency of the
 * linkage of the client sockets.
 *
 * Note that those fields may be read without holding the accept list lock
 * for a preflight provided the accept list lock is taken when committing
 * to take an action based on the result of the preflight. The preflight
 * saves the cost of doing the unlock/lock dance.
 */
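/*
 * Illustrative usage sketch (not part of the original file), based on how
 * sofreelastref() and soclose_locked() below use these helpers:
 *
 *      socket_lock(head, 1);
 *      so_acquire_accept_list(head, so);
 *      ... walk or edit head->so_incomp / head->so_comp ...
 *      so_release_accept_list(head);
 *      socket_unlock(head, 1);
 *
 * so_acquire_accept_list() may drop the client socket lock and sleep while
 * another thread holds SOF1_ACCEPT_LIST_HELD, so any preflighted state must
 * be re-validated once it returns.
 */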
so_acquire_accept_list(struct socket *head, struct socket *so)
{
        lck_mtx_t *mutex_held;

        if (head->so_proto->pr_getlock == NULL) {

        mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
        LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

        if (!(head->so_flags1 & SOF1_ACCEPT_LIST_HELD)) {
                head->so_flags1 |= SOF1_ACCEPT_LIST_HELD;

                socket_unlock(so, 0);

        while (head->so_flags1 & SOF1_ACCEPT_LIST_HELD) {
                so_accept_list_waits += 1;
                msleep((caddr_t)&head->so_incomp, mutex_held,
                    PSOCK | PCATCH, __func__, NULL);
        }
        head->so_flags1 |= SOF1_ACCEPT_LIST_HELD;

                socket_unlock(head, 0);

                socket_lock(head, 0);
so_release_accept_list(struct socket *head)
{
        if (head->so_proto->pr_getlock != NULL) {
                lck_mtx_t *mutex_held;

                mutex_held = (*head->so_proto->pr_getlock)(head, 0);
                LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

                head->so_flags1 &= ~SOF1_ACCEPT_LIST_HELD;
                wakeup((caddr_t)&head->so_incomp);
        }
}
sofreelastref(struct socket *so, int dealloc)
{
        struct socket *head = so->so_head;

        /* Assume socket is locked */

        if (!(so->so_flags & SOF_PCBCLEARING) || !(so->so_state & SS_NOFDREF)) {
                selthreadclear(&so->so_snd.sb_sel);
                selthreadclear(&so->so_rcv.sb_sel);
                so->so_rcv.sb_flags &= ~(SB_SEL|SB_UPCALL);
                so->so_snd.sb_flags &= ~(SB_SEL|SB_UPCALL);
                so->so_event = sonullevent;

                /*
                 * Need to lock the listener when the protocol has
                 */
                if (head->so_proto->pr_getlock != NULL) {
                        socket_lock(head, 1);
                        so_acquire_accept_list(head, so);
                }
                if (so->so_state & SS_INCOMP) {
                        so->so_state &= ~SS_INCOMP;
                        TAILQ_REMOVE(&head->so_incomp, so, so_list);

                        if (head->so_proto->pr_getlock != NULL) {
                                so_release_accept_list(head);
                                socket_unlock(head, 1);
                        }
                } else if (so->so_state & SS_COMP) {
                        if (head->so_proto->pr_getlock != NULL) {
                                so_release_accept_list(head);
                                socket_unlock(head, 1);
                        }
                        /*
                         * We must not decommission a socket that's
                         * on the accept(2) queue.  If we do, then
                         * accept(2) may hang after select(2) indicated
                         * that the listening socket was ready.
                         */
                        selthreadclear(&so->so_snd.sb_sel);
                        selthreadclear(&so->so_rcv.sb_sel);
                        so->so_rcv.sb_flags &= ~(SB_SEL|SB_UPCALL);
                        so->so_snd.sb_flags &= ~(SB_SEL|SB_UPCALL);
                        so->so_event = sonullevent;
                } else {
                        if (head->so_proto->pr_getlock != NULL) {
                                so_release_accept_list(head);
                        socket_unlock(head, 1);

                        printf("sofree: not queued\n");

#if FLOW_DIVERT
        if (so->so_flags & SOF_FLOW_DIVERT) {
                flow_divert_detach(so);
        }
#endif  /* FLOW_DIVERT */

        /* 3932268: disable upcall */
        so->so_rcv.sb_flags &= ~SB_UPCALL;
        so->so_snd.sb_flags &= ~(SB_UPCALL|SB_SNDBYTE_CNT);
        so->so_event = sonullevent;
soclose_wait_locked(struct socket *so)
{
        lck_mtx_t *mutex_held;

        if (so->so_proto->pr_getlock != NULL)
                mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
        else
                mutex_held = so->so_proto->pr_domain->dom_mtx;
        LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

        /*
         * Double check here and return if there's no outstanding upcall;
         * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
         */
        if (!so->so_upcallusecount || !(so->so_flags & SOF_UPCALLCLOSEWAIT))
                return;
        so->so_rcv.sb_flags &= ~SB_UPCALL;
        so->so_snd.sb_flags &= ~SB_UPCALL;
        so->so_flags |= SOF_CLOSEWAIT;

        (void) msleep((caddr_t)&so->so_upcallusecount, mutex_held, (PZERO - 1),
            "soclose_wait_locked", NULL);
        LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
        so->so_flags &= ~SOF_CLOSEWAIT;
}
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
soclose_locked(struct socket *so)
{
        if (so->so_usecount == 0) {
                panic("soclose: so=%p refcount=0\n", so);
        }

        sflt_notify(so, sock_evt_closing, NULL);

        if (so->so_upcallusecount)
                soclose_wait_locked(so);

#if CONTENT_FILTER
        /*
         * We have to wait until the content filters are done.
         */
        if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
                cfil_sock_close_wait(so);
                cfil_sock_is_closed(so);
                cfil_sock_detach(so);
        }
#endif /* CONTENT_FILTER */

        if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) {
                soresume(current_proc(), so, 1);
                so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED;
        }

        if ((so->so_options & SO_ACCEPTCONN)) {
                struct socket *sp, *sonext;
                int persocklock = 0;
                int incomp_overflow_only;

                /*
                 * We do not want new connections to be added
                 * to the connection queues.
                 */
                so->so_options &= ~SO_ACCEPTCONN;

                /*
                 * We can drop the lock on the listener once
                 * we've acquired the incoming list.
                 */
                if (so->so_proto->pr_getlock != NULL) {
                        so_acquire_accept_list(so, NULL);
                        socket_unlock(so, 0);
                }

                incomp_overflow_only = 1;

                TAILQ_FOREACH_SAFE(sp, &so->so_incomp, so_list, sonext) {
                        /*
                         * skip sockets thrown away by tcpdropdropblreq
                         * they will get cleaned up by the garbage collection.
                         * otherwise, remove the incomp socket from the queue
                         * and let soabort trigger the appropriate cleanup.
                         */
                        if (sp->so_flags & SOF_OVERFLOW)
                                continue;

                        if (persocklock != 0)
                                socket_lock(sp, 1);

                        /*
                         * The extra reference for the list insures the
                         * validity of the socket pointer when we perform the
                         * unlock of the head above.
                         */
                        if (sp->so_state & SS_INCOMP) {
                                sp->so_state &= ~SS_INCOMP;

                                TAILQ_REMOVE(&so->so_incomp, sp, so_list);

                        } else {
                                panic("%s sp %p in so_incomp but !SS_INCOMP",

                        if (persocklock != 0)
                                socket_unlock(sp, 1);
                }

                TAILQ_FOREACH_SAFE(sp, &so->so_comp, so_list, sonext) {
                        /* Dequeue from so_comp since sofree() won't do it */
                        if (persocklock != 0)
                                socket_lock(sp, 1);

                        if (sp->so_state & SS_COMP) {
                                sp->so_state &= ~SS_COMP;

                                TAILQ_REMOVE(&so->so_comp, sp, so_list);

                        } else {
                                panic("%s sp %p in so_comp but !SS_COMP",

                                socket_unlock(sp, 1);
                }

                if (incomp_overflow_only == 0 && !TAILQ_EMPTY(&so->so_incomp)) {
#if (DEBUG|DEVELOPMENT)
                        panic("%s head %p so_comp not empty\n", __func__, so);
#endif /* (DEVELOPMENT || DEBUG) */

                if (!TAILQ_EMPTY(&so->so_comp)) {
#if (DEBUG|DEVELOPMENT)
                        panic("%s head %p so_comp not empty\n", __func__, so);
#endif /* (DEVELOPMENT || DEBUG) */

                        so_release_accept_list(so);
        }

        if (so->so_pcb == NULL) {
                /* 3915887: mark the socket as ready for dealloc */
                so->so_flags |= SOF_PCBCLEARING;
        }

        if (so->so_state & SS_ISCONNECTED) {
                if ((so->so_state & SS_ISDISCONNECTING) == 0) {
                        error = sodisconnectlocked(so);
                }

                if (so->so_options & SO_LINGER) {
                        lck_mtx_t *mutex_held;

                        if ((so->so_state & SS_ISDISCONNECTING) &&
                            (so->so_state & SS_NBIO))

                        if (so->so_proto->pr_getlock != NULL)
                                mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
                        else
                                mutex_held = so->so_proto->pr_domain->dom_mtx;
                        while (so->so_state & SS_ISCONNECTED) {
                                ts.tv_sec = (so->so_linger/100);
                                ts.tv_nsec = (so->so_linger % 100) *
                                    NSEC_PER_USEC * 1000 * 10;
                                error = msleep((caddr_t)&so->so_timeo,
                                    mutex_held, PSOCK | PCATCH, "soclose", &ts);

                                        /*
                                         * It's OK when the time fires,
                                         * don't report an error
                                         */
                                        if (error == EWOULDBLOCK)

        if (so->so_usecount == 0) {
                panic("soclose: usecount is zero so=%p\n", so);
        }

        if (so->so_pcb != NULL && !(so->so_flags & SOF_PCBCLEARING)) {
                int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);

        if (so->so_usecount <= 0) {
                panic("soclose: usecount is zero so=%p\n", so);
        }

        if (so->so_pcb != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
            (so->so_state & SS_NOFDREF)) {
                panic("soclose: NOFDREF");
        }

        so->so_state |= SS_NOFDREF;

        if ((so->so_flags & SOF_KNOTE) != 0)
                KNOTE(&so->so_klist, SO_FILT_HINT_LOCKED);

        atomic_add_32(&so->so_proto->pr_domain->dom_refs, -1);

        VERIFY(so->so_usecount > 0);
soclose(struct socket *so)
{
        if (so->so_retaincnt == 0) {
                error = soclose_locked(so);
        } else {
                /*
                 * If the FD is going away, but the socket is
                 * retained in the kernel, remove its reference.
                 */
                if (so->so_usecount < 2)
                        panic("soclose: retaincnt non null and so=%p "
                            "usecount=%d\n", so, so->so_usecount);
        }
        socket_unlock(so, 1);
/*
 * Must be called at splnet...
 */
/* Should already be locked */
soabort(struct socket *so)
{
#ifdef MORE_LOCKING_DEBUG
        lck_mtx_t *mutex_held;

        if (so->so_proto->pr_getlock != NULL)
                mutex_held = (*so->so_proto->pr_getlock)(so, 0);
        else
                mutex_held = so->so_proto->pr_domain->dom_mtx;
        LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif /* MORE_LOCKING_DEBUG */

        if ((so->so_flags & SOF_ABORTED) == 0) {
                so->so_flags |= SOF_ABORTED;
                error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
soacceptlock(struct socket *so, struct sockaddr **nam, int dolock)
{
        so_update_last_owner_locked(so, PROC_NULL);
        so_update_policy(so);

        so_update_necp_policy(so, NULL, NULL);

        if ((so->so_state & SS_NOFDREF) == 0)
                panic("soaccept: !NOFDREF");
        so->so_state &= ~SS_NOFDREF;
        error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);

                socket_unlock(so, 1);

soaccept(struct socket *so, struct sockaddr **nam)
{
        return (soacceptlock(so, nam, 1));
}
soacceptfilter(struct socket *so, struct socket *head)
{
        struct sockaddr *local = NULL, *remote = NULL;

        /*
         * Hold the lock even if this socket has not been made visible
         * to the filter(s).  For sockets with global locks, this protects
         * against the head or peer going away.
         */
        if (sogetaddr_locked(so, &remote, 1) != 0 ||
            sogetaddr_locked(so, &local, 0) != 0) {
                so->so_state &= ~SS_NOFDREF;
                socket_unlock(so, 1);

                /* Out of resources; try it again next time */
                error = ECONNABORTED;

        error = sflt_accept(head, so, local, remote);

        /*
         * If we get EJUSTRETURN from one of the filters, mark this socket
         * as inactive and return it anyway.  This newly accepted socket
         * will be disconnected later before we hand it off to the caller.
         */
        if (error == EJUSTRETURN) {

                (void) sosetdefunct(current_proc(), so,
                    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);

                /*
                 * This may seem like a duplication of the above error
                 * handling part when we return ECONNABORTED, except
                 * the following is done while holding the lock since
                 * the socket has been exposed to the filter(s) earlier.
                 */
                so->so_state &= ~SS_NOFDREF;
                socket_unlock(so, 1);

                /* Propagate socket filter's error code to the caller */

                socket_unlock(so, 1);

        /* Callee checks for NULL pointer */
        sock_freeaddr(remote);
        sock_freeaddr(local);
/*
 * Returns:     0                       Success
 *              EOPNOTSUPP              Operation not supported on socket
 *              EISCONN                 Socket is connected
 *      <pru_connect>:EADDRNOTAVAIL     Address not available.
 *      <pru_connect>:EINVAL            Invalid argument
 *      <pru_connect>:EAFNOSUPPORT      Address family not supported [notdef]
 *      <pru_connect>:EACCES            Permission denied
 *      <pru_connect>:EADDRINUSE        Address in use
 *      <pru_connect>:EAGAIN            Resource unavailable, try again
 *      <pru_connect>:EPERM             Operation not permitted
 *      <sf_connect_out>:???            [anything a filter writer might set]
 */
soconnectlock(struct socket *so, struct sockaddr *nam, int dolock)
{
        struct proc *p = current_proc();

        so_update_last_owner_locked(so, p);
        so_update_policy(so);

        so_update_necp_policy(so, NULL, nam);

        /*
         * If this is a listening socket or if this is a previously-accepted
         * socket that has been marked as inactive, reject the connect request.
         */
        if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
                if (so->so_flags & SOF_DEFUNCT) {
                        SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
                            "(%d)\n", __func__, proc_pid(p),
                            (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                            SOCK_DOM(so), SOCK_TYPE(so), error);
                }

                        socket_unlock(so, 1);

        if ((so->so_restrictions & SO_RESTRICT_DENY_OUT) != 0) {

                        socket_unlock(so, 1);

        /*
         * If protocol is connection-based, can only connect once.
         * Otherwise, if connected, try to disconnect first.
         * This allows user to disconnect by connecting to, e.g.,
         * a null address.
         */
        if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
            ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
            (error = sodisconnectlocked(so)))) {

                /*
                 * Run connect filter before calling protocol:
                 *  - non-blocking connect returns before completion;
                 */
                error = sflt_connectout(so, nam);

                        if (error == EJUSTRETURN)

                        error = (*so->so_proto->pr_usrreqs->pru_connect)

                socket_unlock(so, 1);

soconnect(struct socket *so, struct sockaddr *nam)
{
        return (soconnectlock(so, nam, 1));
}
/*
 * Returns:     0                       Success
 *      <pru_connect2>:EINVAL[AF_UNIX]
 *      <pru_connect2>:EPROTOTYPE[AF_UNIX]
 *      <pru_connect2>:???              [other protocol families]
 *
 * Notes:       <pru_connect2> is not supported by [TCP].
 */
soconnect2(struct socket *so1, struct socket *so2)
{
        socket_lock(so1, 1);
        if (so2->so_proto->pr_lock)
                socket_lock(so2, 1);

        error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);

        socket_unlock(so1, 1);
        if (so2->so_proto->pr_lock)
                socket_unlock(so2, 1);
soconnectxlocked(struct socket *so, struct sockaddr *src,
    struct sockaddr *dst, struct proc *p, uint32_t ifscope,
    sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
    uint32_t arglen, uio_t auio, user_ssize_t *bytes_written)
{
        so_update_last_owner_locked(so, p);
        so_update_policy(so);

        /*
         * If this is a listening socket or if this is a previously-accepted
         * socket that has been marked as inactive, reject the connect request.
         */
        if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
                if (so->so_flags & SOF_DEFUNCT) {
                        SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
                            "(%d)\n", __func__, proc_pid(p),
                            (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                            SOCK_DOM(so), SOCK_TYPE(so), error);
                }

        if ((so->so_restrictions & SO_RESTRICT_DENY_OUT) != 0)

        /*
         * If protocol is connection-based, can only connect once
         * unless PR_MULTICONN is set.  Otherwise, if connected,
         * try to disconnect first.  This allows user to disconnect
         * by connecting to, e.g., a null address.
         */
        if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) &&
            !(so->so_proto->pr_flags & PR_MULTICONN) &&
            ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
            (error = sodisconnectlocked(so)) != 0)) {

                /*
                 * Run connect filter before calling protocol:
                 *  - non-blocking connect returns before completion;
                 */
                error = sflt_connectout(so, dst);

                        /* Disable PRECONNECT_DATA, as we don't need to send a SYN anymore. */
                        so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
                        if (error == EJUSTRETURN)

                        error = (*so->so_proto->pr_usrreqs->pru_connectx)
                            (so, src, dst, p, ifscope, aid, pcid,
                            flags, arg, arglen, auio, bytes_written);
1774 sodisconnectlocked(struct socket *so)
1778         if ((so->so_state & SS_ISCONNECTED) == 0) {
1782         if (so->so_state & SS_ISDISCONNECTING) {
1787         error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
1789                 sflt_notify(so, sock_evt_disconnected, NULL);
1795 /* Locking version */
1797 sodisconnect(struct socket *so)
1802         error = sodisconnectlocked(so);
1803         socket_unlock(so, 1);
1808 sodisconnectxlocked(struct socket *so, sae_associd_t aid, sae_connid_t cid)
1813          * Call the protocol disconnectx handler; let it handle all
1814          * matters related to the connection state of this session.
1816         error = (*so->so_proto->pr_usrreqs->pru_disconnectx)(so, aid, cid);
1819                  * The event applies only for the session, not for
1820                  * the disconnection of individual subflows.
1822                 if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED))
1823                         sflt_notify(so, sock_evt_disconnected, NULL);
1829 sodisconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid)
1834         error = sodisconnectxlocked(so, aid, cid);
1835         socket_unlock(so, 1);
1839 #define SBLOCKWAIT(f)   (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT) 
1842  * sosendcheck will lock the socket buffer if it isn't locked and 
1843  * verify that there is space for the data being inserted. 
1845  * Returns:     0                       Success 
1847  *      sblock:EWOULDBLOCK 
1854 sosendcheck(struct socket *so, struct sockaddr *addr, user_ssize_t resid,
1855     int32_t clen, int32_t atomic, int flags, int *sblocked,
1856     struct mbuf *control)
1863         if (*sblocked == 0) {
1864                 if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
1865                     so->so_send_filt_thread != 0 &&
1866                     so->so_send_filt_thread == current_thread()) {
1868                          * We're being called recursively from a filter,
1869                          * allow this to continue. Radar 4150520.
1870                          * Don't set sblocked because we don't want
1871                          * to perform an unlock later.
1875                         error = sblock(&so->so_snd, SBLOCKWAIT(flags));
1877                                 if (so->so_flags & SOF_DEFUNCT)
1886          * If a send attempt is made on a socket that has been marked
1887          * as inactive (disconnected), reject the request.
1889         if (so->so_flags & SOF_DEFUNCT) {
1892                 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
1893                     __func__, proc_selfpid(), proc_best_name(current_proc()),
1894                     (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
1895                     SOCK_DOM(so), SOCK_TYPE(so), error);
1899         if (so->so_state & SS_CANTSENDMORE) {
1902                  * Can re-inject data of half closed connections
1904                 if ((so->so_state & SS_ISDISCONNECTED) == 0 &&
1905                         so->so_snd.sb_cfil_thread == current_thread() &&
1906                         cfil_sock_data_pending(&so->so_snd) != 0)
1908                                 "so %llx ignore SS_CANTSENDMORE",
1909                                 (uint64_t)DEBUG_KERNEL_ADDRPERM(so));
1911 #endif /* CONTENT_FILTER */
1915                 error = so->so_error;
1920         if ((so->so_state & SS_ISCONNECTED) == 0) {
1921                 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
1922                         if (((so->so_state & SS_ISCONFIRMING) == 0) &&
1923                             (resid != 0 || clen == 0) &&
1924                             !(so->so_flags1 & SOF1_PRECONNECT_DATA))
1927                 } else if (addr == 0 && !(flags&MSG_HOLD)) {
1928                         return ((so->so_proto->pr_flags & PR_CONNREQUIRED) ?
1929                             ENOTCONN : EDESTADDRREQ);
1933         if (so->so_flags & SOF_ENABLE_MSGS)
1934                 space = msgq_sbspace(so, control);
1936                 space = sbspace(&so->so_snd);
1938         if (flags & MSG_OOB)
1940         if ((atomic && resid > so->so_snd.sb_hiwat) ||
1941             clen > so->so_snd.sb_hiwat)
1944         if ((space < resid + clen &&
1945             (atomic || (space < (int32_t)so->so_snd.sb_lowat) ||
1947             (so->so_type == SOCK_STREAM && so_wait_for_if_feedback(so))) {
1949                  * don't block the connectx call when there's more data
1950                  * than can be copied.
1952                 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1954                                 return (EWOULDBLOCK);
1956                         if (space < (int32_t)so->so_snd.sb_lowat) {
1960                 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) ||
1962                         return (EWOULDBLOCK);
1964                 sbunlock(&so->so_snd, TRUE);    /* keep socket locked */
1966                 error = sbwait(&so->so_snd);
1968                         if (so->so_flags & SOF_DEFUNCT)
1979  * If send must go all at once and message is larger than 
1980  * send buffering, then hard error. 
1981  * Lock against other senders. 
1982  * If must go all at once and not enough room now, then 
1983  * inform user that this would block and do nothing. 
1984  * Otherwise, if nonblocking, send as much as possible. 
1985  * The data to be sent is described by "uio" if nonzero, 
1986  * otherwise by the mbuf chain "top" (which must be null 
1987  * if uio is not).  Data provided in mbuf chain must be small 
1988  * enough to send all at once. 
1990  * Returns nonzero on error, timeout or signal; callers 
1991  * must check for short counts if EINTR/ERESTART are returned. 
1992  * Data and control buffers are freed on return. 
1994  * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf 
1995  * MSG_SEND: go thru as for MSG_HOLD on current fragment, then 
1996  *  point at the mbuf chain being constructed and go from there. 
1998  * Returns:     0                       Success 
2004  *      sosendcheck:EWOULDBLOCK 
2008  *      sosendcheck:???                 [value from so_error] 
2009  *      <pru_send>:ECONNRESET[TCP] 
2010  *      <pru_send>:EINVAL[TCP] 
2011  *      <pru_send>:ENOBUFS[TCP] 
2012  *      <pru_send>:EADDRINUSE[TCP] 
2013  *      <pru_send>:EADDRNOTAVAIL[TCP] 
2014  *      <pru_send>:EAFNOSUPPORT[TCP] 
2015  *      <pru_send>:EACCES[TCP] 
2016  *      <pru_send>:EAGAIN[TCP] 
2017  *      <pru_send>:EPERM[TCP] 
2018  *      <pru_send>:EMSGSIZE[TCP] 
2019  *      <pru_send>:EHOSTUNREACH[TCP] 
2020  *      <pru_send>:ENETUNREACH[TCP] 
2021  *      <pru_send>:ENETDOWN[TCP] 
2022  *      <pru_send>:ENOMEM[TCP] 
2023  *      <pru_send>:ENOBUFS[TCP] 
2024  *      <pru_send>:???[TCP]             [ignorable: mostly IPSEC/firewall/DLIL] 
2025  *      <pru_send>:EINVAL[AF_UNIX] 
2026  *      <pru_send>:EOPNOTSUPP[AF_UNIX] 
2027  *      <pru_send>:EPIPE[AF_UNIX] 
2028  *      <pru_send>:ENOTCONN[AF_UNIX] 
2029  *      <pru_send>:EISCONN[AF_UNIX] 
2030  *      <pru_send>:???[AF_UNIX]         [whatever a filter author chooses] 
2031  *      <sf_data_out>:???               [whatever a filter author chooses] 
2033  * Notes:       Other <pru_send> returns depend on the protocol family; all 
2034  *              <sf_data_out> returns depend on what the filter author causes 
2035  *              their filter to return. 
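/*
 * Illustrative user-level sketch (not from this file; helper name is
 * hypothetical): the note above about short counts on EINTR/ERESTART is
 * what a send(2) loop in an application has to cope with; a minimal retry
 * loop might look like this.
 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>

static ssize_t
send_all(int s, const char *buf, size_t len)
{
	size_t off = 0;

	while (off < len) {
		ssize_t n = send(s, buf + off, len - off, 0);
		if (n > 0) {
			off += (size_t)n;          /* short count: keep going */
			continue;
		}
		if (n == -1 && errno == EINTR)
			continue;                  /* interrupted before progress: retry */
		if (n == -1 && errno == EWOULDBLOCK)
			break;                     /* non-blocking socket buffer is full */
		return (-1);                       /* hard error from the <pru_send> path */
	}
	return ((ssize_t)off);
}
#endif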
2038 sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
2039     struct mbuf *top, struct mbuf *control, int flags)
2042         struct mbuf *m, *freelist = NULL;
2043         user_ssize_t space, len, resid, orig_resid;
2044         int clen = 0, error, dontroute, mlen, sendflags;
2045         int atomic = sosendallatonce(so) || top;
2047         struct proc *p = current_proc();
2048         struct mbuf *control_copy = NULL;
2049         uint16_t headroom = 0;
2050         boolean_t en_tracing = FALSE;
2053                 resid = uio_resid(uio);
2055                 resid = top->m_pkthdr.len;
2057         KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START), so, resid,
2058             so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);
2063          * trace if tracing & network (vs. unix) sockets & and
2066         if (ENTR_SHOULDTRACE &&
2067             (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
2068                 struct inpcb *inp = sotoinpcb(so);
2069                 if (inp->inp_last_outifp != NULL &&
2070                     !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
2072                         KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START,
2073                             VM_KERNEL_ADDRPERM(so),
2074                             ((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0),
2081          * Re-injection should not affect process accounting
2083         if ((flags & MSG_SKIPCFIL) == 0) {
2084                 so_update_last_owner_locked(so, p);
2085                 so_update_policy(so);
2088                 so_update_necp_policy(so, NULL, addr);
2092         if (so->so_type != SOCK_STREAM && (flags & MSG_OOB) != 0) {
2098          * In theory resid should be unsigned.
2099          * However, space must be signed, as it might be less than 0
2100          * if we over-committed, and we must use a signed comparison
2101          * of space and resid.  On the other hand, a negative resid
2102          * causes us to loop sending 0-length segments to the protocol.
2104          * Usually, MSG_EOR isn't used on SOCK_STREAM type sockets.
2105          * But it will be used by sockets doing message delivery.
2107          * Note: We limit resid to be a positive int value as we use
2108          * imin() to set bytes_to_copy -- radr://14558484
2110         if (resid < 0 || resid > INT_MAX || (so->so_type == SOCK_STREAM &&
2111             !(so->so_flags & SOF_ENABLE_MSGS) && (flags & MSG_EOR))) {
2116         dontroute = (flags & MSG_DONTROUTE) &&
2117             (so->so_options & SO_DONTROUTE) == 0 &&
2118             (so->so_proto->pr_flags & PR_ATOMIC);
2119         OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
2121         if (control != NULL)
2122                 clen = control->m_len;
2124         if (soreserveheadroom != 0)
2125                 headroom = so->so_pktheadroom;
2128                 error = sosendcheck(so, addr, resid, clen, atomic, flags,
2129                     &sblocked, control);
2134                 if (so->so_flags & SOF_ENABLE_MSGS)
2135                         space = msgq_sbspace(so, control);
2137                         space = sbspace(&so->so_snd) - clen;
2138                 space += ((flags & MSG_OOB) ? 1024 : 0);
2143                                  * Data is prepackaged in "top".
2146                                 if (flags & MSG_EOR)
2147                                         top->m_flags |= M_EOR;
2155                                 bytes_to_copy = imin(resid, space);
2157                                 bytes_to_alloc = bytes_to_copy;
2159                                         bytes_to_alloc += headroom;
2161                                 if (sosendminchain > 0)
2164                                         chainlength = sosendmaxchain;
2167                                  * Use big 4 KB cluster when the outgoing interface
2168                                  * does not prefer 2 KB clusters
2170                                 bigcl = !(so->so_flags1 & SOF1_IF_2KCL) ||
2171                                     sosendbigcl_ignore_capab;
2174                                  * Attempt to use larger than system page-size
2175                                  * clusters for large writes only if there is
2176                                  * a jumbo cluster pool and if the socket is
2177                                  * marked accordingly.
2179                                 jumbocl = sosendjcl && njcl > 0 &&
2180                                     ((so->so_flags & SOF_MULTIPAGES) ||
2181                                     sosendjcl_ignore_capab) &&
2184                                 socket_unlock(so, 0);
2188                                         int hdrs_needed = (top == NULL) ? 1 : 0;
2191                                          * try to maintain a local cache of mbuf 
2192                                          * clusters needed to complete this 
2193                                          * write the list is further limited to 
2194                                          * the number that are currently needed 
2195                                          * to fill the socket this mechanism 
2196                                          * allows a large number of mbufs/ 
2197                                          * clusters to be grabbed under a single 
2198                                          * mbuf lock... if we can't get any 
2199                                          * clusters, then fall back to trying 
2200                                          * for mbufs if we fail early (or 
2201                                          * miscalculate the number needed) make 
2202                                          * sure to release any clusters we 
2203                                          * haven't yet consumed. 
2205                                         if (freelist == NULL &&
2206                                             bytes_to_alloc > MBIGCLBYTES &&
2209                                                     bytes_to_alloc / M16KCLBYTES;
2211                                                 if ((bytes_to_alloc -
2212                                                     (num_needed * M16KCLBYTES))
2217                                                     m_getpackets_internal(
2218                                                     (unsigned int *)&num_needed,
2219                                                     hdrs_needed, M_WAIT, 0,
2222                                                  * Fall back to 4K cluster size
2223                                                  * if allocation failed
2227                                         if (freelist == NULL &&
2228                                             bytes_to_alloc > MCLBYTES &&
2231                                                     bytes_to_alloc / MBIGCLBYTES;
2233                                                 if ((bytes_to_alloc -
2234                                                     (num_needed * MBIGCLBYTES)) >=
2239                                                     m_getpackets_internal(
2240                                                     (unsigned int *)&num_needed,
2241                                                     hdrs_needed, M_WAIT, 0,
2244                                                  * Fall back to cluster size 
2245                                                  * if allocation failed 
2250                                          * Allocate a cluster as we want to 
2251                                          * avoid splitting the data in more 
2252                                          * than one segment; using MINCLSIZE 
2253                                          * would lead us to allocate two mbufs 
2255                                         if (soreserveheadroom != 0 &&
2258                                             bytes_to_alloc > _MHLEN) ||
2259                                             bytes_to_alloc > _MLEN)) {
2260                                                 num_needed = ROUNDUP(bytes_to_alloc, MCLBYTES) /
2263                                                     m_getpackets_internal(
2264                                                     (unsigned int *)&num_needed,
2265                                                     hdrs_needed, M_WAIT, 0,
2268                                                  * Fall back to a single mbuf
2269                                                  * if allocation failed
2271                                         } else if (freelist == NULL &&
2272                                             bytes_to_alloc > MINCLSIZE) {
2274                                                     bytes_to_alloc / MCLBYTES;
2276                                                 if ((bytes_to_alloc -
2277                                                     (num_needed * MCLBYTES)) >=
2282                                                     m_getpackets_internal(
2283                                                     (unsigned int *)&num_needed,
2284                                                     hdrs_needed, M_WAIT, 0,
2287                                                  * Fall back to a single mbuf
2288                                                  * if allocation failed
2292                                          * For datagram protocols, leave
2293                                          * headroom for protocol headers
2294                                          * in the first cluster of the chain
2296                                         if (freelist != NULL && atomic &&
2297                                             top == NULL && headroom > 0) {
2298                                                 freelist->m_data += headroom;
2302                                          * Fall back to regular mbufs without
2303                                          * reserving the socket headroom
2305                                         if (freelist == NULL) {
2313                                                 if (freelist == NULL) {
2319                                                  * For datagram protocols,
2320                                                  * leave room for protocol
2321                                                  * headers in first mbuf.
2323                                                 if (atomic && top == NULL &&
2324                                                     bytes_to_copy < MHLEN) {
2330                                         freelist = m->m_next;
2333                                         if ((m->m_flags & M_EXT))
2334                                                 mlen = m->m_ext.ext_size -
2336                                         else if ((m->m_flags & M_PKTHDR))
2338                                                     MHLEN - m_leadingspace(m);
2340                                                 mlen = MLEN - m_leadingspace(m);
2341                                         len = imin(mlen, bytes_to_copy);
2347                                         error = uiomove(mtod(m, caddr_t),
2350                                         resid = uio_resid(uio);
2354                                         top->m_pkthdr.len += len;
2359                                                 if (flags & MSG_EOR)
2360                                                         top->m_flags |= M_EOR;
2363                                         bytes_to_copy = min(resid, space);
2365                                 } while (space > 0 &&
2366                                     (chainlength < sosendmaxchain || atomic ||
2367                                     resid < MINCLSIZE));
2375                         if (flags & (MSG_HOLD|MSG_SEND)) {
2376                                 /* Enqueue for later, go away if HOLD */
2378                                 if (so->so_temp && (flags & MSG_FLUSH)) {
2379                                         m_freem(so->so_temp);
2383                                         so->so_tail->m_next = top;
2390                                 if (flags & MSG_HOLD) {
2397                                 so->so_options |= SO_DONTROUTE;
2400                          * Compute flags here, for pru_send and NKEs
2402                          * If the user set MSG_EOF, the protocol
2403                          * understands this flag and nothing left to
2404                          * send then use PRU_SEND_EOF instead of PRU_SEND.
2406                         sendflags = (flags & MSG_OOB) ? PRUS_OOB :
2407                             ((flags & MSG_EOF) &&
2408                             (so->so_proto->pr_flags & PR_IMPLOPCL) &&
2409                             (resid <= 0)) ? PRUS_EOF :
2410                             /* If there is more to send set PRUS_MORETOCOME */
2411                             (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
2413                         if ((flags & MSG_SKIPCFIL) == 0) {
2415                                  * Socket filter processing
2417                                 error = sflt_data_out(so, addr, &top,
2418                                     &control, (sendflags & MSG_OOB) ?
2419                                     sock_data_filt_flag_oob : 0);
2421                                         if (error == EJUSTRETURN) {
2431                                  * Content filter processing
2433                                 error = cfil_sock_data_out(so, addr, top,
2434                                     control, (sendflags & MSG_OOB) ?
2435                                     sock_data_filt_flag_oob : 0);
2437                                         if (error == EJUSTRETURN) {
2445 #endif /* CONTENT_FILTER */
2447                         if (so->so_flags & SOF_ENABLE_MSGS) {
2449                                  * Make a copy of control mbuf,
2450                                  * so that msg priority can be
2451                                  * passed to subsequent mbufs.
2453                                 control_copy = m_dup(control, M_NOWAIT);
2455                         error = (*so->so_proto->pr_usrreqs->pru_send)
2456                             (so, sendflags, top, addr, control, p);
2458                         if (flags & MSG_SEND)
2462                                 so->so_options &= ~SO_DONTROUTE;
2465                         control = control_copy;
2466                         control_copy = NULL;
2471                 } while (resid && space > 0);
2476                 sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
2478                 socket_unlock(so, 1);
2481         if (control != NULL)
2483         if (freelist != NULL)
2484                 m_freem_list(freelist);
2485         if (control_copy != NULL)
2486                 m_freem(control_copy);
2488         soclearfastopen(so);
2491                 /* resid passed here is the bytes left in uio */
2492                 KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END,
2493                     VM_KERNEL_ADDRPERM(so),
2494                     ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
2495                     (int64_t)(orig_resid - resid));
2497         KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, so, resid,
2498             so->so_snd.sb_cc, space, error);
2504  * Supported only connected sockets (no address) without ancillary data 
2505  * (control mbuf) for atomic protocols 
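/*
 * Illustrative user-level sketch (not from this file; helper name is
 * hypothetical): "connected sockets (no address)" corresponds to the
 * connect-then-send pattern on a datagram socket, where no per-packet
 * destination address is supplied.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>

static int
send_connected_dgrams(const struct sockaddr_in *dst)
{
	int s = socket(AF_INET, SOCK_DGRAM, 0);

	if (s == -1)
		return (-1);
	if (connect(s, (const struct sockaddr *)dst, sizeof (*dst)) == -1)
		return (-1);
	/* After connect(), each send() omits the destination address. */
	(void)send(s, "a", 1, 0);
	(void)send(s, "b", 1, 0);
	return (s);
}
#endif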
2508 sosend_list(struct socket *so, struct uio **uioarray, u_int uiocnt, int flags)
2510         struct mbuf *m, *freelist = NULL;
2511         user_ssize_t len, resid;
2512         int error, dontroute, mlen;
2513         int atomic = sosendallatonce(so);
2515         struct proc *p = current_proc();
2518         struct mbuf *top = NULL;
2519         uint16_t headroom = 0;
2522         KERNEL_DEBUG((DBG_FNC_SOSEND_LIST | DBG_FUNC_START), so, uiocnt,
2523             so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);
2525         if (so->so_type != SOCK_DGRAM) {
2533         if (so->so_proto->pr_usrreqs->pru_send_list == NULL) {
2534                 error = EPROTONOSUPPORT;
2537         if (flags & ~(MSG_DONTWAIT | MSG_NBIO)) {
2541         resid = uio_array_resid(uioarray, uiocnt);
2544          * In theory resid should be unsigned.
2545          * However, space must be signed, as it might be less than 0
2546          * if we over-committed, and we must use a signed comparison
2547          * of space and resid.  On the other hand, a negative resid
2548          * causes us to loop sending 0-length segments to the protocol.
2550          * Note: We limit resid to be a positive int value as we use
2551          * imin() to set bytes_to_copy -- radr://14558484
2553         if (resid < 0 || resid > INT_MAX) {
2559         so_update_last_owner_locked(so, p);
2560         so_update_policy(so);
2563         so_update_necp_policy(so, NULL, NULL);
2566         dontroute = (flags & MSG_DONTROUTE) &&
2567             (so->so_options & SO_DONTROUTE) == 0 &&
2568             (so->so_proto->pr_flags & PR_ATOMIC);
2569         OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
2571         error = sosendcheck(so, NULL, resid, 0, atomic, flags,
2577          * Use big 4 KB clusters when the outgoing interface does not prefer
2580         bigcl = !(so->so_flags1 & SOF1_IF_2KCL) || sosendbigcl_ignore_capab;
2582         if (soreserveheadroom != 0)
2583                 headroom = so->so_pktheadroom;
2589                 size_t maxpktlen = 0;
2592                 if (sosendminchain > 0)
2595                         chainlength = sosendmaxchain;
2597                 socket_unlock(so, 0);
2600                  * Find a set of uio that fit in a reasonable number
2603                 for (i = uiofirst; i < uiocnt; i++) {
2604                         struct uio *auio = uioarray[i];
2606                         len = uio_resid(auio);
2608                         /* Do nothing for empty messages */
2615                         if (len > maxpktlen)
2619                         if (chainlength > sosendmaxchain)
2623                  * Nothing left to send
2625                 if (num_needed == 0) {
2630                  * Allocate buffer large enough to include headroom space for
2631                  * network and link header
2634                 bytes_to_alloc = maxpktlen + headroom;
2637                  * Allocate a single contiguous buffer of the smallest available
2638                  * size when possible
2640                 if (bytes_to_alloc > MCLBYTES &&
2641                     bytes_to_alloc <= MBIGCLBYTES && bigcl) {
2642                         freelist = m_getpackets_internal(
2643                             (unsigned int *)&num_needed,
2644                             num_needed, M_WAIT, 1,
2646                 } else if (bytes_to_alloc > _MHLEN &&
2647                     bytes_to_alloc <= MCLBYTES) {
2648                         freelist = m_getpackets_internal(
2649                             (unsigned int *)&num_needed,
2650                             num_needed, M_WAIT, 1,
2653                         freelist = m_allocpacket_internal(
2654                             (unsigned int *)&num_needed,
2655                             bytes_to_alloc, NULL, M_WAIT, 1, 0);
2658                 if (freelist == NULL) {
2664                  * Copy each uio of the set into its own mbuf packet
2666                 for (i = uiofirst, m = freelist;
2667                     i < uiolast && m != NULL;
2671                         struct uio *auio = uioarray[i];
2673                         bytes_to_copy = uio_resid(auio);
2675                         /* Do nothing for empty messages */
2676                         if (bytes_to_copy == 0)
2679                          * Leave headroom for protocol headers
2680                          * in the first mbuf of the chain
2682                         m->m_data += headroom;
2684                         for (n = m; n != NULL; n = n->m_next) {
2685                                 if ((m->m_flags & M_EXT))
2686                                         mlen = m->m_ext.ext_size -
2688                                 else if ((m->m_flags & M_PKTHDR))
2690                                             MHLEN - m_leadingspace(m);
2692                                         mlen = MLEN - m_leadingspace(m);
2693                                 len = imin(mlen, bytes_to_copy);
2696                                  * Note: uiomove() decrements the iovec
2699                                 error = uiomove(mtod(n, caddr_t),
2704                                 m->m_pkthdr.len += len;
2706                                 VERIFY(m->m_pkthdr.len <= maxpktlen);
2708                                 bytes_to_copy -= len;
2711                         if (m->m_pkthdr.len == 0) {
2713                                     "%s:%d so %llx pkt %llx type %u len null\n",
2715                                     (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
2716                                     (uint64_t)DEBUG_KERNEL_ADDRPERM(m),
2732                         so->so_options |= SO_DONTROUTE;
2734                 if ((flags & MSG_SKIPCFIL) == 0) {
2735                         struct mbuf **prevnextp = NULL;
2737                         for (i = uiofirst, m = top;
2738                             i < uiolast && m != NULL;
2740                                 struct mbuf *nextpkt = m->m_nextpkt;
2743                                  * Socket filter processing
2745                                 error = sflt_data_out(so, NULL, &m,
2747                                 if (error != 0 && error != EJUSTRETURN)
2753                                          * Content filter processing
2755                                         error = cfil_sock_data_out(so, NULL, m,
2757                                         if (error != 0 && error != EJUSTRETURN)
2760 #endif /* CONTENT_FILTER */
2762                                  * Remove packet from the list when
2763                                  * swallowed by a filter
2765                                 if (error == EJUSTRETURN) {
2767                                         if (prevnextp != NULL)
2768                                                 *prevnextp = nextpkt;
2775                                         prevnextp = &m->m_nextpkt;
2779                         error = (*so->so_proto->pr_usrreqs->pru_send_list)
2780                             (so, 0, top, NULL, NULL, p);
2783                         so->so_options &= ~SO_DONTROUTE;
2787         } while (resid > 0 && error == 0);
2790                 sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
2792                 socket_unlock(so, 1);
2796         if (freelist != NULL)
2797                 m_freem_list(freelist);
2799         KERNEL_DEBUG(DBG_FNC_SOSEND_LIST | DBG_FUNC_END, so, resid,
2800             so->so_snd.sb_cc, 0, error);
2806  * May return ERESTART when packet is dropped by MAC policy check 
2809 soreceive_addr(struct proc *p, struct socket *so, struct sockaddr **psa,
2810     int flags, struct mbuf **mp, struct mbuf **nextrecordp, int canwait)
2813         struct mbuf *m = *mp;
2814         struct mbuf *nextrecord = *nextrecordp;
2816         KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
2817 #if CONFIG_MACF_SOCKET_SUBSET
2819          * Call the MAC framework for policy checking if we're in
2820          * the user process context and the socket isn't connected.
2822         if (p != kernproc && !(so->so_state & SS_ISCONNECTED)) {
2823                 struct mbuf *m0 = m;
2825                  * Dequeue this record (temporarily) from the receive
2826                  * list since we're about to drop the socket's lock
2827                  * where a new record may arrive and be appended to
2828                  * the list.  Upon MAC policy failure, the record
2829                  * will be freed.  Otherwise, we'll add it back to
2830                  * the head of the list.  We cannot rely on SB_LOCK
2831                  * because append operation uses the socket's lock.
2834                         m->m_nextpkt = NULL;
2835                         sbfree(&so->so_rcv, m);
2837                 } while (m != NULL);
2839                 so->so_rcv.sb_mb = nextrecord;
2840                 SB_EMPTY_FIXUP(&so->so_rcv);
2841                 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1a");
2842                 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1a");
2843                 socket_unlock(so, 0);
2845                 if (mac_socket_check_received(proc_ucred(p), so,
2846                     mtod(m, struct sockaddr *)) != 0) {
2848                          * MAC policy failure; free this record and
2849                          * process the next record (or block until
2850                          * one is available).  We have adjusted sb_cc
2851                          * and sb_mbcnt above so there is no need to
2852                          * call sbfree() again.
2856                          * Clear SB_LOCK but don't unlock the socket.
2857                          * Process the next record or wait for one.
2860                         sbunlock(&so->so_rcv, TRUE); /* stay locked */
2866                  * If the socket has been defunct'd, drop it.
2868                 if (so->so_flags & SOF_DEFUNCT) {
2874                  * Re-adjust the socket receive list and re-enqueue
2875                  * the record in front of any packets which may have
2876                  * been appended while we dropped the lock.
2878                 for (m = m0; m->m_next != NULL; m = m->m_next)
2879                         sballoc(&so->so_rcv, m);
2880                 sballoc(&so->so_rcv, m);
2881                 if (so->so_rcv.sb_mb == NULL) {
2882                         so->so_rcv.sb_lastrecord = m0;
2883                         so->so_rcv.sb_mbtail = m;
2886                 nextrecord = m->m_nextpkt = so->so_rcv.sb_mb;
2887                 so->so_rcv.sb_mb = m;
2888                 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1b");
2889                 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1b");
2891 #endif /* CONFIG_MACF_SOCKET_SUBSET */
2893                 *psa = dup_sockaddr(mtod(m, struct sockaddr *), canwait);
2894                 if ((*psa == NULL) && (flags & MSG_NEEDSA)) {
2895                         error = EWOULDBLOCK;
2899         if (flags & MSG_PEEK) {
2902                 sbfree(&so->so_rcv, m);
2903                 if (m->m_next == NULL && so->so_rcv.sb_cc != 0) {
2904                         panic("%s: about to create invalid socketbuf",
2908                 MFREE(m, so->so_rcv.sb_mb);
2909                 m = so->so_rcv.sb_mb;
2911                         m->m_nextpkt = nextrecord;
2913                         so->so_rcv.sb_mb = nextrecord;
2914                         SB_EMPTY_FIXUP(&so->so_rcv);
2919         *nextrecordp = nextrecord;
2925  * Process one or more MT_CONTROL mbufs present before any data mbufs 
2926  * in the first mbuf chain on the socket buffer.  If MSG_PEEK, we 
2927  * just copy the data; if !MSG_PEEK, we call into the protocol to 
2928  * perform externalization. 
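/*
 * Illustrative user-level sketch (not from this file; helper name is
 * hypothetical): the SCM_RIGHTS externalization described above is what
 * turns descriptors that were in flight on an AF_UNIX socket into usable
 * ones for the caller of recvmsg(2).
 */
#if 0
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

static int
recv_fd(int s)
{
	char data[1];
	char cbuf[CMSG_SPACE(sizeof (int))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof (data) };
	struct msghdr msg;
	struct cmsghdr *cmsg;
	int fd;

	memset(&msg, 0, sizeof (msg));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof (cbuf);

	if (recvmsg(s, &msg, 0) == -1)
		return (-1);

	cmsg = CMSG_FIRSTHDR(&msg);
	if (cmsg != NULL && cmsg->cmsg_level == SOL_SOCKET &&
	    cmsg->cmsg_type == SCM_RIGHTS) {
		memcpy(&fd, CMSG_DATA(cmsg), sizeof (fd));
		return (fd);               /* descriptor produced by dom_externalize */
	}
	return (-1);
}
#endif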
2931 soreceive_ctl(struct socket *so, struct mbuf **controlp, int flags,
2932     struct mbuf **mp, struct mbuf **nextrecordp)
2935         struct mbuf *cm = NULL, *cmn;
2936         struct mbuf **cme = &cm;
2937         struct sockbuf *sb_rcv = &so->so_rcv;
2938         struct mbuf **msgpcm = NULL;
2939         struct mbuf *m = *mp;
2940         struct mbuf *nextrecord = *nextrecordp;
2941         struct protosw *pr = so->so_proto;
2944          * Externalizing the control messages would require us to
2945          * drop the socket's lock below.  Once we re-acquire the
2946          * lock, the mbuf chain might change.  In order to preserve
2947          * consistency, we unlink all control messages from the
2948          * first mbuf chain in one shot and link them separately
2949          * onto a different chain.
2952                 if (flags & MSG_PEEK) {
2953                         if (controlp != NULL) {
2954                                 if (*controlp == NULL) {
2957                                 *controlp = m_copy(m, 0, m->m_len);
2960                                  * If we failed to allocate an mbuf,
2961                                  * release any previously allocated
2962                                  * mbufs for control data. Return
2963                                  * an error. Keep the mbufs in the
2964                                  * socket as this is using
2967                                 if (*controlp == NULL) {
2972                                 controlp = &(*controlp)->m_next;
2976                         m->m_nextpkt = NULL;
2978                         sb_rcv->sb_mb = m->m_next;
2981                         cme = &(*cme)->m_next;
2984         } while (m != NULL && m->m_type == MT_CONTROL);
2986         if (!(flags & MSG_PEEK)) {
2987                 if (sb_rcv->sb_mb != NULL) {
2988                         sb_rcv->sb_mb->m_nextpkt = nextrecord;
2990                         sb_rcv->sb_mb = nextrecord;
2991                         SB_EMPTY_FIXUP(sb_rcv);
2993                 if (nextrecord == NULL)
2994                         sb_rcv->sb_lastrecord = m;
2997         SBLASTRECORDCHK(&so->so_rcv, "soreceive ctl");
2998         SBLASTMBUFCHK(&so->so_rcv, "soreceive ctl");
3000         while (cm != NULL) {
3005                 cmsg_type = mtod(cm, struct cmsghdr *)->cmsg_type;
3008                  * Call the protocol to externalize SCM_RIGHTS message
3009                  * and return the modified message to the caller upon
3010                  * success.  Otherwise, all other control messages are
3011                  * returned unmodified to the caller.  Note that we
3012                  * only get into this loop if MSG_PEEK is not set.
3014                 if (pr->pr_domain->dom_externalize != NULL &&
3015                     cmsg_type == SCM_RIGHTS) {
3017                          * Release socket lock: see 3903171.  This
3018                          * would also allow more records to be appended
3019                          * to the socket buffer.  We still have SB_LOCK
3020                          * set on it, so we can be sure that the head
3021                          * of the mbuf chain won't change.
3023                         socket_unlock(so, 0);
3024                         error = (*pr->pr_domain->dom_externalize)(cm);
3030                 if (controlp != NULL && error == 0) {
3032                         controlp = &(*controlp)->m_next;
3039          * Update the value of nextrecord in case we received new
3040          * records when the socket was unlocked above for
3041          * externalizing SCM_RIGHTS.
3044                 nextrecord = sb_rcv->sb_mb->m_nextpkt;
3046                 nextrecord = sb_rcv->sb_mb;
3050         *nextrecordp = nextrecord;
3056  * Implement receive operations on a socket. 
3057  * We depend on the way that records are added to the sockbuf 
3058  * by sbappend*.  In particular, each record (mbufs linked through m_next) 
3059  * must begin with an address if the protocol so specifies, 
3060  * followed by an optional mbuf or mbufs containing ancillary data, 
3061  * and then zero or more mbufs of data. 
3062  * In order to avoid blocking network interrupts for the entire time here, 
3063  * we splx() while doing the actual copy to user space. 
3064  * Although the sockbuf is locked, new data may still be appended, 
3065  * and thus we must maintain consistency of the sockbuf during that time. 
3067  * The caller may receive the data as a single mbuf chain by supplying 
3068  * an mbuf **mp0 for use in returning the chain.  The uio is then used 
3069  * only for the count in uio_resid. 
3071  * Returns:     0                       Success 
3076  *      sblock:EWOULDBLOCK 
3080  *      sodelayed_copy:EFAULT 
3081  *      <pru_rcvoob>:EINVAL[TCP] 
3082  *      <pru_rcvoob>:EWOULDBLOCK[TCP] 
3084  *      <pr_domain->dom_externalize>:EMSGSIZE[AF_UNIX] 
3085  *      <pr_domain->dom_externalize>:ENOBUFS[AF_UNIX] 
3086  *      <pr_domain->dom_externalize>:??? 
3088  * Notes:       Additional return values from calls through <pru_rcvoob> and 
3089  *              <pr_domain->dom_externalize> depend on protocols other than 
3090  *              TCP or AF_UNIX, which are documented above. 
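/*
 * Illustrative user-level sketch (not from this file; helper names are
 * hypothetical): the record layout and blocking rules described above are
 * what give recv(2) its MSG_WAITALL and MSG_PEEK semantics on a stream
 * socket.
 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>

static ssize_t
recv_exact(int s, void *buf, size_t len)
{
	ssize_t n;

	/* MSG_WAITALL: block until len bytes, EOF, an error, or a signal. */
	do {
		n = recv(s, buf, len, MSG_WAITALL);
	} while (n == -1 && errno == EINTR);
	return (n);
}

static ssize_t
peek_header(int s, void *hdr, size_t hdrlen)
{
	/* MSG_PEEK: copy out without consuming; the data stays queued. */
	return (recv(s, hdr, hdrlen, MSG_PEEK));
}
#endif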
3093 soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
3094     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
3096         struct mbuf *m, **mp, *ml = NULL;
3097         struct mbuf *nextrecord, *free_list;
3098         int flags, error, offset;
3100         struct protosw *pr = so->so_proto;
3102         user_ssize_t orig_resid = uio_resid(uio);
3103         user_ssize_t delayed_copy_len;
3106         struct proc *p = current_proc();
3107         boolean_t en_tracing = FALSE;
3110          * Sanity check on the length passed by caller as we are making 'int'
3113         if (orig_resid < 0 || orig_resid > INT_MAX)
3116         KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, so,
3117             uio_resid(uio), so->so_rcv.sb_cc, so->so_rcv.sb_lowat,
3118             so->so_rcv.sb_hiwat);
3121         so_update_last_owner_locked(so, p);
3122         so_update_policy(so);
3124 #ifdef MORE_LOCKING_DEBUG
3125         if (so->so_usecount == 1) {
3126                 panic("%s: so=%x no other reference on socket\n", __func__, so);
3133         if (controlp != NULL)
3136                 flags = *flagsp &~ MSG_EOR;
3141          * If a recv attempt is made on a previously-accepted socket
3142          * that has been marked as inactive (disconnected), reject
3145         if (so->so_flags & SOF_DEFUNCT) {
3146                 struct sockbuf *sb = &so->so_rcv;
3149                 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
3150                     __func__, proc_pid(p), proc_best_name(p),
3151                     (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
3152                     SOCK_DOM(so), SOCK_TYPE(so), error);
3154                  * This socket should have been disconnected and flushed
3155                  * prior to being returned from sodefunct(); there should
3156                  * be no data on its receive list, so panic otherwise.
3158                 if (so->so_state & SS_DEFUNCT)
3159                         sb_empty_assert(sb, __func__);
3160                 socket_unlock(so, 1);
3164         if ((so->so_flags1 & SOF1_PRECONNECT_DATA) &&
3165             pr->pr_usrreqs->pru_preconnect) {
3167                  * A user may set the CONNECT_RESUME_ON_READ_WRITE flag but not 
3168                  * call write() right after this. *If* the app then calls a read 
3169                  * we do not want to block this read indefinitely. Thus, 
3170                  * we trigger a connect so that the session gets initiated. 
3172                 error = (*pr->pr_usrreqs->pru_preconnect)(so);
3175                         socket_unlock(so, 1);
3180         if (ENTR_SHOULDTRACE &&
3181             (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
3183                  * enable energy tracing for inet sockets that go over
3184                  * non-loopback interfaces only.
3186                 struct inpcb *inp = sotoinpcb(so);
3187                 if (inp->inp_last_outifp != NULL &&
3188                     !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
3190                         KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_START,
3191                             VM_KERNEL_ADDRPERM(so),
3192                             ((so->so_state & SS_NBIO) ?
3193                             kEnTrFlagNonBlocking : 0),
3194                             (int64_t)orig_resid);
3199          * When SO_WANTOOBFLAG is set we try to get out-of-band data 
3200          * regardless of the flags argument. Here is the case where 
3201          * out-of-band data is not inline. 
3203         if ((flags & MSG_OOB) ||
3204             ((so->so_options & SO_WANTOOBFLAG) != 0 &&
3205             (so->so_options & SO_OOBINLINE) == 0 &&
3206             (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
3207                 m = m_get(M_WAIT, MT_DATA);
3209                         socket_unlock(so, 1);
3210                         KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
3211                             ENOBUFS, 0, 0, 0, 0);
3214                 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
3217                 socket_unlock(so, 0);
3219                         error = uiomove(mtod(m, caddr_t),
3220                             imin(uio_resid(uio), m->m_len), uio);
3222                 } while (uio_resid(uio) && error == 0 && m != NULL);
3228                 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
3229                         if (error == EWOULDBLOCK || error == EINVAL) {
3231                                  * Let's try to get normal data: 
3232                                  * EWOULDBLOCK: out-of-band data not 
3233                                  * received yet. EINVAL: out-of-band data 
3238                         } else if (error == 0 && flagsp != NULL) {
3242                 socket_unlock(so, 1);
3244                         KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
3245                             VM_KERNEL_ADDRPERM(so), 0,
3246                             (int64_t)(orig_resid - uio_resid(uio)));
3248                 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
3257         if (so->so_state & SS_ISCONFIRMING && uio_resid(uio)) {
3258                 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
3262         delayed_copy_len = 0;
3264 #ifdef MORE_LOCKING_DEBUG
3265         if (so->so_usecount <= 1)
3266                 printf("soreceive: sblock so=0x%llx ref=%d on socket\n",
3267                     (uint64_t)DEBUG_KERNEL_ADDRPERM(so), so->so_usecount);
3270          * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
3271          * and if so just return to the caller.  This could happen when
3272          * soreceive() is called by a socket upcall function during the
3273          * time the socket is freed.  The socket buffer would have been
3274          * locked across the upcall, therefore we cannot put this thread
3275          * to sleep (else we will deadlock) or return EWOULDBLOCK (else
3276          * we may livelock), because the lock on the socket buffer will
3277          * only be released when the upcall routine returns to its caller.
3278          * Because the socket has been officially closed, there can be
3279          * no further read on it.
3281          * A multipath subflow socket would have its SS_NOFDREF set by
3282          * default, so check for SOF_MP_SUBFLOW socket flag; when the
3283          * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
3285         if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
3286             (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
3287                 socket_unlock(so, 1);
3291         error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
3293                 socket_unlock(so, 1);
3294                 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
3297                         KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
3298                             VM_KERNEL_ADDRPERM(so), 0,
3299                             (int64_t)(orig_resid - uio_resid(uio)));
3304         m = so->so_rcv.sb_mb;
3306          * If we have less data than requested, block awaiting more
3307          * (subject to any timeout) if:
3308          *   1. the current count is less than the low water mark, or
3309          *   2. MSG_WAITALL is set, and it is possible to do the entire
3310          *      receive operation at once if we block (resid <= hiwat).
3311          *   3. MSG_DONTWAIT is not set
3312          * If MSG_WAITALL is set but resid is larger than the receive buffer,
3313          * we have to do the receive in sections, and thus risk returning
3314          * a short count if a timeout or signal occurs after we start.
3316         if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
3317             so->so_rcv.sb_cc < uio_resid(uio)) &&
3318             (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
3319             ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
3320             m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
3322                  * Panic if we notice inconsistencies in the socket's
3323                  * receive list; both sb_mb and sb_cc should correctly
3324                  * reflect the contents of the list, otherwise we may
3325                  * end up with false positives during select() or poll()
3326                  * which could put the application in a bad state.
3328                 SB_MB_CHECK(&so->so_rcv);
3333                         error = so->so_error;
3334                         if ((flags & MSG_PEEK) == 0)
3338                 if (so->so_state & SS_CANTRCVMORE) {
3341                          * Deal with half closed connections
3343                         if ((so->so_state & SS_ISDISCONNECTED) == 0 &&
3344                                 cfil_sock_data_pending(&so->so_rcv) != 0)
3346                                         "so %llx ignore SS_CANTRCVMORE",
3347                                         (uint64_t)DEBUG_KERNEL_ADDRPERM(so));
3349 #endif /* CONTENT_FILTER */
3355                 for (; m != NULL; m = m->m_next)
3356                         if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
3357                                 m = so->so_rcv.sb_mb;
3360                 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
3361                     (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
3365                 if (uio_resid(uio) == 0)
3368                 if ((so->so_state & SS_NBIO) ||
3369                     (flags & (MSG_DONTWAIT|MSG_NBIO))) {
3370                         error = EWOULDBLOCK;
3373                 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
3374                 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
3375                 sbunlock(&so->so_rcv, TRUE);    /* keep socket locked */
3376 #if EVEN_MORE_LOCKING_DEBUG
3378                         printf("Waiting for socket data\n");
3381                 error = sbwait(&so->so_rcv);
3382 #if EVEN_MORE_LOCKING_DEBUG
3384                         printf("SORECEIVE - sbwait returned %d\n", error);
3386                 if (so->so_usecount < 1) {
3387                         panic("%s: after 2nd sblock so=%p ref=%d on socket\n",
3388                             __func__, so, so->so_usecount);
3392                         socket_unlock(so, 1);
3393                         KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
3396                                 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
3397                                     VM_KERNEL_ADDRPERM(so), 0,
3398                                     (int64_t)(orig_resid - uio_resid(uio)));
3405         OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
3406         SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
3407         SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
3408         nextrecord = m->m_nextpkt;
3410         if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
3411                 error = soreceive_addr(p, so, psa, flags, &m, &nextrecord,
3413                 if (error == ERESTART)
3415                 else if (error != 0)
3421          * Process one or more MT_CONTROL mbufs present before any data mbufs
3422          * in the first mbuf chain on the socket buffer.  If MSG_PEEK, we
3423          * just copy the data; if !MSG_PEEK, we call into the protocol to
3424          * perform externalization.
3426         if (m != NULL && m->m_type == MT_CONTROL) {
3427                 error = soreceive_ctl(so, controlp, flags, &m, &nextrecord);
3434          * If the socket is a TCP socket with message delivery
3435          * enabled, then create a control msg to deliver the
3436          * relative TCP sequence number for this data. Waiting
3437          * until this point will protect against failures to
3438          * allocate an mbuf for control msgs.
3440         if (so->so_type == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP &&
3441             (so->so_flags & SOF_ENABLE_MSGS) && controlp != NULL) {
3442                 struct mbuf *seq_cm;
3444                 seq_cm = sbcreatecontrol((caddr_t)&m->m_pkthdr.msg_seq,
3445                     sizeof (uint32_t), SCM_SEQNUM, SOL_SOCKET);
3446                 if (seq_cm == NULL) {
3447                         /* unable to allocate a control mbuf */
3452                 controlp = &seq_cm->m_next;
3456                 if (!(flags & MSG_PEEK)) {
3458                          * We get here because m points to an mbuf following
3459                          * any MT_SONAME or MT_CONTROL mbufs which have been
3460                          * processed above.  In any case, m should be pointing
3461                          * to the head of the mbuf chain, and the nextrecord
3462                          * should be either NULL or equal to m->m_nextpkt.
3463                          * See comments above about SB_LOCK.
3465                         if (m != so->so_rcv.sb_mb ||
3466                             m->m_nextpkt != nextrecord) {
3467                                 panic("%s: post-control !sync so=%p m=%p "
3468                                     "nextrecord=%p\n", __func__, so, m,
3472                         if (nextrecord == NULL)
3473                                 so->so_rcv.sb_lastrecord = m;
3476                 if (type == MT_OOBDATA)
3479                 if (!(flags & MSG_PEEK)) {
3480                         SB_EMPTY_FIXUP(&so->so_rcv);
3483         SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
3484         SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");
3489         if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)
3497             (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
3498                 if (m->m_type == MT_OOBDATA) {
3499                         if (type != MT_OOBDATA)
3501                 } else if (type == MT_OOBDATA) {
                /*
                 * Make sure to always set MSG_OOB event when getting
                 * out of band data inline.
                 */
                if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
                    (so->so_options & SO_OOBINLINE) != 0 &&
                    (so->so_state & SS_RCVATMARK) != 0) {
                so->so_state &= ~SS_RCVATMARK;
                len = uio_resid(uio) - delayed_copy_len;
                if (so->so_oobmark && len > so->so_oobmark - offset)
                        len = so->so_oobmark - offset;
                if (len > m->m_len - moff)
                        len = m->m_len - moff;
                /*
                 * If mp is set, just pass back the mbufs.
                 * Otherwise copy them out via the uio, then free.
                 * Sockbuf must be consistent here (points to current mbuf,
                 * it points to next record) when we drop priority;
                 * we must note any additions to the sockbuf when we
                 * block interrupts again.
                 */
                        SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
                        SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
                        if (can_delay && len == m->m_len) {
                                /*
                                 * only delay the copy if we're consuming the
                                 * mbuf and we're NOT in MSG_PEEK mode
                                 * and we have enough data to make it worthwhile
                                 * to drop and retake the lock... can_delay
                                 * reflects the state of the 2 latter
                                 * constraints; moff should always be zero
                                 */
                                delayed_copy_len += len;
                                if (delayed_copy_len) {
                                        error = sodelayed_copy(so, uio,
                                            &free_list, &delayed_copy_len);
                                        /*
                                         * can only get here if MSG_PEEK is not
                                         * set therefore, m should point at the
                                         * head of the rcv queue; if it doesn't,
                                         * it means something drastically
                                         * changed while we were out from behind
                                         * the lock in sodelayed_copy. perhaps
                                         * a RST on the stream. in any event,
                                         * the stream has been interrupted. it's
                                         * probably best just to return whatever
                                         * data we've moved and let the caller
                                         */
                                        if (m != so->so_rcv.sb_mb) {
                                socket_unlock(so, 0);
                                error = uiomove(mtod(m, caddr_t) + moff,
                        uio_setresid(uio, (uio_resid(uio) - len));
                if (len == m->m_len - moff) {
                        if (m->m_flags & M_EOR)
                        if (flags & MSG_PEEK) {
                                nextrecord = m->m_nextpkt;
                                sbfree(&so->so_rcv, m);
                                m->m_nextpkt = NULL;
                                /*
                                 * If this packet is an unordered packet
                                 * (indicated by M_UNORDERED_DATA flag), remove
                                 * the additional bytes added to the
                                 * receive socket buffer size.
                                 */
                                if ((so->so_flags & SOF_ENABLE_MSGS) &&
                                    (m->m_flags & M_UNORDERED_DATA) &&
                                    sbreserve(&so->so_rcv,
                                    so->so_rcv.sb_hiwat - m->m_len)) {
                                        if (so->so_msg_state->msg_uno_bytes >
                                                    msg_uno_bytes -= m->m_len;
                                        m->m_flags &= ~M_UNORDERED_DATA;
                                        so->so_rcv.sb_mb = m = m->m_next;
                                        if (free_list == NULL)
                                        so->so_rcv.sb_mb = m = m->m_next;
                                        m->m_nextpkt = nextrecord;
                                        if (nextrecord == NULL)
                                                so->so_rcv.sb_lastrecord = m;
                                        so->so_rcv.sb_mb = nextrecord;
                                        SB_EMPTY_FIXUP(&so->so_rcv);
                                SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
                                SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
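                                /*
                                 * Editorial note (not in the original source):
                                 * the bookkeeping above preserves the receive
                                 * sockbuf invariants while mbufs are consumed
                                 * in place: sb_mb always points at the head of
                                 * the first record, each record head carries
                                 * m_nextpkt, and sb_lastrecord/sb_mbtail are
                                 * patched (or reset via SB_EMPTY_FIXUP) when
                                 * the first record empties.  SBLASTRECORDCHK/
                                 * SBLASTMBUFCHK are debug-only checks of those
                                 * invariants.
                                 */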
                        if (flags & MSG_PEEK) {
                                        if (flags & MSG_DONTWAIT)
                                                copy_flag = M_DONTWAIT;
                                        *mp = m_copym(m, 0, len, copy_flag);
                                        /*
                                         * Failed to allocate an mbuf?
                                         * Adjust uio_resid back, it was
                                         * adjusted down by len bytes which
                                         * we didn't copy over.
                                         */
                                                    (uio_resid(uio) + len));
                                so->so_rcv.sb_cc -= len;
                if (so->so_oobmark) {
                        if ((flags & MSG_PEEK) == 0) {
                                so->so_oobmark -= len;
                                if (so->so_oobmark == 0) {
                                        so->so_state |= SS_RCVATMARK;
                                        /*
                                         * delay posting the actual event until
                                         * after any delayed copy processing
                                         */
                                if (offset == so->so_oobmark)
                if (flags & MSG_EOR)
                /*
                 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set
                 * (for non-atomic socket), we must not quit until
                 * "uio->uio_resid == 0" or an error termination.
                 * If a signal/timeout occurs, return with a short
                 * count but without error.  Keep sockbuf locked
                 * against other readers.
                 */
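                /*
                 * Editorial note (not in the original source): the loop below
                 * is what gives MSG_WAITALL its user-visible semantics on
                 * stream sockets.  A hedged sketch of the caller-side contract:
                 *
                 *      ssize_t n = recv(fd, buf, want, MSG_WAITALL);
                 *      // n == want unless an error, EOF, a signal or a
                 *      // timeout cut the receive short, in which case the
                 *      // partial count is returned without an error.
                 */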
                while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == NULL &&
                    (uio_resid(uio) - delayed_copy_len) > 0 &&
                    !sosendallatonce(so) && !nextrecord) {
                        if (so->so_error || ((so->so_state & SS_CANTRCVMORE)
                            && cfil_sock_data_pending(&so->so_rcv) == 0
#endif /* CONTENT_FILTER */
                        /*
                         * Depending on the protocol (e.g. TCP), the following
                         * might cause the socket lock to be dropped and later
                         * be reacquired, and more data could have arrived and
                         * have been appended to the receive socket buffer by
                         * the time it returns.  Therefore, we only sleep in
                         * sbwait() below if and only if the socket buffer is
                         * empty, in order to avoid a false sleep.
                         */
                        if (pr->pr_flags & PR_WANTRCVD && so->so_pcb &&
                            (((struct inpcb *)so->so_pcb)->inp_state !=
                                (*pr->pr_usrreqs->pru_rcvd)(so, flags);

                        SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
                        SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");

                        if (so->so_rcv.sb_mb == NULL && sbwait(&so->so_rcv)) {
                        /*
                         * have to wait until after we get back from the sbwait
                         * to do the copy because we will drop the lock if we
                         * have enough data that has been delayed... by dropping
                         * the lock we open up a window allowing the netisr
                         * thread to process the incoming packets and to change
                         * the state of this socket... we're issuing the sbwait
                         * because the socket is empty and we're expecting the
                         * netisr thread to wake us up when more packets arrive;
                         * if we allow that processing to happen and then sbwait
                         * we could stall forever with packets sitting in the
                         * socket if no further packets arrive from the remote
                         * we want to copy before we've collected all the data
                         * to satisfy this request to allow the copy to overlap
                         * the incoming packet processing on an MP system
                         */
                        if (delayed_copy_len > sorecvmincopy &&
                            (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
                                error = sodelayed_copy(so, uio,
                                    &free_list, &delayed_copy_len);
                        m = so->so_rcv.sb_mb;
                                nextrecord = m->m_nextpkt;
                        SB_MB_CHECK(&so->so_rcv);
#ifdef MORE_LOCKING_DEBUG
        if (so->so_usecount <= 1) {
                panic("%s: after big while so=%p ref=%d on socket\n",
                    __func__, so, so->so_usecount);

        if (m != NULL && pr->pr_flags & PR_ATOMIC) {
                if (so->so_options & SO_DONTTRUNC) {
                        flags |= MSG_RCVMORE;
                        if ((flags & MSG_PEEK) == 0)
                                (void) sbdroprecord(&so->so_rcv);

        /*
         * pru_rcvd below (for TCP) may cause more data to be received
         * if the socket lock is dropped prior to sending the ACK; some
         * legacy OpenTransport applications don't handle this well
         * (if it receives less data than requested while MSG_HAVEMORE
         * is set), and so we set the flag now based on what we know
         * prior to calling pru_rcvd.
         */
        if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
                flags |= MSG_HAVEMORE;

        if ((flags & MSG_PEEK) == 0) {
                        so->so_rcv.sb_mb = nextrecord;
                        /*
                         * First part is an inline SB_EMPTY_FIXUP().  Second
                         * part makes sure sb_lastrecord is up-to-date if
                         * there is still data in the socket buffer.
                         */
                        if (so->so_rcv.sb_mb == NULL) {
                                so->so_rcv.sb_mbtail = NULL;
                                so->so_rcv.sb_lastrecord = NULL;
                        } else if (nextrecord->m_nextpkt == NULL) {
                                so->so_rcv.sb_lastrecord = nextrecord;
                        SB_MB_CHECK(&so->so_rcv);
                SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
                SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
                if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
                        (*pr->pr_usrreqs->pru_rcvd)(so, flags);

        if (delayed_copy_len) {
                error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
        if (free_list != NULL) {
                m_freem_list(free_list);
                postevent(so, 0, EV_OOB);

        if (orig_resid == uio_resid(uio) && orig_resid &&
            (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
                sbunlock(&so->so_rcv, TRUE);    /* keep socket locked */

#ifdef MORE_LOCKING_DEBUG
        if (so->so_usecount <= 1) {
                panic("%s: release so=%p ref=%d on socket\n", __func__,
                    so, so->so_usecount);
        if (delayed_copy_len)
                error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
        if (free_list != NULL)
                m_freem_list(free_list);

        sbunlock(&so->so_rcv, FALSE);   /* will unlock socket */

                KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
                    VM_KERNEL_ADDRPERM(so),
                    ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
                    (int64_t)(orig_resid - uio_resid(uio)));

        KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, so, uio_resid(uio),
            so->so_rcv.sb_cc, 0, error);
/*
 * Returns:     0                       Success
 */
sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list,
    user_ssize_t *resid)
        socket_unlock(so, 0);

        while (m != NULL && error == 0) {
                error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
        m_freem_list(*free_list);

sodelayed_copy_list(struct socket *so, struct recv_msg_elem *msgarray,
    u_int uiocnt, struct mbuf **free_list, user_ssize_t *resid)
        struct mbuf *ml, *m;

        for (ml = *free_list, i = 0; ml != NULL && i < uiocnt;
            ml = ml->m_nextpkt, i++) {
                auio = msgarray[i].uio;
                for (m = ml; m != NULL; m = m->m_next) {
                        error = uiomove(mtod(m, caddr_t), m->m_len, auio);
        m_freem_list(*free_list);
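/*
 * Editorial note (not in the original source): both helpers above implement
 * the delayed-copy optimization used by soreceive()/soreceive_list(): mbufs
 * are unlinked from the sockbuf while the socket lock is held, accumulated on
 * free_list, and only copied out with uiomove() after socket_unlock(), so the
 * copy to user space can overlap further packet processing; the whole chain
 * is then released with a single m_freem_list() call.
 */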
soreceive_list(struct socket *so, struct recv_msg_elem *msgarray, u_int uiocnt,
        struct mbuf *nextrecord;
        struct mbuf *ml = NULL, *free_list = NULL, *free_tail = NULL;
        user_ssize_t len, pktlen, delayed_copy_len = 0;
        struct protosw *pr = so->so_proto;
        struct proc *p = current_proc();
        struct uio *auio = NULL;
        struct sockaddr **psa = NULL;
        struct mbuf **controlp = NULL;
        struct mbuf *free_others = NULL;

        KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_START,
            so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat);

        /*
         * - Only supports don't wait flags
         * - Only support datagram sockets (could be extended to raw)
         * - Protocol must support packet chains
         * - The uio array is NULL (should we panic?)
         */
        if (flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA |
                printf("%s invalid flags 0x%x\n", __func__, flags);
        if (so->so_type != SOCK_DGRAM) {
        if (sosendallatonce(so) == 0) {
        if (so->so_proto->pr_usrreqs->pru_send_list == NULL) {
                error = EPROTONOSUPPORT;
        if (msgarray == NULL) {
                printf("%s uioarray is NULL\n", __func__);
                printf("%s uiocnt is 0\n", __func__);
        /*
         * Sanity check on the length passed by caller as we are making 'int'
         */
        resid = recv_msg_array_resid(msgarray, uiocnt);
        if (resid < 0 || resid > INT_MAX) {

        if (!(flags & MSG_PEEK) && sorecvmincopy > 0)

        so_update_last_owner_locked(so, p);
        so_update_policy(so);
        so_update_necp_policy(so, NULL, NULL);

        /*
         * If a recv attempt is made on a previously-accepted socket
         * that has been marked as inactive (disconnected), reject
         */
        if (so->so_flags & SOF_DEFUNCT) {
                struct sockbuf *sb = &so->so_rcv;

                SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
                    __func__, proc_pid(p), proc_best_name(p),
                    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                    SOCK_DOM(so), SOCK_TYPE(so), error);
                /*
                 * This socket should have been disconnected and flushed
                 * prior to being returned from sodefunct(); there should
                 * be no data on its receive list, so panic otherwise.
                 */
                if (so->so_state & SS_DEFUNCT)
                        sb_empty_assert(sb, __func__);
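/*
 * Editorial note (not in the original source): soreceive_list() is the
 * batched receive path; on Darwin it is reached, as far as the exported API
 * goes, through the recvmsg_x() syscall wrapper (one recv_msg_elem per
 * message).  The checks above deliberately keep it narrow: datagram sockets
 * only, atomic (send-all-at-once) protocols only, and a protocol that
 * provides packet-chain support via pru_send_list.
 */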
        /*
         * The uio may be empty
         */
        if (npkts >= uiocnt) {
        /*
         * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
         * and if so just return to the caller.  This could happen when
         * soreceive() is called by a socket upcall function during the
         * time the socket is freed.  The socket buffer would have been
         * locked across the upcall, therefore we cannot put this thread
         * to sleep (else we will deadlock) or return EWOULDBLOCK (else
         * we may livelock), because the lock on the socket buffer will
         * only be released when the upcall routine returns to its caller.
         * Because the socket has been officially closed, there can be
         * no further read on it.
         */
        if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
            (SS_NOFDREF | SS_CANTRCVMORE)) {

        error = sblock(&so->so_rcv, SBLOCKWAIT(flags));

        m = so->so_rcv.sb_mb;
        /*
         * Block awaiting more datagram if needed
         */
        if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
            (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
            ((flags & MSG_WAITALL) && npkts < uiocnt))))) {
                /*
                 * Panic if we notice inconsistencies in the socket's
                 * receive list; both sb_mb and sb_cc should correctly
                 * reflect the contents of the list, otherwise we may
                 * end up with false positives during select() or poll()
                 * which could put the application in a bad state.
                 */
                SB_MB_CHECK(&so->so_rcv);
                        error = so->so_error;
                        if ((flags & MSG_PEEK) == 0)
                if (so->so_state & SS_CANTRCVMORE) {
                if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
                    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
                if ((so->so_state & SS_NBIO) ||
                    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
                        error = EWOULDBLOCK;
                /*
                 * Do not block if we got some data
                 */
                if (free_list != NULL) {
                SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
                SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");

                sbunlock(&so->so_rcv, TRUE);    /* keep socket locked */
                error = sbwait(&so->so_rcv);
        OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
        SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
        SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");

        /*
         * Consume the current uio index as we have a datagram
         */
        auio = msgarray[npkts].uio;
        resid = uio_resid(auio);
        msgarray[npkts].which |= SOCK_MSG_DATA;
        psa = (msgarray[npkts].which & SOCK_MSG_SA) ?
            &msgarray[npkts].psa : NULL;
        controlp = (msgarray[npkts].which & SOCK_MSG_CONTROL) ?
            &msgarray[npkts].controlp : NULL;

        nextrecord = m->m_nextpkt;

        if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
                error = soreceive_addr(p, so, psa, flags, &m, &nextrecord, 1);
                if (error == ERESTART)
                else if (error != 0)

        if (m != NULL && m->m_type == MT_CONTROL) {
                error = soreceive_ctl(so, controlp, flags, &m, &nextrecord);

        if (m->m_pkthdr.len == 0) {
                printf("%s:%d so %llx pkt %llx type %u pktlen null\n",
                    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                    (uint64_t)DEBUG_KERNEL_ADDRPERM(m),
        /*
         * Loop to copy the mbufs of the current record
         * Support zero length packets
         */
        while (m != NULL && (len = resid - pktlen) >= 0 && error == 0) {
                        panic("%p m_len zero", m);
                        panic("%p m_type zero", m);
                /*
                 * Clip to the residual length
                 */
                /*
                 * Copy the mbufs via the uio or delay the copy
                 * Sockbuf must be consistent here (points to current mbuf,
                 * it points to next record) when we drop priority;
                 * we must note any additions to the sockbuf when we
                 * block interrupts again.
                 */
                if (len > 0 && can_delay == 0) {
                        socket_unlock(so, 0);
                        error = uiomove(mtod(m, caddr_t), (int)len, auio);
                        delayed_copy_len += len;

                if (len == m->m_len) {
                        /*
                         * m was entirely copied
                         */
                        sbfree(&so->so_rcv, m);
                        nextrecord = m->m_nextpkt;
                        m->m_nextpkt = NULL;

                        /*
                         * Set the first packet to the head of the free list
                         */
                        if (free_list == NULL)
                        /*
                         * Link current packet to tail of free list
                         */
                                if (free_tail != NULL)
                                        free_tail->m_nextpkt = m;
                        /*
                         * Link current mbuf to last mbuf of current packet
                         */
                        /*
                         * Move next buf to head of socket buffer
                         */
                        so->so_rcv.sb_mb = m = ml->m_next;

                                m->m_nextpkt = nextrecord;
                                if (nextrecord == NULL)
                                        so->so_rcv.sb_lastrecord = m;
                                so->so_rcv.sb_mb = nextrecord;
                                SB_EMPTY_FIXUP(&so->so_rcv);
                        SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
                        SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
                /*
                 * Stop the loop on partial copy
                 */
#ifdef MORE_LOCKING_DEBUG
        if (so->so_usecount <= 1) {
                panic("%s: after big while so=%llx ref=%d on socket\n",
                    (uint64_t)DEBUG_KERNEL_ADDRPERM(so), so->so_usecount);
        /*
         * Tell the caller we made a partial copy
         */
                if (so->so_options & SO_DONTTRUNC) {
                        /*
                         * Copyout first the freelist then the partial mbuf
                         */
                        socket_unlock(so, 0);
                        if (delayed_copy_len)
                                error = sodelayed_copy_list(so, msgarray,
                                    uiocnt, &free_list, &delayed_copy_len);
                                error = uiomove(mtod(m, caddr_t), (int)len,
                        so->so_rcv.sb_cc -= len;
                        flags |= MSG_RCVMORE;
                        (void) sbdroprecord(&so->so_rcv);
                        nextrecord = so->so_rcv.sb_mb;

                so->so_rcv.sb_mb = nextrecord;
                /*
                 * First part is an inline SB_EMPTY_FIXUP().  Second
                 * part makes sure sb_lastrecord is up-to-date if
                 * there is still data in the socket buffer.
                 */
                if (so->so_rcv.sb_mb == NULL) {
                        so->so_rcv.sb_mbtail = NULL;
                        so->so_rcv.sb_lastrecord = NULL;
                } else if (nextrecord->m_nextpkt == NULL) {
                        so->so_rcv.sb_lastrecord = nextrecord;
                SB_MB_CHECK(&so->so_rcv);
        SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
        SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");

        /*
         * We can continue to the next packet as long as:
         * - We haven't exhausted the uio array
         * - There was no error
         * - A packet was not truncated
         * - We can still receive more data
         */
        if (npkts < uiocnt && error == 0 &&
            (flags & (MSG_RCVMORE | MSG_TRUNC)) == 0 &&
            (so->so_state & SS_CANTRCVMORE) == 0) {
                sbunlock(&so->so_rcv, TRUE);    /* keep socket locked */

        /*
         * pru_rcvd may cause more data to be received if the socket lock
         * is dropped so we set MSG_HAVEMORE now based on what we know.
         * That way the caller won't be surprised if it receives less data
         */
        if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
                flags |= MSG_HAVEMORE;

        if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
                (*pr->pr_usrreqs->pru_rcvd)(so, flags);

                sbunlock(&so->so_rcv, FALSE);   /* will unlock socket */
                socket_unlock(so, 1);

        if (delayed_copy_len)
                error = sodelayed_copy_list(so, msgarray, uiocnt,
                    &free_list, &delayed_copy_len);
        /*
         * Amortize the cost of freeing the mbufs
         */
        if (free_list != NULL)
                m_freem_list(free_list);
        if (free_others != NULL)
                m_freem_list(free_others);

        KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_END, error,
/*
 * Returns:     0                       Success
 *      <pru_shutdown>:EINVAL
 *      <pru_shutdown>:EADDRNOTAVAIL[TCP]
 *      <pru_shutdown>:ENOBUFS[TCP]
 *      <pru_shutdown>:EMSGSIZE[TCP]
 *      <pru_shutdown>:EHOSTUNREACH[TCP]
 *      <pru_shutdown>:ENETUNREACH[TCP]
 *      <pru_shutdown>:ENETDOWN[TCP]
 *      <pru_shutdown>:ENOMEM[TCP]
 *      <pru_shutdown>:EACCES[TCP]
 *      <pru_shutdown>:EMSGSIZE[TCP]
 *      <pru_shutdown>:ENOBUFS[TCP]
 *      <pru_shutdown>:???[TCP]         [ignorable: mostly IPSEC/firewall/DLIL]
 *      <pru_shutdown>:???              [other protocol families]
 */
soshutdown(struct socket *so, int how)
        KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_START, how, 0, 0, 0, 0);

                    (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) == 0) {
                        error = soshutdownlock(so, how);
                socket_unlock(so, 1);

        KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, how, error, 0, 0, 0);
soshutdownlock_final(struct socket *so, int how)
        struct protosw *pr = so->so_proto;

        sflt_notify(so, sock_evt_shutdown, &how);

        if (how != SHUT_WR) {
                if ((so->so_state & SS_CANTRCVMORE) != 0) {
                        /* read already shut down */
                postevent(so, 0, EV_RCLOSED);
        if (how != SHUT_RD) {
                if ((so->so_state & SS_CANTSENDMORE) != 0) {
                        /* write already shut down */
                error = (*pr->pr_usrreqs->pru_shutdown)(so);
                postevent(so, 0, EV_WCLOSED);

        KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN, how, 1, 0, 0, 0);

soshutdownlock(struct socket *so, int how)
        /*
         * A content filter may delay the actual shutdown until it
         * has processed the pending data
         */
        if (so->so_flags & SOF_CONTENT_FILTER) {
                error = cfil_sock_shutdown(so, &how);
                if (error == EJUSTRETURN) {
                } else if (error != 0) {
#endif /* CONTENT_FILTER */

        error = soshutdownlock_final(so, how);
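/*
 * Editorial note (not in the original source): the `how` values handled above
 * map directly onto the shutdown(2) interface as seen from user space:
 *
 *      shutdown(fd, SHUT_WR);          // no more sends; peer typically sees EOF
 *      shutdown(fd, SHUT_RD);          // no more receives on this side
 *      shutdown(fd, SHUT_RDWR);        // both directions at once
 */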
sowflush(struct socket *so)
        struct sockbuf *sb = &so->so_snd;

        /*
         * Obtain lock on the socket buffer (SB_LOCK).  This is required
         * to prevent the socket buffer from being unexpectedly altered
         * while it is used by another thread in socket send/receive.
         * sblock() must not fail here, hence the assertion.
         */
        (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
        VERIFY(sb->sb_flags & SB_LOCK);

        sb->sb_flags            &= ~(SB_SEL|SB_UPCALL);
        sb->sb_flags            |= SB_DROP;
        sb->sb_upcall           = NULL;
        sb->sb_upcallarg        = NULL;

        sbunlock(sb, TRUE);     /* keep socket locked */

        selthreadclear(&sb->sb_sel);
sorflush(struct socket *so)
        struct sockbuf *sb = &so->so_rcv;
        struct protosw *pr = so->so_proto;
        lck_mtx_t *mutex_held;
        /*
         * XXX: This code is currently commented out, because we may get here
         * as part of sofreelastref(), and at that time, pr_getlock() may no
         * longer be able to return us the lock; this will be fixed in future.
         */
        if (so->so_proto->pr_getlock != NULL)
                mutex_held = (*so->so_proto->pr_getlock)(so, 0);
                mutex_held = so->so_proto->pr_domain->dom_mtx;
        LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

        sflt_notify(so, sock_evt_flush_read, NULL);

        /*
         * Obtain lock on the socket buffer (SB_LOCK).  This is required
         * to prevent the socket buffer from being unexpectedly altered
         * while it is used by another thread in socket send/receive.
         * sblock() must not fail here, hence the assertion.
         */
        (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
        VERIFY(sb->sb_flags & SB_LOCK);

        /*
         * Copy only the relevant fields from "sb" to "asb" which we
         * need for sbrelease() to function.  In particular, skip
         * sb_sel as it contains the wait queue linkage, which would
         * wreak havoc if we were to issue selthreadclear() on "asb".
         * Make sure to not carry over SB_LOCK in "asb", as we need
         * to acquire it later as part of sbrelease().
         */
        bzero(&asb, sizeof (asb));
        asb.sb_cc               = sb->sb_cc;
        asb.sb_hiwat            = sb->sb_hiwat;
        asb.sb_mbcnt            = sb->sb_mbcnt;
        asb.sb_mbmax            = sb->sb_mbmax;
        asb.sb_ctl              = sb->sb_ctl;
        asb.sb_lowat            = sb->sb_lowat;
        asb.sb_mb               = sb->sb_mb;
        asb.sb_mbtail           = sb->sb_mbtail;
        asb.sb_lastrecord       = sb->sb_lastrecord;
        asb.sb_so               = sb->sb_so;
        asb.sb_flags            = sb->sb_flags;
        asb.sb_flags            &= ~(SB_LOCK|SB_SEL|SB_KNOTE|SB_UPCALL);
        asb.sb_flags            |= SB_DROP;

        /*
         * Ideally we'd bzero() these and preserve the ones we need;
         * but to do that we'd need to shuffle things around in the
         * sockbuf, and we can't do it now because there are KEXTS
         * that are directly referring to the socket structure.
         * Setting SB_DROP acts as a barrier to prevent further appends.
         * Clearing SB_SEL is done for selthreadclear() below.
         */
        sb->sb_mbtail           = NULL;
        sb->sb_lastrecord       = NULL;
        sb->sb_timeo.tv_sec     = 0;
        sb->sb_timeo.tv_usec    = 0;
        sb->sb_upcall           = NULL;
        sb->sb_upcallarg        = NULL;
        sb->sb_flags            &= ~(SB_SEL|SB_UPCALL);
        sb->sb_flags            |= SB_DROP;

        sbunlock(sb, TRUE);     /* keep socket locked */

        /*
         * Note that selthreadclear() is called on the original "sb" and
         * not the local "asb" because of the way wait queue linkage is
         * implemented.  Given that selwakeup() may be triggered, SB_SEL
         * should no longer be set (cleared above.)
         */
        selthreadclear(&sb->sb_sel);

        if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose)
                (*pr->pr_domain->dom_dispose)(asb.sb_mb);
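/*
 * Editorial note (not in the original source): the local "asb" snapshot above
 * lets sorflush() hand the flushed mbuf chain (asb.sb_mb) to the domain's
 * dom_dispose routine -- used by protocols with PR_RIGHTS, such as UNIX
 * domain sockets disposing of in-flight file descriptors -- after the live
 * sockbuf has already been reset and unlocked.
 */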
/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 * Returns:     0                       Success
 */
sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
        /*
         * If the user gives us more than we wanted, we ignore it,
         * but if we don't get the minimum length the caller
         * wants, we return EINVAL.  On success, sopt->sopt_valsize
         * is set to however much we actually retrieved.
         */
        if ((valsize = sopt->sopt_valsize) < minlen)
                sopt->sopt_valsize = valsize = len;

        if (sopt->sopt_p != kernproc)
                return (copyin(sopt->sopt_val, buf, valsize));

        bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
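/*
 * Editorial note (not in the original source): the canonical caller pattern,
 * used throughout sosetoptlock() below and by protocol pr_ctloutput routines,
 * is simply:
 *
 *      int optval;
 *      error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval));
 *      if (error != 0)
 *              goto out;
 */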
/*
 * sooptcopyin_timeval
 *   Copy in a timeval value into tv_p, and take into account whether the
 *   calling process is 64-bit or 32-bit.  Moved the sanity checking
 *   code here so that we can verify the 64-bit tv_sec value before we lose
 *   the top 32-bits assigning tv64.tv_sec to tv_p->tv_sec.
 */
sooptcopyin_timeval(struct sockopt *sopt, struct timeval *tv_p)
        if (proc_is64bit(sopt->sopt_p)) {
                struct user64_timeval   tv64;

                if (sopt->sopt_valsize < sizeof (tv64))

                sopt->sopt_valsize = sizeof (tv64);
                if (sopt->sopt_p != kernproc) {
                        error = copyin(sopt->sopt_val, &tv64, sizeof (tv64));
                        bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv64,
                if (tv64.tv_sec < 0 || tv64.tv_sec > LONG_MAX ||
                    tv64.tv_usec < 0 || tv64.tv_usec >= 1000000)

                tv_p->tv_sec = tv64.tv_sec;
                tv_p->tv_usec = tv64.tv_usec;
                struct user32_timeval   tv32;

                if (sopt->sopt_valsize < sizeof (tv32))

                sopt->sopt_valsize = sizeof (tv32);
                if (sopt->sopt_p != kernproc) {
                        error = copyin(sopt->sopt_val, &tv32, sizeof (tv32));
                        bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv32,
                /*
                 * K64todo "comparison is always false due to
                 * limited range of data type"
                 */
                if (tv32.tv_sec < 0 || tv32.tv_sec > LONG_MAX ||
                    tv32.tv_usec < 0 || tv32.tv_usec >= 1000000)

                tv_p->tv_sec = tv32.tv_sec;
                tv_p->tv_usec = tv32.tv_usec;
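/*
 * Editorial note (not in the original source): this is the parsing path for
 * SO_SNDTIMEO/SO_RCVTIMEO.  A minimal userland sketch of what ends up here:
 *
 *      struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 *      setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof (tv));
 *
 * Values with tv_usec outside [0, 1000000) or a negative/oversized tv_sec are
 * rejected with EDOM, per the range checks above and the documented
 * sooptcopyin_timeval:EDOM return.
 */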
soopt_cred_check(struct socket *so, int priv, boolean_t allow_root)
        kauth_cred_t cred = NULL;
        proc_t ep = PROC_NULL;

        if (so->so_flags & SOF_DELEGATED) {
                ep = proc_find(so->e_pid);
                        cred = kauth_cred_proc_ref(ep);

        uid = kauth_cred_getuid(cred ? cred : so->so_cred);

        /* uid is 0 for root */
        if (uid != 0 || !allow_root)
                error = priv_check_cred(cred ? cred : so->so_cred, priv, 0);

                kauth_cred_unref(&cred);
        if (ep != PROC_NULL)
/*
 * Returns:     0                       Success
 *      sooptcopyin:EINVAL
 *      sooptcopyin:EFAULT
 *      sooptcopyin_timeval:EINVAL
 *      sooptcopyin_timeval:EFAULT
 *      sooptcopyin_timeval:EDOM
 *      <pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
 *      <pr_ctloutput>:???
 *      sflt_attach_private:???         [whatever a filter author chooses]
 *      <sf_setoption>:???              [whatever a filter author chooses]
 *
 * Notes:       Other <pr_ctloutput> returns depend on the protocol family;
 *              all <sf_setoption> returns depend on what the filter author
 *              causes their filter to return.
 */
sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
#if CONFIG_MACF_SOCKET
#endif /* MAC_SOCKET */

        if (sopt->sopt_dir != SOPT_SET)
                sopt->sopt_dir = SOPT_SET;

        if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
            (SS_CANTRCVMORE | SS_CANTSENDMORE) &&
            (so->so_flags & SOF_NPX_SETOPTSHUT) == 0) {
                /* the socket has been shutdown, no more sockopt's */

        error = sflt_setsockopt(so, sopt);
                if (error == EJUSTRETURN)

        if (sopt->sopt_level != SOL_SOCKET) {
                if (so->so_proto != NULL &&
                    so->so_proto->pr_ctloutput != NULL) {
                        error = (*so->so_proto->pr_ctloutput)(so, sopt);
                error = ENOPROTOOPT;
                /*
                 * Allow socket-level (SOL_SOCKET) options to be filtered by
                 * the protocol layer, if needed.  A zero value returned from
                 * the handler means use default socket-level processing as
                 * done by the rest of this routine.  Otherwise, any other
                 * return value indicates that the option is unsupported.
                 */
                if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs->
                    pru_socheckopt(so, sopt)) != 0)

                switch (sopt->sopt_name) {
                        error = sooptcopyin(sopt, &l, sizeof (l), sizeof (l));
                        so->so_linger = (sopt->sopt_name == SO_LINGER) ?
                            l.l_linger : l.l_linger * hz;
                                so->so_options |= SO_LINGER;
                                so->so_options &= ~SO_LINGER;
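                /*
                 * Editorial note (not in the original source): SO_LINGER takes
                 * a struct linger from user space; the `* hz` conversion above
                 * applies to the seconds-based variant of the option (its case
                 * label is elided in this excerpt).  Typical caller usage:
                 *
                 *      struct linger l = { .l_onoff = 1, .l_linger = 10 };
                 *      setsockopt(fd, SOL_SOCKET, SO_LINGER, &l, sizeof (l));
                 */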
                case SO_USELOOPBACK:
                case SO_TIMESTAMP_MONOTONIC:
                case SO_WANTOOBFLAG:
                case SO_NOWAKEFROMSLEEP:
                case SO_NOAPNFALLBK:
                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                                so->so_options |= sopt->sopt_name;
                                so->so_options &= ~sopt->sopt_name;

                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                        /*
                         * Values < 1 make no sense for any of these
                         * options, so disallow them.
                         */
                        switch (sopt->sopt_name) {
                                struct sockbuf *sb =
                                    (sopt->sopt_name == SO_SNDBUF) ?
                                    &so->so_snd : &so->so_rcv;
                                if (sbreserve(sb, (u_int32_t)optval) == 0) {
                                sb->sb_flags |= SB_USRSIZE;
                                sb->sb_flags &= ~SB_AUTOSIZE;
                                sb->sb_idealsize = (u_int32_t)optval;
                        /*
                         * Make sure the low-water is never greater than
                         */
                                int space = sbspace(&so->so_snd);
                                u_int32_t hiwat = so->so_snd.sb_hiwat;

                                if (so->so_snd.sb_flags & SB_UNIX) {
                                            (struct unpcb *)(so->so_pcb);
                                            unp->unp_conn != NULL) {
                                                hiwat += unp->unp_conn->unp_cc;

                                so->so_snd.sb_lowat =
                                if (space >= so->so_snd.sb_lowat) {
                                so->so_rcv.sb_lowat =
                                    (optval > so->so_rcv.sb_hiwat) ?
                                    so->so_rcv.sb_hiwat : optval;
                                data_len = so->so_rcv.sb_cc
                                    - so->so_rcv.sb_ctl;
                                if (data_len >= so->so_rcv.sb_lowat)
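                /*
                 * Editorial note (not in the original source): SO_SNDBUF and
                 * SO_RCVBUF requests land in the sbreserve() call above, which
                 * pins the buffer size (SB_USRSIZE) and disables autosizing
                 * (SB_AUTOSIZE cleared).  A typical caller sketch:
                 *
                 *      int sz = 256 * 1024;
                 *      if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &sz,
                 *          sizeof (sz)) == -1)
                 *              perror("setsockopt");   // fails if sbreserve()
                 *                                      // cannot grant the space
                 */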
                        error = sooptcopyin_timeval(sopt, &tv);
                        switch (sopt->sopt_name) {
                                so->so_snd.sb_timeo = tv;
                                so->so_rcv.sb_timeo = tv;

                        error = sooptcopyin(sopt, &nke, sizeof (nke),
                        error = sflt_attach_internal(so, nke.nke_handle);

                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                                so->so_flags |= SOF_NOSIGPIPE;
                                so->so_flags &= ~SOF_NOSIGPIPE;

                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                                so->so_flags |= SOF_NOADDRAVAIL;
                                so->so_flags &= ~SOF_NOADDRAVAIL;

                case SO_REUSESHAREUID:
                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                                so->so_flags |= SOF_REUSESHAREUID;
                                so->so_flags &= ~SOF_REUSESHAREUID;

                case SO_NOTIFYCONFLICT:
                        if (kauth_cred_issuser(kauth_cred_get()) == 0) {
                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                                so->so_flags |= SOF_NOTIFYCONFLICT;
                                so->so_flags &= ~SOF_NOTIFYCONFLICT;

                case SO_RESTRICTIONS:
                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                        error = so_set_restrictions(so, optval);

                case SO_AWDL_UNRESTRICTED:
                        if (SOCK_DOM(so) != PF_INET &&
                            SOCK_DOM(so) != PF_INET6) {
                        error = sooptcopyin(sopt, &optval, sizeof(optval),
                                error = soopt_cred_check(so,
                                    PRIV_NET_RESTRICTED_AWDL, false);
                                        inp_set_awdl_unrestricted(
                                inp_clear_awdl_unrestricted(sotoinpcb(so));

                case SO_INTCOPROC_ALLOW:
                        if (SOCK_DOM(so) != PF_INET6) {
                        error = sooptcopyin(sopt, &optval, sizeof(optval),
                                    inp_get_intcoproc_allowed(sotoinpcb(so)) == FALSE) {
                                error = soopt_cred_check(so,
                                    PRIV_NET_RESTRICTED_INTCOPROC, false);
                                        inp_set_intcoproc_allowed(
                        } else if (optval == 0)
                                inp_clear_intcoproc_allowed(sotoinpcb(so));
#if CONFIG_MACF_SOCKET
                        if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac),
                            sizeof (extmac))) != 0)
                        error = mac_setsockopt_label(proc_ucred(sopt->sopt_p),
#endif /* MAC_SOCKET */

                case SO_UPCALLCLOSEWAIT:
                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                                so->so_flags |= SOF_UPCALLCLOSEWAIT;
                                so->so_flags &= ~SOF_UPCALLCLOSEWAIT;

                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                                so->so_flags |= SOF_BINDRANDOMPORT;
                                so->so_flags &= ~SOF_BINDRANDOMPORT;

                case SO_NP_EXTENSIONS: {
                        struct so_np_extensions sonpx;

                        error = sooptcopyin(sopt, &sonpx, sizeof (sonpx),
                        if (sonpx.npx_mask & ~SONPX_MASK_VALID) {
                        /*
                         * Only one bit defined for now
                         */
                        if ((sonpx.npx_mask & SONPX_SETOPTSHUT)) {
                                if ((sonpx.npx_flags & SONPX_SETOPTSHUT))
                                        so->so_flags |= SOF_NPX_SETOPTSHUT;
                                        so->so_flags &= ~SOF_NPX_SETOPTSHUT;

                case SO_TRAFFIC_CLASS: {
                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                        if (optval >= SO_TC_NET_SERVICE_OFFSET) {
                                int netsvc = optval - SO_TC_NET_SERVICE_OFFSET;
                                error = so_set_net_service_type(so, netsvc);
                        error = so_set_traffic_class(so, optval);
                        so->so_flags1 &= ~SOF1_TC_NET_SERV_TYPE;
                        so->so_netsvctype = _NET_SERVICE_TYPE_UNSPEC;

                case SO_RECV_TRAFFIC_CLASS: {
                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                                so->so_flags &= ~SOF_RECV_TRAFFIC_CLASS;
                                so->so_flags |= SOF_RECV_TRAFFIC_CLASS;

#if (DEVELOPMENT || DEBUG)
                case SO_TRAFFIC_CLASS_DBG: {
                        struct so_tcdbg so_tcdbg;

                        error = sooptcopyin(sopt, &so_tcdbg,
                            sizeof (struct so_tcdbg), sizeof (struct so_tcdbg));
                        error = so_set_tcdbg(so, &so_tcdbg);
#endif /* (DEVELOPMENT || DEBUG) */
                case SO_PRIVILEGED_TRAFFIC_CLASS:
                        error = priv_check_cred(kauth_cred_get(),
                            PRIV_NET_PRIVILEGED_TRAFFIC_CLASS, 0);
                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                                so->so_flags &= ~SOF_PRIVILEGED_TRAFFIC_CLASS;
                                so->so_flags |= SOF_PRIVILEGED_TRAFFIC_CLASS;

                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                        if (error != 0 || (so->so_flags & SOF_DEFUNCT)) {
                        /*
                         * Any process can set SO_DEFUNCTOK (clear
                         * SOF_NODEFUNCT), but only root can clear
                         * SO_DEFUNCTOK (set SOF_NODEFUNCT).
                         */
                            kauth_cred_issuser(kauth_cred_get()) == 0) {
                                so->so_flags &= ~SOF_NODEFUNCT;
                                so->so_flags |= SOF_NODEFUNCT;

                        if (SOCK_DOM(so) == PF_INET ||
                            SOCK_DOM(so) == PF_INET6) {
                                char s[MAX_IPv6_STR_LEN];
                                char d[MAX_IPv6_STR_LEN];
                                struct inpcb *inp = sotoinpcb(so);

                                SODEFUNCTLOG("%s[%d, %s]: so 0x%llx "
                                    "[%s %s:%d -> %s:%d] is now marked "
                                    "as %seligible for "
                                    "defunct\n", __func__, proc_selfpid(),
                                    proc_best_name(current_proc()),
                                    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                                    (SOCK_TYPE(so) == SOCK_STREAM) ?
                                    "TCP" : "UDP", inet_ntop(SOCK_DOM(so),
                                    ((SOCK_DOM(so) == PF_INET) ?
                                    (void *)&inp->inp_laddr.s_addr :
                                    (void *)&inp->in6p_laddr), s, sizeof (s)),
                                    ntohs(inp->in6p_lport),
                                    inet_ntop(SOCK_DOM(so),
                                    (SOCK_DOM(so) == PF_INET) ?
                                    (void *)&inp->inp_faddr.s_addr :
                                    (void *)&inp->in6p_faddr, d, sizeof (d)),
                                    ntohs(inp->in6p_fport),
                                    (so->so_flags & SOF_NODEFUNCT) ?
                                SODEFUNCTLOG("%s[%d, %s]: so 0x%llx [%d,%d] "
                                    "is now marked as %seligible for "
                                    __func__, proc_selfpid(),
                                    proc_best_name(current_proc()),
                                    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                                    SOCK_DOM(so), SOCK_TYPE(so),
                                    (so->so_flags & SOF_NODEFUNCT) ?
                        /* This option is not settable */
                case SO_OPPORTUNISTIC:
                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                                error = so_set_opportunistic(so, optval);

                        /* This option is handled by lower layer(s) */

                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                                error = so_set_recv_anyif(so, optval);

                case SO_TRAFFIC_MGT_BACKGROUND: {
                        /* This option is handled by lower layer(s) */

                case SO_FLOW_DIVERT_TOKEN:
                        error = flow_divert_token_set(so, sopt);
#endif  /* FLOW_DIVERT */

                        if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
                            sizeof (optval))) != 0)
                        error = so_set_effective_pid(so, optval, sopt->sopt_p);

                case SO_DELEGATED_UUID: {
                        if ((error = sooptcopyin(sopt, &euuid, sizeof (euuid),
                            sizeof (euuid))) != 0)
                        error = so_set_effective_uuid(so, euuid, sopt->sopt_p);

                case SO_NECP_ATTRIBUTES:
                        error = necp_set_socket_attributes(so, sopt);

                case SO_NECP_CLIENTUUID:
                        if (SOCK_DOM(so) == PF_MULTIPATH) {
                                /* Handled by MPTCP itself */
                        if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {

                        struct inpcb *inp = sotoinpcb(so);
                        if (!uuid_is_null(inp->necp_client_uuid)) {
                                // Clear out the old client UUID if present
                                necp_inpcb_remove_cb(inp);

                        error = sooptcopyin(sopt, &inp->necp_client_uuid,
                            sizeof(uuid_t), sizeof(uuid_t));
                        if (uuid_is_null(inp->necp_client_uuid)) {
                        error = necp_client_register_socket_flow(so->last_pid,
                            inp->necp_client_uuid, inp);
                                uuid_clear(inp->necp_client_uuid);
                        if (inp->inp_lport != 0) {
                                // There is a bound local port, so this is not
                                // a fresh socket. Assign it to the client.
                                necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
                case SO_EXTENDED_BK_IDLE:
                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                                error = so_set_extended_bk_idle(so, optval);

                case SO_MARK_CELLFALLBACK:
                        error = sooptcopyin(sopt, &optval, sizeof(optval),
                                so->so_flags1 &= ~SOF1_CELLFALLBACK;
                                so->so_flags1 |= SOF1_CELLFALLBACK;

                case SO_NET_SERVICE_TYPE: {
                        error = sooptcopyin(sopt, &optval, sizeof(optval),
                        error = so_set_net_service_type(so, optval);

                case SO_QOSMARKING_POLICY_OVERRIDE:
                        error = priv_check_cred(kauth_cred_get(),
                            PRIV_NET_QOSMARKING_POLICY_OVERRIDE, 0);
                        error = sooptcopyin(sopt, &optval, sizeof(optval),
                                so->so_flags1 &= ~SOF1_QOSMARKING_POLICY_OVERRIDE;
                                so->so_flags1 |= SOF1_QOSMARKING_POLICY_OVERRIDE;

                        error = ENOPROTOOPT;
                if (error == 0 && so->so_proto != NULL &&
                    so->so_proto->pr_ctloutput != NULL) {
                        (void) so->so_proto->pr_ctloutput(so, sopt);

                socket_unlock(so, 1);
/* Helper routines for getsockopt */
sooptcopyout(struct sockopt *sopt, void *buf, size_t len)
     * Documented get behavior is that we always return a value,
     * possibly truncated to fit in the user's buffer.
     * Traditional behavior is that we always tell the user
     * precisely how much we copied, rather than something useful
     * like the total amount we had available for her.
     * Note that this interface is not idempotent; the entire answer must
     * be generated ahead of time.
    valsize = min(len, sopt->sopt_valsize);
    sopt->sopt_valsize = valsize;
    if (sopt->sopt_val != USER_ADDR_NULL) {
        if (sopt->sopt_p != kernproc)
            error = copyout(buf, sopt->sopt_val, valsize);
            bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
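/*
 * Copy a kernel timeval out to user space, using the 64-bit or 32-bit
 * user layout depending on the requesting process.
 */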
sooptcopyout_timeval(struct sockopt *sopt, const struct timeval *tv_p)
    struct user64_timeval   tv64 = {};
    struct user32_timeval   tv32 = {};

    if (proc_is64bit(sopt->sopt_p)) {
        len = sizeof (tv64);
        tv64.tv_sec = tv_p->tv_sec;
        tv64.tv_usec = tv_p->tv_usec;
        len = sizeof (tv32);
        tv32.tv_sec = tv_p->tv_sec;
        tv32.tv_usec = tv_p->tv_usec;

    valsize = min(len, sopt->sopt_valsize);
    sopt->sopt_valsize = valsize;
    if (sopt->sopt_val != USER_ADDR_NULL) {
        if (sopt->sopt_p != kernproc)
            error = copyout(val, sopt->sopt_val, valsize);
            bcopy(val, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
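/*
 * getsockopt entry point: options at levels other than SOL_SOCKET are
 * passed to the protocol via pr_ctloutput, while socket-level options
 * are handled below.  'dolock' indicates whether this routine takes and
 * releases the socket lock itself.
 */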
 *      <pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
 *      <pr_ctloutput>:???
 *      <sf_getoption>:???
sogetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
#if CONFIG_MACF_SOCKET
#endif /* MAC_SOCKET */

    if (sopt->sopt_dir != SOPT_GET)
        sopt->sopt_dir = SOPT_GET;

    error = sflt_getsockopt(so, sopt);
        if (error == EJUSTRETURN)

    if (sopt->sopt_level != SOL_SOCKET) {
        if (so->so_proto != NULL &&
            so->so_proto->pr_ctloutput != NULL) {
            error = (*so->so_proto->pr_ctloutput)(so, sopt);
        error = ENOPROTOOPT;
         * Allow socket-level (SOL_SOCKET) options to be filtered by
         * the protocol layer, if needed.  A zero value returned from
         * the handler means use default socket-level processing as
         * done by the rest of this routine.  Otherwise, any other
         * return value indicates that the option is unsupported.
        if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs->
            pru_socheckopt(so, sopt)) != 0)

        switch (sopt->sopt_name) {
            l.l_onoff = ((so->so_options & SO_LINGER) ? 1 : 0);
            l.l_linger = (sopt->sopt_name == SO_LINGER) ?
                so->so_linger : so->so_linger / hz;
            error = sooptcopyout(sopt, &l, sizeof (l));

        case SO_USELOOPBACK:
        case SO_TIMESTAMP_MONOTONIC:
        case SO_WANTOOBFLAG:
        case SO_NOWAKEFROMSLEEP:
        case SO_NOAPNFALLBK:
            optval = so->so_options & sopt->sopt_name;
            error = sooptcopyout(sopt, &optval, sizeof (optval));

            optval = so->so_type;

            if (so->so_proto->pr_flags & PR_ATOMIC) {
                m1 = so->so_rcv.sb_mb;
                while (m1 != NULL) {
                    if (m1->m_type == MT_DATA ||
                        m1->m_type == MT_HEADER ||
                        m1->m_type == MT_OOBDATA)
                        pkt_total += m1->m_len;
                optval = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;

            if (so->so_proto->pr_flags & PR_ATOMIC) {
                m1 = so->so_rcv.sb_mb;
                while (m1 != NULL) {
                    if (m1->m_type == MT_DATA ||
                        m1->m_type == MT_HEADER ||
                        m1->m_type == MT_OOBDATA)

            optval = so->so_snd.sb_cc;

            optval = so->so_error;

            u_int32_t hiwat = so->so_snd.sb_hiwat;
            if (so->so_snd.sb_flags & SB_UNIX) {
                    (struct unpcb *)(so->so_pcb);
                if (unp != NULL && unp->unp_conn != NULL) {
                    hiwat += unp->unp_conn->unp_cc;

            optval = so->so_rcv.sb_hiwat;
            optval = so->so_snd.sb_lowat;
            optval = so->so_rcv.sb_lowat;

            tv = (sopt->sopt_name == SO_SNDTIMEO ?
                so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
            error = sooptcopyout_timeval(sopt, &tv);

            optval = (so->so_flags & SOF_NOSIGPIPE);
            optval = (so->so_flags & SOF_NOADDRAVAIL);

        case SO_REUSESHAREUID:
            optval = (so->so_flags & SOF_REUSESHAREUID);

        case SO_NOTIFYCONFLICT:
            optval = (so->so_flags & SOF_NOTIFYCONFLICT);

        case SO_RESTRICTIONS:
            optval = so_get_restrictions(so);

        case SO_AWDL_UNRESTRICTED:
            if (SOCK_DOM(so) == PF_INET ||
                SOCK_DOM(so) == PF_INET6) {
                optval = inp_get_awdl_unrestricted(

        case SO_INTCOPROC_ALLOW:
            if (SOCK_DOM(so) == PF_INET6) {
                optval = inp_get_intcoproc_allowed(

#if CONFIG_MACF_SOCKET
            if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac),
                sizeof (extmac))) != 0 ||
                (error = mac_socket_label_get(proc_ucred(
                sopt->sopt_p), so, &extmac)) != 0)
            error = sooptcopyout(sopt, &extmac, sizeof (extmac));
#endif /* MAC_SOCKET */

#if CONFIG_MACF_SOCKET
            if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac),
                sizeof (extmac))) != 0 ||
                (error = mac_socketpeer_label_get(proc_ucred(
                sopt->sopt_p), so, &extmac)) != 0)
            error = sooptcopyout(sopt, &extmac, sizeof (extmac));
#endif /* MAC_SOCKET */

#ifdef __APPLE_API_PRIVATE
        case SO_UPCALLCLOSEWAIT:
            optval = (so->so_flags & SOF_UPCALLCLOSEWAIT);
            optval = (so->so_flags & SOF_BINDRANDOMPORT);

        case SO_NP_EXTENSIONS: {
            struct so_np_extensions sonpx;
            sonpx.npx_flags = (so->so_flags & SOF_NPX_SETOPTSHUT) ?
                SONPX_SETOPTSHUT : 0;
            sonpx.npx_mask = SONPX_MASK_VALID;
            error = sooptcopyout(sopt, &sonpx,
                sizeof (struct so_np_extensions));

        case SO_TRAFFIC_CLASS:
            optval = so->so_traffic_class;

        case SO_RECV_TRAFFIC_CLASS:
            optval = (so->so_flags & SOF_RECV_TRAFFIC_CLASS);

        case SO_TRAFFIC_CLASS_STATS:
            error = sooptcopyout(sopt, &so->so_tc_stats,
                sizeof (so->so_tc_stats));

#if (DEVELOPMENT || DEBUG)
        case SO_TRAFFIC_CLASS_DBG:
            error = sogetopt_tcdbg(so, sopt);
#endif /* (DEVELOPMENT || DEBUG) */

        case SO_PRIVILEGED_TRAFFIC_CLASS:
            optval = (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS);

            optval = !(so->so_flags & SOF_NODEFUNCT);
            optval = (so->so_flags & SOF_DEFUNCT);

        case SO_OPPORTUNISTIC:
            optval = so_get_opportunistic(so);

            /* This option is not gettable */

            optval = so_get_recv_anyif(so);

        case SO_TRAFFIC_MGT_BACKGROUND:
            /* This option is handled by lower layer(s) */
            if (so->so_proto != NULL &&
                so->so_proto->pr_ctloutput != NULL) {
                (void) so->so_proto->pr_ctloutput(so, sopt);

        case SO_FLOW_DIVERT_TOKEN:
            error = flow_divert_token_get(so, sopt);
#endif  /* FLOW_DIVERT */

        case SO_NECP_ATTRIBUTES:
            error = necp_get_socket_attributes(so, sopt);

        case SO_NECP_CLIENTUUID:
            if (SOCK_DOM(so) == PF_MULTIPATH) {
                ncu = &mpsotomppcb(so)->necp_client_uuid;
            } else if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
                ncu = &sotoinpcb(so)->necp_client_uuid;
            error = sooptcopyout(sopt, ncu, sizeof(uuid_t));

        case SO_CFIL_SOCK_ID: {
            cfil_sock_id_t sock_id;
            sock_id = cfil_sock_id_from_socket(so);
            error = sooptcopyout(sopt, &sock_id,
                sizeof(cfil_sock_id_t));
#endif  /* CONTENT_FILTER */

        case SO_EXTENDED_BK_IDLE:
            optval = (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED);

        case SO_MARK_CELLFALLBACK:
            optval = ((so->so_flags1 & SOF1_CELLFALLBACK) > 0)

        case SO_NET_SERVICE_TYPE: {
            if ((so->so_flags1 & SOF1_TC_NET_SERV_TYPE))
                optval = so->so_netsvctype;
                optval = NET_SERVICE_TYPE_BE;

        case SO_NETSVC_MARKING_LEVEL:
            optval = so_get_netsvc_marking_level(so);

            error = ENOPROTOOPT;

        socket_unlock(so, 1);
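/*
 * Allocate an mbuf chain large enough to hold the option value
 * (sopt->sopt_valsize bytes) for protocol handlers that want the
 * value in mbuf form.
 */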
 * The size limit on our soopt_getm is different from that on FreeBSD.
 * We limit the size of options to MCLBYTES. This will have to change
 * if we need to define options that need more space than MCLBYTES.
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
    struct mbuf *m, *m_prev;
    int sopt_size = sopt->sopt_valsize;

    if (sopt_size <= 0 || sopt_size > MCLBYTES)

    how = sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT;
    MGET(m, how, MT_DATA);
    if (sopt_size > MLEN) {
        if ((m->m_flags & M_EXT) == 0) {
        m->m_len = min(MCLBYTES, sopt_size);
        m->m_len = min(MLEN, sopt_size);
    sopt_size -= m->m_len;

    while (sopt_size > 0) {
        MGET(m, how, MT_DATA);
        if (sopt_size > MLEN) {
            if ((m->m_flags & M_EXT) == 0) {
            m->m_len = min(MCLBYTES, sopt_size);
            m->m_len = min(MLEN, sopt_size);
        sopt_size -= m->m_len;
/* copyin sopt data into mbuf chain */
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
    struct mbuf *m0 = m;

    if (sopt->sopt_val == USER_ADDR_NULL)
    while (m != NULL && sopt->sopt_valsize >= m->m_len) {
        if (sopt->sopt_p != kernproc) {
            error = copyin(sopt->sopt_val, mtod(m, char *),
            bcopy(CAST_DOWN(caddr_t, sopt->sopt_val),
                mtod(m, char *), m->m_len);
        sopt->sopt_valsize -= m->m_len;
        sopt->sopt_val += m->m_len;
    /* enough space should have been allocated at ip6_sooptmcopyin() */
        panic("soopt_mcopyin");
/* copyout mbuf chain data into soopt */
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
    struct mbuf *m0 = m;

    if (sopt->sopt_val == USER_ADDR_NULL)
    while (m != NULL && sopt->sopt_valsize >= m->m_len) {
        if (sopt->sopt_p != kernproc) {
            error = copyout(mtod(m, char *), sopt->sopt_val,
            bcopy(mtod(m, char *),
                CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
        sopt->sopt_valsize -= m->m_len;
        sopt->sopt_val += m->m_len;
        valsize += m->m_len;
        /* a large enough sockopt buffer should have been provided by user-land */
    sopt->sopt_valsize = valsize;
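/*
 * Notify the owner of the socket that out-of-band data has arrived:
 * deliver SIGURG to the owning process or process group, wake up any
 * select()/poll() waiters and post a NOTE_OOB knote.
 */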
sohasoutofband(struct socket *so)
    if (so->so_pgid < 0)
        gsignal(-so->so_pgid, SIGURG);
    else if (so->so_pgid > 0)
        proc_signal(so->so_pgid, SIGURG);
    selwakeup(&so->so_rcv.sb_sel);
    if (so->so_rcv.sb_flags & SB_KNOTE) {
        KNOTE(&so->so_rcv.sb_sel.si_note,
            (NOTE_OOB | SO_FILT_HINT_LOCKED));
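/* select/poll support: report readable, writable and exceptional conditions */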
sopoll(struct socket *so, int events, kauth_cred_t cred, void * wql)
#pragma unused(cred)
    struct proc *p = current_proc();

    so_update_last_owner_locked(so, PROC_NULL);
    so_update_policy(so);

    if (events & (POLLIN | POLLRDNORM))
            revents |= events & (POLLIN | POLLRDNORM);

    if (events & (POLLOUT | POLLWRNORM))
        if (sowriteable(so))
            revents |= events & (POLLOUT | POLLWRNORM);

    if (events & (POLLPRI | POLLRDBAND))
        if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
            revents |= events & (POLLPRI | POLLRDBAND);

        if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
             * Darwin sets the flag first,
             * BSD calls selrecord first
            so->so_rcv.sb_flags |= SB_SEL;
            selrecord(p, &so->so_rcv.sb_sel, wql);

        if (events & (POLLOUT | POLLWRNORM)) {
             * Darwin sets the flag first,
             * BSD calls selrecord first
            so->so_snd.sb_flags |= SB_SEL;
            selrecord(p, &so->so_snd.sb_sel, wql);

    socket_unlock(so, 1);
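/*
 * kqueue attach for sockets: map EVFILT_READ/WRITE/SOCK/EXCEPT to the
 * corresponding sub-filter and run its attach routine with the socket
 * still locked.
 */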
soo_kqfilter(struct fileproc *fp, struct knote *kn,
    struct kevent_internal_s *kev, vfs_context_t ctx)
#if !CONFIG_MACF_SOCKET
#endif /* MAC_SOCKET */
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    so_update_last_owner_locked(so, PROC_NULL);
    so_update_policy(so);

#if CONFIG_MACF_SOCKET
    if (mac_socket_check_kqfilter(proc_ucred(vfs_context_proc(ctx)),
        socket_unlock(so, 1);
        kn->kn_flags = EV_ERROR;
        kn->kn_data = EPERM;
#endif /* MAC_SOCKET */

    switch (kn->kn_filter) {
        kn->kn_filtid = EVFILTID_SOREAD;
        kn->kn_filtid = EVFILTID_SOWRITE;
        kn->kn_filtid = EVFILTID_SCK;
        kn->kn_filtid = EVFILTID_SOEXCEPT;
        socket_unlock(so, 1);
        kn->kn_flags = EV_ERROR;
        kn->kn_data = EINVAL;

     * call the appropriate sub-filter attach
     * with the socket still locked
    result = knote_fops(kn)->f_attach(kn, kev);

    socket_unlock(so, 1);
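/*
 * Common readable test for the read filter.  For listening sockets the
 * completed-connection queue length is reported; otherwise the amount of
 * protocol data in the receive buffer is compared against the effective
 * low-water mark.
 */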
filt_soread_common(struct knote *kn, struct socket *so)
    if (so->so_options & SO_ACCEPTCONN) {
         * Radar 6615193 handle the listen case dynamically
         * for kqueue read filter. This allows listen() to be
         * called after registering the kqueue EVFILT_READ.
        kn->kn_data = so->so_qlen;
        is_not_empty = ! TAILQ_EMPTY(&so->so_comp);
        return (is_not_empty);

    /* socket isn't a listener */
     * NOTE_LOWAT specifies new low water mark in data, i.e.
     * the bytes of protocol data. We therefore exclude any
    kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;

    if (kn->kn_sfflags & NOTE_OOB) {
        if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
            kn->kn_fflags |= NOTE_OOB;
            kn->kn_data -= so->so_oobmark;

    if ((so->so_state & SS_CANTRCVMORE)
        && cfil_sock_data_pending(&so->so_rcv) == 0
#endif /* CONTENT_FILTER */
        kn->kn_flags |= EV_EOF;
        kn->kn_fflags = so->so_error;

    if (so->so_error) {     /* temporary udp error */

    int64_t lowwat = so->so_rcv.sb_lowat;
     * Ensure that when NOTE_LOWAT is used, the derived
     * low water mark is bounded by socket's rcv buf's
     * high and low water mark values.
    if (kn->kn_sfflags & NOTE_LOWAT) {
        if (kn->kn_sdata > so->so_rcv.sb_hiwat)
            lowwat = so->so_rcv.sb_hiwat;
        else if (kn->kn_sdata > lowwat)
            lowwat = kn->kn_sdata;

     * The order below is important. Since NOTE_LOWAT
     * overrides sb_lowat, check for NOTE_LOWAT case
    if (kn->kn_sfflags & NOTE_LOWAT)
        return (kn->kn_data >= lowwat);

    return (so->so_rcv.sb_cc >= lowwat);
filt_sorattach(struct knote *kn, __unused struct kevent_internal_s *kev)
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

     * If the caller explicitly asked for OOB results (e.g. poll())
     * from EVFILT_READ, then save that off in the hookid field
     * and reserve the kn_flags EV_OOBAND bit for output only.
    if (kn->kn_filter == EVFILT_READ &&
        kn->kn_flags & EV_OOBAND) {
        kn->kn_flags &= ~EV_OOBAND;
        kn->kn_hookid = EV_OOBAND;

    if (KNOTE_ATTACH(&so->so_rcv.sb_sel.si_note, kn))
        so->so_rcv.sb_flags |= SB_KNOTE;

    /* indicate if event is already fired */
    return filt_soread_common(kn, so);

filt_sordetach(struct knote *kn)
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    if (so->so_rcv.sb_flags & SB_KNOTE)
        if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
            so->so_rcv.sb_flags &= ~SB_KNOTE;
    socket_unlock(so, 1);
filt_soread(struct knote *kn, long hint)
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    if ((hint & SO_FILT_HINT_LOCKED) == 0)
    retval = filt_soread_common(kn, so);
    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_unlock(so, 1);

filt_sortouch(struct knote *kn, struct kevent_internal_s *kev)
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    /* save off the new input fflags and data */
    kn->kn_sfflags = kev->fflags;
    kn->kn_sdata = kev->data;
    if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
        kn->kn_udata = kev->udata;

    /* determine if changes result in fired events */
    retval = filt_soread_common(kn, so);

    socket_unlock(so, 1);

filt_sorprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
#pragma unused(data)
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    retval = filt_soread_common(kn, so);
        *kev = kn->kn_kevent;
        if (kn->kn_flags & EV_CLEAR) {

    socket_unlock(so, 1);
so_wait_for_if_feedback(struct socket *so)
    if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) &&
        (so->so_state & SS_ISCONNECTED)) {
        struct inpcb *inp = sotoinpcb(so);
        if (INP_WAIT_FOR_IF_FEEDBACK(inp))
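/*
 * Common writable test for the write filter: the available send-buffer
 * space is compared against the effective low-water mark, with optional
 * "not sent" low-water checks for TCP and MPTCP.
 */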
filt_sowrite_common(struct knote *kn, struct socket *so)
    kn->kn_data = sbspace(&so->so_snd);
    if (so->so_state & SS_CANTSENDMORE) {
        kn->kn_flags |= EV_EOF;
        kn->kn_fflags = so->so_error;

    if (so->so_error) {     /* temporary udp error */

    if (!socanwrite(so)) {

    if (so->so_flags1 & SOF1_PRECONNECT_DATA) {

    int64_t lowwat = so->so_snd.sb_lowat;
    if (kn->kn_sfflags & NOTE_LOWAT) {
        if (kn->kn_sdata > so->so_snd.sb_hiwat)
            lowwat = so->so_snd.sb_hiwat;
        else if (kn->kn_sdata > lowwat)
            lowwat = kn->kn_sdata;

    if (kn->kn_data >= lowwat) {
        if ((so->so_flags & SOF_NOTSENT_LOWAT)
#if (DEBUG || DEVELOPMENT)
            && so_notsent_lowat_check == 1
#endif /* DEBUG || DEVELOPMENT */
            if ((SOCK_DOM(so) == PF_INET ||
                SOCK_DOM(so) == PF_INET6) &&
                so->so_type == SOCK_STREAM) {
                ret = tcp_notsent_lowat_check(so);
            else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
                (SOCK_PROTO(so) == IPPROTO_TCP)) {
                ret = mptcp_notsent_lowat_check(so);

    if (so_wait_for_if_feedback(so))
filt_sowattach(struct knote *kn, __unused struct kevent_internal_s *kev)
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    if (KNOTE_ATTACH(&so->so_snd.sb_sel.si_note, kn))
        so->so_snd.sb_flags |= SB_KNOTE;

    /* determine if it's already fired */
    return filt_sowrite_common(kn, so);

filt_sowdetach(struct knote *kn)
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    if (so->so_snd.sb_flags & SB_KNOTE)
        if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
            so->so_snd.sb_flags &= ~SB_KNOTE;
    socket_unlock(so, 1);

filt_sowrite(struct knote *kn, long hint)
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    if ((hint & SO_FILT_HINT_LOCKED) == 0)
    ret = filt_sowrite_common(kn, so);
    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_unlock(so, 1);
filt_sowtouch(struct knote *kn, struct kevent_internal_s *kev)
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    /* save off the new input fflags and data */
    kn->kn_sfflags = kev->fflags;
    kn->kn_sdata = kev->data;
    if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
        kn->kn_udata = kev->udata;

    /* determine if these changes result in a triggered event */
    ret = filt_sowrite_common(kn, so);

    socket_unlock(so, 1);

filt_sowprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
#pragma unused(data)
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    ret = filt_sowrite_common(kn, so);
        *kev = kn->kn_kevent;
        if (kn->kn_flags & EV_CLEAR) {

    socket_unlock(so, 1);
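/*
 * Translate socket event hints and current socket state into EVFILT_SOCK
 * note flags; level_trigger collects the level-triggered events so they
 * are not delivered more than once.
 */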
filt_sockev_common(struct knote *kn, struct socket *so, long ev_hint)
    uint32_t level_trigger = 0;

    if (ev_hint & SO_FILT_HINT_CONNRESET) {
        kn->kn_fflags |= NOTE_CONNRESET;
    if (ev_hint & SO_FILT_HINT_TIMEOUT) {
        kn->kn_fflags |= NOTE_TIMEOUT;
    if (ev_hint & SO_FILT_HINT_NOSRCADDR) {
        kn->kn_fflags |= NOTE_NOSRCADDR;
    if (ev_hint & SO_FILT_HINT_IFDENIED) {
        kn->kn_fflags |= NOTE_IFDENIED;
    if (ev_hint & SO_FILT_HINT_KEEPALIVE) {
        kn->kn_fflags |= NOTE_KEEPALIVE;
    if (ev_hint & SO_FILT_HINT_ADAPTIVE_WTIMO) {
        kn->kn_fflags |= NOTE_ADAPTIVE_WTIMO;
    if (ev_hint & SO_FILT_HINT_ADAPTIVE_RTIMO) {
        kn->kn_fflags |= NOTE_ADAPTIVE_RTIMO;
    if ((ev_hint & SO_FILT_HINT_CONNECTED) ||
        (so->so_state & SS_ISCONNECTED)) {
        kn->kn_fflags |= NOTE_CONNECTED;
        level_trigger |= NOTE_CONNECTED;
    if ((ev_hint & SO_FILT_HINT_DISCONNECTED) ||
        (so->so_state & SS_ISDISCONNECTED)) {
        kn->kn_fflags |= NOTE_DISCONNECTED;
        level_trigger |= NOTE_DISCONNECTED;
    if (ev_hint & SO_FILT_HINT_CONNINFO_UPDATED) {
        if (so->so_proto != NULL &&
            (so->so_proto->pr_flags & PR_EVCONNINFO))
            kn->kn_fflags |= NOTE_CONNINFO_UPDATED;

    if ((ev_hint & SO_FILT_HINT_NOTIFY_ACK) ||
        tcp_notify_ack_active(so)) {
        kn->kn_fflags |= NOTE_NOTIFY_ACK;

    if ((so->so_state & SS_CANTRCVMORE)
        && cfil_sock_data_pending(&so->so_rcv) == 0
#endif /* CONTENT_FILTER */
        kn->kn_fflags |= NOTE_READCLOSED;
        level_trigger |= NOTE_READCLOSED;

    if (so->so_state & SS_CANTSENDMORE) {
        kn->kn_fflags |= NOTE_WRITECLOSED;
        level_trigger |= NOTE_WRITECLOSED;

    if ((ev_hint & SO_FILT_HINT_SUSPEND) ||
        (so->so_flags & SOF_SUSPENDED)) {
        kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);

        /* If resume event was delivered before, reset it */
        kn->kn_hookid &= ~NOTE_RESUME;

        kn->kn_fflags |= NOTE_SUSPEND;
        level_trigger |= NOTE_SUSPEND;

    if ((ev_hint & SO_FILT_HINT_RESUME) ||
        (so->so_flags & SOF_SUSPENDED) == 0) {
        kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);

        /* If suspend event was delivered before, reset it */
        kn->kn_hookid &= ~NOTE_SUSPEND;

        kn->kn_fflags |= NOTE_RESUME;
        level_trigger |= NOTE_RESUME;

    if (so->so_error != 0) {
        kn->kn_data = so->so_error;
        kn->kn_flags |= EV_EOF;
        get_sockev_state(so, (u_int32_t *)&(kn->kn_data));

    /* Reset any events that are not requested on this knote */
    kn->kn_fflags &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK);
    level_trigger &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK);

    /* Find the level-triggered events that are already delivered */
    level_trigger &= kn->kn_hookid;
    level_trigger &= EVFILT_SOCK_LEVEL_TRIGGER_MASK;

    /* Do not deliver level-triggered events more than once */
    if ((kn->kn_fflags & ~level_trigger) != 0)
filt_sockattach(struct knote *kn, __unused struct kevent_internal_s *kev)
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    if (KNOTE_ATTACH(&so->so_klist, kn))
        so->so_flags |= SOF_KNOTE;

    /* determine if event already fired */
    return filt_sockev_common(kn, so, 0);

filt_sockdetach(struct knote *kn)
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    if ((so->so_flags & SOF_KNOTE) != 0)
        if (KNOTE_DETACH(&so->so_klist, kn))
            so->so_flags &= ~SOF_KNOTE;
    socket_unlock(so, 1);

filt_sockev(struct knote *kn, long hint)
    int ret = 0, locked = 0;
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
    long ev_hint = (hint & SO_FILT_HINT_EV);

    if ((hint & SO_FILT_HINT_LOCKED) == 0) {
    ret = filt_sockev_common(kn, so, ev_hint);
        socket_unlock(so, 1);
 *      filt_socktouch - update event state
    struct kevent_internal_s *kev)
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
    uint32_t changed_flags;

    /* save off the [result] data and fflags */
    changed_flags = (kn->kn_sfflags ^ kn->kn_hookid);

    /* save off the new input fflags and data */
    kn->kn_sfflags = kev->fflags;
    kn->kn_sdata = kev->data;
    if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
        kn->kn_udata = kev->udata;

    /* restrict the current results to the (smaller?) set of new interest */
     * For compatibility with previous implementations, we leave kn_fflags
     * as they were before.
    //kn->kn_fflags &= kev->fflags;

     * Since we keep track of events that are already
     * delivered, if any of those events are not requested
     * anymore the state related to them can be reset
        ~(changed_flags & EVFILT_SOCK_LEVEL_TRIGGER_MASK);

    /* determine if we have events to deliver */
    ret = filt_sockev_common(kn, so, 0);

    socket_unlock(so, 1);
 *      filt_sockprocess - query event fired state and return data
    struct filt_process_s *data,
    struct kevent_internal_s *kev)
#pragma unused(data)
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    ret = filt_sockev_common(kn, so, 0);
        *kev = kn->kn_kevent;
         * Store the state of the events being delivered. This
         * state can be used to deliver level-triggered events
         * at least once and still avoid waking up the application
         * multiple times as long as the event is active.
        if (kn->kn_fflags != 0)
            kn->kn_hookid |= (kn->kn_fflags &
                              EVFILT_SOCK_LEVEL_TRIGGER_MASK);

         * NOTE_RESUME and NOTE_SUSPEND are an exception, deliver
         * only one of them and remember the last one that was
        if (kn->kn_fflags & NOTE_SUSPEND)
            kn->kn_hookid &= ~NOTE_RESUME;
        if (kn->kn_fflags & NOTE_RESUME)
            kn->kn_hookid &= ~NOTE_SUSPEND;

        if (kn->kn_flags & EV_CLEAR) {

    socket_unlock(so, 1);
get_sockev_state(struct socket *so, u_int32_t *statep)
    u_int32_t state = *(statep);

     * If the state variable is already used by a previous event,
    if (so->so_state & SS_ISCONNECTED)
        state |= SOCKEV_CONNECTED;
        state &= ~(SOCKEV_CONNECTED);
    state |= ((so->so_state & SS_ISDISCONNECTED) ? SOCKEV_DISCONNECTED : 0);
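/*
 * Format the recorded lock/unlock return addresses of a socket into a
 * static string, most recent first; used in panic and debug messages.
 */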
#define SO_LOCK_HISTORY_STR_LEN \
    (2 * SO_LCKDBG_MAX * (2 + (2 * sizeof (void *)) + 1) + 1)

__private_extern__ const char *
solockhistory_nr(struct socket *so)
    static char lock_history_str[SO_LOCK_HISTORY_STR_LEN];

    bzero(lock_history_str, sizeof (lock_history_str));
    for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) {
        n += snprintf(lock_history_str + n,
            SO_LOCK_HISTORY_STR_LEN - n, "%p:%p ",
            so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX],
            so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX]);
    return (lock_history_str);
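/*
 * Lock a socket: use the protocol's pr_lock when it provides one,
 * otherwise take the domain mutex, recording the caller's return
 * address in the lock history.
 */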
socket_lock(struct socket *so, int refcount)
    lr_saved = __builtin_return_address(0);

    if (so->so_proto->pr_lock) {
        (*so->so_proto->pr_lock)(so, refcount, lr_saved);
#ifdef MORE_LOCKING_DEBUG
        LCK_MTX_ASSERT(so->so_proto->pr_domain->dom_mtx,
            LCK_MTX_ASSERT_NOTOWNED);
        lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
        so->lock_lr[so->next_lock_lr] = lr_saved;
        so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;

socket_lock_assert_owned(struct socket *so)
    lck_mtx_t *mutex_held;

    if (so->so_proto->pr_getlock != NULL)
        mutex_held = (*so->so_proto->pr_getlock)(so, 0);
        mutex_held = so->so_proto->pr_domain->dom_mtx;

    LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
socket_try_lock(struct socket *so)
    if (so->so_proto->pr_getlock != NULL)
        mtx = (*so->so_proto->pr_getlock)(so, 0);
        mtx = so->so_proto->pr_domain->dom_mtx;

    return (lck_mtx_try_lock(mtx));

socket_unlock(struct socket *so, int refcount)
    lck_mtx_t *mutex_held;

    lr_saved = __builtin_return_address(0);

    if (so->so_proto == NULL) {
        panic("%s: null so_proto so=%p\n", __func__, so);

    if (so && so->so_proto->pr_unlock) {
        (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
        mutex_held = so->so_proto->pr_domain->dom_mtx;
#ifdef MORE_LOCKING_DEBUG
        LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
        so->unlock_lr[so->next_unlock_lr] = lr_saved;
        so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;

            if (so->so_usecount <= 0) {
                panic("%s: bad refcount=%d so=%p (%d, %d, %d) "
                    "lrh=%s", __func__, so->so_usecount, so,
                    SOCK_DOM(so), so->so_type,
                    SOCK_PROTO(so), solockhistory_nr(so));

            if (so->so_usecount == 0)
                sofreelastref(so, 1);

        lck_mtx_unlock(mutex_held);
/* Called with socket locked, will unlock socket */
sofree(struct socket *so)
    lck_mtx_t *mutex_held;

    if (so->so_proto->pr_getlock != NULL)
        mutex_held = (*so->so_proto->pr_getlock)(so, 0);
        mutex_held = so->so_proto->pr_domain->dom_mtx;
    LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

    sofreelastref(so, 0);

soreference(struct socket *so)
    socket_lock(so, 1);     /* locks & take one reference on socket */
    socket_unlock(so, 0);   /* unlock only */

sodereference(struct socket *so)
    socket_unlock(so, 1);
 * Set or clear SOF_MULTIPAGES on the socket to enable or disable the
 * possibility of using jumbo clusters.  Caller must ensure to hold
somultipages(struct socket *so, boolean_t set)
        so->so_flags |= SOF_MULTIPAGES;
        so->so_flags &= ~SOF_MULTIPAGES;

soif2kcl(struct socket *so, boolean_t set)
        so->so_flags1 |= SOF1_IF_2KCL;
        so->so_flags1 &= ~SOF1_IF_2KCL;

so_isdstlocal(struct socket *so) {
    struct inpcb *inp = (struct inpcb *)so->so_pcb;

    if (SOCK_DOM(so) == PF_INET)
        return (inaddr_local(inp->inp_faddr));
    else if (SOCK_DOM(so) == PF_INET6)
        return (in6addr_local(&inp->in6p_faddr));
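/*
 * Mark a socket as eligible to be defuncted.  SOF_NODEFUNCT sockets are
 * skipped (or forced), sockets that asked for extended background idle
 * time may be deferred, and SB_DROP is set on both socket buffers so no
 * further data is queued.
 */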
sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
    struct sockbuf *rcv, *snd;
    int err = 0, defunct;

    defunct = (so->so_flags & SOF_DEFUNCT);
        if (!(snd->sb_flags & rcv->sb_flags & SB_DROP)) {
            panic("%s: SB_DROP not set", __func__);

    if (so->so_flags & SOF_NODEFUNCT) {
            SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
                "name %s level %d) so 0x%llx [%d,%d] "
                "is not eligible for defunct "
                "(%d)\n", __func__, proc_selfpid(),
                proc_best_name(current_proc()), proc_pid(p),
                proc_best_name(p), level,
                (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                SOCK_DOM(so), SOCK_TYPE(so), err);
        so->so_flags &= ~SOF_NODEFUNCT;
        SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s level %d) "
            "so 0x%llx [%d,%d] defunct by force\n", __func__,
            proc_selfpid(), proc_best_name(current_proc()),
            proc_pid(p), proc_best_name(p), level,
            (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
            SOCK_DOM(so), SOCK_TYPE(so));
    } else if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
        struct inpcb *inp = (struct inpcb *)so->so_pcb;
        struct ifnet *ifp = inp->inp_last_outifp;

        if (ifp && IFNET_IS_CELLULAR(ifp)) {
            OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nocell);
        } else if (so->so_flags & SOF_DELEGATED) {
            OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd);
        } else if (soextbkidlestat.so_xbkidle_time == 0) {
            OSIncrementAtomic(&soextbkidlestat.so_xbkidle_notime);
        } else if (noforce) {
            OSIncrementAtomic(&soextbkidlestat.so_xbkidle_active);

            so->so_flags1 |= SOF1_EXTEND_BK_IDLE_INPROG;
            so->so_extended_bk_start = net_uptime();
            OSBitOrAtomic(P_LXBKIDLEINPROG, &p->p_ladvflag);

            inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY);

            SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s "
                "level %d) extend bk idle so 0x%llx rcv hw %d "
                __func__, proc_selfpid(),
                proc_best_name(current_proc()), proc_pid(p),
                proc_best_name(p), level,
                (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                so->so_rcv.sb_hiwat, so->so_rcv.sb_cc);
            OSIncrementAtomic(&soextbkidlestat.so_xbkidle_forced);

    so->so_flags |= SOF_DEFUNCT;

    /* Prevent further data from being appended to the socket buffers */
    snd->sb_flags |= SB_DROP;
    rcv->sb_flags |= SB_DROP;

    /* Flush any existing data in the socket buffers */
    if (rcv->sb_cc != 0) {
        rcv->sb_flags &= ~SB_SEL;
        selthreadclear(&rcv->sb_sel);
    if (snd->sb_cc != 0) {
        snd->sb_flags &= ~SB_SEL;
        selthreadclear(&snd->sb_sel);

    SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s level %d) "
        "so 0x%llx [%d,%d] %s defunct%s\n", __func__, proc_selfpid(),
        proc_best_name(current_proc()), proc_pid(p), proc_best_name(p),
        level, (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
        SOCK_TYPE(so), defunct ? "is already" : "marked as",
        (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ? " extbkidle" : "");
sodefunct(struct proc *p, struct socket *so, int level)
    struct sockbuf *rcv, *snd;

    if (!(so->so_flags & SOF_DEFUNCT)) {
        panic("%s improperly called", __func__);
    if (so->so_state & SS_DEFUNCT)

    if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
        char s[MAX_IPv6_STR_LEN];
        char d[MAX_IPv6_STR_LEN];
        struct inpcb *inp = sotoinpcb(so);

        SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s level %d) "
            "so 0x%llx [%s %s:%d -> %s:%d] is now defunct "
            "[rcv_si 0x%x, snd_si 0x%x, rcv_fl 0x%x, snd_fl 0x%x]\n",
            __func__, proc_selfpid(), proc_best_name(current_proc()),
            proc_pid(p), proc_best_name(p), level,
            (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
            (SOCK_TYPE(so) == SOCK_STREAM) ? "TCP" : "UDP",
            inet_ntop(SOCK_DOM(so), ((SOCK_DOM(so) == PF_INET) ?
            (void *)&inp->inp_laddr.s_addr : (void *)&inp->in6p_laddr),
            s, sizeof (s)), ntohs(inp->in6p_lport),
            inet_ntop(SOCK_DOM(so), (SOCK_DOM(so) == PF_INET) ?
            (void *)&inp->inp_faddr.s_addr : (void *)&inp->in6p_faddr,
            d, sizeof (d)), ntohs(inp->in6p_fport),
            (uint32_t)rcv->sb_sel.si_flags,
            (uint32_t)snd->sb_sel.si_flags,
            rcv->sb_flags, snd->sb_flags);
        SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s level %d) "
            "so 0x%llx [%d,%d] is now defunct [rcv_si 0x%x, "
            "snd_si 0x%x, rcv_fl 0x%x, snd_fl 0x%x]\n", __func__,
            proc_selfpid(), proc_best_name(current_proc()),
            proc_pid(p), proc_best_name(p), level,
            (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
            SOCK_DOM(so), SOCK_TYPE(so),
            (uint32_t)rcv->sb_sel.si_flags,
            (uint32_t)snd->sb_sel.si_flags, rcv->sb_flags,

     * Unwedge threads blocked on sbwait() and sb_lock().
    so->so_flags1 |= SOF1_DEFUNCTINPROG;
    if (rcv->sb_flags & SB_LOCK)
        sbunlock(rcv, TRUE);    /* keep socket locked */
    if (snd->sb_flags & SB_LOCK)
        sbunlock(snd, TRUE);    /* keep socket locked */

     * Flush the buffers and disconnect.  We explicitly call shutdown
     * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
     * states are set for the socket.  This would also flush out data
     * hanging off the receive list of this socket.
    (void) soshutdownlock_final(so, SHUT_RD);
    (void) soshutdownlock_final(so, SHUT_WR);
    (void) sodisconnectlocked(so);

     * Explicitly handle connectionless-protocol disconnection
     * and release any remaining data in the socket buffers.
    if (!(so->so_state & SS_ISDISCONNECTED))
        (void) soisdisconnected(so);

    if (so->so_error == 0)
        so->so_error = EBADF;

    if (rcv->sb_cc != 0) {
        rcv->sb_flags &= ~SB_SEL;
        selthreadclear(&rcv->sb_sel);
    if (snd->sb_cc != 0) {
        snd->sb_flags &= ~SB_SEL;
        selthreadclear(&snd->sb_sel);

    so->so_state |= SS_DEFUNCT;
    OSIncrementAtomicLong((volatile long *)&sodefunct_calls);
soresume(struct proc *p, struct socket *so, int locked)
    if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) {
        SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s) so 0x%llx "
            "[%d,%d] resumed from bk idle\n",
            __func__, proc_selfpid(), proc_best_name(current_proc()),
            proc_pid(p), proc_best_name(p),
            (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
            SOCK_DOM(so), SOCK_TYPE(so));

        so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG;
        so->so_extended_bk_start = 0;
        OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag);

        OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resumed);
        OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active);
        VERIFY(soextbkidlestat.so_xbkidle_active >= 0);
        socket_unlock(so, 1);
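/*
 * Handle SO_EXTENDED_BK_IDLE: clearing the option resumes the socket,
 * while setting it walks the process file table to enforce the
 * per-process limit before marking the socket as wanting extended
 * background idle time.
 */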
 * Does not attempt to account for sockets that are delegated from
 * the current process
so_set_extended_bk_idle(struct socket *so, int optval)
    if ((SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) ||
        SOCK_PROTO(so) != IPPROTO_TCP) {
        OSDecrementAtomic(&soextbkidlestat.so_xbkidle_notsupp);
    } else if (optval == 0) {
        so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED;
        soresume(current_proc(), so, 1);
        struct proc *p = current_proc();
        struct filedesc *fdp;

         * Unlock socket to avoid lock ordering issue with
         * the proc fd table lock
        socket_unlock(so, 0);

        for (i = 0; i < fdp->fd_nfiles; i++) {
            struct fileproc *fp = fdp->fd_ofiles[i];
                (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
                FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET)
            so2 = (struct socket *)fp->f_fglob->fg_data;
                so2->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED)
            if (count >= soextbkidlestat.so_xbkidle_maxperproc)

        if (count >= soextbkidlestat.so_xbkidle_maxperproc) {
            OSIncrementAtomic(&soextbkidlestat.so_xbkidle_toomany);
        } else if (so->so_flags & SOF_DELEGATED) {
            OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd);
            so->so_flags1 |= SOF1_EXTEND_BK_IDLE_WANTED;
            OSIncrementAtomic(&soextbkidlestat.so_xbkidle_wantok);
        SODEFUNCTLOG("%s[%d, %s]: so 0x%llx [%d,%d] "
            "%s marked for extended bk idle\n",
            __func__, proc_selfpid(), proc_best_name(current_proc()),
            (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
            SOCK_DOM(so), SOCK_TYPE(so),
            (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ?
so_stop_extended_bk_idle(struct socket *so)
    so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG;
    so->so_extended_bk_start = 0;

    OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active);
    VERIFY(soextbkidlestat.so_xbkidle_active >= 0);

    sosetdefunct(current_proc(), so,
        SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
    if (so->so_flags & SOF_DEFUNCT) {
        sodefunct(current_proc(), so,
            SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);

so_drain_extended_bk_idle(struct socket *so)
    if (so && (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) {
         * Only penalize sockets that have outstanding data
        if (so->so_rcv.sb_cc || so->so_snd.sb_cc) {
            so_stop_extended_bk_idle(so);

            OSIncrementAtomic(&soextbkidlestat.so_xbkidle_drained);
 * Return value tells whether the socket is still in extended background idle
so_check_extended_bk_idle_time(struct socket *so)
    if ((so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) {
        SODEFUNCTLOG("%s[%d, %s]: so 0x%llx [%d,%d]\n",
            __func__, proc_selfpid(), proc_best_name(current_proc()),
            (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
            SOCK_DOM(so), SOCK_TYPE(so));
        if (net_uptime() - so->so_extended_bk_start >
            soextbkidlestat.so_xbkidle_time) {
            so_stop_extended_bk_idle(so);

            OSIncrementAtomic(&soextbkidlestat.so_xbkidle_expired);
            struct inpcb *inp = (struct inpcb *)so->so_pcb;

            inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY);
            OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resched);
resume_proc_sockets(proc_t p)
    if (p->p_ladvflag & P_LXBKIDLEINPROG) {
        struct filedesc *fdp;

        for (i = 0; i < fdp->fd_nfiles; i++) {
            struct fileproc *fp;

            fp = fdp->fd_ofiles[i];
                (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
                FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET)
            so = (struct socket *)fp->f_fglob->fg_data;
            (void) soresume(p, so, 0);

        OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag);
__private_extern__ int
so_set_recv_anyif(struct socket *so, int optval)
    if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
    if (SOCK_DOM(so) == PF_INET) {
            sotoinpcb(so)->inp_flags |= INP_RECV_ANYIF;
            sotoinpcb(so)->inp_flags &= ~INP_RECV_ANYIF;

__private_extern__ int
so_get_recv_anyif(struct socket *so)
    if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
    if (SOCK_DOM(so) == PF_INET) {
        ret = (sotoinpcb(so)->inp_flags & INP_RECV_ANYIF) ? 1 : 0;
7242 so_set_restrictions(struct socket 
*so
, uint32_t vals
) 
7244         int nocell_old
, nocell_new
; 
7245         int noexpensive_old
, noexpensive_new
; 
7248          * Deny-type restrictions are trapdoors; once set they cannot be 
7249          * unset for the lifetime of the socket.  This allows them to be 
7250          * issued by a framework on behalf of the application without 
7251          * having to worry that they can be undone. 
7253          * Note here that socket-level restrictions overrides any protocol 
7254          * level restrictions.  For instance, SO_RESTRICT_DENY_CELLULAR 
7255          * socket restriction issued on the socket has a higher precendence 
7256          * than INP_NO_IFT_CELLULAR.  The latter is affected by the UUID 
7257          * policy PROC_UUID_NO_CELLULAR for unrestricted sockets only, 
7258          * i.e. when SO_RESTRICT_DENY_CELLULAR has not been issued. 
7260         nocell_old = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
7261         noexpensive_old = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
7262         so->so_restrictions |= (vals & (SO_RESTRICT_DENY_IN |
7263             SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR |
7264             SO_RESTRICT_DENY_EXPENSIVE));
7265         nocell_new = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
7266         noexpensive_new = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
7268         /* we can only set, not clear restrictions */
7269         if ((nocell_new - nocell_old) == 0 &&
7270             (noexpensive_new - noexpensive_old) == 0)
7273         if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
7275         if (SOCK_DOM(so) == PF_INET) {
7277                 if (nocell_new - nocell_old != 0) {
7279                          * if deny cellular is now set, do what's needed
7282                         inp_set_nocellular(sotoinpcb(so));
7284                 if (noexpensive_new - noexpensive_old != 0) {
7285                         inp_set_noexpensive(sotoinpcb(so));
7289         if (SOCK_DOM(so) == PF_MULTIPATH)
7290                 mptcp_set_restrictions(so);
7296 so_get_restrictions(struct socket *so)
7298         return (so->so_restrictions & (SO_RESTRICT_DENY_IN |
7299             SO_RESTRICT_DENY_OUT |
7300             SO_RESTRICT_DENY_CELLULAR | SO_RESTRICT_DENY_EXPENSIVE));
7304 so_set_effective_pid(struct socket *so, int epid, struct proc *p)
7306         struct proc *ep = PROC_NULL;
7309         /* pid 0 is reserved for kernel */
7316          * If this is an in-kernel socket, prevent its delegate
7317          * association from changing unless the socket option is
7318          * coming from within the kernel itself.
7320         if (so->last_pid == 0 && p != kernproc) {
7326          * If this is issued by a process that's recorded as the
7327          * real owner of the socket, or if the pid is the same as
7328          * the process's own pid, then proceed.  Otherwise ensure
7329          * that the issuing process has the necessary privileges.
7331         if (epid != so->last_pid || epid != proc_pid(p)) {
7332                 if ((error = priv_check_cred(kauth_cred_get(),
7333                     PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
7339         /* Find the process that corresponds to the effective pid */
7340         if ((ep = proc_find(epid)) == PROC_NULL) {
7346          * If a process tries to delegate the socket to itself, then
7347          * there's really nothing to do; treat it as a way for the
7348          * delegate association to be cleared.  Note that we check
7349          * the passed-in proc rather than calling proc_selfpid(),
7350          * as we need to check the process issuing the socket option
7351          * which could be kernproc.  Given that we don't allow 0 for
7352          * effective pid, it means that a delegated in-kernel socket
7353          * stays delegated during its lifetime (which is probably OK.)
7355         if (epid == proc_pid(p)) {
7356                 so->so_flags &= ~SOF_DELEGATED;
7359                 uuid_clear(so->e_uuid);
7361                 so->so_flags |= SOF_DELEGATED;
7362                 so->e_upid = proc_uniqueid(ep);
7363                 so->e_pid = proc_pid(ep);
7364                 proc_getexecutableuuid(ep, so->e_uuid, sizeof (so->e_uuid));
7367         if (error == 0 && net_io_policy_log) {
7370                 uuid_unparse(so->e_uuid, buf);
7371                 log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
7372                     "euuid %s%s\n", __func__, proc_name_address(p),
7373                     proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7374                     SOCK_DOM(so), SOCK_TYPE(so),
7375                     so->e_pid, proc_name_address(ep), buf,
7376                     ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
7377         } else if (error != 0 && net_io_policy_log) {
7378                 log(LOG_ERR, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
7379                     "ERROR (%d)\n", __func__, proc_name_address(p),
7380                     proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
7381                     SOCK_DOM(so), SOCK_TYPE(so),
7382                     epid, (ep == PROC_NULL) ? "PROC_NULL" :
7383                     proc_name_address(ep), error);
7386         /* Update this socket's policy upon success */
7388                 so->so_policy_gencnt *= -1;
7389                 so_update_policy(so);
7391                 so_update_necp_policy(so, NULL, NULL);
7395         if (ep != PROC_NULL)
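/*
 * Editor's illustration (not part of the original file): a minimal userspace
 * sketch of how a caller might reach so_set_effective_pid().  Assumptions:
 * SO_DELEGATED is believed to be the private socket option (declared under
 * PRIVATE in sys/socket.h) that carries the effective pid; it is not in the
 * public SDK headers, so the fallback definition below is illustrative only,
 * and its numeric value is an assumption.  The call fails with EPERM unless
 * the caller owns the socket, delegates to itself, or holds the
 * PRIV_NET_PRIVILEGED_SOCKET_DELEGATE privilege.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <stdio.h>
#include <unistd.h>

#ifndef SO_DELEGATED
#define SO_DELEGATED    0x1107  /* assumed private option value; illustrative */
#endif

int
main(void)
{
        int fd = socket(AF_INET, SOCK_DGRAM, 0);
        pid_t delegate_pid = getpid();  /* delegating to self clears the association */

        if (setsockopt(fd, SOL_SOCKET, SO_DELEGATED,
            &delegate_pid, sizeof (delegate_pid)) == -1)
                perror("setsockopt(SO_DELEGATED)");

        close(fd);
        return (0);
}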
7402 so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p)
7408         /* UUID must not be all-zeroes (reserved for kernel) */
7409         if (uuid_is_null(euuid)) {
7415          * If this is an in-kernel socket, prevent its delegate
7416          * association from changing unless the socket option is
7417          * coming from within the kernel itself.
7419         if (so->last_pid == 0 && p != kernproc) {
7424         /* Get the UUID of the issuing process */
7425         proc_getexecutableuuid(p, uuid, sizeof (uuid));
7428          * If this is issued by a process that's recorded as the
7429          * real owner of the socket, or if the uuid is the same as
7430          * the process's own uuid, then proceed.  Otherwise ensure
7431          * that the issuing process has the necessary privileges.
7433         if (uuid_compare(euuid, so->last_uuid) != 0 ||
7434             uuid_compare(euuid, uuid) != 0) {
7435                 if ((error = priv_check_cred(kauth_cred_get(),
7436                     PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
7443          * If a process tries to delegate the socket to itself, then
7444          * there's really nothing to do; treat it as a way for the
7445          * delegate association to be cleared.  Note that we check
7446          * the uuid of the passed-in proc rather than that of the
7447          * current process, as we need to check the process issuing
7448          * the socket option which could be kernproc itself.  Given
7449          * that we don't allow 0 for effective uuid, it means that
7450          * a delegated in-kernel socket stays delegated during its
7451          * lifetime (which is okay.)
7453         if (uuid_compare(euuid, uuid) == 0) {
7454                 so->so_flags &= ~SOF_DELEGATED;
7457                 uuid_clear(so->e_uuid);
7459                 so->so_flags |= SOF_DELEGATED;
7461                  * Unlike so_set_effective_pid(), we only have the UUID
7462                  * here and the process ID is not known.  Inherit the
7463                  * real {pid,upid} of the socket.
7465                 so->e_upid = so->last_upid;
7466                 so->e_pid = so->last_pid;
7467                 uuid_copy(so->e_uuid, euuid);
7471         if (error == 0 && net_io_policy_log) {
7472                 uuid_unparse(so->e_uuid, buf);
7473                 log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d "
7474                     "euuid %s%s\n", __func__, proc_name_address(p), proc_pid(p),
7475                     (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
7476                     SOCK_TYPE(so), so->e_pid, buf,
7477                     ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
7478         } else if (error != 0 && net_io_policy_log) {
7479                 uuid_unparse(euuid, buf);
7480                 log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] euuid %s "
7481                     "ERROR (%d)\n", __func__, proc_name_address(p), proc_pid(p),
7482                     (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
7483                     SOCK_TYPE(so), buf, error);
7486         /* Update this socket's policy upon success */
7488                 so->so_policy_gencnt *= -1;
7489                 so_update_policy(so);
7491                 so_update_necp_policy(so, NULL, NULL);
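/*
 * Editor's illustration (not part of the original file): the UUID-based
 * counterpart to the previous sketch.  SO_DELEGATED_UUID is believed to be
 * the private socket option that carries the effective UUID down to
 * so_set_effective_uuid(); the option name, the fallback value below, and
 * the randomly generated delegate UUID are assumptions made for illustration
 * only (a real caller would pass the target app's executable UUID).
 */
#include <sys/socket.h>
#include <uuid/uuid.h>
#include <stdio.h>
#include <unistd.h>

#ifndef SO_DELEGATED_UUID
#define SO_DELEGATED_UUID       0x1108  /* assumed private option value; illustrative */
#endif

int
main(void)
{
        int fd = socket(AF_INET, SOCK_STREAM, 0);
        uuid_t delegate_uuid;

        uuid_generate(delegate_uuid);   /* hypothetical delegate UUID */
        if (setsockopt(fd, SOL_SOCKET, SO_DELEGATED_UUID,
            delegate_uuid, sizeof (delegate_uuid)) == -1)
                perror("setsockopt(SO_DELEGATED_UUID)");

        close(fd);
        return (0);
}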
7499 netpolicy_post_msg(uint32_t ev_code, struct netpolicy_event_data *ev_data,
7500     uint32_t ev_datalen)
7502         struct kev_msg ev_msg;
7505          * A netpolicy event always starts with a netpolicy_event_data
7506          * structure, but the caller can provide for a longer event
7507          * structure to post, depending on the event code.
7509         VERIFY(ev_data != NULL && ev_datalen >= sizeof (*ev_data));
7511         bzero(&ev_msg, sizeof (ev_msg));
7512         ev_msg.vendor_code      = KEV_VENDOR_APPLE;
7513         ev_msg.kev_class        = KEV_NETWORK_CLASS;
7514         ev_msg.kev_subclass     = KEV_NETPOLICY_SUBCLASS;
7515         ev_msg.event_code       = ev_code;
7517         ev_msg.dv[0].data_ptr   = ev_data;
7518         ev_msg.dv[0].data_length = ev_datalen;
7520         kev_post_msg(&ev_msg);
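/*
 * Editor's illustration (not part of the original file): the comment in
 * netpolicy_post_msg() notes that every netpolicy event begins with a
 * struct netpolicy_event_data but may be longer.  The hypothetical caller
 * below shows that layout, written as if it sat in this file where the
 * kernel declarations are already in scope: the common header is embedded
 * as the first member of a larger event, and the whole thing is posted with
 * its full length.  MY_EVENT_CODE and struct my_netpolicy_event are invented
 * names for illustration only.
 */
#define MY_EVENT_CODE   1       /* invented event code for illustration */

struct my_netpolicy_event {
        struct netpolicy_event_data     ev_data;        /* must come first */
        uint32_t                        ev_extra;       /* event-specific payload */
};

static void
post_my_netpolicy_event(uint32_t extra)
{
        struct my_netpolicy_event ev;

        bzero(&ev, sizeof (ev));
        ev.ev_extra = extra;
        /* the length covers the whole extended structure, not just the header */
        netpolicy_post_msg(MY_EVENT_CODE, &ev.ev_data, sizeof (ev));
}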
7524 socket_post_kev_msg(uint32_t ev_code,
7525     struct kev_socket_event_data *ev_data,
7526     uint32_t ev_datalen)
7528         struct kev_msg ev_msg;
7530         bzero(&ev_msg, sizeof(ev_msg));
7531         ev_msg.vendor_code = KEV_VENDOR_APPLE;
7532         ev_msg.kev_class = KEV_NETWORK_CLASS;
7533         ev_msg.kev_subclass = KEV_SOCKET_SUBCLASS;
7534         ev_msg.event_code = ev_code;
7536         ev_msg.dv[0].data_ptr = ev_data;
7537         ev_msg.dv[0].data_length = ev_datalen;
7539         kev_post_msg(&ev_msg);
7543 socket_post_kev_msg_closed(struct socket *so)
7545         struct kev_socket_closed ev;
7546         struct sockaddr *socksa = NULL, *peersa = NULL;
7548         bzero(&ev, sizeof(ev));
7549         err = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &socksa);
7551                 err = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so,
7554                         memcpy(&ev.ev_data.kev_sockname, socksa,
7556                             sizeof (ev.ev_data.kev_sockname)));
7557                         memcpy(&ev.ev_data.kev_peername, peersa,
7559                             sizeof (ev.ev_data.kev_peername)));
7560                         socket_post_kev_msg(KEV_SOCKET_CLOSED,
7561                             &ev.ev_data, sizeof (ev));
7565                 FREE(socksa, M_SONAME);
7567                 FREE(peersa, M_SONAME);