/*
 * Copyright (c) 1998-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)uipc_socket.c       8.3 (Berkeley) 4/15/94
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/uio_internal.h>
#include <sys/kdebug.h>
#include <sys/kern_event.h>
#include <net/route.h>
#include <net/init.h>
#include <net/net_api_stats.h>
#include <net/ntstat.h>
#include <net/content_filter.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>
#include <netinet/in_var.h>
#include <netinet/tcp_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/flow_divert.h>
#include <kern/zalloc.h>
#include <kern/locks.h>
#include <machine/limits.h>
#include <libkern/OSAtomic.h>
#include <pexpert/pexpert.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <kern/policy_internal.h>

#include <sys/kpi_mbuf.h>
#include <sys/mcache.h>
#include <sys/unpcb.h>
#include <libkern/section_keywords.h>

#include <security/mac_framework.h>

#if MULTIPATH
#include <netinet/mp_pcb.h>
#include <netinet/mptcp_var.h>
#endif /* MULTIPATH */
#define ROUNDUP(a, b) (((a) + ((b) - 1)) & (~((b) - 1)))

#if DEBUG || DEVELOPMENT
#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
#else
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#endif
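/*
 * Illustrative sketch (not part of the original source): ROUNDUP() only
 * works when "b" is a power of two, because it rounds by masking with
 * ~(b - 1).  A minimal userspace check of the arithmetic, reusing the
 * ROUNDUP() definition above:
 */
#if 0   /* example only */
#include <assert.h>

int
main(void)
{
        /* 1025 rounded up to a 1024-byte boundary is 2048 */
        assert(ROUNDUP(1025, 1024) == 2048);
        /* exact multiples are left unchanged */
        assert(ROUNDUP(2048, 1024) == 2048);
        /* a non-power-of-two "b" would give wrong results */
        return 0;
}
#endif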
/* TODO: this should be in a header file somewhere */
extern char *proc_name_address(void *p);

static u_int32_t        so_cache_hw;    /* High water mark for socache */
static u_int32_t        so_cache_timeouts;      /* number of timeouts */
static u_int32_t        so_cache_max_freed;     /* max freed per timeout */
static u_int32_t        cached_sock_count = 0;
STAILQ_HEAD(, socket)   so_cache_head;
int     max_cached_sock_count = MAX_CACHED_SOCKETS;
static u_int32_t        so_cache_time;
static int              socketinit_done;
static struct zone      *so_cache_zone;

static lck_grp_t        *so_cache_mtx_grp;
static lck_attr_t       *so_cache_mtx_attr;
static lck_grp_attr_t   *so_cache_mtx_grp_attr;
static lck_mtx_t        *so_cache_mtx;
#include <machine/limits.h>

static int      filt_sorattach(struct knote *kn, struct kevent_qos_s *kev);
static void     filt_sordetach(struct knote *kn);
static int      filt_soread(struct knote *kn, long hint);
static int      filt_sortouch(struct knote *kn, struct kevent_qos_s *kev);
static int      filt_sorprocess(struct knote *kn, struct kevent_qos_s *kev);

static int      filt_sowattach(struct knote *kn, struct kevent_qos_s *kev);
static void     filt_sowdetach(struct knote *kn);
static int      filt_sowrite(struct knote *kn, long hint);
static int      filt_sowtouch(struct knote *kn, struct kevent_qos_s *kev);
static int      filt_sowprocess(struct knote *kn, struct kevent_qos_s *kev);

static int      filt_sockattach(struct knote *kn, struct kevent_qos_s *kev);
static void     filt_sockdetach(struct knote *kn);
static int      filt_sockev(struct knote *kn, long hint);
static int      filt_socktouch(struct knote *kn, struct kevent_qos_s *kev);
static int      filt_sockprocess(struct knote *kn, struct kevent_qos_s *kev);

static int sooptcopyin_timeval(struct sockopt *, struct timeval *);
static int sooptcopyout_timeval(struct sockopt *, const struct timeval *);
SECURITY_READ_ONLY_EARLY(struct filterops) soread_filtops = {
        .f_attach = filt_sorattach,
        .f_detach = filt_sordetach,
        .f_event = filt_soread,
        .f_touch = filt_sortouch,
        .f_process = filt_sorprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) sowrite_filtops = {
        .f_attach = filt_sowattach,
        .f_detach = filt_sowdetach,
        .f_event = filt_sowrite,
        .f_touch = filt_sowtouch,
        .f_process = filt_sowprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) sock_filtops = {
        .f_attach = filt_sockattach,
        .f_detach = filt_sockdetach,
        .f_event = filt_sockev,
        .f_touch = filt_socktouch,
        .f_process = filt_sockprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) soexcept_filtops = {
        .f_attach = filt_sorattach,
        .f_detach = filt_sordetach,
        .f_event = filt_soread,
        .f_touch = filt_sortouch,
        .f_process = filt_sorprocess,
};

SYSCTL_DECL(_kern_ipc);
#define EVEN_MORE_LOCKING_DEBUG 0

int socket_debug = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, socket_debug,
    CTLFLAG_RW | CTLFLAG_LOCKED, &socket_debug, 0, "");

static unsigned long sodefunct_calls = 0;
SYSCTL_LONG(_kern_ipc, OID_AUTO, sodefunct_calls, CTLFLAG_LOCKED,
    &sodefunct_calls, "");

ZONE_DECLARE(socket_zone, "socket", sizeof(struct socket), ZC_ZFREE_CLEARMEM);
so_gen_t        so_gencnt;      /* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
#define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3)
#define DBG_FNC_SOSEND          NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
#define DBG_FNC_SOSEND_LIST     NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 3)
#define DBG_FNC_SORECEIVE       NETDBG_CODE(DBG_NETSOCK, (8 << 8))
#define DBG_FNC_SORECEIVE_LIST  NETDBG_CODE(DBG_NETSOCK, (8 << 8) | 3)
#define DBG_FNC_SOSHUTDOWN      NETDBG_CODE(DBG_NETSOCK, (9 << 8))

#define MAX_SOOPTGETM_SIZE      (128 * MCLBYTES)
int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxconn, 0, "");

/* Should we get a maximum also ??? */
static int sosendmaxchain = 65536;
static int sosendminchain = 16384;
static int sorecvmincopy  = 16384;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendminchain, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sorecvmincopy, 0, "");
/*
 * Set to enable jumbo clusters (if available) for large writes when
 * the socket is marked with SOF_MULTIPAGES; see below.
 */
int sosendjcl = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl, 0, "");
/*
 * Set this to ignore SOF_MULTIPAGES and use jumbo clusters for large
 * writes on the socket for all protocols on any network interfaces,
 * depending upon sosendjcl above.  Be extra careful when setting this
 * to 1, because sending down packets that cross physical pages down to
 * broken drivers (those that falsely assume that the physical pages
 * are contiguous) might lead to system panics or silent data corruption.
 * When set to 0, the system will respect SOF_MULTIPAGES, which is set
 * only for TCP sockets whose outgoing interface is IFNET_MULTIPAGES
 * capable.  Set this to 1 only for testing/debugging purposes.
 */
int sosendjcl_ignore_capab = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl_ignore_capab, 0, "");
/*
 * Set this to ignore SOF1_IF_2KCL and use big clusters for large
 * writes on the socket for all protocols on any network interfaces.
 * Be extra careful when setting this to 1, because sending down packets with
 * clusters larger than 2 KB might lead to system panics or data corruption.
 * When set to 0, the system will respect SOF1_IF_2KCL, which is set
 * on the outgoing interface.
 * Set this to 1 for testing/debugging purposes only.
 */
int sosendbigcl_ignore_capab = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendbigcl_ignore_capab,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendbigcl_ignore_capab, 0, "");
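/*
 * Illustrative sketch (not part of the original source): the SYSCTL_INT()
 * entries above are published under the kern.ipc node, so a userspace
 * process can inspect them with sysctlbyname(3).  The MIB names below are
 * assumed to follow directly from the OID declarations (e.g.
 * "kern.ipc.somaxconn"); writable tunables (CTLFLAG_RW) can be set the same
 * way, given sufficient privilege.
 */
#if 0   /* example only */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        int value = 0;
        size_t len = sizeof(value);

        /* read the listen backlog ceiling kept in "somaxconn" above */
        if (sysctlbyname("kern.ipc.somaxconn", &value, &len, NULL, 0) == 0) {
                printf("kern.ipc.somaxconn = %d\n", value);
        }

        len = sizeof(value);
        if (sysctlbyname("kern.ipc.sosendbigcl_ignore_capab", &value, &len,
            NULL, 0) == 0) {
                printf("kern.ipc.sosendbigcl_ignore_capab = %d\n", value);
        }
        return 0;
}
#endif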
int sodefunctlog = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sodefunctlog, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sodefunctlog, 0, "");

int sothrottlelog = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sothrottlelog, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sothrottlelog, 0, "");

int sorestrictrecv = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictrecv, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sorestrictrecv, 0, "Enable inbound interface restrictions");

int sorestrictsend = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictsend, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sorestrictsend, 0, "Enable outbound interface restrictions");

int soreserveheadroom = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, soreserveheadroom, CTLFLAG_RW | CTLFLAG_LOCKED,
    &soreserveheadroom, 0, "To allocate contiguous datagram buffers");
#if (DEBUG || DEVELOPMENT)
int so_notsent_lowat_check = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, notsent_lowat, CTLFLAG_RW | CTLFLAG_LOCKED,
    &so_notsent_lowat_check, 0, "enable/disable notsent lowat check");
#endif /* DEBUG || DEVELOPMENT */

int so_accept_list_waits = 0;
#if (DEBUG || DEVELOPMENT)
SYSCTL_INT(_kern_ipc, OID_AUTO, accept_list_waits, CTLFLAG_RW | CTLFLAG_LOCKED,
    &so_accept_list_waits, 0, "number of waits for listener incomp list");
#endif /* DEBUG || DEVELOPMENT */
extern struct inpcbinfo tcbinfo;

/* TODO: these should be in a header file */
extern int get_inpcb_str_size(void);
extern int get_tcp_str_size(void);

vm_size_t       so_cache_zone_element_size;

static int sodelayed_copy(struct socket *, struct uio *, struct mbuf **,
    user_ssize_t *);
static void cached_sock_alloc(struct socket **, zalloc_flags_t);
static void cached_sock_free(struct socket *);
/*
 * Maximum number of extended background idle sockets per process.
 * Set to zero to disable further setting of the option.
 */
#define SO_IDLE_BK_IDLE_MAX_PER_PROC    1
#define SO_IDLE_BK_IDLE_TIME            600
#define SO_IDLE_BK_IDLE_RCV_HIWAT       131072

struct soextbkidlestat soextbkidlestat;

SYSCTL_UINT(_kern_ipc, OID_AUTO, maxextbkidleperproc,
    CTLFLAG_RW | CTLFLAG_LOCKED, &soextbkidlestat.so_xbkidle_maxperproc, 0,
    "Maximum of extended background idle sockets per process");

SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidletime, CTLFLAG_RW | CTLFLAG_LOCKED,
    &soextbkidlestat.so_xbkidle_time, 0,
    "Time in seconds to keep extended background idle sockets");

SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidlercvhiwat, CTLFLAG_RW | CTLFLAG_LOCKED,
    &soextbkidlestat.so_xbkidle_rcvhiwat, 0,
    "High water mark for extended background idle sockets");

SYSCTL_STRUCT(_kern_ipc, OID_AUTO, extbkidlestat, CTLFLAG_RD | CTLFLAG_LOCKED,
    &soextbkidlestat, soextbkidlestat, "");

int so_set_extended_bk_idle(struct socket *, int);
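/*
 * Illustrative sketch (not part of the original source): the per-process
 * limit, idle time and receive high-water mark above back the
 * SO_EXTENDED_BK_IDLE socket option.  A userspace request might look like
 * the following; SO_EXTENDED_BK_IDLE is assumed to be available in
 * <sys/socket.h> on this platform.
 */
#if 0   /* example only */
#include <sys/socket.h>
#include <stdio.h>

int
request_extended_bk_idle(int fd)
{
        int one = 1;

        /*
         * Ask the kernel to keep this socket usable while the process is
         * idle in the background; the kernel enforces so_xbkidle_maxperproc.
         */
        if (setsockopt(fd, SOL_SOCKET, SO_EXTENDED_BK_IDLE,
            &one, sizeof(one)) == -1) {
                perror("setsockopt(SO_EXTENDED_BK_IDLE)");
                return -1;
        }
        return 0;
}
#endif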
/*
 * SOTCDB_NO_DSCP is set by default, to prevent the networking stack from
 * setting the DSCP code on the packet based on the service class; see
 * <rdar://problem/11277343> for details.
 */
__private_extern__ u_int32_t sotcdb = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sotcdb, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sotcdb, 0, "");
void
socketinit(void)
{
        _CASSERT(sizeof(so_gencnt) == sizeof(uint64_t));
        VERIFY(IS_P2ALIGNED(&so_gencnt, sizeof(uint32_t)));

#ifdef __LP64__
        _CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user64_sa_endpoints));
        _CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user64_sa_endpoints, sae_srcif));
        _CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user64_sa_endpoints, sae_srcaddr));
        _CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user64_sa_endpoints, sae_srcaddrlen));
        _CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user64_sa_endpoints, sae_dstaddr));
        _CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user64_sa_endpoints, sae_dstaddrlen));
#else
        _CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user32_sa_endpoints));
        _CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user32_sa_endpoints, sae_srcif));
        _CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user32_sa_endpoints, sae_srcaddr));
        _CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user32_sa_endpoints, sae_srcaddrlen));
        _CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user32_sa_endpoints, sae_dstaddr));
        _CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user32_sa_endpoints, sae_dstaddrlen));
#endif /* __LP64__ */

        if (socketinit_done) {
                printf("socketinit: already called...\n");
                return;
        }
        socketinit_done = 1;

        PE_parse_boot_argn("socket_debug", &socket_debug,
            sizeof(socket_debug));

        /*
         * allocate lock group attribute and group for socket cache mutex
         */
        so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
        so_cache_mtx_grp = lck_grp_alloc_init("so_cache",
            so_cache_mtx_grp_attr);

        /*
         * allocate the lock attribute for socket cache mutex
         */
        so_cache_mtx_attr = lck_attr_alloc_init();

        /* cached sockets mutex */
        so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr);
        if (so_cache_mtx == NULL) {
                panic("%s: unable to allocate so_cache_mtx\n", __func__);
                /* NOTREACHED */
        }
        STAILQ_INIT(&so_cache_head);

        so_cache_zone_element_size = (vm_size_t)(sizeof(struct socket) + 4
            + get_inpcb_str_size() + 4 + get_tcp_str_size());

        so_cache_zone = zone_create("socache zone", so_cache_zone_element_size,
            ZC_ZFREE_CLEARMEM | ZC_NOENCRYPT);

        bzero(&soextbkidlestat, sizeof(struct soextbkidlestat));
        soextbkidlestat.so_xbkidle_maxperproc = SO_IDLE_BK_IDLE_MAX_PER_PROC;
        soextbkidlestat.so_xbkidle_time = SO_IDLE_BK_IDLE_TIME;
        soextbkidlestat.so_xbkidle_rcvhiwat = SO_IDLE_BK_IDLE_RCV_HIWAT;

        socket_tclass_init();

#if MULTIPATH
        mp_pcbinit();
#endif /* MULTIPATH */
}
static void
cached_sock_alloc(struct socket **so, zalloc_flags_t how)
{
        caddr_t temp;
        uintptr_t offset;

        lck_mtx_lock(so_cache_mtx);

        if (!STAILQ_EMPTY(&so_cache_head)) {
                VERIFY(cached_sock_count > 0);

                *so = STAILQ_FIRST(&so_cache_head);
                STAILQ_REMOVE_HEAD(&so_cache_head, so_cache_ent);
                STAILQ_NEXT((*so), so_cache_ent) = NULL;

                cached_sock_count--;
                lck_mtx_unlock(so_cache_mtx);

                temp = (*so)->so_saved_pcb;
                bzero((caddr_t)*so, sizeof(struct socket));

                (*so)->so_saved_pcb = temp;
        } else {
                lck_mtx_unlock(so_cache_mtx);

                *so = zalloc_flags(so_cache_zone, how | Z_ZERO);

                /*
                 * Define offsets for extra structures into our
                 * single block of memory. Align extra structures
                 * on longword boundaries.
                 */

                offset = (uintptr_t)*so;
                offset += sizeof(struct socket);

                offset = ALIGN(offset);

                (*so)->so_saved_pcb = (caddr_t)offset;
                offset += get_inpcb_str_size();

                offset = ALIGN(offset);

                ((struct inpcb *)(void *)(*so)->so_saved_pcb)->inp_saved_ppcb =
                    (caddr_t)offset;
        }

        OSBitOrAtomic(SOF1_CACHED_IN_SOCK_LAYER, &(*so)->so_flags1);
}
static void
cached_sock_free(struct socket *so)
{
        lck_mtx_lock(so_cache_mtx);

        so_cache_time = net_uptime();
        if (++cached_sock_count > max_cached_sock_count) {
                --cached_sock_count;
                lck_mtx_unlock(so_cache_mtx);
                zfree(so_cache_zone, so);
        } else {
                if (so_cache_hw < cached_sock_count) {
                        so_cache_hw = cached_sock_count;
                }

                STAILQ_INSERT_TAIL(&so_cache_head, so, so_cache_ent);

                so->cache_timestamp = so_cache_time;
                lck_mtx_unlock(so_cache_mtx);
        }
}
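/*
 * Illustrative sketch (not part of the original source): cached_sock_alloc()
 * and cached_sock_free() above implement a bounded free list with a
 * high-water statistic.  A stripped-down, single-threaded userspace version
 * of the same pattern, with hypothetical names and no locking:
 */
#if 0   /* example only */
#include <stdlib.h>

struct cached_obj {
        struct cached_obj *next;
};

static struct cached_obj *cache_head;
static unsigned cache_count, cache_max = 64, cache_hw;

static void *
obj_alloc(size_t size)
{
        if (cache_head != NULL) {               /* reuse a cached object */
                struct cached_obj *o = cache_head;
                cache_head = o->next;
                cache_count--;
                return o;
        }
        return malloc(size);                    /* cache empty: allocate fresh */
}

static void
obj_free(void *p)
{
        struct cached_obj *o = p;

        if (cache_count >= cache_max) {         /* cache full: really free */
                free(p);
                return;
        }
        o->next = cache_head;                   /* otherwise push onto the list */
        cache_head = o;
        if (++cache_count > cache_hw) {
                cache_hw = cache_count;         /* track the high-water mark */
        }
}
#endif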
void
so_update_last_owner_locked(struct socket *so, proc_t self)
{
        if (so->last_pid != 0) {
                /*
                 * last_pid and last_upid should remain zero for sockets
                 * created using sock_socket. The check above achieves that.
                 */
                if (self == PROC_NULL) {
                        self = current_proc();
                }

                if (so->last_upid != proc_uniqueid(self) ||
                    so->last_pid != proc_pid(self)) {
                        so->last_upid = proc_uniqueid(self);
                        so->last_pid = proc_pid(self);
                        proc_getexecutableuuid(self, so->last_uuid,
                            sizeof(so->last_uuid));
                        if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
                                (*so->so_proto->pr_update_last_owner)(so, self, NULL);
                        }
                }
                proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid));
        }
}
*so
) 
 553         if (SOCK_DOM(so
) == PF_INET 
|| SOCK_DOM(so
) == PF_INET6
) { 
 554                 (void) inp_update_policy(sotoinpcb(so
)); 
 560 so_update_necp_policy(struct socket 
*so
, struct sockaddr 
*override_local_addr
, 
 561     struct sockaddr 
*override_remote_addr
) 
 563         if (SOCK_DOM(so
) == PF_INET 
|| SOCK_DOM(so
) == PF_INET6
) { 
 564                 inp_update_necp_policy(sotoinpcb(so
), override_local_addr
, 
 565                     override_remote_addr
, 0); 
static boolean_t
so_cache_timer(void)
{
        struct socket *p;
        int n_freed = 0;
        boolean_t rc = FALSE;

        lck_mtx_lock(so_cache_mtx);
        so_cache_timeouts++;
        so_cache_time = net_uptime();

        while (!STAILQ_EMPTY(&so_cache_head)) {
                VERIFY(cached_sock_count > 0);
                p = STAILQ_FIRST(&so_cache_head);
                if ((so_cache_time - p->cache_timestamp) <
                    SO_CACHE_TIME_LIMIT) {
                        break;
                }

                STAILQ_REMOVE_HEAD(&so_cache_head, so_cache_ent);
                --cached_sock_count;

                zfree(so_cache_zone, p);

                if (++n_freed >= SO_CACHE_MAX_FREE_BATCH) {
                        so_cache_max_freed++;
                        break;
                }
        }

        /* Schedule again if there is more to cleanup */
        if (!STAILQ_EMPTY(&so_cache_head)) {
                rc = TRUE;
        }

        lck_mtx_unlock(so_cache_mtx);
        return rc;
}
/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
static struct socket *
soalloc(int waitok, int dom, int type)
{
        zalloc_flags_t how = waitok ? Z_WAITOK : Z_NOWAIT;
        struct socket *so;

        if ((dom == PF_INET) && (type == SOCK_STREAM)) {
                cached_sock_alloc(&so, how);
        } else {
                so = zalloc_flags(socket_zone, how | Z_ZERO);
        }
        if (so != NULL) {
                so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt);

                /*
                 * Increment the socket allocation statistics
                 */
                INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_alloc_total);
        }

        return so;
}
int
socreate_internal(int dom, struct socket **aso, int type, int proto,
    struct proc *p, uint32_t flags, struct proc *ep)
{
        struct protosw *prp;
        struct socket *so;
        int error = 0;
#if defined(XNU_TARGET_OS_OSX)
        pid_t rpid = -1;
#endif
        extern int tcpconsdebug;

        if (proto != 0) {
                prp = pffindproto(dom, proto, type);
        } else {
                prp = pffindtype(dom, type);
        }

        if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL) {
                if (pffinddomain(dom) == NULL) {
                        return EAFNOSUPPORT;
                }
                if (proto != 0) {
                        if (pffindprotonotype(dom, proto) != NULL) {
                                return EPROTOTYPE;
                        }
                }
                return EPROTONOSUPPORT;
        }
        if (prp->pr_type != type) {
                return EPROTOTYPE;
        }
        so = soalloc(1, dom, type);
        if (so == NULL) {
                return ENOBUFS;
        }

        switch (dom) {
        case PF_LOCAL:
                INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_local_total);
                break;
        case PF_INET:
                INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_inet_total);
                if (type == SOCK_STREAM) {
                        INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_stream_total);
                } else {
                        INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_total);
                }
                break;
        case PF_ROUTE:
                INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_route_total);
                break;
        case PF_NDRV:
                INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_ndrv_total);
                break;
        case PF_KEY:
                INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_key_total);
                break;
        case PF_INET6:
                INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_inet6_total);
                if (type == SOCK_STREAM) {
                        INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet6_stream_total);
                } else {
                        INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet6_dgram_total);
                }
                break;
        case PF_SYSTEM:
                INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_system_total);
                break;
        case PF_MULTIPATH:
                INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_multipath_total);
                break;
        default:
                INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_other_total);
                break;
        }

        if (flags & SOCF_MPTCP) {
                so->so_state |= SS_NBIO;
        }

        TAILQ_INIT(&so->so_incomp);
        TAILQ_INIT(&so->so_comp);
        so->last_upid = proc_uniqueid(p);
        so->last_pid = proc_pid(p);
        proc_getexecutableuuid(p, so->last_uuid, sizeof(so->last_uuid));
        proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid));

        if (ep != PROC_NULL && ep != p) {
                so->e_upid = proc_uniqueid(ep);
                so->e_pid = proc_pid(ep);
                proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid));
                so->so_flags |= SOF_DELEGATED;
#if defined(XNU_TARGET_OS_OSX)
                if (ep->p_responsible_pid != so->e_pid) {
                        rpid = ep->p_responsible_pid;
                }
#endif
        }

#if defined(XNU_TARGET_OS_OSX)
        if (rpid < 0 && p->p_responsible_pid != so->last_pid) {
                rpid = p->p_responsible_pid;
        }

        uuid_clear(so->so_ruuid);
        if (rpid >= 0) {
                proc_t rp = proc_find(rpid);
                if (rp != PROC_NULL) {
                        proc_getexecutableuuid(rp, so->so_ruuid, sizeof(so->so_ruuid));
                        proc_rele(rp);
                }
        }
#endif

        so->so_cred = kauth_cred_proc_ref(p);
        if (!suser(kauth_cred_get(), NULL)) {
                so->so_state |= SS_PRIV;
        }

        so->so_proto = prp;
        so->so_rcv.sb_flags |= SB_RECV;
        so->so_rcv.sb_so = so->so_snd.sb_so = so;
        so->next_lock_lr = 0;
        so->next_unlock_lr = 0;

        /*
         * Attachment will create the per pcb lock if necessary and
         * increase refcount for creation, make sure it's done before
         * socket is inserted in lists.
         */
        so->so_usecount++;

        error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
        if (error != 0) {
                /*
                 * If so_pcb is not zero, the socket will be leaked,
                 * so protocol attachment handler must be coded carefully.
                 */
                so->so_state |= SS_NOFDREF;
                VERIFY(so->so_usecount > 0);
                so->so_usecount--;
                sofreelastref(so, 1);   /* will deallocate the socket */
                return error;
        }

        /*
         * Note: needs so_pcb to be set after pru_attach
         */
        if (prp->pr_update_last_owner != NULL) {
                (*prp->pr_update_last_owner)(so, p, ep);
        }

        atomic_add_32(&prp->pr_domain->dom_refs, 1);

        /* Attach socket filters for this protocol */
        sflt_initsock(so);

        if (tcpconsdebug == 2) {
                so->so_options |= SO_DEBUG;
        }

        so_set_default_traffic_class(so);

        /*
         * If this thread or task is marked to create backgrounded sockets,
         * mark the socket as background.
         */
        if (!(flags & SOCF_MPTCP) &&
            proc_get_effective_thread_policy(current_thread(), TASK_POLICY_NEW_SOCKETS_BG)) {
                socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND);
                so->so_background_thread = current_thread();
        }

        switch (dom) {
        /*
         * Don't mark Unix domain or system
         * eligible for defunct by default.
         */
        case PF_LOCAL:
        case PF_SYSTEM:
                so->so_flags |= SOF_NODEFUNCT;
                break;
        default:
                break;
        }

        /*
         * Entitlements can't be checked at socket creation time except if the
         * application requested a feature guarded by a privilege (c.f., socket
         * delegation).
         * The priv(9) and the Sandboxing APIs are designed with the idea that
         * a privilege check should only be triggered by a userland request.
         * A privilege check at socket creation time is time consuming and
         * could trigger many authorisation error messages from the security
         * subsystem.
         */

        *aso = so;

        return 0;
}
/*
 * Returns:     0                       Success
 *      <pru_attach>:ENOBUFS[AF_UNIX]
 *      <pru_attach>:ENOBUFS[TCP]
 *      <pru_attach>:ENOMEM[TCP]
 *      <pru_attach>:???                [other protocol families, IPSEC]
 */
int
socreate(int dom, struct socket **aso, int type, int proto)
{
        return socreate_internal(dom, aso, type, proto, current_proc(), 0,
                   PROC_NULL);
}

int
socreate_delegate(int dom, struct socket **aso, int type, int proto, pid_t epid)
{
        int error = 0;
        struct proc *ep = PROC_NULL;

        if ((proc_selfpid() != epid) && ((ep = proc_find(epid)) == PROC_NULL)) {
                error = ESRCH;
                goto done;
        }

        error = socreate_internal(dom, aso, type, proto, current_proc(), 0, ep);

        /*
         * It might not be wise to hold the proc reference when calling
         * socreate_internal since it calls soalloc with M_WAITOK.
         */
done:
        if (ep != PROC_NULL) {
                proc_rele(ep);
        }

        return error;
}
/*
 * Returns:     0                       Success
 *      <pru_bind>:EINVAL               Invalid argument [COMMON_START]
 *      <pru_bind>:EAFNOSUPPORT         Address family not supported
 *      <pru_bind>:EADDRNOTAVAIL        Address not available.
 *      <pru_bind>:EINVAL               Invalid argument
 *      <pru_bind>:EAFNOSUPPORT         Address family not supported [notdef]
 *      <pru_bind>:EACCES               Permission denied
 *      <pru_bind>:EADDRINUSE           Address in use
 *      <pru_bind>:EAGAIN               Resource unavailable, try again
 *      <pru_bind>:EPERM                Operation not permitted
 *
 * Notes:       It's not possible to fully enumerate the return codes above,
 *              since socket filter authors and protocol family authors may
 *              not choose to limit their error returns to those listed, even
 *              though this may result in some software operating incorrectly.
 *
 *              The error codes which are enumerated above are those known to
 *              be returned by the tcp_usr_bind function supplied.
 */
int
sobindlock(struct socket *so, struct sockaddr *nam, int dolock)
{
        struct proc *p = current_proc();
        int error = 0;

        if (dolock) {
                socket_lock(so, 1);
        }

        so_update_last_owner_locked(so, p);
        so_update_policy(so);

        so_update_necp_policy(so, nam, NULL);

        /*
         * If this is a bind request on a socket that has been marked
         * as inactive, reject it now before we go any further.
         */
        if (so->so_flags & SOF_DEFUNCT) {
                error = EINVAL;
                SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
                    __func__, proc_pid(p), proc_best_name(p),
                    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                    SOCK_DOM(so), SOCK_TYPE(so), error);
                goto out;
        }

        /* Socket filter */
        error = sflt_bind(so, nam);

        if (error == 0) {
                error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
        }
out:
        if (dolock) {
                socket_unlock(so, 1);
        }

        if (error == EJUSTRETURN) {
                error = 0;
        }

        return error;
}
void
sodealloc(struct socket *so)
{
        kauth_cred_unref(&so->so_cred);

        /* Remove any filters */
        sflt_termsock(so);

#if CONTENT_FILTER
        cfil_sock_detach(so);
#endif /* CONTENT_FILTER */

        so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt);

        if (so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) {
                cached_sock_free(so);
        } else {
                zfree(socket_zone, so);
        }
}
/*
 * Returns:     0                       Success
 *      <pru_listen>:EINVAL[AF_UNIX]
 *      <pru_listen>:EINVAL[TCP]
 *      <pru_listen>:EADDRNOTAVAIL[TCP] Address not available.
 *      <pru_listen>:EINVAL[TCP]        Invalid argument
 *      <pru_listen>:EAFNOSUPPORT[TCP]  Address family not supported [notdef]
 *      <pru_listen>:EACCES[TCP]        Permission denied
 *      <pru_listen>:EADDRINUSE[TCP]    Address in use
 *      <pru_listen>:EAGAIN[TCP]        Resource unavailable, try again
 *      <pru_listen>:EPERM[TCP]         Operation not permitted
 *
 * Notes:       Other <pru_listen> returns depend on the protocol family; all
 *              <sf_listen> returns depend on what the filter author causes
 *              their filter to return.
 */
int
solisten(struct socket *so, int backlog)
{
        struct proc *p = current_proc();
        int error = 0;

        socket_lock(so, 1);

        so_update_last_owner_locked(so, p);
        so_update_policy(so);

        so_update_necp_policy(so, NULL, NULL);

        if (so->so_proto == NULL) {
                error = EINVAL;
                goto out;
        }
        if ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
                error = EOPNOTSUPP;
                goto out;
        }

        /*
         * If the listen request is made on a socket that is not fully
         * disconnected, or on a socket that has been marked as inactive,
         * reject the request now.
         */
        if ((so->so_state &
            (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) ||
            (so->so_flags & SOF_DEFUNCT)) {
                error = EINVAL;
                if (so->so_flags & SOF_DEFUNCT) {
                        SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
                            "(%d)\n", __func__, proc_pid(p),
                            proc_best_name(p),
                            (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                            SOCK_DOM(so), SOCK_TYPE(so), error);
                }
                goto out;
        }

        if ((so->so_restrictions & SO_RESTRICT_DENY_IN) != 0) {
                error = EPERM;
                goto out;
        }

        error = sflt_listen(so);
        if (error == 0) {
                error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
        }

        if (error) {
                if (error == EJUSTRETURN) {
                        error = 0;
                }
                goto out;
        }

        if (TAILQ_EMPTY(&so->so_comp)) {
                so->so_options |= SO_ACCEPTCONN;
        }
        /*
         * POSIX: The implementation may have an upper limit on the length of
         * the listen queue-either global or per accepting socket. If backlog
         * exceeds this limit, the length of the listen queue is set to the
         * limit.
         *
         * If listen() is called with a backlog argument value that is less
         * than 0, the function behaves as if it had been called with a backlog
         * argument value of 0.
         *
         * A backlog argument of 0 may allow the socket to accept connections,
         * in which case the length of the listen queue may be set to an
         * implementation-defined minimum value.
         */
        if (backlog <= 0 || backlog > somaxconn) {
                backlog = somaxconn;
        }

        so->so_qlimit = backlog;
out:
        socket_unlock(so, 1);
        return error;
}
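/*
 * Illustrative sketch (not part of the original source): the backlog
 * clamping above means a userspace listener never has to special-case
 * out-of-range values; a negative or oversized backlog is silently mapped
 * to somaxconn by solisten().
 */
#if 0   /* example only */
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>

int
make_listener(uint16_t port)
{
        int fd = socket(PF_INET, SOCK_STREAM, 0);
        struct sockaddr_in sin = {
                .sin_family = AF_INET,
                .sin_port = htons(port),
                .sin_addr = { .s_addr = htonl(INADDR_ANY) },
        };

        if (fd == -1 || bind(fd, (struct sockaddr *)&sin, sizeof(sin)) == -1) {
                return -1;
        }
        /* a backlog of -1 is accepted; solisten() replaces it with somaxconn */
        if (listen(fd, -1) == -1) {
                perror("listen");
                return -1;
        }
        return fd;
}
#endif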
/*
 * The "accept list lock" protects the fields related to the listener queues
 * because we can unlock a socket to respect the lock ordering between
 * the listener socket and its client sockets. The lock ordering is first to
 * acquire the client socket before the listener socket.
 *
 * The accept list lock serializes access to the following fields:
 * - of the listener socket:
 *   - so_comp
 *   - so_incomp
 *   - so_qlen
 *   - so_incqlen
 * - of client sockets that are in so_comp or so_incomp:
 *   - so_head
 *   - so_list
 *
 * As one can see the accept list lock protects the consistency of the
 * linkage of the client sockets.
 *
 * Note that those fields may be read without holding the accept list lock
 * for a preflight provided the accept list lock is taken when committing
 * to take an action based on the result of the preflight. The preflight
 * saves the cost of doing the unlock/lock dance.
 */
void
so_acquire_accept_list(struct socket *head, struct socket *so)
{
        lck_mtx_t *mutex_held;

        if (head->so_proto->pr_getlock == NULL) {
                return;
        }
        mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
        LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

        if (!(head->so_flags1 & SOF1_ACCEPT_LIST_HELD)) {
                head->so_flags1 |= SOF1_ACCEPT_LIST_HELD;
                return;
        }
        if (so != NULL) {
                socket_unlock(so, 0);
        }
        while (head->so_flags1 & SOF1_ACCEPT_LIST_HELD) {
                so_accept_list_waits += 1;
                msleep((caddr_t)&head->so_incomp, mutex_held,
                    PSOCK | PCATCH, __func__, NULL);
        }
        head->so_flags1 |= SOF1_ACCEPT_LIST_HELD;
        if (so != NULL) {
                socket_unlock(head, 0);
                socket_lock(so, 0);
                socket_lock(head, 0);
        }
}

void
so_release_accept_list(struct socket *head)
{
        if (head->so_proto->pr_getlock != NULL) {
                lck_mtx_t *mutex_held;

                mutex_held = (*head->so_proto->pr_getlock)(head, 0);
                LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

                head->so_flags1 &= ~SOF1_ACCEPT_LIST_HELD;
                wakeup((caddr_t)&head->so_incomp);
        }
}
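/*
 * Illustrative sketch (not part of the original source):
 * so_acquire_accept_list() and so_release_accept_list() above implement a
 * simple "single holder" flag guarded by the socket mutex, with
 * msleep()/wakeup() for contention.  The same shape in portable userspace
 * code, using pthreads instead of the kernel primitives:
 */
#if 0   /* example only */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t list_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t list_cv = PTHREAD_COND_INITIALIZER;
static bool list_held;

static void
acquire_accept_list(void)
{
        pthread_mutex_lock(&list_mtx);
        while (list_held) {                     /* like msleep() on so_incomp */
                pthread_cond_wait(&list_cv, &list_mtx);
        }
        list_held = true;                       /* like SOF1_ACCEPT_LIST_HELD */
        pthread_mutex_unlock(&list_mtx);
}

static void
release_accept_list(void)
{
        pthread_mutex_lock(&list_mtx);
        list_held = false;
        pthread_cond_broadcast(&list_cv);       /* like wakeup(&so_incomp) */
        pthread_mutex_unlock(&list_mtx);
}
#endif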
void
sofreelastref(struct socket *so, int dealloc)
{
        struct socket *head = so->so_head;

        /* Assume socket is locked */

        if (!(so->so_flags & SOF_PCBCLEARING) || !(so->so_state & SS_NOFDREF)) {
                selthreadclear(&so->so_snd.sb_sel);
                selthreadclear(&so->so_rcv.sb_sel);
                so->so_rcv.sb_flags &= ~(SB_SEL | SB_UPCALL);
                so->so_snd.sb_flags &= ~(SB_SEL | SB_UPCALL);
                so->so_event = sonullevent;
                return;
        }
        if (head != NULL) {
                /*
                 * Need to lock the listener when the protocol has
                 * per socket locks
                 */
                if (head->so_proto->pr_getlock != NULL) {
                        socket_lock(head, 1);
                        so_acquire_accept_list(head, so);
                }
                if (so->so_state & SS_INCOMP) {
                        so->so_state &= ~SS_INCOMP;
                        TAILQ_REMOVE(&head->so_incomp, so, so_list);

                        if (head->so_proto->pr_getlock != NULL) {
                                so_release_accept_list(head);
                                socket_unlock(head, 1);
                        }
                } else if (so->so_state & SS_COMP) {
                        if (head->so_proto->pr_getlock != NULL) {
                                so_release_accept_list(head);
                                socket_unlock(head, 1);
                        }
                        /*
                         * We must not decommission a socket that's
                         * on the accept(2) queue.  If we do, then
                         * accept(2) may hang after select(2) indicated
                         * that the listening socket was ready.
                         */
                        selthreadclear(&so->so_snd.sb_sel);
                        selthreadclear(&so->so_rcv.sb_sel);
                        so->so_rcv.sb_flags &= ~(SB_SEL | SB_UPCALL);
                        so->so_snd.sb_flags &= ~(SB_SEL | SB_UPCALL);
                        so->so_event = sonullevent;
                        return;
                } else {
                        if (head->so_proto->pr_getlock != NULL) {
                                so_release_accept_list(head);
                                socket_unlock(head, 1);
                        }
                        printf("sofree: not queued\n");
                }
        }

#if FLOW_DIVERT
        if (so->so_flags & SOF_FLOW_DIVERT) {
                flow_divert_detach(so);
        }
#endif  /* FLOW_DIVERT */

        /* 3932268: disable upcall */
        so->so_rcv.sb_flags &= ~SB_UPCALL;
        so->so_snd.sb_flags &= ~(SB_UPCALL | SB_SNDBYTE_CNT);
        so->so_event = sonullevent;

        if (dealloc) {
                sodealloc(so);
        }
}
static void
soclose_wait_locked(struct socket *so)
{
        lck_mtx_t *mutex_held;

        if (so->so_proto->pr_getlock != NULL) {
                mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
        } else {
                mutex_held = so->so_proto->pr_domain->dom_mtx;
        }
        LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

        /*
         * Double check here and return if there's no outstanding upcall;
         * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
         */
        if (!so->so_upcallusecount || !(so->so_flags & SOF_UPCALLCLOSEWAIT)) {
                return;
        }
        so->so_rcv.sb_flags &= ~SB_UPCALL;
        so->so_snd.sb_flags &= ~SB_UPCALL;
        so->so_flags |= SOF_CLOSEWAIT;

        (void) msleep((caddr_t)&so->so_upcallusecount, mutex_held, (PZERO - 1),
            "soclose_wait_locked", NULL);
        LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
        so->so_flags &= ~SOF_CLOSEWAIT;
}
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose_locked(struct socket *so)
{
        int error = 0;
        struct timespec ts;

        if (so->so_usecount == 0) {
                panic("soclose: so=%p refcount=0\n", so);
                /* NOTREACHED */
        }

        sflt_notify(so, sock_evt_closing, NULL);

        if (so->so_upcallusecount) {
                soclose_wait_locked(so);
        }

#if CONTENT_FILTER
        /*
         * We have to wait until the content filters are done
         */
        if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
                cfil_sock_close_wait(so);
                cfil_sock_is_closed(so);
                cfil_sock_detach(so);
        }
#endif /* CONTENT_FILTER */

        if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) {
                soresume(current_proc(), so, 1);
                so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED;
        }

        if ((so->so_options & SO_ACCEPTCONN)) {
                struct socket *sp, *sonext;
                int persocklock = 0;
                int incomp_overflow_only;

                /*
                 * We do not want new connections to be added
                 * to the connection queues
                 */
                so->so_options &= ~SO_ACCEPTCONN;

                /*
                 * We can drop the lock on the listener once
                 * we've acquired the incoming list
                 */
                if (so->so_proto->pr_getlock != NULL) {
                        persocklock = 1;
                        so_acquire_accept_list(so, NULL);
                        socket_unlock(so, 0);
                }

                incomp_overflow_only = 1;

                TAILQ_FOREACH_SAFE(sp, &so->so_incomp, so_list, sonext) {
                        /*
                         * skip sockets thrown away by tcpdropdropblreq
                         * they will get cleaned up by the garbage collection.
                         * otherwise, remove the incomp socket from the queue
                         * and let soabort trigger the appropriate cleanup.
                         */
                        if (sp->so_flags & SOF_OVERFLOW) {
                                continue;
                        }

                        if (persocklock != 0) {
                                socket_lock(sp, 1);
                        }

                        /*
                         * The extra reference for the list insures the
                         * validity of the socket pointer when we perform the
                         * unlock of the head above
                         */
                        if (sp->so_state & SS_INCOMP) {
                                sp->so_state &= ~SS_INCOMP;
                                TAILQ_REMOVE(&so->so_incomp, sp, so_list);
                        } else {
                                panic("%s sp %p in so_incomp but !SS_INCOMP",
                                    __func__, sp);
                        }

                        if (persocklock != 0) {
                                socket_unlock(sp, 1);
                        }
                }

                TAILQ_FOREACH_SAFE(sp, &so->so_comp, so_list, sonext) {
                        /* Dequeue from so_comp since sofree() won't do it */
                        if (persocklock != 0) {
                                socket_lock(sp, 1);
                        }

                        if (sp->so_state & SS_COMP) {
                                sp->so_state &= ~SS_COMP;
                                TAILQ_REMOVE(&so->so_comp, sp, so_list);
                        } else {
                                panic("%s sp %p in so_comp but !SS_COMP",
                                    __func__, sp);
                        }

                        if (persocklock != 0) {
                                socket_unlock(sp, 1);
                        }
                }

                if (incomp_overflow_only == 0 && !TAILQ_EMPTY(&so->so_incomp)) {
#if (DEBUG | DEVELOPMENT)
                        panic("%s head %p so_comp not empty\n", __func__, so);
#endif /* (DEVELOPMENT || DEBUG) */
                }

                if (!TAILQ_EMPTY(&so->so_comp)) {
#if (DEBUG | DEVELOPMENT)
                        panic("%s head %p so_comp not empty\n", __func__, so);
#endif /* (DEVELOPMENT || DEBUG) */
                }

                if (persocklock != 0) {
                        socket_lock(so, 0);
                        so_release_accept_list(so);
                }
        }

        if (so->so_pcb == NULL) {
                /* 3915887: mark the socket as ready for dealloc */
                so->so_flags |= SOF_PCBCLEARING;
        }

        if (so->so_state & SS_ISCONNECTED) {
                if ((so->so_state & SS_ISDISCONNECTING) == 0) {
                        error = sodisconnectlocked(so);
                        if (error) {
                                goto drop;
                        }
                }
                if (so->so_options & SO_LINGER) {
                        lck_mtx_t *mutex_held;

                        if ((so->so_state & SS_ISDISCONNECTING) &&
                            (so->so_state & SS_NBIO)) {
                                goto drop;
                        }
                        if (so->so_proto->pr_getlock != NULL) {
                                mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
                        } else {
                                mutex_held = so->so_proto->pr_domain->dom_mtx;
                        }
                        while (so->so_state & SS_ISCONNECTED) {
                                ts.tv_sec = (so->so_linger / 100);
                                ts.tv_nsec = (so->so_linger % 100) *
                                    NSEC_PER_USEC * 1000 * 10;
                                error = msleep((caddr_t)&so->so_timeo,
                                    mutex_held, PSOCK | PCATCH, "soclose", &ts);
                                if (error) {
                                        /*
                                         * It's OK when the time fires,
                                         * don't report an error
                                         */
                                        if (error == EWOULDBLOCK) {
                                                error = 0;
                                        }
                                        break;
                                }
                        }
                }
        }
drop:
        if (so->so_usecount == 0) {
                panic("soclose: usecount is zero so=%p\n", so);
                /* NOTREACHED */
        }
        if (so->so_pcb != NULL && !(so->so_flags & SOF_PCBCLEARING)) {
                int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
                if (error == 0) {
                        error = error2;
                }
        }
        if (so->so_usecount <= 0) {
                panic("soclose: usecount is zero so=%p\n", so);
                /* NOTREACHED */
        }

        if (so->so_pcb != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
            (so->so_state & SS_NOFDREF)) {
                panic("soclose: NOFDREF");
                /* NOTREACHED */
        }
        so->so_state |= SS_NOFDREF;

        if ((so->so_flags & SOF_KNOTE) != 0) {
                KNOTE(&so->so_klist, SO_FILT_HINT_LOCKED);
        }

        atomic_add_32(&so->so_proto->pr_domain->dom_refs, -1);

        VERIFY(so->so_usecount > 0);
        so->so_usecount--;
        sofree(so);
        return error;
}
int
soclose(struct socket *so)
{
        int error = 0;

        socket_lock(so, 1);

        if (so->so_retaincnt == 0) {
                error = soclose_locked(so);
        } else {
                /*
                 * if the FD is going away, but socket is
                 * retained in kernel remove its reference
                 */
                so->so_usecount--;
                if (so->so_usecount < 2) {
                        panic("soclose: retaincnt non null and so=%p "
                            "usecount=%d\n", so, so->so_usecount);
                }
        }
        socket_unlock(so, 1);
        return error;
}
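/*
 * Illustrative sketch (not part of the original source): the msleep() loop
 * in soclose_locked() above is what services SO_LINGER for a blocking
 * close.  From userspace, linger is requested the usual way:
 */
#if 0   /* example only */
#include <sys/socket.h>
#include <unistd.h>

int
close_with_linger(int fd, int seconds)
{
        struct linger l = { .l_onoff = 1, .l_linger = seconds };

        /*
         * With l_onoff set, close(2) blocks (up to l_linger seconds) until
         * the disconnect completes, matching the SS_ISCONNECTED wait above.
         */
        if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) == -1) {
                return -1;
        }
        return close(fd);
}
#endif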
/*
 * Must be called at splnet...
 */
/* Should already be locked */
int
soabort(struct socket *so)
{
        int error = 0;

#ifdef MORE_LOCKING_DEBUG
        lck_mtx_t *mutex_held;

        if (so->so_proto->pr_getlock != NULL) {
                mutex_held = (*so->so_proto->pr_getlock)(so, 0);
        } else {
                mutex_held = so->so_proto->pr_domain->dom_mtx;
        }
        LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif

        if ((so->so_flags & SOF_ABORTED) == 0) {
                so->so_flags |= SOF_ABORTED;
                error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
                if (error) {
                        return error;
                }
        }
        return 0;
}
int
soacceptlock(struct socket *so, struct sockaddr **nam, int dolock)
{
        int error;

        if (dolock) {
                socket_lock(so, 1);
        }

        so_update_last_owner_locked(so, PROC_NULL);
        so_update_policy(so);

        so_update_necp_policy(so, NULL, NULL);

        if ((so->so_state & SS_NOFDREF) == 0) {
                panic("soaccept: !NOFDREF");
        }
        so->so_state &= ~SS_NOFDREF;
        error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);

        if (dolock) {
                socket_unlock(so, 1);
        }
        return error;
}

int
soaccept(struct socket *so, struct sockaddr **nam)
{
        return soacceptlock(so, nam, 1);
}

int
soacceptfilter(struct socket *so, struct socket *head)
{
        struct sockaddr *local = NULL, *remote = NULL;
        int error = 0;

        /*
         * Hold the lock even if this socket has not been made visible
         * to the filter(s).  For sockets with global locks, this protects
         * against the head or peer going away
         */
        socket_lock(so, 1);
        if (sogetaddr_locked(so, &remote, 1) != 0 ||
            sogetaddr_locked(so, &local, 0) != 0) {
                so->so_state &= ~SS_NOFDREF;
                socket_unlock(so, 1);
                soclose(so);
                /* Out of resources; try it again next time */
                error = ECONNABORTED;
                goto done;
        }

        error = sflt_accept(head, so, local, remote);

        /*
         * If we get EJUSTRETURN from one of the filters, mark this socket
         * as inactive and return it anyway.  This newly accepted socket
         * will be disconnected later before we hand it off to the caller.
         */
        if (error == EJUSTRETURN) {
                error = 0;
                (void) sosetdefunct(current_proc(), so,
                    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
        }

        if (error != 0) {
                /*
                 * This may seem like a duplication to the above error
                 * handling part when we return ECONNABORTED, except
                 * the following is done while holding the lock since
                 * the socket has been exposed to the filter(s) earlier.
                 */
                so->so_state &= ~SS_NOFDREF;
                socket_unlock(so, 1);
                soclose(so);
                /* Propagate socket filter's error code to the caller */
        } else {
                socket_unlock(so, 1);
        }
done:
        /* Callee checks for NULL pointer */
        sock_freeaddr(remote);
        sock_freeaddr(local);

        return error;
}
/*
 * Returns:     0                       Success
 *              EOPNOTSUPP              Operation not supported on socket
 *              EISCONN                 Socket is connected
 *      <pru_connect>:EADDRNOTAVAIL     Address not available.
 *      <pru_connect>:EINVAL            Invalid argument
 *      <pru_connect>:EAFNOSUPPORT      Address family not supported [notdef]
 *      <pru_connect>:EACCES            Permission denied
 *      <pru_connect>:EADDRINUSE        Address in use
 *      <pru_connect>:EAGAIN            Resource unavailable, try again
 *      <pru_connect>:EPERM             Operation not permitted
 *      <sf_connect_out>:???            [anything a filter writer might set]
 */
int
soconnectlock(struct socket *so, struct sockaddr *nam, int dolock)
{
        int error;
        struct proc *p = current_proc();

        if (dolock) {
                socket_lock(so, 1);
        }

        so_update_last_owner_locked(so, p);
        so_update_policy(so);

        so_update_necp_policy(so, NULL, nam);

        /*
         * If this is a listening socket or if this is a previously-accepted
         * socket that has been marked as inactive, reject the connect request.
         */
        if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
                error = EOPNOTSUPP;
                if (so->so_flags & SOF_DEFUNCT) {
                        SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
                            "(%d)\n", __func__, proc_pid(p),
                            proc_best_name(p),
                            (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                            SOCK_DOM(so), SOCK_TYPE(so), error);
                }
                if (dolock) {
                        socket_unlock(so, 1);
                }
                return error;
        }

        if ((so->so_restrictions & SO_RESTRICT_DENY_OUT) != 0) {
                if (dolock) {
                        socket_unlock(so, 1);
                }
                return EPERM;
        }

        /*
         * If protocol is connection-based, can only connect once.
         * Otherwise, if connected, try to disconnect first.
         * This allows user to disconnect by connecting to, e.g.,
         * a null address.
         */
        if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING) &&
            ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
            (error = sodisconnectlocked(so)))) {
                error = EISCONN;
        } else {
                /*
                 * Run connect filter before calling protocol:
                 *  - non-blocking connect returns before completion;
                 */
                error = sflt_connectout(so, nam);
                if (error != 0) {
                        if (error == EJUSTRETURN) {
                                error = 0;
                        }
                } else {
                        error = (*so->so_proto->pr_usrreqs->pru_connect)
                            (so, nam, p);
                        if (error != 0) {
                                so->so_state &= ~SS_ISCONNECTING;
                        }
                }
        }
        if (dolock) {
                socket_unlock(so, 1);
        }
        return error;
}

int
soconnect(struct socket *so, struct sockaddr *nam)
{
        return soconnectlock(so, nam, 1);
}
/*
 * Returns:     0                       Success
 *      <pru_connect2>:EINVAL[AF_UNIX]
 *      <pru_connect2>:EPROTOTYPE[AF_UNIX]
 *      <pru_connect2>:???              [other protocol families]
 *
 * Notes:       <pru_connect2> is not supported by [TCP].
 */
int
soconnect2(struct socket *so1, struct socket *so2)
{
        int error;

        socket_lock(so1, 1);
        if (so2->so_proto->pr_lock) {
                socket_lock(so2, 1);
        }

        error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);

        socket_unlock(so1, 1);
        if (so2->so_proto->pr_lock) {
                socket_unlock(so2, 1);
        }
        return error;
}
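/*
 * Illustrative sketch (not part of the original source): soconnectxlocked()
 * below backs the connectx(2) interface.  A userspace TCP Fast Open style
 * call might look like this; the exact connectx() prototype and the
 * CONNECT_DATA_IDEMPOTENT / CONNECT_RESUME_ON_READ_WRITE flags are assumed
 * to match <sys/socket.h> on this platform.
 */
#if 0   /* example only */
#include <sys/socket.h>
#include <string.h>

int
fastopen_connect(int fd, const struct sockaddr *dst, socklen_t dstlen)
{
        sa_endpoints_t sae;
        sae_connid_t cid = SAE_CONNID_ANY;

        memset(&sae, 0, sizeof(sae));
        sae.sae_dstaddr = dst;
        sae.sae_dstaddrlen = dstlen;

        /*
         * Idempotent data may be carried on the SYN; the actual send is
         * deferred to the first write because of CONNECT_RESUME_ON_READ_WRITE.
         */
        return connectx(fd, &sae, SAE_ASSOCID_ANY,
            CONNECT_RESUME_ON_READ_WRITE | CONNECT_DATA_IDEMPOTENT,
            NULL, 0, NULL, &cid);
}
#endif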
int
soconnectxlocked(struct socket *so, struct sockaddr *src,
    struct sockaddr *dst, struct proc *p, uint32_t ifscope,
    sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
    uint32_t arglen, uio_t auio, user_ssize_t *bytes_written)
{
    int error;

    so_update_last_owner_locked(so, p);
    so_update_policy(so);

    /*
     * If this is a listening socket or if this is a previously-accepted
     * socket that has been marked as inactive, reject the connect request.
     */
    if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
        if (so->so_flags & SOF_DEFUNCT) {
            SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
                "(%d)\n", __func__, proc_pid(p),
                (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                SOCK_DOM(so), SOCK_TYPE(so), error);
        }
        /* ... */
    }

    if ((so->so_restrictions & SO_RESTRICT_DENY_OUT) != 0) {
        /* ... */
    }

    /*
     * If protocol is connection-based, can only connect once
     * unless PR_MULTICONN is set.  Otherwise, if connected,
     * try to disconnect first.  This allows user to disconnect
     * by connecting to, e.g., a null address.
     */
    if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) &&
        !(so->so_proto->pr_flags & PR_MULTICONN) &&
        ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
        (error = sodisconnectlocked(so)) != 0)) {
        /* ... */
    } else {
        if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
            (flags & CONNECT_DATA_IDEMPOTENT)) {
            so->so_flags1 |= SOF1_DATA_IDEMPOTENT;

            if (flags & CONNECT_DATA_AUTHENTICATED) {
                so->so_flags1 |= SOF1_DATA_AUTHENTICATED;
            }
        }

        /*
         * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
         * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
         * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
         * Case 3 allows user to combine write with connect even if they have
         * no use for TFO (such as regular TCP, and UDP).
         * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
         */
        if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
            ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio)) {
            so->so_flags1 |= SOF1_PRECONNECT_DATA;
        }

        /*
         * If a user sets data idempotent and does not pass an uio, or
         * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
         * SOF1_DATA_IDEMPOTENT.
         */
        if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
            (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
            /* We should return EINVAL instead perhaps. */
            so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
        }

        /*
         * Run connect filter before calling protocol:
         *  - non-blocking connect returns before completion;
         */
        error = sflt_connectout(so, dst);
        if (error != 0) {
            /* Disable PRECONNECT_DATA, as we don't need to send a SYN anymore. */
            so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
            if (error == EJUSTRETURN) {
                /* ... */
            }
        } else {
            error = (*so->so_proto->pr_usrreqs->pru_connectx)
                (so, src, dst, p, ifscope, aid, pcid,
                flags, arg, arglen, auio, bytes_written);
            if (error != 0) {
                so->so_state &= ~SS_ISCONNECTING;
                if (error != EINPROGRESS) {
                    so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
                }
            }
        }
    }
    /* ... */
}
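
/*
 * Userspace sketch of how the TFO cases above are exercised through
 * connectx(2).  The declarations of connectx(), sa_endpoints_t,
 * SAE_ASSOCID_ANY and SAE_CONNID_ANY are assumed to be the Darwin ones from
 * <sys/socket.h>; the helper below is illustrative of Case 3 (data supplied,
 * CONNECT_RESUME_ON_READ_WRITE not set), not code taken from this file:
 *
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <sys/uio.h>
 *	#include <string.h>
 *
 *	static int tfo_connect_send(int s, const struct sockaddr_in *sin,
 *	    void *buf, size_t buflen)
 *	{
 *		sa_endpoints_t ep;
 *		struct iovec iov = { .iov_base = buf, .iov_len = buflen };
 *		size_t sent = 0;
 *		sae_connid_t cid = SAE_CONNID_ANY;
 *
 *		memset(&ep, 0, sizeof(ep));
 *		ep.sae_dstaddr = (const struct sockaddr *)sin;
 *		ep.sae_dstaddrlen = sizeof(*sin);
 *
 *		// The payload may ride on the initial SYN when the transport
 *		// supports it (SOF1_PRECONNECT_DATA / SOF1_DATA_IDEMPOTENT above).
 *		return connectx(s, &ep, SAE_ASSOCID_ANY,
 *		    CONNECT_DATA_IDEMPOTENT, &iov, 1, &sent, &cid);
 *	}
 */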
int
sodisconnectlocked(struct socket *so)
{
    int error;

    if ((so->so_state & SS_ISCONNECTED) == 0) {
        /* ... */
    }
    if (so->so_state & SS_ISDISCONNECTING) {
        /* ... */
    }

    error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
    if (error == 0) {
        sflt_notify(so, sock_evt_disconnected, NULL);
    }
    /* ... */
}
/* Locking version */
int
sodisconnect(struct socket *so)
{
    int error;

    socket_lock(so, 1);
    error = sodisconnectlocked(so);
    socket_unlock(so, 1);
    return error;
}
int
sodisconnectxlocked(struct socket *so, sae_associd_t aid, sae_connid_t cid)
{
    int error;

    /*
     * Call the protocol disconnectx handler; let it handle all
     * matters related to the connection state of this session.
     */
    error = (*so->so_proto->pr_usrreqs->pru_disconnectx)(so, aid, cid);
    if (error == 0) {
        /*
         * The event applies only for the session, not for
         * the disconnection of individual subflows.
         */
        if (so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) {
            sflt_notify(so, sock_evt_disconnected, NULL);
        }
    }
    return error;
}
int
sodisconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid)
{
    int error;

    socket_lock(so, 1);
    error = sodisconnectxlocked(so, aid, cid);
    socket_unlock(so, 1);
    return error;
}
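
/*
 * Userspace sketch: sodisconnectxlocked()/sodisconnectx() back disconnectx(2).
 * The call below assumes the Darwin declaration in <sys/socket.h> and is
 * illustrative only:
 *
 *	#include <sys/socket.h>
 *
 *	// Tear down the session on socket s; for a plain connected socket this
 *	// behaves like initiating a normal disconnect of the one connection.
 *	(void)disconnectx(s, SAE_ASSOCID_ANY, SAE_CONNID_ANY);
 */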
#define SBLOCKWAIT(f)   (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
/*
 * sosendcheck will lock the socket buffer if it isn't locked and
 * verify that there is space for the data being inserted.
 *
 * Returns:     0                       Success
 *      sblock:EWOULDBLOCK
 */
int
sosendcheck(struct socket *so, struct sockaddr *addr, user_ssize_t resid,
    int32_t clen, int32_t atomic, int flags, int *sblocked)
{
    int error = 0;
    int32_t space;

    if (*sblocked == 0) {
        if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
            so->so_send_filt_thread != 0 &&
            so->so_send_filt_thread == current_thread()) {
            /*
             * We're being called recursively from a filter,
             * allow this to continue. Radar 4150520.
             * Don't set sblocked because we don't want
             * to perform an unlock later.
             */
        } else {
            error = sblock(&so->so_snd, SBLOCKWAIT(flags));
            if (error) {
                if (so->so_flags & SOF_DEFUNCT) {
                    /* ... */
                }
                /* ... */
            }
            /* ... */
        }
    }

    /*
     * If a send attempt is made on a socket that has been marked
     * as inactive (disconnected), reject the request.
     */
    if (so->so_flags & SOF_DEFUNCT) {
        /* ... */
        SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
            __func__, proc_selfpid(), proc_best_name(current_proc()),
            (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
            SOCK_DOM(so), SOCK_TYPE(so), error);
        /* ... */
    }

    if (so->so_state & SS_CANTSENDMORE) {
#if CONTENT_FILTER
        /*
         * Can re-inject data of half closed connections
         */
        if ((so->so_state & SS_ISDISCONNECTED) == 0 &&
            so->so_snd.sb_cfil_thread == current_thread() &&
            cfil_sock_data_pending(&so->so_snd) != 0) {
            /* ... logs "so %llx ignore SS_CANTSENDMORE" with
             * (uint64_t)DEBUG_KERNEL_ADDRPERM(so) ... */
        }
#endif /* CONTENT_FILTER */
        /* ... */
    }

    if (so->so_error) {
        error = so->so_error;
        /* ... */
    }

    if ((so->so_state & SS_ISCONNECTED) == 0) {
        if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
            if (((so->so_state & SS_ISCONFIRMING) == 0) &&
                (resid != 0 || clen == 0) &&
                !(so->so_flags1 & SOF1_PRECONNECT_DATA)) {
                /* ... */
            }
        } else if (addr == 0) {
            return (so->so_proto->pr_flags & PR_CONNREQUIRED) ?
                   ENOTCONN : EDESTADDRREQ;
        }
    }

    space = sbspace(&so->so_snd);

    if (flags & MSG_OOB) {
        /* ... */
    }
    if ((atomic && resid > so->so_snd.sb_hiwat) ||
        clen > so->so_snd.sb_hiwat) {
        /* ... */
    }

    if ((space < resid + clen &&
        (atomic || (space < (int32_t)so->so_snd.sb_lowat) ||
        /* ... */ )) ||
        (so->so_type == SOCK_STREAM && so_wait_for_if_feedback(so))) {
        /*
         * don't block the connectx call when there's more data
         * than can be copied.
         */
        if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
            /* ... */
            if (space < (int32_t)so->so_snd.sb_lowat) {
                /* ... */
            }
            /* ... */
        }
        if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || /* ... */) {
            /* ... */
        }
        sbunlock(&so->so_snd, TRUE);    /* keep socket locked */
        /* ... */
        error = sbwait(&so->so_snd);
        if (error) {
            if (so->so_flags & SOF_DEFUNCT) {
                /* ... */
            }
            /* ... */
        }
        /* ... */
    }
    return 0;
}
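
/*
 * Worked example of the space test above (all numbers illustrative only):
 * with sb_hiwat = 131072, sb_lowat = 2048 and sb_cc = 130000, sbspace() is
 * roughly 1072.  A 4096-byte atomic datagram then fails
 * "space < resid + clen" and, on a blocking socket, the caller ends up in
 * sbwait(&so->so_snd) until the protocol frees enough send-buffer space;
 * on a non-blocking socket it returns EWOULDBLOCK instead.
 */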
/*
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 *
 * Returns:     0                       Success
 *      sosendcheck:EWOULDBLOCK
 *      sosendcheck:???                 [value from so_error]
 *      <pru_send>:ECONNRESET[TCP]
 *      <pru_send>:EINVAL[TCP]
 *      <pru_send>:ENOBUFS[TCP]
 *      <pru_send>:EADDRINUSE[TCP]
 *      <pru_send>:EADDRNOTAVAIL[TCP]
 *      <pru_send>:EAFNOSUPPORT[TCP]
 *      <pru_send>:EACCES[TCP]
 *      <pru_send>:EAGAIN[TCP]
 *      <pru_send>:EPERM[TCP]
 *      <pru_send>:EMSGSIZE[TCP]
 *      <pru_send>:EHOSTUNREACH[TCP]
 *      <pru_send>:ENETUNREACH[TCP]
 *      <pru_send>:ENETDOWN[TCP]
 *      <pru_send>:ENOMEM[TCP]
 *      <pru_send>:ENOBUFS[TCP]
 *      <pru_send>:???[TCP]             [ignorable: mostly IPSEC/firewall/DLIL]
 *      <pru_send>:EINVAL[AF_UNIX]
 *      <pru_send>:EOPNOTSUPP[AF_UNIX]
 *      <pru_send>:EPIPE[AF_UNIX]
 *      <pru_send>:ENOTCONN[AF_UNIX]
 *      <pru_send>:EISCONN[AF_UNIX]
 *      <pru_send>:???[AF_UNIX]         [whatever a filter author chooses]
 *      <sf_data_out>:???               [whatever a filter author chooses]
 *
 * Notes:       Other <pru_send> returns depend on the protocol family; all
 *              <sf_data_out> returns depend on what the filter author causes
 *              their filter to return.
 */
int
sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags)
{
    struct mbuf *m, *freelist = NULL;
    user_ssize_t space, len, resid, orig_resid;
    int clen = 0, error, dontroute, mlen, sendflags;
    int atomic = sosendallatonce(so) || top;
    int sblocked = 0;
    struct proc *p = current_proc();
    uint16_t headroom = 0;
    boolean_t en_tracing = FALSE;

    if (uio != NULL) {
        resid = uio_resid(uio);
    } else {
        resid = top->m_pkthdr.len;
    }
    /* ... */

    KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START), so, resid,
        so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);

    /*
     * trace if tracing & network (vs. unix) sockets & and ...
     */
    if (ENTR_SHOULDTRACE &&
        (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
        struct inpcb *inp = sotoinpcb(so);
        if (inp->inp_last_outifp != NULL &&
            !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
            /* ... */
            KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START,
                VM_KERNEL_ADDRPERM(so),
                ((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0),
                /* ... */);
        }
    }

    /*
     * Re-injection should not affect process accounting
     */
    if ((flags & MSG_SKIPCFIL) == 0) {
        so_update_last_owner_locked(so, p);
        so_update_policy(so);
        so_update_necp_policy(so, NULL, addr);
    }

    if (so->so_type != SOCK_STREAM && (flags & MSG_OOB) != 0) {
        /* ... */
    }

    /*
     * In theory resid should be unsigned.
     * However, space must be signed, as it might be less than 0
     * if we over-committed, and we must use a signed comparison
     * of space and resid.  On the other hand, a negative resid
     * causes us to loop sending 0-length segments to the protocol.
     *
     * Usually, MSG_EOR isn't used on SOCK_STREAM type sockets.
     *
     * Note: We limit resid to be a positive int value as we use
     * imin() to set bytes_to_copy -- radr://14558484
     */
    if (resid < 0 || resid > INT_MAX ||
        (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
        /* ... */
    }

    dontroute = (flags & MSG_DONTROUTE) &&
        (so->so_options & SO_DONTROUTE) == 0 &&
        (so->so_proto->pr_flags & PR_ATOMIC);
    OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);

    if (control != NULL) {
        clen = control->m_len;
    }

    if (soreserveheadroom != 0) {
        headroom = so->so_pktheadroom;
    }

    do {
        error = sosendcheck(so, addr, resid, clen, atomic, flags,
            &sblocked);
        /* ... */

        space = sbspace(&so->so_snd) - clen;
        space += ((flags & MSG_OOB) ? 1024 : 0);

        do {
            if (uio == NULL) {
                /*
                 * Data is prepackaged in "top".
                 */
                /* ... */
                if (flags & MSG_EOR) {
                    top->m_flags |= M_EOR;
                }
            } else {
                int chainlength;
                int bytes_to_copy;
                boolean_t jumbocl;
                boolean_t bigcl;
                int bytes_to_alloc;

                bytes_to_copy = imin(resid, space);

                bytes_to_alloc = bytes_to_copy;
                /* ... */
                    bytes_to_alloc += headroom;
                /* ... */

                if (sosendminchain > 0) {
                    /* ... */
                } else {
                    chainlength = sosendmaxchain;
                }

                /*
                 * Use big 4 KB cluster when the outgoing interface
                 * does not prefer 2 KB clusters
                 */
                bigcl = !(so->so_flags1 & SOF1_IF_2KCL) ||
                    sosendbigcl_ignore_capab;

                /*
                 * Attempt to use larger than system page-size
                 * clusters for large writes only if there is
                 * a jumbo cluster pool and if the socket is
                 * marked accordingly.
                 */
                jumbocl = sosendjcl && njcl > 0 &&
                    ((so->so_flags & SOF_MULTIPAGES) ||
                    sosendjcl_ignore_capab) /* ... */;

                socket_unlock(so, 0);

                do {
                    int num_needed;
                    int hdrs_needed = (top == NULL) ? 1 : 0;

                    /*
                     * Try to maintain a local cache of mbuf
                     * clusters needed to complete this write;
                     * the list is further limited to the number
                     * that are currently needed to fill the
                     * socket.  This mechanism allows a large
                     * number of mbufs/clusters to be grabbed
                     * under a single mbuf lock.  If we can't
                     * get any clusters, then fall back to trying
                     * for mbufs.  If we fail early (or
                     * miscalculate the number needed), make
                     * sure to release any clusters we haven't
                     * yet consumed.
                     */
                    if (freelist == NULL &&
                        bytes_to_alloc > MBIGCLBYTES &&
                        /* ... */) {
                        num_needed =
                            bytes_to_alloc / M16KCLBYTES;

                        if ((bytes_to_alloc -
                            (num_needed * M16KCLBYTES)) /* ... */) {
                            /* ... */
                        }

                        freelist =
                            m_getpackets_internal(
                                (unsigned int *)&num_needed,
                                hdrs_needed, M_WAIT, 0,
                                /* ... */);
                        /*
                         * Fall back to 4K cluster size
                         * if allocation failed
                         */
                    }

                    if (freelist == NULL &&
                        bytes_to_alloc > MCLBYTES &&
                        /* ... */) {
                        num_needed =
                            bytes_to_alloc / MBIGCLBYTES;

                        if ((bytes_to_alloc -
                            (num_needed * MBIGCLBYTES)) >=
                            /* ... */) {
                            /* ... */
                        }

                        freelist =
                            m_getpackets_internal(
                                (unsigned int *)&num_needed,
                                hdrs_needed, M_WAIT, 0,
                                /* ... */);
                        /*
                         * Fall back to cluster size
                         * if allocation failed
                         */
                    }

                    /*
                     * Allocate a cluster as we want to
                     * avoid splitting the data in more
                     * than one segment; using MINCLSIZE
                     * would lead us to allocate two mbufs
                     */
                    if (soreserveheadroom != 0 &&
                        /* ... */ bytes_to_alloc > _MHLEN) ||
                        bytes_to_alloc > _MLEN)) {
                        num_needed = ROUNDUP(bytes_to_alloc, MCLBYTES) /
                            MCLBYTES;
                        freelist =
                            m_getpackets_internal(
                                (unsigned int *)&num_needed,
                                hdrs_needed, M_WAIT, 0,
                                /* ... */);
                        /*
                         * Fall back to a single mbuf
                         * if allocation failed
                         */
                    } else if (freelist == NULL &&
                        bytes_to_alloc > MINCLSIZE) {
                        num_needed =
                            bytes_to_alloc / MCLBYTES;

                        if ((bytes_to_alloc -
                            (num_needed * MCLBYTES)) >=
                            /* ... */) {
                            /* ... */
                        }

                        freelist =
                            m_getpackets_internal(
                                (unsigned int *)&num_needed,
                                hdrs_needed, M_WAIT, 0,
                                /* ... */);
                        /*
                         * Fall back to a single mbuf
                         * if allocation failed
                         */
                    }
                    /*
                     * For datagram protocols, leave
                     * headroom for protocol headers
                     * in the first cluster of the chain
                     */
                    if (freelist != NULL && atomic &&
                        top == NULL && headroom > 0) {
                        freelist->m_data += headroom;
                    }

                    /*
                     * Fall back to regular mbufs without
                     * reserving the socket headroom
                     */
                    if (freelist == NULL) {
                        if (SOCK_TYPE(so) != SOCK_STREAM || bytes_to_alloc <= MINCLSIZE) {
                            /* ... */
                        }

                        if (freelist == NULL) {
                            /* ... */
                        }
                        /*
                         * For datagram protocols,
                         * leave room for protocol
                         * headers in first mbuf.
                         */
                        if (atomic && top == NULL &&
                            bytes_to_copy < MHLEN) {
                            /* ... */
                        }
                    }
                    /* ... */
                    freelist = m->m_next;
                    /* ... */

                    if ((m->m_flags & M_EXT)) {
                        mlen = m->m_ext.ext_size -
                            M_LEADINGSPACE(m);
                    } else if ((m->m_flags & M_PKTHDR)) {
                        mlen =
                            MHLEN - M_LEADINGSPACE(m);
                    } else {
                        mlen = MLEN - M_LEADINGSPACE(m);
                    }
                    len = imin(mlen, bytes_to_copy);
                    /* ... */
                    error = uiomove(mtod(m, caddr_t),
                        /* ... */);
                    /* ... */
                    resid = uio_resid(uio);
                    /* ... */
                    top->m_pkthdr.len += len;
                    /* ... */
                    if (flags & MSG_EOR) {
                        top->m_flags |= M_EOR;
                    }
                    /* ... */
                    bytes_to_copy = min(resid, space);
                } while (space > 0 &&
                    (chainlength < sosendmaxchain || atomic ||
                    resid < MINCLSIZE));
                /* ... */
            }

            if (dontroute) {
                so->so_options |= SO_DONTROUTE;
            }

            /*
             * Compute flags here, for pru_send and NKEs
             *
             * If the user set MSG_EOF, the protocol
             * understands this flag and nothing left to
             * send then use PRU_SEND_EOF instead of PRU_SEND.
             */
            sendflags = (flags & MSG_OOB) ? PRUS_OOB :
                ((flags & MSG_EOF) &&
                (so->so_proto->pr_flags & PR_IMPLOPCL) &&
                (resid <= 0)) ? PRUS_EOF :
                /* If there is more to send set PRUS_MORETOCOME */
                (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;

            if ((flags & MSG_SKIPCFIL) == 0) {
                /*
                 * Socket filter processing
                 */
                error = sflt_data_out(so, addr, &top,
                    &control, (sendflags & MSG_OOB) ?
                    sock_data_filt_flag_oob : 0);
                if (error == EJUSTRETURN) {
                    /* ... */
                }
#if CONTENT_FILTER
                /*
                 * Content filter processing
                 */
                error = cfil_sock_data_out(so, addr, top,
                    control, sendflags);
                if (error == EJUSTRETURN) {
                    /* ... */
                }
#endif /* CONTENT_FILTER */
            }
            error = (*so->so_proto->pr_usrreqs->pru_send)
                (so, sendflags, top, addr, control, p);
            /* ... */
            if (dontroute) {
                so->so_options &= ~SO_DONTROUTE;
            }
            /* ... */
        } while (resid && space > 0);
    } while (resid);

    if (sblocked) {
        sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
    } else {
        socket_unlock(so, 1);
    }
    /* ... */
    if (control != NULL) {
        /* ... */
    }
    if (freelist != NULL) {
        m_freem_list(freelist);
    }
    /* ... */
    soclearfastopen(so);

    if (en_tracing) {
        /* resid passed here is the bytes left in uio */
        KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END,
            VM_KERNEL_ADDRPERM(so),
            ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
            (int64_t)(orig_resid - resid));
    }
    KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, so, resid,
        so->so_snd.sb_cc, space, error);

    return error;
}
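
/*
 * Userspace sketch of the "check for short counts if EINTR/ERESTART are
 * returned" note in the sosend() header comment.  This is the classic
 * retry loop over the standard send(2) API; the helper name is illustrative:
 *
 *	#include <sys/socket.h>
 *	#include <errno.h>
 *
 *	static ssize_t send_all(int s, const char *p, size_t n)
 *	{
 *		size_t off = 0;
 *		while (off < n) {
 *			ssize_t w = send(s, p + off, n - off, 0);
 *			if (w > 0) {
 *				off += (size_t)w;   // partial write: keep going
 *			} else if (w < 0 && errno == EINTR) {
 *				continue;           // interrupted: retry
 *			} else {
 *				return -1;          // EWOULDBLOCK, EPIPE, ...
 *			}
 *		}
 *		return (ssize_t)off;
 *	}
 */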
sosend_reinject(struct socket *so, struct sockaddr *addr, struct mbuf *top, struct mbuf *control, uint32_t sendflags)
{
    struct mbuf *m0 = NULL, *control_end = NULL;

    socket_lock_assert_owned(so);

    /*
     * top must point to an mbuf chain to be sent.
     * If control is not NULL, top must be a packet header.
     */
    VERIFY(top != NULL &&
        (control == NULL || top->m_flags & M_PKTHDR));

    /*
     * If control is not passed in, see if we can get it
     * from the mbuf chain.
     */
    if (control == NULL && (top->m_flags & M_PKTHDR) == 0) {
        // Locate start of control if present and start of data
        for (m0 = top; m0 != NULL; m0 = m0->m_next) {
            if (m0->m_flags & M_PKTHDR) {
                /* ... */
            } else if (m0->m_type == MT_CONTROL) {
                if (control == NULL) {
                    // Found start of control
                    control = m0;
                }
                if (control != NULL && m0->m_next != NULL && m0->m_next->m_type != MT_CONTROL) {
                    // Found end of control
                    control_end = m0;
                }
            }
        }
        if (control_end != NULL) {
            control_end->m_next = NULL;
        }
    }

    int error = (*so->so_proto->pr_usrreqs->pru_send)
        (so, sendflags, top, addr, control, current_proc());
    /* ... */
}
/*
 * Supports only connected sockets (no address) without ancillary data
 * (control mbuf), for atomic protocols.
 */
int
sosend_list(struct socket *so, struct uio **uioarray, u_int uiocnt, int flags)
{
    struct mbuf *m, *freelist = NULL;
    user_ssize_t len, resid;
    int error, dontroute, mlen;
    int atomic = sosendallatonce(so);
    int sblocked = 0;
    struct proc *p = current_proc();
    struct mbuf *top = NULL;
    uint16_t headroom = 0;
    boolean_t bigcl;

    KERNEL_DEBUG((DBG_FNC_SOSEND_LIST | DBG_FUNC_START), so, uiocnt,
        so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);

    if (so->so_type != SOCK_DGRAM) {
        /* ... */
    }
    if (so->so_proto->pr_usrreqs->pru_send_list == NULL) {
        error = EPROTONOSUPPORT;
        /* ... */
    }
    if (flags & ~(MSG_DONTWAIT | MSG_NBIO)) {
        /* ... */
    }

    resid = uio_array_resid(uioarray, uiocnt);

    /*
     * In theory resid should be unsigned.
     * However, space must be signed, as it might be less than 0
     * if we over-committed, and we must use a signed comparison
     * of space and resid.  On the other hand, a negative resid
     * causes us to loop sending 0-length segments to the protocol.
     *
     * Note: We limit resid to be a positive int value as we use
     * imin() to set bytes_to_copy -- radr://14558484
     */
    if (resid < 0 || resid > INT_MAX) {
        /* ... */
    }

    so_update_last_owner_locked(so, p);
    so_update_policy(so);
    so_update_necp_policy(so, NULL, NULL);

    dontroute = (flags & MSG_DONTROUTE) &&
        (so->so_options & SO_DONTROUTE) == 0 &&
        (so->so_proto->pr_flags & PR_ATOMIC);
    OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);

    error = sosendcheck(so, NULL, resid, 0, atomic, flags, &sblocked);
    /* ... */

    /*
     * Use big 4 KB clusters when the outgoing interface does not prefer
     * 2 KB clusters
     */
    bigcl = !(so->so_flags1 & SOF1_IF_2KCL) || sosendbigcl_ignore_capab;

    if (soreserveheadroom != 0) {
        headroom = so->so_pktheadroom;
    }

    do {
        int i;
        int num_needed;
        int chainlength;
        size_t maxpktlen = 0;
        int bytes_to_alloc;

        if (sosendminchain > 0) {
            /* ... */
        } else {
            chainlength = sosendmaxchain;
        }

        socket_unlock(so, 0);

        /*
         * Find a set of uio that fit in a reasonable number
         * of mbuf packets
         */
        for (i = uiofirst; i < uiocnt; i++) {
            struct uio *auio = uioarray[i];

            len = uio_resid(auio);

            /* Do nothing for empty messages */
            if (len == 0) {
                /* ... */
            }
            /* ... */
            if (len > maxpktlen) {
                /* ... */
            }
            /* ... */
            if (chainlength > sosendmaxchain) {
                /* ... */
            }
        }
        /*
         * Nothing left to send
         */
        if (num_needed == 0) {
            /* ... */
        }
        /*
         * Allocate buffer large enough to include headroom space for
         * network and link header
         */
        bytes_to_alloc = maxpktlen + headroom;

        /*
         * Allocate a single contiguous buffer of the smallest available
         * size when possible
         */
        if (bytes_to_alloc > MCLBYTES &&
            bytes_to_alloc <= MBIGCLBYTES && bigcl) {
            freelist = m_getpackets_internal(
                    (unsigned int *)&num_needed,
                    num_needed, M_WAIT, 1,
                    /* ... */);
        } else if (bytes_to_alloc > _MHLEN &&
            bytes_to_alloc <= MCLBYTES) {
            freelist = m_getpackets_internal(
                    (unsigned int *)&num_needed,
                    num_needed, M_WAIT, 1,
                    /* ... */);
        } else {
            freelist = m_allocpacket_internal(
                    (unsigned int *)&num_needed,
                    bytes_to_alloc, NULL, M_WAIT, 1, 0);
        }

        if (freelist == NULL) {
            /* ... */
        }
        /*
         * Copy each uio of the set into its own mbuf packet
         */
        for (i = uiofirst, m = freelist;
            i < uiolast && m != NULL;
            /* ... */) {
            int bytes_to_copy;
            struct mbuf *n;
            struct uio *auio = uioarray[i];

            bytes_to_copy = uio_resid(auio);

            /* Do nothing for empty messages */
            if (bytes_to_copy == 0) {
                /* ... */
            }
            /*
             * Leave headroom for protocol headers
             * in the first mbuf of the chain
             */
            m->m_data += headroom;

            for (n = m; n != NULL; n = n->m_next) {
                if ((m->m_flags & M_EXT)) {
                    mlen = m->m_ext.ext_size -
                        M_LEADINGSPACE(m);
                } else if ((m->m_flags & M_PKTHDR)) {
                    mlen =
                        MHLEN - M_LEADINGSPACE(m);
                } else {
                    mlen = MLEN - M_LEADINGSPACE(m);
                }
                len = imin(mlen, bytes_to_copy);

                /*
                 * Note: uiomove() decrements the iovec ...
                 */
                error = uiomove(mtod(n, caddr_t),
                    /* ... */);
                /* ... */
                m->m_pkthdr.len += len;
                /* ... */
                VERIFY(m->m_pkthdr.len <= maxpktlen);
                /* ... */
                bytes_to_copy -= len;
                /* ... */
            }
            if (m->m_pkthdr.len == 0) {
                /* ... logs "%s:%d so %llx pkt %llx type %u len null\n" with
                 * (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                 * (uint64_t)DEBUG_KERNEL_ADDRPERM(m), ... */
            }
            /* ... */
        }

        if (dontroute) {
            so->so_options |= SO_DONTROUTE;
        }

        if ((flags & MSG_SKIPCFIL) == 0) {
            struct mbuf **prevnextp = NULL;

            for (i = uiofirst, m = top;
                i < uiolast && m != NULL;
                /* ... */) {
                struct mbuf *nextpkt = m->m_nextpkt;

                /*
                 * Socket filter processing
                 */
                error = sflt_data_out(so, NULL, &m,
                    /* ... */);
                if (error != 0 && error != EJUSTRETURN) {
                    /* ... */
                }

#if CONTENT_FILTER
                /*
                 * Content filter processing
                 */
                error = cfil_sock_data_out(so, NULL, m,
                    /* ... */);
                if (error != 0 && error != EJUSTRETURN) {
                    /* ... */
                }
#endif /* CONTENT_FILTER */
                /*
                 * Remove packet from the list when
                 * swallowed by a filter
                 */
                if (error == EJUSTRETURN) {
                    /* ... */
                    if (prevnextp != NULL) {
                        *prevnextp = nextpkt;
                    }
                    /* ... */
                } else {
                    prevnextp = &m->m_nextpkt;
                }
                /* ... */
            }
        }

        error = (*so->so_proto->pr_usrreqs->pru_send_list)
            (so, 0, top, NULL, NULL, p);
        /* ... */
        if (dontroute) {
            so->so_options &= ~SO_DONTROUTE;
        }
        /* ... */
    } while (resid > 0 && error == 0);

    if (sblocked) {
        sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
    } else {
        socket_unlock(so, 1);
    }
    /* ... */
    if (freelist != NULL) {
        m_freem_list(freelist);
    }

    KERNEL_DEBUG(DBG_FNC_SOSEND_LIST | DBG_FUNC_END, so, resid,
        so->so_snd.sb_cc, 0, error);

    return error;
}
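
/*
 * Worked example for the batching above (sizes illustrative only): with
 * uiocnt = 3 datagrams of 300, 1200 and 9000 bytes and a 16-byte socket
 * headroom, maxpktlen is 9000 and bytes_to_alloc is 9016, which exceeds
 * MBIGCLBYTES, so the allocator takes the m_allocpacket_internal() branch.
 * The resulting packet chains come back on "freelist", each uio is copied
 * into its own chain, and the whole list is handed to <pru_send_list> in a
 * single call instead of one <pru_send> per datagram.
 */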
/*
 * May return ERESTART when packet is dropped by MAC policy check
 */
static int
soreceive_addr(struct proc *p, struct socket *so, struct sockaddr **psa,
    int flags, struct mbuf **mp, struct mbuf **nextrecordp, int canwait)
{
    int error = 0;
    struct mbuf *m = *mp;
    struct mbuf *nextrecord = *nextrecordp;

    KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
#if CONFIG_MACF_SOCKET_SUBSET
    /*
     * Call the MAC framework for policy checking if we're in
     * the user process context and the socket isn't connected.
     */
    if (p != kernproc && !(so->so_state & SS_ISCONNECTED)) {
        struct mbuf *m0 = m;
        /*
         * Dequeue this record (temporarily) from the receive
         * list since we're about to drop the socket's lock
         * where a new record may arrive and be appended to
         * the list.  Upon MAC policy failure, the record
         * will be freed.  Otherwise, we'll add it back to
         * the head of the list.  We cannot rely on SB_LOCK
         * because append operation uses the socket's lock.
         */
        do {
            m->m_nextpkt = NULL;
            sbfree(&so->so_rcv, m);
            /* ... */
        } while (m != NULL);

        so->so_rcv.sb_mb = nextrecord;
        SB_EMPTY_FIXUP(&so->so_rcv);
        SBLASTRECORDCHK(&so->so_rcv, "soreceive 1a");
        SBLASTMBUFCHK(&so->so_rcv, "soreceive 1a");
        socket_unlock(so, 0);

        if (mac_socket_check_received(proc_ucred(p), so,
            mtod(m, struct sockaddr *)) != 0) {
            /*
             * MAC policy failure; free this record and
             * process the next record (or block until
             * one is available).  We have adjusted sb_cc
             * and sb_mbcnt above so there is no need to
             * call sbfree() again.
             */
            /* ... */
            /*
             * Clear SB_LOCK but don't unlock the socket.
             * Process the next record or wait for one.
             */
            sbunlock(&so->so_rcv, TRUE); /* stay locked */
            /* ... */
        }
        /* ... */

        /*
         * If the socket has been defunct'd, drop it.
         */
        if (so->so_flags & SOF_DEFUNCT) {
            /* ... */
        }

        /*
         * Re-adjust the socket receive list and re-enqueue
         * the record in front of any packets which may have
         * been appended while we dropped the lock.
         */
        for (m = m0; m->m_next != NULL; m = m->m_next) {
            sballoc(&so->so_rcv, m);
        }
        sballoc(&so->so_rcv, m);
        if (so->so_rcv.sb_mb == NULL) {
            so->so_rcv.sb_lastrecord = m0;
            so->so_rcv.sb_mbtail = m;
        }
        /* ... */
        nextrecord = m->m_nextpkt = so->so_rcv.sb_mb;
        so->so_rcv.sb_mb = m;
        SBLASTRECORDCHK(&so->so_rcv, "soreceive 1b");
        SBLASTMBUFCHK(&so->so_rcv, "soreceive 1b");
    }
#endif /* CONFIG_MACF_SOCKET_SUBSET */

    /* ... */
    *psa = dup_sockaddr(mtod(m, struct sockaddr *), canwait);
    if ((*psa == NULL) && (flags & MSG_NEEDSA)) {
        error = EWOULDBLOCK;
        /* ... */
    }
    /* ... */
    if (flags & MSG_PEEK) {
        /* ... */
    } else {
        sbfree(&so->so_rcv, m);
        if (m->m_next == NULL && so->so_rcv.sb_cc != 0) {
            panic("%s: about to create invalid socketbuf",
                __func__);
        }
        MFREE(m, so->so_rcv.sb_mb);
        m = so->so_rcv.sb_mb;
        if (m != NULL) {
            m->m_nextpkt = nextrecord;
        } else {
            so->so_rcv.sb_mb = nextrecord;
            SB_EMPTY_FIXUP(&so->so_rcv);
        }
    }
    /* ... */
    *nextrecordp = nextrecord;
    /* ... */
}
/*
 * Process one or more MT_CONTROL mbufs present before any data mbufs
 * in the first mbuf chain on the socket buffer.  If MSG_PEEK, we
 * just copy the data; if !MSG_PEEK, we call into the protocol to
 * perform externalization.
 */
static int
soreceive_ctl(struct socket *so, struct mbuf **controlp, int flags,
    struct mbuf **mp, struct mbuf **nextrecordp)
{
    int error = 0;
    struct mbuf *cm = NULL, *cmn;
    struct mbuf **cme = &cm;
    struct sockbuf *sb_rcv = &so->so_rcv;
    struct mbuf **msgpcm = NULL;
    struct mbuf *m = *mp;
    struct mbuf *nextrecord = *nextrecordp;
    struct protosw *pr = so->so_proto;

    /*
     * Externalizing the control messages would require us to
     * drop the socket's lock below.  Once we re-acquire the
     * lock, the mbuf chain might change.  In order to preserve
     * consistency, we unlink all control messages from the
     * first mbuf chain in one shot and link them separately
     * onto a different chain.
     */
    do {
        if (flags & MSG_PEEK) {
            if (controlp != NULL) {
                if (*controlp == NULL) {
                    msgpcm = controlp;
                }
                *controlp = m_copy(m, 0, m->m_len);

                /*
                 * If we failed to allocate an mbuf,
                 * release any previously allocated
                 * mbufs for control data. Return
                 * an error. Keep the mbufs in the
                 * socket as this is using
                 * MSG_PEEK flag.
                 */
                if (*controlp == NULL) {
                    /* ... */
                }
                controlp = &(*controlp)->m_next;
            }
            /* ... */
        } else {
            m->m_nextpkt = NULL;
            /* ... */
            sb_rcv->sb_mb = m->m_next;
            /* ... */
            cme = &(*cme)->m_next;
            /* ... */
        }
    } while (m != NULL && m->m_type == MT_CONTROL);

    if (!(flags & MSG_PEEK)) {
        if (sb_rcv->sb_mb != NULL) {
            sb_rcv->sb_mb->m_nextpkt = nextrecord;
        } else {
            sb_rcv->sb_mb = nextrecord;
            SB_EMPTY_FIXUP(sb_rcv);
        }
        if (nextrecord == NULL) {
            sb_rcv->sb_lastrecord = m;
        }
    }

    SBLASTRECORDCHK(&so->so_rcv, "soreceive ctl");
    SBLASTMBUFCHK(&so->so_rcv, "soreceive ctl");

    while (cm != NULL) {
        int cmsg_type;

        /* ... */
        cmsg_type = mtod(cm, struct cmsghdr *)->cmsg_type;

        /*
         * Call the protocol to externalize SCM_RIGHTS message
         * and return the modified message to the caller upon
         * success.  Otherwise, all other control messages are
         * returned unmodified to the caller.  Note that we
         * only get into this loop if MSG_PEEK is not set.
         */
        if (pr->pr_domain->dom_externalize != NULL &&
            cmsg_type == SCM_RIGHTS) {
            /*
             * Release socket lock: see 3903171.  This
             * would also allow more records to be appended
             * to the socket buffer.  We still have SB_LOCK
             * set on it, so we can be sure that the head
             * of the mbuf chain won't change.
             */
            socket_unlock(so, 0);
            error = (*pr->pr_domain->dom_externalize)(cm);
            /* ... */
        }

        if (controlp != NULL && error == 0) {
            /* ... */
            controlp = &(*controlp)->m_next;
        }
        /* ... */
    }
    /*
     * Update the value of nextrecord in case we received new
     * records when the socket was unlocked above for
     * externalizing SCM_RIGHTS.
     */
    if (/* ... */) {
        nextrecord = sb_rcv->sb_mb->m_nextpkt;
    } else {
        nextrecord = sb_rcv->sb_mb;
    }

    /* ... */
    *nextrecordp = nextrecord;
    /* ... */
}
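
/*
 * Userspace sketch of the receiver side of the SCM_RIGHTS externalization
 * performed above: picking up a passed file descriptor with recvmsg(2).
 * Standard POSIX CMSG handling; the helper name is illustrative:
 *
 *	#include <sys/socket.h>
 *	#include <string.h>
 *
 *	static int recv_fd(int s)
 *	{
 *		char data;
 *		struct iovec iov = { .iov_base = &data, .iov_len = 1 };
 *		union { struct cmsghdr hdr; char buf[CMSG_SPACE(sizeof(int))]; } u;
 *		struct msghdr msg = {
 *			.msg_iov = &iov, .msg_iovlen = 1,
 *			.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
 *		};
 *		if (recvmsg(s, &msg, 0) < 0) {
 *			return -1;
 *		}
 *		struct cmsghdr *c = CMSG_FIRSTHDR(&msg);
 *		if (c != NULL && c->cmsg_level == SOL_SOCKET &&
 *		    c->cmsg_type == SCM_RIGHTS) {
 *			int fd;
 *			memcpy(&fd, CMSG_DATA(c), sizeof(fd));
 *			return fd;      // the descriptor the kernel externalized
 *		}
 *		return -1;
 *	}
 */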
/*
 * If we have less data than requested, block awaiting more
 * (subject to any timeout) if:
 *   1. the current count is less than the low water mark, or
 *   2. MSG_WAITALL is set, and it is possible to do the entire
 *      receive operation at once if we block (resid <= hiwat).
 *   3. MSG_DONTWAIT is not set
 * If MSG_WAITALL is set but resid is larger than the receive buffer,
 * we have to do the receive in sections, and thus risk returning
 * a short count if a timeout or signal occurs after we start.
 */
so_should_wait(struct socket *so, struct uio *uio, struct mbuf *m, int flags)
{
    struct protosw *pr = so->so_proto;

    /* No mbufs in the receive-queue? Wait! */
    if (m == NULL) {
        return true;
    }

    /* Not enough data in the receive socket-buffer - we may have to wait */
    if ((flags & MSG_DONTWAIT) == 0 && so->so_rcv.sb_cc < uio_resid(uio) &&
        m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0) {
        /*
         * The application set the low-water mark, so we should wait
         * for this data to be present.
         */
        if (so->so_rcv.sb_cc < so->so_rcv.sb_lowat) {
            return true;
        }

        /*
         * The application wants all the data - so let's try to do the
         * receive-operation at once by waiting for everything to
         * be there.
         */
        if ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat) {
            return true;
        }
    }

    return false;
}
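
/*
 * Worked example of the wait decision above (numbers illustrative only): on
 * a stream socket with sb_lowat = 1, sb_cc = 100 and a 4096-byte read,
 * condition 1 does not apply (100 >= sb_lowat), so soreceive() returns the
 * 100 bytes immediately -- unless the caller passed MSG_WAITALL and
 * 4096 <= sb_hiwat, in which case it keeps sleeping in sbwait() for the
 * remainder.
 */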
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 *
 * Returns:     0                       Success
 *      sblock:EWOULDBLOCK
 *      sodelayed_copy:EFAULT
 *      <pru_rcvoob>:EINVAL[TCP]
 *      <pru_rcvoob>:EWOULDBLOCK[TCP]
 *      <pr_domain->dom_externalize>:EMSGSIZE[AF_UNIX]
 *      <pr_domain->dom_externalize>:ENOBUFS[AF_UNIX]
 *      <pr_domain->dom_externalize>:???
 *
 * Notes:       Additional return values from calls through <pru_rcvoob> and
 *              <pr_domain->dom_externalize> depend on protocols other than
 *              TCP or AF_UNIX, which are documented above.
 */
3275 soreceive(struct socket 
*so
, struct sockaddr 
**psa
, struct uio 
*uio
, 
3276     struct mbuf 
**mp0
, struct mbuf 
**controlp
, int *flagsp
) 
3278         struct mbuf 
*m
, **mp
, *ml 
= NULL
; 
3279         struct mbuf 
*nextrecord
, *free_list
; 
3280         int flags
, error
, offset
; 
3282         struct protosw 
*pr 
= so
->so_proto
; 
3284         user_ssize_t orig_resid 
= uio_resid(uio
); 
3285         user_ssize_t delayed_copy_len
; 
3287         struct proc 
*p 
= current_proc(); 
3288         boolean_t en_tracing 
= FALSE
; 
3291          * Sanity check on the length passed by caller as we are making 'int' 
3294         if (orig_resid 
< 0 || orig_resid 
> INT_MAX
) { 
3298         KERNEL_DEBUG(DBG_FNC_SORECEIVE 
| DBG_FUNC_START
, so
, 
3299             uio_resid(uio
), so
->so_rcv
.sb_cc
, so
->so_rcv
.sb_lowat
, 
3300             so
->so_rcv
.sb_hiwat
); 
3303         so_update_last_owner_locked(so
, p
); 
3304         so_update_policy(so
); 
3306 #ifdef MORE_LOCKING_DEBUG 
3307         if (so
->so_usecount 
== 1) { 
3308                 panic("%s: so=%x no other reference on socket\n", __func__
, so
); 
3316         if (controlp 
!= NULL
) { 
3319         if (flagsp 
!= NULL
) { 
3320                 flags 
= *flagsp 
& ~MSG_EOR
; 
3326          * If a recv attempt is made on a previously-accepted socket 
3327          * that has been marked as inactive (disconnected), reject 
3330         if (so
->so_flags 
& SOF_DEFUNCT
) { 
3331                 struct sockbuf 
*sb 
= &so
->so_rcv
; 
3334                 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n", 
3335                     __func__
, proc_pid(p
), proc_best_name(p
), 
3336                     (uint64_t)DEBUG_KERNEL_ADDRPERM(so
), 
3337                     SOCK_DOM(so
), SOCK_TYPE(so
), error
); 
3339                  * This socket should have been disconnected and flushed 
3340                  * prior to being returned from sodefunct(); there should 
3341                  * be no data on its receive list, so panic otherwise. 
3343                 if (so
->so_state 
& SS_DEFUNCT
) { 
3344                         sb_empty_assert(sb
, __func__
); 
3346                 socket_unlock(so
, 1); 
3350         if ((so
->so_flags1 
& SOF1_PRECONNECT_DATA
) && 
3351             pr
->pr_usrreqs
->pru_preconnect
) { 
3353                  * A user may set the CONNECT_RESUME_ON_READ_WRITE-flag but not 
3354                  * calling write() right after this. *If* the app calls a read 
3355                  * we do not want to block this read indefinetely. Thus, 
3356                  * we trigger a connect so that the session gets initiated. 
3358                 error 
= (*pr
->pr_usrreqs
->pru_preconnect
)(so
); 
3361                         socket_unlock(so
, 1); 
3366         if (ENTR_SHOULDTRACE 
&& 
3367             (SOCK_CHECK_DOM(so
, AF_INET
) || SOCK_CHECK_DOM(so
, AF_INET6
))) { 
3369                  * enable energy tracing for inet sockets that go over 
3370                  * non-loopback interfaces only. 
3372                 struct inpcb 
*inp 
= sotoinpcb(so
); 
3373                 if (inp
->inp_last_outifp 
!= NULL 
&& 
3374                     !(inp
->inp_last_outifp
->if_flags 
& IFF_LOOPBACK
)) { 
3376                         KERNEL_ENERGYTRACE(kEnTrActKernSockRead
, DBG_FUNC_START
, 
3377                             VM_KERNEL_ADDRPERM(so
), 
3378                             ((so
->so_state 
& SS_NBIO
) ? 
3379                             kEnTrFlagNonBlocking 
: 0), 
3380                             (int64_t)orig_resid
); 
3385          * When SO_WANTOOBFLAG is set we try to get out-of-band data 
3386          * regardless of the flags argument. Here is the case were 
3387          * out-of-band data is not inline. 
3389         if ((flags 
& MSG_OOB
) || 
3390             ((so
->so_options 
& SO_WANTOOBFLAG
) != 0 && 
3391             (so
->so_options 
& SO_OOBINLINE
) == 0 && 
3392             (so
->so_oobmark 
|| (so
->so_state 
& SS_RCVATMARK
)))) { 
3393                 m 
= m_get(M_WAIT
, MT_DATA
); 
3395                         socket_unlock(so
, 1); 
3396                         KERNEL_DEBUG(DBG_FNC_SORECEIVE 
| DBG_FUNC_END
, 
3397                             ENOBUFS
, 0, 0, 0, 0); 
3400                 error 
= (*pr
->pr_usrreqs
->pru_rcvoob
)(so
, m
, flags 
& MSG_PEEK
); 
3404                 socket_unlock(so
, 0); 
3406                         error 
= uiomove(mtod(m
, caddr_t
), 
3407                             imin(uio_resid(uio
), m
->m_len
), uio
); 
3409                 } while (uio_resid(uio
) && error 
== 0 && m 
!= NULL
); 
3416                 if ((so
->so_options 
& SO_WANTOOBFLAG
) != 0) { 
3417                         if (error 
== EWOULDBLOCK 
|| error 
== EINVAL
) { 
3419                                  * Let's try to get normal data: 
3420                                  * EWOULDBLOCK: out-of-band data not 
3421                                  * receive yet. EINVAL: out-of-band data 
3426                         } else if (error 
== 0 && flagsp 
!= NULL
) { 
3430                 socket_unlock(so
, 1); 
3432                         KERNEL_ENERGYTRACE(kEnTrActKernSockRead
, DBG_FUNC_END
, 
3433                             VM_KERNEL_ADDRPERM(so
), 0, 
3434                             (int64_t)(orig_resid 
- uio_resid(uio
))); 
3436                 KERNEL_DEBUG(DBG_FNC_SORECEIVE 
| DBG_FUNC_END
, error
, 
3446         if (so
->so_state 
& SS_ISCONFIRMING 
&& uio_resid(uio
)) { 
3447                 (*pr
->pr_usrreqs
->pru_rcvd
)(so
, 0); 
3451         delayed_copy_len 
= 0; 
3453 #ifdef MORE_LOCKING_DEBUG 
3454         if (so
->so_usecount 
<= 1) { 
3455                 printf("soreceive: sblock so=0x%llx ref=%d on socket\n", 
3456                     (uint64_t)DEBUG_KERNEL_ADDRPERM(so
), so
->so_usecount
); 
3460          * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE) 
3461          * and if so just return to the caller.  This could happen when 
3462          * soreceive() is called by a socket upcall function during the 
3463          * time the socket is freed.  The socket buffer would have been 
3464          * locked across the upcall, therefore we cannot put this thread 
3465          * to sleep (else we will deadlock) or return EWOULDBLOCK (else 
3466          * we may livelock), because the lock on the socket buffer will 
3467          * only be released when the upcall routine returns to its caller. 
3468          * Because the socket has been officially closed, there can be 
3469          * no further read on it. 
3471          * A multipath subflow socket would have its SS_NOFDREF set by 
3472          * default, so check for SOF_MP_SUBFLOW socket flag; when the 
3473          * socket is closed for real, SOF_MP_SUBFLOW would be cleared. 
3475         if ((so
->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
3476             (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
3477                 socket_unlock(so, 1);
3481         error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
3483                 socket_unlock(so, 1);
3484                 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
3487                         KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
3488                             VM_KERNEL_ADDRPERM(so), 0,
3489                             (int64_t)(orig_resid - uio_resid(uio)));
3494         m = so->so_rcv.sb_mb;
3495         if (so_should_wait(so, uio, m, flags)) {
3497                  * Panic if we notice inconsistencies in the socket's
3498                  * receive list; both sb_mb and sb_cc should correctly
3499                  * reflect the contents of the list, otherwise we may
3500                  * end up with false positives during select() or poll()
3501                  * which could put the application in a bad state.
3503                 SB_MB_CHECK(&so->so_rcv);
3509                         error = so->so_error;
3510                         if ((flags & MSG_PEEK) == 0) {
3515                 if (so->so_state & SS_CANTRCVMORE) {
3518                          * Deal with half closed connections
3520                         if ((so->so_state & SS_ISDISCONNECTED) == 0 &&
3521                             cfil_sock_data_pending(&so->so_rcv) != 0) {
3523                                     "so %llx ignore SS_CANTRCVMORE",
3524                                     (uint64_t)DEBUG_KERNEL_ADDRPERM(so));
3526 #endif /* CONTENT_FILTER */
3533                 for (; m != NULL; m = m->m_next) {
3534                         if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
3535                                 m = so->so_rcv.sb_mb;
3539                 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0 &&
3540                     (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
3544                 if (uio_resid(uio) == 0) {
3548                 if ((so->so_state & SS_NBIO) ||
3549                     (flags & (MSG_DONTWAIT | MSG_NBIO))) {
3550                         error = EWOULDBLOCK;
3553                 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
3554                 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
3555                 sbunlock(&so->so_rcv, TRUE);    /* keep socket locked */
3556 #if EVEN_MORE_LOCKING_DEBUG
3558                         printf("Waiting for socket data\n");
3563                  * Depending on the protocol (e.g. TCP), the following
3564                  * might cause the socket lock to be dropped and later
3565                  * be reacquired, and more data could have arrived and
3566                  * have been appended to the receive socket buffer by
3567                  * the time it returns.  Therefore, we only sleep in
3568                  * sbwait() below if and only if the wait-condition is still
3571                 if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL) {
3572                         (*pr->pr_usrreqs->pru_rcvd)(so, flags);
3576                 if (so_should_wait(so, uio, so->so_rcv.sb_mb, flags)) {
3577                         error = sbwait(&so->so_rcv);
3580 #if EVEN_MORE_LOCKING_DEBUG
3582                         printf("SORECEIVE - sbwait returned %d\n", error);
3585                 if (so->so_usecount < 1) {
3586                         panic("%s: after 2nd sblock so=%p ref=%d on socket\n",
3587                             __func__, so, so->so_usecount);
3591                         socket_unlock(so, 1);
3592                         KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
3595                                 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
3596                                     VM_KERNEL_ADDRPERM(so), 0,
3597                                     (int64_t)(orig_resid - uio_resid(uio)));
3604         OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
3605         SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
3606         SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
3607         nextrecord = m->m_nextpkt;
3609         if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
3610                 error = soreceive_addr(p, so, psa, flags, &m, &nextrecord,
3612                 if (error == ERESTART) {
3614                 } else if (error != 0) {
3621          * Process one or more MT_CONTROL mbufs present before any data mbufs
3622          * in the first mbuf chain on the socket buffer.  If MSG_PEEK, we
3623          * just copy the data; if !MSG_PEEK, we call into the protocol to
3624          * perform externalization.
3626         if (m != NULL && m->m_type == MT_CONTROL) {
3627                 error = soreceive_ctl(so, controlp, flags, &m, &nextrecord);
3635                 if (!(flags & MSG_PEEK)) {
3637                          * We get here because m points to an mbuf following
3638                          * any MT_SONAME or MT_CONTROL mbufs which have been
3639                          * processed above.  In any case, m should be pointing
3640                          * to the head of the mbuf chain, and the nextrecord
3641                          * should be either NULL or equal to m->m_nextpkt.
3642                          * See comments above about SB_LOCK.
3644                         if (m != so->so_rcv.sb_mb ||
3645                             m->m_nextpkt != nextrecord) {
3646                                 panic("%s: post-control !sync so=%p m=%p "
3647                                     "nextrecord=%p\n", __func__, so, m,
3651                         if (nextrecord == NULL) {
3652                                 so->so_rcv.sb_lastrecord = m;
3656                 if (type == MT_OOBDATA) {
3660                 if (!(flags & MSG_PEEK)) {
3661                         SB_EMPTY_FIXUP(&so->so_rcv);
3664         SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
3665         SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");
3670         if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy) {
3677             (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
3678                 if (m->m_type == MT_OOBDATA) {
3679                         if (type != MT_OOBDATA) {
3682                 } else if (type == MT_OOBDATA) {
3686                  * Make sure to always set MSG_OOB event when getting
3687                  * out of band data inline.
3689                 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
3690                     (so->so_options & SO_OOBINLINE) != 0 &&
3691                     (so->so_state & SS_RCVATMARK) != 0) {
3694                 so->so_state &= ~SS_RCVATMARK;
3695                 len = uio_resid(uio) - delayed_copy_len;
3696                 if (so->so_oobmark && len > so->so_oobmark - offset) {
3697                         len = so->so_oobmark - offset;
3699                 if (len > m->m_len - moff) {
3700                         len = m->m_len - moff;
3703                  * If mp is set, just pass back the mbufs.
3704                  * Otherwise copy them out via the uio, then free.
3705                  * Sockbuf must be consistent here (points to current mbuf,
3706                  * it points to next record) when we drop priority;
3707                  * we must note any additions to the sockbuf when we
3708                  * block interrupts again.
3711                         SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
3712                         SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
3713                         if (can_delay && len == m->m_len) {
3715                                  * only delay the copy if we're consuming the
3716                                  * mbuf and we're NOT in MSG_PEEK mode
3717                                  * and we have enough data to make it worthwhile
3718                                  * to drop and retake the lock... can_delay
3719                                  * reflects the state of the 2 latter
3720                                  * constraints moff should always be zero
3723                                 delayed_copy_len += len;
3725                                 if (delayed_copy_len) {
3726                                         error = sodelayed_copy(so, uio,
3727                                             &free_list, &delayed_copy_len);
3733                                          * can only get here if MSG_PEEK is not
3734                                          * set therefore, m should point at the
3735                                          * head of the rcv queue; if it doesn't,
3736                                          * it means something drastically
3737                                          * changed while we were out from behind
3738                                          * the lock in sodelayed_copy. perhaps
3739                                          * a RST on the stream. in any event,
3740                                          * the stream has been interrupted. it's
3741                                          * probably best just to return whatever
3742                                          * data we've moved and let the caller
3745                                         if (m != so->so_rcv.sb_mb) {
3749                                 socket_unlock(so, 0);
3750                                 error = uiomove(mtod(m, caddr_t) + moff,
3759                         uio_setresid(uio, (uio_resid(uio) - len));
3761                 if (len == m->m_len - moff) {
3762                         if (m->m_flags & M_EOR) {
3765                         if (flags & MSG_PEEK) {
3769                                 nextrecord = m->m_nextpkt;
3770                                 sbfree(&so->so_rcv, m);
3771                                 m->m_nextpkt = NULL;
3776                                         so->so_rcv.sb_mb = m = m->m_next;
3779                                         if (free_list == NULL) {
3785                                         so->so_rcv.sb_mb = m = m->m_next;
3789                                         m->m_nextpkt = nextrecord;
3790                                         if (nextrecord == NULL) {
3791                                                 so->so_rcv.sb_lastrecord = m;
3794                                         so->so_rcv.sb_mb = nextrecord;
3795                                         SB_EMPTY_FIXUP(&so->so_rcv);
3797                                 SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
3798                                 SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
3801                         if (flags & MSG_PEEK) {
3807                                         if (flags & MSG_DONTWAIT) {
3808                                                 copy_flag = M_DONTWAIT;
3812                                         *mp = m_copym(m, 0, len, copy_flag);
3814                                          * Failed to allocate an mbuf?
3815                                          * Adjust uio_resid back, it was
3816                                          * adjusted down by len bytes which
3817                                          * we didn't copy over.
3821                                                     (uio_resid(uio) + len));
3827                                 so->so_rcv.sb_cc -= len;
3830                 if (so->so_oobmark) {
3831                         if ((flags & MSG_PEEK) == 0) {
3832                                 so->so_oobmark -= len;
3833                                 if (so->so_oobmark == 0) {
3834                                         so->so_state |= SS_RCVATMARK;
3839                                 if (offset == so->so_oobmark) {
3844                 if (flags & MSG_EOR) {
3848                  * If the MSG_WAITALL or MSG_WAITSTREAM flag is set
3849                  * (for non-atomic socket), we must not quit until
3850                  * "uio->uio_resid == 0" or an error termination.
3851                  * If a signal/timeout occurs, return with a short
3852                  * count but without error.  Keep sockbuf locked
3853                  * against other readers.
3855                 while (flags & (MSG_WAITALL | MSG_WAITSTREAM) && m == NULL &&
3856                     (uio_resid(uio) - delayed_copy_len) > 0 &&
3857                     !sosendallatonce(so) && !nextrecord) {
3858                         if (so->so_error || ((so->so_state & SS_CANTRCVMORE)
3860                             && cfil_sock_data_pending(&so->so_rcv) == 0
3861 #endif /* CONTENT_FILTER */
3867                          * Depending on the protocol (e.g. TCP), the following
3868                          * might cause the socket lock to be dropped and later
3869                          * be reacquired, and more data could have arrived and
3870                          * have been appended to the receive socket buffer by
3871                          * the time it returns.  Therefore, we only sleep in
3872                          * sbwait() below if and only if the socket buffer is
3873                          * empty, in order to avoid a false sleep.
3875                         if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL) {
3876                                 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
3879                         SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
3880                         SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
3882                         if (so->so_rcv.sb_mb == NULL && sbwait(&so->so_rcv)) {
3887                          * have to wait until after we get back from the sbwait
3888                          * to do the copy because we will drop the lock if we
3889                          * have enough data that has been delayed... by dropping
3890                          * the lock we open up a window allowing the netisr
3891                          * thread to process the incoming packets and to change
3892                          * the state of this socket... we're issuing the sbwait
3893                          * because the socket is empty and we're expecting the
3894                          * netisr thread to wake us up when more packets arrive;
3895                          * if we allow that processing to happen and then sbwait
3896                          * we could stall forever with packets sitting in the
3897                          * socket if no further packets arrive from the remote
3900                          * we want to copy before we've collected all the data
3901                          * to satisfy this request to allow the copy to overlap
3902                          * the incoming packet processing on an MP system
3904                         if (delayed_copy_len > sorecvmincopy &&
3905                             (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
3906                                 error = sodelayed_copy(so, uio,
3907                                     &free_list, &delayed_copy_len);
3913                         m = so->so_rcv.sb_mb;
3915                                 nextrecord = m->m_nextpkt;
3917                         SB_MB_CHECK(&so->so_rcv);
3920 #ifdef MORE_LOCKING_DEBUG
3921         if (so->so_usecount <= 1) {
3922                 panic("%s: after big while so=%p ref=%d on socket\n",
3923                     __func__, so, so->so_usecount);
3928         if (m != NULL && pr->pr_flags & PR_ATOMIC) {
3929                 if (so->so_options & SO_DONTTRUNC) {
3930                         flags |= MSG_RCVMORE;
3933                         if ((flags & MSG_PEEK) == 0) {
3934                                 (void) sbdroprecord(&so->so_rcv);
3940          * pru_rcvd below (for TCP) may cause more data to be received
3941          * if the socket lock is dropped prior to sending the ACK; some
3942          * legacy OpenTransport applications don't handle this well
3943          * (if it receives less data than requested while MSG_HAVEMORE
3944          * is set), and so we set the flag now based on what we know
3945          * prior to calling pru_rcvd.
3947         if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) {
3948                 flags |= MSG_HAVEMORE;
3951         if ((flags & MSG_PEEK) == 0) {
3953                         so->so_rcv.sb_mb = nextrecord;
3955                          * First part is an inline SB_EMPTY_FIXUP().  Second
3956                          * part makes sure sb_lastrecord is up-to-date if
3957                          * there is still data in the socket buffer.
3959                         if (so->so_rcv.sb_mb == NULL) {
3960                                 so->so_rcv.sb_mbtail = NULL;
3961                                 so->so_rcv.sb_lastrecord = NULL;
3962                         } else if (nextrecord->m_nextpkt == NULL) {
3963                                 so->so_rcv.sb_lastrecord = nextrecord;
3965                         SB_MB_CHECK(&so->so_rcv);
3967                 SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
3968                 SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
3969                 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) {
3970                         (*pr->pr_usrreqs->pru_rcvd)(so, flags);
3974         if (delayed_copy_len) {
3975                 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
3980         if (free_list != NULL) {
3981                 m_freem_list(free_list);
3985         if (orig_resid == uio_resid(uio) && orig_resid &&
3986             (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
3987                 sbunlock(&so->so_rcv, TRUE);    /* keep socket locked */
3991         if (flagsp != NULL) {
3995 #ifdef MORE_LOCKING_DEBUG
3996         if (so->so_usecount <= 1) {
3997                 panic("%s: release so=%p ref=%d on socket\n", __func__,
3998                     so, so->so_usecount);
4002         if (delayed_copy_len) {
4003                 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
4006         if (free_list != NULL) {
4007                 m_freem_list(free_list);
4010         sbunlock(&so->so_rcv, FALSE);   /* will unlock socket */
4013                 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
4014                     VM_KERNEL_ADDRPERM(so),
4015                     ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
4016                     (int64_t)(orig_resid - uio_resid(uio)));
4018         KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, so, uio_resid(uio),
4019             so->so_rcv.sb_cc, 0, error);
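/*
 * Illustrative note (editor's addition, not part of the original source):
 * the MSG_WAITALL/MSG_WAITSTREAM handling above is what lets a user-space
 * caller ask for a full-sized read on a stream socket.  A minimal sketch,
 * assuming a connected stream descriptor `fd`:
 *
 *      #include <sys/socket.h>
 *
 *      char buf[4096];
 *      ssize_t n = recv(fd, buf, sizeof(buf), MSG_WAITALL);
 *      if (n > 0 && (size_t)n < sizeof(buf)) {
 *              // short count: a signal, timeout, error, or EOF ended the wait
 *      }
 *
 * As the comment above notes, a signal or timeout still returns the short
 * count without an error.
 */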
4025  * Returns:     0                       Success
4029 sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list,
4030     user_ssize_t *resid)
4037         socket_unlock(so, 0);
4039         while (m != NULL && error == 0) {
4040                 error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
4043         m_freem_list(*free_list);
4054 sodelayed_copy_list(struct socket *so, struct recv_msg_elem *msgarray,
4055     u_int uiocnt, struct mbuf **free_list, user_ssize_t *resid)
4059         struct mbuf *ml, *m;
4063         for (ml = *free_list, i = 0; ml != NULL && i < uiocnt;
4064             ml = ml->m_nextpkt, i++) {
4065                 auio = msgarray[i].uio;
4066                 for (m = ml; m != NULL; m = m->m_next) {
4067                         error = uiomove(mtod(m, caddr_t), m->m_len, auio);
4074         m_freem_list(*free_list);
4083 soreceive_list(struct socket *so, struct recv_msg_elem *msgarray, u_int uiocnt,
4087         struct mbuf *nextrecord;
4088         struct mbuf *ml = NULL, *free_list = NULL, *free_tail = NULL;
4090         user_ssize_t len, pktlen, delayed_copy_len = 0;
4091         struct protosw *pr = so->so_proto;
4093         struct proc *p = current_proc();
4094         struct uio *auio = NULL;
4097         struct sockaddr **psa = NULL;
4098         struct mbuf **controlp = NULL;
4101         struct mbuf *free_others = NULL;
4103         KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_START,
4105             so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat);
4109          * - Only supports don't wait flags
4110          * - Only supports datagram sockets (could be extended to raw)
4112          * - Protocol must support packet chains
4113          * - The uio array is NULL (should we panic?)
4115         if (flagsp != NULL) {
4120         if (flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA |
4122                 printf("%s invalid flags 0x%x\n", __func__, flags);
4126         if (so->so_type != SOCK_DGRAM) {
4130         if (sosendallatonce(so) == 0) {
4134         if (so->so_proto->pr_usrreqs->pru_send_list == NULL) {
4135                 error = EPROTONOSUPPORT;
4138         if (msgarray == NULL) {
4139                 printf("%s uioarray is NULL\n", __func__);
4144                 printf("%s uiocnt is 0\n", __func__);
4149          * Sanity check on the length passed by caller as we are making 'int'
4152         resid = recv_msg_array_resid(msgarray, uiocnt);
4153         if (resid < 0 || resid > INT_MAX) {
4158         if (!(flags & MSG_PEEK) && sorecvmincopy > 0) {
4165         so_update_last_owner_locked(so, p);
4166         so_update_policy(so);
4169         so_update_necp_policy(so, NULL, NULL);
4173          * If a recv attempt is made on a previously-accepted socket
4174          * that has been marked as inactive (disconnected), reject
4177         if (so->so_flags & SOF_DEFUNCT) {
4178                 struct sockbuf *sb = &so->so_rcv;
4181                 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
4182                     __func__, proc_pid(p), proc_best_name(p),
4183                     (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
4184                     SOCK_DOM(so), SOCK_TYPE(so), error);
4186                  * This socket should have been disconnected and flushed
4187                  * prior to being returned from sodefunct(); there should
4188                  * be no data on its receive list, so panic otherwise.
4190                 if (so->so_state & SS_DEFUNCT) {
4191                         sb_empty_assert(sb, __func__);
4198          * The uio may be empty
4200         if (npkts >= uiocnt) {
4206          * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
4207          * and if so just return to the caller.  This could happen when
4208          * soreceive() is called by a socket upcall function during the
4209          * time the socket is freed.  The socket buffer would have been
4210          * locked across the upcall, therefore we cannot put this thread
4211          * to sleep (else we will deadlock) or return EWOULDBLOCK (else
4212          * we may livelock), because the lock on the socket buffer will
4213          * only be released when the upcall routine returns to its caller.
4214          * Because the socket has been officially closed, there can be
4215          * no further read on it.
4217         if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
4218             (SS_NOFDREF | SS_CANTRCVMORE)) {
4223         error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
4229         m = so->so_rcv.sb_mb;
4231          * Block awaiting more datagrams if needed
4233         if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
4234             (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
4235             ((flags & MSG_WAITALL) && npkts < uiocnt))))) {
4237                  * Panic if we notice inconsistencies in the socket's
4238                  * receive list; both sb_mb and sb_cc should correctly
4239                  * reflect the contents of the list, otherwise we may
4240                  * end up with false positives during select() or poll()
4241                  * which could put the application in a bad state.
4243                 SB_MB_CHECK(&so->so_rcv);
4246                         error = so->so_error;
4247                         if ((flags & MSG_PEEK) == 0) {
4252                 if (so->so_state & SS_CANTRCVMORE) {
4255                 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0 &&
4256                     (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
4260                 if ((so->so_state & SS_NBIO) ||
4261                     (flags & (MSG_DONTWAIT | MSG_NBIO))) {
4262                         error = EWOULDBLOCK;
4266                  * Do not block if we got some data
4268                 if (free_list != NULL) {
4273                 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
4274                 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
4276                 sbunlock(&so->so_rcv, TRUE);    /* keep socket locked */
4279                 error = sbwait(&so->so_rcv);
4286         OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
4287         SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
4288         SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
4291          * Consume the current uio index as we have a datagram
4293         auio = msgarray[npkts].uio;
4294         resid = uio_resid(auio);
4295         msgarray[npkts].which |= SOCK_MSG_DATA;
4296         psa = (msgarray[npkts].which & SOCK_MSG_SA) ?
4297             &msgarray[npkts].psa : NULL;
4298         controlp = (msgarray[npkts].which & SOCK_MSG_CONTROL) ?
4299             &msgarray[npkts].controlp : NULL;
4301         nextrecord = m->m_nextpkt;
4303         if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
4304                 error = soreceive_addr(p, so, psa, flags, &m, &nextrecord, 1);
4305                 if (error == ERESTART) {
4307                 } else if (error != 0) {
4312         if (m != NULL && m->m_type == MT_CONTROL) {
4313                 error = soreceive_ctl(so, controlp, flags, &m, &nextrecord);
4319         if (m->m_pkthdr.len == 0) {
4320                 printf("%s:%d so %llx pkt %llx type %u pktlen null\n",
4322                     (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
4323                     (uint64_t)DEBUG_KERNEL_ADDRPERM(m),
4328          * Loop to copy the mbufs of the current record
4329          * Support zero length packets
4333         while (m != NULL && (len = resid - pktlen) >= 0 && error == 0) {
4334                 if (m->m_len == 0) {
4335                         panic("%p m_len zero", m);
4337                 if (m->m_type == 0) {
4338                         panic("%p m_type zero", m);
4341                  * Clip to the residual length
4343                 if (len > m->m_len) {
4348                  * Copy the mbufs via the uio or delay the copy
4349                  * Sockbuf must be consistent here (points to current mbuf,
4350                  * it points to next record) when we drop priority;
4351                  * we must note any additions to the sockbuf when we
4352                  * block interrupts again.
4354                 if (len > 0 && can_delay == 0) {
4355                         socket_unlock(so, 0);
4356                         error = uiomove(mtod(m, caddr_t), (int)len, auio);
4362                         delayed_copy_len += len;
4365                 if (len == m->m_len) {
4367                          * m was entirely copied
4369                         sbfree(&so->so_rcv, m);
4370                         nextrecord = m->m_nextpkt;
4371                         m->m_nextpkt = NULL;
4374                          * Set the first packet to the head of the free list
4376                         if (free_list == NULL) {
4380                          * Link current packet to tail of free list
4383                                 if (free_tail != NULL) {
4384                                         free_tail->m_nextpkt = m;
4389                          * Link current mbuf to last mbuf of current packet
4397                          * Move next buf to head of socket buffer
4399                         so->so_rcv.sb_mb = m = ml->m_next;
4403                                 m->m_nextpkt = nextrecord;
4404                                 if (nextrecord == NULL) {
4405                                         so->so_rcv.sb_lastrecord = m;
4408                                 so->so_rcv.sb_mb = nextrecord;
4409                                 SB_EMPTY_FIXUP(&so->so_rcv);
4411                         SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
4412                         SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
4415                          * Stop the loop on partial copy
4420 #ifdef MORE_LOCKING_DEBUG
4421         if (so->so_usecount <= 1) {
4422                 panic("%s: after big while so=%llx ref=%d on socket\n",
4424                     (uint64_t)DEBUG_KERNEL_ADDRPERM(so), so->so_usecount);
4429          * Tell the caller we made a partial copy
4432                 if (so->so_options & SO_DONTTRUNC) {
4434                          * Copyout first the freelist then the partial mbuf
4436                         socket_unlock(so, 0);
4437                         if (delayed_copy_len) {
4438                                 error = sodelayed_copy_list(so, msgarray,
4439                                     uiocnt, &free_list, &delayed_copy_len);
4443                                 error = uiomove(mtod(m, caddr_t), (int)len,
4453                         so->so_rcv.sb_cc -= len;
4454                         flags |= MSG_RCVMORE;
4456                         (void) sbdroprecord(&so->so_rcv);
4457                         nextrecord = so->so_rcv.sb_mb;
4464                 so->so_rcv.sb_mb = nextrecord;
4466                  * First part is an inline SB_EMPTY_FIXUP().  Second
4467                  * part makes sure sb_lastrecord is up-to-date if
4468                  * there is still data in the socket buffer.
4470                 if (so->so_rcv.sb_mb == NULL) {
4471                         so->so_rcv.sb_mbtail = NULL;
4472                         so->so_rcv.sb_lastrecord = NULL;
4473                 } else if (nextrecord->m_nextpkt == NULL) {
4474                         so->so_rcv.sb_lastrecord = nextrecord;
4476                 SB_MB_CHECK(&so->so_rcv);
4478         SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
4479         SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
4482          * We can continue to the next packet as long as:
4483          * - We haven't exhausted the uio array
4484          * - There was no error
4485          * - A packet was not truncated
4486          * - We can still receive more data
4488         if (npkts < uiocnt && error == 0 &&
4489             (flags & (MSG_RCVMORE | MSG_TRUNC)) == 0 &&
4490             (so->so_state & SS_CANTRCVMORE) == 0) {
4491                 sbunlock(&so->so_rcv, TRUE);    /* keep socket locked */
4496         if (flagsp != NULL) {
4502          * pru_rcvd may cause more data to be received if the socket lock
4503          * is dropped so we set MSG_HAVEMORE now based on what we know.
4504          * That way the caller won't be surprised if it receives less data
4507         if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) {
4508                 flags |= MSG_HAVEMORE;
4511         if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) {
4512                 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
4516                 sbunlock(&so->so_rcv, FALSE);   /* will unlock socket */
4518                 socket_unlock(so, 1);
4521         if (delayed_copy_len) {
4522                 error = sodelayed_copy_list(so, msgarray, uiocnt,
4523                     &free_list, &delayed_copy_len);
4527          * Amortize the cost of freeing the mbufs
4529         if (free_list != NULL) {
4530                 m_freem_list(free_list);
4532         if (free_others != NULL) {
4533                 m_freem_list(free_others);
4536         KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_END, error,
4542 so_statistics_event_to_nstat_event(int64_t *input_options,
4543     uint64_t *nstat_event)
4546         switch (*input_options) {
4547         case SO_STATISTICS_EVENT_ENTER_CELLFALLBACK:
4548                 *nstat_event = NSTAT_EVENT_SRC_ENTER_CELLFALLBACK;
4550         case SO_STATISTICS_EVENT_EXIT_CELLFALLBACK:
4551                 *nstat_event = NSTAT_EVENT_SRC_EXIT_CELLFALLBACK;
4553 #if (DEBUG || DEVELOPMENT)
4554         case SO_STATISTICS_EVENT_RESERVED_1:
4555                 *nstat_event = NSTAT_EVENT_SRC_RESERVED_1;
4557         case SO_STATISTICS_EVENT_RESERVED_2:
4558                 *nstat_event = NSTAT_EVENT_SRC_RESERVED_2;
4560 #endif /* (DEBUG || DEVELOPMENT) */
4569  * Returns:     0                       Success
4572  *      <pru_shutdown>:EINVAL
4573  *      <pru_shutdown>:EADDRNOTAVAIL[TCP]
4574  *      <pru_shutdown>:ENOBUFS[TCP]
4575  *      <pru_shutdown>:EMSGSIZE[TCP]
4576  *      <pru_shutdown>:EHOSTUNREACH[TCP]
4577  *      <pru_shutdown>:ENETUNREACH[TCP]
4578  *      <pru_shutdown>:ENETDOWN[TCP]
4579  *      <pru_shutdown>:ENOMEM[TCP]
4580  *      <pru_shutdown>:EACCES[TCP]
4581  *      <pru_shutdown>:EMSGSIZE[TCP]
4582  *      <pru_shutdown>:ENOBUFS[TCP]
4583  *      <pru_shutdown>:???[TCP]         [ignorable: mostly IPSEC/firewall/DLIL]
4584  *      <pru_shutdown>:???              [other protocol families]
4587 soshutdown(struct socket *so, int how)
4591         KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_START, how, 0, 0, 0, 0);
4599                     (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
4602                         error = soshutdownlock(so, how);
4604                 socket_unlock(so, 1);
4611         KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, how, error, 0, 0, 0);
4617 soshutdownlock_final(struct socket *so, int how)
4619         struct protosw *pr = so->so_proto;
4622         sflt_notify(so, sock_evt_shutdown, &how);
4624         if (how != SHUT_WR) {
4625                 if ((so->so_state & SS_CANTRCVMORE) != 0) {
4626                         /* read already shut down */
4632         if (how != SHUT_RD) {
4633                 if ((so->so_state & SS_CANTSENDMORE) != 0) {
4634                         /* write already shut down */
4638                 error = (*pr->pr_usrreqs->pru_shutdown)(so);
4641         KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN, how, 1, 0, 0, 0);
4646 soshutdownlock(struct socket *so, int how)
4652          * A content filter may delay the actual shutdown until it
4653          * has processed the pending data
4655         if (so->so_flags & SOF_CONTENT_FILTER) {
4656                 error = cfil_sock_shutdown(so, &how);
4657                 if (error == EJUSTRETURN) {
4660                 } else if (error != 0) {
4664 #endif /* CONTENT_FILTER */
4666         error = soshutdownlock_final(so, how);
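/*
 * Illustrative note (editor's addition, not part of the original source):
 * soshutdown() is reached from the shutdown(2) system call.  A minimal
 * user-space sketch, assuming a connected descriptor `fd`:
 *
 *      #include <sys/socket.h>
 *
 *      if (shutdown(fd, SHUT_WR) == -1) {
 *              perror("shutdown");
 *      }
 *
 * SHUT_WR runs only the write-side path above (pru_shutdown), SHUT_RD only
 * the read side, and SHUT_RDWR both; a side that is already shut down is
 * caught by the "already shut down" checks in soshutdownlock_final().
 */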
4673 sowflush(struct socket *so)
4675         struct sockbuf *sb = &so->so_snd;
4678          * Obtain lock on the socket buffer (SB_LOCK).  This is required
4679          * to prevent the socket buffer from being unexpectedly altered
4680          * while it is used by another thread in socket send/receive.
4682          * sblock() must not fail here, hence the assertion.
4684         (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
4685         VERIFY(sb->sb_flags & SB_LOCK);
4687         sb->sb_flags            &= ~(SB_SEL | SB_UPCALL);
4688         sb->sb_flags            |= SB_DROP;
4689         sb->sb_upcall           = NULL;
4690         sb->sb_upcallarg        = NULL;
4692         sbunlock(sb, TRUE);     /* keep socket locked */
4694         selthreadclear(&sb->sb_sel);
4699 sorflush(struct socket *so)
4701         struct sockbuf *sb = &so->so_rcv;
4702         struct protosw *pr = so->so_proto;
4705         lck_mtx_t *mutex_held;
4707          * XXX: This code is currently commented out, because we may get here
4708          * as part of sofreelastref(), and at that time, pr_getlock() may no
4709          * longer be able to return us the lock; this will be fixed in future.
4711         if (so->so_proto->pr_getlock != NULL) {
4712                 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
4714                 mutex_held = so->so_proto->pr_domain->dom_mtx;
4717         LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
4720         sflt_notify(so, sock_evt_flush_read, NULL);
4725          * Obtain lock on the socket buffer (SB_LOCK).  This is required
4726          * to prevent the socket buffer from being unexpectedly altered
4727          * while it is used by another thread in socket send/receive.
4729          * sblock() must not fail here, hence the assertion.
4731         (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
4732         VERIFY(sb->sb_flags & SB_LOCK);
4735          * Copy only the relevant fields from "sb" to "asb" which we
4736          * need for sbrelease() to function.  In particular, skip
4737          * sb_sel as it contains the wait queue linkage, which would
4738          * wreak havoc if we were to issue selthreadclear() on "asb".
4739          * Make sure to not carry over SB_LOCK in "asb", as we need
4740          * to acquire it later as part of sbrelease().
4742         bzero(&asb, sizeof(asb));
4743         asb.sb_cc               = sb->sb_cc;
4744         asb.sb_hiwat            = sb->sb_hiwat;
4745         asb.sb_mbcnt            = sb->sb_mbcnt;
4746         asb.sb_mbmax            = sb->sb_mbmax;
4747         asb.sb_ctl              = sb->sb_ctl;
4748         asb.sb_lowat            = sb->sb_lowat;
4749         asb.sb_mb               = sb->sb_mb;
4750         asb.sb_mbtail           = sb->sb_mbtail;
4751         asb.sb_lastrecord       = sb->sb_lastrecord;
4752         asb.sb_so               = sb->sb_so;
4753         asb.sb_flags            = sb->sb_flags;
4754         asb.sb_flags            &= ~(SB_LOCK | SB_SEL | SB_KNOTE | SB_UPCALL);
4755         asb.sb_flags            |= SB_DROP;
4758          * Ideally we'd bzero() these and preserve the ones we need;
4759          * but to do that we'd need to shuffle things around in the
4760          * sockbuf, and we can't do it now because there are KEXTS
4761          * that are directly referring to the socket structure.
4763          * Setting SB_DROP acts as a barrier to prevent further appends.
4764          * Clearing SB_SEL is done for selthreadclear() below.
4773         sb->sb_mbtail           = NULL;
4774         sb->sb_lastrecord       = NULL;
4775         sb->sb_timeo.tv_sec     = 0;
4776         sb->sb_timeo.tv_usec    = 0;
4777         sb->sb_upcall           = NULL;
4778         sb->sb_upcallarg        = NULL;
4779         sb->sb_flags            &= ~(SB_SEL | SB_UPCALL);
4780         sb->sb_flags            |= SB_DROP;
4782         sbunlock(sb, TRUE);     /* keep socket locked */
4785          * Note that selthreadclear() is called on the original "sb" and
4786          * not the local "asb" because of the way wait queue linkage is
4787          * implemented.  Given that selwakeup() may be triggered, SB_SEL
4788          * should no longer be set (cleared above.)
4790         selthreadclear(&sb->sb_sel);
4792         if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose) {
4793                 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
4800  * Perhaps this routine, and sooptcopyout(), below, ought to come in
4801  * an additional variant to handle the case where the option value needs
4802  * to be some kind of integer, but not a specific size.
4803  * In addition to their use here, these functions are also called by the
4804  * protocol-level pr_ctloutput() routines.
4806  * Returns:     0                       Success
4811 sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
4816          * If the user gives us more than we wanted, we ignore it,
4817          * but if we don't get the minimum length the caller
4818          * wants, we return EINVAL.  On success, sopt->sopt_valsize
4819          * is set to however much we actually retrieved.
4821         if ((valsize = sopt->sopt_valsize) < minlen) {
4824         if (valsize > len) {
4825                 sopt->sopt_valsize = valsize = len;
4828         if (sopt->sopt_p != kernproc) {
4829                 return copyin(sopt->sopt_val, buf, valsize);
4832         bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
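/*
 * Illustrative note (editor's addition, not part of the original source):
 * sooptcopyin() is what pulls the option value of a setsockopt(2) call
 * into the kernel.  A minimal user-space sketch for a boolean option:
 *
 *      #include <sys/socket.h>
 *
 *      int on = 1;
 *      if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)) == -1) {
 *              perror("setsockopt");
 *      }
 *
 * Per the comment above, passing fewer bytes than the minimum the kernel
 * asks for fails with EINVAL, while extra bytes beyond `len` are ignored.
 */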
4837  * sooptcopyin_timeval
4838  *   Copy in a timeval value into tv_p, and take into account whether
4839  *   the calling process is 64-bit or 32-bit.  Moved the sanity checking
4840  *   code here so that we can verify the 64-bit tv_sec value before we lose
4841  *   the top 32-bits assigning tv64.tv_sec to tv_p->tv_sec.
4844 sooptcopyin_timeval(struct sockopt *sopt, struct timeval *tv_p)
4848         if (proc_is64bit(sopt->sopt_p)) {
4849                 struct user64_timeval   tv64;
4851                 if (sopt->sopt_valsize < sizeof(tv64)) {
4855                 sopt->sopt_valsize = sizeof(tv64);
4856                 if (sopt->sopt_p != kernproc) {
4857                         error = copyin(sopt->sopt_val, &tv64, sizeof(tv64));
4862                         bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv64,
4865                 if (tv64.tv_sec < 0 || tv64.tv_sec > LONG_MAX ||
4866                     tv64.tv_usec < 0 || tv64.tv_usec >= 1000000) {
4870                 tv_p->tv_sec = tv64.tv_sec;
4871                 tv_p->tv_usec = tv64.tv_usec;
4873                 struct user32_timeval   tv32;
4875                 if (sopt->sopt_valsize < sizeof(tv32)) {
4879                 sopt->sopt_valsize = sizeof(tv32);
4880                 if (sopt->sopt_p != kernproc) {
4881                         error = copyin(sopt->sopt_val, &tv32, sizeof(tv32));
4886                         bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv32,
4891                  * K64todo "comparison is always false due to
4892                  * limited range of data type"
4894                 if (tv32.tv_sec < 0 || tv32.tv_sec > LONG_MAX ||
4895                     tv32.tv_usec < 0 || tv32.tv_usec >= 1000000) {
4899                 tv_p->tv_sec = tv32.tv_sec;
4900                 tv_p->tv_usec = tv32.tv_usec;
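/*
 * Illustrative note (editor's addition, not part of the original source):
 * sooptcopyin_timeval() backs the struct timeval socket options such as
 * SO_RCVTIMEO/SO_SNDTIMEO, whose values are stored in sb_timeo below.  A
 * minimal user-space sketch:
 *
 *      #include <sys/socket.h>
 *      #include <sys/time.h>
 *
 *      struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 *      if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == -1) {
 *              perror("setsockopt");
 *      }
 *
 * Values with tv_sec < 0 or tv_usec outside [0, 1000000) are rejected by
 * the range checks above (EDOM, per the header comment further below).
 */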
4906 soopt_cred_check(struct socket *so, int priv, boolean_t allow_root,
4907     boolean_t ignore_delegate)
4909         kauth_cred_t cred = NULL;
4910         proc_t ep = PROC_NULL;
4914         if (ignore_delegate == false && so->so_flags & SOF_DELEGATED) {
4915                 ep = proc_find(so->e_pid);
4917                         cred = kauth_cred_proc_ref(ep);
4921         uid = kauth_cred_getuid(cred ? cred : so->so_cred);
4923         /* uid is 0 for root */
4924         if (uid != 0 || !allow_root) {
4925                 error = priv_check_cred(cred ? cred : so->so_cred, priv, 0);
4928                 kauth_cred_unref(&cred);
4930         if (ep != PROC_NULL) {
4938  * Returns:     0                       Success
4943  *      sooptcopyin:EINVAL
4944  *      sooptcopyin:EFAULT
4945  *      sooptcopyin_timeval:EINVAL
4946  *      sooptcopyin_timeval:EFAULT
4947  *      sooptcopyin_timeval:EDOM
4948  *      <pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
4949  *      <pr_ctloutput>:???
4950  *      sflt_attach_private:???         [whatever a filter author chooses]
4951  *      <sf_setoption>:???              [whatever a filter author chooses]
4953  * Notes:       Other <pru_listen> returns depend on the protocol family; all
4954  *              <sf_listen> returns depend on what the filter author causes
4955  *              their filter to return.
4958 sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
4961         int64_t long_optval;
4965         if (sopt->sopt_dir != SOPT_SET) {
4966                 sopt->sopt_dir = SOPT_SET;
4973         if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
4974             (SS_CANTRCVMORE | SS_CANTSENDMORE) &&
4975             (so->so_flags & SOF_NPX_SETOPTSHUT) == 0) {
4976                 /* the socket has been shutdown, no more sockopt's */
4981         error = sflt_setsockopt(so, sopt);
4983                 if (error == EJUSTRETURN) {
4989         if (sopt->sopt_level != SOL_SOCKET) {
4990                 if (so->so_proto != NULL &&
4991                     so->so_proto->pr_ctloutput != NULL) {
4992                         error = (*so->so_proto->pr_ctloutput)(so, sopt);
4995                 error = ENOPROTOOPT;
4998                  * Allow socket-level (SOL_SOCKET) options to be filtered by
4999                  * the protocol layer, if needed.  A zero value returned from
5000                  * the handler means use default socket-level processing as
5001                  * done by the rest of this routine.  Otherwise, any other
5002                  * return value indicates that the option is unsupported.
5004                 if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs->
5005                     pru_socheckopt(so, sopt)) != 0) {
5010                 switch (sopt->sopt_name) {
5013                         error = sooptcopyin(sopt, &l, sizeof(l), sizeof(l));
5018                         so->so_linger = (sopt->sopt_name == SO_LINGER) ?
5019                             l.l_linger : l.l_linger * hz;
5020                         if (l.l_onoff != 0) {
5021                                 so->so_options |= SO_LINGER;
5023                                 so->so_options &= ~SO_LINGER;
5030                 case SO_USELOOPBACK:
5036                 case SO_TIMESTAMP_MONOTONIC:
5037                 case SO_TIMESTAMP_CONTINUOUS:
5040                 case SO_WANTOOBFLAG:
5041                 case SO_NOWAKEFROMSLEEP:
5042                 case SO_NOAPNFALLBK:
5043                         error = sooptcopyin(sopt, &optval, sizeof(optval),
5049                                 so->so_options |= sopt->sopt_name;
5051                                 so->so_options &= ~sopt->sopt_name;
5059                         error = sooptcopyin(sopt, &optval, sizeof(optval),
5066                          * Values < 1 make no sense for any of these
5067                          * options, so disallow them.
5074                         switch (sopt->sopt_name) {
5077                                 struct sockbuf *sb =
5078                                     (sopt->sopt_name == SO_SNDBUF) ?
5079                                     &so->so_snd : &so->so_rcv;
5080                                 if (sbreserve(sb, (u_int32_t)optval) == 0) {
5084                                 sb->sb_flags |= SB_USRSIZE;
5085                                 sb->sb_flags &= ~SB_AUTOSIZE;
5086                                 sb->sb_idealsize = (u_int32_t)optval;
5090                          * Make sure the low-water is never greater than
5094                                 int space = sbspace(&so->so_snd);
5095                                 u_int32_t hiwat = so->so_snd.sb_hiwat;
5097                                 if (so->so_snd.sb_flags & SB_UNIX) {
5099                                             (struct unpcb *)(so->so_pcb);
5101                                             unp->unp_conn != NULL) {
5102                                                 hiwat += unp->unp_conn->unp_cc;
5106                                 so->so_snd.sb_lowat =
5110                                 if (space >= so->so_snd.sb_lowat) {
5117                                 so->so_rcv.sb_lowat =
5118                                     (optval > so->so_rcv.sb_hiwat) ?
5119                                     so->so_rcv.sb_hiwat : optval;
5120                                 data_len = so->so_rcv.sb_cc
5121                                     - so->so_rcv.sb_ctl;
5122                                 if (data_len >= so->so_rcv.sb_lowat) {
5132                         error = sooptcopyin_timeval(sopt, &tv);
5137                         switch (sopt->sopt_name) {
5139                                 so->so_snd.sb_timeo = tv;
5142                                 so->so_rcv.sb_timeo = tv;
5150                         error = sooptcopyin(sopt, &nke, sizeof(nke),
5156                         error = sflt_attach_internal(so, nke.nke_handle);
5161                         error = sooptcopyin(sopt, &optval, sizeof(optval),
5167                                 so->so_flags |= SOF_NOSIGPIPE;
5169                                 so->so_flags &= ~SOF_NOSIGPIPE;
5174                         error = sooptcopyin(sopt, &optval, sizeof(optval),
5180                                 so->so_flags |= SOF_NOADDRAVAIL;
5182                                 so->so_flags &= ~SOF_NOADDRAVAIL;
5186                 case SO_REUSESHAREUID:
5187                         error = sooptcopyin(sopt, &optval, sizeof(optval),
5193                                 so->so_flags |= SOF_REUSESHAREUID;
5195                                 so->so_flags &= ~SOF_REUSESHAREUID;
5199                 case SO_NOTIFYCONFLICT:
5200                         if (kauth_cred_issuser(kauth_cred_get()) == 0) {
5204                         error = sooptcopyin(sopt, &optval, sizeof(optval),
5210                                 so->so_flags |= SOF_NOTIFYCONFLICT;
5212                                 so->so_flags &= ~SOF_NOTIFYCONFLICT;
5216                 case SO_RESTRICTIONS:
5217                         error = sooptcopyin(sopt, &optval, sizeof(optval),
5223                         error = so_set_restrictions(so, optval);
5226                 case SO_AWDL_UNRESTRICTED:
5227                         if (SOCK_DOM(so) != PF_INET &&
5228                             SOCK_DOM(so) != PF_INET6) {
5232                         error = sooptcopyin(sopt, &optval, sizeof(optval),
5238                                 error = soopt_cred_check(so,
5239                                     PRIV_NET_RESTRICTED_AWDL, false, false);
5241                                         inp_set_awdl_unrestricted(
5245                                 inp_clear_awdl_unrestricted(sotoinpcb(so));
5248                 case SO_INTCOPROC_ALLOW:
5249                         if (SOCK_DOM(so) != PF_INET6) {
5253                         error = sooptcopyin(sopt, &optval, sizeof(optval),
5259                             inp_get_intcoproc_allowed(sotoinpcb(so)) == FALSE) {
5260                                 error = soopt_cred_check(so,
5261                                     PRIV_NET_RESTRICTED_INTCOPROC, false, false);
5263                                         inp_set_intcoproc_allowed(
5266                         } else if (optval == 0) {
5267                                 inp_clear_intcoproc_allowed(sotoinpcb(so));
5275                 case SO_UPCALLCLOSEWAIT:
5276                         error = sooptcopyin(sopt, &optval, sizeof(optval),
5282                                 so->so_flags |= SOF_UPCALLCLOSEWAIT;
5284                                 so->so_flags &= ~SOF_UPCALLCLOSEWAIT;
5289                         error = sooptcopyin(sopt, &optval, sizeof(optval),
5295                                 so->so_flags |= SOF_BINDRANDOMPORT;
5297                                 so->so_flags &= ~SOF_BINDRANDOMPORT;
5301                 case SO_NP_EXTENSIONS: {
5302                         struct so_np_extensions sonpx;
5304                         error = sooptcopyin(sopt, &sonpx, sizeof(sonpx),
5309                         if (sonpx.npx_mask & ~SONPX_MASK_VALID) {
5314                          * Only one bit defined for now
5316                         if ((sonpx.npx_mask & SONPX_SETOPTSHUT)) {
5317                                 if ((sonpx.npx_flags & SONPX_SETOPTSHUT)) {
5318                                         so->so_flags |= SOF_NPX_SETOPTSHUT;
5320                                         so->so_flags &= ~SOF_NPX_SETOPTSHUT;
5326                 case SO_TRAFFIC_CLASS: {
5327                         error = sooptcopyin(sopt, &optval, sizeof(optval),
5332                         if (optval >= SO_TC_NET_SERVICE_OFFSET) {
5333                                 int netsvc = optval - SO_TC_NET_SERVICE_OFFSET;
5334                                 error = so_set_net_service_type(so, netsvc);
5337                         error = so_set_traffic_class(so, optval);
5341                         so->so_flags1 &= ~SOF1_TC_NET_SERV_TYPE;
5342                         so->so_netsvctype = _NET_SERVICE_TYPE_UNSPEC;
5346                 case SO_RECV_TRAFFIC_CLASS: {
5347                         error = sooptcopyin(sopt, &optval, sizeof(optval),
5353                                 so->so_flags &= ~SOF_RECV_TRAFFIC_CLASS;
5355                                 so->so_flags |= SOF_RECV_TRAFFIC_CLASS;
5360 #if (DEVELOPMENT || DEBUG)
5361                 case SO_TRAFFIC_CLASS_DBG: {
5362                         struct so_tcdbg so_tcdbg;
5364                         error = sooptcopyin(sopt, &so_tcdbg,
5365                             sizeof(struct so_tcdbg), sizeof(struct so_tcdbg));
5369                         error = so_set_tcdbg(so, &so_tcdbg);
5375 #endif /* (DEVELOPMENT || DEBUG) */
5377                 case SO_PRIVILEGED_TRAFFIC_CLASS
: 
5378                         error 
= priv_check_cred(kauth_cred_get(), 
5379                             PRIV_NET_PRIVILEGED_TRAFFIC_CLASS
, 0); 
5383                         error 
= sooptcopyin(sopt
, &optval
, sizeof(optval
), 
5389                                 so
->so_flags 
&= ~SOF_PRIVILEGED_TRAFFIC_CLASS
; 
5391                                 so
->so_flags 
|= SOF_PRIVILEGED_TRAFFIC_CLASS
; 
5395 #if (DEVELOPMENT || DEBUG) 
5397                         error 
= sosetdefunct(current_proc(), so
, 0, FALSE
); 
5399                                 error 
= sodefunct(current_proc(), so
, 0); 
5403 #endif /* (DEVELOPMENT || DEBUG) */ 
5406                         error 
= sooptcopyin(sopt
, &optval
, sizeof(optval
), 
5408                         if (error 
!= 0 || (so
->so_flags 
& SOF_DEFUNCT
)) { 
5415                          * Any process can set SO_DEFUNCTOK (clear 
5416                          * SOF_NODEFUNCT), but only root can clear 
5417                          * SO_DEFUNCTOK (set SOF_NODEFUNCT). 
5420                             kauth_cred_issuser(kauth_cred_get()) == 0) { 
5425                                 so
->so_flags 
&= ~SOF_NODEFUNCT
; 
5427                                 so
->so_flags 
|= SOF_NODEFUNCT
; 
5430                         if (SOCK_DOM(so
) == PF_INET 
|| 
5431                             SOCK_DOM(so
) == PF_INET6
) { 
5432                                 char s
[MAX_IPv6_STR_LEN
]; 
5433                                 char d
[MAX_IPv6_STR_LEN
]; 
5434                                 struct inpcb 
*inp 
= sotoinpcb(so
); 
5436                                 SODEFUNCTLOG("%s[%d, %s]: so 0x%llx " 
5437                                     "[%s %s:%d -> %s:%d] is now marked " 
5438                                     "as %seligible for " 
5439                                     "defunct\n", __func__
, proc_selfpid(), 
5440                                     proc_best_name(current_proc()), 
5441                                     (uint64_t)DEBUG_KERNEL_ADDRPERM(so
), 
5442                                     (SOCK_TYPE(so
) == SOCK_STREAM
) ? 
5443                                     "TCP" : "UDP", inet_ntop(SOCK_DOM(so
), 
5444                                     ((SOCK_DOM(so
) == PF_INET
) ? 
5445                                     (void *)&inp
->inp_laddr
.s_addr 
: 
5446                                     (void *)&inp
->in6p_laddr
), s
, sizeof(s
)), 
5447                                     ntohs(inp
->in6p_lport
), 
5448                                     inet_ntop(SOCK_DOM(so
), 
5449                                     (SOCK_DOM(so
) == PF_INET
) ? 
5450                                     (void *)&inp
->inp_faddr
.s_addr 
: 
5451                                     (void *)&inp
->in6p_faddr
, d
, sizeof(d
)), 
5452                                     ntohs(inp
->in6p_fport
), 
5453                                     (so
->so_flags 
& SOF_NODEFUNCT
) ? 
5456                                 SODEFUNCTLOG("%s[%d, %s]: so 0x%llx [%d,%d] " 
5457                                     "is now marked as %seligible for " 
5459                                     __func__
, proc_selfpid(), 
5460                                     proc_best_name(current_proc()), 
5461                                     (uint64_t)DEBUG_KERNEL_ADDRPERM(so
), 
5462                                     SOCK_DOM(so
), SOCK_TYPE(so
), 
5463                                     (so
->so_flags 
& SOF_NODEFUNCT
) ? 
			/* This option is not settable */
	case SO_OPPORTUNISTIC:
		error = sooptcopyin(sopt, &optval, sizeof(optval),
			error = so_set_opportunistic(so, optval);
			/* This option is handled by lower layer(s) */
		error = sooptcopyin(sopt, &optval, sizeof(optval),
			error = so_set_recv_anyif(so, optval);
	case SO_TRAFFIC_MGT_BACKGROUND: {
		/* This option is handled by lower layer(s) */
	case SO_FLOW_DIVERT_TOKEN:
		error = flow_divert_token_set(so, sopt);
#endif	/* FLOW_DIVERT */
		if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
		    sizeof(optval))) != 0) {
		error = so_set_effective_pid(so, optval, sopt->sopt_p, true);
	case SO_DELEGATED_UUID: {
		if ((error = sooptcopyin(sopt, &euuid, sizeof(euuid),
		    sizeof(euuid))) != 0) {
		error = so_set_effective_uuid(so, euuid, sopt->sopt_p, true);
	case SO_NECP_ATTRIBUTES:
		error = necp_set_socket_attributes(so, sopt);
	case SO_NECP_CLIENTUUID: {
		if (SOCK_DOM(so) == PF_MULTIPATH) {
			/* Handled by MPTCP itself */
		if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
		struct inpcb *inp = sotoinpcb(so);
		if (!uuid_is_null(inp->necp_client_uuid)) {
			// Clear out the old client UUID if present
			necp_inpcb_remove_cb(inp);
		error = sooptcopyin(sopt, &inp->necp_client_uuid,
		    sizeof(uuid_t), sizeof(uuid_t));
		if (uuid_is_null(inp->necp_client_uuid)) {
		pid_t current_pid = proc_pid(current_proc());
		error = necp_client_register_socket_flow(current_pid,
		    inp->necp_client_uuid, inp);
			uuid_clear(inp->necp_client_uuid);
		if (inp->inp_lport != 0) {
			// There is a bound local port, so this is not
			// a fresh socket. Assign to the client.
			necp_client_assign_from_socket(current_pid, inp->necp_client_uuid, inp);
	case SO_NECP_LISTENUUID: {
		if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
		struct inpcb *inp = sotoinpcb(so);
		if (!uuid_is_null(inp->necp_client_uuid)) {
		error = sooptcopyin(sopt, &inp->necp_client_uuid,
		    sizeof(uuid_t), sizeof(uuid_t));
		if (uuid_is_null(inp->necp_client_uuid)) {
		error = necp_client_register_socket_listener(proc_pid(current_proc()),
		    inp->necp_client_uuid, inp);
			uuid_clear(inp->necp_client_uuid);
		// Mark that the port registration is held by NECP
		inp->inp_flags2 |= INP2_EXTERNAL_PORT;
	case SO_EXTENDED_BK_IDLE:
		error = sooptcopyin(sopt, &optval, sizeof(optval),
			error = so_set_extended_bk_idle(so, optval);
	case SO_MARK_CELLFALLBACK:
		error = sooptcopyin(sopt, &optval, sizeof(optval),
			so->so_flags1 &= ~SOF1_CELLFALLBACK;
			so->so_flags1 |= SOF1_CELLFALLBACK;
	case SO_STATISTICS_EVENT:
		error = sooptcopyin(sopt, &long_optval,
		    sizeof(long_optval), sizeof(long_optval));
		u_int64_t nstat_event = 0;
		error = so_statistics_event_to_nstat_event(
			&long_optval, &nstat_event);
		nstat_pcb_event(sotoinpcb(so), nstat_event);
	case SO_NET_SERVICE_TYPE: {
		error = sooptcopyin(sopt, &optval, sizeof(optval),
		error = so_set_net_service_type(so, optval);
	case SO_QOSMARKING_POLICY_OVERRIDE:
		error = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_QOSMARKING_POLICY_OVERRIDE, 0);
		error = sooptcopyin(sopt, &optval, sizeof(optval),
			so->so_flags1 &= ~SOF1_QOSMARKING_POLICY_OVERRIDE;
			so->so_flags1 |= SOF1_QOSMARKING_POLICY_OVERRIDE;
	case SO_MPKL_SEND_INFO: {
		struct so_mpkl_send_info so_mpkl_send_info;
		error = sooptcopyin(sopt, &so_mpkl_send_info,
		    sizeof(struct so_mpkl_send_info), sizeof(struct so_mpkl_send_info));
		uuid_copy(so->so_mpkl_send_uuid, so_mpkl_send_info.mpkl_uuid);
		so->so_mpkl_send_proto = so_mpkl_send_info.mpkl_proto;
		if (uuid_is_null(so->so_mpkl_send_uuid) && so->so_mpkl_send_proto == 0) {
			so->so_flags1 &= ~SOF1_MPKL_SEND_INFO;
			so->so_flags1 |= SOF1_MPKL_SEND_INFO;
	case SO_WANT_KEV_SOCKET_CLOSED: {
		error = sooptcopyin(sopt, &optval, sizeof(optval),
			so->so_flags1 &= ~SOF1_WANT_KEV_SOCK_CLOSED;
			so->so_flags1 |= SOF1_WANT_KEV_SOCK_CLOSED;
		error = ENOPROTOOPT;
	if (error == 0 && so->so_proto != NULL &&
	    so->so_proto->pr_ctloutput != NULL) {
		(void) so->so_proto->pr_ctloutput(so, sopt);
	socket_unlock(so, 1);
/* Helper routines for getsockopt */
sooptcopyout(struct sockopt *sopt, void *buf, size_t len)
	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer must
	 * be generated ahead of time.
	 */
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != USER_ADDR_NULL) {
		if (sopt->sopt_p != kernproc) {
			error = copyout(buf, sopt->sopt_val, valsize);
			bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
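
/*
 * sooptcopyout() is the common exit path for the getsockopt handlers below:
 * it clips the copy to the smaller of the kernel value size and the
 * user-supplied buffer, records that size in sopt->sopt_valsize, and then
 * either copyout()s to user space or bcopy()s for in-kernel callers.  A
 * typical caller (sketch, mirroring the cases in sogetoptlock()):
 *
 *	int optval = so->so_options & sopt->sopt_name;
 *	error = sooptcopyout(sopt, &optval, sizeof(optval));
 */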
sooptcopyout_timeval(struct sockopt *sopt, const struct timeval *tv_p)
	struct user64_timeval	tv64 = {};
	struct user32_timeval	tv32 = {};
	if (proc_is64bit(sopt->sopt_p)) {
		tv64.tv_sec = tv_p->tv_sec;
		tv64.tv_usec = tv_p->tv_usec;
		tv32.tv_sec = tv_p->tv_sec;
		tv32.tv_usec = tv_p->tv_usec;
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != USER_ADDR_NULL) {
		if (sopt->sopt_p != kernproc) {
			error = copyout(val, sopt->sopt_val, valsize);
			bcopy(val, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
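
/*
 * sooptcopyout_timeval() widens or narrows the timeval to the caller's ABI:
 * proc_is64bit() selects between user64_timeval and user32_timeval before
 * the same clip-and-copy step used by sooptcopyout().
 */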
 *	<pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
 *	<pr_ctloutput>:???
 *	<sf_getoption>:???
sogetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
	if (sopt->sopt_dir != SOPT_GET) {
		sopt->sopt_dir = SOPT_GET;
	error = sflt_getsockopt(so, sopt);
		if (error == EJUSTRETURN) {
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto != NULL &&
		    so->so_proto->pr_ctloutput != NULL) {
			error = (*so->so_proto->pr_ctloutput)(so, sopt);
		error = ENOPROTOOPT;
		/*
		 * Allow socket-level (SOL_SOCKET) options to be filtered by
		 * the protocol layer, if needed.  A zero value returned from
		 * the handler means use default socket-level processing as
		 * done by the rest of this routine.  Otherwise, any other
		 * return value indicates that the option is unsupported.
		 */
		if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs->
		    pru_socheckopt(so, sopt)) != 0) {
		switch (sopt->sopt_name) {
			l.l_onoff = ((so->so_options & SO_LINGER) ? 1 : 0);
			l.l_linger = (sopt->sopt_name == SO_LINGER) ?
			    so->so_linger : so->so_linger / hz;
			error = sooptcopyout(sopt, &l, sizeof(l));
		case SO_USELOOPBACK:
		case SO_TIMESTAMP_MONOTONIC:
		case SO_TIMESTAMP_CONTINUOUS:
		case SO_WANTOOBFLAG:
		case SO_NOWAKEFROMSLEEP:
		case SO_NOAPNFALLBK:
			optval = so->so_options & sopt->sopt_name;
			error = sooptcopyout(sopt, &optval, sizeof(optval));
			optval = so->so_type;
			if (so->so_proto->pr_flags & PR_ATOMIC) {
				m1 = so->so_rcv.sb_mb;
				while (m1 != NULL) {
					if (m1->m_type == MT_DATA ||
					    m1->m_type == MT_HEADER ||
					    m1->m_type == MT_OOBDATA) {
						pkt_total += m1->m_len;
				optval = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
			if (so->so_proto->pr_flags & PR_ATOMIC) {
				m1 = so->so_rcv.sb_mb;
				while (m1 != NULL) {
				error = ENOPROTOOPT;
			optval = so->so_snd.sb_cc;
			optval = so->so_error;
			u_int32_t hiwat = so->so_snd.sb_hiwat;
			if (so->so_snd.sb_flags & SB_UNIX) {
				    (struct unpcb *)(so->so_pcb);
				if (unp != NULL && unp->unp_conn != NULL) {
					hiwat += unp->unp_conn->unp_cc;
			optval = so->so_rcv.sb_hiwat;
			optval = so->so_snd.sb_lowat;
			optval = so->so_rcv.sb_lowat;
			tv = (sopt->sopt_name == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
			error = sooptcopyout_timeval(sopt, &tv);
			optval = (so->so_flags & SOF_NOSIGPIPE);
			optval = (so->so_flags & SOF_NOADDRAVAIL);
		case SO_REUSESHAREUID:
			optval = (so->so_flags & SOF_REUSESHAREUID);
		case SO_NOTIFYCONFLICT:
			optval = (so->so_flags & SOF_NOTIFYCONFLICT);
		case SO_RESTRICTIONS:
			optval = so_get_restrictions(so);
		case SO_AWDL_UNRESTRICTED:
			if (SOCK_DOM(so) == PF_INET ||
			    SOCK_DOM(so) == PF_INET6) {
				optval = inp_get_awdl_unrestricted(
		case SO_INTCOPROC_ALLOW:
			if (SOCK_DOM(so) == PF_INET6) {
				optval = inp_get_intcoproc_allowed(
#ifdef __APPLE_API_PRIVATE
		case SO_UPCALLCLOSEWAIT:
			optval = (so->so_flags & SOF_UPCALLCLOSEWAIT);
			optval = (so->so_flags & SOF_BINDRANDOMPORT);
		case SO_NP_EXTENSIONS: {
			struct so_np_extensions sonpx = {};
			sonpx.npx_flags = (so->so_flags & SOF_NPX_SETOPTSHUT) ?
			    SONPX_SETOPTSHUT : 0;
			sonpx.npx_mask = SONPX_MASK_VALID;
			error = sooptcopyout(sopt, &sonpx,
			    sizeof(struct so_np_extensions));
		case SO_TRAFFIC_CLASS:
			optval = so->so_traffic_class;
		case SO_RECV_TRAFFIC_CLASS:
			optval = (so->so_flags & SOF_RECV_TRAFFIC_CLASS);
#if (DEVELOPMENT || DEBUG)
		case SO_TRAFFIC_CLASS_DBG:
			error = sogetopt_tcdbg(so, sopt);
#endif /* (DEVELOPMENT || DEBUG) */
		case SO_PRIVILEGED_TRAFFIC_CLASS:
			optval = (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS);
			optval = !(so->so_flags & SOF_NODEFUNCT);
			optval = (so->so_flags & SOF_DEFUNCT);
		case SO_OPPORTUNISTIC:
			optval = so_get_opportunistic(so);
			/* This option is not gettable */
			optval = so_get_recv_anyif(so);
		case SO_TRAFFIC_MGT_BACKGROUND:
			/* This option is handled by lower layer(s) */
			if (so->so_proto != NULL &&
			    so->so_proto->pr_ctloutput != NULL) {
				(void) so->so_proto->pr_ctloutput(so, sopt);
		case SO_FLOW_DIVERT_TOKEN:
			error = flow_divert_token_get(so, sopt);
#endif	/* FLOW_DIVERT */
		case SO_NECP_ATTRIBUTES:
			error = necp_get_socket_attributes(so, sopt);
		case SO_NECP_CLIENTUUID: {
			if (SOCK_DOM(so) == PF_MULTIPATH) {
				ncu = &mpsotomppcb(so)->necp_client_uuid;
			} else if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
				ncu = &sotoinpcb(so)->necp_client_uuid;
			error = sooptcopyout(sopt, ncu, sizeof(uuid_t));
		case SO_NECP_LISTENUUID: {
			if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
				if (sotoinpcb(so)->inp_flags2 & INP2_EXTERNAL_PORT) {
					nlu = &sotoinpcb(so)->necp_client_uuid;
			error = sooptcopyout(sopt, nlu, sizeof(uuid_t));
		case SO_CFIL_SOCK_ID: {
			cfil_sock_id_t sock_id;
			sock_id = cfil_sock_id_from_socket(so);
			error = sooptcopyout(sopt, &sock_id,
			    sizeof(cfil_sock_id_t));
#endif	/* CONTENT_FILTER */
		case SO_EXTENDED_BK_IDLE:
			optval = (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED);
		case SO_MARK_CELLFALLBACK:
			optval = ((so->so_flags1 & SOF1_CELLFALLBACK) > 0)
		case SO_NET_SERVICE_TYPE: {
			if ((so->so_flags1 & SOF1_TC_NET_SERV_TYPE)) {
				optval = so->so_netsvctype;
				optval = NET_SERVICE_TYPE_BE;
		case SO_NETSVC_MARKING_LEVEL:
			optval = so_get_netsvc_marking_level(so);
		case SO_MPKL_SEND_INFO: {
			struct so_mpkl_send_info so_mpkl_send_info;
			uuid_copy(so_mpkl_send_info.mpkl_uuid, so->so_mpkl_send_uuid);
			so_mpkl_send_info.mpkl_proto = so->so_mpkl_send_proto;
			error = sooptcopyout(sopt, &so_mpkl_send_info,
			    sizeof(struct so_mpkl_send_info));
			error = ENOPROTOOPT;
	socket_unlock(so, 1);
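
/*
 * To summarize the flow above: sogetoptlock() first lets socket filters
 * answer via sflt_getsockopt() (EJUSTRETURN means the filter fully handled
 * it), hands non-SOL_SOCKET levels straight to the protocol's pr_ctloutput,
 * and for SOL_SOCKET gives pru_socheckopt() a chance to veto before the big
 * switch fills in optval and copies it out.
 */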
/*
 * The size limits on our soopt_getm are different from those on FreeBSD.
 * We limit the size of options to MCLBYTES. This will have to change
 * if we need to define options that need more space than MCLBYTES.
 */
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
	struct mbuf *m, *m_prev;
	int sopt_size = sopt->sopt_valsize;
	if (sopt_size <= 0 || sopt_size > MCLBYTES) {
	how = sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT;
	MGET(m, how, MT_DATA);
	if (sopt_size > MLEN) {
		if ((m->m_flags & M_EXT) == 0) {
		m->m_len = min(MCLBYTES, sopt_size);
		m->m_len = min(MLEN, sopt_size);
	sopt_size -= m->m_len;
	while (sopt_size > 0) {
		MGET(m, how, MT_DATA);
		if (sopt_size > MLEN) {
			if ((m->m_flags & M_EXT) == 0) {
			m->m_len = min(MCLBYTES, sopt_size);
			m->m_len = min(MLEN, sopt_size);
		sopt_size -= m->m_len;
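
/*
 * soopt_getm() sizes the chain from sopt->sopt_valsize.  Anything larger
 * than MCLBYTES is rejected up front (per the comment above); an option
 * bigger than MLEN but within that limit lands in a single cluster-backed
 * mbuf with m_len = min(MCLBYTES, sopt_size), and the trailing while loop
 * handles any leftover bytes with additional mbufs.
 */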
/* copyin sopt data into mbuf chain */
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
	struct mbuf *m0 = m;
	if (sopt->sopt_val == USER_ADDR_NULL) {
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != kernproc) {
			error = copyin(sopt->sopt_val, mtod(m, char *),
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val),
			    mtod(m, char *), m->m_len);
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val += m->m_len;
	/* enough space should have been allocated at ip6_sooptmcopyin() */
		panic("soopt_mcopyin");
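
/*
 * soopt_mcopyin() walks the chain built by soopt_getm(), filling each mbuf
 * from sopt->sopt_val (copyin() for user requests, bcopy() for in-kernel
 * callers) and advancing sopt_val/sopt_valsize as it goes; running out of
 * option bytes before the chain ends is treated as a caller bug and panics.
 */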
/* copyout mbuf chain data into soopt */
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
	struct mbuf *m0 = m;
	if (sopt->sopt_val == USER_ADDR_NULL) {
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != kernproc) {
			error = copyout(mtod(m, char *), sopt->sopt_val,
			bcopy(mtod(m, char *),
			    CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val += m->m_len;
		valsize += m->m_len;
		/* enough soopt buffer should be given from user-land */
	sopt->sopt_valsize = valsize;
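
/*
 * soopt_mcopyout() is the mirror image: it drains the mbuf chain back into
 * the user (or kernel) buffer, accumulating the total in valsize and
 * storing it in sopt->sopt_valsize so the caller learns how much option
 * data was actually returned.
 */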
sohasoutofband(struct socket *so)
	if (so->so_pgid < 0) {
		gsignal(-so->so_pgid, SIGURG);
	} else if (so->so_pgid > 0) {
		proc_signal(so->so_pgid, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
	if (so->so_rcv.sb_flags & SB_KNOTE) {
		KNOTE(&so->so_rcv.sb_sel.si_note,
		    (NOTE_OOB | SO_FILT_HINT_LOCKED));
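
/*
 * Out-of-band notification fans out three ways above: SIGURG to the owning
 * process or process group recorded in so_pgid, a select()/poll() wakeup on
 * the receive buffer, and a NOTE_OOB knote for kqueue watchers.
 */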
sopoll(struct socket *so, int events, kauth_cred_t cred, void * wql)
#pragma unused(cred)
	struct proc *p = current_proc();
	so_update_last_owner_locked(so, PROC_NULL);
	so_update_policy(so);
	if (events & (POLLIN | POLLRDNORM)) {
		if (soreadable(so)) {
			revents |= events & (POLLIN | POLLRDNORM);
	if (events & (POLLOUT | POLLWRNORM)) {
		if (sowriteable(so)) {
			revents |= events & (POLLOUT | POLLWRNORM);
	if (events & (POLLPRI | POLLRDBAND)) {
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
			revents |= events & (POLLPRI | POLLRDBAND);
		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			/*
			 * Darwin sets the flag first,
			 * BSD calls selrecord first
			 */
			so->so_rcv.sb_flags |= SB_SEL;
			selrecord(p, &so->so_rcv.sb_sel, wql);
		if (events & (POLLOUT | POLLWRNORM)) {
			/*
			 * Darwin sets the flag first,
			 * BSD calls selrecord first
			 */
			so->so_snd.sb_flags |= SB_SEL;
			selrecord(p, &so->so_snd.sb_sel, wql);
	socket_unlock(so, 1);
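
/*
 * sopoll() reports readiness with soreadable()/sowriteable() and treats
 * out-of-band state (so_oobmark or SS_RCVATMARK) as POLLPRI/POLLRDBAND.
 * When no requested event is pending it records interest via selrecord(),
 * setting SB_SEL on the buffer first (the Darwin-vs-BSD ordering called out
 * in the comments above).
 */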
soo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
	struct socket *so = (struct socket *)fp->fp_glob->fg_data;
	so_update_last_owner_locked(so, PROC_NULL);
	so_update_policy(so);
	switch (kn->kn_filter) {
		kn->kn_filtid = EVFILTID_SOREAD;
		kn->kn_filtid = EVFILTID_SOWRITE;
		kn->kn_filtid = EVFILTID_SCK;
		kn->kn_filtid = EVFILTID_SOEXCEPT;
		socket_unlock(so, 1);
		knote_set_error(kn, EINVAL);
	/*
	 * call the appropriate sub-filter attach
	 * with the socket still locked
	 */
	result = knote_fops(kn)->f_attach(kn, kev);
	socket_unlock(so, 1);
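
/*
 * soo_kqfilter() only routes the knote: it picks the matching kn_filtid
 * (EVFILTID_SOREAD, EVFILTID_SOWRITE, EVFILTID_SCK or EVFILTID_SOEXCEPT),
 * rejects unknown filters with EINVAL, and then invokes that sub-filter's
 * f_attach while the socket lock is still held, as the comment above notes.
 */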
filt_soread_common(struct knote *kn, struct kevent_qos_s *kev, struct socket *so)
	if (so->so_options & SO_ACCEPTCONN) {
		/*
		 * Radar 6615193 handle the listen case dynamically
		 * for kqueue read filter. This allows calling listen()
		 * after registering the kqueue EVFILT_READ.
		 */
		retval = !TAILQ_EMPTY(&so->so_comp);
	/* socket isn't a listener */
	 * NOTE_LOWAT specifies new low water mark in data, i.e.
	 * the bytes of protocol data. We therefore exclude any
	data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
	if (kn->kn_sfflags & NOTE_OOB) {
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
			kn->kn_fflags |= NOTE_OOB;
			data -= so->so_oobmark;
	if ((so->so_state & SS_CANTRCVMORE)
	    && cfil_sock_data_pending(&so->so_rcv) == 0
#endif /* CONTENT_FILTER */
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
	if (so->so_error) {	/* temporary udp error */
	int64_t lowwat = so->so_rcv.sb_lowat;
	/*
	 * Ensure that when NOTE_LOWAT is used, the derived
	 * low water mark is bounded by socket's rcv buf's
	 * high and low water mark values.
	 */
	if (kn->kn_sfflags & NOTE_LOWAT) {
		if (kn->kn_sdata > so->so_rcv.sb_hiwat) {
			lowwat = so->so_rcv.sb_hiwat;
		} else if (kn->kn_sdata > lowwat) {
			lowwat = kn->kn_sdata;
	/*
	 * While the `data` field is the amount of data to read,
	 * 0-sized packets need to wake up the kqueue, see 58140856,
	 * so we need to take control bytes into account too.
	 */
	retval = (so->so_rcv.sb_cc >= lowwat);
	if (retval && kev) {
		knote_fill_kevent(kn, kev, data);

filt_sorattach(struct knote *kn, __unused struct kevent_qos_s *kev)
	struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
	/*
	 * If the caller explicitly asked for OOB results (e.g. poll())
	 * from EVFILT_READ, then save that off in the hookid field
	 * and reserve the kn_flags EV_OOBAND bit for output only.
	 */
	if (kn->kn_filter == EVFILT_READ &&
	    kn->kn_flags & EV_OOBAND) {
		kn->kn_flags &= ~EV_OOBAND;
		kn->kn_hook32 = EV_OOBAND;
	if (KNOTE_ATTACH(&so->so_rcv.sb_sel.si_note, kn)) {
		so->so_rcv.sb_flags |= SB_KNOTE;
	/* indicate if event is already fired */
	return filt_soread_common(kn, NULL, so);

filt_sordetach(struct knote *kn)
	struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
	if (so->so_rcv.sb_flags & SB_KNOTE) {
		if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn)) {
			so->so_rcv.sb_flags &= ~SB_KNOTE;
	socket_unlock(so, 1);

filt_soread(struct knote *kn, long hint)
	struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
	retval = filt_soread_common(kn, NULL, so);
	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
		socket_unlock(so, 1);

filt_sortouch(struct knote *kn, struct kevent_qos_s *kev)
	struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
	/* save off the new input fflags and data */
	kn->kn_sfflags = kev->fflags;
	kn->kn_sdata = kev->data;
	/* determine if changes result in fired events */
	retval = filt_soread_common(kn, NULL, so);
	socket_unlock(so, 1);

filt_sorprocess(struct knote *kn, struct kevent_qos_s *kev)
	struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
	retval = filt_soread_common(kn, kev, so);
	socket_unlock(so, 1);
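
/*
 * Summary of filt_soread_common(): a listening socket is readable when the
 * completed-connection queue is non-empty; otherwise readiness compares
 * so_rcv.sb_cc against the effective low-water mark.  With NOTE_LOWAT the
 * requested kn_sdata is clamped into [sb_lowat, sb_hiwat]; e.g. asking for
 * a low-water mark above sb_hiwat behaves as if sb_hiwat had been
 * requested.  EV_EOF is set once the peer can send no more (and, with
 * CONTENT_FILTER, once no filtered data is still pending).
 */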
so_wait_for_if_feedback(struct socket *so)
	if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) &&
	    (so->so_state & SS_ISCONNECTED)) {
		struct inpcb *inp = sotoinpcb(so);
		if (INP_WAIT_FOR_IF_FEEDBACK(inp)) {

filt_sowrite_common(struct knote *kn, struct kevent_qos_s *kev, struct socket *so)
	int64_t data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
	if (so->so_error) {	/* temporary udp error */
	if (!socanwrite(so)) {
	if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
	int64_t lowwat = so->so_snd.sb_lowat;
	if (kn->kn_sfflags & NOTE_LOWAT) {
		if (kn->kn_sdata > so->so_snd.sb_hiwat) {
			lowwat = so->so_snd.sb_hiwat;
		} else if (kn->kn_sdata > lowwat) {
			lowwat = kn->kn_sdata;
	if (data >= lowwat) {
		if ((so->so_flags & SOF_NOTSENT_LOWAT)
#if (DEBUG || DEVELOPMENT)
		    && so_notsent_lowat_check == 1
#endif /* DEBUG || DEVELOPMENT */
			if ((SOCK_DOM(so) == PF_INET ||
			    SOCK_DOM(so) == PF_INET6) &&
			    so->so_type == SOCK_STREAM) {
				ret = tcp_notsent_lowat_check(so);
			else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
			    (SOCK_PROTO(so) == IPPROTO_TCP)) {
				ret = mptcp_notsent_lowat_check(so);
	if (so_wait_for_if_feedback(so)) {
		knote_fill_kevent(kn, kev, data);

filt_sowattach(struct knote *kn, __unused struct kevent_qos_s *kev)
	struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
	if (KNOTE_ATTACH(&so->so_snd.sb_sel.si_note, kn)) {
		so->so_snd.sb_flags |= SB_KNOTE;
	/* determine if it's already fired */
	return filt_sowrite_common(kn, NULL, so);

filt_sowdetach(struct knote *kn)
	struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
	if (so->so_snd.sb_flags & SB_KNOTE) {
		if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn)) {
			so->so_snd.sb_flags &= ~SB_KNOTE;
	socket_unlock(so, 1);

filt_sowrite(struct knote *kn, long hint)
	struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
	ret = filt_sowrite_common(kn, NULL, so);
	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
		socket_unlock(so, 1);

filt_sowtouch(struct knote *kn, struct kevent_qos_s *kev)
	struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
	/* save off the new input fflags and data */
	kn->kn_sfflags = kev->fflags;
	kn->kn_sdata = kev->data;
	/* determine if these changes result in a triggered event */
	ret = filt_sowrite_common(kn, NULL, so);
	socket_unlock(so, 1);

filt_sowprocess(struct knote *kn, struct kevent_qos_s *kev)
	struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
	ret = filt_sowrite_common(kn, kev, so);
	socket_unlock(so, 1);
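
/*
 * Summary of filt_sowrite_common(): writability starts from
 * sbspace(&so->so_snd) compared against the send low-water mark (clamped
 * the same way as on the read side when NOTE_LOWAT is used).  When
 * SOF_NOTSENT_LOWAT is set, TCP and MPTCP sockets additionally defer to
 * tcp_notsent_lowat_check()/mptcp_notsent_lowat_check(), and a pending
 * interface-feedback wait (so_wait_for_if_feedback()) holds the event back.
 */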
filt_sockev_common(struct knote *kn, struct kevent_qos_s *kev,
    struct socket *so, long ev_hint)
	uint32_t level_trigger = 0;
	if (ev_hint & SO_FILT_HINT_CONNRESET) {
		kn->kn_fflags |= NOTE_CONNRESET;
	if (ev_hint & SO_FILT_HINT_TIMEOUT) {
		kn->kn_fflags |= NOTE_TIMEOUT;
	if (ev_hint & SO_FILT_HINT_NOSRCADDR) {
		kn->kn_fflags |= NOTE_NOSRCADDR;
	if (ev_hint & SO_FILT_HINT_IFDENIED) {
		kn->kn_fflags |= NOTE_IFDENIED;
	if (ev_hint & SO_FILT_HINT_KEEPALIVE) {
		kn->kn_fflags |= NOTE_KEEPALIVE;
	if (ev_hint & SO_FILT_HINT_ADAPTIVE_WTIMO) {
		kn->kn_fflags |= NOTE_ADAPTIVE_WTIMO;
	if (ev_hint & SO_FILT_HINT_ADAPTIVE_RTIMO) {
		kn->kn_fflags |= NOTE_ADAPTIVE_RTIMO;
	if ((ev_hint & SO_FILT_HINT_CONNECTED) ||
	    (so->so_state & SS_ISCONNECTED)) {
		kn->kn_fflags |= NOTE_CONNECTED;
		level_trigger |= NOTE_CONNECTED;
	if ((ev_hint & SO_FILT_HINT_DISCONNECTED) ||
	    (so->so_state & SS_ISDISCONNECTED)) {
		kn->kn_fflags |= NOTE_DISCONNECTED;
		level_trigger |= NOTE_DISCONNECTED;
	if (ev_hint & SO_FILT_HINT_CONNINFO_UPDATED) {
		if (so->so_proto != NULL &&
		    (so->so_proto->pr_flags & PR_EVCONNINFO)) {
			kn->kn_fflags |= NOTE_CONNINFO_UPDATED;
	if ((ev_hint & SO_FILT_HINT_NOTIFY_ACK) ||
	    tcp_notify_ack_active(so)) {
		kn->kn_fflags |= NOTE_NOTIFY_ACK;
	if ((so->so_state & SS_CANTRCVMORE)
	    && cfil_sock_data_pending(&so->so_rcv) == 0
#endif /* CONTENT_FILTER */
		kn->kn_fflags |= NOTE_READCLOSED;
		level_trigger |= NOTE_READCLOSED;
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_fflags |= NOTE_WRITECLOSED;
		level_trigger |= NOTE_WRITECLOSED;
	if ((ev_hint & SO_FILT_HINT_SUSPEND) ||
	    (so->so_flags & SOF_SUSPENDED)) {
		kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
		/* If resume event was delivered before, reset it */
		kn->kn_hook32 &= ~NOTE_RESUME;
		kn->kn_fflags |= NOTE_SUSPEND;
		level_trigger |= NOTE_SUSPEND;
	if ((ev_hint & SO_FILT_HINT_RESUME) ||
	    (so->so_flags & SOF_SUSPENDED) == 0) {
		kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
		/* If suspend event was delivered before, reset it */
		kn->kn_hook32 &= ~NOTE_SUSPEND;
		kn->kn_fflags |= NOTE_RESUME;
		level_trigger |= NOTE_RESUME;
	if (so->so_error != 0) {
		data = so->so_error;
		kn->kn_flags |= EV_EOF;
		u_int32_t data32 = 0;
		get_sockev_state(so, &data32);
	/* Reset any events that are not requested on this knote */
	kn->kn_fflags &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK);
	level_trigger &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK);
	/* Find the level triggered events that are already delivered */
	level_trigger &= kn->kn_hook32;
	level_trigger &= EVFILT_SOCK_LEVEL_TRIGGER_MASK;
	/* Do not deliver level triggered events more than once */
	if ((kn->kn_fflags & ~level_trigger) != 0) {
		/*
		 * Store the state of the events being delivered. This
		 * state can be used to deliver level triggered events
		 * at least once and still avoid waking up the application
		 * multiple times as long as the event is active.
		 */
		if (kn->kn_fflags != 0) {
			kn->kn_hook32 |= (kn->kn_fflags &
			    EVFILT_SOCK_LEVEL_TRIGGER_MASK);
		/*
		 * NOTE_RESUME and NOTE_SUSPEND are an exception, deliver
		 * only one of them and remember the last one that was
		 * delivered.
		 */
		if (kn->kn_fflags & NOTE_SUSPEND) {
			kn->kn_hook32 &= ~NOTE_RESUME;
		if (kn->kn_fflags & NOTE_RESUME) {
			kn->kn_hook32 &= ~NOTE_SUSPEND;
		knote_fill_kevent(kn, kev, data);

filt_sockattach(struct knote *kn, __unused struct kevent_qos_s *kev)
	struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
	if (KNOTE_ATTACH(&so->so_klist, kn)) {
		so->so_flags |= SOF_KNOTE;
	/* determine if event already fired */
	return filt_sockev_common(kn, NULL, so, 0);

filt_sockdetach(struct knote *kn)
	struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
	if ((so->so_flags & SOF_KNOTE) != 0) {
		if (KNOTE_DETACH(&so->so_klist, kn)) {
			so->so_flags &= ~SOF_KNOTE;
	socket_unlock(so, 1);

filt_sockev(struct knote *kn, long hint)
	int ret = 0, locked = 0;
	struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
	long ev_hint = (hint & SO_FILT_HINT_EV);
	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
	ret = filt_sockev_common(kn, NULL, so, ev_hint);
		socket_unlock(so, 1);

/*
 *	filt_socktouch - update event state
 */
    struct kevent_qos_s *kev)
	struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
	uint32_t changed_flags;
	/* save off the [result] data and fflags */
	changed_flags = (kn->kn_sfflags ^ kn->kn_hook32);
	/* save off the new input fflags and data */
	kn->kn_sfflags = kev->fflags;
	kn->kn_sdata = kev->data;
	/* restrict the current results to the (smaller?) set of new interest */
	/*
	 * For compatibility with previous implementations, we leave kn_fflags
	 * as they were before.
	 */
	//kn->kn_fflags &= kev->fflags;
	/*
	 * Since we keep track of events that are already
	 * delivered, if any of those events are not requested
	 * anymore the state related to them can be reset
	 */
	kn->kn_hook32 &= ~(changed_flags & EVFILT_SOCK_LEVEL_TRIGGER_MASK);
	/* determine if we have events to deliver */
	ret = filt_sockev_common(kn, NULL, so, 0);
	socket_unlock(so, 1);

/*
 *	filt_sockprocess - query event fired state and return data
 */
filt_sockprocess(struct knote *kn, struct kevent_qos_s *kev)
	struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data;
	ret = filt_sockev_common(kn, kev, so, 0);
	socket_unlock(so, 1);
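
/*
 * The EVFILT_SOCK path above keeps per-knote delivery state in kn_hook32:
 * level-triggered bits (connected/disconnected, read/write closed,
 * suspend/resume) are remembered there after delivery so an event that
 * stays asserted is reported once rather than waking the application on
 * every poll, and filt_socktouch() clears that memory for bits the caller
 * no longer requests.
 */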
get_sockev_state(struct socket *so, u_int32_t *statep)
	u_int32_t state = *(statep);
	 * If the state variable is already used by a previous event,
	if (so->so_state & SS_ISCONNECTED) {
		state |= SOCKEV_CONNECTED;
		state &= ~(SOCKEV_CONNECTED);
	state |= ((so->so_state & SS_ISDISCONNECTED) ? SOCKEV_DISCONNECTED : 0);

#define SO_LOCK_HISTORY_STR_LEN \
	(2 * SO_LCKDBG_MAX * (2 + (2 * sizeof (void *)) + 1) + 1)

__private_extern__ const char *
solockhistory_nr(struct socket *so)
	static char lock_history_str[SO_LOCK_HISTORY_STR_LEN];
	bzero(lock_history_str, sizeof(lock_history_str));
	for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) {
		n += scnprintf(lock_history_str + n,
		    SO_LOCK_HISTORY_STR_LEN - n, "%p:%p ",
		    so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX],
		    so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX]);
	return lock_history_str;

socket_getlock(struct socket *so, int flags)
	if (so->so_proto->pr_getlock != NULL) {
		return (*so->so_proto->pr_getlock)(so, flags);
		return so->so_proto->pr_domain->dom_mtx;

socket_lock(struct socket *so, int refcount)
	lr_saved = __builtin_return_address(0);
	if (so->so_proto->pr_lock) {
		(*so->so_proto->pr_lock)(so, refcount, lr_saved);
#ifdef MORE_LOCKING_DEBUG
		LCK_MTX_ASSERT(so->so_proto->pr_domain->dom_mtx,
		    LCK_MTX_ASSERT_NOTOWNED);
		lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
		so->lock_lr[so->next_lock_lr] = lr_saved;
		so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;

socket_lock_assert_owned(struct socket *so)
	lck_mtx_t *mutex_held;
	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

socket_try_lock(struct socket *so)
	if (so->so_proto->pr_getlock != NULL) {
		mtx = (*so->so_proto->pr_getlock)(so, 0);
		mtx = so->so_proto->pr_domain->dom_mtx;
	return lck_mtx_try_lock(mtx);

socket_unlock(struct socket *so, int refcount)
	lck_mtx_t *mutex_held;
	lr_saved = __builtin_return_address(0);
	if (so == NULL || so->so_proto == NULL) {
		panic("%s: null so_proto so=%p\n", __func__, so);
	if (so->so_proto->pr_unlock) {
		(*so->so_proto->pr_unlock)(so, refcount, lr_saved);
		mutex_held = so->so_proto->pr_domain->dom_mtx;
#ifdef MORE_LOCKING_DEBUG
		LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
		so->unlock_lr[so->next_unlock_lr] = lr_saved;
		so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
			if (so->so_usecount <= 0) {
				panic("%s: bad refcount=%d so=%p (%d, %d, %d) "
				    "lrh=%s", __func__, so->so_usecount, so,
				    SOCK_DOM(so), so->so_type,
				    SOCK_PROTO(so), solockhistory_nr(so));
			if (so->so_usecount == 0) {
				sofreelastref(so, 1);
		lck_mtx_unlock(mutex_held);
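
/*
 * socket_lock()/socket_unlock() defer to the protocol's pr_lock/pr_unlock
 * when one is provided and otherwise fall back to the domain mutex,
 * recording caller return addresses in lock_lr/unlock_lr for the
 * solockhistory_nr() debug string.  The refcount argument ties the lock to
 * the socket's use count: unlocking with refcount set drops so_usecount and
 * lets sofreelastref() reclaim the socket once it reaches zero.
 */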
/* Called with socket locked, will unlock socket */
sofree(struct socket *so)
	lck_mtx_t *mutex_held;
	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
	sofreelastref(so, 0);

soreference(struct socket *so)
	socket_lock(so, 1);	/* locks & take one reference on socket */
	socket_unlock(so, 0);	/* unlock only */

sodereference(struct socket *so)
	socket_unlock(so, 1);

 * Set or clear SOF_MULTIPAGES on the socket to enable or disable the
 * possibility of using jumbo clusters.  Caller must ensure to hold
somultipages(struct socket *so, boolean_t set)
		so->so_flags |= SOF_MULTIPAGES;
		so->so_flags &= ~SOF_MULTIPAGES;

soif2kcl(struct socket *so, boolean_t set)
		so->so_flags1 |= SOF1_IF_2KCL;
		so->so_flags1 &= ~SOF1_IF_2KCL;

so_isdstlocal(struct socket *so)
	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	if (SOCK_DOM(so) == PF_INET) {
		return inaddr_local(inp->inp_faddr);
	} else if (SOCK_DOM(so) == PF_INET6) {
		return in6addr_local(&inp->in6p_faddr);
sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
	struct sockbuf *rcv, *snd;
	int err = 0, defunct;
	defunct = (so->so_flags & SOF_DEFUNCT);
		if (!(snd->sb_flags & rcv->sb_flags & SB_DROP)) {
			panic("%s: SB_DROP not set", __func__);
	if (so->so_flags & SOF_NODEFUNCT) {
			if (p != PROC_NULL) {
				SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
				    "name %s level %d) so 0x%llx [%d,%d] "
				    "is not eligible for defunct "
				    "(%d)\n", __func__, proc_selfpid(),
				    proc_best_name(current_proc()), proc_pid(p),
				    proc_best_name(p), level,
				    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
				    SOCK_DOM(so), SOCK_TYPE(so), err);
		so->so_flags &= ~SOF_NODEFUNCT;
		if (p != PROC_NULL) {
			SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
			    "name %s level %d) so 0x%llx [%d,%d] "
			    "(%d)\n", __func__, proc_selfpid(),
			    proc_best_name(current_proc()), proc_pid(p),
			    proc_best_name(p), level,
			    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
			    SOCK_DOM(so), SOCK_TYPE(so), err);
	} else if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
		struct inpcb *inp = (struct inpcb *)so->so_pcb;
		struct ifnet *ifp = inp->inp_last_outifp;
		if (ifp && IFNET_IS_CELLULAR(ifp)) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nocell);
		} else if (so->so_flags & SOF_DELEGATED) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd);
		} else if (soextbkidlestat.so_xbkidle_time == 0) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_notime);
		} else if (noforce && p != PROC_NULL) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_active);
			so->so_flags1 |= SOF1_EXTEND_BK_IDLE_INPROG;
			so->so_extended_bk_start = net_uptime();
			OSBitOrAtomic(P_LXBKIDLEINPROG, &p->p_ladvflag);
			inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY);
			SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
			    "name %s level %d) so 0x%llx [%d,%d] "
			    "(%d)\n", __func__, proc_selfpid(),
			    proc_best_name(current_proc()), proc_pid(p),
			    proc_best_name(p), level,
			    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
			    SOCK_DOM(so), SOCK_TYPE(so), err);
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_forced);
	so->so_flags |= SOF_DEFUNCT;
	/* Prevent further data from being appended to the socket buffers */
	snd->sb_flags |= SB_DROP;
	rcv->sb_flags |= SB_DROP;
	/* Flush any existing data in the socket buffers */
	if (rcv->sb_cc != 0) {
		rcv->sb_flags &= ~SB_SEL;
		selthreadclear(&rcv->sb_sel);
	if (snd->sb_cc != 0) {
		snd->sb_flags &= ~SB_SEL;
		selthreadclear(&snd->sb_sel);
	if (p != PROC_NULL) {
		SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s level %d) "
		    "so 0x%llx [%d,%d] %s defunct%s\n", __func__,
		    proc_selfpid(), proc_best_name(current_proc()),
		    proc_pid(p), proc_best_name(p), level,
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), defunct ? "is already" : "marked as",
		    (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ?
sodefunct(struct proc *p, struct socket *so, int level)
	struct sockbuf *rcv, *snd;
	if (!(so->so_flags & SOF_DEFUNCT)) {
		panic("%s improperly called", __func__);
	if (so->so_state & SS_DEFUNCT) {
	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		char s[MAX_IPv6_STR_LEN];
		char d[MAX_IPv6_STR_LEN];
		struct inpcb *inp = sotoinpcb(so);
		if (p != PROC_NULL) {
				"%s[%d, %s]: (target pid %d name %s level %d) "
				"so 0x%llx [%s %s:%d -> %s:%d] is now defunct "
				"[rcv_si 0x%x, snd_si 0x%x, rcv_fl 0x%x, "
				" snd_fl 0x%x]\n", __func__,
				proc_selfpid(), proc_best_name(current_proc()),
				proc_pid(p), proc_best_name(p), level,
				(uint64_t)DEBUG_KERNEL_ADDRPERM(so),
				(SOCK_TYPE(so) == SOCK_STREAM) ? "TCP" : "UDP",
				inet_ntop(SOCK_DOM(so), ((SOCK_DOM(so) == PF_INET) ?
				(void *)&inp->inp_laddr.s_addr :
				(void *)&inp->in6p_laddr),
				s, sizeof(s)), ntohs(inp->in6p_lport),
				inet_ntop(SOCK_DOM(so), (SOCK_DOM(so) == PF_INET) ?
				(void *)&inp->inp_faddr.s_addr :
				(void *)&inp->in6p_faddr,
				d, sizeof(d)), ntohs(inp->in6p_fport),
				(uint32_t)rcv->sb_sel.si_flags,
				(uint32_t)snd->sb_sel.si_flags,
				rcv->sb_flags, snd->sb_flags);
	} else if (p != PROC_NULL) {
		SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s level %d) "
		    "so 0x%llx [%d,%d] is now defunct [rcv_si 0x%x, "
		    "snd_si 0x%x, rcv_fl 0x%x, snd_fl 0x%x]\n", __func__,
		    proc_selfpid(), proc_best_name(current_proc()),
		    proc_pid(p), proc_best_name(p), level,
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so),
		    (uint32_t)rcv->sb_sel.si_flags,
		    (uint32_t)snd->sb_sel.si_flags, rcv->sb_flags,
	/*
	 * Unwedge threads blocked on sbwait() and sb_lock().
	 */
	so->so_flags1 |= SOF1_DEFUNCTINPROG;
	if (rcv->sb_flags & SB_LOCK) {
		sbunlock(rcv, TRUE);	/* keep socket locked */
	if (snd->sb_flags & SB_LOCK) {
		sbunlock(snd, TRUE);	/* keep socket locked */
	/*
	 * Flush the buffers and disconnect.  We explicitly call shutdown
	 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
	 * states are set for the socket.  This would also flush out data
	 * hanging off the receive list of this socket.
	 */
	(void) soshutdownlock_final(so, SHUT_RD);
	(void) soshutdownlock_final(so, SHUT_WR);
	(void) sodisconnectlocked(so);
	/*
	 * Explicitly handle connectionless-protocol disconnection
	 * and release any remaining data in the socket buffers.
	 */
	if (!(so->so_state & SS_ISDISCONNECTED)) {
		(void) soisdisconnected(so);
	if (so->so_error == 0) {
		so->so_error = EBADF;
	if (rcv->sb_cc != 0) {
		rcv->sb_flags &= ~SB_SEL;
		selthreadclear(&rcv->sb_sel);
	if (snd->sb_cc != 0) {
		snd->sb_flags &= ~SB_SEL;
		selthreadclear(&snd->sb_sel);
	so->so_state |= SS_DEFUNCT;
	OSIncrementAtomicLong((volatile long *)&sodefunct_calls);
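
/*
 * Defuncting is a two-step affair: sosetdefunct() decides eligibility
 * (honoring SOF_NODEFUNCT and the extended-background-idle deferral) and
 * marks SOF_DEFUNCT plus SB_DROP on both buffers, while sodefunct() above
 * performs the teardown proper: shutdown in both directions, disconnect,
 * flush the buffers, force so_error to EBADF, and set SS_DEFUNCT.
 */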
soresume(struct proc *p, struct socket *so, int locked)
	if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) {
		SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s) so 0x%llx "
		    "[%d,%d] resumed from bk idle\n",
		    __func__, proc_selfpid(), proc_best_name(current_proc()),
		    proc_pid(p), proc_best_name(p),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so));
		so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG;
		so->so_extended_bk_start = 0;
		OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag);
		OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resumed);
		OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active);
		VERIFY(soextbkidlestat.so_xbkidle_active >= 0);
		socket_unlock(so, 1);

/*
 * Does not attempt to account for sockets that are delegated from
 * the current process
 */
so_set_extended_bk_idle(struct socket *so, int optval)
	if ((SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) ||
	    SOCK_PROTO(so) != IPPROTO_TCP) {
		OSDecrementAtomic(&soextbkidlestat.so_xbkidle_notsupp);
	} else if (optval == 0) {
		so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED;
		soresume(current_proc(), so, 1);
		struct proc *p = current_proc();
		struct fileproc *fp;
		/*
		 * Unlock socket to avoid lock ordering issue with
		 * the proc fd table lock
		 */
		socket_unlock(so, 0);
		fdt_foreach(fp, p) {
			if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
			so2 = (struct socket *)fp->fp_glob->fg_data;
			    so2->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
			if (count >= soextbkidlestat.so_xbkidle_maxperproc) {
		if (count >= soextbkidlestat.so_xbkidle_maxperproc) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_toomany);
		} else if (so->so_flags & SOF_DELEGATED) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd);
			so->so_flags1 |= SOF1_EXTEND_BK_IDLE_WANTED;
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_wantok);
		SODEFUNCTLOG("%s[%d, %s]: so 0x%llx [%d,%d] "
		    "%s marked for extended bk idle\n",
		    __func__, proc_selfpid(), proc_best_name(current_proc()),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so),
		    (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ?

so_stop_extended_bk_idle(struct socket *so)
	so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG;
	so->so_extended_bk_start = 0;
	OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active);
	VERIFY(soextbkidlestat.so_xbkidle_active >= 0);
	sosetdefunct(current_proc(), so,
	    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
	if (so->so_flags & SOF_DEFUNCT) {
		sodefunct(current_proc(), so,
		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
void
so_drain_extended_bk_idle(struct socket *so)
{
	if (so && (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) {
		/*
		 * Only penalize sockets that have outstanding data
		 */
		if (so->so_rcv.sb_cc || so->so_snd.sb_cc) {
			so_stop_extended_bk_idle(so);

			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_drained);
		}
	}
}
/*
 * Return value tells whether the socket is still in extended background idle.
 */
int
so_check_extended_bk_idle_time(struct socket *so)
{
	int ret = 1;

	if ((so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) {
		SODEFUNCTLOG("%s[%d, %s]: so 0x%llx [%d,%d]\n",
		    __func__, proc_selfpid(), proc_best_name(current_proc()),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so));
		if (net_uptime() - so->so_extended_bk_start >
		    soextbkidlestat.so_xbkidle_time) {
			so_stop_extended_bk_idle(so);

			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_expired);

			ret = 0;
		} else {
			struct inpcb *inp = (struct inpcb *)so->so_pcb;

			inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY);
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resched);
		}
	}

	return ret;
}
void
resume_proc_sockets(proc_t p)
{
	if (p->p_ladvflag & P_LXBKIDLEINPROG) {
		struct fileproc *fp;
		struct socket *so;

		proc_fdlock(p);
		fdt_foreach(fp, p) {
			if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
				continue;
			}

			so = (struct socket *)fp->fp_glob->fg_data;
			(void) soresume(p, so, 0);
		}
		proc_fdunlock(p);

		OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag);
	}
}
__private_extern__ int
so_set_recv_anyif(struct socket *so, int optval)
{
	int ret = 0;

	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		if (optval) {
			sotoinpcb(so)->inp_flags |= INP_RECV_ANYIF;
		} else {
			sotoinpcb(so)->inp_flags &= ~INP_RECV_ANYIF;
		}
	}

	return ret;
}
__private_extern__ int
so_get_recv_anyif(struct socket *so)
{
	int ret = 0;

	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		ret = (sotoinpcb(so)->inp_flags & INP_RECV_ANYIF) ? 1 : 0;
	}

	return ret;
}
int
so_set_restrictions(struct socket *so, uint32_t vals)
{
	int nocell_old, nocell_new;
	int noexpensive_old, noexpensive_new;
	int noconstrained_old, noconstrained_new;

	/*
	 * Deny-type restrictions are trapdoors; once set they cannot be
	 * unset for the lifetime of the socket.  This allows them to be
	 * issued by a framework on behalf of the application without
	 * having to worry that they can be undone.
	 *
	 * Note here that socket-level restrictions override any protocol
	 * level restrictions.  For instance, a SO_RESTRICT_DENY_CELLULAR
	 * restriction issued on the socket has higher precedence
	 * than INP_NO_IFT_CELLULAR.  The latter is affected by the UUID
	 * policy PROC_UUID_NO_CELLULAR for unrestricted sockets only,
	 * i.e. when SO_RESTRICT_DENY_CELLULAR has not been issued.
	 */
	nocell_old = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
	noexpensive_old = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
	noconstrained_old = (so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED);
	so->so_restrictions |= (vals & (SO_RESTRICT_DENY_IN |
	    SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR |
	    SO_RESTRICT_DENY_EXPENSIVE | SO_RESTRICT_DENY_CONSTRAINED));
	nocell_new = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
	noexpensive_new = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
	noconstrained_new = (so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED);

	/* we can only set, not clear restrictions */
	if ((nocell_new - nocell_old) == 0 &&
	    (noexpensive_new - noexpensive_old) == 0 &&
	    (noconstrained_new - noconstrained_old) == 0) {
		return 0;
	}
	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		if (nocell_new - nocell_old != 0) {
			/*
			 * if deny cellular is now set, do what's needed
			 * for INPCB
			 */
			inp_set_nocellular(sotoinpcb(so));
		}
		if (noexpensive_new - noexpensive_old != 0) {
			inp_set_noexpensive(sotoinpcb(so));
		}
		if (noconstrained_new - noconstrained_old != 0) {
			inp_set_noconstrained(sotoinpcb(so));
		}
	}

	if (SOCK_DOM(so) == PF_MULTIPATH) {
		mptcp_set_restrictions(so);
	}

	return 0;
}
uint32_t
so_get_restrictions(struct socket *so)
{
	return so->so_restrictions & (SO_RESTRICT_DENY_IN |
	       SO_RESTRICT_DENY_OUT |
	       SO_RESTRICT_DENY_CELLULAR | SO_RESTRICT_DENY_EXPENSIVE);
}
int
so_set_effective_pid(struct socket *so, int epid, struct proc *p, boolean_t check_cred)
{
	struct proc *ep = PROC_NULL;
	int error = 0;

	/* pid 0 is reserved for kernel */
	if (epid == 0) {
		error = EINVAL;
		goto done;
	}

	/*
	 * If this is an in-kernel socket, prevent its delegate
	 * association from changing unless the socket option is
	 * coming from within the kernel itself.
	 */
	if (so->last_pid == 0 && p != kernproc) {
		error = EACCES;
		goto done;
	}

	/*
	 * If this is issued by a process that's recorded as the
	 * real owner of the socket, or if the pid is the same as
	 * the process's own pid, then proceed.  Otherwise ensure
	 * that the issuing process has the necessary privileges.
	 */
	if (check_cred && (epid != so->last_pid || epid != proc_pid(p))) {
		if ((error = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
			error = EACCES;
			goto done;
		}
	}

	/* Find the process that corresponds to the effective pid */
	if ((ep = proc_find(epid)) == PROC_NULL) {
		error = ESRCH;
		goto done;
	}

	/*
	 * If a process tries to delegate the socket to itself, then
	 * there's really nothing to do; treat it as a way for the
	 * delegate association to be cleared.  Note that we check
	 * the passed-in proc rather than calling proc_selfpid(),
	 * as we need to check the process issuing the socket option
	 * which could be kernproc.  Given that we don't allow 0 for
	 * effective pid, it means that a delegated in-kernel socket
	 * stays delegated during its lifetime (which is probably OK.)
	 */
	if (epid == proc_pid(p)) {
		so->so_flags &= ~SOF_DELEGATED;
		so->e_upid = 0;
		so->e_pid = 0;
		uuid_clear(so->e_uuid);
	} else {
		so->so_flags |= SOF_DELEGATED;
		so->e_upid = proc_uniqueid(ep);
		so->e_pid = proc_pid(ep);
		proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid));

#if defined(XNU_TARGET_OS_OSX)
		if (ep->p_responsible_pid != so->e_pid) {
			proc_t rp = proc_find(ep->p_responsible_pid);
			if (rp != PROC_NULL) {
				proc_getexecutableuuid(rp, so->so_ruuid, sizeof(so->so_ruuid));
				so->so_rpid = ep->p_responsible_pid;
				proc_rele(rp);
			} else {
				uuid_clear(so->so_ruuid);
				so->so_rpid = -1;
			}
		}
#endif
	}
	if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
		(*so->so_proto->pr_update_last_owner)(so, NULL, ep);
	}
done:
	if (error == 0 && net_io_policy_log) {
		uuid_string_t buf;

		uuid_unparse(so->e_uuid, buf);
		log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
		    "euuid %s%s\n", __func__, proc_name_address(p),
		    proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so),
		    so->e_pid, proc_name_address(ep), buf,
		    ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
	} else if (error != 0 && net_io_policy_log) {
		log(LOG_ERR, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
		    "ERROR (%d)\n", __func__, proc_name_address(p),
		    proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so),
		    epid, (ep == PROC_NULL) ? "PROC_NULL" :
		    proc_name_address(ep), error);
	}

	/* Update this socket's policy upon success */
	if (error == 0) {
		so->so_policy_gencnt *= -1;
		so_update_policy(so);
#if NECP
		so_update_necp_policy(so, NULL, NULL);
#endif /* NECP */
	}

	if (ep != PROC_NULL) {
		proc_rele(ep);
	}

	return error;
}
int
so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p,
    boolean_t check_cred)
{
	uuid_string_t buf;
	uuid_t uuid;
	int error = 0;

	/* UUID must not be all-zeroes (reserved for kernel) */
	if (uuid_is_null(euuid)) {
		error = EINVAL;
		goto done;
	}

	/*
	 * If this is an in-kernel socket, prevent its delegate
	 * association from changing unless the socket option is
	 * coming from within the kernel itself.
	 */
	if (so->last_pid == 0 && p != kernproc) {
		error = EACCES;
		goto done;
	}

	/* Get the UUID of the issuing process */
	proc_getexecutableuuid(p, uuid, sizeof(uuid));

	/*
	 * If this is issued by a process that's recorded as the
	 * real owner of the socket, or if the uuid is the same as
	 * the process's own uuid, then proceed.  Otherwise ensure
	 * that the issuing process has the necessary privileges.
	 */
	if (check_cred &&
	    (uuid_compare(euuid, so->last_uuid) != 0 ||
	    uuid_compare(euuid, uuid) != 0)) {
		if ((error = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
			error = EACCES;
			goto done;
		}
	}

	/*
	 * If a process tries to delegate the socket to itself, then
	 * there's really nothing to do; treat it as a way for the
	 * delegate association to be cleared.  Note that we check
	 * the uuid of the passed-in proc rather than that of the
	 * current process, as we need to check the process issuing
	 * the socket option which could be kernproc itself.  Given
	 * that we don't allow 0 for effective uuid, it means that
	 * a delegated in-kernel socket stays delegated during its
	 * lifetime (which is okay.)
	 */
	if (uuid_compare(euuid, uuid) == 0) {
		so->so_flags &= ~SOF_DELEGATED;
		so->e_upid = 0;
		so->e_pid = 0;
		uuid_clear(so->e_uuid);
	} else {
		so->so_flags |= SOF_DELEGATED;
		/*
		 * Unlike so_set_effective_pid(), we only have the UUID
		 * here and the process ID is not known.  Inherit the
		 * real {pid,upid} of the socket.
		 */
		so->e_upid = so->last_upid;
		so->e_pid = so->last_pid;
		uuid_copy(so->e_uuid, euuid);
	}
	/*
	 * The following clears the effective process name as it's the same
	 * as the real process's.
	 */
	if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
		(*so->so_proto->pr_update_last_owner)(so, NULL, NULL);
	}
done:
	if (error == 0 && net_io_policy_log) {
		uuid_unparse(so->e_uuid, buf);
		log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s\n", __func__, proc_name_address(p), proc_pid(p),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), so->e_pid, buf,
		    ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
	} else if (error != 0 && net_io_policy_log) {
		uuid_unparse(euuid, buf);
		log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] euuid %s "
		    "ERROR (%d)\n", __func__, proc_name_address(p), proc_pid(p),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), buf, error);
	}

	/* Update this socket's policy upon success */
	if (error == 0) {
		so->so_policy_gencnt *= -1;
		so_update_policy(so);
#if NECP
		so_update_necp_policy(so, NULL, NULL);
#endif /* NECP */
	}

	return error;
}
void
netpolicy_post_msg(uint32_t ev_code, struct netpolicy_event_data *ev_data,
    uint32_t ev_datalen)
{
	struct kev_msg ev_msg;

	/*
	 * A netpolicy event always starts with a netpolicy_event_data
	 * structure, but the caller can provide for a longer event
	 * structure to post, depending on the event code.
	 */
	VERIFY(ev_data != NULL && ev_datalen >= sizeof(*ev_data));

	bzero(&ev_msg, sizeof(ev_msg));
	ev_msg.vendor_code      = KEV_VENDOR_APPLE;
	ev_msg.kev_class        = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass     = KEV_NETPOLICY_SUBCLASS;
	ev_msg.event_code       = ev_code;

	ev_msg.dv[0].data_ptr   = ev_data;
	ev_msg.dv[0].data_length = ev_datalen;

	kev_post_msg(&ev_msg);
}
void
socket_post_kev_msg(uint32_t ev_code,
    struct kev_socket_event_data *ev_data,
    uint32_t ev_datalen)
{
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof(ev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_SOCKET_SUBCLASS;
	ev_msg.event_code = ev_code;

	ev_msg.dv[0].data_ptr = ev_data;
	ev_msg.dv[0].data_length = ev_datalen;

	kev_post_msg(&ev_msg);
}
void
socket_post_kev_msg_closed(struct socket *so)
{
	struct kev_socket_closed ev = {};
	struct sockaddr *socksa = NULL, *peersa = NULL;
	int err;

	if ((so->so_flags1 & SOF1_WANT_KEV_SOCK_CLOSED) == 0) {
		return;
	}
	err = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &socksa);
	if (err == 0) {
		err = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so,
		    &peersa);
		if (err == 0) {
			memcpy(&ev.ev_data.kev_sockname, socksa,
			    min(socksa->sa_len,
			    sizeof(ev.ev_data.kev_sockname)));
			memcpy(&ev.ev_data.kev_peername, peersa,
			    min(peersa->sa_len,
			    sizeof(ev.ev_data.kev_peername)));
			socket_post_kev_msg(KEV_SOCKET_CLOSED,
			    &ev.ev_data, sizeof(ev));
		}
	}
	if (socksa != NULL) {
		FREE(socksa, M_SONAME);
	}
	if (peersa != NULL) {
		FREE(peersa, M_SONAME);
	}
}