/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/kdebug.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <kern/zalloc.h>
#include <kern/locks.h>
#include <machine/limits.h>
int			so_cache_timeouts = 0;
int			so_cache_max_freed = 0;
int			cached_sock_count = 0;
struct socket		*socket_cache_head = 0;
struct socket		*socket_cache_tail = 0;
u_long			so_cache_time = 0;
int			so_cache_init_done = 0;
struct zone		*so_cache_zone;
extern int		get_inpcb_str_size();
extern int		get_tcp_str_size();

static lck_grp_t	*so_cache_mtx_grp;
static lck_attr_t	*so_cache_mtx_attr;
static lck_grp_attr_t	*so_cache_mtx_grp_attr;
lck_mtx_t		*so_cache_mtx;

#include <machine/limits.h>
static void	filt_sordetach(struct knote *kn);
static int	filt_soread(struct knote *kn, long hint);
static void	filt_sowdetach(struct knote *kn);
static int	filt_sowrite(struct knote *kn, long hint);
static int	filt_solisten(struct knote *kn, long hint);

static struct filterops solisten_filtops =
  { 1, NULL, filt_sordetach, filt_solisten };
static struct filterops soread_filtops =
  { 1, NULL, filt_sordetach, filt_soread };
static struct filterops sowrite_filtops =
  { 1, NULL, filt_sowdetach, filt_sowrite };
#define EVEN_MORE_LOCKING_DEBUG 0
int socket_debug = 0;
int socket_zone = M_SOCKET;
so_gen_t	so_gencnt;	/* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)
#define	DBG_FNC_SOSEND		NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
#define	DBG_FNC_SORECEIVE	NETDBG_CODE(DBG_NETSOCK, (8 << 8))
#define	DBG_FNC_SOSHUTDOWN	NETDBG_CODE(DBG_NETSOCK, (9 << 8))

#define	MAX_SOOPTGETM_SIZE	(128 * MCLBYTES)
SYSCTL_DECL(_kern_ipc);

static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
	0, "");

/* Should we get a maximum also ??? */
static int sosendmaxchain = 65536;
static int sosendminchain = 16384;
static int sorecvmincopy  = 16384;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
	0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
	0, "");
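/*
 * Note (illustrative, not part of the original source): the knobs above are
 * exported through sysctl, so they can be inspected and tuned from user space
 * without rebuilding the kernel, e.g.:
 *
 *	sysctl kern.ipc.somaxconn			# listen(2) backlog clamp
 *	sysctl -w kern.ipc.sosendminchain=8192		# tune sosend() mbuf chaining
 *	sysctl kern.ipc.sorecvmincopy			# delayed-copy threshold in soreceive()
 *
 * somaxconn bounds the backlog accepted by solisten() below; the other two
 * steer the copy strategies used in sosend() and soreceive().
 */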
void	so_cache_timer();

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */

vm_size_t	so_cache_zone_element_size;

static int sodelayed_copy(struct socket *so, struct uio *uio,
	struct mbuf **free_list, int *resid);
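/*
 * Editor's note (illustrative, not part of the original source): the
 * "switching out to the protocol specific routines" mentioned above is done
 * through the pr_usrreqs table hung off each protosw.  A minimal sketch of
 * that dispatch, assuming a locked socket `so`:
 *
 *	struct protosw *prp = so->so_proto;
 *	error = (*prp->pr_usrreqs->pru_send)(so, sendflags, top, addr, control, p);
 *
 * Every entry point below (sobind, solisten, soconnect, sosend, soreceive,
 * ...) follows the same pattern: generic bookkeeping here, then a call
 * through a pru_*() hook for the protocol-specific work.
 */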
	if (so_cache_init_done) {
		printf("socketinit: already called...\n");
		return;
	}

	/*
	 * allocate lock group attribute and group for socket cache mutex
	 */
	so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
	lck_grp_attr_setdefault(so_cache_mtx_grp_attr);

	so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr);

	/*
	 * allocate the lock attribute for socket cache mutex
	 */
	so_cache_mtx_attr = lck_attr_alloc_init();
	lck_attr_setdefault(so_cache_mtx_attr);

	so_cache_init_done = 1;

	so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr);	/* cached sockets mutex */

	if (so_cache_mtx == NULL)
		return;		/* we're hosed... */

	str_size = (vm_size_t)(sizeof(struct socket) + 4 +
			get_inpcb_str_size() + 4 +
			get_tcp_str_size());
	so_cache_zone = zinit(str_size, 120000 * str_size, 8192, "socache zone");

	printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);

	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));

	so_cache_zone_element_size = str_size;
void	cached_sock_alloc(so, waitok)
	struct socket **so;
	int waitok;
{
	caddr_t		temp;
	register u_long	offset;

	lck_mtx_lock(so_cache_mtx);

	if (cached_sock_count) {
		cached_sock_count--;
		*so = socket_cache_head;
		if (*so == 0)
			panic("cached_sock_alloc: cached sock is null");

		socket_cache_head = socket_cache_head->cache_next;
		if (socket_cache_head)
			socket_cache_head->cache_prev = 0;
		else
			socket_cache_tail = 0;

		lck_mtx_unlock(so_cache_mtx);

		temp = (*so)->so_saved_pcb;
		bzero((caddr_t)*so, sizeof(struct socket));

		kprintf("cached_sock_alloc - retrieving cached sock %x - count == %d\n", *so,
			cached_sock_count);

		(*so)->so_saved_pcb = temp;
		(*so)->cached_in_sock_layer = 1;
	} else {
		kprintf("Allocating cached sock %x from memory\n", *so);

		lck_mtx_unlock(so_cache_mtx);

		if (waitok)
			*so = (struct socket *) zalloc(so_cache_zone);
		else
			*so = (struct socket *) zalloc_noblock(so_cache_zone);

		bzero((caddr_t)*so, sizeof(struct socket));

		/*
		 * Define offsets for extra structures into our single block of
		 * memory. Align extra structures on longword boundaries.
		 */
		offset = (u_long) *so;
		offset += sizeof(struct socket);
		if (offset & 0x3) {
			offset += 4;
			offset &= 0xfffffffc;
		}
		(*so)->so_saved_pcb = (caddr_t) offset;
		offset += get_inpcb_str_size();
		if (offset & 0x3) {
			offset += 4;
			offset &= 0xfffffffc;
		}

		((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;

		kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
			(*so)->so_saved_pcb,
			((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
	}

	(*so)->cached_in_sock_layer = 1;
}
void cached_sock_free(so)
	struct socket *so;
{

	lck_mtx_lock(so_cache_mtx);

	if (++cached_sock_count > MAX_CACHED_SOCKETS) {
		--cached_sock_count;
		lck_mtx_unlock(so_cache_mtx);

		kprintf("Freeing overflowed cached socket %x\n", so);

		zfree(so_cache_zone, so);
	} else {
		kprintf("Freeing socket %x into cache\n", so);

		if (so_cache_hw < cached_sock_count)
			so_cache_hw = cached_sock_count;

		so->cache_next = socket_cache_head;
		if (socket_cache_head)
			socket_cache_head->cache_prev = so;
		else
			socket_cache_tail = so;

		so->cache_timestamp = so_cache_time;
		socket_cache_head = so;
		lck_mtx_unlock(so_cache_mtx);
	}

	kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
}
void so_cache_timer()
{
	register struct socket	*p;
	register int		n_freed = 0;

	lck_mtx_lock(so_cache_mtx);

	++so_cache_time;

	while ( (p = socket_cache_tail) ) {
		if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
			break;

		if ( (socket_cache_tail = p->cache_prev) )
			p->cache_prev->cache_next = 0;
		if (--cached_sock_count == 0)
			socket_cache_head = 0;

		zfree(so_cache_zone, p);

		if (++n_freed >= SO_CACHE_MAX_FREE_BATCH) {
			so_cache_max_freed++;
			break;
		}
	}
	lck_mtx_unlock(so_cache_mtx);

	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
}
#endif /* __APPLE__ */
/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
struct socket *
soalloc(waitok, dom, type)
	int waitok;
	int dom;
	int type;
{
	struct socket *so;

	if ((dom == PF_INET) && (type == SOCK_STREAM))
		cached_sock_alloc(&so, waitok);
	else {
		MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK);
		if (so)
			bzero(so, sizeof *so);
	}
	/* XXX race condition for reentrant kernel */
//###LD Atomic add for so_gencnt
	if (so) {
		so->so_gencnt = ++so_gencnt;
		so->so_zone = socket_zone;
	}

	return so;
}
int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = current_proc();
	register struct protosw *prp;
	register struct socket *so;
	register int error = 0;
	extern int tcpconsdebug;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);

	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);

	if (p->p_prison && jail_socket_unixiproute_only &&
	    prp->pr_domain->dom_family != PF_LOCAL &&
	    prp->pr_domain->dom_family != PF_INET &&
	    prp->pr_domain->dom_family != PF_ROUTE) {
		return (EPROTONOSUPPORT);
	}

	if (prp->pr_type != type)
		return (EPROTOTYPE);

	so = soalloc(p != 0, dom, type);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);

	so->so_uid = kauth_cred_getuid(kauth_cred_get());
	if (!suser(kauth_cred_get(), NULL))
		so->so_state = SS_PRIV;

	so->so_cred = kauth_cred_get_with_ref();

	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	so->so_rcv.sb_so = so->so_snd.sb_so = so;

//### Attachment will create the per pcb lock if necessary and increase refcount
	so->so_usecount++;	/* for creation, make sure it's done before socket is inserted in lists */

	error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error) {
		/*
		 * If so_pcb is not zero, the socket will be leaked,
		 * so protocol attachment handler must be coded carefully
		 */
		so->so_state |= SS_NOFDREF;
		sofreelastref(so, 1);	/* will deallocate the socket */
		return (error);
	}

	prp->pr_domain->dom_refs++;
	TAILQ_INIT(&so->so_evlist);

	/* Attach socket filters for this protocol */

	if (tcpconsdebug == 2)
		so->so_options |= SO_DEBUG;

	*aso = so;
	return (0);
}
int
sobind(so, nam)
	struct socket *so;
	struct sockaddr *nam;
{
	struct proc *p = current_proc();
	int error = 0;
	struct socket_filter_entry	*filter;

	for (filter = so->so_filt; filter && (error == 0);
	    filter = filter->sfe_next_onsocket) {
		if (filter->sfe_filter->sf_filter.sf_bind) {
			socket_unlock(so, 0);
			error = filter->sfe_filter->sf_filter.sf_bind(
					filter->sfe_cookie, so, nam);
		}
	}
	/* End socket filter */

	if (error == 0)
		error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);

	socket_unlock(so, 1);

	if (error == EJUSTRETURN)
		error = 0;

	return (error);
}
void
sodealloc(so)
	struct socket *so;
{
	so->so_gencnt = ++so_gencnt;

	if (so->so_rcv.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
	if (so->so_snd.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);

	if (so->so_accf != NULL) {
		if (so->so_accf->so_accept_filter != NULL &&
		    so->so_accf->so_accept_filter->accf_destroy != NULL) {
			so->so_accf->so_accept_filter->accf_destroy(so);
		}
		if (so->so_accf->so_accept_filter_str != NULL)
			FREE(so->so_accf->so_accept_filter_str, M_ACCF);
		FREE(so->so_accf, M_ACCF);
	}
	kauth_cred_rele(so->so_cred);
	zfreei(so->so_zone, so);

	if (so->cached_in_sock_layer == 1)
		cached_sock_free(so);
	else {
		if (so->cached_in_sock_layer == -1)
			panic("sodealloc: double dealloc: so=%x\n", so);
		so->cached_in_sock_layer = -1;
		FREE_ZONE(so, sizeof(*so), so->so_zone);
	}
#endif /* __APPLE__ */
}
int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	struct proc *p = current_proc();
	int error = 0;

	{
		struct socket_filter_entry	*filter;

		for (filter = so->so_filt; filter && (error == 0);
		    filter = filter->sfe_next_onsocket) {
			if (filter->sfe_filter->sf_filter.sf_listen) {
				socket_unlock(so, 0);
				error = filter->sfe_filter->sf_filter.sf_listen(
						filter->sfe_cookie, so);
			}
		}
	}

	if (error == 0)
		error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);

	if (error) {
		socket_unlock(so, 1);
		if (error == EJUSTRETURN)
			error = 0;
		return (error);
	}

	if (TAILQ_EMPTY(&so->so_comp))
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;

	socket_unlock(so, 1);
	return (0);
}
void
sofreelastref(so, dealloc)
	register struct socket *so;
	int dealloc;
{
	struct socket *head = so->so_head;

	/*### Assume socket is locked */

	/* Remove any filters - may be called more than once */

	if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) {
		selthreadclear(&so->so_snd.sb_sel);
		selthreadclear(&so->so_rcv.sb_sel);
		so->so_rcv.sb_flags &= ~SB_UPCALL;
		so->so_snd.sb_flags &= ~SB_UPCALL;
		return;
	}

	if (head != NULL) {
		socket_lock(head, 1);
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue.  If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
			selthreadclear(&so->so_snd.sb_sel);
			selthreadclear(&so->so_rcv.sb_sel);
			so->so_rcv.sb_flags &= ~SB_UPCALL;
			so->so_snd.sb_flags &= ~SB_UPCALL;
			socket_unlock(head, 1);
			return;
		} else {
			panic("sofree: not queued");
		}
		so->so_state &= ~SS_INCOMP;
		socket_unlock(head, 1);
	}

	selthreadclear(&so->so_snd.sb_sel);
	sbrelease(&so->so_snd);

	/* 3932268: disable upcall */
	so->so_rcv.sb_flags &= ~SB_UPCALL;
	so->so_snd.sb_flags &= ~SB_UPCALL;

	if (dealloc)
		sodealloc(so);
}
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose_locked(so)
	register struct socket *so;
{
	int error = 0;
	lck_mtx_t * mutex_held;
	struct timespec ts;

	if (so->so_usecount == 0) {
		panic("soclose: so=%x refcount=0\n", so);
	}

	sflt_notify(so, sock_evt_closing, NULL);

	if ((so->so_options & SO_ACCEPTCONN)) {
		struct socket *sp;

		/* We do not want new connection to be added to the connection queues */
		so->so_options &= ~SO_ACCEPTCONN;

		while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
			/*
			 * A bit tricky here. We need to keep
			 * a lock if it's a protocol global lock
			 * but we want the head, not the socket locked
			 * in the case of per-socket lock...
			 */
			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(so, 0);
				socket_lock(sp, 1);
			}
			(void) soabort(sp);
			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(sp, 1);
				socket_lock(so, 0);
			}
		}

		while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
			/* Dequeue from so_comp since sofree() won't do it */
			TAILQ_REMOVE(&so->so_comp, sp, so_list);

			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(so, 0);
				socket_lock(sp, 1);
			}

			sp->so_state &= ~SS_COMP;

			(void) soabort(sp);
			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(sp, 1);
				socket_lock(so, 0);
			}
		}
	}
	if (so->so_pcb == 0) {
		/* 3915887: mark the socket as ready for dealloc */
		so->so_flags |= SOF_PCBCLEARING;
	}
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnectlocked(so);
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			if (so->so_proto->pr_getlock != NULL)
				mutex_held = (*so->so_proto->pr_getlock)(so, 0);
			else
				mutex_held = so->so_proto->pr_domain->dom_mtx;
			while (so->so_state & SS_ISCONNECTED) {
				ts.tv_sec = (so->so_linger/100);
				ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10;
				error = msleep((caddr_t)&so->so_timeo, mutex_held,
				    PSOCK | PCATCH, "soclos", &ts);
				if (error) {
					/* It's OK when the time fires, don't report an error */
					if (error == EWOULDBLOCK)
						error = 0;
					break;
				}
			}
		}
	}
drop:
	if (so->so_usecount == 0)
		panic("soclose: usecount is zero so=%x\n", so);
	if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
	if (so->so_usecount <= 0)
		panic("soclose: usecount is zero so=%x\n", so);
	if (so->so_pcb && so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;

	so->so_proto->pr_domain->dom_refs--;

	return (error);
}

int
soclose(so)
	register struct socket *so;
{
	int error = 0;

	socket_lock(so, 1);
	if (so->so_retaincnt == 0)
		error = soclose_locked(so);
	else {	/* if the FD is going away, but socket is retained in kernel remove its reference */
		if (so->so_usecount < 2)
			panic("soclose: retaincnt non null and so=%x usecount=%x\n", so, so->so_usecount);
		so->so_usecount--;
	}
	socket_unlock(so, 1);
	return (error);
}
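/*
 * Illustrative userland sketch (not part of this file): the SO_LINGER
 * handling in soclose_locked() above only comes into play when a process has
 * opted in via setsockopt(2).  A minimal, hypothetical helper for an open TCP
 * descriptor `s`:
 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>

static int
enable_linger(int s, int seconds)
{
	struct linger l;

	l.l_onoff = 1;		/* linger on close(2) */
	l.l_linger = seconds;	/* block up to this many seconds */
	return setsockopt(s, SOL_SOCKET, SO_LINGER, &l, sizeof (l));
}
#endif
/*
 * With lingering enabled, close(2) blocks in the msleep() loop above until
 * the disconnect completes or the timeout expires; without it, the close
 * returns immediately and the disconnect finishes asynchronously.
 */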
/*
 * Must be called at splnet...
 */
//#### Should already be locked
int
soabort(so)
	struct socket *so;
{
	int error;

#ifdef MORE_LOCKING_DEBUG
	lck_mtx_t * mutex_held;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif

	error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
	return (error);
}
int
soacceptlock(so, nam, dolock)
	register struct socket *so;
	struct sockaddr **nam;
	int dolock;
{
	int error;

	if (dolock) socket_lock(so, 1);

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);

	if (dolock) socket_unlock(so, 1);
	return (error);
}

int
soaccept(so, nam)
	register struct socket *so;
	struct sockaddr **nam;
{
	return (soacceptlock(so, nam, 1));
}
int
soconnectlock(so, nam, dolock)
	register struct socket *so;
	struct sockaddr *nam;
	int dolock;
{
	int error = 0;
	struct proc *p = current_proc();

	if (dolock) socket_lock(so, 1);

	if (so->so_options & SO_ACCEPTCONN) {
		if (dolock) socket_unlock(so, 1);
		return (EOPNOTSUPP);
	}
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnectlocked(so))))
		error = EISCONN;
	else {
		/*
		 * Run connect filter before calling protocol:
		 *  - non-blocking connect returns before completion;
		 */
		{
			struct socket_filter_entry	*filter;

			for (filter = so->so_filt; filter && (error == 0);
			    filter = filter->sfe_next_onsocket) {
				if (filter->sfe_filter->sf_filter.sf_connect_out) {
					socket_unlock(so, 0);
					error = filter->sfe_filter->sf_filter.sf_connect_out(
							filter->sfe_cookie, so, nam);
				}
			}
		}
		if (error) {
			if (error == EJUSTRETURN)
				error = 0;
			if (dolock) socket_unlock(so, 1);
			return (error);
		}

		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
	}
	if (dolock) socket_unlock(so, 1);
	return (error);
}

int
soconnect(so, nam)
	register struct socket *so;
	struct sockaddr *nam;
{
	return (soconnectlock(so, nam, 1));
}

int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int error;
//####### Assumes so1 is already locked /

	socket_lock(so2, 1);

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);

	socket_unlock(so2, 1);
	return (error);
}
int
sodisconnectlocked(so)
	register struct socket *so;
{
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}

	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);

	if (error == 0)
		sflt_notify(so, sock_evt_disconnected, NULL);
bad:
	return (error);
}

//### Locking version
int
sodisconnect(so)
	register struct socket *so;
{
	int error;

	socket_lock(so, 1);
	error = sodisconnectlocked(so);
	socket_unlock(so, 1);
	return (error);
}
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)

/*
 * sosendcheck will lock the socket buffer if it isn't locked and
 * verify that there is space for the data being inserted.
 */
static int
sosendcheck(
	struct socket *so,
	struct sockaddr *addr,
	long resid,
	long clen,
	int atomic,
	int flags,
	int *sblocked)
{
	int error = 0;
	int assumelock = 0;
	long space;

	if (*sblocked == 0) {
		if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
		    so->so_send_filt_thread != 0 &&
		    so->so_send_filt_thread == current_thread()) {
			/*
			 * We're being called recursively from a filter,
			 * allow this to continue. Radar 4150520.
			 * Don't set sblocked because we don't want
			 * to perform an unlock later.
			 */
			assumelock = 1;
		} else {
			error = sblock(&so->so_snd, SBLOCKWAIT(flags));
			if (error)
				return (error);
			*sblocked = 1;
		}
	}

	if (so->so_state & SS_CANTSENDMORE)
		return (EPIPE);

	if (so->so_error) {
		error = so->so_error;
		so->so_error = 0;
		return (error);
	}

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		/*
		 * `sendto' and `sendmsg' is allowed on a connection-
		 * based socket if it supports implied connect.
		 * Return ENOTCONN if not connected and no address is
		 * supplied.
		 */
		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
		    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
			if ((so->so_state & SS_ISCONFIRMING) == 0 &&
			    !(resid == 0 && clen != 0))
				return (ENOTCONN);
		} else if (addr == 0 && !(flags&MSG_HOLD))
			return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ;
	}
	space = sbspace(&so->so_snd);
	if (flags & MSG_OOB)
		space += 1024;
	if ((atomic && resid > so->so_snd.sb_hiwat) ||
	    clen > so->so_snd.sb_hiwat)
		return (EMSGSIZE);
	if (space < resid + clen &&
	    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
		if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) {
			return (EWOULDBLOCK);
		}
		sbunlock(&so->so_snd, 1);
		error = sbwait(&so->so_snd);
		if (error)
			return (error);
	}

	return (0);
}
/*
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 *
 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
 *  point at the mbuf chain being constructed and go from there.
 */
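/*
 * Illustrative userland sketch (not part of this file): because sosend() can
 * return with a short count on EINTR/ERESTART, as noted above, callers of
 * send(2)/write(2) on sockets typically loop until the whole buffer has been
 * queued.  A minimal, hypothetical helper for a connected descriptor `s`:
 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>

static int
send_all(int s, const char *buf, size_t len)
{
	size_t off = 0;

	while (off < len) {
		ssize_t n = send(s, buf + off, len - off, 0);
		if (n < 0) {
			if (errno == EINTR)
				continue;	/* interrupted: retry the remainder */
			return -1;		/* real error (e.g. EPIPE, ENOBUFS) */
		}
		off += (size_t)n;
	}
	return 0;
}
#endif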
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	register struct mbuf *m, *freelist = NULL;
	register long space, len, resid;
	int clen = 0, error, dontroute, mlen, sendflags;
	int atomic = sosendallatonce(so) || top;
	int sblocked = 0;
	struct proc *p = current_proc();

	if (uio)
		// LP64todo - fix this!
		resid = uio_resid(uio);
	else
		resid = top->m_pkthdr.len;

	KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
		     so->so_snd.sb_lowat,
		     so->so_snd.sb_hiwat);

	socket_lock(so, 1);

	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		socket_unlock(so, 1);
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (p)
		p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;

	do {
		error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
		if (error)
			goto release;

		space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);

		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				int chainlength;
				int bytes_to_copy;

				bytes_to_copy = min(resid, space);

				if (sosendminchain > 0)
					chainlength = 0;
				else
					chainlength = sosendmaxchain;

				socket_unlock(so, 0);

				do {
					int num_needed;
					int hdrs_needed = (top == 0) ? 1 : 0;

					/*
					 * try to maintain a local cache of mbuf clusters needed to complete this write
					 * the list is further limited to the number that are currently needed to fill the socket
					 * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
					 * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs
					 * if we fail early (or miscalculate the number needed) make sure to release any clusters
					 * we haven't yet consumed.
					 */
					if (freelist == NULL && bytes_to_copy > MCLBYTES) {
						num_needed = bytes_to_copy / NBPG;

						if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE)
							num_needed++;

						freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG);
						/* Fall back to cluster size if allocation failed */
					}
					if (freelist == NULL && bytes_to_copy > MINCLSIZE) {
						num_needed = bytes_to_copy / MCLBYTES;

						if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
							num_needed++;

						freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES);
						/* Fall back to a single mbuf if allocation failed */
					}
					if (freelist == NULL) {
						if (top == 0)
							MGETHDR(freelist, M_WAIT, MT_DATA);
						else
							MGET(freelist, M_WAIT, MT_DATA);

						if (freelist == NULL) {
							error = ENOBUFS;
							socket_lock(so, 0);
							goto release;
						}
						/*
						 * For datagram protocols, leave room
						 * for protocol headers in first mbuf.
						 */
						if (atomic && top == 0 && bytes_to_copy < MHLEN)
							MH_ALIGN(freelist, bytes_to_copy);
					}
					m = freelist;
					freelist = m->m_next;

					if ((m->m_flags & M_EXT))
						mlen = m->m_ext.ext_size;
					else if ((m->m_flags & M_PKTHDR))
						mlen = MHLEN - m_leadingspace(m);
					else
						mlen = MLEN;
					len = min(mlen, bytes_to_copy);

					error = uiomove(mtod(m, caddr_t), (int)len, uio);

					// LP64todo - fix this!
					resid = uio_resid(uio);

					top->m_pkthdr.len += len;

					if (error)
						break;
					if (resid <= 0) {
						if (flags & MSG_EOR)
							top->m_flags |= M_EOR;
						break;
					}
					bytes_to_copy = min(resid, space);

				} while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));

				socket_lock(so, 0);

				if (error)
					goto release;
			}

			if (flags & (MSG_HOLD|MSG_SEND)) {
				/* Enqueue for later, go away if HOLD */
				register struct mbuf *mb1;

				if (so->so_temp && (flags & MSG_FLUSH)) {
					m_freem(so->so_temp);
					so->so_temp = NULL;
				}
				if (so->so_temp)
					so->so_tail->m_next = top;
				else
					so->so_temp = top;
				mb1 = top;
				while (mb1->m_next)
					mb1 = mb1->m_next;
				so->so_tail = mb1;
				if (flags & MSG_HOLD) {
					top = NULL;
					goto release;
				}
				top = so->so_temp;
			}
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			/* Compute flags here, for pru_send and NKEs */
			sendflags = (flags & MSG_OOB) ? PRUS_OOB :
			    /*
			     * If the user set MSG_EOF, the protocol
			     * understands this flag and nothing left to
			     * send then use PRU_SEND_EOF instead of PRU_SEND.
			     */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ? PRUS_EOF :
			    /* If there is more to send set PRUS_MORETOCOME */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;

			/*
			 * Socket filter processing
			 */
			{
				struct socket_filter_entry *filter;
				int filtered = 0;
				int so_flags;

				for (filter = so->so_filt; filter && (error == 0);
				    filter = filter->sfe_next_onsocket) {
					if (filter->sfe_filter->sf_filter.sf_data_out) {
						if (filtered == 0) {
							filtered = 1;
							so->so_send_filt_thread = current_thread();
							socket_unlock(so, 0);
						}
						so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
						error = filter->sfe_filter->sf_filter.sf_data_out(
								filter->sfe_cookie, so, addr, &top, &control, so_flags);
					}
				}

				if (filtered) {
					/*
					 * At this point, we've run at least one filter.
					 * The socket is unlocked as is the socket buffer.
					 */
					socket_lock(so, 0);
					so->so_send_filt_thread = 0;
					if (error) {
						if (error == EJUSTRETURN) {
							error = 0;
							clen = 0;
							control = 0;
							top = 0;
						}
						goto release;
					}
				}
			}
			/*
			 * End Socket filter processing
			 */

			if (error == EJUSTRETURN) {
				/* A socket filter handled this data */
				error = 0;
			} else {
				error = (*so->so_proto->pr_usrreqs->pru_send)(so,
						sendflags, top, addr, control, p);
			}

			if (flags & MSG_SEND)
				so->so_temp = NULL;

			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;

			clen = 0;
			control = 0;
			top = 0;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	if (sblocked)
		sbunlock(&so->so_snd, 0);	/* will unlock socket */
	else
		socket_unlock(so, 1);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	if (freelist)
		m_freem_list(freelist);

	KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
	    so, resid, so->so_snd.sb_cc, space, error);

	return (error);
}
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
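/*
 * Illustrative userland sketch (not part of this file): the record layout
 * described above (address mbuf, optional control mbufs, then data) is what
 * recvmsg(2) unpacks into msg_name, msg_control and the iovec.  A minimal,
 * hypothetical helper for a UDP socket `s`:
 */
#if 0
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>

static ssize_t
recv_one_datagram(int s, char *buf, size_t buflen, struct sockaddr_in *from)
{
	struct iovec iov;
	struct msghdr msg;
	char cbuf[256];			/* room for ancillary (control) data */

	iov.iov_base = buf;
	iov.iov_len = buflen;

	memset(&msg, 0, sizeof (msg));
	msg.msg_name = from;		/* filled from the leading MT_SONAME mbuf */
	msg.msg_namelen = sizeof (*from);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;		/* filled from any MT_CONTROL mbufs */
	msg.msg_controllen = sizeof (cbuf);

	return recvmsg(s, &msg, 0);	/* data mbufs are copied out via the uio */
}
#endif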
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp, *ml = NULL;
	register int flags, len, error, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int can_delay = 0;
	int need_event = 0;
	// LP64todo - fix this!
	int orig_resid = uio_resid(uio);
	volatile struct mbuf *free_list;
	volatile int delayed_copy_len;
	struct proc *p = current_proc();

	// LP64todo - fix this!
	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
		     so->so_rcv.sb_lowat,
		     so->so_rcv.sb_hiwat);

	socket_lock(so, 1);

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount == 1)
		panic("soreceive: so=%x no other reference on socket\n", so);
#endif
	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/*
	 * When SO_WANTOOBFLAG is set we try to get out-of-band data
	 * regardless of the flags argument. Here is the case where
	 * out-of-band data is not inline.
	 */
	if ((flags & MSG_OOB) ||
	    ((so->so_options & SO_WANTOOBFLAG) != 0 &&
	     (so->so_options & SO_OOBINLINE) == 0 &&
	     (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
		m = m_get(M_WAIT, MT_DATA);
		if (m == NULL) {
			socket_unlock(so, 1);
			KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS, 0, 0, 0, 0);
			return (ENOBUFS);
		}
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		socket_unlock(so, 0);
		do {
			// LP64todo - fix this!
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio_resid(uio), m->m_len), uio);
			m = m_free(m);
		} while (uio_resid(uio) && error == 0 && m);
		socket_lock(so, 0);
bad:
		if (m)
			m_freem(m);
		if ((so->so_options & SO_WANTOOBFLAG) != 0) {
			if (error == EWOULDBLOCK || error == EINVAL) {
				/*
				 * Let's try to get normal data:
				 *  EWOULDBLOCK: out-of-band data not received yet;
				 *  EINVAL: out-of-band data already read.
				 */
				error = 0;
				goto nooob;
			} else if (error == 0 && flagsp)
				*flagsp |= MSG_OOB;
		}
		socket_unlock(so, 1);
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error, 0, 0, 0, 0);
		return (error);
	}
nooob:
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio_resid(uio))
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

	free_list = (struct mbuf *)0;
	delayed_copy_len = 0;
restart:
#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1)
		printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount);
#endif
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error) {
		socket_unlock(so, 1);
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error, 0, 0, 0, 0);
		return (error);
	}

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *      receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
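	/*
	 * Illustrative note (not part of the original source): the low water
	 * mark and MSG_WAITALL conditions tested below are driven from user
	 * space, e.g.
	 *
	 *	int lowat = 512;
	 *	setsockopt(s, SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof (lowat));
	 *	recv(s, buf, sizeof (buf), MSG_WAITALL);
	 *
	 * With SO_RCVLOWAT set, a blocking read sleeps until at least that
	 * many bytes are queued (condition 1); MSG_WAITALL keeps it sleeping
	 * until the full request can be satisfied, subject to the sb_hiwat
	 * limit (condition 2).
	 */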
1590         if (m 
== 0 || (((flags 
& MSG_DONTWAIT
) == 0 && 
1591             so
->so_rcv
.sb_cc 
< uio_resid(uio
)) && 
1592            (so
->so_rcv
.sb_cc 
< so
->so_rcv
.sb_lowat 
|| 
1593             ((flags 
& MSG_WAITALL
) && uio_resid(uio
) <= so
->so_rcv
.sb_hiwat
)) && 
1594             m
->m_nextpkt 
== 0 && (pr
->pr_flags 
& PR_ATOMIC
) == 0)) { 
1596                 KASSERT(m 
!= 0 || !so
->so_rcv
.sb_cc
, ("receive 1")); 
1600                         error 
= so
->so_error
; 
1601                         if ((flags 
& MSG_PEEK
) == 0) 
1605                 if (so
->so_state 
& SS_CANTRCVMORE
) { 
1611                 for (; m
; m 
= m
->m_next
) 
1612                         if (m
->m_type 
== MT_OOBDATA  
|| (m
->m_flags 
& M_EOR
)) { 
1613                                 m 
= so
->so_rcv
.sb_mb
; 
1616                 if ((so
->so_state 
& (SS_ISCONNECTED
|SS_ISCONNECTING
)) == 0 && 
1617                     (so
->so_proto
->pr_flags 
& PR_CONNREQUIRED
)) { 
1621                 if (uio_resid(uio
) == 0) 
1623                 if ((so
->so_state 
& SS_NBIO
) || (flags 
& (MSG_DONTWAIT
|MSG_NBIO
))) { 
1624                         error 
= EWOULDBLOCK
; 
1627                 sbunlock(&so
->so_rcv
, 1); 
1628 #ifdef EVEN_MORE_LOCKING_DEBUG 
1630                     printf("Waiting for socket data\n"); 
1633                 error 
= sbwait(&so
->so_rcv
); 
1634 #ifdef EVEN_MORE_LOCKING_DEBUG 
1636                     printf("SORECEIVE - sbwait returned %d\n", error
); 
1638                 if (so
->so_usecount 
< 1) 
1639                         panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so
, so
->so_usecount
); 
1641                         socket_unlock(so
, 1); 
1642                     KERNEL_DEBUG(DBG_FNC_SORECEIVE 
| DBG_FUNC_END
, error
,0,0,0,0); 
1650                 uio
->uio_procp
->p_stats
->p_ru
.ru_msgrcv
++; 
1651 #else   /* __APPLE__ */ 
1654          * This should be uio->uio-procp; however, some callers of this 
1655          * function use auto variables with stack garbage, and fail to 
1656          * fill out the uio structure properly. 
1659                 p
->p_stats
->p_ru
.ru_msgrcv
++; 
1660 #endif  /* __APPLE__ */ 
1661         nextrecord 
= m
->m_nextpkt
; 
1662         if ((pr
->pr_flags 
& PR_ADDR
) && m
->m_type 
== MT_SONAME
) { 
1663                 KASSERT(m
->m_type 
== MT_SONAME
, ("receive 1a")); 
1666                         *psa 
= dup_sockaddr(mtod(m
, struct sockaddr 
*), 
1668                         if ((*psa 
== 0) && (flags 
& MSG_NEEDSA
)) { 
1669                                 error 
= EWOULDBLOCK
; 
1673                 if (flags 
& MSG_PEEK
) { 
1676                         sbfree(&so
->so_rcv
, m
); 
1677                         if (m
->m_next 
== 0 && so
->so_rcv
.sb_cc 
!= 0) 
1678                                 panic("soreceive: about to create invalid socketbuf"); 
1679                         MFREE(m
, so
->so_rcv
.sb_mb
); 
1680                         m 
= so
->so_rcv
.sb_mb
; 
1683         while (m 
&& m
->m_type 
== MT_CONTROL 
&& error 
== 0) { 
1684                 if (flags 
& MSG_PEEK
) { 
1686                                 *controlp 
= m_copy(m
, 0, m
->m_len
); 
1689                         sbfree(&so
->so_rcv
, m
); 
1691                                 if (pr
->pr_domain
->dom_externalize 
&& 
1692                                     mtod(m
, struct cmsghdr 
*)->cmsg_type 
== 
1694                                    socket_unlock(so
, 0); /* release socket lock: see 3903171 */ 
1695                                    error 
= (*pr
->pr_domain
->dom_externalize
)(m
); 
1699                                 if (m
->m_next 
== 0 && so
->so_rcv
.sb_cc 
!= 0) 
1700                                         panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0"); 
1701                                 so
->so_rcv
.sb_mb 
= m
->m_next
; 
1703                                 m 
= so
->so_rcv
.sb_mb
; 
1705                                 MFREE(m
, so
->so_rcv
.sb_mb
); 
1706                                 m 
= so
->so_rcv
.sb_mb
; 
1711                         controlp 
= &(*controlp
)->m_next
; 
1715                 if ((flags 
& MSG_PEEK
) == 0) 
1716                         m
->m_nextpkt 
= nextrecord
; 
1718                 if (type 
== MT_OOBDATA
) 
1724         if (!(flags 
& MSG_PEEK
) && uio_resid(uio
) > sorecvmincopy
) 
1731         while (m 
&& (uio_resid(uio
) - delayed_copy_len
) > 0 && error 
== 0) { 
1732                 if (m
->m_type 
== MT_OOBDATA
) { 
1733                         if (type 
!= MT_OOBDATA
) 
1735                 } else if (type 
== MT_OOBDATA
) 
1739  * This assertion needs rework.  The trouble is Appletalk is uses many 
1740  * mbuf types (NOT listed in mbuf.h!) which will trigger this panic. 
1741  * For now just remove the assertion...  CSM 9/98 
1744                     KASSERT(m
->m_type 
== MT_DATA 
|| m
->m_type 
== MT_HEADER
, 
1748                  * Make sure to allways set MSG_OOB event when getting  
1749                  * out of band data inline. 
1751                 if ((so
->so_options 
& SO_WANTOOBFLAG
) != 0 && 
1752                         (so
->so_options 
& SO_OOBINLINE
) != 0 &&  
1753                         (so
->so_state 
& SS_RCVATMARK
) != 0) { 
1757                 so
->so_state 
&= ~SS_RCVATMARK
; 
1758                 // LP64todo - fix this! 
1759                 len 
= uio_resid(uio
) - delayed_copy_len
; 
1760                 if (so
->so_oobmark 
&& len 
> so
->so_oobmark 
- offset
) 
1761                         len 
= so
->so_oobmark 
- offset
; 
1762                 if (len 
> m
->m_len 
- moff
) 
1763                         len 
= m
->m_len 
- moff
; 
1765                  * If mp is set, just pass back the mbufs. 
1766                  * Otherwise copy them out via the uio, then free. 
1767                  * Sockbuf must be consistent here (points to current mbuf, 
1768                  * it points to next record) when we drop priority; 
1769                  * we must note any additions to the sockbuf when we 
1770                  * block interrupts again. 
1773                         if (can_delay 
&& len 
== m
->m_len
) { 
1775                                  * only delay the copy if we're consuming the 
1776                                  * mbuf and we're NOT in MSG_PEEK mode 
1777                                  * and we have enough data to make it worthwile 
1778                                  * to drop and retake the funnel... can_delay 
1779                                  * reflects the state of the 2 latter constraints 
1780                                  * moff should always be zero in these cases 
1782                                 delayed_copy_len 
+= len
; 
1785                                 if (delayed_copy_len
) { 
1786                                         error 
= sodelayed_copy(so
, uio
, &free_list
, &delayed_copy_len
); 
1791                                         if (m 
!= so
->so_rcv
.sb_mb
) { 
1793                                                  * can only get here if MSG_PEEK is not set 
1794                                                  * therefore, m should point at the head of the rcv queue... 
1795                                                  * if it doesn't, it means something drastically changed 
1796                                                  * while we were out from behind the funnel in sodelayed_copy... 
1797                                                  * perhaps a RST on the stream... in any event, the stream has 
1798                                                  * been interrupted... it's probably best just to return  
1799                                                  * whatever data we've moved and let the caller sort it out... 
1804                                 socket_unlock(so
, 0); 
1805                                 error 
= uiomove(mtod(m
, caddr_t
) + moff
, (int)len
, uio
); 
1812                         uio_setresid(uio
, (uio_resid(uio
) - len
)); 
1814                 if (len 
== m
->m_len 
- moff
) { 
1815                         if (m
->m_flags 
& M_EOR
) 
1817                         if (flags 
& MSG_PEEK
) { 
1821                                 nextrecord 
= m
->m_nextpkt
; 
1822                                 sbfree(&so
->so_rcv
, m
); 
1823                                 m
->m_nextpkt 
= NULL
; 
1828                                         so
->so_rcv
.sb_mb 
= m 
= m
->m_next
; 
1829                                         *mp 
= (struct mbuf 
*)0; 
1831                                         if (free_list 
== NULL
) 
1836                                         so
->so_rcv
.sb_mb 
= m 
= m
->m_next
; 
1840                                         m
->m_nextpkt 
= nextrecord
; 
1843                         if (flags 
& MSG_PEEK
) 
1847                                         *mp 
= m_copym(m
, 0, len
, M_WAIT
); 
1850                                 so
->so_rcv
.sb_cc 
-= len
; 
1853                 if (so
->so_oobmark
) { 
1854                         if ((flags 
& MSG_PEEK
) == 0) { 
1855                                 so
->so_oobmark 
-= len
; 
1856                                 if (so
->so_oobmark 
== 0) { 
1857                                     so
->so_state 
|= SS_RCVATMARK
; 
1859                                      * delay posting the actual event until after 
1860                                      * any delayed copy processing has finished 
1867                                 if (offset 
== so
->so_oobmark
) 
1871                 if (flags 
& MSG_EOR
)  
		/*
		 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)

			if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD))
				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
			if (sbwait(&so->so_rcv)) {

			/*
			 * have to wait until after we get back from the sbwait to do the copy because
			 * we will drop the funnel if we have enough data that has been delayed... by dropping
			 * the funnel we open up a window allowing the netisr thread to process the incoming packets
			 * and to change the state of this socket... we're issuing the sbwait because
			 * the socket is empty and we're expecting the netisr thread to wake us up when more
			 * packets arrive... if we allow that processing to happen and then sbwait, we
			 * could stall forever with packets sitting in the socket if no further packets
			 * arrive from the remote side.
			 *
			 * we want to copy before we've collected all the data to satisfy this request to
			 * allow the copy to overlap the incoming packet processing on an MP system
			 */
			if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
				error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);

			m = so->so_rcv.sb_mb;

				nextrecord = m->m_nextpkt;
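/*
 * Illustrative user-space sketch (not part of the kernel build): the
 * MSG_WAITALL/MSG_WAITSTREAM wait loop above is what makes a recv(2) call
 * block until the full request is satisfied, an error occurs, or the
 * connection is closed.  The descriptor and buffer names are hypothetical.
 *
 *	#include <sys/socket.h>
 *
 *	ssize_t
 *	read_exact(int sock, void *buf, size_t want)
 *	{
 *		// A single recv() with MSG_WAITALL normally returns either
 *		// 'want' bytes, 0 on EOF, or -1 with errno set; a short count
 *		// is still possible if a signal interrupts the wait.
 *		return recv(sock, buf, want, MSG_WAITALL);
 *	}
 */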
#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1)
		panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount);
#endif

	if (m && pr->pr_flags & PR_ATOMIC) {

		if (so->so_options & SO_DONTTRUNC)
			flags |= MSG_RCVMORE;

			if ((flags & MSG_PEEK) == 0)
				(void) sbdroprecord(&so->so_rcv);

	if ((flags & MSG_PEEK) == 0) {

			so->so_rcv.sb_mb = nextrecord;

		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);

	if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
		flags |= MSG_HAVEMORE;

	if (delayed_copy_len) {
		error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);

		m_freem_list((struct mbuf *)free_list);
		free_list = (struct mbuf *)0;

		postevent(so, 0, EV_OOB);

	if (orig_resid == uio_resid(uio) && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {

		sbunlock(&so->so_rcv, 1);

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1)
		panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount);
#endif
	if (delayed_copy_len) {
		error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);

		m_freem_list((struct mbuf *)free_list);

	sbunlock(&so->so_rcv, 0);	/* will unlock socket */

	// LP64todo - fix this!
	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid)
{

	socket_unlock(so, 0);

	while (m && error == 0) {

		error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);

		m = m->m_next;
	}
	m_freem_list(*free_list);

	*free_list = (struct mbuf *)NULL;
	register struct socket *so;

	register struct protosw *pr = so->so_proto;

	sflt_notify(so, sock_evt_shutdown, &how);

	if (how != SHUT_WR) {

		postevent(so, 0, EV_RCLOSED);
	}
	if (how != SHUT_RD) {
		ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
		postevent(so, 0, EV_WCLOSED);
		KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
		socket_unlock(so, 1);
	}

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
	socket_unlock(so, 1);
	register struct socket *so;

	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;

#ifdef MORE_LOCKING_DEBUG
	lck_mtx_t * mutex_held;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif

	sflt_notify(so, sock_evt_flush_read, NULL);

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAIT);

	selthreadclear(&sb->sb_sel);

	bzero((caddr_t)sb, sizeof (*sb));
	sb->sb_so = so;	/* reestablish link to socket */
	if (asb.sb_flags & SB_KNOTE) {
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
sooptcopyin(sopt, buf, len, minlen)
	struct	sockopt *sopt;

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		return (EINVAL);
	if (valsize > len)
		sopt->sopt_valsize = valsize = len;

	if (sopt->sopt_p != 0)
		return (copyin(sopt->sopt_val, buf, valsize));

	bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
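/*
 * Usage sketch (hypothetical, for illustration only): a protocol-level
 * pr_ctloutput() handler typically pulls a fixed-size integer option with
 * sooptcopyin() and pushes a result back with sooptcopyout(), e.g.:
 *
 *	int optval;
 *
 *	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 *	if (error)
 *		return (error);
 *	// ... validate optval and update protocol state (placeholder) ...
 *	error = sooptcopyout(sopt, &optval, sizeof optval);
 *
 * Only the sooptcopyin()/sooptcopyout() calling convention is taken from
 * this file; the option name, validation, and state update are placeholders.
 */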
	struct sockopt *sopt;

	if (sopt->sopt_dir != SOPT_SET) {
		sopt->sopt_dir = SOPT_SET;
	}

		struct socket_filter_entry	*filter;

		for (filter = so->so_filt; filter && (error == 0);
			 filter = filter->sfe_next_onsocket) {
			if (filter->sfe_filter->sf_filter.sf_setoption) {
				if (filtered == 0) {

					socket_unlock(so, 0);

				error = filter->sfe_filter->sf_filter.sf_setoption(
						filter->sfe_cookie, so, sopt);

		if (filtered != 0) {

			if (error == EJUSTRETURN)

	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			error = (*so->so_proto->pr_ctloutput)

			socket_unlock(so, 1);
		}
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {

			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);

			so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;

		case SO_USELOOPBACK:

		case SO_WANTOOBFLAG:
			error = sooptcopyin(sopt, &optval, sizeof optval,

			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;

			error = sooptcopyin(sopt, &optval, sizeof optval,

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */

			switch (sopt->sopt_name) {

				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
					      &so->so_snd : &so->so_rcv,
					      (u_long) optval) == 0) {

				/*
				 * Make sure the low-water is never greater than
				 * the high-water.
				 */

				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;

				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;

			error = sooptcopyin(sopt, &tv, sizeof tv,

			if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX ||
			    tv.tv_usec < 0 || tv.tv_usec >= 1000000) {

			switch (sopt->sopt_name) {

				so->so_snd.sb_timeo = tv;

				so->so_rcv.sb_timeo = tv;

			error = sooptcopyin(sopt, &nke,
						sizeof nke, sizeof nke);

			error = sflt_attach_private(so, NULL, nke.nke_handle, 1);

			error = sooptcopyin(sopt, &optval, sizeof optval,

			if (optval)
				so->so_flags |= SOF_NOSIGPIPE;
			else
				so->so_flags &= ~SOF_NOSIGPIPE;

			error = sooptcopyin(sopt, &optval, sizeof optval,

			if (optval)
				so->so_flags |= SOF_NOADDRAVAIL;
			else
				so->so_flags &= ~SOF_NOADDRAVAIL;

			error = ENOPROTOOPT;

		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)

	socket_unlock(so, 1);
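/*
 * Illustrative user-space sketch (not part of this file): the SO_LINGER and
 * SO_SNDTIMEO/SO_RCVTIMEO cases above are reached through setsockopt(2).
 * The descriptor and values below are examples only.
 *
 *	#include <sys/socket.h>
 *	#include <sys/time.h>
 *
 *	struct linger  l;
 *	struct timeval tv;
 *
 *	l.l_onoff  = 1;			// enable lingering close
 *	l.l_linger = 5;
 *	tv.tv_sec  = 2;			// 2.5 second receive timeout
 *	tv.tv_usec = 500000;
 *
 *	setsockopt(sock, SOL_SOCKET, SO_LINGER,  &l,  sizeof l);
 *	setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv);
 *
 * Note that sosetopt() above rejects tv_usec outside [0, 1000000) and
 * negative tv_sec.
 */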
/* Helper routine for getsockopt */
sooptcopyout(sopt, buf, len)
	struct	sockopt *sopt;

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer must
	 * be generated ahead of time.
	 */
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != USER_ADDR_NULL) {
		if (sopt->sopt_p != 0)
			error = copyout(buf, sopt->sopt_val, valsize);
		else
			bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
	}
	struct sockopt *sopt;

	if (sopt->sopt_dir != SOPT_GET) {
		sopt->sopt_dir = SOPT_GET;
	}

		struct socket_filter_entry	*filter;

		for (filter = so->so_filt; filter && (error == 0);
			 filter = filter->sfe_next_onsocket) {
			if (filter->sfe_filter->sf_filter.sf_getoption) {
				if (filtered == 0) {

					socket_unlock(so, 0);

				error = filter->sfe_filter->sf_filter.sf_getoption(
						filter->sfe_cookie, so, sopt);

		if (filtered != 0) {

			if (error == EJUSTRETURN)

				socket_unlock(so, 1);

	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			error = (*so->so_proto->pr_ctloutput)

			socket_unlock(so, 1);
		} else {
			socket_unlock(so, 1);
			return (ENOPROTOOPT);
		}
	} else {
		switch (sopt->sopt_name) {

			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger :

			error = sooptcopyout(sopt, &l, sizeof l);

		case SO_USELOOPBACK:

		case SO_WANTOOBFLAG:
			optval = so->so_options & sopt->sopt_name;

			error = sooptcopyout(sopt, &optval, sizeof optval);

			optval = so->so_type;

			m1 = so->so_rcv.sb_mb;
			if (so->so_proto->pr_flags & PR_ATOMIC)

					if (m1->m_type == MT_DATA)
						pkt_total += m1->m_len;

				optval = so->so_rcv.sb_cc;

			optval = so->so_snd.sb_cc;

			optval = so->so_error;

			optval = so->so_snd.sb_hiwat;

			optval = so->so_rcv.sb_hiwat;

			optval = so->so_snd.sb_lowat;

			optval = so->so_rcv.sb_lowat;

			tv = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			error = sooptcopyout(sopt, &tv, sizeof tv);

			optval = (so->so_flags & SOF_NOSIGPIPE);

			optval = (so->so_flags & SOF_NOADDRAVAIL);

			error = ENOPROTOOPT;

		socket_unlock(so, 1);
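/*
 * Illustrative user-space sketch (not part of this file): the SO_ERROR case
 * above (optval = so->so_error) is the conventional way to collect the result
 * of a non-blocking connect(2) once the descriptor reports writable.  The
 * handle_connect_failure() call is a placeholder for caller-specific cleanup.
 *
 *	#include <sys/socket.h>
 *
 *	int err = 0;
 *	socklen_t len = sizeof err;
 *
 *	if (getsockopt(sock, SOL_SOCKET, SO_ERROR, &err, &len) == 0 && err != 0)
 *		handle_connect_failure(err);	// err holds the errno value
 */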
/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
	struct mbuf *m, *m_prev;
	int sopt_size = sopt->sopt_valsize;

	if (sopt_size > MAX_SOOPTGETM_SIZE)

	MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);

	if (sopt_size > MLEN) {
		MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {

		m->m_len = min(MCLBYTES, sopt_size);
	} else {
		m->m_len = min(MLEN, sopt_size);
	}
	sopt_size -= m->m_len;

		MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);

		if (sopt_size > MLEN) {
			MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {

			m->m_len = min(MCLBYTES, sopt_size);
		} else {
			m->m_len = min(MLEN, sopt_size);
		}
		sopt_size -= m->m_len;
/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;

	if (sopt->sopt_val == USER_ADDR_NULL)

	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != NULL) {

			error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len);

		} else
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len);
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val += m->m_len;

	if (m != NULL) /* should be allocated large enough at ip6_sooptmcopyin() */
		panic("soopt_mcopyin");
/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;

	if (sopt->sopt_val == USER_ADDR_NULL)

	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != NULL) {

			error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len);

		} else
			bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val += m->m_len;
		valsize += m->m_len;

		/* a large enough soopt buffer should be supplied from user-land */

	sopt->sopt_valsize = valsize;
	register struct socket *so;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql)
{
	struct proc *p = current_proc();

	if (events & (POLLIN | POLLRDNORM))
		if (soreadable(so))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			/* Darwin sets the flag first, BSD calls selrecord first */
			so->so_rcv.sb_flags |= SB_SEL;
			selrecord(p, &so->so_rcv.sb_sel, wql);
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			/* Darwin sets the flag first, BSD calls selrecord first */
			so->so_snd.sb_flags |= SB_SEL;
			selrecord(p, &so->so_snd.sb_sel, wql);
		}

	socket_unlock(so, 1);
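/*
 * Illustrative user-space sketch (not part of this file): the event mapping
 * above is what poll(2) reports; POLLPRI/POLLRDBAND become ready only when
 * out-of-band data is pending or the stream is at the out-of-band mark.
 * The descriptor and timeout are examples only.
 *
 *	#include <poll.h>
 *
 *	struct pollfd pfd;
 *
 *	pfd.fd = sock;
 *	pfd.events = POLLIN | POLLPRI;
 *
 *	if (poll(&pfd, 1, 1000) > 0) {		// 1 second timeout
 *		if (pfd.revents & POLLPRI)
 *			;			// out-of-band data pending
 *		if (pfd.revents & POLLIN)
 *			;			// normal data readable
 *	}
 */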
int	soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);

soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	switch (kn->kn_filter) {

		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;

		kn->kn_fop = &sowrite_filtops;

		socket_unlock(so, 1);

	if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
		sb->sb_flags |= SB_KNOTE;
	socket_unlock(so, 1);
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	socket_lock(so, 1);
	if (so->so_rcv.sb_flags & SB_KNOTE)
		if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
			so->so_rcv.sb_flags &= ~SB_KNOTE;
	socket_unlock(so, 1);
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_lock(so, 1);

	if (so->so_oobmark) {
		if (kn->kn_flags & EV_OOBAND) {
			kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark;
			if ((hint & SO_FILT_HINT_LOCKED) == 0)
				socket_unlock(so, 1);

		kn->kn_data = so->so_oobmark;
		kn->kn_flags |= EV_OOBAND;

		kn->kn_data = so->so_rcv.sb_cc;
		if (so->so_state & SS_CANTRCVMORE) {
			kn->kn_flags |= EV_EOF;
			kn->kn_fflags = so->so_error;
			if ((hint & SO_FILT_HINT_LOCKED) == 0)
				socket_unlock(so, 1);

	if (so->so_state & SS_RCVATMARK) {
		if (kn->kn_flags & EV_OOBAND) {
			if ((hint & SO_FILT_HINT_LOCKED) == 0)
				socket_unlock(so, 1);

		kn->kn_flags |= EV_OOBAND;
	} else if (kn->kn_flags & EV_OOBAND) {

		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);

	if (so->so_error) {	/* temporary udp error */
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);

	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_unlock(so, 1);

	return( kn->kn_flags & EV_OOBAND ||
		kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
				kn->kn_sdata : so->so_rcv.sb_lowat));
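/*
 * Illustrative user-space sketch (not part of this file): filt_soread()
 * honors NOTE_LOWAT from kn_sfflags, so a kevent(2) registration can raise
 * the read threshold above the socket's low-water mark.  The descriptor and
 * threshold are examples only.
 *
 *	#include <sys/event.h>
 *
 *	struct kevent ev;
 *	int kq = kqueue();
 *
 *	// Fire only once at least 512 bytes are queued (or EOF/OOB is seen).
 *	EV_SET(&ev, sock, EVFILT_READ, EV_ADD, NOTE_LOWAT, 512, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);
 */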
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	socket_lock(so, 1);
	if (so->so_snd.sb_flags & SB_KNOTE)
		if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
			so->so_snd.sb_flags &= ~SB_KNOTE;
	socket_unlock(so, 1);
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_lock(so, 1);

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);

	if (so->so_error) {	/* temporary udp error */
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);

	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);

	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_unlock(so, 1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_lock(so, 1);

	kn->kn_data = so->so_qlen;
	isempty = ! TAILQ_EMPTY(&so->so_comp);
	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_unlock(so, 1);
socket_lock(so, refcount)

	int error = 0, lr, lr_saved;

	__asm__ volatile("mflr %0" : "=r" (lr));

	if (so->so_proto->pr_lock) {
		error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
	} else {
#ifdef MORE_LOCKING_DEBUG
		lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
#endif
		lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);

		so->reserved3 = (void*)lr_saved; /* save caller for refcount going to zero */
socket_unlock(so, refcount)

	int error = 0, lr, lr_saved;
	lck_mtx_t * mutex_held;

	__asm__ volatile("mflr %0" : "=r" (lr));

	if (so->so_proto == NULL)
		panic("socket_unlock null so_proto so=%x\n", so);

	if (so && so->so_proto->pr_unlock)
		error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
	else {
		mutex_held = so->so_proto->pr_domain->dom_mtx;
#ifdef MORE_LOCKING_DEBUG
		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif

			if (so->so_usecount <= 0)
				panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount);

			if (so->so_usecount == 0) {
				sofreelastref(so, 1);

				so->reserved4 = (void*)lr_saved; /* save caller */

		lck_mtx_unlock(mutex_held);
//### Called with socket locked, will unlock socket

	lck_mtx_t * mutex_held;

	__asm__ volatile("mflr %0" : "=r" (lr));

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	sofreelastref(so, 0);

	socket_lock(so, 1);	/* locks & take one reference on socket */
	socket_unlock(so, 0);	/* unlock only */

	socket_unlock(so, 1);