/*
 * Copyright (c) 1998-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)uipc_socket2.c      8.1 (Berkeley) 6/10/93
 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <kern/locks.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <sys/kdebug.h>
#include <libkern/OSAtomic.h>

#include <security/mac_framework.h>
/* TODO: this should be in a header file somewhere */
extern void postevent(struct socket *, struct sockbuf *, int);
#define DBG_FNC_SBDROP          NETDBG_CODE(DBG_NETSOCK, 4)
#define DBG_FNC_SBAPPEND        NETDBG_CODE(DBG_NETSOCK, 5)
static inline void sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *);
static struct socket *sonewconn_internal(struct socket *, int);
static int sbappendaddr_internal(struct sockbuf *, struct sockaddr *,
    struct mbuf *, struct mbuf *);
static int sbappendcontrol_internal(struct sockbuf *, struct mbuf *,
    struct mbuf *);
/*
 * Primitive routines for operating on sockets and socket buffers
 */
static int soqlimitcompat = 1;
static int soqlencomp = 0;

u_long  sb_max = SB_MAX;                /* XXX should be static */
static  u_long sb_efficiency = 8;       /* parameter for sbreserve() */
__private_extern__ unsigned int total_mb_cnt = 0;
__private_extern__ unsigned int total_cl_cnt = 0;
__private_extern__ int sbspace_factor = 8;
/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_incomp for connections in progress
 * and so_comp for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_incomp by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_comp, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_incomp or so_comp, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */
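/*
 * Illustrative sketch only (not part of this file): how a hypothetical
 * connection-oriented protocol would drive the transitions described
 * above.  The names my_proto_connect/my_proto_input_established are
 * assumptions for illustration, not real entry points.
 */
#if 0
static int
my_proto_connect(struct socket *so, __unused struct sockaddr *nam)
{
        soisconnecting(so);     /* connect() now in progress */
        /* ... emit a connection request toward the peer ... */
        return (0);
}

static void
my_proto_input_established(struct socket *so)
{
        /*
         * Handshake done.  For a passively-opened socket this also
         * moves so from its head's so_incomp queue to so_comp, where
         * accept() can find it.
         */
        soisconnected(so);
}
#endif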
void
soisconnecting(struct socket *so)
{

        so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
        so->so_state |= SS_ISCONNECTING;

        sflt_notify(so, sock_evt_connecting, NULL);
}
void
soisconnected(struct socket *so)
{
        struct socket *head = so->so_head;

        so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
        so->so_state |= SS_ISCONNECTED;

        sflt_notify(so, sock_evt_connected, NULL);

        if (head && (so->so_state & SS_INCOMP)) {
                so->so_state &= ~SS_INCOMP;
                so->so_state |= SS_COMP;
                if (head->so_proto->pr_getlock != NULL) {
                        socket_unlock(so, 0);
                        socket_lock(head, 1);
                }
                postevent(head, 0, EV_RCONN);
                TAILQ_REMOVE(&head->so_incomp, so, so_list);
                head->so_incqlen--;
                TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
                sorwakeup(head);
                wakeup_one((caddr_t)&head->so_timeo);
                if (head->so_proto->pr_getlock != NULL) {
                        socket_unlock(head, 1);
                        socket_lock(so, 0);
                }
        } else {
                postevent(so, 0, EV_WCONN);
                wakeup((caddr_t)&so->so_timeo);
                sorwakeup(so);
                sowwakeup(so);
        }
}
void
soisdisconnecting(struct socket *so)
{
        so->so_state &= ~SS_ISCONNECTING;
        so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
        sflt_notify(so, sock_evt_disconnecting, NULL);
        wakeup((caddr_t)&so->so_timeo);
        sowwakeup(so);
        sorwakeup(so);
}
void
soisdisconnected(struct socket *so)
{
        so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
        so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
        sflt_notify(so, sock_evt_disconnected, NULL);
        wakeup((caddr_t)&so->so_timeo);
        sowwakeup(so);
        sorwakeup(so);
}
/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 */
static struct socket *
sonewconn_internal(struct socket *head, int connstatus)
{
        int so_qlen, error = 0;
        struct socket *so;
        lck_mtx_t *mutex_held;

        if (head->so_proto->pr_getlock != NULL)
                mutex_held = (*head->so_proto->pr_getlock)(head, 0);
        else
                mutex_held = head->so_proto->pr_domain->dom_mtx;
        lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

        if (!soqlencomp) {
                /*
                 * This is the default case; so_qlen represents the
                 * sum of both incomplete and completed queues.
                 */
                so_qlen = head->so_qlen;
        } else {
                /*
                 * When kern.ipc.soqlencomp is set to 1, so_qlen
                 * represents only the completed queue.  Since we
                 * cannot let the incomplete queue go unbounded
                 * (in case of SYN flood), we cap the incomplete
                 * queue length to at most somaxconn, and use that
                 * as so_qlen so that we fail immediately below.
                 */
                so_qlen = head->so_qlen - head->so_incqlen;
                if (head->so_incqlen > somaxconn)
                        so_qlen = somaxconn;
        }

        if (so_qlen >=
            (soqlimitcompat ? head->so_qlimit : (3 * head->so_qlimit / 2)))
                return ((struct socket *)0);
        so = soalloc(M_NOWAIT, head->so_proto->pr_domain->dom_family,
            head->so_type);
        if (so == NULL)
                return ((struct socket *)0);
        /* check if head was closed during the soalloc */
        if (head->so_proto == NULL) {
                sodealloc(so);
                return ((struct socket *)0);
        }

        so->so_head = head;
        so->so_type = head->so_type;
        so->so_options = head->so_options &~ SO_ACCEPTCONN;
        so->so_linger = head->so_linger;
        so->so_state = head->so_state | SS_NOFDREF;
        so->so_proto = head->so_proto;
        so->so_timeo = head->so_timeo;
        so->so_pgid  = head->so_pgid;
        so->so_uid = head->so_uid;
        /* inherit the SO_REUSESHAREUID and SO_NOTIFYCONFLICT socket options */
        so->so_flags = head->so_flags & (SOF_REUSESHAREUID|SOF_NOTIFYCONFLICT);
        so->so_usecount = 1;
        so->next_lock_lr = 0;
        so->next_unlock_lr = 0;

        so->so_rcv.sb_flags |= SB_RECV; /* XXX */
        so->so_rcv.sb_so = so->so_snd.sb_so = so;
        TAILQ_INIT(&so->so_evlist);

#if CONFIG_MACF_SOCKET
        mac_socket_label_associate_accept(head, so);
#endif

        if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
                sodealloc(so);
                return ((struct socket *)0);
        }

        /*
         * Must be done with head unlocked to avoid deadlock
         * for protocol with per socket mutexes.
         */
        if (head->so_proto->pr_unlock)
                socket_unlock(head, 0);
        if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) ||
            error) {
                sodealloc(so);
                if (head->so_proto->pr_unlock)
                        socket_lock(head, 0);
                return ((struct socket *)0);
        }
        if (head->so_proto->pr_unlock)
                socket_lock(head, 0);
        so->so_proto->pr_domain->dom_refs++;

        if (connstatus) {
                TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
                so->so_state |= SS_COMP;
        } else {
                TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
                so->so_state |= SS_INCOMP;
                head->so_incqlen++;
        }
        head->so_qlen++;

        /* Attach socket filters for this protocol */
        sflt_initsock(so);

        if (connstatus) {
                so->so_state |= connstatus;
                sorwakeup(head);
                wakeup((caddr_t)&head->so_timeo);
        }
        return (so);
}
struct socket *
sonewconn(struct socket *head, int connstatus, const struct sockaddr *from)
{
        int error = 0;
        struct socket_filter_entry *filter;
        int filtered = 0;

        for (filter = head->so_filt; filter && (error == 0);
            filter = filter->sfe_next_onsocket) {
                if (filter->sfe_filter->sf_filter.sf_connect_in) {
                        if (filtered == 0) {
                                filtered = 1;
                                sflt_use(head);
                                socket_unlock(head, 0);
                        }
                        error = filter->sfe_filter->sf_filter.
                            sf_connect_in(filter->sfe_cookie, head, from);
                }
        }
        if (filtered != 0) {
                socket_lock(head, 0);
                sflt_unuse(head);
        }

        if (error) {
                return (NULL);
        }

        return (sonewconn_internal(head, connstatus));
}
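/*
 * Illustrative sketch only: the shape of an sf_connect_in callback that
 * the loop above invokes for each attached socket filter.  Returning a
 * nonzero errno (here, hypothetically refusing everything) makes
 * sonewconn() reject the inbound connection before a socket is created.
 */
#if 0
static errno_t
my_filter_connect_in(__unused void *cookie, __unused socket_t so,
    __unused const struct sockaddr *from)
{
        return (ECONNREFUSED);  /* veto the inbound connection attempt */
}
#endif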
/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */
void
socantsendmore(struct socket *so)
{
        so->so_state |= SS_CANTSENDMORE;
        sflt_notify(so, sock_evt_cantsendmore, NULL);
        sowwakeup(so);
}
void
socantrcvmore(struct socket *so)
{
        so->so_state |= SS_CANTRCVMORE;
        sflt_notify(so, sock_evt_cantrecvmore, NULL);
        sorwakeup(so);
}
/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(struct sockbuf *sb)
{
        int error = 0, lr_saved;
        struct socket *so = sb->sb_so;
        lck_mtx_t *mutex_held;
        struct timespec ts;

        lr_saved = (unsigned int) __builtin_return_address(0);

        if (so->so_proto->pr_getlock != NULL)
                mutex_held = (*so->so_proto->pr_getlock)(so, 0);
        else
                mutex_held = so->so_proto->pr_domain->dom_mtx;

        sb->sb_flags |= SB_WAIT;

        if (so->so_usecount < 1)
                panic("sbwait: so=%p refcount=%d\n", so, so->so_usecount);
        ts.tv_sec = sb->sb_timeo.tv_sec;
        ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
        error = msleep((caddr_t)&sb->sb_cc, mutex_held,
            (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", &ts);

        lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

        if (so->so_usecount < 1)
                panic("sbwait: so=%p refcount=%d\n", so, so->so_usecount);

        if ((so->so_state & SS_DRAINING)) {
                error = EBADF;
        }

        return (error);
}
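/*
 * Illustrative sketch only: the canonical caller-side pattern for
 * sbwait().  A hypothetical receive path sleeps until enough data has
 * been queued, rechecking its condition after every wakeup, since
 * sowakeup() wakes all waiters on sb_cc.
 */
#if 0
static int
my_proto_wait_for_data(struct socket *so)
{
        int error = 0;

        while (so->so_rcv.sb_cc < so->so_rcv.sb_lowat &&
            (so->so_state & SS_CANTRCVMORE) == 0) {
                error = sbwait(&so->so_rcv);
                if (error != 0)
                        break;  /* interrupted, timed out, or draining */
        }
        return (error);
}
#endif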
/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
int
sb_lock(struct sockbuf *sb)
{
        struct socket *so = sb->sb_so;
        lck_mtx_t *mutex_held;
        int error = 0;

        if (so == NULL)
                panic("sb_lock: null so back pointer sb=%p\n", sb);

        while (sb->sb_flags & SB_LOCK) {
                sb->sb_flags |= SB_WANT;
                if (so->so_proto->pr_getlock != NULL)
                        mutex_held = (*so->so_proto->pr_getlock)(so, 0);
                else
                        mutex_held = so->so_proto->pr_domain->dom_mtx;
                if (so->so_usecount < 1)
                        panic("sb_lock: so=%p refcount=%d\n", so,
                            so->so_usecount);

                error = msleep((caddr_t)&sb->sb_flags, mutex_held,
                    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH,
                    "sb_lock", 0);
                if (so->so_usecount < 1)
                        panic("sb_lock: 2 so=%p refcount=%d\n", so,
                            so->so_usecount);
                if (error)
                        return (error);
        }
        sb->sb_flags |= SB_LOCK;
        return (0);
}
/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb)
{
        sb->sb_flags &= ~SB_SEL;
        selwakeup(&sb->sb_sel);
        if (sb->sb_flags & SB_WAIT) {
                sb->sb_flags &= ~SB_WAIT;
                wakeup((caddr_t)&sb->sb_cc);
        }
        if (so->so_state & SS_ASYNC) {
                if (so->so_pgid < 0)
                        gsignal(-so->so_pgid, SIGIO);
                else if (so->so_pgid > 0)
                        proc_signal(so->so_pgid, SIGIO);
        }
        if (sb->sb_flags & SB_KNOTE) {
                KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
        }
        if (sb->sb_flags & SB_UPCALL) {
                void (*so_upcall)(struct socket *, caddr_t, int);
                caddr_t so_upcallarg;

                so_upcall = so->so_upcall;
                so_upcallarg = so->so_upcallarg;
                /* Let close know that we're about to do an upcall */
                so->so_flags |= SOF_UPCALLINUSE;

                socket_unlock(so, 0);
                (*so_upcall)(so, so_upcallarg, M_DONTWAIT);
                socket_lock(so, 0);

                so->so_flags &= ~SOF_UPCALLINUSE;
                /* Tell close that it's safe to proceed */
                if (so->so_flags & SOF_CLOSEWAIT)
                        wakeup((caddr_t)&so->so_upcall);
        }
}
/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field. The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */
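/*
 * A sketch of the record layout conventions above, drawn for a receive
 * buffer holding two datagram records (an illustration derived from the
 * comment, not normative):
 *
 *      sb_mb -> [MT_SONAME] --m_nextpkt--> [MT_SONAME] -> NULL
 *                   |                          |
 *                 m_next                     m_next
 *                   v                          v
 *               [MT_DATA]                  [MT_DATA]
 *                   |
 *                 m_next          (second record is sb_lastrecord;
 *                   v              its final mbuf is sb_mbtail)
 *               [MT_DATA] -> NULL
 */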
int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{
        if (sbreserve(&so->so_snd, sndcc) == 0)
                goto bad;
        if (sbreserve(&so->so_rcv, rcvcc) == 0)
                goto bad2;
        if (so->so_rcv.sb_lowat == 0)
                so->so_rcv.sb_lowat = 1;
        if (so->so_snd.sb_lowat == 0)
                so->so_snd.sb_lowat = MCLBYTES;
        if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
                so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
        return (0);
bad2:
        selthreadclear(&so->so_snd.sb_sel);
        sbrelease(&so->so_snd);
bad:
        return (ENOBUFS);
}
/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(struct sockbuf *sb, u_long cc)
{
        if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
                return (0);
        sb->sb_hiwat = cc;
        sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
        if (sb->sb_lowat > sb->sb_hiwat)
                sb->sb_lowat = sb->sb_hiwat;
        return (1);
}
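/*
 * Worked example of the scaling above (a sketch; exact figures depend on
 * the build's MSIZE/MCLBYTES and sb_max): with sb_efficiency = 8 and a
 * requested cc of 64 KB, sb_mbmax = min(64 KB * 8, sb_max) = 512 KB of
 * mbuf storage, so mbuf overhead rarely becomes the binding limit before
 * sb_hiwat does.
 */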
/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
/*  WARNING needs to do selthreadclear() before calling this */
void
sbrelease(struct sockbuf *sb)
{
        sbflush(sb);
        sb->sb_hiwat = 0;
        sb->sb_mbmax = 0;
}
/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */
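/*
 * Illustrative sketch only: the sender-side pattern just described.  A
 * hypothetical reliable protocol checks sbspace() itself before calling
 * sbappend(), transmits from a copy, and trims the buffer with sbdrop()
 * once the peer acknowledges.  my_proto_send/my_proto_acked and len are
 * assumptions for illustration.
 */
#if 0
static void
my_proto_send(struct socket *so, struct mbuf *m, int len)
{
        if (sbspace(&so->so_snd) < (long)len) {
                m_freem(m);             /* no room; caller blocks/retries */
                return;
        }
        sbappend(&so->so_snd, m);       /* hold until acknowledged */
        /* ... transmit from a copy, e.g. m_copy(so->so_snd.sb_mb, 0, len) ... */
}

static void
my_proto_acked(struct socket *so, int acked_len)
{
        sbdrop(&so->so_snd, acked_len); /* release acknowledged bytes */
        sowwakeup(so);                  /* writers may proceed */
}
#endif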
/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
int
sbappend(struct sockbuf *sb, struct mbuf *m)
{
        struct socket *so = sb->sb_so;

        if (m == NULL || (sb->sb_flags & SB_DROP)) {
                if (m != NULL)
                        m_freem(m);
                return (0);
        }

        SBLASTRECORDCHK(sb, "sbappend 1");

        if (sb->sb_lastrecord != NULL && (sb->sb_mbtail->m_flags & M_EOR))
                return (sbappendrecord(sb, m));

        if (sb->sb_flags & SB_RECV) {
                int error = sflt_data_in(so, NULL, &m, NULL, 0, NULL);
                SBLASTRECORDCHK(sb, "sbappend 2");
                if (error != 0) {
                        if (error != EJUSTRETURN)
                                m_freem(m);
                        return (0);
                }
        }

        /* If this is the first record, it's also the last record */
        if (sb->sb_lastrecord == NULL)
                sb->sb_lastrecord = m;

        sbcompress(sb, m, sb->sb_mbtail);
        SBLASTRECORDCHK(sb, "sbappend 3");
        return (1);
}
/*
 * Similar to sbappend, except that this is optimized for stream sockets.
 */
int
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{
        struct socket *so = sb->sb_so;

        if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord))
                panic("sbappendstream: nexpkt %p || mb %p != lastrecord %p\n",
                    m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);

        SBLASTMBUFCHK(sb, __func__);

        if (m == NULL || (sb->sb_flags & SB_DROP)) {
                if (m != NULL)
                        m_freem(m);
                return (0);
        }

        if (sb->sb_flags & SB_RECV) {
                int error = sflt_data_in(so, NULL, &m, NULL, 0, NULL);
                SBLASTRECORDCHK(sb, "sbappendstream 1");
                if (error != 0) {
                        if (error != EJUSTRETURN)
                                m_freem(m);
                        return (0);
                }
        }

        sbcompress(sb, m, sb->sb_mbtail);
        sb->sb_lastrecord = sb->sb_mb;
        SBLASTRECORDCHK(sb, "sbappendstream 2");
        return (1);
}
void
sbcheck(struct sockbuf *sb)
{
        struct mbuf *m;
        struct mbuf *n = 0;
        u_long len = 0, mbcnt = 0;
        lck_mtx_t *mutex_held;

        if (sb->sb_so->so_proto->pr_getlock != NULL)
                mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
        else
                mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;

        lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

        for (m = sb->sb_mb; m; m = n) {
                n = m->m_nextpkt;
                for (; m; m = m->m_next) {
                        len += m->m_len;
                        mbcnt += MSIZE;
                        /* XXX pretty sure this is bogus */
                        if (m->m_flags & M_EXT)
                                mbcnt += m->m_ext.ext_size;
                }
        }
        if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
                panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
                    mbcnt, sb->sb_mbcnt);
        }
}
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
        struct mbuf *m = sb->sb_mb;

        while (m && m->m_nextpkt)
                m = m->m_nextpkt;

        if (m != sb->sb_lastrecord) {
                printf("sblastrecordchk: mb %p lastrecord %p last %p\n",
                    sb->sb_mb, sb->sb_lastrecord, m);
                printf("packet chain:\n");
                for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
                        printf("\t%p\n", m);
                panic("sblastrecordchk from %s", where);
        }
}
void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
        struct mbuf *m = sb->sb_mb;
        struct mbuf *n;

        while (m && m->m_nextpkt)
                m = m->m_nextpkt;

        while (m && m->m_next)
                m = m->m_next;

        if (m != sb->sb_mbtail) {
                printf("sblastmbufchk: mb %p mbtail %p last %p\n",
                    sb->sb_mb, sb->sb_mbtail, m);
                printf("packet tree:\n");
                for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
                        printf("\t");
                        for (n = m; n != NULL; n = n->m_next)
                                printf("%p ", n);
                        printf("\n");
                }
                panic("sblastmbufchk from %s", where);
        }
}
/*
 * Similar to sbappend, except the mbuf chain begins a new record.
 */
int
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
        struct mbuf *m;
        int space = 0;

        if (m0 == NULL || (sb->sb_flags & SB_DROP)) {
                if (m0 != NULL)
                        m_freem(m0);
                return (0);
        }

        for (m = m0; m != NULL; m = m->m_next)
                space += m->m_len;

        if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX)) {
                m_freem(m0);
                return (0);
        }

        if (sb->sb_flags & SB_RECV) {
                int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
                    sock_data_filt_flag_record, NULL);
                if (error != 0) {
                        SBLASTRECORDCHK(sb, "sbappendrecord 1");
                        if (error != EJUSTRETURN)
                                m_freem(m0);
                        return (0);
                }
        }

        /*
         * Note this permits zero length records.
         */
        sballoc(sb, m0);
        SBLASTRECORDCHK(sb, "sbappendrecord 2");
        if (sb->sb_lastrecord != NULL) {
                sb->sb_lastrecord->m_nextpkt = m0;
        } else {
                sb->sb_mb = m0;
        }
        sb->sb_lastrecord = m0;

        m = m0->m_next;
        m0->m_next = 0;
        if (m && (m0->m_flags & M_EOR)) {
                m0->m_flags &= ~M_EOR;
                m->m_flags |= M_EOR;
        }
        sbcompress(sb, m, m0);
        SBLASTRECORDCHK(sb, "sbappendrecord 3");
        return (1);
}
/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
int
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
        struct mbuf *m;
        struct mbuf **mp;

        if (m0 == 0)
                return (0);

        SBLASTRECORDCHK(sb, "sbinsertoob 1");

        if ((sb->sb_flags & SB_RECV) != 0) {
                int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
                    sock_data_filt_flag_oob, NULL);

                SBLASTRECORDCHK(sb, "sbinsertoob 2");
                if (error) {
                        if (error != EJUSTRETURN) {
                                m_freem(m0);
                        }
                        return (0);
                }
        }

        for (mp = &sb->sb_mb; *mp; mp = &((*mp)->m_nextpkt)) {
                m = *mp;
again:
                switch (m->m_type) {

                case MT_OOBDATA:
                        continue;               /* WANT next train */

                case MT_CONTROL:
                        m = m->m_next;
                        if (m)
                                goto again;     /* inspect THIS train further */
                }
                break;
        }
        /*
         * Put the first mbuf on the queue.
         * Note this permits zero length records.
         */
        sballoc(sb, m0);
        m0->m_nextpkt = *mp;
        if (*mp == NULL) {
                /* m0 is actually the new tail */
                sb->sb_lastrecord = m0;
        }
        *mp = m0;
        m = m0->m_next;
        m0->m_next = 0;
        if (m && (m0->m_flags & M_EOR)) {
                m0->m_flags &= ~M_EOR;
                m->m_flags |= M_EOR;
        }
        sbcompress(sb, m, m0);
        SBLASTRECORDCHK(sb, "sbinsertoob 3");
        return (1);
}
/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 *
 * Returns:     0                       No space/out of mbufs
 */
static int
sbappendaddr_internal(struct sockbuf *sb, struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control)
{
        struct mbuf *m, *n, *nlast;
        int space = asa->sa_len;

        if (m0 && (m0->m_flags & M_PKTHDR) == 0)
                panic("sbappendaddr");

        if (m0)
                space += m0->m_pkthdr.len;
        for (n = control; n; n = n->m_next) {
                space += n->m_len;
                if (n->m_next == 0)     /* keep pointer to last control buf */
                        break;
        }
        if (space > sbspace(sb))
                return (0);
        if (asa->sa_len > MLEN)
                return (0);
        MGET(m, M_DONTWAIT, MT_SONAME);
        if (m == 0)
                return (0);
        m->m_len = asa->sa_len;
        bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
        if (n)
                n->m_next = m0;         /* concatenate data to control */
        else
                control = m0;
        m->m_next = control;

        SBLASTRECORDCHK(sb, "sbappendadddr 1");

        for (n = m; n->m_next != NULL; n = n->m_next)
                sballoc(sb, n);
        sballoc(sb, n);
        nlast = n;

        if (sb->sb_lastrecord != NULL) {
                sb->sb_lastrecord->m_nextpkt = m;
        } else {
                sb->sb_mb = m;
        }
        sb->sb_lastrecord = m;
        sb->sb_mbtail = nlast;

        SBLASTMBUFCHK(sb, __func__);
        SBLASTRECORDCHK(sb, "sbappendadddr 2");

        postevent(0, sb, EV_RWBYTES);
        return (1);
}
/*
 * Returns:     0                       Error: No space/out of mbufs/etc.
 *
 * Imputed:     (*error_out)            errno for error
 *              sflt_data_in:???        [whatever a filter author chooses]
 */
int
sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
    struct mbuf *control, int *error_out)
{
        int result = 0;
        boolean_t sb_unix = (sb->sb_flags & SB_UNIX);

        if (error_out)
                *error_out = 0;

        if (m0 && (m0->m_flags & M_PKTHDR) == 0)
                panic("sbappendaddrorfree");

        if (sb->sb_flags & SB_DROP) {
                if (m0 != NULL)
                        m_freem(m0);
                if (control != NULL && !sb_unix)
                        m_freem(control);
                if (error_out != NULL)
                        *error_out = EINVAL;
                return (0);
        }

        /* Call socket data in filters */
        if ((sb->sb_flags & SB_RECV) != 0) {
                int error;
                error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
                SBLASTRECORDCHK(sb, __func__);
                if (error) {
                        if (error != EJUSTRETURN) {
                                if (m0)
                                        m_freem(m0);
                                if (control != NULL && !sb_unix)
                                        m_freem(control);
                                if (error_out)
                                        *error_out = error;
                        }
                        return (0);
                }
        }

        result = sbappendaddr_internal(sb, asa, m0, control);
        if (result == 0) {
                if (m0)
                        m_freem(m0);
                if (control != NULL && !sb_unix)
                        m_freem(control);
                if (error_out)
                        *error_out = ENOBUFS;
        }

        return (result);
}
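/*
 * Illustrative sketch only: a hypothetical datagram protocol's input
 * path using sbappendaddr() as described above.  On success the data
 * begins a new record prefixed by the sender's name; on failure the
 * routine has already freed m0 and reported the errno via error_out.
 */
#if 0
static void
my_dgram_input(struct socket *so, struct sockaddr *from, struct mbuf *m0)
{
        int error;

        if (sbappendaddr(&so->so_rcv, from, m0, NULL, &error) != 0)
                sorwakeup(so);          /* tell readers data arrived */
        /* else: mbufs already freed, so->so_rcv unchanged */
}
#endif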
static int
sbappendcontrol_internal(struct sockbuf *sb, struct mbuf *m0,
    struct mbuf *control)
{
        struct mbuf *m, *mlast, *n;
        int space = 0;

        if (control == 0)
                panic("sbappendcontrol");

        for (m = control; ; m = m->m_next) {
                space += m->m_len;
                if (m->m_next == 0)
                        break;
        }
        n = m;                  /* save pointer to last control buffer */
        for (m = m0; m; m = m->m_next)
                space += m->m_len;
        if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX))
                return (0);
        n->m_next = m0;                 /* concatenate data to control */

        SBLASTRECORDCHK(sb, "sbappendcontrol 1");

        for (m = control; m->m_next != NULL; m = m->m_next)
                sballoc(sb, m);
        sballoc(sb, m);
        mlast = m;

        if (sb->sb_lastrecord != NULL) {
                sb->sb_lastrecord->m_nextpkt = control;
        } else {
                sb->sb_mb = control;
        }
        sb->sb_lastrecord = control;
        sb->sb_mbtail = mlast;

        SBLASTMBUFCHK(sb, __func__);
        SBLASTRECORDCHK(sb, "sbappendcontrol 2");

        postevent(0, sb, EV_RWBYTES);
        return (1);
}
int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control,
    int *error_out)
{
        int result = 0;
        boolean_t sb_unix = (sb->sb_flags & SB_UNIX);

        if (error_out)
                *error_out = 0;

        if (sb->sb_flags & SB_DROP) {
                if (m0 != NULL)
                        m_freem(m0);
                if (control != NULL && !sb_unix)
                        m_freem(control);
                if (error_out != NULL)
                        *error_out = EINVAL;
                return (0);
        }

        if (sb->sb_flags & SB_RECV) {
                int error;
                error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
                SBLASTRECORDCHK(sb, __func__);
                if (error) {
                        if (error != EJUSTRETURN) {
                                if (m0)
                                        m_freem(m0);
                                if (control != NULL && !sb_unix)
                                        m_freem(control);
                                if (error_out)
                                        *error_out = error;
                        }
                        return (0);
                }
        }

        result = sbappendcontrol_internal(sb, m0, control);
        if (result == 0) {
                if (m0)
                        m_freem(m0);
                if (control != NULL && !sb_unix)
                        m_freem(control);
                if (error_out)
                        *error_out = ENOBUFS;
        }

        return (result);
}
/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
static inline void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
        int eor = 0;
        struct mbuf *o;

        if (m == NULL) {
                /* There is nothing to compress; just update the tail */
                for (; n->m_next != NULL; n = n->m_next)
                        ;
                sb->sb_mbtail = n;
                goto done;
        }

        while (m) {
                eor |= m->m_flags & M_EOR;
                if (m->m_len == 0 && (eor == 0 ||
                    (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) {
                        if (sb->sb_lastrecord == m)
                                sb->sb_lastrecord = m->m_next;
                        m = m_free(m);
                        continue;
                }
                if (n && (n->m_flags & M_EOR) == 0 &&
                    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
                    m->m_len <= M_TRAILINGSPACE(n) &&
                    n->m_type == m->m_type) {
                        bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
                            (unsigned)m->m_len);
                        n->m_len += m->m_len;
                        sb->sb_cc += m->m_len;
                        if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
                            m->m_type != MT_OOBDATA)
                                /* XXX: Probably don't need.*/
                                sb->sb_ctl += m->m_len;
                        m = m_free(m);
                        continue;
                }
                if (n)
                        n->m_next = m;
                else
                        sb->sb_mb = m;
                sb->sb_mbtail = m;
                sballoc(sb, m);
                n = m;
                m->m_flags &= ~M_EOR;
                m = m->m_next;
                n->m_next = 0;
        }
        if (eor) {
                if (n)
                        n->m_flags |= eor;
                else
                        printf("semi-panic: sbcompress\n");
        }
done:
        SBLASTMBUFCHK(sb, __func__);
        postevent(0, sb, EV_RWBYTES);
}
void
sb_empty_assert(struct sockbuf *sb, const char *where)
{
        if (!(sb->sb_cc == 0 && sb->sb_mb == NULL && sb->sb_mbcnt == 0 &&
            sb->sb_mbtail == NULL && sb->sb_lastrecord == NULL)) {
                panic("%s: sb %p so %p cc %ld mbcnt %ld mb %p mbtail %p "
                    "lastrecord %p\n", where, sb, sb->sb_so, sb->sb_cc,
                    sb->sb_mbcnt, sb->sb_mb, sb->sb_mbtail, sb->sb_lastrecord);
                /* NOTREACHED */
        }
}
/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{
        if (sb->sb_so == NULL)
                panic("sbflush sb->sb_so already null sb=%p\n", sb);
        (void) sblock(sb, M_WAIT);
        while (sb->sb_mbcnt) {
                /*
                 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
                 * we would loop forever. Panic instead.
                 */
                if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
                        break;
                sbdrop(sb, (int)sb->sb_cc);
        }
        sb_empty_assert(sb, __func__);
        postevent(0, sb, EV_RWBYTES);
        sbunlock(sb, 1);        /* keep socket locked */
}
/*
 * Drop data from (the front of) a sockbuf.
 * use m_freem_list to free the mbuf structures
 * under a single lock... this is done by pruning
 * the top of the tree from the body by keeping track
 * of where we get to in the tree and then zeroing the
 * two pertinent pointers m_nextpkt and m_next
 * the socket buffer is then updated to point at the new
 * top of the tree and the pruned area is released via
 * m_freem_list.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
        struct mbuf *m, *free_list, *ml;
        struct mbuf *next, *last;

        KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);

        next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
        free_list = last = m;
        ml = (struct mbuf *)0;

        while (len > 0) {
                if (m == 0) {
                        if (next == 0) {
                                /*
                                 * temporarily replacing this panic with printf
                                 * because it occurs occasionally when closing
                                 * a socket when there is no harm in ignoring
                                 * it. This problem will be investigated
                                 * further.
                                 */
                                /* panic("sbdrop"); */
                                printf("sbdrop - count not zero\n");
                                len = 0;
                                /*
                                 * zero the counts. if we have no mbufs,
                                 * we have no data (PR-2986815)
                                 */
                                sb->sb_cc = 0;
                                sb->sb_mbcnt = 0;
                                break;
                        }
                        m = last = next;
                        next = m->m_nextpkt;
                        continue;
                }
                if (m->m_len > len) {
                        m->m_len -= len;
                        m->m_data += len;
                        sb->sb_cc -= len;
                        if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
                            m->m_type != MT_OOBDATA)
                                sb->sb_ctl -= len;
                        break;
                }
                len -= m->m_len;
                sbfree(sb, m);

                ml = m;
                m = m->m_next;
        }
        while (m && m->m_len == 0) {
                sbfree(sb, m);

                ml = m;
                m = m->m_next;
        }
        if (ml) {
                ml->m_next = (struct mbuf *)0;
                last->m_nextpkt = (struct mbuf *)0;
                m_freem_list(free_list);
        }
        if (m) {
                sb->sb_mb = m;
                m->m_nextpkt = next;
        } else {
                sb->sb_mb = next;
        }

        /*
         * First part is an inline SB_EMPTY_FIXUP().  Second part
         * makes sure sb_lastrecord is up-to-date if we dropped
         * part of the last record.
         */
        m = sb->sb_mb;
        if (m == NULL) {
                sb->sb_mbtail = NULL;
                sb->sb_lastrecord = NULL;
        } else if (m->m_nextpkt == NULL) {
                sb->sb_lastrecord = m;
        }

        postevent(0, sb, EV_RWBYTES);

        KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
}
/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
        struct mbuf *m, *mn;

        m = sb->sb_mb;
        if (m) {
                sb->sb_mb = m->m_nextpkt;
                do {
                        sbfree(sb, m);
                        MFREE(m, mn);
                        m = mn;
                } while (m);
        }
        SB_EMPTY_FIXUP(sb);
        postevent(0, sb, EV_RWBYTES);
}
/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(caddr_t p, int size, int type, int level)
{
        struct cmsghdr *cp;
        struct mbuf *m;

        if (CMSG_SPACE((u_int)size) > MLEN)
                return ((struct mbuf *)NULL);
        if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
                return ((struct mbuf *)NULL);
        cp = mtod(m, struct cmsghdr *);
        /* XXX check size? */
        (void) memcpy(CMSG_DATA(cp), p, size);
        m->m_len = CMSG_SPACE(size);
        cp->cmsg_len = CMSG_LEN(size);
        cp->cmsg_level = level;
        cp->cmsg_type = type;
        return (m);
}
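/*
 * Illustrative sketch only: building a control record with
 * sbcreatecontrol() and queueing it ahead of data, as a protocol that
 * delivers ancillary information might.  The IP_RECVTTL-style payload
 * here is an assumption for illustration.
 */
#if 0
        struct mbuf *control;
        u_char ttl = 64;

        control = sbcreatecontrol((caddr_t)&ttl, sizeof (ttl),
            IP_RECVTTL, IPPROTO_IP);
        if (control != NULL)
                (void) sbappendcontrol(&so->so_rcv, m0, control, NULL);
#endif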
/*
 * Some routines that return EOPNOTSUPP for entry points that are not
 * supported by a protocol.  Fill in as needed.
 */
int
pru_abort_notsupp(__unused struct socket *so)
{
        return (EOPNOTSUPP);
}

int
pru_accept_notsupp(__unused struct socket *so, __unused struct sockaddr **nam)
{
        return (EOPNOTSUPP);
}

int
pru_attach_notsupp(__unused struct socket *so, __unused int proto,
    __unused struct proc *p)
{
        return (EOPNOTSUPP);
}

int
pru_bind_notsupp(__unused struct socket *so, __unused struct sockaddr *nam,
    __unused struct proc *p)
{
        return (EOPNOTSUPP);
}

int
pru_connect_notsupp(__unused struct socket *so, __unused struct sockaddr *nam,
    __unused struct proc *p)
{
        return (EOPNOTSUPP);
}

int
pru_connect2_notsupp(__unused struct socket *so1, __unused struct socket *so2)
{
        return (EOPNOTSUPP);
}

int
pru_control_notsupp(__unused struct socket *so, __unused u_long cmd,
    __unused caddr_t data, __unused struct ifnet *ifp, __unused struct proc *p)
{
        return (EOPNOTSUPP);
}

int
pru_detach_notsupp(__unused struct socket *so)
{
        return (EOPNOTSUPP);
}

int
pru_disconnect_notsupp(__unused struct socket *so)
{
        return (EOPNOTSUPP);
}

int
pru_listen_notsupp(__unused struct socket *so, __unused struct proc *p)
{
        return (EOPNOTSUPP);
}

int
pru_peeraddr_notsupp(__unused struct socket *so, __unused struct sockaddr **nam)
{
        return (EOPNOTSUPP);
}

int
pru_rcvd_notsupp(__unused struct socket *so, __unused int flags)
{
        return (EOPNOTSUPP);
}

int
pru_rcvoob_notsupp(__unused struct socket *so, __unused struct mbuf *m,
    __unused int flags)
{
        return (EOPNOTSUPP);
}

int
pru_send_notsupp(__unused struct socket *so, __unused int flags,
    __unused struct mbuf *m, __unused struct sockaddr *addr,
    __unused struct mbuf *control, __unused struct proc *p)
{
        return (EOPNOTSUPP);
}
/*
 * This isn't really a ``null'' operation, but it's the default one
 * and doesn't do anything destructive.
 */
int
pru_sense_null(struct socket *so, void *ub, int isstat64)
{
        if (isstat64 != 0) {
                struct stat64 *sb64;

                sb64 = (struct stat64 *)ub;
                sb64->st_blksize = so->so_snd.sb_hiwat;
        } else {
                struct stat *sb;

                sb = (struct stat *)ub;
                sb->st_blksize = so->so_snd.sb_hiwat;
        }

        return (0);
}
int
pru_sosend_notsupp(__unused struct socket *so, __unused struct sockaddr *addr,
    __unused struct uio *uio, __unused struct mbuf *top,
    __unused struct mbuf *control, __unused int flags)
{
        return (EOPNOTSUPP);
}

int
pru_soreceive_notsupp(__unused struct socket *so,
    __unused struct sockaddr **paddr,
    __unused struct uio *uio, __unused struct mbuf **mp0,
    __unused struct mbuf **controlp, __unused int *flagsp)
{
        return (EOPNOTSUPP);
}

int
pru_shutdown_notsupp(__unused struct socket *so)
{
        return (EOPNOTSUPP);
}

int
pru_sockaddr_notsupp(__unused struct socket *so, __unused struct sockaddr **nam)
{
        return (EOPNOTSUPP);
}

int
pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
    __unused kauth_cred_t cred, __unused void *wql)
{
        return (EOPNOTSUPP);
}
#ifdef __APPLE__
/*
 * The following are macros on BSD and functions on Darwin
 */

/* Do we need to notify the other side when I/O is possible? */
int
sb_notify(struct sockbuf *sb)
{
        return ((sb->sb_flags &
            (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
}
/*
 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
 * This is problematical if the fields are unsigned, as the space might
 * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
 * overflow and return 0.  Should use "lmin" but it doesn't exist now.
 */
long
sbspace(struct sockbuf *sb)
{
        return ((long)imin((int)(sb->sb_hiwat - sb->sb_cc),
            (int)(sb->sb_mbmax - sb->sb_mbcnt)));
}
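/*
 * Worked example (a sketch): with sb_hiwat = 8192, sb_cc = 6144,
 * sb_mbmax = 65536 and sb_mbcnt = 61440, sbspace() returns
 * imin(8192 - 6144, 65536 - 61440) = imin(2048, 4096) = 2048; here the
 * data limit, not the mbuf-storage limit, is the binding constraint.
 */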
/* do we have to send all at once on a socket? */
int
sosendallatonce(struct socket *so)
{
        return (so->so_proto->pr_flags & PR_ATOMIC);
}
/* can we read something from so? */
int
soreadable(struct socket *so)
{
        return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
            (so->so_state & SS_CANTRCVMORE) ||
            so->so_comp.tqh_first || so->so_error);
}
/* can we write something to so? */
int
sowriteable(struct socket *so)
{
        return ((sbspace(&(so)->so_snd) >= (long)(so)->so_snd.sb_lowat &&
            ((so->so_state & SS_ISCONNECTED) ||
            (so->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) ||
            (so->so_state & SS_CANTSENDMORE) ||
            so->so_error);
}
/* adjust counters in sb reflecting allocation of m */
void
sballoc(struct sockbuf *sb, struct mbuf *m)
{
        int cnt = 1;

        sb->sb_cc += m->m_len;
        if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
            m->m_type != MT_OOBDATA)
                sb->sb_ctl += m->m_len;
        sb->sb_mbcnt += MSIZE;

        if (m->m_flags & M_EXT) {
                sb->sb_mbcnt += m->m_ext.ext_size;
                cnt += m->m_ext.ext_size / MSIZE;
        }
        OSAddAtomic(cnt, (SInt32 *)&total_mb_cnt);
}
/* adjust counters in sb reflecting freeing of m */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{
        int cnt = -1;

        sb->sb_cc -= m->m_len;
        if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
            m->m_type != MT_OOBDATA)
                sb->sb_ctl -= m->m_len;
        sb->sb_mbcnt -= MSIZE;
        if (m->m_flags & M_EXT) {
                sb->sb_mbcnt -= m->m_ext.ext_size;
                cnt -= m->m_ext.ext_size / MSIZE;
        }
        OSAddAtomic(cnt, (SInt32 *)&total_mb_cnt);
}
/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 *
 * Returns:     0                       Success
 */
int
sblock(struct sockbuf *sb, int wf)
{
        int error = 0;

        if (sb->sb_flags & SB_LOCK)
                error = (wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK;
        else
                sb->sb_flags |= SB_LOCK;

        return (error);
}
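/*
 * Illustrative sketch only: the usual sblock()/sbunlock() bracket around
 * multi-step buffer surgery, entered with the socket mutex already held.
 */
#if 0
        error = sblock(&so->so_rcv, M_WAIT);
        if (error == 0) {
                /* ... consume or rearrange so->so_rcv contents ... */
                sbunlock(&so->so_rcv, 1);       /* keep the socket locked */
        }
#endif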
/* release lock on sockbuf sb */
void
sbunlock(struct sockbuf *sb, int keeplocked)
{
        struct socket *so = sb->sb_so;
        int lr_saved;
        lck_mtx_t *mutex_held;

        lr_saved = (unsigned int) __builtin_return_address(0);

        sb->sb_flags &= ~SB_LOCK;

        if (sb->sb_flags & SB_WANT) {
                sb->sb_flags &= ~SB_WANT;
                if (so->so_usecount < 0)
                        panic("sbunlock: b4 wakeup so=%p ref=%d lr=%x "
                            "sb_flags=%x\n", sb->sb_so, so->so_usecount,
                            lr_saved, sb->sb_flags);

                wakeup((caddr_t)&(sb)->sb_flags);
        }
        if (keeplocked == 0) {  /* unlock on exit */
                if (so->so_proto->pr_getlock != NULL)
                        mutex_held = (*so->so_proto->pr_getlock)(so, 0);
                else
                        mutex_held = so->so_proto->pr_domain->dom_mtx;

                lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

                so->so_usecount--;
                if (so->so_usecount < 0)
                        panic("sbunlock: unlock on exit so=%p ref=%d lr=%x "
                            "sb_flags=%x\n", so, so->so_usecount, lr_saved,
                            sb->sb_flags);
                so->unlock_lr[so->next_unlock_lr] = (u_int32_t)lr_saved;
                so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
                lck_mtx_unlock(mutex_held);
        }
}
void
sorwakeup(struct socket *so)
{
        if (sb_notify(&so->so_rcv))
                sowakeup(so, &so->so_rcv);
}

void
sowwakeup(struct socket *so)
{
        if (sb_notify(&so->so_snd))
                sowakeup(so, &so->so_snd);
}
#endif /* __APPLE__ */
/*
 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 */
struct sockaddr *
dup_sockaddr(struct sockaddr *sa, int canwait)
{
        struct sockaddr *sa2;

        MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
            canwait ? M_WAITOK : M_NOWAIT);
        if (sa2)
                bcopy(sa, sa2, sa->sa_len);
        return (sa2);
}
/*
 * Create an external-format (``xsocket'') structure using the information
 * in the kernel-format socket structure pointed to by so.  This is done
 * to reduce the spew of irrelevant information over this interface,
 * to isolate user code from changes in the kernel structure, and
 * potentially to provide information-hiding if we decide that
 * some of this information should be hidden from users.
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
        xso->xso_len = sizeof (*xso);
        xso->xso_so = so;
        xso->so_type = so->so_type;
        xso->so_options = so->so_options;
        xso->so_linger = so->so_linger;
        xso->so_state = so->so_state;
        xso->so_pcb = so->so_pcb;
        if (so->so_proto) {
                xso->xso_protocol = so->so_proto->pr_protocol;
                xso->xso_family = so->so_proto->pr_domain->dom_family;
        } else {
                xso->xso_protocol = xso->xso_family = 0;
        }
        xso->so_qlen = so->so_qlen;
        xso->so_incqlen = so->so_incqlen;
        xso->so_qlimit = so->so_qlimit;
        xso->so_timeo = so->so_timeo;
        xso->so_error = so->so_error;
        xso->so_pgid = so->so_pgid;
        xso->so_oobmark = so->so_oobmark;
        sbtoxsockbuf(&so->so_snd, &xso->so_snd);
        sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
        xso->so_uid = so->so_uid;
}
/*
 * This does the same for sockbufs.  Note that the xsockbuf structure,
 * since it is always embedded in a socket, does not include a self
 * pointer nor a length.  We make this entry point public in case
 * some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
        xsb->sb_cc = sb->sb_cc;
        xsb->sb_hiwat = sb->sb_hiwat;
        xsb->sb_mbcnt = sb->sb_mbcnt;
        xsb->sb_mbmax = sb->sb_mbmax;
        xsb->sb_lowat = sb->sb_lowat;
        xsb->sb_flags = sb->sb_flags;
        xsb->sb_timeo = (u_long)
            (sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
        if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
                xsb->sb_timeo = 1;
}
/*
 * Here is the definition of some of the basic objects in the kern.ipc
 * branch of the MIB.
 */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IPC");

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");

SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
    &sb_max, 0, "Maximum socket buffer size");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
    &maxsockets, 0, "Maximum number of sockets available");
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, sbspace_factor, CTLFLAG_RW,
    &sbspace_factor, 0, "Ratio of mbuf/cluster use for socket layers");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
    &nmbclusters, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, njcl, CTLFLAG_RD, &njcl, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, njclbytes, CTLFLAG_RD, &njclbytes, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_SOQLIMITCOMPAT, soqlimitcompat, CTLFLAG_RW,
    &soqlimitcompat, 1, "Enable socket queue limit compatibility");
SYSCTL_INT(_kern_ipc, OID_AUTO, soqlencomp, CTLFLAG_RW,
    &soqlencomp, 0, "Listen backlog represents only complete queue");