2  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
   4  * @APPLE_LICENSE_HEADER_START@ 
   6  * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved. 
   8  * This file contains Original Code and/or Modifications of Original Code 
   9  * as defined in and that are subject to the Apple Public Source License 
  10  * Version 2.0 (the 'License'). You may not use this file except in 
  11  * compliance with the License. Please obtain a copy of the License at 
  12  * http://www.opensource.apple.com/apsl/ and read it before using this 
  15  * The Original Code and all software distributed under the License are 
  16  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  19  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
  20  * Please see the License for the specific language governing rights and 
  21  * limitations under the License. 
  23  * @APPLE_LICENSE_HEADER_END@ 
  25 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */ 
  26 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 
  28  * Copyright (c) 1982, 1986, 1988, 1990, 1993 
  29  *      The Regents of the University of California.  All rights reserved. 
  31  * Redistribution and use in source and binary forms, with or without 
  32  * modification, are permitted provided that the following conditions 
  34  * 1. Redistributions of source code must retain the above copyright 
  35  *    notice, this list of conditions and the following disclaimer. 
  36  * 2. Redistributions in binary form must reproduce the above copyright 
  37  *    notice, this list of conditions and the following disclaimer in the 
  38  *    documentation and/or other materials provided with the distribution. 
  39  * 3. All advertising materials mentioning features or use of this software 
  40  *    must display the following acknowledgement: 
  41  *      This product includes software developed by the University of 
  42  *      California, Berkeley and its contributors. 
  43  * 4. Neither the name of the University nor the names of its contributors 
  44  *    may be used to endorse or promote products derived from this software 
  45  *    without specific prior written permission. 
  47  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 
  48  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  49  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  50  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 
  51  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
  52  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
  53  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
  54  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
  55  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
  56  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
  59  *      @(#)uipc_socket.c       8.3 (Berkeley) 4/15/94 
  60  * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $ 
  63 #include <sys/param.h> 
  64 #include <sys/systm.h> 
  66 #include <sys/fcntl.h> 
  67 #include <sys/malloc.h> 
  69 #include <sys/domain.h> 
  70 #include <sys/kernel.h> 
  72 #include <sys/protosw.h> 
  73 #include <sys/socket.h> 
  74 #include <sys/socketvar.h> 
  75 #include <sys/resourcevar.h> 
  76 #include <sys/signalvar.h> 
  77 #include <sys/sysctl.h> 
  80 #include <sys/kdebug.h> 
  81 #include <net/route.h> 
  82 #include <netinet/in.h> 
  83 #include <netinet/in_pcb.h> 
  84 #include <kern/zalloc.h> 
  85 #include <machine/limits.h> 
  88 int                     so_cache_timeouts 
= 0; 
  89 int                     so_cache_max_freed 
= 0; 
  90 int                     cached_sock_count 
= 0; 
  91 struct socket           
*socket_cache_head 
= 0; 
  92 struct socket           
*socket_cache_tail 
= 0; 
  93 u_long                  so_cache_time 
= 0; 
  94 int                     so_cache_init_done 
= 0; 
  95 struct zone             
*so_cache_zone
; 
  96 extern int              get_inpcb_str_size(); 
  97 extern int              get_tcp_str_size(); 
  99 #include <machine/limits.h> 
 101 int socket_debug 
= 0; 
 102 int socket_zone 
= M_SOCKET
; 
 103 so_gen_t        so_gencnt
;      /* generation count for sockets */ 
 105 MALLOC_DEFINE(M_SONAME
, "soname", "socket name"); 
 106 MALLOC_DEFINE(M_PCB
, "pcb", "protocol control block"); 
 108 #define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0) 
 109 #define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2) 
 110 #define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1) 
 111 #define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3) 
 112 #define DBG_FNC_SOSEND          NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1) 
 113 #define DBG_FNC_SORECEIVE       NETDBG_CODE(DBG_NETSOCK, (8 << 8)) 
 114 #define DBG_FNC_SOSHUTDOWN      NETDBG_CODE(DBG_NETSOCK, (9 << 8)) 
 117 SYSCTL_DECL(_kern_ipc
); 
 119 static int somaxconn 
= SOMAXCONN
; 
 120 SYSCTL_INT(_kern_ipc
, KIPC_SOMAXCONN
, somaxconn
, CTLFLAG_RW
, &somaxconn
, 
 123 /* Should we get a maximum also ??? */ 
 124 static int sosendmaxchain 
= 65536; 
 125 static int sosendminchain 
= 16384; 
 126 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sosendminchain
, CTLFLAG_RW
, &sosendminchain
, 
 129 void  so_cache_timer(); 
 130 struct mbuf 
*m_getpackets(int, int, int); 
 134  * Socket operation routines. 
 135  * These routines are called by the routines in 
 136  * sys_socket.c or from a system process, and 
 137  * implement the semantics of socket operations by 
 138  * switching out to the protocol specific routines. 
 146     so_cache_init_done 
= 1; 
 148     timeout(so_cache_timer
, NULL
, (SO_CACHE_FLUSH_INTERVAL 
* hz
)); 
 149     str_size 
= (vm_size_t
)( sizeof(struct socket
) + 4 + 
 150                             get_inpcb_str_size()  + 4 + 
 152     so_cache_zone 
= zinit (str_size
, 120000*str_size
, 8192, "socache zone"); 
 154     kprintf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size
); 
 159 void   cached_sock_alloc(so
, waitok
) 
 166     register u_long  offset
; 
 170     if (cached_sock_count
) { 
 172             *so 
= socket_cache_head
; 
 174                     panic("cached_sock_alloc: cached sock is null"); 
 176             socket_cache_head 
= socket_cache_head
->cache_next
; 
 177             if (socket_cache_head
) 
 178                     socket_cache_head
->cache_prev 
= 0; 
 180                     socket_cache_tail 
= 0; 
 183             temp 
= (*so
)->so_saved_pcb
; 
 184             bzero((caddr_t
)*so
, sizeof(struct socket
)); 
 186             kprintf("cached_sock_alloc - retreiving cached sock %x - count == %d\n", *so
, 
 189             (*so
)->so_saved_pcb 
= temp
; 
 193             kprintf("Allocating cached sock %x from memory\n", *so
); 
 198                  *so 
= (struct socket 
*) zalloc(so_cache_zone
); 
 200                  *so 
= (struct socket 
*) zalloc_noblock(so_cache_zone
); 
 205             bzero((caddr_t
)*so
, sizeof(struct socket
)); 
 208              * Define offsets for extra structures into our single block of 
 209              * memory. Align extra structures on longword boundaries. 
 213             offset 
= (u_long
) *so
; 
 214             offset 
+= sizeof(struct socket
); 
 217                 offset 
&= 0xfffffffc; 
 219             (*so
)->so_saved_pcb 
= (caddr_t
) offset
; 
 220             offset 
+= get_inpcb_str_size(); 
 223                 offset 
&= 0xfffffffc; 
 226             ((struct inpcb 
*) (*so
)->so_saved_pcb
)->inp_saved_ppcb 
= (caddr_t
) offset
; 
 228             kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so
, 
 230                     ((struct inpcb 
*)(*so
)->so_saved_pcb
)->inp_saved_ppcb
); 
 234     (*so
)->cached_in_sock_layer 
= 1; 
 238 void cached_sock_free(so
)  
 245         if (++cached_sock_count 
> MAX_CACHED_SOCKETS
) { 
 249                 kprintf("Freeing overflowed cached socket %x\n", so
); 
 251                 zfree(so_cache_zone
, (vm_offset_t
) so
); 
 255                 kprintf("Freeing socket %x into cache\n", so
); 
 257                 if (so_cache_hw 
< cached_sock_count
) 
 258                         so_cache_hw 
= cached_sock_count
; 
 260                 so
->cache_next 
= socket_cache_head
; 
 262                 if (socket_cache_head
) 
 263                         socket_cache_head
->cache_prev 
= so
; 
 265                         socket_cache_tail 
= so
; 
 267                 so
->cache_timestamp 
= so_cache_time
; 
 268                 socket_cache_head 
= so
; 
 273         kprintf("Freed cached sock %x into cache - count is %d\n", so
, cached_sock_count
); 
 280 void so_cache_timer() 
 282         register struct socket  
*p
; 
 284         register int            n_freed 
= 0; 
 285         boolean_t       funnel_state
; 
 287         funnel_state 
= thread_funnel_set(network_flock
, TRUE
); 
 293         while (p 
= socket_cache_tail
) 
 295                 if ((so_cache_time 
- p
->cache_timestamp
) < SO_CACHE_TIME_LIMIT
) 
 300                 if (socket_cache_tail 
= p
->cache_prev
) 
 301                         p
->cache_prev
->cache_next 
= 0; 
 302                 if (--cached_sock_count 
== 0) 
 303                         socket_cache_head 
= 0; 
 307                 zfree(so_cache_zone
, (vm_offset_t
) p
); 
 310                 if (++n_freed 
>= SO_CACHE_MAX_FREE_BATCH
) 
 312                         so_cache_max_freed
++; 
 318         timeout(so_cache_timer
, NULL
, (SO_CACHE_FLUSH_INTERVAL 
* hz
)); 
 320         (void) thread_funnel_set(network_flock
, FALSE
); 
 323 #endif /* __APPLE__ */ 
 326  * Get a socket structure from our zone, and initialize it. 
 327  * We don't implement `waitok' yet (see comments in uipc_domain.c). 
 328  * Note that it would probably be better to allocate socket 
 329  * and PCB at the same time, but I'm not convinced that all 
 330  * the protocols can be easily modified to do this. 
 333 soalloc(waitok
, dom
, type
) 
 340         if ((dom 
== PF_INET
) && (type 
== SOCK_STREAM
))  
 341             cached_sock_alloc(&so
, waitok
); 
 344              so 
= _MALLOC_ZONE(sizeof(*so
), socket_zone
, M_WAITOK
); 
 346                   bzero(so
, sizeof *so
); 
 348         /* XXX race condition for reentrant kernel */ 
 351              so
->so_gencnt 
= ++so_gencnt
; 
 352              so
->so_zone 
= socket_zone
; 
 359 socreate(dom
, aso
, type
, proto
) 
 365         struct proc 
*p 
= current_proc(); 
 366         register struct protosw 
*prp
; 
 367         register struct socket 
*so
; 
 368         register int error 
= 0; 
 371                 prp 
= pffindproto(dom
, proto
, type
); 
 373                 prp 
= pffindtype(dom
, type
); 
 375         if (prp 
== 0 || prp
->pr_usrreqs
->pru_attach 
== 0) 
 376                 return (EPROTONOSUPPORT
); 
 379         if (p
->p_prison 
&& jail_socket_unixiproute_only 
&& 
 380             prp
->pr_domain
->dom_family 
!= PF_LOCAL 
&& 
 381             prp
->pr_domain
->dom_family 
!= PF_INET 
&& 
 382             prp
->pr_domain
->dom_family 
!= PF_ROUTE
) { 
 383                 return (EPROTONOSUPPORT
); 
 387         if (prp
->pr_type 
!= type
) 
 389         so 
= soalloc(p 
!= 0, dom
, type
); 
 393         TAILQ_INIT(&so
->so_incomp
); 
 394         TAILQ_INIT(&so
->so_comp
); 
 399                 if (p
->p_ucred
->cr_uid 
== 0) 
 400                         so
->so_state 
= SS_PRIV
; 
 402                 so
->so_uid 
= p
->p_ucred
->cr_uid
; 
 405         so
->so_cred 
= p
->p_ucred
; 
 410         so
->so_rcv
.sb_flags 
|= SB_RECV
; /* XXX */ 
 411         if (prp
->pr_sfilter
.tqh_first
) 
 412                 error 
= sfilter_init(so
); 
 415                 error 
= (*prp
->pr_usrreqs
->pru_attach
)(so
, proto
, p
); 
 417                 so
->so_state 
|= SS_NOFDREF
; 
 422         prp
->pr_domain
->dom_refs
++; 
 423         so
->so_rcv
.sb_so 
= so
->so_snd
.sb_so 
= so
; 
 424         TAILQ_INIT(&so
->so_evlist
); 
 433         struct sockaddr 
*nam
; 
 436         struct proc 
*p 
= current_proc(); 
 441         error 
= (*so
->so_proto
->pr_usrreqs
->pru_bind
)(so
, nam
, p
); 
 445                         if (kp
->e_soif 
&& kp
->e_soif
->sf_sobind
) { 
 446                                 error 
= (*kp
->e_soif
->sf_sobind
)(so
, nam
, kp
); 
 448                                         if (error 
== EJUSTRETURN
) { 
 467         so
->so_gencnt 
= ++so_gencnt
; 
 470         if (so
->so_rcv
.sb_hiwat
) 
 471                 (void)chgsbsize(so
->so_cred
->cr_uidinfo
, 
 472                     &so
->so_rcv
.sb_hiwat
, 0, RLIM_INFINITY
); 
 473         if (so
->so_snd
.sb_hiwat
) 
 474                 (void)chgsbsize(so
->so_cred
->cr_uidinfo
, 
 475                     &so
->so_snd
.sb_hiwat
, 0, RLIM_INFINITY
); 
 477         if (so
->so_accf 
!= NULL
) { 
 478                 if (so
->so_accf
->so_accept_filter 
!= NULL 
&&  
 479                         so
->so_accf
->so_accept_filter
->accf_destroy 
!= NULL
) { 
 480                         so
->so_accf
->so_accept_filter
->accf_destroy(so
); 
 482                 if (so
->so_accf
->so_accept_filter_str 
!= NULL
) 
 483                         FREE(so
->so_accf
->so_accept_filter_str
, M_ACCF
); 
 484                 FREE(so
->so_accf
, M_ACCF
); 
 488         zfreei(so
->so_zone
, so
); 
 490         if (so
->cached_in_sock_layer 
== 1)  
 491              cached_sock_free(so
); 
 493              _FREE_ZONE(so
, sizeof(*so
), so
->so_zone
); 
 494 #endif /* __APPLE__ */ 
 498 solisten(so
, backlog
) 
 499         register struct socket 
*so
; 
 504         struct proc 
*p 
= current_proc(); 
 508         error 
= (*so
->so_proto
->pr_usrreqs
->pru_listen
)(so
, p
); 
 513         if (TAILQ_EMPTY(&so
->so_comp
)) 
 514                 so
->so_options 
|= SO_ACCEPTCONN
; 
 515         if (backlog 
< 0 || backlog 
> somaxconn
) 
 517         so
->so_qlimit 
= backlog
; 
 520                 if (kp
->e_soif 
&& kp
->e_soif
->sf_solisten
) { 
 521                         error 
= (*kp
->e_soif
->sf_solisten
)(so
, kp
); 
 523                                 if (error 
== EJUSTRETURN
) { 
 541         register struct socket 
*so
; 
 545         struct socket 
*head 
= so
->so_head
; 
 549                 if (kp
->e_soif 
&& kp
->e_soif
->sf_sofree
) { 
 550                         error 
= (*kp
->e_soif
->sf_sofree
)(so
, kp
); 
 552                                 selthreadclear(&so
->so_snd
.sb_sel
); 
 553                                 selthreadclear(&so
->so_rcv
.sb_sel
); 
 554                                 return; /* void fn */ 
 560         if (so
->so_pcb 
|| (so
->so_state 
& SS_NOFDREF
) == 0) { 
 562                 selthreadclear(&so
->so_snd
.sb_sel
); 
 563                 selthreadclear(&so
->so_rcv
.sb_sel
); 
 568                 if (so
->so_state 
& SS_INCOMP
) { 
 569                         TAILQ_REMOVE(&head
->so_incomp
, so
, so_list
); 
 571                 } else if (so
->so_state 
& SS_COMP
) { 
 573                          * We must not decommission a socket that's 
 574                          * on the accept(2) queue.  If we do, then 
 575                          * accept(2) may hang after select(2) indicated 
 576                          * that the listening socket was ready. 
 579                         selthreadclear(&so
->so_snd
.sb_sel
); 
 580                         selthreadclear(&so
->so_rcv
.sb_sel
); 
 584                         panic("sofree: not queued"); 
 587                 so
->so_state 
&= ~SS_INCOMP
; 
 591         selthreadclear(&so
->so_snd
.sb_sel
); 
 592         sbrelease(&so
->so_snd
); 
 600  * Close a socket on last file table reference removal. 
 601  * Initiate disconnect if connected. 
 602  * Free socket when disconnect complete. 
 606         register struct socket 
*so
; 
 608         int s 
= splnet();               /* conservative */ 
 613         funsetown(so
->so_sigio
); 
 617                 if (kp
->e_soif 
&& kp
->e_soif
->sf_soclose
) { 
 618                         error 
= (*kp
->e_soif
->sf_soclose
)(so
, kp
); 
 621                                 return((error 
== EJUSTRETURN
) ? 0 : error
); 
 627         if (so
->so_options 
& SO_ACCEPTCONN
) { 
 628                 struct socket 
*sp
, *sonext
; 
 630                 sp 
= TAILQ_FIRST(&so
->so_incomp
); 
 631                 for (; sp 
!= NULL
; sp 
= sonext
) { 
 632                         sonext 
= TAILQ_NEXT(sp
, so_list
); 
 635                 for (sp 
= TAILQ_FIRST(&so
->so_comp
); sp 
!= NULL
; sp 
= sonext
) { 
 636                         sonext 
= TAILQ_NEXT(sp
, so_list
); 
 637                         /* Dequeue from so_comp since sofree() won't do it */ 
 638                         TAILQ_REMOVE(&so
->so_comp
, sp
, so_list
); 
 640                         sp
->so_state 
&= ~SS_COMP
; 
 648         if (so
->so_state 
& SS_ISCONNECTED
) { 
 649                 if ((so
->so_state 
& SS_ISDISCONNECTING
) == 0) { 
 650                         error 
= sodisconnect(so
); 
 654                 if (so
->so_options 
& SO_LINGER
) { 
 655                         if ((so
->so_state 
& SS_ISDISCONNECTING
) && 
 656                             (so
->so_state 
& SS_NBIO
)) 
 658                         while (so
->so_state 
& SS_ISCONNECTED
) { 
 659                                 error 
= tsleep((caddr_t
)&so
->so_timeo
, 
 660                                     PSOCK 
| PCATCH
, "soclos", so
->so_linger
); 
 668                 int error2 
= (*so
->so_proto
->pr_usrreqs
->pru_detach
)(so
); 
 673         if (so
->so_pcb 
&& so
->so_state 
& SS_NOFDREF
) 
 674                 panic("soclose: NOFDREF"); 
 675         so
->so_state 
|= SS_NOFDREF
; 
 677         so
->so_proto
->pr_domain
->dom_refs
--; 
 686  * Must be called at splnet... 
 694         error 
= (*so
->so_proto
->pr_usrreqs
->pru_abort
)(so
); 
 704         register struct socket 
*so
; 
 705         struct sockaddr 
**nam
; 
 711         if ((so
->so_state 
& SS_NOFDREF
) == 0) 
 712                 panic("soaccept: !NOFDREF"); 
 713         so
->so_state 
&= ~SS_NOFDREF
; 
 714         error 
= (*so
->so_proto
->pr_usrreqs
->pru_accept
)(so
, nam
); 
 718                         if (kp
->e_soif 
&& kp
->e_soif
->sf_soaccept
) { 
 719                                 error 
= (*kp
->e_soif
->sf_soaccept
)(so
, nam
, kp
); 
 721                                         if (error 
== EJUSTRETURN
) { 
 740         register struct socket 
*so
; 
 741         struct sockaddr 
*nam
; 
 746         struct proc 
*p 
= current_proc(); 
 749         if (so
->so_options 
& SO_ACCEPTCONN
) 
 753          * If protocol is connection-based, can only connect once. 
 754          * Otherwise, if connected, try to disconnect first. 
 755          * This allows user to disconnect by connecting to, e.g., 
 758         if (so
->so_state 
& (SS_ISCONNECTED
|SS_ISCONNECTING
) && 
 759             ((so
->so_proto
->pr_flags 
& PR_CONNREQUIRED
) || 
 760             (error 
= sodisconnect(so
)))) 
 764                  * Run connect filter before calling protocol: 
 765                  *  - non-blocking connect returns before completion; 
 766                  *  - allows filters to modify address. 
 770                         if (kp
->e_soif 
&& kp
->e_soif
->sf_soconnect
) { 
 771                                 error 
= (*kp
->e_soif
->sf_soconnect
)(so
, nam
, kp
); 
 773                                         if (error 
== EJUSTRETURN
) { 
 782                 error 
= (*so
->so_proto
->pr_usrreqs
->pru_connect
)(so
, nam
, p
); 
 790         register struct socket 
*so1
; 
 797         error 
= (*so1
->so_proto
->pr_usrreqs
->pru_connect2
)(so1
, so2
); 
 799                 kp 
= sotokextcb(so1
); 
 801                         if (kp
->e_soif 
&& kp
->e_soif
->sf_soconnect2
) { 
 802                                 error 
= (*kp
->e_soif
->sf_soconnect2
)(so1
, so2
, kp
); 
 804                                         if (error 
== EJUSTRETURN
) { 
 821         register struct socket 
*so
; 
 827         if ((so
->so_state 
& SS_ISCONNECTED
) == 0) { 
 831         if (so
->so_state 
& SS_ISDISCONNECTING
) { 
 835         error 
= (*so
->so_proto
->pr_usrreqs
->pru_disconnect
)(so
); 
 839                         if (kp
->e_soif 
&& kp
->e_soif
->sf_sodisconnect
) { 
 840                                 error 
= (*kp
->e_soif
->sf_sodisconnect
)(so
, kp
); 
 842                                         if (error 
== EJUSTRETURN
) { 
 859 #define SBLOCKWAIT(f)   (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT) 
 862  * If send must go all at once and message is larger than 
 863  * send buffering, then hard error. 
 864  * Lock against other senders. 
 865  * If must go all at once and not enough room now, then 
 866  * inform user that this would block and do nothing. 
 867  * Otherwise, if nonblocking, send as much as possible. 
 868  * The data to be sent is described by "uio" if nonzero, 
 869  * otherwise by the mbuf chain "top" (which must be null 
 870  * if uio is not).  Data provided in mbuf chain must be small 
 871  * enough to send all at once. 
 873  * Returns nonzero on error, timeout or signal; callers 
 874  * must check for short counts if EINTR/ERESTART are returned. 
 875  * Data and control buffers are freed on return. 
 877  * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf 
 878  * MSG_SEND: go thru as for MSG_HOLD on current fragment, then 
 879  *  point at the mbuf chain being constructed and go from there. 
 882 sosend(so
, addr
, uio
, top
, control
, flags
) 
 883         register struct socket 
*so
; 
 884         struct sockaddr 
*addr
; 
 887         struct mbuf 
*control
; 
 892         register struct mbuf 
*m
, *freelist 
= NULL
; 
 893         register long space
, len
, resid
; 
 894         int clen 
= 0, error
, s
, dontroute
, mlen
, sendflags
; 
 895         int atomic 
= sosendallatonce(so
) || top
; 
 896         struct proc 
*p 
= current_proc(); 
 900                 resid 
= uio
->uio_resid
; 
 902                 resid 
= top
->m_pkthdr
.len
; 
 904         KERNEL_DEBUG((DBG_FNC_SOSEND 
| DBG_FUNC_START
), 
 909                      so
->so_snd
.sb_hiwat
); 
 912          * In theory resid should be unsigned. 
 913          * However, space must be signed, as it might be less than 0 
 914          * if we over-committed, and we must use a signed comparison 
 915          * of space and resid.  On the other hand, a negative resid 
 916          * causes us to loop sending 0-length segments to the protocol. 
 918          * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM 
 919          * type sockets since that's an error. 
 921         if (resid 
< 0 || so
->so_type 
== SOCK_STREAM 
&& (flags 
& MSG_EOR
)) { 
 927             (flags 
& MSG_DONTROUTE
) && (so
->so_options 
& SO_DONTROUTE
) == 0 && 
 928             (so
->so_proto
->pr_flags 
& PR_ATOMIC
); 
 930                 p
->p_stats
->p_ru
.ru_msgsnd
++; 
 932                 clen 
= control
->m_len
; 
 933 #define snderr(errno)   { error = errno; splx(s); goto release; } 
 936         error 
= sblock(&so
->so_snd
, SBLOCKWAIT(flags
)); 
 941                 if (so
->so_state 
& SS_CANTSENDMORE
) 
 944                         error 
= so
->so_error
; 
 949                 if ((so
->so_state 
& SS_ISCONNECTED
) == 0) { 
 951                          * `sendto' and `sendmsg' are allowed on a connection- 
 952                          * based socket if it supports implied connect. 
 953                          * Return ENOTCONN if not connected and no address is 
 956                         if ((so
->so_proto
->pr_flags 
& PR_CONNREQUIRED
) && 
 957                             (so
->so_proto
->pr_flags 
& PR_IMPLOPCL
) == 0) { 
 958                                 if ((so
->so_state 
& SS_ISCONFIRMING
) == 0 && 
 959                                     !(resid 
== 0 && clen 
!= 0)) 
 961                         } else if (addr 
== 0 && !(flags
&MSG_HOLD
)) 
 962                             snderr(so
->so_proto
->pr_flags 
& PR_CONNREQUIRED 
? 
 963                                    ENOTCONN 
: EDESTADDRREQ
); 
 965                 space 
= sbspace(&so
->so_snd
); 
 968                 if ((atomic 
&& resid 
> so
->so_snd
.sb_hiwat
) || 
 969                     clen 
> so
->so_snd
.sb_hiwat
) 
 971                 if (space 
< resid 
+ clen 
&& uio 
&& 
 972                     (atomic 
|| space 
< so
->so_snd
.sb_lowat 
|| space 
< clen
)) { 
 973                         if (so
->so_state 
& SS_NBIO
) 
 975                         sbunlock(&so
->so_snd
); 
 976                         error 
= sbwait(&so
->so_snd
); 
 989                          * Data is prepackaged in "top". 
 993                                 top
->m_flags 
|= M_EOR
; 
 995                         boolean_t       dropped_funnel 
= FALSE
; 
 999                         bytes_to_copy 
= min(resid
, space
); 
1001                         if (sosendminchain 
> 0) { 
1002                             if (bytes_to_copy 
>= sosendminchain
) { 
1003                                 dropped_funnel 
= TRUE
; 
1004                                 (void)thread_funnel_set(network_flock
, FALSE
); 
1008                             chainlength 
= sosendmaxchain
; 
1012                         if (bytes_to_copy 
>= MINCLSIZE
) { 
1014                            * try to maintain a local cache of mbuf clusters needed to complete this write 
1015                            * the list is further limited to the number that are currently needed to fill the socket 
1016                            * this mechanism allows a large number of mbufs/clusters to be grabbed under a single  
1017                            * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs 
1018                            * if we fail early (or miscalculate the number needed) make sure to release any clusters 
1019                            * we haven't yet consumed. 
1021                           if ((m 
= freelist
) == NULL
) { 
1023                                 int hdrs_needed 
= 0; 
1027                                 num_needed 
= bytes_to_copy 
/ MCLBYTES
; 
1029                                 if ((bytes_to_copy 
- (num_needed 
* MCLBYTES
)) >= MINCLSIZE
) 
1032                                 if ((freelist 
= m_getpackets(num_needed
, hdrs_needed
, M_WAIT
)) == NULL
) 
1033                                     goto getpackets_failed
; 
1036                             freelist 
= m
->m_next
; 
1040                             len 
= min(mlen
, bytes_to_copy
); 
1044                                 MGETHDR(m
, M_WAIT
, MT_DATA
); 
1046                                 m
->m_pkthdr
.len 
= 0; 
1047                                 m
->m_pkthdr
.rcvif 
= (struct ifnet 
*)0; 
1049                                 MGET(m
, M_WAIT
, MT_DATA
); 
1052                             len 
= min(mlen
, bytes_to_copy
); 
1054                              * For datagram protocols, leave room 
1055                              * for protocol headers in first mbuf. 
1057                             if (atomic 
&& top 
== 0 && len 
< mlen
) 
1064                         error 
= uiomove(mtod(m
, caddr_t
), (int)len
, uio
); 
1066                         resid 
= uio
->uio_resid
; 
1070                         top
->m_pkthdr
.len 
+= len
; 
1075                                 if (flags 
& MSG_EOR
) 
1076                                         top
->m_flags 
|= M_EOR
; 
1079                         bytes_to_copy 
= min(resid
, space
); 
1081                     } while (space 
> 0 && (chainlength 
< sosendmaxchain 
|| atomic 
|| resid 
< MINCLSIZE
)); 
1083                     if (dropped_funnel 
== TRUE
) 
1084                         (void)thread_funnel_set(network_flock
, TRUE
); 
1089                     if (flags 
& (MSG_HOLD
|MSG_SEND
)) 
1090                     {   /* Enqueue for later, go away if HOLD */ 
1091                         register struct mbuf 
*mb1
; 
1092                         if (so
->so_temp 
&& (flags 
& MSG_FLUSH
)) 
1093                         {       m_freem(so
->so_temp
); 
1097                                 so
->so_tail
->m_next 
= top
; 
1111                             so
->so_options 
|= SO_DONTROUTE
; 
1112                     s 
= splnet();                               /* XXX */ 
1113                     /* Compute flags here, for pru_send and NKEs */ 
1114                     sendflags 
= (flags 
& MSG_OOB
) ? PRUS_OOB 
: 
1116                          * If the user set MSG_EOF, the protocol 
1117                          * understands this flag and nothing left to 
1118                          * send then use PRU_SEND_EOF instead of PRU_SEND. 
1120                         ((flags 
& MSG_EOF
) && 
1121                          (so
->so_proto
->pr_flags 
& PR_IMPLOPCL
) && 
1124                         /* If there is more to send set PRUS_MORETOCOME */ 
1125                         (resid 
> 0 && space 
> 0) ? PRUS_MORETOCOME 
: 0; 
1126                     kp 
= sotokextcb(so
); 
1128                     {   if (kp
->e_soif 
&& kp
->e_soif
->sf_sosend
) { 
1129                                         error 
= (*kp
->e_soif
->sf_sosend
)(so
, &addr
, 
1136                                         if (error 
== EJUSTRETURN
) { 
1137                                                 sbunlock(&so
->so_snd
); 
1140                                                         m_freem_list(freelist
);      
1149                     error 
= (*so
->so_proto
->pr_usrreqs
->pru_send
)(so
, 
1150                         sendflags
, top
, addr
, control
, p
); 
1153                     if (flags 
& MSG_SEND
) 
1157                             so
->so_options 
&= ~SO_DONTROUTE
; 
1164                 } while (resid 
&& space 
> 0); 
1168         sbunlock(&so
->so_snd
); 
1175                 m_freem_list(freelist
);      
1177         KERNEL_DEBUG(DBG_FNC_SOSEND 
| DBG_FUNC_END
, 
1188  * Implement receive operations on a socket. 
1189  * We depend on the way that records are added to the sockbuf 
1190  * by sbappend*.  In particular, each record (mbufs linked through m_next) 
1191  * must begin with an address if the protocol so specifies, 
1192  * followed by an optional mbuf or mbufs containing ancillary data, 
1193  * and then zero or more mbufs of data. 
1194  * In order to avoid blocking network interrupts for the entire time here, 
1195  * we splx() while doing the actual copy to user space. 
1196  * Although the sockbuf is locked, new data may still be appended, 
1197  * and thus we must maintain consistency of the sockbuf during that time. 
1199  * The caller may receive the data as a single mbuf chain by supplying 
1200  * an mbuf **mp0 for use in returning the chain.  The uio is then used 
1201  * only for the count in uio_resid. 
1204 soreceive(so
, psa
, uio
, mp0
, controlp
, flagsp
) 
1205         register struct socket 
*so
; 
1206         struct sockaddr 
**psa
; 
1209         struct mbuf 
**controlp
; 
1212         register struct mbuf 
*m
, **mp
; 
1213         register struct mbuf 
*free_list
, *ml
; 
1214         register int flags
, len
, error
, s
, offset
; 
1215         struct protosw 
*pr 
= so
->so_proto
; 
1216         struct mbuf 
*nextrecord
; 
1218         int orig_resid 
= uio
->uio_resid
; 
1221         KERNEL_DEBUG(DBG_FNC_SORECEIVE 
| DBG_FUNC_START
, 
1225                      so
->so_rcv
.sb_lowat
, 
1226                      so
->so_rcv
.sb_hiwat
); 
1228         kp 
= sotokextcb(so
); 
1230                 if (kp
->e_soif 
&& kp
->e_soif
->sf_soreceive
) { 
1231                         error 
= (*kp
->e_soif
->sf_soreceive
)(so
, psa
, &uio
, 
1235                                 return((error 
== EJUSTRETURN
) ? 0 : error
); 
1246                 flags 
= *flagsp 
&~ MSG_EOR
; 
1250          * When SO_WANTOOBFLAG is set we try to get out-of-band data  
1251          * regardless of the flags argument. Here is the case where  
1252          * out-of-band data is not inline. 
1254         if ((flags 
& MSG_OOB
) ||  
1255             ((so
->so_options 
& SO_WANTOOBFLAG
) != 0 &&  
1256              (so
->so_options 
& SO_OOBINLINE
) == 0 && 
1257              (so
->so_oobmark 
|| (so
->so_state 
& SS_RCVATMARK
)))) { 
1258                 m 
= m_get(M_WAIT
, MT_DATA
); 
1261                 error 
= (*pr
->pr_usrreqs
->pru_rcvoob
)(so
, m
, flags 
& MSG_PEEK
); 
1265                         error 
= uiomove(mtod(m
, caddr_t
), 
1266                             (int) min(uio
->uio_resid
, m
->m_len
), uio
); 
1268                 } while (uio
->uio_resid 
&& error 
== 0 && m
); 
1273                 if ((so
->so_options 
& SO_WANTOOBFLAG
) != 0) { 
1274                         if (error 
== EWOULDBLOCK 
|| error 
== EINVAL
) { 
1276                                  * Let's try to get normal data: 
1277                                  *  EWOULDBLOCK: out-of-band data not received yet; 
1278                                  *  EINVAL: out-of-band data already read. 
1282                         } else if (error 
== 0 && flagsp
) 
1285                 KERNEL_DEBUG(DBG_FNC_SORECEIVE 
| DBG_FUNC_END
, error
,0,0,0,0); 
1291                 *mp 
= (struct mbuf 
*)0; 
1292         if (so
->so_state 
& SS_ISCONFIRMING 
&& uio
->uio_resid
) 
1293                 (*pr
->pr_usrreqs
->pru_rcvd
)(so
, 0); 
1296         error 
= sblock(&so
->so_rcv
, SBLOCKWAIT(flags
)); 
1298                 KERNEL_DEBUG(DBG_FNC_SORECEIVE 
| DBG_FUNC_END
, error
,0,0,0,0); 
1303         m 
= so
->so_rcv
.sb_mb
; 
1305          * If we have less data than requested, block awaiting more 
1306          * (subject to any timeout) if: 
1307          *   1. the current count is less than the low water mark, or 
1308          *   2. MSG_WAITALL is set, and it is possible to do the entire 
1309          *      receive operation at once if we block (resid <= hiwat). 
1310          *   3. MSG_DONTWAIT is not set 
1311          * If MSG_WAITALL is set but resid is larger than the receive buffer, 
1312          * we have to do the receive in sections, and thus risk returning 
1313          * a short count if a timeout or signal occurs after we start. 
1315         if (m 
== 0 || (((flags 
& MSG_DONTWAIT
) == 0 && 
1316             so
->so_rcv
.sb_cc 
< uio
->uio_resid
) && 
1317             (so
->so_rcv
.sb_cc 
< so
->so_rcv
.sb_lowat 
|| 
1318             ((flags 
& MSG_WAITALL
) && uio
->uio_resid 
<= so
->so_rcv
.sb_hiwat
)) && 
1319             m
->m_nextpkt 
== 0 && (pr
->pr_flags 
& PR_ATOMIC
) == 0)) { 
1320                 KASSERT(m 
!= 0 || !so
->so_rcv
.sb_cc
, ("receive 1")); 
1324                         error 
= so
->so_error
; 
1325                         if ((flags 
& MSG_PEEK
) == 0) 
1329                 if (so
->so_state 
& SS_CANTRCVMORE
) { 
1335                 for (; m
; m 
= m
->m_next
) 
1336                         if (m
->m_type 
== MT_OOBDATA  
|| (m
->m_flags 
& M_EOR
)) { 
1337                                 m 
= so
->so_rcv
.sb_mb
; 
1340                 if ((so
->so_state 
& (SS_ISCONNECTED
|SS_ISCONNECTING
)) == 0 && 
1341                     (so
->so_proto
->pr_flags 
& PR_CONNREQUIRED
)) { 
1345                 if (uio
->uio_resid 
== 0) 
1347                 if ((so
->so_state 
& SS_NBIO
) || (flags 
& MSG_DONTWAIT
)) { 
1348                         error 
= EWOULDBLOCK
; 
1351                 sbunlock(&so
->so_rcv
); 
1353                     printf("Waiting for socket data\n"); 
1354                 error 
= sbwait(&so
->so_rcv
); 
1356                     printf("SORECEIVE - sbwait returned %d\n", error
); 
1359                     KERNEL_DEBUG(DBG_FNC_SORECEIVE 
| DBG_FUNC_END
, error
,0,0,0,0); 
1367                 uio
->uio_procp
->p_stats
->p_ru
.ru_msgrcv
++; 
1369         nextrecord 
= m
->m_nextpkt
; 
1370         if ((pr
->pr_flags 
& PR_ADDR
) && m
->m_type 
== MT_SONAME
) { 
1371                 KASSERT(m
->m_type 
== MT_SONAME
, ("receive 1a")); 
1374                         *psa 
= dup_sockaddr(mtod(m
, struct sockaddr 
*), 
1376                 if (flags 
& MSG_PEEK
) { 
1379                         sbfree(&so
->so_rcv
, m
); 
1380                         MFREE(m
, so
->so_rcv
.sb_mb
); 
1381                         m 
= so
->so_rcv
.sb_mb
; 
1384         while (m 
&& m
->m_type 
== MT_CONTROL 
&& error 
== 0) { 
1385                 if (flags 
& MSG_PEEK
) { 
1387                                 *controlp 
= m_copy(m
, 0, m
->m_len
); 
1390                         sbfree(&so
->so_rcv
, m
); 
1392                                 if (pr
->pr_domain
->dom_externalize 
&& 
1393                                     mtod(m
, struct cmsghdr 
*)->cmsg_type 
== 
1395                                    error 
= (*pr
->pr_domain
->dom_externalize
)(m
); 
1397                                 so
->so_rcv
.sb_mb 
= m
->m_next
; 
1399                                 m 
= so
->so_rcv
.sb_mb
; 
1401                                 MFREE(m
, so
->so_rcv
.sb_mb
); 
1402                                 m 
= so
->so_rcv
.sb_mb
; 
1407                         controlp 
= &(*controlp
)->m_next
; 
1411                 if ((flags 
& MSG_PEEK
) == 0) 
1412                         m
->m_nextpkt 
= nextrecord
; 
1414                 if (type 
== MT_OOBDATA
) 
1421         ml 
= (struct mbuf 
*)0; 
1423         while (m 
&& uio
->uio_resid 
> 0 && error 
== 0) { 
1424                 if (m
->m_type 
== MT_OOBDATA
) { 
1425                         if (type 
!= MT_OOBDATA
) 
1427                 } else if (type 
== MT_OOBDATA
) 
1431  * This assertion needs rework.  The trouble is Appletalk uses many 
1432  * mbuf types (NOT listed in mbuf.h!) which will trigger this panic. 
1433  * For now just remove the assertion...  CSM 9/98 
1436                     KASSERT(m
->m_type 
== MT_DATA 
|| m
->m_type 
== MT_HEADER
, 
1440                  * Make sure to always set MSG_OOB event when getting  
1441                  * out of band data inline. 
1443                 if ((so
->so_options 
& SO_WANTOOBFLAG
) != 0 && 
1444                         (so
->so_options 
& SO_OOBINLINE
) != 0 &&  
1445                         (so
->so_state 
& SS_RCVATMARK
) != 0) { 
1449                 so
->so_state 
&= ~SS_RCVATMARK
; 
1450                 len 
= uio
->uio_resid
; 
1451                 if (so
->so_oobmark 
&& len 
> so
->so_oobmark 
- offset
) 
1452                         len 
= so
->so_oobmark 
- offset
; 
1453                 if (len 
> m
->m_len 
- moff
) 
1454                         len 
= m
->m_len 
- moff
; 
1456                  * If mp is set, just pass back the mbufs. 
1457                  * Otherwise copy them out via the uio, then free. 
1458                  * Sockbuf must be consistent here (points to current mbuf, 
1459                  * it points to next record) when we drop priority; 
1460                  * we must note any additions to the sockbuf when we 
1461                  * block interrupts again. 
1465                         error 
= uiomove(mtod(m
, caddr_t
) + moff
, (int)len
, uio
); 
1470                         uio
->uio_resid 
-= len
; 
1471                 if (len 
== m
->m_len 
- moff
) { 
1472                         if (m
->m_flags 
& M_EOR
) 
1474                         if (flags 
& MSG_PEEK
) { 
1478                                 nextrecord 
= m
->m_nextpkt
; 
1479                                 sbfree(&so
->so_rcv
, m
); 
1483                                         so
->so_rcv
.sb_mb 
= m 
= m
->m_next
; 
1484                                         *mp 
= (struct mbuf 
*)0; 
1490                                         so
->so_rcv
.sb_mb 
= m 
= m
->m_next
; 
1494                                         m
->m_nextpkt 
= nextrecord
; 
1497                         if (flags 
& MSG_PEEK
) 
1501                                         *mp 
= m_copym(m
, 0, len
, M_WAIT
); 
1504                                 so
->so_rcv
.sb_cc 
-= len
; 
1507                 if (so
->so_oobmark
) { 
1508                         if ((flags 
& MSG_PEEK
) == 0) { 
1509                                 so
->so_oobmark 
-= len
; 
1510                                 if (so
->so_oobmark 
== 0) { 
1511                                     so
->so_state 
|= SS_RCVATMARK
; 
1512                                     postevent(so
, 0, EV_OOB
); 
1517                                 if (offset 
== so
->so_oobmark
) 
1521                 if (flags 
& MSG_EOR
) 
1524                  * If the MSG_WAITALL flag is set (for non-atomic socket), 
1525                  * we must not quit until "uio->uio_resid == 0" or an error 
1526                  * termination.  If a signal/timeout occurs, return 
1527                  * with a short count but without error. 
1528                  * Keep sockbuf locked against other readers. 
1530                 while (flags 
& MSG_WAITALL 
&& m 
== 0 && uio
->uio_resid 
> 0 && 
1531                     !sosendallatonce(so
) && !nextrecord
) { 
1532                         if (so
->so_error 
|| so
->so_state 
& SS_CANTRCVMORE
) 
1536                                 m_freem_list(free_list
); 
1538                         error 
= sbwait(&so
->so_rcv
); 
1540                                 sbunlock(&so
->so_rcv
); 
1542                                 KERNEL_DEBUG(DBG_FNC_SORECEIVE 
| DBG_FUNC_END
, 0,0,0,0,0); 
1545                         m 
= so
->so_rcv
.sb_mb
; 
1547                                 nextrecord 
= m
->m_nextpkt
; 
1550                         ml 
= (struct mbuf 
*)0; 
1554                 m_freem_list(free_list
); 
1557         if (m 
&& pr
->pr_flags 
& PR_ATOMIC
) { 
1559                 if (so
->so_options 
& SO_DONTTRUNC
) 
1560                         flags 
|= MSG_RCVMORE
; 
1564                         if ((flags 
& MSG_PEEK
) == 0) 
1565                                 (void) sbdroprecord(&so
->so_rcv
); 
1570         if ((flags 
& MSG_PEEK
) == 0) { 
1572                         so
->so_rcv
.sb_mb 
= nextrecord
; 
1573                 if (pr
->pr_flags 
& PR_WANTRCVD 
&& so
->so_pcb
) 
1574                         (*pr
->pr_usrreqs
->pru_rcvd
)(so
, flags
); 
1577         if ((so
->so_options 
& SO_WANTMORE
) && so
->so_rcv
.sb_cc 
> 0) 
1578                 flags 
|= MSG_HAVEMORE
; 
1580         if (orig_resid 
== uio
->uio_resid 
&& orig_resid 
&& 
1581             (flags 
& MSG_EOR
) == 0 && (so
->so_state 
& SS_CANTRCVMORE
) == 0) { 
1582                 sbunlock(&so
->so_rcv
); 
1590         sbunlock(&so
->so_rcv
); 
1593         KERNEL_DEBUG(DBG_FNC_SORECEIVE 
| DBG_FUNC_END
, 
1605         register struct socket 
*so
; 
1608         register struct protosw 
*pr 
= so
->so_proto
; 
1613         KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN 
| DBG_FUNC_START
, 0,0,0,0,0); 
1614         kp 
= sotokextcb(so
); 
1616                 if (kp
->e_soif 
&& kp
->e_soif
->sf_soshutdown
) { 
1617                         ret 
= (*kp
->e_soif
->sf_soshutdown
)(so
, how
, kp
); 
1619                                 return((ret 
== EJUSTRETURN
) ? 0 : ret
); 
1624         if (how 
!= SHUT_WR
) { 
1626                 postevent(so
, 0, EV_RCLOSED
); 
1628         if (how 
!= SHUT_RD
) { 
1629             ret 
= ((*pr
->pr_usrreqs
->pru_shutdown
)(so
)); 
1630             postevent(so
, 0, EV_WCLOSED
); 
1631             KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN 
| DBG_FUNC_END
, 0,0,0,0,0); 
1635         KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN 
| DBG_FUNC_END
, 0,0,0,0,0); 
1641         register struct socket 
*so
; 
1643         register struct sockbuf 
*sb 
= &so
->so_rcv
; 
1644         register struct protosw 
*pr 
= so
->so_proto
; 
1645         register int s
, error
; 
1649         kp 
= sotokextcb(so
); 
1651                 if (kp
->e_soif 
&& kp
->e_soif
->sf_sorflush
) { 
1652                         if ((*kp
->e_soif
->sf_sorflush
)(so
, kp
)) 
1658         sb
->sb_flags 
|= SB_NOINTR
; 
1659         (void) sblock(sb
, M_WAIT
); 
1664         selthreadclear(&sb
->sb_sel
); 
1667         bzero((caddr_t
)sb
, sizeof (*sb
)); 
1669         if (asb
.sb_flags 
& SB_KNOTE
) { 
1670                 sb
->sb_sel
.si_note 
= asb
.sb_sel
.si_note
; 
1671                 sb
->sb_flags 
= SB_KNOTE
; 
1675         if (pr
->pr_flags 
& PR_RIGHTS 
&& pr
->pr_domain
->dom_dispose
) 
1676                 (*pr
->pr_domain
->dom_dispose
)(asb
.sb_mb
); 
1682  * Perhaps this routine, and sooptcopyout(), below, ought to come in 
1683  * an additional variant to handle the case where the option value needs 
1684  * to be some kind of integer, but not a specific size. 
1685  * In addition to their use here, these functions are also called by the 
1686  * protocol-level pr_ctloutput() routines. 
1689 sooptcopyin(sopt
, buf
, len
, minlen
) 
1690         struct  sockopt 
*sopt
; 
1698          * If the user gives us more than we wanted, we ignore it, 
1699          * but if we don't get the minimum length the caller 
1700          * wants, we return EINVAL.  On success, sopt->sopt_valsize 
1701          * is set to however much we actually retrieved. 
1703         if ((valsize 
= sopt
->sopt_valsize
) < minlen
) 
1706                 sopt
->sopt_valsize 
= valsize 
= len
; 
1708         if (sopt
->sopt_p 
!= 0) 
1709                 return (copyin(sopt
->sopt_val
, buf
, valsize
)); 
1711         bcopy(sopt
->sopt_val
, buf
, valsize
); 
1718         struct sockopt 
*sopt
; 
1726         if (sopt
->sopt_dir 
!= SOPT_SET
) { 
1727                 sopt
->sopt_dir 
= SOPT_SET
; 
1730         kp 
= sotokextcb(so
); 
1732                 if (kp
->e_soif 
&& kp
->e_soif
->sf_socontrol
) { 
1733                         error 
= (*kp
->e_soif
->sf_socontrol
)(so
, sopt
, kp
); 
1735                                 return((error 
== EJUSTRETURN
) ? 0 : error
); 
1741         if (sopt
->sopt_level 
!= SOL_SOCKET
) { 
1742                 if (so
->so_proto 
&& so
->so_proto
->pr_ctloutput
) 
1743                         return ((*so
->so_proto
->pr_ctloutput
) 
1745                 error 
= ENOPROTOOPT
; 
1747                 switch (sopt
->sopt_name
) { 
1749                         error 
= sooptcopyin(sopt
, &l
, sizeof l
, sizeof l
); 
1753                         so
->so_linger 
= l
.l_linger
; 
1755                                 so
->so_options 
|= SO_LINGER
; 
1757                                 so
->so_options 
&= ~SO_LINGER
; 
1763                 case SO_USELOOPBACK
: 
1772                 case SO_WANTOOBFLAG
: 
1774                         error 
= sooptcopyin(sopt
, &optval
, sizeof optval
, 
1779                                 so
->so_options 
|= sopt
->sopt_name
; 
1781                                 so
->so_options 
&= ~sopt
->sopt_name
; 
1788                         error 
= sooptcopyin(sopt
, &optval
, sizeof optval
, 
1794                          * Values < 1 make no sense for any of these 
1795                          * options, so disallow them. 
1802                         switch (sopt
->sopt_name
) { 
1805                                 if (sbreserve(sopt
->sopt_name 
== SO_SNDBUF 
? 
1806                                               &so
->so_snd 
: &so
->so_rcv
, 
1807                                               (u_long
) optval
) == 0) { 
1814                          * Make sure the low-water is never greater than 
1818                                 so
->so_snd
.sb_lowat 
= 
1819                                     (optval 
> so
->so_snd
.sb_hiwat
) ? 
1820                                     so
->so_snd
.sb_hiwat 
: optval
; 
1823                                 so
->so_rcv
.sb_lowat 
= 
1824                                     (optval 
> so
->so_rcv
.sb_hiwat
) ? 
1825                                     so
->so_rcv
.sb_hiwat 
: optval
; 
1832                         error 
= sooptcopyin(sopt
, &tv
, sizeof tv
, 
1837                         /* assert(hz > 0); */ 
1838                         if (tv
.tv_sec 
< 0 || tv
.tv_sec 
> SHRT_MAX 
/ hz 
|| 
1839                             tv
.tv_usec 
< 0 || tv
.tv_usec 
>= 1000000) { 
1843                         /* assert(tick > 0); */ 
1844                         /* assert(ULONG_MAX - SHRT_MAX >= 1000000); */ 
1846                         long tmp 
= (u_long
)(tv
.tv_sec 
* hz
) + tv
.tv_usec 
/ tick
; 
1847                         if (tmp 
> SHRT_MAX
) { 
1854                         switch (sopt
->sopt_name
) { 
1856                                 so
->so_snd
.sb_timeo 
= val
; 
1859                                 so
->so_rcv
.sb_timeo 
= val
; 
1867                         struct NFDescriptor 
*nf1
, *nf2 
= NULL
; 
1869                         error 
= sooptcopyin(sopt
, &nke
, 
1870                                                                 sizeof nke
, sizeof nke
); 
1874                         error 
= nke_insert(so
, &nke
); 
1879                         error 
= sooptcopyin(sopt
, &optval
, sizeof optval
, 
1884                                 so
->so_flags 
|= SOF_NOSIGPIPE
; 
1886                                 so
->so_flags 
&= ~SOF_NOSIGPIPE
; 
1891                         error 
= ENOPROTOOPT
; 
1894                 if (error 
== 0 && so
->so_proto 
&& so
->so_proto
->pr_ctloutput
) { 
1895                         (void) ((*so
->so_proto
->pr_ctloutput
) 
1903 /* Helper routine for getsockopt */ 
1905 sooptcopyout(sopt
, buf
, len
) 
1906         struct  sockopt 
*sopt
; 
1916          * Documented get behavior is that we always return a value, 
1917          * possibly truncated to fit in the user's buffer. 
1918          * Traditional behavior is that we always tell the user 
1919          * precisely how much we copied, rather than something useful 
1920          * like the total amount we had available for her. 
1921          * Note that this interface is not idempotent; the entire answer must 
1922          * be generated ahead of time. 
1924         valsize 
= min(len
, sopt
->sopt_valsize
); 
1925         sopt
->sopt_valsize 
= valsize
; 
1926         if (sopt
->sopt_val 
!= 0) { 
1927                 if (sopt
->sopt_p 
!= 0) 
1928                         error 
= copyout(buf
, sopt
->sopt_val
, valsize
); 
1930                         bcopy(buf
, sopt
->sopt_val
, valsize
); 
1938         struct sockopt 
*sopt
; 
1946         if (sopt
->sopt_dir 
!= SOPT_GET
) { 
1947                 sopt
->sopt_dir 
= SOPT_GET
; 
1950         kp 
= sotokextcb(so
); 
1952                 if (kp
->e_soif 
&& kp
->e_soif
->sf_socontrol
) { 
1953                         error 
= (*kp
->e_soif
->sf_socontrol
)(so
, sopt
, kp
); 
1955                                 return((error 
== EJUSTRETURN
) ? 0 : error
); 
1961         if (sopt
->sopt_level 
!= SOL_SOCKET
) { 
1962                 if (so
->so_proto 
&& so
->so_proto
->pr_ctloutput
) { 
1963                         return ((*so
->so_proto
->pr_ctloutput
) 
1966                         return (ENOPROTOOPT
); 
1968                 switch (sopt
->sopt_name
) { 
1970                         l
.l_onoff 
= so
->so_options 
& SO_LINGER
; 
1971                         l
.l_linger 
= so
->so_linger
; 
1972                         error 
= sooptcopyout(sopt
, &l
, sizeof l
); 
1975                 case SO_USELOOPBACK
: 
1987                 case SO_WANTOOBFLAG
: 
1989                         optval 
= so
->so_options 
& sopt
->sopt_name
; 
1991                         error 
= sooptcopyout(sopt
, &optval
, sizeof optval
); 
1995                         optval 
= so
->so_type
; 
2005                         m1 
= so
->so_rcv
.sb_mb
; 
2006                         if (so
->so_proto
->pr_flags 
& PR_ATOMIC
) 
2009                                 kprintf("SKT CC: %d\n", so
->so_rcv
.sb_cc
); 
2012                                         if (m1
->m_type 
== MT_DATA
) 
2013                                                 pkt_total 
+= m1
->m_len
; 
2015                                         kprintf("CNT: %d/%d\n", m1
->m_len
, pkt_total
); 
2021                                 optval 
= so
->so_rcv
.sb_cc
; 
2023                         kprintf("RTN: %d\n", optval
); 
2029                         optval 
= so
->so_error
; 
2034                         optval 
= so
->so_snd
.sb_hiwat
; 
2038                         optval 
= so
->so_rcv
.sb_hiwat
; 
2042                         optval 
= so
->so_snd
.sb_lowat
; 
2046                         optval 
= so
->so_rcv
.sb_lowat
; 
2051                         optval 
= (sopt
->sopt_name 
== SO_SNDTIMEO 
? 
2052                                   so
->so_snd
.sb_timeo 
: so
->so_rcv
.sb_timeo
); 
2054                         tv
.tv_sec 
= optval 
/ hz
; 
2055                         tv
.tv_usec 
= (optval 
% hz
) * tick
; 
2056                         error 
= sooptcopyout(sopt
, &tv
, sizeof tv
); 
2060                         optval 
= (so
->so_flags 
& SOF_NOSIGPIPE
); 
2064                         error 
= ENOPROTOOPT
; 
2073  * Network filter support 
2075 /* Run the list of filters, creating extension control blocks */ 
2076 sfilter_init(register struct socket 
*so
) 
2077 {       struct kextcb 
*kp
, **kpp
; 
2078         struct protosw 
*prp
; 
2079         struct NFDescriptor 
*nfp
; 
2082         nfp 
= prp
->pr_sfilter
.tqh_first
; /* non-null */ 
2086         {       MALLOC(kp
, struct kextcb 
*, sizeof(*kp
), 
2089                         return(ENOBUFS
); /* so_free will clean up */ 
2095                 kp
->e_soif 
= nfp
->nf_soif
; 
2096                 kp
->e_sout 
= nfp
->nf_soutil
; 
2098                  * Ignore return value for create 
2099                  * Everyone gets a chance at startup 
2101                 if (kp
->e_soif 
&& kp
->e_soif
->sf_socreate
) 
2102                         (*kp
->e_soif
->sf_socreate
)(so
, prp
, kp
); 
2103                 nfp 
= nfp
->nf_next
.tqe_next
; 
2109  * Run the list of filters, freeing extension control blocks 
2110  * Assumes the soif/soutil blocks have been handled. 
2112 sfilter_term(struct socket 
*so
) 
2113 {       struct kextcb 
*kp
, *kp1
; 
2119                  * Ignore return code on termination; everyone must 
2122                 if (kp
->e_soif 
&& kp
->e_soif
->sf_sofree
) 
2123                         kp
->e_soif
->sf_sofree(so
, kp
); 
2131 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */ 
2133 soopt_getm(struct sockopt 
*sopt
, struct mbuf 
**mp
) 
2135         struct mbuf 
*m
, *m_prev
; 
2136         int sopt_size 
= sopt
->sopt_valsize
; 
2138         MGET(m
, sopt
->sopt_p 
? M_WAIT 
: M_DONTWAIT
, MT_DATA
); 
2141         if (sopt_size 
> MLEN
) { 
2142                 MCLGET(m
, sopt
->sopt_p 
? M_WAIT 
: M_DONTWAIT
); 
2143                 if ((m
->m_flags 
& M_EXT
) == 0) { 
2147                 m
->m_len 
= min(MCLBYTES
, sopt_size
); 
2149                 m
->m_len 
= min(MLEN
, sopt_size
); 
2151         sopt_size 
-= m
->m_len
; 
2156                 MGET(m
, sopt
->sopt_p 
? M_WAIT 
: M_DONTWAIT
, MT_DATA
); 
2161                 if (sopt_size 
> MLEN
) { 
2162                         MCLGET(m
, sopt
->sopt_p 
? M_WAIT 
: M_DONTWAIT
); 
2163                         if ((m
->m_flags 
& M_EXT
) == 0) { 
2167                         m
->m_len 
= min(MCLBYTES
, sopt_size
); 
2169                         m
->m_len 
= min(MLEN
, sopt_size
); 
2171                 sopt_size 
-= m
->m_len
; 
2178 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */ 
2180 soopt_mcopyin(struct sockopt 
*sopt
, struct mbuf 
*m
) 
2182         struct mbuf 
*m0 
= m
; 
2184         if (sopt
->sopt_val 
== NULL
) 
2186         while (m 
!= NULL 
&& sopt
->sopt_valsize 
>= m
->m_len
) { 
2187                 if (sopt
->sopt_p 
!= NULL
) { 
2190                         error 
= copyin(sopt
->sopt_val
, mtod(m
, char *), 
2197                         bcopy(sopt
->sopt_val
, mtod(m
, char *), m
->m_len
); 
2198                 sopt
->sopt_valsize 
-= m
->m_len
; 
2199                 (caddr_t
)sopt
->sopt_val 
+= m
->m_len
; 
2202         if (m 
!= NULL
) /* should be allocated enoughly at ip6_sooptmcopyin() */ 
2203                 panic("soopt_mcopyin"); 
2207 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */ 
2209 soopt_mcopyout(struct sockopt 
*sopt
, struct mbuf 
*m
) 
2211         struct mbuf 
*m0 
= m
; 
2214         if (sopt
->sopt_val 
== NULL
) 
2216         while (m 
!= NULL 
&& sopt
->sopt_valsize 
>= m
->m_len
) { 
2217                 if (sopt
->sopt_p 
!= NULL
) { 
2220                         error 
= copyout(mtod(m
, char *), sopt
->sopt_val
, 
2227                         bcopy(mtod(m
, char *), sopt
->sopt_val
, m
->m_len
); 
2228                sopt
->sopt_valsize 
-= m
->m_len
; 
2229                (caddr_t
)sopt
->sopt_val 
+= m
->m_len
; 
2230                valsize 
+= m
->m_len
; 
2234                 /* enough soopt buffer should be given from user-land */ 
2238         sopt
->sopt_valsize 
= valsize
; 
2244         register struct socket 
*so
; 
2249         kp 
= sotokextcb(so
); 
2251                 if (kp
->e_soif 
&& kp
->e_soif
->sf_sohasoutofband
) { 
2252                         if ((*kp
->e_soif
->sf_sohasoutofband
)(so
, kp
)) 
2257         if (so
->so_pgid 
< 0) 
2258                 gsignal(-so
->so_pgid
, SIGURG
); 
2259         else if (so
->so_pgid 
> 0 && (p 
= pfind(so
->so_pgid
)) != 0) 
2261         selwakeup(&so
->so_rcv
.sb_sel
); 
2265 sopoll(struct socket 
*so
, int events
, struct ucred 
*cred
, void * wql
) 
2267         struct proc 
*p 
= current_proc(); 
2271         if (events 
& (POLLIN 
| POLLRDNORM
)) 
2273                         revents 
|= events 
& (POLLIN 
| POLLRDNORM
); 
2275         if (events 
& (POLLOUT 
| POLLWRNORM
)) 
2276                 if (sowriteable(so
)) 
2277                         revents 
|= events 
& (POLLOUT 
| POLLWRNORM
); 
2279         if (events 
& (POLLPRI 
| POLLRDBAND
)) 
2280                 if (so
->so_oobmark 
|| (so
->so_state 
& SS_RCVATMARK
)) 
2281                         revents 
|= events 
& (POLLPRI 
| POLLRDBAND
); 
2284                 if (events 
& (POLLIN 
| POLLPRI 
| POLLRDNORM 
| POLLRDBAND
)) { 
2285                         /* Darwin sets the flag first, BSD calls selrecord first */ 
2286                         so
->so_rcv
.sb_flags 
|= SB_SEL
; 
2287                         selrecord(p
, &so
->so_rcv
.sb_sel
, wql
); 
2290                 if (events 
& (POLLOUT 
| POLLWRNORM
)) { 
2291                         /* Darwin sets the flag first, BSD calls selrecord first */ 
2292                         so
->so_snd
.sb_flags 
|= SB_SEL
; 
2293                         selrecord(p
, &so
->so_snd
.sb_sel
, wql
);