/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/kdebug.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <kern/zalloc.h>
#include <machine/limits.h>
int		so_cache_timeouts = 0;
int		so_cache_max_freed = 0;
int		cached_sock_count = 0;
struct socket	*socket_cache_head = 0;
struct socket	*socket_cache_tail = 0;
u_long		so_cache_time = 0;
int		so_cache_init_done = 0;
struct zone	*so_cache_zone;
extern int	get_inpcb_str_size();
extern int	get_tcp_str_size();
int	socket_debug = 0;
int	socket_zone = M_SOCKET;
so_gen_t so_gencnt;	/* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)
#define	DBG_FNC_SOSEND		NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
#define	DBG_FNC_SORECEIVE	NETDBG_CODE(DBG_NETSOCK, (8 << 8))
#define	DBG_FNC_SOSHUTDOWN	NETDBG_CODE(DBG_NETSOCK, (9 << 8))
SYSCTL_DECL(_kern_ipc);

static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW,
	   &somaxconn, 0, "");
/* Should we get a maximum also ??? */
static int sosendmaxchain = 65536;
static int sosendminchain = 16384;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW,
	   &sosendminchain, 0, "");
void	so_cache_timer();
struct mbuf *m_getpackets(int, int, int);
/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
#ifdef __APPLE__
void socketinit()
{
	vm_size_t str_size;

	if (so_cache_init_done)
		return;
	so_cache_init_done = 1;

	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
	str_size = (vm_size_t)(sizeof(struct socket) + 4 +
			       get_inpcb_str_size() + 4 +
			       get_tcp_str_size());
	so_cache_zone = zinit(str_size, 120000 * str_size, 8192, "socache zone");
	kprintf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
}
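/*
 * Illustrative sketch (not built): how one element of the cache zone
 * sized above is laid out.  The pad sizes and the placement of the pcb
 * areas are assumptions drawn only from the zinit() computation; they
 * make the two "+ 4" padding terms concrete.
 */
#if 0
struct socache_element_layout {
	struct socket	so;		/* sizeof(struct socket) */
	char		pad1[4];	/* slack to longword-align the inpcb */
	char		inpcb_area[1];	/* get_inpcb_str_size() bytes */
	char		pad2[4];	/* slack to longword-align the tcpcb */
	char		tcpcb_area[1];	/* get_tcp_str_size() bytes */
};
#endif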
void cached_sock_alloc(so, waitok)
struct socket **so;
int waitok;
{
	caddr_t	temp;
	int s;
	register u_long offset;

	s = splnet();
	if (cached_sock_count) {
		cached_sock_count--;
		*so = socket_cache_head;
		if (*so == 0)
			panic("cached_sock_alloc: cached sock is null");

		socket_cache_head = socket_cache_head->cache_next;
		if (socket_cache_head)
			socket_cache_head->cache_prev = 0;
		else
			socket_cache_tail = 0;
		splx(s);

		temp = (*so)->so_saved_pcb;
		bzero((caddr_t)*so, sizeof(struct socket));
		kprintf("cached_sock_alloc - retrieving cached sock %x - count == %d\n", *so,
			cached_sock_count);
		(*so)->so_saved_pcb = temp;
	}
	else {
		kprintf("Allocating cached sock %x from memory\n", *so);
		splx(s);
		if (waitok)
			*so = (struct socket *) zalloc(so_cache_zone);
		else
			*so = (struct socket *) zalloc_noblock(so_cache_zone);

		if (*so == 0)
			return;

		bzero((caddr_t)*so, sizeof(struct socket));

		/*
		 * Define offsets for extra structures into our single block of
		 * memory. Align extra structures on longword boundaries.
		 */
		offset = (u_long) *so;
		offset += sizeof(struct socket);
		if (offset & 0x3) {
			offset += 4;
			offset &= 0xfffffffc;
		}
		(*so)->so_saved_pcb = (caddr_t) offset;
		offset += get_inpcb_str_size();
		if (offset & 0x3) {
			offset += 4;
			offset &= 0xfffffffc;
		}
		((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb =
			(caddr_t) offset;
		kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
			(*so)->so_saved_pcb,
			((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
	}
	(*so)->cached_in_sock_layer = 1;
}
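/*
 * A minimal sketch of the longword-alignment idiom used above, assuming
 * 32-bit pointers (hence the 0xfffffffc mask): step past the current
 * word, then clear the low two bits.  align4() is a hypothetical helper,
 * not part of this file.
 */
#if 0
static u_long align4(u_long offset)
{
	if (offset & 0x3) {		/* not on a 4-byte boundary */
		offset += 4;		/* move into the next word */
		offset &= 0xfffffffc;	/* truncate to the boundary */
	}
	return offset;			/* e.g. 5 -> 8, 8 -> 8 */
}
#endif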
void cached_sock_free(so)
struct socket *so;
{
	int s;

	s = splnet();
	if (++cached_sock_count > MAX_CACHED_SOCKETS) {
		--cached_sock_count;
		splx(s);
		kprintf("Freeing overflowed cached socket %x\n", so);
		zfree(so_cache_zone, (vm_offset_t) so);
	}
	else {
		kprintf("Freeing socket %x into cache\n", so);
		if (so_cache_hw < cached_sock_count)
			so_cache_hw = cached_sock_count;

		so->cache_next = socket_cache_head;
		so->cache_prev = 0;
		if (socket_cache_head)
			socket_cache_head->cache_prev = so;
		else
			socket_cache_tail = so;

		so->cache_timestamp = so_cache_time;
		socket_cache_head = so;
		splx(s);

		kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
	}
}
void so_cache_timer()
{
	register struct socket *p;
	register int s;
	register int n_freed = 0;
	boolean_t funnel_state;

	funnel_state = thread_funnel_set(network_flock, TRUE);

	++so_cache_time;

	s = splnet();
	while (p = socket_cache_tail)
	{
		if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
			break;

		so_cache_timeouts++;

		if (socket_cache_tail = p->cache_prev)
			p->cache_prev->cache_next = 0;
		if (--cached_sock_count == 0)
			socket_cache_head = 0;
		splx(s);

		zfree(so_cache_zone, (vm_offset_t) p);

		s = splnet();
		if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
		{
			so_cache_max_freed++;
			break;
		}
	}
	splx(s);

	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));

	(void) thread_funnel_set(network_flock, FALSE);
}
#endif /* __APPLE__ */
/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
struct socket *
soalloc(waitok, dom, type)
	int waitok;
	int dom;
	int type;
{
	struct socket *so;

	so = (struct socket *) 0;

	if ((dom == PF_INET) && (type == SOCK_STREAM))
		cached_sock_alloc(&so, waitok);
	else {
		so = _MALLOC_ZONE(sizeof(*so), socket_zone, M_WAITOK);
		if (so)
			bzero(so, sizeof *so);
	}
	/* XXX race condition for reentrant kernel */

	if (so) {
		so->so_gencnt = ++so_gencnt;
		so->so_zone = socket_zone;
	}
	return so;
}
int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = current_proc();
	register struct protosw *prp;
	register struct socket *so;
	register int error = 0;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);

	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);
#ifndef __APPLE__
	if (p->p_prison && jail_socket_unixiproute_only &&
	    prp->pr_domain->dom_family != PF_LOCAL &&
	    prp->pr_domain->dom_family != PF_INET &&
	    prp->pr_domain->dom_family != PF_ROUTE) {
		return (EPROTONOSUPPORT);
	}
#endif
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(p != 0, dom, type);
	if (so == 0)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;

#ifdef __APPLE__
	if (p != 0) {
		if (p->p_ucred->cr_uid == 0)
			so->so_state = SS_PRIV;
		so->so_uid = p->p_ucred->cr_uid;
	}
#else
	so->so_cred = p->p_ucred;
	crhold(so->so_cred);
#endif
	so->so_proto = prp;
#ifdef __APPLE__
	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	if (prp->pr_sfilter.tqh_first)
		error = sfilter_init(so);
	if (error == 0)
#endif
		error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
#ifdef __APPLE__
	prp->pr_domain->dom_refs++;
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	TAILQ_INIT(&so->so_evlist);
#endif
	*aso = so;
	return (0);
}
int
sobind(so, nam)
	struct socket *so;
	struct sockaddr *nam;
{
	struct proc *p = current_proc();
	int error;
	struct kextcb *kp;
	int s = splnet();

	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
	if (error == 0) {
		kp = sotokextcb(so);
		while (kp) {
			if (kp->e_soif && kp->e_soif->sf_sobind) {
				error = (*kp->e_soif->sf_sobind)(so, nam, kp);
				if (error) {
					if (error == EJUSTRETURN) {
						error = 0;
						break;
					}
					splx(s);
					return(error);
				}
			}
			kp = kp->e_next;
		}
	}
	splx(s);
	return (error);
}
void
sodealloc(so)
	struct socket *so;
{
	so->so_gencnt = ++so_gencnt;

#ifndef __APPLE__
	if (so->so_rcv.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
	if (so->so_snd.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
	if (so->so_accf != NULL) {
		if (so->so_accf->so_accept_filter != NULL &&
		    so->so_accf->so_accept_filter->accf_destroy != NULL) {
			so->so_accf->so_accept_filter->accf_destroy(so);
		}
		if (so->so_accf->so_accept_filter_str != NULL)
			FREE(so->so_accf->so_accept_filter_str, M_ACCF);
		FREE(so->so_accf, M_ACCF);
	}
	crfree(so->so_cred);
	zfreei(so->so_zone, so);
#else
	if (so->cached_in_sock_layer == 1)
		cached_sock_free(so);
	else
		_FREE_ZONE(so, sizeof(*so), so->so_zone);
#endif /* __APPLE__ */
}
int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	struct kextcb *kp;
	struct proc *p = current_proc();
	int s, error;

	s = splnet();
	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	if (error) {
		splx(s);
		return (error);
	}
	if (TAILQ_EMPTY(&so->so_comp))
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_solisten) {
			error = (*kp->e_soif->sf_solisten)(so, kp);
			if (error) {
				if (error == EJUSTRETURN) {
					error = 0;
					break;
				}
				splx(s);
				return(error);
			}
		}
		kp = kp->e_next;
	}
	splx(s);
	return (0);
}
void
sofree(so)
	register struct socket *so;
{
	int error;
	struct kextcb *kp;
	struct socket *head = so->so_head;

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_sofree) {
			error = (*kp->e_soif->sf_sofree)(so, kp);
			if (error) {
				selthreadclear(&so->so_snd.sb_sel);
				selthreadclear(&so->so_rcv.sb_sel);
				return;	/* void fn */
			}
		}
		kp = kp->e_next;
	}

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
		selthreadclear(&so->so_snd.sb_sel);
		selthreadclear(&so->so_rcv.sb_sel);
		return;
	}
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue.  If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
			selthreadclear(&so->so_snd.sb_sel);
			selthreadclear(&so->so_rcv.sb_sel);
			return;
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~SS_INCOMP;
		so->so_head = NULL;
	}
	selthreadclear(&so->so_snd.sb_sel);
	sbrelease(&so->so_snd);
	sorflush(so);
	sfilter_term(so);
	sodealloc(so);
}
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;
	struct kextcb *kp;

#ifndef __APPLE__
	funsetown(so->so_sigio);
#endif
	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_soclose) {
			error = (*kp->e_soif->sf_soclose)(so, kp);
			if (error) {
				splx(s);
				return((error == EJUSTRETURN) ? 0 : error);
			}
		}
		kp = kp->e_next;
	}

	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		sp = TAILQ_FIRST(&so->so_incomp);
		for (; sp != NULL; sp = sonext) {
			sonext = TAILQ_NEXT(sp, so_list);
			(void) soabort(sp);
		}
		for (sp = TAILQ_FIRST(&so->so_comp); sp != NULL; sp = sonext) {
			sonext = TAILQ_NEXT(sp, so_list);
			/* Dequeue from so_comp since sofree() won't do it */
			TAILQ_REMOVE(&so->so_comp, sp, so_list);
			so->so_qlen--;
			sp->so_state &= ~SS_COMP;
			sp->so_head = NULL;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_pcb && so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	so->so_proto->pr_domain->dom_refs--;
	sofree(so);
	splx(s);
	return (error);
}
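/*
 * Hedged userland sketch (not part of this file): with SO_LINGER set,
 * the close path above blocks in tsleep() on so_timeo until the
 * disconnect completes or the linger interval expires.
 */
#if 0
#include <sys/socket.h>
#include <unistd.h>

static void close_with_linger(int fd)
{
	struct linger l;

	l.l_onoff = 1;		/* enable lingering close */
	l.l_linger = 5;		/* wait up to 5 seconds for data to drain */
	(void) setsockopt(fd, SOL_SOCKET, SO_LINGER, &l, sizeof l);
	(void) close(fd);	/* may now block in the soclose() linger loop */
}
#endif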
/*
 * Must be called at splnet...
 */
int
soabort(so)
	struct socket *so;
{
	int error;

	error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
	if (error) {
		sofree(so);
		return error;
	}
	return (0);
}
int
soaccept(so, nam)
	register struct socket *so;
	struct sockaddr **nam;
{
	int s = splnet();
	int error;
	struct kextcb *kp;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
	if (error == 0) {
		kp = sotokextcb(so);
		while (kp) {
			if (kp->e_soif && kp->e_soif->sf_soaccept) {
				error = (*kp->e_soif->sf_soaccept)(so, nam, kp);
				if (error) {
					if (error == EJUSTRETURN) {
						error = 0;
						break;
					}
					splx(s);
					return(error);
				}
			}
			kp = kp->e_next;
		}
	}
	splx(s);
	return (error);
}
int
soconnect(so, nam)
	register struct socket *so;
	struct sockaddr *nam;
{
	int s;
	int error;
	struct proc *p = current_proc();
	struct kextcb *kp;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else {
		/*
		 * Run connect filter before calling protocol:
		 *  - non-blocking connect returns before completion;
		 *  - allows filters to modify address.
		 */
		kp = sotokextcb(so);
		while (kp) {
			if (kp->e_soif && kp->e_soif->sf_soconnect) {
				error = (*kp->e_soif->sf_soconnect)(so, nam, kp);
				if (error) {
					if (error == EJUSTRETURN) {
						error = 0;
					}
					splx(s);
					return(error);
				}
			}
			kp = kp->e_next;
		}
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
	}
	splx(s);
	return (error);
}
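/*
 * Hedged userland sketch (not part of this file): the "connect to a
 * null address" case mentioned above.  On a datagram socket, a second
 * connect(2) with an AF_UNSPEC address asks the protocol to disconnect
 * rather than reconnect.
 */
#if 0
#include <sys/socket.h>
#include <string.h>

static int udp_disconnect(int fd)
{
	struct sockaddr sa;

	memset(&sa, 0, sizeof sa);
	sa.sa_family = AF_UNSPEC;	/* "null address": triggers sodisconnect() */
	return connect(fd, &sa, sizeof sa);
}
#endif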
int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splnet();
	int error;
	struct kextcb *kp;

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
	if (error == 0) {
		kp = sotokextcb(so1);
		while (kp) {
			if (kp->e_soif && kp->e_soif->sf_soconnect2) {
				error = (*kp->e_soif->sf_soconnect2)(so1, so2, kp);
				if (error) {
					if (error == EJUSTRETURN) {
						error = 0;
						break;
					}
					splx(s);
					return(error);
				}
			}
			kp = kp->e_next;
		}
	}
	splx(s);
	return (error);
}
int
sodisconnect(so)
	register struct socket *so;
{
	int s = splnet();
	int error;
	struct kextcb *kp;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
	if (error == 0) {
		kp = sotokextcb(so);
		while (kp) {
			if (kp->e_soif && kp->e_soif->sf_sodisconnect) {
				error = (*kp->e_soif->sf_sodisconnect)(so, kp);
				if (error) {
					if (error == EJUSTRETURN) {
						error = 0;
						break;
					}
					splx(s);
					return(error);
				}
			}
			kp = kp->e_next;
		}
	}
bad:
	splx(s);
	return (error);
}
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
/*
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 *
 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
 *  point at the mbuf chain being constructed and go from there.
 */
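/*
 * Hedged sketch (not built): the two ways callers hand data to
 * sosend(), per the comment above.  The variable names here are
 * illustrative assumptions; in-kernel callers normally arrive via the
 * sendit() path.
 */
#if 0
/* (a) data described by a uio, top == NULL */
error = sosend(so, NULL, &auio, NULL, NULL, 0);

/* (b) data prepackaged in an mbuf chain, uio == NULL */
error = sosend(so, NULL, NULL, top, NULL, 0);
#endif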
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct mbuf **mp;
	register struct mbuf *m, *freelist = NULL;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen, sendflags;
	int atomic = sosendallatonce(so) || top;
	struct proc *p = current_proc();
	struct kextcb *kp;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;

	KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
		     so, resid, so->so_snd.sb_cc,
		     so->so_snd.sb_lowat, so->so_snd.sb_hiwat);

	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (p)
		p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' is allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0 && !(flags&MSG_HOLD))
				snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				       ENOTCONN : EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;

		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				boolean_t dropped_funnel = FALSE;
				int chainlength;
				int bytes_to_copy;

				bytes_to_copy = min(resid, space);

				if (sosendminchain > 0) {
					if (bytes_to_copy >= sosendminchain) {
						dropped_funnel = TRUE;
						(void)thread_funnel_set(network_flock, FALSE);
					}
					chainlength = 0;
				} else
					chainlength = sosendmaxchain;

				do {
					if (bytes_to_copy >= MINCLSIZE) {
						/*
						 * Try to maintain a local cache of mbuf clusters needed to
						 * complete this write.  The list is further limited to the
						 * number that are currently needed to fill the socket.  This
						 * mechanism allows a large number of mbufs/clusters to be
						 * grabbed under a single mbuf lock... if we can't get any
						 * clusters, then fall back to trying for mbufs.  If we fail
						 * early (or miscalculate the number needed) make sure to
						 * release any clusters we haven't yet consumed.
						 */
						if ((m = freelist) == NULL) {
							int num_needed;
							int hdrs_needed = 0;

							if (top == 0)
								hdrs_needed = 1;
							num_needed = bytes_to_copy / MCLBYTES;

							if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
								num_needed++;

							if ((freelist = m_getpackets(num_needed, hdrs_needed, M_WAIT)) == NULL)
								goto getpackets_failed;
							m = freelist;
						}
						freelist = m->m_next;
						m->m_next = NULL;

						mlen = MCLBYTES;
						len = min(mlen, bytes_to_copy);
					} else {
getpackets_failed:
						if (top == 0) {
							MGETHDR(m, M_WAIT, MT_DATA);
							mlen = MHLEN;
							m->m_pkthdr.len = 0;
							m->m_pkthdr.rcvif = (struct ifnet *)0;
						} else {
							MGET(m, M_WAIT, MT_DATA);
							mlen = MLEN;
						}
						len = min(mlen, bytes_to_copy);
						/*
						 * For datagram protocols, leave room
						 * for protocol headers in first mbuf.
						 */
						if (atomic && top == 0 && len < mlen)
							MH_ALIGN(m, len);
					}
					chainlength += len;
					space -= len;

					error = uiomove(mtod(m, caddr_t), (int)len, uio);

					resid = uio->uio_resid;

					m->m_len = len;
					*mp = m;
					top->m_pkthdr.len += len;
					if (error)
						break;
					mp = &m->m_next;
					if (resid <= 0) {
						if (flags & MSG_EOR)
							top->m_flags |= M_EOR;
						break;
					}
					bytes_to_copy = min(resid, space);

				} while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));

				if (dropped_funnel == TRUE)
					(void)thread_funnel_set(network_flock, TRUE);
				if (error)
					goto release;
			}

			if (flags & (MSG_HOLD|MSG_SEND))
			{	/* Enqueue for later, go away if HOLD */
				register struct mbuf *mb1;

				if (so->so_temp && (flags & MSG_FLUSH))
				{	m_freem(so->so_temp);
					so->so_temp = NULL;
				}
				if (so->so_temp)
					so->so_tail->m_next = top;
				else
					so->so_temp = top;
				mb1 = top;
				while (mb1->m_next)
					mb1 = mb1->m_next;
				so->so_tail = mb1;
				if (flags & MSG_HOLD)
				{	top = NULL;
					goto release;
				}
				top = so->so_temp;
			}
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splnet();				/* XXX */
			/* Compute flags here, for pru_send and NKEs */
			sendflags = (flags & MSG_OOB) ? PRUS_OOB :
			    /*
			     * If the user set MSG_EOF, the protocol
			     * understands this flag and nothing left to
			     * send then use PRU_SEND_EOF instead of PRU_SEND.
			     */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ? PRUS_EOF :
			    /* If there is more to send set PRUS_MORETOCOME */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
			kp = sotokextcb(so);
			while (kp)
			{	if (kp->e_soif && kp->e_soif->sf_sosend) {
					error = (*kp->e_soif->sf_sosend)(so, &addr,
									 &top, &control,
									 &sendflags, kp);
					if (error) {
						splx(s);
						if (error == EJUSTRETURN) {
							sbunlock(&so->so_snd);
							if (freelist)
								m_freem_list(freelist);
							return(0);
						}
						goto release;
					}
				}
				kp = kp->e_next;
			}

			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
				    sendflags, top, addr, control, p);
			if (flags & MSG_SEND)
				so->so_temp = NULL;
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			splx(s);
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	if (freelist)
		m_freem_list(freelist);

	KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
		     so, resid, so->so_snd.sb_cc, space, error);

	return (error);
}
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
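/*
 * Hedged sketch (not built): the mp0 convention described above.  With
 * mp0 non-null the data comes back as an mbuf chain and the uio only
 * supplies the byte count; the chain/auio/want variables here are
 * illustrative assumptions.
 */
#if 0
struct mbuf *chain = NULL;

auio.uio_resid = want;	/* only the count is consulted */
error = soreceive(so, NULL, &auio, &chain, NULL, NULL);
/* on success, up to 'want' bytes arrive in 'chain' instead of user space */
#endif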
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register struct mbuf *free_list, *ml;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;
	struct kextcb *kp;

	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
		     so, uio->uio_resid, so->so_rcv.sb_cc,
		     so->so_rcv.sb_lowat,
		     so->so_rcv.sb_hiwat);

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_soreceive) {
			error = (*kp->e_soif->sf_soreceive)(so, psa, &uio,
							    mp0, controlp,
							    flagsp, kp);
			if (error) {
				KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
				return((error == EJUSTRETURN) ? 0 : error);
			}
		}
		kp = kp->e_next;
	}

	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/*
	 * When SO_WANTOOBFLAG is set we try to get out-of-band data
	 * regardless of the flags argument. Here is the case where
	 * out-of-band data is not inline.
	 */
	if ((flags & MSG_OOB) ||
	    ((so->so_options & SO_WANTOOBFLAG) != 0 &&
	     (so->so_options & SO_OOBINLINE) == 0 &&
	     (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
		m = m_get(M_WAIT, MT_DATA);
		if (m == 0)
			return (ENOBUFS);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		if ((so->so_options & SO_WANTOOBFLAG) != 0) {
			if (error == EWOULDBLOCK || error == EINVAL) {
				/*
				 * Let's try to get normal data:
				 *  EWOULDBLOCK: out-of-band data not received yet;
				 *  EINVAL: out-of-band data already read.
				 */
				error = 0;
				goto nooob;
			} else if (error == 0 && flagsp)
				*flagsp |= MSG_OOB;
		}
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
		return (error);
	}
nooob:
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error) {
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
		return (error);
	}
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
		KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		if (socket_debug)
			printf("Waiting for socket data\n");
		error = sbwait(&so->so_rcv);
		if (socket_debug)
			printf("SORECEIVE - sbwait returned %d\n", error);
		splx(s);
		if (error) {
			KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
			return (error);
		}
		goto restart;
	}
dontblock:
#ifndef __APPLE__
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	nextrecord = m->m_nextpkt;
	if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
		orig_resid = 0;
		if (psa)
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
					    mp0 == 0);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;

	free_list = m;
	ml = (struct mbuf *)0;

	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifndef __APPLE__
		/*
		 * This assertion needs rework.  The trouble is Appletalk uses many
		 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
		 * For now just remove the assertion...  CSM 9/98
		 */
		else
			KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
			    ("receive 3"));
#else
		/*
		 * Make sure to always set MSG_OOB event when getting
		 * out of band data inline.
		 */
		if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
		    (so->so_options & SO_OOBINLINE) != 0 &&
		    (so->so_state & SS_RCVATMARK) != 0) {
			flags |= MSG_OOB;
		}
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					m->m_nextpkt = 0;
					if (ml == 0)
						free_list = m;
					else
						ml->m_next = m;
					ml = m;
					so->so_rcv.sb_mb = m = m->m_next;
					ml->m_next = 0;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					postevent(so, 0, EV_OOB);
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			if (ml) {
				m_freem_list(free_list);
			}
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, 0,0,0,0,0);
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
			free_list = m;
			ml = (struct mbuf *)0;
		}
	}
	if (ml) {
		m_freem_list(free_list);
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		if (so->so_options & SO_DONTTRUNC)
			flags |= MSG_RCVMORE;
		else {
			flags |= MSG_TRUNC;
			if ((flags & MSG_PEEK) == 0)
				(void) sbdroprecord(&so->so_rcv);
		}
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
		flags |= MSG_HAVEMORE;
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);

	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
		     so, uio->uio_resid, so->so_rcv.sb_cc, 0, error);

	return (error);
}
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;
	struct kextcb *kp;
	int ret;

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_START, 0,0,0,0,0);
	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_soshutdown) {
			ret = (*kp->e_soif->sf_soshutdown)(so, how, kp);
			if (ret)
				return((ret == EJUSTRETURN) ? 0 : ret);
		}
		kp = kp->e_next;
	}

	if (how != SHUT_WR) {
		sorflush(so);
		postevent(so, 0, EV_RCLOSED);
	}
	if (how != SHUT_RD) {
		ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
		postevent(so, 0, EV_WCLOSED);
		KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
		return(ret);
	}

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
	return (0);
}
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s, error;
	struct sockbuf asb;
	struct kextcb *kp;

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_sorflush) {
			if ((*kp->e_soif->sf_sorflush)(so, kp))
				return;
		}
		kp = kp->e_next;
	}

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAIT);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	selthreadclear(&sb->sb_sel);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (asb.sb_flags & SB_KNOTE) {
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}
/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
int
sooptcopyin(sopt, buf, len, minlen)
	struct	sockopt *sopt;
	void	*buf;
	size_t	len;
	size_t	minlen;
{
	size_t	valsize;

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		return EINVAL;
	if (valsize > len)
		sopt->sopt_valsize = valsize = len;

	if (sopt->sopt_p != 0)
		return (copyin(sopt->sopt_val, buf, valsize));

	bcopy(sopt->sopt_val, buf, valsize);
	return 0;
}

int
sosetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int	error, optval;
	struct	linger l;
	struct	timeval tv;
	short	val;
	struct kextcb *kp;

	if (sopt->sopt_dir != SOPT_SET) {
		sopt->sopt_dir = SOPT_SET;
	}

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_socontrol) {
			error = (*kp->e_soif->sf_socontrol)(so, sopt, kp);
			if (error)
				return((error == EJUSTRETURN) ? 0 : error);
		}
		kp = kp->e_next;
	}

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)(so, sopt));
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = l.l_linger;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_DONTTRUNC:
		case SO_WANTMORE:
		case SO_WANTOOBFLAG:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
					      &so->so_snd : &so->so_rcv,
					      (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv, sizeof tv);
			if (error)
				goto bad;

			/* assert(hz > 0); */
			if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
			    tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
				error = EDOM;
				goto bad;
			}
			/* assert(tick > 0); */
			/* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
			{
			long tmp = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
			if (tmp > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}
			val = tmp;
			}

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;

		case SO_NKE:
		{
			struct so_nke nke;
			struct NFDescriptor *nf1, *nf2 = NULL;

			error = sooptcopyin(sopt, &nke,
					    sizeof nke, sizeof nke);
			if (error)
				goto bad;

			error = nke_insert(so, &nke);
			break;
		}

		case SO_NOSIGPIPE:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_flags |= SOF_NOSIGPIPE;
			else
				so->so_flags &= ~SOF_NOSIGPIPE;
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)(so, sopt));
		}
	}
bad:
	return (error);
}
/* Helper routine for getsockopt */
int
sooptcopyout(sopt, buf, len)
	struct	sockopt *sopt;
	void	*buf;
	size_t	len;
{
	int	error;
	size_t	valsize;

	error = 0;

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer must
	 * be generated ahead of time.
	 */
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != 0) {
		if (sopt->sopt_p != 0)
			error = copyout(buf, sopt->sopt_val, valsize);
		else
			bcopy(buf, sopt->sopt_val, valsize);
	}
	return error;
}
int
sogetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int	error, optval;
	struct	linger l;
	struct	timeval tv;
	struct kextcb *kp;

	if (sopt->sopt_dir != SOPT_GET) {
		sopt->sopt_dir = SOPT_GET;
	}

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_socontrol) {
			error = (*kp->e_soif->sf_socontrol)(so, sopt, kp);
			if (error)
				return((error == EJUSTRETURN) ? 0 : error);
		}
		kp = kp->e_next;
	}

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)(so, sopt));
		} else
			return (ENOPROTOOPT);
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_DONTTRUNC:
		case SO_WANTMORE:
		case SO_WANTOOBFLAG:
			optval = so->so_options & sopt->sopt_name;
integer:
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_NREAD:
		{
			int pkt_total;
			struct mbuf *m1;

			pkt_total = 0;
			m1 = so->so_rcv.sb_mb;
			if (so->so_proto->pr_flags & PR_ATOMIC)
			{
				kprintf("SKT CC: %d\n", so->so_rcv.sb_cc);
				while (m1) {
					if (m1->m_type == MT_DATA)
						pkt_total += m1->m_len;
					kprintf("CNT: %d/%d\n", m1->m_len, pkt_total);
					m1 = m1->m_next;
				}
				optval = pkt_total;
			} else
				optval = so->so_rcv.sb_cc;
			kprintf("RTN: %d\n", optval);
			goto integer;
		}

		case SO_ERROR:
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.sb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * tick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		case SO_NOSIGPIPE:
			optval = (so->so_flags & SOF_NOSIGPIPE);
			goto integer;

		default:
			error = ENOPROTOOPT;
			break;
		}
		return (error);
	}
}
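/*
 * Hedged sketch (not built): how a protocol-level pr_ctloutput()
 * routine would typically use sooptcopyin()/sooptcopyout() above.  The
 * handler name, the MYPROTO_OPTION constant, and the pcb access are
 * hypothetical, shown only to illustrate the calling convention.
 */
#if 0
static int
myproto_ctloutput(struct socket *so, struct sockopt *sopt)
{
	int error, optval;

	if (sopt->sopt_dir == SOPT_SET) {
		/* pull exactly sizeof optval bytes in (minlen == len) */
		error = sooptcopyin(sopt, &optval, sizeof optval,
				    sizeof optval);
		if (error == 0)
			/* ... apply optval to the protocol control block ... */ ;
	} else {
		optval = 1;	/* ... fetch the current value ... */
		error = sooptcopyout(sopt, &optval, sizeof optval);
	}
	return (error);
}
#endif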
/*
 * Network filter support
 */
/* Run the list of filters, creating extension control blocks */
int
sfilter_init(register struct socket *so)
{	struct kextcb *kp, **kpp;
	struct protosw *prp;
	struct NFDescriptor *nfp;

	prp = so->so_proto;
	nfp = prp->pr_sfilter.tqh_first;	/* non-null */
	kpp = &so->so_ext;
	kp = NULL;
	while (nfp)
	{	MALLOC(kp, struct kextcb *, sizeof(*kp),
		       M_TEMP, M_WAITOK);
		if (kp == NULL)
			return(ENOBUFS); /* so_free will clean up */
		*kpp = kp;
		kpp = &kp->e_next;
		kp->e_next = NULL;
		kp->e_fcb = NULL;
		kp->e_nfd = nfp;
		kp->e_soif = nfp->nf_soif;
		kp->e_sout = nfp->nf_soutil;
		/*
		 * Ignore return value for create
		 * Everyone gets a chance at startup
		 */
		if (kp->e_soif && kp->e_soif->sf_socreate)
			(*kp->e_soif->sf_socreate)(so, prp, kp);
		nfp = nfp->nf_next.tqe_next;
	}
	return(0);
}

/*
 * Run the list of filters, freeing extension control blocks
 * Assumes the soif/soutil blocks have been handled.
 */
int
sfilter_term(struct socket *so)
{	struct kextcb *kp, *kp1;

	kp = so->so_ext;
	while (kp)
	{	kp1 = kp->e_next;
		/*
		 * Ignore return code on termination; everyone must
		 * clean up somehow.
		 */
		if (kp->e_soif && kp->e_soif->sf_sofree)
			kp->e_soif->sf_sofree(so, kp);
		FREE(kp, M_TEMP);
		kp = kp1;
	}
	return(0);
}
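/*
 * Hedged sketch (not built): the shape of an NKE socket-filter entry
 * point as the dispatch loops in this file expect it.  A filter that
 * handles a call completely returns EJUSTRETURN, which the callers
 * translate into "stop filtering, report success".  my_sobind is a
 * hypothetical name; only sf_sobind's calling convention is taken from
 * this file.
 */
#if 0
static int
my_sobind(struct socket *so, struct sockaddr *nam, struct kextcb *kp)
{
	/* inspect or rewrite nam here ... */
	return (0);		/* 0: continue down the filter chain */
	/* return (EJUSTRETURN);  swallow the call, report success */
}
#endif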
/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
	struct mbuf *m, *m_prev;
	int sopt_size = sopt->sopt_valsize;

	MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
	if (m == 0)
		return ENOBUFS;
	if (sopt_size > MLEN) {
		MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return ENOBUFS;
		}
		m->m_len = min(MCLBYTES, sopt_size);
	} else {
		m->m_len = min(MLEN, sopt_size);
	}
	sopt_size -= m->m_len;
	*mp = m;
	m_prev = m;

	while (sopt_size) {
		MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
		if (m == 0) {
			m_freem(*mp);
			return ENOBUFS;
		}
		if (sopt_size > MLEN) {
			MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_freem(*mp);
				return ENOBUFS;
			}
			m->m_len = min(MCLBYTES, sopt_size);
		} else {
			m->m_len = min(MLEN, sopt_size);
		}
		sopt_size -= m->m_len;
		m_prev->m_next = m;
		m_prev = m;
	}
	return 0;
}
/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;

	if (sopt->sopt_val == NULL)
		return 0;
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != NULL) {
			int error;

			error = copyin(sopt->sopt_val, mtod(m, char *),
				       m->m_len);
			if (error != 0) {
				m_freem(m0);
				return(error);
			}
		} else
			bcopy(sopt->sopt_val, mtod(m, char *), m->m_len);
		sopt->sopt_valsize -= m->m_len;
		(caddr_t)sopt->sopt_val += m->m_len;
		m = m->m_next;
	}
	if (m != NULL) /* should have been allocated large enough at ip6_sooptmcopyin() */
		panic("soopt_mcopyin");
	return 0;
}
/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;
	size_t valsize = 0;

	if (sopt->sopt_val == NULL)
		return 0;
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != NULL) {
			int error;

			error = copyout(mtod(m, char *), sopt->sopt_val,
					m->m_len);
			if (error != 0) {
				m_freem(m0);
				return(error);
			}
		} else
			bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
		sopt->sopt_valsize -= m->m_len;
		(caddr_t)sopt->sopt_val += m->m_len;
		valsize += m->m_len;
		m = m->m_next;
	}
	if (m != NULL) {
		/* enough soopt buffer should be given from user-land */
		m_freem(m0);
		return(EINVAL);
	}
	sopt->sopt_valsize = valsize;
	return 0;
}
void
sohasoutofband(so)
	register struct socket *so;
{
	struct proc *p;
	struct kextcb *kp;

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_sohasoutofband) {
			if ((*kp->e_soif->sf_sohasoutofband)(so, kp))
				return;
		}
		kp = kp->e_next;
	}
	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}
int
sopoll(struct socket *so, int events, struct ucred *cred, void * wql)
{
	struct proc *p = current_proc();
	int revents = 0;
	int s = splnet();

	if (events & (POLLIN | POLLRDNORM))
		if (soreadable(so))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

	if (revents == 0) {
		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			/* Darwin sets the flag first, BSD calls selrecord first */
			so->so_rcv.sb_flags |= SB_SEL;
			selrecord(p, &so->so_rcv.sb_sel, wql);
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			/* Darwin sets the flag first, BSD calls selrecord first */
			so->so_snd.sb_flags |= SB_SEL;
			selrecord(p, &so->so_snd.sb_sel, wql);
		}
	}

	splx(s);
	return (revents);
}