/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.6 (Berkeley) 5/2/95
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/kdebug.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <kern/zalloc.h>
#include <machine/limits.h>
int			so_cache_hw = 0;
int			so_cache_timeouts = 0;
int			so_cache_max_freed = 0;
int			cached_sock_count = 0;
struct socket		*socket_cache_head = 0;
struct socket		*socket_cache_tail = 0;
u_long			so_cache_time = 0;
int			so_cache_init_done = 0;
struct zone		*so_cache_zone;
extern int		get_inpcb_str_size();
extern int		get_tcp_str_size();

#include <machine/limits.h>

int socket_zone = M_SOCKET;
so_gen_t so_gencnt;	/* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)
#define	DBG_FNC_SOSEND		NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
#define	DBG_FNC_SORECEIVE	NETDBG_CODE(DBG_NETSOCK, (8 << 8))
#define	DBG_FNC_SOSHUTDOWN	NETDBG_CODE(DBG_NETSOCK, (9 << 8))
SYSCTL_DECL(_kern_ipc);

static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, "");

/* Should we get a maximum also ??? */
static int sosendminchain = 16384;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain, 0, "");
void so_cache_timer();
/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */

void socketinit()
{
	vm_size_t str_size;

	if (so_cache_init_done)
		return;

	so_cache_init_done = 1;

	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
	str_size = (vm_size_t)(sizeof(struct socket) + 4 +
			       get_inpcb_str_size() + 4 +
			       get_tcp_str_size());
	so_cache_zone = zinit(str_size, 120000 * str_size, 8192, "socache zone");
#if TEMPDEBUG
	kprintf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
#endif
}
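/*
 * Layout sketch of one so_cache_zone element, as carved up by
 * cached_sock_alloc() below (derived from the str_size computation
 * above; the pads are the two 4-byte longword-alignment allowances):
 *
 *	+---------------+-------------------+-------------------+
 *	| struct socket | inpcb area (+pad) | tcpcb area (+pad) |
 *	+---------------+-------------------+-------------------+
 *	*so          so_saved_pcb       inp_saved_ppcb
 */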
void cached_sock_alloc(so, waitok)
	struct socket **so;
	int waitok;
{
	caddr_t temp;
	int s;
	register u_long offset;

	s = splnet();
	if (cached_sock_count) {
		cached_sock_count--;
		*so = socket_cache_head;
		if (*so == 0)
			panic("cached_sock_alloc: cached sock is null");

		socket_cache_head = socket_cache_head->cache_next;
		if (socket_cache_head)
			socket_cache_head->cache_prev = 0;
		else
			socket_cache_tail = 0;
		splx(s);

		temp = (*so)->so_saved_pcb;
		bzero((caddr_t)*so, sizeof(struct socket));
#if TEMPDEBUG
		kprintf("cached_sock_alloc - retrieving cached sock %x - count == %d\n", *so,
			cached_sock_count);
#endif
		(*so)->so_saved_pcb = temp;
	} else {
#if TEMPDEBUG
		kprintf("Allocating cached sock %x from memory\n", *so);
#endif
		splx(s);
		if (waitok)
			*so = (struct socket *) zalloc(so_cache_zone);
		else
			*so = (struct socket *) zalloc_noblock(so_cache_zone);

		if (*so == 0)
			return;

		bzero((caddr_t)*so, sizeof(struct socket));

		/*
		 * Define offsets for extra structures into our single block of
		 * memory. Align extra structures on longword boundaries.
		 */
		offset = (u_long) *so;
		offset += sizeof(struct socket);
		if (offset & 0x3) {
			offset += 4;
			offset &= 0xfffffffc;
		}
		(*so)->so_saved_pcb = (caddr_t) offset;
		offset += get_inpcb_str_size();
		if (offset & 0x3) {
			offset += 4;
			offset &= 0xfffffffc;
		}
		((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb =
			(caddr_t) offset;
#if TEMPDEBUG
		kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
			(*so)->so_saved_pcb,
			((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
#endif
	}

	(*so)->cached_in_sock_layer = 1;
}
void cached_sock_free(so)
	struct socket *so;
{
	int s;

	s = splnet();
	if (++cached_sock_count > MAX_CACHED_SOCKETS) {
		--cached_sock_count;
		splx(s);
#if TEMPDEBUG
		kprintf("Freeing overflowed cached socket %x\n", so);
#endif
		zfree(so_cache_zone, (vm_offset_t) so);
	} else {
#if TEMPDEBUG
		kprintf("Freeing socket %x into cache\n", so);
#endif
		if (so_cache_hw < cached_sock_count)
			so_cache_hw = cached_sock_count;

		so->cache_next = socket_cache_head;
		so->cache_prev = 0;
		if (socket_cache_head)
			socket_cache_head->cache_prev = so;
		else
			socket_cache_tail = so;

		so->cache_timestamp = so_cache_time;
		socket_cache_head = so;
		splx(s);
#if TEMPDEBUG
		kprintf("Freed cached sock %x into cache - count is %d\n", so,
			cached_sock_count);
#endif
	}
}
void so_cache_timer()
{
	register struct socket *p;
	register int s;
	register int n_freed = 0;
	boolean_t funnel_state;

	funnel_state = thread_funnel_set(network_flock, TRUE);

	++so_cache_time;

	s = splnet();
	while (p = socket_cache_tail) {
		if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
			break;

		so_cache_timeouts++;

		if (socket_cache_tail = p->cache_prev)
			p->cache_prev->cache_next = 0;
		if (--cached_sock_count == 0)
			socket_cache_head = 0;
		splx(s);

		zfree(so_cache_zone, (vm_offset_t) p);

		s = splnet();
		if (++n_freed >= SO_CACHE_MAX_FREE_BATCH) {
			so_cache_max_freed++;
			break;
		}
	}
	splx(s);

	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));

	(void) thread_funnel_set(network_flock, FALSE);
}
/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
struct socket *
soalloc(waitok, dom, type)
	int waitok;
	int dom;
	int type;
{
	struct socket *so;

	if ((dom == PF_INET) && (type == SOCK_STREAM))
		cached_sock_alloc(&so, waitok);
	else {
		so = _MALLOC_ZONE(sizeof(*so), socket_zone, M_WAITOK);
		if (so)
			bzero(so, sizeof *so);
	}
	/* XXX race condition for reentrant kernel */

	if (so) {
		so->so_gencnt = ++so_gencnt;
		so->so_zone = socket_zone;
	}
	return (so);
}
int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = current_proc();
	register struct protosw *prp;
	struct socket *so;
	register int error = 0;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(p != 0, dom, type);
	if (so == 0)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;

	if (p != 0) {
		if (p->p_ucred->cr_uid == 0)
			so->so_state = SS_PRIV;
		so->so_uid = p->p_ucred->cr_uid;
	}

	so->so_proto = prp;
	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	if (prp->pr_sfilter.tqh_first)
		error = sfilter_init(so);
	if (error == 0)
		error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);

	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
	prp->pr_domain->dom_refs++;
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	TAILQ_INIT(&so->so_evlist);
	*aso = so;
	return (0);
}
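/*
 * Usage sketch (hypothetical in-kernel caller, not from this file;
 * error handling trimmed):
 *
 *	struct socket *so;
 *	int error;
 *
 *	error = socreate(AF_INET, &so, SOCK_STREAM, IPPROTO_TCP);
 *	if (error)
 *		return (error);
 *	...
 *	soclose(so);
 */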
int
sobind(so, nam)
	struct socket *so;
	struct sockaddr *nam;
{
	struct proc *p = current_proc();
	int error;
	struct kextcb *kp;
	int s = splnet();

	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
	if (error == 0) {	/* ??? */
		kp = sotokextcb(so);
		while (kp) {
			if (kp->e_soif && kp->e_soif->sf_sobind) {
				error = (*kp->e_soif->sf_sobind)(so, nam, kp);
				if (error) {
					if (error == EJUSTRETURN) {
						error = 0;
						break;
					}
					splx(s);
					return (error);
				}
			}
			kp = kp->e_next;
		}
	}
	splx(s);
	return (error);
}

void
sodealloc(so)
	struct socket *so;
{
	so->so_gencnt = ++so_gencnt;

	if (so->cached_in_sock_layer == 1)
		cached_sock_free(so);
	else
		_FREE_ZONE(so, sizeof(*so), so->so_zone);
}
int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	struct kextcb *kp;
	struct proc *p = current_proc();
	int s, error;

	s = splnet();
	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	if (error) {
		splx(s);
		return (error);
	}
	if (TAILQ_EMPTY(&so->so_comp))
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_solisten) {
			error = (*kp->e_soif->sf_solisten)(so, kp);
			if (error) {
				if (error == EJUSTRETURN) {
					error = 0;
					break;
				}
				splx(s);
				return (error);
			}
		}
		kp = kp->e_next;
	}
	splx(s);
	return (0);
}
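/*
 * Server-side setup sketch (hypothetical caller, not from this file;
 * `sin' is a sockaddr_in the caller filled in beforehand):
 *
 *	error = sobind(so, (struct sockaddr *)&sin);
 *	if (error == 0)
 *		error = solisten(so, SOMAXCONN);
 *
 * solisten() clamps a backlog outside [0, somaxconn] to somaxconn.
 */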
void
sofree(so)
	register struct socket *so;
{
	int error;
	struct kextcb *kp;
	struct socket *head = so->so_head;

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_sofree) {
			error = (*kp->e_soif->sf_sofree)(so, kp);
			if (error) {
				selthreadclear(&so->so_snd.sb_sel);
				selthreadclear(&so->so_rcv.sb_sel);
				return;	/* void fn */
			}
		}
		kp = kp->e_next;
	}

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
		selthreadclear(&so->so_snd.sb_sel);
		selthreadclear(&so->so_rcv.sb_sel);
		return;
	}
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue.  If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
			selthreadclear(&so->so_snd.sb_sel);
			selthreadclear(&so->so_rcv.sb_sel);
			return;
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~(SS_INCOMP|SS_COMP);
		so->so_head = NULL;
	}

	selthreadclear(&so->so_snd.sb_sel);
	sbrelease(&so->so_snd);
	sorflush(so);
	sfilter_term(so);
	sodealloc(so);
}
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;
	struct kextcb *kp;

	funsetown(so->so_pgid);

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_soclose) {
			error = (*kp->e_soif->sf_soclose)(so, kp);
			if (error) {
				splx(s);
				return ((error == EJUSTRETURN) ? 0 : error);
			}
		}
		kp = kp->e_next;
	}

	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		sp = TAILQ_FIRST(&so->so_incomp);
		for (; sp != NULL; sp = sonext) {
			sonext = TAILQ_NEXT(sp, so_list);
			(void) soabort(sp);
		}
		for (sp = TAILQ_FIRST(&so->so_comp); sp != NULL; sp = sonext) {
			sonext = TAILQ_NEXT(sp, so_list);
			/* Dequeue from so_comp since sofree() won't do it */
			TAILQ_REMOVE(&so->so_comp, sp, so_list);
			so->so_qlen--;
			sp->so_state &= ~SS_COMP;
			sp->so_head = NULL;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_pcb && so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	so->so_proto->pr_domain->dom_refs--;
	evsofree(so);
	sofree(so);
	splx(s);
	return (error);
}
/*
 * Must be called at splnet...
 */
int
soabort(so)
	struct socket *so;
{
	return (*so->so_proto->pr_usrreqs->pru_abort)(so);
}
int
soaccept(so, nam)
	register struct socket *so;
	struct sockaddr **nam;
{
	int s = splnet();
	int error;
	struct kextcb *kp;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
	if (error == 0) {
		kp = sotokextcb(so);
		while (kp) {
			if (kp->e_soif && kp->e_soif->sf_soaccept) {
				error = (*kp->e_soif->sf_soaccept)(so, nam, kp);
				if (error) {
					if (error == EJUSTRETURN) {
						error = 0;
						break;
					}
					splx(s);
					return (error);
				}
			}
			kp = kp->e_next;
		}
	}
	splx(s);
	return (error);
}
int
soconnect(so, nam)
	register struct socket *so;
	struct sockaddr *nam;
{
	int s;
	int error;
	struct proc *p = current_proc();
	struct kextcb *kp;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else {
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
		if (error == 0) {
			kp = sotokextcb(so);
			while (kp) {
				if (kp->e_soif && kp->e_soif->sf_soconnect) {
					error = (*kp->e_soif->sf_soconnect)(so, nam, kp);
					if (error) {
						if (error == EJUSTRETURN) {
							error = 0;
							break;
						}
						splx(s);
						return (error);
					}
				}
				kp = kp->e_next;
			}
		}
	}
	splx(s);
	return (error);
}
int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splnet();
	int error;
	struct kextcb *kp;

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
	if (error == 0) {
		kp = sotokextcb(so1);
		while (kp) {
			if (kp->e_soif && kp->e_soif->sf_soconnect2) {
				error = (*kp->e_soif->sf_soconnect2)(so1, so2, kp);
				if (error) {
					if (error == EJUSTRETURN) {
						error = 0;
						break;
					}
					splx(s);
					return (error);
				}
			}
			kp = kp->e_next;
		}
	}
	splx(s);
	return (error);
}
int
sodisconnect(so)
	register struct socket *so;
{
	int s = splnet();
	int error;
	struct kextcb *kp;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
	if (error == 0) {
		kp = sotokextcb(so);
		while (kp) {
			if (kp->e_soif && kp->e_soif->sf_sodisconnect) {
				error = (*kp->e_soif->sf_sodisconnect)(so, kp);
				if (error) {
					if (error == EJUSTRETURN) {
						error = 0;
						break;
					}
					splx(s);
					return (error);
				}
			}
			kp = kp->e_next;
		}
	}
bad:
	splx(s);
	return (error);
}
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 *
 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
 *  point at the mbuf chain being constructed and go from there.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen, sendflags;
	int atomic = sosendallatonce(so) || top;
	struct proc *p = current_proc();
	struct kextcb *kp;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;

	KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
		so,
		resid,
		so->so_snd.sb_cc,
		so->so_snd.sb_lowat,
		so->so_snd.sb_hiwat);

	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
		error = EINVAL;
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (p)
		p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' is allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0 && !(flags & MSG_HOLD))
				snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				    ENOTCONN : EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				boolean_t funnel_state = TRUE;
				int chainmbufs =
				    (sosendminchain > 0 && resid >= sosendminchain);

				if (chainmbufs)
					funnel_state = thread_funnel_set(network_flock, FALSE);
				do {
					KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_NONE, -1, 0, 0, 0, 0);
					if (top == 0) {
						MGETHDR(m, M_WAIT, MT_DATA);
						mlen = MHLEN;
						m->m_pkthdr.len = 0;
						m->m_pkthdr.rcvif = (struct ifnet *)0;
					} else {
						MGET(m, M_WAIT, MT_DATA);
						mlen = MLEN;
					}
					if (resid >= MINCLSIZE) {
						MCLGET(m, M_WAIT);
						if ((m->m_flags & M_EXT) == 0)
							goto nopages;
						mlen = MCLBYTES;
						len = min(min(mlen, resid), space);
					} else {
nopages:
						len = min(min(mlen, resid), space);
						/*
						 * For datagram protocols, leave room
						 * for protocol headers in first mbuf.
						 */
						if (atomic && top == 0 && len < mlen)
							MH_ALIGN(m, len);
					}
					KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_NONE, -1, 0, 0, 0, 0);
					space -= len;
					error = uiomove(mtod(m, caddr_t), (int)len, uio);
					resid = uio->uio_resid;

					m->m_len = len;
					*mp = m;
					top->m_pkthdr.len += len;
					if (error)
						break;
					mp = &m->m_next;
					if (resid <= 0) {
						if (flags & MSG_EOR)
							top->m_flags |= M_EOR;
						break;
					}
				} while (space > 0 && (chainmbufs || atomic || resid < MINCLSIZE));
				if (chainmbufs)
					funnel_state = thread_funnel_set(network_flock, TRUE);
				if (error)
					goto release;
			}

			if (flags & (MSG_HOLD|MSG_SEND)) {
				/* Enqueue for later, go away if HOLD */
				register struct mbuf *mb1;

				if (so->so_temp && (flags & MSG_FLUSH)) {
					m_freem(so->so_temp);
					so->so_temp = NULL;
				}
				if (so->so_temp)
					so->so_tail->m_next = top;
				else
					so->so_temp = top;
				mb1 = top;
				while (mb1->m_next)
					mb1 = mb1->m_next;
				so->so_tail = mb1;
				if (flags & MSG_HOLD) {
					top = NULL;
					goto release;
				}
				top = so->so_temp;
			}
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splnet();				/* XXX */
			kp = sotokextcb(so);
			/* Compute flags here, for pru_send and NKEs */
			sendflags = (flags & MSG_OOB) ? PRUS_OOB :
			    /*
			     * If the user set MSG_EOF, the protocol
			     * understands this flag and nothing left to
			     * send then use PRU_SEND_EOF instead of PRU_SEND.
			     */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ? PRUS_EOF :
			    /* If there is more to send set PRUS_MORETOCOME */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
			while (kp) {
				if (kp->e_soif && kp->e_soif->sf_sosend) {
					error = (*kp->e_soif->sf_sosend)(so, &addr,
					    &uio, &top, &control, &sendflags, kp);
					if (error) {
						splx(s);
						if (error == EJUSTRETURN) {
							sbunlock(&so->so_snd);
							if (top)
								m_freem(top);
							if (control)
								m_freem(control);
							return (0);
						}
						goto release;
					}
				}
				kp = kp->e_next;
			}
			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			    sendflags, top, addr, control, p);
			splx(s);
			if (flags & MSG_SEND)
				so->so_temp = NULL;
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);

	KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
		so,
		resid,
		so->so_snd.sb_cc,
		space,
		error);

	return (error);
}
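/*
 * Illustrative sketch of a kernel caller handing sosend() a uio
 * (hypothetical, not from this file; field spellings follow the
 * 4.4BSD struct uio):
 *
 *	struct iovec iov = { buf, buflen };
 *	struct uio auio;
 *
 *	auio.uio_iov = &iov;
 *	auio.uio_iovcnt = 1;
 *	auio.uio_offset = 0;
 *	auio.uio_resid = buflen;
 *	auio.uio_segflg = UIO_SYSSPACE;
 *	auio.uio_rw = UIO_WRITE;
 *	auio.uio_procp = current_proc();
 *
 *	error = sosend(so, NULL, &auio, NULL, NULL, 0);
 *
 * A short write shows up as a nonzero auio.uio_resid, which callers
 * must check when EINTR/ERESTART is returned (see the comment above).
 */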
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;
	struct kextcb *kp;

	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
		so,
		uio->uio_resid,
		so->so_rcv.sb_cc,
		so->so_rcv.sb_lowat,
		so->so_rcv.sb_hiwat);

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_soreceive) {
			error = (*kp->e_soif->sf_soreceive)(so, psa, &uio,
			    mp0, controlp, flagsp, kp);
			if (error)
				return ((error == EJUSTRETURN) ? 0 : error);
		}
		kp = kp->e_next;
	}

	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/*
	 * When SO_WANTOOBFLAG is set we try to get out-of-band data
	 * regardless of the flags argument. Here is the case where
	 * out-of-band data is not inline.
	 */
	if ((flags & MSG_OOB) ||
	    ((so->so_options & SO_WANTOOBFLAG) != 0 &&
	     (so->so_options & SO_OOBINLINE) == 0 &&
	     (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		if ((so->so_options & SO_WANTOOBFLAG) != 0) {
			if (error == EWOULDBLOCK || error == EINVAL) {
				/*
				 * Let's try to get normal data:
				 * EWOULDBLOCK: out-of-band data not receive yet;
				 * EINVAL: out-of-band data already read.
				 */
				error = 0;
				goto nooob;
			} else if (error == 0 && flagsp)
				*flagsp |= MSG_OOB;
		}
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
		return (error);
	}
nooob:
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	if (error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) {
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
		return (error);
	}
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
		KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		printf("Waiting for socket data\n");
		error = sbwait(&so->so_rcv);
		printf("SORECEIVE - sbwait returned %d\n", error);
		splx(s);
		if (error) {
			KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
			return (error);
		}
		goto restart;
	}
dontblock:
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	nextrecord = m->m_nextpkt;
	if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
		orig_resid = 0;
		if (psa)
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
			    mp0 == 0);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#if 0
/*
 * This assertion needs rework.  The trouble is Appletalk uses many
 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
 * For now just remove the assertion...  CSM 9/98
 */
		else
			KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
			    ("receive 3"));
#endif
		/*
		 * Make sure to always set MSG_OOB event when getting
		 * out of band data inline.
		 */
		if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
		    (so->so_options & SO_OOBINLINE) != 0 &&
		    (so->so_state & SS_RCVATMARK) != 0) {
			flags |= MSG_OOB;
		}
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					postevent(so, 0, EV_OOB);
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, 0,0,0,0,0);
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		if (so->so_options & SO_DONTTRUNC)
			flags |= MSG_RCVMORE;
		else {
			flags |= MSG_TRUNC;
			if ((flags & MSG_PEEK) == 0)
				(void) sbdroprecord(&so->so_rcv);
		}
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
		flags |= MSG_HAVEMORE;
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);

	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
		so,
		uio->uio_resid,
		so->so_rcv.sb_cc,
		0,
		error);

	return (error);
}
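/*
 * Matching receive-side sketch (hypothetical caller, not from this file):
 *
 *	struct iovec iov = { buf, buflen };
 *	struct uio auio;
 *	int flags = 0;
 *
 *	auio.uio_iov = &iov;
 *	auio.uio_iovcnt = 1;
 *	auio.uio_offset = 0;
 *	auio.uio_resid = buflen;
 *	auio.uio_segflg = UIO_SYSSPACE;
 *	auio.uio_rw = UIO_READ;
 *	auio.uio_procp = current_proc();
 *
 *	error = soreceive(so, NULL, &auio, NULL, NULL, &flags);
 *
 * Passing a non-NULL mp0 instead returns the data as an mbuf chain and
 * uses the uio only for its uio_resid count, per the comment above.
 */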
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;
	struct kextcb *kp;
	int ret;

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_START, 0,0,0,0,0);
	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_soshutdown) {
			ret = (*kp->e_soif->sf_soshutdown)(so, how, kp);
			if (ret)
				return ((ret == EJUSTRETURN) ? 0 : ret);
		}
		kp = kp->e_next;
	}

	how++;
	if (how & FREAD) {
		sorflush(so);
		postevent(so, 0, EV_RCLOSED);
	}
	if (how & FWRITE) {
		ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
		postevent(so, 0, EV_WCLOSED);
		KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
		return (ret);
	}

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
	return (0);
}
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s, error;
	struct sockbuf asb;
	struct kextcb *kp;

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_sorflush) {
			if ((*kp->e_soif->sf_sorflush)(so, kp))
				return;
		}
		kp = kp->e_next;
	}

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAIT);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	selthreadclear(&sb->sb_sel);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}
/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
int
sooptcopyin(sopt, buf, len, minlen)
	struct	sockopt *sopt;
	caddr_t	buf;
	size_t	len;
	size_t	minlen;
{
	size_t	valsize;

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		return EINVAL;
	if (valsize > len)
		sopt->sopt_valsize = valsize = len;

	if (sopt->sopt_p != 0)
		return (copyin(sopt->sopt_val, buf, valsize));

	bcopy(sopt->sopt_val, buf, valsize);
	return 0;
}
int
sosetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int	error, optval;
	struct	linger l;
	struct	timeval tv;
	short	val;
	struct kextcb *kp;

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_socontrol) {
			error = (*kp->e_soif->sf_socontrol)(so, sopt, kp);
			if (error)
				return ((error == EJUSTRETURN) ? 0 : error);
		}
		kp = kp->e_next;
	}

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)(so, sopt));
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = l.l_linger;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_DONTTRUNC:
		case SO_WANTMORE:
		case SO_WANTOOBFLAG:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
					      &so->so_snd : &so->so_rcv,
					      (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			if (tv.tv_sec > SHRT_MAX / hz - hz) {
				error = EDOM;
				goto bad;
			}
			val = tv.tv_sec * hz + tv.tv_usec / tick;

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;

		case SO_NKE:
		{	struct so_nke nke;
			struct NFDescriptor *nf1, *nf2 = NULL;

			error = sooptcopyin(sopt, &nke,
					    sizeof nke, sizeof nke);
			if (error)
				goto bad;

			error = nke_insert(so, &nke);
			break;
		}

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)(so, sopt));
		}
	}
bad:
	return (error);
}
/* Helper routine for getsockopt */
int
sooptcopyout(sopt, buf, len)
	struct	sockopt *sopt;
	void	*buf;
	size_t	len;
{
	int	error;
	size_t	valsize;

	error = 0;

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer must
	 * be generated ahead of time.
	 */
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != 0) {
		if (sopt->sopt_p != 0)
			error = copyout(buf, sopt->sopt_val, valsize);
		else
			bcopy(buf, sopt->sopt_val, valsize);
	}
	return error;
}
int
sogetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int	error, optval;
	struct	linger l;
	struct	timeval tv;
	struct kextcb *kp;

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_socontrol) {
			error = (*kp->e_soif->sf_socontrol)(so, sopt, kp);
			if (error)
				return ((error == EJUSTRETURN) ? 0 : error);
		}
		kp = kp->e_next;
	}

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)(so, sopt));
		} else
			return (ENOPROTOOPT);
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_DONTTRUNC:
		case SO_WANTMORE:
		case SO_WANTOOBFLAG:
			optval = so->so_options & sopt->sopt_name;
integer:
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_NREAD:
		{
			int pkt_total;
			struct mbuf *m1;

			pkt_total = 0;
			m1 = so->so_rcv.sb_mb;
			if (so->so_proto->pr_flags & PR_ATOMIC) {
				kprintf("SKT CC: %d\n", so->so_rcv.sb_cc);
				while (m1) {
					if (m1->m_type == MT_DATA)
						pkt_total += m1->m_len;
					kprintf("CNT: %d/%d\n", m1->m_len, pkt_total);
					m1 = m1->m_next;
				}
				optval = pkt_total;
			} else
				optval = so->so_rcv.sb_cc;
			kprintf("RTN: %d\n", optval);
			goto integer;
		}

		case SO_ERROR:
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.sb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * tick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		return (error);
	}
}
void
sohasoutofband(so)
	register struct socket *so;
{
	struct proc *p;
	struct kextcb *kp;

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_sohasoutofband) {
			if ((*kp->e_soif->sf_sohasoutofband)(so, kp))
				return;
		}
		kp = kp->e_next;
	}
	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}
/*
 * Network filter support
 */
/* Run the list of filters, creating extension control blocks */
int
sfilter_init(register struct socket *so)
{
	struct kextcb *kp, **kpp;
	struct protosw *prp;
	struct NFDescriptor *nfp;

	prp = so->so_proto;
	nfp = prp->pr_sfilter.tqh_first;	/* non-null */
	kpp = &so->so_ext;
	kp = NULL;
	while (nfp) {
		MALLOC(kp, struct kextcb *, sizeof(*kp),
		       M_TEMP, M_WAITOK);
		if (kp == NULL)
			return (ENOBUFS);	/* so_free will clean up */
		*kpp = kp;
		kpp = &kp->e_next;
		kp->e_next = NULL;
		kp->e_fcb = NULL;
		kp->e_nfd = nfp;
		kp->e_soif = nfp->nf_soif;
		kp->e_sout = nfp->nf_soutil;
		/*
		 * Ignore return value for create
		 * Everyone gets a chance at startup
		 */
		if (kp->e_soif && kp->e_soif->sf_socreate)
			(*kp->e_soif->sf_socreate)(so, prp, kp);
		nfp = nfp->nf_next.tqe_next;
	}
	return (0);
}
/*
 * Run the list of filters, freeing extension control blocks
 * Assumes the soif/soutil blocks have been handled.
 */
int
sfilter_term(struct socket *so)
{
	struct kextcb *kp, *kp1;

	kp = sotokextcb(so);
	while (kp) {
		kp1 = kp->e_next;
		/*
		 * Ignore return code on termination; everyone must
		 * get terminated.
		 */
		if (kp->e_soif && kp->e_soif->sf_sofree)
			kp->e_soif->sf_sofree(so, kp);
		FREE(kp, M_TEMP);
		kp = kp1;
	}
	return (0);
}
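/*
 * Filter hook convention sketch (hypothetical NKE, not from this file):
 * each sf_* hook may return 0 to pass control down the chain, a real
 * errno to abort the operation, or EJUSTRETURN to claim it outright
 * (the callers above map EJUSTRETURN to success and stop processing):
 *
 *	static int
 *	my_sobind(struct socket *so, struct sockaddr *nam, struct kextcb *kp)
 *	{
 *		if (reject_address(nam))   (hypothetical policy check)
 *			return (EADDRNOTAVAIL);
 *		return (0);
 *	}
 */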
int
sopoll(struct socket *so, int events, struct ucred *cred, void * wql)
{
	struct proc *p = current_proc();
	int revents = 0;
	int s = splnet();

	if (events & (POLLIN | POLLRDNORM))
		if (soreadable(so))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

	if (revents == 0) {
		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			so->so_rcv.sb_flags |= SB_SEL;
			selrecord(p, &so->so_rcv.sb_sel, wql);
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			so->so_snd.sb_flags |= SB_SEL;
			selrecord(p, &so->so_snd.sb_sel, wql);
		}
	}

	splx(s);
	return (revents);
}
/*#### IPv6 Integration. Added new routines */
int
sooptgetm(struct sockopt *sopt, struct mbuf **mp)
{
	struct mbuf *m, *m_prev;
	int sopt_size = sopt->sopt_valsize;

	MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
	if (m == 0)
		return ENOBUFS;
	if (sopt_size > MLEN) {
		MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return ENOBUFS;
		}
		m->m_len = min(MCLBYTES, sopt_size);
	} else {
		m->m_len = min(MLEN, sopt_size);
	}
	sopt_size -= m->m_len;
	*mp = m;
	m_prev = m;

	while (sopt_size) {
		MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
		if (m == 0) {
			m_freem(*mp);
			return ENOBUFS;
		}
		if (sopt_size > MLEN) {
			MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_freem(*mp);
				return ENOBUFS;
			}
			m->m_len = min(MCLBYTES, sopt_size);
		} else {
			m->m_len = min(MLEN, sopt_size);
		}
		sopt_size -= m->m_len;
		m_prev->m_next = m;
		m_prev = m;
	}
	return 0;
}
/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
int
sooptmcopyin(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;

	if (sopt->sopt_val == NULL)
		return 0;
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != NULL) {
			int error;

			error = copyin(sopt->sopt_val, mtod(m, char *),
				       m->m_len);
			if (error != 0) {
				m_freem(m0);
				return (error);
			}
		} else
			bcopy(sopt->sopt_val, mtod(m, char *), m->m_len);
		sopt->sopt_valsize -= m->m_len;
		(caddr_t)sopt->sopt_val += m->m_len;
		m = m->m_next;
	}
	if (m != NULL) /* enough mbufs should have been allocated at ip6_sooptmcopyin() */
		panic("sooptmcopyin");
	return 0;
}
/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
int
sooptmcopyout(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;
	size_t valsize = 0;

	if (sopt->sopt_val == NULL)
		return 0;
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != NULL) {
			int error;

			error = copyout(mtod(m, char *), sopt->sopt_val,
					m->m_len);
			if (error != 0)
				return (error);
		} else
			bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
		sopt->sopt_valsize -= m->m_len;
		(caddr_t)sopt->sopt_val += m->m_len;
		valsize += m->m_len;
		m = m->m_next;
	}
	if (m != NULL) {
		/* enough soopt buffer should be given from user-land */
		m_freem(m0);
		return (EINVAL);
	}
	sopt->sopt_valsize = valsize;
	return 0;
}