diff --git a/bsd/kern/uipc_socket.c b/bsd/kern/uipc_socket.c
index 56eb901ee..21595dab8 100644
--- a/bsd/kern/uipc_socket.c
+++ b/bsd/kern/uipc_socket.c
@@ -3,19 +3,22 @@
  *
  * @APPLE_LICENSE_HEADER_START@
  *
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
  *
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  *
  * @APPLE_LICENSE_HEADER_END@
  */
@@ -53,17 +56,21 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	@(#)uipc_socket.c	8.6 (Berkeley) 5/2/95
+ *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
+ * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
 */
 
 #include 
 #include 
+#include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -94,6 +101,19 @@ extern int get_tcp_str_size();
 
 #include 
 
+static void	filt_sordetach(struct knote *kn);
+static int	filt_soread(struct knote *kn, long hint);
+static void	filt_sowdetach(struct knote *kn);
+static int	filt_sowrite(struct knote *kn, long hint);
+static int	filt_solisten(struct knote *kn, long hint);
+
+static struct filterops solisten_filtops =
+	{ 1, NULL, filt_sordetach, filt_solisten };
+static struct filterops soread_filtops =
+	{ 1, NULL, filt_sordetach, filt_soread };
+static struct filterops sowrite_filtops =
+	{ 1, NULL, filt_sowdetach, filt_sowrite };
+
 int socket_debug = 0;
 int socket_zone = M_SOCKET;
 so_gen_t	so_gencnt;	/* generation count for sockets */
@@ -117,11 +137,17 @@ SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW,
     &somaxconn, 0, "");
 
 /* Should we get a maximum also ??? */
+static int sosendmaxchain = 65536;
 static int sosendminchain = 16384;
+static int sorecvmincopy = 16384;
 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
     0, "");
+SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
+    0, "");
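The two SYSCTL_INT declarations above export kern.ipc.sosendminchain and kern.ipc.sorecvmincopy as read-write tunables (sosendmaxchain gets no sysctl handle). A minimal userspace sketch of inspecting them; the OID names follow directly from the declarations:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int val;
	size_t len = sizeof(val);

	/* threshold above which sosend() drops the funnel and chains mbufs */
	if (sysctlbyname("kern.ipc.sosendminchain", &val, &len, NULL, 0) == 0)
		printf("sosendminchain = %d\n", val);

	/* threshold above which soreceive() batches mbufs and delays the copyout */
	len = sizeof(val);
	if (sysctlbyname("kern.ipc.sorecvmincopy", &val, &len, NULL, 0) == 0)
		printf("sorecvmincopy = %d\n", val);
	return (0);
}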
 
 void so_cache_timer();
+struct mbuf *m_getpackets(int, int, int);
+
 
 /*
  * Socket operation routines.
@@ -131,6 +157,7 @@ void so_cache_timer();
  * switching out to the protocol specific routines.
  */
 
+#ifdef __APPLE__
 void socketinit()
 {
     vm_size_t	str_size;
@@ -312,7 +339,7 @@ void so_cache_timer()
 	(void) thread_funnel_set(network_flock, FALSE);
 }
 
-
+#endif /* __APPLE__ */
 
 /*
  * Get a socket structure from our zone, and initialize it.
@@ -353,19 +380,31 @@ socreate(dom, aso, type, proto)
 	struct socket **aso;
 	register int type;
 	int proto;
-
 {
 	struct proc *p = current_proc();
 	register struct protosw *prp;
-	struct socket *so;
+	register struct socket *so;
 	register int error = 0;
-
+#if TCPDEBUG
+	extern int tcpconsdebug;
+#endif
 	if (proto)
 		prp = pffindproto(dom, proto, type);
 	else
 		prp = pffindtype(dom, type);
+
 	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
 		return (EPROTONOSUPPORT);
+#ifndef __APPLE__
+
+	if (p->p_prison && jail_socket_unixiproute_only &&
+	    prp->pr_domain->dom_family != PF_LOCAL &&
+	    prp->pr_domain->dom_family != PF_INET &&
+	    prp->pr_domain->dom_family != PF_ROUTE) {
+		return (EPROTONOSUPPORT);
+	}
+
+#endif
 	if (prp->pr_type != type)
 		return (EPROTOTYPE);
 	so = soalloc(p != 0, dom, type);
@@ -376,28 +415,45 @@ socreate(dom, aso, type, proto)
 	TAILQ_INIT(&so->so_comp);
 	so->so_type = type;
 
+#ifdef __APPLE__
 	if (p != 0) {
 		if (p->p_ucred->cr_uid == 0)
 			so->so_state = SS_PRIV;
 
 		so->so_uid = p->p_ucred->cr_uid;
 	}
-
+#else
+	so->so_cred = p->p_ucred;
+	crhold(so->so_cred);
+#endif
 	so->so_proto = prp;
+#ifdef __APPLE__
 	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
 	if (prp->pr_sfilter.tqh_first)
 		error = sfilter_init(so);
 	if (error == 0)
+#endif
 		error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
-
 	if (error) {
+		/*
+		 * Warning:
+		 * If so_pcb is not zero, the socket will be leaked,
+		 * so protocol attachment handler must be coded carefully
+		 */
 		so->so_state |= SS_NOFDREF;
 		sofree(so);
 		return (error);
 	}
+#ifdef __APPLE__
 	prp->pr_domain->dom_refs++;
 	so->so_rcv.sb_so = so->so_snd.sb_so = so;
 	TAILQ_INIT(&so->so_evlist);
+#if TCPDEBUG
+	if (tcpconsdebug == 2)
+		so->so_options |= SO_DEBUG;
+#endif
+#endif
+
 	*aso = so;
 	return (0);
 }
@@ -414,14 +470,16 @@ sobind(so, nam)
 	int s = splnet();
 
 	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
-	if (error == 0) /* ??? 
*/ - { kp = sotokextcb(so); - while (kp) - { if (kp->e_soif && kp->e_soif->sf_sobind) - { error = (*kp->e_soif->sf_sobind)(so, nam, kp); - if (error) - { if (error == EJUSTRETURN) + if (error == 0) { + kp = sotokextcb(so); + while (kp) { + if (kp->e_soif && kp->e_soif->sf_sobind) { + error = (*kp->e_soif->sf_sobind)(so, nam, kp); + if (error) { + if (error == EJUSTRETURN) { + error = 0; break; + } splx(s); return(error); } @@ -439,10 +497,32 @@ sodealloc(so) { so->so_gencnt = ++so_gencnt; +#ifndef __APPLE__ + if (so->so_rcv.sb_hiwat) + (void)chgsbsize(so->so_cred->cr_uidinfo, + &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY); + if (so->so_snd.sb_hiwat) + (void)chgsbsize(so->so_cred->cr_uidinfo, + &so->so_snd.sb_hiwat, 0, RLIM_INFINITY); +#ifdef INET + if (so->so_accf != NULL) { + if (so->so_accf->so_accept_filter != NULL && + so->so_accf->so_accept_filter->accf_destroy != NULL) { + so->so_accf->so_accept_filter->accf_destroy(so); + } + if (so->so_accf->so_accept_filter_str != NULL) + FREE(so->so_accf->so_accept_filter_str, M_ACCF); + FREE(so->so_accf, M_ACCF); + } +#endif /* INET */ + crfree(so->so_cred); + zfreei(so->so_zone, so); +#else if (so->cached_in_sock_layer == 1) cached_sock_free(so); else _FREE_ZONE(so, sizeof(*so), so->so_zone); +#endif /* __APPLE__ */ } int @@ -467,13 +547,14 @@ solisten(so, backlog) backlog = somaxconn; so->so_qlimit = backlog; kp = sotokextcb(so); - while (kp) - { - if (kp->e_soif && kp->e_soif->sf_solisten) - { error = (*kp->e_soif->sf_solisten)(so, kp); - if (error) - { if (error == EJUSTRETURN) + while (kp) { + if (kp->e_soif && kp->e_soif->sf_solisten) { + error = (*kp->e_soif->sf_solisten)(so, kp); + if (error) { + if (error == EJUSTRETURN) { + error = 0; break; + } splx(s); return(error); } @@ -489,43 +570,58 @@ solisten(so, backlog) void sofree(so) register struct socket *so; -{ int error; +{ + int error; struct kextcb *kp; struct socket *head = so->so_head; kp = sotokextcb(so); - while (kp) - { if (kp->e_soif && kp->e_soif->sf_sofree) - { error = (*kp->e_soif->sf_sofree)(so, kp); - if (error) + while (kp) { + if (kp->e_soif && kp->e_soif->sf_sofree) { + error = (*kp->e_soif->sf_sofree)(so, kp); + if (error) { + selthreadclear(&so->so_snd.sb_sel); + selthreadclear(&so->so_rcv.sb_sel); return; /* void fn */ + } } kp = kp->e_next; } - if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) + if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) { +#ifdef __APPLE__ + selthreadclear(&so->so_snd.sb_sel); + selthreadclear(&so->so_rcv.sb_sel); +#endif return; - if (head != NULL) { - if (so->so_state & SS_INCOMP) { - TAILQ_REMOVE(&head->so_incomp, so, so_list); - head->so_incqlen--; - } else if (so->so_state & SS_COMP) { - /* - * We must not decommission a socket that's - * on the accept(2) queue. If we do, then - * accept(2) may hang after select(2) indicated - * that the listening socket was ready. - */ - return; - } else { - panic("sofree: not queued"); - } + } + if (head != NULL) { + if (so->so_state & SS_INCOMP) { + TAILQ_REMOVE(&head->so_incomp, so, so_list); + head->so_incqlen--; + } else if (so->so_state & SS_COMP) { + /* + * We must not decommission a socket that's + * on the accept(2) queue. If we do, then + * accept(2) may hang after select(2) indicated + * that the listening socket was ready. 
+ */ +#ifdef __APPLE__ + selthreadclear(&so->so_snd.sb_sel); + selthreadclear(&so->so_rcv.sb_sel); +#endif + return; + } else { + panic("sofree: not queued"); + } head->so_qlen--; - so->so_state &= ~(SS_INCOMP|SS_COMP); + so->so_state &= ~SS_INCOMP; so->so_head = NULL; } - +#ifdef __APPLE__ + selthreadclear(&so->so_snd.sb_sel); sbrelease(&so->so_snd); +#endif sorflush(so); sfilter_term(so); sodealloc(so); @@ -544,15 +640,15 @@ soclose(so) int error = 0; struct kextcb *kp; -#if FB31SIG - funsetown(so->so_pgid); +#ifndef __APPLE__ + funsetown(so->so_sigio); #endif kp = sotokextcb(so); - while (kp) - { if (kp->e_soif && kp->e_soif->sf_soclose) - { error = (*kp->e_soif->sf_soclose)(so, kp); - if (error) - { splx(s); + while (kp) { + if (kp->e_soif && kp->e_soif->sf_soclose) { + error = (*kp->e_soif->sf_soclose)(so, kp); + if (error) { + splx(s); return((error == EJUSTRETURN) ? 0 : error); } } @@ -608,8 +704,10 @@ discard: if (so->so_pcb && so->so_state & SS_NOFDREF) panic("soclose: NOFDREF"); so->so_state |= SS_NOFDREF; +#ifdef __APPLE__ so->so_proto->pr_domain->dom_refs--; evsofree(so); +#endif sofree(so); splx(s); return (error); @@ -622,15 +720,22 @@ int soabort(so) struct socket *so; { + int error; - return (*so->so_proto->pr_usrreqs->pru_abort)(so); + error = (*so->so_proto->pr_usrreqs->pru_abort)(so); + if (error) { + sofree(so); + return error; + } + return (0); } int soaccept(so, nam) register struct socket *so; struct sockaddr **nam; -{ int s = splnet(); +{ + int s = splnet(); int error; struct kextcb *kp; @@ -638,14 +743,16 @@ soaccept(so, nam) panic("soaccept: !NOFDREF"); so->so_state &= ~SS_NOFDREF; error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); - if (error == 0) - { kp = sotokextcb(so); + if (error == 0) { + kp = sotokextcb(so); while (kp) { - if (kp->e_soif && kp->e_soif->sf_soaccept) - { error = (*kp->e_soif->sf_soaccept)(so, nam, kp); - if (error) - { if (error == EJUSTRETURN) + if (kp->e_soif && kp->e_soif->sf_soaccept) { + error = (*kp->e_soif->sf_soaccept)(so, nam, kp); + if (error) { + if (error == EJUSTRETURN) { + error = 0; break; + } splx(s); return(error); } @@ -684,26 +791,27 @@ soconnect(so, nam) (error = sodisconnect(so)))) error = EISCONN; else { + /* + * Run connect filter before calling protocol: + * - non-blocking connect returns before completion; + * - allows filters to modify address. 
+ */ + kp = sotokextcb(so); + while (kp) { + if (kp->e_soif && kp->e_soif->sf_soconnect) { + error = (*kp->e_soif->sf_soconnect)(so, nam, kp); + if (error) { + if (error == EJUSTRETURN) { + error = 0; + } + splx(s); + return(error); + } + } + kp = kp->e_next; + } error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); - if (error == 0) - { - kp = sotokextcb(so); - while (kp) - { - if (kp->e_soif && kp->e_soif->sf_soconnect) - { error = (*kp->e_soif->sf_soconnect)(so, nam, kp); - if (error) - { if (error == EJUSTRETURN) - break; - splx(s); - return(error); - } - } - kp = kp->e_next; - } - } } - splx(s); return (error); } @@ -718,14 +826,16 @@ soconnect2(so1, so2) struct kextcb *kp; error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); - if (error == 0) - { kp = sotokextcb(so1); - while (kp) - { if (kp->e_soif && kp->e_soif->sf_soconnect2) - { error = (*kp->e_soif->sf_soconnect2)(so1, so2, kp); - if (error) - { if (error == EJUSTRETURN) + if (error == 0) { + kp = sotokextcb(so1); + while (kp) { + if (kp->e_soif && kp->e_soif->sf_soconnect2) { + error = (*kp->e_soif->sf_soconnect2)(so1, so2, kp); + if (error) { + if (error == EJUSTRETURN) { + return 0; break; + } splx(s); return(error); } @@ -754,15 +864,16 @@ sodisconnect(so) goto bad; } error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); - - if (error == 0) - { kp = sotokextcb(so); - while (kp) - { if (kp->e_soif && kp->e_soif->sf_sodisconnect) - { error = (*kp->e_soif->sf_sodisconnect)(so, kp); - if (error) - { if (error == EJUSTRETURN) + if (error == 0) { + kp = sotokextcb(so); + while (kp) { + if (kp->e_soif && kp->e_soif->sf_sodisconnect) { + error = (*kp->e_soif->sf_sodisconnect)(so, kp); + if (error) { + if (error == EJUSTRETURN) { + error = 0; break; + } splx(s); return(error); } @@ -809,7 +920,7 @@ sosend(so, addr, uio, top, control, flags) { struct mbuf **mp; - register struct mbuf *m; + register struct mbuf *m, *freelist = NULL; register long space, len, resid; int clen = 0, error, s, dontroute, mlen, sendflags; int atomic = sosendallatonce(so) || top; @@ -888,7 +999,7 @@ restart: if ((atomic && resid > so->so_snd.sb_hiwat) || clen > so->so_snd.sb_hiwat) snderr(EMSGSIZE); - if (space < resid + clen && uio && + if (space < resid + clen && (atomic || space < so->so_snd.sb_lowat || space < clen)) { if (so->so_state & SS_NBIO) snderr(EWOULDBLOCK); @@ -902,6 +1013,7 @@ restart: splx(s); mp = ⊤ space -= clen; + do { if (uio == NULL) { /* @@ -911,41 +1023,77 @@ restart: if (flags & MSG_EOR) top->m_flags |= M_EOR; } else { - boolean_t funnel_state = TRUE; - int chainmbufs = (sosendminchain > 0 && resid >= sosendminchain); - - if (chainmbufs) - funnel_state = thread_funnel_set(network_flock, FALSE); + boolean_t dropped_funnel = FALSE; + int chainlength; + int bytes_to_copy; + + bytes_to_copy = min(resid, space); + + if (sosendminchain > 0) { + if (bytes_to_copy >= sosendminchain) { + dropped_funnel = TRUE; + (void)thread_funnel_set(network_flock, FALSE); + } + chainlength = 0; + } else + chainlength = sosendmaxchain; + do { - KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_NONE, -1, 0, 0, 0, 0); - if (top == 0) { + + if (bytes_to_copy >= MINCLSIZE) { + /* + * try to maintain a local cache of mbuf clusters needed to complete this write + * the list is further limited to the number that are currently needed to fill the socket + * this mechanism allows a large number of mbufs/clusters to be grabbed under a single + * mbuf lock... 
if we can't get any clusters, then fall back to trying for mbufs
+			 * if we fail early (or miscalculate the number needed) make sure to release any clusters
+			 * we haven't yet consumed.
+			 */
+			if ((m = freelist) == NULL) {
+				int num_needed;
+				int hdrs_needed = 0;
+
+				if (top == 0)
+					hdrs_needed = 1;
+				num_needed = bytes_to_copy / MCLBYTES;
+
+				if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
+					num_needed++;
+
+				if ((freelist = m_getpackets(num_needed, hdrs_needed, M_WAIT)) == NULL)
+					goto getpackets_failed;
+				m = freelist;
+			}
+			freelist = m->m_next;
+			m->m_next = NULL;
+
+			mlen = MCLBYTES;
+			len = min(mlen, bytes_to_copy);
+		} else {
+getpackets_failed:
+			if (top == 0) {
 			MGETHDR(m, M_WAIT, MT_DATA);
 			mlen = MHLEN;
 			m->m_pkthdr.len = 0;
 			m->m_pkthdr.rcvif = (struct ifnet *)0;
-			} else {
+			} else {
 			MGET(m, M_WAIT, MT_DATA);
 			mlen = MLEN;
+			}
+			len = min(mlen, bytes_to_copy);
+			/*
+			 * For datagram protocols, leave room
+			 * for protocol headers in first mbuf.
+			 */
+			if (atomic && top == 0 && len < mlen)
+				MH_ALIGN(m, len);
 			}
-			if (resid >= MINCLSIZE) {
-				MCLGET(m, M_WAIT);
-				if ((m->m_flags & M_EXT) == 0)
-					goto nopages;
-				mlen = MCLBYTES;
-				len = min(min(mlen, resid), space);
-			} else {
-nopages:
-				len = min(min(mlen, resid), space);
-				/*
-				 * For datagram protocols, leave room
-				 * for protocol headers in first mbuf.
-				 */
-				if (atomic && top == 0 && len < mlen)
-					MH_ALIGN(m, len);
-			}
-			KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_NONE, -1, 0, 0, 0, 0);
+			chainlength += len;
+			space -= len;
+
 			error = uiomove(mtod(m, caddr_t), (int)len, uio);
+
 			resid = uio->uio_resid;
 
 			m->m_len = len;
@@ -959,9 +1107,12 @@ nopages:
 					top->m_flags |= M_EOR;
 				break;
 			}
-		} while (space > 0 && (chainmbufs || atomic || resid < MINCLSIZE));
-		if (chainmbufs)
-			funnel_state = thread_funnel_set(network_flock, TRUE);
+			bytes_to_copy = min(resid, space);
+
+		} while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));
+
+		if (dropped_funnel == TRUE)
+			(void)thread_funnel_set(network_flock, TRUE);
 		if (error)
 			goto release;
 	}
@@ -990,7 +1141,6 @@ nopages:
 	if (dontroute)
 		so->so_options |= SO_DONTROUTE;
 	s = splnet();				/* XXX */
-	kp = sotokextcb(so);
 	/* Compute flags here, for pru_send and NKEs */
 	sendflags = (flags & MSG_OOB) ? PRUS_OOB :
 	    /*
@@ -1004,17 +1154,21 @@ nopages:
 	    PRUS_EOF :
 	    /* If there is more to send set PRUS_MORETOCOME */
 	    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
+	kp = sotokextcb(so);
 	while (kp)
-	{	if (kp->e_soif && kp->e_soif->sf_sosend)
-		{	error = (*kp->e_soif->sf_sosend)(so, &addr,
+	{	if (kp->e_soif && kp->e_soif->sf_sosend) {
+			error = (*kp->e_soif->sf_sosend)(so, &addr,
 							 &uio, &top, &control,
 							 &sendflags, kp);
-			if (error)
-			{	splx(s);
-				if (error == EJUSTRETURN)
-				{	sbunlock(&so->so_snd);
+			if (error) {
+				splx(s);
+				if (error == EJUSTRETURN) {
+					sbunlock(&so->so_snd);
+
+					if (freelist)
+						m_freem_list(freelist);
 					return(0);
 				}
 				goto release;
@@ -1026,9 +1180,10 @@ nopages:
 	error = (*so->so_proto->pr_usrreqs->pru_send)(so,
 	    sendflags, top, addr, control, p);
 	splx(s);
+#ifdef __APPLE__
 	if (flags & MSG_SEND)
 		so->so_temp = NULL;
-
+#endif
 	if (dontroute)
 		so->so_options &= ~SO_DONTROUTE;
 	clen = 0;
@@ -1047,6 +1202,8 @@ out:
 		m_freem(top);
 	if (control)
 		m_freem(control);
+	if (freelist)
+		m_freem_list(freelist);
 
 	KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
 		     so,
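One user-visible consequence of the sosend() checks above: for atomic (datagram) protocols, a request larger than the send buffer's high-water mark fails immediately with EMSGSIZE rather than blocking. A hedged userspace sketch; the 2048-byte SO_SNDBUF value and the discard port are illustrative only:

#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	int s = socket(AF_INET, SOCK_DGRAM, 0);
	int sndbuf = 2048;		/* shrink so_snd.sb_hiwat */
	char big[8192] = { 0 };
	struct sockaddr_in sin;

	setsockopt(s, SOL_SOCKET, SO_SNDBUF, &sndbuf, sizeof(sndbuf));

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_port = htons(9);	/* discard service */
	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

	/* atomic send > sb_hiwat: sosend() returns EMSGSIZE, never blocks */
	if (sendto(s, big, sizeof(big), 0,
	    (struct sockaddr *)&sin, sizeof(sin)) < 0)
		perror("sendto");	/* expect: Message too long */
	close(s);
	return (0);
}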
@@ -1083,14 +1240,20 @@ soreceive(so, psa, uio, mp0, controlp, flagsp)
 	struct mbuf **controlp;
 	int *flagsp;
 {
-	register struct mbuf *m, **mp;
+	register struct mbuf *m, **mp, *ml;
 	register int flags, len, error, s, offset;
 	struct protosw *pr = so->so_proto;
 	struct mbuf *nextrecord;
 	int moff, type = 0;
 	int orig_resid = uio->uio_resid;
 	struct kextcb *kp;
-
+	volatile struct mbuf *free_list;
+	volatile int delayed_copy_len;
+	int can_delay;
+	int need_event;
+	struct proc *p = current_proc();
+
+
 	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
 		     so,
 		     uio->uio_resid,
@@ -1099,13 +1262,15 @@ soreceive(so, psa, uio, mp0, controlp, flagsp)
 		     so->so_rcv.sb_hiwat);
 
 	kp = sotokextcb(so);
-	while (kp)
-	{	if (kp->e_soif && kp->e_soif->sf_soreceive)
-		{	error = (*kp->e_soif->sf_soreceive)(so, psa, &uio,
+	while (kp) {
+		if (kp->e_soif && kp->e_soif->sf_soreceive) {
+			error = (*kp->e_soif->sf_soreceive)(so, psa, &uio,
 							    mp0, controlp,
 							    flagsp, kp);
-			if (error)
+			if (error) {
+				KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
 				return((error == EJUSTRETURN) ? 0 : error);
+			}
 		}
 		kp = kp->e_next;
 	}
@@ -1129,6 +1294,10 @@ soreceive(so, psa, uio, mp0, controlp, flagsp)
 	    (so->so_options & SO_OOBINLINE) == 0 &&
 	    (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
 		m = m_get(M_WAIT, MT_DATA);
+		if (m == NULL) {
+			KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0);
+			return (ENOBUFS);
+		}
 		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
 		if (error)
 			goto bad;
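recv(2) with MSG_OOB lands in the pru_rcvoob call above, and the EWOULDBLOCK/EINVAL handling just below is what makes its two failure modes visible to callers. A small sketch of both ways to read TCP urgent data; fd is assumed to be a connected TCP socket:

#include <sys/socket.h>
#include <unistd.h>

/*
 * Read one byte of TCP urgent data.  Out-of-line reads can fail with
 * EWOULDBLOCK (mark not reached yet) or EINVAL (byte already consumed),
 * mirroring the checks in soreceive() below.
 */
static int
read_urgent(int fd, int inline_mode)
{
	char c;

	if (inline_mode) {
		/* SO_OOBINLINE leaves the urgent byte in the normal stream */
		int on = 1;

		setsockopt(fd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on));
		return (int)recv(fd, &c, 1, 0);
	}
	return (int)recv(fd, &c, 1, MSG_OOB);
}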
+ */ + error = 0; + goto nooob; + } else if (error == 0 && flagsp) + *flagsp |= MSG_OOB; + } KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0); +#endif return (error); } nooob: @@ -1161,9 +1332,12 @@ nooob: if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) (*pr->pr_usrreqs->pru_rcvd)(so, 0); + + free_list = (struct mbuf *)0; + delayed_copy_len = 0; restart: - if (error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) - { + error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); + if (error) { KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0); return (error); } @@ -1183,9 +1357,10 @@ restart: */ if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && so->so_rcv.sb_cc < uio->uio_resid) && - (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || + (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { + KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1")); if (so->so_error) { if (m) @@ -1220,22 +1395,31 @@ restart: sbunlock(&so->so_rcv); if (socket_debug) printf("Waiting for socket data\n"); + error = sbwait(&so->so_rcv); if (socket_debug) printf("SORECEIVE - sbwait returned %d\n", error); splx(s); - if (error) - { + if (error) { KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0); return (error); } goto restart; } dontblock: -#ifdef notyet /* XXXX */ +#ifndef __APPLE__ if (uio->uio_procp) uio->uio_procp->p_stats->p_ru.ru_msgrcv++; -#endif +#else /* __APPLE__ */ + /* + * 2207985 + * This should be uio->uio-procp; however, some callers of this + * function use auto variables with stack garbage, and fail to + * fill out the uio structure properly. + */ + if (p) + p->p_stats->p_ru.ru_msgrcv++; +#endif /* __APPLE__ */ nextrecord = m->m_nextpkt; if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) { KASSERT(m->m_type == MT_SONAME, ("receive 1a")); @@ -1286,13 +1470,22 @@ dontblock: } moff = 0; offset = 0; - while (m && uio->uio_resid > 0 && error == 0) { + + if (!(flags & MSG_PEEK) && uio->uio_resid > sorecvmincopy) + can_delay = 1; + else + can_delay = 0; + + need_event = 0; + + + while (m && (uio->uio_resid - delayed_copy_len) > 0 && error == 0) { if (m->m_type == MT_OOBDATA) { if (type != MT_OOBDATA) break; } else if (type == MT_OOBDATA) break; -#if 0 +#ifndef __APPLE__ /* * This assertion needs rework. The trouble is Appletalk is uses many * mbuf types (NOT listed in mbuf.h!) which will trigger this panic. @@ -1301,18 +1494,19 @@ dontblock: else KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER, ("receive 3")); -#endif - /* - * Make sure to allways set MSG_OOB event when getting - * out of band data inline. - */ +#else + /* + * Make sure to allways set MSG_OOB event when getting + * out of band data inline. + */ if ((so->so_options & SO_WANTOOBFLAG) != 0 && - (so->so_options & SO_OOBINLINE) != 0 && - (so->so_state & SS_RCVATMARK) != 0) { - flags |= MSG_OOB; - } + (so->so_options & SO_OOBINLINE) != 0 && + (so->so_state & SS_RCVATMARK) != 0) { + flags |= MSG_OOB; + } +#endif so->so_state &= ~SS_RCVATMARK; - len = uio->uio_resid; + len = uio->uio_resid - delayed_copy_len; if (so->so_oobmark && len > so->so_oobmark - offset) len = so->so_oobmark - offset; if (len > m->m_len - moff) @@ -1326,13 +1520,48 @@ dontblock: * block interrupts again. 
@@ -1326,13 +1520,48 @@ dontblock:
 		 * block interrupts again.
 		 */
 		if (mp == 0) {
-			splx(s);
-			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
-			s = splnet();
-			if (error)
-				goto release;
+			if (can_delay && len == m->m_len) {
+				/*
+				 * only delay the copy if we're consuming the
+				 * mbuf and we're NOT in MSG_PEEK mode
+				 * and we have enough data to make it worthwhile
+				 * to drop and retake the funnel... can_delay
+				 * reflects the state of the 2 latter constraints
+				 * moff should always be zero in these cases
+				 */
+				delayed_copy_len += len;
+			} else {
+				splx(s);
+
+				if (delayed_copy_len) {
+					error = sodelayed_copy(uio, &free_list, &delayed_copy_len);
+
+					if (error) {
+						s = splnet();
+						goto release;
+					}
+					if (m != so->so_rcv.sb_mb) {
+						/*
+						 * can only get here if MSG_PEEK is not set
+						 * therefore, m should point at the head of the rcv queue...
+						 * if it doesn't, it means something drastically changed
+						 * while we were out from behind the funnel in sodelayed_copy...
+						 * perhaps a RST on the stream... in any event, the stream has
+						 * been interrupted... it's probably best just to return
+						 * whatever data we've moved and let the caller sort it out...
+						 */
+						break;
+					}
+				}
+				error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
+
+				s = splnet();
+				if (error)
+					goto release;
+			}
 		} else
 			uio->uio_resid -= len;
+
 		if (len == m->m_len - moff) {
 			if (m->m_flags & M_EOR)
 				flags |= MSG_EOR;
@@ -1342,14 +1571,21 @@ dontblock:
 			} else {
 				nextrecord = m->m_nextpkt;
 				sbfree(&so->so_rcv, m);
+
 				if (mp) {
 					*mp = m;
 					mp = &m->m_next;
 					so->so_rcv.sb_mb = m = m->m_next;
 					*mp = (struct mbuf *)0;
 				} else {
-					MFREE(m, so->so_rcv.sb_mb);
-					m = so->so_rcv.sb_mb;
+					m->m_nextpkt = 0;
+					if (free_list == NULL)
+						free_list = m;
+					else
+						ml->m_next = m;
+					ml = m;
+					so->so_rcv.sb_mb = m = m->m_next;
+					ml->m_next = 0;
 				}
 				if (m)
 					m->m_nextpkt = nextrecord;
@@ -1370,7 +1606,11 @@ dontblock:
 			so->so_oobmark -= len;
 			if (so->so_oobmark == 0) {
 				so->so_state |= SS_RCVATMARK;
-				postevent(so, 0, EV_OOB);
+				/*
+				 * delay posting the actual event until after
+				 * any delayed copy processing has finished
+				 */
+				need_event = 1;
 				break;
 			}
 		} else {
@@ -1382,37 +1622,62 @@ dontblock:
 		if (flags & MSG_EOR)
 			break;
 		/*
-		 * If the MSG_WAITALL flag is set (for non-atomic socket),
+		 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket),
 		 * we must not quit until "uio->uio_resid == 0" or an error
 		 * termination.  If a signal/timeout occurs, return
 		 * with a short count but without error.
 		 * Keep sockbuf locked against other readers.
 		 */
-		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
+		while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio->uio_resid - delayed_copy_len) > 0 &&
 		    !sosendallatonce(so) && !nextrecord) {
 			if (so->so_error || so->so_state & SS_CANTRCVMORE)
-				break;
-			error = sbwait(&so->so_rcv);
-			if (error) {
-				sbunlock(&so->so_rcv);
-				splx(s);
-				KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, 0,0,0,0,0);
-				return (0);
+				goto release;
+
+			if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
+				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
+			if (sbwait(&so->so_rcv)) {
+				error = 0;
+				goto release;
 			}
+			/*
+			 * have to wait until after we get back from the sbwait to do the copy because
+			 * we will drop the funnel if we have enough data that has been delayed... by dropping
+			 * the funnel we open up a window allowing the netisr thread to process the incoming packets
+			 * and to change the state of this socket... we're issuing the sbwait because
+			 * the socket is empty and we're expecting the netisr thread to wake us up when more
+			 * packets arrive... 
if we allow that processing to happen and then sbwait, we + * could stall forever with packets sitting in the socket if no further packets + * arrive from the remote side. + * + * we want to copy before we've collected all the data to satisfy this request to + * allow the copy to overlap the incoming packet processing on an MP system + */ + if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) { + + error = sodelayed_copy(uio, &free_list, &delayed_copy_len); + + if (error) + goto release; } m = so->so_rcv.sb_mb; - if (m) + if (m) { nextrecord = m->m_nextpkt; + } } } if (m && pr->pr_flags & PR_ATOMIC) { +#ifdef __APPLE__ if (so->so_options & SO_DONTTRUNC) flags |= MSG_RCVMORE; - else - { flags |= MSG_TRUNC; + else { +#endif + flags |= MSG_TRUNC; if ((flags & MSG_PEEK) == 0) (void) sbdroprecord(&so->so_rcv); +#ifdef __APPLE__ } +#endif } if ((flags & MSG_PEEK) == 0) { if (m == 0) @@ -1420,8 +1685,23 @@ dontblock: if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreqs->pru_rcvd)(so, flags); } +#ifdef __APPLE__ if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) flags |= MSG_HAVEMORE; + + if (delayed_copy_len) { + error = sodelayed_copy(uio, &free_list, &delayed_copy_len); + + if (error) + goto release; + } + if (free_list) { + m_freem_list((struct mbuf *)free_list); + free_list = (struct mbuf *)0; + } + if (need_event) + postevent(so, 0, EV_OOB); +#endif if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { sbunlock(&so->so_rcv); @@ -1432,6 +1712,12 @@ dontblock: if (flagsp) *flagsp |= flags; release: + if (delayed_copy_len) { + error = sodelayed_copy(uio, &free_list, &delayed_copy_len); + } + if (free_list) { + m_freem_list((struct mbuf *)free_list); + } sbunlock(&so->so_rcv); splx(s); @@ -1445,6 +1731,38 @@ release: return (error); } + +int sodelayed_copy(struct uio *uio, struct mbuf **free_list, int *resid) +{ + int error = 0; + boolean_t dropped_funnel = FALSE; + struct mbuf *m; + + m = *free_list; + + if (*resid >= sorecvmincopy) { + dropped_funnel = TRUE; + + (void)thread_funnel_set(network_flock, FALSE); + } + while (m && error == 0) { + + error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio); + + m = m->m_next; + } + m_freem_list(*free_list); + + *free_list = (struct mbuf *)NULL; + *resid = 0; + + if (dropped_funnel == TRUE) + (void)thread_funnel_set(network_flock, TRUE); + + return (error); +} + + int soshutdown(so, how) register struct socket *so; @@ -1457,21 +1775,22 @@ soshutdown(so, how) KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_START, 0,0,0,0,0); kp = sotokextcb(so); - while (kp) - { if (kp->e_soif && kp->e_soif->sf_soshutdown) - { ret = (*kp->e_soif->sf_soshutdown)(so, how, kp); - if (ret) + while (kp) { + if (kp->e_soif && kp->e_soif->sf_soshutdown) { + ret = (*kp->e_soif->sf_soshutdown)(so, how, kp); + if (ret) { + KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0); return((ret == EJUSTRETURN) ? 
0 : ret); + } } kp = kp->e_next; } - how++; - if (how & FREAD) { + if (how != SHUT_WR) { sorflush(so); postevent(so, 0, EV_RCLOSED); } - if (how & FWRITE) { + if (how != SHUT_RD) { ret = ((*pr->pr_usrreqs->pru_shutdown)(so)); postevent(so, 0, EV_WCLOSED); KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0); @@ -1493,9 +1812,9 @@ sorflush(so) struct kextcb *kp; kp = sotokextcb(so); - while (kp) - { if (kp->e_soif && kp->e_soif->sf_sorflush) - { if ((*kp->e_soif->sf_sorflush)(so, kp)) + while (kp) { + if (kp->e_soif && kp->e_soif->sf_sorflush) { + if ((*kp->e_soif->sf_sorflush)(so, kp)) return; } kp = kp->e_next; @@ -1506,11 +1825,19 @@ sorflush(so) s = splimp(); socantrcvmore(so); sbunlock(sb); +#ifdef __APPLE__ + selthreadclear(&sb->sb_sel); +#endif asb = *sb; bzero((caddr_t)sb, sizeof (*sb)); + if (asb.sb_flags & SB_KNOTE) { + sb->sb_sel.si_note = asb.sb_sel.si_note; + sb->sb_flags = SB_KNOTE; + } splx(s); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) (*pr->pr_domain->dom_dispose)(asb.sb_mb); + sbrelease(&asb); } @@ -1559,10 +1886,14 @@ sosetopt(so, sopt) short val; struct kextcb *kp; + if (sopt->sopt_dir != SOPT_SET) { + sopt->sopt_dir = SOPT_SET; + } + kp = sotokextcb(so); - while (kp) - { if (kp->e_soif && kp->e_soif->sf_socontrol) - { error = (*kp->e_soif->sf_socontrol)(so, sopt, kp); + while (kp) { + if (kp->e_soif && kp->e_soif->sf_socontrol) { + error = (*kp->e_soif->sf_socontrol)(so, sopt, kp); if (error) return((error == EJUSTRETURN) ? 0 : error); } @@ -1598,9 +1929,11 @@ sosetopt(so, sopt) case SO_REUSEPORT: case SO_OOBINLINE: case SO_TIMESTAMP: +#ifdef __APPLE__ case SO_DONTTRUNC: case SO_WANTMORE: - case SO_WANTOOBFLAG: + case SO_WANTOOBFLAG: +#endif error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) @@ -1664,11 +1997,22 @@ sosetopt(so, sopt) if (error) goto bad; - if (tv.tv_sec > SHRT_MAX / hz - hz) { + /* assert(hz > 0); */ + if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz || + tv.tv_usec < 0 || tv.tv_usec >= 1000000) { + error = EDOM; + goto bad; + } + /* assert(tick > 0); */ + /* assert(ULONG_MAX - SHRT_MAX >= 1000000); */ + { + long tmp = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick; + if (tmp > SHRT_MAX) { error = EDOM; goto bad; } - val = tv.tv_sec * hz + tv.tv_usec / tick; + val = tmp; + } switch (sopt->sopt_name) { case SO_SNDTIMEO: @@ -1681,11 +2025,12 @@ sosetopt(so, sopt) break; case SO_NKE: - { struct so_nke nke; + { + struct so_nke nke; struct NFDescriptor *nf1, *nf2 = NULL; - error = sooptcopyin(sopt, &nke, - sizeof nke, sizeof nke); + error = sooptcopyin(sopt, &nke, + sizeof nke, sizeof nke); if (error) goto bad; @@ -1693,6 +2038,30 @@ sosetopt(so, sopt) break; } + case SO_NOSIGPIPE: + error = sooptcopyin(sopt, &optval, sizeof optval, + sizeof optval); + if (error) + goto bad; + if (optval) + so->so_flags |= SOF_NOSIGPIPE; + else + so->so_flags &= ~SOF_NOSIGPIPE; + + break; + + case SO_NOADDRERR: + error = sooptcopyin(sopt, &optval, sizeof optval, + sizeof optval); + if (error) + goto bad; + if (optval) + so->so_flags |= SOF_NOADDRAVAIL; + else + so->so_flags &= ~SOF_NOADDRAVAIL; + + break; + default: error = ENOPROTOOPT; break; @@ -1749,10 +2118,14 @@ sogetopt(so, sopt) struct mbuf *m; struct kextcb *kp; + if (sopt->sopt_dir != SOPT_GET) { + sopt->sopt_dir = SOPT_GET; + } + kp = sotokextcb(so); - while (kp) - { if (kp->e_soif && kp->e_soif->sf_socontrol) - { error = (*kp->e_soif->sf_socontrol)(so, sopt, kp); + while (kp) { + if (kp->e_soif && kp->e_soif->sf_socontrol) { + error = (*kp->e_soif->sf_socontrol)(so, 
sopt, kp);
 			if (error)
 				return((error == EJUSTRETURN) ? 0 : error);
 		}
@@ -1783,9 +2156,11 @@ sogetopt(so, sopt)
 		case SO_BROADCAST:
 		case SO_OOBINLINE:
 		case SO_TIMESTAMP:
+#ifdef __APPLE__
 		case SO_DONTTRUNC:
 		case SO_WANTMORE:
-		case SO_WANTOOBFLAG:
+		case SO_WANTOOBFLAG:
+#endif
 			optval = so->so_options & sopt->sopt_name;
 integer:
 			error = sooptcopyout(sopt, &optval, sizeof optval);
@@ -1795,8 +2170,10 @@ integer:
 			optval = so->so_type;
 			goto integer;
 
+#ifdef __APPLE__
 		case SO_NREAD:
-		{	int pkt_total;
+		{
+			int pkt_total;
 			struct mbuf *m1;
 
 			pkt_total = 0;
@@ -1806,8 +2183,8 @@ integer:
 #if 0
 			kprintf("SKT CC: %d\n", so->so_rcv.sb_cc);
 #endif
-			while (m1)
-			{	if (m1->m_type == MT_DATA)
+			while (m1) {
+				if (m1->m_type == MT_DATA)
 					pkt_total += m1->m_len;
 #if 0
 				kprintf("CNT: %d/%d\n", m1->m_len, pkt_total);
@@ -1822,6 +2199,7 @@ integer:
 #endif
 			goto integer;
 		}
+#endif
 		case SO_ERROR:
 			optval = so->so_error;
 			so->so_error = 0;
@@ -1853,6 +2231,14 @@ integer:
 			error = sooptcopyout(sopt, &tv, sizeof tv);
 			break;
 
+		case SO_NOSIGPIPE:
+			optval = (so->so_flags & SOF_NOSIGPIPE);
+			goto integer;
+
+		case SO_NOADDRERR:
+			optval = (so->so_flags & SOF_NOADDRAVAIL);
+			goto integer;
+
 		default:
 			error = ENOPROTOOPT;
 			break;
@@ -1861,31 +2247,7 @@ integer:
 	}
 }
 
-void
-sohasoutofband(so)
-	register struct socket *so;
-{
-	struct proc *p;
-
-	struct kextcb *kp;
-
-	kp = sotokextcb(so);
-	while (kp)
-	{	if (kp->e_soif && kp->e_soif->sf_sohasoutofband)
-		{	if ((*kp->e_soif->sf_sohasoutofband)(so, kp))
-				return;
-		}
-		kp = kp->e_next;
-	}
-	if (so->so_pgid < 0)
-		gsignal(-so->so_pgid, SIGURG);
-	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
-		psignal(p, SIGURG);
-	thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
-	selwakeup(&so->so_rcv.sb_sel);
-	thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
-}
-
+#ifdef __APPLE__
 /*
  * Network filter support
  */
@@ -1922,7 +2284,6 @@ sfilter_init(register struct socket *so)
 	return(0);
 }
 
-
 /*
  * Run the list of filters, freeing extension control blocks
  * Assumes the soif/soutil blocks have been handled.
@@ -1944,46 +2305,11 @@ sfilter_term(struct socket *so)
 	}
 	return(0);
 }
+#endif /* __APPLE__ */
 
-
+/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
 int
-sopoll(struct socket *so, int events, struct ucred *cred)
-{
-	struct proc *p = current_proc();
-	int revents = 0;
-	int s = splnet();
-
-	if (events & (POLLIN | POLLRDNORM))
-		if (soreadable(so))
-			revents |= events & (POLLIN | POLLRDNORM);
-
-	if (events & (POLLOUT | POLLWRNORM))
-		if (sowriteable(so))
-			revents |= events & (POLLOUT | POLLWRNORM);
-
-	if (events & (POLLPRI | POLLRDBAND))
-		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
-			revents |= events & (POLLPRI | POLLRDBAND);
-
-	if (revents == 0) {
-		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
-			selrecord(p, &so->so_rcv.sb_sel);
-			so->so_rcv.sb_sel.si_flags |= SI_SBSEL;
-		}
-
-		if (events & (POLLOUT | POLLWRNORM)) {
-			selrecord(p, &so->so_snd.sb_sel);
-			so->so_snd.sb_sel.si_flags |= SI_SBSEL;
-		}
-	}
-
-	splx(s);
-	return (revents);
-}
-
-/*#### IPv6 Integration. Added new routines */
 int
-sooptgetm(struct sockopt *sopt, struct mbuf **mp)
+soopt_getm(struct sockopt *sopt, struct mbuf **mp)
 {
 	struct mbuf *m, *m_prev;
 	int sopt_size = sopt->sopt_valsize;
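SO_NOSIGPIPE, handled in sosetopt()/sogetopt() above via the SOF_NOSIGPIPE flag, is the per-socket alternative to ignoring SIGPIPE process-wide; typical userspace usage:

#include <sys/socket.h>

/*
 * Ask for EPIPE from send() on a broken connection instead of a
 * process-wide SIGPIPE; backed by SOF_NOSIGPIPE in sosetopt() above.
 */
static int
set_nosigpipe(int fd)
{
	int on = 1;

	return setsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, &on, sizeof(on));
}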
@@ -2030,7 +2356,7 @@ sooptgetm(struct sockopt *sopt, struct mbuf **mp)
 
 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
 int
-sooptmcopyin(struct sockopt *sopt, struct mbuf *m)
+soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
 {
 	struct mbuf *m0 = m;
 
@@ -2053,13 +2379,13 @@ sooptmcopyin(struct sockopt *sopt, struct mbuf *m)
 		m = m->m_next;
 	}
 	if (m != NULL) /* should be allocated large enough by ip6_sooptmcopyin() */
-		panic("sooptmcopyin");
+		panic("soopt_mcopyin");
 	return 0;
 }
 
 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
 int
-sooptmcopyout(struct sockopt *sopt, struct mbuf *m)
+soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
 {
 	struct mbuf *m0 = m;
 	size_t valsize = 0;
@@ -2092,3 +2418,173 @@ sooptmcopyout(struct sockopt *sopt, struct mbuf *m)
 	return 0;
 }
 
+void
+sohasoutofband(so)
+	register struct socket *so;
+{
+	struct proc *p;
+	struct kextcb *kp;
+
+	kp = sotokextcb(so);
+	while (kp) {
+		if (kp->e_soif && kp->e_soif->sf_sohasoutofband) {
+			if ((*kp->e_soif->sf_sohasoutofband)(so, kp))
+				return;
+		}
+		kp = kp->e_next;
+	}
+	if (so->so_pgid < 0)
+		gsignal(-so->so_pgid, SIGURG);
+	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
+		psignal(p, SIGURG);
+	selwakeup(&so->so_rcv.sb_sel);
+}
+
+int
+sopoll(struct socket *so, int events, struct ucred *cred, void * wql)
+{
+	struct proc *p = current_proc();
+	int revents = 0;
+	int s = splnet();
+
+	if (events & (POLLIN | POLLRDNORM))
+		if (soreadable(so))
+			revents |= events & (POLLIN | POLLRDNORM);
+
+	if (events & (POLLOUT | POLLWRNORM))
+		if (sowriteable(so))
+			revents |= events & (POLLOUT | POLLWRNORM);
+
+	if (events & (POLLPRI | POLLRDBAND))
+		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
+			revents |= events & (POLLPRI | POLLRDBAND);
+
+	if (revents == 0) {
+		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
+			/* Darwin sets the flag first, BSD calls selrecord first */
+			so->so_rcv.sb_flags |= SB_SEL;
+			selrecord(p, &so->so_rcv.sb_sel, wql);
+		}
+
+		if (events & (POLLOUT | POLLWRNORM)) {
+			/* Darwin sets the flag first, BSD calls selrecord first */
+			so->so_snd.sb_flags |= SB_SEL;
+			selrecord(p, &so->so_snd.sb_sel, wql);
+		}
+	}
+
+	splx(s);
+	return (revents);
+}
+
+
+int
+soo_kqfilter(struct file *fp, struct knote *kn, struct proc *p)
+{
+	struct socket *so = (struct socket *)kn->kn_fp->f_data;
+	struct sockbuf *sb;
+	int s;
+
+	switch (kn->kn_filter) {
+	case EVFILT_READ:
+		if (so->so_options & SO_ACCEPTCONN)
+			kn->kn_fop = &solisten_filtops;
+		else
+			kn->kn_fop = &soread_filtops;
+		sb = &so->so_rcv;
+		break;
+	case EVFILT_WRITE:
+		kn->kn_fop = &sowrite_filtops;
+		sb = &so->so_snd;
+		break;
+	default:
+		return (1);
+	}
+
+	if (sb->sb_sel.si_flags & SI_INITED)
+		return (1);
+
+	s = splnet();
+	if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
+		sb->sb_flags |= SB_KNOTE;
+	splx(s);
+	return (0);
+}
+
+static void
+filt_sordetach(struct knote *kn)
+{
+	struct socket *so = (struct socket *)kn->kn_fp->f_data;
+	int s = splnet();
+
+	if (so->so_rcv.sb_flags & SB_KNOTE &&
+	    !(so->so_rcv.sb_sel.si_flags & SI_INITED))
+		if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
+			so->so_rcv.sb_flags &= ~SB_KNOTE;
+	splx(s);
+}
+
+/*ARGSUSED*/
+static int
+filt_soread(struct knote *kn, long hint)
+{
+	struct socket *so = (struct socket *)kn->kn_fp->f_data;
+
+	kn->kn_data = so->so_rcv.sb_cc;
+	if (so->so_state & SS_CANTRCVMORE) {
+		kn->kn_flags |= EV_EOF;
+		kn->kn_fflags = so->so_error;
+		return (1);
+	}
+	if (so->so_error)	/* temporary udp error */
+		return (1);
+	if (kn->kn_sfflags & NOTE_LOWAT)
+		return (kn->kn_data >= kn->kn_sdata);
+	return (kn->kn_data >= so->so_rcv.sb_lowat);
+}
+
+static void
+filt_sowdetach(struct knote *kn) +{ + struct socket *so = (struct socket *)kn->kn_fp->f_data; + int s = splnet(); + + if(so->so_snd.sb_flags & SB_KNOTE && + !(so->so_snd.sb_sel.si_flags & SI_INITED)) + if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn)) + so->so_snd.sb_flags &= ~SB_KNOTE; + splx(s); +} + +/*ARGSUSED*/ +static int +filt_sowrite(struct knote *kn, long hint) +{ + struct socket *so = (struct socket *)kn->kn_fp->f_data; + + kn->kn_data = sbspace(&so->so_snd); + if (so->so_state & SS_CANTSENDMORE) { + kn->kn_flags |= EV_EOF; + kn->kn_fflags = so->so_error; + return (1); + } + if (so->so_error) /* temporary udp error */ + return (1); + if (((so->so_state & SS_ISCONNECTED) == 0) && + (so->so_proto->pr_flags & PR_CONNREQUIRED)) + return (0); + if (kn->kn_sfflags & NOTE_LOWAT) + return (kn->kn_data >= kn->kn_sdata); + return (kn->kn_data >= so->so_snd.sb_lowat); +} + +/*ARGSUSED*/ +static int +filt_solisten(struct knote *kn, long hint) +{ + struct socket *so = (struct socket *)kn->kn_fp->f_data; + + kn->kn_data = so->so_qlen; + return (! TAILQ_EMPTY(&so->so_comp)); +} +
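For reference, the userspace counterpart of the filters above: filt_solisten() publishes so_qlen through kn_data, so a kevent() on a listening socket reports how many completed connections are waiting to be accept()ed. A minimal sketch:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <unistd.h>

/*
 * Block until the listening socket lfd has at least one completed
 * connection; returns the backlog depth reported by filt_solisten()
 * (ev.data = so_qlen), or -1 on error.
 */
static int
wait_for_connection(int lfd)
{
	struct kevent ev;
	int kq, n;

	if ((kq = kqueue()) < 0)
		return (-1);
	EV_SET(&ev, lfd, EVFILT_READ, EV_ADD, 0, 0, NULL);
	if (kevent(kq, &ev, 1, NULL, 0, NULL) < 0) {	/* register */
		close(kq);
		return (-1);
	}
	n = kevent(kq, NULL, 0, &ev, 1, NULL);		/* wait */
	close(kq);
	return (n == 1 ? (int)ev.data : -1);
}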