* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95
+ * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
+ * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
*/
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/filedesc.h>
#include <sys/proc.h>
+#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
+#include <sys/event.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <machine/limits.h>
+static void filt_sordetach(struct knote *kn);
+static int filt_soread(struct knote *kn, long hint);
+static void filt_sowdetach(struct knote *kn);
+static int filt_sowrite(struct knote *kn, long hint);
+static int filt_solisten(struct knote *kn, long hint);
+
+static struct filterops solisten_filtops =
+ { 1, NULL, filt_sordetach, filt_solisten };
+static struct filterops soread_filtops =
+ { 1, NULL, filt_sordetach, filt_soread };
+static struct filterops sowrite_filtops =
+ { 1, NULL, filt_sowdetach, filt_sowrite };
+
int socket_debug = 0;
int socket_zone = M_SOCKET;
so_gen_t so_gencnt; /* generation count for sockets */
/* Should we get a maximum also ??? */
static int sosendmaxchain = 65536;
static int sosendminchain = 16384;
+static int sorecvmincopy = 16384;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
0, "");
+SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
+ 0, "");
void so_cache_timer();
+struct mbuf *m_getpackets(int, int, int);
+
/*
* Socket operation routines.
* switching out to the protocol specific routines.
*/
+#ifdef __APPLE__
void socketinit()
{
vm_size_t str_size;
(void) thread_funnel_set(network_flock, FALSE);
}
-
+#endif /* __APPLE__ */
/*
* Get a socket structure from our zone, and initialize it.
struct socket **aso;
register int type;
int proto;
-
{
struct proc *p = current_proc();
register struct protosw *prp;
- struct socket *so;
+ register struct socket *so;
register int error = 0;
-
+#if TCPDEBUG
+ extern int tcpconsdebug;
+#endif
if (proto)
prp = pffindproto(dom, proto, type);
else
prp = pffindtype(dom, type);
+
if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
return (EPROTONOSUPPORT);
+#ifndef __APPLE__
+
+ if (p->p_prison && jail_socket_unixiproute_only &&
+ prp->pr_domain->dom_family != PF_LOCAL &&
+ prp->pr_domain->dom_family != PF_INET &&
+ prp->pr_domain->dom_family != PF_ROUTE) {
+ return (EPROTONOSUPPORT);
+ }
+
+#endif
if (prp->pr_type != type)
return (EPROTOTYPE);
so = soalloc(p != 0, dom, type);
TAILQ_INIT(&so->so_comp);
so->so_type = type;
+#ifdef __APPLE__
if (p != 0) {
if (p->p_ucred->cr_uid == 0)
so->so_state = SS_PRIV;
so->so_uid = p->p_ucred->cr_uid;
}
-
+#else
+ so->so_cred = p->p_ucred;
+ crhold(so->so_cred);
+#endif
so->so_proto = prp;
+#ifdef __APPLE__
so->so_rcv.sb_flags |= SB_RECV; /* XXX */
if (prp->pr_sfilter.tqh_first)
error = sfilter_init(so);
if (error == 0)
+#endif
error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
-
if (error) {
+ /*
+ * Warning:
+ * If so_pcb is not zero, the socket will be leaked,
+ * so protocol attachment handler must be coded carefully
+ */
so->so_state |= SS_NOFDREF;
sofree(so);
return (error);
}
+#ifdef __APPLE__
prp->pr_domain->dom_refs++;
so->so_rcv.sb_so = so->so_snd.sb_so = so;
TAILQ_INIT(&so->so_evlist);
+#if TCPDEBUG
+ if (tcpconsdebug == 2)
+ so->so_options |= SO_DEBUG;
+#endif
+#endif
+
*aso = so;
return (0);
}
int s = splnet();
error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
- if (error == 0) /* ??? */
- { kp = sotokextcb(so);
- while (kp)
- { if (kp->e_soif && kp->e_soif->sf_sobind)
- { error = (*kp->e_soif->sf_sobind)(so, nam, kp);
- if (error)
- { if (error == EJUSTRETURN)
+ if (error == 0) {
+ kp = sotokextcb(so);
+ while (kp) {
+ if (kp->e_soif && kp->e_soif->sf_sobind) {
+ error = (*kp->e_soif->sf_sobind)(so, nam, kp);
+ if (error) {
+ if (error == EJUSTRETURN) {
+ error = 0;
break;
+ }
splx(s);
return(error);
}
{
so->so_gencnt = ++so_gencnt;
+#ifndef __APPLE__
+ if (so->so_rcv.sb_hiwat)
+ (void)chgsbsize(so->so_cred->cr_uidinfo,
+ &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
+ if (so->so_snd.sb_hiwat)
+ (void)chgsbsize(so->so_cred->cr_uidinfo,
+ &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
+#ifdef INET
+ if (so->so_accf != NULL) {
+ if (so->so_accf->so_accept_filter != NULL &&
+ so->so_accf->so_accept_filter->accf_destroy != NULL) {
+ so->so_accf->so_accept_filter->accf_destroy(so);
+ }
+ if (so->so_accf->so_accept_filter_str != NULL)
+ FREE(so->so_accf->so_accept_filter_str, M_ACCF);
+ FREE(so->so_accf, M_ACCF);
+ }
+#endif /* INET */
+ crfree(so->so_cred);
+ zfreei(so->so_zone, so);
+#else
if (so->cached_in_sock_layer == 1)
cached_sock_free(so);
else
_FREE_ZONE(so, sizeof(*so), so->so_zone);
+#endif /* __APPLE__ */
}
int
backlog = somaxconn;
so->so_qlimit = backlog;
kp = sotokextcb(so);
- while (kp)
- {
- if (kp->e_soif && kp->e_soif->sf_solisten)
- { error = (*kp->e_soif->sf_solisten)(so, kp);
- if (error)
- { if (error == EJUSTRETURN)
+ while (kp) {
+ if (kp->e_soif && kp->e_soif->sf_solisten) {
+ error = (*kp->e_soif->sf_solisten)(so, kp);
+ if (error) {
+ if (error == EJUSTRETURN) {
+ error = 0;
break;
+ }
splx(s);
return(error);
}
void
sofree(so)
register struct socket *so;
-{ int error;
+{
+ int error;
struct kextcb *kp;
struct socket *head = so->so_head;
kp = sotokextcb(so);
- while (kp)
- { if (kp->e_soif && kp->e_soif->sf_sofree)
- { error = (*kp->e_soif->sf_sofree)(so, kp);
+ while (kp) {
+ if (kp->e_soif && kp->e_soif->sf_sofree) {
+ error = (*kp->e_soif->sf_sofree)(so, kp);
if (error) {
selthreadclear(&so->so_snd.sb_sel);
selthreadclear(&so->so_rcv.sb_sel);
}
if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
+#ifdef __APPLE__
selthreadclear(&so->so_snd.sb_sel);
selthreadclear(&so->so_rcv.sb_sel);
+#endif
return;
}
- if (head != NULL) {
- if (so->so_state & SS_INCOMP) {
- TAILQ_REMOVE(&head->so_incomp, so, so_list);
- head->so_incqlen--;
- } else if (so->so_state & SS_COMP) {
- /*
- * We must not decommission a socket that's
- * on the accept(2) queue. If we do, then
- * accept(2) may hang after select(2) indicated
- * that the listening socket was ready.
- */
- selthreadclear(&so->so_snd.sb_sel);
- selthreadclear(&so->so_rcv.sb_sel);
- return;
- } else {
- panic("sofree: not queued");
- }
+ if (head != NULL) {
+ if (so->so_state & SS_INCOMP) {
+ TAILQ_REMOVE(&head->so_incomp, so, so_list);
+ head->so_incqlen--;
+ } else if (so->so_state & SS_COMP) {
+ /*
+ * We must not decommission a socket that's
+ * on the accept(2) queue. If we do, then
+ * accept(2) may hang after select(2) indicated
+ * that the listening socket was ready.
+ */
+#ifdef __APPLE__
+ selthreadclear(&so->so_snd.sb_sel);
+ selthreadclear(&so->so_rcv.sb_sel);
+#endif
+ return;
+ } else {
+ panic("sofree: not queued");
+ }
head->so_qlen--;
- so->so_state &= ~(SS_INCOMP|SS_COMP);
+ so->so_state &= ~SS_INCOMP;
so->so_head = NULL;
}
-
+#ifdef __APPLE__
selthreadclear(&so->so_snd.sb_sel);
sbrelease(&so->so_snd);
+#endif
sorflush(so);
sfilter_term(so);
sodealloc(so);
int error = 0;
struct kextcb *kp;
-#if FB31SIG
- funsetown(so->so_pgid);
+#ifndef __APPLE__
+ funsetown(so->so_sigio);
#endif
kp = sotokextcb(so);
- while (kp)
- { if (kp->e_soif && kp->e_soif->sf_soclose)
- { error = (*kp->e_soif->sf_soclose)(so, kp);
- if (error)
- { splx(s);
+ while (kp) {
+ if (kp->e_soif && kp->e_soif->sf_soclose) {
+ error = (*kp->e_soif->sf_soclose)(so, kp);
+ if (error) {
+ splx(s);
return((error == EJUSTRETURN) ? 0 : error);
}
}
if (so->so_pcb && so->so_state & SS_NOFDREF)
panic("soclose: NOFDREF");
so->so_state |= SS_NOFDREF;
+#ifdef __APPLE__
so->so_proto->pr_domain->dom_refs--;
evsofree(so);
+#endif
sofree(so);
splx(s);
return (error);
soabort(so)
struct socket *so;
{
+ int error;
- return (*so->so_proto->pr_usrreqs->pru_abort)(so);
+ error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
+ if (error) {
+ sofree(so);
+ return error;
+ }
+ return (0);
}
int
soaccept(so, nam)
register struct socket *so;
struct sockaddr **nam;
-{ int s = splnet();
+{
+ int s = splnet();
int error;
struct kextcb *kp;
panic("soaccept: !NOFDREF");
so->so_state &= ~SS_NOFDREF;
error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
- if (error == 0)
- { kp = sotokextcb(so);
+ if (error == 0) {
+ kp = sotokextcb(so);
while (kp) {
- if (kp->e_soif && kp->e_soif->sf_soaccept)
- { error = (*kp->e_soif->sf_soaccept)(so, nam, kp);
- if (error)
- { if (error == EJUSTRETURN)
+ if (kp->e_soif && kp->e_soif->sf_soaccept) {
+ error = (*kp->e_soif->sf_soaccept)(so, nam, kp);
+ if (error) {
+ if (error == EJUSTRETURN) {
+ error = 0;
break;
+ }
splx(s);
return(error);
}
(error = sodisconnect(so))))
error = EISCONN;
else {
+ /*
+ * Run connect filter before calling protocol:
+ * - non-blocking connect returns before completion;
+ * - allows filters to modify address.
+ */
+ kp = sotokextcb(so);
+ while (kp) {
+ if (kp->e_soif && kp->e_soif->sf_soconnect) {
+ error = (*kp->e_soif->sf_soconnect)(so, nam, kp);
+ if (error) {
+ if (error == EJUSTRETURN) {
+ error = 0;
+ }
+ splx(s);
+ return(error);
+ }
+ }
+ kp = kp->e_next;
+ }
error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
- if (error == 0)
- {
- kp = sotokextcb(so);
- while (kp)
- {
- if (kp->e_soif && kp->e_soif->sf_soconnect)
- { error = (*kp->e_soif->sf_soconnect)(so, nam, kp);
- if (error)
- { if (error == EJUSTRETURN)
- break;
- splx(s);
- return(error);
- }
- }
- kp = kp->e_next;
- }
- }
}
-
splx(s);
return (error);
}
struct kextcb *kp;
error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
- if (error == 0)
- { kp = sotokextcb(so1);
- while (kp)
- { if (kp->e_soif && kp->e_soif->sf_soconnect2)
- { error = (*kp->e_soif->sf_soconnect2)(so1, so2, kp);
- if (error)
- { if (error == EJUSTRETURN)
+ if (error == 0) {
+ kp = sotokextcb(so1);
+ while (kp) {
+ if (kp->e_soif && kp->e_soif->sf_soconnect2) {
+ error = (*kp->e_soif->sf_soconnect2)(so1, so2, kp);
+ if (error) {
+ if (error == EJUSTRETURN) {
+ error = 0; /* filter consumed the request: report success, but leave through the common exit so splx(s) still runs (a bare return here would skip it) */
break;
+ }
splx(s);
return(error);
}
goto bad;
}
error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
-
- if (error == 0)
- { kp = sotokextcb(so);
- while (kp)
- { if (kp->e_soif && kp->e_soif->sf_sodisconnect)
- { error = (*kp->e_soif->sf_sodisconnect)(so, kp);
- if (error)
- { if (error == EJUSTRETURN)
+ if (error == 0) {
+ kp = sotokextcb(so);
+ while (kp) {
+ if (kp->e_soif && kp->e_soif->sf_sodisconnect) {
+ error = (*kp->e_soif->sf_sodisconnect)(so, kp);
+ if (error) {
+ if (error == EJUSTRETURN) {
+ error = 0;
break;
+ }
splx(s);
return(error);
}
if ((atomic && resid > so->so_snd.sb_hiwat) ||
clen > so->so_snd.sb_hiwat)
snderr(EMSGSIZE);
- if (space < resid + clen && uio &&
+ if (space < resid + clen &&
(atomic || space < so->so_snd.sb_lowat || space < clen)) {
if (so->so_state & SS_NBIO)
snderr(EWOULDBLOCK);
do {
if (bytes_to_copy >= MINCLSIZE) {
+ /*
+ * try to maintain a local cache of mbuf clusters needed to complete this write
+ * the list is further limited to the number that are currently needed to fill the socket
+ * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
+ * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs
+ * if we fail early (or miscalculate the number needed) make sure to release any clusters
+ * we haven't yet consumed.
+ */
if ((m = freelist) == NULL) {
int num_needed;
int hdrs_needed = 0;
if (dontroute)
so->so_options |= SO_DONTROUTE;
s = splnet(); /* XXX */
- kp = sotokextcb(so);
/* Compute flags here, for pru_send and NKEs */
sendflags = (flags & MSG_OOB) ? PRUS_OOB :
/*
PRUS_EOF :
/* If there is more to send set PRUS_MORETOCOME */
(resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
+ kp = sotokextcb(so);
while (kp)
- { if (kp->e_soif && kp->e_soif->sf_sosend)
- { error = (*kp->e_soif->sf_sosend)(so, &addr,
+ { if (kp->e_soif && kp->e_soif->sf_sosend) {
+ error = (*kp->e_soif->sf_sosend)(so, &addr,
&uio, &top,
&control,
&sendflags,
kp);
- if (error)
- { splx(s);
- if (error == EJUSTRETURN)
- { sbunlock(&so->so_snd);
+ if (error) {
+ splx(s);
+ if (error == EJUSTRETURN) {
+ sbunlock(&so->so_snd);
if (freelist)
m_freem_list(freelist);
error = (*so->so_proto->pr_usrreqs->pru_send)(so,
sendflags, top, addr, control, p);
splx(s);
+#ifdef __APPLE__
if (flags & MSG_SEND)
so->so_temp = NULL;
-
+#endif
if (dontroute)
so->so_options &= ~SO_DONTROUTE;
clen = 0;
struct mbuf **controlp;
int *flagsp;
{
- register struct mbuf *m, **mp;
- register struct mbuf *free_list, *ml;
+ register struct mbuf *m, **mp, *ml;
register int flags, len, error, s, offset;
struct protosw *pr = so->so_proto;
struct mbuf *nextrecord;
int moff, type = 0;
int orig_resid = uio->uio_resid;
struct kextcb *kp;
-
+ volatile struct mbuf *free_list;
+ volatile int delayed_copy_len;
+ int can_delay;
+ int need_event;
+ struct proc *p = current_proc();
+
+
KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
so,
uio->uio_resid,
so->so_rcv.sb_hiwat);
kp = sotokextcb(so);
- while (kp)
- { if (kp->e_soif && kp->e_soif->sf_soreceive)
- { error = (*kp->e_soif->sf_soreceive)(so, psa, &uio,
+ while (kp) {
+ if (kp->e_soif && kp->e_soif->sf_soreceive) {
+ error = (*kp->e_soif->sf_soreceive)(so, psa, &uio,
mp0, controlp,
flagsp, kp);
- if (error)
+ if (error) {
+ KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
return((error == EJUSTRETURN) ? 0 : error);
+ }
}
kp = kp->e_next;
}
(so->so_options & SO_OOBINLINE) == 0 &&
(so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
m = m_get(M_WAIT, MT_DATA);
+ if (m == NULL) {
+ KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0);
+ return (ENOBUFS);
+ }
error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
if (error)
goto bad;
bad:
if (m)
m_freem(m);
- if ((so->so_options & SO_WANTOOBFLAG) != 0) {
- if (error == EWOULDBLOCK || error == EINVAL) {
- /*
- * Let's try to get normal data:
- * EWOULDBLOCK: out-of-band data not receive yet;
- * EINVAL: out-of-band data already read.
- */
- error = 0;
- goto nooob;
- } else if (error == 0 && flagsp)
- *flagsp |= MSG_OOB;
- }
+#ifdef __APPLE__
+ if ((so->so_options & SO_WANTOOBFLAG) != 0) {
+ if (error == EWOULDBLOCK || error == EINVAL) {
+ /*
+ * Let's try to get normal data:
+ * EWOULDBLOCK: out-of-band data not received yet;
+ * EINVAL: out-of-band data already read.
+ */
+ error = 0;
+ goto nooob;
+ } else if (error == 0 && flagsp)
+ *flagsp |= MSG_OOB;
+ }
KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
+#endif
return (error);
}
nooob:
if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
(*pr->pr_usrreqs->pru_rcvd)(so, 0);
+
+ free_list = (struct mbuf *)0;
+ delayed_copy_len = 0;
restart:
- if (error = sblock(&so->so_rcv, SBLOCKWAIT(flags)))
- {
+ error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
+ if (error) {
KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
return (error);
}
*/
if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
so->so_rcv.sb_cc < uio->uio_resid) &&
- (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
+ (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
+
KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
if (so->so_error) {
if (m)
sbunlock(&so->so_rcv);
if (socket_debug)
printf("Waiting for socket data\n");
+
error = sbwait(&so->so_rcv);
if (socket_debug)
printf("SORECEIVE - sbwait returned %d\n", error);
splx(s);
- if (error)
- {
+ if (error) {
KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
return (error);
}
goto restart;
}
dontblock:
-#ifdef notyet /* XXXX */
+#ifndef __APPLE__
if (uio->uio_procp)
uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
-#endif
+#else /* __APPLE__ */
+ /*
+ * 2207985
+ * This should be uio->uio-procp; however, some callers of this
+ * function use auto variables with stack garbage, and fail to
+ * fill out the uio structure properly.
+ */
+ if (p)
+ p->p_stats->p_ru.ru_msgrcv++;
+#endif /* __APPLE__ */
nextrecord = m->m_nextpkt;
if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
orig_resid = 0;
- if (psa)
+ if (psa) {
*psa = dup_sockaddr(mtod(m, struct sockaddr *),
mp0 == 0);
+ if ((*psa == 0) && (flags & MSG_NEEDSA)) {
+ error = EWOULDBLOCK;
+ goto release;
+ }
+ }
if (flags & MSG_PEEK) {
m = m->m_next;
} else {
moff = 0;
offset = 0;
- free_list = m;
- ml = (struct mbuf *)0;
+ if (!(flags & MSG_PEEK) && uio->uio_resid > sorecvmincopy)
+ can_delay = 1;
+ else
+ can_delay = 0;
+
+ need_event = 0;
- while (m && uio->uio_resid > 0 && error == 0) {
+
+ while (m && (uio->uio_resid - delayed_copy_len) > 0 && error == 0) {
if (m->m_type == MT_OOBDATA) {
if (type != MT_OOBDATA)
break;
} else if (type == MT_OOBDATA)
break;
-#if 0
+#ifndef __APPLE__
/*
* This assertion needs rework. The trouble is Appletalk is uses many
* mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
else
KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
("receive 3"));
-#endif
- /*
- * Make sure to allways set MSG_OOB event when getting
- * out of band data inline.
- */
+#else
+ /*
+ * Make sure to always set MSG_OOB event when getting
+ * out of band data inline.
+ */
if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
- (so->so_options & SO_OOBINLINE) != 0 &&
- (so->so_state & SS_RCVATMARK) != 0) {
- flags |= MSG_OOB;
- }
+ (so->so_options & SO_OOBINLINE) != 0 &&
+ (so->so_state & SS_RCVATMARK) != 0) {
+ flags |= MSG_OOB;
+ }
+#endif
so->so_state &= ~SS_RCVATMARK;
- len = uio->uio_resid;
+ len = uio->uio_resid - delayed_copy_len;
if (so->so_oobmark && len > so->so_oobmark - offset)
len = so->so_oobmark - offset;
if (len > m->m_len - moff)
* block interrupts again.
*/
if (mp == 0) {
- splx(s);
- error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
- s = splnet();
- if (error)
- goto release;
+ if (can_delay && len == m->m_len) {
+ /*
+ * only delay the copy if we're consuming the
+ * mbuf and we're NOT in MSG_PEEK mode
+ * and we have enough data to make it worthwhile
+ * to drop and retake the funnel... can_delay
+ * reflects the state of the 2 latter constraints
+ * moff should always be zero in these cases
+ */
+ delayed_copy_len += len;
+ } else {
+ splx(s);
+
+ if (delayed_copy_len) {
+ error = sodelayed_copy(uio, &free_list, &delayed_copy_len);
+
+ if (error) {
+ s = splnet();
+ goto release;
+ }
+ if (m != so->so_rcv.sb_mb) {
+ /*
+ * can only get here if MSG_PEEK is not set
+ * therefore, m should point at the head of the rcv queue...
+ * if it doesn't, it means something drastically changed
+ * while we were out from behind the funnel in sodelayed_copy...
+ * perhaps a RST on the stream... in any event, the stream has
+ * been interrupted... it's probably best just to return
+ * whatever data we've moved and let the caller sort it out...
+ */
+ break;
+ }
+ }
+ error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
+
+ s = splnet();
+ if (error)
+ goto release;
+ }
} else
uio->uio_resid -= len;
+
if (len == m->m_len - moff) {
if (m->m_flags & M_EOR)
flags |= MSG_EOR;
} else {
nextrecord = m->m_nextpkt;
sbfree(&so->so_rcv, m);
+
if (mp) {
*mp = m;
mp = &m->m_next;
*mp = (struct mbuf *)0;
} else {
m->m_nextpkt = 0;
- if (ml != 0)
+ if (free_list == NULL)
+ free_list = m;
+ else
ml->m_next = m;
ml = m;
so->so_rcv.sb_mb = m = m->m_next;
so->so_oobmark -= len;
if (so->so_oobmark == 0) {
so->so_state |= SS_RCVATMARK;
- postevent(so, 0, EV_OOB);
+ /*
+ * delay posting the actual event until after
+ * any delayed copy processing has finished
+ */
+ need_event = 1;
break;
}
} else {
if (flags & MSG_EOR)
break;
/*
- * If the MSG_WAITALL flag is set (for non-atomic socket),
+ * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket),
* we must not quit until "uio->uio_resid == 0" or an error
* termination. If a signal/timeout occurs, return
* with a short count but without error.
* Keep sockbuf locked against other readers.
*/
- while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
+ while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio->uio_resid - delayed_copy_len) > 0 &&
!sosendallatonce(so) && !nextrecord) {
if (so->so_error || so->so_state & SS_CANTRCVMORE)
- break;
+ goto release;
- if (ml) {
- m_freem_list(free_list);
+ if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
+ (*pr->pr_usrreqs->pru_rcvd)(so, flags);
+ if (sbwait(&so->so_rcv)) {
+ error = 0;
+ goto release;
}
- error = sbwait(&so->so_rcv);
- if (error) {
- sbunlock(&so->so_rcv);
- splx(s);
- KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, 0,0,0,0,0);
- return (0);
+ /*
+ * have to wait until after we get back from the sbwait to do the copy because
+ * we will drop the funnel if we have enough data that has been delayed... by dropping
+ * the funnel we open up a window allowing the netisr thread to process the incoming packets
+ * and to change the state of this socket... we're issuing the sbwait because
+ * the socket is empty and we're expecting the netisr thread to wake us up when more
+ * packets arrive... if we allow that processing to happen and then sbwait, we
+ * could stall forever with packets sitting in the socket if no further packets
+ * arrive from the remote side.
+ *
+ * we want to copy before we've collected all the data to satisfy this request to
+ * allow the copy to overlap the incoming packet processing on an MP system
+ */
+ if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
+
+ error = sodelayed_copy(uio, &free_list, &delayed_copy_len);
+
+ if (error)
+ goto release;
}
m = so->so_rcv.sb_mb;
if (m) {
nextrecord = m->m_nextpkt;
- free_list = m;
}
- ml = (struct mbuf *)0;
}
}
- if (ml) {
- m_freem_list(free_list);
- }
if (m && pr->pr_flags & PR_ATOMIC) {
+#ifdef __APPLE__
if (so->so_options & SO_DONTTRUNC)
flags |= MSG_RCVMORE;
- else
- { flags |= MSG_TRUNC;
+ else {
+#endif
+ flags |= MSG_TRUNC;
if ((flags & MSG_PEEK) == 0)
(void) sbdroprecord(&so->so_rcv);
+#ifdef __APPLE__
}
+#endif
}
if ((flags & MSG_PEEK) == 0) {
if (m == 0)
if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
(*pr->pr_usrreqs->pru_rcvd)(so, flags);
}
+#ifdef __APPLE__
if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
flags |= MSG_HAVEMORE;
+
+ if (delayed_copy_len) {
+ error = sodelayed_copy(uio, &free_list, &delayed_copy_len);
+
+ if (error)
+ goto release;
+ }
+ if (free_list) {
+ m_freem_list((struct mbuf *)free_list);
+ free_list = (struct mbuf *)0;
+ }
+ if (need_event)
+ postevent(so, 0, EV_OOB);
+#endif
if (orig_resid == uio->uio_resid && orig_resid &&
(flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
sbunlock(&so->so_rcv);
if (flagsp)
*flagsp |= flags;
release:
+ if (delayed_copy_len) {
+ error = sodelayed_copy(uio, &free_list, &delayed_copy_len);
+ }
+ if (free_list) {
+ m_freem_list((struct mbuf *)free_list);
+ }
sbunlock(&so->so_rcv);
splx(s);
return (error);
}
+
+int sodelayed_copy(struct uio *uio, struct mbuf **free_list, int *resid) /* perform the copies the receive path deferred: uiomove() every mbuf on *free_list into uio, then free the list; returns 0 or the uiomove() error */
+{
+ int error = 0; /* result of the last uiomove(); returned to the caller */
+ boolean_t dropped_funnel = FALSE; /* TRUE when the network funnel was released for the copy */
+ struct mbuf *m;
+ /* m walks the chain; *free_list keeps the head so the whole list can be freed below */
+ m = *free_list;
+ /* release the network funnel only when at least sorecvmincopy bytes are pending */
+ if (*resid >= sorecvmincopy) {
+ dropped_funnel = TRUE;
+
+ (void)thread_funnel_set(network_flock, FALSE);
+ }
+ while (m && error == 0) { /* stop copying at the first uiomove() failure */
+
+ error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
+
+ m = m->m_next;
+ }
+ m_freem_list(*free_list); /* the list is freed even when a copy failed */
+ /* hand back an empty list and a zero pending count on every path */
+ *free_list = (struct mbuf *)NULL;
+ *resid = 0;
+ /* retake the funnel if it was released above */
+ if (dropped_funnel == TRUE)
+ (void)thread_funnel_set(network_flock, TRUE);
+
+ return (error);
+}
+
+
int
soshutdown(so, how)
register struct socket *so;
KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_START, 0,0,0,0,0);
kp = sotokextcb(so);
- while (kp)
- { if (kp->e_soif && kp->e_soif->sf_soshutdown)
- { ret = (*kp->e_soif->sf_soshutdown)(so, how, kp);
- if (ret)
+ while (kp) {
+ if (kp->e_soif && kp->e_soif->sf_soshutdown) {
+ ret = (*kp->e_soif->sf_soshutdown)(so, how, kp);
+ if (ret) {
+ KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
return((ret == EJUSTRETURN) ? 0 : ret);
+ }
}
kp = kp->e_next;
}
- how++;
- if (how & FREAD) {
+ if (how != SHUT_WR) {
sorflush(so);
postevent(so, 0, EV_RCLOSED);
}
- if (how & FWRITE) {
+ if (how != SHUT_RD) {
ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
postevent(so, 0, EV_WCLOSED);
KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
struct kextcb *kp;
kp = sotokextcb(so);
- while (kp)
- { if (kp->e_soif && kp->e_soif->sf_sorflush)
- { if ((*kp->e_soif->sf_sorflush)(so, kp))
+ while (kp) {
+ if (kp->e_soif && kp->e_soif->sf_sorflush) {
+ if ((*kp->e_soif->sf_sorflush)(so, kp))
return;
}
kp = kp->e_next;
s = splimp();
socantrcvmore(so);
sbunlock(sb);
+#ifdef __APPLE__
selthreadclear(&sb->sb_sel);
+#endif
asb = *sb;
bzero((caddr_t)sb, sizeof (*sb));
+ if (asb.sb_flags & SB_KNOTE) {
+ sb->sb_sel.si_note = asb.sb_sel.si_note;
+ sb->sb_flags = SB_KNOTE;
+ }
splx(s);
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
(*pr->pr_domain->dom_dispose)(asb.sb_mb);
+
sbrelease(&asb);
}
short val;
struct kextcb *kp;
+ if (sopt->sopt_dir != SOPT_SET) {
+ sopt->sopt_dir = SOPT_SET;
+ }
+
kp = sotokextcb(so);
- while (kp)
- { if (kp->e_soif && kp->e_soif->sf_socontrol)
- { error = (*kp->e_soif->sf_socontrol)(so, sopt, kp);
+ while (kp) {
+ if (kp->e_soif && kp->e_soif->sf_socontrol) {
+ error = (*kp->e_soif->sf_socontrol)(so, sopt, kp);
if (error)
return((error == EJUSTRETURN) ? 0 : error);
}
case SO_REUSEPORT:
case SO_OOBINLINE:
case SO_TIMESTAMP:
+#ifdef __APPLE__
case SO_DONTTRUNC:
case SO_WANTMORE:
- case SO_WANTOOBFLAG:
+ case SO_WANTOOBFLAG:
+#endif
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
if (error)
if (error)
goto bad;
- if (tv.tv_sec > SHRT_MAX / hz - hz) {
+ /* assert(hz > 0); */
+ if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
+ tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
error = EDOM;
goto bad;
}
- val = tv.tv_sec * hz + tv.tv_usec / tick;
+ /* assert(tick > 0); */
+ /* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
+ {
+ long tmp = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
+ if (tmp > SHRT_MAX) {
+ error = EDOM;
+ goto bad;
+ }
+ val = tmp;
+ }
switch (sopt->sopt_name) {
case SO_SNDTIMEO:
break;
case SO_NKE:
- { struct so_nke nke;
+ {
+ struct so_nke nke;
struct NFDescriptor *nf1, *nf2 = NULL;
- error = sooptcopyin(sopt, &nke,
- sizeof nke, sizeof nke);
+ error = sooptcopyin(sopt, &nke,
+ sizeof nke, sizeof nke);
if (error)
goto bad;
break;
}
+ case SO_NOSIGPIPE:
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ goto bad;
+ if (optval)
+ so->so_flags |= SOF_NOSIGPIPE;
+ else
+ so->so_flags &= ~SOF_NOSIGPIPE;
+
+ break;
+
+ case SO_NOADDRERR:
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ goto bad;
+ if (optval)
+ so->so_flags |= SOF_NOADDRAVAIL;
+ else
+ so->so_flags &= ~SOF_NOADDRAVAIL;
+
+ break;
+
default:
error = ENOPROTOOPT;
break;
struct mbuf *m;
struct kextcb *kp;
+ if (sopt->sopt_dir != SOPT_GET) {
+ sopt->sopt_dir = SOPT_GET;
+ }
+
kp = sotokextcb(so);
- while (kp)
- { if (kp->e_soif && kp->e_soif->sf_socontrol)
- { error = (*kp->e_soif->sf_socontrol)(so, sopt, kp);
+ while (kp) {
+ if (kp->e_soif && kp->e_soif->sf_socontrol) {
+ error = (*kp->e_soif->sf_socontrol)(so, sopt, kp);
if (error)
return((error == EJUSTRETURN) ? 0 : error);
}
case SO_BROADCAST:
case SO_OOBINLINE:
case SO_TIMESTAMP:
+#ifdef __APPLE__
case SO_DONTTRUNC:
case SO_WANTMORE:
- case SO_WANTOOBFLAG:
+ case SO_WANTOOBFLAG:
+#endif
optval = so->so_options & sopt->sopt_name;
integer:
error = sooptcopyout(sopt, &optval, sizeof optval);
optval = so->so_type;
goto integer;
+#ifdef __APPLE__
case SO_NREAD:
- { int pkt_total;
+ {
+ int pkt_total;
struct mbuf *m1;
pkt_total = 0;
#if 0
kprintf("SKT CC: %d\n", so->so_rcv.sb_cc);
#endif
- while (m1)
- { if (m1->m_type == MT_DATA)
+ while (m1) {
+ if (m1->m_type == MT_DATA)
pkt_total += m1->m_len;
#if 0
kprintf("CNT: %d/%d\n", m1->m_len, pkt_total);
#endif
goto integer;
}
+#endif
case SO_ERROR:
optval = so->so_error;
so->so_error = 0;
error = sooptcopyout(sopt, &tv, sizeof tv);
break;
+ case SO_NOSIGPIPE:
+ optval = (so->so_flags & SOF_NOSIGPIPE);
+ goto integer;
+
+ case SO_NOADDRERR:
+ optval = (so->so_flags & SOF_NOADDRAVAIL);
+ goto integer;
+
default:
error = ENOPROTOOPT;
break;
}
}
-void
-sohasoutofband(so)
- register struct socket *so;
-{
- struct proc *p;
-
- struct kextcb *kp;
-
- kp = sotokextcb(so);
- while (kp)
- { if (kp->e_soif && kp->e_soif->sf_sohasoutofband)
- { if ((*kp->e_soif->sf_sohasoutofband)(so, kp))
- return;
- }
- kp = kp->e_next;
- }
- if (so->so_pgid < 0)
- gsignal(-so->so_pgid, SIGURG);
- else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
- psignal(p, SIGURG);
- selwakeup(&so->so_rcv.sb_sel);
-}
-
+#ifdef __APPLE__
/*
* Network filter support
*/
return(0);
}
-
/*
* Run the list of filters, freeing extension control blocks
* Assumes the soif/soutil blocks have been handled.
}
return(0);
}
+#endif /* __APPLE__ */
-
-int
-sopoll(struct socket *so, int events, struct ucred *cred, void * wql)
-{
- struct proc *p = current_proc();
- int revents = 0;
- int s = splnet();
-
- if (events & (POLLIN | POLLRDNORM))
- if (soreadable(so))
- revents |= events & (POLLIN | POLLRDNORM);
-
- if (events & (POLLOUT | POLLWRNORM))
- if (sowriteable(so))
- revents |= events & (POLLOUT | POLLWRNORM);
-
- if (events & (POLLPRI | POLLRDBAND))
- if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
- revents |= events & (POLLPRI | POLLRDBAND);
-
- if (revents == 0) {
- if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
- so->so_rcv.sb_flags |= SB_SEL;
- selrecord(p, &so->so_rcv.sb_sel, wql);
- }
-
- if (events & (POLLOUT | POLLWRNORM)) {
- so->so_snd.sb_flags |= SB_SEL;
- selrecord(p, &so->so_snd.sb_sel, wql);
- }
- }
-
- splx(s);
- return (revents);
-}
-
-/*#### IPv6 Integration. Added new routines */
+/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
int
-sooptgetm(struct sockopt *sopt, struct mbuf **mp)
+soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
struct mbuf *m, *m_prev;
int sopt_size = sopt->sopt_valsize;
/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
int
-sooptmcopyin(struct sockopt *sopt, struct mbuf *m)
+soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
struct mbuf *m0 = m;
m = m->m_next;
}
if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */
- panic("sooptmcopyin");
+ panic("soopt_mcopyin");
return 0;
}
/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
int
-sooptmcopyout(struct sockopt *sopt, struct mbuf *m)
+soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
struct mbuf *m0 = m;
size_t valsize = 0;
return 0;
}
+void /* sohasoutofband: give socket filters a chance to intercept, then signal SIGURG to the socket's owner and wake select()ors on the receive buffer */
+sohasoutofband(so)
+ register struct socket *so;
+{
+ struct proc *p;
+ struct kextcb *kp;
+ /* walk the socket's kextcb list; a non-zero return from any sf_sohasoutofband hook ends processing here */
+ kp = sotokextcb(so);
+ while (kp) {
+ if (kp->e_soif && kp->e_soif->sf_sohasoutofband) {
+ if ((*kp->e_soif->sf_sohasoutofband)(so, kp))
+ return;
+ }
+ kp = kp->e_next;
+ }
+ if (so->so_pgid < 0) /* negative so_pgid: signal via gsignal() with the negated value */
+ gsignal(-so->so_pgid, SIGURG);
+ else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) /* positive so_pgid: signal the process pfind() returns, if any */
+ psignal(p, SIGURG);
+ selwakeup(&so->so_rcv.sb_sel); /* done regardless of whether a signal was sent */
+}
+
+int /* sopoll: return the subset of the requested poll events that are ready on so; when none are, register the current process for select wakeups on the relevant sockbuf(s) */
+sopoll(struct socket *so, int events, struct ucred *cred, void * wql)
+{
+ struct proc *p = current_proc();
+ int revents = 0; /* ready events accumulated below; this is the return value */
+ int s = splnet(); /* restored by splx(s) before returning */
+ /* readable: report whichever of POLLIN/POLLRDNORM the caller asked for */
+ if (events & (POLLIN | POLLRDNORM))
+ if (soreadable(so))
+ revents |= events & (POLLIN | POLLRDNORM);
+ /* writable: report whichever of POLLOUT/POLLWRNORM the caller asked for */
+ if (events & (POLLOUT | POLLWRNORM))
+ if (sowriteable(so))
+ revents |= events & (POLLOUT | POLLWRNORM);
+ /* priority data: an out-of-band mark is set, or the socket state has SS_RCVATMARK */
+ if (events & (POLLPRI | POLLRDBAND))
+ if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
+ revents |= events & (POLLPRI | POLLRDBAND);
+ /* nothing ready: record the select on the receive and/or send buffer (cred is not used in this function) */
+ if (revents == 0) {
+ if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
+ /* Darwin sets the flag first, BSD calls selrecord first */
+ so->so_rcv.sb_flags |= SB_SEL;
+ selrecord(p, &so->so_rcv.sb_sel, wql);
+ }
+
+ if (events & (POLLOUT | POLLWRNORM)) {
+ /* Darwin sets the flag first, BSD calls selrecord first */
+ so->so_snd.sb_flags |= SB_SEL;
+ selrecord(p, &so->so_snd.sb_sel, wql);
+ }
+ }
+
+ splx(s);
+ return (revents);
+}
+
+
+int /* soo_kqfilter: choose the filterops for a socket knote and attach it to the matching sockbuf's note list; returns 0 on success, 1 when the filter is unsupported or the attach is refused */
+soo_kqfilter(struct file *fp, struct knote *kn, struct proc *p)
+{
+ struct socket *so = (struct socket *)kn->kn_fp->f_data; /* socket is taken from the knote's file; the fp and p arguments are not used in this body */
+ struct sockbuf *sb;
+ int s;
+ /* pick the filter ops and the sockbuf from the requested filter type */
+ switch (kn->kn_filter) {
+ case EVFILT_READ:
+ if (so->so_options & SO_ACCEPTCONN) /* listening sockets get the listen filter instead of the data-read filter */
+ kn->kn_fop = &solisten_filtops;
+ else
+ kn->kn_fop = &soread_filtops;
+ sb = &so->so_rcv;
+ break;
+ case EVFILT_WRITE:
+ kn->kn_fop = &sowrite_filtops;
+ sb = &so->so_snd;
+ break;
+ default:
+ return (1);
+ }
+ /* NOTE(review): SI_INITED presumably marks a selinfo already in use by select -- confirm; when set the knote is not attached */
+ if (sb->sb_sel.si_flags & SI_INITED)
+ return (1);
+
+ s = splnet();
+ if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn)) /* SB_KNOTE is set only when KNOTE_ATTACH returns non-zero */
+ sb->sb_flags |= SB_KNOTE;
+ splx(s);
+ return (0);
+}
+
+static void /* filt_sordetach: detach callback of soread_filtops and solisten_filtops; removes kn from the receive sockbuf's note list */
+filt_sordetach(struct knote *kn)
+{
+ struct socket *so = (struct socket *)kn->kn_fp->f_data;
+ int s = splnet(); /* restored by splx(s) below */
+ /* act only when the sockbuf has SB_KNOTE set and its selinfo does not have SI_INITED set */
+ if (so->so_rcv.sb_flags & SB_KNOTE &&
+ !(so->so_rcv.sb_sel.si_flags & SI_INITED))
+ if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn)) /* SB_KNOTE is cleared only when KNOTE_DETACH returns non-zero */
+ so->so_rcv.sb_flags &= ~SB_KNOTE;
+ splx(s);
+}
+
+/*ARGSUSED*/
+static int /* filt_soread: event callback of soread_filtops; stores the receive byte count in kn_data and returns non-zero when the knote should fire (hint is not used) */
+filt_soread(struct knote *kn, long hint)
+{
+ struct socket *so = (struct socket *)kn->kn_fp->f_data;
+ /* kn_data is updated on every call, whether or not the event fires */
+ kn->kn_data = so->so_rcv.sb_cc;
+ if (so->so_state & SS_CANTRCVMORE) { /* no more data can arrive: fire with EV_EOF and pass so_error in kn_fflags */
+ kn->kn_flags |= EV_EOF;
+ kn->kn_fflags = so->so_error;
+ return (1);
+ }
+ if (so->so_error) /* temporary udp error */
+ return (1);
+ if (kn->kn_sfflags & NOTE_LOWAT) /* NOTE_LOWAT set: compare against kn_sdata instead of the sockbuf low-water mark */
+ return (kn->kn_data >= kn->kn_sdata);
+ return (kn->kn_data >= so->so_rcv.sb_lowat);
+}
+
+static void /* filt_sowdetach: detach callback of sowrite_filtops; removes kn from the send sockbuf's note list */
+filt_sowdetach(struct knote *kn)
+{
+ struct socket *so = (struct socket *)kn->kn_fp->f_data;
+ int s = splnet(); /* restored by splx(s) below */
+ /* act only when the sockbuf has SB_KNOTE set and its selinfo does not have SI_INITED set */
+ if(so->so_snd.sb_flags & SB_KNOTE &&
+ !(so->so_snd.sb_sel.si_flags & SI_INITED))
+ if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn)) /* SB_KNOTE is cleared only when KNOTE_DETACH returns non-zero */
+ so->so_snd.sb_flags &= ~SB_KNOTE;
+ splx(s);
+}
+
+/*ARGSUSED*/
+static int /* filt_sowrite: event callback of sowrite_filtops; stores the send-buffer space in kn_data and returns non-zero when the knote should fire (hint is not used) */
+filt_sowrite(struct knote *kn, long hint)
+{
+ struct socket *so = (struct socket *)kn->kn_fp->f_data;
+ /* kn_data is updated on every call, whether or not the event fires */
+ kn->kn_data = sbspace(&so->so_snd);
+ if (so->so_state & SS_CANTSENDMORE) { /* sending is shut off: fire with EV_EOF and pass so_error in kn_fflags */
+ kn->kn_flags |= EV_EOF;
+ kn->kn_fflags = so->so_error;
+ return (1);
+ }
+ if (so->so_error) /* temporary udp error */
+ return (1);
+ if (((so->so_state & SS_ISCONNECTED) == 0) && /* protocol requires a connection and the socket is not connected: never report writable */
+ (so->so_proto->pr_flags & PR_CONNREQUIRED))
+ return (0);
+ if (kn->kn_sfflags & NOTE_LOWAT) /* NOTE_LOWAT set: compare against kn_sdata instead of the sockbuf low-water mark */
+ return (kn->kn_data >= kn->kn_sdata);
+ return (kn->kn_data >= so->so_snd.sb_lowat);
+}
+
+/*ARGSUSED*/
+static int /* filt_solisten: event callback of solisten_filtops; stores so_qlen in kn_data and fires when the so_comp queue is non-empty (hint is not used) */
+filt_solisten(struct knote *kn, long hint)
+{
+ struct socket *so = (struct socket *)kn->kn_fp->f_data;
+ /* kn_data carries so_qlen, while readiness is decided by the so_comp queue alone */
+ kn->kn_data = so->so_qlen;
+ return (! TAILQ_EMPTY(&so->so_comp));
+}
+