/*
- * Copyright (c) 1998-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2012 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <sys/ev.h>
#include <sys/kdebug.h>
#include <sys/un.h>
+#include <sys/user.h>
+#include <sys/priv.h>
#include <net/route.h>
+#include <net/ntstat.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
#include <kern/zalloc.h>
#include <kern/locks.h>
#include <machine/limits.h>
#include <libkern/OSAtomic.h>
#include <pexpert/pexpert.h>
+#include <kern/assert.h>
+#include <kern/task.h>
+#include <sys/kpi_mbuf.h>
+#include <sys/mcache.h>
#if CONFIG_MACF
#include <security/mac.h>
#include <security/mac_framework.h>
#endif /* MAC */
-/* how a timeval looks to a 64-bit process */
-struct timeval64 {
- int64_t tv_sec;
- int32_t tv_usec;
-};
int so_cache_hw = 0;
int so_cache_timeouts = 0;
int so_cache_max_freed = 0;
int cached_sock_count = 0;
+__private_extern__ int max_cached_sock_count = MAX_CACHED_SOCKETS;
struct socket *socket_cache_head = 0;
struct socket *socket_cache_tail = 0;
-u_long so_cache_time = 0;
+u_int32_t so_cache_time = 0;
int so_cache_init_done = 0;
struct zone *so_cache_zone;
static int filt_soread(struct knote *kn, long hint);
static void filt_sowdetach(struct knote *kn);
static int filt_sowrite(struct knote *kn, long hint);
-static int filt_solisten(struct knote *kn, long hint);
+static void filt_sockdetach(struct knote *kn);
+static int filt_sockev(struct knote *kn, long hint);
static int
sooptcopyin_timeval(struct sockopt *sopt, struct timeval * tv_p);
static int
sooptcopyout_timeval(struct sockopt *sopt, const struct timeval * tv_p);
-static struct filterops solisten_filtops =
- { 1, NULL, filt_sordetach, filt_solisten };
-static struct filterops soread_filtops =
- { 1, NULL, filt_sordetach, filt_soread };
-static struct filterops sowrite_filtops =
- { 1, NULL, filt_sowdetach, filt_sowrite };
+static struct filterops soread_filtops = {	/* filter ops table pairing filt_sordetach with filt_soread */
+ .f_isfd = 1,	/* same value the old positional initializer carried in its first slot */
+ .f_detach = filt_sordetach,	/* detach callback */
+ .f_event = filt_soread,	/* event callback */
+};	/* members not named above are zero-initialized by the designated-initializer form */
+static struct filterops sowrite_filtops = {	/* filter ops table pairing filt_sowdetach with filt_sowrite */
+ .f_isfd = 1,	/* same value the old positional initializer carried in its first slot */
+ .f_detach = filt_sowdetach,	/* detach callback */
+ .f_event = filt_sowrite,	/* event callback */
+};	/* members not named above are zero-initialized by the designated-initializer form */
+static struct filterops sock_filtops = {	/* filter ops table pairing filt_sockdetach with filt_sockev */
+ .f_isfd = 1,	/* set to 1, as in the read and write tables */
+ .f_detach = filt_sockdetach,	/* detach callback, declared static earlier in this file */
+ .f_event = filt_sockev,	/* event callback, declared static earlier in this file */
+};	/* members not named above are zero-initialized by the designated-initializer form */
#define EVEN_MORE_LOCKING_DEBUG 0
int socket_debug = 0;
SYSCTL_DECL(_kern_ipc);
int somaxconn = SOMAXCONN;
-SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, "");
+SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW | CTLFLAG_LOCKED, &somaxconn, 0, "");
/* Should we get a maximum also ??? */
static int sosendmaxchain = 65536;
static int sosendminchain = 16384;
static int sorecvmincopy = 16384;
-SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
+SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW | CTLFLAG_LOCKED, &sosendminchain,
0, "");
-SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
+SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW | CTLFLAG_LOCKED, &sorecvmincopy,
0, "");
/*
* the socket is marked with SOF_MULTIPAGES; see below.
*/
int sosendjcl = 1;
-SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl, CTLFLAG_RW, &sosendjcl, 0, "");
+SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl, CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl, 0, "");
/*
* Set this to ignore SOF_MULTIPAGES and use jumbo clusters for large
* capable. Set this to 1 only for testing/debugging purposes.
*/
int sosendjcl_ignore_capab = 0;
-SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab, CTLFLAG_RW,
+SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab, CTLFLAG_RW | CTLFLAG_LOCKED,
&sosendjcl_ignore_capab, 0, "");
+int sodefunctlog = 0;	/* defaults to 0; NOTE(review): presumably gates the SODEFUNCTLOG() messages used in this file - confirm against the macro definition */
+SYSCTL_INT(_kern_ipc, OID_AUTO, sodefunctlog, CTLFLAG_RW | CTLFLAG_LOCKED,	/* registered read-write under _kern_ipc with an auto-assigned OID */
+ &sodefunctlog, 0, "");
+
+int sothrottlelog = 0;	/* defaults to 0; its consumer is not visible in this part of the file */
+SYSCTL_INT(_kern_ipc, OID_AUTO, sothrottlelog, CTLFLAG_RW | CTLFLAG_LOCKED,	/* registered read-write under _kern_ipc with an auto-assigned OID */
+ &sothrottlelog, 0, "");
+
/*
* Socket operation routines.
* These routines are called by the routines in
/* sys_generic.c */
extern void postevent(struct socket *, struct sockbuf *, int);
extern void evsofree(struct socket *);
+extern int tcp_notsent_lowat_check(struct socket *so);
/* TODO: these should be in header file */
extern int get_inpcb_str_size(void);
static void so_cache_timer(void *);
void soclose_wait_locked(struct socket *so);
+int so_isdstlocal(struct socket *so);
+/*
+ * SOTCDB_NO_DSCP is set by default, to prevent the networking stack from
+ * setting the DSCP code on the packet based on the service class; see
+ * <rdar://problem/11277343> for details.
+ */
+__private_extern__ u_int32_t sotcdb = SOTCDB_NO_DSCP;
+SYSCTL_INT(_kern_ipc, OID_AUTO, sotcdb, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &sotcdb, 0, "");
void
socketinit(void)
get_inpcb_str_size() + 4 + get_tcp_str_size());
so_cache_zone = zinit(str_size, 120000*str_size, 8192, "socache zone");
+ zone_change(so_cache_zone, Z_CALLERACCT, FALSE);
+ zone_change(so_cache_zone, Z_NOENCRYPT, TRUE);
#if TEMPDEBUG
printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
#endif
so_cache_zone_element_size = str_size;
sflt_init();
+
+ _CASSERT(_SO_TC_MAX == SO_TC_STATS_MAX);
+
+ socket_tclass_init();
+
+ socket_flowadv_init();
}
static void
cached_sock_alloc(struct socket **so, int waitok)
{
caddr_t temp;
- register u_long offset;
+ register uintptr_t offset;
lck_mtx_lock(so_cache_mtx);
* Define offsets for extra structures into our single block of
* memory. Align extra structures on longword boundaries.
*/
- offset = (u_long) *so;
+
+ offset = (uintptr_t) *so;
offset += sizeof (struct socket);
- if (offset & 0x3) {
- offset += 4;
- offset &= 0xfffffffc;
- }
+
+ offset = ALIGN(offset);
+
(*so)->so_saved_pcb = (caddr_t)offset;
offset += get_inpcb_str_size();
- if (offset & 0x3) {
- offset += 4;
- offset &= 0xfffffffc;
- }
- ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb =
+ offset = ALIGN(offset);
+
+ ((struct inpcb *)(void *)(*so)->so_saved_pcb)->inp_saved_ppcb =
(caddr_t)offset;
#if TEMPDEBUG
kprintf("Allocating cached socket - %p, pcb=%p tcpcb=%p\n",
lck_mtx_lock(so_cache_mtx);
- if (++cached_sock_count > MAX_CACHED_SOCKETS) {
+ if (++cached_sock_count > max_cached_sock_count) {
--cached_sock_count;
lck_mtx_unlock(so_cache_mtx);
#if TEMPDEBUG
#endif
}
+static void
+so_update_last_owner_locked(	/* record a process as the most recent user of the socket (last_pid / last_upid) */
+ struct socket *so,	/* socket whose last_pid and last_upid fields are refreshed */
+ proc_t self)	/* process to record; NULL means "use current_proc()" */
+{	/* NOTE(review): the visible callers invoke this right after socket_lock(so, 1); the socket lock is presumably expected to be held - confirm */
+ if (so->last_pid != 0)	/* a socket whose last_pid is zero is deliberately left untouched */
+ {
+ if (self == NULL)
+ self = current_proc();	/* no process supplied: fall back to the current one */
+
+ if (self)	/* nothing to record if no process could be resolved */
+ {
+ so->last_upid = proc_uniqueid(self);	/* unique id of the recorded process */
+ so->last_pid = proc_pid(self);	/* pid of the recorded process */
+ }
+ }
+}
+
static void
so_cache_timer(__unused void *dummy)
{
register struct protosw *prp;
register struct socket *so;
register int error = 0;
+
#if TCPDEBUG
extern int tcpconsdebug;
#endif
}
if (prp->pr_type != type)
return (EPROTOTYPE);
- so = soalloc(p != 0, dom, type);
+ so = soalloc(1, dom, type);
if (so == 0)
return (ENOBUFS);
TAILQ_INIT(&so->so_incomp);
TAILQ_INIT(&so->so_comp);
so->so_type = type;
+ so->last_upid = proc_uniqueid(p);
+ so->last_pid = proc_pid(p);
+
+ so->so_cred = kauth_cred_proc_ref(p);
+ if (!suser(kauth_cred_get(), NULL))
+ so->so_state = SS_PRIV;
- if (p != 0) {
- so->so_uid = kauth_cred_getuid(kauth_cred_get());
- if (!suser(kauth_cred_get(), NULL))
- so->so_state = SS_PRIV;
- }
so->so_proto = prp;
#ifdef __APPLE__
so->so_rcv.sb_flags |= SB_RECV; /* XXX */
so->so_options |= SO_DEBUG;
#endif
#endif
+ so_set_default_traffic_class(so);
+ /*
+ * If this is a background thread/task, mark the socket as such.
+ */
+ if (proc_get_self_isbackground() != 0) {
+ socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND);
+ so->so_background_thread = current_thread();
+ }
+
+ switch (dom) {
+ /*
+ * Don't mark Unix domain or system sockets as eligible for defunct by default.
+ */
+ case PF_LOCAL:
+ case PF_SYSTEM:
+ so->so_flags |= SOF_NODEFUNCT;
+ break;
+ default:
+ break;
+ }
+
*aso = so;
return (0);
}
{
struct proc *p = current_proc();
int error = 0;
- struct socket_filter_entry *filter;
- int filtered = 0;
socket_lock(so, 1);
+ VERIFY(so->so_usecount > 1);
+ so_update_last_owner_locked(so, p);
/*
- * If this is a bind request on a previously-accepted socket
- * that has been marked as inactive, reject it now before
- * we go any further.
+ * If this is a bind request on a socket that has been marked
+ * as inactive, reject it now before we go any further.
*/
if (so->so_flags & SOF_DEFUNCT) {
error = EINVAL;
+ SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
+ __func__, proc_pid(p), so, INP_SOCKAF(so), INP_SOCKTYPE(so),
+ error));
goto out;
}
/* Socket filter */
- error = 0;
- for (filter = so->so_filt; filter && (error == 0);
- filter = filter->sfe_next_onsocket) {
- if (filter->sfe_filter->sf_filter.sf_bind) {
- if (filtered == 0) {
- filtered = 1;
- sflt_use(so);
- socket_unlock(so, 0);
- }
- error = filter->sfe_filter->sf_filter.
- sf_bind(filter->sfe_cookie, so, nam);
- }
- }
- if (filtered != 0) {
- socket_lock(so, 0);
- sflt_unuse(so);
- }
- /* End socket filter */
+ error = sflt_bind(so, nam);
if (error == 0)
error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
void
sodealloc(struct socket *so)
{
+ kauth_cred_unref(&so->so_cred);
+
+ /* Remove any filters */
+ sflt_termsock(so);
+
so->so_gencnt = ++so_gencnt;
#if CONFIG_MACF_SOCKET
{
struct proc *p = current_proc();
int error = 0;
- struct socket_filter_entry *filter;
- int filtered = 0;
socket_lock(so, 1);
+
if (so->so_proto == NULL) {
error = EINVAL;
goto out;
/*
* If the listen request is made on a socket that is not fully
- * disconnected, or on a previously-accepted socket that has
- * been marked as inactive, reject the request now.
+ * disconnected, or on a socket that has been marked as inactive,
+ * reject the request now.
*/
if ((so->so_state &
(SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) ||
(so->so_flags & SOF_DEFUNCT)) {
error = EINVAL;
+ if (so->so_flags & SOF_DEFUNCT) {
+ SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
+ __func__, proc_pid(p), so, INP_SOCKAF(so),
+ INP_SOCKTYPE(so), error));
+ }
goto out;
}
goto out;
}
- error = 0;
- for (filter = so->so_filt; filter && (error == 0);
- filter = filter->sfe_next_onsocket) {
- if (filter->sfe_filter->sf_filter.sf_listen) {
- if (filtered == 0) {
- filtered = 1;
- sflt_use(so);
- socket_unlock(so, 0);
- }
- error = filter->sfe_filter->sf_filter.
- sf_listen(filter->sfe_cookie, so);
- }
- }
- if (filtered != 0) {
- socket_lock(so, 0);
- sflt_unuse(so);
- }
+ error = sflt_listen(so);
if (error == 0) {
error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
/* Assume socket is locked */
- /* Remove any filters - may be called more than once */
- sflt_termsock(so);
-
if ((!(so->so_flags & SOF_PCBCLEARING)) ||
((so->so_state & SS_NOFDREF) == 0)) {
#ifdef __APPLE__
* Double check here and return if there's no outstanding upcall;
* otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
*/
- if (!(so->so_flags & SOF_UPCALLINUSE) ||
- !(so->so_flags & SOF_UPCALLCLOSEWAIT))
+ if (!so->so_upcallusecount || !(so->so_flags & SOF_UPCALLCLOSEWAIT))
return;
-
+ so->so_rcv.sb_flags &= ~SB_UPCALL;
+ so->so_snd.sb_flags &= ~SB_UPCALL;
so->so_flags |= SOF_CLOSEWAIT;
(void) msleep((caddr_t)&so->so_upcall, mutex_held, (PZERO - 1),
"soclose_wait_locked", NULL);
if (so->so_usecount == 0)
panic("soclose: usecount is zero so=%p\n", so);
if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
+ /*
+ * Let NetworkStatistics know this PCB is going away
+ * before we detach it.
+ */
+ if (nstat_collect &&
+ (so->so_proto->pr_domain->dom_family == AF_INET ||
+ so->so_proto->pr_domain->dom_family == AF_INET6))
+ nstat_pcb_detach(so->so_pcb);
+
int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
if (error == 0)
error = error2;
if (so->so_pcb && so->so_state & SS_NOFDREF)
panic("soclose: NOFDREF");
so->so_state |= SS_NOFDREF;
+
+ if ((so->so_flags & SOF_KNOTE) != 0)
+ KNOTE(&so->so_klist, SO_FILT_HINT_LOCKED);
#ifdef __APPLE__
so->so_proto->pr_domain->dom_refs--;
evsofree(so);
int error = 0;
socket_lock(so, 1);
- if (so->so_flags & SOF_UPCALLINUSE)
+ if (so->so_upcallusecount)
soclose_wait_locked(so);
if (so->so_retaincnt == 0) {
soacceptfilter(struct socket *so)
{
struct sockaddr *local = NULL, *remote = NULL;
- struct socket_filter_entry *filter;
- int error = 0, filtered = 0;
+ int error = 0;
struct socket *head = so->so_head;
/*
- * There's no need to hold the lock; this socket
+ * Hold the lock even if this socket
* has not been made visible to the filter(s).
+ * For sockets with global locks, this protects against the
+ * head or peer going away.
*/
- if ((sock_getaddr(so, &remote, 1) != 0) ||
- sock_getaddr(so, &local, 0) != 0) {
+ socket_lock(so, 1);
+ if (sogetaddr_locked(so, &remote, 1) != 0 ||
+ sogetaddr_locked(so, &local, 0) != 0) {
so->so_state &= ~(SS_NOFDREF | SS_COMP);
so->so_head = NULL;
+ socket_unlock(so, 1);
soclose(so);
/* Out of resources; try it again next time */
error = ECONNABORTED;
goto done;
}
- /*
- * At this point, we have a reference on the listening socket
- * so we know it won't be going away. Do the same for the newly
- * accepted socket while we invoke the accept callback routine.
- */
- socket_lock(so, 1);
- for (filter = so->so_filt; filter != NULL && error == 0;
- filter = filter->sfe_next_onsocket) {
- if (filter->sfe_filter->sf_filter.sf_accept != NULL) {
- if (!filtered) {
- filtered = 1;
- sflt_use(so);
- socket_unlock(so, 0);
- }
- error = filter->sfe_filter->sf_filter.
- sf_accept(filter->sfe_cookie,
- head, so, local, remote);
- }
- }
-
- if (filtered) {
- socket_lock(so, 0);
- sflt_unuse(so);
- }
+ error = sflt_accept(head, so, local, remote);
/*
* If we get EJUSTRETURN from one of the filters, mark this socket
*/
if (error == EJUSTRETURN) {
error = 0;
- so->so_flags |= SOF_DEFUNCT;
- /* Prevent data from being appended to the socket buffers */
- so->so_snd.sb_flags |= SB_DROP;
- so->so_rcv.sb_flags |= SB_DROP;
+ (void) sosetdefunct(current_proc(), so,
+ SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
}
if (error != 0) {
if (dolock)
socket_lock(so, 1);
-
+
/*
* If this is a listening socket or if this is a previously-accepted
* socket that has been marked as inactive, reject the connect request.
*/
if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
+ error = EOPNOTSUPP;
+ if (so->so_flags & SOF_DEFUNCT) {
+ SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
+ __func__, proc_pid(p), so, INP_SOCKAF(so),
+ INP_SOCKTYPE(so), error));
+ }
if (dolock)
socket_unlock(so, 1);
- return (EOPNOTSUPP);
+ return (error);
}
if ((so->so_restrictions & SO_RESTRICT_DENYOUT) != 0) {
* Run connect filter before calling protocol:
* - non-blocking connect returns before completion;
*/
- struct socket_filter_entry *filter;
- int filtered = 0;
-
- error = 0;
- for (filter = so->so_filt; filter && (error == 0);
- filter = filter->sfe_next_onsocket) {
- if (filter->sfe_filter->sf_filter.sf_connect_out) {
- if (filtered == 0) {
- filtered = 1;
- sflt_use(so);
- socket_unlock(so, 0);
- }
- error = filter->sfe_filter->sf_filter.
- sf_connect_out(filter->sfe_cookie, so, nam);
- }
- }
- if (filtered != 0) {
- socket_lock(so, 0);
- sflt_unuse(so);
- }
+ error = sflt_connectout(so, nam);
if (error) {
if (error == EJUSTRETURN)
error = 0;
- if (dolock)
- socket_unlock(so, 1);
- return (error);
+ } else {
+ error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
}
-
- error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
}
if (dolock)
socket_unlock(so, 1);
* [so_error]:???
*/
static int
-sosendcheck(struct socket *so, struct sockaddr *addr, long resid, long clen,
- long atomic, int flags, int *sblocked)
+sosendcheck(struct socket *so, struct sockaddr *addr, int32_t resid, int32_t clen,
+ int32_t atomic, int flags, int *sblocked)
{
- int error = 0;
- long space;
+ int error = 0;
+ int32_t space;
int assumelock = 0;
restart:
} else {
error = sblock(&so->so_snd, SBLOCKWAIT(flags));
if (error) {
+ if (so->so_flags & SOF_DEFUNCT)
+ goto defunct;
return (error);
}
*sblocked = 1;
}
/*
- * If a send attempt is made on a previously-accepted socket
- * that has been marked as inactive (disconnected), reject
- * the request.
+ * If a send attempt is made on a socket that has been marked
+ * as inactive (disconnected), reject the request.
*/
- if (so->so_flags & SOF_DEFUNCT)
- return (ENOTCONN);
+ if (so->so_flags & SOF_DEFUNCT) {
+defunct:
+ error = EPIPE;
+ SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", __func__,
+ proc_selfpid(), so, INP_SOCKAF(so), INP_SOCKTYPE(so),
+ error));
+ return (error);
+ }
if (so->so_state & SS_CANTSENDMORE)
return (EPIPE);
if ((atomic && resid > so->so_snd.sb_hiwat) ||
clen > so->so_snd.sb_hiwat)
return (EMSGSIZE);
- if (space < resid + clen &&
- (atomic || space < (long)so->so_snd.sb_lowat || space < clen)) {
+ if ((space < resid + clen &&
+ (atomic || space < (int32_t)so->so_snd.sb_lowat || space < clen)) ||
+ (so->so_type == SOCK_STREAM && so_wait_for_if_feedback(so))) {
if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) ||
assumelock) {
return (EWOULDBLOCK);
}
sbunlock(&so->so_snd, 1);
+ *sblocked = 0;
error = sbwait(&so->so_snd);
if (error) {
+ if (so->so_flags & SOF_DEFUNCT)
+ goto defunct;
return (error);
}
goto restart;
{
struct mbuf **mp;
register struct mbuf *m, *freelist = NULL;
- register long space, len, resid;
+ register int32_t space, len, resid;
int clen = 0, error, dontroute, mlen, sendflags;
int atomic = sosendallatonce(so) || top;
int sblocked = 0;
so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);
socket_lock(so, 1);
+ so_update_last_owner_locked(so, p);
+
if (so->so_type != SOCK_STREAM && (flags & MSG_OOB) != 0) {
error = EOPNOTSUPP;
socket_unlock(so, 1);
dontroute =
(flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
(so->so_proto->pr_flags & PR_ATOMIC);
- if (p)
- OSIncrementAtomic(&p->p_stats->p_ru.ru_msgsnd);
+ OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
if (control)
clen = control->m_len;
1024 : 0);
do {
- struct socket_filter_entry *filter;
- int filtered;
- boolean_t recursive;
-
if (uio == NULL) {
/*
* Data is prepackaged in "top".
int bytes_to_copy;
boolean_t jumbocl;
- bytes_to_copy = min(resid, space);
+ bytes_to_copy = imin(resid, space);
if (sosendminchain > 0) {
chainlength = 0;
* haven't yet consumed.
*/
if (freelist == NULL &&
- bytes_to_copy > NBPG && jumbocl) {
+ bytes_to_copy > MBIGCLBYTES &&
+ jumbocl) {
num_needed =
bytes_to_copy / M16KCLBYTES;
if (freelist == NULL &&
bytes_to_copy > MCLBYTES) {
num_needed =
- bytes_to_copy / NBPG;
+ bytes_to_copy / MBIGCLBYTES;
if ((bytes_to_copy -
- (num_needed * NBPG)) >=
+ (num_needed * MBIGCLBYTES)) >=
MINCLSIZE)
num_needed++;
m_getpackets_internal(
(unsigned int *)&num_needed,
hdrs_needed, M_WAIT, 0,
- NBPG);
+ MBIGCLBYTES);
/*
* Fall back to cluster size
* if allocation failed
MHLEN - m_leadingspace(m);
else
mlen = MLEN;
- len = min(mlen, bytes_to_copy);
+ len = imin(mlen, bytes_to_copy);
chainlength += len;
space -= len;
error = uiomove(mtod(m, caddr_t),
- (int)len, uio);
+ len, uio);
- // LP64todo - fix this!
resid = uio_resid(uio);
m->m_len = len;
/*
* Socket filter processing
*/
- recursive = (so->so_send_filt_thread != NULL);
- filtered = 0;
- error = 0;
- for (filter = so->so_filt; filter && (error == 0);
- filter = filter->sfe_next_onsocket) {
- if (filter->sfe_filter->sf_filter.sf_data_out) {
- int so_flags = 0;
- if (filtered == 0) {
- filtered = 1;
- so->so_send_filt_thread =
- current_thread();
- sflt_use(so);
- socket_unlock(so, 0);
- so_flags =
- (sendflags & MSG_OOB) ?
- sock_data_filt_flag_oob : 0;
- }
- error = filter->sfe_filter->sf_filter.
- sf_data_out(filter->sfe_cookie, so,
- addr, &top, &control, so_flags);
+ error = sflt_data_out(so, addr, &top, &control,
+ (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0);
+ if (error) {
+ if (error == EJUSTRETURN) {
+ error = 0;
+ clen = 0;
+ control = 0;
+ top = 0;
}
- }
-
- if (filtered) {
- /*
- * At this point, we've run at least one
- * filter. The socket is unlocked as is
- * the socket buffer. Clear the recorded
- * filter thread only when we are outside
- * of a filter's context. This allows for
- * a filter to issue multiple inject calls
- * from its sf_data_out callback routine.
- */
- socket_lock(so, 0);
- sflt_unuse(so);
- if (!recursive)
- so->so_send_filt_thread = 0;
- if (error) {
- if (error == EJUSTRETURN) {
- error = 0;
- clen = 0;
- control = 0;
- top = 0;
- }
- goto release;
- }
+ goto release;
}
/*
* End Socket filter processing
*/
- if (error == EJUSTRETURN) {
- /* A socket filter handled this data */
- error = 0;
- } else {
- error = (*so->so_proto->pr_usrreqs->pru_send)
- (so, sendflags, top, addr, control, p);
- }
+ error = (*so->so_proto->pr_usrreqs->pru_send)
+ (so, sendflags, top, addr, control, p);
#ifdef __APPLE__
if (flags & MSG_SEND)
so->so_temp = NULL;
struct protosw *pr = so->so_proto;
struct mbuf *nextrecord;
int moff, type = 0;
- // LP64todo - fix this!
int orig_resid = uio_resid(uio);
struct mbuf *free_list;
int delayed_copy_len;
so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat);
socket_lock(so, 1);
+ so_update_last_owner_locked(so, p);
#ifdef MORE_LOCKING_DEBUG
if (so->so_usecount == 1)
if (so->so_flags & SOF_DEFUNCT) {
struct sockbuf *sb = &so->so_rcv;
+ error = ENOTCONN;
+ SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", __func__,
+ proc_pid(p), so, INP_SOCKAF(so), INP_SOCKTYPE(so), error));
/*
* This socket should have been disconnected and flushed
- * prior to being returned from accept; there should be
- * no data on its receive list, so panic otherwise.
+ * prior to being returned from sodefunct(); there should
+ * be no data on its receive list, so panic otherwise.
*/
- sb_empty_assert(sb, __func__);
+ if (so->so_state & SS_DEFUNCT)
+ sb_empty_assert(sb, __func__);
socket_unlock(so, 1);
- return (ENOTCONN);
+ return (error);
}
/*
goto bad;
socket_unlock(so, 0);
do {
- // LP64todo - fix this!
error = uiomove(mtod(m, caddr_t),
- (int)min(uio_resid(uio), m->m_len), uio);
+ imin(uio_resid(uio), m->m_len), uio);
m = m_free(m);
} while (uio_resid(uio) && error == 0 && m);
socket_lock(so, 0);
* end up with false positives during select() or poll()
* which could put the application in a bad state.
*/
- if (m == NULL && so->so_rcv.sb_cc != 0)
- panic("soreceive corrupted so_rcv: m %p cc %lu",
- m, so->so_rcv.sb_cc);
+ SB_MB_CHECK(&so->so_rcv);
if (so->so_error) {
if (m)
goto restart;
}
dontblock:
-#ifndef __APPLE__
- if (uio->uio_procp)
- uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
-#else /* __APPLE__ */
- /*
- * 2207985
- * This should be uio->uio-procp; however, some callers of this
- * function use auto variables with stack garbage, and fail to
- * fill out the uio structure properly.
- */
- if (p)
- OSIncrementAtomic(&p->p_stats->p_ru.ru_msgrcv);
-#endif /* __APPLE__ */
+ OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
nextrecord = m->m_nextpkt;
goto restart;
}
socket_lock(so, 0);
+ /*
+ * If the socket has been defunct'd, drop it.
+ */
+ if (so->so_flags & SOF_DEFUNCT) {
+ m_freem(m);
+ error = ENOTCONN;
+ goto release;
+ }
/*
* Re-adjust the socket receive list and re-enqueue
* the record in front of any packets which may have
struct mbuf *cm = NULL, *cmn;
struct mbuf **cme = &cm;
struct sockbuf *sb_rcv = &so->so_rcv;
+ struct mbuf **msgpcm = NULL;
/*
* Externalizing the control messages would require us to
do {
if (flags & MSG_PEEK) {
if (controlp != NULL) {
+ if (*controlp == NULL) {
+ msgpcm = controlp;
+ }
*controlp = m_copy(m, 0, m->m_len);
+
+ /* If we failed to allocate an mbuf,
+ * release any previously allocated
+ * mbufs for control data and return
+ * an error. The mbufs stay in the
+ * socket because this path is
+ * handling the MSG_PEEK flag.
+ */
+ if (*controlp == NULL) {
+ m_freem(*msgpcm);
+ error = ENOBUFS;
+ goto release;
+ }
controlp = &(*controlp)->m_next;
}
m = m->m_next;
}
cm = cmn;
}
- orig_resid = 0;
- if (sb_rcv->sb_mb != NULL)
+ /*
+ * Update the value of nextrecord in case we received new
+ * records when the socket was unlocked above for
+ * externalizing SCM_RIGHTS.
+ */
+ if (m != NULL)
nextrecord = sb_rcv->sb_mb->m_nextpkt;
else
- nextrecord = NULL;
+ nextrecord = sb_rcv->sb_mb;
+ orig_resid = 0;
}
if (m != NULL) {
flags |= MSG_OOB;
} else {
if (!(flags & MSG_PEEK)) {
- so->so_rcv.sb_mb = nextrecord;
SB_EMPTY_FIXUP(&so->so_rcv);
}
}
flags |= MSG_OOB;
}
so->so_state &= ~SS_RCVATMARK;
- // LP64todo - fix this!
len = uio_resid(uio) - delayed_copy_len;
if (so->so_oobmark && len > so->so_oobmark - offset)
len = so->so_oobmark - offset;
if (flags & MSG_PEEK) {
moff += len;
} else {
- if (mp)
- *mp = m_copym(m, 0, len, M_WAIT);
+ if (mp != NULL) {
+ int copy_flag;
+
+ if (flags & MSG_DONTWAIT)
+ copy_flag = M_DONTWAIT;
+ else
+ copy_flag = M_WAIT;
+ *mp = m_copym(m, 0, len, copy_flag);
+ if (*mp == NULL) {
+ /*
+ * Failed to allocate an mbuf.
+ * Restore uio_resid: it was already
+ * reduced by len bytes that we
+ * did not copy over.
+ */
+ uio_setresid(uio, (uio_resid(uio) + len));
+ break;
+ }
+ }
m->m_data += len;
m->m_len -= len;
so->so_rcv.sb_cc -= len;
if (m) {
nextrecord = m->m_nextpkt;
}
+ SB_MB_CHECK(&so->so_rcv);
}
}
#ifdef MORE_LOCKING_DEBUG
} else if (nextrecord->m_nextpkt == NULL) {
so->so_rcv.sb_lastrecord = nextrecord;
}
+ SB_MB_CHECK(&so->so_rcv);
}
SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
if (asb.sb_flags & SB_UNIX)
sb->sb_flags |= SB_UNIX;
if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose) {
- boolean_t unp = (pr->pr_domain->dom_dispose == unp_dispose);
- /*
- * Currently AF_UNIX domain uses a global domain mutex;
- * unp_dispose() may end up calling soclose() on another
- * AF_UNIX socket and therefore the lock must not be held
- * across the call.
- */
- if (unp)
- socket_unlock(so, 0);
(*pr->pr_domain->dom_dispose)(asb.sb_mb);
- if (unp)
- socket_lock(so, 0);
}
sbrelease(&asb);
}
if (valsize > len)
sopt->sopt_valsize = valsize = len;
- if (sopt->sopt_p != 0)
+ if (sopt->sopt_p != kernproc)
return (copyin(sopt->sopt_val, buf, valsize));
bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
sooptcopyin_timeval(struct sockopt *sopt, struct timeval * tv_p)
{
int error;
-
+
if (proc_is64bit(sopt->sopt_p)) {
- struct timeval64 tv64;
+ struct user64_timeval tv64;
if (sopt->sopt_valsize < sizeof(tv64)) {
return (EINVAL);
}
sopt->sopt_valsize = sizeof(tv64);
- error = copyin(sopt->sopt_val, &tv64, sizeof(tv64));
- if (error != 0) {
- return (error);
+ if (sopt->sopt_p != kernproc) {
+ error = copyin(sopt->sopt_val, &tv64, sizeof(tv64));
+ if (error != 0)
+ return (error);
+ } else {
+ bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv64,
+ sizeof(tv64));
}
if (tv64.tv_sec < 0 || tv64.tv_sec > LONG_MAX
|| tv64.tv_usec < 0 || tv64.tv_usec >= 1000000) {
tv_p->tv_sec = tv64.tv_sec;
tv_p->tv_usec = tv64.tv_usec;
} else {
- if (sopt->sopt_valsize < sizeof(*tv_p)) {
+ struct user32_timeval tv32;
+
+ if (sopt->sopt_valsize < sizeof(tv32)) {
return (EINVAL);
}
- sopt->sopt_valsize = sizeof(*tv_p);
- if (sopt->sopt_p != 0) {
- error = copyin(sopt->sopt_val, tv_p, sizeof(*tv_p));
+ sopt->sopt_valsize = sizeof(tv32);
+ if (sopt->sopt_p != kernproc) {
+ error = copyin(sopt->sopt_val, &tv32, sizeof(tv32));
if (error != 0) {
return (error);
}
} else {
- bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), tv_p,
- sizeof(*tv_p));
+ bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv32,
+ sizeof(tv32));
}
- if (tv_p->tv_sec < 0 || tv_p->tv_sec > LONG_MAX
- || tv_p->tv_usec < 0 || tv_p->tv_usec >= 1000000) {
+#ifndef __LP64__ // K64todo "comparison is always false due to limited range of data type"
+ if (tv32.tv_sec < 0 || tv32.tv_sec > LONG_MAX
+ || tv32.tv_usec < 0 || tv32.tv_usec >= 1000000) {
return (EDOM);
}
+#endif
+ tv_p->tv_sec = tv32.tv_sec;
+ tv_p->tv_usec = tv32.tv_usec;
}
return (0);
}
int error, optval;
struct linger l;
struct timeval tv;
- struct socket_filter_entry *filter;
- int filtered = 0;
#if CONFIG_MACF_SOCKET
struct mac extmac;
#endif /* MAC_SOCKET */
socket_lock(so, 1);
+
if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE))
- == (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
+ == (SS_CANTRCVMORE | SS_CANTSENDMORE) &&
+ (so->so_flags & SOF_NPX_SETOPTSHUT) == 0) {
/* the socket has been shutdown, no more sockopt's */
error = EINVAL;
goto bad;
sopt->sopt_dir = SOPT_SET;
}
- error = 0;
- for (filter = so->so_filt; filter && (error == 0);
- filter = filter->sfe_next_onsocket) {
- if (filter->sfe_filter->sf_filter.sf_setoption) {
- if (filtered == 0) {
- filtered = 1;
- sflt_use(so);
- socket_unlock(so, 0);
- }
- error = filter->sfe_filter->sf_filter.
- sf_setoption(filter->sfe_cookie, so, sopt);
- }
- }
-
- if (filtered != 0) {
- socket_lock(so, 0);
- sflt_unuse(so);
-
- if (error) {
- if (error == EJUSTRETURN)
- error = 0;
- goto bad;
- }
+ error = sflt_setsockopt(so, sopt);
+ if (error) {
+ if (error == EJUSTRETURN)
+ error = 0;
+ goto bad;
}
error = 0;
case SO_REUSEPORT:
case SO_OOBINLINE:
case SO_TIMESTAMP:
+ case SO_TIMESTAMP_MONOTONIC:
#ifdef __APPLE__
case SO_DONTTRUNC:
case SO_WANTMORE:
switch (sopt->sopt_name) {
case SO_SNDBUF:
case SO_RCVBUF:
- if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
- &so->so_snd : &so->so_rcv,
- (u_long) optval) == 0) {
+ {
+ struct sockbuf *sb = (sopt->sopt_name == SO_SNDBUF) ?
+ &so->so_snd : &so->so_rcv;
+ if (sbreserve(sb, (u_int32_t) optval) == 0) {
error = ENOBUFS;
goto bad;
}
- if (sopt->sopt_name == SO_SNDBUF)
- so->so_snd.sb_flags |= SB_USRSIZE;
- else
- so->so_rcv.sb_flags |= SB_USRSIZE;
+ sb->sb_flags |= SB_USRSIZE;
+ sb->sb_flags &= ~SB_AUTOSIZE;
+ sb->sb_idealsize = (u_int32_t)optval;
break;
+ }
/*
* Make sure the low-water is never greater than
if (error)
goto bad;
- error = sflt_attach_private(so, NULL,
- nke.nke_handle, 1);
+ error = sflt_attach_internal(so, nke.nke_handle);
break;
}
break;
#endif
+ case SO_RANDOMPORT:
+ error = sooptcopyin(sopt, &optval, sizeof (optval),
+ sizeof (optval));
+ if (error)
+ goto bad;
+ if (optval)
+ so->so_flags |= SOF_BINDRANDOMPORT;
+ else
+ so->so_flags &= ~SOF_BINDRANDOMPORT;
+ break;
+
+ case SO_NP_EXTENSIONS: {
+ struct so_np_extensions sonpx;
+
+ error = sooptcopyin(sopt, &sonpx, sizeof(sonpx), sizeof(sonpx));
+ if (error)
+ goto bad;
+ if (sonpx.npx_mask & ~SONPX_MASK_VALID) {
+ error = EINVAL;
+ goto bad;
+ }
+ /*
+ * Only one bit defined for now
+ */
+ if ((sonpx.npx_mask & SONPX_SETOPTSHUT)) {
+ if ((sonpx.npx_flags & SONPX_SETOPTSHUT))
+ so->so_flags |= SOF_NPX_SETOPTSHUT;
+ else
+ so->so_flags &= ~SOF_NPX_SETOPTSHUT;
+ }
+ break;
+ }
+
+ case SO_TRAFFIC_CLASS: {
+ error = sooptcopyin(sopt, &optval, sizeof (optval),
+ sizeof (optval));
+ if (error)
+ goto bad;
+ error = so_set_traffic_class(so, optval);
+ if (error)
+ goto bad;
+ break;
+ }
+
+ case SO_RECV_TRAFFIC_CLASS: {
+ error = sooptcopyin(sopt, &optval, sizeof (optval),
+ sizeof (optval));
+ if (error)
+ goto bad;
+ if (optval == 0)
+ so->so_flags &= ~SOF_RECV_TRAFFIC_CLASS;
+ else
+ so->so_flags |= SOF_RECV_TRAFFIC_CLASS;
+ break;
+ }
+
+ case SO_TRAFFIC_CLASS_DBG: {
+ struct so_tcdbg so_tcdbg;
+
+ error = sooptcopyin(sopt, &so_tcdbg,
+ sizeof (struct so_tcdbg), sizeof (struct so_tcdbg));
+ if (error)
+ goto bad;
+ error = so_set_tcdbg(so, &so_tcdbg);
+ if (error)
+ goto bad;
+ break;
+ }
+
+ case SO_PRIVILEGED_TRAFFIC_CLASS:
+ error = priv_check_cred(kauth_cred_get(),
+ PRIV_NET_PRIVILEGED_TRAFFIC_CLASS, 0);
+ if (error)
+ goto bad;
+ error = sooptcopyin(sopt, &optval, sizeof (optval),
+ sizeof (optval));
+ if (error)
+ goto bad;
+ if (optval == 0)
+ so->so_flags &= ~SOF_PRIVILEGED_TRAFFIC_CLASS;
+ else
+ so->so_flags |= SOF_PRIVILEGED_TRAFFIC_CLASS;
+ break;
+
+ case SO_DEFUNCTOK:
+ error = sooptcopyin(sopt, &optval, sizeof (optval),
+ sizeof (optval));
+ if (error != 0 || (so->so_flags & SOF_DEFUNCT)) {
+ if (error == 0)
+ error = EBADF;
+ goto bad;
+ }
+ /*
+ * Any process can set SO_DEFUNCTOK (clear
+ * SOF_NODEFUNCT), but only root can clear
+ * SO_DEFUNCTOK (set SOF_NODEFUNCT).
+ */
+ if (optval == 0 &&
+ kauth_cred_issuser(kauth_cred_get()) == 0) {
+ error = EPERM;
+ goto bad;
+ }
+ if (optval)
+ so->so_flags &= ~SOF_NODEFUNCT;
+ else
+ so->so_flags |= SOF_NODEFUNCT;
+
+ SODEFUNCTLOG(("%s[%d]: so %p [%d,%d] is now marked as "
+ "%seligible for defunct\n", __func__,
+ proc_selfpid(), so, INP_SOCKAF(so),
+ INP_SOCKTYPE(so),
+ (so->so_flags & SOF_NODEFUNCT) ? "not " : ""));
+ break;
+
+ case SO_ISDEFUNCT:
+ /* This option is not settable */
+ error = EINVAL;
+ break;
+
+ case SO_OPPORTUNISTIC:
+ error = sooptcopyin(sopt, &optval, sizeof (optval),
+ sizeof (optval));
+ if (error == 0)
+ error = so_set_opportunistic(so, optval);
+ break;
+
+ case SO_FLUSH:
+ /* This option is handled by lower layer(s) */
+ error = 0;
+ break;
+
+ case SO_RECV_ANYIF:
+ error = sooptcopyin(sopt, &optval, sizeof (optval),
+ sizeof (optval));
+ if (error == 0)
+ error = so_set_recv_anyif(so, optval);
+ break;
+
default:
error = ENOPROTOOPT;
break;
valsize = min(len, sopt->sopt_valsize);
sopt->sopt_valsize = valsize;
if (sopt->sopt_val != USER_ADDR_NULL) {
- if (sopt->sopt_p != 0)
+ if (sopt->sopt_p != kernproc)
error = copyout(buf, sopt->sopt_val, valsize);
else
bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
{
int error;
size_t len;
- struct timeval64 tv64;
+ struct user64_timeval tv64;
+ struct user32_timeval tv32;
const void * val;
size_t valsize;
-
+
error = 0;
if (proc_is64bit(sopt->sopt_p)) {
- len = sizeof(struct timeval64);
+ len = sizeof(tv64);
tv64.tv_sec = tv_p->tv_sec;
tv64.tv_usec = tv_p->tv_usec;
val = &tv64;
} else {
- len = sizeof(struct timeval);
- val = tv_p;
+ len = sizeof(tv32);
+ tv32.tv_sec = tv_p->tv_sec;
+ tv32.tv_usec = tv_p->tv_usec;
+ val = &tv32;
}
valsize = min(len, sopt->sopt_valsize);
sopt->sopt_valsize = valsize;
if (sopt->sopt_val != USER_ADDR_NULL) {
- if (sopt->sopt_p != 0)
+ if (sopt->sopt_p != kernproc)
error = copyout(val, sopt->sopt_val, valsize);
else
bcopy(val, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
int error, optval;
struct linger l;
struct timeval tv;
- struct socket_filter_entry *filter;
- int filtered = 0;
#if CONFIG_MACF_SOCKET
struct mac extmac;
#endif /* MAC_SOCKET */
socket_lock(so, 1);
- error = 0;
- for (filter = so->so_filt; filter && (error == 0);
- filter = filter->sfe_next_onsocket) {
- if (filter->sfe_filter->sf_filter.sf_getoption) {
- if (filtered == 0) {
- filtered = 1;
- sflt_use(so);
- socket_unlock(so, 0);
- }
- error = filter->sfe_filter->sf_filter.
- sf_getoption(filter->sfe_cookie, so, sopt);
- }
- }
- if (filtered != 0) {
- socket_lock(so, 0);
- sflt_unuse(so);
-
- if (error) {
- if (error == EJUSTRETURN)
- error = 0;
- socket_unlock(so, 1);
- return (error);
- }
+ error = sflt_getsockopt(so, sopt);
+ if (error) {
+ if (error == EJUSTRETURN)
+ error = 0;
+ socket_unlock(so, 1);
+ return (error);
}
-
+
error = 0;
if (sopt->sopt_level != SOL_SOCKET) {
if (so->so_proto && so->so_proto->pr_ctloutput) {
case SO_BROADCAST:
case SO_OOBINLINE:
case SO_TIMESTAMP:
+ case SO_TIMESTAMP_MONOTONIC:
#ifdef __APPLE__
case SO_DONTTRUNC:
case SO_WANTMORE:
optval = (so->so_flags & SOF_UPCALLCLOSEWAIT);
goto integer;
#endif
+ case SO_RANDOMPORT:
+ optval = (so->so_flags & SOF_BINDRANDOMPORT);
+ goto integer;
+
+ case SO_NP_EXTENSIONS: {
+ struct so_np_extensions sonpx;
+
+ sonpx.npx_flags = (so->so_flags & SOF_NPX_SETOPTSHUT) ? SONPX_SETOPTSHUT : 0;
+ sonpx.npx_mask = SONPX_MASK_VALID;
+
+ error = sooptcopyout(sopt, &sonpx, sizeof(struct so_np_extensions));
+ break;
+ }
+
+ case SO_TRAFFIC_CLASS:
+ optval = so->so_traffic_class;
+ goto integer;
+
+ case SO_RECV_TRAFFIC_CLASS:
+ optval = (so->so_flags & SOF_RECV_TRAFFIC_CLASS);
+ goto integer;
+
+ case SO_TRAFFIC_CLASS_STATS:
+ error = sooptcopyout(sopt, &so->so_tc_stats, sizeof(so->so_tc_stats));
+ break;
+
+ case SO_TRAFFIC_CLASS_DBG:
+ error = sogetopt_tcdbg(so, sopt);
+ break;
+
+ case SO_PRIVILEGED_TRAFFIC_CLASS:
+ optval = (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS);
+ goto integer;
+
+ case SO_DEFUNCTOK:
+ optval = !(so->so_flags & SOF_NODEFUNCT);
+ goto integer;
+
+ case SO_ISDEFUNCT:
+ optval = (so->so_flags & SOF_DEFUNCT);
+ goto integer;
+
+ case SO_OPPORTUNISTIC:
+ optval = so_get_opportunistic(so);
+ goto integer;
+
+ case SO_FLUSH:
+ /* This option is not gettable */
+ error = EINVAL;
+ break;
+
+ case SO_RECV_ANYIF:
+ optval = so_get_recv_anyif(so);
+ goto integer;
default:
error = ENOPROTOOPT;
return (error);
}
}
-
-/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
+/* The size limit in our soopt_getm is different from that on FreeBSD.
+ * We limit the size of options to MCLBYTES. This will have to change
+ * if we need to define options that need more space than MCLBYTES.
+ */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
struct mbuf *m, *m_prev;
int sopt_size = sopt->sopt_valsize;
+ int how;
- if (sopt_size > MAX_SOOPTGETM_SIZE)
+ if (sopt_size <= 0 || sopt_size > MCLBYTES)
return (EMSGSIZE);
- MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
+ how = sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT;
+ MGET(m, how, MT_DATA);
if (m == 0)
return (ENOBUFS);
if (sopt_size > MLEN) {
- MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
+ MCLGET(m, how);
if ((m->m_flags & M_EXT) == 0) {
m_free(m);
return (ENOBUFS);
*mp = m;
m_prev = m;
- while (sopt_size) {
- MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
+ while (sopt_size > 0) {
+ MGET(m, how, MT_DATA);
if (m == 0) {
m_freem(*mp);
return (ENOBUFS);
}
if (sopt_size > MLEN) {
- MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
+ MCLGET(m, how);
if ((m->m_flags & M_EXT) == 0) {
m_freem(*mp);
+ m_freem(m);
return (ENOBUFS);
}
m->m_len = min(MCLBYTES, sopt_size);
return (0);
}
-/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
+/* copyin sopt data into mbuf chain */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
if (sopt->sopt_val == USER_ADDR_NULL)
return (0);
while (m != NULL && sopt->sopt_valsize >= m->m_len) {
- if (sopt->sopt_p != NULL) {
+ if (sopt->sopt_p != kernproc) {
int error;
error = copyin(sopt->sopt_val, mtod(m, char *),
return (0);
}
-/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
+/* copyout mbuf chain data into soopt */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
if (sopt->sopt_val == USER_ADDR_NULL)
return (0);
while (m != NULL && sopt->sopt_valsize >= m->m_len) {
- if (sopt->sopt_p != NULL) {
+ if (sopt->sopt_p != kernproc) {
int error;
error = copyout(mtod(m, char *), sopt->sopt_val,
__unused struct proc *p)
{
struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
- struct sockbuf *sb;
+ struct klist *skl;
socket_lock(so, 1);
switch (kn->kn_filter) {
case EVFILT_READ:
- if (so->so_options & SO_ACCEPTCONN)
- kn->kn_fop = &solisten_filtops;
- else
- kn->kn_fop = &soread_filtops;
- sb = &so->so_rcv;
+ kn->kn_fop = &soread_filtops;
+ skl = &so->so_rcv.sb_sel.si_note;
break;
case EVFILT_WRITE:
kn->kn_fop = &sowrite_filtops;
- sb = &so->so_snd;
+ skl = &so->so_snd.sb_sel.si_note;
+ break;
+ case EVFILT_SOCK:
+ kn->kn_fop = &sock_filtops;
+ skl = &so->so_klist;
break;
default:
socket_unlock(so, 1);
return (1);
}
- if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
- sb->sb_flags |= SB_KNOTE;
+ if (KNOTE_ATTACH(skl, kn)) {
+ switch(kn->kn_filter) {
+ case EVFILT_READ:
+ so->so_rcv.sb_flags |= SB_KNOTE;
+ break;
+ case EVFILT_WRITE:
+ so->so_snd.sb_flags |= SB_KNOTE;
+ break;
+ case EVFILT_SOCK:
+ so->so_flags |= SOF_KNOTE;
+ break;
+ default:
+ socket_unlock(so, 1);
+ return (1);
+ }
+ }
socket_unlock(so, 1);
return (0);
}
if ((hint & SO_FILT_HINT_LOCKED) == 0)
socket_lock(so, 1);
+ if (so->so_options & SO_ACCEPTCONN) {
+ int isempty;
+
+ /* Radar 6615193 handle the listen case dynamically
+ * for kqueue read filter. This allows to call listen() after registering
+ * the kqueue EVFILT_READ.
+ */
+
+ kn->kn_data = so->so_qlen;
+ isempty = ! TAILQ_EMPTY(&so->so_comp);
+
+ if ((hint & SO_FILT_HINT_LOCKED) == 0)
+ socket_unlock(so, 1);
+
+ return (isempty);
+ }
+
+ /* socket isn't a listener */
+
kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
if (so->so_oobmark) {
return (1);
}
+ int64_t lowwat = so->so_rcv.sb_lowat;
+ if (kn->kn_sfflags & NOTE_LOWAT)
+ {
+ if (kn->kn_sdata > so->so_rcv.sb_hiwat)
+ lowwat = so->so_rcv.sb_hiwat;
+ else if (kn->kn_sdata > lowwat)
+ lowwat = kn->kn_sdata;
+ }
+
if ((hint & SO_FILT_HINT_LOCKED) == 0)
socket_unlock(so, 1);
-
- return ((kn->kn_flags & EV_OOBAND) ||
- kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
- kn->kn_sdata : so->so_rcv.sb_lowat));
+
+ return ((kn->kn_flags & EV_OOBAND) || kn->kn_data >= lowwat);
}
static void
socket_unlock(so, 1);
}
+/*
+ * Report whether the socket's inpcb satisfies INP_WAIT_FOR_IF_FEEDBACK.
+ *
+ * Only connected AF_INET / AF_INET6 sockets are examined; every other
+ * socket yields 0.  Returns 1 when the macro evaluates true for the
+ * socket's inpcb, 0 otherwise.
+ */
+int
+so_wait_for_if_feedback(struct socket *so)
+{
+	struct inpcb *inp;
+
+	/* Not an Internet-domain socket: nothing to wait for */
+	if (so->so_proto->pr_domain->dom_family != AF_INET &&
+	    so->so_proto->pr_domain->dom_family != AF_INET6)
+		return (0);
+
+	/* The inpcb is only consulted once the socket is connected */
+	if ((so->so_state & SS_ISCONNECTED) == 0)
+		return (0);
+
+	inp = sotoinpcb(so);
+	return (INP_WAIT_FOR_IF_FEEDBACK(inp) ? 1 : 0);
+}
+
/*ARGSUSED*/
static int
filt_sowrite(struct knote *kn, long hint)
{
struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+ int ret = 0;
if ((hint & SO_FILT_HINT_LOCKED) == 0)
socket_lock(so, 1);
if (so->so_state & SS_CANTSENDMORE) {
kn->kn_flags |= EV_EOF;
kn->kn_fflags = so->so_error;
- if ((hint & SO_FILT_HINT_LOCKED) == 0)
- socket_unlock(so, 1);
- return (1);
+ ret = 1;
+ goto out;
}
if (so->so_error) { /* temporary udp error */
- if ((hint & SO_FILT_HINT_LOCKED) == 0)
- socket_unlock(so, 1);
- return (1);
+ ret = 1;
+ goto out;
}
if (((so->so_state & SS_ISCONNECTED) == 0) &&
(so->so_proto->pr_flags & PR_CONNREQUIRED)) {
- if ((hint & SO_FILT_HINT_LOCKED) == 0)
- socket_unlock(so, 1);
- return (0);
+ ret = 0;
+ goto out;
+ }
+ int64_t lowwat = so->so_snd.sb_lowat;
+ if (kn->kn_sfflags & NOTE_LOWAT)
+ {
+ if (kn->kn_sdata > so->so_snd.sb_hiwat)
+ lowwat = so->so_snd.sb_hiwat;
+ else if (kn->kn_sdata > lowwat)
+ lowwat = kn->kn_sdata;
}
+ if (kn->kn_data >= lowwat) {
+ if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0) {
+ ret = tcp_notsent_lowat_check(so);
+ } else {
+ ret = 1;
+ }
+ }
+ if (so_wait_for_if_feedback(so))
+ ret = 0;
+out:
if ((hint & SO_FILT_HINT_LOCKED) == 0)
socket_unlock(so, 1);
- if (kn->kn_sfflags & NOTE_LOWAT)
- return (kn->kn_data >= kn->kn_sdata);
- return (kn->kn_data >= so->so_snd.sb_lowat);
+ return(ret);
+}
+
+/*
+ * Detach a knote from the socket's so_klist.
+ *
+ * Runs under the socket lock.  KNOTE_DETACH is evaluated only while
+ * SOF_KNOTE is set; when it returns nonzero the SOF_KNOTE flag is
+ * cleared.
+ */
+static void
+filt_sockdetach(struct knote *kn)
+{
+	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+
+	socket_lock(so, 1);
+	if ((so->so_flags & SOF_KNOTE) != 0 &&
+	    KNOTE_DETACH(&so->so_klist, kn))
+		so->so_flags &= ~SOF_KNOTE;
+	socket_unlock(so, 1);
}
-/*ARGSUSED*/
+/*
+ * Knote event routine reporting socket-level events.
+ * NOTE(review): presumably installed through sock_filtops for
+ * EVFILT_SOCK knotes; the filterops table is outside this hunk - confirm.
+ *
+ * kn   - knote being evaluated; kn_sfflags holds the NOTE_* events the
+ *        caller subscribed to, kn_fflags accumulates the events reported.
+ * hint - SO_FILT_HINT_LOCKED means the caller already holds the socket
+ *        lock; the remaining bits may name an event (see below).
+ *
+ * Returns 1 when so_error is set or when any bit is set in kn_fflags,
+ * otherwise 0.  The socket lock is taken here only when the hint does
+ * not carry SO_FILT_HINT_LOCKED, and is dropped again only in that case.
+ */
static int
-filt_solisten(struct knote *kn, long hint)
+filt_sockev(struct knote *kn, long hint)
{
+ int ret = 0, locked = 0;
struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
- int isempty;
- if ((hint & SO_FILT_HINT_LOCKED) == 0)
+ if ((hint & SO_FILT_HINT_LOCKED) == 0) {
socket_lock(so, 1);
- kn->kn_data = so->so_qlen;
- isempty = ! TAILQ_EMPTY(&so->so_comp);
- if ((hint & SO_FILT_HINT_LOCKED) == 0)
+ locked = 1;
+ }
+
+ /*
+ * Map the event selected by (hint & SO_FILT_HINT_EV) to its NOTE_*
+ * flag, but only if the knote subscribed to it in kn_sfflags.
+ * Values not listed here set no flag.
+ */
+ switch (hint & SO_FILT_HINT_EV) {
+ case SO_FILT_HINT_CONNRESET:
+ if (kn->kn_sfflags & NOTE_CONNRESET)
+ kn->kn_fflags |= NOTE_CONNRESET;
+ break;
+ case SO_FILT_HINT_TIMEOUT:
+ if (kn->kn_sfflags & NOTE_TIMEOUT)
+ kn->kn_fflags |= NOTE_TIMEOUT;
+ break;
+ case SO_FILT_HINT_NOSRCADDR:
+ if (kn->kn_sfflags & NOTE_NOSRCADDR)
+ kn->kn_fflags |= NOTE_NOSRCADDR;
+ break;
+ case SO_FILT_HINT_IFDENIED:
+ if ((kn->kn_sfflags & NOTE_IFDENIED))
+ kn->kn_fflags |= NOTE_IFDENIED;
+ break;
+ case SO_FILT_HINT_KEEPALIVE:
+ if (kn->kn_sfflags & NOTE_KEEPALIVE)
+ kn->kn_fflags |= NOTE_KEEPALIVE;
+ }
+
+ /* Closed-direction events are derived from the current so_state */
+ if ((kn->kn_sfflags & NOTE_READCLOSED) &&
+ (so->so_state & SS_CANTRCVMORE))
+ kn->kn_fflags |= NOTE_READCLOSED;
+
+ if ((kn->kn_sfflags & NOTE_WRITECLOSED) &&
+ (so->so_state & SS_CANTSENDMORE))
+ kn->kn_fflags |= NOTE_WRITECLOSED;
+
+ /*
+ * NOTE_SUSPEND and NOTE_RESUME are kept mutually exclusive: each
+ * branch clears both bits before setting its own.  Either branch
+ * fires on the hint bit or on the current SOF_SUSPENDED state, so a
+ * knote subscribed to NOTE_RESUME on a socket that is not suspended
+ * gets NOTE_RESUME set on every call.
+ */
+ if ((kn->kn_sfflags & NOTE_SUSPEND) &&
+ ((hint & SO_FILT_HINT_SUSPEND) ||
+ (so->so_flags & SOF_SUSPENDED))) {
+ kn->kn_fflags &=
+ ~(NOTE_SUSPEND | NOTE_RESUME);
+ kn->kn_fflags |= NOTE_SUSPEND;
+ }
+
+ if ((kn->kn_sfflags & NOTE_RESUME) &&
+ ((hint & SO_FILT_HINT_RESUME) ||
+ (so->so_flags & SOF_SUSPENDED) == 0)) {
+ kn->kn_fflags &=
+ ~(NOTE_SUSPEND | NOTE_RESUME);
+ kn->kn_fflags |= NOTE_RESUME;
+ }
+
+ /*
+ * A pending socket error is returned in kn_data together with
+ * EV_EOF; otherwise kn_data is updated with the connection state
+ * bits.  NOTE(review): kn_data is accessed through a u_int32_t
+ * pointer cast here - confirm this is intended if kn_data is wider
+ * than 32 bits.
+ */
+ if (so->so_error != 0) {
+ ret = 1;
+ kn->kn_data = so->so_error;
+ kn->kn_flags |= EV_EOF;
+ } else {
+ get_sockev_state(so, (u_int32_t *)&(kn->kn_data));
+ }
+
+ if (kn->kn_fflags != 0)
+ ret = 1;
+
+ if (locked)
socket_unlock(so, 1);
- return (isempty);
+
+ return(ret);
}
+/*
+ * Fold the socket's connection state into the SOCKEV_* word at *statep.
+ *
+ * SOCKEV_CONNECTED is made to mirror SS_ISCONNECTED (set or cleared).
+ * SOCKEV_DISCONNECTED is set when SS_ISDISCONNECTED is set; this routine
+ * never clears it.  All other bits of *statep are passed through.
+ */
+void
+get_sockev_state(struct socket *so, u_int32_t *statep)
+{
+	u_int32_t state = *statep;
+
+	state &= ~(SOCKEV_CONNECTED);
+	if (so->so_state & SS_ISCONNECTED)
+		state |= SOCKEV_CONNECTED;
+
+	if (so->so_state & SS_ISDISCONNECTED)
+		state |= SOCKEV_DISCONNECTED;
+
+	*statep = state;
+}
+
+#define SO_LOCK_HISTORY_STR_LEN (2 * SO_LCKDBG_MAX * (2 + (2 * sizeof(void *)) + 1) + 1)
+
+/*
+ * Format the socket's recorded lock/unlock return addresses as a string
+ * of "lock:unlock " hex pairs, walking the SO_LCKDBG_MAX history slots
+ * from index SO_LCKDBG_MAX - 1 down to 0 relative to the next-slot
+ * cursors.
+ *
+ * The result lives in a single static buffer that is overwritten on each
+ * call, so the returned pointer is only valid until the next call and
+ * the routine is not safe to run concurrently.
+ */
+__private_extern__ const char * solockhistory_nr(struct socket *so)
+{
+	static char lock_history_str[SO_LOCK_HISTORY_STR_LEN];
+	size_t used = 0;
+	int slot = SO_LCKDBG_MAX;
+
+	bzero(lock_history_str, sizeof(lock_history_str));
+	while (--slot >= 0) {
+		used += snprintf(lock_history_str + used,
+		    sizeof(lock_history_str) - used, "%lx:%lx ",
+		    (uintptr_t) so->lock_lr[(so->next_lock_lr + slot) % SO_LCKDBG_MAX],
+		    (uintptr_t) so->unlock_lr[(so->next_unlock_lr + slot) % SO_LCKDBG_MAX]);
+	}
+	return lock_history_str;
+}
int
socket_lock(struct socket *so, int refcount)
{
- int error = 0, lr_saved;
+ int error = 0;
+ void *lr_saved;
- lr_saved = (unsigned int) __builtin_return_address(0);
+ lr_saved = __builtin_return_address(0);
if (so->so_proto->pr_lock) {
error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
if (refcount)
so->so_usecount++;
- so->lock_lr[so->next_lock_lr] = (u_int32_t)lr_saved;
+ so->lock_lr[so->next_lock_lr] = lr_saved;
so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
}
int
socket_unlock(struct socket *so, int refcount)
{
- int error = 0, lr_saved;
+ int error = 0;
+ void *lr_saved;
lck_mtx_t *mutex_held;
- lr_saved = (unsigned int) __builtin_return_address(0);
+ lr_saved = __builtin_return_address(0);
if (so->so_proto == NULL)
panic("socket_unlock null so_proto so=%p\n", so);
#ifdef MORE_LOCKING_DEBUG
lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif
- so->unlock_lr[so->next_unlock_lr] = (u_int32_t)lr_saved;
+ so->unlock_lr[so->next_unlock_lr] = lr_saved;
so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
if (refcount) {
if (so->so_usecount <= 0)
- panic("socket_unlock: bad refcount so=%p "
- "value=%d\n", so, so->so_usecount);
+ panic("socket_unlock: bad refcount=%d so=%p (%d, %d, %d) lrh=%s",
+ so->so_usecount, so, so->so_proto->pr_domain->dom_family,
+ so->so_type, so->so_proto->pr_protocol,
+ solockhistory_nr(so));
+
so->so_usecount--;
if (so->so_usecount == 0) {
sofreelastref(so, 1);
else
so->so_flags &= ~SOF_MULTIPAGES;
}
+
+/*
+ * Report whether the socket's foreign (destination) address is local.
+ *
+ * For AF_INET the result of inaddr_local() on the inpcb's foreign
+ * address is returned, for AF_INET6 the result of in6addr_local().
+ * Sockets of any other family yield 0.
+ */
+int
+so_isdstlocal(struct socket *so)
+{
+	struct inpcb *inp = (struct inpcb *)so->so_pcb;
+
+	switch (so->so_proto->pr_domain->dom_family) {
+	case AF_INET:
+		return inaddr_local(inp->inp_faddr);
+	case AF_INET6:
+		return in6addr_local(&inp->in6p_faddr);
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Mark a socket as defunct (SOF_DEFUNCT) and make both socket buffers
+ * drop further data (SB_DROP).
+ *
+ * p, level - used only for logging.
+ * noforce  - when true, a socket flagged SOF_NODEFUNCT is left alone and
+ *            EOPNOTSUPP is returned; when false, SOF_NODEFUNCT is
+ *            cleared and the socket is marked defunct anyway.
+ *
+ * Returns 0 on success, including when the socket was already defunct.
+ * Panics if an already-defunct socket does not have SB_DROP set on both
+ * buffers.
+ */
+int
+sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
+{
+	int err = 0;
+	int was_defunct = (so->so_flags & SOF_DEFUNCT);
+
+	if (was_defunct) {
+		/* Nothing to do beyond checking the invariant */
+		if (!(so->so_snd.sb_flags & so->so_rcv.sb_flags & SB_DROP))
+			panic("%s: SB_DROP not set", __func__);
+	} else {
+		if (so->so_flags & SOF_NODEFUNCT) {
+			if (noforce) {
+				err = EOPNOTSUPP;
+				SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p "
+				    "[%d,%d] is not eligible for defunct (%d)\n",
+				    __func__, proc_selfpid(), proc_pid(p), level, so,
+				    INP_SOCKAF(so), INP_SOCKTYPE(so), err));
+				return (err);
+			}
+			/* Forced: override the socket's opt-out */
+			so->so_flags &= ~SOF_NODEFUNCT;
+			SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] "
+			    "defunct by force\n", __func__, proc_selfpid(), proc_pid(p),
+			    level, so, INP_SOCKAF(so), INP_SOCKTYPE(so)));
+		}
+
+		so->so_flags |= SOF_DEFUNCT;
+		/* Prevent further data from being appended to the socket buffers */
+		so->so_snd.sb_flags |= SB_DROP;
+		so->so_rcv.sb_flags |= SB_DROP;
+	}
+
+	SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] %s "
+	    "defunct\n", __func__, proc_selfpid(), proc_pid(p), level, so,
+	    INP_SOCKAF(so), INP_SOCKTYPE(so),
+	    was_defunct ? "is already" : "marked as"));
+
+	return (err);
+}
+
+/*
+ * Tear down a socket that has already been marked SOF_DEFUNCT.
+ *
+ * p, level - used only for logging.
+ *
+ * Wakes and unlocks anything blocked on the socket buffers, shuts down
+ * both directions, disconnects, sets so_error to EBADF if no error is
+ * pending, releases buffered data and finally sets SS_DEFUNCT.  A socket
+ * that already has SS_DEFUNCT set is left untouched.
+ *
+ * Always returns 0.  Panics if SOF_DEFUNCT is not set on entry.
+ * NOTE(review): the *lock / *locked helpers called here suggest the
+ * caller holds the socket lock - confirm against callers.
+ */
+int
+sodefunct(struct proc *p, struct socket *so, int level)
+{
+	struct sockbuf *rcv, *snd;
+
+	if (!(so->so_flags & SOF_DEFUNCT))
+		panic("%s improperly called", __func__);
+
+	if (so->so_state & SS_DEFUNCT)
+		goto done;
+
+	rcv = &so->so_rcv;
+	snd = &so->so_snd;
+
+	SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] is now "
+	    "defunct [rcv_si 0x%x, snd_si 0x%x, rcv_fl 0x%x, snd_fl 0x%x]\n",
+	    __func__, proc_selfpid(), proc_pid(p), level, so,
+	    INP_SOCKAF(so), INP_SOCKTYPE(so),
+	    (uint32_t)rcv->sb_sel.si_flags, (uint32_t)snd->sb_sel.si_flags,
+	    (uint16_t)rcv->sb_flags, (uint16_t)snd->sb_flags));
+
+	/*
+	 * Unwedge threads blocked on sbwait() and sb_lock().
+	 */
+	sbwakeup(rcv);
+	sbwakeup(snd);
+
+	if (rcv->sb_flags & SB_LOCK)
+		sbunlock(rcv, 1);
+	if (snd->sb_flags & SB_LOCK)
+		sbunlock(snd, 1);
+
+	/*
+	 * Flush the buffers and disconnect.  We explicitly call shutdown
+	 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
+	 * states are set for the socket.  This would also flush out data
+	 * hanging off the receive list of this socket.
+	 */
+	(void) soshutdownlock(so, SHUT_RD);
+	(void) soshutdownlock(so, SHUT_WR);
+	(void) sodisconnectlocked(so);
+
+	/*
+	 * Explicitly handle connectionless-protocol disconnection
+	 * and release any remaining data in the socket buffers.
+	 *
+	 * SS_ISDISCONNECTED is a so_state bit (like every other SS_* flag
+	 * tested in this file), so it must be checked against so_state,
+	 * not so_flags.
+	 */
+	if (!(so->so_state & SS_ISDISCONNECTED))
+		(void) soisdisconnected(so);
+
+	if (so->so_error == 0)
+		so->so_error = EBADF;
+
+	if (rcv->sb_cc != 0)
+		sbrelease(rcv);
+	if (snd->sb_cc != 0)
+		sbrelease(snd);
+
+	so->so_state |= SS_DEFUNCT;
+
+done:
+	return (0);
+}
+
+/*
+ * Set or clear INP_RECV_ANYIF on the socket's inpcb.
+ *
+ * optval - nonzero sets the flag, zero clears it.
+ *
+ * Returns 0 on success, or EPROTONOSUPPORT when the socket is not
+ * AF_INET (or AF_INET6 when built with INET6).
+ */
+__private_extern__ int
+so_set_recv_anyif(struct socket *so, int optval)
+{
+	/* Reject families that have no inpcb flag to toggle */
+#if INET6
+	if (INP_SOCKAF(so) != AF_INET && INP_SOCKAF(so) != AF_INET6)
+#else
+	if (INP_SOCKAF(so) != AF_INET)
+#endif /* !INET6 */
+		return (EPROTONOSUPPORT);
+
+	if (optval)
+		sotoinpcb(so)->inp_flags |= INP_RECV_ANYIF;
+	else
+		sotoinpcb(so)->inp_flags &= ~INP_RECV_ANYIF;
+
+	return (0);
+}
+
+/*
+ * Report whether INP_RECV_ANYIF is set on the socket's inpcb.
+ *
+ * Returns 1 when the flag is set on an AF_INET socket (or AF_INET6 when
+ * built with INET6), and 0 in every other case, including unsupported
+ * address families.
+ */
+__private_extern__ int
+so_get_recv_anyif(struct socket *so)
+{
+#if INET6
+	if (INP_SOCKAF(so) != AF_INET && INP_SOCKAF(so) != AF_INET6)
+#else
+	if (INP_SOCKAF(so) != AF_INET)
+#endif /* !INET6 */
+		return (0);
+
+	return ((sotoinpcb(so)->inp_flags & INP_RECV_ANYIF) != 0);
+}