/*
- * Copyright (c) 1998-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2014 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <sys/ev.h>
#include <kern/locks.h>
#include <net/route.h>
+#include <net/content_filter.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <sys/kdebug.h>
#include <mach/vm_param.h>
-/* TODO: this should be in a header file somewhere */
-extern void postevent(struct socket *, struct sockbuf *, int);
+#if MPTCP
+#include <netinet/mptcp_var.h>
+#endif
#define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4)
#define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5)
u_int32_t high_sb_max = SB_MAX;
static u_int32_t sb_efficiency = 8; /* parameter for sbreserve() */
-__private_extern__ int32_t total_sbmb_cnt = 0;
+int32_t total_sbmb_cnt __attribute__((aligned(8))) = 0;
+int32_t total_sbmb_cnt_peak __attribute__((aligned(8))) = 0;
+int64_t sbmb_limreached __attribute__((aligned(8))) = 0;
/* Control whether to throttle sockets eligible to be throttled */
__private_extern__ u_int32_t net_io_policy_throttled = 0;
wakeup((caddr_t)&so->so_timeo);
sowwakeup(so);
sorwakeup(so);
+
+#if CONTENT_FILTER
+ /* Notify content filters as soon as we cannot send/receive data */
+ cfil_sock_notify_shutdown(so, SHUT_RDWR);
+#endif /* CONTENT_FILTER */
}
/*
wakeup((caddr_t)&so->so_timeo);
sowwakeup(so);
sorwakeup(so);
+
+#if CONTENT_FILTER
+ /* Notify content filters as soon as we cannot send/receive data */
+ cfil_sock_notify_shutdown(so, SHUT_RDWR);
+#endif /* CONTENT_FILTER */
}
/*
so->so_upcallusecount == 0)
wakeup((caddr_t)&so->so_upcallusecount);
}
+#if CONTENT_FILTER
+ /*
+ * Trap disconnection events for content filters
+ */
+ if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
+ if ((sb->sb_flags & SB_RECV)) {
+ if (so->so_state & (SS_CANTRCVMORE))
+ cfil_sock_notify_shutdown(so, SHUT_RD);
+ } else {
+ if (so->so_state & (SS_CANTSENDMORE))
+ cfil_sock_notify_shutdown(so, SHUT_WR);
+ }
+ }
+#endif /* CONTENT_FILTER */
}
/*
if (sb->sb_lastrecord != NULL && (sb->sb_mbtail->m_flags & M_EOR))
return (sbappendrecord(sb, m));
- if (sb->sb_flags & SB_RECV) {
+ if (sb->sb_flags & SB_RECV && !(m && m->m_flags & M_SKIPCFIL)) {
int error = sflt_data_in(so, NULL, &m, NULL, 0);
SBLASTRECORDCHK(sb, "sbappend 2");
+
+#if CONTENT_FILTER
+ if (error == 0)
+ error = cfil_sock_data_in(so, NULL, m, NULL, 0);
+#endif /* CONTENT_FILTER */
+
if (error != 0) {
if (error != EJUSTRETURN)
m_freem(m);
return (0);
}
+ } else if (m) {
+ m->m_flags &= ~M_SKIPCFIL;
}
/* If this is the first record, it's also the last record */
SBLASTMBUFCHK(sb, __func__);
- if (sb->sb_flags & SB_RECV) {
+ if (sb->sb_flags & SB_RECV && !(m && m->m_flags & M_SKIPCFIL)) {
int error = sflt_data_in(so, NULL, &m, NULL, 0);
SBLASTRECORDCHK(sb, "sbappendstream 1");
+
+#if CONTENT_FILTER
+ if (error == 0)
+ error = cfil_sock_data_in(so, NULL, m, NULL, 0);
+#endif /* CONTENT_FILTER */
+
if (error != 0) {
if (error != EJUSTRETURN)
m_freem(m);
return (0);
}
+ } else if (m) {
+ m->m_flags &= ~M_SKIPCFIL;
}
sbcompress(sb, m, sb->sb_mbtail);
m = m->m_nextpkt;
if (m != sb->sb_lastrecord) {
- printf("sblastrecordchk: mb %p lastrecord %p last %p\n",
- sb->sb_mb, sb->sb_lastrecord, m);
+ printf("sblastrecordchk: mb 0x%llx lastrecord 0x%llx "
+ "last 0x%llx\n",
+ (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mb),
+ (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_lastrecord),
+ (uint64_t)VM_KERNEL_ADDRPERM(m));
printf("packet chain:\n");
for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
- printf("\t%p\n", m);
+ printf("\t0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(m));
panic("sblastrecordchk from %s", where);
}
}
m = m->m_next;
if (m != sb->sb_mbtail) {
- printf("sblastmbufchk: mb %p mbtail %p last %p\n",
- sb->sb_mb, sb->sb_mbtail, m);
+ printf("sblastmbufchk: mb 0x%llx mbtail 0x%llx last 0x%llx\n",
+ (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mb),
+ (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mbtail),
+ (uint64_t)VM_KERNEL_ADDRPERM(m));
printf("packet tree:\n");
for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
printf("\t");
for (n = m; n != NULL; n = n->m_next)
- printf("%p ", n);
+ printf("0x%llx ", (uint64_t)VM_KERNEL_ADDRPERM(n));
printf("\n");
}
panic("sblastmbufchk from %s", where);
return (0);
}
- if (sb->sb_flags & SB_RECV) {
+ if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
sock_data_filt_flag_record);
+
+#if CONTENT_FILTER
+ if (error == 0)
+ error = cfil_sock_data_in(sb->sb_so, NULL, m0, NULL, 0);
+#endif /* CONTENT_FILTER */
+
if (error != 0) {
SBLASTRECORDCHK(sb, "sbappendrecord 1");
if (error != EJUSTRETURN)
m_freem(m0);
return (0);
}
+ } else if (m0) {
+ m0->m_flags &= ~M_SKIPCFIL;
}
/*
SBLASTRECORDCHK(sb, "sbinsertoob 1");
- if ((sb->sb_flags & SB_RECV) != 0) {
+ if ((sb->sb_flags & SB_RECV && !(m0->m_flags & M_SKIPCFIL)) != 0) {
int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
sock_data_filt_flag_oob);
SBLASTRECORDCHK(sb, "sbinsertoob 2");
+
+#if CONTENT_FILTER
+ if (error == 0)
+ error = cfil_sock_data_in(sb->sb_so, NULL, m0, NULL, 0);
+#endif /* CONTENT_FILTER */
+
if (error) {
if (error != EJUSTRETURN) {
m_freem(m0);
}
return (0);
}
+ } else if (m0) {
+ m0->m_flags &= ~M_SKIPCFIL;
}
for (mp = &sb->sb_mb; *mp; mp = &((*mp)->m_nextpkt)) {
}
/* Call socket data in filters */
- if ((sb->sb_flags & SB_RECV) != 0) {
+ if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
int error;
error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0);
SBLASTRECORDCHK(sb, __func__);
+
+#if CONTENT_FILTER
+ if (error == 0)
+ error = cfil_sock_data_in(sb->sb_so, asa, m0, control, 0);
+#endif /* CONTENT_FILTER */
+
if (error) {
if (error != EJUSTRETURN) {
if (m0)
}
return (0);
}
+ } else if (m0) {
+ m0->m_flags &= ~M_SKIPCFIL;
}
result = sbappendaddr_internal(sb, asa, m0, control);
return (0);
}
- if (sb->sb_flags & SB_RECV) {
+ if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
int error;
error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0);
SBLASTRECORDCHK(sb, __func__);
+
+#if CONTENT_FILTER
+ if (error == 0)
+ error = cfil_sock_data_in(sb->sb_so, NULL, m0, control, 0);
+#endif /* CONTENT_FILTER */
+
if (error) {
if (error != EJUSTRETURN) {
if (m0)
}
return (0);
}
+ } else if (m0) {
+ m0->m_flags &= ~M_SKIPCFIL;
}
result = sbappendcontrol_internal(sb, m0, control);
m_eor->m_flags |= M_UNORDERED_DATA;
data_len += m_eor->m_len;
so->so_msg_state->msg_uno_bytes += m_eor->m_len;
- } else {
+ } else {
m_eor->m_flags &= ~M_UNORDERED_DATA;
}
-
if (m_eor->m_next == NULL)
break;
}
__func__);
}
- ret = sbappendrecord(sb, m);
+ if (!unordered && (sb->sb_mbtail != NULL) &&
+ !(sb->sb_mbtail->m_flags & M_UNORDERED_DATA)) {
+ sb->sb_mbtail->m_flags &= ~M_EOR;
+ sbcompress(sb, m, sb->sb_mbtail);
+ ret = 1;
+ } else {
+ ret = sbappendrecord(sb, m);
+ }
+ VERIFY(sb->sb_mbtail->m_flags & M_EOR);
return (ret);
}
SBLASTMBUFCHK(sb, __func__);
- mptcp_adj_rmap(so, m);
+ if (mptcp_adj_rmap(so, m) != 0)
+ return (0);
/* No filter support (SB_RECV) on mptcp subflow sockets */
(!(sb->sb_flags & SB_RECV)) &&
((sb->sb_so->so_flags & SOF_MP_SUBFLOW) ||
((SOCK_CHECK_DOM(sb->sb_so, PF_MULTIPATH)) &&
- (SOCK_CHECK_PROTO(sb->sb_so, IPPROTO_TCP))))) {
+ (SOCK_CHECK_PROTO(sb->sb_so, IPPROTO_TCP)))) &&
+ (!(sb->sb_so->so_flags1 & SOF1_POST_FALLBACK_SYNC))) {
mptcp_preproc_sbdrop(m, (unsigned int)len);
}
#endif /* MPTCP */
sb->sb_lastrecord = m;
}
+#if CONTENT_FILTER
+ cfil_sock_buf_update(sb);
+#endif /* CONTENT_FILTER */
+
postevent(0, sb, EV_RWBYTES);
KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
return (EOPNOTSUPP);
}
+int
+pru_send_list_notsupp(struct socket *so, int flags, struct mbuf *m,
+ struct sockaddr *addr, struct mbuf *control, struct proc *p)
+{
+#pragma unused(so, flags, m, addr, control, p)
+ return (EOPNOTSUPP);
+}
+
/*
* This isn't really a ``null'' operation, but it's the default one
* and doesn't do anything destructive.
return (EOPNOTSUPP);
}
+int
+pru_sosend_list_notsupp(struct socket *so, struct sockaddr *addr, struct uio **uio,
+ u_int uiocnt, struct mbuf *top, struct mbuf *control, int flags)
+{
+#pragma unused(so, addr, uio, uiocnt, top, control, flags)
+ return (EOPNOTSUPP);
+}
+
int
pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
return (EOPNOTSUPP);
}
+int
+pru_soreceive_list_notsupp(struct socket *so, struct sockaddr **paddr,
+ struct uio **uio, u_int uiocnt, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
+{
+#pragma unused(so, paddr, uio, uiocnt, mp0, controlp, flagsp)
+ return (EOPNOTSUPP);
+}
+
int
pru_shutdown_notsupp(struct socket *so)
{
DEFAULT(pru->pru_rcvd, pru_rcvd_notsupp);
DEFAULT(pru->pru_rcvoob, pru_rcvoob_notsupp);
DEFAULT(pru->pru_send, pru_send_notsupp);
+ DEFAULT(pru->pru_send_list, pru_send_list_notsupp);
DEFAULT(pru->pru_sense, pru_sense_null);
DEFAULT(pru->pru_shutdown, pru_shutdown_notsupp);
DEFAULT(pru->pru_sockaddr, pru_sockaddr_notsupp);
DEFAULT(pru->pru_sopoll, pru_sopoll_notsupp);
DEFAULT(pru->pru_soreceive, pru_soreceive_notsupp);
+ DEFAULT(pru->pru_soreceive_list, pru_soreceive_list_notsupp);
DEFAULT(pru->pru_sosend, pru_sosend_notsupp);
+ DEFAULT(pru->pru_sosend_list, pru_sosend_list_notsupp);
DEFAULT(pru->pru_socheckopt, pru_socheckopt_null);
#undef DEFAULT
}
int
sbspace(struct sockbuf *sb)
{
+ int pending = 0;
int space = imin((int)(sb->sb_hiwat - sb->sb_cc),
(int)(sb->sb_mbmax - sb->sb_mbcnt));
if (space < 0)
space = 0;
+ /* Compensate for data being processed by content filters */
+#if CONTENT_FILTER
+ pending = cfil_sock_data_space(sb);
+#endif /* CONTENT_FILTER */
+ if (pending > space)
+ space = 0;
+ else
+ space -= pending;
+
return (space);
}
{
int space = 0, error;
u_int32_t msgpri;
- VERIFY(so->so_type == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP &&
- control != NULL);
- error = tcp_get_msg_priority(control, &msgpri);
- if (error)
- return (0);
+ VERIFY(so->so_type == SOCK_STREAM &&
+ SOCK_PROTO(so) == IPPROTO_TCP);
+ if (control != NULL) {
+ error = tcp_get_msg_priority(control, &msgpri);
+ if (error)
+ return (0);
+ } else {
+ msgpri = MSG_PRI_0;
+ }
space = (so->so_snd.sb_idealsize / MSG_PRI_COUNT) -
so->so_msg_state->msg_priq[msgpri].msgq_bytes;
if (space < 0)
soreadable(struct socket *so)
{
return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
- (so->so_state & SS_CANTRCVMORE) ||
+ ((so->so_state & SS_CANTRCVMORE)
+#if CONTENT_FILTER
+ && cfil_sock_data_pending(&so->so_rcv) == 0
+#endif /* CONTENT_FILTER */
+ ) ||
so->so_comp.tqh_first || so->so_error);
}
int
sowriteable(struct socket *so)
{
- return ((!so_wait_for_if_feedback(so) &&
+ if ((so->so_state & SS_CANTSENDMORE) ||
+ so->so_error > 0)
+ return (1);
+
+ if (!so_wait_for_if_feedback(so) &&
sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
((so->so_state & SS_ISCONNECTED) ||
- (so->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) ||
- (so->so_state & SS_CANTSENDMORE) ||
- so->so_error);
+ !(so->so_proto->pr_flags & PR_CONNREQUIRED))) {
+ if (so->so_flags & SOF_NOTSENT_LOWAT) {
+ if ((SOCK_DOM(so) == PF_INET6
+ || SOCK_DOM(so) == PF_INET)
+ && so->so_type == SOCK_STREAM) {
+ return (tcp_notsent_lowat_check(so));
+ }
+#if MPTCP
+ else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
+ (SOCK_PROTO(so) == IPPROTO_TCP)) {
+ return (mptcp_notsent_lowat_check(so));
+ }
+#endif
+ else {
+ return (1);
+ }
+ } else {
+ return (1);
+ }
+ }
+ return (0);
}
/* adjust counters in sb reflecting allocation of m */
}
OSAddAtomic(cnt, &total_sbmb_cnt);
VERIFY(total_sbmb_cnt > 0);
+ if (total_sbmb_cnt > total_sbmb_cnt_peak)
+ total_sbmb_cnt_peak = total_sbmb_cnt;
}
/* adjust counters in sb reflecting freeing of m */
struct socket *so = sb->sb_so;
void * wchan;
int error = 0;
+ thread_t tp = current_thread();
VERIFY((flags & SBL_VALID) == flags);
/* NOTREACHED */
}
+ /*
+ * The content filter thread must hold the sockbuf lock
+ */
+ if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
+ /*
+ * Don't panic if we are defunct because SB_LOCK has
+ * been cleared by sodefunct()
+ */
+ if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK))
+ panic("%s: SB_LOCK not held for %p\n",
+ __func__, sb);
+
+ /* Keep the sockbuf locked */
+ return (0);
+ }
+
if ((sb->sb_flags & SB_LOCK) && !(flags & SBL_WAIT))
return (EWOULDBLOCK);
-
/*
* We may get here from sorflush(), in which case "sb" may not
* point to the real socket buffer. Use the actual socket buffer
wchan = (sb->sb_flags & SB_RECV) ?
&so->so_rcv.sb_flags : &so->so_snd.sb_flags;
- while (sb->sb_flags & SB_LOCK) {
+ /*
+ * A content filter thread has exclusive access to the sockbuf
+ * until it clears the
+ */
+ while ((sb->sb_flags & SB_LOCK) ||
+ ((so->so_flags & SOF_CONTENT_FILTER) &&
+ sb->sb_cfil_thread != NULL)) {
lck_mtx_t *mutex_held;
/*
{
void *lr_saved = __builtin_return_address(0);
struct socket *so = sb->sb_so;
+ thread_t tp = current_thread();
/* so_usecount may be 0 if we get here from sofreelastref() */
if (so == NULL) {
/* NOTREACHED */
}
- VERIFY(sb->sb_flags & SB_LOCK);
- sb->sb_flags &= ~SB_LOCK;
-
- if (sb->sb_wantlock > 0) {
+ /*
+ * The content filter thread must hold the sockbuf lock
+ */
+ if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
/*
- * We may get here from sorflush(), in which case "sb" may not
- * point to the real socket buffer. Use the actual socket
- * buffer address from the socket instead.
+ * Don't panic if we are defunct because SB_LOCK has
+ * been cleared by sodefunct()
*/
- wakeup((sb->sb_flags & SB_RECV) ? &so->so_rcv.sb_flags :
- &so->so_snd.sb_flags);
+ if (!(so->so_flags & SOF_DEFUNCT) &&
+ !(sb->sb_flags & SB_LOCK) &&
+ !(so->so_state & SS_DEFUNCT) &&
+ !(so->so_flags1 & SOF1_DEFUNCTINPROG)) {
+ panic("%s: SB_LOCK not held for %p\n",
+ __func__, sb);
+ }
+ /* Keep the sockbuf locked and proceed*/
+ } else {
+ VERIFY((sb->sb_flags & SB_LOCK) ||
+ (so->so_state & SS_DEFUNCT) ||
+ (so->so_flags1 & SOF1_DEFUNCTINPROG));
+
+ sb->sb_flags &= ~SB_LOCK;
+
+ if (sb->sb_wantlock > 0) {
+ /*
+ * We may get here from sorflush(), in which case "sb" may not
+ * point to the real socket buffer. Use the actual socket
+ * buffer address from the socket instead.
+ */
+ wakeup((sb->sb_flags & SB_RECV) ? &so->so_rcv.sb_flags :
+ &so->so_snd.sb_flags);
+ }
}
if (!keeplocked) { /* unlock on exit */
soevupcall(so, hint);
- /* Don't post an event if this a subflow socket */
- if ((hint & SO_FILT_HINT_IFDENIED) && !(so->so_flags & SOF_MP_SUBFLOW))
+ /*
+ * Don't post an event if this a subflow socket or
+ * the app has opted out of using cellular interface
+ */
+ if ((hint & SO_FILT_HINT_IFDENIED) &&
+ !(so->so_flags & SOF_MP_SUBFLOW) &&
+ !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR) &&
+ !(so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE))
soevent_ifdenied(so);
}
void
soevupcall(struct socket *so, u_int32_t hint)
{
- void (*so_event)(struct socket *, void *, uint32_t);
-
- if ((so_event = so->so_event) != NULL) {
+ if (so->so_event != NULL) {
caddr_t so_eventarg = so->so_eventarg;
hint &= so->so_eventmask;
IS_SO_TC_BACKGROUND(so->so_traffic_class));
}
+int
+soissrcrealtime(struct socket *so)
+{
+ return (so->so_traffic_class >= SO_TC_AV);
+}
+
+void
+sonullevent(struct socket *so, void *arg, uint32_t hint)
+{
+#pragma unused(so, arg, hint)
+}
+
/*
* Here is the definition of some of the basic objects in the kern.ipc
* branch of the MIB.
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
&sb_max, 0, &sysctl_sb_max, "IU", "Maximum socket buffer size");
-SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets,
- CTLFLAG_RD | CTLFLAG_LOCKED, &maxsockets, 0,
- "Maximum number of sockets avaliable");
-
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor,
CTLFLAG_RW | CTLFLAG_LOCKED, &sb_efficiency, 0, "");