/*
- * Copyright (c) 2009-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2009-2012 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
static int tfp_count = 0;
-static TAILQ_HEAD(, tclass_for_proc) tfp_head = TAILQ_HEAD_INITIALIZER(tfp_head);
+static TAILQ_HEAD(, tclass_for_proc) tfp_head =
+ TAILQ_HEAD_INITIALIZER(tfp_head);
struct tclass_for_proc {
TAILQ_ENTRY(tclass_for_proc) tfp_link;
- int tfp_class;
- pid_t tfp_pid;
- char tfp_pname[MAXCOMLEN + 1];
+ int tfp_class;
+ pid_t tfp_pid;
+ char tfp_pname[MAXCOMLEN + 1];
};
-extern void tcp_set_background_cc(struct socket *);
-extern void tcp_set_foreground_cc(struct socket *);
-
-int dscp_code_from_mbuf_tclass(int );
-
-static int get_pid_tclass(pid_t , int *);
-static int get_pname_tclass(const char * , int *);
-static int set_pid_tclass(pid_t , int );
-static int set_pname_tclass(const char * , int );
+static int dscp_code_from_mbuf_tclass(mbuf_traffic_class_t);
+static int get_pid_tclass(struct so_tcdbg *);
+static int get_pname_tclass(struct so_tcdbg *);
+static int set_pid_tclass(struct so_tcdbg *);
+static int set_pname_tclass(struct so_tcdbg *);
+static int flush_pid_tclass(struct so_tcdbg *);
static int purge_tclass_for_proc(void);
static int flush_tclass_for_proc(void);
+static void so_set_lro(struct socket*, int);
+int get_tclass_for_curr_proc(int *);
-
-static lck_grp_attr_t *tclass_lck_grp_attr = NULL; /* mutex group attributes */
-static lck_grp_t *tclass_lck_grp = NULL; /* mutex group definition */
-static lck_attr_t *tclass_lck_attr = NULL; /* mutex attributes */
-static lck_mtx_t *tclass_lock = NULL;
+static lck_grp_attr_t *tclass_lck_grp_attr = NULL; /* mutex group attributes */
+static lck_grp_t *tclass_lck_grp = NULL; /* mutex group definition */
+static lck_attr_t *tclass_lck_attr = NULL; /* mutex attributes */
+decl_lck_mtx_data(static, tclass_lock_data);
+static lck_mtx_t *tclass_lock = &tclass_lock_data;
/*
* Must be called with tclass_lock held
find_tfp_by_pid(pid_t pid)
{
struct tclass_for_proc *tfp;
-
+
TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
if (tfp->tfp_pid == pid)
break;
}
- return tfp;
+ return (tfp);
}
/*
find_tfp_by_pname(const char *pname)
{
struct tclass_for_proc *tfp;
-
+
TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
- if (strncmp(pname, tfp->tfp_pname, sizeof(tfp->tfp_pname)) == 0)
+ if (strncmp(pname, tfp->tfp_pname,
+ sizeof (tfp->tfp_pname)) == 0)
break;
}
- return tfp;
+ return (tfp);
}
-static int
-get_tclass_for_curr_proc(void)
+__private_extern__ int
+get_tclass_for_curr_proc(int *sotc)
{
- struct tclass_for_proc *tfp;
- int sotc = SO_TC_BE;
+ struct tclass_for_proc *tfp = NULL;
proc_t p = current_proc(); /* Not ref counted */
pid_t pid = proc_pid(p);
char *pname = proc_name_address(p);
-
+
+ *sotc = -1;
+
lck_mtx_lock(tclass_lock);
-
+
TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
- if ((tfp->tfp_pid == pid) ||
- (tfp->tfp_pid == -1 && strncmp(pname, tfp->tfp_pname, sizeof(tfp->tfp_pname)) == 0)) {
- sotc = tfp->tfp_class;
+ if ((tfp->tfp_pid == pid) || (tfp->tfp_pid == -1 &&
+ strncmp(pname, tfp->tfp_pname,
+ sizeof (tfp->tfp_pname)) == 0)) {
+ *sotc = tfp->tfp_class;
break;
- }
+ }
}
lck_mtx_unlock(tclass_lock);
- return sotc;
+ return ((tfp == NULL) ? 0 : 1);
}
/*
TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
proc_t p;
-
+
if (tfp->tfp_pid == -1)
continue;
if ((p = proc_find(tfp->tfp_pid)) == NULL) {
tfp_count--;
TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
-
+
_FREE(tfp, M_TEMP);
} else {
proc_rele(p);
}
lck_mtx_unlock(tclass_lock);
-
- return error;
+
+ return (error);
}
/*
TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
free_tclass_for_proc(tfp);
}
-
+
lck_mtx_unlock(tclass_lock);
-
- return error;
+
+ return (error);
}
* Must be called with tclass_lock held
*/
static struct tclass_for_proc *
-alloc_tclass_for_proc(pid_t pid, const char *pname, int tclass)
+alloc_tclass_for_proc(pid_t pid, const char *pname)
{
struct tclass_for_proc *tfp;
-
+
if (pid == -1 && pname == NULL)
- return NULL;
+ return (NULL);
- tfp = _MALLOC(sizeof(struct tclass_for_proc), M_TEMP, M_NOWAIT | M_ZERO);
+ tfp = _MALLOC(sizeof (struct tclass_for_proc), M_TEMP, M_NOWAIT|M_ZERO);
if (tfp == NULL)
- return NULL;
-
+ return (NULL);
+
tfp->tfp_pid = pid;
- tfp->tfp_class = tclass;
/*
- * Add per pid entries before per proc name so we can find
+ * Add per pid entries before per proc name so we can find
* a specific instance of a process before the general name base entry.
*/
if (pid != -1) {
TAILQ_INSERT_HEAD(&tfp_head, tfp, tfp_link);
} else {
- strlcpy(tfp->tfp_pname, pname, sizeof(tfp->tfp_pname));
+ strlcpy(tfp->tfp_pname, pname, sizeof (tfp->tfp_pname));
TAILQ_INSERT_TAIL(&tfp_head, tfp, tfp_link);
}
-
+
tfp_count++;
- return tfp;
+ return (tfp);
}
/*
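- * -1 for tclass means to remove the entry
+ * A tclass of -1 is recorded in the entry but is not applied to the
+ * process's open sockets; entries are removed with SO_TCDBG_DELETE.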
*/
-int
-set_pid_tclass(pid_t pid, int tclass)
+int
+set_pid_tclass(struct so_tcdbg *so_tcdbg)
{
int error = EINVAL;
proc_t p = NULL;
struct fileproc *fp;
struct tclass_for_proc *tfp;
int i;
+ pid_t pid = so_tcdbg->so_tcdbg_pid;
+ int tclass = so_tcdbg->so_tcdbg_tclass;
p = proc_find(pid);
if (p == NULL) {
- printf("set_pid_tclass proc_find(%d) \n", pid);
+ printf("%s proc_find(%d) failed\n", __func__, pid);
goto done;
}
-
+
/* Need a tfp */
lck_mtx_lock(tclass_lock);
-
+
tfp = find_tfp_by_pid(pid);
- if (tclass == -1) {
- if (tfp != NULL) {
- free_tclass_for_proc(tfp);
- error = 0;
- }
- lck_mtx_unlock(tclass_lock);
- goto done;
- } else {
+ if (tfp == NULL) {
+ tfp = alloc_tclass_for_proc(pid, NULL);
if (tfp == NULL) {
- tfp = alloc_tclass_for_proc(pid, NULL, tclass);
- if (tfp == NULL) {
- lck_mtx_unlock(tclass_lock);
- error = ENOBUFS;
- goto done;
- }
- } else {
- tfp->tfp_class = tclass;
+ lck_mtx_unlock(tclass_lock);
+ error = ENOBUFS;
+ goto done;
}
}
+ tfp->tfp_class = tclass;
+
lck_mtx_unlock(tclass_lock);
if (tfp != NULL) {
proc_fdlock(p);
-
+
fdp = p->p_fd;
for (i = 0; i < fdp->fd_nfiles; i++) {
struct socket *so;
-
+
fp = fdp->fd_ofiles[i];
- if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
- fp->f_fglob->fg_type != DTYPE_SOCKET)
+ if (fp == NULL ||
+ (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
+ fp->f_fglob->fg_type != DTYPE_SOCKET)
continue;
-
+
so = (struct socket *)fp->f_fglob->fg_data;
- if (so->so_proto->pr_domain->dom_family != AF_INET &&
- so->so_proto->pr_domain->dom_family != AF_INET6)
+ if (so->so_proto->pr_domain->dom_family != AF_INET &&
+ so->so_proto->pr_domain->dom_family != AF_INET6)
continue;
socket_lock(so, 1);
- error = so_set_traffic_class(so, tclass != -1 ? tclass : SO_TC_BE);
- socket_unlock(so, 1);
- if (error != 0) {
- printf("set_pid_tclass so_set_traffic_class(%p, %d) failed %d\n", so, tclass, error);
- error = 0;
+ if (tclass != -1) {
+ error = so_set_traffic_class(so, tclass);
+ if (error != 0) {
+ printf("%s: so_set_traffic_class"
+ "(so=%p, fd=%d, tclass=%d) "
+ "failed %d\n", __func__,
+ so, i, tclass, error);
+ error = 0;
+ }
}
+ socket_unlock(so, 1);
}
-
+
proc_fdunlock(p);
}
-
- error = 0;
+
+ error = 0;
done:
if (p != NULL)
proc_rele(p);
-
- return error;
+
+ return (error);
}
-int
-set_pname_tclass(const char *pname, int tclass)
+int
+set_pname_tclass(struct so_tcdbg *so_tcdbg)
{
int error = EINVAL;
struct tclass_for_proc *tfp;
lck_mtx_lock(tclass_lock);
-
- tfp = find_tfp_by_pname(pname);
- if (tclass == -1) {
- if (tfp != NULL)
- free_tclass_for_proc(tfp);
- } else {
+
+ tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
+ if (tfp == NULL) {
+ tfp = alloc_tclass_for_proc(-1, so_tcdbg->so_tcdbg_pname);
if (tfp == NULL) {
- tfp = alloc_tclass_for_proc(-1, pname, tclass);
- if (tfp == NULL) {
- lck_mtx_unlock(tclass_lock);
- error = ENOBUFS;
- goto done;
- }
- } else {
- tfp->tfp_class = tclass;
+ lck_mtx_unlock(tclass_lock);
+ error = ENOBUFS;
+ goto done;
}
}
+ tfp->tfp_class = so_tcdbg->so_tcdbg_tclass;
+
lck_mtx_unlock(tclass_lock);
-
- error = 0;
+
+ error = 0;
done:
-
- return error;
+
+ return (error);
}
-int
-get_pid_tclass(pid_t pid, int *tclass)
+static int
+flush_pid_tclass(struct so_tcdbg *so_tcdbg)
+{
+ pid_t pid = so_tcdbg->so_tcdbg_pid;
+ int tclass = so_tcdbg->so_tcdbg_tclass;
+ struct filedesc *fdp;
+ int error = EINVAL;
+ proc_t p;
+ int i;
+
+ p = proc_find(pid);
+ if (p == PROC_NULL) {
+ printf("%s proc_find(%d) failed\n", __func__, pid);
+ goto done;
+ }
+
+ proc_fdlock(p);
+ fdp = p->p_fd;
+ for (i = 0; i < fdp->fd_nfiles; i++) {
+ struct socket *so;
+ struct fileproc *fp;
+
+ fp = fdp->fd_ofiles[i];
+ if (fp == NULL ||
+ (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
+ fp->f_fglob->fg_type != DTYPE_SOCKET)
+ continue;
+
+ so = (struct socket *)fp->f_fglob->fg_data;
+ error = sock_setsockopt(so, SOL_SOCKET, SO_FLUSH, &tclass,
+ sizeof (tclass));
+ if (error != 0) {
+ printf("%s: setsockopt(SO_FLUSH) (so=%p, fd=%d, "
+ "tclass=%d) failed %d\n", __func__, so, i, tclass,
+ error);
+ error = 0;
+ }
+ }
+ proc_fdunlock(p);
+
+ error = 0;
+done:
+ if (p != PROC_NULL)
+ proc_rele(p);
+
+ return (error);
+}
+
+int
+get_pid_tclass(struct so_tcdbg *so_tcdbg)
{
int error = EINVAL;
proc_t p = NULL;
struct tclass_for_proc *tfp;
-
- *tclass = -1; /* Means not set */
+ pid_t pid = so_tcdbg->so_tcdbg_pid;
+
+ so_tcdbg->so_tcdbg_tclass = -1; /* Means not set */
+ so_tcdbg->so_tcdbg_opportunistic = -1; /* Means not set */
p = proc_find(pid);
if (p == NULL) {
- printf("get_pid_tclass proc_find(%d) \n", pid);
+ printf("%s proc_find(%d) failed\n", __func__, pid);
goto done;
}
-
+
/* Need a tfp */
lck_mtx_lock(tclass_lock);
-
+
tfp = find_tfp_by_pid(pid);
if (tfp != NULL) {
- *tclass = tfp->tfp_class ;
+ so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
error = 0;
}
lck_mtx_unlock(tclass_lock);
done:
if (p != NULL)
proc_rele(p);
-
- return error;
+
+ return (error);
}
-int
-get_pname_tclass(const char *pname, int *tclass)
+int
+get_pname_tclass(struct so_tcdbg *so_tcdbg)
{
int error = EINVAL;
struct tclass_for_proc *tfp;
-
- *tclass = -1; /* Means not set */
+
+ so_tcdbg->so_tcdbg_tclass = -1; /* Means not set */
+ so_tcdbg->so_tcdbg_opportunistic = -1; /* Means not set */
/* Need a tfp */
lck_mtx_lock(tclass_lock);
-
- tfp = find_tfp_by_pname(pname);
+
+ tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
if (tfp != NULL) {
- *tclass = tfp->tfp_class ;
+ so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
error = 0;
}
lck_mtx_unlock(tclass_lock);
-
- return error;
+
+ return (error);
}
+static int
+delete_tclass_for_pid_pname(struct so_tcdbg *so_tcdbg)
+{
+ int error = EINVAL;
+ pid_t pid = so_tcdbg->so_tcdbg_pid;
+ struct tclass_for_proc *tfp = NULL;
+
+ lck_mtx_lock(tclass_lock);
+ if (pid != -1)
+ tfp = find_tfp_by_pid(pid);
+ else
+ tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
+
+ if (tfp != NULL) {
+ free_tclass_for_proc(tfp);
+ error = 0;
+ }
+
+ lck_mtx_unlock(tclass_lock);
+
+ return (error);
+}
/*
* Setting options requires privileges
*/
-__private_extern__ int
+__private_extern__ int
so_set_tcdbg(struct socket *so, struct so_tcdbg *so_tcdbg)
{
int error = 0;
-
+
if ((so->so_state & SS_PRIV) == 0)
- return EPERM;
+ return (EPERM);
socket_unlock(so, 0);
switch (so_tcdbg->so_tcdbg_cmd) {
case SO_TCDBG_PID:
- error = set_pid_tclass(so_tcdbg->so_tcdbg_pid, so_tcdbg->so_tcdbg_tclass);
+ error = set_pid_tclass(so_tcdbg);
break;
-
+
case SO_TCDBG_PNAME:
- error = set_pname_tclass(so_tcdbg->so_tcdbg_pname, so_tcdbg->so_tcdbg_tclass);
+ error = set_pname_tclass(so_tcdbg);
break;
-
+
case SO_TCDBG_PURGE:
error = purge_tclass_for_proc();
break;
-
+
case SO_TCDBG_FLUSH:
error = flush_tclass_for_proc();
break;
-
+
+ case SO_TCDBG_DELETE:
+ error = delete_tclass_for_pid_pname(so_tcdbg);
+ break;
+
+ case SO_TCDBG_TCFLUSH_PID:
+ error = flush_pid_tclass(so_tcdbg);
+ break;
+
default:
error = EINVAL;
break;
-
}
socket_lock(so, 0);
- return error;
+ return (error);
}
/*
* Not required to be privileged to get
*/
-__private_extern__ int
+__private_extern__ int
sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
{
int error = 0;
void *buf = NULL;
size_t len = sopt->sopt_valsize;
- error = sooptcopyin(sopt, &so_tcdbg, sizeof(struct so_tcdbg), sizeof(struct so_tcdbg));
+ error = sooptcopyin(sopt, &so_tcdbg, sizeof (struct so_tcdbg),
+ sizeof (struct so_tcdbg));
if (error != 0)
- return error;
-
+ return (error);
+
sopt->sopt_valsize = len;
-
+
socket_unlock(so, 0);
switch (so_tcdbg.so_tcdbg_cmd) {
case SO_TCDBG_PID:
- error = get_pid_tclass(so_tcdbg.so_tcdbg_pid, &so_tcdbg.so_tcdbg_tclass);
+ error = get_pid_tclass(&so_tcdbg);
break;
-
+
case SO_TCDBG_PNAME:
- error = get_pname_tclass(so_tcdbg.so_tcdbg_pname, &so_tcdbg.so_tcdbg_tclass);
+ error = get_pname_tclass(&so_tcdbg);
break;
-
+
case SO_TCDBG_COUNT:
lck_mtx_lock(tclass_lock);
so_tcdbg.so_tcdbg_count = tfp_count;
error = EINVAL;
break;
}
- len = alloc_count * sizeof(struct so_tcdbg);
+ len = alloc_count * sizeof (struct so_tcdbg);
lck_mtx_unlock(tclass_lock);
buf = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
} else {
ptr->so_tcdbg_cmd = SO_TCDBG_PNAME;
ptr->so_tcdbg_pid = -1;
- strlcpy(ptr->so_tcdbg_pname, tfp->tfp_pname, sizeof(ptr->so_tcdbg_pname));
+ strlcpy(ptr->so_tcdbg_pname,
+ tfp->tfp_pname,
+ sizeof (ptr->so_tcdbg_pname));
}
ptr->so_tcdbg_tclass = tfp->tfp_class;
ptr++;
}
-
+
lck_mtx_unlock(tclass_lock);
}
break;
-
+
default:
error = EINVAL;
break;
-
}
socket_lock(so, 0);
if (error == 0) {
if (buf == NULL) {
- error = sooptcopyout(sopt, &so_tcdbg, sizeof(struct so_tcdbg));
+ error = sooptcopyout(sopt, &so_tcdbg,
+ sizeof (struct so_tcdbg));
} else {
error = sooptcopyout(sopt, buf, len);
_FREE(buf, M_TEMP);
}
}
- return error;
+ return (error);
}
so_set_traffic_class(struct socket *so, int optval)
{
int error = 0;
-
- if (optval < SO_TC_BE || optval > SO_TC_VO) {
+
+ if (optval < SO_TC_BE || optval > SO_TC_CTL) {
error = EINVAL;
} else {
- so->so_traffic_class = optval;
-
- if ((INP_SOCKAF(so) == AF_INET || INP_SOCKAF(so) == AF_INET6) &&
- INP_SOCKTYPE(so) == SOCK_STREAM) {
- set_tcp_stream_priority(so);
+ switch (optval) {
+ case _SO_TC_BK:
+ optval = SO_TC_BK;
+ break;
+ case _SO_TC_VI:
+ optval = SO_TC_VI;
+ break;
+ case _SO_TC_VO:
+ optval = SO_TC_VO;
+ break;
+ default:
+ if (!SO_VALID_TC(optval))
+ error = EINVAL;
+ break;
+ }
+
+ if (error == 0) {
+ int oldval = so->so_traffic_class;
+
+ VERIFY(SO_VALID_TC(optval));
+ so->so_traffic_class = optval;
+
+ if ((INP_SOCKAF(so) == AF_INET ||
+ INP_SOCKAF(so) == AF_INET6) &&
+ INP_SOCKTYPE(so) == SOCK_STREAM) {
+ set_tcp_stream_priority(so);
+
+ /* Set/unset use of Large Receive Offload */
+ so_set_lro(so, optval);
+ }
+
+ if ((INP_SOCKAF(so) == AF_INET ||
+ INP_SOCKAF(so) == AF_INET6) &&
+ optval != oldval && (optval == SO_TC_BK_SYS ||
+ oldval == SO_TC_BK_SYS)) {
+ /*
+ * If the app switches from BK_SYS to something
+ * else, resume the socket if it was suspended.
+ */
+ if (oldval == SO_TC_BK_SYS)
+ inp_reset_fc_state(so->so_pcb);
+
+ SOTHROTTLELOG(("throttle[%d]: so %p [%d,%d] "
+ "opportunistic %s\n", so->last_pid,
+ so, INP_SOCKAF(so), INP_SOCKTYPE(so),
+ (optval == SO_TC_BK_SYS) ? "ON" : "OFF"));
+ }
}
}
- return error;
+ return (error);
}
__private_extern__ void
so_set_default_traffic_class(struct socket *so)
{
- int sotc = SO_TC_BE;
+ int sotc = -1;
- if (tfp_count > 0 && (INP_SOCKAF(so) == AF_INET || INP_SOCKAF(so) == AF_INET6)) {
- sotc = get_tclass_for_curr_proc();
+ if (tfp_count > 0 &&
+ (INP_SOCKAF(so) == AF_INET || INP_SOCKAF(so) == AF_INET6)) {
+ get_tclass_for_curr_proc(&sotc);
}
-
- so->so_traffic_class = sotc;
-
- return;
+
+ so->so_traffic_class = (sotc != -1) ? sotc : SO_TC_BE;
}
+__private_extern__ int
+so_set_opportunistic(struct socket *so, int optval)
+{
+ return (so_set_traffic_class(so, (optval == 0) ?
+ SO_TC_BE : SO_TC_BK_SYS));
+}
__private_extern__ int
-mbuf_traffic_class_from_control(struct mbuf *control)
+so_get_opportunistic(struct socket *so)
+{
+ return (so->so_traffic_class == SO_TC_BK_SYS);
+}
+
+__private_extern__ mbuf_svc_class_t
+mbuf_service_class_from_control(struct mbuf *control)
{
struct cmsghdr *cm;
-
- for (cm = M_FIRST_CMSGHDR(control);
- cm != NULL;
- cm = M_NXT_CMSGHDR(control, cm)) {
+ mbuf_svc_class_t msc = MBUF_SC_UNSPEC;
+
+ for (cm = M_FIRST_CMSGHDR(control); cm != NULL;
+ cm = M_NXT_CMSGHDR(control, cm)) {
int tc;
- if (cm->cmsg_len < sizeof(struct cmsghdr))
+ if (cm->cmsg_len < sizeof (struct cmsghdr))
break;
-
+
if (cm->cmsg_level != SOL_SOCKET ||
- cm->cmsg_type != SO_TRAFFIC_CLASS)
+ cm->cmsg_type != SO_TRAFFIC_CLASS)
continue;
- if (cm->cmsg_len != CMSG_LEN(sizeof(int)))
+ if (cm->cmsg_len != CMSG_LEN(sizeof (int)))
continue;
-
- tc = *(int *)CMSG_DATA(cm);
-
- switch (tc) {
- case SO_TC_BE:
- return MBUF_TC_BE;
- case SO_TC_BK:
- return MBUF_TC_BK;
- case SO_TC_VI:
- return MBUF_TC_VI;
- case SO_TC_VO:
- return MBUF_TC_VO;
- default:
- break;
- }
+
+ tc = *(int *)(void *)CMSG_DATA(cm);
+ msc = so_tc2msc(tc);
+ if (MBUF_VALID_SC(msc))
+ break;
}
-
- return MBUF_TC_UNSPEC;
+
+ return (msc);
}
__private_extern__ int
-dscp_code_from_mbuf_tclass(int mtc)
+dscp_code_from_mbuf_tclass(mbuf_traffic_class_t mtc)
{
int dscp_code;
-
+
switch (mtc) {
default:
case MBUF_TC_BE:
dscp_code = 0x30;
break;
}
-
- return dscp_code;
+
+ return (dscp_code);
}
__private_extern__ void
so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
{
- uint32_t sotc = m->m_pkthdr.prio;
+ uint32_t sotc = m_get_traffic_class(m);
if (sotc >= SO_TC_STATS_MAX)
sotc = SO_TC_BE;
-
- so->so_tc_stats[sotc].rxpackets += 1;
- so->so_tc_stats[sotc].rxbytes += ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
- return;
+ so->so_tc_stats[sotc].rxpackets += 1;
+ so->so_tc_stats[sotc].rxbytes +=
+ ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
}
__private_extern__ void
set_tcp_stream_priority(struct socket *so)
{
struct tcpcb *tp = intotcpcb(sotoinpcb(so));
+ int old_cc = tp->tcp_cc_index;
+ int recvbg = IS_TCP_RECV_BG(so);
- /* If the socket was marked as a background socket or if the
- * traffic class is set to background with traffic class socket
- * option then make both send and recv side of the stream to be
- * background. The variable sotcdb which can be set with sysctl
+ /*
+ * If the socket was marked as a background socket or if the
+ * traffic class is set to background with traffic class socket
+ * option then make both send and recv side of the stream to be
+ * background. The variable sotcdb which can be set with sysctl
* is used to disable these settings for testing.
*/
- if (soisbackground(so) || so->so_traffic_class == SO_TC_BK) {
+ if (soisthrottled(so) || IS_SO_TC_BACKGROUND(so->so_traffic_class)) {
if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0) {
- if (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX)
+ if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX)
tcp_set_foreground_cc(so);
} else {
- if (tp->tcp_cc_index != TCP_CC_ALGO_BACKGROUND_INDEX)
+ if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX)
tcp_set_background_cc(so);
}
-
+
/* Set receive side background flags */
- if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0) {
- so->so_traffic_mgt_flags &= ~(TRAFFIC_MGT_TCP_RECVBG);
- } else {
- so->so_traffic_mgt_flags |= TRAFFIC_MGT_TCP_RECVBG;
- }
+ if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0)
+ tcp_clear_recv_bg(so);
+ else
+ tcp_set_recv_bg(so);
} else {
- so->so_traffic_mgt_flags &= ~(TRAFFIC_MGT_TCP_RECVBG);
- if (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX)
+ tcp_clear_recv_bg(so);
+ if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX)
tcp_set_foreground_cc(so);
}
- return;
+
+ if (old_cc != tp->tcp_cc_index || recvbg != IS_TCP_RECV_BG(so)) {
+ SOTHROTTLELOG(("throttle[%d]: so %p [%d,%d] TCP %s send; "
+ "%s recv\n", so->last_pid, so, INP_SOCKAF(so),
+ INP_SOCKTYPE(so),
+ (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ?
+ "background" : "foreground",
+ IS_TCP_RECV_BG(so) ? "background" : "foreground"));
+ }
}
/*
* - set the DSCP code following the WMM mapping
*/
__private_extern__ void
-set_packet_tclass(struct mbuf *m, struct socket *so, int in_mtc, int isipv6)
+set_packet_service_class(struct mbuf *m, struct socket *so,
+ mbuf_svc_class_t in_msc, u_int32_t flags)
{
- int mtc = MBUF_TC_BE; /* Best effort by default */
- struct inpcb *inp = sotoinpcb(so); /* in6pcb and inpcb are the same */
+ mbuf_svc_class_t msc = MBUF_SC_BE; /* Best effort by default */
+ struct inpcb *inp = sotoinpcb(so); /* in6pcb and inpcb are the same */
struct ip *ip = mtod(m, struct ip *);
#if INET6
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
#endif /* INET6 */
-
+ int isipv6 = ((flags & PKT_SCF_IPV6) != 0) ? 1 : 0;
+
if (!(m->m_flags & M_PKTHDR))
return;
-
- /*
+
+ /*
* Here is the precedence:
* 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
- * 2) Traffic class passed via ancillary data to sendmsdg(2)
+ * 2) Traffic class passed via ancillary data to sendmsg(2)
* 3) Traffic class socket option last
*/
- if (soisbackground(so)) {
- mtc = MBUF_TC_BK;
- } else if (in_mtc != MBUF_TC_UNSPEC) {
- if (in_mtc >= MBUF_TC_BE && in_mtc <= MBUF_TC_VO)
- mtc = in_mtc;
+ if (in_msc != MBUF_SC_UNSPEC) {
+ if (in_msc >= MBUF_SC_BE && in_msc <= MBUF_SC_CTL)
+ msc = in_msc;
} else {
- switch (so->so_traffic_class) {
- case SO_TC_BE:
- mtc = MBUF_TC_BE;
- break;
- case SO_TC_BK:
- mtc = MBUF_TC_BK;
- break;
- case SO_TC_VI:
- mtc = MBUF_TC_VI;
- break;
- case SO_TC_VO:
- mtc = MBUF_TC_VO;
- break;
- default:
- break;
- }
+ VERIFY(SO_VALID_TC(so->so_traffic_class));
+ msc = so_tc2msc(so->so_traffic_class);
+ /* Assert because tc must have been valid */
+ VERIFY(MBUF_VALID_SC(msc));
}
-
+
+ /*
+ * If TRAFFIC_MGT_SO_BACKGROUND is set, depress the priority.
+ */
+ if (soisthrottled(so) && !IS_MBUF_SC_BACKGROUND(msc))
+ msc = MBUF_SC_BK;
+
/*
- * Set the traffic class in the mbuf packet header prio field
+ * Set the traffic class in the mbuf packet header svc field
*/
- if ((sotcdb & SOTCDB_NO_MTC))
+ if (sotcdb & SOTCDB_NO_MTC)
goto no_mbtc;
- m->m_pkthdr.prio = mtc;
-
+
+ /*
+ * Elevate service class if the packet is a pure TCP ACK.
+ * We can do this only when the flow is not a background
+ * flow and the outgoing interface supports the
+ * transmit-start model.
+ */
+ if (!IS_MBUF_SC_BACKGROUND(msc) && (flags & PKT_SCF_TCP_ACK))
+ msc = MBUF_SC_CTL;
+
+ (void) m_set_service_class(m, msc);
+
+ /*
+ * Set the privileged traffic auxiliary flag if applicable, or clear it.
+ */
+ if (!(sotcdb & SOTCDB_NO_PRIVILEGED) && soisprivilegedtraffic(so) &&
+ msc != MBUF_SC_UNSPEC)
+ m->m_pkthdr.aux_flags |= MAUXF_PRIO_PRIVILEGED;
+ else
+ m->m_pkthdr.aux_flags &= ~MAUXF_PRIO_PRIVILEGED;
+
no_mbtc:
/*
- * Quick exit when best effort
+ * Quick exit when best effort
*/
- if (mtc == MBUF_TC_BE)
+ if (msc == MBUF_SC_BE)
goto no_dscp;
+
/*
- * Now let set the DSCP code in IPv4 or IPv6 header
- * By default do this only for local traffic if a code is not already set
+ * The default behavior is for the networking stack to not set the
+ * DSCP code, based on SOTCDB_NO_DSCP being set. If the flag is
+ * cleared, set the DSCP code in IPv4 or IPv6 header only for local
+ * traffic, if it is not already set. <rdar://problem/11277343>
*/
- if ((sotcdb & SOTCDB_NO_DSCP))
+ if (sotcdb & SOTCDB_NO_DSCP)
goto no_dscp;
-
+
/*
- * Test if a IP TOS or IPV6 TCLASS has already been set on the socket or the raw packet
+ * Test if an IP TOS or IPv6 TCLASS has already been set
+ * on the socket or the raw packet.
*/
- if ((sotcdb & SOTCDB_NO_DSCPTST) == 0) {
+ if (!(sotcdb & SOTCDB_NO_DSCPTST)) {
#if INET6
- if (isipv6)
- {
- if ((so->so_type == SOCK_RAW && (ip6->ip6_flow & htonl(0xff << 20)) != 0) ||
- (inp->in6p_outputopts && inp->in6p_outputopts->ip6po_tclass != -1))
+ if (isipv6) {
+ if ((so->so_type == SOCK_RAW &&
+ (ip6->ip6_flow & htonl(0xff << 20)) != 0) ||
+ (inp->in6p_outputopts &&
+ inp->in6p_outputopts->ip6po_tclass != -1))
goto no_dscp;
- }
- else
+ } else
#endif /* INET6 */
- {
- if ((so->so_type == SOCK_RAW && (inp->inp_flags & INP_HDRINCL)) ||
- inp->inp_ip_tos != 0)
- goto no_dscp;
- }
+ if ((so->so_type == SOCK_RAW &&
+ (inp->inp_flags & INP_HDRINCL)) ||
+ inp->inp_ip_tos != 0)
+ goto no_dscp;
}
-
+
/*
* Test if destination is local
*/
- if ((sotcdb & SOTCDB_NO_LCLTST) == 0) {
+ if (!(sotcdb & SOTCDB_NO_LCLTST)) {
int islocal = 0;
- struct route *ro = &inp->inp_route;
+ struct rtentry *rt = inp->inp_route.ro_rt;
if (so->so_type == SOCK_STREAM) {
- struct tcpcb *tp = intotcpcb(inp);
-
- if ((tp->t_flags & TF_LOCAL))
+ if (intotcpcb(inp)->t_flags & TF_LOCAL)
islocal = 1;
- }
- else
-#if INET6
- if (isipv6)
- {
- if ((ro != NULL && ro->ro_rt != NULL &&
- (ro->ro_rt->rt_gateway->sa_family == AF_LINK ||
- (ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))) ||
- in6addr_local(&ip6->ip6_dst))
+ } else if (rt != NULL &&
+ (rt->rt_gateway->sa_family == AF_LINK ||
+ (rt->rt_ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)))) {
+ if (!(rt->rt_ifp->if_flags & IFF_POINTOPOINT))
islocal = 1;
- }
- else
+ } else
+#if INET6
+ if (isipv6 && in6addr_local(&ip6->ip6_dst)) {
+ islocal = 1;
+ } else
#endif /* INET6 */
- {
- if ((ro != NULL && ro->ro_rt != NULL &&
- (ro->ro_rt->rt_gateway->sa_family == AF_LINK ||
- (ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))) ||
- inaddr_local(ip->ip_dst))
- islocal = 1;
+ if (inaddr_local(ip->ip_dst)) {
+ islocal = 1;
}
if (islocal == 0)
goto no_dscp;
#if INET6
if (isipv6)
- ip6->ip6_flow |=
- htonl(dscp_code_from_mbuf_tclass(m->m_pkthdr.prio) << 20);
+ ip6->ip6_flow |= htonl(dscp_code_from_mbuf_tclass(
+ m_get_traffic_class(m)) << 20);
else
#endif /* INET6 */
- ip->ip_tos |= dscp_code_from_mbuf_tclass(m->m_pkthdr.prio) << 2;
-
+ ip->ip_tos |= dscp_code_from_mbuf_tclass(
+ m_get_traffic_class(m)) << 2;
+
no_dscp:
/*
* For TCP with background traffic class switch CC algo based on sysctl
*/
- if (so->so_type == SOCK_STREAM) {
+ if (so->so_type == SOCK_STREAM)
set_tcp_stream_priority(so);
- }
-
+
+ so_tc_update_stats(m, so, msc);
+}
+
+__private_extern__ void
+so_tc_update_stats(struct mbuf *m, struct socket *so, mbuf_svc_class_t msc)
+{
+ mbuf_traffic_class_t mtc;
+
/*
* Assume socket and mbuf traffic class values are the same
- * Also assume the socket lock is held
+ * Also assume the socket lock is held. Note that the stats
+ * at the socket layer are reduced down to the legacy traffic
+ * classes; we could/should potentially expand so_tc_stats[].
*/
+ mtc = MBUF_SC2TC(msc);
+ VERIFY(mtc < SO_TC_STATS_MAX);
so->so_tc_stats[mtc].txpackets += 1;
so->so_tc_stats[mtc].txbytes += m->m_pkthdr.len;
-
- return;
}
__private_extern__ void
tclass_lck_grp_attr = lck_grp_attr_alloc_init();
tclass_lck_grp = lck_grp_alloc_init("tclass", tclass_lck_grp_attr);
tclass_lck_attr = lck_attr_alloc_init();
- if ((tclass_lock = lck_mtx_alloc_init(tclass_lck_grp, tclass_lck_attr)) == NULL) {
- panic("failed to allocate memory for tclass\n");
+ lck_mtx_init(tclass_lock, tclass_lck_grp, tclass_lck_attr);
+}
+
+__private_extern__ mbuf_svc_class_t
+so_tc2msc(int tc)
+{
+ mbuf_svc_class_t msc;
+
+ switch (tc) {
+ case SO_TC_BK_SYS:
+ msc = MBUF_SC_BK_SYS;
+ break;
+ case SO_TC_BK:
+ case _SO_TC_BK:
+ msc = MBUF_SC_BK;
+ break;
+ case SO_TC_BE:
+ msc = MBUF_SC_BE;
+ break;
+ case SO_TC_RD:
+ msc = MBUF_SC_RD;
+ break;
+ case SO_TC_OAM:
+ msc = MBUF_SC_OAM;
+ break;
+ case SO_TC_AV:
+ msc = MBUF_SC_AV;
+ break;
+ case SO_TC_RV:
+ msc = MBUF_SC_RV;
+ break;
+ case SO_TC_VI:
+ case _SO_TC_VI:
+ msc = MBUF_SC_VI;
+ break;
+ case SO_TC_VO:
+ case _SO_TC_VO:
+ msc = MBUF_SC_VO;
+ break;
+ case SO_TC_CTL:
+ msc = MBUF_SC_CTL;
+ break;
+ case SO_TC_ALL:
+ default:
+ msc = MBUF_SC_UNSPEC;
+ break;
}
+
+ return (msc);
}
+__private_extern__ int
+so_svc2tc(mbuf_svc_class_t svc)
+{
+ switch (svc) {
+ case MBUF_SC_UNSPEC:
+ return SO_TC_BE;
+ case MBUF_SC_BK_SYS:
+ return SO_TC_BK_SYS;
+ case MBUF_SC_BK:
+ return SO_TC_BK;
+ case MBUF_SC_BE:
+ return SO_TC_BE;
+ case MBUF_SC_RD:
+ return SO_TC_RD;
+ case MBUF_SC_OAM:
+ return SO_TC_OAM;
+ case MBUF_SC_AV:
+ return SO_TC_AV;
+ case MBUF_SC_RV:
+ return SO_TC_RV;
+ case MBUF_SC_VI:
+ return SO_TC_VI;
+ case MBUF_SC_VO:
+ return SO_TC_VO;
+ case MBUF_SC_CTL:
+ return SO_TC_CTL;
+ default:
+ return SO_TC_BE;
+ }
+}
+
+/*
+ * LRO is turned on for AV streaming and background classes.
+ */
+static void
+so_set_lro(struct socket *so, int optval)
+{
+ if ((optval == SO_TC_BK) ||
+ (optval == SO_TC_BK_SYS) ||
+ (optval == SO_TC_AV)) {
+ so->so_flags |= SOF_USELRO;
+ } else {
+ so->so_flags &= ~SOF_USELRO;
+ }
+}