X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/316670eb35587141e969394ae8537d66b9211e80..c3c9b80d004dbbfdf763edeb97968c6997e3b45b:/bsd/netinet/in_tclass.c diff --git a/bsd/netinet/in_tclass.c b/bsd/netinet/in_tclass.c index 02d9ccc86..d14323a4c 100644 --- a/bsd/netinet/in_tclass.c +++ b/bsd/netinet/in_tclass.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2012 Apple Inc. All rights reserved. + * Copyright (c) 2009-2020 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -39,8 +39,11 @@ #include #include #include +#include +#include #include +#include #include #include @@ -55,8 +58,225 @@ #include #include #include +#include -extern char *proc_name_address(void *p); +struct net_qos_dscp_map { + uint8_t sotc_to_dscp[SO_TC_MAX]; + uint8_t netsvctype_to_dscp[_NET_SERVICE_TYPE_COUNT]; +}; + +struct dcsp_msc_map { + uint8_t dscp; + mbuf_svc_class_t msc; +}; +static inline int so_throttle_best_effort(struct socket *, struct ifnet *); +static void set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *, int); +static errno_t dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *, size_t, + struct dcsp_msc_map *); + +static lck_grp_attr_t *tclass_lck_grp_attr = NULL; /* mutex group attributes */ +static lck_grp_t *tclass_lck_grp = NULL; /* mutex group definition */ +static lck_attr_t *tclass_lck_attr = NULL; /* mutex attributes */ +decl_lck_mtx_data(static, tclass_lock_data); +static lck_mtx_t *tclass_lock = &tclass_lock_data; + +SYSCTL_NODE(_net, OID_AUTO, qos, + CTLFLAG_RW | CTLFLAG_LOCKED, 0, "QoS"); + +static int sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS; +SYSCTL_PROC(_net_qos, OID_AUTO, default_netsvctype_to_dscp_map, + CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_default_netsvctype_to_dscp_map, "S", ""); + +static int sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS; +SYSCTL_PROC(_net_qos, OID_AUTO, dscp_to_wifi_ac_map, + CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, 
sysctl_dscp_to_wifi_ac_map, "S", ""); + +static int sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS; +SYSCTL_PROC(_net_qos, OID_AUTO, reset_dscp_to_wifi_ac_map, + CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED, + 0, 0, sysctl_reset_dscp_to_wifi_ac_map, "I", ""); + +int net_qos_verbose = 0; +SYSCTL_INT(_net_qos, OID_AUTO, verbose, + CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_verbose, 0, ""); + +/* + * Fastlane QoS policy: + * By Default allow all apps to get traffic class to DSCP mapping + */ +SYSCTL_NODE(_net_qos, OID_AUTO, policy, + CTLFLAG_RW | CTLFLAG_LOCKED, 0, ""); + +int net_qos_policy_restricted = 0; +SYSCTL_INT(_net_qos_policy, OID_AUTO, restricted, + CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restricted, 0, ""); + +int net_qos_policy_restrict_avapps = 0; +SYSCTL_INT(_net_qos_policy, OID_AUTO, restrict_avapps, + CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restrict_avapps, 0, ""); + +int net_qos_policy_wifi_enabled = 0; +SYSCTL_INT(_net_qos_policy, OID_AUTO, wifi_enabled, + CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_wifi_enabled, 0, ""); + +int net_qos_policy_capable_enabled = 0; +SYSCTL_INT(_net_qos_policy, OID_AUTO, capable_enabled, + CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_capable_enabled, 0, ""); + +/* + * Socket traffic class from network service type + */ +const int sotc_by_netservicetype[_NET_SERVICE_TYPE_COUNT] = { + SO_TC_BE, /* NET_SERVICE_TYPE_BE */ + SO_TC_BK_SYS, /* NET_SERVICE_TYPE_BK */ + SO_TC_VI, /* NET_SERVICE_TYPE_SIG */ + SO_TC_VI, /* NET_SERVICE_TYPE_VI */ + SO_TC_VO, /* NET_SERVICE_TYPE_VO */ + SO_TC_RV, /* NET_SERVICE_TYPE_RV */ + SO_TC_AV, /* NET_SERVICE_TYPE_AV */ + SO_TC_OAM, /* NET_SERVICE_TYPE_OAM */ + SO_TC_RD /* NET_SERVICE_TYPE_RD */ +}; + +/* + * DSCP mappings for QoS Fastlane as based on network service types + */ +static const +struct netsvctype_dscp_map fastlane_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = { + { .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF }, + { .netsvctype = NET_SERVICE_TYPE_BK, .dscp = 
_DSCP_AF11 }, + { .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS3 }, + { .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 }, + { .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF }, + { .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 }, + { .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 }, + { .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 }, + { .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 }, +}; + + +/* + * DSCP mappings for QoS RFC4594 as based on network service types + */ +static const +struct netsvctype_dscp_map rfc4594_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = { + { .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF }, + { .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_CS1 }, + { .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS5 }, + { .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 }, + { .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF }, + { .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 }, + { .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 }, + { .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 }, + { .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 }, +}; + +static struct net_qos_dscp_map fastlane_net_qos_dscp_map; +static struct net_qos_dscp_map rfc4594_net_qos_dscp_map; + +/* + * The size is one more than the max because DSCP start at zero + */ +#define DSCP_ARRAY_SIZE (_MAX_DSCP + 1) + +/* + * The DSCP to UP mapping (via mbuf service class) for WiFi follows is the mapping + * that implemented at the 802.11 driver level when the mbuf service class is + * MBUF_SC_BE. + * + * This clashes with the recommended mapping documented by the IETF document + * draft-szigeti-tsvwg-ieee-802-11e-01.txt but we keep the mapping to maintain + * binary compatibility. Applications should use the network service type socket + * option instead to select L2 QoS marking instead of IP_TOS or IPV6_TCLASS. 
+ */ +static const struct dcsp_msc_map default_dscp_to_wifi_ac_map[] = { + { .dscp = _DSCP_DF, .msc = MBUF_SC_BE }, /* RFC 2474 Standard */ + { .dscp = 1, .msc = MBUF_SC_BE }, /* */ + { .dscp = 2, .msc = MBUF_SC_BE }, /* */ + { .dscp = 3, .msc = MBUF_SC_BE }, /* */ + { .dscp = 4, .msc = MBUF_SC_BE }, /* */ + { .dscp = 5, .msc = MBUF_SC_BE }, /* */ + { .dscp = 6, .msc = MBUF_SC_BE }, /* */ + { .dscp = 7, .msc = MBUF_SC_BE }, /* */ + + { .dscp = _DSCP_CS1, .msc = MBUF_SC_BK }, /* RFC 3662 Low-Priority Data */ + { .dscp = 9, .msc = MBUF_SC_BK }, /* */ + { .dscp = _DSCP_AF11, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */ + { .dscp = 11, .msc = MBUF_SC_BK }, /* */ + { .dscp = _DSCP_AF12, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */ + { .dscp = 13, .msc = MBUF_SC_BK }, /* */ + { .dscp = _DSCP_AF13, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */ + { .dscp = 15, .msc = MBUF_SC_BK }, /* */ + + { .dscp = _DSCP_CS2, .msc = MBUF_SC_BK }, /* RFC 4594 OAM */ + { .dscp = 17, .msc = MBUF_SC_BK }, /* */ + { .dscp = _DSCP_AF21, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */ + { .dscp = 19, .msc = MBUF_SC_BK }, /* */ + { .dscp = _DSCP_AF22, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */ + { .dscp = 21, .msc = MBUF_SC_BK }, /* */ + { .dscp = _DSCP_AF23, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */ + { .dscp = 23, .msc = MBUF_SC_BK }, /* */ + + { .dscp = _DSCP_CS3, .msc = MBUF_SC_BE }, /* RFC 2474 Broadcast Video */ + { .dscp = 25, .msc = MBUF_SC_BE }, /* */ + { .dscp = _DSCP_AF31, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */ + { .dscp = 27, .msc = MBUF_SC_BE }, /* */ + { .dscp = _DSCP_AF32, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */ + { .dscp = 29, .msc = MBUF_SC_BE }, /* */ + { .dscp = _DSCP_AF33, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */ + { .dscp = 31, .msc = MBUF_SC_BE }, /* */ + + { .dscp = _DSCP_CS4, .msc = MBUF_SC_VI }, /* RFC 2474 Real-Time Interactive */ + { .dscp = 33, .msc = 
MBUF_SC_VI }, /* */ + { .dscp = _DSCP_AF41, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */ + { .dscp = 35, .msc = MBUF_SC_VI }, /* */ + { .dscp = _DSCP_AF42, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */ + { .dscp = 37, .msc = MBUF_SC_VI }, /* */ + { .dscp = _DSCP_AF43, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */ + { .dscp = 39, .msc = MBUF_SC_VI }, /* */ + + { .dscp = _DSCP_CS5, .msc = MBUF_SC_VI }, /* RFC 2474 Signaling */ + { .dscp = 41, .msc = MBUF_SC_VI }, /* */ + { .dscp = 42, .msc = MBUF_SC_VI }, /* */ + { .dscp = 43, .msc = MBUF_SC_VI }, /* */ + { .dscp = _DSCP_VA, .msc = MBUF_SC_VI }, /* RFC 5865 VOICE-ADMIT */ + { .dscp = 45, .msc = MBUF_SC_VI }, /* */ + { .dscp = _DSCP_EF, .msc = MBUF_SC_VI }, /* RFC 3246 Telephony */ + { .dscp = 47, .msc = MBUF_SC_VI }, /* */ + + { .dscp = _DSCP_CS6, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Chariot */ + { .dscp = 49, .msc = MBUF_SC_VO }, /* */ + { .dscp = 50, .msc = MBUF_SC_VO }, /* */ + { .dscp = 51, .msc = MBUF_SC_VO }, /* */ + { .dscp = 52, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Sigma */ + { .dscp = 53, .msc = MBUF_SC_VO }, /* */ + { .dscp = 54, .msc = MBUF_SC_VO }, /* */ + { .dscp = 55, .msc = MBUF_SC_VO }, /* */ + + { .dscp = _DSCP_CS7, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Chariot */ + { .dscp = 57, .msc = MBUF_SC_VO }, /* */ + { .dscp = 58, .msc = MBUF_SC_VO }, /* */ + { .dscp = 59, .msc = MBUF_SC_VO }, /* */ + { .dscp = 60, .msc = MBUF_SC_VO }, /* */ + { .dscp = 61, .msc = MBUF_SC_VO }, /* */ + { .dscp = 62, .msc = MBUF_SC_VO }, /* */ + { .dscp = 63, .msc = MBUF_SC_VO }, /* */ + + { .dscp = 255, .msc = MBUF_SC_UNSPEC } /* invalid DSCP to mark last entry */ +}; + +mbuf_svc_class_t wifi_dscp_to_msc_array[DSCP_ARRAY_SIZE]; + +/* + * If there is no foreground activity on the interface for bg_switch_time + * seconds, the background connections can switch to foreground TCP + * congestion control. 
+ */ +#define TCP_BG_SWITCH_TIME 2 /* seconds */ + +#if (DEVELOPMENT || DEBUG) static int tfp_count = 0; @@ -64,13 +284,13 @@ static TAILQ_HEAD(, tclass_for_proc) tfp_head = TAILQ_HEAD_INITIALIZER(tfp_head); struct tclass_for_proc { - TAILQ_ENTRY(tclass_for_proc) tfp_link; - int tfp_class; - pid_t tfp_pid; - char tfp_pname[MAXCOMLEN + 1]; + TAILQ_ENTRY(tclass_for_proc) tfp_link; + int tfp_class; + pid_t tfp_pid; + char tfp_pname[(2 * MAXCOMLEN) + 1]; + uint32_t tfp_qos_mode; }; -static int dscp_code_from_mbuf_tclass(mbuf_traffic_class_t); static int get_pid_tclass(struct so_tcdbg *); static int get_pname_tclass(struct so_tcdbg *); static int set_pid_tclass(struct so_tcdbg *); @@ -78,14 +298,7 @@ static int set_pname_tclass(struct so_tcdbg *); static int flush_pid_tclass(struct so_tcdbg *); static int purge_tclass_for_proc(void); static int flush_tclass_for_proc(void); -static void so_set_lro(struct socket*, int); -int get_tclass_for_curr_proc(int *); - -static lck_grp_attr_t *tclass_lck_grp_attr = NULL; /* mutex group attributes */ -static lck_grp_t *tclass_lck_grp = NULL; /* mutex group definition */ -static lck_attr_t *tclass_lck_attr = NULL; /* mutex attributes */ -decl_lck_mtx_data(static, tclass_lock_data); -static lck_mtx_t *tclass_lock = &tclass_lock_data; +static void set_tclass_for_curr_proc(struct socket *); /* * Must be called with tclass_lock held @@ -96,10 +309,11 @@ find_tfp_by_pid(pid_t pid) struct tclass_for_proc *tfp; TAILQ_FOREACH(tfp, &tfp_head, tfp_link) { - if (tfp->tfp_pid == pid) + if (tfp->tfp_pid == pid) { break; + } } - return (tfp); + return tfp; } /* @@ -112,36 +326,41 @@ find_tfp_by_pname(const char *pname) TAILQ_FOREACH(tfp, &tfp_head, tfp_link) { if (strncmp(pname, tfp->tfp_pname, - sizeof (tfp->tfp_pname)) == 0) + sizeof(tfp->tfp_pname)) == 0) { break; + } } - return (tfp); + return tfp; } -__private_extern__ int -get_tclass_for_curr_proc(int *sotc) +__private_extern__ void +set_tclass_for_curr_proc(struct socket *so) { struct 
tclass_for_proc *tfp = NULL; - proc_t p = current_proc(); /* Not ref counted */ + proc_t p = current_proc(); /* Not ref counted */ pid_t pid = proc_pid(p); - char *pname = proc_name_address(p); - - *sotc = -1; + char *pname = proc_best_name(p); lck_mtx_lock(tclass_lock); TAILQ_FOREACH(tfp, &tfp_head, tfp_link) { if ((tfp->tfp_pid == pid) || (tfp->tfp_pid == -1 && strncmp(pname, tfp->tfp_pname, - sizeof (tfp->tfp_pname)) == 0)) { - *sotc = tfp->tfp_class; + sizeof(tfp->tfp_pname)) == 0)) { + if (tfp->tfp_class != SO_TC_UNSPEC) { + so->so_traffic_class = (uint16_t)tfp->tfp_class; + } + + if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) { + so->so_flags1 |= SOF1_QOSMARKING_ALLOWED; + } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) { + so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED; + } break; } } lck_mtx_unlock(tclass_lock); - - return ((tfp == NULL) ? 0 : 1); } /* @@ -158,8 +377,9 @@ purge_tclass_for_proc(void) TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) { proc_t p; - if (tfp->tfp_pid == -1) + if (tfp->tfp_pid == -1) { continue; + } if ((p = proc_find(tfp->tfp_pid)) == NULL) { tfp_count--; TAILQ_REMOVE(&tfp_head, tfp, tfp_link); @@ -172,7 +392,7 @@ purge_tclass_for_proc(void) lck_mtx_unlock(tclass_lock); - return (error); + return error; } /* @@ -182,8 +402,9 @@ purge_tclass_for_proc(void) static void free_tclass_for_proc(struct tclass_for_proc *tfp) { - if (tfp == NULL) + if (tfp == NULL) { return; + } tfp_count--; TAILQ_REMOVE(&tfp_head, tfp, tfp_link); _FREE(tfp, M_TEMP); @@ -206,8 +427,7 @@ flush_tclass_for_proc(void) lck_mtx_unlock(tclass_lock); - return (error); - + return error; } /* @@ -218,12 +438,14 @@ alloc_tclass_for_proc(pid_t pid, const char *pname) { struct tclass_for_proc *tfp; - if (pid == -1 && pname == NULL) - return (NULL); + if (pid == -1 && pname == NULL) { + return NULL; + } - tfp = _MALLOC(sizeof (struct tclass_for_proc), M_TEMP, M_NOWAIT|M_ZERO); - if (tfp == NULL) - return (NULL); + tfp = _MALLOC(sizeof(struct 
tclass_for_proc), M_TEMP, M_NOWAIT | M_ZERO); + if (tfp == NULL) { + return NULL; + } tfp->tfp_pid = pid; /* @@ -233,29 +455,27 @@ alloc_tclass_for_proc(pid_t pid, const char *pname) if (pid != -1) { TAILQ_INSERT_HEAD(&tfp_head, tfp, tfp_link); } else { - strlcpy(tfp->tfp_pname, pname, sizeof (tfp->tfp_pname)); + strlcpy(tfp->tfp_pname, pname, sizeof(tfp->tfp_pname)); TAILQ_INSERT_TAIL(&tfp_head, tfp, tfp_link); } tfp_count++; - return (tfp); + return tfp; } /* - * -1 for tclass means to remove the entry + * SO_TC_UNSPEC for tclass means to remove the entry */ int set_pid_tclass(struct so_tcdbg *so_tcdbg) { int error = EINVAL; proc_t p = NULL; - struct filedesc *fdp; - struct fileproc *fp; struct tclass_for_proc *tfp; - int i; pid_t pid = so_tcdbg->so_tcdbg_pid; int tclass = so_tcdbg->so_tcdbg_tclass; + int netsvctype = so_tcdbg->so_tcdbg_netsvctype; p = proc_find(pid); if (p == NULL) { @@ -276,38 +496,41 @@ set_pid_tclass(struct so_tcdbg *so_tcdbg) } } tfp->tfp_class = tclass; + tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode; lck_mtx_unlock(tclass_lock); if (tfp != NULL) { - proc_fdlock(p); + struct fileproc *fp; - fdp = p->p_fd; - for (i = 0; i < fdp->fd_nfiles; i++) { + fdt_foreach(fp, p) { struct socket *so; - fp = fdp->fd_ofiles[i]; - if (fp == NULL || - (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 || - fp->f_fglob->fg_type != DTYPE_SOCKET) + if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) { continue; + } - so = (struct socket *)fp->f_fglob->fg_data; - if (so->so_proto->pr_domain->dom_family != AF_INET && - so->so_proto->pr_domain->dom_family != AF_INET6) + so = (struct socket *)fp->fp_glob->fg_data; + if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) { continue; + } + socket_lock(so, 1); - if (tclass != -1) { - error = so_set_traffic_class(so, tclass); - if (error != 0) { - printf("%s: so_set_traffic_class" - "(so=%p, fd=%d, tclass=%d) " - "failed %d\n", __func__, - so, i, tclass, error); - error = 0; - } + if (tfp->tfp_qos_mode == 
QOS_MODE_MARKING_POLICY_ENABLE) { + so->so_flags1 |= SOF1_QOSMARKING_ALLOWED; + } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) { + so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED; } socket_unlock(so, 1); + + if (netsvctype != _NET_SERVICE_TYPE_UNSPEC) { + error = sock_setsockopt(so, SOL_SOCKET, + SO_NET_SERVICE_TYPE, &netsvctype, sizeof(int)); + } + if (tclass != SO_TC_UNSPEC) { + error = sock_setsockopt(so, SOL_SOCKET, + SO_TRAFFIC_CLASS, &tclass, sizeof(int)); + } } proc_fdunlock(p); @@ -315,10 +538,11 @@ set_pid_tclass(struct so_tcdbg *so_tcdbg) error = 0; done: - if (p != NULL) + if (p != NULL) { proc_rele(p); + } - return (error); + return error; } int @@ -339,13 +563,14 @@ set_pname_tclass(struct so_tcdbg *so_tcdbg) } } tfp->tfp_class = so_tcdbg->so_tcdbg_tclass; + tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode; lck_mtx_unlock(tclass_lock); error = 0; done: - return (error); + return error; } static int @@ -353,47 +578,40 @@ flush_pid_tclass(struct so_tcdbg *so_tcdbg) { pid_t pid = so_tcdbg->so_tcdbg_pid; int tclass = so_tcdbg->so_tcdbg_tclass; - struct filedesc *fdp; - int error = EINVAL; + struct fileproc *fp; proc_t p; - int i; + int error; p = proc_find(pid); if (p == PROC_NULL) { printf("%s proc_find(%d) failed\n", __func__, pid); - goto done; + return EINVAL; } proc_fdlock(p); - fdp = p->p_fd; - for (i = 0; i < fdp->fd_nfiles; i++) { + + fdt_foreach(fp, p) { struct socket *so; - struct fileproc *fp; - fp = fdp->fd_ofiles[i]; - if (fp == NULL || - (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 || - fp->f_fglob->fg_type != DTYPE_SOCKET) + if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) { continue; + } - so = (struct socket *)fp->f_fglob->fg_data; + so = (struct socket *)fp->fp_glob->fg_data; error = sock_setsockopt(so, SOL_SOCKET, SO_FLUSH, &tclass, - sizeof (tclass)); + sizeof(tclass)); if (error != 0) { - printf("%s: setsockopt(SO_FLUSH) (so=%p, fd=%d, " - "tclass=%d) failed %d\n", __func__, so, i, tclass, + printf("%s: setsockopt(SO_FLUSH) 
(so=0x%llx, fd=%d, " + "tclass=%d) failed %d\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(so), fdt_foreach_fd(), tclass, error); - error = 0; } } - proc_fdunlock(p); - error = 0; -done: - if (p != PROC_NULL) - proc_rele(p); + proc_fdunlock(p); - return (error); + proc_rele(p); + return 0; } int @@ -404,8 +622,7 @@ get_pid_tclass(struct so_tcdbg *so_tcdbg) struct tclass_for_proc *tfp; pid_t pid = so_tcdbg->so_tcdbg_pid; - so_tcdbg->so_tcdbg_tclass = -1; /* Means not set */ - so_tcdbg->so_tcdbg_opportunistic = -1; /* Means not set */ + so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */ p = proc_find(pid); if (p == NULL) { @@ -419,14 +636,16 @@ get_pid_tclass(struct so_tcdbg *so_tcdbg) tfp = find_tfp_by_pid(pid); if (tfp != NULL) { so_tcdbg->so_tcdbg_tclass = tfp->tfp_class; + so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode; error = 0; } lck_mtx_unlock(tclass_lock); done: - if (p != NULL) + if (p != NULL) { proc_rele(p); + } - return (error); + return error; } int @@ -435,8 +654,7 @@ get_pname_tclass(struct so_tcdbg *so_tcdbg) int error = EINVAL; struct tclass_for_proc *tfp; - so_tcdbg->so_tcdbg_tclass = -1; /* Means not set */ - so_tcdbg->so_tcdbg_opportunistic = -1; /* Means not set */ + so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */ /* Need a tfp */ lck_mtx_lock(tclass_lock); @@ -444,11 +662,12 @@ get_pname_tclass(struct so_tcdbg *so_tcdbg) tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname); if (tfp != NULL) { so_tcdbg->so_tcdbg_tclass = tfp->tfp_class; + so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode; error = 0; } lck_mtx_unlock(tclass_lock); - return (error); + return error; } static int @@ -460,10 +679,11 @@ delete_tclass_for_pid_pname(struct so_tcdbg *so_tcdbg) lck_mtx_lock(tclass_lock); - if (pid != -1) + if (pid != -1) { tfp = find_tfp_by_pid(pid); - else + } else { tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname); + } if (tfp != NULL) { free_tclass_for_proc(tfp); @@ -472,7 +692,7 @@ delete_tclass_for_pid_pname(struct so_tcdbg 
*so_tcdbg) lck_mtx_unlock(tclass_lock); - return (error); + return error; } /* @@ -483,44 +703,45 @@ so_set_tcdbg(struct socket *so, struct so_tcdbg *so_tcdbg) { int error = 0; - if ((so->so_state & SS_PRIV) == 0) - return (EPERM); + if ((so->so_state & SS_PRIV) == 0) { + return EPERM; + } socket_unlock(so, 0); switch (so_tcdbg->so_tcdbg_cmd) { - case SO_TCDBG_PID: - error = set_pid_tclass(so_tcdbg); - break; + case SO_TCDBG_PID: + error = set_pid_tclass(so_tcdbg); + break; - case SO_TCDBG_PNAME: - error = set_pname_tclass(so_tcdbg); - break; + case SO_TCDBG_PNAME: + error = set_pname_tclass(so_tcdbg); + break; - case SO_TCDBG_PURGE: - error = purge_tclass_for_proc(); - break; + case SO_TCDBG_PURGE: + error = purge_tclass_for_proc(); + break; - case SO_TCDBG_FLUSH: - error = flush_tclass_for_proc(); - break; + case SO_TCDBG_FLUSH: + error = flush_tclass_for_proc(); + break; - case SO_TCDBG_DELETE: - error = delete_tclass_for_pid_pname(so_tcdbg); - break; + case SO_TCDBG_DELETE: + error = delete_tclass_for_pid_pname(so_tcdbg); + break; - case SO_TCDBG_TCFLUSH_PID: - error = flush_pid_tclass(so_tcdbg); - break; + case SO_TCDBG_TCFLUSH_PID: + error = flush_pid_tclass(so_tcdbg); + break; - default: - error = EINVAL; - break; + default: + error = EINVAL; + break; } socket_lock(so, 0); - return (error); + return error; } /* @@ -534,77 +755,80 @@ sogetopt_tcdbg(struct socket *so, struct sockopt *sopt) void *buf = NULL; size_t len = sopt->sopt_valsize; - error = sooptcopyin(sopt, &so_tcdbg, sizeof (struct so_tcdbg), - sizeof (struct so_tcdbg)); - if (error != 0) - return (error); + error = sooptcopyin(sopt, &so_tcdbg, sizeof(struct so_tcdbg), + sizeof(struct so_tcdbg)); + if (error != 0) { + return error; + } sopt->sopt_valsize = len; socket_unlock(so, 0); switch (so_tcdbg.so_tcdbg_cmd) { - case SO_TCDBG_PID: - error = get_pid_tclass(&so_tcdbg); - break; + case SO_TCDBG_PID: + error = get_pid_tclass(&so_tcdbg); + break; - case SO_TCDBG_PNAME: - error = 
get_pname_tclass(&so_tcdbg); - break; + case SO_TCDBG_PNAME: + error = get_pname_tclass(&so_tcdbg); + break; - case SO_TCDBG_COUNT: - lck_mtx_lock(tclass_lock); - so_tcdbg.so_tcdbg_count = tfp_count; - lck_mtx_unlock(tclass_lock); - break; + case SO_TCDBG_COUNT: + lck_mtx_lock(tclass_lock); + so_tcdbg.so_tcdbg_count = tfp_count; + lck_mtx_unlock(tclass_lock); + break; - case SO_TCDBG_LIST: { - struct tclass_for_proc *tfp; - int n, alloc_count; - struct so_tcdbg *ptr; + case SO_TCDBG_LIST: { + struct tclass_for_proc *tfp; + int n, alloc_count; + struct so_tcdbg *ptr; - lck_mtx_lock(tclass_lock); - if ((alloc_count = tfp_count) == 0) { - lck_mtx_unlock(tclass_lock); - error = EINVAL; - break; - } - len = alloc_count * sizeof (struct so_tcdbg); + lck_mtx_lock(tclass_lock); + if ((alloc_count = tfp_count) == 0) { lck_mtx_unlock(tclass_lock); + error = EINVAL; + break; + } + len = alloc_count * sizeof(struct so_tcdbg); + lck_mtx_unlock(tclass_lock); + + buf = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO); + if (buf == NULL) { + error = ENOBUFS; + break; + } - buf = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO); - if (buf == NULL) { - error = ENOBUFS; + lck_mtx_lock(tclass_lock); + n = 0; + ptr = (struct so_tcdbg *)buf; + TAILQ_FOREACH(tfp, &tfp_head, tfp_link) { + if (++n > alloc_count) { break; } - - lck_mtx_lock(tclass_lock); - n = 0; - ptr = (struct so_tcdbg *)buf; - TAILQ_FOREACH(tfp, &tfp_head, tfp_link) { - if (++n > alloc_count) - break; - if (tfp->tfp_pid != -1) { - ptr->so_tcdbg_cmd = SO_TCDBG_PID; - ptr->so_tcdbg_pid = tfp->tfp_pid; - } else { - ptr->so_tcdbg_cmd = SO_TCDBG_PNAME; - ptr->so_tcdbg_pid = -1; - strlcpy(ptr->so_tcdbg_pname, - tfp->tfp_pname, - sizeof (ptr->so_tcdbg_pname)); - } - ptr->so_tcdbg_tclass = tfp->tfp_class; - ptr++; + if (tfp->tfp_pid != -1) { + ptr->so_tcdbg_cmd = SO_TCDBG_PID; + ptr->so_tcdbg_pid = tfp->tfp_pid; + } else { + ptr->so_tcdbg_cmd = SO_TCDBG_PNAME; + ptr->so_tcdbg_pid = -1; + strlcpy(ptr->so_tcdbg_pname, + tfp->tfp_pname, + 
sizeof(ptr->so_tcdbg_pname)); } + ptr->so_tcdbg_tclass = tfp->tfp_class; + ptr->so_tcbbg_qos_mode = tfp->tfp_qos_mode; + ptr++; + } - lck_mtx_unlock(tclass_lock); - } - break; + lck_mtx_unlock(tclass_lock); + } + break; - default: - error = EINVAL; - break; + default: + error = EINVAL; + break; } socket_lock(so, 0); @@ -612,15 +836,56 @@ sogetopt_tcdbg(struct socket *so, struct sockopt *sopt) if (error == 0) { if (buf == NULL) { error = sooptcopyout(sopt, &so_tcdbg, - sizeof (struct so_tcdbg)); + sizeof(struct so_tcdbg)); } else { error = sooptcopyout(sopt, buf, len); _FREE(buf, M_TEMP); } } - return (error); + return error; } +#endif /* (DEVELOPMENT || DEBUG) */ + +int +so_get_netsvc_marking_level(struct socket *so) +{ + int marking_level = NETSVC_MRKNG_UNKNOWN; + struct ifnet *ifp = NULL; + + switch (SOCK_DOM(so)) { + case PF_INET: { + struct inpcb *inp = sotoinpcb(so); + + if (inp != NULL) { + ifp = inp->inp_last_outifp; + } + break; + } + case PF_INET6: { + struct in6pcb *in6p = sotoin6pcb(so); + + if (in6p != NULL) { + ifp = in6p->in6p_last_outifp; + } + break; + } + default: + break; + } + if (ifp != NULL) { + if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0) { + if ((so->so_flags1 & SOF1_QOSMARKING_ALLOWED)) { + marking_level = NETSVC_MRKNG_LVL_L3L2_ALL; + } else { + marking_level = NETSVC_MRKNG_LVL_L3L2_BK; + } + } else { + marking_level = NETSVC_MRKNG_LVL_L2; + } + } + return marking_level; +} __private_extern__ int so_set_traffic_class(struct socket *so, int optval) @@ -641,8 +906,9 @@ so_set_traffic_class(struct socket *so, int optval) optval = SO_TC_VO; break; default: - if (!SO_VALID_TC(optval)) + if (!SO_VALID_TC(optval)) { error = EINVAL; + } break; } @@ -650,135 +916,225 @@ so_set_traffic_class(struct socket *so, int optval) int oldval = so->so_traffic_class; VERIFY(SO_VALID_TC(optval)); - so->so_traffic_class = optval; + so->so_traffic_class = (uint16_t)optval; - if ((INP_SOCKAF(so) == AF_INET || - INP_SOCKAF(so) == AF_INET6) && - 
INP_SOCKTYPE(so) == SOCK_STREAM) { + if ((SOCK_DOM(so) == PF_INET || + SOCK_DOM(so) == PF_INET6) && + SOCK_TYPE(so) == SOCK_STREAM) { set_tcp_stream_priority(so); - - /* Set/unset use of Large Receive Offload */ - so_set_lro(so, optval); } - if ((INP_SOCKAF(so) == AF_INET || - INP_SOCKAF(so) == AF_INET6) && + if ((SOCK_DOM(so) == PF_INET || + SOCK_DOM(so) == PF_INET6) && optval != oldval && (optval == SO_TC_BK_SYS || oldval == SO_TC_BK_SYS)) { /* * If the app switches from BK_SYS to something * else, resume the socket if it was suspended. */ - if (oldval == SO_TC_BK_SYS) + if (oldval == SO_TC_BK_SYS) { inp_reset_fc_state(so->so_pcb); + } - SOTHROTTLELOG(("throttle[%d]: so %p [%d,%d] " - "opportunistic %s\n", so->last_pid, - so, INP_SOCKAF(so), INP_SOCKTYPE(so), - (optval == SO_TC_BK_SYS) ? "ON" : "OFF")); + SOTHROTTLELOG("throttle[%d]: so 0x%llx " + "[%d,%d] opportunistic %s\n", so->last_pid, + (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), + (optval == SO_TC_BK_SYS) ? 
"ON" : "OFF"); } } } - return (error); + return error; +} + +__private_extern__ int +so_set_net_service_type(struct socket *so, int netsvctype) +{ + int sotc; + int error; + + if (!IS_VALID_NET_SERVICE_TYPE(netsvctype)) { + return EINVAL; + } + + sotc = sotc_by_netservicetype[netsvctype]; + error = so_set_traffic_class(so, sotc); + if (error != 0) { + return error; + } + so->so_netsvctype = (int8_t)netsvctype; + so->so_flags1 |= SOF1_TC_NET_SERV_TYPE; + + return 0; } __private_extern__ void so_set_default_traffic_class(struct socket *so) { - int sotc = -1; + so->so_traffic_class = SO_TC_BE; - if (tfp_count > 0 && - (INP_SOCKAF(so) == AF_INET || INP_SOCKAF(so) == AF_INET6)) { - get_tclass_for_curr_proc(&sotc); + if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)) { + if (net_qos_policy_restricted == 0) { + so->so_flags1 |= SOF1_QOSMARKING_ALLOWED; + } +#if (DEVELOPMENT || DEBUG) + if (tfp_count > 0) { + set_tclass_for_curr_proc(so); + } +#endif /* (DEVELOPMENT || DEBUG) */ } - - so->so_traffic_class = (sotc != -1) ? sotc : SO_TC_BE; } __private_extern__ int so_set_opportunistic(struct socket *so, int optval) { - return (so_set_traffic_class(so, (optval == 0) ? - SO_TC_BE : SO_TC_BK_SYS)); + return so_set_traffic_class(so, (optval == 0) ? 
+ SO_TC_BE : SO_TC_BK_SYS); } __private_extern__ int so_get_opportunistic(struct socket *so) { - return (so->so_traffic_class == SO_TC_BK_SYS); + return so->so_traffic_class == SO_TC_BK_SYS; } -__private_extern__ mbuf_svc_class_t -mbuf_service_class_from_control(struct mbuf *control) +__private_extern__ int +so_tc_from_control(struct mbuf *control, int *out_netsvctype) { struct cmsghdr *cm; - mbuf_svc_class_t msc = MBUF_SC_UNSPEC; + int sotc = SO_TC_UNSPEC; - for (cm = M_FIRST_CMSGHDR(control); cm != NULL; - cm = M_NXT_CMSGHDR(control, cm)) { - int tc; + *out_netsvctype = _NET_SERVICE_TYPE_UNSPEC; - if (cm->cmsg_len < sizeof (struct cmsghdr)) - break; + for (cm = M_FIRST_CMSGHDR(control); + is_cmsg_valid(control, cm); + cm = M_NXT_CMSGHDR(control, cm)) { + int val; if (cm->cmsg_level != SOL_SOCKET || - cm->cmsg_type != SO_TRAFFIC_CLASS) + cm->cmsg_len != CMSG_LEN(sizeof(int))) { continue; - if (cm->cmsg_len != CMSG_LEN(sizeof (int))) - continue; - - tc = *(int *)(void *)CMSG_DATA(cm); - msc = so_tc2msc(tc); - if (MBUF_VALID_SC(msc)) + } + val = *(int *)(void *)CMSG_DATA(cm); + /* + * The first valid option wins + */ + switch (cm->cmsg_type) { + case SO_TRAFFIC_CLASS: + if (SO_VALID_TC(val)) { + sotc = val; + return sotc; + /* NOT REACHED */ + } else if (val < SO_TC_NET_SERVICE_OFFSET) { + break; + } + /* + * Handle the case SO_NET_SERVICE_TYPE values are + * passed using SO_TRAFFIC_CLASS + */ + val = val - SO_TC_NET_SERVICE_OFFSET; + OS_FALLTHROUGH; + case SO_NET_SERVICE_TYPE: + if (!IS_VALID_NET_SERVICE_TYPE(val)) { + break; + } + *out_netsvctype = val; + sotc = sotc_by_netservicetype[val]; + return sotc; + /* NOT REACHED */ + default: break; + } } - return (msc); + return sotc; } -__private_extern__ int -dscp_code_from_mbuf_tclass(mbuf_traffic_class_t mtc) +__private_extern__ int +so_tos_from_control(struct mbuf *control) { - int dscp_code; + struct cmsghdr *cm; + int tos = IPTOS_UNSPEC; - switch (mtc) { - default: - case MBUF_TC_BE: - dscp_code = 0; - break; - 
case MBUF_TC_BK: - dscp_code = 0x08; - break; - case MBUF_TC_VI: - dscp_code = 0x20; - break; - case MBUF_TC_VO: - dscp_code = 0x30; + for (cm = M_FIRST_CMSGHDR(control); + is_cmsg_valid(control, cm); + cm = M_NXT_CMSGHDR(control, cm)) { + if (cm->cmsg_len != CMSG_LEN(sizeof(int))) { + continue; + } + + if ((cm->cmsg_level == IPPROTO_IP && + cm->cmsg_type == IP_TOS) || + (cm->cmsg_level == IPPROTO_IPV6 && + cm->cmsg_type == IPV6_TCLASS)) { + tos = *(int *)(void *)CMSG_DATA(cm) & IPTOS_MASK; + /* The first valid option wins */ break; + } } - return (dscp_code); + return tos; } __private_extern__ void so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off) { - uint32_t sotc = m_get_traffic_class(m); + uint32_t mtc = m_get_traffic_class(m); - if (sotc >= SO_TC_STATS_MAX) - sotc = SO_TC_BE; + if (mtc >= SO_TC_STATS_MAX) { + mtc = MBUF_TC_BE; + } - so->so_tc_stats[sotc].rxpackets += 1; - so->so_tc_stats[sotc].rxbytes += + so->so_tc_stats[mtc].rxpackets += 1; + so->so_tc_stats[mtc].rxbytes += ((m->m_flags & M_PKTHDR) ? 
m->m_pkthdr.len : 0) + off; } +__private_extern__ void +so_inc_recv_data_stat(struct socket *so, size_t pkts, size_t bytes, + uint32_t mtc) +{ + if (mtc >= SO_TC_STATS_MAX) { + mtc = MBUF_TC_BE; + } + + so->so_tc_stats[mtc].rxpackets += pkts; + so->so_tc_stats[mtc].rxbytes += bytes; +} + +static inline int +so_throttle_best_effort(struct socket *so, struct ifnet *ifp) +{ + uint32_t uptime = (uint32_t)net_uptime(); + return soissrcbesteffort(so) && + net_io_policy_throttle_best_effort == 1 && + ifp->if_rt_sendts > 0 && + (int)(uptime - ifp->if_rt_sendts) <= TCP_BG_SWITCH_TIME; +} + __private_extern__ void set_tcp_stream_priority(struct socket *so) { - struct tcpcb *tp = intotcpcb(sotoinpcb(so)); - int old_cc = tp->tcp_cc_index; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); + struct ifnet *outifp; + u_char old_cc = tp->tcp_cc_index; int recvbg = IS_TCP_RECV_BG(so); + bool is_local = false, fg_active = false; + uint32_t uptime; + + VERIFY((SOCK_CHECK_DOM(so, PF_INET) || + SOCK_CHECK_DOM(so, PF_INET6)) && + SOCK_CHECK_TYPE(so, SOCK_STREAM) && + SOCK_CHECK_PROTO(so, IPPROTO_TCP)); + + /* Return if the socket is in a terminal state */ + if (inp->inp_state == INPCB_STATE_DEAD) { + return; + } + + outifp = inp->inp_last_outifp; + uptime = (uint32_t)net_uptime(); /* * If the socket was marked as a background socket or if the @@ -787,33 +1143,88 @@ set_tcp_stream_priority(struct socket *so) * background. The variable sotcdb which can be set with sysctl * is used to disable these settings for testing. */ - if (soisthrottled(so) || IS_SO_TC_BACKGROUND(so->so_traffic_class)) { - if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0) { - if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) + if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK)) { + is_local = true; + } + + /* Check if there has been recent foreground activity */ + if (outifp != NULL) { + /* + * If the traffic source is background, check if + * if it can be switched to foreground. 
This can + * happen when there is no indication of foreground + * activity. + */ + if (soissrcbackground(so) && outifp->if_fg_sendts > 0 && + (int)(uptime - outifp->if_fg_sendts) <= TCP_BG_SWITCH_TIME) { + fg_active = true; + } + + /* + * The traffic source is best-effort -- check if + * the policy to throttle best effort is enabled + * and there was realtime activity on this + * interface recently. If this is true, enable + * algorithms that respond to increased latency + * on best-effort traffic. + */ + if (so_throttle_best_effort(so, outifp)) { + fg_active = true; + } + } + + /* + * System initiated background traffic like cloud uploads should + * always use background delay sensitive algorithms. This will + * make the stream more responsive to other streams on the user's + * network and it will minimize latency induced. + */ + if (fg_active || IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) { + /* + * If the interface that the connection is using is + * loopback, do not use background congestion + * control algorithm. + * + * If there has been recent foreground activity or if + * there was an indication that a foreground application + * is going to use networking (net_io_policy_throttled), + * switch the background streams to use background + * congestion control algorithm. Otherwise, even background + * flows can move into foreground.
+ */ + if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 || is_local || + !IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) { + if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) { tcp_set_foreground_cc(so); + } } else { - if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX) + if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX) { tcp_set_background_cc(so); + } } /* Set receive side background flags */ - if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0) + if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 || is_local || + !IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) { tcp_clear_recv_bg(so); - else + } else { tcp_set_recv_bg(so); + } } else { tcp_clear_recv_bg(so); - if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) + if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) { tcp_set_foreground_cc(so); + } } if (old_cc != tp->tcp_cc_index || recvbg != IS_TCP_RECV_BG(so)) { - SOTHROTTLELOG(("throttle[%d]: so %p [%d,%d] TCP %s send; " - "%s recv\n", so->last_pid, so, INP_SOCKAF(so), - INP_SOCKTYPE(so), - (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ? - "background" : "foreground", - IS_TCP_RECV_BG(so) ? "background" : "foreground")); + SOTHROTTLELOG("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; " + "%s recv\n", so->last_pid, + (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), + (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ? + "background" : "foreground", + IS_TCP_RECV_BG(so) ? "background" : "foreground"); } } @@ -824,18 +1235,14 @@ set_tcp_stream_priority(struct socket *so) */ __private_extern__ void set_packet_service_class(struct mbuf *m, struct socket *so, - mbuf_svc_class_t in_msc, u_int32_t flags) + int sotc, uint32_t flags) { - mbuf_svc_class_t msc = MBUF_SC_BE; /* Best effort by default */ + mbuf_svc_class_t msc = MBUF_SC_BE; /* Best effort by default */ struct inpcb *inp = sotoinpcb(so); /* in6pcb and inpcb are the same */ - struct ip *ip = mtod(m, struct ip *); -#if INET6 - struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); -#endif /* INET6 */ - int isipv6 = ((flags & PKT_SCF_IPV6) != 0) ? 
1 : 0; - if (!(m->m_flags & M_PKTHDR)) + if (!(m->m_flags & M_PKTHDR)) { return; + } /* * Here is the precedence: @@ -843,127 +1250,72 @@ set_packet_service_class(struct mbuf *m, struct socket *so, * 2) Traffic class passed via ancillary data to sendmsdg(2) * 3) Traffic class socket option last */ - if (in_msc != MBUF_SC_UNSPEC) { - if (in_msc >= MBUF_SC_BE && in_msc <= MBUF_SC_CTL) - msc = in_msc; - } else { - VERIFY(SO_VALID_TC(so->so_traffic_class)); - msc = so_tc2msc(so->so_traffic_class); + if (sotc != SO_TC_UNSPEC) { + VERIFY(SO_VALID_TC(sotc)); + msc = so_tc2msc(sotc); /* Assert because tc must have been valid */ VERIFY(MBUF_VALID_SC(msc)); } /* - * If TRAFFIC_MGT_SO_BACKGROUND is set, depress the priority. + * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle + * best effort is set, depress the priority. */ - if (soisthrottled(so) && !IS_MBUF_SC_BACKGROUND(msc)) + if (!IS_MBUF_SC_BACKGROUND(msc) && soisthrottled(so)) { + msc = MBUF_SC_BK; + } + + if (IS_MBUF_SC_BESTEFFORT(msc) && inp->inp_last_outifp != NULL && + so_throttle_best_effort(so, inp->inp_last_outifp)) { msc = MBUF_SC_BK; + } + + if (soissrcbackground(so)) { + m->m_pkthdr.pkt_flags |= PKTF_SO_BACKGROUND; + } + if (soissrcrealtime(so) || IS_MBUF_SC_REALTIME(msc)) { + m->m_pkthdr.pkt_flags |= PKTF_SO_REALTIME; + } /* * Set the traffic class in the mbuf packet header svc field */ - if (sotcdb & SOTCDB_NO_MTC) + if (sotcdb & SOTCDB_NO_MTC) { goto no_mbtc; + } - /* Elevate service class if the packet is a pure TCP ACK. + /* + * Elevate service class if the packet is a pure TCP ACK. * We can do this only when the flow is not a background - * flow and the outgoing interface supports + * flow and the outgoing interface supports * transmit-start model. 
*/ - if (!IS_MBUF_SC_BACKGROUND(msc) && (flags & PKT_SCF_TCP_ACK)) + if (!IS_MBUF_SC_BACKGROUND(msc) && + (flags & (PKT_SCF_TCP_ACK | PKT_SCF_TCP_SYN)) != 0) { msc = MBUF_SC_CTL; + } (void) m_set_service_class(m, msc); /* - * Set the privileged traffic auxiliary flag if applicable, or clear it. + * Set the privileged traffic auxiliary flag if applicable, + * or clear it. */ if (!(sotcdb & SOTCDB_NO_PRIVILEGED) && soisprivilegedtraffic(so) && - msc != MBUF_SC_UNSPEC) - m->m_pkthdr.aux_flags |= MAUXF_PRIO_PRIVILEGED; - else - m->m_pkthdr.aux_flags &= ~MAUXF_PRIO_PRIVILEGED; + msc != MBUF_SC_UNSPEC) { + m->m_pkthdr.pkt_flags |= PKTF_PRIO_PRIVILEGED; + } else { + m->m_pkthdr.pkt_flags &= ~PKTF_PRIO_PRIVILEGED; + } no_mbtc: /* - * Quick exit when best effort - */ - if (msc == MBUF_SC_BE) - goto no_dscp; - - /* - * The default behavior is for the networking stack to not set the - * DSCP code, based on SOTCDB_NO_DSCP being set. If the flag is - * cleared, set the DSCP code in IPv4 or IPv6 header only for local - * traffic, if it is not already set. - */ - if (sotcdb & SOTCDB_NO_DSCP) - goto no_dscp; - - /* - * Test if a IP TOS or IPV6 TCLASS has already been set - * on the socket or the raw packet. 
+ * For TCP with background traffic class switch CC algo based on sysctl */ - if (!(sotcdb & SOTCDB_NO_DSCPTST)) { -#if INET6 - if (isipv6) { - if ((so->so_type == SOCK_RAW && - (ip6->ip6_flow & htonl(0xff << 20)) != 0) || - (inp->in6p_outputopts && - inp->in6p_outputopts->ip6po_tclass != -1)) - goto no_dscp; - } else -#endif /* INET6 */ - if ((so->so_type == SOCK_RAW && - (inp->inp_flags & INP_HDRINCL)) || - inp->inp_ip_tos != 0) - goto no_dscp; + if (so->so_type == SOCK_STREAM) { + set_tcp_stream_priority(so); } - /* - * Test if destination is local - */ - if (!(sotcdb & SOTCDB_NO_LCLTST)) { - int islocal = 0; - struct rtentry *rt = inp->inp_route.ro_rt; - - if (so->so_type == SOCK_STREAM) { - if (intotcpcb(inp)->t_flags & TF_LOCAL) - islocal = 1; - } else if (rt != NULL && - (rt->rt_gateway->sa_family == AF_LINK || - (rt->rt_ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)))) { - if (!(rt->rt_ifp->if_flags & IFF_POINTOPOINT)) - islocal = 1; - } else -#if INET6 - if (isipv6 && in6addr_local(&ip6->ip6_dst)) { - islocal = 1; - } else -#endif /* INET6 */ - if (inaddr_local(ip->ip_dst)) { - islocal = 1; - } - if (islocal == 0) - goto no_dscp; - } - -#if INET6 - if (isipv6) - ip6->ip6_flow |= htonl(dscp_code_from_mbuf_tclass( - m_get_traffic_class(m)) << 20); - else -#endif /* INET6 */ - ip->ip_tos |= dscp_code_from_mbuf_tclass( - m_get_traffic_class(m)) << 2; - -no_dscp: - /* - * For TCP with background traffic class switch CC algo based on sysctl - */ - if (so->so_type == SOCK_STREAM) - set_tcp_stream_priority(so); - so_tc_update_stats(m, so, msc); } @@ -987,6 +1339,8 @@ so_tc_update_stats(struct mbuf *m, struct socket *so, mbuf_svc_class_t msc) __private_extern__ void socket_tclass_init(void) { + _CASSERT(_SO_TC_MAX == SO_TC_STATS_MAX); + tclass_lck_grp_attr = lck_grp_attr_alloc_init(); tclass_lck_grp = lck_grp_alloc_init("tclass", tclass_lck_grp_attr); tclass_lck_attr = lck_attr_alloc_init(); @@ -1025,6 +1379,9 @@ so_tc2msc(int tc) case _SO_TC_VI: msc = MBUF_SC_VI; 
break; + case SO_TC_NETSVC_SIG: + msc = MBUF_SC_SIG; + break; case SO_TC_VO: case _SO_TC_VO: msc = MBUF_SC_VO; @@ -1038,15 +1395,13 @@ so_tc2msc(int tc) break; } - return (msc); + return msc; } __private_extern__ int so_svc2tc(mbuf_svc_class_t svc) { switch (svc) { - case MBUF_SC_UNSPEC: - return SO_TC_BE; case MBUF_SC_BK_SYS: return SO_TC_BK_SYS; case MBUF_SC_BK: @@ -1063,27 +1418,667 @@ so_svc2tc(mbuf_svc_class_t svc) return SO_TC_RV; case MBUF_SC_VI: return SO_TC_VI; + case MBUF_SC_SIG: + return SO_TC_NETSVC_SIG; case MBUF_SC_VO: return SO_TC_VO; case MBUF_SC_CTL: return SO_TC_CTL; + case MBUF_SC_UNSPEC: default: return SO_TC_BE; } } +static size_t +sotc_index(int sotc) +{ + switch (sotc) { + case SO_TC_BK_SYS: + return SOTCIX_BK_SYS; + case _SO_TC_BK: + case SO_TC_BK: + return SOTCIX_BK; + + case SO_TC_BE: + return SOTCIX_BE; + case SO_TC_RD: + return SOTCIX_RD; + case SO_TC_OAM: + return SOTCIX_OAM; + + case SO_TC_AV: + return SOTCIX_AV; + case SO_TC_RV: + return SOTCIX_RV; + case _SO_TC_VI: + case SO_TC_VI: + return SOTCIX_VI; + + case _SO_TC_VO: + case SO_TC_VO: + return SOTCIX_VO; + case SO_TC_CTL: + return SOTCIX_CTL; + + default: + break; + } + /* + * Unknown traffic class value + */ + return SIZE_T_MAX; +} + +uint8_t +fastlane_sc_to_dscp(uint32_t svc_class) +{ + uint8_t dscp = _DSCP_DF; + + switch (svc_class) { + case MBUF_SC_BK_SYS: + case MBUF_SC_BK: + dscp = _DSCP_AF11; + break; + + case MBUF_SC_BE: + dscp = _DSCP_DF; + break; + case MBUF_SC_RD: + dscp = _DSCP_AF21; + break; + case MBUF_SC_OAM: + dscp = _DSCP_CS2; + break; + + case MBUF_SC_AV: + dscp = _DSCP_AF31; + break; + case MBUF_SC_RV: + dscp = _DSCP_CS4; + break; + case MBUF_SC_VI: + dscp = _DSCP_AF41; + break; + case MBUF_SC_SIG: + dscp = _DSCP_CS3; + break; + + case MBUF_SC_VO: + dscp = _DSCP_EF; + break; + case MBUF_SC_CTL: + dscp = _DSCP_DF; + break; + default: + dscp = _DSCP_DF; + break; + } + + return dscp; +} + +uint8_t +rfc4594_sc_to_dscp(uint32_t svc_class) +{ + uint8_t dscp = 
_DSCP_DF; + + switch (svc_class) { + case MBUF_SC_BK_SYS: /* Low-Priority Data */ + case MBUF_SC_BK: + dscp = _DSCP_CS1; + break; + + case MBUF_SC_BE: /* Standard */ + dscp = _DSCP_DF; + break; + case MBUF_SC_RD: /* Low-Latency Data */ + dscp = _DSCP_AF21; + break; + + /* SVC_CLASS Not Defined: High-Throughput Data */ + + case MBUF_SC_OAM: /* OAM */ + dscp = _DSCP_CS2; + break; + + /* SVC_CLASS Not Defined: Broadcast Video */ + + case MBUF_SC_AV: /* Multimedia Streaming */ + dscp = _DSCP_AF31; + break; + case MBUF_SC_RV: /* Real-Time Interactive */ + dscp = _DSCP_CS4; + break; + case MBUF_SC_VI: /* Multimedia Conferencing */ + dscp = _DSCP_AF41; + break; + case MBUF_SC_SIG: /* Signaling */ + dscp = _DSCP_CS5; + break; + + case MBUF_SC_VO: /* Telephony */ + dscp = _DSCP_EF; + break; + case MBUF_SC_CTL: /* Network Control*/ + dscp = _DSCP_CS6; + break; + default: + dscp = _DSCP_DF; + break; + } + + return dscp; +} + +mbuf_traffic_class_t +rfc4594_dscp_to_tc(uint8_t dscp) +{ + mbuf_traffic_class_t tc = MBUF_TC_BE; + + switch (dscp) { + case _DSCP_CS1: + tc = MBUF_TC_BK; + break; + case _DSCP_DF: + case _DSCP_AF21: + case _DSCP_CS2: + tc = MBUF_TC_BE; + break; + case _DSCP_AF31: + case _DSCP_CS4: + case _DSCP_AF41: + case _DSCP_CS5: + tc = MBUF_TC_VI; + break; + case _DSCP_EF: + case _DSCP_CS6: + tc = MBUF_TC_VO; + break; + default: + tc = MBUF_TC_BE; + break; + } + + return tc; +} + /* - * LRO is turned on for AV streaming and background classes. 
+ * Pass NULL ifp for default map */ -static void -so_set_lro(struct socket *so, int optval) +static errno_t +set_netsvctype_dscp_map(struct net_qos_dscp_map *net_qos_dscp_map, + const struct netsvctype_dscp_map *netsvctype_dscp_map) { - if ((optval == SO_TC_BK) || - (optval == SO_TC_BK_SYS) || - (optval == SO_TC_AV)) { - so->so_flags |= SOF_USELRO; + size_t i; + int netsvctype; + + /* + * Do not accept more than max number of distinct DSCPs + */ + if (net_qos_dscp_map == NULL || netsvctype_dscp_map == NULL) { + return EINVAL; + } + + /* + * Validate input parameters + */ + for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) { + if (!IS_VALID_NET_SERVICE_TYPE(netsvctype_dscp_map[i].netsvctype)) { + return EINVAL; + } + if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) { + return EINVAL; + } + } + + for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) { + netsvctype = netsvctype_dscp_map[i].netsvctype; + + net_qos_dscp_map->netsvctype_to_dscp[netsvctype] = + netsvctype_dscp_map[i].dscp; + } + for (netsvctype = 0; netsvctype < _NET_SERVICE_TYPE_COUNT; netsvctype++) { + switch (netsvctype) { + case NET_SERVICE_TYPE_BE: + case NET_SERVICE_TYPE_BK: + case NET_SERVICE_TYPE_VI: + case NET_SERVICE_TYPE_VO: + case NET_SERVICE_TYPE_RV: + case NET_SERVICE_TYPE_AV: + case NET_SERVICE_TYPE_OAM: + case NET_SERVICE_TYPE_RD: { + size_t sotcix; + + sotcix = sotc_index(sotc_by_netservicetype[netsvctype]); + if (sotcix != SIZE_T_MAX) { + net_qos_dscp_map->sotc_to_dscp[sotcix] = + netsvctype_dscp_map[netsvctype].dscp; + } + break; + } + case NET_SERVICE_TYPE_SIG: + /* Signaling does not have its own traffic class */ + break; + default: + /* We should not be here */ + ASSERT(0); + } + } + if (net_qos_dscp_map == &fastlane_net_qos_dscp_map) { + /* Network control socket traffic class is always best effort for fastlane*/ + net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_DF; } else { - so->so_flags &= ~SOF_USELRO; + net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_CS6; + } + + /* Background socket
traffic class DSCP same as background system */ + net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK] = + net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK_SYS]; + + return 0; +} + +static size_t +get_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map) +{ + struct net_qos_dscp_map *net_qos_dscp_map; + int i; + + net_qos_dscp_map = &fastlane_net_qos_dscp_map; + + for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) { + netsvctype_dscp_map[i].netsvctype = i; + netsvctype_dscp_map[i].dscp = net_qos_dscp_map->netsvctype_to_dscp[i]; } + + return i * sizeof(struct netsvctype_dscp_map); } +void +net_qos_map_init() +{ + errno_t error; + + error = set_netsvctype_dscp_map(&fastlane_net_qos_dscp_map, + fastlane_netsvctype_dscp_map); + ASSERT(error == 0); + + error = set_netsvctype_dscp_map(&rfc4594_net_qos_dscp_map, + rfc4594_netsvctype_dscp_map); + ASSERT(error == 0); + + set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1); +} + +int +sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error = 0; + + if (req->oldptr == USER_ADDR_NULL) { + req->oldidx = + _NET_SERVICE_TYPE_COUNT * sizeof(struct netsvctype_dscp_map); + } else if (req->oldlen > 0) { + struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {}; + size_t len; + + len = get_netsvctype_dscp_map(netsvctype_dscp_map); + + error = SYSCTL_OUT(req, netsvctype_dscp_map, + MIN(len, req->oldlen)); + if (error != 0) { + goto done; + } + } + + if (req->newptr != USER_ADDR_NULL) { + error = EPERM; + } +done: + return error; +} + +__private_extern__ errno_t +set_packet_qos(struct mbuf *m, struct ifnet *ifp, boolean_t qos_allowed, + int sotc, int netsvctype, uint8_t *dscp_inout) +{ + if (ifp == NULL || dscp_inout == NULL) { + return EINVAL; + } + + if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0 && + ifp->if_qosmarking_mode != IFRTYPE_QOSMARKING_MODE_NONE) { + uint8_t dscp; + const struct net_qos_dscp_map *net_qos_dscp_map = NULL; + + switch
(ifp->if_qosmarking_mode) { + case IFRTYPE_QOSMARKING_FASTLANE: + net_qos_dscp_map = &fastlane_net_qos_dscp_map; + break; + case IFRTYPE_QOSMARKING_RFC4594: + net_qos_dscp_map = &rfc4594_net_qos_dscp_map; + break; + default: + panic("invalid QoS marking type"); + /* NOTREACHED */ + } + + /* + * When on a Fastlane network, IP_TOS/IPV6_TCLASS are no-ops + */ + dscp = _DSCP_DF; + + /* + * For DSCP use the network service type if specified, otherwise + * use the socket traffic class + * + * When not whitelisted by the policy, set DSCP only for best + * effort and background, and set the mbuf service class to + * best effort as well so the packet will be queued and + * scheduled at a lower priority. + * We still want to prioritize control traffic on the interface + * so we do not change the mbuf service class for SO_TC_CTL + */ + if (IS_VALID_NET_SERVICE_TYPE(netsvctype) && + netsvctype != NET_SERVICE_TYPE_BE) { + dscp = net_qos_dscp_map->netsvctype_to_dscp[netsvctype]; + + if (qos_allowed == FALSE && + netsvctype != NET_SERVICE_TYPE_BE && + netsvctype != NET_SERVICE_TYPE_BK) { + dscp = _DSCP_DF; + if (sotc != SO_TC_CTL) { + m_set_service_class(m, MBUF_SC_BE); + } + } + } else if (sotc != SO_TC_UNSPEC) { + size_t sotcix = sotc_index(sotc); + if (sotcix != SIZE_T_MAX) { + dscp = net_qos_dscp_map->sotc_to_dscp[sotcix]; + + if (qos_allowed == FALSE && sotc != SO_TC_BE && + sotc != SO_TC_BK && sotc != SO_TC_BK_SYS && + sotc != SO_TC_CTL) { + dscp = _DSCP_DF; + if (sotc != SO_TC_CTL) { + m_set_service_class(m, MBUF_SC_BE); + } + } + } + } + if (net_qos_verbose != 0) { + printf("%s qos_allowed %d sotc %u netsvctype %u dscp %u\n", + __func__, qos_allowed, sotc, netsvctype, dscp); + } + + if (*dscp_inout != dscp) { + *dscp_inout = dscp; + } + } else if (*dscp_inout != _DSCP_DF && IFNET_IS_WIFI_INFRA(ifp)) { + mbuf_svc_class_t msc = m_get_service_class(m); + + /* + * For WiFi infra, when the mbuf service class is best effort + * and the DSCP is not default, set the service class
based + * on DSCP + */ + if (msc == MBUF_SC_BE) { + msc = wifi_dscp_to_msc_array[*dscp_inout]; + + if (msc != MBUF_SC_BE) { + m_set_service_class(m, msc); + + if (net_qos_verbose != 0) { + printf("%s set msc %u for dscp %u\n", + __func__, msc, *dscp_inout); + } + } + } + } + + return 0; +} + +static void +set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *map, int clear) +{ + int i; + + if (clear) { + bzero(wifi_dscp_to_msc_array, sizeof(wifi_dscp_to_msc_array)); + } + + for (i = 0; i < DSCP_ARRAY_SIZE; i++) { + const struct dcsp_msc_map *elem = map + i; + + if (elem->dscp > _MAX_DSCP || elem->msc == MBUF_SC_UNSPEC) { + break; + } + switch (elem->msc) { + case MBUF_SC_BK_SYS: + case MBUF_SC_BK: + wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BK; + break; + default: + case MBUF_SC_BE: + case MBUF_SC_RD: + case MBUF_SC_OAM: + wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BE; + break; + case MBUF_SC_AV: + case MBUF_SC_RV: + case MBUF_SC_VI: + wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VI; + break; + case MBUF_SC_VO: + case MBUF_SC_CTL: + wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VO; + break; + } + } +} + +static errno_t +dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map, + size_t count, struct dcsp_msc_map *dcsp_msc_map) +{ + errno_t error = 0; + uint32_t i; + + /* + * Validate input parameters + */ + for (i = 0; i < count; i++) { + if (!SO_VALID_TC(netsvctype_dscp_map[i].netsvctype)) { + error = EINVAL; + goto done; + } + if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) { + error = EINVAL; + goto done; + } + } + + bzero(dcsp_msc_map, DSCP_ARRAY_SIZE * sizeof(struct dcsp_msc_map)); + + for (i = 0; i < count; i++) { + dcsp_msc_map[i].dscp = netsvctype_dscp_map[i].dscp; + dcsp_msc_map[i].msc = so_tc2msc(netsvctype_dscp_map[i].netsvctype); + } +done: + return error; +} + +int +sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error = 0; + size_t len = DSCP_ARRAY_SIZE * sizeof(struct 
netsvctype_dscp_map); + struct netsvctype_dscp_map netsvctype_dscp_map[DSCP_ARRAY_SIZE] = {}; + struct dcsp_msc_map dcsp_msc_map[DSCP_ARRAY_SIZE]; + size_t count; + + if (req->oldptr == USER_ADDR_NULL) { + req->oldidx = len; + } else if (req->oldlen > 0) { + uint8_t i; + + for (i = 0; i < DSCP_ARRAY_SIZE; i++) { + netsvctype_dscp_map[i].dscp = i; + netsvctype_dscp_map[i].netsvctype = + so_svc2tc(wifi_dscp_to_msc_array[i]); + } + error = SYSCTL_OUT(req, netsvctype_dscp_map, + MIN(len, req->oldlen)); + if (error != 0) { + goto done; + } + } + + if (req->newptr == USER_ADDR_NULL) { + goto done; + } + + error = proc_suser(current_proc()); + if (error != 0) { + goto done; + } + + /* + * Check input length + */ + if (req->newlen > len) { + error = EINVAL; + goto done; + } + /* + * Cap the number of entries to copy from input buffer + */ + if (len > req->newlen) { + len = req->newlen; + } + error = SYSCTL_IN(req, netsvctype_dscp_map, len); + if (error != 0) { + goto done; + } + count = len / sizeof(struct netsvctype_dscp_map); + bzero(dcsp_msc_map, sizeof(dcsp_msc_map)); + error = dscp_msc_map_from_netsvctype_dscp_map(netsvctype_dscp_map, count, + dcsp_msc_map); + if (error != 0) { + goto done; + } + set_dscp_to_wifi_ac_map(dcsp_msc_map, 0); +done: + return error; +} + +int +sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error = 0; + int val = 0; + + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || !req->newptr) { + return error; + } + + set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1); + + return 0; +} + +/* + * Returns whether a large upload or download transfer should be marked as + * BK service type for network activity. 
This is a system level + * hint/suggestion to classify application traffic based on statistics + * collected from the current network attachment + * + * Returns 1 for BK and 0 for default + */ + +int +net_qos_guideline(struct proc *p, struct net_qos_guideline_args *arg, + int *retval) +{ +#pragma unused(p) +#define RETURN_USE_BK 1 +#define RETURN_USE_DEFAULT 0 + struct net_qos_param qos_arg; + struct ifnet *ipv4_primary, *ipv6_primary; + int err = 0; + + if (arg->param == USER_ADDR_NULL || retval == NULL || + arg->param_len != sizeof(qos_arg)) { + return EINVAL; + } + err = copyin(arg->param, (caddr_t) &qos_arg, sizeof(qos_arg)); + if (err != 0) { + return err; + } + + *retval = RETURN_USE_DEFAULT; + ipv4_primary = ifindex2ifnet[get_primary_ifscope(AF_INET)]; + ipv6_primary = ifindex2ifnet[get_primary_ifscope(AF_INET6)]; + + /* + * If either of the interfaces is in Low Internet mode, enable + * background delay based algorithms on this transfer + */ + if (qos_arg.nq_uplink) { + if ((ipv4_primary != NULL && + (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_UL)) || + (ipv6_primary != NULL && + (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_UL))) { + *retval = RETURN_USE_BK; + return 0; + } + } else { + if ((ipv4_primary != NULL && + (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_DL)) || + (ipv6_primary != NULL && + (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_DL))) { + *retval = RETURN_USE_BK; + return 0; + } + } + + /* + * Sometimes IPv4 and IPv6 primary interfaces can be different.
+ * In this case, if either of them is non-cellular, we should mark + * the transfer as BK as it can potentially get used based on + * the host name resolution + */ + if (ipv4_primary != NULL && IFNET_IS_EXPENSIVE(ipv4_primary) && + ipv6_primary != NULL && IFNET_IS_EXPENSIVE(ipv6_primary)) { + if (qos_arg.nq_use_expensive) { + return 0; + } else { + *retval = RETURN_USE_BK; + return 0; + } + } + if (ipv4_primary != NULL && IFNET_IS_CONSTRAINED(ipv4_primary) && + ipv6_primary != NULL && IFNET_IS_CONSTRAINED(ipv6_primary)) { + if (qos_arg.nq_use_constrained) { + return 0; + } else { + *retval = RETURN_USE_BK; + return 0; + } + } + if (qos_arg.nq_transfer_size >= 5 * 1024 * 1024) { + *retval = RETURN_USE_BK; + return 0; + } + + +#undef RETURN_USE_BK +#undef RETURN_USE_DEFAULT + return 0; +}